From 94c434a61e9e1cb96823d6d8fd5c1748085bf7f8 Mon Sep 17 00:00:00 2001 From: zhouhongfeng Date: Tue, 16 Jun 2026 18:34:21 +0800 Subject: [PATCH 01/38] test: add test cases for nested columns --- .../page_filtered_row_group_reader_test.cpp | 274 ++++++++++++++++++ 1 file changed, 274 insertions(+) diff --git a/src/paimon/format/parquet/page_filtered_row_group_reader_test.cpp b/src/paimon/format/parquet/page_filtered_row_group_reader_test.cpp index a963efaba..795ae142b 100644 --- a/src/paimon/format/parquet/page_filtered_row_group_reader_test.cpp +++ b/src/paimon/format/parquet/page_filtered_row_group_reader_test.cpp @@ -873,5 +873,279 @@ TEST_F(PageFilteredRowGroupReaderTest, ComputePageRangesWithDictionaryEncoding) auto partial_concat = arrow::Concatenate(result_partial->chunks()).ValueOrDie(); ASSERT_TRUE(partial_concat->Equals(expected_struct)); } +/// Helper: build a StructArray with a top-level int32 "id" column and a nested struct column +/// "info" containing two int32 fields: "x" and "y". +/// id[i] = i, info.x[i] = i * 100, info.y[i] = i * 100 + 1, for i in [0, N). 
+/// +/// Arrow schema: { id: int32, info: struct } +/// Parquet leaf columns: [id (index 0), info.x (index 1), info.y (index 2)] +static std::shared_ptr MakeNestedStructData(int32_t num_rows) { + arrow::Int32Builder id_builder, x_builder, y_builder; + EXPECT_TRUE(id_builder.Reserve(num_rows).ok()); + EXPECT_TRUE(x_builder.Reserve(num_rows).ok()); + EXPECT_TRUE(y_builder.Reserve(num_rows).ok()); + for (int32_t i = 0; i < num_rows; ++i) { + id_builder.UnsafeAppend(i); + x_builder.UnsafeAppend(i * 100); + y_builder.UnsafeAppend(i * 100 + 1); + } + auto id_array = id_builder.Finish().ValueOrDie(); + auto x_array = x_builder.Finish().ValueOrDie(); + auto y_array = y_builder.Finish().ValueOrDie(); + + auto field_x = arrow::field("x", arrow::int32()); + auto field_y = arrow::field("y", arrow::int32()); + auto inner_struct = + arrow::StructArray::Make({x_array, y_array}, {field_x, field_y}).ValueOrDie(); + + auto field_id = arrow::field("id", arrow::int32()); + auto field_info = arrow::field("info", arrow::struct_({field_x, field_y})); + return arrow::StructArray::Make({id_array, inner_struct}, {field_id, field_info}).ValueOrDie(); +} + +/// Test: page-level filtering on a file with nested struct columns. +/// +/// This test exposes the bug where BuildPageFilteredSchema fails to correctly map +/// Parquet leaf column indices to Arrow fields for nested types, and +/// ReadFilteredRowGroup cannot correctly assemble nested column results. +/// +/// Schema: { id: int32, info: struct } +/// Parquet leaf columns: [id=0, info.x=1, info.y=2] +/// 100 rows, 10 per page, 1 row group. +/// Predicate: id >= 50 → pages 0-4 skipped, pages 5-9 read → 50 rows expected. +/// The read schema requests both "id" and "info" columns. 
+TEST_F(PageFilteredRowGroupReaderTest, NestedStructColumnPageFilter) { + std::string file_name = dir_->Str() + "/nested_struct_filter.parquet"; + auto data = MakeNestedStructData(100); + WriteTestFile(file_name, data, /*write_batch_size=*/10, /*max_row_group_length=*/100); + + auto field_x = arrow::field("x", arrow::int32()); + auto field_y = arrow::field("y", arrow::int32()); + auto read_schema = arrow::schema({arrow::field("id", arrow::int32()), + arrow::field("info", arrow::struct_({field_x, field_y}))}); + + auto predicate = PredicateBuilder::GreaterOrEqual( + /*field_index=*/0, /*field_name=*/"id", FieldType::INT, Literal(50)); + + std::shared_ptr result; + ReadWithPredicateImpl(file_name, read_schema, predicate, &result); + + // Should get rows 50-99 = 50 rows + ASSERT_TRUE(result); + ASSERT_EQ(50, result->length()); + + // Build expected result: rows 50-99 from the original data + auto expected = data->Slice(50, 50); + ASSERT_TRUE(expected->Equals(result->chunk(0))); +} + +/// Test: page-level filtering reading only the nested struct column (without the predicate column). +/// +/// This verifies that when reading a subset of columns that includes only a nested column, +/// the schema mapping and column assembly work correctly. +/// +/// Schema: { id: int32, info: struct } +/// Read schema: { info: struct } (only the nested column) +/// Predicate on "id": id >= 50. 
+TEST_F(PageFilteredRowGroupReaderTest, NestedStructColumnOnlyReadNestedField) { + std::string file_name = dir_->Str() + "/nested_struct_only_nested.parquet"; + auto data = MakeNestedStructData(100); + WriteTestFile(file_name, data, /*write_batch_size=*/10, /*max_row_group_length=*/100); + + auto field_x = arrow::field("x", arrow::int32()); + auto field_y = arrow::field("y", arrow::int32()); + // Read only the nested "info" column + auto read_schema = arrow::schema({arrow::field("info", arrow::struct_({field_x, field_y}))}); + + // Predicate is on "id" (field_index=0 in file schema, not in read schema) + auto predicate = PredicateBuilder::GreaterOrEqual( + /*field_index=*/0, /*field_name=*/"id", FieldType::INT, Literal(50)); + + std::shared_ptr result; + ReadWithPredicateImpl(file_name, read_schema, predicate, &result); + + // Should get rows 50-99 = 50 rows + ASSERT_TRUE(result); + ASSERT_EQ(50, result->length()); + + // Build expected: only the "info" field from rows 50-99 + auto sliced = std::dynamic_pointer_cast(data->Slice(50, 50)); + ASSERT_TRUE(sliced); + // Extract only the "info" column (field index 1) and wrap as struct with single field + auto expected = + arrow::StructArray::Make({sliced->field(1)}, + {arrow::field("info", arrow::struct_({field_x, field_y}))}) + .ValueOrDie(); + ASSERT_TRUE(expected->Equals(result->chunk(0))); +} + +/// Helper: build a StructArray with an int32 "id" column and a list "tags" column. +/// id[i] = i, tags[i] = [i*10, i*10+1], for i in [0, N). 
+/// +/// Arrow schema: { id: int32, tags: list } +/// Parquet leaf columns: [id (index 0), tags.item (index 1)] +static std::shared_ptr MakeListColumnData(int32_t num_rows) { + arrow::Int32Builder id_builder; + EXPECT_TRUE(id_builder.Reserve(num_rows).ok()); + for (int32_t i = 0; i < num_rows; ++i) { + id_builder.UnsafeAppend(i); + } + auto id_array = id_builder.Finish().ValueOrDie(); + + auto value_builder = std::make_shared(); + arrow::ListBuilder list_builder(arrow::default_memory_pool(), value_builder); + for (int32_t i = 0; i < num_rows; ++i) { + EXPECT_TRUE(list_builder.Append().ok()); + EXPECT_TRUE(value_builder->Append(i * 10).ok()); + EXPECT_TRUE(value_builder->Append(i * 10 + 1).ok()); + } + auto list_array = list_builder.Finish().ValueOrDie(); + + auto field_id = arrow::field("id", arrow::int32()); + auto field_tags = arrow::field("tags", arrow::list(arrow::field("item", arrow::int32()))); + return arrow::StructArray::Make({id_array, list_array}, {field_id, field_tags}).ValueOrDie(); +} + +/// Helper: build a StructArray with an int32 "id" column and a map "props" column. +/// id[i] = i, props[i] = {"k_i": i * 100}, for i in [0, N). 
+/// +/// Arrow schema: { id: int32, props: map } +/// Parquet leaf columns: [id (index 0), props.key (index 1), props.value (index 2)] +static std::shared_ptr MakeMapColumnData(int32_t num_rows) { + arrow::Int32Builder id_builder; + EXPECT_TRUE(id_builder.Reserve(num_rows).ok()); + for (int32_t i = 0; i < num_rows; ++i) { + id_builder.UnsafeAppend(i); + } + auto id_array = id_builder.Finish().ValueOrDie(); + + auto key_builder = std::make_shared(); + auto value_builder = std::make_shared(); + arrow::MapBuilder map_builder(arrow::default_memory_pool(), key_builder, value_builder); + for (int32_t i = 0; i < num_rows; ++i) { + EXPECT_TRUE(map_builder.Append().ok()); + std::string key = "k_" + std::to_string(i); + EXPECT_TRUE(key_builder->Append(key).ok()); + EXPECT_TRUE(value_builder->Append(i * 100).ok()); + } + auto map_array = map_builder.Finish().ValueOrDie(); + + auto field_id = arrow::field("id", arrow::int32()); + auto field_props = arrow::field("props", arrow::map(arrow::utf8(), arrow::int32())); + return arrow::StructArray::Make({id_array, map_array}, {field_id, field_props}).ValueOrDie(); +} + +/// Test: page-level filtering on a file with a list column. +/// +/// Schema: { id: int32, tags: list } +/// 100 rows, 10 per page, 1 row group. +/// Predicate: id >= 50 → pages 0-4 skipped, pages 5-9 read → 50 rows expected. 
+TEST_F(PageFilteredRowGroupReaderTest, NestedListColumnPageFilter) { + std::string file_name = dir_->Str() + "/nested_list_filter.parquet"; + auto data = MakeListColumnData(100); + WriteTestFile(file_name, data, /*write_batch_size=*/10, /*max_row_group_length=*/100); + + auto read_schema = + arrow::schema({arrow::field("id", arrow::int32()), + arrow::field("tags", arrow::list(arrow::field("item", arrow::int32())))}); + + auto predicate = PredicateBuilder::GreaterOrEqual( + /*field_index=*/0, /*field_name=*/"id", FieldType::INT, Literal(50)); + + std::shared_ptr result; + ReadWithPredicateImpl(file_name, read_schema, predicate, &result); + + ASSERT_TRUE(result); + ASSERT_EQ(50, result->length()); + + // Build expected result: rows 50-99 from the original data + auto expected = data->Slice(50, 50); + ASSERT_TRUE(expected->Equals(result->chunk(0))); +} + +/// Test: page-level filtering on a file with a map column. +/// +/// Schema: { id: int32, props: map } +/// 100 rows, 10 per page, 1 row group. +/// Predicate: id >= 50 → pages 0-4 skipped, pages 5-9 read → 50 rows expected. 
+TEST_F(PageFilteredRowGroupReaderTest, NestedMapColumnPageFilter) { + std::string file_name = dir_->Str() + "/nested_map_filter.parquet"; + auto data = MakeMapColumnData(100); + WriteTestFile(file_name, data, /*write_batch_size=*/10, /*max_row_group_length=*/100); + + auto read_schema = + arrow::schema({arrow::field("id", arrow::int32()), + arrow::field("props", arrow::map(arrow::utf8(), arrow::int32()))}); + + auto predicate = PredicateBuilder::GreaterOrEqual( + /*field_index=*/0, /*field_name=*/"id", FieldType::INT, Literal(50)); + + std::shared_ptr result; + ReadWithPredicateImpl(file_name, read_schema, predicate, &result); + + ASSERT_TRUE(result); + ASSERT_EQ(50, result->length()); + + // Build expected result: rows 50-99 from the original data + auto expected = data->Slice(50, 50); + ASSERT_TRUE(expected->Equals(result->chunk(0))); +} + +/// Test: page-level filtering with multiple adjacent nested columns (struct + list). +/// +/// Schema: { id: int32, info: struct, tags: list } +/// This tests the boundary handling when two nested fields are adjacent in the schema. +/// Predicate: id >= 50. 
+TEST_F(PageFilteredRowGroupReaderTest, MultipleAdjacentNestedColumns) { + std::string file_name = dir_->Str() + "/multi_nested.parquet"; + + // Build data with id, info (struct), tags (list) + arrow::Int32Builder id_builder, x_builder, y_builder; + ASSERT_TRUE(id_builder.Reserve(100).ok()); + ASSERT_TRUE(x_builder.Reserve(100).ok()); + ASSERT_TRUE(y_builder.Reserve(100).ok()); + auto value_builder = std::make_shared(); + arrow::ListBuilder list_builder(arrow::default_memory_pool(), value_builder); + + for (int32_t i = 0; i < 100; ++i) { + id_builder.UnsafeAppend(i); + x_builder.UnsafeAppend(i * 100); + y_builder.UnsafeAppend(i * 100 + 1); + ASSERT_TRUE(list_builder.Append().ok()); + ASSERT_TRUE(value_builder->Append(i * 10).ok()); + } + auto id_array = id_builder.Finish().ValueOrDie(); + auto x_array = x_builder.Finish().ValueOrDie(); + auto y_array = y_builder.Finish().ValueOrDie(); + auto list_array = list_builder.Finish().ValueOrDie(); + + auto field_x = arrow::field("x", arrow::int32()); + auto field_y = arrow::field("y", arrow::int32()); + auto inner_struct = + arrow::StructArray::Make({x_array, y_array}, {field_x, field_y}).ValueOrDie(); + + auto field_id = arrow::field("id", arrow::int32()); + auto field_info = arrow::field("info", arrow::struct_({field_x, field_y})); + auto field_tags = arrow::field("tags", arrow::list(arrow::field("item", arrow::int32()))); + auto data = arrow::StructArray::Make({id_array, inner_struct, list_array}, + {field_id, field_info, field_tags}) + .ValueOrDie(); + + WriteTestFile(file_name, data, /*write_batch_size=*/10, /*max_row_group_length=*/100); + + auto read_schema = arrow::schema({field_id, field_info, field_tags}); + auto predicate = PredicateBuilder::GreaterOrEqual( + /*field_index=*/0, /*field_name=*/"id", FieldType::INT, Literal(50)); + + std::shared_ptr result; + ReadWithPredicateImpl(file_name, read_schema, predicate, &result); + + ASSERT_TRUE(result); + ASSERT_EQ(50, result->length()); + + // Build expected result: 
rows 50-99 from the original data + auto expected = data->Slice(50, 50); + ASSERT_TRUE(expected->Equals(result->chunk(0))); +} } // namespace paimon::parquet::test From 30ea1d84998cab8443c456f5282b8b924274370f Mon Sep 17 00:00:00 2001 From: zhouhongfeng Date: Wed, 17 Jun 2026 10:14:52 +0800 Subject: [PATCH 02/38] walkaround: fallback nested field to RowGroup reading --- .../parquet/page_filtered_row_group_reader_test.cpp | 10 +++++----- .../format/parquet/parquet_file_batch_reader.cpp | 6 +++++- 2 files changed, 10 insertions(+), 6 deletions(-) diff --git a/src/paimon/format/parquet/page_filtered_row_group_reader_test.cpp b/src/paimon/format/parquet/page_filtered_row_group_reader_test.cpp index 795ae142b..6cd558252 100644 --- a/src/paimon/format/parquet/page_filtered_row_group_reader_test.cpp +++ b/src/paimon/format/parquet/page_filtered_row_group_reader_test.cpp @@ -917,7 +917,7 @@ static std::shared_ptr MakeNestedStructData(int32_t num_rows TEST_F(PageFilteredRowGroupReaderTest, NestedStructColumnPageFilter) { std::string file_name = dir_->Str() + "/nested_struct_filter.parquet"; auto data = MakeNestedStructData(100); - WriteTestFile(file_name, data, /*write_batch_size=*/10, /*max_row_group_length=*/100); + WriteTestFile(file_name, data, /*write_batch_size=*/10, /*max_row_group_length=*/50); auto field_x = arrow::field("x", arrow::int32()); auto field_y = arrow::field("y", arrow::int32()); @@ -950,7 +950,7 @@ TEST_F(PageFilteredRowGroupReaderTest, NestedStructColumnPageFilter) { TEST_F(PageFilteredRowGroupReaderTest, NestedStructColumnOnlyReadNestedField) { std::string file_name = dir_->Str() + "/nested_struct_only_nested.parquet"; auto data = MakeNestedStructData(100); - WriteTestFile(file_name, data, /*write_batch_size=*/10, /*max_row_group_length=*/100); + WriteTestFile(file_name, data, /*write_batch_size=*/10, /*max_row_group_length=*/50); auto field_x = arrow::field("x", arrow::int32()); auto field_y = arrow::field("y", arrow::int32()); @@ -1043,7 +1043,7 @@ 
static std::shared_ptr MakeMapColumnData(int32_t num_rows) { TEST_F(PageFilteredRowGroupReaderTest, NestedListColumnPageFilter) { std::string file_name = dir_->Str() + "/nested_list_filter.parquet"; auto data = MakeListColumnData(100); - WriteTestFile(file_name, data, /*write_batch_size=*/10, /*max_row_group_length=*/100); + WriteTestFile(file_name, data, /*write_batch_size=*/10, /*max_row_group_length=*/50); auto read_schema = arrow::schema({arrow::field("id", arrow::int32()), @@ -1071,7 +1071,7 @@ TEST_F(PageFilteredRowGroupReaderTest, NestedListColumnPageFilter) { TEST_F(PageFilteredRowGroupReaderTest, NestedMapColumnPageFilter) { std::string file_name = dir_->Str() + "/nested_map_filter.parquet"; auto data = MakeMapColumnData(100); - WriteTestFile(file_name, data, /*write_batch_size=*/10, /*max_row_group_length=*/100); + WriteTestFile(file_name, data, /*write_batch_size=*/10, /*max_row_group_length=*/50); auto read_schema = arrow::schema({arrow::field("id", arrow::int32()), @@ -1131,7 +1131,7 @@ TEST_F(PageFilteredRowGroupReaderTest, MultipleAdjacentNestedColumns) { {field_id, field_info, field_tags}) .ValueOrDie(); - WriteTestFile(file_name, data, /*write_batch_size=*/10, /*max_row_group_length=*/100); + WriteTestFile(file_name, data, /*write_batch_size=*/10, /*max_row_group_length=*/50); auto read_schema = arrow::schema({field_id, field_info, field_tags}); auto predicate = PredicateBuilder::GreaterOrEqual( diff --git a/src/paimon/format/parquet/parquet_file_batch_reader.cpp b/src/paimon/format/parquet/parquet_file_batch_reader.cpp index 7533cb99a..2d11fb658 100644 --- a/src/paimon/format/parquet/parquet_file_batch_reader.cpp +++ b/src/paimon/format/parquet/parquet_file_batch_reader.cpp @@ -39,6 +39,7 @@ #include "paimon/common/metrics/metrics_impl.h" #include "paimon/common/utils/arrow/status_utils.h" #include "paimon/common/utils/options_utils.h" +#include "paimon/core/schema/arrow_schema_validator.h" #include 
"paimon/format/parquet/parquet_field_id_converter.h" #include "paimon/format/parquet/parquet_format_defs.h" #include "paimon/format/parquet/parquet_timestamp_converter.h" @@ -129,8 +130,11 @@ Status ParquetFileBatchReader::SetReadSchema( PAIMON_ASSIGN_OR_RAISE(std::shared_ptr file_schema, reader_->GetSchema()); std::unordered_map> field_index_map; + bool has_nested_field = false; int32_t i = 0; for (const auto& field : file_schema->fields()) { + has_nested_field = + has_nested_field || ArrowSchemaValidator::IsNestedType(field->type()); std::vector v; FlattenSchema(field->type(), &i, &v); field_index_map[field->name()] = v; @@ -166,7 +170,7 @@ Status ParquetFileBatchReader::SetReadSchema( bool enable_page_index_filter, OptionsUtils::GetValueFromMap(options_, PARQUET_READ_ENABLE_PAGE_INDEX_FILTER, DEFAULT_PARQUET_READ_ENABLE_PAGE_INDEX_FILTER)); - if (enable_page_index_filter) { + if (enable_page_index_filter && !has_nested_field) { // Build column name to index map for page-level filtering. // For leaf columns, indices[0] is the correct leaf column index in Parquet. 
// For nested types (struct/list/map), FlattenSchema produces multiple leaf indices, From bed9fdf5b698b8320af04edcdc7ef45794472f48 Mon Sep 17 00:00:00 2001 From: zhouhongfeng Date: Wed, 17 Jun 2026 10:20:02 +0800 Subject: [PATCH 03/38] style: add comments --- src/paimon/format/parquet/parquet_file_batch_reader.cpp | 2 ++ 1 file changed, 2 insertions(+) diff --git a/src/paimon/format/parquet/parquet_file_batch_reader.cpp b/src/paimon/format/parquet/parquet_file_batch_reader.cpp index 2d11fb658..588a45f7c 100644 --- a/src/paimon/format/parquet/parquet_file_batch_reader.cpp +++ b/src/paimon/format/parquet/parquet_file_batch_reader.cpp @@ -170,6 +170,8 @@ Status ParquetFileBatchReader::SetReadSchema( bool enable_page_index_filter, OptionsUtils::GetValueFromMap(options_, PARQUET_READ_ENABLE_PAGE_INDEX_FILTER, DEFAULT_PARQUET_READ_ENABLE_PAGE_INDEX_FILTER)); + // workaround: page index filter does not support nested fields for now, skip page index + // filter if there is any nested field in the schema if (enable_page_index_filter && !has_nested_field) { // Build column name to index map for page-level filtering. // For leaf columns, indices[0] is the correct leaf column index in Parquet. 
From dfe419d6476e4d95d61799a350b520f4b44aef3d Mon Sep 17 00:00:00 2001 From: zhouhongfeng Date: Wed, 17 Jun 2026 11:03:03 +0800 Subject: [PATCH 04/38] fix: judge has_nested_field on read-schema --- src/paimon/format/parquet/parquet_file_batch_reader.cpp | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) diff --git a/src/paimon/format/parquet/parquet_file_batch_reader.cpp b/src/paimon/format/parquet/parquet_file_batch_reader.cpp index 588a45f7c..701e420db 100644 --- a/src/paimon/format/parquet/parquet_file_batch_reader.cpp +++ b/src/paimon/format/parquet/parquet_file_batch_reader.cpp @@ -131,10 +131,12 @@ Status ParquetFileBatchReader::SetReadSchema( PAIMON_ASSIGN_OR_RAISE(std::shared_ptr file_schema, reader_->GetSchema()); std::unordered_map> field_index_map; bool has_nested_field = false; - int32_t i = 0; - for (const auto& field : file_schema->fields()) { + for (const auto& field : read_schema->fields()) { has_nested_field = has_nested_field || ArrowSchemaValidator::IsNestedType(field->type()); + } + int32_t i = 0; + for (const auto& field : file_schema->fields()) { std::vector v; FlattenSchema(field->type(), &i, &v); field_index_map[field->name()] = v; From 2ceaf600bcd48dd1dd54224cfa548f3623717a91 Mon Sep 17 00:00:00 2001 From: zhouhongfeng Date: Wed, 17 Jun 2026 14:29:45 +0800 Subject: [PATCH 05/38] fix: make tests clearer --- .../page_filtered_row_group_reader_test.cpp | 48 +++++++++---------- .../parquet/parquet_file_batch_reader.cpp | 6 ++- 2 files changed, 28 insertions(+), 26 deletions(-) diff --git a/src/paimon/format/parquet/page_filtered_row_group_reader_test.cpp b/src/paimon/format/parquet/page_filtered_row_group_reader_test.cpp index 6cd558252..221ed7569 100644 --- a/src/paimon/format/parquet/page_filtered_row_group_reader_test.cpp +++ b/src/paimon/format/parquet/page_filtered_row_group_reader_test.cpp @@ -903,7 +903,7 @@ static std::shared_ptr MakeNestedStructData(int32_t num_rows return arrow::StructArray::Make({id_array, inner_struct}, 
{field_id, field_info}).ValueOrDie(); } -/// Test: page-level filtering on a file with nested struct columns. +/// Test: rowgroup-level filtering on a file with nested struct columns. /// /// This test exposes the bug where BuildPageFilteredSchema fails to correctly map /// Parquet leaf column indices to Arrow fields for nested types, and @@ -912,7 +912,7 @@ static std::shared_ptr MakeNestedStructData(int32_t num_rows /// Schema: { id: int32, info: struct } /// Parquet leaf columns: [id=0, info.x=1, info.y=2] /// 100 rows, 10 per page, 1 row group. -/// Predicate: id >= 50 → pages 0-4 skipped, pages 5-9 read → 50 rows expected. +/// Predicate: id >= 70 → row group 0 skipped, row group 1 read → 50 rows expected. /// The read schema requests both "id" and "info" columns. TEST_F(PageFilteredRowGroupReaderTest, NestedStructColumnPageFilter) { std::string file_name = dir_->Str() + "/nested_struct_filter.parquet"; @@ -925,7 +925,7 @@ TEST_F(PageFilteredRowGroupReaderTest, NestedStructColumnPageFilter) { arrow::field("info", arrow::struct_({field_x, field_y}))}); auto predicate = PredicateBuilder::GreaterOrEqual( - /*field_index=*/0, /*field_name=*/"id", FieldType::INT, Literal(50)); + /*field_index=*/0, /*field_name=*/"id", FieldType::INT, Literal(70)); std::shared_ptr result; ReadWithPredicateImpl(file_name, read_schema, predicate, &result); @@ -939,27 +939,29 @@ TEST_F(PageFilteredRowGroupReaderTest, NestedStructColumnPageFilter) { ASSERT_TRUE(expected->Equals(result->chunk(0))); } -/// Test: page-level filtering reading only the nested struct column (without the predicate column). +/// Test: rowgroup-level filtering reading the nested struct column along with the predicate column. /// -/// This verifies that when reading a subset of columns that includes only a nested column, -/// the schema mapping and column assembly work correctly. 
+/// This verifies that when reading a subset of columns that includes a nested column +/// and the predicate column, the schema mapping and column assembly work correctly. /// /// Schema: { id: int32, info: struct } -/// Read schema: { info: struct } (only the nested column) +/// Read schema: { id: int32, info: struct } /// Predicate on "id": id >= 50. TEST_F(PageFilteredRowGroupReaderTest, NestedStructColumnOnlyReadNestedField) { std::string file_name = dir_->Str() + "/nested_struct_only_nested.parquet"; auto data = MakeNestedStructData(100); WriteTestFile(file_name, data, /*write_batch_size=*/10, /*max_row_group_length=*/50); + auto field_id = arrow::field("id", arrow::int32()); auto field_x = arrow::field("x", arrow::int32()); auto field_y = arrow::field("y", arrow::int32()); - // Read only the nested "info" column - auto read_schema = arrow::schema({arrow::field("info", arrow::struct_({field_x, field_y}))}); + auto field_info = arrow::field("info", arrow::struct_({field_x, field_y})); + // Read both "id" (needed for predicate evaluation) and "info" columns + auto read_schema = arrow::schema({field_id, field_info}); - // Predicate is on "id" (field_index=0 in file schema, not in read schema) + // Predicate is on "id" (field_index=0 in file schema) auto predicate = PredicateBuilder::GreaterOrEqual( - /*field_index=*/0, /*field_name=*/"id", FieldType::INT, Literal(50)); + /*field_index=*/0, /*field_name=*/"id", FieldType::INT, Literal(70)); std::shared_ptr result; ReadWithPredicateImpl(file_name, read_schema, predicate, &result); @@ -968,13 +970,11 @@ TEST_F(PageFilteredRowGroupReaderTest, NestedStructColumnOnlyReadNestedField) { ASSERT_TRUE(result); ASSERT_EQ(50, result->length()); - // Build expected: only the "info" field from rows 50-99 + // Build expected: "id" and "info" fields from rows 50-99 auto sliced = std::dynamic_pointer_cast(data->Slice(50, 50)); ASSERT_TRUE(sliced); - // Extract only the "info" column (field index 1) and wrap as struct with single 
field auto expected = - arrow::StructArray::Make({sliced->field(1)}, - {arrow::field("info", arrow::struct_({field_x, field_y}))}) + arrow::StructArray::Make({sliced->field(0), sliced->field(1)}, {field_id, field_info}) .ValueOrDie(); ASSERT_TRUE(expected->Equals(result->chunk(0))); } @@ -1035,11 +1035,11 @@ static std::shared_ptr MakeMapColumnData(int32_t num_rows) { return arrow::StructArray::Make({id_array, map_array}, {field_id, field_props}).ValueOrDie(); } -/// Test: page-level filtering on a file with a list column. +/// Test: rowgroup-level filtering on a file with a list column. /// /// Schema: { id: int32, tags: list } /// 100 rows, 10 per page, 1 row group. -/// Predicate: id >= 50 → pages 0-4 skipped, pages 5-9 read → 50 rows expected. +/// Predicate: id >= 70 → row group 0 skipped, row group 1 read → 50 rows expected. TEST_F(PageFilteredRowGroupReaderTest, NestedListColumnPageFilter) { std::string file_name = dir_->Str() + "/nested_list_filter.parquet"; auto data = MakeListColumnData(100); @@ -1050,7 +1050,7 @@ TEST_F(PageFilteredRowGroupReaderTest, NestedListColumnPageFilter) { arrow::field("tags", arrow::list(arrow::field("item", arrow::int32())))}); auto predicate = PredicateBuilder::GreaterOrEqual( - /*field_index=*/0, /*field_name=*/"id", FieldType::INT, Literal(50)); + /*field_index=*/0, /*field_name=*/"id", FieldType::INT, Literal(70)); std::shared_ptr result; ReadWithPredicateImpl(file_name, read_schema, predicate, &result); @@ -1063,11 +1063,11 @@ TEST_F(PageFilteredRowGroupReaderTest, NestedListColumnPageFilter) { ASSERT_TRUE(expected->Equals(result->chunk(0))); } -/// Test: page-level filtering on a file with a map column. +/// Test: rowgroup-level filtering on a file with a map column. /// /// Schema: { id: int32, props: map } /// 100 rows, 10 per page, 1 row group. -/// Predicate: id >= 50 → pages 0-4 skipped, pages 5-9 read → 50 rows expected. +/// Predicate: id >= 70 → row group 0 skipped, row group 1 read → 50 rows expected. 
TEST_F(PageFilteredRowGroupReaderTest, NestedMapColumnPageFilter) { std::string file_name = dir_->Str() + "/nested_map_filter.parquet"; auto data = MakeMapColumnData(100); @@ -1078,7 +1078,7 @@ TEST_F(PageFilteredRowGroupReaderTest, NestedMapColumnPageFilter) { arrow::field("props", arrow::map(arrow::utf8(), arrow::int32()))}); auto predicate = PredicateBuilder::GreaterOrEqual( - /*field_index=*/0, /*field_name=*/"id", FieldType::INT, Literal(50)); + /*field_index=*/0, /*field_name=*/"id", FieldType::INT, Literal(70)); std::shared_ptr result; ReadWithPredicateImpl(file_name, read_schema, predicate, &result); @@ -1091,11 +1091,11 @@ TEST_F(PageFilteredRowGroupReaderTest, NestedMapColumnPageFilter) { ASSERT_TRUE(expected->Equals(result->chunk(0))); } -/// Test: page-level filtering with multiple adjacent nested columns (struct + list). +/// Test: rowgroup-level filtering with multiple adjacent nested columns (struct + list). /// /// Schema: { id: int32, info: struct, tags: list } /// This tests the boundary handling when two nested fields are adjacent in the schema. -/// Predicate: id >= 50. +/// Predicate: id >= 70 → row group 0 skipped, row group 1 read → 50 rows expected. 
TEST_F(PageFilteredRowGroupReaderTest, MultipleAdjacentNestedColumns) { std::string file_name = dir_->Str() + "/multi_nested.parquet"; @@ -1135,7 +1135,7 @@ TEST_F(PageFilteredRowGroupReaderTest, MultipleAdjacentNestedColumns) { auto read_schema = arrow::schema({field_id, field_info, field_tags}); auto predicate = PredicateBuilder::GreaterOrEqual( - /*field_index=*/0, /*field_name=*/"id", FieldType::INT, Literal(50)); + /*field_index=*/0, /*field_name=*/"id", FieldType::INT, Literal(70)); std::shared_ptr result; ReadWithPredicateImpl(file_name, read_schema, predicate, &result); diff --git a/src/paimon/format/parquet/parquet_file_batch_reader.cpp b/src/paimon/format/parquet/parquet_file_batch_reader.cpp index 701e420db..121f65d9b 100644 --- a/src/paimon/format/parquet/parquet_file_batch_reader.cpp +++ b/src/paimon/format/parquet/parquet_file_batch_reader.cpp @@ -132,8 +132,10 @@ Status ParquetFileBatchReader::SetReadSchema( std::unordered_map> field_index_map; bool has_nested_field = false; for (const auto& field : read_schema->fields()) { - has_nested_field = - has_nested_field || ArrowSchemaValidator::IsNestedType(field->type()); + if (ArrowSchemaValidator::IsNestedType(field->type())) { + has_nested_field = true; + break; + } } int32_t i = 0; for (const auto& field : file_schema->fields()) { From cb1a632df8bdb46b2899cf545dab259c0cc65b95 Mon Sep 17 00:00:00 2001 From: zhouhongfeng Date: Wed, 17 Jun 2026 11:25:22 +0800 Subject: [PATCH 06/38] feat: support page-level bitmap pushdown --- .../format/parquet/file_reader_wrapper.cpp | 44 ++-- .../format/parquet/file_reader_wrapper.h | 1 + .../page_filtered_row_group_reader.cpp | 8 +- .../parquet/page_filtered_row_group_reader.h | 1 + .../page_filtered_row_group_reader_test.cpp | 220 ++++++++++++++++++ .../parquet/parquet_file_batch_reader.cpp | 170 +++++++++----- .../parquet/parquet_file_batch_reader.h | 22 +- src/paimon/format/parquet/row_ranges.h | 17 -- src/paimon/format/parquet/target_row_group.h | 89 +++++++ 9 
files changed, 465 insertions(+), 107 deletions(-) create mode 100644 src/paimon/format/parquet/target_row_group.h diff --git a/src/paimon/format/parquet/file_reader_wrapper.cpp b/src/paimon/format/parquet/file_reader_wrapper.cpp index e7d6bf606..090da9122 100644 --- a/src/paimon/format/parquet/file_reader_wrapper.cpp +++ b/src/paimon/format/parquet/file_reader_wrapper.cpp @@ -164,14 +164,15 @@ void FileReaderWrapper::AdvanceToNextRowGroup() { current_row_group_idx_++; // Skip row groups excluded by read range. while (current_row_group_idx_ < target_row_groups_.size() && - target_row_groups_[current_row_group_idx_].excluded_by_read_range) { + target_row_groups_[current_row_group_idx_].IsExcludedByReadRange()) { current_row_group_idx_++; } if (current_row_group_idx_ >= target_row_groups_.size()) { next_row_to_read_ = num_rows_; } else { next_row_to_read_ = - all_row_group_ranges_[target_row_groups_[current_row_group_idx_].row_group_index].first; + all_row_group_ranges_[target_row_groups_[current_row_group_idx_].GetRowGroupIndex()] + .first; } } @@ -181,10 +182,10 @@ Status FileReaderWrapper::SeekToRow(uint64_t row_number) { filtered_global_offset_ = 0; for (uint64_t i = 0; i < target_row_groups_.size(); i++) { - if (target_row_groups_[i].excluded_by_read_range) { + if (target_row_groups_[i].IsExcludedByReadRange()) { continue; } - int32_t rg_id = target_row_groups_[i].row_group_index; + int32_t rg_id = target_row_groups_[i].GetRowGroupIndex(); uint64_t rg_start = all_row_group_ranges_[rg_id].first; uint64_t rg_end = all_row_group_ranges_[rg_id].second; if (row_number > rg_start && row_number < rg_end) { @@ -200,9 +201,9 @@ Status FileReaderWrapper::SeekToRow(uint64_t row_number) { // Rebuild batch_reader_ for non-page-filtered RGs at/after seek position. 
std::vector fully_matched_indices; for (uint64_t j = i; j < target_row_groups_.size(); j++) { - if (!target_row_groups_[j].excluded_by_read_range && - !target_row_groups_[j].is_partially_matched) { - fully_matched_indices.push_back(target_row_groups_[j].row_group_index); + if (!target_row_groups_[j].IsExcludedByReadRange() && + !target_row_groups_[j].IsPartiallyMatched()) { + fully_matched_indices.push_back(target_row_groups_[j].GetRowGroupIndex()); } } if (!fully_matched_indices.empty()) { @@ -222,7 +223,7 @@ Status FileReaderWrapper::SeekToRow(uint64_t row_number) { } Result> FileReaderWrapper::NextPageFiltered() { - int32_t rg_id = target_row_groups_[current_row_group_idx_].row_group_index; + int32_t rg_id = target_row_groups_[current_row_group_idx_].GetRowGroupIndex(); // Construct the per-RG streaming reader on demand. if (!current_page_filtered_reader_) { @@ -237,7 +238,7 @@ Result> FileReaderWrapper::NextPageFiltered( file_reader_->parquet_reader(), target_rg, target_column_indices_, page_filtered_read_schema_, file_reader_->properties().cache_options(), pre_buffered, page_ranges, max_chunksize, pool_)); - current_filtered_row_ranges_ = target_rg.row_ranges; + current_filtered_row_ranges_ = target_rg.GetRowRanges(); current_filtered_rg_start_ = all_row_group_ranges_[rg_id].first; filtered_global_offset_ = 0; } @@ -273,7 +274,7 @@ Result> FileReaderWrapper::NextFullyMatched( return std::shared_ptr(); } - int32_t rg_id = target_row_groups_[current_row_group_idx_].row_group_index; + int32_t rg_id = target_row_groups_[current_row_group_idx_].GetRowGroupIndex(); uint64_t rg_end = all_row_group_ranges_[rg_id].second; int64_t num_rows = record_batch->num_rows(); @@ -298,7 +299,7 @@ Result> FileReaderWrapper::Next() { while (current_row_group_idx_ < target_row_groups_.size()) { bool is_partially_matched = - target_row_groups_[current_row_group_idx_].is_partially_matched; + target_row_groups_[current_row_group_idx_].IsPartiallyMatched(); 
PAIMON_ASSIGN_OR_RAISE(std::shared_ptr batch, is_partially_matched ? NextPageFiltered() : NextFullyMatched()); if (batch) { @@ -368,9 +369,9 @@ std::vector<::arrow::io::ReadRange> FileReaderWrapper::CollectPreBufferRanges( auto file_metadata = file_reader_->parquet_reader()->metadata(); for (const auto& trg : target_row_groups_) { - if (trg.excluded_by_read_range) continue; + if (trg.IsExcludedByReadRange()) continue; - if (trg.is_partially_matched) { + if (trg.IsPartiallyMatched()) { // Page-filtered RGs: only matching page byte ranges. auto page_ranges = PageFilteredRowGroupReader::ComputePageRanges( file_reader_->parquet_reader(), trg, column_indices); @@ -378,7 +379,7 @@ std::vector<::arrow::io::ReadRange> FileReaderWrapper::CollectPreBufferRanges( std::make_move_iterator(page_ranges.end())); } else { // Fully-matched RGs: entire column chunk ranges. - auto rg_metadata = file_metadata->RowGroup(trg.row_group_index); + auto rg_metadata = file_metadata->RowGroup(trg.GetRowGroupIndex()); for (int32_t col_idx : column_indices) { auto col_chunk = rg_metadata->ColumnChunk(col_idx); int64_t offset = col_chunk->data_page_offset(); @@ -416,12 +417,12 @@ Status FileReaderWrapper::PrepareForReading(const std::vector& t std::vector fully_matched_row_groups; uint64_t active_count = 0; for (const auto& trg : target_row_groups_) { - if (trg.excluded_by_read_range) { + if (trg.IsExcludedByReadRange()) { continue; } active_count++; - if (!trg.is_partially_matched) { - fully_matched_row_groups.push_back(trg.row_group_index); + if (!trg.IsPartiallyMatched()) { + fully_matched_row_groups.push_back(trg.GetRowGroupIndex()); } } @@ -455,14 +456,15 @@ Status FileReaderWrapper::PrepareForReading(const std::vector& t // Reset read state. Find the first non-excluded row group. 
uint64_t first_active_idx = 0; while (first_active_idx < target_row_groups_.size() && - target_row_groups_[first_active_idx].excluded_by_read_range) { + target_row_groups_[first_active_idx].IsExcludedByReadRange()) { first_active_idx++; } if (first_active_idx >= target_row_groups_.size()) { next_row_to_read_ = num_rows_; } else { next_row_to_read_ = - all_row_group_ranges_[target_row_groups_[first_active_idx].row_group_index].first; + all_row_group_ranges_[target_row_groups_[first_active_idx].GetRowGroupIndex()] + .first; } previous_first_row_ = std::numeric_limits::max(); current_row_group_idx_ = first_active_idx; @@ -476,7 +478,7 @@ Status FileReaderWrapper::ApplyReadRanges( const std::vector>& read_ranges) { if (read_ranges.empty()) { for (auto& trg : target_row_groups_) { - trg.excluded_by_read_range = true; + trg.SetExcludedByReadRange(true); } reader_initialized_ = false; return Status::OK(); @@ -492,7 +494,7 @@ Status FileReaderWrapper::ApplyReadRanges( } // Mark each target row group as excluded or not based on the matching set. 
for (auto& trg : target_row_groups_) { - trg.excluded_by_read_range = matching_rg_indices.count(trg.row_group_index) == 0; + trg.SetExcludedByReadRange(matching_rg_indices.count(trg.GetRowGroupIndex()) == 0); } reader_initialized_ = false; return Status::OK(); diff --git a/src/paimon/format/parquet/file_reader_wrapper.h b/src/paimon/format/parquet/file_reader_wrapper.h index 748d4052f..29ecb2bb8 100644 --- a/src/paimon/format/parquet/file_reader_wrapper.h +++ b/src/paimon/format/parquet/file_reader_wrapper.h @@ -33,6 +33,7 @@ #include "arrow/type_fwd.h" #include "paimon/common/utils/arrow/status_utils.h" #include "paimon/format/parquet/row_ranges.h" +#include "paimon/format/parquet/target_row_group.h" #include "paimon/result.h" #include "paimon/status.h" #include "parquet/arrow/reader.h" diff --git a/src/paimon/format/parquet/page_filtered_row_group_reader.cpp b/src/paimon/format/parquet/page_filtered_row_group_reader.cpp index 9c87438b8..20c5efb97 100644 --- a/src/paimon/format/parquet/page_filtered_row_group_reader.cpp +++ b/src/paimon/format/parquet/page_filtered_row_group_reader.cpp @@ -234,8 +234,8 @@ Result> PageFilteredRowGroupReader::Re const ::arrow::io::CacheOptions& cache_options, bool pre_buffered, const std::vector<::arrow::io::ReadRange>& page_ranges, int64_t max_chunksize, std::shared_ptr<::arrow::MemoryPool> pool) { - const auto& row_ranges = target_row_group.row_ranges; - int32_t row_group_index = target_row_group.row_group_index; + const auto& row_ranges = target_row_group.GetRowRanges(); + int32_t row_group_index = target_row_group.GetRowGroupIndex(); if (row_ranges.IsEmpty()) { PAIMON_ASSIGN_OR_RAISE_FROM_ARROW(std::shared_ptr empty_table, @@ -289,8 +289,8 @@ Result> PageFilteredRowGroupReader::Re std::vector<::arrow::io::ReadRange> PageFilteredRowGroupReader::ComputePageRanges( ::parquet::ParquetFileReader* parquet_reader, const TargetRowGroup& target_row_group, const std::vector& column_indices) { - int32_t row_group_index = 
target_row_group.row_group_index; - const auto& row_ranges = target_row_group.row_ranges; + int32_t row_group_index = target_row_group.GetRowGroupIndex(); + const auto& row_ranges = target_row_group.GetRowRanges(); std::vector<::arrow::io::ReadRange> ranges; auto file_metadata = parquet_reader->metadata(); diff --git a/src/paimon/format/parquet/page_filtered_row_group_reader.h b/src/paimon/format/parquet/page_filtered_row_group_reader.h index 5092bb5ca..c7376512f 100644 --- a/src/paimon/format/parquet/page_filtered_row_group_reader.h +++ b/src/paimon/format/parquet/page_filtered_row_group_reader.h @@ -27,6 +27,7 @@ #include "arrow/record_batch.h" #include "arrow/type.h" #include "paimon/format/parquet/row_ranges.h" +#include "paimon/format/parquet/target_row_group.h" #include "paimon/result.h" #include "parquet/column_reader.h" #include "parquet/file_reader.h" diff --git a/src/paimon/format/parquet/page_filtered_row_group_reader_test.cpp b/src/paimon/format/parquet/page_filtered_row_group_reader_test.cpp index 221ed7569..8f058a096 100644 --- a/src/paimon/format/parquet/page_filtered_row_group_reader_test.cpp +++ b/src/paimon/format/parquet/page_filtered_row_group_reader_test.cpp @@ -44,6 +44,7 @@ #include "paimon/status.h" #include "paimon/testing/utils/read_result_collector.h" #include "paimon/testing/utils/testharness.h" +#include "paimon/utils/roaring_bitmap32.h" #include "parquet/arrow/reader.h" #include "parquet/file_reader.h" #include "parquet/properties.h" @@ -129,6 +130,29 @@ class PageFilteredRowGroupReaderTest : public ::testing::Test { paimon::test::ReadResultCollector::CollectResult(batch_reader.get())); } + /// Read back a Parquet file with a predicate, a bitmap, and page index filter enabled. 
+ void ReadWithPredicateAndBitmapImpl(const std::string& file_name, + const std::shared_ptr& read_schema, + const std::shared_ptr& predicate, + const RoaringBitmap32& bitmap, + std::shared_ptr* out, + int32_t batch_size = 1024) { + ASSERT_OK_AND_ASSIGN(std::shared_ptr in, fs_->Open(file_name)); + ASSERT_OK_AND_ASSIGN(int64_t length, in->Length()); + auto in_stream = std::make_shared(in, arrow_pool_, length); + + std::map options; + options[PARQUET_READ_ENABLE_PAGE_INDEX_FILTER] = "true"; + ASSERT_OK_AND_ASSIGN( + auto batch_reader, + ParquetFileBatchReader::Create(std::move(in_stream), arrow_pool_, options, batch_size)); + auto c_schema = std::make_unique(); + ASSERT_TRUE(arrow::ExportSchema(*read_schema, c_schema.get()).ok()); + ASSERT_OK(batch_reader->SetReadSchema(c_schema.get(), predicate, bitmap)); + ASSERT_OK_AND_ASSIGN(*out, + paimon::test::ReadResultCollector::CollectResult(batch_reader.get())); + } + protected: std::shared_ptr arrow_pool_; std::shared_ptr pool_; @@ -1146,6 +1170,202 @@ TEST_F(PageFilteredRowGroupReaderTest, MultipleAdjacentNestedColumns) { // Build expected result: rows 50-99 from the original data auto expected = data->Slice(50, 50); ASSERT_TRUE(expected->Equals(result->chunk(0))); +/// Test: bitmap hits all pages of a subset of row groups (no predicate). +/// +/// 200 rows, 10 rows per page, 100 rows per row group → 2 row groups. +/// RG0: rows 0-99, RG1: rows 100-199. +/// Bitmap: {0..99} hits all pages of RG0, RG1 is excluded entirely. +/// Expected: 100 rows (0-99). 
+TEST_F(PageFilteredRowGroupReaderTest, BitmapAllPagesSomeRowGroups) { + std::string file_name = dir_->Str() + "/bitmap_all_pages_rg.parquet"; + auto data = MakeSequentialIntData(200); + WriteTestFile(file_name, data, /*write_batch_size=*/10, /*max_row_group_length=*/100); + + RoaringBitmap32 bitmap; + bitmap.AddRange(0, 100); // hits all of RG0 + + auto read_schema = arrow::schema({arrow::field("val", arrow::int32())}); + std::shared_ptr result; + ReadWithPredicateAndBitmapImpl(file_name, read_schema, /*predicate=*/nullptr, bitmap, &result); + ASSERT_TRUE(result); + ASSERT_EQ(100, result->length()); + + auto flat = arrow::Concatenate(result->chunks()).ValueOrDie(); + auto struct_arr = std::dynamic_pointer_cast(flat); + ASSERT_TRUE(struct_arr); + auto val_arr = std::dynamic_pointer_cast(struct_arr->field(0)); + for (int32_t i = 0; i < 100; ++i) { + ASSERT_EQ(i, val_arr->Value(i)); + } +} + +/// Test: bitmap hits partial pages straddling a row group boundary (no predicate). +/// +/// 200 rows, 10 rows per page, 100 rows per row group → 2 row groups. +/// Bitmap: {90..109} hits page 9 of RG0 (rows 90-99) and page 0 of RG1 (rows 100-109). +/// Expected: 20 rows (90-109). 
+TEST_F(PageFilteredRowGroupReaderTest, BitmapPartialPagesSingleRowGroup) { + std::string file_name = dir_->Str() + "/bitmap_partial_pages_rg.parquet"; + auto data = MakeSequentialIntData(200); + WriteTestFile(file_name, data, /*write_batch_size=*/10, /*max_row_group_length=*/100); + + RoaringBitmap32 bitmap; + bitmap.AddRange(90, 110); // hits page 9 of RG0 and page 0 of RG1 + + auto read_schema = arrow::schema({arrow::field("val", arrow::int32())}); + std::shared_ptr result; + ReadWithPredicateAndBitmapImpl(file_name, read_schema, /*predicate=*/nullptr, bitmap, &result); + ASSERT_TRUE(result); + ASSERT_EQ(20, result->length()); + + auto flat = arrow::Concatenate(result->chunks()).ValueOrDie(); + auto struct_arr = std::dynamic_pointer_cast(flat); + ASSERT_TRUE(struct_arr); + auto val_arr = std::dynamic_pointer_cast(struct_arr->field(0)); + for (int32_t i = 0; i < 20; ++i) { + ASSERT_EQ(90 + i, val_arr->Value(i)); + } +} + +/// Test: bitmap hits all pages of some row groups and partial pages of others. +/// +/// 200 rows, 10 rows per page, 100 rows per row group → 2 row groups. +/// Bitmap: {0..99} hits all of RG0 + {120..149} hits pages 2-4 of RG1. +/// Expected: 100 (RG0) + 30 (RG1 partial) = 130 rows. 
+TEST_F(PageFilteredRowGroupReaderTest, BitmapAllAndPartialPagesMixed) { + std::string file_name = dir_->Str() + "/bitmap_all_and_partial.parquet"; + auto data = MakeSequentialIntData(200); + WriteTestFile(file_name, data, /*write_batch_size=*/10, /*max_row_group_length=*/100); + + RoaringBitmap32 bitmap; + bitmap.AddRange(0, 100); // all of RG0 + bitmap.AddRange(120, 150); // pages 2-4 of RG1 + + auto read_schema = arrow::schema({arrow::field("val", arrow::int32())}); + std::shared_ptr result; + ReadWithPredicateAndBitmapImpl(file_name, read_schema, /*predicate=*/nullptr, bitmap, &result); + ASSERT_TRUE(result); + ASSERT_EQ(130, result->length()); + + // Verify: rows 0-99 + 120-149 + auto flat = arrow::Concatenate(result->chunks()).ValueOrDie(); + auto struct_arr = std::dynamic_pointer_cast(flat); + ASSERT_TRUE(struct_arr); + auto val_arr = std::dynamic_pointer_cast(struct_arr->field(0)); + for (int32_t i = 0; i < 100; ++i) { + ASSERT_EQ(i, val_arr->Value(i)); + } + for (int32_t i = 0; i < 30; ++i) { + ASSERT_EQ(120 + i, val_arr->Value(100 + i)); + } +} + +/// Test: bitmap + predicate both applied, bitmap hits all pages of some row groups. +/// +/// 200 rows, 10 rows per page, 100 rows per row group → 2 row groups. +/// Bitmap: {0..99} hits all of RG0. +/// Predicate: val >= 50. Page-level filtering on RG0: pages 5-9. +/// Expected: 50 rows (50-99). 
+TEST_F(PageFilteredRowGroupReaderTest, BitmapAllPagesWithPredicate) { + std::string file_name = dir_->Str() + "/bitmap_all_predicate.parquet"; + auto data = MakeSequentialIntData(200); + WriteTestFile(file_name, data, /*write_batch_size=*/10, /*max_row_group_length=*/100); + + RoaringBitmap32 bitmap; + bitmap.AddRange(0, 100); // hits all of RG0 + + auto predicate = PredicateBuilder::GreaterOrEqual( + /*field_index=*/0, /*field_name=*/"val", FieldType::INT, Literal(50)); + + auto read_schema = arrow::schema({arrow::field("val", arrow::int32())}); + std::shared_ptr result; + ReadWithPredicateAndBitmapImpl(file_name, read_schema, predicate, bitmap, &result); + ASSERT_TRUE(result); + ASSERT_EQ(50, result->length()); + + auto flat = arrow::Concatenate(result->chunks()).ValueOrDie(); + auto struct_arr = std::dynamic_pointer_cast(flat); + ASSERT_TRUE(struct_arr); + auto val_arr = std::dynamic_pointer_cast(struct_arr->field(0)); + for (int32_t i = 0; i < 50; ++i) { + ASSERT_EQ(50 + i, val_arr->Value(i)); + } +} + +/// Test: bitmap + predicate both applied, bitmap hits partial pages of a row group. +/// +/// 200 rows, 10 rows per page, 100 rows per row group → 2 row groups. +/// Bitmap: {30..59} hits pages 3-5 of RG0 (rows 30-59). +/// Predicate: val >= 40. Page-level filtering further narrows to pages 4-5 (rows 40-59). +/// Expected: 20 rows (40-59). 
+TEST_F(PageFilteredRowGroupReaderTest, BitmapPartialPagesWithPredicate) { + std::string file_name = dir_->Str() + "/bitmap_partial_predicate.parquet"; + auto data = MakeSequentialIntData(200); + WriteTestFile(file_name, data, /*write_batch_size=*/10, /*max_row_group_length=*/100); + + RoaringBitmap32 bitmap; + bitmap.AddRange(30, 60); // hits pages 3-5 of RG0 + + auto predicate = PredicateBuilder::GreaterOrEqual( + /*field_index=*/0, /*field_name=*/"val", FieldType::INT, Literal(40)); + + auto read_schema = arrow::schema({arrow::field("val", arrow::int32())}); + std::shared_ptr result; + ReadWithPredicateAndBitmapImpl(file_name, read_schema, predicate, bitmap, &result); + ASSERT_TRUE(result); + ASSERT_EQ(20, result->length()); + + auto flat = arrow::Concatenate(result->chunks()).ValueOrDie(); + auto struct_arr = std::dynamic_pointer_cast(flat); + auto val_arr = std::dynamic_pointer_cast(struct_arr->field(0)); + for (int32_t i = 0; i < 20; ++i) { + ASSERT_EQ(40 + i, val_arr->Value(i)); + } +} + +/// Test: bitmap + predicate both applied, bitmap hits all pages of some RG and +/// partial pages of another. +/// +/// 200 rows, 10 rows per page, 100 rows per row group → 2 row groups. +/// Bitmap: {0..99} (all of RG0) + {120..149} (pages 2-4 of RG1). +/// Predicate: val >= 50 AND val < 160. +/// RG0: all pages → page-filtered to val>=50 → rows 50-99 (50 rows) +/// RG1: pages 2-4 (120-149) → page-filtered to val>=50 AND val<160 → all match (30 rows) +/// Expected: 80 rows (50-99 + 120-149). 
+TEST_F(PageFilteredRowGroupReaderTest, BitmapMixedWithPredicate) { + std::string file_name = dir_->Str() + "/bitmap_mixed_predicate.parquet"; + auto data = MakeSequentialIntData(200); + WriteTestFile(file_name, data, /*write_batch_size=*/10, /*max_row_group_length=*/100); + + RoaringBitmap32 bitmap; + bitmap.AddRange(0, 100); // all of RG0 + bitmap.AddRange(120, 150); // pages 2-4 of RG1 + + ASSERT_OK_AND_ASSIGN( + auto predicate, + PredicateBuilder::And( + {PredicateBuilder::GreaterOrEqual(/*field_index=*/0, /*field_name=*/"val", + FieldType::INT, Literal(50)), + PredicateBuilder::LessThan(/*field_index=*/0, /*field_name=*/"val", FieldType::INT, + Literal(160))})); + + auto read_schema = arrow::schema({arrow::field("val", arrow::int32())}); + std::shared_ptr result; + ReadWithPredicateAndBitmapImpl(file_name, read_schema, predicate, bitmap, &result); + ASSERT_TRUE(result); + ASSERT_EQ(80, result->length()); + + // Verify: rows 50-99 + 120-149 + auto flat = arrow::Concatenate(result->chunks()).ValueOrDie(); + auto struct_arr = std::dynamic_pointer_cast(flat); + ASSERT_TRUE(struct_arr); + auto val_arr = std::dynamic_pointer_cast(struct_arr->field(0)); + for (int32_t i = 0; i < 50; ++i) { + ASSERT_EQ(50 + i, val_arr->Value(i)); + } + for (int32_t i = 0; i < 30; ++i) { + ASSERT_EQ(120 + i, val_arr->Value(50 + i)); + } } } // namespace paimon::parquet::test diff --git a/src/paimon/format/parquet/parquet_file_batch_reader.cpp b/src/paimon/format/parquet/parquet_file_batch_reader.cpp index 121f65d9b..809c037c4 100644 --- a/src/paimon/format/parquet/parquet_file_batch_reader.cpp +++ b/src/paimon/format/parquet/parquet_file_batch_reader.cpp @@ -42,6 +42,7 @@ #include "paimon/core/schema/arrow_schema_validator.h" #include "paimon/format/parquet/parquet_field_id_converter.h" #include "paimon/format/parquet/parquet_format_defs.h" +#include "paimon/format/parquet/parquet_schema_util.h" #include "paimon/format/parquet/parquet_timestamp_converter.h" #include 
"paimon/format/parquet/predicate_converter.h" #include "paimon/reader/batch_reader.h" @@ -155,21 +156,23 @@ Status ParquetFileBatchReader::SetReadSchema( } } - std::vector row_groups = arrow::internal::Iota(reader_->GetNumberOfRowGroups()); + TargetRowGroups target_row_groups = + TargetRowGroup::MakeSerialRowGroups(reader_->GetNumberOfRowGroups()); if (predicate) { - PAIMON_ASSIGN_OR_RAISE(row_groups, - FilterRowGroupsByPredicate(predicate, file_schema, row_groups)); + PAIMON_ASSIGN_OR_RAISE( + target_row_groups, + FilterRowGroupsByPredicate(predicate, file_schema, target_row_groups)); } if (selection_bitmap) { - PAIMON_ASSIGN_OR_RAISE(row_groups, - FilterRowGroupsByBitmap(selection_bitmap.value(), row_groups)); + PAIMON_ASSIGN_OR_RAISE( + target_row_groups, + FilterRowGroupsByBitmap(selection_bitmap.value(), target_row_groups)); } // Apply page-level filtering after bitmap pruning so we don't read page index // pages for row groups that the bitmap already excluded. // If no predicate is provided, skip page-level filtering, row_group_row_ranges will be // empty - std::map row_group_row_ranges; - if (predicate && !row_groups.empty()) { + if (predicate && !target_row_groups.empty()) { PAIMON_ASSIGN_OR_RAISE( bool enable_page_index_filter, OptionsUtils::GetValueFromMap(options_, PARQUET_READ_ENABLE_PAGE_INDEX_FILTER, @@ -188,13 +191,9 @@ Status ParquetFileBatchReader::SetReadSchema( column_name_to_index[name] = indices[0]; } } - - std::pair, std::map> page_filter_result; PAIMON_ASSIGN_OR_RAISE( - page_filter_result, - FilterRowGroupsByPageIndex(predicate, column_name_to_index, row_groups)); - row_groups = std::move(page_filter_result.first); - row_group_row_ranges = std::move(page_filter_result.second); + target_row_groups, + FilterRowGroupsByPageIndex(predicate, column_name_to_index, target_row_groups)); } } @@ -202,30 +201,17 @@ Status ParquetFileBatchReader::SetReadSchema( metrics_->SetCounter(ParquetMetrics::READ_ROW_GROUPS_TOTAL, reader_->GetNumberOfRowGroups()); 
- metrics_->SetCounter(ParquetMetrics::READ_ROW_GROUPS_AFTER_FILTER, row_groups.size()); - - // Build TargetRowGroup list with page-filter info in one shot. - std::vector target_row_groups; - for (int32_t rg_id : row_groups) { - auto it = row_group_row_ranges.find(rg_id); - if (it != row_group_row_ranges.end()) { - target_row_groups.emplace_back(/*rg_index=*/rg_id, /*is_partially_matched=*/true, - /*ranges=*/it->second); - } else { - target_row_groups.emplace_back(/*rg_index=*/rg_id, - /*is_partially_matched=*/false, - /*ranges=*/RowRanges()); - } - } + metrics_->SetCounter(ParquetMetrics::READ_ROW_GROUPS_AFTER_FILTER, + target_row_groups.size()); PAIMON_RETURN_NOT_OK(reader_->PrepareForReadingLazy(target_row_groups, column_indices)); } PAIMON_PARQUET_CATCH_AND_RETURN_STATUS("ParquetFileBatchReader::SetReadSchema") return Status::OK(); } -Result> ParquetFileBatchReader::FilterRowGroupsByPredicate( +Result ParquetFileBatchReader::FilterRowGroupsByPredicate( const std::shared_ptr& predicate, const std::shared_ptr file_schema, - const std::vector& src_row_groups) const { + const TargetRowGroups& src_row_groups) const { if (!predicate) { return Status::Invalid("cannot pushdown an empty predicate"); } @@ -248,58 +234,122 @@ Result> ParquetFileBatchReader::FilterRowGroupsByPredicate( std::shared_ptr file_fragment, parquet_file_format->MakeFragment( file_source, /*partition_expression=*/PredicateConverter::AlwaysTrue(), - /*physical_schema=*/nullptr, /*row_groups=*/src_row_groups)); + /*physical_schema=*/nullptr, + /*row_groups=*/TargetRowGroup::GetRowGroupIndices(src_row_groups))); PAIMON_RETURN_NOT_OK_FROM_ARROW( file_fragment->EnsureCompleteMetadata(reader_->GetFileReader())); PAIMON_ASSIGN_OR_RAISE_FROM_ARROW(arrow::dataset::FragmentVector target_fragments, file_fragment->SplitByRowGroup(bind_expr)); - std::vector target_row_groups; + TargetRowGroups target_row_groups; target_row_groups.reserve(src_row_groups.size()); for (const auto& fragment : target_fragments) { 
auto parquet_fragment = dynamic_cast(fragment.get()); if (!parquet_fragment) { return Status::Invalid("cannot cast to ParquetFileFragment in ParquetFileBatchReader"); } - target_row_groups.insert(target_row_groups.end(), parquet_fragment->row_groups().begin(), - parquet_fragment->row_groups().end()); + for (auto rg_index : parquet_fragment->row_groups()) { + target_row_groups.emplace_back(rg_index); + } } return target_row_groups; } -Result> ParquetFileBatchReader::FilterRowGroupsByBitmap( - const RoaringBitmap32& bitmap, const std::vector& src_row_groups) const { +Result ParquetFileBatchReader::FilterRowGroupsByBitmap( + const RoaringBitmap32& bitmap, const TargetRowGroups& src_row_groups) const { if (bitmap.IsEmpty()) { return Status::Invalid("cannot push down an empty bitmap to ParquetFileBatchReader"); } + + auto meta_data = reader_->GetFileReader()->parquet_reader()->metadata(); const auto& all_row_group_ranges = reader_->GetAllRowGroupRanges(); - // filter row groups by row range - std::vector target_row_groups; - for (const auto& row_group_idx : src_row_groups) { + + TargetRowGroups target_row_groups; + for (const auto& row_group : src_row_groups) { + int32_t row_group_idx = row_group.GetRowGroupIndex(); if (static_cast(row_group_idx) >= all_row_group_ranges.size()) { return Status::Invalid( fmt::format("src row group {} not in row group meta", row_group_idx)); } const auto& [start_row_idx, end_row_idx] = all_row_group_ranges[row_group_idx]; - if (bitmap.ContainsAny(start_row_idx, end_row_idx)) { - target_row_groups.push_back(row_group_idx); + if (!bitmap.ContainsAny(start_row_idx, end_row_idx)) { + continue; + } + + int64_t rg_row_count = meta_data->RowGroup(row_group_idx)->num_rows(); + auto page_ranges = FilterPagesByBitmap(bitmap, row_group_idx, start_row_idx, rg_row_count); + if (page_ranges.has_value()) { + target_row_groups.emplace_back(/*row_group_idx=*/row_group_idx, + /*is_partially_matched=*/true, + /*row_ranges=*/page_ranges.value()); + } else { 
+ target_row_groups.emplace_back(row_group_idx); } } return target_row_groups; } +std::optional ParquetFileBatchReader::FilterPagesByBitmap(const RoaringBitmap32& bitmap, + int32_t row_group_idx, + uint64_t rg_start_row, + int64_t rg_row_count) const { + int32_t column_with_offset_index = FindColumnWithOffsetIndex(row_group_idx); + if (column_with_offset_index < 0) { + return std::nullopt; + } + + auto page_index_reader = reader_->GetPageIndexReader(); + if (!page_index_reader) { + return std::nullopt; + } + + auto rg_page_index_reader = page_index_reader->RowGroup(row_group_idx); + if (!rg_page_index_reader) { + return std::nullopt; + } + + auto offset_index = rg_page_index_reader->GetOffsetIndex(column_with_offset_index); + if (!offset_index) { + return std::nullopt; + } + + const auto& pages = offset_index->page_locations(); + auto num_pages = static_cast(pages.size()); + RowRanges filtered_row_ranges; + for (int64_t i = 0; i < num_pages; ++i) { + int64_t page_start_row = pages[i].first_row_index; + // The bitmap is [from, to) while page row range is [from, to] + int64_t page_end_row = + (i + 1 < num_pages) ? pages[i + 1].first_row_index - 1 : rg_row_count - 1; + if (bitmap.ContainsAny(rg_start_row + page_start_row, rg_start_row + page_end_row + 1)) { + filtered_row_ranges.Add(RowRanges::Range(page_start_row, page_end_row)); + } + } + return filtered_row_ranges; +} + +int32_t ParquetFileBatchReader::FindColumnWithOffsetIndex(int32_t row_group_idx) const { + auto rg_meta = reader_->GetFileReader()->parquet_reader()->metadata()->RowGroup(row_group_idx); + if (!rg_meta) { + return -1; + } + for (int col = 0; col < rg_meta->num_columns(); ++col) { + if (rg_meta->ColumnChunk(col)->GetOffsetIndexLocation().has_value()) { + return col; + } + } + return -1; +} + // Uses page-level column index statistics to filter row groups and store per-row-group // RowRanges for true page-level skipping. 
A row group is excluded if ALL its pages are // determined to not match the predicate. For partially matched row groups, RowRanges // are stored for page-level filtering during reading. -Result, std::map>> -ParquetFileBatchReader::FilterRowGroupsByPageIndex( +Result ParquetFileBatchReader::FilterRowGroupsByPageIndex( const std::shared_ptr& predicate, const std::map& column_name_to_index, - const std::vector& src_row_groups) { - std::map rg_row_ranges; - + const TargetRowGroups& src_row_groups) const { if (!predicate) { - return std::make_pair(src_row_groups, rg_row_ranges); + return src_row_groups; } auto page_index_reader = reader_->GetPageIndexReader(); @@ -307,35 +357,41 @@ ParquetFileBatchReader::FilterRowGroupsByPageIndex( PAIMON_LOG_DEBUG(logger_, "Page index not available in file, skipping page-level filtering (%s)", PARQUET_WRITE_ENABLE_PAGE_INDEX); - return std::make_pair(src_row_groups, rg_row_ranges); + return src_row_groups; } auto file_metadata = reader_->GetFileReader()->parquet_reader()->metadata(); - std::vector target_row_groups; - target_row_groups.reserve(src_row_groups.size()); + TargetRowGroups target_row_groups; - for (int32_t row_group_idx : src_row_groups) { + for (const auto& row_group : src_row_groups) { + int32_t row_group_idx = row_group.GetRowGroupIndex(); auto result = reader_->CalculateFilteredRowRanges(row_group_idx, predicate, column_name_to_index); if (!result.ok()) { - target_row_groups.push_back(row_group_idx); + target_row_groups.emplace_back(row_group); continue; } const auto& row_ranges = result.value(); if (!row_ranges.IsEmpty()) { - target_row_groups.push_back(row_group_idx); - int64_t rg_row_count = file_metadata->RowGroup(row_group_idx)->num_rows(); - if (row_ranges.RowCount() < rg_row_count) { - rg_row_ranges[row_group_idx] = row_ranges; + auto intersection = row_group.IsPartiallyMatched() + ? 
RowRanges::Intersection(row_group.GetRowRanges(), row_ranges) + : row_ranges; + if (intersection.IsEmpty()) { + continue; + } + if (intersection.RowCount() < rg_row_count) { + target_row_groups.emplace_back(row_group_idx, true, intersection); + } else { + target_row_groups.emplace_back(row_group_idx); } } } - return std::make_pair(std::move(target_row_groups), std::move(rg_row_ranges)); + return target_row_groups; } Result ParquetFileBatchReader::NextBatch() { diff --git a/src/paimon/format/parquet/parquet_file_batch_reader.h b/src/paimon/format/parquet/parquet_file_batch_reader.h index 8dc412c30..de945788c 100644 --- a/src/paimon/format/parquet/parquet_file_batch_reader.h +++ b/src/paimon/format/parquet/parquet_file_batch_reader.h @@ -37,6 +37,7 @@ #include "paimon/common/utils/arrow/status_utils.h" #include "paimon/format/parquet/file_reader_wrapper.h" #include "paimon/format/parquet/row_ranges.h" +#include "paimon/format/parquet/target_row_group.h" #include "paimon/logging.h" #include "paimon/reader/prefetch_file_batch_reader.h" #include "paimon/result.h" @@ -149,22 +150,27 @@ class ParquetFileBatchReader : public PrefetchFileBatchReader { index_vector->push_back((*index)++); } } + int32_t FindColumnWithOffsetIndex(int32_t row_group_idx) const; + + std::optional FilterPagesByBitmap(const RoaringBitmap32& bitmap, + int32_t row_group_idx, uint64_t rg_start_row, + int64_t rg_row_count) const; // precondition: predicate supposed not be empty - Result> FilterRowGroupsByPredicate( + Result FilterRowGroupsByPredicate( const std::shared_ptr& predicate, const std::shared_ptr file_schema, - const std::vector& src_row_groups) const; + const TargetRowGroups& src_row_groups) const; - Result> FilterRowGroupsByBitmap( - const RoaringBitmap32& bitmap, const std::vector& src_row_groups) const; + Result FilterRowGroupsByBitmap(const RoaringBitmap32& bitmap, + const TargetRowGroups& src_row_groups) const; // Apply page-level filtering using column index. 
// Returns (filtered row groups, per-row-group RowRanges for partial matches). - Result, std::map>> - FilterRowGroupsByPageIndex(const std::shared_ptr& predicate, - const std::map& column_name_to_index, - const std::vector& src_row_groups); + Result FilterRowGroupsByPageIndex( + const std::shared_ptr& predicate, + const std::map& column_name_to_index, + const TargetRowGroups& src_row_groups) const; private: std::map options_; diff --git a/src/paimon/format/parquet/row_ranges.h b/src/paimon/format/parquet/row_ranges.h index 46c3f4d21..b2b8338db 100644 --- a/src/paimon/format/parquet/row_ranges.h +++ b/src/paimon/format/parquet/row_ranges.h @@ -105,21 +105,4 @@ class RowRanges { private: std::vector ranges_; }; - -struct TargetRowGroup { - int32_t row_group_index{-1}; - bool is_partially_matched{false}; - // page-filtered row ranges, only valid if is_partially_matched is true. - RowRanges row_ranges; - // Whether this row group has been excluded by ApplyReadRanges. - // When true, this row group is logically skipped during iteration - // but retained so that a subsequent wider ApplyReadRanges can restore it. - bool excluded_by_read_range{false}; - - TargetRowGroup() = default; - TargetRowGroup(int32_t rg_index, bool is_partially_matched, RowRanges ranges) - : row_group_index(rg_index), - is_partially_matched(is_partially_matched), - row_ranges(std::move(ranges)) {} -}; } // namespace paimon::parquet diff --git a/src/paimon/format/parquet/target_row_group.h b/src/paimon/format/parquet/target_row_group.h new file mode 100644 index 000000000..d0efb7e6b --- /dev/null +++ b/src/paimon/format/parquet/target_row_group.h @@ -0,0 +1,89 @@ +/* + * Copyright 2026-present Alibaba Inc. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. 
+ * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#pragma once + +#include +#include +#include +#include +#include + +#include "paimon/format/parquet/row_ranges.h" + +namespace paimon::parquet { +class TargetRowGroup; +using TargetRowGroups = std::vector; +class TargetRowGroup { + public: + explicit TargetRowGroup(int32_t rg_index) : row_group_index(rg_index) {} + TargetRowGroup(int32_t rg_index, bool is_partially_matched, RowRanges ranges) + : row_group_index(rg_index), + is_partially_matched(is_partially_matched), + row_ranges(std::move(ranges)) {} + + TargetRowGroup(const TargetRowGroup& other) = default; + + bool IsExcludedByReadRange() const { + return excluded_by_read_range; + } + + void SetExcludedByReadRange(bool excluded) { + excluded_by_read_range = excluded; + } + + int32_t GetRowGroupIndex() const { + return row_group_index; + } + + bool IsPartiallyMatched() const { + return is_partially_matched; + } + + const RowRanges& GetRowRanges() const { + return row_ranges; + } + + static TargetRowGroups MakeSerialRowGroups(int32_t num_row_groups) { + TargetRowGroups target_row_groups; + target_row_groups.reserve(num_row_groups); + for (int32_t i = 0; i < num_row_groups; ++i) { + target_row_groups.emplace_back(i); + } + return target_row_groups; + } + + static std::vector GetRowGroupIndices(const TargetRowGroups& target_row_groups) { + std::vector indices; + indices.reserve(target_row_groups.size()); + for (const auto& rg : target_row_groups) { + indices.push_back(rg.GetRowGroupIndex()); + } + return indices; + } + + private: + int32_t row_group_index{-1}; + bool 
is_partially_matched{false}; + // page-filtered row ranges, only valid if is_partially_matched is true. + RowRanges row_ranges; + // Whether this row group has been excluded by ApplyReadRanges. + // When true, this row group is logically skipped during iteration + // but retained so that a subsequent wider ApplyReadRanges can restore it. + bool excluded_by_read_range{false}; +}; + +} // namespace paimon::parquet From 9a5f4eea8df3913e715d254d5e36d07133e3828e Mon Sep 17 00:00:00 2001 From: zhouhongfeng Date: Wed, 17 Jun 2026 15:16:57 +0800 Subject: [PATCH 07/38] fix: fallback bitmap pushdown to rowgroup-level --- .../page_filtered_row_group_reader_test.cpp | 90 +++++++++++++++++++ .../parquet/parquet_file_batch_reader.cpp | 17 +++- .../parquet/parquet_file_batch_reader.h | 3 +- 3 files changed, 105 insertions(+), 5 deletions(-) diff --git a/src/paimon/format/parquet/page_filtered_row_group_reader_test.cpp b/src/paimon/format/parquet/page_filtered_row_group_reader_test.cpp index 8f058a096..4e60a8628 100644 --- a/src/paimon/format/parquet/page_filtered_row_group_reader_test.cpp +++ b/src/paimon/format/parquet/page_filtered_row_group_reader_test.cpp @@ -1115,6 +1115,95 @@ TEST_F(PageFilteredRowGroupReaderTest, NestedMapColumnPageFilter) { ASSERT_TRUE(expected->Equals(result->chunk(0))); } +/// Test: nested map projection falls back to row-group-level filtering when page index filter is +/// unavailable for nested read schemas. +/// +/// Schema: { id: int32, props: map } +/// Read schema only contains the nested "props" column. +/// 100 rows, 10 per page, 1 row group. +/// Predicate: id >= 30 would be a partial-row-group match at first 50-row group. +/// Because nested schema disables page-level filtering, the entire first row group (0..49) is read, +/// so rows [0, 99] should all be returned. 
+TEST_F(PageFilteredRowGroupReaderTest, NestedMapRowGroupFallback) { + std::string file_name = dir_->Str() + "/nested_map_projection_fallback.parquet"; + auto data = MakeMapColumnData(100); + WriteTestFile(file_name, data, /*write_batch_size=*/10, /*max_row_group_length=*/50); + + auto field_props = arrow::field("props", arrow::map(arrow::utf8(), arrow::int32())); + auto read_schema = arrow::schema({arrow::field("id", arrow::int32()), field_props}); + + RoaringBitmap32 bitmap; + bitmap.AddRange(70, 100); + + std::shared_ptr result; + ReadWithPredicateAndBitmapImpl(file_name, read_schema, nullptr, bitmap, &result); + + ASSERT_TRUE(result); + // Because page-level filtering is skipped for nested schemas, we read full row groups. + ASSERT_EQ(50, result->length()); + + auto expected = data->Slice(50, 50); + ASSERT_TRUE(expected->Equals(result->chunk(0))); +} + +/// Test: nested list projection falls back to row-group-level filtering when page index filter is +/// unavailable for nested read schemas. +/// +/// Schema: { id: int32, tags: list } +/// Read schema only contains the nested "tags" column. +/// Predicate: id >= 30 would be a partial-row-group match at first 50-row group. +/// Because nested schema disables page-level filtering, the entire first row group (0..49) is read. 
+TEST_F(PageFilteredRowGroupReaderTest, NestedListRowGroupFallback) { + std::string file_name = dir_->Str() + "/nested_list_projection_fallback.parquet"; + auto data = MakeListColumnData(100); + WriteTestFile(file_name, data, /*write_batch_size=*/10, /*max_row_group_length=*/50); + + auto field_tags = arrow::field("tags", arrow::list(arrow::field("item", arrow::int32()))); + auto read_schema = arrow::schema({arrow::field("id", arrow::int32()), field_tags}); + + RoaringBitmap32 bitmap; + bitmap.AddRange(70, 100); + + std::shared_ptr result; + ReadWithPredicateAndBitmapImpl(file_name, read_schema, nullptr, bitmap, &result); + + ASSERT_TRUE(result); + ASSERT_EQ(50, result->length()); + + auto expected = data->Slice(50, 50); + ASSERT_TRUE(expected->Equals(result->chunk(0))); +} + +/// Test: nested struct projection falls back to row-group-level filtering when page index filter is +/// unavailable for nested read schemas. +/// +/// Schema: { id: int32, info: struct } +/// Read schema only contains the nested "info" column. +/// Predicate: id >= 30 would be a partial-row-group match at first 50-row group. +/// Because nested schema disables page-level filtering, the entire first row group (0..49) is read. 
+TEST_F(PageFilteredRowGroupReaderTest, NestedStructRowGroupFallback) { + std::string file_name = dir_->Str() + "/nested_struct_projection_fallback.parquet"; + auto data = MakeNestedStructData(100); + WriteTestFile(file_name, data, /*write_batch_size=*/10, /*max_row_group_length=*/50); + + auto field_x = arrow::field("x", arrow::int32()); + auto field_y = arrow::field("y", arrow::int32()); + auto field_info = arrow::field("info", arrow::struct_({field_x, field_y})); + auto read_schema = arrow::schema({arrow::field("id", arrow::int32()), field_info}); + + RoaringBitmap32 bitmap; + bitmap.AddRange(70, 100); + + std::shared_ptr result; + ReadWithPredicateAndBitmapImpl(file_name, read_schema, nullptr, bitmap, &result); + + ASSERT_TRUE(result); + ASSERT_EQ(50, result->length()); + + auto expected = data->Slice(50, 50); + ASSERT_TRUE(expected->Equals(result->chunk(0))); +} + /// Test: rowgroup-level filtering with multiple adjacent nested columns (struct + list). /// /// Schema: { id: int32, info: struct, tags: list } @@ -1170,6 +1259,7 @@ TEST_F(PageFilteredRowGroupReaderTest, MultipleAdjacentNestedColumns) { // Build expected result: rows 50-99 from the original data auto expected = data->Slice(50, 50); ASSERT_TRUE(expected->Equals(result->chunk(0))); +} /// Test: bitmap hits all pages of a subset of row groups (no predicate). /// /// 200 rows, 10 rows per page, 100 rows per row group → 2 row groups. 
diff --git a/src/paimon/format/parquet/parquet_file_batch_reader.cpp b/src/paimon/format/parquet/parquet_file_batch_reader.cpp index 809c037c4..8f221bf6d 100644 --- a/src/paimon/format/parquet/parquet_file_batch_reader.cpp +++ b/src/paimon/format/parquet/parquet_file_batch_reader.cpp @@ -164,9 +164,11 @@ Status ParquetFileBatchReader::SetReadSchema( FilterRowGroupsByPredicate(predicate, file_schema, target_row_groups)); } if (selection_bitmap) { - PAIMON_ASSIGN_OR_RAISE( - target_row_groups, - FilterRowGroupsByBitmap(selection_bitmap.value(), target_row_groups)); + // walkaround: page index filter does not support nested fields for now, skip page index + // bitmap pushdown if there is any nested field in the schema + PAIMON_ASSIGN_OR_RAISE(target_row_groups, + FilterRowGroupsByBitmap(selection_bitmap.value(), + target_row_groups, has_nested_field)); } // Apply page-level filtering after bitmap pruning so we don't read page index // pages for row groups that the bitmap already excluded. @@ -255,7 +257,8 @@ Result ParquetFileBatchReader::FilterRowGroupsByPredicate( } Result ParquetFileBatchReader::FilterRowGroupsByBitmap( - const RoaringBitmap32& bitmap, const TargetRowGroups& src_row_groups) const { + const RoaringBitmap32& bitmap, const TargetRowGroups& src_row_groups, + bool has_nested_column) const { if (bitmap.IsEmpty()) { return Status::Invalid("cannot push down an empty bitmap to ParquetFileBatchReader"); } @@ -276,6 +279,12 @@ Result ParquetFileBatchReader::FilterRowGroupsByBitmap( } int64_t rg_row_count = meta_data->RowGroup(row_group_idx)->num_rows(); + if (has_nested_column) { + // For nested schema, we cannot apply page-level filtering, so we directly add the whole + // row group if bitmap matches. 
+ target_row_groups.emplace_back(row_group_idx); + continue; + } auto page_ranges = FilterPagesByBitmap(bitmap, row_group_idx, start_row_idx, rg_row_count); if (page_ranges.has_value()) { target_row_groups.emplace_back(/*row_group_idx=*/row_group_idx, diff --git a/src/paimon/format/parquet/parquet_file_batch_reader.h b/src/paimon/format/parquet/parquet_file_batch_reader.h index de945788c..8ccabc5ed 100644 --- a/src/paimon/format/parquet/parquet_file_batch_reader.h +++ b/src/paimon/format/parquet/parquet_file_batch_reader.h @@ -163,7 +163,8 @@ class ParquetFileBatchReader : public PrefetchFileBatchReader { const TargetRowGroups& src_row_groups) const; Result FilterRowGroupsByBitmap(const RoaringBitmap32& bitmap, - const TargetRowGroups& src_row_groups) const; + const TargetRowGroups& src_row_groups, + bool has_nested_column) const; // Apply page-level filtering using column index. // Returns (filtered row groups, per-row-group RowRanges for partial matches). From 09d2121624c7a9d2487f4772c8bbb0724d5532dc Mon Sep 17 00:00:00 2001 From: zhouhongfeng Date: Wed, 17 Jun 2026 15:28:53 +0800 Subject: [PATCH 08/38] style: update tests names --- .../format/parquet/page_filtered_row_group_reader_test.cpp | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/src/paimon/format/parquet/page_filtered_row_group_reader_test.cpp b/src/paimon/format/parquet/page_filtered_row_group_reader_test.cpp index 4e60a8628..1d55beb7f 100644 --- a/src/paimon/format/parquet/page_filtered_row_group_reader_test.cpp +++ b/src/paimon/format/parquet/page_filtered_row_group_reader_test.cpp @@ -1124,7 +1124,7 @@ TEST_F(PageFilteredRowGroupReaderTest, NestedMapColumnPageFilter) { /// Predicate: id >= 30 would be a partial-row-group match at first 50-row group. /// Because nested schema disables page-level filtering, the entire first row group (0..49) is read, /// so rows [0, 99] should all be returned. 
-TEST_F(PageFilteredRowGroupReaderTest, NestedMapRowGroupFallback) { +TEST_F(PageFilteredRowGroupReaderTest, NestedMapBitmapFallback) { std::string file_name = dir_->Str() + "/nested_map_projection_fallback.parquet"; auto data = MakeMapColumnData(100); WriteTestFile(file_name, data, /*write_batch_size=*/10, /*max_row_group_length=*/50); @@ -1153,7 +1153,7 @@ TEST_F(PageFilteredRowGroupReaderTest, NestedMapRowGroupFallback) { /// Read schema only contains the nested "tags" column. /// Predicate: id >= 30 would be a partial-row-group match at first 50-row group. /// Because nested schema disables page-level filtering, the entire first row group (0..49) is read. -TEST_F(PageFilteredRowGroupReaderTest, NestedListRowGroupFallback) { +TEST_F(PageFilteredRowGroupReaderTest, NestedListBitmapFallback) { std::string file_name = dir_->Str() + "/nested_list_projection_fallback.parquet"; auto data = MakeListColumnData(100); WriteTestFile(file_name, data, /*write_batch_size=*/10, /*max_row_group_length=*/50); @@ -1181,7 +1181,7 @@ TEST_F(PageFilteredRowGroupReaderTest, NestedListRowGroupFallback) { /// Read schema only contains the nested "info" column. /// Predicate: id >= 30 would be a partial-row-group match at first 50-row group. /// Because nested schema disables page-level filtering, the entire first row group (0..49) is read. 
-TEST_F(PageFilteredRowGroupReaderTest, NestedStructRowGroupFallback) { +TEST_F(PageFilteredRowGroupReaderTest, NestedStructBitmapFallback) { std::string file_name = dir_->Str() + "/nested_struct_projection_fallback.parquet"; auto data = MakeNestedStructData(100); WriteTestFile(file_name, data, /*write_batch_size=*/10, /*max_row_group_length=*/50); From 3ea428dea0c3632cc19731d3ec049626174e77a8 Mon Sep 17 00:00:00 2001 From: zhouhongfeng Date: Wed, 17 Jun 2026 15:51:20 +0800 Subject: [PATCH 09/38] stye: update comments --- .../format/parquet/page_filtered_row_group_reader_test.cpp | 3 --- 1 file changed, 3 deletions(-) diff --git a/src/paimon/format/parquet/page_filtered_row_group_reader_test.cpp b/src/paimon/format/parquet/page_filtered_row_group_reader_test.cpp index 1d55beb7f..16008f105 100644 --- a/src/paimon/format/parquet/page_filtered_row_group_reader_test.cpp +++ b/src/paimon/format/parquet/page_filtered_row_group_reader_test.cpp @@ -1119,7 +1119,6 @@ TEST_F(PageFilteredRowGroupReaderTest, NestedMapColumnPageFilter) { /// unavailable for nested read schemas. /// /// Schema: { id: int32, props: map } -/// Read schema only contains the nested "props" column. /// 100 rows, 10 per page, 1 row group. /// Predicate: id >= 30 would be a partial-row-group match at first 50-row group. /// Because nested schema disables page-level filtering, the entire first row group (0..49) is read, @@ -1150,7 +1149,6 @@ TEST_F(PageFilteredRowGroupReaderTest, NestedMapBitmapFallback) { /// unavailable for nested read schemas. /// /// Schema: { id: int32, tags: list } -/// Read schema only contains the nested "tags" column. /// Predicate: id >= 30 would be a partial-row-group match at first 50-row group. /// Because nested schema disables page-level filtering, the entire first row group (0..49) is read. 
TEST_F(PageFilteredRowGroupReaderTest, NestedListBitmapFallback) { @@ -1178,7 +1176,6 @@ TEST_F(PageFilteredRowGroupReaderTest, NestedListBitmapFallback) { /// unavailable for nested read schemas. /// /// Schema: { id: int32, info: struct } -/// Read schema only contains the nested "info" column. /// Predicate: id >= 30 would be a partial-row-group match at first 50-row group. /// Because nested schema disables page-level filtering, the entire first row group (0..49) is read. TEST_F(PageFilteredRowGroupReaderTest, NestedStructBitmapFallback) { From 76e728226c718cb35025fb6465d43dfc5a1d0952 Mon Sep 17 00:00:00 2001 From: zhouhongfeng Date: Wed, 17 Jun 2026 17:27:48 +0800 Subject: [PATCH 10/38] fix: set 'SupportPreciseBitmapSelection to true' --- src/paimon/format/parquet/parquet_file_batch_reader.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/paimon/format/parquet/parquet_file_batch_reader.h b/src/paimon/format/parquet/parquet_file_batch_reader.h index 8ccabc5ed..035d85b43 100644 --- a/src/paimon/format/parquet/parquet_file_batch_reader.h +++ b/src/paimon/format/parquet/parquet_file_batch_reader.h @@ -120,7 +120,7 @@ class ParquetFileBatchReader : public PrefetchFileBatchReader { } bool SupportPreciseBitmapSelection() const override { - return false; + return true; } private: From 8078ca4efb8d05ca7398ac326192eff4749ebc9d Mon Sep 17 00:00:00 2001 From: zhouhongfeng Date: Thu, 18 Jun 2026 14:02:11 +0800 Subject: [PATCH 11/38] fxi: set 'SupportPreciseBitmapSelection to false' --- src/paimon/format/parquet/parquet_file_batch_reader.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/paimon/format/parquet/parquet_file_batch_reader.h b/src/paimon/format/parquet/parquet_file_batch_reader.h index 035d85b43..8ccabc5ed 100644 --- a/src/paimon/format/parquet/parquet_file_batch_reader.h +++ b/src/paimon/format/parquet/parquet_file_batch_reader.h @@ -120,7 +120,7 @@ class ParquetFileBatchReader : public PrefetchFileBatchReader { } bool 
SupportPreciseBitmapSelection() const override { - return true; + return false; } private: From 48cd61024d5cf9d84179348b62c7674b51c083ca Mon Sep 17 00:00:00 2001 From: zhouhongfeng Date: Mon, 22 Jun 2026 10:06:55 +0800 Subject: [PATCH 12/38] fix: add assign operator for TargetRowGroup --- src/paimon/format/parquet/target_row_group.h | 1 + 1 file changed, 1 insertion(+) diff --git a/src/paimon/format/parquet/target_row_group.h b/src/paimon/format/parquet/target_row_group.h index d0efb7e6b..c621b9ca7 100644 --- a/src/paimon/format/parquet/target_row_group.h +++ b/src/paimon/format/parquet/target_row_group.h @@ -36,6 +36,7 @@ class TargetRowGroup { row_ranges(std::move(ranges)) {} TargetRowGroup(const TargetRowGroup& other) = default; + TargetRowGroup& operator=(const TargetRowGroup& other) = default; bool IsExcludedByReadRange() const { return excluded_by_read_range; From 246e85470a57bbcbd5d2d3629c213fa486d89d19 Mon Sep 17 00:00:00 2001 From: zhouhongfeng Date: Mon, 22 Jun 2026 10:39:04 +0800 Subject: [PATCH 13/38] fix: use PARQUET_READ_ENABLE_PAGE_INDEX_FILTER to control bitmap filtering level --- .../page_filtered_row_group_reader_test.cpp | 37 ++++++++++++++++++- .../parquet/parquet_file_batch_reader.cpp | 20 +++++----- 2 files changed, 46 insertions(+), 11 deletions(-) diff --git a/src/paimon/format/parquet/page_filtered_row_group_reader_test.cpp b/src/paimon/format/parquet/page_filtered_row_group_reader_test.cpp index 16008f105..6f1f2e32b 100644 --- a/src/paimon/format/parquet/page_filtered_row_group_reader_test.cpp +++ b/src/paimon/format/parquet/page_filtered_row_group_reader_test.cpp @@ -136,13 +136,15 @@ class PageFilteredRowGroupReaderTest : public ::testing::Test { const std::shared_ptr& predicate, const RoaringBitmap32& bitmap, std::shared_ptr* out, - int32_t batch_size = 1024) { + int32_t batch_size = 1024, + bool enable_page_index_filter = true) { ASSERT_OK_AND_ASSIGN(std::shared_ptr in, fs_->Open(file_name)); ASSERT_OK_AND_ASSIGN(int64_t length, 
in->Length()); auto in_stream = std::make_shared(in, arrow_pool_, length); std::map options; - options[PARQUET_READ_ENABLE_PAGE_INDEX_FILTER] = "true"; + options[PARQUET_READ_ENABLE_PAGE_INDEX_FILTER] = + enable_page_index_filter ? "true" : "false"; ASSERT_OK_AND_ASSIGN( auto batch_reader, ParquetFileBatchReader::Create(std::move(in_stream), arrow_pool_, options, batch_size)); @@ -1347,6 +1349,37 @@ TEST_F(PageFilteredRowGroupReaderTest, BitmapAllAndPartialPagesMixed) { } } +/// Test: bitmap hits partial pages of a row group, with page-filtered option disabled. +/// +/// 200 rows, 10 rows per page, 100 rows per row group → 2 row groups. +/// Bitmap: {0..99} hits all of RG0 + {120..149} hits pages 2-4 of RG1. +/// Expected: 100 rows (100-199) because page-filtered option is disabled, so page-level bitmap is +/// ignored. +TEST_F(PageFilteredRowGroupReaderTest, BitmapWithPageFilteredOptionDisabled) { + std::string file_name = dir_->Str() + "/bitmap_all_and_partial.parquet"; + auto data = MakeSequentialIntData(200); + WriteTestFile(file_name, data, /*write_batch_size=*/10, /*max_row_group_length=*/100); + + RoaringBitmap32 bitmap; + bitmap.AddRange(120, 150); // pages 2-4 of RG1 + + auto read_schema = arrow::schema({arrow::field("val", arrow::int32())}); + std::shared_ptr result; + ReadWithPredicateAndBitmapImpl(file_name, read_schema, /*predicate=*/nullptr, bitmap, &result, + 1024, false); + ASSERT_TRUE(result); + ASSERT_EQ(100, result->length()); + + // Verify: 100-199 + auto flat = arrow::Concatenate(result->chunks()).ValueOrDie(); + auto struct_arr = std::dynamic_pointer_cast(flat); + ASSERT_TRUE(struct_arr); + auto val_arr = std::dynamic_pointer_cast(struct_arr->field(0)); + for (int32_t i = 0; i < 100; ++i) { + ASSERT_EQ(100 + i, val_arr->Value(100 + i)); + } +} + /// Test: bitmap + predicate both applied, bitmap hits all pages of some row groups. /// /// 200 rows, 10 rows per page, 100 rows per row group → 2 row groups. 
diff --git a/src/paimon/format/parquet/parquet_file_batch_reader.cpp b/src/paimon/format/parquet/parquet_file_batch_reader.cpp index 8f221bf6d..14d4fdcc3 100644 --- a/src/paimon/format/parquet/parquet_file_batch_reader.cpp +++ b/src/paimon/format/parquet/parquet_file_batch_reader.cpp @@ -158,6 +158,11 @@ Status ParquetFileBatchReader::SetReadSchema( TargetRowGroups target_row_groups = TargetRowGroup::MakeSerialRowGroups(reader_->GetNumberOfRowGroups()); + PAIMON_ASSIGN_OR_RAISE( + bool enable_page_index_filter, + OptionsUtils::GetValueFromMap(options_, PARQUET_READ_ENABLE_PAGE_INDEX_FILTER, + DEFAULT_PARQUET_READ_ENABLE_PAGE_INDEX_FILTER)); + if (predicate) { PAIMON_ASSIGN_OR_RAISE( target_row_groups, @@ -166,19 +171,16 @@ Status ParquetFileBatchReader::SetReadSchema( if (selection_bitmap) { // walkaround: page index filter does not support nested fields for now, skip page index // bitmap pushdown if there is any nested field in the schema - PAIMON_ASSIGN_OR_RAISE(target_row_groups, - FilterRowGroupsByBitmap(selection_bitmap.value(), - target_row_groups, has_nested_field)); + PAIMON_ASSIGN_OR_RAISE( + target_row_groups, + FilterRowGroupsByBitmap(selection_bitmap.value(), target_row_groups, + !has_nested_field && enable_page_index_filter)); } // Apply page-level filtering after bitmap pruning so we don't read page index // pages for row groups that the bitmap already excluded. 
// If no predicate is provided, skip page-level filtering, row_group_row_ranges will be // empty if (predicate && !target_row_groups.empty()) { - PAIMON_ASSIGN_OR_RAISE( - bool enable_page_index_filter, - OptionsUtils::GetValueFromMap(options_, PARQUET_READ_ENABLE_PAGE_INDEX_FILTER, - DEFAULT_PARQUET_READ_ENABLE_PAGE_INDEX_FILTER)); // walkaround: page index filter does not support nested fields for now, skip page index // filter if there is any nested field in the schema if (enable_page_index_filter && !has_nested_field) { @@ -258,7 +260,7 @@ Result ParquetFileBatchReader::FilterRowGroupsByPredicate( Result ParquetFileBatchReader::FilterRowGroupsByBitmap( const RoaringBitmap32& bitmap, const TargetRowGroups& src_row_groups, - bool has_nested_column) const { + bool enable_page_filtered) const { if (bitmap.IsEmpty()) { return Status::Invalid("cannot push down an empty bitmap to ParquetFileBatchReader"); } @@ -279,7 +281,7 @@ Result ParquetFileBatchReader::FilterRowGroupsByBitmap( } int64_t rg_row_count = meta_data->RowGroup(row_group_idx)->num_rows(); - if (has_nested_column) { + if (!enable_page_filtered) { // For nested schema, we cannot apply page-level filtering, so we directly add the whole // row group if bitmap matches. 
target_row_groups.emplace_back(row_group_idx); From d9c952d7c0967e30a70d4ec71dce5e4a5a6366b5 Mon Sep 17 00:00:00 2001 From: zhouhongfeng Date: Mon, 22 Jun 2026 15:01:50 +0800 Subject: [PATCH 14/38] fix: use bitmap to get row ranges instead of the first column with page index --- .../page_filtered_row_group_reader_test.cpp | 6 +- .../parquet/parquet_file_batch_reader.cpp | 77 +++++-------- .../parquet/parquet_file_batch_reader.h | 10 +- .../parquet_file_batch_reader_test.cpp | 109 +++++++++++++++++- 4 files changed, 143 insertions(+), 59 deletions(-) diff --git a/src/paimon/format/parquet/page_filtered_row_group_reader_test.cpp b/src/paimon/format/parquet/page_filtered_row_group_reader_test.cpp index 6f1f2e32b..1b752e7c8 100644 --- a/src/paimon/format/parquet/page_filtered_row_group_reader_test.cpp +++ b/src/paimon/format/parquet/page_filtered_row_group_reader_test.cpp @@ -137,14 +137,14 @@ class PageFilteredRowGroupReaderTest : public ::testing::Test { const RoaringBitmap32& bitmap, std::shared_ptr* out, int32_t batch_size = 1024, - bool enable_page_index_filter = true) { + bool enable_page_level_filter = true) { ASSERT_OK_AND_ASSIGN(std::shared_ptr in, fs_->Open(file_name)); ASSERT_OK_AND_ASSIGN(int64_t length, in->Length()); auto in_stream = std::make_shared(in, arrow_pool_, length); std::map options; options[PARQUET_READ_ENABLE_PAGE_INDEX_FILTER] = - enable_page_index_filter ? "true" : "false"; + enable_page_level_filter ? 
"true" : "false"; ASSERT_OK_AND_ASSIGN( auto batch_reader, ParquetFileBatchReader::Create(std::move(in_stream), arrow_pool_, options, batch_size)); @@ -1376,7 +1376,7 @@ TEST_F(PageFilteredRowGroupReaderTest, BitmapWithPageFilteredOptionDisabled) { ASSERT_TRUE(struct_arr); auto val_arr = std::dynamic_pointer_cast(struct_arr->field(0)); for (int32_t i = 0; i < 100; ++i) { - ASSERT_EQ(100 + i, val_arr->Value(100 + i)); + ASSERT_EQ(100 + i, val_arr->Value(i)); } } diff --git a/src/paimon/format/parquet/parquet_file_batch_reader.cpp b/src/paimon/format/parquet/parquet_file_batch_reader.cpp index 14d4fdcc3..7fa111141 100644 --- a/src/paimon/format/parquet/parquet_file_batch_reader.cpp +++ b/src/paimon/format/parquet/parquet_file_batch_reader.cpp @@ -287,68 +287,49 @@ Result ParquetFileBatchReader::FilterRowGroupsByBitmap( target_row_groups.emplace_back(row_group_idx); continue; } - auto page_ranges = FilterPagesByBitmap(bitmap, row_group_idx, start_row_idx, rg_row_count); - if (page_ranges.has_value()) { - target_row_groups.emplace_back(/*row_group_idx=*/row_group_idx, - /*is_partially_matched=*/true, - /*row_ranges=*/page_ranges.value()); - } else { - target_row_groups.emplace_back(row_group_idx); - } + auto page_ranges = BitmapToRowRanges(bitmap, start_row_idx, end_row_idx); + target_row_groups.emplace_back(/*row_group_idx=*/row_group_idx, + /*is_partially_matched=*/true, + /*row_ranges=*/page_ranges); } return target_row_groups; } -std::optional ParquetFileBatchReader::FilterPagesByBitmap(const RoaringBitmap32& bitmap, - int32_t row_group_idx, - uint64_t rg_start_row, - int64_t rg_row_count) const { - int32_t column_with_offset_index = FindColumnWithOffsetIndex(row_group_idx); - if (column_with_offset_index < 0) { - return std::nullopt; - } +RowRanges ParquetFileBatchReader::BitmapToRowRanges(const RoaringBitmap32& bitmap, + uint64_t start_row, uint64_t end_row) { + RowRanges row_ranges; - auto page_index_reader = reader_->GetPageIndexReader(); - if 
(!page_index_reader) { - return std::nullopt; + if (bitmap.IsEmpty() || start_row >= end_row) { + return row_ranges; } - auto rg_page_index_reader = page_index_reader->RowGroup(row_group_idx); - if (!rg_page_index_reader) { - return std::nullopt; - } + auto it = bitmap.EqualOrLarger(static_cast(start_row)); + auto end = bitmap.End(); - auto offset_index = rg_page_index_reader->GetOffsetIndex(column_with_offset_index); - if (!offset_index) { - return std::nullopt; + if (it == end || static_cast(*it) >= end_row) { + return row_ranges; } - const auto& pages = offset_index->page_locations(); - auto num_pages = static_cast(pages.size()); - RowRanges filtered_row_ranges; - for (int64_t i = 0; i < num_pages; ++i) { - int64_t page_start_row = pages[i].first_row_index; - // The bitmap is [from, to) while page row range is [from, to] - int64_t page_end_row = - (i + 1 < num_pages) ? pages[i + 1].first_row_index - 1 : rg_row_count - 1; - if (bitmap.ContainsAny(rg_start_row + page_start_row, rg_start_row + page_end_row + 1)) { - filtered_row_ranges.Add(RowRanges::Range(page_start_row, page_end_row)); + int64_t range_start = *it; + int64_t range_end = *it; + + for (++it; it != end; ++it) { + int32_t current = *it; + if (static_cast(current) >= end_row) { + break; } - } - return filtered_row_ranges; -} -int32_t ParquetFileBatchReader::FindColumnWithOffsetIndex(int32_t row_group_idx) const { - auto rg_meta = reader_->GetFileReader()->parquet_reader()->metadata()->RowGroup(row_group_idx); - if (!rg_meta) { - return -1; - } - for (int col = 0; col < rg_meta->num_columns(); ++col) { - if (rg_meta->ColumnChunk(col)->GetOffsetIndexLocation().has_value()) { - return col; + if (current == range_end + 1) { + range_end = current; + } else { + row_ranges.Add(RowRanges::Range(range_start - start_row, range_end - start_row)); + range_start = current; + range_end = current; } } - return -1; + + row_ranges.Add(RowRanges::Range(range_start - start_row, range_end - start_row)); + return 
row_ranges; } // Uses page-level column index statistics to filter row groups and store per-row-group diff --git a/src/paimon/format/parquet/parquet_file_batch_reader.h b/src/paimon/format/parquet/parquet_file_batch_reader.h index 8ccabc5ed..5ddb8daeb 100644 --- a/src/paimon/format/parquet/parquet_file_batch_reader.h +++ b/src/paimon/format/parquet/parquet_file_batch_reader.h @@ -150,11 +150,13 @@ class ParquetFileBatchReader : public PrefetchFileBatchReader { index_vector->push_back((*index)++); } } - int32_t FindColumnWithOffsetIndex(int32_t row_group_idx) const; - std::optional FilterPagesByBitmap(const RoaringBitmap32& bitmap, - int32_t row_group_idx, uint64_t rg_start_row, - int64_t rg_row_count) const; + static RowRanges BitmapToRowRanges(const RoaringBitmap32& bitmap, uint64_t start_row, + uint64_t end_row); + + Result FilterPagesByBitmap(const RoaringBitmap32& bitmap, + int32_t row_group_idx, uint64_t rg_start_row, + int64_t rg_row_count) const; // precondition: predicate supposed not be empty Result FilterRowGroupsByPredicate( diff --git a/src/paimon/format/parquet/parquet_file_batch_reader_test.cpp b/src/paimon/format/parquet/parquet_file_batch_reader_test.cpp index 73501cbfd..121c090fa 100644 --- a/src/paimon/format/parquet/parquet_file_batch_reader_test.cpp +++ b/src/paimon/format/parquet/parquet_file_batch_reader_test.cpp @@ -134,12 +134,15 @@ class ParquetFileBatchReaderTest : public ::testing::Test, std::unique_ptr PrepareParquetFileBatchReader( const std::string& file_name, const std::shared_ptr& read_schema, const std::shared_ptr& predicate, - const std::optional& selection_bitmap, int32_t batch_size) const { + const std::optional& selection_bitmap, int32_t batch_size, + bool enable_page_level_filter = false) const { EXPECT_OK_AND_ASSIGN(auto input_stream, fs_->Open(file_name)); auto length = fs_->GetFileStatus(file_name).value()->GetLen(); auto in_stream = std::make_unique(std::move(input_stream), pool_, length); - std::map options = {}; + 
std::map options; + options[PARQUET_READ_ENABLE_PAGE_INDEX_FILTER] = + enable_page_level_filter ? "true" : "false"; return PrepareParquetFileBatchReader(std::move(in_stream), options, read_schema, predicate, selection_bitmap, batch_size); } @@ -467,7 +470,7 @@ TEST_F(ParquetFileBatchReaderTest, TestCreateArrowReaderProperties) { } } -TEST_F(ParquetFileBatchReaderTest, TestBitmapPushDownWithMultiRowGroups) { +TEST_F(ParquetFileBatchReaderTest, TestBitmapRowGroupPushDownWithMultiRowGroups) { arrow::FieldVector fields = {arrow::field("f0", arrow::int32())}; auto arrow_type = arrow::struct_(fields); auto src_array = std::dynamic_pointer_cast( @@ -505,8 +508,48 @@ TEST_F(ParquetFileBatchReaderTest, TestBitmapPushDownWithMultiRowGroups) { auto expected_array = arrow::ChunkedArray::Make({src_array->Slice(0, 6)}).ValueOrDie(); ASSERT_TRUE(result_array->Equals(expected_array)) << result_array->ToString(); } +TEST_F(ParquetFileBatchReaderTest, TestBitmapPagePushDownWithMultiRowGroups) { + arrow::FieldVector fields = {arrow::field("f0", arrow::int32())}; + auto arrow_type = arrow::struct_(fields); + auto src_array = std::dynamic_pointer_cast( + arrow::ipc::internal::json::ArrayFromJSON(arrow_type, R"([ + [0], + [1], + [2], + [3], + [4], + [5], + [6], + [7], + [8], + [9], + [10], + [11] + ])") + .ValueOrDie()); + auto src_schema = arrow::schema(fields); + std::optional bitmap = RoaringBitmap32::From({3, 5}); + // data in file rowGroup0:[0, 1, 2, 3, 4, 5] | rowGroup1:[6, 7, 8, 9, 10, 11] + + auto arrow_schema = arrow::schema(fields); + WriteArray(file_path_, src_array, arrow_schema, /*write_batch_size=*/12, + /*enable_dictionary=*/true, + /*max_row_group_length=*/6); + + auto parquet_batch_reader = + PrepareParquetFileBatchReader(file_path_, arrow_schema, /*predicate=*/nullptr, bitmap, + /*batch_size=*/12, /*enable_page_level_filter=*/true); + + ASSERT_OK_AND_ASSIGN( + std::shared_ptr result_array, + paimon::test::ReadResultCollector::CollectResult(parquet_batch_reader.get())); 
+ + auto expected_array = + arrow::ChunkedArray::Make({src_array->Slice(3, 1), src_array->Slice(6, 1)}).ValueOrDie(); + ASSERT_TRUE(result_array->Equals(expected_array)) << result_array->ToString(); +} -TEST_F(ParquetFileBatchReaderTest, TestPredicateAndBitmapPushDown) { +TEST_F(ParquetFileBatchReaderTest, TestPredicateAndBitmapRowGroupPushDown) { arrow::FieldVector fields = {arrow::field("f0", arrow::int32())}; auto arrow_type = arrow::struct_(fields); arrow::StructBuilder struct_builder(arrow_type, arrow::default_memory_pool(), @@ -563,6 +606,64 @@ TEST_F(ParquetFileBatchReaderTest, TestPredicateAndBitmapPushDown) { ASSERT_FALSE(result_array); } } +TEST_F(ParquetFileBatchReaderTest, TestPredicateAndBitmapPagePushDown) { + arrow::FieldVector fields = {arrow::field("f0", arrow::int32())}; + auto arrow_type = arrow::struct_(fields); + arrow::StructBuilder struct_builder(arrow_type, arrow::default_memory_pool(), + {std::make_shared()}); + auto int_builder = static_cast(struct_builder.field_builder(0)); + int32_t length = 1024; + for (int32_t i = 0; i < length; ++i) { + ASSERT_TRUE(struct_builder.Append().ok()); + ASSERT_TRUE(int_builder->Append(i).ok()); + } + // data file: + // rowGroup0: [0, 256) + // rowGroup1: [256, 512) + // rowGroup2: [512, 768) + // rowGroup3: [768, 1024) + std::shared_ptr src_array; + ASSERT_TRUE(struct_builder.Finish(&src_array).ok()); + auto src_schema = arrow::schema(fields); + auto arrow_schema = arrow::schema(fields); + WriteArray(file_path_, src_array, arrow_schema, /*write_batch_size=*/1024, + /*enable_dictionary=*/true, + /*max_row_group_length=*/256); + { + // simple case + std::optional bitmap = RoaringBitmap32::From({100, 400, 600}); + ASSERT_OK_AND_ASSIGN( + auto predicate, + PredicateBuilder::Or( + {PredicateBuilder::LessThan(/*field_index=*/0, /*field_name=*/"f0", FieldType::INT, + Literal(255)), + PredicateBuilder::GreaterThan(/*field_index=*/0, /*field_name=*/"f0", + FieldType::INT, Literal(600))})); + auto 
parquet_batch_reader = + PrepareParquetFileBatchReader(file_path_, arrow_schema, predicate, bitmap, + /*batch_size=*/length, /*enable_page_level_filter=*/true); + ASSERT_OK_AND_ASSIGN( + std::shared_ptr result_array, + paimon::test::ReadResultCollector::CollectResult(parquet_batch_reader.get())); + + auto expected_array = + arrow::ChunkedArray::Make({src_array->Slice(100, 1), src_array->Slice(600, 1)}) + .ValueOrDie(); + ASSERT_TRUE(result_array->Equals(expected_array)) << result_array->ToString(); + } + { + // test all data has been filtered out with predicate and bitmap pushdown + std::optional bitmap = RoaringBitmap32::From({100, 400, 600}); + auto predicate = PredicateBuilder::GreaterThan(/*field_index=*/0, /*field_name=*/"f0", + FieldType::INT, Literal(800)); + auto parquet_batch_reader = PrepareParquetFileBatchReader( + file_path_, arrow_schema, predicate, bitmap, /*batch_size=*/length); + ASSERT_OK_AND_ASSIGN( + std::shared_ptr result_array, + paimon::test::ReadResultCollector::CollectResult(parquet_batch_reader.get())); + ASSERT_FALSE(result_array); + } +} TEST_F(ParquetFileBatchReaderTest, TestReadNoField) { // if only read partition fields, format reader will set empty read schema From 1c495e4d209e48f8366784bcfd8376d79d8e3dca Mon Sep 17 00:00:00 2001 From: zhouhongfeng Date: Mon, 22 Jun 2026 15:03:36 +0800 Subject: [PATCH 15/38] fix: test --- src/paimon/format/parquet/parquet_file_batch_reader_test.cpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/paimon/format/parquet/parquet_file_batch_reader_test.cpp b/src/paimon/format/parquet/parquet_file_batch_reader_test.cpp index 121c090fa..0be8fa0be 100644 --- a/src/paimon/format/parquet/parquet_file_batch_reader_test.cpp +++ b/src/paimon/format/parquet/parquet_file_batch_reader_test.cpp @@ -545,7 +545,7 @@ TEST_F(ParquetFileBatchReaderTest, TestBitmapPagePushDownWithMultiRowGroups) { paimon::test::ReadResultCollector::CollectResult(parquet_batch_reader.get())); auto expected_array = - 
arrow::ChunkedArray::Make({src_array->Slice(3, 1), src_array->Slice(6, 1)}).ValueOrDie(); + arrow::ChunkedArray::Make({src_array->Slice(3, 1), src_array->Slice(5, 1)}).ValueOrDie(); ASSERT_TRUE(result_array->Equals(expected_array)) << result_array->ToString(); } From 5d534db0fff289e1031aaeb493b2f75a9068fe18 Mon Sep 17 00:00:00 2001 From: zhouhongfeng Date: Fri, 26 Jun 2026 10:36:44 +0800 Subject: [PATCH 16/38] fix: FileBatchReader returns discontinuous batch, and change GetPreviousBatchFirstRowId to GetGlobalRowId --- .../shared_shredding_file_reader.cpp | 4 +- .../shredding/shared_shredding_file_reader.h | 2 +- .../bitmap/apply_bitmap_index_batch_reader.h | 14 +-- .../reader/delegating_prefetch_reader.h | 4 +- .../prefetch_file_batch_reader_impl.cpp | 67 +++++++++---- .../reader/prefetch_file_batch_reader_impl.h | 6 +- .../prefetch_file_batch_reader_impl_test.cpp | 31 +++--- .../apply_deletion_vector_batch_reader.h | 16 ++- .../complete_row_tracking_fields_reader.cpp | 8 +- .../io/complete_row_tracking_fields_reader.h | 4 +- src/paimon/core/io/field_mapping_reader.h | 4 +- .../io/key_value_data_file_record_reader.cpp | 8 +- .../io/key_value_data_file_record_reader.h | 7 +- .../format/avro/avro_file_batch_reader.h | 4 +- .../avro/avro_file_batch_reader_test.cpp | 22 ++--- .../format/blob/blob_file_batch_reader.h | 6 +- .../blob/blob_file_batch_reader_test.cpp | 14 ++- .../format/lance/lance_file_batch_reader.h | 6 +- .../lance/lance_format_reader_writer_test.cpp | 13 ++- src/paimon/format/orc/orc_file_batch_reader.h | 4 +- .../format/orc/orc_file_batch_reader_test.cpp | 14 +-- .../parquet/parquet_file_batch_reader.cpp | 58 ++++++++++- .../parquet/parquet_file_batch_reader.h | 18 +++- .../parquet_file_batch_reader_test.cpp | 99 +++++++++++++++++-- src/paimon/format/parquet/row_ranges.cpp | 6 ++ src/paimon/format/parquet/row_ranges.h | 2 + .../testing/mock/mock_file_batch_reader.h | 4 +- 27 files changed, 310 insertions(+), 135 deletions(-) diff --git 
a/src/paimon/common/data/shredding/shared_shredding_file_reader.cpp b/src/paimon/common/data/shredding/shared_shredding_file_reader.cpp index 4c6bbc0ca..c9c249caf 100644 --- a/src/paimon/common/data/shredding/shared_shredding_file_reader.cpp +++ b/src/paimon/common/data/shredding/shared_shredding_file_reader.cpp @@ -469,8 +469,8 @@ void SharedShreddingFileReader::Close() { reader_->Close(); } -Result SharedShreddingFileReader::GetPreviousBatchFirstRowNumber() const { - return reader_->GetPreviousBatchFirstRowNumber(); +Result SharedShreddingFileReader::GetGlobalRowId(uint64_t batch_row_id) const { + return reader_->GetGlobalRowId(batch_row_id); } Result SharedShreddingFileReader::GetNumberOfRows() const { diff --git a/src/paimon/common/data/shredding/shared_shredding_file_reader.h b/src/paimon/common/data/shredding/shared_shredding_file_reader.h index c89e919e2..7998b7ab1 100644 --- a/src/paimon/common/data/shredding/shared_shredding_file_reader.h +++ b/src/paimon/common/data/shredding/shared_shredding_file_reader.h @@ -46,7 +46,7 @@ class SharedShreddingFileReader : public FileBatchReader { void Close() override; - Result GetPreviousBatchFirstRowNumber() const override; + Result GetGlobalRowId(uint64_t batch_row_id) const override; Result GetNumberOfRows() const override; diff --git a/src/paimon/common/file_index/bitmap/apply_bitmap_index_batch_reader.h b/src/paimon/common/file_index/bitmap/apply_bitmap_index_batch_reader.h index 7e2c9338a..66b75bb3e 100644 --- a/src/paimon/common/file_index/bitmap/apply_bitmap_index_batch_reader.h +++ b/src/paimon/common/file_index/bitmap/apply_bitmap_index_batch_reader.h @@ -80,8 +80,8 @@ class ApplyBitmapIndexBatchReader : public FileBatchReader { return Status::Invalid("ApplyBitmapIndexBatchReader does not support SetReadSchema"); } - Result GetPreviousBatchFirstRowNumber() const override { - return reader_->GetPreviousBatchFirstRowNumber(); + Result GetGlobalRowId(uint64_t batch_row_id) const override { + return 
reader_->GetGlobalRowId(batch_row_id); } Result GetNumberOfRows() const override { @@ -95,11 +95,11 @@ class ApplyBitmapIndexBatchReader : public FileBatchReader { private: Result Filter(int32_t batch_size) const { RoaringBitmap32 is_valid; - PAIMON_ASSIGN_OR_RAISE(int32_t start_pos, reader_->GetPreviousBatchFirstRowNumber()); - int32_t length = batch_size; - for (auto iter = bitmap_.EqualOrLarger(start_pos); - iter != bitmap_.End() && *iter < start_pos + length; ++iter) { - is_valid.Add(*iter - start_pos); + for (int32_t i = 0; i < batch_size; ++i) { + PAIMON_ASSIGN_OR_RAISE(uint64_t global_row_id, reader_->GetGlobalRowId(i)); + if (bitmap_.Contains(global_row_id)) { + is_valid.Add(i); + } } return is_valid; } diff --git a/src/paimon/common/reader/delegating_prefetch_reader.h b/src/paimon/common/reader/delegating_prefetch_reader.h index fe2e3eda2..de92431cc 100644 --- a/src/paimon/common/reader/delegating_prefetch_reader.h +++ b/src/paimon/common/reader/delegating_prefetch_reader.h @@ -54,8 +54,8 @@ class DelegatingPrefetchReader : public FileBatchReader { return prefetch_reader_->SetReadSchema(read_schema, predicate, selection_bitmap); } - Result GetPreviousBatchFirstRowNumber() const override { - return GetReader()->GetPreviousBatchFirstRowNumber(); + Result GetGlobalRowId(uint64_t batch_row_id) const override { + return GetReader()->GetGlobalRowId(batch_row_id); } Result GetNumberOfRows() const override { diff --git a/src/paimon/common/reader/prefetch_file_batch_reader_impl.cpp b/src/paimon/common/reader/prefetch_file_batch_reader_impl.cpp index da74f3484..cf842583c 100644 --- a/src/paimon/common/reader/prefetch_file_batch_reader_impl.cpp +++ b/src/paimon/common/reader/prefetch_file_batch_reader_impl.cpp @@ -19,6 +19,7 @@ #include #include #include +#include #include #include "arrow/array/array_base.h" @@ -40,6 +41,19 @@ class Schema; namespace paimon { +namespace { + +std::pair ComputeBatchSliceByReadRange( + const std::vector& global_row_ids, const std::pair& 
read_range) { + auto begin_it = + std::lower_bound(global_row_ids.begin(), global_row_ids.end(), read_range.first); + auto end_it = std::lower_bound(global_row_ids.begin(), global_row_ids.end(), read_range.second); + return {static_cast(std::distance(global_row_ids.begin(), begin_it)), + static_cast(std::distance(global_row_ids.begin(), end_it))}; +} + +} // namespace + Result> PrefetchFileBatchReaderImpl::Create( const std::string& data_file_path, const ReaderBuilder* reader_builder, const std::shared_ptr& fs, uint32_t prefetch_max_parallel_num, int32_t batch_size, @@ -265,6 +279,7 @@ Status PrefetchFileBatchReaderImpl::CleanUp() { read_ranges_.clear(); read_ranges_in_group_.clear(); + current_batch_global_row_ids_.clear(); clean_prefetch_queue(); for (size_t i = 0; i < readers_pos_.size(); i++) { readers_pos_[i]->store(0); @@ -409,42 +424,53 @@ Status PrefetchFileBatchReaderImpl::EnsureReaderPosition( Status PrefetchFileBatchReaderImpl::HandleReadResult( size_t reader_idx, const std::pair& read_range, ReadBatchWithBitmap&& read_batch_with_bitmap) { - PAIMON_ASSIGN_OR_RAISE(uint64_t first_row_number, - readers_[reader_idx]->GetPreviousBatchFirstRowNumber()); auto& prefetch_queue = prefetch_queues_[reader_idx]; if (!BatchReader::IsEofBatch(read_batch_with_bitmap)) { auto& [read_batch, bitmap] = read_batch_with_bitmap; auto& [c_array, c_schema] = read_batch; - - if (first_row_number >= read_range.second) { - // fully out of range, data before first_row_number has been filtered out - readers_pos_[reader_idx]->store(first_row_number); + std::vector global_row_ids; + global_row_ids.reserve(c_array->length); + for (int64_t i = 0; i < c_array->length; ++i) { + PAIMON_ASSIGN_OR_RAISE(uint64_t global_row_id, readers_[reader_idx]->GetGlobalRowId(i)); + global_row_ids.push_back(global_row_id); + } + if (global_row_ids.empty()) { ReaderUtils::ReleaseReadBatch(std::move(read_batch)); return Status::OK(); - } else if (first_row_number + c_array->length > read_range.second) { - 
// partially out of range, data before read_range.second has been effectively consumed + } + auto [slice_begin, slice_end] = ComputeBatchSliceByReadRange(global_row_ids, read_range); + if (slice_begin >= slice_end) { + readers_pos_[reader_idx]->store(read_range.second); + ReaderUtils::ReleaseReadBatch(std::move(read_batch)); + return Status::OK(); + } else if (slice_begin > 0 || slice_end < c_array->length) { readers_pos_[reader_idx]->store(read_range.second); PAIMON_ASSIGN_OR_RAISE_FROM_ARROW(std::shared_ptr src_array, arrow::ImportArray(c_array.get(), c_schema.get())); - int32_t target_length = read_range.second - first_row_number; - auto array = src_array->Slice(/*offset=*/0, target_length); + auto array = src_array->Slice(slice_begin, slice_end - slice_begin); PAIMON_RETURN_NOT_OK_FROM_ARROW( arrow::ExportArray(*array, c_array.get(), c_schema.get())); - bitmap.RemoveRange(target_length, src_array->length()); + RoaringBitmap32 sliced_bitmap; + for (auto iter = bitmap.EqualOrLarger(slice_begin); + iter != bitmap.End() && *iter < slice_end; ++iter) { + sliced_bitmap.Add(*iter - slice_begin); + } + bitmap = std::move(sliced_bitmap); + global_row_ids = std::vector(global_row_ids.begin() + slice_begin, + global_row_ids.begin() + slice_end); } else { - // all within the range, data before readers_[reader_idx]->GetNextRowToRead() has been - // effectively consumed readers_pos_[reader_idx]->store(readers_[reader_idx]->GetNextRowToRead()); } if (bitmap.IsEmpty()) { ReaderUtils::ReleaseReadBatch(std::move(read_batch)); return Status::OK(); } - prefetch_queue->push({read_range, std::move(read_batch_with_bitmap), first_row_number}); + prefetch_queue->push( + {read_range, std::move(read_batch_with_bitmap), std::move(global_row_ids)}); } else { std::pair eof_range; PAIMON_ASSIGN_OR_RAISE(eof_range, EofRange()); - prefetch_queue->push({eof_range, std::move(read_batch_with_bitmap), first_row_number}); + prefetch_queue->push({eof_range, std::move(read_batch_with_bitmap), {}}); 
readers_pos_[reader_idx]->store(std::numeric_limits::max()); } return Status::OK(); @@ -527,7 +553,7 @@ Result PrefetchFileBatchReaderImpl::NextBatchW std::unique_lock lock(working_mutex_); cv_.notify_one(); } - previous_batch_first_row_num_ = prefetch_batch.value().previous_batch_first_row_num; + current_batch_global_row_ids_ = std::move(prefetch_batch.value().global_row_ids); return std::move(prefetch_batch).value().batch; } } @@ -537,7 +563,7 @@ Result PrefetchFileBatchReaderImpl::NextBatchW assert(false); return Status::Invalid("peek batch not suppose to be nullptr"); } - previous_batch_first_row_num_ = peek_batch->previous_batch_first_row_num; + // current_batch_global_row_ids_.clear(); return BatchReader::MakeEofBatchWithBitmap(); } if (value_count == prefetch_queues_.size()) { @@ -571,8 +597,11 @@ Result> PrefetchFileBatchReaderImpl::GetFileSchem return readers_[0]->GetFileSchema(); } -Result PrefetchFileBatchReaderImpl::GetPreviousBatchFirstRowNumber() const { - return previous_batch_first_row_num_; +Result PrefetchFileBatchReaderImpl::GetGlobalRowId(uint64_t batch_row_id) const { + if (batch_row_id >= current_batch_global_row_ids_.size()) { + return std::numeric_limits::max(); + } + return current_batch_global_row_ids_[batch_row_id]; } Result PrefetchFileBatchReaderImpl::GetNumberOfRows() const { diff --git a/src/paimon/common/reader/prefetch_file_batch_reader_impl.h b/src/paimon/common/reader/prefetch_file_batch_reader_impl.h index 5ed9fb352..4d99dc843 100644 --- a/src/paimon/common/reader/prefetch_file_batch_reader_impl.h +++ b/src/paimon/common/reader/prefetch_file_batch_reader_impl.h @@ -76,7 +76,7 @@ class PrefetchFileBatchReaderImpl : public PrefetchFileBatchReader { const std::optional& selection_bitmap) override; Status SeekToRow(uint64_t row_number) override; - Result GetPreviousBatchFirstRowNumber() const override; + Result GetGlobalRowId(uint64_t batch_row_id) const override; Result GetNumberOfRows() const override; uint64_t GetNextRowToRead() 
const override; void Close() override; @@ -105,7 +105,7 @@ class PrefetchFileBatchReaderImpl : public PrefetchFileBatchReader { struct PrefetchBatch { std::pair read_range; BatchReader::ReadBatchWithBitmap batch; - uint64_t previous_batch_first_row_num; + std::vector global_row_ids; }; PrefetchFileBatchReaderImpl( @@ -160,7 +160,7 @@ class PrefetchFileBatchReaderImpl : public PrefetchFileBatchReader { std::unique_ptr background_thread_; Status read_status_; std::atomic is_shutdown_ = false; - uint64_t previous_batch_first_row_num_ = std::numeric_limits::max(); + std::vector current_batch_global_row_ids_; bool need_prefetch_ = false; bool read_ranges_freshed_ = false; const uint32_t prefetch_queue_capacity_; diff --git a/src/paimon/common/reader/prefetch_file_batch_reader_impl_test.cpp b/src/paimon/common/reader/prefetch_file_batch_reader_impl_test.cpp index 7c1acef32..ef78927ca 100644 --- a/src/paimon/common/reader/prefetch_file_batch_reader_impl_test.cpp +++ b/src/paimon/common/reader/prefetch_file_batch_reader_impl_test.cpp @@ -283,11 +283,10 @@ TEST_F(PrefetchFileBatchReaderImplTest, TestSimple) { /*enable_adaptive_prefetch_strategy=*/false, executor_, /*initialize_read_ranges=*/true, /*prefetch_cache_mode=*/PrefetchCacheMode::ALWAYS, CacheConfig(), GetDefaultPool())); - ASSERT_EQ(reader->GetPreviousBatchFirstRowNumber().value(), -1); + ASSERT_EQ(reader->GetGlobalRowId(0).value(), std::numeric_limits::max()); ASSERT_OK_AND_ASSIGN(auto result_array, ReadResultCollector::CollectResult( reader.get(), /*max simulated data processing time*/ 100)); - ASSERT_EQ(reader->GetPreviousBatchFirstRowNumber().value(), 101); auto expected_array = std::make_shared(data_array); ASSERT_TRUE(result_array->Equals(expected_array)); } @@ -605,11 +604,10 @@ TEST_F(PrefetchFileBatchReaderImplTest, TestReadWithLargeBatchSize) { prefetch_max_parallel_num * 2, /*enable_adaptive_prefetch_strategy=*/false, executor_, /*initialize_read_ranges=*/true, 
/*prefetch_cache_mode=*/PrefetchCacheMode::ALWAYS, CacheConfig(), GetDefaultPool())); - ASSERT_EQ(reader->GetPreviousBatchFirstRowNumber().value(), -1); + ASSERT_EQ(reader->GetGlobalRowId(0).value(), std::numeric_limits::max()); ASSERT_OK_AND_ASSIGN(auto result_array, ReadResultCollector::CollectResult( reader.get(), /*max simulated data processing time*/ 100)); - ASSERT_EQ(reader->GetPreviousBatchFirstRowNumber().value(), 101); auto expected_array = std::make_shared(data_array); ASSERT_TRUE(result_array->Equals(expected_array)); } @@ -633,11 +631,11 @@ TEST_F(PrefetchFileBatchReaderImplTest, TestPartialReaderSuccessRead) { } arrow::ArrayVector result_array_vector; - ASSERT_EQ(reader->GetPreviousBatchFirstRowNumber().value(), -1); + ASSERT_EQ(reader->GetGlobalRowId(0).value(), std::numeric_limits::max()); ASSERT_OK_AND_ASSIGN(auto batch_with_bitmap, reader->NextBatchWithBitmap()); auto& [batch, bitmap] = batch_with_bitmap; ASSERT_EQ(batch.first->length, bitmap.Cardinality()); - ASSERT_EQ(reader->GetPreviousBatchFirstRowNumber().value(), 0); + ASSERT_EQ(reader->GetGlobalRowId(0).value(), 0); ASSERT_OK_AND_ASSIGN(auto array, ReadResultCollector::GetArray(std::move(batch))); result_array_vector.push_back(array); ASSERT_OK(prefetch_reader->GetReadStatus()); @@ -678,9 +676,9 @@ TEST_F(PrefetchFileBatchReaderImplTest, TestAllReaderFailedWithIOError) { ->SetNextBatchStatus(Status::IOError("mock error")); } - ASSERT_EQ(reader->GetPreviousBatchFirstRowNumber().value(), -1); + ASSERT_EQ(reader->GetGlobalRowId(0).value(), std::numeric_limits::max()); auto batch_result = reader->NextBatchWithBitmap(); - ASSERT_EQ(reader->GetPreviousBatchFirstRowNumber().value(), -1); + ASSERT_EQ(reader->GetGlobalRowId(0).value(), std::numeric_limits::max()); ASSERT_FALSE(batch_result.ok()); ASSERT_TRUE(batch_result.status().IsIOError()); ASSERT_FALSE(prefetch_reader->is_shutdown_); @@ -689,7 +687,7 @@ TEST_F(PrefetchFileBatchReaderImplTest, TestAllReaderFailedWithIOError) { // call NextBatch 
again, will still return error status auto batch_result2 = reader->NextBatchWithBitmap(); - ASSERT_EQ(reader->GetPreviousBatchFirstRowNumber().value(), -1); + ASSERT_EQ(reader->GetGlobalRowId(0).value(), std::numeric_limits::max()); ASSERT_FALSE(batch_result2.ok()); ASSERT_TRUE(batch_result2.status().IsIOError()); } @@ -706,11 +704,11 @@ TEST_F(PrefetchFileBatchReaderImplTest, TestPrefetchWithEmptyData) { prefetch_max_parallel_num * 2, /*enable_adaptive_prefetch_strategy=*/false, executor_, /*initialize_read_ranges=*/true, /*prefetch_cache_mode=*/PrefetchCacheMode::ALWAYS, CacheConfig(), GetDefaultPool())); - ASSERT_EQ(reader->GetPreviousBatchFirstRowNumber().value(), -1); + ASSERT_EQ(reader->GetGlobalRowId(0).value(), std::numeric_limits::max()); ASSERT_OK_AND_ASSIGN(auto result_array, ReadResultCollector::CollectResult( reader.get(), /*max simulated data processing time*/ 100)); - ASSERT_EQ(reader->GetPreviousBatchFirstRowNumber().value(), 0); + ASSERT_EQ(reader->GetGlobalRowId(0).value(), std::numeric_limits::max()); ASSERT_FALSE(result_array); } @@ -726,11 +724,10 @@ TEST_F(PrefetchFileBatchReaderImplTest, TestCallNextBatchAfterReadingEof) { prefetch_max_parallel_num * 2, /*enable_adaptive_prefetch_strategy=*/false, executor_, /*initialize_read_ranges=*/true, /*prefetch_cache_mode=*/PrefetchCacheMode::ALWAYS, CacheConfig(), GetDefaultPool())); - ASSERT_EQ(reader->GetPreviousBatchFirstRowNumber().value(), -1); + ASSERT_EQ(reader->GetGlobalRowId(0).value(), std::numeric_limits::max()); ASSERT_OK_AND_ASSIGN(auto result_array, ReadResultCollector::CollectResult( reader.get(), /*max simulated data processing time*/ 100)); - ASSERT_EQ(reader->GetPreviousBatchFirstRowNumber().value(), 10); auto expected_array = std::make_shared(data_array); ASSERT_TRUE(result_array->Equals(expected_array)); @@ -832,11 +829,11 @@ TEST_P(PrefetchFileBatchReaderImplTest, TestPrefetchWithPredicatePushdownWithCom PreparePrefetchReader(file_format, schema.get(), predicate, 
/*selection_bitmap=*/std::nullopt, /*batch_size=*/10, /*prefetch_max_parallel_num=*/3, cache_mode); - ASSERT_EQ(reader->GetPreviousBatchFirstRowNumber().value(), -1); + ASSERT_EQ(reader->GetGlobalRowId(0).value(), std::numeric_limits::max()); ASSERT_OK_AND_ASSIGN(auto result_array, ReadResultCollector::CollectResult( reader.get(), /*max simulated data processing time*/ 100)); - ASSERT_EQ(reader->GetPreviousBatchFirstRowNumber().value(), 90); + ASSERT_EQ(reader->GetGlobalRowId(0).value(), 80); arrow::ArrayVector expected_array_vector; expected_array_vector.push_back(data_array->Slice(0, 30)); @@ -868,11 +865,11 @@ TEST_P(PrefetchFileBatchReaderImplTest, /*selection_bitmap=*/std::nullopt, /*batch_size=*/10, /*prefetch_max_parallel_num=*/3, cache_mode); ASSERT_OK(reader->RefreshReadRanges()); - ASSERT_EQ(reader->GetPreviousBatchFirstRowNumber().value(), -1); + ASSERT_EQ(reader->GetGlobalRowId(0).value(), std::numeric_limits::max()); ASSERT_OK_AND_ASSIGN(auto result_array, ReadResultCollector::CollectResult( reader.get(), /*max simulated data processing time*/ 100)); - ASSERT_EQ(reader->GetPreviousBatchFirstRowNumber().value(), 90); + ASSERT_EQ(reader->GetGlobalRowId(0).value(), 80); arrow::ArrayVector expected_array_vector; expected_array_vector.push_back(data_array->Slice(0, 20)); diff --git a/src/paimon/core/deletionvectors/apply_deletion_vector_batch_reader.h b/src/paimon/core/deletionvectors/apply_deletion_vector_batch_reader.h index 5e2ecf279..f6d824a76 100644 --- a/src/paimon/core/deletionvectors/apply_deletion_vector_batch_reader.h +++ b/src/paimon/core/deletionvectors/apply_deletion_vector_batch_reader.h @@ -82,8 +82,8 @@ class ApplyDeletionVectorBatchReader : public FileBatchReader { return Status::Invalid("ApplyDeletionVectorBatchReader does not support SetReadSchema"); } - Result GetPreviousBatchFirstRowNumber() const override { - return reader_->GetPreviousBatchFirstRowNumber(); + Result GetGlobalRowId(uint64_t batch_row_id) const override { + return 
reader_->GetGlobalRowId(batch_row_id); } Result GetNumberOfRows() const override { @@ -96,9 +96,15 @@ class ApplyDeletionVectorBatchReader : public FileBatchReader { private: Result Filter(int32_t batch_size) const { - PAIMON_ASSIGN_OR_RAISE(uint64_t previous_batch_first_row_number, - reader_->GetPreviousBatchFirstRowNumber()); - return deletion_vector_->IsValid(previous_batch_first_row_number, batch_size); + RoaringBitmap32 is_valid; + for (int32_t i = 0; i < batch_size; ++i) { + PAIMON_ASSIGN_OR_RAISE(uint64_t global_row_id, reader_->GetGlobalRowId(i)); + PAIMON_ASSIGN_OR_RAISE(bool is_deleted, deletion_vector_->IsDeleted(global_row_id)); + if (!is_deleted) { + is_valid.Add(i); + } + } + return is_valid; } private: diff --git a/src/paimon/core/io/complete_row_tracking_fields_reader.cpp b/src/paimon/core/io/complete_row_tracking_fields_reader.cpp index 2aef9b29f..8ed53615a 100644 --- a/src/paimon/core/io/complete_row_tracking_fields_reader.cpp +++ b/src/paimon/core/io/complete_row_tracking_fields_reader.cpp @@ -86,15 +86,13 @@ CompleteRowTrackingFieldsBatchReader::NextBatchWithBitmap() { std::string row_id_field_name = SpecialFields::RowId().Name(); if (read_schema_->GetFieldIndex(row_id_field_name) != -1) { row_id_array = src_struct_array->GetFieldByName(row_id_field_name); - PAIMON_ASSIGN_OR_RAISE(uint64_t previous_batch_first_row_number, - reader_->GetPreviousBatchFirstRowNumber()); - auto row_id_convert_func = [previous_batch_first_row_number, - this](int32_t idx_in_array) -> Result { + auto row_id_convert_func = [this](int32_t idx_in_array) -> Result { if (first_row_id_ == std::nullopt) { return Status::Invalid( "unexpected: read _ROW_ID special field, but first row id is null in meta"); } - return first_row_id_.value() + previous_batch_first_row_number + idx_in_array; + PAIMON_ASSIGN_OR_RAISE(uint64_t global_row_id, reader_->GetGlobalRowId(idx_in_array)); + return first_row_id_.value() + global_row_id; }; 
PAIMON_RETURN_NOT_OK(ConvertRowTrackingField(src_struct_array->length(), /*init_value=*/0, row_id_convert_func, &row_id_array)); diff --git a/src/paimon/core/io/complete_row_tracking_fields_reader.h b/src/paimon/core/io/complete_row_tracking_fields_reader.h index cc2f9f7bf..6f49fea90 100644 --- a/src/paimon/core/io/complete_row_tracking_fields_reader.h +++ b/src/paimon/core/io/complete_row_tracking_fields_reader.h @@ -60,8 +60,8 @@ class CompleteRowTrackingFieldsBatchReader : public FileBatchReader { reader_->Close(); } - Result GetPreviousBatchFirstRowNumber() const override { - return reader_->GetPreviousBatchFirstRowNumber(); + Result GetGlobalRowId(uint64_t batch_row_id) const override { + return reader_->GetGlobalRowId(batch_row_id); } Result GetNumberOfRows() const override { diff --git a/src/paimon/core/io/field_mapping_reader.h b/src/paimon/core/io/field_mapping_reader.h index da1897814..39c02dcaf 100644 --- a/src/paimon/core/io/field_mapping_reader.h +++ b/src/paimon/core/io/field_mapping_reader.h @@ -74,8 +74,8 @@ class FieldMappingReader : public FileBatchReader { return Status::Invalid("FieldMappingReader does not support SetReadSchema"); } - Result GetPreviousBatchFirstRowNumber() const override { - return reader_->GetPreviousBatchFirstRowNumber(); + Result GetGlobalRowId(uint64_t batch_row_id) const override { + return reader_->GetGlobalRowId(batch_row_id); } Result GetNumberOfRows() const override { diff --git a/src/paimon/core/io/key_value_data_file_record_reader.cpp b/src/paimon/core/io/key_value_data_file_record_reader.cpp index a4edd04e0..ea5deb5dd 100644 --- a/src/paimon/core/io/key_value_data_file_record_reader.cpp +++ b/src/paimon/core/io/key_value_data_file_record_reader.cpp @@ -81,15 +81,14 @@ Result KeyValueDataFileRecordReader::Iterator::Next() { Result> KeyValueDataFileRecordReader::Iterator::NextWithFilePos() { PAIMON_ASSIGN_OR_RAISE(KeyValue kv, Next()); - return std::make_pair(previous_batch_first_row_number_ + cursor_ - 1, 
std::move(kv)); + PAIMON_ASSIGN_OR_RAISE(uint64_t global_row_id, reader_->reader_->GetGlobalRowId(cursor_ - 1)); + return std::make_pair(static_cast(global_row_id), std::move(kv)); } Result> KeyValueDataFileRecordReader::NextBatch() { Reset(); PAIMON_ASSIGN_OR_RAISE(BatchReader::ReadBatchWithBitmap batch_with_bitmap, reader_->NextBatchWithBitmap()); - PAIMON_ASSIGN_OR_RAISE(int64_t previous_batch_first_row_number, - reader_->GetPreviousBatchFirstRowNumber()); if (BatchReader::IsEofBatch(batch_with_bitmap)) { // reader eof, just return return std::unique_ptr(); @@ -140,8 +139,7 @@ Result> KeyValueDataFileRecordRe key_ctx_ = std::make_shared(key_fields, pool_); value_ctx_ = std::make_shared(value_fields, pool_); ArrowUtils::TraverseArray(data_batch); - return std::make_unique( - this, previous_batch_first_row_number); + return std::make_unique(this); } void KeyValueDataFileRecordReader::Reset() { diff --git a/src/paimon/core/io/key_value_data_file_record_reader.h b/src/paimon/core/io/key_value_data_file_record_reader.h index c271a3bdb..3fe87a89e 100644 --- a/src/paimon/core/io/key_value_data_file_record_reader.h +++ b/src/paimon/core/io/key_value_data_file_record_reader.h @@ -54,16 +54,13 @@ class KeyValueDataFileRecordReader : public KeyValueRecordReader { class Iterator : public KeyValueRecordReader::Iterator { public: - Iterator(KeyValueDataFileRecordReader* reader, int64_t previous_batch_first_row_number) - : previous_batch_first_row_number_(previous_batch_first_row_number), - reader_(reader), - selection_cardinality_(reader->selection_bitmap_.Cardinality()) {} + explicit Iterator(KeyValueDataFileRecordReader* reader) + : reader_(reader), selection_cardinality_(reader->selection_bitmap_.Cardinality()) {} Result HasNext() const override; Result Next() override; Result> NextWithFilePos(); private: - int64_t previous_batch_first_row_number_; mutable int64_t cursor_ = 0; KeyValueDataFileRecordReader* reader_ = nullptr; int64_t selection_cardinality_ = 0; diff --git 
a/src/paimon/format/avro/avro_file_batch_reader.h b/src/paimon/format/avro/avro_file_batch_reader.h index 98d5deede..355e849a6 100644 --- a/src/paimon/format/avro/avro_file_batch_reader.h +++ b/src/paimon/format/avro/avro_file_batch_reader.h @@ -45,8 +45,8 @@ class AvroFileBatchReader : public FileBatchReader { Status SetReadSchema(::ArrowSchema* read_schema, const std::shared_ptr& predicate, const std::optional& selection_bitmap) override; - Result GetPreviousBatchFirstRowNumber() const override { - return previous_first_row_; + Result GetGlobalRowId(uint64_t batch_row_id) const override { + return previous_first_row_ + batch_row_id; } Result GetNumberOfRows() const override; diff --git a/src/paimon/format/avro/avro_file_batch_reader_test.cpp b/src/paimon/format/avro/avro_file_batch_reader_test.cpp index f4f052a34..bd2b1e1fd 100644 --- a/src/paimon/format/avro/avro_file_batch_reader_test.cpp +++ b/src/paimon/format/avro/avro_file_batch_reader_test.cpp @@ -327,7 +327,7 @@ TEST_F(AvroFileBatchReaderTest, TestSetReadSchemaRejectNestedSubFieldProjection) "does not support nested sub-field projection"); } -TEST_F(AvroFileBatchReaderTest, TestGetPreviousBatchFirstRowNumber) { +TEST_F(AvroFileBatchReaderTest, TestGetGlobalRowId) { std::string path = paimon::test::GetDataDir() + "/avro/append_simple.db/" "append_simple/bucket-0/" @@ -352,26 +352,25 @@ TEST_F(AvroFileBatchReaderTest, TestGetPreviousBatchFirstRowNumber) { ASSERT_OK_AND_ASSIGN(auto num_rows, reader->GetNumberOfRows()); ASSERT_EQ(4, num_rows); - ASSERT_EQ(std::numeric_limits::max(), - reader->GetPreviousBatchFirstRowNumber().value()); + ASSERT_EQ(std::numeric_limits::max(), reader->GetGlobalRowId(0).value()); ASSERT_OK_AND_ASSIGN(auto batch1, reader->NextBatch()); ArrowArrayRelease(batch1.first.get()); ArrowSchemaRelease(batch1.second.get()); - ASSERT_EQ(0, reader->GetPreviousBatchFirstRowNumber().value()); + ASSERT_EQ(0, reader->GetGlobalRowId(0).value()); ASSERT_OK_AND_ASSIGN(auto batch2, 
reader->NextBatch()); - ASSERT_EQ(1, reader->GetPreviousBatchFirstRowNumber().value()); + ASSERT_EQ(1, reader->GetGlobalRowId(0).value()); ArrowArrayRelease(batch2.first.get()); ArrowSchemaRelease(batch2.second.get()); ASSERT_OK_AND_ASSIGN(auto batch3, reader->NextBatch()); - ASSERT_EQ(2, reader->GetPreviousBatchFirstRowNumber().value()); + ASSERT_EQ(2, reader->GetGlobalRowId(0).value()); ArrowArrayRelease(batch3.first.get()); ArrowSchemaRelease(batch3.second.get()); ASSERT_OK_AND_ASSIGN(auto batch4, reader->NextBatch()); - ASSERT_EQ(3, reader->GetPreviousBatchFirstRowNumber().value()); + ASSERT_EQ(3, reader->GetGlobalRowId(0).value()); ArrowArrayRelease(batch4.first.get()); ArrowSchemaRelease(batch4.second.get()); ASSERT_OK_AND_ASSIGN(auto batch5, reader->NextBatch()); - ASSERT_EQ(4, reader->GetPreviousBatchFirstRowNumber().value()); + ASSERT_EQ(4, reader->GetGlobalRowId(0).value()); ASSERT_TRUE(BatchReader::IsEofBatch(batch5)); } @@ -397,7 +396,7 @@ TEST_F(AvroFileBatchReaderTest, TestSetReadSchemaResetsReaderToFirstRow) { ASSERT_OK_AND_ASSIGN(auto reader, reader_builder->Build(in)); ASSERT_OK_AND_ASSIGN(auto first_batch, reader->NextBatch()); - ASSERT_EQ(0, reader->GetPreviousBatchFirstRowNumber().value()); + ASSERT_EQ(0, reader->GetGlobalRowId(0).value()); auto first_array = arrow::ImportArray(first_batch.first.get(), first_batch.second.get()).ValueOrDie(); ASSERT_TRUE(first_array->Equals(src_array->Slice(0, 2))) << first_array->ToString(); @@ -407,11 +406,10 @@ TEST_F(AvroFileBatchReaderTest, TestSetReadSchemaResetsReaderToFirstRow) { ASSERT_TRUE(arrow::ExportSchema(*read_schema, c_schema.get()).ok()); ASSERT_OK(reader->SetReadSchema(c_schema.get(), /*predicate=*/nullptr, /*selection_bitmap=*/std::nullopt)); - ASSERT_EQ(std::numeric_limits::max(), - reader->GetPreviousBatchFirstRowNumber().value()); + ASSERT_EQ(std::numeric_limits::max(), reader->GetGlobalRowId(0).value()); ASSERT_OK_AND_ASSIGN(auto projected_batch, reader->NextBatch()); - ASSERT_EQ(0, 
reader->GetPreviousBatchFirstRowNumber().value()); + ASSERT_EQ(0, reader->GetGlobalRowId(0).value()); auto projected_array = arrow::ImportArray(projected_batch.first.get(), projected_batch.second.get()).ValueOrDie(); auto expected_projected_array = arrow::ipc::internal::json::ArrayFromJSON( diff --git a/src/paimon/format/blob/blob_file_batch_reader.h b/src/paimon/format/blob/blob_file_batch_reader.h index 06287d759..0c04b43f3 100644 --- a/src/paimon/format/blob/blob_file_batch_reader.h +++ b/src/paimon/format/blob/blob_file_batch_reader.h @@ -97,14 +97,14 @@ class BlobFileBatchReader : public FileBatchReader { Result NextBatch() override; - Result GetPreviousBatchFirstRowNumber() const override { + Result GetGlobalRowId(uint64_t batch_row_id) const override { if (all_blob_lengths_.size() != target_blob_lengths_.size()) { return Status::Invalid( - "Cannot call GetPreviousBatchFirstRowNumber in BlobFileBatchReader because, after " + "Cannot call GetGlobalRowId in BlobFileBatchReader because, after " "bitmap pushdown, rows in the array returned by NextBatch are no longer " "contiguous."); } - return previous_batch_first_row_number_; + return previous_batch_first_row_number_ + batch_row_id; } Result GetNumberOfRows() const override { diff --git a/src/paimon/format/blob/blob_file_batch_reader_test.cpp b/src/paimon/format/blob/blob_file_batch_reader_test.cpp index bde27d64d..f3687eb5e 100644 --- a/src/paimon/format/blob/blob_file_batch_reader_test.cpp +++ b/src/paimon/format/blob/blob_file_batch_reader_test.cpp @@ -169,22 +169,21 @@ TEST_F(BlobFileBatchReaderTest, TestRowNumbers) { ASSERT_OK(reader->SetReadSchema(&c_schema, nullptr, std::nullopt)); ASSERT_OK_AND_ASSIGN(auto number_of_rows, reader->GetNumberOfRows()); ASSERT_EQ(3, number_of_rows); - ASSERT_EQ(std::numeric_limits::max(), - reader->GetPreviousBatchFirstRowNumber().value()); + ASSERT_EQ(std::numeric_limits::max(), reader->GetGlobalRowId(0).value()); ASSERT_OK_AND_ASSIGN(auto batch1, reader->NextBatch()); 
ArrowArrayRelease(batch1.first.get()); ArrowSchemaRelease(batch1.second.get()); - ASSERT_EQ(0, reader->GetPreviousBatchFirstRowNumber().value()); + ASSERT_EQ(0, reader->GetGlobalRowId(0).value()); ASSERT_OK_AND_ASSIGN(auto batch2, reader->NextBatch()); - ASSERT_EQ(1, reader->GetPreviousBatchFirstRowNumber().value()); + ASSERT_EQ(1, reader->GetGlobalRowId(0).value()); ArrowArrayRelease(batch2.first.get()); ArrowSchemaRelease(batch2.second.get()); ASSERT_OK_AND_ASSIGN(auto batch3, reader->NextBatch()); - ASSERT_EQ(2, reader->GetPreviousBatchFirstRowNumber().value()); + ASSERT_EQ(2, reader->GetGlobalRowId(0).value()); ArrowArrayRelease(batch3.first.get()); ArrowSchemaRelease(batch3.second.get()); ASSERT_OK_AND_ASSIGN(auto batch4, reader->NextBatch()); - ASSERT_EQ(3, reader->GetPreviousBatchFirstRowNumber().value()); + ASSERT_EQ(3, reader->GetGlobalRowId(0).value()); ASSERT_TRUE(BatchReader::IsEofBatch(batch4)); } @@ -255,8 +254,7 @@ TEST_P(BlobFileBatchReaderTest, EmptyFile) { ASSERT_OK(reader->SetReadSchema(&c_schema, nullptr, std::nullopt)); ASSERT_OK_AND_ASSIGN(auto number_of_rows, reader->GetNumberOfRows()); ASSERT_EQ(0, number_of_rows); - ASSERT_EQ(std::numeric_limits::max(), - reader->GetPreviousBatchFirstRowNumber().value()); + ASSERT_EQ(std::numeric_limits::max(), reader->GetGlobalRowId(0).value()); ASSERT_OK_AND_ASSIGN(auto batch, reader->NextBatch()); ASSERT_TRUE(BatchReader::IsEofBatch(batch)); } diff --git a/src/paimon/format/lance/lance_file_batch_reader.h b/src/paimon/format/lance/lance_file_batch_reader.h index fb2628035..361561411 100644 --- a/src/paimon/format/lance/lance_file_batch_reader.h +++ b/src/paimon/format/lance/lance_file_batch_reader.h @@ -41,15 +41,15 @@ class LanceFileBatchReader : public FileBatchReader { Result NextBatch() override; - Result GetPreviousBatchFirstRowNumber() const override { + Result GetGlobalRowId(uint64_t batch_row_id) const override { if (!read_row_ids_.empty() && read_row_ids_.size() != num_rows_) { // 
TODO(xinyu.lxy): support function return Status::Invalid( - "Cannot call GetPreviousBatchFirstRowNumber in LanceFileBatchReader because, after " + "Cannot call GetGlobalRowId in LanceFileBatchReader because, after " "bitmap pushdown, rows in the array returned by NextBatch are no longer " "contiguous."); } - return previous_batch_first_row_num_; + return previous_batch_first_row_num_ + batch_row_id; } Result GetNumberOfRows() const override { diff --git a/src/paimon/format/lance/lance_format_reader_writer_test.cpp b/src/paimon/format/lance/lance_format_reader_writer_test.cpp index b1ad6be73..eab303905 100644 --- a/src/paimon/format/lance/lance_format_reader_writer_test.cpp +++ b/src/paimon/format/lance/lance_format_reader_writer_test.cpp @@ -478,27 +478,26 @@ TEST_F(LanceFileReaderWriterTest, TestPreviousBatchFirstRowNumber) { ASSERT_OK_AND_ASSIGN( std::unique_ptr reader, LanceFileBatchReader::Create(file_path, /*batch_size=*/4, /*batch_readahead=*/2)); - ASSERT_EQ(std::numeric_limits::max(), - reader->GetPreviousBatchFirstRowNumber().value()); + ASSERT_EQ(std::numeric_limits::max(), reader->GetGlobalRowId(0).value()); // first batch row 0-3 ASSERT_OK_AND_ASSIGN(auto read_batch, reader->NextBatch()); ASSERT_OK_AND_ASSIGN(auto read_array, paimon::test::ReadResultCollector::GetArray(std::move(read_batch))); ASSERT_TRUE(read_array->Equals(array->Slice(0, 4))); - ASSERT_EQ(0, reader->GetPreviousBatchFirstRowNumber().value()); + ASSERT_EQ(0, reader->GetGlobalRowId(0).value()); // second batch 4-5 ASSERT_OK_AND_ASSIGN(read_batch, reader->NextBatch()); ASSERT_OK_AND_ASSIGN(read_array, paimon::test::ReadResultCollector::GetArray(std::move(read_batch))); ASSERT_TRUE(read_array->Equals(array->Slice(4, 2))); - ASSERT_EQ(4, reader->GetPreviousBatchFirstRowNumber().value()); + ASSERT_EQ(4, reader->GetGlobalRowId(0).value()); // eof ASSERT_OK_AND_ASSIGN(read_batch, reader->NextBatch()); ASSERT_TRUE(BatchReader::IsEofBatch(read_batch)); - ASSERT_EQ(6, 
reader->GetPreviousBatchFirstRowNumber().value()); + ASSERT_EQ(6, reader->GetGlobalRowId(0).value()); // test with bitmap pushdown ArrowSchema c_read_schema; @@ -506,8 +505,8 @@ TEST_F(LanceFileReaderWriterTest, TestPreviousBatchFirstRowNumber) { ASSERT_OK(reader->SetReadSchema(&c_read_schema, /*predicate=*/nullptr, /*selection_bitmap=*/RoaringBitmap32::From({0, 3}))); ASSERT_NOK_WITH_MSG( - reader->GetPreviousBatchFirstRowNumber(), - "Cannot call GetPreviousBatchFirstRowNumber in LanceFileBatchReader because, after bitmap " + reader->GetGlobalRowId(0), + "Cannot call GetGlobalRowId in LanceFileBatchReader because, after bitmap " "pushdown, rows in the array returned by NextBatch are no longer contiguous."); } } // namespace paimon::lance::test diff --git a/src/paimon/format/orc/orc_file_batch_reader.h b/src/paimon/format/orc/orc_file_batch_reader.h index c2460f3a7..0e3fade23 100644 --- a/src/paimon/format/orc/orc_file_batch_reader.h +++ b/src/paimon/format/orc/orc_file_batch_reader.h @@ -62,8 +62,8 @@ class OrcFileBatchReader : public PrefetchFileBatchReader { // OrcFileBatchReader. Therefore, we need to hold BatchReader when using output ArrowArray. 
Result NextBatch() override; - Result GetPreviousBatchFirstRowNumber() const override { - return reader_->GetRowNumber(); + Result GetGlobalRowId(uint64_t batch_row_id) const override { + return reader_->GetRowNumber() + batch_row_id; } Result GetNumberOfRows() const override { diff --git a/src/paimon/format/orc/orc_file_batch_reader_test.cpp b/src/paimon/format/orc/orc_file_batch_reader_test.cpp index de80861b6..57665fa0d 100644 --- a/src/paimon/format/orc/orc_file_batch_reader_test.cpp +++ b/src/paimon/format/orc/orc_file_batch_reader_test.cpp @@ -492,10 +492,10 @@ TEST_P(OrcFileBatchReaderTest, TestNextBatchSimple) { for (auto batch_size : {1, 2, 3, 5, 8, 10}) { auto orc_batch_reader = PrepareOrcFileBatchReader(file_name, &read_schema, batch_size, natural_read_size); - ASSERT_EQ(orc_batch_reader->GetPreviousBatchFirstRowNumber().value(), -1); + ASSERT_EQ(orc_batch_reader->GetGlobalRowId(0).value(), -1); ASSERT_OK_AND_ASSIGN(auto result_array, paimon::test::ReadResultCollector::CollectResult( orc_batch_reader.get())); - ASSERT_EQ(orc_batch_reader->GetPreviousBatchFirstRowNumber().value(), 8); + ASSERT_EQ(orc_batch_reader->GetGlobalRowId(0).value(), 8); orc_batch_reader->Close(); auto expected_array = std::make_shared(struct_array_); ASSERT_TRUE(result_array->Equals(expected_array)); @@ -766,18 +766,18 @@ TEST_F(OrcFileBatchReaderTest, TestReadNoField) { auto orc_batch_reader = PrepareOrcFileBatchReader(file_name, &read_schema, /*batch_size=*/3, /*natural_read_size=*/10); // read 3 rows - ASSERT_EQ(orc_batch_reader->GetPreviousBatchFirstRowNumber().value(), -1); + ASSERT_EQ(orc_batch_reader->GetGlobalRowId(0).value(), -1); ASSERT_OK_AND_ASSIGN(auto batch1, orc_batch_reader->NextBatch()); - ASSERT_EQ(orc_batch_reader->GetPreviousBatchFirstRowNumber().value(), 0); + ASSERT_EQ(orc_batch_reader->GetGlobalRowId(0).value(), 0); // read 3 rows ASSERT_OK_AND_ASSIGN(auto batch2, orc_batch_reader->NextBatch()); - 
ASSERT_EQ(orc_batch_reader->GetPreviousBatchFirstRowNumber().value(), 3); + ASSERT_EQ(orc_batch_reader->GetGlobalRowId(0).value(), 3); // read 2 rows ASSERT_OK_AND_ASSIGN(auto batch3, orc_batch_reader->NextBatch()); - ASSERT_EQ(orc_batch_reader->GetPreviousBatchFirstRowNumber().value(), 6); + ASSERT_EQ(orc_batch_reader->GetGlobalRowId(0).value(), 6); // read rows with eof ASSERT_OK_AND_ASSIGN(auto batch4, orc_batch_reader->NextBatch()); - ASSERT_EQ(orc_batch_reader->GetPreviousBatchFirstRowNumber().value(), 8); + ASSERT_EQ(orc_batch_reader->GetGlobalRowId(0).value(), 8); ASSERT_TRUE(BatchReader::IsEofBatch(batch4)); orc_batch_reader->Close(); diff --git a/src/paimon/format/parquet/parquet_file_batch_reader.cpp b/src/paimon/format/parquet/parquet_file_batch_reader.cpp index 5e6893c39..16c79dd0e 100644 --- a/src/paimon/format/parquet/parquet_file_batch_reader.cpp +++ b/src/paimon/format/parquet/parquet_file_batch_reader.cpp @@ -39,6 +39,7 @@ #include "paimon/common/metrics/metrics_impl.h" #include "paimon/common/utils/arrow/status_utils.h" #include "paimon/common/utils/options_utils.h" +#include "paimon/common/utils/scope_guard.h" #include "paimon/common/utils/string_utils.h" #include "paimon/core/schema/arrow_schema_validator.h" #include "paimon/core/utils/nested_projection_utils.h" @@ -213,11 +214,13 @@ Status ParquetFileBatchReader::SetReadSchema( target_row_groups.emplace_back(/*rg_index=*/rg_id, /*is_partially_matched=*/true, /*ranges=*/it->second); } else { - target_row_groups.emplace_back(/*rg_index=*/rg_id, - /*is_partially_matched=*/false, - /*ranges=*/RowRanges()); + target_row_groups.emplace_back( + /*rg_index=*/rg_id, + /*is_partially_matched=*/false, + /*ranges=*/RowRanges(Range(0, reader_->GetAllRowGroupRanges()[rg_id].second))); } } + PAIMON_ASSIGN_OR_RAISE(all_row_ranges_, GetAllTargetRowRanges(target_row_groups)); PAIMON_RETURN_NOT_OK(reader_->PrepareForReadingLazy(target_row_groups, column_indices)); } 
PAIMON_PARQUET_CATCH_AND_RETURN_STATUS("ParquetFileBatchReader::SetReadSchema") @@ -361,6 +364,7 @@ Result ParquetFileBatchReader::NextBatch() { "equal with read schema {}", array->type()->ToString(), read_data_type_->ToString())); } + PAIMON_RETURN_NOT_OK(GenerateRowMapping(array->length())); std::unique_ptr c_array = std::make_unique(); std::unique_ptr c_schema = std::make_unique(); PAIMON_RETURN_NOT_OK_FROM_ARROW(arrow::ExportArray(*array, c_array.get(), c_schema.get())); @@ -526,4 +530,52 @@ Result> ParquetFileBatchReader::ComputeNestedColumnIndices( return indices; } +Result ParquetFileBatchReader::GetAllTargetRowRanges( + const std::vector& target_row_groups) const { + auto all_row_group_ranges = reader_->GetAllRowGroupRanges(); + RowRanges all_ranges; + for (const auto& target_row_group : target_row_groups) { + for (const auto& range : target_row_group.row_ranges.GetRanges()) { + all_ranges.Add( + Range(range.from + all_row_group_ranges[target_row_group.row_group_index].first, + range.to + all_row_group_ranges[target_row_group.row_group_index].first)); + } + } + return all_ranges; +} + +Status ParquetFileBatchReader::GenerateRowMapping(int64_t batch_length) { + const std::vector& all_ranges = all_row_ranges_.GetRanges(); + PAIMON_ASSIGN_OR_RAISE(int64_t batch_start_row, reader_->GetPreviousBatchFirstRowNumber()); + + auto cur_range_it = + std::upper_bound(all_ranges.begin(), all_ranges.end(), batch_start_row, + [](int64_t value, const Range& r) { return value < r.from; }); + if (cur_range_it == all_ranges.begin()) { + return Status::Invalid("No range found!"); + } + --cur_range_it; + if (batch_start_row < cur_range_it->from || batch_start_row > cur_range_it->to) { + return Status::Invalid( + fmt::format("Batch start row {} is not in the current range [{}, {}]!", batch_start_row, + cur_range_it->from, cur_range_it->to)); + } + + std::vector row_mapping; + row_mapping.reserve(batch_length); + int64_t global_row = batch_start_row; + for (int64_t i = 0; i < 
batch_length; ++i) { + if (global_row > cur_range_it->to) { + ++cur_range_it; + if (cur_range_it == all_ranges.end()) { + return Status::Invalid("Batch length exceeds the total row ranges!"); + } + global_row = cur_range_it->from; + } + row_mapping.push_back(global_row); + global_row++; + } + row_mapping_ = std::move(row_mapping); + return Status::OK(); +} } // namespace paimon::parquet diff --git a/src/paimon/format/parquet/parquet_file_batch_reader.h b/src/paimon/format/parquet/parquet_file_batch_reader.h index 4bd684e8f..c89669632 100644 --- a/src/paimon/format/parquet/parquet_file_batch_reader.h +++ b/src/paimon/format/parquet/parquet_file_batch_reader.h @@ -16,6 +16,8 @@ #pragma once +#include + #include #include #include @@ -94,9 +96,11 @@ class ParquetFileBatchReader : public PrefetchFileBatchReader { Result>> GenReadRanges( bool* need_prefetch) const override; - Result GetPreviousBatchFirstRowNumber() const override { - assert(reader_); - return reader_->GetPreviousBatchFirstRowNumber(); + Result GetGlobalRowId(uint64_t batch_row_id) const override { + if (batch_row_id >= row_mapping_.size()) { + return std::numeric_limits::max(); + } + return row_mapping_[batch_row_id]; } Result GetNumberOfRows() const override { @@ -173,6 +177,9 @@ class ParquetFileBatchReader : public PrefetchFileBatchReader { const std::shared_ptr& read_schema, const std::shared_ptr& file_schema); + Result GetAllTargetRowRanges( + const std::vector& target_row_groups) const; + // precondition: predicate supposed not be empty Result> FilterRowGroupsByPredicate( const std::shared_ptr& predicate, @@ -189,6 +196,8 @@ class ParquetFileBatchReader : public PrefetchFileBatchReader { const std::map& column_name_to_index, const std::vector& src_row_groups); + Status GenerateRowMapping(int64_t batch_length); + private: std::map options_; // hold the lifecycle of arrow memory pool. 
@@ -204,6 +213,9 @@ class ParquetFileBatchReader : public PrefetchFileBatchReader { uint64_t read_rows_ = 0; uint64_t read_batch_count_ = 0; + + RowRanges all_row_ranges_; + std::vector row_mapping_; }; } // namespace paimon::parquet diff --git a/src/paimon/format/parquet/parquet_file_batch_reader_test.cpp b/src/paimon/format/parquet/parquet_file_batch_reader_test.cpp index ec353f071..4699fdff0 100644 --- a/src/paimon/format/parquet/parquet_file_batch_reader_test.cpp +++ b/src/paimon/format/parquet/parquet_file_batch_reader_test.cpp @@ -168,12 +168,15 @@ class ParquetFileBatchReaderTest : public ::testing::Test, void WriteArray(const std::string& file_path, const std::shared_ptr& src_array, const std::shared_ptr& arrow_schema, int64_t write_batch_size, - bool enable_dictionary, int64_t max_row_group_length) const { + bool enable_dictionary, int64_t max_row_group_length, + int64_t max_page_size = 1024 * 1024 * 1024) const { ASSERT_OK_AND_ASSIGN(std::shared_ptr out, fs_->Create(file_path, /*overwrite=*/true)); ::parquet::WriterProperties::Builder builder; builder.write_batch_size(write_batch_size); builder.max_row_group_length(max_row_group_length); + builder.data_pagesize(max_page_size); + builder.enable_write_page_index(); enable_dictionary ? 
builder.enable_dictionary() : builder.disable_dictionary(); auto writer_properties = builder.build(); ASSERT_OK_AND_ASSIGN(auto format_writer, ParquetFormatWriter::Create( @@ -229,6 +232,31 @@ class ParquetFileBatchReaderTest : public ::testing::Test, std::shared_ptr struct_array_; }; +static std::shared_ptr MakeSequentialIntData(int32_t num_rows) { + arrow::Int32Builder val_builder; + EXPECT_TRUE(val_builder.Reserve(num_rows).ok()); + for (int32_t i = 0; i < num_rows; ++i) { + val_builder.UnsafeAppend(i); + } + auto val_array = val_builder.Finish().ValueOrDie(); + auto field = arrow::field("f0", arrow::int32()); + return arrow::StructArray::Make({val_array}, {field}).ValueOrDie(); +} + +static Result> CollectOneBatch(ParquetFileBatchReader* reader) { + PAIMON_ASSIGN_OR_RAISE(BatchReader::ReadBatch batch, reader->NextBatch()); + if (BatchReader::IsEofBatch(batch)) { + return std::shared_ptr(nullptr); + } + PAIMON_ASSIGN_OR_RAISE_FROM_ARROW(std::shared_ptr result_array, + arrow::ImportArray(batch.first.get(), batch.second.get())); + auto struct_array = std::dynamic_pointer_cast(result_array); + if (!struct_array) { + return Status::Invalid("CollectOneBatch expected StructArray"); + } + return struct_array; +} + TEST_F(ParquetFileBatchReaderTest, TestParquetMetadataCacheReusesSerializedFooter) { WriteArray(file_path_, struct_array_, schema_, /*write_batch_size=*/struct_array_->length(), /*enable_dictionary=*/false, @@ -447,11 +475,10 @@ TEST_F(ParquetFileBatchReaderTest, TestNextBatchSimple) { auto parquet_batch_reader = PrepareParquetFileBatchReader(file_name, schema_, /*predicate=*/nullptr, /*selection_bitmap=*/std::nullopt, batch_size); - ASSERT_EQ(parquet_batch_reader->GetPreviousBatchFirstRowNumber().value(), + ASSERT_EQ(parquet_batch_reader->GetGlobalRowId(0).value(), std::numeric_limits::max()); ASSERT_OK_AND_ASSIGN(auto result_array, paimon::test::ReadResultCollector::CollectResult( parquet_batch_reader.get())); - 
ASSERT_EQ(parquet_batch_reader->GetPreviousBatchFirstRowNumber().value(), 6); parquet_batch_reader->Close(); auto expected_array = std::make_shared(struct_array_); ASSERT_TRUE(result_array->Equals(expected_array)); @@ -812,19 +839,19 @@ TEST_F(ParquetFileBatchReaderTest, TestReadNoField) { PrepareParquetFileBatchReader(file_name, read_schema, /*predicate=*/nullptr, /*selection_bitmap=*/std::nullopt, /*batch_size=*/2); // read 2 rows - ASSERT_EQ(parquet_batch_reader->GetPreviousBatchFirstRowNumber().value(), + ASSERT_EQ(parquet_batch_reader->GetGlobalRowId(0).value(), std::numeric_limits::max()); ASSERT_OK_AND_ASSIGN(auto batch1, parquet_batch_reader->NextBatch()); - ASSERT_EQ(parquet_batch_reader->GetPreviousBatchFirstRowNumber().value(), 0); + ASSERT_EQ(parquet_batch_reader->GetGlobalRowId(0).value(), 0); // read 2 rows ASSERT_OK_AND_ASSIGN(auto batch2, parquet_batch_reader->NextBatch()); - ASSERT_EQ(parquet_batch_reader->GetPreviousBatchFirstRowNumber().value(), 2); + ASSERT_EQ(parquet_batch_reader->GetGlobalRowId(0).value(), 2); // read 2 rows ASSERT_OK_AND_ASSIGN(auto batch3, parquet_batch_reader->NextBatch()); - ASSERT_EQ(parquet_batch_reader->GetPreviousBatchFirstRowNumber().value(), 4); + ASSERT_EQ(parquet_batch_reader->GetGlobalRowId(0).value(), 4); // read rows with eof ASSERT_OK_AND_ASSIGN(auto batch4, parquet_batch_reader->NextBatch()); - ASSERT_EQ(parquet_batch_reader->GetPreviousBatchFirstRowNumber().value(), 6); + ASSERT_EQ(parquet_batch_reader->GetGlobalRowId(0).value(), 4); ASSERT_TRUE(BatchReader::IsEofBatch(batch4)); parquet_batch_reader->Close(); @@ -1013,4 +1040,60 @@ TEST_F(ParquetFileBatchReaderTest, TestAddMetadataPerFieldMetadata) { ASSERT_TRUE(data->Equals(*result_array->chunk(0))) << result_array->ToString(); } +TEST_F(ParquetFileBatchReaderTest, TestRowMapping) { + arrow::FieldVector fields = {arrow::field("f0", arrow::int32())}; + auto src_array = MakeSequentialIntData(12); + // data in file rowGroup0:[0, 1, 2, 3, 4, 5] | rowGroup1:[6, 
7, 8, 9, 10, 11] + // one row per page + auto arrow_schema = arrow::schema(fields); + WriteArray(file_path_, src_array, arrow_schema, /*write_batch_size=*/1, + /*enable_dictionary=*/true, /*max_row_group_length=*/12, /*max_page_size=*/1); + + // 1<=f0<=3 || 5<=f0<=6 + ASSERT_OK_AND_ASSIGN( + auto predicate, + PredicateBuilder::Or({PredicateBuilder::Between(/*field_index=*/0, /*field_name=*/"f0", + FieldType::INT, Literal(1), Literal(3)), + PredicateBuilder::Between(/*field_index=*/0, /*field_name=*/"f0", + FieldType::INT, Literal(5), Literal(6))})); + + auto parquet_batch_reader = PrepareParquetFileBatchReader( + file_path_, arrow_schema, /*predicate=*/predicate, std::nullopt, /*batch_size=*/2); + + ASSERT_EQ(parquet_batch_reader->GetGlobalRowId(0).value(), + std::numeric_limits::max()); + + ASSERT_OK_AND_ASSIGN(auto batch1, CollectOneBatch(parquet_batch_reader.get())); + auto expected_batch1 = src_array->Slice(1, 2); + ASSERT_TRUE(batch1->Equals(expected_batch1)) << batch1->ToString(); + ASSERT_EQ(parquet_batch_reader->GetGlobalRowId(0).value(), 1); + ASSERT_EQ(parquet_batch_reader->GetGlobalRowId(1).value(), 2); + + // Not adjacent pages + ASSERT_OK_AND_ASSIGN(auto batch2, CollectOneBatch(parquet_batch_reader.get())); + auto expected_batch2 = std::dynamic_pointer_cast( + arrow::ipc::internal::json::ArrayFromJSON(arrow::struct_(fields), R"([ +[3], +[5] + ])") + .ValueOrDie()); + ASSERT_TRUE(batch2->Equals(expected_batch2)) << batch2->ToString(); + ASSERT_EQ(parquet_batch_reader->GetGlobalRowId(0).value(), 3); + ASSERT_EQ(parquet_batch_reader->GetGlobalRowId(1).value(), 5); + + // Only one record read + ASSERT_OK_AND_ASSIGN(auto batch3, CollectOneBatch(parquet_batch_reader.get())); + auto expected_batch3 = src_array->Slice(6, 1); + ASSERT_TRUE(batch3->Equals(expected_batch3)) << batch3->ToString(); + ASSERT_EQ(parquet_batch_reader->GetGlobalRowId(0).value(), 6); + // out of bound, return max value + ASSERT_EQ(parquet_batch_reader->GetGlobalRowId(1).value(), + 
std::numeric_limits::max()); + + ASSERT_OK_AND_ASSIGN(auto eof_batch, CollectOneBatch(parquet_batch_reader.get())); + ASSERT_EQ(nullptr, eof_batch); + // previous batch is eof, return last none-eof batch's row id + ASSERT_EQ(parquet_batch_reader->GetGlobalRowId(0).value(), 6); +} + } // namespace paimon::parquet::test diff --git a/src/paimon/format/parquet/row_ranges.cpp b/src/paimon/format/parquet/row_ranges.cpp index 1b03715be..6e20780b8 100644 --- a/src/paimon/format/parquet/row_ranges.cpp +++ b/src/paimon/format/parquet/row_ranges.cpp @@ -104,6 +104,12 @@ void RowRanges::Add(const Range& range) { ranges_.insert(it, merged); } +void RowRanges::Union(const RowRanges& other) { + for (const auto& range : other.ranges_) { + Add(range); + } +} + std::optional RowRanges::MapFilteredIndexToOriginalRow(int64_t filtered_index) const { int64_t accumulated = 0; for (const auto& range : ranges_) { diff --git a/src/paimon/format/parquet/row_ranges.h b/src/paimon/format/parquet/row_ranges.h index 46c3f4d21..f6b41178f 100644 --- a/src/paimon/format/parquet/row_ranges.h +++ b/src/paimon/format/parquet/row_ranges.h @@ -92,6 +92,8 @@ class RowRanges { /// Adds a range to the end of the list, maintaining sorted disjoint ranges. void Add(const Range& range); + void Union(const RowRanges& other); + /// Maps a filtered-result index to the original row index within the row group. 
/// For example, if RowRanges = {[10,19], [50,59]}, then: /// MapFilteredIndexToOriginalRow(0) = 10 (first row of first range) diff --git a/src/paimon/testing/mock/mock_file_batch_reader.h b/src/paimon/testing/mock/mock_file_batch_reader.h index eb2bc1b59..6eda3b005 100644 --- a/src/paimon/testing/mock/mock_file_batch_reader.h +++ b/src/paimon/testing/mock/mock_file_batch_reader.h @@ -149,8 +149,8 @@ class MockFileBatchReader : public PrefetchFileBatchReader { return metrics; } - Result GetPreviousBatchFirstRowNumber() const override { - return previous_batch_first_row_num_; + Result GetGlobalRowId(uint64_t batch_row_id) const override { + return previous_batch_first_row_num_ + batch_row_id; } Result GetNumberOfRows() const override { From ad034986b2fcc665ca9142fc9b628d73deb000ec Mon Sep 17 00:00:00 2001 From: zhouhongfeng Date: Fri, 26 Jun 2026 10:44:26 +0800 Subject: [PATCH 17/38] style: change interface name --- .../shared_shredding_file_reader.cpp | 5 ++-- .../shredding/shared_shredding_file_reader.h | 2 +- .../bitmap/apply_bitmap_index_batch_reader.h | 6 ++-- .../reader/delegating_prefetch_reader.h | 4 +-- .../prefetch_file_batch_reader_impl.cpp | 6 ++-- .../reader/prefetch_file_batch_reader_impl.h | 2 +- .../prefetch_file_batch_reader_impl_test.cpp | 29 ++++++++++--------- .../apply_deletion_vector_batch_reader.h | 6 ++-- .../complete_row_tracking_fields_reader.cpp | 3 +- .../io/complete_row_tracking_fields_reader.h | 4 +-- src/paimon/core/io/field_mapping_reader.h | 4 +-- .../io/key_value_data_file_record_reader.cpp | 3 +- .../format/avro/avro_file_batch_reader.h | 2 +- .../avro/avro_file_batch_reader_test.cpp | 20 ++++++------- .../format/blob/blob_file_batch_reader.h | 4 +-- .../blob/blob_file_batch_reader_test.cpp | 12 ++++---- .../format/lance/lance_file_batch_reader.h | 4 +-- .../lance/lance_format_reader_writer_test.cpp | 12 ++++---- src/paimon/format/orc/orc_file_batch_reader.h | 2 +- .../format/orc/orc_file_batch_reader_test.cpp | 14 ++++----- 
.../parquet/parquet_file_batch_reader.h | 2 +- .../parquet_file_batch_reader_test.cpp | 28 +++++++++--------- .../testing/mock/mock_file_batch_reader.h | 2 +- 23 files changed, 91 insertions(+), 85 deletions(-) diff --git a/src/paimon/common/data/shredding/shared_shredding_file_reader.cpp b/src/paimon/common/data/shredding/shared_shredding_file_reader.cpp index c9c249caf..00fe9bd27 100644 --- a/src/paimon/common/data/shredding/shared_shredding_file_reader.cpp +++ b/src/paimon/common/data/shredding/shared_shredding_file_reader.cpp @@ -469,8 +469,9 @@ void SharedShreddingFileReader::Close() { reader_->Close(); } -Result SharedShreddingFileReader::GetGlobalRowId(uint64_t batch_row_id) const { - return reader_->GetGlobalRowId(batch_row_id); +Result SharedShreddingFileReader::GetPreviousBatchGlobalRowId( + uint64_t batch_row_id) const { + return reader_->GetPreviousBatchGlobalRowId(batch_row_id); } Result SharedShreddingFileReader::GetNumberOfRows() const { diff --git a/src/paimon/common/data/shredding/shared_shredding_file_reader.h b/src/paimon/common/data/shredding/shared_shredding_file_reader.h index 7998b7ab1..5003874aa 100644 --- a/src/paimon/common/data/shredding/shared_shredding_file_reader.h +++ b/src/paimon/common/data/shredding/shared_shredding_file_reader.h @@ -46,7 +46,7 @@ class SharedShreddingFileReader : public FileBatchReader { void Close() override; - Result GetGlobalRowId(uint64_t batch_row_id) const override; + Result GetPreviousBatchGlobalRowId(uint64_t batch_row_id) const override; Result GetNumberOfRows() const override; diff --git a/src/paimon/common/file_index/bitmap/apply_bitmap_index_batch_reader.h b/src/paimon/common/file_index/bitmap/apply_bitmap_index_batch_reader.h index 66b75bb3e..8213c4f2a 100644 --- a/src/paimon/common/file_index/bitmap/apply_bitmap_index_batch_reader.h +++ b/src/paimon/common/file_index/bitmap/apply_bitmap_index_batch_reader.h @@ -80,8 +80,8 @@ class ApplyBitmapIndexBatchReader : public FileBatchReader { return 
Status::Invalid("ApplyBitmapIndexBatchReader does not support SetReadSchema"); } - Result GetGlobalRowId(uint64_t batch_row_id) const override { - return reader_->GetGlobalRowId(batch_row_id); + Result GetPreviousBatchGlobalRowId(uint64_t batch_row_id) const override { + return reader_->GetPreviousBatchGlobalRowId(batch_row_id); } Result GetNumberOfRows() const override { @@ -96,7 +96,7 @@ class ApplyBitmapIndexBatchReader : public FileBatchReader { Result Filter(int32_t batch_size) const { RoaringBitmap32 is_valid; for (int32_t i = 0; i < batch_size; ++i) { - PAIMON_ASSIGN_OR_RAISE(uint64_t global_row_id, reader_->GetGlobalRowId(i)); + PAIMON_ASSIGN_OR_RAISE(uint64_t global_row_id, reader_->GetPreviousBatchGlobalRowId(i)); if (bitmap_.Contains(global_row_id)) { is_valid.Add(i); } diff --git a/src/paimon/common/reader/delegating_prefetch_reader.h b/src/paimon/common/reader/delegating_prefetch_reader.h index de92431cc..64d57a155 100644 --- a/src/paimon/common/reader/delegating_prefetch_reader.h +++ b/src/paimon/common/reader/delegating_prefetch_reader.h @@ -54,8 +54,8 @@ class DelegatingPrefetchReader : public FileBatchReader { return prefetch_reader_->SetReadSchema(read_schema, predicate, selection_bitmap); } - Result GetGlobalRowId(uint64_t batch_row_id) const override { - return GetReader()->GetGlobalRowId(batch_row_id); + Result GetPreviousBatchGlobalRowId(uint64_t batch_row_id) const override { + return GetReader()->GetPreviousBatchGlobalRowId(batch_row_id); } Result GetNumberOfRows() const override { diff --git a/src/paimon/common/reader/prefetch_file_batch_reader_impl.cpp b/src/paimon/common/reader/prefetch_file_batch_reader_impl.cpp index cf842583c..b779d75c5 100644 --- a/src/paimon/common/reader/prefetch_file_batch_reader_impl.cpp +++ b/src/paimon/common/reader/prefetch_file_batch_reader_impl.cpp @@ -431,7 +431,8 @@ Status PrefetchFileBatchReaderImpl::HandleReadResult( std::vector global_row_ids; global_row_ids.reserve(c_array->length); for (int64_t i = 0; 
i < c_array->length; ++i) { - PAIMON_ASSIGN_OR_RAISE(uint64_t global_row_id, readers_[reader_idx]->GetGlobalRowId(i)); + PAIMON_ASSIGN_OR_RAISE(uint64_t global_row_id, + readers_[reader_idx]->GetPreviousBatchGlobalRowId(i)); global_row_ids.push_back(global_row_id); } if (global_row_ids.empty()) { @@ -597,7 +598,8 @@ Result> PrefetchFileBatchReaderImpl::GetFileSchem return readers_[0]->GetFileSchema(); } -Result PrefetchFileBatchReaderImpl::GetGlobalRowId(uint64_t batch_row_id) const { +Result PrefetchFileBatchReaderImpl::GetPreviousBatchGlobalRowId( + uint64_t batch_row_id) const { if (batch_row_id >= current_batch_global_row_ids_.size()) { return std::numeric_limits::max(); } diff --git a/src/paimon/common/reader/prefetch_file_batch_reader_impl.h b/src/paimon/common/reader/prefetch_file_batch_reader_impl.h index 4d99dc843..908e06b85 100644 --- a/src/paimon/common/reader/prefetch_file_batch_reader_impl.h +++ b/src/paimon/common/reader/prefetch_file_batch_reader_impl.h @@ -76,7 +76,7 @@ class PrefetchFileBatchReaderImpl : public PrefetchFileBatchReader { const std::optional& selection_bitmap) override; Status SeekToRow(uint64_t row_number) override; - Result GetGlobalRowId(uint64_t batch_row_id) const override; + Result GetPreviousBatchGlobalRowId(uint64_t batch_row_id) const override; Result GetNumberOfRows() const override; uint64_t GetNextRowToRead() const override; void Close() override; diff --git a/src/paimon/common/reader/prefetch_file_batch_reader_impl_test.cpp b/src/paimon/common/reader/prefetch_file_batch_reader_impl_test.cpp index ef78927ca..cac4905d0 100644 --- a/src/paimon/common/reader/prefetch_file_batch_reader_impl_test.cpp +++ b/src/paimon/common/reader/prefetch_file_batch_reader_impl_test.cpp @@ -283,7 +283,8 @@ TEST_F(PrefetchFileBatchReaderImplTest, TestSimple) { /*enable_adaptive_prefetch_strategy=*/false, executor_, /*initialize_read_ranges=*/true, /*prefetch_cache_mode=*/PrefetchCacheMode::ALWAYS, CacheConfig(), GetDefaultPool())); - 
ASSERT_EQ(reader->GetGlobalRowId(0).value(), std::numeric_limits::max()); + ASSERT_EQ(reader->GetPreviousBatchGlobalRowId(0).value(), + std::numeric_limits::max()); ASSERT_OK_AND_ASSIGN(auto result_array, ReadResultCollector::CollectResult( reader.get(), /*max simulated data processing time*/ 100)); @@ -604,7 +605,7 @@ TEST_F(PrefetchFileBatchReaderImplTest, TestReadWithLargeBatchSize) { prefetch_max_parallel_num * 2, /*enable_adaptive_prefetch_strategy=*/false, executor_, /*initialize_read_ranges=*/true, /*prefetch_cache_mode=*/PrefetchCacheMode::ALWAYS, CacheConfig(), GetDefaultPool())); - ASSERT_EQ(reader->GetGlobalRowId(0).value(), std::numeric_limits::max()); + ASSERT_EQ(reader->GetPreviousBatchGlobalRowId(0).value(), std::numeric_limits::max()); ASSERT_OK_AND_ASSIGN(auto result_array, ReadResultCollector::CollectResult( reader.get(), /*max simulated data processing time*/ 100)); @@ -631,11 +632,11 @@ TEST_F(PrefetchFileBatchReaderImplTest, TestPartialReaderSuccessRead) { } arrow::ArrayVector result_array_vector; - ASSERT_EQ(reader->GetGlobalRowId(0).value(), std::numeric_limits::max()); + ASSERT_EQ(reader->GetPreviousBatchGlobalRowId(0).value(), std::numeric_limits::max()); ASSERT_OK_AND_ASSIGN(auto batch_with_bitmap, reader->NextBatchWithBitmap()); auto& [batch, bitmap] = batch_with_bitmap; ASSERT_EQ(batch.first->length, bitmap.Cardinality()); - ASSERT_EQ(reader->GetGlobalRowId(0).value(), 0); + ASSERT_EQ(reader->GetPreviousBatchGlobalRowId(0).value(), 0); ASSERT_OK_AND_ASSIGN(auto array, ReadResultCollector::GetArray(std::move(batch))); result_array_vector.push_back(array); ASSERT_OK(prefetch_reader->GetReadStatus()); @@ -676,9 +677,9 @@ TEST_F(PrefetchFileBatchReaderImplTest, TestAllReaderFailedWithIOError) { ->SetNextBatchStatus(Status::IOError("mock error")); } - ASSERT_EQ(reader->GetGlobalRowId(0).value(), std::numeric_limits::max()); + ASSERT_EQ(reader->GetPreviousBatchGlobalRowId(0).value(), std::numeric_limits::max()); auto batch_result = 
reader->NextBatchWithBitmap(); - ASSERT_EQ(reader->GetGlobalRowId(0).value(), std::numeric_limits::max()); + ASSERT_EQ(reader->GetPreviousBatchGlobalRowId(0).value(), std::numeric_limits::max()); ASSERT_FALSE(batch_result.ok()); ASSERT_TRUE(batch_result.status().IsIOError()); ASSERT_FALSE(prefetch_reader->is_shutdown_); @@ -687,7 +688,7 @@ TEST_F(PrefetchFileBatchReaderImplTest, TestAllReaderFailedWithIOError) { // call NextBatch again, will still return error status auto batch_result2 = reader->NextBatchWithBitmap(); - ASSERT_EQ(reader->GetGlobalRowId(0).value(), std::numeric_limits::max()); + ASSERT_EQ(reader->GetPreviousBatchGlobalRowId(0).value(), std::numeric_limits::max()); ASSERT_FALSE(batch_result2.ok()); ASSERT_TRUE(batch_result2.status().IsIOError()); } @@ -704,11 +705,11 @@ TEST_F(PrefetchFileBatchReaderImplTest, TestPrefetchWithEmptyData) { prefetch_max_parallel_num * 2, /*enable_adaptive_prefetch_strategy=*/false, executor_, /*initialize_read_ranges=*/true, /*prefetch_cache_mode=*/PrefetchCacheMode::ALWAYS, CacheConfig(), GetDefaultPool())); - ASSERT_EQ(reader->GetGlobalRowId(0).value(), std::numeric_limits::max()); + ASSERT_EQ(reader->GetPreviousBatchGlobalRowId(0).value(), std::numeric_limits::max()); ASSERT_OK_AND_ASSIGN(auto result_array, ReadResultCollector::CollectResult( reader.get(), /*max simulated data processing time*/ 100)); - ASSERT_EQ(reader->GetGlobalRowId(0).value(), std::numeric_limits::max()); + ASSERT_EQ(reader->GetPreviousBatchGlobalRowId(0).value(), std::numeric_limits::max()); ASSERT_FALSE(result_array); } @@ -724,7 +725,7 @@ TEST_F(PrefetchFileBatchReaderImplTest, TestCallNextBatchAfterReadingEof) { prefetch_max_parallel_num * 2, /*enable_adaptive_prefetch_strategy=*/false, executor_, /*initialize_read_ranges=*/true, /*prefetch_cache_mode=*/PrefetchCacheMode::ALWAYS, CacheConfig(), GetDefaultPool())); - ASSERT_EQ(reader->GetGlobalRowId(0).value(), std::numeric_limits::max()); + 
ASSERT_EQ(reader->GetPreviousBatchGlobalRowId(0).value(), std::numeric_limits::max()); ASSERT_OK_AND_ASSIGN(auto result_array, ReadResultCollector::CollectResult( reader.get(), /*max simulated data processing time*/ 100)); @@ -829,11 +830,11 @@ TEST_P(PrefetchFileBatchReaderImplTest, TestPrefetchWithPredicatePushdownWithCom PreparePrefetchReader(file_format, schema.get(), predicate, /*selection_bitmap=*/std::nullopt, /*batch_size=*/10, /*prefetch_max_parallel_num=*/3, cache_mode); - ASSERT_EQ(reader->GetGlobalRowId(0).value(), std::numeric_limits::max()); + ASSERT_EQ(reader->GetPreviousBatchGlobalRowId(0).value(), std::numeric_limits::max()); ASSERT_OK_AND_ASSIGN(auto result_array, ReadResultCollector::CollectResult( reader.get(), /*max simulated data processing time*/ 100)); - ASSERT_EQ(reader->GetGlobalRowId(0).value(), 80); + ASSERT_EQ(reader->GetPreviousBatchGlobalRowId(0).value(), 80); arrow::ArrayVector expected_array_vector; expected_array_vector.push_back(data_array->Slice(0, 30)); @@ -865,11 +866,11 @@ TEST_P(PrefetchFileBatchReaderImplTest, /*selection_bitmap=*/std::nullopt, /*batch_size=*/10, /*prefetch_max_parallel_num=*/3, cache_mode); ASSERT_OK(reader->RefreshReadRanges()); - ASSERT_EQ(reader->GetGlobalRowId(0).value(), std::numeric_limits::max()); + ASSERT_EQ(reader->GetPreviousBatchGlobalRowId(0).value(), std::numeric_limits::max()); ASSERT_OK_AND_ASSIGN(auto result_array, ReadResultCollector::CollectResult( reader.get(), /*max simulated data processing time*/ 100)); - ASSERT_EQ(reader->GetGlobalRowId(0).value(), 80); + ASSERT_EQ(reader->GetPreviousBatchGlobalRowId(0).value(), 80); arrow::ArrayVector expected_array_vector; expected_array_vector.push_back(data_array->Slice(0, 20)); diff --git a/src/paimon/core/deletionvectors/apply_deletion_vector_batch_reader.h b/src/paimon/core/deletionvectors/apply_deletion_vector_batch_reader.h index f6d824a76..cb2867075 100644 --- a/src/paimon/core/deletionvectors/apply_deletion_vector_batch_reader.h +++ 
b/src/paimon/core/deletionvectors/apply_deletion_vector_batch_reader.h @@ -82,8 +82,8 @@ class ApplyDeletionVectorBatchReader : public FileBatchReader { return Status::Invalid("ApplyDeletionVectorBatchReader does not support SetReadSchema"); } - Result GetGlobalRowId(uint64_t batch_row_id) const override { - return reader_->GetGlobalRowId(batch_row_id); + Result GetPreviousBatchGlobalRowId(uint64_t batch_row_id) const override { + return reader_->GetPreviousBatchGlobalRowId(batch_row_id); } Result GetNumberOfRows() const override { @@ -98,7 +98,7 @@ class ApplyDeletionVectorBatchReader : public FileBatchReader { Result Filter(int32_t batch_size) const { RoaringBitmap32 is_valid; for (int32_t i = 0; i < batch_size; ++i) { - PAIMON_ASSIGN_OR_RAISE(uint64_t global_row_id, reader_->GetGlobalRowId(i)); + PAIMON_ASSIGN_OR_RAISE(uint64_t global_row_id, reader_->GetPreviousBatchGlobalRowId(i)); PAIMON_ASSIGN_OR_RAISE(bool is_deleted, deletion_vector_->IsDeleted(global_row_id)); if (!is_deleted) { is_valid.Add(i); diff --git a/src/paimon/core/io/complete_row_tracking_fields_reader.cpp b/src/paimon/core/io/complete_row_tracking_fields_reader.cpp index 8ed53615a..54e55b380 100644 --- a/src/paimon/core/io/complete_row_tracking_fields_reader.cpp +++ b/src/paimon/core/io/complete_row_tracking_fields_reader.cpp @@ -91,7 +91,8 @@ CompleteRowTrackingFieldsBatchReader::NextBatchWithBitmap() { return Status::Invalid( "unexpected: read _ROW_ID special field, but first row id is null in meta"); } - PAIMON_ASSIGN_OR_RAISE(uint64_t global_row_id, reader_->GetGlobalRowId(idx_in_array)); + PAIMON_ASSIGN_OR_RAISE(uint64_t global_row_id, + reader_->GetPreviousBatchGlobalRowId(idx_in_array)); return first_row_id_.value() + global_row_id; }; PAIMON_RETURN_NOT_OK(ConvertRowTrackingField(src_struct_array->length(), /*init_value=*/0, diff --git a/src/paimon/core/io/complete_row_tracking_fields_reader.h b/src/paimon/core/io/complete_row_tracking_fields_reader.h index 6f49fea90..b3a42cb69 100644 
--- a/src/paimon/core/io/complete_row_tracking_fields_reader.h +++ b/src/paimon/core/io/complete_row_tracking_fields_reader.h @@ -60,8 +60,8 @@ class CompleteRowTrackingFieldsBatchReader : public FileBatchReader { reader_->Close(); } - Result GetGlobalRowId(uint64_t batch_row_id) const override { - return reader_->GetGlobalRowId(batch_row_id); + Result GetPreviousBatchGlobalRowId(uint64_t batch_row_id) const override { + return reader_->GetPreviousBatchGlobalRowId(batch_row_id); } Result GetNumberOfRows() const override { diff --git a/src/paimon/core/io/field_mapping_reader.h b/src/paimon/core/io/field_mapping_reader.h index 39c02dcaf..5a75ba440 100644 --- a/src/paimon/core/io/field_mapping_reader.h +++ b/src/paimon/core/io/field_mapping_reader.h @@ -74,8 +74,8 @@ class FieldMappingReader : public FileBatchReader { return Status::Invalid("FieldMappingReader does not support SetReadSchema"); } - Result GetGlobalRowId(uint64_t batch_row_id) const override { - return reader_->GetGlobalRowId(batch_row_id); + Result GetPreviousBatchGlobalRowId(uint64_t batch_row_id) const override { + return reader_->GetPreviousBatchGlobalRowId(batch_row_id); } Result GetNumberOfRows() const override { diff --git a/src/paimon/core/io/key_value_data_file_record_reader.cpp b/src/paimon/core/io/key_value_data_file_record_reader.cpp index ea5deb5dd..9ccc71486 100644 --- a/src/paimon/core/io/key_value_data_file_record_reader.cpp +++ b/src/paimon/core/io/key_value_data_file_record_reader.cpp @@ -81,7 +81,8 @@ Result KeyValueDataFileRecordReader::Iterator::Next() { Result> KeyValueDataFileRecordReader::Iterator::NextWithFilePos() { PAIMON_ASSIGN_OR_RAISE(KeyValue kv, Next()); - PAIMON_ASSIGN_OR_RAISE(uint64_t global_row_id, reader_->reader_->GetGlobalRowId(cursor_ - 1)); + PAIMON_ASSIGN_OR_RAISE(uint64_t global_row_id, + reader_->reader_->GetPreviousBatchGlobalRowId(cursor_ - 1)); return std::make_pair(static_cast(global_row_id), std::move(kv)); } diff --git 
a/src/paimon/format/avro/avro_file_batch_reader.h b/src/paimon/format/avro/avro_file_batch_reader.h index 355e849a6..463264f39 100644 --- a/src/paimon/format/avro/avro_file_batch_reader.h +++ b/src/paimon/format/avro/avro_file_batch_reader.h @@ -45,7 +45,7 @@ class AvroFileBatchReader : public FileBatchReader { Status SetReadSchema(::ArrowSchema* read_schema, const std::shared_ptr& predicate, const std::optional& selection_bitmap) override; - Result GetGlobalRowId(uint64_t batch_row_id) const override { + Result GetPreviousBatchGlobalRowId(uint64_t batch_row_id) const override { return previous_first_row_ + batch_row_id; } diff --git a/src/paimon/format/avro/avro_file_batch_reader_test.cpp b/src/paimon/format/avro/avro_file_batch_reader_test.cpp index bd2b1e1fd..c3970bf33 100644 --- a/src/paimon/format/avro/avro_file_batch_reader_test.cpp +++ b/src/paimon/format/avro/avro_file_batch_reader_test.cpp @@ -327,7 +327,7 @@ TEST_F(AvroFileBatchReaderTest, TestSetReadSchemaRejectNestedSubFieldProjection) "does not support nested sub-field projection"); } -TEST_F(AvroFileBatchReaderTest, TestGetGlobalRowId) { +TEST_F(AvroFileBatchReaderTest, TestGetPreviousBatchGlobalRowId) { std::string path = paimon::test::GetDataDir() + "/avro/append_simple.db/" "append_simple/bucket-0/" @@ -352,25 +352,25 @@ TEST_F(AvroFileBatchReaderTest, TestGetGlobalRowId) { ASSERT_OK_AND_ASSIGN(auto num_rows, reader->GetNumberOfRows()); ASSERT_EQ(4, num_rows); - ASSERT_EQ(std::numeric_limits::max(), reader->GetGlobalRowId(0).value()); + ASSERT_EQ(std::numeric_limits::max(), reader->GetPreviousBatchGlobalRowId(0).value()); ASSERT_OK_AND_ASSIGN(auto batch1, reader->NextBatch()); ArrowArrayRelease(batch1.first.get()); ArrowSchemaRelease(batch1.second.get()); - ASSERT_EQ(0, reader->GetGlobalRowId(0).value()); + ASSERT_EQ(0, reader->GetPreviousBatchGlobalRowId(0).value()); ASSERT_OK_AND_ASSIGN(auto batch2, reader->NextBatch()); - ASSERT_EQ(1, reader->GetGlobalRowId(0).value()); + ASSERT_EQ(1, 
reader->GetPreviousBatchGlobalRowId(0).value()); ArrowArrayRelease(batch2.first.get()); ArrowSchemaRelease(batch2.second.get()); ASSERT_OK_AND_ASSIGN(auto batch3, reader->NextBatch()); - ASSERT_EQ(2, reader->GetGlobalRowId(0).value()); + ASSERT_EQ(2, reader->GetPreviousBatchGlobalRowId(0).value()); ArrowArrayRelease(batch3.first.get()); ArrowSchemaRelease(batch3.second.get()); ASSERT_OK_AND_ASSIGN(auto batch4, reader->NextBatch()); - ASSERT_EQ(3, reader->GetGlobalRowId(0).value()); + ASSERT_EQ(3, reader->GetPreviousBatchGlobalRowId(0).value()); ArrowArrayRelease(batch4.first.get()); ArrowSchemaRelease(batch4.second.get()); ASSERT_OK_AND_ASSIGN(auto batch5, reader->NextBatch()); - ASSERT_EQ(4, reader->GetGlobalRowId(0).value()); + ASSERT_EQ(4, reader->GetPreviousBatchGlobalRowId(0).value()); ASSERT_TRUE(BatchReader::IsEofBatch(batch5)); } @@ -396,7 +396,7 @@ TEST_F(AvroFileBatchReaderTest, TestSetReadSchemaResetsReaderToFirstRow) { ASSERT_OK_AND_ASSIGN(auto reader, reader_builder->Build(in)); ASSERT_OK_AND_ASSIGN(auto first_batch, reader->NextBatch()); - ASSERT_EQ(0, reader->GetGlobalRowId(0).value()); + ASSERT_EQ(0, reader->GetPreviousBatchGlobalRowId(0).value()); auto first_array = arrow::ImportArray(first_batch.first.get(), first_batch.second.get()).ValueOrDie(); ASSERT_TRUE(first_array->Equals(src_array->Slice(0, 2))) << first_array->ToString(); @@ -406,10 +406,10 @@ TEST_F(AvroFileBatchReaderTest, TestSetReadSchemaResetsReaderToFirstRow) { ASSERT_TRUE(arrow::ExportSchema(*read_schema, c_schema.get()).ok()); ASSERT_OK(reader->SetReadSchema(c_schema.get(), /*predicate=*/nullptr, /*selection_bitmap=*/std::nullopt)); - ASSERT_EQ(std::numeric_limits::max(), reader->GetGlobalRowId(0).value()); + ASSERT_EQ(std::numeric_limits::max(), reader->GetPreviousBatchGlobalRowId(0).value()); ASSERT_OK_AND_ASSIGN(auto projected_batch, reader->NextBatch()); - ASSERT_EQ(0, reader->GetGlobalRowId(0).value()); + ASSERT_EQ(0, reader->GetPreviousBatchGlobalRowId(0).value()); auto 
projected_array = arrow::ImportArray(projected_batch.first.get(), projected_batch.second.get()).ValueOrDie(); auto expected_projected_array = arrow::ipc::internal::json::ArrayFromJSON( diff --git a/src/paimon/format/blob/blob_file_batch_reader.h b/src/paimon/format/blob/blob_file_batch_reader.h index 0c04b43f3..15d6c8075 100644 --- a/src/paimon/format/blob/blob_file_batch_reader.h +++ b/src/paimon/format/blob/blob_file_batch_reader.h @@ -97,10 +97,10 @@ class BlobFileBatchReader : public FileBatchReader { Result NextBatch() override; - Result GetGlobalRowId(uint64_t batch_row_id) const override { + Result GetPreviousBatchGlobalRowId(uint64_t batch_row_id) const override { if (all_blob_lengths_.size() != target_blob_lengths_.size()) { return Status::Invalid( - "Cannot call GetGlobalRowId in BlobFileBatchReader because, after " + "Cannot call GetPreviousBatchGlobalRowId in BlobFileBatchReader because, after " "bitmap pushdown, rows in the array returned by NextBatch are no longer " "contiguous."); } diff --git a/src/paimon/format/blob/blob_file_batch_reader_test.cpp b/src/paimon/format/blob/blob_file_batch_reader_test.cpp index f3687eb5e..afee869b5 100644 --- a/src/paimon/format/blob/blob_file_batch_reader_test.cpp +++ b/src/paimon/format/blob/blob_file_batch_reader_test.cpp @@ -169,21 +169,21 @@ TEST_F(BlobFileBatchReaderTest, TestRowNumbers) { ASSERT_OK(reader->SetReadSchema(&c_schema, nullptr, std::nullopt)); ASSERT_OK_AND_ASSIGN(auto number_of_rows, reader->GetNumberOfRows()); ASSERT_EQ(3, number_of_rows); - ASSERT_EQ(std::numeric_limits::max(), reader->GetGlobalRowId(0).value()); + ASSERT_EQ(std::numeric_limits::max(), reader->GetPreviousBatchGlobalRowId(0).value()); ASSERT_OK_AND_ASSIGN(auto batch1, reader->NextBatch()); ArrowArrayRelease(batch1.first.get()); ArrowSchemaRelease(batch1.second.get()); - ASSERT_EQ(0, reader->GetGlobalRowId(0).value()); + ASSERT_EQ(0, reader->GetPreviousBatchGlobalRowId(0).value()); ASSERT_OK_AND_ASSIGN(auto batch2, 
reader->NextBatch()); - ASSERT_EQ(1, reader->GetGlobalRowId(0).value()); + ASSERT_EQ(1, reader->GetPreviousBatchGlobalRowId(0).value()); ArrowArrayRelease(batch2.first.get()); ArrowSchemaRelease(batch2.second.get()); ASSERT_OK_AND_ASSIGN(auto batch3, reader->NextBatch()); - ASSERT_EQ(2, reader->GetGlobalRowId(0).value()); + ASSERT_EQ(2, reader->GetPreviousBatchGlobalRowId(0).value()); ArrowArrayRelease(batch3.first.get()); ArrowSchemaRelease(batch3.second.get()); ASSERT_OK_AND_ASSIGN(auto batch4, reader->NextBatch()); - ASSERT_EQ(3, reader->GetGlobalRowId(0).value()); + ASSERT_EQ(3, reader->GetPreviousBatchGlobalRowId(0).value()); ASSERT_TRUE(BatchReader::IsEofBatch(batch4)); } @@ -254,7 +254,7 @@ TEST_P(BlobFileBatchReaderTest, EmptyFile) { ASSERT_OK(reader->SetReadSchema(&c_schema, nullptr, std::nullopt)); ASSERT_OK_AND_ASSIGN(auto number_of_rows, reader->GetNumberOfRows()); ASSERT_EQ(0, number_of_rows); - ASSERT_EQ(std::numeric_limits::max(), reader->GetGlobalRowId(0).value()); + ASSERT_EQ(std::numeric_limits::max(), reader->GetPreviousBatchGlobalRowId(0).value()); ASSERT_OK_AND_ASSIGN(auto batch, reader->NextBatch()); ASSERT_TRUE(BatchReader::IsEofBatch(batch)); } diff --git a/src/paimon/format/lance/lance_file_batch_reader.h b/src/paimon/format/lance/lance_file_batch_reader.h index 361561411..ba8e16cf2 100644 --- a/src/paimon/format/lance/lance_file_batch_reader.h +++ b/src/paimon/format/lance/lance_file_batch_reader.h @@ -41,11 +41,11 @@ class LanceFileBatchReader : public FileBatchReader { Result NextBatch() override; - Result GetGlobalRowId(uint64_t batch_row_id) const override { + Result GetPreviousBatchGlobalRowId(uint64_t batch_row_id) const override { if (!read_row_ids_.empty() && read_row_ids_.size() != num_rows_) { // TODO(xinyu.lxy): support function return Status::Invalid( - "Cannot call GetGlobalRowId in LanceFileBatchReader because, after " + "Cannot call GetPreviousBatchGlobalRowId in LanceFileBatchReader because, after " "bitmap pushdown, rows 
in the array returned by NextBatch are no longer " "contiguous."); } diff --git a/src/paimon/format/lance/lance_format_reader_writer_test.cpp b/src/paimon/format/lance/lance_format_reader_writer_test.cpp index eab303905..920cd4b0e 100644 --- a/src/paimon/format/lance/lance_format_reader_writer_test.cpp +++ b/src/paimon/format/lance/lance_format_reader_writer_test.cpp @@ -478,26 +478,26 @@ TEST_F(LanceFileReaderWriterTest, TestPreviousBatchFirstRowNumber) { ASSERT_OK_AND_ASSIGN( std::unique_ptr reader, LanceFileBatchReader::Create(file_path, /*batch_size=*/4, /*batch_readahead=*/2)); - ASSERT_EQ(std::numeric_limits::max(), reader->GetGlobalRowId(0).value()); + ASSERT_EQ(std::numeric_limits::max(), reader->GetPreviousBatchGlobalRowId(0).value()); // first batch row 0-3 ASSERT_OK_AND_ASSIGN(auto read_batch, reader->NextBatch()); ASSERT_OK_AND_ASSIGN(auto read_array, paimon::test::ReadResultCollector::GetArray(std::move(read_batch))); ASSERT_TRUE(read_array->Equals(array->Slice(0, 4))); - ASSERT_EQ(0, reader->GetGlobalRowId(0).value()); + ASSERT_EQ(0, reader->GetPreviousBatchGlobalRowId(0).value()); // second batch 4-5 ASSERT_OK_AND_ASSIGN(read_batch, reader->NextBatch()); ASSERT_OK_AND_ASSIGN(read_array, paimon::test::ReadResultCollector::GetArray(std::move(read_batch))); ASSERT_TRUE(read_array->Equals(array->Slice(4, 2))); - ASSERT_EQ(4, reader->GetGlobalRowId(0).value()); + ASSERT_EQ(4, reader->GetPreviousBatchGlobalRowId(0).value()); // eof ASSERT_OK_AND_ASSIGN(read_batch, reader->NextBatch()); ASSERT_TRUE(BatchReader::IsEofBatch(read_batch)); - ASSERT_EQ(6, reader->GetGlobalRowId(0).value()); + ASSERT_EQ(6, reader->GetPreviousBatchGlobalRowId(0).value()); // test with bitmap pushdown ArrowSchema c_read_schema; @@ -505,8 +505,8 @@ TEST_F(LanceFileReaderWriterTest, TestPreviousBatchFirstRowNumber) { ASSERT_OK(reader->SetReadSchema(&c_read_schema, /*predicate=*/nullptr, /*selection_bitmap=*/RoaringBitmap32::From({0, 3}))); ASSERT_NOK_WITH_MSG( - 
reader->GetGlobalRowId(0), - "Cannot call GetGlobalRowId in LanceFileBatchReader because, after bitmap " + reader->GetPreviousBatchGlobalRowId(0), + "Cannot call GetPreviousBatchGlobalRowId in LanceFileBatchReader because, after bitmap " "pushdown, rows in the array returned by NextBatch are no longer contiguous."); } } // namespace paimon::lance::test diff --git a/src/paimon/format/orc/orc_file_batch_reader.h b/src/paimon/format/orc/orc_file_batch_reader.h index 0e3fade23..3cd870db3 100644 --- a/src/paimon/format/orc/orc_file_batch_reader.h +++ b/src/paimon/format/orc/orc_file_batch_reader.h @@ -62,7 +62,7 @@ class OrcFileBatchReader : public PrefetchFileBatchReader { // OrcFileBatchReader. Therefore, we need to hold BatchReader when using output ArrowArray. Result NextBatch() override; - Result GetGlobalRowId(uint64_t batch_row_id) const override { + Result GetPreviousBatchGlobalRowId(uint64_t batch_row_id) const override { return reader_->GetRowNumber() + batch_row_id; } diff --git a/src/paimon/format/orc/orc_file_batch_reader_test.cpp b/src/paimon/format/orc/orc_file_batch_reader_test.cpp index 57665fa0d..aef2ccbba 100644 --- a/src/paimon/format/orc/orc_file_batch_reader_test.cpp +++ b/src/paimon/format/orc/orc_file_batch_reader_test.cpp @@ -492,10 +492,10 @@ TEST_P(OrcFileBatchReaderTest, TestNextBatchSimple) { for (auto batch_size : {1, 2, 3, 5, 8, 10}) { auto orc_batch_reader = PrepareOrcFileBatchReader(file_name, &read_schema, batch_size, natural_read_size); - ASSERT_EQ(orc_batch_reader->GetGlobalRowId(0).value(), -1); + ASSERT_EQ(orc_batch_reader->GetPreviousBatchGlobalRowId(0).value(), -1); ASSERT_OK_AND_ASSIGN(auto result_array, paimon::test::ReadResultCollector::CollectResult( orc_batch_reader.get())); - ASSERT_EQ(orc_batch_reader->GetGlobalRowId(0).value(), 8); + ASSERT_EQ(orc_batch_reader->GetPreviousBatchGlobalRowId(0).value(), 8); orc_batch_reader->Close(); auto expected_array = std::make_shared(struct_array_); 
ASSERT_TRUE(result_array->Equals(expected_array)); @@ -766,18 +766,18 @@ TEST_F(OrcFileBatchReaderTest, TestReadNoField) { auto orc_batch_reader = PrepareOrcFileBatchReader(file_name, &read_schema, /*batch_size=*/3, /*natural_read_size=*/10); // read 3 rows - ASSERT_EQ(orc_batch_reader->GetGlobalRowId(0).value(), -1); + ASSERT_EQ(orc_batch_reader->GetPreviousBatchGlobalRowId(0).value(), -1); ASSERT_OK_AND_ASSIGN(auto batch1, orc_batch_reader->NextBatch()); - ASSERT_EQ(orc_batch_reader->GetGlobalRowId(0).value(), 0); + ASSERT_EQ(orc_batch_reader->GetPreviousBatchGlobalRowId(0).value(), 0); // read 3 rows ASSERT_OK_AND_ASSIGN(auto batch2, orc_batch_reader->NextBatch()); - ASSERT_EQ(orc_batch_reader->GetGlobalRowId(0).value(), 3); + ASSERT_EQ(orc_batch_reader->GetPreviousBatchGlobalRowId(0).value(), 3); // read 2 rows ASSERT_OK_AND_ASSIGN(auto batch3, orc_batch_reader->NextBatch()); - ASSERT_EQ(orc_batch_reader->GetGlobalRowId(0).value(), 6); + ASSERT_EQ(orc_batch_reader->GetPreviousBatchGlobalRowId(0).value(), 6); // read rows with eof ASSERT_OK_AND_ASSIGN(auto batch4, orc_batch_reader->NextBatch()); - ASSERT_EQ(orc_batch_reader->GetGlobalRowId(0).value(), 8); + ASSERT_EQ(orc_batch_reader->GetPreviousBatchGlobalRowId(0).value(), 8); ASSERT_TRUE(BatchReader::IsEofBatch(batch4)); orc_batch_reader->Close(); diff --git a/src/paimon/format/parquet/parquet_file_batch_reader.h b/src/paimon/format/parquet/parquet_file_batch_reader.h index c89669632..8dde7e6a3 100644 --- a/src/paimon/format/parquet/parquet_file_batch_reader.h +++ b/src/paimon/format/parquet/parquet_file_batch_reader.h @@ -96,7 +96,7 @@ class ParquetFileBatchReader : public PrefetchFileBatchReader { Result>> GenReadRanges( bool* need_prefetch) const override; - Result GetGlobalRowId(uint64_t batch_row_id) const override { + Result GetPreviousBatchGlobalRowId(uint64_t batch_row_id) const override { if (batch_row_id >= row_mapping_.size()) { return std::numeric_limits::max(); } diff --git 
a/src/paimon/format/parquet/parquet_file_batch_reader_test.cpp b/src/paimon/format/parquet/parquet_file_batch_reader_test.cpp index 4699fdff0..c92cfc196 100644 --- a/src/paimon/format/parquet/parquet_file_batch_reader_test.cpp +++ b/src/paimon/format/parquet/parquet_file_batch_reader_test.cpp @@ -475,7 +475,7 @@ TEST_F(ParquetFileBatchReaderTest, TestNextBatchSimple) { auto parquet_batch_reader = PrepareParquetFileBatchReader(file_name, schema_, /*predicate=*/nullptr, /*selection_bitmap=*/std::nullopt, batch_size); - ASSERT_EQ(parquet_batch_reader->GetGlobalRowId(0).value(), + ASSERT_EQ(parquet_batch_reader->GetPreviousBatchGlobalRowId(0).value(), std::numeric_limits::max()); ASSERT_OK_AND_ASSIGN(auto result_array, paimon::test::ReadResultCollector::CollectResult( parquet_batch_reader.get())); @@ -839,19 +839,19 @@ TEST_F(ParquetFileBatchReaderTest, TestReadNoField) { PrepareParquetFileBatchReader(file_name, read_schema, /*predicate=*/nullptr, /*selection_bitmap=*/std::nullopt, /*batch_size=*/2); // read 2 rows - ASSERT_EQ(parquet_batch_reader->GetGlobalRowId(0).value(), + ASSERT_EQ(parquet_batch_reader->GetPreviousBatchGlobalRowId(0).value(), std::numeric_limits::max()); ASSERT_OK_AND_ASSIGN(auto batch1, parquet_batch_reader->NextBatch()); - ASSERT_EQ(parquet_batch_reader->GetGlobalRowId(0).value(), 0); + ASSERT_EQ(parquet_batch_reader->GetPreviousBatchGlobalRowId(0).value(), 0); // read 2 rows ASSERT_OK_AND_ASSIGN(auto batch2, parquet_batch_reader->NextBatch()); - ASSERT_EQ(parquet_batch_reader->GetGlobalRowId(0).value(), 2); + ASSERT_EQ(parquet_batch_reader->GetPreviousBatchGlobalRowId(0).value(), 2); // read 2 rows ASSERT_OK_AND_ASSIGN(auto batch3, parquet_batch_reader->NextBatch()); - ASSERT_EQ(parquet_batch_reader->GetGlobalRowId(0).value(), 4); + ASSERT_EQ(parquet_batch_reader->GetPreviousBatchGlobalRowId(0).value(), 4); // read rows with eof ASSERT_OK_AND_ASSIGN(auto batch4, parquet_batch_reader->NextBatch()); - 
ASSERT_EQ(parquet_batch_reader->GetGlobalRowId(0).value(), 4); + ASSERT_EQ(parquet_batch_reader->GetPreviousBatchGlobalRowId(0).value(), 4); ASSERT_TRUE(BatchReader::IsEofBatch(batch4)); parquet_batch_reader->Close(); @@ -1060,14 +1060,14 @@ TEST_F(ParquetFileBatchReaderTest, TestRowMapping) { auto parquet_batch_reader = PrepareParquetFileBatchReader( file_path_, arrow_schema, /*predicate=*/predicate, std::nullopt, /*batch_size=*/2); - ASSERT_EQ(parquet_batch_reader->GetGlobalRowId(0).value(), + ASSERT_EQ(parquet_batch_reader->GetPreviousBatchGlobalRowId(0).value(), std::numeric_limits::max()); ASSERT_OK_AND_ASSIGN(auto batch1, CollectOneBatch(parquet_batch_reader.get())); auto expected_batch1 = src_array->Slice(1, 2); ASSERT_TRUE(batch1->Equals(expected_batch1)) << batch1->ToString(); - ASSERT_EQ(parquet_batch_reader->GetGlobalRowId(0).value(), 1); - ASSERT_EQ(parquet_batch_reader->GetGlobalRowId(1).value(), 2); + ASSERT_EQ(parquet_batch_reader->GetPreviousBatchGlobalRowId(0).value(), 1); + ASSERT_EQ(parquet_batch_reader->GetPreviousBatchGlobalRowId(1).value(), 2); // Not adjacent pages ASSERT_OK_AND_ASSIGN(auto batch2, CollectOneBatch(parquet_batch_reader.get())); @@ -1078,22 +1078,22 @@ TEST_F(ParquetFileBatchReaderTest, TestRowMapping) { ])") .ValueOrDie()); ASSERT_TRUE(batch2->Equals(expected_batch2)) << batch2->ToString(); - ASSERT_EQ(parquet_batch_reader->GetGlobalRowId(0).value(), 3); - ASSERT_EQ(parquet_batch_reader->GetGlobalRowId(1).value(), 5); + ASSERT_EQ(parquet_batch_reader->GetPreviousBatchGlobalRowId(0).value(), 3); + ASSERT_EQ(parquet_batch_reader->GetPreviousBatchGlobalRowId(1).value(), 5); // Only one record read ASSERT_OK_AND_ASSIGN(auto batch3, CollectOneBatch(parquet_batch_reader.get())); auto expected_batch3 = src_array->Slice(6, 1); ASSERT_TRUE(batch3->Equals(expected_batch3)) << batch3->ToString(); - ASSERT_EQ(parquet_batch_reader->GetGlobalRowId(0).value(), 6); + ASSERT_EQ(parquet_batch_reader->GetPreviousBatchGlobalRowId(0).value(), 6); 
// out of bound, return max value - ASSERT_EQ(parquet_batch_reader->GetGlobalRowId(1).value(), + ASSERT_EQ(parquet_batch_reader->GetPreviousBatchGlobalRowId(1).value(), std::numeric_limits::max()); ASSERT_OK_AND_ASSIGN(auto eof_batch, CollectOneBatch(parquet_batch_reader.get())); ASSERT_EQ(nullptr, eof_batch); // previous batch is eof, return last none-eof batch's row id - ASSERT_EQ(parquet_batch_reader->GetGlobalRowId(0).value(), 6); + ASSERT_EQ(parquet_batch_reader->GetPreviousBatchGlobalRowId(0).value(), 6); } } // namespace paimon::parquet::test diff --git a/src/paimon/testing/mock/mock_file_batch_reader.h b/src/paimon/testing/mock/mock_file_batch_reader.h index 6eda3b005..6ef6428e1 100644 --- a/src/paimon/testing/mock/mock_file_batch_reader.h +++ b/src/paimon/testing/mock/mock_file_batch_reader.h @@ -149,7 +149,7 @@ class MockFileBatchReader : public PrefetchFileBatchReader { return metrics; } - Result GetGlobalRowId(uint64_t batch_row_id) const override { + Result GetPreviousBatchGlobalRowId(uint64_t batch_row_id) const override { return previous_batch_first_row_num_ + batch_row_id; } From cd7bd44ab39566537719dbd6d608a1a9c728fcce Mon Sep 17 00:00:00 2001 From: zhouhongfeng Date: Fri, 26 Jun 2026 10:46:29 +0800 Subject: [PATCH 18/38] update header files --- include/paimon/reader/file_batch_reader.h | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/include/paimon/reader/file_batch_reader.h b/include/paimon/reader/file_batch_reader.h index 272de3c82..586c8cbc7 100644 --- a/include/paimon/reader/file_batch_reader.h +++ b/include/paimon/reader/file_batch_reader.h @@ -46,8 +46,8 @@ class PAIMON_EXPORT FileBatchReader : public BatchReader { using BatchReader::NextBatch; using BatchReader::NextBatchWithBitmap; - /// Get the row number of the first row in the previously read batch. - virtual Result GetPreviousBatchFirstRowNumber() const = 0; + /// Get the global row number of the row in the previously read batch. 
+ virtual Result GetPreviousBatchGlobalRowId(uint64_t batch_row_id) const = 0; /// Get the number of rows in the file. virtual Result GetNumberOfRows() const = 0; From 41f932d3c1eb2712ae4b0a8c231ac22249022cca Mon Sep 17 00:00:00 2001 From: zhouhongfeng Date: Fri, 26 Jun 2026 14:06:49 +0800 Subject: [PATCH 19/38] fix: return Status::Invalid instead of returning max value --- .../prefetch_file_batch_reader_impl.cpp | 11 ++++- .../prefetch_file_batch_reader_impl_test.cpp | 33 +++++++------ .../parquet/parquet_file_batch_reader.cpp | 1 + .../parquet/parquet_file_batch_reader.h | 9 +++- .../parquet_file_batch_reader_test.cpp | 47 ++++++++++--------- 5 files changed, 62 insertions(+), 39 deletions(-) diff --git a/src/paimon/common/reader/prefetch_file_batch_reader_impl.cpp b/src/paimon/common/reader/prefetch_file_batch_reader_impl.cpp index b779d75c5..d22b1d318 100644 --- a/src/paimon/common/reader/prefetch_file_batch_reader_impl.cpp +++ b/src/paimon/common/reader/prefetch_file_batch_reader_impl.cpp @@ -564,7 +564,7 @@ Result PrefetchFileBatchReaderImpl::NextBatchW assert(false); return Status::Invalid("peek batch not suppose to be nullptr"); } - // current_batch_global_row_ids_.clear(); + current_batch_global_row_ids_.clear(); return BatchReader::MakeEofBatchWithBitmap(); } if (value_count == prefetch_queues_.size()) { @@ -600,8 +600,15 @@ Result> PrefetchFileBatchReaderImpl::GetFileSchem Result PrefetchFileBatchReaderImpl::GetPreviousBatchGlobalRowId( uint64_t batch_row_id) const { + if (current_batch_global_row_ids_.size() == 0) { + return Status::Invalid( + "Last batch is not read or last batch is empty, cannot get previous batch global row " + "id"); + } if (batch_row_id >= current_batch_global_row_ids_.size()) { - return std::numeric_limits::max(); + return Status::Invalid( + fmt::format("batch_row_id {} is out of range, last batch row count is {}", batch_row_id, + current_batch_global_row_ids_.size())); } return current_batch_global_row_ids_[batch_row_id]; } diff 
--git a/src/paimon/common/reader/prefetch_file_batch_reader_impl_test.cpp b/src/paimon/common/reader/prefetch_file_batch_reader_impl_test.cpp index f9713af54..120cd67e6 100644 --- a/src/paimon/common/reader/prefetch_file_batch_reader_impl_test.cpp +++ b/src/paimon/common/reader/prefetch_file_batch_reader_impl_test.cpp @@ -284,8 +284,7 @@ TEST_F(PrefetchFileBatchReaderImplTest, TestSimple) { /*enable_adaptive_prefetch_strategy=*/false, executor_, /*initialize_read_ranges=*/true, /*prefetch_cache_mode=*/PrefetchCacheMode::ALWAYS, CacheConfig(), GetDefaultPool())); - ASSERT_EQ(reader->GetPreviousBatchGlobalRowId(0).value(), - std::numeric_limits::max()); + ASSERT_NOK(reader->GetPreviousBatchGlobalRowId(0)); ASSERT_OK_AND_ASSIGN(auto result_array, ReadResultCollector::CollectResult( reader.get(), /*max simulated data processing time*/ 100)); @@ -606,10 +605,11 @@ TEST_F(PrefetchFileBatchReaderImplTest, TestReadWithLargeBatchSize) { prefetch_max_parallel_num * 2, /*enable_adaptive_prefetch_strategy=*/false, executor_, /*initialize_read_ranges=*/true, /*prefetch_cache_mode=*/PrefetchCacheMode::ALWAYS, CacheConfig(), GetDefaultPool())); - ASSERT_EQ(reader->GetPreviousBatchGlobalRowId(0).value(), std::numeric_limits::max()); + ASSERT_NOK(reader->GetPreviousBatchGlobalRowId(0)); ASSERT_OK_AND_ASSIGN(auto result_array, ReadResultCollector::CollectResult( reader.get(), /*max simulated data processing time*/ 100)); + ASSERT_NOK(reader->GetPreviousBatchGlobalRowId(0)); auto expected_array = std::make_shared(data_array); ASSERT_TRUE(result_array->Equals(expected_array)); } @@ -633,11 +633,13 @@ TEST_F(PrefetchFileBatchReaderImplTest, TestPartialReaderSuccessRead) { } arrow::ArrayVector result_array_vector; - ASSERT_EQ(reader->GetPreviousBatchGlobalRowId(0).value(), std::numeric_limits::max()); + ASSERT_NOK(reader->GetPreviousBatchGlobalRowId(0)); ASSERT_OK_AND_ASSIGN(auto batch_with_bitmap, reader->NextBatchWithBitmap()); auto& [batch, bitmap] = batch_with_bitmap; 
ASSERT_EQ(batch.first->length, bitmap.Cardinality()); - ASSERT_EQ(reader->GetPreviousBatchGlobalRowId(0).value(), 0); + uint64_t global_row_id = 0; + ASSERT_OK_AND_ASSIGN(global_row_id, reader->GetPreviousBatchGlobalRowId(0)); + ASSERT_EQ(global_row_id, 0); ASSERT_OK_AND_ASSIGN(auto array, ReadResultCollector::GetArray(std::move(batch))); result_array_vector.push_back(array); ASSERT_OK(prefetch_reader->GetReadStatus()); @@ -678,9 +680,9 @@ TEST_F(PrefetchFileBatchReaderImplTest, TestAllReaderFailedWithIOError) { ->SetNextBatchStatus(Status::IOError("mock error")); } - ASSERT_EQ(reader->GetPreviousBatchGlobalRowId(0).value(), std::numeric_limits::max()); + ASSERT_NOK(reader->GetPreviousBatchGlobalRowId(0)); auto batch_result = reader->NextBatchWithBitmap(); - ASSERT_EQ(reader->GetPreviousBatchGlobalRowId(0).value(), std::numeric_limits::max()); + ASSERT_NOK(reader->GetPreviousBatchGlobalRowId(0)); ASSERT_FALSE(batch_result.ok()); ASSERT_TRUE(batch_result.status().IsIOError()); ASSERT_FALSE(prefetch_reader->is_shutdown_); @@ -689,7 +691,7 @@ TEST_F(PrefetchFileBatchReaderImplTest, TestAllReaderFailedWithIOError) { // call NextBatch again, will still return error status auto batch_result2 = reader->NextBatchWithBitmap(); - ASSERT_EQ(reader->GetPreviousBatchGlobalRowId(0).value(), std::numeric_limits::max()); + ASSERT_NOK(reader->GetPreviousBatchGlobalRowId(0)); ASSERT_FALSE(batch_result2.ok()); ASSERT_TRUE(batch_result2.status().IsIOError()); } @@ -706,11 +708,11 @@ TEST_F(PrefetchFileBatchReaderImplTest, TestPrefetchWithEmptyData) { prefetch_max_parallel_num * 2, /*enable_adaptive_prefetch_strategy=*/false, executor_, /*initialize_read_ranges=*/true, /*prefetch_cache_mode=*/PrefetchCacheMode::ALWAYS, CacheConfig(), GetDefaultPool())); - ASSERT_EQ(reader->GetPreviousBatchGlobalRowId(0).value(), std::numeric_limits::max()); + ASSERT_NOK(reader->GetPreviousBatchGlobalRowId(0)); ASSERT_OK_AND_ASSIGN(auto result_array, ReadResultCollector::CollectResult( reader.get(), 
/*max simulated data processing time*/ 100)); - ASSERT_EQ(reader->GetPreviousBatchGlobalRowId(0).value(), std::numeric_limits::max()); + ASSERT_NOK(reader->GetPreviousBatchGlobalRowId(0)); ASSERT_FALSE(result_array); } @@ -726,7 +728,7 @@ TEST_F(PrefetchFileBatchReaderImplTest, TestCallNextBatchAfterReadingEof) { prefetch_max_parallel_num * 2, /*enable_adaptive_prefetch_strategy=*/false, executor_, /*initialize_read_ranges=*/true, /*prefetch_cache_mode=*/PrefetchCacheMode::ALWAYS, CacheConfig(), GetDefaultPool())); - ASSERT_EQ(reader->GetPreviousBatchGlobalRowId(0).value(), std::numeric_limits::max()); + ASSERT_NOK(reader->GetPreviousBatchGlobalRowId(0)); ASSERT_OK_AND_ASSIGN(auto result_array, ReadResultCollector::CollectResult( reader.get(), /*max simulated data processing time*/ 100)); @@ -735,6 +737,7 @@ TEST_F(PrefetchFileBatchReaderImplTest, TestCallNextBatchAfterReadingEof) { // continue to call NextBatch() after reading eof ASSERT_OK_AND_ASSIGN(auto batch_with_bitmap, reader->NextBatchWithBitmap()); + ASSERT_NOK(reader->GetPreviousBatchGlobalRowId(0)); ASSERT_TRUE(BatchReader::IsEofBatch(batch_with_bitmap)); } @@ -831,11 +834,11 @@ TEST_P(PrefetchFileBatchReaderImplTest, TestPrefetchWithPredicatePushdownWithCom PreparePrefetchReader(file_format, schema.get(), predicate, /*selection_bitmap=*/std::nullopt, /*batch_size=*/10, /*prefetch_max_parallel_num=*/3, cache_mode); - ASSERT_EQ(reader->GetPreviousBatchGlobalRowId(0).value(), std::numeric_limits::max()); + ASSERT_NOK(reader->GetPreviousBatchGlobalRowId(0)); ASSERT_OK_AND_ASSIGN(auto result_array, ReadResultCollector::CollectResult( reader.get(), /*max simulated data processing time*/ 100)); - ASSERT_EQ(reader->GetPreviousBatchGlobalRowId(0).value(), 80); + ASSERT_NOK(reader->GetPreviousBatchGlobalRowId(0)); arrow::ArrayVector expected_array_vector; expected_array_vector.push_back(data_array->Slice(0, 30)); @@ -867,11 +870,11 @@ TEST_P(PrefetchFileBatchReaderImplTest, /*selection_bitmap=*/std::nullopt, 
/*batch_size=*/10, /*prefetch_max_parallel_num=*/3, cache_mode); ASSERT_OK(reader->RefreshReadRanges()); - ASSERT_EQ(reader->GetPreviousBatchGlobalRowId(0).value(), std::numeric_limits::max()); + ASSERT_NOK(reader->GetPreviousBatchGlobalRowId(0)); ASSERT_OK_AND_ASSIGN(auto result_array, ReadResultCollector::CollectResult( reader.get(), /*max simulated data processing time*/ 100)); - ASSERT_EQ(reader->GetPreviousBatchGlobalRowId(0).value(), 80); + ASSERT_NOK(reader->GetPreviousBatchGlobalRowId(0)); arrow::ArrayVector expected_array_vector; expected_array_vector.push_back(data_array->Slice(0, 20)); diff --git a/src/paimon/format/parquet/parquet_file_batch_reader.cpp b/src/paimon/format/parquet/parquet_file_batch_reader.cpp index 16c79dd0e..e6617ecab 100644 --- a/src/paimon/format/parquet/parquet_file_batch_reader.cpp +++ b/src/paimon/format/parquet/parquet_file_batch_reader.cpp @@ -346,6 +346,7 @@ Result ParquetFileBatchReader::NextBatch() { try { PAIMON_ASSIGN_OR_RAISE(std::shared_ptr batch, reader_->Next()); if (batch == nullptr) { + row_mapping_.clear(); return BatchReader::MakeEofBatch(); } PAIMON_ASSIGN_OR_RAISE_FROM_ARROW(std::shared_ptr array, diff --git a/src/paimon/format/parquet/parquet_file_batch_reader.h b/src/paimon/format/parquet/parquet_file_batch_reader.h index 8dde7e6a3..4d2743eef 100644 --- a/src/paimon/format/parquet/parquet_file_batch_reader.h +++ b/src/paimon/format/parquet/parquet_file_batch_reader.h @@ -97,8 +97,15 @@ class ParquetFileBatchReader : public PrefetchFileBatchReader { bool* need_prefetch) const override; Result GetPreviousBatchGlobalRowId(uint64_t batch_row_id) const override { + if (row_mapping_.size() == 0) { + return Status::Invalid( + "Last batch is not read or last batch is empty, cannot get previous batch global " + "row id"); + } if (batch_row_id >= row_mapping_.size()) { - return std::numeric_limits::max(); + return Status::Invalid( + fmt::format("batch_row_id {} is out of range, last batch row count is {}", + batch_row_id, 
row_mapping_.size())); } return row_mapping_[batch_row_id]; } diff --git a/src/paimon/format/parquet/parquet_file_batch_reader_test.cpp b/src/paimon/format/parquet/parquet_file_batch_reader_test.cpp index c92cfc196..c9ddaab8e 100644 --- a/src/paimon/format/parquet/parquet_file_batch_reader_test.cpp +++ b/src/paimon/format/parquet/parquet_file_batch_reader_test.cpp @@ -475,8 +475,6 @@ TEST_F(ParquetFileBatchReaderTest, TestNextBatchSimple) { auto parquet_batch_reader = PrepareParquetFileBatchReader(file_name, schema_, /*predicate=*/nullptr, /*selection_bitmap=*/std::nullopt, batch_size); - ASSERT_EQ(parquet_batch_reader->GetPreviousBatchGlobalRowId(0).value(), - std::numeric_limits::max()); ASSERT_OK_AND_ASSIGN(auto result_array, paimon::test::ReadResultCollector::CollectResult( parquet_batch_reader.get())); parquet_batch_reader->Close(); @@ -838,21 +836,24 @@ TEST_F(ParquetFileBatchReaderTest, TestReadNoField) { auto parquet_batch_reader = PrepareParquetFileBatchReader(file_name, read_schema, /*predicate=*/nullptr, /*selection_bitmap=*/std::nullopt, /*batch_size=*/2); + uint64_t global_row_id = 0; // read 2 rows - ASSERT_EQ(parquet_batch_reader->GetPreviousBatchGlobalRowId(0).value(), - std::numeric_limits::max()); + ASSERT_NOK(parquet_batch_reader->GetPreviousBatchGlobalRowId(0)); ASSERT_OK_AND_ASSIGN(auto batch1, parquet_batch_reader->NextBatch()); - ASSERT_EQ(parquet_batch_reader->GetPreviousBatchGlobalRowId(0).value(), 0); + ASSERT_OK_AND_ASSIGN(global_row_id, parquet_batch_reader->GetPreviousBatchGlobalRowId(0)); + ASSERT_EQ(global_row_id, 0); // read 2 rows ASSERT_OK_AND_ASSIGN(auto batch2, parquet_batch_reader->NextBatch()); - ASSERT_EQ(parquet_batch_reader->GetPreviousBatchGlobalRowId(0).value(), 2); + ASSERT_OK_AND_ASSIGN(global_row_id, parquet_batch_reader->GetPreviousBatchGlobalRowId(0)); + ASSERT_EQ(global_row_id, 2); // read 2 rows ASSERT_OK_AND_ASSIGN(auto batch3, parquet_batch_reader->NextBatch()); - 
ASSERT_EQ(parquet_batch_reader->GetPreviousBatchGlobalRowId(0).value(), 4); + ASSERT_OK_AND_ASSIGN(global_row_id, parquet_batch_reader->GetPreviousBatchGlobalRowId(0)); + ASSERT_EQ(global_row_id, 4); // read rows with eof ASSERT_OK_AND_ASSIGN(auto batch4, parquet_batch_reader->NextBatch()); - ASSERT_EQ(parquet_batch_reader->GetPreviousBatchGlobalRowId(0).value(), 4); ASSERT_TRUE(BatchReader::IsEofBatch(batch4)); + ASSERT_NOK(parquet_batch_reader->GetPreviousBatchGlobalRowId(0)); parquet_batch_reader->Close(); arrow::FieldVector fields; @@ -1060,14 +1061,17 @@ TEST_F(ParquetFileBatchReaderTest, TestRowMapping) { auto parquet_batch_reader = PrepareParquetFileBatchReader( file_path_, arrow_schema, /*predicate=*/predicate, std::nullopt, /*batch_size=*/2); - ASSERT_EQ(parquet_batch_reader->GetPreviousBatchGlobalRowId(0).value(), - std::numeric_limits::max()); - + uint64_t global_row_id = 0; + ASSERT_NOK(parquet_batch_reader->GetPreviousBatchGlobalRowId(0)); ASSERT_OK_AND_ASSIGN(auto batch1, CollectOneBatch(parquet_batch_reader.get())); auto expected_batch1 = src_array->Slice(1, 2); ASSERT_TRUE(batch1->Equals(expected_batch1)) << batch1->ToString(); - ASSERT_EQ(parquet_batch_reader->GetPreviousBatchGlobalRowId(0).value(), 1); - ASSERT_EQ(parquet_batch_reader->GetPreviousBatchGlobalRowId(1).value(), 2); + ASSERT_OK_AND_ASSIGN(global_row_id, parquet_batch_reader->GetPreviousBatchGlobalRowId(0)); + ASSERT_EQ(global_row_id, 1); + ASSERT_OK_AND_ASSIGN(global_row_id, parquet_batch_reader->GetPreviousBatchGlobalRowId(1)); + ASSERT_EQ(global_row_id, 2); + // out of bound return invalid + ASSERT_NOK(parquet_batch_reader->GetPreviousBatchGlobalRowId(2)); // Not adjacent pages ASSERT_OK_AND_ASSIGN(auto batch2, CollectOneBatch(parquet_batch_reader.get())); @@ -1078,22 +1082,23 @@ TEST_F(ParquetFileBatchReaderTest, TestRowMapping) { ])") .ValueOrDie()); ASSERT_TRUE(batch2->Equals(expected_batch2)) << batch2->ToString(); - 
ASSERT_EQ(parquet_batch_reader->GetPreviousBatchGlobalRowId(0).value(), 3); - ASSERT_EQ(parquet_batch_reader->GetPreviousBatchGlobalRowId(1).value(), 5); + ASSERT_OK_AND_ASSIGN(global_row_id, parquet_batch_reader->GetPreviousBatchGlobalRowId(0)); + ASSERT_EQ(global_row_id, 3); + ASSERT_OK_AND_ASSIGN(global_row_id, parquet_batch_reader->GetPreviousBatchGlobalRowId(1)); + ASSERT_EQ(global_row_id, 5); // Only one record read ASSERT_OK_AND_ASSIGN(auto batch3, CollectOneBatch(parquet_batch_reader.get())); auto expected_batch3 = src_array->Slice(6, 1); ASSERT_TRUE(batch3->Equals(expected_batch3)) << batch3->ToString(); - ASSERT_EQ(parquet_batch_reader->GetPreviousBatchGlobalRowId(0).value(), 6); - // out of bound, return max value - ASSERT_EQ(parquet_batch_reader->GetPreviousBatchGlobalRowId(1).value(), - std::numeric_limits::max()); + ASSERT_OK_AND_ASSIGN(global_row_id, parquet_batch_reader->GetPreviousBatchGlobalRowId(0)); + ASSERT_EQ(global_row_id, 6); + ASSERT_NOK(parquet_batch_reader->GetPreviousBatchGlobalRowId(1)); ASSERT_OK_AND_ASSIGN(auto eof_batch, CollectOneBatch(parquet_batch_reader.get())); ASSERT_EQ(nullptr, eof_batch); - // previous batch is eof, return last none-eof batch's row id - ASSERT_EQ(parquet_batch_reader->GetPreviousBatchGlobalRowId(0).value(), 6); + // previous batch is eof, return invalid. 
+ ASSERT_NOK(parquet_batch_reader->GetPreviousBatchGlobalRowId(0)); } } // namespace paimon::parquet::test From 6bd98d8cee06c80b8e09f6933aa53e3978d879eb Mon Sep 17 00:00:00 2001 From: zhouhongfeng Date: Fri, 26 Jun 2026 14:09:12 +0800 Subject: [PATCH 20/38] fix: lance and blob return NotImplemented --- src/paimon/format/blob/blob_file_batch_reader.h | 2 +- src/paimon/format/lance/lance_file_batch_reader.h | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/src/paimon/format/blob/blob_file_batch_reader.h b/src/paimon/format/blob/blob_file_batch_reader.h index 15d6c8075..c4619aff5 100644 --- a/src/paimon/format/blob/blob_file_batch_reader.h +++ b/src/paimon/format/blob/blob_file_batch_reader.h @@ -99,7 +99,7 @@ class BlobFileBatchReader : public FileBatchReader { Result GetPreviousBatchGlobalRowId(uint64_t batch_row_id) const override { if (all_blob_lengths_.size() != target_blob_lengths_.size()) { - return Status::Invalid( + return Status::NotImplemented( "Cannot call GetPreviousBatchGlobalRowId in BlobFileBatchReader because, after " "bitmap pushdown, rows in the array returned by NextBatch are no longer " "contiguous."); diff --git a/src/paimon/format/lance/lance_file_batch_reader.h b/src/paimon/format/lance/lance_file_batch_reader.h index ba8e16cf2..cfd899c76 100644 --- a/src/paimon/format/lance/lance_file_batch_reader.h +++ b/src/paimon/format/lance/lance_file_batch_reader.h @@ -44,7 +44,7 @@ class LanceFileBatchReader : public FileBatchReader { Result GetPreviousBatchGlobalRowId(uint64_t batch_row_id) const override { if (!read_row_ids_.empty() && read_row_ids_.size() != num_rows_) { // TODO(xinyu.lxy): support function - return Status::Invalid( + return Status::NotImplemented( "Cannot call GetPreviousBatchGlobalRowId in LanceFileBatchReader because, after " "bitmap pushdown, rows in the array returned by NextBatch are no longer " "contiguous."); From a6945676a4b6b4cc7d5ad7280900c693fd77b095 Mon Sep 17 00:00:00 2001 From: zhouhongfeng Date: 
Fri, 26 Jun 2026 14:42:36 +0800 Subject: [PATCH 21/38] fix: add inclusive extend for fully matched rowgroup in SetReadSchema --- .../parquet/parquet_file_batch_reader.cpp | 4 +- .../parquet_file_batch_reader_test.cpp | 43 ++++++++++++++++++- 2 files changed, 44 insertions(+), 3 deletions(-) diff --git a/src/paimon/format/parquet/parquet_file_batch_reader.cpp b/src/paimon/format/parquet/parquet_file_batch_reader.cpp index e6617ecab..123eef80b 100644 --- a/src/paimon/format/parquet/parquet_file_batch_reader.cpp +++ b/src/paimon/format/parquet/parquet_file_batch_reader.cpp @@ -217,7 +217,9 @@ Status ParquetFileBatchReader::SetReadSchema( target_row_groups.emplace_back( /*rg_index=*/rg_id, /*is_partially_matched=*/false, - /*ranges=*/RowRanges(Range(0, reader_->GetAllRowGroupRanges()[rg_id].second))); + /*ranges=*/ + RowRanges(Range(0, reader_->GetAllRowGroupRanges()[rg_id].second - + reader_->GetAllRowGroupRanges()[rg_id].first - 1))); } } PAIMON_ASSIGN_OR_RAISE(all_row_ranges_, GetAllTargetRowRanges(target_row_groups)); diff --git a/src/paimon/format/parquet/parquet_file_batch_reader_test.cpp b/src/paimon/format/parquet/parquet_file_batch_reader_test.cpp index c9ddaab8e..627586ffc 100644 --- a/src/paimon/format/parquet/parquet_file_batch_reader_test.cpp +++ b/src/paimon/format/parquet/parquet_file_batch_reader_test.cpp @@ -1041,10 +1041,10 @@ TEST_F(ParquetFileBatchReaderTest, TestAddMetadataPerFieldMetadata) { ASSERT_TRUE(data->Equals(*result_array->chunk(0))) << result_array->ToString(); } -TEST_F(ParquetFileBatchReaderTest, TestRowMapping) { +TEST_F(ParquetFileBatchReaderTest, TestRowMappingSimple) { arrow::FieldVector fields = {arrow::field("f0", arrow::int32())}; auto src_array = MakeSequentialIntData(12); - // data in file rowGroup0:[0, 1, 2, 3, 4, 5] | rowGroup1:[6, 7, 8, 9, 10, 11] + // data in file rowGroup0:[0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11] // one row per page auto arrow_schema = arrow::schema(fields); WriteArray(file_path_, src_array, arrow_schema, 
/*write_batch_size=*/1, @@ -1101,4 +1101,43 @@ TEST_F(ParquetFileBatchReaderTest, TestRowMapping) { ASSERT_NOK(parquet_batch_reader->GetPreviousBatchGlobalRowId(0)); } +TEST_F(ParquetFileBatchReaderTest, TestRowMappingFullyAndPartially) { + arrow::FieldVector fields = {arrow::field("f0", arrow::int32())}; + auto src_array = MakeSequentialIntData(12); + // data in file RowGroup0:[0, 1, 2] | RowGroup1:[3, 4, 5] | RowGroup2:[6, 7, 8] | RowGroup3:[9, + // 10, 11] one row per page + auto arrow_schema = arrow::schema(fields); + WriteArray(file_path_, src_array, arrow_schema, /*write_batch_size=*/1, + /*enable_dictionary=*/true, /*max_row_group_length=*/3, /*max_page_size=*/1); + + // 3<=f0<=5 || f0==6 || f0==8 + // RowGroup 1 is fully matched, RowGroup 2 is partially matched, RowGroup 0 and RowGroup 3 are + // not matched. + ASSERT_OK_AND_ASSIGN( + auto predicate, + PredicateBuilder::Or({PredicateBuilder::Between(/*field_index=*/0, /*field_name=*/"f0", + FieldType::INT, Literal(3), Literal(5)), + PredicateBuilder::Equal(/*field_index=*/0, /*field_name=*/"f0", + FieldType::INT, Literal(6)), + PredicateBuilder::Equal(/*field_index=*/0, /*field_name=*/"f0", + FieldType::INT, Literal(8))})); + + auto parquet_batch_reader = PrepareParquetFileBatchReader( + file_path_, arrow_schema, /*predicate=*/predicate, std::nullopt, /*batch_size=*/3); + + uint64_t global_row_id = 0; + ASSERT_NOK(parquet_batch_reader->GetPreviousBatchGlobalRowId(0)); + ASSERT_OK_AND_ASSIGN(auto batch1, CollectOneBatch(parquet_batch_reader.get())); + ASSERT_OK_AND_ASSIGN(global_row_id, parquet_batch_reader->GetPreviousBatchGlobalRowId(0)); + ASSERT_EQ(global_row_id, 3); + ASSERT_OK_AND_ASSIGN(global_row_id, parquet_batch_reader->GetPreviousBatchGlobalRowId(2)); + ASSERT_EQ(global_row_id, 5); + + ASSERT_OK_AND_ASSIGN(auto batch2, CollectOneBatch(parquet_batch_reader.get())); + ASSERT_OK_AND_ASSIGN(global_row_id, parquet_batch_reader->GetPreviousBatchGlobalRowId(0)); + ASSERT_EQ(global_row_id, 6); + 
ASSERT_OK_AND_ASSIGN(global_row_id, parquet_batch_reader->GetPreviousBatchGlobalRowId(1)); + ASSERT_EQ(global_row_id, 8); +} + } // namespace paimon::parquet::test From eb48e4239d55526f4497347f297dd7942aadf8d2 Mon Sep 17 00:00:00 2001 From: zhouhongfeng Date: Fri, 26 Jun 2026 14:58:31 +0800 Subject: [PATCH 22/38] fix: calling SetReadSchema multiple times does not clear row_mapping --- .../parquet/parquet_file_batch_reader.cpp | 3 +- .../parquet/parquet_file_batch_reader.h | 3 +- .../parquet_file_batch_reader_test.cpp | 50 +++++++++++++++++++ 3 files changed, 53 insertions(+), 3 deletions(-) diff --git a/src/paimon/format/parquet/parquet_file_batch_reader.cpp b/src/paimon/format/parquet/parquet_file_batch_reader.cpp index 123eef80b..73d066b6a 100644 --- a/src/paimon/format/parquet/parquet_file_batch_reader.cpp +++ b/src/paimon/format/parquet/parquet_file_batch_reader.cpp @@ -534,7 +534,8 @@ Result> ParquetFileBatchReader::ComputeNestedColumnIndices( } Result ParquetFileBatchReader::GetAllTargetRowRanges( - const std::vector& target_row_groups) const { + const std::vector& target_row_groups) { + row_mapping_.clear(); auto all_row_group_ranges = reader_->GetAllRowGroupRanges(); RowRanges all_ranges; for (const auto& target_row_group : target_row_groups) { diff --git a/src/paimon/format/parquet/parquet_file_batch_reader.h b/src/paimon/format/parquet/parquet_file_batch_reader.h index 4d2743eef..b05345037 100644 --- a/src/paimon/format/parquet/parquet_file_batch_reader.h +++ b/src/paimon/format/parquet/parquet_file_batch_reader.h @@ -184,8 +184,7 @@ class ParquetFileBatchReader : public PrefetchFileBatchReader { const std::shared_ptr& read_schema, const std::shared_ptr& file_schema); - Result GetAllTargetRowRanges( - const std::vector& target_row_groups) const; + Result GetAllTargetRowRanges(const std::vector& target_row_groups); // precondition: predicate supposed not be empty Result> FilterRowGroupsByPredicate( diff --git 
a/src/paimon/format/parquet/parquet_file_batch_reader_test.cpp b/src/paimon/format/parquet/parquet_file_batch_reader_test.cpp index 627586ffc..23e516dd4 100644 --- a/src/paimon/format/parquet/parquet_file_batch_reader_test.cpp +++ b/src/paimon/format/parquet/parquet_file_batch_reader_test.cpp @@ -1140,4 +1140,54 @@ TEST_F(ParquetFileBatchReaderTest, TestRowMappingFullyAndPartially) { ASSERT_EQ(global_row_id, 8); } +TEST_F(ParquetFileBatchReaderTest, TestRowMappingSetReadSchemaTwice) { + arrow::FieldVector fields = {arrow::field("f0", arrow::int32())}; + auto src_array = MakeSequentialIntData(12); + // data in file RowGroup0:[0, 1, 2] | RowGroup1:[3, 4, 5] | RowGroup2:[6, 7, 8] | RowGroup3:[9, + // 10, 11] one row per page + auto arrow_schema = arrow::schema(fields); + WriteArray(file_path_, src_array, arrow_schema, /*write_batch_size=*/1, + /*enable_dictionary=*/true, /*max_row_group_length=*/3, /*max_page_size=*/1); + + // 1<=f0<=3 || 6<=f0<=7 + ASSERT_OK_AND_ASSIGN( + auto predicate, + PredicateBuilder::Or({PredicateBuilder::Between(/*field_index=*/0, /*field_name=*/"f0", + FieldType::INT, Literal(1), Literal(3)), + PredicateBuilder::Between(/*field_index=*/0, /*field_name=*/"f0", + FieldType::INT, Literal(6), Literal(7))})); + + auto parquet_batch_reader = PrepareParquetFileBatchReader( + file_path_, arrow_schema, /*predicate=*/predicate, std::nullopt, /*batch_size=*/3); + + uint64_t global_row_id = 0; + ASSERT_NOK(parquet_batch_reader->GetPreviousBatchGlobalRowId(0)); + ASSERT_OK_AND_ASSIGN(auto batch1, CollectOneBatch(parquet_batch_reader.get())); + ASSERT_OK_AND_ASSIGN(global_row_id, parquet_batch_reader->GetPreviousBatchGlobalRowId(0)); + ASSERT_EQ(global_row_id, 1); + ASSERT_OK_AND_ASSIGN(global_row_id, parquet_batch_reader->GetPreviousBatchGlobalRowId(1)); + ASSERT_EQ(global_row_id, 2); + + ASSERT_OK_AND_ASSIGN(auto batch2, CollectOneBatch(parquet_batch_reader.get())); + ASSERT_OK_AND_ASSIGN(global_row_id, 
parquet_batch_reader->GetPreviousBatchGlobalRowId(0)); + ASSERT_EQ(global_row_id, 3); + ASSERT_NOK(parquet_batch_reader->GetPreviousBatchGlobalRowId(1)); + + ASSERT_OK_AND_ASSIGN( + predicate, + PredicateBuilder::Or({PredicateBuilder::Between(/*field_index=*/0, /*field_name=*/"f0", + FieldType::INT, Literal(3), Literal(5))})); + + std::unique_ptr c_schema = std::make_unique(); + auto arrow_status = arrow::ExportSchema(*arrow_schema, c_schema.get()); + ASSERT_OK( + parquet_batch_reader->SetReadSchema(c_schema.get(), /*predicate=*/predicate, std::nullopt)); + ASSERT_NOK(parquet_batch_reader->GetPreviousBatchGlobalRowId(0)); + ASSERT_OK_AND_ASSIGN(auto batch3, CollectOneBatch(parquet_batch_reader.get())); + ASSERT_OK_AND_ASSIGN(global_row_id, parquet_batch_reader->GetPreviousBatchGlobalRowId(0)); + ASSERT_EQ(global_row_id, 3); + ASSERT_OK_AND_ASSIGN(global_row_id, parquet_batch_reader->GetPreviousBatchGlobalRowId(2)); + ASSERT_EQ(global_row_id, 5); +} + } // namespace paimon::parquet::test From 7b0079453e87b968ff364262cdea34776bcba699 Mon Sep 17 00:00:00 2001 From: zhouhongfeng Date: Fri, 26 Jun 2026 16:01:27 +0800 Subject: [PATCH 23/38] test: add test for PrefetchFileBatchReaderImpl --- .../prefetch_file_batch_reader_impl_test.cpp | 52 +++++++++++++++++++ .../parquet_file_batch_reader_test.cpp | 49 ++++++++--------- .../testing/utils/read_result_collector.h | 47 +++++++++++++++++ 3 files changed, 121 insertions(+), 27 deletions(-) diff --git a/src/paimon/common/reader/prefetch_file_batch_reader_impl_test.cpp b/src/paimon/common/reader/prefetch_file_batch_reader_impl_test.cpp index 120cd67e6..e3cda6eea 100644 --- a/src/paimon/common/reader/prefetch_file_batch_reader_impl_test.cpp +++ b/src/paimon/common/reader/prefetch_file_batch_reader_impl_test.cpp @@ -914,4 +914,56 @@ TEST_F(PrefetchFileBatchReaderImplTest, TestPrefetchWithBitmap) { ASSERT_TRUE(result_chunk_array->Equals(expected_chunk_array)); } +TEST_P(PrefetchFileBatchReaderImplTest, TestRowMapping) { + auto 
[file_format, cache_mode] = GetParam(); + auto data_array = PrepareArray(90); + PrepareTestData(file_format, data_array, /*stripe_row_count=*/30, /*row_index_stride=*/10); + auto schema = arrow::schema(fields_); + ASSERT_OK_AND_ASSIGN( + auto predicate, + PredicateBuilder::Or({ + PredicateBuilder::Between(/*field_index=*/1, /*field_name=*/"f1", FieldType::BIGINT, + Literal(20l), Literal(29l)), + PredicateBuilder::Between(/*field_index=*/1, /*field_name=*/"f1", FieldType::BIGINT, + Literal(70l), Literal(79l)), + })); + + auto reader = + PreparePrefetchReader(file_format, schema.get(), predicate, + /*selection_bitmap=*/std::nullopt, + /*batch_size=*/10, /*prefetch_max_parallel_num=*/3, cache_mode); + uint64_t global_row_id = 0; + ASSERT_NOK(reader->GetPreviousBatchGlobalRowId(0)); + ASSERT_OK_AND_ASSIGN(auto batch, + paimon::test::ReadResultCollector::CollectResultOneBatch(reader.get())); + ASSERT_OK_AND_ASSIGN(global_row_id, reader->GetPreviousBatchGlobalRowId(0)); + ASSERT_EQ(global_row_id, 20); + ASSERT_OK_AND_ASSIGN(global_row_id, reader->GetPreviousBatchGlobalRowId(5)); + ASSERT_EQ(global_row_id, 25); + + ASSERT_OK_AND_ASSIGN(batch, + paimon::test::ReadResultCollector::CollectResultOneBatch(reader.get())); + ASSERT_OK_AND_ASSIGN(global_row_id, reader->GetPreviousBatchGlobalRowId(0)); + ASSERT_EQ(global_row_id, 70); + ASSERT_OK_AND_ASSIGN(global_row_id, reader->GetPreviousBatchGlobalRowId(5)); + ASSERT_EQ(global_row_id, 75); + + // Set read schema again + std::unique_ptr c_schema = std::make_unique(); + ASSERT_TRUE(arrow::ExportSchema(*schema, c_schema.get()).ok()); + predicate = PredicateBuilder::Between(/*field_index=*/1, /*field_name=*/"f1", FieldType::BIGINT, + Literal(30l), Literal(49l)); + ASSERT_OK(reader->SetReadSchema(c_schema.get(), predicate, std::nullopt)); + + ASSERT_NOK(reader->GetPreviousBatchGlobalRowId(0)); + ASSERT_OK_AND_ASSIGN(batch, + paimon::test::ReadResultCollector::CollectResultOneBatch(reader.get())); + 
ASSERT_OK_AND_ASSIGN(global_row_id, reader->GetPreviousBatchGlobalRowId(0)); + ASSERT_EQ(global_row_id, 30); + ASSERT_OK_AND_ASSIGN(batch, + paimon::test::ReadResultCollector::CollectResultOneBatch(reader.get())); + ASSERT_OK_AND_ASSIGN(global_row_id, reader->GetPreviousBatchGlobalRowId(5)); + ASSERT_EQ(global_row_id, 45); +} + } // namespace paimon::test diff --git a/src/paimon/format/parquet/parquet_file_batch_reader_test.cpp b/src/paimon/format/parquet/parquet_file_batch_reader_test.cpp index 23e516dd4..4e705674f 100644 --- a/src/paimon/format/parquet/parquet_file_batch_reader_test.cpp +++ b/src/paimon/format/parquet/parquet_file_batch_reader_test.cpp @@ -243,20 +243,6 @@ static std::shared_ptr MakeSequentialIntData(int32_t num_row return arrow::StructArray::Make({val_array}, {field}).ValueOrDie(); } -static Result> CollectOneBatch(ParquetFileBatchReader* reader) { - PAIMON_ASSIGN_OR_RAISE(BatchReader::ReadBatch batch, reader->NextBatch()); - if (BatchReader::IsEofBatch(batch)) { - return std::shared_ptr(nullptr); - } - PAIMON_ASSIGN_OR_RAISE_FROM_ARROW(std::shared_ptr result_array, - arrow::ImportArray(batch.first.get(), batch.second.get())); - auto struct_array = std::dynamic_pointer_cast(result_array); - if (!struct_array) { - return Status::Invalid("CollectOneBatch expected StructArray"); - } - return struct_array; -} - TEST_F(ParquetFileBatchReaderTest, TestParquetMetadataCacheReusesSerializedFooter) { WriteArray(file_path_, struct_array_, schema_, /*write_batch_size=*/struct_array_->length(), /*enable_dictionary=*/false, @@ -1063,9 +1049,10 @@ TEST_F(ParquetFileBatchReaderTest, TestRowMappingSimple) { uint64_t global_row_id = 0; ASSERT_NOK(parquet_batch_reader->GetPreviousBatchGlobalRowId(0)); - ASSERT_OK_AND_ASSIGN(auto batch1, CollectOneBatch(parquet_batch_reader.get())); + ASSERT_OK_AND_ASSIGN(auto batch1, paimon::test::ReadResultCollector::CollectResultOneBatch( + parquet_batch_reader.get())); auto expected_batch1 = src_array->Slice(1, 2); - 
ASSERT_TRUE(batch1->Equals(expected_batch1)) << batch1->ToString(); + ASSERT_TRUE(batch1->chunk(0)->Equals(expected_batch1)) << batch1->ToString(); ASSERT_OK_AND_ASSIGN(global_row_id, parquet_batch_reader->GetPreviousBatchGlobalRowId(0)); ASSERT_EQ(global_row_id, 1); ASSERT_OK_AND_ASSIGN(global_row_id, parquet_batch_reader->GetPreviousBatchGlobalRowId(1)); @@ -1074,28 +1061,31 @@ TEST_F(ParquetFileBatchReaderTest, TestRowMappingSimple) { ASSERT_NOK(parquet_batch_reader->GetPreviousBatchGlobalRowId(2)); // Not adjacent pages - ASSERT_OK_AND_ASSIGN(auto batch2, CollectOneBatch(parquet_batch_reader.get())); + ASSERT_OK_AND_ASSIGN(auto batch2, paimon::test::ReadResultCollector::CollectResultOneBatch( + parquet_batch_reader.get())); auto expected_batch2 = std::dynamic_pointer_cast( arrow::ipc::internal::json::ArrayFromJSON(arrow::struct_(fields), R"([ [3], [5] ])") .ValueOrDie()); - ASSERT_TRUE(batch2->Equals(expected_batch2)) << batch2->ToString(); + ASSERT_TRUE(batch2->chunk(0)->Equals(expected_batch2)) << batch2->ToString(); ASSERT_OK_AND_ASSIGN(global_row_id, parquet_batch_reader->GetPreviousBatchGlobalRowId(0)); ASSERT_EQ(global_row_id, 3); ASSERT_OK_AND_ASSIGN(global_row_id, parquet_batch_reader->GetPreviousBatchGlobalRowId(1)); ASSERT_EQ(global_row_id, 5); // Only one record read - ASSERT_OK_AND_ASSIGN(auto batch3, CollectOneBatch(parquet_batch_reader.get())); + ASSERT_OK_AND_ASSIGN(auto batch3, paimon::test::ReadResultCollector::CollectResultOneBatch( + parquet_batch_reader.get())); auto expected_batch3 = src_array->Slice(6, 1); - ASSERT_TRUE(batch3->Equals(expected_batch3)) << batch3->ToString(); + ASSERT_TRUE(batch3->chunk(0)->Equals(expected_batch3)) << batch3->ToString(); ASSERT_OK_AND_ASSIGN(global_row_id, parquet_batch_reader->GetPreviousBatchGlobalRowId(0)); ASSERT_EQ(global_row_id, 6); ASSERT_NOK(parquet_batch_reader->GetPreviousBatchGlobalRowId(1)); - ASSERT_OK_AND_ASSIGN(auto eof_batch, CollectOneBatch(parquet_batch_reader.get())); + 
ASSERT_OK_AND_ASSIGN(auto eof_batch, paimon::test::ReadResultCollector::CollectResultOneBatch( + parquet_batch_reader.get())); ASSERT_EQ(nullptr, eof_batch); // previous batch is eof, return invalid. ASSERT_NOK(parquet_batch_reader->GetPreviousBatchGlobalRowId(0)); @@ -1127,13 +1117,15 @@ TEST_F(ParquetFileBatchReaderTest, TestRowMappingFullyAndPartially) { uint64_t global_row_id = 0; ASSERT_NOK(parquet_batch_reader->GetPreviousBatchGlobalRowId(0)); - ASSERT_OK_AND_ASSIGN(auto batch1, CollectOneBatch(parquet_batch_reader.get())); + ASSERT_OK_AND_ASSIGN(auto batch1, paimon::test::ReadResultCollector::CollectResultOneBatch( + parquet_batch_reader.get())); ASSERT_OK_AND_ASSIGN(global_row_id, parquet_batch_reader->GetPreviousBatchGlobalRowId(0)); ASSERT_EQ(global_row_id, 3); ASSERT_OK_AND_ASSIGN(global_row_id, parquet_batch_reader->GetPreviousBatchGlobalRowId(2)); ASSERT_EQ(global_row_id, 5); - ASSERT_OK_AND_ASSIGN(auto batch2, CollectOneBatch(parquet_batch_reader.get())); + ASSERT_OK_AND_ASSIGN(auto batch2, paimon::test::ReadResultCollector::CollectResultOneBatch( + parquet_batch_reader.get())); ASSERT_OK_AND_ASSIGN(global_row_id, parquet_batch_reader->GetPreviousBatchGlobalRowId(0)); ASSERT_EQ(global_row_id, 6); ASSERT_OK_AND_ASSIGN(global_row_id, parquet_batch_reader->GetPreviousBatchGlobalRowId(1)); @@ -1162,13 +1154,15 @@ TEST_F(ParquetFileBatchReaderTest, TestRowMappingSetReadSchemaTwice) { uint64_t global_row_id = 0; ASSERT_NOK(parquet_batch_reader->GetPreviousBatchGlobalRowId(0)); - ASSERT_OK_AND_ASSIGN(auto batch1, CollectOneBatch(parquet_batch_reader.get())); + ASSERT_OK_AND_ASSIGN(auto batch1, paimon::test::ReadResultCollector::CollectResultOneBatch( + parquet_batch_reader.get())); ASSERT_OK_AND_ASSIGN(global_row_id, parquet_batch_reader->GetPreviousBatchGlobalRowId(0)); ASSERT_EQ(global_row_id, 1); ASSERT_OK_AND_ASSIGN(global_row_id, parquet_batch_reader->GetPreviousBatchGlobalRowId(1)); ASSERT_EQ(global_row_id, 2); - ASSERT_OK_AND_ASSIGN(auto batch2, 
CollectOneBatch(parquet_batch_reader.get())); + ASSERT_OK_AND_ASSIGN(auto batch2, paimon::test::ReadResultCollector::CollectResultOneBatch( + parquet_batch_reader.get())); ASSERT_OK_AND_ASSIGN(global_row_id, parquet_batch_reader->GetPreviousBatchGlobalRowId(0)); ASSERT_EQ(global_row_id, 3); ASSERT_NOK(parquet_batch_reader->GetPreviousBatchGlobalRowId(1)); @@ -1179,11 +1173,12 @@ TEST_F(ParquetFileBatchReaderTest, TestRowMappingSetReadSchemaTwice) { FieldType::INT, Literal(3), Literal(5))})); std::unique_ptr c_schema = std::make_unique(); - auto arrow_status = arrow::ExportSchema(*arrow_schema, c_schema.get()); + ASSERT_TRUE(arrow::ExportSchema(*arrow_schema, c_schema.get()).ok()); ASSERT_OK( parquet_batch_reader->SetReadSchema(c_schema.get(), /*predicate=*/predicate, std::nullopt)); ASSERT_NOK(parquet_batch_reader->GetPreviousBatchGlobalRowId(0)); - ASSERT_OK_AND_ASSIGN(auto batch3, CollectOneBatch(parquet_batch_reader.get())); + ASSERT_OK_AND_ASSIGN(auto batch3, paimon::test::ReadResultCollector::CollectResultOneBatch( + parquet_batch_reader.get())); ASSERT_OK_AND_ASSIGN(global_row_id, parquet_batch_reader->GetPreviousBatchGlobalRowId(0)); ASSERT_EQ(global_row_id, 3); ASSERT_OK_AND_ASSIGN(global_row_id, parquet_batch_reader->GetPreviousBatchGlobalRowId(2)); diff --git a/src/paimon/testing/utils/read_result_collector.h b/src/paimon/testing/utils/read_result_collector.h index 5106f3b26..ce16a66dc 100644 --- a/src/paimon/testing/utils/read_result_collector.h +++ b/src/paimon/testing/utils/read_result_collector.h @@ -131,6 +131,53 @@ class ReadResultCollector { return chunk_array; } + static Result> CollectResultOneBatch( + BatchReader* batch_reader) { + return CollectResultOneBatch(batch_reader, /*max simulated data processing time*/ 0); + } + + static Result> CollectResultOneBatch( + BatchReader* batch_reader, int64_t max_data_processing_time_in_us) { + int64_t seed = DateTimeUtils::GetCurrentUTCTimeUs(); + std::srand(seed); + auto batch_result = 
batch_reader->NextBatch(); + BatchReader::ReadBatch batch; + if (!batch_result.ok()) { + if (batch_result.status().ToString().find("should use NextBatchWithBitmap") != + std::string::npos) { + PAIMON_ASSIGN_OR_RAISE(BatchReader::ReadBatchWithBitmap batch_with_bitmap, + batch_reader->NextBatchWithBitmap()); + if (BatchReader::IsEofBatch(batch_with_bitmap)) { + return std::shared_ptr(); + } + assert(!batch_with_bitmap.second.IsEmpty()); + PAIMON_ASSIGN_OR_RAISE( + batch, ReaderUtils::ApplyBitmapToReadBatch(std::move(batch_with_bitmap), + arrow::default_memory_pool())); + } else { + return batch_result.status(); + } + } else { + batch = std::move(batch_result).value(); + if (BatchReader::IsEofBatch(batch)) { + return std::shared_ptr(); + } + } + auto& [c_array, c_schema] = batch; + assert(c_array->length > 0); + PAIMON_ASSIGN_OR_RAISE_FROM_ARROW(auto result_array, + arrow::ImportArray(c_array.get(), c_schema.get())); + PAIMON_ASSIGN_OR_RAISE( + auto converted_array, + DictArrayConverter::ConvertDictArray(result_array, arrow::default_memory_pool())); + if (max_data_processing_time_in_us > 0) { + usleep(std::rand() % max_data_processing_time_in_us); + } + PAIMON_ASSIGN_OR_RAISE_FROM_ARROW(auto chunk_array, + arrow::ChunkedArray::Make({converted_array})); + return chunk_array; + } + static Result> GetArray(BatchReader::ReadBatch&& batch) { if (BatchReader::IsEofBatch(batch)) { return std::shared_ptr(); From 0d9a174cabca67a271828ff35e0bedc2dc72212b Mon Sep 17 00:00:00 2001 From: zhouhongfeng Date: Fri, 26 Jun 2026 16:13:42 +0800 Subject: [PATCH 24/38] style: replace auto in assigning macro with explicit type --- .../prefetch_file_batch_reader_impl_test.cpp | 2 +- .../parquet_file_batch_reader_test.cpp | 45 +++++++++++-------- .../testing/utils/read_result_collector.h | 4 +- 3 files changed, 30 insertions(+), 21 deletions(-) diff --git a/src/paimon/common/reader/prefetch_file_batch_reader_impl_test.cpp b/src/paimon/common/reader/prefetch_file_batch_reader_impl_test.cpp 
index e3cda6eea..350854900 100644 --- a/src/paimon/common/reader/prefetch_file_batch_reader_impl_test.cpp +++ b/src/paimon/common/reader/prefetch_file_batch_reader_impl_test.cpp @@ -934,7 +934,7 @@ TEST_P(PrefetchFileBatchReaderImplTest, TestRowMapping) { /*batch_size=*/10, /*prefetch_max_parallel_num=*/3, cache_mode); uint64_t global_row_id = 0; ASSERT_NOK(reader->GetPreviousBatchGlobalRowId(0)); - ASSERT_OK_AND_ASSIGN(auto batch, + ASSERT_OK_AND_ASSIGN(std::shared_ptr batch, paimon::test::ReadResultCollector::CollectResultOneBatch(reader.get())); ASSERT_OK_AND_ASSIGN(global_row_id, reader->GetPreviousBatchGlobalRowId(0)); ASSERT_EQ(global_row_id, 20); diff --git a/src/paimon/format/parquet/parquet_file_batch_reader_test.cpp b/src/paimon/format/parquet/parquet_file_batch_reader_test.cpp index 4e705674f..530915545 100644 --- a/src/paimon/format/parquet/parquet_file_batch_reader_test.cpp +++ b/src/paimon/format/parquet/parquet_file_batch_reader_test.cpp @@ -1049,8 +1049,9 @@ TEST_F(ParquetFileBatchReaderTest, TestRowMappingSimple) { uint64_t global_row_id = 0; ASSERT_NOK(parquet_batch_reader->GetPreviousBatchGlobalRowId(0)); - ASSERT_OK_AND_ASSIGN(auto batch1, paimon::test::ReadResultCollector::CollectResultOneBatch( - parquet_batch_reader.get())); + ASSERT_OK_AND_ASSIGN( + std::shared_ptr batch1, + paimon::test::ReadResultCollector::CollectResultOneBatch(parquet_batch_reader.get())); auto expected_batch1 = src_array->Slice(1, 2); ASSERT_TRUE(batch1->chunk(0)->Equals(expected_batch1)) << batch1->ToString(); ASSERT_OK_AND_ASSIGN(global_row_id, parquet_batch_reader->GetPreviousBatchGlobalRowId(0)); @@ -1061,8 +1062,9 @@ TEST_F(ParquetFileBatchReaderTest, TestRowMappingSimple) { ASSERT_NOK(parquet_batch_reader->GetPreviousBatchGlobalRowId(2)); // Not adjacent pages - ASSERT_OK_AND_ASSIGN(auto batch2, paimon::test::ReadResultCollector::CollectResultOneBatch( - parquet_batch_reader.get())); + ASSERT_OK_AND_ASSIGN( + std::shared_ptr batch2, + 
paimon::test::ReadResultCollector::CollectResultOneBatch(parquet_batch_reader.get())); auto expected_batch2 = std::dynamic_pointer_cast( arrow::ipc::internal::json::ArrayFromJSON(arrow::struct_(fields), R"([ [3], @@ -1076,16 +1078,18 @@ TEST_F(ParquetFileBatchReaderTest, TestRowMappingSimple) { ASSERT_EQ(global_row_id, 5); // Only one record read - ASSERT_OK_AND_ASSIGN(auto batch3, paimon::test::ReadResultCollector::CollectResultOneBatch( - parquet_batch_reader.get())); + ASSERT_OK_AND_ASSIGN( + std::shared_ptr batch3, + paimon::test::ReadResultCollector::CollectResultOneBatch(parquet_batch_reader.get())); auto expected_batch3 = src_array->Slice(6, 1); ASSERT_TRUE(batch3->chunk(0)->Equals(expected_batch3)) << batch3->ToString(); ASSERT_OK_AND_ASSIGN(global_row_id, parquet_batch_reader->GetPreviousBatchGlobalRowId(0)); ASSERT_EQ(global_row_id, 6); ASSERT_NOK(parquet_batch_reader->GetPreviousBatchGlobalRowId(1)); - ASSERT_OK_AND_ASSIGN(auto eof_batch, paimon::test::ReadResultCollector::CollectResultOneBatch( - parquet_batch_reader.get())); + ASSERT_OK_AND_ASSIGN( + std::shared_ptr eof_batch, + paimon::test::ReadResultCollector::CollectResultOneBatch(parquet_batch_reader.get())); ASSERT_EQ(nullptr, eof_batch); // previous batch is eof, return invalid. 
ASSERT_NOK(parquet_batch_reader->GetPreviousBatchGlobalRowId(0)); @@ -1117,15 +1121,17 @@ TEST_F(ParquetFileBatchReaderTest, TestRowMappingFullyAndPartially) { uint64_t global_row_id = 0; ASSERT_NOK(parquet_batch_reader->GetPreviousBatchGlobalRowId(0)); - ASSERT_OK_AND_ASSIGN(auto batch1, paimon::test::ReadResultCollector::CollectResultOneBatch( - parquet_batch_reader.get())); + ASSERT_OK_AND_ASSIGN( + std::shared_ptr batch1, + paimon::test::ReadResultCollector::CollectResultOneBatch(parquet_batch_reader.get())); ASSERT_OK_AND_ASSIGN(global_row_id, parquet_batch_reader->GetPreviousBatchGlobalRowId(0)); ASSERT_EQ(global_row_id, 3); ASSERT_OK_AND_ASSIGN(global_row_id, parquet_batch_reader->GetPreviousBatchGlobalRowId(2)); ASSERT_EQ(global_row_id, 5); - ASSERT_OK_AND_ASSIGN(auto batch2, paimon::test::ReadResultCollector::CollectResultOneBatch( - parquet_batch_reader.get())); + ASSERT_OK_AND_ASSIGN( + std::shared_ptr batch2, + paimon::test::ReadResultCollector::CollectResultOneBatch(parquet_batch_reader.get())); ASSERT_OK_AND_ASSIGN(global_row_id, parquet_batch_reader->GetPreviousBatchGlobalRowId(0)); ASSERT_EQ(global_row_id, 6); ASSERT_OK_AND_ASSIGN(global_row_id, parquet_batch_reader->GetPreviousBatchGlobalRowId(1)); @@ -1154,15 +1160,17 @@ TEST_F(ParquetFileBatchReaderTest, TestRowMappingSetReadSchemaTwice) { uint64_t global_row_id = 0; ASSERT_NOK(parquet_batch_reader->GetPreviousBatchGlobalRowId(0)); - ASSERT_OK_AND_ASSIGN(auto batch1, paimon::test::ReadResultCollector::CollectResultOneBatch( - parquet_batch_reader.get())); + ASSERT_OK_AND_ASSIGN( + std::shared_ptr batch1, + paimon::test::ReadResultCollector::CollectResultOneBatch(parquet_batch_reader.get())); ASSERT_OK_AND_ASSIGN(global_row_id, parquet_batch_reader->GetPreviousBatchGlobalRowId(0)); ASSERT_EQ(global_row_id, 1); ASSERT_OK_AND_ASSIGN(global_row_id, parquet_batch_reader->GetPreviousBatchGlobalRowId(1)); ASSERT_EQ(global_row_id, 2); - ASSERT_OK_AND_ASSIGN(auto batch2, 
paimon::test::ReadResultCollector::CollectResultOneBatch( - parquet_batch_reader.get())); + ASSERT_OK_AND_ASSIGN( + std::shared_ptr batch2, + paimon::test::ReadResultCollector::CollectResultOneBatch(parquet_batch_reader.get())); ASSERT_OK_AND_ASSIGN(global_row_id, parquet_batch_reader->GetPreviousBatchGlobalRowId(0)); ASSERT_EQ(global_row_id, 3); ASSERT_NOK(parquet_batch_reader->GetPreviousBatchGlobalRowId(1)); @@ -1177,8 +1185,9 @@ TEST_F(ParquetFileBatchReaderTest, TestRowMappingSetReadSchemaTwice) { ASSERT_OK( parquet_batch_reader->SetReadSchema(c_schema.get(), /*predicate=*/predicate, std::nullopt)); ASSERT_NOK(parquet_batch_reader->GetPreviousBatchGlobalRowId(0)); - ASSERT_OK_AND_ASSIGN(auto batch3, paimon::test::ReadResultCollector::CollectResultOneBatch( - parquet_batch_reader.get())); + ASSERT_OK_AND_ASSIGN( + std::shared_ptr batch3, + paimon::test::ReadResultCollector::CollectResultOneBatch(parquet_batch_reader.get())); ASSERT_OK_AND_ASSIGN(global_row_id, parquet_batch_reader->GetPreviousBatchGlobalRowId(0)); ASSERT_EQ(global_row_id, 3); ASSERT_OK_AND_ASSIGN(global_row_id, parquet_batch_reader->GetPreviousBatchGlobalRowId(2)); diff --git a/src/paimon/testing/utils/read_result_collector.h b/src/paimon/testing/utils/read_result_collector.h index ce16a66dc..f2ab0d1fc 100644 --- a/src/paimon/testing/utils/read_result_collector.h +++ b/src/paimon/testing/utils/read_result_collector.h @@ -165,7 +165,7 @@ class ReadResultCollector { } auto& [c_array, c_schema] = batch; assert(c_array->length > 0); - PAIMON_ASSIGN_OR_RAISE_FROM_ARROW(auto result_array, + PAIMON_ASSIGN_OR_RAISE_FROM_ARROW(std::shared_ptr result_array, arrow::ImportArray(c_array.get(), c_schema.get())); PAIMON_ASSIGN_OR_RAISE( auto converted_array, @@ -173,7 +173,7 @@ class ReadResultCollector { if (max_data_processing_time_in_us > 0) { usleep(std::rand() % max_data_processing_time_in_us); } - PAIMON_ASSIGN_OR_RAISE_FROM_ARROW(auto chunk_array, + PAIMON_ASSIGN_OR_RAISE_FROM_ARROW(std::shared_ptr 
chunk_array, arrow::ChunkedArray::Make({converted_array})); return chunk_array; } From 09c3ef537824392947bff695184fae9c70e2ba2a Mon Sep 17 00:00:00 2001 From: zhouhongfeng Date: Fri, 26 Jun 2026 16:58:27 +0800 Subject: [PATCH 25/38] fix: mismatched Create function call --- .../format/parquet/page_filtered_row_group_reader_test.cpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/paimon/format/parquet/page_filtered_row_group_reader_test.cpp b/src/paimon/format/parquet/page_filtered_row_group_reader_test.cpp index 3c5d34820..29850a877 100644 --- a/src/paimon/format/parquet/page_filtered_row_group_reader_test.cpp +++ b/src/paimon/format/parquet/page_filtered_row_group_reader_test.cpp @@ -147,7 +147,7 @@ class PageFilteredRowGroupReaderTest : public ::testing::Test { enable_page_level_filter ? "true" : "false"; ASSERT_OK_AND_ASSIGN( auto batch_reader, - ParquetFileBatchReader::Create(std::move(in_stream), arrow_pool_, options, batch_size)); + ParquetFileBatchReader::Create(std::move(in_stream), options, batch_size, nullptr,arrow_pool_)); auto c_schema = std::make_unique(); ASSERT_TRUE(arrow::ExportSchema(*read_schema, c_schema.get()).ok()); ASSERT_OK(batch_reader->SetReadSchema(c_schema.get(), predicate, bitmap)); From 1336922035230a416652a8fdd81ef936449f8c94 Mon Sep 17 00:00:00 2001 From: zhouhongfeng Date: Mon, 29 Jun 2026 10:12:41 +0800 Subject: [PATCH 26/38] style: rename interfaces and parameters --- .../map_shared_shredding_file_reader.cpp | 4 +- .../map_shared_shredding_file_reader.h | 2 +- .../bitmap/apply_bitmap_index_batch_reader.h | 6 +- .../reader/delegating_prefetch_reader.h | 4 +- .../prefetch_file_batch_reader_impl.cpp | 4 +- .../reader/prefetch_file_batch_reader_impl.h | 2 +- .../prefetch_file_batch_reader_impl_test.cpp | 66 ++++++------- .../apply_deletion_vector_batch_reader.h | 8 +- .../complete_row_tracking_fields_reader.cpp | 6 +- .../io/complete_row_tracking_fields_reader.h | 4 +- src/paimon/core/io/field_mapping_reader.h | 4 
+- .../io/key_value_data_file_record_reader.cpp | 2 +- .../format/avro/avro_file_batch_reader.h | 2 +- .../avro/avro_file_batch_reader_test.cpp | 20 ++-- .../format/blob/blob_file_batch_reader.h | 4 +- .../blob/blob_file_batch_reader_test.cpp | 12 +-- .../format/lance/lance_file_batch_reader.h | 4 +- .../lance/lance_format_reader_writer_test.cpp | 12 +-- src/paimon/format/orc/orc_file_batch_reader.h | 2 +- .../format/orc/orc_file_batch_reader_test.cpp | 14 +-- .../parquet/parquet_file_batch_reader.h | 2 +- .../parquet_file_batch_reader_test.cpp | 96 +++++++++---------- .../testing/mock/mock_file_batch_reader.h | 2 +- 23 files changed, 141 insertions(+), 141 deletions(-) diff --git a/src/paimon/common/data/shredding/map_shared_shredding_file_reader.cpp b/src/paimon/common/data/shredding/map_shared_shredding_file_reader.cpp index 435f21f53..71deb1114 100644 --- a/src/paimon/common/data/shredding/map_shared_shredding_file_reader.cpp +++ b/src/paimon/common/data/shredding/map_shared_shredding_file_reader.cpp @@ -391,9 +391,9 @@ void MapSharedShreddingFileReader::Close() { reader_->Close(); } -Result MapSharedShreddingFileReader::GetPreviousBatchGlobalRowId( +Result MapSharedShreddingFileReader::GetPreviousBatchFileRowId( uint64_t batch_row_id) const { - return reader_->GetPreviousBatchGlobalRowId(batch_row_id); + return reader_->GetPreviousBatchFileRowId(batch_row_id); } Result MapSharedShreddingFileReader::GetNumberOfRows() const { diff --git a/src/paimon/common/data/shredding/map_shared_shredding_file_reader.h b/src/paimon/common/data/shredding/map_shared_shredding_file_reader.h index 74359cf36..46f053517 100644 --- a/src/paimon/common/data/shredding/map_shared_shredding_file_reader.h +++ b/src/paimon/common/data/shredding/map_shared_shredding_file_reader.h @@ -60,7 +60,7 @@ class MapSharedShreddingFileReader : public FileBatchReader { void Close() override; - Result GetPreviousBatchGlobalRowId(uint64_t batch_row_id) const override; + Result 
GetPreviousBatchFileRowId(uint64_t batch_row_id) const override; Result GetNumberOfRows() const override; diff --git a/src/paimon/common/file_index/bitmap/apply_bitmap_index_batch_reader.h b/src/paimon/common/file_index/bitmap/apply_bitmap_index_batch_reader.h index 8213c4f2a..e8428eaf3 100644 --- a/src/paimon/common/file_index/bitmap/apply_bitmap_index_batch_reader.h +++ b/src/paimon/common/file_index/bitmap/apply_bitmap_index_batch_reader.h @@ -80,8 +80,8 @@ class ApplyBitmapIndexBatchReader : public FileBatchReader { return Status::Invalid("ApplyBitmapIndexBatchReader does not support SetReadSchema"); } - Result GetPreviousBatchGlobalRowId(uint64_t batch_row_id) const override { - return reader_->GetPreviousBatchGlobalRowId(batch_row_id); + Result GetPreviousBatchFileRowId(uint64_t batch_row_id) const override { + return reader_->GetPreviousBatchFileRowId(batch_row_id); } Result GetNumberOfRows() const override { @@ -96,7 +96,7 @@ class ApplyBitmapIndexBatchReader : public FileBatchReader { Result Filter(int32_t batch_size) const { RoaringBitmap32 is_valid; for (int32_t i = 0; i < batch_size; ++i) { - PAIMON_ASSIGN_OR_RAISE(uint64_t global_row_id, reader_->GetPreviousBatchGlobalRowId(i)); + PAIMON_ASSIGN_OR_RAISE(uint64_t global_row_id, reader_->GetPreviousBatchFileRowId(i)); if (bitmap_.Contains(global_row_id)) { is_valid.Add(i); } diff --git a/src/paimon/common/reader/delegating_prefetch_reader.h b/src/paimon/common/reader/delegating_prefetch_reader.h index 64d57a155..0a22f8263 100644 --- a/src/paimon/common/reader/delegating_prefetch_reader.h +++ b/src/paimon/common/reader/delegating_prefetch_reader.h @@ -54,8 +54,8 @@ class DelegatingPrefetchReader : public FileBatchReader { return prefetch_reader_->SetReadSchema(read_schema, predicate, selection_bitmap); } - Result GetPreviousBatchGlobalRowId(uint64_t batch_row_id) const override { - return GetReader()->GetPreviousBatchGlobalRowId(batch_row_id); + Result GetPreviousBatchFileRowId(uint64_t batch_row_id) 
const override { + return GetReader()->GetPreviousBatchFileRowId(batch_row_id); } Result GetNumberOfRows() const override { diff --git a/src/paimon/common/reader/prefetch_file_batch_reader_impl.cpp b/src/paimon/common/reader/prefetch_file_batch_reader_impl.cpp index d22b1d318..dc8a9ad5f 100644 --- a/src/paimon/common/reader/prefetch_file_batch_reader_impl.cpp +++ b/src/paimon/common/reader/prefetch_file_batch_reader_impl.cpp @@ -432,7 +432,7 @@ Status PrefetchFileBatchReaderImpl::HandleReadResult( global_row_ids.reserve(c_array->length); for (int64_t i = 0; i < c_array->length; ++i) { PAIMON_ASSIGN_OR_RAISE(uint64_t global_row_id, - readers_[reader_idx]->GetPreviousBatchGlobalRowId(i)); + readers_[reader_idx]->GetPreviousBatchFileRowId(i)); global_row_ids.push_back(global_row_id); } if (global_row_ids.empty()) { @@ -598,7 +598,7 @@ Result> PrefetchFileBatchReaderImpl::GetFileSchem return readers_[0]->GetFileSchema(); } -Result PrefetchFileBatchReaderImpl::GetPreviousBatchGlobalRowId( +Result PrefetchFileBatchReaderImpl::GetPreviousBatchFileRowId( uint64_t batch_row_id) const { if (current_batch_global_row_ids_.size() == 0) { return Status::Invalid( diff --git a/src/paimon/common/reader/prefetch_file_batch_reader_impl.h b/src/paimon/common/reader/prefetch_file_batch_reader_impl.h index 908e06b85..19f936c8f 100644 --- a/src/paimon/common/reader/prefetch_file_batch_reader_impl.h +++ b/src/paimon/common/reader/prefetch_file_batch_reader_impl.h @@ -76,7 +76,7 @@ class PrefetchFileBatchReaderImpl : public PrefetchFileBatchReader { const std::optional& selection_bitmap) override; Status SeekToRow(uint64_t row_number) override; - Result GetPreviousBatchGlobalRowId(uint64_t batch_row_id) const override; + Result GetPreviousBatchFileRowId(uint64_t batch_row_id) const override; Result GetNumberOfRows() const override; uint64_t GetNextRowToRead() const override; void Close() override; diff --git a/src/paimon/common/reader/prefetch_file_batch_reader_impl_test.cpp 
b/src/paimon/common/reader/prefetch_file_batch_reader_impl_test.cpp index 350854900..c5d60f322 100644 --- a/src/paimon/common/reader/prefetch_file_batch_reader_impl_test.cpp +++ b/src/paimon/common/reader/prefetch_file_batch_reader_impl_test.cpp @@ -284,7 +284,7 @@ TEST_F(PrefetchFileBatchReaderImplTest, TestSimple) { /*enable_adaptive_prefetch_strategy=*/false, executor_, /*initialize_read_ranges=*/true, /*prefetch_cache_mode=*/PrefetchCacheMode::ALWAYS, CacheConfig(), GetDefaultPool())); - ASSERT_NOK(reader->GetPreviousBatchGlobalRowId(0)); + ASSERT_NOK(reader->GetPreviousBatchFileRowId(0)); ASSERT_OK_AND_ASSIGN(auto result_array, ReadResultCollector::CollectResult( reader.get(), /*max simulated data processing time*/ 100)); @@ -605,11 +605,11 @@ TEST_F(PrefetchFileBatchReaderImplTest, TestReadWithLargeBatchSize) { prefetch_max_parallel_num * 2, /*enable_adaptive_prefetch_strategy=*/false, executor_, /*initialize_read_ranges=*/true, /*prefetch_cache_mode=*/PrefetchCacheMode::ALWAYS, CacheConfig(), GetDefaultPool())); - ASSERT_NOK(reader->GetPreviousBatchGlobalRowId(0)); + ASSERT_NOK(reader->GetPreviousBatchFileRowId(0)); ASSERT_OK_AND_ASSIGN(auto result_array, ReadResultCollector::CollectResult( reader.get(), /*max simulated data processing time*/ 100)); - ASSERT_NOK(reader->GetPreviousBatchGlobalRowId(0)); + ASSERT_NOK(reader->GetPreviousBatchFileRowId(0)); auto expected_array = std::make_shared(data_array); ASSERT_TRUE(result_array->Equals(expected_array)); } @@ -633,13 +633,13 @@ TEST_F(PrefetchFileBatchReaderImplTest, TestPartialReaderSuccessRead) { } arrow::ArrayVector result_array_vector; - ASSERT_NOK(reader->GetPreviousBatchGlobalRowId(0)); + ASSERT_NOK(reader->GetPreviousBatchFileRowId(0)); ASSERT_OK_AND_ASSIGN(auto batch_with_bitmap, reader->NextBatchWithBitmap()); auto& [batch, bitmap] = batch_with_bitmap; ASSERT_EQ(batch.first->length, bitmap.Cardinality()); - uint64_t global_row_id = 0; - ASSERT_OK_AND_ASSIGN(global_row_id, 
reader->GetPreviousBatchGlobalRowId(0)); - ASSERT_EQ(global_row_id, 0); + uint64_t file_row_id = 0; + ASSERT_OK_AND_ASSIGN(file_row_id, reader->GetPreviousBatchFileRowId(0)); + ASSERT_EQ(file_row_id, 0); ASSERT_OK_AND_ASSIGN(auto array, ReadResultCollector::GetArray(std::move(batch))); result_array_vector.push_back(array); ASSERT_OK(prefetch_reader->GetReadStatus()); @@ -680,9 +680,9 @@ TEST_F(PrefetchFileBatchReaderImplTest, TestAllReaderFailedWithIOError) { ->SetNextBatchStatus(Status::IOError("mock error")); } - ASSERT_NOK(reader->GetPreviousBatchGlobalRowId(0)); + ASSERT_NOK(reader->GetPreviousBatchFileRowId(0)); auto batch_result = reader->NextBatchWithBitmap(); - ASSERT_NOK(reader->GetPreviousBatchGlobalRowId(0)); + ASSERT_NOK(reader->GetPreviousBatchFileRowId(0)); ASSERT_FALSE(batch_result.ok()); ASSERT_TRUE(batch_result.status().IsIOError()); ASSERT_FALSE(prefetch_reader->is_shutdown_); @@ -691,7 +691,7 @@ TEST_F(PrefetchFileBatchReaderImplTest, TestAllReaderFailedWithIOError) { // call NextBatch again, will still return error status auto batch_result2 = reader->NextBatchWithBitmap(); - ASSERT_NOK(reader->GetPreviousBatchGlobalRowId(0)); + ASSERT_NOK(reader->GetPreviousBatchFileRowId(0)); ASSERT_FALSE(batch_result2.ok()); ASSERT_TRUE(batch_result2.status().IsIOError()); } @@ -708,11 +708,11 @@ TEST_F(PrefetchFileBatchReaderImplTest, TestPrefetchWithEmptyData) { prefetch_max_parallel_num * 2, /*enable_adaptive_prefetch_strategy=*/false, executor_, /*initialize_read_ranges=*/true, /*prefetch_cache_mode=*/PrefetchCacheMode::ALWAYS, CacheConfig(), GetDefaultPool())); - ASSERT_NOK(reader->GetPreviousBatchGlobalRowId(0)); + ASSERT_NOK(reader->GetPreviousBatchFileRowId(0)); ASSERT_OK_AND_ASSIGN(auto result_array, ReadResultCollector::CollectResult( reader.get(), /*max simulated data processing time*/ 100)); - ASSERT_NOK(reader->GetPreviousBatchGlobalRowId(0)); + ASSERT_NOK(reader->GetPreviousBatchFileRowId(0)); ASSERT_FALSE(result_array); } @@ -728,7 +728,7 @@ 
TEST_F(PrefetchFileBatchReaderImplTest, TestCallNextBatchAfterReadingEof) { prefetch_max_parallel_num * 2, /*enable_adaptive_prefetch_strategy=*/false, executor_, /*initialize_read_ranges=*/true, /*prefetch_cache_mode=*/PrefetchCacheMode::ALWAYS, CacheConfig(), GetDefaultPool())); - ASSERT_NOK(reader->GetPreviousBatchGlobalRowId(0)); + ASSERT_NOK(reader->GetPreviousBatchFileRowId(0)); ASSERT_OK_AND_ASSIGN(auto result_array, ReadResultCollector::CollectResult( reader.get(), /*max simulated data processing time*/ 100)); @@ -737,7 +737,7 @@ TEST_F(PrefetchFileBatchReaderImplTest, TestCallNextBatchAfterReadingEof) { // continue to call NextBatch() after reading eof ASSERT_OK_AND_ASSIGN(auto batch_with_bitmap, reader->NextBatchWithBitmap()); - ASSERT_NOK(reader->GetPreviousBatchGlobalRowId(0)); + ASSERT_NOK(reader->GetPreviousBatchFileRowId(0)); ASSERT_TRUE(BatchReader::IsEofBatch(batch_with_bitmap)); } @@ -834,11 +834,11 @@ TEST_P(PrefetchFileBatchReaderImplTest, TestPrefetchWithPredicatePushdownWithCom PreparePrefetchReader(file_format, schema.get(), predicate, /*selection_bitmap=*/std::nullopt, /*batch_size=*/10, /*prefetch_max_parallel_num=*/3, cache_mode); - ASSERT_NOK(reader->GetPreviousBatchGlobalRowId(0)); + ASSERT_NOK(reader->GetPreviousBatchFileRowId(0)); ASSERT_OK_AND_ASSIGN(auto result_array, ReadResultCollector::CollectResult( reader.get(), /*max simulated data processing time*/ 100)); - ASSERT_NOK(reader->GetPreviousBatchGlobalRowId(0)); + ASSERT_NOK(reader->GetPreviousBatchFileRowId(0)); arrow::ArrayVector expected_array_vector; expected_array_vector.push_back(data_array->Slice(0, 30)); @@ -870,11 +870,11 @@ TEST_P(PrefetchFileBatchReaderImplTest, /*selection_bitmap=*/std::nullopt, /*batch_size=*/10, /*prefetch_max_parallel_num=*/3, cache_mode); ASSERT_OK(reader->RefreshReadRanges()); - ASSERT_NOK(reader->GetPreviousBatchGlobalRowId(0)); + ASSERT_NOK(reader->GetPreviousBatchFileRowId(0)); ASSERT_OK_AND_ASSIGN(auto result_array, 
ReadResultCollector::CollectResult( reader.get(), /*max simulated data processing time*/ 100)); - ASSERT_NOK(reader->GetPreviousBatchGlobalRowId(0)); + ASSERT_NOK(reader->GetPreviousBatchFileRowId(0)); arrow::ArrayVector expected_array_vector; expected_array_vector.push_back(data_array->Slice(0, 20)); @@ -932,21 +932,21 @@ TEST_P(PrefetchFileBatchReaderImplTest, TestRowMapping) { PreparePrefetchReader(file_format, schema.get(), predicate, /*selection_bitmap=*/std::nullopt, /*batch_size=*/10, /*prefetch_max_parallel_num=*/3, cache_mode); - uint64_t global_row_id = 0; - ASSERT_NOK(reader->GetPreviousBatchGlobalRowId(0)); + uint64_t file_row_id = 0; + ASSERT_NOK(reader->GetPreviousBatchFileRowId(0)); ASSERT_OK_AND_ASSIGN(std::shared_ptr batch, paimon::test::ReadResultCollector::CollectResultOneBatch(reader.get())); - ASSERT_OK_AND_ASSIGN(global_row_id, reader->GetPreviousBatchGlobalRowId(0)); - ASSERT_EQ(global_row_id, 20); - ASSERT_OK_AND_ASSIGN(global_row_id, reader->GetPreviousBatchGlobalRowId(5)); - ASSERT_EQ(global_row_id, 25); + ASSERT_OK_AND_ASSIGN(file_row_id, reader->GetPreviousBatchFileRowId(0)); + ASSERT_EQ(file_row_id, 20); + ASSERT_OK_AND_ASSIGN(file_row_id, reader->GetPreviousBatchFileRowId(5)); + ASSERT_EQ(file_row_id, 25); ASSERT_OK_AND_ASSIGN(batch, paimon::test::ReadResultCollector::CollectResultOneBatch(reader.get())); - ASSERT_OK_AND_ASSIGN(global_row_id, reader->GetPreviousBatchGlobalRowId(0)); - ASSERT_EQ(global_row_id, 70); - ASSERT_OK_AND_ASSIGN(global_row_id, reader->GetPreviousBatchGlobalRowId(5)); - ASSERT_EQ(global_row_id, 75); + ASSERT_OK_AND_ASSIGN(file_row_id, reader->GetPreviousBatchFileRowId(0)); + ASSERT_EQ(file_row_id, 70); + ASSERT_OK_AND_ASSIGN(file_row_id, reader->GetPreviousBatchFileRowId(5)); + ASSERT_EQ(file_row_id, 75); // Set read schema again std::unique_ptr c_schema = std::make_unique(); @@ -955,15 +955,15 @@ TEST_P(PrefetchFileBatchReaderImplTest, TestRowMapping) { Literal(30l), Literal(49l)); 
ASSERT_OK(reader->SetReadSchema(c_schema.get(), predicate, std::nullopt)); - ASSERT_NOK(reader->GetPreviousBatchGlobalRowId(0)); + ASSERT_NOK(reader->GetPreviousBatchFileRowId(0)); ASSERT_OK_AND_ASSIGN(batch, paimon::test::ReadResultCollector::CollectResultOneBatch(reader.get())); - ASSERT_OK_AND_ASSIGN(global_row_id, reader->GetPreviousBatchGlobalRowId(0)); - ASSERT_EQ(global_row_id, 30); + ASSERT_OK_AND_ASSIGN(file_row_id, reader->GetPreviousBatchFileRowId(0)); + ASSERT_EQ(file_row_id, 30); ASSERT_OK_AND_ASSIGN(batch, paimon::test::ReadResultCollector::CollectResultOneBatch(reader.get())); - ASSERT_OK_AND_ASSIGN(global_row_id, reader->GetPreviousBatchGlobalRowId(5)); - ASSERT_EQ(global_row_id, 45); + ASSERT_OK_AND_ASSIGN(file_row_id, reader->GetPreviousBatchFileRowId(5)); + ASSERT_EQ(file_row_id, 45); } } // namespace paimon::test diff --git a/src/paimon/core/deletionvectors/apply_deletion_vector_batch_reader.h b/src/paimon/core/deletionvectors/apply_deletion_vector_batch_reader.h index cb2867075..48fec8b31 100644 --- a/src/paimon/core/deletionvectors/apply_deletion_vector_batch_reader.h +++ b/src/paimon/core/deletionvectors/apply_deletion_vector_batch_reader.h @@ -82,8 +82,8 @@ class ApplyDeletionVectorBatchReader : public FileBatchReader { return Status::Invalid("ApplyDeletionVectorBatchReader does not support SetReadSchema"); } - Result GetPreviousBatchGlobalRowId(uint64_t batch_row_id) const override { - return reader_->GetPreviousBatchGlobalRowId(batch_row_id); + Result GetPreviousBatchFileRowId(uint64_t batch_row_id) const override { + return reader_->GetPreviousBatchFileRowId(batch_row_id); } Result GetNumberOfRows() const override { @@ -98,8 +98,8 @@ class ApplyDeletionVectorBatchReader : public FileBatchReader { Result Filter(int32_t batch_size) const { RoaringBitmap32 is_valid; for (int32_t i = 0; i < batch_size; ++i) { - PAIMON_ASSIGN_OR_RAISE(uint64_t global_row_id, reader_->GetPreviousBatchGlobalRowId(i)); - PAIMON_ASSIGN_OR_RAISE(bool is_deleted, 
deletion_vector_->IsDeleted(global_row_id)); + PAIMON_ASSIGN_OR_RAISE(uint64_t file_row_id, reader_->GetPreviousBatchFileRowId(i)); + PAIMON_ASSIGN_OR_RAISE(bool is_deleted, deletion_vector_->IsDeleted(file_row_id)); if (!is_deleted) { is_valid.Add(i); } diff --git a/src/paimon/core/io/complete_row_tracking_fields_reader.cpp b/src/paimon/core/io/complete_row_tracking_fields_reader.cpp index 54e55b380..29b610b16 100644 --- a/src/paimon/core/io/complete_row_tracking_fields_reader.cpp +++ b/src/paimon/core/io/complete_row_tracking_fields_reader.cpp @@ -91,9 +91,9 @@ CompleteRowTrackingFieldsBatchReader::NextBatchWithBitmap() { return Status::Invalid( "unexpected: read _ROW_ID special field, but first row id is null in meta"); } - PAIMON_ASSIGN_OR_RAISE(uint64_t global_row_id, - reader_->GetPreviousBatchGlobalRowId(idx_in_array)); - return first_row_id_.value() + global_row_id; + PAIMON_ASSIGN_OR_RAISE(uint64_t file_row_id, + reader_->GetPreviousBatchFileRowId(idx_in_array)); + return first_row_id_.value() + file_row_id; }; PAIMON_RETURN_NOT_OK(ConvertRowTrackingField(src_struct_array->length(), /*init_value=*/0, row_id_convert_func, &row_id_array)); diff --git a/src/paimon/core/io/complete_row_tracking_fields_reader.h b/src/paimon/core/io/complete_row_tracking_fields_reader.h index b3a42cb69..2aa535d79 100644 --- a/src/paimon/core/io/complete_row_tracking_fields_reader.h +++ b/src/paimon/core/io/complete_row_tracking_fields_reader.h @@ -60,8 +60,8 @@ class CompleteRowTrackingFieldsBatchReader : public FileBatchReader { reader_->Close(); } - Result GetPreviousBatchGlobalRowId(uint64_t batch_row_id) const override { - return reader_->GetPreviousBatchGlobalRowId(batch_row_id); + Result GetPreviousBatchFileRowId(uint64_t batch_row_id) const override { + return reader_->GetPreviousBatchFileRowId(batch_row_id); } Result GetNumberOfRows() const override { diff --git a/src/paimon/core/io/field_mapping_reader.h b/src/paimon/core/io/field_mapping_reader.h index 
71d715b89..9efd4a07c 100644 --- a/src/paimon/core/io/field_mapping_reader.h +++ b/src/paimon/core/io/field_mapping_reader.h @@ -77,8 +77,8 @@ class FieldMappingReader : public FileBatchReader { return Status::Invalid("FieldMappingReader does not support SetReadSchema"); } - Result GetPreviousBatchGlobalRowId(uint64_t batch_row_id) const override { - return reader_->GetPreviousBatchGlobalRowId(batch_row_id); + Result GetPreviousBatchFileRowId(uint64_t batch_row_id) const override { + return reader_->GetPreviousBatchFileRowId(batch_row_id); } Result GetNumberOfRows() const override { diff --git a/src/paimon/core/io/key_value_data_file_record_reader.cpp b/src/paimon/core/io/key_value_data_file_record_reader.cpp index 9ccc71486..8d12ec746 100644 --- a/src/paimon/core/io/key_value_data_file_record_reader.cpp +++ b/src/paimon/core/io/key_value_data_file_record_reader.cpp @@ -82,7 +82,7 @@ Result KeyValueDataFileRecordReader::Iterator::Next() { Result> KeyValueDataFileRecordReader::Iterator::NextWithFilePos() { PAIMON_ASSIGN_OR_RAISE(KeyValue kv, Next()); PAIMON_ASSIGN_OR_RAISE(uint64_t global_row_id, - reader_->reader_->GetPreviousBatchGlobalRowId(cursor_ - 1)); + reader_->reader_->GetPreviousBatchFileRowId(cursor_ - 1)); return std::make_pair(static_cast(global_row_id), std::move(kv)); } diff --git a/src/paimon/format/avro/avro_file_batch_reader.h b/src/paimon/format/avro/avro_file_batch_reader.h index 463264f39..a90d82c5a 100644 --- a/src/paimon/format/avro/avro_file_batch_reader.h +++ b/src/paimon/format/avro/avro_file_batch_reader.h @@ -45,7 +45,7 @@ class AvroFileBatchReader : public FileBatchReader { Status SetReadSchema(::ArrowSchema* read_schema, const std::shared_ptr& predicate, const std::optional& selection_bitmap) override; - Result GetPreviousBatchGlobalRowId(uint64_t batch_row_id) const override { + Result GetPreviousBatchFileRowId(uint64_t batch_row_id) const override { return previous_first_row_ + batch_row_id; } diff --git 
a/src/paimon/format/avro/avro_file_batch_reader_test.cpp b/src/paimon/format/avro/avro_file_batch_reader_test.cpp index c3970bf33..2e1e00c42 100644 --- a/src/paimon/format/avro/avro_file_batch_reader_test.cpp +++ b/src/paimon/format/avro/avro_file_batch_reader_test.cpp @@ -327,7 +327,7 @@ TEST_F(AvroFileBatchReaderTest, TestSetReadSchemaRejectNestedSubFieldProjection) "does not support nested sub-field projection"); } -TEST_F(AvroFileBatchReaderTest, TestGetPreviousBatchGlobalRowId) { +TEST_F(AvroFileBatchReaderTest, TestGetPreviousBatchFileRowId) { std::string path = paimon::test::GetDataDir() + "/avro/append_simple.db/" "append_simple/bucket-0/" @@ -352,25 +352,25 @@ TEST_F(AvroFileBatchReaderTest, TestGetPreviousBatchGlobalRowId) { ASSERT_OK_AND_ASSIGN(auto num_rows, reader->GetNumberOfRows()); ASSERT_EQ(4, num_rows); - ASSERT_EQ(std::numeric_limits::max(), reader->GetPreviousBatchGlobalRowId(0).value()); + ASSERT_EQ(std::numeric_limits::max(), reader->GetPreviousBatchFileRowId(0).value()); ASSERT_OK_AND_ASSIGN(auto batch1, reader->NextBatch()); ArrowArrayRelease(batch1.first.get()); ArrowSchemaRelease(batch1.second.get()); - ASSERT_EQ(0, reader->GetPreviousBatchGlobalRowId(0).value()); + ASSERT_EQ(0, reader->GetPreviousBatchFileRowId(0).value()); ASSERT_OK_AND_ASSIGN(auto batch2, reader->NextBatch()); - ASSERT_EQ(1, reader->GetPreviousBatchGlobalRowId(0).value()); + ASSERT_EQ(1, reader->GetPreviousBatchFileRowId(0).value()); ArrowArrayRelease(batch2.first.get()); ArrowSchemaRelease(batch2.second.get()); ASSERT_OK_AND_ASSIGN(auto batch3, reader->NextBatch()); - ASSERT_EQ(2, reader->GetPreviousBatchGlobalRowId(0).value()); + ASSERT_EQ(2, reader->GetPreviousBatchFileRowId(0).value()); ArrowArrayRelease(batch3.first.get()); ArrowSchemaRelease(batch3.second.get()); ASSERT_OK_AND_ASSIGN(auto batch4, reader->NextBatch()); - ASSERT_EQ(3, reader->GetPreviousBatchGlobalRowId(0).value()); + ASSERT_EQ(3, reader->GetPreviousBatchFileRowId(0).value()); 
ArrowArrayRelease(batch4.first.get()); ArrowSchemaRelease(batch4.second.get()); ASSERT_OK_AND_ASSIGN(auto batch5, reader->NextBatch()); - ASSERT_EQ(4, reader->GetPreviousBatchGlobalRowId(0).value()); + ASSERT_EQ(4, reader->GetPreviousBatchFileRowId(0).value()); ASSERT_TRUE(BatchReader::IsEofBatch(batch5)); } @@ -396,7 +396,7 @@ TEST_F(AvroFileBatchReaderTest, TestSetReadSchemaResetsReaderToFirstRow) { ASSERT_OK_AND_ASSIGN(auto reader, reader_builder->Build(in)); ASSERT_OK_AND_ASSIGN(auto first_batch, reader->NextBatch()); - ASSERT_EQ(0, reader->GetPreviousBatchGlobalRowId(0).value()); + ASSERT_EQ(0, reader->GetPreviousBatchFileRowId(0).value()); auto first_array = arrow::ImportArray(first_batch.first.get(), first_batch.second.get()).ValueOrDie(); ASSERT_TRUE(first_array->Equals(src_array->Slice(0, 2))) << first_array->ToString(); @@ -406,10 +406,10 @@ TEST_F(AvroFileBatchReaderTest, TestSetReadSchemaResetsReaderToFirstRow) { ASSERT_TRUE(arrow::ExportSchema(*read_schema, c_schema.get()).ok()); ASSERT_OK(reader->SetReadSchema(c_schema.get(), /*predicate=*/nullptr, /*selection_bitmap=*/std::nullopt)); - ASSERT_EQ(std::numeric_limits::max(), reader->GetPreviousBatchGlobalRowId(0).value()); + ASSERT_EQ(std::numeric_limits::max(), reader->GetPreviousBatchFileRowId(0).value()); ASSERT_OK_AND_ASSIGN(auto projected_batch, reader->NextBatch()); - ASSERT_EQ(0, reader->GetPreviousBatchGlobalRowId(0).value()); + ASSERT_EQ(0, reader->GetPreviousBatchFileRowId(0).value()); auto projected_array = arrow::ImportArray(projected_batch.first.get(), projected_batch.second.get()).ValueOrDie(); auto expected_projected_array = arrow::ipc::internal::json::ArrayFromJSON( diff --git a/src/paimon/format/blob/blob_file_batch_reader.h b/src/paimon/format/blob/blob_file_batch_reader.h index c4619aff5..06afe6acc 100644 --- a/src/paimon/format/blob/blob_file_batch_reader.h +++ b/src/paimon/format/blob/blob_file_batch_reader.h @@ -97,10 +97,10 @@ class BlobFileBatchReader : public FileBatchReader { 
Result NextBatch() override; - Result GetPreviousBatchGlobalRowId(uint64_t batch_row_id) const override { + Result GetPreviousBatchFileRowId(uint64_t batch_row_id) const override { if (all_blob_lengths_.size() != target_blob_lengths_.size()) { return Status::NotImplemented( - "Cannot call GetPreviousBatchGlobalRowId in BlobFileBatchReader because, after " + "Cannot call GetPreviousBatchFileRowId in BlobFileBatchReader because, after " "bitmap pushdown, rows in the array returned by NextBatch are no longer " "contiguous."); } diff --git a/src/paimon/format/blob/blob_file_batch_reader_test.cpp b/src/paimon/format/blob/blob_file_batch_reader_test.cpp index afee869b5..0cfe9351f 100644 --- a/src/paimon/format/blob/blob_file_batch_reader_test.cpp +++ b/src/paimon/format/blob/blob_file_batch_reader_test.cpp @@ -169,21 +169,21 @@ TEST_F(BlobFileBatchReaderTest, TestRowNumbers) { ASSERT_OK(reader->SetReadSchema(&c_schema, nullptr, std::nullopt)); ASSERT_OK_AND_ASSIGN(auto number_of_rows, reader->GetNumberOfRows()); ASSERT_EQ(3, number_of_rows); - ASSERT_EQ(std::numeric_limits::max(), reader->GetPreviousBatchGlobalRowId(0).value()); + ASSERT_EQ(std::numeric_limits::max(), reader->GetPreviousBatchFileRowId(0).value()); ASSERT_OK_AND_ASSIGN(auto batch1, reader->NextBatch()); ArrowArrayRelease(batch1.first.get()); ArrowSchemaRelease(batch1.second.get()); - ASSERT_EQ(0, reader->GetPreviousBatchGlobalRowId(0).value()); + ASSERT_EQ(0, reader->GetPreviousBatchFileRowId(0).value()); ASSERT_OK_AND_ASSIGN(auto batch2, reader->NextBatch()); - ASSERT_EQ(1, reader->GetPreviousBatchGlobalRowId(0).value()); + ASSERT_EQ(1, reader->GetPreviousBatchFileRowId(0).value()); ArrowArrayRelease(batch2.first.get()); ArrowSchemaRelease(batch2.second.get()); ASSERT_OK_AND_ASSIGN(auto batch3, reader->NextBatch()); - ASSERT_EQ(2, reader->GetPreviousBatchGlobalRowId(0).value()); + ASSERT_EQ(2, reader->GetPreviousBatchFileRowId(0).value()); ArrowArrayRelease(batch3.first.get()); 
ArrowSchemaRelease(batch3.second.get()); ASSERT_OK_AND_ASSIGN(auto batch4, reader->NextBatch()); - ASSERT_EQ(3, reader->GetPreviousBatchGlobalRowId(0).value()); + ASSERT_EQ(3, reader->GetPreviousBatchFileRowId(0).value()); ASSERT_TRUE(BatchReader::IsEofBatch(batch4)); } @@ -254,7 +254,7 @@ TEST_P(BlobFileBatchReaderTest, EmptyFile) { ASSERT_OK(reader->SetReadSchema(&c_schema, nullptr, std::nullopt)); ASSERT_OK_AND_ASSIGN(auto number_of_rows, reader->GetNumberOfRows()); ASSERT_EQ(0, number_of_rows); - ASSERT_EQ(std::numeric_limits::max(), reader->GetPreviousBatchGlobalRowId(0).value()); + ASSERT_EQ(std::numeric_limits::max(), reader->GetPreviousBatchFileRowId(0).value()); ASSERT_OK_AND_ASSIGN(auto batch, reader->NextBatch()); ASSERT_TRUE(BatchReader::IsEofBatch(batch)); } diff --git a/src/paimon/format/lance/lance_file_batch_reader.h b/src/paimon/format/lance/lance_file_batch_reader.h index cfd899c76..c7ce9a4f4 100644 --- a/src/paimon/format/lance/lance_file_batch_reader.h +++ b/src/paimon/format/lance/lance_file_batch_reader.h @@ -41,11 +41,11 @@ class LanceFileBatchReader : public FileBatchReader { Result NextBatch() override; - Result GetPreviousBatchGlobalRowId(uint64_t batch_row_id) const override { + Result GetPreviousBatchFileRowId(uint64_t batch_row_id) const override { if (!read_row_ids_.empty() && read_row_ids_.size() != num_rows_) { // TODO(xinyu.lxy): support function return Status::NotImplemented( - "Cannot call GetPreviousBatchGlobalRowId in LanceFileBatchReader because, after " + "Cannot call GetPreviousBatchFileRowId in LanceFileBatchReader because, after " "bitmap pushdown, rows in the array returned by NextBatch are no longer " "contiguous."); } diff --git a/src/paimon/format/lance/lance_format_reader_writer_test.cpp b/src/paimon/format/lance/lance_format_reader_writer_test.cpp index 920cd4b0e..061d950f9 100644 --- a/src/paimon/format/lance/lance_format_reader_writer_test.cpp +++ b/src/paimon/format/lance/lance_format_reader_writer_test.cpp @@ 
-478,26 +478,26 @@ TEST_F(LanceFileReaderWriterTest, TestPreviousBatchFirstRowNumber) { ASSERT_OK_AND_ASSIGN( std::unique_ptr reader, LanceFileBatchReader::Create(file_path, /*batch_size=*/4, /*batch_readahead=*/2)); - ASSERT_EQ(std::numeric_limits::max(), reader->GetPreviousBatchGlobalRowId(0).value()); + ASSERT_EQ(std::numeric_limits::max(), reader->GetPreviousBatchFileRowId(0).value()); // first batch row 0-3 ASSERT_OK_AND_ASSIGN(auto read_batch, reader->NextBatch()); ASSERT_OK_AND_ASSIGN(auto read_array, paimon::test::ReadResultCollector::GetArray(std::move(read_batch))); ASSERT_TRUE(read_array->Equals(array->Slice(0, 4))); - ASSERT_EQ(0, reader->GetPreviousBatchGlobalRowId(0).value()); + ASSERT_EQ(0, reader->GetPreviousBatchFileRowId(0).value()); // second batch 4-5 ASSERT_OK_AND_ASSIGN(read_batch, reader->NextBatch()); ASSERT_OK_AND_ASSIGN(read_array, paimon::test::ReadResultCollector::GetArray(std::move(read_batch))); ASSERT_TRUE(read_array->Equals(array->Slice(4, 2))); - ASSERT_EQ(4, reader->GetPreviousBatchGlobalRowId(0).value()); + ASSERT_EQ(4, reader->GetPreviousBatchFileRowId(0).value()); // eof ASSERT_OK_AND_ASSIGN(read_batch, reader->NextBatch()); ASSERT_TRUE(BatchReader::IsEofBatch(read_batch)); - ASSERT_EQ(6, reader->GetPreviousBatchGlobalRowId(0).value()); + ASSERT_EQ(6, reader->GetPreviousBatchFileRowId(0).value()); // test with bitmap pushdown ArrowSchema c_read_schema; @@ -505,8 +505,8 @@ TEST_F(LanceFileReaderWriterTest, TestPreviousBatchFirstRowNumber) { ASSERT_OK(reader->SetReadSchema(&c_read_schema, /*predicate=*/nullptr, /*selection_bitmap=*/RoaringBitmap32::From({0, 3}))); ASSERT_NOK_WITH_MSG( - reader->GetPreviousBatchGlobalRowId(0), - "Cannot call GetPreviousBatchGlobalRowId in LanceFileBatchReader because, after bitmap " + reader->GetPreviousBatchFileRowId(0), + "Cannot call GetPreviousBatchFileRowId in LanceFileBatchReader because, after bitmap " "pushdown, rows in the array returned by NextBatch are no longer contiguous."); } } // 
namespace paimon::lance::test diff --git a/src/paimon/format/orc/orc_file_batch_reader.h b/src/paimon/format/orc/orc_file_batch_reader.h index 3cd870db3..0f60c3779 100644 --- a/src/paimon/format/orc/orc_file_batch_reader.h +++ b/src/paimon/format/orc/orc_file_batch_reader.h @@ -62,7 +62,7 @@ class OrcFileBatchReader : public PrefetchFileBatchReader { // OrcFileBatchReader. Therefore, we need to hold BatchReader when using output ArrowArray. Result NextBatch() override; - Result GetPreviousBatchGlobalRowId(uint64_t batch_row_id) const override { + Result GetPreviousBatchFileRowId(uint64_t batch_row_id) const override { return reader_->GetRowNumber() + batch_row_id; } diff --git a/src/paimon/format/orc/orc_file_batch_reader_test.cpp b/src/paimon/format/orc/orc_file_batch_reader_test.cpp index b0f56cf3f..87490e0a2 100644 --- a/src/paimon/format/orc/orc_file_batch_reader_test.cpp +++ b/src/paimon/format/orc/orc_file_batch_reader_test.cpp @@ -493,10 +493,10 @@ TEST_P(OrcFileBatchReaderTest, TestNextBatchSimple) { for (auto batch_size : {1, 2, 3, 5, 8, 10}) { auto orc_batch_reader = PrepareOrcFileBatchReader(file_name, &read_schema, batch_size, natural_read_size); - ASSERT_EQ(orc_batch_reader->GetPreviousBatchGlobalRowId(0).value(), -1); + ASSERT_EQ(orc_batch_reader->GetPreviousBatchFileRowId(0).value(), -1); ASSERT_OK_AND_ASSIGN(auto result_array, paimon::test::ReadResultCollector::CollectResult( orc_batch_reader.get())); - ASSERT_EQ(orc_batch_reader->GetPreviousBatchGlobalRowId(0).value(), 8); + ASSERT_EQ(orc_batch_reader->GetPreviousBatchFileRowId(0).value(), 8); orc_batch_reader->Close(); auto expected_array = std::make_shared(struct_array_); ASSERT_TRUE(result_array->Equals(expected_array)); @@ -767,18 +767,18 @@ TEST_F(OrcFileBatchReaderTest, TestReadNoField) { auto orc_batch_reader = PrepareOrcFileBatchReader(file_name, &read_schema, /*batch_size=*/3, /*natural_read_size=*/10); // read 3 rows - ASSERT_EQ(orc_batch_reader->GetPreviousBatchGlobalRowId(0).value(), 
-1); + ASSERT_EQ(orc_batch_reader->GetPreviousBatchFileRowId(0).value(), -1); ASSERT_OK_AND_ASSIGN(auto batch1, orc_batch_reader->NextBatch()); - ASSERT_EQ(orc_batch_reader->GetPreviousBatchGlobalRowId(0).value(), 0); + ASSERT_EQ(orc_batch_reader->GetPreviousBatchFileRowId(0).value(), 0); // read 3 rows ASSERT_OK_AND_ASSIGN(auto batch2, orc_batch_reader->NextBatch()); - ASSERT_EQ(orc_batch_reader->GetPreviousBatchGlobalRowId(0).value(), 3); + ASSERT_EQ(orc_batch_reader->GetPreviousBatchFileRowId(0).value(), 3); // read 2 rows ASSERT_OK_AND_ASSIGN(auto batch3, orc_batch_reader->NextBatch()); - ASSERT_EQ(orc_batch_reader->GetPreviousBatchGlobalRowId(0).value(), 6); + ASSERT_EQ(orc_batch_reader->GetPreviousBatchFileRowId(0).value(), 6); // read rows with eof ASSERT_OK_AND_ASSIGN(auto batch4, orc_batch_reader->NextBatch()); - ASSERT_EQ(orc_batch_reader->GetPreviousBatchGlobalRowId(0).value(), 8); + ASSERT_EQ(orc_batch_reader->GetPreviousBatchFileRowId(0).value(), 8); ASSERT_TRUE(BatchReader::IsEofBatch(batch4)); orc_batch_reader->Close(); diff --git a/src/paimon/format/parquet/parquet_file_batch_reader.h b/src/paimon/format/parquet/parquet_file_batch_reader.h index b05345037..06237cec7 100644 --- a/src/paimon/format/parquet/parquet_file_batch_reader.h +++ b/src/paimon/format/parquet/parquet_file_batch_reader.h @@ -96,7 +96,7 @@ class ParquetFileBatchReader : public PrefetchFileBatchReader { Result>> GenReadRanges( bool* need_prefetch) const override; - Result GetPreviousBatchGlobalRowId(uint64_t batch_row_id) const override { + Result GetPreviousBatchFileRowId(uint64_t batch_row_id) const override { if (row_mapping_.size() == 0) { return Status::Invalid( "Last batch is not read or last batch is empty, cannot get previous batch global " diff --git a/src/paimon/format/parquet/parquet_file_batch_reader_test.cpp b/src/paimon/format/parquet/parquet_file_batch_reader_test.cpp index 530915545..c7255c209 100644 --- a/src/paimon/format/parquet/parquet_file_batch_reader_test.cpp 
+++ b/src/paimon/format/parquet/parquet_file_batch_reader_test.cpp @@ -822,24 +822,24 @@ TEST_F(ParquetFileBatchReaderTest, TestReadNoField) { auto parquet_batch_reader = PrepareParquetFileBatchReader(file_name, read_schema, /*predicate=*/nullptr, /*selection_bitmap=*/std::nullopt, /*batch_size=*/2); - uint64_t global_row_id = 0; + uint64_t file_row_id = 0; // read 2 rows - ASSERT_NOK(parquet_batch_reader->GetPreviousBatchGlobalRowId(0)); + ASSERT_NOK(parquet_batch_reader->GetPreviousBatchFileRowId(0)); ASSERT_OK_AND_ASSIGN(auto batch1, parquet_batch_reader->NextBatch()); - ASSERT_OK_AND_ASSIGN(global_row_id, parquet_batch_reader->GetPreviousBatchGlobalRowId(0)); - ASSERT_EQ(global_row_id, 0); + ASSERT_OK_AND_ASSIGN(file_row_id, parquet_batch_reader->GetPreviousBatchFileRowId(0)); + ASSERT_EQ(file_row_id, 0); // read 2 rows ASSERT_OK_AND_ASSIGN(auto batch2, parquet_batch_reader->NextBatch()); - ASSERT_OK_AND_ASSIGN(global_row_id, parquet_batch_reader->GetPreviousBatchGlobalRowId(0)); - ASSERT_EQ(global_row_id, 2); + ASSERT_OK_AND_ASSIGN(file_row_id, parquet_batch_reader->GetPreviousBatchFileRowId(0)); + ASSERT_EQ(file_row_id, 2); // read 2 rows ASSERT_OK_AND_ASSIGN(auto batch3, parquet_batch_reader->NextBatch()); - ASSERT_OK_AND_ASSIGN(global_row_id, parquet_batch_reader->GetPreviousBatchGlobalRowId(0)); - ASSERT_EQ(global_row_id, 4); + ASSERT_OK_AND_ASSIGN(file_row_id, parquet_batch_reader->GetPreviousBatchFileRowId(0)); + ASSERT_EQ(file_row_id, 4); // read rows with eof ASSERT_OK_AND_ASSIGN(auto batch4, parquet_batch_reader->NextBatch()); ASSERT_TRUE(BatchReader::IsEofBatch(batch4)); - ASSERT_NOK(parquet_batch_reader->GetPreviousBatchGlobalRowId(0)); + ASSERT_NOK(parquet_batch_reader->GetPreviousBatchFileRowId(0)); parquet_batch_reader->Close(); arrow::FieldVector fields; @@ -1047,19 +1047,19 @@ TEST_F(ParquetFileBatchReaderTest, TestRowMappingSimple) { auto parquet_batch_reader = PrepareParquetFileBatchReader( file_path_, arrow_schema, /*predicate=*/predicate, 
std::nullopt, /*batch_size=*/2); - uint64_t global_row_id = 0; - ASSERT_NOK(parquet_batch_reader->GetPreviousBatchGlobalRowId(0)); + uint64_t file_row_id = 0; + ASSERT_NOK(parquet_batch_reader->GetPreviousBatchFileRowId(0)); ASSERT_OK_AND_ASSIGN( std::shared_ptr batch1, paimon::test::ReadResultCollector::CollectResultOneBatch(parquet_batch_reader.get())); auto expected_batch1 = src_array->Slice(1, 2); ASSERT_TRUE(batch1->chunk(0)->Equals(expected_batch1)) << batch1->ToString(); - ASSERT_OK_AND_ASSIGN(global_row_id, parquet_batch_reader->GetPreviousBatchGlobalRowId(0)); - ASSERT_EQ(global_row_id, 1); - ASSERT_OK_AND_ASSIGN(global_row_id, parquet_batch_reader->GetPreviousBatchGlobalRowId(1)); - ASSERT_EQ(global_row_id, 2); + ASSERT_OK_AND_ASSIGN(file_row_id, parquet_batch_reader->GetPreviousBatchFileRowId(0)); + ASSERT_EQ(file_row_id, 1); + ASSERT_OK_AND_ASSIGN(file_row_id, parquet_batch_reader->GetPreviousBatchFileRowId(1)); + ASSERT_EQ(file_row_id, 2); // out of bound return invalid - ASSERT_NOK(parquet_batch_reader->GetPreviousBatchGlobalRowId(2)); + ASSERT_NOK(parquet_batch_reader->GetPreviousBatchFileRowId(2)); // Not adjacent pages ASSERT_OK_AND_ASSIGN( @@ -1072,10 +1072,10 @@ TEST_F(ParquetFileBatchReaderTest, TestRowMappingSimple) { ])") .ValueOrDie()); ASSERT_TRUE(batch2->chunk(0)->Equals(expected_batch2)) << batch2->ToString(); - ASSERT_OK_AND_ASSIGN(global_row_id, parquet_batch_reader->GetPreviousBatchGlobalRowId(0)); - ASSERT_EQ(global_row_id, 3); - ASSERT_OK_AND_ASSIGN(global_row_id, parquet_batch_reader->GetPreviousBatchGlobalRowId(1)); - ASSERT_EQ(global_row_id, 5); + ASSERT_OK_AND_ASSIGN(file_row_id, parquet_batch_reader->GetPreviousBatchFileRowId(0)); + ASSERT_EQ(file_row_id, 3); + ASSERT_OK_AND_ASSIGN(file_row_id, parquet_batch_reader->GetPreviousBatchFileRowId(1)); + ASSERT_EQ(file_row_id, 5); // Only one record read ASSERT_OK_AND_ASSIGN( @@ -1083,16 +1083,16 @@ TEST_F(ParquetFileBatchReaderTest, TestRowMappingSimple) { 
paimon::test::ReadResultCollector::CollectResultOneBatch(parquet_batch_reader.get())); auto expected_batch3 = src_array->Slice(6, 1); ASSERT_TRUE(batch3->chunk(0)->Equals(expected_batch3)) << batch3->ToString(); - ASSERT_OK_AND_ASSIGN(global_row_id, parquet_batch_reader->GetPreviousBatchGlobalRowId(0)); - ASSERT_EQ(global_row_id, 6); - ASSERT_NOK(parquet_batch_reader->GetPreviousBatchGlobalRowId(1)); + ASSERT_OK_AND_ASSIGN(file_row_id, parquet_batch_reader->GetPreviousBatchFileRowId(0)); + ASSERT_EQ(file_row_id, 6); + ASSERT_NOK(parquet_batch_reader->GetPreviousBatchFileRowId(1)); ASSERT_OK_AND_ASSIGN( std::shared_ptr eof_batch, paimon::test::ReadResultCollector::CollectResultOneBatch(parquet_batch_reader.get())); ASSERT_EQ(nullptr, eof_batch); // previous batch is eof, return invalid. - ASSERT_NOK(parquet_batch_reader->GetPreviousBatchGlobalRowId(0)); + ASSERT_NOK(parquet_batch_reader->GetPreviousBatchFileRowId(0)); } TEST_F(ParquetFileBatchReaderTest, TestRowMappingFullyAndPartially) { @@ -1119,23 +1119,23 @@ TEST_F(ParquetFileBatchReaderTest, TestRowMappingFullyAndPartially) { auto parquet_batch_reader = PrepareParquetFileBatchReader( file_path_, arrow_schema, /*predicate=*/predicate, std::nullopt, /*batch_size=*/3); - uint64_t global_row_id = 0; - ASSERT_NOK(parquet_batch_reader->GetPreviousBatchGlobalRowId(0)); + uint64_t file_row_id = 0; + ASSERT_NOK(parquet_batch_reader->GetPreviousBatchFileRowId(0)); ASSERT_OK_AND_ASSIGN( std::shared_ptr batch1, paimon::test::ReadResultCollector::CollectResultOneBatch(parquet_batch_reader.get())); - ASSERT_OK_AND_ASSIGN(global_row_id, parquet_batch_reader->GetPreviousBatchGlobalRowId(0)); - ASSERT_EQ(global_row_id, 3); - ASSERT_OK_AND_ASSIGN(global_row_id, parquet_batch_reader->GetPreviousBatchGlobalRowId(2)); - ASSERT_EQ(global_row_id, 5); + ASSERT_OK_AND_ASSIGN(file_row_id, parquet_batch_reader->GetPreviousBatchFileRowId(0)); + ASSERT_EQ(file_row_id, 3); + ASSERT_OK_AND_ASSIGN(file_row_id, 
parquet_batch_reader->GetPreviousBatchFileRowId(2)); + ASSERT_EQ(file_row_id, 5); ASSERT_OK_AND_ASSIGN( std::shared_ptr batch2, paimon::test::ReadResultCollector::CollectResultOneBatch(parquet_batch_reader.get())); - ASSERT_OK_AND_ASSIGN(global_row_id, parquet_batch_reader->GetPreviousBatchGlobalRowId(0)); - ASSERT_EQ(global_row_id, 6); - ASSERT_OK_AND_ASSIGN(global_row_id, parquet_batch_reader->GetPreviousBatchGlobalRowId(1)); - ASSERT_EQ(global_row_id, 8); + ASSERT_OK_AND_ASSIGN(file_row_id, parquet_batch_reader->GetPreviousBatchFileRowId(0)); + ASSERT_EQ(file_row_id, 6); + ASSERT_OK_AND_ASSIGN(file_row_id, parquet_batch_reader->GetPreviousBatchFileRowId(1)); + ASSERT_EQ(file_row_id, 8); } TEST_F(ParquetFileBatchReaderTest, TestRowMappingSetReadSchemaTwice) { @@ -1158,22 +1158,22 @@ TEST_F(ParquetFileBatchReaderTest, TestRowMappingSetReadSchemaTwice) { auto parquet_batch_reader = PrepareParquetFileBatchReader( file_path_, arrow_schema, /*predicate=*/predicate, std::nullopt, /*batch_size=*/3); - uint64_t global_row_id = 0; - ASSERT_NOK(parquet_batch_reader->GetPreviousBatchGlobalRowId(0)); + uint64_t file_row_id = 0; + ASSERT_NOK(parquet_batch_reader->GetPreviousBatchFileRowId(0)); ASSERT_OK_AND_ASSIGN( std::shared_ptr batch1, paimon::test::ReadResultCollector::CollectResultOneBatch(parquet_batch_reader.get())); - ASSERT_OK_AND_ASSIGN(global_row_id, parquet_batch_reader->GetPreviousBatchGlobalRowId(0)); - ASSERT_EQ(global_row_id, 1); - ASSERT_OK_AND_ASSIGN(global_row_id, parquet_batch_reader->GetPreviousBatchGlobalRowId(1)); - ASSERT_EQ(global_row_id, 2); + ASSERT_OK_AND_ASSIGN(file_row_id, parquet_batch_reader->GetPreviousBatchFileRowId(0)); + ASSERT_EQ(file_row_id, 1); + ASSERT_OK_AND_ASSIGN(file_row_id, parquet_batch_reader->GetPreviousBatchFileRowId(1)); + ASSERT_EQ(file_row_id, 2); ASSERT_OK_AND_ASSIGN( std::shared_ptr batch2, paimon::test::ReadResultCollector::CollectResultOneBatch(parquet_batch_reader.get())); - ASSERT_OK_AND_ASSIGN(global_row_id, 
parquet_batch_reader->GetPreviousBatchGlobalRowId(0)); - ASSERT_EQ(global_row_id, 3); - ASSERT_NOK(parquet_batch_reader->GetPreviousBatchGlobalRowId(1)); + ASSERT_OK_AND_ASSIGN(file_row_id, parquet_batch_reader->GetPreviousBatchFileRowId(0)); + ASSERT_EQ(file_row_id, 3); + ASSERT_NOK(parquet_batch_reader->GetPreviousBatchFileRowId(1)); ASSERT_OK_AND_ASSIGN( predicate, @@ -1184,14 +1184,14 @@ TEST_F(ParquetFileBatchReaderTest, TestRowMappingSetReadSchemaTwice) { ASSERT_TRUE(arrow::ExportSchema(*arrow_schema, c_schema.get()).ok()); ASSERT_OK( parquet_batch_reader->SetReadSchema(c_schema.get(), /*predicate=*/predicate, std::nullopt)); - ASSERT_NOK(parquet_batch_reader->GetPreviousBatchGlobalRowId(0)); + ASSERT_NOK(parquet_batch_reader->GetPreviousBatchFileRowId(0)); ASSERT_OK_AND_ASSIGN( std::shared_ptr batch3, paimon::test::ReadResultCollector::CollectResultOneBatch(parquet_batch_reader.get())); - ASSERT_OK_AND_ASSIGN(global_row_id, parquet_batch_reader->GetPreviousBatchGlobalRowId(0)); - ASSERT_EQ(global_row_id, 3); - ASSERT_OK_AND_ASSIGN(global_row_id, parquet_batch_reader->GetPreviousBatchGlobalRowId(2)); - ASSERT_EQ(global_row_id, 5); + ASSERT_OK_AND_ASSIGN(file_row_id, parquet_batch_reader->GetPreviousBatchFileRowId(0)); + ASSERT_EQ(file_row_id, 3); + ASSERT_OK_AND_ASSIGN(file_row_id, parquet_batch_reader->GetPreviousBatchFileRowId(2)); + ASSERT_EQ(file_row_id, 5); } } // namespace paimon::parquet::test diff --git a/src/paimon/testing/mock/mock_file_batch_reader.h b/src/paimon/testing/mock/mock_file_batch_reader.h index be6a0e9ef..a28d38dd1 100644 --- a/src/paimon/testing/mock/mock_file_batch_reader.h +++ b/src/paimon/testing/mock/mock_file_batch_reader.h @@ -156,7 +156,7 @@ class MockFileBatchReader : public PrefetchFileBatchReader { return metrics; } - Result GetPreviousBatchGlobalRowId(uint64_t batch_row_id) const override { + Result GetPreviousBatchFileRowId(uint64_t batch_row_id) const override { return previous_batch_first_row_num_ + batch_row_id; } From 
8f82f44d0b852f35b73064e9af9b31cc5f6b2f25 Mon Sep 17 00:00:00 2001 From: zhouhongfeng Date: Mon, 29 Jun 2026 10:23:06 +0800 Subject: [PATCH 27/38] fix: use a more efficient way to apply bitmap --- .../file_index/bitmap/apply_bitmap_index_batch_reader.h | 8 +++----- 1 file changed, 3 insertions(+), 5 deletions(-) diff --git a/src/paimon/common/file_index/bitmap/apply_bitmap_index_batch_reader.h b/src/paimon/common/file_index/bitmap/apply_bitmap_index_batch_reader.h index e8428eaf3..dd851c78b 100644 --- a/src/paimon/common/file_index/bitmap/apply_bitmap_index_batch_reader.h +++ b/src/paimon/common/file_index/bitmap/apply_bitmap_index_batch_reader.h @@ -96,12 +96,10 @@ class ApplyBitmapIndexBatchReader : public FileBatchReader { Result Filter(int32_t batch_size) const { RoaringBitmap32 is_valid; for (int32_t i = 0; i < batch_size; ++i) { - PAIMON_ASSIGN_OR_RAISE(uint64_t global_row_id, reader_->GetPreviousBatchFileRowId(i)); - if (bitmap_.Contains(global_row_id)) { - is_valid.Add(i); - } + PAIMON_ASSIGN_OR_RAISE(uint64_t file_row_id, reader_->GetPreviousBatchFileRowId(i)); + is_valid.Add(i); } - return is_valid; + return RoaringBitmap32::And(bitmap_, is_valid); } private: From d3b73e176f8b0a2705ba5af78976b9bd67e705f9 Mon Sep 17 00:00:00 2001 From: zhouhongfeng Date: Mon, 29 Jun 2026 10:34:08 +0800 Subject: [PATCH 28/38] update headers --- include/paimon/reader/file_batch_reader.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/include/paimon/reader/file_batch_reader.h b/include/paimon/reader/file_batch_reader.h index 586c8cbc7..ddb4e999d 100644 --- a/include/paimon/reader/file_batch_reader.h +++ b/include/paimon/reader/file_batch_reader.h @@ -47,7 +47,7 @@ class PAIMON_EXPORT FileBatchReader : public BatchReader { using BatchReader::NextBatchWithBitmap; /// Get the global row number of the row in the previously read batch. 
- virtual Result GetPreviousBatchGlobalRowId(uint64_t batch_row_id) const = 0; + virtual Result GetPreviousBatchFileRowId(uint64_t batch_row_id) const = 0; /// Get the number of rows in the file. virtual Result GetNumberOfRows() const = 0; From fcc1ac87d4320661fba158230681990117d4a8d7 Mon Sep 17 00:00:00 2001 From: zhouhongfeng Date: Mon, 29 Jun 2026 10:55:15 +0800 Subject: [PATCH 29/38] fix: use iterator to apply bitmap --- .../bitmap/apply_bitmap_index_batch_reader.h | 18 +++++++++++++++--- 1 file changed, 15 insertions(+), 3 deletions(-) diff --git a/src/paimon/common/file_index/bitmap/apply_bitmap_index_batch_reader.h b/src/paimon/common/file_index/bitmap/apply_bitmap_index_batch_reader.h index dd851c78b..a6de3af3e 100644 --- a/src/paimon/common/file_index/bitmap/apply_bitmap_index_batch_reader.h +++ b/src/paimon/common/file_index/bitmap/apply_bitmap_index_batch_reader.h @@ -94,12 +94,24 @@ class ApplyBitmapIndexBatchReader : public FileBatchReader { private: Result Filter(int32_t batch_size) const { - RoaringBitmap32 is_valid; + RoaringBitmap32 result; + auto bitmap_iter = bitmap_.Begin(); + auto bitmap_end = bitmap_.End(); + for (int32_t i = 0; i < batch_size; ++i) { PAIMON_ASSIGN_OR_RAISE(uint64_t file_row_id, reader_->GetPreviousBatchFileRowId(i)); - is_valid.Add(i); + while (bitmap_iter != bitmap_end && + static_cast(*bitmap_iter) < file_row_id) { + ++bitmap_iter; + } + if (bitmap_iter == bitmap_end) { + break; + } + if (static_cast(*bitmap_iter) == file_row_id) { + result.Add(i); + } } - return RoaringBitmap32::And(bitmap_, is_valid); + return result; } private: From 0fb2decbca49118c897f879281169e335c8f47b6 Mon Sep 17 00:00:00 2001 From: zhouhongfeng Date: Mon, 29 Jun 2026 11:30:02 +0800 Subject: [PATCH 30/38] test: add assertion --- .../common/reader/prefetch_file_batch_reader_impl_test.cpp | 1 + 1 file changed, 1 insertion(+) diff --git a/src/paimon/common/reader/prefetch_file_batch_reader_impl_test.cpp 
b/src/paimon/common/reader/prefetch_file_batch_reader_impl_test.cpp index c5d60f322..a30e90bfd 100644 --- a/src/paimon/common/reader/prefetch_file_batch_reader_impl_test.cpp +++ b/src/paimon/common/reader/prefetch_file_batch_reader_impl_test.cpp @@ -732,6 +732,7 @@ TEST_F(PrefetchFileBatchReaderImplTest, TestCallNextBatchAfterReadingEof) { ASSERT_OK_AND_ASSIGN(auto result_array, ReadResultCollector::CollectResult( reader.get(), /*max simulated data processing time*/ 100)); + ASSERT_NOK(reader->GetPreviousBatchFileRowId(0)); auto expected_array = std::make_shared(data_array); ASSERT_TRUE(result_array->Equals(expected_array)); From a3e37bdd8904a50a95a48c59db8dc91d05483459 Mon Sep 17 00:00:00 2001 From: zhouhongfeng Date: Mon, 29 Jun 2026 11:41:42 +0800 Subject: [PATCH 31/38] test: use '.value()' directly to validate the result. --- .../bitmap/apply_bitmap_index_batch_reader.h | 5 +- .../prefetch_file_batch_reader_impl_test.cpp | 29 +++++----- .../parquet_file_batch_reader_test.cpp | 55 ++++++------------- 3 files changed, 32 insertions(+), 57 deletions(-) diff --git a/src/paimon/common/file_index/bitmap/apply_bitmap_index_batch_reader.h b/src/paimon/common/file_index/bitmap/apply_bitmap_index_batch_reader.h index a6de3af3e..8f770478f 100644 --- a/src/paimon/common/file_index/bitmap/apply_bitmap_index_batch_reader.h +++ b/src/paimon/common/file_index/bitmap/apply_bitmap_index_batch_reader.h @@ -100,12 +100,11 @@ class ApplyBitmapIndexBatchReader : public FileBatchReader { for (int32_t i = 0; i < batch_size; ++i) { PAIMON_ASSIGN_OR_RAISE(uint64_t file_row_id, reader_->GetPreviousBatchFileRowId(i)); - while (bitmap_iter != bitmap_end && - static_cast(*bitmap_iter) < file_row_id) { + while (bitmap_iter != bitmap_end && static_cast(*bitmap_iter) < file_row_id) { ++bitmap_iter; } if (bitmap_iter == bitmap_end) { - break; + break; } if (static_cast(*bitmap_iter) == file_row_id) { result.Add(i); diff --git a/src/paimon/common/reader/prefetch_file_batch_reader_impl_test.cpp 
b/src/paimon/common/reader/prefetch_file_batch_reader_impl_test.cpp index a30e90bfd..ceeeb9c57 100644 --- a/src/paimon/common/reader/prefetch_file_batch_reader_impl_test.cpp +++ b/src/paimon/common/reader/prefetch_file_batch_reader_impl_test.cpp @@ -637,9 +637,7 @@ TEST_F(PrefetchFileBatchReaderImplTest, TestPartialReaderSuccessRead) { ASSERT_OK_AND_ASSIGN(auto batch_with_bitmap, reader->NextBatchWithBitmap()); auto& [batch, bitmap] = batch_with_bitmap; ASSERT_EQ(batch.first->length, bitmap.Cardinality()); - uint64_t file_row_id = 0; - ASSERT_OK_AND_ASSIGN(file_row_id, reader->GetPreviousBatchFileRowId(0)); - ASSERT_EQ(file_row_id, 0); + ASSERT_EQ(reader->GetPreviousBatchFileRowId(0).value(), 0); ASSERT_OK_AND_ASSIGN(auto array, ReadResultCollector::GetArray(std::move(batch))); result_array_vector.push_back(array); ASSERT_OK(prefetch_reader->GetReadStatus()); @@ -933,21 +931,18 @@ TEST_P(PrefetchFileBatchReaderImplTest, TestRowMapping) { PreparePrefetchReader(file_format, schema.get(), predicate, /*selection_bitmap=*/std::nullopt, /*batch_size=*/10, /*prefetch_max_parallel_num=*/3, cache_mode); - uint64_t file_row_id = 0; ASSERT_NOK(reader->GetPreviousBatchFileRowId(0)); ASSERT_OK_AND_ASSIGN(std::shared_ptr batch, paimon::test::ReadResultCollector::CollectResultOneBatch(reader.get())); - ASSERT_OK_AND_ASSIGN(file_row_id, reader->GetPreviousBatchFileRowId(0)); - ASSERT_EQ(file_row_id, 20); - ASSERT_OK_AND_ASSIGN(file_row_id, reader->GetPreviousBatchFileRowId(5)); - ASSERT_EQ(file_row_id, 25); + for (uint64_t i = 0; i < 10; i++) { + ASSERT_EQ(reader->GetPreviousBatchFileRowId(i).value(), 20 + i); + } ASSERT_OK_AND_ASSIGN(batch, paimon::test::ReadResultCollector::CollectResultOneBatch(reader.get())); - ASSERT_OK_AND_ASSIGN(file_row_id, reader->GetPreviousBatchFileRowId(0)); - ASSERT_EQ(file_row_id, 70); - ASSERT_OK_AND_ASSIGN(file_row_id, reader->GetPreviousBatchFileRowId(5)); - ASSERT_EQ(file_row_id, 75); + for (uint64_t i = 0; i < 10; i++) { + 
ASSERT_EQ(reader->GetPreviousBatchFileRowId(i).value(), 70 + i); + } // Set read schema again std::unique_ptr c_schema = std::make_unique(); @@ -959,12 +954,14 @@ TEST_P(PrefetchFileBatchReaderImplTest, TestRowMapping) { ASSERT_NOK(reader->GetPreviousBatchFileRowId(0)); ASSERT_OK_AND_ASSIGN(batch, paimon::test::ReadResultCollector::CollectResultOneBatch(reader.get())); - ASSERT_OK_AND_ASSIGN(file_row_id, reader->GetPreviousBatchFileRowId(0)); - ASSERT_EQ(file_row_id, 30); + for (uint64_t i = 0; i < 10; i++) { + ASSERT_EQ(reader->GetPreviousBatchFileRowId(i).value(), 30 + i); + } ASSERT_OK_AND_ASSIGN(batch, paimon::test::ReadResultCollector::CollectResultOneBatch(reader.get())); - ASSERT_OK_AND_ASSIGN(file_row_id, reader->GetPreviousBatchFileRowId(5)); - ASSERT_EQ(file_row_id, 45); + for (uint64_t i = 0; i < 10; i++) { + ASSERT_EQ(reader->GetPreviousBatchFileRowId(i).value(), 40 + i); + } } } // namespace paimon::test diff --git a/src/paimon/format/parquet/parquet_file_batch_reader_test.cpp b/src/paimon/format/parquet/parquet_file_batch_reader_test.cpp index c7255c209..3e2f51f58 100644 --- a/src/paimon/format/parquet/parquet_file_batch_reader_test.cpp +++ b/src/paimon/format/parquet/parquet_file_batch_reader_test.cpp @@ -822,20 +822,16 @@ TEST_F(ParquetFileBatchReaderTest, TestReadNoField) { auto parquet_batch_reader = PrepareParquetFileBatchReader(file_name, read_schema, /*predicate=*/nullptr, /*selection_bitmap=*/std::nullopt, /*batch_size=*/2); - uint64_t file_row_id = 0; // read 2 rows ASSERT_NOK(parquet_batch_reader->GetPreviousBatchFileRowId(0)); ASSERT_OK_AND_ASSIGN(auto batch1, parquet_batch_reader->NextBatch()); - ASSERT_OK_AND_ASSIGN(file_row_id, parquet_batch_reader->GetPreviousBatchFileRowId(0)); - ASSERT_EQ(file_row_id, 0); + ASSERT_EQ(parquet_batch_reader->GetPreviousBatchFileRowId(0).value(), 0); // read 2 rows ASSERT_OK_AND_ASSIGN(auto batch2, parquet_batch_reader->NextBatch()); - ASSERT_OK_AND_ASSIGN(file_row_id, 
parquet_batch_reader->GetPreviousBatchFileRowId(0)); - ASSERT_EQ(file_row_id, 2); + ASSERT_EQ(parquet_batch_reader->GetPreviousBatchFileRowId(0).value(), 2); // read 2 rows ASSERT_OK_AND_ASSIGN(auto batch3, parquet_batch_reader->NextBatch()); - ASSERT_OK_AND_ASSIGN(file_row_id, parquet_batch_reader->GetPreviousBatchFileRowId(0)); - ASSERT_EQ(file_row_id, 4); + ASSERT_EQ(parquet_batch_reader->GetPreviousBatchFileRowId(0).value(), 4); // read rows with eof ASSERT_OK_AND_ASSIGN(auto batch4, parquet_batch_reader->NextBatch()); ASSERT_TRUE(BatchReader::IsEofBatch(batch4)); @@ -1047,17 +1043,14 @@ TEST_F(ParquetFileBatchReaderTest, TestRowMappingSimple) { auto parquet_batch_reader = PrepareParquetFileBatchReader( file_path_, arrow_schema, /*predicate=*/predicate, std::nullopt, /*batch_size=*/2); - uint64_t file_row_id = 0; ASSERT_NOK(parquet_batch_reader->GetPreviousBatchFileRowId(0)); ASSERT_OK_AND_ASSIGN( std::shared_ptr batch1, paimon::test::ReadResultCollector::CollectResultOneBatch(parquet_batch_reader.get())); auto expected_batch1 = src_array->Slice(1, 2); ASSERT_TRUE(batch1->chunk(0)->Equals(expected_batch1)) << batch1->ToString(); - ASSERT_OK_AND_ASSIGN(file_row_id, parquet_batch_reader->GetPreviousBatchFileRowId(0)); - ASSERT_EQ(file_row_id, 1); - ASSERT_OK_AND_ASSIGN(file_row_id, parquet_batch_reader->GetPreviousBatchFileRowId(1)); - ASSERT_EQ(file_row_id, 2); + ASSERT_EQ(parquet_batch_reader->GetPreviousBatchFileRowId(0).value(), 1); + ASSERT_EQ(parquet_batch_reader->GetPreviousBatchFileRowId(1).value(), 2); // out of bound return invalid ASSERT_NOK(parquet_batch_reader->GetPreviousBatchFileRowId(2)); @@ -1072,10 +1065,8 @@ TEST_F(ParquetFileBatchReaderTest, TestRowMappingSimple) { ])") .ValueOrDie()); ASSERT_TRUE(batch2->chunk(0)->Equals(expected_batch2)) << batch2->ToString(); - ASSERT_OK_AND_ASSIGN(file_row_id, parquet_batch_reader->GetPreviousBatchFileRowId(0)); - ASSERT_EQ(file_row_id, 3); - ASSERT_OK_AND_ASSIGN(file_row_id, 
parquet_batch_reader->GetPreviousBatchFileRowId(1)); - ASSERT_EQ(file_row_id, 5); + ASSERT_EQ(parquet_batch_reader->GetPreviousBatchFileRowId(0).value(), 3); + ASSERT_EQ(parquet_batch_reader->GetPreviousBatchFileRowId(1).value(), 5); // Only one record read ASSERT_OK_AND_ASSIGN( @@ -1083,8 +1074,7 @@ TEST_F(ParquetFileBatchReaderTest, TestRowMappingSimple) { paimon::test::ReadResultCollector::CollectResultOneBatch(parquet_batch_reader.get())); auto expected_batch3 = src_array->Slice(6, 1); ASSERT_TRUE(batch3->chunk(0)->Equals(expected_batch3)) << batch3->ToString(); - ASSERT_OK_AND_ASSIGN(file_row_id, parquet_batch_reader->GetPreviousBatchFileRowId(0)); - ASSERT_EQ(file_row_id, 6); + ASSERT_EQ(parquet_batch_reader->GetPreviousBatchFileRowId(0).value(), 6); ASSERT_NOK(parquet_batch_reader->GetPreviousBatchFileRowId(1)); ASSERT_OK_AND_ASSIGN( @@ -1119,23 +1109,18 @@ TEST_F(ParquetFileBatchReaderTest, TestRowMappingFullyAndPartially) { auto parquet_batch_reader = PrepareParquetFileBatchReader( file_path_, arrow_schema, /*predicate=*/predicate, std::nullopt, /*batch_size=*/3); - uint64_t file_row_id = 0; ASSERT_NOK(parquet_batch_reader->GetPreviousBatchFileRowId(0)); ASSERT_OK_AND_ASSIGN( std::shared_ptr batch1, paimon::test::ReadResultCollector::CollectResultOneBatch(parquet_batch_reader.get())); - ASSERT_OK_AND_ASSIGN(file_row_id, parquet_batch_reader->GetPreviousBatchFileRowId(0)); - ASSERT_EQ(file_row_id, 3); - ASSERT_OK_AND_ASSIGN(file_row_id, parquet_batch_reader->GetPreviousBatchFileRowId(2)); - ASSERT_EQ(file_row_id, 5); + ASSERT_EQ(parquet_batch_reader->GetPreviousBatchFileRowId(0).value(), 3); + ASSERT_EQ(parquet_batch_reader->GetPreviousBatchFileRowId(2).value(), 5); ASSERT_OK_AND_ASSIGN( std::shared_ptr batch2, paimon::test::ReadResultCollector::CollectResultOneBatch(parquet_batch_reader.get())); - ASSERT_OK_AND_ASSIGN(file_row_id, parquet_batch_reader->GetPreviousBatchFileRowId(0)); - ASSERT_EQ(file_row_id, 6); - ASSERT_OK_AND_ASSIGN(file_row_id, 
parquet_batch_reader->GetPreviousBatchFileRowId(1)); - ASSERT_EQ(file_row_id, 8); + ASSERT_EQ(parquet_batch_reader->GetPreviousBatchFileRowId(0).value(), 6); + ASSERT_EQ(parquet_batch_reader->GetPreviousBatchFileRowId(1).value(), 8); } TEST_F(ParquetFileBatchReaderTest, TestRowMappingSetReadSchemaTwice) { @@ -1158,21 +1143,17 @@ TEST_F(ParquetFileBatchReaderTest, TestRowMappingSetReadSchemaTwice) { auto parquet_batch_reader = PrepareParquetFileBatchReader( file_path_, arrow_schema, /*predicate=*/predicate, std::nullopt, /*batch_size=*/3); - uint64_t file_row_id = 0; ASSERT_NOK(parquet_batch_reader->GetPreviousBatchFileRowId(0)); ASSERT_OK_AND_ASSIGN( std::shared_ptr batch1, paimon::test::ReadResultCollector::CollectResultOneBatch(parquet_batch_reader.get())); - ASSERT_OK_AND_ASSIGN(file_row_id, parquet_batch_reader->GetPreviousBatchFileRowId(0)); - ASSERT_EQ(file_row_id, 1); - ASSERT_OK_AND_ASSIGN(file_row_id, parquet_batch_reader->GetPreviousBatchFileRowId(1)); - ASSERT_EQ(file_row_id, 2); + ASSERT_EQ(parquet_batch_reader->GetPreviousBatchFileRowId(0).value(), 1); + ASSERT_EQ(parquet_batch_reader->GetPreviousBatchFileRowId(1).value(), 2); ASSERT_OK_AND_ASSIGN( std::shared_ptr batch2, paimon::test::ReadResultCollector::CollectResultOneBatch(parquet_batch_reader.get())); - ASSERT_OK_AND_ASSIGN(file_row_id, parquet_batch_reader->GetPreviousBatchFileRowId(0)); - ASSERT_EQ(file_row_id, 3); + ASSERT_EQ(parquet_batch_reader->GetPreviousBatchFileRowId(0).value(), 3); ASSERT_NOK(parquet_batch_reader->GetPreviousBatchFileRowId(1)); ASSERT_OK_AND_ASSIGN( @@ -1188,10 +1169,8 @@ TEST_F(ParquetFileBatchReaderTest, TestRowMappingSetReadSchemaTwice) { ASSERT_OK_AND_ASSIGN( std::shared_ptr batch3, paimon::test::ReadResultCollector::CollectResultOneBatch(parquet_batch_reader.get())); - ASSERT_OK_AND_ASSIGN(file_row_id, parquet_batch_reader->GetPreviousBatchFileRowId(0)); - ASSERT_EQ(file_row_id, 3); - ASSERT_OK_AND_ASSIGN(file_row_id, 
parquet_batch_reader->GetPreviousBatchFileRowId(2)); - ASSERT_EQ(file_row_id, 5); + ASSERT_EQ(parquet_batch_reader->GetPreviousBatchFileRowId(0).value(), 3); + ASSERT_EQ(parquet_batch_reader->GetPreviousBatchFileRowId(2).value(), 5); } } // namespace paimon::parquet::test From 8820e7ceffc3aa9258e5a2a2305ffc8d986945ca Mon Sep 17 00:00:00 2001 From: zhouhongfeng Date: Mon, 29 Jun 2026 11:55:31 +0800 Subject: [PATCH 32/38] update comments --- src/paimon/testing/utils/read_result_collector.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/paimon/testing/utils/read_result_collector.h b/src/paimon/testing/utils/read_result_collector.h index f2ab0d1fc..10f7f8dae 100644 --- a/src/paimon/testing/utils/read_result_collector.h +++ b/src/paimon/testing/utils/read_result_collector.h @@ -133,7 +133,7 @@ class ReadResultCollector { static Result> CollectResultOneBatch( BatchReader* batch_reader) { - return CollectResultOneBatch(batch_reader, /*max simulated data processing time*/ 0); + return CollectResultOneBatch(batch_reader, /*max_simulated_data_processing_time*/ 0); } static Result> CollectResultOneBatch( From 376a3124f0bf3cd0b7bdd7564a5670012620a5bd Mon Sep 17 00:00:00 2001 From: zhouhongfeng Date: Mon, 29 Jun 2026 14:02:46 +0800 Subject: [PATCH 33/38] style: change method name --- src/paimon/format/parquet/parquet_file_batch_reader.cpp | 7 ++++--- src/paimon/format/parquet/parquet_file_batch_reader.h | 2 +- 2 files changed, 5 insertions(+), 4 deletions(-) diff --git a/src/paimon/format/parquet/parquet_file_batch_reader.cpp b/src/paimon/format/parquet/parquet_file_batch_reader.cpp index 73d066b6a..3fc379006 100644 --- a/src/paimon/format/parquet/parquet_file_batch_reader.cpp +++ b/src/paimon/format/parquet/parquet_file_batch_reader.cpp @@ -222,7 +222,7 @@ Status ParquetFileBatchReader::SetReadSchema( reader_->GetAllRowGroupRanges()[rg_id].first - 1))); } } - PAIMON_ASSIGN_OR_RAISE(all_row_ranges_, GetAllTargetRowRanges(target_row_groups)); + 
PAIMON_RETURN_NOT_OK(UpdateAllTargetRowranges(target_row_groups)); PAIMON_RETURN_NOT_OK(reader_->PrepareForReadingLazy(target_row_groups, column_indices)); } PAIMON_PARQUET_CATCH_AND_RETURN_STATUS("ParquetFileBatchReader::SetReadSchema") @@ -533,7 +533,7 @@ Result> ParquetFileBatchReader::ComputeNestedColumnIndices( return indices; } -Result ParquetFileBatchReader::GetAllTargetRowRanges( +Status ParquetFileBatchReader::UpdateAllTargetRowranges( const std::vector& target_row_groups) { row_mapping_.clear(); auto all_row_group_ranges = reader_->GetAllRowGroupRanges(); @@ -545,7 +545,8 @@ Result ParquetFileBatchReader::GetAllTargetRowRanges( range.to + all_row_group_ranges[target_row_group.row_group_index].first)); } } - return all_ranges; + all_row_ranges_ = std::move(all_ranges); + return Status::OK(); } Status ParquetFileBatchReader::GenerateRowMapping(int64_t batch_length) { diff --git a/src/paimon/format/parquet/parquet_file_batch_reader.h b/src/paimon/format/parquet/parquet_file_batch_reader.h index 06237cec7..e103694c0 100644 --- a/src/paimon/format/parquet/parquet_file_batch_reader.h +++ b/src/paimon/format/parquet/parquet_file_batch_reader.h @@ -184,7 +184,7 @@ class ParquetFileBatchReader : public PrefetchFileBatchReader { const std::shared_ptr& read_schema, const std::shared_ptr& file_schema); - Result GetAllTargetRowRanges(const std::vector& target_row_groups); + Status UpdateAllTargetRowranges(const std::vector& target_row_groups); // precondition: predicate supposed not be empty Result> FilterRowGroupsByPredicate( From f1c02db59d4cece571814d9396d1adc752c05899 Mon Sep 17 00:00:00 2001 From: zhouhongfeng Date: Mon, 29 Jun 2026 14:17:02 +0800 Subject: [PATCH 34/38] fix: small fixes --- .../common/reader/prefetch_file_batch_reader_impl.cpp | 1 - src/paimon/format/blob/blob_file_batch_reader.h | 3 +++ src/paimon/format/lance/lance_file_batch_reader.h | 3 +++ src/paimon/format/parquet/row_ranges.cpp | 6 ------ src/paimon/format/parquet/row_ranges.h | 4 +--- 
src/paimon/testing/mock/mock_file_batch_reader.h | 5 ++++- 6 files changed, 11 insertions(+), 11 deletions(-) diff --git a/src/paimon/common/reader/prefetch_file_batch_reader_impl.cpp b/src/paimon/common/reader/prefetch_file_batch_reader_impl.cpp index dc8a9ad5f..5940a4d48 100644 --- a/src/paimon/common/reader/prefetch_file_batch_reader_impl.cpp +++ b/src/paimon/common/reader/prefetch_file_batch_reader_impl.cpp @@ -19,7 +19,6 @@ #include #include #include -#include #include #include "arrow/array/array_base.h" diff --git a/src/paimon/format/blob/blob_file_batch_reader.h b/src/paimon/format/blob/blob_file_batch_reader.h index 06afe6acc..998939c8a 100644 --- a/src/paimon/format/blob/blob_file_batch_reader.h +++ b/src/paimon/format/blob/blob_file_batch_reader.h @@ -104,6 +104,9 @@ class BlobFileBatchReader : public FileBatchReader { "bitmap pushdown, rows in the array returned by NextBatch are no longer " "contiguous."); } + if (previous_batch_first_row_number_ == std::numeric_limits::max()) { + return Status::Invalid("No batch has been read yet."); + } return previous_batch_first_row_number_ + batch_row_id; } diff --git a/src/paimon/format/lance/lance_file_batch_reader.h b/src/paimon/format/lance/lance_file_batch_reader.h index c7ce9a4f4..5cfbdb981 100644 --- a/src/paimon/format/lance/lance_file_batch_reader.h +++ b/src/paimon/format/lance/lance_file_batch_reader.h @@ -49,6 +49,9 @@ class LanceFileBatchReader : public FileBatchReader { "bitmap pushdown, rows in the array returned by NextBatch are no longer " "contiguous."); } + if (previous_batch_first_row_num_ == std::numeric_limits::max()) { + return Status::Invalid("No batch has been read yet"); + } return previous_batch_first_row_num_ + batch_row_id; } diff --git a/src/paimon/format/parquet/row_ranges.cpp b/src/paimon/format/parquet/row_ranges.cpp index 6e20780b8..1b03715be 100644 --- a/src/paimon/format/parquet/row_ranges.cpp +++ b/src/paimon/format/parquet/row_ranges.cpp @@ -104,12 +104,6 @@ void 
RowRanges::Add(const Range& range) { ranges_.insert(it, merged); } -void RowRanges::Union(const RowRanges& other) { - for (const auto& range : other.ranges_) { - Add(range); - } -} - std::optional RowRanges::MapFilteredIndexToOriginalRow(int64_t filtered_index) const { int64_t accumulated = 0; for (const auto& range : ranges_) { diff --git a/src/paimon/format/parquet/row_ranges.h b/src/paimon/format/parquet/row_ranges.h index f6b41178f..6174ac4eb 100644 --- a/src/paimon/format/parquet/row_ranges.h +++ b/src/paimon/format/parquet/row_ranges.h @@ -92,8 +92,6 @@ class RowRanges { /// Adds a range to the end of the list, maintaining sorted disjoint ranges. void Add(const Range& range); - void Union(const RowRanges& other); - /// Maps a filtered-result index to the original row index within the row group. /// For example, if RowRanges = {[10,19], [50,59]}, then: /// MapFilteredIndexToOriginalRow(0) = 10 (first row of first range) @@ -111,7 +109,7 @@ class RowRanges { struct TargetRowGroup { int32_t row_group_index{-1}; bool is_partially_matched{false}; - // page-filtered row ranges, only valid if is_partially_matched is true. + RowRanges row_ranges; // Whether this row group has been excluded by ApplyReadRanges. 
// When true, this row group is logically skipped during iteration diff --git a/src/paimon/testing/mock/mock_file_batch_reader.h b/src/paimon/testing/mock/mock_file_batch_reader.h index a28d38dd1..439d5c296 100644 --- a/src/paimon/testing/mock/mock_file_batch_reader.h +++ b/src/paimon/testing/mock/mock_file_batch_reader.h @@ -157,6 +157,9 @@ class MockFileBatchReader : public PrefetchFileBatchReader { } Result GetPreviousBatchFileRowId(uint64_t batch_row_id) const override { + if (previous_batch_first_row_num_ == std::numeric_limits::max()) { + return Status::Invalid("No batch has been read yet"); + } return previous_batch_first_row_num_ + batch_row_id; } @@ -191,7 +194,7 @@ class MockFileBatchReader : public PrefetchFileBatchReader { int32_t batch_size_ = 0; int32_t current_pos_ = 0; int32_t read_end_pos_ = 0; - int32_t previous_batch_first_row_num_ = -1; + uint64_t previous_batch_first_row_num_ = std::numeric_limits::max(); Status next_batch_status_; bool enable_randomize_batch_size_ = true; std::vector> read_ranges_; From ad66b22c0b20aeea1332be32693f9bd5a335508b Mon Sep 17 00:00:00 2001 From: zhouhongfeng Date: Mon, 29 Jun 2026 14:33:07 +0800 Subject: [PATCH 35/38] fix: blob test --- src/paimon/format/blob/blob_file_batch_reader_test.cpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/paimon/format/blob/blob_file_batch_reader_test.cpp b/src/paimon/format/blob/blob_file_batch_reader_test.cpp index 0cfe9351f..8329f0557 100644 --- a/src/paimon/format/blob/blob_file_batch_reader_test.cpp +++ b/src/paimon/format/blob/blob_file_batch_reader_test.cpp @@ -254,7 +254,7 @@ TEST_P(BlobFileBatchReaderTest, EmptyFile) { ASSERT_OK(reader->SetReadSchema(&c_schema, nullptr, std::nullopt)); ASSERT_OK_AND_ASSIGN(auto number_of_rows, reader->GetNumberOfRows()); ASSERT_EQ(0, number_of_rows); - ASSERT_EQ(std::numeric_limits::max(), reader->GetPreviousBatchFileRowId(0).value()); + ASSERT_NOK(reader->GetPreviousBatchFileRowId(0)); ASSERT_OK_AND_ASSIGN(auto batch, 
reader->NextBatch()); ASSERT_TRUE(BatchReader::IsEofBatch(batch)); } From 25ef3d096f0dd66c98826e9499032922fcec52a0 Mon Sep 17 00:00:00 2001 From: zhouhongfeng Date: Mon, 29 Jun 2026 15:06:32 +0800 Subject: [PATCH 36/38] fix: blob and lance tests --- src/paimon/format/blob/blob_file_batch_reader_test.cpp | 4 ++-- src/paimon/format/lance/lance_format_reader_writer_test.cpp | 2 +- 2 files changed, 3 insertions(+), 3 deletions(-) diff --git a/src/paimon/format/blob/blob_file_batch_reader_test.cpp b/src/paimon/format/blob/blob_file_batch_reader_test.cpp index 8329f0557..c372fe2a2 100644 --- a/src/paimon/format/blob/blob_file_batch_reader_test.cpp +++ b/src/paimon/format/blob/blob_file_batch_reader_test.cpp @@ -169,7 +169,7 @@ TEST_F(BlobFileBatchReaderTest, TestRowNumbers) { ASSERT_OK(reader->SetReadSchema(&c_schema, nullptr, std::nullopt)); ASSERT_OK_AND_ASSIGN(auto number_of_rows, reader->GetNumberOfRows()); ASSERT_EQ(3, number_of_rows); - ASSERT_EQ(std::numeric_limits::max(), reader->GetPreviousBatchFileRowId(0).value()); + ASSERT_NOK(reader->GetPreviousBatchFileRowId(0)); ASSERT_OK_AND_ASSIGN(auto batch1, reader->NextBatch()); ArrowArrayRelease(batch1.first.get()); ArrowSchemaRelease(batch1.second.get()); @@ -183,7 +183,7 @@ TEST_F(BlobFileBatchReaderTest, TestRowNumbers) { ArrowArrayRelease(batch3.first.get()); ArrowSchemaRelease(batch3.second.get()); ASSERT_OK_AND_ASSIGN(auto batch4, reader->NextBatch()); - ASSERT_EQ(3, reader->GetPreviousBatchFileRowId(0).value()); + ASSERT_NOK(reader->GetPreviousBatchFileRowId(0)); ASSERT_TRUE(BatchReader::IsEofBatch(batch4)); } diff --git a/src/paimon/format/lance/lance_format_reader_writer_test.cpp b/src/paimon/format/lance/lance_format_reader_writer_test.cpp index 061d950f9..71a073413 100644 --- a/src/paimon/format/lance/lance_format_reader_writer_test.cpp +++ b/src/paimon/format/lance/lance_format_reader_writer_test.cpp @@ -478,7 +478,7 @@ TEST_F(LanceFileReaderWriterTest, TestPreviousBatchFirstRowNumber) { 
ASSERT_OK_AND_ASSIGN( std::unique_ptr reader, LanceFileBatchReader::Create(file_path, /*batch_size=*/4, /*batch_readahead=*/2)); - ASSERT_EQ(std::numeric_limits::max(), reader->GetPreviousBatchFileRowId(0).value()); + ASSERT_NOK(reader->GetPreviousBatchFileRowId(0)); // first batch row 0-3 ASSERT_OK_AND_ASSIGN(auto read_batch, reader->NextBatch()); From 4409afb7b4957007542fac0609e7ba545cda9bd4 Mon Sep 17 00:00:00 2001 From: zhouhongfeng Date: Mon, 29 Jun 2026 15:16:44 +0800 Subject: [PATCH 37/38] fix: unittest --- .../page_filtered_row_group_reader_test.cpp | 6 ++-- .../parquet/parquet_file_batch_reader.cpp | 31 ++++++++++++------- .../parquet_file_batch_reader_test.cpp | 9 ++++-- src/paimon/format/parquet/target_row_group.h | 12 ++++--- 4 files changed, 36 insertions(+), 22 deletions(-) diff --git a/src/paimon/format/parquet/page_filtered_row_group_reader_test.cpp b/src/paimon/format/parquet/page_filtered_row_group_reader_test.cpp index 29850a877..2de1d306c 100644 --- a/src/paimon/format/parquet/page_filtered_row_group_reader_test.cpp +++ b/src/paimon/format/parquet/page_filtered_row_group_reader_test.cpp @@ -145,9 +145,9 @@ class PageFilteredRowGroupReaderTest : public ::testing::Test { std::map options; options[PARQUET_READ_ENABLE_PAGE_INDEX_FILTER] = enable_page_level_filter ? 
"true" : "false"; - ASSERT_OK_AND_ASSIGN( - auto batch_reader, - ParquetFileBatchReader::Create(std::move(in_stream), options, batch_size, nullptr,arrow_pool_)); + ASSERT_OK_AND_ASSIGN(auto batch_reader, + ParquetFileBatchReader::Create(std::move(in_stream), options, + batch_size, nullptr, arrow_pool_)); auto c_schema = std::make_unique(); ASSERT_TRUE(arrow::ExportSchema(*read_schema, c_schema.get()).ok()); ASSERT_OK(batch_reader->SetReadSchema(c_schema.get(), predicate, bitmap)); diff --git a/src/paimon/format/parquet/parquet_file_batch_reader.cpp b/src/paimon/format/parquet/parquet_file_batch_reader.cpp index 28cd22410..3c5c49223 100644 --- a/src/paimon/format/parquet/parquet_file_batch_reader.cpp +++ b/src/paimon/format/parquet/parquet_file_batch_reader.cpp @@ -18,6 +18,7 @@ #include #include +#include #include #include "arrow/acero/options.h" @@ -159,7 +160,7 @@ Status ParquetFileBatchReader::SetReadSchema( } TargetRowGroups target_row_groups = - TargetRowGroup::MakeSerialRowGroups(reader_->GetNumberOfRowGroups()); + TargetRowGroup::MakeSerialRowGroups(reader_->GetAllRowGroupRanges()); PAIMON_ASSIGN_OR_RAISE( bool enable_page_index_filter, OptionsUtils::GetValueFromMap(options_, PARQUET_READ_ENABLE_PAGE_INDEX_FILTER, @@ -261,7 +262,7 @@ Result ParquetFileBatchReader::FilterRowGroupsByPredicate( return Status::Invalid("cannot cast to ParquetFileFragment in ParquetFileBatchReader"); } for (auto rg_index : parquet_fragment->row_groups()) { - target_row_groups.emplace_back(rg_index); + target_row_groups.emplace_back(src_row_groups[rg_index]); } } return target_row_groups; @@ -293,13 +294,17 @@ Result ParquetFileBatchReader::FilterRowGroupsByBitmap( if (!enable_page_filtered) { // For nested schema, we cannot apply page-level filtering, so we directly add the whole // row group if bitmap matches. 
- target_row_groups.emplace_back(row_group_idx); + target_row_groups.emplace_back(row_group); continue; } auto page_ranges = BitmapToRowRanges(bitmap, start_row_idx, end_row_idx); - target_row_groups.emplace_back(/*row_group_idx=*/row_group_idx, - /*is_partially_matched=*/true, - /*row_ranges=*/page_ranges); + if (page_ranges.RowCount() < rg_row_count) { + target_row_groups.emplace_back(/*row_group_idx=*/row_group_idx, + /*is_partially_matched=*/true, + /*row_ranges=*/page_ranges); + } else { + target_row_groups.emplace_back(row_group); + } } return target_row_groups; } @@ -387,7 +392,7 @@ Result ParquetFileBatchReader::FilterRowGroupsByPageIndex( if (intersection.RowCount() < rg_row_count) { target_row_groups.emplace_back(row_group_idx, true, intersection); } else { - target_row_groups.emplace_back(row_group_idx); + target_row_groups.emplace_back(row_group); } } } @@ -590,10 +595,10 @@ Result ParquetFileBatchReader::GetAllTargetRowRanges( auto all_row_group_ranges = reader_->GetAllRowGroupRanges(); RowRanges all_ranges; for (const auto& target_row_group : target_row_groups) { + auto row_group_idx = target_row_group.GetRowGroupIndex(); for (const auto& range : target_row_group.GetRowRanges().GetRanges()) { - all_ranges.Add( - Range(range.from + all_row_group_ranges[target_row_group.GetRowGroupIndex()].first, - range.to + all_row_group_ranges[target_row_group.GetRowGroupIndex()].first)); + all_ranges.Add(Range(all_row_group_ranges[row_group_idx].first + range.from, + all_row_group_ranges[row_group_idx].first + range.to)); } } return all_ranges; @@ -607,7 +612,11 @@ Status ParquetFileBatchReader::GenerateRowMapping(int64_t batch_length) { std::upper_bound(all_ranges.begin(), all_ranges.end(), batch_start_row, [](int64_t value, const Range& r) { return value < r.from; }); if (cur_range_it == all_ranges.begin()) { - return Status::Invalid("No range found!"); + std::stringstream s; + for (auto range : all_ranges) { + s << "range: [" << range.from << ", " << range.to << 
"]" << std::endl; + } + return Status::Invalid(fmt::format("No range found! {} {}", s.str(), all_ranges.size())); } --cur_range_it; if (batch_start_row < cur_range_it->from || batch_start_row > cur_range_it->to) { diff --git a/src/paimon/format/parquet/parquet_file_batch_reader_test.cpp b/src/paimon/format/parquet/parquet_file_batch_reader_test.cpp index ba3ffe644..d0ea266ec 100644 --- a/src/paimon/format/parquet/parquet_file_batch_reader_test.cpp +++ b/src/paimon/format/parquet/parquet_file_batch_reader_test.cpp @@ -1146,7 +1146,8 @@ TEST_F(ParquetFileBatchReaderTest, TestRowMappingSimple) { FieldType::INT, Literal(5), Literal(6))})); auto parquet_batch_reader = PrepareParquetFileBatchReader( - file_path_, arrow_schema, /*predicate=*/predicate, std::nullopt, /*batch_size=*/2); + file_path_, arrow_schema, /*predicate=*/predicate, std::nullopt, /*batch_size=*/2, + /*enable_page_level_filter=*/true); uint64_t global_row_id = 0; ASSERT_NOK(parquet_batch_reader->GetPreviousBatchGlobalRowId(0)); @@ -1218,7 +1219,8 @@ TEST_F(ParquetFileBatchReaderTest, TestRowMappingFullyAndPartially) { FieldType::INT, Literal(8))})); auto parquet_batch_reader = PrepareParquetFileBatchReader( - file_path_, arrow_schema, /*predicate=*/predicate, std::nullopt, /*batch_size=*/3); + file_path_, arrow_schema, /*predicate=*/predicate, std::nullopt, /*batch_size=*/3, + /*enable_page_level_filter=*/true); uint64_t global_row_id = 0; ASSERT_NOK(parquet_batch_reader->GetPreviousBatchGlobalRowId(0)); @@ -1257,7 +1259,8 @@ TEST_F(ParquetFileBatchReaderTest, TestRowMappingSetReadSchemaTwice) { FieldType::INT, Literal(6), Literal(7))})); auto parquet_batch_reader = PrepareParquetFileBatchReader( - file_path_, arrow_schema, /*predicate=*/predicate, std::nullopt, /*batch_size=*/3); + file_path_, arrow_schema, /*predicate=*/predicate, std::nullopt, /*batch_size=*/3, + /*enable_page_level_filter=*/true); uint64_t global_row_id = 0; ASSERT_NOK(parquet_batch_reader->GetPreviousBatchGlobalRowId(0)); diff 
--git a/src/paimon/format/parquet/target_row_group.h b/src/paimon/format/parquet/target_row_group.h index c621b9ca7..119ddb228 100644 --- a/src/paimon/format/parquet/target_row_group.h +++ b/src/paimon/format/parquet/target_row_group.h @@ -58,11 +58,13 @@ class TargetRowGroup { return row_ranges; } - static TargetRowGroups MakeSerialRowGroups(int32_t num_row_groups) { + static TargetRowGroups MakeSerialRowGroups( + const std::vector>& ranges) { TargetRowGroups target_row_groups; - target_row_groups.reserve(num_row_groups); - for (int32_t i = 0; i < num_row_groups; ++i) { - target_row_groups.emplace_back(i); + target_row_groups.reserve(ranges.size()); + for (size_t i = 0; i < ranges.size(); ++i) { + target_row_groups.emplace_back( + i, false, RowRanges(Range(0, ranges[i].second - ranges[i].first - 1))); } return target_row_groups; } @@ -79,7 +81,7 @@ class TargetRowGroup { private: int32_t row_group_index{-1}; bool is_partially_matched{false}; - // page-filtered row ranges, only valid if is_partially_matched is true. + // Local row ranges RowRanges row_ranges; // Whether this row group has been excluded by ApplyReadRanges. 
// When true, this row group is logically skipped during iteration From 8482e909672cb7c6f7ff53905b2cde9c51641b13 Mon Sep 17 00:00:00 2001 From: zhouhongfeng Date: Mon, 29 Jun 2026 15:36:25 +0800 Subject: [PATCH 38/38] fix: blob --- src/paimon/format/blob/blob_file_batch_reader_test.cpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/paimon/format/blob/blob_file_batch_reader_test.cpp b/src/paimon/format/blob/blob_file_batch_reader_test.cpp index c372fe2a2..c9d5dbd95 100644 --- a/src/paimon/format/blob/blob_file_batch_reader_test.cpp +++ b/src/paimon/format/blob/blob_file_batch_reader_test.cpp @@ -183,7 +183,7 @@ TEST_F(BlobFileBatchReaderTest, TestRowNumbers) { ArrowArrayRelease(batch3.first.get()); ArrowSchemaRelease(batch3.second.get()); ASSERT_OK_AND_ASSIGN(auto batch4, reader->NextBatch()); - ASSERT_NOK(reader->GetPreviousBatchFileRowId(0)); + ASSERT_EQ(3, reader->GetPreviousBatchFileRowId(0).value()); ASSERT_TRUE(BatchReader::IsEofBatch(batch4)); }