Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
44 changes: 31 additions & 13 deletions src/iceberg/json_serde.cc
Original file line number Diff line number Diff line change
Expand Up @@ -273,8 +273,14 @@ Result<std::unique_ptr<SortField>> SortFieldFromJson(const nlohmann::json& json)
null_order);
}

Result<std::unique_ptr<SortOrder>> SortOrderFromJson(
const nlohmann::json& json, const std::shared_ptr<Schema>& current_schema) {
namespace {

struct ParsedSortOrder {
int32_t order_id;
std::vector<SortField> fields;
};

Result<ParsedSortOrder> ParseSortOrder(const nlohmann::json& json) {
ICEBERG_ASSIGN_OR_RAISE(auto order_id, GetJsonValue<int32_t>(json, kOrderId));
ICEBERG_ASSIGN_OR_RAISE(auto fields, GetJsonValue<nlohmann::json>(json, kFields));

Expand All @@ -283,19 +289,30 @@ Result<std::unique_ptr<SortOrder>> SortOrderFromJson(
ICEBERG_ASSIGN_OR_RAISE(auto sort_field, SortFieldFromJson(field_json));
sort_fields.push_back(std::move(*sort_field));
}
return SortOrder::Make(*current_schema, order_id, std::move(sort_fields));
return ParsedSortOrder{.order_id = order_id, .fields = std::move(sort_fields)};
}

Result<std::unique_ptr<SortOrder>> SortOrderFromJson(const nlohmann::json& json) {
ICEBERG_ASSIGN_OR_RAISE(auto order_id, GetJsonValue<int32_t>(json, kOrderId));
ICEBERG_ASSIGN_OR_RAISE(auto fields, GetJsonValue<nlohmann::json>(json, kFields));
} // namespace

std::vector<SortField> sort_fields;
for (const auto& field_json : fields) {
ICEBERG_ASSIGN_OR_RAISE(auto sort_field, SortFieldFromJson(field_json));
sort_fields.push_back(std::move(*sort_field));
Result<std::unique_ptr<SortOrder>> SortOrderFromJson(
const nlohmann::json& json, const std::shared_ptr<Schema>& current_schema,
int32_t default_sort_order_id) {
ICEBERG_ASSIGN_OR_RAISE(auto parsed, ParseSortOrder(json));
if (parsed.order_id == default_sort_order_id) {
return SortOrder::Make(*current_schema, parsed.order_id, std::move(parsed.fields));
}
return SortOrder::Make(order_id, std::move(sort_fields));
return SortOrder::Make(parsed.order_id, std::move(parsed.fields));
}

Result<std::unique_ptr<SortOrder>> SortOrderFromJson(
const nlohmann::json& json, const std::shared_ptr<Schema>& current_schema) {
ICEBERG_ASSIGN_OR_RAISE(auto parsed, ParseSortOrder(json));
return SortOrder::Make(*current_schema, parsed.order_id, std::move(parsed.fields));
}

Result<std::unique_ptr<SortOrder>> SortOrderFromJson(const nlohmann::json& json) {
ICEBERG_ASSIGN_OR_RAISE(auto parsed, ParseSortOrder(json));
return SortOrder::Make(parsed.order_id, std::move(parsed.fields));
}

nlohmann::json ToJson(const SchemaField& field) {
Expand Down Expand Up @@ -1107,8 +1124,9 @@ Status ParseSortOrders(const nlohmann::json& json, int8_t format_version,
ICEBERG_ASSIGN_OR_RAISE(auto sort_order_array,
GetJsonValue<nlohmann::json>(json, kSortOrders));
for (const auto& sort_order_json : sort_order_array) {
ICEBERG_ASSIGN_OR_RAISE(auto sort_order,
SortOrderFromJson(sort_order_json, current_schema));
ICEBERG_ASSIGN_OR_RAISE(
auto sort_order,
SortOrderFromJson(sort_order_json, current_schema, default_sort_order_id));
sort_orders.push_back(std::move(sort_order));
}
} else {
Expand Down
63 changes: 63 additions & 0 deletions src/iceberg/test/metadata_serde_test.cc
Original file line number Diff line number Diff line change
Expand Up @@ -89,6 +89,50 @@ void AssertSnapshotById(const TableMetadata& metadata, int64_t snapshot_id,
EXPECT_EQ(*snapshot.value(), expected_snapshot);
}

nlohmann::json HistoricalSortOrderWithDroppedFieldMetadataJson(
int32_t default_sort_order_id) {
nlohmann::json metadata_json = R"({
"format-version": 2,
"table-uuid": "test-uuid-1234",
"location": "s3://bucket/test",
"last-sequence-number": 0,
"last-updated-ms": 0,
"last-column-id": 2,
"schemas": [
{
"type": "struct",
"schema-id": 1,
"fields": [
{"id": 1, "name": "id", "type": "int", "required": true}
]
}
],
"current-schema-id": 1,
"partition-specs": [{"spec-id": 0, "fields": []}],
"default-spec-id": 0,
"last-partition-id": 999,
"sort-orders": [
{"order-id": 1, "fields": [
{"transform": "identity", "source-id": 1, "direction": "asc", "null-order": "nulls-first"},
{"transform": "identity", "source-id": 2, "direction": "asc", "null-order": "nulls-first"}
]},
{"order-id": 2, "fields": [
{"transform": "identity", "source-id": 1, "direction": "asc", "null-order": "nulls-first"}
]}
],
"properties": {},
"current-snapshot-id": null,
"refs": {},
"snapshots": [],
"statistics": [],
"partition-statistics": [],
"snapshot-log": [],
"metadata-log": []
})"_json;
metadata_json["default-sort-order-id"] = default_sort_order_id;
return metadata_json;
}

} // namespace

TEST(MetadataSerdeTest, DeserializeV1Valid) {
Expand Down Expand Up @@ -486,6 +530,25 @@ TEST(MetadataSerdeTest, DeserializeV2MissingSortOrder) {
"sort-orders must exist");
}

TEST(MetadataSerdeTest, DeserializeHistoricalSortOrderWithDroppedField) {
auto metadata =
TableMetadataFromJson(HistoricalSortOrderWithDroppedFieldMetadataJson(2));
ASSERT_THAT(metadata, IsOk());
ASSERT_EQ(metadata.value()->sort_orders.size(), 2);

Copy link
Copy Markdown
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Can we assert the historical sort order still has two fields, including source_id() == 2? Otherwise this would still pass if the dropped-field sort term were skipped. A negative case where that order is the default sort order would also help verify default sort order validation is still strict.

Copy link
Copy Markdown
Collaborator Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Updated the tests to cover both preserving the dropped-field historical sort order and rejecting it as the default sort order.

EXPECT_EQ(metadata.value()->sort_orders[0]->order_id(), 1);
ASSERT_EQ(metadata.value()->sort_orders[0]->fields().size(), 2);
EXPECT_EQ(metadata.value()->sort_orders[0]->fields()[0].source_id(), 1);
EXPECT_EQ(metadata.value()->sort_orders[0]->fields()[1].source_id(), 2);
EXPECT_EQ(metadata.value()->sort_orders[1]->order_id(), 2);
}

TEST(MetadataSerdeTest, DeserializeDefaultSortOrderWithDroppedFieldFails) {
auto metadata =
TableMetadataFromJson(HistoricalSortOrderWithDroppedFieldMetadataJson(1));
ASSERT_THAT(metadata, IsError(ErrorKind::kInvalidArgument));
EXPECT_THAT(metadata, HasErrorMessage("Cannot find source column for sort field"));
}

TEST(MetadataSerdeTest, EncryptionKeysRoundTrip) {
nlohmann::json metadata_json = R"({
"format-version": 2,
Expand Down
Loading