diff --git a/src/iceberg/partition_spec.cc b/src/iceberg/partition_spec.cc index ed5aa831a..96287fb87 100644 --- a/src/iceberg/partition_spec.cc +++ b/src/iceberg/partition_spec.cc @@ -192,10 +192,7 @@ Status PartitionSpec::Validate(const Schema& schema, bool allow_missing_fields) partition_field); } const auto& source_type = source_field.value().get().type(); - if (!field_transform->CanTransform(*source_type)) { - return InvalidArgument("Invalid source type {} for transform {}", - source_type->ToString(), field_transform->ToString()); - } + ICEBERG_RETURN_UNEXPECTED(field_transform->Validate(source_type)); // The only valid parent types for a PartitionField are StructTypes. This must be // checked recursively. diff --git a/src/iceberg/sort_order.cc b/src/iceberg/sort_order.cc index b317efb90..04de6b835 100644 --- a/src/iceberg/sort_order.cc +++ b/src/iceberg/sort_order.cc @@ -96,10 +96,7 @@ Status SortOrder::Validate(const Schema& schema) const { const auto& source_type = schema_field.value().get().type(); - if (!field.transform()->CanTransform(*source_type)) { - return InvalidArgument("Invalid source type {} for transform {}", - source_type->ToString(), field.transform()->ToString()); - } + ICEBERG_RETURN_UNEXPECTED(field.transform()->Validate(source_type)); } return {}; } diff --git a/src/iceberg/test/partition_spec_test.cc b/src/iceberg/test/partition_spec_test.cc index e3b86e274..da1724e49 100644 --- a/src/iceberg/test/partition_spec_test.cc +++ b/src/iceberg/test/partition_spec_test.cc @@ -219,6 +219,22 @@ TEST(PartitionSpecTest, InvalidTransformForType) { EXPECT_THAT(result_void, IsOk()); } +TEST(PartitionSpecTest, InvalidParameterizedTransform) { + auto field_id = SchemaField::MakeRequired(1, "id", int32()); + auto field_name = SchemaField::MakeRequired(2, "name", string()); + Schema schema({field_id, field_name}, Schema::kInitialSchemaId); + + PartitionField bucket_field(1, 1000, "id_bucket", Transform::Bucket(0)); + auto bucket_result = PartitionSpec::Make(schema, 1, {bucket_field}, false); + EXPECT_THAT(bucket_result, IsError(ErrorKind::kInvalidArgument)); + EXPECT_THAT(bucket_result, HasErrorMessage("Number of buckets must be positive")); + + PartitionField truncate_field(2, 1000, "name_trunc", Transform::Truncate(0)); + auto truncate_result = PartitionSpec::Make(schema, 1, {truncate_field}, false); + EXPECT_THAT(truncate_result, IsError(ErrorKind::kInvalidArgument)); + EXPECT_THAT(truncate_result, HasErrorMessage("Width must be positive")); +} + TEST(PartitionSpecTest, SourceIdNotFound) { auto field1 = SchemaField::MakeRequired(1, "id", int64()); auto field2 = SchemaField::MakeRequired(2, "ts", timestamp()); diff --git a/src/iceberg/test/sort_order_test.cc b/src/iceberg/test/sort_order_test.cc index d2bef580a..bd23d89c1 100644 --- a/src/iceberg/test/sort_order_test.cc +++ b/src/iceberg/test/sort_order_test.cc @@ -225,6 +225,21 @@ TEST_F(SortOrderTest, MakeInvalidSortOrderTransformCannotApply) { EXPECT_THAT(sort_order, HasErrorMessage("Invalid source type")); } +TEST_F(SortOrderTest, MakeInvalidParameterizedTransform) { + SortField bucket_field(1, Transform::Bucket(0), SortDirection::kAscending, + NullOrder::kFirst); + auto bucket_order = SortOrder::Make(*schema_, 1, std::vector{bucket_field}); + EXPECT_THAT(bucket_order, IsError(ErrorKind::kInvalidArgument)); + EXPECT_THAT(bucket_order, HasErrorMessage("Number of buckets must be positive")); + + SortField truncate_field(2, Transform::Truncate(0), SortDirection::kAscending, + NullOrder::kFirst); + auto truncate_order = + SortOrder::Make(*schema_, 1, std::vector{truncate_field}); + EXPECT_THAT(truncate_order, IsError(ErrorKind::kInvalidArgument)); + EXPECT_THAT(truncate_order, HasErrorMessage("Width must be positive")); +} + TEST_F(SortOrderTest, MakeInvalidSortOrderNonPrimitiveField) { auto struct_field = std::make_unique( 4, "struct_field", diff --git a/src/iceberg/test/transform_test.cc b/src/iceberg/test/transform_test.cc index 8d7cd880d..65a2b5e58 100644 --- a/src/iceberg/test/transform_test.cc +++ b/src/iceberg/test/transform_test.cc @@ -154,10 +154,12 @@ TEST(TransformFromStringTest, PositiveCases) { } TEST(TransformFromStringTest, NegativeCases) { - constexpr std::array invalid_cases = { + constexpr std::array invalid_cases = { "bucket", // missing param "bucket[]", // empty param "bucket[abc]", // invalid number + "bucket[0]", // invalid number of buckets + "truncate[0]", // invalid width "unknown", // unsupported transform "bucket[16", // missing closing bracket "truncate[1]extra" // extra characters diff --git a/src/iceberg/transform.cc b/src/iceberg/transform.cc index 453941c95..ccbd80a43 100644 --- a/src/iceberg/transform.cc +++ b/src/iceberg/transform.cc @@ -27,6 +27,7 @@ #include "iceberg/expression/term.h" #include "iceberg/result.h" #include "iceberg/transform_function.h" +#include "iceberg/transform_internal.h" #include "iceberg/type.h" #include "iceberg/util/base64.h" #include "iceberg/util/checked_cast.h" @@ -232,6 +233,19 @@ bool Transform::CanTransform(const Type& source_type) const { std::unreachable(); } +Status Transform::Validate(const std::shared_ptr& source_type) const { + if (!source_type) { + return InvalidArgument("Source type cannot be null for transform {}", ToString()); + } + // Keep type mismatches as InvalidArgument; Bind reports them as NotSupported. + if (!CanTransform(*source_type)) { + return InvalidArgument("Invalid source type {} for transform {}", + source_type->ToString(), ToString()); + } + ICEBERG_RETURN_UNEXPECTED(Bind(source_type)); + return {}; +} + bool Transform::PreservesOrder() const { switch (transform_type_) { case TransformType::kUnknown: @@ -554,9 +568,11 @@ Result> TransformFromString(std::string_view transfor StringUtils::ParseNumber(match[2].str())); if (type_str == kBucketName) { + ICEBERG_RETURN_UNEXPECTED(internal::ValidateBucketTransformParameter(param)); return Transform::Bucket(param); } if (type_str == kTruncateName) { + ICEBERG_RETURN_UNEXPECTED(internal::ValidateTruncateTransformParameter(param)); return Transform::Truncate(param); } } diff --git a/src/iceberg/transform.h b/src/iceberg/transform.h index 6b855a74d..405519535 100644 --- a/src/iceberg/transform.h +++ b/src/iceberg/transform.h @@ -159,6 +159,11 @@ class ICEBERG_EXPORT Transform : public util::Formattable { /// \return true if this transform can be applied to the type, false otherwise bool CanTransform(const Type& source_type) const; + /// \brief Validates whether this transform can bind to the given source type. + /// \param source_type The source type to validate against. + /// \return Error status if the transform cannot bind to the source type. + Status Validate(const std::shared_ptr& source_type) const; + /// \brief Whether the transform preserves the order of values (is monotonic). bool PreservesOrder() const; diff --git a/src/iceberg/transform_function.cc b/src/iceberg/transform_function.cc index 4325c53d1..8c8eafe17 100644 --- a/src/iceberg/transform_function.cc +++ b/src/iceberg/transform_function.cc @@ -22,6 +22,7 @@ #include #include "iceberg/expression/literal.h" +#include "iceberg/transform_internal.h" #include "iceberg/type.h" #include "iceberg/type_fwd.h" #include "iceberg/util/bucket_util.h" @@ -90,9 +91,7 @@ Result> BucketTransform::Make( return NotSupported("{} is not a valid input type for bucket transform", source_type->ToString()); } - if (num_buckets <= 0) { - return InvalidArgument("Number of buckets must be positive, got {}", num_buckets); - } + ICEBERG_RETURN_UNEXPECTED(internal::ValidateBucketTransformParameter(num_buckets)); return std::make_unique(source_type, num_buckets); } @@ -124,9 +123,7 @@ Result> TruncateTransform::Make( return NotSupported("{} is not a valid input type for truncate transform", source_type->ToString()); } - if (width <= 0) { - return InvalidArgument("Width must be positive, got {}", width); - } + ICEBERG_RETURN_UNEXPECTED(internal::ValidateTruncateTransformParameter(width)); return std::make_unique(source_type, width); } diff --git a/src/iceberg/transform_internal.h b/src/iceberg/transform_internal.h new file mode 100644 index 000000000..800bf92b9 --- /dev/null +++ b/src/iceberg/transform_internal.h @@ -0,0 +1,42 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. + */ + +#pragma once + +#include + +#include "iceberg/result.h" + +namespace iceberg::internal { + +inline Status ValidateBucketTransformParameter(int32_t num_buckets) { + if (num_buckets <= 0) { + return InvalidArgument("Number of buckets must be positive, got {}", num_buckets); + } + return {}; +} + +inline Status ValidateTruncateTransformParameter(int32_t width) { + if (width <= 0) { + return InvalidArgument("Width must be positive, got {}", width); + } + return {}; +} + +} // namespace iceberg::internal