From d08ac8559d213bdaf07c2ef3153b854bca23f548 Mon Sep 17 00:00:00 2001 From: Li Feiyang Date: Mon, 15 Dec 2025 15:58:03 +0800 Subject: [PATCH 1/2] feat: replace sort order for pending update --- src/iceberg/CMakeLists.txt | 1 + src/iceberg/expression/term.h | 2 + src/iceberg/meson.build | 2 + src/iceberg/table_metadata.cc | 5 + src/iceberg/table_metadata.h | 7 + src/iceberg/test/CMakeLists.txt | 1 + src/iceberg/test/meson.build | 1 + src/iceberg/test/replace_sort_order_test.cc | 256 ++++++++++++++++++++ src/iceberg/type_fwd.h | 1 + src/iceberg/update/replace_sort_order.cc | 150 ++++++++++++ src/iceberg/update/replace_sort_order.h | 107 ++++++++ 11 files changed, 533 insertions(+) create mode 100644 src/iceberg/test/replace_sort_order_test.cc create mode 100644 src/iceberg/update/replace_sort_order.cc create mode 100644 src/iceberg/update/replace_sort_order.h diff --git a/src/iceberg/CMakeLists.txt b/src/iceberg/CMakeLists.txt index a0d93967f..37cbec1cf 100644 --- a/src/iceberg/CMakeLists.txt +++ b/src/iceberg/CMakeLists.txt @@ -75,6 +75,7 @@ set(ICEBERG_SOURCES transform.cc transform_function.cc type.cc + update/replace_sort_order.cc update/update_properties.cc util/bucket_util.cc util/conversions.cc diff --git a/src/iceberg/expression/term.h b/src/iceberg/expression/term.h index 5e834af51..b50ea9242 100644 --- a/src/iceberg/expression/term.h +++ b/src/iceberg/expression/term.h @@ -151,6 +151,8 @@ class ICEBERG_EXPORT BoundReference std::shared_ptr type() const override { return field_.type(); } + int32_t field_id() const { return field_.field_id(); } + bool MayProduceNull() const override { return field_.optional(); } bool Equals(const BoundTerm& other) const override; diff --git a/src/iceberg/meson.build b/src/iceberg/meson.build index d473d72e1..7245f9ac8 100644 --- a/src/iceberg/meson.build +++ b/src/iceberg/meson.build @@ -97,6 +97,7 @@ iceberg_sources = files( 'transform.cc', 'transform_function.cc', 'type.cc', + 'update/replace_sort_order.cc', 'update/update_properties.cc', 'util/bucket_util.cc', 'util/conversions.cc', @@ -196,6 +197,7 @@ install_headers( 'transform.h', 'type_fwd.h', 'type.h', + 'update/replace_sort_order.h', 'update/update_properties.h', ], subdir: 'iceberg', diff --git a/src/iceberg/table_metadata.cc b/src/iceberg/table_metadata.cc index 4e814e027..1c6f7817c 100644 --- a/src/iceberg/table_metadata.cc +++ b/src/iceberg/table_metadata.cc @@ -30,6 +30,7 @@ #include #include #include +#include #include @@ -758,6 +759,10 @@ TableMetadataBuilder& TableMetadataBuilder::RemoveEncryptionKey(std::string_view throw IcebergError(std::format("{} not implemented", __FUNCTION__)); } +const std::vector>& TableMetadataBuilder::Changes() const { + return impl_->changes; +} + Result> TableMetadataBuilder::Build() { // 1. Check for accumulated errors ICEBERG_RETURN_UNEXPECTED(CheckErrors()); diff --git a/src/iceberg/table_metadata.h b/src/iceberg/table_metadata.h index f428fd34f..ccc3b4f6e 100644 --- a/src/iceberg/table_metadata.h +++ b/src/iceberg/table_metadata.h @@ -419,6 +419,13 @@ class ICEBERG_EXPORT TableMetadataBuilder : public ErrorCollector { /// \return A Result containing the constructed TableMetadata or an error Result> Build(); + /// \brief Get the list of changes made to the table metadata + /// + /// \return A vector of unique pointers to TableUpdates representing the changes + /// \note We perform error checks and validate metadata consistency in Build(), so call + /// Build() before calling Changes(). + const std::vector>& Changes() const; + /// \brief Destructor ~TableMetadataBuilder() override; diff --git a/src/iceberg/test/CMakeLists.txt b/src/iceberg/test/CMakeLists.txt index 2c4e0f512..f43dd7015 100644 --- a/src/iceberg/test/CMakeLists.txt +++ b/src/iceberg/test/CMakeLists.txt @@ -71,6 +71,7 @@ add_iceberg_test(table_test SOURCES json_internal_test.cc metrics_config_test.cc + replace_sort_order_test.cc schema_json_test.cc table_test.cc table_metadata_builder_test.cc diff --git a/src/iceberg/test/meson.build b/src/iceberg/test/meson.build index f12b43afa..d3a796c85 100644 --- a/src/iceberg/test/meson.build +++ b/src/iceberg/test/meson.build @@ -48,6 +48,7 @@ iceberg_tests = { 'sources': files( 'json_internal_test.cc', 'metrics_config_test.cc', + 'replace_sort_order_test.cc', 'schema_json_test.cc', 'table_metadata_builder_test.cc', 'table_requirement_test.cc', diff --git a/src/iceberg/test/replace_sort_order_test.cc b/src/iceberg/test/replace_sort_order_test.cc new file mode 100644 index 000000000..06394ebb8 --- /dev/null +++ b/src/iceberg/test/replace_sort_order_test.cc @@ -0,0 +1,256 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. + */ + +#include "iceberg/update/replace_sort_order.h" + +#include +#include +#include + +#include +#include + +#include "iceberg/expression/term.h" +#include "iceberg/result.h" +#include "iceberg/schema.h" +#include "iceberg/schema_field.h" +#include "iceberg/sort_field.h" +#include "iceberg/table.h" +#include "iceberg/table_identifier.h" +#include "iceberg/table_metadata.h" +#include "iceberg/test/matchers.h" +#include "iceberg/test/mock_catalog.h" +#include "iceberg/transform.h" + +namespace iceberg { + +class ReplaceSortOrderTest : public ::testing::Test { + protected: + void SetUp() override { + // Create a simple schema with various field types + SchemaField field1(1, "id", std::make_shared(), false); + SchemaField field2(2, "name", std::make_shared(), true); + SchemaField field3(3, "ts", std::make_shared(), true); + SchemaField field4(4, "price", std::make_shared(10, 2), true); + schema_ = std::make_shared( + std::vector{field1, field2, field3, field4}, 1); + + // Create basic table metadata + metadata_ = std::make_shared(); + metadata_->schemas.push_back(schema_); + metadata_->current_schema_id = 1; + + // Create catalog and table identifier + catalog_ = std::make_shared(); + identifier_ = TableIdentifier(Namespace({"test"}), "table"); + } + + std::shared_ptr schema_; + std::shared_ptr metadata_; + std::shared_ptr catalog_; + TableIdentifier identifier_; +}; + +TEST_F(ReplaceSortOrderTest, AscendingSingleField) { + ReplaceSortOrder update(identifier_, catalog_, metadata_); + + auto ref = NamedReference::Make("id").value(); + auto term = UnboundTransform::Make(std::move(ref), Transform::Identity()).value(); + + update.Asc(std::move(term), NullOrder::kFirst); + + auto result = update.Apply(); + EXPECT_THAT(result, IsOk()); + + auto sort_order = update.GetBuiltSortOrder(); + ASSERT_NE(sort_order, nullptr); + EXPECT_EQ(sort_order->fields().size(), 1); + + const auto& field = sort_order->fields()[0]; + EXPECT_EQ(field.source_id(), 1); + EXPECT_EQ(field.direction(), SortDirection::kAscending); + EXPECT_EQ(field.null_order(), NullOrder::kFirst); +} + +TEST_F(ReplaceSortOrderTest, DescendingSingleField) { + ReplaceSortOrder update(identifier_, catalog_, metadata_); + + auto ref = NamedReference::Make("name").value(); + auto term = UnboundTransform::Make(std::move(ref), Transform::Identity()).value(); + + update.Desc(std::move(term), NullOrder::kLast); + + auto result = update.Apply(); + EXPECT_THAT(result, IsOk()); + + auto sort_order = update.GetBuiltSortOrder(); + ASSERT_NE(sort_order, nullptr); + EXPECT_EQ(sort_order->fields().size(), 1); + + const auto& field = sort_order->fields()[0]; + EXPECT_EQ(field.source_id(), 2); + EXPECT_EQ(field.direction(), SortDirection::kDescending); + EXPECT_EQ(field.null_order(), NullOrder::kLast); +} + +TEST_F(ReplaceSortOrderTest, MultipleFields) { + ReplaceSortOrder update(identifier_, catalog_, metadata_); + + auto ref1 = NamedReference::Make("name").value(); + auto term1 = UnboundTransform::Make(std::move(ref1), Transform::Identity()).value(); + + auto ref2 = NamedReference::Make("id").value(); + auto term2 = UnboundTransform::Make(std::move(ref2), Transform::Identity()).value(); + + update.Asc(std::move(term1), NullOrder::kFirst) + .Desc(std::move(term2), NullOrder::kLast); + + auto result = update.Apply(); + EXPECT_THAT(result, IsOk()); + + auto sort_order = update.GetBuiltSortOrder(); + ASSERT_NE(sort_order, nullptr); + EXPECT_EQ(sort_order->fields().size(), 2); + + // Check first field + const auto& field1 = sort_order->fields()[0]; + EXPECT_EQ(field1.source_id(), 2); // name field + EXPECT_EQ(field1.direction(), SortDirection::kAscending); + EXPECT_EQ(field1.null_order(), NullOrder::kFirst); + + // Check second field + const auto& field2 = sort_order->fields()[1]; + EXPECT_EQ(field2.source_id(), 1); // id field + EXPECT_EQ(field2.direction(), SortDirection::kDescending); + EXPECT_EQ(field2.null_order(), NullOrder::kLast); +} + +TEST_F(ReplaceSortOrderTest, WithTransform) { + ReplaceSortOrder update(identifier_, catalog_, metadata_); + + auto ref = NamedReference::Make("ts").value(); + auto term = UnboundTransform::Make(std::move(ref), Transform::Day()).value(); + + update.Asc(std::move(term), NullOrder::kFirst); + + auto result = update.Apply(); + EXPECT_THAT(result, IsOk()); + + auto sort_order = update.GetBuiltSortOrder(); + ASSERT_NE(sort_order, nullptr); + EXPECT_EQ(sort_order->fields().size(), 1); + + const auto& field = sort_order->fields()[0]; + EXPECT_EQ(field.source_id(), 3); + EXPECT_EQ(field.transform()->ToString(), "day"); + EXPECT_EQ(field.direction(), SortDirection::kAscending); + EXPECT_EQ(field.null_order(), NullOrder::kFirst); +} + +TEST_F(ReplaceSortOrderTest, WithBucketTransform) { + ReplaceSortOrder update(identifier_, catalog_, metadata_); + + auto ref = NamedReference::Make("name").value(); + auto term = UnboundTransform::Make(std::move(ref), Transform::Bucket(10)).value(); + + update.Desc(std::move(term), NullOrder::kLast); + + auto result = update.Apply(); + EXPECT_THAT(result, IsOk()); + + auto sort_order = update.GetBuiltSortOrder(); + ASSERT_NE(sort_order, nullptr); + EXPECT_EQ(sort_order->fields().size(), 1); + + const auto& field = sort_order->fields()[0]; + EXPECT_EQ(field.source_id(), 2); + EXPECT_EQ(field.transform()->ToString(), "bucket[10]"); + EXPECT_EQ(field.direction(), SortDirection::kDescending); + EXPECT_EQ(field.null_order(), NullOrder::kLast); +} + +TEST_F(ReplaceSortOrderTest, NullTerm) { + ReplaceSortOrder update(identifier_, catalog_, metadata_); + + update.Asc(nullptr, NullOrder::kFirst); + + auto result = update.Apply(); + EXPECT_THAT(result, IsError(ErrorKind::kValidationFailed)); + EXPECT_THAT(result, HasErrorMessage("Term cannot be null")); +} + +TEST_F(ReplaceSortOrderTest, InvalidTransformForType) { + ReplaceSortOrder update(identifier_, catalog_, metadata_); + + // Try to apply day transform to a string field (invalid) + auto ref = NamedReference::Make("name").value(); + auto term = UnboundTransform::Make(std::move(ref), Transform::Day()).value(); + + update.Asc(std::move(term), NullOrder::kFirst); + + auto result = update.Apply(); + EXPECT_THAT(result, IsError(ErrorKind::kValidationFailed)); + EXPECT_THAT(result, HasErrorMessage("not a valid input type")); +} + +TEST_F(ReplaceSortOrderTest, NonExistentField) { + ReplaceSortOrder update(identifier_, catalog_, metadata_); + + auto ref = NamedReference::Make("nonexistent").value(); + auto term = UnboundTransform::Make(std::move(ref), Transform::Identity()).value(); + + update.Asc(std::move(term), NullOrder::kFirst); + + auto result = update.Apply(); + EXPECT_THAT(result, IsError(ErrorKind::kValidationFailed)); + EXPECT_THAT(result, HasErrorMessage("Cannot find")); +} + +TEST_F(ReplaceSortOrderTest, CaseSensitiveTrue) { + ReplaceSortOrder update(identifier_, catalog_, metadata_); + + auto ref = NamedReference::Make("ID").value(); // Uppercase + auto term = UnboundTransform::Make(std::move(ref), Transform::Identity()).value(); + + update.CaseSensitive(true).Asc(std::move(term), NullOrder::kFirst); + + auto result = update.Apply(); + // Should fail because schema has "id" (lowercase) + EXPECT_THAT(result, IsError(ErrorKind::kValidationFailed)); +} + +TEST_F(ReplaceSortOrderTest, CaseSensitiveFalse) { + ReplaceSortOrder update(identifier_, catalog_, metadata_); + + auto ref = NamedReference::Make("ID").value(); // Uppercase + auto term = UnboundTransform::Make(std::move(ref), Transform::Identity()).value(); + + update.CaseSensitive(false).Asc(std::move(term), NullOrder::kFirst); + + auto result = update.Apply(); + // Should succeed because case-insensitive matching + EXPECT_THAT(result, IsOk()); + + auto sort_order = update.GetBuiltSortOrder(); + ASSERT_NE(sort_order, nullptr); + EXPECT_EQ(sort_order->fields().size(), 1); + EXPECT_EQ(sort_order->fields()[0].source_id(), 1); +} + +} // namespace iceberg diff --git a/src/iceberg/type_fwd.h b/src/iceberg/type_fwd.h index 0e1867f60..4afb57f07 100644 --- a/src/iceberg/type_fwd.h +++ b/src/iceberg/type_fwd.h @@ -163,6 +163,7 @@ class PendingUpdate; template class PendingUpdateTyped; class UpdateProperties; +class ReplaceSortOrder; /// ---------------------------------------------------------------------------- /// TODO: Forward declarations below are not added yet. diff --git a/src/iceberg/update/replace_sort_order.cc b/src/iceberg/update/replace_sort_order.cc new file mode 100644 index 000000000..f3d74025f --- /dev/null +++ b/src/iceberg/update/replace_sort_order.cc @@ -0,0 +1,150 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. + */ + +#include "iceberg/update/replace_sort_order.h" + +#include +#include +#include + +#include "iceberg/catalog.h" +#include "iceberg/expression/term.h" +#include "iceberg/result.h" +#include "iceberg/sort_field.h" +#include "iceberg/sort_order.h" +#include "iceberg/table.h" +#include "iceberg/table_identifier.h" +#include "iceberg/table_metadata.h" +#include "iceberg/table_requirements.h" +#include "iceberg/transform.h" +#include "iceberg/util/checked_cast.h" +#include "iceberg/util/error_collector.h" +#include "iceberg/util/macros.h" + +namespace iceberg { + +ReplaceSortOrder::ReplaceSortOrder(TableIdentifier identifier, + std::shared_ptr catalog, + std::shared_ptr base) + : identifier_(std::move(identifier)), + catalog_(std::move(catalog)), + base_metadata_(std::move(base)) {} + +ReplaceSortOrder& ReplaceSortOrder::Asc(std::shared_ptr term, + NullOrder null_order) { + if (!term) { + return AddError(ErrorKind::kInvalidArgument, "Term cannot be null"); + } + if (term->kind() != BoundTerm::Kind::kTransform) { + return AddError(ErrorKind::kInvalidArgument, "Term must be a transform term"); + } + if (!term->is_unbound()) { + return AddError(ErrorKind::kInvalidArgument, "Term must be unbound"); + } + // use checked-cast to get UnboundTransform + auto unbound_transform = internal::checked_pointer_cast(term); + + BUILDER_ASSIGN_OR_RETURN(auto schema, base_metadata_->Schema()); + BUILDER_ASSIGN_OR_RETURN(auto bound_term, + unbound_transform->Bind(*schema, case_sensitive_)); + return AddSortField(bound_term->reference(), unbound_transform->transform(), + SortDirection::kAscending, null_order); +} + +ReplaceSortOrder& ReplaceSortOrder::Desc(std::shared_ptr term, + NullOrder null_order) { + if (!term) { + return AddError(ErrorKind::kInvalidArgument, "Transform term cannot be null"); + } + if (term->kind() != BoundTerm::Kind::kTransform) { + return AddError(ErrorKind::kInvalidArgument, "Term must be a transform term"); + } + if (!term->is_unbound()) { + return AddError(ErrorKind::kInvalidArgument, "Term must be unbound"); + } + // use checked-cast to get UnboundTransform + auto unbound_transform = internal::checked_pointer_cast(term); + BUILDER_ASSIGN_OR_RETURN(auto schema, base_metadata_->Schema()); + BUILDER_ASSIGN_OR_RETURN(auto bound_term, + unbound_transform->Bind(*schema, case_sensitive_)); + return AddSortField(bound_term->reference(), unbound_transform->transform(), + SortDirection::kDescending, null_order); +} + +ReplaceSortOrder& ReplaceSortOrder::CaseSensitive(bool case_sensitive) { + case_sensitive_ = case_sensitive; + return *this; +} + +ReplaceSortOrder& ReplaceSortOrder::AddSortField(std::shared_ptr ref, + std::shared_ptr transform, + SortDirection direction, + NullOrder null_order) { + int32_t source_id = ref->field_id(); + const auto& field = ref->field(); + + // Validate that the transform can be applied to the field type 、 + if (!transform->CanTransform(*field.type())) { + return AddError( + ErrorKind::kInvalidArgument, + std::format("Invalid transform {} for field '{}' with type {}", + transform->ToString(), field.name(), field.type()->ToString())); + } + + sort_fields_.emplace_back(source_id, std::move(transform), direction, null_order); + return *this; +} + +Status ReplaceSortOrder::Apply() { + ICEBERG_RETURN_UNEXPECTED(CheckErrors()); + // Note: We use kInitialSortOrderId (1) here like the Java implementation. + // The actual sort order ID will be assigned by TableMetadataBuilder when + // the AddSortOrder update is applied. + ICEBERG_ASSIGN_OR_RAISE(auto order, + SortOrder::Make(SortOrder::kInitialSortOrderId, sort_fields_)); + ICEBERG_RETURN_UNEXPECTED(order->Validate(*base_metadata_->Schema().value())); + built_sort_order_ = std::move(order); + return {}; +} + +Status ReplaceSortOrder::Commit() { + ICEBERG_RETURN_UNEXPECTED(Apply()); + + // Use TableMetadataBuilder to generate the changes + auto builder = TableMetadataBuilder::BuildFrom(base_metadata_.get()); + builder->SetDefaultSortOrder(built_sort_order_); + + // Call Build() for error checks and validate metadata consistency. We simply ignore the + // returned Tablemetadata here, this may need to be optimized in the future. + ICEBERG_RETURN_UNEXPECTED(builder->Build()); + const auto& updates = builder->Changes(); + if (!updates.empty()) { + ICEBERG_ASSIGN_OR_RAISE(auto requirements, + TableRequirements::ForUpdateTable(*base_metadata_, updates)); + ICEBERG_RETURN_UNEXPECTED(catalog_->UpdateTable(identifier_, requirements, updates)); + } + + return {}; +} + +std::shared_ptr ReplaceSortOrder::GetBuiltSortOrder() const { + return built_sort_order_; +} + +} // namespace iceberg diff --git a/src/iceberg/update/replace_sort_order.h b/src/iceberg/update/replace_sort_order.h new file mode 100644 index 000000000..fd1131d1a --- /dev/null +++ b/src/iceberg/update/replace_sort_order.h @@ -0,0 +1,107 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. + */ + +#pragma once + +#include +#include + +#include "iceberg/expression/term.h" +#include "iceberg/iceberg_export.h" +#include "iceberg/pending_update.h" +#include "iceberg/sort_field.h" +#include "iceberg/sort_order.h" +#include "iceberg/table_identifier.h" +#include "iceberg/type_fwd.h" + +namespace iceberg { + +/// \brief Replacing table sort order with a newly created order. +class ICEBERG_EXPORT ReplaceSortOrder : public PendingUpdate { + public: + /// \brief Constructs a ReplaceSortOrder for the specified table. + /// + /// \param identifier The table identifier + /// \param catalog The catalog containing the table + /// \param metadata The current table metadata + ReplaceSortOrder(TableIdentifier identifier, std::shared_ptr catalog, + std::shared_ptr base); + + /// \brief Add a field to the sort by field name, ascending with the given null order. + /// + /// \param term A term referencing the field + /// \param null_order The null order (first or last) + /// \return Reference to this ReplaceSortOrder for chaining + ReplaceSortOrder& Asc(std::shared_ptr term, NullOrder null_order); + + /// \brief Add a field to the sort by field name, descending with the given null order. + /// + /// \param term A transform term referencing the field + /// \param null_order The null order (first or last) + /// \return Reference to this ReplaceSortOrder for chaining + ReplaceSortOrder& Desc(std::shared_ptr term, NullOrder null_order); + + /// \brief Set case sensitivity of sort column name resolution. + /// + /// \param case_sensitive When true, column name resolution is case-sensitive + /// \return Reference to this ReplaceSortOrder for chaining + ReplaceSortOrder& CaseSensitive(bool case_sensitive); + + /// \brief Applies the sort order changes without committing them. + /// + /// Validates the pending sort order changes but does not commit them to the table. + /// This method can be used to validate changes before actually committing them. + /// + /// \return Status::OK if the changes are valid, or an error if validation fails + Status Apply() override; + + /// \brief Commits the sort order changes to the table. + /// + /// Validates the changes and applies them to the table through the catalog. + /// + /// \return Status::OK if the changes are valid and committed successfully, or an error + Status Commit() override; + + /// \brief Get the built sort order after applying changes. + /// + /// \return The built SortOrder object. + std::shared_ptr GetBuiltSortOrder() const; + + private: + /// \brief Helper to add a sort field after binding the term. + /// + /// \param ref The bound reference to the field + /// \param transform The transform to apply + /// \param direction The sort direction + /// \param null_order The null order + /// \return Reference to this ReplaceSortOrder for chaining + ReplaceSortOrder& AddSortField(std::shared_ptr ref, + std::shared_ptr transform, + SortDirection direction, NullOrder null_order); + + TableIdentifier identifier_; + std::shared_ptr catalog_; + std::shared_ptr base_metadata_; + + std::vector sort_fields_; + bool case_sensitive_ = true; + std::shared_ptr built_sort_order_; +}; + +} // namespace iceberg From 647ada297173157665558d13fd534d57c510fd5a Mon Sep 17 00:00:00 2001 From: Li Feiyang Date: Wed, 17 Dec 2025 11:37:33 +0800 Subject: [PATCH 2/2] 1 --- src/iceberg/meson.build | 3 +- src/iceberg/test/replace_sort_order_test.cc | 24 ++++---- src/iceberg/type_fwd.h | 2 - src/iceberg/update/meson.build | 21 +++++++ src/iceberg/update/replace_sort_order.cc | 63 ++++++--------------- src/iceberg/update/replace_sort_order.h | 29 ++++------ 6 files changed, 63 insertions(+), 79 deletions(-) create mode 100644 src/iceberg/update/meson.build diff --git a/src/iceberg/meson.build b/src/iceberg/meson.build index 7245f9ac8..e255d063d 100644 --- a/src/iceberg/meson.build +++ b/src/iceberg/meson.build @@ -197,8 +197,6 @@ install_headers( 'transform.h', 'type_fwd.h', 'type.h', - 'update/replace_sort_order.h', - 'update/update_properties.h', ], subdir: 'iceberg', ) @@ -207,6 +205,7 @@ subdir('catalog') subdir('expression') subdir('manifest') subdir('row') +subdir('update') subdir('util') if get_option('tests').enabled() diff --git a/src/iceberg/test/replace_sort_order_test.cc b/src/iceberg/test/replace_sort_order_test.cc index 06394ebb8..c27273166 100644 --- a/src/iceberg/test/replace_sort_order_test.cc +++ b/src/iceberg/test/replace_sort_order_test.cc @@ -73,7 +73,7 @@ TEST_F(ReplaceSortOrderTest, AscendingSingleField) { auto ref = NamedReference::Make("id").value(); auto term = UnboundTransform::Make(std::move(ref), Transform::Identity()).value(); - update.Asc(std::move(term), NullOrder::kFirst); + update.AddSortField(std::move(term), SortDirection::kAscending, NullOrder::kFirst); auto result = update.Apply(); EXPECT_THAT(result, IsOk()); @@ -94,7 +94,7 @@ TEST_F(ReplaceSortOrderTest, DescendingSingleField) { auto ref = NamedReference::Make("name").value(); auto term = UnboundTransform::Make(std::move(ref), Transform::Identity()).value(); - update.Desc(std::move(term), NullOrder::kLast); + update.AddSortField(std::move(term), SortDirection::kDescending, NullOrder::kLast); auto result = update.Apply(); EXPECT_THAT(result, IsOk()); @@ -118,8 +118,8 @@ TEST_F(ReplaceSortOrderTest, MultipleFields) { auto ref2 = NamedReference::Make("id").value(); auto term2 = UnboundTransform::Make(std::move(ref2), Transform::Identity()).value(); - update.Asc(std::move(term1), NullOrder::kFirst) - .Desc(std::move(term2), NullOrder::kLast); + update.AddSortField(std::move(term1), SortDirection::kAscending, NullOrder::kFirst) + .AddSortField(std::move(term2), SortDirection::kDescending, NullOrder::kLast); auto result = update.Apply(); EXPECT_THAT(result, IsOk()); @@ -147,7 +147,7 @@ TEST_F(ReplaceSortOrderTest, WithTransform) { auto ref = NamedReference::Make("ts").value(); auto term = UnboundTransform::Make(std::move(ref), Transform::Day()).value(); - update.Asc(std::move(term), NullOrder::kFirst); + update.AddSortField(std::move(term), SortDirection::kAscending, NullOrder::kFirst); auto result = update.Apply(); EXPECT_THAT(result, IsOk()); @@ -169,7 +169,7 @@ TEST_F(ReplaceSortOrderTest, WithBucketTransform) { auto ref = NamedReference::Make("name").value(); auto term = UnboundTransform::Make(std::move(ref), Transform::Bucket(10)).value(); - update.Desc(std::move(term), NullOrder::kLast); + update.AddSortField(std::move(term), SortDirection::kDescending, NullOrder::kLast); auto result = update.Apply(); EXPECT_THAT(result, IsOk()); @@ -188,7 +188,7 @@ TEST_F(ReplaceSortOrderTest, WithBucketTransform) { TEST_F(ReplaceSortOrderTest, NullTerm) { ReplaceSortOrder update(identifier_, catalog_, metadata_); - update.Asc(nullptr, NullOrder::kFirst); + update.AddSortField(nullptr, SortDirection::kAscending, NullOrder::kFirst); auto result = update.Apply(); EXPECT_THAT(result, IsError(ErrorKind::kValidationFailed)); @@ -202,7 +202,7 @@ TEST_F(ReplaceSortOrderTest, InvalidTransformForType) { auto ref = NamedReference::Make("name").value(); auto term = UnboundTransform::Make(std::move(ref), Transform::Day()).value(); - update.Asc(std::move(term), NullOrder::kFirst); + update.AddSortField(std::move(term), SortDirection::kAscending, NullOrder::kFirst); auto result = update.Apply(); EXPECT_THAT(result, IsError(ErrorKind::kValidationFailed)); @@ -215,7 +215,7 @@ TEST_F(ReplaceSortOrderTest, NonExistentField) { auto ref = NamedReference::Make("nonexistent").value(); auto term = UnboundTransform::Make(std::move(ref), Transform::Identity()).value(); - update.Asc(std::move(term), NullOrder::kFirst); + update.AddSortField(std::move(term), SortDirection::kAscending, NullOrder::kFirst); auto result = update.Apply(); EXPECT_THAT(result, IsError(ErrorKind::kValidationFailed)); @@ -228,7 +228,8 @@ TEST_F(ReplaceSortOrderTest, CaseSensitiveTrue) { auto ref = NamedReference::Make("ID").value(); // Uppercase auto term = UnboundTransform::Make(std::move(ref), Transform::Identity()).value(); - update.CaseSensitive(true).Asc(std::move(term), NullOrder::kFirst); + update.CaseSensitive(true).AddSortField(std::move(term), SortDirection::kAscending, + NullOrder::kFirst); auto result = update.Apply(); // Should fail because schema has "id" (lowercase) @@ -241,7 +242,8 @@ TEST_F(ReplaceSortOrderTest, CaseSensitiveFalse) { auto ref = NamedReference::Make("ID").value(); // Uppercase auto term = UnboundTransform::Make(std::move(ref), Transform::Identity()).value(); - update.CaseSensitive(false).Asc(std::move(term), NullOrder::kFirst); + update.CaseSensitive(false).AddSortField(std::move(term), SortDirection::kAscending, + NullOrder::kFirst); auto result = update.Apply(); // Should succeed because case-insensitive matching diff --git a/src/iceberg/type_fwd.h b/src/iceberg/type_fwd.h index 4afb57f07..5b7c616fc 100644 --- a/src/iceberg/type_fwd.h +++ b/src/iceberg/type_fwd.h @@ -160,8 +160,6 @@ class TableMetadataBuilder; class TableUpdateContext; class PendingUpdate; -template -class PendingUpdateTyped; class UpdateProperties; class ReplaceSortOrder; diff --git a/src/iceberg/update/meson.build b/src/iceberg/update/meson.build new file mode 100644 index 000000000..7134fa3d5 --- /dev/null +++ b/src/iceberg/update/meson.build @@ -0,0 +1,21 @@ +# Licensed to the Apache Software Foundation (ASF) under one +# or more contributor license agreements. See the NOTICE file +# distributed with this work for additional information +# regarding copyright ownership. The ASF licenses this file +# to you under the Apache License, Version 2.0 (the +# "License"); you may not use this file except in compliance +# with the License. You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, +# software distributed under the License is distributed on an +# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +# KIND, either express or implied. See the License for the +# specific language governing permissions and limitations +# under the License. + +install_headers( + ['replace_sort_order.h', 'update_properties.h'], + subdir: 'iceberg/update', +) diff --git a/src/iceberg/update/replace_sort_order.cc b/src/iceberg/update/replace_sort_order.cc index f3d74025f..82cefb50a 100644 --- a/src/iceberg/update/replace_sort_order.cc +++ b/src/iceberg/update/replace_sort_order.cc @@ -46,12 +46,13 @@ ReplaceSortOrder::ReplaceSortOrder(TableIdentifier identifier, catalog_(std::move(catalog)), base_metadata_(std::move(base)) {} -ReplaceSortOrder& ReplaceSortOrder::Asc(std::shared_ptr term, - NullOrder null_order) { +ReplaceSortOrder& ReplaceSortOrder::AddSortField(std::shared_ptr term, + SortDirection direction, + NullOrder null_order) { if (!term) { return AddError(ErrorKind::kInvalidArgument, "Term cannot be null"); } - if (term->kind() != BoundTerm::Kind::kTransform) { + if (term->kind() != Term::Kind::kTransform) { return AddError(ErrorKind::kInvalidArgument, "Term must be a transform term"); } if (!term->is_unbound()) { @@ -60,31 +61,14 @@ ReplaceSortOrder& ReplaceSortOrder::Asc(std::shared_ptr term, // use checked-cast to get UnboundTransform auto unbound_transform = internal::checked_pointer_cast(term); - BUILDER_ASSIGN_OR_RETURN(auto schema, base_metadata_->Schema()); + BUILDER_ASSIGN_OR_RETURN(auto schema, GetSchema()); BUILDER_ASSIGN_OR_RETURN(auto bound_term, unbound_transform->Bind(*schema, case_sensitive_)); - return AddSortField(bound_term->reference(), unbound_transform->transform(), - SortDirection::kAscending, null_order); -} -ReplaceSortOrder& ReplaceSortOrder::Desc(std::shared_ptr term, - NullOrder null_order) { - if (!term) { - return AddError(ErrorKind::kInvalidArgument, "Transform term cannot be null"); - } - if (term->kind() != BoundTerm::Kind::kTransform) { - return AddError(ErrorKind::kInvalidArgument, "Term must be a transform term"); - } - if (!term->is_unbound()) { - return AddError(ErrorKind::kInvalidArgument, "Term must be unbound"); - } - // use checked-cast to get UnboundTransform - auto unbound_transform = internal::checked_pointer_cast(term); - BUILDER_ASSIGN_OR_RETURN(auto schema, base_metadata_->Schema()); - BUILDER_ASSIGN_OR_RETURN(auto bound_term, - unbound_transform->Bind(*schema, case_sensitive_)); - return AddSortField(bound_term->reference(), unbound_transform->transform(), - SortDirection::kDescending, null_order); + int32_t source_id = bound_term->reference()->field_id(); + sort_fields_.emplace_back(source_id, unbound_transform->transform(), direction, + null_order); + return *this; } ReplaceSortOrder& ReplaceSortOrder::CaseSensitive(bool case_sensitive) { @@ -92,25 +76,6 @@ ReplaceSortOrder& ReplaceSortOrder::CaseSensitive(bool case_sensitive) { return *this; } -ReplaceSortOrder& ReplaceSortOrder::AddSortField(std::shared_ptr ref, - std::shared_ptr transform, - SortDirection direction, - NullOrder null_order) { - int32_t source_id = ref->field_id(); - const auto& field = ref->field(); - - // Validate that the transform can be applied to the field type 、 - if (!transform->CanTransform(*field.type())) { - return AddError( - ErrorKind::kInvalidArgument, - std::format("Invalid transform {} for field '{}' with type {}", - transform->ToString(), field.name(), field.type()->ToString())); - } - - sort_fields_.emplace_back(source_id, std::move(transform), direction, null_order); - return *this; -} - Status ReplaceSortOrder::Apply() { ICEBERG_RETURN_UNEXPECTED(CheckErrors()); // Note: We use kInitialSortOrderId (1) here like the Java implementation. @@ -118,7 +83,8 @@ Status ReplaceSortOrder::Apply() { // the AddSortOrder update is applied. ICEBERG_ASSIGN_OR_RAISE(auto order, SortOrder::Make(SortOrder::kInitialSortOrderId, sort_fields_)); - ICEBERG_RETURN_UNEXPECTED(order->Validate(*base_metadata_->Schema().value())); + ICEBERG_ASSIGN_OR_RAISE(auto schema, GetSchema()); + ICEBERG_RETURN_UNEXPECTED(order->Validate(*schema)); built_sort_order_ = std::move(order); return {}; } @@ -147,4 +113,11 @@ std::shared_ptr ReplaceSortOrder::GetBuiltSortOrder() const { return built_sort_order_; } +Result> ReplaceSortOrder::GetSchema() { + if (!cached_schema_) { + ICEBERG_ASSIGN_OR_RAISE(cached_schema_, base_metadata_->Schema()); + } + return cached_schema_; +} + } // namespace iceberg diff --git a/src/iceberg/update/replace_sort_order.h b/src/iceberg/update/replace_sort_order.h index fd1131d1a..dcc24451d 100644 --- a/src/iceberg/update/replace_sort_order.h +++ b/src/iceberg/update/replace_sort_order.h @@ -20,6 +20,7 @@ #pragma once #include +#include #include #include "iceberg/expression/term.h" @@ -43,19 +44,14 @@ class ICEBERG_EXPORT ReplaceSortOrder : public PendingUpdate { ReplaceSortOrder(TableIdentifier identifier, std::shared_ptr catalog, std::shared_ptr base); - /// \brief Add a field to the sort by field name, ascending with the given null order. - /// - /// \param term A term referencing the field - /// \param null_order The null order (first or last) - /// \return Reference to this ReplaceSortOrder for chaining - ReplaceSortOrder& Asc(std::shared_ptr term, NullOrder null_order); - - /// \brief Add a field to the sort by field name, descending with the given null order. + /// \brief Add a sort field to the sort order. /// /// \param term A transform term referencing the field + /// \param direction The sort direction (ascending or descending) /// \param null_order The null order (first or last) /// \return Reference to this ReplaceSortOrder for chaining - ReplaceSortOrder& Desc(std::shared_ptr term, NullOrder null_order); + ReplaceSortOrder& AddSortField(std::shared_ptr term, SortDirection direction, + NullOrder null_order); /// \brief Set case sensitivity of sort column name resolution. /// @@ -84,16 +80,8 @@ class ICEBERG_EXPORT ReplaceSortOrder : public PendingUpdate { std::shared_ptr GetBuiltSortOrder() const; private: - /// \brief Helper to add a sort field after binding the term. - /// - /// \param ref The bound reference to the field - /// \param transform The transform to apply - /// \param direction The sort direction - /// \param null_order The null order - /// \return Reference to this ReplaceSortOrder for chaining - ReplaceSortOrder& AddSortField(std::shared_ptr ref, - std::shared_ptr transform, - SortDirection direction, NullOrder null_order); + /// \brief Get the schema, loading and caching it on first access. + Result> GetSchema(); TableIdentifier identifier_; std::shared_ptr catalog_; @@ -102,6 +90,9 @@ class ICEBERG_EXPORT ReplaceSortOrder : public PendingUpdate { std::vector sort_fields_; bool case_sensitive_ = true; std::shared_ptr built_sort_order_; + + // Cached schema to avoid repeated lookups + std::shared_ptr cached_schema_; }; } // namespace iceberg