3333
3434namespace iceberg {
3535
// Schema constructor. The removed (-) form takes the fields and an optional
// schema id; the added (+) form takes a third argument, the identifier field
// ids, and moves it into identifier_field_ids_. In both forms the fields are
// moved into the StructType base and the schema id is stored as given.
36- Schema::Schema (std::vector<SchemaField> fields, std::optional<int32_t > schema_id)
37- : StructType(std::move(fields)), schema_id_(schema_id) {}
36+ Schema::Schema (std::vector<SchemaField> fields, std::optional<int32_t > schema_id,
37+ std::vector<int32_t > identifier_field_ids)
38+ : StructType(std::move(fields)),
39+ schema_id_ (schema_id),
40+ identifier_field_ids_(std::move(identifier_field_ids)) {}
41+
42+ Result<std::unique_ptr<Schema>> Schema::Make (
43+ std::vector<SchemaField> fields, std::optional<int32_t > schema_id,
44+ const std::vector<std::string>& identifier_field_names) {
45+ auto schema = std::make_unique<Schema>(std::move (fields), schema_id);
46+
47+ std::vector<int32_t > fresh_identifier_ids;
48+ for (const auto & name : identifier_field_names) {
49+ ICEBERG_ASSIGN_OR_RAISE (auto field, schema->FindFieldByName (name));
50+ if (!field) {
51+ return InvalidSchema (" Cannot find identifier field: {}" , name);
52+ }
53+ fresh_identifier_ids.push_back (field.value ().get ().field_id ());
54+ }
55+ schema->identifier_field_ids_ = std::move (fresh_identifier_ids);
56+ return schema;
57+ }
3858
3959std::optional<int32_t > Schema::schema_id () const { return schema_id_; }
4060
@@ -48,15 +68,16 @@ std::string Schema::ToString() const {
4868}
4969
// Compares this schema with `other`. The removed (-) form compares the schema
// id and the fields; the added (+) form additionally requires the identifier
// field id lists to be equal.
5070bool Schema::Equals (const Schema& other) const {
51- return schema_id_ == other.schema_id_ && fields_ == other.fields_ ;
71+ return schema_id_ == other.schema_id_ && fields_ == other.fields_ &&
72+ identifier_field_ids_ == other.identifier_field_ids_ ;
5273}
5374
5475Result<std::optional<std::reference_wrapper<const SchemaField>>> Schema::FindFieldByName (
5576 std::string_view name, bool case_sensitive) const {
5677 if (case_sensitive) {
57- ICEBERG_ASSIGN_OR_RAISE (auto name_to_id, name_to_id_ .Get (*this ));
58- auto it = name_to_id .get ().find (name);
59- if (it == name_to_id .get ().end ()) {
78+ ICEBERG_ASSIGN_OR_RAISE (auto name_id_map, name_id_map_ .Get (*this ));
79+ auto it = name_id_map .get (). name_to_id .find (name);
80+ if (it == name_id_map .get (). name_to_id .end ()) {
6081 return std::nullopt ;
6182 };
6283 return FindFieldById (it->second );
@@ -77,21 +98,22 @@ Schema::InitIdToFieldMap(const Schema& self) {
7798 return id_to_field;
7899}
79100
// Initializer for the case-sensitive name lookup data. The removed (-) form,
// InitNameToIdMap, fills a single name -> id map. The added (+) form,
// InitNameIdMap, fills a NameIdMap by handing the visitor both its name_to_id
// member and a pointer to its id_to_name member. Both forms run a
// NameToIdVisitor over `self` via VisitTypeInline, call Finish() on the
// visitor, and return the populated container.
80- Result<std::unordered_map<std::string, int32_t , StringHash, std::equal_to<>>>
81- Schema::InitNameToIdMap ( const Schema& self) {
82- std::unordered_map<std::string, int32_t , StringHash, std::equal_to<>> name_to_id;
83- NameToIdVisitor visitor (name_to_id, /* case_sensitive=*/ true );
101+ Result<Schema::NameIdMap> Schema::InitNameIdMap ( const Schema& self) {
102+ NameIdMap name_id_map;
103+ NameToIdVisitor visitor (name_id_map. name_to_id , &name_id_map. id_to_name ,
104+ /* case_sensitive=*/ true );
84105 ICEBERG_RETURN_UNEXPECTED (
85106 VisitTypeInline (self, &visitor, /* path=*/ " " , /* short_path=*/ " " ));
86107 visitor.Finish ();
87- return name_to_id ;
108+ return name_id_map ;
88109}
89110
90111Result<std::unordered_map<std::string, int32_t , StringHash, std::equal_to<>>>
91112Schema::InitLowerCaseNameToIdMap (const Schema& self) {
92113 std::unordered_map<std::string, int32_t , StringHash, std::equal_to<>>
93114 lowercase_name_to_id;
94- NameToIdVisitor visitor (lowercase_name_to_id, /* case_sensitive=*/ false );
115+ NameToIdVisitor visitor (lowercase_name_to_id, /* id_to_name=*/ nullptr ,
116+ /* case_sensitive=*/ false );
95117 ICEBERG_RETURN_UNEXPECTED (
96118 VisitTypeInline (self, &visitor, /* path=*/ " " , /* short_path=*/ " " ));
97119 visitor.Finish ();
@@ -108,6 +130,16 @@ Result<std::optional<std::reference_wrapper<const SchemaField>>> Schema::FindFie
108130 return it->second ;
109131}
110132
133+ Result<std::optional<std::string_view>> Schema::FindColumnNameById (
134+ int32_t field_id) const {
135+ ICEBERG_ASSIGN_OR_RAISE (auto name_id_map, name_id_map_.Get (*this ));
136+ auto it = name_id_map.get ().id_to_name .find (field_id);
137+ if (it == name_id_map.get ().id_to_name .end ()) {
138+ return std::nullopt ;
139+ }
140+ return it->second ;
141+ }
142+
111143Result<std::unordered_map<int32_t , std::vector<size_t >>> Schema::InitIdToPositionPath (
112144 const Schema& self) {
113145 PositionPathVisitor visitor;
@@ -179,4 +211,21 @@ Result<std::unique_ptr<Schema>> Schema::Project(
179211 std::nullopt );
180212}
181213
214+ const std::vector<int32_t >& Schema::IdentifierFieldIds () const {
215+ return identifier_field_ids_;
216+ }
217+
218+ Result<std::vector<std::string>> Schema::IdentifierFieldNames () const {
219+ std::vector<std::string> names;
220+ names.reserve (identifier_field_ids_.size ());
221+ for (auto id : identifier_field_ids_) {
222+ ICEBERG_ASSIGN_OR_RAISE (auto name, FindColumnNameById (id));
223+ if (!name.has_value ()) {
224+ return InvalidSchema (" Cannot find the field of the specified field id: {}" , id);
225+ }
226+ names.emplace_back (name.value ());
227+ }
228+ return names;
229+ }
230+
182231} // namespace iceberg
0 commit comments