3535
3636namespace iceberg {
3737
38+ struct SchemaReassignIdContext {
39+ Schema::IdMap ids_to_reassigned;
40+ Schema::IdMap ids_to_original;
41+ };
42+
3843namespace {
3944
45+ const Schema::IdMap& EmptyIdMap () {
46+ static const Schema::IdMap kEmpty ;
47+ return kEmpty ;
48+ }
49+
50+ void RecordIdReassignment (int32_t old_id, int32_t new_id,
51+ Schema::IdMap& ids_to_reassigned,
52+ Schema::IdMap& ids_to_original) {
53+ if (new_id != old_id) {
54+ ids_to_reassigned[old_id] = new_id;
55+ ids_to_original[new_id] = old_id;
56+ }
57+ }
58+
59+ SchemaField ReassignField (const SchemaField& field, int32_t new_id,
60+ const Schema::GetId& get_id, Schema::IdMap& ids_to_reassigned,
61+ Schema::IdMap& ids_to_original);
62+
63+ std::shared_ptr<Type> ReassignTypeIds (const std::shared_ptr<Type>& type,
64+ const Schema::GetId& get_id,
65+ Schema::IdMap& ids_to_reassigned,
66+ Schema::IdMap& ids_to_original) {
67+ switch (type->type_id ()) {
68+ case TypeId::kStruct : {
69+ const auto & struct_type = static_cast <const StructType&>(*type);
70+ const auto & fields = struct_type.fields ();
71+ std::vector<int32_t > new_ids;
72+ new_ids.reserve (fields.size ());
73+ for (const auto & field : fields) {
74+ const auto new_id = get_id (field.field_id ());
75+ RecordIdReassignment (field.field_id (), new_id, ids_to_reassigned,
76+ ids_to_original);
77+ new_ids.push_back (new_id);
78+ }
79+
80+ std::vector<SchemaField> reassigned_fields;
81+ reassigned_fields.reserve (fields.size ());
82+ for (size_t i = 0 ; i < fields.size (); ++i) {
83+ reassigned_fields.emplace_back (ReassignField (fields[i], new_ids[i], get_id,
84+ ids_to_reassigned, ids_to_original));
85+ }
86+ return std::make_shared<StructType>(std::move (reassigned_fields));
87+ }
88+ case TypeId::kList : {
89+ const auto & list_type = static_cast <const ListType&>(*type);
90+ const auto & element = list_type.element ();
91+ const auto new_id = get_id (element.field_id ());
92+ RecordIdReassignment (element.field_id (), new_id, ids_to_reassigned,
93+ ids_to_original);
94+ return std::make_shared<ListType>(
95+ ReassignField (element, new_id, get_id, ids_to_reassigned, ids_to_original));
96+ }
97+ case TypeId::kMap : {
98+ const auto & map_type = static_cast <const MapType&>(*type);
99+ const auto & key = map_type.key ();
100+ const auto & value = map_type.value ();
101+ const auto new_key_id = get_id (key.field_id ());
102+ const auto new_value_id = get_id (value.field_id ());
103+ RecordIdReassignment (key.field_id (), new_key_id, ids_to_reassigned,
104+ ids_to_original);
105+ RecordIdReassignment (value.field_id (), new_value_id, ids_to_reassigned,
106+ ids_to_original);
107+ return std::make_shared<MapType>(
108+ ReassignField (key, new_key_id, get_id, ids_to_reassigned, ids_to_original),
109+ ReassignField (value, new_value_id, get_id, ids_to_reassigned, ids_to_original));
110+ }
111+ default :
112+ return type;
113+ }
114+ }
115+
116+ SchemaField ReassignField (const SchemaField& field, int32_t new_id,
117+ const Schema::GetId& get_id, Schema::IdMap& ids_to_reassigned,
118+ Schema::IdMap& ids_to_original) {
119+ return {new_id, std::string (field.name ()),
120+ ReassignTypeIds (field.type (), get_id, ids_to_reassigned, ids_to_original),
121+ field.optional (), std::string (field.doc ())};
122+ }
123+
124+ std::vector<SchemaField> ReassignIds (std::vector<SchemaField> fields,
125+ const Schema::GetId& get_id,
126+ SchemaReassignIdContext& reassign_id_context) {
127+ auto reassigned_type = ReassignTypeIds (std::make_shared<StructType>(std::move (fields)),
128+ get_id, reassign_id_context.ids_to_reassigned ,
129+ reassign_id_context.ids_to_original );
130+ const auto & reassigned_fields =
131+ internal::checked_cast<const StructType&>(*reassigned_type).fields ();
132+ return {reassigned_fields.begin (), reassigned_fields.end ()};
133+ }
134+
40135Status ValidateFieldNullability (const Type& type) {
41136 auto validate_field = [&](const SchemaField& field) -> Status {
42137 ICEBERG_PRECHECK (field.optional () || field.type ()->type_id () != TypeId::kUnknown ,
@@ -73,17 +168,23 @@ Status ValidateFieldNullability(const Type& type) {
73168
74169} // namespace
75170
76- Schema::Schema (std::vector<SchemaField> fields, int32_t schema_id)
171+ Schema::Schema (std::vector<SchemaField> fields, int32_t schema_id, GetId get_id )
77172 : StructType(std::move(fields)),
78173 schema_id_ (schema_id),
79- cache_(std::make_unique<SchemaCache>(this )) {}
174+ cache_(std::make_unique<SchemaCache>(this )) {
175+ if (get_id) {
176+ reassign_id_context_ = std::make_unique<SchemaReassignIdContext>();
177+ fields_ = ReassignIds (std::move (fields_), get_id, *reassign_id_context_);
178+ }
179+ }
80180
// Destructor is defaulted here in the source file rather than in the class body.
// NOTE(review): presumably so the unique_ptr members (cache_, reassign_id_context_)
// are destroyed where their pointee types are complete -- confirm against the header.
81181Schema::~Schema () = default ;
82182
83183Result<std::unique_ptr<Schema>> Schema::Make (std::vector<SchemaField> fields,
84184 int32_t schema_id,
85- std::vector<int32_t > identifier_field_ids) {
86- auto schema = std::make_unique<Schema>(std::move (fields), schema_id);
185+ std::vector<int32_t > identifier_field_ids,
186+ GetId get_id) {
187+ auto schema = std::make_unique<Schema>(std::move (fields), schema_id, std::move (get_id));
87188
88189 if (!identifier_field_ids.empty ()) {
89190 auto id_to_parent = IndexParents (*schema);
@@ -99,8 +200,8 @@ Result<std::unique_ptr<Schema>> Schema::Make(std::vector<SchemaField> fields,
99200
100201Result<std::unique_ptr<Schema>> Schema::Make (
101202 std::vector<SchemaField> fields, int32_t schema_id,
102- const std::vector<std::string>& identifier_field_names) {
103- auto schema = std::make_unique<Schema>(std::move (fields), schema_id);
203+ const std::vector<std::string>& identifier_field_names, GetId get_id ) {
204+ auto schema = std::make_unique<Schema>(std::move (fields), schema_id, std::move (get_id) );
104205
105206 std::vector<int32_t > fresh_identifier_ids;
106207 for (const auto & name : identifier_field_names) {
@@ -181,6 +282,14 @@ const std::shared_ptr<Schema>& Schema::EmptySchema() {
181282
182283int32_t Schema::schema_id () const { return schema_id_; }
183284
285+ const Schema::IdMap& Schema::IdsToReassigned () const {
286+ return reassign_id_context_ ? reassign_id_context_->ids_to_reassigned : EmptyIdMap ();
287+ }
288+
289+ const Schema::IdMap& Schema::IdsToOriginal () const {
290+ return reassign_id_context_ ? reassign_id_context_->ids_to_original : EmptyIdMap ();
291+ }
292+
184293std::string Schema::ToString () const {
185294 std::string repr = " schema<" ;
186295 for (const auto & field : fields_) {
0 commit comments