From 30d2edd8b2b9e6eb7eb8665364dc2075ee81105d Mon Sep 17 00:00:00 2001 From: Minh Vu Date: Wed, 24 Jun 2026 23:42:22 +0200 Subject: [PATCH 1/3] validate duplicate schema field ids on unmarshal --- schema.go | 4 ++ schema_test.go | 103 +++++++++++++++++++++++++++++++++++++++++++++++++ 2 files changed, 107 insertions(+) diff --git a/schema.go b/schema.go index 542031cef..0669e1754 100644 --- a/schema.go +++ b/schema.go @@ -278,6 +278,10 @@ func (s *Schema) UnmarshalJSON(b []byte) error { return err } + if err := checkDuplicateFieldIDs(nil, aux.Fields); err != nil { + return err + } + s.init() s.fields = aux.Fields diff --git a/schema_test.go b/schema_test.go index 846ea9b8e..5a4585651 100644 --- a/schema_test.go +++ b/schema_test.go @@ -442,6 +442,109 @@ func TestUnmarshalSchema(t *testing.T) { assert.True(t, tableSchemaSimple.Equals(&schema)) } +func TestUnmarshalSchemaRejectsDuplicateFieldIDs(t *testing.T) { + tests := []struct { + name string + schema string + contains string + }{ + { + name: "top level", + schema: `{ + "type": "struct", + "fields": [ + {"id": 1, "name": "foo", "type": "string", "required": false}, + {"id": 1, "name": "bar", "type": "int", "required": true} + ], + "schema-id": 1, + "identifier-field-ids": [] + }`, + contains: "multiple fields for id 1: foo and bar", + }, + { + name: "nested struct", + schema: `{ + "type": "struct", + "fields": [ + {"id": 1, "name": "id", "type": "long", "required": true}, + { + "id": 2, + "name": "payload", + "type": { + "type": "struct", + "fields": [ + {"id": 3, "name": "inner", "type": "string", "required": false}, + {"id": 3, "name": "inner_dup", "type": "int", "required": false} + ] + }, + "required": false + } + ], + "schema-id": 1, + "identifier-field-ids": [] + }`, + contains: "multiple fields for id 3: inner and inner_dup", + }, + { + name: "list element", + schema: `{ + "type": "struct", + "fields": [ + {"id": 1, "name": "id", "type": "long", "required": true}, + { + "id": 2, + "name": "items", + "type": { + "type": "list", + "element-id": 1, + "element": "int", + "element-required": false + }, + "required": false + } + ], + "schema-id": 1, + "identifier-field-ids": [] + }`, + contains: "multiple fields for id 1: id and element", + }, + { + name: "map key", + schema: `{ + "type": "struct", + "fields": [ + {"id": 1, "name": "id", "type": "long", "required": true}, + { + "id": 2, + "name": "props", + "type": { + "type": "map", + "key-id": 1, + "key": "string", + "value-id": 3, + "value": "int", + "value-required": false + }, + "required": false + } + ], + "schema-id": 1, + "identifier-field-ids": [] + }`, + contains: "multiple fields for id 1: id and key", + }, + } + + for _, tt := range tests { + t.Run(tt.name, func(t *testing.T) { + var schema iceberg.Schema + err := json.Unmarshal([]byte(tt.schema), &schema) + require.ErrorIs(t, err, iceberg.ErrInvalidSchema) + assert.ErrorContains(t, err, tt.contains) + }) + } +} + func TestPruneColumnsString(t *testing.T) { sc, err := iceberg.PruneColumns(tableSchemaNested, map[int]iceberg.Void{1: {}}, false) require.NoError(t, err) From bbc551613777bb19f4cd2f657eebf1fe0ad09018 Mon Sep 17 00:00:00 2001 From: Minh Vu Date: Fri, 26 Jun 2026 22:23:50 +0200 Subject: [PATCH 2/3] fix: require list and map field ids on schema unmarshal --- schema_test.go | 108 +++++++++++++++++++++++++++++++++++++++++++++++++ types.go | 32 ++++++++++++--- 2 files changed, 134 insertions(+), 6 deletions(-) diff --git a/schema_test.go b/schema_test.go index 5a4585651..eaa569575 100644 --- a/schema_test.go +++ b/schema_test.go @@ -545,6 +545,114 @@ func TestUnmarshalSchemaRejectsDuplicateFieldIDs(t *testing.T) { } } +func TestUnmarshalSchemaRejectsMissingOrZeroCollectionFieldIDs(t *testing.T) { + tests := []struct { + name string + schema string + contains string + }{ + { + name: "missing list element id", + schema: `{ + "type": "struct", + "fields": [ + { + "id": 1, + "name": "items", + "type": { + "type": "list", + "element": "int", + "element-required": false + }, + "required": false + } + ], + "schema-id": 1, + "identifier-field-ids": [] + }`, + contains: "field is missing required 'element-id' key in JSON", + }, + { + name: "zero list element id", + schema: `{ + "type": "struct", + "fields": [ + { + "id": 1, + "name": "items", + "type": { + "type": "list", + "element-id": 0, + "element": "int", + "element-required": false + }, + "required": false + } + ], + "schema-id": 1, + "identifier-field-ids": [] + }`, + contains: "field 'element-id' must not be 0", + }, + { + name: "missing map key id", + schema: `{ + "type": "struct", + "fields": [ + { + "id": 1, + "name": "props", + "type": { + "type": "map", + "key": "string", + "value-id": 2, + "value": "int", + "value-required": false + }, + "required": false + } + ], + "schema-id": 1, + "identifier-field-ids": [] + }`, + contains: "field is missing required 'key-id' key in JSON", + }, + { + name: "zero map value id", + schema: `{ + "type": "struct", + "fields": [ + { + "id": 1, + "name": "props", + "type": { + "type": "map", + "key-id": 2, + "key": "string", + "value-id": 0, + "value": "int", + "value-required": false + }, + "required": false + } + ], + "schema-id": 1, + "identifier-field-ids": [] + }`, + contains: "field 'value-id' must not be 0", + }, + } + + for _, tt := range tests { + t.Run(tt.name, func(t *testing.T) { + var schema iceberg.Schema + err := json.Unmarshal([]byte(tt.schema), &schema) + require.ErrorIs(t, err, iceberg.ErrInvalidSchema) + assert.ErrorContains(t, err, tt.contains) + }) + } +} + func TestPruneColumnsString(t *testing.T) { sc, err := iceberg.PruneColumns(tableSchemaNested, map[int]iceberg.Void{1: {}}, false) require.NoError(t, err) diff --git a/types.go b/types.go index 282c7d2bc..6d51f3cf3 100644 --- a/types.go +++ b/types.go @@ -444,7 +444,7 @@ func (l *ListType) String() string { return fmt.Sprintf("list<%s>", l.Element) } func (l *ListType) UnmarshalJSON(b []byte) error { aux := struct { - ID int `json:"element-id"` + ID *int `json:"element-id"` Elem typeIFace `json:"element"` Req bool `json:"element-required"` }{} @@ -452,7 +452,14 @@ func (l *ListType) UnmarshalJSON(b []byte) error { return err } - l.ElementID = aux.ID + if aux.ID == nil { + return fmt.Errorf("%w: field is missing required 'element-id' key in JSON", ErrInvalidSchema) + } + if *aux.ID == 0 { + return fmt.Errorf("%w: field 'element-id' must not be 0", ErrInvalidSchema) + } + + l.ElementID = *aux.ID l.Element = aux.Elem.Type l.ElementRequired = aux.Req @@ -524,9 +531,9 @@ func (m *MapType) String() string { func (m *MapType) UnmarshalJSON(b []byte) error { aux := struct { - KeyID int `json:"key-id"` + KeyID *int `json:"key-id"` Key typeIFace `json:"key"` - ValueID int `json:"value-id"` + ValueID *int `json:"value-id"` Value typeIFace `json:"value"` ValueReq *bool `json:"value-required"` }{} @@ -534,8 +541,21 @@ func (m *MapType) UnmarshalJSON(b []byte) error { return err } - m.KeyID, m.KeyType = aux.KeyID, aux.Key.Type - m.ValueID, m.ValueType = aux.ValueID, aux.Value.Type + if aux.KeyID == nil { + return fmt.Errorf("%w: field is missing required 'key-id' key in JSON", ErrInvalidSchema) + } + if *aux.KeyID == 0 { + return fmt.Errorf("%w: field 'key-id' must not be 0", ErrInvalidSchema) + } + if aux.ValueID == nil { + return fmt.Errorf("%w: field is missing required 'value-id' key in JSON", ErrInvalidSchema) + } + if *aux.ValueID == 0 { + return fmt.Errorf("%w: field 'value-id' must not be 0", ErrInvalidSchema) + } + + m.KeyID, m.KeyType = *aux.KeyID, aux.Key.Type + m.ValueID, m.ValueType = *aux.ValueID, aux.Value.Type if aux.ValueReq == nil { m.ValueRequired = true } else { From a7970cee4edd5c3bedea000ccf1fcf8a5bd8a8ce Mon Sep 17 00:00:00 2001 From: Minh Vu Date: Mon, 29 Jun 2026 21:00:54 +0200 Subject: [PATCH 3/3] fix: allow zero collection field ids on unmarshal --- schema_test.go | 105 +++++++++++++++++++++++++++++++++++++++++++++---- types.go | 10 +---- 2 files changed, 98 insertions(+), 17 deletions(-) diff --git a/schema_test.go b/schema_test.go index eaa569575..a4dd4a29f 100644 --- a/schema_test.go +++ b/schema_test.go @@ -545,7 +545,7 @@ func TestUnmarshalSchemaRejectsDuplicateFieldIDs(t *testing.T) { } } -func TestUnmarshalSchemaRejectsMissingOrZeroCollectionFieldIDs(t *testing.T) { +func TestUnmarshalSchemaRejectsMissingCollectionFieldIDs(t *testing.T) { tests := []struct { name string schema string @@ -572,6 +572,70 @@ func TestUnmarshalSchemaRejectsMissingOrZeroCollectionFieldIDs(t *testing.T) { }`, contains: "field is missing required 'element-id' key in JSON", }, + { + name: "missing map key id", + schema: `{ + "type": "struct", + "fields": [ + { + "id": 1, + "name": "props", + "type": { + "type": "map", + "key": "string", + "value-id": 2, + "value": "int", + "value-required": false + }, + "required": false + } + ], + "schema-id": 1, + "identifier-field-ids": [] + }`, + contains: "field is missing required 'key-id' key in JSON", + }, + { + name: "missing map value id", + schema: `{ + "type": "struct", + "fields": [ + { + "id": 1, + "name": "props", + "type": { + "type": "map", + "key-id": 2, + "key": "string", + "value": "int", + "value-required": false + }, + "required": false + } + ], + "schema-id": 1, + "identifier-field-ids": [] + }`, + contains: "field is missing required 'value-id' key in JSON", + }, + } + + for _, tt := range tests { + t.Run(tt.name, func(t *testing.T) { + var schema iceberg.Schema + err := json.Unmarshal([]byte(tt.schema), &schema) + require.ErrorIs(t, err, iceberg.ErrInvalidSchema) + assert.ErrorContains(t, err, tt.contains) + }) + } +} + +func TestUnmarshalSchemaAllowsZeroCollectionFieldIDs(t *testing.T) { + tests := []struct { + name string + schema string + check func(*testing.T, iceberg.NestedField) + }{ { name: "zero list element id", schema: `{ @@ -592,10 +656,15 @@ func TestUnmarshalSchemaRejectsMissingOrZeroCollectionFieldIDs(t *testing.T) { "schema-id": 1, "identifier-field-ids": [] }`, - contains: "field 'element-id' must not be 0", + check: func(t *testing.T, field iceberg.NestedField) { + t.Helper() + listType, ok := field.Type.(*iceberg.ListType) + require.True(t, ok) + assert.Equal(t, 0, listType.ElementID) + }, }, { - name: "missing map key id", + name: "zero map key id", schema: `{ "type": "struct", "fields": [ @@ -604,6 +673,7 @@ func TestUnmarshalSchemaRejectsMissingOrZeroCollectionFieldIDs(t *testing.T) { "name": "props", "type": { "type": "map", + "key-id": 0, "key": "string", "value-id": 2, "value": "int", @@ -615,7 +685,12 @@ func TestUnmarshalSchemaRejectsMissingOrZeroCollectionFieldIDs(t *testing.T) { "schema-id": 1, "identifier-field-ids": [] }`, - contains: "field is missing required 'key-id' key in JSON", + check: func(t *testing.T, field iceberg.NestedField) { + t.Helper() + mapType, ok := field.Type.(*iceberg.MapType) + require.True(t, ok) + assert.Equal(t, 0, mapType.KeyID) + }, }, { name: "zero map value id", @@ -639,20 +714,34 @@ func TestUnmarshalSchemaRejectsMissingOrZeroCollectionFieldIDs(t *testing.T) { "schema-id": 1, "identifier-field-ids": [] }`, - contains: "field 'value-id' must not be 0", + check: func(t *testing.T, field iceberg.NestedField) { + t.Helper() + mapType, ok := field.Type.(*iceberg.MapType) + require.True(t, ok) + assert.Equal(t, 0, mapType.ValueID) + }, }, } for _, tt := range tests { t.Run(tt.name, func(t *testing.T) { var schema iceberg.Schema - err := json.Unmarshal([]byte(tt.schema), &schema) - require.ErrorIs(t, err, iceberg.ErrInvalidSchema) - assert.ErrorContains(t, err, tt.contains) + require.NoError(t, json.Unmarshal([]byte(tt.schema), &schema)) + require.Equal(t, 1, schema.NumFields()) + tt.check(t, schema.Field(0)) }) } } +func TestUnmarshalSchemaRoundTripsCollectionFieldIDs(t *testing.T) { + data, err := json.Marshal(tableSchemaNested) + require.NoError(t, err) + + var schema iceberg.Schema + require.NoError(t, json.Unmarshal(data, &schema)) + assert.True(t, tableSchemaNested.Equals(&schema)) +} + func TestPruneColumnsString(t *testing.T) { sc, err := iceberg.PruneColumns(tableSchemaNested, map[int]iceberg.Void{1: {}}, false) require.NoError(t, err) diff --git a/types.go b/types.go index 6d51f3cf3..351f0808c 100644 --- a/types.go +++ b/types.go @@ -455,9 +455,6 @@ func (l *ListType) UnmarshalJSON(b []byte) error { if aux.ID == nil { return fmt.Errorf("%w: field is missing required 'element-id' key in JSON", ErrInvalidSchema) } - if *aux.ID == 0 { - return fmt.Errorf("%w: field 'element-id' must not be 0", ErrInvalidSchema) - } l.ElementID = *aux.ID l.Element = aux.Elem.Type @@ -544,15 +541,10 @@ func (m *MapType) UnmarshalJSON(b []byte) error { if aux.KeyID == nil { return fmt.Errorf("%w: field is missing required 'key-id' key in JSON", ErrInvalidSchema) } - if *aux.KeyID == 0 { - return fmt.Errorf("%w: field 'key-id' must not be 0", ErrInvalidSchema) - } + if aux.ValueID == nil { return fmt.Errorf("%w: field is missing required 'value-id' key in JSON", ErrInvalidSchema) } - if *aux.ValueID == 0 { - return fmt.Errorf("%w: field 'value-id' must not be 0", ErrInvalidSchema) - } m.KeyID, m.KeyType = *aux.KeyID, aux.Key.Type m.ValueID, m.ValueType = *aux.ValueID, aux.Value.Type