apache · zeroshade · Jul 3, 2026 · Jun 24, 2026 · Jun 26, 2026 · Jun 29, 2026
@@ -278,6 +278,10 @@ func (s *Schema) UnmarshalJSON(b []byte) error {
 		return err
 	}
 
+	if err := checkDuplicateFieldIDs(nil, aux.Fields); err != nil {
+		return err
+	}
+
 	s.init()
 
 	s.fields = aux.Fields

@@ -442,6 +442,306 @@ func TestUnmarshalSchema(t *testing.T) {
 	assert.True(t, tableSchemaSimple.Equals(&schema))
 }
 
+func TestUnmarshalSchemaRejectsDuplicateFieldIDs(t *testing.T) {
+	tests := []struct {
+		name     string
+		schema   string
+		contains string
+	}{
+		{
+			name: "top level",
+			schema: `{
+				"type": "struct",
+				"fields": [
+					{"id": 1, "name": "foo", "type": "string", "required": false},
+					{"id": 1, "name": "bar", "type": "int", "required": true}
+				],
+				"schema-id": 1,
+				"identifier-field-ids": []
+			}`,
+			contains: "multiple fields for id 1: foo and bar",
+		},
+		{
+			name: "nested struct",
+			schema: `{
+				"type": "struct",
+				"fields": [
+					{"id": 1, "name": "id", "type": "long", "required": true},
+					{
+						"id": 2,
+						"name": "payload",
+						"type": {
+							"type": "struct",
+							"fields": [
+								{"id": 3, "name": "inner", "type": "string", "required": false},
+								{"id": 3, "name": "inner_dup", "type": "int", "required": false}
+							]
+						},
+						"required": false
+					}
+				],
+				"schema-id": 1,
+				"identifier-field-ids": []
+			}`,
+			contains: "multiple fields for id 3: inner and inner_dup",
+		},
+		{
+			name: "list element",
+			schema: `{
+				"type": "struct",
+				"fields": [
+					{"id": 1, "name": "id", "type": "long", "required": true},
+					{
+						"id": 2,
+						"name": "items",
+						"type": {
+							"type": "list",
+							"element-id": 1,
+							"element": "int",
+							"element-required": false
+						},
+						"required": false
+					}
+				],
+				"schema-id": 1,
+				"identifier-field-ids": []
+			}`,
+			contains: "multiple fields for id 1: id and element",
+		},
+		{
+			name: "map key",
+			schema: `{
+				"type": "struct",
+				"fields": [
+					{"id": 1, "name": "id", "type": "long", "required": true},
+					{
+						"id": 2,
+						"name": "props",
+						"type": {
+							"type": "map",
+							"key-id": 1,
+							"key": "string",
+							"value-id": 3,
+							"value": "int",
+							"value-required": false
+						},
+						"required": false
+					}
+				],
+				"schema-id": 1,
+				"identifier-field-ids": []
+			}`,
+			contains: "multiple fields for id 1: id and key",
+		},
+	}
+
+	for _, tt := range tests {
+		t.Run(tt.name, func(t *testing.T) {
+			var schema iceberg.Schema
+			err := json.Unmarshal([]byte(tt.schema), &schema)
+			require.ErrorIs(t, err, iceberg.ErrInvalidSchema)
+			assert.ErrorContains(t, err, tt.contains)
+		})
+	}
+}
+
+func TestUnmarshalSchemaRejectsMissingCollectionFieldIDs(t *testing.T) {
+	tests := []struct {
+		name     string
+		schema   string
+		contains string
+	}{
+		{
+			name: "missing list element id",
+			schema: `{
+				"type": "struct",
+				"fields": [
+					{
+						"id": 1,
+						"name": "items",
+						"type": {
+							"type": "list",
+							"element": "int",
+							"element-required": false
+						},
+						"required": false
+					}
+				],
+				"schema-id": 1,
+				"identifier-field-ids": []
+			}`,
+			contains: "field is missing required 'element-id' key in JSON",
+		},
+		{
+			name: "missing map key id",
+			schema: `{
+				"type": "struct",
+				"fields": [
+					{
+						"id": 1,
+						"name": "props",
+						"type": {
+							"type": "map",
+							"key": "string",
+							"value-id": 2,
+							"value": "int",
+							"value-required": false
+						},
+						"required": false
+					}
+				],
+				"schema-id": 1,
+				"identifier-field-ids": []
+			}`,
+			contains: "field is missing required 'key-id' key in JSON",
+		},
+		{
+			name: "missing map value id",
+			schema: `{
+				"type": "struct",
+				"fields": [
+					{
+						"id": 1,
+						"name": "props",
+						"type": {
+							"type": "map",
+							"key-id": 2,
+							"key": "string",
+							"value": "int",
+							"value-required": false
+						},
+						"required": false
+					}
+				],
+				"schema-id": 1,
+				"identifier-field-ids": []
+			}`,
+			contains: "field is missing required 'value-id' key in JSON",
+		},
+	}
+
+	for _, tt := range tests {
+		t.Run(tt.name, func(t *testing.T) {
+			var schema iceberg.Schema
+			err := json.Unmarshal([]byte(tt.schema), &schema)
+			require.ErrorIs(t, err, iceberg.ErrInvalidSchema)
+			assert.ErrorContains(t, err, tt.contains)
+		})
+	}
+}
+
+func TestUnmarshalSchemaAllowsZeroCollectionFieldIDs(t *testing.T) {
+	tests := []struct {
+		name   string
+		schema string
+		check  func(*testing.T, iceberg.NestedField)
+	}{
+		{
+			name: "zero list element id",
+			schema: `{
+				"type": "struct",
+				"fields": [
+					{
+						"id": 1,
+						"name": "items",
+						"type": {
+							"type": "list",
+							"element-id": 0,
+							"element": "int",
+							"element-required": false
+						},
+						"required": false
+					}
+				],
+				"schema-id": 1,
+				"identifier-field-ids": []
+			}`,
+			check: func(t *testing.T, field iceberg.NestedField) {
+				t.Helper()
+				listType, ok := field.Type.(*iceberg.ListType)
+				require.True(t, ok)
+				assert.Equal(t, 0, listType.ElementID)
+			},
+		},
+		{
+			name: "zero map key id",
+			schema: `{
+				"type": "struct",
+				"fields": [
+					{
+						"id": 1,
+						"name": "props",
+						"type": {
+							"type": "map",
+							"key-id": 0,
+							"key": "string",
+							"value-id": 2,
+							"value": "int",
+							"value-required": false
+						},
+						"required": false
+					}
+				],
+				"schema-id": 1,
+				"identifier-field-ids": []
+			}`,
+			check: func(t *testing.T, field iceberg.NestedField) {
+				t.Helper()
+				mapType, ok := field.Type.(*iceberg.MapType)
+				require.True(t, ok)
+				assert.Equal(t, 0, mapType.KeyID)
+			},
+		},
+		{
+			name: "zero map value id",
+			schema: `{
+				"type": "struct",
+				"fields": [
+					{
+						"id": 1,
+						"name": "props",
+						"type": {
+							"type": "map",
+							"key-id": 2,
+							"key": "string",
+							"value-id": 0,
+							"value": "int",
+							"value-required": false
+						},
+						"required": false
+					}
+				],
+				"schema-id": 1,
+				"identifier-field-ids": []
+			}`,
+			check: func(t *testing.T, field iceberg.NestedField) {
+				t.Helper()
+				mapType, ok := field.Type.(*iceberg.MapType)
+				require.True(t, ok)
+				assert.Equal(t, 0, mapType.ValueID)
+			},
+		},
+	}
+
+	for _, tt := range tests {
+		t.Run(tt.name, func(t *testing.T) {
+			var schema iceberg.Schema
+			require.NoError(t, json.Unmarshal([]byte(tt.schema), &schema))
+			require.Equal(t, 1, schema.NumFields())
+			tt.check(t, schema.Field(0))
+		})
+	}
+}
+
+func TestUnmarshalSchemaRoundTripsCollectionFieldIDs(t *testing.T) {
+	data, err := json.Marshal(tableSchemaNested)
+	require.NoError(t, err)
+
+	var schema iceberg.Schema
+	require.NoError(t, json.Unmarshal(data, &schema))
+	assert.True(t, tableSchemaNested.Equals(&schema))
+}
+
 func TestPruneColumnsString(t *testing.T) {
 	sc, err := iceberg.PruneColumns(tableSchemaNested, map[int]iceberg.Void{1: {}}, false)
 	require.NoError(t, err)

@@ -444,15 +444,19 @@ func (l *ListType) String() string { return fmt.Sprintf("list<%s>", l.Element) }
 
 func (l *ListType) UnmarshalJSON(b []byte) error {
 	aux := struct {
-		ID   int       `json:"element-id"`
+		ID   *int      `json:"element-id"`
 		Elem typeIFace `json:"element"`
 		Req  bool      `json:"element-required"`
 	}{}
 	if err := json.Unmarshal(b, &aux); err != nil {
 		return err
 	}
 
-	l.ElementID = aux.ID
+	if aux.ID == nil {
+		return fmt.Errorf("%w: field is missing required 'element-id' key in JSON", ErrInvalidSchema)
+	}
+
+	l.ElementID = *aux.ID
 	l.Element = aux.Elem.Type
 	l.ElementRequired = aux.Req
 
@@ -524,18 +528,26 @@ func (m *MapType) String() string {
 
 func (m *MapType) UnmarshalJSON(b []byte) error {
 	aux := struct {
-		KeyID    int       `json:"key-id"`
+		KeyID    *int      `json:"key-id"`
 		Key      typeIFace `json:"key"`
-		ValueID  int       `json:"value-id"`
+		ValueID  *int      `json:"value-id"`
 		Value    typeIFace `json:"value"`
 		ValueReq *bool     `json:"value-required"`
 	}{}
 	if err := json.Unmarshal(b, &aux); err != nil {
 		return err
 	}
 
-	m.KeyID, m.KeyType = aux.KeyID, aux.Key.Type
-	m.ValueID, m.ValueType = aux.ValueID, aux.Value.Type
+	if aux.KeyID == nil {
+		return fmt.Errorf("%w: field is missing required 'key-id' key in JSON", ErrInvalidSchema)
+	}
+
+	if aux.ValueID == nil {
+		return fmt.Errorf("%w: field is missing required 'value-id' key in JSON", ErrInvalidSchema)
+	}
+
+	m.KeyID, m.KeyType = *aux.KeyID, aux.Key.Type
+	m.ValueID, m.ValueType = *aux.ValueID, aux.Value.Type
 	if aux.ValueReq == nil {
 		m.ValueRequired = true
 	} else {