diff --git a/docling-core/src/main/java/ai/docling/core/DoclingDocument.java b/docling-core/src/main/java/ai/docling/core/DoclingDocument.java index 157690dd..450fe963 100644 --- a/docling-core/src/main/java/ai/docling/core/DoclingDocument.java +++ b/docling-core/src/main/java/ai/docling/core/DoclingDocument.java @@ -64,17 +64,30 @@ public class DoclingDocument { private List pictures; @JsonProperty("tables") + @JsonSetter(nulls = Nulls.AS_EMPTY) @lombok.Singular private List tables; @JsonProperty("key_value_items") + @JsonSetter(nulls = Nulls.AS_EMPTY) @lombok.Singular private List keyValueItems; @JsonProperty("form_items") + @JsonSetter(nulls = Nulls.AS_EMPTY) @lombok.Singular private List formItems; + @JsonProperty("field_regions") + @JsonSetter(nulls = Nulls.AS_EMPTY) + @lombok.Singular + private List fieldRegions; + + @JsonProperty("field_items") + @JsonSetter(nulls = Nulls.AS_EMPTY) + @lombok.Singular + private List fieldItems; + @JsonProperty("pages") @lombok.Singular private Map pages; @@ -179,6 +192,223 @@ public static class RefItem { public static class Builder { } } + /** + * Fine-granular reference item that captures an optional character span range alongside a JSON + * pointer reference. The {@code range} field serializes as a 2-element JSON integer array + * {@code [start, end]} to match the Python {@code tuple[int, int]} wire format. + */ + @JsonInclude(JsonInclude.Include.NON_EMPTY) + @tools.jackson.databind.annotation.JsonDeserialize(builder = FineRef.Builder.class) + @lombok.extern.jackson.Jacksonized + @lombok.Builder(toBuilder = true) + @lombok.Getter + @lombok.ToString + public static class FineRef { + @JsonProperty("$ref") + private String ref; + + /** + * Optional character span as a 2-element JSON array {@code [start_inclusive, end_exclusive]}, + * matching the Python {@code Optional[tuple[int, int]]} wire format. 
+ */ + @JsonProperty("range") + @JsonSetter(nulls = Nulls.AS_EMPTY) + @lombok.Singular("range") + private List range; + + @tools.jackson.databind.annotation.JsonPOJOBuilder(withPrefix = "") + public static class Builder { } + } + + /** + * Source metadata for a cue extracted from a media track (audio, video, subtitles, etc.). + * Serialized as a flat JSON object with a {@code "kind": "track"} discriminator field. + */ + @JsonInclude(JsonInclude.Include.NON_EMPTY) + @tools.jackson.databind.annotation.JsonDeserialize(builder = TrackSource.Builder.class) + @lombok.extern.jackson.Jacksonized + @lombok.Builder(toBuilder = true) + @lombok.Getter + @lombok.ToString + public static final class TrackSource implements SourceType { + @JsonProperty("kind") + @lombok.Builder.Default + private String kind = "track"; + + @JsonProperty("start_time") + private Double startTime; + + @JsonProperty("end_time") + private Double endTime; + + @JsonProperty("identifier") + @Nullable + private String identifier; + + @JsonProperty("voice") + @Nullable + private String voice; + + @tools.jackson.databind.annotation.JsonPOJOBuilder(withPrefix = "") + public static class Builder { + /** + * Validates the {@code kind} discriminator without storing it: {@code null} or {@code "track"} + * is accepted and ignored, leaving the {@code "track"} default declared on {@link TrackSource} + * in place; any other value throws {@link IllegalArgumentException}. Returns this builder. + */ + @JsonProperty("kind") + public Builder kind(String kind) { + if (kind != null && !"track".equals(kind)) { + throw new IllegalArgumentException("TrackSource.kind must be \"track\" but was: " + kind); + } + return this; + } + } + } + + /** + * Detected human language of a document node, expressed as a BCP 47 code (e.g. {@code "en"}). 
+ */ + @JsonInclude(JsonInclude.Include.NON_EMPTY) + @tools.jackson.databind.annotation.JsonDeserialize(builder = LanguageMetaField.Builder.class) + @lombok.extern.jackson.Jacksonized + @lombok.Builder(toBuilder = true) + @lombok.Getter + @lombok.ToString + public static class LanguageMetaField { + @JsonProperty("confidence") + @Nullable + private Double confidence; + + @JsonProperty("created_by") + @Nullable + private String createdBy; + + @JsonProperty("code") + private String code; + + @tools.jackson.databind.annotation.JsonPOJOBuilder(withPrefix = "") + public static class Builder { } + } + + /** + * A named entity mention extracted from text: the entity {@code text} plus optional {@code orig}, + * {@code label}, {@code confidence}, {@code created_by} and a {@code [start, end]} character span. + */ + @JsonInclude(JsonInclude.Include.NON_EMPTY) + @tools.jackson.databind.annotation.JsonDeserialize(builder = EntityMention.Builder.class) + @lombok.extern.jackson.Jacksonized + @lombok.Builder(toBuilder = true) + @lombok.Getter + @lombok.ToString + public static class EntityMention { + @JsonProperty("confidence") + @Nullable + private Double confidence; + + @JsonProperty("created_by") + @Nullable + private String createdBy; + + @JsonProperty("text") + private String text; + + @JsonProperty("orig") + @Nullable + private String orig; + + @JsonProperty("label") + @Nullable + private String label; + + /** + * Character span as a 2-element JSON array {@code [start, end]} (0-indexed, end-exclusive), the + * Python {@code CharSpan = tuple[int, int]} wire format; a JSON {@code null} is read as empty. + */ + @JsonProperty("charspan") + @JsonSetter(nulls = Nulls.AS_EMPTY) + @lombok.Singular("charspan") + private List charspan; + + @tools.jackson.databind.annotation.JsonPOJOBuilder(withPrefix = "") + public static class Builder { } + } + + /** + * Container for named entity mentions associated with a document node. 
+ */ + @JsonInclude(JsonInclude.Include.NON_EMPTY) + @tools.jackson.databind.annotation.JsonDeserialize(builder = EntitiesMetaField.Builder.class) + @lombok.extern.jackson.Jacksonized + @lombok.Builder(toBuilder = true) + @lombok.Getter + @lombok.ToString + public static class EntitiesMetaField { + @JsonProperty("mentions") + @JsonSetter(nulls = Nulls.AS_EMPTY) + @lombok.Singular + private List mentions; + + @tools.jackson.databind.annotation.JsonPOJOBuilder(withPrefix = "") + public static class Builder { } + } + + /** + * Container for salient keyword or keyphrase metadata associated with a document node. + * Values are order-preserving and deduplicated. + */ + @JsonInclude(JsonInclude.Include.NON_EMPTY) + @tools.jackson.databind.annotation.JsonDeserialize(builder = KeywordsMetaField.Builder.class) + @lombok.extern.jackson.Jacksonized + @lombok.Builder(toBuilder = true) + @lombok.Getter + @lombok.ToString + public static class KeywordsMetaField { + @JsonProperty("values") + @JsonSetter(nulls = Nulls.AS_EMPTY) + @lombok.Singular("value") + private List values; + + /** + * Returns the keyword values in first-occurrence order with duplicates removed (the Python + * {@code UniqueList} invariant); never {@code null}, and {@code null} elements do not cause a throw. + */ + public List getValues() { + return values == null ? List.of() : values.stream().distinct().toList(); + } + + @tools.jackson.databind.annotation.JsonPOJOBuilder(withPrefix = "") + public static class Builder { } + } + + /** + * Container for higher-level subject category or thematic label metadata associated with a + * document node. Values are order-preserving and deduplicated. 
+ */ + @JsonInclude(JsonInclude.Include.NON_EMPTY) + @tools.jackson.databind.annotation.JsonDeserialize(builder = TopicsMetaField.Builder.class) + @lombok.extern.jackson.Jacksonized + @lombok.Builder(toBuilder = true) + @lombok.Getter + @lombok.ToString + public static class TopicsMetaField { + @JsonProperty("values") + @JsonSetter(nulls = Nulls.AS_EMPTY) + @lombok.Singular("value") + private List values; + + /** + * Returns the topic values in first-occurrence order with duplicates removed (the Python + * {@code UniqueList} invariant); never {@code null}, and {@code null} elements do not cause a throw. + */ + public List getValues() { + return values == null ? List.of() : values.stream().distinct().toList(); + } + + @tools.jackson.databind.annotation.JsonPOJOBuilder(withPrefix = "") + public static class Builder { } + } + @JsonInclude(JsonInclude.Include.NON_EMPTY) @tools.jackson.databind.annotation.JsonDeserialize(builder = BaseMeta.Builder.class) @lombok.extern.jackson.Jacksonized @@ -190,6 +420,22 @@ public static class BaseMeta { @Nullable private SummaryMetaField summary; + @JsonProperty("language") + @Nullable + private LanguageMetaField language; + + @JsonProperty("entities") + @Nullable + private EntitiesMetaField entities; + + @JsonProperty("keywords") + @Nullable + private KeywordsMetaField keywords; + + @JsonProperty("topics") + @Nullable + private TopicsMetaField topics; + @tools.jackson.databind.annotation.JsonPOJOBuilder(withPrefix = "") public static class Builder { } } @@ -204,11 +450,11 @@ public static class SummaryMetaField { @JsonProperty("confidence") @Nullable private Double confidence; - + @JsonProperty("created_by") @Nullable private String createdBy; - + @JsonProperty("text") private String text; @@ -216,6 +462,17 @@ public static class SummaryMetaField { public static class Builder { } } + @JsonTypeInfo( + use = JsonTypeInfo.Id.NAME, + include = JsonTypeInfo.As.EXISTING_PROPERTY, + property = "kind", + visible = true + ) + @JsonSubTypes({ + @Type(value = 
TrackSource.class, name = "track") + }) + public sealed interface SourceType permits TrackSource { } + public enum DocItemLabel { @JsonProperty("caption") CAPTION, @JsonProperty("chart") CHART, @@ -239,7 +496,21 @@ public enum DocItemLabel { @JsonProperty("section_header") SECTION_HEADER, @JsonProperty("table") TABLE, @JsonProperty("text") TEXT, - @JsonProperty("title") TITLE + @JsonProperty("title") TITLE, + @JsonProperty("field_region") FIELD_REGION, + @JsonProperty("field_heading") FIELD_HEADING, + @JsonProperty("field_item") FIELD_ITEM, + @JsonProperty("field_key") FIELD_KEY, + @JsonProperty("field_value") FIELD_VALUE, + @JsonProperty("field_hint") FIELD_HINT, + @JsonProperty("marker") MARKER + } + + public enum Orientation { + @JsonProperty("rot_0") ROT_0, + @JsonProperty("rot_90") ROT_90, + @JsonProperty("rot_180") ROT_180, + @JsonProperty("rot_270") ROT_270 } @JsonInclude(JsonInclude.Include.NON_EMPTY) @@ -255,6 +526,8 @@ public enum DocItemLabel { @Type(value = ListItem.class, name = "list_item"), @Type(value = CodeItem.class, name = "code"), @Type(value = FormulaItem.class, name = "formula"), + @Type(value = FieldHeadingItem.class, name = "field_heading"), + @Type(value = FieldValueItem.class, name = "field_value"), @Type(value = TextItem.class, name = "text"), @Type(value = TextItem.class, name = "paragraph"), @Type(value = TextItem.class, name = "caption"), @@ -264,9 +537,14 @@ public enum DocItemLabel { @Type(value = TextItem.class, name = "reference"), @Type(value = TextItem.class, name = "checkbox_selected"), @Type(value = TextItem.class, name = "checkbox_unselected"), - @Type(value = TextItem.class, name = "empty_value") + @Type(value = TextItem.class, name = "empty_value"), + @Type(value = TextItem.class, name = "field_key"), + @Type(value = TextItem.class, name = "field_hint"), + @Type(value = TextItem.class, name = "marker"), + @Type(value = TextItem.class, name = "handwritten_text") }) - public sealed interface BaseTextItem permits TitleItem, 
SectionHeaderItem, ListItem, CodeItem, FormulaItem, TextItem { + public sealed interface BaseTextItem + permits TitleItem, SectionHeaderItem, FieldHeadingItem, FieldValueItem, ListItem, CodeItem, FormulaItem, TextItem { String getSelfRef(); @@ -290,6 +568,10 @@ public sealed interface BaseTextItem permits TitleItem, SectionHeaderItem, ListI @Nullable String getHyperlink(); + + List getSource(); + + List getComments(); } public enum Script { @@ -372,6 +654,16 @@ public static final class TitleItem implements BaseTextItem { @Nullable private String hyperlink; + @JsonProperty("source") + @JsonSetter(nulls = Nulls.AS_EMPTY) + @lombok.Singular("source") + private List source; + + @JsonProperty("comments") + @JsonSetter(nulls = Nulls.AS_EMPTY) + @lombok.Singular + private List comments; + @tools.jackson.databind.annotation.JsonPOJOBuilder(withPrefix = "") public static class Builder { } } @@ -424,6 +716,81 @@ public static final class SectionHeaderItem implements BaseTextItem { @Nullable private String hyperlink; + @JsonProperty("source") + @JsonSetter(nulls = Nulls.AS_EMPTY) + @lombok.Singular("source") + private List source; + + @JsonProperty("comments") + @JsonSetter(nulls = Nulls.AS_EMPTY) + @lombok.Singular + private List comments; + + @JsonProperty("level") + private Integer level; + + @tools.jackson.databind.annotation.JsonPOJOBuilder(withPrefix = "") + public static class Builder { } + } + + @JsonInclude(JsonInclude.Include.NON_EMPTY) + @tools.jackson.databind.annotation.JsonDeserialize(builder = FieldHeadingItem.Builder.class) + @lombok.extern.jackson.Jacksonized + @lombok.Builder(toBuilder = true) + @lombok.Getter + @lombok.ToString + public static final class FieldHeadingItem implements BaseTextItem { + @JsonProperty("self_ref") + private String selfRef; + + @JsonProperty("parent") + @Nullable + private RefItem parent; + + @JsonProperty("children") + @JsonSetter(nulls = Nulls.AS_EMPTY) + @lombok.Singular("child") + private List children; + + 
@JsonProperty("content_layer") + private ContentLayer contentLayer; + + @JsonProperty("meta") + @Nullable + private BaseMeta meta; + + @JsonProperty("label") + private DocItemLabel label; + + @JsonProperty("prov") + @JsonSetter(nulls = Nulls.AS_EMPTY) + @lombok.Singular("prov") + private List prov; + + @JsonProperty("orig") + private String orig; + + @JsonProperty("text") + private String text; + + @JsonProperty("formatting") + @Nullable + private Formatting formatting; + + @JsonProperty("hyperlink") + @Nullable + private String hyperlink; + + @JsonProperty("source") + @JsonSetter(nulls = Nulls.AS_EMPTY) + @lombok.Singular("source") + private List source; + + @JsonProperty("comments") + @JsonSetter(nulls = Nulls.AS_EMPTY) + @lombok.Singular + private List comments; + @JsonProperty("level") private Integer level; @@ -479,6 +846,16 @@ public static final class ListItem implements BaseTextItem { @Nullable private String hyperlink; + @JsonProperty("source") + @JsonSetter(nulls = Nulls.AS_EMPTY) + @lombok.Singular("source") + private List source; + + @JsonProperty("comments") + @JsonSetter(nulls = Nulls.AS_EMPTY) + @lombok.Singular + private List comments; + @JsonProperty("enumerated") private boolean enumerated; @@ -538,6 +915,16 @@ public static final class CodeItem implements BaseTextItem { @Nullable private String hyperlink; + @JsonProperty("source") + @JsonSetter(nulls = Nulls.AS_EMPTY) + @lombok.Singular("source") + private List source; + + @JsonProperty("comments") + @JsonSetter(nulls = Nulls.AS_EMPTY) + @lombok.Singular + private List comments; + @JsonProperty("captions") @JsonSetter(nulls = Nulls.AS_EMPTY) @lombok.Singular @@ -613,6 +1000,16 @@ public static final class FormulaItem implements BaseTextItem { @Nullable private String hyperlink; + @JsonProperty("source") + @JsonSetter(nulls = Nulls.AS_EMPTY) + @lombok.Singular("source") + private List source; + + @JsonProperty("comments") + @JsonSetter(nulls = Nulls.AS_EMPTY) + @lombok.Singular + private List 
comments; + @tools.jackson.databind.annotation.JsonPOJOBuilder(withPrefix = "") public static class Builder { } } @@ -665,6 +1062,82 @@ public static final class TextItem implements BaseTextItem { @Nullable private String hyperlink; + @JsonProperty("source") + @JsonSetter(nulls = Nulls.AS_EMPTY) + @lombok.Singular("source") + private List source; + + @JsonProperty("comments") + @JsonSetter(nulls = Nulls.AS_EMPTY) + @lombok.Singular + private List comments; + + @tools.jackson.databind.annotation.JsonPOJOBuilder(withPrefix = "") + public static class Builder { } + } + + @JsonInclude(JsonInclude.Include.NON_EMPTY) + @tools.jackson.databind.annotation.JsonDeserialize(builder = FieldValueItem.Builder.class) + @lombok.extern.jackson.Jacksonized + @lombok.Builder(toBuilder = true) + @lombok.Getter + @lombok.ToString + public static final class FieldValueItem implements BaseTextItem { + @JsonProperty("self_ref") + private String selfRef; + + @JsonProperty("parent") + @Nullable + private RefItem parent; + + @JsonProperty("children") + @JsonSetter(nulls = Nulls.AS_EMPTY) + @lombok.Singular("child") + private List children; + + @JsonProperty("content_layer") + private ContentLayer contentLayer; + + @JsonProperty("meta") + @Nullable + private BaseMeta meta; + + @JsonProperty("label") + private DocItemLabel label; + + @JsonProperty("prov") + @JsonSetter(nulls = Nulls.AS_EMPTY) + @lombok.Singular("prov") + private List prov; + + @JsonProperty("orig") + private String orig; + + @JsonProperty("text") + private String text; + + @JsonProperty("formatting") + @Nullable + private Formatting formatting; + + @JsonProperty("hyperlink") + @Nullable + private String hyperlink; + + @JsonProperty("source") + @JsonSetter(nulls = Nulls.AS_EMPTY) + @lombok.Singular("source") + private List source; + + @JsonProperty("comments") + @JsonSetter(nulls = Nulls.AS_EMPTY) + @lombok.Singular + private List comments; + + @JsonProperty("kind") + @Nullable + private String kind; + 
@tools.jackson.databind.annotation.JsonPOJOBuilder(withPrefix = "") public static class Builder { } } @@ -719,6 +1192,21 @@ public static class PictureItem { @Nullable private ImageRef image; + @JsonProperty("source") + @JsonSetter(nulls = Nulls.AS_EMPTY) + @lombok.Singular("source") + private List source; + + @JsonProperty("comments") + @JsonSetter(nulls = Nulls.AS_EMPTY) + @lombok.Singular + private List comments; + + @JsonProperty("annotations") + @JsonSetter(nulls = Nulls.AS_EMPTY) + @lombok.Singular + private List> annotations; + @tools.jackson.databind.annotation.JsonPOJOBuilder(withPrefix = "") public static class Builder { } } @@ -733,23 +1221,74 @@ public static class PictureMeta { @JsonProperty("summary") @Nullable private SummaryMetaField summary; - + + @JsonProperty("language") + @Nullable + private LanguageMetaField language; + + @JsonProperty("entities") + @Nullable + private EntitiesMetaField entities; + + @JsonProperty("keywords") + @Nullable + private KeywordsMetaField keywords; + + @JsonProperty("topics") + @Nullable + private TopicsMetaField topics; + @JsonProperty("description") @Nullable private DescriptionMetaField description; - + @JsonProperty("classification") @Nullable private PictureClassificationMetaField classification; - + @JsonProperty("molecule") @Nullable private MoleculeMetaField molecule; - + @JsonProperty("tabular_chart") @Nullable private TabularChartMetaField tabularChart; + @JsonProperty("code") + @Nullable + private CodeMetaField code; + + @tools.jackson.databind.annotation.JsonPOJOBuilder(withPrefix = "") + public static class Builder { } + } + + /** + * Source-code representation associated with a picture node (e.g. the code that renders a chart + * or diagram). Corresponds to the Python {@code CodeMetaField}; {@code language} is carried as a + * raw string to mirror the lenient handling of {@code code_language} elsewhere in the model. 
+ */ + @JsonInclude(JsonInclude.Include.NON_EMPTY) + @tools.jackson.databind.annotation.JsonDeserialize(builder = CodeMetaField.Builder.class) + @lombok.extern.jackson.Jacksonized + @lombok.Builder(toBuilder = true) + @lombok.Getter + @lombok.ToString + public static class CodeMetaField { + @JsonProperty("confidence") + @Nullable + private Double confidence; + + @JsonProperty("created_by") + @Nullable + private String createdBy; + + @JsonProperty("text") + private String text; + + @JsonProperty("language") + @Nullable + private String language; + @tools.jackson.databind.annotation.JsonPOJOBuilder(withPrefix = "") public static class Builder { } } @@ -1007,6 +1546,21 @@ public static class TableItem { @JsonProperty("data") private TableData data; + @JsonProperty("source") + @JsonSetter(nulls = Nulls.AS_EMPTY) + @lombok.Singular("source") + private List source; + + @JsonProperty("comments") + @JsonSetter(nulls = Nulls.AS_EMPTY) + @lombok.Singular + private List comments; + + @JsonProperty("annotations") + @JsonSetter(nulls = Nulls.AS_EMPTY) + @lombok.Singular + private List> annotations; + @tools.jackson.databind.annotation.JsonPOJOBuilder(withPrefix = "") public static class Builder { } } @@ -1021,7 +1575,23 @@ public static class FloatingMeta { @JsonProperty("summary") @Nullable private SummaryMetaField summary; - + + @JsonProperty("language") + @Nullable + private LanguageMetaField language; + + @JsonProperty("entities") + @Nullable + private EntitiesMetaField entities; + + @JsonProperty("keywords") + @Nullable + private KeywordsMetaField keywords; + + @JsonProperty("topics") + @Nullable + private TopicsMetaField topics; + @JsonProperty("description") @Nullable private DescriptionMetaField description; @@ -1040,7 +1610,7 @@ public static class TableData { @JsonProperty("table_cells") @JsonSetter(nulls = Nulls.AS_EMPTY) @lombok.Singular - private List tableCells; + private List tableCells; @JsonProperty("num_rows") private Integer numRows; @@ -1053,6 +1623,10 @@ 
public static class TableData { @lombok.Singular("grid") private List> grid; + @JsonProperty("orientation") + @Nullable + private Orientation orientation; + @tools.jackson.databind.annotation.JsonPOJOBuilder(withPrefix = "") public static class Builder { } } @@ -1100,6 +1674,10 @@ public static class TableCell { @JsonProperty("fillable") private boolean fillable; + @JsonProperty("ref") + @Nullable + private RefItem ref; + @tools.jackson.databind.annotation.JsonPOJOBuilder(withPrefix = "") public static class Builder { } } @@ -1160,6 +1738,16 @@ public static class KeyValueItem { @JsonProperty("graph") private GraphData graph; + @JsonProperty("source") + @JsonSetter(nulls = Nulls.AS_EMPTY) + @lombok.Singular("source") + private List source; + + @JsonProperty("comments") + @JsonSetter(nulls = Nulls.AS_EMPTY) + @lombok.Singular + private List comments; + @tools.jackson.databind.annotation.JsonPOJOBuilder(withPrefix = "") public static class Builder { } } @@ -1307,6 +1895,121 @@ public static class FormItem { @JsonProperty("graph") private GraphData graph; + @JsonProperty("source") + @JsonSetter(nulls = Nulls.AS_EMPTY) + @lombok.Singular("source") + private List source; + + @JsonProperty("comments") + @JsonSetter(nulls = Nulls.AS_EMPTY) + @lombok.Singular + private List comments; + + @tools.jackson.databind.annotation.JsonPOJOBuilder(withPrefix = "") + public static class Builder { } + } + + /** + * Represents a form field region container, grouping one or more related field items within a + * document. Corresponds to the Python {@code FieldRegionItem} with label {@code "field_region"}. 
+ */ + @JsonInclude(JsonInclude.Include.NON_EMPTY) + @tools.jackson.databind.annotation.JsonDeserialize(builder = FieldRegionItem.Builder.class) + @lombok.extern.jackson.Jacksonized + @lombok.Builder(toBuilder = true) + @lombok.Getter + @lombok.ToString + public static class FieldRegionItem { + @JsonProperty("self_ref") + private String selfRef; + + @JsonProperty("parent") + @Nullable + private RefItem parent; + + @JsonProperty("children") + @JsonSetter(nulls = Nulls.AS_EMPTY) + @lombok.Singular("child") + private List children; + + @JsonProperty("content_layer") + private ContentLayer contentLayer; + + @JsonProperty("meta") + @Nullable + private BaseMeta meta; + + @JsonProperty("label") + private DocItemLabel label; + + @JsonProperty("prov") + @JsonSetter(nulls = Nulls.AS_EMPTY) + @lombok.Singular("prov") + private List prov; + + @JsonProperty("source") + @JsonSetter(nulls = Nulls.AS_EMPTY) + @lombok.Singular("source") + private List source; + + @JsonProperty("comments") + @JsonSetter(nulls = Nulls.AS_EMPTY) + @lombok.Singular + private List comments; + + @tools.jackson.databind.annotation.JsonPOJOBuilder(withPrefix = "") + public static class Builder { } + } + + /** + * Represents a single form field item within a document, typically nested inside a + * {@link FieldRegionItem}. Corresponds to the Python {@code FieldItem} with label + * {@code "field_item"}. 
+ */ + @JsonInclude(JsonInclude.Include.NON_EMPTY) + @tools.jackson.databind.annotation.JsonDeserialize(builder = FieldItem.Builder.class) + @lombok.extern.jackson.Jacksonized + @lombok.Builder(toBuilder = true) + @lombok.Getter + @lombok.ToString + public static class FieldItem { + @JsonProperty("self_ref") + private String selfRef; + + @JsonProperty("parent") + @Nullable + private RefItem parent; + + @JsonProperty("children") + @JsonSetter(nulls = Nulls.AS_EMPTY) + @lombok.Singular("child") + private List children; + + @JsonProperty("content_layer") + private ContentLayer contentLayer; + + @JsonProperty("meta") + @Nullable + private BaseMeta meta; + + @JsonProperty("label") + private DocItemLabel label; + + @JsonProperty("prov") + @JsonSetter(nulls = Nulls.AS_EMPTY) + @lombok.Singular("prov") + private List prov; + + @JsonProperty("source") + @JsonSetter(nulls = Nulls.AS_EMPTY) + @lombok.Singular("source") + private List source; + + @JsonProperty("comments") + @JsonSetter(nulls = Nulls.AS_EMPTY) + @lombok.Singular + private List comments; + @tools.jackson.databind.annotation.JsonPOJOBuilder(withPrefix = "") public static class Builder { } } diff --git a/docling-core/src/test/java/ai/docling/core/DoclingDocumentTests.java b/docling-core/src/test/java/ai/docling/core/DoclingDocumentTests.java index 8b20fddc..124e94ed 100644 --- a/docling-core/src/test/java/ai/docling/core/DoclingDocumentTests.java +++ b/docling-core/src/test/java/ai/docling/core/DoclingDocumentTests.java @@ -1,6 +1,9 @@ package ai.docling.core; import static org.assertj.core.api.Assertions.assertThat; +import static org.assertj.core.api.Assertions.assertThatThrownBy; + +import java.util.List; import org.junit.jupiter.api.Test; @@ -10,6 +13,7 @@ import ai.docling.core.DoclingDocument.DocItemLabel; import ai.docling.core.DoclingDocument.GroupItem; import ai.docling.core.DoclingDocument.GroupLabel; +import ai.docling.core.DoclingDocument.Orientation; import ai.docling.core.DoclingDocument.TitleItem; 
/** @@ -85,4 +89,469 @@ void shouldDeserializeFurnitureField() throws Exception { assertThat(document.getFurniture().getLabel()).isEqualTo(GroupLabel.UNSPECIFIED); } + @Test + void shouldDeserializeCurrentBaseMetaFields() throws Exception { + ObjectMapper mapper = new ObjectMapper(); + String json = """ + { + "name": "meta-document", + "body": { + "self_ref": "#/body", + "content_layer": "body", + "label": "unspecified", + "meta": { + "summary": { + "confidence": 0.91, + "created_by": "summary-model", + "text": "Short summary" + }, + "language": { + "confidence": 0.99, + "created_by": "language-detector", + "code": "en" + }, + "entities": { + "mentions": [ + { + "confidence": 0.88, + "created_by": "ner-model", + "text": "Docling", + "orig": "Docling", + "label": "ORG", + "charspan": [0, 7] + } + ] + }, + "keywords": { "values": ["document", "conversion"] }, + "topics": { "values": ["engineering"] } + } + } + } + """; + + DoclingDocument document = mapper.readValue(json, DoclingDocument.class); + + assertThat(document.getBody().getMeta().getSummary().getText()).isEqualTo("Short summary"); + assertThat(document.getBody().getMeta().getLanguage().getCode()).isEqualTo("en"); + assertThat(document.getBody().getMeta().getEntities().getMentions()).hasSize(1); + assertThat(document.getBody().getMeta().getEntities().getMentions().get(0).getCharspan()).containsExactly(0, 7); + assertThat(document.getBody().getMeta().getKeywords().getValues()).containsExactly("document", "conversion"); + assertThat(document.getBody().getMeta().getTopics().getValues()).containsExactly("engineering"); + + String serialized = mapper.writeValueAsString(document); + + assertThat(serialized).contains("\"language\""); + assertThat(serialized).contains("\"entities\""); + assertThat(serialized).contains("\"keywords\""); + assertThat(serialized).contains("\"topics\""); + } + + @Test + void shouldDeserializeFieldRegionsAndFieldItems() throws Exception { + ObjectMapper mapper = new ObjectMapper(); + 
String json = """ + { + "name": "field-document", + "field_regions": [ + { + "self_ref": "#/field_regions/0", + "children": [{"$ref": "#/field_items/0"}], + "content_layer": "body", + "label": "field_region", + "source": [ + { + "kind": "track", + "start_time": 1.25, + "end_time": 2.5, + "identifier": "caption-track", + "voice": "speaker-1" + } + ], + "comments": [ + { + "$ref": "#/texts/0", + "range": [1, 5] + } + ] + } + ], + "field_items": [ + { + "self_ref": "#/field_items/0", + "parent": {"$ref": "#/field_regions/0"}, + "content_layer": "body", + "label": "field_item" + } + ] + } + """; + + DoclingDocument document = mapper.readValue(json, DoclingDocument.class); + + assertThat(document.getFieldRegions()).hasSize(1); + assertThat(document.getFieldRegions().get(0).getLabel()).isEqualTo(DocItemLabel.FIELD_REGION); + assertThat(document.getFieldRegions().get(0).getSource().get(0)) + .isInstanceOfSatisfying( + DoclingDocument.TrackSource.class, + track -> assertThat(track.getIdentifier()).isEqualTo("caption-track")); + assertThat(document.getFieldRegions().get(0).getComments().get(0).getRef()).isEqualTo("#/texts/0"); + assertThat(document.getFieldRegions().get(0).getComments().get(0).getRange()).containsExactly(1, 5); + assertThat(document.getFieldItems()).hasSize(1); + assertThat(document.getFieldItems().get(0).getLabel()).isEqualTo(DocItemLabel.FIELD_ITEM); + } + + @Test + void shouldSerializeFieldSourcesAndFineRefsUsingDoclingJsonShape() throws Exception { + ObjectMapper mapper = new ObjectMapper(); + DoclingDocument.FieldRegionItem fieldRegion = DoclingDocument.FieldRegionItem.builder() + .selfRef("#/field_regions/0") + .contentLayer(ContentLayer.BODY) + .label(DocItemLabel.FIELD_REGION) + .source(DoclingDocument.TrackSource.builder() + .startTime(1.25) + .endTime(2.5) + .identifier("caption-track") + .voice("speaker-1") + .build()) + .comment(DoclingDocument.FineRef.builder() + .ref("#/texts/0") + .range(java.util.List.of(1, 5)) + .build()) + .build(); + 
DoclingDocument document = DoclingDocument.builder()
        .name("field-source-document")
        .fieldRegion(fieldRegion)
        .build();

    String json = mapper.writeValueAsString(document);

    assertThat(json).contains("\"kind\":\"track\"");
    assertThat(json).contains("\"start_time\":1.25");
    assertThat(json).contains("\"$ref\":\"#/texts/0\"");
    assertThat(json).contains("\"range\":[1,5]");
    assertThat(json).doesNotContain("\"track\":{");
    assertThat(json).doesNotContain("\"ref\":\"#/texts/0\"");
    // The polymorphic discriminator must reuse TrackSource's own "kind" property,
    // not emit a duplicate one (As.EXISTING_PROPERTY, like BaseTextItem/label).
    // Splitting with limit -1 keeps trailing empty pieces, so (pieces - 1) is the
    // number of occurrences of the discriminator in the serialized output.
    assertThat(json.split("\"kind\":\"track\"", -1).length - 1).isEqualTo(1);
  }

  /**
   * An explicit JSON {@code null} for {@code field_regions} / {@code field_items} must be read as
   * an empty list rather than surfacing as {@code null} from the getters.
   */
  @Test
  void shouldDeserializeNullFieldCollectionsAsEmptyLists() throws Exception {
    ObjectMapper mapper = new ObjectMapper();
    String json = """
        {
          "name": "null-fields-document",
          "field_regions": null,
          "field_items": null
        }
        """;

    DoclingDocument document = mapper.readValue(json, DoclingDocument.class);

    assertThat(document.getFieldRegions()).isEmpty();
    assertThat(document.getFieldItems()).isEmpty();
  }

  /**
   * Reads one field item that carries language metadata, a provenance entry, a track source and a
   * ranged comment reference, and checks that every part is reachable through the typed getters.
   */
  @Test
  void shouldDeserializeFieldItemWithMetaProvenanceSourceAndComments() throws Exception {
    ObjectMapper mapper = new ObjectMapper();
    String json = """
        {
          "name": "rich-field-document",
          "field_items": [
            {
              "self_ref": "#/field_items/0",
              "children": [{"$ref": "#/texts/0"}],
              "content_layer": "body",
              "label": "field_item",
              "meta": {
                "language": {
                  "confidence": 0.98,
                  "created_by": "language-detector",
                  "code": "de"
                }
              },
              "prov": [
                {
                  "page_no": 3,
                  "bbox": {
                    "l": 10.0,
                    "t": 20.0,
                    "r": 100.0,
                    "b": 40.0,
                    "coord_origin": "top_left"
                  },
                  "charspan": [2, 12]
                }
              ],
              "source": [
                {
                  "kind": "track",
                  "start_time": 10.0,
                  "end_time": 12.0
                }
              ],
              "comments": [
                {
                  "$ref": "#/texts/1",
                  "range": [4, 9]
                }
              ]
            }
          ]
        }
        """;

    DoclingDocument document = mapper.readValue(json, DoclingDocument.class);

    assertThat(document.getFieldItems()).hasSize(1);
    DoclingDocument.FieldItem fieldItem = document.getFieldItems().get(0);
    assertThat(fieldItem.getLabel()).isEqualTo(DocItemLabel.FIELD_ITEM);
    assertThat(fieldItem.getChildren().get(0).getRef()).isEqualTo("#/texts/0");
    assertThat(fieldItem.getMeta().getLanguage().getCode()).isEqualTo("de");
    assertThat(fieldItem.getProv().get(0).getPageNo()).isEqualTo(3);
    assertThat(fieldItem.getProv().get(0).getCharspan()).containsExactly(2, 12);
    assertThat(fieldItem.getSource().get(0)).isInstanceOf(DoclingDocument.TrackSource.class);
    assertThat(fieldItem.getComments().get(0).getRef()).isEqualTo("#/texts/1");
    assertThat(fieldItem.getComments().get(0).getRange()).containsExactly(4, 9);
  }

  /**
   * Checks that table data exposes {@code orientation} as the {@code Orientation} enum and that a
   * grid cell's {@code ref} pointer can be read back.
   */
  @Test
  void shouldDeserializeCurrentTableDataFields() throws Exception {
    ObjectMapper mapper = new ObjectMapper();
    String json = """
        {
          "name": "table-document",
          "tables": [
            {
              "self_ref": "#/tables/0",
              "label": "table",
              "data": {
                "num_rows": 1,
                "num_cols": 1,
                "orientation": "rot_90",
                "grid": [
                  [
                    {
                      "text": "Amount",
                      "column_header": true,
                      "ref": {"$ref": "#/texts/0"}
                    }
                  ]
                ]
              }
            }
          ]
        }
        """;

    DoclingDocument document = mapper.readValue(json, DoclingDocument.class);

    assertThat(document.getTables()).hasSize(1);
    assertThat(document.getTables().get(0).getData().getOrientation())
        .isEqualTo(Orientation.ROT_90);
    assertThat(document.getTables().get(0).getData().getGrid().get(0).get(0).getRef().getRef())
        .isEqualTo("#/texts/0");
  }

  /**
   * Entries of {@code texts} labelled {@code field_heading} and {@code field_value} must be
   * deserialized into {@code FieldHeadingItem} and {@code FieldValueItem} respectively.
   */
  @Test
  void shouldDeserializeCurrentFieldTextVariants() throws Exception {
    ObjectMapper mapper = new ObjectMapper();
    String json = """
        {
          "name": "field-text-document",
          "texts": [
            {
              "self_ref": "#/texts/0",
              "content_layer": "body",
              "label": "field_heading",
              "orig": "Account",
              "text": "Account",
              "level": 2,
              "source": [
                {
                  "kind": "track",
                  "start_time": 0.0,
                  "end_time": 1.0
                }
              ],
              "comments": [
                {
                  "$ref": "#/texts/2",
                  "range": [0, 4]
                }
              ]
            },
            {
              "self_ref": "#/texts/1",
              "content_layer": "body",
              "label": "field_value",
              "orig": "12345",
              "text": "12345",
              "kind": "fillable"
            }
          ]
        }
        """;

    DoclingDocument document = mapper.readValue(json, DoclingDocument.class);

    assertThat(document.getTexts()).hasSize(2);
    assertThat(document.getTexts().get(0))
        .isInstanceOfSatisfying(
            DoclingDocument.FieldHeadingItem.class,
            heading -> {
              assertThat(heading.getLevel()).isEqualTo(2);
              assertThat(heading.getSource().get(0))
                  .isInstanceOf(DoclingDocument.TrackSource.class);
              assertThat(heading.getComments().get(0).getRange()).containsExactly(0, 4);
            });
    assertThat(document.getTexts().get(1))
        .isInstanceOfSatisfying(
            DoclingDocument.FieldValueItem.class,
            value -> {
              assertThat(value.getKind()).isEqualTo("fillable");
              assertThat(value.getLabel()).isEqualTo(DocItemLabel.FIELD_VALUE);
            });
  }

  /**
   * Reads every picture meta section used in the fixture (description, classification, molecule,
   * tabular chart, code) and verifies that the {@code code} section is still present after the
   * document is serialized again.
   */
  @Test
  void shouldDeserializePictureMetaIncludingCode() throws Exception {
    ObjectMapper mapper = new ObjectMapper();
    String json = """
        {
          "name": "picture-meta-document",
          "pictures": [
            {
              "self_ref": "#/pictures/0",
              "content_layer": "body",
              "label": "picture",
              "meta": {
                "description": { "text": "A bar chart", "created_by": "vlm" },
                "classification": {
                  "predictions": [
                    { "class_name": "bar_chart", "confidence": 0.97, "created_by": "classifier" }
                  ]
                },
                "molecule": { "smi": "C1=CC=CC=C1", "confidence": 0.9 },
                "tabular_chart": {
                  "title": "Sales",
                  "chart_data": { "num_rows": 0, "num_cols": 0 }
                },
                "code": {
                  "text": "plt.bar(x, y)",
                  "language": "Python",
                  "confidence": 0.8,
                  "created_by": "code-model"
                }
              }
            }
          ]
        }
        """;

    DoclingDocument document = mapper.readValue(json, DoclingDocument.class);

    DoclingDocument.PictureMeta meta = document.getPictures().get(0).getMeta();
    assertThat(meta.getDescription().getText()).isEqualTo("A bar chart");
    assertThat(meta.getClassification().getPredictions().get(0).getClassName())
        .isEqualTo("bar_chart");
    assertThat(meta.getMolecule().getSmi()).isEqualTo("C1=CC=CC=C1");
    assertThat(meta.getTabularChart().getTitle()).isEqualTo("Sales");
    assertThat(meta.getCode().getText()).isEqualTo("plt.bar(x, y)");
    assertThat(meta.getCode().getLanguage()).isEqualTo("Python");
    assertThat(meta.getCode().getConfidence()).isEqualTo(0.8);

    String serialized = mapper.writeValueAsString(document);

    assertThat(serialized).contains("\"code\"");
    assertThat(serialized).contains("\"language\":\"Python\"");
  }

  /**
   * Entries of {@code table_cells} must come back as {@code DoclingDocument.TableCell} instances;
   * a cell without a {@code ref} property reports {@code null} for it.
   */
  @Test
  void shouldDeserializeTableCellsAsTypedTableCells() throws Exception {
    ObjectMapper mapper = new ObjectMapper();
    String json = """
        {
          "name": "table-cells-document",
          "tables": [
            {
              "self_ref": "#/tables/0",
              "label": "table",
              "data": {
                "num_rows": 1,
                "num_cols": 2,
                "table_cells": [
                  {
                    "text": "Header",
                    "column_header": true,
                    "start_row_offset_idx": 0,
                    "end_row_offset_idx": 1,
                    "start_col_offset_idx": 0,
                    "end_col_offset_idx": 1
                  },
                  {
                    "text": "Value",
                    "start_row_offset_idx": 0,
                    "end_row_offset_idx": 1,
                    "start_col_offset_idx": 1,
                    "end_col_offset_idx": 2,
                    "ref": {"$ref": "#/texts/0"}
                  }
                ]
              }
            }
          ]
        }
        """;

    DoclingDocument document = mapper.readValue(json, DoclingDocument.class);

    // Parameterized on purpose: with a raw List the element type is Object and the
    // TableCell accessors used below would not compile.
    List<DoclingDocument.TableCell> cells = document.getTables().get(0).getData().getTableCells();
    assertThat(cells).hasSize(2);
    assertThat(cells.get(0)).isInstanceOf(DoclingDocument.TableCell.class);
    assertThat(cells.get(0).getText()).isEqualTo("Header");
    assertThat(cells.get(0).isColumnHeader()).isTrue();
    assertThat(cells.get(0).getRef()).isNull();
    assertThat(cells.get(1).getText()).isEqualTo("Value");
    assertThat(cells.get(1).getRef().getRef()).isEqualTo("#/texts/0");
  }

  /**
   * Repeated entries in {@code keywords.values} and {@code topics.values} must be collapsed to
   * their first occurrence while keeping the original order.
   */
  @Test
  void shouldDeduplicateKeywordsAndTopicsPreservingOrder() throws Exception {
    ObjectMapper mapper = new ObjectMapper();
    String json = """
        {
          "name": "dedup-document",
          "body": {
            "self_ref": "#/body",
            "content_layer": "body",
            "label": "unspecified",
            "meta": {
              "keywords": { "values": ["alpha", "beta", "alpha", "gamma", "beta"] },
              "topics": { "values": ["x", "x", "y"] }
            }
          }
        }
        """;

    DoclingDocument document = mapper.readValue(json, DoclingDocument.class);

    // Order-preserving, deduplicated — matching the Python UniqueList invariant.
    assertThat(document.getBody().getMeta().getKeywords().getValues())
        .containsExactly("alpha", "beta", "gamma");
    assertThat(document.getBody().getMeta().getTopics().getValues())
        .containsExactly("x", "y");
  }

  /**
   * The {@code TrackSource} builder must refuse any {@code kind} other than {@code "track"} with an
   * {@link IllegalArgumentException}, and must still yield {@code "track"} when {@code kind} is
   * never set.
   */
  @Test
  void shouldRejectNonTrackKindOnTrackSource() {
    assertThatThrownBy(
            () ->
                DoclingDocument.TrackSource.builder()
                    .kind("bogus")
                    .startTime(0.0)
                    .endTime(1.0)
                    .build())
        .isInstanceOf(IllegalArgumentException.class)
        .hasMessageContaining("track");

    // The fixed discriminator default is still produced without setting it.
    DoclingDocument.TrackSource source =
        DoclingDocument.TrackSource.builder().startTime(0.0).endTime(1.0).build();
    assertThat(source.getKind()).isEqualTo("track");
  }
}