Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
19 changes: 14 additions & 5 deletions docling_core/types/doc/document.py
Original file line number Diff line number Diff line change
Expand Up @@ -11,10 +11,13 @@
import sys
import typing
import warnings
from collections.abc import Iterable
from dataclasses import dataclass
from enum import Enum
from io import BytesIO
from pathlib import Path
from typing import (
Annotated,
Any,
Dict,
Final,
Expand All @@ -28,6 +31,7 @@
)
from urllib.parse import unquote

import orjson
import pandas as pd
import yaml
from PIL import Image as PILImage
Expand All @@ -37,15 +41,17 @@
ConfigDict,
Field,
FieldSerializationInfo,
SerializerFunctionWrapHandler,
StringConstraints,
computed_field,
field_serializer,
field_validator,
model_serializer,
model_validator,
validate_call,
)
from tabulate import tabulate
from typing_extensions import Annotated, Self, deprecated, override
from tabulate import _column_type, tabulate
from typing_extensions import Self, deprecated, override

from docling_core.search.package import VERSION_PATTERN
from docling_core.types.base import _JSON_POINTER_REGEX
Expand Down Expand Up @@ -4860,7 +4866,10 @@ def save_as_json(
coord_precision: Optional[int] = None,
confid_precision: Optional[int] = None,
):
"""Save as json."""
"""
Save as json.
Indent can either be 2 or 0 (no indent).
"""
if isinstance(filename, str):
filename = Path(filename)
artifacts_dir, reference_path = self._get_output_paths(filename, artifacts_dir)
Expand All @@ -4875,8 +4884,8 @@ def save_as_json(
out = new_doc.export_to_dict(
coord_precision=coord_precision, confid_precision=confid_precision
)
with open(filename, "w", encoding="utf-8") as fw:
json.dump(out, fw, indent=indent)
with open(filename, "wb") as fw:
fw.write(orjson.dumps(out, option=orjson.OPT_INDENT_2 if indent else None))

@classmethod
def load_from_json(cls, filename: Union[str, Path]) -> "DoclingDocument":
Expand Down
12 changes: 7 additions & 5 deletions pyproject.toml
Original file line number Diff line number Diff line change
Expand Up @@ -31,25 +31,27 @@ classifiers = [
"Topic :: Software Development :: Libraries :: Python Modules",
"Typing :: Typed",
"Programming Language :: Python :: 3",
"Programming Language :: Python :: 3.9",
"Programming Language :: Python :: 3.10",
"Programming Language :: Python :: 3.11",
"Programming Language :: Python :: 3.12",
"Programming Language :: Python :: 3.13",
"Programming Language :: Python :: 3.14",
]
requires-python = '>=3.9,<4.0'
requires-python = '>=3.10,<4.0'
dependencies = [
'jsonschema (>=4.16.0,<5.0.0)',
'pydantic (>=2.6.0,<3.0.0,!=2.10.0,!=2.10.1,!=2.10.2)',
'jsonref (>=1.1.0,<2.0.0)',
'tabulate (>=0.9.0,<0.10.0)',
'pandas (>=2.1.4,<3.0.0)',
'tabulate (>=0.9.0,<0.11.0)',
'pandas (>=2.1.4,<4.0.0)',
'pillow (>=10.0.0,<13.0.0)',
'pyyaml (>=5.1,<7.0.0)',
'typing-extensions (>=4.12.2,<5.0.0)',
'typer (>=0.12.5,<0.20.0)',
'typer (>=0.12.5,<0.25.0)',
'latex2mathml (>=3.77.0,<4.0.0)',
"defusedxml (>=0.7.1, <0.8.0)",
"pydantic-settings>=2.14.0",
"orjson>=3.11.5,<4.0.0",
]

[project.urls]
Expand Down
22 changes: 11 additions & 11 deletions test/data/doc/constructed_doc.embedded.json.gt
Original file line number Diff line number Diff line change
Expand Up @@ -425,7 +425,7 @@
"orig": "item of leading list",
"text": "item of leading list",
"enumerated": false,
"marker": "\u25a0"
"marker": ""
},
{
"self_ref": "#/texts/1",
Expand Down Expand Up @@ -507,7 +507,7 @@
"orig": "list item 1",
"text": "list item 1",
"enumerated": false,
"marker": "\u25a0"
"marker": ""
},
{
"self_ref": "#/texts/7",
Expand All @@ -521,7 +521,7 @@
"orig": "list item 2",
"text": "list item 2",
"enumerated": false,
"marker": "\u25a0"
"marker": ""
},
{
"self_ref": "#/texts/8",
Expand All @@ -539,7 +539,7 @@
"orig": "list item 3",
"text": "list item 3",
"enumerated": false,
"marker": "\u25a0"
"marker": ""
},
{
"self_ref": "#/texts/9",
Expand Down Expand Up @@ -613,7 +613,7 @@
"orig": "list item 4",
"text": "list item 4",
"enumerated": false,
"marker": "\u25a0"
"marker": ""
},
{
"self_ref": "#/texts/14",
Expand Down Expand Up @@ -663,7 +663,7 @@
"orig": "item 1 of list",
"text": "item 1 of list",
"enumerated": false,
"marker": "\u25a0"
"marker": ""
},
{
"self_ref": "#/texts/18",
Expand Down Expand Up @@ -705,7 +705,7 @@
"orig": "item 1 of neighboring list",
"text": "item 1 of neighboring list",
"enumerated": false,
"marker": "\u25a0"
"marker": ""
},
{
"self_ref": "#/texts/21",
Expand All @@ -723,7 +723,7 @@
"orig": "item 2 of neighboring list",
"text": "item 2 of neighboring list",
"enumerated": false,
"marker": "\u25a0"
"marker": ""
},
{
"self_ref": "#/texts/22",
Expand All @@ -737,7 +737,7 @@
"orig": "item 1 of sub list",
"text": "item 1 of sub list",
"enumerated": false,
"marker": "\u25a1"
"marker": ""
},
{
"self_ref": "#/texts/23",
Expand All @@ -755,7 +755,7 @@
"orig": "",
"text": "",
"enumerated": false,
"marker": "\u25a1"
"marker": ""
},
{
"self_ref": "#/texts/24",
Expand Down Expand Up @@ -813,7 +813,7 @@
"orig": "",
"text": "",
"enumerated": false,
"marker": "\u25a1"
"marker": ""
},
{
"self_ref": "#/texts/28",
Expand Down
22 changes: 11 additions & 11 deletions test/data/doc/constructed_doc.referenced.json.gt
Original file line number Diff line number Diff line change
Expand Up @@ -425,7 +425,7 @@
"orig": "item of leading list",
"text": "item of leading list",
"enumerated": false,
"marker": "\u25a0"
"marker": ""
},
{
"self_ref": "#/texts/1",
Expand Down Expand Up @@ -507,7 +507,7 @@
"orig": "list item 1",
"text": "list item 1",
"enumerated": false,
"marker": "\u25a0"
"marker": ""
},
{
"self_ref": "#/texts/7",
Expand All @@ -521,7 +521,7 @@
"orig": "list item 2",
"text": "list item 2",
"enumerated": false,
"marker": "\u25a0"
"marker": ""
},
{
"self_ref": "#/texts/8",
Expand All @@ -539,7 +539,7 @@
"orig": "list item 3",
"text": "list item 3",
"enumerated": false,
"marker": "\u25a0"
"marker": ""
},
{
"self_ref": "#/texts/9",
Expand Down Expand Up @@ -613,7 +613,7 @@
"orig": "list item 4",
"text": "list item 4",
"enumerated": false,
"marker": "\u25a0"
"marker": ""
},
{
"self_ref": "#/texts/14",
Expand Down Expand Up @@ -663,7 +663,7 @@
"orig": "item 1 of list",
"text": "item 1 of list",
"enumerated": false,
"marker": "\u25a0"
"marker": ""
},
{
"self_ref": "#/texts/18",
Expand Down Expand Up @@ -705,7 +705,7 @@
"orig": "item 1 of neighboring list",
"text": "item 1 of neighboring list",
"enumerated": false,
"marker": "\u25a0"
"marker": ""
},
{
"self_ref": "#/texts/21",
Expand All @@ -723,7 +723,7 @@
"orig": "item 2 of neighboring list",
"text": "item 2 of neighboring list",
"enumerated": false,
"marker": "\u25a0"
"marker": ""
},
{
"self_ref": "#/texts/22",
Expand All @@ -737,7 +737,7 @@
"orig": "item 1 of sub list",
"text": "item 1 of sub list",
"enumerated": false,
"marker": "\u25a1"
"marker": ""
},
{
"self_ref": "#/texts/23",
Expand All @@ -755,7 +755,7 @@
"orig": "",
"text": "",
"enumerated": false,
"marker": "\u25a1"
"marker": ""
},
{
"self_ref": "#/texts/24",
Expand Down Expand Up @@ -813,7 +813,7 @@
"orig": "",
"text": "",
"enumerated": false,
"marker": "\u25a1"
"marker": ""
},
{
"self_ref": "#/texts/28",
Expand Down
Loading
Loading