diff --git a/openviking/parse/parsers/constants.py b/openviking/parse/parsers/constants.py
index 74609115e..e6f49f091 100644
--- a/openviking/parse/parsers/constants.py
+++ b/openviking/parse/parsers/constants.py
@@ -213,6 +213,7 @@
     ".properties",
     ".toml",
     ".json",
+    ".jsonl",
     ".yaml",
     ".yml",
     ".xml",
diff --git a/tests/test_upload_utils.py b/tests/test_upload_utils.py
index d665714da..35c0041c2 100644
--- a/tests/test_upload_utils.py
+++ b/tests/test_upload_utils.py
@@ -111,6 +111,11 @@ def test_documentation_extensions(self) -> None:
     def test_additional_text_extensions(self) -> None:
         assert is_text_file("settings.ini") is True
         assert is_text_file("data.csv") is True
+        # .jsonl is treated as text (matching .json) so upload-time encoding
+        # normalization applies, mirroring its inclusion in the vectorization
+        # text-extension set (#2745); otherwise a legacy-encoded .jsonl skips
+        # UTF-8 normalization while .json does not (#2744/#2770).
+        assert is_text_file("data.jsonl") is True
 
     def test_non_text_extensions(self) -> None:
         assert is_text_file("photo.png") is False