Skip to content
Closed
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
8 changes: 8 additions & 0 deletions src/ocr_bench/run.py
Original file line number Diff line number Diff line change
Expand Up @@ -87,6 +87,14 @@ class ModelConfig:
size="3B",
default_flavor="l4x1",
),
# PaddleOCR-VL-1.5 uses transformers batch inference (no vLLM/flashinfer), so it
# runs on the default uv-script image — unlike 1.6, which needs the prebuilt image.
"paddleocr-vl-1.5": ModelConfig(
script="https://huggingface.co/datasets/uv-scripts/ocr/raw/main/paddleocr-vl-1.5.py",
model_id="PaddlePaddle/PaddleOCR-VL-1.5",
size="0.9B",
default_flavor="l4x1",
),
# Image-mode models (Qwen3.5 / flashinfer) — need the vllm/vllm-openai image.
"nuextract3": ModelConfig(
script="https://huggingface.co/datasets/uv-scripts/ocr/raw/main/nuextract3.py",
Expand Down
11 changes: 10 additions & 1 deletion tests/test_run.py
Original file line number Diff line number Diff line change
Expand Up @@ -48,7 +48,7 @@ def test_image_mode_fields_default_none(self):

class TestModelRegistry:
def test_has_core_models(self):
assert len(MODEL_REGISTRY) == 9
assert len(MODEL_REGISTRY) == 10

def test_default_models_exist_in_registry(self):
for slug in DEFAULT_MODELS:
Expand Down Expand Up @@ -80,6 +80,15 @@ def test_image_mode_models_not_in_defaults(self):
assert "nuextract3" not in DEFAULT_MODELS
assert "paddleocr-vl-1.6" not in DEFAULT_MODELS

def test_paddleocr_vl_15_is_standard(self):
    """Check that PaddleOCR-VL-1.5 is registered without image-mode overrides."""
    # Version 1.5 relies on transformers batch inference (no vLLM/flashinfer),
    # so the default uv-script image is enough; 1.6 is the variant that needs
    # image-mode configuration.
    entry = MODEL_REGISTRY["paddleocr-vl-1.5"]
    assert entry.default_flavor == "l4x1"
    # None of the image-mode settings may be populated for this model.
    for field_name in ("image", "python", "env"):
        assert getattr(entry, field_name) is None


class TestListModels:
def test_returns_sorted_slugs(self):
Expand Down
Loading