CAIIVS · stanlrt · Jun 23, 2026 · Jun 23, 2026 · Jun 23, 2026 · Jun 23, 2026
diff --git a/docs/contributor/modules/data.md b/docs/contributor/modules/data.md
@@ -69,6 +69,20 @@ referenceable by name in `data.source`. Registration lives in
 5. **Update docs** — add the new sample name to
    {doc}`/modules/data/own-vs-built-in`.
 
+## Adding a label format
+
+1. Create `src/raitap/data/adapters/<format>.py` with a class decorated
+   `@label_format`. Set `format = LabelFormat.<name>` and
+   `supported_tasks = frozenset({...})`.
+2. Implement `to_detection_records` and/or `to_classification_records`,
+   returning the native record shape (`{sample_id, boxes (xyxy), labels}` or
+   `{sample_id, label}`). The method for an unsupported task raises `ValueError`.
+3. Import it in `src/raitap/data/_label_format_adapters.py` so the decorator
+   fires.
+4. Add a `LabelFormat` member in `src/raitap/data/types.py` and a row to the
+   label-format table in `docs/modules/data/configuration.md`.
+5. Add tests in `src/raitap/data/tests/test_label_formats.py`.
+
 ## Sample discovery and label alignment
 
 `data.source` directories are walked **recursively** (`Path.rglob`); sample

diff --git a/docs/modules/data/configuration.md b/docs/modules/data/configuration.md
@@ -105,6 +105,16 @@ myst:
   nested ImageFolder layouts (e.g. `NORMAL/IM-0001.jpeg`) — required when
   filename stems collide across class subdirs. `"stem"` matches by basename only (flat-dir layouts).
 
+:option: labels.format
+:allowed: "native", "coco", "yolo", "voc"
+:default: "native"
+:description: External label file format. `"native"` (default) reads RAITAP's
+  own shape (classification: CSV/TSV/Parquet or the `"directory"` source;
+  detection: the JSON record list). `"coco"`, `"yolo"`, and `"voc"` convert a
+  standard annotation file to the native shape before alignment. `"yolo"` and
+  `"voc"` are detection only; `"coco"` serves detection and classification.
+  Non-native formats align by sample id, so a labels id is required.
+
 :option: input_metadata
 :allowed: dict, null
 :default: null
@@ -180,6 +190,22 @@ data = DataConfig(
 )
 ```
 
+**Label formats.** RAITAP reads common annotation formats directly via `data.labels.format`.
+
+| Format   | Detection | Classification | Source layout                                  |
+| -------- | --------- | -------------- | ---------------------------------------------- |
+| `native` | yes       | yes            | JSON record list / CSV-TSV-Parquet             |
+| `coco`   | yes       | yes            | single `instances.json`                        |
+| `yolo`   | yes       | no             | dir of per-image `.txt` (needs `data.source`)  |
+| `voc`    | yes       | no             | dir of per-image `.xml`                        |
+
+COCO and YOLO labels keep their category ids unchanged. VOC class names map to
+ids by `model.class_names` order, else the standard 20-class VOC order.
+
+Detection formats match each record's `sample_id` against the discovered image
+file by exact name, so the image directory must be flat (nested subdirs are not
+matched). Classification labels still align via `labels.id_strategy`.
+
 For tabular models whose backend expects an unusual per-sample layout (such
 as ACAS Xu, a Torch network whose forward takes `(N, 1, 1, 5)`), supply
 `input_metadata.shape` explicitly so the pipeline reshapes the flat feature

diff --git a/src/raitap/configs/schema.py b/src/raitap/configs/schema.py
@@ -5,7 +5,7 @@
 
 from omegaconf import MISSING
 
-from raitap.data.types import IdStrategy, LabelEncoding
+from raitap.data.types import IdStrategy, LabelEncoding, LabelFormat
 from raitap.types import Hardware, TaskKind
 
 if TYPE_CHECKING:
@@ -87,6 +87,10 @@ class LabelsConfig:
     #                     (supports nested ImageFolder layouts with colliding stems).
     #   "stem"          — flat-dir / basename matching: match by ``Path(id).stem`` only.
     id_strategy: IdStrategy = IdStrategy.auto
+    # External label file format. ``native`` (default) reads RAITAP's own
+    # shape. ``coco`` / ``yolo`` / ``voc`` are converted to the native
+    # intermediate before alignment and require id-based alignment (sample_ids).
+    format: LabelFormat = LabelFormat.native
 
 
 @dataclass

diff --git a/src/raitap/data/__init__.py b/src/raitap/data/__init__.py
@@ -13,12 +13,13 @@
 
 from typing import TYPE_CHECKING, Any
 
-from .types import DIRECTORY_LABELS_SOURCE, IdStrategy, LabelEncoding, Preprocessing
+from .types import DIRECTORY_LABELS_SOURCE, IdStrategy, LabelEncoding, LabelFormat, Preprocessing
 
 if TYPE_CHECKING:
     from raitap.configs.schema import DataConfig, LabelsConfig
 
     from .data import Data, load_numpy_from_source, load_tensor_from_source
+    from .label_formats import LabelFormatAdapter, resolve_label_format_adapter
     from .metadata import DataInputMetadata, infer_data_input_metadata
     from .preprocessing import (
         DataPreprocessingFactory,
@@ -36,6 +37,8 @@
     "DataPreprocessingFactory",
     "IdStrategy",
     "LabelEncoding",
+    "LabelFormat",
+    "LabelFormatAdapter",
     "LabelsConfig",
     "ModelInputTransformationFactory",
     "Preprocessing",
@@ -44,6 +47,7 @@
     "load_tensor_from_source",
     "raitap_model_input_transformation_factory",
     "raitap_preprocessing_factory",
+    "resolve_label_format_adapter",
 ]
 
 
@@ -69,6 +73,11 @@
         "raitap.data.preprocessing",
         "raitap_preprocessing_factory",
     ),
+    "LabelFormatAdapter": ("raitap.data.label_formats", "LabelFormatAdapter"),
+    "resolve_label_format_adapter": (
+        "raitap.data.label_formats",
+        "resolve_label_format_adapter",
+    ),
 }
 
 

diff --git a/src/raitap/data/_label_format_adapters.py b/src/raitap/data/_label_format_adapters.py
@@ -0,0 +1,12 @@
+# pyright: reportUnusedImport=false
+"""Imports every in-tree label-format adapter so the decorators fire.
+
+Imported for its side effects by
+``raitap.data.label_formats.resolve_label_format_adapter``. Every import in this
+module is intentionally side-effect-only (registers an adapter), so the
+file-level ``reportUnusedImport=false`` above is correct.
+"""
+
+from __future__ import annotations
+
+from raitap.data.adapters import coco, voc, yolo  # noqa: F401
diff --git a/src/raitap/data/adapters/__init__.py b/src/raitap/data/adapters/__init__.py
@@ -0,0 +1 @@
+"""Built-in label-format adapters (issue #338)."""
diff --git a/src/raitap/data/adapters/coco.py b/src/raitap/data/adapters/coco.py
@@ -0,0 +1,147 @@
+"""COCO label-format adapter (issue #338)."""
+
+from __future__ import annotations
+
+import json
+from typing import TYPE_CHECKING, Any
+
+if TYPE_CHECKING:
+    from pathlib import Path
+
+from raitap.data.label_formats import (
+    ClassificationRecord,
+    DetectionRecord,
+    label_format,
+)
+from raitap.data.types import LabelFormat
+from raitap.types import TaskKind
+
+
+@label_format
+class CocoAdapter:
+    """COCO ``instances.json`` -> native records.
+
+    Detection: ``bbox`` is ``[x, y, w, h]`` -> ``[x1, y1, x2, y2]``;
+    ``category_id`` passes through unchanged so labels stay in the model's
+    label space. Classification: one label per image (the image's single
+    annotation category); images with 0 or >1 categories raise.
+
+    Structurally invalid files (missing keys, an annotation whose
+    ``image_id`` is not listed under ``images``, a ``bbox`` that is not four
+    numbers) raise ``ValueError`` naming the file instead of leaking a bare
+    ``KeyError`` / unpacking error.
+    """
+
+    format = LabelFormat.coco
+    supported_tasks = frozenset({TaskKind.detection, TaskKind.classification})
+
+    def _load(self, source: Path) -> dict[str, Any]:
+        """Parse ``source`` as JSON and check it is an object with ``images``.
+
+        Raises:
+            ValueError: if the top-level value is not an object or has no
+                ``images`` key.
+        """
+        # JSON is UTF-8 by specification; do not depend on the locale default.
+        with source.open(encoding="utf-8") as fh:
+            data = json.load(fh)
+        if not isinstance(data, dict) or "images" not in data:
+            raise ValueError(f"COCO file {source} must be an object with an 'images' array.")
+        return data
+
+    def _index_images(self, data: dict[str, Any], source: Path) -> dict[int, str]:
+        """Map every image ``id`` to its ``file_name``, keeping file order."""
+        try:
+            return {img["id"]: img["file_name"] for img in data["images"]}
+        except (KeyError, TypeError) as err:
+            raise ValueError(
+                f"COCO file {source}: every 'images' entry needs 'id' and 'file_name'."
+            ) from err
+
+    def _annotations(
+        self, data: dict[str, Any], source: Path, image_ids: dict[int, str]
+    ) -> list[tuple[int, int, dict[str, Any]]]:
+        """Return ``(image_id, category_id, annotation)`` for every annotation.
+
+        A missing ``annotations`` key is treated as an empty list.
+
+        Raises:
+            ValueError: if an annotation lacks ``image_id`` / ``category_id``,
+                has a non-integer ``category_id``, or points at an image that
+                is not in ``image_ids``.
+        """
+        checked: list[tuple[int, int, dict[str, Any]]] = []
+        for index, ann in enumerate(data.get("annotations", [])):
+            try:
+                iid = ann["image_id"]
+                category = int(ann["category_id"])
+            except (KeyError, TypeError, ValueError) as err:
+                raise ValueError(
+                    f"COCO file {source}: annotation #{index} needs 'image_id' "
+                    "and an integer 'category_id'."
+                ) from err
+            if iid not in image_ids:
+                raise ValueError(
+                    f"COCO file {source}: annotation #{index} references image_id "
+                    f"{iid!r}, which is not listed under 'images'."
+                )
+            checked.append((iid, category, ann))
+        return checked
+
+    def to_detection_records(
+        self, source: Path, *, image_dir: Path | None, class_names: list[str] | None
+    ) -> list[DetectionRecord]:
+        """Convert a COCO file to one detection record per listed image.
+
+        Images without annotations yield a record with empty ``boxes`` and
+        ``labels``. ``image_dir`` and ``class_names`` are accepted for
+        signature parity with the other adapters and are not used here.
+
+        Raises:
+            ValueError: if the file is not a valid COCO object or an
+                annotation is malformed.
+        """
+        data = self._load(source)
+        file_by_image = self._index_images(data, source)
+        boxes: dict[int, list[list[float]]] = {iid: [] for iid in file_by_image}
+        labels: dict[int, list[int]] = {iid: [] for iid in file_by_image}
+        annotations = self._annotations(data, source, file_by_image)
+        for index, (iid, category, ann) in enumerate(annotations):
+            try:
+                x, y, w, h = ann["bbox"]
+                box = [x, y, x + w, y + h]  # xywh -> xyxy
+            except (KeyError, TypeError, ValueError) as err:
+                raise ValueError(
+                    f"COCO file {source}: annotation #{index} needs a four-number "
+                    "'bbox' [x, y, w, h]."
+                ) from err
+            boxes[iid].append(box)
+            labels[iid].append(category)
+        return [
+            {"sample_id": file_by_image[iid], "boxes": boxes[iid], "labels": labels[iid]}
+            for iid in file_by_image
+        ]
+
+    def to_classification_records(self, source: Path) -> list[ClassificationRecord]:
+        """Convert a COCO file to one classification record per listed image.
+
+        Raises:
+            ValueError: if the file is not a valid COCO object, an annotation
+                is malformed, or an image does not have exactly one distinct
+                category.
+        """
+        data = self._load(source)
+        file_by_image = self._index_images(data, source)
+        cats: dict[int, set[int]] = {iid: set() for iid in file_by_image}
+        for iid, category, _ in self._annotations(data, source, file_by_image):
+            cats[iid].add(category)
+        records: list[ClassificationRecord] = []
+        for iid, name in file_by_image.items():
+            cat_set = cats[iid]
+            if len(cat_set) != 1:
+                raise ValueError(
+                    f"COCO classification needs exactly one category per image; "
+                    f"image {name!r} has {len(cat_set)}."
+                )
+            records.append({"sample_id": name, "label": next(iter(cat_set))})
+        return records
diff --git a/src/raitap/data/adapters/voc.py b/src/raitap/data/adapters/voc.py
@@ -0,0 +1,129 @@
+"""Pascal-VOC label-format adapter (issue #338)."""
+
+from __future__ import annotations
+
+import xml.etree.ElementTree as ET
+from typing import TYPE_CHECKING
+
+if TYPE_CHECKING:
+    from pathlib import Path
+
+from raitap.data.label_formats import (
+    ClassificationRecord,
+    DetectionRecord,
+    label_format,
+)
+from raitap.data.types import LabelFormat
+from raitap.types import TaskKind
+
+#: Canonical Pascal-VOC class order (index = label id) when no class_names given.
+_VOC_CLASSES = (
+    "aeroplane",
+    "bicycle",
+    "bird",
+    "boat",
+    "bottle",
+    "bus",
+    "car",
+    "cat",
+    "chair",
+    "cow",
+    "diningtable",
+    "dog",
+    "horse",
+    "motorbike",
+    "person",
+    "pottedplant",
+    "sheep",
+    "sofa",
+    "train",
+    "tvmonitor",
+)
+
+
+def _coord(box: ET.Element, tag: str, xml_path: Path) -> float:
+    """Return the numeric value of ``<tag>`` inside a ``<bndbox>`` element.
+
+    Raises:
+        ValueError: if ``<tag>`` is absent or its text is not a number; the
+            message names the offending file.
+    """
+    text = box.findtext(tag)
+    if text is None:
+        raise ValueError(f"VOC bndbox in {xml_path.name} missing <{tag}>.")
+    try:
+        return float(text)
+    except ValueError as err:
+        raise ValueError(
+            f"VOC bndbox in {xml_path.name} has non-numeric <{tag}>: {text!r}."
+        ) from err
+
+
+@label_format
+class VocAdapter:
+    """Pascal-VOC per-image ``.xml`` -> native detection records.
+
+    Boxes are already ``[xmin, ymin, xmax, ymax]`` pixels. Class names map to
+    ids by their position in ``class_names`` (else the standard 20-class VOC
+    order).
+    """
+
+    format = LabelFormat.voc
+    supported_tasks = frozenset({TaskKind.detection})
+
+    def to_detection_records(
+        self, source: Path, *, image_dir: Path | None, class_names: list[str] | None
+    ) -> list[DetectionRecord]:
+        """Convert every ``*.xml`` directly under ``source`` to a detection record.
+
+        Files are visited in sorted path order; ``sample_id`` is the text of
+        each file's ``<filename>`` element. ``image_dir`` is not used.
+
+        Raises:
+            ValueError: if a file is not well-formed XML, has no
+                ``<filename>``, names a class outside the class list, or has
+                an object without a complete numeric ``<bndbox>``.
+        """
+        name_to_id = {name: idx for idx, name in enumerate(class_names or _VOC_CLASSES)}
+        records: list[DetectionRecord] = []
+        for xml_path in sorted(source.glob("*.xml")):
+            # NOTE: ElementTree is not hardened against malicious XML; only
+            # point this adapter at annotation files you trust.
+            try:
+                root = ET.parse(xml_path).getroot()
+            except ET.ParseError as err:
+                # The parser reports a position inside the document; with one
+                # XML per image the user also needs to know which file it was.
+                raise ValueError(f"VOC file {xml_path} is not well-formed XML: {err}") from err
+            filename = (root.findtext("filename") or "").strip()
+            if not filename:
+                raise ValueError(f"VOC file {xml_path} has no <filename>.")
+            boxes: list[list[float]] = []
+            labels: list[int] = []
+            for obj in root.findall("object"):
+                name = obj.findtext("name")
+                if name is not None:
+                    name = name.strip()
+                if name not in name_to_id:
+                    raise ValueError(
+                        f"VOC class {name!r} in {xml_path.name} is not in the "
+                        f"class list {sorted(name_to_id)}."
+                    )
+                box = obj.find("bndbox")
+                if box is None:
+                    raise ValueError(f"VOC object in {xml_path.name} has no <bndbox>.")
+                boxes.append(
+                    [
+                        _coord(box, "xmin", xml_path),
+                        _coord(box, "ymin", xml_path),
+                        _coord(box, "xmax", xml_path),
+                        _coord(box, "ymax", xml_path),
+                    ]
+                )
+                labels.append(name_to_id[name])
+            records.append({"sample_id": filename, "boxes": boxes, "labels": labels})
+        return records
+
+    def to_classification_records(self, source: Path) -> list[ClassificationRecord]:
+        """Always raise: VOC is supported for detection only."""
+        raise ValueError("VOC is a detection-only format.")
diff --git a/src/raitap/data/adapters/yolo.py b/src/raitap/data/adapters/yolo.py
@@ -0,0 +1,99 @@
+"""YOLO label-format adapter (issue #338)."""
+
+from __future__ import annotations
+
+from typing import TYPE_CHECKING
+
+from PIL import Image
+
+if TYPE_CHECKING:
+    from pathlib import Path
+
+from raitap.data.label_formats import (
+    ClassificationRecord,
+    DetectionRecord,
+    label_format,
+)
+from raitap.data.types import LabelFormat
+from raitap.types import TaskKind
+
+#: Image suffixes probed, in this order, when pairing a label file with its image.
+_IMAGE_SUFFIXES = (".jpg", ".jpeg", ".png", ".bmp", ".webp")
+
+
+@label_format
+class YoloAdapter:
+    """YOLO per-image ``.txt`` (``class cx cy w h``, normalised) -> native records.
+
+    Boxes are denormalised with each image's pixel size, read from
+    ``image_dir``. Class indices pass through unchanged.
+    """
+
+    format = LabelFormat.yolo
+    supported_tasks = frozenset({TaskKind.detection})
+
+    def _image_for(self, image_dir: Path, stem: str) -> Path:
+        """Return the first existing image named ``stem`` plus a known suffix.
+
+        Raises:
+            ValueError: if no such file exists in ``image_dir``.
+        """
+        for suffix in _IMAGE_SUFFIXES:
+            candidate = image_dir / f"{stem}{suffix}"
+            if candidate.exists():
+                return candidate
+        raise ValueError(f"YOLO adapter found no image for label {stem!r} in {image_dir}.")
+
+    def to_detection_records(
+        self, source: Path, *, image_dir: Path | None, class_names: list[str] | None
+    ) -> list[DetectionRecord]:
+        """Convert every ``*.txt`` directly under ``source`` to a detection record.
+
+        Each label file is paired with the image of the same stem in
+        ``image_dir``; blank lines are skipped and only the first five fields
+        of a line are read. ``class_names`` is not used.
+
+        Raises:
+            ValueError: if ``image_dir`` is ``None``, a label file has no
+                matching image, or a line is not ``class cx cy w h`` numbers.
+        """
+        if image_dir is None:
+            raise ValueError(
+                "YOLO labels need image_dir to denormalise boxes; "
+                "set data.source to the image directory."
+            )
+        records: list[DetectionRecord] = []
+        for txt in sorted(source.glob("*.txt")):
+            image_path = self._image_for(image_dir, txt.stem)
+            with Image.open(image_path) as im:
+                width, height = im.size
+            boxes: list[list[float]] = []
+            labels: list[int] = []
+            lines = txt.read_text(encoding="utf-8").splitlines()
+            for lineno, line in enumerate(lines, start=1):
+                parts = line.split()
+                if not parts:
+                    continue
+                try:
+                    cls, cx, cy, bw, bh = (float(p) for p in parts[:5])
+                    label = int(cls)
+                except (ValueError, OverflowError) as err:
+                    # Too few fields, a non-numeric field, or a nan/inf class
+                    # id: report where, since there is one label file per image.
+                    raise ValueError(
+                        f"YOLO label {txt.name} line {lineno}: expected "
+                        f"'class cx cy w h' numbers, got {line!r}."
+                    ) from err
+                x1 = (cx - bw / 2) * width
+                y1 = (cy - bh / 2) * height
+                x2 = (cx + bw / 2) * width
+                y2 = (cy + bh / 2) * height
+                boxes.append([x1, y1, x2, y2])
+                labels.append(label)
+            records.append({"sample_id": image_path.name, "boxes": boxes, "labels": labels})
+        return records
+
+    def to_classification_records(self, source: Path) -> list[ClassificationRecord]:
+        """Always raise: YOLO is supported for detection only."""
+        raise ValueError("YOLO is a detection-only format.")
Original file line number	Diff line number	Diff line change
		@@ -0,0 +1 @@
		"""Built-in label-format adapters (issue #338)."""