Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
14 changes: 14 additions & 0 deletions docs/contributor/modules/data.md
Original file line number Diff line number Diff line change
Expand Up @@ -69,6 +69,20 @@ referenceable by name in `data.source`. Registration lives in
5. **Update docs** — add the new sample name to
{doc}`/modules/data/own-vs-built-in`.

## Adding a label format

1. Create `src/raitap/data/adapters/<format>.py` with a class decorated with
`@label_format`. Set `format = LabelFormat.<name>` and
`supported_tasks = frozenset({...})`.
2. Implement `to_detection_records` and/or `to_classification_records`,
returning the native record shape (`{sample_id, boxes (xyxy), labels}` or
`{sample_id, label}`). Raise `ValueError` for an unsupported task.
3. Import it in `src/raitap/data/_label_format_adapters.py` so the decorator
fires.
4. Add a `LabelFormat` member in `src/raitap/data/types.py` and a row to the
label-format table in `docs/modules/data/configuration.md`.
5. Add tests in `src/raitap/data/tests/test_label_formats.py`.

## Sample discovery and label alignment

`data.source` directories are walked **recursively** (`Path.rglob`); sample
Expand Down
26 changes: 26 additions & 0 deletions docs/modules/data/configuration.md
Original file line number Diff line number Diff line change
Expand Up @@ -105,6 +105,16 @@ myst:
nested ImageFolder layouts (e.g. `NORMAL/IM-0001.jpeg`) — required when
filename stems collide across class subdirs. `"stem"` matches by basename only (flat-dir layouts).

:option: labels.format
:allowed: "native", "coco", "yolo", "voc"
:default: "native"
:description: External label file format. `"native"` (default) reads RAITAP's
own shape (classification: CSV/TSV/Parquet or the `"directory"` source;
detection: the JSON record list). `"coco"`, `"yolo"`, and `"voc"` convert a
standard annotation file to the native shape before alignment. `"yolo"` and
`"voc"` are detection only; `"coco"` serves detection and classification.
Non-native formats align by sample id, so a labels id is required.

:option: input_metadata
:allowed: dict, null
:default: null
Expand Down Expand Up @@ -180,6 +190,22 @@ data = DataConfig(
)
```

**Label formats.** RAITAP reads common annotation formats directly via `data.labels.format`.

| Format | Detection | Classification | Source layout |
| -------- | --------- | -------------- | ---------------------------------------------- |
| `native` | yes | yes | JSON record list / CSV-TSV-Parquet |
| `coco` | yes | yes | single `instances.json` |
| `yolo` | yes | no | dir of per-image `.txt` (needs `data.source`) |
| `voc` | yes | no | dir of per-image `.xml` |

COCO and YOLO labels keep their category ids unchanged. VOC class names map to
ids by their position in `model.class_names` when it is set, otherwise by the
standard 20-class VOC order.

Detection formats match each record's `sample_id` against the discovered image
file by exact name, so the image directory must be flat (nested subdirs are not
matched). Classification labels still align via `labels.id_strategy`.

For tabular models whose backend expects an unusual per-sample layout (such
as ACAS Xu, a Torch network whose forward takes `(N, 1, 1, 5)`), supply
`input_metadata.shape` explicitly so the pipeline reshapes the flat feature
Expand Down
6 changes: 5 additions & 1 deletion src/raitap/configs/schema.py
Original file line number Diff line number Diff line change
Expand Up @@ -5,7 +5,7 @@

from omegaconf import MISSING

from raitap.data.types import IdStrategy, LabelEncoding
from raitap.data.types import IdStrategy, LabelEncoding, LabelFormat
from raitap.types import Hardware, TaskKind

if TYPE_CHECKING:
Expand Down Expand Up @@ -87,6 +87,10 @@ class LabelsConfig:
# (supports nested ImageFolder layouts with colliding stems).
# "stem" — flat-dir / basename matching: match by ``Path(id).stem`` only.
id_strategy: IdStrategy = IdStrategy.auto
# External label file format. ``native`` (default) reads RAITAP's own
# shape. ``coco`` / ``yolo`` / ``voc`` are converted to the native
# intermediate before alignment. Requires id-based alignment (sample_ids).
format: LabelFormat = LabelFormat.native


@dataclass
Expand Down
11 changes: 10 additions & 1 deletion src/raitap/data/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -13,12 +13,13 @@

from typing import TYPE_CHECKING, Any

from .types import DIRECTORY_LABELS_SOURCE, IdStrategy, LabelEncoding, Preprocessing
from .types import DIRECTORY_LABELS_SOURCE, IdStrategy, LabelEncoding, LabelFormat, Preprocessing

if TYPE_CHECKING:
from raitap.configs.schema import DataConfig, LabelsConfig

from .data import Data, load_numpy_from_source, load_tensor_from_source
from .label_formats import LabelFormatAdapter, resolve_label_format_adapter
from .metadata import DataInputMetadata, infer_data_input_metadata
from .preprocessing import (
DataPreprocessingFactory,
Expand All @@ -36,6 +37,8 @@
"DataPreprocessingFactory",
"IdStrategy",
"LabelEncoding",
"LabelFormat",
"LabelFormatAdapter",
"LabelsConfig",
"ModelInputTransformationFactory",
"Preprocessing",
Expand All @@ -44,6 +47,7 @@
"load_tensor_from_source",
"raitap_model_input_transformation_factory",
"raitap_preprocessing_factory",
"resolve_label_format_adapter",
]


Expand All @@ -69,6 +73,11 @@
"raitap.data.preprocessing",
"raitap_preprocessing_factory",
),
"LabelFormatAdapter": ("raitap.data.label_formats", "LabelFormatAdapter"),
"resolve_label_format_adapter": (
"raitap.data.label_formats",
"resolve_label_format_adapter",
),
}


Expand Down
12 changes: 12 additions & 0 deletions src/raitap/data/_label_format_adapters.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,12 @@
# pyright: reportUnusedImport=false
"""Imports every in-tree label-format adapter so the decorators fire.

Imported for its side effects by
``raitap.data.label_formats.resolve_label_format_adapter``. Every import in this
module is intentionally side-effect-only (registers an adapter), so the
file-level ``reportUnusedImport=false`` above is correct.
"""

from __future__ import annotations

from raitap.data.adapters import coco, voc, yolo # noqa: F401
Comment thread
stanlrt marked this conversation as resolved.
1 change: 1 addition & 0 deletions src/raitap/data/adapters/__init__.py
Original file line number Diff line number Diff line change
@@ -0,0 +1 @@
"""Built-in label-format adapters (issue #338)."""
72 changes: 72 additions & 0 deletions src/raitap/data/adapters/coco.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,72 @@
"""COCO label-format adapter (issue #338)."""

from __future__ import annotations

import json
from typing import TYPE_CHECKING, Any

if TYPE_CHECKING:
from pathlib import Path

from raitap.data.label_formats import (
ClassificationRecord,
DetectionRecord,
label_format,
)
from raitap.data.types import LabelFormat
from raitap.types import TaskKind


@label_format
class CocoAdapter:
    """COCO ``instances.json`` -> native records.

    Detection: ``bbox`` is ``[x, y, w, h]`` -> ``[x1, y1, x2, y2]``;
    ``category_id`` passes through unchanged so labels stay in the model's
    label space. Classification: one label per image (the image's single
    annotation category); images with 0 or >1 categories raise.
    """

    format = LabelFormat.coco
    supported_tasks = frozenset({TaskKind.detection, TaskKind.classification})

    def _load(self, source: Path) -> dict[str, Any]:
        """Parse *source* as JSON and check its top-level shape.

        Raises:
            ValueError: if the document is not an object with an ``images`` key.
        """
        # Read as UTF-8 explicitly so parsing does not depend on the locale.
        with source.open(encoding="utf-8") as fh:
            data = json.load(fh)
        if not isinstance(data, dict) or "images" not in data:
            raise ValueError(f"COCO file {source} must be an object with an 'images' array.")
        return data

    def _group_annotations(
        self, source: Path
    ) -> tuple[dict[int, str], dict[int, list[dict[str, Any]]]]:
        """Return ``(file name by image id, annotations by image id)``.

        Every image listed in ``images`` gets an entry, so images without
        annotations map to an empty list. Annotations keep file order.

        Raises:
            ValueError: if an annotation points at an ``image_id`` that is not
                listed in ``images``.
        """
        data = self._load(source)
        file_by_image: dict[int, str] = {img["id"]: img["file_name"] for img in data["images"]}
        anns_by_image: dict[int, list[dict[str, Any]]] = {iid: [] for iid in file_by_image}
        for ann in data.get("annotations", []):
            iid = ann["image_id"]
            if iid not in anns_by_image:
                # Fail with a message that names the problem instead of a bare KeyError.
                raise ValueError(
                    f"COCO file {source} has an annotation for image_id {iid!r}, "
                    "which is not listed in 'images'."
                )
            anns_by_image[iid].append(ann)
        return file_by_image, anns_by_image

    def to_detection_records(
        self, source: Path, *, image_dir: Path | None, class_names: list[str] | None
    ) -> list[DetectionRecord]:
        """Convert a COCO file into one detection record per listed image.

        ``image_dir`` and ``class_names`` are not used by this adapter.

        Raises:
            ValueError: if the file is not a COCO object or an annotation
                references an unknown image.
        """
        file_by_image, anns_by_image = self._group_annotations(source)
        records: list[DetectionRecord] = []
        for iid, file_name in file_by_image.items():
            boxes: list[list[float]] = []
            labels: list[int] = []
            for ann in anns_by_image[iid]:
                x, y, w, h = ann["bbox"]
                # COCO stores top-left corner plus size; native records use corners.
                boxes.append([x, y, x + w, y + h])
                labels.append(int(ann["category_id"]))
            records.append({"sample_id": file_name, "boxes": boxes, "labels": labels})
        return records

    def to_classification_records(self, source: Path) -> list[ClassificationRecord]:
        """Convert a COCO file into one classification record per listed image.

        Raises:
            ValueError: if the file is not a COCO object, an annotation
                references an unknown image, or an image does not have exactly
                one distinct category.
        """
        file_by_image, anns_by_image = self._group_annotations(source)
        records: list[ClassificationRecord] = []
        for iid, name in file_by_image.items():
            cat_set = {int(ann["category_id"]) for ann in anns_by_image[iid]}
            if len(cat_set) != 1:
                raise ValueError(
                    f"COCO classification needs exactly one category per image; "
                    f"image {name!r} has {len(cat_set)}."
                )
            records.append({"sample_id": name, "label": next(iter(cat_set))})
        return records
100 changes: 100 additions & 0 deletions src/raitap/data/adapters/voc.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,100 @@
"""Pascal-VOC label-format adapter (issue #338)."""

from __future__ import annotations

import xml.etree.ElementTree as ET
from typing import TYPE_CHECKING

if TYPE_CHECKING:
from pathlib import Path

from raitap.data.label_formats import (
ClassificationRecord,
DetectionRecord,
label_format,
)
from raitap.data.types import LabelFormat
from raitap.types import TaskKind

#: Standard 20-class Pascal-VOC vocabulary in alphabetical order. A name's
#: position in this tuple is its label id when no ``class_names`` are given.
_VOC_CLASSES = (
    "aeroplane", "bicycle", "bird", "boat", "bottle",
    "bus", "car", "cat", "chair", "cow",
    "diningtable", "dog", "horse", "motorbike", "person",
    "pottedplant", "sheep", "sofa", "train", "tvmonitor",
)


def _coord(box: ET.Element, tag: str, xml_path: Path) -> float:
    """Return the ``<tag>`` child of a VOC ``<bndbox>`` element as a float.

    Args:
        box: The ``<bndbox>`` element to read from.
        tag: Child element name, e.g. ``"xmin"``.
        xml_path: Annotation file being parsed; only its name is used, for
            error messages.

    Raises:
        ValueError: if the child element is missing, or its text is empty or
            not a number.
    """
    text = box.findtext(tag)
    if text is None:
        raise ValueError(f"VOC bndbox in {xml_path.name} missing <{tag}>.")
    try:
        return float(text)
    except ValueError as err:
        # float()'s own message names neither the file nor the tag.
        raise ValueError(
            f"VOC bndbox in {xml_path.name} has non-numeric <{tag}>: {text!r}."
        ) from err


@label_format
class VocAdapter:
    """Pascal-VOC per-image ``.xml`` -> native detection records.

    Boxes are already ``[xmin, ymin, xmax, ymax]`` pixels. Class names map to
    ids by their position in ``class_names`` (else the standard 20-class VOC
    order).
    """

    format = LabelFormat.voc
    supported_tasks = frozenset({TaskKind.detection})

    def to_detection_records(
        self, source: Path, *, image_dir: Path | None, class_names: list[str] | None
    ) -> list[DetectionRecord]:
        """Convert every ``*.xml`` directly inside *source* into a detection record.

        Files are processed in sorted path order. ``image_dir`` is not used by
        this adapter. Whitespace around ``<filename>`` and ``<name>`` text is
        ignored.

        Raises:
            ValueError: if a file has no ``<filename>``, an object names a
                class outside the class list, or an object has no usable
                ``<bndbox>``.
        """
        known_classes = class_names if class_names else _VOC_CLASSES
        name_to_id = {name: idx for idx, name in enumerate(known_classes)}
        records: list[DetectionRecord] = []
        for xml_path in sorted(source.glob("*.xml")):
            root = ET.parse(xml_path).getroot()
            # Element text keeps whitespace verbatim, which would break
            # exact-name matching against image files, so strip it.
            file_name = (root.findtext("filename") or "").strip()
            if not file_name:
                raise ValueError(f"VOC file {xml_path} has no <filename>.")
            boxes: list[list[float]] = []
            labels: list[int] = []
            for obj in root.findall("object"):
                raw_name = obj.findtext("name")
                name = raw_name.strip() if raw_name is not None else None
                if name not in name_to_id:
                    raise ValueError(
                        f"VOC class {name!r} in {xml_path.name} is not in the "
                        f"class list {sorted(name_to_id)}."
                    )
                box = obj.find("bndbox")
                if box is None:
                    raise ValueError(f"VOC object in {xml_path.name} has no <bndbox>.")
                boxes.append(
                    [
                        _coord(box, "xmin", xml_path),
                        _coord(box, "ymin", xml_path),
                        _coord(box, "xmax", xml_path),
                        _coord(box, "ymax", xml_path),
                    ]
                )
                labels.append(name_to_id[name])
            records.append({"sample_id": file_name, "boxes": boxes, "labels": labels})
        return records

    def to_classification_records(self, source: Path) -> list[ClassificationRecord]:
        """Always raise: this adapter only supports detection.

        Raises:
            ValueError: unconditionally.
        """
        raise ValueError("VOC is a detection-only format.")
71 changes: 71 additions & 0 deletions src/raitap/data/adapters/yolo.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,71 @@
"""YOLO label-format adapter (issue #338)."""

from __future__ import annotations

from typing import TYPE_CHECKING

from PIL import Image

if TYPE_CHECKING:
from pathlib import Path

from raitap.data.label_formats import (
ClassificationRecord,
DetectionRecord,
label_format,
)
from raitap.data.types import LabelFormat
from raitap.types import TaskKind

#: Image extensions probed, in this order, when pairing a label file with its image.
_IMAGE_SUFFIXES = (".jpg", ".jpeg", ".png", ".bmp", ".webp")


@label_format
class YoloAdapter:
    """YOLO per-image ``.txt`` (``class cx cy w h``, normalised) -> native records.

    Boxes are denormalised with each image's pixel size, read from
    ``image_dir``. Class indices pass through unchanged.
    """

    format = LabelFormat.yolo
    supported_tasks = frozenset({TaskKind.detection})

    def _image_for(self, image_dir: Path, stem: str) -> Path:
        """Return the first existing ``image_dir/<stem><suffix>`` path.

        Raises:
            ValueError: if no file with a known image suffix exists for *stem*.
        """
        for suffix in _IMAGE_SUFFIXES:
            candidate = image_dir / f"{stem}{suffix}"
            if candidate.exists():
                return candidate
        raise ValueError(f"YOLO adapter found no image for label {stem!r} in {image_dir}.")

    def to_detection_records(
        self, source: Path, *, image_dir: Path | None, class_names: list[str] | None
    ) -> list[DetectionRecord]:
        """Convert every ``*.txt`` directly inside *source* into a detection record.

        Files are processed in sorted path order and blank lines are skipped.
        Only the first five fields of a line are read. ``class_names`` is not
        used by this adapter.

        Raises:
            ValueError: if ``image_dir`` is ``None``, a label file has no
                matching image, or a line does not hold five numeric fields.
        """
        if image_dir is None:
            raise ValueError(
                "YOLO labels need image_dir to denormalise boxes; "
                "set data.source to the image directory."
            )
        records: list[DetectionRecord] = []
        for txt in sorted(source.glob("*.txt")):
            image_path = self._image_for(image_dir, txt.stem)
            with Image.open(image_path) as im:
                width, height = im.size
            boxes: list[list[float]] = []
            labels: list[int] = []
            # Explicit encoding keeps parsing independent of the locale.
            lines = txt.read_text(encoding="utf-8").splitlines()
            for line_no, line in enumerate(lines, start=1):
                parts = line.split()
                if not parts:
                    continue
                try:
                    cls, cx, cy, bw, bh = (float(p) for p in parts[:5])
                except ValueError as err:
                    # Covers both too few fields and non-numeric fields.
                    raise ValueError(
                        f"YOLO label {txt.name} line {line_no} must be "
                        f"'class cx cy w h' with numeric fields; got {line!r}."
                    ) from err
                # Centre/size fractions -> pixel corner coordinates.
                x1 = (cx - bw / 2) * width
                y1 = (cy - bh / 2) * height
                x2 = (cx + bw / 2) * width
                y2 = (cy + bh / 2) * height
                boxes.append([x1, y1, x2, y2])
                labels.append(int(cls))
            records.append({"sample_id": image_path.name, "boxes": boxes, "labels": labels})
        return records

    def to_classification_records(self, source: Path) -> list[ClassificationRecord]:
        """Always raise: this adapter only supports detection.

        Raises:
            ValueError: unconditionally.
        """
        raise ValueError("YOLO is a detection-only format.")
Loading
Loading