Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
4 changes: 4 additions & 0 deletions contributor-configs/tree-xgboost-shap/assessment.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -21,6 +21,10 @@ data:
name: synthetic-tabular
source: ${hydra:runtime.cwd}/contributor-configs/tree-xgboost-shap/artifacts/features.csv
forward_batch_size: 8
# Declares the modality so the resolver skips the image-only "no preprocessing"
# warning (image preprocessing is meaningless for tabular features).
input_metadata:
kind: tabular

transparency:
treeshap:
Expand Down
12 changes: 7 additions & 5 deletions src/raitap/data/preprocessing.py
Original file line number Diff line number Diff line change
Expand Up @@ -324,9 +324,9 @@ def _suppress_off_warning(data_cfg: DataConfig, *, acknowledge_off: bool = False


_OFF_WARNING = (
"Image preprocessing is OFF.\n"
"No image preprocessing set in config.\n"
"RAITAP is forwarding your images to the model unchanged.\n"
"Most pretrained image models expect normalized inputs — without\n"
"Most pretrained image models expect normalized inputs — without "
"preprocessing, your accuracy results may be silently incorrect.\n"
"\n"
"Set both knobs in your config:\n"
Expand All @@ -338,15 +338,17 @@ def _suppress_off_warning(data_cfg: DataConfig, *, acknowledge_off: bool = False
" preprocessing: ./resize.py\n"
" model_input_transformation: ./normalize.py\n"
"\n"
"Not using image data? Set data.input_metadata.kind (e.g. tabular) in your config.\n"
"\n"
"If you've already preprocessed your images, silence this message:\n"
" - Python API: pass acknowledge_preprocessing_off=True to raitap.run(...)\n"
" - CLI: re-run with --acknowledge-preprocessing-off"
)

_MODEL_OFF_WARNING = (
"data.model_input_transformation is OFF.\n"
"Data-side preprocessing will run, but no transformation is applied at\n"
"the model boundary. Pretrained models that expect normalized inputs\n"
"No data.model_input_transformation set in config.\n"
"Data-side preprocessing will run, but no transformation is applied at "
"the model boundary. Pretrained models that expect normalized inputs "
"(ImageNet mean/std, etc.) will produce silently incorrect predictions.\n"
"\n"
"Set the model-side knob:\n"
Expand Down
8 changes: 4 additions & 4 deletions src/raitap/data/tests/test_preprocessing.py
Original file line number Diff line number Diff line change
Expand Up @@ -32,7 +32,7 @@ def test_both_off_no_keys_warns(monkeypatch: pytest.MonkeyPatch) -> None:
assert result.model_module is None
assert not result.is_active
assert len(result.warnings) == 1
assert "preprocessing is OFF" in result.warnings[0]
assert "No image preprocessing set in config" in result.warnings[0]


def test_both_off_suppression_via_kwarg(monkeypatch: pytest.MonkeyPatch) -> None:
Expand Down Expand Up @@ -83,7 +83,7 @@ def test_both_off_kind_explicit_none_still_warns(monkeypatch: pytest.MonkeyPatch
assert result.data_origin == "off"
assert result.model_origin == "off"
assert len(result.warnings) == 1
assert "preprocessing is OFF" in result.warnings[0]
assert "No image preprocessing set in config" in result.warnings[0]


# ---------------------------------------------------------------------------
Expand All @@ -103,7 +103,7 @@ def test_model_input_transformation_off_warns_when_data_side_set(
assert result.model_origin == "off"
assert result.data_module is not None
assert result.model_module is None
assert any("model_input_transformation is OFF" in w for w in result.warnings)
assert any("No data.model_input_transformation set in config" in w for w in result.warnings)


def test_model_input_transformation_off_suppressed_for_tabular(
Expand Down Expand Up @@ -363,7 +363,7 @@ def test_custom_file_data_side_only(monkeypatch: pytest.MonkeyPatch, tmp_path: P
assert result.model_module is None
assert result.data_file_path == fixture.resolve()
# model side off + image data → warning about model side
assert any("model_input_transformation is OFF" in w for w in result.warnings)
assert any("No data.model_input_transformation set in config" in w for w in result.warnings)


def test_custom_file_both_sides_from_same_file(
Expand Down
5 changes: 4 additions & 1 deletion src/raitap/models/model.py
Original file line number Diff line number Diff line change
Expand Up @@ -162,7 +162,10 @@ def _apply_preprocessing(
backend.model = nn.Sequential(model_module, backend.model)

for warning in resolved.warnings:
raitap_log.warn(warning)
# Preprocessing is a data-module concept (see the "Preprocessing:" info
# line below), so attribute the warning to the data module rather than to
# the pipeline frame that the deferred-log replay would otherwise infer.
raitap_log.warn(warning, module=Module.data)

if resolved.is_active:
# Deferred by ``_run_pipeline`` (this runs inside ``raitap_log.deferred()``)
Expand Down
Loading