Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
Show all changes
32 commits
Select commit Hold shift + click to select a range
6e1d4b9
chore: replace pdm with uv
Adames4 Jan 25, 2026
d7a1ef3
feat: configs
Adames4 Jan 25, 2026
34abac4
feat: dataset creation
Adames4 Jan 25, 2026
80eca85
fix: configs
Adames4 Jan 25, 2026
97335ef
fix: configs
Adames4 Jan 25, 2026
727c729
feat: add scripts
Adames4 Jan 25, 2026
fa9f581
fix: invalid job name
Adames4 Jan 25, 2026
fd978b3
fix: set slide_id as index and ensure nancy is an integer
Adames4 Jan 25, 2026
2869916
chore: dependencies
Adames4 Jan 25, 2026
725e17d
feat: processed data
Adames4 Jan 25, 2026
f7026b3
feat: tissue masks
Adames4 Jan 25, 2026
ed49c42
feat: script
Adames4 Jan 25, 2026
87113e1
feat: script
Adames4 Jan 25, 2026
de98fef
fix: job name
Adames4 Jan 25, 2026
929d298
fix: exclude .git folder from ray env
Adames4 Jan 25, 2026
81d4df3
fix: exclude .venv folder from ray env
Adames4 Jan 25, 2026
3f0321f
fix: typo
Adames4 Jan 25, 2026
2ecde13
feat: job script
Adames4 Jan 25, 2026
6cae5d0
feat: config
Adames4 Jan 25, 2026
616ed48
feat: job script
Adames4 Jan 25, 2026
bd0c6ac
fix: PR comments
Adames4 Feb 11, 2026
9ffea96
chore: dependencies
Adames4 Feb 11, 2026
0db7d08
chore: Merge branch 'feature/dataset' into feature/tissue-masks
Adames4 Feb 11, 2026
b833059
feat: refactor configs
Adames4 Feb 11, 2026
3300341
fix: PR comments
Adames4 Feb 11, 2026
e7a4dfa
chore: Merge branch 'master' into feature/tissue-masks
Adames4 Feb 14, 2026
76196ef
fix: typo
Adames4 Feb 14, 2026
6bf43f1
feat: update dataset uris
Adames4 Feb 14, 2026
978ceee
fix: PR
Adames4 Feb 14, 2026
003d93b
chore: mypy
Adames4 Feb 14, 2026
c13ee1a
feat: with ray init
Adames4 Feb 14, 2026
e7b5a87
feat: script
Adames4 Feb 14, 2026
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 1 addition & 1 deletion .mypy.ini
Original file line number Diff line number Diff line change
Expand Up @@ -3,4 +3,4 @@ strict = True
ignore_missing_imports = True
disallow_untyped_calls = False
disable_error_code = no-any-return
exclude = scripts
exclude = ^(scripts|project_name)/
5 changes: 5 additions & 0 deletions configs/dataset/processed/ftn.yaml
Original file line number Diff line number Diff line change
@@ -0,0 +1,5 @@
defaults:
- /dataset/raw/ftn@_here_
- _self_

uri: mlflow-artifacts:/86/d2b2f1835fc647e2ba3639ce606f4768/artifacts/dataset.csv
5 changes: 5 additions & 0 deletions configs/dataset/processed/ikem.yaml
Original file line number Diff line number Diff line change
@@ -0,0 +1,5 @@
defaults:
- /dataset/raw/ikem@_here_
- _self_

uri: mlflow-artifacts:/86/7c6e7cc142494d45b6513185318d4462/artifacts/dataset.csv
5 changes: 5 additions & 0 deletions configs/dataset/processed/knl_patos.yaml
Original file line number Diff line number Diff line change
@@ -0,0 +1,5 @@
defaults:
- /dataset/raw/knl_patos@_here_
- _self_

uri: mlflow-artifacts:/86/f690f64ded624da9a7150a7a92385aec/artifacts/dataset.csv
11 changes: 11 additions & 0 deletions configs/preprocessing/tissue_masks.yaml
Original file line number Diff line number Diff line change
@@ -0,0 +1,11 @@
# @package _global_

level: 3
max_concurrent: 64
artifact_path: tissue_masks

metadata:
run_name: "🎭 Tissue Masks: ${dataset.institution}"
description: Tissue masks for ${dataset.institution} at level ${level}
hyperparams:
level: ${level}
Empty file removed preprocessing/.gitkeep
Empty file.
4 changes: 2 additions & 2 deletions preprocessing/create_dataset.py
Original file line number Diff line number Diff line change
Expand Up @@ -32,7 +32,7 @@ def get_labels(folder_path: Path, labels: list[str]) -> pd.DataFrame:
return labels_df


def get_slides(folder_path: Path, pattern: re.Pattern) -> pd.DataFrame:
def get_slides(folder_path: Path, pattern: re.Pattern[str]) -> pd.DataFrame:
slides = []
for slide_path in folder_path.iterdir():
if not pattern.fullmatch(slide_path.name):
Expand All @@ -48,7 +48,7 @@ def get_slides(folder_path: Path, pattern: re.Pattern) -> pd.DataFrame:


def create_dataset(
folder: str, labels: list[str], institution: str, pattern: re.Pattern
folder: str, labels: list[str], institution: str, pattern: re.Pattern[str]
) -> tuple[pd.DataFrame, list[str], list[str]]:
folder_path = Path(folder)
labels_df = get_labels(folder_path, labels)
Expand Down
56 changes: 56 additions & 0 deletions preprocessing/tissue_masks.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,56 @@
from pathlib import Path
from tempfile import TemporaryDirectory
from typing import cast

import hydra
import pandas as pd
import pyvips
import ray
from mlflow.artifacts import download_artifacts
from omegaconf import DictConfig
from openslide import OpenSlide
from rationai.masks import (
process_items,
slide_resolution,
tissue_mask,
write_big_tiff,
)
from rationai.mlkit import autolog, with_cli_args
from rationai.mlkit.lightning.loggers import MLFlowLogger


@ray.remote(memory=4 * 1024**3)
def process_slide(slide_path: str, level: int, output_path: Path) -> None:
    """Compute the tissue mask of one slide and store it as a TIFF file.

    Runs as a Ray remote task with a 4 GiB memory request.

    Args:
        slide_path: Path of the slide file to read.
        level: Level passed both to ``slide_resolution`` and to the pyvips
            loader, so the resolution and the loaded image refer to the same
            level.
        output_path: Directory the mask is written into. The mask keeps the
            slide's file name with its suffix replaced by ``.tiff``.
    """
    # OpenSlide is only needed for the resolution; the handle is released
    # before the image itself is loaded through pyvips.
    with OpenSlide(slide_path) as slide:
        mpp_x, mpp_y = slide_resolution(slide, level)

    level_image = cast(
        "pyvips.Image", pyvips.Image.new_from_file(slide_path, level=level)
    )

    # tissue_mask takes a single resolution value, so the two axes are averaged.
    mean_mpp = (mpp_x + mpp_y) / 2
    mask = tissue_mask(level_image, mpp=mean_mpp)

    target = output_path / Path(slide_path).with_suffix(".tiff").name
    write_big_tiff(mask, path=target, mpp_x=mpp_x, mpp_y=mpp_y)


@with_cli_args(["+preprocessing=tissue_masks"])
@hydra.main(config_path="../configs", config_name="preprocessing", version_base=None)
@autolog
def main(config: DictConfig, logger: MLFlowLogger) -> None:
    """Create tissue masks for every slide of a dataset and log them.

    The dataset CSV is downloaded from ``config.dataset.uri``; its ``path``
    column supplies the slides handed to ``process_slide``. The resulting
    masks are collected in a temporary directory and uploaded through
    ``logger`` under ``config.artifact_path``.

    Args:
        config: Hydra configuration; reads ``dataset.uri``, ``level``,
            ``max_concurrent`` and ``artifact_path``.
        logger: MLflow logger used to upload the generated masks.
    """
    dataset_csv = download_artifacts(config.dataset.uri)
    slides = pd.read_csv(dataset_csv)

    with TemporaryDirectory() as output_dir:
        slide_paths = slides["path"]
        slide_kwargs = {
            "level": config.level,
            "output_path": Path(output_dir),
        }
        process_items(
            slide_paths,
            process_item=process_slide,
            fn_kwargs=slide_kwargs,
            max_concurrent=config.max_concurrent,
        )

        # Upload before leaving the block: the directory and all masks in it
        # are removed as soon as the context manager exits.
        logger.log_artifacts(local_dir=output_dir, artifact_path=config.artifact_path)


if __name__ == "__main__":
    # Start Ray for the duration of the run. The `.git` and `.venv` folders
    # are excluded from the Ray runtime environment.
    with ray.init(runtime_env={"excludes": [".git", ".venv"]}):
        main()
2 changes: 2 additions & 0 deletions pyproject.toml
Original file line number Diff line number Diff line change
Expand Up @@ -14,6 +14,7 @@ dependencies = [
"pandas>=2.3.3",
"pyvips<3.1",
"rationai-mlkit",
"rationai-masks",
"ray>=2.52.1",
"torch>=2.9.0",
"torchmetrics>=1.8.2",
Expand All @@ -25,4 +26,5 @@ job = ["rationai-kube-jobs"]

[tool.uv.sources]
rationai-mlkit = { git = "https://gitlab.ics.muni.cz/rationai/digital-pathology/libraries/mlkit.git" }
rationai-masks = { git = "https://gitlab.ics.muni.cz/rationai/digital-pathology/libraries/masks.git" }
rationai-kube-jobs = { git = "ssh://git@gitlab.ics.muni.cz/rationai/infrastructure/kube-jobs" }
2 changes: 1 addition & 1 deletion scripts/preprocessing/create_dataset.py
Original file line number Diff line number Diff line change
Expand Up @@ -11,7 +11,7 @@
"git clone https://github.com/RationAI/ulcerative-colitis.git workdir",
"cd workdir",
"uv sync --frozen",
"uv run -m preprocessing.create_dataset +data=raw/...",
"uv run -m preprocessing.create_dataset +dataset=raw/...",
],
storage=[storage.secure.DATA],
)
18 changes: 18 additions & 0 deletions scripts/preprocessing/tissue_masks.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,18 @@
from kube_jobs import storage, submit_job


# Submits the tissue-mask preprocessing job.
# NOTE(review): the `...` in `job_name`, `username` and the `+dataset=processed/...`
# override appear to be placeholders to fill in before running; confirm.
submit_job(
    job_name="ulcerative-colitis-tissue-masks-...",
    username=...,
    public=False,
    cpu=64,
    memory="32Gi",
    shm="16Gi",
    # Commands run by the job: clone the repository, install the locked
    # dependencies, then run the tissue-mask module.
    script=[
        "git clone https://github.com/RationAI/ulcerative-colitis.git workdir",
        "cd workdir",
        "uv sync --frozen",
        "uv run --active -m preprocessing.tissue_masks +dataset=processed/...",
    ],
    storage=[storage.secure.DATA],
)
2 changes: 2 additions & 0 deletions uv.lock

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.