Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
Show all changes
28 commits
Select commit Hold shift + click to select a range
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 1 addition & 1 deletion .mypy.ini
Original file line number Diff line number Diff line change
Expand Up @@ -3,4 +3,4 @@ strict = True
ignore_missing_imports = True
disallow_untyped_calls = False
disable_error_code = no-any-return
exclude = ^(scripts|project_name)/
exclude = ^(scripts|project_name)/
2 changes: 1 addition & 1 deletion configs/dataset/processed/ftn.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -2,4 +2,4 @@ defaults:
- /dataset/raw/ftn@_here_
- _self_

uri: mlflow-artifacts:/86/d2b2f1835fc647e2ba3639ce606f4768/artifacts/dataset.csv
uri: mlflow-artifacts:/86/d2b2f1835fc647e2ba3639ce606f4768/artifacts/dataset.csv
2 changes: 1 addition & 1 deletion configs/dataset/processed/ikem.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -2,4 +2,4 @@ defaults:
- /dataset/raw/ikem@_here_
- _self_

uri: mlflow-artifacts:/86/7c6e7cc142494d45b6513185318d4462/artifacts/dataset.csv
uri: mlflow-artifacts:/86/7c6e7cc142494d45b6513185318d4462/artifacts/dataset.csv
2 changes: 1 addition & 1 deletion configs/dataset/processed/knl_patos.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -2,4 +2,4 @@ defaults:
- /dataset/raw/knl_patos@_here_
- _self_

uri: mlflow-artifacts:/86/f690f64ded624da9a7150a7a92385aec/artifacts/dataset.csv
uri: mlflow-artifacts:/86/f690f64ded624da9a7150a7a92385aec/artifacts/dataset.csv
2 changes: 1 addition & 1 deletion configs/dataset/raw/ftn.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -7,4 +7,4 @@ folder: /mnt/data/FTN/colon/IBD_AI
regex_pattern: ^[0-9]{1,6}_2[0-5]\.czi$
labels:
- IBD_AI_FTN.xlsx
- IBD_AI_FTN_doplnek.xlsx
- IBD_AI_FTN_doplnek.xlsx
2 changes: 1 addition & 1 deletion configs/dataset/raw/ikem.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -11,4 +11,4 @@ regex_pattern: ^[0-9]{1,5}_2[1-4]_HE(?:_0[1-6])?\.czi$
labels:
- Fab_IBD_AI_12_2024.csv
- IBD_AI_2.xlsx
- missing.xlsx
- missing.xlsx
2 changes: 1 addition & 1 deletion configs/dataset/raw/knl_patos.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -14,4 +14,4 @@ labels:
- IBD_AI_Liberec.xlsx
- IBD_AI_Liberec_02.xlsx
- IBD_AI_Liberec_10_2025.xlsx
- IBD_AI_Liberec_28_10_2025.xlsx
- IBD_AI_Liberec_28_10_2025.xlsx
20 changes: 20 additions & 0 deletions configs/preprocessing/quality_control.yaml
Original file line number Diff line number Diff line change
@@ -0,0 +1,20 @@
# @package _global_
# Hydra config group selected by the quality-control entry point via
# "+preprocessing=quality_control".

# Directory the QC outputs are written to; the entry point creates it if it
# does not exist yet.
output_dir: ${project_dir}/quality_control/${dataset.institution}

# Forwarded as `timeout` to the QC client's check_slides call.
# NOTE(review): unit is presumably seconds - confirm against the rationai SDK.
request_timeout: 18000
# Forwarded as `max_concurrent` to the same check_slides call.
max_concurrent: 5

# Unpacked as keyword arguments into SlideCheckConfig. The check_* flags also
# decide which generated mask families are sorted into subdirectories.
qc_parameters:
mask_level: 3
sample_level: 1
check_residual: True
check_folding: False
check_focus: True
wb_correction: True


# NOTE(review): presumably read by the MLflow autolog wrapper to name and
# describe the run - confirm against rationai-mlkit.
metadata:
run_name: "🎭 QC Masks: ${dataset.institution}"
description: Quality control masks for ${dataset.institution} institution
hyperparams: ${qc_parameters}
2 changes: 1 addition & 1 deletion configs/preprocessing/tissue_masks.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -8,4 +8,4 @@ metadata:
run_name: "🎭 Tissue Masks: ${dataset.institution}"
description: Tissue masks for ${dataset.institution} at level ${level}
hyperparams:
level: ${level}
level: ${level}
122 changes: 122 additions & 0 deletions preprocessing/quality_control.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,122 @@
# credits: https://gitlab.ics.muni.cz/rationai/digital-pathology/pathology/lymph-nodes/-/blob/develop/preprocessing/qc.py?ref_type=heads

import asyncio
from collections.abc import Generator
from pathlib import Path
from typing import TypedDict

import hydra
import mlflow.artifacts
import pandas as pd
import rationai
from omegaconf import DictConfig
from rationai.mlkit import autolog, with_cli_args
from rationai.mlkit.lightning.loggers import MLFlowLogger
from rationai.types import SlideCheckConfig
from tqdm.asyncio import tqdm


class QCParameters(TypedDict):
    """Typed view of the ``qc_parameters`` mapping used by this module.

    The whole mapping is unpacked as keyword arguments into
    ``SlideCheckConfig``; the ``check_*`` flags additionally select which mask
    families ``get_qc_masks`` reports.
    """

    # NOTE(review): presumably slide pyramid levels - confirm against the SDK.
    mask_level: int
    sample_level: int

    # One switch per quality-control check.
    check_residual: bool
    check_folding: bool
    check_focus: bool

    # NOTE(review): presumably toggles white-balance correction - confirm.
    wb_correction: bool


def get_qc_masks(qc_parameters: QCParameters) -> Generator[tuple[str, str], None, None]:
    """Yield ``(mask_prefix, subdirectory)`` pairs for every enabled check.

    Pairs are produced in a fixed order: focus, then residual artifacts, then
    folding. A check whose flag is falsy contributes nothing.

    Args:
        qc_parameters: Mapping holding the ``check_focus``, ``check_residual``
            and ``check_folding`` flags.

    Yields:
        Tuples of the mask file-name prefix and the name of the subdirectory
        the matching masks belong in.
    """
    if qc_parameters["check_focus"]:
        yield from (
            ("Piqe_focus_score_piqe_median", "blur_per_tile"),
            ("Piqe_piqe_median_activity_mask", "blur_per_pixel"),
        )

    if qc_parameters["check_residual"]:
        yield from (
            ("ResidualArtifactsAndCoverage_cov_percent_heatmap", "artifacts_per_tile"),
            ("ResidualArtifactsAndCoverage_coverage_mask", "artifacts_per_pixel"),
        )

    if qc_parameters["check_folding"]:
        yield from (("FoldingFunction_folding_test", "folds_per_pixel"),)


def organize_masks(output_path: Path, subdir: str, mask_prefix: str) -> None:
    """Move ``<mask_prefix>_<slide>.tiff`` files into ``output_path / subdir``.

    The leading ``<mask_prefix>_`` is dropped from each file name, so the moved
    file is named after the slide only. Files in ``output_path`` that do not
    match the pattern are left untouched.

    Args:
        output_path: Directory that contains the generated mask files.
        subdir: Name of the subdirectory (created if missing) to move into.
        mask_prefix: File-name prefix identifying one family of masks.
    """
    prefix_dir = output_path / subdir
    prefix_dir.mkdir(parents=True, exist_ok=True)

    name_prefix = f"{mask_prefix}_"

    # Glob has to be wrapped in list, because we're modifying the directory!!!
    for file in list(output_path.glob(f"{name_prefix}*.tiff")):
        # Strip only the leading prefix: str.replace would also remove any
        # later occurrence of the same text inside the slide name.
        slide_name = file.name.removeprefix(name_prefix)
        file.rename(prefix_dir / slide_name)


async def qc_main(
    output_path: Path,
    slides: list[str],
    logger: MLFlowLogger,
    request_timeout: int,
    max_concurrent: int,
    qc_parameters: QCParameters,
) -> None:
    """Run quality control on ``slides`` and log the results as artifacts.

    Steps:
        1. Submit the slides to the QC client and append one line per failed
           slide to ``qc_errors.log`` inside ``output_path``.
        2. Sort the generated masks into one subdirectory per mask family.
        3. Merge the per-slide CSV files into ``qc_metrics.csv`` and delete
           the individual files.
        4. Log the whole ``output_path`` directory through ``logger``.

    Args:
        output_path: Existing directory that receives all QC outputs.
        slides: Paths of the slides to check.
        logger: Logger used to upload ``output_path`` as artifacts.
        request_timeout: Passed as ``timeout`` to ``check_slides``.
        max_concurrent: Passed as ``max_concurrent`` to ``check_slides``.
        qc_parameters: Keyword arguments for ``SlideCheckConfig``; its
            ``check_*`` flags also select the mask families to organize.
    """
    error_log = output_path / "qc_errors.log"
    merged_csv = output_path / "qc_metrics.csv"

    async with rationai.AsyncClient() as client:  # type: ignore[attr-defined]
        async for result in tqdm(
            client.qc.check_slides(
                slides,
                output_path,
                config=SlideCheckConfig(**qc_parameters),
                timeout=request_timeout,
                max_concurrent=max_concurrent,
            ),
            total=len(slides),
        ):
            if not result.success:
                # Append mode keeps failures from earlier slides (and runs).
                with open(error_log, "a", encoding="utf-8") as log_file:
                    log_file.write(
                        f"Failed to process {result.wsi_path}: {result.error}\n"
                    )

    # Organize generated masks into subdirectories
    for prefix, artifact_name in get_qc_masks(qc_parameters):
        organize_masks(output_path, artifact_name, prefix)

    # Merge generated csv files. A merged file left over from a previous run
    # also matches "*.csv"; it must not be treated as a per-slide input,
    # otherwise the cleanup below would delete the file that was just written.
    # Sorting makes the row order independent of directory listing order.
    per_slide_csvs = sorted(f for f in output_path.glob("*.csv") if f != merged_csv)

    # pd.concat raises ValueError on an empty list (e.g. every slide failed),
    # so only merge when there is something to merge.
    if per_slide_csvs:
        pd.concat([pd.read_csv(f) for f in per_slide_csvs]).to_csv(
            merged_csv, index=False
        )

        # Remove individual csv files
        for f in per_slide_csvs:
            f.unlink()

    logger.log_artifacts(local_dir=str(output_path))


def download_dataset(uri: str) -> pd.DataFrame:
    """Fetch the CSV artifact behind ``uri`` with MLflow and load it.

    Args:
        uri: Artifact URI of the dataset CSV.

    Returns:
        The downloaded CSV parsed into a DataFrame.
    """
    local_csv = mlflow.artifacts.download_artifacts(artifact_uri=uri)
    return pd.read_csv(local_csv)


@with_cli_args(["+preprocessing=quality_control"])
@hydra.main(config_path="../configs", config_name="preprocessing", version_base=None)
@autolog
def main(config: DictConfig, logger: MLFlowLogger) -> None:
    """Entry point: download the dataset table and run QC on its slides.

    Args:
        config: Composed Hydra configuration; reads ``dataset.uri``,
            ``output_dir``, ``request_timeout``, ``max_concurrent`` and
            ``qc_parameters``.
        logger: MLflow logger handed on to ``qc_main``.
    """
    dataset = download_dataset(config.dataset.uri)

    qc_dir = Path(config.output_dir)
    qc_dir.mkdir(parents=True, exist_ok=True)

    # The "path" column of the dataset table lists the slides to check.
    slide_paths = dataset["path"].to_list()

    qc_job = qc_main(
        output_path=qc_dir,
        slides=slide_paths,
        logger=logger,
        request_timeout=config.request_timeout,
        max_concurrent=config.max_concurrent,
        qc_parameters=config.qc_parameters,
    )
    asyncio.run(qc_job)


if __name__ == "__main__":
    main()
2 changes: 2 additions & 0 deletions pyproject.toml
Original file line number Diff line number Diff line change
Expand Up @@ -13,6 +13,7 @@ dependencies = [
"openpyxl>=3.1.5",
"pandas>=2.3.3",
"pyvips<3.1",
"rationai-sdk",
"rationai-mlkit",
"rationai-masks",
"ray>=2.52.1",
Expand All @@ -28,3 +29,4 @@ job = ["rationai-kube-jobs"]
rationai-mlkit = { git = "https://gitlab.ics.muni.cz/rationai/digital-pathology/libraries/mlkit.git" }
rationai-masks = { git = "https://gitlab.ics.muni.cz/rationai/digital-pathology/libraries/masks.git" }
rationai-kube-jobs = { git = "ssh://git@gitlab.ics.muni.cz/rationai/infrastructure/kube-jobs" }
rationai-sdk = { git = "https://gitlab.ics.muni.cz/rationai/infrastructure/rationai-sdk-python.git" }
17 changes: 17 additions & 0 deletions scripts/preprocessing/quality_control.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,17 @@
from kube_jobs import storage, submit_job


# Template for submitting the quality-control preprocessing as a cluster job.
# The `...` placeholders (job-name suffix, username, dataset config) have to be
# filled in before running this script.
submit_job(
job_name="ulcerative-colitis-quality-control-...",
username=...,
public=False,
cpu=2,
memory="4Gi",
# Commands executed by the job: fetch the repository, install the locked
# dependencies and run the quality-control module for the chosen dataset.
script=[
"git clone https://github.com/RationAI/ulcerative-colitis.git workdir",
"cd workdir",
"uv sync --frozen",
"uv run -m preprocessing.quality_control +dataset=processed/...",
],
storage=[storage.secure.DATA],
)
67 changes: 67 additions & 0 deletions uv.lock

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.