Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
Empty file.
Original file line number Diff line number Diff line change
@@ -0,0 +1,84 @@
"""CLI for applying a fitted LOT pipeline to an embedding zarr.

Usage
-----
viscy-dynaclr apply-lot-correction -c config.yaml

Transforms all cells through StandardScaler → PCA → LOT and writes a new
zarr whose ``.X`` contains the corrected embeddings (shape n_cells × n_pca).
All ``.obs`` metadata from the input zarr is preserved.

Example config (YAML)
---------------------
input_zarr: /path/to/lightsheet_organelle.zarr
pipeline: /path/to/lot_pipeline.pkl
output_zarr: /path/to/corrected_organelle.zarr
overwrite: false
"""

import logging
from pathlib import Path

import click
from pydantic import ValidationError

from viscy.representation.evaluation.lot_correction import (
apply_lot_correction,
load_lot_pipeline,
)
from viscy.representation.evaluation.lot_correction_config import LotApplyConfig
from viscy.utils.cli_utils import load_config

# Configure the root logger when this module is imported: records at INFO
# level and above are emitted, formatted as "LEVEL: message".
logging.basicConfig(level=logging.INFO, format="%(levelname)s: %(message)s")


# CLI entry point for applying a fitted LOT pipeline.
#
# Parameters
# ----------
# config : Path
#     Path to an existing YAML file; its contents are validated by
#     ``LotApplyConfig``.
#
# Raises
# ------
# click.Abort
#     If the config cannot be loaded or validated, or if loading the pipeline
#     or applying the correction raises. The triggering exception is attached
#     as ``__cause__`` so it stays visible to callers and in tracebacks.
#
# NOTE: the function docstring is rendered by click as the ``--help`` text,
# so it is intentionally left as a single user-facing sentence.
@click.command(context_settings={"help_option_names": ["-h", "--help"]})
@click.option(
    "-c",
    "--config",
    type=click.Path(exists=True, path_type=Path),
    required=True,
    help="Path to YAML configuration file.",
)
def main(config: Path) -> None:
    """Apply a fitted LOT pipeline to correct batch effects in an embedding zarr."""
    click.echo("=" * 60)
    click.echo("LOT BATCH CORRECTION — APPLY")
    click.echo("=" * 60)

    # Stage 1: load and validate the configuration. Validation errors get a
    # dedicated message; anything else is reported as a load failure.
    try:
        config_dict = load_config(config)
        apply_config = LotApplyConfig(**config_dict)
    except ValidationError as e:
        click.echo(f"\nConfiguration validation failed:\n{e}", err=True)
        raise click.Abort() from e
    except Exception as e:
        click.echo(f"\nFailed to load configuration: {e}", err=True)
        raise click.Abort() from e

    click.echo(f"\nConfiguration loaded: {config}")
    click.echo(f" Input zarr: {apply_config.input_zarr}")
    click.echo(f" Pipeline: {apply_config.pipeline}")
    click.echo(f" Output zarr: {apply_config.output_zarr}")
    click.echo(f" Overwrite: {apply_config.overwrite}")

    # Stage 2: load the fitted pipeline and apply it. This is the top-level
    # CLI boundary, so any failure is reported on stderr and turned into an
    # abort instead of a raw traceback.
    try:
        pipeline = load_lot_pipeline(apply_config.pipeline)
        click.echo(
            f"\nPipeline loaded — n_pca={pipeline['n_pca']}, "
            # Falls back to NaN when the pipeline has no recorded variance.
            f"PCA variance={pipeline.get('pca_variance_explained', float('nan')):.1f}%"
        )
        apply_lot_correction(
            input_zarr=apply_config.input_zarr,
            pipeline=pipeline,
            output_zarr=apply_config.output_zarr,
            overwrite=apply_config.overwrite,
        )
        click.echo(f"\nCorrected zarr written to: {apply_config.output_zarr}")
    except Exception as e:
        click.echo(f"\nApplication failed: {e}", err=True)
        raise click.Abort() from e


if __name__ == "__main__":
    main()
Original file line number Diff line number Diff line change
@@ -0,0 +1,8 @@
# Example: apply fitted LOT pipeline to the light-sheet organelle zarr
# Output zarr will have corrected embeddings in target (confocal) PCA space
# Shape: (n_cells, n_pca=50) — all obs metadata preserved

input_zarr: /hpc/projects/intracellular_dashboard/organelle_dynamics/2025_07_22_A549_SEC61_TOMM20_G3BP1_ZIKV/4-phenotyping/predictions/DynaCLR-2D-BagOfChannels-timeaware/v3/timeaware_organelle_160patch_104ckpt.zarr
pipeline: /tmp/lot_test/lot_organelle_pipeline.pkl
output_zarr: /tmp/lot_test/lot_corrected_organelle.zarr
overwrite: false
Original file line number Diff line number Diff line change
@@ -0,0 +1,24 @@
# Example: fit LOT correction pipeline on G3BP1 organelle channel
# Source = light-sheet (LS1 + LS2 combined), Target = confocal 223-patch
#
# LS G3BP1 uninfected wells: C/1/
# Confocal G3BP1 uninfected: fov_name starts with "G3BP1/uninfected"

source_zarr: /hpc/projects/intracellular_dashboard/organelle_dynamics/2025_07_22_A549_SEC61_TOMM20_G3BP1_ZIKV/4-phenotyping/predictions/DynaCLR-2D-BagOfChannels-timeaware/v3/timeaware_organelle_160patch_104ckpt.zarr
target_zarr: /hpc/projects/intracellular_dashboard/organelle_box/2026_03_10_A549_strong_organelles_DENV_ZIKV_time_course/5-phenotyping/predictions/DynaCLR-2D-BagOfChannels-timeaware/v3/organelle_223patch_104ckpt.zarr

source_uninf_filter:
  column: fov_name
  startswith:
    - "C/1/"

target_uninf_filter:
  column: fov_name
  startswith:
    - "G3BP1/uninfected"

n_pca: 50
ns_lot: 3000
random_seed: 42

output_pipeline: /tmp/lot_test/lot_organelle_pipeline.pkl
Original file line number Diff line number Diff line change
@@ -0,0 +1,98 @@
"""CLI for fitting a LOT batch-correction pipeline on embedding zarrs.

Usage
-----
viscy-dynaclr fit-lot-correction -c config.yaml

The fitted pipeline (StandardScaler + PCA + LinearTransport) is saved to
the path specified by ``output_pipeline`` in the config file.

Example config (YAML)
---------------------
source_zarr: /path/to/lightsheet_organelle.zarr
target_zarr: /path/to/confocal_organelle.zarr
source_uninf_filter:
  column: fov_name
  startswith:
    - "C/1/"
target_uninf_filter:
  column: fov_name
  startswith:
    - "G3BP1/uninfected"
n_pca: 50
ns_lot: 3000
random_seed: 42
output_pipeline: /path/to/lot_pipeline.pkl
"""

import logging
from pathlib import Path

import click
from pydantic import ValidationError

from viscy.representation.evaluation.lot_correction import (
fit_lot_correction,
save_lot_pipeline,
)
from viscy.representation.evaluation.lot_correction_config import LotFitConfig
from viscy.utils.cli_utils import load_config

# Configure the root logger when this module is imported: records at INFO
# level and above are emitted, formatted as "LEVEL: message".
logging.basicConfig(level=logging.INFO, format="%(levelname)s: %(message)s")


# CLI entry point for fitting a LOT batch-correction pipeline.
#
# Parameters
# ----------
# config : Path
#     Path to an existing YAML file; its contents are validated by
#     ``LotFitConfig``.
#
# Raises
# ------
# click.Abort
#     If the config cannot be loaded or validated, or if fitting or saving
#     the pipeline raises. The triggering exception is attached as
#     ``__cause__`` so it stays visible to callers and in tracebacks.
#
# NOTE: the function docstring is rendered by click as the ``--help`` text,
# so it is intentionally left as a single user-facing sentence.
@click.command(context_settings={"help_option_names": ["-h", "--help"]})
@click.option(
    "-c",
    "--config",
    type=click.Path(exists=True, path_type=Path),
    required=True,
    help="Path to YAML configuration file.",
)
def main(config: Path) -> None:
    """Fit a LOT batch-correction pipeline on source and target embedding zarrs."""
    click.echo("=" * 60)
    click.echo("LOT BATCH CORRECTION — FIT")
    click.echo("=" * 60)

    # Stage 1: load and validate the configuration. Validation errors get a
    # dedicated message; anything else is reported as a load failure.
    try:
        config_dict = load_config(config)
        fit_config = LotFitConfig(**config_dict)
    except ValidationError as e:
        click.echo(f"\nConfiguration validation failed:\n{e}", err=True)
        raise click.Abort() from e
    except Exception as e:
        click.echo(f"\nFailed to load configuration: {e}", err=True)
        raise click.Abort() from e

    click.echo(f"\nConfiguration loaded: {config}")
    click.echo(f" Source zarr: {fit_config.source_zarr}")
    click.echo(f" Target zarr: {fit_config.target_zarr}")
    click.echo(f" n_pca: {fit_config.n_pca}")
    click.echo(f" ns_lot: {fit_config.ns_lot}")
    click.echo(f" Random seed: {fit_config.random_seed}")
    click.echo(f" Output: {fit_config.output_pipeline}")

    # Stage 2: fit the pipeline and save it. This is the top-level CLI
    # boundary, so any failure is reported on stderr and turned into an abort
    # instead of a raw traceback.
    try:
        pipeline = fit_lot_correction(
            source_zarr=fit_config.source_zarr,
            target_zarr=fit_config.target_zarr,
            # The filter config objects are handed over as plain dicts.
            source_uninf_filter=fit_config.source_uninf_filter.to_dict(),
            target_uninf_filter=fit_config.target_uninf_filter.to_dict(),
            n_pca=fit_config.n_pca,
            ns_lot=fit_config.ns_lot,
            random_seed=fit_config.random_seed,
        )
        click.echo(
            f"\nPipeline fitted — PCA explained variance: "
            f"{pipeline['pca_variance_explained']:.1f}%"
        )
        save_lot_pipeline(pipeline, fit_config.output_pipeline)
        click.echo(f"Pipeline saved to: {fit_config.output_pipeline}")
    except Exception as e:
        click.echo(f"\nFitting failed: {e}", err=True)
        raise click.Abort() from e


if __name__ == "__main__":
    main()
Empty file.
132 changes: 132 additions & 0 deletions applications/DynaCLR/evaluation/mmd/compute_mmd.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,132 @@
"""CLI for computing MMD² between two groups of cell embeddings.

Usage
-----
viscy-dynaclr compute-mmd -c config.yaml

The command compares two groups (A and B) defined by obs filters on one or
two AnnData zarrs. An optional ``group_by`` field splits the comparison into
per-group rows (e.g. per organelle per timepoint).

Example configs
---------------

**Biological signal** (ZIKV vs uninfected, same zarr, per organelle/timepoint):

zarr_a: /path/to/organelle_embeddings.zarr
filter_a:
  column: condition
  startswith: ["uninfected"]
filter_b:
  column: condition
  equals: "ZIKV"
group_by:
  - organelle
  - timepoint
use_pca: true
n_pca: 50
n_perm: 1000
max_cells: 2000
random_seed: 42
output_csv: mmd_results.csv

**Batch effect** (light-sheet vs confocal, two zarrs):

zarr_a: /path/to/lightsheet.zarr
zarr_b: /path/to/confocal.zarr
filter_a:
  column: fov_name
  startswith: ["C/1/"]
filter_b:
  column: fov_name
  startswith: ["G3BP1/uninfected"]
group_by: []
use_pca: true
n_pca: 50
n_perm: 0
max_cells: 2000
random_seed: 42
output_csv: batch_mmd.csv
"""

import logging
from pathlib import Path

import click
from pydantic import ValidationError

from viscy.representation.evaluation.mmd import compute_mmd
from viscy.representation.evaluation.mmd_config import ComputeMMDConfig
from viscy.utils.cli_utils import load_config

# Configure the root logger when this module is imported: records at INFO
# level and above are emitted, formatted as "LEVEL: message".
logging.basicConfig(level=logging.INFO, format="%(levelname)s: %(message)s")


# CLI entry point for computing MMD² between two groups of embeddings.
#
# Parameters
# ----------
# config : Path
#     Path to an existing YAML file; its contents are validated by
#     ``ComputeMMDConfig``.
#
# Raises
# ------
# click.Abort
#     If the config cannot be loaded or validated, if ``compute_mmd`` returns
#     an empty result, or if computing/writing the results raises. When an
#     underlying exception triggered the abort it is attached as
#     ``__cause__``.
#
# NOTE: the function docstring is rendered by click as the ``--help`` text,
# so it is intentionally left as a single user-facing sentence.
@click.command(context_settings={"help_option_names": ["-h", "--help"]})
@click.option(
    "-c",
    "--config",
    type=click.Path(exists=True, path_type=Path),
    required=True,
    help="Path to YAML configuration file.",
)
def main(config: Path) -> None:
    """Compute MMD² between two groups of cell embeddings from AnnData zarrs."""
    click.echo("=" * 60)
    click.echo("MMD COMPUTATION")
    click.echo("=" * 60)

    # Stage 1: load and validate the configuration. Validation errors get a
    # dedicated message; anything else is reported as a load failure.
    try:
        config_dict = load_config(config)
        mmd_config = ComputeMMDConfig(**config_dict)
    except ValidationError as e:
        click.echo(f"\nConfiguration validation failed:\n{e}", err=True)
        raise click.Abort() from e
    except Exception as e:
        click.echo(f"\nFailed to load configuration: {e}", err=True)
        raise click.Abort() from e

    # Human-readable PCA summary for the banner below.
    pca_summary = "yes, n=" + str(mmd_config.n_pca) if mmd_config.use_pca else "no"

    click.echo(f"\nConfiguration loaded: {config}")
    click.echo(f" Zarr A: {mmd_config.zarr_a}")
    click.echo(f" Zarr B: {mmd_config.zarr_b or '(same as A)'}")
    click.echo(f" Filter A: {mmd_config.filter_a}")
    click.echo(f" Filter B: {mmd_config.filter_b}")
    click.echo(f" Group by: {mmd_config.group_by or '(none — single overall)'}")
    click.echo(f" PCA: {pca_summary}")
    click.echo(f" n_perm: {mmd_config.n_perm or 'skipped'}")
    click.echo(f" max_cells: {mmd_config.max_cells}")
    click.echo(f" Output: {mmd_config.output_csv}")

    # Stage 2: compute, validate and persist the results.
    try:
        results = compute_mmd(
            zarr_a=mmd_config.zarr_a,
            zarr_b=mmd_config.zarr_b,
            # Unset filters are forwarded as None; set ones as plain dicts.
            filter_a=mmd_config.filter_a.to_dict() if mmd_config.filter_a else None,
            filter_b=mmd_config.filter_b.to_dict() if mmd_config.filter_b else None,
            # An empty group_by list is forwarded as None.
            group_by=mmd_config.group_by or None,
            use_pca=mmd_config.use_pca,
            n_pca=mmd_config.n_pca,
            n_perm=mmd_config.n_perm,
            max_cells=mmd_config.max_cells,
            random_seed=mmd_config.random_seed,
        )

        if results.empty:
            # This is a failure path, so report it on stderr like every other
            # error message emitted by this command.
            click.echo(
                "\nNo results computed — check filters and group_by columns.",
                err=True,
            )
            raise click.Abort()

        output_path = Path(mmd_config.output_csv)
        # Create the destination directory on demand.
        output_path.parent.mkdir(parents=True, exist_ok=True)
        results.to_csv(output_path, index=False)
        click.echo(f"\nResults ({len(results)} rows) written to: {output_path}")
        click.echo("\n" + results.to_string(index=False))

    except click.Abort:
        # Let the deliberate abort above through untouched rather than
        # re-reporting it via the generic handler below.
        raise
    except Exception as e:
        click.echo(f"\nMMD computation failed: {e}", err=True)
        raise click.Abort() from e


if __name__ == "__main__":
    main()
Original file line number Diff line number Diff line change
@@ -0,0 +1,25 @@
# Example: batch effect — light-sheet vs confocal (two different zarrs)
# Uninfected G3BP1 cells only, no grouping (single overall MMD)

zarr_a: /hpc/projects/intracellular_dashboard/organelle_dynamics/2025_07_22_A549_SEC61_TOMM20_G3BP1_ZIKV/4-phenotyping/predictions/DynaCLR-2D-BagOfChannels-timeaware/v3/timeaware_organelle_160patch_104ckpt.zarr
zarr_b: /hpc/projects/intracellular_dashboard/organelle_box/2026_03_10_A549_strong_organelles_DENV_ZIKV_time_course/5-phenotyping/predictions/DynaCLR-2D-BagOfChannels-timeaware/v3/organelle_223patch_104ckpt.zarr

filter_a:
  column: fov_name
  startswith:
    - "C/1/"

filter_b:
  column: fov_name
  startswith:
    - "G3BP1/uninfected"

group_by: [] # single overall comparison

use_pca: true
n_pca: 50
n_perm: 0
max_cells: 2000
random_seed: 42

output_csv: /tmp/mmd_test/mmd_batch_effect.csv
Loading
Loading