diff --git a/.gitignore b/.gitignore
index 2797057..920b43e 100644
--- a/.gitignore
+++ b/.gitignore
@@ -131,3 +131,4 @@ job_sub/datasets/**/*_local.*
 # Local-only analysis / figure-generation scripts (notebooks, sensitivity-analysis
 # helpers, etc.). Anything dropped under this folder stays out of git.
 job_sub/utils/analysis/
+utils/
diff --git a/job_sub/conf/config.yaml b/job_sub/conf/config.yaml
index 6bf5800..9a7d157 100755
--- a/job_sub/conf/config.yaml
+++ b/job_sub/conf/config.yaml
@@ -22,7 +22,7 @@ subset_ids_path: ${dataset_field:${datasets_file},${dataset_index},subset_ids_pa
 results_root_dir: /storage2/wangzitongLab/share/deepdraw_opt/jerry
 
 # number of random seeds to run for each active learning experiment
-num_seeds_per_job: 30
+num_seeds_per_job: 20
 seed_start: 0
 parallelize_seeds: true
 
@@ -30,7 +30,7 @@ parallelize_seeds: true
 al_settings:
   batch_size: 12
   starting_batch_size: 12
-  max_rounds: 29 # not including the initial selection
+  max_rounds: 19 # not including the initial selection
   feature_transforms: ${feature_transforms}
   target_transforms: ${target_transforms}
   output_dir: ${hydra:runtime.output_dir}
@@ -47,14 +47,14 @@ hydra:
     subdir: ${dataset_name}/${override_values:${hydra.job.override_dirname},dataset_index|single_array_across_datasets|al_settings.seed,default}
   sweeper: # multirun mode sweeps over these parameters
     params:
-      initial_selection_strategy: probcover_euclidean, core_set, random
-      embedding_model: gLM2_166k_kneedle
-      query_strategy: botorch_qlog_nei, topk, botorch_q_ucb
+      initial_selection_strategy: kmedoids
+      embedding_model: 166k_alphagenome_1bp_embeddingkneedle
+      query_strategy: botorch_mes
       predictor: botorch_gp
   launcher:
-    timeout_min: 720
+    timeout_min: 1440
     # everything below is used only for submitting jobs to the cluster
-    partition: intel-sc3,wzt_20250411 # no spacing between names
+    partition: intel-sc3-32c,amd-ep5 # no spacing between names
     cpus_per_task: 1
     qos: huge
     mem_per_cpu: 30GB
diff --git a/utils/__init__.py b/utils/__init__.py
deleted file mode 100644
index e69de29..0000000
diff --git a/utils/add_n_top.py b/utils/add_n_top.py
deleted file mode 100644
index c018e83..0000000
--- a/utils/add_n_top.py
+++ /dev/null
@@ -1,377 +0,0 @@
-#!/usr/bin/env python3
-"""
-Add n_top to results.csv files by recomputing top_p counts.
-
-Example:
-  python utils/add_n_top.py job_sub/multirun/2025-12-19
-  python utils/add_n_top.py job_sub/multirun/2025-12-19 --top-p 0.01 --column-name n_top_1e2 --overwrite
-"""
-
-from __future__ import annotations
-
-import argparse
-import ast
-import csv
-import json
-from pathlib import Path
-from typing import Any
-
-import numpy as np
-import pandas as pd
-import yaml
-from tqdm import tqdm
-
-DEFAULT_LABEL_KEY = "Fold Change (Induced/Basal)"
-DEFAULT_DATASETS_YAML = (
-    Path(__file__).resolve().parents[1] / "job_sub" / "datasets" / "datasets.yaml"
-)
-
-
-def _parse_selected_ids(value: Any) -> list[Any]:
-    if value is None:
-        return []
-    if isinstance(value, list):
-        return value
-    text = str(value).strip()
-    if not text:
-        return []
-    try:
-        parsed = ast.literal_eval(text)
-    except (ValueError, SyntaxError):
-        return [item for item in text.split(",") if item.strip()]
-    if isinstance(parsed, list):
-        return parsed
-    return [parsed]
-
-
-def _normalize_id(value: Any) -> str:
-    if isinstance(value, int | np.integer):
-        return str(int(value))
-    text = str(value).strip()
-    if not text:
-        return ""
-    try:
-        return str(int(text))
-    except ValueError:
-        try:
-            return str(int(float(text)))
-        except ValueError:
-            return text
-
-
-def _load_yaml(path: Path) -> dict[str, Any]:
-    if not path.exists():
-        return {}
-    return yaml.safe_load(path.read_text()) or {}
-
-
-def _resolve_path(raw: Any, base_dir: Path) -> Path | None:
-    if raw in (None, "", "null"):
-        return None
-    path = Path(str(raw)).expanduser()
-    if path.is_absolute():
-        return path
-    return (base_dir / path).resolve()
-
-
-def _load_subset_ids(path: Path) -> np.ndarray:
-    subset_ids = []
-    for line in path.read_text().splitlines():
-        text = line.strip()
-        if not text:
-            continue
-        try:
-            subset_ids.append(int(text))
-        except ValueError as exc:
-            raise ValueError(
-                f"Invalid sample id '{text}' in subset file {path}"
-            ) from exc
-    if not subset_ids:
-        raise ValueError(f"Subset ids file {path} did not contain any sample ids.")
-    return np.asarray(subset_ids, dtype=np.int64)
-
-
-def _load_sample_ids(embeddings_path: Path) -> np.ndarray:
-    data = np.load(embeddings_path, allow_pickle=True)
-    if "ids" not in data:
-        raise ValueError(
-            f"'ids' array not found in {embeddings_path}. Available keys: {list(data.keys())}"
-        )
-    return data["ids"].astype(np.int64)
-
-
-def _load_labels(
-    metadata_path: Path, label_key: str, sample_ids: np.ndarray
-) -> np.ndarray:
-    df = pd.read_csv(metadata_path, usecols=[label_key])
-    df = df.iloc[sample_ids]
-    return df[label_key].to_numpy()
-
-
-def _compute_top_id_set(
-    embeddings_path: Path,
-    metadata_path: Path,
-    label_key: str,
-    subset_ids_path: Path | None,
-    top_p: float,
-) -> set[str]:
-    sample_ids = _load_sample_ids(embeddings_path)
-    if subset_ids_path is not None:
-        subset_ids = _load_subset_ids(subset_ids_path)
-        mask = np.isin(sample_ids, subset_ids)
-        if not np.any(mask):
-            raise ValueError(
-                "Subset id filtering removed all samples. "
-                "Ensure the subset ids match those stored in the embeddings file."
-            )
-        sample_ids = sample_ids[mask]
-
-    labels = _load_labels(metadata_path, label_key, sample_ids)
-    sorted_indices = np.argsort(labels)
-    num_top = max(1, int(len(labels) * top_p))
-    top_indices = sorted_indices[-num_top:]
-    return {_normalize_id(item) for item in sample_ids[top_indices]}
-
-
-def _load_summary(path: Path) -> dict[str, Any]:
-    if not path.exists():
-        raise FileNotFoundError(f"summary.json not found: {path}")
-    return json.loads(path.read_text())
-
-
-def _extract_override_value(overrides: list[Any], key: str) -> str | None:
-    for entry in overrides:
-        text = str(entry).strip()
-        if not text:
-            continue
-        # Strip + prefix used by Hydra for adding new keys
-        if text.startswith("+"):
-            text = text[1:].strip()
-        if "=" in text:
-            candidate_key, value = text.split("=", 1)
-            if candidate_key.strip() == key:
-                return value.strip()
-        if text.startswith(f"{key}:"):
-            return text.split(":", 1)[1].strip()
-    return None
-
-
-def _resolve_embedding_model(summary: dict[str, Any]) -> str | None:
-    model = str(summary.get("embedding_model", "")).strip()
-    if model and model.lower() != "none":
-        return model
-    overrides = summary.get("hydra_overrides") or []
-    override_value = _extract_override_value(overrides, "embedding_model")
-    if override_value:
-        return override_value
-    return None
-
-
-def _load_dataset_map(datasets_yaml_path: Path) -> dict[str, dict[str, Path | None]]:
-    if not datasets_yaml_path.exists():
-        raise FileNotFoundError(f"Datasets YAML not found: {datasets_yaml_path}")
-    payload = _load_yaml(datasets_yaml_path)
-    datasets = payload.get("datasets") or []
-    if not datasets:
-        raise ValueError(f"No datasets found in {datasets_yaml_path}")
-
-    base_dir = datasets_yaml_path.parent
-    dataset_map: dict[str, dict[str, Path | None]] = {}
-    for entry in datasets:
-        name = str(entry.get("name", "")).strip()
-        if not name:
-            raise ValueError(f"Dataset entry missing name in {datasets_yaml_path}")
-        metadata_raw = str(entry.get("metadata_path", "")).strip()
-        if not metadata_raw:
-            raise ValueError(
-                f"Dataset '{name}' missing metadata_path in {datasets_yaml_path}"
-            )
-        embedding_raw = str(entry.get("embedding_dir", "")).strip()
-        if not embedding_raw:
-            raise ValueError(
-                f"Dataset '{name}' missing embedding_dir in {datasets_yaml_path}"
-            )
-        subset_raw = entry.get("subset_ids_path")
-
-        dataset_map[name] = {
-            "metadata_path": _resolve_path(metadata_raw, base_dir),
-            "embedding_dir": _resolve_path(embedding_raw, base_dir),
-            "subset_ids_path": _resolve_path(subset_raw, base_dir)
-            if subset_raw
-            else None,
-        }
-
-    return dataset_map
-
-
-def _load_rows(path: Path) -> tuple[list[dict[str, Any]], list[str]]:
-    with path.open(newline="") as handle:
-        reader = csv.DictReader(handle)
-        rows = [dict(row) for row in reader]
-        fieldnames = list(reader.fieldnames or [])
-    return rows, fieldnames
-
-
-def _write_rows(path: Path, rows: list[dict[str, Any]], fieldnames: list[str]) -> None:
-    tmp_path = path.with_suffix(".tmp")
-    with tmp_path.open("w", newline="") as handle:
-        writer = csv.DictWriter(handle, fieldnames=fieldnames)
-        writer.writeheader()
-        writer.writerows(rows)
-    tmp_path.replace(path)
-
-
-def _update_results_csv(
-    results_path: Path,
-    top_ids: set[str],
-    column_name: str,
-    overwrite: bool,
-) -> bool:
-    rows, fieldnames = _load_rows(results_path)
-    if not rows:
-        return False
-    if "selected_sample_ids" not in fieldnames:
-        raise ValueError(f"'selected_sample_ids' column missing in {results_path}")
-    if column_name in fieldnames and not overwrite:
-        return False
-
-    for row in rows:
-        selected_ids = _parse_selected_ids(row.get("selected_sample_ids"))
-        normalized = {_normalize_id(item) for item in selected_ids}
-        row[column_name] = str(sum(1 for item in normalized if item in top_ids))
-
-    if column_name not in fieldnames:
-        fieldnames.append(column_name)
-    _write_rows(results_path, rows, fieldnames)
-    return True
-
-
-def _iter_results(root: Path) -> list[Path]:
-    return [path for path in root.rglob("results.csv") if path.is_file()]
-
-
-def main() -> int:
-    parser = argparse.ArgumentParser(
-        description="Add n_top to results.csv files by recomputing top_p."
-    )
-    parser.add_argument(
-        "root_dir",
-        type=Path,
-        help="Directory containing results.csv files (e.g. job_sub/multirun/2025-12-19).",
-    )
-    parser.add_argument(
-        "--datasets-yaml",
-        type=Path,
-        default=DEFAULT_DATASETS_YAML,
-        help="Path to datasets.yaml (default: job_sub/datasets/datasets.yaml).",
-    )
-    parser.add_argument(
-        "--top-p",
-        type=float,
-        default=0.01,
-        help="Top percentage used to recompute n_top (default: 0.01).",
-    )
-    parser.add_argument(
-        "--column-name",
-        type=str,
-        default="n_top_1e2",
-        help="Column name to write (default: n_top_1e2).",
-    )
-    parser.add_argument(
-        "--overwrite",
-        action="store_true",
-        help="Overwrite the column if it already exists.",
-    )
-    parser.add_argument(
-        "--label-key",
-        type=str,
-        default=DEFAULT_LABEL_KEY,
-        help="Label column in the metadata CSV.",
-    )
-    args = parser.parse_args()
-
-    if not 0.0 < args.top_p <= 1.0:
-        raise SystemExit("top_p must be between 0 and 1.")
-
-    root_dir = args.root_dir
-    if not root_dir.exists():
-        raise SystemExit(f"Root dir not found: {root_dir}")
-
-    dataset_map = _load_dataset_map(args.datasets_yaml)
-
-    results_paths = _iter_results(root_dir)
-    if not results_paths:
-        raise SystemExit(f"No results.csv found under {root_dir}")
-
-    cache: dict[tuple[Path, Path, Path | None, str, float], set[str]] = {}
-    updated = 0
-    skipped = 0
-    for results_path in tqdm(results_paths, desc="Processing runs", unit="run"):
-        run_dir = results_path.parent
-        try:
-            summary = _load_summary(run_dir / "summary.json")
-            dataset_name = str(summary.get("dataset_name", "")).strip()
-            if not dataset_name:
-                raise ValueError("dataset_name missing in summary.json")
-            dataset_spec = dataset_map.get(dataset_name)
-            if dataset_spec is None:
-                raise ValueError(
-                    f"Dataset '{dataset_name}' not found in {args.datasets_yaml}"
-                )
-            embedding_model = _resolve_embedding_model(summary)
-            if not embedding_model:
-                raise ValueError(
-                    f"embedding_model missing for dataset '{dataset_name}'"
-                )
-            embedding_dir = dataset_spec.get("embedding_dir")
-            if embedding_dir is None:
-                raise ValueError(f"embedding_dir missing for dataset '{dataset_name}'")
-            embedding_file = (
-                embedding_model
-                if embedding_model.endswith(".npz")
-                else f"{embedding_model}.npz"
-            )
-            embeddings_path = Path(embedding_dir) / embedding_file
-            metadata_path = dataset_spec.get("metadata_path")
-            if metadata_path is None:
-                raise ValueError(f"metadata_path missing for dataset '{dataset_name}'")
-            subset_ids_path = dataset_spec.get("subset_ids_path")
-
-            cache_key = (
-                embeddings_path,
-                Path(metadata_path),
-                Path(subset_ids_path) if subset_ids_path else None,
-                args.label_key,
-                args.top_p,
-            )
-            top_ids = cache.get(cache_key)
-            if top_ids is None:
-                top_ids = _compute_top_id_set(
-                    embeddings_path=embeddings_path,
-                    metadata_path=metadata_path,
-                    label_key=args.label_key,
-                    subset_ids_path=subset_ids_path,
-                    top_p=args.top_p,
-                )
-                cache[cache_key] = top_ids
-
-            changed = _update_results_csv(
-                results_path=results_path,
-                top_ids=top_ids,
-                column_name=args.column_name,
-                overwrite=args.overwrite,
-            )
-            if changed:
-                updated += 1
-            else:
-                skipped += 1
-        except Exception as exc:
-            tqdm.write(f"Skipping {results_path}: {exc}")
-            skipped += 1
-
-    tqdm.write(f"Updated {updated} runs, skipped {skipped} runs.")
-    return 0
-
-
-if __name__ == "__main__":
-    raise SystemExit(main())
diff --git a/utils/baseline_scores.py b/utils/baseline_scores.py
deleted file mode 100644
index 911d71e..0000000
--- a/utils/baseline_scores.py
+++ /dev/null
@@ -1,340 +0,0 @@
-"""
-Compute random baseline summary metrics across datasets, reported per round.
-
-Example:
-    python utils/baseline_scores.py \\
-        --datasets-yaml job_sub/datasets/datasets.yaml \\
-        --output-csv results/baseline_scores.csv \\
-        --num-experiments 1000 \\
-        --num-rounds 10 \\
-        --num-samples-per-round 12
-"""
-
-from __future__ import annotations
-
-import argparse
-from dataclasses import dataclass
-from pathlib import Path
-
-import numpy as np
-import pandas as pd
-import yaml
-from tqdm import tqdm
-
-DEFAULT_DATASETS_YAML = (
-    Path(__file__).resolve().parents[1] / "job_sub" / "datasets" / "datasets.yaml"
-)
-DEFAULT_LABEL_KEY = "Fold Change (Induced/Basal)"
-DEFAULT_OUTPUT_CSV = (
-    Path(__file__).resolve().parents[1] / "results" / "baseline_scores.csv"
-)
-
-
-@dataclass(frozen=True)
-class DatasetSpec:
-    name: str
-    metadata_path: Path
-    subset_ids_path: Path | None = None
-
-
-def load_dataset_specs(dataset_yaml_path: Path) -> list[DatasetSpec]:
-    if not dataset_yaml_path.exists():
-        raise FileNotFoundError(f"Dataset YAML not found: {dataset_yaml_path}")
-
-    with dataset_yaml_path.open("r") as handle:
-        payload = yaml.safe_load(handle) or {}
-
-    datasets = payload.get("datasets") or []
-    if not datasets:
-        raise ValueError(f"No datasets found in {dataset_yaml_path}")
-
-    specs: list[DatasetSpec] = []
-    for entry in datasets:
-        name = str(entry.get("name", "")).strip()
-        if not name:
-            raise ValueError(f"Dataset entry missing name in {dataset_yaml_path}")
-
-        metadata_raw = str(entry.get("metadata_path", "")).strip()
-        if not metadata_raw:
-            raise ValueError(
-                f"Dataset '{name}' missing metadata_path in {dataset_yaml_path}"
-            )
-        metadata_path = Path(metadata_raw).expanduser()
-        if not metadata_path.is_absolute():
-            metadata_path = (dataset_yaml_path.parent / metadata_path).resolve()
-
-        subset_raw = entry.get("subset_ids_path")
-        subset_ids_path = None
-        if subset_raw:
-            subset_ids_path = Path(str(subset_raw)).expanduser()
-            if not subset_ids_path.is_absolute():
-                subset_ids_path = (dataset_yaml_path.parent / subset_ids_path).resolve()
-
-        specs.append(
-            DatasetSpec(
-                name=name,
-                metadata_path=metadata_path,
-                subset_ids_path=subset_ids_path,
-            )
-        )
-
-    return specs
-
-
-def load_subset_ids(subset_ids_path: Path) -> np.ndarray:
-    subset_ids = []
-    for line in subset_ids_path.read_text().splitlines():
-        text = line.strip()
-        if not text:
-            continue
-        try:
-            subset_ids.append(int(text))
-        except ValueError as exc:
-            raise ValueError(
-                f"Invalid sample id '{text}' in subset file {subset_ids_path}"
-            ) from exc
-    if not subset_ids:
-        raise ValueError(f"Subset ids file {subset_ids_path} did not contain any ids.")
-    return np.asarray(subset_ids, dtype=np.int64)
-
-
-def load_label_array(
-    metadata_path: Path,
-    label_key: str,
-    label_cache: dict[tuple[Path, str], np.ndarray],
-) -> np.ndarray:
-    cache_key = (metadata_path, label_key)
-    if cache_key in label_cache:
-        return label_cache[cache_key]
-
-    try:
-        df = pd.read_csv(metadata_path, usecols=[label_key])
-    except ValueError as exc:
-        raise ValueError(
-            f"Label key '{label_key}' not found in {metadata_path}"
-        ) from exc
-
-    series = pd.to_numeric(df[label_key], errors="coerce")
-    label_cache[cache_key] = series.to_numpy()
-    return label_cache[cache_key]
-
-
-def load_labels(
-    dataset: DatasetSpec,
-    label_key: str,
-    label_cache: dict[tuple[Path, str], np.ndarray],
-    subset_cache: dict[Path, np.ndarray],
-) -> tuple[np.ndarray, np.ndarray]:
-    labels = load_label_array(dataset.metadata_path, label_key, label_cache)
-    sample_ids = np.arange(len(labels), dtype=np.int64)
-
-    if dataset.subset_ids_path is not None:
-        subset_ids_path = dataset.subset_ids_path
-        if subset_ids_path not in subset_cache:
-            subset_cache[subset_ids_path] = load_subset_ids(subset_ids_path)
-        subset_ids = subset_cache[subset_ids_path]
-        if np.any(subset_ids < 0) or subset_ids.max() >= len(labels):
-            raise ValueError(
-                f"Subset ids in {subset_ids_path} are out of bounds for "
-                f"{dataset.metadata_path} (len={len(labels)})"
-            )
-        labels = labels[subset_ids]
-        sample_ids = subset_ids
-
-    finite_mask = np.isfinite(labels)
-    labels = labels[finite_mask]
-    sample_ids = sample_ids[finite_mask]
-    if labels.size == 0:
-        raise ValueError(
-            f"No finite labels found for dataset '{dataset.name}' after filtering."
-        )
-    return labels, sample_ids
-
-
-def build_top_mask(labels: np.ndarray, top_p: float) -> np.ndarray:
-    num_top = max(1, int(len(labels) * top_p))
-    if num_top >= len(labels):
-        return np.ones(len(labels), dtype=bool)
-    top_indices = np.argsort(labels)[-num_top:]
-    top_mask = np.zeros(len(labels), dtype=bool)
-    top_mask[top_indices] = True
-    return top_mask
-
-
-def draw_random_rounds(
-    num_samples: int,
-    num_rounds: int,
-    num_samples_per_round: int,
-    rng: np.random.Generator,
-) -> np.ndarray:
-    if num_rounds <= 0 or num_samples_per_round <= 0:
-        raise ValueError("num_rounds and num_samples_per_round must be > 0.")
-    total_samples = num_rounds * num_samples_per_round
-    if total_samples > num_samples:
-        raise ValueError(
-            "Cannot sample without replacement: requested samples exceed dataset size."
-        )
-    selections = rng.choice(num_samples, size=total_samples, replace=False)
-    return selections.reshape(num_rounds, num_samples_per_round)
-
-
-def compute_random_summary_metrics_history(
-    labels: np.ndarray,
-    top_mask: np.ndarray,
-    max_label: float,
-    num_rounds: int,
-    num_samples_per_round: int,
-    seed: int,
-) -> list[dict[str, float]]:
-    rng = np.random.default_rng(seed)
-    rounds = draw_random_rounds(
-        num_samples=len(labels),
-        num_rounds=num_rounds,
-        num_samples_per_round=num_samples_per_round,
-        rng=rng,
-    )
-    round_labels = labels[rounds]
-    normalized_true = round_labels.max(axis=1) / max_label
-    n_top = top_mask[rounds].sum(axis=1).astype(np.float64)
-    cumulative_max = np.maximum.accumulate(normalized_true)
-    cumulative_max_sum = np.cumsum(cumulative_max)
-    cumulative_n_top = np.cumsum(n_top)
-    cumulative_n_top_sum = np.cumsum(cumulative_n_top)
-    selected_per_round = np.full(num_rounds, num_samples_per_round, dtype=np.float64)
-    cumulative_selected = np.cumsum(selected_per_round)
-    cumulative_selected_sum = np.cumsum(cumulative_selected)
-    top_hits = np.where(n_top >= 1)[0]
-    if top_hits.size:
-        first_hit = float(top_hits[0] + 1)
-        rounds_to_top_history = np.full(num_rounds, first_hit, dtype=np.float64)
-        rounds_to_top_history[: int(first_hit) - 1] = np.nan
-    else:
-        rounds_to_top_history = np.full(num_rounds, np.nan, dtype=np.float64)
-
-    history: list[dict[str, float]] = []
-    for idx in range(num_rounds):
-        prefix_len = idx + 1
-        denom = float(cumulative_selected_sum[idx])
-        history.append(
-            {
-                "round": idx,
-                "auc_true": float(cumulative_max_sum[idx] / prefix_len),
-                "avg_top": float(cumulative_n_top_sum[idx] / denom) if denom else 0.0,
-                "rounds_to_top": float(rounds_to_top_history[idx]),
-                "overall_true": float(cumulative_max[idx]),
-                "max_train_spearman": float("nan"),
-                "max_extreme_value_auc": float("nan"),
-            }
-        )
-    return history
-
-
-def parse_args() -> argparse.Namespace:
-    parser = argparse.ArgumentParser(
-        description="Compute random baseline metrics for each dataset in a YAML file."
-    )
-    parser.add_argument(
-        "--datasets-yaml",
-        default=str(DEFAULT_DATASETS_YAML),
-        help="Path to the datasets YAML file.",
-    )
-    parser.add_argument(
-        "--output-csv",
-        default=str(DEFAULT_OUTPUT_CSV),
-        help="Path to save aggregated results as CSV.",
-    )
-    parser.add_argument(
-        "--label-key",
-        default=DEFAULT_LABEL_KEY,
-        help="Column name in the metadata CSV containing target labels.",
-    )
-    parser.add_argument("--num-experiments", type=int, default=10000)
-    parser.add_argument("--num-rounds", type=int, default=10)
-    parser.add_argument("--num-samples-per-round", type=int, default=12)
-    parser.add_argument(
-        "--top-p",
-        type=float,
-        default=0.01,
-        help="Top percentage used for avg_top (matches active learning defaults).",
-    )
-    parser.add_argument(
-        "--dataset",
-        action="append",
-        default=[],
-        help="Dataset name to include (can be repeated).",
-    )
-    return parser.parse_args()
-
-
-def main() -> None:
-    args = parse_args()
-    dataset_yaml_path = Path(args.datasets_yaml).expanduser()
-    output_csv = Path(args.output_csv).expanduser()
-
-    if args.num_experiments <= 0:
-        raise ValueError("num_experiments must be > 0.")
-    if not 0.0 < args.top_p <= 1.0:
-        raise ValueError("top_p must be between 0 and 1.")
-
-    datasets = load_dataset_specs(dataset_yaml_path)
-    if args.dataset:
-        requested = set(args.dataset)
-        datasets = [dataset for dataset in datasets if dataset.name in requested]
-        missing = requested - {dataset.name for dataset in datasets}
-        if missing:
-            raise ValueError(
-                f"Requested datasets not found in {dataset_yaml_path}: {sorted(missing)}"
-            )
-
-    label_cache: dict[tuple[Path, str], np.ndarray] = {}
-    subset_cache: dict[Path, np.ndarray] = {}
-    experiment_seeds = np.arange(args.num_experiments, dtype=np.int64)
-
-    random_states = []
-    for dataset in tqdm(datasets):
-        labels, _ = load_labels(dataset, args.label_key, label_cache, subset_cache)
-        dataset_max_label = float(np.max(labels))
-        top_mask = build_top_mask(labels, args.top_p)
-        for seed_value in experiment_seeds:
-            summary_metrics_history = compute_random_summary_metrics_history(
-                labels=labels,
-                top_mask=top_mask,
-                max_label=dataset_max_label,
-                num_rounds=args.num_rounds,
-                num_samples_per_round=args.num_samples_per_round,
-                seed=int(seed_value),
-            )
-            for summary_metrics in summary_metrics_history:
-                random_states.append(
-                    {
-                        "dataset_name": dataset.name,
-                        "query_strategy": "RANDOM",
-                        "predictor": "NONE",
-                        "initial_selection": "RANDOM",
-                        "embedding_model": "NONE",
-                        "feature_transforms": "NONE",
-                        "target_transforms": "NONE",
-                        "seed": int(seed_value),
-                        "round": summary_metrics["round"],
-                        "overall_true": summary_metrics["overall_true"],
-                        "auc_true": summary_metrics["auc_true"],
-                        "avg_top": summary_metrics["avg_top"],
-                        "rounds_to_top": summary_metrics["rounds_to_top"],
-                        "max_train_spearman": summary_metrics["max_train_spearman"],
-                        "max_extreme_value_auc": summary_metrics[
-                            "max_extreme_value_auc"
-                        ],
-                        "dataset_max_label": dataset_max_label,
-                    }
-                )
-
-    df = pd.DataFrame(random_states)
-    if df.empty:
-        raise ValueError("No datasets selected; nothing to write.")
-
-    output_csv.parent.mkdir(parents=True, exist_ok=True)
-    df.to_csv(output_csv, index=False)
-
-
-if __name__ == "__main__":
-    main()
diff --git a/utils/cancel_long_jobs.sh b/utils/cancel_long_jobs.sh
deleted file mode 100644
index 514ee0d..0000000
--- a/utils/cancel_long_jobs.sh
+++ /dev/null
@@ -1,40 +0,0 @@
-#!/usr/bin/env bash
-set -euo pipefail
-
-interval_seconds=300
-threshold=10
-
-if ! command -v squeue >/dev/null 2>&1; then
-  echo "squeue not found in PATH" >&2
-  exit 1
-fi
-
-if ! command -v scancel >/dev/null 2>&1; then
-  echo "scancel not found in PATH" >&2
-  exit 1
-fi
-
-while true; do
-  array_job_ids=()
-  while IFS= read -r job_id; do
-    [[ "$job_id" == *"_"* ]] || continue
-    base_id="${job_id%%_*}"
-    array_job_ids+=("$base_id")
-  done < <(squeue -u "${USER}" -h -o "%i")
-
-  if ((${#array_job_ids[@]} > 0)); then
-    mapfile -t unique_base_ids < <(printf "%s\n" "${array_job_ids[@]}" | sort -u)
-    for base_id in "${unique_base_ids[@]}"; do
-      task_count=$(squeue -u "${USER}" -h -o "%i" | awk -v base="${base_id}_" '$1 ~ "^"base {count++} END {print count+0}')
-      echo "Array job ${base_id}: remaining tasks = ${task_count}"
-      if ((task_count > 0 && task_count < threshold)); then
-        echo "Canceling array job ${base_id} (remaining tasks: ${task_count})"
-        scancel "${base_id}"
-      fi
-    done
-  else
-    echo "No array jobs found."
-  fi
-
-  sleep "${interval_seconds}"
-done
diff --git a/utils/concat_embedding.py b/utils/concat_embedding.py
deleted file mode 100644
index d78b816..0000000
--- a/utils/concat_embedding.py
+++ /dev/null
@@ -1,149 +0,0 @@
-#!/usr/bin/env python3
-"""
-Concatenate two embedding NPZ files by matching sample ids.
-
-By default, embeddings are concatenated as-is. Optional L2 normalization can be
-enabled before concatenation, and optional PCA can be applied after
-concatenation to reach a target explained variance ratio.
-
-Usage examples:
-  python utils/concat_embedding.py a.npz b.npz out.npz
-  python utils/concat_embedding.py a.npz b.npz out.npz --normalize
-  python utils/concat_embedding.py a.npz b.npz out.npz --normalize --pca-var 0.95
-"""
-
-from __future__ import annotations
-
-import argparse
-from pathlib import Path
-
-import numpy as np
-
-
-def _load_npz(path: Path) -> tuple[np.ndarray, np.ndarray]:
-    data = np.load(path, allow_pickle=True)
-    if "embeddings" not in data or "ids" not in data:
-        raise ValueError(
-            f"{path} must contain 'embeddings' and 'ids' arrays. "
-            f"Found keys: {list(data.keys())}"
-        )
-    embeddings = np.asarray(data["embeddings"])
-    ids = np.asarray(data["ids"])
-    if ids.ndim != 1:
-        raise ValueError(f"{path} ids must be 1D, got shape {ids.shape}")
-    if embeddings.shape[0] != ids.shape[0]:
-        raise ValueError(
-            f"{path} embeddings/ids length mismatch: "
-            f"{embeddings.shape[0]} vs {ids.shape[0]}"
-        )
-    return embeddings, ids
-
-
-def _ensure_unique(ids: np.ndarray, label: str) -> None:
-    unique_count = np.unique(ids).size
-    if unique_count != ids.size:
-        raise ValueError(f"{label} ids contain duplicates ({ids.size - unique_count}).")
-
-
-def _l2_normalize(embeddings: np.ndarray, eps: float = 1e-12) -> np.ndarray:
-    norms = np.linalg.norm(embeddings, axis=1, keepdims=True)
-    return embeddings / np.maximum(norms, eps)
-
-
-def _apply_pca_variance(
-    embeddings: np.ndarray, target_variance: float
-) -> tuple[np.ndarray, int]:
-    if not 0.0 < target_variance <= 1.0:
-        raise ValueError("target_variance must be in (0, 1].")
-    mean = np.mean(embeddings, axis=0, keepdims=True)
-    centered = embeddings - mean
-    _, s, vt = np.linalg.svd(centered, full_matrices=False)
-    if s.size == 0:
-        return centered, 0
-    var = (s**2) / max(embeddings.shape[0] - 1, 1)
-    total_var = float(np.sum(var))
-    if total_var <= 0:
-        return centered, 1
-    explained_ratio = var / total_var
-    cumulative = np.cumsum(explained_ratio)
-    n_components = int(np.searchsorted(cumulative, target_variance) + 1)
-    components = vt[:n_components]
-    return centered @ components.T, n_components
-
-
-def concat_embeddings(
-    path_a: Path,
-    path_b: Path,
-    output_path: Path,
-    normalize: bool = False,
-    pca_variance: float | None = None,
-) -> None:
-    emb_a, ids_a = _load_npz(path_a)
-    emb_b, ids_b = _load_npz(path_b)
-    _ensure_unique(ids_a, f"{path_a}")
-    _ensure_unique(ids_b, f"{path_b}")
-
-    ids_b_set = set(ids_b.tolist())
-    mask_a = np.isin(ids_a, ids_b)
-    ids_common = ids_a[mask_a]
-    if ids_common.size == 0:
-        raise ValueError("No overlapping ids found between the two files.")
-
-    index_b = {int(id_val): idx for idx, id_val in enumerate(ids_b)}
-    idx_a = np.nonzero(mask_a)[0]
-    idx_b = np.array([index_b[int(id_val)] for id_val in ids_common], dtype=int)
-
-    emb_a_sel = emb_a[idx_a]
-    emb_b_sel = emb_b[idx_b]
-    if normalize:
-        emb_a_sel = _l2_normalize(emb_a_sel)
-        emb_b_sel = _l2_normalize(emb_b_sel)
-    emb_concat = np.concatenate([emb_a_sel, emb_b_sel], axis=1)
-    if pca_variance is not None:
-        emb_concat, n_components = _apply_pca_variance(emb_concat, pca_variance)
-
-    output_path.parent.mkdir(parents=True, exist_ok=True)
-    np.savez(output_path, embeddings=emb_concat, ids=ids_common)
-
-    print(f"Saved concatenated embeddings to {output_path}")
-    print(f"File A: {path_a} ({ids_a.size} ids, {emb_a.shape[1]} dims)")
-    print(f"File B: {path_b} ({ids_b.size} ids, {emb_b.shape[1]} dims)")
-    print(f"Overlap: {ids_common.size} ids, output shape: {emb_concat.shape}")
-    if pca_variance is not None:
-        print(f"PCA retained {n_components} components for {pca_variance:.2f} variance")
-
-
-def _parse_args() -> argparse.Namespace:
-    parser = argparse.ArgumentParser(
-        description="Concatenate two embedding NPZ files by matching ids."
-    )
-    parser.add_argument("embedding_a", type=Path, help="Path to first NPZ file.")
-    parser.add_argument("embedding_b", type=Path, help="Path to second NPZ file.")
-    parser.add_argument("output", type=Path, help="Path for output NPZ file.")
-    parser.add_argument(
-        "--normalize",
-        action="store_true",
-        help="Enable L2 normalization before concatenation.",
-    )
-    parser.add_argument(
-        "--pca-var",
-        type=float,
-        default=None,
-        help="Target explained variance ratio for PCA (e.g., 0.95).",
-    )
-    return parser.parse_args()
-
-
-def main() -> None:
-    args = _parse_args()
-    concat_embeddings(
-        args.embedding_a,
-        args.embedding_b,
-        args.output,
-        normalize=args.normalize,
-        pca_variance=args.pca_var,
-    )
-
-
-if __name__ == "__main__":
-    main()
diff --git a/utils/generate_partial_summaries.py b/utils/generate_partial_summaries.py
deleted file mode 100644
index 8b099a7..0000000
--- a/utils/generate_partial_summaries.py
+++ /dev/null
@@ -1,226 +0,0 @@
-#!/usr/bin/env python3
-"""
-Generate summary_n.json files for partial runs based on results.csv.
-
-Example:
-  python utils/generate_partial_summaries.py job_sub/multirun/2026-01-01
-  python utils/generate_partial_summaries.py job_sub/multirun/2026-01-01 --n 3,5 --overwrite
-"""
-
-from __future__ import annotations
-
-import argparse
-import ast
-import csv
-import json
-import os
-from collections.abc import Iterable
-from pathlib import Path
-from typing import Any
-
-import numpy as np
-from tqdm import tqdm
-
-SUMMARY_METRIC_RULES = {
-    "auc_true": ("max_accumulate", "normalized_true"),
-    "avg_top": ("top_mean", "n_top"),
-    "rounds_to_top": ("rounds_to_top", "n_top"),
-    "overall_true": ("max_overall", "normalized_true"),
-    "max_train_spearman": ("max_overall", "train_spearman"),
-    "max_extreme_value_auc": ("max_overall", "extreme_value_auc"),
-}
-
-
-def _parse_float(value: Any) -> float:
-    if value is None:
-        return float("nan")
-    if isinstance(value, int | float):
-        return float(value)
-    text = str(value).strip()
-    if not text:
-        return float("nan")
-    try:
-        return float(text)
-    except ValueError:
-        return float("nan")
-
-
-def _parse_selected_ids(value: Any) -> list[Any]:
-    if value is None:
-        return []
-    if isinstance(value, list):
-        return value
-    text = str(value).strip()
-    if not text:
-        return []
-    try:
-        parsed = ast.literal_eval(text)
-    except (ValueError, SyntaxError):
-        return [item for item in text.split(",") if item.strip()]
-    if isinstance(parsed, list):
-        return parsed
-    return [parsed]
-
-
-def _load_rows(path: Path) -> list[dict[str, Any]]:
-    with path.open(newline="") as handle:
-        reader = csv.DictReader(handle)
-        rows = [dict(row) for row in reader]
-    for row in rows:
-        try:
-            row["_round"] = int(row.get("round", 0))
-        except (TypeError, ValueError):
-            row["_round"] = 0
-    return sorted(rows, key=lambda r: r["_round"])
-
-
-def _compute_summary(rows: list[dict[str, Any]]) -> dict[str, float]:
-    if not rows:
-        return {name: float("nan") for name in SUMMARY_METRIC_RULES}
-
-    columns = set(rows[0].keys())
-    n_top_col = None
-    if "n_top" in columns:
-        n_top_col = "n_top"
-    elif "n_selected_in_top" in columns:
-        n_top_col = "n_selected_in_top"
-
-    selected_counts = [
-        len(_parse_selected_ids(row.get("selected_sample_ids"))) for row in rows
-    ]
-    cumulative_selected = int(np.sum(np.cumsum(selected_counts)))
-
-    summary: dict[str, float] = {}
-    for metric_name, (rule, metric_column) in SUMMARY_METRIC_RULES.items():
-        column = metric_column
-        if metric_column == "n_top" and n_top_col is not None:
-            column = n_top_col
-        if column not in columns:
-            summary[metric_name] = float("nan")
-            continue
-
-        values = np.array([_parse_float(row.get(column)) for row in rows], dtype=float)
-        if rule == "top_mean":
-            if cumulative_selected <= 0:
-                summary[metric_name] = 0.0
-            else:
-                cumulative_sum = np.cumsum(values)
-                summary[metric_name] = (
-                    float(np.sum(cumulative_sum)) / cumulative_selected
-                )
-        elif rule == "mean":
-            summary[metric_name] = float(np.nanmean(values))
-        elif rule == "max_accumulate":
-            cumulative_max = np.maximum.accumulate(values)
-            summary[metric_name] = float(np.sum(cumulative_max)) / len(values)
-        elif rule == "max_overall":
-            finite = values[np.isfinite(values)]
-            summary[metric_name] = (
-                float(np.max(finite)) if finite.size else float("nan")
-            )
-        elif rule == "rounds_to_top":
-            hits = np.where(values >= 1)[0]
-            summary[metric_name] = float(hits[0] + 1) if hits.size else float("nan")
-        else:
-            raise ValueError(f"Unknown summary metric rule: {rule}")
-
-    return summary
-
-
-def _load_base_summary(path: Path) -> dict[str, Any]:
-    if not path.exists():
-        return {}
-    return json.loads(path.read_text())
-
-
-def _resolve_metrics_to_update(_: dict[str, Any]) -> list[str]:
-    return list(SUMMARY_METRIC_RULES.keys())
-
-
-def _iter_results(root: Path) -> Iterable[Path]:
-    max_depth = 4  # root/<time>/<dataset>/<sweep>/seed_*
-    root = root.resolve()
-    for dirpath, dirnames, filenames in tqdm(
-        os.walk(root), desc="Scanning directories", unit="dir"
-    ):
-        rel_parts = Path(dirpath).relative_to(root).parts
-        depth = len(rel_parts)
-        if Path(dirpath).name.startswith("seed_"):
-            if "results.csv" in filenames:
-                yield Path(dirpath) / "results.csv"
-            dirnames[:] = []
-            continue
-        if depth >= max_depth:
-            dirnames[:] = []
-
-
-def _write_summary(path: Path, data: dict[str, Any]) -> None:
-    path.write_text(json.dumps(data, indent=2))
-
-
-def main() -> int:
-    parser = argparse.ArgumentParser(
-        description=(
-            "Create summary_n.json files for partial runs based on results.csv."
-        )
-    )
-    parser.add_argument(
-        "multirun_dir",
-        type=Path,
-        help="Path to the multirun folder (e.g. job_sub/multirun/2026-01-01).",
-    )
-    parser.add_argument(
-        "--n",
-        type=str,
-        default=None,
-        help="Comma-separated list of n values to generate (default: all rounds).",
-    )
-    parser.add_argument(
-        "--overwrite",
-        action="store_true",
-        help="Overwrite existing summary_n.json files.",
-    )
-    args = parser.parse_args()
-
-    multirun_dir = args.multirun_dir
-    if not multirun_dir.exists():
-        raise SystemExit(f"Multirun dir not found: {multirun_dir}")
-
-    explicit_ns: list[int] | None = None
-    if args.n:
-        explicit_ns = [int(item) for item in args.n.split(",") if item.strip()]
-
-    results_paths = list(_iter_results(multirun_dir))
-    for results_path in tqdm(results_paths, desc="Processing runs", unit="run"):
-        run_dir = results_path.parent
-        if explicit_ns and not args.overwrite:
-            expected = [run_dir / f"summary_{n}.json" for n in explicit_ns if n >= 1]
-            if expected and all(path.exists() for path in expected):
-                continue
-        rows = _load_rows(results_path)
-        if not rows:
-            continue
-
-        base_summary = _load_base_summary(run_dir / "summary.json")
-        metrics_to_update = _resolve_metrics_to_update(base_summary)
-
-        max_rounds = len(rows)
-        ns = explicit_ns or list(range(1, max_rounds + 1))
-        for n in ns:
-            if n < 1 or n > max_rounds:
-                continue
-            output_path = run_dir / f"summary_{n}.json"
-            if output_path.exists() and not args.overwrite:
-                continue
-            partial_rows = rows[:n]
-            computed = _compute_summary(partial_rows)
-            summary = dict(base_summary)
-            for key in metrics_to_update:
-                summary[key] = computed.get(key, float("nan"))
-            _write_summary(output_path, summary)
-
-    return 0
-
-
-if __name__ == "__main__":
-    raise SystemExit(main())