From 28301e0d201b8aac7fa57ca948016d68c3f905ac Mon Sep 17 00:00:00 2001 From: Terrence Zhang Date: Tue, 17 Mar 2026 03:46:54 +0000 Subject: [PATCH 01/56] feat: add K8s and Slurm scheduler backends for profiling jobs MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Add a scheduler package with: - ProfileJobSpec dataclass for all profiling parameters - BaseScheduler ABC with render/submit/dry_run interface - K8sScheduler: generates valid K8s Job YAML with GPU resources, PVC/hostPath volumes, nodeSelector, serviceAccount support - SlurmScheduler: generates sbatch scripts with docker/enroot/bare-metal container runtimes, module loading, and custom #SBATCH directives - scripts/submit_profile.py: unified CLI entry point with --scheduler {k8s,slurm}, --dry-run (default) and --submit modes Zero external dependencies — uses only Python stdlib. --- schedulers/__init__.py | 12 ++ schedulers/base.py | 129 +++++++++++++++++++++ schedulers/k8s.py | 140 ++++++++++++++++++++++ schedulers/slurm.py | 141 ++++++++++++++++++++++ scripts/submit_profile.py | 238 ++++++++++++++++++++++++++++++++++++++ 5 files changed, 660 insertions(+) create mode 100644 schedulers/__init__.py create mode 100644 schedulers/base.py create mode 100644 schedulers/k8s.py create mode 100644 schedulers/slurm.py create mode 100644 scripts/submit_profile.py diff --git a/schedulers/__init__.py b/schedulers/__init__.py new file mode 100644 index 0000000..6e1547b --- /dev/null +++ b/schedulers/__init__.py @@ -0,0 +1,12 @@ +"""Scheduler backends for submitting FlowSim profiling jobs to K8s or Slurm.""" + +from schedulers.base import BaseScheduler, ProfileJobSpec +from schedulers.k8s import K8sScheduler +from schedulers.slurm import SlurmScheduler + +__all__ = [ + "BaseScheduler", + "K8sScheduler", + "ProfileJobSpec", + "SlurmScheduler", +] diff --git a/schedulers/base.py b/schedulers/base.py new file mode 100644 index 0000000..df40429 --- /dev/null +++ b/schedulers/base.py @@ 
-0,0 +1,129 @@ +"""Abstract base class for FlowSim job schedulers.""" + +from __future__ import annotations + +import abc +import shlex +from dataclasses import dataclass, field +from typing import Optional + + +@dataclass +class ProfileJobSpec: + """All parameters needed to run a stage-profiling job. + + The scheduler backends render this into a K8s Job YAML or Slurm + sbatch script. + """ + + # -- Profiling workload -- + collect: str # "perf", "shapes", or "all" + model_path: str + tp: int = 1 + dp: int = 1 + bs: int = 1 + input_len: int = 2048 + existing_ctx: int = 0 + decode_tokens: int = 32 + warmup_n: int = 5 + disable_chunked_prefill: bool = False + max_prefill_tokens: int = 131072 + + # -- Infrastructure -- + image: str = "flowsim-image:latest" + gpus: int = 1 # total GPU count (must be >= tp * dp) + host: str = "0.0.0.0" + port: int = 30001 + output_dir: str = "/flowsim/stage_traces" + log_dir: str = "/flowsim/tests/test-artifacts" + job_name: str = "" + + # -- Extra server opts (appended verbatim) -- + extra_server_opts: str = "" + + def build_server_opts(self) -> str: + """Build the ``--server-opts`` string for run_stage_profile.py.""" + parts = [ + f"--model-path {self.model_path}", + f"--tp {self.tp}", + f"--host {self.host}", + f"--port {self.port}", + ] + if self.dp > 1: + parts.append(f"--dp {self.dp}") + if self.extra_server_opts: + parts.append(self.extra_server_opts) + return " ".join(parts) + + def build_profile_command(self) -> list[str]: + """Build the full ``python scripts/run_stage_profile.py ...`` command.""" + cmd = [ + "python", + "scripts/run_stage_profile.py", + "--collect", + self.collect, + "--launch-server", + "--server-opts", + self.build_server_opts(), + "--bs", + str(self.bs), + "--input-len", + str(self.input_len), + "--existing-ctx", + str(self.existing_ctx), + "--decode-tokens", + str(self.decode_tokens), + "--warmup-n", + str(self.warmup_n), + "--host", + self.host, + "--port", + str(self.port), + "--output-dir", + 
self.output_dir, + "--log-dir", + self.log_dir, + ] + if self.disable_chunked_prefill: + cmd.append("--disable-chunked-prefill") + cmd.extend(["--max-prefill-tokens", str(self.max_prefill_tokens)]) + return cmd + + def build_shell_command(self) -> str: + """Build a single shell command string (properly quoted).""" + cmd = self.build_profile_command() + # Quote the --server-opts value since it contains spaces + quoted = [] + i = 0 + while i < len(cmd): + if cmd[i] == "--server-opts" and i + 1 < len(cmd): + quoted.append(cmd[i]) + quoted.append(shlex.quote(cmd[i + 1])) + i += 2 + else: + quoted.append(cmd[i]) + i += 1 + return " ".join(quoted) + + def default_job_name(self) -> str: + """Generate a default job name from workload params.""" + if self.job_name: + return self.job_name + model_short = self.model_path.split("/")[-1].lower().replace(".", "-") + return f"flowsim-{self.collect}-{model_short}-bs{self.bs}-il{self.input_len}" + + +class BaseScheduler(abc.ABC): + """Abstract scheduler backend.""" + + @abc.abstractmethod + def render(self, spec: ProfileJobSpec) -> str: + """Render the job manifest / script as a string.""" + + @abc.abstractmethod + def submit(self, spec: ProfileJobSpec) -> str: + """Submit the job and return a job identifier string.""" + + def dry_run(self, spec: ProfileJobSpec) -> str: + """Render and return the manifest without submitting.""" + return self.render(spec) diff --git a/schedulers/k8s.py b/schedulers/k8s.py new file mode 100644 index 0000000..533967e --- /dev/null +++ b/schedulers/k8s.py @@ -0,0 +1,140 @@ +"""Kubernetes Job scheduler for FlowSim profiling.""" + +from __future__ import annotations + +import subprocess +import tempfile + +from schedulers.base import BaseScheduler, ProfileJobSpec + + +class K8sScheduler(BaseScheduler): + """Generate and optionally submit a Kubernetes Job for profiling. + + Parameters + ---------- + namespace : str + Kubernetes namespace for the Job. 
+ pvc_name : str, optional + Name of a PersistentVolumeClaim to mount for trace output. + If empty, uses ``emptyDir`` (traces are lost when the pod exits). + host_output_dir : str, optional + If set (and *pvc_name* is empty), use a ``hostPath`` volume at + this path instead of a PVC. + node_selector : dict, optional + Kubernetes nodeSelector labels (e.g., ``{"gpu": "a100"}``). + service_account : str, optional + ServiceAccount name for the pod. + shm_size : str + Size of ``/dev/shm`` (shared memory). Defaults to ``"16Gi"``. + """ + + def __init__( + self, + *, + namespace: str = "default", + pvc_name: str = "", + host_output_dir: str = "", + node_selector: dict[str, str] | None = None, + service_account: str = "", + shm_size: str = "16Gi", + ) -> None: + self.namespace = namespace + self.pvc_name = pvc_name + self.host_output_dir = host_output_dir + self.node_selector = node_selector or {} + self.service_account = service_account + self.shm_size = shm_size + + def render(self, spec: ProfileJobSpec) -> str: + job_name = spec.default_job_name()[:63] # K8s name limit + cmd = spec.build_profile_command() + + lines: list[str] = [] + _a = lines.append + + _a("apiVersion: batch/v1") + _a("kind: Job") + _a("metadata:") + _a(f" name: {job_name}") + _a(f" namespace: {self.namespace}") + _a(" labels:") + _a(" app: flowsim") + _a(" component: profiling") + _a(f" collect: {spec.collect}") + _a("spec:") + _a(" backoffLimit: 0") + _a(" ttlSecondsAfterFinished: 86400") + _a(" template:") + _a(" metadata:") + _a(" labels:") + _a(" app: flowsim") + _a(" component: profiling") + _a(" spec:") + if self.service_account: + _a(f" serviceAccountName: {self.service_account}") + if self.node_selector: + _a(" nodeSelector:") + for k, v in self.node_selector.items(): + _a(f" {k}: {v}") + _a(" restartPolicy: Never") + _a(" containers:") + _a(" - name: profiler") + _a(f" image: {spec.image}") + _a(" imagePullPolicy: IfNotPresent") + _a(" workingDir: /flowsim") + _a(" command:") + for c in cmd: 
+ _a(f' - "{c}"') + _a(" env:") + _a(" - name: SGLANG_PROFILE_KERNELS") + _a(' value: "1"') + _a(" resources:") + _a(" limits:") + _a(f' nvidia.com/gpu: "{spec.gpus}"') + _a(" requests:") + _a(f' nvidia.com/gpu: "{spec.gpus}"') + + # volumeMounts + _a(" volumeMounts:") + _a(" - name: dshm") + _a(" mountPath: /dev/shm") + if self.pvc_name or self.host_output_dir: + _a(" - name: output") + _a(f" mountPath: {spec.output_dir}") + + # volumes + _a(" volumes:") + _a(" - name: dshm") + _a(" emptyDir:") + _a(" medium: Memory") + _a(f" sizeLimit: {self.shm_size}") + if self.pvc_name: + _a(" - name: output") + _a(" persistentVolumeClaim:") + _a(f" claimName: {self.pvc_name}") + elif self.host_output_dir: + _a(" - name: output") + _a(" hostPath:") + _a(f" path: {self.host_output_dir}") + _a(" type: DirectoryOrCreate") + + return "\n".join(lines) + "\n" + + def submit(self, spec: ProfileJobSpec) -> str: + manifest = self.render(spec) + with tempfile.NamedTemporaryFile( + mode="w", suffix=".yaml", delete=False + ) as f: + f.write(manifest) + f.flush() + result = subprocess.run( + ["kubectl", "apply", "-f", f.name], + capture_output=True, + text=True, + ) + if result.returncode != 0: + raise RuntimeError( + f"kubectl apply failed:\n{result.stderr.strip()}" + ) + return result.stdout.strip() diff --git a/schedulers/slurm.py b/schedulers/slurm.py new file mode 100644 index 0000000..4aa18d8 --- /dev/null +++ b/schedulers/slurm.py @@ -0,0 +1,141 @@ +"""Slurm sbatch scheduler for FlowSim profiling.""" + +from __future__ import annotations + +import subprocess +import tempfile +import textwrap + +from schedulers.base import BaseScheduler, ProfileJobSpec + + +class SlurmScheduler(BaseScheduler): + """Generate and optionally submit an sbatch script for profiling. + + Parameters + ---------- + partition : str + Slurm partition to submit to. + time_limit : str + Wall-clock time limit (e.g., ``"01:00:00"``). + account : str, optional + ``--account`` for which allocation to charge. 
+ constraint : str, optional + ``--constraint`` node feature (e.g., ``"gpu80g"``). + container_runtime : str + How to run the container inside the allocation. + ``"docker"`` -> ``docker run`` + ``"enroot"`` -> ``srun --container-image`` + ``"none"`` -> run bare-metal (no container) + container_mounts : str + Bind-mount string passed to the container runtime + (e.g., ``"/data:/data"``). + modules : list[str] + ``module load`` commands to run before the job + (relevant for ``"none"`` runtime). + extra_sbatch : list[str] + Additional ``#SBATCH`` lines, each *without* the ``#SBATCH`` prefix. + """ + + def __init__( + self, + *, + partition: str = "gpu", + time_limit: str = "02:00:00", + account: str = "", + constraint: str = "", + container_runtime: str = "none", + container_mounts: str = "", + modules: list[str] | None = None, + extra_sbatch: list[str] | None = None, + ) -> None: + self.partition = partition + self.time_limit = time_limit + self.account = account + self.constraint = constraint + self.container_runtime = container_runtime + self.container_mounts = container_mounts + self.modules = modules or [] + self.extra_sbatch = extra_sbatch or [] + + def render(self, spec: ProfileJobSpec) -> str: + job_name = spec.default_job_name() + cmd = spec.build_shell_command() + + lines = [ + "#!/bin/bash", + f"#SBATCH --job-name={job_name}", + f"#SBATCH --partition={self.partition}", + f"#SBATCH --gpus-per-node={spec.gpus}", + f"#SBATCH --ntasks=1", + f"#SBATCH --time={self.time_limit}", + f"#SBATCH --output={spec.output_dir}/{job_name}_%j.log", + ] + + if self.account: + lines.append(f"#SBATCH --account={self.account}") + if self.constraint: + lines.append(f"#SBATCH --constraint={self.constraint}") + for extra in self.extra_sbatch: + lines.append(f"#SBATCH {extra}") + + lines.append("") + lines.append("set -euo pipefail") + lines.append("") + + if self.modules: + for mod in self.modules: + lines.append(f"module load {mod}") + lines.append("") + + lines.append("export 
SGLANG_PROFILE_KERNELS=1") + lines.append("") + + if self.container_runtime == "docker": + mounts = "" + if self.container_mounts: + mounts = f" -v {self.container_mounts}" + lines.append( + f"docker run --gpus all --ipc=host --shm-size=16g" + f"{mounts} -w /flowsim {spec.image} \\" + ) + lines.append(f" {cmd}") + elif self.container_runtime == "enroot": + mounts = "" + if self.container_mounts: + mounts = f" --container-mounts={self.container_mounts}" + lines.append( + f"srun --container-image={spec.image}" + f" --container-workdir=/flowsim" + f"{mounts} \\" + ) + lines.append(f" {cmd}") + elif self.container_runtime == "none": + lines.append(f"cd /flowsim") + lines.append(cmd) + else: + raise ValueError( + f"Unknown container_runtime: {self.container_runtime!r}. " + "Choose from: docker, enroot, none" + ) + + lines.append("") + return "\n".join(lines) + + def submit(self, spec: ProfileJobSpec) -> str: + script = self.render(spec) + with tempfile.NamedTemporaryFile( + mode="w", suffix=".sh", delete=False + ) as f: + f.write(script) + f.flush() + result = subprocess.run( + ["sbatch", f.name], + capture_output=True, + text=True, + ) + if result.returncode != 0: + raise RuntimeError( + f"sbatch failed:\n{result.stderr.strip()}" + ) + return result.stdout.strip() diff --git a/scripts/submit_profile.py b/scripts/submit_profile.py new file mode 100644 index 0000000..8e309dd --- /dev/null +++ b/scripts/submit_profile.py @@ -0,0 +1,238 @@ +#!/usr/bin/env python3 +"""Submit FlowSim profiling jobs to Kubernetes or Slurm. 
+ +Usage examples +-------------- + +Dry-run (print Kubernetes Job YAML to stdout): + + python scripts/submit_profile.py \\ + --scheduler k8s \\ + --collect perf \\ + --model-path Qwen/Qwen3-235B-A22B-FP8 \\ + --tp 4 --gpus 4 \\ + --bs 1 --input-len 2048 --decode-tokens 32 \\ + --image flowsim-image:latest \\ + --k8s-namespace default \\ + --k8s-pvc flowsim-traces \\ + --dry-run + +Dry-run (print Slurm sbatch script to stdout): + + python scripts/submit_profile.py \\ + --scheduler slurm \\ + --collect perf \\ + --model-path Qwen/Qwen3-235B-A22B-FP8 \\ + --tp 4 --gpus 4 \\ + --slurm-partition gpu-a100 \\ + --slurm-time 02:00:00 \\ + --dry-run + +Submit directly to cluster: + + python scripts/submit_profile.py \\ + --scheduler k8s \\ + ... \\ + --submit +""" + +from __future__ import annotations + +import argparse +import sys + +# Allow running from the repo root as ``python scripts/submit_profile.py`` +sys.path.insert(0, ".") + +from schedulers.base import ProfileJobSpec +from schedulers.k8s import K8sScheduler +from schedulers.slurm import SlurmScheduler + + +def _parse_args(argv: list[str] | None = None) -> argparse.Namespace: + p = argparse.ArgumentParser( + description="Submit FlowSim profiling jobs to K8s or Slurm.", + formatter_class=argparse.RawDescriptionHelpFormatter, + epilog=__doc__, + ) + + # -- Scheduler choice -- + p.add_argument( + "--scheduler", + choices=["k8s", "slurm"], + required=True, + help="Scheduler backend.", + ) + + # -- Profiling workload (mirrors run_stage_profile.py) -- + wl = p.add_argument_group("workload") + wl.add_argument( + "--collect", + choices=["perf", "shapes", "all"], + required=True, + ) + wl.add_argument("--model-path", required=True, help="HF model path") + wl.add_argument("--tp", type=int, default=1) + wl.add_argument("--dp", type=int, default=1) + wl.add_argument("--bs", type=int, default=1, help="Batch size") + wl.add_argument("--input-len", type=int, default=2048) + wl.add_argument("--existing-ctx", type=int, default=0) 
+ wl.add_argument("--decode-tokens", type=int, default=32) + wl.add_argument("--warmup-n", type=int, default=5) + wl.add_argument( + "--disable-chunked-prefill", action="store_true", + ) + wl.add_argument("--max-prefill-tokens", type=int, default=131072) + wl.add_argument( + "--extra-server-opts", + default="", + help="Extra server options appended verbatim", + ) + + # -- Infrastructure -- + infra = p.add_argument_group("infrastructure") + infra.add_argument("--image", default="flowsim-image:latest") + infra.add_argument( + "--gpus", type=int, default=1, help="Total GPU count", + ) + infra.add_argument("--host", default="0.0.0.0") + infra.add_argument("--port", type=int, default=30001) + infra.add_argument("--output-dir", default="/flowsim/stage_traces") + infra.add_argument( + "--log-dir", default="/flowsim/tests/test-artifacts", + ) + infra.add_argument("--job-name", default="") + + # -- Kubernetes-specific -- + k8s = p.add_argument_group("kubernetes options") + k8s.add_argument("--k8s-namespace", default="default") + k8s.add_argument( + "--k8s-pvc", + default="", + help="PVC name for output volume (omit for emptyDir)", + ) + k8s.add_argument( + "--k8s-host-output-dir", + default="", + help="hostPath for output (used when --k8s-pvc is empty)", + ) + k8s.add_argument( + "--k8s-node-selector", + action="append", + default=[], + metavar="KEY=VALUE", + help="Node selector labels (repeatable)", + ) + k8s.add_argument("--k8s-service-account", default="") + k8s.add_argument("--k8s-shm-size", default="16Gi") + + # -- Slurm-specific -- + slurm = p.add_argument_group("slurm options") + slurm.add_argument("--slurm-partition", default="gpu") + slurm.add_argument("--slurm-time", default="02:00:00") + slurm.add_argument("--slurm-account", default="") + slurm.add_argument("--slurm-constraint", default="") + slurm.add_argument( + "--slurm-container-runtime", + choices=["docker", "enroot", "none"], + default="none", + ) + slurm.add_argument("--slurm-container-mounts", default="") 
+ slurm.add_argument( + "--slurm-module", + action="append", + default=[], + help="Modules to load (repeatable)", + ) + slurm.add_argument( + "--slurm-extra-sbatch", + action="append", + default=[], + metavar="DIRECTIVE", + help="Extra #SBATCH directives (repeatable, without prefix)", + ) + + # -- Action -- + action = p.add_mutually_exclusive_group() + action.add_argument( + "--dry-run", + action="store_true", + default=True, + help="Print the rendered manifest to stdout (default)", + ) + action.add_argument( + "--submit", + action="store_true", + help="Actually submit the job to the cluster", + ) + + return p.parse_args(argv) + + +def _build_spec(args: argparse.Namespace) -> ProfileJobSpec: + return ProfileJobSpec( + collect=args.collect, + model_path=args.model_path, + tp=args.tp, + dp=args.dp, + bs=args.bs, + input_len=args.input_len, + existing_ctx=args.existing_ctx, + decode_tokens=args.decode_tokens, + warmup_n=args.warmup_n, + disable_chunked_prefill=args.disable_chunked_prefill, + max_prefill_tokens=args.max_prefill_tokens, + image=args.image, + gpus=args.gpus, + host=args.host, + port=args.port, + output_dir=args.output_dir, + log_dir=args.log_dir, + job_name=args.job_name, + extra_server_opts=args.extra_server_opts, + ) + + +def _build_scheduler(args: argparse.Namespace): + if args.scheduler == "k8s": + node_sel = {} + for item in args.k8s_node_selector: + k, _, v = item.partition("=") + if not v: + sys.exit(f"Bad --k8s-node-selector format: {item!r} (use KEY=VALUE)") + node_sel[k] = v + return K8sScheduler( + namespace=args.k8s_namespace, + pvc_name=args.k8s_pvc, + host_output_dir=args.k8s_host_output_dir, + node_selector=node_sel, + service_account=args.k8s_service_account, + shm_size=args.k8s_shm_size, + ) + else: + return SlurmScheduler( + partition=args.slurm_partition, + time_limit=args.slurm_time, + account=args.slurm_account, + constraint=args.slurm_constraint, + container_runtime=args.slurm_container_runtime, + 
container_mounts=args.slurm_container_mounts, + modules=args.slurm_module, + extra_sbatch=args.slurm_extra_sbatch, + ) + + +def main(argv: list[str] | None = None) -> None: + args = _parse_args(argv) + spec = _build_spec(args) + scheduler = _build_scheduler(args) + + if args.submit: + result = scheduler.submit(spec) + print(result) + else: + print(scheduler.dry_run(spec)) + + +if __name__ == "__main__": + main() From 26c9f476cbe0000281d97ca156d8d327e1a465b7 Mon Sep 17 00:00:00 2001 From: Terrence Zhang Date: Tue, 17 Mar 2026 04:00:44 +0000 Subject: [PATCH 02/56] feat: switch to proper API clients for remote submission MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit K8s: - render() now builds a dict and serializes via yaml.safe_dump (falls back to json.dumps if PyYAML is absent). Fixes YAML injection when values contain : # or quotes. - submit() uses the 'kubernetes' Python client (kubeconfig / in-cluster). - New args: --k8s-kubeconfig, --k8s-context. Slurm: - submit() now posts to slurmrestd REST API via urllib.request (stdlib). - Supports JWT auth, configurable API version (v0.0.39–v0.0.41+), and TLS certificate verification toggle. - New args: --slurm-rest-url, --slurm-jwt-token, --slurm-api-version, --slurm-no-verify-ssl. render() / dry-run remain zero-dependency (stdlib only). submit() requires 'kubernetes' package for K8s; Slurm uses stdlib. --- schedulers/k8s.py | 200 ++++++++++++++++++++++---------------- schedulers/slurm.py | 133 +++++++++++++++++++++---- scripts/submit_profile.py | 38 ++++++++ 3 files changed, 266 insertions(+), 105 deletions(-) diff --git a/schedulers/k8s.py b/schedulers/k8s.py index 533967e..9b957a9 100644 --- a/schedulers/k8s.py +++ b/schedulers/k8s.py @@ -1,12 +1,29 @@ -"""Kubernetes Job scheduler for FlowSim profiling.""" +"""Kubernetes Job scheduler for FlowSim profiling. + +Uses the ``kubernetes`` Python client for remote submission. 
+The ``render()`` / ``dry_run()`` path uses stdlib only (json fallback if +PyYAML is not installed — JSON is valid YAML 1.2 and ``kubectl`` accepts it). +""" from __future__ import annotations -import subprocess -import tempfile +import json from schedulers.base import BaseScheduler, ProfileJobSpec +# Optional: nicer YAML output for dry-run. +try: + import yaml as _yaml # type: ignore[import-untyped] + + def _dump(obj: dict) -> str: + return _yaml.safe_dump(obj, default_flow_style=False, sort_keys=False) + +except ImportError: + _yaml = None # type: ignore[assignment] + + def _dump(obj: dict) -> str: # type: ignore[misc] + return json.dumps(obj, indent=2, ensure_ascii=False) + "\n" + class K8sScheduler(BaseScheduler): """Generate and optionally submit a Kubernetes Job for profiling. @@ -15,6 +32,11 @@ class K8sScheduler(BaseScheduler): ---------- namespace : str Kubernetes namespace for the Job. + kubeconfig : str, optional + Path to a kubeconfig file. When empty, the ``kubernetes`` client + tries in-cluster config, then ``~/.kube/config``. + context : str, optional + kubeconfig context to activate. pvc_name : str, optional Name of a PersistentVolumeClaim to mount for trace output. If empty, uses ``emptyDir`` (traces are lost when the pod exits). 
@@ -33,6 +55,8 @@ def __init__( self, *, namespace: str = "default", + kubeconfig: str = "", + context: str = "", pvc_name: str = "", host_output_dir: str = "", node_selector: dict[str, str] | None = None, @@ -40,6 +64,8 @@ def __init__( shm_size: str = "16Gi", ) -> None: self.namespace = namespace + self.kubeconfig = kubeconfig + self.context = context self.pvc_name = pvc_name self.host_output_dir = host_output_dir self.node_selector = node_selector or {} @@ -47,94 +73,96 @@ def __init__( self.shm_size = shm_size def render(self, spec: ProfileJobSpec) -> str: - job_name = spec.default_job_name()[:63] # K8s name limit + return _dump(self._build_job_dict(spec)) + + # ----------------------------------------------------------------- + # Build a plain-dict manifest (used by both render and submit) + # ----------------------------------------------------------------- + def _build_job_dict(self, spec: ProfileJobSpec) -> dict: + """Return the Job manifest as a nested Python dict.""" + job_name = spec.default_job_name()[:63] cmd = spec.build_profile_command() - lines: list[str] = [] - _a = lines.append - - _a("apiVersion: batch/v1") - _a("kind: Job") - _a("metadata:") - _a(f" name: {job_name}") - _a(f" namespace: {self.namespace}") - _a(" labels:") - _a(" app: flowsim") - _a(" component: profiling") - _a(f" collect: {spec.collect}") - _a("spec:") - _a(" backoffLimit: 0") - _a(" ttlSecondsAfterFinished: 86400") - _a(" template:") - _a(" metadata:") - _a(" labels:") - _a(" app: flowsim") - _a(" component: profiling") - _a(" spec:") - if self.service_account: - _a(f" serviceAccountName: {self.service_account}") - if self.node_selector: - _a(" nodeSelector:") - for k, v in self.node_selector.items(): - _a(f" {k}: {v}") - _a(" restartPolicy: Never") - _a(" containers:") - _a(" - name: profiler") - _a(f" image: {spec.image}") - _a(" imagePullPolicy: IfNotPresent") - _a(" workingDir: /flowsim") - _a(" command:") - for c in cmd: - _a(f' - "{c}"') - _a(" env:") - _a(" - name: 
SGLANG_PROFILE_KERNELS") - _a(' value: "1"') - _a(" resources:") - _a(" limits:") - _a(f' nvidia.com/gpu: "{spec.gpus}"') - _a(" requests:") - _a(f' nvidia.com/gpu: "{spec.gpus}"') - - # volumeMounts - _a(" volumeMounts:") - _a(" - name: dshm") - _a(" mountPath: /dev/shm") - if self.pvc_name or self.host_output_dir: - _a(" - name: output") - _a(f" mountPath: {spec.output_dir}") - - # volumes - _a(" volumes:") - _a(" - name: dshm") - _a(" emptyDir:") - _a(" medium: Memory") - _a(f" sizeLimit: {self.shm_size}") + # volumes + mounts + volume_mounts = [{"name": "dshm", "mountPath": "/dev/shm"}] + volumes: list[dict] = [ + {"name": "dshm", "emptyDir": {"medium": "Memory", "sizeLimit": self.shm_size}}, + ] if self.pvc_name: - _a(" - name: output") - _a(" persistentVolumeClaim:") - _a(f" claimName: {self.pvc_name}") + volume_mounts.append({"name": "output", "mountPath": spec.output_dir}) + volumes.append({"name": "output", "persistentVolumeClaim": {"claimName": self.pvc_name}}) elif self.host_output_dir: - _a(" - name: output") - _a(" hostPath:") - _a(f" path: {self.host_output_dir}") - _a(" type: DirectoryOrCreate") + volume_mounts.append({"name": "output", "mountPath": spec.output_dir}) + volumes.append({"name": "output", "hostPath": {"path": self.host_output_dir, "type": "DirectoryOrCreate"}}) - return "\n".join(lines) + "\n" + container = { + "name": "profiler", + "image": spec.image, + "imagePullPolicy": "IfNotPresent", + "workingDir": "/flowsim", + "command": cmd, + "env": [{"name": "SGLANG_PROFILE_KERNELS", "value": "1"}], + "resources": { + "limits": {"nvidia.com/gpu": str(spec.gpus)}, + "requests": {"nvidia.com/gpu": str(spec.gpus)}, + }, + "volumeMounts": volume_mounts, + } + + pod_spec: dict = { + "restartPolicy": "Never", + "containers": [container], + "volumes": volumes, + } + if self.service_account: + pod_spec["serviceAccountName"] = self.service_account + if self.node_selector: + pod_spec["nodeSelector"] = dict(self.node_selector) + + return { + 
"apiVersion": "batch/v1", + "kind": "Job", + "metadata": { + "name": job_name, + "namespace": self.namespace, + "labels": {"app": "flowsim", "component": "profiling", "collect": spec.collect}, + }, + "spec": { + "backoffLimit": 0, + "ttlSecondsAfterFinished": 86400, + "template": { + "metadata": {"labels": {"app": "flowsim", "component": "profiling"}}, + "spec": pod_spec, + }, + }, + } def submit(self, spec: ProfileJobSpec) -> str: - manifest = self.render(spec) - with tempfile.NamedTemporaryFile( - mode="w", suffix=".yaml", delete=False - ) as f: - f.write(manifest) - f.flush() - result = subprocess.run( - ["kubectl", "apply", "-f", f.name], - capture_output=True, - text=True, + """Submit via the ``kubernetes`` Python client (``pip install kubernetes``).""" + try: + from kubernetes import client as k8s_client, config as k8s_config + except ImportError: + raise RuntimeError( + "The 'kubernetes' package is required for --submit. " + "Install it with: pip install kubernetes" ) - if result.returncode != 0: - raise RuntimeError( - f"kubectl apply failed:\n{result.stderr.strip()}" - ) - return result.stdout.strip() + + # Load kubeconfig / in-cluster config + config_kwargs: dict = {} + if self.kubeconfig: + config_kwargs["config_file"] = self.kubeconfig + if self.context: + config_kwargs["context"] = self.context + + try: + k8s_config.load_kube_config(**config_kwargs) + except k8s_config.ConfigException: + k8s_config.load_incluster_config() + + body = self._build_job_dict(spec) + batch_api = k8s_client.BatchV1Api() + resp = batch_api.create_namespaced_job( + namespace=self.namespace, + body=body, + ) + return f"job.batch/{resp.metadata.name} created (namespace={resp.metadata.namespace})" diff --git a/schedulers/slurm.py b/schedulers/slurm.py index 4aa18d8..9261a15 100644 --- a/schedulers/slurm.py +++ b/schedulers/slurm.py @@ -1,14 +1,23 @@ -"""Slurm sbatch scheduler for FlowSim profiling.""" +"""Slurm sbatch scheduler for FlowSim profiling. 
+ +``render()`` / ``dry_run()`` produce a standalone bash script (zero deps). +``submit()`` posts the script to a slurmrestd endpoint via stdlib +``urllib.request`` — no extra packages needed. +""" from __future__ import annotations -import subprocess -import tempfile -import textwrap +import json +import ssl +import urllib.error +import urllib.request from schedulers.base import BaseScheduler, ProfileJobSpec +_DEFAULT_API_VERSION = "v0.0.40" + + class SlurmScheduler(BaseScheduler): """Generate and optionally submit an sbatch script for profiling. @@ -18,6 +27,17 @@ class SlurmScheduler(BaseScheduler): Slurm partition to submit to. time_limit : str Wall-clock time limit (e.g., ``"01:00:00"``). + rest_url : str + Base URL of the slurmrestd daemon + (e.g., ``"https://slurm.example.com:6820"``). + Required only for ``submit()``. + jwt_token : str + JWT/auth token for slurmrestd. Required only for ``submit()``. + api_version : str + slurmrestd OpenAPI version (default: ``"v0.0.40"``). + Adjust to match your cluster (``v0.0.39``, ``v0.0.41``, …). + verify_ssl : bool + Whether to verify the slurmrestd TLS certificate (default True). account : str, optional ``--account`` for which allocation to charge. 
constraint : str, optional @@ -42,6 +62,10 @@ def __init__( *, partition: str = "gpu", time_limit: str = "02:00:00", + rest_url: str = "", + jwt_token: str = "", + api_version: str = _DEFAULT_API_VERSION, + verify_ssl: bool = True, account: str = "", constraint: str = "", container_runtime: str = "none", @@ -51,6 +75,10 @@ def __init__( ) -> None: self.partition = partition self.time_limit = time_limit + self.rest_url = rest_url.rstrip("/") + self.jwt_token = jwt_token + self.api_version = api_version + self.verify_ssl = verify_ssl self.account = account self.constraint = constraint self.container_runtime = container_runtime @@ -123,19 +151,86 @@ def render(self, spec: ProfileJobSpec) -> str: return "\n".join(lines) def submit(self, spec: ProfileJobSpec) -> str: - script = self.render(spec) - with tempfile.NamedTemporaryFile( - mode="w", suffix=".sh", delete=False - ) as f: - f.write(script) - f.flush() - result = subprocess.run( - ["sbatch", f.name], - capture_output=True, - text=True, + """Submit the job via slurmrestd REST API. + + Requires ``rest_url`` and ``jwt_token`` to be set. + Uses only ``urllib.request`` from the standard library. + """ + if not self.rest_url: + raise RuntimeError( + "--slurm-rest-url is required for --submit. " + "Point it at your slurmrestd endpoint " + "(e.g. https://slurm.example.com:6820)." + ) + if not self.jwt_token: + raise RuntimeError( + "--slurm-jwt-token is required for --submit. 
" + "Generate one via: scontrol token lifespan=3600" ) - if result.returncode != 0: - raise RuntimeError( - f"sbatch failed:\n{result.stderr.strip()}" - ) - return result.stdout.strip() + + script = self.render(spec) + job_name = spec.default_job_name() + + url = ( + f"{self.rest_url}/slurm/{self.api_version}/job/submit" + ) + + # slurmrestd job submission payload + payload = { + "script": script, + "job": { + "name": job_name, + "partition": self.partition, + "time_limit": {"number": self._parse_time_minutes(), "set": True}, + "tasks": 1, + "current_working_directory": "/flowsim", + "environment": ["PATH=/usr/local/bin:/usr/bin:/bin"], + }, + } + if self.account: + payload["job"]["account"] = self.account + + data = json.dumps(payload).encode() + headers = { + "Content-Type": "application/json", + "X-SLURM-USER-TOKEN": self.jwt_token, + } + req = urllib.request.Request(url, data=data, headers=headers, method="POST") + + ctx: ssl.SSLContext | None = None + if not self.verify_ssl: + ctx = ssl.create_default_context() + ctx.check_hostname = False + ctx.verify_mode = ssl.CERT_NONE + + try: + with urllib.request.urlopen(req, context=ctx) as resp: + body = json.loads(resp.read()) + except urllib.error.HTTPError as exc: + detail = exc.read().decode(errors="replace") + raise RuntimeError( + f"slurmrestd returned HTTP {exc.code}:\n{detail}" + ) from exc + except urllib.error.URLError as exc: + raise RuntimeError( + f"Cannot reach slurmrestd at {self.rest_url}: {exc.reason}" + ) from exc + + # Response contains job_id on success, errors array on failure + errors = body.get("errors") or [] + if errors: + msgs = "; ".join(e.get("error", str(e)) for e in errors) + raise RuntimeError(f"slurmrestd job submit failed: {msgs}") + + job_id = body.get("job_id", "unknown") + return f"Submitted batch job {job_id}" + + def _parse_time_minutes(self) -> int: + """Convert HH:MM:SS time_limit to total minutes.""" + parts = self.time_limit.split(":") + if len(parts) == 3: + h, m, s = 
int(parts[0]), int(parts[1]), int(parts[2]) + return h * 60 + m + (1 if s > 0 else 0) + if len(parts) == 2: + return int(parts[0]) * 60 + int(parts[1]) + return int(parts[0]) diff --git a/scripts/submit_profile.py b/scripts/submit_profile.py index 8e309dd..d4bc47c 100644 --- a/scripts/submit_profile.py +++ b/scripts/submit_profile.py @@ -106,6 +106,16 @@ def _parse_args(argv: list[str] | None = None) -> argparse.Namespace: # -- Kubernetes-specific -- k8s = p.add_argument_group("kubernetes options") k8s.add_argument("--k8s-namespace", default="default") + k8s.add_argument( + "--k8s-kubeconfig", + default="", + help="Path to kubeconfig file (empty = default lookup)", + ) + k8s.add_argument( + "--k8s-context", + default="", + help="kubeconfig context to use", + ) k8s.add_argument( "--k8s-pvc", default="", @@ -130,6 +140,28 @@ def _parse_args(argv: list[str] | None = None) -> argparse.Namespace: slurm = p.add_argument_group("slurm options") slurm.add_argument("--slurm-partition", default="gpu") slurm.add_argument("--slurm-time", default="02:00:00") + slurm.add_argument( + "--slurm-rest-url", + default="", + help="slurmrestd base URL (e.g. https://slurm.example.com:6820). " + "Required for --submit.", + ) + slurm.add_argument( + "--slurm-jwt-token", + default="", + help="JWT token for slurmrestd auth. 
" + "Generate via: scontrol token lifespan=3600", + ) + slurm.add_argument( + "--slurm-api-version", + default="v0.0.40", + help="slurmrestd OpenAPI version (default: v0.0.40)", + ) + slurm.add_argument( + "--slurm-no-verify-ssl", + action="store_true", + help="Skip TLS certificate verification for slurmrestd", + ) slurm.add_argument("--slurm-account", default="") slurm.add_argument("--slurm-constraint", default="") slurm.add_argument( @@ -203,6 +235,8 @@ def _build_scheduler(args: argparse.Namespace): node_sel[k] = v return K8sScheduler( namespace=args.k8s_namespace, + kubeconfig=args.k8s_kubeconfig, + context=args.k8s_context, pvc_name=args.k8s_pvc, host_output_dir=args.k8s_host_output_dir, node_selector=node_sel, @@ -213,6 +247,10 @@ def _build_scheduler(args: argparse.Namespace): return SlurmScheduler( partition=args.slurm_partition, time_limit=args.slurm_time, + rest_url=args.slurm_rest_url, + jwt_token=args.slurm_jwt_token, + api_version=args.slurm_api_version, + verify_ssl=not args.slurm_no_verify_ssl, account=args.slurm_account, constraint=args.slurm_constraint, container_runtime=args.slurm_container_runtime, From 52294a60dba8f383906e3271724fef2569b17314 Mon Sep 17 00:00:00 2001 From: Terrence Zhang Date: Tue, 17 Mar 2026 04:07:00 +0000 Subject: [PATCH 03/56] chore: add proper pyproject.toml with dependency declarations - Core deps: requests, perfetto, numpy, pandas - Optional dependency groups: k8s: kubernetes>=27.0, PyYAML>=6.0 slurm: (stdlib only, no extra deps) sim: scalesim, scipy, torch viz: matplotlib, seaborn api: fastapi, pydantic, uvicorn dev: black, pytest all: everything - Entry point: flowsim-submit -> scripts.submit_profile:main - requires-python >= 3.10 --- pyproject.toml | 63 ++++++++++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 63 insertions(+) diff --git a/pyproject.toml b/pyproject.toml index 0b237ec..ecf3f9b 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -1,3 +1,66 @@ +[build-system] +requires = ["setuptools>=68.0"] 
+build-backend = "setuptools.build_meta" + +[project] +name = "flowsim" +version = "0.1.0" +description = "Workload simulation pipeline for kernel-level inference profiling" +readme = "README.md" +license = {text = "MIT"} +requires-python = ">=3.10" +dependencies = [ + "requests>=2.28", + "perfetto>=0.7", + "numpy>=1.24", + "pandas>=1.5", +] + +[project.optional-dependencies] +# Scheduler backends ------------------------------------------------------- +k8s = [ + "kubernetes>=27.0", # K8s Python client for remote job submission + "PyYAML>=6.0", # nicer YAML dry-run output (json fallback w/o this) +] +slurm = [] # Slurm REST API uses stdlib urllib only + +# Full simulation stack (matches Dockerfile) -------------------------------- +sim = [ + "scalesim>=2.0", + "scipy>=1.10", + "torch>=2.0", +] + +# Visualization ------------------------------------------------------------- +viz = [ + "matplotlib>=3.7", + "seaborn>=0.12", +] + +# Backend API --------------------------------------------------------------- +api = [ + "fastapi>=0.100", + "pydantic>=2.0", + "uvicorn>=0.23", +] + +# Development --------------------------------------------------------------- +dev = [ + "black>=23.0", + "pytest>=7.0", +] + +# Everything ---------------------------------------------------------------- +all = [ + "flowsim[k8s,sim,viz,api,dev]", +] + +[project.scripts] +flowsim-submit = "scripts.submit_profile:main" + [tool.black] line-length = 80 include = '\.pyi?$' + +[tool.pytest.ini_options] +testpaths = ["tests"] From 54b615210232765964194565d6d3453e890f1f02 Mon Sep 17 00:00:00 2001 From: Terrence Zhang Date: Tue, 17 Mar 2026 04:12:29 +0000 Subject: [PATCH 04/56] fix: make pip install -e . 
and entry point actually work - Add scripts/__init__.py so 'scripts' is a findable package - Remove sys.path hack from submit_profile.py (not needed after install) - Add [tool.setuptools.packages.find] with explicit include list (excludes tests/ and backend/ from the installable package) - Improve K8s submit error: catch both kubeconfig and in-cluster failures and show a single clear message with --k8s-kubeconfig hint Verified: pip install -e '.[k8s]' -> flowsim-submit --dry-run works. --- pyproject.toml | 8 ++++++++ schedulers/k8s.py | 12 +++++++++++- scripts/__init__.py | 0 scripts/submit_profile.py | 3 --- 4 files changed, 19 insertions(+), 4 deletions(-) create mode 100644 scripts/__init__.py diff --git a/pyproject.toml b/pyproject.toml index ecf3f9b..f92fad0 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -55,6 +55,14 @@ all = [ "flowsim[k8s,sim,viz,api,dev]", ] +[tool.setuptools.packages.find] +include = [ + "schedulers*", + "scripts*", + "simulator*", + "utils*", +] + [project.scripts] flowsim-submit = "scripts.submit_profile:main" diff --git a/schedulers/k8s.py b/schedulers/k8s.py index 9b957a9..6b58ea9 100644 --- a/schedulers/k8s.py +++ b/schedulers/k8s.py @@ -157,7 +157,17 @@ def submit(self, spec: ProfileJobSpec) -> str: try: k8s_config.load_kube_config(**config_kwargs) except k8s_config.ConfigException: - k8s_config.load_incluster_config() + try: + k8s_config.load_incluster_config() + except k8s_config.ConfigException: + hint = "" + if not self.kubeconfig: + hint = " Try --k8s-kubeconfig /path/to/kubeconfig." + raise RuntimeError( + "No valid Kubernetes configuration found. " + "Checked kubeconfig file and in-cluster environment." 
+ + hint + ) body = self._build_job_dict(spec) batch_api = k8s_client.BatchV1Api() diff --git a/scripts/__init__.py b/scripts/__init__.py new file mode 100644 index 0000000..e69de29 diff --git a/scripts/submit_profile.py b/scripts/submit_profile.py index d4bc47c..2e24318 100644 --- a/scripts/submit_profile.py +++ b/scripts/submit_profile.py @@ -41,9 +41,6 @@ import argparse import sys -# Allow running from the repo root as ``python scripts/submit_profile.py`` -sys.path.insert(0, ".") - from schedulers.base import ProfileJobSpec from schedulers.k8s import K8sScheduler from schedulers.slurm import SlurmScheduler From 9e1c1f49c0db69215d64a28db39f5718e1c96f5b Mon Sep 17 00:00:00 2001 From: Terrence Zhang Date: Tue, 17 Mar 2026 04:16:11 +0000 Subject: [PATCH 05/56] refactor: unified CLI as 'flowsim submit' instead of 'flowsim-submit' - Add scripts/cli.py with subcommand routing (flowsim {submit, ...}) - Entry point changed: flowsim-submit -> flowsim - 'flowsim submit' delegates to submit_profile.main() - Extensible for future subcommands (profile, parse, simulate) --- pyproject.toml | 2 +- scripts/cli.py | 44 ++++++++++++++++++++++++++++++++++++++++++++ 2 files changed, 45 insertions(+), 1 deletion(-) create mode 100644 scripts/cli.py diff --git a/pyproject.toml b/pyproject.toml index f92fad0..feade94 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -64,7 +64,7 @@ include = [ ] [project.scripts] -flowsim-submit = "scripts.submit_profile:main" +flowsim = "scripts.cli:main" [tool.black] line-length = 80 diff --git a/scripts/cli.py b/scripts/cli.py new file mode 100644 index 0000000..5ea74bd --- /dev/null +++ b/scripts/cli.py @@ -0,0 +1,44 @@ +"""FlowSim CLI — unified entry point. + +Usage:: + + flowsim submit --scheduler k8s --collect perf --model-path ... --dry-run + flowsim submit --scheduler slurm --collect perf --model-path ... 
--submit +""" + +from __future__ import annotations + +import argparse +import sys + + +def main(argv: list[str] | None = None) -> int: + parser = argparse.ArgumentParser( + prog="flowsim", + description="FlowSim: workload simulation pipeline CLI", + ) + sub = parser.add_subparsers(dest="command") + sub.required = True + + # ---- submit ---- + sub.add_parser( + "submit", + help="Submit a profiling job to K8s or Slurm", + add_help=False, # submit_profile has its own --help + ) + + # Parse only the subcommand, pass the rest through + args, remaining = parser.parse_known_args(argv) + + if args.command == "submit": + from scripts.submit_profile import main as submit_main + + submit_main(remaining) + return 0 + + parser.print_help() + return 1 + + +if __name__ == "__main__": + sys.exit(main()) From d37d8f3a22fd5463691981e0807804eba6878414 Mon Sep 17 00:00:00 2001 From: Terrence Zhang Date: Tue, 17 Mar 2026 04:18:20 +0000 Subject: [PATCH 06/56] fix: 'flowsim submit' submits by default, --dry-run to preview Removed the redundant --submit flag. The subcommand name already implies submission; --dry-run is the opt-out. 
--- scripts/submit_profile.py | 17 +++++------------ 1 file changed, 5 insertions(+), 12 deletions(-) diff --git a/scripts/submit_profile.py b/scripts/submit_profile.py index 2e24318..41d6f54 100644 --- a/scripts/submit_profile.py +++ b/scripts/submit_profile.py @@ -182,17 +182,10 @@ def _parse_args(argv: list[str] | None = None) -> argparse.Namespace: ) # -- Action -- - action = p.add_mutually_exclusive_group() - action.add_argument( + p.add_argument( "--dry-run", action="store_true", - default=True, - help="Print the rendered manifest to stdout (default)", - ) - action.add_argument( - "--submit", - action="store_true", - help="Actually submit the job to the cluster", + help="Only print the rendered manifest; do not submit", ) return p.parse_args(argv) @@ -262,11 +255,11 @@ def main(argv: list[str] | None = None) -> None: spec = _build_spec(args) scheduler = _build_scheduler(args) - if args.submit: + if args.dry_run: + print(scheduler.dry_run(spec)) + else: result = scheduler.submit(spec) print(result) - else: - print(scheduler.dry_run(spec)) if __name__ == "__main__": From af48f0c0621c5f226c1aff3980a3383a970e94de Mon Sep 17 00:00:00 2001 From: Terrence Zhang Date: Tue, 17 Mar 2026 04:19:40 +0000 Subject: [PATCH 07/56] fix: validate cluster connection params before submit - Slurm: fail fast if --slurm-rest-url or --slurm-jwt-token missing - K8s: warn to stderr when no explicit kubeconfig/context provided - --dry-run skips validation (no cluster needed for manifest preview) --- scripts/submit_profile.py | 31 +++++++++++++++++++++++++++++++ 1 file changed, 31 insertions(+) diff --git a/scripts/submit_profile.py b/scripts/submit_profile.py index 41d6f54..9654e40 100644 --- a/scripts/submit_profile.py +++ b/scripts/submit_profile.py @@ -253,6 +253,11 @@ def _build_scheduler(args: argparse.Namespace): def main(argv: list[str] | None = None) -> None: args = _parse_args(argv) spec = _build_spec(args) + + # Validate connection params before building the scheduler + if not 
args.dry_run: + _validate_connection(args) + scheduler = _build_scheduler(args) if args.dry_run: @@ -262,5 +267,31 @@ def main(argv: list[str] | None = None) -> None: print(result) +def _validate_connection(args: argparse.Namespace) -> None: + """Fail fast if required cluster connection params are missing.""" + if args.scheduler == "k8s": + # kubernetes client can auto-discover from ~/.kube/config or + # in-cluster env, but warn if nothing explicit is given + if not args.k8s_kubeconfig and not args.k8s_context: + print( + "Note: no --k8s-kubeconfig or --k8s-context specified. " + "Will try ~/.kube/config and in-cluster auto-discovery.", + file=sys.stderr, + ) + elif args.scheduler == "slurm": + missing = [] + if not args.slurm_rest_url: + missing.append("--slurm-rest-url") + if not args.slurm_jwt_token: + missing.append("--slurm-jwt-token") + if missing: + sys.exit( + f"Error: {', '.join(missing)} required for Slurm submission.\n" + f" --slurm-rest-url: slurmrestd endpoint " + f"(e.g. https://slurm.example.com:6820)\n" + f" --slurm-jwt-token: generate via 'scontrol token lifespan=3600'" + ) + + if __name__ == "__main__": main() From 87a2c332c1d73c5460efbb83cb513280a4bb28bc Mon Sep 17 00:00:00 2001 From: Terrence Zhang Date: Tue, 17 Mar 2026 04:21:22 +0000 Subject: [PATCH 08/56] feat: support env vars for cluster connection params Connection params now read from environment variables as defaults, so you don't have to pass them every invocation: K8s: KUBECONFIG -> --k8s-kubeconfig FLOWSIM_K8S_NAMESPACE -> --k8s-namespace FLOWSIM_K8S_CONTEXT -> --k8s-context Slurm: FLOWSIM_SLURM_REST_URL -> --slurm-rest-url FLOWSIM_SLURM_JWT_TOKEN -> --slurm-jwt-token FLOWSIM_SLURM_PARTITION -> --slurm-partition FLOWSIM_SLURM_TIME -> --slurm-time FLOWSIM_SLURM_API_VERSION -> --slurm-api-version CLI flags override env vars. Env var names shown in --help. 
--- scripts/submit_profile.py | 41 +++++++++++++++++++++++++-------------- 1 file changed, 26 insertions(+), 15 deletions(-) diff --git a/scripts/submit_profile.py b/scripts/submit_profile.py index 9654e40..0d95efc 100644 --- a/scripts/submit_profile.py +++ b/scripts/submit_profile.py @@ -39,6 +39,7 @@ from __future__ import annotations import argparse +import os import sys from schedulers.base import ProfileJobSpec @@ -102,16 +103,20 @@ def _parse_args(argv: list[str] | None = None) -> argparse.Namespace: # -- Kubernetes-specific -- k8s = p.add_argument_group("kubernetes options") - k8s.add_argument("--k8s-namespace", default="default") + k8s.add_argument( + "--k8s-namespace", + default=os.environ.get("FLOWSIM_K8S_NAMESPACE", "default"), + help="K8s namespace (env: FLOWSIM_K8S_NAMESPACE)", + ) k8s.add_argument( "--k8s-kubeconfig", - default="", - help="Path to kubeconfig file (empty = default lookup)", + default=os.environ.get("KUBECONFIG", ""), + help="Path to kubeconfig file (env: KUBECONFIG)", ) k8s.add_argument( "--k8s-context", - default="", - help="kubeconfig context to use", + default=os.environ.get("FLOWSIM_K8S_CONTEXT", ""), + help="kubeconfig context to use (env: FLOWSIM_K8S_CONTEXT)", ) k8s.add_argument( "--k8s-pvc", @@ -135,24 +140,30 @@ def _parse_args(argv: list[str] | None = None) -> argparse.Namespace: # -- Slurm-specific -- slurm = p.add_argument_group("slurm options") - slurm.add_argument("--slurm-partition", default="gpu") - slurm.add_argument("--slurm-time", default="02:00:00") + slurm.add_argument( + "--slurm-partition", + default=os.environ.get("FLOWSIM_SLURM_PARTITION", "gpu"), + help="Slurm partition (env: FLOWSIM_SLURM_PARTITION)", + ) + slurm.add_argument( + "--slurm-time", + default=os.environ.get("FLOWSIM_SLURM_TIME", "02:00:00"), + help="Wall time limit (env: FLOWSIM_SLURM_TIME)", + ) slurm.add_argument( "--slurm-rest-url", - default="", - help="slurmrestd base URL (e.g. https://slurm.example.com:6820). 
" - "Required for --submit.", + default=os.environ.get("FLOWSIM_SLURM_REST_URL", ""), + help="slurmrestd base URL (env: FLOWSIM_SLURM_REST_URL)", ) slurm.add_argument( "--slurm-jwt-token", - default="", - help="JWT token for slurmrestd auth. " - "Generate via: scontrol token lifespan=3600", + default=os.environ.get("FLOWSIM_SLURM_JWT_TOKEN", ""), + help="JWT token for slurmrestd (env: FLOWSIM_SLURM_JWT_TOKEN)", ) slurm.add_argument( "--slurm-api-version", - default="v0.0.40", - help="slurmrestd OpenAPI version (default: v0.0.40)", + default=os.environ.get("FLOWSIM_SLURM_API_VERSION", "v0.0.40"), + help="slurmrestd API version (env: FLOWSIM_SLURM_API_VERSION)", ) slurm.add_argument( "--slurm-no-verify-ssl", From 63ab491b0ed5b25c3101e982fd00722993ad4aea Mon Sep 17 00:00:00 2001 From: Terrence Zhang Date: Tue, 17 Mar 2026 04:30:43 +0000 Subject: [PATCH 09/56] feat: config-first approach with flowsim init MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit No more built-in defaults for cluster connection params. Users must configure before submitting: flowsim init # copies templates to ~/.flowsim/ vim ~/.flowsim/k8s.yaml # fill in kubeconfig, namespace, etc. vim ~/.flowsim/slurm.yaml # fill in rest_url, partition, etc. flowsim submit ... 
# works Changes: - Add 'flowsim init' subcommand (copies templates, --force to overwrite) - Split config into ~/.flowsim/k8s.yaml and ~/.flowsim/slurm.yaml - Templates have empty REQUIRED fields — submit fails if unfilled - Config loader: schedulers/config.py with per-scheduler load functions - Priority: CLI flag > env var > config file (no silent fallbacks) - Slurm jwt_token_cmd: execute a command to get token at submit time - --dry-run skips all validation (no config needed for preview) --- schedulers/config.py | 108 ++++++++++++++++++++++++++++++++ schedulers/templates/k8s.yaml | 27 ++++++++ schedulers/templates/slurm.yaml | 30 +++++++++ scripts/cli.py | 56 +++++++++++++++-- scripts/submit_profile.py | 108 ++++++++++++++++++++++---------- 5 files changed, 292 insertions(+), 37 deletions(-) create mode 100644 schedulers/config.py create mode 100644 schedulers/templates/k8s.yaml create mode 100644 schedulers/templates/slurm.yaml diff --git a/schedulers/config.py b/schedulers/config.py new file mode 100644 index 0000000..011bf42 --- /dev/null +++ b/schedulers/config.py @@ -0,0 +1,108 @@ +"""Load FlowSim scheduler config from per-scheduler YAML files. + +Config file lookup (per scheduler): + +K8s: + 1. ``FLOWSIM_K8S_CONFIG`` env var + 2. ``~/.flowsim/k8s.yaml`` + +Slurm: + 1. ``FLOWSIM_SLURM_CONFIG`` env var + 2. ``~/.flowsim/slurm.yaml`` + +Priority (highest → lowest): + CLI flag > env var > config file > built-in default + +Template files are in ``schedulers/templates/k8s.yaml`` and +``schedulers/templates/slurm.yaml``. Copy to ``~/.flowsim/`` and edit. + +For Slurm, use ``jwt_token_cmd`` instead of ``jwt_token`` to avoid +storing secrets in plaintext. The command is executed at submit time +and its stdout is used as the token. 
+""" + +from __future__ import annotations + +import os +import shlex +import subprocess +from pathlib import Path + +# Optional: try PyYAML, fall back to JSON +try: + import yaml as _yaml + + def _load_yaml(path: Path) -> dict: + with open(path) as f: + return _yaml.safe_load(f) or {} + +except ImportError: + import json as _json + + def _load_yaml(path: Path) -> dict: # type: ignore[misc] + """Fallback: accept JSON (valid YAML 1.2 subset).""" + with open(path) as f: + return _json.load(f) + + +_CONFIG_DIR = Path.home() / ".flowsim" + + +def _resolve_path(env_var: str, filename: str) -> Path | None: + """Return the config file path, or None if it doesn't exist.""" + env = os.environ.get(env_var) + if env: + p = Path(env) + return p if p.is_file() else None + default = _CONFIG_DIR / filename + return default if default.is_file() else None + + +def load_k8s_config() -> dict: + """Load ``~/.flowsim/k8s.yaml`` (or ``FLOWSIM_K8S_CONFIG``).""" + path = _resolve_path("FLOWSIM_K8S_CONFIG", "k8s.yaml") + if path is None: + return {} + try: + return _load_yaml(path) + except Exception: + return {} + + +def load_slurm_config() -> dict: + """Load ``~/.flowsim/slurm.yaml`` (or ``FLOWSIM_SLURM_CONFIG``).""" + path = _resolve_path("FLOWSIM_SLURM_CONFIG", "slurm.yaml") + if path is None: + return {} + try: + return _load_yaml(path) + except Exception: + return {} + + +def resolve_jwt_token(slurm_cfg: dict) -> str: + """Get the JWT token from config, executing jwt_token_cmd if needed.""" + token = slurm_cfg.get("jwt_token", "") + if token: + return str(token) + + cmd = slurm_cfg.get("jwt_token_cmd", "") + if cmd: + result = subprocess.run( + shlex.split(cmd), + capture_output=True, + text=True, + timeout=30, + ) + if result.returncode == 0: + return result.stdout.strip() + + return "" + + +def cfg_get(cfg: dict, key: str, fallback: str = "") -> str: + """Get a value from a flat config dict, or fallback.""" + val = cfg.get(key) + if val is not None: + return str(val) + return 
fallback diff --git a/schedulers/templates/k8s.yaml b/schedulers/templates/k8s.yaml new file mode 100644 index 0000000..bac3a77 --- /dev/null +++ b/schedulers/templates/k8s.yaml @@ -0,0 +1,27 @@ +# FlowSim Kubernetes scheduler config +# +# Created by: flowsim init +# Location: ~/.flowsim/k8s.yaml +# +# Fill in the values below, then submit with: +# flowsim submit --scheduler k8s --collect perf --model-path ... +# +# CLI flags and env vars can override individual values. + +# REQUIRED — path to your kubeconfig file +kubeconfig: "" # e.g. /home/me/.kube/prod.kubeconfig + +# REQUIRED — which context and namespace to use +context: "" # e.g. prod-cluster (empty = current-context) +namespace: "" # e.g. ml-team + +# Output storage (pick one or leave both empty for emptyDir) +pvc: "" # PVC name for trace output +host_output_dir: "" # hostPath alternative to PVC + +# Optional +service_account: "" +shm_size: "16Gi" +# node_selector: +# gpu: a100 +# tier: high diff --git a/schedulers/templates/slurm.yaml b/schedulers/templates/slurm.yaml new file mode 100644 index 0000000..0910f4a --- /dev/null +++ b/schedulers/templates/slurm.yaml @@ -0,0 +1,30 @@ +# FlowSim Slurm scheduler config +# +# Created by: flowsim init +# Location: ~/.flowsim/slurm.yaml +# +# Fill in the values below, then submit with: +# flowsim submit --scheduler slurm --collect perf --model-path ... +# +# CLI flags and env vars can override individual values. + +# REQUIRED — slurmrestd endpoint +rest_url: "" # e.g. https://slurm.corp.com:6820 + +# REQUIRED — authentication (pick one) +# jwt_token: "" # not recommended — stored in plaintext +jwt_token_cmd: "" # e.g. "scontrol token lifespan=3600" + +# REQUIRED — cluster settings +partition: "" # e.g. gpu-h100 +account: "" # e.g. 
my-project + +# Optional +api_version: "v0.0.40" +time: "02:00:00" +constraint: "" +container_runtime: "none" # docker | enroot | none +container_mounts: "" +# modules: +# - cuda/12.6 +# - anaconda3 diff --git a/scripts/cli.py b/scripts/cli.py index 5ea74bd..dd2d825 100644 --- a/scripts/cli.py +++ b/scripts/cli.py @@ -2,14 +2,54 @@ Usage:: - flowsim submit --scheduler k8s --collect perf --model-path ... --dry-run - flowsim submit --scheduler slurm --collect perf --model-path ... --submit + flowsim init # set up ~/.flowsim/ config files + flowsim submit --scheduler k8s ... # submit a profiling job + flowsim submit ... --dry-run # preview manifest without submitting """ from __future__ import annotations import argparse +import shutil import sys +from pathlib import Path + + +_TEMPLATE_DIR = Path(__file__).resolve().parent.parent / "schedulers" / "templates" +_CONFIG_DIR = Path.home() / ".flowsim" + + +def _cmd_init(argv: list[str]) -> int: + """Copy config templates to ~/.flowsim/.""" + parser = argparse.ArgumentParser( + prog="flowsim init", + description="Initialize ~/.flowsim/ with scheduler config templates.", + ) + parser.add_argument( + "--force", + action="store_true", + help="Overwrite existing config files", + ) + args = parser.parse_args(argv) + + _CONFIG_DIR.mkdir(parents=True, exist_ok=True) + + templates = list(_TEMPLATE_DIR.glob("*.yaml")) + if not templates: + print(f"Error: no templates found in {_TEMPLATE_DIR}", file=sys.stderr) + return 1 + + for src in templates: + dst = _CONFIG_DIR / src.name + if dst.exists() and not args.force: + print(f" skip {dst} (already exists, use --force to overwrite)") + else: + shutil.copy2(src, dst) + print(f" wrote {dst}") + + print(f"\nEdit the files in {_CONFIG_DIR}/ to configure your cluster,") + print("then run: flowsim submit --scheduler ...") + return 0 def main(argv: list[str] | None = None) -> int: @@ -20,16 +60,22 @@ def main(argv: list[str] | None = None) -> int: sub = parser.add_subparsers(dest="command") 
sub.required = True - # ---- submit ---- + sub.add_parser( + "init", + help="Initialize ~/.flowsim/ with config templates", + add_help=False, + ) sub.add_parser( "submit", help="Submit a profiling job to K8s or Slurm", - add_help=False, # submit_profile has its own --help + add_help=False, ) - # Parse only the subcommand, pass the rest through args, remaining = parser.parse_known_args(argv) + if args.command == "init": + return _cmd_init(remaining) + if args.command == "submit": from scripts.submit_profile import main as submit_main diff --git a/scripts/submit_profile.py b/scripts/submit_profile.py index 0d95efc..5a41349 100644 --- a/scripts/submit_profile.py +++ b/scripts/submit_profile.py @@ -43,11 +43,21 @@ import sys from schedulers.base import ProfileJobSpec +from schedulers.config import cfg_get, load_k8s_config, load_slurm_config, resolve_jwt_token from schedulers.k8s import K8sScheduler from schedulers.slurm import SlurmScheduler +def _d(env_var: str, cfg: dict, key: str, fallback: str = "") -> str: + """Resolve default: env var > config file > fallback.""" + return os.environ.get(env_var, "") or cfg_get(cfg, key, fallback) + + def _parse_args(argv: list[str] | None = None) -> argparse.Namespace: + # Load per-scheduler config files for defaults + k8s_cfg = load_k8s_config() + slurm_cfg = load_slurm_config() + p = argparse.ArgumentParser( description="Submit FlowSim profiling jobs to K8s or Slurm.", formatter_class=argparse.RawDescriptionHelpFormatter, @@ -102,30 +112,30 @@ def _parse_args(argv: list[str] | None = None) -> argparse.Namespace: infra.add_argument("--job-name", default="") # -- Kubernetes-specific -- - k8s = p.add_argument_group("kubernetes options") + k8s = p.add_argument_group("kubernetes options (config: ~/.flowsim/k8s.yaml)") k8s.add_argument( "--k8s-namespace", - default=os.environ.get("FLOWSIM_K8S_NAMESPACE", "default"), + default=_d("FLOWSIM_K8S_NAMESPACE", k8s_cfg, "namespace", "default"), help="K8s namespace (env: 
FLOWSIM_K8S_NAMESPACE)", ) k8s.add_argument( "--k8s-kubeconfig", - default=os.environ.get("KUBECONFIG", ""), + default=_d("KUBECONFIG", k8s_cfg, "kubeconfig", ""), help="Path to kubeconfig file (env: KUBECONFIG)", ) k8s.add_argument( "--k8s-context", - default=os.environ.get("FLOWSIM_K8S_CONTEXT", ""), - help="kubeconfig context to use (env: FLOWSIM_K8S_CONTEXT)", + default=_d("FLOWSIM_K8S_CONTEXT", k8s_cfg, "context", ""), + help="kubeconfig context (env: FLOWSIM_K8S_CONTEXT)", ) k8s.add_argument( "--k8s-pvc", - default="", + default=cfg_get(k8s_cfg, "pvc", ""), help="PVC name for output volume (omit for emptyDir)", ) k8s.add_argument( "--k8s-host-output-dir", - default="", + default=cfg_get(k8s_cfg, "host_output_dir", ""), help="hostPath for output (used when --k8s-pvc is empty)", ) k8s.add_argument( @@ -135,34 +145,40 @@ def _parse_args(argv: list[str] | None = None) -> argparse.Namespace: metavar="KEY=VALUE", help="Node selector labels (repeatable)", ) - k8s.add_argument("--k8s-service-account", default="") - k8s.add_argument("--k8s-shm-size", default="16Gi") + k8s.add_argument( + "--k8s-service-account", + default=cfg_get(k8s_cfg, "service_account", ""), + ) + k8s.add_argument( + "--k8s-shm-size", + default=cfg_get(k8s_cfg, "shm_size", "16Gi"), + ) # -- Slurm-specific -- - slurm = p.add_argument_group("slurm options") + slurm = p.add_argument_group("slurm options (config: ~/.flowsim/slurm.yaml)") slurm.add_argument( "--slurm-partition", - default=os.environ.get("FLOWSIM_SLURM_PARTITION", "gpu"), + default=_d("FLOWSIM_SLURM_PARTITION", slurm_cfg, "partition", ""), help="Slurm partition (env: FLOWSIM_SLURM_PARTITION)", ) slurm.add_argument( "--slurm-time", - default=os.environ.get("FLOWSIM_SLURM_TIME", "02:00:00"), + default=_d("FLOWSIM_SLURM_TIME", slurm_cfg, "time", "02:00:00"), help="Wall time limit (env: FLOWSIM_SLURM_TIME)", ) slurm.add_argument( "--slurm-rest-url", - default=os.environ.get("FLOWSIM_SLURM_REST_URL", ""), + 
default=_d("FLOWSIM_SLURM_REST_URL", slurm_cfg, "rest_url", ""), help="slurmrestd base URL (env: FLOWSIM_SLURM_REST_URL)", ) slurm.add_argument( "--slurm-jwt-token", - default=os.environ.get("FLOWSIM_SLURM_JWT_TOKEN", ""), + default=_d("FLOWSIM_SLURM_JWT_TOKEN", slurm_cfg, "jwt_token", ""), help="JWT token for slurmrestd (env: FLOWSIM_SLURM_JWT_TOKEN)", ) slurm.add_argument( "--slurm-api-version", - default=os.environ.get("FLOWSIM_SLURM_API_VERSION", "v0.0.40"), + default=_d("FLOWSIM_SLURM_API_VERSION", slurm_cfg, "api_version", "v0.0.40"), help="slurmrestd API version (env: FLOWSIM_SLURM_API_VERSION)", ) slurm.add_argument( @@ -170,19 +186,30 @@ def _parse_args(argv: list[str] | None = None) -> argparse.Namespace: action="store_true", help="Skip TLS certificate verification for slurmrestd", ) - slurm.add_argument("--slurm-account", default="") - slurm.add_argument("--slurm-constraint", default="") + slurm.add_argument( + "--slurm-account", + default=cfg_get(slurm_cfg, "account", ""), + ) + slurm.add_argument( + "--slurm-constraint", + default=cfg_get(slurm_cfg, "constraint", ""), + ) slurm.add_argument( "--slurm-container-runtime", choices=["docker", "enroot", "none"], - default="none", + default=cfg_get(slurm_cfg, "container_runtime", "none"), + ) + slurm.add_argument( + "--slurm-container-mounts", + default=cfg_get(slurm_cfg, "container_mounts", ""), ) - slurm.add_argument("--slurm-container-mounts", default="") + # Modules from config (list) + CLI (append) + cfg_modules = slurm_cfg.get("modules") if isinstance(slurm_cfg.get("modules"), list) else [] slurm.add_argument( "--slurm-module", action="append", - default=[], - help="Modules to load (repeatable)", + default=[str(m) for m in cfg_modules], + help="Modules to load (repeatable, merged with config)", ) slurm.add_argument( "--slurm-extra-sbatch", @@ -263,12 +290,19 @@ def _build_scheduler(args: argparse.Namespace): def main(argv: list[str] | None = None) -> None: args = _parse_args(argv) - spec = 
_build_spec(args) - # Validate connection params before building the scheduler + # Resolve Slurm JWT token from jwt_token_cmd in config if needed + if args.scheduler == "slurm" and not args.slurm_jwt_token: + slurm_cfg = load_slurm_config() + token = resolve_jwt_token(slurm_cfg) + if token: + args.slurm_jwt_token = token + + # Validate required connection params before submit if not args.dry_run: _validate_connection(args) + spec = _build_spec(args) scheduler = _build_scheduler(args) if args.dry_run: @@ -278,29 +312,39 @@ def main(argv: list[str] | None = None) -> None: print(result) +_INIT_HINT = "Run 'flowsim init' to create config files." + + def _validate_connection(args: argparse.Namespace) -> None: """Fail fast if required cluster connection params are missing.""" if args.scheduler == "k8s": - # kubernetes client can auto-discover from ~/.kube/config or - # in-cluster env, but warn if nothing explicit is given + if not args.k8s_namespace: + sys.exit( + "Error: K8s namespace not set.\n" + "Set it in ~/.flowsim/k8s.yaml, FLOWSIM_K8S_NAMESPACE env var,\n" + f"or --k8s-namespace flag. {_INIT_HINT}" + ) + # kubeconfig is optional (in-cluster auto-discovery), but warn if not args.k8s_kubeconfig and not args.k8s_context: print( - "Note: no --k8s-kubeconfig or --k8s-context specified. " + "Note: no kubeconfig or context specified. " "Will try ~/.kube/config and in-cluster auto-discovery.", file=sys.stderr, ) elif args.scheduler == "slurm": missing = [] if not args.slurm_rest_url: - missing.append("--slurm-rest-url") + missing.append("rest_url (--slurm-rest-url)") if not args.slurm_jwt_token: - missing.append("--slurm-jwt-token") + missing.append("jwt_token/jwt_token_cmd (--slurm-jwt-token)") + if not args.slurm_partition: + missing.append("partition (--slurm-partition)") if missing: sys.exit( - f"Error: {', '.join(missing)} required for Slurm submission.\n" - f" --slurm-rest-url: slurmrestd endpoint " - f"(e.g. 
https://slurm.example.com:6820)\n" - f" --slurm-jwt-token: generate via 'scontrol token lifespan=3600'" + "Error: missing required Slurm config:\n" + + "\n".join(f" - {m}" for m in missing) + + f"\n\nSet them in ~/.flowsim/slurm.yaml or via CLI flags.\n" + + _INIT_HINT ) From 7116fef3de3586e258949bf0b33e0e931ca27fa1 Mon Sep 17 00:00:00 2001 From: Terrence Zhang Date: Tue, 17 Mar 2026 04:36:57 +0000 Subject: [PATCH 10/56] refactor: flowsim init takes CLI args instead of interactive prompts - flowsim init k8s --kubeconfig ... --namespace ... - flowsim init slurm --rest-url ... --partition ... --account ... - Required fields enforced by argparse, --help shows everything - --force to overwrite existing config - Demote --dry-run to [debug] in submit help text - Remove template-copy approach, use _save_yaml() directly --- schedulers/config.py | 14 ++++ scripts/cli.py | 136 ++++++++++++++++++++++++++++++-------- scripts/submit_profile.py | 2 +- 3 files changed, 124 insertions(+), 28 deletions(-) diff --git a/schedulers/config.py b/schedulers/config.py index 011bf42..4f35494 100644 --- a/schedulers/config.py +++ b/schedulers/config.py @@ -48,6 +48,20 @@ def _load_yaml(path: Path) -> dict: # type: ignore[misc] _CONFIG_DIR = Path.home() / ".flowsim" +def _save_yaml(path: Path, data: dict) -> None: + """Write a dict to a YAML file (uses PyYAML if available, else JSON).""" + path.parent.mkdir(parents=True, exist_ok=True) + try: + import yaml as _y + with open(path, "w") as f: + _y.safe_dump(data, f, default_flow_style=False, sort_keys=False) + except ImportError: + import json as _j + with open(path, "w") as f: + _j.dump(data, f, indent=2, ensure_ascii=False) + f.write("\n") + + def _resolve_path(env_var: str, filename: str) -> Path | None: """Return the config file path, or None if it doesn't exist.""" env = os.environ.get(env_var) diff --git a/scripts/cli.py b/scripts/cli.py index dd2d825..5cd370a 100644 --- a/scripts/cli.py +++ b/scripts/cli.py @@ -2,53 +2,135 @@ Usage:: - 
flowsim init # set up ~/.flowsim/ config files - flowsim submit --scheduler k8s ... # submit a profiling job - flowsim submit ... --dry-run # preview manifest without submitting + flowsim init k8s --kubeconfig ~/.kube/config --namespace ml-team ... + flowsim init slurm --rest-url https://slurm:6820 --partition gpu ... + flowsim submit --scheduler k8s --collect perf --model-path ... + flowsim submit ... --dry-run # debug: preview manifest """ from __future__ import annotations import argparse -import shutil import sys from pathlib import Path -_TEMPLATE_DIR = Path(__file__).resolve().parent.parent / "schedulers" / "templates" _CONFIG_DIR = Path.home() / ".flowsim" +def _init_k8s_parser(sub: argparse._SubParsersAction) -> None: + p = sub.add_parser("k8s", help="Configure Kubernetes scheduler") + p.add_argument("--kubeconfig", required=True, + help="Path to kubeconfig file (REQUIRED)") + p.add_argument("--context", default="", + help="Kubeconfig context (empty = current-context)") + p.add_argument("--namespace", required=True, + help="Kubernetes namespace (REQUIRED)") + p.add_argument("--pvc", default="", + help="PVC name for trace output") + p.add_argument("--host-output-dir", default="", + help="hostPath alternative to PVC") + p.add_argument("--service-account", default="", + help="Service account for the job pod") + p.add_argument("--shm-size", default="16Gi", + help="Shared memory size (default: 16Gi)") + p.add_argument("--force", action="store_true", + help="Overwrite existing config file") + + +def _init_slurm_parser(sub: argparse._SubParsersAction) -> None: + p = sub.add_parser("slurm", help="Configure Slurm scheduler") + p.add_argument("--rest-url", required=True, + help="slurmrestd endpoint URL (REQUIRED)") + p.add_argument("--partition", required=True, + help="Slurm partition (REQUIRED)") + p.add_argument("--account", required=True, + help="Slurm account (REQUIRED)") + p.add_argument("--jwt-token-cmd", default="", + help='Command to get JWT token, e.g. 
"scontrol token lifespan=3600"') + p.add_argument("--jwt-token", default="", + help="Static JWT token (not recommended)") + p.add_argument("--api-version", default="v0.0.40", + help="slurmrestd API version (default: v0.0.40)") + p.add_argument("--time", default="02:00:00", + help="Job time limit (default: 02:00:00)") + p.add_argument("--constraint", default="", + help="Node constraint") + p.add_argument("--container-runtime", default="none", + choices=["docker", "enroot", "none"], + help="Container runtime (default: none)") + p.add_argument("--container-mounts", default="", + help="Container mount spec") + p.add_argument("--force", action="store_true", + help="Overwrite existing config file") + + def _cmd_init(argv: list[str]) -> int: - """Copy config templates to ~/.flowsim/.""" + """Save scheduler config to ~/.flowsim/ from CLI args.""" + from schedulers.config import _save_yaml + parser = argparse.ArgumentParser( prog="flowsim init", - description="Initialize ~/.flowsim/ with scheduler config templates.", - ) - parser.add_argument( - "--force", - action="store_true", - help="Overwrite existing config files", + description=( + "Configure a scheduler and save to ~/.flowsim/.\n\n" + "Examples:\n" + " flowsim init k8s --kubeconfig ~/.kube/config --namespace ml-team\n" + " flowsim init slurm --rest-url https://slurm:6820 " + "--partition gpu --account proj" + ), + formatter_class=argparse.RawDescriptionHelpFormatter, ) - args = parser.parse_args(argv) + sub = parser.add_subparsers(dest="scheduler") + sub.required = True + _init_k8s_parser(sub) + _init_slurm_parser(sub) - _CONFIG_DIR.mkdir(parents=True, exist_ok=True) + args = parser.parse_args(argv) - templates = list(_TEMPLATE_DIR.glob("*.yaml")) - if not templates: - print(f"Error: no templates found in {_TEMPLATE_DIR}", file=sys.stderr) + if args.scheduler == "k8s": + kube_path = Path(args.kubeconfig).expanduser() + if not kube_path.is_file(): + print(f"Error: kubeconfig not found: {kube_path}", file=sys.stderr) + 
return 1 + cfg = { + "kubeconfig": str(kube_path), + "context": args.context, + "namespace": args.namespace, + "pvc": args.pvc, + "host_output_dir": args.host_output_dir, + "service_account": args.service_account, + "shm_size": args.shm_size, + } + dst = _CONFIG_DIR / "k8s.yaml" + + elif args.scheduler == "slurm": + if not args.jwt_token_cmd and not args.jwt_token: + print("Error: provide --jwt-token-cmd or --jwt-token", file=sys.stderr) + return 1 + cfg = { + "rest_url": args.rest_url, + "jwt_token_cmd": args.jwt_token_cmd, + "jwt_token": args.jwt_token, + "partition": args.partition, + "account": args.account, + "api_version": args.api_version, + "time": args.time, + "constraint": args.constraint, + "container_runtime": args.container_runtime, + "container_mounts": args.container_mounts, + } + dst = _CONFIG_DIR / "slurm.yaml" + else: + parser.print_help() return 1 - for src in templates: - dst = _CONFIG_DIR / src.name - if dst.exists() and not args.force: - print(f" skip {dst} (already exists, use --force to overwrite)") - else: - shutil.copy2(src, dst) - print(f" wrote {dst}") + if dst.exists() and not args.force: + print(f"Error: {dst} already exists (use --force to overwrite)", + file=sys.stderr) + return 1 - print(f"\nEdit the files in {_CONFIG_DIR}/ to configure your cluster,") - print("then run: flowsim submit --scheduler ...") + _save_yaml(dst, cfg) + print(f"Saved {dst}") return 0 @@ -62,7 +144,7 @@ def main(argv: list[str] | None = None) -> int: sub.add_parser( "init", - help="Initialize ~/.flowsim/ with config templates", + help="Configure a scheduler (k8s/slurm) and save to ~/.flowsim/", add_help=False, ) sub.add_parser( diff --git a/scripts/submit_profile.py b/scripts/submit_profile.py index 5a41349..8400bec 100644 --- a/scripts/submit_profile.py +++ b/scripts/submit_profile.py @@ -223,7 +223,7 @@ def _parse_args(argv: list[str] | None = None) -> argparse.Namespace: p.add_argument( "--dry-run", action="store_true", - help="Only print the rendered 
manifest; do not submit", + help="[debug] Print rendered manifest without submitting", ) return p.parse_args(argv) From 8987c389f446754fe42dce116c4843ec1b1d54e6 Mon Sep 17 00:00:00 2001 From: Terrence Zhang Date: Tue, 17 Mar 2026 04:50:20 +0000 Subject: [PATCH 11/56] feat: PD disaggregation support + multi-node Docker test infra Docker test environments: - kind-multi-node.yaml: 1 control-plane + 2 workers (GPU 0, GPU 1) - slurm-compose.yaml: slurmctld + 2 slurmd (GPU 0, GPU 1) + slurmrestd - slurm-node.dockerfile + slurm.conf: Slurm 23.11 with JWT auth PD disaggregation: - ProfileJobSpec: disagg_mode, disagg_transfer_backend, disagg_bootstrap_port, disagg_prefill_pp, disagg_ib_device - as_prefill() / as_decode() helpers for creating PD pairs - BaseScheduler: render_pd_pair() and submit_pd_pair() - CLI: --pd flag submits prefill + decode job pair - --disagg-transfer-backend (mooncake/nixl), --disagg-bootstrap-port, etc. Bugfix: - resolve_jwt_token: catch FileNotFoundError when jwt_token_cmd binary missing --- dockerfiles/kind-multi-node.yaml | 64 +++++++++++++ dockerfiles/slurm-compose.yaml | 152 ++++++++++++++++++++++++++++++ dockerfiles/slurm-node.dockerfile | 52 ++++++++++ dockerfiles/slurm.conf | 48 ++++++++++ schedulers/base.py | 42 ++++++++- schedulers/config.py | 19 ++-- scripts/submit_profile.py | 47 ++++++++- 7 files changed, 413 insertions(+), 11 deletions(-) create mode 100644 dockerfiles/kind-multi-node.yaml create mode 100644 dockerfiles/slurm-compose.yaml create mode 100644 dockerfiles/slurm-node.dockerfile create mode 100644 dockerfiles/slurm.conf diff --git a/dockerfiles/kind-multi-node.yaml b/dockerfiles/kind-multi-node.yaml new file mode 100644 index 0000000..c2208c4 --- /dev/null +++ b/dockerfiles/kind-multi-node.yaml @@ -0,0 +1,64 @@ +# kind cluster config — 1 control-plane + 2 GPU worker nodes +# +# Each worker gets one GPU via NVIDIA device plugin. 
+# Requires: kind, kubectl, nvidia-container-toolkit +# +# Usage: +# # Install kind (once) +# curl -Lo ./kind https://kind.sigs.k8s.io/dl/v0.27.0/kind-linux-amd64 +# chmod +x ./kind && sudo mv ./kind /usr/local/bin/ +# +# # Create cluster +# kind create cluster --name flowsim --config dockerfiles/kind-multi-node.yaml +# +# # Install NVIDIA device plugin (exposes GPUs to K8s) +# kubectl apply -f https://raw.githubusercontent.com/NVIDIA/k8s-device-plugin/v0.17.0/deployments/static/nvidia-device-plugin.yml +# +# # Verify +# kubectl get nodes +# kubectl describe node flowsim-worker | grep nvidia.com/gpu +# kubectl describe node flowsim-worker2 | grep nvidia.com/gpu +# +# # Init FlowSim +# flowsim init k8s --kubeconfig ~/.kube/config \ +# --context kind-flowsim --namespace default --force +# +# # Submit a profiling job +# flowsim submit --scheduler k8s --collect perf \ +# --model-path /models/Qwen-7B --gpus 1 +# +# # Teardown +# kind delete cluster --name flowsim + +kind: Cluster +apiVersion: kind.x-k8s.io/v1alpha4 + +nodes: + - role: control-plane + + - role: worker + extraMounts: + # Pass GPU 0 into this node + - hostPath: /dev/nvidia0 + containerPath: /dev/nvidia0 + - hostPath: /dev/nvidiactl + containerPath: /dev/nvidiactl + - hostPath: /dev/nvidia-uvm + containerPath: /dev/nvidia-uvm + # Mount model weights (adjust to your path) + - hostPath: /home/administrator/zhangt + containerPath: /workspace + readOnly: true + + - role: worker + extraMounts: + # Pass GPU 1 into this node + - hostPath: /dev/nvidia1 + containerPath: /dev/nvidia1 + - hostPath: /dev/nvidiactl + containerPath: /dev/nvidiactl + - hostPath: /dev/nvidia-uvm + containerPath: /dev/nvidia-uvm + - hostPath: /home/administrator/zhangt + containerPath: /workspace + readOnly: true diff --git a/dockerfiles/slurm-compose.yaml b/dockerfiles/slurm-compose.yaml new file mode 100644 index 0000000..29f694d --- /dev/null +++ b/dockerfiles/slurm-compose.yaml @@ -0,0 +1,152 @@ +# Slurm test cluster — slurmctld + 2 
compute nodes (GPU 0, GPU 1) + slurmrestd +# +# Usage: +# cd dockerfiles/ +# docker compose -f slurm-compose.yaml up -d +# +# # Wait for cluster to be ready (~30s) +# docker exec slurmctld sinfo +# +# # Get JWT token for REST API +# docker exec slurmctld scontrol token lifespan=3600 +# +# # Init FlowSim +# flowsim init slurm --rest-url http://localhost:6820 \ +# --partition normal --account default \ +# --jwt-token-cmd "docker exec slurmctld scontrol token lifespan=3600" \ +# --force +# +# # Submit a job +# flowsim submit --scheduler slurm --collect perf \ +# --model-path /models/Qwen-7B --gpus 1 +# +# # Teardown +# docker compose -f slurm-compose.yaml down -v + +x-slurm-base: &slurm-base + build: + context: . + dockerfile: slurm-node.dockerfile + volumes: + - slurm-etc:/etc/slurm + - munge-socket:/run/munge + # Share workspace for model weights / traces + - /home/administrator/zhangt:/workspace:ro + networks: + - slurm-net + +services: + # ---- Munge (shared auth daemon) ---- + munge: + <<: *slurm-base + container_name: munge + hostname: munge + command: > + bash -c " + if [ ! 
-f /etc/munge/munge.key ]; then + mungekey --create --force + fi + chown munge:munge /etc/munge/munge.key + chmod 400 /etc/munge/munge.key + gosu munge munged --foreground + " + volumes: + - munge-key:/etc/munge + - munge-socket:/run/munge + + # ---- Controller ---- + slurmctld: + <<: *slurm-base + container_name: slurmctld + hostname: slurmctld + command: > + bash -c " + until [ -S /run/munge/munge.socket.2 ]; do sleep 0.5; done + slurmctld -D -vvv + " + depends_on: + - munge + volumes: + - slurm-etc:/etc/slurm + - munge-key:/etc/munge:ro + - munge-socket:/run/munge + - slurm-state:/var/spool/slurmctld + + # ---- Compute node 0 (GPU 0) ---- + slurmd-0: + <<: *slurm-base + container_name: slurmd-0 + hostname: slurmd-0 + command: > + bash -c " + until [ -S /run/munge/munge.socket.2 ]; do sleep 0.5; done + slurmd -D -vvv + " + depends_on: + - slurmctld + volumes: + - slurm-etc:/etc/slurm:ro + - munge-key:/etc/munge:ro + - munge-socket:/run/munge + - /home/administrator/zhangt:/workspace:ro + deploy: + resources: + reservations: + devices: + - driver: nvidia + device_ids: ["0"] + capabilities: [gpu] + + # ---- Compute node 1 (GPU 1) ---- + slurmd-1: + <<: *slurm-base + container_name: slurmd-1 + hostname: slurmd-1 + command: > + bash -c " + until [ -S /run/munge/munge.socket.2 ]; do sleep 0.5; done + slurmd -D -vvv + " + depends_on: + - slurmctld + volumes: + - slurm-etc:/etc/slurm:ro + - munge-key:/etc/munge:ro + - munge-socket:/run/munge + - /home/administrator/zhangt:/workspace:ro + deploy: + resources: + reservations: + devices: + - driver: nvidia + device_ids: ["1"] + capabilities: [gpu] + + # ---- REST API ---- + slurmrestd: + <<: *slurm-base + container_name: slurmrestd + hostname: slurmrestd + command: > + bash -c " + until [ -S /run/munge/munge.socket.2 ]; do sleep 0.5; done + slurmrestd -a rest_auth/jwt 0.0.0.0:6820 -vvv + " + depends_on: + - slurmctld + ports: + - "6820:6820" + volumes: + - slurm-etc:/etc/slurm:ro + - munge-key:/etc/munge:ro + - 
munge-socket:/run/munge + +volumes: + slurm-etc: + slurm-state: + munge-key: + munge-socket: + +networks: + slurm-net: + driver: bridge diff --git a/dockerfiles/slurm-node.dockerfile b/dockerfiles/slurm-node.dockerfile new file mode 100644 index 0000000..397284d --- /dev/null +++ b/dockerfiles/slurm-node.dockerfile @@ -0,0 +1,52 @@ +# Slurm node image — controller, compute, and REST API +# +# Based on Ubuntu 22.04 with Slurm 23.11 + munge + JWT support. +# Used by slurm-compose.yaml. + +FROM ubuntu:22.04 + +ENV DEBIAN_FRONTEND=noninteractive + +RUN apt-get update && apt-get install -y --no-install-recommends \ + build-essential \ + curl \ + gosu \ + libhttp-parser-dev \ + libjson-c-dev \ + libjwt-dev \ + libmunge-dev \ + munge \ + wget \ + && rm -rf /var/lib/apt/lists/* + +# Install Slurm 23.11 from source (includes slurmrestd + JWT auth) +ARG SLURM_VERSION=23.11.10 +RUN cd /tmp && \ + wget -q https://download.schedmd.com/slurm/slurm-${SLURM_VERSION}.tar.bz2 && \ + tar xjf slurm-${SLURM_VERSION}.tar.bz2 && \ + cd slurm-${SLURM_VERSION} && \ + ./configure \ + --prefix=/usr \ + --sysconfdir=/etc/slurm \ + --with-jwt \ + --with-http-parser \ + --with-json \ + --enable-slurmrestd && \ + make -j"$(nproc)" && \ + make install && \ + rm -rf /tmp/slurm-* + +# Create required directories and users +RUN useradd -r -s /sbin/nologin slurm && \ + mkdir -p /etc/slurm /var/spool/slurmctld /var/spool/slurmd /var/log/slurm && \ + chown slurm:slurm /var/spool/slurmctld /var/spool/slurmd /var/log/slurm + +# Slurm config — 2 compute nodes, 1 GPU each +COPY slurm.conf /etc/slurm/slurm.conf + +# JWT key for REST API auth +RUN dd if=/dev/urandom bs=32 count=1 2>/dev/null | base64 > /etc/slurm/jwt_hs256.key && \ + chown slurm:slurm /etc/slurm/jwt_hs256.key && \ + chmod 0600 /etc/slurm/jwt_hs256.key + +CMD ["bash"] diff --git a/dockerfiles/slurm.conf b/dockerfiles/slurm.conf new file mode 100644 index 0000000..734509d --- /dev/null +++ b/dockerfiles/slurm.conf @@ -0,0 +1,48 @@ +# 
slurm.conf — minimal 2-node cluster for FlowSim testing +# +# Controller: slurmctld +# Compute: slurmd-0 (1 GPU), slurmd-1 (1 GPU) +# REST API: slurmrestd on port 6820 + +ClusterName=flowsim +SlurmctldHost=slurmctld + +# Auth +AuthType=auth/munge +AuthAltTypes=auth/jwt +AuthAltParameters=jwt_key=/etc/slurm/jwt_hs256.key + +# Paths +SlurmctldPidFile=/var/run/slurmctld.pid +SlurmdPidFile=/var/run/slurmd.pid +StateSaveLocation=/var/spool/slurmctld +SlurmdSpoolDir=/var/spool/slurmd +SlurmctldLogFile=/var/log/slurm/slurmctld.log +SlurmdLogFile=/var/log/slurm/slurmd.log + +# Scheduling +SchedulerType=sched/backfill +SelectType=select/cons_tres +SelectTypeParameters=CR_Core_Memory + +# GPU support +GresTypes=gpu + +# Accounting (minimal) +AccountingStorageType=accounting_storage/none +JobAcctGatherType=jobacct_gather/none + +# Timeouts +SlurmctldTimeout=30 +SlurmdTimeout=30 +InactiveLimit=0 +MinJobAge=300 +KillWait=30 +Waittime=0 + +# Partitions +PartitionName=normal Nodes=slurmd-[0-1] Default=YES MaxTime=INFINITE State=UP + +# Node definitions — 1 GPU each +NodeName=slurmd-0 CPUs=8 RealMemory=32000 Gres=gpu:1 State=UNKNOWN +NodeName=slurmd-1 CPUs=8 RealMemory=32000 Gres=gpu:1 State=UNKNOWN diff --git a/schedulers/base.py b/schedulers/base.py index df40429..1427e8e 100644 --- a/schedulers/base.py +++ b/schedulers/base.py @@ -38,6 +38,13 @@ class ProfileJobSpec: log_dir: str = "/flowsim/tests/test-artifacts" job_name: str = "" + # -- PD disaggregation -- + disagg_mode: str = "" # "prefill", "decode", or "" (unified) + disagg_transfer_backend: str = "mooncake" # "mooncake" or "nixl" + disagg_bootstrap_port: int = 8998 + disagg_prefill_pp: int = 1 + disagg_ib_device: str = "" + # -- Extra server opts (appended verbatim) -- extra_server_opts: str = "" @@ -51,6 +58,14 @@ def build_server_opts(self) -> str: ] if self.dp > 1: parts.append(f"--dp {self.dp}") + if self.disagg_mode: + parts.append(f"--disaggregation-mode {self.disagg_mode}") + 
parts.append(f"--disaggregation-transfer-backend {self.disagg_transfer_backend}") + parts.append(f"--disaggregation-bootstrap-port {self.disagg_bootstrap_port}") + if self.disagg_prefill_pp > 1: + parts.append(f"--disaggregation-prefill-pp {self.disagg_prefill_pp}") + if self.disagg_ib_device: + parts.append(f"--disaggregation-ib-device {self.disagg_ib_device}") if self.extra_server_opts: parts.append(self.extra_server_opts) return " ".join(parts) @@ -110,7 +125,20 @@ def default_job_name(self) -> str: if self.job_name: return self.job_name model_short = self.model_path.split("/")[-1].lower().replace(".", "-") - return f"flowsim-{self.collect}-{model_short}-bs{self.bs}-il{self.input_len}" + name = f"flowsim-{self.collect}-{model_short}-bs{self.bs}-il{self.input_len}" + if self.disagg_mode: + name += f"-{self.disagg_mode}" + return name + + def as_prefill(self) -> "ProfileJobSpec": + """Return a copy configured as the prefill instance.""" + from dataclasses import replace + return replace(self, disagg_mode="prefill") + + def as_decode(self) -> "ProfileJobSpec": + """Return a copy configured as the decode instance.""" + from dataclasses import replace + return replace(self, disagg_mode="decode") class BaseScheduler(abc.ABC): @@ -127,3 +155,15 @@ def submit(self, spec: ProfileJobSpec) -> str: def dry_run(self, spec: ProfileJobSpec) -> str: """Render and return the manifest without submitting.""" return self.render(spec) + + def render_pd_pair(self, spec: ProfileJobSpec) -> str: + """Render both prefill and decode manifests for PD disaggregation.""" + prefill = self.render(spec.as_prefill()) + decode = self.render(spec.as_decode()) + return f"# === PREFILL INSTANCE ===\n{prefill}\n# === DECODE INSTANCE ===\n{decode}" + + def submit_pd_pair(self, spec: ProfileJobSpec) -> str: + """Submit both prefill and decode jobs.""" + r1 = self.submit(spec.as_prefill()) + r2 = self.submit(spec.as_decode()) + return f"[prefill] {r1}\n[decode] {r2}" diff --git a/schedulers/config.py 
b/schedulers/config.py index 4f35494..185c87f 100644 --- a/schedulers/config.py +++ b/schedulers/config.py @@ -102,14 +102,17 @@ def resolve_jwt_token(slurm_cfg: dict) -> str: cmd = slurm_cfg.get("jwt_token_cmd", "") if cmd: - result = subprocess.run( - shlex.split(cmd), - capture_output=True, - text=True, - timeout=30, - ) - if result.returncode == 0: - return result.stdout.strip() + try: + result = subprocess.run( + shlex.split(cmd), + capture_output=True, + text=True, + timeout=30, + ) + if result.returncode == 0: + return result.stdout.strip() + except (FileNotFoundError, OSError): + pass return "" diff --git a/scripts/submit_profile.py b/scripts/submit_profile.py index 8400bec..3701892 100644 --- a/scripts/submit_profile.py +++ b/scripts/submit_profile.py @@ -226,6 +226,37 @@ def _parse_args(argv: list[str] | None = None) -> argparse.Namespace: help="[debug] Print rendered manifest without submitting", ) + # -- PD disaggregation -- + pd = p.add_argument_group("PD disaggregation") + pd.add_argument( + "--pd", + action="store_true", + help="Submit a prefill + decode job pair (PD disaggregation)", + ) + pd.add_argument( + "--disagg-transfer-backend", + default="mooncake", + choices=["mooncake", "nixl"], + help="KV transfer backend (default: mooncake)", + ) + pd.add_argument( + "--disagg-bootstrap-port", + type=int, + default=8998, + help="Bootstrap port for PD coordination (default: 8998)", + ) + pd.add_argument( + "--disagg-prefill-pp", + type=int, + default=1, + help="Pipeline parallelism for prefill instance (default: 1)", + ) + pd.add_argument( + "--disagg-ib-device", + default="", + help="InfiniBand device for RDMA transfer", + ) + return p.parse_args(argv) @@ -250,6 +281,10 @@ def _build_spec(args: argparse.Namespace) -> ProfileJobSpec: log_dir=args.log_dir, job_name=args.job_name, extra_server_opts=args.extra_server_opts, + disagg_transfer_backend=args.disagg_transfer_backend, + disagg_bootstrap_port=args.disagg_bootstrap_port, + 
disagg_prefill_pp=args.disagg_prefill_pp, + disagg_ib_device=args.disagg_ib_device, ) @@ -305,10 +340,18 @@ def main(argv: list[str] | None = None) -> None: spec = _build_spec(args) scheduler = _build_scheduler(args) + is_pd = args.pd + if args.dry_run: - print(scheduler.dry_run(spec)) + if is_pd: + print(scheduler.render_pd_pair(spec)) + else: + print(scheduler.dry_run(spec)) else: - result = scheduler.submit(spec) + if is_pd: + result = scheduler.submit_pd_pair(spec) + else: + result = scheduler.submit(spec) print(result) From 6f1bda283467885a11972cdac46d14dd8439a796 Mon Sep 17 00:00:00 2001 From: Terrence Zhang Date: Tue, 17 Mar 2026 04:59:38 +0000 Subject: [PATCH 12/56] chore: add dev-setup.sh/dev-teardown.sh for one-click test clusters MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit - dev-setup.sh: auto-installs kind/kubectl, creates kind cluster, starts Slurm compose, runs flowsim init — all in one command - dev-teardown.sh: tears down both clusters cleanly - Supports 'kind', 'slurm', or 'all' (default) targets - Verified: kind cluster creation + K8s Job submit + PD pair submit all work --- dockerfiles/dev-setup.sh | 157 ++++++++++++++++++++++++++++++++++++ dockerfiles/dev-teardown.sh | 44 ++++++++++ 2 files changed, 201 insertions(+) create mode 100755 dockerfiles/dev-setup.sh create mode 100755 dockerfiles/dev-teardown.sh diff --git a/dockerfiles/dev-setup.sh b/dockerfiles/dev-setup.sh new file mode 100755 index 0000000..d948bf0 --- /dev/null +++ b/dockerfiles/dev-setup.sh @@ -0,0 +1,157 @@ +#!/usr/bin/env bash +# dev-setup.sh — one-shot setup for FlowSim test clusters (kind + Slurm) +# +# Usage: +# ./dockerfiles/dev-setup.sh # setup both kind + slurm +# ./dockerfiles/dev-setup.sh kind # kind only +# ./dockerfiles/dev-setup.sh slurm # slurm only +# +# Teardown: +# ./dockerfiles/dev-teardown.sh + +set -euo pipefail + +SCRIPT_DIR="$(cd "$(dirname "$0")" && pwd)" +KIND_VERSION="v0.27.0" +KIND_CLUSTER_NAME="flowsim" 
+KUBECTL_STABLE_URL="https://dl.k8s.io/release/stable.txt" +NVIDIA_DEVICE_PLUGIN="https://raw.githubusercontent.com/NVIDIA/k8s-device-plugin/v0.17.0/deployments/static/nvidia-device-plugin.yml" + +log() { printf "\033[1;32m[setup]\033[0m %s\n" "$*"; } +warn() { printf "\033[1;33m[setup]\033[0m %s\n" "$*"; } +err() { printf "\033[1;31m[setup]\033[0m %s\n" "$*" >&2; exit 1; } + +# ---------------------------------------------------------------- +# Dependency checks & auto-install +# ---------------------------------------------------------------- +ensure_docker() { + command -v docker >/dev/null || err "Docker is required but not installed." + docker info >/dev/null 2>&1 || err "Docker daemon not running." + log "Docker: $(docker --version)" +} + +ensure_kind() { + if command -v kind >/dev/null; then + log "kind already installed: $(kind version)" + return + fi + log "Installing kind ${KIND_VERSION}..." + curl -fsSLo /tmp/kind "https://kind.sigs.k8s.io/dl/${KIND_VERSION}/kind-linux-amd64" + chmod +x /tmp/kind + sudo mv /tmp/kind /usr/local/bin/kind + log "kind installed: $(kind version)" +} + +ensure_kubectl() { + if command -v kubectl >/dev/null; then + log "kubectl already installed" + return + fi + log "Installing kubectl..." 
+ local ver + ver="$(curl -fsSL "${KUBECTL_STABLE_URL}")" + curl -fsSLo /tmp/kubectl "https://dl.k8s.io/release/${ver}/bin/linux/amd64/kubectl" + chmod +x /tmp/kubectl + sudo mv /tmp/kubectl /usr/local/bin/kubectl + log "kubectl installed: $(kubectl version --client --short 2>/dev/null || true)" +} + +# ---------------------------------------------------------------- +# Kind cluster +# ---------------------------------------------------------------- +setup_kind() { + ensure_docker + ensure_kind + ensure_kubectl + + if kind get clusters 2>/dev/null | grep -q "^${KIND_CLUSTER_NAME}$"; then + warn "kind cluster '${KIND_CLUSTER_NAME}' already exists, skipping creation" + else + log "Creating kind cluster '${KIND_CLUSTER_NAME}' (1 control-plane + 2 GPU workers)..." + kind create cluster --name "${KIND_CLUSTER_NAME}" \ + --config "${SCRIPT_DIR}/kind-multi-node.yaml" + log "Installing NVIDIA device plugin..." + kubectl apply -f "${NVIDIA_DEVICE_PLUGIN}" + fi + + log "Cluster nodes:" + kubectl get nodes -o wide + echo + + log "Initializing FlowSim K8s config..." + local kubeconfig + kubeconfig="${HOME}/.kube/config" + flowsim init k8s \ + --kubeconfig "${kubeconfig}" \ + --context "kind-${KIND_CLUSTER_NAME}" \ + --namespace default \ + --force + echo + log "Kind cluster ready. Test with:" + log " flowsim submit --scheduler k8s --collect perf --model-path --dry-run" +} + +# ---------------------------------------------------------------- +# Slurm cluster (docker compose) +# ---------------------------------------------------------------- +setup_slurm() { + ensure_docker + + if ! docker compose version >/dev/null 2>&1; then + err "docker compose v2 is required but not available." + fi + + log "Building and starting Slurm cluster (slurmctld + 2 slurmd + slurmrestd)..." + docker compose -f "${SCRIPT_DIR}/slurm-compose.yaml" up -d --build + + log "Waiting for slurmctld to become ready..." + local retries=30 + while ! 
docker exec slurmctld sinfo >/dev/null 2>&1; do + retries=$((retries - 1)) + if [ "${retries}" -le 0 ]; then + err "slurmctld did not become ready in time" + fi + sleep 2 + done + + log "Slurm cluster status:" + docker exec slurmctld sinfo + echo + + log "Initializing FlowSim Slurm config..." + flowsim init slurm \ + --rest-url "http://localhost:6820" \ + --partition normal \ + --account default \ + --jwt-token-cmd "docker exec slurmctld scontrol token lifespan=3600" \ + --force + echo + log "Slurm cluster ready. Test with:" + log " flowsim submit --scheduler slurm --collect perf --model-path --dry-run" +} + +# ---------------------------------------------------------------- +# Main +# ---------------------------------------------------------------- +target="${1:-all}" + +case "${target}" in + kind) + setup_kind + ;; + slurm) + setup_slurm + ;; + all) + setup_kind + echo + setup_slurm + ;; + *) + echo "Usage: $0 [kind|slurm|all]" + exit 1 + ;; +esac + +echo +log "All done. Teardown with: ./dockerfiles/dev-teardown.sh" diff --git a/dockerfiles/dev-teardown.sh b/dockerfiles/dev-teardown.sh new file mode 100755 index 0000000..154b049 --- /dev/null +++ b/dockerfiles/dev-teardown.sh @@ -0,0 +1,44 @@ +#!/usr/bin/env bash +# dev-teardown.sh — tear down FlowSim test clusters +# +# Usage: +# ./dockerfiles/dev-teardown.sh # teardown both +# ./dockerfiles/dev-teardown.sh kind # kind only +# ./dockerfiles/dev-teardown.sh slurm # slurm only + +set -euo pipefail + +SCRIPT_DIR="$(cd "$(dirname "$0")" && pwd)" +KIND_CLUSTER_NAME="flowsim" + +log() { printf "\033[1;32m[teardown]\033[0m %s\n" "$*"; } +warn() { printf "\033[1;33m[teardown]\033[0m %s\n" "$*"; } + +teardown_kind() { + if command -v kind >/dev/null && kind get clusters 2>/dev/null | grep -q "^${KIND_CLUSTER_NAME}$"; then + log "Deleting kind cluster '${KIND_CLUSTER_NAME}'..." 
+ kind delete cluster --name "${KIND_CLUSTER_NAME}" + else + warn "kind cluster '${KIND_CLUSTER_NAME}' not found, skipping" + fi +} + +teardown_slurm() { + if docker compose -f "${SCRIPT_DIR}/slurm-compose.yaml" ps --quiet 2>/dev/null | head -1 | grep -q .; then + log "Stopping Slurm containers..." + docker compose -f "${SCRIPT_DIR}/slurm-compose.yaml" down -v + else + warn "Slurm containers not running, skipping" + fi +} + +target="${1:-all}" + +case "${target}" in + kind) teardown_kind ;; + slurm) teardown_slurm ;; + all) teardown_kind; teardown_slurm ;; + *) echo "Usage: $0 [kind|slurm|all]"; exit 1 ;; +esac + +log "Done." From d2bb08e5cfa48ce974c470d86836e0828a18ac59 Mon Sep 17 00:00:00 2001 From: Terrence Zhang Date: Tue, 17 Mar 2026 05:05:24 +0000 Subject: [PATCH 13/56] =?UTF-8?q?feat:=20add=20local=20scheduler=20backend?= =?UTF-8?q?=20=E2=80=94=20flowsim=20submit=20--scheduler=20local?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit - LocalScheduler runs profiling via subprocess on this machine - --local-gpus to set CUDA_VISIBLE_DEVICES (e.g. 
'0' or '0,1') - --local-workdir for custom working directory - No cluster config needed; replaces manual 'python scripts/run_stage_profile.py' - Supports --pd for local PD disaggregation testing - Skips cluster connection validation for local scheduler --- schedulers/__init__.py | 4 +- schedulers/local.py | 80 +++++++++++++++++++++++++++++++++++++++ scripts/submit_profile.py | 35 +++++++++++++++-- 3 files changed, 114 insertions(+), 5 deletions(-) create mode 100644 schedulers/local.py diff --git a/schedulers/__init__.py b/schedulers/__init__.py index 6e1547b..fd20eb2 100644 --- a/schedulers/__init__.py +++ b/schedulers/__init__.py @@ -1,12 +1,14 @@ -"""Scheduler backends for submitting FlowSim profiling jobs to K8s or Slurm.""" +"""Scheduler backends for submitting FlowSim profiling jobs.""" from schedulers.base import BaseScheduler, ProfileJobSpec from schedulers.k8s import K8sScheduler +from schedulers.local import LocalScheduler from schedulers.slurm import SlurmScheduler __all__ = [ "BaseScheduler", "K8sScheduler", + "LocalScheduler", "ProfileJobSpec", "SlurmScheduler", ] diff --git a/schedulers/local.py b/schedulers/local.py new file mode 100644 index 0000000..c1cb1fe --- /dev/null +++ b/schedulers/local.py @@ -0,0 +1,80 @@ +"""Local scheduler — run profiling directly on this machine. + +``render()`` returns the shell command string. +``submit()`` executes it as a subprocess. +""" + +from __future__ import annotations + +import os +import subprocess +import sys + +from schedulers.base import BaseScheduler, ProfileJobSpec + + +class LocalScheduler(BaseScheduler): + """Run profiling jobs locally via subprocess. + + Parameters + ---------- + gpus : str + ``CUDA_VISIBLE_DEVICES`` value (e.g., ``"0"`` or ``"0,1"``). + Empty string means use all visible GPUs. + workdir : str + Working directory for the subprocess. + Defaults to the FlowSim project root. 
+ """ + + def __init__( + self, + *, + gpus: str = "", + workdir: str = "", + ) -> None: + self.gpus = gpus + self.workdir = workdir or self._find_project_root() + + @staticmethod + def _find_project_root() -> str: + """Walk up from this file to find the FlowSim project root.""" + d = os.path.dirname(os.path.abspath(__file__)) + # schedulers/ is one level below project root + return os.path.dirname(d) + + def render(self, spec: ProfileJobSpec) -> str: + lines = [] + if self.gpus: + lines.append(f"export CUDA_VISIBLE_DEVICES={self.gpus}") + lines.append("export SGLANG_PROFILE_KERNELS=1") + lines.append(f"cd {self.workdir}") + lines.append(spec.build_shell_command()) + return "\n".join(lines) + + def submit(self, spec: ProfileJobSpec) -> str: + """Run the profiling command locally as a subprocess.""" + cmd = spec.build_shell_command() + + env = os.environ.copy() + env["SGLANG_PROFILE_KERNELS"] = "1" + if self.gpus: + env["CUDA_VISIBLE_DEVICES"] = self.gpus + + job_name = spec.default_job_name() + print(f"[local] Running {job_name}...") + print(f"[local] cmd: {cmd}") + print(f"[local] workdir: {self.workdir}") + if self.gpus: + print(f"[local] CUDA_VISIBLE_DEVICES={self.gpus}") + print() + + result = subprocess.run( + cmd, + shell=True, + cwd=self.workdir, + env=env, + ) + + if result.returncode != 0: + return f"[local] {job_name} FAILED (exit code {result.returncode})" + return f"[local] {job_name} completed successfully" diff --git a/scripts/submit_profile.py b/scripts/submit_profile.py index 3701892..18c68aa 100644 --- a/scripts/submit_profile.py +++ b/scripts/submit_profile.py @@ -1,9 +1,17 @@ #!/usr/bin/env python3 -"""Submit FlowSim profiling jobs to Kubernetes or Slurm. +"""Submit FlowSim profiling jobs locally, to Kubernetes, or to Slurm. 
Usage examples -------------- +Run locally (no cluster needed): + + flowsim submit \\ + --scheduler local \\ + --collect perf \\ + --model-path Qwen/Qwen3-8B \\ + --tp 1 --local-gpus 0 + Dry-run (print Kubernetes Job YAML to stdout): python scripts/submit_profile.py \\ @@ -45,6 +53,7 @@ from schedulers.base import ProfileJobSpec from schedulers.config import cfg_get, load_k8s_config, load_slurm_config, resolve_jwt_token from schedulers.k8s import K8sScheduler +from schedulers.local import LocalScheduler from schedulers.slurm import SlurmScheduler @@ -67,7 +76,7 @@ def _parse_args(argv: list[str] | None = None) -> argparse.Namespace: # -- Scheduler choice -- p.add_argument( "--scheduler", - choices=["k8s", "slurm"], + choices=["local", "k8s", "slurm"], required=True, help="Scheduler backend.", ) @@ -111,6 +120,19 @@ def _parse_args(argv: list[str] | None = None) -> argparse.Namespace: ) infra.add_argument("--job-name", default="") + # -- Local options -- + loc = p.add_argument_group("local options") + loc.add_argument( + "--local-gpus", + default="", + help="CUDA_VISIBLE_DEVICES for local execution (e.g. 
'0' or '0,1')", + ) + loc.add_argument( + "--local-workdir", + default="", + help="Working directory for local execution (default: FlowSim project root)", + ) + # -- Kubernetes-specific -- k8s = p.add_argument_group("kubernetes options (config: ~/.flowsim/k8s.yaml)") k8s.add_argument( @@ -289,7 +311,12 @@ def _build_spec(args: argparse.Namespace) -> ProfileJobSpec: def _build_scheduler(args: argparse.Namespace): - if args.scheduler == "k8s": + if args.scheduler == "local": + return LocalScheduler( + gpus=args.local_gpus, + workdir=args.local_workdir, + ) + elif args.scheduler == "k8s": node_sel = {} for item in args.k8s_node_selector: k, _, v = item.partition("=") @@ -334,7 +361,7 @@ def main(argv: list[str] | None = None) -> None: args.slurm_jwt_token = token # Validate required connection params before submit - if not args.dry_run: + if not args.dry_run and args.scheduler not in ("local",): _validate_connection(args) spec = _build_spec(args) From e5e303cf8f688a36f8c89059280a086f67e20330 Mon Sep 17 00:00:00 2001 From: Terrence Zhang Date: Tue, 17 Mar 2026 05:12:19 +0000 Subject: [PATCH 14/56] test: add 61 unit tests for scheduler CLI, backends, and config Tests cover: - ProfileJobSpec: job name, server opts, disagg params, as_prefill/decode - K8sScheduler.render: YAML validity, namespace, GPU resources, PVC, hostPath, nodeSelector, serviceAccount, labels, PD pair - SlurmScheduler.render: shebang, sbatch directives, docker/enroot/bare, modules, extra sbatch, constraint, time parsing - LocalScheduler.render: GPU selection, workdir, env vars - CLI init: help, required args, bad kubeconfig, save/load config, overwrite protection, --force - CLI submit: help, dry-run for local/k8s/slurm, PD pair, nixl backend - Config: save/load yaml, jwt_token static/cmd/bad_cmd, cfg_get All tests run inside the FlowSim Docker container. 
--- tests/unit/test_scheduler_cli.py | 580 +++++++++++++++++++++++++++++++ 1 file changed, 580 insertions(+) create mode 100644 tests/unit/test_scheduler_cli.py diff --git a/tests/unit/test_scheduler_cli.py b/tests/unit/test_scheduler_cli.py new file mode 100644 index 0000000..055e117 --- /dev/null +++ b/tests/unit/test_scheduler_cli.py @@ -0,0 +1,580 @@ +"""Unit tests for the scheduler CLI (flowsim init / submit) and backends.""" + +from __future__ import annotations + +import os +import tempfile +from pathlib import Path +from unittest import mock + +import pytest +import yaml + +from schedulers.base import ProfileJobSpec +from schedulers.k8s import K8sScheduler +from schedulers.local import LocalScheduler +from schedulers.slurm import SlurmScheduler + + +# ========================================================================= +# ProfileJobSpec +# ========================================================================= + +class TestProfileJobSpec: + """Tests for ProfileJobSpec dataclass methods.""" + + @pytest.fixture() + def spec(self) -> ProfileJobSpec: + return ProfileJobSpec( + collect="perf", + model_path="Qwen/Qwen3-8B", + tp=2, + bs=4, + input_len=1024, + ) + + def test_default_job_name(self, spec: ProfileJobSpec): + name = spec.default_job_name() + assert name == "flowsim-perf-qwen3-8b-bs4-il1024" + + def test_custom_job_name(self, spec: ProfileJobSpec): + spec.job_name = "my-job" + assert spec.default_job_name() == "my-job" + + def test_job_name_disagg_suffix(self, spec: ProfileJobSpec): + spec.disagg_mode = "prefill" + assert spec.default_job_name().endswith("-prefill") + + def test_build_server_opts_basic(self, spec: ProfileJobSpec): + opts = spec.build_server_opts() + assert "--model-path Qwen/Qwen3-8B" in opts + assert "--tp 2" in opts + assert "--disaggregation" not in opts + + def test_build_server_opts_dp(self, spec: ProfileJobSpec): + spec.dp = 4 + assert "--dp 4" in spec.build_server_opts() + + def test_build_server_opts_disagg(self, spec: 
ProfileJobSpec): + spec.disagg_mode = "prefill" + spec.disagg_transfer_backend = "nixl" + opts = spec.build_server_opts() + assert "--disaggregation-mode prefill" in opts + assert "--disaggregation-transfer-backend nixl" in opts + assert "--disaggregation-bootstrap-port 8998" in opts + + def test_build_server_opts_disagg_pp(self, spec: ProfileJobSpec): + spec.disagg_mode = "prefill" + spec.disagg_prefill_pp = 2 + assert "--disaggregation-prefill-pp 2" in spec.build_server_opts() + + def test_build_server_opts_extra(self, spec: ProfileJobSpec): + spec.extra_server_opts = "--some-flag" + assert "--some-flag" in spec.build_server_opts() + + def test_build_profile_command(self, spec: ProfileJobSpec): + cmd = spec.build_profile_command() + assert cmd[0] == "python" + assert "scripts/run_stage_profile.py" in cmd[1] + assert "--collect" in cmd + assert "perf" in cmd + assert "--bs" in cmd + assert "4" in cmd + + def test_build_shell_command_quotes_server_opts(self, spec: ProfileJobSpec): + shell = spec.build_shell_command() + # server-opts contains spaces, must be quoted + assert "--server-opts '" in shell or '--server-opts "' in shell + + def test_as_prefill(self, spec: ProfileJobSpec): + p = spec.as_prefill() + assert p.disagg_mode == "prefill" + assert spec.disagg_mode == "" # original unchanged + + def test_as_decode(self, spec: ProfileJobSpec): + d = spec.as_decode() + assert d.disagg_mode == "decode" + assert spec.disagg_mode == "" + + +# ========================================================================= +# K8sScheduler.render +# ========================================================================= + +class TestK8sScheduler: + """Tests for K8s Job manifest generation.""" + + @pytest.fixture() + def scheduler(self) -> K8sScheduler: + return K8sScheduler( + namespace="ml-team", + kubeconfig="/fake/kubeconfig", + context="prod", + shm_size="32Gi", + ) + + @pytest.fixture() + def spec(self) -> ProfileJobSpec: + return ProfileJobSpec( + collect="perf", + 
model_path="Qwen/Qwen3-8B", + gpus=2, + ) + + def test_render_valid_yaml(self, scheduler, spec): + rendered = scheduler.render(spec) + doc = yaml.safe_load(rendered) + assert doc["apiVersion"] == "batch/v1" + assert doc["kind"] == "Job" + + def test_render_namespace(self, scheduler, spec): + doc = yaml.safe_load(scheduler.render(spec)) + assert doc["metadata"]["namespace"] == "ml-team" + + def test_render_gpu_resources(self, scheduler, spec): + doc = yaml.safe_load(scheduler.render(spec)) + container = doc["spec"]["template"]["spec"]["containers"][0] + assert container["resources"]["limits"]["nvidia.com/gpu"] == "2" + + def test_render_shm_size(self, scheduler, spec): + doc = yaml.safe_load(scheduler.render(spec)) + volumes = doc["spec"]["template"]["spec"]["volumes"] + dshm = [v for v in volumes if v["name"] == "dshm"][0] + assert dshm["emptyDir"]["sizeLimit"] == "32Gi" + + def test_render_pvc_volume(self, spec): + sched = K8sScheduler(namespace="default", pvc_name="my-pvc") + doc = yaml.safe_load(sched.render(spec)) + volumes = doc["spec"]["template"]["spec"]["volumes"] + pvc_vol = [v for v in volumes if v["name"] == "output"] + assert len(pvc_vol) == 1 + assert pvc_vol[0]["persistentVolumeClaim"]["claimName"] == "my-pvc" + + def test_render_host_output_dir(self, spec): + sched = K8sScheduler(namespace="default", host_output_dir="/data/out") + doc = yaml.safe_load(sched.render(spec)) + volumes = doc["spec"]["template"]["spec"]["volumes"] + host_vol = [v for v in volumes if v["name"] == "output"] + assert len(host_vol) == 1 + assert host_vol[0]["hostPath"]["path"] == "/data/out" + + def test_render_node_selector(self, spec): + sched = K8sScheduler(namespace="default", node_selector={"gpu": "h100"}) + doc = yaml.safe_load(sched.render(spec)) + pod_spec = doc["spec"]["template"]["spec"] + assert pod_spec["nodeSelector"]["gpu"] == "h100" + + def test_render_service_account(self, spec): + sched = K8sScheduler(namespace="default", service_account="runner") + doc = 
yaml.safe_load(sched.render(spec)) + pod_spec = doc["spec"]["template"]["spec"] + assert pod_spec["serviceAccountName"] == "runner" + + def test_render_labels(self, scheduler, spec): + doc = yaml.safe_load(scheduler.render(spec)) + labels = doc["metadata"]["labels"] + assert labels["app"] == "flowsim" + assert labels["collect"] == "perf" + + def test_render_pd_pair(self, scheduler, spec): + output = scheduler.render_pd_pair(spec) + assert "PREFILL INSTANCE" in output + assert "DECODE INSTANCE" in output + # Both should be valid YAML docs + docs = output.split("# === DECODE INSTANCE ===") + assert len(docs) == 2 + + +# ========================================================================= +# SlurmScheduler.render +# ========================================================================= + +class TestSlurmScheduler: + """Tests for Slurm sbatch script generation.""" + + @pytest.fixture() + def scheduler(self) -> SlurmScheduler: + return SlurmScheduler( + partition="gpu-h100", + time_limit="01:00:00", + account="my-proj", + ) + + @pytest.fixture() + def spec(self) -> ProfileJobSpec: + return ProfileJobSpec( + collect="perf", + model_path="Qwen/Qwen3-8B", + gpus=4, + ) + + def test_render_shebang(self, scheduler, spec): + script = scheduler.render(spec) + assert script.startswith("#!/bin/bash\n") + + def test_render_sbatch_directives(self, scheduler, spec): + script = scheduler.render(spec) + assert "#SBATCH --partition=gpu-h100" in script + assert "#SBATCH --gpus-per-node=4" in script + assert "#SBATCH --time=01:00:00" in script + assert "#SBATCH --account=my-proj" in script + + def test_render_env_vars(self, scheduler, spec): + script = scheduler.render(spec) + assert "SGLANG_PROFILE_KERNELS=1" in script + + def test_render_command(self, scheduler, spec): + script = scheduler.render(spec) + assert "scripts/run_stage_profile.py" in script + assert "--collect perf" in script + + def test_render_docker_runtime(self, spec): + sched = SlurmScheduler( + 
partition="gpu", + container_runtime="docker", + container_mounts="/data:/data", + ) + script = sched.render(spec) + assert "docker run" in script + assert "-v /data:/data" in script + + def test_render_enroot_runtime(self, spec): + sched = SlurmScheduler( + partition="gpu", + container_runtime="enroot", + ) + script = sched.render(spec) + assert "srun --container-image" in script + + def test_render_modules(self, spec): + sched = SlurmScheduler( + partition="gpu", + modules=["cuda/12.6", "anaconda3"], + ) + script = sched.render(spec) + assert "module load cuda/12.6" in script + assert "module load anaconda3" in script + + def test_render_extra_sbatch(self, spec): + sched = SlurmScheduler( + partition="gpu", + extra_sbatch=["--mem=64G", "--exclusive"], + ) + script = sched.render(spec) + assert "#SBATCH --mem=64G" in script + assert "#SBATCH --exclusive" in script + + def test_render_constraint(self, spec): + sched = SlurmScheduler(partition="gpu", constraint="gpu80g") + script = sched.render(spec) + assert "#SBATCH --constraint=gpu80g" in script + + def test_time_parse_minutes(self): + sched = SlurmScheduler(partition="gpu", time_limit="02:30:00") + assert sched._parse_time_minutes() == 150 + + +# ========================================================================= +# LocalScheduler.render +# ========================================================================= + +class TestLocalScheduler: + """Tests for local execution backend.""" + + @pytest.fixture() + def spec(self) -> ProfileJobSpec: + return ProfileJobSpec( + collect="perf", + model_path="Qwen/Qwen3-8B", + ) + + def test_render_with_gpus(self, spec): + sched = LocalScheduler(gpus="0,1") + output = sched.render(spec) + assert "CUDA_VISIBLE_DEVICES=0,1" in output + + def test_render_without_gpus(self, spec): + sched = LocalScheduler(gpus="") + output = sched.render(spec) + assert "CUDA_VISIBLE_DEVICES" not in output + + def test_render_has_command(self, spec): + sched = LocalScheduler() + output = 
sched.render(spec) + assert "scripts/run_stage_profile.py" in output + assert "SGLANG_PROFILE_KERNELS=1" in output + + def test_render_workdir(self, spec): + sched = LocalScheduler(workdir="/my/project") + output = sched.render(spec) + assert "cd /my/project" in output + + def test_dry_run_equals_render(self, spec): + sched = LocalScheduler(gpus="0") + assert sched.dry_run(spec) == sched.render(spec) + + +# ========================================================================= +# CLI: flowsim init +# ========================================================================= + +class TestCLIInit: + """Tests for `flowsim init` subcommand.""" + + def test_init_no_args_shows_help(self, capsys): + from scripts.cli import _cmd_init + with pytest.raises(SystemExit) as exc_info: + _cmd_init([]) + assert exc_info.value.code != 0 + + def test_init_k8s_help(self, capsys): + from scripts.cli import _cmd_init + with pytest.raises(SystemExit) as exc_info: + _cmd_init(["k8s", "--help"]) + assert exc_info.value.code == 0 + out = capsys.readouterr().out + assert "--kubeconfig" in out + assert "--namespace" in out + + def test_init_slurm_help(self, capsys): + from scripts.cli import _cmd_init + with pytest.raises(SystemExit) as exc_info: + _cmd_init(["slurm", "--help"]) + assert exc_info.value.code == 0 + out = capsys.readouterr().out + assert "--rest-url" in out + assert "--partition" in out + + def test_init_k8s_missing_required(self): + from scripts.cli import _cmd_init + with pytest.raises(SystemExit) as exc_info: + _cmd_init(["k8s"]) + assert exc_info.value.code != 0 + + def test_init_slurm_missing_required(self): + from scripts.cli import _cmd_init + with pytest.raises(SystemExit) as exc_info: + _cmd_init(["slurm"]) + assert exc_info.value.code != 0 + + def test_init_k8s_bad_kubeconfig(self): + from scripts.cli import _cmd_init + rc = _cmd_init(["k8s", "--kubeconfig", "/nonexistent/path", "--namespace", "ns"]) + assert rc != 0 + + def test_init_k8s_saves_config(self, 
tmp_path: Path): + # Create a fake kubeconfig + kube = tmp_path / "kubeconfig" + kube.write_text("apiVersion: v1\nclusters: []\n") + + config_dir = tmp_path / "flowsim" + with mock.patch("scripts.cli._CONFIG_DIR", config_dir): + from scripts.cli import _cmd_init + rc = _cmd_init([ + "k8s", + "--kubeconfig", str(kube), + "--namespace", "test-ns", + ]) + assert rc == 0 + cfg_file = config_dir / "k8s.yaml" + assert cfg_file.exists() + cfg = yaml.safe_load(cfg_file.read_text()) + assert cfg["namespace"] == "test-ns" + assert cfg["kubeconfig"] == str(kube) + + def test_init_slurm_saves_config(self, tmp_path: Path): + config_dir = tmp_path / "flowsim" + with mock.patch("scripts.cli._CONFIG_DIR", config_dir): + from scripts.cli import _cmd_init + rc = _cmd_init([ + "slurm", + "--rest-url", "http://localhost:6820", + "--partition", "gpu", + "--account", "proj", + "--jwt-token", "fake-token", + ]) + assert rc == 0 + cfg_file = config_dir / "slurm.yaml" + assert cfg_file.exists() + cfg = yaml.safe_load(cfg_file.read_text()) + assert cfg["rest_url"] == "http://localhost:6820" + assert cfg["partition"] == "gpu" + assert cfg["account"] == "proj" + + def test_init_refuses_overwrite(self, tmp_path: Path): + config_dir = tmp_path / "flowsim" + config_dir.mkdir() + (config_dir / "slurm.yaml").write_text("existing: true\n") + + with mock.patch("scripts.cli._CONFIG_DIR", config_dir): + from scripts.cli import _cmd_init + rc = _cmd_init([ + "slurm", + "--rest-url", "http://localhost:6820", + "--partition", "gpu", + "--account", "proj", + "--jwt-token", "tok", + ]) + assert rc != 0 # should refuse + + def test_init_force_overwrite(self, tmp_path: Path): + config_dir = tmp_path / "flowsim" + config_dir.mkdir() + (config_dir / "slurm.yaml").write_text("existing: true\n") + + with mock.patch("scripts.cli._CONFIG_DIR", config_dir): + from scripts.cli import _cmd_init + rc = _cmd_init([ + "slurm", + "--rest-url", "http://localhost:6820", + "--partition", "gpu", + "--account", "proj", + 
"--jwt-token", "tok", + "--force", + ]) + assert rc == 0 + cfg = yaml.safe_load((config_dir / "slurm.yaml").read_text()) + assert cfg["rest_url"] == "http://localhost:6820" + + +# ========================================================================= +# CLI: flowsim submit (parse/dry-run only, no actual submission) +# ========================================================================= + +class TestCLISubmit: + """Tests for `flowsim submit` argument parsing and dry-run.""" + + def _run(self, *args: str, expect_ok: bool = True) -> str: + """Run submit via the Python function, capture stdout.""" + from scripts.submit_profile import main as submit_main + import io + from contextlib import redirect_stdout + buf = io.StringIO() + with redirect_stdout(buf): + submit_main(list(args)) + return buf.getvalue() + + def test_submit_help(self, capsys): + from scripts.submit_profile import main as submit_main + with pytest.raises(SystemExit) as exc_info: + submit_main(["--help"]) + assert exc_info.value.code == 0 + out = capsys.readouterr().out + assert "--scheduler" in out + assert "local" in out + + def test_submit_missing_required(self): + from scripts.submit_profile import main as submit_main + with pytest.raises(SystemExit): + submit_main([]) + + def test_submit_local_dry_run(self): + out = self._run( + "--scheduler", "local", + "--collect", "perf", + "--model-path", "Qwen/Qwen3-8B", + "--dry-run", + ) + assert "scripts/run_stage_profile.py" in out + assert "SGLANG_PROFILE_KERNELS=1" in out + + def test_submit_local_dry_run_with_gpus(self): + out = self._run( + "--scheduler", "local", + "--collect", "perf", + "--model-path", "Qwen/Qwen3-8B", + "--local-gpus", "0,1", + "--dry-run", + ) + assert "CUDA_VISIBLE_DEVICES=0,1" in out + + def test_submit_k8s_dry_run(self): + out = self._run( + "--scheduler", "k8s", + "--collect", "perf", + "--model-path", "Qwen/Qwen3-8B", + "--k8s-namespace", "default", + "--dry-run", + ) + assert "apiVersion: batch/v1" in out + assert 
"kind: Job" in out + + def test_submit_slurm_dry_run(self): + out = self._run( + "--scheduler", "slurm", + "--collect", "perf", + "--model-path", "Qwen/Qwen3-8B", + "--slurm-partition", "gpu", + "--slurm-rest-url", "http://fake:6820", + "--slurm-jwt-token", "fake-token", + "--dry-run", + ) + assert "#!/bin/bash" in out + assert "#SBATCH --partition=gpu" in out + + def test_submit_pd_dry_run(self): + out = self._run( + "--scheduler", "local", + "--collect", "perf", + "--model-path", "Qwen/Qwen3-8B", + "--pd", + "--dry-run", + ) + assert "PREFILL INSTANCE" in out + assert "DECODE INSTANCE" in out + assert "--disaggregation-mode prefill" in out + assert "--disaggregation-mode decode" in out + + def test_submit_pd_nixl_backend(self): + out = self._run( + "--scheduler", "local", + "--collect", "perf", + "--model-path", "Qwen/Qwen3-8B", + "--pd", + "--disagg-transfer-backend", "nixl", + "--dry-run", + ) + assert "--disaggregation-transfer-backend nixl" in out + + +# ========================================================================= +# Config loading +# ========================================================================= + +class TestConfig: + """Tests for config file loading and saving.""" + + def test_save_and_load_yaml(self, tmp_path: Path): + from schedulers.config import _save_yaml, _load_yaml + data = {"rest_url": "http://localhost:6820", "partition": "gpu"} + path = tmp_path / "test.yaml" + _save_yaml(path, data) + loaded = _load_yaml(path) + assert loaded == data + + def test_resolve_jwt_token_static(self): + from schedulers.config import resolve_jwt_token + cfg = {"jwt_token": "my-secret"} + assert resolve_jwt_token(cfg) == "my-secret" + + def test_resolve_jwt_token_cmd(self): + from schedulers.config import resolve_jwt_token + cfg = {"jwt_token_cmd": "echo test-token-123"} + assert resolve_jwt_token(cfg) == "test-token-123" + + def test_resolve_jwt_token_bad_cmd(self): + from schedulers.config import resolve_jwt_token + cfg = {"jwt_token_cmd": 
"/nonexistent/binary"} + # Should not raise, just return empty + assert resolve_jwt_token(cfg) == "" + + def test_resolve_jwt_token_empty(self): + from schedulers.config import resolve_jwt_token + assert resolve_jwt_token({}) == "" + + def test_cfg_get(self): + from schedulers.config import cfg_get + cfg = {"key": "value", "empty": ""} + assert cfg_get(cfg, "key", "default") == "value" + assert cfg_get(cfg, "empty", "default") == "" + assert cfg_get(cfg, "missing", "default") == "default" From c60cd1180f11f412d27f81be94ddced356662369 Mon Sep 17 00:00:00 2001 From: Terrence Zhang Date: Tue, 17 Mar 2026 05:39:02 +0000 Subject: [PATCH 15/56] feat: persistent logs under output_dir, flowsim status/logs, refuse K8s submit without PVC - log_dir is now derived as {output_dir}/logs/ (single volume covers both) - LocalScheduler.submit() tees stdout/stderr to log files in real time - K8s submit refuses if no --k8s-pvc or --k8s-host-output-dir (prevents data loss) - Slurm output_dir defaults to ~/flowsim_traces (shared filesystem) - Local output_dir defaults to {project}/stage_traces/ - Add flowsim status/logs subcommands (K8s via API, Slurm via slurmrestd, local via log files) - Submit prints result location + follow-up commands after every job - Add integration tests for local scheduler --- .gitignore | 3 +- schedulers/base.py | 31 ++- schedulers/k8s.py | 103 ++++++++++ schedulers/local.py | 117 ++++++++++- schedulers/slurm.py | 81 ++++++++ scripts/cli.py | 22 +++ scripts/run_stage_profile.py | 8 +- scripts/status_profile.py | 157 +++++++++++++++ scripts/submit_profile.py | 48 ++++- tests/integration/test_scheduler_local.py | 229 ++++++++++++++++++++++ 10 files changed, 780 insertions(+), 19 deletions(-) create mode 100644 scripts/status_profile.py create mode 100644 tests/integration/test_scheduler_local.py diff --git a/.gitignore b/.gitignore index 706276b..b70854b 100644 --- a/.gitignore +++ b/.gitignore @@ -6,4 +6,5 @@ tests/test-artifacts/ unknown_kernels.json 
/artifacts /server_profile -/server_simulate \ No newline at end of file +/server_simulate +/stage_traces/ \ No newline at end of file diff --git a/schedulers/base.py b/schedulers/base.py index 1427e8e..3cbc2e7 100644 --- a/schedulers/base.py +++ b/schedulers/base.py @@ -35,7 +35,6 @@ class ProfileJobSpec: host: str = "0.0.0.0" port: int = 30001 output_dir: str = "/flowsim/stage_traces" - log_dir: str = "/flowsim/tests/test-artifacts" job_name: str = "" # -- PD disaggregation -- @@ -70,6 +69,11 @@ def build_server_opts(self) -> str: parts.append(self.extra_server_opts) return " ".join(parts) + @property + def log_dir(self) -> str: + """Server logs go under ``{output_dir}/logs/``.""" + return self.output_dir + "/logs" + def build_profile_command(self) -> list[str]: """Build the full ``python scripts/run_stage_profile.py ...`` command.""" cmd = [ @@ -152,6 +156,31 @@ def render(self, spec: ProfileJobSpec) -> str: def submit(self, spec: ProfileJobSpec) -> str: """Submit the job and return a job identifier string.""" + def status(self, job_id: str) -> dict: + """Query job status. Returns dict with at least 'state' key. + + Subclasses should return:: + + { + "state": "Pending" | "Running" | "Succeeded" | "Failed" | ..., + "message": "human-readable detail", + "output_hint": "where to find trace files", + } + """ + raise NotImplementedError(f"{type(self).__name__} does not support status queries") + + def logs(self, job_id: str, *, tail: int = 100) -> str: + """Retrieve recent log output for a job. + + Parameters + ---------- + job_id : str + Job name (K8s) or job ID (Slurm) or log prefix (local). + tail : int + Number of lines from the end to return. 
+ """ + raise NotImplementedError(f"{type(self).__name__} does not support log retrieval") + def dry_run(self, spec: ProfileJobSpec) -> str: """Render and return the manifest without submitting.""" return self.render(spec) diff --git a/schedulers/k8s.py b/schedulers/k8s.py index 6b58ea9..1640f5c 100644 --- a/schedulers/k8s.py +++ b/schedulers/k8s.py @@ -176,3 +176,106 @@ def submit(self, spec: ProfileJobSpec) -> str: body=body, ) return f"job.batch/{resp.metadata.name} created (namespace={resp.metadata.namespace})" + + # ----------------------------------------------------------------- + # Helpers shared by status / logs + # ----------------------------------------------------------------- + + def _load_k8s(self): + """Load kubeconfig and return (BatchV1Api, CoreV1Api).""" + from kubernetes import client as k8s_client, config as k8s_config + + config_kwargs: dict = {} + if self.kubeconfig: + config_kwargs["config_file"] = self.kubeconfig + if self.context: + config_kwargs["context"] = self.context + try: + k8s_config.load_kube_config(**config_kwargs) + except k8s_config.ConfigException: + k8s_config.load_incluster_config() + + return k8s_client.BatchV1Api(), k8s_client.CoreV1Api() + + def status(self, job_id: str) -> dict: + """Query K8s Job status by job name.""" + try: + from kubernetes import client as k8s_client + except ImportError: + raise RuntimeError("pip install kubernetes") + + batch_api, core_api = self._load_k8s() + + job = batch_api.read_namespaced_job(name=job_id, namespace=self.namespace) + st = job.status + + # Determine state + if st.succeeded and st.succeeded > 0: + state = "Succeeded" + elif st.failed and st.failed > 0: + state = "Failed" + elif st.active and st.active > 0: + state = "Running" + else: + state = "Pending" + + # Pod info + pods = core_api.list_namespaced_pod( + namespace=self.namespace, + label_selector=f"job-name={job_id}", + ) + pod_statuses = [] + for pod in pods.items: + phase = pod.status.phase + node = pod.spec.node_name or 
"unscheduled" + pod_statuses.append(f"{pod.metadata.name} ({phase}, node={node})") + + output_hint = "" + if self.pvc_name: + output_hint = f"Traces persisted on PVC '{self.pvc_name}'" + elif self.host_output_dir: + output_hint = f"Traces at hostPath {self.host_output_dir} on the scheduled node" + else: + output_hint = "WARNING: no PVC or hostPath configured — traces are lost when pod exits" + + msg_parts = [f"Job: {job_id} Namespace: {self.namespace} State: {state}"] + if pod_statuses: + msg_parts.append("Pods: " + ", ".join(pod_statuses)) + msg_parts.append(output_hint) + + return { + "state": state, + "message": "\n".join(msg_parts), + "output_hint": output_hint, + } + + def logs(self, job_id: str, *, tail: int = 100) -> str: + """Retrieve logs from the pod(s) of a K8s Job.""" + try: + from kubernetes import client as k8s_client + except ImportError: + raise RuntimeError("pip install kubernetes") + + _, core_api = self._load_k8s() + + pods = core_api.list_namespaced_pod( + namespace=self.namespace, + label_selector=f"job-name={job_id}", + ) + if not pods.items: + return f"No pods found for job {job_id} in namespace {self.namespace}" + + parts = [] + for pod in pods.items: + name = pod.metadata.name + try: + log_text = core_api.read_namespaced_pod_log( + name=name, + namespace=self.namespace, + tail_lines=tail, + ) + except Exception as exc: + log_text = f"(error reading logs: {exc})" + parts.append(f"=== {name} ===\n{log_text}") + + return "\n".join(parts) diff --git a/schedulers/local.py b/schedulers/local.py index c1cb1fe..da3b03a 100644 --- a/schedulers/local.py +++ b/schedulers/local.py @@ -1,7 +1,7 @@ """Local scheduler — run profiling directly on this machine. ``render()`` returns the shell command string. -``submit()`` executes it as a subprocess. +``submit()`` executes it as a subprocess, with stdout/stderr tee'd to log files. 
""" from __future__ import annotations @@ -9,6 +9,7 @@ import os import subprocess import sys +import time from schedulers.base import BaseScheduler, ProfileJobSpec @@ -52,7 +53,11 @@ def render(self, spec: ProfileJobSpec) -> str: return "\n".join(lines) def submit(self, spec: ProfileJobSpec) -> str: - """Run the profiling command locally as a subprocess.""" + """Run the profiling command locally as a subprocess. + + stdout and stderr are streamed to the terminal *and* saved to + log files under ``spec.log_dir``. + """ cmd = spec.build_shell_command() env = os.environ.copy() @@ -61,20 +66,112 @@ def submit(self, spec: ProfileJobSpec) -> str: env["CUDA_VISIBLE_DEVICES"] = self.gpus job_name = spec.default_job_name() + log_dir = spec.log_dir + os.makedirs(log_dir, exist_ok=True) + ts = int(time.time()) + stdout_path = os.path.join(log_dir, f"{job_name}_{ts}.stdout.log") + stderr_path = os.path.join(log_dir, f"{job_name}_{ts}.stderr.log") + print(f"[local] Running {job_name}...") print(f"[local] cmd: {cmd}") print(f"[local] workdir: {self.workdir}") if self.gpus: print(f"[local] CUDA_VISIBLE_DEVICES={self.gpus}") + print(f"[local] logs: {stdout_path}") + print(f"[local] {stderr_path}") print() - result = subprocess.run( - cmd, - shell=True, - cwd=self.workdir, - env=env, + with open(stdout_path, "w") as fout, open(stderr_path, "w") as ferr: + proc = subprocess.Popen( + cmd, + shell=True, + cwd=self.workdir, + env=env, + stdout=subprocess.PIPE, + stderr=subprocess.PIPE, + ) + # Stream stdout/stderr to terminal + log files in real time. + # Use threads to avoid blocking on one stream while the other + # fills its OS pipe buffer. 
+ import threading + + def _tee(src, dest_file, dest_stream): + for line in src: + dest_stream.buffer.write(line) + dest_stream.buffer.flush() + dest_file.write(line.decode("utf-8", errors="replace")) + dest_file.flush() + + t_out = threading.Thread( + target=_tee, args=(proc.stdout, fout, sys.stdout), daemon=True, + ) + t_err = threading.Thread( + target=_tee, args=(proc.stderr, ferr, sys.stderr), daemon=True, + ) + t_out.start() + t_err.start() + proc.wait() + t_out.join() + t_err.join() + + if proc.returncode != 0: + return ( + f"[local] {job_name} FAILED (exit code {proc.returncode})\n" + f"[local] stdout log: {stdout_path}\n" + f"[local] stderr log: {stderr_path}" + ) + return ( + f"[local] {job_name} completed successfully\n" + f"[local] stdout log: {stdout_path}\n" + f"[local] stderr log: {stderr_path}" ) - if result.returncode != 0: - return f"[local] {job_name} FAILED (exit code {result.returncode})" - return f"[local] {job_name} completed successfully" + def status(self, job_id: str) -> dict: + """Check local job status by looking for log files. + + ``job_id`` is the job name prefix used in log filenames. 
+ """ + import glob + + log_dir = os.path.join(self.workdir, "stage_traces", "logs") + pattern = os.path.join(log_dir, f"{job_id}_*.stdout.log") + matches = sorted(glob.glob(pattern)) + + if not matches: + return { + "state": "NotFound", + "message": f"No logs found matching {pattern}", + "output_hint": "", + } + + latest = matches[-1] + stderr_log = latest.replace(".stdout.log", ".stderr.log") + trace_dir = os.path.join(self.workdir, "stage_traces") + + return { + "state": "Completed", + "message": ( + f"Latest log: {latest}\n" + f"Stderr log: {stderr_log}\n" + f"Traces dir: {trace_dir}" + ), + "output_hint": trace_dir, + } + + def logs(self, job_id: str, *, tail: int = 100) -> str: + """Read the last *tail* lines from the most recent local log file.""" + import glob + + log_dir = os.path.join(self.workdir, "stage_traces", "logs") + pattern = os.path.join(log_dir, f"{job_id}_*.stdout.log") + matches = sorted(glob.glob(pattern)) + + if not matches: + return f"No logs found matching {pattern}" + + latest = matches[-1] + with open(latest) as f: + all_lines = f.readlines() + + header = f"=== {latest} (last {tail} lines) ===\n" + return header + "".join(all_lines[-tail:]) diff --git a/schedulers/slurm.py b/schedulers/slurm.py index 9261a15..9ec84a6 100644 --- a/schedulers/slurm.py +++ b/schedulers/slurm.py @@ -225,6 +225,87 @@ def submit(self, spec: ProfileJobSpec) -> str: job_id = body.get("job_id", "unknown") return f"Submitted batch job {job_id}" + def _rest_get(self, path: str) -> dict: + """GET a slurmrestd endpoint and return parsed JSON.""" + if not self.rest_url: + raise RuntimeError("--slurm-rest-url is required") + if not self.jwt_token: + raise RuntimeError("--slurm-jwt-token is required") + + url = f"{self.rest_url}{path}" + headers = { + "X-SLURM-USER-TOKEN": self.jwt_token, + } + req = urllib.request.Request(url, headers=headers, method="GET") + + ctx: ssl.SSLContext | None = None + if not self.verify_ssl: + ctx = ssl.create_default_context() + 
ctx.check_hostname = False + ctx.verify_mode = ssl.CERT_NONE + + try: + with urllib.request.urlopen(req, context=ctx) as resp: + return json.loads(resp.read()) + except urllib.error.HTTPError as exc: + detail = exc.read().decode(errors="replace") + raise RuntimeError(f"slurmrestd returned HTTP {exc.code}:\n{detail}") from exc + except urllib.error.URLError as exc: + raise RuntimeError(f"Cannot reach slurmrestd at {self.rest_url}: {exc.reason}") from exc + + def status(self, job_id: str) -> dict: + """Query Slurm job status via slurmrestd.""" + body = self._rest_get(f"/slurm/{self.api_version}/job/{job_id}") + + errors = body.get("errors") or [] + if errors: + msgs = "; ".join(e.get("error", str(e)) for e in errors) + raise RuntimeError(f"slurmrestd error: {msgs}") + + jobs = body.get("jobs", []) + if not jobs: + return {"state": "Unknown", "message": f"No job found with ID {job_id}", "output_hint": ""} + + job = jobs[0] + state = job.get("job_state", ["UNKNOWN"]) + if isinstance(state, list): + state = state[0] if state else "UNKNOWN" + name = job.get("name", "") + node_list = job.get("nodes", "") + output_file = job.get("standard_output", "") + work_dir = job.get("current_working_directory", "") + + msg_parts = [ + f"Job ID: {job_id} Name: {name} State: {state}", + f"Nodes: {node_list}" if node_list else "Nodes: (not yet assigned)", + ] + if output_file: + msg_parts.append(f"Output log: {output_file}") + if work_dir: + msg_parts.append(f"Working dir: {work_dir}") + + return { + "state": state, + "message": "\n".join(msg_parts), + "output_hint": output_file, + } + + def logs(self, job_id: str, *, tail: int = 100) -> str: + """Retrieve log output for a Slurm job. + + Tries to read the sbatch output file via slurmrestd. + Falls back to showing job info if direct log access isn't available. 
+ """ + info = self.status(job_id) + output_file = info.get("output_hint", "") + lines = [info["message"], ""] + + if output_file: + lines.append(f"To view full logs on the cluster:") + lines.append(f" tail -{tail} {output_file}") + + return "\n".join(lines) + def _parse_time_minutes(self) -> int: """Convert HH:MM:SS time_limit to total minutes.""" parts = self.time_limit.split(":") diff --git a/scripts/cli.py b/scripts/cli.py index 5cd370a..c17796d 100644 --- a/scripts/cli.py +++ b/scripts/cli.py @@ -152,6 +152,16 @@ def main(argv: list[str] | None = None) -> int: help="Submit a profiling job to K8s or Slurm", add_help=False, ) + sub.add_parser( + "status", + help="Query job status (local/k8s/slurm)", + add_help=False, + ) + sub.add_parser( + "logs", + help="Retrieve job logs (local/k8s/slurm)", + add_help=False, + ) args, remaining = parser.parse_known_args(argv) @@ -164,6 +174,18 @@ def main(argv: list[str] | None = None) -> int: submit_main(remaining) return 0 + if args.command == "status": + from scripts.status_profile import main_status + + main_status(remaining) + return 0 + + if args.command == "logs": + from scripts.status_profile import main_logs + + main_logs(remaining) + return 0 + parser.print_help() return 1 diff --git a/scripts/run_stage_profile.py b/scripts/run_stage_profile.py index 8346e3b..c27d6f3 100644 --- a/scripts/run_stage_profile.py +++ b/scripts/run_stage_profile.py @@ -714,8 +714,8 @@ def parse_args(argv: Optional[list] = None) -> argparse.Namespace: ) srv.add_argument( "--log-dir", - default="/flowsim/tests/test-artifacts", - help="Directory for server logs", + default="", + help="Directory for server logs (default: {output-dir}/logs/)", ) return p.parse_args(argv) @@ -873,6 +873,10 @@ def _write_summary(args, summary: list[dict]) -> None: def main(argv: Optional[list] = None) -> int: args = parse_args(argv) + # Default log_dir to {output_dir}/logs/ if not specified + if not args.log_dir: + args.log_dir = os.path.join(args.output_dir, 
"logs") + if args.decode_tokens < 2: print( "[ERROR] --decode-tokens must be >= 2. " diff --git a/scripts/status_profile.py b/scripts/status_profile.py new file mode 100644 index 0000000..bfcce41 --- /dev/null +++ b/scripts/status_profile.py @@ -0,0 +1,157 @@ +#!/usr/bin/env python3 +"""Query FlowSim profiling job status and logs. + +Usage examples +-------------- + +Check K8s job status:: + + flowsim status --scheduler k8s --job flowsim-perf-qwen3-8b-bs1-il2048 + +Get K8s job logs:: + + flowsim logs --scheduler k8s --job flowsim-perf-qwen3-8b-bs1-il2048 + +Check Slurm job status:: + + flowsim status --scheduler slurm --job 12345 + +Check local job status (by job name prefix):: + + flowsim status --scheduler local --job flowsim-perf-qwen3-8b-bs1-il2048 +""" + +from __future__ import annotations + +import argparse +import os +import sys + +from schedulers.config import cfg_get, load_k8s_config, load_slurm_config, resolve_jwt_token +from schedulers.k8s import K8sScheduler +from schedulers.local import LocalScheduler +from schedulers.slurm import SlurmScheduler + + +def _d(env_var: str, cfg: dict, key: str, fallback: str = "") -> str: + return os.environ.get(env_var, "") or cfg_get(cfg, key, fallback) + + +def _parse_args(argv: list[str] | None = None) -> argparse.Namespace: + k8s_cfg = load_k8s_config() + slurm_cfg = load_slurm_config() + + p = argparse.ArgumentParser( + description="Query FlowSim profiling job status or logs.", + formatter_class=argparse.RawDescriptionHelpFormatter, + ) + + p.add_argument( + "--scheduler", + choices=["local", "k8s", "slurm"], + required=True, + ) + p.add_argument( + "--job", + required=True, + help="Job name (k8s/local) or job ID (slurm)", + ) + p.add_argument( + "--tail", + type=int, + default=100, + help="Number of log lines to show (default: 100)", + ) + + # -- Local options -- + p.add_argument("--local-workdir", default="") + + # -- K8s options -- + p.add_argument( + "--k8s-namespace", + default=_d("FLOWSIM_K8S_NAMESPACE", 
k8s_cfg, "namespace", "default"), + ) + p.add_argument( + "--k8s-kubeconfig", + default=_d("KUBECONFIG", k8s_cfg, "kubeconfig", ""), + ) + p.add_argument( + "--k8s-context", + default=_d("FLOWSIM_K8S_CONTEXT", k8s_cfg, "context", ""), + ) + + # -- Slurm options -- + p.add_argument( + "--slurm-rest-url", + default=_d("FLOWSIM_SLURM_REST_URL", slurm_cfg, "rest_url", ""), + ) + p.add_argument( + "--slurm-jwt-token", + default=_d("FLOWSIM_SLURM_JWT_TOKEN", slurm_cfg, "jwt_token", ""), + ) + p.add_argument( + "--slurm-api-version", + default=_d("FLOWSIM_SLURM_API_VERSION", slurm_cfg, "api_version", "v0.0.40"), + ) + p.add_argument( + "--slurm-no-verify-ssl", + action="store_true", + ) + + return p.parse_args(argv) + + +def _build_scheduler(args: argparse.Namespace): + if args.scheduler == "local": + return LocalScheduler(workdir=args.local_workdir) + elif args.scheduler == "k8s": + return K8sScheduler( + namespace=args.k8s_namespace, + kubeconfig=args.k8s_kubeconfig, + context=args.k8s_context, + ) + else: + return SlurmScheduler( + rest_url=args.slurm_rest_url, + jwt_token=args.slurm_jwt_token, + api_version=args.slurm_api_version, + verify_ssl=not args.slurm_no_verify_ssl, + ) + + +def main_status(argv: list[str] | None = None) -> None: + args = _parse_args(argv) + + # Resolve Slurm JWT if needed + if args.scheduler == "slurm" and not args.slurm_jwt_token: + slurm_cfg = load_slurm_config() + token = resolve_jwt_token(slurm_cfg) + if token: + args.slurm_jwt_token = token + + scheduler = _build_scheduler(args) + try: + info = scheduler.status(args.job) + print(info["message"]) + except Exception as exc: + print(f"Error: {exc}", file=sys.stderr) + sys.exit(1) + + +def main_logs(argv: list[str] | None = None) -> None: + args = _parse_args(argv) + + # Resolve Slurm JWT if needed + if args.scheduler == "slurm" and not args.slurm_jwt_token: + slurm_cfg = load_slurm_config() + token = resolve_jwt_token(slurm_cfg) + if token: + args.slurm_jwt_token = token + + scheduler = 
_build_scheduler(args) + try: + text = scheduler.logs(args.job, tail=args.tail) + print(text) + except Exception as exc: + print(f"Error: {exc}", file=sys.stderr) + sys.exit(1) diff --git a/scripts/submit_profile.py b/scripts/submit_profile.py index 18c68aa..25061f1 100644 --- a/scripts/submit_profile.py +++ b/scripts/submit_profile.py @@ -114,10 +114,7 @@ def _parse_args(argv: list[str] | None = None) -> argparse.Namespace: ) infra.add_argument("--host", default="0.0.0.0") infra.add_argument("--port", type=int, default=30001) - infra.add_argument("--output-dir", default="/flowsim/stage_traces") - infra.add_argument( - "--log-dir", default="/flowsim/tests/test-artifacts", - ) + infra.add_argument("--output-dir", default="") infra.add_argument("--job-name", default="") # -- Local options -- @@ -300,7 +297,6 @@ def _build_spec(args: argparse.Namespace) -> ProfileJobSpec: host=args.host, port=args.port, output_dir=args.output_dir, - log_dir=args.log_dir, job_name=args.job_name, extra_server_opts=args.extra_server_opts, disagg_transfer_backend=args.disagg_transfer_backend, @@ -353,6 +349,18 @@ def _build_scheduler(args: argparse.Namespace): def main(argv: list[str] | None = None) -> None: args = _parse_args(argv) + # Smart defaults for output_dir based on scheduler + if not args.output_dir: + if args.scheduler == "local": + project_root = os.path.dirname(os.path.dirname(os.path.abspath(__file__))) + args.output_dir = os.path.join(project_root, "stage_traces") + elif args.scheduler == "slurm": + # Slurm: default to ~/flowsim_traces (shared filesystem) + args.output_dir = os.path.expanduser("~/flowsim_traces") + else: + # K8s: container path (PVC/hostPath mounted here) + args.output_dir = "/flowsim/stage_traces" + # Resolve Slurm JWT token from jwt_token_cmd in config if needed if args.scheduler == "slurm" and not args.slurm_jwt_token: slurm_cfg = load_slurm_config() @@ -380,6 +388,22 @@ def main(argv: list[str] | None = None) -> None: else: result = 
scheduler.submit(spec)
     print(result)
+    # Tell user where to find results
+    print()
+    print(f"Traces: {spec.output_dir}")
+    print(f"Logs:   {spec.log_dir}")
+    if args.scheduler == "k8s":
+        if args.k8s_pvc:
+            print(f"  (persisted on PVC '{args.k8s_pvc}')")
+        else:
+            print(f"  (persisted at hostPath '{args.k8s_host_output_dir}' on the node)")
+        print(f"\nTo check status: flowsim status --scheduler k8s --job {spec.default_job_name()[:63]}")
+        print(f"To view logs:    flowsim logs --scheduler k8s --job {spec.default_job_name()[:63]}")
+    elif args.scheduler == "slurm":
+        print(f"  (on cluster shared filesystem)")
+        print(f"\nTo check status: flowsim status --scheduler slurm --job <JOB_ID>  (ID printed in 'Submitted batch job ...' above)")
+    else:
+        print(f"\nTo view logs: flowsim logs --scheduler local --job {spec.default_job_name()}")
 
 
 _INIT_HINT = "Run 'flowsim init' to create config files."
@@ -394,6 +418,20 @@ def _validate_connection(args: argparse.Namespace) -> None:
             "Set it in ~/.flowsim/k8s.yaml, FLOWSIM_K8S_NAMESPACE env var,\n"
             f"or --k8s-namespace flag. {_INIT_HINT}"
         )
+    # Traces + logs must survive pod termination
+    if not args.k8s_pvc and not args.k8s_host_output_dir:
+        sys.exit(
+            "Error: no persistent storage configured for K8s job output.\n"
+            "Traces and logs are written to output_dir inside the pod —\n"
+            "without a volume mount they are lost when the pod exits.\n\n"
+            "Set one of:\n"
+            "  --k8s-pvc              (PersistentVolumeClaim)\n"
+            "  --k8s-host-output-dir  (hostPath on the node)\n\n"
+            "Or configure in ~/.flowsim/k8s.yaml:\n"
+            "  pvc: my-traces-pvc\n"
+            "  # or\n"
+            "  host_output_dir: /data/flowsim-traces"
+        )
     # kubeconfig is optional (in-cluster auto-discovery), but warn
     if not args.k8s_kubeconfig and not args.k8s_context:
         print(
diff --git a/tests/integration/test_scheduler_local.py b/tests/integration/test_scheduler_local.py
new file mode 100644
index 0000000..062418a
--- /dev/null
+++ b/tests/integration/test_scheduler_local.py
@@ -0,0 +1,229 @@
+"""Integration tests for `flowsim submit --scheduler local`.
+ +Runs actual profiling jobs inside the FlowSim Docker container and verifies +that traces and parsed CSVs are produced. + +Requirements +------------ +* Running inside the ``flowsim`` Docker container with GPUs. +* ``pip install -e .`` done (or schedulers/ available on PYTHONPATH). + +Environment Variables +--------------------- +``MODEL`` + Model path (default: ``/flowsim/workload/models/configs/Qwen3-235B-A22B``). +``LOAD_FORMAT`` + Load format (default: ``dummy``). + +Usage +----- + docker exec flowsim-test python -m pytest tests/integration/test_scheduler_local.py -v -x +""" + +import glob +import os +import subprocess +import sys + +import pytest + +_PROJECT_ROOT = os.path.abspath( + os.path.join(os.path.dirname(__file__), "..", "..") +) + +MODEL = os.environ.get( + "MODEL", "/flowsim/workload/models/configs/Qwen3-235B-A22B" +) +LOAD_FORMAT = os.environ.get("LOAD_FORMAT", "dummy") +ARTIFACT_DIR = os.environ.get( + "PYTEST_ARTIFACT_DIR", "/flowsim/tests/test-artifacts" +) + + +def _flowsim_submit(*args: str, timeout: int = 1200) -> subprocess.CompletedProcess: + """Run ``flowsim submit`` via Python entry point.""" + cmd = [ + sys.executable, "-u", "-c", + "from scripts.cli import main; main()", + "submit", *args, + ] + env = os.environ.copy() + env["PYTHONPATH"] = _PROJECT_ROOT + ( + ":" + env.get("PYTHONPATH", "") + ) + env["PYTHONUNBUFFERED"] = "1" + result = subprocess.run( + cmd, + capture_output=True, + text=True, + cwd=_PROJECT_ROOT, + env=env, + timeout=timeout, + ) + return result + + +class TestLocalSubmitPerf: + """flowsim submit --scheduler local --collect perf — runs real profiling.""" + + def test_local_perf_tp1(self): + """Single-GPU perf profiling via flowsim submit.""" + output_dir = os.path.join(ARTIFACT_DIR, "local_perf_tp1") + log_dir = os.path.join(ARTIFACT_DIR, "local_perf_tp1_logs") + + r = _flowsim_submit( + "--scheduler", "local", + "--collect", "perf", + "--model-path", MODEL, + "--tp", "1", + "--bs", "1", + "--input-len", "512", + 
"--decode-tokens", "8", + "--warmup-n", "2", + "--gpus", "1", + "--local-gpus", "0", + "--output-dir", output_dir, + "--log-dir", log_dir, + "--extra-server-opts", f"--load-format {LOAD_FORMAT}", + ) + + # Print output for debugging + if r.returncode != 0: + print("STDOUT:", r.stdout[-3000:]) + print("STDERR:", r.stderr[-3000:]) + assert r.returncode == 0, f"flowsim submit failed (exit {r.returncode})" + + # Verify trace files exist + traces = glob.glob( + os.path.join(output_dir, "**/*.trace.json.gz"), recursive=True + ) + assert len(traces) > 0, f"No trace files under {output_dir}" + + extend = [t for t in traces if "EXTEND" in os.path.basename(t)] + decode = [t for t in traces if "DECODE" in os.path.basename(t)] + assert len(extend) > 0, "No EXTEND traces" + assert len(decode) > 0, "No DECODE traces" + + # Verify parsed CSVs + csvs = glob.glob( + os.path.join(output_dir, "**/parsed/*.csv"), recursive=True + ) + assert len(csvs) > 0, f"No parsed CSVs under {output_dir}" + + def test_local_perf_tp2(self): + """Multi-GPU perf profiling (TP=2) via flowsim submit.""" + output_dir = os.path.join(ARTIFACT_DIR, "local_perf_tp2") + log_dir = os.path.join(ARTIFACT_DIR, "local_perf_tp2_logs") + + r = _flowsim_submit( + "--scheduler", "local", + "--collect", "perf", + "--model-path", MODEL, + "--tp", "2", + "--bs", "1", + "--input-len", "1024", + "--decode-tokens", "8", + "--warmup-n", "2", + "--gpus", "2", + "--local-gpus", "0,1", + "--output-dir", output_dir, + "--log-dir", log_dir, + "--extra-server-opts", f"--load-format {LOAD_FORMAT}", + ) + + if r.returncode != 0: + print("STDOUT:", r.stdout[-3000:]) + print("STDERR:", r.stderr[-3000:]) + assert r.returncode == 0, f"flowsim submit failed (exit {r.returncode})" + + traces = glob.glob( + os.path.join(output_dir, "**/*.trace.json.gz"), recursive=True + ) + assert len(traces) > 0, f"No trace files under {output_dir}" + + # TP=2 should produce traces for both ranks + tp0 = [t for t in traces if "TP-0" in 
os.path.basename(t)] + tp1 = [t for t in traces if "TP-1" in os.path.basename(t)] + assert len(tp0) > 0, "No TP-0 traces" + assert len(tp1) > 0, "No TP-1 traces" + + +class TestLocalSubmitDryRun: + """flowsim submit --scheduler local --dry-run — verify command generation.""" + + def test_dry_run_output(self): + r = _flowsim_submit( + "--scheduler", "local", + "--collect", "perf", + "--model-path", MODEL, + "--tp", "2", + "--local-gpus", "0,1", + "--dry-run", + ) + assert r.returncode == 0 + assert "CUDA_VISIBLE_DEVICES=0,1" in r.stdout + assert "scripts/run_stage_profile.py" in r.stdout + assert "--tp 2" in r.stdout + + def test_dry_run_pd(self): + r = _flowsim_submit( + "--scheduler", "local", + "--collect", "perf", + "--model-path", MODEL, + "--pd", + "--dry-run", + ) + assert r.returncode == 0 + assert "PREFILL INSTANCE" in r.stdout + assert "DECODE INSTANCE" in r.stdout + assert "--disaggregation-mode prefill" in r.stdout + assert "--disaggregation-mode decode" in r.stdout + + +class TestK8sSubmitDryRun: + """flowsim submit --scheduler k8s --dry-run — verify YAML generation.""" + + def test_k8s_dry_run(self): + r = _flowsim_submit( + "--scheduler", "k8s", + "--collect", "perf", + "--model-path", MODEL, + "--k8s-namespace", "default", + "--dry-run", + ) + assert r.returncode == 0 + assert "apiVersion: batch/v1" in r.stdout + assert "kind: Job" in r.stdout + assert MODEL in r.stdout + + def test_k8s_pd_dry_run(self): + r = _flowsim_submit( + "--scheduler", "k8s", + "--collect", "perf", + "--model-path", MODEL, + "--k8s-namespace", "default", + "--pd", + "--dry-run", + ) + assert r.returncode == 0 + assert "PREFILL INSTANCE" in r.stdout + assert "DECODE INSTANCE" in r.stdout + + +class TestSlurmSubmitDryRun: + """flowsim submit --scheduler slurm --dry-run — verify sbatch script.""" + + def test_slurm_dry_run(self): + r = _flowsim_submit( + "--scheduler", "slurm", + "--collect", "perf", + "--model-path", MODEL, + "--slurm-partition", "gpu", + "--slurm-rest-url", 
"http://fake:6820", + "--slurm-jwt-token", "fake", + "--dry-run", + ) + assert r.returncode == 0 + assert "#!/bin/bash" in r.stdout + assert "#SBATCH --partition=gpu" in r.stdout + assert MODEL in r.stdout From ea3c27abb0c66790cfac5e943525aa7c2eb2e129 Mon Sep 17 00:00:00 2001 From: Terrence Zhang Date: Tue, 17 Mar 2026 05:43:33 +0000 Subject: [PATCH 16/56] fix: flowsim logs shows all log files (stdout + stderr) with listing --- schedulers/k8s.py | 16 ++++++++++++++-- schedulers/local.py | 33 +++++++++++++++++++++++++-------- 2 files changed, 39 insertions(+), 10 deletions(-) diff --git a/schedulers/k8s.py b/schedulers/k8s.py index 1640f5c..69e13e1 100644 --- a/schedulers/k8s.py +++ b/schedulers/k8s.py @@ -250,7 +250,13 @@ def status(self, job_id: str) -> dict: } def logs(self, job_id: str, *, tail: int = 100) -> str: - """Retrieve logs from the pod(s) of a K8s Job.""" + """Retrieve logs from the pod(s) of a K8s Job. + + Shows the Pod stdout/stderr (profiling script output). + Server log files are persisted on the PVC/hostPath under + ``{output_dir}/logs/`` and can be accessed from the node + or another pod mounting the same volume. 
+ """ try: from kubernetes import client as k8s_client except ImportError: @@ -276,6 +282,12 @@ def logs(self, job_id: str, *, tail: int = 100) -> str: ) except Exception as exc: log_text = f"(error reading logs: {exc})" - parts.append(f"=== {name} ===\n{log_text}") + parts.append(f"=== Pod: {name} (last {tail} lines) ===\n{log_text}") + + # Hint about persistent server logs + if self.pvc_name: + parts.append(f"\nServer logs persisted on PVC '{self.pvc_name}' under {{output_dir}}/logs/") + elif self.host_output_dir: + parts.append(f"\nServer logs at {self.host_output_dir}/logs/ on the scheduled node") return "\n".join(parts) diff --git a/schedulers/local.py b/schedulers/local.py index da3b03a..2d38b0a 100644 --- a/schedulers/local.py +++ b/schedulers/local.py @@ -159,19 +159,36 @@ def status(self, job_id: str) -> dict: } def logs(self, job_id: str, *, tail: int = 100) -> str: - """Read the last *tail* lines from the most recent local log file.""" + """Show log files for a local job. + + Lists all log files matching *job_id*, then prints the last + *tail* lines of the most recent stdout **and** stderr logs. 
+ """ import glob log_dir = os.path.join(self.workdir, "stage_traces", "logs") - pattern = os.path.join(log_dir, f"{job_id}_*.stdout.log") + pattern = os.path.join(log_dir, f"{job_id}_*") matches = sorted(glob.glob(pattern)) if not matches: return f"No logs found matching {pattern}" - latest = matches[-1] - with open(latest) as f: - all_lines = f.readlines() - - header = f"=== {latest} (last {tail} lines) ===\n" - return header + "".join(all_lines[-tail:]) + parts = [f"Log files ({len(matches)}):"] + for p in matches: + size = os.path.getsize(p) + parts.append(f" {p} ({size} bytes)") + parts.append("") + + # Show tail of latest stdout + stderr + stdout_files = sorted(f for f in matches if f.endswith(".stdout.log")) + stderr_files = sorted(f for f in matches if f.endswith(".stderr.log")) + + for label, files in [("stdout", stdout_files), ("stderr", stderr_files)]: + if files: + latest = files[-1] + with open(latest) as fh: + lines = fh.readlines() + parts.append(f"=== {latest} (last {tail} lines) ===") + parts.append("".join(lines[-tail:])) + + return "\n".join(parts) From eb46c36f716f8a7f256c9651ac4d3fcaa64ebe08 Mon Sep 17 00:00:00 2001 From: Terrence Zhang Date: Tue, 17 Mar 2026 05:44:51 +0000 Subject: [PATCH 17/56] fix: flowsim logs shows file locations + actionable commands instead of dumping content --- schedulers/k8s.py | 56 +++++++++++++++++++++++++++++---------------- schedulers/local.py | 38 ++++++++++++++++-------------- schedulers/slurm.py | 31 +++++++++++++++++-------- 3 files changed, 79 insertions(+), 46 deletions(-) diff --git a/schedulers/k8s.py b/schedulers/k8s.py index 69e13e1..7d52319 100644 --- a/schedulers/k8s.py +++ b/schedulers/k8s.py @@ -250,13 +250,7 @@ def status(self, job_id: str) -> dict: } def logs(self, job_id: str, *, tail: int = 100) -> str: - """Retrieve logs from the pod(s) of a K8s Job. - - Shows the Pod stdout/stderr (profiling script output). 
- Server log files are persisted on the PVC/hostPath under - ``{output_dir}/logs/`` and can be accessed from the node - or another pod mounting the same volume. - """ + """Show where logs are and how to access them for a K8s Job.""" try: from kubernetes import client as k8s_client except ImportError: @@ -271,23 +265,45 @@ def logs(self, job_id: str, *, tail: int = 100) -> str: if not pods.items: return f"No pods found for job {job_id} in namespace {self.namespace}" - parts = [] + parts: list[str] = [] + + # Pod info for pod in pods.items: name = pod.metadata.name - try: - log_text = core_api.read_namespaced_pod_log( - name=name, - namespace=self.namespace, - tail_lines=tail, - ) - except Exception as exc: - log_text = f"(error reading logs: {exc})" - parts.append(f"=== Pod: {name} (last {tail} lines) ===\n{log_text}") + phase = pod.status.phase + parts.append(f"Pod: {name} ({phase})") + + parts.append("") + + # Commands to view pod stdout + parts.append("View profiling script output:") + for pod in pods.items: + name = pod.metadata.name + parts.append(f" kubectl logs {name} -n {self.namespace}") + parts.append(f" kubectl logs {name} -n {self.namespace} --tail={tail}") + + parts.append("") - # Hint about persistent server logs + # Persistent log files if self.pvc_name: - parts.append(f"\nServer logs persisted on PVC '{self.pvc_name}' under {{output_dir}}/logs/") + parts.append(f"Server logs + traces persisted on PVC '{self.pvc_name}'.") + parts.append("Copy to local machine:") + for pod in pods.items: + name = pod.metadata.name + if pod.status.phase in ("Running", "Succeeded"): + parts.append(f" kubectl cp {self.namespace}/{name}:/flowsim/stage_traces ./stage_traces") + break + else: + parts.append(" (pod not running — mount the PVC in another pod to retrieve files)") elif self.host_output_dir: - parts.append(f"\nServer logs at {self.host_output_dir}/logs/ on the scheduled node") + parts.append(f"Server logs + traces at hostPath on the node:") + parts.append(f" 
{self.host_output_dir}/") + parts.append(f" {self.host_output_dir}/logs/") + # Identify node + for pod in pods.items: + if pod.spec.node_name: + parts.append(f" Node: {pod.spec.node_name}") + parts.append(f" scp {pod.spec.node_name}:{self.host_output_dir}/ ./stage_traces/") + break return "\n".join(parts) diff --git a/schedulers/local.py b/schedulers/local.py index 2d38b0a..67704c5 100644 --- a/schedulers/local.py +++ b/schedulers/local.py @@ -159,11 +159,7 @@ def status(self, job_id: str) -> dict: } def logs(self, job_id: str, *, tail: int = 100) -> str: - """Show log files for a local job. - - Lists all log files matching *job_id*, then prints the last - *tail* lines of the most recent stdout **and** stderr logs. - """ + """List log files for a local job and print access commands.""" import glob log_dir = os.path.join(self.workdir, "stage_traces", "logs") @@ -171,24 +167,32 @@ def logs(self, job_id: str, *, tail: int = 100) -> str: matches = sorted(glob.glob(pattern)) if not matches: - return f"No logs found matching {pattern}" + # Also try wildcard — user may have given a partial name + pattern = os.path.join(log_dir, f"*{job_id}*") + matches = sorted(glob.glob(pattern)) + + if not matches: + return f"No logs found in {log_dir} matching '{job_id}'" - parts = [f"Log files ({len(matches)}):"] + parts = [f"Log directory: {log_dir}", ""] + parts.append(f"Files ({len(matches)}):") for p in matches: size = os.path.getsize(p) - parts.append(f" {p} ({size} bytes)") - parts.append("") + parts.append(f" {os.path.basename(p)} ({size:,} bytes)") - # Show tail of latest stdout + stderr + # Provide commands + parts.append("") + parts.append("View logs:") stdout_files = sorted(f for f in matches if f.endswith(".stdout.log")) stderr_files = sorted(f for f in matches if f.endswith(".stderr.log")) + if stdout_files: + parts.append(f" less {stdout_files[-1]}") + if stderr_files: + parts.append(f" less {stderr_files[-1]}") - for label, files in [("stdout", stdout_files), ("stderr", 
stderr_files)]: - if files: - latest = files[-1] - with open(latest) as fh: - lines = fh.readlines() - parts.append(f"=== {latest} (last {tail} lines) ===") - parts.append("".join(lines[-tail:])) + trace_dir = os.path.join(self.workdir, "stage_traces") + parts.append("") + parts.append(f"Trace files: {trace_dir}") + parts.append(f" ls {trace_dir}") return "\n".join(parts) diff --git a/schedulers/slurm.py b/schedulers/slurm.py index 9ec84a6..3f5d166 100644 --- a/schedulers/slurm.py +++ b/schedulers/slurm.py @@ -291,20 +291,33 @@ def status(self, job_id: str) -> dict: } def logs(self, job_id: str, *, tail: int = 100) -> str: - """Retrieve log output for a Slurm job. - - Tries to read the sbatch output file via slurmrestd. - Falls back to showing job info if direct log access isn't available. - """ + """Show where Slurm job logs are and how to access them.""" info = self.status(job_id) output_file = info.get("output_hint", "") - lines = [info["message"], ""] + state = info.get("state", "UNKNOWN") + + parts = [info["message"], ""] if output_file: - lines.append(f"To view full logs on the cluster:") - lines.append(f" tail -{tail} {output_file}") + parts.append(f"Log file (on cluster shared filesystem):") + parts.append(f" {output_file}") + parts.append("") + parts.append("View on login node:") + parts.append(f" less {output_file}") + parts.append(f" tail -{tail} {output_file}") + parts.append("") + parts.append("Copy to local machine:") + parts.append(f" scp :{output_file} .") + else: + parts.append("No output file path found in job metadata.") - return "\n".join(lines) + # Trace files location + parts.append("") + parts.append("Trace files (on cluster shared filesystem):") + parts.append(" ~/flowsim_traces/") + parts.append(" ls ~/flowsim_traces/") + + return "\n".join(parts) def _parse_time_minutes(self) -> int: """Convert HH:MM:SS time_limit to total minutes.""" From 0e59219ab1b416650b0f732780266a821b9edc1d Mon Sep 17 00:00:00 2001 From: Terrence Zhang Date: Tue, 17 
Mar 2026 05:56:55 +0000 Subject: [PATCH 18/56] test: integration tests for all 3 scheduler backends (local/k8s/slurm) - TestLocalScheduler: real TP=1 profiling, verify traces + logs + status/logs CLI - TestK8sScheduler: dry-run YAML (PVC mount, hostPath, log paths), refuse without storage, real Job submit to Kind cluster with status/logs verification - TestSlurmScheduler: dry-run sbatch script (output_dir, log_dir, PD pair) Results: 9 passed, 1 skipped (K8s real submit skipped in container, passes on host) --- tests/integration/test_scheduler_local.py | 359 +++++++++++++++------- 1 file changed, 251 insertions(+), 108 deletions(-) diff --git a/tests/integration/test_scheduler_local.py b/tests/integration/test_scheduler_local.py index 062418a..879c00f 100644 --- a/tests/integration/test_scheduler_local.py +++ b/tests/integration/test_scheduler_local.py @@ -1,12 +1,20 @@ -"""Integration tests for `flowsim submit --scheduler local`. +"""Integration tests for ``flowsim submit``, ``flowsim status``, ``flowsim logs``. -Runs actual profiling jobs inside the FlowSim Docker container and verifies -that traces and parsed CSVs are produced. +Tests all three scheduler backends (local, k8s, slurm) end-to-end. + +* **local** — runs real TP=1 profiling and verifies traces, parsed CSVs, + and log files are all produced in the correct locations. +* **k8s** — submits a real Job to a Kind cluster, verifies it was created, + then checks ``flowsim status`` / ``flowsim logs`` output. Also validates + that dry-run YAML has the correct volume mounts and log paths. +* **slurm** — dry-run only; verifies the sbatch script has the correct + ``output_dir``, ``--log-dir``, and ``#SBATCH --output`` directives. Requirements ------------ -* Running inside the ``flowsim`` Docker container with GPUs. -* ``pip install -e .`` done (or schedulers/ available on PYTHONPATH). +* The ``flowsim-test`` container with GPUs (for local tests). +* A Kind cluster named ``flowsim`` (for K8s tests). 
+* ``schedulers/`` available on PYTHONPATH. Environment Variables --------------------- @@ -17,13 +25,20 @@ Usage ----- - docker exec flowsim-test python -m pytest tests/integration/test_scheduler_local.py -v -x + # Inside container (local tests): + docker exec flowsim-test python -m pytest \ + tests/integration/test_scheduler_local.py -v -x + + # On host (k8s tests — needs kubeconfig): + python -m pytest tests/integration/test_scheduler_local.py \ + -v -x -k "k8s" """ import glob import os import subprocess import sys +import time import pytest @@ -39,20 +54,23 @@ "PYTEST_ARTIFACT_DIR", "/flowsim/tests/test-artifacts" ) +# --------------------------------------------------------------------------- +# Helpers +# --------------------------------------------------------------------------- -def _flowsim_submit(*args: str, timeout: int = 1200) -> subprocess.CompletedProcess: - """Run ``flowsim submit`` via Python entry point.""" +def _flowsim_cli(*args: str, timeout: int = 1200) -> subprocess.CompletedProcess: + """Run a ``flowsim`` subcommand via Python entry point.""" cmd = [ sys.executable, "-u", "-c", "from scripts.cli import main; main()", - "submit", *args, + *args, ] env = os.environ.copy() env["PYTHONPATH"] = _PROJECT_ROOT + ( ":" + env.get("PYTHONPATH", "") ) env["PYTHONUNBUFFERED"] = "1" - result = subprocess.run( + return subprocess.run( cmd, capture_output=True, text=True, @@ -60,18 +78,58 @@ def _flowsim_submit(*args: str, timeout: int = 1200) -> subprocess.CompletedProc env=env, timeout=timeout, ) - return result -class TestLocalSubmitPerf: - """flowsim submit --scheduler local --collect perf — runs real profiling.""" +def _assert_traces(output_dir: str) -> None: + """Assert EXTEND + DECODE traces and parsed CSVs exist.""" + traces = glob.glob( + os.path.join(output_dir, "**/*.trace.json.gz"), recursive=True + ) + assert len(traces) > 0, f"No trace files under {output_dir}" + extend = [t for t in traces if "EXTEND" in os.path.basename(t)] + decode = [t 
for t in traces if "DECODE" in os.path.basename(t)] + assert len(extend) > 0, "No EXTEND traces" + assert len(decode) > 0, "No DECODE traces" + + csvs = glob.glob( + os.path.join(output_dir, "**/parsed/*.csv"), recursive=True + ) + assert len(csvs) > 0, f"No parsed CSVs under {output_dir}" + # At least EXTEND should be parsed; DECODE CSV may be absent for short sequences + extend_csvs = [c for c in csvs if "EXTEND" in os.path.basename(c)] + assert len(extend_csvs) > 0, "No EXTEND parsed CSVs" + + +def _assert_logs(output_dir: str) -> None: + """Assert server log files exist under {output_dir}/logs/.""" + log_dir = os.path.join(output_dir, "logs") + assert os.path.isdir(log_dir), f"Log directory not found: {log_dir}" + log_files = os.listdir(log_dir) + assert len(log_files) > 0, f"No log files in {log_dir}" + stdout_logs = [f for f in log_files if f.endswith(".stdout.log")] + stderr_logs = [f for f in log_files if f.endswith(".stderr.log")] + assert len(stdout_logs) > 0, f"No stdout logs in {log_dir}" + assert len(stderr_logs) > 0, f"No stderr logs in {log_dir}" + # At least one log should be non-empty + sizes = [ + os.path.getsize(os.path.join(log_dir, f)) + for f in stdout_logs + ] + assert max(sizes) > 0, "All stdout logs are empty" + + +# ===================================================================== +# LOCAL SCHEDULER — real profiling +# ===================================================================== +class TestLocalScheduler: + """Run real profiling via ``flowsim submit --scheduler local``.""" def test_local_perf_tp1(self): - """Single-GPU perf profiling via flowsim submit.""" - output_dir = os.path.join(ARTIFACT_DIR, "local_perf_tp1") - log_dir = os.path.join(ARTIFACT_DIR, "local_perf_tp1_logs") + """TP=1 perf profiling: traces + parsed CSVs + log files.""" + output_dir = os.path.join(ARTIFACT_DIR, "sched_local_tp1") - r = _flowsim_submit( + r = _flowsim_cli( + "submit", "--scheduler", "local", "--collect", "perf", "--model-path", MODEL, @@ 
-83,147 +141,232 @@ def test_local_perf_tp1(self): "--gpus", "1", "--local-gpus", "0", "--output-dir", output_dir, - "--log-dir", log_dir, "--extra-server-opts", f"--load-format {LOAD_FORMAT}", ) - # Print output for debugging if r.returncode != 0: print("STDOUT:", r.stdout[-3000:]) print("STDERR:", r.stderr[-3000:]) assert r.returncode == 0, f"flowsim submit failed (exit {r.returncode})" - # Verify trace files exist - traces = glob.glob( - os.path.join(output_dir, "**/*.trace.json.gz"), recursive=True - ) - assert len(traces) > 0, f"No trace files under {output_dir}" - - extend = [t for t in traces if "EXTEND" in os.path.basename(t)] - decode = [t for t in traces if "DECODE" in os.path.basename(t)] - assert len(extend) > 0, "No EXTEND traces" - assert len(decode) > 0, "No DECODE traces" + # Verify traces and parsed CSVs + _assert_traces(output_dir) - # Verify parsed CSVs - csvs = glob.glob( - os.path.join(output_dir, "**/parsed/*.csv"), recursive=True - ) - assert len(csvs) > 0, f"No parsed CSVs under {output_dir}" + # Verify log files under output_dir/logs/ + _assert_logs(output_dir) - def test_local_perf_tp2(self): - """Multi-GPU perf profiling (TP=2) via flowsim submit.""" - output_dir = os.path.join(ARTIFACT_DIR, "local_perf_tp2") - log_dir = os.path.join(ARTIFACT_DIR, "local_perf_tp2_logs") + # Verify submit output mentions log/trace locations + combined = r.stdout + r.stderr + assert "Traces:" in combined, "Submit output should show trace location" + assert "Logs:" in combined, "Submit output should show log location" - r = _flowsim_submit( + def test_local_status(self): + """flowsim status --scheduler local should find logs from the previous run.""" + r = _flowsim_cli( + "status", "--scheduler", "local", - "--collect", "perf", - "--model-path", MODEL, - "--tp", "2", - "--bs", "1", - "--input-len", "1024", - "--decode-tokens", "8", - "--warmup-n", "2", - "--gpus", "2", - "--local-gpus", "0,1", - "--output-dir", output_dir, - "--log-dir", log_dir, - 
"--extra-server-opts", f"--load-format {LOAD_FORMAT}", + "--job", "flowsim-perf", ) + # Should either find logs or say not found — should not crash + assert r.returncode == 0 - if r.returncode != 0: - print("STDOUT:", r.stdout[-3000:]) - print("STDERR:", r.stderr[-3000:]) - assert r.returncode == 0, f"flowsim submit failed (exit {r.returncode})" - - traces = glob.glob( - os.path.join(output_dir, "**/*.trace.json.gz"), recursive=True + def test_local_logs(self): + """flowsim logs --scheduler local should list log files and give paths.""" + r = _flowsim_cli( + "logs", + "--scheduler", "local", + "--job", "flowsim-perf", ) - assert len(traces) > 0, f"No trace files under {output_dir}" - - # TP=2 should produce traces for both ranks - tp0 = [t for t in traces if "TP-0" in os.path.basename(t)] - tp1 = [t for t in traces if "TP-1" in os.path.basename(t)] - assert len(tp0) > 0, "No TP-0 traces" - assert len(tp1) > 0, "No TP-1 traces" + assert r.returncode == 0 + output = r.stdout + # Should contain file listing or "No logs" — not crash + assert "Log directory:" in output or "No logs" in output -class TestLocalSubmitDryRun: - """flowsim submit --scheduler local --dry-run — verify command generation.""" +# ===================================================================== +# K8S SCHEDULER +# ===================================================================== +class TestK8sScheduler: + """K8s scheduler: dry-run validates YAML structure, real submit to Kind.""" - def test_dry_run_output(self): - r = _flowsim_submit( - "--scheduler", "local", + def test_k8s_dry_run_has_volume_and_log_path(self): + """Dry-run YAML should mount output volume and pass --log-dir.""" + r = _flowsim_cli( + "submit", + "--scheduler", "k8s", "--collect", "perf", "--model-path", MODEL, - "--tp", "2", - "--local-gpus", "0,1", + "--k8s-namespace", "default", + "--k8s-pvc", "test-traces", + "--output-dir", "/data/traces", "--dry-run", ) assert r.returncode == 0 - assert "CUDA_VISIBLE_DEVICES=0,1" in 
r.stdout - assert "scripts/run_stage_profile.py" in r.stdout - assert "--tp 2" in r.stdout - - def test_dry_run_pd(self): - r = _flowsim_submit( - "--scheduler", "local", + yaml_output = r.stdout + + # Job structure + assert "apiVersion: batch/v1" in yaml_output + assert "kind: Job" in yaml_output + + # PVC volume mount + assert "test-traces" in yaml_output + assert "persistentVolumeClaim" in yaml_output + + # output_dir and derived log_dir appear in the command + assert "--output-dir" in yaml_output + assert "/data/traces" in yaml_output + assert "--log-dir" in yaml_output + assert "/data/traces/logs" in yaml_output + + def test_k8s_dry_run_hostpath(self): + """Dry-run with hostPath should have hostPath volume.""" + r = _flowsim_cli( + "submit", + "--scheduler", "k8s", "--collect", "perf", "--model-path", MODEL, - "--pd", + "--k8s-namespace", "default", + "--k8s-host-output-dir", "/mnt/traces", "--dry-run", ) assert r.returncode == 0 - assert "PREFILL INSTANCE" in r.stdout - assert "DECODE INSTANCE" in r.stdout - assert "--disaggregation-mode prefill" in r.stdout - assert "--disaggregation-mode decode" in r.stdout - - -class TestK8sSubmitDryRun: - """flowsim submit --scheduler k8s --dry-run — verify YAML generation.""" + assert "hostPath" in r.stdout + assert "/mnt/traces" in r.stdout - def test_k8s_dry_run(self): - r = _flowsim_submit( + def test_k8s_refuses_without_storage(self): + """Submit (not dry-run) without PVC or hostPath should fail.""" + r = _flowsim_cli( + "submit", "--scheduler", "k8s", "--collect", "perf", "--model-path", MODEL, "--k8s-namespace", "default", - "--dry-run", + # Explicitly clear any config defaults + "--k8s-pvc", "", + "--k8s-host-output-dir", "", ) - assert r.returncode == 0 - assert "apiVersion: batch/v1" in r.stdout - assert "kind: Job" in r.stdout - assert MODEL in r.stdout + assert r.returncode != 0 + combined = r.stdout + r.stderr + assert "persistent storage" in combined or "pvc" in combined.lower() - def 
test_k8s_pd_dry_run(self): - r = _flowsim_submit( + @pytest.mark.skipif( + not os.path.exists(os.path.expanduser("~/.kube/config")), + reason="No kubeconfig — skip K8s real submit (run on host with Kind cluster)", + ) + def test_k8s_real_submit_to_kind(self): + """Submit a real Job to Kind cluster, verify status + logs commands work.""" + job_name = f"test-integ-{int(time.time()) % 100000}" + r = _flowsim_cli( + "submit", "--scheduler", "k8s", "--collect", "perf", "--model-path", MODEL, "--k8s-namespace", "default", - "--pd", + "--k8s-host-output-dir", "/tmp/flowsim-test-traces", + "--job-name", job_name, + ) + combined = r.stdout + r.stderr + + if r.returncode != 0: + print("Submit output:", combined[-3000:]) + assert r.returncode == 0, f"K8s submit failed: {combined[-1000:]}" + assert "created" in combined.lower() + + # Verify submit output has location hints + assert "Traces:" in combined + assert "Logs:" in combined + assert "flowsim status" in combined + assert "flowsim logs" in combined + + # Check status + r2 = _flowsim_cli("status", "--scheduler", "k8s", "--job", job_name) + assert r2.returncode == 0 + assert job_name in r2.stdout + + # Check logs (may say "pending" or show pod info) + r3 = _flowsim_cli("logs", "--scheduler", "k8s", "--job", job_name) + assert r3.returncode == 0 + # Should mention kubectl or pod name or "No pods" + assert "kubectl" in r3.stdout or "No pods" in r3.stdout or "Pod:" in r3.stdout + + # Cleanup: delete the K8s job + subprocess.run( + ["kubectl", "--context", "kind-flowsim", "delete", "job", job_name, + "-n", "default", "--ignore-not-found"], + capture_output=True, timeout=30, + ) + + +# ===================================================================== +# SLURM SCHEDULER — dry-run only (no real cluster) +# ===================================================================== +class TestSlurmScheduler: + """Slurm scheduler: verify sbatch script has correct paths.""" + + def test_slurm_dry_run_output_and_log_paths(self): + 
"""Dry-run sbatch script should reference output_dir and log_dir.""" + r = _flowsim_cli( + "submit", + "--scheduler", "slurm", + "--collect", "perf", + "--model-path", MODEL, + "--slurm-partition", "gpu", + "--slurm-rest-url", "http://fake:6820", + "--slurm-jwt-token", "fake-token", + "--output-dir", "/shared/flowsim_traces", "--dry-run", ) assert r.returncode == 0 - assert "PREFILL INSTANCE" in r.stdout - assert "DECODE INSTANCE" in r.stdout + script = r.stdout + # sbatch directives + assert "#!/bin/bash" in script + assert "#SBATCH --job-name=" in script + assert "#SBATCH --partition=gpu" in script -class TestSlurmSubmitDryRun: - """flowsim submit --scheduler slurm --dry-run — verify sbatch script.""" + # output_dir in the profiling command + assert "--output-dir" in script + assert "/shared/flowsim_traces" in script - def test_slurm_dry_run(self): - r = _flowsim_submit( + # log_dir = output_dir + /logs/ + assert "--log-dir" in script + assert "/shared/flowsim_traces/logs" in script + + def test_slurm_dry_run_default_output_dir(self): + """Default output_dir for Slurm should be ~/flowsim_traces.""" + r = _flowsim_cli( + "submit", "--scheduler", "slurm", "--collect", "perf", "--model-path", MODEL, "--slurm-partition", "gpu", "--slurm-rest-url", "http://fake:6820", - "--slurm-jwt-token", "fake", + "--slurm-jwt-token", "fake-token", + "--dry-run", + ) + assert r.returncode == 0 + assert "flowsim_traces" in r.stdout + + def test_slurm_dry_run_pd_pair(self): + """PD disaggregation dry-run should produce both scripts with correct paths.""" + r = _flowsim_cli( + "submit", + "--scheduler", "slurm", + "--collect", "perf", + "--model-path", MODEL, + "--slurm-partition", "gpu", + "--slurm-rest-url", "http://fake:6820", + "--slurm-jwt-token", "fake-token", + "--output-dir", "/shared/traces", + "--pd", "--dry-run", ) assert r.returncode == 0 - assert "#!/bin/bash" in r.stdout - assert "#SBATCH --partition=gpu" in r.stdout - assert MODEL in r.stdout + output = r.stdout + 
assert "PREFILL INSTANCE" in output + assert "DECODE INSTANCE" in output + assert "--disaggregation-mode prefill" in output + assert "--disaggregation-mode decode" in output + # Both scripts should reference the same output_dir + assert output.count("--output-dir") >= 2 + assert output.count("/shared/traces/logs") >= 2 From 2c36af02795e87ecce17f3cd914846722d18cc1a Mon Sep 17 00:00:00 2001 From: Terrence Zhang Date: Tue, 17 Mar 2026 18:06:07 +0000 Subject: [PATCH 19/56] feat: align CLI with standard job platform APIs - Add JobResult dataclass: submit() now returns structured data (job_id, scheduler, state, output_dir, message) instead of string - Add flowsim cancel: K8s (delete_namespaced_job), Slurm (DELETE via slurmrestd), local (no-op for synchronous jobs) - Add flowsim list: list FlowSim jobs with --status filter K8s (label_selector=app=flowsim), Slurm (slurmrestd /jobs), local (scan log files) - Add --follow / -f to flowsim logs: shows tail -f / kubectl logs -f commands for real-time log streaming - submit_pd_pair() now returns list[JobResult] instead of string - Post-submit output shows cancel/list/follow commands --- schedulers/__init__.py | 3 +- schedulers/base.py | 43 ++++++++++++-- schedulers/k8s.py | 75 +++++++++++++++++++++++-- schedulers/local.py | 86 ++++++++++++++++++++++++---- schedulers/slurm.py | 82 +++++++++++++++++++++++---- scripts/cli.py | 22 ++++++++ scripts/status_profile.py | 115 +++++++++++++++++++++++++------------- scripts/submit_profile.py | 33 +++++++---- 8 files changed, 377 insertions(+), 82 deletions(-) diff --git a/schedulers/__init__.py b/schedulers/__init__.py index fd20eb2..7e0df35 100644 --- a/schedulers/__init__.py +++ b/schedulers/__init__.py @@ -1,12 +1,13 @@ """Scheduler backends for submitting FlowSim profiling jobs.""" -from schedulers.base import BaseScheduler, ProfileJobSpec +from schedulers.base import BaseScheduler, JobResult, ProfileJobSpec from schedulers.k8s import K8sScheduler from schedulers.local import 
LocalScheduler from schedulers.slurm import SlurmScheduler __all__ = [ "BaseScheduler", + "JobResult", "K8sScheduler", "LocalScheduler", "ProfileJobSpec", diff --git a/schedulers/base.py b/schedulers/base.py index 3cbc2e7..40d9cea 100644 --- a/schedulers/base.py +++ b/schedulers/base.py @@ -8,6 +8,17 @@ from typing import Optional +@dataclass +class JobResult: + """Structured return value from ``submit()``.""" + + job_id: str + scheduler: str # "local", "k8s", "slurm" + state: str # "Submitted", "Completed", "Failed" + output_dir: str = "" + message: str = "" + + @dataclass class ProfileJobSpec: """All parameters needed to run a stage-profiling job. @@ -153,8 +164,12 @@ def render(self, spec: ProfileJobSpec) -> str: """Render the job manifest / script as a string.""" @abc.abstractmethod - def submit(self, spec: ProfileJobSpec) -> str: - """Submit the job and return a job identifier string.""" + def submit(self, spec: ProfileJobSpec) -> JobResult: + """Submit the job and return a structured :class:`JobResult`.""" + + def cancel(self, job_id: str) -> str: + """Cancel a running or pending job. Returns a status message.""" + raise NotImplementedError(f"{type(self).__name__} does not support cancel") def status(self, job_id: str) -> dict: """Query job status. Returns dict with at least 'state' key. @@ -169,7 +184,7 @@ def status(self, job_id: str) -> dict: """ raise NotImplementedError(f"{type(self).__name__} does not support status queries") - def logs(self, job_id: str, *, tail: int = 100) -> str: + def logs(self, job_id: str, *, tail: int = 100, follow: bool = False) -> str: """Retrieve recent log output for a job. Parameters @@ -178,9 +193,27 @@ def logs(self, job_id: str, *, tail: int = 100) -> str: Job name (K8s) or job ID (Slurm) or log prefix (local). tail : int Number of lines from the end to return. + follow : bool + If True, stream logs in real time (blocking). 
""" raise NotImplementedError(f"{type(self).__name__} does not support log retrieval") + def list_jobs(self, *, status_filter: str = "") -> list[dict]: + """List jobs managed by this scheduler. + + Parameters + ---------- + status_filter : str + If non-empty, only return jobs matching this state + (e.g., ``"Running"``, ``"Succeeded"``, ``"PENDING"``). + + Returns + ------- + list[dict] + Each dict has at least ``{"job_id": ..., "state": ..., "name": ...}``. + """ + raise NotImplementedError(f"{type(self).__name__} does not support list") + def dry_run(self, spec: ProfileJobSpec) -> str: """Render and return the manifest without submitting.""" return self.render(spec) @@ -191,8 +224,8 @@ def render_pd_pair(self, spec: ProfileJobSpec) -> str: decode = self.render(spec.as_decode()) return f"# === PREFILL INSTANCE ===\n{prefill}\n# === DECODE INSTANCE ===\n{decode}" - def submit_pd_pair(self, spec: ProfileJobSpec) -> str: + def submit_pd_pair(self, spec: ProfileJobSpec) -> list[JobResult]: """Submit both prefill and decode jobs.""" r1 = self.submit(spec.as_prefill()) r2 = self.submit(spec.as_decode()) - return f"[prefill] {r1}\n[decode] {r2}" + return [r1, r2] diff --git a/schedulers/k8s.py b/schedulers/k8s.py index 7d52319..44c2917 100644 --- a/schedulers/k8s.py +++ b/schedulers/k8s.py @@ -9,7 +9,7 @@ import json -from schedulers.base import BaseScheduler, ProfileJobSpec +from schedulers.base import BaseScheduler, JobResult, ProfileJobSpec # Optional: nicer YAML output for dry-run. 
try: @@ -137,7 +137,7 @@ def _build_job_dict(self, spec: ProfileJobSpec) -> dict: }, } - def submit(self, spec: ProfileJobSpec) -> str: + def submit(self, spec: ProfileJobSpec) -> JobResult: """Submit via the ``kubernetes`` Python client (``pip install kubernetes``).""" try: from kubernetes import client as k8s_client, config as k8s_config @@ -175,7 +175,13 @@ def submit(self, spec: ProfileJobSpec) -> str: namespace=self.namespace, body=body, ) - return f"job.batch/{resp.metadata.name} created (namespace={resp.metadata.namespace})" + return JobResult( + job_id=resp.metadata.name, + scheduler="k8s", + state="Submitted", + output_dir=spec.output_dir, + message=f"job.batch/{resp.metadata.name} created (namespace={resp.metadata.namespace})", + ) # ----------------------------------------------------------------- # Helpers shared by status / logs @@ -197,6 +203,18 @@ def _load_k8s(self): return k8s_client.BatchV1Api(), k8s_client.CoreV1Api() + def cancel(self, job_id: str) -> str: + """Delete a K8s Job (and its pods) by name.""" + from kubernetes import client as k8s_client + + batch_api, _ = self._load_k8s() + batch_api.delete_namespaced_job( + name=job_id, + namespace=self.namespace, + body=k8s_client.V1DeleteOptions(propagation_policy="Foreground"), + ) + return f"job.batch/{job_id} deleted (namespace={self.namespace})" + def status(self, job_id: str) -> dict: """Query K8s Job status by job name.""" try: @@ -249,7 +267,7 @@ def status(self, job_id: str) -> dict: "output_hint": output_hint, } - def logs(self, job_id: str, *, tail: int = 100) -> str: + def logs(self, job_id: str, *, tail: int = 100, follow: bool = False) -> str: """Show where logs are and how to access them for a K8s Job.""" try: from kubernetes import client as k8s_client @@ -265,6 +283,19 @@ def logs(self, job_id: str, *, tail: int = 100) -> str: if not pods.items: return f"No pods found for job {job_id} in namespace {self.namespace}" + if follow: + # Stream logs from the first running/succeeded pod 
+ for pod in pods.items: + name = pod.metadata.name + if pod.status.phase in ("Running", "Succeeded"): + # Use kubectl follow since the Python client follow is blocking + return ( + f"Follow logs:\n" + f" kubectl logs -f {name} -n {self.namespace}" + ) + name = pods.items[0].metadata.name + return f"Follow logs:\n kubectl logs -f {name} -n {self.namespace}" + parts: list[str] = [] # Pod info @@ -307,3 +338,39 @@ def logs(self, job_id: str, *, tail: int = 100) -> str: break return "\n".join(parts) + + def list_jobs(self, *, status_filter: str = "") -> list[dict]: + """List FlowSim Jobs in the namespace (label: app=flowsim).""" + batch_api, _ = self._load_k8s() + + jobs = batch_api.list_namespaced_job( + namespace=self.namespace, + label_selector="app=flowsim", + ) + result: list[dict] = [] + for job in jobs.items: + st = job.status + if st.succeeded and st.succeeded > 0: + state = "Succeeded" + elif st.failed and st.failed > 0: + state = "Failed" + elif st.active and st.active > 0: + state = "Running" + else: + state = "Pending" + + if status_filter and state.lower() != status_filter.lower(): + continue + + created = "" + if job.metadata.creation_timestamp: + created = job.metadata.creation_timestamp.strftime("%Y-%m-%d %H:%M:%S") + + result.append({ + "job_id": job.metadata.name, + "name": job.metadata.name, + "state": state, + "namespace": self.namespace, + "created": created, + }) + return result diff --git a/schedulers/local.py b/schedulers/local.py index 67704c5..f23a743 100644 --- a/schedulers/local.py +++ b/schedulers/local.py @@ -11,7 +11,7 @@ import sys import time -from schedulers.base import BaseScheduler, ProfileJobSpec +from schedulers.base import BaseScheduler, JobResult, ProfileJobSpec class LocalScheduler(BaseScheduler): @@ -52,7 +52,7 @@ def render(self, spec: ProfileJobSpec) -> str: lines.append(spec.build_shell_command()) return "\n".join(lines) - def submit(self, spec: ProfileJobSpec) -> str: + def submit(self, spec: ProfileJobSpec) -> JobResult: 
"""Run the profiling command locally as a subprocess. stdout and stderr are streamed to the terminal *and* saved to @@ -115,17 +115,33 @@ def _tee(src, dest_file, dest_stream): t_err.join() if proc.returncode != 0: - return ( - f"[local] {job_name} FAILED (exit code {proc.returncode})\n" - f"[local] stdout log: {stdout_path}\n" - f"[local] stderr log: {stderr_path}" + return JobResult( + job_id=job_name, + scheduler="local", + state="Failed", + output_dir=spec.output_dir, + message=( + f"{job_name} FAILED (exit code {proc.returncode})\n" + f"stdout log: {stdout_path}\n" + f"stderr log: {stderr_path}" + ), ) - return ( - f"[local] {job_name} completed successfully\n" - f"[local] stdout log: {stdout_path}\n" - f"[local] stderr log: {stderr_path}" + return JobResult( + job_id=job_name, + scheduler="local", + state="Completed", + output_dir=spec.output_dir, + message=( + f"{job_name} completed successfully\n" + f"stdout log: {stdout_path}\n" + f"stderr log: {stderr_path}" + ), ) + def cancel(self, job_id: str) -> str: + """Local jobs run synchronously, so cancel is not applicable.""" + return f"Local jobs run synchronously and cannot be cancelled. Job: {job_id}" + def status(self, job_id: str) -> dict: """Check local job status by looking for log files. 
@@ -158,7 +174,7 @@ def status(self, job_id: str) -> dict: "output_hint": trace_dir, } - def logs(self, job_id: str, *, tail: int = 100) -> str: + def logs(self, job_id: str, *, tail: int = 100, follow: bool = False) -> str: """List log files for a local job and print access commands.""" import glob @@ -174,6 +190,12 @@ def logs(self, job_id: str, *, tail: int = 100) -> str: if not matches: return f"No logs found in {log_dir} matching '{job_id}'" + if follow: + stdout_files = sorted(f for f in matches if f.endswith(".stdout.log")) + if stdout_files: + return f"Follow logs with:\n tail -f {stdout_files[-1]}" + return f"No stdout log found to follow for '{job_id}'" + parts = [f"Log directory: {log_dir}", ""] parts.append(f"Files ({len(matches)}):") for p in matches: @@ -189,6 +211,10 @@ def logs(self, job_id: str, *, tail: int = 100) -> str: parts.append(f" less {stdout_files[-1]}") if stderr_files: parts.append(f" less {stderr_files[-1]}") + if stdout_files: + parts.append("") + parts.append("Follow logs:") + parts.append(f" tail -f {stdout_files[-1]}") trace_dir = os.path.join(self.workdir, "stage_traces") parts.append("") @@ -196,3 +222,41 @@ def logs(self, job_id: str, *, tail: int = 100) -> str: parts.append(f" ls {trace_dir}") return "\n".join(parts) + + def list_jobs(self, *, status_filter: str = "") -> list[dict]: + """List local jobs by scanning log files.""" + import glob + import re + + log_dir = os.path.join(self.workdir, "stage_traces", "logs") + pattern = os.path.join(log_dir, "*.stdout.log") + matches = sorted(glob.glob(pattern)) + + jobs: list[dict] = [] + for path in matches: + basename = os.path.basename(path) + # Parse: {job_name}_{timestamp}.stdout.log + m = re.match(r"^(.+)_(\d+)\.stdout\.log$", basename) + if not m: + continue + name = m.group(1) + ts = m.group(2) + stderr = path.replace(".stdout.log", ".stderr.log") + stderr_size = os.path.getsize(stderr) if os.path.exists(stderr) else 0 + # If stderr has content, might have failed; otherwise 
completed + state = "Completed" + if stderr_size > 0: + # Check if there's an error indicator in stderr + state = "Completed" # local jobs are synchronous; if log exists, it finished + jobs.append({ + "job_id": name, + "name": name, + "state": state, + "timestamp": ts, + }) + + if status_filter: + filt = status_filter.lower() + jobs = [j for j in jobs if j["state"].lower() == filt] + + return jobs diff --git a/schedulers/slurm.py b/schedulers/slurm.py index 3f5d166..790ade4 100644 --- a/schedulers/slurm.py +++ b/schedulers/slurm.py @@ -12,7 +12,7 @@ import urllib.error import urllib.request -from schedulers.base import BaseScheduler, ProfileJobSpec +from schedulers.base import BaseScheduler, JobResult, ProfileJobSpec _DEFAULT_API_VERSION = "v0.0.40" @@ -150,7 +150,7 @@ def render(self, spec: ProfileJobSpec) -> str: lines.append("") return "\n".join(lines) - def submit(self, spec: ProfileJobSpec) -> str: + def submit(self, spec: ProfileJobSpec) -> JobResult: """Submit the job via slurmrestd REST API. Requires ``rest_url`` and ``jwt_token`` to be set. 
@@ -222,11 +222,17 @@ def submit(self, spec: ProfileJobSpec) -> str: msgs = "; ".join(e.get("error", str(e)) for e in errors) raise RuntimeError(f"slurmrestd job submit failed: {msgs}") - job_id = body.get("job_id", "unknown") - return f"Submitted batch job {job_id}" + job_id = str(body.get("job_id", "unknown")) + return JobResult( + job_id=job_id, + scheduler="slurm", + state="Submitted", + output_dir=spec.output_dir, + message=f"Submitted batch job {job_id}", + ) - def _rest_get(self, path: str) -> dict: - """GET a slurmrestd endpoint and return parsed JSON.""" + def _rest_request(self, path: str, *, method: str = "GET") -> dict: + """Send a request to slurmrestd and return parsed JSON.""" if not self.rest_url: raise RuntimeError("--slurm-rest-url is required") if not self.jwt_token: @@ -236,7 +242,7 @@ def _rest_get(self, path: str) -> dict: headers = { "X-SLURM-USER-TOKEN": self.jwt_token, } - req = urllib.request.Request(url, headers=headers, method="GET") + req = urllib.request.Request(url, headers=headers, method=method) ctx: ssl.SSLContext | None = None if not self.verify_ssl: @@ -253,6 +259,22 @@ def _rest_get(self, path: str) -> dict: except urllib.error.URLError as exc: raise RuntimeError(f"Cannot reach slurmrestd at {self.rest_url}: {exc.reason}") from exc + def _rest_get(self, path: str) -> dict: + """GET a slurmrestd endpoint and return parsed JSON.""" + return self._rest_request(path, method="GET") + + def cancel(self, job_id: str) -> str: + """Cancel a Slurm job via slurmrestd DELETE.""" + body = self._rest_request( + f"/slurm/{self.api_version}/job/{job_id}", + method="DELETE", + ) + errors = body.get("errors") or [] + if errors: + msgs = "; ".join(e.get("error", str(e)) for e in errors) + raise RuntimeError(f"slurmrestd cancel failed: {msgs}") + return f"Cancelled Slurm job {job_id}" + def status(self, job_id: str) -> dict: """Query Slurm job status via slurmrestd.""" body = self._rest_get(f"/slurm/{self.api_version}/job/{job_id}") @@ -290,7 
+312,7 @@ def status(self, job_id: str) -> dict: "output_hint": output_file, } - def logs(self, job_id: str, *, tail: int = 100) -> str: + def logs(self, job_id: str, *, tail: int = 100, follow: bool = False) -> str: """Show where Slurm job logs are and how to access them.""" info = self.status(job_id) output_file = info.get("output_hint", "") @@ -302,9 +324,16 @@ def logs(self, job_id: str, *, tail: int = 100) -> str: parts.append(f"Log file (on cluster shared filesystem):") parts.append(f" {output_file}") parts.append("") - parts.append("View on login node:") - parts.append(f" less {output_file}") - parts.append(f" tail -{tail} {output_file}") + if follow: + parts.append("Follow logs:") + parts.append(f" tail -f {output_file}") + else: + parts.append("View on login node:") + parts.append(f" less {output_file}") + parts.append(f" tail -{tail} {output_file}") + parts.append("") + parts.append("Follow logs:") + parts.append(f" tail -f {output_file}") parts.append("") parts.append("Copy to local machine:") parts.append(f" scp :{output_file} .") @@ -319,6 +348,37 @@ def logs(self, job_id: str, *, tail: int = 100) -> str: return "\n".join(parts) + def list_jobs(self, *, status_filter: str = "") -> list[dict]: + """List Slurm jobs via slurmrestd /jobs endpoint.""" + body = self._rest_get(f"/slurm/{self.api_version}/jobs") + errors = body.get("errors") or [] + if errors: + msgs = "; ".join(e.get("error", str(e)) for e in errors) + raise RuntimeError(f"slurmrestd error: {msgs}") + + result: list[dict] = [] + for job in body.get("jobs", []): + name = job.get("name", "") + # Only show flowsim jobs (name starts with "flowsim-") + if not name.startswith("flowsim-"): + continue + + state = job.get("job_state", ["UNKNOWN"]) + if isinstance(state, list): + state = state[0] if state else "UNKNOWN" + + if status_filter and state.upper() != status_filter.upper(): + continue + + result.append({ + "job_id": str(job.get("job_id", "")), + "name": name, + "state": state, + "partition": 
job.get("partition", ""), + "nodes": job.get("nodes", ""), + }) + return result + def _parse_time_minutes(self) -> int: """Convert HH:MM:SS time_limit to total minutes.""" parts = self.time_limit.split(":") diff --git a/scripts/cli.py b/scripts/cli.py index c17796d..b5d2bc3 100644 --- a/scripts/cli.py +++ b/scripts/cli.py @@ -162,6 +162,16 @@ def main(argv: list[str] | None = None) -> int: help="Retrieve job logs (local/k8s/slurm)", add_help=False, ) + sub.add_parser( + "list", + help="List FlowSim jobs (local/k8s/slurm)", + add_help=False, + ) + sub.add_parser( + "cancel", + help="Cancel a running job (k8s/slurm)", + add_help=False, + ) args, remaining = parser.parse_known_args(argv) @@ -186,6 +196,18 @@ def main(argv: list[str] | None = None) -> int: main_logs(remaining) return 0 + if args.command == "list": + from scripts.status_profile import main_list + + main_list(remaining) + return 0 + + if args.command == "cancel": + from scripts.status_profile import main_cancel + + main_cancel(remaining) + return 0 + parser.print_help() return 1 diff --git a/scripts/status_profile.py b/scripts/status_profile.py index bfcce41..15244a4 100644 --- a/scripts/status_profile.py +++ b/scripts/status_profile.py @@ -1,5 +1,5 @@ #!/usr/bin/env python3 -"""Query FlowSim profiling job status and logs. +"""Query FlowSim profiling job status, logs, list, and cancel. 
Usage examples -------------- @@ -12,13 +12,18 @@ flowsim logs --scheduler k8s --job flowsim-perf-qwen3-8b-bs1-il2048 -Check Slurm job status:: +Follow K8s job logs:: - flowsim status --scheduler slurm --job 12345 + flowsim logs --scheduler k8s --job flowsim-perf-qwen3-8b-bs1-il2048 --follow -Check local job status (by job name prefix):: +List all FlowSim jobs:: - flowsim status --scheduler local --job flowsim-perf-qwen3-8b-bs1-il2048 + flowsim list --scheduler k8s + flowsim list --scheduler k8s --status Running + +Cancel a job:: + + flowsim cancel --scheduler k8s --job flowsim-perf-qwen3-8b-bs1-il2048 """ from __future__ import annotations @@ -37,31 +42,16 @@ def _d(env_var: str, cfg: dict, key: str, fallback: str = "") -> str: return os.environ.get(env_var, "") or cfg_get(cfg, key, fallback) -def _parse_args(argv: list[str] | None = None) -> argparse.Namespace: +def _add_scheduler_args(p: argparse.ArgumentParser) -> None: + """Add common scheduler connection args to a parser.""" k8s_cfg = load_k8s_config() slurm_cfg = load_slurm_config() - p = argparse.ArgumentParser( - description="Query FlowSim profiling job status or logs.", - formatter_class=argparse.RawDescriptionHelpFormatter, - ) - p.add_argument( "--scheduler", choices=["local", "k8s", "slurm"], required=True, ) - p.add_argument( - "--job", - required=True, - help="Job name (k8s/local) or job ID (slurm)", - ) - p.add_argument( - "--tail", - type=int, - default=100, - help="Number of log lines to show (default: 100)", - ) # -- Local options -- p.add_argument("--local-workdir", default="") @@ -98,7 +88,14 @@ def _parse_args(argv: list[str] | None = None) -> argparse.Namespace: action="store_true", ) - return p.parse_args(argv) + +def _resolve_slurm_jwt(args: argparse.Namespace) -> None: + """Resolve Slurm JWT from config if not provided.""" + if args.scheduler == "slurm" and not args.slurm_jwt_token: + slurm_cfg = load_slurm_config() + token = resolve_jwt_token(slurm_cfg) + if token: + args.slurm_jwt_token 
= token def _build_scheduler(args: argparse.Namespace): @@ -120,15 +117,12 @@ def _build_scheduler(args: argparse.Namespace): def main_status(argv: list[str] | None = None) -> None: - args = _parse_args(argv) - - # Resolve Slurm JWT if needed - if args.scheduler == "slurm" and not args.slurm_jwt_token: - slurm_cfg = load_slurm_config() - token = resolve_jwt_token(slurm_cfg) - if token: - args.slurm_jwt_token = token + p = argparse.ArgumentParser(description="Query FlowSim job status.") + _add_scheduler_args(p) + p.add_argument("--job", required=True, help="Job name or ID") + args = p.parse_args(argv) + _resolve_slurm_jwt(args) scheduler = _build_scheduler(args) try: info = scheduler.status(args.job) @@ -139,19 +133,60 @@ def main_status(argv: list[str] | None = None) -> None: def main_logs(argv: list[str] | None = None) -> None: - args = _parse_args(argv) + p = argparse.ArgumentParser(description="Retrieve FlowSim job logs.") + _add_scheduler_args(p) + p.add_argument("--job", required=True, help="Job name or ID") + p.add_argument("--tail", type=int, default=100, help="Number of log lines (default: 100)") + p.add_argument("--follow", "-f", action="store_true", help="Follow log output") + args = p.parse_args(argv) + + _resolve_slurm_jwt(args) + scheduler = _build_scheduler(args) + try: + text = scheduler.logs(args.job, tail=args.tail, follow=args.follow) + print(text) + except Exception as exc: + print(f"Error: {exc}", file=sys.stderr) + sys.exit(1) - # Resolve Slurm JWT if needed - if args.scheduler == "slurm" and not args.slurm_jwt_token: - slurm_cfg = load_slurm_config() - token = resolve_jwt_token(slurm_cfg) - if token: - args.slurm_jwt_token = token +def main_list(argv: list[str] | None = None) -> None: + p = argparse.ArgumentParser(description="List FlowSim jobs.") + _add_scheduler_args(p) + p.add_argument("--status", default="", help="Filter by job state (e.g. 
Running, Succeeded, PENDING)") + args = p.parse_args(argv) + + _resolve_slurm_jwt(args) scheduler = _build_scheduler(args) try: - text = scheduler.logs(args.job, tail=args.tail) - print(text) + jobs = scheduler.list_jobs(status_filter=args.status) + if not jobs: + print("No jobs found.") + return + # Print table header + headers = list(jobs[0].keys()) + widths = {h: max(len(h), max(len(str(j.get(h, ""))) for j in jobs)) for h in headers} + header_line = " ".join(h.upper().ljust(widths[h]) for h in headers) + print(header_line) + print("-" * len(header_line)) + for job in jobs: + print(" ".join(str(job.get(h, "")).ljust(widths[h]) for h in headers)) + except Exception as exc: + print(f"Error: {exc}", file=sys.stderr) + sys.exit(1) + + +def main_cancel(argv: list[str] | None = None) -> None: + p = argparse.ArgumentParser(description="Cancel a FlowSim job.") + _add_scheduler_args(p) + p.add_argument("--job", required=True, help="Job name or ID to cancel") + args = p.parse_args(argv) + + _resolve_slurm_jwt(args) + scheduler = _build_scheduler(args) + try: + msg = scheduler.cancel(args.job) + print(msg) except Exception as exc: print(f"Error: {exc}", file=sys.stderr) sys.exit(1) diff --git a/scripts/submit_profile.py b/scripts/submit_profile.py index 25061f1..bcd9c23 100644 --- a/scripts/submit_profile.py +++ b/scripts/submit_profile.py @@ -384,26 +384,39 @@ def main(argv: list[str] | None = None) -> None: print(scheduler.dry_run(spec)) else: if is_pd: - result = scheduler.submit_pd_pair(spec) + results = scheduler.submit_pd_pair(spec) + for r in results: + print(r.message) + # Use the first result for follow-up hints + result = results[0] else: result = scheduler.submit(spec) - print(result) + print(result.message) + # Tell user where to find results print() - print(f"Traces: {spec.output_dir}") - print(f"Logs: {spec.log_dir}") - if args.scheduler == "k8s": + print(f"Traces: {result.output_dir}") + print(f"Logs: {result.output_dir}/logs/") + job_id = result.job_id + 
sched = args.scheduler + + if sched == "k8s": if args.k8s_pvc: print(f" (persisted on PVC '{args.k8s_pvc}')") else: print(f" (persisted at hostPath '{args.k8s_host_output_dir}' on the node)") - print(f"\nTo check status: flowsim status --scheduler k8s --job {spec.default_job_name()[:63]}") - print(f"To view logs: flowsim logs --scheduler k8s --job {spec.default_job_name()[:63]}") - elif args.scheduler == "slurm": + print(f"\nTo check status: flowsim status --scheduler k8s --job {job_id}") + print(f"To view logs: flowsim logs --scheduler k8s --job {job_id}") + print(f"To follow logs: flowsim logs --scheduler k8s --job {job_id} --follow") + print(f"To cancel: flowsim cancel --scheduler k8s --job {job_id}") + elif sched == "slurm": print(f" (on cluster shared filesystem)") - print(f"\nTo check status: flowsim status --scheduler slurm --job ") + print(f"\nTo check status: flowsim status --scheduler slurm --job {job_id}") + print(f"To view logs: flowsim logs --scheduler slurm --job {job_id}") + print(f"To cancel: flowsim cancel --scheduler slurm --job {job_id}") else: - print(f"\nTo view logs: flowsim logs --scheduler local --job {spec.default_job_name()}") + print(f"\nTo view logs: flowsim logs --scheduler local --job {job_id}") + print(f"To list all jobs: flowsim list --scheduler {sched}") _INIT_HINT = "Run 'flowsim init' to create config files." From 8cd62f8ba65b451221d4462da7b3dc743814e075 Mon Sep 17 00:00:00 2001 From: Terrence Zhang Date: Tue, 17 Mar 2026 18:13:30 +0000 Subject: [PATCH 20/56] fix: CLI only shows scheduler-specific args based on --scheduler Two-pass argparse: peek --scheduler with a minimal pre-parser, then add only the relevant scheduler's options before full parse. 'flowsim submit --scheduler local --help' no longer shows k8s/slurm args. 
--- scripts/status_profile.py | 94 ++++---- scripts/submit_profile.py | 250 +++++++++++----------- tests/integration/test_scheduler_local.py | 188 +++++++++++++++- 3 files changed, 359 insertions(+), 173 deletions(-) diff --git a/scripts/status_profile.py b/scripts/status_profile.py index 15244a4..2f82ebc 100644 --- a/scripts/status_profile.py +++ b/scripts/status_profile.py @@ -43,50 +43,53 @@ def _d(env_var: str, cfg: dict, key: str, fallback: str = "") -> str: def _add_scheduler_args(p: argparse.ArgumentParser) -> None: - """Add common scheduler connection args to a parser.""" - k8s_cfg = load_k8s_config() - slurm_cfg = load_slurm_config() - + """Add common scheduler choice arg (first pass only).""" p.add_argument( "--scheduler", choices=["local", "k8s", "slurm"], required=True, ) - # -- Local options -- - p.add_argument("--local-workdir", default="") - # -- K8s options -- - p.add_argument( - "--k8s-namespace", - default=_d("FLOWSIM_K8S_NAMESPACE", k8s_cfg, "namespace", "default"), - ) - p.add_argument( - "--k8s-kubeconfig", - default=_d("KUBECONFIG", k8s_cfg, "kubeconfig", ""), - ) - p.add_argument( - "--k8s-context", - default=_d("FLOWSIM_K8S_CONTEXT", k8s_cfg, "context", ""), - ) +def _add_scheduler_specific_args(p: argparse.ArgumentParser, scheduler: str) -> None: + """Add only the args relevant to the chosen scheduler (second pass).""" + k8s_cfg = load_k8s_config() + slurm_cfg = load_slurm_config() - # -- Slurm options -- - p.add_argument( - "--slurm-rest-url", - default=_d("FLOWSIM_SLURM_REST_URL", slurm_cfg, "rest_url", ""), - ) - p.add_argument( - "--slurm-jwt-token", - default=_d("FLOWSIM_SLURM_JWT_TOKEN", slurm_cfg, "jwt_token", ""), - ) - p.add_argument( - "--slurm-api-version", - default=_d("FLOWSIM_SLURM_API_VERSION", slurm_cfg, "api_version", "v0.0.40"), - ) - p.add_argument( - "--slurm-no-verify-ssl", - action="store_true", - ) + if scheduler == "local": + p.add_argument("--local-workdir", default="") + + elif scheduler == "k8s": + p.add_argument( 
+ "--k8s-namespace", + default=_d("FLOWSIM_K8S_NAMESPACE", k8s_cfg, "namespace", "default"), + ) + p.add_argument( + "--k8s-kubeconfig", + default=_d("KUBECONFIG", k8s_cfg, "kubeconfig", ""), + ) + p.add_argument( + "--k8s-context", + default=_d("FLOWSIM_K8S_CONTEXT", k8s_cfg, "context", ""), + ) + + elif scheduler == "slurm": + p.add_argument( + "--slurm-rest-url", + default=_d("FLOWSIM_SLURM_REST_URL", slurm_cfg, "rest_url", ""), + ) + p.add_argument( + "--slurm-jwt-token", + default=_d("FLOWSIM_SLURM_JWT_TOKEN", slurm_cfg, "jwt_token", ""), + ) + p.add_argument( + "--slurm-api-version", + default=_d("FLOWSIM_SLURM_API_VERSION", slurm_cfg, "api_version", "v0.0.40"), + ) + p.add_argument( + "--slurm-no-verify-ssl", + action="store_true", + ) def _resolve_slurm_jwt(args: argparse.Namespace) -> None: @@ -100,7 +103,7 @@ def _resolve_slurm_jwt(args: argparse.Namespace) -> None: def _build_scheduler(args: argparse.Namespace): if args.scheduler == "local": - return LocalScheduler(workdir=args.local_workdir) + return LocalScheduler(workdir=getattr(args, "local_workdir", "")) elif args.scheduler == "k8s": return K8sScheduler( namespace=args.k8s_namespace, @@ -116,11 +119,20 @@ def _build_scheduler(args: argparse.Namespace): ) +def _parse_two_pass(p: argparse.ArgumentParser, argv: list[str] | None = None) -> argparse.Namespace: + """Two-pass parse: peek --scheduler, add scheduler-specific args, full parse.""" + _pre = argparse.ArgumentParser(add_help=False) + _pre.add_argument("--scheduler", choices=["local", "k8s", "slurm"]) + pre, _ = _pre.parse_known_args(argv) + _add_scheduler_specific_args(p, pre.scheduler) + return p.parse_args(argv) + + def main_status(argv: list[str] | None = None) -> None: p = argparse.ArgumentParser(description="Query FlowSim job status.") _add_scheduler_args(p) p.add_argument("--job", required=True, help="Job name or ID") - args = p.parse_args(argv) + args = _parse_two_pass(p, argv) _resolve_slurm_jwt(args) scheduler = _build_scheduler(args) @@ 
-138,7 +150,7 @@ def main_logs(argv: list[str] | None = None) -> None: p.add_argument("--job", required=True, help="Job name or ID") p.add_argument("--tail", type=int, default=100, help="Number of log lines (default: 100)") p.add_argument("--follow", "-f", action="store_true", help="Follow log output") - args = p.parse_args(argv) + args = _parse_two_pass(p, argv) _resolve_slurm_jwt(args) scheduler = _build_scheduler(args) @@ -154,7 +166,7 @@ def main_list(argv: list[str] | None = None) -> None: p = argparse.ArgumentParser(description="List FlowSim jobs.") _add_scheduler_args(p) p.add_argument("--status", default="", help="Filter by job state (e.g. Running, Succeeded, PENDING)") - args = p.parse_args(argv) + args = _parse_two_pass(p, argv) _resolve_slurm_jwt(args) scheduler = _build_scheduler(args) @@ -180,7 +192,7 @@ def main_cancel(argv: list[str] | None = None) -> None: p = argparse.ArgumentParser(description="Cancel a FlowSim job.") _add_scheduler_args(p) p.add_argument("--job", required=True, help="Job name or ID to cancel") - args = p.parse_args(argv) + args = _parse_two_pass(p, argv) _resolve_slurm_jwt(args) scheduler = _build_scheduler(args) diff --git a/scripts/submit_profile.py b/scripts/submit_profile.py index bcd9c23..18f7882 100644 --- a/scripts/submit_profile.py +++ b/scripts/submit_profile.py @@ -68,7 +68,7 @@ def _parse_args(argv: list[str] | None = None) -> argparse.Namespace: slurm_cfg = load_slurm_config() p = argparse.ArgumentParser( - description="Submit FlowSim profiling jobs to K8s or Slurm.", + description="Submit FlowSim profiling jobs to local, K8s, or Slurm.", formatter_class=argparse.RawDescriptionHelpFormatter, epilog=__doc__, ) @@ -117,127 +117,6 @@ def _parse_args(argv: list[str] | None = None) -> argparse.Namespace: infra.add_argument("--output-dir", default="") infra.add_argument("--job-name", default="") - # -- Local options -- - loc = p.add_argument_group("local options") - loc.add_argument( - "--local-gpus", - default="", - 
help="CUDA_VISIBLE_DEVICES for local execution (e.g. '0' or '0,1')", - ) - loc.add_argument( - "--local-workdir", - default="", - help="Working directory for local execution (default: FlowSim project root)", - ) - - # -- Kubernetes-specific -- - k8s = p.add_argument_group("kubernetes options (config: ~/.flowsim/k8s.yaml)") - k8s.add_argument( - "--k8s-namespace", - default=_d("FLOWSIM_K8S_NAMESPACE", k8s_cfg, "namespace", "default"), - help="K8s namespace (env: FLOWSIM_K8S_NAMESPACE)", - ) - k8s.add_argument( - "--k8s-kubeconfig", - default=_d("KUBECONFIG", k8s_cfg, "kubeconfig", ""), - help="Path to kubeconfig file (env: KUBECONFIG)", - ) - k8s.add_argument( - "--k8s-context", - default=_d("FLOWSIM_K8S_CONTEXT", k8s_cfg, "context", ""), - help="kubeconfig context (env: FLOWSIM_K8S_CONTEXT)", - ) - k8s.add_argument( - "--k8s-pvc", - default=cfg_get(k8s_cfg, "pvc", ""), - help="PVC name for output volume (omit for emptyDir)", - ) - k8s.add_argument( - "--k8s-host-output-dir", - default=cfg_get(k8s_cfg, "host_output_dir", ""), - help="hostPath for output (used when --k8s-pvc is empty)", - ) - k8s.add_argument( - "--k8s-node-selector", - action="append", - default=[], - metavar="KEY=VALUE", - help="Node selector labels (repeatable)", - ) - k8s.add_argument( - "--k8s-service-account", - default=cfg_get(k8s_cfg, "service_account", ""), - ) - k8s.add_argument( - "--k8s-shm-size", - default=cfg_get(k8s_cfg, "shm_size", "16Gi"), - ) - - # -- Slurm-specific -- - slurm = p.add_argument_group("slurm options (config: ~/.flowsim/slurm.yaml)") - slurm.add_argument( - "--slurm-partition", - default=_d("FLOWSIM_SLURM_PARTITION", slurm_cfg, "partition", ""), - help="Slurm partition (env: FLOWSIM_SLURM_PARTITION)", - ) - slurm.add_argument( - "--slurm-time", - default=_d("FLOWSIM_SLURM_TIME", slurm_cfg, "time", "02:00:00"), - help="Wall time limit (env: FLOWSIM_SLURM_TIME)", - ) - slurm.add_argument( - "--slurm-rest-url", - default=_d("FLOWSIM_SLURM_REST_URL", slurm_cfg, "rest_url", 
""), - help="slurmrestd base URL (env: FLOWSIM_SLURM_REST_URL)", - ) - slurm.add_argument( - "--slurm-jwt-token", - default=_d("FLOWSIM_SLURM_JWT_TOKEN", slurm_cfg, "jwt_token", ""), - help="JWT token for slurmrestd (env: FLOWSIM_SLURM_JWT_TOKEN)", - ) - slurm.add_argument( - "--slurm-api-version", - default=_d("FLOWSIM_SLURM_API_VERSION", slurm_cfg, "api_version", "v0.0.40"), - help="slurmrestd API version (env: FLOWSIM_SLURM_API_VERSION)", - ) - slurm.add_argument( - "--slurm-no-verify-ssl", - action="store_true", - help="Skip TLS certificate verification for slurmrestd", - ) - slurm.add_argument( - "--slurm-account", - default=cfg_get(slurm_cfg, "account", ""), - ) - slurm.add_argument( - "--slurm-constraint", - default=cfg_get(slurm_cfg, "constraint", ""), - ) - slurm.add_argument( - "--slurm-container-runtime", - choices=["docker", "enroot", "none"], - default=cfg_get(slurm_cfg, "container_runtime", "none"), - ) - slurm.add_argument( - "--slurm-container-mounts", - default=cfg_get(slurm_cfg, "container_mounts", ""), - ) - # Modules from config (list) + CLI (append) - cfg_modules = slurm_cfg.get("modules") if isinstance(slurm_cfg.get("modules"), list) else [] - slurm.add_argument( - "--slurm-module", - action="append", - default=[str(m) for m in cfg_modules], - help="Modules to load (repeatable, merged with config)", - ) - slurm.add_argument( - "--slurm-extra-sbatch", - action="append", - default=[], - metavar="DIRECTIVE", - help="Extra #SBATCH directives (repeatable, without prefix)", - ) - # -- Action -- p.add_argument( "--dry-run", @@ -276,6 +155,133 @@ def _parse_args(argv: list[str] | None = None) -> argparse.Namespace: help="InfiniBand device for RDMA transfer", ) + # ---- Two-pass: peek at --scheduler, then add only relevant args ---- + # Use a minimal pre-parser to avoid required-arg errors during peek. 
+ _pre = argparse.ArgumentParser(add_help=False) + _pre.add_argument("--scheduler", choices=["local", "k8s", "slurm"]) + pre, _ = _pre.parse_known_args(argv) + + if pre.scheduler == "local": + loc = p.add_argument_group("local options") + loc.add_argument( + "--local-gpus", + default="", + help="CUDA_VISIBLE_DEVICES for local execution (e.g. '0' or '0,1')", + ) + loc.add_argument( + "--local-workdir", + default="", + help="Working directory for local execution (default: FlowSim project root)", + ) + + elif pre.scheduler == "k8s": + k8s = p.add_argument_group("kubernetes options (config: ~/.flowsim/k8s.yaml)") + k8s.add_argument( + "--k8s-namespace", + default=_d("FLOWSIM_K8S_NAMESPACE", k8s_cfg, "namespace", "default"), + help="K8s namespace (env: FLOWSIM_K8S_NAMESPACE)", + ) + k8s.add_argument( + "--k8s-kubeconfig", + default=_d("KUBECONFIG", k8s_cfg, "kubeconfig", ""), + help="Path to kubeconfig file (env: KUBECONFIG)", + ) + k8s.add_argument( + "--k8s-context", + default=_d("FLOWSIM_K8S_CONTEXT", k8s_cfg, "context", ""), + help="kubeconfig context (env: FLOWSIM_K8S_CONTEXT)", + ) + k8s.add_argument( + "--k8s-pvc", + default=cfg_get(k8s_cfg, "pvc", ""), + help="PVC name for output volume (omit for emptyDir)", + ) + k8s.add_argument( + "--k8s-host-output-dir", + default=cfg_get(k8s_cfg, "host_output_dir", ""), + help="hostPath for output (used when --k8s-pvc is empty)", + ) + k8s.add_argument( + "--k8s-node-selector", + action="append", + default=[], + metavar="KEY=VALUE", + help="Node selector labels (repeatable)", + ) + k8s.add_argument( + "--k8s-service-account", + default=cfg_get(k8s_cfg, "service_account", ""), + ) + k8s.add_argument( + "--k8s-shm-size", + default=cfg_get(k8s_cfg, "shm_size", "16Gi"), + ) + + elif pre.scheduler == "slurm": + slurm = p.add_argument_group("slurm options (config: ~/.flowsim/slurm.yaml)") + slurm.add_argument( + "--slurm-partition", + default=_d("FLOWSIM_SLURM_PARTITION", slurm_cfg, "partition", ""), + help="Slurm partition (env: 
FLOWSIM_SLURM_PARTITION)", + ) + slurm.add_argument( + "--slurm-time", + default=_d("FLOWSIM_SLURM_TIME", slurm_cfg, "time", "02:00:00"), + help="Wall time limit (env: FLOWSIM_SLURM_TIME)", + ) + slurm.add_argument( + "--slurm-rest-url", + default=_d("FLOWSIM_SLURM_REST_URL", slurm_cfg, "rest_url", ""), + help="slurmrestd base URL (env: FLOWSIM_SLURM_REST_URL)", + ) + slurm.add_argument( + "--slurm-jwt-token", + default=_d("FLOWSIM_SLURM_JWT_TOKEN", slurm_cfg, "jwt_token", ""), + help="JWT token for slurmrestd (env: FLOWSIM_SLURM_JWT_TOKEN)", + ) + slurm.add_argument( + "--slurm-api-version", + default=_d("FLOWSIM_SLURM_API_VERSION", slurm_cfg, "api_version", "v0.0.40"), + help="slurmrestd API version (env: FLOWSIM_SLURM_API_VERSION)", + ) + slurm.add_argument( + "--slurm-no-verify-ssl", + action="store_true", + help="Skip TLS certificate verification for slurmrestd", + ) + slurm.add_argument( + "--slurm-account", + default=cfg_get(slurm_cfg, "account", ""), + ) + slurm.add_argument( + "--slurm-constraint", + default=cfg_get(slurm_cfg, "constraint", ""), + ) + slurm.add_argument( + "--slurm-container-runtime", + choices=["docker", "enroot", "none"], + default=cfg_get(slurm_cfg, "container_runtime", "none"), + ) + slurm.add_argument( + "--slurm-container-mounts", + default=cfg_get(slurm_cfg, "container_mounts", ""), + ) + # Modules from config (list) + CLI (append) + cfg_modules = slurm_cfg.get("modules") if isinstance(slurm_cfg.get("modules"), list) else [] + slurm.add_argument( + "--slurm-module", + action="append", + default=[str(m) for m in cfg_modules], + help="Modules to load (repeatable, merged with config)", + ) + slurm.add_argument( + "--slurm-extra-sbatch", + action="append", + default=[], + metavar="DIRECTIVE", + help="Extra #SBATCH directives (repeatable, without prefix)", + ) + return p.parse_args(argv) diff --git a/tests/integration/test_scheduler_local.py b/tests/integration/test_scheduler_local.py index 879c00f..932ea9e 100644 --- 
a/tests/integration/test_scheduler_local.py +++ b/tests/integration/test_scheduler_local.py @@ -1,11 +1,11 @@ -"""Integration tests for ``flowsim submit``, ``flowsim status``, ``flowsim logs``. +"""Integration tests for the FlowSim scheduler CLI. Tests all three scheduler backends (local, k8s, slurm) end-to-end. * **local** — runs real TP=1 profiling and verifies traces, parsed CSVs, - and log files are all produced in the correct locations. -* **k8s** — submits a real Job to a Kind cluster, verifies it was created, - then checks ``flowsim status`` / ``flowsim logs`` output. Also validates + log files, JobResult return, cancel, list, logs --follow. +* **k8s** — submits a real Job to a Kind cluster, verifies JobResult, + status, logs, list, cancel, logs --follow. Also validates that dry-run YAML has the correct volume mounts and log paths. * **slurm** — dry-run only; verifies the sbatch script has the correct ``output_dir``, ``--log-dir``, and ``#SBATCH --output`` directives. @@ -35,6 +35,7 @@ """ import glob +import json import os import subprocess import sys @@ -42,6 +43,9 @@ import pytest +from schedulers.base import JobResult, ProfileJobSpec +from schedulers.local import LocalScheduler + _PROJECT_ROOT = os.path.abspath( os.path.join(os.path.dirname(__file__), "..", "..") ) @@ -182,6 +186,161 @@ def test_local_logs(self): # Should contain file listing or "No logs" — not crash assert "Log directory:" in output or "No logs" in output + def test_local_logs_follow(self): + """flowsim logs --follow should show tail -f command.""" + r = _flowsim_cli( + "logs", + "--scheduler", "local", + "--job", "flowsim-perf", + "--follow", + ) + assert r.returncode == 0 + output = r.stdout + assert "tail -f" in output or "No logs" in output + + def test_local_cancel(self): + """flowsim cancel --scheduler local should return a message (sync jobs can't be cancelled).""" + r = _flowsim_cli( + "cancel", + "--scheduler", "local", + "--job", "flowsim-perf", + ) + assert r.returncode == 0 
+ assert "cannot be cancelled" in r.stdout.lower() or "synchronous" in r.stdout.lower() + + def test_local_list(self): + """flowsim list --scheduler local should list jobs from log files.""" + r = _flowsim_cli( + "list", + "--scheduler", "local", + ) + assert r.returncode == 0 + output = r.stdout + # Should either show jobs or "No jobs found" + assert "JOB_ID" in output or "No jobs found" in output + + def test_local_list_status_filter(self): + """flowsim list --status Completed should filter.""" + r = _flowsim_cli( + "list", + "--scheduler", "local", + "--status", "Completed", + ) + assert r.returncode == 0 + + +# ===================================================================== +# LOCAL SCHEDULER — unit-level tests for JobResult and list_jobs +# ===================================================================== +class TestLocalSchedulerAPI: + """Test LocalScheduler API directly (no subprocess, no GPU).""" + + def test_submit_returns_job_result(self): + """LocalScheduler.submit() must return a JobResult, not a string.""" + import tempfile + with tempfile.TemporaryDirectory() as tmpdir: + sched = LocalScheduler(workdir=tmpdir) + spec = ProfileJobSpec( + collect="perf", + model_path="Qwen/Qwen3-8B", + output_dir=os.path.join(tmpdir, "traces"), + ) + # Monkey-patch: make build_shell_command return a trivial command + spec.build_shell_command = lambda: "echo hello" + result = sched.submit(spec) + assert isinstance(result, JobResult), f"Expected JobResult, got {type(result)}" + assert result.scheduler == "local" + assert result.state == "Completed" + assert result.job_id != "" + assert result.output_dir == spec.output_dir + + def test_submit_failed_returns_failed_state(self): + """A failing command should return JobResult with state=Failed.""" + import tempfile + with tempfile.TemporaryDirectory() as tmpdir: + sched = LocalScheduler(workdir=tmpdir) + spec = ProfileJobSpec( + collect="perf", + model_path="Qwen/Qwen3-8B", + output_dir=os.path.join(tmpdir, 
"traces"), + ) + spec.build_shell_command = lambda: "exit 1" + result = sched.submit(spec) + assert isinstance(result, JobResult) + assert result.state == "Failed" + + def test_list_jobs_finds_log_files(self): + """list_jobs() should find jobs from log file names.""" + import tempfile + with tempfile.TemporaryDirectory() as tmpdir: + log_dir = os.path.join(tmpdir, "stage_traces", "logs") + os.makedirs(log_dir) + # Create fake log files + for name in [ + "flowsim-perf-qwen3-8b-bs1-il512_1700000001.stdout.log", + "flowsim-perf-qwen3-8b-bs1-il512_1700000001.stderr.log", + "flowsim-perf-qwen3-8b-bs1-il1024_1700000002.stdout.log", + "flowsim-perf-qwen3-8b-bs1-il1024_1700000002.stderr.log", + ]: + open(os.path.join(log_dir, name), "w").close() + + sched = LocalScheduler(workdir=tmpdir) + jobs = sched.list_jobs() + assert len(jobs) == 2 + assert all("job_id" in j and "state" in j for j in jobs) + + def test_list_jobs_status_filter(self): + """list_jobs(status_filter=...) should filter results.""" + import tempfile + with tempfile.TemporaryDirectory() as tmpdir: + log_dir = os.path.join(tmpdir, "stage_traces", "logs") + os.makedirs(log_dir) + open(os.path.join(log_dir, "flowsim-perf-x_100.stdout.log"), "w").close() + open(os.path.join(log_dir, "flowsim-perf-x_100.stderr.log"), "w").close() + + sched = LocalScheduler(workdir=tmpdir) + assert len(sched.list_jobs(status_filter="Completed")) == 1 + assert len(sched.list_jobs(status_filter="Running")) == 0 + + def test_logs_follow_shows_tail_f(self): + """logs(follow=True) should return a tail -f command.""" + import tempfile + with tempfile.TemporaryDirectory() as tmpdir: + log_dir = os.path.join(tmpdir, "stage_traces", "logs") + os.makedirs(log_dir) + open(os.path.join(log_dir, "flowsim-perf-x_100.stdout.log"), "w").close() + + sched = LocalScheduler(workdir=tmpdir) + text = sched.logs("flowsim-perf-x", follow=True) + assert "tail -f" in text + + def test_cancel_returns_message(self): + """cancel() should return a message 
about sync jobs.""" + sched = LocalScheduler() + msg = sched.cancel("some-job") + assert "synchronous" in msg.lower() or "cannot" in msg.lower() + + def test_submit_pd_pair_returns_list(self): + """submit_pd_pair() must return list[JobResult].""" + import tempfile + with tempfile.TemporaryDirectory() as tmpdir: + sched = LocalScheduler(workdir=tmpdir) + spec = ProfileJobSpec( + collect="perf", + model_path="Qwen/Qwen3-8B", + output_dir=os.path.join(tmpdir, "traces"), + ) + # Monkey-patch to avoid real profiling + spec.build_shell_command = lambda: "echo hello" + results = sched.submit_pd_pair(spec) + assert isinstance(results, list) + assert len(results) == 2 + assert all(isinstance(r, JobResult) for r in results) + # One should be prefill, one decode + modes = {r.job_id for r in results} + assert any("prefill" in m for m in modes) + assert any("decode" in m for m in modes) + # ===================================================================== # K8S SCHEDULER @@ -289,12 +448,21 @@ def test_k8s_real_submit_to_kind(self): # Should mention kubectl or pod name or "No pods" assert "kubectl" in r3.stdout or "No pods" in r3.stdout or "Pod:" in r3.stdout - # Cleanup: delete the K8s job - subprocess.run( - ["kubectl", "--context", "kind-flowsim", "delete", "job", job_name, - "-n", "default", "--ignore-not-found"], - capture_output=True, timeout=30, - ) + # Check logs --follow + r3f = _flowsim_cli("logs", "--scheduler", "k8s", "--job", job_name, "--follow") + assert r3f.returncode == 0 + assert "kubectl logs -f" in r3f.stdout + + # Check list + r4 = _flowsim_cli("list", "--scheduler", "k8s") + assert r4.returncode == 0 + # Our job should appear in the listing + assert job_name in r4.stdout or "JOB_ID" in r4.stdout + + # Cancel via flowsim cancel + r5 = _flowsim_cli("cancel", "--scheduler", "k8s", "--job", job_name) + assert r5.returncode == 0 + assert "deleted" in r5.stdout.lower() # ===================================================================== From 
84c895353b43ef050542fcafd4c7b4dce3e67c90 Mon Sep 17 00:00:00 2001 From: Terrence Zhang Date: Tue, 17 Mar 2026 18:23:13 +0000 Subject: [PATCH 21/56] fix: use python3 instead of python in profile command Most systems (Ubuntu, Debian) don't have 'python' symlink by default. --- schedulers/base.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/schedulers/base.py b/schedulers/base.py index 40d9cea..3a35682 100644 --- a/schedulers/base.py +++ b/schedulers/base.py @@ -88,7 +88,7 @@ def log_dir(self) -> str: def build_profile_command(self) -> list[str]: """Build the full ``python scripts/run_stage_profile.py ...`` command.""" cmd = [ - "python", + "python3", "scripts/run_stage_profile.py", "--collect", self.collect, From fab6314fed5253f73f49f20be972f2355f7d6600 Mon Sep 17 00:00:00 2001 From: Terrence Zhang Date: Tue, 17 Mar 2026 18:42:43 +0000 Subject: [PATCH 22/56] fix: use YYYYMMDD_HHMMSS timestamp in log filenames Before: flowsim-perf-qwen3-8b_1773771736.stdout.log After: flowsim-perf-qwen3-8b_20260317_184236.stdout.log list_jobs() regex updated to support both old epoch and new formats. --- schedulers/local.py | 123 ++++++++++++++-------- tests/integration/test_scheduler_local.py | 13 ++- tests/unit/test_scheduler_cli.py | 11 +- 3 files changed, 98 insertions(+), 49 deletions(-) diff --git a/schedulers/local.py b/schedulers/local.py index f23a743..4ec94c9 100644 --- a/schedulers/local.py +++ b/schedulers/local.py @@ -1,7 +1,8 @@ -"""Local scheduler — run profiling directly on this machine. +"""Local scheduler — run profiling via Docker on the local machine. -``render()`` returns the shell command string. +``render()`` returns the ``docker run`` command string. ``submit()`` executes it as a subprocess, with stdout/stderr tee'd to log files. +The profiling runs inside the FlowSim Docker image with GPU access. 
""" from __future__ import annotations @@ -14,17 +15,23 @@ from schedulers.base import BaseScheduler, JobResult, ProfileJobSpec +def _shell_quote(s: str) -> str: + """Quote a string for safe embedding in a bash -c '...' invocation.""" + import shlex + return shlex.quote(s) + + class LocalScheduler(BaseScheduler): - """Run profiling jobs locally via subprocess. + """Run profiling jobs locally inside a Docker container. Parameters ---------- gpus : str - ``CUDA_VISIBLE_DEVICES`` value (e.g., ``"0"`` or ``"0,1"``). - Empty string means use all visible GPUs. + GPU device IDs for Docker ``--gpus`` (e.g., ``"0"`` or ``"0,1"``). + Empty string means all GPUs. workdir : str - Working directory for the subprocess. - Defaults to the FlowSim project root. + Host directory to use as the FlowSim project root for log scanning. + Defaults to the FlowSim project root on the host. """ def __init__( @@ -43,56 +50,83 @@ def _find_project_root() -> str: # schedulers/ is one level below project root return os.path.dirname(d) + def _docker_gpu_flag(self) -> str: + """Build the ``--gpus`` flag for ``docker run``.""" + if not self.gpus: + return "--gpus all" + return f"--gpus '\"device={self.gpus}\"'" + + def _build_docker_cmd(self, spec: ProfileJobSpec) -> str: + """Build the full ``docker run`` command.""" + job_name = spec.default_job_name()[:63] + # Container always works with /flowsim/stage_traces internally. + container_output = "/flowsim/stage_traces" + container_log_dir = container_output + "/logs" + host_output = os.path.abspath(spec.output_dir) + host_log_dir = host_output + "/logs" + + # Build the inner command, then replace host paths with container paths. 
+ inner_cmd = spec.build_shell_command() + inner_cmd = inner_cmd.replace(host_log_dir, container_log_dir) + inner_cmd = inner_cmd.replace(host_output, container_output) + + parts = [ + "docker run --rm", + f"--name {job_name}", + self._docker_gpu_flag(), + "--ipc=host --shm-size=16g", + "--network=host", + f"-e SGLANG_PROFILE_KERNELS=1", + f"-v {host_output}:{container_output}", + f"-w /flowsim", + spec.image, + f"bash -c {_shell_quote(inner_cmd)}", + ] + return " \\\n ".join(parts) + def render(self, spec: ProfileJobSpec) -> str: - lines = [] - if self.gpus: - lines.append(f"export CUDA_VISIBLE_DEVICES={self.gpus}") - lines.append("export SGLANG_PROFILE_KERNELS=1") - lines.append(f"cd {self.workdir}") - lines.append(spec.build_shell_command()) - return "\n".join(lines) + return self._build_docker_cmd(spec) def submit(self, spec: ProfileJobSpec) -> JobResult: - """Run the profiling command locally as a subprocess. + """Launch a Docker container for profiling. stdout and stderr are streamed to the terminal *and* saved to - log files under ``spec.log_dir``. + log files under ``spec.output_dir/logs/`` on the host. """ - cmd = spec.build_shell_command() - - env = os.environ.copy() - env["SGLANG_PROFILE_KERNELS"] = "1" - if self.gpus: - env["CUDA_VISIBLE_DEVICES"] = self.gpus + # Ensure host output dir exists before mounting + host_output = os.path.abspath(spec.output_dir) + log_dir = os.path.join(host_output, "logs") + os.makedirs(log_dir, exist_ok=True) + docker_cmd = self._build_docker_cmd(spec) job_name = spec.default_job_name() - log_dir = spec.log_dir - os.makedirs(log_dir, exist_ok=True) - ts = int(time.time()) + ts = time.strftime("%Y%m%d_%H%M%S") + + # Remove stale container with the same name (e.g. 
from a killed run) + subprocess.run( + ["docker", "rm", "-f", job_name[:63]], + capture_output=True, timeout=10, + ) stdout_path = os.path.join(log_dir, f"{job_name}_{ts}.stdout.log") stderr_path = os.path.join(log_dir, f"{job_name}_{ts}.stderr.log") - print(f"[local] Running {job_name}...") - print(f"[local] cmd: {cmd}") - print(f"[local] workdir: {self.workdir}") - if self.gpus: - print(f"[local] CUDA_VISIBLE_DEVICES={self.gpus}") + print(f"[local] Running {job_name} in Docker...") + print(f"[local] image: {spec.image}") + print(f"[local] gpus: {self.gpus or 'all'}") + print(f"[local] host output: {host_output}") print(f"[local] logs: {stdout_path}") print(f"[local] {stderr_path}") + print(f"[local] cmd:\n {docker_cmd}") print() with open(stdout_path, "w") as fout, open(stderr_path, "w") as ferr: proc = subprocess.Popen( - cmd, + docker_cmd, shell=True, cwd=self.workdir, - env=env, stdout=subprocess.PIPE, stderr=subprocess.PIPE, ) - # Stream stdout/stderr to terminal + log files in real time. - # Use threads to avoid blocking on one stream while the other - # fills its OS pipe buffer. import threading def _tee(src, dest_file, dest_stream): @@ -119,7 +153,7 @@ def _tee(src, dest_file, dest_stream): job_id=job_name, scheduler="local", state="Failed", - output_dir=spec.output_dir, + output_dir=host_output, message=( f"{job_name} FAILED (exit code {proc.returncode})\n" f"stdout log: {stdout_path}\n" @@ -130,7 +164,7 @@ def _tee(src, dest_file, dest_stream): job_id=job_name, scheduler="local", state="Completed", - output_dir=spec.output_dir, + output_dir=host_output, message=( f"{job_name} completed successfully\n" f"stdout log: {stdout_path}\n" @@ -139,8 +173,14 @@ def _tee(src, dest_file, dest_stream): ) def cancel(self, job_id: str) -> str: - """Local jobs run synchronously, so cancel is not applicable.""" - return f"Local jobs run synchronously and cannot be cancelled. 
Job: {job_id}" + """Stop the Docker container for a local job.""" + proc = subprocess.run( + ["docker", "stop", job_id], + capture_output=True, text=True, timeout=30, + ) + if proc.returncode == 0: + return f"Stopped container {job_id}" + return f"Could not stop container {job_id}: {proc.stderr.strip()}" def status(self, job_id: str) -> dict: """Check local job status by looking for log files. @@ -235,8 +275,9 @@ def list_jobs(self, *, status_filter: str = "") -> list[dict]: jobs: list[dict] = [] for path in matches: basename = os.path.basename(path) - # Parse: {job_name}_{timestamp}.stdout.log - m = re.match(r"^(.+)_(\d+)\.stdout\.log$", basename) + # Parse: {job_name}_{YYYYMMDD_HHMMSS}.stdout.log + # Also support old epoch format {job_name}_{digits}.stdout.log + m = re.match(r"^(.+)_(\d{8}_\d{6}|\d{10,})\.stdout\.log$", basename) if not m: continue name = m.group(1) diff --git a/tests/integration/test_scheduler_local.py b/tests/integration/test_scheduler_local.py index 932ea9e..56cbdde 100644 --- a/tests/integration/test_scheduler_local.py +++ b/tests/integration/test_scheduler_local.py @@ -128,6 +128,10 @@ def _assert_logs(output_dir: str) -> None: class TestLocalScheduler: """Run real profiling via ``flowsim submit --scheduler local``.""" + @pytest.mark.skipif( + not os.path.isdir("/flowsim"), + reason="Local profiling tests must run inside the FlowSim Docker container", + ) def test_local_perf_tp1(self): """TP=1 perf profiling: traces + parsed CSVs + log files.""" output_dir = os.path.join(ARTIFACT_DIR, "sched_local_tp1") @@ -199,14 +203,15 @@ def test_local_logs_follow(self): assert "tail -f" in output or "No logs" in output def test_local_cancel(self): - """flowsim cancel --scheduler local should return a message (sync jobs can't be cancelled).""" + """flowsim cancel --scheduler local should attempt docker stop.""" r = _flowsim_cli( "cancel", "--scheduler", "local", "--job", "flowsim-perf", ) assert r.returncode == 0 - assert "cannot be cancelled" in 
r.stdout.lower() or "synchronous" in r.stdout.lower() + out = r.stdout.lower() + assert "stop" in out or "container" in out def test_local_list(self): """flowsim list --scheduler local should list jobs from log files.""" @@ -315,10 +320,10 @@ def test_logs_follow_shows_tail_f(self): assert "tail -f" in text def test_cancel_returns_message(self): - """cancel() should return a message about sync jobs.""" + """cancel() should attempt docker stop and return a message.""" sched = LocalScheduler() msg = sched.cancel("some-job") - assert "synchronous" in msg.lower() or "cannot" in msg.lower() + assert "stop" in msg.lower() or "container" in msg.lower() def test_submit_pd_pair_returns_list(self): """submit_pd_pair() must return list[JobResult].""" diff --git a/tests/unit/test_scheduler_cli.py b/tests/unit/test_scheduler_cli.py index 055e117..1d50f64 100644 --- a/tests/unit/test_scheduler_cli.py +++ b/tests/unit/test_scheduler_cli.py @@ -74,7 +74,7 @@ def test_build_server_opts_extra(self, spec: ProfileJobSpec): def test_build_profile_command(self, spec: ProfileJobSpec): cmd = spec.build_profile_command() - assert cmd[0] == "python" + assert cmd[0] == "python3" assert "scripts/run_stage_profile.py" in cmd[1] assert "--collect" in cmd assert "perf" in cmd @@ -291,7 +291,8 @@ def spec(self) -> ProfileJobSpec: def test_render_with_gpus(self, spec): sched = LocalScheduler(gpus="0,1") output = sched.render(spec) - assert "CUDA_VISIBLE_DEVICES=0,1" in output + assert "device=0,1" in output + assert "docker run" in output def test_render_without_gpus(self, spec): sched = LocalScheduler(gpus="") @@ -307,7 +308,9 @@ def test_render_has_command(self, spec): def test_render_workdir(self, spec): sched = LocalScheduler(workdir="/my/project") output = sched.render(spec) - assert "cd /my/project" in output + # Docker mode: workdir is used for log scanning, not in the docker command + assert "docker run" in output + assert "scripts/run_stage_profile.py" in output def 
test_dry_run_equals_render(self, spec): sched = LocalScheduler(gpus="0") @@ -486,7 +489,7 @@ def test_submit_local_dry_run_with_gpus(self): "--local-gpus", "0,1", "--dry-run", ) - assert "CUDA_VISIBLE_DEVICES=0,1" in out + assert "device=0,1" in out def test_submit_k8s_dry_run(self): out = self._run( From 8f7605207626d8f7f7d52346a5df9d3e525cd5cc Mon Sep 17 00:00:00 2001 From: Terrence Zhang Date: Wed, 18 Mar 2026 20:42:50 +0000 Subject: [PATCH 23/56] scheduler: add Slurm CLI mode (sbatch/squeue/scancel) + integration test - Add submit_via='cli' mode to SlurmScheduler, using sbatch/squeue/scancel subprocess calls instead of slurmrestd REST API (which has JWT auth issues in Slurm 23.11 docker containers). - Add cli_prefix param for running commands via docker exec. - Use scontrol show job for status (works without slurmdbd). - Slurm compose: base image on flowsim-image:latest, compile Slurm 23.11 with NVML support, cgroup/v1, explicit GRES config. - Slurm test passes in ~76s (same as K8s test). - K8s test uses host mount for traces (no docker cp). - All three backends (local, k8s, slurm) tested and working. 
--- dockerfiles/cgroup.conf | 3 + dockerfiles/dev-setup.sh | 227 +++++- dockerfiles/dev-teardown.sh | 4 + dockerfiles/gres.conf | 3 + dockerfiles/kind-multi-node.yaml | 67 +- dockerfiles/slurm-compose.yaml | 73 +- dockerfiles/slurm-node.dockerfile | 19 +- dockerfiles/slurm.conf | 23 +- schedulers/k8s.py | 11 +- schedulers/local.py | 106 ++- schedulers/slurm.py | 193 +++++- schedulers/templates/k8s.yaml | 1 + scripts/cli.py | 3 + scripts/status_profile.py | 22 + scripts/submit_profile.py | 52 +- simulator/base_parser.py | 6 +- tests/integration/test_scheduler_local.py | 805 ++++++++++++---------- tests/unit/test_scheduler_cli.py | 10 + 18 files changed, 1088 insertions(+), 540 deletions(-) create mode 100644 dockerfiles/cgroup.conf create mode 100644 dockerfiles/gres.conf diff --git a/dockerfiles/cgroup.conf b/dockerfiles/cgroup.conf new file mode 100644 index 0000000..68de2cc --- /dev/null +++ b/dockerfiles/cgroup.conf @@ -0,0 +1,3 @@ +# cgroup.conf — use cgroup v1 (only v1 plugin available; v2 host is compatible +# via the unified/hybrid hierarchy mount) +CgroupPlugin=cgroup/v1 diff --git a/dockerfiles/dev-setup.sh b/dockerfiles/dev-setup.sh index d948bf0..7cefe05 100755 --- a/dockerfiles/dev-setup.sh +++ b/dockerfiles/dev-setup.sh @@ -14,8 +14,10 @@ set -euo pipefail SCRIPT_DIR="$(cd "$(dirname "$0")" && pwd)" KIND_VERSION="v0.27.0" KIND_CLUSTER_NAME="flowsim" +KIND_WORKERS=("${KIND_CLUSTER_NAME}-worker") KUBECTL_STABLE_URL="https://dl.k8s.io/release/stable.txt" -NVIDIA_DEVICE_PLUGIN="https://raw.githubusercontent.com/NVIDIA/k8s-device-plugin/v0.17.0/deployments/static/nvidia-device-plugin.yml" +HELM_INSTALL_URL="https://raw.githubusercontent.com/helm/helm/main/scripts/get-helm-3" +NVIDIA_CTK_KEYRING="/usr/share/keyrings/nvidia-container-toolkit-keyring.gpg" log() { printf "\033[1;32m[setup]\033[0m %s\n" "$*"; } warn() { printf "\033[1;33m[setup]\033[0m %s\n" "$*"; } @@ -57,38 +59,237 @@ ensure_kubectl() { } # 
---------------------------------------------------------------- -# Kind cluster +# Kind cluster with NVIDIA GPU via CDI +# (Official approach from NVIDIA k8s-device-plugin demo) +# https://github.com/NVIDIA/k8s-device-plugin/tree/main/demo/clusters/kind # ---------------------------------------------------------------- +ensure_nvidia_runtime() { + # Docker must use nvidia as default runtime so Kind node containers get GPU access + command -v nvidia-ctk >/dev/null || err "nvidia-container-toolkit is required (nvidia-ctk not found)." + command -v nvidia-smi >/dev/null || err "NVIDIA driver not found (nvidia-smi missing)." + log "nvidia-ctk: $(nvidia-ctk --version 2>&1 | head -1)" + + if ! docker info 2>/dev/null | grep -q "Default Runtime: nvidia"; then + log "Setting nvidia as default Docker runtime..." + sudo nvidia-ctk runtime configure --runtime=docker --set-as-default + sudo systemctl restart docker + log "Docker restarted with nvidia runtime as default" + else + log "Docker already using nvidia as default runtime" + fi + + # Required: accept-nvidia-visible-devices-as-volume-mounts must be true + # for Kind GPU passthrough via /var/run/nvidia-container-devices/all + local cfg="/etc/nvidia-container-runtime/config.toml" + if grep -qE '^\s*accept-nvidia-visible-devices-as-volume-mounts\s*=\s*true' "$cfg" 2>/dev/null; then + log "accept-nvidia-visible-devices-as-volume-mounts already enabled" + else + log "Enabling accept-nvidia-visible-devices-as-volume-mounts in $cfg..." 
+ if grep -qE '#?\s*accept-nvidia-visible-devices-as-volume-mounts' "$cfg" 2>/dev/null; then + sudo sed -i 's/#*\s*accept-nvidia-visible-devices-as-volume-mounts.*/accept-nvidia-visible-devices-as-volume-mounts = true/' "$cfg" + else + echo 'accept-nvidia-visible-devices-as-volume-mounts = true' | sudo tee -a "$cfg" >/dev/null + fi + sudo systemctl restart docker + log "Host nvidia-container-runtime config updated and Docker restarted" + fi +} + +ensure_helm() { + if command -v helm >/dev/null; then + log "helm already installed: $(helm version --short 2>/dev/null)" + return + fi + log "Installing helm..." + curl -fsSL "${HELM_INSTALL_URL}" | bash + log "helm installed: $(helm version --short)" +} + setup_kind() { ensure_docker + ensure_nvidia_runtime ensure_kind ensure_kubectl + ensure_helm if kind get clusters 2>/dev/null | grep -q "^${KIND_CLUSTER_NAME}$"; then warn "kind cluster '${KIND_CLUSTER_NAME}' already exists, skipping creation" else - log "Creating kind cluster '${KIND_CLUSTER_NAME}' (1 control-plane + 2 GPU workers)..." + log "Creating kind cluster '${KIND_CLUSTER_NAME}' (1 control-plane + 1 GPU worker)..." kind create cluster --name "${KIND_CLUSTER_NAME}" \ --config "${SCRIPT_DIR}/kind-multi-node.yaml" - log "Installing NVIDIA device plugin..." - kubectl apply -f "${NVIDIA_DEVICE_PLUGIN}" fi - log "Cluster nodes:" - kubectl get nodes -o wide - echo + # ── Post-creation: configure GPU support inside each worker node ── + for worker in "${KIND_WORKERS[@]}"; do + log "=== Configuring ${worker} ===" + + # Step 1: Unmount masked /proc/driver/nvidia + log "Unmounting /proc/driver/nvidia in ${worker}..." + docker exec "${worker}" umount -R /proc/driver/nvidia 2>/dev/null || true + + # Step 2: Install nvidia-container-toolkit inside the worker node + log "Installing nvidia-container-toolkit inside ${worker}..." 
+ docker exec "${worker}" bash -c "apt-get update && apt-get install -y gpg" + docker exec "${worker}" bash -c "\ + curl -fsSL https://nvidia.github.io/libnvidia-container/gpgkey \ + | gpg --dearmor -o ${NVIDIA_CTK_KEYRING} \ + && curl -s -L https://nvidia.github.io/libnvidia-container/stable/deb/nvidia-container-toolkit.list \ + | sed 's#deb https://#deb [signed-by=${NVIDIA_CTK_KEYRING}] https://#g' \ + | tee /etc/apt/sources.list.d/nvidia-container-toolkit.list \ + && apt-get update \ + && apt-get install -y nvidia-container-toolkit" + + # Step 3: Configure CDI mode in containerd inside worker + log "Configuring CDI mode for containerd in ${worker}..." + docker exec "${worker}" bash -c "\ + nvidia-ctk config --set nvidia-container-runtime.modes.cdi.annotation-prefixes=nvidia.cdi.k8s.io/ \ + && nvidia-ctk runtime configure --runtime=containerd --cdi.enabled --config-source=command \ + && systemctl restart containerd" + # Step 4: Label worker node for GPU presence + kubectl --context "kind-${KIND_CLUSTER_NAME}" label node "${worker}" \ + --overwrite nvidia.com/gpu.present=true + done + + # Step 5: Create nvidia RuntimeClass + log "Creating nvidia RuntimeClass..." + kubectl --context "kind-${KIND_CLUSTER_NAME}" apply -f - <<'RTEOF' +apiVersion: node.k8s.io/v1 +handler: nvidia +kind: RuntimeClass +metadata: + name: nvidia +RTEOF + + # Step 6: Deploy per-node NVIDIA device plugin DaemonSets + # Each worker gets its own DaemonSet with a specific NVIDIA_VISIBLE_DEVICES + # so the device plugin only discovers/advertises that worker's assigned GPU. + # (Helm's single DaemonSet can't set different env per node.) + log "Deploying NVIDIA device plugin (per-node GPU assignment)..." + local CTX="kind-${KIND_CLUSTER_NAME}" + local PLUGIN_IMAGE="nvcr.io/nvidia/k8s-device-plugin:v0.17.1" + local gpu_idx=0 + for worker in "${KIND_WORKERS[@]}"; do + local ds_name="nvidia-device-plugin-${worker##*-}" # e.g. 
nvidia-device-plugin-worker + kubectl --context "$CTX" apply -f - </dev/null 2>&1; then + for worker in "${KIND_WORKERS[@]}"; do + if docker exec "${worker}" crictl images 2>/dev/null | grep -q "flowsim-image.*latest"; then + log "${FLOWSIM_IMAGE} already loaded in ${worker}, skipping" + else + log "Loading ${FLOWSIM_IMAGE} into ${worker} (~34GB, may take several minutes)..." + if command -v pv >/dev/null; then + docker save "${FLOWSIM_IMAGE}" | pv -f -a -b | \ + docker exec -i "${worker}" ctr -n k8s.io images import - + else + docker save "${FLOWSIM_IMAGE}" | \ + docker exec -i "${worker}" ctr -n k8s.io images import - + fi + log "${FLOWSIM_IMAGE} loaded into ${worker}" + fi + done + else + warn "${FLOWSIM_IMAGE} not found on host, skipping image load (build it first)" + fi + + # Step 9: Wait for GPU resources + log "Waiting for nvidia.com/gpu resources to appear (up to 180s)..." + local gpu_retries=36 + while true; do + gpu_count=$(kubectl --context "kind-${KIND_CLUSTER_NAME}" get nodes \ + -o jsonpath='{range .items[*]}{.status.allocatable.nvidia\.com/gpu}{"\n"}{end}' 2>/dev/null \ + | grep -cE '^[1-9]' || true) + if [ "${gpu_count}" -ge 1 ]; then + log "GPUs registered on ${gpu_count} node(s)" + break + fi + gpu_retries=$((gpu_retries - 1)) + if [ "${gpu_retries}" -le 0 ]; then + warn "GPUs not registered after 180s — debugging info:" + kubectl --context "kind-${KIND_CLUSTER_NAME}" get pods -n nvidia-device-plugin -o wide 2>/dev/null || true + kubectl --context "kind-${KIND_CLUSTER_NAME}" describe nodes 2>/dev/null | grep -A5 "Allocatable" || true + break + fi + sleep 5 + done + + # Step 10: Init FlowSim K8s config log "Initializing FlowSim K8s config..." 
- local kubeconfig - kubeconfig="${HOME}/.kube/config" flowsim init k8s \ - --kubeconfig "${kubeconfig}" \ + --kubeconfig "${HOME}/.kube/config" \ --context "kind-${KIND_CLUSTER_NAME}" \ --namespace default \ + --host-output-dir /tmp/flowsim-traces \ + --runtime-class-name nvidia \ --force + + log "Cluster nodes:" + kubectl --context "kind-${KIND_CLUSTER_NAME}" get nodes -o wide + echo + + log "GPU resources:" + kubectl --context "kind-${KIND_CLUSTER_NAME}" get nodes \ + -o jsonpath='{range .items[*]}{.metadata.name}{"\t"}{.status.allocatable.nvidia\.com/gpu}{"\n"}{end}' 2>/dev/null || true echo - log "Kind cluster ready. Test with:" - log " flowsim submit --scheduler k8s --collect perf --model-path --dry-run" + + log "Kind cluster with GPU (CDI mode) ready." } # ---------------------------------------------------------------- diff --git a/dockerfiles/dev-teardown.sh b/dockerfiles/dev-teardown.sh index 154b049..dfb1c01 100755 --- a/dockerfiles/dev-teardown.sh +++ b/dockerfiles/dev-teardown.sh @@ -15,6 +15,10 @@ log() { printf "\033[1;32m[teardown]\033[0m %s\n" "$*"; } warn() { printf "\033[1;33m[teardown]\033[0m %s\n" "$*"; } teardown_kind() { + # Delete device plugin namespace (contains per-node DaemonSets) + if command -v kubectl >/dev/null; then + kubectl delete namespace nvidia-device-plugin --ignore-not-found 2>/dev/null || true + fi if command -v kind >/dev/null && kind get clusters 2>/dev/null | grep -q "^${KIND_CLUSTER_NAME}$"; then log "Deleting kind cluster '${KIND_CLUSTER_NAME}'..." 
kind delete cluster --name "${KIND_CLUSTER_NAME}" diff --git a/dockerfiles/gres.conf b/dockerfiles/gres.conf new file mode 100644 index 0000000..745eeac --- /dev/null +++ b/dockerfiles/gres.conf @@ -0,0 +1,3 @@ +# Slurm GRES config — explicit GPU definition (AutoDetect=nvml requires +# cgroup v2 which is not available; define GPU manually) +Name=gpu Type=nvidia File=/dev/nvidia0 Count=1 diff --git a/dockerfiles/kind-multi-node.yaml b/dockerfiles/kind-multi-node.yaml index c2208c4..ddb8cd2 100644 --- a/dockerfiles/kind-multi-node.yaml +++ b/dockerfiles/kind-multi-node.yaml @@ -1,34 +1,22 @@ -# kind cluster config — 1 control-plane + 2 GPU worker nodes +# Kind cluster config — 1 control-plane + 1 GPU worker node # -# Each worker gets one GPU via NVIDIA device plugin. -# Requires: kind, kubectl, nvidia-container-toolkit +# GPU support via CDI mode (NVIDIA k8s-device-plugin official approach). +# See: https://github.com/NVIDIA/k8s-device-plugin/tree/main/demo/clusters/kind # -# Usage: -# # Install kind (once) -# curl -Lo ./kind https://kind.sigs.k8s.io/dl/v0.27.0/kind-linux-amd64 -# chmod +x ./kind && sudo mv ./kind /usr/local/bin/ -# -# # Create cluster -# kind create cluster --name flowsim --config dockerfiles/kind-multi-node.yaml -# -# # Install NVIDIA device plugin (exposes GPUs to K8s) -# kubectl apply -f https://raw.githubusercontent.com/NVIDIA/k8s-device-plugin/v0.17.0/deployments/static/nvidia-device-plugin.yml +# The single worker binds GPU 0. Change the containerPath index to +# assign a different GPU. 
# -# # Verify -# kubectl get nodes -# kubectl describe node flowsim-worker | grep nvidia.com/gpu -# kubectl describe node flowsim-worker2 | grep nvidia.com/gpu +# Pre-requisites (host): +# - Docker with nvidia as default runtime +# - accept-nvidia-visible-devices-as-volume-mounts = true +# in /etc/nvidia-container-runtime/config.toml +# - kind, kubectl, helm # -# # Init FlowSim -# flowsim init k8s --kubeconfig ~/.kube/config \ -# --context kind-flowsim --namespace default --force -# -# # Submit a profiling job -# flowsim submit --scheduler k8s --collect perf \ -# --model-path /models/Qwen-7B --gpus 1 +# Usage: +# ./dockerfiles/dev-setup.sh kind # -# # Teardown -# kind delete cluster --name flowsim +# Teardown: +# ./dockerfiles/dev-teardown.sh kind kind: Cluster apiVersion: kind.x-k8s.io/v1alpha4 @@ -36,29 +24,14 @@ apiVersion: kind.x-k8s.io/v1alpha4 nodes: - role: control-plane + # Worker — GPU 0 only - role: worker extraMounts: - # Pass GPU 0 into this node - - hostPath: /dev/nvidia0 - containerPath: /dev/nvidia0 - - hostPath: /dev/nvidiactl - containerPath: /dev/nvidiactl - - hostPath: /dev/nvidia-uvm - containerPath: /dev/nvidia-uvm - # Mount model weights (adjust to your path) - - hostPath: /home/administrator/zhangt - containerPath: /workspace - readOnly: true - - - role: worker - extraMounts: - # Pass GPU 1 into this node - - hostPath: /dev/nvidia1 - containerPath: /dev/nvidia1 - - hostPath: /dev/nvidiactl - containerPath: /dev/nvidiactl - - hostPath: /dev/nvidia-uvm - containerPath: /dev/nvidia-uvm + - hostPath: /dev/null + containerPath: /var/run/nvidia-container-devices/0 - hostPath: /home/administrator/zhangt containerPath: /workspace readOnly: true + # Writable mount so K8s pods can write traces directly to host + - hostPath: /home/administrator/zhangt/FlowSim/stage_traces + containerPath: /host-stage-traces diff --git a/dockerfiles/slurm-compose.yaml b/dockerfiles/slurm-compose.yaml index 29f694d..ee94656 100644 --- a/dockerfiles/slurm-compose.yaml +++ 
b/dockerfiles/slurm-compose.yaml @@ -48,6 +48,9 @@ services: fi chown munge:munge /etc/munge/munge.key chmod 400 /etc/munge/munge.key + mkdir -p /run/munge + chown munge:munge /run/munge + chmod 755 /run/munge gosu munge munged --foreground " volumes: @@ -61,6 +64,7 @@ services: hostname: slurmctld command: > bash -c " + mkdir -p /run/munge && chown munge:munge /run/munge until [ -S /run/munge/munge.socket.2 ]; do sleep 0.5; done slurmctld -D -vvv " @@ -79,6 +83,7 @@ services: hostname: slurmd-0 command: > bash -c " + mkdir -p /run/munge && chown munge:munge /run/munge until [ -S /run/munge/munge.socket.2 ]; do sleep 0.5; done slurmd -D -vvv " @@ -89,6 +94,10 @@ services: - munge-key:/etc/munge:ro - munge-socket:/run/munge - /home/administrator/zhangt:/workspace:ro + # Writable mount so traces appear on host + - /home/administrator/zhangt/FlowSim/stage_traces:/flowsim/stage_traces + # Cgroup needed by slurmd + - /sys/fs/cgroup:/sys/fs/cgroup:rw deploy: resources: reservations: @@ -97,49 +106,27 @@ services: device_ids: ["0"] capabilities: [gpu] - # ---- Compute node 1 (GPU 1) ---- - slurmd-1: - <<: *slurm-base - container_name: slurmd-1 - hostname: slurmd-1 - command: > - bash -c " - until [ -S /run/munge/munge.socket.2 ]; do sleep 0.5; done - slurmd -D -vvv - " - depends_on: - - slurmctld - volumes: - - slurm-etc:/etc/slurm:ro - - munge-key:/etc/munge:ro - - munge-socket:/run/munge - - /home/administrator/zhangt:/workspace:ro - deploy: - resources: - reservations: - devices: - - driver: nvidia - device_ids: ["1"] - capabilities: [gpu] - - # ---- REST API ---- - slurmrestd: - <<: *slurm-base - container_name: slurmrestd - hostname: slurmrestd - command: > - bash -c " - until [ -S /run/munge/munge.socket.2 ]; do sleep 0.5; done - slurmrestd -a rest_auth/jwt 0.0.0.0:6820 -vvv - " - depends_on: - - slurmctld - ports: - - "6820:6820" - volumes: - - slurm-etc:/etc/slurm:ro - - munge-key:/etc/munge:ro - - munge-socket:/run/munge + # ---- REST API (optional, for REST 
mode) ---- + # slurmrestd: + # <<: *slurm-base + # container_name: slurmrestd + # hostname: slurmrestd + # command: > + # bash -c " + # mkdir -p /run/munge && chown munge:munge /run/munge + # until [ -S /run/munge/munge.socket.2 ]; do sleep 0.5; done + # gosu slurm slurmrestd -a rest_auth/jwt 0.0.0.0:6820 -vvv -s slurmctld + # " + # depends_on: + # - slurmctld + # ports: + # - "6820:6820" + # cap_add: + # - SYS_ADMIN + # volumes: + # - slurm-etc:/etc/slurm:ro + # - munge-key:/etc/munge:ro + # - munge-socket:/run/munge volumes: slurm-etc: diff --git a/dockerfiles/slurm-node.dockerfile b/dockerfiles/slurm-node.dockerfile index 397284d..8b79db0 100644 --- a/dockerfiles/slurm-node.dockerfile +++ b/dockerfiles/slurm-node.dockerfile @@ -1,25 +1,24 @@ # Slurm node image — controller, compute, and REST API # -# Based on Ubuntu 22.04 with Slurm 23.11 + munge + JWT support. +# Based on flowsim-image so compute nodes have the full Python/sglang +# environment. Slurm 23.11 is compiled on top with JWT + NVML GRES. # Used by slurm-compose.yaml. 
-FROM ubuntu:22.04 +FROM flowsim-image:latest ENV DEBIAN_FRONTEND=noninteractive +# Slurm build dependencies + munge RUN apt-get update && apt-get install -y --no-install-recommends \ - build-essential \ - curl \ gosu \ libhttp-parser-dev \ libjson-c-dev \ libjwt-dev \ libmunge-dev \ munge \ - wget \ && rm -rf /var/lib/apt/lists/* -# Install Slurm 23.11 from source (includes slurmrestd + JWT auth) +# Install Slurm 23.11 from source (slurmrestd + JWT auth + NVML GRES) ARG SLURM_VERSION=23.11.10 RUN cd /tmp && \ wget -q https://download.schedmd.com/slurm/slurm-${SLURM_VERSION}.tar.bz2 && \ @@ -31,22 +30,26 @@ RUN cd /tmp && \ --with-jwt \ --with-http-parser \ --with-json \ + --with-nvml \ --enable-slurmrestd && \ make -j"$(nproc)" && \ make install && \ rm -rf /tmp/slurm-* # Create required directories and users -RUN useradd -r -s /sbin/nologin slurm && \ +RUN useradd -r -s /sbin/nologin slurm 2>/dev/null || true && \ mkdir -p /etc/slurm /var/spool/slurmctld /var/spool/slurmd /var/log/slurm && \ chown slurm:slurm /var/spool/slurmctld /var/spool/slurmd /var/log/slurm -# Slurm config — 2 compute nodes, 1 GPU each +# Slurm config COPY slurm.conf /etc/slurm/slurm.conf +COPY gres.conf /etc/slurm/gres.conf +COPY cgroup.conf /etc/slurm/cgroup.conf # JWT key for REST API auth RUN dd if=/dev/urandom bs=32 count=1 2>/dev/null | base64 > /etc/slurm/jwt_hs256.key && \ chown slurm:slurm /etc/slurm/jwt_hs256.key && \ chmod 0600 /etc/slurm/jwt_hs256.key +WORKDIR /flowsim CMD ["bash"] diff --git a/dockerfiles/slurm.conf b/dockerfiles/slurm.conf index 734509d..7a26d5c 100644 --- a/dockerfiles/slurm.conf +++ b/dockerfiles/slurm.conf @@ -25,13 +25,14 @@ SchedulerType=sched/backfill SelectType=select/cons_tres SelectTypeParameters=CR_Core_Memory -# GPU support -GresTypes=gpu - -# Accounting (minimal) -AccountingStorageType=accounting_storage/none +# Accounting (disabled — no slurmdbd in test cluster) JobAcctGatherType=jobacct_gather/none +# Task management — disable cgroups (not 
available in containers) +TaskPlugin=task/none +ProctrackType=proctrack/linuxproc +JobContainerType=job_container/none + # Timeouts SlurmctldTimeout=30 SlurmdTimeout=30 @@ -40,9 +41,11 @@ MinJobAge=300 KillWait=30 Waittime=0 -# Partitions -PartitionName=normal Nodes=slurmd-[0-1] Default=YES MaxTime=INFINITE State=UP +# GRES (GPU) auto-detection +GresTypes=gpu + +# Partitions — single compute node for testing +PartitionName=normal Nodes=slurmd-0 Default=YES MaxTime=INFINITE State=UP -# Node definitions — 1 GPU each -NodeName=slurmd-0 CPUs=8 RealMemory=32000 Gres=gpu:1 State=UNKNOWN -NodeName=slurmd-1 CPUs=8 RealMemory=32000 Gres=gpu:1 State=UNKNOWN +# Node definition — 1 GPU (CPUs/memory match hardware) +NodeName=slurmd-0 CPUs=112 RealMemory=128000 Gres=gpu:1 State=UNKNOWN diff --git a/schedulers/k8s.py b/schedulers/k8s.py index 44c2917..d29df96 100644 --- a/schedulers/k8s.py +++ b/schedulers/k8s.py @@ -49,6 +49,9 @@ class K8sScheduler(BaseScheduler): ServiceAccount name for the pod. shm_size : str Size of ``/dev/shm`` (shared memory). Defaults to ``"16Gi"``. + runtime_class_name : str, optional + Kubernetes RuntimeClass name for the pod (e.g., ``"nvidia"`` for + CDI-based GPU injection in Kind clusters). 
""" def __init__( @@ -62,6 +65,7 @@ def __init__( node_selector: dict[str, str] | None = None, service_account: str = "", shm_size: str = "16Gi", + runtime_class_name: str = "", ) -> None: self.namespace = namespace self.kubeconfig = kubeconfig @@ -71,6 +75,7 @@ def __init__( self.node_selector = node_selector or {} self.service_account = service_account self.shm_size = shm_size + self.runtime_class_name = runtime_class_name def render(self, spec: ProfileJobSpec) -> str: return _dump(self._build_job_dict(spec)) @@ -92,7 +97,9 @@ def _build_job_dict(self, spec: ProfileJobSpec) -> dict: volume_mounts.append({"name": "output", "mountPath": spec.output_dir}) volumes.append({"name": "output", "persistentVolumeClaim": {"claimName": self.pvc_name}}) elif self.host_output_dir: - volume_mounts.append({"name": "output", "mountPath": spec.output_dir}) + # Mount at base traces dir so the full directory structure + # (e.g. k8s/{timestamp}/bs1_...) is preserved on the host. + volume_mounts.append({"name": "output", "mountPath": "/flowsim/stage_traces"}) volumes.append({"name": "output", "hostPath": {"path": self.host_output_dir, "type": "DirectoryOrCreate"}}) container = { @@ -114,6 +121,8 @@ def _build_job_dict(self, spec: ProfileJobSpec) -> dict: "containers": [container], "volumes": volumes, } + if self.runtime_class_name: + pod_spec["runtimeClassName"] = self.runtime_class_name if self.service_account: pod_spec["serviceAccountName"] = self.service_account if self.node_selector: diff --git a/schedulers/local.py b/schedulers/local.py index 4ec94c9..17dd3a0 100644 --- a/schedulers/local.py +++ b/schedulers/local.py @@ -50,25 +50,53 @@ def _find_project_root() -> str: # schedulers/ is one level below project root return os.path.dirname(d) + @staticmethod + def _check_image_exists(image: str) -> None: + """Raise if the Docker image is not available locally.""" + result = subprocess.run( + ["docker", "image", "inspect", image], + capture_output=True, timeout=10, + ) + if 
result.returncode != 0: + raise SystemExit( + f"[local] Docker image '{image}' not found.\n" + f"Build it first, e.g.:\n" + f" docker build -t {image} -f dockerfiles/cuda12.6.dockerfile ." + ) + def _docker_gpu_flag(self) -> str: """Build the ``--gpus`` flag for ``docker run``.""" if not self.gpus: return "--gpus all" return f"--gpus '\"device={self.gpus}\"'" + def _host_output_dir(self, spec_output_dir: str) -> str: + """Host directory that gets bind-mounted into the container. + + Mirrors the container path structure under the host workdir. + e.g. container ``/flowsim/stage_traces/local/20260317_211318`` + → host ``{workdir}/stage_traces/local/20260317_211318`` + """ + # spec_output_dir is like /flowsim/stage_traces/local/{ts} + # Strip the /flowsim/ prefix to get the relative path + rel = spec_output_dir + if rel.startswith("/flowsim/"): + rel = rel[len("/flowsim/"):] + return os.path.join(self.workdir, rel) + def _build_docker_cmd(self, spec: ProfileJobSpec) -> str: - """Build the full ``docker run`` command.""" + """Build the full ``docker run`` command. + + Paths in *spec* (model_path, output_dir, log_dir) are expected to be + relative to the project root or absolute container paths (``/flowsim/…``). + The container workdir is ``/flowsim``, so relative paths resolve + correctly without any string replacement. + """ job_name = spec.default_job_name()[:63] - # Container always works with /flowsim/stage_traces internally. - container_output = "/flowsim/stage_traces" - container_log_dir = container_output + "/logs" - host_output = os.path.abspath(spec.output_dir) - host_log_dir = host_output + "/logs" + host_output = self._host_output_dir(spec.output_dir) + container_output = spec.output_dir # e.g. /flowsim/stage_traces/local/{ts} - # Build the inner command, then replace host paths with container paths. 
inner_cmd = spec.build_shell_command() - inner_cmd = inner_cmd.replace(host_log_dir, container_log_dir) - inner_cmd = inner_cmd.replace(host_output, container_output) parts = [ "docker run --rm", @@ -78,6 +106,8 @@ def _build_docker_cmd(self, spec: ProfileJobSpec) -> str: "--network=host", f"-e SGLANG_PROFILE_KERNELS=1", f"-v {host_output}:{container_output}", + f"-v {self.workdir}/simulator:/flowsim/simulator", + f"-v {self.workdir}/scripts:/flowsim/scripts", f"-w /flowsim", spec.image, f"bash -c {_shell_quote(inner_cmd)}", @@ -85,6 +115,7 @@ def _build_docker_cmd(self, spec: ProfileJobSpec) -> str: return " \\\n ".join(parts) def render(self, spec: ProfileJobSpec) -> str: + self._check_image_exists(spec.image) return self._build_docker_cmd(spec) def submit(self, spec: ProfileJobSpec) -> JobResult: @@ -93,8 +124,10 @@ def submit(self, spec: ProfileJobSpec) -> JobResult: stdout and stderr are streamed to the terminal *and* saved to log files under ``spec.output_dir/logs/`` on the host. """ + self._check_image_exists(spec.image) + # Ensure host output dir exists before mounting - host_output = os.path.abspath(spec.output_dir) + host_output = self._host_output_dir(spec.output_dir) log_dir = os.path.join(host_output, "logs") os.makedirs(log_dir, exist_ok=True) @@ -182,6 +215,18 @@ def cancel(self, job_id: str) -> str: return f"Stopped container {job_id}" return f"Could not stop container {job_id}: {proc.stderr.strip()}" + def _find_log_dirs(self) -> list[str]: + """Find all log directories under stage_traces/{scheduler}/*/logs/.""" + import glob + base = os.path.join(self.workdir, "stage_traces", "local") + # New layout: stage_traces/local/{ts}/logs/ + dirs = sorted(glob.glob(os.path.join(base, "*/logs"))) + # Also check legacy flat layout: stage_traces/logs/ + legacy = os.path.join(self.workdir, "stage_traces", "logs") + if os.path.isdir(legacy): + dirs.append(legacy) + return dirs + def status(self, job_id: str) -> dict: """Check local job status by looking for log 
files. @@ -189,20 +234,23 @@ def status(self, job_id: str) -> dict: """ import glob - log_dir = os.path.join(self.workdir, "stage_traces", "logs") - pattern = os.path.join(log_dir, f"{job_id}_*.stdout.log") - matches = sorted(glob.glob(pattern)) + matches = [] + for log_dir in self._find_log_dirs(): + matches.extend(sorted(glob.glob( + os.path.join(log_dir, f"{job_id}_*.stdout.log") + ))) if not matches: return { "state": "NotFound", - "message": f"No logs found matching {pattern}", + "message": f"No logs found for job '{job_id}'", "output_hint": "", } latest = matches[-1] stderr_log = latest.replace(".stdout.log", ".stderr.log") - trace_dir = os.path.join(self.workdir, "stage_traces") + # trace_dir is the parent of logs/ + trace_dir = os.path.dirname(os.path.dirname(latest)) return { "state": "Completed", @@ -218,17 +266,20 @@ def logs(self, job_id: str, *, tail: int = 100, follow: bool = False) -> str: """List log files for a local job and print access commands.""" import glob - log_dir = os.path.join(self.workdir, "stage_traces", "logs") - pattern = os.path.join(log_dir, f"{job_id}_*") - matches = sorted(glob.glob(pattern)) + matches = [] + for log_dir in self._find_log_dirs(): + matches.extend(sorted(glob.glob( + os.path.join(log_dir, f"{job_id}_*") + ))) if not matches: - # Also try wildcard — user may have given a partial name - pattern = os.path.join(log_dir, f"*{job_id}*") - matches = sorted(glob.glob(pattern)) + for log_dir in self._find_log_dirs(): + matches.extend(sorted(glob.glob( + os.path.join(log_dir, f"*{job_id}*") + ))) if not matches: - return f"No logs found in {log_dir} matching '{job_id}'" + return f"No logs found matching '{job_id}'" if follow: stdout_files = sorted(f for f in matches if f.endswith(".stdout.log")) @@ -236,6 +287,7 @@ def logs(self, job_id: str, *, tail: int = 100, follow: bool = False) -> str: return f"Follow logs with:\n tail -f {stdout_files[-1]}" return f"No stdout log found to follow for '{job_id}'" + log_dir = 
os.path.dirname(matches[-1]) parts = [f"Log directory: {log_dir}", ""] parts.append(f"Files ({len(matches)}):") for p in matches: @@ -256,7 +308,7 @@ def logs(self, job_id: str, *, tail: int = 100, follow: bool = False) -> str: parts.append("Follow logs:") parts.append(f" tail -f {stdout_files[-1]}") - trace_dir = os.path.join(self.workdir, "stage_traces") + trace_dir = os.path.dirname(log_dir) # parent of logs/ parts.append("") parts.append(f"Trace files: {trace_dir}") parts.append(f" ls {trace_dir}") @@ -268,9 +320,11 @@ def list_jobs(self, *, status_filter: str = "") -> list[dict]: import glob import re - log_dir = os.path.join(self.workdir, "stage_traces", "logs") - pattern = os.path.join(log_dir, "*.stdout.log") - matches = sorted(glob.glob(pattern)) + matches = [] + for log_dir in self._find_log_dirs(): + matches.extend(sorted(glob.glob( + os.path.join(log_dir, "*.stdout.log") + ))) jobs: list[dict] = [] for path in matches: diff --git a/schedulers/slurm.py b/schedulers/slurm.py index 790ade4..6615fad 100644 --- a/schedulers/slurm.py +++ b/schedulers/slurm.py @@ -3,12 +3,22 @@ ``render()`` / ``dry_run()`` produce a standalone bash script (zero deps). ``submit()`` posts the script to a slurmrestd endpoint via stdlib ``urllib.request`` — no extra packages needed. + +Two submission modes are supported: + +* **rest** (default) — POST the script to a slurmrestd endpoint. + Requires ``rest_url`` and ``jwt_token``. +* **cli** — pipe the script to ``sbatch`` via subprocess. + Requires ``sbatch``/``squeue``/``scancel`` on PATH (or reachable + via ``cli_prefix``, e.g. ``"docker exec slurmctld"``). """ from __future__ import annotations import json +import shlex import ssl +import subprocess import urllib.error import urllib.request @@ -55,6 +65,12 @@ class SlurmScheduler(BaseScheduler): (relevant for ``"none"`` runtime). extra_sbatch : list[str] Additional ``#SBATCH`` lines, each *without* the ``#SBATCH`` prefix. 
+ submit_via : str + ``"rest"`` (default) — use slurmrestd REST API. + ``"cli"`` — use ``sbatch`` / ``squeue`` / ``scancel`` subprocess. + cli_prefix : str + Shell prefix for CLI commands (e.g. ``"docker exec -i slurmctld"``). + Only used when ``submit_via="cli"``. """ def __init__( @@ -72,6 +88,8 @@ def __init__( container_mounts: str = "", modules: list[str] | None = None, extra_sbatch: list[str] | None = None, + submit_via: str = "rest", + cli_prefix: str = "", ) -> None: self.partition = partition self.time_limit = time_limit @@ -85,6 +103,8 @@ def __init__( self.container_mounts = container_mounts self.modules = modules or [] self.extra_sbatch = extra_sbatch or [] + self.submit_via = submit_via + self.cli_prefix = cli_prefix def render(self, spec: ProfileJobSpec) -> str: job_name = spec.default_job_name() @@ -111,6 +131,10 @@ def render(self, spec: ProfileJobSpec) -> str: lines.append("set -euo pipefail") lines.append("") + # Ensure output dir exists (needed for #SBATCH --output) + lines.append(f"mkdir -p {spec.output_dir}") + lines.append("") + if self.modules: for mod in self.modules: lines.append(f"module load {mod}") @@ -151,6 +175,61 @@ def render(self, spec: ProfileJobSpec) -> str: return "\n".join(lines) def submit(self, spec: ProfileJobSpec) -> JobResult: + """Submit the job via REST API or CLI, depending on ``submit_via``.""" + if self.submit_via == "cli": + return self._submit_cli(spec) + return self._submit_rest(spec) + + # ------------------------------------------------------------------ + # CLI helpers + # ------------------------------------------------------------------ + + def _cli_cmd(self, *args: str) -> list[str]: + """Build a command list, prepending ``cli_prefix`` if set.""" + prefix = shlex.split(self.cli_prefix) if self.cli_prefix else [] + return prefix + list(args) + + def _cli_run( + self, + *args: str, + input_data: str | None = None, + timeout: int = 60, + ) -> subprocess.CompletedProcess: + """Run a Slurm CLI command and return 
the CompletedProcess.""" + cmd = self._cli_cmd(*args) + return subprocess.run( + cmd, + capture_output=True, + text=True, + input=input_data, + timeout=timeout, + ) + + def _submit_cli(self, spec: ProfileJobSpec) -> JobResult: + """Submit via ``sbatch`` (piping the script on stdin).""" + script = self.render(spec) + job_name = spec.default_job_name() + + r = self._cli_run("sbatch", "--parsable", input_data=script, timeout=30) + if r.returncode != 0: + raise RuntimeError( + f"sbatch failed (exit {r.returncode}):\n{r.stderr}" + ) + + job_id = r.stdout.strip().split(";")[0] # parsable: "jobid" or "jobid;cluster" + return JobResult( + job_id=job_id, + scheduler="slurm", + state="Submitted", + output_dir=spec.output_dir, + message=f"Submitted batch job {job_id}", + ) + + # ------------------------------------------------------------------ + # REST submit + # ------------------------------------------------------------------ + + def _submit_rest(self, spec: ProfileJobSpec) -> JobResult: """Submit the job via slurmrestd REST API. Requires ``rest_url`` and ``jwt_token`` to be set. 
@@ -264,6 +343,112 @@ def _rest_get(self, path: str) -> dict: return self._rest_request(path, method="GET") def cancel(self, job_id: str) -> str: + """Cancel a Slurm job.""" + if self.submit_via == "cli": + return self._cancel_cli(job_id) + return self._cancel_rest(job_id) + + def status(self, job_id: str) -> dict: + """Query Slurm job status.""" + if self.submit_via == "cli": + return self._status_cli(job_id) + return self._status_rest(job_id) + + def logs(self, job_id: str, *, tail: int = 100, follow: bool = False) -> str: + """Show Slurm job log information.""" + if self.submit_via == "cli": + return self._logs_cli(job_id, tail=tail, follow=follow) + return self._logs_rest(job_id, tail=tail, follow=follow) + + def list_jobs(self, *, status_filter: str = "") -> list[dict]: + """List Slurm jobs.""" + if self.submit_via == "cli": + return self._list_jobs_cli(status_filter=status_filter) + return self._list_jobs_rest(status_filter=status_filter) + + # ------------------------------------------------------------------ + # CLI implementations + # ------------------------------------------------------------------ + + def _cancel_cli(self, job_id: str) -> str: + r = self._cli_run("scancel", job_id) + if r.returncode != 0: + raise RuntimeError(f"scancel failed: {r.stderr}") + return f"Cancelled Slurm job {job_id}" + + def _status_cli(self, job_id: str) -> dict: + # Use scontrol show job — works for both running and completed jobs + # (completed jobs stay in memory for MinJobAge seconds, default 300s) + r = self._cli_run("scontrol", "show", "job", job_id) + if r.returncode != 0 or not r.stdout.strip(): + return {"state": "Unknown", "message": f"No job found with ID {job_id}", "output_hint": ""} + + # Parse key=value output + fields: dict[str, str] = {} + for token in r.stdout.replace("\n", " ").split(): + if "=" in token: + k, _, v = token.partition("=") + fields[k] = v + + state = fields.get("JobState", "UNKNOWN") + name = fields.get("JobName", "") + nodes = 
fields.get("NodeList", "") + output_file = fields.get("StdOut", "") + + # Normalize to match test expectations + if state == "COMPLETED": + state = "Completed" + elif state == "FAILED": + state = "Failed" + + msg_parts = [ + f"Job ID: {job_id} Name: {name} State: {state}", + f"Nodes: {nodes}" if nodes else "Nodes: (not yet assigned)", + ] + if output_file: + msg_parts.append(f"Output log: {output_file}") + + return { + "state": state, + "message": "\n".join(msg_parts), + "output_hint": output_file, + } + + def _logs_cli(self, job_id: str, *, tail: int = 100, follow: bool = False) -> str: + info = self._status_cli(job_id) + return info["message"] + + def _list_jobs_cli(self, *, status_filter: str = "") -> list[dict]: + r = self._cli_run( + "squeue", "-o", "%i|%j|%T|%P|%N", "-h", + ) + if r.returncode != 0: + raise RuntimeError(f"squeue failed: {r.stderr}") + result: list[dict] = [] + for line in r.stdout.strip().splitlines(): + if not line.strip(): + continue + parts = line.split("|", 4) + name = parts[1] if len(parts) > 1 else "" + if not name.startswith("flowsim-"): + continue + state = parts[2] if len(parts) > 2 else "UNKNOWN" + if status_filter and state.upper() != status_filter.upper(): + continue + result.append({ + "job_id": parts[0] if parts else "", + "name": name, + "state": state, + "partition": parts[3] if len(parts) > 3 else "", + "nodes": parts[4] if len(parts) > 4 else "", + }) + return result + + # ------------------------------------------------------------------ + # REST implementations + # ------------------------------------------------------------------ + + def _cancel_rest(self, job_id: str) -> str: """Cancel a Slurm job via slurmrestd DELETE.""" body = self._rest_request( f"/slurm/{self.api_version}/job/{job_id}", @@ -275,7 +460,7 @@ def cancel(self, job_id: str) -> str: raise RuntimeError(f"slurmrestd cancel failed: {msgs}") return f"Cancelled Slurm job {job_id}" - def status(self, job_id: str) -> dict: + def _status_rest(self, job_id: str) 
-> dict: """Query Slurm job status via slurmrestd.""" body = self._rest_get(f"/slurm/{self.api_version}/job/{job_id}") @@ -312,9 +497,9 @@ def status(self, job_id: str) -> dict: "output_hint": output_file, } - def logs(self, job_id: str, *, tail: int = 100, follow: bool = False) -> str: + def _logs_rest(self, job_id: str, *, tail: int = 100, follow: bool = False) -> str: """Show where Slurm job logs are and how to access them.""" - info = self.status(job_id) + info = self._status_rest(job_id) output_file = info.get("output_hint", "") state = info.get("state", "UNKNOWN") @@ -348,7 +533,7 @@ def logs(self, job_id: str, *, tail: int = 100, follow: bool = False) -> str: return "\n".join(parts) - def list_jobs(self, *, status_filter: str = "") -> list[dict]: + def _list_jobs_rest(self, *, status_filter: str = "") -> list[dict]: """List Slurm jobs via slurmrestd /jobs endpoint.""" body = self._rest_get(f"/slurm/{self.api_version}/jobs") errors = body.get("errors") or [] diff --git a/schedulers/templates/k8s.yaml b/schedulers/templates/k8s.yaml index bac3a77..2adb927 100644 --- a/schedulers/templates/k8s.yaml +++ b/schedulers/templates/k8s.yaml @@ -22,6 +22,7 @@ host_output_dir: "" # hostPath alternative to PVC # Optional service_account: "" shm_size: "16Gi" +runtime_class_name: "" # e.g. "nvidia" for CDI-based GPU (Kind clusters) # node_selector: # gpu: a100 # tier: high diff --git a/scripts/cli.py b/scripts/cli.py index b5d2bc3..135ed84 100644 --- a/scripts/cli.py +++ b/scripts/cli.py @@ -34,6 +34,8 @@ def _init_k8s_parser(sub: argparse._SubParsersAction) -> None: help="Service account for the job pod") p.add_argument("--shm-size", default="16Gi", help="Shared memory size (default: 16Gi)") + p.add_argument("--runtime-class-name", default="", + help="RuntimeClass for pod (e.g. 
'nvidia' for CDI mode)") p.add_argument("--force", action="store_true", help="Overwrite existing config file") @@ -100,6 +102,7 @@ def _cmd_init(argv: list[str]) -> int: "host_output_dir": args.host_output_dir, "service_account": args.service_account, "shm_size": args.shm_size, + "runtime_class_name": args.runtime_class_name, } dst = _CONFIG_DIR / "k8s.yaml" diff --git a/scripts/status_profile.py b/scripts/status_profile.py index 2f82ebc..4882d11 100644 --- a/scripts/status_profile.py +++ b/scripts/status_profile.py @@ -72,6 +72,14 @@ def _add_scheduler_specific_args(p: argparse.ArgumentParser, scheduler: str) -> "--k8s-context", default=_d("FLOWSIM_K8S_CONTEXT", k8s_cfg, "context", ""), ) + p.add_argument( + "--k8s-pvc", + default=cfg_get(k8s_cfg, "pvc", ""), + ) + p.add_argument( + "--k8s-host-output-dir", + default=cfg_get(k8s_cfg, "host_output_dir", ""), + ) elif scheduler == "slurm": p.add_argument( @@ -90,6 +98,15 @@ def _add_scheduler_specific_args(p: argparse.ArgumentParser, scheduler: str) -> "--slurm-no-verify-ssl", action="store_true", ) + p.add_argument( + "--slurm-submit-via", + choices=["rest", "cli"], + default=cfg_get(slurm_cfg, "submit_via", "rest"), + ) + p.add_argument( + "--slurm-cli-prefix", + default=cfg_get(slurm_cfg, "cli_prefix", ""), + ) def _resolve_slurm_jwt(args: argparse.Namespace) -> None: @@ -109,6 +126,8 @@ def _build_scheduler(args: argparse.Namespace): namespace=args.k8s_namespace, kubeconfig=args.k8s_kubeconfig, context=args.k8s_context, + pvc_name=getattr(args, "k8s_pvc", "") or "", + host_output_dir=getattr(args, "k8s_host_output_dir", "") or "", ) else: return SlurmScheduler( @@ -116,6 +135,8 @@ def _build_scheduler(args: argparse.Namespace): jwt_token=args.slurm_jwt_token, api_version=args.slurm_api_version, verify_ssl=not args.slurm_no_verify_ssl, + submit_via=args.slurm_submit_via, + cli_prefix=args.slurm_cli_prefix, ) @@ -138,6 +159,7 @@ def main_status(argv: list[str] | None = None) -> None: scheduler = 
_build_scheduler(args) try: info = scheduler.status(args.job) + print(f"State: {info['state']}") print(info["message"]) except Exception as exc: print(f"Error: {exc}", file=sys.stderr) diff --git a/scripts/submit_profile.py b/scripts/submit_profile.py index 18f7882..23089eb 100644 --- a/scripts/submit_profile.py +++ b/scripts/submit_profile.py @@ -216,6 +216,11 @@ def _parse_args(argv: list[str] | None = None) -> argparse.Namespace: "--k8s-shm-size", default=cfg_get(k8s_cfg, "shm_size", "16Gi"), ) + k8s.add_argument( + "--k8s-runtime-class", + default=cfg_get(k8s_cfg, "runtime_class_name", ""), + help="RuntimeClass for pod (e.g. 'nvidia' for CDI mode)", + ) elif pre.scheduler == "slurm": slurm = p.add_argument_group("slurm options (config: ~/.flowsim/slurm.yaml)") @@ -281,6 +286,17 @@ def _parse_args(argv: list[str] | None = None) -> argparse.Namespace: metavar="DIRECTIVE", help="Extra #SBATCH directives (repeatable, without prefix)", ) + slurm.add_argument( + "--slurm-submit-via", + choices=["rest", "cli"], + default=cfg_get(slurm_cfg, "submit_via", "rest"), + help="Submission mode: rest (slurmrestd) or cli (sbatch subprocess)", + ) + slurm.add_argument( + "--slurm-cli-prefix", + default=cfg_get(slurm_cfg, "cli_prefix", ""), + help='Shell prefix for CLI mode (e.g. 
"docker exec -i slurmctld")', + ) return p.parse_args(argv) @@ -334,6 +350,7 @@ def _build_scheduler(args: argparse.Namespace): node_selector=node_sel, service_account=args.k8s_service_account, shm_size=args.k8s_shm_size, + runtime_class_name=args.k8s_runtime_class, ) else: return SlurmScheduler( @@ -349,23 +366,27 @@ def _build_scheduler(args: argparse.Namespace): container_mounts=args.slurm_container_mounts, modules=args.slurm_module, extra_sbatch=args.slurm_extra_sbatch, + submit_via=args.slurm_submit_via, + cli_prefix=args.slurm_cli_prefix, ) def main(argv: list[str] | None = None) -> None: args = _parse_args(argv) - # Smart defaults for output_dir based on scheduler + # Smart defaults for output_dir based on scheduler. + # Layout: stage_traces/{scheduler}/{timestamp}/ + import time as _time + _ts = _time.strftime("%Y%m%d_%H%M%S") if not args.output_dir: if args.scheduler == "local": - project_root = os.path.dirname(os.path.dirname(os.path.abspath(__file__))) - args.output_dir = os.path.join(project_root, "stage_traces") + args.output_dir = f"/flowsim/stage_traces/local/{_ts}" elif args.scheduler == "slurm": - # Slurm: default to ~/flowsim_traces (shared filesystem) - args.output_dir = os.path.expanduser("~/flowsim_traces") + args.output_dir = os.path.expanduser( + f"~/flowsim_traces/slurm/{_ts}" + ) else: - # K8s: container path (PVC/hostPath mounted here) - args.output_dir = "/flowsim/stage_traces" + args.output_dir = f"/flowsim/stage_traces/k8s/{_ts}" # Resolve Slurm JWT token from jwt_token_cmd in config if needed if args.scheduler == "slurm" and not args.slurm_jwt_token: @@ -378,6 +399,13 @@ def main(argv: list[str] | None = None) -> None: if not args.dry_run and args.scheduler not in ("local",): _validate_connection(args) + # For local scheduler, convert absolute host model_path to relative + # so it resolves correctly inside the container (workdir=/flowsim). 
+ if args.scheduler == "local" and os.path.isabs(args.model_path): + project_root = os.path.dirname(os.path.dirname(os.path.abspath(__file__))) + if args.model_path.startswith(project_root): + args.model_path = os.path.relpath(args.model_path, project_root) + spec = _build_spec(args) scheduler = _build_scheduler(args) @@ -459,6 +487,16 @@ def _validate_connection(args: argparse.Namespace) -> None: file=sys.stderr, ) elif args.scheduler == "slurm": + if args.slurm_submit_via == "cli": + # CLI mode only needs partition + if not args.slurm_partition: + sys.exit( + "Error: missing required Slurm config:\n" + " - partition (--slurm-partition)\n\n" + f"Set it in ~/.flowsim/slurm.yaml or via CLI flag.\n" + + _INIT_HINT + ) + return missing = [] if not args.slurm_rest_url: missing.append("rest_url (--slurm-rest-url)") diff --git a/simulator/base_parser.py b/simulator/base_parser.py index ca9cadb..2b77967 100644 --- a/simulator/base_parser.py +++ b/simulator/base_parser.py @@ -319,12 +319,12 @@ def _parse_events(self) -> list[tuple]: else: # Case 2: If no ext_id, we need to find the shape from user annotations # Key Identification Methodology: Annotation is overlapped with kernel + dims_anno = "N/A" + input_type_anno = "N/A" + desc_anno = "" for anno_idx, anno in enumerate(annotation_events): if anno_idx in used_annotations: continue - dims_anno = "N/A" - input_type_anno = "N/A" - desc_anno = "" if "ProfilerStep" in anno.get("name", ""): continue anno_start = anno.get("ts", 0) diff --git a/tests/integration/test_scheduler_local.py b/tests/integration/test_scheduler_local.py index 56cbdde..62f52fb 100644 --- a/tests/integration/test_scheduler_local.py +++ b/tests/integration/test_scheduler_local.py @@ -2,38 +2,46 @@ Tests all three scheduler backends (local, k8s, slurm) end-to-end. -* **local** — runs real TP=1 profiling and verifies traces, parsed CSVs, - log files, JobResult return, cancel, list, logs --follow. 
-* **k8s** — submits a real Job to a Kind cluster, verifies JobResult, - status, logs, list, cancel, logs --follow. Also validates - that dry-run YAML has the correct volume mounts and log paths. -* **slurm** — dry-run only; verifies the sbatch script has the correct - ``output_dir``, ``--log-dir``, and ``#SBATCH --output`` directives. +* **local** — submits jobs via ``flowsim submit --scheduler local`` which + launches Docker containers on the host. Validates job lifecycle (submit, + list, status) and trace CSV correctness (GEMM dim0, FlashAttn seqlen). +* **k8s** — submits a real Job to a Kind cluster, retrieves traces via + ``docker cp``, and validates trace CSVs. Auto-sets up the Kind cluster + via ``dev-setup.sh`` if not already running. +* **slurm** — submits a real job to a local docker-compose Slurm cluster, + retrieves traces via ``docker cp``, and validates trace CSVs. Auto-sets + up the Slurm cluster via ``dev-setup.sh slurm`` if not already running. Requirements ------------ -* The ``flowsim-test`` container with GPUs (for local tests). -* A Kind cluster named ``flowsim`` (for K8s tests). +* Docker with ``flowsim-image:latest`` built (for local tests). +* A GPU-equipped host machine (local tests run on the physical host, + NOT inside a Docker container). +* ``dockerfiles/dev-setup.sh`` available (Kind and Slurm clusters are + automatically created if missing). * ``schedulers/`` available on PYTHONPATH. Environment Variables --------------------- ``MODEL`` - Model path (default: ``/flowsim/workload/models/configs/Qwen3-235B-A22B``). + Model path relative to project root + (default: ``workload/models/configs/Qwen3-235B-A22B``). ``LOAD_FORMAT`` Load format (default: ``dummy``). 
Usage ----- - # Inside container (local tests): - docker exec flowsim-test python -m pytest \ - tests/integration/test_scheduler_local.py -v -x + # On host (local scheduler tests — needs Docker + GPU): + cd FlowSim && python -m pytest \ + tests/integration/test_scheduler_local.py -v -x -k "local" # On host (k8s tests — needs kubeconfig): python -m pytest tests/integration/test_scheduler_local.py \ -v -x -k "k8s" """ +import ast +import csv import glob import json import os @@ -49,14 +57,13 @@ _PROJECT_ROOT = os.path.abspath( os.path.join(os.path.dirname(__file__), "..", "..") ) +_DEV_SETUP = os.path.join(_PROJECT_ROOT, "dockerfiles", "dev-setup.sh") +_DEV_TEARDOWN = os.path.join(_PROJECT_ROOT, "dockerfiles", "dev-teardown.sh") MODEL = os.environ.get( - "MODEL", "/flowsim/workload/models/configs/Qwen3-235B-A22B" + "MODEL", "workload/models/configs/Qwen3-235B-A22B" ) LOAD_FORMAT = os.environ.get("LOAD_FORMAT", "dummy") -ARTIFACT_DIR = os.environ.get( - "PYTEST_ARTIFACT_DIR", "/flowsim/tests/test-artifacts" -) # --------------------------------------------------------------------------- # Helpers @@ -122,424 +129,466 @@ def _assert_logs(output_dir: str) -> None: assert max(sizes) > 0, "All stdout logs are empty" +# --------------------------------------------------------------------------- +# Shape validation helpers (same logic as test_stage_profile_configs.py) +# --------------------------------------------------------------------------- +def _read_csv(path): + with open(path, newline="") as f: + return list(csv.DictReader(f)) + + +_GEMM_NAME_PATTERNS = ("nvjet", "cublasLt", "cublas_", "cutlass_gemm") + + +def _first_matmul_dim0(rows): + """Return dim0 of the first GEMM kernel (the M dimension).""" + for row in rows: + if row.get("op", "") == "matmul": + dims = ast.literal_eval(row["Dims"]) + return dims[0][0] + for row in rows: + name = row["Name"] + dims_str = row.get("Dims", "N/A") + if dims_str == "N/A" or not dims_str: + continue + if any(pat in name for pat in 
_GEMM_NAME_PATTERNS): + dims = ast.literal_eval(dims_str) + if len(dims) >= 2 and len(dims[0]) == 2 and len(dims[1]) == 2: + return dims[0][0] + return None + + +def _attention_seqlen_pair(rows, bs, seq_len): + """Check that [bs, seq_len] (or +1) appears in FlashAttn dims.""" + for row in rows: + name = row["Name"] + if "FlashAttn" not in name: + continue + if "Combine" in name or "prepare" in name: + continue + dims = ast.literal_eval(row["Dims"]) + for d in dims: + if ( + isinstance(d, list) + and len(d) == 2 + and d[0] == bs + and d[1] in (seq_len, seq_len + 1) + ): + return d + return None + return None + + +def _validate_shapes(output_dir, bs, input_len, existing_ctx): + """Validate GEMM dim0 and FlashAttn seqlen in merged/shape_parsed CSVs.""" + tag = f"bs{bs}_input{input_len}_ctx{existing_ctx}" + for csv_subdir in ("merged", "shape_parsed"): + extend_csvs = sorted( + glob.glob(os.path.join(output_dir, tag, csv_subdir, "*TP-0*EXTEND*.csv")) + ) + decode_csvs = sorted( + glob.glob(os.path.join(output_dir, tag, csv_subdir, "*TP-0*DECODE*.csv")) + ) + if extend_csvs and decode_csvs: + break + else: + pytest.fail( + f"No EXTEND+DECODE CSVs for TP-0 in {output_dir}/{tag}/{{merged,shape_parsed}}/" + ) + + extend_rows = _read_csv(extend_csvs[0]) + decode_rows = _read_csv(decode_csvs[0]) + + # EXTEND first GEMM dim0 == bs * input_len + ext_gemm_dim0 = _first_matmul_dim0(extend_rows) + assert ext_gemm_dim0 is not None, "No matmul kernel found in EXTEND CSV" + expected_ext = bs * input_len + assert ext_gemm_dim0 == expected_ext, ( + f"EXTEND first GEMM dim0={ext_gemm_dim0}, expected bs*input_len={expected_ext}" + ) + + # EXTEND FlashAttn dims contain [bs, seq_len] + seq_len = input_len + existing_ctx + attn_pair = _attention_seqlen_pair(extend_rows, bs, seq_len) + assert attn_pair is not None, ( + f"No FlashAttention dim matching [bs={bs}, seqlen={seq_len}(+1)] in EXTEND CSV" + ) + + # DECODE first GEMM dim0 == bs + dec_gemm_dim0 = _first_matmul_dim0(decode_rows) + 
assert dec_gemm_dim0 is not None, "No matmul kernel found in DECODE CSV" + assert dec_gemm_dim0 == bs, ( + f"DECODE first GEMM dim0={dec_gemm_dim0}, expected bs={bs}" + ) + + # ===================================================================== -# LOCAL SCHEDULER — real profiling +# LOCAL SCHEDULER — real profiling (4-step flow) # ===================================================================== class TestLocalScheduler: - """Run real profiling via ``flowsim submit --scheduler local``.""" + """Run real profiling via ``flowsim`` CLI on the local Docker scheduler. + + Flow per test point: + 1. ``flowsim submit`` — submit the job (collect all) + 2. ``flowsim list`` — verify the job appears + 3. ``flowsim status`` — poll until Completed + 4. Validate trace CSVs — GEMM dim0, FlashAttn seqlen for EXTEND & DECODE + """ + + _TP1_POINTS = [ + {"bs": 1, "input_len": 2048, "existing_ctx": 0, "decode_tokens": 2}, + {"bs": 1, "input_len": 2048, "existing_ctx": 2048, "decode_tokens": 2}, + ] - @pytest.mark.skipif( - not os.path.isdir("/flowsim"), - reason="Local profiling tests must run inside the FlowSim Docker container", + @pytest.mark.parametrize( + "point", + _TP1_POINTS, + ids=[f"bs{p['bs']}_il{p['input_len']}_ctx{p['existing_ctx']}" for p in _TP1_POINTS], ) - def test_local_perf_tp1(self): - """TP=1 perf profiling: traces + parsed CSVs + log files.""" - output_dir = os.path.join(ARTIFACT_DIR, "sched_local_tp1") + def test_local_tp1_all(self, point): + bs = point["bs"] + input_len = point["input_len"] + existing_ctx = point["existing_ctx"] + decode_tokens = point["decode_tokens"] + # ── Step 1: submit ── r = _flowsim_cli( "submit", "--scheduler", "local", - "--collect", "perf", + "--collect", "all", "--model-path", MODEL, "--tp", "1", - "--bs", "1", - "--input-len", "512", - "--decode-tokens", "8", + "--bs", str(bs), + "--input-len", str(input_len), + "--existing-ctx", str(existing_ctx), + "--decode-tokens", str(decode_tokens), "--warmup-n", "2", "--gpus", "1", 
"--local-gpus", "0", - "--output-dir", output_dir, "--extra-server-opts", f"--load-format {LOAD_FORMAT}", ) - if r.returncode != 0: print("STDOUT:", r.stdout[-3000:]) print("STDERR:", r.stderr[-3000:]) assert r.returncode == 0, f"flowsim submit failed (exit {r.returncode})" - # Verify traces and parsed CSVs - _assert_traces(output_dir) - - # Verify log files under output_dir/logs/ - _assert_logs(output_dir) - - # Verify submit output mentions log/trace locations + # Extract job_id from output (line like "flowsim-all-... completed successfully") combined = r.stdout + r.stderr - assert "Traces:" in combined, "Submit output should show trace location" - assert "Logs:" in combined, "Submit output should show log location" - - def test_local_status(self): - """flowsim status --scheduler local should find logs from the previous run.""" - r = _flowsim_cli( - "status", - "--scheduler", "local", - "--job", "flowsim-perf", + job_id = None + for line in combined.splitlines(): + if "flowsim-all-" in line: + for word in line.split(): + if word.startswith("flowsim-all-"): + job_id = word.rstrip(".,;:") + break + if job_id: + break + assert job_id, f"Could not find job_id in submit output:\n{combined[-1000:]}" + + # ── Step 2: list — verify job appears ── + r_list = _flowsim_cli("list", "--scheduler", "local") + assert r_list.returncode == 0, "flowsim list failed" + assert job_id in r_list.stdout, ( + f"Job {job_id} not found in list output:\n{r_list.stdout}" ) - # Should either find logs or say not found — should not crash - assert r.returncode == 0 - def test_local_logs(self): - """flowsim logs --scheduler local should list log files and give paths.""" - r = _flowsim_cli( - "logs", - "--scheduler", "local", - "--job", "flowsim-perf", + # ── Step 3: status — should be Completed (submit is synchronous) ── + r_status = _flowsim_cli("status", "--scheduler", "local", "--job", job_id) + assert r_status.returncode == 0, "flowsim status failed" + status_out = r_status.stdout.lower() + 
assert "completed" in status_out, ( + f"Job {job_id} not completed:\n{r_status.stdout}" ) - assert r.returncode == 0 - output = r.stdout - # Should contain file listing or "No logs" — not crash - assert "Log directory:" in output or "No logs" in output - def test_local_logs_follow(self): - """flowsim logs --follow should show tail -f command.""" - r = _flowsim_cli( - "logs", - "--scheduler", "local", - "--job", "flowsim-perf", - "--follow", + # ── Step 4: validate trace CSVs ── + # Extract output_dir from status output (Traces dir: ...) + output_dir = None + for line in r_status.stdout.splitlines(): + if "Traces dir:" in line: + output_dir = line.split("Traces dir:", 1)[1].strip() + break + assert output_dir and os.path.isdir(output_dir), ( + f"Could not find traces dir in status output:\n{r_status.stdout}" ) - assert r.returncode == 0 - output = r.stdout - assert "tail -f" in output or "No logs" in output + _assert_traces(output_dir) + _assert_logs(output_dir) + _validate_shapes(output_dir, bs=bs, input_len=input_len, existing_ctx=existing_ctx) - def test_local_cancel(self): - """flowsim cancel --scheduler local should attempt docker stop.""" - r = _flowsim_cli( - "cancel", - "--scheduler", "local", - "--job", "flowsim-perf", - ) - assert r.returncode == 0 - out = r.stdout.lower() - assert "stop" in out or "container" in out - def test_local_list(self): - """flowsim list --scheduler local should list jobs from log files.""" - r = _flowsim_cli( - "list", - "--scheduler", "local", +# ===================================================================== +# Cluster setup helpers & fixtures +# ===================================================================== + +def _run_dev_setup(target: str) -> None: + """Run ``dockerfiles/dev-setup.sh `` and assert success.""" + r = subprocess.run( + ["bash", _DEV_SETUP, target], + capture_output=True, text=True, cwd=_PROJECT_ROOT, timeout=300, + ) + if r.returncode != 0: + raise RuntimeError( + f"dev-setup.sh {target} failed 
(exit {r.returncode}):\n" + f"stdout: {r.stdout[-2000:]}\nstderr: {r.stderr[-2000:]}" ) - assert r.returncode == 0 - output = r.stdout - # Should either show jobs or "No jobs found" - assert "JOB_ID" in output or "No jobs found" in output - def test_local_list_status_filter(self): - """flowsim list --status Completed should filter.""" - r = _flowsim_cli( - "list", - "--scheduler", "local", - "--status", "Completed", + +def _run_dev_teardown(target: str) -> None: + """Run ``dockerfiles/dev-teardown.sh ``.""" + subprocess.run( + ["bash", _DEV_TEARDOWN, target], + capture_output=True, text=True, cwd=_PROJECT_ROOT, timeout=120, + ) + + +def _kind_cluster_running() -> bool: + """Check if the Kind cluster named 'flowsim' is reachable.""" + try: + r = subprocess.run( + ["kubectl", "--context", "kind-flowsim", "get", "nodes"], + capture_output=True, text=True, timeout=15, ) - assert r.returncode == 0 + return r.returncode == 0 and "Ready" in r.stdout + except Exception: + return False -# ===================================================================== -# LOCAL SCHEDULER — unit-level tests for JobResult and list_jobs -# ===================================================================== -class TestLocalSchedulerAPI: - """Test LocalScheduler API directly (no subprocess, no GPU).""" +@pytest.fixture(scope="session") +def kind_cluster(): + """Ensure Kind cluster is running; auto-setup if needed. 
- def test_submit_returns_job_result(self): - """LocalScheduler.submit() must return a JobResult, not a string.""" - import tempfile - with tempfile.TemporaryDirectory() as tmpdir: - sched = LocalScheduler(workdir=tmpdir) - spec = ProfileJobSpec( - collect="perf", - model_path="Qwen/Qwen3-8B", - output_dir=os.path.join(tmpdir, "traces"), - ) - # Monkey-patch: make build_shell_command return a trivial command - spec.build_shell_command = lambda: "echo hello" - result = sched.submit(spec) - assert isinstance(result, JobResult), f"Expected JobResult, got {type(result)}" - assert result.scheduler == "local" - assert result.state == "Completed" - assert result.job_id != "" - assert result.output_dir == spec.output_dir - - def test_submit_failed_returns_failed_state(self): - """A failing command should return JobResult with state=Failed.""" - import tempfile - with tempfile.TemporaryDirectory() as tmpdir: - sched = LocalScheduler(workdir=tmpdir) - spec = ProfileJobSpec( - collect="perf", - model_path="Qwen/Qwen3-8B", - output_dir=os.path.join(tmpdir, "traces"), - ) - spec.build_shell_command = lambda: "exit 1" - result = sched.submit(spec) - assert isinstance(result, JobResult) - assert result.state == "Failed" + The cluster is kept alive after the test session to avoid + re-loading the 34 GB image every time. Use ``dev-teardown.sh kind`` + to clean up manually. 
+ """ + if not _kind_cluster_running(): + _run_dev_setup("kind") + assert _kind_cluster_running(), "Kind cluster not reachable after setup" + yield - def test_list_jobs_finds_log_files(self): - """list_jobs() should find jobs from log file names.""" - import tempfile - with tempfile.TemporaryDirectory() as tmpdir: - log_dir = os.path.join(tmpdir, "stage_traces", "logs") - os.makedirs(log_dir) - # Create fake log files - for name in [ - "flowsim-perf-qwen3-8b-bs1-il512_1700000001.stdout.log", - "flowsim-perf-qwen3-8b-bs1-il512_1700000001.stderr.log", - "flowsim-perf-qwen3-8b-bs1-il1024_1700000002.stdout.log", - "flowsim-perf-qwen3-8b-bs1-il1024_1700000002.stderr.log", - ]: - open(os.path.join(log_dir, name), "w").close() - - sched = LocalScheduler(workdir=tmpdir) - jobs = sched.list_jobs() - assert len(jobs) == 2 - assert all("job_id" in j and "state" in j for j in jobs) - - def test_list_jobs_status_filter(self): - """list_jobs(status_filter=...) should filter results.""" - import tempfile - with tempfile.TemporaryDirectory() as tmpdir: - log_dir = os.path.join(tmpdir, "stage_traces", "logs") - os.makedirs(log_dir) - open(os.path.join(log_dir, "flowsim-perf-x_100.stdout.log"), "w").close() - open(os.path.join(log_dir, "flowsim-perf-x_100.stderr.log"), "w").close() - - sched = LocalScheduler(workdir=tmpdir) - assert len(sched.list_jobs(status_filter="Completed")) == 1 - assert len(sched.list_jobs(status_filter="Running")) == 0 - - def test_logs_follow_shows_tail_f(self): - """logs(follow=True) should return a tail -f command.""" - import tempfile - with tempfile.TemporaryDirectory() as tmpdir: - log_dir = os.path.join(tmpdir, "stage_traces", "logs") - os.makedirs(log_dir) - open(os.path.join(log_dir, "flowsim-perf-x_100.stdout.log"), "w").close() - - sched = LocalScheduler(workdir=tmpdir) - text = sched.logs("flowsim-perf-x", follow=True) - assert "tail -f" in text - - def test_cancel_returns_message(self): - """cancel() should attempt docker stop and return a 
message.""" - sched = LocalScheduler() - msg = sched.cancel("some-job") - assert "stop" in msg.lower() or "container" in msg.lower() - - def test_submit_pd_pair_returns_list(self): - """submit_pd_pair() must return list[JobResult].""" - import tempfile - with tempfile.TemporaryDirectory() as tmpdir: - sched = LocalScheduler(workdir=tmpdir) - spec = ProfileJobSpec( - collect="perf", - model_path="Qwen/Qwen3-8B", - output_dir=os.path.join(tmpdir, "traces"), - ) - # Monkey-patch to avoid real profiling - spec.build_shell_command = lambda: "echo hello" - results = sched.submit_pd_pair(spec) - assert isinstance(results, list) - assert len(results) == 2 - assert all(isinstance(r, JobResult) for r in results) - # One should be prefill, one decode - modes = {r.job_id for r in results} - assert any("prefill" in m for m in modes) - assert any("decode" in m for m in modes) + +@pytest.fixture(scope="session") +def slurm_cluster(): + """Ensure Slurm cluster is running; auto-setup if needed. + + Cluster is kept alive after tests. Use ``dev-teardown.sh slurm`` + to clean up manually. + """ + if not _slurm_cluster_running(): + _run_dev_setup("slurm") + assert _slurm_cluster_running(), "Slurm cluster not reachable after setup" + yield # ===================================================================== # K8S SCHEDULER # ===================================================================== class TestK8sScheduler: - """K8s scheduler: dry-run validates YAML structure, real submit to Kind.""" + """K8s scheduler: real submit to Kind cluster. 
- def test_k8s_dry_run_has_volume_and_log_path(self): - """Dry-run YAML should mount output volume and pass --log-dir.""" - r = _flowsim_cli( - "submit", - "--scheduler", "k8s", - "--collect", "perf", - "--model-path", MODEL, - "--k8s-namespace", "default", - "--k8s-pvc", "test-traces", - "--output-dir", "/data/traces", - "--dry-run", - ) - assert r.returncode == 0 - yaml_output = r.stdout - - # Job structure - assert "apiVersion: batch/v1" in yaml_output - assert "kind: Job" in yaml_output + Automatically sets up the Kind cluster via ``dev-setup.sh`` if not + already running. + """ - # PVC volume mount - assert "test-traces" in yaml_output - assert "persistentVolumeClaim" in yaml_output + def test_k8s_real_submit_to_kind(self, kind_cluster): + """Submit a real Job to Kind cluster: submit → list → status → retrieve → validate.""" + import shutil + import tempfile - # output_dir and derived log_dir appear in the command - assert "--output-dir" in yaml_output - assert "/data/traces" in yaml_output - assert "--log-dir" in yaml_output - assert "/data/traces/logs" in yaml_output + job_name = f"test-integ-{int(time.time()) % 100000}" + local_traces = tempfile.mkdtemp(prefix="flowsim-k8s-traces-") + + try: + # ── Step 0: clean stale test traces on host ── + host_traces = os.path.join(_PROJECT_ROOT, "stage_traces") + os.makedirs(host_traces, exist_ok=True) + + # ── Step 1: submit (host mount for trace retrieval) ── + r = _flowsim_cli( + "submit", + "--scheduler", "k8s", + "--collect", "all", + "--model-path", MODEL, + "--tp", "1", + "--bs", "1", + "--input-len", "2048", + "--existing-ctx", "0", + "--decode-tokens", "2", + "--warmup-n", "2", + "--gpus", "1", + "--k8s-namespace", "default", + "--k8s-host-output-dir", "/host-stage-traces", + "--job-name", job_name, + "--extra-server-opts", f"--load-format {LOAD_FORMAT}", + ) + combined = r.stdout + r.stderr + if r.returncode != 0: + print("Submit output:", combined[-3000:]) + assert r.returncode == 0, f"K8s submit failed: 
{combined[-1000:]}" + + # ── Step 2: list — verify job appears ── + r_list = _flowsim_cli("list", "--scheduler", "k8s") + assert r_list.returncode == 0 + assert job_name in r_list.stdout, ( + f"Job {job_name} not in list:\n{r_list.stdout}" + ) - def test_k8s_dry_run_hostpath(self): - """Dry-run with hostPath should have hostPath volume.""" - r = _flowsim_cli( - "submit", - "--scheduler", "k8s", - "--collect", "perf", - "--model-path", MODEL, - "--k8s-namespace", "default", - "--k8s-host-output-dir", "/mnt/traces", - "--dry-run", - ) - assert r.returncode == 0 - assert "hostPath" in r.stdout - assert "/mnt/traces" in r.stdout + # ── Step 3: status — poll until Completed/Succeeded (max 20 min) ── + deadline = time.time() + 1200 + state = "" + while time.time() < deadline: + r_status = _flowsim_cli("status", "--scheduler", "k8s", "--job", job_name) + assert r_status.returncode == 0 + state = r_status.stdout.lower() + if "completed" in state or "succeeded" in state: + break + if "failed" in state: + pytest.fail(f"K8s job failed:\n{r_status.stdout}") + time.sleep(15) + assert "completed" in state or "succeeded" in state, ( + f"K8s job did not complete in time:\n{r_status.stdout}" + ) - def test_k8s_refuses_without_storage(self): - """Submit (not dry-run) without PVC or hostPath should fail.""" - r = _flowsim_cli( - "submit", - "--scheduler", "k8s", - "--collect", "perf", - "--model-path", MODEL, - "--k8s-namespace", "default", - # Explicitly clear any config defaults - "--k8s-pvc", "", - "--k8s-host-output-dir", "", - ) - assert r.returncode != 0 - combined = r.stdout + r.stderr - assert "persistent storage" in combined or "pvc" in combined.lower() + # ── Step 4: traces are on host via Kind mount ── + # output_dir inside container: /flowsim/stage_traces/k8s/{ts} + # host_output_dir on worker: /host-stage-traces + # → host: {project}/stage_traces/k8s/{ts}/ + k8s_traces = os.path.join(host_traces, "k8s") + assert os.path.isdir(k8s_traces), ( + f"No k8s traces dir at 
{k8s_traces}" + ) + # Find the latest timestamped subdir + ts_dirs = sorted(os.listdir(k8s_traces)) + assert ts_dirs, f"No timestamp dirs in {k8s_traces}" + local_traces = os.path.join(k8s_traces, ts_dirs[-1]) - @pytest.mark.skipif( - not os.path.exists(os.path.expanduser("~/.kube/config")), - reason="No kubeconfig — skip K8s real submit (run on host with Kind cluster)", - ) - def test_k8s_real_submit_to_kind(self): - """Submit a real Job to Kind cluster, verify status + logs commands work.""" - job_name = f"test-integ-{int(time.time()) % 100000}" - r = _flowsim_cli( - "submit", - "--scheduler", "k8s", - "--collect", "perf", - "--model-path", MODEL, - "--k8s-namespace", "default", - "--k8s-host-output-dir", "/tmp/flowsim-test-traces", - "--job-name", job_name, - ) - combined = r.stdout + r.stderr + # ── Step 5: validate trace CSVs ── + _assert_traces(local_traces) + _assert_logs(local_traces) + _validate_shapes(local_traces, bs=1, input_len=2048, existing_ctx=0) - if r.returncode != 0: - print("Submit output:", combined[-3000:]) - assert r.returncode == 0, f"K8s submit failed: {combined[-1000:]}" - assert "created" in combined.lower() - - # Verify submit output has location hints - assert "Traces:" in combined - assert "Logs:" in combined - assert "flowsim status" in combined - assert "flowsim logs" in combined - - # Check status - r2 = _flowsim_cli("status", "--scheduler", "k8s", "--job", job_name) - assert r2.returncode == 0 - assert job_name in r2.stdout - - # Check logs (may say "pending" or show pod info) - r3 = _flowsim_cli("logs", "--scheduler", "k8s", "--job", job_name) - assert r3.returncode == 0 - # Should mention kubectl or pod name or "No pods" - assert "kubectl" in r3.stdout or "No pods" in r3.stdout or "Pod:" in r3.stdout - - # Check logs --follow - r3f = _flowsim_cli("logs", "--scheduler", "k8s", "--job", job_name, "--follow") - assert r3f.returncode == 0 - assert "kubectl logs -f" in r3f.stdout - - # Check list - r4 = _flowsim_cli("list", 
"--scheduler", "k8s") - assert r4.returncode == 0 - # Our job should appear in the listing - assert job_name in r4.stdout or "JOB_ID" in r4.stdout - - # Cancel via flowsim cancel - r5 = _flowsim_cli("cancel", "--scheduler", "k8s", "--job", job_name) - assert r5.returncode == 0 - assert "deleted" in r5.stdout.lower() + finally: + # Cleanup: cancel job (traces stay on host for inspection) + _flowsim_cli("cancel", "--scheduler", "k8s", "--job", job_name) # ===================================================================== -# SLURM SCHEDULER — dry-run only (no real cluster) +# SLURM SCHEDULER # ===================================================================== -class TestSlurmScheduler: - """Slurm scheduler: verify sbatch script has correct paths.""" - def test_slurm_dry_run_output_and_log_paths(self): - """Dry-run sbatch script should reference output_dir and log_dir.""" - r = _flowsim_cli( - "submit", - "--scheduler", "slurm", - "--collect", "perf", - "--model-path", MODEL, - "--slurm-partition", "gpu", - "--slurm-rest-url", "http://fake:6820", - "--slurm-jwt-token", "fake-token", - "--output-dir", "/shared/flowsim_traces", - "--dry-run", +def _slurm_cluster_running() -> bool: + """Check if local Slurm test cluster (docker compose) is running.""" + try: + r = subprocess.run( + ["docker", "exec", "slurmctld", "sinfo", "-h"], + capture_output=True, text=True, timeout=10, ) - assert r.returncode == 0 - script = r.stdout + return r.returncode == 0 and r.stdout.strip() != "" + except Exception: + return False - # sbatch directives - assert "#!/bin/bash" in script - assert "#SBATCH --job-name=" in script - assert "#SBATCH --partition=gpu" in script - # output_dir in the profiling command - assert "--output-dir" in script - assert "/shared/flowsim_traces" in script +# CLI prefix for running Slurm commands inside the slurmctld container. +# Uses -i so sbatch can read scripts from stdin. 
+_SLURM_CLI_PREFIX = "docker exec -i slurmctld" - # log_dir = output_dir + /logs/ - assert "--log-dir" in script - assert "/shared/flowsim_traces/logs" in script - def test_slurm_dry_run_default_output_dir(self): - """Default output_dir for Slurm should be ~/flowsim_traces.""" - r = _flowsim_cli( - "submit", - "--scheduler", "slurm", - "--collect", "perf", - "--model-path", MODEL, - "--slurm-partition", "gpu", - "--slurm-rest-url", "http://fake:6820", - "--slurm-jwt-token", "fake-token", - "--dry-run", - ) - assert r.returncode == 0 - assert "flowsim_traces" in r.stdout +class TestSlurmScheduler: + """Slurm scheduler: real submit to local docker-compose cluster. + + Uses CLI mode (sbatch/squeue/scancel) — no slurmrestd needed. + Automatically sets up the Slurm cluster via ``dev-setup.sh slurm`` + if not already running. + """ + + def test_slurm_real_submit(self, slurm_cluster): + """Submit to local Slurm cluster: submit → list → status → retrieve → validate.""" + + # Compute node has /flowsim/stage_traces mounted writable to host. + # output_dir inside the container maps directly to the host. 
+ host_traces = os.path.join(_PROJECT_ROOT, "stage_traces") + os.makedirs(host_traces, exist_ok=True) + ts = int(time.time()) + output_dir = f"/flowsim/stage_traces/slurm/test_{ts}" + + job_id = None + try: + # ── Step 1: submit (CLI mode, container_runtime=none) ── + r = _flowsim_cli( + "submit", + "--scheduler", "slurm", + "--collect", "all", + "--model-path", MODEL, + "--tp", "1", + "--bs", "1", + "--input-len", "2048", + "--existing-ctx", "0", + "--decode-tokens", "2", + "--warmup-n", "2", + "--gpus", "1", + "--slurm-partition", "normal", + "--slurm-submit-via", "cli", + "--slurm-cli-prefix", _SLURM_CLI_PREFIX, + "--slurm-container-runtime", "none", + "--output-dir", output_dir, + "--extra-server-opts", f"--load-format {LOAD_FORMAT}", + ) + combined = r.stdout + r.stderr + if r.returncode != 0: + print("Submit output:", combined[-3000:]) + assert r.returncode == 0, f"Slurm submit failed: {combined[-1000:]}" + + # Extract job_id from output (line like "Submitted batch job 123") + for line in combined.splitlines(): + if "submitted" in line.lower(): + for word in line.split(): + if word.isdigit(): + job_id = word + break + if job_id: + break + assert job_id, f"Could not find job_id in submit output:\n{combined[-1000:]}" + + # ── Step 2: status — poll until Completed (max 20 min) ── + deadline = time.time() + 1200 + state = "" + while time.time() < deadline: + r_status = _flowsim_cli( + "status", "--scheduler", "slurm", + "--job", job_id, + "--slurm-submit-via", "cli", + "--slurm-cli-prefix", _SLURM_CLI_PREFIX, + ) + assert r_status.returncode == 0 + state = r_status.stdout.lower() + if "completed" in state or "succeeded" in state: + break + if "failed" in state: + pytest.fail(f"Slurm job failed:\n{r_status.stdout}") + time.sleep(15) + assert "completed" in state or "succeeded" in state, ( + f"Slurm job did not complete in time:\n{r_status.stdout}" + ) - def test_slurm_dry_run_pd_pair(self): - """PD disaggregation dry-run should produce both scripts with correct 
paths.""" - r = _flowsim_cli( - "submit", - "--scheduler", "slurm", - "--collect", "perf", - "--model-path", MODEL, - "--slurm-partition", "gpu", - "--slurm-rest-url", "http://fake:6820", - "--slurm-jwt-token", "fake-token", - "--output-dir", "/shared/traces", - "--pd", - "--dry-run", - ) - assert r.returncode == 0 - output = r.stdout - assert "PREFILL INSTANCE" in output - assert "DECODE INSTANCE" in output - assert "--disaggregation-mode prefill" in output - assert "--disaggregation-mode decode" in output - # Both scripts should reference the same output_dir - assert output.count("--output-dir") >= 2 - assert output.count("/shared/traces/logs") >= 2 + # ── Step 3: traces are on host via mount ── + slurm_traces = os.path.join(host_traces, "slurm") + assert os.path.isdir(slurm_traces), ( + f"No slurm traces dir at {slurm_traces}" + ) + ts_dirs = sorted(os.listdir(slurm_traces)) + assert ts_dirs, f"No test dirs in {slurm_traces}" + local_traces = os.path.join(slurm_traces, ts_dirs[-1]) + + # ── Step 4: validate trace CSVs ── + _assert_traces(local_traces) + _assert_logs(local_traces) + _validate_shapes(local_traces, bs=1, input_len=2048, existing_ctx=0) + + finally: + # Cleanup: cancel job (traces stay on host for inspection) + if job_id: + _flowsim_cli( + "cancel", "--scheduler", "slurm", + "--job", job_id, + "--slurm-submit-via", "cli", + "--slurm-cli-prefix", _SLURM_CLI_PREFIX, + ) diff --git a/tests/unit/test_scheduler_cli.py b/tests/unit/test_scheduler_cli.py index 1d50f64..2bb0dec 100644 --- a/tests/unit/test_scheduler_cli.py +++ b/tests/unit/test_scheduler_cli.py @@ -281,6 +281,11 @@ def test_time_parse_minutes(self): class TestLocalScheduler: """Tests for local execution backend.""" + @pytest.fixture(autouse=True) + def _skip_image_check(self): + with mock.patch.object(LocalScheduler, "_check_image_exists"): + yield + @pytest.fixture() def spec(self) -> ProfileJobSpec: return ProfileJobSpec( @@ -447,6 +452,11 @@ def test_init_force_overwrite(self, tmp_path: 
Path): class TestCLISubmit: """Tests for `flowsim submit` argument parsing and dry-run.""" + @pytest.fixture(autouse=True) + def _skip_image_check(self): + with mock.patch.object(LocalScheduler, "_check_image_exists"): + yield + def _run(self, *args: str, expect_ok: bool = True) -> str: """Run submit via the Python function, capture stdout.""" from scripts.submit_profile import main as submit_main From 3edd5f4261c5b176e512ac8daba603b1e2448916 Mon Sep 17 00:00:00 2001 From: Terrence Zhang Date: Wed, 18 Mar 2026 20:50:50 +0000 Subject: [PATCH 24/56] slurm: use YYYYMMDD_HHMMSS timestamp for output dirs (consistent with local/k8s) --- scripts/submit_profile.py | 4 +--- tests/integration/test_scheduler_local.py | 4 ++-- 2 files changed, 3 insertions(+), 5 deletions(-) diff --git a/scripts/submit_profile.py b/scripts/submit_profile.py index 23089eb..57c730c 100644 --- a/scripts/submit_profile.py +++ b/scripts/submit_profile.py @@ -382,9 +382,7 @@ def main(argv: list[str] | None = None) -> None: if args.scheduler == "local": args.output_dir = f"/flowsim/stage_traces/local/{_ts}" elif args.scheduler == "slurm": - args.output_dir = os.path.expanduser( - f"~/flowsim_traces/slurm/{_ts}" - ) + args.output_dir = f"/flowsim/stage_traces/slurm/{_ts}" else: args.output_dir = f"/flowsim/stage_traces/k8s/{_ts}" diff --git a/tests/integration/test_scheduler_local.py b/tests/integration/test_scheduler_local.py index 62f52fb..2ff2e67 100644 --- a/tests/integration/test_scheduler_local.py +++ b/tests/integration/test_scheduler_local.py @@ -507,8 +507,8 @@ def test_slurm_real_submit(self, slurm_cluster): # output_dir inside the container maps directly to the host. 
 host_traces = os.path.join(_PROJECT_ROOT, "stage_traces") os.makedirs(host_traces, exist_ok=True) - ts = int(time.time()) - output_dir = f"/flowsim/stage_traces/slurm/test_{ts}" + ts = time.strftime("%Y%m%d_%H%M%S") + output_dir = f"/flowsim/stage_traces/slurm/{ts}" job_id = None try: From 9bc2d94f4d11b891ec889967d605d516fe9985ae Mon Sep 17 00:00:00 2001 From: Terrence Zhang Date: Wed, 18 Mar 2026 20:56:38 +0000 Subject: [PATCH 25/56] docs: add scheduler README with CLI usage and architecture overview --- schedulers/README.md | 385 +++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 385 insertions(+) create mode 100644 schedulers/README.md diff --git a/schedulers/README.md b/schedulers/README.md new file mode 100644 index 0000000..cf14f3c --- /dev/null +++ b/schedulers/README.md @@ -0,0 +1,385 @@ +# FlowSim Schedulers + +FlowSim 支持三种调度器后端,用于提交 GPU profiling 任务: + +| 后端 | 适用场景 | 运行位置 | 依赖 | +|------|----------|----------|------| +| **local** | 单机开发/测试 | 宿主机 Docker 容器 | Docker + NVIDIA GPU | +| **k8s** | Kubernetes 集群 | K8s Job Pod | `kubernetes` Python 包 | +| **slurm** | HPC 集群 | Slurm 计算节点 | Slurm CLI 或 slurmrestd | + +## 快速上手 + +```bash +# 安装(从 FlowSim 项目根目录) +cd FlowSim +pip install -e . # 或确保 PYTHONPATH 包含项目根目录 + +# 查看帮助 +flowsim --help +flowsim submit --help +``` + +## 通用工作流 + +所有调度器共享相同的 CLI 接口: + +```bash +# 1. 提交任务 +flowsim submit --scheduler <scheduler> --collect <perf|shapes|all> \ + --model-path <model> [选项...] + +# 2. 查看任务列表 +flowsim list --scheduler <scheduler> + +# 3. 查看任务状态 +flowsim status --scheduler <scheduler> --job <job_id> + +# 4. 查看日志 +flowsim logs --scheduler <scheduler> --job <job_id> + +# 5. 取消任务 +flowsim cancel --scheduler <scheduler> --job <job_id> + +# 6. Dry-run(仅打印脚本/manifest,不提交) +flowsim submit --scheduler ... 
--dry-run +``` + +### 通用参数 + +| 参数 | 说明 | 默认值 | +|------|------|--------| +| `--collect` | 收集模式:`perf`(性能) / `shapes`(形状) / `all`(两者) | 必填 | +| `--model-path` | HuggingFace 模型路径 | 必填 | +| `--tp` | Tensor parallelism | `1` | +| `--dp` | Data parallelism | `1` | +| `--bs` | Batch size | `1` | +| `--input-len` | 输入序列长度 | `2048` | +| `--existing-ctx` | 已有 KV cache 长度 | `0` | +| `--decode-tokens` | Decode 生成 token 数 | `32` | +| `--warmup-n` | Warmup 迭代数 | `5` | +| `--image` | Docker 镜像 | `flowsim-image:latest` | +| `--gpus` | GPU 数量 | `1` | +| `--output-dir` | 输出目录(自动生成如不指定) | `stage_traces/{scheduler}/{timestamp}/` | +| `--dry-run` | 仅打印脚本,不提交 | `false` | + +--- + +## 1. Local 调度器 + +直接在宿主机上通过 `docker run` 启动容器运行 profiling。最简单的方式,适合单机开发和测试。 + +### 使用 + +```bash +# 最简单的用法 — 使用 GPU 0 运行 +flowsim submit --scheduler local \ + --collect all \ + --model-path workload/models/configs/Qwen3-235B-A22B \ + --tp 1 --bs 1 --input-len 2048 \ + --gpus 1 --local-gpus 0 \ + --extra-server-opts "--load-format dummy" + +# 多 GPU +flowsim submit --scheduler local \ + --collect perf \ + --model-path Qwen/Qwen3-8B \ + --tp 2 --gpus 2 --local-gpus 0,1 +``` + +### 专有参数 + +| 参数 | 说明 | 默认值 | +|------|------|--------| +| `--local-gpus` | `CUDA_VISIBLE_DEVICES`(如 `0` 或 `0,1`) | 空(使用所有 GPU) | +| `--local-workdir` | 主机工作目录 | FlowSim 项目根目录 | + +### 工作原理 + +1. `render()` 生成一条 `docker run --gpus` 命令 +2. `submit()` 在宿主机执行该容器,同步等待完成 +3. Traces 写入宿主机 `stage_traces/local/{YYYYMMDD_HHMMSS}/` +4. `status()` / `logs()` / `list_jobs()` 扫描日志文件 + +--- + +## 2. 
Kubernetes 调度器 + +将 profiling 任务作为 Kubernetes Job 提交到集群。支持 PVC 和 hostPath 两种存储方式。 + +### 首次配置 + +```bash +flowsim init k8s \ + --kubeconfig ~/.kube/config \ + --namespace default \ + --host-output-dir /host-stage-traces \ + --runtime-class-name nvidia \ + --force +``` + +配置保存到 `~/.flowsim/k8s.yaml`,后续提交时自动读取。 + +### 使用 + +```bash +# 提交到 K8s 集群 +flowsim submit --scheduler k8s \ + --collect all \ + --model-path workload/models/configs/Qwen3-235B-A22B \ + --tp 1 --bs 1 --input-len 2048 --gpus 1 \ + --extra-server-opts "--load-format dummy" + +# 覆盖配置文件中的值 +flowsim submit --scheduler k8s \ + --collect perf \ + --model-path Qwen/Qwen3-8B \ + --k8s-namespace ml-team \ + --k8s-pvc my-traces-pvc \ + --gpus 4 --tp 4 + +# Dry-run 查看生成的 YAML +flowsim submit --scheduler k8s ... --dry-run +``` + +### 专有参数 + +| 参数 | 说明 | 默认值 | +|------|------|--------| +| `--k8s-namespace` | K8s 命名空间 | `default` | +| `--k8s-kubeconfig` | kubeconfig 路径 | `~/.kube/config` | +| `--k8s-context` | kubeconfig context | 当前 context | +| `--k8s-pvc` | PVC 名称(持久存储) | 空 | +| `--k8s-host-output-dir` | hostPath 挂载路径(PVC 为空时使用) | 空 | +| `--k8s-node-selector` | 节点选择标签(可重复),格式 `KEY=VALUE` | 空 | +| `--k8s-service-account` | ServiceAccount | 空 | +| `--k8s-shm-size` | 共享内存大小 | `16Gi` | +| `--k8s-runtime-class` | RuntimeClass(如 `nvidia`,用于 CDI 模式) | 空 | + +### 工作原理 + +1. `render()` 生成 Kubernetes Job YAML/JSON manifest +2. `submit()` 通过 `kubernetes` Python 客户端创建 Job +3. Traces 通过 PVC 或 hostPath 持久化 +4. `status()` / `cancel()` / `list_jobs()` 通过 K8s API 操作 + +### Kind 本地测试集群 + +```bash +# 启动 Kind 集群(GPU passthrough + CDI 模式) +bash dockerfiles/dev-setup.sh kind + +# 运行 K8s 集成测试 +python -m pytest tests/integration/test_scheduler_local.py::TestK8sScheduler -v -x + +# 清理 +bash dockerfiles/dev-teardown.sh kind +``` + +--- + +## 3. 
Slurm 调度器 + +生成 sbatch 脚本并提交到 Slurm 集群。支持两种提交模式: + +- **CLI 模式**(推荐):通过 `sbatch`/`squeue`/`scancel` 命令 +- **REST 模式**:通过 slurmrestd REST API + JWT 认证 + +### 首次配置 + +```bash +# CLI 模式(推荐,无需 slurmrestd) +flowsim init slurm \ + --rest-url http://unused \ + --partition gpu \ + --account my-project \ + --container-runtime none \ + --force + +# REST 模式(需要 slurmrestd) +flowsim init slurm \ + --rest-url https://slurm.example.com:6820 \ + --partition gpu \ + --account my-project \ + --jwt-token-cmd "scontrol token lifespan=3600" \ + --force +``` + +### 使用 + +```bash +# CLI 模式 — 直接调用 sbatch(最常用) +flowsim submit --scheduler slurm \ + --collect all \ + --model-path workload/models/configs/Qwen3-235B-A22B \ + --tp 1 --bs 1 --input-len 2048 --gpus 1 \ + --slurm-partition gpu \ + --slurm-submit-via cli \ + --extra-server-opts "--load-format dummy" + +# CLI 模式 + 远程前缀(通过 docker exec 或 ssh) +flowsim submit --scheduler slurm \ + --slurm-submit-via cli \ + --slurm-cli-prefix "docker exec -i slurmctld" \ + --slurm-partition normal \ + --collect perf --model-path Qwen/Qwen3-8B --gpus 1 + +# REST 模式 +flowsim submit --scheduler slurm \ + --slurm-submit-via rest \ + --slurm-rest-url http://localhost:6820 \ + --slurm-jwt-token "$(scontrol token lifespan=3600 | cut -d= -f2)" \ + --collect perf --model-path Qwen/Qwen3-8B --gpus 1 + +# Dry-run 查看生成的 sbatch 脚本 +flowsim submit --scheduler slurm ... 
--dry-run + +# 查看状态(CLI 模式) +flowsim status --scheduler slurm --job 12345 \ + --slurm-submit-via cli \ + --slurm-cli-prefix "docker exec -i slurmctld" + +# 取消任务 +flowsim cancel --scheduler slurm --job 12345 \ + --slurm-submit-via cli +``` + +### 专有参数 + +| 参数 | 说明 | 默认值 | +|------|------|--------| +| `--slurm-submit-via` | 提交模式:`cli`(sbatch)或 `rest`(slurmrestd) | `rest` | +| `--slurm-cli-prefix` | CLI 命令前缀(如 `"docker exec -i slurmctld"`) | 空 | +| `--slurm-partition` | Slurm 分区 | 空 | +| `--slurm-time` | 任务时间限制 | `02:00:00` | +| `--slurm-account` | 计费账户 | 空 | +| `--slurm-constraint` | 节点约束 | 空 | +| `--slurm-container-runtime` | 容器运行时:`docker` / `enroot` / `none` | `none` | +| `--slurm-container-mounts` | 容器挂载 | 空 | +| `--slurm-module` | `module load` 命令(可重复) | 空 | +| `--slurm-extra-sbatch` | 额外 `#SBATCH` 指令(可重复) | 空 | +| `--slurm-rest-url` | slurmrestd URL(REST 模式需要) | 空 | +| `--slurm-jwt-token` | JWT token(REST 模式需要) | 空 | +| `--slurm-api-version` | slurmrestd API 版本 | `v0.0.40` | +| `--slurm-no-verify-ssl` | 跳过 TLS 验证 | `false` | + +### container_runtime 说明 + +| 值 | 说明 | +|----|------| +| `none` | 直接在计算节点上运行(节点已有 Python/sglang 环境)| +| `docker` | 在分配的节点上 `docker run` | +| `enroot` | 使用 `srun --container-image` (NVIDIA enroot) | + +### 工作原理 + +**CLI 模式:** +1. `render()` 生成完整的 sbatch 脚本(含 `#SBATCH` 指令 + profiling 命令) +2. `submit()` 通过 `sbatch --parsable` 提交(脚本通过 stdin 传入) +3. `status()` 通过 `scontrol show job` 查询(无需 slurmdbd) +4. `cancel()` 通过 `scancel` 取消 +5. `list_jobs()` 通过 `squeue` 列出 + +如果 Slurm 命令不在本地 PATH 中,可通过 `--slurm-cli-prefix` 指定前缀,例如: +- `"docker exec -i slurmctld"` — 通过 Docker 容器 +- `"ssh login-node"` — 通过 SSH + +**REST 模式:** +1. 同上生成 sbatch 脚本 +2. `submit()` 通过 HTTP POST 到 slurmrestd 的 `/slurm/{version}/job/submit` +3. 
所有操作通过 slurmrestd REST API + JWT 认证 + +### Docker Compose 本地测试集群 + +```bash +# 启动 Slurm 集群(slurmctld + 1 计算节点 + 1 GPU) +cd dockerfiles/ +docker compose -f slurm-compose.yaml up -d + +# 检查集群状态 +docker exec slurmctld sinfo + +# 运行 Slurm 集成测试 +python -m pytest tests/integration/test_scheduler_local.py::TestSlurmScheduler -v -x + +# 清理 +docker compose -f slurm-compose.yaml down -v +``` + +--- + +## 配置文件 + +配置保存在 `~/.flowsim/` 目录下,通过 `flowsim init` 生成: + +``` +~/.flowsim/ +├── k8s.yaml # K8s 调度器配置 +└── slurm.yaml # Slurm 调度器配置 +``` + +参数优先级(从高到低): +1. CLI flag(`--slurm-partition gpu`) +2. 环境变量(`FLOWSIM_SLURM_PARTITION=gpu`) +3. 配置文件(`~/.flowsim/slurm.yaml`) +4. 内置默认值 + +### 示例 k8s.yaml + +```yaml +kubeconfig: /home/user/.kube/config +namespace: default +host_output_dir: /host-stage-traces +runtime_class_name: nvidia +shm_size: 16Gi +``` + +### 示例 slurm.yaml + +```yaml +partition: gpu +account: my-project +time: "02:00:00" +container_runtime: none +submit_via: cli +cli_prefix: "" +``` + +--- + +## 输出目录结构 + +所有调度器产生统一的 trace 输出结构: + +``` +stage_traces/{scheduler}/{YYYYMMDD_HHMMSS}/ +├── bs1_input2048_ctx0/ +│ ├── *.trace.json.gz # 原始 trace +│ ├── parsed/*.csv # 解析后的 CSV +│ ├── merged/*_merged.trace.csv # 合并的 trace CSV +│ ├── shape_traces/ # Shape trace(collect=shapes/all) +│ ├── shape_parsed/*.csv # Shape 解析 CSV +│ ├── analysis_extend.json # Extend 阶段分析 +│ └── analysis_decode.json # Decode 阶段分析 +├── logs/ +│ ├── server_*.stdout.log +│ └── server_*.stderr.log +└── sweep_summary.json +``` + +--- + +## PD Disaggregation(实验性) + +支持 Prefill-Decode 分离部署: + +```bash +flowsim submit --scheduler k8s \ + --pd \ + --collect perf \ + --model-path Qwen/Qwen3-235B-A22B-FP8 \ + --tp 4 --gpus 8 \ + --disagg-transfer-backend mooncake +``` + +这会生成两个 Job:一个 prefill 实例,一个 decode 实例。 From a92e32ba6280db4710c87170aa04adae31e12f95 Mon Sep 17 00:00:00 2001 From: Terrence Zhang Date: Wed, 18 Mar 2026 21:32:13 +0000 Subject: [PATCH 26/56] profile: add --sweep for multi-point profiling in one job 
Usage: flowsim submit --scheduler local --collect perf --model-path Qwen/Qwen3-8B \ --sweep 1:2048:0 4:8192:0 16:2048:4096 Or from file: flowsim submit --scheduler local --collect perf --model-path Qwen/Qwen3-8B \ --sweep-file sweep_points.txt Each point is a BS:INPUT_LEN:CTX tuple. One server launch, multiple profile points sequentially. Backwards compatible: without --sweep, --bs/--input-len/--existing-ctx still works as single-point. --- schedulers/base.py | 23 +++++---- scripts/run_stage_profile.py | 90 +++++++++++++++++++++++++++++++++--- scripts/submit_profile.py | 50 ++++++++++++++++++++ 3 files changed, 149 insertions(+), 14 deletions(-) diff --git a/schedulers/base.py b/schedulers/base.py index 3a35682..b80aa46 100644 --- a/schedulers/base.py +++ b/schedulers/base.py @@ -5,7 +5,7 @@ import abc import shlex from dataclasses import dataclass, field -from typing import Optional +from typing import Optional, Sequence @dataclass @@ -55,6 +55,9 @@ class ProfileJobSpec: disagg_prefill_pp: int = 1 disagg_ib_device: str = "" + # -- Sweep: explicit list of (bs, input_len, existing_ctx) tuples -- + sweep_points: list[tuple[int, int, int]] = field(default_factory=list) + # -- Extra server opts (appended verbatim) -- extra_server_opts: str = "" @@ -95,12 +98,6 @@ def build_profile_command(self) -> list[str]: "--launch-server", "--server-opts", self.build_server_opts(), - "--bs", - str(self.bs), - "--input-len", - str(self.input_len), - "--existing-ctx", - str(self.existing_ctx), "--decode-tokens", str(self.decode_tokens), "--warmup-n", @@ -114,6 +111,13 @@ def build_profile_command(self) -> list[str]: "--log-dir", self.log_dir, ] + if self.sweep_points: + for bs, il, ctx in self.sweep_points: + cmd.extend(["--sweep", f"{bs}:{il}:{ctx}"]) + else: + cmd.extend(["--bs", str(self.bs)]) + cmd.extend(["--input-len", str(self.input_len)]) + cmd.extend(["--existing-ctx", str(self.existing_ctx)]) if self.disable_chunked_prefill: cmd.append("--disable-chunked-prefill") 
cmd.extend(["--max-prefill-tokens", str(self.max_prefill_tokens)]) @@ -140,7 +144,10 @@ def default_job_name(self) -> str: if self.job_name: return self.job_name model_short = self.model_path.split("/")[-1].lower().replace(".", "-") - name = f"flowsim-{self.collect}-{model_short}-bs{self.bs}-il{self.input_len}" + if self.sweep_points: + name = f"flowsim-{self.collect}-{model_short}-sweep{len(self.sweep_points)}pt" + else: + name = f"flowsim-{self.collect}-{model_short}-bs{self.bs}-il{self.input_len}" if self.disagg_mode: name += f"-{self.disagg_mode}" return name diff --git a/scripts/run_stage_profile.py b/scripts/run_stage_profile.py index c27d6f3..91f9143 100644 --- a/scripts/run_stage_profile.py +++ b/scripts/run_stage_profile.py @@ -700,6 +700,31 @@ def parse_args(argv: Optional[list] = None) -> argparse.Namespace: default="/flowsim/stage_traces", help="Root directory for trace output", ) + + sweep = p.add_argument_group("sweep (multi-point profiling)") + sweep.add_argument( + "--sweep", + type=str, + nargs="+", + default=[], + metavar="BS:INPUT_LEN:CTX", + help=( + "Profile multiple (bs, input_len, existing_ctx) points in one job. " + "Each value is a colon-separated tuple, e.g. --sweep 1:2048:0 4:8192:0 16:2048:4096. " + "Overrides --bs, --input-len, --existing-ctx." + ), + ) + sweep.add_argument( + "--sweep-file", + type=str, + default="", + metavar="FILE", + help=( + "Read sweep points from a file (one BS:INPUT_LEN:CTX per line, " + "# comments allowed). Overrides --bs, --input-len, --existing-ctx." 
+ ), + ) + srv = p.add_argument_group("server launch (optional)") srv.add_argument( "--launch-server", @@ -721,6 +746,45 @@ def parse_args(argv: Optional[list] = None) -> argparse.Namespace: return p.parse_args(argv) +def _parse_sweep_point(s: str) -> tuple[int, int, int]: + """Parse a ``BS:INPUT_LEN:CTX`` string into an int 3-tuple.""" + parts = s.strip().split(":") + if len(parts) != 3: + raise ValueError( + f"Bad sweep point {s!r}: expected BS:INPUT_LEN:CTX " + f"(e.g. 1:2048:0)" + ) + try: + return int(parts[0]), int(parts[1]), int(parts[2]) + except ValueError: + raise ValueError( + f"Bad sweep point {s!r}: all three values must be integers" + ) + + +def _load_sweep_points(args) -> list[tuple[int, int, int]]: + """Resolve sweep points from --sweep, --sweep-file, or single-point args.""" + if args.sweep and args.sweep_file: + print("[ERROR] --sweep and --sweep-file are mutually exclusive") + raise SystemExit(1) + + points: list[tuple[int, int, int]] = [] + if args.sweep: + for s in args.sweep: + points.append(_parse_sweep_point(s)) + elif args.sweep_file: + with open(args.sweep_file) as f: + for line in f: + line = line.strip() + if not line or line.startswith("#"): + continue + points.append(_parse_sweep_point(line)) + else: + # Single-point from --bs / --input-len / --existing-ctx + points.append((args.bs, args.input_len, args.existing_ctx)) + return points + + # --------------------------------------------------------------------------- # Phase runners # --------------------------------------------------------------------------- @@ -759,11 +823,11 @@ def _start_server( return proc -def _run_perf(args, summary: list[dict]) -> int: +def _run_perf(args, summary: list[dict], *, bs: int = 0, input_len: int = 0, existing_ctx: int = 0) -> int: """Collect traces for a single (bs, input_len, existing_ctx, decode_tokens) point.""" - bs = args.bs - input_len = args.input_len - existing_ctx = args.existing_ctx + bs = bs or args.bs + input_len = input_len or 
args.input_len + existing_ctx = existing_ctx if (bs != 0) else args.existing_ctx tag = f"bs{bs}_input{input_len}_ctx{existing_ctx}" sub_dir = os.path.join(args.output_dir, tag) @@ -887,6 +951,14 @@ def main(argv: Optional[list] = None) -> int: server_proc = None summary: list[dict] = [] + sweep_points = _load_sweep_points(args) + is_sweep = len(sweep_points) > 1 + + if is_sweep: + print(f"\n[sweep] {len(sweep_points)} points to profile:") + for i, (bs, il, ctx) in enumerate(sweep_points): + print(f" [{i+1}] bs={bs} input_len={il} existing_ctx={ctx}") + print() try: # ================================================================== @@ -908,7 +980,10 @@ def main(argv: Optional[list] = None) -> int: print(" PHASE 1 / 2 : PERF COLLECTION") print("=" * 60 + "\n") server_proc = _start_server(args, disable_cuda_graph=False) - _run_perf(args, summary) + for idx, (bs, il, ctx) in enumerate(sweep_points): + if is_sweep: + print(f"\n[sweep] Point {idx+1}/{len(sweep_points)}") + _run_perf(args, summary, bs=bs, input_len=il, existing_ctx=ctx) _write_summary(args, summary) print("\n[server] Shutting down for shape pass …") kill_server(server_proc) @@ -929,7 +1004,10 @@ def main(argv: Optional[list] = None) -> int: if args.collect == "perf": if args.launch_server: server_proc = _start_server(args, disable_cuda_graph=False) - _run_perf(args, summary) + for idx, (bs, il, ctx) in enumerate(sweep_points): + if is_sweep: + print(f"\n[sweep] Point {idx+1}/{len(sweep_points)}") + _run_perf(args, summary, bs=bs, input_len=il, existing_ctx=ctx) _write_summary(args, summary) return 0 diff --git a/scripts/submit_profile.py b/scripts/submit_profile.py index 57c730c..f1e3051 100644 --- a/scripts/submit_profile.py +++ b/scripts/submit_profile.py @@ -105,6 +105,28 @@ def _parse_args(argv: list[str] | None = None) -> argparse.Namespace: default="", help="Extra server options appended verbatim", ) + wl.add_argument( + "--sweep", + type=str, + nargs="+", + default=[], + 
metavar="BS:INPUT_LEN:CTX", + help=( + "Profile multiple (bs, input_len, existing_ctx) points in one job. " + "Each value is a colon-separated tuple, e.g. --sweep 1:2048:0 4:8192:0. " + "Overrides --bs, --input-len, --existing-ctx." + ), + ) + wl.add_argument( + "--sweep-file", + type=str, + default="", + metavar="FILE", + help=( + "Read sweep points from a file (one BS:INPUT_LEN:CTX per line, " + "# comments allowed). Overrides --bs, --input-len, --existing-ctx." + ), + ) # -- Infrastructure -- infra = p.add_argument_group("infrastructure") @@ -301,7 +323,34 @@ def _parse_args(argv: list[str] | None = None) -> argparse.Namespace: return p.parse_args(argv) +def _parse_sweep_points(args) -> list[tuple[int, int, int]]: + """Resolve sweep points from --sweep / --sweep-file args.""" + if args.sweep and args.sweep_file: + sys.exit("Error: --sweep and --sweep-file are mutually exclusive") + points: list[tuple[int, int, int]] = [] + raw: list[str] = [] + if args.sweep: + raw = args.sweep + elif args.sweep_file: + with open(args.sweep_file) as f: + raw = [ + line.strip() + for line in f + if line.strip() and not line.strip().startswith("#") + ] + for s in raw: + parts = s.strip().split(":") + if len(parts) != 3: + sys.exit(f"Bad sweep point {s!r}: expected BS:INPUT_LEN:CTX") + try: + points.append((int(parts[0]), int(parts[1]), int(parts[2]))) + except ValueError: + sys.exit(f"Bad sweep point {s!r}: all three values must be integers") + return points + + def _build_spec(args: argparse.Namespace) -> ProfileJobSpec: + sweep_points = _parse_sweep_points(args) return ProfileJobSpec( collect=args.collect, model_path=args.model_path, @@ -325,6 +374,7 @@ def _build_spec(args: argparse.Namespace) -> ProfileJobSpec: disagg_bootstrap_port=args.disagg_bootstrap_port, disagg_prefill_pp=args.disagg_prefill_pp, disagg_ib_device=args.disagg_ib_device, + sweep_points=sweep_points, ) From 0aeff899ec566237b25315a81f13d4510f9a3293 Mon Sep 17 00:00:00 2001 From: Terrence Zhang Date: Wed, 18 
Mar 2026 21:42:08 +0000 Subject: [PATCH 27/56] test: add sweep integration tests (inline + file) Two tests in TestLocalSweep: - test_sweep_inline: --sweep 1:2048:0 1:4096:0 1:2048:2048 - test_sweep_file: same points read from a temp file Also fix: use single --sweep with multiple values (nargs=+) instead of repeated --sweep flags which argparse would override. --- schedulers/base.py | 3 +- tests/integration/test_scheduler_local.py | 122 ++++++++++++++++++++++ 2 files changed, 124 insertions(+), 1 deletion(-) diff --git a/schedulers/base.py b/schedulers/base.py index b80aa46..0641f41 100644 --- a/schedulers/base.py +++ b/schedulers/base.py @@ -112,8 +112,9 @@ def build_profile_command(self) -> list[str]: self.log_dir, ] if self.sweep_points: + cmd.append("--sweep") for bs, il, ctx in self.sweep_points: - cmd.extend(["--sweep", f"{bs}:{il}:{ctx}"]) + cmd.append(f"{bs}:{il}:{ctx}") else: cmd.extend(["--bs", str(self.bs)]) cmd.extend(["--input-len", str(self.input_len)]) diff --git a/tests/integration/test_scheduler_local.py b/tests/integration/test_scheduler_local.py index 2ff2e67..8815250 100644 --- a/tests/integration/test_scheduler_local.py +++ b/tests/integration/test_scheduler_local.py @@ -47,6 +47,7 @@ import os import subprocess import sys +import tempfile import time import pytest @@ -592,3 +593,124 @@ def test_slurm_real_submit(self, slurm_cluster): "--slurm-submit-via", "cli", "--slurm-cli-prefix", _SLURM_CLI_PREFIX, ) + + +# ===================================================================== +# SWEEP — multi-point profiling in a single job +# ===================================================================== + +# Three lightweight points: different (bs, input_len, existing_ctx) +_SWEEP_POINTS = [ + (1, 2048, 0), + (1, 4096, 0), + (1, 2048, 2048), +] + + +def _assert_sweep_output(host_output_dir: str, points: list[tuple[int, int, int]]) -> None: + """Validate that every sweep point produced traces and parsed CSVs.""" + for bs, il, ctx in points: + tag = 
f"bs{bs}_input{il}_ctx{ctx}" + point_dir = os.path.join(host_output_dir, tag) + assert os.path.isdir(point_dir), f"Missing sweep point dir: {point_dir}" + _assert_traces(point_dir) + + # sweep_summary.json should exist at the root + summary_path = os.path.join(host_output_dir, "sweep_summary.json") + assert os.path.isfile(summary_path), f"Missing {summary_path}" + with open(summary_path) as f: + summary = json.load(f) + assert len(summary) == len(points), ( + f"Expected {len(points)} entries in sweep_summary.json, got {len(summary)}" + ) + for entry in summary: + assert entry["traces"] > 0, f"Point {entry} has 0 traces" + + +class TestLocalSweep: + """Multi-point sweep via ``--sweep`` and ``--sweep-file`` on local scheduler. + + Validates that one job profiles all requested points and produces + correct directory structure, traces, and sweep_summary.json. + """ + + def test_sweep_inline(self): + """Submit a 3-point sweep using inline --sweep tuples.""" + sweep_args = [f"{bs}:{il}:{ctx}" for bs, il, ctx in _SWEEP_POINTS] + + r = _flowsim_cli( + "submit", + "--scheduler", "local", + "--collect", "perf", + "--model-path", MODEL, + "--tp", "1", + "--decode-tokens", "2", + "--warmup-n", "2", + "--gpus", "1", + "--local-gpus", "0", + "--extra-server-opts", f"--load-format {LOAD_FORMAT}", + "--sweep", *sweep_args, + ) + combined = r.stdout + r.stderr + if r.returncode != 0: + print("STDOUT:", r.stdout[-3000:]) + print("STDERR:", r.stderr[-3000:]) + assert r.returncode == 0, f"sweep submit failed (exit {r.returncode})" + + # Find host output dir from submit output + output_dir = None + for line in combined.splitlines(): + if "Traces:" in line: + output_dir = line.split("Traces:", 1)[1].strip() + break + assert output_dir and os.path.isdir(output_dir), ( + f"Could not find traces dir in output:\n{combined[-1000:]}" + ) + + _assert_sweep_output(output_dir, _SWEEP_POINTS) + _assert_logs(output_dir) + + def test_sweep_file(self): + """Submit a 3-point sweep reading points from 
a file.""" + with tempfile.NamedTemporaryFile( + mode="w", suffix=".txt", delete=False, prefix="sweep_" + ) as f: + f.write("# bs:input_len:existing_ctx\n") + for bs, il, ctx in _SWEEP_POINTS: + f.write(f"{bs}:{il}:{ctx}\n") + sweep_file = f.name + + try: + r = _flowsim_cli( + "submit", + "--scheduler", "local", + "--collect", "perf", + "--model-path", MODEL, + "--tp", "1", + "--decode-tokens", "2", + "--warmup-n", "2", + "--gpus", "1", + "--local-gpus", "0", + "--extra-server-opts", f"--load-format {LOAD_FORMAT}", + "--sweep-file", sweep_file, + ) + combined = r.stdout + r.stderr + if r.returncode != 0: + print("STDOUT:", r.stdout[-3000:]) + print("STDERR:", r.stderr[-3000:]) + assert r.returncode == 0, f"sweep-file submit failed (exit {r.returncode})" + + # Find host output dir from submit output + output_dir = None + for line in combined.splitlines(): + if "Traces:" in line: + output_dir = line.split("Traces:", 1)[1].strip() + break + assert output_dir and os.path.isdir(output_dir), ( + f"Could not find traces dir in output:\n{combined[-1000:]}" + ) + + _assert_sweep_output(output_dir, _SWEEP_POINTS) + _assert_logs(output_dir) + finally: + os.unlink(sweep_file) From f58d791f01d1f68046b0a01aba20b75b9f901182 Mon Sep 17 00:00:00 2001 From: Terrence Zhang Date: Wed, 18 Mar 2026 22:08:47 +0000 Subject: [PATCH 28/56] refactor: dedup shared utilities, deprecate Slurm REST mode - Extract resolve_default() to config.py (was _d() duplicated in submit/status) - Extract parse_sweep_point()/load_sweep_file() to scripts/__init__.py - K8s: submit() reuses _load_k8s() instead of duplicating kubeconfig logic - K8s: remove unused kubernetes imports in status()/logs() - Local: move inline imports (glob/re/shlex/threading) to module level - Local: remove dead if-branch in list_jobs (always set Completed) - Slurm: default submit_via='cli', deprecate REST mode with DeprecationWarning - Slurm: add TODO for _logs_cli (currently returns status info only) - CLI: flowsim init slurm 
supports --submit-via/--cli-prefix, rest-url optional - Template: slurm.yaml updated for CLI-first workflow - run_stage_profile: fix _run_perf sentinel bs=0 -> Optional[int] --- schedulers/config.py | 5 +++ schedulers/k8s.py | 63 +++++++++++---------------------- schedulers/local.py | 17 +++------ schedulers/slurm.py | 24 +++++++++---- schedulers/templates/slurm.yaml | 14 ++++---- scripts/__init__.py | 36 +++++++++++++++++++ scripts/cli.py | 19 ++++++---- scripts/run_stage_profile.py | 44 ++++++----------------- scripts/status_profile.py | 8 ++--- scripts/submit_profile.py | 40 ++++++++------------- 10 files changed, 131 insertions(+), 139 deletions(-) diff --git a/schedulers/config.py b/schedulers/config.py index 185c87f..e228cb3 100644 --- a/schedulers/config.py +++ b/schedulers/config.py @@ -123,3 +123,8 @@ def cfg_get(cfg: dict, key: str, fallback: str = "") -> str: if val is not None: return str(val) return fallback + + +def resolve_default(env_var: str, cfg: dict, key: str, fallback: str = "") -> str: + """Resolve a config value: env var > config file > fallback.""" + return os.environ.get(env_var, "") or cfg_get(cfg, key, fallback) diff --git a/schedulers/k8s.py b/schedulers/k8s.py index d29df96..741721e 100644 --- a/schedulers/k8s.py +++ b/schedulers/k8s.py @@ -148,38 +148,9 @@ def _build_job_dict(self, spec: ProfileJobSpec) -> dict: def submit(self, spec: ProfileJobSpec) -> JobResult: """Submit via the ``kubernetes`` Python client (``pip install kubernetes``).""" - try: - from kubernetes import client as k8s_client, config as k8s_config - except ImportError: - raise RuntimeError( - "The 'kubernetes' package is required for --submit. 
" - "Install it with: pip install kubernetes" - ) - - # Load kubeconfig / in-cluster config - config_kwargs: dict = {} - if self.kubeconfig: - config_kwargs["config_file"] = self.kubeconfig - if self.context: - config_kwargs["context"] = self.context - - try: - k8s_config.load_kube_config(**config_kwargs) - except k8s_config.ConfigException: - try: - k8s_config.load_incluster_config() - except k8s_config.ConfigException: - hint = "" - if not self.kubeconfig: - hint = " Try --k8s-kubeconfig /path/to/kubeconfig." - raise RuntimeError( - "No valid Kubernetes configuration found. " - "Checked kubeconfig file and in-cluster environment." - + hint - ) + batch_api, _ = self._load_k8s() body = self._build_job_dict(spec) - batch_api = k8s_client.BatchV1Api() resp = batch_api.create_namespaced_job( namespace=self.namespace, body=body, @@ -197,8 +168,17 @@ def submit(self, spec: ProfileJobSpec) -> JobResult: # ----------------------------------------------------------------- def _load_k8s(self): - """Load kubeconfig and return (BatchV1Api, CoreV1Api).""" - from kubernetes import client as k8s_client, config as k8s_config + """Load kubeconfig and return (BatchV1Api, CoreV1Api). + + Raises RuntimeError with actionable message on failure. + """ + try: + from kubernetes import client as k8s_client, config as k8s_config + except ImportError: + raise RuntimeError( + "The 'kubernetes' package is required. " + "Install it with: pip install kubernetes" + ) config_kwargs: dict = {} if self.kubeconfig: @@ -208,7 +188,14 @@ def _load_k8s(self): try: k8s_config.load_kube_config(**config_kwargs) except k8s_config.ConfigException: - k8s_config.load_incluster_config() + try: + k8s_config.load_incluster_config() + except k8s_config.ConfigException: + hint = " Try --k8s-kubeconfig /path/to/kubeconfig." if not self.kubeconfig else "" + raise RuntimeError( + "No valid Kubernetes configuration found. " + "Checked kubeconfig file and in-cluster environment." 
+ hint + ) return k8s_client.BatchV1Api(), k8s_client.CoreV1Api() @@ -226,11 +213,6 @@ def cancel(self, job_id: str) -> str: def status(self, job_id: str) -> dict: """Query K8s Job status by job name.""" - try: - from kubernetes import client as k8s_client - except ImportError: - raise RuntimeError("pip install kubernetes") - batch_api, core_api = self._load_k8s() job = batch_api.read_namespaced_job(name=job_id, namespace=self.namespace) @@ -278,11 +260,6 @@ def status(self, job_id: str) -> dict: def logs(self, job_id: str, *, tail: int = 100, follow: bool = False) -> str: """Show where logs are and how to access them for a K8s Job.""" - try: - from kubernetes import client as k8s_client - except ImportError: - raise RuntimeError("pip install kubernetes") - _, core_api = self._load_k8s() pods = core_api.list_namespaced_pod( diff --git a/schedulers/local.py b/schedulers/local.py index 17dd3a0..eeaa020 100644 --- a/schedulers/local.py +++ b/schedulers/local.py @@ -7,9 +7,13 @@ from __future__ import annotations +import glob import os +import re +import shlex import subprocess import sys +import threading import time from schedulers.base import BaseScheduler, JobResult, ProfileJobSpec @@ -17,7 +21,6 @@ def _shell_quote(s: str) -> str: """Quote a string for safe embedding in a bash -c '...' 
invocation.""" - import shlex return shlex.quote(s) @@ -160,7 +163,6 @@ def submit(self, spec: ProfileJobSpec) -> JobResult: stdout=subprocess.PIPE, stderr=subprocess.PIPE, ) - import threading def _tee(src, dest_file, dest_stream): for line in src: @@ -217,7 +219,6 @@ def cancel(self, job_id: str) -> str: def _find_log_dirs(self) -> list[str]: """Find all log directories under stage_traces/{scheduler}/*/logs/.""" - import glob base = os.path.join(self.workdir, "stage_traces", "local") # New layout: stage_traces/local/{ts}/logs/ dirs = sorted(glob.glob(os.path.join(base, "*/logs"))) @@ -232,8 +233,6 @@ def status(self, job_id: str) -> dict: ``job_id`` is the job name prefix used in log filenames. """ - import glob - matches = [] for log_dir in self._find_log_dirs(): matches.extend(sorted(glob.glob( @@ -264,8 +263,6 @@ def status(self, job_id: str) -> dict: def logs(self, job_id: str, *, tail: int = 100, follow: bool = False) -> str: """List log files for a local job and print access commands.""" - import glob - matches = [] for log_dir in self._find_log_dirs(): matches.extend(sorted(glob.glob( @@ -317,9 +314,6 @@ def logs(self, job_id: str, *, tail: int = 100, follow: bool = False) -> str: def list_jobs(self, *, status_filter: str = "") -> list[dict]: """List local jobs by scanning log files.""" - import glob - import re - matches = [] for log_dir in self._find_log_dirs(): matches.extend(sorted(glob.glob( @@ -340,9 +334,6 @@ def list_jobs(self, *, status_filter: str = "") -> list[dict]: stderr_size = os.path.getsize(stderr) if os.path.exists(stderr) else 0 # If stderr has content, might have failed; otherwise completed state = "Completed" - if stderr_size > 0: - # Check if there's an error indicator in stderr - state = "Completed" # local jobs are synchronous; if log exists, it finished jobs.append({ "job_id": name, "name": name, diff --git a/schedulers/slurm.py b/schedulers/slurm.py index 6615fad..b297fec 100644 --- a/schedulers/slurm.py +++ b/schedulers/slurm.py 
@@ -6,11 +6,11 @@ Two submission modes are supported: -* **rest** (default) — POST the script to a slurmrestd endpoint. - Requires ``rest_url`` and ``jwt_token``. -* **cli** — pipe the script to ``sbatch`` via subprocess. +* **cli** (default) — pipe the script to ``sbatch`` via subprocess. Requires ``sbatch``/``squeue``/``scancel`` on PATH (or reachable via ``cli_prefix``, e.g. ``"docker exec slurmctld"``). +* **rest** (deprecated) — POST the script to a slurmrestd endpoint. + Requires ``rest_url`` and ``jwt_token``. """ from __future__ import annotations @@ -66,8 +66,8 @@ class SlurmScheduler(BaseScheduler): extra_sbatch : list[str] Additional ``#SBATCH`` lines, each *without* the ``#SBATCH`` prefix. submit_via : str - ``"rest"`` (default) — use slurmrestd REST API. - ``"cli"`` — use ``sbatch`` / ``squeue`` / ``scancel`` subprocess. + ``"cli"`` (default) — use ``sbatch`` / ``squeue`` / ``scancel`` subprocess. + ``"rest"`` (deprecated) — use slurmrestd REST API. cli_prefix : str Shell prefix for CLI commands (e.g. ``"docker exec -i slurmctld"``). Only used when ``submit_via="cli"``. @@ -88,7 +88,7 @@ def __init__( container_mounts: str = "", modules: list[str] | None = None, extra_sbatch: list[str] | None = None, - submit_via: str = "rest", + submit_via: str = "cli", cli_prefix: str = "", ) -> None: self.partition = partition @@ -106,6 +106,16 @@ def __init__( self.submit_via = submit_via self.cli_prefix = cli_prefix + if self.submit_via != "cli": + import warnings + warnings.warn( + "Slurm REST mode (slurmrestd) is deprecated and will be " + "removed in a future release. 
Use submit_via='cli' " + "(sbatch) instead.", + DeprecationWarning, + stacklevel=2, + ) + def render(self, spec: ProfileJobSpec) -> str: job_name = spec.default_job_name() cmd = spec.build_shell_command() @@ -415,6 +425,8 @@ def _status_cli(self, job_id: str) -> dict: } def _logs_cli(self, job_id: str, *, tail: int = 100, follow: bool = False) -> str: + # TODO: read actual Slurm log file (StdOut from scontrol) + # and support tail/follow properly. info = self._status_cli(job_id) return info["message"] diff --git a/schedulers/templates/slurm.yaml b/schedulers/templates/slurm.yaml index 0910f4a..3ff2d0a 100644 --- a/schedulers/templates/slurm.yaml +++ b/schedulers/templates/slurm.yaml @@ -8,17 +8,19 @@ # # CLI flags and env vars can override individual values. -# REQUIRED — slurmrestd endpoint -rest_url: "" # e.g. https://slurm.corp.com:6820 - -# REQUIRED — authentication (pick one) -# jwt_token: "" # not recommended — stored in plaintext -jwt_token_cmd: "" # e.g. "scontrol token lifespan=3600" +# REQUIRED — submission mode +submit_via: "cli" # "cli" (sbatch, default) or "rest" (deprecated) +cli_prefix: "" # e.g. "docker exec -i slurmctld" (optional) # REQUIRED — cluster settings partition: "" # e.g. gpu-h100 account: "" # e.g. my-project +# REST mode (deprecated) — only needed if submit_via: rest +rest_url: "" # e.g. https://slurm.corp.com:6820 +# jwt_token: "" # not recommended — stored in plaintext +jwt_token_cmd: "" # e.g. "scontrol token lifespan=3600" + # Optional api_version: "v0.0.40" time: "02:00:00" diff --git a/scripts/__init__.py b/scripts/__init__.py index e69de29..e785b75 100644 --- a/scripts/__init__.py +++ b/scripts/__init__.py @@ -0,0 +1,36 @@ +"""Shared utilities for FlowSim CLI scripts.""" + + +def parse_sweep_point(s: str) -> tuple[int, int, int]: + """Parse a ``BS:INPUT_LEN:CTX`` string into an int 3-tuple. + + Raises :class:`ValueError` on bad input. 
+ """ + parts = s.strip().split(":") + if len(parts) != 3: + raise ValueError( + f"Bad sweep point {s!r}: expected BS:INPUT_LEN:CTX " + f"(e.g. 1:2048:0)" + ) + try: + return int(parts[0]), int(parts[1]), int(parts[2]) + except ValueError: + raise ValueError( + f"Bad sweep point {s!r}: all three values must be integers" + ) + + +def load_sweep_file(path: str) -> list[tuple[int, int, int]]: + """Read sweep points from a file (one ``BS:INPUT_LEN:CTX`` per line). + + Blank lines and ``#`` comments are skipped. + Raises :class:`ValueError` on bad entries. + """ + points: list[tuple[int, int, int]] = [] + with open(path) as f: + for line in f: + line = line.strip() + if not line or line.startswith("#"): + continue + points.append(parse_sweep_point(line)) + return points diff --git a/scripts/cli.py b/scripts/cli.py index 135ed84..fc4d0cb 100644 --- a/scripts/cli.py +++ b/scripts/cli.py @@ -42,12 +42,17 @@ def _init_k8s_parser(sub: argparse._SubParsersAction) -> None: def _init_slurm_parser(sub: argparse._SubParsersAction) -> None: p = sub.add_parser("slurm", help="Configure Slurm scheduler") - p.add_argument("--rest-url", required=True, - help="slurmrestd endpoint URL (REQUIRED)") p.add_argument("--partition", required=True, help="Slurm partition (REQUIRED)") - p.add_argument("--account", required=True, - help="Slurm account (REQUIRED)") + p.add_argument("--account", default="", + help="Slurm account") + p.add_argument("--submit-via", default="cli", + choices=["cli", "rest"], + help="Submission mode (default: cli)") + p.add_argument("--cli-prefix", default="", + help='CLI mode prefix, e.g. "docker exec -i slurmctld"') + p.add_argument("--rest-url", default="", + help="slurmrestd endpoint URL (REST mode only, deprecated)") p.add_argument("--jwt-token-cmd", default="", help='Command to get JWT token, e.g. 
"scontrol token lifespan=3600"') p.add_argument("--jwt-token", default="", @@ -107,10 +112,12 @@ def _cmd_init(argv: list[str]) -> int: dst = _CONFIG_DIR / "k8s.yaml" elif args.scheduler == "slurm": - if not args.jwt_token_cmd and not args.jwt_token: - print("Error: provide --jwt-token-cmd or --jwt-token", file=sys.stderr) + if args.submit_via == "rest" and not args.jwt_token_cmd and not args.jwt_token: + print("Error: REST mode requires --jwt-token-cmd or --jwt-token", file=sys.stderr) return 1 cfg = { + "submit_via": args.submit_via, + "cli_prefix": args.cli_prefix, "rest_url": args.rest_url, "jwt_token_cmd": args.jwt_token_cmd, "jwt_token": args.jwt_token, diff --git a/scripts/run_stage_profile.py b/scripts/run_stage_profile.py index 91f9143..00dce4b 100644 --- a/scripts/run_stage_profile.py +++ b/scripts/run_stage_profile.py @@ -107,6 +107,7 @@ ) from utils.net import wait_for_port from utils.shape_merge import merge_shapes_dir +from scripts import load_sweep_file, parse_sweep_point # --------------------------------------------------------------------------- # Defaults @@ -746,43 +747,18 @@ def parse_args(argv: Optional[list] = None) -> argparse.Namespace: return p.parse_args(argv) -def _parse_sweep_point(s: str) -> tuple[int, int, int]: - """Parse a ``BS:INPUT_LEN:CTX`` string into an int 3-tuple.""" - parts = s.strip().split(":") - if len(parts) != 3: - raise ValueError( - f"Bad sweep point {s!r}: expected BS:INPUT_LEN:CTX " - f"(e.g. 
1:2048:0)" - ) - try: - return int(parts[0]), int(parts[1]), int(parts[2]) - except ValueError: - raise ValueError( - f"Bad sweep point {s!r}: all three values must be integers" - ) - - def _load_sweep_points(args) -> list[tuple[int, int, int]]: """Resolve sweep points from --sweep, --sweep-file, or single-point args.""" if args.sweep and args.sweep_file: print("[ERROR] --sweep and --sweep-file are mutually exclusive") raise SystemExit(1) - points: list[tuple[int, int, int]] = [] if args.sweep: - for s in args.sweep: - points.append(_parse_sweep_point(s)) - elif args.sweep_file: - with open(args.sweep_file) as f: - for line in f: - line = line.strip() - if not line or line.startswith("#"): - continue - points.append(_parse_sweep_point(line)) - else: - # Single-point from --bs / --input-len / --existing-ctx - points.append((args.bs, args.input_len, args.existing_ctx)) - return points + return [parse_sweep_point(s) for s in args.sweep] + if args.sweep_file: + return load_sweep_file(args.sweep_file) + # Single-point from --bs / --input-len / --existing-ctx + return [(args.bs, args.input_len, args.existing_ctx)] # --------------------------------------------------------------------------- @@ -823,11 +799,11 @@ def _start_server( return proc -def _run_perf(args, summary: list[dict], *, bs: int = 0, input_len: int = 0, existing_ctx: int = 0) -> int: +def _run_perf(args, summary: list[dict], *, bs: Optional[int] = None, input_len: Optional[int] = None, existing_ctx: Optional[int] = None) -> int: """Collect traces for a single (bs, input_len, existing_ctx, decode_tokens) point.""" - bs = bs or args.bs - input_len = input_len or args.input_len - existing_ctx = existing_ctx if (bs != 0) else args.existing_ctx + bs = bs if bs is not None else args.bs + input_len = input_len if input_len is not None else args.input_len + existing_ctx = existing_ctx if existing_ctx is not None else args.existing_ctx tag = f"bs{bs}_input{input_len}_ctx{existing_ctx}" sub_dir = 
os.path.join(args.output_dir, tag) diff --git a/scripts/status_profile.py b/scripts/status_profile.py index 4882d11..085d79e 100644 --- a/scripts/status_profile.py +++ b/scripts/status_profile.py @@ -29,17 +29,15 @@ from __future__ import annotations import argparse -import os import sys -from schedulers.config import cfg_get, load_k8s_config, load_slurm_config, resolve_jwt_token +from schedulers.config import cfg_get, load_k8s_config, load_slurm_config, resolve_default, resolve_jwt_token from schedulers.k8s import K8sScheduler from schedulers.local import LocalScheduler from schedulers.slurm import SlurmScheduler -def _d(env_var: str, cfg: dict, key: str, fallback: str = "") -> str: - return os.environ.get(env_var, "") or cfg_get(cfg, key, fallback) +_d = resolve_default def _add_scheduler_args(p: argparse.ArgumentParser) -> None: @@ -101,7 +99,7 @@ def _add_scheduler_specific_args(p: argparse.ArgumentParser, scheduler: str) -> p.add_argument( "--slurm-submit-via", choices=["rest", "cli"], - default=cfg_get(slurm_cfg, "submit_via", "rest"), + default=cfg_get(slurm_cfg, "submit_via", "cli"), ) p.add_argument( "--slurm-cli-prefix", diff --git a/scripts/submit_profile.py b/scripts/submit_profile.py index f1e3051..8999212 100644 --- a/scripts/submit_profile.py +++ b/scripts/submit_profile.py @@ -51,15 +51,15 @@ import sys from schedulers.base import ProfileJobSpec -from schedulers.config import cfg_get, load_k8s_config, load_slurm_config, resolve_jwt_token +from schedulers.config import cfg_get, load_k8s_config, load_slurm_config, resolve_default, resolve_jwt_token from schedulers.k8s import K8sScheduler from schedulers.local import LocalScheduler from schedulers.slurm import SlurmScheduler +from scripts import load_sweep_file, parse_sweep_point -def _d(env_var: str, cfg: dict, key: str, fallback: str = "") -> str: - """Resolve default: env var > config file > fallback.""" - return os.environ.get(env_var, "") or cfg_get(cfg, key, fallback) +# Short alias for argparse 
default= expressions +_d = resolve_default def _parse_args(argv: list[str] | None = None) -> argparse.Namespace: @@ -311,8 +311,8 @@ def _parse_args(argv: list[str] | None = None) -> argparse.Namespace: slurm.add_argument( "--slurm-submit-via", choices=["rest", "cli"], - default=cfg_get(slurm_cfg, "submit_via", "rest"), - help="Submission mode: rest (slurmrestd) or cli (sbatch subprocess)", + default=cfg_get(slurm_cfg, "submit_via", "cli"), + help="Submission mode: cli (sbatch subprocess) or rest (slurmrestd, deprecated)", ) slurm.add_argument( "--slurm-cli-prefix", @@ -327,26 +327,14 @@ def _parse_sweep_points(args) -> list[tuple[int, int, int]]: """Resolve sweep points from --sweep / --sweep-file args.""" if args.sweep and args.sweep_file: sys.exit("Error: --sweep and --sweep-file are mutually exclusive") - points: list[tuple[int, int, int]] = [] - raw: list[str] = [] - if args.sweep: - raw = args.sweep - elif args.sweep_file: - with open(args.sweep_file) as f: - raw = [ - line.strip() - for line in f - if line.strip() and not line.strip().startswith("#") - ] - for s in raw: - parts = s.strip().split(":") - if len(parts) != 3: - sys.exit(f"Bad sweep point {s!r}: expected BS:INPUT_LEN:CTX") - try: - points.append((int(parts[0]), int(parts[1]), int(parts[2]))) - except ValueError: - sys.exit(f"Bad sweep point {s!r}: all three values must be integers") - return points + try: + if args.sweep: + return [parse_sweep_point(s) for s in args.sweep] + if args.sweep_file: + return load_sweep_file(args.sweep_file) + except ValueError as e: + sys.exit(str(e)) + return [] def _build_spec(args: argparse.Namespace) -> ProfileJobSpec: From 3152a72383acc3870804b7326529e2e72236c7b3 Mon Sep 17 00:00:00 2001 From: Terrence Zhang Date: Wed, 18 Mar 2026 22:15:23 +0000 Subject: [PATCH 29/56] review: fix remaining issues (stale docstring, unused vars, README defaults) - slurm.py: fix module docstring (no longer says 'posts to slurmrestd') - local.py: remove unused stderr/stderr_size vars 
in list_jobs() - k8s.py: extract _k8s_job_state() helper (was duplicated in status+list_jobs) - README: update Slurm default to cli, mark REST as deprecated, fix init example --- schedulers/README.md | 13 ++++++------- schedulers/k8s.py | 31 +++++++++++++------------------ schedulers/local.py | 3 --- schedulers/slurm.py | 3 +-- 4 files changed, 20 insertions(+), 30 deletions(-) diff --git a/schedulers/README.md b/schedulers/README.md index cf14f3c..7f99f4e 100644 --- a/schedulers/README.md +++ b/schedulers/README.md @@ -182,21 +182,20 @@ bash dockerfiles/dev-teardown.sh kind 生成 sbatch 脚本并提交到 Slurm 集群。支持两种提交模式: -- **CLI 模式**(推荐):通过 `sbatch`/`squeue`/`scancel` 命令 -- **REST 模式**:通过 slurmrestd REST API + JWT 认证 +- **CLI 模式**(推荐,默认):通过 `sbatch`/`squeue`/`scancel` 命令 +- **REST 模式**(已弃用):通过 slurmrestd REST API + JWT 认证 ### 首次配置 ```bash # CLI 模式(推荐,无需 slurmrestd) flowsim init slurm \ - --rest-url http://unused \ --partition gpu \ --account my-project \ --container-runtime none \ --force -# REST 模式(需要 slurmrestd) +# REST 模式(已弃用,需要 slurmrestd) flowsim init slurm \ --rest-url https://slurm.example.com:6820 \ --partition gpu \ @@ -224,7 +223,7 @@ flowsim submit --scheduler slurm \ --slurm-partition normal \ --collect perf --model-path Qwen/Qwen3-8B --gpus 1 -# REST 模式 +# REST 模式(已弃用) flowsim submit --scheduler slurm \ --slurm-submit-via rest \ --slurm-rest-url http://localhost:6820 \ @@ -248,7 +247,7 @@ flowsim cancel --scheduler slurm --job 12345 \ | 参数 | 说明 | 默认值 | |------|------|--------| -| `--slurm-submit-via` | 提交模式:`cli`(sbatch)或 `rest`(slurmrestd) | `rest` | +| `--slurm-submit-via` | 提交模式:`cli`(sbatch)或 `rest`(slurmrestd,已弃用) | `cli` | | `--slurm-cli-prefix` | CLI 命令前缀(如 `"docker exec -i slurmctld"`) | 空 | | `--slurm-partition` | Slurm 分区 | 空 | | `--slurm-time` | 任务时间限制 | `02:00:00` | @@ -284,7 +283,7 @@ flowsim cancel --scheduler slurm --job 12345 \ - `"docker exec -i slurmctld"` — 通过 Docker 容器 - `"ssh login-node"` — 通过 SSH -**REST 模式:** +**REST 模式(已弃用):** 1. 
同上生成 sbatch 脚本 2. `submit()` 通过 HTTP POST 到 slurmrestd 的 `/slurm/{version}/job/submit` 3. 所有操作通过 slurmrestd REST API + JWT 认证 diff --git a/schedulers/k8s.py b/schedulers/k8s.py index 741721e..83b991c 100644 --- a/schedulers/k8s.py +++ b/schedulers/k8s.py @@ -11,6 +11,17 @@ from schedulers.base import BaseScheduler, JobResult, ProfileJobSpec + +def _k8s_job_state(status) -> str: + """Derive a human-readable state string from a K8s Job status object.""" + if status.succeeded and status.succeeded > 0: + return "Succeeded" + if status.failed and status.failed > 0: + return "Failed" + if status.active and status.active > 0: + return "Running" + return "Pending" + # Optional: nicer YAML output for dry-run. try: import yaml as _yaml # type: ignore[import-untyped] @@ -216,17 +227,9 @@ def status(self, job_id: str) -> dict: batch_api, core_api = self._load_k8s() job = batch_api.read_namespaced_job(name=job_id, namespace=self.namespace) - st = job.status # Determine state - if st.succeeded and st.succeeded > 0: - state = "Succeeded" - elif st.failed and st.failed > 0: - state = "Failed" - elif st.active and st.active > 0: - state = "Running" - else: - state = "Pending" + state = _k8s_job_state(job.status) # Pod info pods = core_api.list_namespaced_pod( @@ -335,15 +338,7 @@ def list_jobs(self, *, status_filter: str = "") -> list[dict]: ) result: list[dict] = [] for job in jobs.items: - st = job.status - if st.succeeded and st.succeeded > 0: - state = "Succeeded" - elif st.failed and st.failed > 0: - state = "Failed" - elif st.active and st.active > 0: - state = "Running" - else: - state = "Pending" + state = _k8s_job_state(job.status) if status_filter and state.lower() != status_filter.lower(): continue diff --git a/schedulers/local.py b/schedulers/local.py index eeaa020..4c61865 100644 --- a/schedulers/local.py +++ b/schedulers/local.py @@ -330,9 +330,6 @@ def list_jobs(self, *, status_filter: str = "") -> list[dict]: continue name = m.group(1) ts = m.group(2) - stderr = 
path.replace(".stdout.log", ".stderr.log") - stderr_size = os.path.getsize(stderr) if os.path.exists(stderr) else 0 - # If stderr has content, might have failed; otherwise completed state = "Completed" jobs.append({ "job_id": name, diff --git a/schedulers/slurm.py b/schedulers/slurm.py index b297fec..6d2880b 100644 --- a/schedulers/slurm.py +++ b/schedulers/slurm.py @@ -1,8 +1,7 @@ """Slurm sbatch scheduler for FlowSim profiling. ``render()`` / ``dry_run()`` produce a standalone bash script (zero deps). -``submit()`` posts the script to a slurmrestd endpoint via stdlib -``urllib.request`` — no extra packages needed. +``submit()`` pipes the script to ``sbatch`` by default (CLI mode). Two submission modes are supported: From 19973cb732bea0bec8de43e8cabd485c51981b1f Mon Sep 17 00:00:00 2001 From: Terrence Zhang Date: Wed, 18 Mar 2026 22:28:59 +0000 Subject: [PATCH 30/56] remove Slurm REST dead code, rewrite README in English - Delete all slurmrestd REST methods (submit/cancel/status/logs/list) - Remove ssl, urllib, json imports from slurm.py - Remove REST constructor params (rest_url, jwt_token, api_version, verify_ssl, submit_via) - Remove resolve_jwt_token() from config.py - Remove REST CLI args from submit_profile.py, status_profile.py, cli.py - Strip REST fields from slurm.yaml template - Remove JWT-related tests, update init/submit tests - Rewrite schedulers/README.md entirely in English, no REST references - 56 unit tests pass, net -524 lines --- schedulers/README.md | 331 ++++++++++++++----------------- schedulers/config.py | 29 --- schedulers/slurm.py | 322 +----------------------------- schedulers/templates/slurm.yaml | 11 +- scripts/cli.py | 24 +-- scripts/status_profile.py | 41 +--- scripts/submit_profile.py | 61 +----- tests/unit/test_scheduler_cli.py | 39 +--- 8 files changed, 167 insertions(+), 691 deletions(-) diff --git a/schedulers/README.md b/schedulers/README.md index 7f99f4e..48e6b0b 100644 --- a/schedulers/README.md +++ b/schedulers/README.md @@ 
-1,78 +1,78 @@ # FlowSim Schedulers -FlowSim 支持三种调度器后端,用于提交 GPU profiling 任务: +FlowSim supports three scheduler backends for submitting GPU profiling jobs: -| 后端 | 适用场景 | 运行位置 | 依赖 | -|------|----------|----------|------| -| **local** | 单机开发/测试 | 宿主机 Docker 容器 | Docker + NVIDIA GPU | -| **k8s** | Kubernetes 集群 | K8s Job Pod | `kubernetes` Python 包 | -| **slurm** | HPC 集群 | Slurm 计算节点 | Slurm CLI 或 slurmrestd | +| Backend | Use Case | Runs On | Dependencies | +|---------|----------|---------|--------------| +| **local** | Single-machine dev/testing | Host Docker container | Docker + NVIDIA GPU | +| **k8s** | Kubernetes cluster | K8s Job Pod | `kubernetes` Python package | +| **slurm** | HPC cluster | Slurm compute node | Slurm CLI (`sbatch`/`squeue`/`scancel`) | -## 快速上手 +## Quick Start ```bash -# 安装(从 FlowSim 项目根目录) +# Install (from FlowSim project root) cd FlowSim -pip install -e . # 或确保 PYTHONPATH 包含项目根目录 +pip install -e . # or ensure PYTHONPATH includes the project root -# 查看帮助 +# Show help flowsim --help flowsim submit --help ``` -## 通用工作流 +## Common Workflow -所有调度器共享相同的 CLI 接口: +All schedulers share the same CLI interface: ```bash -# 1. 提交任务 +# 1. Submit a job flowsim submit --scheduler --collect \ - --model-path [选项...] + --model-path [options...] -# 2. 查看任务列表 +# 2. List jobs flowsim list --scheduler -# 3. 查看任务状态 +# 3. Check job status flowsim status --scheduler --job -# 4. 查看日志 +# 4. View logs flowsim logs --scheduler --job -# 5. 取消任务 +# 5. Cancel a job flowsim cancel --scheduler --job -# 6. Dry-run(仅打印脚本/manifest,不提交) +# 6. Dry-run (print script/manifest without submitting) flowsim submit --scheduler ... 
--dry-run ``` -### 通用参数 +### Common Parameters -| 参数 | 说明 | 默认值 | -|------|------|--------| -| `--collect` | 收集模式:`perf`(性能) / `shapes`(形状) / `all`(两者) | 必填 | -| `--model-path` | HuggingFace 模型路径 | 必填 | +| Parameter | Description | Default | +|-----------|-------------|---------| +| `--collect` | Collection mode: `perf` / `shapes` / `all` | required | +| `--model-path` | HuggingFace model path | required | | `--tp` | Tensor parallelism | `1` | | `--dp` | Data parallelism | `1` | | `--bs` | Batch size | `1` | -| `--input-len` | 输入序列长度 | `2048` | -| `--existing-ctx` | 已有 KV cache 长度 | `0` | -| `--decode-tokens` | Decode 生成 token 数 | `32` | -| `--warmup-n` | Warmup 迭代数 | `5` | -| `--image` | Docker 镜像 | `flowsim-image:latest` | -| `--gpus` | GPU 数量 | `1` | -| `--output-dir` | 输出目录(自动生成如不指定) | `stage_traces/{scheduler}/{timestamp}/` | -| `--dry-run` | 仅打印脚本,不提交 | `false` | +| `--input-len` | Input sequence length | `2048` | +| `--existing-ctx` | Existing KV cache length | `0` | +| `--decode-tokens` | Decode token count | `32` | +| `--warmup-n` | Warmup iterations | `5` | +| `--image` | Docker image | `flowsim-image:latest` | +| `--gpus` | GPU count | `1` | +| `--output-dir` | Output directory (auto-generated if omitted) | `stage_traces/{scheduler}/{timestamp}/` | +| `--dry-run` | Print script only, do not submit | `false` | --- -## 1. Local 调度器 +## 1. Local Scheduler -直接在宿主机上通过 `docker run` 启动容器运行 profiling。最简单的方式,适合单机开发和测试。 +Runs profiling directly on the host via `docker run`. The simplest option, suitable for single-machine development and testing. 
-### 使用 +### Usage ```bash -# 最简单的用法 — 使用 GPU 0 运行 +# Simplest usage — run on GPU 0 flowsim submit --scheduler local \ --collect all \ --model-path workload/models/configs/Qwen3-235B-A22B \ @@ -80,34 +80,34 @@ flowsim submit --scheduler local \ --gpus 1 --local-gpus 0 \ --extra-server-opts "--load-format dummy" -# 多 GPU +# Multi-GPU flowsim submit --scheduler local \ --collect perf \ --model-path Qwen/Qwen3-8B \ --tp 2 --gpus 2 --local-gpus 0,1 ``` -### 专有参数 +### Parameters -| 参数 | 说明 | 默认值 | -|------|------|--------| -| `--local-gpus` | `CUDA_VISIBLE_DEVICES`(如 `0` 或 `0,1`) | 空(使用所有 GPU) | -| `--local-workdir` | 主机工作目录 | FlowSim 项目根目录 | +| Parameter | Description | Default | +|-----------|-------------|---------| +| `--local-gpus` | `CUDA_VISIBLE_DEVICES` (e.g. `0` or `0,1`) | empty (all GPUs) | +| `--local-workdir` | Host working directory | FlowSim project root | -### 工作原理 +### How It Works -1. `render()` 生成一条 `docker run --gpus` 命令 -2. `submit()` 在宿主机执行该容器,同步等待完成 -3. Traces 写入宿主机 `stage_traces/local/{YYYYMMDD_HHMMSS}/` -4. `status()` / `logs()` / `list_jobs()` 扫描日志文件 +1. `render()` generates a `docker run --gpus` command +2. `submit()` runs the container on the host, waits for completion +3. Traces are written to `stage_traces/local/{YYYYMMDD_HHMMSS}/` +4. `status()` / `logs()` / `list_jobs()` scan log files --- -## 2. Kubernetes 调度器 +## 2. Kubernetes Scheduler -将 profiling 任务作为 Kubernetes Job 提交到集群。支持 PVC 和 hostPath 两种存储方式。 +Submits profiling jobs as Kubernetes Jobs to a cluster. Supports both PVC and hostPath storage. -### 首次配置 +### First-Time Setup ```bash flowsim init k8s \ @@ -118,19 +118,19 @@ flowsim init k8s \ --force ``` -配置保存到 `~/.flowsim/k8s.yaml`,后续提交时自动读取。 +Config is saved to `~/.flowsim/k8s.yaml` and automatically loaded on subsequent submissions. 
-### 使用 +### Usage ```bash -# 提交到 K8s 集群 +# Submit to K8s cluster flowsim submit --scheduler k8s \ --collect all \ --model-path workload/models/configs/Qwen3-235B-A22B \ --tp 1 --bs 1 --input-len 2048 --gpus 1 \ --extra-server-opts "--load-format dummy" -# 覆盖配置文件中的值 +# Override config file values flowsim submit --scheduler k8s \ --collect perf \ --model-path Qwen/Qwen3-8B \ @@ -138,192 +138,158 @@ flowsim submit --scheduler k8s \ --k8s-pvc my-traces-pvc \ --gpus 4 --tp 4 -# Dry-run 查看生成的 YAML +# Dry-run to preview the generated YAML flowsim submit --scheduler k8s ... --dry-run ``` -### 专有参数 +### Parameters -| 参数 | 说明 | 默认值 | -|------|------|--------| -| `--k8s-namespace` | K8s 命名空间 | `default` | -| `--k8s-kubeconfig` | kubeconfig 路径 | `~/.kube/config` | -| `--k8s-context` | kubeconfig context | 当前 context | -| `--k8s-pvc` | PVC 名称(持久存储) | 空 | -| `--k8s-host-output-dir` | hostPath 挂载路径(PVC 为空时使用) | 空 | -| `--k8s-node-selector` | 节点选择标签(可重复),格式 `KEY=VALUE` | 空 | -| `--k8s-service-account` | ServiceAccount | 空 | -| `--k8s-shm-size` | 共享内存大小 | `16Gi` | -| `--k8s-runtime-class` | RuntimeClass(如 `nvidia`,用于 CDI 模式) | 空 | +| Parameter | Description | Default | +|-----------|-------------|---------| +| `--k8s-namespace` | K8s namespace | `default` | +| `--k8s-kubeconfig` | kubeconfig path | `~/.kube/config` | +| `--k8s-context` | kubeconfig context | current context | +| `--k8s-pvc` | PVC name (persistent storage) | empty | +| `--k8s-host-output-dir` | hostPath mount (used when PVC is empty) | empty | +| `--k8s-node-selector` | Node selector labels (repeatable), format `KEY=VALUE` | empty | +| `--k8s-service-account` | ServiceAccount | empty | +| `--k8s-shm-size` | Shared memory size | `16Gi` | +| `--k8s-runtime-class` | RuntimeClass (e.g. `nvidia` for CDI mode) | empty | -### 工作原理 +### How It Works -1. `render()` 生成 Kubernetes Job YAML/JSON manifest -2. `submit()` 通过 `kubernetes` Python 客户端创建 Job -3. Traces 通过 PVC 或 hostPath 持久化 -4. 
`status()` / `cancel()` / `list_jobs()` 通过 K8s API 操作 +1. `render()` generates a Kubernetes Job YAML/JSON manifest +2. `submit()` creates the Job via the `kubernetes` Python client +3. Traces are persisted via PVC or hostPath +4. `status()` / `cancel()` / `list_jobs()` operate via the K8s API -### Kind 本地测试集群 +### Kind Local Test Cluster ```bash -# 启动 Kind 集群(GPU passthrough + CDI 模式) +# Start a Kind cluster (GPU passthrough + CDI mode) bash dockerfiles/dev-setup.sh kind -# 运行 K8s 集成测试 +# Run K8s integration tests python -m pytest tests/integration/test_scheduler_local.py::TestK8sScheduler -v -x -# 清理 +# Teardown bash dockerfiles/dev-teardown.sh kind ``` --- -## 3. Slurm 调度器 +## 3. Slurm Scheduler -生成 sbatch 脚本并提交到 Slurm 集群。支持两种提交模式: +Generates sbatch scripts and submits them to a Slurm cluster via `sbatch`/`squeue`/`scancel`. -- **CLI 模式**(推荐,默认):通过 `sbatch`/`squeue`/`scancel` 命令 -- **REST 模式**(已弃用):通过 slurmrestd REST API + JWT 认证 - -### 首次配置 +### First-Time Setup ```bash -# CLI 模式(推荐,无需 slurmrestd) flowsim init slurm \ --partition gpu \ --account my-project \ --container-runtime none \ --force - -# REST 模式(已弃用,需要 slurmrestd) -flowsim init slurm \ - --rest-url https://slurm.example.com:6820 \ - --partition gpu \ - --account my-project \ - --jwt-token-cmd "scontrol token lifespan=3600" \ - --force ``` -### 使用 +### Usage ```bash -# CLI 模式 — 直接调用 sbatch(最常用) +# Submit via sbatch flowsim submit --scheduler slurm \ --collect all \ --model-path workload/models/configs/Qwen3-235B-A22B \ --tp 1 --bs 1 --input-len 2048 --gpus 1 \ --slurm-partition gpu \ - --slurm-submit-via cli \ --extra-server-opts "--load-format dummy" -# CLI 模式 + 远程前缀(通过 docker exec 或 ssh) +# CLI prefix (e.g. 
via docker exec or ssh) flowsim submit --scheduler slurm \ - --slurm-submit-via cli \ --slurm-cli-prefix "docker exec -i slurmctld" \ --slurm-partition normal \ --collect perf --model-path Qwen/Qwen3-8B --gpus 1 -# REST 模式(已弃用) -flowsim submit --scheduler slurm \ - --slurm-submit-via rest \ - --slurm-rest-url http://localhost:6820 \ - --slurm-jwt-token "$(scontrol token lifespan=3600 | cut -d= -f2)" \ - --collect perf --model-path Qwen/Qwen3-8B --gpus 1 - -# Dry-run 查看生成的 sbatch 脚本 +# Dry-run to preview the generated sbatch script flowsim submit --scheduler slurm ... --dry-run -# 查看状态(CLI 模式) +# Check status flowsim status --scheduler slurm --job 12345 \ - --slurm-submit-via cli \ --slurm-cli-prefix "docker exec -i slurmctld" -# 取消任务 -flowsim cancel --scheduler slurm --job 12345 \ - --slurm-submit-via cli +# Cancel a job +flowsim cancel --scheduler slurm --job 12345 ``` -### 专有参数 - -| 参数 | 说明 | 默认值 | -|------|------|--------| -| `--slurm-submit-via` | 提交模式:`cli`(sbatch)或 `rest`(slurmrestd,已弃用) | `cli` | -| `--slurm-cli-prefix` | CLI 命令前缀(如 `"docker exec -i slurmctld"`) | 空 | -| `--slurm-partition` | Slurm 分区 | 空 | -| `--slurm-time` | 任务时间限制 | `02:00:00` | -| `--slurm-account` | 计费账户 | 空 | -| `--slurm-constraint` | 节点约束 | 空 | -| `--slurm-container-runtime` | 容器运行时:`docker` / `enroot` / `none` | `none` | -| `--slurm-container-mounts` | 容器挂载 | 空 | -| `--slurm-module` | `module load` 命令(可重复) | 空 | -| `--slurm-extra-sbatch` | 额外 `#SBATCH` 指令(可重复) | 空 | -| `--slurm-rest-url` | slurmrestd URL(REST 模式需要) | 空 | -| `--slurm-jwt-token` | JWT token(REST 模式需要) | 空 | -| `--slurm-api-version` | slurmrestd API 版本 | `v0.0.40` | -| `--slurm-no-verify-ssl` | 跳过 TLS 验证 | `false` | - -### container_runtime 说明 - -| 值 | 说明 | -|----|------| -| `none` | 直接在计算节点上运行(节点已有 Python/sglang 环境)| -| `docker` | 在分配的节点上 `docker run` | -| `enroot` | 使用 `srun --container-image` (NVIDIA enroot) | - -### 工作原理 - -**CLI 模式:** -1. `render()` 生成完整的 sbatch 脚本(含 `#SBATCH` 指令 + profiling 命令) -2. 
`submit()` 通过 `sbatch --parsable` 提交(脚本通过 stdin 传入) -3. `status()` 通过 `scontrol show job` 查询(无需 slurmdbd) -4. `cancel()` 通过 `scancel` 取消 -5. `list_jobs()` 通过 `squeue` 列出 - -如果 Slurm 命令不在本地 PATH 中,可通过 `--slurm-cli-prefix` 指定前缀,例如: -- `"docker exec -i slurmctld"` — 通过 Docker 容器 -- `"ssh login-node"` — 通过 SSH - -**REST 模式(已弃用):** -1. 同上生成 sbatch 脚本 -2. `submit()` 通过 HTTP POST 到 slurmrestd 的 `/slurm/{version}/job/submit` -3. 所有操作通过 slurmrestd REST API + JWT 认证 - -### Docker Compose 本地测试集群 +### Parameters + +| Parameter | Description | Default | +|-----------|-------------|---------| +| `--slurm-partition` | Slurm partition | empty | +| `--slurm-time` | Job time limit | `02:00:00` | +| `--slurm-account` | Billing account | empty | +| `--slurm-constraint` | Node constraint | empty | +| `--slurm-cli-prefix` | Shell prefix for CLI commands (e.g. `"docker exec -i slurmctld"`) | empty | +| `--slurm-container-runtime` | Container runtime: `docker` / `enroot` / `none` | `none` | +| `--slurm-container-mounts` | Container mounts | empty | +| `--slurm-module` | `module load` commands (repeatable) | empty | +| `--slurm-extra-sbatch` | Extra `#SBATCH` directives (repeatable) | empty | + +### container_runtime Options + +| Value | Description | +|-------|-------------| +| `none` | Run directly on compute node (Python/sglang must be installed) | +| `docker` | Run via `docker run` on the allocated node | +| `enroot` | Run via `srun --container-image` (NVIDIA enroot) | + +### How It Works + +1. `render()` generates a complete sbatch script (`#SBATCH` directives + profiling command) +2. `submit()` pipes the script to `sbatch --parsable` +3. `status()` queries via `scontrol show job` +4. `cancel()` runs `scancel` +5. 
`list_jobs()` runs `squeue` + +If Slurm commands are not on the local PATH, use `--slurm-cli-prefix` to specify a prefix, e.g.: +- `"docker exec -i slurmctld"` — via Docker container +- `"ssh login-node"` — via SSH + +### Docker Compose Local Test Cluster ```bash -# 启动 Slurm 集群(slurmctld + 1 计算节点 + 1 GPU) +# Start Slurm cluster (slurmctld + 1 compute node + 1 GPU) cd dockerfiles/ docker compose -f slurm-compose.yaml up -d -# 检查集群状态 +# Check cluster status docker exec slurmctld sinfo -# 运行 Slurm 集成测试 +# Run Slurm integration tests python -m pytest tests/integration/test_scheduler_local.py::TestSlurmScheduler -v -x -# 清理 +# Teardown docker compose -f slurm-compose.yaml down -v ``` --- -## 配置文件 +## Configuration -配置保存在 `~/.flowsim/` 目录下,通过 `flowsim init` 生成: +Config files are stored in `~/.flowsim/` and generated via `flowsim init`: ``` ~/.flowsim/ -├── k8s.yaml # K8s 调度器配置 -└── slurm.yaml # Slurm 调度器配置 +├── k8s.yaml # K8s scheduler config +└── slurm.yaml # Slurm scheduler config ``` -参数优先级(从高到低): -1. CLI flag(`--slurm-partition gpu`) -2. 环境变量(`FLOWSIM_SLURM_PARTITION=gpu`) -3. 配置文件(`~/.flowsim/slurm.yaml`) -4. 内置默认值 +Parameter priority (highest to lowest): +1. CLI flag (`--slurm-partition gpu`) +2. Environment variable (`FLOWSIM_SLURM_PARTITION=gpu`) +3. Config file (`~/.flowsim/slurm.yaml`) +4. 
Built-in default -### 示例 k8s.yaml +### Example k8s.yaml ```yaml kubeconfig: /home/user/.kube/config @@ -333,33 +299,32 @@ runtime_class_name: nvidia shm_size: 16Gi ``` -### 示例 slurm.yaml +### Example slurm.yaml ```yaml partition: gpu account: my-project time: "02:00:00" container_runtime: none -submit_via: cli cli_prefix: "" ``` --- -## 输出目录结构 +## Output Directory Structure -所有调度器产生统一的 trace 输出结构: +All schedulers produce a unified trace output structure: ``` stage_traces/{scheduler}/{YYYYMMDD_HHMMSS}/ ├── bs1_input2048_ctx0/ -│ ├── *.trace.json.gz # 原始 trace -│ ├── parsed/*.csv # 解析后的 CSV -│ ├── merged/*_merged.trace.csv # 合并的 trace CSV -│ ├── shape_traces/ # Shape trace(collect=shapes/all) -│ ├── shape_parsed/*.csv # Shape 解析 CSV -│ ├── analysis_extend.json # Extend 阶段分析 -│ └── analysis_decode.json # Decode 阶段分析 +│ ├── *.trace.json.gz # Raw traces +│ ├── parsed/*.csv # Parsed CSVs +│ ├── merged/*_merged.trace.csv # Merged trace CSV +│ ├── shape_traces/ # Shape traces (collect=shapes/all) +│ ├── shape_parsed/*.csv # Shape parsed CSVs +│ ├── analysis_extend.json # Extend stage analysis +│ └── analysis_decode.json # Decode stage analysis ├── logs/ │ ├── server_*.stdout.log │ └── server_*.stderr.log @@ -368,9 +333,9 @@ stage_traces/{scheduler}/{YYYYMMDD_HHMMSS}/ --- -## PD Disaggregation(实验性) +## PD Disaggregation (Experimental) -支持 Prefill-Decode 分离部署: +Supports Prefill-Decode disaggregated deployment: ```bash flowsim submit --scheduler k8s \ @@ -381,4 +346,4 @@ flowsim submit --scheduler k8s \ --disagg-transfer-backend mooncake ``` -这会生成两个 Job:一个 prefill 实例,一个 decode 实例。 +This generates two Jobs: one prefill instance and one decode instance. diff --git a/schedulers/config.py b/schedulers/config.py index e228cb3..723dfc2 100644 --- a/schedulers/config.py +++ b/schedulers/config.py @@ -15,17 +15,11 @@ Template files are in ``schedulers/templates/k8s.yaml`` and ``schedulers/templates/slurm.yaml``. Copy to ``~/.flowsim/`` and edit. 
- -For Slurm, use ``jwt_token_cmd`` instead of ``jwt_token`` to avoid -storing secrets in plaintext. The command is executed at submit time -and its stdout is used as the token. """ from __future__ import annotations import os -import shlex -import subprocess from pathlib import Path # Optional: try PyYAML, fall back to JSON @@ -94,29 +88,6 @@ def load_slurm_config() -> dict: return {} -def resolve_jwt_token(slurm_cfg: dict) -> str: - """Get the JWT token from config, executing jwt_token_cmd if needed.""" - token = slurm_cfg.get("jwt_token", "") - if token: - return str(token) - - cmd = slurm_cfg.get("jwt_token_cmd", "") - if cmd: - try: - result = subprocess.run( - shlex.split(cmd), - capture_output=True, - text=True, - timeout=30, - ) - if result.returncode == 0: - return result.stdout.strip() - except (FileNotFoundError, OSError): - pass - - return "" - - def cfg_get(cfg: dict, key: str, fallback: str = "") -> str: """Get a value from a flat config dict, or fallback.""" val = cfg.get(key) diff --git a/schedulers/slurm.py b/schedulers/slurm.py index 6d2880b..67e954b 100644 --- a/schedulers/slurm.py +++ b/schedulers/slurm.py @@ -1,32 +1,20 @@ """Slurm sbatch scheduler for FlowSim profiling. ``render()`` / ``dry_run()`` produce a standalone bash script (zero deps). -``submit()`` pipes the script to ``sbatch`` by default (CLI mode). +``submit()`` pipes the script to ``sbatch`` via subprocess (CLI mode). -Two submission modes are supported: - -* **cli** (default) — pipe the script to ``sbatch`` via subprocess. - Requires ``sbatch``/``squeue``/``scancel`` on PATH (or reachable - via ``cli_prefix``, e.g. ``"docker exec slurmctld"``). -* **rest** (deprecated) — POST the script to a slurmrestd endpoint. - Requires ``rest_url`` and ``jwt_token``. +Requires ``sbatch``/``squeue``/``scancel`` on PATH (or reachable +via ``cli_prefix``, e.g. ``"docker exec slurmctld"``). 
""" from __future__ import annotations -import json import shlex -import ssl import subprocess -import urllib.error -import urllib.request from schedulers.base import BaseScheduler, JobResult, ProfileJobSpec -_DEFAULT_API_VERSION = "v0.0.40" - - class SlurmScheduler(BaseScheduler): """Generate and optionally submit an sbatch script for profiling. @@ -36,17 +24,6 @@ class SlurmScheduler(BaseScheduler): Slurm partition to submit to. time_limit : str Wall-clock time limit (e.g., ``"01:00:00"``). - rest_url : str - Base URL of the slurmrestd daemon - (e.g., ``"https://slurm.example.com:6820"``). - Required only for ``submit()``. - jwt_token : str - JWT/auth token for slurmrestd. Required only for ``submit()``. - api_version : str - slurmrestd OpenAPI version (default: ``"v0.0.40"``). - Adjust to match your cluster (``v0.0.39``, ``v0.0.41``, …). - verify_ssl : bool - Whether to verify the slurmrestd TLS certificate (default True). account : str, optional ``--account`` for which allocation to charge. constraint : str, optional @@ -64,12 +41,8 @@ class SlurmScheduler(BaseScheduler): (relevant for ``"none"`` runtime). extra_sbatch : list[str] Additional ``#SBATCH`` lines, each *without* the ``#SBATCH`` prefix. - submit_via : str - ``"cli"`` (default) — use ``sbatch`` / ``squeue`` / ``scancel`` subprocess. - ``"rest"`` (deprecated) — use slurmrestd REST API. cli_prefix : str Shell prefix for CLI commands (e.g. ``"docker exec -i slurmctld"``). - Only used when ``submit_via="cli"``. 
""" def __init__( @@ -77,44 +50,24 @@ def __init__( *, partition: str = "gpu", time_limit: str = "02:00:00", - rest_url: str = "", - jwt_token: str = "", - api_version: str = _DEFAULT_API_VERSION, - verify_ssl: bool = True, account: str = "", constraint: str = "", container_runtime: str = "none", container_mounts: str = "", modules: list[str] | None = None, extra_sbatch: list[str] | None = None, - submit_via: str = "cli", cli_prefix: str = "", ) -> None: self.partition = partition self.time_limit = time_limit - self.rest_url = rest_url.rstrip("/") - self.jwt_token = jwt_token - self.api_version = api_version - self.verify_ssl = verify_ssl self.account = account self.constraint = constraint self.container_runtime = container_runtime self.container_mounts = container_mounts self.modules = modules or [] self.extra_sbatch = extra_sbatch or [] - self.submit_via = submit_via self.cli_prefix = cli_prefix - if self.submit_via != "cli": - import warnings - warnings.warn( - "Slurm REST mode (slurmrestd) is deprecated and will be " - "removed in a future release. 
Use submit_via='cli' " - "(sbatch) instead.", - DeprecationWarning, - stacklevel=2, - ) - def render(self, spec: ProfileJobSpec) -> str: job_name = spec.default_job_name() cmd = spec.build_shell_command() @@ -184,10 +137,8 @@ def render(self, spec: ProfileJobSpec) -> str: return "\n".join(lines) def submit(self, spec: ProfileJobSpec) -> JobResult: - """Submit the job via REST API or CLI, depending on ``submit_via``.""" - if self.submit_via == "cli": - return self._submit_cli(spec) - return self._submit_rest(spec) + """Submit the job via ``sbatch``.""" + return self._submit_cli(spec) # ------------------------------------------------------------------ # CLI helpers @@ -234,146 +185,21 @@ def _submit_cli(self, spec: ProfileJobSpec) -> JobResult: message=f"Submitted batch job {job_id}", ) - # ------------------------------------------------------------------ - # REST submit - # ------------------------------------------------------------------ - - def _submit_rest(self, spec: ProfileJobSpec) -> JobResult: - """Submit the job via slurmrestd REST API. - - Requires ``rest_url`` and ``jwt_token`` to be set. - Uses only ``urllib.request`` from the standard library. - """ - if not self.rest_url: - raise RuntimeError( - "--slurm-rest-url is required for --submit. " - "Point it at your slurmrestd endpoint " - "(e.g. https://slurm.example.com:6820)." - ) - if not self.jwt_token: - raise RuntimeError( - "--slurm-jwt-token is required for --submit. 
" - "Generate one via: scontrol token lifespan=3600" - ) - - script = self.render(spec) - job_name = spec.default_job_name() - - url = ( - f"{self.rest_url}/slurm/{self.api_version}/job/submit" - ) - - # slurmrestd job submission payload - payload = { - "script": script, - "job": { - "name": job_name, - "partition": self.partition, - "time_limit": {"number": self._parse_time_minutes(), "set": True}, - "tasks": 1, - "current_working_directory": "/flowsim", - "environment": ["PATH=/usr/local/bin:/usr/bin:/bin"], - }, - } - if self.account: - payload["job"]["account"] = self.account - - data = json.dumps(payload).encode() - headers = { - "Content-Type": "application/json", - "X-SLURM-USER-TOKEN": self.jwt_token, - } - req = urllib.request.Request(url, data=data, headers=headers, method="POST") - - ctx: ssl.SSLContext | None = None - if not self.verify_ssl: - ctx = ssl.create_default_context() - ctx.check_hostname = False - ctx.verify_mode = ssl.CERT_NONE - - try: - with urllib.request.urlopen(req, context=ctx) as resp: - body = json.loads(resp.read()) - except urllib.error.HTTPError as exc: - detail = exc.read().decode(errors="replace") - raise RuntimeError( - f"slurmrestd returned HTTP {exc.code}:\n{detail}" - ) from exc - except urllib.error.URLError as exc: - raise RuntimeError( - f"Cannot reach slurmrestd at {self.rest_url}: {exc.reason}" - ) from exc - - # Response contains job_id on success, errors array on failure - errors = body.get("errors") or [] - if errors: - msgs = "; ".join(e.get("error", str(e)) for e in errors) - raise RuntimeError(f"slurmrestd job submit failed: {msgs}") - - job_id = str(body.get("job_id", "unknown")) - return JobResult( - job_id=job_id, - scheduler="slurm", - state="Submitted", - output_dir=spec.output_dir, - message=f"Submitted batch job {job_id}", - ) - - def _rest_request(self, path: str, *, method: str = "GET") -> dict: - """Send a request to slurmrestd and return parsed JSON.""" - if not self.rest_url: - raise 
RuntimeError("--slurm-rest-url is required") - if not self.jwt_token: - raise RuntimeError("--slurm-jwt-token is required") - - url = f"{self.rest_url}{path}" - headers = { - "X-SLURM-USER-TOKEN": self.jwt_token, - } - req = urllib.request.Request(url, headers=headers, method=method) - - ctx: ssl.SSLContext | None = None - if not self.verify_ssl: - ctx = ssl.create_default_context() - ctx.check_hostname = False - ctx.verify_mode = ssl.CERT_NONE - - try: - with urllib.request.urlopen(req, context=ctx) as resp: - return json.loads(resp.read()) - except urllib.error.HTTPError as exc: - detail = exc.read().decode(errors="replace") - raise RuntimeError(f"slurmrestd returned HTTP {exc.code}:\n{detail}") from exc - except urllib.error.URLError as exc: - raise RuntimeError(f"Cannot reach slurmrestd at {self.rest_url}: {exc.reason}") from exc - - def _rest_get(self, path: str) -> dict: - """GET a slurmrestd endpoint and return parsed JSON.""" - return self._rest_request(path, method="GET") - def cancel(self, job_id: str) -> str: """Cancel a Slurm job.""" - if self.submit_via == "cli": - return self._cancel_cli(job_id) - return self._cancel_rest(job_id) + return self._cancel_cli(job_id) def status(self, job_id: str) -> dict: """Query Slurm job status.""" - if self.submit_via == "cli": - return self._status_cli(job_id) - return self._status_rest(job_id) + return self._status_cli(job_id) def logs(self, job_id: str, *, tail: int = 100, follow: bool = False) -> str: """Show Slurm job log information.""" - if self.submit_via == "cli": - return self._logs_cli(job_id, tail=tail, follow=follow) - return self._logs_rest(job_id, tail=tail, follow=follow) + return self._logs_cli(job_id, tail=tail, follow=follow) def list_jobs(self, *, status_filter: str = "") -> list[dict]: """List Slurm jobs.""" - if self.submit_via == "cli": - return self._list_jobs_cli(status_filter=status_filter) - return self._list_jobs_rest(status_filter=status_filter) + return 
self._list_jobs_cli(status_filter=status_filter) # ------------------------------------------------------------------ # CLI implementations @@ -454,133 +280,3 @@ def _list_jobs_cli(self, *, status_filter: str = "") -> list[dict]: "nodes": parts[4] if len(parts) > 4 else "", }) return result - - # ------------------------------------------------------------------ - # REST implementations - # ------------------------------------------------------------------ - - def _cancel_rest(self, job_id: str) -> str: - """Cancel a Slurm job via slurmrestd DELETE.""" - body = self._rest_request( - f"/slurm/{self.api_version}/job/{job_id}", - method="DELETE", - ) - errors = body.get("errors") or [] - if errors: - msgs = "; ".join(e.get("error", str(e)) for e in errors) - raise RuntimeError(f"slurmrestd cancel failed: {msgs}") - return f"Cancelled Slurm job {job_id}" - - def _status_rest(self, job_id: str) -> dict: - """Query Slurm job status via slurmrestd.""" - body = self._rest_get(f"/slurm/{self.api_version}/job/{job_id}") - - errors = body.get("errors") or [] - if errors: - msgs = "; ".join(e.get("error", str(e)) for e in errors) - raise RuntimeError(f"slurmrestd error: {msgs}") - - jobs = body.get("jobs", []) - if not jobs: - return {"state": "Unknown", "message": f"No job found with ID {job_id}", "output_hint": ""} - - job = jobs[0] - state = job.get("job_state", ["UNKNOWN"]) - if isinstance(state, list): - state = state[0] if state else "UNKNOWN" - name = job.get("name", "") - node_list = job.get("nodes", "") - output_file = job.get("standard_output", "") - work_dir = job.get("current_working_directory", "") - - msg_parts = [ - f"Job ID: {job_id} Name: {name} State: {state}", - f"Nodes: {node_list}" if node_list else "Nodes: (not yet assigned)", - ] - if output_file: - msg_parts.append(f"Output log: {output_file}") - if work_dir: - msg_parts.append(f"Working dir: {work_dir}") - - return { - "state": state, - "message": "\n".join(msg_parts), - "output_hint": output_file, - } 
- - def _logs_rest(self, job_id: str, *, tail: int = 100, follow: bool = False) -> str: - """Show where Slurm job logs are and how to access them.""" - info = self._status_rest(job_id) - output_file = info.get("output_hint", "") - state = info.get("state", "UNKNOWN") - - parts = [info["message"], ""] - - if output_file: - parts.append(f"Log file (on cluster shared filesystem):") - parts.append(f" {output_file}") - parts.append("") - if follow: - parts.append("Follow logs:") - parts.append(f" tail -f {output_file}") - else: - parts.append("View on login node:") - parts.append(f" less {output_file}") - parts.append(f" tail -{tail} {output_file}") - parts.append("") - parts.append("Follow logs:") - parts.append(f" tail -f {output_file}") - parts.append("") - parts.append("Copy to local machine:") - parts.append(f" scp :{output_file} .") - else: - parts.append("No output file path found in job metadata.") - - # Trace files location - parts.append("") - parts.append("Trace files (on cluster shared filesystem):") - parts.append(" ~/flowsim_traces/") - parts.append(" ls ~/flowsim_traces/") - - return "\n".join(parts) - - def _list_jobs_rest(self, *, status_filter: str = "") -> list[dict]: - """List Slurm jobs via slurmrestd /jobs endpoint.""" - body = self._rest_get(f"/slurm/{self.api_version}/jobs") - errors = body.get("errors") or [] - if errors: - msgs = "; ".join(e.get("error", str(e)) for e in errors) - raise RuntimeError(f"slurmrestd error: {msgs}") - - result: list[dict] = [] - for job in body.get("jobs", []): - name = job.get("name", "") - # Only show flowsim jobs (name starts with "flowsim-") - if not name.startswith("flowsim-"): - continue - - state = job.get("job_state", ["UNKNOWN"]) - if isinstance(state, list): - state = state[0] if state else "UNKNOWN" - - if status_filter and state.upper() != status_filter.upper(): - continue - - result.append({ - "job_id": str(job.get("job_id", "")), - "name": name, - "state": state, - "partition": job.get("partition", 
""), - "nodes": job.get("nodes", ""), - }) - return result - - def _parse_time_minutes(self) -> int: - """Convert HH:MM:SS time_limit to total minutes.""" - parts = self.time_limit.split(":") - if len(parts) == 3: - h, m, s = int(parts[0]), int(parts[1]), int(parts[2]) - return h * 60 + m + (1 if s > 0 else 0) - if len(parts) == 2: - return int(parts[0]) * 60 + int(parts[1]) - return int(parts[0]) diff --git a/schedulers/templates/slurm.yaml b/schedulers/templates/slurm.yaml index 3ff2d0a..b4d77a1 100644 --- a/schedulers/templates/slurm.yaml +++ b/schedulers/templates/slurm.yaml @@ -8,21 +8,12 @@ # # CLI flags and env vars can override individual values. -# REQUIRED — submission mode -submit_via: "cli" # "cli" (sbatch, default) or "rest" (deprecated) -cli_prefix: "" # e.g. "docker exec -i slurmctld" (optional) - # REQUIRED — cluster settings partition: "" # e.g. gpu-h100 account: "" # e.g. my-project -# REST mode (deprecated) — only needed if submit_via: rest -rest_url: "" # e.g. https://slurm.corp.com:6820 -# jwt_token: "" # not recommended — stored in plaintext -jwt_token_cmd: "" # e.g. "scontrol token lifespan=3600" - # Optional -api_version: "v0.0.40" +cli_prefix: "" # e.g. "docker exec -i slurmctld" time: "02:00:00" constraint: "" container_runtime: "none" # docker | enroot | none diff --git a/scripts/cli.py b/scripts/cli.py index fc4d0cb..00409fb 100644 --- a/scripts/cli.py +++ b/scripts/cli.py @@ -3,7 +3,7 @@ Usage:: flowsim init k8s --kubeconfig ~/.kube/config --namespace ml-team ... - flowsim init slurm --rest-url https://slurm:6820 --partition gpu ... + flowsim init slurm --partition gpu --account proj ... flowsim submit --scheduler k8s --collect perf --model-path ... flowsim submit ... 
--dry-run # debug: preview manifest """ @@ -46,19 +46,8 @@ def _init_slurm_parser(sub: argparse._SubParsersAction) -> None: help="Slurm partition (REQUIRED)") p.add_argument("--account", default="", help="Slurm account") - p.add_argument("--submit-via", default="cli", - choices=["cli", "rest"], - help="Submission mode (default: cli)") p.add_argument("--cli-prefix", default="", help='CLI mode prefix, e.g. "docker exec -i slurmctld"') - p.add_argument("--rest-url", default="", - help="slurmrestd endpoint URL (REST mode only, deprecated)") - p.add_argument("--jwt-token-cmd", default="", - help='Command to get JWT token, e.g. "scontrol token lifespan=3600"') - p.add_argument("--jwt-token", default="", - help="Static JWT token (not recommended)") - p.add_argument("--api-version", default="v0.0.40", - help="slurmrestd API version (default: v0.0.40)") p.add_argument("--time", default="02:00:00", help="Job time limit (default: 02:00:00)") p.add_argument("--constraint", default="", @@ -82,8 +71,7 @@ def _cmd_init(argv: list[str]) -> int: "Configure a scheduler and save to ~/.flowsim/.\n\n" "Examples:\n" " flowsim init k8s --kubeconfig ~/.kube/config --namespace ml-team\n" - " flowsim init slurm --rest-url https://slurm:6820 " - "--partition gpu --account proj" + " flowsim init slurm --partition gpu --account proj" ), formatter_class=argparse.RawDescriptionHelpFormatter, ) @@ -112,18 +100,10 @@ def _cmd_init(argv: list[str]) -> int: dst = _CONFIG_DIR / "k8s.yaml" elif args.scheduler == "slurm": - if args.submit_via == "rest" and not args.jwt_token_cmd and not args.jwt_token: - print("Error: REST mode requires --jwt-token-cmd or --jwt-token", file=sys.stderr) - return 1 cfg = { - "submit_via": args.submit_via, "cli_prefix": args.cli_prefix, - "rest_url": args.rest_url, - "jwt_token_cmd": args.jwt_token_cmd, - "jwt_token": args.jwt_token, "partition": args.partition, "account": args.account, - "api_version": args.api_version, "time": args.time, "constraint": args.constraint, 
"container_runtime": args.container_runtime, diff --git a/scripts/status_profile.py b/scripts/status_profile.py index 085d79e..5d10f84 100644 --- a/scripts/status_profile.py +++ b/scripts/status_profile.py @@ -31,7 +31,7 @@ import argparse import sys -from schedulers.config import cfg_get, load_k8s_config, load_slurm_config, resolve_default, resolve_jwt_token +from schedulers.config import cfg_get, load_k8s_config, load_slurm_config, resolve_default from schedulers.k8s import K8sScheduler from schedulers.local import LocalScheduler from schedulers.slurm import SlurmScheduler @@ -80,42 +80,12 @@ def _add_scheduler_specific_args(p: argparse.ArgumentParser, scheduler: str) -> ) elif scheduler == "slurm": - p.add_argument( - "--slurm-rest-url", - default=_d("FLOWSIM_SLURM_REST_URL", slurm_cfg, "rest_url", ""), - ) - p.add_argument( - "--slurm-jwt-token", - default=_d("FLOWSIM_SLURM_JWT_TOKEN", slurm_cfg, "jwt_token", ""), - ) - p.add_argument( - "--slurm-api-version", - default=_d("FLOWSIM_SLURM_API_VERSION", slurm_cfg, "api_version", "v0.0.40"), - ) - p.add_argument( - "--slurm-no-verify-ssl", - action="store_true", - ) - p.add_argument( - "--slurm-submit-via", - choices=["rest", "cli"], - default=cfg_get(slurm_cfg, "submit_via", "cli"), - ) p.add_argument( "--slurm-cli-prefix", default=cfg_get(slurm_cfg, "cli_prefix", ""), ) -def _resolve_slurm_jwt(args: argparse.Namespace) -> None: - """Resolve Slurm JWT from config if not provided.""" - if args.scheduler == "slurm" and not args.slurm_jwt_token: - slurm_cfg = load_slurm_config() - token = resolve_jwt_token(slurm_cfg) - if token: - args.slurm_jwt_token = token - - def _build_scheduler(args: argparse.Namespace): if args.scheduler == "local": return LocalScheduler(workdir=getattr(args, "local_workdir", "")) @@ -129,11 +99,6 @@ def _build_scheduler(args: argparse.Namespace): ) else: return SlurmScheduler( - rest_url=args.slurm_rest_url, - jwt_token=args.slurm_jwt_token, - api_version=args.slurm_api_version, - 
verify_ssl=not args.slurm_no_verify_ssl, - submit_via=args.slurm_submit_via, cli_prefix=args.slurm_cli_prefix, ) @@ -153,7 +118,6 @@ def main_status(argv: list[str] | None = None) -> None: p.add_argument("--job", required=True, help="Job name or ID") args = _parse_two_pass(p, argv) - _resolve_slurm_jwt(args) scheduler = _build_scheduler(args) try: info = scheduler.status(args.job) @@ -172,7 +136,6 @@ def main_logs(argv: list[str] | None = None) -> None: p.add_argument("--follow", "-f", action="store_true", help="Follow log output") args = _parse_two_pass(p, argv) - _resolve_slurm_jwt(args) scheduler = _build_scheduler(args) try: text = scheduler.logs(args.job, tail=args.tail, follow=args.follow) @@ -188,7 +151,6 @@ def main_list(argv: list[str] | None = None) -> None: p.add_argument("--status", default="", help="Filter by job state (e.g. Running, Succeeded, PENDING)") args = _parse_two_pass(p, argv) - _resolve_slurm_jwt(args) scheduler = _build_scheduler(args) try: jobs = scheduler.list_jobs(status_filter=args.status) @@ -214,7 +176,6 @@ def main_cancel(argv: list[str] | None = None) -> None: p.add_argument("--job", required=True, help="Job name or ID to cancel") args = _parse_two_pass(p, argv) - _resolve_slurm_jwt(args) scheduler = _build_scheduler(args) try: msg = scheduler.cancel(args.job) diff --git a/scripts/submit_profile.py b/scripts/submit_profile.py index 8999212..150116e 100644 --- a/scripts/submit_profile.py +++ b/scripts/submit_profile.py @@ -51,7 +51,7 @@ import sys from schedulers.base import ProfileJobSpec -from schedulers.config import cfg_get, load_k8s_config, load_slurm_config, resolve_default, resolve_jwt_token +from schedulers.config import cfg_get, load_k8s_config, load_slurm_config, resolve_default from schedulers.k8s import K8sScheduler from schedulers.local import LocalScheduler from schedulers.slurm import SlurmScheduler @@ -256,26 +256,6 @@ def _parse_args(argv: list[str] | None = None) -> argparse.Namespace: 
default=_d("FLOWSIM_SLURM_TIME", slurm_cfg, "time", "02:00:00"), help="Wall time limit (env: FLOWSIM_SLURM_TIME)", ) - slurm.add_argument( - "--slurm-rest-url", - default=_d("FLOWSIM_SLURM_REST_URL", slurm_cfg, "rest_url", ""), - help="slurmrestd base URL (env: FLOWSIM_SLURM_REST_URL)", - ) - slurm.add_argument( - "--slurm-jwt-token", - default=_d("FLOWSIM_SLURM_JWT_TOKEN", slurm_cfg, "jwt_token", ""), - help="JWT token for slurmrestd (env: FLOWSIM_SLURM_JWT_TOKEN)", - ) - slurm.add_argument( - "--slurm-api-version", - default=_d("FLOWSIM_SLURM_API_VERSION", slurm_cfg, "api_version", "v0.0.40"), - help="slurmrestd API version (env: FLOWSIM_SLURM_API_VERSION)", - ) - slurm.add_argument( - "--slurm-no-verify-ssl", - action="store_true", - help="Skip TLS certificate verification for slurmrestd", - ) slurm.add_argument( "--slurm-account", default=cfg_get(slurm_cfg, "account", ""), @@ -308,12 +288,6 @@ def _parse_args(argv: list[str] | None = None) -> argparse.Namespace: metavar="DIRECTIVE", help="Extra #SBATCH directives (repeatable, without prefix)", ) - slurm.add_argument( - "--slurm-submit-via", - choices=["rest", "cli"], - default=cfg_get(slurm_cfg, "submit_via", "cli"), - help="Submission mode: cli (sbatch subprocess) or rest (slurmrestd, deprecated)", - ) slurm.add_argument( "--slurm-cli-prefix", default=cfg_get(slurm_cfg, "cli_prefix", ""), @@ -394,17 +368,12 @@ def _build_scheduler(args: argparse.Namespace): return SlurmScheduler( partition=args.slurm_partition, time_limit=args.slurm_time, - rest_url=args.slurm_rest_url, - jwt_token=args.slurm_jwt_token, - api_version=args.slurm_api_version, - verify_ssl=not args.slurm_no_verify_ssl, account=args.slurm_account, constraint=args.slurm_constraint, container_runtime=args.slurm_container_runtime, container_mounts=args.slurm_container_mounts, modules=args.slurm_module, extra_sbatch=args.slurm_extra_sbatch, - submit_via=args.slurm_submit_via, cli_prefix=args.slurm_cli_prefix, ) @@ -424,13 +393,6 @@ def main(argv: 
list[str] | None = None) -> None: else: args.output_dir = f"/flowsim/stage_traces/k8s/{_ts}" - # Resolve Slurm JWT token from jwt_token_cmd in config if needed - if args.scheduler == "slurm" and not args.slurm_jwt_token: - slurm_cfg = load_slurm_config() - token = resolve_jwt_token(slurm_cfg) - if token: - args.slurm_jwt_token = token - # Validate required connection params before submit if not args.dry_run and args.scheduler not in ("local",): _validate_connection(args) @@ -523,28 +485,11 @@ def _validate_connection(args: argparse.Namespace) -> None: file=sys.stderr, ) elif args.scheduler == "slurm": - if args.slurm_submit_via == "cli": - # CLI mode only needs partition - if not args.slurm_partition: - sys.exit( - "Error: missing required Slurm config:\n" - " - partition (--slurm-partition)\n\n" - f"Set it in ~/.flowsim/slurm.yaml or via CLI flag.\n" - + _INIT_HINT - ) - return - missing = [] - if not args.slurm_rest_url: - missing.append("rest_url (--slurm-rest-url)") - if not args.slurm_jwt_token: - missing.append("jwt_token/jwt_token_cmd (--slurm-jwt-token)") if not args.slurm_partition: - missing.append("partition (--slurm-partition)") - if missing: sys.exit( "Error: missing required Slurm config:\n" - + "\n".join(f" - {m}" for m in missing) - + f"\n\nSet them in ~/.flowsim/slurm.yaml or via CLI flags.\n" + " - partition (--slurm-partition)\n\n" + f"Set it in ~/.flowsim/slurm.yaml or via CLI flag.\n" + _INIT_HINT ) diff --git a/tests/unit/test_scheduler_cli.py b/tests/unit/test_scheduler_cli.py index 2bb0dec..9968ea1 100644 --- a/tests/unit/test_scheduler_cli.py +++ b/tests/unit/test_scheduler_cli.py @@ -269,10 +269,6 @@ def test_render_constraint(self, spec): script = sched.render(spec) assert "#SBATCH --constraint=gpu80g" in script - def test_time_parse_minutes(self): - sched = SlurmScheduler(partition="gpu", time_limit="02:30:00") - assert sched._parse_time_minutes() == 150 - # ========================================================================= # 
LocalScheduler.render @@ -350,7 +346,7 @@ def test_init_slurm_help(self, capsys): _cmd_init(["slurm", "--help"]) assert exc_info.value.code == 0 out = capsys.readouterr().out - assert "--rest-url" in out + assert "--cli-prefix" in out assert "--partition" in out def test_init_k8s_missing_required(self): @@ -396,16 +392,13 @@ def test_init_slurm_saves_config(self, tmp_path: Path): from scripts.cli import _cmd_init rc = _cmd_init([ "slurm", - "--rest-url", "http://localhost:6820", "--partition", "gpu", "--account", "proj", - "--jwt-token", "fake-token", ]) assert rc == 0 cfg_file = config_dir / "slurm.yaml" assert cfg_file.exists() cfg = yaml.safe_load(cfg_file.read_text()) - assert cfg["rest_url"] == "http://localhost:6820" assert cfg["partition"] == "gpu" assert cfg["account"] == "proj" @@ -418,10 +411,8 @@ def test_init_refuses_overwrite(self, tmp_path: Path): from scripts.cli import _cmd_init rc = _cmd_init([ "slurm", - "--rest-url", "http://localhost:6820", "--partition", "gpu", "--account", "proj", - "--jwt-token", "tok", ]) assert rc != 0 # should refuse @@ -434,15 +425,13 @@ def test_init_force_overwrite(self, tmp_path: Path): from scripts.cli import _cmd_init rc = _cmd_init([ "slurm", - "--rest-url", "http://localhost:6820", "--partition", "gpu", "--account", "proj", - "--jwt-token", "tok", "--force", ]) assert rc == 0 cfg = yaml.safe_load((config_dir / "slurm.yaml").read_text()) - assert cfg["rest_url"] == "http://localhost:6820" + assert cfg["partition"] == "gpu" # ========================================================================= @@ -518,8 +507,6 @@ def test_submit_slurm_dry_run(self): "--collect", "perf", "--model-path", "Qwen/Qwen3-8B", "--slurm-partition", "gpu", - "--slurm-rest-url", "http://fake:6820", - "--slurm-jwt-token", "fake-token", "--dry-run", ) assert "#!/bin/bash" in out @@ -559,32 +546,12 @@ class TestConfig: def test_save_and_load_yaml(self, tmp_path: Path): from schedulers.config import _save_yaml, _load_yaml - data = {"rest_url": 
"http://localhost:6820", "partition": "gpu"} + data = {"partition": "gpu", "account": "proj"} path = tmp_path / "test.yaml" _save_yaml(path, data) loaded = _load_yaml(path) assert loaded == data - def test_resolve_jwt_token_static(self): - from schedulers.config import resolve_jwt_token - cfg = {"jwt_token": "my-secret"} - assert resolve_jwt_token(cfg) == "my-secret" - - def test_resolve_jwt_token_cmd(self): - from schedulers.config import resolve_jwt_token - cfg = {"jwt_token_cmd": "echo test-token-123"} - assert resolve_jwt_token(cfg) == "test-token-123" - - def test_resolve_jwt_token_bad_cmd(self): - from schedulers.config import resolve_jwt_token - cfg = {"jwt_token_cmd": "/nonexistent/binary"} - # Should not raise, just return empty - assert resolve_jwt_token(cfg) == "" - - def test_resolve_jwt_token_empty(self): - from schedulers.config import resolve_jwt_token - assert resolve_jwt_token({}) == "" - def test_cfg_get(self): from schedulers.config import cfg_get cfg = {"key": "value", "empty": ""} From 892eeacf80e07e9068c6f45a069ebc1902cb18b9 Mon Sep 17 00:00:00 2001 From: Terrence Zhang Date: Wed, 18 Mar 2026 22:47:16 +0000 Subject: [PATCH 31/56] review: normalize Slurm states, implement _logs_cli, dedup image check, add env var docs MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit - Normalize all Slurm job states (PENDING→Pending, RUNNING→Running, etc.) 
- Implement _logs_cli: read log file via tail, fallback to hints - Remove duplicate _check_image_exists() call from local.py render() - Add supported environment variables table to README - 56 unit tests pass --- schedulers/README.md | 14 ++++++++++++- schedulers/local.py | 1 - schedulers/slurm.py | 48 ++++++++++++++++++++++++++++++++++++-------- 3 files changed, 53 insertions(+), 10 deletions(-) diff --git a/schedulers/README.md b/schedulers/README.md index 48e6b0b..2ab2b0e 100644 --- a/schedulers/README.md +++ b/schedulers/README.md @@ -285,10 +285,22 @@ Config files are stored in `~/.flowsim/` and generated via `flowsim init`: Parameter priority (highest to lowest): 1. CLI flag (`--slurm-partition gpu`) -2. Environment variable (`FLOWSIM_SLURM_PARTITION=gpu`) +2. Environment variable (see table below) 3. Config file (`~/.flowsim/slurm.yaml`) 4. Built-in default +### Supported Environment Variables + +| Variable | Overrides | Example | +|----------|-----------|--------| +| `KUBECONFIG` | `--k8s-kubeconfig` | `/home/user/.kube/config` | +| `FLOWSIM_K8S_NAMESPACE` | `--k8s-namespace` | `ml-team` | +| `FLOWSIM_K8S_CONTEXT` | `--k8s-context` | `kind-flowsim` | +| `FLOWSIM_K8S_CONFIG` | Config file path | `/etc/flowsim/k8s.yaml` | +| `FLOWSIM_SLURM_PARTITION` | `--slurm-partition` | `gpu-h100` | +| `FLOWSIM_SLURM_TIME` | `--slurm-time` | `04:00:00` | +| `FLOWSIM_SLURM_CONFIG` | Config file path | `/etc/flowsim/slurm.yaml` | + ### Example k8s.yaml ```yaml diff --git a/schedulers/local.py b/schedulers/local.py index 4c61865..673acac 100644 --- a/schedulers/local.py +++ b/schedulers/local.py @@ -118,7 +118,6 @@ def _build_docker_cmd(self, spec: ProfileJobSpec) -> str: return " \\\n ".join(parts) def render(self, spec: ProfileJobSpec) -> str: - self._check_image_exists(spec.image) return self._build_docker_cmd(spec) def submit(self, spec: ProfileJobSpec) -> JobResult: diff --git a/schedulers/slurm.py b/schedulers/slurm.py index 67e954b..265a725 100644 --- 
a/schedulers/slurm.py +++ b/schedulers/slurm.py @@ -230,11 +230,20 @@ def _status_cli(self, job_id: str) -> dict: nodes = fields.get("NodeList", "") output_file = fields.get("StdOut", "") - # Normalize to match test expectations - if state == "COMPLETED": - state = "Completed" - elif state == "FAILED": - state = "Failed" + # Normalize Slurm uppercase states to capitalized format + _STATE_MAP = { + "PENDING": "Pending", + "RUNNING": "Running", + "SUSPENDED": "Suspended", + "COMPLETED": "Completed", + "CANCELLED": "Cancelled", + "FAILED": "Failed", + "TIMEOUT": "Timeout", + "NODE_FAIL": "Failed", + "PREEMPTED": "Preempted", + "OUT_OF_MEMORY": "Failed", + } + state = _STATE_MAP.get(state, state) msg_parts = [ f"Job ID: {job_id} Name: {name} State: {state}", @@ -250,10 +259,33 @@ def _status_cli(self, job_id: str) -> dict: } def _logs_cli(self, job_id: str, *, tail: int = 100, follow: bool = False) -> str: - # TODO: read actual Slurm log file (StdOut from scontrol) - # and support tail/follow properly. 
info = self._status_cli(job_id) - return info["message"] + output_file = info.get("output_hint", "") + + if not output_file: + return info["message"] + "\n(no log file path found)" + + # Try to read the log file via CLI prefix (handles remote Slurm) + if follow: + return ( + f"{info['message']}\n\n" + f"Follow logs:\n" + f" tail -f {output_file}" + ) + + r = self._cli_run("tail", f"-{tail}", output_file, timeout=15) + if r.returncode == 0 and r.stdout.strip(): + return r.stdout + + # Fallback: file may not exist yet or be on a remote node + return ( + f"{info['message']}\n\n" + f"Log file: {output_file}\n" + f"View on login node:\n" + f" tail -{tail} {output_file}\n" + f"Follow:\n" + f" tail -f {output_file}" + ) def _list_jobs_cli(self, *, status_filter: str = "") -> list[dict]: r = self._cli_run( From 73cedbed78c7df5ca83e8c1b5f1ea19c3d0e205a Mon Sep 17 00:00:00 2001 From: Terrence Zhang Date: Wed, 18 Mar 2026 22:59:49 +0000 Subject: [PATCH 32/56] fix: remove --slurm-submit-via from integration tests MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Missed in REST removal — integration tests still passed the deleted CLI flag. 
All tests pass: - 56 unit tests - K8s integration (77s) - Slurm integration (76s) --- tests/integration/test_scheduler_local.py | 3 --- 1 file changed, 3 deletions(-) diff --git a/tests/integration/test_scheduler_local.py b/tests/integration/test_scheduler_local.py index 8815250..5f592a8 100644 --- a/tests/integration/test_scheduler_local.py +++ b/tests/integration/test_scheduler_local.py @@ -527,7 +527,6 @@ def test_slurm_real_submit(self, slurm_cluster): "--warmup-n", "2", "--gpus", "1", "--slurm-partition", "normal", - "--slurm-submit-via", "cli", "--slurm-cli-prefix", _SLURM_CLI_PREFIX, "--slurm-container-runtime", "none", "--output-dir", output_dir, @@ -556,7 +555,6 @@ def test_slurm_real_submit(self, slurm_cluster): r_status = _flowsim_cli( "status", "--scheduler", "slurm", "--job", job_id, - "--slurm-submit-via", "cli", "--slurm-cli-prefix", _SLURM_CLI_PREFIX, ) assert r_status.returncode == 0 @@ -590,7 +588,6 @@ def test_slurm_real_submit(self, slurm_cluster): _flowsim_cli( "cancel", "--scheduler", "slurm", "--job", job_id, - "--slurm-submit-via", "cli", "--slurm-cli-prefix", _SLURM_CLI_PREFIX, ) From 0a30f7fc446141325131e5cbe3dd13588057fbb5 Mon Sep 17 00:00:00 2001 From: Terrence Zhang Date: Wed, 18 Mar 2026 23:48:45 +0000 Subject: [PATCH 33/56] refactor: move test infra to tests/integration/infra/, delete unused templates - Move dev-setup.sh, dev-teardown.sh, slurm-compose.yaml, slurm-node.dockerfile, kind-multi-node.yaml, slurm.conf, cgroup.conf, gres.conf from dockerfiles/ to tests/integration/infra/ - Delete schedulers/templates/ (unused by code; flowsim init generates config directly from CLI args) - Update all path references in README, config.py, test files, and shell script comments - dockerfiles/ now contains only cuda12.6.dockerfile (app image) --- schedulers/README.md | 6 ++-- schedulers/config.py | 4 +-- schedulers/templates/k8s.yaml | 28 ------------------- schedulers/templates/slurm.yaml | 23 --------------- .../integration/infra}/cgroup.conf 
| 0 .../integration/infra}/dev-setup.sh | 10 +++---- .../integration/infra}/dev-teardown.sh | 6 ++-- .../integration/infra}/gres.conf | 0 .../integration/infra}/kind-multi-node.yaml | 4 +-- .../integration/infra}/slurm-compose.yaml | 4 ++- .../integration/infra}/slurm-node.dockerfile | 0 .../integration/infra}/slurm.conf | 0 tests/integration/test_scheduler_local.py | 10 +++---- 13 files changed, 23 insertions(+), 72 deletions(-) delete mode 100644 schedulers/templates/k8s.yaml delete mode 100644 schedulers/templates/slurm.yaml rename {dockerfiles => tests/integration/infra}/cgroup.conf (100%) rename {dockerfiles => tests/integration/infra}/dev-setup.sh (97%) rename {dockerfiles => tests/integration/infra}/dev-teardown.sh (86%) rename {dockerfiles => tests/integration/infra}/gres.conf (100%) rename {dockerfiles => tests/integration/infra}/kind-multi-node.yaml (91%) rename {dockerfiles => tests/integration/infra}/slurm-compose.yaml (96%) rename {dockerfiles => tests/integration/infra}/slurm-node.dockerfile (100%) rename {dockerfiles => tests/integration/infra}/slurm.conf (100%) diff --git a/schedulers/README.md b/schedulers/README.md index 2ab2b0e..5b1ebf5 100644 --- a/schedulers/README.md +++ b/schedulers/README.md @@ -167,13 +167,13 @@ flowsim submit --scheduler k8s ... 
--dry-run ```bash # Start a Kind cluster (GPU passthrough + CDI mode) -bash dockerfiles/dev-setup.sh kind +bash tests/integration/infra/dev-setup.sh kind # Run K8s integration tests python -m pytest tests/integration/test_scheduler_local.py::TestK8sScheduler -v -x # Teardown -bash dockerfiles/dev-teardown.sh kind +bash tests/integration/infra/dev-teardown.sh kind ``` --- @@ -258,7 +258,7 @@ If Slurm commands are not on the local PATH, use `--slurm-cli-prefix` to specify ```bash # Start Slurm cluster (slurmctld + 1 compute node + 1 GPU) -cd dockerfiles/ +cd tests/integration/infra/ docker compose -f slurm-compose.yaml up -d # Check cluster status diff --git a/schedulers/config.py b/schedulers/config.py index 723dfc2..18ab55e 100644 --- a/schedulers/config.py +++ b/schedulers/config.py @@ -13,8 +13,8 @@ Priority (highest → lowest): CLI flag > env var > config file > built-in default -Template files are in ``schedulers/templates/k8s.yaml`` and -``schedulers/templates/slurm.yaml``. Copy to ``~/.flowsim/`` and edit. +Run ``flowsim init k8s`` or ``flowsim init slurm`` to generate +a config under ``~/.flowsim/``. """ from __future__ import annotations diff --git a/schedulers/templates/k8s.yaml b/schedulers/templates/k8s.yaml deleted file mode 100644 index 2adb927..0000000 --- a/schedulers/templates/k8s.yaml +++ /dev/null @@ -1,28 +0,0 @@ -# FlowSim Kubernetes scheduler config -# -# Created by: flowsim init -# Location: ~/.flowsim/k8s.yaml -# -# Fill in the values below, then submit with: -# flowsim submit --scheduler k8s --collect perf --model-path ... -# -# CLI flags and env vars can override individual values. - -# REQUIRED — path to your kubeconfig file -kubeconfig: "" # e.g. /home/me/.kube/prod.kubeconfig - -# REQUIRED — which context and namespace to use -context: "" # e.g. prod-cluster (empty = current-context) -namespace: "" # e.g. 
ml-team - -# Output storage (pick one or leave both empty for emptyDir) -pvc: "" # PVC name for trace output -host_output_dir: "" # hostPath alternative to PVC - -# Optional -service_account: "" -shm_size: "16Gi" -runtime_class_name: "" # e.g. "nvidia" for CDI-based GPU (Kind clusters) -# node_selector: -# gpu: a100 -# tier: high diff --git a/schedulers/templates/slurm.yaml b/schedulers/templates/slurm.yaml deleted file mode 100644 index b4d77a1..0000000 --- a/schedulers/templates/slurm.yaml +++ /dev/null @@ -1,23 +0,0 @@ -# FlowSim Slurm scheduler config -# -# Created by: flowsim init -# Location: ~/.flowsim/slurm.yaml -# -# Fill in the values below, then submit with: -# flowsim submit --scheduler slurm --collect perf --model-path ... -# -# CLI flags and env vars can override individual values. - -# REQUIRED — cluster settings -partition: "" # e.g. gpu-h100 -account: "" # e.g. my-project - -# Optional -cli_prefix: "" # e.g. "docker exec -i slurmctld" -time: "02:00:00" -constraint: "" -container_runtime: "none" # docker | enroot | none -container_mounts: "" -# modules: -# - cuda/12.6 -# - anaconda3 diff --git a/dockerfiles/cgroup.conf b/tests/integration/infra/cgroup.conf similarity index 100% rename from dockerfiles/cgroup.conf rename to tests/integration/infra/cgroup.conf diff --git a/dockerfiles/dev-setup.sh b/tests/integration/infra/dev-setup.sh similarity index 97% rename from dockerfiles/dev-setup.sh rename to tests/integration/infra/dev-setup.sh index 7cefe05..afbb9f7 100755 --- a/dockerfiles/dev-setup.sh +++ b/tests/integration/infra/dev-setup.sh @@ -2,12 +2,12 @@ # dev-setup.sh — one-shot setup for FlowSim test clusters (kind + Slurm) # # Usage: -# ./dockerfiles/dev-setup.sh # setup both kind + slurm -# ./dockerfiles/dev-setup.sh kind # kind only -# ./dockerfiles/dev-setup.sh slurm # slurm only +# ./tests/integration/infra/dev-setup.sh # setup both kind + slurm +# ./tests/integration/infra/dev-setup.sh kind # kind only +# 
./tests/integration/infra/dev-setup.sh slurm # slurm only # # Teardown: -# ./dockerfiles/dev-teardown.sh +# ./tests/integration/infra/dev-teardown.sh set -euo pipefail @@ -355,4 +355,4 @@ case "${target}" in esac echo -log "All done. Teardown with: ./dockerfiles/dev-teardown.sh" +log "All done. Teardown with: ./tests/integration/infra/dev-teardown.sh" diff --git a/dockerfiles/dev-teardown.sh b/tests/integration/infra/dev-teardown.sh similarity index 86% rename from dockerfiles/dev-teardown.sh rename to tests/integration/infra/dev-teardown.sh index dfb1c01..c5e74ee 100755 --- a/dockerfiles/dev-teardown.sh +++ b/tests/integration/infra/dev-teardown.sh @@ -2,9 +2,9 @@ # dev-teardown.sh — tear down FlowSim test clusters # # Usage: -# ./dockerfiles/dev-teardown.sh # teardown both -# ./dockerfiles/dev-teardown.sh kind # kind only -# ./dockerfiles/dev-teardown.sh slurm # slurm only +# ./tests/integration/infra/dev-teardown.sh # teardown both +# ./tests/integration/infra/dev-teardown.sh kind # kind only +# ./tests/integration/infra/dev-teardown.sh slurm # slurm only set -euo pipefail diff --git a/dockerfiles/gres.conf b/tests/integration/infra/gres.conf similarity index 100% rename from dockerfiles/gres.conf rename to tests/integration/infra/gres.conf diff --git a/dockerfiles/kind-multi-node.yaml b/tests/integration/infra/kind-multi-node.yaml similarity index 91% rename from dockerfiles/kind-multi-node.yaml rename to tests/integration/infra/kind-multi-node.yaml index ddb8cd2..90b4e6f 100644 --- a/dockerfiles/kind-multi-node.yaml +++ b/tests/integration/infra/kind-multi-node.yaml @@ -13,10 +13,10 @@ # - kind, kubectl, helm # # Usage: -# ./dockerfiles/dev-setup.sh kind +# ./tests/integration/infra/dev-setup.sh kind # # Teardown: -# ./dockerfiles/dev-teardown.sh kind +# ./tests/integration/infra/dev-teardown.sh kind kind: Cluster apiVersion: kind.x-k8s.io/v1alpha4 diff --git a/dockerfiles/slurm-compose.yaml b/tests/integration/infra/slurm-compose.yaml similarity index 96% 
rename from dockerfiles/slurm-compose.yaml rename to tests/integration/infra/slurm-compose.yaml index ee94656..c2369ba 100644 --- a/dockerfiles/slurm-compose.yaml +++ b/tests/integration/infra/slurm-compose.yaml @@ -1,7 +1,7 @@ # Slurm test cluster — slurmctld + 2 compute nodes (GPU 0, GPU 1) + slurmrestd # # Usage: -# cd dockerfiles/ +# cd tests/integration/infra/ # docker compose -f slurm-compose.yaml up -d # # # Wait for cluster to be ready (~30s) @@ -22,6 +22,8 @@ # # # Teardown # docker compose -f slurm-compose.yaml down -v +# # Or from project root: +# docker compose -f tests/integration/infra/slurm-compose.yaml down -v x-slurm-base: &slurm-base build: diff --git a/dockerfiles/slurm-node.dockerfile b/tests/integration/infra/slurm-node.dockerfile similarity index 100% rename from dockerfiles/slurm-node.dockerfile rename to tests/integration/infra/slurm-node.dockerfile diff --git a/dockerfiles/slurm.conf b/tests/integration/infra/slurm.conf similarity index 100% rename from dockerfiles/slurm.conf rename to tests/integration/infra/slurm.conf diff --git a/tests/integration/test_scheduler_local.py b/tests/integration/test_scheduler_local.py index 5f592a8..a2086f1 100644 --- a/tests/integration/test_scheduler_local.py +++ b/tests/integration/test_scheduler_local.py @@ -17,7 +17,7 @@ * Docker with ``flowsim-image:latest`` built (for local tests). * A GPU-equipped host machine (local tests run on the physical host, NOT inside a Docker container). -* ``dockerfiles/dev-setup.sh`` available (Kind and Slurm clusters are +* ``tests/integration/infra/dev-setup.sh`` available (Kind and Slurm clusters are automatically created if missing). * ``schedulers/`` available on PYTHONPATH. 
@@ -58,8 +58,8 @@ _PROJECT_ROOT = os.path.abspath( os.path.join(os.path.dirname(__file__), "..", "..") ) -_DEV_SETUP = os.path.join(_PROJECT_ROOT, "dockerfiles", "dev-setup.sh") -_DEV_TEARDOWN = os.path.join(_PROJECT_ROOT, "dockerfiles", "dev-teardown.sh") +_DEV_SETUP = os.path.join(_PROJECT_ROOT, "tests", "integration", "infra", "dev-setup.sh") +_DEV_TEARDOWN = os.path.join(_PROJECT_ROOT, "tests", "integration", "infra", "dev-teardown.sh") MODEL = os.environ.get( "MODEL", "workload/models/configs/Qwen3-235B-A22B" @@ -321,7 +321,7 @@ def test_local_tp1_all(self, point): # ===================================================================== def _run_dev_setup(target: str) -> None: - """Run ``dockerfiles/dev-setup.sh `` and assert success.""" + """Run ``tests/integration/infra/dev-setup.sh `` and assert success.""" r = subprocess.run( ["bash", _DEV_SETUP, target], capture_output=True, text=True, cwd=_PROJECT_ROOT, timeout=300, @@ -334,7 +334,7 @@ def _run_dev_setup(target: str) -> None: def _run_dev_teardown(target: str) -> None: - """Run ``dockerfiles/dev-teardown.sh ``.""" + """Run ``tests/integration/infra/dev-teardown.sh ``.""" subprocess.run( ["bash", _DEV_TEARDOWN, target], capture_output=True, text=True, cwd=_PROJECT_ROOT, timeout=120, From 7831272a824f99c6456a0bedc13b22922272f481 Mon Sep 17 00:00:00 2001 From: Terrence Zhang Date: Wed, 18 Mar 2026 23:53:44 +0000 Subject: [PATCH 34/56] remove untested PD disaggregation code - Remove disagg_mode, disagg_transfer_backend, disagg_bootstrap_port, disagg_prefill_pp, disagg_ib_device fields from ProfileJobSpec - Remove as_prefill(), as_decode(), render_pd_pair(), submit_pd_pair() - Remove --pd, --disagg-* CLI args from submit_profile.py - Remove PD branch from main() submit/dry-run logic - Remove 8 PD-related unit tests - Remove PD Disaggregation section from README - 48 unit tests pass --- schedulers/README.md | 16 --------- schedulers/base.py | 39 --------------------- scripts/submit_profile.py | 53 
++-------------------------- tests/unit/test_scheduler_cli.py | 59 -------------------------------- 4 files changed, 3 insertions(+), 164 deletions(-) diff --git a/schedulers/README.md b/schedulers/README.md index 5b1ebf5..6069f2c 100644 --- a/schedulers/README.md +++ b/schedulers/README.md @@ -343,19 +343,3 @@ stage_traces/{scheduler}/{YYYYMMDD_HHMMSS}/ └── sweep_summary.json ``` ---- - -## PD Disaggregation (Experimental) - -Supports Prefill-Decode disaggregated deployment: - -```bash -flowsim submit --scheduler k8s \ - --pd \ - --collect perf \ - --model-path Qwen/Qwen3-235B-A22B-FP8 \ - --tp 4 --gpus 8 \ - --disagg-transfer-backend mooncake -``` - -This generates two Jobs: one prefill instance and one decode instance. diff --git a/schedulers/base.py b/schedulers/base.py index 0641f41..a47ac1f 100644 --- a/schedulers/base.py +++ b/schedulers/base.py @@ -48,13 +48,6 @@ class ProfileJobSpec: output_dir: str = "/flowsim/stage_traces" job_name: str = "" - # -- PD disaggregation -- - disagg_mode: str = "" # "prefill", "decode", or "" (unified) - disagg_transfer_backend: str = "mooncake" # "mooncake" or "nixl" - disagg_bootstrap_port: int = 8998 - disagg_prefill_pp: int = 1 - disagg_ib_device: str = "" - # -- Sweep: explicit list of (bs, input_len, existing_ctx) tuples -- sweep_points: list[tuple[int, int, int]] = field(default_factory=list) @@ -71,14 +64,6 @@ def build_server_opts(self) -> str: ] if self.dp > 1: parts.append(f"--dp {self.dp}") - if self.disagg_mode: - parts.append(f"--disaggregation-mode {self.disagg_mode}") - parts.append(f"--disaggregation-transfer-backend {self.disagg_transfer_backend}") - parts.append(f"--disaggregation-bootstrap-port {self.disagg_bootstrap_port}") - if self.disagg_prefill_pp > 1: - parts.append(f"--disaggregation-prefill-pp {self.disagg_prefill_pp}") - if self.disagg_ib_device: - parts.append(f"--disaggregation-ib-device {self.disagg_ib_device}") if self.extra_server_opts: parts.append(self.extra_server_opts) return " 
".join(parts) @@ -149,20 +134,8 @@ def default_job_name(self) -> str: name = f"flowsim-{self.collect}-{model_short}-sweep{len(self.sweep_points)}pt" else: name = f"flowsim-{self.collect}-{model_short}-bs{self.bs}-il{self.input_len}" - if self.disagg_mode: - name += f"-{self.disagg_mode}" return name - def as_prefill(self) -> "ProfileJobSpec": - """Return a copy configured as the prefill instance.""" - from dataclasses import replace - return replace(self, disagg_mode="prefill") - - def as_decode(self) -> "ProfileJobSpec": - """Return a copy configured as the decode instance.""" - from dataclasses import replace - return replace(self, disagg_mode="decode") - class BaseScheduler(abc.ABC): """Abstract scheduler backend.""" @@ -225,15 +198,3 @@ def list_jobs(self, *, status_filter: str = "") -> list[dict]: def dry_run(self, spec: ProfileJobSpec) -> str: """Render and return the manifest without submitting.""" return self.render(spec) - - def render_pd_pair(self, spec: ProfileJobSpec) -> str: - """Render both prefill and decode manifests for PD disaggregation.""" - prefill = self.render(spec.as_prefill()) - decode = self.render(spec.as_decode()) - return f"# === PREFILL INSTANCE ===\n{prefill}\n# === DECODE INSTANCE ===\n{decode}" - - def submit_pd_pair(self, spec: ProfileJobSpec) -> list[JobResult]: - """Submit both prefill and decode jobs.""" - r1 = self.submit(spec.as_prefill()) - r2 = self.submit(spec.as_decode()) - return [r1, r2] diff --git a/scripts/submit_profile.py b/scripts/submit_profile.py index 150116e..747b9b3 100644 --- a/scripts/submit_profile.py +++ b/scripts/submit_profile.py @@ -146,37 +146,6 @@ def _parse_args(argv: list[str] | None = None) -> argparse.Namespace: help="[debug] Print rendered manifest without submitting", ) - # -- PD disaggregation -- - pd = p.add_argument_group("PD disaggregation") - pd.add_argument( - "--pd", - action="store_true", - help="Submit a prefill + decode job pair (PD disaggregation)", - ) - pd.add_argument( - 
"--disagg-transfer-backend", - default="mooncake", - choices=["mooncake", "nixl"], - help="KV transfer backend (default: mooncake)", - ) - pd.add_argument( - "--disagg-bootstrap-port", - type=int, - default=8998, - help="Bootstrap port for PD coordination (default: 8998)", - ) - pd.add_argument( - "--disagg-prefill-pp", - type=int, - default=1, - help="Pipeline parallelism for prefill instance (default: 1)", - ) - pd.add_argument( - "--disagg-ib-device", - default="", - help="InfiniBand device for RDMA transfer", - ) - # ---- Two-pass: peek at --scheduler, then add only relevant args ---- # Use a minimal pre-parser to avoid required-arg errors during peek. _pre = argparse.ArgumentParser(add_help=False) @@ -332,10 +301,6 @@ def _build_spec(args: argparse.Namespace) -> ProfileJobSpec: output_dir=args.output_dir, job_name=args.job_name, extra_server_opts=args.extra_server_opts, - disagg_transfer_backend=args.disagg_transfer_backend, - disagg_bootstrap_port=args.disagg_bootstrap_port, - disagg_prefill_pp=args.disagg_prefill_pp, - disagg_ib_device=args.disagg_ib_device, sweep_points=sweep_points, ) @@ -407,23 +372,11 @@ def main(argv: list[str] | None = None) -> None: spec = _build_spec(args) scheduler = _build_scheduler(args) - is_pd = args.pd - if args.dry_run: - if is_pd: - print(scheduler.render_pd_pair(spec)) - else: - print(scheduler.dry_run(spec)) + print(scheduler.dry_run(spec)) else: - if is_pd: - results = scheduler.submit_pd_pair(spec) - for r in results: - print(r.message) - # Use the first result for follow-up hints - result = results[0] - else: - result = scheduler.submit(spec) - print(result.message) + result = scheduler.submit(spec) + print(result.message) # Tell user where to find results print() diff --git a/tests/unit/test_scheduler_cli.py b/tests/unit/test_scheduler_cli.py index 9968ea1..0bdfe9d 100644 --- a/tests/unit/test_scheduler_cli.py +++ b/tests/unit/test_scheduler_cli.py @@ -41,33 +41,15 @@ def test_custom_job_name(self, spec: 
ProfileJobSpec): spec.job_name = "my-job" assert spec.default_job_name() == "my-job" - def test_job_name_disagg_suffix(self, spec: ProfileJobSpec): - spec.disagg_mode = "prefill" - assert spec.default_job_name().endswith("-prefill") - def test_build_server_opts_basic(self, spec: ProfileJobSpec): opts = spec.build_server_opts() assert "--model-path Qwen/Qwen3-8B" in opts assert "--tp 2" in opts - assert "--disaggregation" not in opts def test_build_server_opts_dp(self, spec: ProfileJobSpec): spec.dp = 4 assert "--dp 4" in spec.build_server_opts() - def test_build_server_opts_disagg(self, spec: ProfileJobSpec): - spec.disagg_mode = "prefill" - spec.disagg_transfer_backend = "nixl" - opts = spec.build_server_opts() - assert "--disaggregation-mode prefill" in opts - assert "--disaggregation-transfer-backend nixl" in opts - assert "--disaggregation-bootstrap-port 8998" in opts - - def test_build_server_opts_disagg_pp(self, spec: ProfileJobSpec): - spec.disagg_mode = "prefill" - spec.disagg_prefill_pp = 2 - assert "--disaggregation-prefill-pp 2" in spec.build_server_opts() - def test_build_server_opts_extra(self, spec: ProfileJobSpec): spec.extra_server_opts = "--some-flag" assert "--some-flag" in spec.build_server_opts() @@ -86,16 +68,6 @@ def test_build_shell_command_quotes_server_opts(self, spec: ProfileJobSpec): # server-opts contains spaces, must be quoted assert "--server-opts '" in shell or '--server-opts "' in shell - def test_as_prefill(self, spec: ProfileJobSpec): - p = spec.as_prefill() - assert p.disagg_mode == "prefill" - assert spec.disagg_mode == "" # original unchanged - - def test_as_decode(self, spec: ProfileJobSpec): - d = spec.as_decode() - assert d.disagg_mode == "decode" - assert spec.disagg_mode == "" - # ========================================================================= # K8sScheduler.render @@ -176,14 +148,6 @@ def test_render_labels(self, scheduler, spec): assert labels["app"] == "flowsim" assert labels["collect"] == "perf" - def 
test_render_pd_pair(self, scheduler, spec): - output = scheduler.render_pd_pair(spec) - assert "PREFILL INSTANCE" in output - assert "DECODE INSTANCE" in output - # Both should be valid YAML docs - docs = output.split("# === DECODE INSTANCE ===") - assert len(docs) == 2 - # ========================================================================= # SlurmScheduler.render @@ -512,29 +476,6 @@ def test_submit_slurm_dry_run(self): assert "#!/bin/bash" in out assert "#SBATCH --partition=gpu" in out - def test_submit_pd_dry_run(self): - out = self._run( - "--scheduler", "local", - "--collect", "perf", - "--model-path", "Qwen/Qwen3-8B", - "--pd", - "--dry-run", - ) - assert "PREFILL INSTANCE" in out - assert "DECODE INSTANCE" in out - assert "--disaggregation-mode prefill" in out - assert "--disaggregation-mode decode" in out - - def test_submit_pd_nixl_backend(self): - out = self._run( - "--scheduler", "local", - "--collect", "perf", - "--model-path", "Qwen/Qwen3-8B", - "--pd", - "--disagg-transfer-backend", "nixl", - "--dry-run", - ) - assert "--disaggregation-transfer-backend nixl" in out # ========================================================================= From b6dbbbb12c38cb4ec4436dc242cf69730c531dbe Mon Sep 17 00:00:00 2001 From: Terrence Zhang Date: Thu, 19 Mar 2026 00:47:46 +0000 Subject: [PATCH 35/56] simplify flowsim init: write annotated template instead of argparse MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit - flowsim init k8s → writes commented k8s.yaml template to ~/.flowsim/ - flowsim init slurm → writes commented slurm.yaml template - Users edit the file directly (comments explain each field) - Removed ~60 lines of argparse init code - Kept --force overwrite logic - Updated README examples and tests (43 pass) --- schedulers/README.md | 22 ++-- scripts/cli.py | 167 +++++++++++++++---------------- tests/unit/test_scheduler_cli.py | 97 +++++------------- 3 files changed, 114 insertions(+), 172 deletions(-) 
diff --git a/schedulers/README.md b/schedulers/README.md index 6069f2c..520559b 100644 --- a/schedulers/README.md +++ b/schedulers/README.md @@ -110,16 +110,11 @@ Submits profiling jobs as Kubernetes Jobs to a cluster. Supports both PVC and ho ### First-Time Setup ```bash -flowsim init k8s \ - --kubeconfig ~/.kube/config \ - --namespace default \ - --host-output-dir /host-stage-traces \ - --runtime-class-name nvidia \ - --force +# Generate an annotated config template +flowsim init k8s +# Edit ~/.flowsim/k8s.yaml with your cluster details ``` -Config is saved to `~/.flowsim/k8s.yaml` and automatically loaded on subsequent submissions. - ### Usage ```bash @@ -185,11 +180,9 @@ Generates sbatch scripts and submits them to a Slurm cluster via `sbatch`/`squeu ### First-Time Setup ```bash -flowsim init slurm \ - --partition gpu \ - --account my-project \ - --container-runtime none \ - --force +# Generate an annotated config template +flowsim init slurm +# Edit ~/.flowsim/slurm.yaml with your cluster details ``` ### Usage @@ -275,7 +268,8 @@ docker compose -f slurm-compose.yaml down -v ## Configuration -Config files are stored in `~/.flowsim/` and generated via `flowsim init`: +Config files are stored in `~/.flowsim/` and generated via `flowsim init`. +Templates include comments explaining each field — edit to match your cluster: ``` ~/.flowsim/ diff --git a/scripts/cli.py b/scripts/cli.py index 00409fb..78a3912 100644 --- a/scripts/cli.py +++ b/scripts/cli.py @@ -2,8 +2,8 @@ Usage:: - flowsim init k8s --kubeconfig ~/.kube/config --namespace ml-team ... - flowsim init slurm --partition gpu --account proj ... + flowsim init k8s # create ~/.flowsim/k8s.yaml template + flowsim init slurm # create ~/.flowsim/slurm.yaml template flowsim submit --scheduler k8s --collect perf --model-path ... flowsim submit ... 
--dry-run # debug: preview manifest """ @@ -17,110 +17,103 @@ _CONFIG_DIR = Path.home() / ".flowsim" +# ---- Annotated config templates (written by `flowsim init`) ---- -def _init_k8s_parser(sub: argparse._SubParsersAction) -> None: - p = sub.add_parser("k8s", help="Configure Kubernetes scheduler") - p.add_argument("--kubeconfig", required=True, - help="Path to kubeconfig file (REQUIRED)") - p.add_argument("--context", default="", - help="Kubeconfig context (empty = current-context)") - p.add_argument("--namespace", required=True, - help="Kubernetes namespace (REQUIRED)") - p.add_argument("--pvc", default="", - help="PVC name for trace output") - p.add_argument("--host-output-dir", default="", - help="hostPath alternative to PVC") - p.add_argument("--service-account", default="", - help="Service account for the job pod") - p.add_argument("--shm-size", default="16Gi", - help="Shared memory size (default: 16Gi)") - p.add_argument("--runtime-class-name", default="", - help="RuntimeClass for pod (e.g. 'nvidia' for CDI mode)") - p.add_argument("--force", action="store_true", - help="Overwrite existing config file") - - -def _init_slurm_parser(sub: argparse._SubParsersAction) -> None: - p = sub.add_parser("slurm", help="Configure Slurm scheduler") - p.add_argument("--partition", required=True, - help="Slurm partition (REQUIRED)") - p.add_argument("--account", default="", - help="Slurm account") - p.add_argument("--cli-prefix", default="", - help='CLI mode prefix, e.g. 
"docker exec -i slurmctld"') - p.add_argument("--time", default="02:00:00", - help="Job time limit (default: 02:00:00)") - p.add_argument("--constraint", default="", - help="Node constraint") - p.add_argument("--container-runtime", default="none", - choices=["docker", "enroot", "none"], - help="Container runtime (default: none)") - p.add_argument("--container-mounts", default="", - help="Container mount spec") - p.add_argument("--force", action="store_true", - help="Overwrite existing config file") +_K8S_TEMPLATE = """\ +# FlowSim Kubernetes scheduler config +# Edit this file, then run: flowsim submit --scheduler k8s ... +# Path to kubeconfig file (required) +kubeconfig: ~/.kube/config -def _cmd_init(argv: list[str]) -> int: - """Save scheduler config to ~/.flowsim/ from CLI args.""" - from schedulers.config import _save_yaml +# Kubeconfig context (empty = current-context) +context: "" + +# Kubernetes namespace (required) +namespace: default + +# Persistent storage for trace output (set one): +# pvc: my-traces-pvc +# host_output_dir: /data/flowsim-traces +pvc: "" +host_output_dir: "" + +# Service account for the job pod (empty = default) +service_account: "" + +# Shared memory size (for /dev/shm in the pod) +shm_size: "16Gi" + +# RuntimeClass (e.g. "nvidia" for CDI GPU passthrough) +runtime_class_name: "" +""" + +_SLURM_TEMPLATE = """\ +# FlowSim Slurm scheduler config +# Edit this file, then run: flowsim submit --scheduler slurm ... + +# Slurm partition (required) +partition: gpu + +# Billing account (empty = default) +account: "" + +# Job time limit +time: "02:00:00" + +# Node constraint (e.g. 
"h100") +constraint: "" +# CLI prefix for remote sbatch/squeue/scancel +# Examples: +# "docker exec -i slurmctld" (via Docker container) +# "ssh login-node" (via SSH) +cli_prefix: "" + +# Container runtime: docker | enroot | none +container_runtime: none + +# Container mount spec (for enroot/docker) +container_mounts: "" +""" + + +def _cmd_init(argv: list[str]) -> int: + """Copy an annotated config template to ~/.flowsim/.""" parser = argparse.ArgumentParser( prog="flowsim init", description=( - "Configure a scheduler and save to ~/.flowsim/.\n\n" + "Generate a scheduler config template under ~/.flowsim/.\n\n" "Examples:\n" - " flowsim init k8s --kubeconfig ~/.kube/config --namespace ml-team\n" - " flowsim init slurm --partition gpu --account proj" + " flowsim init k8s # creates ~/.flowsim/k8s.yaml\n" + " flowsim init slurm # creates ~/.flowsim/slurm.yaml\n" + " flowsim init slurm --force # overwrite existing" ), formatter_class=argparse.RawDescriptionHelpFormatter, ) - sub = parser.add_subparsers(dest="scheduler") - sub.required = True - _init_k8s_parser(sub) - _init_slurm_parser(sub) - + parser.add_argument( + "scheduler", choices=["k8s", "slurm"], + help="Scheduler type", + ) + parser.add_argument( + "--force", action="store_true", + help="Overwrite existing config file", + ) args = parser.parse_args(argv) - if args.scheduler == "k8s": - kube_path = Path(args.kubeconfig).expanduser() - if not kube_path.is_file(): - print(f"Error: kubeconfig not found: {kube_path}", file=sys.stderr) - return 1 - cfg = { - "kubeconfig": str(kube_path), - "context": args.context, - "namespace": args.namespace, - "pvc": args.pvc, - "host_output_dir": args.host_output_dir, - "service_account": args.service_account, - "shm_size": args.shm_size, - "runtime_class_name": args.runtime_class_name, - } - dst = _CONFIG_DIR / "k8s.yaml" - - elif args.scheduler == "slurm": - cfg = { - "cli_prefix": args.cli_prefix, - "partition": args.partition, - "account": args.account, - "time": args.time, 
- "constraint": args.constraint, - "container_runtime": args.container_runtime, - "container_mounts": args.container_mounts, - } - dst = _CONFIG_DIR / "slurm.yaml" - else: - parser.print_help() - return 1 + templates = {"k8s": _K8S_TEMPLATE, "slurm": _SLURM_TEMPLATE} + dst = _CONFIG_DIR / f"{args.scheduler}.yaml" if dst.exists() and not args.force: print(f"Error: {dst} already exists (use --force to overwrite)", file=sys.stderr) return 1 - _save_yaml(dst, cfg) - print(f"Saved {dst}") + _CONFIG_DIR.mkdir(parents=True, exist_ok=True) + dst.write_text(templates[args.scheduler]) + print(f"Created {dst}") + print("Edit the file, then run: flowsim submit --scheduler " + f"{args.scheduler} ...") return 0 diff --git a/tests/unit/test_scheduler_cli.py b/tests/unit/test_scheduler_cli.py index 0bdfe9d..5e07fca 100644 --- a/tests/unit/test_scheduler_cli.py +++ b/tests/unit/test_scheduler_cli.py @@ -295,76 +295,39 @@ def test_init_no_args_shows_help(self, capsys): _cmd_init([]) assert exc_info.value.code != 0 - def test_init_k8s_help(self, capsys): - from scripts.cli import _cmd_init - with pytest.raises(SystemExit) as exc_info: - _cmd_init(["k8s", "--help"]) - assert exc_info.value.code == 0 - out = capsys.readouterr().out - assert "--kubeconfig" in out - assert "--namespace" in out - - def test_init_slurm_help(self, capsys): - from scripts.cli import _cmd_init - with pytest.raises(SystemExit) as exc_info: - _cmd_init(["slurm", "--help"]) - assert exc_info.value.code == 0 - out = capsys.readouterr().out - assert "--cli-prefix" in out - assert "--partition" in out - - def test_init_k8s_missing_required(self): - from scripts.cli import _cmd_init - with pytest.raises(SystemExit) as exc_info: - _cmd_init(["k8s"]) - assert exc_info.value.code != 0 - - def test_init_slurm_missing_required(self): - from scripts.cli import _cmd_init - with pytest.raises(SystemExit) as exc_info: - _cmd_init(["slurm"]) - assert exc_info.value.code != 0 - - def test_init_k8s_bad_kubeconfig(self): - from 
scripts.cli import _cmd_init - rc = _cmd_init(["k8s", "--kubeconfig", "/nonexistent/path", "--namespace", "ns"]) - assert rc != 0 - - def test_init_k8s_saves_config(self, tmp_path: Path): - # Create a fake kubeconfig - kube = tmp_path / "kubeconfig" - kube.write_text("apiVersion: v1\nclusters: []\n") - + def test_init_k8s_creates_template(self, tmp_path: Path): config_dir = tmp_path / "flowsim" with mock.patch("scripts.cli._CONFIG_DIR", config_dir): from scripts.cli import _cmd_init - rc = _cmd_init([ - "k8s", - "--kubeconfig", str(kube), - "--namespace", "test-ns", - ]) + rc = _cmd_init(["k8s"]) assert rc == 0 cfg_file = config_dir / "k8s.yaml" assert cfg_file.exists() - cfg = yaml.safe_load(cfg_file.read_text()) - assert cfg["namespace"] == "test-ns" - assert cfg["kubeconfig"] == str(kube) - - def test_init_slurm_saves_config(self, tmp_path: Path): + content = cfg_file.read_text() + assert "kubeconfig:" in content + assert "namespace:" in content + # Template should have comments + assert content.startswith("#") + # Should be valid YAML + cfg = yaml.safe_load(content) + assert "kubeconfig" in cfg + assert "namespace" in cfg + + def test_init_slurm_creates_template(self, tmp_path: Path): config_dir = tmp_path / "flowsim" with mock.patch("scripts.cli._CONFIG_DIR", config_dir): from scripts.cli import _cmd_init - rc = _cmd_init([ - "slurm", - "--partition", "gpu", - "--account", "proj", - ]) + rc = _cmd_init(["slurm"]) assert rc == 0 cfg_file = config_dir / "slurm.yaml" assert cfg_file.exists() - cfg = yaml.safe_load(cfg_file.read_text()) - assert cfg["partition"] == "gpu" - assert cfg["account"] == "proj" + content = cfg_file.read_text() + assert "partition:" in content + assert "cli_prefix:" in content + # Template should have comments + assert content.startswith("#") + cfg = yaml.safe_load(content) + assert "partition" in cfg def test_init_refuses_overwrite(self, tmp_path: Path): config_dir = tmp_path / "flowsim" @@ -373,11 +336,7 @@ def 
test_init_refuses_overwrite(self, tmp_path: Path): with mock.patch("scripts.cli._CONFIG_DIR", config_dir): from scripts.cli import _cmd_init - rc = _cmd_init([ - "slurm", - "--partition", "gpu", - "--account", "proj", - ]) + rc = _cmd_init(["slurm"]) assert rc != 0 # should refuse def test_init_force_overwrite(self, tmp_path: Path): @@ -387,15 +346,11 @@ def test_init_force_overwrite(self, tmp_path: Path): with mock.patch("scripts.cli._CONFIG_DIR", config_dir): from scripts.cli import _cmd_init - rc = _cmd_init([ - "slurm", - "--partition", "gpu", - "--account", "proj", - "--force", - ]) + rc = _cmd_init(["slurm", "--force"]) assert rc == 0 - cfg = yaml.safe_load((config_dir / "slurm.yaml").read_text()) - assert cfg["partition"] == "gpu" + content = (config_dir / "slurm.yaml").read_text() + assert "partition:" in content + assert "existing" not in content # ========================================================================= From 95028db75328b335e72ab0329c2f1817ca912450 Mon Sep 17 00:00:00 2001 From: Terrence Zhang Date: Thu, 19 Mar 2026 00:52:11 +0000 Subject: [PATCH 36/56] add --config flag to flowsim init MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit - flowsim init k8s --config my.yaml → installs user file to ~/.flowsim/ - flowsim init k8s → writes annotated template (unchanged) - Added 2 tests: config copy + missing file error --- scripts/cli.py | 38 +++++++++++++++++++++++--------- tests/unit/test_scheduler_cli.py | 20 +++++++++++++++++ 2 files changed, 48 insertions(+), 10 deletions(-) diff --git a/scripts/cli.py b/scripts/cli.py index 78a3912..c123421 100644 --- a/scripts/cli.py +++ b/scripts/cli.py @@ -79,15 +79,19 @@ def _cmd_init(argv: list[str]) -> int: - """Copy an annotated config template to ~/.flowsim/.""" + """Set up scheduler config under ~/.flowsim/. + + Without --config: writes an annotated template. + With --config: copies the user-provided file. 
+ """ parser = argparse.ArgumentParser( prog="flowsim init", description=( - "Generate a scheduler config template under ~/.flowsim/.\n\n" + "Set up scheduler config under ~/.flowsim/.\n\n" "Examples:\n" - " flowsim init k8s # creates ~/.flowsim/k8s.yaml\n" - " flowsim init slurm # creates ~/.flowsim/slurm.yaml\n" - " flowsim init slurm --force # overwrite existing" + " flowsim init k8s # write template\n" + " flowsim init k8s --config my.yaml # use existing file\n" + " flowsim init slurm --force # overwrite existing" ), formatter_class=argparse.RawDescriptionHelpFormatter, ) @@ -95,13 +99,16 @@ def _cmd_init(argv: list[str]) -> int: "scheduler", choices=["k8s", "slurm"], help="Scheduler type", ) + parser.add_argument( + "--config", "-c", default="", + help="Path to an existing config YAML to install", + ) parser.add_argument( "--force", action="store_true", help="Overwrite existing config file", ) args = parser.parse_args(argv) - templates = {"k8s": _K8S_TEMPLATE, "slurm": _SLURM_TEMPLATE} dst = _CONFIG_DIR / f"{args.scheduler}.yaml" if dst.exists() and not args.force: @@ -110,10 +117,21 @@ def _cmd_init(argv: list[str]) -> int: return 1 _CONFIG_DIR.mkdir(parents=True, exist_ok=True) - dst.write_text(templates[args.scheduler]) - print(f"Created {dst}") - print("Edit the file, then run: flowsim submit --scheduler " - f"{args.scheduler} ...") + + if args.config: + src = Path(args.config).expanduser() + if not src.is_file(): + print(f"Error: config file not found: {src}", file=sys.stderr) + return 1 + import shutil + shutil.copy2(src, dst) + print(f"Installed {src} → {dst}") + else: + templates = {"k8s": _K8S_TEMPLATE, "slurm": _SLURM_TEMPLATE} + dst.write_text(templates[args.scheduler]) + print(f"Created {dst}") + print("Edit the file, then run: flowsim submit --scheduler " + f"{args.scheduler} ...") return 0 diff --git a/tests/unit/test_scheduler_cli.py b/tests/unit/test_scheduler_cli.py index 5e07fca..08e7146 100644 --- a/tests/unit/test_scheduler_cli.py +++ 
b/tests/unit/test_scheduler_cli.py @@ -352,6 +352,26 @@ def test_init_force_overwrite(self, tmp_path: Path): assert "partition:" in content assert "existing" not in content + def test_init_config_copies_file(self, tmp_path: Path): + # User has an existing config + user_cfg = tmp_path / "my-k8s.yaml" + user_cfg.write_text("namespace: prod\nkubeconfig: /etc/kube\n") + + config_dir = tmp_path / "flowsim" + with mock.patch("scripts.cli._CONFIG_DIR", config_dir): + from scripts.cli import _cmd_init + rc = _cmd_init(["k8s", "--config", str(user_cfg)]) + assert rc == 0 + installed = config_dir / "k8s.yaml" + assert installed.exists() + cfg = yaml.safe_load(installed.read_text()) + assert cfg["namespace"] == "prod" + + def test_init_config_missing_file(self): + from scripts.cli import _cmd_init + rc = _cmd_init(["k8s", "--config", "/nonexistent/path.yaml"]) + assert rc != 0 + # ========================================================================= # CLI: flowsim submit (parse/dry-run only, no actual submission) From ac41690600818afdd633f8b1531bdbd8a8db0363 Mon Sep 17 00:00:00 2001 From: Terrence Zhang Date: Thu, 19 Mar 2026 00:59:44 +0000 Subject: [PATCH 37/56] use template files for flowsim init instead of inline strings MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit - Move annotated templates to schedulers/templates/{k8s,slurm}.yaml - flowsim init k8s → copies bundled template to ~/.flowsim/ - flowsim init k8s --config my.yaml → copies user file instead - Remove inline template strings from cli.py --- schedulers/config.py | 5 +- schedulers/templates/k8s.yaml | 27 +++++++++ schedulers/templates/slurm.yaml | 27 +++++++++ scripts/cli.py | 100 +++++++------------------------- 4 files changed, 77 insertions(+), 82 deletions(-) create mode 100644 schedulers/templates/k8s.yaml create mode 100644 schedulers/templates/slurm.yaml diff --git a/schedulers/config.py b/schedulers/config.py index 18ab55e..3b2d2fd 100644 --- 
a/schedulers/config.py +++ b/schedulers/config.py @@ -13,8 +13,9 @@ Priority (highest → lowest): CLI flag > env var > config file > built-in default -Run ``flowsim init k8s`` or ``flowsim init slurm`` to generate -a config under ``~/.flowsim/``. +Run ``flowsim init k8s`` or ``flowsim init slurm`` to install +a config template under ``~/.flowsim/``. Templates are in +``schedulers/templates/``. """ from __future__ import annotations diff --git a/schedulers/templates/k8s.yaml b/schedulers/templates/k8s.yaml new file mode 100644 index 0000000..8f548de --- /dev/null +++ b/schedulers/templates/k8s.yaml @@ -0,0 +1,27 @@ +# FlowSim Kubernetes scheduler config +# Copy to ~/.flowsim/k8s.yaml and edit: +# flowsim init k8s --config schedulers/templates/k8s.yaml + +# Path to kubeconfig file (required) +kubeconfig: ~/.kube/config + +# Kubeconfig context (empty = current-context) +context: "" + +# Kubernetes namespace (required) +namespace: default + +# Persistent storage for trace output (set one): +# pvc: my-traces-pvc +# host_output_dir: /data/flowsim-traces +pvc: "" +host_output_dir: "" + +# Service account for the job pod (empty = default) +service_account: "" + +# Shared memory size (for /dev/shm in the pod) +shm_size: "16Gi" + +# RuntimeClass (e.g. "nvidia" for CDI GPU passthrough) +runtime_class_name: "" diff --git a/schedulers/templates/slurm.yaml b/schedulers/templates/slurm.yaml new file mode 100644 index 0000000..5f27328 --- /dev/null +++ b/schedulers/templates/slurm.yaml @@ -0,0 +1,27 @@ +# FlowSim Slurm scheduler config +# Copy to ~/.flowsim/slurm.yaml and edit: +# flowsim init slurm --config schedulers/templates/slurm.yaml + +# Slurm partition (required) +partition: gpu + +# Billing account (empty = default) +account: "" + +# Job time limit +time: "02:00:00" + +# Node constraint (e.g. 
"h100") +constraint: "" + +# CLI prefix for remote sbatch/squeue/scancel +# Examples: +# "docker exec -i slurmctld" (via Docker container) +# "ssh login-node" (via SSH) +cli_prefix: "" + +# Container runtime: docker | enroot | none +container_runtime: none + +# Container mount spec (for enroot/docker) +container_mounts: "" diff --git a/scripts/cli.py b/scripts/cli.py index c123421..ba0a65e 100644 --- a/scripts/cli.py +++ b/scripts/cli.py @@ -16,81 +16,22 @@ _CONFIG_DIR = Path.home() / ".flowsim" - -# ---- Annotated config templates (written by `flowsim init`) ---- - -_K8S_TEMPLATE = """\ -# FlowSim Kubernetes scheduler config -# Edit this file, then run: flowsim submit --scheduler k8s ... - -# Path to kubeconfig file (required) -kubeconfig: ~/.kube/config - -# Kubeconfig context (empty = current-context) -context: "" - -# Kubernetes namespace (required) -namespace: default - -# Persistent storage for trace output (set one): -# pvc: my-traces-pvc -# host_output_dir: /data/flowsim-traces -pvc: "" -host_output_dir: "" - -# Service account for the job pod (empty = default) -service_account: "" - -# Shared memory size (for /dev/shm in the pod) -shm_size: "16Gi" - -# RuntimeClass (e.g. "nvidia" for CDI GPU passthrough) -runtime_class_name: "" -""" - -_SLURM_TEMPLATE = """\ -# FlowSim Slurm scheduler config -# Edit this file, then run: flowsim submit --scheduler slurm ... - -# Slurm partition (required) -partition: gpu - -# Billing account (empty = default) -account: "" - -# Job time limit -time: "02:00:00" - -# Node constraint (e.g. 
"h100") -constraint: "" - -# CLI prefix for remote sbatch/squeue/scancel -# Examples: -# "docker exec -i slurmctld" (via Docker container) -# "ssh login-node" (via SSH) -cli_prefix: "" - -# Container runtime: docker | enroot | none -container_runtime: none - -# Container mount spec (for enroot/docker) -container_mounts: "" -""" +_TEMPLATES_DIR = Path(__file__).resolve().parent.parent / "schedulers" / "templates" def _cmd_init(argv: list[str]) -> int: - """Set up scheduler config under ~/.flowsim/. + """Install a scheduler config to ~/.flowsim/. - Without --config: writes an annotated template. - With --config: copies the user-provided file. + Without --config: copies the bundled template from schedulers/templates/. + With --config: copies the specified file. """ parser = argparse.ArgumentParser( prog="flowsim init", description=( - "Set up scheduler config under ~/.flowsim/.\n\n" + "Install scheduler config under ~/.flowsim/.\n\n" "Examples:\n" - " flowsim init k8s # write template\n" - " flowsim init k8s --config my.yaml # use existing file\n" + " flowsim init k8s # install bundled template\n" + " flowsim init k8s --config my.yaml # install your own file\n" " flowsim init slurm --force # overwrite existing" ), formatter_class=argparse.RawDescriptionHelpFormatter, @@ -101,7 +42,7 @@ def _cmd_init(argv: list[str]) -> int: ) parser.add_argument( "--config", "-c", default="", - help="Path to an existing config YAML to install", + help="Path to a config YAML to install (default: bundled template)", ) parser.add_argument( "--force", action="store_true", @@ -116,22 +57,21 @@ def _cmd_init(argv: list[str]) -> int: file=sys.stderr) return 1 - _CONFIG_DIR.mkdir(parents=True, exist_ok=True) - if args.config: src = Path(args.config).expanduser() - if not src.is_file(): - print(f"Error: config file not found: {src}", file=sys.stderr) - return 1 - import shutil - shutil.copy2(src, dst) - print(f"Installed {src} → {dst}") else: - templates = {"k8s": _K8S_TEMPLATE, "slurm": 
_SLURM_TEMPLATE} - dst.write_text(templates[args.scheduler]) - print(f"Created {dst}") - print("Edit the file, then run: flowsim submit --scheduler " - f"{args.scheduler} ...") + src = _TEMPLATES_DIR / f"{args.scheduler}.yaml" + + if not src.is_file(): + print(f"Error: config file not found: {src}", file=sys.stderr) + return 1 + + import shutil + _CONFIG_DIR.mkdir(parents=True, exist_ok=True) + shutil.copy2(src, dst) + print(f"Installed {src} → {dst}") + print(f"Edit {dst}, then run: flowsim submit --scheduler " + f"{args.scheduler} ...") return 0 From da8ab00589911ee3bafd53567118f9a4a538a6ba Mon Sep 17 00:00:00 2001 From: Terrence Zhang Date: Thu, 19 Mar 2026 01:01:28 +0000 Subject: [PATCH 38/56] update README: reflect template-file init with --config option --- schedulers/README.md | 14 ++++++++++---- 1 file changed, 10 insertions(+), 4 deletions(-) diff --git a/schedulers/README.md b/schedulers/README.md index 520559b..8f9d8e1 100644 --- a/schedulers/README.md +++ b/schedulers/README.md @@ -110,9 +110,12 @@ Submits profiling jobs as Kubernetes Jobs to a cluster. Supports both PVC and ho ### First-Time Setup ```bash -# Generate an annotated config template +# Install the bundled config template flowsim init k8s # Edit ~/.flowsim/k8s.yaml with your cluster details + +# Or install your own config file +flowsim init k8s --config my-cluster.yaml ``` ### Usage @@ -180,9 +183,12 @@ Generates sbatch scripts and submits them to a Slurm cluster via `sbatch`/`squeu ### First-Time Setup ```bash -# Generate an annotated config template +# Install the bundled config template flowsim init slurm # Edit ~/.flowsim/slurm.yaml with your cluster details + +# Or install your own config file +flowsim init slurm --config my-slurm.yaml ``` ### Usage @@ -268,8 +274,8 @@ docker compose -f slurm-compose.yaml down -v ## Configuration -Config files are stored in `~/.flowsim/` and generated via `flowsim init`. 
-Templates include comments explaining each field — edit to match your cluster: +Config files are stored in `~/.flowsim/` and installed via `flowsim init`. +Templates are in `schedulers/templates/` with comments explaining each field: ``` ~/.flowsim/ From 059f3eaf4535d7b9429b5d582810ce1d3e83b75b Mon Sep 17 00:00:00 2001 From: Terrence Zhang Date: Thu, 19 Mar 2026 19:09:17 +0000 Subject: [PATCH 39/56] docs: streamline READMEs, unify examples, remove legacy manual workflow - Root README: replace manual docker run profile/parse with flowsim submit - Schedulers README: remove redundant How It Works, inline YAML examples, scattered test sections - Unify model/params across both READMEs (Qwen3-235B-A22B, tp=1, gpus=1, --load-format dummy) - Add Scheduler Backends section to root README linking to schedulers/README.md --- README.md | 215 +++++++++----------------------- schedulers/README.md | 291 ++++++++++++++----------------------------- 2 files changed, 151 insertions(+), 355 deletions(-) diff --git a/README.md b/README.md index c4a674e..604c2a5 100644 --- a/README.md +++ b/README.md @@ -20,6 +20,8 @@ The project supports rapid deployment using Docker, includes scripts for environ ## Table of Contents - [Getting Started](#getting-started) +- [Stage Profiling](#stage-profiling) +- [Scheduler Backends](#scheduler-backends) - [For Developers](#for-developers) - [Risks and limitations](#risks-and-limitations) - [License](#license) @@ -49,208 +51,112 @@ make build-docker This creates a local image named `flowsim-image` with FlowSim patches already applied to sglang. -### 2. Run Profile → Parse → Simulate +### 2. Profile (Generate Traces) -Create workspace directories on your host for storing traces and results: +Use `flowsim submit` to capture stage-separated traces (EXTEND + DECODE), parse them, and run cross-rank analysis — all in one step. See [Stage Profiling](#stage-profiling) for how stages and collection modes work. 
```bash -mkdir -p /data/flowsim-profile -mkdir -p /data/flowsim-simulate -``` - -#### Step 1: Profile (Generate Traces) - -```bash -sudo docker run --gpus=all \ - -v /data/flowsim-profile:/workspace/profile \ - -v /data/flowsim-simulate:/workspace/simulate \ - -w /flowsim \ - --cap-add=SYS_ADMIN \ - --network=host \ - --shm-size 911G \ - flowsim-image \ - python scripts/run_profile.py \ - --profile-dir /workspace/profile \ - --log-dir /workspace/profile/logs \ - --bench-timeout 3600 \ - --server-opts "--model-path /flowsim/workload/models/configs/deepseek/ --load-format dummy --tp 4 --ep 4 --host 0.0.0.0 --port 30001 --attention-backend flashinfer --disable-cuda-graph" \ - --bench-opts "--backend sglang --host 0.0.0.0 --port 30001 --dataset-name defined-len --prefill-decode-lens 1024:8 --num-prompts 1 --profile" -``` - -**What this does:** -- Starts an sglang server with profiling enabled -- Runs benchmark requests against it -- Generates `*.trace.json.gz` files in `/data/flowsim-profile` (mounted as `/workspace/profile`) - -**Note:** The first run will be slow (~10 minutes) due to DeepGEMM kernel warmup and compilation. For stable performance, avoid using `--rm` flag and reuse the same container using `sudo docker exec -it bash`. Subsequent runs with similar configurations will be faster. - -**Tip:** -- Adjust `--server-opts` and `--bench-opts` to match your model, parallelism (TP/DP/EP), and workload requirements. All `sglang.launch_server` and `bench_serving.py` parameters are supported. -- Trace files can be visualized using [Perfetto UI](https://ui.perfetto.dev/) by uploading the `.trace.json.gz` files directly. -- For multi-GPU profiling (TP > 1), merge individual traces into a single file for a global view: - ```bash - python /flowsim/utils/merge_trace.py \ - --trace_dir /data/flowsim-profile \ - --output /data/flowsim-profile/merged_trace.json - ``` - Then visualize the merged trace at [Perfetto UI](https://ui.perfetto.dev/). 
- -#### Step 2: Parse (Convert Trace to CSV) - -```bash -sudo docker run --rm \ - -v /data/flowsim-profile:/workspace/profile \ - -v /data/flowsim-simulate:/workspace/simulate \ - -w /flowsim \ - flowsim-image \ - python -m scripts.run_parse \ - --trace-file /workspace/profile/your-trace-name-TP-0.trace.json.gz \ - --output-dir /workspace/simulate +pip install -e . +flowsim submit --scheduler local \ + --collect all \ + --model-path workload/models/configs/Qwen3-235B-A22B \ + --tp 1 --bs 1 --input-len 2048 --gpus 1 \ + --extra-server-opts "--load-format dummy" ``` -Replace `your-trace-name-TP-0.trace.json.gz` with the actual filename from step 1. - -**What this does:** -- Parses the trace file -- Extracts kernel-level information (operator, shapes, dtypes) -- Generates a CSV file and JSON summary in `/data/flowsim-simulate` (mounted as `/workspace/simulate`) - -**Fallback:** If you don't have a GPU or can't run profiling, use the demo trace shipped with the repo: +For K8s / Slurm clusters, see [Scheduler Backends](#scheduler-backends). +**Tip:** Trace files can be visualized at [Perfetto UI](https://ui.perfetto.dev/). For multi-GPU traces, merge them first: ```bash -sudo docker run --rm \ - -v /data/flowsim-simulate:/workspace/simulate \ - -w /flowsim \ - flowsim-image \ - python -m scripts.run_parse \ - --trace-file /flowsim/demo/deepseekv3-TP-0.trace.json.gz \ - --output-dir /workspace/simulate +python utils/merge_trace.py --trace_dir stage_traces/local/*/bs1_input2048_ctx0 --output merged.json ``` -#### Step 3: Simulate (Run Hardware Simulation) +### 3. Simulate (Run Hardware Simulation) -This step requires a running LLMCompass backend. 
First, build the backend image: +Build and start the LLMCompass backend, then submit parsed traces for kernel-level simulation: ```bash +# Build backend image sudo docker build -t llmcompass-backend -f backend/LLMCompass/Dockerfile backend/LLMCompass/ -``` -Then start the backend: - -```bash -# Terminal 1: Start LLMCompass backend +# Terminal 1: Start backend sudo docker run --rm -p 8000:8000 llmcompass-backend -``` -Then in another terminal, run the simulation: - -```bash # Terminal 2: Run simulation -sudo docker run --rm \ - --network=host \ - -v /data/flowsim-profile:/workspace/profile \ - -v /data/flowsim-simulate:/workspace/simulate \ +sudo docker run --rm --network=host \ + -v /data/flowsim:/workspace \ flowsim-image \ python -m scripts.run_simulate \ - --trace-file /workspace/profile/your-trace-name-TP-0.trace.json.gz \ + --trace-file /workspace/traces/bs1_input2048_ctx0/*-TP-0-EXTEND.trace.json.gz \ --api-url http://127.0.0.1:8000 \ --artifact-dir /workspace/simulate/llmcompass ``` -**What this does:** -- Parses the trace into kernels -- Submits each kernel to the LLMCompass backend `/tasks` API -- Polls until all tasks complete -- Writes request/response artifacts to `/workspace/simulate/llmcompass` - -### 3. Inspect Results - -All generated files are available on your host at `/data/`: +### 4. Inspect Results ```bash -ls -lh /data/flowsim-profile/ # Raw trace files -ls -lh /data/flowsim-simulate/ # Parsed CSV, summary, simulation artifacts +ls -lh /data/flowsim/traces/ # Stage-separated traces + parsed CSVs +ls -lh /data/flowsim/simulate/ # Simulation artifacts ``` --- -## Stage Profiling (`run_stage_profile.py`) +## Stage Profiling -`scripts/run_stage_profile.py` is the single entry-point for **stage-separated** profiling: it captures prefill (EXTEND) and decode traces independently, parses them, runs cross-rank kernel analysis, and optionally collects kernel input shapes. 
+FlowSim performs **stage-separated** profiling: it captures prefill (EXTEND) and decode traces independently, parses them, runs cross-rank kernel analysis, and optionally collects kernel input shapes. -### Quick reference +### How stages work Each profiling request produces **two** stage-separated traces: - **EXTEND** (prefill) — processes `input_len` new tokens (with optional `existing_ctx` tokens already in KV cache) -- **DECODE** — profiler captures `decode-tokens` decode batch steps - -The profiler captures exactly **one** EXTEND batch and **decode-tokens** DECODE batches per run. +- **DECODE** — captures `decode-tokens` decode batch steps -| Flag | Description | Default | -|---|---|---| -| `--input-len` | Number of new prefill tokens per request (EXTEND) | 2048 | -| `--existing-ctx` | Tokens already in KV cache from a prior request (0 = cold prefill) | 0 | -| `--bs` | Batch size (concurrent requests) | 1 | -| `--decode-tokens` | Number of decode tokens to generate (= number of decode batches profiled) | 32 | +### Collection modes | Mode | What it does | |---|---| -| `--collect perf` | Profile a single (bs, input_len, existing_ctx) point → trace (EXTEND + DECODE) → parse → cross-rank analysis | -| `--collect shapes` | Re-run **without CUDA graph** to capture kernel input shapes, then merge into timing CSVs (both EXTEND and DECODE) | -| `--collect all` | Both phases back-to-back (auto-restarts the server in between). Requires `--launch-server`. | - -`--collect` is required. Use `perf`, `shapes`, or `all`. 
+| `--collect perf` | Profile a single (bs, input_len, existing_ctx) point → trace → parse → cross-rank analysis | +| `--collect shapes` | Re-run **without CUDA graph** to capture kernel input shapes, then merge into timing CSVs | +| `--collect all` | Both phases back-to-back (auto-restarts the server in between) | ### Examples -**Cold prefill** (server already running): - ```bash -python3 scripts/run_stage_profile.py \ +# Basic profiling +flowsim submit --scheduler local \ --collect perf \ - --bs 1 --input-len 2048 --decode-tokens 32 \ - --output-dir /workspace/traces \ - --host 0.0.0.0 --port 30001 -``` + --model-path workload/models/configs/Qwen3-235B-A22B \ + --tp 1 --bs 1 --input-len 2048 --gpus 1 \ + --extra-server-opts "--load-format dummy" -**With existing KV cache context:** - -```bash -python3 scripts/run_stage_profile.py \ +# With existing KV cache context +flowsim submit --scheduler local \ --collect perf \ - --bs 4 --input-len 512 --existing-ctx 4096 --decode-tokens 32 \ - --output-dir /workspace/traces \ - --launch-server \ - --server-opts "--model-path Qwen/Qwen3-235B-A22B-FP8 --tp 4 --host 0.0.0.0 --port 30001" -``` + --model-path workload/models/configs/Qwen3-235B-A22B \ + --tp 1 --bs 4 --input-len 512 --existing-ctx 4096 --gpus 1 \ + --extra-server-opts "--load-format dummy" -**Collect shapes only** (requires a no-CUDA-graph server): - -```bash -python3 scripts/run_stage_profile.py \ - --collect shapes \ - --output-dir /workspace/sweep_P1_tp4 \ - --launch-server \ - --server-opts "--model-path Qwen/Qwen3-235B-A22B-FP8 --tp 4 --host 0.0.0.0 --port 30001" -``` - -When `--collect shapes` is used with `--launch-server`, the server is automatically started with `--disable-cuda-graph --disable-cuda-graph-padding`. 
- -**Full pipeline** (perf → auto-restart → shapes → merge): +# Full pipeline (perf + shapes) +flowsim submit --scheduler local \ + --collect all \ + --model-path workload/models/configs/Qwen3-235B-A22B \ + --tp 1 --bs 1 --input-len 2048 --gpus 1 \ + --extra-server-opts "--load-format dummy" -```bash -python3 scripts/run_stage_profile.py \ +# Multi-point sweep +flowsim submit --scheduler local \ --collect all \ - --output-dir /workspace/sweep_P1_tp4 \ - --launch-server \ - --server-opts "--model-path Qwen/Qwen3-235B-A22B-FP8 --tp 4 --host 0.0.0.0 --port 30001" + --model-path workload/models/configs/Qwen3-235B-A22B \ + --sweep 1:2048:0 4:2048:0 8:2048:0 --gpus 1 \ + --extra-server-opts "--load-format dummy" ``` +For K8s / Slurm clusters, replace `--scheduler local` with `k8s` or `slurm`. See [schedulers/README.md](schedulers/README.md) for full scheduler documentation. ### Output structure ``` -sweep_P1_tp4/ +stage_traces/{scheduler}/{YYYYMMDD_HHMMSS}/ ├── sweep_summary.json ├── bs1_input2048_ctx0/ │ ├── *-TP-*-EXTEND.trace.json.gz @@ -266,23 +172,22 @@ sweep_P1_tp4/ After `--collect shapes`, each `parsed/TP-*-DECODE.csv` gains a `Dims` column with kernel tensor shapes. -### Helper scripts - -| Script | Purpose | -|---|---| -| `tests/integration/test_stage_profile_configs.py` | Integration tests for `--collect {perf,shapes,all}` across parallelism configs. Run with `pytest` inside Docker. Filter with `RUN_CONFIGS=P1`. | - ### Utilities (`utils/`) | File | Purpose | |---|---| | `utils/cross_rank_agg.py` | Cross-rank kernel aggregation (symmetric collectives → min, asymmetric → max, compute → mean) | | `utils/shape_merge.py` | Merge kernel shape data into timing CSVs | -| `utils/net.py` | Shared networking helpers (`wait_for_port`) | | `utils/merge_trace.py` | Merge multi-rank traces into a single Perfetto-compatible file | --- +## Scheduler Backends + +For submitting profiling jobs to **local Docker**, **Kubernetes**, or **Slurm** clusters, use the `flowsim` CLI. 
See [schedulers/README.md](schedulers/README.md) for full documentation including per-scheduler parameters, configuration, and environment variables. + +--- + ## For Developers ### Customizing Profiling Workloads diff --git a/schedulers/README.md b/schedulers/README.md index 8f9d8e1..d0835e7 100644 --- a/schedulers/README.md +++ b/schedulers/README.md @@ -11,133 +11,92 @@ FlowSim supports three scheduler backends for submitting GPU profiling jobs: ## Quick Start ```bash -# Install (from FlowSim project root) -cd FlowSim -pip install -e . # or ensure PYTHONPATH includes the project root - -# Show help +pip install -e . flowsim --help -flowsim submit --help ``` ## Common Workflow -All schedulers share the same CLI interface: - ```bash -# 1. Submit a job -flowsim submit --scheduler --collect \ - --model-path [options...] - -# 2. List jobs -flowsim list --scheduler - -# 3. Check job status -flowsim status --scheduler --job - -# 4. View logs -flowsim logs --scheduler --job - -# 5. Cancel a job -flowsim cancel --scheduler --job - -# 6. Dry-run (print script/manifest without submitting) -flowsim submit --scheduler ... --dry-run +# Submit a job (same interface for all backends) +flowsim submit --scheduler \ + --collect \ + --model-path \ + --tp 1 --bs 1 --input-len 2048 --gpus 1 + +# Job lifecycle +flowsim list --scheduler +flowsim status --scheduler --job +flowsim logs --scheduler --job +flowsim cancel --scheduler --job + +# Preview without submitting +flowsim submit --scheduler ... 
--dry-run + +# Multi-point sweep +flowsim submit --scheduler \ + --collect all --model-path workload/models/configs/Qwen3-235B-A22B \ + --sweep 1:2048:0 4:2048:0 8:2048:0 --gpus 1 ``` ### Common Parameters | Parameter | Description | Default | |-----------|-------------|---------| -| `--collect` | Collection mode: `perf` / `shapes` / `all` | required | +| `--collect` | `perf` / `shapes` / `all` | required | | `--model-path` | HuggingFace model path | required | | `--tp` | Tensor parallelism | `1` | | `--dp` | Data parallelism | `1` | | `--bs` | Batch size | `1` | | `--input-len` | Input sequence length | `2048` | | `--existing-ctx` | Existing KV cache length | `0` | -| `--decode-tokens` | Decode token count | `32` | -| `--warmup-n` | Warmup iterations | `5` | -| `--image` | Docker image | `flowsim-image:latest` | | `--gpus` | GPU count | `1` | -| `--output-dir` | Output directory (auto-generated if omitted) | `stage_traces/{scheduler}/{timestamp}/` | -| `--dry-run` | Print script only, do not submit | `false` | +| `--image` | Docker image | `flowsim-image:latest` | +| `--output-dir` | Output directory | `stage_traces/{scheduler}/{timestamp}/` | +| `--dry-run` | Print script only | `false` | --- ## 1. Local Scheduler -Runs profiling directly on the host via `docker run`. The simplest option, suitable for single-machine development and testing. - -### Usage +Runs profiling via `docker run` on the host machine. 
```bash -# Simplest usage — run on GPU 0 flowsim submit --scheduler local \ --collect all \ --model-path workload/models/configs/Qwen3-235B-A22B \ - --tp 1 --bs 1 --input-len 2048 \ - --gpus 1 --local-gpus 0 \ + --tp 1 --bs 1 --input-len 2048 --gpus 1 \ + --local-gpus 0 \ --extra-server-opts "--load-format dummy" - -# Multi-GPU -flowsim submit --scheduler local \ - --collect perf \ - --model-path Qwen/Qwen3-8B \ - --tp 2 --gpus 2 --local-gpus 0,1 ``` -### Parameters - | Parameter | Description | Default | |-----------|-------------|---------| -| `--local-gpus` | `CUDA_VISIBLE_DEVICES` (e.g. `0` or `0,1`) | empty (all GPUs) | +| `--local-gpus` | `CUDA_VISIBLE_DEVICES` (e.g. `0` or `0,1`) | all GPUs | | `--local-workdir` | Host working directory | FlowSim project root | -### How It Works - -1. `render()` generates a `docker run --gpus` command -2. `submit()` runs the container on the host, waits for completion -3. Traces are written to `stage_traces/local/{YYYYMMDD_HHMMSS}/` -4. `status()` / `logs()` / `list_jobs()` scan log files - --- ## 2. Kubernetes Scheduler -Submits profiling jobs as Kubernetes Jobs to a cluster. Supports both PVC and hostPath storage. +Submits profiling jobs as Kubernetes Jobs. Supports PVC and hostPath storage. 
-### First-Time Setup +### Setup ```bash -# Install the bundled config template -flowsim init k8s -# Edit ~/.flowsim/k8s.yaml with your cluster details - -# Or install your own config file -flowsim init k8s --config my-cluster.yaml +flowsim init k8s # install bundled template +flowsim init k8s --config my-cluster.yaml # or use your own +# Edit ~/.flowsim/k8s.yaml ``` ### Usage ```bash -# Submit to K8s cluster flowsim submit --scheduler k8s \ --collect all \ --model-path workload/models/configs/Qwen3-235B-A22B \ --tp 1 --bs 1 --input-len 2048 --gpus 1 \ --extra-server-opts "--load-format dummy" - -# Override config file values -flowsim submit --scheduler k8s \ - --collect perf \ - --model-path Qwen/Qwen3-8B \ - --k8s-namespace ml-team \ - --k8s-pvc my-traces-pvc \ - --gpus 4 --tp 4 - -# Dry-run to preview the generated YAML -flowsim submit --scheduler k8s ... --dry-run ``` ### Parameters @@ -147,76 +106,43 @@ flowsim submit --scheduler k8s ... --dry-run | `--k8s-namespace` | K8s namespace | `default` | | `--k8s-kubeconfig` | kubeconfig path | `~/.kube/config` | | `--k8s-context` | kubeconfig context | current context | -| `--k8s-pvc` | PVC name (persistent storage) | empty | -| `--k8s-host-output-dir` | hostPath mount (used when PVC is empty) | empty | -| `--k8s-node-selector` | Node selector labels (repeatable), format `KEY=VALUE` | empty | +| `--k8s-pvc` | PVC name for traces | empty | +| `--k8s-host-output-dir` | hostPath (when no PVC) | empty | +| `--k8s-node-selector` | Node selector `KEY=VALUE` (repeatable) | empty | | `--k8s-service-account` | ServiceAccount | empty | | `--k8s-shm-size` | Shared memory size | `16Gi` | -| `--k8s-runtime-class` | RuntimeClass (e.g. `nvidia` for CDI mode) | empty | - -### How It Works - -1. `render()` generates a Kubernetes Job YAML/JSON manifest -2. `submit()` creates the Job via the `kubernetes` Python client -3. Traces are persisted via PVC or hostPath -4. 
`status()` / `cancel()` / `list_jobs()` operate via the K8s API - -### Kind Local Test Cluster - -```bash -# Start a Kind cluster (GPU passthrough + CDI mode) -bash tests/integration/infra/dev-setup.sh kind - -# Run K8s integration tests -python -m pytest tests/integration/test_scheduler_local.py::TestK8sScheduler -v -x - -# Teardown -bash tests/integration/infra/dev-teardown.sh kind -``` +| `--k8s-runtime-class` | RuntimeClass (e.g. `nvidia`) | empty | --- ## 3. Slurm Scheduler -Generates sbatch scripts and submits them to a Slurm cluster via `sbatch`/`squeue`/`scancel`. +Generates sbatch scripts and submits via `sbatch`/`squeue`/`scancel`. -### First-Time Setup +### Setup ```bash -# Install the bundled config template -flowsim init slurm -# Edit ~/.flowsim/slurm.yaml with your cluster details - -# Or install your own config file -flowsim init slurm --config my-slurm.yaml +flowsim init slurm # install bundled template +flowsim init slurm --config my-slurm.yaml # or use your own +# Edit ~/.flowsim/slurm.yaml ``` ### Usage ```bash -# Submit via sbatch flowsim submit --scheduler slurm \ --collect all \ --model-path workload/models/configs/Qwen3-235B-A22B \ --tp 1 --bs 1 --input-len 2048 --gpus 1 \ --slurm-partition gpu \ --extra-server-opts "--load-format dummy" +``` -# CLI prefix (e.g. via docker exec or ssh) -flowsim submit --scheduler slurm \ - --slurm-cli-prefix "docker exec -i slurmctld" \ - --slurm-partition normal \ - --collect perf --model-path Qwen/Qwen3-8B --gpus 1 - -# Dry-run to preview the generated sbatch script -flowsim submit --scheduler slurm ... --dry-run - -# Check status -flowsim status --scheduler slurm --job 12345 \ +For remote clusters, use `--slurm-cli-prefix`: +```bash +flowsim submit --scheduler slurm ... 
\ --slurm-cli-prefix "docker exec -i slurmctld" - -# Cancel a job -flowsim cancel --scheduler slurm --job 12345 +# or: --slurm-cli-prefix "ssh login-node" ``` ### Parameters @@ -227,69 +153,29 @@ flowsim cancel --scheduler slurm --job 12345 | `--slurm-time` | Job time limit | `02:00:00` | | `--slurm-account` | Billing account | empty | | `--slurm-constraint` | Node constraint | empty | -| `--slurm-cli-prefix` | Shell prefix for CLI commands (e.g. `"docker exec -i slurmctld"`) | empty | -| `--slurm-container-runtime` | Container runtime: `docker` / `enroot` / `none` | `none` | +| `--slurm-cli-prefix` | Shell prefix for remote CLI | empty | +| `--slurm-container-runtime` | `docker` / `enroot` / `none` | `none` | | `--slurm-container-mounts` | Container mounts | empty | | `--slurm-module` | `module load` commands (repeatable) | empty | | `--slurm-extra-sbatch` | Extra `#SBATCH` directives (repeatable) | empty | -### container_runtime Options - -| Value | Description | -|-------|-------------| -| `none` | Run directly on compute node (Python/sglang must be installed) | -| `docker` | Run via `docker run` on the allocated node | -| `enroot` | Run via `srun --container-image` (NVIDIA enroot) | - -### How It Works - -1. `render()` generates a complete sbatch script (`#SBATCH` directives + profiling command) -2. `submit()` pipes the script to `sbatch --parsable` -3. `status()` queries via `scontrol show job` -4. `cancel()` runs `scancel` -5. 
`list_jobs()` runs `squeue` - -If Slurm commands are not on the local PATH, use `--slurm-cli-prefix` to specify a prefix, e.g.: -- `"docker exec -i slurmctld"` — via Docker container -- `"ssh login-node"` — via SSH - -### Docker Compose Local Test Cluster - -```bash -# Start Slurm cluster (slurmctld + 1 compute node + 1 GPU) -cd tests/integration/infra/ -docker compose -f slurm-compose.yaml up -d - -# Check cluster status -docker exec slurmctld sinfo - -# Run Slurm integration tests -python -m pytest tests/integration/test_scheduler_local.py::TestSlurmScheduler -v -x - -# Teardown -docker compose -f slurm-compose.yaml down -v -``` - --- ## Configuration -Config files are stored in `~/.flowsim/` and installed via `flowsim init`. -Templates are in `schedulers/templates/` with comments explaining each field: +Config files live in `~/.flowsim/` and are installed via `flowsim init`. +Templates with comments are in `schedulers/templates/`. ``` ~/.flowsim/ -├── k8s.yaml # K8s scheduler config -└── slurm.yaml # Slurm scheduler config +├── k8s.yaml +└── slurm.yaml ``` -Parameter priority (highest to lowest): -1. CLI flag (`--slurm-partition gpu`) -2. Environment variable (see table below) -3. Config file (`~/.flowsim/slurm.yaml`) -4. 
Built-in default +**Priority** (highest to lowest): +CLI flag → environment variable → config file → built-in default -### Supported Environment Variables +### Environment Variables | Variable | Overrides | Example | |----------|-----------|--------| @@ -301,45 +187,50 @@ Parameter priority (highest to lowest): | `FLOWSIM_SLURM_TIME` | `--slurm-time` | `04:00:00` | | `FLOWSIM_SLURM_CONFIG` | Config file path | `/etc/flowsim/slurm.yaml` | -### Example k8s.yaml - -```yaml -kubeconfig: /home/user/.kube/config -namespace: default -host_output_dir: /host-stage-traces -runtime_class_name: nvidia -shm_size: 16Gi -``` - -### Example slurm.yaml - -```yaml -partition: gpu -account: my-project -time: "02:00:00" -container_runtime: none -cli_prefix: "" -``` - --- -## Output Directory Structure - -All schedulers produce a unified trace output structure: +## Output Structure ``` stage_traces/{scheduler}/{YYYYMMDD_HHMMSS}/ ├── bs1_input2048_ctx0/ -│ ├── *.trace.json.gz # Raw traces -│ ├── parsed/*.csv # Parsed CSVs -│ ├── merged/*_merged.trace.csv # Merged trace CSV -│ ├── shape_traces/ # Shape traces (collect=shapes/all) -│ ├── shape_parsed/*.csv # Shape parsed CSVs -│ ├── analysis_extend.json # Extend stage analysis -│ └── analysis_decode.json # Decode stage analysis +│ ├── *.trace.json.gz +│ ├── parsed/*.csv +│ ├── merged/*_merged.trace.csv +│ ├── shape_traces/ + shape_parsed/ +│ ├── analysis_extend.json +│ └── analysis_decode.json ├── logs/ │ ├── server_*.stdout.log │ └── server_*.stderr.log └── sweep_summary.json ``` +--- + +## Development + +### Test Clusters + +```bash +# Kind (K8s) — GPU passthrough via CDI +bash tests/integration/infra/dev-setup.sh kind +bash tests/integration/infra/dev-teardown.sh kind + +# Slurm — Docker Compose cluster +cd tests/integration/infra/ +docker compose -f slurm-compose.yaml up -d +docker compose -f slurm-compose.yaml down -v +``` + +### Running Tests + +```bash +# Unit tests +python -m pytest tests/unit/test_scheduler_cli.py -v + +# 
Integration tests +python -m pytest tests/integration/test_scheduler_local.py::TestK8sScheduler -v -x +python -m pytest tests/integration/test_scheduler_local.py::TestSlurmScheduler -v -x +``` + From b0dfdd50659d9c9947c808a32cdd4e4995619ff3 Mon Sep 17 00:00:00 2001 From: Terrence Zhang Date: Thu, 19 Mar 2026 19:12:43 +0000 Subject: [PATCH 40/56] format: fix with black --- schedulers/base.py | 24 +- schedulers/config.py | 6 +- schedulers/k8s.py | 103 ++++-- schedulers/local.py | 72 +++-- schedulers/slurm.py | 39 ++- scripts/cli.py | 28 +- scripts/run_stage_profile.py | 13 +- scripts/status_profile.py | 46 ++- scripts/submit_profile.py | 79 +++-- tests/integration/test_scheduler_local.py | 378 ++++++++++++++-------- tests/unit/test_scheduler_cli.py | 66 +++- 11 files changed, 588 insertions(+), 266 deletions(-) diff --git a/schedulers/base.py b/schedulers/base.py index a47ac1f..d3b32c4 100644 --- a/schedulers/base.py +++ b/schedulers/base.py @@ -13,8 +13,8 @@ class JobResult: """Structured return value from ``submit()``.""" job_id: str - scheduler: str # "local", "k8s", "slurm" - state: str # "Submitted", "Completed", "Failed" + scheduler: str # "local", "k8s", "slurm" + state: str # "Submitted", "Completed", "Failed" output_dir: str = "" message: str = "" @@ -150,7 +150,9 @@ def submit(self, spec: ProfileJobSpec) -> JobResult: def cancel(self, job_id: str) -> str: """Cancel a running or pending job. Returns a status message.""" - raise NotImplementedError(f"{type(self).__name__} does not support cancel") + raise NotImplementedError( + f"{type(self).__name__} does not support cancel" + ) def status(self, job_id: str) -> dict: """Query job status. Returns dict with at least 'state' key. 
@@ -163,9 +165,13 @@ def status(self, job_id: str) -> dict: "output_hint": "where to find trace files", } """ - raise NotImplementedError(f"{type(self).__name__} does not support status queries") + raise NotImplementedError( + f"{type(self).__name__} does not support status queries" + ) - def logs(self, job_id: str, *, tail: int = 100, follow: bool = False) -> str: + def logs( + self, job_id: str, *, tail: int = 100, follow: bool = False + ) -> str: """Retrieve recent log output for a job. Parameters @@ -177,7 +183,9 @@ def logs(self, job_id: str, *, tail: int = 100, follow: bool = False) -> str: follow : bool If True, stream logs in real time (blocking). """ - raise NotImplementedError(f"{type(self).__name__} does not support log retrieval") + raise NotImplementedError( + f"{type(self).__name__} does not support log retrieval" + ) def list_jobs(self, *, status_filter: str = "") -> list[dict]: """List jobs managed by this scheduler. @@ -193,7 +201,9 @@ def list_jobs(self, *, status_filter: str = "") -> list[dict]: list[dict] Each dict has at least ``{"job_id": ..., "state": ..., "name": ...}``. 
""" - raise NotImplementedError(f"{type(self).__name__} does not support list") + raise NotImplementedError( + f"{type(self).__name__} does not support list" + ) def dry_run(self, spec: ProfileJobSpec) -> str: """Render and return the manifest without submitting.""" diff --git a/schedulers/config.py b/schedulers/config.py index 3b2d2fd..433c87b 100644 --- a/schedulers/config.py +++ b/schedulers/config.py @@ -48,10 +48,12 @@ def _save_yaml(path: Path, data: dict) -> None: path.parent.mkdir(parents=True, exist_ok=True) try: import yaml as _y + with open(path, "w") as f: _y.safe_dump(data, f, default_flow_style=False, sort_keys=False) except ImportError: import json as _j + with open(path, "w") as f: _j.dump(data, f, indent=2, ensure_ascii=False) f.write("\n") @@ -97,6 +99,8 @@ def cfg_get(cfg: dict, key: str, fallback: str = "") -> str: return fallback -def resolve_default(env_var: str, cfg: dict, key: str, fallback: str = "") -> str: +def resolve_default( + env_var: str, cfg: dict, key: str, fallback: str = "" +) -> str: """Resolve a config value: env var > config file > fallback.""" return os.environ.get(env_var, "") or cfg_get(cfg, key, fallback) diff --git a/schedulers/k8s.py b/schedulers/k8s.py index 83b991c..e75e7f8 100644 --- a/schedulers/k8s.py +++ b/schedulers/k8s.py @@ -22,6 +22,7 @@ def _k8s_job_state(status) -> str: return "Running" return "Pending" + # Optional: nicer YAML output for dry-run. 
try: import yaml as _yaml # type: ignore[import-untyped] @@ -102,16 +103,36 @@ def _build_job_dict(self, spec: ProfileJobSpec) -> dict: # volumes + mounts volume_mounts = [{"name": "dshm", "mountPath": "/dev/shm"}] volumes: list[dict] = [ - {"name": "dshm", "emptyDir": {"medium": "Memory", "sizeLimit": self.shm_size}}, + { + "name": "dshm", + "emptyDir": {"medium": "Memory", "sizeLimit": self.shm_size}, + }, ] if self.pvc_name: - volume_mounts.append({"name": "output", "mountPath": spec.output_dir}) - volumes.append({"name": "output", "persistentVolumeClaim": {"claimName": self.pvc_name}}) + volume_mounts.append( + {"name": "output", "mountPath": spec.output_dir} + ) + volumes.append( + { + "name": "output", + "persistentVolumeClaim": {"claimName": self.pvc_name}, + } + ) elif self.host_output_dir: # Mount at base traces dir so the full directory structure # (e.g. k8s/{timestamp}/bs1_...) is preserved on the host. - volume_mounts.append({"name": "output", "mountPath": "/flowsim/stage_traces"}) - volumes.append({"name": "output", "hostPath": {"path": self.host_output_dir, "type": "DirectoryOrCreate"}}) + volume_mounts.append( + {"name": "output", "mountPath": "/flowsim/stage_traces"} + ) + volumes.append( + { + "name": "output", + "hostPath": { + "path": self.host_output_dir, + "type": "DirectoryOrCreate", + }, + } + ) container = { "name": "profiler", @@ -145,13 +166,19 @@ def _build_job_dict(self, spec: ProfileJobSpec) -> dict: "metadata": { "name": job_name, "namespace": self.namespace, - "labels": {"app": "flowsim", "component": "profiling", "collect": spec.collect}, + "labels": { + "app": "flowsim", + "component": "profiling", + "collect": spec.collect, + }, }, "spec": { "backoffLimit": 0, "ttlSecondsAfterFinished": 86400, "template": { - "metadata": {"labels": {"app": "flowsim", "component": "profiling"}}, + "metadata": { + "labels": {"app": "flowsim", "component": "profiling"} + }, "spec": pod_spec, }, }, @@ -202,7 +229,11 @@ def _load_k8s(self): try: 
k8s_config.load_incluster_config() except k8s_config.ConfigException: - hint = " Try --k8s-kubeconfig /path/to/kubeconfig." if not self.kubeconfig else "" + hint = ( + " Try --k8s-kubeconfig /path/to/kubeconfig." + if not self.kubeconfig + else "" + ) raise RuntimeError( "No valid Kubernetes configuration found. " "Checked kubeconfig file and in-cluster environment." + hint @@ -226,7 +257,9 @@ def status(self, job_id: str) -> dict: """Query K8s Job status by job name.""" batch_api, core_api = self._load_k8s() - job = batch_api.read_namespaced_job(name=job_id, namespace=self.namespace) + job = batch_api.read_namespaced_job( + name=job_id, namespace=self.namespace + ) # Determine state state = _k8s_job_state(job.status) @@ -250,7 +283,9 @@ def status(self, job_id: str) -> dict: else: output_hint = "WARNING: no PVC or hostPath configured — traces are lost when pod exits" - msg_parts = [f"Job: {job_id} Namespace: {self.namespace} State: {state}"] + msg_parts = [ + f"Job: {job_id} Namespace: {self.namespace} State: {state}" + ] if pod_statuses: msg_parts.append("Pods: " + ", ".join(pod_statuses)) msg_parts.append(output_hint) @@ -261,7 +296,9 @@ def status(self, job_id: str) -> dict: "output_hint": output_hint, } - def logs(self, job_id: str, *, tail: int = 100, follow: bool = False) -> str: + def logs( + self, job_id: str, *, tail: int = 100, follow: bool = False + ) -> str: """Show where logs are and how to access them for a K8s Job.""" _, core_api = self._load_k8s() @@ -270,7 +307,9 @@ def logs(self, job_id: str, *, tail: int = 100, follow: bool = False) -> str: label_selector=f"job-name={job_id}", ) if not pods.items: - return f"No pods found for job {job_id} in namespace {self.namespace}" + return ( + f"No pods found for job {job_id} in namespace {self.namespace}" + ) if follow: # Stream logs from the first running/succeeded pod @@ -300,21 +339,29 @@ def logs(self, job_id: str, *, tail: int = 100, follow: bool = False) -> str: for pod in pods.items: name = 
pod.metadata.name parts.append(f" kubectl logs {name} -n {self.namespace}") - parts.append(f" kubectl logs {name} -n {self.namespace} --tail={tail}") + parts.append( + f" kubectl logs {name} -n {self.namespace} --tail={tail}" + ) parts.append("") # Persistent log files if self.pvc_name: - parts.append(f"Server logs + traces persisted on PVC '{self.pvc_name}'.") + parts.append( + f"Server logs + traces persisted on PVC '{self.pvc_name}'." + ) parts.append("Copy to local machine:") for pod in pods.items: name = pod.metadata.name if pod.status.phase in ("Running", "Succeeded"): - parts.append(f" kubectl cp {self.namespace}/{name}:/flowsim/stage_traces ./stage_traces") + parts.append( + f" kubectl cp {self.namespace}/{name}:/flowsim/stage_traces ./stage_traces" + ) break else: - parts.append(" (pod not running — mount the PVC in another pod to retrieve files)") + parts.append( + " (pod not running — mount the PVC in another pod to retrieve files)" + ) elif self.host_output_dir: parts.append(f"Server logs + traces at hostPath on the node:") parts.append(f" {self.host_output_dir}/") @@ -323,7 +370,9 @@ def logs(self, job_id: str, *, tail: int = 100, follow: bool = False) -> str: for pod in pods.items: if pod.spec.node_name: parts.append(f" Node: {pod.spec.node_name}") - parts.append(f" scp {pod.spec.node_name}:{self.host_output_dir}/ ./stage_traces/") + parts.append( + f" scp {pod.spec.node_name}:{self.host_output_dir}/ ./stage_traces/" + ) break return "\n".join(parts) @@ -345,13 +394,17 @@ def list_jobs(self, *, status_filter: str = "") -> list[dict]: created = "" if job.metadata.creation_timestamp: - created = job.metadata.creation_timestamp.strftime("%Y-%m-%d %H:%M:%S") + created = job.metadata.creation_timestamp.strftime( + "%Y-%m-%d %H:%M:%S" + ) - result.append({ - "job_id": job.metadata.name, - "name": job.metadata.name, - "state": state, - "namespace": self.namespace, - "created": created, - }) + result.append( + { + "job_id": job.metadata.name, + "name": 
job.metadata.name, + "state": state, + "namespace": self.namespace, + "created": created, + } + ) return result diff --git a/schedulers/local.py b/schedulers/local.py index 673acac..f9c2aa8 100644 --- a/schedulers/local.py +++ b/schedulers/local.py @@ -58,7 +58,8 @@ def _check_image_exists(image: str) -> None: """Raise if the Docker image is not available locally.""" result = subprocess.run( ["docker", "image", "inspect", image], - capture_output=True, timeout=10, + capture_output=True, + timeout=10, ) if result.returncode != 0: raise SystemExit( @@ -84,7 +85,7 @@ def _host_output_dir(self, spec_output_dir: str) -> str: # Strip the /flowsim/ prefix to get the relative path rel = spec_output_dir if rel.startswith("/flowsim/"): - rel = rel[len("/flowsim/"):] + rel = rel[len("/flowsim/") :] return os.path.join(self.workdir, rel) def _build_docker_cmd(self, spec: ProfileJobSpec) -> str: @@ -97,7 +98,9 @@ def _build_docker_cmd(self, spec: ProfileJobSpec) -> str: """ job_name = spec.default_job_name()[:63] host_output = self._host_output_dir(spec.output_dir) - container_output = spec.output_dir # e.g. /flowsim/stage_traces/local/{ts} + container_output = ( + spec.output_dir + ) # e.g. /flowsim/stage_traces/local/{ts} inner_cmd = spec.build_shell_command() @@ -140,7 +143,8 @@ def submit(self, spec: ProfileJobSpec) -> JobResult: # Remove stale container with the same name (e.g. 
from a killed run) subprocess.run( ["docker", "rm", "-f", job_name[:63]], - capture_output=True, timeout=10, + capture_output=True, + timeout=10, ) stdout_path = os.path.join(log_dir, f"{job_name}_{ts}.stdout.log") stderr_path = os.path.join(log_dir, f"{job_name}_{ts}.stderr.log") @@ -171,10 +175,14 @@ def _tee(src, dest_file, dest_stream): dest_file.flush() t_out = threading.Thread( - target=_tee, args=(proc.stdout, fout, sys.stdout), daemon=True, + target=_tee, + args=(proc.stdout, fout, sys.stdout), + daemon=True, ) t_err = threading.Thread( - target=_tee, args=(proc.stderr, ferr, sys.stderr), daemon=True, + target=_tee, + args=(proc.stderr, ferr, sys.stderr), + daemon=True, ) t_out.start() t_err.start() @@ -210,7 +218,9 @@ def cancel(self, job_id: str) -> str: """Stop the Docker container for a local job.""" proc = subprocess.run( ["docker", "stop", job_id], - capture_output=True, text=True, timeout=30, + capture_output=True, + text=True, + timeout=30, ) if proc.returncode == 0: return f"Stopped container {job_id}" @@ -234,9 +244,11 @@ def status(self, job_id: str) -> dict: """ matches = [] for log_dir in self._find_log_dirs(): - matches.extend(sorted(glob.glob( - os.path.join(log_dir, f"{job_id}_*.stdout.log") - ))) + matches.extend( + sorted( + glob.glob(os.path.join(log_dir, f"{job_id}_*.stdout.log")) + ) + ) if not matches: return { @@ -260,25 +272,29 @@ def status(self, job_id: str) -> dict: "output_hint": trace_dir, } - def logs(self, job_id: str, *, tail: int = 100, follow: bool = False) -> str: + def logs( + self, job_id: str, *, tail: int = 100, follow: bool = False + ) -> str: """List log files for a local job and print access commands.""" matches = [] for log_dir in self._find_log_dirs(): - matches.extend(sorted(glob.glob( - os.path.join(log_dir, f"{job_id}_*") - ))) + matches.extend( + sorted(glob.glob(os.path.join(log_dir, f"{job_id}_*"))) + ) if not matches: for log_dir in self._find_log_dirs(): - matches.extend(sorted(glob.glob( - 
os.path.join(log_dir, f"*{job_id}*") - ))) + matches.extend( + sorted(glob.glob(os.path.join(log_dir, f"*{job_id}*"))) + ) if not matches: return f"No logs found matching '{job_id}'" if follow: - stdout_files = sorted(f for f in matches if f.endswith(".stdout.log")) + stdout_files = sorted( + f for f in matches if f.endswith(".stdout.log") + ) if stdout_files: return f"Follow logs with:\n tail -f {stdout_files[-1]}" return f"No stdout log found to follow for '{job_id}'" @@ -315,9 +331,9 @@ def list_jobs(self, *, status_filter: str = "") -> list[dict]: """List local jobs by scanning log files.""" matches = [] for log_dir in self._find_log_dirs(): - matches.extend(sorted(glob.glob( - os.path.join(log_dir, "*.stdout.log") - ))) + matches.extend( + sorted(glob.glob(os.path.join(log_dir, "*.stdout.log"))) + ) jobs: list[dict] = [] for path in matches: @@ -330,12 +346,14 @@ def list_jobs(self, *, status_filter: str = "") -> list[dict]: name = m.group(1) ts = m.group(2) state = "Completed" - jobs.append({ - "job_id": name, - "name": name, - "state": state, - "timestamp": ts, - }) + jobs.append( + { + "job_id": name, + "name": name, + "state": state, + "timestamp": ts, + } + ) if status_filter: filt = status_filter.lower() diff --git a/schedulers/slurm.py b/schedulers/slurm.py index 265a725..ad12e75 100644 --- a/schedulers/slurm.py +++ b/schedulers/slurm.py @@ -176,7 +176,9 @@ def _submit_cli(self, spec: ProfileJobSpec) -> JobResult: f"sbatch failed (exit {r.returncode}):\n{r.stderr}" ) - job_id = r.stdout.strip().split(";")[0] # parsable: "jobid" or "jobid;cluster" + job_id = r.stdout.strip().split(";")[ + 0 + ] # parsable: "jobid" or "jobid;cluster" return JobResult( job_id=job_id, scheduler="slurm", @@ -193,7 +195,9 @@ def status(self, job_id: str) -> dict: """Query Slurm job status.""" return self._status_cli(job_id) - def logs(self, job_id: str, *, tail: int = 100, follow: bool = False) -> str: + def logs( + self, job_id: str, *, tail: int = 100, follow: bool = False 
+ ) -> str: """Show Slurm job log information.""" return self._logs_cli(job_id, tail=tail, follow=follow) @@ -216,7 +220,11 @@ def _status_cli(self, job_id: str) -> dict: # (completed jobs stay in memory for MinJobAge seconds, default 300s) r = self._cli_run("scontrol", "show", "job", job_id) if r.returncode != 0 or not r.stdout.strip(): - return {"state": "Unknown", "message": f"No job found with ID {job_id}", "output_hint": ""} + return { + "state": "Unknown", + "message": f"No job found with ID {job_id}", + "output_hint": "", + } # Parse key=value output fields: dict[str, str] = {} @@ -258,7 +266,9 @@ def _status_cli(self, job_id: str) -> dict: "output_hint": output_file, } - def _logs_cli(self, job_id: str, *, tail: int = 100, follow: bool = False) -> str: + def _logs_cli( + self, job_id: str, *, tail: int = 100, follow: bool = False + ) -> str: info = self._status_cli(job_id) output_file = info.get("output_hint", "") @@ -289,7 +299,10 @@ def _logs_cli(self, job_id: str, *, tail: int = 100, follow: bool = False) -> st def _list_jobs_cli(self, *, status_filter: str = "") -> list[dict]: r = self._cli_run( - "squeue", "-o", "%i|%j|%T|%P|%N", "-h", + "squeue", + "-o", + "%i|%j|%T|%P|%N", + "-h", ) if r.returncode != 0: raise RuntimeError(f"squeue failed: {r.stderr}") @@ -304,11 +317,13 @@ def _list_jobs_cli(self, *, status_filter: str = "") -> list[dict]: state = parts[2] if len(parts) > 2 else "UNKNOWN" if status_filter and state.upper() != status_filter.upper(): continue - result.append({ - "job_id": parts[0] if parts else "", - "name": name, - "state": state, - "partition": parts[3] if len(parts) > 3 else "", - "nodes": parts[4] if len(parts) > 4 else "", - }) + result.append( + { + "job_id": parts[0] if parts else "", + "name": name, + "state": state, + "partition": parts[3] if len(parts) > 3 else "", + "nodes": parts[4] if len(parts) > 4 else "", + } + ) return result diff --git a/scripts/cli.py b/scripts/cli.py index ba0a65e..04d0c84 100644 --- 
a/scripts/cli.py +++ b/scripts/cli.py @@ -14,9 +14,10 @@ import sys from pathlib import Path - _CONFIG_DIR = Path.home() / ".flowsim" -_TEMPLATES_DIR = Path(__file__).resolve().parent.parent / "schedulers" / "templates" +_TEMPLATES_DIR = ( + Path(__file__).resolve().parent.parent / "schedulers" / "templates" +) def _cmd_init(argv: list[str]) -> int: @@ -37,15 +38,19 @@ def _cmd_init(argv: list[str]) -> int: formatter_class=argparse.RawDescriptionHelpFormatter, ) parser.add_argument( - "scheduler", choices=["k8s", "slurm"], + "scheduler", + choices=["k8s", "slurm"], help="Scheduler type", ) parser.add_argument( - "--config", "-c", default="", + "--config", + "-c", + default="", help="Path to a config YAML to install (default: bundled template)", ) parser.add_argument( - "--force", action="store_true", + "--force", + action="store_true", help="Overwrite existing config file", ) args = parser.parse_args(argv) @@ -53,8 +58,10 @@ def _cmd_init(argv: list[str]) -> int: dst = _CONFIG_DIR / f"{args.scheduler}.yaml" if dst.exists() and not args.force: - print(f"Error: {dst} already exists (use --force to overwrite)", - file=sys.stderr) + print( + f"Error: {dst} already exists (use --force to overwrite)", + file=sys.stderr, + ) return 1 if args.config: @@ -67,11 +74,14 @@ def _cmd_init(argv: list[str]) -> int: return 1 import shutil + _CONFIG_DIR.mkdir(parents=True, exist_ok=True) shutil.copy2(src, dst) print(f"Installed {src} → {dst}") - print(f"Edit {dst}, then run: flowsim submit --scheduler " - f"{args.scheduler} ...") + print( + f"Edit {dst}, then run: flowsim submit --scheduler " + f"{args.scheduler} ..." 
+ ) return 0 diff --git a/scripts/run_stage_profile.py b/scripts/run_stage_profile.py index 00dce4b..3c739ef 100644 --- a/scripts/run_stage_profile.py +++ b/scripts/run_stage_profile.py @@ -799,11 +799,20 @@ def _start_server( return proc -def _run_perf(args, summary: list[dict], *, bs: Optional[int] = None, input_len: Optional[int] = None, existing_ctx: Optional[int] = None) -> int: +def _run_perf( + args, + summary: list[dict], + *, + bs: Optional[int] = None, + input_len: Optional[int] = None, + existing_ctx: Optional[int] = None, +) -> int: """Collect traces for a single (bs, input_len, existing_ctx, decode_tokens) point.""" bs = bs if bs is not None else args.bs input_len = input_len if input_len is not None else args.input_len - existing_ctx = existing_ctx if existing_ctx is not None else args.existing_ctx + existing_ctx = ( + existing_ctx if existing_ctx is not None else args.existing_ctx + ) tag = f"bs{bs}_input{input_len}_ctx{existing_ctx}" sub_dir = os.path.join(args.output_dir, tag) diff --git a/scripts/status_profile.py b/scripts/status_profile.py index 5d10f84..bf389ab 100644 --- a/scripts/status_profile.py +++ b/scripts/status_profile.py @@ -31,12 +31,16 @@ import argparse import sys -from schedulers.config import cfg_get, load_k8s_config, load_slurm_config, resolve_default +from schedulers.config import ( + cfg_get, + load_k8s_config, + load_slurm_config, + resolve_default, +) from schedulers.k8s import K8sScheduler from schedulers.local import LocalScheduler from schedulers.slurm import SlurmScheduler - _d = resolve_default @@ -49,7 +53,9 @@ def _add_scheduler_args(p: argparse.ArgumentParser) -> None: ) -def _add_scheduler_specific_args(p: argparse.ArgumentParser, scheduler: str) -> None: +def _add_scheduler_specific_args( + p: argparse.ArgumentParser, scheduler: str +) -> None: """Add only the args relevant to the chosen scheduler (second pass).""" k8s_cfg = load_k8s_config() slurm_cfg = load_slurm_config() @@ -60,7 +66,9 @@ def 
_add_scheduler_specific_args(p: argparse.ArgumentParser, scheduler: str) -> elif scheduler == "k8s": p.add_argument( "--k8s-namespace", - default=_d("FLOWSIM_K8S_NAMESPACE", k8s_cfg, "namespace", "default"), + default=_d( + "FLOWSIM_K8S_NAMESPACE", k8s_cfg, "namespace", "default" + ), ) p.add_argument( "--k8s-kubeconfig", @@ -103,7 +111,9 @@ def _build_scheduler(args: argparse.Namespace): ) -def _parse_two_pass(p: argparse.ArgumentParser, argv: list[str] | None = None) -> argparse.Namespace: +def _parse_two_pass( + p: argparse.ArgumentParser, argv: list[str] | None = None +) -> argparse.Namespace: """Two-pass parse: peek --scheduler, add scheduler-specific args, full parse.""" _pre = argparse.ArgumentParser(add_help=False) _pre.add_argument("--scheduler", choices=["local", "k8s", "slurm"]) @@ -132,8 +142,15 @@ def main_logs(argv: list[str] | None = None) -> None: p = argparse.ArgumentParser(description="Retrieve FlowSim job logs.") _add_scheduler_args(p) p.add_argument("--job", required=True, help="Job name or ID") - p.add_argument("--tail", type=int, default=100, help="Number of log lines (default: 100)") - p.add_argument("--follow", "-f", action="store_true", help="Follow log output") + p.add_argument( + "--tail", + type=int, + default=100, + help="Number of log lines (default: 100)", + ) + p.add_argument( + "--follow", "-f", action="store_true", help="Follow log output" + ) args = _parse_two_pass(p, argv) scheduler = _build_scheduler(args) @@ -148,7 +165,11 @@ def main_logs(argv: list[str] | None = None) -> None: def main_list(argv: list[str] | None = None) -> None: p = argparse.ArgumentParser(description="List FlowSim jobs.") _add_scheduler_args(p) - p.add_argument("--status", default="", help="Filter by job state (e.g. Running, Succeeded, PENDING)") + p.add_argument( + "--status", + default="", + help="Filter by job state (e.g. 
Running, Succeeded, PENDING)", + ) args = _parse_two_pass(p, argv) scheduler = _build_scheduler(args) @@ -159,12 +180,17 @@ def main_list(argv: list[str] | None = None) -> None: return # Print table header headers = list(jobs[0].keys()) - widths = {h: max(len(h), max(len(str(j.get(h, ""))) for j in jobs)) for h in headers} + widths = { + h: max(len(h), max(len(str(j.get(h, ""))) for j in jobs)) + for h in headers + } header_line = " ".join(h.upper().ljust(widths[h]) for h in headers) print(header_line) print("-" * len(header_line)) for job in jobs: - print(" ".join(str(job.get(h, "")).ljust(widths[h]) for h in headers)) + print( + " ".join(str(job.get(h, "")).ljust(widths[h]) for h in headers) + ) except Exception as exc: print(f"Error: {exc}", file=sys.stderr) sys.exit(1) diff --git a/scripts/submit_profile.py b/scripts/submit_profile.py index 747b9b3..5e1021a 100644 --- a/scripts/submit_profile.py +++ b/scripts/submit_profile.py @@ -51,13 +51,17 @@ import sys from schedulers.base import ProfileJobSpec -from schedulers.config import cfg_get, load_k8s_config, load_slurm_config, resolve_default +from schedulers.config import ( + cfg_get, + load_k8s_config, + load_slurm_config, + resolve_default, +) from schedulers.k8s import K8sScheduler from schedulers.local import LocalScheduler from schedulers.slurm import SlurmScheduler from scripts import load_sweep_file, parse_sweep_point - # Short alias for argparse default= expressions _d = resolve_default @@ -97,7 +101,8 @@ def _parse_args(argv: list[str] | None = None) -> argparse.Namespace: wl.add_argument("--decode-tokens", type=int, default=32) wl.add_argument("--warmup-n", type=int, default=5) wl.add_argument( - "--disable-chunked-prefill", action="store_true", + "--disable-chunked-prefill", + action="store_true", ) wl.add_argument("--max-prefill-tokens", type=int, default=131072) wl.add_argument( @@ -132,7 +137,10 @@ def _parse_args(argv: list[str] | None = None) -> argparse.Namespace: infra = 
p.add_argument_group("infrastructure") infra.add_argument("--image", default="flowsim-image:latest") infra.add_argument( - "--gpus", type=int, default=1, help="Total GPU count", + "--gpus", + type=int, + default=1, + help="Total GPU count", ) infra.add_argument("--host", default="0.0.0.0") infra.add_argument("--port", type=int, default=30001) @@ -166,10 +174,14 @@ def _parse_args(argv: list[str] | None = None) -> argparse.Namespace: ) elif pre.scheduler == "k8s": - k8s = p.add_argument_group("kubernetes options (config: ~/.flowsim/k8s.yaml)") + k8s = p.add_argument_group( + "kubernetes options (config: ~/.flowsim/k8s.yaml)" + ) k8s.add_argument( "--k8s-namespace", - default=_d("FLOWSIM_K8S_NAMESPACE", k8s_cfg, "namespace", "default"), + default=_d( + "FLOWSIM_K8S_NAMESPACE", k8s_cfg, "namespace", "default" + ), help="K8s namespace (env: FLOWSIM_K8S_NAMESPACE)", ) k8s.add_argument( @@ -214,7 +226,9 @@ def _parse_args(argv: list[str] | None = None) -> argparse.Namespace: ) elif pre.scheduler == "slurm": - slurm = p.add_argument_group("slurm options (config: ~/.flowsim/slurm.yaml)") + slurm = p.add_argument_group( + "slurm options (config: ~/.flowsim/slurm.yaml)" + ) slurm.add_argument( "--slurm-partition", default=_d("FLOWSIM_SLURM_PARTITION", slurm_cfg, "partition", ""), @@ -243,7 +257,11 @@ def _parse_args(argv: list[str] | None = None) -> argparse.Namespace: default=cfg_get(slurm_cfg, "container_mounts", ""), ) # Modules from config (list) + CLI (append) - cfg_modules = slurm_cfg.get("modules") if isinstance(slurm_cfg.get("modules"), list) else [] + cfg_modules = ( + slurm_cfg.get("modules") + if isinstance(slurm_cfg.get("modules"), list) + else [] + ) slurm.add_argument( "--slurm-module", action="append", @@ -316,7 +334,9 @@ def _build_scheduler(args: argparse.Namespace): for item in args.k8s_node_selector: k, _, v = item.partition("=") if not v: - sys.exit(f"Bad --k8s-node-selector format: {item!r} (use KEY=VALUE)") + sys.exit( + f"Bad --k8s-node-selector 
format: {item!r} (use KEY=VALUE)" + ) node_sel[k] = v return K8sScheduler( namespace=args.k8s_namespace, @@ -349,6 +369,7 @@ def main(argv: list[str] | None = None) -> None: # Smart defaults for output_dir based on scheduler. # Layout: stage_traces/{scheduler}/{timestamp}/ import time as _time + _ts = _time.strftime("%Y%m%d_%H%M%S") if not args.output_dir: if args.scheduler == "local": @@ -365,7 +386,9 @@ def main(argv: list[str] | None = None) -> None: # For local scheduler, convert absolute host model_path to relative # so it resolves correctly inside the container (workdir=/flowsim). if args.scheduler == "local" and os.path.isabs(args.model_path): - project_root = os.path.dirname(os.path.dirname(os.path.abspath(__file__))) + project_root = os.path.dirname( + os.path.dirname(os.path.abspath(__file__)) + ) if args.model_path.startswith(project_root): args.model_path = os.path.relpath(args.model_path, project_root) @@ -389,18 +412,36 @@ def main(argv: list[str] | None = None) -> None: if args.k8s_pvc: print(f" (persisted on PVC '{args.k8s_pvc}')") else: - print(f" (persisted at hostPath '{args.k8s_host_output_dir}' on the node)") - print(f"\nTo check status: flowsim status --scheduler k8s --job {job_id}") - print(f"To view logs: flowsim logs --scheduler k8s --job {job_id}") - print(f"To follow logs: flowsim logs --scheduler k8s --job {job_id} --follow") - print(f"To cancel: flowsim cancel --scheduler k8s --job {job_id}") + print( + f" (persisted at hostPath '{args.k8s_host_output_dir}' on the node)" + ) + print( + f"\nTo check status: flowsim status --scheduler k8s --job {job_id}" + ) + print( + f"To view logs: flowsim logs --scheduler k8s --job {job_id}" + ) + print( + f"To follow logs: flowsim logs --scheduler k8s --job {job_id} --follow" + ) + print( + f"To cancel: flowsim cancel --scheduler k8s --job {job_id}" + ) elif sched == "slurm": print(f" (on cluster shared filesystem)") - print(f"\nTo check status: flowsim status --scheduler slurm --job {job_id}") - 
print(f"To view logs: flowsim logs --scheduler slurm --job {job_id}") - print(f"To cancel: flowsim cancel --scheduler slurm --job {job_id}") + print( + f"\nTo check status: flowsim status --scheduler slurm --job {job_id}" + ) + print( + f"To view logs: flowsim logs --scheduler slurm --job {job_id}" + ) + print( + f"To cancel: flowsim cancel --scheduler slurm --job {job_id}" + ) else: - print(f"\nTo view logs: flowsim logs --scheduler local --job {job_id}") + print( + f"\nTo view logs: flowsim logs --scheduler local --job {job_id}" + ) print(f"To list all jobs: flowsim list --scheduler {sched}") diff --git a/tests/integration/test_scheduler_local.py b/tests/integration/test_scheduler_local.py index a2086f1..a6bc416 100644 --- a/tests/integration/test_scheduler_local.py +++ b/tests/integration/test_scheduler_local.py @@ -58,29 +58,34 @@ _PROJECT_ROOT = os.path.abspath( os.path.join(os.path.dirname(__file__), "..", "..") ) -_DEV_SETUP = os.path.join(_PROJECT_ROOT, "tests", "integration", "infra", "dev-setup.sh") -_DEV_TEARDOWN = os.path.join(_PROJECT_ROOT, "tests", "integration", "infra", "dev-teardown.sh") - -MODEL = os.environ.get( - "MODEL", "workload/models/configs/Qwen3-235B-A22B" +_DEV_SETUP = os.path.join( + _PROJECT_ROOT, "tests", "integration", "infra", "dev-setup.sh" +) +_DEV_TEARDOWN = os.path.join( + _PROJECT_ROOT, "tests", "integration", "infra", "dev-teardown.sh" ) + +MODEL = os.environ.get("MODEL", "workload/models/configs/Qwen3-235B-A22B") LOAD_FORMAT = os.environ.get("LOAD_FORMAT", "dummy") # --------------------------------------------------------------------------- # Helpers # --------------------------------------------------------------------------- -def _flowsim_cli(*args: str, timeout: int = 1200) -> subprocess.CompletedProcess: + +def _flowsim_cli( + *args: str, timeout: int = 1200 +) -> subprocess.CompletedProcess: """Run a ``flowsim`` subcommand via Python entry point.""" cmd = [ - sys.executable, "-u", "-c", + sys.executable, + "-u", + "-c", 
"from scripts.cli import main; main()", *args, ] env = os.environ.copy() - env["PYTHONPATH"] = _PROJECT_ROOT + ( - ":" + env.get("PYTHONPATH", "") - ) + env["PYTHONPATH"] = _PROJECT_ROOT + (":" + env.get("PYTHONPATH", "")) env["PYTHONUNBUFFERED"] = "1" return subprocess.run( cmd, @@ -123,10 +128,7 @@ def _assert_logs(output_dir: str) -> None: assert len(stdout_logs) > 0, f"No stdout logs in {log_dir}" assert len(stderr_logs) > 0, f"No stderr logs in {log_dir}" # At least one log should be non-empty - sizes = [ - os.path.getsize(os.path.join(log_dir, f)) - for f in stdout_logs - ] + sizes = [os.path.getsize(os.path.join(log_dir, f)) for f in stdout_logs] assert max(sizes) > 0, "All stdout logs are empty" @@ -185,10 +187,14 @@ def _validate_shapes(output_dir, bs, input_len, existing_ctx): tag = f"bs{bs}_input{input_len}_ctx{existing_ctx}" for csv_subdir in ("merged", "shape_parsed"): extend_csvs = sorted( - glob.glob(os.path.join(output_dir, tag, csv_subdir, "*TP-0*EXTEND*.csv")) + glob.glob( + os.path.join(output_dir, tag, csv_subdir, "*TP-0*EXTEND*.csv") + ) ) decode_csvs = sorted( - glob.glob(os.path.join(output_dir, tag, csv_subdir, "*TP-0*DECODE*.csv")) + glob.glob( + os.path.join(output_dir, tag, csv_subdir, "*TP-0*DECODE*.csv") + ) ) if extend_csvs and decode_csvs: break @@ -204,23 +210,23 @@ def _validate_shapes(output_dir, bs, input_len, existing_ctx): ext_gemm_dim0 = _first_matmul_dim0(extend_rows) assert ext_gemm_dim0 is not None, "No matmul kernel found in EXTEND CSV" expected_ext = bs * input_len - assert ext_gemm_dim0 == expected_ext, ( - f"EXTEND first GEMM dim0={ext_gemm_dim0}, expected bs*input_len={expected_ext}" - ) + assert ( + ext_gemm_dim0 == expected_ext + ), f"EXTEND first GEMM dim0={ext_gemm_dim0}, expected bs*input_len={expected_ext}" # EXTEND FlashAttn dims contain [bs, seq_len] seq_len = input_len + existing_ctx attn_pair = _attention_seqlen_pair(extend_rows, bs, seq_len) - assert attn_pair is not None, ( - f"No FlashAttention dim matching 
[bs={bs}, seqlen={seq_len}(+1)] in EXTEND CSV" - ) + assert ( + attn_pair is not None + ), f"No FlashAttention dim matching [bs={bs}, seqlen={seq_len}(+1)] in EXTEND CSV" # DECODE first GEMM dim0 == bs dec_gemm_dim0 = _first_matmul_dim0(decode_rows) assert dec_gemm_dim0 is not None, "No matmul kernel found in DECODE CSV" - assert dec_gemm_dim0 == bs, ( - f"DECODE first GEMM dim0={dec_gemm_dim0}, expected bs={bs}" - ) + assert ( + dec_gemm_dim0 == bs + ), f"DECODE first GEMM dim0={dec_gemm_dim0}, expected bs={bs}" # ===================================================================== @@ -244,7 +250,10 @@ class TestLocalScheduler: @pytest.mark.parametrize( "point", _TP1_POINTS, - ids=[f"bs{p['bs']}_il{p['input_len']}_ctx{p['existing_ctx']}" for p in _TP1_POINTS], + ids=[ + f"bs{p['bs']}_il{p['input_len']}_ctx{p['existing_ctx']}" + for p in _TP1_POINTS + ], ) def test_local_tp1_all(self, point): bs = point["bs"] @@ -255,18 +264,30 @@ def test_local_tp1_all(self, point): # ── Step 1: submit ── r = _flowsim_cli( "submit", - "--scheduler", "local", - "--collect", "all", - "--model-path", MODEL, - "--tp", "1", - "--bs", str(bs), - "--input-len", str(input_len), - "--existing-ctx", str(existing_ctx), - "--decode-tokens", str(decode_tokens), - "--warmup-n", "2", - "--gpus", "1", - "--local-gpus", "0", - "--extra-server-opts", f"--load-format {LOAD_FORMAT}", + "--scheduler", + "local", + "--collect", + "all", + "--model-path", + MODEL, + "--tp", + "1", + "--bs", + str(bs), + "--input-len", + str(input_len), + "--existing-ctx", + str(existing_ctx), + "--decode-tokens", + str(decode_tokens), + "--warmup-n", + "2", + "--gpus", + "1", + "--local-gpus", + "0", + "--extra-server-opts", + f"--load-format {LOAD_FORMAT}", ) if r.returncode != 0: print("STDOUT:", r.stdout[-3000:]) @@ -284,22 +305,26 @@ def test_local_tp1_all(self, point): break if job_id: break - assert job_id, f"Could not find job_id in submit output:\n{combined[-1000:]}" + assert ( + job_id + ), f"Could not find 
job_id in submit output:\n{combined[-1000:]}" # ── Step 2: list — verify job appears ── r_list = _flowsim_cli("list", "--scheduler", "local") assert r_list.returncode == 0, "flowsim list failed" - assert job_id in r_list.stdout, ( - f"Job {job_id} not found in list output:\n{r_list.stdout}" - ) + assert ( + job_id in r_list.stdout + ), f"Job {job_id} not found in list output:\n{r_list.stdout}" # ── Step 3: status — should be Completed (submit is synchronous) ── - r_status = _flowsim_cli("status", "--scheduler", "local", "--job", job_id) + r_status = _flowsim_cli( + "status", "--scheduler", "local", "--job", job_id + ) assert r_status.returncode == 0, "flowsim status failed" status_out = r_status.stdout.lower() - assert "completed" in status_out, ( - f"Job {job_id} not completed:\n{r_status.stdout}" - ) + assert ( + "completed" in status_out + ), f"Job {job_id} not completed:\n{r_status.stdout}" # ── Step 4: validate trace CSVs ── # Extract output_dir from status output (Traces dir: ...) @@ -308,23 +333,29 @@ def test_local_tp1_all(self, point): if "Traces dir:" in line: output_dir = line.split("Traces dir:", 1)[1].strip() break - assert output_dir and os.path.isdir(output_dir), ( - f"Could not find traces dir in status output:\n{r_status.stdout}" - ) + assert output_dir and os.path.isdir( + output_dir + ), f"Could not find traces dir in status output:\n{r_status.stdout}" _assert_traces(output_dir) _assert_logs(output_dir) - _validate_shapes(output_dir, bs=bs, input_len=input_len, existing_ctx=existing_ctx) + _validate_shapes( + output_dir, bs=bs, input_len=input_len, existing_ctx=existing_ctx + ) # ===================================================================== # Cluster setup helpers & fixtures # ===================================================================== + def _run_dev_setup(target: str) -> None: """Run ``tests/integration/infra/dev-setup.sh `` and assert success.""" r = subprocess.run( ["bash", _DEV_SETUP, target], - capture_output=True, 
text=True, cwd=_PROJECT_ROOT, timeout=300, + capture_output=True, + text=True, + cwd=_PROJECT_ROOT, + timeout=300, ) if r.returncode != 0: raise RuntimeError( @@ -337,7 +368,10 @@ def _run_dev_teardown(target: str) -> None: """Run ``tests/integration/infra/dev-teardown.sh ``.""" subprocess.run( ["bash", _DEV_TEARDOWN, target], - capture_output=True, text=True, cwd=_PROJECT_ROOT, timeout=120, + capture_output=True, + text=True, + cwd=_PROJECT_ROOT, + timeout=120, ) @@ -346,7 +380,9 @@ def _kind_cluster_running() -> bool: try: r = subprocess.run( ["kubectl", "--context", "kind-flowsim", "get", "nodes"], - capture_output=True, text=True, timeout=15, + capture_output=True, + text=True, + timeout=15, ) return r.returncode == 0 and "Ready" in r.stdout except Exception: @@ -406,20 +442,34 @@ def test_k8s_real_submit_to_kind(self, kind_cluster): # ── Step 1: submit (host mount for trace retrieval) ── r = _flowsim_cli( "submit", - "--scheduler", "k8s", - "--collect", "all", - "--model-path", MODEL, - "--tp", "1", - "--bs", "1", - "--input-len", "2048", - "--existing-ctx", "0", - "--decode-tokens", "2", - "--warmup-n", "2", - "--gpus", "1", - "--k8s-namespace", "default", - "--k8s-host-output-dir", "/host-stage-traces", - "--job-name", job_name, - "--extra-server-opts", f"--load-format {LOAD_FORMAT}", + "--scheduler", + "k8s", + "--collect", + "all", + "--model-path", + MODEL, + "--tp", + "1", + "--bs", + "1", + "--input-len", + "2048", + "--existing-ctx", + "0", + "--decode-tokens", + "2", + "--warmup-n", + "2", + "--gpus", + "1", + "--k8s-namespace", + "default", + "--k8s-host-output-dir", + "/host-stage-traces", + "--job-name", + job_name, + "--extra-server-opts", + f"--load-format {LOAD_FORMAT}", ) combined = r.stdout + r.stderr if r.returncode != 0: @@ -429,15 +479,17 @@ def test_k8s_real_submit_to_kind(self, kind_cluster): # ── Step 2: list — verify job appears ── r_list = _flowsim_cli("list", "--scheduler", "k8s") assert r_list.returncode == 0 - assert job_name in 
r_list.stdout, ( - f"Job {job_name} not in list:\n{r_list.stdout}" - ) + assert ( + job_name in r_list.stdout + ), f"Job {job_name} not in list:\n{r_list.stdout}" # ── Step 3: status — poll until Completed/Succeeded (max 20 min) ── deadline = time.time() + 1200 state = "" while time.time() < deadline: - r_status = _flowsim_cli("status", "--scheduler", "k8s", "--job", job_name) + r_status = _flowsim_cli( + "status", "--scheduler", "k8s", "--job", job_name + ) assert r_status.returncode == 0 state = r_status.stdout.lower() if "completed" in state or "succeeded" in state: @@ -445,18 +497,18 @@ def test_k8s_real_submit_to_kind(self, kind_cluster): if "failed" in state: pytest.fail(f"K8s job failed:\n{r_status.stdout}") time.sleep(15) - assert "completed" in state or "succeeded" in state, ( - f"K8s job did not complete in time:\n{r_status.stdout}" - ) + assert ( + "completed" in state or "succeeded" in state + ), f"K8s job did not complete in time:\n{r_status.stdout}" # ── Step 4: traces are on host via Kind mount ── # output_dir inside container: /flowsim/stage_traces/k8s/{ts} # host_output_dir on worker: /host-stage-traces # → host: {project}/stage_traces/k8s/{ts}/ k8s_traces = os.path.join(host_traces, "k8s") - assert os.path.isdir(k8s_traces), ( - f"No k8s traces dir at {k8s_traces}" - ) + assert os.path.isdir( + k8s_traces + ), f"No k8s traces dir at {k8s_traces}" # Find the latest timestamped subdir ts_dirs = sorted(os.listdir(k8s_traces)) assert ts_dirs, f"No timestamp dirs in {k8s_traces}" @@ -476,12 +528,15 @@ def test_k8s_real_submit_to_kind(self, kind_cluster): # SLURM SCHEDULER # ===================================================================== + def _slurm_cluster_running() -> bool: """Check if local Slurm test cluster (docker compose) is running.""" try: r = subprocess.run( ["docker", "exec", "slurmctld", "sinfo", "-h"], - capture_output=True, text=True, timeout=10, + capture_output=True, + text=True, + timeout=10, ) return r.returncode == 0 and 
r.stdout.strip() != "" except Exception: @@ -516,21 +571,36 @@ def test_slurm_real_submit(self, slurm_cluster): # ── Step 1: submit (CLI mode, container_runtime=none) ── r = _flowsim_cli( "submit", - "--scheduler", "slurm", - "--collect", "all", - "--model-path", MODEL, - "--tp", "1", - "--bs", "1", - "--input-len", "2048", - "--existing-ctx", "0", - "--decode-tokens", "2", - "--warmup-n", "2", - "--gpus", "1", - "--slurm-partition", "normal", - "--slurm-cli-prefix", _SLURM_CLI_PREFIX, - "--slurm-container-runtime", "none", - "--output-dir", output_dir, - "--extra-server-opts", f"--load-format {LOAD_FORMAT}", + "--scheduler", + "slurm", + "--collect", + "all", + "--model-path", + MODEL, + "--tp", + "1", + "--bs", + "1", + "--input-len", + "2048", + "--existing-ctx", + "0", + "--decode-tokens", + "2", + "--warmup-n", + "2", + "--gpus", + "1", + "--slurm-partition", + "normal", + "--slurm-cli-prefix", + _SLURM_CLI_PREFIX, + "--slurm-container-runtime", + "none", + "--output-dir", + output_dir, + "--extra-server-opts", + f"--load-format {LOAD_FORMAT}", ) combined = r.stdout + r.stderr if r.returncode != 0: @@ -546,16 +616,22 @@ def test_slurm_real_submit(self, slurm_cluster): break if job_id: break - assert job_id, f"Could not find job_id in submit output:\n{combined[-1000:]}" + assert ( + job_id + ), f"Could not find job_id in submit output:\n{combined[-1000:]}" # ── Step 2: status — poll until Completed (max 20 min) ── deadline = time.time() + 1200 state = "" while time.time() < deadline: r_status = _flowsim_cli( - "status", "--scheduler", "slurm", - "--job", job_id, - "--slurm-cli-prefix", _SLURM_CLI_PREFIX, + "status", + "--scheduler", + "slurm", + "--job", + job_id, + "--slurm-cli-prefix", + _SLURM_CLI_PREFIX, ) assert r_status.returncode == 0 state = r_status.stdout.lower() @@ -564,15 +640,15 @@ def test_slurm_real_submit(self, slurm_cluster): if "failed" in state: pytest.fail(f"Slurm job failed:\n{r_status.stdout}") time.sleep(15) - assert "completed" in state 
or "succeeded" in state, ( - f"Slurm job did not complete in time:\n{r_status.stdout}" - ) + assert ( + "completed" in state or "succeeded" in state + ), f"Slurm job did not complete in time:\n{r_status.stdout}" # ── Step 3: traces are on host via mount ── slurm_traces = os.path.join(host_traces, "slurm") - assert os.path.isdir(slurm_traces), ( - f"No slurm traces dir at {slurm_traces}" - ) + assert os.path.isdir( + slurm_traces + ), f"No slurm traces dir at {slurm_traces}" ts_dirs = sorted(os.listdir(slurm_traces)) assert ts_dirs, f"No test dirs in {slurm_traces}" local_traces = os.path.join(slurm_traces, ts_dirs[-1]) @@ -586,9 +662,13 @@ def test_slurm_real_submit(self, slurm_cluster): # Cleanup: cancel job (traces stay on host for inspection) if job_id: _flowsim_cli( - "cancel", "--scheduler", "slurm", - "--job", job_id, - "--slurm-cli-prefix", _SLURM_CLI_PREFIX, + "cancel", + "--scheduler", + "slurm", + "--job", + job_id, + "--slurm-cli-prefix", + _SLURM_CLI_PREFIX, ) @@ -604,7 +684,9 @@ def test_slurm_real_submit(self, slurm_cluster): ] -def _assert_sweep_output(host_output_dir: str, points: list[tuple[int, int, int]]) -> None: +def _assert_sweep_output( + host_output_dir: str, points: list[tuple[int, int, int]] +) -> None: """Validate that every sweep point produced traces and parsed CSVs.""" for bs, il, ctx in points: tag = f"bs{bs}_input{il}_ctx{ctx}" @@ -617,9 +699,9 @@ def _assert_sweep_output(host_output_dir: str, points: list[tuple[int, int, int] assert os.path.isfile(summary_path), f"Missing {summary_path}" with open(summary_path) as f: summary = json.load(f) - assert len(summary) == len(points), ( - f"Expected {len(points)} entries in sweep_summary.json, got {len(summary)}" - ) + assert len(summary) == len( + points + ), f"Expected {len(points)} entries in sweep_summary.json, got {len(summary)}" for entry in summary: assert entry["traces"] > 0, f"Point {entry} has 0 traces" @@ -637,16 +719,26 @@ def test_sweep_inline(self): r = _flowsim_cli( "submit", 
- "--scheduler", "local", - "--collect", "perf", - "--model-path", MODEL, - "--tp", "1", - "--decode-tokens", "2", - "--warmup-n", "2", - "--gpus", "1", - "--local-gpus", "0", - "--extra-server-opts", f"--load-format {LOAD_FORMAT}", - "--sweep", *sweep_args, + "--scheduler", + "local", + "--collect", + "perf", + "--model-path", + MODEL, + "--tp", + "1", + "--decode-tokens", + "2", + "--warmup-n", + "2", + "--gpus", + "1", + "--local-gpus", + "0", + "--extra-server-opts", + f"--load-format {LOAD_FORMAT}", + "--sweep", + *sweep_args, ) combined = r.stdout + r.stderr if r.returncode != 0: @@ -660,9 +752,9 @@ def test_sweep_inline(self): if "Traces:" in line: output_dir = line.split("Traces:", 1)[1].strip() break - assert output_dir and os.path.isdir(output_dir), ( - f"Could not find traces dir in output:\n{combined[-1000:]}" - ) + assert output_dir and os.path.isdir( + output_dir + ), f"Could not find traces dir in output:\n{combined[-1000:]}" _assert_sweep_output(output_dir, _SWEEP_POINTS) _assert_logs(output_dir) @@ -680,22 +772,34 @@ def test_sweep_file(self): try: r = _flowsim_cli( "submit", - "--scheduler", "local", - "--collect", "perf", - "--model-path", MODEL, - "--tp", "1", - "--decode-tokens", "2", - "--warmup-n", "2", - "--gpus", "1", - "--local-gpus", "0", - "--extra-server-opts", f"--load-format {LOAD_FORMAT}", - "--sweep-file", sweep_file, + "--scheduler", + "local", + "--collect", + "perf", + "--model-path", + MODEL, + "--tp", + "1", + "--decode-tokens", + "2", + "--warmup-n", + "2", + "--gpus", + "1", + "--local-gpus", + "0", + "--extra-server-opts", + f"--load-format {LOAD_FORMAT}", + "--sweep-file", + sweep_file, ) combined = r.stdout + r.stderr if r.returncode != 0: print("STDOUT:", r.stdout[-3000:]) print("STDERR:", r.stderr[-3000:]) - assert r.returncode == 0, f"sweep-file submit failed (exit {r.returncode})" + assert ( + r.returncode == 0 + ), f"sweep-file submit failed (exit {r.returncode})" # Find host output dir from submit output output_dir = 
None @@ -703,9 +807,9 @@ def test_sweep_file(self): if "Traces:" in line: output_dir = line.split("Traces:", 1)[1].strip() break - assert output_dir and os.path.isdir(output_dir), ( - f"Could not find traces dir in output:\n{combined[-1000:]}" - ) + assert output_dir and os.path.isdir( + output_dir + ), f"Could not find traces dir in output:\n{combined[-1000:]}" _assert_sweep_output(output_dir, _SWEEP_POINTS) _assert_logs(output_dir) diff --git a/tests/unit/test_scheduler_cli.py b/tests/unit/test_scheduler_cli.py index 08e7146..b6c1afb 100644 --- a/tests/unit/test_scheduler_cli.py +++ b/tests/unit/test_scheduler_cli.py @@ -15,11 +15,11 @@ from schedulers.local import LocalScheduler from schedulers.slurm import SlurmScheduler - # ========================================================================= # ProfileJobSpec # ========================================================================= + class TestProfileJobSpec: """Tests for ProfileJobSpec dataclass methods.""" @@ -73,6 +73,7 @@ def test_build_shell_command_quotes_server_opts(self, spec: ProfileJobSpec): # K8sScheduler.render # ========================================================================= + class TestK8sScheduler: """Tests for K8s Job manifest generation.""" @@ -153,6 +154,7 @@ def test_render_labels(self, scheduler, spec): # SlurmScheduler.render # ========================================================================= + class TestSlurmScheduler: """Tests for Slurm sbatch script generation.""" @@ -238,6 +240,7 @@ def test_render_constraint(self, spec): # LocalScheduler.render # ========================================================================= + class TestLocalScheduler: """Tests for local execution backend.""" @@ -286,11 +289,13 @@ def test_dry_run_equals_render(self, spec): # CLI: flowsim init # ========================================================================= + class TestCLIInit: """Tests for `flowsim init` subcommand.""" def test_init_no_args_shows_help(self, capsys): from 
scripts.cli import _cmd_init + with pytest.raises(SystemExit) as exc_info: _cmd_init([]) assert exc_info.value.code != 0 @@ -299,6 +304,7 @@ def test_init_k8s_creates_template(self, tmp_path: Path): config_dir = tmp_path / "flowsim" with mock.patch("scripts.cli._CONFIG_DIR", config_dir): from scripts.cli import _cmd_init + rc = _cmd_init(["k8s"]) assert rc == 0 cfg_file = config_dir / "k8s.yaml" @@ -317,6 +323,7 @@ def test_init_slurm_creates_template(self, tmp_path: Path): config_dir = tmp_path / "flowsim" with mock.patch("scripts.cli._CONFIG_DIR", config_dir): from scripts.cli import _cmd_init + rc = _cmd_init(["slurm"]) assert rc == 0 cfg_file = config_dir / "slurm.yaml" @@ -336,6 +343,7 @@ def test_init_refuses_overwrite(self, tmp_path: Path): with mock.patch("scripts.cli._CONFIG_DIR", config_dir): from scripts.cli import _cmd_init + rc = _cmd_init(["slurm"]) assert rc != 0 # should refuse @@ -346,6 +354,7 @@ def test_init_force_overwrite(self, tmp_path: Path): with mock.patch("scripts.cli._CONFIG_DIR", config_dir): from scripts.cli import _cmd_init + rc = _cmd_init(["slurm", "--force"]) assert rc == 0 content = (config_dir / "slurm.yaml").read_text() @@ -360,6 +369,7 @@ def test_init_config_copies_file(self, tmp_path: Path): config_dir = tmp_path / "flowsim" with mock.patch("scripts.cli._CONFIG_DIR", config_dir): from scripts.cli import _cmd_init + rc = _cmd_init(["k8s", "--config", str(user_cfg)]) assert rc == 0 installed = config_dir / "k8s.yaml" @@ -369,6 +379,7 @@ def test_init_config_copies_file(self, tmp_path: Path): def test_init_config_missing_file(self): from scripts.cli import _cmd_init + rc = _cmd_init(["k8s", "--config", "/nonexistent/path.yaml"]) assert rc != 0 @@ -377,6 +388,7 @@ def test_init_config_missing_file(self): # CLI: flowsim submit (parse/dry-run only, no actual submission) # ========================================================================= + class TestCLISubmit: """Tests for `flowsim submit` argument parsing and dry-run.""" @@ 
-390,6 +402,7 @@ def _run(self, *args: str, expect_ok: bool = True) -> str: from scripts.submit_profile import main as submit_main import io from contextlib import redirect_stdout + buf = io.StringIO() with redirect_stdout(buf): submit_main(list(args)) @@ -397,6 +410,7 @@ def _run(self, *args: str, expect_ok: bool = True) -> str: def test_submit_help(self, capsys): from scripts.submit_profile import main as submit_main + with pytest.raises(SystemExit) as exc_info: submit_main(["--help"]) assert exc_info.value.code == 0 @@ -406,14 +420,18 @@ def test_submit_help(self, capsys): def test_submit_missing_required(self): from scripts.submit_profile import main as submit_main + with pytest.raises(SystemExit): submit_main([]) def test_submit_local_dry_run(self): out = self._run( - "--scheduler", "local", - "--collect", "perf", - "--model-path", "Qwen/Qwen3-8B", + "--scheduler", + "local", + "--collect", + "perf", + "--model-path", + "Qwen/Qwen3-8B", "--dry-run", ) assert "scripts/run_stage_profile.py" in out @@ -421,20 +439,28 @@ def test_submit_local_dry_run(self): def test_submit_local_dry_run_with_gpus(self): out = self._run( - "--scheduler", "local", - "--collect", "perf", - "--model-path", "Qwen/Qwen3-8B", - "--local-gpus", "0,1", + "--scheduler", + "local", + "--collect", + "perf", + "--model-path", + "Qwen/Qwen3-8B", + "--local-gpus", + "0,1", "--dry-run", ) assert "device=0,1" in out def test_submit_k8s_dry_run(self): out = self._run( - "--scheduler", "k8s", - "--collect", "perf", - "--model-path", "Qwen/Qwen3-8B", - "--k8s-namespace", "default", + "--scheduler", + "k8s", + "--collect", + "perf", + "--model-path", + "Qwen/Qwen3-8B", + "--k8s-namespace", + "default", "--dry-run", ) assert "apiVersion: batch/v1" in out @@ -442,26 +468,31 @@ def test_submit_k8s_dry_run(self): def test_submit_slurm_dry_run(self): out = self._run( - "--scheduler", "slurm", - "--collect", "perf", - "--model-path", "Qwen/Qwen3-8B", - "--slurm-partition", "gpu", + "--scheduler", + "slurm", 
+ "--collect", + "perf", + "--model-path", + "Qwen/Qwen3-8B", + "--slurm-partition", + "gpu", "--dry-run", ) assert "#!/bin/bash" in out assert "#SBATCH --partition=gpu" in out - # ========================================================================= # Config loading # ========================================================================= + class TestConfig: """Tests for config file loading and saving.""" def test_save_and_load_yaml(self, tmp_path: Path): from schedulers.config import _save_yaml, _load_yaml + data = {"partition": "gpu", "account": "proj"} path = tmp_path / "test.yaml" _save_yaml(path, data) @@ -470,6 +501,7 @@ def test_save_and_load_yaml(self, tmp_path: Path): def test_cfg_get(self): from schedulers.config import cfg_get + cfg = {"key": "value", "empty": ""} assert cfg_get(cfg, "key", "default") == "value" assert cfg_get(cfg, "empty", "default") == "" From 9e2541a7fb18f97cb5d88736f884243456a8c694 Mon Sep 17 00:00:00 2001 From: Terrence Zhang Date: Thu, 19 Mar 2026 19:24:36 +0000 Subject: [PATCH 41/56] docs: add --existing-ctx and --decode-tokens to all examples, default decode-tokens to 2 --- README.md | 12 ++++++------ schedulers/README.md | 9 +++++---- scripts/run_stage_profile.py | 6 +++--- scripts/submit_profile.py | 4 ++-- 4 files changed, 16 insertions(+), 15 deletions(-) diff --git a/README.md b/README.md index 604c2a5..6f1842b 100644 --- a/README.md +++ b/README.md @@ -60,7 +60,7 @@ pip install -e . 
flowsim submit --scheduler local \ --collect all \ --model-path workload/models/configs/Qwen3-235B-A22B \ - --tp 1 --bs 1 --input-len 2048 --gpus 1 \ + --tp 1 --bs 1 --input-len 2048 --existing-ctx 0 --decode-tokens 2 --gpus 1 \ --extra-server-opts "--load-format dummy" ``` @@ -109,7 +109,7 @@ FlowSim performs **stage-separated** profiling: it captures prefill (EXTEND) and Each profiling request produces **two** stage-separated traces: - **EXTEND** (prefill) — processes `input_len` new tokens (with optional `existing_ctx` tokens already in KV cache) -- **DECODE** — captures `decode-tokens` decode batch steps +- **DECODE** — captures `decode-tokens` decode batch steps (default 2) ### Collection modes @@ -126,28 +126,28 @@ Each profiling request produces **two** stage-separated traces: flowsim submit --scheduler local \ --collect perf \ --model-path workload/models/configs/Qwen3-235B-A22B \ - --tp 1 --bs 1 --input-len 2048 --gpus 1 \ + --tp 1 --bs 1 --input-len 2048 --existing-ctx 0 --decode-tokens 2 --gpus 1 \ --extra-server-opts "--load-format dummy" # With existing KV cache context flowsim submit --scheduler local \ --collect perf \ --model-path workload/models/configs/Qwen3-235B-A22B \ - --tp 1 --bs 4 --input-len 512 --existing-ctx 4096 --gpus 1 \ + --tp 1 --bs 4 --input-len 512 --existing-ctx 4096 --decode-tokens 2 --gpus 1 \ --extra-server-opts "--load-format dummy" # Full pipeline (perf + shapes) flowsim submit --scheduler local \ --collect all \ --model-path workload/models/configs/Qwen3-235B-A22B \ - --tp 1 --bs 1 --input-len 2048 --gpus 1 \ + --tp 1 --bs 1 --input-len 2048 --existing-ctx 0 --decode-tokens 2 --gpus 1 \ --extra-server-opts "--load-format dummy" # Multi-point sweep flowsim submit --scheduler local \ --collect all \ --model-path workload/models/configs/Qwen3-235B-A22B \ - --sweep 1:2048:0 4:2048:0 8:2048:0 --gpus 1 \ + --sweep 1:2048:0 4:2048:0 8:2048:0 --decode-tokens 2 --gpus 1 \ --extra-server-opts "--load-format dummy" ``` diff --git 
a/schedulers/README.md b/schedulers/README.md index d0835e7..86892bb 100644 --- a/schedulers/README.md +++ b/schedulers/README.md @@ -22,7 +22,7 @@ flowsim --help flowsim submit --scheduler \ --collect \ --model-path \ - --tp 1 --bs 1 --input-len 2048 --gpus 1 + --tp 1 --bs 1 --input-len 2048 --decode-tokens 2 --gpus 1 # Job lifecycle flowsim list --scheduler @@ -50,6 +50,7 @@ flowsim submit --scheduler \ | `--bs` | Batch size | `1` | | `--input-len` | Input sequence length | `2048` | | `--existing-ctx` | Existing KV cache length | `0` | +| `--decode-tokens` | Decode batches to profile | `2` | | `--gpus` | GPU count | `1` | | `--image` | Docker image | `flowsim-image:latest` | | `--output-dir` | Output directory | `stage_traces/{scheduler}/{timestamp}/` | @@ -65,7 +66,7 @@ Runs profiling via `docker run` on the host machine. flowsim submit --scheduler local \ --collect all \ --model-path workload/models/configs/Qwen3-235B-A22B \ - --tp 1 --bs 1 --input-len 2048 --gpus 1 \ + --tp 1 --bs 1 --input-len 2048 --existing-ctx 0 --decode-tokens 2 --gpus 1 \ --local-gpus 0 \ --extra-server-opts "--load-format dummy" ``` @@ -95,7 +96,7 @@ flowsim init k8s --config my-cluster.yaml # or use your own flowsim submit --scheduler k8s \ --collect all \ --model-path workload/models/configs/Qwen3-235B-A22B \ - --tp 1 --bs 1 --input-len 2048 --gpus 1 \ + --tp 1 --bs 1 --input-len 2048 --existing-ctx 0 --decode-tokens 2 --gpus 1 \ --extra-server-opts "--load-format dummy" ``` @@ -133,7 +134,7 @@ flowsim init slurm --config my-slurm.yaml # or use your own flowsim submit --scheduler slurm \ --collect all \ --model-path workload/models/configs/Qwen3-235B-A22B \ - --tp 1 --bs 1 --input-len 2048 --gpus 1 \ + --tp 1 --bs 1 --input-len 2048 --existing-ctx 0 --decode-tokens 2 --gpus 1 \ --slurm-partition gpu \ --extra-server-opts "--load-format dummy" ``` diff --git a/scripts/run_stage_profile.py b/scripts/run_stage_profile.py index 3c739ef..36505ec 100644 --- a/scripts/run_stage_profile.py 
+++ b/scripts/run_stage_profile.py @@ -61,14 +61,14 @@ python scripts/run_stage_profile.py \\ --collect perf \\ --host 0.0.0.0 --port 30001 \\ - --bs 1 --input-len 2048 --decode-tokens 32 \\ + --bs 1 --input-len 2048 --decode-tokens 2 \\ --output-dir /flowsim/stage_traces Example — with existing KV cache context python scripts/run_stage_profile.py \\ --collect perf \\ --host 0.0.0.0 --port 30001 \\ - --bs 4 --input-len 512 --existing-ctx 4096 --decode-tokens 32 \\ + --bs 4 --input-len 512 --existing-ctx 4096 --decode-tokens 2 \\ --output-dir /flowsim/stage_traces Example — launch server + full pipeline (perf → shapes) @@ -113,7 +113,7 @@ # Defaults # --------------------------------------------------------------------------- DEFAULT_WARMUP_N = 5 -DEFAULT_DECODE_TOKENS = 32 +DEFAULT_DECODE_TOKENS = 2 DEFAULT_MAX_PREFILL_TOKENS = 131072 diff --git a/scripts/submit_profile.py b/scripts/submit_profile.py index 5e1021a..0c3074c 100644 --- a/scripts/submit_profile.py +++ b/scripts/submit_profile.py @@ -19,7 +19,7 @@ --collect perf \\ --model-path Qwen/Qwen3-235B-A22B-FP8 \\ --tp 4 --gpus 4 \\ - --bs 1 --input-len 2048 --decode-tokens 32 \\ + --bs 1 --input-len 2048 --decode-tokens 2 \\ --image flowsim-image:latest \\ --k8s-namespace default \\ --k8s-pvc flowsim-traces \\ @@ -98,7 +98,7 @@ def _parse_args(argv: list[str] | None = None) -> argparse.Namespace: wl.add_argument("--bs", type=int, default=1, help="Batch size") wl.add_argument("--input-len", type=int, default=2048) wl.add_argument("--existing-ctx", type=int, default=0) - wl.add_argument("--decode-tokens", type=int, default=32) + wl.add_argument("--decode-tokens", type=int, default=2) wl.add_argument("--warmup-n", type=int, default=5) wl.add_argument( "--disable-chunked-prefill", From 880fe055c117a1f5a184c5f6fc8605ac0d3f668d Mon Sep 17 00:00:00 2001 From: Terrence Zhang Date: Thu, 19 Mar 2026 19:31:46 +0000 Subject: [PATCH 42/56] refactor: remove PyYAML fallback, make it a core dependency --- pyproject.toml | 2 
+- schedulers/config.py | 32 +++++++------------------------- 2 files changed, 8 insertions(+), 26 deletions(-) diff --git a/pyproject.toml b/pyproject.toml index feade94..c91de8a 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -14,13 +14,13 @@ dependencies = [ "perfetto>=0.7", "numpy>=1.24", "pandas>=1.5", + "PyYAML>=6.0", ] [project.optional-dependencies] # Scheduler backends ------------------------------------------------------- k8s = [ "kubernetes>=27.0", # K8s Python client for remote job submission - "PyYAML>=6.0", # nicer YAML dry-run output (json fallback w/o this) ] slurm = [] # Slurm REST API uses stdlib urllib only diff --git a/schedulers/config.py b/schedulers/config.py index 433c87b..10c7f8d 100644 --- a/schedulers/config.py +++ b/schedulers/config.py @@ -23,40 +23,22 @@ import os from pathlib import Path -# Optional: try PyYAML, fall back to JSON -try: - import yaml as _yaml +import yaml as _yaml - def _load_yaml(path: Path) -> dict: - with open(path) as f: - return _yaml.safe_load(f) or {} -except ImportError: - import json as _json - - def _load_yaml(path: Path) -> dict: # type: ignore[misc] - """Fallback: accept JSON (valid YAML 1.2 subset).""" - with open(path) as f: - return _json.load(f) +def _load_yaml(path: Path) -> dict: + with open(path) as f: + return _yaml.safe_load(f) or {} _CONFIG_DIR = Path.home() / ".flowsim" def _save_yaml(path: Path, data: dict) -> None: - """Write a dict to a YAML file (uses PyYAML if available, else JSON).""" + """Write a dict to a YAML file.""" path.parent.mkdir(parents=True, exist_ok=True) - try: - import yaml as _y - - with open(path, "w") as f: - _y.safe_dump(data, f, default_flow_style=False, sort_keys=False) - except ImportError: - import json as _j - - with open(path, "w") as f: - _j.dump(data, f, indent=2, ensure_ascii=False) - f.write("\n") + with open(path, "w") as f: + _yaml.safe_dump(data, f, default_flow_style=False, sort_keys=False) def _resolve_path(env_var: str, filename: str) -> Path | None: 
From 236548a277b6a9d933d0649e85488bfd98d87b82 Mon Sep 17 00:00:00 2001 From: Terrence Zhang Date: Thu, 19 Mar 2026 21:03:25 +0000 Subject: [PATCH 43/56] fix: reject k8s submit when no PVC or hostPath configured --- schedulers/k8s.py | 7 ++++++- 1 file changed, 6 insertions(+), 1 deletion(-) diff --git a/schedulers/k8s.py b/schedulers/k8s.py index e75e7f8..8c07771 100644 --- a/schedulers/k8s.py +++ b/schedulers/k8s.py @@ -186,6 +186,11 @@ def _build_job_dict(self, spec: ProfileJobSpec) -> dict: def submit(self, spec: ProfileJobSpec) -> JobResult: """Submit via the ``kubernetes`` Python client (``pip install kubernetes``).""" + if not self.pvc_name and not self.host_output_dir: + raise ValueError( + "No persistent storage configured. " + "Set --k8s-pvc or --k8s-host-output-dir to avoid losing traces when the pod exits." + ) batch_api, _ = self._load_k8s() body = self._build_job_dict(spec) @@ -281,7 +286,7 @@ def status(self, job_id: str) -> dict: elif self.host_output_dir: output_hint = f"Traces at hostPath {self.host_output_dir} on the scheduled node" else: - output_hint = "WARNING: no PVC or hostPath configured — traces are lost when pod exits" + output_hint = "WARNING: no PVC or hostPath configured — traces will be lost when pod exits" msg_parts = [ f"Job: {job_id} Namespace: {self.namespace} State: {state}" From 9daee82717c0d423822230cc3989e1ca6df76b17 Mon Sep 17 00:00:00 2001 From: Terrence Zhang Date: Thu, 19 Mar 2026 21:27:00 +0000 Subject: [PATCH 44/56] docs: add missing parameters --- schedulers/README.md | 3 +++ 1 file changed, 3 insertions(+) diff --git a/schedulers/README.md b/schedulers/README.md index 86892bb..703ee9c 100644 --- a/schedulers/README.md +++ b/schedulers/README.md @@ -54,6 +54,9 @@ flowsim submit --scheduler \ | `--gpus` | GPU count | `1` | | `--image` | Docker image | `flowsim-image:latest` | | `--output-dir` | Output directory | `stage_traces/{scheduler}/{timestamp}/` | +| `--extra-server-opts` | Extra sglang server flags (quoted string) 
| `""` | +| `--sweep` | Multi-point sweep `BS:INPUT_LEN:CTX` (repeatable) | empty | +| `--job-name` | Custom job name | auto-generated | | `--dry-run` | Print script only | `false` | --- From 5e3d1bb12ffa3fe6e6ab8227d38350b7cf6ac00b Mon Sep 17 00:00:00 2001 From: Terrence Zhang Date: Thu, 19 Mar 2026 21:52:02 +0000 Subject: [PATCH 45/56] fix: unique job names, Slurm exclusive GPU, remove list_jobs prefix filter - Add timestamp suffix (-MMDD-HHMMSS) to auto-generated job names for uniqueness - Add #SBATCH --exclusive to Slurm scripts for profiling GPU isolation - Remove flowsim- prefix filter from Slurm list_jobs (let users filter) - Add --sweep-file to scheduler README Common Parameters table --- schedulers/README.md | 1 + schedulers/base.py | 14 +++++++++++--- schedulers/slurm.py | 3 +-- tests/unit/test_scheduler_cli.py | 3 ++- 4 files changed, 15 insertions(+), 6 deletions(-) diff --git a/schedulers/README.md b/schedulers/README.md index 703ee9c..6e7600f 100644 --- a/schedulers/README.md +++ b/schedulers/README.md @@ -56,6 +56,7 @@ flowsim submit --scheduler \ | `--output-dir` | Output directory | `stage_traces/{scheduler}/{timestamp}/` | | `--extra-server-opts` | Extra sglang server flags (quoted string) | `""` | | `--sweep` | Multi-point sweep `BS:INPUT_LEN:CTX` (repeatable) | empty | +| `--sweep-file` | File with one `BS:INPUT_LEN:CTX` per line (mutually exclusive with `--sweep`) | none | | `--job-name` | Custom job name | auto-generated | | `--dry-run` | Print script only | `false` | diff --git a/schedulers/base.py b/schedulers/base.py index d3b32c4..ac71548 100644 --- a/schedulers/base.py +++ b/schedulers/base.py @@ -4,6 +4,7 @@ import abc import shlex +import time from dataclasses import dataclass, field from typing import Optional, Sequence @@ -126,14 +127,21 @@ def build_shell_command(self) -> str: return " ".join(quoted) def default_job_name(self) -> str: - """Generate a default job name from workload params.""" + """Generate a default job name from 
workload params. + + Auto-generated names include a short timestamp suffix + (``-MMDD-HHMMSS``) so repeated submissions of the same + workload get distinct names. User-supplied ``--job-name`` + values are returned as-is. + """ if self.job_name: return self.job_name model_short = self.model_path.split("/")[-1].lower().replace(".", "-") + ts = time.strftime("%m%d-%H%M%S") if self.sweep_points: - name = f"flowsim-{self.collect}-{model_short}-sweep{len(self.sweep_points)}pt" + name = f"flowsim-{self.collect}-{model_short}-sweep{len(self.sweep_points)}pt-{ts}" else: - name = f"flowsim-{self.collect}-{model_short}-bs{self.bs}-il{self.input_len}" + name = f"flowsim-{self.collect}-{model_short}-bs{self.bs}-il{self.input_len}-{ts}" return name diff --git a/schedulers/slurm.py b/schedulers/slurm.py index ad12e75..55b194d 100644 --- a/schedulers/slurm.py +++ b/schedulers/slurm.py @@ -78,6 +78,7 @@ def render(self, spec: ProfileJobSpec) -> str: f"#SBATCH --partition={self.partition}", f"#SBATCH --gpus-per-node={spec.gpus}", f"#SBATCH --ntasks=1", + f"#SBATCH --exclusive", f"#SBATCH --time={self.time_limit}", f"#SBATCH --output={spec.output_dir}/{job_name}_%j.log", ] @@ -312,8 +313,6 @@ def _list_jobs_cli(self, *, status_filter: str = "") -> list[dict]: continue parts = line.split("|", 4) name = parts[1] if len(parts) > 1 else "" - if not name.startswith("flowsim-"): - continue state = parts[2] if len(parts) > 2 else "UNKNOWN" if status_filter and state.upper() != status_filter.upper(): continue diff --git a/tests/unit/test_scheduler_cli.py b/tests/unit/test_scheduler_cli.py index b6c1afb..c6f329b 100644 --- a/tests/unit/test_scheduler_cli.py +++ b/tests/unit/test_scheduler_cli.py @@ -35,7 +35,7 @@ def spec(self) -> ProfileJobSpec: def test_default_job_name(self, spec: ProfileJobSpec): name = spec.default_job_name() - assert name == "flowsim-perf-qwen3-8b-bs4-il1024" + assert name.startswith("flowsim-perf-qwen3-8b-bs4-il1024-") def test_custom_job_name(self, spec: 
ProfileJobSpec): spec.job_name = "my-job" @@ -182,6 +182,7 @@ def test_render_sbatch_directives(self, scheduler, spec): script = scheduler.render(spec) assert "#SBATCH --partition=gpu-h100" in script assert "#SBATCH --gpus-per-node=4" in script + assert "#SBATCH --exclusive" in script assert "#SBATCH --time=01:00:00" in script assert "#SBATCH --account=my-proj" in script From 2a718a7228249ee9b69c8c7561003b3f6a39e0b1 Mon Sep 17 00:00:00 2001 From: Terrence Zhang Date: Thu, 19 Mar 2026 22:03:28 +0000 Subject: [PATCH 46/56] refactor: restructure CLI into scripts/cli/ subpackage MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit - cli.py → cli/__init__.py (entry point + init command) - submit_profile.py → cli/submit.py (flowsim submit) - status_profile.py → cli/manage.py (flowsim status/logs/list/cancel) - Update all import paths in tests --- scripts/{cli.py => cli/__init__.py} | 12 ++++++------ scripts/{status_profile.py => cli/manage.py} | 0 scripts/{submit_profile.py => cli/submit.py} | 6 +++--- tests/unit/test_scheduler_cli.py | 6 +++--- 4 files changed, 12 insertions(+), 12 deletions(-) rename scripts/{cli.py => cli/__init__.py} (91%) rename scripts/{status_profile.py => cli/manage.py} (100%) rename scripts/{submit_profile.py => cli/submit.py} (99%) diff --git a/scripts/cli.py b/scripts/cli/__init__.py similarity index 91% rename from scripts/cli.py rename to scripts/cli/__init__.py index 04d0c84..9d4755e 100644 --- a/scripts/cli.py +++ b/scripts/cli/__init__.py @@ -16,7 +16,7 @@ _CONFIG_DIR = Path.home() / ".flowsim" _TEMPLATES_DIR = ( - Path(__file__).resolve().parent.parent / "schedulers" / "templates" + Path(__file__).resolve().parent.parent.parent / "schedulers" / "templates" ) @@ -130,31 +130,31 @@ def main(argv: list[str] | None = None) -> int: return _cmd_init(remaining) if args.command == "submit": - from scripts.submit_profile import main as submit_main + from scripts.cli.submit import main as submit_main 
submit_main(remaining) return 0 if args.command == "status": - from scripts.status_profile import main_status + from scripts.cli.manage import main_status main_status(remaining) return 0 if args.command == "logs": - from scripts.status_profile import main_logs + from scripts.cli.manage import main_logs main_logs(remaining) return 0 if args.command == "list": - from scripts.status_profile import main_list + from scripts.cli.manage import main_list main_list(remaining) return 0 if args.command == "cancel": - from scripts.status_profile import main_cancel + from scripts.cli.manage import main_cancel main_cancel(remaining) return 0 diff --git a/scripts/status_profile.py b/scripts/cli/manage.py similarity index 100% rename from scripts/status_profile.py rename to scripts/cli/manage.py diff --git a/scripts/submit_profile.py b/scripts/cli/submit.py similarity index 99% rename from scripts/submit_profile.py rename to scripts/cli/submit.py index 0c3074c..a6370f8 100644 --- a/scripts/submit_profile.py +++ b/scripts/cli/submit.py @@ -14,7 +14,7 @@ Dry-run (print Kubernetes Job YAML to stdout): - python scripts/submit_profile.py \\ + flowsim submit \\ --scheduler k8s \\ --collect perf \\ --model-path Qwen/Qwen3-235B-A22B-FP8 \\ @@ -27,7 +27,7 @@ Dry-run (print Slurm sbatch script to stdout): - python scripts/submit_profile.py \\ + flowsim submit \\ --scheduler slurm \\ --collect perf \\ --model-path Qwen/Qwen3-235B-A22B-FP8 \\ @@ -38,7 +38,7 @@ Submit directly to cluster: - python scripts/submit_profile.py \\ + flowsim submit \\ --scheduler k8s \\ ... 
\\ --submit diff --git a/tests/unit/test_scheduler_cli.py b/tests/unit/test_scheduler_cli.py index c6f329b..2491b7e 100644 --- a/tests/unit/test_scheduler_cli.py +++ b/tests/unit/test_scheduler_cli.py @@ -400,7 +400,7 @@ def _skip_image_check(self): def _run(self, *args: str, expect_ok: bool = True) -> str: """Run submit via the Python function, capture stdout.""" - from scripts.submit_profile import main as submit_main + from scripts.cli.submit import main as submit_main import io from contextlib import redirect_stdout @@ -410,7 +410,7 @@ def _run(self, *args: str, expect_ok: bool = True) -> str: return buf.getvalue() def test_submit_help(self, capsys): - from scripts.submit_profile import main as submit_main + from scripts.cli.submit import main as submit_main with pytest.raises(SystemExit) as exc_info: submit_main(["--help"]) @@ -420,7 +420,7 @@ def test_submit_help(self, capsys): assert "local" in out def test_submit_missing_required(self): - from scripts.submit_profile import main as submit_main + from scripts.cli.submit import main as submit_main with pytest.raises(SystemExit): submit_main([]) From 31dc15ba9d1ce76a193384173057ecdbd588fedd Mon Sep 17 00:00:00 2001 From: Terrence Zhang Date: Thu, 19 Mar 2026 22:22:41 +0000 Subject: [PATCH 47/56] fix: use runtime:nvidia for slurm compute node GPU access Replace deploy.resources.reservations with runtime:nvidia + NVIDIA_VISIBLE_DEVICES to fix NVML initialization failure in slurmd-0. 
--- tests/integration/infra/slurm-compose.yaml | 10 +++------- 1 file changed, 3 insertions(+), 7 deletions(-) diff --git a/tests/integration/infra/slurm-compose.yaml b/tests/integration/infra/slurm-compose.yaml index c2369ba..d24ab6f 100644 --- a/tests/integration/infra/slurm-compose.yaml +++ b/tests/integration/infra/slurm-compose.yaml @@ -83,6 +83,9 @@ services: <<: *slurm-base container_name: slurmd-0 hostname: slurmd-0 + runtime: nvidia + environment: + NVIDIA_VISIBLE_DEVICES: "0" command: > bash -c " mkdir -p /run/munge && chown munge:munge /run/munge @@ -100,13 +103,6 @@ services: - /home/administrator/zhangt/FlowSim/stage_traces:/flowsim/stage_traces # Cgroup needed by slurmd - /sys/fs/cgroup:/sys/fs/cgroup:rw - deploy: - resources: - reservations: - devices: - - driver: nvidia - device_ids: ["0"] - capabilities: [gpu] # ---- REST API (optional, for REST mode) ---- # slurmrestd: From b7ec2cb5b963fc4cd9690b7a1914a2d1f2fbc0b6 Mon Sep 17 00:00:00 2001 From: Terrence Zhang Date: Thu, 19 Mar 2026 22:57:12 +0000 Subject: [PATCH 48/56] =?UTF-8?q?refactor:=20rename=20test=5Fscheduler=5Fl?= =?UTF-8?q?ocal.py=20=E2=86=92=20test=5Fscheduler.py,=20rewrite=20header?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit - Remove misleading 'local' suffix (file tests all 3 backends) - Add test methodology (How It Works) and Pass Criteria to docstring - Update file references in schedulers/README.md --- schedulers/README.md | 4 +- ...t_scheduler_local.py => test_scheduler.py} | 65 +++++++++++-------- 2 files changed, 41 insertions(+), 28 deletions(-) rename tests/integration/{test_scheduler_local.py => test_scheduler.py} (93%) diff --git a/schedulers/README.md b/schedulers/README.md index 6e7600f..dc91616 100644 --- a/schedulers/README.md +++ b/schedulers/README.md @@ -235,7 +235,7 @@ docker compose -f slurm-compose.yaml down -v python -m pytest tests/unit/test_scheduler_cli.py -v # Integration tests -python -m pytest 
tests/integration/test_scheduler_local.py::TestK8sScheduler -v -x -python -m pytest tests/integration/test_scheduler_local.py::TestSlurmScheduler -v -x +python -m pytest tests/integration/test_scheduler.py::TestK8sScheduler -v -x +python -m pytest tests/integration/test_scheduler.py::TestSlurmScheduler -v -x ``` diff --git a/tests/integration/test_scheduler_local.py b/tests/integration/test_scheduler.py similarity index 93% rename from tests/integration/test_scheduler_local.py rename to tests/integration/test_scheduler.py index a6bc416..7ecaf9b 100644 --- a/tests/integration/test_scheduler_local.py +++ b/tests/integration/test_scheduler.py @@ -1,43 +1,56 @@ -"""Integration tests for the FlowSim scheduler CLI. +"""Integration tests for all FlowSim scheduler backends. -Tests all three scheduler backends (local, k8s, slurm) end-to-end. - -* **local** — submits jobs via ``flowsim submit --scheduler local`` which - launches Docker containers on the host. Validates job lifecycle (submit, - list, status) and trace CSV correctness (GEMM dim0, FlashAttn seqlen). -* **k8s** — submits a real Job to a Kind cluster, retrieves traces via - ``docker cp``, and validates trace CSVs. Auto-sets up the Kind cluster - via ``dev-setup.sh`` if not already running. -* **slurm** — submits a real job to a local docker-compose Slurm cluster, - retrieves traces via ``docker cp``, and validates trace CSVs. Auto-sets - up the Slurm cluster via ``dev-setup.sh slurm`` if not already running. +How It Works +------------ +Each test class exercises one scheduler backend end-to-end through the +``flowsim`` CLI (the same commands a user would run). The flow is: + +1. ``flowsim submit`` — submit a ``--collect all`` profiling job. +2. ``flowsim list`` — verify the job appears in the listing. +3. ``flowsim status`` — poll until Completed / Succeeded (up to 20 min). +4. Validate outputs on the host file system. 
+ +Infrastructure is auto-provisioned by session-scoped fixtures: + +* **Local** — uses Docker on the host directly (no extra infra). +* **K8s** — spins up a Kind cluster via ``dev-setup.sh kind``. +* **Slurm** — spins up a docker-compose Slurm cluster via + ``dev-setup.sh slurm`` (slurmctld + slurmd-0 with GPU 0). + +Pass Criteria +------------- +* Job reaches Completed/Succeeded within the timeout. +* Stage-separated trace files exist (EXTEND + DECODE ``.trace.json.gz``). +* Parsed CSVs exist under ``parsed/`` with non-zero rows. +* GEMM kernels: EXTEND ``dim0 == bs * input_len``, DECODE ``dim0 == bs``. +* FlashAttn kernels: EXTEND dims contain ``[bs, input_len + existing_ctx]`` (±1). +* ``analysis_extend.json`` and ``analysis_decode.json`` are valid JSON. +* After ``--collect shapes``, ``Dims`` column is present in merged CSVs. +* Sweep jobs produce per-point subdirs + ``sweep_summary.json``. +* Log files (stdout/stderr) exist under ``logs/``. Requirements ------------ -* Docker with ``flowsim-image:latest`` built (for local tests). -* A GPU-equipped host machine (local tests run on the physical host, - NOT inside a Docker container). -* ``tests/integration/infra/dev-setup.sh`` available (Kind and Slurm clusters are - automatically created if missing). -* ``schedulers/`` available on PYTHONPATH. +* Docker with ``flowsim-image:latest`` built. +* GPU-equipped host machine. +* ``tests/integration/infra/dev-setup.sh`` available. Environment Variables --------------------- ``MODEL`` - Model path relative to project root - (default: ``workload/models/configs/Qwen3-235B-A22B``). + Model path (default: ``workload/models/configs/Qwen3-235B-A22B``). ``LOAD_FORMAT`` Load format (default: ``dummy``). 
Usage ----- - # On host (local scheduler tests — needs Docker + GPU): - cd FlowSim && python -m pytest \ - tests/integration/test_scheduler_local.py -v -x -k "local" + # All scheduler tests: + python -m pytest tests/integration/test_scheduler.py -v -x - # On host (k8s tests — needs kubeconfig): - python -m pytest tests/integration/test_scheduler_local.py \ - -v -x -k "k8s" + # Single backend: + python -m pytest tests/integration/test_scheduler.py -v -x -k "local" + python -m pytest tests/integration/test_scheduler.py -v -x -k "k8s" + python -m pytest tests/integration/test_scheduler.py -v -x -k "slurm" """ import ast From 7d4088944bc204f04ede0131954dc7cd346e53e0 Mon Sep 17 00:00:00 2001 From: Terrence Zhang Date: Thu, 19 Mar 2026 23:55:51 +0000 Subject: [PATCH 49/56] docs: update output structure to include logs, merged, and shape dirs Sync the output tree in both README.md and schedulers/README.md to reflect the actual directory layout produced by profiling jobs: - Add logs/ with server, shape_server, and job log entries - Add merged/ and shape_traces/ + shape_parsed/ inside point dirs - Add brief descriptions of each subdirectory in root README --- README.md | 22 +++++++++++++--------- schedulers/README.md | 5 +++-- 2 files changed, 16 insertions(+), 11 deletions(-) diff --git a/README.md b/README.md index 6f1842b..6fa4289 100644 --- a/README.md +++ b/README.md @@ -157,20 +157,24 @@ For K8s / Slurm clusters, replace `--scheduler local` with `k8s` or `slurm`. See ``` stage_traces/{scheduler}/{YYYYMMDD_HHMMSS}/ -├── sweep_summary.json ├── bs1_input2048_ctx0/ -│ ├── *-TP-*-EXTEND.trace.json.gz -│ ├── *-TP-*-DECODE.trace.json.gz -│ ├── parsed/ -│ │ ├── TP-0-EXTEND.csv -│ │ ├── TP-0-DECODE.csv -│ │ └── ... +│ ├── *.trace.json.gz +│ ├── parsed/*.csv +│ ├── merged/*_merged.trace.csv +│ ├── shape_traces/ + shape_parsed/ │ ├── analysis_extend.json │ └── analysis_decode.json -└── ... 
+├── logs/ +│ ├── server_*.{stdout,stderr}.log +│ ├── shape_server_*.{stdout,stderr}.log +│ └── {job_name}_*.{stdout,stderr}.log +└── sweep_summary.json ``` -After `--collect shapes`, each `parsed/TP-*-DECODE.csv` gains a `Dims` column with kernel tensor shapes. +- `parsed/`: Per-rank timing CSVs extracted from traces. +- `merged/`: Timing + shape columns joined into a single CSV per rank/stage. +- `shape_traces/` / `shape_parsed/`: Raw and parsed shape-profiling traces (generated by `--collect shapes` or `--collect all`). +- `logs/`: Server, shape-server, and job stdout/stderr logs. ### Utilities (`utils/`) diff --git a/schedulers/README.md b/schedulers/README.md index dc91616..3994cb6 100644 --- a/schedulers/README.md +++ b/schedulers/README.md @@ -206,8 +206,9 @@ stage_traces/{scheduler}/{YYYYMMDD_HHMMSS}/ │ ├── analysis_extend.json │ └── analysis_decode.json ├── logs/ -│ ├── server_*.stdout.log -│ └── server_*.stderr.log +│ ├── server_*.{stdout,stderr}.log +│ ├── shape_server_*.{stdout,stderr}.log +│ └── {job_name}_*.{stdout,stderr}.log └── sweep_summary.json ``` From 86dd517fce4b00c5454e4fd938b2510349ff14a9 Mon Sep 17 00:00:00 2001 From: Terrence <39916879+TerrenceZhangX@users.noreply.github.com> Date: Thu, 19 Mar 2026 19:09:38 -0700 Subject: [PATCH 50/56] Update scripts/cli/submit.py Co-authored-by: Copilot <175728472+Copilot@users.noreply.github.com> --- scripts/cli/submit.py | 6 ++---- 1 file changed, 2 insertions(+), 4 deletions(-) diff --git a/scripts/cli/submit.py b/scripts/cli/submit.py index a6370f8..e59d697 100644 --- a/scripts/cli/submit.py +++ b/scripts/cli/submit.py @@ -36,14 +36,12 @@ --slurm-time 02:00:00 \\ --dry-run -Submit directly to cluster: +Submit directly to cluster (omit --dry-run): flowsim submit \\ --scheduler k8s \\ - ... \\ - --submit + ... 
""" - from __future__ import annotations import argparse From 2dbb8966857d51dbc370c0c2bbb4386438563e4a Mon Sep 17 00:00:00 2001 From: Terrence <39916879+TerrenceZhangX@users.noreply.github.com> Date: Thu, 19 Mar 2026 19:11:42 -0700 Subject: [PATCH 51/56] Update schedulers/local.py Co-authored-by: Copilot <175728472+Copilot@users.noreply.github.com> --- schedulers/local.py | 15 +++++++++++---- 1 file changed, 11 insertions(+), 4 deletions(-) diff --git a/schedulers/local.py b/schedulers/local.py index f9c2aa8..7015d28 100644 --- a/schedulers/local.py +++ b/schedulers/local.py @@ -215,16 +215,23 @@ def _tee(src, dest_file, dest_stream): ) def cancel(self, job_id: str) -> str: - """Stop the Docker container for a local job.""" + """Stop the Docker container for a local job. + + The Docker container name is truncated to 63 characters when created. + To ensure we stop the correct container even if a longer job id is + provided (for example, the full job name), apply the same truncation + here before calling ``docker stop``. 
+ """ + container_name = job_id[:63] proc = subprocess.run( - ["docker", "stop", job_id], + ["docker", "stop", container_name], capture_output=True, text=True, timeout=30, ) if proc.returncode == 0: - return f"Stopped container {job_id}" - return f"Could not stop container {job_id}: {proc.stderr.strip()}" + return f"Stopped container {container_name}" + return f"Could not stop container {container_name}: {proc.stderr.strip()}" def _find_log_dirs(self) -> list[str]: """Find all log directories under stage_traces/{scheduler}/*/logs/.""" From f30329a08f3157cc8cc356e6e2f2cf395dabb23f Mon Sep 17 00:00:00 2001 From: Terrence <39916879+TerrenceZhangX@users.noreply.github.com> Date: Thu, 19 Mar 2026 19:12:01 -0700 Subject: [PATCH 52/56] Update tests/integration/infra/kind-multi-node.yaml Co-authored-by: Copilot <175728472+Copilot@users.noreply.github.com> --- tests/integration/infra/kind-multi-node.yaml | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/tests/integration/infra/kind-multi-node.yaml b/tests/integration/infra/kind-multi-node.yaml index 90b4e6f..7ec1b68 100644 --- a/tests/integration/infra/kind-multi-node.yaml +++ b/tests/integration/infra/kind-multi-node.yaml @@ -29,9 +29,9 @@ nodes: extraMounts: - hostPath: /dev/null containerPath: /var/run/nvidia-container-devices/0 - - hostPath: /home/administrator/zhangt + - hostPath: /path/to/host/workspace containerPath: /workspace readOnly: true # Writable mount so K8s pods can write traces directly to host - - hostPath: /home/administrator/zhangt/FlowSim/stage_traces + - hostPath: /path/to/host/stage_traces containerPath: /host-stage-traces From 471876428bb6c8ee8adc930341ef046d6a84647b Mon Sep 17 00:00:00 2001 From: Terrence <39916879+TerrenceZhangX@users.noreply.github.com> Date: Thu, 19 Mar 2026 19:12:30 -0700 Subject: [PATCH 53/56] Update tests/integration/infra/slurm-compose.yaml Co-authored-by: Copilot <175728472+Copilot@users.noreply.github.com> --- tests/integration/infra/slurm-compose.yaml | 
2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tests/integration/infra/slurm-compose.yaml b/tests/integration/infra/slurm-compose.yaml index d24ab6f..b772eb9 100644 --- a/tests/integration/infra/slurm-compose.yaml +++ b/tests/integration/infra/slurm-compose.yaml @@ -1,4 +1,4 @@ -# Slurm test cluster — slurmctld + 2 compute nodes (GPU 0, GPU 1) + slurmrestd +# Slurm test cluster — slurmctld + 1 compute node (GPU 0) # # Usage: # cd tests/integration/infra/ From 8f790542fb20556eef4bf7737035bf805012628c Mon Sep 17 00:00:00 2001 From: Terrence <39916879+TerrenceZhangX@users.noreply.github.com> Date: Thu, 19 Mar 2026 19:15:48 -0700 Subject: [PATCH 54/56] Update tests/integration/infra/slurm.conf Co-authored-by: Copilot <175728472+Copilot@users.noreply.github.com> --- tests/integration/infra/slurm.conf | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/tests/integration/infra/slurm.conf b/tests/integration/infra/slurm.conf index 7a26d5c..ea7611b 100644 --- a/tests/integration/infra/slurm.conf +++ b/tests/integration/infra/slurm.conf @@ -1,8 +1,8 @@ -# slurm.conf — minimal 2-node cluster for FlowSim testing +# slurm.conf — minimal single-node cluster for FlowSim testing # # Controller: slurmctld -# Compute: slurmd-0 (1 GPU), slurmd-1 (1 GPU) -# REST API: slurmrestd on port 6820 +# Compute: slurmd-0 (1 GPU) +# REST API: not provisioned in this test configuration ClusterName=flowsim SlurmctldHost=slurmctld From 7d64cfce41c42c866efd65ed15941f6ce42c3cb9 Mon Sep 17 00:00:00 2001 From: Terrence Zhang Date: Fri, 20 Mar 2026 02:44:35 +0000 Subject: [PATCH 55/56] fix: parameterize hardcoded host paths in slurm-compose MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Replace absolute /home/administrator/… bind mounts with: - ${HOST_WORKSPACE} env var for the read-only /workspace mount - Relative path ../../../stage_traces for the writable traces mount dev-setup.sh now exports HOST_WORKSPACE (defaults to 
parent of repo root) before invoking docker compose. --- tests/integration/infra/dev-setup.sh | 5 +++++ tests/integration/infra/slurm-compose.yaml | 10 +++++++--- 2 files changed, 12 insertions(+), 3 deletions(-) diff --git a/tests/integration/infra/dev-setup.sh b/tests/integration/infra/dev-setup.sh index afbb9f7..02e447f 100755 --- a/tests/integration/infra/dev-setup.sh +++ b/tests/integration/infra/dev-setup.sh @@ -302,7 +302,12 @@ setup_slurm() { err "docker compose v2 is required but not available." fi + # HOST_WORKSPACE is used by slurm-compose.yaml for the read-only /workspace mount. + REPO_ROOT="$(cd "${SCRIPT_DIR}/../../.." && pwd)" + export HOST_WORKSPACE="${HOST_WORKSPACE:-$(dirname "${REPO_ROOT}")}" + log "Building and starting Slurm cluster (slurmctld + 2 slurmd + slurmrestd)..." + log " HOST_WORKSPACE=${HOST_WORKSPACE}" docker compose -f "${SCRIPT_DIR}/slurm-compose.yaml" up -d --build log "Waiting for slurmctld to become ready..." diff --git a/tests/integration/infra/slurm-compose.yaml b/tests/integration/infra/slurm-compose.yaml index b772eb9..b9ba09a 100644 --- a/tests/integration/infra/slurm-compose.yaml +++ b/tests/integration/infra/slurm-compose.yaml @@ -1,6 +1,10 @@ # Slurm test cluster — slurmctld + 1 compute node (GPU 0) # +# Requires HOST_WORKSPACE env var pointing to the directory containing +# model weights (mounted read-only into containers as /workspace). 
+# # Usage: +# export HOST_WORKSPACE=/path/to/workspace # cd tests/integration/infra/ # docker compose -f slurm-compose.yaml up -d # @@ -33,7 +37,7 @@ x-slurm-base: &slurm-base - slurm-etc:/etc/slurm - munge-socket:/run/munge # Share workspace for model weights / traces - - /home/administrator/zhangt:/workspace:ro + - ${HOST_WORKSPACE:?set HOST_WORKSPACE to the directory containing model weights}:/workspace:ro networks: - slurm-net @@ -98,9 +102,9 @@ services: - slurm-etc:/etc/slurm:ro - munge-key:/etc/munge:ro - munge-socket:/run/munge - - /home/administrator/zhangt:/workspace:ro + - ${HOST_WORKSPACE:?set HOST_WORKSPACE}:/workspace:ro # Writable mount so traces appear on host - - /home/administrator/zhangt/FlowSim/stage_traces:/flowsim/stage_traces + - ../../../stage_traces:/flowsim/stage_traces # Cgroup needed by slurmd - /sys/fs/cgroup:/sys/fs/cgroup:rw From 76e75bd91b9c90b71ae659bb9ca0b7502b4214e0 Mon Sep 17 00:00:00 2001 From: Terrence Zhang Date: Fri, 20 Mar 2026 02:51:08 +0000 Subject: [PATCH 56/56] fix: auto-mount output_dir in docker/enroot container modes Without mounting spec.output_dir into the container, traces and logs are written to the ephemeral container filesystem and lost on exit. Docker mode: prepend -v output_dir:output_dir to the mount list. Enroot mode: append output_dir:output_dir to --container-mounts. --- schedulers/slurm.py | 14 +++++++++----- tests/unit/test_scheduler_cli.py | 4 ++++ 2 files changed, 13 insertions(+), 5 deletions(-) diff --git a/schedulers/slurm.py b/schedulers/slurm.py index 55b194d..543b22f 100644 --- a/schedulers/slurm.py +++ b/schedulers/slurm.py @@ -107,22 +107,26 @@ def render(self, spec: ProfileJobSpec) -> str: lines.append("") if self.container_runtime == "docker": - mounts = "" + # Always mount output_dir so traces/logs persist on the host. 
+ mounts = f" -v {spec.output_dir}:{spec.output_dir}" if self.container_mounts: - mounts = f" -v {self.container_mounts}" + mounts += f" -v {self.container_mounts}" lines.append( f"docker run --gpus all --ipc=host --shm-size=16g" f"{mounts} -w /flowsim {spec.image} \\" ) lines.append(f" {cmd}") elif self.container_runtime == "enroot": - mounts = "" + # Always mount output_dir so traces/logs persist on the host. + out_mount = f"{spec.output_dir}:{spec.output_dir}" if self.container_mounts: - mounts = f" --container-mounts={self.container_mounts}" + all_mounts = f"{self.container_mounts},{out_mount}" + else: + all_mounts = out_mount lines.append( f"srun --container-image={spec.image}" f" --container-workdir=/flowsim" - f"{mounts} \\" + f" --container-mounts={all_mounts} \\" ) lines.append(f" {cmd}") elif self.container_runtime == "none": diff --git a/tests/unit/test_scheduler_cli.py b/tests/unit/test_scheduler_cli.py index 2491b7e..9f9c5ab 100644 --- a/tests/unit/test_scheduler_cli.py +++ b/tests/unit/test_scheduler_cli.py @@ -204,6 +204,8 @@ def test_render_docker_runtime(self, spec): script = sched.render(spec) assert "docker run" in script assert "-v /data:/data" in script + # output_dir is always auto-mounted + assert f"-v {spec.output_dir}:{spec.output_dir}" in script def test_render_enroot_runtime(self, spec): sched = SlurmScheduler( @@ -212,6 +214,8 @@ def test_render_enroot_runtime(self, spec): ) script = sched.render(spec) assert "srun --container-image" in script + # output_dir is always auto-mounted + assert f"{spec.output_dir}:{spec.output_dir}" in script def test_render_modules(self, spec): sched = SlurmScheduler(