Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
72 changes: 72 additions & 0 deletions docs/developer/profiling.md
Original file line number Diff line number Diff line change
@@ -0,0 +1,72 @@
# Profiling Roar

`roar` already has targeted benchmarks under `tests/benchmarks/`. The profiling harness in
[`scripts/profile_roar.py`](../../scripts/profile_roar.py) adds a
repeatable way to capture wall-time summaries and Python hot spots for representative local
workflows.

## What It Profiles

- top-level CLI startup: `roar --help`
- a simple local `roar run`
- active-session query commands: `status` and `show --session`
- local publish flows without remote side effects: `register --dry-run` and `put --dry-run`
- Python startup overhead for `ROAR_WRAP=1`, with and without `ROAR_LOG_FILE`

Each CLI scenario records:

- repeated wall-time samples
- one `cProfile` run
- captured stdout/stderr
- top cumulative and internal Python hot spots

The startup scenario records:

- baseline vs wrapped wall time
- import-time breakdown from `python -X importtime -c pass`

## Run It

From the repo root:

```bash
uv run --extra dev python scripts/profile_roar.py
```

Useful options:

```bash
uv run --extra dev python scripts/profile_roar.py --iterations 5 --top 20
uv run --extra dev python scripts/profile_roar.py --scenario cli_run_simple --scenario startup_wrap
```

## Output Files

The harness writes:

- JSON summary: `tests/benchmarks/results/profile_suite_latest.json`
- Markdown summary: `tests/benchmarks/results/profile_suite_latest.md`
- raw profile artifacts: `tests/benchmarks/results/profiles/<timestamp>/`
- a copied latest artifact set: `tests/benchmarks/results/profiles/latest/`

The raw artifact directory contains:

- `*.prof` `cProfile` files
- `*.stdout.txt` and `*.stderr.txt` for profiled CLI runs
- `startup_wrap.importtime.txt` for import-time output

## How To Read It

- Start with the wall-time means to find the slowest end-user workflows.
- For a slow CLI scenario, inspect `top_cumulative` first; it shows what dominates total time.
- Inspect `top_internal` when cumulative time is dominated by wrappers and you need the leaf work.
- For `startup_wrap`, compare:
- import overhead: `ROAR_WRAP=1` minus baseline
- atexit overhead: `ROAR_WRAP=1 + LOG_FILE` minus `ROAR_WRAP=1`

## Current Focus

The existing performance guardrail in
[`tests/execution/runtime/test_sitecustomize_perf.py`](../../tests/execution/runtime/test_sitecustomize_perf.py)
is close to the local threshold. The profiling harness is intended to make that startup/runtime
path measurable enough to optimize, not just to rerun the guardrail test.
37 changes: 37 additions & 0 deletions roar/application/label_rendering.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,37 @@
"""Pure helpers for rendering label metadata without DB-side imports."""

from __future__ import annotations

import json
from typing import Any


def flatten_label_metadata(metadata: dict[str, Any]) -> list[tuple[str, str]]:
    """Flatten metadata into sorted ``(key, display_value)`` pairs.

    Nested dictionaries are rendered as dotted key paths (``a.b.c``); keys
    are sorted at every nesting level so the output order is deterministic.
    """
    pairs: list[tuple[str, str]] = []
    # Explicit stack instead of recursion. Children are pushed in
    # reverse-sorted order so leaves pop off in sorted depth-first order,
    # matching a recursive sorted walk.
    stack: list[tuple[str, Any]] = [("", metadata)]
    while stack:
        path, node = stack.pop()
        if isinstance(node, dict):
            for key in sorted(node, reverse=True):
                stack.append((f"{path}.{key}" if path else key, node[key]))
        else:
            pairs.append((path, _display_scalar(node)))
    return pairs


def render_label_lines(metadata: dict[str, Any], indent: str = "") -> list[str]:
    """Render a metadata document as sorted ``key=value`` lines.

    Each flattened pair becomes ``<indent><key>=<display_value>``.
    """
    lines: list[str] = []
    for key, display in flatten_label_metadata(metadata):
        lines.append(f"{indent}{key}={display}")
    return lines


def _display_scalar(value: Any) -> str:
if isinstance(value, bool):
return "true" if value else "false"
if value is None:
return "null"
if isinstance(value, (int, float, str)):
return str(value)
return json.dumps(value, sort_keys=True)
32 changes: 1 addition & 31 deletions roar/application/labels.py
Original file line number Diff line number Diff line change
Expand Up @@ -15,6 +15,7 @@

from ..db.context import DatabaseContext
from ..execution.recording.dataset_metadata import AUTO_DATASET_LABEL_KEYS
from .label_rendering import flatten_label_metadata

RESERVED_LABEL_KEYS = set(AUTO_DATASET_LABEL_KEYS)

Expand Down Expand Up @@ -58,27 +59,6 @@ def parse_label_pairs(pairs: tuple[str, ...]) -> dict[str, Any]:
return metadata


def flatten_label_metadata(metadata: dict[str, Any]) -> list[tuple[str, str]]:
    """Flatten metadata into sorted ``(key, display_value)`` pairs.

    Nested dicts are walked depth-first with keys sorted at each level;
    nesting is rendered as dotted key paths (``a.b.c``).
    """
    flat: list[tuple[str, str]] = []

    def _walk(prefix: str, value: Any) -> None:
        # Dicts recurse with a dotted prefix; anything else is a leaf.
        if isinstance(value, dict):
            for key in sorted(value.keys()):
                next_prefix = f"{prefix}.{key}" if prefix else key
                _walk(next_prefix, value[key])
            return
        flat.append((prefix, _display_scalar(value)))

    _walk("", metadata)
    return flat


def render_label_lines(metadata: dict[str, Any], indent: str = "") -> list[str]:
    """Render a metadata document as sorted ``key=value`` lines.

    Each flattened pair becomes ``<indent><key>=<display_value>``.
    """
    return [f"{indent}{key}={value}" for key, value in flatten_label_metadata(metadata)]


class LabelService:
"""High-level local label workflow service."""

Expand Down Expand Up @@ -353,16 +333,6 @@ def _parse_scalar(raw: str) -> Any:
return stripped


def _display_scalar(value: Any) -> str:
    """Format a leaf value: JSON spellings for bool/None, ``str`` otherwise."""
    # bool is tested before int/float because bool subclasses int.
    if isinstance(value, bool):
        return "true" if value else "false"
    if value is None:
        return "null"
    if isinstance(value, (int, float, str)):
        return str(value)
    # Non-scalar leaves fall back to canonical JSON text.
    return json.dumps(value, sort_keys=True)


def _deep_merge(current: dict[str, Any], patch: dict[str, Any]) -> dict[str, Any]:
merged = json.loads(json.dumps(current))
for key, value in patch.items():
Expand Down
30 changes: 25 additions & 5 deletions roar/application/publish/collection.py
Original file line number Diff line number Diff line change
Expand Up @@ -11,6 +11,7 @@
from ...core.interfaces.logger import ILogger
from ...db.context import create_database_context
from ...db.hashing.backend import compute_hashes_batch
from ...db.query_context import create_query_database_context
from .lineage import LineageCollector
from .session import PublishSessionService
from .targets import (
Expand Down Expand Up @@ -39,13 +40,15 @@ def collect_register_lineage(
lineage_collector: LineageCollector,
session_service: PublishSessionService,
logger: ILogger,
dry_run: bool = False,
) -> tuple[CollectedRegisterLineage | None, str | None]:
"""Collect local lineage for a resolved register target."""
if target.kind == "step_reference":
return _collect_step_lineage(
step_reference=target.value,
roar_dir=roar_dir,
lineage_collector=lineage_collector,
dry_run=dry_run,
)
if target.kind == "job_uid":
return _collect_job_lineage(
Expand Down Expand Up @@ -82,18 +85,35 @@ def _collect_step_lineage(
step_reference: str,
roar_dir: Path,
lineage_collector: LineageCollector,
dry_run: bool,
) -> tuple[CollectedRegisterLineage | None, str | None]:
parsed = parse_register_step_reference(step_reference)
if parsed is None:
return None, f"Invalid DAG reference: {step_reference}"
step_number, is_build = parsed

with create_database_context(roar_dir) as db_ctx:
session = db_ctx.sessions.get_active()
if not session:
return None, "No active session. Run 'roar run' to create a session first."
if dry_run:
with create_query_database_context(roar_dir) as db_ctx:
session = db_ctx.sessions.get_active()
if not session:
return None, "No active session. Run 'roar run' to create a session first."
session_id = int(session["id"])

lineage = lineage_collector.collect_step_read_only(
session_id=session_id,
step_number=step_number,
roar_dir=roar_dir,
job_type="build" if is_build else None,
)
else:
with create_database_context(roar_dir) as db_ctx:
session = db_ctx.sessions.get_active()
if not session:
return None, "No active session. Run 'roar run' to create a session first."
session_id = int(session["id"])

lineage = lineage_collector.collect_step(
session_id=int(session["id"]),
session_id=session_id,
step_number=step_number,
roar_dir=roar_dir,
job_type="build" if is_build else None,
Expand Down
10 changes: 9 additions & 1 deletion roar/application/publish/job_preparation.py
Original file line number Diff line number Diff line change
Expand Up @@ -4,7 +4,6 @@

from typing import Any

from ...application.publish.registration import normalize_registration_hashes
from ...execution.framework.registry import (
is_execution_noise_job,
is_execution_submit_job,
Expand All @@ -13,6 +12,15 @@
from ...integrations.glaas.registration import _artifact_ref


def normalize_registration_hashes(*args: Any, **kwargs: Any) -> Any:
    """Load heavy registration hashing helpers only when needed."""
    # Import deferred to call time so this module does not pull
    # publish.registration onto the import path until it is actually used.
    from ...application.publish.registration import (
        normalize_registration_hashes as _impl,
    )

    return _impl(*args, **kwargs)


def normalize_jobs_for_registration(jobs: list[dict[str, Any]]) -> list[dict[str, Any]]:
"""Drop known noise jobs and repair unresolved local parent references."""
normalized = [dict(job) for job in jobs if not is_execution_noise_job(job)]
Expand Down
Loading
Loading