Skip to content
Open
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
57 changes: 54 additions & 3 deletions dashboard/backend/heartbeat_runner.py
Original file line number Diff line number Diff line change
Expand Up @@ -19,6 +19,7 @@

import argparse
import json
import logging
import os
import signal
import subprocess
Expand All @@ -28,6 +29,8 @@
from datetime import datetime, timezone
from pathlib import Path

log = logging.getLogger(__name__)

# Workspace root
WORKSPACE = Path(__file__).resolve().parent.parent.parent
sys.path.insert(0, str(Path(__file__).resolve().parent))
Expand Down Expand Up @@ -284,17 +287,65 @@ def step7_invoke_claude(

duration_ms = int((time.time() - start_time) * 1000)

tokens_in, tokens_out, cost_usd = _parse_claude_cli_usage(output) if status == "success" else (None, None, None)

return {
"status": status,
"output": output,
"error": error,
"duration_ms": duration_ms,
"tokens_in": None, # Claude CLI doesn't expose token counts easily
"tokens_out": None,
"cost_usd": None,
"tokens_in": tokens_in,
"tokens_out": tokens_out,
"cost_usd": cost_usd,
}


def _parse_claude_cli_usage(output: str) -> tuple[int | None, int | None, float | None]:
"""Extract (tokens_in, tokens_out, cost_usd) from a Claude CLI ``--output-format json`` payload.

The CLI emits a single-line JSON object on success with ``total_cost_usd`` at top level and
``usage.{input,output}_tokens`` nested. On any malformation the function logs a metadata-only
warning (no raw output) and returns ``(None, None, None)`` so the caller can persist a NULL row
without crashing.

``total_cost_usd`` is rounded to 6 decimal places (micro-dollar precision) so aggregated SUM
queries on ``heartbeat_runs.cost_usd`` (FLOAT column) don't accumulate float-representation
drift. Full ``Decimal`` would require a schema migration; rounding is the pragmatic mid-point.

SECURITY: the raw CLI output may contain PII or model-generated content from the user's prompt
(emails, credentials, customer data). The log line on failure intentionally contains ONLY the
exception class and the output length — no content preview.
"""
if not output:
return None, None, None
try:
payload = json.loads(output.strip().splitlines()[-1])
# `total_cost_usd` is float in the CLI today (2.1.141 verified), but accept stringified
# numerics defensively in case the format changes (e.g. wrapped Decimal serializer).
# Anything else (None, dict, list) falls back to NULL — caller persists missing data.
raw_cost = payload.get("total_cost_usd")
if isinstance(raw_cost, (int, float)):
cost_value = raw_cost
elif isinstance(raw_cost, str):
try:
cost_value = float(raw_cost)
except ValueError:
cost_value = None
else:
cost_value = None
cost_usd = round(cost_value, 6) if cost_value is not None else None
usage = payload.get("usage") or {}
Comment thread
sourcery-ai[bot] marked this conversation as resolved.
return usage.get("input_tokens"), usage.get("output_tokens"), cost_usd
except (json.JSONDecodeError, IndexError, TypeError, AttributeError) as parse_exc:
log.warning(
"step7_invoke_claude: failed to parse Claude CLI JSON output "
"(%s, output_len=%d); cost/tokens will be NULL for this run.",
parse_exc.__class__.__name__,
len(output),
)
return None, None, None


# ── Step 8: Persist status ────────────────────────────────────────────────────

def step8_persist(run_id: str, heartbeat_id: str, result: dict, trigger_id: str | None, triggered_by: str, prompt_preview: str, conn):
Expand Down