From b2064b7fc41c5991ffb8e7c72ba00c006ad9e72b Mon Sep 17 00:00:00 2001 From: Marcello Alarcon Date: Thu, 14 May 2026 10:47:45 -0300 Subject: [PATCH] fix(heartbeat-runner): capture cost_usd and token counts from Claude CLI MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit step7_invoke_claude was hardcoding tokens_in/tokens_out/cost_usd to None with a comment claiming "Claude CLI doesn't expose token counts easily". That comment is outdated — the runner already passes `--output-format json` to the CLI, which emits a single-line JSON object containing: - `total_cost_usd` (top level) - `usage.input_tokens`, `usage.output_tokens` - `usage.cache_creation_input_tokens`, `usage.cache_read_input_tokens` - `modelUsage..{costUSD,inputTokens,outputTokens,...}` Result: the heartbeat_runs.cost_usd column was always NULL despite the schema, making cost tracking and budget analysis impossible. Sourcery review rounds 1, 2, and 3 incorporated (2026-05-14): Round 1: - Narrowed the catch from blanket Exception to specific failure modes (json.JSONDecodeError + IndexError + TypeError + AttributeError). - Added warning logging on parse failure (silent data loss was the original problem; swallowing the new parse failure would be self-defeating). - Rounded total_cost_usd to 6 decimal places (micro-dollar) before returning so aggregated SUM queries don't accumulate float drift. Round 2: - Switched from `print(file=sys.stderr)` to `logging.getLogger(__name__)` + `log.warning(...)` — consistent control plane, no new dependency. - Extracted `_parse_claude_cli_usage(output)` helper so step7 stays focused on subprocess orchestration. Helper is pure and unit-testable. - SECURITY: removed the 200-char raw output preview from the failure log. CLI output can contain PII or sensitive model-generated content from the user's prompt. Log now contains ONLY the exception class and the output length. Round 3: - Accept stringified `total_cost_usd` defensively (forward-compat with a possible future Decimal-serializer wrap on the CLI side). Today CLI 2.1.141 always emits float, but the cost is one isinstance check and a try/float() — cheap forward-compat for monetary data. - Non-numeric / dict / list / None all still fall back to NULL safely. Round 3 also raised two points we didn't change: - "json.loads with no import in the diff" → false positive. `json` is already imported at module top (line 21 of upstream/main, unchanged by this PR, hence not in the diff). No NameError. - "Pass status into the helper" → the helper is only called when status == 'success' (one-line ternary in the caller). Logging status inside the helper would be redundant. ## Why this matters now Anthropic announced (2026-05-14) that starting 2026-06-15, Max 5x plan subscribers get a dedicated $100/month credit for Agent SDK / claude -p usage, separate from interactive subscription limits. To decide whether the credit covers the workload (and how much extra-usage cap to set), operators need per-run cost data — which is exactly what this fix restores. With the fix deployed today, 30 days of real data accumulate before the credit launches. ## Test plan - [x] CLI shape verified against Claude CLI 2.1.141 (single-line JSON, `total_cost_usd` top-level, `usage.input_tokens`, `usage.output_tokens`). - [x] Helper sanity scenarios: - Float cost (today's reality): `(tokens, tokens, 0.524192)` ✓ - String cost (forward-compat): `(1, 2, 1.23e-4)` ✓ - Garbage string cost: `(1, 2, None)` — ValueError caught ✓ - Dict cost (defensive): `(3, 4, None)` — falls to None ✓ - Missing `usage`: `(None, None, 1.0)` ✓ - Malformed JSON: `(None, None, None)`, log.warning fires ✓ - Empty output: `(None, None, None)`, no log ✓ - [x] Failure path unchanged: status != "success" → helper not invoked, all three fields remain None (existing contract preserved). - [x] SECURITY: failure log contains only exception class + output length, never the raw output content. Co-Authored-By: Claude Opus 4.7 --- dashboard/backend/heartbeat_runner.py | 57 +++++++++++++++++++++++++-- 1 file changed, 54 insertions(+), 3 deletions(-) diff --git a/dashboard/backend/heartbeat_runner.py b/dashboard/backend/heartbeat_runner.py index 9966643b..eb82b99f 100644 --- a/dashboard/backend/heartbeat_runner.py +++ b/dashboard/backend/heartbeat_runner.py @@ -19,6 +19,7 @@ import argparse import json +import logging import os import signal import subprocess @@ -28,6 +29,8 @@ from datetime import datetime, timezone from pathlib import Path +log = logging.getLogger(__name__) + # Workspace root WORKSPACE = Path(__file__).resolve().parent.parent.parent sys.path.insert(0, str(Path(__file__).resolve().parent)) @@ -284,17 +287,65 @@ def step7_invoke_claude( duration_ms = int((time.time() - start_time) * 1000) + tokens_in, tokens_out, cost_usd = _parse_claude_cli_usage(output) if status == "success" else (None, None, None) + return { "status": status, "output": output, "error": error, "duration_ms": duration_ms, - "tokens_in": None, # Claude CLI doesn't expose token counts easily - "tokens_out": None, - "cost_usd": None, + "tokens_in": tokens_in, + "tokens_out": tokens_out, + "cost_usd": cost_usd, } +def _parse_claude_cli_usage(output: str) -> tuple[int | None, int | None, float | None]: + """Extract (tokens_in, tokens_out, cost_usd) from a Claude CLI ``--output-format json`` payload. + + The CLI emits a single-line JSON object on success with ``total_cost_usd`` at top level and + ``usage.{input,output}_tokens`` nested. On any malformation the function logs a metadata-only + warning (no raw output) and returns ``(None, None, None)`` so the caller can persist a NULL row + without crashing. + + ``total_cost_usd`` is rounded to 6 decimal places (micro-dollar precision) so aggregated SUM + queries on ``heartbeat_runs.cost_usd`` (FLOAT column) don't accumulate float-representation + drift. Full ``Decimal`` would require a schema migration; rounding is the pragmatic mid-point. + + SECURITY: the raw CLI output may contain PII or model-generated content from the user's prompt + (emails, credentials, customer data). The log line on failure intentionally contains ONLY the + exception class and the output length — no content preview. + """ + if not output: + return None, None, None + try: + payload = json.loads(output.strip().splitlines()[-1]) + # `total_cost_usd` is float in the CLI today (2.1.141 verified), but accept stringified + # numerics defensively in case the format changes (e.g. wrapped Decimal serializer). + # Anything else (None, dict, list) falls back to NULL — caller persists missing data. + raw_cost = payload.get("total_cost_usd") + if isinstance(raw_cost, (int, float)): + cost_value = raw_cost + elif isinstance(raw_cost, str): + try: + cost_value = float(raw_cost) + except ValueError: + cost_value = None + else: + cost_value = None + cost_usd = round(cost_value, 6) if cost_value is not None else None + usage = payload.get("usage") or {} + return usage.get("input_tokens"), usage.get("output_tokens"), cost_usd + except (json.JSONDecodeError, IndexError, TypeError, AttributeError) as parse_exc: + log.warning( + "step7_invoke_claude: failed to parse Claude CLI JSON output " + "(%s, output_len=%d); cost/tokens will be NULL for this run.", + parse_exc.__class__.__name__, + len(output), + ) + return None, None, None + + # ── Step 8: Persist status ──────────────────────────────────────────────────── def step8_persist(run_id: str, heartbeat_id: str, result: dict, trigger_id: str | None, triggered_by: str, prompt_preview: str, conn):