From f9652bbdac22418c1450380d09689455f405e216 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Pau=20Montero=20Par=C3=A9s?= Date: Sun, 26 Apr 2026 10:23:37 +0200 Subject: [PATCH 1/2] fix: use INSERT OR REPLACE so streaming dedup survives incremental scans Claude Code writes multiple JSONL records per assistant message during streaming, all sharing the same message.id; only the last record carries the final usage tallies. parse_jsonl_file dedups by keeping the last record seen, which is correct on a single full parse. But on incremental scans, if the streaming records straddle the previous scan boundary, scan 1 inserts a partial-count row, then scan 2 sees the final record and tries to insert it -- INSERT OR IGNORE silently drops it because of the unique index on message_id, locking in stale partial counts forever. Session totals, daily charts and hourly charts all inherit the wrong values. Switching to INSERT OR REPLACE gives last-write-wins semantics, which matches the intent of the dedup. The recompute at the end of scan() then refreshes session totals from the corrected turns rows. Co-Authored-By: Claude Opus 4.7 --- scanner.py | 7 ++++++- 1 file changed, 6 insertions(+), 1 deletion(-) diff --git a/scanner.py b/scanner.py index e40e100..91a0cf4 100644 --- a/scanner.py +++ b/scanner.py @@ -300,8 +300,13 @@ def upsert_sessions(conn, sessions): def insert_turns(conn, turns): + # INSERT OR REPLACE: if a later record arrives for the same message_id + # (Claude streams multiple records per message — the last has the final + # usage tallies), overwrite the earlier partial row. INSERT OR IGNORE + # would lock in stale partial counts when the streaming boundary fell + # between two incremental scans. conn.executemany(""" - INSERT OR IGNORE INTO turns + INSERT OR REPLACE INTO turns (session_id, timestamp, model, input_tokens, output_tokens, cache_read_tokens, cache_creation_tokens, tool_name, cwd, message_id) VALUES (?, ?, ?, ?, ?, ?, ?, ?, ?, ?) 
From b64f88d44ecd32b784fab88dfe110e6fd0e6f372 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Pau=20Montero=20Par=C3=A9s?= Date: Sun, 26 Apr 2026 10:23:49 +0200 Subject: [PATCH 2/2] fix(dashboard): query-string handling, hourly filter, server hardening * Strip query string before path matching in do_GET and do_POST. Reloading at /?range=week previously fell through to the 404 branch because self.path includes the query string. * Define the hourly filter from getRangeBounds(start, end) instead of an undefined `cutoff` variable, which threw ReferenceError and blocked all chart rendering. * Switch HTTPServer to ThreadingHTTPServer with allow_reuse_address so long rescans no longer freeze GETs and the port frees instantly on Ctrl+C. * Warn at startup when bound to a non-loopback host -- the dashboard has no auth, so any LAN peer could read project history or POST /api/rescan. * /api/rescan now defaults to an incremental scan; pass ?full=1 for the destructive full rebuild. The button in the UI keeps using the default, which is the right behavior for routine refreshes. 
Co-Authored-By: Claude Opus 4.7 --- dashboard.py | 26 ++++++++++++++++++-------- 1 file changed, 18 insertions(+), 8 deletions(-) diff --git a/dashboard.py b/dashboard.py index ebf8d5f..b0d8ada 100644 --- a/dashboard.py +++ b/dashboard.py @@ -5,7 +5,7 @@ import json import os import sqlite3 -from http.server import HTTPServer, BaseHTTPRequestHandler +from http.server import ThreadingHTTPServer, BaseHTTPRequestHandler from pathlib import Path from datetime import datetime @@ -742,7 +742,7 @@ def get_dashboard_data(db_path=DB_PATH): // Hourly aggregation (filtered by model + range, then bucketed by UTC hour) const hourlySrc = (rawData.hourly_by_model || []).filter(r => - selectedModels.has(r.model) && (!cutoff || r.day >= cutoff) + selectedModels.has(r.model) && (!start || r.day >= start) && (!end || r.day <= end) ); const hourlyAgg = aggregateHourly(hourlySrc, hourlyTZ); @@ -1242,13 +1242,14 @@ def log_message(self, format, *args): pass def do_GET(self): - if self.path in ("/", "/index.html"): + path = self.path.split("?", 1)[0] + if path in ("/", "/index.html"): self.send_response(200) self.send_header("Content-Type", "text/html; charset=utf-8") self.end_headers() self.wfile.write(HTML_TEMPLATE.encode("utf-8")) - elif self.path == "/api/data": + elif path == "/api/data": data = get_dashboard_data() body = json.dumps(data).encode("utf-8") self.send_response(200) @@ -1262,14 +1263,18 @@ def do_GET(self): self.end_headers() def do_POST(self): - if self.path == "/api/rescan": - # Full rebuild: delete DB and rescan from scratch. + path, _, query = self.path.partition("?") + if path == "/api/rescan": + # Default: incremental scan (fast, non-destructive). + # Opt-in full rebuild with ?full=1 — useful when pricing or + # parsing logic changes and historical rows need to be redone. # Pass DB_PATH / DEFAULT_PROJECTS_DIRS explicitly so tests that # patch the module globals are honored (scan's defaults are # frozen at def time and would otherwise target the real paths). 
import scanner db_path = DB_PATH - if db_path.exists(): + full = "full=1" in query + if full and db_path.exists(): db_path.unlink() result = scanner.scan( db_path=db_path, @@ -1290,8 +1295,13 @@ def do_POST(self): def serve(host=None, port=None): host = host or os.environ.get("HOST", "localhost") port = port or int(os.environ.get("PORT", "8080")) - server = HTTPServer((host, port), DashboardHandler) + ThreadingHTTPServer.allow_reuse_address = True + server = ThreadingHTTPServer((host, port), DashboardHandler) print(f"Dashboard running at http://{host}:{port}") + if host not in ("localhost", "127.0.0.1", "::1"): + print(f" WARNING: bound to {host} — no authentication. " + "Anyone reachable on this interface can read your project history " + "and trigger /api/rescan.") print("Press Ctrl+C to stop.") try: server.serve_forever()