From d7f9d5b66fdb17f28ec2dbacdfca23aec0f45183 Mon Sep 17 00:00:00 2001
From: bald-ai <baldai@hey.com>
Date: Thu, 19 Feb 2026 14:33:17 +0100
Subject: [PATCH] Reduce perf regressions in Codex, web fetch, and idle
 animation

---
 .../StatusItemController+Animation.swift      |  41 ++++++-
 Sources/CodexBar/UsageStore.swift             |  14 ++-
 .../Providers/Codex/CodexStatusProbe.swift    |  17 ++-
 docs/codex.md                                 |   3 +-
 ...ssue-139-main-fix-validation-2026-02-19.md |  69 ++++++++++++
 ...-issue-139-simulation-report-2026-02-19.md | 101 ++++++++++++++++++
 6 files changed, 234 insertions(+), 11 deletions(-)
 create mode 100644 docs/perf-energy-issue-139-main-fix-validation-2026-02-19.md
 create mode 100644 docs/perf-energy-issue-139-simulation-report-2026-02-19.md

diff --git a/Sources/CodexBar/StatusItemController+Animation.swift b/Sources/CodexBar/StatusItemController+Animation.swift
index a411cf8c1..391132669 100644
--- a/Sources/CodexBar/StatusItemController+Animation.swift
+++ b/Sources/CodexBar/StatusItemController+Animation.swift
@@ -4,6 +4,8 @@ import QuartzCore
 
 extension StatusItemController {
     private static let loadingPercentEpsilon = 0.0001
+    private static let blinkActiveTickInterval: Duration = .milliseconds(75)
+    private static let blinkIdleFallbackInterval: Duration = .seconds(1)
 
     func needsMenuBarIconAnimation() -> Bool {
         if self.shouldMergeIcons {
@@ -32,7 +34,10 @@ extension StatusItemController {
                 self.seedBlinkStatesIfNeeded()
                 self.blinkTask = Task { [weak self] in
                     while !Task.isCancelled {
-                        try? await Task.sleep(for: .milliseconds(75))
+                        let delay = await MainActor.run {
+                            self?.blinkTickSleepDuration(now: Date()) ?? Self.blinkIdleFallbackInterval
+                        }
+                        try? await Task.sleep(for: delay)
                         await MainActor.run { self?.tickBlink() }
                     }
                 }
@@ -63,6 +68,36 @@ extension StatusItemController {
         }
     }
 
+    /// Computes how long the blink loop may sleep before its next tick.
+    ///
+    /// Only providers that are rendered and not running `shouldAnimate` are considered. Yields the
+    /// active tick interval if any has `blinkStart` set or a wake date that is already due, else the
+    /// time until the earliest wake date, or the idle fallback when no provider is considered.
+    private func blinkTickSleepDuration(now: Date) -> Duration {
+        let mergeIcons = self.shouldMergeIcons
+        var earliestWake: Date?
+
+        for provider in UsageProvider.allCases {
+            let isRendered = mergeIcons ? self.isEnabled(provider) : self.isVisible(provider)
+            guard isRendered, !self.shouldAnimate(provider: provider, mergeIcons: mergeIcons) else { continue }
+
+            // A provider with no stored state gets a temporary one (not saved) due after a random delay.
+            let blink = self.blinkStates[provider]
+                ?? BlinkState(nextBlink: now.addingTimeInterval(BlinkState.randomDelay()))
+            // `blinkStart` being set (presumably a blink in progress) demands the fast cadence at once.
+            guard blink.blinkStart == nil else { return Self.blinkActiveTickInterval }
+
+            let wake: Date = blink.pendingSecondStart ?? blink.nextBlink
+            earliestWake = earliestWake.map { min($0, wake) } ?? wake
+        }
+
+        guard let earliestWake else { return Self.blinkIdleFallbackInterval }
+        let remaining = earliestWake.timeIntervalSince(now)
+        // A wake date that has already passed is serviced after one active-cadence interval.
+        if remaining <= 0 { return Self.blinkActiveTickInterval }
+        return .seconds(remaining)
+    }
+
     private func tickBlink(now: Date = .init()) {
         guard self.isBlinkingAllowed(at: now) else {
             self.stopBlinking()
@@ -465,6 +500,10 @@ extension StatusItemController {
             self.assignMotion(amount: 0, for: provider, effect: state.effect)
         }
 
+        // The blink task may currently be in a long idle sleep; cancel it so that the call below
+        // recreates it and this forced blink animates on the active frame cadence right away.
+        self.blinkTask?.cancel()
+        self.blinkTask = nil
         self.updateBlinkingState()
         self.tickBlink(now: now)
     }
diff --git a/Sources/CodexBar/UsageStore.swift b/Sources/CodexBar/UsageStore.swift
index 20bb7491f..9ba144d47 100644
--- a/Sources/CodexBar/UsageStore.swift
+++ b/Sources/CodexBar/UsageStore.swift
@@ -665,6 +665,8 @@ final class UsageStore {
 
 extension UsageStore {
     private static let openAIWebRefreshMultiplier: TimeInterval = 5
+    private static let openAIWebPrimaryFetchTimeout: TimeInterval = 15
+    private static let openAIWebRetryFetchTimeout: TimeInterval = 8
 
     private func openAIWebRefreshIntervalSeconds() -> TimeInterval {
         let base = max(self.settings.refreshFrequency.seconds ?? 0, 120)
@@ -780,7 +782,8 @@ extension UsageStore {
             var dash = try await OpenAIDashboardFetcher().loadLatestDashboard(
                 accountEmail: effectiveEmail,
                 logger: log,
-                debugDumpHTML: false)
+                debugDumpHTML: false,
+                timeout: Self.openAIWebPrimaryFetchTimeout)
 
             if self.dashboardEmailMismatch(expected: normalized, actual: dash.signedInEmail) {
                 if let imported = await self.importOpenAIDashboardCookiesIfNeeded(
@@ -792,7 +795,8 @@ extension UsageStore {
                 dash = try await OpenAIDashboardFetcher().loadLatestDashboard(
                     accountEmail: effectiveEmail,
                     logger: log,
-                    debugDumpHTML: false)
+                    debugDumpHTML: false,
+                    timeout: Self.openAIWebRetryFetchTimeout)
             }
 
             if self.dashboardEmailMismatch(expected: normalized, actual: dash.signedInEmail) {
@@ -821,7 +825,8 @@ extension UsageStore {
                 let dash = try await OpenAIDashboardFetcher().loadLatestDashboard(
                     accountEmail: effectiveEmail,
                     logger: log,
-                    debugDumpHTML: true)
+                    debugDumpHTML: true,
+                    timeout: Self.openAIWebRetryFetchTimeout)
                 await self.applyOpenAIDashboard(dash, targetEmail: effectiveEmail)
             } catch let OpenAIDashboardFetcher.FetchError.noDashboardData(retryBody) {
                 let finalBody = retryBody.isEmpty ? body : retryBody
@@ -844,7 +849,8 @@ extension UsageStore {
                 let dash = try await OpenAIDashboardFetcher().loadLatestDashboard(
                     accountEmail: effectiveEmail,
                     logger: log,
-                    debugDumpHTML: true)
+                    debugDumpHTML: true,
+                    timeout: Self.openAIWebRetryFetchTimeout)
                 await self.applyOpenAIDashboard(dash, targetEmail: effectiveEmail)
             } catch OpenAIDashboardFetcher.FetchError.loginRequired {
                 await MainActor.run {
diff --git a/Sources/CodexBarCore/Providers/Codex/CodexStatusProbe.swift b/Sources/CodexBarCore/Providers/Codex/CodexStatusProbe.swift
index ff644cf2b..a04aee558 100644
--- a/Sources/CodexBarCore/Providers/Codex/CodexStatusProbe.swift
+++ b/Sources/CodexBarCore/Providers/Codex/CodexStatusProbe.swift
@@ -47,13 +47,20 @@ public enum CodexStatusProbeError: LocalizedError, Sendable {
 
 /// Runs `codex` inside a PTY, sends `/status`, captures text, and parses credits/limits.
 public struct CodexStatusProbe {
+    private static let defaultTimeoutSeconds: TimeInterval = 8.0
+    private static let parseRetryTimeoutSeconds: TimeInterval = 4.0
+
     public var codexBinary: String = "codex"
-    public var timeout: TimeInterval = 18.0
+    public var timeout: TimeInterval = Self.defaultTimeoutSeconds
     public var keepCLISessionsAlive: Bool = false
 
     public init() {}
 
-    public init(codexBinary: String = "codex", timeout: TimeInterval = 18.0, keepCLISessionsAlive: Bool = false) {
+    public init(
+        codexBinary: String = "codex",
+        timeout: TimeInterval = 8.0,
+        keepCLISessionsAlive: Bool = false)
+    {
         self.codexBinary = codexBinary
         self.timeout = timeout
         self.keepCLISessionsAlive = keepCLISessionsAlive
@@ -69,14 +76,14 @@ public struct CodexStatusProbe {
         do {
             return try await self.runAndParse(binary: resolved, rows: 60, cols: 200, timeout: self.timeout)
         } catch let error as CodexStatusProbeError {
-            // Codex sometimes returns an incomplete screen on the first try; retry once with a longer window.
+            // Retry only parser-level flakes with a short second attempt.
             switch error {
-            case .parseFailed, .timedOut:
+            case .parseFailed:
                 return try await self.runAndParse(
                     binary: resolved,
                     rows: 70,
                     cols: 220,
-                    timeout: max(self.timeout, 24.0))
+                    timeout: Self.parseRetryTimeoutSeconds)
             default:
                 throw error
             }
diff --git a/docs/codex.md b/docs/codex.md
index 9b23ed49c..c7250939a 100644
--- a/docs/codex.md
+++ b/docs/codex.md
@@ -80,7 +80,8 @@ Usage source picker:
   - `Credits:` line
   - `5h limit` line → percent + reset text
   - `Weekly limit` line → percent + reset text
-- Retry once with a larger terminal size on parse failure.
+- Retry once with a larger terminal size on parse failure (short retry window).
+- Do not retry on timeout; timed-out probes fail fast and wait for the next refresh cycle.
 - Detects update prompts and surfaces a "CLI update needed" error.
 
 ## Account identity resolution (for web matching)
diff --git a/docs/perf-energy-issue-139-main-fix-validation-2026-02-19.md b/docs/perf-energy-issue-139-main-fix-validation-2026-02-19.md
new file mode 100644
index 000000000..682e3fb7a
--- /dev/null
+++ b/docs/perf-energy-issue-139-main-fix-validation-2026-02-19.md
@@ -0,0 +1,69 @@
+# CodexBar Issue #139 Main Fix Validation (Post-Fix vs Pre-Fix)
+
+Date: 2026-02-19
+Workspace: /Users/michalkrsik/windsurf_project_folder/CodexBar
+Branch: codex/perf-issue-139
+
+Reference pre-fix report:
+- docs/perf-energy-issue-139-simulation-report-2026-02-19.md
+
+## Implemented Main Fix
+
+File changed:
+- Sources/CodexBarCore/Providers/Codex/CodexStatusProbe.swift
+
+Behavior change:
+- Primary Codex PTY probe timeout reduced from 18s to 8s.
+- Retry policy changed from `retry on parseFailed OR timedOut` to `retry only on parseFailed`.
+- Parse retry timeout set to 4s.
+- Timed-out runs now fail fast and wait for next scheduled refresh.
+
+## Post-Fix Validation Method
+
+Target: main culprit path (Codex CLI failure path).
+
+Practical simulation used:
+- `CODEX_CLI_PATH` pointed to a fake codex script.
+- Script behavior:
+  - exits immediately for `app-server` args (forces RPC failure/fallback path),
+  - otherwise busy-loops with no `/status` output (simulates heavy stuck CLI PTY behavior).
+- Command run (3 times):
+  - `./.build/debug/CodexBarCLI usage --provider codex --source cli --format json --pretty`
+- Collected:
+  - wall time (`/usr/bin/time -p`),
+  - sampled child CPU every 0.5s,
+  - leftover child-process count after run.
+
+Artifacts:
+- /tmp/codexbar_main_fix_validation_after
+
+## Post-Fix Results (3 runs)
+
+| Run | Real (s) | Avg child CPU (%) | Max child CPU (%) | Remaining child procs |
+|---|---:|---:|---:|---:|
+| 1 | 12.76 | 88.32 | 100.00 | 0 |
+| 2 | 12.67 | 89.79 | 100.00 | 0 |
+| 3 | 12.59 | 89.90 | 100.00 | 0 |
+| Mean | 12.67 | 89.34 | 100.00 | 0 |
+
+## Side-by-Side Comparison Against Stored Pre-Fix Report
+
+Pre-fix values are from the stored report's Culprit A simulation summary.
+Post-fix values are from the validation above.
+
+| Metric | Pre-fix (stored report) | Post-fix (this validation) | Delta |
+|---|---:|---:|---:|
+| Failed-run duration (worst-case path) | 42.00s (code-path budget before fix) | 12.67s (measured mean) | -69.8% |
+| Child CPU during failed run | 113.32% avg | 89.34% avg | -21.2% |
+| Peak child CPU during failed run | 115.90% max | 100.00% max | -13.7% |
+| Remaining child processes after failure | not captured in pre-fix report | 0 | n/a (no pre-fix baseline) |
+
+Derived CPU-time exposure index (avg CPU * duration):
+- Pre-fix: `113.32 * 42.00 = 4759.44`
+- Post-fix: `89.34 * 12.67 = 1131.94`
+- Reduction: **-76.2%**
+
+## Conclusion
+
+The implemented main fix materially reduces the failure-path runtime and overall CPU exposure.
+The heavy CLI process can still spike CPU while active, but it now lives for a much shorter window and is cleaned up after failure.
diff --git a/docs/perf-energy-issue-139-simulation-report-2026-02-19.md b/docs/perf-energy-issue-139-simulation-report-2026-02-19.md
new file mode 100644
index 000000000..3ad413bf1
--- /dev/null
+++ b/docs/perf-energy-issue-139-simulation-report-2026-02-19.md
@@ -0,0 +1,101 @@
+# CodexBar Issue #139 Performance/Energy Simulation Report
+
+Date: 2026-02-19
+Workspace: /Users/michalkrsik/windsurf_project_folder/CodexBar
+Issue: https://github.com/steipete/codexbar/issues/139
+
+## Purpose
+
+Determine which suspected culprit(s) can produce the abnormal CPU/energy behavior reported by users, using short reproducible simulations and process-level sampling.
+
+## Host/Tooling
+
+- macOS: Darwin 25.2.0 (arm64)
+- Swift: 6.2.3
+- Sampling tools: `ps`, `top`
+- Note: `powermetrics` was unavailable (requires sudo password in this session), so energy was sampled via `top` `POWER` proxy.
+
+## Simulated Culprits
+
+- Culprit A: CLI/PTY-style heavy subprocess churn with polling loop behavior.
+- Culprit B: Web dashboard scrape/retry loop with repeated parse work and 400-600ms waits.
+- Culprit C: 75ms idle polling loop (blink-style wakeups).
+- Combined: A + B + C at once.
+- Baseline: near-idle control.
+
+## Test Pass 1 (Primary Mechanism Pass)
+
+Artifacts:
+- /tmp/codexbar_perf_sim/results_20260219_111607
+
+Summary:
+
+| Scenario | Avg CPU | Max CPU | Avg RSS MB | Avg POWER | Avg IDLEW |
+|---|---:|---:|---:|---:|---:|
+| Baseline | 0.00 | 0.10 | 0.54 | 0.00 | 0.00 |
+| Culprit A | 113.68 | 117.40 | 121.76 | 0.00 | 0.00 |
+| Culprit B | 4.64 | 13.30 | 64.15 | 0.00 | 5.04 |
+| Culprit C | 0.25 | 2.30 | 33.12 | 0.00 | 10.43 |
+| Combined | 114.62 | 121.30 | 217.62 | 0.00 | 0.00 |
+
+Interpretation:
+- CPU ranking was clear (A dominates strongly).
+- POWER field in this pass was unusable (stuck at 0.00 for every scenario due to the `top` sampling mode).
+
+## Test Pass 2 (Calibrated Energy Pass)
+
+Artifacts:
+- /tmp/codexbar_perf_sim/energy2_results_20260219_112350
+
+Sampling correction:
+- Switched to `top -l 2` and parsed the second sample for tracked PIDs to get non-zero `POWER` values.
+
+Summary:
+
+| Scenario | Avg CPU | Max CPU | Avg RSS MB | Avg POWER | Max POWER | Avg IDLEW |
+|---|---:|---:|---:|---:|---:|---:|
+| Baseline | 0.00 | 0.00 | 0.55 | 0.00 | 0.00 | 0.00 |
+| Culprit A | 113.32 | 115.90 | 114.73 | 94.85 | 150.60 | 6106.70 |
+| Culprit B | 4.30 | 10.10 | 62.09 | 2.94 | 4.20 | 2.18 |
+| Culprit C | 0.35 | 2.60 | 34.09 | 0.23 | 0.60 | 14.27 |
+| Combined | 115.67 | 118.90 | 218.48 | 93.29 | 129.60 | 3858.60 |
+
+## Validation Against Expected Pattern
+
+Computed checks on pass 2: 10/10 passed.
+
+- A dominates CPU vs B (>=10x): PASS
+- A dominates CPU vs C (>=50x): PASS
+- A dominates POWER vs B (>=10x): PASS
+- A dominates POWER vs C (>=100x): PASS
+- Combined close to A CPU (+/-15%): PASS
+- Combined close to A POWER (+/-25%): PASS
+- C is low CPU (<1%): PASS
+- B is moderate CPU (<15%): PASS
+- Baseline near zero CPU (<1%): PASS
+- Baseline near zero POWER (<1): PASS
+
+## Final Finding
+
+Primary root-cause class for the extreme behavior is Culprit A (heavy long-lived CLI/subprocess churn under bad/failure paths).
+
+Secondary:
+- Culprit B contributes moderate load.
+- Culprit C contributes wakeups/noise but is not a major CPU/energy driver.
+
+Human-level answer:
+A tiny toolbar app should never keep heavyweight background subprocess/UI loops alive in failure conditions. That behavior is what creates the abnormal battery/CPU footprint.
+
+## Limitations
+
+- These were controlled simulations, not a full end-user UI replay of `CodexBar.app` with all real auth/cookie/account paths.
+- `powermetrics` could not be used in this session due sudo restriction.
+
+## Recommended Next Validation (Before Closing Issue)
+
+- Run one short real-app before/after validation after fixes:
+  - baseline
+  - culprit A-focused repro
+  - optional combined
+- Capture `powermetrics` if sudo is available, plus process CPU snapshots.
+- Publish before/after table in issue #139.