From 72d57652f6d2afe1a17d3e33cbcd5727731534d5 Mon Sep 17 00:00:00 2001
From: Justin Lee <lis186@gmail.com>
Date: Tue, 9 Jun 2026 00:44:19 +0800
Subject: [PATCH 1/2] fix(#58): use anthropic-beta context-1m header as
 non-lagging 1M window signal
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

getMaxContext took model identity from the (lagging) system-prompt marker, so
after a mid-session model switch the context-window denominator resolved to a
stale model for several turns — ctx% showed ~99% then cliff-dropped to ~20%.

- identity now comes from the request `model` field (updates immediately)
- 1M detected from anthropic-beta `context-1m-2025-08-07` (empirically confirmed
  present on every turn) OR the legacy [1m] marker, gated by SUPPORTS_1M so the
  client-level beta flag riding haiku title-gen turns can't over-claim 1M
- usage escape-hatch retained as monotonic lower bound
- beta1m threaded from request header through index→forward→wire-parser + HUD
- docs/wire-protocol-reference.md: record context-1m-2025-08-07 + rate-limit≠window

Antifragile: any single signal can vanish (beta token renamed, marker never
arrives, model absent from table) and detection degrades to the next source,
never below the usage lower bound. New 1M families = one line in SUPPORTS_1M.

Red→green: test/config.test.js adds fail-on-old/pass-on-new cases keyed on the
issue's turn-35 data (stale marker + opus-4-8 + beta1m must return 1M).
---
 docs/wire-protocol-reference.md  |  3 ++
 server/config.js                 | 41 +++++++++++++++++------
 server/forward.js                |  8 ++---
 server/helpers.js                |  8 +++--
 server/index.js                  |  8 +++++
 server/wire-parsers/anthropic.js |  2 +-
 test/config.test.js              | 56 ++++++++++++++++++++++++++++++++
 7 files changed, 108 insertions(+), 18 deletions(-)

diff --git a/docs/wire-protocol-reference.md b/docs/wire-protocol-reference.md
index c41e5f8..6c4e9e4 100644
--- a/docs/wire-protocol-reference.md
+++ b/docs/wire-protocol-reference.md
@@ -27,6 +27,7 @@
 
 | Date | Agent | Version | Change |
 |------|-------|---------|--------|
+| 2026-06-09 | Claude Code | 2.1.x | Confirmed via loopback wire capture: `anthropic-beta` carries `context-1m-2025-08-07` on **every** request when the account's 1M context is enabled — including haiku title-gen turns (it is a client/account-level capability flag, not a per-turn window declaration). ccxray now uses it as the non-lagging 1M-window signal, gated by model capability (`SUPPORTS_1M`), replacing sole reliance on the lagging system-prompt `[1m]` marker (#58). |
 | 2026-06-05 | ccxray | 1.11.x | Usage normalization: OpenAI `input_tokens` includes `cached_tokens` (subset), unlike Anthropic's disjoint fields. `normalizeUsageForProvider` now subtracts the overlap so canonical `input_tokens + cache_read + cache_creation = total context` holds for both providers. Normalized entries carry `_ccxrayUsageNormalized: true`. Historical entries normalized on restore (in-memory, index unchanged). Cache display: Codex sessions show `cache N% hit` instead of TTL countdown; topbar adapts per provider (`ephemeral-ttl` vs `server-managed`). `UPSTREAM_PROFILES` registry added to `providers.js`. |
 | 2026-06-04 | ccxray | 1.10.x | Fix: WS `stopReason` now extracts `response.status` from terminal events (`completed`/`incomplete`/`failed`/`cancelled`) instead of WS close reason. WS `title` extracts user input summary via `getOpenAIInputSummary` instead of hardcoded string. Non-terminal statuses (`in_progress`/`queued`) are ignored to prevent masking close/error reasons. |
 | 2026-06-02 | ccxray | 1.10.0 | Doc audit: 13 major + 25 minor corrections applied (F1–F38) |
@@ -65,6 +66,8 @@
 | ChatGPT base path | N/A | `/backend-api/codex/...` (the proxy strips the `/v1` prefix before prepending the base path, so `POST /v1/responses` → `/backend-api/codex/responses`) | `obs-stable` codex ≥0.131 |
 | Version header | `anthropic-version: 2023-06-01` | N/A | `contractual` |
 | Beta features | `anthropic-beta: ...` (comma-separated) | `openai-beta: ...` | `contractual` |
+| 1M context window signal | `anthropic-beta` list contains `context-1m-2025-08-07` (present on every request when 1M enabled — a client-level flag, also on haiku turns; does **not** lag a mid-session model switch, unlike the system-prompt `[1m]` marker) | N/A | `obs-stable` Claude Code ≥2.1.x |
+| Rate-limit ≠ context window | `anthropic-ratelimit-tokens-limit` (e.g. `80000`) is a per-window quota, **not** the context window — never use it to size the denominator | N/A | `obs-stable` |
 
 ---
 
diff --git a/server/config.js b/server/config.js
index 0e232c0..4337843 100644
--- a/server/config.js
+++ b/server/config.js
@@ -224,6 +224,14 @@ const MODEL_CONTEXT_FALLBACK = {
 };
 const DEFAULT_CONTEXT = 200_000;
 
+// Models that can actually be served with a 1M context window. The 1M signal
+// (anthropic-beta context-1m header, or the system "[1m]" marker) is a
+// client/account-level capability flag — it rides on EVERY Claude Code request,
+// including haiku title-gen turns. Gate the 1M jump on the model itself so a
+// haiku request carrying the beta header is not shown as a 1M window. New 1M
+// families get one line here, not a logic change.
+const SUPPORTS_1M = /^claude-(opus|sonnet)-4/;
+
 // Extract effective model ID from system prompt (includes [1m] suffix if present).
 // API request model field never includes [1m], but system prompt does:
 //   "The exact model ID is claude-opus-4-6[1m]."
@@ -237,14 +245,27 @@ function extractModelFromSystem(system) {
   return null;
 }
 
-function getMaxContext(model, system) {
-  // Prefer model ID from system prompt (has [1m] suffix when applicable)
-  const effective = extractModelFromSystem(system) || model;
-  if (!effective) return DEFAULT_CONTEXT;
-  // 1) Explicit suffix: "claude-opus-4-6[1m]" → 1M
-  if (/\[1m\]/i.test(effective)) return 1_000_000;
-  // 2) Known Claude Code defaults (200K standard plan)
-  const stripped = effective.replace(/\[.*\]/, '');
+function getMaxContext(model, system, opts = {}) {
+  // Model IDENTITY comes from the request `model` field — it updates immediately
+  // on a mid-session model switch. The system marker is only a fallback for
+  // identity, because Claude Code's "The exact model ID is ..." line lags several
+  // turns behind the switch and would otherwise corrupt the window denominator
+  // (issue #58). The system marker is still the place the "[1m]" suffix appears.
+  const sysModel = extractModelFromSystem(system);
+  const identity = model || sysModel;
+  if (!identity) return DEFAULT_CONTEXT;
+  const stripped = identity.replace(/\[.*\]/, '');
+  // 1) 1M plan active? Two non-mutually-exclusive signals:
+  //    - opts.beta1m: anthropic-beta `context-1m-*` request header (non-lagging,
+  //      present on every turn — the authoritative plan flag).
+  //    - "[1m]" suffix in the system marker (legacy; lags after a model switch).
+  //    Either signal counts, but only for a 1M-capable model (SUPPORTS_1M) so a
+  //    client-level flag riding on a haiku request does not over-claim 1M.
+  const has1mSignal = opts.beta1m === true
+    || /\[1m\]/i.test(sysModel || '')
+    || /\[1m\]/i.test(model || '');
+  if (has1mSignal && SUPPORTS_1M.test(stripped)) return 1_000_000;
+  // 2) Known Claude Code / Codex defaults (200K / 400K)
   const keys = Object.keys(MODEL_CONTEXT_FALLBACK).sort((a, b) => b.length - a.length);
   for (const key of keys) {
     if (stripped.startsWith(key)) return MODEL_CONTEXT_FALLBACK[key];
@@ -263,8 +284,8 @@ function getMaxContext(model, system) {
 // base, bump Claude models up to 1M so the dashboard "X / Y (Z%)" stays
 // self-consistent. Non-Claude models are not bumped because we have no
 // reliable next tier to escalate to.
-function inferMaxContext(model, system, usage) {
-  const base = getMaxContext(model, system);
+function inferMaxContext(model, system, usage, opts = {}) {
+  const base = getMaxContext(model, system, opts);
   if (!usage) return base;
   const used = (usage.input_tokens || 0)
     + (usage.cache_creation_input_tokens || 0)
diff --git a/server/forward.js b/server/forward.js
index 29a0562..6e4b6f8 100644
--- a/server/forward.js
+++ b/server/forward.js
@@ -589,7 +589,7 @@ function handleSSEResponse(ctx, proxyRes, clientRes) {
         console.log(`\x1b[90m   Context HUD: injecting into session ${reqSessionId.slice(0, 8)}\x1b[0m`);
         _hudLoggedSessions.add(reqSessionId);
       }
-      const maxCtx = config.inferMaxContext(parsedBody?.model, parsedBody?.system, usage);
+      const maxCtx = config.inferMaxContext(parsedBody?.model, parsedBody?.system, usage, { beta1m: ctx.beta1m });
       const pct = (totalCtx / maxCtx * 100).toFixed(1);
       const newIdx = maxBlockIndex + 1;
       const costInfo = calculateCost(usage, parsedBody?.model);
@@ -666,7 +666,7 @@ function handleSSEResponse(ctx, proxyRes, clientRes) {
         proxyRes, sessionId, sessionInferred: ctx.sessionInferred,
         sysHash: ctx.sysHash, toolsHash: ctx.toolsHash, coreHash: ctx.coreHash,
         cwd: store.sessionMeta[sessionId]?.cwd || null,
-        stopReason, title, thinkingDuration, thinkingStripped,
+        stopReason, title, thinkingDuration, thinkingStripped, beta1m: ctx.beta1m,
         isSubagent, toolFail: helpers.hasToolFail(parsedBody), startTime,
       }),
     };
@@ -695,7 +695,7 @@ function handleSSEResponse(ctx, proxyRes, clientRes) {
     const outTok = usage?.output_tokens ? `  out=${usage.output_tokens.toLocaleString()} tok` : '';
     const prefix = ctx.attribPrefix || '';
     console.log(`${color}📥 [${helpers.taipeiTime()}]  ${prefix}  ${glyph} ${code}  ${elapsed}s${outTok}\x1b[0m`);
-    if (usage) helpers.printContextBar(usage, parsedBody?.model, parsedBody?.system);
+    if (usage) helpers.printContextBar(usage, parsedBody?.model, parsedBody?.system, ctx.beta1m);
     if (entry.cost?.cost != null) {
       store.sessionCosts.set(sessionId, (store.sessionCosts.get(sessionId) || 0) + entry.cost.cost);
       console.log(`  💰 $${entry.cost.cost.toFixed(4)} this turn | $${store.sessionCosts.get(sessionId).toFixed(4)} session`);
@@ -886,7 +886,7 @@ function handleNonSSEResponse(ctx, proxyRes, clientRes) {
           sessionId, sessionInferred: ctx.sessionInferred,
           sysHash: ctx.sysHash, toolsHash: ctx.toolsHash, coreHash: ctx.coreHash,
           cwd: store.sessionMeta[sessionId]?.cwd || null,
-          stopReason, title, thinkingDuration: null, thinkingStripped,
+          stopReason, title, thinkingDuration: null, thinkingStripped, beta1m: ctx.beta1m,
           isSubagent, toolFail: helpers.hasToolFail(parsedBody), startTime,
         }),
       };
diff --git a/server/helpers.js b/server/helpers.js
index 7d5fde1..4bc6ebe 100644
--- a/server/helpers.js
+++ b/server/helpers.js
@@ -369,14 +369,16 @@ function totalContextTokens(usage) {
     + (usage.cache_read_input_tokens || 0);
 }
 
-function printContextBar(usage, model, system) {
+function printContextBar(usage, model, system, beta1m) {
   const { inferMaxContext } = require('./config');
   if (!usage) return;
   // Use inferMaxContext (not getMaxContext) so the terminal HUD bumps
   // claude-opus-* / claude-sonnet-* to 1M when observed usage exceeds 200K
   // but the [1m] marker isn't present in the system prompt — otherwise the
-  // bar clamps to "100% (X / 200,000)" while X overflows the max.
-  const maxCtx = inferMaxContext(model, system, usage);
+  // bar clamps to "100% (X / 200,000)" while X overflows the max. beta1m
+  // (anthropic-beta context-1m header) gives 1M immediately, before usage
+  // crosses 200K and without waiting for the lagging [1m] marker (#58).
+  const maxCtx = inferMaxContext(model, system, usage, { beta1m });
   const used = totalContextTokens(usage);
   if (!used) return;
   const pct = Math.min(100, (used / maxCtx) * 100);
diff --git a/server/index.js b/server/index.js
index fb95415..a600af8 100755
--- a/server/index.js
+++ b/server/index.js
@@ -376,9 +376,17 @@ const server = http.createServer((clientReq, clientRes) => {
     // Build context for forwarding
     const fwdHeaders = buildForwardHeaders(clientReq.headers, upstream);
 
+    // #58: the 1M context window is enabled via the `anthropic-beta:
+    // context-1m-*` request header. Unlike the system-prompt "[1m]" marker, this
+    // header rides every turn (it does not lag a mid-session model switch), so it
+    // is the authoritative, non-lagging signal for the context-window denominator.
+    // Carried on ctx and fed into inferMaxContext downstream.
+    const beta1m = /(^|,)\s*context-1m-/.test(clientReq.headers['anthropic-beta'] || '');
+
     const ctx = {
       id, ts, startTime, parsedBody, rawBody, clientReq, clientRes, fwdHeaders,
       reqSessionId, reqWritePromise, sysHash, toolsHash, coreHash, sessionInferred, upstream,
+      beta1m,
       isSubagent: provider === 'openai' ? isOpenAISubagent(clientReq.headers, parsedBody) : undefined,
     };
 
diff --git a/server/wire-parsers/anthropic.js b/server/wire-parsers/anthropic.js
index 70f874c..5a9b7cd 100644
--- a/server/wire-parsers/anthropic.js
+++ b/server/wire-parsers/anthropic.js
@@ -58,7 +58,7 @@ function buildEntryFields(ctx) {
     cwd: ctx.cwd ?? null,
     usage,
     cost: calculateCost(usage, model),
-    maxContext: config.inferMaxContext(model, parsedBody?.system, usage),
+    maxContext: config.inferMaxContext(model, parsedBody?.system, usage, { beta1m: ctx.beta1m }),
     responseMetadata: undefined,
     stopReason: ctx.stopReason || '',
     title: ctx.title || null,
diff --git a/test/config.test.js b/test/config.test.js
index f374d91..96bb91e 100644
--- a/test/config.test.js
+++ b/test/config.test.js
@@ -649,3 +649,59 @@ describe('inferMaxContext', () => {
     assert.equal(inferMaxContext('claude-opus-4-7', null, usage), 1_000_000);
   });
 });
+
+// ── #58: anthropic-beta context-1m header as a non-lagging 1M signal ─────────
+// Empirically confirmed (2026-06-09): Claude Code sends
+//   anthropic-beta: ...,context-1m-2025-08-07,...
+// on EVERY request when the account has the 1M context beta enabled. Unlike the
+// system-prompt "[1m]" marker (which lags several turns after a mid-session
+// model switch), this header is present on every turn, so it fixes the lag-window
+// flicker / ctx% cliff at the source. Because the header is a client-level
+// capability flag (also present on haiku title-gen requests), it must be GATED by
+// model 1M-capability so it does not over-claim a 1M window for haiku.
+describe('getMaxContext / inferMaxContext — context-1m beta header (#58)', () => {
+  const { getMaxContext, inferMaxContext } = require('../server/config');
+  const staleMarker = [{ type: 'text', text: 'The exact model ID is claude-opus-4-6.' }]; // names the pre-switch model, no [1m] suffix
+
+  it('FIX: stale system marker no longer corrupts identity — beta header wins (fail-on-old)', () => {
+    // The reported bug: request model already switched to opus-4-8 (1M plan), but
+    // the system marker still lags on opus-4-6 with no [1m]. Old code trusted the
+    // stale marker for identity AND had no beta signal → 200K → ctx% ~99%.
+    const usage = { input_tokens: 196_602 }; // turn 35 from the issue, < 200K
+    assert.equal(inferMaxContext('claude-opus-4-8', staleMarker, usage, { beta1m: true }), 1_000_000);
+  });
+
+  it('FIX: beta header gives 1M even with no [1m] marker and small usage (fail-on-old)', () => {
+    // Lag window, early turn: no [1m] anywhere, usage below 200K. The header is the
+    // only non-lagging signal — without it the turn shows 200K until usage crosses.
+    assert.equal(getMaxContext('claude-opus-4-8', null, { beta1m: true }), 1_000_000);
+    assert.equal(inferMaxContext('claude-opus-4-8', null, { input_tokens: 50_000 }, { beta1m: true }), 1_000_000);
+    assert.equal(inferMaxContext('claude-sonnet-4-6', null, { input_tokens: 50_000 }, { beta1m: true }), 1_000_000);
+  });
+
+  it('GUARD: beta header does NOT over-claim 1M for non-1M-capable models', () => {
+    // haiku title-gen requests carry the same client-level beta header but haiku
+    // does not support 1M — must stay at base when usage is small.
+    assert.equal(getMaxContext('claude-haiku-4-5', null, { beta1m: true }), 200_000);
+    assert.equal(inferMaxContext('claude-haiku-4-5', null, { input_tokens: 50_000 }, { beta1m: true }), 200_000);
+    // claude-3 families are not 1M-capable either.
+    assert.equal(getMaxContext('claude-3-opus', null, { beta1m: true }), 200_000);
+  });
+
+  it('GUARD: no signal at all stays at base (no false 1M)', () => {
+    assert.equal(getMaxContext('claude-opus-4-8', null), 200_000); // opts argument omitted
+    assert.equal(getMaxContext('claude-opus-4-8', null, {}), 200_000); // opts given, beta1m unset
+    assert.equal(inferMaxContext('claude-opus-4-8', null, { input_tokens: 50_000 }), 200_000);
+  });
+
+  it('REGRESSION: existing [1m] marker path and usage hatch still work without opts', () => {
+    const marker1m = [{ type: 'text', text: 'The exact model ID is claude-opus-4-8[1m].' }];
+    assert.equal(getMaxContext('claude-opus-4-8', marker1m), 1_000_000); // [1m] comes from the system marker only
+    assert.equal(inferMaxContext('claude-haiku-4-5', null, { input_tokens: 260_000 }), 1_000_000); // usage hatch
+    assert.equal(getMaxContext('gpt-5.1-codex', null), 400_000);
+  });
+
+  it('GUARD: beta header is ignored for OpenAI models (no 1M concept there)', () => {
+    assert.equal(getMaxContext('gpt-5.1-codex', null, { beta1m: true }), 400_000);
+  });
+});

From 775a48eca1bc8b53daefedef65b966d8a554951a Mon Sep 17 00:00:00 2001
From: Justin Lee <lis186@gmail.com>
Date: Tue, 9 Jun 2026 01:50:22 +0800
Subject: [PATCH 2/2] test: make dashboard-codex-e2e project-label assertion
 truncation-aware

The test compared the selected project label against the raw cwd basename, but
the dashboard renders it through truncateMiddle(name, 20). When the suite runs
from a git worktree (a checkout under '.claude/worktrees/<branch>', whose
basename can be longer than 20 characters) the label is truncated and the
assertion failed spuriously. Mirror the UI truncation so the test is independent
of the checkout directory name.
---
 test/dashboard-codex-e2e.test.js | 13 ++++++++++++-
 1 file changed, 12 insertions(+), 1 deletion(-)

diff --git a/test/dashboard-codex-e2e.test.js b/test/dashboard-codex-e2e.test.js
index 0d71491..c12ec89 100644
--- a/test/dashboard-codex-e2e.test.js
+++ b/test/dashboard-codex-e2e.test.js
@@ -14,6 +14,17 @@ const PROJECT_CWD = path.resolve(__dirname, '..');
 const PROJECT_NAME = path.basename(PROJECT_CWD);
 const tmpDirs = [];
 
+// Mirror of the dashboard's project-label truncation (public/miller-columns.js
+// truncateMiddle). The project label is the cwd basename; in a git worktree
+// checkout (e.g. under ".claude/worktrees/<branch>") that last path segment can
+// be long, so compare against the same truncation the UI applies, not the raw name.
+function truncateMiddle(s, max) {
+  if (s.length <= max) return s; // fits within max: returned unchanged
+  const tail = Math.ceil(max * 0.6); // chars kept from the end (60% of max, rounded up)
+  const head = max - tail - 1; // chars kept from the start; the 1 is the '…' itself
+  return s.slice(0, head) + '…' + s.slice(-tail); // head + '…' + tail = max (7 + 1 + 12 for max = 20)
+}
+
 function makeOpenAISSE() {
   return [
     'event: response.created',
@@ -162,7 +173,7 @@ describe('Codex dashboard status E2E', () => {
         };
       });
 
-      assert.equal(state.projectText, PROJECT_NAME);
+      assert.equal(state.projectText, truncateMiddle(PROJECT_NAME, 20));
       assert.equal(state.sessionText, 'Codex Raw');
       assert.match(state.url, /s=codex-raw/);
       assert.equal(state.hasOkDot, true);