lis186 · lis186 · Jun 9, 2026 · Jun 8, 2026 · Jun 8, 2026
diff --git a/docs/wire-protocol-reference.md b/docs/wire-protocol-reference.md
@@ -27,6 +27,7 @@
 
 | Date | Agent | Version | Change |
 |------|-------|---------|--------|
+| 2026-06-09 | Claude Code | 2.1.x | Confirmed via loopback wire capture: `anthropic-beta` carries `context-1m-2025-08-07` on **every** request when the account's 1M context is enabled — including haiku title-gen turns (it is a client/account-level capability flag, not a per-turn window declaration). ccxray now uses it as the non-lagging 1M-window signal, gated by model capability (`SUPPORTS_1M`), replacing sole reliance on the lagging system-prompt `[1m]` marker (#58). |
 | 2026-06-05 | ccxray | 1.11.x | Usage normalization: OpenAI `input_tokens` includes `cached_tokens` (subset), unlike Anthropic's disjoint fields. `normalizeUsageForProvider` now subtracts the overlap so canonical `input_tokens + cache_read + cache_creation = total context` holds for both providers. Normalized entries carry `_ccxrayUsageNormalized: true`. Historical entries normalized on restore (in-memory, index unchanged). Cache display: Codex sessions show `cache N% hit` instead of TTL countdown; topbar adapts per provider (`ephemeral-ttl` vs `server-managed`). `UPSTREAM_PROFILES` registry added to `providers.js`. |
 | 2026-06-04 | ccxray | 1.10.x | Fix: WS `stopReason` now extracts `response.status` from terminal events (`completed`/`incomplete`/`failed`/`cancelled`) instead of WS close reason. WS `title` extracts user input summary via `getOpenAIInputSummary` instead of hardcoded string. Non-terminal statuses (`in_progress`/`queued`) are ignored to prevent masking close/error reasons. |
 | 2026-06-02 | ccxray | 1.10.0 | Doc audit: 13 major + 25 minor corrections applied (F1–F38) |
@@ -65,6 +66,8 @@
 | ChatGPT base path | N/A | `/backend-api/codex/...` (the proxy strips the `/v1` prefix before prepending the base path, so `POST /v1/responses` → `/backend-api/codex/responses`) | `obs-stable` codex ≥0.131 |
 | Version header | `anthropic-version: 2023-06-01` | N/A | `contractual` |
 | Beta features | `anthropic-beta: ...` (comma-separated) | `openai-beta: ...` | `contractual` |
+| 1M context window signal | `anthropic-beta` list contains `context-1m-2025-08-07` (present on every request when 1M enabled — a client-level flag, also on haiku turns; does **not** lag a mid-session model switch, unlike the system-prompt `[1m]` marker) | N/A | `obs-stable` Claude Code ≥2.1.x |
+| Rate-limit header ≠ context window | `anthropic-ratelimit-tokens-limit` (e.g. `80000`) is a per-window quota, **not** the context window — never use it to size the denominator | N/A | `obs-stable` |
 
 ---
 

diff --git a/server/config.js b/server/config.js
@@ -224,6 +224,14 @@ const MODEL_CONTEXT_FALLBACK = {
 };
 const DEFAULT_CONTEXT = 200_000;
 
+// Models that can actually be served with a 1M context window. The 1M signal
+// (anthropic-beta context-1m header, or the system "[1m]" marker) is a
+// client/account-level capability flag — it rides on EVERY Claude Code request,
+// including haiku title-gen turns. Gate the 1M jump on the model itself so a
+// haiku request carrying the beta header is not shown as a 1M window. New 1M
+// families are added by extending this one pattern, not by changing logic.
+const SUPPORTS_1M = /^claude-(opus|sonnet)-4/;
+
 // Extract effective model ID from system prompt (includes [1m] suffix if present).
 // API request model field never includes [1m], but system prompt does:
 //   "The exact model ID is claude-opus-4-6[1m]."
@@ -237,14 +245,27 @@ function extractModelFromSystem(system) {
   return null;
 }
 
-function getMaxContext(model, system) {
-  // Prefer model ID from system prompt (has [1m] suffix when applicable)
-  const effective = extractModelFromSystem(system) || model;
-  if (!effective) return DEFAULT_CONTEXT;
-  // 1) Explicit suffix: "claude-opus-4-6[1m]" → 1M
-  if (/\[1m\]/i.test(effective)) return 1_000_000;
-  // 2) Known Claude Code defaults (200K standard plan)
-  const stripped = effective.replace(/\[.*\]/, '');
+function getMaxContext(model, system, opts = {}) {
+  // Model IDENTITY comes from the request `model` field — it updates immediately
+  // on a mid-session model switch. The system marker is only a fallback for
+  // identity, because Claude Code's "The exact model ID is ..." line lags several
+  // turns behind the switch and would otherwise corrupt the window denominator
+  // (issue #58). The system marker is still the place the "[1m]" suffix appears.
+  const sysModel = extractModelFromSystem(system);
+  const identity = model || sysModel;
+  if (!identity) return DEFAULT_CONTEXT;
+  const stripped = identity.replace(/\[.*\]/, '');
+  // 1) 1M plan active? Two non-mutually-exclusive signals:
+  //    - opts.beta1m: anthropic-beta `context-1m-*` request header (non-lagging,
+  //      present on every turn — the authoritative plan flag).
+  //    - "[1m]" suffix in the system marker (legacy; lags after a model switch).
+  //    Either signal counts, but only for a 1M-capable model (SUPPORTS_1M) so a
+  //    client-level flag riding on a haiku request does not over-claim 1M.
+  const has1mSignal = opts.beta1m === true
+    || /\[1m\]/i.test(sysModel || '')
+    || /\[1m\]/i.test(model || '');
+  if (has1mSignal && SUPPORTS_1M.test(stripped)) return 1_000_000;
+  // 2) Known Claude Code / Codex defaults (200K / 400K)
   const keys = Object.keys(MODEL_CONTEXT_FALLBACK).sort((a, b) => b.length - a.length);
   for (const key of keys) {
     if (stripped.startsWith(key)) return MODEL_CONTEXT_FALLBACK[key];
@@ -263,8 +284,8 @@ function getMaxContext(model, system) {
 // base, bump Claude models up to 1M so the dashboard "X / Y (Z%)" stays
 // self-consistent. Non-Claude models are not bumped because we have no
 // reliable next tier to escalate to.
-function inferMaxContext(model, system, usage) {
-  const base = getMaxContext(model, system);
+function inferMaxContext(model, system, usage, opts = {}) {
+  const base = getMaxContext(model, system, opts);
   if (!usage) return base;
   const used = (usage.input_tokens || 0)
     + (usage.cache_creation_input_tokens || 0)

diff --git a/server/forward.js b/server/forward.js
@@ -589,7 +589,7 @@ function handleSSEResponse(ctx, proxyRes, clientRes) {
         console.log(`\x1b[90m   Context HUD: injecting into session ${reqSessionId.slice(0, 8)}\x1b[0m`);
         _hudLoggedSessions.add(reqSessionId);
       }
-      const maxCtx = config.inferMaxContext(parsedBody?.model, parsedBody?.system, usage);
+      const maxCtx = config.inferMaxContext(parsedBody?.model, parsedBody?.system, usage, { beta1m: ctx.beta1m });
       const pct = (totalCtx / maxCtx * 100).toFixed(1);
       const newIdx = maxBlockIndex + 1;
       const costInfo = calculateCost(usage, parsedBody?.model);
@@ -666,7 +666,7 @@ function handleSSEResponse(ctx, proxyRes, clientRes) {
         proxyRes, sessionId, sessionInferred: ctx.sessionInferred,
         sysHash: ctx.sysHash, toolsHash: ctx.toolsHash, coreHash: ctx.coreHash,
         cwd: store.sessionMeta[sessionId]?.cwd || null,
-        stopReason, title, thinkingDuration, thinkingStripped,
+        stopReason, title, thinkingDuration, thinkingStripped, beta1m: ctx.beta1m,
         isSubagent, toolFail: helpers.hasToolFail(parsedBody), startTime,
       }),
     };
@@ -695,7 +695,7 @@ function handleSSEResponse(ctx, proxyRes, clientRes) {
     const outTok = usage?.output_tokens ? `  out=${usage.output_tokens.toLocaleString()} tok` : '';
     const prefix = ctx.attribPrefix || '';
     console.log(`${color}📥 [${helpers.taipeiTime()}]  ${prefix}  ${glyph} ${code}  ${elapsed}s${outTok}\x1b[0m`);
-    if (usage) helpers.printContextBar(usage, parsedBody?.model, parsedBody?.system);
+    if (usage) helpers.printContextBar(usage, parsedBody?.model, parsedBody?.system, ctx.beta1m);
     if (entry.cost?.cost != null) {
       store.sessionCosts.set(sessionId, (store.sessionCosts.get(sessionId) || 0) + entry.cost.cost);
       console.log(`  💰 $${entry.cost.cost.toFixed(4)} this turn | $${store.sessionCosts.get(sessionId).toFixed(4)} session`);
@@ -886,7 +886,7 @@ function handleNonSSEResponse(ctx, proxyRes, clientRes) {
           sessionId, sessionInferred: ctx.sessionInferred,
           sysHash: ctx.sysHash, toolsHash: ctx.toolsHash, coreHash: ctx.coreHash,
           cwd: store.sessionMeta[sessionId]?.cwd || null,
-          stopReason, title, thinkingDuration: null, thinkingStripped,
+          stopReason, title, thinkingDuration: null, thinkingStripped, beta1m: ctx.beta1m,
           isSubagent, toolFail: helpers.hasToolFail(parsedBody), startTime,
         }),
       };

diff --git a/server/helpers.js b/server/helpers.js
@@ -369,14 +369,16 @@ function totalContextTokens(usage) {
     + (usage.cache_read_input_tokens || 0);
 }
 
-function printContextBar(usage, model, system) {
+function printContextBar(usage, model, system, beta1m) {
   const { inferMaxContext } = require('./config');
   if (!usage) return;
   // Use inferMaxContext (not getMaxContext) so the terminal HUD bumps
   // claude-opus-* / claude-sonnet-* to 1M when observed usage exceeds 200K
   // but the [1m] marker isn't present in the system prompt — otherwise the
-  // bar clamps to "100% (X / 200,000)" while X overflows the max.
-  const maxCtx = inferMaxContext(model, system, usage);
+  // bar clamps to "100% (X / 200,000)" while X overflows the max. beta1m
+  // (anthropic-beta context-1m header) gives 1M immediately, before usage
+  // crosses 200K and without waiting for the lagging [1m] marker (#58).
+  const maxCtx = inferMaxContext(model, system, usage, { beta1m });
   const used = totalContextTokens(usage);
   if (!used) return;
   const pct = Math.min(100, (used / maxCtx) * 100);

diff --git a/server/index.js b/server/index.js
@@ -376,9 +376,17 @@ const server = http.createServer((clientReq, clientRes) => {
     // Build context for forwarding
     const fwdHeaders = buildForwardHeaders(clientReq.headers, upstream);
 
+    // #58: the 1M context window is enabled via the `anthropic-beta:
+    // context-1m-*` request header. Unlike the system-prompt "[1m]" marker, this
+    // header rides every turn (it does not lag a mid-session model switch), so it
+    // is the authoritative, non-lagging signal for the context-window denominator.
+    // Carried on ctx and fed into inferMaxContext downstream.
+    const beta1m = /(^|,)\s*context-1m-/.test(clientReq.headers['anthropic-beta'] || '');
+
     const ctx = {
       id, ts, startTime, parsedBody, rawBody, clientReq, clientRes, fwdHeaders,
       reqSessionId, reqWritePromise, sysHash, toolsHash, coreHash, sessionInferred, upstream,
+      beta1m,
       isSubagent: provider === 'openai' ? isOpenAISubagent(clientReq.headers, parsedBody) : undefined,
     };
 

diff --git a/server/wire-parsers/anthropic.js b/server/wire-parsers/anthropic.js
@@ -58,7 +58,7 @@ function buildEntryFields(ctx) {
     cwd: ctx.cwd ?? null,
     usage,
     cost: calculateCost(usage, model),
-    maxContext: config.inferMaxContext(model, parsedBody?.system, usage),
+    maxContext: config.inferMaxContext(model, parsedBody?.system, usage, { beta1m: ctx.beta1m }),
     responseMetadata: undefined,
     stopReason: ctx.stopReason || '',
     title: ctx.title || null,

diff --git a/test/config.test.js b/test/config.test.js
@@ -649,3 +649,59 @@ describe('inferMaxContext', () => {
     assert.equal(inferMaxContext('claude-opus-4-7', null, usage), 1_000_000);
   });
 });
+
+// ── #58: anthropic-beta context-1m header as a non-lagging 1M signal ─────────
+// Empirically confirmed (2026-06-09): Claude Code sends
+//   anthropic-beta: ...,context-1m-2025-08-07,...
+// on EVERY request when the account has the 1M context beta enabled. Unlike the
+// system-prompt "[1m]" marker (which lags several turns after a mid-session
+// model switch), this header is present on every turn, so it fixes the lag-window
+// flicker / ctx% cliff at the source. Because the header is a client-level
+// capability flag (also present on haiku title-gen requests), it must be GATED by
+// model 1M-capability so it does not over-claim a 1M window for haiku.
+describe('getMaxContext / inferMaxContext — context-1m beta header (#58)', () => {
+  const { getMaxContext, inferMaxContext } = require('../server/config');
+  const staleMarker = [{ type: 'text', text: 'The exact model ID is claude-opus-4-6.' }];
+
+  it('FIX: stale system marker no longer corrupts identity — beta header wins (fail-on-old)', () => {
+    // The reported bug: request model already switched to opus-4-8 (1M plan), but
+    // the system marker still lags on opus-4-6 with no [1m]. Old code trusted the
+    // stale marker for identity AND had no beta signal → 200K → ctx% ~99%.
+    const usage = { input_tokens: 196_602 }; // turn 35 from the issue, < 200K so the usage hatch cannot explain the 1M result
+    assert.equal(inferMaxContext('claude-opus-4-8', staleMarker, usage, { beta1m: true }), 1_000_000);
+  });
+
+  it('FIX: beta header gives 1M even with no [1m] marker and small usage (fail-on-old)', () => {
+    // Lag window, early turn: no [1m] anywhere, usage below 200K. The header is the
+    // only non-lagging signal — without it the turn shows 200K until usage crosses 200K.
+    assert.equal(getMaxContext('claude-opus-4-8', null, { beta1m: true }), 1_000_000);
+    assert.equal(inferMaxContext('claude-opus-4-8', null, { input_tokens: 50_000 }, { beta1m: true }), 1_000_000);
+    assert.equal(inferMaxContext('claude-sonnet-4-6', null, { input_tokens: 50_000 }, { beta1m: true }), 1_000_000);
+  });
+
+  it('GUARD: beta header does NOT over-claim 1M for non-1M-capable models', () => {
+    // haiku title-gen requests carry the same client-level beta header but haiku
+    // does not support 1M — must stay at base when usage is small.
+    assert.equal(getMaxContext('claude-haiku-4-5', null, { beta1m: true }), 200_000);
+    assert.equal(inferMaxContext('claude-haiku-4-5', null, { input_tokens: 50_000 }, { beta1m: true }), 200_000);
+    // claude-3 families are not 1M-capable either.
+    assert.equal(getMaxContext('claude-3-opus', null, { beta1m: true }), 200_000);
+  });
+
+  it('GUARD: no signal at all stays at base (no false 1M)', () => {
+    assert.equal(getMaxContext('claude-opus-4-8', null), 200_000);
+    assert.equal(getMaxContext('claude-opus-4-8', null, {}), 200_000); // explicit empty opts must behave like omitted opts
+    assert.equal(inferMaxContext('claude-opus-4-8', null, { input_tokens: 50_000 }), 200_000);
+  });
+
+  it('REGRESSION: existing [1m] marker path and usage hatch still work without opts', () => {
+    const marker1m = [{ type: 'text', text: 'The exact model ID is claude-opus-4-8[1m].' }];
+    assert.equal(getMaxContext('claude-opus-4-8', marker1m), 1_000_000);
+    assert.equal(inferMaxContext('claude-haiku-4-5', null, { input_tokens: 260_000 }), 1_000_000); // usage hatch: 260K observed exceeds the 200K base, so 1M is expected with no 1M signal at all
+    assert.equal(getMaxContext('gpt-5.1-codex', null), 400_000);
+  });
+
+  it('GUARD: beta header is ignored for OpenAI models (no 1M concept there)', () => {
+    assert.equal(getMaxContext('gpt-5.1-codex', null, { beta1m: true }), 400_000);
+  });
+});
diff --git a/test/dashboard-codex-e2e.test.js b/test/dashboard-codex-e2e.test.js
@@ -14,6 +14,17 @@ const PROJECT_CWD = path.resolve(__dirname, '..');
 const PROJECT_NAME = path.basename(PROJECT_CWD);
 const tmpDirs = [];
 
+// Mirror of the dashboard's project-label truncation (public/miller-columns.js
+// truncateMiddle). The project label is the cwd basename, which is long when the
+// suite runs from a git worktree (e.g. ".claude/worktrees/<branch>"), so compare
+// against the same truncation the UI applies rather than the raw name.
+function truncateMiddle(s, max) {
+  if (s.length <= max) return s; // already fits within max: returned unchanged
+  const tail = Math.ceil(max * 0.6); // kept suffix gets 60% of the budget, rounded up
+  const head = max - tail - 1; // kept prefix gets the remainder, minus one slot for the ellipsis
+  return s.slice(0, head) + '…' + s.slice(-tail); // prefix + '…' + suffix
+}
+
 function makeOpenAISSE() {
   return [
     'event: response.created',
@@ -162,7 +173,7 @@ describe('Codex dashboard status E2E', () => {
         };
       });
 
-      assert.equal(state.projectText, PROJECT_NAME);
+      assert.equal(state.projectText, truncateMiddle(PROJECT_NAME, 20));
       assert.equal(state.sessionText, 'Codex Raw');
       assert.match(state.url, /s=codex-raw/);
       assert.equal(state.hasOkDot, true);