From 25468df523e1d428bdbd92238bc98b67bb06cc34 Mon Sep 17 00:00:00 2001
From: Claude <noreply@anthropic.com>
Date: Mon, 22 Jun 2026 10:19:36 +0000
Subject: [PATCH] feat(budget): model-aware startup context budget for profiles
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Add a model-aware "startup budget" check so a profile's always-on
footprint (skill frontmatter + MCP tool schemas) is warned about when it
exceeds 50% of the model's context window — a ~128K ceiling on a 256K
model, leaving the other half free for the conversation.

- lib/token-budget.ts: pure, tested math — MODEL_CONTEXT_WINDOWS map,
  resolveContextWindow (profile contextWindow/model → CUE_CONTEXT_WINDOW
  /CUE_MODEL env → 256K default), estimateMcpTokens, computeContextBudget,
  formatContextBudgetWarning (silent <80% budget, 🟡 near, 🔴 over).
- profiles: optional `model` / `contextWindow` fields (schema.json +
  _types.ts), resolved leaf-wins through inheritance in profile-loader.
  core declares contextWindow: 256000 as the fan-out baseline.
- cue launch: budget warning in the startup banner (real resolved
  skill + MCP token data).
- cue profile suggest: new "Context budget" section auditing every
  profile; --model / --context / --load-factor / --no-budget flags.
- docs/agent-context-budget.md: documents the budget + flags.
- token-budget.test.ts: 20 new tests.

Co-Authored-By: Claude Opus 4.8 (1M context) <noreply@anthropic.com>
Claude-Session: https://claude.ai/code/session_01TwQNpDsSR466euk2d1McHV
---
 docs/agent-context-budget.md    |  34 ++++++
 profiles/_types.ts              |  14 +++
 profiles/core/profile.yaml      |   9 ++
 profiles/schema.json            |   9 ++
 src/commands/launch.ts          |  18 +++
 src/commands/profile-suggest.ts | 128 +++++++++++++++++++++-
 src/lib/profile-loader.ts       |  11 ++
 src/lib/token-budget.test.ts    | 141 ++++++++++++++++++++++++
 src/lib/token-budget.ts         | 187 ++++++++++++++++++++++++++++++++
 9 files changed, 550 insertions(+), 1 deletion(-)
 create mode 100644 src/lib/token-budget.test.ts
diff --git a/docs/agent-context-budget.md b/docs/agent-context-budget.md
index d881ad98..f02b12cb 100644
--- a/docs/agent-context-budget.md
+++ b/docs/agent-context-budget.md
@@ -32,6 +32,40 @@ Use `setup/lean-cue.md` for the smallest path. It should install cue and pin
 `core` by default. Caveman, RTK, skill-writing, memory, gbrain, and Office MCPs
 are optional add-ons.
 
+## Model-Aware Startup Budget
+
+A profile's *always-on* footprint — skill frontmatter (loaded into the skill
+router every message) plus MCP tool schemas (one set per connected server) — is
+paid before the user types anything. The rule cue enforces: that startup load
+should stay under **50% of the model's context window**, leaving the other half
+as working headroom. For a 256K model that's a **~128K startup ceiling**.
+
+The math lives in `src/lib/token-budget.ts` (pure, unit-tested in
+`token-budget.test.ts`):
+
+- `resolveContextWindow` picks the window. Precedence: profile `contextWindow`
+  → profile `model` (via `MODEL_CONTEXT_WINDOWS`) → env `CUE_CONTEXT_WINDOW`
+  → env `CUE_MODEL` → `DEFAULT_CONTEXT_WINDOW` (256K). cue can't auto-detect the
+  main-session model (per-launch `/model` choice), hence the declared/env source.
+- `estimateMcpTokens` charges `MCP_TOKENS_PER_SERVER` (default 1500, override
+  with `CUE_MCP_TOKENS_PER_SERVER`) per connected MCP.
+- `computeContextBudget` returns the budget, the startup load, and whether it
+  fits. `formatContextBudgetWarning` stays quiet under 80% of budget, prints a
+  🟡 note as it approaches, and a 🔴 over-budget warning past the ceiling.
+
+Surfaces:
+
+- `cue launch` prints the warning in the startup banner (real resolved skill +
+  MCP token data).
+- `cue profile suggest` adds a **Context budget** section auditing every
+  profile; flags `--model <id>`, `--context <tokens>`, `--load-factor <0..1>`,
+  and `--no-budget` to skip it.
+
+Profiles declare their target via the `model` / `contextWindow` fields (both
+optional, leaf-wins through inheritance). `core` sets `contextWindow: 256000` as
+the fan-out baseline; a long-context profile must redeclare `contextWindow`
+(e.g. `1000000`) because an inherited `contextWindow` outranks a leaf `model`.
+
 ## Onboarding Source
 
 `src/commands/init.ts` controls first-run global onboarding. Keep the first
diff --git a/profiles/_types.ts b/profiles/_types.ts
index e05c9b08..ca041a82 100644
--- a/profiles/_types.ts
+++ b/profiles/_types.ts
@@ -43,6 +43,20 @@ export interface Profile {
   description: string;
   icon?: string;
   iconImage?: string;
+  /**
+   * Target main-session model id (e.g. "claude-opus-4-8"). Advisory only — cue
+   * can't pin the main session model (that's a per-launch `/model` choice) — but
+   * the model-aware startup budget (lib/token-budget.ts) uses it to resolve the
+   * context window so the over-budget warning knows what window to size against.
+   * Leaf-wins through inheritance. Env `CUE_MODEL` is only a fallback when the profile resolves no window.
+   */
+  model?: string;
+  /**
+   * Explicit context window (tokens) to budget the startup load against, when
+   * the model id alone isn't enough (custom/long-context deployments). Takes
+   * precedence over `model`. Leaf-wins; env `CUE_CONTEXT_WINDOW` is only a fallback when no profile declares a window.
+   */
+  contextWindow?: number;
   agents?: AgentKind[];
   inherits?: string | string[];
   // Companion profiles surfaced at `cue use` time as suggestions. Activating
diff --git a/profiles/core/profile.yaml b/profiles/core/profile.yaml
index b93ee9af..ea78d9cd 100644
--- a/profiles/core/profile.yaml
+++ b/profiles/core/profile.yaml
@@ -2,6 +2,15 @@ name: core
 icon: "🧠"
 iconImage: "logo.png"
 description: Baseline shared by every cue profile — essentials only
+# Startup context budget baseline, fans out to every inheriting profile.
+# cue can't pin the main-session model (that's a per-launch /model choice), so
+# this just tells the model-aware budget (lib/token-budget.ts) which window to
+# size against: the always-on load (skill frontmatter + MCP tool schemas) should
+# stay under 50% of it, leaving the other half free for the conversation. At
+# 256K that's a ~128K startup ceiling; `cue launch` and `cue profile suggest`
+# warn when a profile blows past it. Override per-profile by redeclaring
+# `contextWindow` (it outranks `model` and the CUE_CONTEXT_WINDOW / CUE_MODEL env fallbacks).
+contextWindow: 256000
 # Cost knob, fans out to every inheriting profile. Surfaced into the runtime
 # settings.json `env` block by buildClaudeSettings (allowlisted). Pins Task/Agent
 # subagents (code-reviewer, Explore, file-read/grep/summarize) to Sonnet —
diff --git a/profiles/schema.json b/profiles/schema.json
index 4b90e4a4..41e7ce11 100644
--- a/profiles/schema.json
+++ b/profiles/schema.json
@@ -27,6 +27,15 @@
       "type": "string",
       "description": "Path to a PNG/JPG logo, relative to the profile dir. Rendered inline in Kitty terminals via the graphics protocol; falls back to `icon` emoji elsewhere."
     },
+    "model": {
+      "type": "string",
+      "description": "Advisory target main-session model id (e.g. 'claude-opus-4-8'). cue can't pin the main session model (per-launch /model choice); the model-aware startup budget uses it to resolve the context window for the over-budget warning. Leaf-wins; env CUE_MODEL is only a fallback when the profile resolves no window."
+    },
+    "contextWindow": {
+      "type": "integer",
+      "minimum": 1,
+      "description": "Explicit context window (tokens) to budget the startup load against, when the model id alone isn't enough. Takes precedence over `model`. Leaf-wins; env CUE_CONTEXT_WINDOW is only a fallback when no profile declares a window."
+    },
     "bundles": {
       "type": "array",
       "items": { "type": "string" },
diff --git a/src/commands/launch.ts b/src/commands/launch.ts
index 8060dd1d..e5367a8a 100644
--- a/src/commands/launch.ts
+++ b/src/commands/launch.ts
@@ -20,6 +20,8 @@ import { configDir } from "../lib/config-paths";
 import { debug } from "../lib/debug-log";
 import {
   computeTokenBreakdown,
+  computeContextBudget,
+  formatContextBudgetWarning,
   splitSkillBytes,
   tokenLevelEmoji,
   type SkillTokens,
@@ -1874,6 +1876,22 @@ export async function run(args: string[]): Promise<number> {
       const breakdown = computeTokenBreakdown(profile, parts, tokensForSkill);
       alwaysOnForBadge = breakdown.alwaysOn;
       const lines = formatTokenWarning(breakdown);
+
+      // Model-aware startup budget: skills frontmatter + MCP tool-schema cost
+      // should stay under 50% of the model's context window so the other half
+      // is free for the conversation. Window precedence: profile
+      // contextWindow/model → env (CUE_CONTEXT_WINDOW/CUE_MODEL) → 256K default.
+      const budget = computeContextBudget({
+        skillTokens: breakdown.alwaysOn,
+        mcpCount: profile.mcps.length,
+        window: profile.contextWindow,
+        model: profile.model,
+      });
+      const bc = colorFns();
+      lines.push(
+        ...formatContextBudgetWarning(budget, { yellow: bc.yellow, bold: bc.bold, dim: bc.dim }),
+      );
+
       if (lines.length > 0) {
         process.stderr.write("\n");
         for (const l of lines) process.stderr.write(`${l}\n`);
diff --git a/src/commands/profile-suggest.ts b/src/commands/profile-suggest.ts
index 048a8e8b..7560aa96 100644
--- a/src/commands/profile-suggest.ts
+++ b/src/commands/profile-suggest.ts
@@ -22,9 +22,16 @@ import {
   skillFrequency,
   type ClusterItem,
 } from "../lib/cluster-skills";
+import { loadProfile } from "../lib/profile-loader";
+import {
+  computeContextBudget,
+  resolveContextWindow,
+  splitSkillBytes,
+} from "../lib/token-budget";
 
 const REPO_ROOT = process.env.CUE_REPO_ROOT ?? process.env.SOUL_REPO_ROOT ?? resolve(dirname(fileURLToPath(import.meta.url)), "..", "..");
 const PROFILES_DIR = join(REPO_ROOT, "profiles");
+const SKILLS_DIR = join(REPO_ROOT, "resources", "skills", "skills");
 const DISCOVER_CACHE = join(
   process.env.XDG_CONFIG_HOME ?? join(homedir(), ".config"),
   "cue", "discover", "gems.json",
@@ -160,6 +167,105 @@ function reportUnfitGems(minSize: number): void {
   process.stdout.write(`\n  ${dim("→ run `cue discover suggest-profiles` to generate draft profile.yaml files")}\n\n`);
 }
 
+// ---------------------------------------------------------------------------
+// Context-budget audit (model-aware)
+//
+// Resolve each profile (so inherited core skills/MCPs count) and estimate its
+// always-on startup load: skill frontmatter tokens + MCP tool-schema tokens.
+// Flag profiles whose load exceeds the 50% startup target for the chosen
+// model's context window (default 256K → ~128K budget).
+// ---------------------------------------------------------------------------
+
+interface BudgetOpts {
+  model?: string;
+  window?: number;
+  loadFactor?: number;
+}
+
+const skillTokenCache = new Map<string, number>();
+
+/** Frontmatter token estimate for one local skill id (splitSkillBytes size / 4, rounded up), memoized. */
+function skillFrontmatterTokens(id: string): number {
+  const hit = skillTokenCache.get(id);
+  if (hit !== undefined) return hit;
+  const estimate = ((): number => {
+    try {
+      return Math.ceil(splitSkillBytes(readFileSync(join(SKILLS_DIR, id, "SKILL.md"), "utf8")).frontmatter / 4);
+    } catch {
+      // Skill not readable on disk (npx-only or moved): nothing to measure, so it counts as 0.
+      return 0;
+    }
+  })();
+  skillTokenCache.set(id, estimate);
+  return estimate;
+}
+
+/**
+ * Print the context-budget audit: over-budget profiles plus the three heaviest.
+ * A CLI override (`--context` / `--model`) replaces BOTH profile hints, so an
+ * inherited `contextWindow` can't outrank an explicit `--model`; without one,
+ * each profile is sized against its own declared window/model.
+ */
+async function reportContextBudget(profileNames: string[], opts: BudgetOpts): Promise<void> {
+  const window = resolveContextWindow({ contextWindow: opts.window, model: opts.model });
+  const cliOverride = opts.window !== undefined || opts.model !== undefined;
+  const rows: Array<{ name: string; load: number; over: boolean; pct: number; mcps: number }> = [];
+
+  for (const name of profileNames) {
+    let resolved;
+    try {
+      resolved = await loadProfile(name);
+    } catch {
+      continue; // malformed / unresolvable — `cue validate` is the right tool.
+    }
+    let skillTokens = 0;
+    for (const s of resolved.skills.local) skillTokens += skillFrontmatterTokens(s.id);
+    const budget = computeContextBudget({
+      skillTokens,
+      mcpCount: resolved.mcps.length,
+      // When overridden, size against the same window the header line reports.
+      window: cliOverride ? opts.window : resolved.contextWindow,
+      model: cliOverride ? opts.model : resolved.model,
+      loadFactor: opts.loadFactor,
+    });
+    const { startupLoad: load, pctOfBudget: pct, mcpCount: mcps } = budget;
+    rows.push({ name, load, over: !budget.withinBudget, pct, mcps });
+  }
+
+  if (rows.length === 0) {
+    process.stdout.write(`  ${dim("no resolvable profiles to budget")}\n\n`);
+    return;
+  }
+
+  // Header line: mirrors computeContextBudget's load-factor fallback (0.5).
+  const loadFactor = opts.loadFactor && opts.loadFactor > 0 ? opts.loadFactor : 0.5;
+  const budgetTokens = Math.round(window * loadFactor);
+  const windowK = `${(window / 1000).toFixed(0)}K`;
+  const budgetK = `${(budgetTokens / 1000).toFixed(0)}K`;
+  process.stdout.write(`  ${dim(`window ${windowK} · ${Math.round(loadFactor * 100)}% startup target → budget ~${budgetK} always-on`)}\n\n`);
+
+  const over = rows.filter(r => r.over).sort((a, b) => b.load - a.load);
+  if (over.length === 0) {
+    process.stdout.write(`  ${dim(`all ${rows.length} profiles fit the ${budgetK} startup budget`)}\n`);
+  } else {
+    process.stdout.write(`  ${bold(`${over.length} profile(s) over the ${budgetK} startup budget:`)}\n\n`);
+    for (const r of over) {
+      const loadK = `${(r.load / 1000).toFixed(1)}K`;
+      const mcpNote = r.mcps > 0 ? `, ${r.mcps} MCP${r.mcps > 1 ? "s" : ""}` : "";
+      process.stdout.write(`    🔴 ${r.name}  ${dim(`~${loadK} always-on${mcpNote} — ${Math.round(r.pct * 100)}% of budget`)}\n`);
+    }
+  }
+
+  // Always show the 3 heaviest so a near-miss is visible before it tips over.
+  const heaviest = [...rows].sort((a, b) => b.load - a.load).slice(0, 3);
+  process.stdout.write(`\n  ${dim("Heaviest profiles:")}\n`);
+  for (const r of heaviest) {
+    const loadK = `${(r.load / 1000).toFixed(1)}K`;
+    process.stdout.write(`    • ${r.name} ${dim(`(~${loadK}, ${Math.round(r.pct * 100)}% of budget)`)}\n`);
+  }
+  process.stdout.write(`\n`);
+}
+
 // ---------------------------------------------------------------------------
 // Tiny ANSI helpers (no dependency)
 // ---------------------------------------------------------------------------
@@ -177,13 +283,17 @@ export async function run(args: string[]): Promise<number> {
     process.stdout.write(`cue profile suggest — audit profiles/ and propose regroupings
 
 Usage:
-  cue profile suggest                Run all three signals (default)
+  cue profile suggest                Run all signals (default)
   cue profile suggest --no-cluster   Skip the discover-cache clustering section
+  cue profile suggest --no-budget    Skip the model-aware context-budget audit
 
 Options:
   --min-profiles <n>    Promote-to-core threshold (default: 3)
   --jaccard <0..1>      Merge-candidate threshold (default: 0.5)
   --min-size <n>        Cluster size threshold for unfit gems (default: 3)
+  --model <id>          Model id for the context-budget audit (e.g. claude-opus-4-8)
+  --context <tokens>    Explicit context window for the budget (overrides --model)
+  --load-factor <0..1>  Fraction of the window allowed at startup (default: 0.5)
 
 Output: report only. Nothing is written.
 `);
@@ -197,6 +307,13 @@ Output: report only. Nothing is written.
   const minSizeIdx = args.indexOf("--min-size");
   const minSize = minSizeIdx >= 0 ? parseInt(args[minSizeIdx + 1] ?? "3", 10) : 3;
   const skipCluster = args.includes("--no-cluster");
+  const skipBudget = args.includes("--no-budget");
+  const modelIdx = args.indexOf("--model");
+  const model = modelIdx >= 0 ? args[modelIdx + 1] : undefined;
+  const contextIdx = args.indexOf("--context");
+  const contextWindow = contextIdx >= 0 ? parseInt(args[contextIdx + 1] ?? "", 10) : undefined;
+  const loadFactorIdx = args.indexOf("--load-factor");
+  const loadFactor = loadFactorIdx >= 0 ? parseFloat(args[loadFactorIdx + 1] ?? "") : undefined;
 
   const profileSkills = readProfileSkills();
   const total = Object.keys(profileSkills).length;
@@ -218,6 +335,15 @@ Output: report only. Nothing is written.
     reportUnfitGems(minSize);
   }
 
+  if (!skipBudget) {
+    process.stdout.write(`${bold("4. Context budget (model-aware)")}\n\n`);
+    await reportContextBudget(Object.keys(profileSkills), {
+      model,
+      window: Number.isFinite(contextWindow) ? contextWindow : undefined,
+      loadFactor: Number.isFinite(loadFactor) ? loadFactor : undefined,
+    });
+  }
+
   process.stdout.write(`${dim("(report-only — review and edit profiles/*/profile.yaml by hand)")}\n`);
   return 0;
 }
diff --git a/src/lib/profile-loader.ts b/src/lib/profile-loader.ts
index a2c1a271..c8f83e3d 100644
--- a/src/lib/profile-loader.ts
+++ b/src/lib/profile-loader.ts
@@ -444,6 +444,10 @@ function foldChain(chain: Profile[]): ResolvedProfile {
       description: child.description,
       icon: child.icon ?? acc.icon,
       iconImage: child.iconImage ?? acc.iconImage,
+      // Budget hints are leaf-wins: a child that declares its own model /
+      // context window overrides the parent; otherwise it inherits.
+      model: child.model ?? acc.model,
+      contextWindow: child.contextWindow ?? acc.contextWindow,
       // agents: arrays merge by dedupe; if neither parent nor child declares
       // agents we fall back to the default at the end.
       agents: dedupePrimitiveArray(
@@ -504,6 +508,8 @@ function normalizeToResolved(p: Profile, chain: string[]): ResolvedProfile {
     description: p.description,
     icon: p.icon,
     iconImage: p.iconImage,
+    model: p.model,
+    contextWindow: p.contextWindow,
     agents: p.agents && p.agents.length > 0 ? [...p.agents] : [],
     inherits: p.inherits,
     skills: {
@@ -599,6 +605,9 @@ function foldComposite(selector: string, parts: ResolvedProfile[]): ResolvedProf
     description: parts.map((p) => p.description).join(" + "),
     icon: parts.find((p) => p.icon)?.icon,
     iconImage: parts.find((p) => p.iconImage)?.iconImage,
+    // First part that declares a budget hint wins for the composite.
+    model: parts.find((p) => p.model)?.model,
+    contextWindow: parts.find((p) => p.contextWindow)?.contextWindow,
     agents: [...head.agents] as ResolvedProfile["agents"],
     inherits: undefined,
     skills: { local: [...head.skills.local], npx: [...head.skills.npx] },
@@ -635,6 +644,8 @@ function foldComposite(selector: string, parts: ResolvedProfile[]): ResolvedProf
       description: acc.description,
       icon: acc.icon ?? next.icon,
       iconImage: acc.iconImage ?? next.iconImage,
+      model: acc.model ?? next.model,
+      contextWindow: acc.contextWindow ?? next.contextWindow,
       agents: dedupePrimitiveArray(acc.agents, next.agents) as ResolvedProfile["agents"],
       inherits: undefined,
       skills: {
diff --git a/src/lib/token-budget.test.ts b/src/lib/token-budget.test.ts
new file mode 100644
index 00000000..90d13f66
--- /dev/null
+++ b/src/lib/token-budget.test.ts
@@ -0,0 +1,141 @@
+import { describe, expect, test } from "bun:test";
+
+import {
+  DEFAULT_CONTEXT_WINDOW,
+  DEFAULT_LOAD_FACTOR,
+  MCP_TOKENS_PER_SERVER,
+  MODEL_CONTEXT_WINDOWS,
+  computeContextBudget,
+  estimateMcpTokens,
+  formatContextBudgetWarning,
+  resolveContextWindow,
+} from "./token-budget";
+
+describe("resolveContextWindow", () => {
+  test("explicit contextWindow wins over everything", () => {
+    expect(
+      resolveContextWindow({
+        contextWindow: 64_000,
+        model: "claude-opus-4-8[1m]",
+        env: { CUE_CONTEXT_WINDOW: "999000" },
+      }),
+    ).toBe(64_000);
+  });
+
+  test("model id maps to its window", () => {
+    expect(resolveContextWindow({ model: "claude-opus-4-8" })).toBe(256_000);
+    expect(resolveContextWindow({ model: "claude-opus-4-8[1m]" })).toBe(1_000_000);
+  });
+
+  test("env CUE_CONTEXT_WINDOW used when no profile signal", () => {
+    expect(resolveContextWindow({ env: { CUE_CONTEXT_WINDOW: "300000" } })).toBe(300_000);
+  });
+
+  test("env CUE_MODEL used after CUE_CONTEXT_WINDOW", () => {
+    expect(resolveContextWindow({ env: { CUE_MODEL: "claude-sonnet-4-6" } })).toBe(256_000);
+  });
+
+  test("falls back to the 256K default", () => {
+    expect(resolveContextWindow({ env: {} })).toBe(DEFAULT_CONTEXT_WINDOW);
+    expect(DEFAULT_CONTEXT_WINDOW).toBe(256_000);
+  });
+
+  test("ignores non-positive / unparseable overrides", () => {
+    expect(resolveContextWindow({ contextWindow: 0, env: {} })).toBe(DEFAULT_CONTEXT_WINDOW);
+    expect(resolveContextWindow({ env: { CUE_CONTEXT_WINDOW: "nonsense" } })).toBe(DEFAULT_CONTEXT_WINDOW);
+    expect(resolveContextWindow({ env: { CUE_CONTEXT_WINDOW: "-5" } })).toBe(DEFAULT_CONTEXT_WINDOW);
+  });
+
+  test("unknown model id falls through to default", () => {
+    expect(resolveContextWindow({ model: "gpt-imaginary", env: {} })).toBe(DEFAULT_CONTEXT_WINDOW);
+  });
+});
+
+describe("estimateMcpTokens", () => {
+  test("scales linearly with server count", () => {
+    expect(estimateMcpTokens(0)).toBe(0);
+    expect(estimateMcpTokens(4)).toBe(4 * MCP_TOKENS_PER_SERVER);
+  });
+
+  test("negative counts clamp to zero", () => {
+    expect(estimateMcpTokens(-3)).toBe(0);
+  });
+
+  test("honours a custom per-server estimate", () => {
+    expect(estimateMcpTokens(2, 1000)).toBe(2000);
+  });
+});
+
+describe("computeContextBudget", () => {
+  test("256K model → 128K budget; small load is within budget", () => {
+    const b = computeContextBudget({ skillTokens: 12_000, mcpCount: 4, model: "claude-opus-4-8" });
+    expect(b.window).toBe(256_000);
+    expect(b.budget).toBe(128_000);
+    expect(b.mcpTokens).toBe(4 * MCP_TOKENS_PER_SERVER);
+    expect(b.startupLoad).toBe(12_000 + 4 * MCP_TOKENS_PER_SERVER);
+    expect(b.withinBudget).toBe(true);
+    expect(b.overBy).toBe(0);
+  });
+
+  test("flags over-budget when skills + MCPs exceed half the window", () => {
+    const b = computeContextBudget({ skillTokens: 130_000, mcpCount: 2, model: "claude-opus-4-8" });
+    expect(b.withinBudget).toBe(false);
+    expect(b.overBy).toBeGreaterThan(0);
+    expect(b.overBy).toBe(b.startupLoad - b.budget);
+    expect(b.pctOfWindow).toBeGreaterThan(0.5);
+  });
+
+  test("default load factor is 50%", () => {
+    expect(DEFAULT_LOAD_FACTOR).toBe(0.5);
+    const b = computeContextBudget({ skillTokens: 0, mcpCount: 0, window: 200_000 });
+    expect(b.budget).toBe(100_000);
+  });
+
+  test("custom load factor changes the ceiling", () => {
+    const b = computeContextBudget({ skillTokens: 0, mcpCount: 0, window: 256_000, loadFactor: 0.25 });
+    expect(b.budget).toBe(64_000);
+  });
+
+  test("a tighter window can push a profile over budget", () => {
+    const within = computeContextBudget({ skillTokens: 90_000, mcpCount: 0, model: "claude-opus-4-8" });
+    expect(within.withinBudget).toBe(true);
+    const tight = computeContextBudget({ skillTokens: 90_000, mcpCount: 0, window: 128_000 });
+    expect(tight.budget).toBe(64_000);
+    expect(tight.withinBudget).toBe(false);
+  });
+});
+
+describe("formatContextBudgetWarning", () => {
+  test("silent well under budget", () => {
+    const b = computeContextBudget({ skillTokens: 5_000, mcpCount: 1, model: "claude-opus-4-8" });
+    expect(formatContextBudgetWarning(b)).toEqual([]);
+  });
+
+  test("soft 🟡 note when approaching the budget", () => {
+    // ~85% of a 128K budget: 100K skills + ~9K MCP.
+    const b = computeContextBudget({ skillTokens: 100_000, mcpCount: 6, model: "claude-opus-4-8" });
+    const lines = formatContextBudgetWarning(b);
+    expect(lines.length).toBe(1);
+    expect(lines[0]).toContain("🟡");
+  });
+
+  test("🔴 over-budget warning past the ceiling", () => {
+    const b = computeContextBudget({ skillTokens: 140_000, mcpCount: 2, model: "claude-opus-4-8" });
+    const lines = formatContextBudgetWarning(b);
+    expect(lines.length).toBe(2);
+    expect(lines[0]).toContain("🔴");
+    expect(lines.join(" ")).toContain("256K");
+  });
+
+  test("mentions MCP count and works without color helpers", () => {
+    const b = computeContextBudget({ skillTokens: 140_000, mcpCount: 3, model: "claude-opus-4-8" });
+    expect(formatContextBudgetWarning(b)[0]).toContain("3 MCPs");
+  });
+});
+
+describe("MODEL_CONTEXT_WINDOWS", () => {
+  test("known Claude models are present", () => {
+    expect(MODEL_CONTEXT_WINDOWS["claude-opus-4-8"]).toBe(256_000);
+    expect(MODEL_CONTEXT_WINDOWS["claude-sonnet-4-6"]).toBe(256_000);
+  });
+});
diff --git a/src/lib/token-budget.ts b/src/lib/token-budget.ts
index 6f95018e..30b561a3 100644
--- a/src/lib/token-budget.ts
+++ b/src/lib/token-budget.ts
@@ -109,3 +109,190 @@ export function tokenLevelEmoji(alwaysOn: number): "🔴" | "🟠" | "🟡" | "
       : alwaysOn > 5000 ? "🟡"
         : "🟢";
 }
+
+// ---------------------------------------------------------------------------
+// Model-aware startup budget
+//
+// The always-on footprint (skill frontmatter + MCP tool schemas) is paid on
+// every single message, before the user has typed anything. The guidance we
+// enforce here: at session start a profile should consume no more than HALF
+// the model's context window, leaving the other half as working headroom for
+// the actual task. For a 256K model that's a ~128K startup ceiling.
+// ---------------------------------------------------------------------------
+
+/** Fallback context window (tokens) when neither the profile nor the env says
+ *  which model is in play. cue can't auto-detect the main-session model (it's a
+ *  per-launch `/model` choice), so 256K is the conservative baseline. */
+export const DEFAULT_CONTEXT_WINDOW = 256_000;
+
+/** Fraction of the window a profile may occupy at startup. Half the window
+ *  keeps the other half free for the conversation. */
+export const DEFAULT_LOAD_FACTOR = 0.5;
+
+/**
+ * Rough always-on cost of one MCP server, in tokens. Every connected server
+ * injects its tool-schema definitions into the system prompt on every message.
+ * Real cost varies widely by server (a 2-tool server vs codegraph's dozens),
+ * so this is a deliberately conservative per-server heuristic; override with
+ * CUE_MCP_TOKENS_PER_SERVER when you have a measured number.
+ */
+export const MCP_TOKENS_PER_SERVER = 1500;
+
+/**
+ * Known model → context-window (tokens). Keyed on the exact model ids cue
+ * surfaces in its `/model` hints. The `[1m]` long-context Opus variant is the
+ * one outlier; everything else defaults to the standard 256K window.
+ */
+export const MODEL_CONTEXT_WINDOWS: Record<string, number> = {
+  "claude-opus-4-8": 256_000,
+  "claude-opus-4-8[1m]": 1_000_000,
+  "claude-opus-4-7": 256_000,
+  "claude-opus-4-6": 256_000,
+  "claude-sonnet-4-6": 256_000,
+  "claude-haiku-4-5": 256_000,
+  "claude-fable-5": 256_000,
+};
+
+/**
+ * Resolve the context window to budget against. Precedence (first hit wins):
+ *   1. explicit `contextWindow` (e.g. the profile's declared field)
+ *   2. `model` looked up in MODEL_CONTEXT_WINDOWS
+ *   3. env `CUE_CONTEXT_WINDOW` (a raw token count)
+ *   4. env `CUE_MODEL` looked up in MODEL_CONTEXT_WINDOWS
+ *   5. DEFAULT_CONTEXT_WINDOW (256K)
+ * Non-positive, non-finite and unparseable values are skipped, and model ids
+ * match own keys only (so "constructor" can't resolve to a prototype member).
+ */
+export function resolveContextWindow(opts: {
+  contextWindow?: number;
+  model?: string;
+  env?: Record<string, string | undefined>;
+} = {}): number {
+  const env = opts.env ?? process.env;
+  const usable = (n: number | undefined): n is number =>
+    n !== undefined && Number.isFinite(n) && n > 0;
+  // Own-key lookup only: a plain-object map also answers for inherited names.
+  const byModel = (id: string | undefined): number | undefined =>
+    id && Object.prototype.hasOwnProperty.call(MODEL_CONTEXT_WINDOWS, id) ? MODEL_CONTEXT_WINDOWS[id] : undefined;
+  const candidates = [opts.contextWindow, byModel(opts.model), Number(env.CUE_CONTEXT_WINDOW), byModel(env.CUE_MODEL)];
+  return candidates.find(usable) ?? DEFAULT_CONTEXT_WINDOW;
+}
+
+/**
+ * Estimate the always-on token cost of `mcpCount` connected MCP servers.
+ * Returns 0 when either argument is non-positive or NaN.
+ */
+export function estimateMcpTokens(mcpCount: number, perServer: number = MCP_TOKENS_PER_SERVER): number {
+  // Negated `>` also rejects NaN, which a plain `<= 0` test would let through.
+  return !(mcpCount > 0) || !(perServer > 0) ? 0 : mcpCount * perServer;
+}
+
+export interface ContextBudget {
+  /** Model context window in tokens. */
+  window: number;
+  /** Fraction of the window allowed at startup (0..1). */
+  loadFactor: number;
+  /** Token ceiling = window * loadFactor. */
+  budget: number;
+  /** Always-on skill frontmatter tokens. */
+  skillTokens: number;
+  /** Estimated always-on MCP tool-schema tokens. */
+  mcpTokens: number;
+  /** Number of MCP servers counted. */
+  mcpCount: number;
+  /** Total always-on startup load = skillTokens + mcpTokens. */
+  startupLoad: number;
+  /** True when the startup load is at or under the budget. */
+  withinBudget: boolean;
+  /** Tokens over the budget (0 when within). */
+  overBy: number;
+  /** startupLoad / window. */
+  pctOfWindow: number;
+  /** startupLoad / budget. */
+  pctOfBudget: number;
+}
+
+/**
+ * Compute the model-aware startup budget for a profile from the measured skill
+ * frontmatter total and the MCP count; the window comes from
+ * `resolveContextWindow`. Per-server MCP cost: `mcpTokensPerServer`, else a
+ * positive env `CUE_MCP_TOKENS_PER_SERVER`, else MCP_TOKENS_PER_SERVER.
+ */
+export function computeContextBudget(input: {
+  skillTokens: number;
+  mcpCount: number;
+  window?: number;
+  model?: string;
+  loadFactor?: number;
+  mcpTokensPerServer?: number;
+  env?: Record<string, string | undefined>;
+}): ContextBudget {
+  const env = input.env ?? process.env;
+  const window = resolveContextWindow({ contextWindow: input.window, model: input.model, env });
+  const loadFactor = input.loadFactor && input.loadFactor > 0 ? input.loadFactor : DEFAULT_LOAD_FACTOR;
+  const budget = Math.round(window * loadFactor);
+  const skillTokens = Math.max(0, input.skillTokens);
+  const envPerServer = Number(env.CUE_MCP_TOKENS_PER_SERVER);
+  const perServer = input.mcpTokensPerServer ?? (Number.isFinite(envPerServer) && envPerServer > 0 ? envPerServer : undefined);
+  const mcpTokens = estimateMcpTokens(input.mcpCount, perServer);
+  const startupLoad = skillTokens + mcpTokens;
+  const overBy = Math.max(0, startupLoad - budget);
+  return {
+    window,
+    loadFactor,
+    budget,
+    skillTokens,
+    mcpTokens,
+    mcpCount: Math.max(0, input.mcpCount),
+    startupLoad,
+    withinBudget: startupLoad <= budget,
+    overBy,
+    pctOfWindow: window > 0 ? startupLoad / window : 0,
+    pctOfBudget: budget > 0 ? startupLoad / budget : 0,
+  };
+}
+
+const fmtK = (n: number): string => `${(n / 1000).toFixed(n >= 100_000 ? 0 : 1)}K`;
+
+/**
+ * Format the model-aware budget block for the CLI. Returns `[]` when the
+ * profile sits comfortably under the budget (< 80%), a soft 🟡 note when it's
+ * approaching it, and a 🔴 warning plus a remediation hint past the ceiling.
+ * `color` helpers are injected so this stays free of any ANSI dependency.
+ */
+export function formatContextBudgetWarning(
+  b: ContextBudget,
+  color: { yellow: (s: string) => string; bold: (s: string) => string; dim: (s: string) => string } = {
+    yellow: (s) => s,
+    bold: (s) => s,
+    dim: (s) => s,
+  },
+): string[] {
+  // Quiet until the profile is within striking distance of the ceiling.
+  if (b.pctOfBudget < 0.8) return [];
+
+  const windowK = fmtK(b.window);
+  const budgetK = fmtK(b.budget);
+  const loadK = fmtK(b.startupLoad);
+  const pct = Math.round(b.pctOfWindow * 100);
+  const mcpNote = b.mcpCount > 0 ? ` + ${b.mcpCount} MCP${b.mcpCount > 1 ? "s" : ""}` : "";
+  const lines: string[] = [];
+
+  if (!b.withinBudget) {
+    const overK = fmtK(b.overBy);
+    lines.push(
+      `🔴 Context budget: ${color.yellow(`~${loadK}`)} always-on (skills${mcpNote}) — ` +
+        `${color.bold(`over the ${Math.round(b.loadFactor * 100)}% startup target`)} for a ${windowK} model ` +
+        `(budget ~${budgetK}, over by ~${overK}).`,
+    );
+    lines.push(
+      `   ${color.dim(`You're at ${pct}% of the window before the first message. Trim skills/MCPs, launch a narrower stack, or raise the window (profile contextWindow; CUE_CONTEXT_WINDOW is used only when the profile sets no window/model).`)}`,
+    );
+  } else {
+    lines.push(
+      `🟡 Context budget: ${color.yellow(`~${loadK}`)} always-on (skills${mcpNote}) — ` +
+        `${Math.round(b.pctOfBudget * 100)}% of the ${Math.round(b.loadFactor * 100)}% startup target for a ${windowK} model (budget ~${budgetK}).`,
+    );
+  }
+  return lines;
+}