opencue · NagyVikt · Jun 24, 2026 · Jun 22, 2026
diff --git a/docs/agent-context-budget.md b/docs/agent-context-budget.md
@@ -32,6 +32,40 @@ Use `setup/lean-cue.md` for the smallest path. It should install cue and pin
 `core` by default. Caveman, RTK, skill-writing, memory, gbrain, and Office MCPs
 are optional add-ons.
 
+## Model-Aware Startup Budget
+
+A profile's *always-on* footprint — skill frontmatter (loaded into the skill
+router every message) plus MCP tool schemas (one set per connected server) — is
+paid before the user types anything. The rule cue enforces: that startup load
+should stay under **50% of the model's context window**, leaving the other half
+as working headroom. For a 256K model that's a **~128K startup ceiling**.
+
+The math lives in `src/lib/token-budget.ts` (pure, unit-tested in
+`token-budget.test.ts`):
+
+- `resolveContextWindow` picks the window. Precedence: profile `contextWindow`
+  → profile `model` (via `MODEL_CONTEXT_WINDOWS`) → env `CUE_CONTEXT_WINDOW`
+  → env `CUE_MODEL` → `DEFAULT_CONTEXT_WINDOW` (256K). cue can't auto-detect the
+  main-session model (per-launch `/model` choice), hence the declared/env source.
+- `estimateMcpTokens` charges `MCP_TOKENS_PER_SERVER` (default 1500, override
+  with `CUE_MCP_TOKENS_PER_SERVER`) per connected MCP.
+- `computeContextBudget` returns the budget, the startup load, and whether it
+  fits. `formatContextBudgetWarning` stays quiet under 80% of budget, prints a
+  🟡 note as it approaches, and a 🔴 over-budget warning past the ceiling.
+
+Surfaces:
+
+- `cue launch` prints the warning in the startup banner (real resolved skill +
+  MCP token data).
+- `cue profile suggest` adds a **Context budget** section auditing every
+  profile. It accepts `--model <id>`, `--context <tokens>`, and
+  `--load-factor <0..1>`; pass `--no-budget` to skip the section.
+
+Profiles declare their target via the `model` / `contextWindow` fields (both
+optional, leaf-wins through inheritance). `core` sets `contextWindow: 256000` as
+the fan-out baseline; a long-context profile can bump it (e.g.
+`model: claude-opus-4-8[1m]` for a 1M window).
+
 ## Onboarding Source
 
 `src/commands/init.ts` controls first-run global onboarding. Keep the first

diff --git a/profiles/_types.ts b/profiles/_types.ts
@@ -43,6 +43,20 @@ export interface Profile {
   description: string;
   icon?: string;
   iconImage?: string;
+  /**
+   * Target main-session model id (e.g. "claude-opus-4-8"). Advisory only — cue
+   * can't pin the main session model (that's a per-launch `/model` choice) — but
+   * the model-aware startup budget (lib/token-budget.ts) uses it to resolve the
+   * context window so the over-budget warning knows what window to size against.
+   * Leaf-wins through inheritance; `resolveContextWindow` ranks it against env `CUE_MODEL`.
+   */
+  model?: string;
+  /**
+   * Explicit context window (tokens) to budget the startup load against, when
+   * the model id alone isn't enough (custom/long-context deployments). Takes
+   * precedence over `model`. Leaf-wins; see `resolveContextWindow` for env ranking.
+   */
+  contextWindow?: number;
   agents?: AgentKind[];
   inherits?: string | string[];
   // Companion profiles surfaced at `cue use` time as suggestions. Activating

diff --git a/profiles/core/profile.yaml b/profiles/core/profile.yaml
@@ -2,6 +2,15 @@ name: core
 icon: "🧠"
 iconImage: "logo.png"
 description: Baseline shared by every cue profile — essentials only
+# Startup context budget baseline, fans out to every inheriting profile.
+# cue can't pin the main-session model (that's a per-launch /model choice), so
+# this just tells the model-aware budget (lib/token-budget.ts) which window to
+# size against: the always-on load (skill frontmatter + MCP tool schemas) should
+# stay under 50% of it, leaving the other half free for the conversation. At
+# 256K that's a ~128K startup ceiling; `cue launch` and `cue profile suggest`
+# warn when a profile blows past it. Override per-profile by redeclaring
+# `contextWindow`/`model`, or per-launch via CUE_CONTEXT_WINDOW / CUE_MODEL.
+contextWindow: 256000
 # Cost knob, fans out to every inheriting profile. Surfaced into the runtime
 # settings.json `env` block by buildClaudeSettings (allowlisted). Pins Task/Agent
 # subagents (code-reviewer, Explore, file-read/grep/summarize) to Sonnet —

diff --git a/profiles/schema.json b/profiles/schema.json
@@ -27,6 +27,15 @@
       "type": "string",
       "description": "Path to a PNG/JPG logo, relative to the profile dir. Rendered inline in Kitty terminals via the graphics protocol; falls back to `icon` emoji elsewhere."
     },
+    "model": {
+      "type": "string",
+      "description": "Advisory target main-session model id (e.g. 'claude-opus-4-8'). cue can't pin the main session model (per-launch /model choice); the model-aware startup budget uses it to resolve the context window for the over-budget warning. Leaf-wins; env CUE_MODEL overrides."
+    },
+    "contextWindow": {
+      "type": "integer",
+      "minimum": 1,
+      "description": "Explicit context window (tokens) to budget the startup load against, when the model id alone isn't enough. Takes precedence over `model`. Leaf-wins; env CUE_CONTEXT_WINDOW overrides."
+    },
     "bundles": {
       "type": "array",
       "items": { "type": "string" },

diff --git a/src/commands/launch.ts b/src/commands/launch.ts
@@ -20,6 +20,8 @@ import { configDir } from "../lib/config-paths";
 import { debug } from "../lib/debug-log";
 import {
   computeTokenBreakdown,
+  computeContextBudget,
+  formatContextBudgetWarning,
   splitSkillBytes,
   tokenLevelEmoji,
   type SkillTokens,
@@ -1874,6 +1876,22 @@ export async function run(args: string[]): Promise<number> {
       const breakdown = computeTokenBreakdown(profile, parts, tokensForSkill);
       alwaysOnForBadge = breakdown.alwaysOn;
       const lines = formatTokenWarning(breakdown);
+
+      // Model-aware startup budget: skills frontmatter + MCP tool-schema cost
+      // should stay under 50% of the model's context window so the other half
+      // is free for the conversation. Window precedence: profile
+      // contextWindow/model → env (CUE_CONTEXT_WINDOW/CUE_MODEL) → 256K default.
+      const budget = computeContextBudget({
+        skillTokens: breakdown.alwaysOn,
+        mcpCount: profile.mcps.length,
+        window: profile.contextWindow,
+        model: profile.model,
+      });
+      const bc = colorFns();
+      lines.push(
+        ...formatContextBudgetWarning(budget, { yellow: bc.yellow, bold: bc.bold, dim: bc.dim }),
+      );
+
       if (lines.length > 0) {
         process.stderr.write("\n");
         for (const l of lines) process.stderr.write(`${l}\n`);

diff --git a/src/commands/profile-suggest.ts b/src/commands/profile-suggest.ts
@@ -22,9 +22,16 @@ import {
   skillFrequency,
   type ClusterItem,
 } from "../lib/cluster-skills";
+import { loadProfile } from "../lib/profile-loader";
+import {
+  computeContextBudget,
+  resolveContextWindow,
+  splitSkillBytes,
+} from "../lib/token-budget";
 
 const REPO_ROOT = process.env.CUE_REPO_ROOT ?? process.env.SOUL_REPO_ROOT ?? resolve(dirname(fileURLToPath(import.meta.url)), "..", "..");
 const PROFILES_DIR = join(REPO_ROOT, "profiles");
+const SKILLS_DIR = join(REPO_ROOT, "resources", "skills", "skills");
 const DISCOVER_CACHE = join(
   process.env.XDG_CONFIG_HOME ?? join(homedir(), ".config"),
   "cue", "discover", "gems.json",
@@ -160,6 +167,105 @@ function reportUnfitGems(minSize: number): void {
   process.stdout.write(`\n  ${dim("→ run `cue discover suggest-profiles` to generate draft profile.yaml files")}\n\n`);
 }
 
+// ---------------------------------------------------------------------------
+// Context-budget audit (model-aware)
+//
+// Resolve each profile (so inherited core skills/MCPs count) and estimate its
+// always-on startup load: skill frontmatter tokens + MCP tool-schema tokens.
+// Flag profiles whose load exceeds the startup target (default 50%) of the
+// chosen model's context window (default 256K → ~128K budget).
+// ---------------------------------------------------------------------------
+
+interface BudgetOpts { // CLI overrides for the context-budget audit; every field is optional
+  model?: string; // from `--model <id>`
+  window?: number; // from `--context <tokens>`
+  loadFactor?: number; // from `--load-factor <0..1>`; the report header assumes 0.5 when unset or <= 0
+}
+
+const skillTokenCache = new Map<string, number>(); // skill id → estimated tokens, filled lazily
+
+/** Estimated always-on frontmatter tokens for one local skill id (frontmatter bytes / 4, rounded up). */
+function skillFrontmatterTokens(id: string): number {
+  const cached = skillTokenCache.get(id);
+  if (cached !== undefined) return cached; // a cached 0 is still a hit
+  let tokens = 0;
+  try {
+    const src = readFileSync(join(SKILLS_DIR, id, "SKILL.md"), "utf8");
+    tokens = Math.ceil(splitSkillBytes(src).frontmatter / 4);
+  } catch {
+    // Any read or split failure (e.g. the skill is npx-only or was moved) leaves
+    // the estimate at 0: we can't measure what we can't read.
+  }
+  skillTokenCache.set(id, tokens); // failures are cached too, so each id is read at most once
+  return tokens;
+}
+
+async function reportContextBudget(profileNames: string[], opts: BudgetOpts): Promise<void> { // writes the report to stdout
+  const window = resolveContextWindow({ contextWindow: opts.window, model: opts.model }); // feeds the header line only
+  const rows: Array<{ name: string; load: number; over: boolean; pct: number; mcps: number }> = [];
+  // Build one row per profile that loads; profiles that fail to resolve are skipped silently.
+  for (const name of profileNames) {
+    let resolved;
+    try {
+      resolved = await loadProfile(name);
+    } catch {
+      continue; // malformed / unresolvable — `cue validate` is the right tool.
+    }
+    let skillTokens = 0;
+    for (const s of resolved.skills.local) skillTokens += skillFrontmatterTokens(s.id);
+    const budget = computeContextBudget({
+      skillTokens,
+      mcpCount: resolved.mcps.length,
+      window: opts.window ?? resolved.contextWindow, // CLI flag wins over the profile's own hint
+      model: opts.model ?? resolved.model, // same rule for the model id
+      loadFactor: opts.loadFactor,
+    });
+    rows.push({
+      name,
+      load: budget.startupLoad,
+      over: !budget.withinBudget,
+      pct: budget.pctOfBudget,
+      mcps: budget.mcpCount,
+    });
+  }
+
+  if (rows.length === 0) {
+    process.stdout.write(`  ${dim("no resolvable profiles to budget")}\n\n`);
+    return;
+  }
+  // NOTE(review): header window/budget come from opts alone; rows also fall back to each profile's own hints — confirm they agree.
+  const loadFactor = opts.loadFactor && opts.loadFactor > 0 ? opts.loadFactor : 0.5;
+  const budgetTokens = Math.round(window * loadFactor);
+  const windowK = `${(window / 1000).toFixed(0)}K`;
+  const budgetK = `${(budgetTokens / 1000).toFixed(0)}K`;
+  process.stdout.write(
+    `  ${dim(`window ${windowK} · ${Math.round(loadFactor * 100)}% startup target → budget ~${budgetK} always-on`)}\n\n`,
+  );
+
+  const over = rows.filter(r => r.over).sort((a, b) => b.load - a.load); // heaviest offender first
+  if (over.length === 0) {
+    process.stdout.write(`  ${dim(`all ${rows.length} profiles fit the ${budgetK} startup budget`)}\n`);
+  } else {
+    process.stdout.write(`  ${bold(`${over.length} profile(s) over the ${budgetK} startup budget:`)}\n\n`);
+    for (const r of over) {
+      const loadK = `${(r.load / 1000).toFixed(1)}K`;
+      const mcpNote = r.mcps > 0 ? `, ${r.mcps} MCP${r.mcps > 1 ? "s" : ""}` : "";
+      process.stdout.write(
+        `    🔴 ${r.name}  ${dim(`~${loadK} always-on${mcpNote} — ${Math.round(r.pct * 100)}% of budget`)}\n`,
+      );
+    }
+  }
+
+  // Always list the three heaviest profiles (sorted copy of rows), over budget or not, so near-misses stay visible.
+  const heaviest = [...rows].sort((a, b) => b.load - a.load).slice(0, 3);
+  process.stdout.write(`\n  ${dim("Heaviest profiles:")}\n`);
+  for (const r of heaviest) {
+    const loadK = `${(r.load / 1000).toFixed(1)}K`;
+    process.stdout.write(`    • ${r.name} ${dim(`(~${loadK}, ${Math.round(r.pct * 100)}% of budget)`)}\n`);
+  }
+  process.stdout.write(`\n`);
+}
+
 // ---------------------------------------------------------------------------
 // Tiny ANSI helpers (no dependency)
 // ---------------------------------------------------------------------------
@@ -177,13 +283,17 @@ export async function run(args: string[]): Promise<number> {
     process.stdout.write(`cue profile suggest — audit profiles/ and propose regroupings
 
 Usage:
-  cue profile suggest                Run all three signals (default)
+  cue profile suggest                Run all signals (default)
   cue profile suggest --no-cluster   Skip the discover-cache clustering section
+  cue profile suggest --no-budget    Skip the model-aware context-budget audit
 
 Options:
   --min-profiles <n>    Promote-to-core threshold (default: 3)
   --jaccard <0..1>      Merge-candidate threshold (default: 0.5)
   --min-size <n>        Cluster size threshold for unfit gems (default: 3)
+  --model <id>          Model id for the context-budget audit (e.g. claude-opus-4-8)
+  --context <tokens>    Explicit context window for the budget (overrides --model)
+  --load-factor <0..1>  Fraction of the window allowed at startup (default: 0.5)
 
 Output: report only. Nothing is written.
 `);
@@ -197,6 +307,13 @@ Output: report only. Nothing is written.
   const minSizeIdx = args.indexOf("--min-size");
   const minSize = minSizeIdx >= 0 ? parseInt(args[minSizeIdx + 1] ?? "3", 10) : 3;
   const skipCluster = args.includes("--no-cluster");
+  const skipBudget = args.includes("--no-budget");
+  const modelIdx = args.indexOf("--model");
+  const model = modelIdx >= 0 ? args[modelIdx + 1] : undefined;
+  const contextIdx = args.indexOf("--context");
+  const contextWindow = contextIdx >= 0 ? parseInt(args[contextIdx + 1] ?? "", 10) : undefined;
+  const loadFactorIdx = args.indexOf("--load-factor");
+  const loadFactor = loadFactorIdx >= 0 ? parseFloat(args[loadFactorIdx + 1] ?? "") : undefined;
 
   const profileSkills = readProfileSkills();
   const total = Object.keys(profileSkills).length;
@@ -218,6 +335,15 @@ Output: report only. Nothing is written.
     reportUnfitGems(minSize);
   }
 
+  if (!skipBudget) {
+    process.stdout.write(`${bold("4. Context budget (model-aware)")}\n\n`);
+    await reportContextBudget(Object.keys(profileSkills), {
+      model,
+      window: Number.isFinite(contextWindow) ? contextWindow : undefined,
+      loadFactor: Number.isFinite(loadFactor) ? loadFactor : undefined,
+    });
+  }
+
   process.stdout.write(`${dim("(report-only — review and edit profiles/*/profile.yaml by hand)")}\n`);
   return 0;
 }
diff --git a/src/lib/profile-loader.ts b/src/lib/profile-loader.ts
@@ -444,6 +444,10 @@ function foldChain(chain: Profile[]): ResolvedProfile {
       description: child.description,
       icon: child.icon ?? acc.icon,
       iconImage: child.iconImage ?? acc.iconImage,
+      // Budget hints are leaf-wins: a child that declares its own model /
+      // context window overrides the parent; otherwise it inherits.
+      model: child.model ?? acc.model,
+      contextWindow: child.contextWindow ?? acc.contextWindow,
       // agents: arrays merge by dedupe; if neither parent nor child declares
       // agents we fall back to the default at the end.
       agents: dedupePrimitiveArray(
@@ -504,6 +508,8 @@ function normalizeToResolved(p: Profile, chain: string[]): ResolvedProfile {
     description: p.description,
     icon: p.icon,
     iconImage: p.iconImage,
+    model: p.model,
+    contextWindow: p.contextWindow,
     agents: p.agents && p.agents.length > 0 ? [...p.agents] : [],
     inherits: p.inherits,
     skills: {
@@ -599,6 +605,9 @@ function foldComposite(selector: string, parts: ResolvedProfile[]): ResolvedProf
     description: parts.map((p) => p.description).join(" + "),
     icon: parts.find((p) => p.icon)?.icon,
     iconImage: parts.find((p) => p.iconImage)?.iconImage,
+    // First part that declares a budget hint wins for the composite.
+    model: parts.find((p) => p.model)?.model,
+    contextWindow: parts.find((p) => p.contextWindow)?.contextWindow,
     agents: [...head.agents] as ResolvedProfile["agents"],
     inherits: undefined,
     skills: { local: [...head.skills.local], npx: [...head.skills.npx] },
@@ -635,6 +644,8 @@ function foldComposite(selector: string, parts: ResolvedProfile[]): ResolvedProf
       description: acc.description,
       icon: acc.icon ?? next.icon,
       iconImage: acc.iconImage ?? next.iconImage,
+      model: acc.model ?? next.model,
+      contextWindow: acc.contextWindow ?? next.contextWindow,
       agents: dedupePrimitiveArray(acc.agents, next.agents) as ResolvedProfile["agents"],
       inherits: undefined,
       skills: {