From d96193795ae9a086cd3e3df9cc7d50a2d1ba40f2 Mon Sep 17 00:00:00 2001
From: "Shawn L. Kiser" <35721408+slkiser@users.noreply.github.com>
Date: Tue, 3 Mar 2026 11:04:17 +0100
Subject: [PATCH] Add runtime pricing refresh & update snapshot

Introduce runtime pricing refresh for models.dev data: add scripts (refresh-modelsdev-pricing.mjs, refresh-modelsdev-pricing-if-stale.mjs) and package.json commands to fetch and atomically write a trimmed pricing snapshot. Update README with runtime refresh behavior, env knobs, and maintainer commands. Implement runtime snapshot handling, refresh policy, health/state types, and atomic write/load paths in src/lib/modelsdev-pricing.ts; include bundled snapshot fallback and selection between bundled/runtime snapshots. Ship an updated bundled modelsdev-pricing.min.json with expanded provider/model entries and cache cost fields, and add/update tests related to pricing refresh and snapshot health.
---
 README.md                                     |  38 +-
 package.json                                  |   2 +
 .../refresh-modelsdev-pricing-if-stale.mjs    |  95 +++
 scripts/refresh-modelsdev-pricing.mjs         | 141 ++++
 src/data/modelsdev-pricing.min.json           | 743 +++++++++++-------
 src/lib/modelsdev-pricing.ts                  | 649 ++++++++++++++-
 src/lib/quota-stats.ts                        | 120 ++-
 src/lib/quota-status.ts                       |  30 +
 src/plugin.ts                                 |  49 +-
 tests/plugin.command-handled-boundary.test.ts |  31 +
 tests/plugin.qwen-hook.test.ts                |  10 +
 tests/pricing-auto-refresh-policy.test.ts     | 337 ++++++++
 tests/pricing-resolver.coverage.test.ts       |  62 +-
 tests/pricing-snapshot-health.test.ts         |  31 +
 tests/setup.ts                                |  10 +-
 15 files changed, 2029 insertions(+), 319 deletions(-)
 create mode 100644 scripts/refresh-modelsdev-pricing-if-stale.mjs
 create mode 100644 scripts/refresh-modelsdev-pricing.mjs
 create mode 100644 tests/pricing-auto-refresh-policy.test.ts
 create mode 100644 tests/pricing-snapshot-health.test.ts

diff --git a/README.md b/README.md
index 074e7bb..11e01e0 100644
--- a/README.md
+++ b/README.md
@@ -12,7 +12,7 @@ Track OpenCode quota & tokens via Toasts/Commands with zero context window pollu
 
 ![Image of quota toasts](https://github.com/slkiser/opencode-quota/blob/main/toast.png)
 
-**Token Report Commands** - Track token usage and estimated costs across sessions using only local OpenCode SQLite history plus the bundled models.dev snapshot (no network calls).
+**Token Report Commands** - Track token usage and estimated costs across sessions using local OpenCode SQLite history plus a local models.dev pricing snapshot. The plugin can refresh that local snapshot at runtime when stale.
 
 ![Image of /quota and /tokens_daily outputs](https://github.com/slkiser/opencode-quota/blob/main/quota.png)
 
@@ -77,7 +77,39 @@ Token reporting commands are the `/tokens_*` family (there is no `/token` comman
 | Google Antigravity | `google-antigravity` | Multi-account via `opencode-antigravity-auth` |
 | Z.ai               | `zai`                | OpenCode auth (automatic)                     |
 
-Token pricing coverage is broader than quota provider support: `/tokens_*` maps usage against all provider/model entries present in the bundled models.dev data snapshot.
+Token pricing coverage is broader than quota provider support: `/tokens_*` maps usage against provider/model entries in the active local models.dev pricing snapshot.
+
+Pricing refresh ownership is now runtime-based:
+
+- A bundled snapshot (`src/data/modelsdev-pricing.min.json`) is always shipped as bootstrap/offline fallback.
+- At plugin runtime, when `experimental.quotaToast.enabled` is `true`, pricing refresh runs as a bounded best-effort check (once per process window, plus persisted attempt tracking) during init and before `/tokens_*` / `/quota_status` report paths.
+- If the active snapshot is older than the stale threshold (3 days by default), the plugin attempts to fetch `https://models.dev/api.json`, keeps only the `input`, `output`, `cache_read`, and `cache_write` cost fields, and writes a refreshed local runtime snapshot.
+- If fetch fails, reports continue using the last local snapshot (no hard failure).
+
+Runtime snapshot files are stored under the OpenCode cache directory:
+
+- `.../opencode/opencode-quota/modelsdev-pricing.runtime.min.json`
+- `.../opencode/opencode-quota/modelsdev-pricing.refresh-state.json`
+
+Runtime refresh toggles:
+
+```sh
+# Disable runtime pricing refresh
+OPENCODE_QUOTA_PRICING_AUTO_REFRESH=0
+
+# Change stale threshold (default: 3 days)
+OPENCODE_QUOTA_PRICING_MAX_AGE_DAYS=5
+```
+
+Maintainer-only bundled snapshot refresh (manual):
+
+```sh
+npm run pricing:refresh
+npm run pricing:refresh:if-stale
+npm run build
+```
+
+`pricing:refresh:if-stale` uses the same env knobs as runtime refresh (`OPENCODE_QUOTA_PRICING_AUTO_REFRESH`, `OPENCODE_QUOTA_PRICING_MAX_AGE_DAYS`).
 
 ### Provider-Specific Setup
 
@@ -215,7 +247,7 @@ All options go under `experimental.quotaToast` in `opencode.json` or `opencode.j
 
 | Option              | Default      | Description                                                                                          |
 | ------------------- | ------------ | ---------------------------------------------------------------------------------------------------- |
-| `enabled`           | `true`       | Enable/disable plugin                                                                                |
+| `enabled`           | `true`       | Enable/disable plugin. When `false`, `/quota`, `/quota_status`, and `/tokens_*` are strict no-ops. |
 | `enableToast`       | `true`       | Show popup toasts                                                                                    |
 | `toastStyle`        | `classic`    | Toast layout style: `classic` or `grouped`                                                           |
 | `enabledProviders`  | `"auto"`     | Provider IDs to query, or `"auto"` to detect                                                         |
diff --git a/package.json b/package.json
index 56d21be..8f05f11 100644
--- a/package.json
+++ b/package.json
@@ -18,6 +18,8 @@
   ],
   "scripts": {
     "build": "tsc && node scripts/copy-data.mjs",
+    "pricing:refresh": "node scripts/refresh-modelsdev-pricing.mjs",
+    "pricing:refresh:if-stale": "node scripts/refresh-modelsdev-pricing-if-stale.mjs",
     "verify:release-version": "node scripts/verify-release-version.mjs",
     "typecheck": "tsc --noEmit",
     "test": "vitest run",
diff --git a/scripts/refresh-modelsdev-pricing-if-stale.mjs b/scripts/refresh-modelsdev-pricing-if-stale.mjs
new file mode 100644
index 0000000..78fe9d6
--- /dev/null
+++ b/scripts/refresh-modelsdev-pricing-if-stale.mjs
@@ -0,0 +1,95 @@
+import { spawn } from "child_process";
+import { readFile } from "fs/promises";
+import path from "path";
+import { fileURLToPath } from "url";
+
+export const DEFAULT_MAX_AGE_MS = 3 * 24 * 60 * 60 * 1000; // 3 days, expressed in milliseconds
+
+function parseEnabled(value) { // Interprets an env-style flag string and returns a boolean.
+  if (!value) return true; // unset or empty string => enabled
+  const normalized = value.trim().toLowerCase();
+  return !["0", "false", "no", "off"].includes(normalized); // only these spellings (case-insensitive) disable
+}
+
+function parseMaxAgeMs(value) { // Converts a max age given in days (string) into whole milliseconds.
+  if (!value) return DEFAULT_MAX_AGE_MS; // unset or empty string => default
+  const days = Number(value);
+  if (!Number.isFinite(days) || days <= 0) return DEFAULT_MAX_AGE_MS; // non-numeric, zero, or negative => default
+  return Math.floor(days * 24 * 60 * 60 * 1000); // fractional days are allowed; result is rounded down
+}
+
+export function shouldAutoRefresh(meta, nowMs, maxAgeMs = DEFAULT_MAX_AGE_MS) { // Returns true when the snapshot needs a refresh.
+  const generatedAt = Number(meta?.generatedAt);
+  if (!Number.isFinite(generatedAt) || generatedAt <= 0) return true; // missing or invalid timestamp => refresh
+  return nowMs - generatedAt > maxAgeMs; // strictly older than maxAgeMs; an age exactly equal to it is still fresh
+}
+
+async function readSnapshotMeta() { // Returns the snapshot's `_meta` object, or null; rejects if the file cannot be read or parsed.
+  const snapshotUrl = new URL("../src/data/modelsdev-pricing.min.json", import.meta.url); // resolved relative to this script
+  const raw = await readFile(snapshotUrl, "utf8");
+  const parsed = JSON.parse(raw);
+  const meta = parsed?._meta;
+  return meta && typeof meta === "object" ? meta : null; // null when `_meta` is absent or not an object
+}
+
+function runRefreshScript() { // Runs refresh-modelsdev-pricing.mjs in a child Node process; resolves only on exit code 0.
+  const scriptPath = fileURLToPath(new URL("./refresh-modelsdev-pricing.mjs", import.meta.url));
+  return new Promise((resolve, reject) => {
+    const child = spawn(process.execPath, [scriptPath], { stdio: "inherit" }); // child output goes straight to this terminal
+    child.once("error", reject); // the process could not be spawned
+    child.once("exit", (code, signal) => { // code is null when the child was terminated by a signal
+      if (code === 0) resolve(undefined);
+      else reject(new Error(`refresh-modelsdev-pricing.mjs exited with ${signal ? `signal ${signal}` : `code ${code ?? "unknown"}`}`));
+    });
+  });
+}
+
+export async function main() { // Refreshes the bundled snapshot when it is stale; a failed refresh is logged, not thrown.
+  if (!parseEnabled(process.env.OPENCODE_QUOTA_PRICING_AUTO_REFRESH)) {
+    console.log("Skipping pricing auto-refresh: OPENCODE_QUOTA_PRICING_AUTO_REFRESH disables it.");
+    return;
+  }
+
+  const maxAgeMs = parseMaxAgeMs(process.env.OPENCODE_QUOTA_PRICING_MAX_AGE_DAYS);
+  const nowMs = Date.now();
+
+  let meta = null;
+  try {
+    meta = await readSnapshotMeta();
+  } catch { // an unreadable or invalid snapshot is treated as missing, which forces a refresh below
+    meta = null;
+  }
+
+  if (!shouldAutoRefresh(meta, nowMs, maxAgeMs)) {
+    const generatedAt = Number(meta?.generatedAt);
+    const ageMs = Math.max(0, nowMs - generatedAt); // clamps a negative age (generatedAt later than now) to 0
+    console.log(
+      `Pricing snapshot is fresh (age ${ageMs}ms <= max ${maxAgeMs}ms). Skipping auto-refresh.`,
+    );
+    return;
+  }
+
+  console.log(
+    `Pricing snapshot is stale or missing (max age ${maxAgeMs}ms). Refreshing from models.dev...`,
+  );
+  try {
+    await runRefreshScript();
+  } catch (error) { // best-effort: warn and return normally so the existing snapshot keeps being used
+    const message = error instanceof Error ? error.message : String(error);
+    console.warn(
+      `Pricing auto-refresh failed (${message}). Continuing build with the existing bundled snapshot.`,
+    );
+  }
+}
+
+function isMainModule() { // True when this file is the script passed to `node`, false when it is imported.
+  if (!process.argv[1]) return false; // no script path available
+  return path.resolve(process.argv[1]) === path.resolve(fileURLToPath(import.meta.url)); // compares resolved paths as strings
+}
+
+if (isMainModule()) { // run only when executed directly, so importing the exported helpers has no side effects
+  main().catch((error) => {
+    console.error(error);
+    process.exit(1); // unexpected failure => non-zero exit status
+  });
+}
diff --git a/scripts/refresh-modelsdev-pricing.mjs b/scripts/refresh-modelsdev-pricing.mjs
new file mode 100644
index 0000000..6517d0b
--- /dev/null
+++ b/scripts/refresh-modelsdev-pricing.mjs
@@ -0,0 +1,141 @@
+import { mkdir, rename, rm, writeFile } from "fs/promises";
+import { dirname } from "path";
+import { fileURLToPath } from "url";
+
+const SOURCE_URL = "https://models.dev/api.json"; // upstream pricing data; also recorded in the snapshot's `_meta.source`
+const DEFAULT_PROVIDERS = ["anthropic", "google", "moonshotai", "openai", "xai", "zai"]; // used when no `--providers=` list is given
+const COST_KEYS = ["input", "output", "cache_read", "cache_write"]; // the only cost fields copied into the snapshot
+const FETCH_TIMEOUT_MS = 15_000; // abort the download after 15 seconds
+
+function parseProviderArgs(argv) { // Returns the provider IDs from `--providers=a,b,...`, or DEFAULT_PROVIDERS.
+  const providerArg = argv.find((arg) => arg.startsWith("--providers="));
+  if (!providerArg) return DEFAULT_PROVIDERS;
+
+  const parsed = providerArg
+    .slice("--providers=".length)
+    .split(",")
+    .map((part) => part.trim())
+    .filter(Boolean);
+  // A value made only of commas/whitespace (e.g. `--providers=,`) must not produce an empty provider list.
+  return parsed.length > 0 ? parsed : DEFAULT_PROVIDERS;
+}
+
+function pickCostBuckets(rawCost) { // Copies the COST_KEYS fields that hold finite numbers; returns null when none qualify.
+  if (!rawCost || typeof rawCost !== "object") return null; // missing or non-object cost entry
+  const picked = {};
+
+  for (const key of COST_KEYS) {
+    const value = rawCost[key];
+    if (typeof value === "number" && Number.isFinite(value)) { // skips strings, NaN, and +/-Infinity
+      picked[key] = value;
+    }
+  }
+
+  return Object.keys(picked).length > 0 ? picked : null; // keys are inserted in COST_KEYS order
+}
+
+function sortObjectByKeys(obj) { // Returns a shallow copy of obj with its keys inserted in localeCompare order.
+  const sorted = {};
+  for (const key of Object.keys(obj).sort((a, b) => a.localeCompare(b))) {
+    sorted[key] = obj[key]; // values are shared with obj, not cloned
+  }
+  return sorted;
+}
+
+async function fetchModelsDevJson() { // Downloads and parses SOURCE_URL; rejects on timeout, network error, or a non-OK status.
+  const controller = new AbortController();
+  const timeoutId = setTimeout(() => controller.abort(), FETCH_TIMEOUT_MS);
+
+  try {
+    const response = await fetch(SOURCE_URL, { signal: controller.signal });
+    if (!response.ok) {
+      throw new Error(`Failed to fetch ${SOURCE_URL}: ${response.status} ${response.statusText}`);
+    }
+    // Read the body while the abort timer is still armed: fetch() resolves once the headers
+    // arrive, so clearing the timer before json() would let a stalled body hang indefinitely.
+    return await response.json();
+  } finally {
+    clearTimeout(timeoutId);
+  }
+}
+
+async function writeFileAtomic(path, content) { // Writes content to a temp file, then renames it onto path; rejects on failure.
+  const tmp = `${path}.tmp-${process.pid}-${Date.now()}-${Math.random().toString(16).slice(2)}`;
+  const safeRm = async (target) => {
+    try {
+      await rm(target, { force: true });
+    } catch {
+      // best-effort cleanup
+    }
+  };
+
+  try {
+    await mkdir(dirname(path), { recursive: true });
+    await writeFile(tmp, content, "utf8");
+    try {
+      await rename(tmp, path);
+    } catch (err) {
+      const code = err && typeof err === "object" && "code" in err ? String(err.code) : "";
+      const shouldRetryAsReplace =
+        code === "EPERM" || code === "EEXIST" || code === "EACCES" || code === "ENOTEMPTY";
+      if (!shouldRetryAsReplace) throw err;
+
+      // The rename onto path was rejected: remove the destination (best effort), then retry once.
+      await safeRm(path);
+      await rename(tmp, path);
+    }
+  } catch (err) {
+    // Any failure (partial write, failed rename or retry) must not leave the temp file behind.
+    await safeRm(tmp);
+    throw err;
+  }
+}
+
+function buildSnapshot(api, providerIDs) { // Builds the `{ _meta, providers }` snapshot from the API payload for the given providers.
+  const providers = {};
+
+  for (const providerID of providerIDs) {
+    const models = api?.[providerID]?.models;
+    if (!models || typeof models !== "object") continue; // provider missing from the payload, or without a models object
+
+    const pricedModels = {};
+    for (const modelID of Object.keys(models)) {
+      const cost = pickCostBuckets(models[modelID]?.cost);
+      if (cost) pricedModels[modelID] = cost; // models without any numeric cost field are omitted
+    }
+
+    if (Object.keys(pricedModels).length > 0) { // providers with no priced model are omitted entirely
+      providers[providerID] = sortObjectByKeys(pricedModels);
+    }
+  }
+
+  const providerList = Object.keys(providers).sort((a, b) => a.localeCompare(b)); // only this list is sorted; `providers` keeps providerIDs order
+
+  return {
+    _meta: {
+      generatedAt: Date.now(), // epoch milliseconds at build time
+      providers: providerList,
+      source: SOURCE_URL,
+      units: "USD per 1M tokens",
+    },
+    providers,
+  };
+}
+
+async function main() { // Fetches models.dev pricing and rewrites the bundled snapshot; rejects instead of writing an empty one.
+  const providerIDs = parseProviderArgs(process.argv.slice(2));
+  const snapshot = buildSnapshot(await fetchModelsDevJson(), providerIDs);
+  const modelCount = Object.values(snapshot.providers).reduce((sum, models) => sum + Object.keys(models).length, 0);
+  // An unexpected API shape or unknown provider IDs yields no priced models; keep the existing snapshot in that case.
+  if (modelCount === 0) throw new Error(`No priced models found in ${SOURCE_URL}; refusing to overwrite the snapshot.`);
+
+  const outPath = fileURLToPath(new URL("../src/data/modelsdev-pricing.min.json", import.meta.url)); // real filesystem path
+  await writeFileAtomic(outPath, `${JSON.stringify(snapshot, null, 2)}\n`);
+
+  console.log(`Wrote ${outPath} with ${snapshot._meta.providers.length} providers and ${modelCount} priced models.`);
+}
+
+main().catch((error) => { // runs unconditionally: this module is a script, not an importable library
+  console.error(error);
+  process.exit(1); // non-zero exit status lets a parent process detect the failure
+});
diff --git a/src/data/modelsdev-pricing.min.json b/src/data/modelsdev-pricing.min.json
index 2411358..78bbc74 100644
--- a/src/data/modelsdev-pricing.min.json
+++ b/src/data/modelsdev-pricing.min.json
@@ -1,11 +1,12 @@
 {
   "_meta": {
-    "generatedAt": 1768919388015,
+    "generatedAt": 1772498852258,
     "providers": [
-      "openai",
       "anthropic",
       "google",
       "moonshotai",
+      "openai",
+      "xai",
       "zai"
     ],
     "source": "https://models.dev/api.json",
@@ -13,496 +14,676 @@
   },
   "providers": {
     "anthropic": {
-      "claude-2.0": {
-        "input": 8,
-        "output": 24
-      },
-      "claude-2.1": {
-        "input": 8,
-        "output": 24
-      },
       "claude-3-5-haiku-20241022": {
+        "input": 0.8,
+        "output": 4,
         "cache_read": 0.08,
-        "cache_write": 1,
+        "cache_write": 1
+      },
+      "claude-3-5-haiku-latest": {
         "input": 0.8,
-        "output": 4
+        "output": 4,
+        "cache_read": 0.08,
+        "cache_write": 1
+      },
+      "claude-3-5-sonnet-20240620": {
+        "input": 3,
+        "output": 15,
+        "cache_read": 0.3,
+        "cache_write": 3.75
       },
       "claude-3-5-sonnet-20241022": {
+        "input": 3,
+        "output": 15,
         "cache_read": 0.3,
-        "cache_write": 3.75,
+        "cache_write": 3.75
+      },
+      "claude-3-7-sonnet-20250219": {
         "input": 3,
-        "output": 15
+        "output": 15,
+        "cache_read": 0.3,
+        "cache_write": 3.75
+      },
+      "claude-3-7-sonnet-latest": {
+        "input": 3,
+        "output": 15,
+        "cache_read": 0.3,
+        "cache_write": 3.75
       },
       "claude-3-haiku-20240307": {
         "input": 0.25,
-        "output": 1.25
+        "output": 1.25,
+        "cache_read": 0.03,
+        "cache_write": 0.3
       },
       "claude-3-opus-20240229": {
         "input": 15,
-        "output": 75
+        "output": 75,
+        "cache_read": 1.5,
+        "cache_write": 18.75
       },
       "claude-3-sonnet-20240229": {
         "input": 3,
-        "output": 15
+        "output": 15,
+        "cache_read": 0.3,
+        "cache_write": 0.3
+      },
+      "claude-haiku-4-5": {
+        "input": 1,
+        "output": 5,
+        "cache_read": 0.1,
+        "cache_write": 1.25
+      },
+      "claude-haiku-4-5-20251001": {
+        "input": 1,
+        "output": 5,
+        "cache_read": 0.1,
+        "cache_write": 1.25
       },
       "claude-opus-4-0": {
-        "cache_read": 0.5,
-        "cache_write": 6.25,
-        "input": 5,
-        "output": 25
+        "input": 15,
+        "output": 75,
+        "cache_read": 1.5,
+        "cache_write": 18.75
       },
       "claude-opus-4-1": {
-        "cache_read": 1.5,
-        "cache_write": 18.75,
         "input": 15,
-        "output": 75
+        "output": 75,
+        "cache_read": 1.5,
+        "cache_write": 18.75
       },
       "claude-opus-4-1-20250805": {
-        "cache_read": 1.5,
-        "cache_write": 18.75,
         "input": 15,
-        "output": 75
+        "output": 75,
+        "cache_read": 1.5,
+        "cache_write": 18.75
       },
       "claude-opus-4-20250514": {
-        "cache_read": 0.5,
-        "cache_write": 6.25,
-        "input": 5,
-        "output": 25
+        "input": 15,
+        "output": 75,
+        "cache_read": 1.5,
+        "cache_write": 18.75
       },
       "claude-opus-4-5": {
-        "cache_read": 0.5,
-        "cache_write": 6.25,
         "input": 5,
-        "output": 25
+        "output": 25,
+        "cache_read": 0.5,
+        "cache_write": 6.25
       },
       "claude-opus-4-5-20251101": {
+        "input": 5,
+        "output": 25,
         "cache_read": 0.5,
-        "cache_write": 6.25,
+        "cache_write": 6.25
+      },
+      "claude-opus-4-6": {
         "input": 5,
-        "output": 25
+        "output": 25,
+        "cache_read": 0.5,
+        "cache_write": 6.25
       },
       "claude-sonnet-4-0": {
-        "cache_read": 0.3,
-        "cache_write": 3.75,
         "input": 3,
-        "output": 15
+        "output": 15,
+        "cache_read": 0.3,
+        "cache_write": 3.75
       },
       "claude-sonnet-4-20250514": {
-        "cache_read": 0.3,
-        "cache_write": 3.75,
         "input": 3,
-        "output": 15
+        "output": 15,
+        "cache_read": 0.3,
+        "cache_write": 3.75
       },
       "claude-sonnet-4-5": {
-        "cache_read": 0.3,
-        "cache_write": 3.75,
         "input": 3,
-        "output": 15
-      },
-      "claude-sonnet-4-5-20251101": {
+        "output": 15,
         "cache_read": 0.3,
-        "cache_write": 3.75,
-        "input": 3,
-        "output": 15
+        "cache_write": 3.75
       },
-      "claude-sonnet-4-7": {
-        "cache_read": 0.3,
-        "cache_write": 3.75,
+      "claude-sonnet-4-5-20250929": {
         "input": 3,
-        "output": 15
-      },
-      "claude-sonnet-4-7-20251101": {
+        "output": 15,
         "cache_read": 0.3,
-        "cache_write": 3.75,
-        "input": 3,
-        "output": 15
+        "cache_write": 3.75
       },
-      "claude-sonnet-4-20251001": {
-        "cache_read": 0.3,
-        "cache_write": 3.75,
+      "claude-sonnet-4-6": {
         "input": 3,
-        "output": 15
+        "output": 15,
+        "cache_read": 0.3,
+        "cache_write": 3.75
       }
     },
     "google": {
       "gemini-1.5-flash": {
         "input": 0.075,
-        "output": 0.3
+        "output": 0.3,
+        "cache_read": 0.01875
+      },
+      "gemini-1.5-flash-8b": {
+        "input": 0.0375,
+        "output": 0.15,
+        "cache_read": 0.01
       },
       "gemini-1.5-pro": {
         "input": 1.25,
-        "output": 5
+        "output": 5,
+        "cache_read": 0.3125
       },
       "gemini-2.0-flash": {
         "input": 0.1,
-        "output": 0.4
-      },
-      "gemini-2.0-flash-exp": {
-        "input": 0.1,
-        "output": 0.4
+        "output": 0.4,
+        "cache_read": 0.025
       },
       "gemini-2.0-flash-lite": {
         "input": 0.075,
         "output": 0.3
       },
-      "gemini-2.0-pro-exp": {
-        "input": 1.25,
-        "output": 5
-      },
       "gemini-2.5-flash": {
-        "input": 0.15,
-        "output": 0.6
+        "input": 0.3,
+        "output": 2.5,
+        "cache_read": 0.075
       },
-      "gemini-2.5-flash-exp": {
-        "input": 0.15,
-        "output": 0.6
+      "gemini-2.5-flash-image": {
+        "input": 0.3,
+        "output": 30,
+        "cache_read": 0.075
+      },
+      "gemini-2.5-flash-image-preview": {
+        "input": 0.3,
+        "output": 30,
+        "cache_read": 0.075
       },
       "gemini-2.5-flash-lite": {
         "input": 0.1,
-        "output": 0.4
+        "output": 0.4,
+        "cache_read": 0.025
+      },
+      "gemini-2.5-flash-lite-preview-06-17": {
+        "input": 0.1,
+        "output": 0.4,
+        "cache_read": 0.025
+      },
+      "gemini-2.5-flash-lite-preview-09-2025": {
+        "input": 0.1,
+        "output": 0.4,
+        "cache_read": 0.025
+      },
+      "gemini-2.5-flash-preview-04-17": {
+        "input": 0.15,
+        "output": 0.6,
+        "cache_read": 0.0375
+      },
+      "gemini-2.5-flash-preview-05-20": {
+        "input": 0.15,
+        "output": 0.6,
+        "cache_read": 0.0375
+      },
+      "gemini-2.5-flash-preview-09-2025": {
+        "input": 0.3,
+        "output": 2.5,
+        "cache_read": 0.075
+      },
+      "gemini-2.5-flash-preview-tts": {
+        "input": 0.5,
+        "output": 10
       },
       "gemini-2.5-pro": {
         "input": 1.25,
-        "output": 5
+        "output": 10,
+        "cache_read": 0.31
       },
-      "gemini-2.5-pro-exp": {
+      "gemini-2.5-pro-preview-05-06": {
         "input": 1.25,
-        "output": 5
+        "output": 10,
+        "cache_read": 0.31
       },
-      "gemini-2.5-pro-preview": {
+      "gemini-2.5-pro-preview-06-05": {
         "input": 1.25,
-        "output": 5
+        "output": 10,
+        "cache_read": 0.31
+      },
+      "gemini-2.5-pro-preview-tts": {
+        "input": 1,
+        "output": 20
       },
       "gemini-3-flash-preview": {
-        "cache_read": 0.05,
         "input": 0.5,
-        "output": 3
+        "output": 3,
+        "cache_read": 0.05
       },
       "gemini-3-pro-preview": {
-        "cache_read": 0.2,
         "input": 2,
-        "output": 12
-      },
-      "gemini-exp-1114": {
-        "input": 1.25,
-        "output": 5
+        "output": 12,
+        "cache_read": 0.2
       },
-      "gemini-exp-1121": {
-        "input": 1.25,
-        "output": 5
+      "gemini-3.1-pro-preview": {
+        "input": 2,
+        "output": 12,
+        "cache_read": 0.2
       },
-      "gemini-exp-1206": {
-        "input": 1.25,
-        "output": 5
+      "gemini-3.1-pro-preview-customtools": {
+        "input": 2,
+        "output": 12,
+        "cache_read": 0.2
       },
-      "gemini-flash-experimental": {
+      "gemini-embedding-001": {
         "input": 0.15,
-        "output": 0.6
+        "output": 0
       },
-      "gemini-pro": {
-        "input": 1.25,
-        "output": 5
-      },
-      "gemini-pro-vision": {
-        "input": 1.25,
-        "output": 5
+      "gemini-flash-latest": {
+        "input": 0.3,
+        "output": 2.5,
+        "cache_read": 0.075
       },
-      "gemini-1.0-pro": {
-        "input": 1.25,
-        "output": 5
-      },
-      "gemini-1.0-pro-vision": {
-        "input": 1.25,
-        "output": 5
-      },
-      "gemini-1.0-ultra": {
-        "input": 7,
-        "output": 21
+      "gemini-flash-lite-latest": {
+        "input": 0.1,
+        "output": 0.4,
+        "cache_read": 0.025
       },
-      "gemini-1.0-ultra-vision": {
-        "input": 7,
-        "output": 21
+      "gemini-live-2.5-flash": {
+        "input": 0.5,
+        "output": 2
       },
-      "gemini-2.0-flash-thinking-exp": {
-        "input": 0.1,
-        "output": 0.4
+      "gemini-live-2.5-flash-preview-native-audio": {
+        "input": 0.5,
+        "output": 2
       }
     },
     "moonshotai": {
-      "kimi-k2-thinking": {
-        "cache_read": 0.15,
-        "input": 0.6,
-        "output": 2.5
-      },
       "kimi-k2-0711-preview": {
-        "cache_read": 0.15,
         "input": 0.6,
-        "output": 2.5
+        "output": 2.5,
+        "cache_read": 0.15
       },
       "kimi-k2-0905-preview": {
-        "cache_read": 0.15,
         "input": 0.6,
-        "output": 2.5
+        "output": 2.5,
+        "cache_read": 0.15
+      },
+      "kimi-k2-thinking": {
+        "input": 0.6,
+        "output": 2.5,
+        "cache_read": 0.15
       },
       "kimi-k2-thinking-turbo": {
-        "cache_read": 0.15,
         "input": 1.15,
-        "output": 8
+        "output": 8,
+        "cache_read": 0.15
       },
       "kimi-k2-turbo-preview": {
-        "cache_read": 0.6,
         "input": 2.4,
-        "output": 10
+        "output": 10,
+        "cache_read": 0.6
       },
-      "kimi-k2-chat": {
-        "cache_read": 0.15,
+      "kimi-k2.5": {
         "input": 0.6,
-        "output": 2.5
-      },
-      "moonshot-v1-auto": {
-        "input": 2.5,
-        "output": 7.5
-      },
-      "moonshot-v1-128k": {
-        "input": 2.5,
-        "output": 7.5
-      },
-      "moonshot-v1-32k": {
-        "input": 2.5,
-        "output": 7.5
+        "output": 3,
+        "cache_read": 0.1
       }
     },
     "openai": {
-      "gpt-4o": {
-        "cache_read": 1.25,
-        "input": 2.5,
-        "output": 10
+      "codex-mini-latest": {
+        "input": 1.5,
+        "output": 6,
+        "cache_read": 0.375
       },
-      "gpt-4o-2024-08-06": {
-        "cache_read": 1.25,
-        "input": 2.5,
-        "output": 10
+      "gpt-3.5-turbo": {
+        "input": 0.5,
+        "output": 1.5,
+        "cache_read": 1.25
       },
-      "gpt-4o-2024-11-20": {
-        "cache_read": 1.25,
-        "input": 2.5,
-        "output": 10
+      "gpt-4": {
+        "input": 30,
+        "output": 60
       },
-      "gpt-4o-audio-preview": {
-        "cache_read": 1.25,
-        "input": 2.5,
-        "output": 10
+      "gpt-4-turbo": {
+        "input": 10,
+        "output": 30
       },
-      "gpt-4o-mini": {
-        "cache_read": 0.075,
-        "input": 0.15,
-        "output": 0.6
+      "gpt-4.1": {
+        "input": 2,
+        "output": 8,
+        "cache_read": 0.5
       },
-      "gpt-4o-mini-2024-07-18": {
-        "cache_read": 0.075,
-        "input": 0.15,
-        "output": 0.6
+      "gpt-4.1-mini": {
+        "input": 0.4,
+        "output": 1.6,
+        "cache_read": 0.1
       },
-      "gpt-4o-mini-audio-preview": {
-        "cache_read": 0.075,
-        "input": 0.15,
-        "output": 0.6
+      "gpt-4.1-nano": {
+        "input": 0.1,
+        "output": 0.4,
+        "cache_read": 0.03
       },
-      "gpt-4o-realtime-preview": {
-        "cache_read": 1.25,
+      "gpt-4o": {
         "input": 2.5,
-        "output": 10
+        "output": 10,
+        "cache_read": 1.25
       },
-      "gpt-4o-realtime-preview-2024-10-01": {
-        "cache_read": 1.25,
+      "gpt-4o-2024-05-13": {
+        "input": 5,
+        "output": 15
+      },
+      "gpt-4o-2024-08-06": {
         "input": 2.5,
-        "output": 10
+        "output": 10,
+        "cache_read": 1.25
       },
-      "gpt-4o-realtime-preview-2024-12-17": {
-        "cache_read": 1.25,
+      "gpt-4o-2024-11-20": {
         "input": 2.5,
-        "output": 10
+        "output": 10,
+        "cache_read": 1.25
+      },
+      "gpt-4o-mini": {
+        "input": 0.15,
+        "output": 0.6,
+        "cache_read": 0.08
       },
       "gpt-5": {
-        "cache_read": 0.13,
         "input": 1.25,
-        "output": 10
+        "output": 10,
+        "cache_read": 0.125
       },
       "gpt-5-chat-latest": {
-        "cache_read": 0.13,
         "input": 1.25,
         "output": 10
       },
       "gpt-5-codex": {
-        "cache_read": 0.13,
         "input": 1.25,
-        "output": 10
+        "output": 10,
+        "cache_read": 0.125
       },
       "gpt-5-mini": {
-        "cache_read": 0.025,
         "input": 0.25,
-        "output": 2
+        "output": 2,
+        "cache_read": 0.025
       },
       "gpt-5-nano": {
-        "cache_read": 0.005,
         "input": 0.05,
-        "output": 0.4
+        "output": 0.4,
+        "cache_read": 0.005
       },
       "gpt-5-pro": {
-        "cache_read": 0.5,
-        "input": 5,
-        "output": 40
+        "input": 15,
+        "output": 120
       },
       "gpt-5.1": {
-        "cache_read": 0.13,
         "input": 1.25,
-        "output": 10
+        "output": 10,
+        "cache_read": 0.13
       },
       "gpt-5.1-chat-latest": {
-        "cache_read": 0.13,
         "input": 1.25,
-        "output": 10
+        "output": 10,
+        "cache_read": 0.125
       },
       "gpt-5.1-codex": {
-        "cache_read": 0.13,
         "input": 1.25,
-        "output": 10
+        "output": 10,
+        "cache_read": 0.125
       },
       "gpt-5.1-codex-max": {
-        "cache_read": 0.13,
         "input": 1.25,
-        "output": 10
+        "output": 10,
+        "cache_read": 0.125
       },
       "gpt-5.1-codex-mini": {
-        "cache_read": 0.025,
         "input": 0.25,
-        "output": 2
+        "output": 2,
+        "cache_read": 0.025
       },
       "gpt-5.2": {
-        "cache_read": 0.175,
         "input": 1.75,
-        "output": 14
+        "output": 14,
+        "cache_read": 0.175
       },
       "gpt-5.2-chat-latest": {
-        "cache_read": 0.175,
         "input": 1.75,
-        "output": 14
+        "output": 14,
+        "cache_read": 0.175
       },
       "gpt-5.2-codex": {
-        "cache_read": 0.175,
         "input": 1.75,
-        "output": 14
+        "output": 14,
+        "cache_read": 0.175
+      },
+      "gpt-5.2-pro": {
+        "input": 21,
+        "output": 168
       },
       "gpt-5.3-codex": {
-        "cache_read": 0.175,
         "input": 1.75,
-        "output": 14
+        "output": 14,
+        "cache_read": 0.175
       },
-      "gpt-5.2-pro": {
-        "cache_read": 0.2,
-        "input": 2,
-        "output": 16
+      "gpt-5.3-codex-spark": {
+        "input": 1.75,
+        "output": 14,
+        "cache_read": 0.175
       },
       "o1": {
-        "cache_read": 3,
         "input": 15,
-        "output": 60
+        "output": 60,
+        "cache_read": 7.5
       },
       "o1-mini": {
-        "cache_read": 1.5,
-        "input": 3,
-        "output": 12
-      },
-      "o1-mini-2024-09-12": {
-        "cache_read": 1.5,
-        "input": 3,
-        "output": 12
+        "input": 1.1,
+        "output": 4.4,
+        "cache_read": 0.55
       },
       "o1-preview": {
-        "cache_read": 3,
         "input": 15,
-        "output": 60
+        "output": 60,
+        "cache_read": 7.5
       },
-      "o1-preview-2024-09-12": {
-        "cache_read": 3,
-        "input": 15,
-        "output": 60
+      "o1-pro": {
+        "input": 150,
+        "output": 600
+      },
+      "o3": {
+        "input": 2,
+        "output": 8,
+        "cache_read": 0.5
+      },
+      "o3-deep-research": {
+        "input": 10,
+        "output": 40,
+        "cache_read": 2.5
       },
       "o3-mini": {
-        "cache_read": 0.25,
-        "input": 1,
-        "output": 4
+        "input": 1.1,
+        "output": 4.4,
+        "cache_read": 0.55
       },
-      "o3-mini-2025-01-31": {
-        "cache_read": 0.25,
-        "input": 1,
-        "output": 4
+      "o3-pro": {
+        "input": 20,
+        "output": 80
       },
       "o4-mini": {
-        "cache_read": 0.275,
         "input": 1.1,
-        "output": 4.4
+        "output": 4.4,
+        "cache_read": 0.28
       },
-      "o4-mini-2025-04-16": {
-        "cache_read": 0.275,
-        "input": 1.1,
-        "output": 4.4
+      "o4-mini-deep-research": {
+        "input": 2,
+        "output": 8,
+        "cache_read": 0.5
       },
       "text-embedding-3-large": {
-        "input": 0.13
+        "input": 0.13,
+        "output": 0
       },
       "text-embedding-3-small": {
-        "input": 0.02
+        "input": 0.02,
+        "output": 0
       },
       "text-embedding-ada-002": {
-        "input": 0.1
+        "input": 0.1,
+        "output": 0
+      }
+    },
+    "xai": {
+      "grok-2": {
+        "input": 2,
+        "output": 10,
+        "cache_read": 2
+      },
+      "grok-2-1212": {
+        "input": 2,
+        "output": 10,
+        "cache_read": 2
+      },
+      "grok-2-latest": {
+        "input": 2,
+        "output": 10,
+        "cache_read": 2
+      },
+      "grok-2-vision": {
+        "input": 2,
+        "output": 10,
+        "cache_read": 2
+      },
+      "grok-2-vision-1212": {
+        "input": 2,
+        "output": 10,
+        "cache_read": 2
+      },
+      "grok-2-vision-latest": {
+        "input": 2,
+        "output": 10,
+        "cache_read": 2
+      },
+      "grok-3": {
+        "input": 3,
+        "output": 15,
+        "cache_read": 0.75
+      },
+      "grok-3-fast": {
+        "input": 5,
+        "output": 25,
+        "cache_read": 1.25
+      },
+      "grok-3-fast-latest": {
+        "input": 5,
+        "output": 25,
+        "cache_read": 1.25
       },
-      "whisper-1": {
-        "input": 6
+      "grok-3-latest": {
+        "input": 3,
+        "output": 15,
+        "cache_read": 0.75
+      },
+      "grok-3-mini": {
+        "input": 0.3,
+        "output": 0.5,
+        "cache_read": 0.075
+      },
+      "grok-3-mini-fast": {
+        "input": 0.6,
+        "output": 4,
+        "cache_read": 0.15
+      },
+      "grok-3-mini-fast-latest": {
+        "input": 0.6,
+        "output": 4,
+        "cache_read": 0.15
+      },
+      "grok-3-mini-latest": {
+        "input": 0.3,
+        "output": 0.5,
+        "cache_read": 0.075
+      },
+      "grok-4": {
+        "input": 3,
+        "output": 15,
+        "cache_read": 0.75
+      },
+      "grok-4-1-fast": {
+        "input": 0.2,
+        "output": 0.5,
+        "cache_read": 0.05
+      },
+      "grok-4-1-fast-non-reasoning": {
+        "input": 0.2,
+        "output": 0.5,
+        "cache_read": 0.05
+      },
+      "grok-4-fast": {
+        "input": 0.2,
+        "output": 0.5,
+        "cache_read": 0.05
+      },
+      "grok-4-fast-non-reasoning": {
+        "input": 0.2,
+        "output": 0.5,
+        "cache_read": 0.05
+      },
+      "grok-beta": {
+        "input": 5,
+        "output": 15,
+        "cache_read": 5
+      },
+      "grok-code-fast-1": {
+        "input": 0.2,
+        "output": 1.5,
+        "cache_read": 0.02
+      },
+      "grok-vision-beta": {
+        "input": 5,
+        "output": 15,
+        "cache_read": 5
       }
     },
     "zai": {
       "glm-4.5": {
-        "cache_read": 0.11,
-        "cache_write": 0,
         "input": 0.6,
-        "output": 2.2
+        "output": 2.2,
+        "cache_read": 0.11,
+        "cache_write": 0
       },
       "glm-4.5-air": {
+        "input": 0.2,
+        "output": 1.1,
         "cache_read": 0.03,
-        "cache_write": 0,
-        "input": 0.15,
-        "output": 0.6
+        "cache_write": 0
       },
       "glm-4.5-flash": {
-        "cache_read": 0.016,
-        "cache_write": 0,
-        "input": 0.08,
-        "output": 0.3
+        "input": 0,
+        "output": 0,
+        "cache_read": 0,
+        "cache_write": 0
       },
       "glm-4.5v": {
-        "cache_read": 0.11,
-        "cache_write": 0,
         "input": 0.6,
-        "output": 2.2
+        "output": 1.8
       },
       "glm-4.6": {
-        "cache_read": 0.11,
-        "cache_write": 0,
         "input": 0.6,
-        "output": 2.2
+        "output": 2.2,
+        "cache_read": 0.11,
+        "cache_write": 0
       },
       "glm-4.6v": {
-        "cache_read": 0.11,
-        "cache_write": 0,
-        "input": 0.6,
-        "output": 2.2
+        "input": 0.3,
+        "output": 0.9
       },
       "glm-4.7": {
-        "cache_read": 0.11,
-        "cache_write": 0,
         "input": 0.6,
-        "output": 2.2
+        "output": 2.2,
+        "cache_read": 0.11,
+        "cache_write": 0
+      },
+      "glm-4.7-flash": {
+        "input": 0,
+        "output": 0,
+        "cache_read": 0,
+        "cache_write": 0
+      },
+      "glm-5": {
+        "input": 1,
+        "output": 3.2,
+        "cache_read": 0.2,
+        "cache_write": 0
       }
     }
   }
diff --git a/src/lib/modelsdev-pricing.ts b/src/lib/modelsdev-pricing.ts
index 8f23449..e4c556e 100644
--- a/src/lib/modelsdev-pricing.ts
+++ b/src/lib/modelsdev-pricing.ts
@@ -1,4 +1,17 @@
 import { readFileSync } from "fs";
+import { mkdir, readFile, rename, rm, writeFile } from "fs/promises";
+import { dirname, join } from "path";
+
+import { fetchWithTimeout } from "./http.js";
+import { getOpencodeRuntimeDirs, type OpencodeRuntimeDirs } from "./opencode-runtime-paths.js";
+
+const SOURCE_URL = "https://models.dev/api.json";
+const DEFAULT_MODELSDEV_PROVIDERS = ["anthropic", "google", "moonshotai", "openai", "xai", "zai"];
+const COST_KEYS = ["input", "output", "cache_read", "cache_write"] as const;
+const RUNTIME_SNAPSHOT_FILENAME = "modelsdev-pricing.runtime.min.json";
+const RUNTIME_REFRESH_STATE_FILENAME = "modelsdev-pricing.refresh-state.json";
+const DEFAULT_REFRESH_MIN_ATTEMPT_INTERVAL_MS = 6 * 60 * 60 * 1000;
+const DEFAULT_REFRESH_TIMEOUT_MS = 6_000;
 
 export type CostBuckets = {
   input?: number;
@@ -8,7 +21,7 @@ export type CostBuckets = {
   reasoning?: number;
 };
 
-type Snapshot = {
+export type PricingSnapshot = {
   _meta: {
     source: string;
     generatedAt: number;
@@ -18,15 +31,229 @@ type Snapshot = {
   providers: Record<string, Record<string, CostBuckets>>;
 };
 
-let SNAPSHOT: Snapshot | null = null;
+export const DEFAULT_PRICING_SNAPSHOT_MAX_AGE_MS = 3 * 24 * 60 * 60 * 1000;
+
+export type PricingSnapshotHealth = {
+  generatedAt: number;
+  ageMs: number;
+  maxAgeMs: number;
+  stale: boolean;
+};
+
+export interface PricingRefreshStateV1 {
+  version: 1;
+  updatedAt: number;
+  lastAttemptAt?: number;
+  lastSuccessAt?: number;
+  lastFailureAt?: number;
+  lastResult?: "success" | "not_modified" | "skipped_fresh" | "skipped_throttled" | "failed";
+  lastError?: string;
+  etag?: string;
+  lastModified?: string;
+}
+
+export interface PricingRefreshPolicy {
+  enabled: boolean;
+  maxAgeMs: number;
+  minAttemptIntervalMs: number;
+  timeoutMs: number;
+}
+
+export interface PricingRefreshOptions {
+  reason?: "init" | "tokens" | "status";
+  force?: boolean;
+  nowMs?: number;
+  maxAgeMs?: number;
+  minAttemptIntervalMs?: number;
+  timeoutMs?: number;
+  runtimeDirs?: OpencodeRuntimeDirs;
+  fetchFn?: typeof fetch;
+  bootstrapSnapshotOverride?: PricingSnapshot;
+  providerAllowlist?: string[];
+}
+
+export interface PricingRefreshResult {
+  attempted: boolean;
+  updated: boolean;
+  state: PricingRefreshStateV1;
+  error?: string;
+  reason?: string;
+}
+
+const EMPTY_SNAPSHOT: PricingSnapshot = {
+  _meta: {
+    source: "none",
+    generatedAt: 0,
+    providers: [],
+    units: "USD per 1M tokens",
+  },
+  providers: {},
+};
+
+let SNAPSHOT: PricingSnapshot | null = null;
+let SNAPSHOT_SOURCE: "runtime" | "bundled" | "empty" = "bundled";
 let MODEL_INDEX: Map<string, string[]> | null = null;
+let REFRESH_IN_FLIGHT: Promise<PricingRefreshResult> | null = null;
+let PROCESS_REFRESH_CHECKED = false;
+
+function asRecord(value: unknown): Record<string, unknown> | null {
+  return value && typeof value === "object" && !Array.isArray(value) ? (value as Record<string, unknown>) : null; // non-array objects only; arrays are typeof "object" but are not keyed records
+}
+
+function sortRecordByKeys<T>(obj: Record<string, T>): Record<string, T> {
+  // Rebuild the record, inserting its keys in localeCompare order.
+  const orderedKeys = Object.keys(obj).sort((left, right) => left.localeCompare(right));
+  const sorted: Record<string, T> = {};
+  for (const name of orderedKeys) sorted[name] = obj[name];
+  return sorted;
+}
+
+function parseEnabled(value: string | undefined): boolean {
+  // Unset/empty means enabled; only an explicit off-spelling (trimmed, case-insensitive) disables.
+  if (!value) return true;
+  return !new Set(["0", "false", "no", "off"]).has(value.trim().toLowerCase());
+}
+
+function parseMaxAgeMs(value: string | undefined): number {
+  // `value` is a day count; unset, non-finite or non-positive input falls back to the default.
+  const days = value ? Number(value) : Number.NaN;
+  const usable = Number.isFinite(days) && days > 0;
+  return usable ? Math.floor(days * 24 * 60 * 60 * 1000) : DEFAULT_PRICING_SNAPSHOT_MAX_AGE_MS;
+}
+
+export function getPricingRefreshPolicy(env: NodeJS.ProcessEnv = process.env): PricingRefreshPolicy {
+  // Only the enabled flag and the max age come from `env`; throttle and timeout use module defaults.
+  const enabled = parseEnabled(env.OPENCODE_QUOTA_PRICING_AUTO_REFRESH);
+  const maxAgeMs = parseMaxAgeMs(env.OPENCODE_QUOTA_PRICING_MAX_AGE_DAYS);
+  const minAttemptIntervalMs = DEFAULT_REFRESH_MIN_ATTEMPT_INTERVAL_MS;
+  const timeoutMs = DEFAULT_REFRESH_TIMEOUT_MS;
+  return { enabled, maxAgeMs, minAttemptIntervalMs, timeoutMs };
+}
+
+function normalizeSnapshot(raw: unknown): PricingSnapshot | null {
+  const root = asRecord(raw);
+  if (!root) return null;
+
+  const metaRaw = asRecord(root._meta);
+  const providersRaw = asRecord(root.providers);
+  if (!metaRaw || !providersRaw) return null;
+
+  const generatedAt = Number(metaRaw.generatedAt);
+  if (!Number.isFinite(generatedAt) || generatedAt <= 0) return null;
+
+  const providers: Record<string, Record<string, CostBuckets>> = {};
+
+  for (const providerId of Object.keys(providersRaw)) {
+    const modelsRaw = asRecord(providersRaw[providerId]);
+    if (!modelsRaw) continue;
+
+    const models: Record<string, CostBuckets> = {};
+    for (const modelId of Object.keys(modelsRaw)) {
+      const modelRaw = asRecord(modelsRaw[modelId]);
+      if (!modelRaw) continue;
+
+      const buckets: CostBuckets = {};
+      const input = modelRaw.input;
+      const output = modelRaw.output;
+      const cacheRead = modelRaw.cache_read;
+      const cacheWrite = modelRaw.cache_write;
+      const reasoning = modelRaw.reasoning;
+
+      if (typeof input === "number" && Number.isFinite(input)) buckets.input = input;
+      if (typeof output === "number" && Number.isFinite(output)) buckets.output = output;
+      if (typeof cacheRead === "number" && Number.isFinite(cacheRead)) buckets.cache_read = cacheRead;
+      if (typeof cacheWrite === "number" && Number.isFinite(cacheWrite)) {
+        buckets.cache_write = cacheWrite;
+      }
+      if (typeof reasoning === "number" && Number.isFinite(reasoning)) buckets.reasoning = reasoning;
+
+      if (Object.keys(buckets).length > 0) {
+        models[modelId] = buckets;
+      }
+    }
+
+    if (Object.keys(models).length > 0) {
+      providers[providerId] = sortRecordByKeys(models);
+    }
+  }
 
-function ensureLoaded(): Snapshot {
+  const providerList = Object.keys(providers).sort((a, b) => a.localeCompare(b));
+
+  return {
+    _meta: {
+      source: typeof metaRaw.source === "string" && metaRaw.source ? metaRaw.source : SOURCE_URL,
+      generatedAt: Math.trunc(generatedAt),
+      providers: providerList,
+      units:
+        typeof metaRaw.units === "string" && metaRaw.units ? metaRaw.units : "USD per 1M tokens",
+    },
+    providers: sortRecordByKeys(providers),
+  };
+}
+
+function loadBundledSnapshotSync(override?: PricingSnapshot): PricingSnapshot {
+  // A caller-supplied override stands in for the packaged file; it is normalized the same way.
+  if (override) return normalizeSnapshot(override) ?? EMPTY_SNAPSHOT;
+
+  try {
+    const bundledUrl = new URL("../data/modelsdev-pricing.min.json", import.meta.url);
+    const parsed: unknown = JSON.parse(readFileSync(bundledUrl, "utf-8"));
+    return normalizeSnapshot(parsed) ?? EMPTY_SNAPSHOT;
+  } catch {
+    // Any read/parse failure degrades to the empty snapshot instead of throwing.
+    return EMPTY_SNAPSHOT;
+  }
+}
+
+export function getRuntimePricingSnapshotPath(runtimeDirs?: OpencodeRuntimeDirs): string {
+  const dirs = runtimeDirs ?? getOpencodeRuntimeDirs();
+  return join(dirs.cacheDir, "opencode-quota", RUNTIME_SNAPSHOT_FILENAME);
+}
+
+export function getRuntimePricingRefreshStatePath(runtimeDirs?: OpencodeRuntimeDirs): string {
+  const dirs = runtimeDirs ?? getOpencodeRuntimeDirs();
+  return join(dirs.cacheDir, "opencode-quota", RUNTIME_REFRESH_STATE_FILENAME);
+}
+
+function loadRuntimeSnapshotSync(runtimeDirs?: OpencodeRuntimeDirs): PricingSnapshot | null {
+  const path = getRuntimePricingSnapshotPath(runtimeDirs);
+  try {
+    const raw = readFileSync(path, "utf-8");
+    return normalizeSnapshot(JSON.parse(raw));
+  } catch {
+    return null;
+  }
+}
+
+function chooseSnapshot(params?: {
+  runtimeDirs?: OpencodeRuntimeDirs;
+  bootstrapSnapshotOverride?: PricingSnapshot;
+}): { snapshot: PricingSnapshot; source: "runtime" | "bundled" | "empty" } {
+  // Both candidates are always loaded (bundled first), then compared by generation time.
+  const bundled = loadBundledSnapshotSync(params?.bootstrapSnapshotOverride);
+  const runtime = loadRuntimeSnapshotSync(params?.runtimeDirs);
+  const bundledAt = bundled._meta.generatedAt;
+
+  // Ties go to the runtime copy; a runtime copy older than the bundle is ignored.
+  if (runtime && runtime._meta.generatedAt >= bundledAt) {
+    return { snapshot: runtime, source: "runtime" };
+  }
+
+  // A generatedAt of 0 is what EMPTY_SNAPSHOT carries, so only a positive stamp counts as a bundle.
+  return bundledAt > 0 ? { snapshot: bundled, source: "bundled" } : { snapshot: EMPTY_SNAPSHOT, source: "empty" };
+}
+
+function setSnapshot(snapshot: PricingSnapshot, source: "runtime" | "bundled" | "empty"): void {
+  SNAPSHOT = snapshot;
+  SNAPSHOT_SOURCE = source;
+  MODEL_INDEX = null;
+}
+
+function ensureLoaded(): PricingSnapshot {
   if (SNAPSHOT) return SNAPSHOT;
-  const url = new URL("../data/modelsdev-pricing.min.json", import.meta.url);
-  const raw = readFileSync(url, "utf-8");
-  SNAPSHOT = JSON.parse(raw) as Snapshot;
-  return SNAPSHOT;
+  const selected = chooseSnapshot();
+  setSnapshot(selected.snapshot, selected.source);
+  return selected.snapshot;
 }
 
 function ensureModelIndex(): Map<string, string[]> {
@@ -47,10 +274,408 @@ function ensureModelIndex(): Map<string, string[]> {
   return idx;
 }
 
-export function getPricingSnapshotMeta(): Snapshot["_meta"] {
+function normalizeRefreshState(raw: unknown): PricingRefreshStateV1 | null {
+  const source = asRecord(raw);
+  if (!source) return null;
+
+  // Coerce with Number() and keep only finite, strictly positive values (truncated toward zero).
+  const positiveInt = (value: unknown): number | undefined => {
+    const n = Number(value);
+    return Number.isFinite(n) && n > 0 ? Math.trunc(n) : undefined;
+  };
+
+  const updatedAt = positiveInt(source.updatedAt);
+  if (Number(source.version) !== 1 || updatedAt === undefined) return null;
+
+  const state: PricingRefreshStateV1 = { version: 1, updatedAt };
+  for (const key of ["lastAttemptAt", "lastSuccessAt", "lastFailureAt"] as const) {
+    const stamp = positiveInt(source[key]);
+    if (stamp !== undefined) state[key] = stamp;
+  }
+
+  // lastResult is kept only when it is one of the known outcome strings.
+  const knownResults = ["success", "not_modified", "skipped_fresh", "skipped_throttled", "failed"];
+  const { lastResult } = source;
+  if (typeof lastResult === "string" && knownResults.includes(lastResult)) {
+    state.lastResult = lastResult as PricingRefreshStateV1["lastResult"];
+  }
+
+  // Free-form strings are copied only when non-empty.
+  for (const key of ["lastError", "etag", "lastModified"] as const) {
+    const text = source[key];
+    if (typeof text === "string" && text) state[key] = text;
+  }
+
+  return state;
+}
+
+async function writeJsonAtomic(path: string, data: unknown): Promise<void> {
+  // Serialize `data` to a uniquely named sibling temp file, then rename it over `path`.
+  // Any failure after the temp file may exist removes it before the error is rethrown.
+  const tmp = `${path}.tmp-${process.pid}-${Date.now()}-${Math.random().toString(16).slice(2)}`;
+
+  const safeRm = async (target: string): Promise<void> => {
+    try {
+      await rm(target, { force: true });
+    } catch {
+      // best-effort cleanup
+    }
+  };
+
+  await mkdir(dirname(path), { recursive: true });
+  try {
+    await writeFile(tmp, `${JSON.stringify(data, null, 2)}\n`, "utf-8");
+    try {
+      await rename(tmp, path);
+    } catch (err) {
+      const code =
+        err && typeof err === "object" && "code" in err
+          ? String((err as { code?: unknown }).code)
+          : "";
+      // These codes are retried as delete-then-rename (presumably for platforms where rename
+      // cannot replace an existing file); anything else is fatal.
+      if (!["EPERM", "EEXIST", "EACCES", "ENOTEMPTY"].includes(code)) throw err;
+      await safeRm(path);
+      await rename(tmp, path);
+    }
+  } catch (err) {
+    await safeRm(tmp); // covers a partial write and a failed first or second rename
+    throw err;
+  }
+}
+
+async function readRefreshState(path: string): Promise<PricingRefreshStateV1 | null> {
+  try {
+    const raw = await readFile(path, "utf-8");
+    return normalizeRefreshState(JSON.parse(raw));
+  } catch {
+    return null;
+  }
+}
+
+export async function readPricingRefreshState(
+  runtimeDirs?: OpencodeRuntimeDirs,
+): Promise<PricingRefreshStateV1 | null> {
+  return await readRefreshState(getRuntimePricingRefreshStatePath(runtimeDirs));
+}
+
+function makeDefaultRefreshState(nowMs: number): PricingRefreshStateV1 {
+  return {
+    version: 1,
+    updatedAt: nowMs,
+  };
+}
+
+function pickCostBuckets(rawCost: unknown): CostBuckets | null {
+  const costNode = asRecord(rawCost);
+  if (costNode === null) return null;
+  // Copy only the COST_KEYS entries that hold finite numbers; everything else is dropped.
+  const buckets: CostBuckets = {};
+  let kept = 0;
+  for (const bucket of COST_KEYS) {
+    const price = costNode[bucket];
+    if (typeof price !== "number" || !Number.isFinite(price)) continue;
+    buckets[bucket] = price;
+    kept += 1;
+  }
+  return kept > 0 ? buckets : null;
+}
+
+function buildSnapshotFromApi(
+  apiRaw: unknown,
+  providerIDs: string[],
+  generatedAt: number,
+): PricingSnapshot {
+  const api = asRecord(apiRaw) ?? {};
+  const providers: Record<string, Record<string, CostBuckets>> = {};
+
+  for (const providerID of providerIDs) {
+    const providerNode = asRecord(api[providerID]);
+    const models = asRecord(providerNode?.models);
+    if (!models) continue;
+
+    const pricedModels: Record<string, CostBuckets> = {};
+    for (const modelID of Object.keys(models)) {
+      const modelNode = asRecord(models[modelID]);
+      const cost = pickCostBuckets(modelNode?.cost);
+      if (cost) {
+        pricedModels[modelID] = cost;
+      }
+    }
+
+    if (Object.keys(pricedModels).length > 0) {
+      providers[providerID] = sortRecordByKeys(pricedModels);
+    }
+  }
+
+  const providerList = Object.keys(providers).sort((a, b) => a.localeCompare(b));
+
+  return {
+    _meta: {
+      generatedAt,
+      providers: providerList,
+      source: SOURCE_URL,
+      units: "USD per 1M tokens",
+    },
+    providers: sortRecordByKeys(providers),
+  };
+}
+
+function countPricedModels(snapshot: PricingSnapshot): number {
+  // Sum of model-entry counts across every provider in the snapshot.
+  return Object.values(snapshot.providers).reduce(
+    (sum, models) => sum + Object.keys(models).length,
+    0,
+  );
+}
+
+function getErrorMessage(error: unknown): string {
+  return error instanceof Error ? error.message : String(error);
+}
+
+async function fetchModelsDevSnapshot(params: {
+  timeoutMs: number;
+  state: PricingRefreshStateV1;
+  fetchFn?: typeof fetch;
+}): Promise<Response> {
+  // Stored validators describe the upstream payload behind the runtime snapshot. When the active
+  // snapshot is bundled/empty, a 304 would get unrelated data re-stamped as fresh, so omit them.
+  const sendValidators = SNAPSHOT_SOURCE === "runtime";
+  const headers = new Headers();
+  if (sendValidators && params.state.etag) headers.set("If-None-Match", params.state.etag);
+  if (sendValidators && params.state.lastModified) headers.set("If-Modified-Since", params.state.lastModified);
+
+  if (!params.fetchFn) {
+    return await fetchWithTimeout(SOURCE_URL, { headers }, params.timeoutMs);
+  }
+
+  // Injected fetch implementation: abort it ourselves once timeoutMs elapses.
+  const controller = new AbortController();
+  const timeoutId = setTimeout(() => controller.abort(), params.timeoutMs);
+  try {
+    return await params.fetchFn(SOURCE_URL, { headers, signal: controller.signal });
+  } finally {
+    clearTimeout(timeoutId);
+  }
+}
+
+export async function maybeRefreshPricingSnapshot(
+  opts: PricingRefreshOptions = {},
+): Promise<PricingRefreshResult> {
+  if (REFRESH_IN_FLIGHT) return REFRESH_IN_FLIGHT;
+
+  REFRESH_IN_FLIGHT = (async (): Promise<PricingRefreshResult> => {
+    const nowMs = opts.nowMs ?? Date.now();
+    const policy = getPricingRefreshPolicy(process.env);
+    const maxAgeMs = opts.maxAgeMs ?? policy.maxAgeMs;
+    const minAttemptIntervalMs = opts.minAttemptIntervalMs ?? policy.minAttemptIntervalMs;
+    const timeoutMs = opts.timeoutMs ?? policy.timeoutMs;
+    const runtimeDirs = opts.runtimeDirs;
+    const snapshotPath = getRuntimePricingSnapshotPath(runtimeDirs);
+    const statePath = getRuntimePricingRefreshStatePath(runtimeDirs);
+    const force = opts.force === true;
+
+    const previousState = (await readRefreshState(statePath)) ?? makeDefaultRefreshState(nowMs);
+
+    if (!force && PROCESS_REFRESH_CHECKED) {
+      return {
+        attempted: false,
+        updated: false,
+        reason: "already_checked_this_process",
+        state: previousState,
+      };
+    }
+
+    PROCESS_REFRESH_CHECKED = true;
+
+    const selected = chooseSnapshot({
+      runtimeDirs,
+      bootstrapSnapshotOverride: opts.bootstrapSnapshotOverride,
+    });
+
+    setSnapshot(selected.snapshot, selected.source);
+
+    const health = getPricingSnapshotHealth({ nowMs, maxAgeMs });
+
+    if (!force && !policy.enabled) {
+      return {
+        attempted: false,
+        updated: false,
+        reason: "disabled",
+        state: {
+          ...previousState,
+          updatedAt: nowMs,
+          lastResult: "skipped_fresh",
+        },
+      };
+    }
+
+    if (!force && !health.stale) {
+      return {
+        attempted: false,
+        updated: false,
+        reason: "fresh",
+        state: {
+          ...previousState,
+          updatedAt: nowMs,
+          lastResult: "skipped_fresh",
+        },
+      };
+    }
+
+    if (!force && previousState.lastAttemptAt && nowMs - previousState.lastAttemptAt < minAttemptIntervalMs) {
+      return {
+        attempted: false,
+        updated: false,
+        reason: "throttled",
+        state: {
+          ...previousState,
+          updatedAt: nowMs,
+          lastResult: "skipped_throttled",
+        },
+      };
+    }
+
+    const attemptingState: PricingRefreshStateV1 = {
+      ...previousState,
+      version: 1,
+      updatedAt: nowMs,
+      lastAttemptAt: nowMs,
+    };
+
+    try {
+      const response = await fetchModelsDevSnapshot({
+        timeoutMs,
+        state: attemptingState,
+        fetchFn: opts.fetchFn,
+      });
+
+      if (response.status === 304) {
+        const activeSnapshot = ensureLoaded();
+        const refreshedSnapshot: PricingSnapshot = {
+          _meta: {
+            ...activeSnapshot._meta,
+            generatedAt: nowMs,
+          },
+          providers: activeSnapshot.providers,
+        };
+        await writeJsonAtomic(snapshotPath, refreshedSnapshot);
+        setSnapshot(refreshedSnapshot, "runtime");
+
+        const nextState: PricingRefreshStateV1 = {
+          ...attemptingState,
+          updatedAt: nowMs,
+          lastSuccessAt: nowMs,
+          lastResult: "not_modified",
+          lastError: undefined,
+          etag: response.headers.get("etag") ?? attemptingState.etag,
+          lastModified: response.headers.get("last-modified") ?? attemptingState.lastModified,
+        };
+        try {
+          await writeJsonAtomic(statePath, nextState);
+        } catch {
+          // best effort; keep refreshed in-memory/runtime snapshot active
+        }
+        return {
+          attempted: true,
+          updated: true,
+          state: nextState,
+        };
+      }
+
+      if (!response.ok) {
+        throw new Error(`Failed to fetch ${SOURCE_URL}: ${response.status} ${response.statusText}`);
+      }
+
+      const api = await response.json();
+      const snapshot = buildSnapshotFromApi(
+        api,
+        opts.providerAllowlist ?? DEFAULT_MODELSDEV_PROVIDERS,
+        nowMs,
+      );
+      if (countPricedModels(snapshot) === 0) {
+        throw new Error("Refusing to persist empty pricing snapshot from models.dev");
+      }
+
+      await writeJsonAtomic(snapshotPath, snapshot);
+      setSnapshot(snapshot, "runtime");
+
+      const nextState: PricingRefreshStateV1 = {
+        ...attemptingState,
+        updatedAt: nowMs,
+        lastSuccessAt: nowMs,
+        lastResult: "success",
+        lastError: undefined,
+        etag: response.headers.get("etag") ?? attemptingState.etag,
+        lastModified: response.headers.get("last-modified") ?? attemptingState.lastModified,
+      };
+
+      try {
+        await writeJsonAtomic(statePath, nextState);
+      } catch {
+        // best effort; snapshot has already been updated
+      }
+
+      return {
+        attempted: true,
+        updated: true,
+        state: nextState,
+      };
+    } catch (error) {
+      const errorMessage = getErrorMessage(error);
+      const nextState: PricingRefreshStateV1 = {
+        ...attemptingState,
+        updatedAt: nowMs,
+        lastFailureAt: nowMs,
+        lastResult: "failed",
+        lastError: errorMessage,
+      };
+
+      try {
+        await writeJsonAtomic(statePath, nextState);
+      } catch {
+        // best effort; report original fetch/refresh error
+      }
+
+      return {
+        attempted: true,
+        updated: false,
+        error: errorMessage,
+        state: nextState,
+      };
+    }
+  })().finally(() => {
+    REFRESH_IN_FLIGHT = null;
+  });
+
+  return REFRESH_IN_FLIGHT;
+}
+
+export function getPricingSnapshotMeta(): PricingSnapshot["_meta"] {
   return ensureLoaded()._meta;
 }
 
+export function getPricingSnapshotSource(): "runtime" | "bundled" | "empty" {
+  ensureLoaded();
+  return SNAPSHOT_SOURCE;
+}
+
+export function getPricingSnapshotHealth(opts?: {
+  nowMs?: number;
+  maxAgeMs?: number;
+}): PricingSnapshotHealth {
+  // Read the active snapshot's stamp first (this loads it if needed), then sample the clock.
+  const { generatedAt } = getPricingSnapshotMeta();
+  const now = opts?.nowMs ?? Date.now();
+  const limit = opts?.maxAgeMs ?? DEFAULT_PRICING_SNAPSHOT_MAX_AGE_MS;
+
+  // A snapshot stamped later than `now` is reported with age 0 rather than a negative age.
+  const ageMs = Math.max(0, now - generatedAt);
+  // Stale means strictly older than the limit.
+  const stale = ageMs > limit;
+  return { generatedAt, ageMs, maxAgeMs: limit, stale };
+}
+
 export function hasProvider(providerId: string): boolean {
   return !!ensureLoaded().providers[providerId];
 }
@@ -103,3 +728,11 @@ export function lookupCost(providerId: string, modelId: string): CostBuckets | n
 export function hasCost(providerId: string, modelId: string): boolean {
   return lookupCost(providerId, modelId) != null;
 }
+
+export function __resetPricingSnapshotForTests(): void {
+  SNAPSHOT = null;
+  SNAPSHOT_SOURCE = "bundled";
+  MODEL_INDEX = null;
+  REFRESH_IN_FLIGHT = null;
+  PROCESS_REFRESH_CHECKED = false;
+}
diff --git a/src/lib/quota-stats.ts b/src/lib/quota-stats.ts
index 15668ee..1ee16a9 100644
--- a/src/lib/quota-stats.ts
+++ b/src/lib/quota-stats.ts
@@ -6,6 +6,7 @@ import {
   SessionNotFoundError,
 } from "./opencode-storage.js";
 import {
+  hasCost,
   hasProvider,
   hasModel,
   isModelsDevProviderId,
@@ -151,6 +152,29 @@ function normalizeModelId(raw: string): string {
   return s;
 }
 
+function stripFreeSuffix(modelId: string): string | null {
+  const suffix = "-free";
+  if (!modelId.toLowerCase().endsWith(suffix)) return null;
+  return modelId.slice(0, -suffix.length) || null;
+}
+
+function freeSuffixCandidates(modelId: string): string[] {
+  // The id itself comes first, then its "-free"-stripped form when there is one.
+  const stripped = stripFreeSuffix(modelId);
+  const ordered = stripped ? [modelId, stripped] : [modelId];
+  return [...new Set(ordered)];
+}
+
+function pickBestModelForProvider(providerID: string, candidates: readonly string[]): string | null {
+  // First pass: the earliest candidate that hasCost() accepts wins outright.
+  const priced = candidates.find((candidate) => hasCost(providerID, candidate));
+  if (priced !== undefined) return priced;
+
+  // Second pass: otherwise settle for the earliest candidate hasModel() accepts.
+  const known = candidates.find((candidate) => hasModel(providerID, candidate));
+  return known ?? null;
+}
+
 function parseModelIdHint(rawModelId?: string): { providerHint?: string; modelPart?: string } {
   if (!rawModelId || typeof rawModelId !== "string") return {};
   const trimmed = rawModelId.trim();
@@ -232,9 +256,21 @@ function anthropicPricingCandidates(model: string): string[] {
   return [model];
 }
 
+function moonshotaiPricingCandidates(model: string): string[] {
+  // Each "-free" variant is emitted as-is, followed by a copy with every "." turned into "-".
+  // A Set keeps first-seen order and drops repeats (including ids that contain no dot).
+  const ordered = new Set<string>();
+  for (const id of freeSuffixCandidates(model)) {
+    ordered.add(id);
+    ordered.add(id.split(".").join("-"));
+  }
+  return [...ordered];
+}
+
 function resolveModelForProvider(providerID: string, normalizedModel: string): string | null {
   if (!isModelsDevProviderId(providerID)) return null;
-  if (hasModel(providerID, normalizedModel)) return normalizedModel;
+  const preferredDirect = pickBestModelForProvider(providerID, freeSuffixCandidates(normalizedModel));
+  if (preferredDirect) return preferredDirect;
 
   // Some source ids include "-thinking" while snapshot keeps a base key (or vice versa).
   if (normalizedModel.toLowerCase().endsWith("-thinking")) {
@@ -247,6 +283,14 @@ function resolveModelForProvider(providerID: string, normalizedModel: string): s
     if (hasModel("moonshotai", "kimi-k2-thinking")) return "kimi-k2-thinking";
   }
 
+  if (providerID === "moonshotai") {
+    const preferredMoonshot = pickBestModelForProvider(
+      "moonshotai",
+      moonshotaiPricingCandidates(normalizedModel),
+    );
+    if (preferredMoonshot) return preferredMoonshot;
+  }
+
   // Gemini naming fallback: some logs omit -preview.
   if (providerID === "google") {
     if (normalizedModel === "gemini-3-pro" && hasModel("google", "gemini-3-pro-preview")) {
@@ -301,9 +345,10 @@ export function resolvePricingKey(source: {
   const tryProvider = (
     providerID: string | undefined,
     method: "source_provider" | "model_prefix" | "alias_fallback",
+    modelIDHint: string = normalizedModel,
   ): PricingResolution | null => {
     if (!providerID) return null;
-    const modelID = resolveModelForProvider(providerID, normalizedModel);
+    const modelID = resolveModelForProvider(providerID, modelIDHint);
     if (!modelID) return null;
     return { ok: true, key: { provider: providerID, model: modelID }, method };
   };
@@ -314,43 +359,74 @@ export function resolvePricingKey(source: {
   const fromModelPrefix = tryProvider(modelProviderHint, "model_prefix");
   if (fromModelPrefix) return fromModelPrefix;
 
-  const providerCandidates = listProvidersForModelId(normalizedModel);
-  if (providerCandidates.length === 1) {
-    const provider = providerCandidates[0]!;
-    return {
-      ok: true,
-      key: { provider, model: normalizedModel },
-      method: "unique_model",
-    };
+  const modelCandidates = freeSuffixCandidates(normalizedModel);
+  let ambiguousMatch: { model: string; providerCandidates: string[] } | null = null;
+
+  for (const candidateModel of modelCandidates) {
+    const providerCandidates = listProvidersForModelId(candidateModel);
+    if (providerCandidates.length === 1) {
+      const provider = providerCandidates[0]!;
+      return {
+        ok: true,
+        key: { provider, model: candidateModel },
+        method: "unique_model",
+      };
+    }
+
+    if (providerCandidates.length > 1) {
+      const inferredAmbiguousProvider = inferOfficialProviderFromModelId(candidateModel);
+      if (inferredAmbiguousProvider && providerCandidates.includes(inferredAmbiguousProvider)) {
+        const inferredFromAmbiguous = tryProvider(
+          inferredAmbiguousProvider,
+          "alias_fallback",
+          candidateModel,
+        );
+        if (inferredFromAmbiguous) return inferredFromAmbiguous;
+      }
+
+      if (!ambiguousMatch) {
+        ambiguousMatch = {
+          model: candidateModel,
+          providerCandidates: [...providerCandidates].sort((a, b) => a.localeCompare(b)),
+        };
+      }
+    }
   }
-  if (providerCandidates.length > 1) {
-    const sortedCandidates = [...providerCandidates].sort((a, b) => a.localeCompare(b));
+
+  if (ambiguousMatch) {
     return {
       ok: false,
       unknown: {
         sourceProviderID: srcProvider,
         sourceModelID: srcModel,
-        mappedModel: normalizedModel,
-        normalizedModelID: normalizedModel,
-        providerCandidates: sortedCandidates,
+        mappedModel: ambiguousMatch.model,
+        normalizedModelID: ambiguousMatch.model,
+        providerCandidates: ambiguousMatch.providerCandidates,
         reason: "ambiguous_model",
       },
     };
   }
 
-  const inferredProvider = inferOfficialProviderFromModelId(normalizedModel);
-  const inferred = tryProvider(inferredProvider ?? undefined, "alias_fallback");
-  if (inferred) return inferred;
+  let inferredMissing: { provider: string; model: string } | null = null;
+  for (const candidateModel of modelCandidates) {
+    const inferredProvider = inferOfficialProviderFromModelId(candidateModel);
+    const inferred = tryProvider(inferredProvider ?? undefined, "alias_fallback", candidateModel);
+    if (inferred) return inferred;
+
+    if (inferredProvider && !inferredMissing) {
+      inferredMissing = { provider: inferredProvider, model: candidateModel };
+    }
+  }
 
-  if (inferredProvider) {
+  if (inferredMissing) {
     return {
       ok: false,
       unknown: {
         sourceProviderID: srcProvider,
         sourceModelID: srcModel,
-        mappedProvider: inferredProvider,
-        mappedModel: normalizedModel,
-        normalizedModelID: normalizedModel,
+        mappedProvider: inferredMissing.provider,
+        mappedModel: inferredMissing.model,
+        normalizedModelID: inferredMissing.model,
         reason: "missing_provider",
       },
     };
diff --git a/src/lib/quota-status.ts b/src/lib/quota-status.ts
index 5adff1a..a25e980 100644
--- a/src/lib/quota-status.ts
+++ b/src/lib/quota-status.ts
@@ -13,10 +13,16 @@ import {
 } from "./qwen-local-quota.js";
 import { hasQwenOAuthAuth } from "./qwen-auth.js";
 import {
+  getPricingSnapshotHealth,
+  getPricingRefreshPolicy,
   getPricingSnapshotMeta,
+  getPricingSnapshotSource,
+  getRuntimePricingRefreshStatePath,
+  getRuntimePricingSnapshotPath,
   listProviders,
   getProviderModelCount,
   hasProvider as snapshotHasProvider,
+  readPricingRefreshState,
 } from "./modelsdev-pricing.js";
 import { getProviders } from "../providers/registry.js";
 import { getPackageVersion } from "./version.js";
@@ -416,12 +422,36 @@ export async function buildQuotaStatusReport(params: {
   const meta = getPricingSnapshotMeta();
   const providers = listProviders();
   const coverage = computePricingCoverageFromAgg(agg);
+  const refreshPolicy = getPricingRefreshPolicy(process.env);
+  const health = getPricingSnapshotHealth({
+    maxAgeMs: refreshPolicy.maxAgeMs,
+  });
+  const snapshotSource = getPricingSnapshotSource();
+  const runtimeSnapshotPath = getRuntimePricingSnapshotPath();
+  const refreshStatePath = getRuntimePricingRefreshStatePath();
+  const pricingRefreshState = await readPricingRefreshState();
 
   lines.push("");
   lines.push("pricing_snapshot:");
   lines.push(`- source: ${meta.source}`);
+  lines.push(`- active_source: ${snapshotSource}`);
   lines.push(`- generatedAt: ${new Date(meta.generatedAt).toISOString()}`);
   lines.push(`- units: ${meta.units}`);
+  lines.push(`- runtime_snapshot_path: ${runtimeSnapshotPath}`);
+  lines.push(`- refresh_state_path: ${refreshStatePath}`);
+  lines.push(
+    `- staleness: age_ms=${fmtInt(health.ageMs)} max_age_ms=${fmtInt(health.maxAgeMs)} stale=${health.stale ? "true" : "false"}`,
+  );
+  if (pricingRefreshState) {
+    lines.push(
+      `- refresh: last_attempt_at=${pricingRefreshState.lastAttemptAt ? new Date(pricingRefreshState.lastAttemptAt).toISOString() : "(none)"} last_success_at=${pricingRefreshState.lastSuccessAt ? new Date(pricingRefreshState.lastSuccessAt).toISOString() : "(none)"} last_failure_at=${pricingRefreshState.lastFailureAt ? new Date(pricingRefreshState.lastFailureAt).toISOString() : "(none)"} last_result=${pricingRefreshState.lastResult ?? "(none)"}`,
+    );
+    if (pricingRefreshState.lastError) {
+      lines.push(`- refresh_error: ${pricingRefreshState.lastError}`);
+    }
+  } else {
+    lines.push("- refresh: (no runtime refresh state yet)");
+  }
   lines.push(`- providers: ${providers.join(",")}`);
   lines.push(
     `- coverage_seen: priced_keys=${fmtInt(coverage.totals.pricedKeysSeen)} mapped_but_missing=${fmtInt(coverage.totals.mappedMissingKeysSeen)} unpriced_keys=${fmtInt(coverage.totals.unpricedKeysSeen)}`,
diff --git a/src/plugin.ts b/src/plugin.ts
index ec16c09..7c0e25e 100644
--- a/src/plugin.ts
+++ b/src/plugin.ts
@@ -27,6 +27,7 @@ import { aggregateUsage } from "./lib/quota-stats.js";
 import { fetchSessionTokensForDisplay } from "./lib/session-tokens.js";
 import { formatQuotaStatsReport } from "./lib/quota-stats-format.js";
 import { buildQuotaStatusReport, type SessionTokenError } from "./lib/quota-status.js";
+import { maybeRefreshPricingSnapshot } from "./lib/modelsdev-pricing.js";
 import { refreshGoogleTokensForAllAccounts } from "./lib/google.js";
 import { getQuotaProviderDisplayLabel } from "./lib/provider-metadata.js";
 import { hasQwenOAuthAuthCached, isQwenCodeModelId } from "./lib/qwen-auth.js";
@@ -440,9 +441,38 @@ export const QuotaToastPlugin: Plugin = async ({ client }) => {
     return configInFlight;
   }
 
+  // Kick a pricing refresh; wait at most maxWaitMs for it (fire-and-forget when unset or <= 0).
+  async function kickPricingRefresh(params: {
+    reason: "init" | "tokens" | "status";
+    maxWaitMs?: number;
+  }): Promise<void> {
+    // Route every failure (sync throw or async rejection) to the plugin log instead of dropping it.
+    const reportFailure = async (error: unknown): Promise<void> => {
+      const message = error instanceof Error ? error.message : String(error);
+      await log("Pricing refresh failed", { reason: params.reason, error: message });
+    };
+    let timer: ReturnType<typeof setTimeout> | undefined;
+    try {
+      const refresh = maybeRefreshPricingSnapshot({ reason: params.reason });
+      const guarded = refresh.catch((error: unknown) => reportFailure(error).catch(() => undefined));
+      if (!params.maxWaitMs || params.maxWaitMs <= 0) return; // `guarded` already owns the rejection
+      const timeout = new Promise<void>((resolve) => {
+        timer = setTimeout(resolve, params.maxWaitMs);
+      });
+      await Promise.race([guarded, timeout]);
+    } catch (error) {
+      await reportFailure(error);
+    } finally {
+      clearTimeout(timer); // no-op when unset; otherwise drops the race timer once we stop waiting
+    }
+  }
+
   // Best-effort async init (do not await)
   void (async () => {
     await refreshConfig();
+    if (config.enabled) {
+      void kickPricingRefresh({ reason: "init" });
+    }
 
     try {
       await typedClient.app.log({
@@ -905,8 +935,10 @@ export const QuotaToastPlugin: Plugin = async ({ client }) => {
     skewMs?: number;
     force?: boolean;
     sessionID?: string;
-  }): Promise<string> {
+  }): Promise<string | null> {
     await refreshConfig();
+    if (!config.enabled) return null;
+    await kickPricingRefresh({ reason: "status", maxWaitMs: 750 });
 
     const currentModel = await getCurrentModel(params.sessionID);
     const sessionModelLookup: "ok" | "not_found" | "no_session" = !params.sessionID
@@ -996,6 +1028,15 @@ export const QuotaToastPlugin: Plugin = async ({ client }) => {
       try {
         const cmd = input.command;
         const sessionID = input.sessionID;
+        const isQuotaCommand =
+          cmd === "quota" || cmd === "quota_status" || isTokenReportCommand(cmd);
+
+        if (isQuotaCommand && !configLoaded) {
+          await refreshConfig();
+        }
+        if (isQuotaCommand && !config.enabled) {
+          handled();
+        }
 
         if (cmd === "quota") {
           // Separate cache for /quota so it doesn't pollute the toast cache.
@@ -1076,6 +1117,7 @@ export const QuotaToastPlugin: Plugin = async ({ client }) => {
 
         // Handle token report commands (/tokens_*)
         if (isTokenReportCommand(cmd)) {
+          await kickPricingRefresh({ reason: "tokens", maxWaitMs: 750 });
           const spec = TOKEN_REPORT_COMMANDS_BY_ID.get(cmd)!;
 
           if (spec.kind === "between") {
@@ -1161,7 +1203,9 @@ export const QuotaToastPlugin: Plugin = async ({ client }) => {
             force: parsed.value["force"] === true,
             sessionID,
           });
-          await injectRawOutput(sessionID, out);
+          if (out) {
+            await injectRawOutput(sessionID, out);
+          }
           handled();
         }
       } catch (err) {
@@ -1199,6 +1243,7 @@ export const QuotaToastPlugin: Plugin = async ({ client }) => {
             force: args.force,
             sessionID: context.sessionID,
           });
+          if (!out) return "";
           context.metadata({ title: "Quota Status" });
           await injectRawOutput(context.sessionID, out);
           return ""; // Empty return - output already injected with noReply
diff --git a/tests/plugin.command-handled-boundary.test.ts b/tests/plugin.command-handled-boundary.test.ts
index c07a139..63cafca 100644
--- a/tests/plugin.command-handled-boundary.test.ts
+++ b/tests/plugin.command-handled-boundary.test.ts
@@ -6,6 +6,7 @@ import { DEFAULT_CONFIG } from "../src/lib/types.js";
 const mocks = vi.hoisted(() => ({
   loadConfig: vi.fn(),
   getProviders: vi.fn(),
+  maybeRefreshPricingSnapshot: vi.fn(),
 }));
 
 vi.mock("@opencode-ai/plugin", () => {
@@ -36,6 +37,10 @@ vi.mock("../src/providers/registry.js", () => ({
   getProviders: mocks.getProviders,
 }));
 
+vi.mock("../src/lib/modelsdev-pricing.js", () => ({
+  maybeRefreshPricingSnapshot: mocks.maybeRefreshPricingSnapshot,
+}));
+
 function createClient() {
   return {
     config: {
@@ -65,6 +70,11 @@ describe("plugin command handled boundary", () => {
       enabled: true,
     });
     mocks.getProviders.mockReturnValue([]);
+    mocks.maybeRefreshPricingSnapshot.mockResolvedValue({
+      attempted: false,
+      updated: false,
+      state: { version: 1, updatedAt: Date.now() },
+    });
   });
 
   it("propagates command-handled sentinel errors to abort command pipeline", async () => {
@@ -101,4 +111,25 @@ describe("plugin command handled boundary", () => {
       } as any),
     ).rejects.toThrow("boom");
   });
+
+  it("treats handled slash commands as strict no-op when disabled", async () => {
+    mocks.loadConfig.mockResolvedValue({
+      ...DEFAULT_CONFIG,
+      enabled: false,
+    });
+    // Load the plugin only after loadConfig is mocked to report enabled: false.
+    const { QuotaToastPlugin } = await import("../src/plugin.js");
+    const client = createClient();
+    const hooks = await QuotaToastPlugin({ client } as any);
+    // The command hook must still reject with the handled sentinel for a quota command.
+    await expect(
+      hooks["command.execute.before"]?.({
+        command: "tokens_daily",
+        sessionID: "session-disabled",
+      } as any),
+    ).rejects.toThrow(COMMAND_HANDLED_SENTINEL);
+    // Disabled means no side effects: no pricing refresh and no session prompt call.
+    expect(mocks.maybeRefreshPricingSnapshot).not.toHaveBeenCalled();
+    expect(client.session.prompt).not.toHaveBeenCalled();
+  });
 });
diff --git a/tests/plugin.qwen-hook.test.ts b/tests/plugin.qwen-hook.test.ts
index 0cd981c..3617943 100644
--- a/tests/plugin.qwen-hook.test.ts
+++ b/tests/plugin.qwen-hook.test.ts
@@ -6,6 +6,7 @@ const mocks = vi.hoisted(() => ({
   loadConfig: vi.fn(),
   readAuthFileCached: vi.fn(),
   recordQwenCompletion: vi.fn(),
+  maybeRefreshPricingSnapshot: vi.fn(),
 }));
 
 vi.mock("@opencode-ai/plugin", () => {
@@ -47,6 +48,10 @@ vi.mock("../src/lib/qwen-local-quota.js", () => ({
   getQwenLocalQuotaPath: vi.fn(() => "/tmp/qwen-local-quota.json"),
 }));
 
+vi.mock("../src/lib/modelsdev-pricing.js", () => ({
+  maybeRefreshPricingSnapshot: mocks.maybeRefreshPricingSnapshot,
+}));
+
 function createClient(modelID: string) {
   return {
     config: {
@@ -83,6 +88,11 @@ describe("plugin qwen question hook", () => {
       recent: [],
       updatedAt: 1,
     });
+    mocks.maybeRefreshPricingSnapshot.mockResolvedValue({
+      attempted: false,
+      updated: false,
+      state: { version: 1, updatedAt: Date.now() },
+    });
   });
 
   it("records completion on successful qwen question execution", async () => {
diff --git a/tests/pricing-auto-refresh-policy.test.ts b/tests/pricing-auto-refresh-policy.test.ts
new file mode 100644
index 0000000..8eeb8d2
--- /dev/null
+++ b/tests/pricing-auto-refresh-policy.test.ts
@@ -0,0 +1,337 @@
+import { mkdtemp, readFile, rm, stat } from "fs/promises";
+import { tmpdir } from "os";
+import { join } from "path";
+
+import { afterEach, beforeEach, describe, expect, it, vi } from "vitest";
+
+import type { OpencodeRuntimeDirs } from "../src/lib/opencode-runtime-paths.js";
+
+const DAY_MS = 24 * 60 * 60 * 1000;
+
+/** Builds a one-provider, one-model pricing snapshot whose `_meta.generatedAt` is the argument. */
+function createBootstrapSnapshot(generatedAt: number) {
+  // Rates for the single fixture model; tests in this file assert on input 0.15 and output 0.6.
+  const gpt4oMini = {
+    input: 0.15,
+    output: 0.6,
+    cache_read: 0.03,
+    cache_write: 0.2,
+  };
+  const _meta = {
+    source: "test-bootstrap",
+    generatedAt,
+    providers: ["openai"],
+    units: "USD per 1M tokens",
+  };
+  return {
+    _meta,
+    providers: { openai: { "gpt-4o-mini": gpt4oMini } },
+  };
+}
+
+/** Derives the data/config/cache/state runtime directories beneath `root`. */
+function createRuntimeDirs(root: string): OpencodeRuntimeDirs {
+  const under = (name: string): string => join(root, name);
+  const dataDir = under("data");
+  const configDir = under("config");
+  const cacheDir = under("cache");
+  return { dataDir, configDir, cacheDir, stateDir: under("state") };
+}
+
+/** Reports whether `path` can be stat-ed; any stat failure is treated as "does not exist". */
+async function exists(path: string): Promise<boolean> {
+  // Map fulfilment to true and rejection to false rather than branching in try/catch.
+  return stat(path).then(
+    () => true,
+    () => false,
+  );
+}
+
+// Temp roots created by the current test; emptied and deleted again in afterEach.
+const tempRoots: string[] = [];
+afterEach(async () => {
+  const roots = tempRoots.splice(0, tempRoots.length);
+  for (const root of roots) await rm(root, { recursive: true, force: true });
+});
+
+/** Creates a unique temp root, registers it for cleanup, and returns its runtime directories. */
+async function createTempRuntimeDirs(): Promise<OpencodeRuntimeDirs> {
+  const created = await mkdtemp(join(tmpdir(), "opencode-quota-pricing-"));
+  tempRoots.push(created);
+  return createRuntimeDirs(created);
+}
+/** Resets vitest's module registry, then imports the pricing module so it is re-evaluated. */
+async function loadPricingModule() {
+  vi.resetModules();
+  return import("../src/lib/modelsdev-pricing.js");
+}
+
+describe("pricing runtime refresh policy", () => {
+  beforeEach(() => {
+    vi.stubEnv("OPENCODE_QUOTA_PRICING_AUTO_REFRESH", "1"); // presumably opts in to auto-refresh — TODO confirm
+  });
+
+  it("does not fetch when snapshot is fresh", async () => {
+    const pricing = await loadPricingModule();
+    const runtimeDirs = await createTempRuntimeDirs();
+    const nowMs = 1_800_000_000_000;
+    // Bare mock with no implementation: the test asserts it is never invoked.
+    const fetchFn = vi.fn();
+    // The bootstrap snapshot is generated at nowMs (age 0), inside the 3-day max age.
+    const result = await pricing.maybeRefreshPricingSnapshot({
+      nowMs,
+      runtimeDirs,
+      fetchFn,
+      maxAgeMs: 3 * DAY_MS,
+      bootstrapSnapshotOverride: createBootstrapSnapshot(nowMs),
+    });
+    // A fresh snapshot must be reported with reason "fresh" and without any fetch call.
+    expect(result.attempted).toBe(false);
+    expect(result.reason).toBe("fresh");
+    expect(fetchFn).not.toHaveBeenCalled();
+  });
+
+  it("fetches and persists a runtime snapshot when stale", async () => {
+    const pricing = await loadPricingModule();
+    const runtimeDirs = await createTempRuntimeDirs();
+    const nowMs = 1_800_000_000_000;
+    const staleGeneratedAt = nowMs - 4 * DAY_MS;
+    // Payload carries extra cost keys (reasoning, ignored_key) that must not be persisted.
+    const fetchFn = vi.fn().mockResolvedValue(
+      new Response(
+        JSON.stringify({
+          openai: {
+            models: {
+              "gpt-4o-mini": {
+                cost: {
+                  input: 0.123,
+                  output: 0.456,
+                  cache_read: 0.01,
+                  cache_write: 0.02,
+                  reasoning: 999,
+                  ignored_key: 123,
+                },
+              },
+            },
+          },
+        }),
+        {
+          status: 200,
+          headers: {
+            etag: "etag-1",
+            "last-modified": "Mon, 02 Mar 2026 00:00:00 GMT",
+          },
+        },
+      ),
+    );
+    // Snapshot is 4 days old against a 3-day max age, so a refresh is expected.
+    const result = await pricing.maybeRefreshPricingSnapshot({
+      nowMs,
+      runtimeDirs,
+      fetchFn,
+      maxAgeMs: 3 * DAY_MS,
+      bootstrapSnapshotOverride: createBootstrapSnapshot(staleGeneratedAt),
+    });
+
+    expect(result.attempted).toBe(true);
+    expect(result.updated).toBe(true);
+    expect(fetchFn).toHaveBeenCalledTimes(1);
+    expect(pricing.getPricingSnapshotSource()).toBe("runtime");
+    // The refreshed snapshot must have been written under the temp runtime dirs.
+    const runtimeSnapshotPath = pricing.getRuntimePricingSnapshotPath(runtimeDirs);
+    expect(await exists(runtimeSnapshotPath)).toBe(true);
+    // On disk: stamped with nowMs and trimmed to the four pricing fields only.
+    const persistedSnapshot = JSON.parse(await readFile(runtimeSnapshotPath, "utf-8"));
+    expect(persistedSnapshot._meta.generatedAt).toBe(nowMs);
+    expect(persistedSnapshot.providers.openai["gpt-4o-mini"]).toEqual({
+      input: 0.123,
+      output: 0.456,
+      cache_read: 0.01,
+      cache_write: 0.02,
+    });
+  });
+
+  it("falls back to last local snapshot when fetch fails", async () => {
+    const pricing = await loadPricingModule();
+    const runtimeDirs = await createTempRuntimeDirs();
+    const nowMs = 1_800_000_000_000;
+    const staleGeneratedAt = nowMs - 4 * DAY_MS;
+    // Snapshot is 4 days old against a 3-day max age, and the fetch rejects.
+    const fetchFn = vi.fn().mockRejectedValue(new Error("network down"));
+
+    const result = await pricing.maybeRefreshPricingSnapshot({
+      nowMs,
+      runtimeDirs,
+      fetchFn,
+      maxAgeMs: 3 * DAY_MS,
+      bootstrapSnapshotOverride: createBootstrapSnapshot(staleGeneratedAt),
+    });
+
+    expect(result.attempted).toBe(true);
+    expect(result.updated).toBe(false);
+    expect(result.error).toContain("network down");
+    // Lookups keep serving the bootstrap snapshot's price (input 0.15).
+    const fallbackCost = pricing.lookupCost("openai", "gpt-4o-mini");
+    expect(fallbackCost?.input).toBe(0.15);
+    // The failure is also recorded in the persisted refresh state.
+    const state = await pricing.readPricingRefreshState(runtimeDirs);
+    expect(state?.lastResult).toBe("failed");
+    expect(state?.lastError).toContain("network down");
+  });
+
+  it("uses bundled bootstrap snapshot when no runtime snapshot exists", async () => {
+    const pricing = await loadPricingModule();
+    const runtimeDirs = await createTempRuntimeDirs();
+    const nowMs = 1_800_000_000_000;
+    // Bootstrap snapshot generated at nowMs plus newly created temp runtime dirs.
+    const result = await pricing.maybeRefreshPricingSnapshot({
+      nowMs,
+      runtimeDirs,
+      fetchFn: vi.fn(),
+      maxAgeMs: 3 * DAY_MS,
+      bootstrapSnapshotOverride: createBootstrapSnapshot(nowMs),
+    });
+
+    expect(result.attempted).toBe(false);
+    expect(pricing.getPricingSnapshotSource()).toBe("bundled");
+    expect(pricing.lookupCost("openai", "gpt-4o-mini")?.output).toBe(0.6);
+    // No refresh was attempted, so no runtime snapshot file may exist afterwards.
+    const runtimeSnapshotPath = pricing.getRuntimePricingSnapshotPath(runtimeDirs);
+    expect(await exists(runtimeSnapshotPath)).toBe(false);
+  });
+
+  it("refreshes local snapshot freshness when models.dev responds 304", async () => {
+    const pricing = await loadPricingModule();
+    const runtimeDirs = await createTempRuntimeDirs();
+    const nowMs = 1_800_000_000_000;
+    const staleGeneratedAt = nowMs - 4 * DAY_MS;
+    // A 304 has no body: prices must come from the local snapshot, only the timestamp moves.
+    const fetchFn = vi.fn().mockResolvedValue(
+      new Response(null, {
+        status: 304,
+        headers: {
+          etag: "etag-304",
+          "last-modified": "Mon, 02 Mar 2026 00:00:00 GMT",
+        },
+      }),
+    );
+
+    const result = await pricing.maybeRefreshPricingSnapshot({
+      nowMs,
+      runtimeDirs,
+      fetchFn,
+      maxAgeMs: 3 * DAY_MS,
+      bootstrapSnapshotOverride: createBootstrapSnapshot(staleGeneratedAt),
+    });
+    // "not_modified" still counts as updated: the active snapshot becomes runtime at nowMs.
+    expect(result.attempted).toBe(true);
+    expect(result.updated).toBe(true);
+    expect(result.state.lastResult).toBe("not_modified");
+    expect(pricing.getPricingSnapshotSource()).toBe("runtime");
+    expect(pricing.getPricingSnapshotMeta().generatedAt).toBe(nowMs);
+    // On disk: new generatedAt, but the bootstrap price (input 0.15) is unchanged.
+    const runtimeSnapshotPath = pricing.getRuntimePricingSnapshotPath(runtimeDirs);
+    const persistedSnapshot = JSON.parse(await readFile(runtimeSnapshotPath, "utf-8"));
+    expect(persistedSnapshot._meta.generatedAt).toBe(nowMs);
+    expect(persistedSnapshot.providers.openai["gpt-4o-mini"].input).toBe(0.15);
+  });
+
+  it("throttles refresh attempts using persisted lastAttemptAt state", async () => {
+    const firstLoad = await loadPricingModule();
+    const runtimeDirs = await createTempRuntimeDirs();
+    const nowMs = 1_800_000_000_000;
+    const staleGeneratedAt = nowMs - 4 * DAY_MS;
+    // First module instance: the snapshot is stale and the fetch attempt fails.
+    const failingFetch = vi.fn().mockRejectedValue(new Error("network down"));
+    const firstResult = await firstLoad.maybeRefreshPricingSnapshot({
+      nowMs,
+      runtimeDirs,
+      fetchFn: failingFetch,
+      maxAgeMs: 3 * DAY_MS,
+      bootstrapSnapshotOverride: createBootstrapSnapshot(staleGeneratedAt),
+    });
+
+    expect(firstResult.attempted).toBe(true);
+    expect(firstResult.updated).toBe(false);
+    // Re-imported module one minute later, same runtimeDirs: in-memory state is gone.
+    const secondLoad = await loadPricingModule();
+    const throttledFetch = vi.fn();
+    const secondResult = await secondLoad.maybeRefreshPricingSnapshot({
+      nowMs: nowMs + 60_000,
+      runtimeDirs,
+      fetchFn: throttledFetch,
+      maxAgeMs: 3 * DAY_MS,
+      bootstrapSnapshotOverride: createBootstrapSnapshot(staleGeneratedAt),
+    });
+
+    expect(secondResult.attempted).toBe(false);
+    expect(secondResult.reason).toBe("throttled");
+    expect(throttledFetch).not.toHaveBeenCalled();
+  });
+
+  it("dedupes concurrent attempts and only checks once per process window", async () => {
+    const pricing = await loadPricingModule();
+    const runtimeDirs = await createTempRuntimeDirs();
+    const nowMs = 1_800_000_000_000;
+    const staleGeneratedAt = nowMs - 4 * DAY_MS;
+    // The fetch stays pending until resolveFetch is called, so both calls below overlap.
+    let resolveFetch: ((response: Response) => void) | null = null;
+    const fetchFn = vi.fn().mockImplementation(
+      () =>
+        new Promise<Response>((resolve) => {
+          resolveFetch = resolve;
+        }),
+    );
+
+    const first = pricing.maybeRefreshPricingSnapshot({
+      nowMs,
+      runtimeDirs,
+      fetchFn,
+      maxAgeMs: 3 * DAY_MS,
+      bootstrapSnapshotOverride: createBootstrapSnapshot(staleGeneratedAt),
+    });
+
+    const second = pricing.maybeRefreshPricingSnapshot({
+      nowMs,
+      runtimeDirs,
+      fetchFn,
+      maxAgeMs: 3 * DAY_MS,
+      bootstrapSnapshotOverride: createBootstrapSnapshot(staleGeneratedAt),
+    });
+    // Wait until the fetch has been issued; it must have been issued exactly once.
+    await vi.waitFor(() => {
+      expect(fetchFn).toHaveBeenCalledTimes(1);
+    });
+    // Let the single pending fetch complete with a one-model payload.
+    resolveFetch?.(
+      new Response(
+        JSON.stringify({
+          openai: {
+            models: {
+              "gpt-4o-mini": {
+                cost: { input: 0.2, output: 0.8 },
+              },
+            },
+          },
+        }),
+        { status: 200 },
+      ),
+    );
+
+    const [firstResult, secondResult] = await Promise.all([first, second]);
+    expect(firstResult.updated).toBe(true);
+    expect(secondResult.updated).toBe(true);
+    // A later call on the same module instance must be skipped without another fetch.
+    const thirdResult = await pricing.maybeRefreshPricingSnapshot({
+      nowMs: nowMs + 30_000,
+      runtimeDirs,
+      fetchFn,
+      maxAgeMs: 3 * DAY_MS,
+      bootstrapSnapshotOverride: createBootstrapSnapshot(staleGeneratedAt),
+    });
+
+    expect(thirdResult.attempted).toBe(false);
+    expect(thirdResult.reason).toBe("already_checked_this_process");
+    expect(fetchFn).toHaveBeenCalledTimes(1);
+  });
+});
diff --git a/tests/pricing-resolver.coverage.test.ts b/tests/pricing-resolver.coverage.test.ts
index 505bb4a..4a799f9 100644
--- a/tests/pricing-resolver.coverage.test.ts
+++ b/tests/pricing-resolver.coverage.test.ts
@@ -1,6 +1,10 @@
 import { describe, expect, it } from "vitest";
 
-import { listModelsForProvider, listProviders } from "../src/lib/modelsdev-pricing.js";
+import {
+  listModelsForProvider,
+  listProviders,
+  lookupCost,
+} from "../src/lib/modelsdev-pricing.js";
 import { resolvePricingKey } from "../src/lib/quota-stats.js";
 
 describe("resolvePricingKey snapshot coverage", () => {
@@ -45,5 +49,59 @@ describe("resolvePricingKey snapshot coverage", () => {
     expect(resolved.key.provider).toBe(providerID);
     expect(resolved.key.model).toBe(modelID);
   });
-});
 
+  it("maps copilot and proxy model variants to priced snapshot keys", () => {
+    const copilotHaiku = resolvePricingKey({
+      providerID: "github-copilot",
+      modelID: "github-copilot/claude-haiku-4.5",
+    });
+    expect(copilotHaiku.ok).toBe(true);
+    if (!copilotHaiku.ok) return;
+    expect(copilotHaiku.key).toEqual({ provider: "anthropic", model: "claude-haiku-4-5" });
+    // Copilot-prefixed grok id: expected to resolve to the xai pricing key.
+    const copilotGrok = resolvePricingKey({
+      providerID: "github-copilot",
+      modelID: "github-copilot/grok-code-fast-1",
+    });
+    expect(copilotGrok.ok).toBe(true);
+    if (!copilotGrok.ok) return;
+    expect(copilotGrok.key).toEqual({ provider: "xai", model: "grok-code-fast-1" });
+    // Proxy source provider with a moonshotai/ model prefix: the dotted key is expected as-is.
+    const kimiBase = resolvePricingKey({
+      providerID: "CLIProxyAPI",
+      modelID: "moonshotai/kimi-k2.5",
+    });
+    expect(kimiBase.ok).toBe(true);
+    if (!kimiBase.ok) return;
+    expect(kimiBase.key).toEqual({ provider: "moonshotai", model: "kimi-k2.5" });
+    // A trailing "-free" is expected to resolve to the base model's pricing key.
+    const kimiFree = resolvePricingKey({
+      providerID: "opencode",
+      modelID: "opencode/kimi-k2.5-free",
+    });
+    expect(kimiFree.ok).toBe(true);
+    if (!kimiFree.ok) return;
+    expect(kimiFree.key).toEqual({ provider: "moonshotai", model: "kimi-k2.5" });
+    // Same "-free" handling when the source provider id is "openai" itself.
+    const openaiFreeKnownProvider = resolvePricingKey({
+      providerID: "openai",
+      modelID: "openai/gpt-4o-mini-free",
+    });
+    expect(openaiFreeKnownProvider.ok).toBe(true);
+    if (!openaiFreeKnownProvider.ok) return;
+    expect(openaiFreeKnownProvider.key).toEqual({ provider: "openai", model: "gpt-4o-mini" });
+    // ...and when "openai" appears only as the model-id prefix.
+    const openaiFreeModelPrefix = resolvePricingKey({
+      providerID: "connector-without-pricing-id",
+      modelID: "openai/gpt-4o-mini-free",
+    });
+    expect(openaiFreeModelPrefix.ok).toBe(true);
+    if (!openaiFreeModelPrefix.ok) return;
+    expect(openaiFreeModelPrefix.key).toEqual({ provider: "openai", model: "gpt-4o-mini" });
+    // Every key asserted above must also have a cost entry in the loaded snapshot.
+    expect(lookupCost("anthropic", "claude-haiku-4-5")).not.toBeNull();
+    expect(lookupCost("xai", "grok-code-fast-1")).not.toBeNull();
+    expect(lookupCost("moonshotai", "kimi-k2.5")).not.toBeNull();
+    expect(lookupCost("openai", "gpt-4o-mini")).not.toBeNull();
+  });
+});
diff --git a/tests/pricing-snapshot-health.test.ts b/tests/pricing-snapshot-health.test.ts
new file mode 100644
index 0000000..830a239
--- /dev/null
+++ b/tests/pricing-snapshot-health.test.ts
@@ -0,0 +1,31 @@
+import { describe, expect, it } from "vitest";
+
+import {
+  DEFAULT_PRICING_SNAPSHOT_MAX_AGE_MS,
+  getPricingSnapshotHealth,
+  getPricingSnapshotMeta,
+} from "../src/lib/modelsdev-pricing.js";
+
+describe("pricing snapshot health", () => {
+  it("marks snapshot as fresh before the max-age boundary", () => {
+    const generatedAt = getPricingSnapshotMeta().generatedAt;
+    const health = getPricingSnapshotHealth({
+      nowMs: generatedAt + DEFAULT_PRICING_SNAPSHOT_MAX_AGE_MS - 1,
+      maxAgeMs: DEFAULT_PRICING_SNAPSHOT_MAX_AGE_MS,
+    });
+    // Age is one millisecond short of the limit, so the snapshot must not be stale yet.
+    expect(health.stale).toBe(false);
+    expect(health.ageMs).toBe(DEFAULT_PRICING_SNAPSHOT_MAX_AGE_MS - 1);
+  });
+
+  it("marks snapshot as stale after the max-age boundary", () => {
+    const generatedAt = getPricingSnapshotMeta().generatedAt;
+    const health = getPricingSnapshotHealth({
+      nowMs: generatedAt + DEFAULT_PRICING_SNAPSHOT_MAX_AGE_MS + 1,
+      maxAgeMs: DEFAULT_PRICING_SNAPSHOT_MAX_AGE_MS,
+    });
+    // Age is one millisecond past the limit, so the snapshot must be reported stale.
+    expect(health.stale).toBe(true);
+    expect(health.ageMs).toBe(DEFAULT_PRICING_SNAPSHOT_MAX_AGE_MS + 1);
+  });
+});
diff --git a/tests/setup.ts b/tests/setup.ts
index 3850173..6052280 100644
--- a/tests/setup.ts
+++ b/tests/setup.ts
@@ -1,7 +1,15 @@
 import { afterEach, vi } from "vitest";
 
-afterEach(() => {
+afterEach(async () => {
+  try {
+    const pricing = await import("../src/lib/modelsdev-pricing.js");
+    pricing.__resetPricingSnapshotForTests();
+  } catch {
+    // Best effort: some suites mock this module without the reset export; cleanup below still runs.
+  }
+
   vi.useRealTimers();
+  vi.unstubAllEnvs();
   vi.unstubAllGlobals();
   vi.clearAllMocks();
 });