diff --git a/doc/tag-log.md b/doc/tag-log.md
index 13bfa3f..4ba1018 100644
--- a/doc/tag-log.md
+++ b/doc/tag-log.md
@@ -17,6 +17,10 @@ Release history of mimo2codex, newest first.
 
 ---
 
+## feat/multimodal-fallback
+
+- **[new]** **Multimodal fallback: auto-switch to vision model when images are detected**: when a request contains images but the active model doesn't support vision (e.g. `mimo-v2.5-pro`), the proxy now automatically rewrites the upstream model to a vision-capable one (default `mimo-v2.5`) so images are processed instead of silently dropped. Toggle and model selection are in the admin UI → Codex Integration → "Thinking & Runtime Overrides" tab. Disabled by default — enable it when your workflow mixes vision and non-vision models.
+
 ## v0.5.21 (upcoming)
 
 - **[fix]** **Sustained 429 rate limits no longer break the session (follow-up to v0.5.20's retry)**: v0.5.20 added proxy-side 429/5xx retry, but the default budget (3 retries, ~3.5s) only outlasted sub-second blips. Real per-minute quota limits (`429 Too many requests / limitation`, often *without* a `Retry-After` header) still exhausted it, so the raw 429 was forwarded to Codex, which then burned its own retries and surfaced "exceeded retry limit, last status: 429" again. The default retry budget is now larger: **6 retries with exponential backoff capped at 12s (~28s total)**, so a multi-second quota limit clears before we give up. Still abortable, still honors `Retry-After` when present, and still tunable via `MIMO2CODEX_UPSTREAM_MAX_RETRIES` (now up to 12) / `MIMO2CODEX_UPSTREAM_RETRY_BASE_MS`. Trade-off: while rate-limited, a single request now waits up to ~28s before failing instead of ~3.5s.
diff --git a/doc/tag-log.zh.md b/doc/tag-log.zh.md
index 3eb716a..2a27c1a 100644
--- a/doc/tag-log.zh.md
+++ b/doc/tag-log.zh.md
@@ -17,6 +17,10 @@ mimo2codex 的版本发布历史，按 tag 倒序排列。
 
 ---
 
+## feat/multimodal-fallback
+
+- **[new]** **多模态 Fallback：检测到图片时自动切换 vision 模型**：当请求包含图片但当前模型不支持 vision（如 `mimo-v2.5-pro`）时，代理自动将 upstream model 重写为多模态模型（默认 `mimo-v2.5`），避免图片被静默丢弃。开关和模型选择在 admin UI → Codex 接入 →「思考与运行时覆盖」标签页。默认关闭——需要混合 vision / 非 vision 模型时开启。
+
 ## v0.5.21 (upcoming)
 
 - **[fix]** **持续型 429 限流不再中断会话（v0.5.20 重试的补强）**：v0.5.20 加了代理侧的 429/5xx 重试，但默认预算（重试 3 次、约 3.5 秒）只能扛住亚秒级抖动。真正按分钟计的配额限流（`429 Too many requests / limitation`，且经常**不带 `Retry-After` 头**）仍会把预算耗尽，于是原始 429 被透传给 Codex，Codex 再耗尽自己的重试，又报出「exceeded retry limit, last status: 429」。现在默认重试预算放大为：**重试 6 次、指数退避封顶 12 秒（总计约 28 秒）**，让几秒到几十秒的配额限流在放弃前自行解除。仍可被取消、仍尊重上游的 `Retry-After`、仍可通过 `MIMO2CODEX_UPSTREAM_MAX_RETRIES`（上限提到 12）/ `MIMO2CODEX_UPSTREAM_RETRY_BASE_MS` 调整。代价：限流期间单个请求最长会等约 28 秒才失败，而不是原来的约 3.5 秒。
diff --git a/src/admin/router.ts b/src/admin/router.ts
index 46cbdf0..920127f 100644
--- a/src/admin/router.ts
+++ b/src/admin/router.ts
@@ -1003,6 +1003,57 @@ async function handleApi(ctx: RouteContext): Promise<void> {
     return sendError(res, 405, "method_not_allowed", "use GET or PUT");
   }
 
+  // GET/PUT /admin/api/vision-fallback — multimodal fallback toggle + model.
+  // When enabled, requests containing images are automatically routed to a
+  // vision-capable model even if the client's model doesn't support images.
+  if (pathname === "/admin/api/vision-fallback") {
+    if (req.method === "GET") {
+      // A failed settings read degrades to the defaults (off, "mimo-v2.5").
+      let enabled = false;
+      let model = "mimo-v2.5";
+      try {
+        enabled = getSetting("codex.visionFallbackEnabled") === "1";
+      } catch {
+        // keep the default
+      }
+      try {
+        model = getSetting("codex.visionFallbackModel") || "mimo-v2.5";
+      } catch {
+        // keep the default
+      }
+      return sendJson(res, 200, { enabled, model });
+    }
+    if (req.method === "PUT") {
+      const body = await readJsonBody<{ enabled?: unknown; model?: unknown }>(req);
+      // Fields of the wrong type are ignored, exactly as if they were absent.
+      const enabled = typeof body.enabled === "boolean" ? body.enabled : undefined;
+      const model = typeof body.model === "string" ? body.model.trim() : undefined;
+      // Validate the whole body before persisting anything, so a rejected
+      // request can never leave a half-applied update behind.
+      if (model === "") {
+        return sendError(res, 400, "invalid_body", "model must be a non-empty string");
+      }
+      if (enabled === undefined && model === undefined) {
+        return sendError(
+          res,
+          400,
+          "invalid_body",
+          "body must include at least one of: enabled (boolean), model (string)",
+        );
+      }
+      if (enabled !== undefined) {
+        setSetting("codex.visionFallbackEnabled", enabled ? "1" : "0");
+        log.info(`codex.visionFallbackEnabled set to ${enabled} via admin UI`);
+      }
+      if (model !== undefined) {
+        setSetting("codex.visionFallbackModel", model);
+        log.info(`codex.visionFallbackModel set to "${model}" via admin UI`);
+      }
+      return sendJson(res, 200, { ok: true });
+    }
+    return sendError(res, 405, "method_not_allowed", "use GET or PUT");
+  }
+
   // GET/PUT /admin/api/log-settings — quick toggle for the "model fallback
   // applied" rewrite log. Default is silent (suppressed). env
   // MIMO2CODEX_SILENT_REWRITE, when set, overrides and disables the toggle.
diff --git a/src/server.ts b/src/server.ts
index b3b56ba..13675e2 100644
--- a/src/server.ts
+++ b/src/server.ts
@@ -2,6 +2,7 @@ import { createServer, type IncomingMessage, type Server, type ServerResponse }
 import type { Config } from "./config.js";
 import { respToResponses } from "./translate/respToResponses.js";
 import { pipeChatStreamToResponses, type StreamPipelineResult } from "./translate/streamToSse.js";
+import { modelSupportsImages } from "./translate/reqToChat.js";
 import { iterChatStreamChunks } from "./upstream/chatStream.js";
 import {
   callOpenAICompat,
@@ -386,6 +387,54 @@ function rewriteWarning(notice: { from: string; to: string; reason: string }): {
   };
 }
 
+// ---------------------------------------------------------------------------
+// Vision (multimodal) fallback
+// ---------------------------------------------------------------------------
+
+// Resolves the vision-fallback model through getSetting. Yields null when
+// cfg.adminEnabled is falsy, the toggle is not "1", or reading a setting throws.
+function resolveVisionFallback(cfg: Config): string | null {
+  if (!cfg.adminEnabled) return null;
+  try {
+    const enabled = getSetting("codex.visionFallbackEnabled") === "1";
+    return enabled ? getSetting("codex.visionFallbackModel") || "mimo-v2.5" : null;
+  } catch {
+    return null;
+  }
+}
+
+// True when a Responses API payload has an input_image part, either in a message's content or in a function_call_output's output (a tool-returned image).
+function requestContainsImages(payload: ResponsesRequest): boolean {
+  if (!Array.isArray(payload.input)) return false;
+  for (const entry of payload.input) {
+    if (entry.type === "message") {
+      if (!Array.isArray(entry.content)) continue;
+      for (const piece of entry.content) {
+        if (piece.type === "input_image") return true;
+      }
+    } else if (entry.type === "function_call_output") {
+      if (!Array.isArray(entry.output)) continue;
+      for (const piece of entry.output) {
+        if (piece.type === "input_image") return true;
+      }
+    }
+  }
+  return false;
+}
+
+// True when a Chat Completions payload has at least one image_url content part.
+function chatRequestContainsImages(payload: ChatRequest): boolean {
+  if (!Array.isArray(payload.messages)) return false;
+  for (const { content } of payload.messages) {
+    // Non-array content (e.g. a plain string) cannot hold image parts.
+    if (!Array.isArray(content)) continue;
+    for (const piece of content) {
+      if (piece.type === "image_url") return true;
+    }
+  }
+  return false;
+}
+
 /**
  * 从 Codex 请求的 tools 数组中提取 namespace 映射：toolName → namespaceName。
  * Codex Desktop 期望响应中的 function_call 带 namespace 字段才能路由到正确 handler。
@@ -457,6 +506,27 @@ async function handleResponses(
     cfg,
     readActiveOverrideSafely(cfg)
   );
+  // Multimodal fallback: the request contains images but the model does not support vision → switch automatically.
+  const visionFallbackModel = resolveVisionFallback(cfg);
+  if (visionFallbackModel) {
+    const effectiveModel = selectedRaw.upstreamModel;
+    if (!modelSupportsImages(effectiveModel) && requestContainsImages(payload)) {
+      const resolved = selectedRaw.provider.resolveModel(visionFallbackModel);
+      const newModel = resolved?.id ?? visionFallbackModel; // raw configured name when the provider cannot resolve it
+      selectedRaw.rewriteNotice = { // replaces any notice already present on selectedRaw
+        from: effectiveModel,
+        to: newModel,
+        reason: `multimodal fallback — request contains images but model "${effectiveModel}" does not support vision`,
+      };
+      selectedRaw.upstreamModel = newModel;
+      selectedRaw.modelInfo = resolved ?? selectedRaw.modelInfo; // previous modelInfo is kept when unresolved
+      log.info("vision fallback applied", {
+        from: effectiveModel,
+        to: newModel,
+        provider: selectedRaw.provider.id,
+      });
+    }
+  }
   const { provider, upstreamModel, modelInfo, rewriteNotice } = selectedRaw;
   // BYOK: if a logged-in user has stored their own upstream API key for this
   // provider, swap it into the runtime. Local-mode / shared-key users keep
@@ -1091,6 +1161,27 @@ async function handleChatPassthrough(
     cfg,
     readActiveOverrideSafely(cfg)
   );
+  // Multimodal fallback (chat completions path): the request contains images but the model does not support vision → switch automatically.
+  const visionFallbackModel = resolveVisionFallback(cfg);
+  if (visionFallbackModel) {
+    const effectiveModel = selectedRaw.upstreamModel;
+    if (!modelSupportsImages(effectiveModel) && chatRequestContainsImages(payload)) {
+      const resolved = selectedRaw.provider.resolveModel(visionFallbackModel);
+      const newModel = resolved?.id ?? visionFallbackModel; // raw configured name when the provider cannot resolve it
+      selectedRaw.rewriteNotice = { // replaces any notice already present on selectedRaw
+        from: effectiveModel,
+        to: newModel,
+        reason: `multimodal fallback — request contains images but model "${effectiveModel}" does not support vision`,
+      };
+      selectedRaw.upstreamModel = newModel;
+      selectedRaw.modelInfo = resolved ?? selectedRaw.modelInfo; // previous modelInfo is kept when unresolved
+      log.info("vision fallback applied", {
+        from: effectiveModel,
+        to: newModel,
+        provider: selectedRaw.provider.id,
+      });
+    }
+  }
   const { provider, upstreamModel, modelInfo, rewriteNotice } = selectedRaw;
   const { runtime, source: apiKeySource } = resolveRuntimeForUser(
     selectedRaw.runtime,
diff --git a/src/translate/reqToChat.ts b/src/translate/reqToChat.ts
index 850dec1..cbbe503 100644
--- a/src/translate/reqToChat.ts
+++ b/src/translate/reqToChat.ts
@@ -59,7 +59,7 @@ function materializeStrippedImage(imageUrl: string, dropDir?: string): string |
 // only `mimo-v2.5` and `mimo-v2-omni` (and *-omni* variants) accept image
 // input. The other v2.5 variants (mimo-v2.5-pro, mimo-v2-flash, …) return
 // 404 "No endpoints found that support image input" when given image_url parts.
-function modelSupportsImages(model: string): boolean {
+export function modelSupportsImages(model: string): boolean {
   const base = model.toLowerCase();
   if (base.includes("omni")) return true;
   if (base === "mimo-v2.5") return true;
diff --git a/web/src/api/client.ts b/web/src/api/client.ts
index 6635e85..1f5b013 100644
--- a/web/src/api/client.ts
+++ b/web/src/api/client.ts
@@ -608,6 +608,10 @@ export const api = {
     request<{ ok: boolean }>("PUT", "/thinking-state", { disabled }),
   setForceHighEffort: (forceHighEffort: boolean) =>
     request<{ ok: boolean }>("PUT", "/thinking-state", { forceHighEffort }),
+  visionFallback: () =>
+    request<{ enabled: boolean; model: string }>("GET", "/vision-fallback"),
+  setVisionFallback: (body: { enabled?: boolean; model?: string }) =>
+    request<{ ok: boolean }>("PUT", "/vision-fallback", body),
   logSettings: () => request<LogSettingsResponse>("GET", "/log-settings"),
   setSilentRewrite: (silentRewrite: boolean) =>
     request<{ ok: boolean }>("PUT", "/log-settings", { silentRewrite }),
diff --git a/web/src/i18n/locales/en-US/codexEnable.json b/web/src/i18n/locales/en-US/codexEnable.json
index fd1ea08..0af010c 100644
--- a/web/src/i18n/locales/en-US/codexEnable.json
+++ b/web/src/i18n/locales/en-US/codexEnable.json
@@ -62,6 +62,14 @@
     "hint": "Thinking ON/OFF: when OFF, every provider skips thinking (mimo / deepseek send thinking:{type:\"disabled\"}, sensenova / other generic send reasoning_effort:\"none\"). When ON, each provider follows its own default (most clients don't include reasoning effort for non-GPT-5 models, so the upstream may skip thinking on simple tasks). Force high reasoning effort: only available while Thinking is ON — if Codex didn't pass an effort, mimo2codex injects reasoning_effort=\"high\"; if Codex did pass one, that value is respected. Takes effect immediately (no restart).",
     "cliOverride": "Thinking ON/OFF is currently controlled by CLI flag (--disable-thinking) or env (MIMO2CODEX_DISABLE_THINKING). The switch is locked. Restart without those args to control via UI."
   },
+  "visionFallback": {
+    "title": "Multimodal Fallback",
+    "statusOn": "Enabled: image requests auto-switch to vision model",
+    "statusOff": "Disabled",
+    "hint": "When enabled, if a request contains images but the current model doesn't support vision (e.g. mimo-v2.5-pro), automatically switch to the specified multimodal model (default: mimo-v2.5) to avoid images being silently dropped.",
+    "modelLabel": "Fallback model",
+    "modelPlaceholder": "e.g. mimo-v2.5"
+  },
   "targets": {
     "title": "Available combinations",
     "externalWarn": "Your current ~/.codex/auth.json was not written by mimo2codex (probably a real OpenAI login or another tool). 'Write files and enable' will back it up first, then overwrite — restore is always available.",
diff --git a/web/src/i18n/locales/zh-CN/codexEnable.json b/web/src/i18n/locales/zh-CN/codexEnable.json
index b571394..ae2d29b 100644
--- a/web/src/i18n/locales/zh-CN/codexEnable.json
+++ b/web/src/i18n/locales/zh-CN/codexEnable.json
@@ -62,6 +62,14 @@
     "hint": "「开/关思考」：关闭后所有 provider 都不思考（mimo / deepseek 发 thinking:{type:\"disabled\"}，sensenova / 其他 generic 发 reasoning_effort:\"none\"）。开启时各 provider 走自己的默认行为（多数客户端对非 GPT-5 模型默认不带 reasoning effort，上游可能对简单任务跳过思考）。「强制高强度思考」：仅在「开/关思考」是开时可用，Codex 没明确传 effort 时由 mimo2codex 兜底注入 reasoning_effort=\"high\"；Codex 显式传过的值会被尊重，不被覆盖。修改后立即对新请求生效（无需重启）。",
     "cliOverride": "「开/关思考」当前由 CLI flag (--disable-thinking) 或环境变量 (MIMO2CODEX_DISABLE_THINKING) 控制，开关被锁定。如需用 UI 控制，启动时不要带这些参数。"
   },
+  "visionFallback": {
+    "title": "多模态 Fallback",
+    "statusOn": "已启用：图片请求自动切换 vision 模型",
+    "statusOff": "未启用",
+    "hint": "开启后，当请求包含图片但当前模型不支持 vision（如 mimo-v2.5-pro），自动切换到指定的多模态模型（默认 mimo-v2.5），避免图片被静默丢弃。",
+    "modelLabel": "Fallback 模型",
+    "modelPlaceholder": "例如 mimo-v2.5"
+  },
   "targets": {
     "title": "可启用组合",
     "externalWarn": "当前 ~/.codex/auth.json 不是 mimo2codex 写入的（可能是真 OpenAI 登录或其他工具）。点「写入文件并启用」会先自动备份再覆盖，恢复随时可做。",
diff --git a/web/src/pages/codex/CodexEnable.tsx b/web/src/pages/codex/CodexEnable.tsx
index fc331bb..4650627 100644
--- a/web/src/pages/codex/CodexEnable.tsx
+++ b/web/src/pages/codex/CodexEnable.tsx
@@ -5,6 +5,7 @@ import {
   Button,
   Card,
   Collapse,
+  Input,
   Modal,
   Space,
   Switch,
@@ -62,6 +63,10 @@ export function CodexEnable() {
   const [forceHighEffort, setForceHighEffort] = useState<boolean | null>(null);
   const [forceHighEffortSaving, setForceHighEffortSaving] =
     useState<boolean>(false);
+  // visionFallback: multimodal fallback toggle + target model. null = still loading.
+  const [visionFallbackEnabled, setVisionFallbackEnabled] = useState<boolean | null>(null);
+  const [visionFallbackModel, setVisionFallbackModel] = useState<string>("mimo-v2.5");
+  const [visionFallbackSaving, setVisionFallbackSaving] = useState<boolean>(false);
 
   async function doProbe(target: CodexTarget) {
     const key = `${target.providerId}::${target.modelId}`;
@@ -93,10 +98,11 @@ export function CodexEnable() {
   async function load() {
     try {
       setError(null);
-      const [s, ts, think] = await Promise.all([
+      const [s, ts, think, vf] = await Promise.all([
         api.codexState(),
         api.codexTargets(),
         api.thinkingState().catch(() => null), // 老后端没此端点时降级
+        api.visionFallback().catch(() => null), // 老后端没此端点时降级
       ]);
       setState(s);
       setTargetsResp(ts);
@@ -105,6 +111,12 @@ export function CodexEnable() {
         setThinkingCliOverridden(think.cliOverride !== null);
         setForceHighEffort(think.forceHighEffort);
       }
+      if (vf) {
+        setVisionFallbackEnabled(vf.enabled);
+        setVisionFallbackModel(vf.model);
+      } else {
+        setVisionFallbackEnabled(false);
+      }
     } catch (err) {
       setError((err as Error).message);
     }
@@ -134,6 +146,32 @@ export function CodexEnable() {
     }
   }
 
+  async function doToggleVisionFallback(enabled: boolean): Promise<void> { // Persists the fallback toggle through api.setVisionFallback.
+    setVisionFallbackSaving(true); // reset in `finally`, whether the request succeeds or throws
+    try {
+      await api.setVisionFallback({ enabled });
+      setVisionFallbackEnabled(enabled); // only reached when the request did not throw
+    } catch (err) {
+      setError((err as Error).message); // surfaced through the page-level error state
+    } finally {
+      setVisionFallbackSaving(false);
+    }
+  }
+
+  async function doSetVisionFallbackModel(model: string): Promise<void> { // Persists a new fallback model name through api.setVisionFallback.
+    const trimmed = model.trim();
+    if (!trimmed || trimmed === visionFallbackModel) return; // blank or unchanged input: nothing is sent
+    setVisionFallbackSaving(true); // reset in `finally`, whether the request succeeds or throws
+    try {
+      await api.setVisionFallback({ model: trimmed });
+      setVisionFallbackModel(trimmed); // only reached when the request did not throw
+    } catch (err) {
+      setError((err as Error).message); // surfaced through the page-level error state
+    } finally {
+      setVisionFallbackSaving(false);
+    }
+  }
+
   useEffect(() => {
     void load();
   }, []);
@@ -527,6 +565,62 @@ export function CodexEnable() {
                       )}
                     </Card>
                   )}
+                  {visionFallbackEnabled !== null && (
+                    <Card
+                      size="small"
+                      title={t("visionFallback.title")}
+                      style={{ marginBottom: 12 }}
+                    >
+                      <Space wrap>
+                        <Switch
+                          size="small"
+                          checked={!!visionFallbackEnabled}
+                          loading={visionFallbackSaving}
+                          onChange={(enabled) =>
+                            void doToggleVisionFallback(enabled)
+                          }
+                          checkedChildren={t("thinking.switchOn")}
+                          unCheckedChildren={t("thinking.switchOff")}
+                        />
+                        <span>
+                          {visionFallbackEnabled
+                            ? t("visionFallback.statusOn")
+                            : t("visionFallback.statusOff")}
+                        </span>
+                      </Space>
+                      <div style={{ marginTop: 8 }}>
+                        <Typography.Text
+                          type="secondary"
+                          style={{ fontSize: 12 }}
+                        >
+                          {t("visionFallback.modelLabel")}
+                        </Typography.Text>
+                        <Input
+                          // Uncontrolled + keyed (a bare `value` with no `onChange` was uneditable); Enter saves via onBlur.
+                          key={visionFallbackModel}
+                          size="small"
+                          defaultValue={visionFallbackModel}
+                          disabled={!visionFallbackEnabled}
+                          placeholder={t("visionFallback.modelPlaceholder")}
+                          onBlur={(e) =>
+                            void doSetVisionFallbackModel(e.target.value)
+                          }
+                          onPressEnter={(e) => e.currentTarget.blur()}
+                          style={{ width: 240, marginTop: 4, marginLeft: 4 }}
+                        />
+                      </div>
+                      <Typography.Paragraph
+                        type="secondary"
+                        style={{
+                          fontSize: 12,
+                          marginTop: 8,
+                          marginBottom: 0,
+                        }}
+                      >
+                        {t("visionFallback.hint")}
+                      </Typography.Paragraph>
+                    </Card>
+                  )}
                   {state && (
                     <RuntimeOverrideCard
                       state={state}