fix(feed): sanitizeMediaUrl 走 URL 对象重写 protocol + 清 :80 (CR PR#345)

github-actions[bot] · copilot-pull-request-reviewer[bot] · github-actions[bot] · commit 0ae43f04ee8b · 2026-05-12T18:56:54.000Z
Copilot CR 指出两个问题： 1. http:// → https:// 用字符串拼接会保留显式端口 —— "http://x.com:80/" 升成 "https://x.com:80/" 后浏览器拿 80 走 TLS 必失败 2. 升级逻辑没有单测 修复： - 用 new URL(safe) 改 protocol = "https:"，并在 port === "80" 时清空 - 非 80 端口保留（用户跑 https on 8443 等场景） - 相对路径不走 URL parser，原样返回 - 新增 tests/url-safety.test.ts，14 条 case 覆盖：https 原样 / http 升级 / 大小写 / :80 清空 / :8080 保留 / path-query-hash 保留 / 相对路径 / 协议相对 拒绝 / javascript: / data: / vbscript: / mailto: 在媒体场景拒 / 空值 49/49 vitest 通过。 Co-authored-by: copilot-pull-request-reviewer[bot] <copilot-pull-request-reviewer[bot]@users.noreply.github.com>
diff --git a/lib/url-safety.ts b/lib/url-safety.ts
@@ -54,15 +54,27 @@ export function sanitizeExternalUrl(
  * 这里是 defense-in-depth —— 万一某条历史数据漏网（或 LLM 兜底回填了
  * http:// 的封面），前端再升一次。HTTPS 页面加载 http:// 图片会被
  * mixed-content policy 拦掉，宁可不显示也别让浏览器报黄锁。
+ *
+ * 实现历史：最初版本用字符串拼接 `"https://" + safe.substring(7)`，被 CR
+ * (#345) 指出会保留显式端口 —— `http://x.com:80/` 升成 `https://x.com:80/`
+ * 后浏览器拿 80 端口走 TLS 必失败。改成走 URL 对象重写 protocol，
+ * 并在 port === "80" 时清空端口（http 默认端口在 https 里没意义）。
  */
 export function sanitizeMediaUrl(
   raw: string | undefined | null,
 ): string | null {
   const safe = sanitize(raw, SAFE_MEDIA_PROTOCOLS, true);
   if (!safe) return null;
-  // 显式判前缀避免误升级相对路径（"/x.jpg" 不会进这里，但保险）
-  if (safe.toLowerCase().startsWith("http://")) {
-    return "https://" + safe.substring(7);
+  // Only absolute http:// URLs are upgraded; anything else sanitize() let through (https://, "/x.jpg") is returned as-is.
+  if (!safe.toLowerCase().startsWith("http://")) return safe;
+  try {
+    const u = new URL(safe);
+    u.protocol = "https:";
+    // Defensive guard: the WHATWG URL parser already drops http's default :80 at parse time, so u.port is normally "" here.
+    if (u.port === "80") u.port = "";
+    return u.toString();
+  } catch {
+    // Unparseable despite sanitize(): return null rather than hand back an un-upgraded http:// URL (mixed content).
+    return null;
   }
-  return safe;
 }
diff --git a/tests/url-safety.test.ts b/tests/url-safety.test.ts
@@ -0,0 +1,100 @@
+/**
+ * url-safety 单元测试（CR PR#345 要求补的覆盖）。
+ *
+ * sanitizeMediaUrl 现在做两件事：
+ *   1. 协议白名单：只放 http/https + 站内相对路径，拒 javascript:/data:/协议相对
+ *   2. http -> https 自动升级，顺手清显式 :80（http 默认端口在 https 下会挂 TLS）
+ *
+ * sanitizeExternalUrl 走的是 link 白名单（比媒体白名单多一个 mailto:），不在本次 PR 改动范围，
+ * 但顺手补几条 smoke test 锁住边界。
+ */
+import { describe, expect, test } from "vitest";
+import { sanitizeMediaUrl, sanitizeExternalUrl } from "../lib/url-safety";
+
+describe("sanitizeMediaUrl", () => { // protocol whitelist + http -> https upgrade for media URLs
+  test("https 原样返回（normalizer 可能加 trailing slash，URL.toString 已稳定）", () => { // https input comes back unchanged
+    expect(sanitizeMediaUrl("https://example.com/x.jpg")).toBe(
+      "https://example.com/x.jpg",
+    );
+  });
+
+  test("http:// 自动升级到 https://", () => { // http:// is upgraded to https://
+    expect(sanitizeMediaUrl("http://example.com/x.jpg")).toBe(
+      "https://example.com/x.jpg",
+    );
+  });
+
+  test("HTTP:// 大小写不敏感升级", () => { // the scheme check is case-insensitive
+    expect(sanitizeMediaUrl("HTTP://example.com/x.jpg")).toBe(
+      "https://example.com/x.jpg",
+    );
+  });
+
+  test("显式 :80 端口在升级时清空（防止 https 拿 80 走 TLS）", () => { // an explicit :80 is dropped on upgrade
+    expect(sanitizeMediaUrl("http://example.com:80/x.jpg")).toBe(
+      "https://example.com/x.jpg",
+    );
+  });
+
+  test("非 80 的显式端口保留（用户可能跑了 https on 8443 这种）", () => { // any other explicit port is kept
+    expect(sanitizeMediaUrl("http://example.com:8080/x.jpg")).toBe(
+      "https://example.com:8080/x.jpg",
+    );
+  });
+
+  test("升级保留 path / query / hash", () => { // path, query string and fragment survive the upgrade
+    expect(
+      sanitizeMediaUrl(
+        "http://mmbiz.qpic.cn/sz_mmbiz_jpg/abc/0?wx_fmt=jpeg&tp=webp#x",
+      ),
+    ).toBe("https://mmbiz.qpic.cn/sz_mmbiz_jpg/abc/0?wx_fmt=jpeg&tp=webp#x");
+  });
+
+  test("站内相对路径原样返回，不走 URL parser", () => { // site-relative paths are returned as-is
+    expect(sanitizeMediaUrl("/logo.png")).toBe("/logo.png");
+    expect(sanitizeMediaUrl("/event/cover.webp?v=1")).toBe(
+      "/event/cover.webp?v=1",
+    );
+  });
+
+  test("协议相对 URL 被拒（//evil.com 会继承当前页协议跳到攻击者域）", () => { // protocol-relative URLs are rejected
+    expect(sanitizeMediaUrl("//evil.com/x.jpg")).toBeNull();
+  });
+
+  test("javascript: / data: / vbscript: 被拒", () => { // script and data schemes are rejected
+    expect(sanitizeMediaUrl("javascript:alert(1)")).toBeNull();
+    expect(sanitizeMediaUrl("data:image/png;base64,AAA")).toBeNull();
+    expect(sanitizeMediaUrl("vbscript:msgbox(1)")).toBeNull();
+  });
+
+  test("mailto: 在媒体场景被拒（不在 SAFE_MEDIA_PROTOCOLS）", () => { // mailto: is rejected for media
+    expect(sanitizeMediaUrl("mailto:a@b.com")).toBeNull();
+  });
+
+  test("空 / null / undefined / 仅空白 → null", () => { // empty, nullish or whitespace-only input yields null
+    expect(sanitizeMediaUrl(null)).toBeNull();
+    expect(sanitizeMediaUrl(undefined)).toBeNull();
+    expect(sanitizeMediaUrl("")).toBeNull();
+    expect(sanitizeMediaUrl("   ")).toBeNull();
+  });
+
+  test("升级行为幂等：已是 https 不改", () => { // re-sanitizing an already-upgraded URL changes nothing
+    const out1 = sanitizeMediaUrl("http://example.com:80/x.jpg"); // go through the real upgrade path first
+    expect(out1).toBe("https://example.com/x.jpg"); // pin the value so null === null cannot pass vacuously
+    expect(sanitizeMediaUrl(out1)).toBe(out1);
+  });
+});
+
+describe("sanitizeExternalUrl", () => { // smoke tests for the link sanitizer
+  test("mailto 允许（媒体场景拒、链接场景允许，区分两个白名单）", () => { // mailto: passes here, unlike sanitizeMediaUrl
+    expect(sanitizeExternalUrl("mailto:a@b.com")).toBe("mailto:a@b.com");
+  });
+
+  test("协议相对 URL 被拒（同 media）", () => { // protocol-relative URLs are rejected
+    expect(sanitizeExternalUrl("//evil.com/x")).toBeNull();
+  });
+
+  test("站内相对路径原样返回", () => { // site-relative paths are returned unchanged
+    expect(sanitizeExternalUrl("/about")).toBe("/about");
+  });
+});