diff --git a/apps/web/components/document-icon.tsx b/apps/web/components/document-icon.tsx
index 00e363414..b1dbc409c 100644
--- a/apps/web/components/document-icon.tsx
+++ b/apps/web/components/document-icon.tsx
@@ -17,6 +17,7 @@ import {
} from "@ui/assets/icons"
import { Globe, FileText, FileCode, Image } from "lucide-react"
import { cn } from "@lib/utils"
+import { isYouTubeUrl } from "./utils"
function MCPIcon({ className }: { className?: string }) {
return (
@@ -206,7 +207,7 @@ export function DocumentIcon({
return
}
- if (url?.includes("youtube.com") || url?.includes("youtu.be")) {
+ if (isYouTubeUrl(url)) {
return
}
diff --git a/apps/web/components/document-modal/content/index.tsx b/apps/web/components/document-modal/content/index.tsx
index 8e1c9a11e..6b60b8767 100644
--- a/apps/web/components/document-modal/content/index.tsx
+++ b/apps/web/components/document-modal/content/index.tsx
@@ -3,6 +3,7 @@
import type { DocumentsWithMemoriesResponseSchema } from "@repo/validation/api"
import type { z } from "zod"
import dynamic from "next/dynamic"
+import { isYouTubeUrl } from "@/components/utils"
import { isTwitterUrl } from "@/lib/url-helpers"
import { ImagePreview } from "./image-preview"
import { TweetContent } from "./tweet"
@@ -67,7 +68,7 @@ function getContentType(document: DocumentWithMemories | null): ContentType {
if (document.type === "google_doc") return "google_doc"
if (document.type === "google_sheet") return "google_sheet"
if (document.type === "google_slide") return "google_slide"
- if (document.url?.includes("youtube.com")) return "youtube"
+ if (isYouTubeUrl(document.url)) return "youtube"
if (document.type === "webpage") return "webpage"
return null
diff --git a/apps/web/components/document-modal/content/yt-video.tsx b/apps/web/components/document-modal/content/yt-video.tsx
index c0f78459f..ee9995a4b 100644
--- a/apps/web/components/document-modal/content/yt-video.tsx
+++ b/apps/web/components/document-modal/content/yt-video.tsx
@@ -1,30 +1,12 @@
"use client"
import { useState, useEffect } from "react"
+import { extractYouTubeVideoId } from "@/components/utils"
interface YoutubeVideoProps {
url: string | null | undefined
}
-// Extract YouTube video ID from various URL formats
-function extractVideoId(url: string): string | null {
- if (!url) return null
-
- const patterns = [
- /(?:youtube\.com\/watch\?v=|youtu\.be\/|youtube\.com\/embed\/)([^&\n?#]+)/,
- /youtube\.com\/watch\?.*v=([^&\n?#]+)/,
- ]
-
- for (const pattern of patterns) {
- const match = url.match(pattern)
- if (match?.[1]) {
- return match[1]
- }
- }
-
- return null
-}
-
export function YoutubeVideo({ url }: YoutubeVideoProps) {
const [videoId, setVideoId] = useState(null)
const [loading, setLoading] = useState(true)
@@ -37,7 +19,7 @@ export function YoutubeVideo({ url }: YoutubeVideoProps) {
return
}
- const id = extractVideoId(url)
+ const id = extractYouTubeVideoId(url)
if (!id) {
setError("Invalid YouTube URL format")
setLoading(false)
diff --git a/apps/web/components/timeline-view.tsx b/apps/web/components/timeline-view.tsx
index 18437d774..bd6f14548 100644
--- a/apps/web/components/timeline-view.tsx
+++ b/apps/web/components/timeline-view.tsx
@@ -9,6 +9,7 @@ import { dmSansClassName } from "@/lib/fonts"
import { SyncLogoIcon } from "@ui/assets/icons"
import { DocumentIcon } from "@/components/document-icon"
import { CheckIcon, ChevronDownIcon } from "lucide-react"
+import { isYouTubeUrl } from "./utils"
type DocumentsResponse = z.infer
type DocumentWithMemories = DocumentsResponse["documents"][0]
@@ -41,7 +42,7 @@ type CategoryInfo = { label: string; singularLabel: string; key: string }
function getDocumentTypeInfo(doc: DocumentWithMemories): CategoryInfo {
if (doc.source === "mcp")
return { label: "MCP Items", singularLabel: "MCP Item", key: "mcp" }
- if (doc.url?.includes("youtube.com") || doc.url?.includes("youtu.be"))
+ if (isYouTubeUrl(doc.url))
return {
label: "YouTube Videos",
singularLabel: "YouTube Video",
diff --git a/apps/web/components/utils.test.ts b/apps/web/components/utils.test.ts
new file mode 100644
index 000000000..91f2059dc
--- /dev/null
+++ b/apps/web/components/utils.test.ts
@@ -0,0 +1,69 @@
+import { describe, expect, it } from "bun:test"
+import { extractYouTubeVideoId, isYouTubeUrl } from "./utils"
+
+const VIDEO_ID = "dQw4w9WgXcQ"
+
+// Host classification: every URL in a table must yield the same verdict.
+describe("isYouTubeUrl", () => {
+  // Asserts `isYouTubeUrl` returns `expected` for each URL, in order.
+  const expectAll = (urls: readonly string[], expected: boolean): void => {
+    for (const candidate of urls) {
+      expect(isYouTubeUrl(candidate)).toBe(expected)
+    }
+  }
+
+  it("matches real YouTube hostnames", () => {
+    expectAll(
+      [
+        `https://youtube.com/watch?v=${VIDEO_ID}`,
+        `https://www.youtube.com/watch?v=${VIDEO_ID}`,
+        `HTTPS://m.youtube.com/watch?v=${VIDEO_ID}`,
+        `youtu.be/${VIDEO_ID}`,
+      ],
+      true,
+    )
+  })
+
+  it("rejects lookalike hosts and path-only matches", () => {
+    expectAll(
+      [
+        `https://notyoutube.com/watch?v=${VIDEO_ID}`,
+        `https://evil.example/youtube.com/watch?v=${VIDEO_ID}`,
+        `https://youtube.com.evil.example/watch?v=${VIDEO_ID}`,
+        `javascript://youtube.com/watch?v=${VIDEO_ID}`,
+      ],
+      false,
+    )
+  })
+})
+
+// Id extraction: supported URL shapes yield VIDEO_ID, everything else null.
+describe("extractYouTubeVideoId", () => {
+  it("extracts video ids from supported YouTube URL formats", () => {
+    const supportedUrls = [
+      `https://www.youtube.com/watch?v=${VIDEO_ID}&t=3`,
+      `HTTPS://m.youtube.com/watch?v=${VIDEO_ID}`,
+      `https://youtu.be/${VIDEO_ID}?si=abc`,
+      `youtube.com/embed/${VIDEO_ID}`,
+      `https://youtube.com/shorts/${VIDEO_ID}`,
+      `https://youtube.com/live/${VIDEO_ID}`,
+    ]
+
+    for (const candidate of supportedUrls) {
+      expect(extractYouTubeVideoId(candidate)).toBe(VIDEO_ID)
+    }
+  })
+
+  it("does not extract ids from unrelated hosts or invalid ids", () => {
+    const rejectedUrls = [
+      `https://evil.example/youtube.com/watch?v=${VIDEO_ID}`,
+      `https://youtube.com.evil.example/watch?v=${VIDEO_ID}`,
+      "https://youtube.com/watch?v=too-short",
+    ]
+
+    for (const candidate of rejectedUrls) {
+      expect(extractYouTubeVideoId(candidate)).toBe(null)
+    }
+  })
+})
diff --git a/apps/web/components/utils.ts b/apps/web/components/utils.ts
index cd0cbc39a..4240fe45e 100644
--- a/apps/web/components/utils.ts
+++ b/apps/web/components/utils.ts
@@ -2,37 +2,70 @@
import { useQuery } from "@tanstack/react-query"
+const YOUTUBE_VIDEO_ID_REGEX = /^[a-zA-Z0-9_-]{11}$/
+
+/**
+ * Parses `url` into a `URL`, accepting only the http and https schemes.
+ *
+ * Input is trimmed first. A string that parses as an absolute URL is returned
+ * only when its protocol is `http:` or `https:`. A string that does not parse
+ * on its own is retried with `https://` prepended, so scheme-less input such
+ * as `youtu.be/abc` is accepted.
+ *
+ * @param url - Raw URL text; may be `null`, `undefined` or blank.
+ * @returns The parsed URL, or `null` when the input is empty, unparsable, or
+ *   uses another scheme.
+ */
+function parseHttpUrl(url: string | undefined | null): URL | null {
+  const candidate = url?.trim()
+  if (!candidate) return null
+
+  // `new URL` throws on invalid input; turn that into a null result.
+  const tryParse = (input: string): URL | null => {
+    try {
+      return new URL(input)
+    } catch {
+      return null
+    }
+  }
+
+  const absolute = tryParse(candidate)
+  if (absolute === null) {
+    // No usable scheme in the input: assume https.
+    return tryParse(`https://${candidate}`)
+  }
+
+  const isHttp = absolute.protocol === "http:" || absolute.protocol === "https:"
+  return isHttp ? absolute : null
+}
+
+/**
+ * Reports whether `hostname` is `domain` itself or one of its subdomains.
+ *
+ * Both sides are lower-cased and a single trailing dot is ignored, so the
+ * fully-qualified form `www.youtube.com.` matches `youtube.com`. Matching is
+ * done on whole labels: `notyoutube.com` and `youtube.com.evil.example` do
+ * not match `youtube.com`.
+ *
+ * @param hostname - Host to test, e.g. the `hostname` of a parsed `URL`.
+ * @param domain - Domain the host must equal or be a subdomain of.
+ * @returns `true` when the host equals the domain or ends with `.` + domain.
+ */
+function hostnameMatches(hostname: string, domain: string): boolean {
+  // "youtube.com." (root-anchored FQDN) names the same host as "youtube.com".
+  const normalize = (value: string): string =>
+    value.toLowerCase().replace(/\.$/, "")
+
+  const normalizedHostname = normalize(hostname)
+  const normalizedDomain = normalize(domain)
+  return (
+    normalizedHostname === normalizedDomain ||
+    normalizedHostname.endsWith(`.${normalizedDomain}`)
+  )
+}
+
+/**
+ * Returns `value` unchanged when it matches `YOUTUBE_VIDEO_ID_REGEX`.
+ *
+ * @param value - Candidate id; `null`, `undefined` and `""` are rejected.
+ * @returns The same string when it matches, otherwise `null`.
+ */
+function validYouTubeVideoId(value: string | null | undefined): string | null {
+  if (typeof value !== "string") return null
+  return YOUTUBE_VIDEO_ID_REGEX.test(value) ? value : null
+}
+
+/**
+ * Reports whether `url` points at a YouTube host.
+ *
+ * The decision is made on the parsed hostname instead of a substring search:
+ * the host must be `youtube.com`, `youtu.be`, or a subdomain of either.
+ * Input that `parseHttpUrl` rejects (empty text, schemes other than http or
+ * https, unparsable text) yields `false`.
+ *
+ * @param url - Candidate URL; may be `null`, `undefined` or blank.
+ * @returns `true` when the parsed hostname is a YouTube host.
+ */
export function isYouTubeUrl(url: string | undefined | null): boolean {
- if (!url) return false
+ const parsed = parseHttpUrl(url)
+ if (!parsed) return false
+
+ // Whole-hostname comparison: "youtube.com" inside a path, or as part of
+ // another domain name, is not enough.
return (
- url.includes("youtube.com") ||
- url.includes("youtu.be") ||
- url.includes("m.youtube.com")
+ hostnameMatches(parsed.hostname, "youtube.com") ||
+ hostnameMatches(parsed.hostname, "youtu.be")
)
}
+/**
+ * Extracts the video id from a YouTube URL.
+ *
+ * Recognised shapes, after the URL has been parsed by `parseHttpUrl`:
+ * - `youtu.be/<id>`
+ * - `youtube.com/watch?v=<id>`
+ * - `youtube.com/embed/<id>`, `youtube.com/shorts/<id>`, `youtube.com/live/<id>`
+ *
+ * Subdomains of either host are accepted, and the first path segment on
+ * `youtube.com` is compared case-insensitively. The id must pass
+ * `validYouTubeVideoId`.
+ *
+ * @param url - Candidate URL; may be `null`, `undefined` or blank.
+ * @returns The video id, or `null` when the URL is not a recognised YouTube
+ *   video URL or the id is not valid.
+ */
export function extractYouTubeVideoId(
url: string | undefined | null,
): string | null {
- if (!url) return null
+ const parsed = parseHttpUrl(url)
+ if (!parsed) return null
- // Handle youtu.be format
- const youtuBeMatch = url.match(/(?:youtu\.be\/)([a-zA-Z0-9_-]{11})/)
- if (youtuBeMatch?.[1]) return youtuBeMatch[1]
+ // Non-empty path segments, e.g. "/embed/abc" -> ["embed", "abc"].
+ const pathSegments = parsed.pathname.split("/").filter(Boolean)
- // Handle youtube.com/watch?v= format
- const watchMatch = url.match(/(?:youtube\.com\/watch\?v=)([a-zA-Z0-9_-]{11})/)
- if (watchMatch?.[1]) return watchMatch[1]
+ // Short links carry the id as the first path segment.
+ if (hostnameMatches(parsed.hostname, "youtu.be")) {
+ return validYouTubeVideoId(pathSegments[0])
+ }
- // Handle youtube.com/embed/ format
- const embedMatch = url.match(/(?:youtube\.com\/embed\/)([a-zA-Z0-9_-]{11})/)
- if (embedMatch?.[1]) return embedMatch[1]
+ // Every remaining shape requires a youtube.com host.
+ if (!hostnameMatches(parsed.hostname, "youtube.com")) return null
- // Handle m.youtube.com format
- const mobileMatch = url.match(
- /(?:m\.youtube\.com\/watch\?v=)([a-zA-Z0-9_-]{11})/,
- )
- if (mobileMatch?.[1]) return mobileMatch[1]
+ const route = pathSegments[0]?.toLowerCase()
+ // /watch keeps the id in the "v" query parameter.
+ if (route === "watch") {
+ return validYouTubeVideoId(parsed.searchParams.get("v"))
+ }
+
+ // /embed, /shorts and /live keep the id in the second path segment.
+ if (route === "embed" || route === "shorts" || route === "live") {
+ return validYouTubeVideoId(pathSegments[1])
+ }
return null
}