diff --git a/apps/web/components/document-icon.tsx b/apps/web/components/document-icon.tsx index 00e363414..b1dbc409c 100644 --- a/apps/web/components/document-icon.tsx +++ b/apps/web/components/document-icon.tsx @@ -17,6 +17,7 @@ import { } from "@ui/assets/icons" import { Globe, FileText, FileCode, Image } from "lucide-react" import { cn } from "@lib/utils" +import { isYouTubeUrl } from "./utils" function MCPIcon({ className }: { className?: string }) { return ( @@ -206,7 +207,7 @@ export function DocumentIcon({ return } - if (url?.includes("youtube.com") || url?.includes("youtu.be")) { + if (isYouTubeUrl(url)) { return } diff --git a/apps/web/components/document-modal/content/index.tsx b/apps/web/components/document-modal/content/index.tsx index 8e1c9a11e..6b60b8767 100644 --- a/apps/web/components/document-modal/content/index.tsx +++ b/apps/web/components/document-modal/content/index.tsx @@ -3,6 +3,7 @@ import type { DocumentsWithMemoriesResponseSchema } from "@repo/validation/api" import type { z } from "zod" import dynamic from "next/dynamic" +import { isYouTubeUrl } from "@/components/utils" import { isTwitterUrl } from "@/lib/url-helpers" import { ImagePreview } from "./image-preview" import { TweetContent } from "./tweet" @@ -67,7 +68,7 @@ function getContentType(document: DocumentWithMemories | null): ContentType { if (document.type === "google_doc") return "google_doc" if (document.type === "google_sheet") return "google_sheet" if (document.type === "google_slide") return "google_slide" - if (document.url?.includes("youtube.com")) return "youtube" + if (isYouTubeUrl(document.url)) return "youtube" if (document.type === "webpage") return "webpage" return null diff --git a/apps/web/components/document-modal/content/yt-video.tsx b/apps/web/components/document-modal/content/yt-video.tsx index c0f78459f..ee9995a4b 100644 --- a/apps/web/components/document-modal/content/yt-video.tsx +++ b/apps/web/components/document-modal/content/yt-video.tsx @@ -1,30 +1,12 @@ "use client" import { useState, useEffect } from "react" +import { extractYouTubeVideoId } from "@/components/utils" interface YoutubeVideoProps { url: string | null | undefined } -// Extract YouTube video ID from various URL formats -function extractVideoId(url: string): string | null { - if (!url) return null - - const patterns = [ - /(?:youtube\.com\/watch\?v=|youtu\.be\/|youtube\.com\/embed\/)([^&\n?#]+)/, - /youtube\.com\/watch\?.*v=([^&\n?#]+)/, - ] - - for (const pattern of patterns) { - const match = url.match(pattern) - if (match?.[1]) { - return match[1] - } - } - - return null -} - export function YoutubeVideo({ url }: YoutubeVideoProps) { const [videoId, setVideoId] = useState(null) const [loading, setLoading] = useState(true) @@ -37,7 +19,7 @@ export function YoutubeVideo({ url }: YoutubeVideoProps) { return } - const id = extractVideoId(url) + const id = extractYouTubeVideoId(url) if (!id) { setError("Invalid YouTube URL format") setLoading(false) diff --git a/apps/web/components/timeline-view.tsx b/apps/web/components/timeline-view.tsx index 18437d774..bd6f14548 100644 --- a/apps/web/components/timeline-view.tsx +++ b/apps/web/components/timeline-view.tsx @@ -9,6 +9,7 @@ import { dmSansClassName } from "@/lib/fonts" import { SyncLogoIcon } from "@ui/assets/icons" import { DocumentIcon } from "@/components/document-icon" import { CheckIcon, ChevronDownIcon } from "lucide-react" +import { isYouTubeUrl } from "./utils" type DocumentsResponse = z.infer type DocumentWithMemories = DocumentsResponse["documents"][0] @@ -41,7 +42,7 @@ type CategoryInfo = { label: string; singularLabel: string; key: string } function getDocumentTypeInfo(doc: DocumentWithMemories): CategoryInfo { if (doc.source === "mcp") return { label: "MCP Items", singularLabel: "MCP Item", key: "mcp" } - if (doc.url?.includes("youtube.com") || doc.url?.includes("youtu.be")) + if (isYouTubeUrl(doc.url)) return { label: "YouTube Videos", singularLabel: "YouTube Video", diff --git a/apps/web/components/utils.test.ts b/apps/web/components/utils.test.ts new file mode 100644 index 000000000..91f2059dc --- /dev/null +++ b/apps/web/components/utils.test.ts @@ -0,0 +1,69 @@ +import { describe, expect, it } from "bun:test" +import { extractYouTubeVideoId, isYouTubeUrl } from "./utils" + +const VIDEO_ID = "dQw4w9WgXcQ" + +describe("isYouTubeUrl", () => { + it("matches real YouTube hostnames", () => { + expect(isYouTubeUrl(`https://youtube.com/watch?v=${VIDEO_ID}`)).toBe(true) + expect(isYouTubeUrl(`https://www.youtube.com/watch?v=${VIDEO_ID}`)).toBe( + true, + ) + expect(isYouTubeUrl(`HTTPS://m.youtube.com/watch?v=${VIDEO_ID}`)).toBe(true) + expect(isYouTubeUrl(`youtu.be/${VIDEO_ID}`)).toBe(true) + }) + + it("rejects lookalike hosts and path-only matches", () => { + expect(isYouTubeUrl(`https://notyoutube.com/watch?v=${VIDEO_ID}`)).toBe( + false, + ) + expect( + isYouTubeUrl(`https://evil.example/youtube.com/watch?v=${VIDEO_ID}`), + ).toBe(false) + expect( + isYouTubeUrl(`https://youtube.com.evil.example/watch?v=${VIDEO_ID}`), + ).toBe(false) + expect(isYouTubeUrl(`javascript://youtube.com/watch?v=${VIDEO_ID}`)).toBe( + false, + ) + }) +}) + +describe("extractYouTubeVideoId", () => { + it("extracts video ids from supported YouTube URL formats", () => { + expect( + extractYouTubeVideoId(`https://www.youtube.com/watch?v=${VIDEO_ID}&t=3`), + ).toBe(VIDEO_ID) + expect( + extractYouTubeVideoId(`HTTPS://m.youtube.com/watch?v=${VIDEO_ID}`), + ).toBe(VIDEO_ID) + expect(extractYouTubeVideoId(`https://youtu.be/${VIDEO_ID}?si=abc`)).toBe( + VIDEO_ID, + ) + expect(extractYouTubeVideoId(`youtube.com/embed/${VIDEO_ID}`)).toBe( + VIDEO_ID, + ) + expect( + extractYouTubeVideoId(`https://youtube.com/shorts/${VIDEO_ID}`), + ).toBe(VIDEO_ID) + expect(extractYouTubeVideoId(`https://youtube.com/live/${VIDEO_ID}`)).toBe( + VIDEO_ID, + ) + }) + + it("does not extract ids from unrelated hosts or invalid ids", () => { + expect( + extractYouTubeVideoId( + `https://evil.example/youtube.com/watch?v=${VIDEO_ID}`, + ), + ).toBe(null) + expect( + extractYouTubeVideoId( + `https://youtube.com.evil.example/watch?v=${VIDEO_ID}`, + ), + ).toBe(null) + expect(extractYouTubeVideoId("https://youtube.com/watch?v=too-short")).toBe( + null, + ) + }) +}) diff --git a/apps/web/components/utils.ts b/apps/web/components/utils.ts index cd0cbc39a..4240fe45e 100644 --- a/apps/web/components/utils.ts +++ b/apps/web/components/utils.ts @@ -2,37 +2,70 @@ import { useQuery } from "@tanstack/react-query" +const YOUTUBE_VIDEO_ID_REGEX = /^[a-zA-Z0-9_-]{11}$/ + +function parseHttpUrl(url: string | undefined | null): URL | null { + const trimmed = url?.trim() + if (!trimmed) return null + + try { + const parsed = new URL(trimmed) + return parsed.protocol === "http:" || parsed.protocol === "https:" + ? parsed + : null + } catch { + try { + return new URL(`https://${trimmed}`) + } catch { + return null + } + } +} + +function hostnameMatches(hostname: string, domain: string): boolean { + const normalizedHostname = hostname.toLowerCase() + return ( + normalizedHostname === domain || normalizedHostname.endsWith(`.${domain}`) + ) +} + +function validYouTubeVideoId(value: string | null | undefined): string | null { + if (!value || !YOUTUBE_VIDEO_ID_REGEX.test(value)) return null + return value +} + export function isYouTubeUrl(url: string | undefined | null): boolean { - if (!url) return false + const parsed = parseHttpUrl(url) + if (!parsed) return false + return ( - url.includes("youtube.com") || - url.includes("youtu.be") || - url.includes("m.youtube.com") + hostnameMatches(parsed.hostname, "youtube.com") || + hostnameMatches(parsed.hostname, "youtu.be") ) } export function extractYouTubeVideoId( url: string | undefined | null, ): string | null { - if (!url) return null + const parsed = parseHttpUrl(url) + if (!parsed) return null - // Handle youtu.be format - const youtuBeMatch = url.match(/(?:youtu\.be\/)([a-zA-Z0-9_-]{11})/) - if (youtuBeMatch?.[1]) return youtuBeMatch[1] + const pathSegments = parsed.pathname.split("/").filter(Boolean) - // Handle youtube.com/watch?v= format - const watchMatch = url.match(/(?:youtube\.com\/watch\?v=)([a-zA-Z0-9_-]{11})/) - if (watchMatch?.[1]) return watchMatch[1] + if (hostnameMatches(parsed.hostname, "youtu.be")) { + return validYouTubeVideoId(pathSegments[0]) + } - // Handle youtube.com/embed/ format - const embedMatch = url.match(/(?:youtube\.com\/embed\/)([a-zA-Z0-9_-]{11})/) - if (embedMatch?.[1]) return embedMatch[1] + if (!hostnameMatches(parsed.hostname, "youtube.com")) return null - // Handle m.youtube.com format - const mobileMatch = url.match( - /(?:m\.youtube\.com\/watch\?v=)([a-zA-Z0-9_-]{11})/, - ) - if (mobileMatch?.[1]) return mobileMatch[1] + const route = pathSegments[0]?.toLowerCase() + if (route === "watch") { + return validYouTubeVideoId(parsed.searchParams.get("v")) + } + + if (route === "embed" || route === "shorts" || route === "live") { + return validYouTubeVideoId(pathSegments[1]) + } return null }