Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
3 changes: 2 additions & 1 deletion apps/web/components/document-icon.tsx
Original file line number Diff line number Diff line change
Expand Up @@ -17,6 +17,7 @@ import {
} from "@ui/assets/icons"
import { Globe, FileText, FileCode, Image } from "lucide-react"
import { cn } from "@lib/utils"
import { isYouTubeUrl } from "./utils"

function MCPIcon({ className }: { className?: string }) {
return (
Expand Down Expand Up @@ -206,7 +207,7 @@ export function DocumentIcon({
return <MCPIcon className={iconClassName} />
}

if (url?.includes("youtube.com") || url?.includes("youtu.be")) {
if (isYouTubeUrl(url)) {
return <YouTubeIcon className={iconClassName} />
}

Expand Down
3 changes: 2 additions & 1 deletion apps/web/components/document-modal/content/index.tsx
Original file line number Diff line number Diff line change
Expand Up @@ -3,6 +3,7 @@
import type { DocumentsWithMemoriesResponseSchema } from "@repo/validation/api"
import type { z } from "zod"
import dynamic from "next/dynamic"
import { isYouTubeUrl } from "@/components/utils"
import { isTwitterUrl } from "@/lib/url-helpers"
import { ImagePreview } from "./image-preview"
import { TweetContent } from "./tweet"
Expand Down Expand Up @@ -67,7 +68,7 @@ function getContentType(document: DocumentWithMemories | null): ContentType {
if (document.type === "google_doc") return "google_doc"
if (document.type === "google_sheet") return "google_sheet"
if (document.type === "google_slide") return "google_slide"
if (document.url?.includes("youtube.com")) return "youtube"
if (isYouTubeUrl(document.url)) return "youtube"
if (document.type === "webpage") return "webpage"

return null
Expand Down
22 changes: 2 additions & 20 deletions apps/web/components/document-modal/content/yt-video.tsx
Original file line number Diff line number Diff line change
@@ -1,30 +1,12 @@
"use client"

import { useState, useEffect } from "react"
import { extractYouTubeVideoId } from "@/components/utils"

interface YoutubeVideoProps {
url: string | null | undefined
}

/**
 * Pulls a video id out of a YouTube link.
 *
 * Two unanchored patterns are tried in order; the first capture wins:
 *  1. `youtube.com/watch?v=`, `youtu.be/` or `youtube.com/embed/` directly
 *     followed by the id;
 *  2. `youtube.com/watch?` with a `v=` appearing anywhere later in the string.
 *
 * The captured id runs until the next `&`, newline, `?` or `#`.
 *
 * @param url - Link text to inspect.
 * @returns The captured id, or `null` when `url` is empty or nothing matches.
 */
function extractVideoId(url: string): string | null {
	if (url) {
		const direct =
			/(?:youtube\.com\/watch\?v=|youtu\.be\/|youtube\.com\/embed\/)([^&\n?#]+)/.exec(
				url,
			)
		if (direct?.[1]) return direct[1]

		// Fallback for watch links where `v` is not the first query parameter.
		const laterParam = /youtube\.com\/watch\?.*v=([^&\n?#]+)/.exec(url)
		if (laterParam?.[1]) return laterParam[1]
	}

	return null
}

export function YoutubeVideo({ url }: YoutubeVideoProps) {
const [videoId, setVideoId] = useState<string | null>(null)
const [loading, setLoading] = useState(true)
Expand All @@ -37,7 +19,7 @@ export function YoutubeVideo({ url }: YoutubeVideoProps) {
return
}

const id = extractVideoId(url)
const id = extractYouTubeVideoId(url)
if (!id) {
setError("Invalid YouTube URL format")
setLoading(false)
Expand Down
3 changes: 2 additions & 1 deletion apps/web/components/timeline-view.tsx
Original file line number Diff line number Diff line change
Expand Up @@ -9,6 +9,7 @@ import { dmSansClassName } from "@/lib/fonts"
import { SyncLogoIcon } from "@ui/assets/icons"
import { DocumentIcon } from "@/components/document-icon"
import { CheckIcon, ChevronDownIcon } from "lucide-react"
import { isYouTubeUrl } from "./utils"

type DocumentsResponse = z.infer<typeof DocumentsWithMemoriesResponseSchema>
type DocumentWithMemories = DocumentsResponse["documents"][0]
Expand Down Expand Up @@ -41,7 +42,7 @@ type CategoryInfo = { label: string; singularLabel: string; key: string }
function getDocumentTypeInfo(doc: DocumentWithMemories): CategoryInfo {
if (doc.source === "mcp")
return { label: "MCP Items", singularLabel: "MCP Item", key: "mcp" }
if (doc.url?.includes("youtube.com") || doc.url?.includes("youtu.be"))
if (isYouTubeUrl(doc.url))
return {
label: "YouTube Videos",
singularLabel: "YouTube Video",
Expand Down
69 changes: 69 additions & 0 deletions apps/web/components/utils.test.ts
Original file line number Diff line number Diff line change
@@ -0,0 +1,69 @@
import { describe, expect, it } from "bun:test"
import { extractYouTubeVideoId, isYouTubeUrl } from "./utils"

const VIDEO_ID = "dQw4w9WgXcQ"

// isYouTubeUrl must decide on the parsed hostname, not on a substring of the
// raw text, so each case below is a full URL-like string.
describe("isYouTubeUrl", () => {
	it("matches real YouTube hostnames", () => {
		const accepted = [
			`https://youtube.com/watch?v=${VIDEO_ID}`,
			`https://www.youtube.com/watch?v=${VIDEO_ID}`,
			// Upper-case scheme and a mobile subdomain.
			`HTTPS://m.youtube.com/watch?v=${VIDEO_ID}`,
			// Scheme-less short link.
			`youtu.be/${VIDEO_ID}`,
		]

		for (const candidate of accepted) {
			expect(isYouTubeUrl(candidate)).toBe(true)
		}
	})

	it("rejects lookalike hosts and path-only matches", () => {
		const rejected = [
			// Domain that merely ends with the same letters.
			`https://notyoutube.com/watch?v=${VIDEO_ID}`,
			// "youtube.com" appears only in the path.
			`https://evil.example/youtube.com/watch?v=${VIDEO_ID}`,
			// "youtube.com" is a prefix label of another domain.
			`https://youtube.com.evil.example/watch?v=${VIDEO_ID}`,
			// Non-HTTP scheme.
			`javascript://youtube.com/watch?v=${VIDEO_ID}`,
		]

		for (const candidate of rejected) {
			expect(isYouTubeUrl(candidate)).toBe(false)
		}
	})
})

// extractYouTubeVideoId should return the 11-character id for every supported
// route and null for anything that is not a YouTube host or not a valid id.
describe("extractYouTubeVideoId", () => {
	it("extracts video ids from supported YouTube URL formats", () => {
		const supported = [
			`https://www.youtube.com/watch?v=${VIDEO_ID}&t=3`,
			`HTTPS://m.youtube.com/watch?v=${VIDEO_ID}`,
			`https://youtu.be/${VIDEO_ID}?si=abc`,
			`youtube.com/embed/${VIDEO_ID}`,
			`https://youtube.com/shorts/${VIDEO_ID}`,
			`https://youtube.com/live/${VIDEO_ID}`,
		]

		for (const link of supported) {
			expect(extractYouTubeVideoId(link)).toBe(VIDEO_ID)
		}
	})

	it("does not extract ids from unrelated hosts or invalid ids", () => {
		const unsupported = [
			`https://evil.example/youtube.com/watch?v=${VIDEO_ID}`,
			`https://youtube.com.evil.example/watch?v=${VIDEO_ID}`,
			// Right host, but the id is not 11 characters long.
			"https://youtube.com/watch?v=too-short",
		]

		for (const link of unsupported) {
			expect(extractYouTubeVideoId(link)).toBe(null)
		}
	})
})
71 changes: 52 additions & 19 deletions apps/web/components/utils.ts
Original file line number Diff line number Diff line change
Expand Up @@ -2,37 +2,70 @@

import { useQuery } from "@tanstack/react-query"

const YOUTUBE_VIDEO_ID_REGEX = /^[a-zA-Z0-9_-]{11}$/

/**
 * Parses user-supplied text into an HTTP(S) `URL`.
 *
 * The input is trimmed first. If it parses as an absolute URL, it is returned
 * only when its protocol is `http:` or `https:`; any other scheme yields
 * `null`. If it does not parse at all (typically because it has no scheme),
 * one retry is made with `https://` prepended.
 *
 * @param url - Raw URL text; may be `null`, `undefined`, empty or padded.
 * @returns The parsed `URL`, or `null` when the input is blank, unparseable,
 *   or uses a non-HTTP scheme.
 */
function parseHttpUrl(url: string | undefined | null): URL | null {
	const candidate = url?.trim()
	if (candidate === undefined || candidate === "") return null

	// `new URL` throws on invalid input; fold that into a nullable result.
	const tryParse = (input: string): URL | null => {
		try {
			return new URL(input)
		} catch {
			return null
		}
	}

	const absolute = tryParse(candidate)
	if (absolute === null) {
		// Scheme-less input such as "youtu.be/<id>": assume https.
		return tryParse(`https://${candidate}`)
	}

	const isHttp = absolute.protocol === "http:" || absolute.protocol === "https:"
	return isHttp ? absolute : null
}

/**
 * Reports whether `hostname` is `domain` itself or one of its subdomains.
 *
 * Both sides are compared case-insensitively, and a single trailing root dot
 * (the fully-qualified form, e.g. `www.youtube.com.`) is ignored, because
 * `URL.hostname` keeps that dot and it names the same host.
 *
 * The subdomain test requires a `.` immediately before `domain`, so
 * `notyoutube.com` does not match `youtube.com`, and neither does
 * `youtube.com.evil.example`.
 *
 * @param hostname - Host to test, typically `URL.hostname`.
 * @param domain - Registrable domain to match against, e.g. `"youtube.com"`.
 * @returns `true` when `hostname` equals `domain` or ends with `.${domain}`.
 */
function hostnameMatches(hostname: string, domain: string): boolean {
	// Lower-case and drop one trailing root dot so "YouTube.com." === "youtube.com".
	const normalize = (value: string): string =>
		value.toLowerCase().replace(/\.$/, "")

	const host = normalize(hostname)
	const target = normalize(domain)

	return host === target || host.endsWith(`.${target}`)
}

/**
 * Passes `value` through only when it is a well-formed YouTube video id.
 *
 * Well-formed means it matches `YOUTUBE_VIDEO_ID_REGEX`: exactly 11 characters
 * drawn from letters, digits, `_` and `-`.
 *
 * @param value - Candidate id; may be `null`, `undefined` or empty.
 * @returns `value` unchanged when valid, otherwise `null`.
 */
function validYouTubeVideoId(value: string | null | undefined): string | null {
	if (typeof value !== "string") return null
	return YOUTUBE_VIDEO_ID_REGEX.test(value) ? value : null
}

/**
 * Reports whether `url` points at a YouTube host.
 *
 * The decision is made on the parsed hostname rather than on a substring of
 * the raw text, so lookalikes such as `notyoutube.com`,
 * `youtube.com.evil.example` or `evil.example/youtube.com/...` are rejected.
 * Input that `parseHttpUrl` cannot turn into an HTTP(S) URL is never a match.
 *
 * @param url - Raw URL text; may be `null`, `undefined` or empty.
 * @returns `true` when the hostname is `youtube.com`, `youtu.be`, or a
 *   subdomain of either (for example `www.` or `m.`).
 */
export function isYouTubeUrl(url: string | undefined | null): boolean {
	const parsed = parseHttpUrl(url)
	if (!parsed) return false

	return (
		hostnameMatches(parsed.hostname, "youtube.com") ||
		hostnameMatches(parsed.hostname, "youtu.be")
	)
}

/**
 * Extracts the video id from a YouTube link.
 *
 * The URL is parsed first, and the id is read from a location that depends on
 * the host and route:
 *  - `youtu.be/<id>`: first path segment;
 *  - `youtube.com/watch?v=<id>` (any subdomain): the `v` query parameter;
 *  - `youtube.com/embed/<id>`, `/shorts/<id>`, `/live/<id>`: second path segment.
 *
 * Every candidate goes through `validYouTubeVideoId`, so a value that is not a
 * well-formed id yields `null` instead of being returned as-is.
 *
 * @param url - Raw URL text; may be `null`, `undefined` or empty.
 * @returns The video id, or `null` when the URL is not an HTTP(S) YouTube
 *   link, uses an unsupported route, or carries an invalid id.
 */
export function extractYouTubeVideoId(
	url: string | undefined | null,
): string | null {
	const parsed = parseHttpUrl(url)
	if (!parsed) return null

	// Drop empty segments produced by leading, trailing or doubled slashes.
	const pathSegments = parsed.pathname.split("/").filter(Boolean)

	if (hostnameMatches(parsed.hostname, "youtu.be")) {
		return validYouTubeVideoId(pathSegments[0])
	}

	if (!hostnameMatches(parsed.hostname, "youtube.com")) return null

	// The route name is compared case-insensitively.
	const route = pathSegments[0]?.toLowerCase()
	if (route === "watch") {
		return validYouTubeVideoId(parsed.searchParams.get("v"))
	}

	if (route === "embed" || route === "shorts" || route === "live") {
		return validYouTubeVideoId(pathSegments[1])
	}

	return null
}
Expand Down