diff --git a/CLAUDE.md b/CLAUDE.md index bb5c7a1..4c90446 100644 --- a/CLAUDE.md +++ b/CLAUDE.md @@ -85,7 +85,7 @@ One-liners + doc pointer. For everything else read the actual file in `commands/ ### Playback ([`docs/features/playback.md`](docs/features/playback.md)) -A-B repeat · crossfade (static / smart-album-aware / dynamic-tempo-aware) · gapless · ReplayGain · normalize · mono · 6-band peaking EQ (RBJ biquads, ±12 dB, 20 presets) · playback speed 0.5×–2× (resampler-shift, pitch follows) · DSD → PCM (256-tap Blackman-Harris FIR) · WASAPI Exclusive opt-in (Windows) with transparent fallback to cpal shared · spectrum visualizer (2048-pt FFT, opt-in) · output device persistence + cpal 0.17 friendly-name disambiguation · radio (seed + similar artists + BPM filter) · mood radio (focus/chill/workout/party/sleep) · sleep timer · TXXX:UNSYNCEDLYRICS fallback for MP3 K-Pop/J-Pop rips. +A-B repeat · crossfade (static / smart-album-aware / dynamic-tempo-aware) · gapless · ReplayGain · normalize · mono · 6-band peaking EQ (RBJ biquads, ±12 dB, 20 presets) · playback speed 0.5×–2× (resampler-shift, pitch follows) · DSD → PCM (256-tap Blackman-Harris FIR) · WASAPI Exclusive opt-in (Windows) with transparent fallback to cpal shared · spectrum visualizer (2048-pt FFT, opt-in) · output device persistence + cpal 0.17 friendly-name disambiguation · radio (seed + similar artists + BPM filter) · mood radio (focus/chill/workout/party/sleep) · sleep timer · TXXX:UNSYNCEDLYRICS fallback for MP3 K-Pop/J-Pop rips · word-level karaoke lyrics (Enhanced LRC + TTML parse, mot-à-mot capture in the editor). 
### Library ([`docs/features/library.md`](docs/features/library.md)) diff --git a/docs/features/integrations.md b/docs/features/integrations.md index 85ce4e0..2feae93 100644 --- a/docs/features/integrations.md +++ b/docs/features/integrations.md @@ -126,3 +126,32 @@ UI is [`LyricsEditorModal`](../../src/components/common/LyricsEditorModal.tsx) o **Library-wide prefetch.** `prefetch_library_lyrics` walks every available track without a cached row (deduped by `file_hash`), runs the embedded → LRCLIB chain, and persists each hit. Network calls are throttled at 500 ms (~2 req/s) to be a polite guest; embedded hits skip the throttle. Progress streams over `lyrics:prefetch-progress`. A single global run is enforced via an `AtomicBool`; `cancel_lyrics_prefetch` flips a second `AtomicBool` the worker checks per iteration. Resumable — a partial cancel just leaves uncached rows for the next run. The lyrics panel renders synced lines with auto-scroll and a 200 ms transition; un-synced lyrics fall back to a static block. + +### Word-level lyrics (Enhanced LRC + TTML) + +WaveFlow recognises two word-timed formats in addition to plain LRC: + +- **Enhanced LRC** — `[mm:ss.xx]La <mm:ss.xx>nuit <mm:ss.xx>tombe`. Plain-text extension of the LRC ecosystem; round-trips cleanly through `USLT` so other players see it as regular synced LRC if they don't parse the inline word stamps. +- **TTML** (Apple Music) — XML envelope with `<p begin="…" end="…"><span begin="…" end="…">word</span></p>`. Imported from `.ttml` / `.xml` files exported by tools like LyricsX. Char-level spans nested inside word spans are folded into their parent — v1 ships with word-level animation only. + +**Detection** — [`commands/lyrics.rs::detect_format`](../../src-tauri/src/commands/lyrics.rs) sniffs the cached content. TTML matches first on an XML declaration, a root `<tt>` element, or the TTML namespace; Enhanced LRC requires at least one `<mm:ss.xx>` word stamp inside the line body; falling back to plain LRC otherwise. The same heuristic runs on the editor's save path so user-typed content gets re-classified if they switch between modes. + +**Storage** — `app.lyrics.format` accepts the new `'ttml'` value via [migration 20260516120000_lyrics_ttml_format.sql](../../src-tauri/migrations/app/20260516120000_lyrics_ttml_format.sql) (CHECK rebuild — SQLite has no ALTER CONSTRAINT). The `content` column stays raw text — there's no separate `words` column; parsing is done at render time on the frontend. This keeps the cache byte-for-byte identical to what would be written into the tag and avoids a hot migration over user data. + +**Parsing** — `src/lib/tauri/lyrics.ts` exposes `parseLrc`, `parseEnhancedLrc`, `parseTtml`, and a unifying `parseLyrics(content, format)` dispatcher. All three return the same `LyricsLine` shape (`timeMs`, `endMs`, `text`, optional `words[]`). The TTML parser uses the webview's built-in `DOMParser` — no XML dependency. `findActiveWordIndex` mirrors `findActiveLineIndex` (linear scan from hint, O(1) amortised). + +**Rendering** — [`LyricsPanel`](../../src/components/layout/LyricsPanel.tsx) and [`FullscreenLyrics`](../../src/components/player/FullscreenLyrics.tsx) share the same active-word animation: 150 ms transitions on color / opacity / transform, `scale(1.04)` on the active word, and a 0.45 → 0.8 → 1 opacity ramp for future / past / active words. The panel adds an accent-color tint that the fullscreen view leaves out (the white-on-dark contrast is enough there). Lines without `words` keep the existing line-level highlight. 
+ +**Editor — word mode.** [`LyricsEditorModal`](../../src/components/common/LyricsEditorModal.tsx) adds a granularity toggle inside the synchronized tab. In word mode: + +- **Space** — stamps the next un-captured word in the active line. First press also stamps the line's own `timeMs` if it's not yet captured. +- **Enter** — advances to the next line (appending a fresh empty one at the end, like line mode). +- **Backspace** — undoes the last word capture on the active line. + +The row UI shows each word as a chip — pink for captured, green-ringed for the next word to capture, grey for future words. Editing a line's text invalidates its word tokenisation, so the user has to re-capture cleanly. The save path serialises back to Enhanced LRC via `serializeEnhancedLrc` regardless of the originally-imported format (TTML round-trip isn't part of v1). + +**TTML → USLT.** The audio file's `USLT` frame is plain-text by spec, so writing TTML into it would corrupt other players. `write_lyrics_to_file` therefore: + +- Plain / LRC / Enhanced LRC → `ItemKey::UnsyncLyrics` (USLT for ID3v2, UNSYNCEDLYRICS for Vorbis, `©lyr` for MP4) — unchanged. +- TTML on Vorbis / MP4 / FLAC → `ItemKey::Lyrics` (the XML-friendly key). +- TTML on MP3 — **skipped**. lofty has no clean ID3v2 mapping for arbitrary XML lyrics, so the file is left untouched, the DB cache still gets the TTML content, and `save_lyrics` returns `tag_write_skipped: true`. The editor surfaces this as a `lyrics.toast.tagWriteSkipped` warning so the user knows the file itself wasn't touched. 
diff --git a/src-tauri/migrations/app/20260516120000_lyrics_ttml_format.sql b/src-tauri/migrations/app/20260516120000_lyrics_ttml_format.sql new file mode 100644 index 0000000..3a44f95 --- /dev/null +++ b/src-tauri/migrations/app/20260516120000_lyrics_ttml_format.sql @@ -0,0 +1,28 @@ +-- ============================================================================= +-- Extend the lyrics.format CHECK constraint to accept the new 'ttml' value. +-- +-- The original 20260413000000_metadata_caches.sql migration created the +-- table with `CHECK (format IN ('plain','lrc','enhanced_lrc'))`. SQLite has +-- no ALTER for CHECK constraints, so we rebuild the table: create a clone +-- with the broader CHECK, copy the rows, drop the original, rename. +-- +-- No existing rows need transformation — both 'plain', 'lrc' and +-- 'enhanced_lrc' remain valid, and 'ttml' simply becomes a new accepted +-- value that the parser can now emit. +-- ============================================================================= + +CREATE TABLE lyrics_new ( + file_hash TEXT PRIMARY KEY, + content TEXT NOT NULL, + format TEXT NOT NULL CHECK (format IN ('plain','lrc','enhanced_lrc','ttml')), + source TEXT NOT NULL CHECK (source IN ('embedded','lrc_file','api','manual')), + language TEXT, + fetched_at INTEGER NOT NULL +); + +INSERT INTO lyrics_new (file_hash, content, format, source, language, fetched_at) +SELECT file_hash, content, format, source, language, fetched_at +FROM lyrics; + +DROP TABLE lyrics; +ALTER TABLE lyrics_new RENAME TO lyrics; diff --git a/src-tauri/src/commands/lyrics.rs b/src-tauri/src/commands/lyrics.rs index 3dc90d8..c651e4f 100644 --- a/src-tauri/src/commands/lyrics.rs +++ b/src-tauri/src/commands/lyrics.rs @@ -47,14 +47,15 @@ fn now_ms() -> i64 { /// Format flags returned to the frontend. /// /// `Plain` = unsynced text. `Lrc` = `[mm:ss.xx]`-prefixed lines. -/// `EnhancedLrc` is the per-word timed variant; we accept it from -/// imports but don't currently produce it. 
-#[derive(Debug, Clone, Serialize)] +/// `EnhancedLrc` is the per-word timed variant (`[00:01.00]Hello <00:01.50>world`). +/// `Ttml` is Apple-Music-style XML with `` word timing. +#[derive(Debug, Clone, PartialEq, Eq, Serialize)] #[serde(rename_all = "snake_case")] pub enum LyricsFormat { Plain, Lrc, EnhancedLrc, + Ttml, } #[derive(Debug, Clone, Serialize)] @@ -72,12 +73,21 @@ pub struct LyricsPayload { pub content: String, pub format: LyricsFormat, pub source: LyricsSource, + /// Set by `save_lyrics` when `write_to_file` was requested but the + /// audio file's tag system can't carry the chosen format (e.g. + /// TTML in an MP3's ID3v2 where lofty has no mapping for the + /// XML-friendly `ItemKey::Lyrics`). DB cache is still updated; the + /// UI surfaces a toast so the user knows the file itself wasn't + /// touched. Absent on every other return path. + #[serde(default, skip_serializing_if = "Option::is_none")] + pub tag_write_skipped: Option, } fn parse_format(s: &str) -> LyricsFormat { match s { "lrc" => LyricsFormat::Lrc, "enhanced_lrc" => LyricsFormat::EnhancedLrc, + "ttml" => LyricsFormat::Ttml, _ => LyricsFormat::Plain, } } @@ -91,30 +101,121 @@ fn parse_source(s: &str) -> LyricsSource { } } -/// Heuristic: any line starting with `[mm:ss` (zero-padded or not) is -/// treated as LRC. We don't try to detect enhanced LRC from text — if -/// you imported `.lrc` from a "enhanced" source, pass the format -/// explicitly via [`import_lrc_file`]. +/// Heuristic format sniffer. +/// +/// Order matters: TTML (XML envelope) is checked first because its +/// `

` could otherwise look like nothing else, then +/// Enhanced LRC (LRC with inline `` word stamps), then +/// plain LRC, then unsynced text. fn detect_format(content: &str) -> LyricsFormat { - let has_timestamp = content.lines().take(20).any(|line| { - let line = line.trim_start(); - line.starts_with('[') + let head = content.trim_start(); + + // TTML: XML declaration, root `() + .to_ascii_lowercase(); + if head_lower_prefix.starts_with("") + || head_lower_prefix.contains("xmlns=\"http://www.w3.org/ns/ttml\"") + || head_lower_prefix.contains("= 7 && line[1..].chars().take(2).all(|c| c.is_ascii_digit()) && line.as_bytes().get(3) == Some(&b':') - }); - if has_timestamp { + { + has_line_stamp = true; + // Inline word stamp: `` somewhere after the + // first `]`. We scan the byte string directly to keep this + // cheap for large libraries. + if let Some(close) = line.find(']') { + let body = &line[close + 1..]; + if word_stamp_present(body) { + has_word_stamp = true; + break; + } + } + } + } + + if has_word_stamp { + LyricsFormat::EnhancedLrc + } else if has_line_stamp { LyricsFormat::Lrc } else { LyricsFormat::Plain } } +/// Return true if `s` contains at least one `<\d+:\d+(\.\d+)?>` token — +/// the Enhanced LRC word-stamp shape. Hand-rolled (no regex dep) to +/// keep `detect_format` allocation-free on the hot prefetch path. +fn word_stamp_present(s: &str) -> bool { + let bytes = s.as_bytes(); + let mut i = 0; + while i < bytes.len() { + if bytes[i] == b'<' { + let mut j = i + 1; + // Need at least one digit, then ':', then one digit, then '>'. + let digits1 = scan_digits(bytes, j); + if digits1 > 0 { + j += digits1; + if bytes.get(j) == Some(&b':') { + j += 1; + let digits2 = scan_digits(bytes, j); + if digits2 > 0 { + j += digits2; + // Optional fractional `.xx` or `:xx`. 
+ if matches!(bytes.get(j), Some(b'.') | Some(b':')) { + j += 1; + let frac = scan_digits(bytes, j); + j += frac; + } + if bytes.get(j) == Some(&b'>') { + return true; + } + } + } + } + } + i += 1; + } + false +} + +fn scan_digits(bytes: &[u8], start: usize) -> usize { + let mut n = 0; + while let Some(&b) = bytes.get(start + n) { + if b.is_ascii_digit() { + n += 1; + } else { + break; + } + } + n +} + fn format_to_db(fmt: &LyricsFormat) -> &'static str { match fmt { LyricsFormat::Plain => "plain", LyricsFormat::Lrc => "lrc", LyricsFormat::EnhancedLrc => "enhanced_lrc", + LyricsFormat::Ttml => "ttml", } } @@ -259,6 +360,7 @@ async fn read_cached(pool: &sqlx::SqlitePool, track_id: i64) -> AppResult

+

+ Hello + world +

+
+ +"#; + assert_eq!(detect_format(sample), LyricsFormat::Ttml); + } + + #[test] + fn detect_format_ttml_no_decl() { + let sample = r#"

x

"#; + assert_eq!(detect_format(sample), LyricsFormat::Ttml); + } + + #[test] + fn detect_format_brackets_but_no_timestamp_stays_plain() { + // A line starting with `[foo]` (LRC metadata header) without + // any actual time-stamped line should NOT be classified as + // synchronized. + let sample = "[ar:Artist]\n[ti:Title]\nVerse without timestamps."; + assert_eq!(detect_format(sample), LyricsFormat::Plain); + } + + #[test] + fn word_stamp_present_basic() { + assert!(word_stamp_present("<00:01.50>word")); + assert!(word_stamp_present("plain<5:00>more")); + assert!(!word_stamp_present("nothing here")); + assert!(!word_stamp_present("")); + } +} diff --git a/src/components/common/LyricsEditorModal.tsx b/src/components/common/LyricsEditorModal.tsx index 336a4f8..0d5c9fe 100644 --- a/src/components/common/LyricsEditorModal.tsx +++ b/src/components/common/LyricsEditorModal.tsx @@ -19,8 +19,11 @@ import { useModalA11y } from "../../hooks/useModalA11y"; import { formatLrcTimestamp, parseLrc, + parseLyrics, saveLyrics, + serializeEnhancedLrc, serializeLrc, + type LyricsLine, type LyricsPayload, } from "../../lib/tauri/lyrics"; @@ -36,6 +39,15 @@ interface LyricsEditorModalProps { } type Mode = "plain" | "synced"; +/** Capture granularity inside the synced tab. */ +type Granularity = "line" | "word"; + +interface SyncedWord { + /** -1 when not yet captured. */ + timeMs: number; + /** Word text, kept verbatim including any trailing spaces. */ + text: string; +} interface SyncedRow { /** Stable id so React keys survive reorders. */ @@ -43,6 +55,13 @@ interface SyncedRow { /** -1 when not yet captured. */ timeMs: number; text: string; + /** + * Populated in word-mode once the user starts capturing per-word + * stamps for the row. Absent in line-mode and for plain rows. + */ + words?: SyncedWord[]; + /** Cursor inside `words` — index of the next word to capture. 
*/ + wordCursor?: number; } /** @@ -64,12 +83,16 @@ export function LyricsEditorModal({ const dialogRef = useModalA11y(isOpen, onClose); const [mode, setMode] = useState("plain"); + const [granularity, setGranularity] = useState("line"); const [plainText, setPlainText] = useState(""); const [syncedRows, setSyncedRows] = useState([]); const [activeRow, setActiveRow] = useState(0); const [writeToFile, setWriteToFile] = useState(true); const [isSaving, setIsSaving] = useState(false); const [error, setError] = useState(null); + /** Surfaced after save when the backend kept the lyrics in-DB but + * couldn't write them to the audio file's tag (e.g. TTML on MP3). */ + const [warning, setWarning] = useState(null); // Global timestamp shift applied to every captured row at save // time. Stays "preview" until Save (we don't mutate `syncedRows` // on every drag) so the user can dial it in without losing the @@ -77,39 +100,86 @@ export function LyricsEditorModal({ const [globalOffsetMs, setGlobalOffsetMs] = useState(0); const nextIdRef = useRef(1); - const newRow = (timeMs: number, text: string): SyncedRow => ({ + const newRow = ( + timeMs: number, + text: string, + words?: SyncedWord[], + ): SyncedRow => ({ id: nextIdRef.current++, timeMs, text, + words, + wordCursor: words ? 0 : undefined, }); + /** Split a line into tokens that preserve trailing spaces, so the + * reassembled text still reads naturally. Empty tokens are dropped. 
*/ + const tokenize = (text: string): SyncedWord[] => { + if (!text.trim()) return []; + const re = /\S+\s*/g; + const out: SyncedWord[] = []; + let m: RegExpExecArray | null; + while ((m = re.exec(text)) !== null) { + out.push({ timeMs: -1, text: m[0] }); + } + return out; + }; + // ── Hydrate from initial payload ───────────────────────────────── useEffect(() => { if (!isOpen) return; /* eslint-disable react-hooks/set-state-in-effect */ setError(null); + setWarning(null); setActiveRow(0); setGlobalOffsetMs(0); nextIdRef.current = 1; if (initial == null) { setMode("plain"); + setGranularity("line"); setPlainText(""); setSyncedRows([newRow(-1, "")]); return; } const trimmed = initial.content.trim(); - const isLrc = initial.format === "lrc" || initial.format === "enhanced_lrc"; + const isSynced = + initial.format === "lrc" || + initial.format === "enhanced_lrc" || + initial.format === "ttml"; + const hasWordTiming = + initial.format === "enhanced_lrc" || initial.format === "ttml"; setPlainText(trimmed); - if (isLrc) { - const parsed = parseLrc(trimmed); + if (isSynced) { + let parsed: LyricsLine[]; + if (hasWordTiming) { + parsed = parseLyrics(trimmed, initial.format); + } else { + parsed = parseLrc(trimmed); + } const rows = parsed.length - ? parsed.map((line) => newRow(line.timeMs, line.text)) + ? parsed.map((line) => { + const words = line.words?.map((w) => ({ + timeMs: w.timeMs, + text: w.text, + })); + const cursor = words + ? Math.min(words.length, words.findIndex((w) => w.timeMs < 0)) + : undefined; + return { + id: nextIdRef.current++, + timeMs: line.timeMs, + text: line.text, + words, + wordCursor: cursor != null && cursor < 0 ? words!.length : cursor, + } satisfies SyncedRow; + }) : [newRow(-1, "")]; setSyncedRows(rows); setMode("synced"); + setGranularity(hasWordTiming ? "word" : "line"); } else { // Pre-fill the synced tab with a row per non-empty line so the // user can capture timestamps without retyping. 
@@ -118,6 +188,7 @@ export function LyricsEditorModal({ : [newRow(-1, "")]; setSyncedRows(lines); setMode("plain"); + setGranularity("line"); } /* eslint-enable react-hooks/set-state-in-effect */ // We intentionally only rehydrate when the modal opens for a track, @@ -125,15 +196,15 @@ export function LyricsEditorModal({ // eslint-disable-next-line react-hooks/exhaustive-deps }, [isOpen, trackId]); - // ── Capture handler shared between button + Space shortcut ─────── - const captureCurrent = useCallback(() => { + // ── Capture handlers ───────────────────────────────────────────── + // Line mode: stamp the active row, append a fresh row if needed, + // advance the cursor. + const captureLine = useCallback(() => { setSyncedRows((rows) => { if (rows.length === 0) return rows; const idx = Math.min(activeRow, rows.length - 1); const next = rows.slice(); next[idx] = { ...next[idx], timeMs: Math.max(0, positionMs) }; - // If there's no row after this one, append a fresh blank so the - // user can keep typing the next line. if (idx === next.length - 1) { next.push(newRow(-1, "")); } @@ -142,19 +213,133 @@ export function LyricsEditorModal({ setActiveRow((i) => i + 1); }, [activeRow, positionMs]); - // ── Space-to-capture in synced mode (avoid hijacking inputs) ───── + // Word mode: stamp the next word in the active row. If the row has + // no `words` yet, tokenize its text first. Once every word is + // stamped, the next press advances to the next line (and stamps the + // line's own timeMs if it's still -1, like line mode). + const captureWord = useCallback(() => { + setSyncedRows((rows) => { + if (rows.length === 0) return rows; + const idx = Math.min(activeRow, rows.length - 1); + const next = rows.slice(); + const row = { ...next[idx] }; + + // Seed words from row.text on first capture. + let words = row.words ? row.words.slice() : tokenize(row.text); + if (words.length === 0) { + // Empty line — degrade to line capture so we don't get stuck. 
+ row.timeMs = Math.max(0, positionMs); + next[idx] = row; + return next; + } + + const cursor = row.wordCursor ?? 0; + if (cursor >= words.length) { + // Out of words on this row — let the caller advance lines. + return rows; + } + // Stamp the line's timeMs on the very first word capture if the + // line itself isn't stamped yet. + if (row.timeMs < 0 && cursor === 0) { + row.timeMs = Math.max(0, positionMs); + } + words = words.slice(); + words[cursor] = { ...words[cursor], timeMs: Math.max(0, positionMs) }; + row.words = words; + row.wordCursor = cursor + 1; + next[idx] = row; + return next; + }); + }, [activeRow, positionMs]); + + // Advance to the next line in word mode (Enter shortcut). Appends a + // fresh empty row if we're at the end, mirroring line mode's UX. + const advanceLine = useCallback(() => { + setSyncedRows((rows) => { + if (rows.length === 0) return rows; + const idx = Math.min(activeRow, rows.length - 1); + if (idx === rows.length - 1) { + return [...rows, newRow(-1, "")]; + } + return rows; + }); + setActiveRow((i) => i + 1); + }, [activeRow]); + + // Undo the last word capture on the active row (Backspace in word + // mode). If no words are stamped yet, clears the line's own timeMs. + const undoLastWord = useCallback(() => { + setSyncedRows((rows) => { + if (rows.length === 0) return rows; + const idx = Math.min(activeRow, rows.length - 1); + const row = { ...rows[idx] }; + if (!row.words || row.words.length === 0) { + if (row.timeMs >= 0) { + row.timeMs = -1; + const next = rows.slice(); + next[idx] = row; + return next; + } + return rows; + } + const cursor = Math.max(0, (row.wordCursor ?? 0) - 1); + const words = row.words.slice(); + if (words[cursor]) { + words[cursor] = { ...words[cursor], timeMs: -1 }; + } + row.words = words; + row.wordCursor = cursor; + // If we backed all the way out, clear the line stamp too. 
+ if (cursor === 0 && words.every((w) => w.timeMs < 0)) { + row.timeMs = -1; + } + const next = rows.slice(); + next[idx] = row; + return next; + }); + }, [activeRow]); + + // Single entry point used by the capture button + Space shortcut. + const captureCurrent = useCallback(() => { + if (granularity === "word") { + captureWord(); + } else { + captureLine(); + } + }, [granularity, captureWord, captureLine]); + + // ── Keyboard shortcuts in synced mode (avoid hijacking inputs) ─── useEffect(() => { if (!isOpen || mode !== "synced") return; const handler = (e: KeyboardEvent) => { - if (e.code !== "Space") return; const tag = (e.target as HTMLElement | null)?.tagName?.toLowerCase(); - if (tag === "input" || tag === "textarea") return; - e.preventDefault(); - captureCurrent(); + const isInput = tag === "input" || tag === "textarea"; + if (e.code === "Space" && !isInput) { + e.preventDefault(); + captureCurrent(); + return; + } + if ( + granularity === "word" && + !isInput && + (e.code === "Enter" || e.code === "NumpadEnter") + ) { + e.preventDefault(); + advanceLine(); + return; + } + if ( + granularity === "word" && + !isInput && + (e.code === "Backspace" || e.code === "Delete") + ) { + e.preventDefault(); + undoLastWord(); + } }; window.addEventListener("keydown", handler); return () => window.removeEventListener("keydown", handler); - }, [isOpen, mode, captureCurrent]); + }, [isOpen, mode, granularity, captureCurrent, advanceLine, undoLastWord]); // ── Player nudges (compose a ±2 s seek with current position) ──── const nudge = (deltaMs: number) => { @@ -164,7 +349,16 @@ export function LyricsEditorModal({ // ── Row-level helpers ──────────────────────────────────────────── const updateRowText = (id: number, text: string) => { setSyncedRows((rows) => - rows.map((r) => (r.id === id ? { ...r, text } : r)), + rows.map((r) => { + if (r.id !== id) return r; + // In word mode editing the text invalidates the captured word + // stamps (tokenization changes). 
Drop them so the user can + // re-capture cleanly — keep the line-level timeMs. + if (r.words) { + return { ...r, text, words: undefined, wordCursor: undefined }; + } + return { ...r, text }; + }), ); }; const removeRow = (id: number) => { @@ -204,37 +398,86 @@ export function LyricsEditorModal({ setError(null); try { const isSyncedMode = mode === "synced"; - const content = isSyncedMode - ? serializeLrc( - syncedRows - .filter((r) => r.text.trim().length > 0 || r.timeMs >= 0) - // Bake the previewed global offset into every captured - // timestamp on save. Negative results are clamped to 0 - // so a user who shifts past the start of the track - // doesn't end up with invalid LRC entries. - .map((r) => - r.timeMs >= 0 - ? { ...r, timeMs: Math.max(0, r.timeMs + globalOffsetMs) } - : r, - ) - .sort((a, b) => { - if (a.timeMs < 0 && b.timeMs < 0) return 0; - if (a.timeMs < 0) return 1; - if (b.timeMs < 0) return -1; - return a.timeMs - b.timeMs; - }), + const isWordMode = isSyncedMode && granularity === "word"; + + // Bake the previewed global offset into every captured stamp on + // save (both line- and word-level). Negative results are clamped + // to 0 so a user shifting past the start of the track doesn't + // emit invalid stamps. + const shift = (ts: number): number => + ts < 0 ? -1 : Math.max(0, ts + globalOffsetMs); + + let content: string; + let saveFormat: "plain" | "lrc" | "enhanced_lrc"; + if (!isSyncedMode) { + content = plainText.trim(); + saveFormat = "plain"; + } else if (isWordMode) { + // Keep every row the user typed text into, even if they + // haven't captured a stamp yet — line mode does the same, + // so saving in word mode shouldn't silently delete unstamped + // text. `serializeEnhancedLrc` emits `[--:--.--]` for rows + // with `timeMs < 0` and folds uncaptured words into the + // previous segment (no phantom `<00:00.00>` stamp), so half- + // finished work round-trips cleanly through save → reload. 
+ const rowsForSave: LyricsLine[] = syncedRows + .filter( + (r) => + r.text.trim().length > 0 || + r.timeMs >= 0 || + (r.words?.some((w) => w.timeMs >= 0) ?? false), ) - : plainText.trim(); + .map((r) => ({ + timeMs: shift(r.timeMs), + endMs: -1, + text: r.text, + words: r.words?.map((w) => ({ + timeMs: shift(w.timeMs), + endMs: -1, + text: w.text, + })), + })) + // Untimed rows (timeMs < 0) sort to the end so the synced + // body stays monotonically ordered; the user can resume + // capturing them on the next edit. + .sort((a, b) => { + if (a.timeMs < 0 && b.timeMs < 0) return 0; + if (a.timeMs < 0) return 1; + if (b.timeMs < 0) return -1; + return a.timeMs - b.timeMs; + }); + content = serializeEnhancedLrc(rowsForSave); + saveFormat = "enhanced_lrc"; + } else { + content = serializeLrc( + syncedRows + .filter((r) => r.text.trim().length > 0 || r.timeMs >= 0) + .map((r) => (r.timeMs >= 0 ? { ...r, timeMs: shift(r.timeMs) } : r)) + .sort((a, b) => { + if (a.timeMs < 0 && b.timeMs < 0) return 0; + if (a.timeMs < 0) return 1; + if (b.timeMs < 0) return -1; + return a.timeMs - b.timeMs; + }), + ); + saveFormat = "lrc"; + } // The backend pauses playback if we're editing the currently // playing file, so the flag is passed through as-is. const next = await saveLyrics(trackId, { content, - format: isSyncedMode ? "lrc" : "plain", + format: saveFormat, write_to_file: writeToFile, }); onSaved(next); - onClose(); + if (next.tag_write_skipped) { + // Keep the modal open with a warning so the user knows the + // file itself wasn't touched — DB cache still updated. 
+ setWarning(t("lyrics.toast.tagWriteSkipped")); + } else { + onClose(); + } } catch (err) { console.error("[LyricsEditor] save failed", err); setError(String(err)); @@ -332,18 +575,51 @@ export function LyricsEditorModal({ className="w-full h-[50vh] resize-none rounded-lg border border-zinc-200 dark:border-zinc-700 bg-zinc-50 dark:bg-zinc-800 p-4 text-sm leading-relaxed focus:outline-none focus:ring-2 focus:ring-pink-500" /> ) : ( - + <> + {/* Granularity toggle. Sits above the row list so users + can flip between line + word capture without losing + what they've already stamped. */} +
+ + {t("lyricsEditor.granularity.label")} + + + +
+ + )} @@ -390,7 +666,10 @@ export function LyricsEditorModal({

- {t("lyricsEditor.captureHint")} · {captured}/{syncedRows.length}{" "} + {granularity === "word" + ? t("lyricsEditor.captureHintWord") + : t("lyricsEditor.captureHint")} + {" "}· {captured}/{syncedRows.length}{" "} {t("lyricsEditor.lines")}

@@ -472,6 +751,11 @@ export function LyricsEditorModal({ {t("lyricsEditor.writeToFile")}
+ {warning && ( + + {warning} + + )} {error && ( {error} @@ -536,6 +820,8 @@ interface SyncedEditorProps { playingRow: number; /** Global timestamp shift previewed in the timestamp buttons. */ offsetMs: number; + /** Capture granularity — drives the per-word chip row. */ + granularity: Granularity; onActivate: (idx: number) => void; onUpdateText: (id: number, text: string) => void; onRemove: (id: number) => void; @@ -549,6 +835,7 @@ function SyncedEditor({ activeRow, playingRow, offsetMs, + granularity, onActivate, onUpdateText, onRemove, @@ -565,10 +852,12 @@ function SyncedEditor({ const captured = row.timeMs >= 0; const shifted = captured && offsetMs !== 0; const previewMs = captured ? Math.max(0, row.timeMs + offsetMs) : -1; + const showWordChips = + granularity === "word" && isActive && (row.words?.length ?? 0) > 0; return (
  • onActivate(idx)} > +
    +
    + {showWordChips && ( +
    + {row.words!.map((w, wi) => { + const wCursor = row.wordCursor ?? 0; + const wCaptured = w.timeMs >= 0; + const isNext = wi === wCursor; + return ( + + {w.text.trim() || "·"} + + ); + })} +
    + )}
  • ); })} diff --git a/src/components/layout/LyricsPanel.tsx b/src/components/layout/LyricsPanel.tsx index ba67165..90de1a5 100644 --- a/src/components/layout/LyricsPanel.tsx +++ b/src/components/layout/LyricsPanel.tsx @@ -15,9 +15,10 @@ import { clearLyrics, fetchLyrics, findActiveLineIndex, + findActiveWordIndex, importLrcFile, - parseLrc, - type LrcLine, + parseLyrics, + type LyricsLine, type LyricsPayload, } from "../../lib/tauri/lyrics"; import { FullscreenLyrics } from "../player/FullscreenLyrics"; @@ -76,13 +77,10 @@ export function LyricsPanel() { }; }, [trackId]); - // ── Parse LRC once per content change ─────────────────────────── - const lrcLines = useMemo(() => { + // ── Parse lyrics once per content change ───────────────────────── + const lrcLines = useMemo(() => { if (!payload) return []; - if (payload.format !== "lrc" && payload.format !== "enhanced_lrc") { - return []; - } - return parseLrc(payload.content); + return parseLyrics(payload.content, payload.format); }, [payload]); const isSynced = lrcLines.length > 0; @@ -113,11 +111,22 @@ export function LyricsPanel() { } }, [activeIndex, isLyricsOpen, isSynced]); + // Active word inside the active line — only computed when the line + // carries `words[]` so plain LRC stays cheap. + const activeLine = activeIndex >= 0 ? 
lrcLines[activeIndex] : undefined; + const activeWordIndex = useMemo(() => { + if (!activeLine?.words || activeLine.words.length === 0) return -1; + return findActiveWordIndex(activeLine.words, positionMs); + }, [activeLine, positionMs]); + // ── Actions ───────────────────────────────────────────────────── const handleImport = async () => { if (trackId == null) return; try { - const path = await pickFile(["lrc", "txt"], t("lyrics.importTitle")); + const path = await pickFile( + ["lrc", "elrc", "ttml", "xml", "txt"], + t("lyrics.importTitle"), + ); if (!path) return; const next = await importLrcFile(trackId, path); setPayload(next); @@ -153,7 +162,7 @@ export function LyricsPanel() { } }; - const handleSeekToLine = (line: LrcLine) => { + const handleSeekToLine = (line: LyricsLine) => { seek(line.timeMs).catch(() => {}); }; @@ -226,6 +235,7 @@ export function LyricsPanel() { {lrcLines.map((line, index) => { const isActive = index === activeIndex; const isPast = index < activeIndex; + const hasWords = isActive && (line.words?.length ?? 0) > 0; return (
  • - {line.text || " "} + {hasWords ? ( + + {line.words!.map((word, wi) => ( + + {word.text} + + ))} + + ) : ( + line.text || " " + )}
  • ); })} @@ -282,8 +320,18 @@ export function LyricsPanel() { {/* Footer actions */} {currentTrack != null && (
    - - {payload ? sourceLabel(payload.source, t) : ""} + + + {payload ? sourceLabel(payload.source, t) : ""} + + {payload && (payload.format === "enhanced_lrc" || payload.format === "ttml") && ( + + {payload.format === "ttml" ? "TTML" : "WORD"} + + )}