diff --git a/internal/parser/id3/id3.go b/internal/parser/id3/id3.go
index fc168f1..6c27b9a 100644
--- a/internal/parser/id3/id3.go
+++ b/internal/parser/id3/id3.go
@@ -242,9 +242,12 @@ func parseFrame(r io.ReaderAt, pos int64, version byte) (*parser.Tag, int64, err
 		// Attached picture - store metadata about it
 		tag.Value = fmt.Sprintf("Picture (%d bytes)", frameSize)
 		tag.DataType = "binary"
-	} else if frameID == frameComment {
-		// Comment frame
+	} else if frameID == frameComment || frameID == frameV2Comment {
+		// Comment frame (COMM for v2.3/2.4, COM for v2.2)
 		tag.Value = decodeCommentFrame(frameData)
+	} else if frameID == frameUserText {
+		// User defined text frame (TXXX)
+		tag.Value = decodeUserTextFrame(frameData)
 	} else {
 		// Generic binary frame
 		tag.Value = fmt.Sprintf("Binary data (%d bytes)", frameSize)
@@ -298,29 +301,118 @@ func decodeTextFrame(data []byte) string {
 	}
 }
 
-// decodeCommentFrame decodes comment frame (COMM)
+// decodeCommentFrame decodes a comment frame (COMM/COM) and returns the comment text.
+// Structure: encoding (1 byte) + language (3 bytes) + description (null-terminated) + comment
+// When no terminator is found, or nothing follows it, everything after the language bytes is decoded as the comment.
 func decodeCommentFrame(data []byte) string {
-	if len(data) < 4 {
+	if len(data) < 5 { // Minimum: encoding + language + null terminator
 		return ""
 	}
 
 	encoding := data[0]
-	// Skip language (3 bytes) and short description
-	// For simplicity, just decode the entire content
-	text := data[4:]
+	// Skip language (3 bytes at bytes 1-3)
+	content := data[4:]
+
+	// Find the null terminator that separates description from comment
+	// Null terminator size depends on encoding
+	var commentStart int
+	if encoding == encodingUTF16BOM || encoding == encodingUTF16BE {
+		// UTF-16 uses double-null terminator
+		nullPos := findDoubleNull(content)
+		if nullPos >= 0 && nullPos+2 < len(content) {
+			// There's content after the double-null, that's the comment
+			commentStart = nullPos + 2
+		} else {
+			// No proper separator found, use entire content as comment
+			commentStart = 0
+		}
+	} else {
+		// ISO-8859-1 and UTF-8 use single null
+		nullPos := findNull(content)
+		if nullPos >= 0 && nullPos+1 < len(content) {
+			// There's content after the null, that's the comment
+			commentStart = nullPos + 1
+		} else {
+			// No proper separator found, use entire content as comment
+			commentStart = 0
+		}
+	}
+
+	text := content[commentStart:]
+	return decodeEncodedText(text, encoding)
+}
+
+// decodeUserTextFrame decodes a user defined text frame (TXXX) and returns the value.
+// Structure: encoding (1 byte) + description (null-terminated) + value
+// When no terminator is found, or nothing follows it, everything after the encoding byte is decoded as the value.
+func decodeUserTextFrame(data []byte) string {
+	if len(data) < 2 {
+		return ""
+	}
+
+	encoding := data[0]
+	content := data[1:]
+
+	// Find the null terminator that separates description from value
+	var valueStart int
+	if encoding == encodingUTF16BOM || encoding == encodingUTF16BE {
+		nullPos := findDoubleNull(content)
+		if nullPos >= 0 && nullPos+2 < len(content) {
+			valueStart = nullPos + 2
+		} else {
+			// No proper separator, use entire content
+			valueStart = 0
+		}
+	} else {
+		nullPos := findNull(content)
+		if nullPos >= 0 && nullPos+1 < len(content) {
+			valueStart = nullPos + 1
+		} else {
+			// No proper separator, use entire content
+			valueStart = 0
+		}
+	}
 
+	text := content[valueStart:]
+	return decodeEncodedText(text, encoding)
+}
+
+// decodeEncodedText decodes data according to the given text-encoding byte.
+// Unrecognized encoding bytes are decoded the same way as ISO-8859-1.
+func decodeEncodedText(data []byte, encoding byte) string {
 	switch encoding {
 	case encodingISO88591:
-		return string(trimNull(text))
+		return string(trimNull(data))
 	case encodingUTF16BOM:
-		return decodeUTF16WithBOM(text)
+		return decodeUTF16WithBOM(data)
 	case encodingUTF16BE:
-		return decodeUTF16BE(text)
+		return decodeUTF16BE(data)
 	case encodingUTF8:
-		return string(trimNull(text))
+		return string(trimNull(data))
 	default:
-		return string(trimNull(text))
+		return string(trimNull(data))
+	}
+}
+
+// findNull finds the first null byte and returns its index, or -1 if not found
+func findNull(data []byte) int {
+	for i, b := range data {
+		if b == 0 {
+			return i
+		}
+	}
+	return -1
+}
+
+// findDoubleNull returns the index of the first 0x00 0x00 pair (UTF-16 terminator), or -1 if not found.
+// Only even offsets are probed, so a pair straddling two 2-byte units is not treated as a terminator.
+func findDoubleNull(data []byte) int {
+	for i := 0; i < len(data)-1; i += 2 {
+		if data[i] == 0 && data[i+1] == 0 {
+			return i
+		}
 	}
+	return -1
 }
 
 // decodeUTF16WithBOM decodes UTF-16 with byte order mark
diff --git a/internal/parser/id3/id3_test.go b/internal/parser/id3/id3_test.go
index c5216fa..ac0763f 100644
--- a/internal/parser/id3/id3_test.go
+++ b/internal/parser/id3/id3_test.go
@@ -1110,3 +1110,192 @@ func TestParser_Parse_TooManyFrames(t *testing.T) {
 		t.Fatalf("Parse() got %d directories, want 1", len(dirs))
 	}
 }
+
+func TestParser_Parse_CommentFrame_WithDescription(t *testing.T) {
+	var buf bytes.Buffer
+
+	// COMM frame with proper structure: encoding + lang + description\0 + comment\0
+	buf.Write([]byte{'I', 'D', '3'})
+	buf.WriteByte(0x03)
+	buf.WriteByte(0x00)
+	buf.WriteByte(0x00)
+	buf.Write(encodeSynchsafeInt(0x30))
+
+	// COMM frame: encoding + lang + desc\0 + comment\0
+	commFrame := []byte{
+		0x00,                                   // Encoding: ISO-8859-1
+		'e', 'n', 'g',                          // Language
+		'S', 'h', 'o', 'r', 't', 0x00,          // Description: "Short\0"
+		'T', 'h', 'i', 's', ' ', 'i', 's', ' ', // Comment text
+		't', 'h', 'e', ' ', 'c', 'o', 'm', 'm',
+		'e', 'n', 't', 0x00,
+	}
+
+	buf.Write([]byte{'C', 'O', 'M', 'M'})
+	buf.Write([]byte{0x00, 0x00, 0x00, byte(len(commFrame))})
+	buf.Write([]byte{0x00, 0x00}) // Flags
+	buf.Write(commFrame)
+
+	p := New()
+	r := bytes.NewReader(buf.Bytes())
+
+	dirs, err := p.Parse(r)
+	if err != nil {
+		t.Fatalf("Parse() error = %v", err)
+	}
+	if len(dirs) == 0 {
+		t.Fatal("Parse() returned no directories")
+	}
+
+	// Check for comment frame with correct value
+	foundComment := false
+	for _, tag := range dirs[0].Tags {
+		if tag.Name == "Comment" {
+			foundComment = true
+			if tag.Value != "This is the comment" {
+				t.Errorf("Comment value = %q, want %q", tag.Value, "This is the comment")
+			}
+		}
+	}
+	if !foundComment {
+		t.Error("Comment frame not found")
+	}
+}
+
+func TestParser_Parse_UserTextFrame(t *testing.T) {
+	var buf bytes.Buffer
+
+	// TXXX frame with structure: encoding + description\0 + value\0
+	buf.Write([]byte{'I', 'D', '3'})
+	buf.WriteByte(0x03)
+	buf.WriteByte(0x00)
+	buf.WriteByte(0x00)
+	buf.Write(encodeSynchsafeInt(0x30))
+
+	// TXXX frame: encoding + desc\0 + value\0
+	txxxFrame := []byte{
+		0x00,                                    // Encoding: ISO-8859-1
+		'M', 'y', 'D', 'e', 's', 'c', 0x00,      // Description: "MyDesc\0"
+		'M', 'y', 'V', 'a', 'l', 'u', 'e', 0x00, // Value: "MyValue\0"
+	}
+
+	buf.Write([]byte{'T', 'X', 'X', 'X'})
+	buf.Write([]byte{0x00, 0x00, 0x00, byte(len(txxxFrame))})
+	buf.Write([]byte{0x00, 0x00}) // Flags
+	buf.Write(txxxFrame)
+
+	p := New()
+	r := bytes.NewReader(buf.Bytes())
+
+	dirs, err := p.Parse(r)
+	if err != nil {
+		t.Fatalf("Parse() error = %v", err)
+	}
+	if len(dirs) == 0 {
+		t.Fatal("Parse() returned no directories")
+	}
+
+	// Check for user text frame with correct value
+	foundTXXX := false
+	for _, tag := range dirs[0].Tags {
+		if tag.Name == "User Defined Text" {
+			foundTXXX = true
+			if tag.Value != "MyValue" {
+				t.Errorf("User Defined Text value = %q, want %q", tag.Value, "MyValue")
+			}
+		}
+	}
+	if !foundTXXX {
+		t.Error("User Defined Text frame not found")
+	}
+}
+
+func TestParser_Parse_ID3v22_CommentFrame(t *testing.T) {
+	var buf bytes.Buffer
+
+	// ID3v2.2 COM frame
+	buf.Write([]byte{'I', 'D', '3'})
+	buf.WriteByte(0x02) // Version 2.2
+	buf.WriteByte(0x00)
+	buf.WriteByte(0x00)
+	buf.Write(encodeSynchsafeInt(0x20))
+
+	// COM frame (3-char ID for v2.2): encoding + lang + comment, with no separate description terminator
+	comFrame := []byte{
+		0x00,                          // Encoding: ISO-8859-1
+		'e', 'n', 'g',                 // Language
+		'H', 'e', 'l', 'l', 'o', 0x00, // Comment: "Hello\0"
+	}
+
+	buf.Write([]byte{'C', 'O', 'M'})
+	buf.Write([]byte{0x00, 0x00, byte(len(comFrame))}) // 3-byte size for v2.2
+	buf.Write(comFrame)
+
+	p := New()
+	r := bytes.NewReader(buf.Bytes())
+
+	dirs, err := p.Parse(r)
+	if err != nil {
+		t.Fatalf("Parse() error = %v", err)
+	}
+	if len(dirs) == 0 {
+		t.Fatal("Parse() returned no directories")
+	}
+
+	// Check for comment frame
+	foundComment := false
+	for _, tag := range dirs[0].Tags {
+		if tag.Name == "Comment" {
+			foundComment = true
+			if tag.Value != "Hello" {
+				t.Errorf("Comment value = %q, want %q", tag.Value, "Hello")
+			}
+		}
+	}
+	if !foundComment {
+		t.Error("ID3v2.2 Comment frame not found")
+	}
+}
+
+func TestDecodeUserTextFrame(t *testing.T) {
+	tests := []struct {
+		name string
+		data []byte
+		want string
+	}{
+		{
+			name: "ISO-8859-1 with description",
+			data: []byte{0x00, 'd', 'e', 's', 'c', 0x00, 'v', 'a', 'l', 'u', 'e', 0x00},
+			want: "value",
+		},
+		{
+			name: "ISO-8859-1 no description",
+			data: []byte{0x00, 0x00, 'v', 'a', 'l', 'u', 'e', 0x00},
+			want: "value",
+		},
+		{
+			name: "UTF-8 with description",
+			data: []byte{0x03, 'd', 'e', 's', 'c', 0x00, 'v', 'a', 'l', 'u', 'e', 0x00},
+			want: "value",
+		},
+		{
+			name: "too short",
+			data: []byte{0x00},
+			want: "",
+		},
+		{
+			name: "empty",
+			data: []byte{},
+			want: "",
+		},
+	}
+
+	for _, tt := range tests {
+		t.Run(tt.name, func(t *testing.T) {
+			got := decodeUserTextFrame(tt.data)
+			if got != tt.want {
+				t.Errorf("decodeUserTextFrame() = %q, want %q", got, tt.want)
+			}
+		})
+	}
+}