Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
112 changes: 100 additions & 12 deletions internal/parser/id3/id3.go
Original file line number Diff line number Diff line change
Expand Up @@ -242,9 +242,12 @@ func parseFrame(r io.ReaderAt, pos int64, version byte) (*parser.Tag, int64, err
// Attached picture - store metadata about it
tag.Value = fmt.Sprintf("Picture (%d bytes)", frameSize)
tag.DataType = "binary"
} else if frameID == frameComment {
// Comment frame
} else if frameID == frameComment || frameID == frameV2Comment {
// Comment frame (COMM for v2.3/2.4, COM for v2.2)
tag.Value = decodeCommentFrame(frameData)
} else if frameID == frameUserText {
// User defined text frame (TXXX)
tag.Value = decodeUserTextFrame(frameData)
} else {
// Generic binary frame
tag.Value = fmt.Sprintf("Binary data (%d bytes)", frameSize)
Expand Down Expand Up @@ -298,29 +301,114 @@ func decodeTextFrame(data []byte) string {
}
}

// decodeCommentFrame decodes comment frame (COMM)
// decodeCommentFrame decodes comment frame (COMM/COM)
// Structure: encoding (1 byte) + language (3 bytes) + description (null-terminated) + comment
func decodeCommentFrame(data []byte) string {
if len(data) < 4 {
if len(data) < 5 { // Minimum: encoding + language + null terminator
return ""
}

encoding := data[0]
// Skip language (3 bytes) and short description
// For simplicity, just decode the entire content
text := data[4:]
// Skip language (3 bytes at bytes 1-3)
content := data[4:]

// Find the null terminator that separates description from comment
// Null terminator size depends on encoding
var commentStart int
if encoding == encodingUTF16BOM || encoding == encodingUTF16BE {
// UTF-16 uses double-null terminator
nullPos := findDoubleNull(content)
if nullPos >= 0 && nullPos+2 < len(content) {
// There's content after the double-null, that's the comment
commentStart = nullPos + 2
} else {
// No proper separator found, use entire content as comment
commentStart = 0
}
} else {
// ISO-8859-1 and UTF-8 use single null
nullPos := findNull(content)
if nullPos >= 0 && nullPos+1 < len(content) {
// There's content after the null, that's the comment
commentStart = nullPos + 1
} else {
// No proper separator found, use entire content as comment
commentStart = 0
}
}

text := content[commentStart:]
return decodeEncodedText(text, encoding)
}

// decodeUserTextFrame extracts the value of a user defined text frame (TXXX).
//
// Frame layout: encoding (1 byte) + description (null-terminated) + value.
// The description is skipped and only the value is decoded. Input shorter
// than 2 bytes yields "". If no terminator is present, or the terminator is
// the last thing in the frame, the whole content after the encoding byte is
// decoded instead.
func decodeUserTextFrame(data []byte) string {
	if len(data) < 2 {
		return ""
	}

	encoding, body := data[0], data[1:]

	// Terminator position and width: double null for the UTF-16 encodings,
	// single null for everything else.
	var sep, width int
	switch encoding {
	case encodingUTF16BOM, encodingUTF16BE:
		sep, width = findDoubleNull(body), 2
	default:
		sep, width = findNull(body), 1
	}

	// Without a terminator followed by more bytes there is no proper
	// separator, so the entire body is used.
	if sep < 0 || sep+width >= len(body) {
		return decodeEncodedText(body, encoding)
	}
	return decodeEncodedText(body[sep+width:], encoding)
}

// decodeEncodedText decodes text based on encoding byte
func decodeEncodedText(data []byte, encoding byte) string {
switch encoding {
case encodingISO88591:
return string(trimNull(text))
return string(trimNull(data))
case encodingUTF16BOM:
return decodeUTF16WithBOM(text)
return decodeUTF16WithBOM(data)
case encodingUTF16BE:
return decodeUTF16BE(text)
return decodeUTF16BE(data)
case encodingUTF8:
return string(trimNull(text))
return string(trimNull(data))
default:
return string(trimNull(text))
return string(trimNull(data))
}
}

// findNull reports the index of the first 0x00 byte in data.
// It returns -1 when data contains no null byte (including when it is empty).
func findNull(data []byte) int {
	for idx := 0; idx < len(data); idx++ {
		if data[idx] == 0x00 {
			return idx
		}
	}
	return -1
}

// findDoubleNull reports the index of the first double-null (UTF-16
// terminator) in data, or -1 if there is none.
//
// The scan advances two bytes at a time from offset 0, so only pairs that
// start at an even index are considered; two zero bytes straddling a pair
// boundary do not count as a terminator.
func findDoubleNull(data []byte) int {
	pos := 0
	for pos+1 < len(data) {
		if data[pos] == 0 && data[pos+1] == 0 {
			return pos
		}
		pos += 2
	}
	return -1
}

// decodeUTF16WithBOM decodes UTF-16 with byte order mark
Expand Down
180 changes: 180 additions & 0 deletions internal/parser/id3/id3_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -1110,3 +1110,183 @@ func TestParser_Parse_TooManyFrames(t *testing.T) {
t.Fatalf("Parse() got %d directories, want 1", len(dirs))
}
}

// TestParser_Parse_CommentFrame_WithDescription verifies that a COMM frame
// carrying both a short description and a comment is reported with only the
// comment text as its value.
func TestParser_Parse_CommentFrame_WithDescription(t *testing.T) {
	var buf bytes.Buffer

	// Tag header: "ID3", version byte 0x03, revision, flags, synchsafe size.
	buf.Write([]byte{'I', 'D', '3'})
	buf.WriteByte(0x03)
	buf.WriteByte(0x00)
	buf.WriteByte(0x00)
	buf.Write(encodeSynchsafeInt(0x30))

	// COMM frame: encoding + lang + desc\0 + comment\0
	commFrame := []byte{
		0x00,          // Encoding: ISO-8859-1
		'e', 'n', 'g', // Language
		'S', 'h', 'o', 'r', 't', 0x00, // Description: "Short\0"
		'T', 'h', 'i', 's', ' ', 'i', 's', ' ', // Comment text
		't', 'h', 'e', ' ', 'c', 'o', 'm', 'm',
		'e', 'n', 't', 0x00,
	}

	buf.Write([]byte{'C', 'O', 'M', 'M'})
	buf.Write([]byte{0x00, 0x00, 0x00, byte(len(commFrame))})
	buf.Write([]byte{0x00, 0x00}) // Flags
	buf.Write(commFrame)

	p := New()
	r := bytes.NewReader(buf.Bytes())

	dirs, err := p.Parse(r)
	if err != nil {
		t.Fatalf("Parse() error = %v", err)
	}
	// Guard the dirs[0] access below so an empty result fails the test
	// cleanly instead of panicking with an index-out-of-range.
	if len(dirs) == 0 {
		t.Fatal("Parse() returned no directories")
	}

	// Check for comment frame with correct value
	foundComment := false
	for _, tag := range dirs[0].Tags {
		if tag.Name == "Comment" {
			foundComment = true
			if tag.Value != "This is the comment" {
				t.Errorf("Comment value = %q, want %q", tag.Value, "This is the comment")
			}
		}
	}
	if !foundComment {
		t.Error("Comment frame not found")
	}
}

// TestParser_Parse_UserTextFrame verifies that a TXXX frame with a
// description and a value is reported with only the value as its tag value.
func TestParser_Parse_UserTextFrame(t *testing.T) {
	var buf bytes.Buffer

	// Tag header: "ID3", version byte 0x03, revision, flags, synchsafe size.
	buf.Write([]byte{'I', 'D', '3'})
	buf.WriteByte(0x03)
	buf.WriteByte(0x00)
	buf.WriteByte(0x00)
	buf.Write(encodeSynchsafeInt(0x30))

	// TXXX frame: encoding + desc\0 + value\0
	txxxFrame := []byte{
		0x00,                               // Encoding: ISO-8859-1
		'M', 'y', 'D', 'e', 's', 'c', 0x00, // Description: "MyDesc\0"
		'M', 'y', 'V', 'a', 'l', 'u', 'e', 0x00, // Value: "MyValue\0"
	}

	buf.Write([]byte{'T', 'X', 'X', 'X'})
	buf.Write([]byte{0x00, 0x00, 0x00, byte(len(txxxFrame))})
	buf.Write([]byte{0x00, 0x00}) // Flags
	buf.Write(txxxFrame)

	p := New()
	r := bytes.NewReader(buf.Bytes())

	dirs, err := p.Parse(r)
	if err != nil {
		t.Fatalf("Parse() error = %v", err)
	}
	// Guard the dirs[0] access below so an empty result fails the test
	// cleanly instead of panicking with an index-out-of-range.
	if len(dirs) == 0 {
		t.Fatal("Parse() returned no directories")
	}

	// Check for user text frame with correct value
	foundTXXX := false
	for _, tag := range dirs[0].Tags {
		if tag.Name == "User Defined Text" {
			foundTXXX = true
			if tag.Value != "MyValue" {
				t.Errorf("User Defined Text value = %q, want %q", tag.Value, "MyValue")
			}
		}
	}
	if !foundTXXX {
		t.Error("User Defined Text frame not found")
	}
}

// TestParser_Parse_ID3v22_CommentFrame verifies that an ID3v2.2 COM frame
// (3-character frame ID, 3-byte size, no flags) is reported as a "Comment"
// tag. The frame carries no separate description, only "Hello\0" after the
// language, and the test expects the whole text back as the comment.
func TestParser_Parse_ID3v22_CommentFrame(t *testing.T) {
	var buf bytes.Buffer

	// ID3v2.2 tag header
	buf.Write([]byte{'I', 'D', '3'})
	buf.WriteByte(0x02) // Version 2.2
	buf.WriteByte(0x00)
	buf.WriteByte(0x00)
	buf.Write(encodeSynchsafeInt(0x20))

	// COM frame (3-char ID for v2.2): encoding + lang + comment
	comFrame := []byte{
		0x00,          // Encoding: ISO-8859-1
		'e', 'n', 'g', // Language
		'H', 'e', 'l', 'l', 'o', 0x00, // Comment: "Hello\0"
	}

	buf.Write([]byte{'C', 'O', 'M'})
	buf.Write([]byte{0x00, 0x00, byte(len(comFrame))}) // 3-byte size for v2.2
	buf.Write(comFrame)

	p := New()
	r := bytes.NewReader(buf.Bytes())

	dirs, err := p.Parse(r)
	if err != nil {
		t.Fatalf("Parse() error = %v", err)
	}
	// Guard the dirs[0] access below so an empty result fails the test
	// cleanly instead of panicking with an index-out-of-range.
	if len(dirs) == 0 {
		t.Fatal("Parse() returned no directories")
	}

	// Check for comment frame
	foundComment := false
	for _, tag := range dirs[0].Tags {
		if tag.Name == "Comment" {
			foundComment = true
			if tag.Value != "Hello" {
				t.Errorf("Comment value = %q, want %q", tag.Value, "Hello")
			}
		}
	}
	if !foundComment {
		t.Error("ID3v2.2 Comment frame not found")
	}
}

// TestDecodeUserTextFrame exercises decodeUserTextFrame directly with raw
// frame payloads: single-byte encodings with and without a description, and
// inputs too short to contain anything after the encoding byte.
func TestDecodeUserTextFrame(t *testing.T) {
	type frameCase struct {
		label    string
		payload  []byte
		expected string
	}

	cases := []frameCase{
		{
			label:    "ISO-8859-1 with description",
			payload:  []byte{0x00, 'd', 'e', 's', 'c', 0x00, 'v', 'a', 'l', 'u', 'e', 0x00},
			expected: "value",
		},
		{
			label:    "ISO-8859-1 no description",
			payload:  []byte{0x00, 0x00, 'v', 'a', 'l', 'u', 'e', 0x00},
			expected: "value",
		},
		{
			label:    "UTF-8 with description",
			payload:  []byte{0x03, 'd', 'e', 's', 'c', 0x00, 'v', 'a', 'l', 'u', 'e', 0x00},
			expected: "value",
		},
		{
			label:    "too short",
			payload:  []byte{0x00},
			expected: "",
		},
		{
			label:    "empty",
			payload:  []byte{},
			expected: "",
		},
	}

	for i := range cases {
		tc := cases[i]
		t.Run(tc.label, func(t *testing.T) {
			if got := decodeUserTextFrame(tc.payload); got != tc.expected {
				t.Errorf("decodeUserTextFrame() = %q, want %q", got, tc.expected)
			}
		})
	}
}
Loading