diff --git a/src/error.rs b/src/error.rs index 70b5c07..575d8e3 100644 --- a/src/error.rs +++ b/src/error.rs @@ -43,6 +43,10 @@ pub enum Error { /// Element body size is unknown. #[error("Element body size is unknown, ID: {0}")] ElementBodySizeUnknown(VInt64), + + /// Malformed lacing data: the lace header or size table does not match the block payload. + #[error("Malformed lacing data")] + MalformedLacingData, } /// Result type for this crate. diff --git a/src/frame.rs b/src/frame.rs new file mode 100644 index 0000000..1203a1e --- /dev/null +++ b/src/frame.rs @@ -0,0 +1,293 @@ +use crate::{ + base::VInt64, + functional::{Decode, Encode}, + lacer::Lacer, + leaf::SimpleBlock, + master::{BlockGroup, Cluster}, +}; + +/// A Matroska encoded frame, borrowing its payload from the block it was read from. +pub struct Frame<'a> { + /// raw payload bytes of the frame (one delaced frame when the block is laced) + pub data: &'a [u8], + /// whether the frame is a keyframe + pub is_keyframe: bool, + /// whether the frame is invisible (mostly for subtitle tracks) + pub is_invisible: bool, + /// whether the frame is discardable (for video tracks, e.g. non-reference frames) + pub is_discardable: bool, + /// track number the frame belongs to + pub track_number: u64, + /// timestamp of the frame (Cluster timestamp plus the block's relative timestamp), in the same timescale as the Cluster timestamp + pub timestamp: i64, +} + +/// A block in a Cluster, either a SimpleBlock or a BlockGroup. +/// +/// This is a convenience enum to allow handling both types of blocks uniformly. +/// * when reading: often we just want to iterate over all blocks in a cluster, regardless of type. +/// * when writing: we may want to write a list of blocks of mixed types. 
+#[derive(Debug, Clone, PartialEq, Eq)] +pub enum ClusterBlock { + /// A SimpleBlock + Simple(SimpleBlock), + /// A BlockGroup + Group(BlockGroup), +} +impl ClusterBlock { + fn block_ref(&self) -> BlockRef<'_> { + match self { + ClusterBlock::Simple(b) => BlockRef::Simple(b), + ClusterBlock::Group(b) => BlockRef::Group(b), + } + } +} +impl From for ClusterBlock { + fn from(b: SimpleBlock) -> Self { + ClusterBlock::Simple(b) + } +} +impl From for ClusterBlock { + fn from(b: BlockGroup) -> Self { + ClusterBlock::Group(b) + } +} + +impl Encode for ClusterBlock { + fn encode(&self, buf: &mut B) -> crate::Result<()> { + match self { + ClusterBlock::Simple(b) => b.encode(buf), + ClusterBlock::Group(b) => b.encode(buf), + } + } +} + +enum BlockRef<'a> { + Simple(&'a crate::leaf::SimpleBlock), + Group(&'a crate::master::BlockGroup), +} + +impl<'a> BlockRef<'a> { + fn into_frames(self, cluster_ts: u64) -> impl Iterator>> + 'a { + // Without automatic sum types or generators, it's kind of amusing to write an iterator + // FIXME: Replace this workaround with a generator or sum type iterator when Rust stabilizes generators (see https://github.com/rust-lang/rust/issues/43122) + enum Output { + Once(T1), + Xiph(T2), + Xiph2(T3), + Ebml(T4), + Ebml2(T5), + FixedSize(T6), + FixedSize2(T7), + } + + impl Iterator for Output + where + T1: Iterator, + T2: Iterator, + T3: Iterator, + T4: Iterator, + T5: Iterator, + T6: Iterator, + T7: Iterator, + { + type Item = O; + fn next(&mut self) -> Option { + match self { + Output::Once(it) => it.next(), + Output::Xiph(it) => it.next(), + Output::Xiph2(it) => it.next(), + Output::Ebml(it) => it.next(), + Output::Ebml2(it) => it.next(), + Output::FixedSize(it) => it.next(), + Output::FixedSize2(it) => it.next(), + } + } + } + + match self { + BlockRef::Simple(block) => { + let body_buf = &mut &block[..]; + + let track_number = match VInt64::decode(body_buf) { + Ok(num) => num, + Err(e) => return Output::Once(std::iter::once(Err(e))), + }; + + let 
relative_timestamp = match i16::decode(body_buf) { + Ok(ts) => ts, + Err(e) => return Output::Once(std::iter::once(Err(e))), + }; + + let flag = match u8::decode(body_buf) { + Ok(f) => f, + Err(e) => return Output::Once(std::iter::once(Err(e))), + }; + + let data = *body_buf; + + let lacing = (flag >> 1) & 0x03; + + if lacing == 0 { + // no lacing, single frame + Output::Once(std::iter::once(Ok(Frame { + data, + is_keyframe: (flag & 0x80) != 0, + is_invisible: (flag & 0x08) != 0, + is_discardable: (flag & 0x01) != 0, + track_number: *track_number, + timestamp: cluster_ts as i64 + relative_timestamp as i64, + }))) + } else if lacing == 0b01 { + let data = match Lacer::Xiph.delace(data) { + Ok(frames) => frames, + Err(e) => return Output::Once(std::iter::once(Err(e))), + }; + + Output::Xiph(data.into_iter().map(move |d| { + Ok(Frame { + data: d, + is_keyframe: (flag & 0x80) != 0, + is_invisible: (flag & 0x08) != 0, + is_discardable: (flag & 0x01) != 0, + track_number: *track_number, + timestamp: cluster_ts as i64 + relative_timestamp as i64, + }) + })) + } else if lacing == 0b11 { + // EBML lacing + let data = match Lacer::Ebml.delace(data) { + Ok(frames) => frames, + Err(e) => return Output::Once(std::iter::once(Err(e))), + }; + Output::Ebml(data.into_iter().map(move |d| { + Ok(Frame { + data: d, + is_keyframe: (flag & 0x80) != 0, + is_invisible: (flag & 0x08) != 0, + is_discardable: (flag & 0x01) != 0, + track_number: *track_number, + timestamp: cluster_ts as i64 + relative_timestamp as i64, + }) + })) + } else { + // fixed-size lacing + let data = match Lacer::FixedSize.delace(data) { + Ok(frames) => frames, + Err(e) => return Output::Once(std::iter::once(Err(e))), + }; + Output::FixedSize(data.into_iter().map(move |d| { + Ok(Frame { + data: d, + is_keyframe: (flag & 0x80) != 0, + is_invisible: (flag & 0x08) != 0, + is_discardable: (flag & 0x01) != 0, + track_number: *track_number, + timestamp: cluster_ts as i64 + relative_timestamp as i64, + }) + })) + } + } + 
BlockRef::Group(g) => { + let block = &g.block; + let body_buf = &mut &block[..]; + + let track_number = match VInt64::decode(body_buf) { + Ok(num) => num, + Err(e) => return Output::Once(std::iter::once(Err(e))), + }; + + let relative_timestamp = match i16::decode(body_buf) { + Ok(ts) => ts, + Err(e) => return Output::Once(std::iter::once(Err(e))), + }; + + let flag = match u8::decode(body_buf) { + Ok(f) => f, + Err(e) => return Output::Once(std::iter::once(Err(e))), + }; + + let data = *body_buf; + let lacing = (flag >> 1) & 0x03; + if lacing == 0 { + // no lacing + Output::Once(std::iter::once(Ok(Frame { + data, + is_keyframe: g.reference_block.is_empty(), + is_invisible: flag & 0x08 != 0, + is_discardable: false, + track_number: *track_number, + timestamp: cluster_ts as i64 + relative_timestamp as i64, + }))) + } else if lacing == 0b01 { + let data = match Lacer::Xiph.delace(data) { + Ok(frames) => frames, + Err(e) => return Output::Once(std::iter::once(Err(e))), + }; + + Output::Xiph2(data.into_iter().map(move |d| { + Ok(Frame { + data: d, + is_keyframe: g.reference_block.is_empty(), + is_invisible: flag & 0x08 != 0, + is_discardable: false, + track_number: *track_number, + timestamp: cluster_ts as i64 + relative_timestamp as i64, + }) + })) + } else if lacing == 0b11 { + let data = match Lacer::Ebml.delace(data) { + Ok(frames) => frames, + Err(e) => return Output::Once(std::iter::once(Err(e))), + }; + Output::Ebml2(data.into_iter().map(move |d| { + Ok(Frame { + data: d, + is_keyframe: g.reference_block.is_empty(), + is_invisible: flag & 0x08 != 0, + is_discardable: false, + track_number: *track_number, + timestamp: cluster_ts as i64 + relative_timestamp as i64, + }) + })) + } else { + let data = match Lacer::FixedSize.delace(data) { + Ok(frames) => frames, + Err(e) => return Output::Once(std::iter::once(Err(e))), + }; + Output::FixedSize2(data.into_iter().map(move |d| { + Ok(Frame { + data: d, + is_keyframe: g.reference_block.is_empty(), + is_invisible: flag 
& 0x08 != 0, + is_discardable: false, + track_number: *track_number, + timestamp: cluster_ts as i64 + relative_timestamp as i64, + }) + })) + } + } + } + } +} + +impl<'a> From<&'a crate::leaf::SimpleBlock> for BlockRef<'a> { + fn from(b: &'a crate::leaf::SimpleBlock) -> Self { + BlockRef::Simple(b) + } +} +impl<'a> From<&'a crate::master::BlockGroup> for BlockRef<'a> { + fn from(b: &'a crate::master::BlockGroup) -> Self { + BlockRef::Group(b) + } +} + +impl Cluster { + /// frames in the cluster. + pub fn frames(&self) -> impl Iterator>> + '_ { + self.blocks + .iter() + .map(|b| b.block_ref()) + .flat_map(|b| b.into_frames(*self.timestamp)) + } +} diff --git a/src/io.rs b/src/io.rs index 1e84b29..d02f149 100644 --- a/src/io.rs +++ b/src/io.rs @@ -1,3 +1,5 @@ +//! I/O utilities. + /// blocking I/O implementations, supporting reading and writing. pub mod blocking_impl { use crate::{ diff --git a/src/lacer.rs b/src/lacer.rs new file mode 100644 index 0000000..e8c20f3 --- /dev/null +++ b/src/lacer.rs @@ -0,0 +1,390 @@ +//! Handler for lacing and delacing operations on frame data. + +use crate::{Error, base::VInt64, functional::Encode, io::blocking_impl::ReadFrom}; + +// https://www.matroska.org/technical/notes.html +/// Handler for lacing and delacing operations on frame data. +pub enum Lacer { + /// Xiph lacing (variable-size frames with size prefixes) + /// + /// The Xiph lacing uses the same coding of size as found in the Ogg container \[@?RFC3533\]. The bits 5-6 of the Block Header flags are set to 01. + /// The Block data with laced frames is stored as follows: + /// Lacing Head on 1 Octet: Number of frames in the lace minus 1. + /// Lacing size of each frame except the last one. + /// Binary data of each frame consecutively. + /// The lacing size is split into 255 values, stored as unsigned octets – for example, 500 is coded 255;245 or [0xFF 0xF5]. 
A frame with a size multiple of 255 is coded with a 0 at the end of the size – for example, 765 is coded 255;255;255;0 or [0xFF 0xFF 0xFF 0x00]. + /// The size of the last frame is deduced from the size remaining in the Block after the other frames. + Xiph, + + /// Fixed-size lacing: the payload after the one-octet frame count is split into equally sized frames. The bits 5-6 of the Block Header flags are set to 10. + FixedSize, + /// EBML lacing (variable-size frames with EBML-encoded sizes) + /// + /// The EBML lacing encodes the frame size with an EBML-like encoding (RFC 8794). The bits 5-6 of the Block Header flags are set to 11. + /// + /// The Block data with laced frames is stored as follows: + /// Lacing Head on 1 Octet: Number of frames in the lace minus 1. + /// Lacing size of each frame except the last one. + /// Binary data of each frame consecutively. + /// + /// The first frame size is encoded as an EBML Variable-Size Integer value, also known as VINT in RFC 8794. + /// The remaining frame sizes are encoded as signed values using the difference between the frame size and the previous frame size. + /// These signed values are encoded as VINT, with a mapping from signed to unsigned numbers. + /// Decoding the unsigned number stored in the VINT to a signed number is done by subtracting 2^((7*n)-1)-1, where n is the octet size of the VINT. 
+ Ebml, +} + +impl Lacer { + /// Encode multiple frames into a single laced block. + /// + /// Returns an empty buffer when `frames` is empty. + /// + /// # Panics + /// + /// Panics if more than 256 frames are given (the frame count is stored minus one in a single octet), if the frames differ in size under `FixedSize` lacing, or if two consecutive frame sizes differ too much for `Ebml` lacing. + pub fn lace(&self, frames: &[&[u8]]) -> Vec { + if frames.is_empty() { + return vec![]; + } + let num_frames = frames.len(); + // the lace header keeps `frame count - 1` in one octet, so 256 frames is the ceiling + assert!( + num_frames <= 256, + "A lace can hold at most 256 frames, got {}", + num_frames + ); + let mut output = vec![]; + output.push((num_frames - 1) as u8); // Number of frames - 1 + + match self { + Lacer::Xiph => { + for frame in &frames[..num_frames - 1] { + let mut size = frame.len(); + while size >= 0xFF { + output.push(0xFF); + size -= 0xFF; + } + output.push(size as u8); + } + for frame in frames { + output.extend_from_slice(frame); + } + output + } + Lacer::FixedSize => { + let frame_size = frames[0].len(); + if let Some((idx, bad_frame)) = frames + .iter() + .enumerate() + .find(|(_, f)| f.len() != frame_size) + { + panic!( + "All frames must have the same size for FixedSize lacing: expected size {}, but frame at index {} has size {}", + frame_size, + idx, + bad_frame.len() + ); + } + for frame in frames { + output.extend_from_slice(frame); + } + output + } + Lacer::Ebml => { + if num_frames == 1 { + output.extend_from_slice(frames[0]); + return output; + } + let sizes = frames.iter().map(|f| f.len() as u64).collect::>(); + // except first size, other sizes are stored as diffs to the previous size + let diff_sizes = std::iter::once( + // first + VInt64::new(sizes[0]), + ) + .chain(sizes.windows(2).map(|w| { + let diff = w[1] as i64 - w[0] as i64; + + // choose the VINT octet count n such that -(2^(7n-1) - 1) < diff < 2^(7n-1) + // NOTE(review): this presumes the encoded VINT really occupies n octets; if `VInt64` encodes with the minimal width, a mapped value small enough for fewer octets would be decoded with the wrong bias - TODO confirm against `VInt64::encode` + let n = if diff > -(2i64.pow(6) - 1) && diff < (2i64.pow(6)) { + 1 + } else if diff > -(2i64.pow(13) - 1) && diff < (2i64.pow(13)) { + 2 + } else if diff > -(2i64.pow(20) - 1) && diff < (2i64.pow(20)) { + 3 + } else if diff > -(2i64.pow(27) - 1) && diff < (2i64.pow(27)) { + 4 + } else if diff > -(2i64.pow(34) - 1) && diff < (2i64.pow(34)) { + 5 + } else if diff > -(2i64.pow(41) - 1) && diff < (2i64.pow(41)) { + 6 + } else if diff > -(2i64.pow(48) - 1) && diff < (2i64.pow(48)) { + 7 + } else { + panic!("Frame size diff too large for EBML lacing: diff = {}", diff); + }; + + 
// map to unsigned + let diff_unsigned = diff + (2i64.pow(7 * n as u32 - 1) - 1); + VInt64::new(diff_unsigned as u64) + })) + // dont include last size, it is deduced from remaining data + .take(num_frames - 1); + + for size in diff_sizes { + size.encode(&mut output).unwrap(); + } + for frame in frames { + output.extend_from_slice(frame); + } + output + } + } + } + + /// Decode a laced block into individual frames + pub fn delace<'a>(&self, data: &'a [u8]) -> crate::Result> { + // TODO(perf): avoid heap allocations ideally + // we should be able to return a `impl Iterator>` here + // can make it work using nightly features like `generators`. + // but not sure how to do that with the current stable Rust. + + if data.is_empty() { + return Ok(vec![]); + } + let num_frames = data[0] as usize + 1; + if num_frames == 1 { + return Ok(vec![&data[1..]]); + } + + match self { + Lacer::Xiph => { + let mut out = Vec::with_capacity(num_frames); + + let data_start_pos = data + .iter() + .enumerate() + .skip(1) + .filter(|(_, b)| **b != 0xFF) + .nth(num_frames - 2) + .map(|(i, _)| i) + .ok_or(Error::MalformedLacingData)? + + 1; + + let laced_data = data + .get(data_start_pos..) 
+ .ok_or(Error::MalformedLacingData)?; + + let mut start = 0; + for size in data[1..data_start_pos] + .split_inclusive(|b| *b != 0xFF) + .map(|chunk| chunk.iter().map(|b| *b as usize).sum::()) + { + out.push( + laced_data + .get(start..start + size) + .ok_or(Error::MalformedLacingData)?, + ); + start += size; + } + out.push(laced_data.get(start..).ok_or(Error::MalformedLacingData)?); + Ok(out) + } + Lacer::FixedSize => { + let data_len = data.len() - 1; + + // all frames must have the same size + if !data_len.is_multiple_of(num_frames) { + return Err(Error::MalformedLacingData); + } + + let frame_size = data_len / num_frames; + if frame_size == 0 { + // empty payload: `chunks(0)` would panic, so hand back `num_frames` empty frames (mirrors what `lace` emits for empty frames) + return Ok(vec![&data[1..]; num_frames]); + } + + Ok(data[1..].chunks(frame_size).collect()) + } + Lacer::Ebml => { + let mut data_buf = &data[1..]; + let mut out_sizes = Vec::with_capacity(num_frames - 1); + let first_size = VInt64::read_from(&mut data_buf)?; + out_sizes.push(*first_size as usize); + for _ in 1..(num_frames - 1) { + let oct_size = data_buf + .first() + .ok_or(Error::MalformedLacingData)? + .leading_zeros() + + 1; + let current_encoded_vint = VInt64::read_from(&mut data_buf)?; + // unsigned to signed + let diff = *current_encoded_vint as i64 - (2i64.pow(7 * oct_size - 1) - 1); + let new_size = out_sizes + .last() + .unwrap() + .checked_add_signed(diff as isize) + .ok_or(Error::MalformedLacingData)?; + out_sizes.push(new_size); + } + + let mut out = Vec::with_capacity(num_frames); + + let mut start = 0; + for size in out_sizes { + out.push( + data_buf + .get(start..start + size) + .ok_or(Error::MalformedLacingData)?, + ); + start += size; + } + out.push(data_buf.get(start..).ok_or(Error::MalformedLacingData)?); + Ok(out) + } + } + } +} + +#[cfg(test)] +mod lacer_tests { + use super::*; + #[test] + fn test_xiph_lacing() { + // 0 frames + let laced = Lacer::Xiph.lace(&[]); + assert_eq!(laced, vec![]); + let frames: Vec<_> = Lacer::Xiph.delace(&[]).unwrap(); + assert_eq!(frames.len(), 0); + + // 4 frames, sizes: 255, 256, 1, remaining + let len = vec![0x03, 0xFF, 0x00, 0xFF, 0x1, 0x1]; + let frame0 = 
vec![2u8; 255]; + let frame1 = vec![42u8; 256]; + let frame2 = vec![38u8; 1]; + let frame3 = vec![100u8; 1]; + + let laced = Lacer::Xiph.lace(&[&frame0, &frame1, &frame2, &frame3]); + let data = [len, frame0, frame1, frame2, frame3].concat(); + assert_eq!(laced, data); + + let frames: Vec<_> = Lacer::Xiph.delace(&data).unwrap(); + assert_eq!(frames.len(), 4); + assert_eq!(frames[0], &[2u8; 255]); + assert_eq!(frames[1], &[42u8; 256]); + assert_eq!(frames[2], &[38u8; 1]); + assert_eq!(frames[3], &[100u8; 1]); + + // 1 frame, size: remaining + let len = vec![0x00]; + let frame0 = vec![2u8; 255]; + + let laced = Lacer::Xiph.lace(&[&frame0]); + let data = [len, frame0].concat(); + assert_eq!(laced, data); + + let frames: Vec<_> = Lacer::Xiph.delace(&data).unwrap(); + assert_eq!(frames.len(), 1); + assert_eq!(frames[0], &[2u8; 255]); + + // 2 frames, sizes: 32, remaining + let len = vec![0x01, 0x20]; + let frame0 = vec![2u8; 32]; + let frame1 = vec![42u8; 256]; + + let laced = Lacer::Xiph.lace(&[&frame0, &frame1]); + let data = [len, frame0, frame1].concat(); + assert_eq!(laced, data); + + let frames: Vec<_> = Lacer::Xiph.delace(&data).unwrap(); + assert_eq!(frames.len(), 2); + assert_eq!(frames[0], &[2u8; 32]); + assert_eq!(frames[1], &[42u8; 256]); + + // 4 frames, sizes: 600, 3, 520, remaining + let len = vec![0x03, 0xFF, 0xFF, 0x5A, 0x3, 0xFF, 0xFF, 0xA]; + assert_eq!(0xff + 0xff + 0x5A, 600); + assert_eq!(0xff + 0xff + 0xA, 520); + let frame0 = vec![2u8; 600]; + let frame1 = vec![42u8; 3]; + let frame2 = vec![38u8; 520]; + let frame3 = vec![100u8; 1]; + + let laced = Lacer::Xiph.lace(&[&frame0, &frame1, &frame2, &frame3]); + let data = [len, frame0, frame1, frame2, frame3].concat(); + assert_eq!(laced, data); + + let frames: Vec<_> = Lacer::Xiph.delace(&data).unwrap(); + assert_eq!(frames.len(), 4); + assert_eq!(frames[0], &[2u8; 600]); + assert_eq!(frames[1], &[42u8; 3]); + assert_eq!(frames[2], &[38u8; 520]); + assert_eq!(frames[3], &[100u8; 1]); + } + + #[test] 
+ fn test_ebml_lacing() { + // 0 frames + let laced = Lacer::Ebml.lace(&[]); + assert_eq!(laced, vec![]); + let frames: Vec<_> = Lacer::Ebml.delace(&[]).unwrap(); + assert_eq!(frames.len(), 0); + + // 3 frames, sizes: 800, 500, remaining(1000) + + // store as size diffs: 800, -300 + + // offset = 2**(7*n - 1) - 1 + // n = 2 -> 2**13 - 1 = 8191 + // convert to uint: 800, 7891(-300+8191) + + // encode as VInt: + // 0x4320(800), 0x5ED3(7891) + + let len = vec![0x02, 0x43, 0x20, 0x5E, 0xD3]; + let frame0 = vec![2u8; 800]; + let frame1 = vec![42u8; 500]; + let frame2 = vec![38u8; 1000]; + let laced = Lacer::Ebml.lace(&[&frame0, &frame1, &frame2]); + let data = [len, frame0, frame1, frame2].concat(); + assert_eq!(laced, data); + + let frames: Vec<_> = Lacer::Ebml.delace(&data).unwrap(); + assert_eq!(frames.len(), 3); + assert_eq!(frames[0], &[2u8; 800]); + assert_eq!(frames[1], &[42u8; 500]); + assert_eq!(frames[2], &[38u8; 1000]); + + // 7 frames, sizes 2, 5000, 4980, 400, 20, 2000, remaining(300) + // store as size diffs: 2, 4998, -20, -4580, -380, 1980 + let len = vec![ + 0x06, 0x82, 0x73, 0x85, 0xAB, 0x4E, 0x1B, 0x5E, 0x83, 0x67, 0xBB, + ]; + let frame0 = vec![2u8; 2]; + let frame1 = vec![42u8; 5000]; + let frame2 = vec![38u8; 4980]; + let frame3 = vec![100u8; 400]; + let frame4 = vec![7u8; 20]; + let frame5 = vec![8u8; 2000]; + let frame6 = vec![9u8; 300]; + let laced = Lacer::Ebml.lace(&[ + &frame0, &frame1, &frame2, &frame3, &frame4, &frame5, &frame6, + ]); + let data = [len, frame0, frame1, frame2, frame3, frame4, frame5, frame6].concat(); + assert_eq!(laced, data); + let frames: Vec<_> = Lacer::Ebml.delace(&data).unwrap(); + assert_eq!(frames.len(), 7); + assert_eq!(frames[0], &[2u8; 2]); + assert_eq!(frames[1], &[42u8; 5000]); + assert_eq!(frames[2], &[38u8; 4980]); + assert_eq!(frames[3], &[100u8; 400]); + assert_eq!(frames[4], &[7u8; 20]); + assert_eq!(frames[5], &[8u8; 2000]); + assert_eq!(frames[6], &[9u8; 300]); + } + + #[test] + fn 
test_fixed_size_lacing() { + // 0 frames + let laced = Lacer::FixedSize.lace(&[]); + assert_eq!(laced, vec![]); + let frames: Vec<_> = Lacer::FixedSize.delace(&[]).unwrap(); + assert_eq!(frames.len(), 0); + + // 3 frames, sizes: 500, 500, 500 + let len = vec![0x02]; + let frame0 = vec![2u8; 500]; + let frame1 = vec![42u8; 500]; + let frame2 = vec![38u8; 500]; + let laced = Lacer::FixedSize.lace(&[&frame0, &frame1, &frame2]); + let data = [len, frame0, frame1, frame2].concat(); + assert_eq!(laced, data); + + let frames: Vec<_> = Lacer::FixedSize.delace(&data).unwrap(); + assert_eq!(frames.len(), 3); + assert_eq!(frames[0], &[2u8; 500]); + assert_eq!(frames[1], &[42u8; 500]); + assert_eq!(frames[2], &[38u8; 500]); + } +} diff --git a/src/lib.rs b/src/lib.rs index 15638b2..63a355f 100644 --- a/src/lib.rs +++ b/src/lib.rs @@ -2,26 +2,23 @@ #![warn(missing_docs)] #![doc = include_str!("../README.md")] -/// Error types for this crate. +mod base; // base types for Matroska elements. ie. `VInt64`, `Header`, etc. +mod element; // Element body definitions and traits. mod error; +mod frame; mod functional; -pub use error::*; +mod lacer; +mod leaf; // Leaf elements in Matroska. +mod master; // Master elements in Matroska. +mod supplement; // Supplementary elements in Matroska. Void elements, CRC-32, etc. -/// I/O utilities. +// following modules are public pub mod io; -/// base types for Matroska elements. ie. `VInt64`, `Header`, etc. -mod base; -/// Leaf elements in Matroska. -mod leaf; -/// Master elements in Matroska. -mod master; -/// Supplementary elements in Matroska. Void elements, CRC-32, etc. -/// -/// These elements are not from the Matroska specification, but Matroska specifications inherit them from EBML specifications. -mod supplement; -// Element body definitions and traits. -mod element; +// Re-export common types +pub use crate::frame::*; +pub use crate::lacer::*; +pub use error::*; /// A prelude for common types and traits. 
pub mod prelude { diff --git a/src/master.rs b/src/master.rs index a56852d..27f396a 100644 --- a/src/master.rs +++ b/src/master.rs @@ -1,6 +1,7 @@ use crate::Error; use crate::base::*; use crate::element::*; +use crate::frame::ClusterBlock; use crate::functional::*; use crate::leaf::*; use crate::supplement::*; @@ -293,18 +294,118 @@ pub struct Cluster { pub position: Option, /// Size of the previous Cluster, in octets. Can be useful for backward playing. pub prev_size: Option, - /// Similar to Block, see [basics](https://www.matroska.org/technical/basics.html#block-structure), but without all the extra information, mostly used to reduced overhead when no extra feature is needed; see basics on SimpleBlock Structure. - pub simple_block: Vec, - /// Basic container of information containing a single Block and information specific to that Block. - pub block_group: Vec, + /// One or more blocks of data (see Block and SimpleBlock) and their associated data (see BlockGroup). + pub blocks: Vec, } +// Here we manually implement Element for Cluster, aggregating both SimpleBlock and BlockGroup into ClusterBlock, preserving their order. impl Element for Cluster { const ID: VInt64 = VInt64::from_encoded(0x1F43B675); - nested! { - required: [ Timestamp ], - optional: [ Position, PrevSize ], - multiple: [ SimpleBlock, BlockGroup ], + fn decode_body(buf: &mut &[u8]) -> crate::Result { + let crc32 = if buf.len() > 6 && buf[0] == 0xBF && buf[1] == 0x84 { + Some(Crc32::decode(buf)?) 
+ } else { + None + }; + + let mut timestamp = None; + let mut position = None; + let mut prev_size = None; + let mut blocks = Vec::new(); + + let mut void: Option = None; + + while let Ok(header) = Header::decode(buf) { + if *header.size > buf.len() as u64 { + return Err(Error::OverDecode(header.id)); + } + match header.id { + Timestamp::ID => { + if timestamp.is_some() { + return Err(Error::DuplicateElement { + id: header.id, + parent: Self::ID, + }); + } else { + timestamp = Some(Timestamp::decode_element(&header, buf)?) + } + } + Position::ID => { + if position.is_some() { + return Err(Error::DuplicateElement { + id: header.id, + parent: Self::ID, + }); + } else { + position = Some(Position::decode_element(&header, buf)?) + } + } + PrevSize::ID => { + if prev_size.is_some() { + return Err(Error::DuplicateElement { + id: header.id, + parent: Self::ID, + }); + } else { + prev_size = Some(PrevSize::decode_element(&header, buf)?) + } + } + SimpleBlock::ID => { + blocks.push(SimpleBlock::decode_element(&header, buf)?.into()); + } + BlockGroup::ID => { + blocks.push(BlockGroup::decode_element(&header, buf)?.into()); + } + Void::ID => { + let v = Void::decode_element(&header, buf)?; + if let Some(previous) = void { + void = Some(Void { + size: previous.size + v.size, + }); + } else { + void = Some(v); + } + log::info!( + "Skipping Void element in Element {}, size: {}B", + Self::ID, + *header.size + ); + } + _ => { + buf.advance(*header.size as usize); + log::warn!( + "Unknown element {}({}b) in Element({})", + header.id, + *header.size, + Self::ID + ); + } + } + } + + if buf.has_remaining() { + return Err(Error::ShortRead); + } + + Ok(Self { + crc32, + timestamp: timestamp.ok_or(Error::MissingElement(Timestamp::ID))?, + position, + prev_size, + blocks, + void, + }) + } + + fn encode_body(&self, buf: &mut B) -> crate::Result<()> { + self.crc32.encode(buf)?; + self.timestamp.encode(buf)?; + self.position.encode(buf)?; + self.prev_size.encode(buf)?; + 
self.blocks.encode(buf)?; + + self.void.encode(buf)?; + Ok(()) } } diff --git a/tests/ietf-mkv-test-cases.rs b/tests/ietf-mkv-test-cases.rs index 4a3183e..24d833d 100644 --- a/tests/ietf-mkv-test-cases.rs +++ b/tests/ietf-mkv-test-cases.rs @@ -1,6 +1,7 @@ use core::panic; use std::io::*; +use mkv_element::ClusterBlock; use mkv_element::io::blocking_impl::*; use mkv_element::prelude::*; @@ -43,7 +44,10 @@ fn ietf_test_1() { // It contains MPEG4.2 (DivX) video, (854x480) MP3 audio, uses only SimpleBlock (matroska DocType v2) assert!( - segment.cluster.iter().all(|c| c.block_group.is_empty()), + segment.cluster.iter().all(|c| c + .blocks + .iter() + .all(|b| matches!(b, ClusterBlock::Simple(_)))), "All clusters should use SimpleBlock only" ); @@ -154,10 +158,6 @@ fn ietf_test_3() { "Matroska Validation File 3, header stripping on the video track and no SimpleBlock" )) ); - assert!( - segment.cluster.iter().all(|c| !c.simple_block.is_empty()), - "All clusters use SimpleBlock only" - ); // It contains H264 (1024x576 pixels), and stereo MP3. let tracks = segment.tracks.as_ref().unwrap(); @@ -261,14 +261,16 @@ fn ietf_test_4() { Some(PrevSize::read_element(&header, &mut file).unwrap()); } SimpleBlock::ID => { - cluster - .simple_block - .push(SimpleBlock::read_element(&header, &mut file).unwrap()); + cluster.blocks.push( + SimpleBlock::read_element(&header, &mut file) + .unwrap() + .into(), + ); } BlockGroup::ID => { cluster - .block_group - .push(BlockGroup::read_element(&header, &mut file).unwrap()); + .blocks + .push(BlockGroup::read_element(&header, &mut file).unwrap().into()); } _ => { // unexpected element skip