diff --git a/src/uu/dd/src/dd.rs b/src/uu/dd/src/dd.rs index 6b34afd012f..a89841b4235 100644 --- a/src/uu/dd/src/dd.rs +++ b/src/uu/dd/src/dd.rs @@ -167,34 +167,6 @@ impl Num { } } -/// Read and discard `n` bytes from `reader` using a buffer of size `buf_size`. -/// -/// This is more efficient than `io::copy` with `BufReader` because it reads -/// directly in `buf_size`-sized chunks, matching GNU dd's behavior. -/// Returns the total number of bytes actually read. -fn read_and_discard(reader: &mut R, n: u64, buf_size: usize) -> io::Result { - // todo: consider splice()ing to /dev/null on Linux - let mut buf = Vec::new(); - buf.try_reserve(buf_size.min(n as usize))?; // try_with_capacity is unstable - let mut total = 0u64; - let mut remaining = n; - while remaining > 0 { - let to_read = cmp::min(remaining, buf_size as u64); - buf.clear(); - match reader.by_ref().take(to_read).read_to_end(&mut buf) { - Ok(0) => break, // EOF - Ok(bytes_read) => { - total += bytes_read as u64; - remaining -= bytes_read as u64; - } - Err(e) if e.kind() == io::ErrorKind::Interrupted => {} - Err(e) => return Err(e), - } - } - - Ok(total) -} - /// Data sources. /// /// Use [`Source::stdin_as_file`] if available to enable more @@ -235,7 +207,7 @@ impl Source { match self { #[cfg(not(unix))] Self::Stdin(stdin) => { - let m = read_and_discard(stdin, n, ibs)?; + let m = uucore::io::read_and_discard(stdin, n, ibs)?; if m < n { show_error!( "{}", @@ -274,7 +246,7 @@ impl Source { // ESPIPE means the file descriptor is not seekable (e.g., a pipe), // so fall back to reading and discarding bytes using ibs-sized buffer Some(Err(e)) if e.raw_os_error() == Some(libc::ESPIPE) => { - let m = read_and_discard(f, n, ibs)?; + let m = uucore::io::read_and_discard(f, n, ibs)?; if m < n { show_error!( "{}", @@ -295,7 +267,7 @@ impl Source { } Self::File(f) => f.seek(SeekFrom::Current(n.try_into().unwrap())), #[cfg(unix)] - Self::Fifo(f) => read_and_discard(f, n, ibs), + Self::Fifo(f) => uucore::io::read_and_discard(f, n, ibs), } } @@ -667,7 +639,7 @@ impl Dest { #[cfg(unix)] Self::Fifo(f) => { // Seeking in a named pipe means *reading* from the pipe. - read_and_discard(f, n, obs) + uucore::io::read_and_discard(f, n, obs) } #[cfg(unix)] Self::Sink => Ok(0), diff --git a/src/uu/od/src/multifile_reader.rs b/src/uu/od/src/multifile_reader.rs index 27c3b8376c3..bfbcfd7b81e 100644 --- a/src/uu/od/src/multifile_reader.rs +++ b/src/uu/od/src/multifile_reader.rs @@ -6,9 +6,15 @@ use std::fs::File; use std::io; +#[cfg(unix)] +use std::io::{Seek, SeekFrom}; use uucore::display::Quotable; use uucore::show_error; +use uucore::translate; + +/// Buffer size used when skipping bytes by reading and discarding them. +const SKIP_BUFFER_SIZE: usize = 16 * 1024; pub enum InputSource<'a> { FileName(&'a str), @@ -17,10 +23,27 @@ pub enum InputSource<'a> { Stream(Box), } +/// The file currently being read. A real `File` is kept as a concrete handle so +/// that `skip` can `fstat`/`seek` it; anything else (stdin, an in-memory stream) +/// can only be advanced by reading. +enum CurrentReader { + File(File), + Other(Box), +} + +impl io::Read for CurrentReader { + fn read(&mut self, buf: &mut [u8]) -> io::Result { + match self { + Self::File(f) => f.read(buf), + Self::Other(r) => r.read(buf), + } + } +} + // MultifileReader - concatenate all our input, file or stdin. pub struct MultifileReader<'a> { ni: Vec>, - curr_file: Option>, + curr_file: Option, any_err: bool, } @@ -57,7 +80,7 @@ impl MultifileReader<'_> { #[cfg(any(unix, target_os = "wasi"))] { let stdin = uucore::io::RawReader(rustix::stdio::stdin()); - self.curr_file = Some(Box::new(stdin)); + self.curr_file = Some(CurrentReader::Other(Box::new(stdin))); } // For non-unix platforms we don't have GNU compatibility requirements, so @@ -67,7 +90,7 @@ impl MultifileReader<'_> { #[cfg(not(any(unix, target_os = "wasi")))] { let stdin = io::stdin(); - self.curr_file = Some(Box::new(stdin)); + self.curr_file = Some(CurrentReader::Other(Box::new(stdin))); } break; } @@ -76,7 +99,7 @@ impl MultifileReader<'_> { Ok(f) => { // No need to wrap `f` in a BufReader - buffered reading is taken care // of elsewhere. - self.curr_file = Some(Box::new(f)); + self.curr_file = Some(CurrentReader::File(f)); break; } Err(e) => { @@ -96,12 +119,86 @@ impl MultifileReader<'_> { } } InputSource::Stream(s) => { - self.curr_file = Some(s); + self.curr_file = Some(CurrentReader::Other(s)); break; } } } } + + /// Skip `n_skip` bytes from the start of the combined input. + /// + /// A real file is positioned by `seek` whenever that is safe: a regular + /// file large enough that its reported size is trustworthy, or any seekable + /// special file (e.g. `/dev/null`, which can be skipped past its empty end). + /// Everything else - proc/sys files that report a bogus size, pipes, stdin - + /// is advanced by reading and discarding. Skipping past the end of the whole + /// input is an error, matching GNU `od`. + pub fn skip(&mut self, mut n_skip: u64) -> io::Result<()> { + while n_skip > 0 { + let Some(curr) = self.curr_file.as_mut() else { + break; + }; + n_skip = skip_in_file(curr, n_skip)?; + if n_skip == 0 { + break; + } + // Current file is exhausted; continue skipping in the next one. + self.next_file(); + } + + if n_skip > 0 { + return Err(io::Error::new( + io::ErrorKind::UnexpectedEof, + translate!("od-error-skip-past-end"), + )); + } + Ok(()) + } +} + +/// Skip up to `n_skip` bytes within a single file. Returns the number of bytes +/// that still need to be skipped (0 if the skip landed inside this file, or +/// the remainder if the file ended first). +fn skip_in_file(curr: &mut CurrentReader, n_skip: u64) -> io::Result { + #[cfg(unix)] + if let CurrentReader::File(f) = curr { + if let Ok(meta) = f.metadata() { + let size = meta.len(); + let blksize = uucore::fs::sane_blksize::sane_blksize_from_metadata(&meta); + + // A regular file larger than a block reports a reliable size, so we + // can either drop the whole file or seek within it. Small or + // proc-like files lie about their size and fall through to reading. + if meta.is_file() && blksize < size { + if size < n_skip { + return Ok(n_skip - size); + } + if seek_forward(f, n_skip)? { + return Ok(0); + } + } else if !meta.is_file() { + // Seekable special files (character/block devices) can be + // skipped past their end without error. + if seek_forward(f, n_skip).unwrap_or(false) { + return Ok(0); + } + } + } + } + let read = uucore::io::read_and_discard(curr, n_skip, SKIP_BUFFER_SIZE)?; + Ok(n_skip - read) +} + +/// Seek `f` forward by `n` bytes. Returns `Ok(true)` if the seek happened, or +/// `Ok(false)` if `n` is too large to express as a seek offset (the caller +/// should fall back to reading and discarding). +#[cfg(unix)] +fn seek_forward(f: &mut File, n: u64) -> io::Result { + match i64::try_from(n) { + Ok(off) => f.seek(SeekFrom::Current(off)).map(|_| true), + Err(_) => Ok(false), + } } impl io::Read for MultifileReader<'_> { diff --git a/src/uu/od/src/od.rs b/src/uu/od/src/od.rs index 0db92ff7bc6..37379d6a366 100644 --- a/src/uu/od/src/od.rs +++ b/src/uu/od/src/od.rs @@ -263,7 +263,7 @@ pub fn uumain(args: impl uucore::Args) -> UResult<()> { &od_options.input_strings, od_options.skip_bytes, od_options.read_bytes, - ); + )?; let mut input_decoder = InputDecoder::new( &mut input, od_options.line_bytes, @@ -763,17 +763,19 @@ fn open_input_peek_reader( input_strings: &[String], skip_bytes: u64, read_bytes: Option, -) -> PeekReader>>> { +) -> UResult>>>> { // should return "impl PeekRead + Read + HasError" when supported in (stable) rust let inputs = map_input_strings(input_strings); - let mf = MultifileReader::new(inputs); - let pr = PartialReader::new(mf, skip_bytes, read_bytes); + let mut mf = MultifileReader::new(inputs); + mf.skip(skip_bytes) + .map_err(|e| USimpleError::new(1, e.to_string()))?; + let pr = PartialReader::new(mf, read_bytes); // Add a BufReader over the top of the PartialReader. This will have the // effect of generating buffered reads to files/stdin, but since these reads // go through MultifileReader (which limits the maximum number of bytes read) // we won't ever read more bytes than were specified with the `-N` flag. let buf_pr = BufReader::new(pr); - PeekReader::new(buf_pr) + Ok(PeekReader::new(buf_pr)) } impl HasError for BufReader { diff --git a/src/uu/od/src/partial_reader.rs b/src/uu/od/src/partial_reader.rs index 31367181a85..6fe3dd8b9fa 100644 --- a/src/uu/od/src/partial_reader.rs +++ b/src/uu/od/src/partial_reader.rs @@ -5,63 +5,29 @@ // spell-checker:ignore mockstream abcdefgh bcdefgh multifile -use std::cmp; use std::io; use std::io::Read; use crate::multifile_reader::HasError; -use uucore::translate; -/// When a large number of bytes must be skipped, it will be read into a -/// dynamically allocated buffer. The buffer will be limited to this size. -const MAX_SKIP_BUFFER: usize = 16 * 1024; - -/// Wrapper for `std::io::Read` which can skip bytes at the beginning -/// of the input, and it can limit the returned bytes to a particular -/// number of bytes. +/// Wrapper for `std::io::Read` which limits the returned bytes to a particular +/// number of bytes. Skipping leading bytes is handled upstream by +/// `MultifileReader::skip`, which can seek seekable inputs. pub struct PartialReader { inner: R, - skip: u64, limit: Option, } impl PartialReader { - /// Create a new `PartialReader` wrapping `inner`, which will skip - /// `skip` bytes, and limits the output to `limit` bytes. Set `limit` - /// to `None` if there should be no limit. - pub fn new(inner: R, skip: u64, limit: Option) -> Self { - Self { inner, skip, limit } + /// Create a new `PartialReader` wrapping `inner`, limiting the output to + /// `limit` bytes. Set `limit` to `None` if there should be no limit. + pub fn new(inner: R, limit: Option) -> Self { + Self { inner, limit } } } impl Read for PartialReader { fn read(&mut self, out: &mut [u8]) -> io::Result { - if self.skip > 0 { - let mut bytes = [0; MAX_SKIP_BUFFER]; - - while self.skip > 0 { - let skip_count: usize = cmp::min(self.skip as usize, MAX_SKIP_BUFFER); - - loop { - match self.inner.read(&mut bytes[..skip_count]) { - Ok(0) => { - // this is an error as we still have more to skip - return Err(io::Error::new( - io::ErrorKind::UnexpectedEof, - translate!("od-error-skip-past-end"), - )); - } - Ok(n) => { - self.skip -= n as u64; - break; - } - Err(e) if e.kind() == io::ErrorKind::Interrupted => {} - Err(e) => return Err(e), - } - } - } - } - match self.limit { None => loop { match self.inner.read(out) { @@ -106,7 +72,7 @@ mod tests { #[test] fn test_read_without_limits() { let mut v = [0; 10]; - let mut sut = PartialReader::new(Cursor::new(&b"abcdefgh"[..]), 0, None); + let mut sut = PartialReader::new(Cursor::new(&b"abcdefgh"[..]), None); assert_eq!(sut.read(v.as_mut()).unwrap(), 8); assert_eq!(v, [0x61, 0x62, 0x63, 0x64, 0x65, 0x66, 0x67, 0x68, 0, 0]); @@ -116,65 +82,17 @@ mod tests { fn test_read_without_limits_with_error() { let mut v = [0; 10]; let f = FailingMockStream::new(ErrorKind::PermissionDenied, "No access", 3); - let mut sut = PartialReader::new(f, 0, None); - - let error = sut.read(v.as_mut()).unwrap_err(); - assert_eq!(error.kind(), ErrorKind::PermissionDenied); - assert_eq!(error.to_string(), "No access"); - } - - #[test] - fn test_read_skipping_bytes() { - let mut v = [0; 10]; - let mut sut = PartialReader::new(Cursor::new(&b"abcdefgh"[..]), 2, None); - - assert_eq!(sut.read(v.as_mut()).unwrap(), 6); - assert_eq!(v, [0x63, 0x64, 0x65, 0x66, 0x67, 0x68, 0, 0, 0, 0]); - } - - #[test] - fn test_read_skipping_all() { - let mut v = [0; 10]; - let mut sut = PartialReader::new(Cursor::new(&b"abcdefgh"[..]), 20, None); - - let error = sut.read(v.as_mut()).unwrap_err(); - assert_eq!(error.kind(), ErrorKind::UnexpectedEof); - } - - #[test] - fn test_read_skipping_with_error() { - let mut v = [0; 10]; - let f = FailingMockStream::new(ErrorKind::PermissionDenied, "No access", 3); - let mut sut = PartialReader::new(f, 2, None); + let mut sut = PartialReader::new(f, None); let error = sut.read(v.as_mut()).unwrap_err(); assert_eq!(error.kind(), ErrorKind::PermissionDenied); assert_eq!(error.to_string(), "No access"); } - #[test] - fn test_read_skipping_with_two_reads_during_skip() { - let mut v = [0; 10]; - let c = Cursor::new(&b"a"[..]).chain(Cursor::new(&b"bcdefgh"[..])); - let mut sut = PartialReader::new(c, 2, None); - - assert_eq!(sut.read(v.as_mut()).unwrap(), 6); - assert_eq!(v, [0x63, 0x64, 0x65, 0x66, 0x67, 0x68, 0, 0, 0, 0]); - } - - #[test] - fn test_read_skipping_huge_number() { - let mut v = [0; 10]; - // test if it does not eat all memory.... - let mut sut = PartialReader::new(Cursor::new(&b"abcdefgh"[..]), u64::MAX, None); - - sut.read(v.as_mut()).unwrap_err(); - } - #[test] fn test_read_limiting_all() { let mut v = [0; 10]; - let mut sut = PartialReader::new(Cursor::new(&b"abcdefgh"[..]), 0, Some(0)); + let mut sut = PartialReader::new(Cursor::new(&b"abcdefgh"[..]), Some(0)); assert_eq!(sut.read(v.as_mut()).unwrap(), 0); } @@ -182,7 +100,7 @@ mod tests { #[test] fn test_read_limiting() { let mut v = [0; 10]; - let mut sut = PartialReader::new(Cursor::new(&b"abcdefgh"[..]), 0, Some(6)); + let mut sut = PartialReader::new(Cursor::new(&b"abcdefgh"[..]), Some(6)); assert_eq!(sut.read(v.as_mut()).unwrap(), 6); assert_eq!(v, [0x61, 0x62, 0x63, 0x64, 0x65, 0x66, 0, 0, 0, 0]); @@ -192,7 +110,7 @@ mod tests { fn test_read_limiting_with_error() { let mut v = [0; 10]; let f = FailingMockStream::new(ErrorKind::PermissionDenied, "No access", 3); - let mut sut = PartialReader::new(f, 0, Some(6)); + let mut sut = PartialReader::new(f, Some(6)); let error = sut.read(v.as_mut()).unwrap_err(); assert_eq!(error.kind(), ErrorKind::PermissionDenied); @@ -202,7 +120,7 @@ mod tests { #[test] fn test_read_limiting_with_large_limit() { let mut v = [0; 10]; - let mut sut = PartialReader::new(Cursor::new(&b"abcdefgh"[..]), 0, Some(20)); + let mut sut = PartialReader::new(Cursor::new(&b"abcdefgh"[..]), Some(20)); assert_eq!(sut.read(v.as_mut()).unwrap(), 8); assert_eq!(v, [0x61, 0x62, 0x63, 0x64, 0x65, 0x66, 0x67, 0x68, 0, 0]); @@ -211,7 +129,7 @@ mod tests { #[test] fn test_read_limiting_with_multiple_reads() { let mut v = [0; 3]; - let mut sut = PartialReader::new(Cursor::new(&b"abcdefgh"[..]), 0, Some(6)); + let mut sut = PartialReader::new(Cursor::new(&b"abcdefgh"[..]), Some(6)); assert_eq!(sut.read(v.as_mut()).unwrap(), 3); assert_eq!(v, [0x61, 0x62, 0x63]); @@ -219,13 +137,4 @@ mod tests { assert_eq!(v, [0x64, 0x65, 0x66]); assert_eq!(sut.read(v.as_mut()).unwrap(), 0); } - - #[test] - fn test_read_skipping_and_limiting() { - let mut v = [0; 10]; - let mut sut = PartialReader::new(Cursor::new(&b"abcdefgh"[..]), 2, Some(4)); - - assert_eq!(sut.read(v.as_mut()).unwrap(), 4); - assert_eq!(v, [0x63, 0x64, 0x65, 0x66, 0, 0, 0, 0, 0, 0]); - } } diff --git a/src/uucore/src/lib/mods/io.rs b/src/uucore/src/lib/mods/io.rs index eccbd10e37a..530be1ecc1b 100644 --- a/src/uucore/src/lib/mods/io.rs +++ b/src/uucore/src/lib/mods/io.rs @@ -134,3 +134,46 @@ impl From for Stdio { value.into_stdio() } } + +/// Read and discard up to `n` bytes from `reader`, using a `buf_size` buffer. +/// +/// Returns the number of bytes actually read; a value less than `n` means the +/// reader hit EOF first. Reads are retried on [`io::ErrorKind::Interrupted`]. +/// This is used to skip over the start of an input that cannot be `seek`ed. +pub fn read_and_discard(reader: &mut R, n: u64, buf_size: usize) -> io::Result { + use io::Read; + let mut buf = Vec::new(); + buf.try_reserve(buf_size.min(n as usize))?; + let mut total = 0u64; + while total < n { + let to_read = (n - total).min(buf_size as u64); + buf.clear(); + match reader.by_ref().take(to_read).read_to_end(&mut buf) { + Ok(0) => break, // EOF + Ok(read) => total += read as u64, + Err(e) if e.kind() == io::ErrorKind::Interrupted => {} + Err(e) => return Err(e), + } + } + Ok(total) +} + +#[cfg(test)] +mod tests { + use super::read_and_discard; + use std::io::Cursor; + + #[test] + fn discard_within_input() { + let mut r = Cursor::new(b"abcdefgh".to_vec()); + assert_eq!(read_and_discard(&mut r, 3, 4).unwrap(), 3); + assert_eq!(r.position(), 3); + } + + #[test] + fn discard_stops_at_eof() { + let mut r = Cursor::new(b"abc".to_vec()); + // Asking for more than is available returns only what was read. + assert_eq!(read_and_discard(&mut r, 100, 4).unwrap(), 3); + } +} diff --git a/tests/by-util/test_od.rs b/tests/by-util/test_od.rs index bc601318606..b1191052371 100644 --- a/tests/by-util/test_od.rs +++ b/tests/by-util/test_od.rs @@ -934,6 +934,27 @@ fn test_skip_bytes_error() { .failure(); } +// A seekable special file such as /dev/null can be skipped past its (empty) +// end without error, matching GNU od. +#[cfg(unix)] +#[test] +fn test_skip_bytes_past_end_of_seekable_device() { + new_ucmd!() + .arg("-j1") + .arg("/dev/null") + .succeeds() + .stdout_only("0000001\n"); +} + +// Skipping past the end of a regular file is an error and must not print a +// trailing offset line, matching GNU od. +#[test] +fn test_skip_bytes_past_end_no_offset() { + let (at, mut ucmd) = at_and_ucmd!(); + at.write("f", "hello"); + ucmd.arg("-j10").arg("f").fails().no_stdout(); +} + #[test] #[cfg_attr( wasi_runner,