Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
36 changes: 4 additions & 32 deletions src/uu/dd/src/dd.rs
Original file line number Diff line number Diff line change
Expand Up @@ -167,34 +167,6 @@ impl Num {
}
}

/// Read and discard up to `n` bytes from `reader`, using a scratch buffer of at
/// most `buf_size` bytes.
///
/// This is more efficient than `io::copy` with `BufReader` because it reads
/// directly in `buf_size`-sized chunks, matching GNU dd's behavior.
///
/// Reading stops early when the reader reports end of input, so the returned
/// count (the number of bytes actually consumed) may be smaller than `n`.
///
/// # Errors
///
/// Returns an error if the scratch buffer cannot be allocated, or if the reader
/// fails with anything other than `ErrorKind::Interrupted`.
fn read_and_discard<R: Read>(reader: &mut R, n: u64, buf_size: usize) -> io::Result<u64> {
    // todo: consider splice()ing to /dev/null on Linux

    // Never reserve more than will be read. `n` may not fit in `usize` on
    // 32-bit targets; in that case it is certainly larger than `buf_size`, so
    // fall back to `buf_size` instead of truncating with `as`.
    let capacity = usize::try_from(n).map_or(buf_size, |wanted| wanted.min(buf_size));
    let mut buf = Vec::new();
    buf.try_reserve(capacity)?; // try_with_capacity is unstable <https://github.com/rust-lang/rust/issues/91913>

    let mut total = 0u64;
    while total < n {
        // Read at most one buffer's worth, and never past the requested count.
        let to_read = cmp::min(n - total, buf_size as u64);
        buf.clear();
        match reader.by_ref().take(to_read).read_to_end(&mut buf) {
            Ok(0) => break, // EOF
            Ok(bytes_read) => total += bytes_read as u64,
            // An interrupted read is not a failure: try this chunk again.
            Err(e) if e.kind() == io::ErrorKind::Interrupted => {}
            Err(e) => return Err(e),
        }
    }

    Ok(total)
}

/// Data sources.
///
/// Use [`Source::stdin_as_file`] if available to enable more
Expand Down Expand Up @@ -235,7 +207,7 @@ impl Source {
match self {
#[cfg(not(unix))]
Self::Stdin(stdin) => {
let m = read_and_discard(stdin, n, ibs)?;
let m = uucore::io::read_and_discard(stdin, n, ibs)?;
if m < n {
show_error!(
"{}",
Expand Down Expand Up @@ -274,7 +246,7 @@ impl Source {
// ESPIPE means the file descriptor is not seekable (e.g., a pipe),
// so fall back to reading and discarding bytes using ibs-sized buffer
Some(Err(e)) if e.raw_os_error() == Some(libc::ESPIPE) => {
let m = read_and_discard(f, n, ibs)?;
let m = uucore::io::read_and_discard(f, n, ibs)?;
if m < n {
show_error!(
"{}",
Expand All @@ -295,7 +267,7 @@ impl Source {
}
Self::File(f) => f.seek(SeekFrom::Current(n.try_into().unwrap())),
#[cfg(unix)]
Self::Fifo(f) => read_and_discard(f, n, ibs),
Self::Fifo(f) => uucore::io::read_and_discard(f, n, ibs),
}
}

Expand Down Expand Up @@ -667,7 +639,7 @@ impl Dest {
#[cfg(unix)]
Self::Fifo(f) => {
// Seeking in a named pipe means *reading* from the pipe.
read_and_discard(f, n, obs)
uucore::io::read_and_discard(f, n, obs)
}
#[cfg(unix)]
Self::Sink => Ok(0),
Expand Down
107 changes: 102 additions & 5 deletions src/uu/od/src/multifile_reader.rs
Original file line number Diff line number Diff line change
Expand Up @@ -6,9 +6,15 @@

use std::fs::File;
use std::io;
#[cfg(unix)]
use std::io::{Seek, SeekFrom};

use uucore::display::Quotable;
use uucore::show_error;
use uucore::translate;

/// Buffer size (16 KiB) passed to `uucore::io::read_and_discard` when input has
/// to be skipped by reading and discarding it rather than by seeking.
const SKIP_BUFFER_SIZE: usize = 16 * 1024;

pub enum InputSource<'a> {
FileName(&'a str),
Expand All @@ -17,10 +23,27 @@ pub enum InputSource<'a> {
Stream(Box<dyn io::Read>),
}

/// Handle on the input that is being consumed right now.
///
/// Real files keep their concrete `File` type so that skipping can query
/// their metadata and `seek` them. Every other source (stdin, an in-memory
/// stream) is only reachable through `io::Read` and can therefore only be
/// advanced by reading.
enum CurrentReader {
    /// An opened file, kept as a concrete handle.
    File(File),
    /// Any other byte source, usable only via `io::Read`.
    Other(Box<dyn io::Read>),
}

impl io::Read for CurrentReader {
    /// Forwards the read to whichever underlying reader this variant holds.
    fn read(&mut self, buf: &mut [u8]) -> io::Result<usize> {
        let inner: &mut dyn io::Read = match self {
            Self::File(file) => file,
            Self::Other(boxed) => &mut **boxed,
        };
        inner.read(buf)
    }
}

// MultifileReader - concatenate all our input, file or stdin.
pub struct MultifileReader<'a> {
ni: Vec<InputSource<'a>>,
curr_file: Option<Box<dyn io::Read>>,
curr_file: Option<CurrentReader>,
any_err: bool,
}

Expand Down Expand Up @@ -57,7 +80,7 @@ impl MultifileReader<'_> {
#[cfg(any(unix, target_os = "wasi"))]
{
let stdin = uucore::io::RawReader(rustix::stdio::stdin());
self.curr_file = Some(Box::new(stdin));
self.curr_file = Some(CurrentReader::Other(Box::new(stdin)));
}

// For non-unix platforms we don't have GNU compatibility requirements, so
Expand All @@ -67,7 +90,7 @@ impl MultifileReader<'_> {
#[cfg(not(any(unix, target_os = "wasi")))]
{
let stdin = io::stdin();
self.curr_file = Some(Box::new(stdin));
self.curr_file = Some(CurrentReader::Other(Box::new(stdin)));
}
break;
}
Expand All @@ -76,7 +99,7 @@ impl MultifileReader<'_> {
Ok(f) => {
// No need to wrap `f` in a BufReader - buffered reading is taken care
// of elsewhere.
self.curr_file = Some(Box::new(f));
self.curr_file = Some(CurrentReader::File(f));
break;
}
Err(e) => {
Expand All @@ -96,12 +119,86 @@ impl MultifileReader<'_> {
}
}
InputSource::Stream(s) => {
self.curr_file = Some(s);
self.curr_file = Some(CurrentReader::Other(s));
break;
}
}
}
}

/// Advance past the first `n_skip` bytes of the concatenated input.
///
/// Each input is consumed in turn via `skip_in_file`, which seeks when that
/// is safe (a regular file large enough that its reported size can be
/// trusted, or a seekable special file such as `/dev/null`, which may be
/// skipped past its empty end) and otherwise reads and discards (proc/sys
/// files that report a bogus size, pipes, stdin). When one input runs out
/// before the skip is satisfied, the remainder carries over to the next.
///
/// # Errors
///
/// Fails with `UnexpectedEof` when the inputs run out before `n_skip`
/// bytes have been skipped, matching GNU `od`. Errors from the underlying
/// seek/read are propagated unchanged.
pub fn skip(&mut self, n_skip: u64) -> io::Result<()> {
    let mut remaining = n_skip;
    while remaining > 0 {
        let Some(reader) = self.curr_file.as_mut() else {
            // No more inputs to consume.
            break;
        };
        remaining = skip_in_file(reader, remaining)?;
        if remaining > 0 {
            // This input ended first; keep skipping in the following one.
            self.next_file();
        }
    }

    if remaining == 0 {
        Ok(())
    } else {
        Err(io::Error::new(
            io::ErrorKind::UnexpectedEof,
            translate!("od-error-skip-past-end"),
        ))
    }
}
}

/// Consume at most `n_skip` bytes from one input.
///
/// The return value is how many bytes are still owed afterwards: `0` when the
/// skip finished inside this input, otherwise what is left over because the
/// input ended first.
fn skip_in_file(curr: &mut CurrentReader, n_skip: u64) -> io::Result<u64> {
    #[cfg(unix)]
    if let CurrentReader::File(file) = curr {
        if let Ok(meta) = file.metadata() {
            let len = meta.len();
            let block = uucore::fs::sane_blksize::sane_blksize_from_metadata(&meta);
            let regular = meta.is_file();

            let sought = if regular && block < len {
                // A regular file bigger than one block reports a size we can
                // rely on: either the whole file is skipped, or we seek to
                // the target offset inside it.
                if len < n_skip {
                    return Ok(n_skip - len);
                }
                seek_forward(file, n_skip)?
            } else if regular {
                // Small or proc-like files lie about their size; they are
                // handled by reading below.
                false
            } else {
                // Seekable special files (character/block devices) may be
                // skipped past their end without error. A failed seek simply
                // means we read instead.
                seek_forward(file, n_skip).unwrap_or(false)
            };

            if sought {
                return Ok(0);
            }
        }
    }

    // Fallback for everything that could not be positioned by seeking.
    let consumed = uucore::io::read_and_discard(curr, n_skip, SKIP_BUFFER_SIZE)?;
    Ok(n_skip - consumed)
}

/// Move the cursor of `f` ahead by `n` bytes relative to its current position.
///
/// Yields `Ok(true)` once the seek has been performed. Yields `Ok(false)`,
/// without touching the file, when `n` does not fit in the signed seek offset;
/// callers are then expected to read and discard instead. Errors from the seek
/// itself are returned as-is.
#[cfg(unix)]
fn seek_forward(f: &mut File, n: u64) -> io::Result<bool> {
    let Ok(offset) = i64::try_from(n) else {
        return Ok(false);
    };
    f.seek(SeekFrom::Current(offset))?;
    Ok(true)
}

impl io::Read for MultifileReader<'_> {
Expand Down
12 changes: 7 additions & 5 deletions src/uu/od/src/od.rs
Original file line number Diff line number Diff line change
Expand Up @@ -263,7 +263,7 @@ pub fn uumain(args: impl uucore::Args) -> UResult<()> {
&od_options.input_strings,
od_options.skip_bytes,
od_options.read_bytes,
);
)?;
let mut input_decoder = InputDecoder::new(
&mut input,
od_options.line_bytes,
Expand Down Expand Up @@ -763,17 +763,19 @@ fn open_input_peek_reader(
input_strings: &[String],
skip_bytes: u64,
read_bytes: Option<u64>,
) -> PeekReader<BufReader<PartialReader<MultifileReader<'_>>>> {
) -> UResult<PeekReader<BufReader<PartialReader<MultifileReader<'_>>>>> {
// should return "impl PeekRead + Read + HasError" when supported in (stable) rust
let inputs = map_input_strings(input_strings);
let mf = MultifileReader::new(inputs);
let pr = PartialReader::new(mf, skip_bytes, read_bytes);
let mut mf = MultifileReader::new(inputs);
mf.skip(skip_bytes)
.map_err(|e| USimpleError::new(1, e.to_string()))?;
let pr = PartialReader::new(mf, read_bytes);
// Add a BufReader over the top of the PartialReader. This will have the
// effect of generating buffered reads to files/stdin, but since these reads
// go through PartialReader (which limits the maximum number of bytes read)
// we won't ever read more bytes than were specified with the `-N` flag.
let buf_pr = BufReader::new(pr);
PeekReader::new(buf_pr)
Ok(PeekReader::new(buf_pr))
}

impl<R: HasError> HasError for BufReader<R> {
Expand Down
Loading
Loading