From dbe781ba3d2de62d4c6066841c193273eddc6230 Mon Sep 17 00:00:00 2001 From: Colin Walters Date: Mon, 2 Mar 2026 16:47:29 +0000 Subject: [PATCH 1/6] interop-tar: Reuse parse_tar_core_with_limits from testutil Remove the duplicate parse_tar_core_archive function from the interop example and use parse_tar_core_with_limits from tar-core-testutil instead. The two callers (test_gnu_tar_to_tar_core and smoke_test_gnu_tar_non_utf8_roundtrip) now compare against OwnedEntry fields directly. EntryParams is intentionally kept since it serves a different purpose: generating test archives with build_tar_core_archive, where its field types (u32 vs u64, is_dir vs entry_type, non-optional username) better match the test generation API. Assisted-by: OpenCode (Claude claude-opus-4-6) --- examples/interop-tar.rs | 58 +++-------------------------------------- 1 file changed, 4 insertions(+), 54 deletions(-) diff --git a/examples/interop-tar.rs b/examples/interop-tar.rs index 7fdc04e..b64a2da 100644 --- a/examples/interop-tar.rs +++ b/examples/interop-tar.rs @@ -21,8 +21,9 @@ use arbitrary::{Arbitrary, Unstructured}; use tempfile::TempDir; use tar_core::builder::EntryBuilder; -use tar_core::parse::{Limits, ParseEvent, Parser}; +use tar_core::parse::Limits; use tar_core::{EntryType, HEADER_SIZE}; +use tar_core_testutil::{parse_tar_core_with_limits, OwnedEntry}; // ============================================================================= // Test parameters @@ -230,57 +231,6 @@ fn build_tar_core_archive(entries: &[EntryParams], format: &str) -> Vec { archive } -/// Parse a tar archive using tar-core's sans-IO parser. -fn parse_tar_core_archive(data: &[u8]) -> Vec { - let mut parser = Parser::new(Limits::default()); - let mut results = Vec::new(); - let mut offset = 0; - - loop { - let input = &data[offset..]; - match parser.parse(input).expect("parse should succeed") { - ParseEvent::NeedData { .. } => { - panic!("unexpected NeedData — archive should be complete in memory"); - } - ParseEvent::Entry { consumed, entry } => { - offset += consumed; - - let path = entry.path.to_vec(); - let size = entry.size as usize; - let uname = entry.uname.as_ref().map(|u| u.to_vec()).unwrap_or_default(); - let gname = entry.gname.as_ref().map(|g| g.to_vec()).unwrap_or_default(); - let is_dir = entry.entry_type.is_dir(); - - // Read content - let content = data[offset..offset + size].to_vec(); - let padded_size = size.next_multiple_of(HEADER_SIZE); - offset += padded_size; - - results.push(EntryParams { - path, - mode: entry.mode, - uid: entry.uid as u32, - gid: entry.gid as u32, - mtime: entry.mtime as u32, - username: uname, - groupname: gname, - content, - is_dir, - }); - } - ParseEvent::SparseEntry { .. } => { - panic!("unexpected SparseEntry in interop test"); - } - ParseEvent::GlobalExtensions { consumed, .. } => { - offset += consumed; - } - ParseEvent::End { .. } => break, - } - } - - results -} - // ============================================================================= // GNU tar interaction helpers // ============================================================================= @@ -471,7 +421,7 @@ fn test_gnu_tar_to_tar_core(sh: &Shell, entries: &[EntryParams], format: &str) { // Parse with tar-core let archive_data = std::fs::read(&archive_path).expect("read archive"); - let parsed = parse_tar_core_archive(&archive_data); + let parsed: Vec = parse_tar_core_with_limits(&archive_data, Limits::default()); // Verify entries match. // GNU tar may reorder or add parent directories, so we compare by path. @@ -705,7 +655,7 @@ fn smoke_test_gnu_tar_non_utf8_roundtrip(sh: &Shell) { ); let archive_data = std::fs::read(&archive_path).expect("read archive"); - let parsed = parse_tar_core_archive(&archive_data); + let parsed: Vec = parse_tar_core_with_limits(&archive_data, Limits::default()); let found = parsed .iter() From ec53cf070bd9435936a480685c04eef4ff1e7957 Mon Sep 17 00:00:00 2001 From: Colin Walters Date: Mon, 2 Mar 2026 18:13:47 +0000 Subject: [PATCH 2/6] ci: Fix semver-checks and MSRV jobs Disable semver-checks until the crate is published to crates.io; cargo-semver-checks needs a baseline version to compare against and currently always fails. Fix the MSRV job to read rust-version from Cargo.toml instead of hardcoding "1.86.0". Also restrict the MSRV build to --lib only, since dev-dependencies (like home@0.5.12) may legitimately require a newer Rust version than the library's MSRV. Assisted-by: OpenCode (Claude claude-opus-4-6) --- .github/workflows/ci.yaml | 40 +++++++++++++++++++++++++-------------- 1 file changed, 26 insertions(+), 14 deletions(-) diff --git a/.github/workflows/ci.yaml b/.github/workflows/ci.yaml index 3c6d8d0..2024d8f 100644 --- a/.github/workflows/ci.yaml +++ b/.github/workflows/ci.yaml @@ -18,15 +18,16 @@ env: RUST_BACKTRACE: short jobs: - semver-checks: - name: Semver Checks - runs-on: ubuntu-24.04 - steps: - - uses: actions/checkout@v6 - - uses: obi1kenobi/cargo-semver-checks-action@v2 - with: - # Pinned until cargo-semver-checks supports rustdoc format v57 (Rust 1.93+) - rust-toolchain: "1.92.0" + # Disabled until the crate is published to crates.io for the first time. + # cargo-semver-checks needs a baseline version to compare against. + # Re-enable after initial `cargo publish`. + # + # semver-checks: + # name: Semver Checks + # runs-on: ubuntu-24.04 + # steps: + # - uses: actions/checkout@v6 + # - uses: obi1kenobi/cargo-semver-checks-action@v2 build-test: name: Build+Test @@ -57,13 +58,24 @@ jobs: runs-on: ubuntu-latest steps: - uses: actions/checkout@v6 - - name: Install MSRV toolchain + - name: Determine MSRV from Cargo.toml + id: msrv + run: | + msrv=$(sed -n 's/^rust-version *= *"\(.*\)"/\1/p' Cargo.toml) + if [ -z "$msrv" ]; then + echo "::error::rust-version not found in Cargo.toml" + exit 1 + fi + echo "version=$msrv" >> "$GITHUB_OUTPUT" + - name: Install MSRV toolchain (${{ steps.msrv.outputs.version }}) uses: dtolnay/rust-toolchain@master with: - toolchain: "1.86.0" + toolchain: ${{ steps.msrv.outputs.version }} - name: Cache Dependencies uses: Swatinem/rust-cache@v2 + # Only build the library itself at MSRV — dev-dependencies may + # legitimately require a newer Rust version. - name: Build (MSRV) - run: cargo build - - name: Test (MSRV) - run: cargo test + run: cargo build --lib + - name: Check (no-std, MSRV) + run: cargo check --lib --no-default-features From d34ad29e91009186953c3f8a2c79efabb259389e Mon Sep 17 00:00:00 2001 From: Colin Walters Date: Mon, 2 Mar 2026 18:14:13 +0000 Subject: [PATCH 3/6] ci: Add fuzz testing to CI and Justfile Add a fuzz job to the main CI workflow that runs all cargo-fuzz targets for 2 minutes each on every push/PR. Add a separate extended fuzzing workflow that runs post-merge only with 15 minutes per target. Add a `just fuzz-all` recipe (default 2 min per target) and include it in `just ci`. Also fix `just fuzz-list` to use the correct nightly toolchain. Assisted-by: OpenCode (Claude claude-opus-4-6) --- .github/workflows/ci.yaml | 32 ++++++++++++++++++++ .github/workflows/fuzz-extended.yaml | 44 ++++++++++++++++++++++++++++ Justfile | 27 ++++++++++++++--- 3 files changed, 99 insertions(+), 4 deletions(-) create mode 100644 .github/workflows/fuzz-extended.yaml diff --git a/.github/workflows/ci.yaml b/.github/workflows/ci.yaml index 2024d8f..eeaeaf0 100644 --- a/.github/workflows/ci.yaml +++ b/.github/workflows/ci.yaml @@ -79,3 +79,35 @@ jobs: run: cargo build --lib - name: Check (no-std, MSRV) run: cargo check --lib --no-default-features + + fuzz: + name: Fuzz + runs-on: ubuntu-latest + steps: + - name: Checkout repository + uses: actions/checkout@v6 + with: + ref: ${{ github.event.pull_request.head.sha }} + - name: Install nightly toolchain + uses: dtolnay/rust-toolchain@nightly + - name: Install cargo-fuzz + run: cargo install cargo-fuzz --locked + - name: Cache Dependencies + uses: Swatinem/rust-cache@v2 + with: + workspaces: fuzz + - name: Generate seed corpus + run: cargo run --manifest-path fuzz/Cargo.toml --bin generate-corpus + - name: Fuzz all targets (2 minutes each) + run: | + mkdir -p fuzz-logs + for target in $(cargo +nightly fuzz list); do + echo "--- Fuzzing $target (2 min) ---" + cargo +nightly fuzz run "$target" -- -max_total_time=120 \ + > "fuzz-logs/$target.log" 2>&1 \ + && echo " $target: OK" \ + || { echo "::error::Fuzzer $target failed"; cat "fuzz-logs/$target.log"; exit 1; } + # Print final stats line + tail -1 "fuzz-logs/$target.log" + done + working-directory: fuzz diff --git a/.github/workflows/fuzz-extended.yaml b/.github/workflows/fuzz-extended.yaml new file mode 100644 index 0000000..9a2bb3c --- /dev/null +++ b/.github/workflows/fuzz-extended.yaml @@ -0,0 +1,44 @@ +name: Extended Fuzzing + +permissions: + contents: read + +on: + push: + branches: [main] + +env: + CARGO_NET_RETRY: 10 + CARGO_TERM_COLOR: always + RUSTUP_MAX_RETRIES: 10 + RUST_BACKTRACE: short + +jobs: + fuzz-extended: + name: Fuzz (extended) + runs-on: ubuntu-latest + steps: + - name: Checkout repository + uses: actions/checkout@v6 + - name: Install nightly toolchain + uses: dtolnay/rust-toolchain@nightly + - name: Install cargo-fuzz + run: cargo install cargo-fuzz --locked + - name: Cache Dependencies + uses: Swatinem/rust-cache@v2 + with: + workspaces: fuzz + - name: Generate seed corpus + run: cargo run --manifest-path fuzz/Cargo.toml --bin generate-corpus + - name: Fuzz all targets (15 minutes each) + run: | + mkdir -p fuzz-logs + for target in $(cargo +nightly fuzz list); do + echo "--- Fuzzing $target (15 min) ---" + cargo +nightly fuzz run "$target" -- -max_total_time=900 \ + > "fuzz-logs/$target.log" 2>&1 \ + && echo " $target: OK" \ + || { echo "::error::Fuzzer $target failed"; cat "fuzz-logs/$target.log"; exit 1; } + tail -1 "fuzz-logs/$target.log" + done + working-directory: fuzz diff --git a/Justfile b/Justfile index 1428116..8b65425 100644 --- a/Justfile +++ b/Justfile @@ -27,8 +27,8 @@ interop: # Run all tests test-all: unit interop -# Full CI check (format, lint, test) -ci: check unit +# Full CI check (format, lint, test, fuzz) +ci: check unit fuzz-all # Run Kani formal verification proofs (install: cargo install --locked kani-verifier && cargo kani setup) kani: @@ -42,13 +42,32 @@ kani-proof name: kani-list: cargo kani list -# Run a cargo-fuzz target (e.g., `just fuzz parse`, `just fuzz roundtrip -- -max_total_time=60`) +# Run a single fuzz target (e.g., `just fuzz parse`, `just fuzz roundtrip -- -max_total_time=60`) fuzz target *ARGS: cargo +nightly fuzz run {{target}} {{ARGS}} +# Run all fuzz targets for a given duration each (default: 2 minutes). +# Fuzzer output is redirected to target/fuzz-logs/; on failure the full log is printed. +fuzz-all seconds="120": + #!/usr/bin/env bash + set -euo pipefail + mkdir -p target/fuzz-logs + for target in $(cd fuzz && cargo +nightly fuzz list); do + echo "--- Fuzzing $target for {{seconds}}s ---" + log="target/fuzz-logs/$target.log" + if cargo +nightly fuzz run "$target" -- -max_total_time={{seconds}} > "$log" 2>&1; then + echo " $target: OK" + tail -1 "$log" + else + echo " $target: FAILED" + cat "$log" + exit 1 + fi + done + # List available fuzz targets fuzz-list: - cargo fuzz list + cd fuzz && cargo +nightly fuzz list # Generate seed corpus for the parse fuzz target generate-corpus: From 6d6c373bc32b9b638ac681f8f9e01857c562d568 Mon Sep 17 00:00:00 2001 From: Colin Walters Date: Mon, 2 Mar 2026 18:45:24 +0000 Subject: [PATCH 4/6] parse: Normalize empty link_target to None PAX `linkpath` and GNU long link extensions can set an empty value, which surfaces as `link_target: Some([])` instead of `None`. An empty link target is semantically equivalent to no link target, matching the behavior of tar-rs which filters these with `.filter(|b| !b.is_empty())`. The header path already had a guard (`if !header_link.is_empty()`) but the PAX linkpath and GNU long link code paths did not. Rather than adding guards at each source, normalize after all sources have been applied, right before constructing the ParsedEntry. Found by the differential fuzzer (crash-8689eb1a2a408c74f42ba6948f330effb32e2306). Assisted-by: OpenCode (Claude claude-opus-4-6) --- src/parse.rs | 7 +++++++ 1 file changed, 7 insertions(+) diff --git a/src/parse.rs b/src/parse.rs index 56d4eee..b1e9ab3 100644 --- a/src/parse.rs +++ b/src/parse.rs @@ -1558,6 +1558,13 @@ impl Parser { // Clear pending metadata self.pending.clear(); + // Normalize: empty link target is semantically equivalent to no link + // target. PAX `linkpath` and GNU long link can set an empty value that + // would otherwise surface as `Some([])` instead of `None`. + if link_target.as_ref().is_some_and(|v| v.is_empty()) { + link_target = None; + } + // Reject entries with empty paths if path.is_empty() && !self.allow_empty_path { return Err(ParseError::EmptyPath); From 570f09dedd7ad9fa149e4f6214a9665ee2ca579d Mon Sep 17 00:00:00 2001 From: Colin Walters Date: Mon, 2 Mar 2026 18:45:32 +0000 Subject: [PATCH 5/6] parse: Fix integer overflow when computing total_size When using permissive limits (max_pax_size = u64::MAX), the size field can be large enough that `padded_size` (after rounding up to block alignment) overflows when added to HEADER_SIZE. Use checked_add in both the global PAX handler and the extension handler to return InvalidSize instead of panicking. Found by the differential fuzzer (crash-6300171db6afdcdb33b78318640acec5760fec81). Assisted-by: OpenCode (Claude claude-opus-4-6) --- src/parse.rs | 8 ++++++-- 1 file changed, 6 insertions(+), 2 deletions(-) diff --git a/src/parse.rs b/src/parse.rs index b1e9ab3..2054ded 100644 --- a/src/parse.rs +++ b/src/parse.rs @@ -815,7 +815,9 @@ impl Parser { }); } - let total_size = HEADER_SIZE as u64 + padded_size; + let total_size = (HEADER_SIZE as u64) + .checked_add(padded_size) + .ok_or(ParseError::InvalidSize(size))?; if (input.len() as u64) < total_size { return Ok(ParseEvent::NeedData { min_bytes: total_size as usize, @@ -898,7 +900,9 @@ impl Parser { }); } - let total_size = HEADER_SIZE as u64 + padded_size; + let total_size = (HEADER_SIZE as u64) + .checked_add(padded_size) + .ok_or(ParseError::InvalidSize(size))?; if (input.len() as u64) < total_size { return Ok(ParseEvent::NeedData { min_bytes: total_size as usize, From be4709b198210639824d42df65796cf37efe85f3 Mon Sep 17 00:00:00 2001 From: Colin Walters Date: Mon, 2 Mar 2026 18:47:30 +0000 Subject: [PATCH 6/6] parse: Normalize empty uname/gname to None Same class of fix as the link_target normalization: PAX overrides can set empty values that surface as Some([]) instead of None. Apply the same normalization for consistency. Assisted-by: OpenCode (Claude claude-opus-4-6) --- src/parse.rs | 10 ++++++++-- testutil/src/lib.rs | 10 ++++++++-- 2 files changed, 16 insertions(+), 4 deletions(-) diff --git a/src/parse.rs b/src/parse.rs index 2054ded..ff54de1 100644 --- a/src/parse.rs +++ b/src/parse.rs @@ -1562,12 +1562,18 @@ impl Parser { // Clear pending metadata self.pending.clear(); - // Normalize: empty link target is semantically equivalent to no link - // target. PAX `linkpath` and GNU long link can set an empty value that + // Normalize: empty optional byte fields are semantically equivalent to + // absent. PAX overrides and GNU long link can set empty values that // would otherwise surface as `Some([])` instead of `None`. if link_target.as_ref().is_some_and(|v| v.is_empty()) { link_target = None; } + if uname.as_ref().is_some_and(|v| v.is_empty()) { + uname = None; + } + if gname.as_ref().is_some_and(|v| v.is_empty()) { + gname = None; + } // Reject entries with empty paths if path.is_empty() && !self.allow_empty_path { diff --git a/testutil/src/lib.rs b/testutil/src/lib.rs index a16eb80..69def71 100644 --- a/testutil/src/lib.rs +++ b/testutil/src/lib.rs @@ -225,9 +225,15 @@ pub fn parse_tar_rs(data: &[u8]) -> Vec { if let Some(attr_name) = key.strip_prefix(PAX_SCHILY_XATTR) { xattrs.push((attr_name.as_bytes().to_vec(), ext.value_bytes().to_vec())); } else if key == "uname" { - uname = Some(ext.value_bytes().to_vec()); + let v = ext.value_bytes(); + if !v.is_empty() { + uname = Some(v.to_vec()); + } } else if key == "gname" { - gname = Some(ext.value_bytes().to_vec()); + let v = ext.value_bytes(); + if !v.is_empty() { + gname = Some(v.to_vec()); + } } } }