diff --git a/.github/workflows/ci.yaml b/.github/workflows/ci.yaml index 3c6d8d0..eeaeaf0 100644 --- a/.github/workflows/ci.yaml +++ b/.github/workflows/ci.yaml @@ -18,15 +18,16 @@ env: RUST_BACKTRACE: short jobs: - semver-checks: - name: Semver Checks - runs-on: ubuntu-24.04 - steps: - - uses: actions/checkout@v6 - - uses: obi1kenobi/cargo-semver-checks-action@v2 - with: - # Pinned until cargo-semver-checks supports rustdoc format v57 (Rust 1.93+) - rust-toolchain: "1.92.0" + # Disabled until the crate is published to crates.io for the first time. + # cargo-semver-checks needs a baseline version to compare against. + # Re-enable after initial `cargo publish`. + # + # semver-checks: + # name: Semver Checks + # runs-on: ubuntu-24.04 + # steps: + # - uses: actions/checkout@v6 + # - uses: obi1kenobi/cargo-semver-checks-action@v2 build-test: name: Build+Test @@ -57,13 +58,56 @@ jobs: runs-on: ubuntu-latest steps: - uses: actions/checkout@v6 - - name: Install MSRV toolchain + - name: Determine MSRV from Cargo.toml + id: msrv + run: | + msrv=$(sed -n 's/^rust-version *= *"\(.*\)"/\1/p' Cargo.toml) + if [ -z "$msrv" ]; then + echo "::error::rust-version not found in Cargo.toml" + exit 1 + fi + echo "version=$msrv" >> "$GITHUB_OUTPUT" + - name: Install MSRV toolchain (${{ steps.msrv.outputs.version }}) uses: dtolnay/rust-toolchain@master with: - toolchain: "1.86.0" + toolchain: ${{ steps.msrv.outputs.version }} - name: Cache Dependencies uses: Swatinem/rust-cache@v2 + # Only build the library itself at MSRV — dev-dependencies may + # legitimately require a newer Rust version. - name: Build (MSRV) - run: cargo build - - name: Test (MSRV) - run: cargo test + run: cargo build --lib + - name: Check (no-std, MSRV) + run: cargo check --lib --no-default-features + + fuzz: + name: Fuzz + runs-on: ubuntu-latest + steps: + - name: Checkout repository + uses: actions/checkout@v6 + with: + ref: ${{ github.event.pull_request.head.sha }} + - name: Install nightly toolchain + uses: dtolnay/rust-toolchain@nightly + - name: Install cargo-fuzz + run: cargo install cargo-fuzz --locked + - name: Cache Dependencies + uses: Swatinem/rust-cache@v2 + with: + workspaces: fuzz + - name: Generate seed corpus + run: cargo run --manifest-path fuzz/Cargo.toml --bin generate-corpus + - name: Fuzz all targets (2 minutes each) + run: | + mkdir -p fuzz-logs + for target in $(cargo +nightly fuzz list); do + echo "--- Fuzzing $target (2 min) ---" + cargo +nightly fuzz run "$target" -- -max_total_time=120 \ + > "fuzz-logs/$target.log" 2>&1 \ + && echo " $target: OK" \ + || { echo "::error::Fuzzer $target failed"; cat "fuzz-logs/$target.log"; exit 1; } + # Print final stats line + tail -1 "fuzz-logs/$target.log" + done + working-directory: fuzz diff --git a/.github/workflows/fuzz-extended.yaml b/.github/workflows/fuzz-extended.yaml new file mode 100644 index 0000000..9a2bb3c --- /dev/null +++ b/.github/workflows/fuzz-extended.yaml @@ -0,0 +1,44 @@ +name: Extended Fuzzing + +permissions: + contents: read + +on: + push: + branches: [main] + +env: + CARGO_NET_RETRY: 10 + CARGO_TERM_COLOR: always + RUSTUP_MAX_RETRIES: 10 + RUST_BACKTRACE: short + +jobs: + fuzz-extended: + name: Fuzz (extended) + runs-on: ubuntu-latest + steps: + - name: Checkout repository + uses: actions/checkout@v6 + - name: Install nightly toolchain + uses: dtolnay/rust-toolchain@nightly + - name: Install cargo-fuzz + run: cargo install cargo-fuzz --locked + - name: Cache Dependencies + uses: Swatinem/rust-cache@v2 + with: + workspaces: fuzz + - name: Generate seed corpus + run: cargo run --manifest-path fuzz/Cargo.toml --bin generate-corpus + - name: Fuzz all targets (15 minutes each) + run: | + mkdir -p fuzz-logs + for target in $(cargo +nightly fuzz list); do + echo "--- Fuzzing $target (15 min) ---" + cargo +nightly fuzz run "$target" -- -max_total_time=900 \ + > "fuzz-logs/$target.log" 2>&1 \ + && echo " $target: OK" \ + || { echo "::error::Fuzzer $target failed"; cat "fuzz-logs/$target.log"; exit 1; } + tail -1 "fuzz-logs/$target.log" + done + working-directory: fuzz diff --git a/Justfile b/Justfile index 1428116..8b65425 100644 --- a/Justfile +++ b/Justfile @@ -27,8 +27,8 @@ interop: # Run all tests test-all: unit interop -# Full CI check (format, lint, test) -ci: check unit +# Full CI check (format, lint, test, fuzz) +ci: check unit fuzz-all # Run Kani formal verification proofs (install: cargo install --locked kani-verifier && cargo kani setup) kani: @@ -42,13 +42,32 @@ kani-proof name: kani-list: cargo kani list -# Run a cargo-fuzz target (e.g., `just fuzz parse`, `just fuzz roundtrip -- -max_total_time=60`) +# Run a single fuzz target (e.g., `just fuzz parse`, `just fuzz roundtrip -- -max_total_time=60`) fuzz target *ARGS: cargo +nightly fuzz run {{target}} {{ARGS}} +# Run all fuzz targets for a given duration each (default: 2 minutes). +# Fuzzer output is redirected to target/fuzz-logs/; on failure the full log is printed. +fuzz-all seconds="120": + #!/usr/bin/env bash + set -euo pipefail + mkdir -p target/fuzz-logs + for target in $(cd fuzz && cargo +nightly fuzz list); do + echo "--- Fuzzing $target for {{seconds}}s ---" + log="target/fuzz-logs/$target.log" + if cargo +nightly fuzz run "$target" -- -max_total_time={{seconds}} > "$log" 2>&1; then + echo " $target: OK" + tail -1 "$log" + else + echo " $target: FAILED" + cat "$log" + exit 1 + fi + done + # List available fuzz targets fuzz-list: - cargo fuzz list + cd fuzz && cargo +nightly fuzz list # Generate seed corpus for the parse fuzz target generate-corpus: diff --git a/examples/interop-tar.rs b/examples/interop-tar.rs index 7fdc04e..b64a2da 100644 --- a/examples/interop-tar.rs +++ b/examples/interop-tar.rs @@ -21,8 +21,9 @@ use arbitrary::{Arbitrary, Unstructured}; use tempfile::TempDir; use tar_core::builder::EntryBuilder; -use tar_core::parse::{Limits, ParseEvent, Parser}; +use tar_core::parse::Limits; use tar_core::{EntryType, HEADER_SIZE}; +use tar_core_testutil::{parse_tar_core_with_limits, OwnedEntry}; // ============================================================================= // Test parameters @@ -230,57 +231,6 @@ fn build_tar_core_archive(entries: &[EntryParams], format: &str) -> Vec { archive } -/// Parse a tar archive using tar-core's sans-IO parser. -fn parse_tar_core_archive(data: &[u8]) -> Vec { - let mut parser = Parser::new(Limits::default()); - let mut results = Vec::new(); - let mut offset = 0; - - loop { - let input = &data[offset..]; - match parser.parse(input).expect("parse should succeed") { - ParseEvent::NeedData { .. } => { - panic!("unexpected NeedData — archive should be complete in memory"); - } - ParseEvent::Entry { consumed, entry } => { - offset += consumed; - - let path = entry.path.to_vec(); - let size = entry.size as usize; - let uname = entry.uname.as_ref().map(|u| u.to_vec()).unwrap_or_default(); - let gname = entry.gname.as_ref().map(|g| g.to_vec()).unwrap_or_default(); - let is_dir = entry.entry_type.is_dir(); - - // Read content - let content = data[offset..offset + size].to_vec(); - let padded_size = size.next_multiple_of(HEADER_SIZE); - offset += padded_size; - - results.push(EntryParams { - path, - mode: entry.mode, - uid: entry.uid as u32, - gid: entry.gid as u32, - mtime: entry.mtime as u32, - username: uname, - groupname: gname, - content, - is_dir, - }); - } - ParseEvent::SparseEntry { .. } => { - panic!("unexpected SparseEntry in interop test"); - } - ParseEvent::GlobalExtensions { consumed, .. } => { - offset += consumed; - } - ParseEvent::End { .. } => break, - } - } - - results -} - // ============================================================================= // GNU tar interaction helpers // ============================================================================= @@ -471,7 +421,7 @@ fn test_gnu_tar_to_tar_core(sh: &Shell, entries: &[EntryParams], format: &str) { // Parse with tar-core let archive_data = std::fs::read(&archive_path).expect("read archive"); - let parsed = parse_tar_core_archive(&archive_data); + let parsed: Vec = parse_tar_core_with_limits(&archive_data, Limits::default()); // Verify entries match. // GNU tar may reorder or add parent directories, so we compare by path. @@ -705,7 +655,7 @@ fn smoke_test_gnu_tar_non_utf8_roundtrip(sh: &Shell) { ); let archive_data = std::fs::read(&archive_path).expect("read archive"); - let parsed = parse_tar_core_archive(&archive_data); + let parsed: Vec = parse_tar_core_with_limits(&archive_data, Limits::default()); let found = parsed .iter() diff --git a/src/parse.rs b/src/parse.rs index 56d4eee..ff54de1 100644 --- a/src/parse.rs +++ b/src/parse.rs @@ -815,7 +815,9 @@ impl Parser { }); } - let total_size = HEADER_SIZE as u64 + padded_size; + let total_size = (HEADER_SIZE as u64) + .checked_add(padded_size) + .ok_or(ParseError::InvalidSize(size))?; if (input.len() as u64) < total_size { return Ok(ParseEvent::NeedData { min_bytes: total_size as usize, @@ -898,7 +900,9 @@ impl Parser { }); } - let total_size = HEADER_SIZE as u64 + padded_size; + let total_size = (HEADER_SIZE as u64) + .checked_add(padded_size) + .ok_or(ParseError::InvalidSize(size))?; if (input.len() as u64) < total_size { return Ok(ParseEvent::NeedData { min_bytes: total_size as usize, @@ -1558,6 +1562,19 @@ impl Parser { // Clear pending metadata self.pending.clear(); + // Normalize: empty optional byte fields are semantically equivalent to + // absent. PAX overrides and GNU long link can set empty values that + // would otherwise surface as `Some([])` instead of `None`. + if link_target.as_ref().is_some_and(|v| v.is_empty()) { + link_target = None; + } + if uname.as_ref().is_some_and(|v| v.is_empty()) { + uname = None; + } + if gname.as_ref().is_some_and(|v| v.is_empty()) { + gname = None; + } + // Reject entries with empty paths if path.is_empty() && !self.allow_empty_path { return Err(ParseError::EmptyPath); diff --git a/testutil/src/lib.rs b/testutil/src/lib.rs index a16eb80..69def71 100644 --- a/testutil/src/lib.rs +++ b/testutil/src/lib.rs @@ -225,9 +225,15 @@ pub fn parse_tar_rs(data: &[u8]) -> Vec { if let Some(attr_name) = key.strip_prefix(PAX_SCHILY_XATTR) { xattrs.push((attr_name.as_bytes().to_vec(), ext.value_bytes().to_vec())); } else if key == "uname" { - uname = Some(ext.value_bytes().to_vec()); + let v = ext.value_bytes(); + if !v.is_empty() { + uname = Some(v.to_vec()); + } } else if key == "gname" { - gname = Some(ext.value_bytes().to_vec()); + let v = ext.value_bytes(); + if !v.is_empty() { + gname = Some(v.to_vec()); + } } } }