composefs · cgwalters · Mar 2, 2026 · Mar 2, 2026 · Mar 2, 2026 · Mar 2, 2026
diff --git a/.github/workflows/ci.yaml b/.github/workflows/ci.yaml
@@ -18,15 +18,16 @@ env:
   RUST_BACKTRACE: short
 
 jobs:
-  semver-checks:
-    name: Semver Checks
-    runs-on: ubuntu-24.04
-    steps:
-    - uses: actions/checkout@v6
-    - uses: obi1kenobi/cargo-semver-checks-action@v2
-      with:
-        # Pinned until cargo-semver-checks supports rustdoc format v57 (Rust 1.93+)
-        rust-toolchain: "1.92.0"
+  # Disabled until the crate is published to crates.io for the first time.
+  # cargo-semver-checks needs a baseline version to compare against.
+  # Re-enable after initial `cargo publish`.
+  #
+  # semver-checks:
+  #   name: Semver Checks
+  #   runs-on: ubuntu-24.04
+  #   steps:
+  #   - uses: actions/checkout@v6
+  #   - uses: obi1kenobi/cargo-semver-checks-action@v2
 
   build-test:
     name: Build+Test
@@ -57,13 +58,56 @@ jobs:
     runs-on: ubuntu-latest
     steps:
       - uses: actions/checkout@v6
-      - name: Install MSRV toolchain
+      - name: Determine MSRV from Cargo.toml
+        id: msrv
+        run: |
+          # First rust-version value only; drop anything after the closing quote.
+          msrv=$(sed -n 's/^rust-version *= *"\([^"]*\)".*/\1/p' Cargo.toml | head -n 1)
+          if [ -z "$msrv" ]; then
+            echo "::error::rust-version not found in Cargo.toml"; exit 1
+          fi
+          echo "version=$msrv" >> "$GITHUB_OUTPUT"
+      - name: Install MSRV toolchain (${{ steps.msrv.outputs.version }})
         uses: dtolnay/rust-toolchain@master
         with:
-          toolchain: "1.86.0"
+          toolchain: ${{ steps.msrv.outputs.version }}
       - name: Cache Dependencies
         uses: Swatinem/rust-cache@v2
+      # Only build the library itself at MSRV — dev-dependencies may
+      # legitimately require a newer Rust version.
       - name: Build (MSRV)
-        run: cargo build
-      - name: Test (MSRV)
-        run: cargo test
+        run: cargo build --lib
+      - name: Check (no-std, MSRV)
+        run: cargo check --lib --no-default-features
+
+  fuzz:
+    name: Fuzz
+    runs-on: ubuntu-latest
+    steps:
+      - name: Checkout repository
+        uses: actions/checkout@v6
+        with:
+          ref: ${{ github.event.pull_request.head.sha }}
+      - name: Install nightly toolchain
+        uses: dtolnay/rust-toolchain@nightly
+      - name: Install cargo-fuzz
+        run: cargo install cargo-fuzz --locked
+      - name: Cache Dependencies
+        uses: Swatinem/rust-cache@v2
+        with:
+          workspaces: fuzz
+      - name: Generate seed corpus
+        run: cargo run --manifest-path fuzz/Cargo.toml --bin generate-corpus
+      - name: Fuzz all targets (2 minutes each)
+        run: |
+          mkdir -p fuzz-logs
+          # Plain assignment so `bash -e` aborts if listing targets fails.
+          targets=$(cargo +nightly fuzz list)
+          for target in $targets; do
+            echo "--- Fuzzing $target (2 min) ---"
+            if ! cargo +nightly fuzz run "$target" -- -max_total_time=120 > "fuzz-logs/$target.log" 2>&1; then
+              echo "::error::Fuzzer $target failed"; cat "fuzz-logs/$target.log"; exit 1
+            fi
+            echo "  $target: OK"; tail -1 "fuzz-logs/$target.log"  # final stats line
+          done
+        working-directory: fuzz
diff --git a/.github/workflows/fuzz-extended.yaml b/.github/workflows/fuzz-extended.yaml
@@ -0,0 +1,44 @@
+name: Extended Fuzzing
+
+permissions:
+  contents: read
+
+on:
+  push:
+    branches: [main]
+
+env:
+  CARGO_NET_RETRY: 10
+  CARGO_TERM_COLOR: always
+  RUSTUP_MAX_RETRIES: 10
+  RUST_BACKTRACE: short
+
+jobs:
+  fuzz-extended:
+    name: Fuzz (extended)
+    runs-on: ubuntu-latest
+    steps:
+      - name: Checkout repository
+        uses: actions/checkout@v6
+      - name: Install nightly toolchain
+        uses: dtolnay/rust-toolchain@nightly
+      - name: Install cargo-fuzz
+        run: cargo install cargo-fuzz --locked
+      - name: Cache Dependencies
+        uses: Swatinem/rust-cache@v2
+        with:
+          workspaces: fuzz
+      - name: Generate seed corpus
+        run: cargo run --manifest-path fuzz/Cargo.toml --bin generate-corpus
+      - name: Fuzz all targets (15 minutes each)
+        run: |
+          mkdir -p fuzz-logs
+          targets=$(cargo +nightly fuzz list)  # assignment: bash -e aborts if listing fails
+          for target in $targets; do
+            echo "--- Fuzzing $target (15 min) ---"
+            if ! cargo +nightly fuzz run "$target" -- -max_total_time=900 > "fuzz-logs/$target.log" 2>&1; then
+              echo "::error::Fuzzer $target failed"; cat "fuzz-logs/$target.log"; exit 1
+            fi
+            echo "  $target: OK"; tail -1 "fuzz-logs/$target.log"  # final stats line
+          done
+        working-directory: fuzz
diff --git a/Justfile b/Justfile
@@ -27,8 +27,8 @@ interop:
 # Run all tests
 test-all: unit interop
 
-# Full CI check (format, lint, test)
-ci: check unit
+# Full CI check (format, lint, test, fuzz)
+ci: check unit fuzz-all
 
 # Run Kani formal verification proofs (install: cargo install --locked kani-verifier && cargo kani setup)
 kani:
@@ -42,13 +42,32 @@ kani-proof name:
 kani-list:
     cargo kani list
 
-# Run a cargo-fuzz target (e.g., `just fuzz parse`, `just fuzz roundtrip -- -max_total_time=60`)
+# Run a single fuzz target (e.g., `just fuzz parse`, `just fuzz roundtrip -- -max_total_time=60`)
 fuzz target *ARGS:
     cargo +nightly fuzz run {{target}} {{ARGS}}
 
+# Run all fuzz targets for a given duration each (default: 2 minutes).
+# Fuzzer output is redirected to target/fuzz-logs/; on failure the full log is printed.
+fuzz-all seconds="120":
+    #!/usr/bin/env bash
+    set -euo pipefail
+    mkdir -p target/fuzz-logs
+    targets=$(cd fuzz && cargo +nightly fuzz list)  # assignment, so a failed listing aborts under set -e
+    for target in $targets; do
+        echo "--- Fuzzing $target for {{seconds}}s ---"
+        log="target/fuzz-logs/$target.log"
+        if ! cargo +nightly fuzz run "$target" -- -max_total_time={{seconds}} > "$log" 2>&1; then
+            echo "  $target: FAILED"
+            cat "$log"
+            exit 1
+        fi
+        echo "  $target: OK"
+        tail -1 "$log"
+    done
+
 # List available fuzz targets
 fuzz-list:
-    cargo fuzz list
+    cd fuzz && cargo +nightly fuzz list
 
 # Generate seed corpus for the parse fuzz target
 generate-corpus:

diff --git a/examples/interop-tar.rs b/examples/interop-tar.rs
@@ -21,8 +21,9 @@ use arbitrary::{Arbitrary, Unstructured};
 use tempfile::TempDir;
 
 use tar_core::builder::EntryBuilder;
-use tar_core::parse::{Limits, ParseEvent, Parser};
+use tar_core::parse::Limits;
 use tar_core::{EntryType, HEADER_SIZE};
+use tar_core_testutil::{parse_tar_core_with_limits, OwnedEntry};
 
 // =============================================================================
 // Test parameters
@@ -230,57 +231,6 @@ fn build_tar_core_archive(entries: &[EntryParams], format: &str) -> Vec<u8> {
     archive
 }
 
-/// Parse a tar archive using tar-core's sans-IO parser.
-fn parse_tar_core_archive(data: &[u8]) -> Vec<EntryParams> {
-    let mut parser = Parser::new(Limits::default());
-    let mut results = Vec::new();
-    let mut offset = 0;
-
-    loop {
-        let input = &data[offset..];
-        match parser.parse(input).expect("parse should succeed") {
-            ParseEvent::NeedData { .. } => {
-                panic!("unexpected NeedData — archive should be complete in memory");
-            }
-            ParseEvent::Entry { consumed, entry } => {
-                offset += consumed;
-
-                let path = entry.path.to_vec();
-                let size = entry.size as usize;
-                let uname = entry.uname.as_ref().map(|u| u.to_vec()).unwrap_or_default();
-                let gname = entry.gname.as_ref().map(|g| g.to_vec()).unwrap_or_default();
-                let is_dir = entry.entry_type.is_dir();
-
-                // Read content
-                let content = data[offset..offset + size].to_vec();
-                let padded_size = size.next_multiple_of(HEADER_SIZE);
-                offset += padded_size;
-
-                results.push(EntryParams {
-                    path,
-                    mode: entry.mode,
-                    uid: entry.uid as u32,
-                    gid: entry.gid as u32,
-                    mtime: entry.mtime as u32,
-                    username: uname,
-                    groupname: gname,
-                    content,
-                    is_dir,
-                });
-            }
-            ParseEvent::SparseEntry { .. } => {
-                panic!("unexpected SparseEntry in interop test");
-            }
-            ParseEvent::GlobalExtensions { consumed, .. } => {
-                offset += consumed;
-            }
-            ParseEvent::End { .. } => break,
-        }
-    }
-
-    results
-}
-
 // =============================================================================
 // GNU tar interaction helpers
 // =============================================================================
@@ -471,7 +421,7 @@ fn test_gnu_tar_to_tar_core(sh: &Shell, entries: &[EntryParams], format: &str) {
 
     // Parse with tar-core
     let archive_data = std::fs::read(&archive_path).expect("read archive");
-    let parsed = parse_tar_core_archive(&archive_data);
+    let parsed: Vec<OwnedEntry> = parse_tar_core_with_limits(&archive_data, Limits::default());
 
     // Verify entries match.
     // GNU tar may reorder or add parent directories, so we compare by path.
@@ -705,7 +655,7 @@ fn smoke_test_gnu_tar_non_utf8_roundtrip(sh: &Shell) {
     );
 
     let archive_data = std::fs::read(&archive_path).expect("read archive");
-    let parsed = parse_tar_core_archive(&archive_data);
+    let parsed: Vec<OwnedEntry> = parse_tar_core_with_limits(&archive_data, Limits::default());
 
     let found = parsed
         .iter()

diff --git a/src/parse.rs b/src/parse.rs
@@ -815,7 +815,9 @@ impl Parser {
                     });
                 }
 
-                let total_size = HEADER_SIZE as u64 + padded_size;
+                let total_size = (HEADER_SIZE as u64)
+                    .checked_add(padded_size)
+                    .ok_or(ParseError::InvalidSize(size))?;
                 if (input.len() as u64) < total_size {
                     return Ok(ParseEvent::NeedData {
                         min_bytes: total_size as usize,
@@ -898,7 +900,9 @@ impl Parser {
             });
         }
 
-        let total_size = HEADER_SIZE as u64 + padded_size;
+        let total_size = (HEADER_SIZE as u64)
+            .checked_add(padded_size)
+            .ok_or(ParseError::InvalidSize(size))?;
         if (input.len() as u64) < total_size {
             return Ok(ParseEvent::NeedData {
                 min_bytes: total_size as usize,
@@ -1558,6 +1562,19 @@ impl Parser {
         // Clear pending metadata
         self.pending.clear();
 
+        // Normalize: empty optional byte fields are semantically equivalent to
+        // absent.  PAX overrides and GNU long link can set empty values that
+        // would otherwise surface as `Some([])` instead of `None`.
+        if link_target.as_ref().is_some_and(|v| v.is_empty()) {
+            link_target = None;
+        }
+        if uname.as_ref().is_some_and(|v| v.is_empty()) {
+            uname = None;
+        }
+        if gname.as_ref().is_some_and(|v| v.is_empty()) {
+            gname = None;
+        }
+
         // Reject entries with empty paths
         if path.is_empty() && !self.allow_empty_path {
             return Err(ParseError::EmptyPath);

diff --git a/testutil/src/lib.rs b/testutil/src/lib.rs
@@ -225,9 +225,15 @@ pub fn parse_tar_rs(data: &[u8]) -> Vec<OwnedEntry> {
                     if let Some(attr_name) = key.strip_prefix(PAX_SCHILY_XATTR) {
                         xattrs.push((attr_name.as_bytes().to_vec(), ext.value_bytes().to_vec()));
                     } else if key == "uname" {
-                        uname = Some(ext.value_bytes().to_vec());
+                        let v = ext.value_bytes();
+                        if !v.is_empty() {
+                            uname = Some(v.to_vec());
+                        }
                     } else if key == "gname" {
-                        gname = Some(ext.value_bytes().to_vec());
+                        let v = ext.value_bytes();
+                        if !v.is_empty() {
+                            gname = Some(v.to_vec());
+                        }
                     }
                 }
             }