From dbe781ba3d2de62d4c6066841c193273eddc6230 Mon Sep 17 00:00:00 2001
From: Colin Walters <walters@verbum.org>
Date: Mon, 2 Mar 2026 16:47:29 +0000
Subject: [PATCH 1/6] interop-tar: Reuse parse_tar_core_with_limits from
 testutil

Remove the duplicate parse_tar_core_archive function from the interop
example and use parse_tar_core_with_limits from tar-core-testutil
instead. The two callers (test_gnu_tar_to_tar_core and
smoke_test_gnu_tar_non_utf8_roundtrip) now compare against OwnedEntry
fields directly.

EntryParams is intentionally kept since it serves a different purpose:
generating test archives with build_tar_core_archive, where its field
types (u32 vs u64, is_dir vs entry_type, non-optional username) better
match the test generation API.

Assisted-by: OpenCode (Claude claude-opus-4-6)
---
 examples/interop-tar.rs | 58 +++--------------------------------------
 1 file changed, 4 insertions(+), 54 deletions(-)
diff --git a/examples/interop-tar.rs b/examples/interop-tar.rs
index 7fdc04e..b64a2da 100644
--- a/examples/interop-tar.rs
+++ b/examples/interop-tar.rs
@@ -21,8 +21,9 @@ use arbitrary::{Arbitrary, Unstructured};
 use tempfile::TempDir;
 
 use tar_core::builder::EntryBuilder;
-use tar_core::parse::{Limits, ParseEvent, Parser};
+use tar_core::parse::Limits;
 use tar_core::{EntryType, HEADER_SIZE};
+use tar_core_testutil::{parse_tar_core_with_limits, OwnedEntry};
 
 // =============================================================================
 // Test parameters
@@ -230,57 +231,6 @@ fn build_tar_core_archive(entries: &[EntryParams], format: &str) -> Vec<u8> {
     archive
 }
 
-/// Parse a tar archive using tar-core's sans-IO parser.
-fn parse_tar_core_archive(data: &[u8]) -> Vec<EntryParams> {
-    let mut parser = Parser::new(Limits::default());
-    let mut results = Vec::new();
-    let mut offset = 0;
-
-    loop {
-        let input = &data[offset..];
-        match parser.parse(input).expect("parse should succeed") {
-            ParseEvent::NeedData { .. } => {
-                panic!("unexpected NeedData — archive should be complete in memory");
-            }
-            ParseEvent::Entry { consumed, entry } => {
-                offset += consumed;
-
-                let path = entry.path.to_vec();
-                let size = entry.size as usize;
-                let uname = entry.uname.as_ref().map(|u| u.to_vec()).unwrap_or_default();
-                let gname = entry.gname.as_ref().map(|g| g.to_vec()).unwrap_or_default();
-                let is_dir = entry.entry_type.is_dir();
-
-                // Read content
-                let content = data[offset..offset + size].to_vec();
-                let padded_size = size.next_multiple_of(HEADER_SIZE);
-                offset += padded_size;
-
-                results.push(EntryParams {
-                    path,
-                    mode: entry.mode,
-                    uid: entry.uid as u32,
-                    gid: entry.gid as u32,
-                    mtime: entry.mtime as u32,
-                    username: uname,
-                    groupname: gname,
-                    content,
-                    is_dir,
-                });
-            }
-            ParseEvent::SparseEntry { .. } => {
-                panic!("unexpected SparseEntry in interop test");
-            }
-            ParseEvent::GlobalExtensions { consumed, .. } => {
-                offset += consumed;
-            }
-            ParseEvent::End { .. } => break,
-        }
-    }
-
-    results
-}
-
 // =============================================================================
 // GNU tar interaction helpers
 // =============================================================================
@@ -471,7 +421,7 @@ fn test_gnu_tar_to_tar_core(sh: &Shell, entries: &[EntryParams], format: &str) {
 
     // Parse with tar-core
     let archive_data = std::fs::read(&archive_path).expect("read archive");
-    let parsed = parse_tar_core_archive(&archive_data);
+    let parsed: Vec<OwnedEntry> = parse_tar_core_with_limits(&archive_data, Limits::default());
 
     // Verify entries match.
     // GNU tar may reorder or add parent directories, so we compare by path.
@@ -705,7 +655,7 @@ fn smoke_test_gnu_tar_non_utf8_roundtrip(sh: &Shell) {
     );
 
     let archive_data = std::fs::read(&archive_path).expect("read archive");
-    let parsed = parse_tar_core_archive(&archive_data);
+    let parsed: Vec<OwnedEntry> = parse_tar_core_with_limits(&archive_data, Limits::default());
 
     let found = parsed
         .iter()

From ec53cf070bd9435936a480685c04eef4ff1e7957 Mon Sep 17 00:00:00 2001
From: Colin Walters <walters@verbum.org>
Date: Mon, 2 Mar 2026 18:13:47 +0000
Subject: [PATCH 2/6] ci: Fix semver-checks and MSRV jobs

Disable semver-checks until the crate is published to crates.io;
cargo-semver-checks needs a baseline version to compare against and
currently always fails.

Fix the MSRV job to read rust-version from Cargo.toml instead of
hardcoding "1.86.0". Also restrict the MSRV build to --lib only, since
dev-dependencies (like home@0.5.12) may legitimately require a newer
Rust version than the library's MSRV.

Assisted-by: OpenCode (Claude claude-opus-4-6)
---
 .github/workflows/ci.yaml | 40 +++++++++++++++++++++++++--------------
 1 file changed, 26 insertions(+), 14 deletions(-)

diff --git a/.github/workflows/ci.yaml b/.github/workflows/ci.yaml
index 3c6d8d0..2024d8f 100644
--- a/.github/workflows/ci.yaml
+++ b/.github/workflows/ci.yaml
@@ -18,15 +18,16 @@ env:
   RUST_BACKTRACE: short
 
 jobs:
-  semver-checks:
-    name: Semver Checks
-    runs-on: ubuntu-24.04
-    steps:
-    - uses: actions/checkout@v6
-    - uses: obi1kenobi/cargo-semver-checks-action@v2
-      with:
-        # Pinned until cargo-semver-checks supports rustdoc format v57 (Rust 1.93+)
-        rust-toolchain: "1.92.0"
+  # Disabled until the crate is published to crates.io for the first time.
+  # cargo-semver-checks needs a baseline version to compare against.
+  # Re-enable after initial `cargo publish`.
+  #
+  # semver-checks:
+  #   name: Semver Checks
+  #   runs-on: ubuntu-24.04
+  #   steps:
+  #   - uses: actions/checkout@v6
+  #   - uses: obi1kenobi/cargo-semver-checks-action@v2
 
   build-test:
     name: Build+Test
@@ -57,13 +58,24 @@ jobs:
     runs-on: ubuntu-latest
     steps:
       - uses: actions/checkout@v6
-      - name: Install MSRV toolchain
+      - name: Determine MSRV from Cargo.toml
+        id: msrv
+        run: |
+          msrv=$(sed -n 's/^rust-version *= *"\(.*\)"/\1/p' Cargo.toml)
+          if [ -z "$msrv" ]; then
+            echo "::error::rust-version not found in Cargo.toml"
+            exit 1
+          fi
+          echo "version=$msrv" >> "$GITHUB_OUTPUT"
+      - name: Install MSRV toolchain (${{ steps.msrv.outputs.version }})
         uses: dtolnay/rust-toolchain@master
         with:
-          toolchain: "1.86.0"
+          toolchain: ${{ steps.msrv.outputs.version }}
       - name: Cache Dependencies
         uses: Swatinem/rust-cache@v2
+      # Only build the library itself at MSRV — dev-dependencies may
+      # legitimately require a newer Rust version.
       - name: Build (MSRV)
-        run: cargo build
-      - name: Test (MSRV)
-        run: cargo test
+        run: cargo build --lib
+      - name: Check (no-std, MSRV)
+        run: cargo check --lib --no-default-features

From d34ad29e91009186953c3f8a2c79efabb259389e Mon Sep 17 00:00:00 2001
From: Colin Walters <walters@verbum.org>
Date: Mon, 2 Mar 2026 18:14:13 +0000
Subject: [PATCH 3/6] ci: Add fuzz testing to CI and Justfile

Add a fuzz job to the main CI workflow that runs all cargo-fuzz targets
for 2 minutes each on every push/PR. Add a separate extended fuzzing
workflow that runs post-merge only with 15 minutes per target.

Add a `just fuzz-all` recipe (default 2 min per target) and include it
in `just ci`. Also fix `just fuzz-list` to use the correct nightly
toolchain.

Assisted-by: OpenCode (Claude claude-opus-4-6)
---
 .github/workflows/ci.yaml            | 32 ++++++++++++++++++++
 .github/workflows/fuzz-extended.yaml | 44 ++++++++++++++++++++++++++++
 Justfile                             | 27 ++++++++++++++---
 3 files changed, 99 insertions(+), 4 deletions(-)
 create mode 100644 .github/workflows/fuzz-extended.yaml

diff --git a/.github/workflows/ci.yaml b/.github/workflows/ci.yaml
index 2024d8f..eeaeaf0 100644
--- a/.github/workflows/ci.yaml
+++ b/.github/workflows/ci.yaml
@@ -79,3 +79,35 @@ jobs:
         run: cargo build --lib
       - name: Check (no-std, MSRV)
         run: cargo check --lib --no-default-features
+
+  fuzz:
+    name: Fuzz
+    runs-on: ubuntu-latest
+    steps:
+      - name: Checkout repository
+        uses: actions/checkout@v6
+        with:
+          ref: ${{ github.event.pull_request.head.sha }}
+      - name: Install nightly toolchain
+        uses: dtolnay/rust-toolchain@nightly
+      - name: Install cargo-fuzz
+        run: cargo install cargo-fuzz --locked
+      - name: Cache Dependencies
+        uses: Swatinem/rust-cache@v2
+        with:
+          workspaces: fuzz
+      - name: Generate seed corpus
+        run: cargo run --manifest-path fuzz/Cargo.toml --bin generate-corpus
+      - name: Fuzz all targets (2 minutes each)
+        run: |
+          mkdir -p fuzz-logs
+          for target in $(cargo +nightly fuzz list); do
+            echo "--- Fuzzing $target (2 min) ---"
+            cargo +nightly fuzz run "$target" -- -max_total_time=120 \
+              > "fuzz-logs/$target.log" 2>&1 \
+              && echo "  $target: OK" \
+              || { echo "::error::Fuzzer $target failed"; cat "fuzz-logs/$target.log"; exit 1; }
+            # Print final stats line
+            tail -1 "fuzz-logs/$target.log"
+          done
+        working-directory: fuzz
diff --git a/.github/workflows/fuzz-extended.yaml b/.github/workflows/fuzz-extended.yaml
new file mode 100644
index 0000000..9a2bb3c
--- /dev/null
+++ b/.github/workflows/fuzz-extended.yaml
@@ -0,0 +1,44 @@
+name: Extended Fuzzing
+
+permissions:
+  contents: read
+
+on:
+  push:
+    branches: [main]
+
+env:
+  CARGO_NET_RETRY: 10
+  CARGO_TERM_COLOR: always
+  RUSTUP_MAX_RETRIES: 10
+  RUST_BACKTRACE: short
+
+jobs:
+  fuzz-extended:
+    name: Fuzz (extended)
+    runs-on: ubuntu-latest
+    steps:
+      - name: Checkout repository
+        uses: actions/checkout@v6
+      - name: Install nightly toolchain
+        uses: dtolnay/rust-toolchain@nightly
+      - name: Install cargo-fuzz
+        run: cargo install cargo-fuzz --locked
+      - name: Cache Dependencies
+        uses: Swatinem/rust-cache@v2
+        with:
+          workspaces: fuzz
+      - name: Generate seed corpus
+        run: cargo run --manifest-path fuzz/Cargo.toml --bin generate-corpus
+      - name: Fuzz all targets (15 minutes each)
+        run: |
+          mkdir -p fuzz-logs
+          for target in $(cargo +nightly fuzz list); do
+            echo "--- Fuzzing $target (15 min) ---"
+            cargo +nightly fuzz run "$target" -- -max_total_time=900 \
+              > "fuzz-logs/$target.log" 2>&1 \
+              && echo "  $target: OK" \
+              || { echo "::error::Fuzzer $target failed"; cat "fuzz-logs/$target.log"; exit 1; }
+            tail -1 "fuzz-logs/$target.log"
+          done
+        working-directory: fuzz
diff --git a/Justfile b/Justfile
index 1428116..8b65425 100644
--- a/Justfile
+++ b/Justfile
@@ -27,8 +27,8 @@ interop:
 # Run all tests
 test-all: unit interop
 
-# Full CI check (format, lint, test)
-ci: check unit
+# Full CI check (format, lint, test, fuzz)
+ci: check unit fuzz-all
 
 # Run Kani formal verification proofs (install: cargo install --locked kani-verifier && cargo kani setup)
 kani:
@@ -42,13 +42,32 @@ kani-proof name:
 kani-list:
     cargo kani list
 
-# Run a cargo-fuzz target (e.g., `just fuzz parse`, `just fuzz roundtrip -- -max_total_time=60`)
+# Run a single fuzz target (e.g., `just fuzz parse`, `just fuzz roundtrip -- -max_total_time=60`)
 fuzz target *ARGS:
     cargo +nightly fuzz run {{target}} {{ARGS}}
 
+# Run all fuzz targets for a given duration each (default: 2 minutes).
+# Fuzzer output is redirected to target/fuzz-logs/; on failure the full log is printed.
+fuzz-all seconds="120":
+    #!/usr/bin/env bash
+    set -euo pipefail
+    mkdir -p target/fuzz-logs
+    for target in $(cd fuzz && cargo +nightly fuzz list); do
+        echo "--- Fuzzing $target for {{seconds}}s ---"
+        log="target/fuzz-logs/$target.log"
+        if cargo +nightly fuzz run "$target" -- -max_total_time={{seconds}} > "$log" 2>&1; then
+            echo "  $target: OK"
+            tail -1 "$log"
+        else
+            echo "  $target: FAILED"
+            cat "$log"
+            exit 1
+        fi
+    done
+
 # List available fuzz targets
 fuzz-list:
-    cargo fuzz list
+    cd fuzz && cargo +nightly fuzz list
 
 # Generate seed corpus for the parse fuzz target
 generate-corpus:

From 6d6c373bc32b9b638ac681f8f9e01857c562d568 Mon Sep 17 00:00:00 2001
From: Colin Walters <walters@verbum.org>
Date: Mon, 2 Mar 2026 18:45:24 +0000
Subject: [PATCH 4/6] parse: Normalize empty link_target to None

PAX `linkpath` and GNU long link extensions can set an empty value,
which surfaces as `link_target: Some([])` instead of `None`. An empty
link target is semantically equivalent to no link target, matching
the behavior of tar-rs which filters these with `.filter(|b| !b.is_empty())`.

The header path already had a guard (`if !header_link.is_empty()`) but
the PAX linkpath and GNU long link code paths did not. Rather than
adding guards at each source, normalize after all sources have been
applied, right before constructing the ParsedEntry.

Found by the differential fuzzer (crash-8689eb1a2a408c74f42ba6948f330effb32e2306).

Assisted-by: OpenCode (Claude claude-opus-4-6)
---
 src/parse.rs | 7 +++++++
 1 file changed, 7 insertions(+)

diff --git a/src/parse.rs b/src/parse.rs
index 56d4eee..b1e9ab3 100644
--- a/src/parse.rs
+++ b/src/parse.rs
@@ -1558,6 +1558,13 @@ impl Parser {
         // Clear pending metadata
         self.pending.clear();
 
+        // Normalize: empty link target is semantically equivalent to no link
+        // target.  PAX `linkpath` and GNU long link can set an empty value that
+        // would otherwise surface as `Some([])` instead of `None`.
+        if link_target.as_ref().is_some_and(|v| v.is_empty()) {
+            link_target = None;
+        }
+
         // Reject entries with empty paths
         if path.is_empty() && !self.allow_empty_path {
             return Err(ParseError::EmptyPath);

From 570f09dedd7ad9fa149e4f6214a9665ee2ca579d Mon Sep 17 00:00:00 2001
From: Colin Walters <walters@verbum.org>
Date: Mon, 2 Mar 2026 18:45:32 +0000
Subject: [PATCH 5/6] parse: Fix integer overflow when computing total_size

When using permissive limits (max_pax_size = u64::MAX), the size field
can be large enough that `padded_size` (after rounding up to block
alignment) overflows when added to HEADER_SIZE. Use checked_add in
both the global PAX handler and the extension handler to return
InvalidSize instead of panicking.

Found by the differential fuzzer (crash-6300171db6afdcdb33b78318640acec5760fec81).

Assisted-by: OpenCode (Claude claude-opus-4-6)
---
 src/parse.rs | 8 ++++++--
 1 file changed, 6 insertions(+), 2 deletions(-)

diff --git a/src/parse.rs b/src/parse.rs
index b1e9ab3..2054ded 100644
--- a/src/parse.rs
+++ b/src/parse.rs
@@ -815,7 +815,9 @@ impl Parser {
                     });
                 }
 
-                let total_size = HEADER_SIZE as u64 + padded_size;
+                let total_size = (HEADER_SIZE as u64)
+                    .checked_add(padded_size)
+                    .ok_or(ParseError::InvalidSize(size))?;
                 if (input.len() as u64) < total_size {
                     return Ok(ParseEvent::NeedData {
                         min_bytes: total_size as usize,
@@ -898,7 +900,9 @@ impl Parser {
             });
         }
 
-        let total_size = HEADER_SIZE as u64 + padded_size;
+        let total_size = (HEADER_SIZE as u64)
+            .checked_add(padded_size)
+            .ok_or(ParseError::InvalidSize(size))?;
         if (input.len() as u64) < total_size {
             return Ok(ParseEvent::NeedData {
                 min_bytes: total_size as usize,

From be4709b198210639824d42df65796cf37efe85f3 Mon Sep 17 00:00:00 2001
From: Colin Walters <walters@verbum.org>
Date: Mon, 2 Mar 2026 18:47:30 +0000
Subject: [PATCH 6/6] parse: Normalize empty uname/gname to None

Same class of fix as the link_target normalization: PAX overrides
can set empty values that surface as Some([]) instead of None.
Apply the same normalization for consistency.

Assisted-by: OpenCode (Claude claude-opus-4-6)
---
 src/parse.rs        | 10 ++++++++--
 testutil/src/lib.rs | 10 ++++++++--
 2 files changed, 16 insertions(+), 4 deletions(-)

diff --git a/src/parse.rs b/src/parse.rs
index 2054ded..ff54de1 100644
--- a/src/parse.rs
+++ b/src/parse.rs
@@ -1562,12 +1562,18 @@ impl Parser {
         // Clear pending metadata
         self.pending.clear();
 
-        // Normalize: empty link target is semantically equivalent to no link
-        // target.  PAX `linkpath` and GNU long link can set an empty value that
+        // Normalize: empty optional byte fields are semantically equivalent to
+        // absent.  PAX overrides and GNU long link can set empty values that
         // would otherwise surface as `Some([])` instead of `None`.
         if link_target.as_ref().is_some_and(|v| v.is_empty()) {
             link_target = None;
         }
+        if uname.as_ref().is_some_and(|v| v.is_empty()) {
+            uname = None;
+        }
+        if gname.as_ref().is_some_and(|v| v.is_empty()) {
+            gname = None;
+        }
 
         // Reject entries with empty paths
         if path.is_empty() && !self.allow_empty_path {
diff --git a/testutil/src/lib.rs b/testutil/src/lib.rs
index a16eb80..69def71 100644
--- a/testutil/src/lib.rs
+++ b/testutil/src/lib.rs
@@ -225,9 +225,15 @@ pub fn parse_tar_rs(data: &[u8]) -> Vec<OwnedEntry> {
                     if let Some(attr_name) = key.strip_prefix(PAX_SCHILY_XATTR) {
                         xattrs.push((attr_name.as_bytes().to_vec(), ext.value_bytes().to_vec()));
                     } else if key == "uname" {
-                        uname = Some(ext.value_bytes().to_vec());
+                        let v = ext.value_bytes();
+                        if !v.is_empty() {
+                            uname = Some(v.to_vec());
+                        }
                     } else if key == "gname" {
-                        gname = Some(ext.value_bytes().to_vec());
+                        let v = ext.value_bytes();
+                        if !v.is_empty() {
+                            gname = Some(v.to_vec());
+                        }
                     }
                 }
             }