Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
72 changes: 58 additions & 14 deletions .github/workflows/ci.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -18,15 +18,16 @@ env:
RUST_BACKTRACE: short

jobs:
semver-checks:
name: Semver Checks
runs-on: ubuntu-24.04
steps:
- uses: actions/checkout@v6
- uses: obi1kenobi/cargo-semver-checks-action@v2
with:
# Pinned until cargo-semver-checks supports rustdoc format v57 (Rust 1.93+)
rust-toolchain: "1.92.0"
# Disabled until the crate is published to crates.io for the first time.
# cargo-semver-checks needs a baseline version to compare against.
# Re-enable after initial `cargo publish`.
#
# semver-checks:
# name: Semver Checks
# runs-on: ubuntu-24.04
# steps:
# - uses: actions/checkout@v6
# - uses: obi1kenobi/cargo-semver-checks-action@v2

build-test:
name: Build+Test
Expand Down Expand Up @@ -57,13 +58,56 @@ jobs:
runs-on: ubuntu-latest
steps:
- uses: actions/checkout@v6
- name: Install MSRV toolchain
- name: Determine MSRV from Cargo.toml
id: msrv
run: |
msrv=$(sed -n 's/^rust-version *= *"\(.*\)"/\1/p' Cargo.toml)
if [ -z "$msrv" ]; then
echo "::error::rust-version not found in Cargo.toml"
exit 1
fi
echo "version=$msrv" >> "$GITHUB_OUTPUT"
- name: Install MSRV toolchain (${{ steps.msrv.outputs.version }})
uses: dtolnay/rust-toolchain@master
with:
toolchain: "1.86.0"
toolchain: ${{ steps.msrv.outputs.version }}
- name: Cache Dependencies
uses: Swatinem/rust-cache@v2
# Only build the library itself at MSRV — dev-dependencies may
# legitimately require a newer Rust version.
- name: Build (MSRV)
run: cargo build
- name: Test (MSRV)
run: cargo test
run: cargo build --lib
- name: Check (no-std, MSRV)
run: cargo check --lib --no-default-features

fuzz:
  name: Fuzz
  runs-on: ubuntu-latest
  steps:
    - name: Checkout repository
      uses: actions/checkout@v6
      with:
        # Fuzz the pull request's head commit when one is available.
        ref: ${{ github.event.pull_request.head.sha }}
    - name: Install nightly toolchain
      uses: dtolnay/rust-toolchain@nightly
    - name: Install cargo-fuzz
      run: cargo install cargo-fuzz --locked
    - name: Cache Dependencies
      uses: Swatinem/rust-cache@v2
      with:
        workspaces: fuzz
    - name: Generate seed corpus
      run: cargo run --manifest-path fuzz/Cargo.toml --bin generate-corpus
    - name: Fuzz all targets (2 minutes each)
      working-directory: fuzz
      run: |
        set -euo pipefail
        mkdir -p fuzz-logs
        # Capture the target list before looping. A command substitution
        # placed directly in the `for` word list has its exit status
        # discarded, so a failing `cargo fuzz list` would make the loop run
        # zero times and the step would pass without fuzzing anything.
        targets=$(cargo +nightly fuzz list)
        if [ -z "$targets" ]; then
          echo "::error::No fuzz targets found"
          exit 1
        fi
        for target in $targets; do
          echo "--- Fuzzing $target (2 min) ---"
          log="fuzz-logs/$target.log"
          # Fuzzer output goes to a per-target log; it is only dumped in
          # full when the target fails.
          if cargo +nightly fuzz run "$target" -- -max_total_time=120 > "$log" 2>&1; then
            echo " $target: OK"
          else
            echo "::error::Fuzzer $target failed"
            cat "$log"
            exit 1
          fi
          # Print final stats line
          tail -n 1 "$log"
        done
44 changes: 44 additions & 0 deletions .github/workflows/fuzz-extended.yaml
Original file line number Diff line number Diff line change
@@ -0,0 +1,44 @@
# Longer fuzzing run (15 minutes per target) executed on pushes to main.
name: Extended Fuzzing

permissions:
  contents: read

on:
  push:
    branches: [main]

env:
  CARGO_NET_RETRY: 10
  CARGO_TERM_COLOR: always
  RUSTUP_MAX_RETRIES: 10
  RUST_BACKTRACE: short

jobs:
  fuzz-extended:
    name: Fuzz (extended)
    runs-on: ubuntu-latest
    steps:
      - name: Checkout repository
        uses: actions/checkout@v6
      - name: Install nightly toolchain
        uses: dtolnay/rust-toolchain@nightly
      - name: Install cargo-fuzz
        run: cargo install cargo-fuzz --locked
      - name: Cache Dependencies
        uses: Swatinem/rust-cache@v2
        with:
          workspaces: fuzz
      - name: Generate seed corpus
        run: cargo run --manifest-path fuzz/Cargo.toml --bin generate-corpus
      - name: Fuzz all targets (15 minutes each)
        working-directory: fuzz
        run: |
          set -euo pipefail
          mkdir -p fuzz-logs
          # Capture the target list before looping. A command substitution
          # placed directly in the `for` word list has its exit status
          # discarded, so a failing `cargo fuzz list` would make the loop run
          # zero times and the step would pass without fuzzing anything.
          targets=$(cargo +nightly fuzz list)
          if [ -z "$targets" ]; then
            echo "::error::No fuzz targets found"
            exit 1
          fi
          for target in $targets; do
            echo "--- Fuzzing $target (15 min) ---"
            log="fuzz-logs/$target.log"
            # Fuzzer output goes to a per-target log; it is only dumped in
            # full when the target fails.
            if cargo +nightly fuzz run "$target" -- -max_total_time=900 > "$log" 2>&1; then
              echo " $target: OK"
            else
              echo "::error::Fuzzer $target failed"
              cat "$log"
              exit 1
            fi
            # Print the final stats line of a successful run.
            tail -n 1 "$log"
          done
27 changes: 23 additions & 4 deletions Justfile
Original file line number Diff line number Diff line change
Expand Up @@ -27,8 +27,8 @@ interop:
# Run all tests
test-all: unit interop

# Full CI check (format, lint, test)
ci: check unit
# Full CI check (format, lint, test, fuzz)
ci: check unit fuzz-all

# Run Kani formal verification proofs (install: cargo install --locked kani-verifier && cargo kani setup)
kani:
Expand All @@ -42,13 +42,32 @@ kani-proof name:
kani-list:
cargo kani list

# Run a cargo-fuzz target (e.g., `just fuzz parse`, `just fuzz roundtrip -- -max_total_time=60`)
# Run a single fuzz target (e.g., `just fuzz parse`, `just fuzz roundtrip -- -max_total_time=60`)
fuzz target *ARGS:
cargo +nightly fuzz run {{target}} {{ARGS}}

# Run all fuzz targets for a given duration each (default: 2 minutes).
# Fuzzer output is redirected to target/fuzz-logs/; on failure the full log is printed.
fuzz-all seconds="120":
    #!/usr/bin/env bash
    set -euo pipefail
    mkdir -p target/fuzz-logs
    # Capture the target list before looping: a command substitution used
    # directly as the `for` word list has its exit status ignored even under
    # `set -e`, so a failing listing would silently fuzz nothing.
    targets=$(cd fuzz && cargo +nightly fuzz list)
    if [ -z "$targets" ]; then
        echo "no fuzz targets found" >&2
        exit 1
    fi
    for target in $targets; do
        echo "--- Fuzzing $target for {{seconds}}s ---"
        log="target/fuzz-logs/$target.log"
        if cargo +nightly fuzz run "$target" -- -max_total_time={{seconds}} > "$log" 2>&1; then
            echo " $target: OK"
            # Show only the fuzzer's final stats line on success.
            tail -n 1 "$log"
        else
            echo " $target: FAILED"
            cat "$log"
            exit 1
        fi
    done

# List available fuzz targets
fuzz-list:
cargo fuzz list
cd fuzz && cargo +nightly fuzz list

# Generate seed corpus for the parse fuzz target
generate-corpus:
Expand Down
58 changes: 4 additions & 54 deletions examples/interop-tar.rs
Original file line number Diff line number Diff line change
Expand Up @@ -21,8 +21,9 @@ use arbitrary::{Arbitrary, Unstructured};
use tempfile::TempDir;

use tar_core::builder::EntryBuilder;
use tar_core::parse::{Limits, ParseEvent, Parser};
use tar_core::parse::Limits;
use tar_core::{EntryType, HEADER_SIZE};
use tar_core_testutil::{parse_tar_core_with_limits, OwnedEntry};

// =============================================================================
// Test parameters
Expand Down Expand Up @@ -230,57 +231,6 @@ fn build_tar_core_archive(entries: &[EntryParams], format: &str) -> Vec<u8> {
archive
}

/// Walk a complete in-memory tar archive with tar-core's sans-IO parser and
/// collect each entry as an `EntryParams`.
///
/// The whole archive must already be in `data`. The function panics if the
/// parser reports an error, asks for more data, or yields a sparse entry.
/// Global extension records are skipped.
fn parse_tar_core_archive(data: &[u8]) -> Vec<EntryParams> {
    let mut parser = Parser::new(Limits::default());
    let mut collected = Vec::new();
    let mut pos = 0;

    loop {
        let event = parser.parse(&data[pos..]).expect("parse should succeed");
        match event {
            ParseEvent::End { .. } => break,
            ParseEvent::GlobalExtensions { consumed, .. } => {
                pos += consumed;
            }
            ParseEvent::NeedData { .. } => {
                panic!("unexpected NeedData — archive should be complete in memory");
            }
            ParseEvent::SparseEntry { .. } => {
                panic!("unexpected SparseEntry in interop test");
            }
            ParseEvent::Entry { consumed, entry } => {
                // Step over the header(s) that produced this entry.
                pos += consumed;

                let content_len = entry.size as usize;
                let path = entry.path.to_vec();
                let username = entry.uname.as_ref().map_or_else(Vec::new, |u| u.to_vec());
                let groupname = entry.gname.as_ref().map_or_else(Vec::new, |g| g.to_vec());
                let is_dir = entry.entry_type.is_dir();

                // The payload follows the header; advance past it rounded up
                // to a multiple of HEADER_SIZE.
                let content = data[pos..pos + content_len].to_vec();
                pos += content_len.next_multiple_of(HEADER_SIZE);

                collected.push(EntryParams {
                    path,
                    mode: entry.mode,
                    uid: entry.uid as u32,
                    gid: entry.gid as u32,
                    mtime: entry.mtime as u32,
                    username,
                    groupname,
                    content,
                    is_dir,
                });
            }
        }
    }

    collected
}

// =============================================================================
// GNU tar interaction helpers
// =============================================================================
Expand Down Expand Up @@ -471,7 +421,7 @@ fn test_gnu_tar_to_tar_core(sh: &Shell, entries: &[EntryParams], format: &str) {

// Parse with tar-core
let archive_data = std::fs::read(&archive_path).expect("read archive");
let parsed = parse_tar_core_archive(&archive_data);
let parsed: Vec<OwnedEntry> = parse_tar_core_with_limits(&archive_data, Limits::default());

// Verify entries match.
// GNU tar may reorder or add parent directories, so we compare by path.
Expand Down Expand Up @@ -705,7 +655,7 @@ fn smoke_test_gnu_tar_non_utf8_roundtrip(sh: &Shell) {
);

let archive_data = std::fs::read(&archive_path).expect("read archive");
let parsed = parse_tar_core_archive(&archive_data);
let parsed: Vec<OwnedEntry> = parse_tar_core_with_limits(&archive_data, Limits::default());

let found = parsed
.iter()
Expand Down
21 changes: 19 additions & 2 deletions src/parse.rs
Original file line number Diff line number Diff line change
Expand Up @@ -815,7 +815,9 @@ impl Parser {
});
}

let total_size = HEADER_SIZE as u64 + padded_size;
let total_size = (HEADER_SIZE as u64)
.checked_add(padded_size)
.ok_or(ParseError::InvalidSize(size))?;
if (input.len() as u64) < total_size {
return Ok(ParseEvent::NeedData {
min_bytes: total_size as usize,
Expand Down Expand Up @@ -898,7 +900,9 @@ impl Parser {
});
}

let total_size = HEADER_SIZE as u64 + padded_size;
let total_size = (HEADER_SIZE as u64)
.checked_add(padded_size)
.ok_or(ParseError::InvalidSize(size))?;
if (input.len() as u64) < total_size {
return Ok(ParseEvent::NeedData {
min_bytes: total_size as usize,
Expand Down Expand Up @@ -1558,6 +1562,19 @@ impl Parser {
// Clear pending metadata
self.pending.clear();

// Normalize: empty optional byte fields are semantically equivalent to
// absent. PAX overrides and GNU long link can set empty values that
// would otherwise surface as `Some([])` instead of `None`.
if link_target.as_ref().is_some_and(|v| v.is_empty()) {
link_target = None;
}
if uname.as_ref().is_some_and(|v| v.is_empty()) {
uname = None;
}
if gname.as_ref().is_some_and(|v| v.is_empty()) {
gname = None;
}

// Reject entries with empty paths
if path.is_empty() && !self.allow_empty_path {
return Err(ParseError::EmptyPath);
Expand Down
10 changes: 8 additions & 2 deletions testutil/src/lib.rs
Original file line number Diff line number Diff line change
Expand Up @@ -225,9 +225,15 @@ pub fn parse_tar_rs(data: &[u8]) -> Vec<OwnedEntry> {
if let Some(attr_name) = key.strip_prefix(PAX_SCHILY_XATTR) {
xattrs.push((attr_name.as_bytes().to_vec(), ext.value_bytes().to_vec()));
} else if key == "uname" {
uname = Some(ext.value_bytes().to_vec());
let v = ext.value_bytes();
if !v.is_empty() {
uname = Some(v.to_vec());
}
} else if key == "gname" {
gname = Some(ext.value_bytes().to_vec());
let v = ext.value_bytes();
if !v.is_empty() {
gname = Some(v.to_vec());
}
}
}
}
Expand Down
Loading