From 7126439a89926c2719223397c34af47dd9896df6 Mon Sep 17 00:00:00 2001 From: Jared Lunde Date: Thu, 4 Jun 2026 10:24:16 -0700 Subject: [PATCH] make OCI bless deterministic via content-addressed ext4 UUID The OCI bless pipeline was deterministic except for one line: the ext4 filesystem UUID came from rand::random(). That UUID seeds both the superblock uuid field and the directory hash_seed, so every bless of the same image produced byte-different output, which changed the content-addressed pack IDs downstream. Derive the UUID deterministically from the resolved manifest digest (sha256:..., itself content-addressed) by hashing it with blake3 and stamping RFC 4122 version/variant bits. Same image content -> same digest -> same UUID -> byte-identical ext4 image. Tests: - ext4 test_conversion_is_byte_deterministic: convert a representative multi-layer OCI image three times with fixed writer options, assert byte-for-byte identical output. - ext4 test_uuid_controls_output_bytes: prove the UUID actually flows into the image bytes, so reproducibility hinges on pinning it. - bless deterministic_uuid_is_stable_and_content_addressed: guard the derivation and prevent reintroducing rand::random(). Co-Authored-By: Claude Opus 4.8 (1M context) --- ext4/src/tar_convert.rs | 104 +++++++++++++++++++++++++++++++++++++++ glidefs/src/cli/bless.rs | 47 +++++++++++++++++- 2 files changed, 150 insertions(+), 1 deletion(-) diff --git a/ext4/src/tar_convert.rs b/ext4/src/tar_convert.rs index 43c6365..ee38d68 100644 --- a/ext4/src/tar_convert.rs +++ b/ext4/src/tar_convert.rs @@ -561,4 +561,108 @@ mod tests { let result = convert_oci_layers_to_ext4(&mut layers, output, &opts); assert!(result.is_ok()); } + + /// Build a representative multi-layer OCI image: base files, an override, + /// a nested directory, a whiteout, and an opaque whiteout. Returns fresh + /// tar bytes each call so repeated conversions never share mutable state. 
+    fn representative_layers() -> Vec<Vec<u8>> {
+        let layer0 = build_tar_with_dirs(&[
+            TarEntry::Dir("etc/"),
+            TarEntry::File("etc/hostname", b"base-host"),
+            TarEntry::File("etc/passwd", b"root:x:0:0"),
+            TarEntry::Dir("var/"),
+            TarEntry::Dir("var/log/"),
+            TarEntry::File("var/log/old.log", b"stale"),
+            TarEntry::File("readme.txt", b"base readme"),
+        ]);
+        let layer1 = build_tar_with_dirs(&[
+            // Override a base file.
+            TarEntry::File("etc/hostname", b"top-host"),
+            // Delete a base file.
+            TarEntry::Whiteout("etc/.wh.passwd"),
+            // Opaque-whiteout the log dir, then add a new entry.
+            TarEntry::Dir("var/log/"),
+            TarEntry::Whiteout("var/log/.wh..wh..opq"),
+            TarEntry::File("var/log/new.log", b"fresh"),
+            // Add a brand new nested tree.
+            TarEntry::Dir("app/"),
+            TarEntry::File("app/main.bin", b"\x00\x01\x02\x03binary-ish\xff"),
+        ]);
+        vec![layer0, layer1]
+    }
+
+    /// Convert the representative layers with a fixed UUID and return the
+    /// resulting ext4 image bytes.
+    fn convert_with_fixed_uuid() -> Vec<u8> {
+        let raw = representative_layers();
+        let mut layers: Vec<Cursor<Vec<u8>>> = raw.into_iter().map(Cursor::new).collect();
+        let opts = ConvertOptions {
+            convert_backslash: false,
+            writer_options: vec![
+                WriterOption::MaximumDiskSize(64 * 1024 * 1024),
+                // Pinning the UUID is what makes the output reproducible: it
+                // seeds the superblock UUID and the directory hash seed.
+                WriterOption::Uuid([0x42u8; 16]),
+                WriterOption::Journal(1024),
+            ],
+        };
+        let output = Cursor::new(Vec::new());
+        convert_oci_layers_to_ext4(&mut layers, output, &opts)
+            .unwrap()
+            .into_inner()
+    }
+
+    /// The whole OCI→ext4 conversion must be byte-for-byte deterministic when
+    /// the writer options (including UUID) are fixed. This is the invariant the
+    /// `bless` pipeline relies on for content-addressed, reproducible images.
+    #[test]
+    fn test_conversion_is_byte_deterministic() {
+        let a = convert_with_fixed_uuid();
+        let b = convert_with_fixed_uuid();
+        let c = convert_with_fixed_uuid();
+
+        assert_eq!(a.len(), b.len(), "image length must be stable");
+        assert_eq!(a, b, "two conversions of the same input must be byte-identical");
+        assert_eq!(b, c, "conversion must be byte-identical across repeated runs");
+
+        // Sanity: the image holds the merged layers (override + whiteouts applied), not an empty fs.
+        assert_eq!(read_file(&a, "/etc/hostname").unwrap(), b"top-host");
+        assert!(!path_exists(&a, "/etc/passwd"), "whiteout must delete passwd");
+        assert!(!path_exists(&a, "/var/log/old.log"), "opaque whiteout must drop old.log");
+        assert_eq!(read_file(&a, "/var/log/new.log").unwrap(), b"fresh");
+        assert_eq!(read_file(&a, "/app/main.bin").unwrap(), b"\x00\x01\x02\x03binary-ish\xff");
+    }
+
+    /// A different UUID must change the bytes (proving the UUID genuinely flows
+    /// into the image) while every other input stays fixed — so reproducible
+    /// output requires pinning the UUID, which `bless` now derives
+    /// deterministically from the manifest digest.
+    #[test]
+    fn test_uuid_controls_output_bytes() {
+        let with_a = {
+            let raw = representative_layers();
+            let mut layers: Vec<Cursor<Vec<u8>>> = raw.into_iter().map(Cursor::new).collect();
+            let opts = ConvertOptions {
+                convert_backslash: false,
+                writer_options: vec![WriterOption::Uuid([0x11u8; 16])],
+            };
+            convert_oci_layers_to_ext4(&mut layers, Cursor::new(Vec::new()), &opts)
+                .unwrap()
+                .into_inner()
+        };
+        let with_b = {
+            let raw = representative_layers();
+            let mut layers: Vec<Cursor<Vec<u8>>> = raw.into_iter().map(Cursor::new).collect();
+            let opts = ConvertOptions {
+                convert_backslash: false,
+                writer_options: vec![WriterOption::Uuid([0x22u8; 16])],
+            };
+            convert_oci_layers_to_ext4(&mut layers, Cursor::new(Vec::new()), &opts)
+                .unwrap()
+                .into_inner()
+        };
+
+        assert_eq!(with_a.len(), with_b.len(), "only the UUID changed; layout is identical");
+        assert_ne!(with_a, with_b, "the UUID must actually flow into the image bytes");
+    }
 }
diff --git a/glidefs/src/cli/bless.rs b/glidefs/src/cli/bless.rs
index 525960b..1f01420 100644
--- a/glidefs/src/cli/bless.rs
+++ b/glidefs/src/cli/bless.rs
@@ -198,6 +198,21 @@ pub async fn run_bless(
     Ok(())
 }
 
+/// Derive a deterministic, stable ext4 filesystem UUID from an OCI manifest
+/// digest.
+///
+/// The manifest digest (`sha256:...`) is content-addressed: the same image
+/// content always resolves to the same digest, so hashing it yields the same
+/// UUID on every bless. We hash rather than slice the digest directly so the
+/// result is spread over the 16-byte space, then stamp the version nibble
+/// (8 = custom, RFC 9562) and the RFC 4122 variant bits so the UUID is well formed.
+fn deterministic_uuid(manifest_digest: &str) -> [u8; 16] {
+    let mut uuid = blake3_128(manifest_digest.as_bytes()).0;
+    uuid[6] = (uuid[6] & 0x0f) | 0x80; // version 8 (custom, RFC 9562)
+    uuid[8] = (uuid[8] & 0x3f) | 0x80; // variant 1 (RFC 4122)
+    uuid
+}
+
 /// Bless an OCI image into a content-addressed base image.
 ///
 /// Pulls layers from the registry, converts to ext4, writes through
@@ -319,7 +334,12 @@ pub async fn run_bless_oci(
     ));
 
     // --- Pull + ingest OCI image ---
-    let uuid: [u8; 16] = rand::random();
+    // Derive the filesystem UUID deterministically from the resolved manifest
+    // digest so that blessing the same image (same content-addressed manifest)
+    // produces a byte-for-byte identical ext4 image every time. The UUID feeds
+    // the superblock and the directory hash seed, so a random UUID would make
+    // the whole pipeline non-reproducible.
+    let uuid = deterministic_uuid(&resolved.manifest_digest);
     let ingest_opts = IngestOptions {
         writer_options: vec![
             WriterOption::MaximumDiskSize(device_size as i64),
@@ -546,6 +566,31 @@ mod tests {
     use object_store::path::Path as ObjectPath;
     use object_store::ObjectStore;
 
+    #[test]
+    fn deterministic_uuid_is_stable_and_content_addressed() {
+        let digest = "sha256:deadbeefdeadbeefdeadbeefdeadbeefdeadbeefdeadbeefdeadbeefdeadbeef";
+
+        // Same digest → same UUID, every time.
+        assert_eq!(deterministic_uuid(digest), deterministic_uuid(digest));
+
+        // Different digest → different UUID (no collision on a trivial change).
+        let other = "sha256:00000000000000000000000000000000000000000000000000000000deadbeef";
+        assert_ne!(deterministic_uuid(digest), deterministic_uuid(other));
+
+        // Well-formed v8 UUID (RFC 9562): version nibble = 8, variant top bits = 10.
+        let uuid = deterministic_uuid(digest);
+        assert_eq!(uuid[6] & 0xf0, 0x80, "version must be 8");
+        assert_eq!(uuid[8] & 0xc0, 0x80, "variant must be RFC 4122");
+
+        // No randomness leaked in: two calls in the same process must agree, which
+        // would almost surely fail if rand::random() were reintroduced. Cross-process
+        // stability is not checked here; that would need a pinned expected value.
+        assert_eq!(
+            deterministic_uuid("sha256:abc"),
+            deterministic_uuid("sha256:abc"),
+        );
+    }
+
     /// Helper: run the bless pipeline directly against an InMemory object store.
     async fn bless_bytes(
         content_store: &ContentStore,