diff --git a/Cargo.lock b/Cargo.lock index 3cde378..9cc259d 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -8,6 +8,16 @@ version = "0.11.4" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "fe438c63458706e03479442743baae6c88256498e6431708f6dfc520a26515d3" +[[package]] +name = "aead" +version = "0.5.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "d122413f284cf2d62fb1b7db97e02edb8cda96d769b16e443a4f6195e35662b0" +dependencies = [ + "crypto-common", + "generic-array", +] + [[package]] name = "ahash" version = "0.8.12" @@ -1975,6 +1985,30 @@ version = "0.2.1" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "613afe47fcd5fac7ccf1db93babcb082c5994d996f20b8b159f2ad1658eb5724" +[[package]] +name = "chacha20" +version = "0.9.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "c3613f74bd2eac03dad61bd53dbe620703d4371614fe0bc3b9f04dd36fe4e818" +dependencies = [ + "cfg-if", + "cipher", + "cpufeatures", +] + +[[package]] +name = "chacha20poly1305" +version = "0.10.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "10cd79432192d1c0f4e1a0fef9527696cc039165d729fb41b3f4f4f354c2dc35" +dependencies = [ + "aead", + "chacha20", + "cipher", + "poly1305", + "zeroize", +] + [[package]] name = "chrono" version = "0.4.44" @@ -2001,6 +2035,17 @@ dependencies = [ "unsigned-varint 0.8.0", ] +[[package]] +name = "cipher" +version = "0.4.4" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "773f3b9af64447d2ce9850330c473515014aa235e6a783b02db81ff39e4a3dad" +dependencies = [ + "crypto-common", + "inout", + "zeroize", +] + [[package]] name = "clap" version = "4.5.60" @@ -2307,9 +2352,41 @@ source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "78c8292055d1c1df0cce5d180393dc8cce0abec0a7102adb6c7b1eef6016d60a" dependencies = [ "generic-array", + "rand_core 0.6.4", "typenum", ] +[[package]] +name = 
"crypto_box" +version = "0.9.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "16182b4f39a82ec8a6851155cc4c0cda3065bb1db33651726a29e1951de0f009" +dependencies = [ + "aead", + "chacha20", + "crypto_secretbox", + "curve25519-dalek", + "salsa20", + "subtle", + "zeroize", +] + +[[package]] +name = "crypto_secretbox" +version = "0.1.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "b9d6cf87adf719ddf43a805e92c6870a531aedda35ff640442cbaf8674e141e1" +dependencies = [ + "aead", + "chacha20", + "cipher", + "generic-array", + "poly1305", + "salsa20", + "subtle", + "zeroize", +] + [[package]] name = "curve25519-dalek" version = "4.1.3" @@ -3255,8 +3332,11 @@ version = "0.3.9" dependencies = [ "anyhow", "base64", + "chacha20poly1305", "chrono", "cid", + "crypto_box", + "curve25519-dalek", "ed25519-dalek", "hex", "multibase", @@ -3290,6 +3370,7 @@ dependencies = [ "cid", "clap", "dirs-next", + "ed25519-dalek", "futures", "gitlawb-core", "hex", @@ -3919,6 +4000,15 @@ dependencies = [ "serde_core", ] +[[package]] +name = "inout" +version = "0.1.4" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "879f10e63c20629ecabbb64a8010319738c66a5cd0c29b02d63d272b03751d01" +dependencies = [ + "generic-array", +] + [[package]] name = "ipconfig" version = "0.3.4" @@ -4811,6 +4901,12 @@ version = "1.70.2" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "384b8ab6d37215f3c5301a95a4accb5d64aa607f1fcb26a11b5303878451b4fe" +[[package]] +name = "opaque-debug" +version = "0.3.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "c08d65885ee38876c4f86fa503fb49d7b507c2b62552df7c70b2fce627e06381" + [[package]] name = "openssl-probe" version = "0.2.1" @@ -5060,6 +5156,17 @@ dependencies = [ "windows-sys 0.61.2", ] +[[package]] +name = "poly1305" +version = "0.8.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = 
"8159bd90725d2df49889a078b54f4f79e87f1f8a8444194cdca81d38f5393abf" +dependencies = [ + "cpufeatures", + "opaque-debug", + "universal-hash", +] + [[package]] name = "portable-atomic" version = "1.13.1" @@ -5787,6 +5894,15 @@ version = "0.2.2" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "6518fc26bced4d53678a22d6e423e9d8716377def84545fe328236e3af070e7f" +[[package]] +name = "salsa20" +version = "0.10.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "97a22f5af31f73a954c10289c93e8a50cc23d971e80ee446f1f6f7137a088213" +dependencies = [ + "cipher", +] + [[package]] name = "schannel" version = "0.1.29" @@ -7035,6 +7151,16 @@ version = "0.2.6" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "ebc1c04c71510c7f702b52b7c350734c9ff1295c464a03335b00bb84fc54f853" +[[package]] +name = "universal-hash" +version = "0.5.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "fc1de2c688dc15305988b563c3854064043356019f97a4b46276fe734c4f07ea" +dependencies = [ + "crypto-common", + "subtle", +] + [[package]] name = "unsigned-varint" version = "0.7.2" diff --git a/crates/gitlawb-core/Cargo.toml b/crates/gitlawb-core/Cargo.toml index 486a5aa..4468d0c 100644 --- a/crates/gitlawb-core/Cargo.toml +++ b/crates/gitlawb-core/Cargo.toml @@ -23,6 +23,9 @@ chrono = { workspace = true } uuid = { workspace = true } zeroize = { version = "1", features = ["derive"] } pkcs8 = { version = "0.10", features = ["pem", "std"] } +curve25519-dalek = "4" +crypto_box = { version = "0.9", features = ["std", "chacha20"] } +chacha20poly1305 = "0.10" [dev-dependencies] tokio = { workspace = true } diff --git a/crates/gitlawb-core/src/encrypt.rs b/crates/gitlawb-core/src/encrypt.rs new file mode 100644 index 0000000..0336270 --- /dev/null +++ b/crates/gitlawb-core/src/encrypt.rs @@ -0,0 +1,291 @@ +//! Envelope encryption for withheld blobs (Option B). A random content key +//! 
encrypts the blob (XChaCha20-Poly1305); the content key is wrapped to each +//! recipient via an X25519 box keyed from their Ed25519 `did:key`. The node +//! seals with public keys only; readers open with their own private key. + +use crate::identity::Keypair; +use anyhow::{Context, Result}; +use ed25519_dalek::VerifyingKey; + +/// X25519 public key (Montgomery u) for an Ed25519 verifying key. +fn x25519_public(vk: &VerifyingKey) -> Result<[u8; 32]> { + use curve25519_dalek::edwards::CompressedEdwardsY; + let edwards = CompressedEdwardsY::from_slice(vk.as_bytes()) + .ok() + .and_then(|c| c.decompress()) + .context("verifying key is not a valid edwards point")?; + Ok(edwards.to_montgomery().to_bytes()) +} + +/// X25519 secret scalar for an Ed25519 seed (SHA-512 of seed, lower 32, clamped). +fn x25519_secret_from_seed(seed: &[u8; 32]) -> [u8; 32] { + use sha2::{Digest, Sha512}; + let h = Sha512::digest(seed); + let mut s = [0u8; 32]; + s.copy_from_slice(&h[..32]); + s[0] &= 248; + s[31] &= 127; + s[31] |= 64; + s +} + +use base64::{engine::general_purpose::STANDARD as B64, Engine}; +use chacha20poly1305::{ + aead::{Aead, KeyInit}, + XChaCha20Poly1305, XNonce, +}; +use crypto_box::{ + aead::{AeadCore, OsRng}, + ChaChaBox, PublicKey as XPublic, SecretKey as XSecret, +}; +use rand::RngCore; +use serde::{Deserialize, Serialize}; + +const MAGIC: &[u8] = b"GLENC"; +const VERSION: u8 = 2; + +#[derive(Serialize, Deserialize)] +struct Recipient { + eph: String, // base64 ephemeral x25519 pubkey (32B) + nonce: String, // base64 box nonce (24B) + wrap: String, // base64 wrapped content key +} + +#[derive(Serialize, Deserialize)] +struct Header { + alg: String, + nonce: String, // base64 body nonce (24B) + recipients: Vec, +} + +/// Encrypt `plaintext` so any of `recipients` (Ed25519 keys) can decrypt. 
+///
+/// Output framing: `MAGIC | VERSION (1 byte) | header_len (u32 LE) | header JSON | body`.
+/// The body is the XChaCha20-Poly1305 ciphertext of `plaintext` under a fresh
+/// random content key; the header carries the body nonce plus one wrapped copy
+/// of the content key per recipient (ephemeral X25519 key + box nonce + wrap).
+///
+/// # Errors
+/// Fails if `recipients` is empty, if a verifying key does not decompress to a
+/// valid Edwards point, if an AEAD operation fails, or if the encoded header
+/// does not fit the 32-bit length field.
+pub fn seal_blob(plaintext: &[u8], recipients: &[VerifyingKey]) -> Result<Vec<u8>> {
+    if recipients.is_empty() {
+        return Err(anyhow::anyhow!("seal_blob: no recipients"));
+    }
+
+    // The content key is the only secret this function creates; keep it in a
+    // Zeroizing wrapper so it is wiped when it goes out of scope.
+    let mut content_key = zeroize::Zeroizing::new([0u8; 32]);
+    OsRng.fill_bytes(&mut content_key[..]);
+    let body_cipher = XChaCha20Poly1305::new_from_slice(&content_key[..])
+        .map_err(|e| anyhow::anyhow!("content key: {e}"))?;
+    let mut body_nonce = [0u8; 24];
+    OsRng.fill_bytes(&mut body_nonce);
+    let body = body_cipher
+        .encrypt(XNonce::from_slice(&body_nonce), plaintext)
+        .map_err(|e| anyhow::anyhow!("body encrypt: {e}"))?;
+
+    // Wrap the content key once per recipient with a fresh ephemeral X25519
+    // key, so entries carry no recipient identity.
+    let mut wrapped = Vec::with_capacity(recipients.len());
+    for vk in recipients {
+        let recip_x = XPublic::from(x25519_public(vk)?);
+        let eph = XSecret::generate(&mut OsRng);
+        let abox = ChaChaBox::new(&recip_x, &eph);
+        let nonce = ChaChaBox::generate_nonce(&mut OsRng);
+        let ct = abox
+            .encrypt(&nonce, &content_key[..])
+            .map_err(|e| anyhow::anyhow!("wrap: {e}"))?;
+        wrapped.push(Recipient {
+            eph: B64.encode(eph.public_key().as_bytes()),
+            nonce: B64.encode(nonce),
+            wrap: B64.encode(ct),
+        });
+    }
+
+    let header = Header {
+        alg: "xchacha20poly1305".into(),
+        nonce: B64.encode(body_nonce),
+        recipients: wrapped,
+    };
+    let header_json = serde_json::to_vec(&header).context("encode header")?;
+    // A plain `as u32` would silently truncate an oversized header and produce
+    // an envelope that can never be parsed; reject it instead.
+    let header_len = u32::try_from(header_json.len())
+        .context("envelope header exceeds the u32 length field")?;
+
+    let mut out = Vec::with_capacity(MAGIC.len() + 1 + 4 + header_json.len() + body.len());
+    out.extend_from_slice(MAGIC);
+    out.push(VERSION);
+    out.extend_from_slice(&header_len.to_le_bytes());
+    out.extend_from_slice(&header_json);
+    out.extend_from_slice(&body);
+    Ok(out)
+}
+
+/// Decrypt an envelope with `keypair`. Errors if not a recipient or on auth fail.
+pub fn open_blob(envelope: &[u8], keypair: &Keypair) -> Result> { + let mut p = 0; + if envelope.len() < MAGIC.len() + 1 + 4 || &envelope[..MAGIC.len()] != MAGIC { + return Err(anyhow::anyhow!("bad envelope magic")); + } + p += MAGIC.len(); + if envelope[p] != VERSION { + return Err(anyhow::anyhow!("unsupported envelope version")); + } + p += 1; + let hlen = u32::from_le_bytes(envelope[p..p + 4].try_into().unwrap()) as usize; + p += 4; + let header: Header = + serde_json::from_slice(envelope.get(p..p + hlen).context("truncated header")?) + .context("decode header")?; + let body = &envelope[p + hlen..]; + + let my_x = XSecret::from(x25519_secret_from_seed(&keypair.seed_bytes())); + + // Identities are blinded: no entry says which recipient it belongs to, so + // try each one. The ChaChaBox AEAD tag authenticates, so exactly the + // reader's own entry unwraps; every other entry fails cleanly. + let mut content_key: Option> = None; + for entry in &header.recipients { + let eph = match B64 + .decode(&entry.eph) + .ok() + .and_then(|b| <[u8; 32]>::try_from(b.as_slice()).ok()) + { + Some(b) => XPublic::from(b), + None => continue, + }; + // from_slice panics on a wrong length, and the envelope is attacker + // controlled, so validate the 24-byte box nonce before using it. 
+ let nonce = match B64 + .decode(&entry.nonce) + .ok() + .and_then(|n| <[u8; 24]>::try_from(n.as_slice()).ok()) + { + Some(n) => n, + None => continue, + }; + let wrap = match B64.decode(&entry.wrap) { + Ok(w) => w, + Err(_) => continue, + }; + let abox = ChaChaBox::new(&eph, &my_x); + if let Ok(ck) = abox.decrypt( + crypto_box::aead::generic_array::GenericArray::from_slice(&nonce), + wrap.as_slice(), + ) { + content_key = Some(ck); + break; + } + } + let content_key = content_key.context("not a recipient of this envelope")?; + + let body_cipher = XChaCha20Poly1305::new_from_slice(&content_key) + .map_err(|e| anyhow::anyhow!("content key: {e}"))?; + let body_nonce = B64 + .decode(&header.nonce) + .ok() + .and_then(|n| <[u8; 24]>::try_from(n.as_slice()).ok()) + .context("invalid body nonce")?; + body_cipher + .decrypt(XNonce::from_slice(&body_nonce), body) + .map_err(|_| anyhow::anyhow!("body decrypt failed")) +} + +#[cfg(test)] +mod tests { + use super::*; + use crate::identity::Keypair; + + #[test] + fn ed25519_to_x25519_keypair_agrees() { + // The X25519 public derived from the Ed25519 public must equal the + // X25519 public of the X25519 secret derived from the same seed. 
+ let kp = Keypair::generate(); + let seed = kp.seed_bytes(); + let xpub_from_public = x25519_public(&kp.verifying_key()).unwrap(); + let xsec = x25519_secret_from_seed(&seed); + let xpub_from_secret = crypto_box::SecretKey::from(xsec).public_key().to_bytes(); + assert_eq!(xpub_from_public, xpub_from_secret); + } + + #[test] + fn seal_open_round_trip_for_recipients() { + let owner = Keypair::generate(); + let reader_a = Keypair::generate(); + let reader_b = Keypair::generate(); + let msg = b"private blob contents"; + + let env = seal_blob(msg, &[owner.verifying_key(), reader_a.verifying_key()]).unwrap(); + + assert_eq!(open_blob(&env, &owner).unwrap(), msg); + assert_eq!(open_blob(&env, &reader_a).unwrap(), msg); + assert!( + open_blob(&env, &reader_b).is_err(), + "non-recipient must fail" + ); + } + + #[test] + fn tampered_envelope_fails() { + let owner = Keypair::generate(); + let mut env = seal_blob(b"hi", &[owner.verifying_key()]).unwrap(); + let last = env.len() - 1; + env[last] ^= 0x01; + assert!(open_blob(&env, &owner).is_err()); + } + + #[test] + fn v2_header_contains_no_recipient_pubkey() { + // The blinded envelope header must not carry any recipient's public key. 
+ let reader = Keypair::generate(); + let env = seal_blob(b"private blob contents", &[reader.verifying_key()]).unwrap(); + + // Slice out the header bytes using the envelope framing: + // MAGIC | version(1B) | header_len(4B LE) | header_json | body + let mut p = MAGIC.len() + 1; // skip MAGIC + version byte + let hlen = u32::from_le_bytes(env[p..p + 4].try_into().unwrap()) as usize; + p += 4; + let header = &env[p..p + hlen]; + let header_str = String::from_utf8_lossy(header); + + let pubkey_b64 = B64.encode(reader.verifying_key().as_bytes()); + assert!( + !header_str.contains(&pubkey_b64), + "recipient public key must not appear in the blinded header" + ); + } + + #[test] + fn v1_envelope_is_rejected() { + let reader = Keypair::generate(); + let mut env = seal_blob(b"hi", &[reader.verifying_key()]).unwrap(); + // Flip the version byte (immediately after MAGIC) from 2 to 1. + env[MAGIC.len()] = 1; + let err = open_blob(&env, &reader).unwrap_err(); + assert!( + err.to_string().contains("unsupported envelope version"), + "expected version-rejection error, got: {err}" + ); + } + + #[test] + fn malformed_nonce_returns_err_not_panic() { + // from_slice panics on wrong-length input; a crafted envelope on the + // public recovery path must surface an error, never panic. + let reader = Keypair::generate(); + let env = seal_blob(b"private blob contents", &[reader.verifying_key()]).unwrap(); + + // Split the envelope framing into header JSON and body. 
+ let mut p = MAGIC.len() + 1; + let hlen = u32::from_le_bytes(env[p..p + 4].try_into().unwrap()) as usize; + p += 4; + let header_bytes = &env[p..p + hlen]; + let body = &env[p + hlen..]; + + let reframe = |header: &serde_json::Value| -> Vec { + let hj = serde_json::to_vec(header).unwrap(); + let mut out = Vec::new(); + out.extend_from_slice(MAGIC); + out.push(VERSION); + out.extend_from_slice(&(hj.len() as u32).to_le_bytes()); + out.extend_from_slice(&hj); + out.extend_from_slice(body); + out + }; + let bad_nonce = serde_json::Value::String(B64.encode([0u8; 12])); + + // Corrupted per-recipient nonce: entry is skipped, no match. + let mut header: serde_json::Value = serde_json::from_slice(header_bytes).unwrap(); + header["recipients"][0]["nonce"] = bad_nonce.clone(); + assert!(open_blob(&reframe(&header), &reader).is_err()); + + // Corrupted body nonce: unwrap succeeds, body nonce is rejected. + let mut header: serde_json::Value = serde_json::from_slice(header_bytes).unwrap(); + header["nonce"] = bad_nonce; + assert!(open_blob(&reframe(&header), &reader).is_err()); + } +} diff --git a/crates/gitlawb-core/src/identity.rs b/crates/gitlawb-core/src/identity.rs index 96d50b9..9d3fea1 100644 --- a/crates/gitlawb-core/src/identity.rs +++ b/crates/gitlawb-core/src/identity.rs @@ -52,6 +52,12 @@ impl Keypair { URL_SAFE_NO_PAD.encode(sig.to_bytes()) } + /// The raw 32-byte Ed25519 seed. Used to derive the X25519 secret for + /// envelope decryption (see `crate::encrypt`). + pub fn seed_bytes(&self) -> [u8; 32] { + self.signing_key.to_bytes() + } + /// Export the signing key as raw 32-byte seed (wrapped in Zeroizing). 
pub fn to_seed(&self) -> Zeroizing<[u8; 32]> { Zeroizing::new(self.signing_key.to_bytes()) diff --git a/crates/gitlawb-core/src/lib.rs b/crates/gitlawb-core/src/lib.rs index a608be1..a9e91f6 100644 --- a/crates/gitlawb-core/src/lib.rs +++ b/crates/gitlawb-core/src/lib.rs @@ -1,6 +1,7 @@ pub mod cert; pub mod cid; pub mod did; +pub mod encrypt; pub mod error; pub mod http_sig; pub mod identity; diff --git a/crates/gitlawb-node/Cargo.toml b/crates/gitlawb-node/Cargo.toml index 5f10ec9..a210aa0 100644 --- a/crates/gitlawb-node/Cargo.toml +++ b/crates/gitlawb-node/Cargo.toml @@ -11,6 +11,7 @@ path = "src/main.rs" [dependencies] gitlawb-core = { path = "../gitlawb-core" } +ed25519-dalek = { workspace = true } tokio = { workspace = true } serde = { workspace = true } serde_json = { workspace = true } diff --git a/crates/gitlawb-node/src/api/encrypted.rs b/crates/gitlawb-node/src/api/encrypted.rs new file mode 100644 index 0000000..20827fb --- /dev/null +++ b/crates/gitlawb-node/src/api/encrypted.rs @@ -0,0 +1,101 @@ +//! Authenticated discovery + fetch for encrypted withheld blobs (Option B1). + +use axum::extract::{Extension, Path, State}; +use axum::Json; + +use crate::auth::AuthenticatedDid; +use crate::error::{AppError, Result}; +use crate::state::AppState; + +/// GET /api/v1/repos/{owner}/{repo}/encrypted-blobs +/// Returns [{oid, cid}] for encrypted blobs the caller may decrypt. +pub async fn list_encrypted_blobs( + State(state): State, + auth: Option>, + Path((owner, repo)): Path<(String, String)>, +) -> Result> { + let caller = auth.as_ref().map(|e| e.0 .0.as_str()).unwrap_or(""); + let record = state + .db + .get_repo(&owner, &repo) + .await? 
+ .ok_or_else(|| AppError::RepoNotFound(format!("{owner}/{repo}")))?; + let rows = state + .db + .list_encrypted_blobs_for(&record.id, caller) + .await?; + let blobs: Vec<_> = rows + .into_iter() + .map(|(oid, cid)| serde_json::json!({ "oid": oid, "cid": cid })) + .collect(); + Ok(Json(serde_json::json!({ "blobs": blobs }))) +} + +/// GET /api/v1/repos/{owner}/{repo}/encrypted-blob/{oid} +/// Returns raw envelope bytes if the caller is a recipient. +pub async fn get_encrypted_blob( + State(state): State, + auth: Option>, + Path((owner, repo, oid)): Path<(String, String, String)>, +) -> Result> { + let caller = auth.as_ref().map(|e| e.0 .0.as_str()).unwrap_or(""); + let record = state + .db + .get_repo(&owner, &repo) + .await? + .ok_or_else(|| AppError::RepoNotFound(format!("{owner}/{repo}")))?; + let cid = state + .db + .encrypted_blob_cid(&record.id, &oid, caller) + .await? + .ok_or_else(|| AppError::RepoNotFound(format!("{owner}/{repo}/{oid}")))?; + let bytes = crate::ipfs_pin::cat(&state.config.ipfs_api, &cid) + .await + .map_err(|e| AppError::Git(e.to_string()))?; + Ok(bytes) +} + +/// GET /api/v1/repos/{owner}/{repo}/encrypted-blobs/replicate +/// Returns [{oid, cid}] for every encrypted blob in the repo, for peer-mirror +/// replication (Option B2). Recipient identities are deliberately withheld: the +/// v2 envelopes no longer carry recipient public keys, so peers must not learn +/// the reader set either. A mirror detects a re-seal by the CID changing (the +/// OID is stable across re-seals). Ciphertext metadata only, never plaintext. +pub async fn replicate_encrypted_blobs( + State(state): State, + Path((owner, repo)): Path<(String, String)>, +) -> Result> { + let record = state + .db + .get_repo(&owner, &repo) + .await? 
+ .ok_or_else(|| AppError::RepoNotFound(format!("{owner}/{repo}")))?; + let rows = state.db.list_all_encrypted_blobs(&record.id).await?; + let blobs: Vec<_> = rows + .into_iter() + .map(|(oid, cid, _recipients)| replicate_blob_json(oid, cid)) + .collect(); + Ok(Json(serde_json::json!({ "blobs": blobs }))) +} + +/// Serialize one blob for the replication wire. Recipient identities are +/// intentionally absent so a mirror never learns the reader set. +fn replicate_blob_json(oid: String, cid: String) -> serde_json::Value { + serde_json::json!({ "oid": oid, "cid": cid }) +} + +#[cfg(test)] +mod tests { + use super::replicate_blob_json; + + #[test] + fn replicate_blob_json_omits_recipients() { + let v = replicate_blob_json("oid1".into(), "cidA".into()); + assert_eq!(v["oid"], "oid1"); + assert_eq!(v["cid"], "cidA"); + assert!( + v.get("recipients").is_none(), + "replication wire must not carry recipient identities" + ); + } +} diff --git a/crates/gitlawb-node/src/api/mod.rs b/crates/gitlawb-node/src/api/mod.rs index 2595c48..7f01365 100644 --- a/crates/gitlawb-node/src/api/mod.rs +++ b/crates/gitlawb-node/src/api/mod.rs @@ -3,6 +3,7 @@ pub mod arweave; pub mod bounties; pub mod certs; pub mod changelog; +pub mod encrypted; pub mod events; pub mod ipfs; pub mod issues; diff --git a/crates/gitlawb-node/src/api/repos.rs b/crates/gitlawb-node/src/api/repos.rs index 2886926..6fa028d 100644 --- a/crates/gitlawb-node/src/api/repos.rs +++ b/crates/gitlawb-node/src/api/repos.rs @@ -686,6 +686,14 @@ pub async fn git_receive_pack( let ipfs_api = state.config.ipfs_api.clone(); let repo_path_clone = disk_path.clone(); let db_clone = state.db.clone(); + let rules_for_enc = rules_opt.clone(); + let repo_id = record.id.clone(); + let owner_did = record.owner_did.clone(); + let is_public = record.is_public; + let irys_url = state.config.irys_url.clone(); + let http_client = std::sync::Arc::clone(&state.http_client); + let node_did_str = state.node_did.to_string(); + let repo_name = 
record.name.clone(); tokio::spawn(async move { let pinned = crate::ipfs_pin::pin_new_objects( &ipfs_api, @@ -700,6 +708,64 @@ pub async fn git_receive_pack( tracing::info!(sha = %sha, %cid, "pinned"); } } + + // Option B1: encrypt-then-pin the withheld blobs so authorized + // readers can recover them when the origin cannot serve them. + if let Some(rules) = rules_for_enc.filter(|r| !r.is_empty()) { + let p = repo_path_clone.clone(); + let owner = owner_did.clone(); + let recip = tokio::task::spawn_blocking(move || { + crate::git::visibility_pack::withheld_blob_recipients( + &p, &rules, is_public, &owner, + ) + }) + .await; + if let Ok(Ok(recipients)) = recip { + let delta = crate::encrypted_pin::encrypt_and_pin( + &ipfs_api, + &repo_path_clone, + &db_clone, + &repo_id, + &recipients, + ) + .await; + + // Option B3: anchor a per-push manifest of the blobs sealed + // this push to Arweave, so the oid->cid index survives total + // node loss. Best-effort; never fails the push. + if !delta.is_empty() && !irys_url.is_empty() { + let owner_short = owner_did.split(':').next_back().unwrap_or(&owner_did); + let repo_slug = format!("{owner_short}/{repo_name}"); + let ts = chrono::Utc::now().to_rfc3339(); + let manifest = crate::arweave::EncryptedManifest { + repo: &repo_slug, + owner_did: &owner_did, + node_did: &node_did_str, + timestamp: &ts, + blobs: &delta, + }; + match crate::arweave::anchor_encrypted_manifest( + &http_client, + &irys_url, + &manifest, + ) + .await + { + Ok(tx) if !tx.is_empty() => tracing::info!( + repo = %repo_slug, + tx_id = %tx, + "anchored encrypted manifest to Arweave" + ), + Ok(_) => {} + Err(e) => tracing::warn!( + repo = %repo_slug, + err = %e, + "encrypted manifest anchor failed" + ), + } + } + } + } }); } diff --git a/crates/gitlawb-node/src/arweave.rs b/crates/gitlawb-node/src/arweave.rs index a88f31f..cf13947 100644 --- a/crates/gitlawb-node/src/arweave.rs +++ b/crates/gitlawb-node/src/arweave.rs @@ -103,6 +103,108 @@ pub async fn 
anchor_ref_update( Ok(tx_id) } +/// A per-push manifest of the blobs encrypted this push (Option B3). The +/// `blobs` slice is `(oid, cid, recipients)` tuples; only `oid` and `cid` are +/// anchored. Anchored directly to Arweave as its JSON body so the discovery +/// index survives total node loss. +pub struct EncryptedManifest<'a> { + pub repo: &'a str, + pub owner_did: &'a str, + pub node_did: &'a str, + pub timestamp: &'a str, + pub blobs: &'a [(String, String, Vec)], +} + +/// Anchor a per-push encrypted-blob manifest to Arweave via Irys. The manifest +/// JSON body is the payload (not a CID pointer to IPFS), so the index is +/// permanent and self-contained. Recipient identities are deliberately omitted: +/// the anchor is permanent and public, and the v2 envelopes no longer expose +/// recipients, so the reader set must not be written to Arweave either. +/// +/// Returns the Irys/Arweave transaction ID, or `Ok("")` when `irys_url` is empty +/// (anchoring disabled) or there are no blobs to anchor. 
+pub async fn anchor_encrypted_manifest(
+    client: &reqwest::Client,
+    irys_url: &str,
+    manifest: &EncryptedManifest<'_>,
+) -> Result<String> {
+    // Anchoring disabled, or nothing was sealed: report "no transaction".
+    if irys_url.is_empty() || manifest.blobs.is_empty() {
+        return Ok(String::new());
+    }
+
+    // Only (oid, cid) is serialized; the recipient list in each tuple is
+    // dropped here so it never reaches the upload payload.
+    let blobs_json: Vec<serde_json::Value> = manifest
+        .blobs
+        .iter()
+        .map(|(oid, cid, _recipients)| manifest_blob_json(oid, cid))
+        .collect();
+
+    let payload = json!({
+        "schema": "gitlawb/encrypted-manifest/v1",
+        "repo": manifest.repo,
+        "owner_did": manifest.owner_did,
+        "node_did": manifest.node_did,
+        "timestamp": manifest.timestamp,
+        "blobs": blobs_json,
+    });
+
+    let body = serde_json::to_vec(&payload)?;
+    // Tolerate a configured base URL with or without a trailing slash.
+    let url = format!("{}/upload", irys_url.trim_end_matches('/'));
+
+    let resp = client
+        .post(&url)
+        .header("Content-Type", "application/json")
+        .header("x-irys-tags", build_manifest_tags_header(manifest))
+        .body(body)
+        .send()
+        .await
+        .map_err(|e| anyhow::anyhow!("Irys upload failed: {e}"))?;
+
+    if !resp.status().is_success() {
+        // Capture the status first: reading the body consumes the response.
+        let status = resp.status();
+        let body = resp.text().await.unwrap_or_default();
+        return Err(anyhow::anyhow!("Irys returned {status}: {body}"));
+    }
+
+    let json: serde_json::Value = resp
+        .json()
+        .await
+        .map_err(|e| anyhow::anyhow!("failed to parse Irys response: {e}"))?;
+
+    let tx_id = json["id"]
+        .as_str()
+        .ok_or_else(|| anyhow::anyhow!("no 'id' in Irys response: {json}"))?
+        .to_string();
+
+    tracing::info!(
+        repo = %manifest.repo,
+        tx_id = %tx_id,
+        blobs = manifest.blobs.len(),
+        "anchored encrypted manifest to Arweave"
+    );
+
+    Ok(tx_id)
+}
+
+/// Serialize one blob for the Arweave manifest. Recipient identities are
+/// intentionally absent so the permanent public anchor never records who can
+/// read a blob.
+fn manifest_blob_json(oid: &str, cid: &str) -> serde_json::Value {
+    json!({ "oid": oid, "cid": cid })
+}
+
+/// Build the Irys tag header for an encrypted-blob manifest. `Repo` and `Schema`
+/// are the tags the `gl` recovery query filters on.
+fn build_manifest_tags_header(manifest: &EncryptedManifest<'_>) -> String { + [ + "App-Name:gitlawb".to_string(), + "Schema:gitlawb/encrypted-manifest/v1".to_string(), + format!("Repo:{}", sanitize_tag(manifest.repo)), + format!("Owner-DID:{}", sanitize_tag(manifest.owner_did)), + format!("Node-DID:{}", sanitize_tag(manifest.node_did)), + ] + .join(",") +} + /// Arweave permanent URL for a given Irys transaction ID. pub fn arweave_url(tx_id: &str) -> String { format!("https://arweave.net/{tx_id}") @@ -193,6 +295,87 @@ mod tests { ); } + #[tokio::test] + async fn test_manifest_anchor_noop_when_url_empty() { + let client = reqwest::Client::new(); + let blobs = vec![( + "oid1".to_string(), + "cid1".to_string(), + vec!["did:key:zA".to_string()], + )]; + let m = EncryptedManifest { + repo: "alice/r", + owner_did: "did:key:zO", + node_did: "did:key:zN", + timestamp: "2026-06-11T00:00:00Z", + blobs: &blobs, + }; + assert_eq!( + anchor_encrypted_manifest(&client, "", &m).await.unwrap(), + "" + ); + } + + #[tokio::test] + async fn test_manifest_anchor_noop_when_no_blobs() { + let client = reqwest::Client::new(); + let blobs: Vec<(String, String, Vec)> = vec![]; + let m = EncryptedManifest { + repo: "alice/r", + owner_did: "did:key:zO", + node_did: "did:key:zN", + timestamp: "2026-06-11T00:00:00Z", + blobs: &blobs, + }; + // Non-empty URL, but no blobs: still a no-op. 
+ assert_eq!( + anchor_encrypted_manifest(&client, "https://example.invalid", &m) + .await + .unwrap(), + "" + ); + } + + #[tokio::test] + async fn test_manifest_anchor_success() { + let mut server = mockito::Server::new_async().await; + let _mock = server + .mock("POST", "/upload") + .with_status(200) + .with_header("content-type", "application/json") + .with_body(r#"{"id":"MANIFESTTX123","timestamp":1710000000000,"version":"1.0.0"}"#) + .create_async() + .await; + + let client = reqwest::Client::new(); + let blobs = vec![( + "oid1".to_string(), + "cid1".to_string(), + vec!["did:key:zA".to_string()], + )]; + let m = EncryptedManifest { + repo: "alice/r", + owner_did: "did:key:zO", + node_did: "did:key:zN", + timestamp: "2026-06-11T00:00:00Z", + blobs: &blobs, + }; + let r = anchor_encrypted_manifest(&client, &server.url(), &m).await; + assert_eq!(r.unwrap(), "MANIFESTTX123"); + _mock.assert_async().await; + } + + #[test] + fn manifest_blob_json_omits_recipients() { + let v = manifest_blob_json("oid1", "cidA"); + assert_eq!(v["oid"], "oid1"); + assert_eq!(v["cid"], "cidA"); + assert!( + v.get("recipients").is_none(), + "Arweave manifest must not anchor recipient identities" + ); + } + #[test] fn test_sanitize_tag() { assert_eq!(sanitize_tag("alice/myrepo"), "alice/myrepo"); diff --git a/crates/gitlawb-node/src/db/mod.rs b/crates/gitlawb-node/src/db/mod.rs index b00c861..4a1c107 100644 --- a/crates/gitlawb-node/src/db/mod.rs +++ b/crates/gitlawb-node/src/db/mod.rs @@ -720,6 +720,21 @@ const MIGRATIONS: &[Migration] = &[ "CREATE INDEX IF NOT EXISTS idx_visibility_rules_repo ON visibility_rules(repo_id)", ], }, + Migration { + version: 4, + name: "encrypted_blobs", + stmts: &[ + r#"CREATE TABLE IF NOT EXISTS encrypted_blobs ( + repo_id TEXT NOT NULL, + oid TEXT NOT NULL, + cid TEXT NOT NULL, + recipients TEXT NOT NULL, + created_at TEXT NOT NULL, + PRIMARY KEY (repo_id, oid) + )"#, + "CREATE INDEX IF NOT EXISTS idx_encrypted_blobs_repo ON encrypted_blobs(repo_id)", + 
], + }, ]; // ── Repos ───────────────────────────────────────────────────────────────────── @@ -1628,6 +1643,133 @@ impl Db { Ok(()) } + pub async fn record_encrypted_blob( + &self, + repo_id: &str, + oid: &str, + cid: &str, + recipients: &[String], + ) -> Result<()> { + let recipients_json = serde_json::to_string(recipients)?; + sqlx::query( + "INSERT INTO encrypted_blobs (repo_id, oid, cid, recipients, created_at) + VALUES ($1, $2, $3, $4, $5) + ON CONFLICT (repo_id, oid) DO UPDATE SET cid = EXCLUDED.cid, recipients = EXCLUDED.recipients", + ) + .bind(repo_id) + .bind(oid) + .bind(cid) + .bind(recipients_json) + .bind(Utc::now().to_rfc3339()) + .execute(&self.pool) + .await?; + Ok(()) + } + + /// Deserialize the stored recipients JSON. Corruption is surfaced as an + /// error rather than silently treated as an empty recipient list, which + /// would deny access to every legitimate reader and hand peers incomplete + /// replication metadata. + fn parse_recipients(repo_id: &str, oid: &str, raw: &str) -> Result> { + serde_json::from_str(raw).with_context(|| { + format!("corrupt recipients JSON in encrypted_blobs (repo_id={repo_id}, oid={oid})") + }) + } + + /// (oid, cid) for every encrypted blob in the repo that `caller` may decrypt. + pub async fn list_encrypted_blobs_for( + &self, + repo_id: &str, + caller: &str, + ) -> Result> { + let rows = + sqlx::query("SELECT oid, cid, recipients FROM encrypted_blobs WHERE repo_id = $1") + .bind(repo_id) + .fetch_all(&self.pool) + .await?; + let mut out = Vec::new(); + for row in rows { + let oid: String = row.get("oid"); + let cid: String = row.get("cid"); + let recipients: String = row.get("recipients"); + let recipients = Self::parse_recipients(repo_id, &oid, &recipients)?; + if recipients.iter().any(|d| d == caller) { + out.push((oid, cid)); + } + } + Ok(out) + } + + /// (oid, cid, recipients) for every encrypted blob in the repo, unscoped by + /// caller. 
This is the replication view used by peer mirrors (Option B2), + /// distinct from the recipient-scoped `list_encrypted_blobs_for`. It returns + /// only ciphertext metadata; no plaintext or key material is involved. + pub async fn list_all_encrypted_blobs( + &self, + repo_id: &str, + ) -> Result)>> { + let rows = + sqlx::query("SELECT oid, cid, recipients FROM encrypted_blobs WHERE repo_id = $1") + .bind(repo_id) + .fetch_all(&self.pool) + .await?; + let mut out = Vec::new(); + for row in rows { + let oid: String = row.get("oid"); + let cid: String = row.get("cid"); + let recipients: String = row.get("recipients"); + let recipients = Self::parse_recipients(repo_id, &oid, &recipients)?; + out.push((oid, cid, recipients)); + } + Ok(out) + } + + /// The CID of one encrypted blob, only if `caller` is a recipient. + pub async fn encrypted_blob_cid( + &self, + repo_id: &str, + oid: &str, + caller: &str, + ) -> Result> { + let row = sqlx::query( + "SELECT cid, recipients FROM encrypted_blobs WHERE repo_id = $1 AND oid = $2", + ) + .bind(repo_id) + .bind(oid) + .fetch_optional(&self.pool) + .await?; + let Some(row) = row else { return Ok(None) }; + let recipients: String = row.get("recipients"); + let recipients = Self::parse_recipients(repo_id, oid, &recipients)?; + if recipients.iter().any(|d| d == caller) { + Ok(Some(row.get("cid"))) + } else { + Ok(None) + } + } + + /// The recipient DID list stored for an encrypted blob, or None if there is + /// no row. Used to decide whether a re-seal is needed (recipients changed). 
+ pub async fn encrypted_blob_recipients( + &self, + repo_id: &str, + oid: &str, + ) -> Result>> { + let row = + sqlx::query("SELECT recipients FROM encrypted_blobs WHERE repo_id = $1 AND oid = $2") + .bind(repo_id) + .bind(oid) + .fetch_optional(&self.pool) + .await?; + match row { + None => Ok(None), + Some(r) => { + let recipients: String = r.get("recipients"); + Ok(Some(Self::parse_recipients(repo_id, oid, &recipients)?)) + } + } + } + pub async fn list_pinned_cids(&self) -> Result> { let rows = sqlx::query( "SELECT sha256_hex, cid, pinned_at, pinata_cid FROM pinned_cids ORDER BY pinned_at DESC", diff --git a/crates/gitlawb-node/src/encrypted_pin.rs b/crates/gitlawb-node/src/encrypted_pin.rs new file mode 100644 index 0000000..50797b5 --- /dev/null +++ b/crates/gitlawb-node/src/encrypted_pin.rs @@ -0,0 +1,75 @@ +//! Encrypt-then-pin for withheld blobs (Option B1). Each withheld blob is sealed +//! to its recipient DIDs and the envelope pinned to IPFS, recorded in +//! `encrypted_blobs`. Best-effort per blob: a failure is logged and skipped, +//! never pinned in plaintext. + +use std::collections::{BTreeSet, HashMap}; +use std::path::Path; +use std::str::FromStr; + +use ed25519_dalek::VerifyingKey; +use gitlawb_core::did::Did; +use gitlawb_core::encrypt::seal_blob; + +use crate::db::Db; + +/// Resolve a DID string to its Ed25519 verifying key, or None if it carries no +/// inline key (e.g. did:web / did:gitlawb). +fn did_to_key(did: &str) -> Option { + Did::from_str(did).ok()?.to_verifying_key().ok() +} + +/// Encrypt and pin every withheld blob. `recipients` maps blob oid -> DID set. +/// Returns `(oid, cid, recipients)` for each blob actually sealed and recorded +/// this call (the per-push delta), used by Option B3 to anchor a manifest. 
+pub async fn encrypt_and_pin( + ipfs_api: &str, + repo_path: &Path, + db: &Db, + repo_id: &str, + recipients: &HashMap>, +) -> Vec<(String, String, Vec)> { + let mut sealed = Vec::new(); + for (oid, dids) in recipients { + // Skip only if an existing envelope already covers exactly these + // recipients. If the recipient set changed (e.g. a reader was added to + // the rule), re-seal so the new reader can recover the blob. Reader + // removal is not retroactive: the old envelope is already public. + if let Ok(Some(stored)) = db.encrypted_blob_recipients(repo_id, oid).await { + let stored: BTreeSet = stored.into_iter().collect(); + if &stored == dids { + continue; + } + } + let keys: Vec = dids.iter().filter_map(|d| did_to_key(d)).collect(); + if keys.is_empty() { + tracing::warn!(oid = %oid, "no resolvable recipient keys; skipping encrypted pin"); + continue; + } + let data = match crate::git::store::read_object(repo_path, oid) { + Ok(Some((_t, bytes))) => bytes, + _ => continue, + }; + let envelope = match seal_blob(&data, &keys) { + Ok(e) => e, + Err(e) => { + tracing::warn!(oid = %oid, err = %e, "seal_blob failed; skipping"); + continue; + } + }; + let cid = match crate::ipfs_pin::pin_git_object(ipfs_api, oid, &envelope).await { + Ok(c) if !c.is_empty() => c, + _ => continue, + }; + let dids_vec: Vec = dids.iter().cloned().collect(); + if let Err(e) = db + .record_encrypted_blob(repo_id, oid, &cid, &dids_vec) + .await + { + tracing::warn!(oid = %oid, err = %e, "record_encrypted_blob failed"); + continue; + } + sealed.push((oid.clone(), cid.clone(), dids_vec)); + } + sealed +} diff --git a/crates/gitlawb-node/src/git/visibility_pack.rs b/crates/gitlawb-node/src/git/visibility_pack.rs index c9c6d6b..90ca772 100644 --- a/crates/gitlawb-node/src/git/visibility_pack.rs +++ b/crates/gitlawb-node/src/git/visibility_pack.rs @@ -7,7 +7,7 @@ use crate::db::VisibilityRule; use crate::git::store; use crate::visibility::{visibility_check, Decision}; use anyhow::{Context, 
Result}; -use std::collections::HashSet; +use std::collections::{BTreeSet, HashMap, HashSet}; use std::path::Path; /// List every (blob_oid, "/repo/relative/path") pair reachable from any branch @@ -87,6 +87,42 @@ pub fn replicable_objects(all: Vec, withheld: &HashSet) -> Vec Result>> { + let withheld = withheld_blob_oids(repo_path, rules, is_public, owner_did, None)?; + if withheld.is_empty() { + return Ok(HashMap::new()); + } + let mut candidates: BTreeSet = BTreeSet::new(); + for r in rules { + for d in &r.reader_dids { + candidates.insert(d.clone()); + } + } + let mut out: HashMap> = HashMap::new(); + for (oid, path) in blob_paths(repo_path)? { + if !withheld.contains(&oid) { + continue; + } + let entry = out.entry(oid).or_default(); + entry.insert(owner_did.to_string()); + for did in &candidates { + if visibility_check(rules, is_public, owner_did, Some(did), &path) == Decision::Allow { + entry.insert(did.clone()); + } + } + } + Ok(out) +} + #[cfg(test)] mod tests { use super::*; @@ -230,4 +266,33 @@ mod tests { let got = replicable_objects(all.clone(), &withheld); assert_eq!(got, all); } + + #[test] + fn recipients_are_owner_plus_allowed_readers_only() { + let (_td, repo, secret_oid, public_oid) = fixture(); + let reader = "did:key:zReader"; + let rules = vec![rule("/secret/**", &[reader])]; + let map = withheld_blob_recipients(&repo, &rules, true, OWNER).unwrap(); + + let recips = map.get(&secret_oid).expect("secret blob has recipients"); + assert!(recips.contains(OWNER)); + assert!(recips.contains(reader)); + assert!( + !map.contains_key(&public_oid), + "public blob is not encrypted" + ); + } + + #[test] + fn node_seal_open_round_trip() { + use gitlawb_core::encrypt::{open_blob, seal_blob}; + use gitlawb_core::identity::Keypair; + let (_td, repo, secret_oid, _public) = fixture(); + let (_t, bytes) = crate::git::store::read_object(&repo, &secret_oid) + .unwrap() + .unwrap(); + let reader = Keypair::generate(); + let env = seal_blob(&bytes, 
&[reader.verifying_key()]).unwrap(); + assert_eq!(open_blob(&env, &reader).unwrap(), bytes); + } } diff --git a/crates/gitlawb-node/src/ipfs_pin.rs b/crates/gitlawb-node/src/ipfs_pin.rs index 96d6abd..9bdaade 100644 --- a/crates/gitlawb-node/src/ipfs_pin.rs +++ b/crates/gitlawb-node/src/ipfs_pin.rs @@ -72,6 +72,19 @@ pub async fn pin_git_object(ipfs_api: &str, sha256_hex: &str, data: &[u8]) -> Re Ok(cid) } +/// Fetch raw bytes for a CID from the local Kubo node (`/api/v0/cat`). +pub async fn cat(ipfs_api: &str, cid: &str) -> Result> { + if ipfs_api.is_empty() { + return Err(anyhow::anyhow!("IPFS not configured")); + } + let url = format!("{}/api/v0/cat?arg={}", ipfs_api.trim_end_matches('/'), cid); + let resp = reqwest::Client::new().post(&url).send().await?; + if !resp.status().is_success() { + return Err(anyhow::anyhow!("ipfs cat {cid}: {}", resp.status())); + } + Ok(resp.bytes().await?.to_vec()) +} + /// List all git objects in the given bare repo and pin any that are not yet /// recorded in `pinned_cids`. 
/// diff --git a/crates/gitlawb-node/src/main.rs b/crates/gitlawb-node/src/main.rs index 87f3432..ac39a1c 100644 --- a/crates/gitlawb-node/src/main.rs +++ b/crates/gitlawb-node/src/main.rs @@ -5,6 +5,7 @@ mod bootstrap; mod cert; mod config; mod db; +mod encrypted_pin; mod error; mod git; mod graphql; diff --git a/crates/gitlawb-node/src/server.rs b/crates/gitlawb-node/src/server.rs index 9baea20..31ce4b4 100644 --- a/crates/gitlawb-node/src/server.rs +++ b/crates/gitlawb-node/src/server.rs @@ -356,6 +356,18 @@ pub fn build_router(state: AppState) -> Router { "/api/v1/repos/{owner}/{repo}/withheld-paths", axum::routing::get(visibility::withheld_paths), ) + .route( + "/api/v1/repos/{owner}/{repo}/encrypted-blobs", + axum::routing::get(crate::api::encrypted::list_encrypted_blobs), + ) + .route( + "/api/v1/repos/{owner}/{repo}/encrypted-blob/{oid}", + axum::routing::get(crate::api::encrypted::get_encrypted_blob), + ) + .route( + "/api/v1/repos/{owner}/{repo}/encrypted-blobs/replicate", + axum::routing::get(crate::api::encrypted::replicate_encrypted_blobs), + ) .layer(DefaultBodyLimit::disable()) .layer(RequestBodyLimitLayer::new(pack_limit)) .layer(middleware::from_fn(auth::optional_signature)); diff --git a/crates/gitlawb-node/src/sync.rs b/crates/gitlawb-node/src/sync.rs index f1ffecc..615ce22 100644 --- a/crates/gitlawb-node/src/sync.rs +++ b/crates/gitlawb-node/src/sync.rs @@ -12,6 +12,7 @@ //! 5. On success, register ourselves as a replica with the origin node so //! its `replica_count` reflects reality (best-effort, idempotent). +use std::collections::HashMap; use std::path::Path; use std::sync::Arc; @@ -47,6 +48,44 @@ fn classify_mirror(withheld: Option>) -> MirrorMode { } } +/// One encrypted blob as advertised by an origin's `encrypted-blobs/replicate` +/// endpoint (Option B2). Ciphertext metadata only; recipient identities are +/// withheld from peers, so a re-seal is detected by the CID changing. 
+#[derive(Debug, Clone, PartialEq, Eq, serde::Deserialize)] +struct ReplicaBlob { + oid: String, + cid: String, +} + +/// The shape of the `encrypted-blobs/replicate` JSON response. +#[derive(Debug, serde::Deserialize)] +struct ReplicateResponse { + #[serde(default)] + blobs: Vec, +} + +/// Decide which of the origin's encrypted blobs this mirror must (re)replicate. +/// +/// `have` maps each already-stored blob's oid to the CID the mirror pinned. A +/// remote blob is returned when the mirror has no row for that oid, or when the +/// stored CID differs from the advertised one. A re-seal regenerates the +/// envelope (new content key, nonce, and per-recipient wraps), so the CID +/// changes while the OID stays stable; comparing CIDs detects a re-seal without +/// the mirror ever holding recipient identities. +fn blobs_needing_replication( + remote: &[ReplicaBlob], + have: &HashMap, +) -> Vec { + remote + .iter() + .filter(|b| match have.get(&b.oid) { + None => true, + Some(stored_cid) => stored_cid != &b.cid, + }) + .cloned() + .collect() +} + /// Start the background sync worker. Returns immediately; the worker runs /// as a detached tokio task that exits cleanly when `shutdown_rx` flips /// to `true`. @@ -167,6 +206,20 @@ async fn process_batch( machine_id, ) .await; + // Option B2: carry the encrypted withheld-blob envelopes too, so an + // authorized reader can recover private content from this mirror if + // the origin dies. `item.repo` is the slug "{owner_short}/{name}", + // which is the id upsert_mirror_repo wrote (the local repo_id). + replicate_encrypted_blobs( + client, + &origin_url, + owner_short, + repo_name, + db, + &item.repo, + &config.ipfs_api, + ) + .await; let _ = db.mark_sync_done(&item.id).await; crate::metrics::record_sync_processed("done"); @@ -277,6 +330,87 @@ async fn register_replica_with_origin( } } +/// Replicate the origin's encrypted withheld blobs onto this mirror (Option B2). 
+/// +/// After the git objects are mirrored, fetch the origin's replication listing, +/// then for each blob the mirror does not already hold (or whose CID changed, +/// i.e. the origin re-sealed) pull the ciphertext envelope over IPFS, pin it +/// locally, and record the `encrypted_blobs` row keyed by this mirror's local +/// `repo_id`. The mirror stores no recipient identities. +/// +/// Best-effort and idempotent: any per-blob failure is logged and skipped, to be +/// retried on the next sync. Confidentiality is never at risk; the mirror only +/// ever handles ciphertext and never decrypts. Cleanly a no-op when IPFS is +/// unconfigured, the origin reports no encrypted blobs, or the replicate endpoint +/// is absent (older peer) or unreachable. +async fn replicate_encrypted_blobs( + client: &reqwest::Client, + origin_url: &str, + owner: &str, + repo: &str, + db: &Db, + repo_id: &str, + ipfs_api: &str, +) { + if ipfs_api.is_empty() { + return; + } + + let url = format!("{origin_url}/api/v1/repos/{owner}/{repo}/encrypted-blobs/replicate"); + let resp = match client.get(&url).send().await { + Ok(r) if r.status().is_success() => r, + _ => return, + }; + let parsed: ReplicateResponse = match resp.json().await { + Ok(p) => p, + Err(e) => { + warn!(repo = %repo, err = %e, "failed to parse encrypted-blobs/replicate response"); + return; + } + }; + if parsed.blobs.is_empty() { + return; + } + + let have: HashMap = match db.list_all_encrypted_blobs(repo_id).await { + Ok(rows) => rows + .into_iter() + .map(|(oid, cid, _recipients)| (oid, cid)) + .collect(), + Err(e) => { + warn!(repo = %repo, err = %e, "failed to list local encrypted blobs for replication"); + return; + } + }; + + for blob in blobs_needing_replication(&parsed.blobs, &have) { + let envelope = match crate::ipfs_pin::cat(ipfs_api, &blob.cid).await { + Ok(bytes) => bytes, + Err(e) => { + warn!(oid = %blob.oid, cid = %blob.cid, err = %e, "failed to fetch encrypted envelope over IPFS; will retry next sync"); + 
continue; + } + }; + match crate::ipfs_pin::pin_git_object(ipfs_api, &blob.oid, &envelope).await { + Ok(cid) if !cid.is_empty() => { + if cid != blob.cid { + warn!(oid = %blob.oid, expected = %blob.cid, got = %cid, "replicated envelope CID mismatch; skipping record"); + continue; + } + if let Err(e) = db + .record_encrypted_blob(repo_id, &blob.oid, &cid, &[]) + .await + { + warn!(oid = %blob.oid, err = %e, "failed to record replicated encrypted blob"); + } + } + _ => { + warn!(oid = %blob.oid, "failed to pin replicated encrypted envelope; will retry next sync"); + } + } + } +} + /// Run a git subprocess, returning an error with stderr on non-zero exit. async fn git_run(args: &[&str]) -> anyhow::Result<()> { let out = tokio::process::Command::new("git") @@ -423,6 +557,58 @@ mod tests { assert!(matches!(mode, MirrorMode::Plain)); } + fn rb(oid: &str, cid: &str) -> ReplicaBlob { + ReplicaBlob { + oid: oid.to_string(), + cid: cid.to_string(), + } + } + + #[test] + fn replicate_stores_new_blob() { + let remote = vec![rb("oid1", "cidA")]; + let have = HashMap::new(); + assert_eq!(blobs_needing_replication(&remote, &have), remote); + } + + #[test] + fn replicate_skips_already_present_same_cid() { + let remote = vec![rb("oid1", "cidA")]; + let mut have = HashMap::new(); + have.insert("oid1".to_string(), "cidA".to_string()); + assert!(blobs_needing_replication(&remote, &have).is_empty()); + } + + #[test] + fn replicate_restores_on_cid_change() { + // The origin re-sealed: same oid, new envelope, new cid. + let remote = vec![rb("oid1", "cidB")]; + let mut have = HashMap::new(); + have.insert("oid1".to_string(), "cidA".to_string()); + assert_eq!(blobs_needing_replication(&remote, &have), remote); + } + + #[test] + fn replicate_empty_remote_is_noop() { + assert!(blobs_needing_replication(&[], &HashMap::new()).is_empty()); + } + + #[test] + fn replicate_response_parses() { + // An older origin may still send a recipients field; it must be ignored. 
+ let json = r#"{"blobs":[{"oid":"o1","cid":"c1","recipients":["did:key:zA"]}]}"#; + let parsed: ReplicateResponse = serde_json::from_str(json).unwrap(); + assert_eq!(parsed.blobs.len(), 1); + assert_eq!(parsed.blobs[0].oid, "o1"); + assert_eq!(parsed.blobs[0].cid, "c1"); + } + + #[test] + fn replicate_response_empty_blobs_parses() { + let parsed: ReplicateResponse = serde_json::from_str(r#"{"blobs":[]}"#).unwrap(); + assert!(parsed.blobs.is_empty()); + } + fn g(args: &[&str], dir: &Path) { assert!(Command::new("git") .args(args) diff --git a/crates/gl/src/clone.rs b/crates/gl/src/clone.rs index b5fe39d..93e998d 100644 --- a/crates/gl/src/clone.rs +++ b/crates/gl/src/clone.rs @@ -29,6 +29,23 @@ pub struct CloneArgs { #[arg(long, default_value = "https://node.gitlawb.com", env = "GITLAWB_NODE")] pub node: String, + + /// Arweave gateway for B3 manifest discovery/fetch when a node cannot supply + /// the encrypted-blob mapping. + #[arg( + long, + default_value = "https://arweave.net", + env = "GITLAWB_ARWEAVE_GATEWAY" + )] + pub arweave_gateway: String, + + /// Public IPFS gateway for fetching encrypted envelopes during B3 recovery. + #[arg( + long, + default_value = "https://dweb.link", + env = "GITLAWB_IPFS_GATEWAY" + )] + pub ipfs_gateway: String, } /// Run a git command inside `dir`, erroring with stderr on failure. @@ -217,6 +234,294 @@ struct WithheldPathsResponse { reinclude: Vec, } +/// After the base clone, recover encrypted blobs the caller is authorized for +/// that are missing locally: fetch the envelope, decrypt with the caller's key, +/// install as a loose object. Returns the repo-relative paths recovered. +/// Best-effort; logs and continues on any per-blob failure. 
+async fn recover_encrypted_blobs( + node: &str, + owner: &str, + name: &str, + dest: &Path, + keypair: &gitlawb_core::identity::Keypair, +) -> Result> { + use gitlawb_core::encrypt::open_blob; + use std::collections::HashMap; + use std::io::Write; + + let dest_str = dest.to_str().context("dest path not utf-8")?; + let client = NodeClient::new(node, Some(keypair.clone())); + + let resp = match client + .get_signed(&format!("/api/v1/repos/{owner}/{name}/encrypted-blobs")) + .await + { + Ok(r) if r.status().is_success() => r, + _ => return Ok(vec![]), + }; + let body: serde_json::Value = resp.json().await.context("parsing encrypted-blobs")?; + let blobs = body + .get("blobs") + .and_then(|b| b.as_array()) + .cloned() + .unwrap_or_default(); + if blobs.is_empty() { + return Ok(vec![]); + } + + // Map oid -> repo-relative path from the cloned tree. + let ls = Command::new("git") + .args(["-C", dest_str, "ls-tree", "-r", "HEAD"]) + .output()?; + let mut oid_to_path: HashMap = HashMap::new(); + for line in String::from_utf8_lossy(&ls.stdout).lines() { + if let Some((meta, path)) = line.split_once('\t') { + if let Some(oid) = meta.split_whitespace().nth(2) { + oid_to_path.insert(oid.to_string(), path.to_string()); + } + } + } + + let mut recovered = Vec::new(); + for entry in blobs { + let Some(oid) = entry.get("oid").and_then(|o| o.as_str()) else { + continue; + }; + // Skip if already present locally. 
+ let present = Command::new("git") + .args(["-C", dest_str, "cat-file", "-e", oid]) + .status() + .map(|s| s.success()) + .unwrap_or(false); + if present { + continue; + } + let env_resp = match client + .get_signed(&format!( + "/api/v1/repos/{owner}/{name}/encrypted-blob/{oid}" + )) + .await + { + Ok(r) if r.status().is_success() => r, + _ => continue, + }; + let Ok(envelope) = env_resp.bytes().await else { + continue; + }; + let plaintext = match open_blob(&envelope, keypair) { + Ok(p) => p, + Err(e) => { + eprintln!("warning: could not decrypt {oid}: {e}"); + continue; + } + }; + // Install as a loose object; verify the OID matches. + let mut child = Command::new("git") + .args(["-C", dest_str, "hash-object", "-w", "-t", "blob", "--stdin"]) + .stdin(std::process::Stdio::piped()) + .stdout(std::process::Stdio::piped()) + .spawn()?; + child.stdin.take().unwrap().write_all(&plaintext)?; + let out = child.wait_with_output()?; + let written = String::from_utf8_lossy(&out.stdout).trim().to_string(); + if written == oid { + if let Some(p) = oid_to_path.get(oid) { + recovered.push(p.clone()); + } + } else { + eprintln!("warning: recovered blob {oid} hashed to {written}; discarding"); + } + } + Ok(recovered) +} + +/// One blob entry in an Arweave-anchored encrypted manifest. The manifest also +/// carries a `recipients` field per blob, but `gl` does not need it: authorization +/// is enforced by whether `open_blob` can decrypt with the caller's key. Unknown +/// JSON fields are ignored by serde, so `recipients` is simply not declared here. +#[derive(Deserialize)] +struct ManifestBlob { + oid: String, + cid: String, +} + +/// An Arweave-anchored per-push encrypted manifest (Option B3). +#[derive(Deserialize)] +struct Manifest { + #[serde(default)] + timestamp: String, + #[serde(default)] + blobs: Vec, +} + +/// Extract transaction ids from an Arweave GraphQL `transactions` response. 
+fn parse_tx_ids(v: &serde_json::Value) -> Vec { + v.get("data") + .and_then(|d| d.get("transactions")) + .and_then(|t| t.get("edges")) + .and_then(|e| e.as_array()) + .map(|edges| { + edges + .iter() + .filter_map(|edge| { + edge.get("node") + .and_then(|n| n.get("id")) + .and_then(|i| i.as_str()) + .map(String::from) + }) + .collect() + }) + .unwrap_or_default() +} + +/// Merge per-push manifests into a single `oid -> cid` map, latest-wins by the +/// manifest `timestamp` (RFC3339, compared lexicographically; a later push that +/// re-sealed a blob overrides the earlier entry). +fn merge_manifests(manifests: Vec) -> std::collections::HashMap { + let mut best: std::collections::HashMap = + std::collections::HashMap::new(); // oid -> (cid, timestamp) + for m in manifests { + for b in m.blobs { + match best.get(&b.oid) { + Some((_, ts)) if ts.as_str() >= m.timestamp.as_str() => {} + _ => { + best.insert(b.oid, (b.cid, m.timestamp.clone())); + } + } + } + } + best.into_iter().map(|(oid, (cid, _))| (oid, cid)).collect() +} + +/// Option B3 fallback recovery, with no dependency on a gitlawb node API. Query +/// the Arweave gateway for this repo's encrypted manifests, merge them, and for +/// each blob still missing locally that the caller can decrypt, pull the envelope +/// from a public IPFS gateway, decrypt, and install it as a loose object. Returns +/// the repo-relative paths recovered. Best-effort; silent when gateways are +/// unreachable, leaving the clone exactly as node-based recovery left it. 
+async fn recover_from_arweave( + arweave_gateway: &str, + ipfs_gateway: &str, + owner: &str, + name: &str, + dest: &Path, + keypair: &gitlawb_core::identity::Keypair, +) -> Result> { + use gitlawb_core::encrypt::open_blob; + use std::collections::HashMap; + use std::io::Write; + + let dest_str = dest.to_str().context("dest path not utf-8")?; + let owner_short = owner.split(':').next_back().unwrap_or(owner); + let slug = format!("{owner_short}/{name}"); + let ag = arweave_gateway.trim_end_matches('/'); + let ig = ipfs_gateway.trim_end_matches('/'); + // Bound every gateway request: this runs on every clone, so a slow or hung + // public gateway must not stall it. Best-effort recovery, so a timeout just + // skips the affected blob. + let client = reqwest::Client::builder() + .timeout(std::time::Duration::from_secs(30)) + .build() + .unwrap_or_else(|_| reqwest::Client::new()); + + // 1. Discover manifest transaction ids via Arweave GraphQL. + let query = r#"query($repo:String!){transactions(tags:[{name:"App-Name",values:["gitlawb"]},{name:"Schema",values:["gitlawb/encrypted-manifest/v1"]},{name:"Repo",values:[$repo]}],first:100){edges{node{id}}}}"#; + let gql_body = serde_json::json!({ "query": query, "variables": { "repo": slug } }); + let resp = match client + .post(format!("{ag}/graphql")) + .json(&gql_body) + .send() + .await + { + Ok(r) if r.status().is_success() => r, + _ => return Ok(vec![]), + }; + let gql: serde_json::Value = match resp.json().await { + Ok(v) => v, + Err(_) => return Ok(vec![]), + }; + let tx_ids = parse_tx_ids(&gql); + if tx_ids.is_empty() { + return Ok(vec![]); + } + + // 2. Fetch and parse each manifest body, then merge latest-wins per oid. 
+ let mut manifests = Vec::new(); + for tx in tx_ids { + let m = match client.get(format!("{ag}/{tx}")).send().await { + Ok(r) if r.status().is_success() => r, + _ => continue, + }; + if let Ok(parsed) = m.json::().await { + manifests.push(parsed); + } + } + let oid_cid = merge_manifests(manifests); + if oid_cid.is_empty() { + return Ok(vec![]); + } + + // Map oid -> repo-relative path from the cloned tree. + let ls = Command::new("git") + .args(["-C", dest_str, "ls-tree", "-r", "HEAD"]) + .output()?; + let mut oid_to_path: HashMap = HashMap::new(); + for line in String::from_utf8_lossy(&ls.stdout).lines() { + if let Some((meta, path)) = line.split_once('\t') { + if let Some(oid) = meta.split_whitespace().nth(2) { + oid_to_path.insert(oid.to_string(), path.to_string()); + } + } + } + + // 3. Recover each missing blob the caller can decrypt. + let mut recovered = Vec::new(); + for (oid, cid) in oid_cid { + // Local presence check. GIT_NO_LAZY_FETCH stops git from making a wasted + // promisor fetch attempt (we are recovering precisely because the promisor + // cannot supply the blob), and `.output()` captures git's "missing object" + // stderr so that expected case does not leak a confusing error to the user. + let present = Command::new("git") + .args(["-C", dest_str, "cat-file", "-e", &oid]) + .env("GIT_NO_LAZY_FETCH", "1") + .output() + .map(|o| o.status.success()) + .unwrap_or(false); + if present { + continue; + } + let env_resp = match client.get(format!("{ig}/ipfs/{cid}")).send().await { + Ok(r) if r.status().is_success() => r, + _ => continue, + }; + let Ok(envelope) = env_resp.bytes().await else { + continue; + }; + // open_blob succeeds only if this caller is a recipient: this is the + // authorization gate (no node, no DID check needed). 
+ let plaintext = match open_blob(&envelope, keypair) { + Ok(p) => p, + Err(_) => continue, + }; + let mut child = Command::new("git") + .args(["-C", dest_str, "hash-object", "-w", "-t", "blob", "--stdin"]) + .stdin(std::process::Stdio::piped()) + .stdout(std::process::Stdio::piped()) + .spawn()?; + child.stdin.take().unwrap().write_all(&plaintext)?; + let out = child.wait_with_output()?; + let written = String::from_utf8_lossy(&out.stdout).trim().to_string(); + if written == oid { + if let Some(p) = oid_to_path.get(&oid) { + recovered.push(p.clone()); + } + } else { + eprintln!("warning: recovered blob {oid} hashed to {written}; discarding"); + } + } + Ok(recovered) +} + pub async fn run(args: CloneArgs) -> Result<()> { let (url, owner, name) = parse_repo(&args.repo)?; let dest_name = args.dir.unwrap_or_else(|| name.clone()); @@ -236,6 +541,60 @@ pub async fn run(args: CloneArgs) -> Result<()> { } setup_partial_clone(&dest, &url, &withheld, &reinclude, args.branch.as_deref())?; + + if let Ok(keypair) = load_keypair_from_dir(None) { + // Node-based recovery first (B1/B2), then the B3 Arweave/IPFS gateway + // fallback for any authorized blobs the node could not supply. + let mut paths = recover_encrypted_blobs(&args.node, &owner, &name, &dest, &keypair) + .await + .unwrap_or_default(); + let from_arweave = recover_from_arweave( + &args.arweave_gateway, + &args.ipfs_gateway, + &owner, + &name, + &dest, + &keypair, + ) + .await + .unwrap_or_default(); + paths.extend(from_arweave); + + if !paths.is_empty() { + // Re-include recovered paths if this was a sparse clone, then + // materialize them in the working tree. 
+ let spec = dest.join(".git/info/sparse-checkout"); + if spec.exists() { + match std::fs::read_to_string(&spec) { + Ok(mut s) => { + for p in &paths { + s.push_str(&format!("/{p}\n")); + } + if let Err(e) = std::fs::write(&spec, &s) { + eprintln!( + "warning: failed to update sparse-checkout, recovered files may not appear: {e}" + ); + } + } + Err(e) => { + eprintln!( + "warning: failed to read sparse-checkout, recovered files may not appear: {e}" + ); + } + } + } + if let Err(e) = git(&dest, &["checkout", "--", "."]) { + eprintln!( + "warning: checkout after recovery failed, recovered files may not appear: {e}" + ); + } + println!( + "Recovered {} private file(s) you are authorized to read", + paths.len() + ); + } + } + println!("Done. Cloned into {dest_name}"); Ok(()) } @@ -428,6 +787,269 @@ mod tests { .is_err()); } + #[test] + fn parse_tx_ids_extracts_node_ids() { + let v: serde_json::Value = serde_json::from_str( + r#"{"data":{"transactions":{"edges":[{"node":{"id":"TX1"}},{"node":{"id":"TX2"}}]}}}"#, + ) + .unwrap(); + assert_eq!(parse_tx_ids(&v), vec!["TX1".to_string(), "TX2".to_string()]); + } + + #[test] + fn parse_tx_ids_empty_on_no_edges() { + let v: serde_json::Value = + serde_json::from_str(r#"{"data":{"transactions":{"edges":[]}}}"#).unwrap(); + assert!(parse_tx_ids(&v).is_empty()); + } + + #[test] + fn manifest_parses_and_ignores_recipients() { + let m: Manifest = serde_json::from_str( + r#"{"timestamp":"2026-06-11T00:00:00Z","blobs":[{"oid":"o1","cid":"c1","recipients":["did:key:zA"]}]}"#, + ) + .unwrap(); + assert_eq!(m.timestamp, "2026-06-11T00:00:00Z"); + assert_eq!(m.blobs.len(), 1); + assert_eq!(m.blobs[0].oid, "o1"); + assert_eq!(m.blobs[0].cid, "c1"); + } + + #[test] + fn merge_manifests_latest_wins_per_oid() { + let older = Manifest { + timestamp: "2026-06-10T00:00:00Z".to_string(), + blobs: vec![ManifestBlob { + oid: "o1".to_string(), + cid: "cidOLD".to_string(), + }], + }; + let newer = Manifest { + timestamp: 
"2026-06-11T00:00:00Z".to_string(), + blobs: vec![ + ManifestBlob { + oid: "o1".to_string(), + cid: "cidNEW".to_string(), + }, + ManifestBlob { + oid: "o2".to_string(), + cid: "cid2".to_string(), + }, + ], + }; + let merged = merge_manifests(vec![older, newer]); + assert_eq!(merged.get("o1").map(String::as_str), Some("cidNEW")); + assert_eq!(merged.get("o2").map(String::as_str), Some("cid2")); + } + + #[test] + fn merge_manifests_is_order_independent() { + let older = Manifest { + timestamp: "2026-06-10T00:00:00Z".to_string(), + blobs: vec![ManifestBlob { + oid: "o1".to_string(), + cid: "cidOLD".to_string(), + }], + }; + let newer = Manifest { + timestamp: "2026-06-11T00:00:00Z".to_string(), + blobs: vec![ManifestBlob { + oid: "o1".to_string(), + cid: "cidNEW".to_string(), + }], + }; + // Newer first, older second: newer must still win. + let merged = merge_manifests(vec![newer, older]); + assert_eq!(merged.get("o1").map(String::as_str), Some("cidNEW")); + } + + /// Read-path end-to-end over a mocked Arweave + IPFS gateway: discover the + /// manifest via GraphQL, fetch it, fetch the envelope, decrypt with the + /// caller's key, and install the previously-withheld blob. + #[tokio::test] + async fn recover_from_arweave_installs_authorized_blob() { + use gitlawb_core::encrypt::seal_blob; + use gitlawb_core::identity::Keypair; + + let (td, url) = bare_remote(&[("public/a.txt", b"pub\n"), ("secret/b.txt", b"SECRET\n")]); + let dest = td.path().join("dest"); + // Make the bare honor `--filter=blob:none` over file:// so the withheld + // blob is genuinely omitted from the local store, not just unchecked-out. 
+ let bare = url.strip_prefix("file://").unwrap(); + assert!(Command::new("git") + .args(["-C", bare, "config", "uploadpack.allowFilter", "true"]) + .status() + .unwrap() + .success()); + setup_partial_clone(&dest, &url, &["/secret/**".to_string()], &[], None).unwrap(); + assert!( + !dest.join("secret/b.txt").exists(), + "secret starts withheld" + ); + + let oid = { + let out = Command::new("git") + .args([ + "-C", + dest.to_str().unwrap(), + "rev-parse", + "HEAD:secret/b.txt", + ]) + .output() + .unwrap(); + String::from_utf8_lossy(&out.stdout).trim().to_string() + }; + + // Simulate origin death: drop the promisor remote so `cat-file -e` cannot + // lazily fetch the withheld blob. This is exactly the B3 premise (the node + // can no longer serve it), and forces recovery to go through Arweave/IPFS. + std::fs::remove_dir_all(url.strip_prefix("file://").unwrap()).unwrap(); + + let reader = Keypair::generate(); + let envelope = seal_blob(b"SECRET\n", &[reader.verifying_key()]).unwrap(); + + let cid = "testcid123"; + let mut server = mockito::Server::new_async().await; + let _gql = server + .mock("POST", "/graphql") + .with_status(200) + .with_header("content-type", "application/json") + .with_body(r#"{"data":{"transactions":{"edges":[{"node":{"id":"TX1"}}]}}}"#) + .create_async() + .await; + let manifest_body = serde_json::json!({ + "timestamp": "2026-06-11T00:00:00Z", + "blobs": [{ "oid": oid, "cid": cid, "recipients": [] }], + }) + .to_string(); + let _tx = server + .mock("GET", "/TX1") + .with_status(200) + .with_header("content-type", "application/json") + .with_body(manifest_body) + .create_async() + .await; + let _blob = server + .mock("GET", format!("/ipfs/{cid}").as_str()) + .with_status(200) + .with_body(envelope) + .create_async() + .await; + + let paths = recover_from_arweave( + &server.url(), + &server.url(), + "alice", + "myrepo", + &dest, + &reader, + ) + .await + .unwrap(); + assert_eq!(paths, vec!["secret/b.txt".to_string()]); + + let present = 
Command::new("git")
+            .args(["-C", dest.to_str().unwrap(), "cat-file", "-e", &oid])
+            .env("GIT_NO_LAZY_FETCH", "1")
+            .output()
+            .unwrap()
+            .status
+            .success();
+        assert!(
+            present,
+            "authorized reader's blob must be installed locally"
+        );
+    }
+
+    /// A caller who is not a recipient cannot decrypt the envelope, so nothing is
+    /// recovered even though the manifest and envelope are reachable.
+    ///
+    /// The withheld blob's oid is validated up front: with an empty or bogus oid
+    /// both negative assertions at the end would hold trivially and prove nothing
+    /// about authorization.
+    #[tokio::test]
+    async fn recover_from_arweave_skips_unauthorized() {
+        use gitlawb_core::encrypt::seal_blob;
+        use gitlawb_core::identity::Keypair;
+
+        let (td, url) = bare_remote(&[("public/a.txt", b"pub\n"), ("secret/b.txt", b"SECRET\n")]);
+        let dest = td.path().join("dest");
+        let bare = url.strip_prefix("file://").unwrap();
+        // Allow filtered fetches on the bare remote before cloning from it.
+        assert!(Command::new("git")
+            .args(["-C", bare, "config", "uploadpack.allowFilter", "true"])
+            .status()
+            .unwrap()
+            .success());
+        setup_partial_clone(&dest, &url, &["/secret/**".to_string()], &[], None).unwrap();
+
+        let oid = {
+            let out = Command::new("git")
+                .args([
+                    "-C",
+                    dest.to_str().unwrap(),
+                    "rev-parse",
+                    "HEAD:secret/b.txt",
+                ])
+                .output()
+                .unwrap();
+            // A failed lookup would leave `oid` empty and let the test pass vacuously.
+            assert!(
+                out.status.success(),
+                "rev-parse must resolve the withheld blob's oid"
+            );
+            String::from_utf8_lossy(&out.stdout).trim().to_string()
+        };
+        assert!(!oid.is_empty(), "withheld blob oid must not be empty");
+
+        // Simulate origin death (see the authorized test) so the withheld blob
+        // cannot be lazily fetched from the promisor remote.
+        std::fs::remove_dir_all(bare).unwrap();
+
+        // Sealed to a different reader; the caller below is not a recipient.
+        let authorized = Keypair::generate();
+        let envelope = seal_blob(b"SECRET\n", &[authorized.verifying_key()]).unwrap();
+        let intruder = Keypair::generate();
+
+        // One mock server stands in for both endpoints: it answers the GraphQL
+        // query with transaction TX1, serves the manifest at /TX1, and serves the
+        // sealed envelope at /ipfs/<cid>.
+        let cid = "testcid123";
+        let mut server = mockito::Server::new_async().await;
+        let _gql = server
+            .mock("POST", "/graphql")
+            .with_status(200)
+            .with_header("content-type", "application/json")
+            .with_body(r#"{"data":{"transactions":{"edges":[{"node":{"id":"TX1"}}]}}}"#)
+            .create_async()
+            .await;
+        let manifest_body = serde_json::json!({
+            "timestamp": "2026-06-11T00:00:00Z",
+            "blobs": [{ "oid": oid, "cid": cid, "recipients": [] }],
+        })
+        .to_string();
+        let _tx = server
+            .mock("GET", "/TX1")
+            .with_status(200)
+            .with_header("content-type", "application/json")
+            .with_body(manifest_body)
+            .create_async()
+            .await;
+        let _blob = server
+            .mock("GET", format!("/ipfs/{cid}").as_str())
+            .with_status(200)
+            .with_body(envelope)
+            .create_async()
+            .await;
+
+        let paths = recover_from_arweave(
+            &server.url(),
+            &server.url(),
+            "alice",
+            "myrepo",
+            &dest,
+            &intruder,
+        )
+        .await
+        .unwrap();
+        assert!(paths.is_empty(), "non-recipient must recover nothing");
+
+        // GIT_NO_LAZY_FETCH keeps `cat-file -e` from trying the promisor remote,
+        // so this only reports whether the object exists locally.
+        let present = Command::new("git")
+            .args(["-C", dest.to_str().unwrap(), "cat-file", "-e", &oid])
+            .env("GIT_NO_LAZY_FETCH", "1")
+            .output()
+            .unwrap()
+            .status
+            .success();
+        assert!(!present, "non-recipient must not install the blob");
+    }
+
     #[test]
     fn parse_repo_accepts_url_and_bare() {
         let (url, o, n) = parse_repo("gitlawb://did:key:zAbc/myrepo").unwrap();
@@ -447,4 +1069,46 @@ mod tests {
         // An extra slash would otherwise smuggle a path segment into the name.
assert!(parse_repo("owner/name/extra").is_err());
     }
+
+    /// Seals and re-opens the blob contents, writes the plaintext into the clone
+    /// with `git hash-object -w`, and checks that the resulting object id equals
+    /// the one the tree records for `secret/b.txt`.
+    ///
+    /// Both git invocations have their exit status checked: otherwise two
+    /// failing commands would each yield empty stdout and compare equal.
+    #[test]
+    fn recovered_blob_installs_with_matching_oid() {
+        use gitlawb_core::encrypt::{open_blob, seal_blob};
+        use gitlawb_core::identity::Keypair;
+        use std::io::Write;
+
+        let (td, url) = bare_remote(&[("public/a.txt", b"pub\n"), ("secret/b.txt", b"SECRET\n")]);
+        let dest = td.path().join("dest");
+        setup_partial_clone(&dest, &url, &["/secret/**".to_string()], &[], None).unwrap();
+        let oid = {
+            let out = std::process::Command::new("git")
+                .args([
+                    "-C",
+                    dest.to_str().unwrap(),
+                    "rev-parse",
+                    "HEAD:secret/b.txt",
+                ])
+                .output()
+                .unwrap();
+            assert!(out.status.success(), "rev-parse must resolve secret/b.txt");
+            String::from_utf8_lossy(&out.stdout).trim().to_string()
+        };
+        assert!(!oid.is_empty(), "expected blob oid must not be empty");
+
+        let reader = Keypair::generate();
+        let env = seal_blob(b"SECRET\n", &[reader.verifying_key()]).unwrap();
+        let plaintext = open_blob(&env, &reader).unwrap();
+        let mut child = std::process::Command::new("git")
+            .args([
+                "-C",
+                dest.to_str().unwrap(),
+                "hash-object",
+                "-w",
+                "-t",
+                "blob",
+                "--stdin",
+            ])
+            .stdin(std::process::Stdio::piped())
+            .stdout(std::process::Stdio::piped())
+            .spawn()
+            .unwrap();
+        // The taken handle is a temporary dropped at the end of this statement,
+        // which closes the pipe so hash-object sees end of input.
+        child.stdin.take().unwrap().write_all(&plaintext).unwrap();
+        let out = child.wait_with_output().unwrap();
+        assert!(out.status.success(), "hash-object must write the blob");
+        assert_eq!(String::from_utf8_lossy(&out.stdout).trim(), oid);
+    }
 }