From 704e706bec824d58beef4de9add87e4d7c61f5a5 Mon Sep 17 00:00:00 2001 From: beardthelion <56458543+beardthelion@users.noreply.github.com> Date: Fri, 12 Jun 2026 15:14:53 -0500 Subject: [PATCH 1/5] feat(node): blind recipient identities at rest and gate B1 by repo readability The origin no longer stores recipient DIDs. Migration v5 replaces the encrypted_blobs.recipients column with an opaque, node-keyed recipients_tag used only to detect a recipient-set change for re-seal. B1 discovery and fetch are now gated by the same repo-readability check the git read path uses, not by per-recipient matching; decryption is gated by the envelope crypto, so a non-recipient who can read the repo sees a blob's {oid, cid} but cannot open it. encrypt_and_pin keys the tag from the node seed and returns {oid, cid}; the Arweave manifest tuple drops the now-unused recipient vec. A DB compromise no longer reveals the reader set; recovering it would require brute-forcing candidate DID sets against the keyed tag with the node key. --- crates/gitlawb-node/src/api/encrypted.rs | 29 +++-- crates/gitlawb-node/src/api/repos.rs | 2 + crates/gitlawb-node/src/arweave.rs | 24 ++--- crates/gitlawb-node/src/db/mod.rs | 128 ++++++++--------------- crates/gitlawb-node/src/encrypted_pin.rs | 83 ++++++++++++--- crates/gitlawb-node/src/sync.rs | 10 +- 6 files changed, 142 insertions(+), 134 deletions(-) diff --git a/crates/gitlawb-node/src/api/encrypted.rs b/crates/gitlawb-node/src/api/encrypted.rs index 20827fb..b7bda00 100644 --- a/crates/gitlawb-node/src/api/encrypted.rs +++ b/crates/gitlawb-node/src/api/encrypted.rs @@ -6,24 +6,30 @@ use axum::Json; use crate::auth::AuthenticatedDid; use crate::error::{AppError, Result}; use crate::state::AppState; +use crate::visibility::{visibility_check, Decision}; /// GET /api/v1/repos/{owner}/{repo}/encrypted-blobs -/// Returns [{oid, cid}] for encrypted blobs the caller may decrypt. 
+/// Returns [{oid, cid}] for every encrypted blob in the repo, to any caller who +/// can read the repo. Not recipient-scoped: recipient identities are not stored, +/// so access control here is repo readability and decryption is gated by the +/// envelope crypto (only a real recipient can open an envelope). pub async fn list_encrypted_blobs( State(state): State, auth: Option>, Path((owner, repo)): Path<(String, String)>, ) -> Result> { - let caller = auth.as_ref().map(|e| e.0 .0.as_str()).unwrap_or(""); let record = state .db .get_repo(&owner, &repo) .await? .ok_or_else(|| AppError::RepoNotFound(format!("{owner}/{repo}")))?; - let rows = state - .db - .list_encrypted_blobs_for(&record.id, caller) - .await?; + let caller = auth.as_ref().map(|e| e.0 .0.as_str()); + let rules = state.db.list_visibility_rules(&record.id).await?; + if visibility_check(&rules, record.is_public, &record.owner_did, caller, "/") == Decision::Deny + { + return Err(AppError::RepoNotFound(format!("{owner}/{repo}"))); + } + let rows = state.db.list_all_encrypted_blobs(&record.id).await?; let blobs: Vec<_> = rows .into_iter() .map(|(oid, cid)| serde_json::json!({ "oid": oid, "cid": cid })) @@ -38,15 +44,20 @@ pub async fn get_encrypted_blob( auth: Option>, Path((owner, repo, oid)): Path<(String, String, String)>, ) -> Result> { - let caller = auth.as_ref().map(|e| e.0 .0.as_str()).unwrap_or(""); let record = state .db .get_repo(&owner, &repo) .await? .ok_or_else(|| AppError::RepoNotFound(format!("{owner}/{repo}")))?; + let caller = auth.as_ref().map(|e| e.0 .0.as_str()); + let rules = state.db.list_visibility_rules(&record.id).await?; + if visibility_check(&rules, record.is_public, &record.owner_did, caller, "/") == Decision::Deny + { + return Err(AppError::RepoNotFound(format!("{owner}/{repo}/{oid}"))); + } let cid = state .db - .encrypted_blob_cid(&record.id, &oid, caller) + .encrypted_blob_cid(&record.id, &oid) .await? 
.ok_or_else(|| AppError::RepoNotFound(format!("{owner}/{repo}/{oid}")))?; let bytes = crate::ipfs_pin::cat(&state.config.ipfs_api, &cid) @@ -73,7 +84,7 @@ pub async fn replicate_encrypted_blobs( let rows = state.db.list_all_encrypted_blobs(&record.id).await?; let blobs: Vec<_> = rows .into_iter() - .map(|(oid, cid, _recipients)| replicate_blob_json(oid, cid)) + .map(|(oid, cid)| replicate_blob_json(oid, cid)) .collect(); Ok(Json(serde_json::json!({ "blobs": blobs }))) } diff --git a/crates/gitlawb-node/src/api/repos.rs b/crates/gitlawb-node/src/api/repos.rs index 9a06f7f..3fae91e 100644 --- a/crates/gitlawb-node/src/api/repos.rs +++ b/crates/gitlawb-node/src/api/repos.rs @@ -689,6 +689,7 @@ pub async fn git_receive_pack( let irys_url = state.config.irys_url.clone(); let http_client = std::sync::Arc::clone(&state.http_client); let node_did_str = state.node_did.to_string(); + let node_seed = state.node_keypair.seed_bytes(); let repo_name = record.name.clone(); tokio::spawn(async move { let pinned = crate::ipfs_pin::pin_new_objects( @@ -722,6 +723,7 @@ pub async fn git_receive_pack( &repo_path_clone, &db_clone, &repo_id, + &node_seed, &recipients, ) .await; diff --git a/crates/gitlawb-node/src/arweave.rs b/crates/gitlawb-node/src/arweave.rs index cf13947..43f35a0 100644 --- a/crates/gitlawb-node/src/arweave.rs +++ b/crates/gitlawb-node/src/arweave.rs @@ -104,15 +104,15 @@ pub async fn anchor_ref_update( } /// A per-push manifest of the blobs encrypted this push (Option B3). The -/// `blobs` slice is `(oid, cid, recipients)` tuples; only `oid` and `cid` are -/// anchored. Anchored directly to Arweave as its JSON body so the discovery -/// index survives total node loss. +/// `blobs` slice is `(oid, cid)` tuples. Anchored directly to Arweave as its JSON +/// body so the discovery index survives total node loss. Recipient identities are +/// never part of the manifest. 
pub struct EncryptedManifest<'a> { pub repo: &'a str, pub owner_did: &'a str, pub node_did: &'a str, pub timestamp: &'a str, - pub blobs: &'a [(String, String, Vec)], + pub blobs: &'a [(String, String)], } /// Anchor a per-push encrypted-blob manifest to Arweave via Irys. The manifest @@ -135,7 +135,7 @@ pub async fn anchor_encrypted_manifest( let blobs_json: Vec = manifest .blobs .iter() - .map(|(oid, cid, _recipients)| manifest_blob_json(oid, cid)) + .map(|(oid, cid)| manifest_blob_json(oid, cid)) .collect(); let payload = json!({ @@ -298,11 +298,7 @@ mod tests { #[tokio::test] async fn test_manifest_anchor_noop_when_url_empty() { let client = reqwest::Client::new(); - let blobs = vec![( - "oid1".to_string(), - "cid1".to_string(), - vec!["did:key:zA".to_string()], - )]; + let blobs = vec![("oid1".to_string(), "cid1".to_string())]; let m = EncryptedManifest { repo: "alice/r", owner_did: "did:key:zO", @@ -319,7 +315,7 @@ mod tests { #[tokio::test] async fn test_manifest_anchor_noop_when_no_blobs() { let client = reqwest::Client::new(); - let blobs: Vec<(String, String, Vec)> = vec![]; + let blobs: Vec<(String, String)> = vec![]; let m = EncryptedManifest { repo: "alice/r", owner_did: "did:key:zO", @@ -348,11 +344,7 @@ mod tests { .await; let client = reqwest::Client::new(); - let blobs = vec![( - "oid1".to_string(), - "cid1".to_string(), - vec!["did:key:zA".to_string()], - )]; + let blobs = vec![("oid1".to_string(), "cid1".to_string())]; let m = EncryptedManifest { repo: "alice/r", owner_did: "did:key:zO", diff --git a/crates/gitlawb-node/src/db/mod.rs b/crates/gitlawb-node/src/db/mod.rs index 4a1c107..81bd00d 100644 --- a/crates/gitlawb-node/src/db/mod.rs +++ b/crates/gitlawb-node/src/db/mod.rs @@ -735,6 +735,17 @@ const MIGRATIONS: &[Migration] = &[ "CREATE INDEX IF NOT EXISTS idx_encrypted_blobs_repo ON encrypted_blobs(repo_id)", ], }, + Migration { + version: 5, + name: "encrypted_blobs_blind_recipients", + stmts: &[ + // Replace the cleartext recipient DID 
list with an opaque, node-keyed + // tag used only to detect a recipient-set change. Existing rows get an + // empty tag and are re-sealed on the next push. + "ALTER TABLE encrypted_blobs DROP COLUMN IF EXISTS recipients", + "ALTER TABLE encrypted_blobs ADD COLUMN IF NOT EXISTS recipients_tag TEXT NOT NULL DEFAULT ''", + ], + }, ]; // ── Repos ───────────────────────────────────────────────────────────────────── @@ -1648,126 +1659,69 @@ impl Db { repo_id: &str, oid: &str, cid: &str, - recipients: &[String], + recipients_tag: &str, ) -> Result<()> { - let recipients_json = serde_json::to_string(recipients)?; sqlx::query( - "INSERT INTO encrypted_blobs (repo_id, oid, cid, recipients, created_at) + "INSERT INTO encrypted_blobs (repo_id, oid, cid, recipients_tag, created_at) VALUES ($1, $2, $3, $4, $5) - ON CONFLICT (repo_id, oid) DO UPDATE SET cid = EXCLUDED.cid, recipients = EXCLUDED.recipients", + ON CONFLICT (repo_id, oid) DO UPDATE SET cid = EXCLUDED.cid, recipients_tag = EXCLUDED.recipients_tag", ) .bind(repo_id) .bind(oid) .bind(cid) - .bind(recipients_json) + .bind(recipients_tag) .bind(Utc::now().to_rfc3339()) .execute(&self.pool) .await?; Ok(()) } - /// Deserialize the stored recipients JSON. Corruption is surfaced as an - /// error rather than silently treated as an empty recipient list, which - /// would deny access to every legitimate reader and hand peers incomplete - /// replication metadata. - fn parse_recipients(repo_id: &str, oid: &str, raw: &str) -> Result> { - serde_json::from_str(raw).with_context(|| { - format!("corrupt recipients JSON in encrypted_blobs (repo_id={repo_id}, oid={oid})") - }) - } - - /// (oid, cid) for every encrypted blob in the repo that `caller` may decrypt. 
- pub async fn list_encrypted_blobs_for( - &self, - repo_id: &str, - caller: &str, - ) -> Result> { - let rows = - sqlx::query("SELECT oid, cid, recipients FROM encrypted_blobs WHERE repo_id = $1") - .bind(repo_id) - .fetch_all(&self.pool) - .await?; + /// (oid, cid) for every encrypted blob in the repo, unscoped by caller. Used + /// by both the B2 replication view and B1 discovery. Recipient identities are + /// not stored, so authorization is the caller's repo readability, not a per + /// recipient check. Ciphertext metadata only. + pub async fn list_all_encrypted_blobs(&self, repo_id: &str) -> Result> { + let rows = sqlx::query("SELECT oid, cid FROM encrypted_blobs WHERE repo_id = $1") + .bind(repo_id) + .fetch_all(&self.pool) + .await?; let mut out = Vec::new(); for row in rows { let oid: String = row.get("oid"); let cid: String = row.get("cid"); - let recipients: String = row.get("recipients"); - let recipients = Self::parse_recipients(repo_id, &oid, &recipients)?; - if recipients.iter().any(|d| d == caller) { - out.push((oid, cid)); - } + out.push((oid, cid)); } Ok(out) } - /// (oid, cid, recipients) for every encrypted blob in the repo, unscoped by - /// caller. This is the replication view used by peer mirrors (Option B2), - /// distinct from the recipient-scoped `list_encrypted_blobs_for`. It returns - /// only ciphertext metadata; no plaintext or key material is involved. 
- pub async fn list_all_encrypted_blobs( - &self, - repo_id: &str, - ) -> Result)>> { - let rows = - sqlx::query("SELECT oid, cid, recipients FROM encrypted_blobs WHERE repo_id = $1") - .bind(repo_id) - .fetch_all(&self.pool) - .await?; - let mut out = Vec::new(); - for row in rows { - let oid: String = row.get("oid"); - let cid: String = row.get("cid"); - let recipients: String = row.get("recipients"); - let recipients = Self::parse_recipients(repo_id, &oid, &recipients)?; - out.push((oid, cid, recipients)); - } - Ok(out) + /// The CID of one encrypted blob, or None if there is no such row. Recipient + /// authorization is not enforced here: the handler checks repo readability and + /// the envelope crypto gates decryption. + pub async fn encrypted_blob_cid(&self, repo_id: &str, oid: &str) -> Result> { + let row = sqlx::query("SELECT cid FROM encrypted_blobs WHERE repo_id = $1 AND oid = $2") + .bind(repo_id) + .bind(oid) + .fetch_optional(&self.pool) + .await?; + Ok(row.map(|r| r.get("cid"))) } - /// The CID of one encrypted blob, only if `caller` is a recipient. - pub async fn encrypted_blob_cid( + /// The opaque recipients tag stored for an encrypted blob, or None if there is + /// no row. Used only to decide whether a re-seal is needed (the recipient set + /// changed); the tag is a node-keyed fingerprint, not the DID list. 
+ pub async fn encrypted_blob_recipients_tag( &self, repo_id: &str, oid: &str, - caller: &str, ) -> Result> { let row = sqlx::query( - "SELECT cid, recipients FROM encrypted_blobs WHERE repo_id = $1 AND oid = $2", + "SELECT recipients_tag FROM encrypted_blobs WHERE repo_id = $1 AND oid = $2", ) .bind(repo_id) .bind(oid) .fetch_optional(&self.pool) .await?; - let Some(row) = row else { return Ok(None) }; - let recipients: String = row.get("recipients"); - let recipients = Self::parse_recipients(repo_id, oid, &recipients)?; - if recipients.iter().any(|d| d == caller) { - Ok(Some(row.get("cid"))) - } else { - Ok(None) - } - } - - /// The recipient DID list stored for an encrypted blob, or None if there is - /// no row. Used to decide whether a re-seal is needed (recipients changed). - pub async fn encrypted_blob_recipients( - &self, - repo_id: &str, - oid: &str, - ) -> Result>> { - let row = - sqlx::query("SELECT recipients FROM encrypted_blobs WHERE repo_id = $1 AND oid = $2") - .bind(repo_id) - .bind(oid) - .fetch_optional(&self.pool) - .await?; - match row { - None => Ok(None), - Some(r) => { - let recipients: String = r.get("recipients"); - Ok(Some(Self::parse_recipients(repo_id, oid, &recipients)?)) - } - } + Ok(row.map(|r| r.get("recipients_tag"))) } pub async fn list_pinned_cids(&self) -> Result> { diff --git a/crates/gitlawb-node/src/encrypted_pin.rs b/crates/gitlawb-node/src/encrypted_pin.rs index 50797b5..9732b88 100644 --- a/crates/gitlawb-node/src/encrypted_pin.rs +++ b/crates/gitlawb-node/src/encrypted_pin.rs @@ -13,31 +13,53 @@ use gitlawb_core::encrypt::seal_blob; use crate::db::Db; +use hmac::{Hmac, Mac}; +use sha2::Sha256; + +type HmacSha256 = Hmac; + +/// Opaque, node-keyed fingerprint of a blob's recipient set. Stored in place of +/// the cleartext DID list so a DB compromise cannot reveal the reader set; used +/// only to detect a recipient-set change so an unchanged blob is not re-sealed. 
+/// Order-insensitive (the input `BTreeSet` is already sorted). +pub fn recipients_tag(node_seed: &[u8; 32], dids: &BTreeSet) -> String { + let mut mac = HmacSha256::new_from_slice(node_seed).expect("HMAC accepts any key length"); + mac.update(b"gitlawb/recipients-tag/v1"); + for did in dids { + mac.update(b"\n"); + mac.update(did.as_bytes()); + } + hex::encode(mac.finalize().into_bytes()) +} + /// Resolve a DID string to its Ed25519 verifying key, or None if it carries no /// inline key (e.g. did:web / did:gitlawb). fn did_to_key(did: &str) -> Option { Did::from_str(did).ok()?.to_verifying_key().ok() } -/// Encrypt and pin every withheld blob. `recipients` maps blob oid -> DID set. -/// Returns `(oid, cid, recipients)` for each blob actually sealed and recorded -/// this call (the per-push delta), used by Option B3 to anchor a manifest. +/// Encrypt and pin every withheld blob. `recipients` maps blob oid -> DID set; +/// `node_seed` keys the opaque recipients tag. Returns `(oid, cid)` for each blob +/// actually sealed and recorded this call (the per-push delta), used by Option B3 +/// to anchor a manifest. Recipient identities are never stored or returned. pub async fn encrypt_and_pin( ipfs_api: &str, repo_path: &Path, db: &Db, repo_id: &str, + node_seed: &[u8; 32], recipients: &HashMap>, -) -> Vec<(String, String, Vec)> { +) -> Vec<(String, String)> { let mut sealed = Vec::new(); for (oid, dids) in recipients { // Skip only if an existing envelope already covers exactly these // recipients. If the recipient set changed (e.g. a reader was added to // the rule), re-seal so the new reader can recover the blob. Reader - // removal is not retroactive: the old envelope is already public. - if let Ok(Some(stored)) = db.encrypted_blob_recipients(repo_id, oid).await { - let stored: BTreeSet = stored.into_iter().collect(); - if &stored == dids { + // removal is not retroactive: the old envelope is already public. 
The + // comparison is on the opaque node-keyed tag, never the DID list. + let tag = recipients_tag(node_seed, dids); + if let Ok(Some(stored_tag)) = db.encrypted_blob_recipients_tag(repo_id, oid).await { + if stored_tag == tag { continue; } } @@ -61,15 +83,48 @@ pub async fn encrypt_and_pin( Ok(c) if !c.is_empty() => c, _ => continue, }; - let dids_vec: Vec = dids.iter().cloned().collect(); - if let Err(e) = db - .record_encrypted_blob(repo_id, oid, &cid, &dids_vec) - .await - { + if let Err(e) = db.record_encrypted_blob(repo_id, oid, &cid, &tag).await { tracing::warn!(oid = %oid, err = %e, "record_encrypted_blob failed"); continue; } - sealed.push((oid.clone(), cid.clone(), dids_vec)); + sealed.push((oid.clone(), cid.clone())); } sealed } + +#[cfg(test)] +mod tests { + use super::recipients_tag; + use std::collections::BTreeSet; + + fn set(dids: &[&str]) -> BTreeSet { + dids.iter().map(|s| s.to_string()).collect() + } + + #[test] + fn tag_is_order_insensitive() { + let seed = [7u8; 32]; + let a = recipients_tag(&seed, &set(&["did:key:zA", "did:key:zB"])); + let b = recipients_tag(&seed, &set(&["did:key:zB", "did:key:zA"])); + assert_eq!(a, b); + } + + #[test] + fn tag_differs_for_different_sets() { + let seed = [7u8; 32]; + let a = recipients_tag(&seed, &set(&["did:key:zA"])); + let b = recipients_tag(&seed, &set(&["did:key:zA", "did:key:zB"])); + assert_ne!(a, b); + } + + #[test] + fn tag_is_keyed_by_node_seed() { + let dids = set(&["did:key:zA", "did:key:zB"]); + let a = recipients_tag(&[1u8; 32], &dids); + let b = recipients_tag(&[2u8; 32], &dids); + assert_ne!( + a, b, + "tag must depend on the node seed, not be a plain hash" + ); + } +} diff --git a/crates/gitlawb-node/src/sync.rs b/crates/gitlawb-node/src/sync.rs index 615ce22..58cfa4d 100644 --- a/crates/gitlawb-node/src/sync.rs +++ b/crates/gitlawb-node/src/sync.rs @@ -373,10 +373,7 @@ async fn replicate_encrypted_blobs( } let have: HashMap = match db.list_all_encrypted_blobs(repo_id).await { - Ok(rows) 
=> rows - .into_iter() - .map(|(oid, cid, _recipients)| (oid, cid)) - .collect(), + Ok(rows) => rows.into_iter().collect(), Err(e) => { warn!(repo = %repo, err = %e, "failed to list local encrypted blobs for replication"); return; @@ -397,10 +394,7 @@ async fn replicate_encrypted_blobs( warn!(oid = %blob.oid, expected = %blob.cid, got = %cid, "replicated envelope CID mismatch; skipping record"); continue; } - if let Err(e) = db - .record_encrypted_blob(repo_id, &blob.oid, &cid, &[]) - .await - { + if let Err(e) = db.record_encrypted_blob(repo_id, &blob.oid, &cid, "").await { warn!(oid = %blob.oid, err = %e, "failed to record replicated encrypted blob"); } } From 54bcab480a5461199d83725d1153311512dc5dea Mon Sep 17 00:00:00 2001 From: beardthelion <56458543+beardthelion@users.noreply.github.com> Date: Fri, 12 Jun 2026 15:28:48 -0500 Subject: [PATCH 2/5] chore: remove redundant .gitignore entry (covered by local exclude) --- .gitignore | 3 --- 1 file changed, 3 deletions(-) diff --git a/.gitignore b/.gitignore index a36d8f7..404c87b 100644 --- a/.gitignore +++ b/.gitignore @@ -23,6 +23,3 @@ keys/ # Logs *.log .openclaude-profile.json - -# Local planning / scratch docs (never commit) -docs/superpowers/ From 0f425933a64fa983c6445af0aa7bf0d91269f9d3 Mon Sep 17 00:00:00 2001 From: beardthelion <56458543+beardthelion@users.noreply.github.com> Date: Fri, 12 Jun 2026 15:43:54 -0500 Subject: [PATCH 3/5] fix(node): gate /replicate by repo readability; harden reseal on DB error Address review on the at-rest blinding change: - The encrypted-blobs/replicate listing returned {oid, cid} with no visibility check, so a non-readable repo's blob index was reachable by an unauthenticated caller who guessed {owner}/{repo}. Gate it by the same repo-readability check discovery and fetch use. For the intended case (a public repo with withheld subtrees) the public root keeps this open to peers; only fully non-readable repos are withheld, which is the desired behavior. 
- encrypt_and_pin treated a recipients_tag DB read error as a cache miss and resealed, causing avoidable IPFS writes during a partial outage; skip and retry on the next push instead. - Correct the get_encrypted_blob doc comment to describe repo-readability access. --- crates/gitlawb-node/src/api/encrypted.rs | 21 ++++++++++++++++----- crates/gitlawb-node/src/encrypted_pin.rs | 10 ++++++++-- 2 files changed, 24 insertions(+), 7 deletions(-) diff --git a/crates/gitlawb-node/src/api/encrypted.rs b/crates/gitlawb-node/src/api/encrypted.rs index b7bda00..d9fa52a 100644 --- a/crates/gitlawb-node/src/api/encrypted.rs +++ b/crates/gitlawb-node/src/api/encrypted.rs @@ -38,7 +38,8 @@ pub async fn list_encrypted_blobs( } /// GET /api/v1/repos/{owner}/{repo}/encrypted-blob/{oid} -/// Returns raw envelope bytes if the caller is a recipient. +/// Returns raw envelope bytes to callers who can read the repo; the envelope +/// crypto still ensures only true recipients can decrypt. pub async fn get_encrypted_blob( State(state): State, auth: Option>, @@ -68,12 +69,16 @@ pub async fn get_encrypted_blob( /// GET /api/v1/repos/{owner}/{repo}/encrypted-blobs/replicate /// Returns [{oid, cid}] for every encrypted blob in the repo, for peer-mirror -/// replication (Option B2). Recipient identities are deliberately withheld: the -/// v2 envelopes no longer carry recipient public keys, so peers must not learn -/// the reader set either. A mirror detects a re-seal by the CID changing (the -/// OID is stable across re-seals). Ciphertext metadata only, never plaintext. +/// replication (Option B2). Gated by repo readability, like discovery, so a +/// non-readable repo does not expose its blob index; for the intended case (a +/// public repo with withheld subtrees) the public root keeps this open to peers. +/// Recipient identities are deliberately withheld: the v2 envelopes no longer +/// carry recipient public keys, so peers must not learn the reader set either. 
A +/// mirror detects a re-seal by the CID changing (the OID is stable across +/// re-seals). Ciphertext metadata only, never plaintext. pub async fn replicate_encrypted_blobs( State(state): State, + auth: Option>, Path((owner, repo)): Path<(String, String)>, ) -> Result> { let record = state @@ -81,6 +86,12 @@ pub async fn replicate_encrypted_blobs( .get_repo(&owner, &repo) .await? .ok_or_else(|| AppError::RepoNotFound(format!("{owner}/{repo}")))?; + let caller = auth.as_ref().map(|e| e.0 .0.as_str()); + let rules = state.db.list_visibility_rules(&record.id).await?; + if visibility_check(&rules, record.is_public, &record.owner_did, caller, "/") == Decision::Deny + { + return Err(AppError::RepoNotFound(format!("{owner}/{repo}"))); + } let rows = state.db.list_all_encrypted_blobs(&record.id).await?; let blobs: Vec<_> = rows .into_iter() diff --git a/crates/gitlawb-node/src/encrypted_pin.rs b/crates/gitlawb-node/src/encrypted_pin.rs index 9732b88..25439ee 100644 --- a/crates/gitlawb-node/src/encrypted_pin.rs +++ b/crates/gitlawb-node/src/encrypted_pin.rs @@ -58,8 +58,14 @@ pub async fn encrypt_and_pin( // removal is not retroactive: the old envelope is already public. The // comparison is on the opaque node-keyed tag, never the DID list. let tag = recipients_tag(node_seed, dids); - if let Ok(Some(stored_tag)) = db.encrypted_blob_recipients_tag(repo_id, oid).await { - if stored_tag == tag { + match db.encrypted_blob_recipients_tag(repo_id, oid).await { + Ok(Some(stored_tag)) if stored_tag == tag => continue, + Ok(_) => {} + Err(e) => { + // A DB read failure is not a cache miss: re-sealing here would do + // an avoidable IPFS write during a partial outage. Skip and retry + // on the next push. 
+ tracing::warn!(oid = %oid, err = %e, "recipients_tag lookup failed; skipping reseal"); continue; } } From cd5cda4c5a699a9309f553007e6336066bf06dc3 Mon Sep 17 00:00:00 2001 From: beardthelion <56458543+beardthelion@users.noreply.github.com> Date: Fri, 12 Jun 2026 18:27:03 -0500 Subject: [PATCH 4/5] fix(node): close under-withholding via full ref scope and reachable-only pin set blob_paths walked only refs/heads/* and refs/tags/* and skipped silently on a failed git ls-tree, so a blob reachable only through another namespace, or a ref that failed to traverse, could fall out of the withheld set and ship in cleartext. Walk every ref and fail closed on traversal error. The pin enumerators (ipfs_pin, pinata) used git cat-file --batch-all-objects, which includes unreachable/dangling objects that have no path and cannot be classified for withholding. Switch them to git rev-list --objects --all so the pin set matches the reachable graph blob_paths evaluates. --- .../gitlawb-node/src/git/visibility_pack.rs | 76 +++++++++++++++-- crates/gitlawb-node/src/ipfs_pin.rs | 84 ++++++++++++++++--- crates/gitlawb-node/src/pinata.rs | 73 ++++++++++++++-- 3 files changed, 206 insertions(+), 27 deletions(-) diff --git a/crates/gitlawb-node/src/git/visibility_pack.rs b/crates/gitlawb-node/src/git/visibility_pack.rs index 90ca772..a670dfc 100644 --- a/crates/gitlawb-node/src/git/visibility_pack.rs +++ b/crates/gitlawb-node/src/git/visibility_pack.rs @@ -10,25 +10,32 @@ use anyhow::{Context, Result}; use std::collections::{BTreeSet, HashMap, HashSet}; use std::path::Path; -/// List every (blob_oid, "/repo/relative/path") pair reachable from any branch -/// ref in `repo_path`. Uses `git ls-tree -r` per ref so each path a blob lives -/// at is represented (the same blob content can appear at several paths). Paths -/// are returned with a leading "/" to match the glob form used by visibility -/// rules ("/secret/**"). 
+/// List every (blob_oid, "/repo/relative/path") pair reachable from any ref in +/// `repo_path`. Walks every ref, not just `refs/heads/*` and `refs/tags/*`, so +/// the withheld set covers the same object graph the pack and pin paths expose; +/// a blob reachable only through another namespace (e.g. `refs/notes/*`) must not +/// escape withholding. Uses `git ls-tree -r` per ref so each path a blob lives +/// at is represented (the same blob content can appear at several paths). This is +/// why it is not `git rev-list --objects`, which reports only one path per object. +/// Paths carry a leading "/" to match the glob form used by visibility rules +/// ("/secret/**"). +/// +/// Fails closed: if a ref cannot be traversed, returns an error so the caller +/// aborts the serve/pin rather than producing a partial (under-withheld) set. fn blob_paths(repo_path: &Path) -> Result> { let refs = store::list_refs(repo_path).context("list_refs failed")?; let mut out = Vec::new(); for (refname, _oid) in refs { - if !refname.starts_with("refs/heads/") && !refname.starts_with("refs/tags/") { - continue; - } let listing = std::process::Command::new("git") .args(["ls-tree", "-r", &refname]) .current_dir(repo_path) .output() .context("git ls-tree -r failed")?; if !listing.status.success() { - continue; + anyhow::bail!( + "git ls-tree -r {refname} failed: {}", + String::from_utf8_lossy(&listing.stderr) + ); } for line in String::from_utf8_lossy(&listing.stdout).lines() { // "<mode> blob <oid>\t<path>" @@ -295,4 +302,55 @@ mod tests { let env = seal_blob(&bytes, &[reader.verifying_key()]).unwrap(); assert_eq!(open_blob(&env, &reader).unwrap(), bytes); } + + #[test] + fn withholds_blob_reachable_only_via_nonstandard_ref() { + let (_td, bare, secret_oid, _public) = fixture(); + // Move the sole ref out of refs/heads/* into a custom namespace so the + // secret blob is reachable only through a ref the old heads/tags filter + // skipped. It must still be withheld.
+ let head_ref = { + let out = Command::new("git") + .args(["symbolic-ref", "HEAD"]) + .current_dir(&bare) + .output() + .unwrap(); + String::from_utf8_lossy(&out.stdout).trim().to_string() + }; + let run = |args: &[&str]| { + assert!( + Command::new("git") + .args(args) + .current_dir(&bare) + .status() + .unwrap() + .success(), + "git {args:?} failed" + ); + }; + run(&["update-ref", "refs/custom/snap", "HEAD"]); + run(&["update-ref", "-d", &head_ref]); + + let rules = [rule("/secret/**", &[])]; + let withheld = withheld_blob_oids(&bare, &rules, true, OWNER, None).unwrap(); + assert!( + withheld.contains(&secret_oid), + "blob reachable only via refs/custom/* must still be withheld" + ); + } + + #[test] + fn fails_closed_when_a_ref_cannot_be_traversed() { + let (_td, bare, secret, _public) = fixture(); + // Point a ref at a blob (a valid object that is not tree-ish). `ls-tree -r` + // fails on it; that must propagate as Err rather than silently dropping the + // ref and under-withholding. + std::fs::write(bare.join("refs/heads/blobref"), format!("{secret}\n")).unwrap(); + let rules = [rule("/secret/**", &[])]; + let result = withheld_blob_oids(&bare, &rules, true, OWNER, None); + assert!( + result.is_err(), + "a ref that cannot be traversed must fail closed (Err)" + ); + } } diff --git a/crates/gitlawb-node/src/ipfs_pin.rs b/crates/gitlawb-node/src/ipfs_pin.rs index 9bdaade..89b500b 100644 --- a/crates/gitlawb-node/src/ipfs_pin.rs +++ b/crates/gitlawb-node/src/ipfs_pin.rs @@ -151,30 +151,94 @@ pub async fn pin_new_objects( pinned } -/// Run `git cat-file --batch-all-objects --batch-check='%(objectname)'` -/// to get all object SHA-256 hashes in the repository. +/// Names of every object reachable from any ref, via `git rev-list --objects --all`. 
+/// Reachable-only on purpose (not `cat-file --batch-all-objects`): an unreachable +/// or dangling object has no ref and no path, so visibility rules cannot classify +/// it for withholding, so it must not be pinned in cleartext. This keeps the pin set +/// aligned with what `blob_paths` can evaluate. fn list_all_objects(repo_path: &std::path::Path) -> Result> { let output = std::process::Command::new("git") - .args([ - "cat-file", - "--batch-all-objects", - "--batch-check=%(objectname)", - ]) + .args(["rev-list", "--objects", "--all"]) .current_dir(repo_path) .output() - .map_err(|e| anyhow::anyhow!("failed to run git cat-file: {e}"))?; + .map_err(|e| anyhow::anyhow!("failed to run git rev-list: {e}"))?; if !output.status.success() { let stderr = String::from_utf8_lossy(&output.stderr); - return Err(anyhow::anyhow!("git cat-file failed: {stderr}")); + return Err(anyhow::anyhow!("git rev-list failed: {stderr}")); } + // `rev-list --objects` lines are "<oid>" or "<oid> <path>"; keep the oid. let stdout = String::from_utf8_lossy(&output.stdout); let hashes = stdout .lines() - .map(|l| l.trim().to_string()) + .filter_map(|l| l.split_whitespace().next().map(str::to_string)) .filter(|l| !l.is_empty()) .collect(); Ok(hashes) } + +#[cfg(test)] +mod tests { + use super::list_all_objects; + use std::process::Command; + use tempfile::TempDir; + + #[test] + fn list_all_objects_excludes_unreachable_blobs() { + let td = TempDir::new().unwrap(); + let work = td.path(); + let run = |args: &[&str]| { + assert!( + Command::new("git") + .args(args) + .current_dir(work) + .status() + .unwrap() + .success(), + "git {args:?} failed" + ); + }; + run(&["init", "-q"]); + run(&["config", "user.email", "t@t"]); + run(&["config", "user.name", "t"]); + std::fs::write(work.join("a.txt"), b"reachable\n").unwrap(); + run(&["add", "."]); + run(&["commit", "-qm", "init"]); + + let reachable = String::from_utf8_lossy( + &Command::new("git") + .args(["rev-parse", "HEAD:a.txt"]) + .current_dir(work) + .output() +
.unwrap() + .stdout, + ) + .trim() + .to_string(); + + // Write a loose blob that no ref reaches (dangling). + std::fs::write(work.join("dangling.txt"), b"DANGLING SECRET\n").unwrap(); + let dangling = String::from_utf8_lossy( + &Command::new("git") + .args(["hash-object", "-w", "dangling.txt"]) + .current_dir(work) + .output() + .unwrap() + .stdout, + ) + .trim() + .to_string(); + + let objs = list_all_objects(work).unwrap(); + assert!( + objs.contains(&reachable), + "the committed (reachable) blob must be listed" + ); + assert!( + !objs.contains(&dangling), + "an unreachable/dangling blob must NOT be listed" + ); + } +} diff --git a/crates/gitlawb-node/src/pinata.rs b/crates/gitlawb-node/src/pinata.rs index 90bddad..1021d77 100644 --- a/crates/gitlawb-node/src/pinata.rs +++ b/crates/gitlawb-node/src/pinata.rs @@ -134,25 +134,26 @@ pub async fn pin_new_objects( pinned } +/// Names of every object reachable from any ref, via `git rev-list --objects --all`. +/// Reachable-only on purpose (not `cat-file --batch-all-objects`): an unreachable +/// or dangling object has no path, cannot be classified for withholding, and must +/// not be pinned in cleartext. fn list_all_objects(repo_path: &std::path::Path) -> Result> { let out = std::process::Command::new("git") - .args([ - "cat-file", - "--batch-all-objects", - "--batch-check=%(objectname)", - ]) + .args(["rev-list", "--objects", "--all"]) .current_dir(repo_path) .output() - .map_err(|e| anyhow::anyhow!("failed to run git cat-file: {e}"))?; + .map_err(|e| anyhow::anyhow!("failed to run git rev-list: {e}"))?; if !out.status.success() { let stderr = String::from_utf8_lossy(&out.stderr); - return Err(anyhow::anyhow!("git cat-file failed: {stderr}")); + return Err(anyhow::anyhow!("git rev-list failed: {stderr}")); } + // `rev-list --objects` lines are "<oid>" or "<oid> <path>"; keep the oid.
Ok(String::from_utf8_lossy(&out.stdout) .lines() - .map(|l| l.trim().to_string()) + .filter_map(|l| l.split_whitespace().next().map(str::to_string)) .filter(|l| !l.is_empty()) .collect()) } @@ -163,6 +164,62 @@ fn list_all_objects(repo_path: &std::path::Path) -> Result> { mod tests { use super::*; + #[test] + fn list_all_objects_excludes_unreachable_blobs() { + use std::process::Command; + use tempfile::TempDir; + + let td = TempDir::new().unwrap(); + let work = td.path(); + let run = |args: &[&str]| { + assert!( + Command::new("git") + .args(args) + .current_dir(work) + .status() + .unwrap() + .success(), + "git {args:?} failed" + ); + }; + run(&["init", "-q"]); + run(&["config", "user.email", "t@t"]); + run(&["config", "user.name", "t"]); + std::fs::write(work.join("a.txt"), b"reachable\n").unwrap(); + run(&["add", "."]); + run(&["commit", "-qm", "init"]); + + let reachable = String::from_utf8_lossy( + &Command::new("git") + .args(["rev-parse", "HEAD:a.txt"]) + .current_dir(work) + .output() + .unwrap() + .stdout, + ) + .trim() + .to_string(); + + std::fs::write(work.join("dangling.txt"), b"DANGLING SECRET\n").unwrap(); + let dangling = String::from_utf8_lossy( + &Command::new("git") + .args(["hash-object", "-w", "dangling.txt"]) + .current_dir(work) + .output() + .unwrap() + .stdout, + ) + .trim() + .to_string(); + + let objs = list_all_objects(work).unwrap(); + assert!(objs.contains(&reachable), "reachable blob must be listed"); + assert!( + !objs.contains(&dangling), + "unreachable/dangling blob must NOT be listed" + ); + } + #[tokio::test] async fn test_pin_skipped_when_jwt_empty() { let client = reqwest::Client::new(); From 48c45bc32d7b75e7daaa384028b25a54759df7ff Mon Sep 17 00:00:00 2001 From: beardthelion <56458543+beardthelion@users.noreply.github.com> Date: Fri, 19 Jun 2026 11:31:27 -0500 Subject: [PATCH 5/5] fix(node): fail closed on partial recipient sets and unknown mirror state Two correctness fixes flagged in review on this branch. 
encrypted_pin: encrypt_and_pin built its recipient key list with filter_map, silently dropping any DID that does not resolve to an inline key (did:web / did:gitlawb) and sealing to the remaining subset. Because recipients_tag is computed over the full DID set, the blob would never be re-sealed once the dropped DID later resolved, permanently locking that authorized reader out. resolve_recipient_keys now returns None if any DID is unresolvable, and the blob is skipped (and retried next push) rather than sealed to a partial set. sync: fetch_withheld returns None on any 404/5xx/network/parse error, and classify_mirror collapsed None into MirrorMode::Plain. On an existing promisor mirror that downgrade strips the partial-clone config and --refetches a still-withheld repo, which fails, so a transient withheld-paths outage broke syncs until the endpoint recovered. decide_mode now preserves an existing promisor mirror's mode when the lookup is unknown; fresh-clone fail-closed behavior is unchanged. --- crates/gitlawb-node/src/encrypted_pin.rs | 51 ++++++++++++++++++--- crates/gitlawb-node/src/sync.rs | 56 +++++++++++++++++------- 2 files changed, 86 insertions(+), 21 deletions(-) diff --git a/crates/gitlawb-node/src/encrypted_pin.rs b/crates/gitlawb-node/src/encrypted_pin.rs index 25439ee..00b7048 100644 --- a/crates/gitlawb-node/src/encrypted_pin.rs +++ b/crates/gitlawb-node/src/encrypted_pin.rs @@ -38,6 +38,17 @@ fn did_to_key(did: &str) -> Option { Did::from_str(did).ok()?.to_verifying_key().ok() } +/// Resolve every recipient DID to its verifying key. Returns `None` if *any* DID +/// fails to resolve locally (e.g. a did:web / did:gitlawb reader). +/// +/// Fail closed rather than sealing to the resolvable subset: a partial seal would +/// silently exclude an authorized reader, and because `recipients_tag` is computed +/// over the full DID set the blob would never be re-sealed once that DID later +/// resolves — locking the reader out permanently. 
+fn resolve_recipient_keys(dids: &BTreeSet) -> Option> { + dids.iter().map(|d| did_to_key(d)).collect() +} + /// Encrypt and pin every withheld blob. `recipients` maps blob oid -> DID set; /// `node_seed` keys the opaque recipients tag. Returns `(oid, cid)` for each blob /// actually sealed and recorded this call (the per-push delta), used by Option B3 @@ -69,11 +80,19 @@ pub async fn encrypt_and_pin( continue; } } - let keys: Vec = dids.iter().filter_map(|d| did_to_key(d)).collect(); - if keys.is_empty() { - tracing::warn!(oid = %oid, "no resolvable recipient keys; skipping encrypted pin"); - continue; - } + let keys = match resolve_recipient_keys(dids) { + Some(keys) if !keys.is_empty() => keys, + Some(_) => { + tracing::warn!(oid = %oid, "empty recipient set; skipping encrypted pin"); + continue; + } + None => { + // At least one recipient DID is unresolvable; sealing now would + // exclude it permanently. Skip and retry on the next push. + tracing::warn!(oid = %oid, "unresolvable recipient DID; skipping encrypted pin rather than sealing to a partial set"); + continue; + } + }; let data = match crate::git::store::read_object(repo_path, oid) { Ok(Some((_t, bytes))) => bytes, _ => continue, @@ -100,13 +119,33 @@ pub async fn encrypt_and_pin( #[cfg(test)] mod tests { - use super::recipients_tag; + use super::{recipients_tag, resolve_recipient_keys}; + use gitlawb_core::did::Did; + use gitlawb_core::identity::Keypair; use std::collections::BTreeSet; fn set(dids: &[&str]) -> BTreeSet { dids.iter().map(|s| s.to_string()).collect() } + #[test] + fn resolve_returns_all_keys_when_every_did_resolves() { + let a = Keypair::generate().did().to_string(); + let b = Keypair::generate().did().to_string(); + let keys = resolve_recipient_keys(&set(&[&a, &b])); + assert_eq!(keys.map(|k| k.len()), Some(2)); + } + + #[test] + fn resolve_returns_none_when_any_did_is_unresolvable() { + let a = Keypair::generate().did().to_string(); + let web = Did::web("example.com").to_string(); + 
assert!( + resolve_recipient_keys(&set(&[&a, &web])).is_none(), + "an unresolvable did:web recipient must abort sealing, not seal to a partial set" + ); + } + #[test] fn tag_is_order_insensitive() { let seed = [7u8; 32]; diff --git a/crates/gitlawb-node/src/sync.rs b/crates/gitlawb-node/src/sync.rs index 58cfa4d..984f7db 100644 --- a/crates/gitlawb-node/src/sync.rs +++ b/crates/gitlawb-node/src/sync.rs @@ -32,19 +32,27 @@ enum MirrorMode { Promisor, } -/// Decide the mirror mode from the origin's `withheld-paths` response. +/// Decide the mirror mode from the origin's `withheld-paths` response and the +/// mirror's current state. /// /// `Some(non-empty)` → the repo has a private subtree → `Promisor`. /// `Some(empty)` → fully public → `Plain`. -/// `None` → the lookup 404'd or failed. Attempt a `Plain` mirror; a -/// mode-A repo also 404s the git read endpoint, so the clone -/// fails and nothing is mirrored (fail-closed at the git -/// layer), while a public repo on a peer that predates the -/// `withheld-paths` route still gets mirrored. -fn classify_mirror(withheld: Option>) -> MirrorMode { +/// `None` → the lookup 404'd, failed, or didn't parse, i.e. the state +/// is *unknown*. Preserve the mirror's existing mode rather +/// than downgrading: an existing promisor mirror stays +/// `Promisor`, so a transient `withheld-paths` outage cannot +/// strip its partial-clone config and break fetches of a +/// still-withheld repo. With no existing promisor state (a +/// fresh clone) attempt `Plain`; a mode-A repo then 404s the +/// git read endpoint and the clone fails (fail-closed at the +/// git layer), while a public repo on a peer that predates +/// the `withheld-paths` route still gets mirrored. 
+fn decide_mode(withheld: Option>, existing_promisor: bool) -> MirrorMode { match withheld { Some(globs) if !globs.is_empty() => MirrorMode::Promisor, - _ => MirrorMode::Plain, + Some(_) => MirrorMode::Plain, + None if existing_promisor => MirrorMode::Promisor, + None => MirrorMode::Plain, } } @@ -186,7 +194,15 @@ async fn process_batch( let remote_url = format!("{}/{}", origin_url, item.repo); let withheld = fetch_withheld(client, &origin_url, owner_short, repo_name).await; - let mode = classify_mirror(withheld); + // When the lookup is unknown (None), preserve an existing promisor mirror + // so a transient withheld-paths outage doesn't strip its partial-clone + // config and break fetches of a still-withheld repo. + let existing_promisor = local_path.exists() + && git_config_get(local_path.to_str().unwrap_or("."), "remote.origin.promisor") + .await + .as_deref() + == Some("true"); + let mode = decide_mode(withheld, existing_promisor); let result = if local_path.exists() { fetch_repo(&local_path, &remote_url, mode).await @@ -533,24 +549,34 @@ mod tests { #[test] fn classify_promisor_when_withheld_nonempty() { - let mode = classify_mirror(Some(vec!["/secret/**".to_string()])); + let mode = decide_mode(Some(vec!["/secret/**".to_string()]), false); assert!(matches!(mode, MirrorMode::Promisor)); } #[test] fn classify_plain_when_withheld_empty() { - let mode = classify_mirror(Some(vec![])); + let mode = decide_mode(Some(vec![]), false); assert!(matches!(mode, MirrorMode::Plain)); } #[test] - fn classify_plain_when_lookup_failed() { - // None == 404 / network error / parse failure: attempt a plain mirror - // and let the git read endpoint fail-close a mode-A repo. - let mode = classify_mirror(None); + fn classify_plain_when_lookup_failed_on_fresh_clone() { + // None == 404 / network error / parse failure with no existing mirror: + // attempt a plain mirror and let the git read endpoint fail-close a + // mode-A repo. 
+ let mode = decide_mode(None, false); assert!(matches!(mode, MirrorMode::Plain)); } + #[test] + fn classify_preserves_promisor_when_lookup_failed_on_existing_mirror() { + // A transient withheld-paths outage (None) on a repo that is already a + // promisor mirror must NOT downgrade to Plain: doing so would strip the + // partial-clone config and break the fetch of a still-withheld repo. + let mode = decide_mode(None, true); + assert!(matches!(mode, MirrorMode::Promisor)); + } + fn rb(oid: &str, cid: &str) -> ReplicaBlob { ReplicaBlob { oid: oid.to_string(),