diff --git a/crates/gitlawb-node/src/api/repos.rs b/crates/gitlawb-node/src/api/repos.rs index 88e5c7c..2886926 100644 --- a/crates/gitlawb-node/src/api/repos.rs +++ b/crates/gitlawb-node/src/api/repos.rs @@ -632,14 +632,68 @@ pub async fn git_receive_pack( } } - // Pin new git objects to the local IPFS node (no-op if ipfs_api is empty) - { + // Replication enforcement (Phase 2): decide once per push whether the public + // may read this repo at all and, if so, which blob OIDs must not leave the + // node. `withheld == None` means replicate nothing (private / mode A / + // undetermined): skip every pin so even commit and tree objects (which + // withheld_blob_oids never lists) stay local. `announce` gates the + // network-facing announcements. Fail closed: a private or undetermined repo + // never leaks. + let rules_opt = state.db.list_visibility_rules(&record.id).await.ok(); + let announce = match &rules_opt { + Some(rules) => { + visibility_check(rules, record.is_public, &record.owner_did, None, "/") + == Decision::Allow + } + None => false, + }; + let withheld: Option<std::collections::HashSet<String>> = if !announce { + None + } else { + match &rules_opt { + Some(rules) if rules.is_empty() => Some(std::collections::HashSet::new()), + // withheld_blob_oids walks every ref with blocking `git ls-tree`; + // keep that off the async worker thread. 
+ Some(rules) => { + let path = disk_path.clone(); + let rules = rules.clone(); + let owner_did = record.owner_did.clone(); + let is_public = record.is_public; + tokio::task::spawn_blocking(move || { + crate::git::visibility_pack::withheld_blob_oids( + &path, &rules, is_public, &owner_did, None, + ) + }) + .await + .map_err(|e| { + tracing::warn!(err = %e, "withheld_blob_oids task panicked; skipping replication for this push") + }) + .ok() + .and_then(|r| { + r.map_err(|e| { + tracing::warn!(err = %e, "withheld_blob_oids failed; skipping replication for this push") + }) + .ok() + }) + } + None => None, + } + }; + + // Pin new git objects to the local IPFS node (no-op if ipfs_api is empty). + // Skipped entirely when the public cannot read the repo (withheld == None). + if let Some(withheld_ipfs) = withheld.clone() { let ipfs_api = state.config.ipfs_api.clone(); let repo_path_clone = disk_path.clone(); let db_clone = state.db.clone(); tokio::spawn(async move { - let pinned = - crate::ipfs_pin::pin_new_objects(&ipfs_api, &repo_path_clone, &db_clone).await; + let pinned = crate::ipfs_pin::pin_new_objects( + &ipfs_api, + &repo_path_clone, + &db_clone, + &withheld_ipfs, + ) + .await; if !pinned.is_empty() { tracing::info!(count = pinned.len(), "pinned git objects to IPFS"); for (sha, cid) in &pinned { @@ -678,15 +732,22 @@ pub async fn git_receive_pack( let owner_did_for_arweave = record.owner_did.clone(); let self_public_url = state.config.public_url.clone(); let node_keypair = Arc::clone(&state.node_keypair); + let withheld_pinata = withheld; tokio::spawn(async move { - let pinned = crate::pinata::pin_new_objects( - &http_client, - &pinata_upload_url, - &pinata_jwt, - &repo_path_clone, - &db_clone, - ) - .await; + let pinned = match &withheld_pinata { + Some(withheld) => { + crate::pinata::pin_new_objects( + &http_client, + &pinata_upload_url, + &pinata_jwt, + &repo_path_clone, + &db_clone, + withheld, + ) + .await + } + None => Vec::new(), + }; if !pinned.is_empty() 
{ tracing::info!(count = pinned.len(), "pinned git objects to Pinata"); @@ -705,77 +766,82 @@ pub async fn git_receive_pack( .await; } - if let Some(p2p) = &p2p_handle { - p2p.publish_ref_update(crate::p2p::RefUpdateEvent { - node_did: node_did_str.clone(), - pusher_did: pusher_did_clone.clone(), - repo: repo_slug.clone(), - ref_name: ref_name.clone(), - old_sha: "".to_string(), - new_sha: new_sha.clone(), - timestamp: chrono::Utc::now().to_rfc3339(), - cert_id: None, - cid: cid.map(|s| s.to_string()), - }) - .await; + if announce { + if let Some(p2p) = &p2p_handle { + p2p.publish_ref_update(crate::p2p::RefUpdateEvent { + node_did: node_did_str.clone(), + pusher_did: pusher_did_clone.clone(), + repo: repo_slug.clone(), + ref_name: ref_name.clone(), + old_sha: "".to_string(), + new_sha: new_sha.clone(), + timestamp: chrono::Utc::now().to_rfc3339(), + cert_id: None, + cid: cid.map(|s| s.to_string()), + }) + .await; + } } } // HTTP peer notification — notify all known peers to pull from us. // This is the reliable fallback when Gossipsub p2p is not yet connected. 
- if let Ok(peers) = db_for_peers.list_peers().await { - for peer in peers { - if peer.http_url.is_empty() { - continue; - } - let peer_url = peer.http_url.trim_end_matches('/'); - if let Some(self_url) = self_public_url.as_deref() { - if peer_url == self_url.trim_end_matches('/') { - continue; - } - } - let path = "/api/v1/sync/notify"; - let notify_url = format!("{peer_url}{path}"); - let body = serde_json::json!({ - "repo": repo_slug.clone(), - "ref_name": ref_updates_clone.first().map(|(r, _)| r).unwrap_or(&String::new()), - "new_sha": ref_updates_clone.first().map(|(_, s)| s).unwrap_or(&String::new()), - "node_did": node_did_str.clone(), - "pusher_did": pusher_did_clone.clone(), - "old_sha": "0000000000000000000000000000000000000000", - "timestamp": chrono::Utc::now().to_rfc3339(), - }); - let body_bytes = match serde_json::to_vec(&body) { - Ok(bytes) => bytes, - Err(e) => { - tracing::warn!(peer = %peer.did, err = %e, "failed to serialize peer sync notify"); + // Suppressed for repos the public cannot read. 
+ if announce { + if let Ok(peers) = db_for_peers.list_peers().await { + for peer in peers { + if peer.http_url.is_empty() { continue; } - }; - let signed = gitlawb_core::http_sig::sign_request( - node_keypair.as_ref(), - "POST", - path, - &body_bytes, - ); - match http_client - .post(&notify_url) - .header("Content-Type", "application/json") - .header("Content-Digest", signed.content_digest) - .header("Signature-Input", signed.signature_input) - .header("Signature", signed.signature) - .body(body_bytes) - .send() - .await - { - Ok(r) if r.status().is_success() => { - tracing::info!(peer = %peer.did, repo = %repo_slug, "notified peer to sync") - } - Ok(r) => { - tracing::warn!(peer = %peer.did, status = %r.status(), "peer sync notify returned error") + let peer_url = peer.http_url.trim_end_matches('/'); + if let Some(self_url) = self_public_url.as_deref() { + if peer_url == self_url.trim_end_matches('/') { + continue; + } } - Err(e) => { - tracing::warn!(peer = %peer.did, err = %e, "failed to notify peer") + let path = "/api/v1/sync/notify"; + let notify_url = format!("{peer_url}{path}"); + let body = serde_json::json!({ + "repo": repo_slug.clone(), + "ref_name": ref_updates_clone.first().map(|(r, _)| r).unwrap_or(&String::new()), + "new_sha": ref_updates_clone.first().map(|(_, s)| s).unwrap_or(&String::new()), + "node_did": node_did_str.clone(), + "pusher_did": pusher_did_clone.clone(), + "old_sha": "0000000000000000000000000000000000000000", + "timestamp": chrono::Utc::now().to_rfc3339(), + }); + let body_bytes = match serde_json::to_vec(&body) { + Ok(bytes) => bytes, + Err(e) => { + tracing::warn!(peer = %peer.did, err = %e, "failed to serialize peer sync notify"); + continue; + } + }; + let signed = gitlawb_core::http_sig::sign_request( + node_keypair.as_ref(), + "POST", + path, + &body_bytes, + ); + match http_client + .post(&notify_url) + .header("Content-Type", "application/json") + .header("Content-Digest", signed.content_digest) + .header("Signature-Input", 
signed.signature_input) + .header("Signature", signed.signature) + .body(body_bytes) + .send() + .await + { + Ok(r) if r.status().is_success() => { + tracing::info!(peer = %peer.did, repo = %repo_slug, "notified peer to sync") + } + Ok(r) => { + tracing::warn!(peer = %peer.did, status = %r.status(), "peer sync notify returned error") + } + Err(e) => { + tracing::warn!(peer = %peer.did, err = %e, "failed to notify peer") + } } } } @@ -799,8 +865,9 @@ pub async fn git_receive_pack( timestamp: now_ts.clone(), }); - // Arweave permanent anchoring — fire for each ref update - if !irys_url.is_empty() { + // Arweave permanent anchoring — fire for each ref update. + // Suppressed for repos the public cannot read (public permanent ledger). + if announce && !irys_url.is_empty() { for (ref_name, new_sha) in &ref_updates_clone { let cid = cid_map.get(new_sha).cloned(); let anchor = crate::arweave::RefAnchor { diff --git a/crates/gitlawb-node/src/git/visibility_pack.rs b/crates/gitlawb-node/src/git/visibility_pack.rs index d386415..c9c6d6b 100644 --- a/crates/gitlawb-node/src/git/visibility_pack.rs +++ b/crates/gitlawb-node/src/git/visibility_pack.rs @@ -77,6 +77,16 @@ pub fn withheld_blob_oids( Ok(denied.difference(&allowed).cloned().collect()) } +/// Objects that may replicate to the public: everything not in `withheld`. +/// Order-preserving. The single seam every replication site (IPFS, Pinata) +/// passes its object list through; option B would later reroute the withheld +/// ones through encrypt-then-pin instead of dropping them. 
+pub fn replicable_objects(all: Vec<String>, withheld: &HashSet<String>) -> Vec<String> { + all.into_iter() + .filter(|oid| !withheld.contains(oid)) + .collect() +} + #[cfg(test)] mod tests { use super::*; @@ -146,6 +156,24 @@ mod tests { (td, bare, secret, public) } + #[test] + fn anonymous_caller_withholds_only_private_blob() { + let (_td, bare, secret_oid, public_oid) = fixture(); + let rules = [rule("/secret/**", &[])]; + // caller = None models the public / any peer: what must not replicate. + let withheld = withheld_blob_oids(&bare, &rules, true, OWNER, None).unwrap(); + assert!( + withheld.contains(&secret_oid), + "secret blob must be withheld" + ); + assert!( + !withheld.contains(&public_oid), + "public blob must replicate" + ); + // Trees and commits are never withheld; the set holds only the secret blob. + assert_eq!(withheld.len(), 1, "only the secret blob OID is withheld"); + } + #[test] fn non_reader_withholds_only_the_private_blob() { let (_td, bare, secret, public) = fixture(); @@ -186,4 +214,20 @@ mod tests { "public repo, no rules, nothing withheld" ); } + + #[test] + fn replicable_objects_drops_withheld_keeps_rest() { + let all = vec!["aaa".to_string(), "bbb".to_string(), "ccc".to_string()]; + let withheld: HashSet<String> = ["bbb".to_string()].into_iter().collect(); + let got = replicable_objects(all, &withheld); + assert_eq!(got, vec!["aaa".to_string(), "ccc".to_string()]); + } + + #[test] + fn replicable_objects_empty_withheld_keeps_all() { + let all = vec!["aaa".to_string(), "bbb".to_string()]; + let withheld: HashSet<String> = HashSet::new(); + let got = replicable_objects(all.clone(), &withheld); + assert_eq!(got, all); + } } diff --git a/crates/gitlawb-node/src/ipfs_pin.rs b/crates/gitlawb-node/src/ipfs_pin.rs index 831f1ad..96d6abd 100644 --- a/crates/gitlawb-node/src/ipfs_pin.rs +++ b/crates/gitlawb-node/src/ipfs_pin.rs @@ -7,6 +7,8 @@ //! If `ipfs_api` is empty the functions are no-ops, so the node works fine //! without a local IPFS daemon. 
+use std::collections::HashSet; + use anyhow::Result; use gitlawb_core::cid::Cid; @@ -78,6 +80,7 @@ pub async fn pin_new_objects( ipfs_api: &str, repo_path: &std::path::Path, db: &crate::db::Db, + withheld: &HashSet<String>, ) -> Vec<(String, String)> { if ipfs_api.is_empty() { return vec![]; @@ -92,6 +95,8 @@ pub async fn pin_new_objects( } }; + let object_list = crate::git::visibility_pack::replicable_objects(object_list, withheld); + let mut pinned = Vec::new(); for sha in object_list { diff --git a/crates/gitlawb-node/src/pinata.rs b/crates/gitlawb-node/src/pinata.rs index ee9d416..90bddad 100644 --- a/crates/gitlawb-node/src/pinata.rs +++ b/crates/gitlawb-node/src/pinata.rs @@ -7,6 +7,7 @@ //! no-op, so nodes without Pinata backing work fine. use anyhow::Result; +use std::collections::HashSet; /// Pin a single git object's raw bytes on Pinata (v3 API). /// @@ -76,6 +77,7 @@ pub async fn pin_new_objects( jwt: &str, repo_path: &std::path::Path, db: &crate::db::Db, + withheld: &HashSet<String>, ) -> Vec<(String, String)> { if jwt.is_empty() { return vec![]; @@ -92,6 +94,7 @@ pub async fn pin_new_objects( return vec![]; } }; + let object_list = crate::git::visibility_pack::replicable_objects(object_list, withheld); let mut pinned = Vec::new(); diff --git a/crates/gitlawb-node/src/visibility.rs b/crates/gitlawb-node/src/visibility.rs index 345f41d..afe6a7c 100644 --- a/crates/gitlawb-node/src/visibility.rs +++ b/crates/gitlawb-node/src/visibility.rs @@ -351,4 +351,24 @@ mod tests { Decision::Allow ); } + + // Mirrors the gossip-announce gate in git_receive_pack: announce iff an + // anonymous caller can read "/". + #[test] + fn announce_gate_matches_public_readability() { + let announce = |rules: &[VisibilityRule], is_public: bool| { + visibility_check(rules, is_public, OWNER, None, "/") == Decision::Allow + }; + // Public repo, no rules → announce. + assert!(announce(&[], true)); + // Legacy private repo (is_public false, no rules) → silent. 
+ assert!(!announce(&[], false)); + // Mode A whole-repo rule with no public readers → silent. + assert!(!announce(&[rule("/", VisibilityMode::A, &[])], true)); + // Mode B public repo with a private subtree → still announce. + assert!(announce( + &[rule("/secret/**", VisibilityMode::B, &[])], + true + )); + } }