From 631117fac621cb19184bf9ce5e7a05e1bca33cae Mon Sep 17 00:00:00 2001 From: beardthelion <56458543+beardthelion@users.noreply.github.com> Date: Sat, 13 Jun 2026 11:35:40 -0500 Subject: [PATCH 1/3] fix(node): enforce path-scoped visibility on the REST read API The repo-scoped REST read endpoints (blob, tree, commits, refs, changelog, pulls, pr diff, repo metadata) read straight from disk after a bare repo lookup, with no visibility check, so private repositories and private subtrees of public repositories were served to unauthenticated callers, bypassing the withholding the git and replication paths enforce. Add a shared authorize_repo_read gate (resolve repo, load rules, visibility_check, 404 on deny) and call it from every repo-scoped read handler. get_blob gates at the requested file path; get_pr_diff withholds the whole diff when it touches a path the caller cannot read. Attach optional_signature to the read routes so authorized readers authenticate. Listings stay gated at the repo root, consistent with mode B exposing tree and commit SHAs over git. Closes #51 --- crates/gitlawb-node/src/api/changelog.rs | 12 ++-- crates/gitlawb-node/src/api/mod.rs | 34 +++++++++++ crates/gitlawb-node/src/api/pulls.rs | 46 ++++++++++----- crates/gitlawb-node/src/api/repos.rs | 55 ++++++++---------- crates/gitlawb-node/src/git/store.rs | 74 ++++++++++++++++++++++++ crates/gitlawb-node/src/server.rs | 3 +- 6 files changed, 172 insertions(+), 52 deletions(-) diff --git a/crates/gitlawb-node/src/api/changelog.rs b/crates/gitlawb-node/src/api/changelog.rs index b94e40c..b8a69a7 100644 --- a/crates/gitlawb-node/src/api/changelog.rs +++ b/crates/gitlawb-node/src/api/changelog.rs @@ -1,10 +1,11 @@ //! Changelog endpoint — unified timeline of commits, merged PRs, and closed issues. -use axum::extract::{Path, Query, State}; +use axum::extract::{Extension, Path, Query, State}; use axum::Json; use serde::Deserialize; use crate::error::{AppError, Result}; +use crate::auth::AuthenticatedDid; use crate::git::store; use crate::state::AppState; @@ -27,12 +28,11 @@ pub async fn get_changelog( State(state): State, Path((owner, repo)): Path<(String, String)>, Query(query): Query, + auth: Option>, ) -> Result> { - let record = state - .db - .get_repo(&owner, &repo) - .await? - .ok_or_else(|| AppError::RepoNotFound(format!("{owner}/{repo}")))?; + let caller = auth.as_ref().map(|e| e.0 .0.as_str()); + let (record, _rules) = + crate::api::authorize_repo_read(&state, &owner, &repo, caller, "/").await?; let limit = query.limit.min(100); diff --git a/crates/gitlawb-node/src/api/mod.rs b/crates/gitlawb-node/src/api/mod.rs index 2595c48..13286c7 100644 --- a/crates/gitlawb-node/src/api/mod.rs +++ b/crates/gitlawb-node/src/api/mod.rs @@ -1,3 +1,8 @@ +use crate::db::{RepoRecord, VisibilityRule}; +use crate::error::{AppError, Result}; +use crate::state::AppState; +use crate::visibility::{visibility_check, Decision}; + pub mod agents; pub mod arweave; pub mod bounties; @@ -19,3 +24,32 @@ pub mod stars; pub mod tasks; pub mod visibility; pub mod webhooks; + +/// Resolve a repo for a read request and enforce path-scoped visibility. +/// +/// Returns 404 (`RepoNotFound`) if the repo does not exist or the caller may not +/// read `path`, using the same opaque response the git serve path returns so +/// existence is not confirmed. Returns the record and its visibility rules so a +/// content handler can apply an extra per-path check without a second DB query. +/// +/// Callers pass `"/"` for repo-level reads (listings); content endpoints pass the +/// specific path so a withheld subtree is denied even on an otherwise-public repo. +pub(crate) async fn authorize_repo_read( + state: &AppState, + owner: &str, + name: &str, + caller: Option<&str>, + path: &str, +) -> Result<(RepoRecord, Vec)> { + let record = state + .db + .get_repo(owner, name) + .await? + .ok_or_else(|| AppError::RepoNotFound(format!("{owner}/{name}")))?; + let rules = state.db.list_visibility_rules(&record.id).await?; + if visibility_check(&rules, record.is_public, &record.owner_did, caller, path) == Decision::Deny + { + return Err(AppError::RepoNotFound(format!("{owner}/{name}"))); + } + Ok((record, rules)) +} diff --git a/crates/gitlawb-node/src/api/pulls.rs b/crates/gitlawb-node/src/api/pulls.rs index 419a932..510c446 100644 --- a/crates/gitlawb-node/src/api/pulls.rs +++ b/crates/gitlawb-node/src/api/pulls.rs @@ -101,12 +101,11 @@ pub async fn create_pr( pub async fn list_prs( State(state): State, Path((owner, name)): Path<(String, String)>, + auth: Option>, ) -> Result> { - let record = state - .db - .get_repo(&owner, &name) - .await? - .ok_or_else(|| AppError::RepoNotFound(format!("{owner}/{name}")))?; + let caller = auth.as_ref().map(|e| e.0 .0.as_str()); + let (record, _rules) = + crate::api::authorize_repo_read(&state, &owner, &name, caller, "/").await?; let prs = state.db.list_prs(&record.id).await?; Ok(Json( @@ -118,12 +117,11 @@ pub async fn list_prs( pub async fn get_pr( State(state): State, Path((owner, name, number)): Path<(String, String, i64)>, + auth: Option>, ) -> Result> { - let record = state - .db - .get_repo(&owner, &name) - .await? - .ok_or_else(|| AppError::RepoNotFound(format!("{owner}/{name}")))?; + let caller = auth.as_ref().map(|e| e.0 .0.as_str()); + let (record, _rules) = + crate::api::authorize_repo_read(&state, &owner, &name, caller, "/").await?; let pr = state .db @@ -138,12 +136,11 @@ pub async fn get_pr( pub async fn get_pr_diff( State(state): State, Path((owner, name, number)): Path<(String, String, i64)>, + auth: Option>, ) -> Result> { - let record = state - .db - .get_repo(&owner, &name) - .await? - .ok_or_else(|| AppError::RepoNotFound(format!("{owner}/{name}")))?; + let caller = auth.as_ref().map(|e| e.0 .0.as_str()); + let (record, rules) = + crate::api::authorize_repo_read(&state, &owner, &name, caller, "/").await?; let pr = state .db @@ -156,6 +153,25 @@ pub async fn get_pr_diff( .acquire(&record.owner_did, &record.name) .await .map_err(|e| AppError::Git(e.to_string()))?; + + // Withhold the entire diff if it touches a path the caller cannot read, so a + // PR diff cannot leak private-subtree content of an otherwise-public repo. + let touched = store::branch_diff_names(&disk_path, &pr.target_branch, &pr.source_branch) + .map_err(|e| AppError::Git(e.to_string()))?; + for p in &touched { + let gate = format!("/{}", p.trim_start_matches('/')); + if crate::visibility::visibility_check( + &rules, + record.is_public, + &record.owner_did, + caller, + &gate, + ) == crate::visibility::Decision::Deny + { + return Err(AppError::NotFound(format!("PR #{number} not found"))); + } + } + let diff = store::branch_diff(&disk_path, &pr.target_branch, &pr.source_branch) .map_err(|e| AppError::Git(e.to_string()))?; diff --git a/crates/gitlawb-node/src/api/repos.rs b/crates/gitlawb-node/src/api/repos.rs index 2886926..0e26398 100644 --- a/crates/gitlawb-node/src/api/repos.rs +++ b/crates/gitlawb-node/src/api/repos.rs @@ -186,12 +186,11 @@ pub async fn list_repos( pub async fn get_repo( State(state): State, Path((owner, name)): Path<(String, String)>, + auth: Option>, ) -> Result> { - let record = state - .db - .get_repo(&owner, &name) - .await? - .ok_or_else(|| AppError::RepoNotFound(format!("{owner}/{name}")))?; + let caller = auth.as_ref().map(|e| e.0 .0.as_str()); + let (record, _rules) = + crate::api::authorize_repo_read(&state, &owner, &name, caller, "/").await?; let count = state.db.count_stars(&record.id).await.unwrap_or(0); Ok(Json(to_response(&record, &state, count))) } @@ -200,12 +199,11 @@ pub async fn get_repo( pub async fn list_commits( State(state): State, Path((owner, name)): Path<(String, String)>, + auth: Option>, ) -> Result> { - let record = state - .db - .get_repo(&owner, &name) - .await? - .ok_or_else(|| AppError::RepoNotFound(format!("{owner}/{name}")))?; + let caller = auth.as_ref().map(|e| e.0 .0.as_str()); + let (record, _rules) = + crate::api::authorize_repo_read(&state, &owner, &name, caller, "/").await?; let disk_path = state .repo_store @@ -222,6 +220,7 @@ pub async fn list_commits( pub async fn get_blob( State(state): State, Path((owner, name, file_path)): Path<(String, String, String)>, + auth: Option>, ) -> Result { use axum::http::header; use axum::response::IntoResponse; @@ -238,11 +237,10 @@ pub async fn get_blob( return Err(AppError::BadRequest("invalid file path".into())); } - let record = state - .db - .get_repo(&owner, &name) - .await? - .ok_or_else(|| AppError::RepoNotFound(format!("{owner}/{name}")))?; + let caller = auth.as_ref().map(|e| e.0 .0.as_str()); + let gate_path = format!("/{file_path}"); + let (record, _rules) = + crate::api::authorize_repo_read(&state, &owner, &name, caller, &gate_path).await?; let disk_path = state .repo_store @@ -283,12 +281,11 @@ pub async fn get_blob( pub async fn get_tree_root( State(state): State, Path((owner, name)): Path<(String, String)>, + auth: Option>, ) -> Result> { - let record = state - .db - .get_repo(&owner, &name) - .await? - .ok_or_else(|| AppError::RepoNotFound(format!("{owner}/{name}")))?; + let caller = auth.as_ref().map(|e| e.0 .0.as_str()); + let (record, _rules) = + crate::api::authorize_repo_read(&state, &owner, &name, caller, "/").await?; let disk_path = state .repo_store @@ -305,12 +302,11 @@ pub async fn get_tree_root( pub async fn get_tree( State(state): State, Path((owner, name, tree_path)): Path<(String, String, String)>, + auth: Option>, ) -> Result> { - let record = state - .db - .get_repo(&owner, &name) - .await? - .ok_or_else(|| AppError::RepoNotFound(format!("{owner}/{name}")))?; + let caller = auth.as_ref().map(|e| e.0 .0.as_str()); + let (record, _rules) = + crate::api::authorize_repo_read(&state, &owner, &name, caller, "/").await?; let disk_path = state .repo_store @@ -916,12 +912,11 @@ pub async fn git_receive_pack( pub async fn list_refs( State(state): State, Path((owner, repo)): Path<(String, String)>, + auth: Option>, ) -> Result> { - let _record = state - .db - .get_repo(&owner, &repo) - .await? - .ok_or_else(|| AppError::RepoNotFound(format!("{owner}/{repo}")))?; + let caller = auth.as_ref().map(|e| e.0 .0.as_str()); + let (_record, _rules) = + crate::api::authorize_repo_read(&state, &owner, &repo, caller, "/").await?; let repo_slug = format!("{owner}/{repo}"); let refs = state.db.list_branch_cids(&repo_slug).await?; diff --git a/crates/gitlawb-node/src/git/store.rs b/crates/gitlawb-node/src/git/store.rs index 4d8c8eb..9ddaff7 100644 --- a/crates/gitlawb-node/src/git/store.rs +++ b/crates/gitlawb-node/src/git/store.rs @@ -314,6 +314,30 @@ pub fn branch_diff(repo_path: &Path, target_branch: &str, source_branch: &str) - Ok(String::from_utf8_lossy(&output.stdout).to_string()) } +/// The repo-relative paths changed by `git diff target...source` (the same range +/// as `branch_diff`). Used to enforce per-path visibility on a PR diff: if the +/// caller cannot read one of these paths, the diff is withheld. +pub fn branch_diff_names( + repo_path: &Path, + target_branch: &str, + source_branch: &str, +) -> Result> { + let output = Command::new("git") + .args([ + "diff", + "--name-only", + &format!("{target_branch}...{source_branch}"), + ]) + .current_dir(repo_path) + .output() + .context("failed to run git diff --name-only")?; + Ok(String::from_utf8_lossy(&output.stdout) + .lines() + .filter(|l| !l.is_empty()) + .map(|l| l.to_string()) + .collect()) +} + /// Merge source_branch into target_branch in a bare repo using a temporary worktree. /// Returns the new merge commit hash. pub fn merge_branch( @@ -399,3 +423,53 @@ pub fn repo_disk_path(repos_dir: &Path, owner_did: &str, repo_name: &str) -> Pat let owner_slug = owner_did.replace([':', '/'], "_"); repos_dir.join(owner_slug).join(format!("{repo_name}.git")) } + +#[cfg(test)] +mod tests { + use super::branch_diff_names; + use std::path::Path; + use std::process::Command; + + #[test] + fn branch_diff_names_lists_changed_paths() { + let td = tempfile::TempDir::new().unwrap(); + let work: &Path = td.path(); + let g = |args: &[&str]| { + assert!(Command::new("git") + .args(args) + .current_dir(work) + .status() + .unwrap() + .success()); + }; + g(&["init", "-q"]); + g(&["config", "user.email", "t@t"]); + g(&["config", "user.name", "t"]); + std::fs::write(work.join("base.txt"), b"base\n").unwrap(); + g(&["add", "."]); + g(&["commit", "-qm", "base"]); + let main = { + let o = Command::new("git") + .args(["symbolic-ref", "--short", "HEAD"]) + .current_dir(work) + .output() + .unwrap(); + String::from_utf8_lossy(&o.stdout).trim().to_string() + }; + g(&["checkout", "-q", "-b", "feature"]); + std::fs::create_dir_all(work.join("secret")).unwrap(); + std::fs::write(work.join("secret/x.txt"), b"secret\n").unwrap(); + g(&["add", "."]); + g(&["commit", "-qm", "feat"]); + + let names = branch_diff_names(work, &main, "feature").unwrap(); + assert!( + names.iter().any(|p| p == "secret/x.txt"), + "expected secret/x.txt in changed paths, got {names:?}" + ); + assert!( + !names.iter().any(|p| p == "base.txt"), + "unchanged file must not appear: {names:?}" + ); + } +} diff --git a/crates/gitlawb-node/src/server.rs b/crates/gitlawb-node/src/server.rs index 9baea20..52a8e0f 100644 --- a/crates/gitlawb-node/src/server.rs +++ b/crates/gitlawb-node/src/server.rs @@ -342,7 +342,8 @@ pub fn build_router(state: AppState) -> Router { .route( "/api/v1/repos/{owner}/{repo}/replicas", get(replicas::list_replicas), - ); + ) + .layer(middleware::from_fn(auth::optional_signature)); // git-upload-pack (clone/fetch) — same raised body limit as receive-pack so // large pack responses from the server don't get truncated on the client side. From 83cc2c9c0e1d062bf55d0dd1d5e11c609e019f65 Mon Sep 17 00:00:00 2001 From: beardthelion <56458543+beardthelion@users.noreply.github.com> Date: Sat, 13 Jun 2026 14:48:29 -0500 Subject: [PATCH 2/3] fix(node): gate PR reviews/comments and fork on repo read visibility The REST visibility gate added in this PR missed three read surfaces: list_reviews and list_comments served a private repo's PR reviews and comments to unauthenticated callers, and fork_repo cloned a full mirror of the source without checking the caller could read it. Route all three through authorize_repo_read at the repo root, matching the git-protocol gate. Also make branch_diff_names fail closed: it ignored git's exit status and returned an empty path list on error, which would let get_pr_diff serve a diff it should have withheld. Fix changelog.rs import order to satisfy cargo fmt. --- crates/gitlawb-node/src/api/changelog.rs | 2 +- crates/gitlawb-node/src/api/pulls.rs | 18 ++++++++---------- crates/gitlawb-node/src/api/repos.rs | 9 ++++----- crates/gitlawb-node/src/git/store.rs | 4 ++++ 4 files changed, 17 insertions(+), 16 deletions(-) diff --git a/crates/gitlawb-node/src/api/changelog.rs b/crates/gitlawb-node/src/api/changelog.rs index b8a69a7..40e52cb 100644 --- a/crates/gitlawb-node/src/api/changelog.rs +++ b/crates/gitlawb-node/src/api/changelog.rs @@ -4,8 +4,8 @@ use axum::extract::{Extension, Path, Query, State}; use axum::Json; use serde::Deserialize; -use crate::error::{AppError, Result}; use crate::auth::AuthenticatedDid; +use crate::error::{AppError, Result}; use crate::git::store; use crate::state::AppState; diff --git a/crates/gitlawb-node/src/api/pulls.rs b/crates/gitlawb-node/src/api/pulls.rs index 510c446..f9cf387 100644 --- a/crates/gitlawb-node/src/api/pulls.rs +++ b/crates/gitlawb-node/src/api/pulls.rs @@ -340,12 +340,11 @@ pub async fn create_review( pub async fn list_reviews( State(state): State, Path((owner, name, number)): Path<(String, String, i64)>, + auth: Option>, ) -> Result> { - let record = state - .db - .get_repo(&owner, &name) - .await? - .ok_or_else(|| AppError::RepoNotFound(format!("{owner}/{name}")))?; + let caller = auth.as_ref().map(|e| e.0 .0.as_str()); + let (record, _rules) = + crate::api::authorize_repo_read(&state, &owner, &name, caller, "/").await?; let pr = state .db @@ -399,12 +398,11 @@ pub async fn create_comment( pub async fn list_comments( State(state): State, Path((owner, name, number)): Path<(String, String, i64)>, + auth: Option>, ) -> Result> { - let record = state - .db - .get_repo(&owner, &name) - .await? - .ok_or_else(|| AppError::RepoNotFound(format!("{owner}/{name}")))?; + let caller = auth.as_ref().map(|e| e.0 .0.as_str()); + let (record, _rules) = + crate::api::authorize_repo_read(&state, &owner, &name, caller, "/").await?; let pr = state .db diff --git a/crates/gitlawb-node/src/api/repos.rs b/crates/gitlawb-node/src/api/repos.rs index 0e26398..357c95b 100644 --- a/crates/gitlawb-node/src/api/repos.rs +++ b/crates/gitlawb-node/src/api/repos.rs @@ -1019,11 +1019,10 @@ pub async fn fork_repo( Path((owner, name)): Path<(String, String)>, Json(req): Json, ) -> Result<(StatusCode, Json)> { - let source = state - .db - .get_repo(&owner, &name) - .await? - .ok_or_else(|| AppError::RepoNotFound(format!("{owner}/{name}")))?; + // Enforce read visibility on the source before cloning: an unauthorized + // caller must not be able to fork (full mirror) a repo they cannot read. + let (source, _rules) = + crate::api::authorize_repo_read(&state, &owner, &name, Some(auth.0.as_str()), "/").await?; let fork_name = req.name.unwrap_or_else(|| source.name.clone()); let forker_did = auth.0; diff --git a/crates/gitlawb-node/src/git/store.rs b/crates/gitlawb-node/src/git/store.rs index 9ddaff7..c3f0194 100644 --- a/crates/gitlawb-node/src/git/store.rs +++ b/crates/gitlawb-node/src/git/store.rs @@ -331,6 +331,10 @@ pub fn branch_diff_names( .current_dir(repo_path) .output() .context("failed to run git diff --name-only")?; + if !output.status.success() { + let stderr = String::from_utf8_lossy(&output.stderr); + bail!("git diff --name-only failed: {stderr}"); + } Ok(String::from_utf8_lossy(&output.stdout) .lines() .filter(|l| !l.is_empty()) From e906bbb6334717223083414a2336807bd517796d Mon Sep 17 00:00:00 2001 From: beardthelion <56458543+beardthelion@users.noreply.github.com> Date: Sat, 13 Jun 2026 16:05:54 -0500 Subject: [PATCH 3/3] fix(node): NUL-delimit branch_diff_names to keep path-gate enforcement exact git diff --name-only quotes and escapes paths containing newlines, so they no longer match the visibility globs in get_pr_diff. On a public repo where / is allowed but a subtree is denied, a touched restricted path could be missed by the deny check and the full PR diff disclosed. Use -z and split on NUL to preserve exact path bytes. --- crates/gitlawb-node/src/git/store.rs | 13 +++++++++---- 1 file changed, 9 insertions(+), 4 deletions(-) diff --git a/crates/gitlawb-node/src/git/store.rs b/crates/gitlawb-node/src/git/store.rs index c3f0194..b975914 100644 --- a/crates/gitlawb-node/src/git/store.rs +++ b/crates/gitlawb-node/src/git/store.rs @@ -326,6 +326,7 @@ pub fn branch_diff_names( .args([ "diff", "--name-only", + "-z", &format!("{target_branch}...{source_branch}"), ]) .current_dir(repo_path) @@ -335,10 +336,14 @@ pub fn branch_diff_names( let stderr = String::from_utf8_lossy(&output.stderr); bail!("git diff --name-only failed: {stderr}"); } - Ok(String::from_utf8_lossy(&output.stdout) - .lines() - .filter(|l| !l.is_empty()) - .map(|l| l.to_string()) + // Split on NUL (`-z`) so paths containing newlines keep their exact bytes; + // `--name-only` without `-z` would quote/escape such paths and they would no + // longer match the visibility globs in get_pr_diff, leaking the diff. + Ok(output + .stdout + .split(|b| *b == b'\0') + .filter(|s| !s.is_empty()) + .map(|s| String::from_utf8_lossy(s).into_owned()) .collect()) }