From f225324ee2f8afcc064765bcd6bf7c7f6ec7f8c4 Mon Sep 17 00:00:00 2001 From: Scott Chacon Date: Mon, 22 Jun 2026 10:16:37 +0200 Subject: [PATCH 1/3] docs: design for byte-correct (non-UTF-8) paths --- docs/non-utf8-paths-design.md | 208 ++++++++++++++++++++++++++++++++++ 1 file changed, 208 insertions(+) create mode 100644 docs/non-utf8-paths-design.md diff --git a/docs/non-utf8-paths-design.md b/docs/non-utf8-paths-design.md new file mode 100644 index 000000000..08688a9b7 --- /dev/null +++ b/docs/non-utf8-paths-design.md @@ -0,0 +1,208 @@ +# Design: byte-correct paths (non-UTF-8 path support) + +Status: draft / RFC +Author: (drafted with Claude Code) +Date: 2026-06-22 +Origin: Discord discussion — Caleb Owens & Byron — on `grit-lib` forcing OS +paths into UTF-8 `String`/`&str`. + +## Problem + +Git treats pathnames in trees, the index, and the wire protocol as **arbitrary +byte strings** (only `NUL` and, by component, `/` are special). On Unix the +kernel agrees: `OsStr` is bytes, not UTF-8. `grit` decodes many of these byte +paths into Rust `String`/`&str` via `String::from_utf8_lossy`, which **replaces +invalid bytes with U+FFFD**. That is lossy and irreversible: + +- A tree entry whose name is `caf\xe9` (Latin-1) round-trips to `caf`, + so a checkout, diff, or status against it produces the wrong filename — and a + re-commit would change the tree OID. +- Any operation keyed on the path string (pathspec match, rename detection, + index lookup) silently mismatches the real on-disk name. + +This is a correctness bug for repos containing non-UTF-8 paths, not a +theoretical nicety. Real-world repos have them (legacy encodings, deliberate +test fixtures, `git`'s own `t/` suite). + +### Byron's constraint (important) + +The fix is **not** "replace `String` with bare `BString`." `gix` did that and +regrets it: a bare `BString` is indistinguishable from "any byte blob," so it +becomes an *implicit* `RepoPath` whose meaning depends on call-site context. +We want a **dedicated newtype** that encodes the invariant ("this is a +repo-relative, `/`-separated path in Git's byte encoding") and makes the +filesystem boundary explicit and platform-aware. + +## Current state (grounded) + +Good news: the **storage layer already keeps bytes.** The lossiness is injected +at a small number of *boundaries*, not baked into the core data structures. + +| Layer | Type today | Lossy? | +|---|---|---| +| Tree entry name — `objects.rs:366` `TreeEntry.name` | `Vec` | ✅ byte-correct | +| Tree parse — `objects.rs:445` | `data[..nul].to_vec()` | ✅ byte-correct | +| Index entry path — `index.rs:87` `IndexEntry.path` | `Vec` | ✅ byte-correct | +| Index parse (v2/v3/v4) — `index.rs:2133,2140` | `to_vec()` | ✅ byte-correct | +| **DiffEntry** — `diff.rs:1373,1375` `old_path`/`new_path` | `Option` | ❌ `from_utf8_lossy` at `diff.rs:1550,1595,1638,1678` | +| **Pathspec** — `pathspec.rs:604,877,1328` | `&str` / `&[String]` | ❌ operates on lossy `&str` | +| **Working-tree join** — `porcelain/checkout.rs:125`, `diff.rs:2070` | `work_tree.join(rel: &str)` | ❌ joins a lossy `&str` | +| Pathspec source parse — `pathspec.rs:47` | `from_utf8_lossy` | ❌ | +| Tree → diff name — `diff.rs:1550`; merge — `merge_trees.rs:673` | `from_utf8_lossy` | ❌ | + +Existing platform-aware byte conversions already exist and are the model to +generalize: + +- `crlf.rs:417` `path_to_index_bytes(&Path)` — uses + `std::os::unix::ffi::OsStrExt::as_bytes()`, with a `to_string_lossy` fallback + on non-Unix. +- `ident_resolve.rs:135` — `OsStrExt::as_bytes()` on env vars. + +There is **no** `RepoPath` newtype today, and `bstr` is **not** a workspace +dependency. `grit-utils` is a benchmark binary (only `main.rs`, no `lib.rs`), so +it is *not* the home for shared types. Existing path modules live in `grit-lib`: +`git_path.rs`, `path.rs`, `pathspec.rs`, `quote_path.rs`, `transport_path.rs`, +`tree_path_follow.rs`. + +There are ~291 `to_string_lossy`/`from_utf8_lossy`/`to_str()` calls in +`grit-lib/src` and ~54 `String`-typed path fields. Most are at boundaries; the +core stays bytes. + +## Proposed design + +### 1. The newtype pair + +Add a new module `grit-lib/src/repo_path.rs` exporting an owned/borrowed pair, +modeled on `PathBuf`/`Path` and `String`/`str`: + +```rust +/// An owned, repo-relative, `/`-separated path in Git's byte encoding. +/// Invariants: no `NUL`; `/`-separated; not normalized for `.`/`..` here. +#[derive(Clone, PartialEq, Eq, PartialOrd, Ord, Hash)] +pub struct RepoPathBuf(BString); + +/// Borrowed counterpart. `&RepoPath` is to `RepoPathBuf` as `&Path` is to `PathBuf`. +#[derive(PartialEq, Eq, PartialOrd, Ord, Hash)] +pub struct RepoPath(BStr); // unsized, used behind & +``` + +Core API (intentionally small; grow as needed): + +```rust +impl RepoPath { + pub fn from_bytes(b: &[u8]) -> &RepoPath; // the canonical constructor + pub fn as_bytes(&self) -> &[u8]; + pub fn to_str(&self) -> Option<&str>; // Some iff valid UTF-8 + pub fn display(&self) -> impl Display; // lossy, for humans ONLY + pub fn file_name(&self) -> Option<&RepoPath>; + pub fn parent(&self) -> Option<&RepoPath>; + pub fn components(&self) -> impl Iterator; + pub fn join(&self, other: &RepoPath) -> RepoPathBuf; + + /// The platform boundary: build a real filesystem path under `root`. + pub fn to_fs_path(&self, root: &Path) -> PathBuf; // unix: bytes->OsStr; else lossy+warn +} + +impl RepoPathBuf { + pub fn from_bytes(v: Vec) -> Self; + pub fn from_fs_relative(p: &Path) -> Self; // OsStr bytes -> RepoPath, normalize `\`->`/` on win +} +``` + +Conversions are **explicit**: there is no `From`/`Deref`, +because that is exactly the implicit-coercion trap Byron warned about. Display +to a human goes through `.display()` (lossy and clearly named); programmatic +UTF-8 access goes through `.to_str() -> Option`. + +### 2. Where the bytes/OS boundary sits + +One function — `RepoPath::to_fs_path` / `RepoPathBuf::from_fs_relative` — owns +the conversion between Git's byte path and the OS path: + +- **Unix**: `OsStrExt::{as_bytes, from_bytes}` — zero loss. +- **Windows**: Git paths are UTF-8 (or GBK etc.) and `/`-separated; convert via + the existing lossy fallback and translate separators. Windows already can't + represent arbitrary bytes in filenames, so lossiness there is inherent to the + OS, not to us. (Matches `crlf.rs` strategy.) + +Every `work_tree.join(rel_str)` site (`checkout.rs:125`, `diff.rs:2070`, …) +becomes `repo_path.to_fs_path(work_tree)`. + +### 3. Pathspec matching on bytes + +`pathspec.rs` is the one place with real *logic* on paths (globbing, prefix, +icase). Migrate its core matchers to operate on `&[u8]`/`&RepoPath`. Git's own +pathspec matching is byte-oriented, so this also fixes latent glob bugs on +non-ASCII. `&str`-typed public entry points can keep thin shims during +migration. + +### 4. `bstr` dependency + +Add `bstr` to `[workspace.dependencies]` and depend on it from `grit-lib`. We +use it as the *inner representation and the byte-string utility library* (Unicode-aware +`to_str_lossy`, `Display`, search), but never expose bare `BStr`/`BString` in +public path signatures — only `RepoPath`/`RepoPathBuf`. + +## Migration plan (phased, each phase ships independently) + +The storage layer is already bytes, so we migrate **outward from the boundaries**. + +- **Phase 0 — foundation (no behavior change).** + Add `bstr`; implement `repo_path.rs` with the newtype, conversions, and a unit + test using a literal non-UTF-8 byte path (`b"caf\xe9"`). Wire `to_fs_path` + through the existing `crlf.rs` platform logic so there's one boundary impl. + +- **Phase 1 — DiffEntry vertical slice (the screenshot).** + `DiffEntry.old_path`/`new_path: Option`. Replace the + `from_utf8_lossy` at `diff.rs:1550/1595/1638/1678` with byte construction. + Update diff *formatters* to call `.display()` (or `quote_path` for `-z`/quoting). + Add a t-test fixture: diff/status over a repo containing a non-UTF-8 filename, + asserting the bytes survive a `commit`→`diff`→`checkout` round-trip. + +- **Phase 2 — working-tree I/O.** + Route checkout/apply/status writes through `to_fs_path`. This is where the + *user-visible* corruption (wrong files on disk) actually gets fixed. + +- **Phase 3 — pathspec on bytes.** + Migrate matcher cores to `&[u8]`; keep `&str` shims; delete shims once callers + pass `RepoPath`. + +- **Phase 4 — sweep the long tail.** + Remaining `String` path fields (`apply.rs`, `show.rs`, `am.rs`, the CLI + command structs that mirror `DiffEntry`). Mostly mechanical once the newtype + exists. + +Each phase is independently mergeable and verified against the `data/tests` t-suite. + +## Testing strategy + +1. **Unit**: round-trip `b"caf\xe9/\xff.txt"` through `RepoPathBuf` → bytes, + `from_fs_relative`/`to_fs_path` on Unix, and assert `to_str()` is `None`. +2. **Integration t-test** (new `data/tests/.../*.toml`): create a file with a + non-UTF-8 name, `add`/`commit`, then `diff`, `status`, `ls-files`, + `checkout` and assert byte-exact output and that the working-tree file + matches the original bytes. Compare against real `git` output. +3. **Regression**: full t-suite must stay green at each phase (boundaries change + type, not behavior, for UTF-8 paths). + +## Open questions + +1. **Home**: `grit-lib/src/repo_path.rs` (proposed) vs. a new `grit-path` crate + shared by `grit-lib`/`grit-cli`/`grit-protocol`. Lean module-first; promote + to a crate only if `grit-cli`/`grit-protocol` need it directly. +2. **`bstr` vs hand-rolled**: `bstr` buys Unicode-aware lossy display + search + for ~one dependency. Alternative is `Vec` + `os_str_bytes`. Recommend + `bstr` (matches the ecosystem `gix` is converging toward). +3. **Quoting**: how `RepoPath::display` interacts with the existing + `quote_path.rs` (`core.quotePath`, `-z`) — display is for logs; user-facing + command output must continue to route through `quote_path`. +4. **Wire/protocol paths**: `transport_path.rs` and protocol pathnames are also + bytes; in scope for a later phase but not Phases 0–2. + +## Recommendation + +Adopt the `RepoPath`/`RepoPathBuf` newtype in `grit-lib` backed by `bstr`, with +the single platform boundary in `to_fs_path`/`from_fs_relative`. Do **not** +expose bare `BString`. Execute Phase 0 + Phase 1 first (foundation + the +`DiffEntry` slice from the discussion) as a reviewable proof point before +committing to the full sweep. From 7a742e1f7b3e6cc9fadb481322f4d0146db29d00 Mon Sep 17 00:00:00 2001 From: Scott Chacon Date: Mon, 22 Jun 2026 10:53:20 +0200 Subject: [PATCH 2/3] feat(repo_path): add bstr-backed RepoPath/RepoPathBuf newtype (Phase 0) --- Cargo.lock | 1 + Cargo.toml | 1 + grit-lib/Cargo.toml | 1 + grit-lib/src/lib.rs | 1 + grit-lib/src/repo_path.rs | 290 ++++++++++++++++++++++++++++++++++++++ 5 files changed, 294 insertions(+) create mode 100644 grit-lib/src/repo_path.rs diff --git a/Cargo.lock b/Cargo.lock index 91ecdf1eb..311ed9e18 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -552,6 +552,7 @@ name = "grit-lib" version = "0.5.0" dependencies = [ "base64", + "bstr", "encoding_rs", "filetime", "flate2", diff --git a/Cargo.toml b/Cargo.toml index 71cfe54cb..50b9dd764 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -23,6 +23,7 @@ time = { version = "0.3", features = ["formatting", "parsing"] } hex = "0.4" tempfile = "3" similar = "3" +bstr = "1" imara-diff = { version = "0.2", features = ["unified_diff"] } merge3 = "0.2" regex = "1" diff --git a/grit-lib/Cargo.toml b/grit-lib/Cargo.toml index cb98b9bb5..dc019f940 100644 --- a/grit-lib/Cargo.toml +++ b/grit-lib/Cargo.toml @@ -33,6 +33,7 @@ time.workspace = true hex.workspace = true tempfile.workspace = true similar.workspace = true +bstr.workspace = true merge3.workspace = true imara-diff.workspace = true regex.workspace = true diff --git a/grit-lib/src/lib.rs b/grit-lib/src/lib.rs index e473ec898..212b556b7 100644 --- a/grit-lib/src/lib.rs +++ b/grit-lib/src/lib.rs @@ -206,6 +206,7 @@ pub mod refs_fsck; pub mod refspec; pub mod reftable; pub mod repo; +pub mod repo_path; pub mod rerere; pub mod resolve_undo; pub mod rev_list; diff --git a/grit-lib/src/repo_path.rs b/grit-lib/src/repo_path.rs new file mode 100644 index 000000000..802b3eb82 --- /dev/null +++ b/grit-lib/src/repo_path.rs @@ -0,0 +1,290 @@ +//! Byte-correct repository paths. +//! +//! Git stores pathnames in trees, the index, and on the wire as raw byte +//! strings (only `NUL` is forbidden, and `/` separates components). On Unix the +//! OS agrees: `OsStr` is bytes. Representing these paths as `String`/`&str` +//! forces a lossy `from_utf8_lossy` that replaces invalid bytes with U+FFFD, +//! silently corrupting non-UTF-8 names. +//! +//! [`RepoPathBuf`] / [`RepoPath`] are an owned/borrowed pair (mirroring +//! `PathBuf`/`Path`) that hold the bytes losslessly. They are a *dedicated* +//! type, not a bare `BString`: a value of this type means "a repo-relative, +//! `/`-separated Git path". Conversions to/from `str` and the filesystem are +//! explicit, so a path can never implicitly masquerade as an arbitrary byte +//! blob (the `gix`/`BString` pitfall). + +use bstr::ByteSlice; +use std::borrow::Borrow; +use std::fmt; +use std::ops::Deref; +use std::path::{Path, PathBuf}; + +/// An owned, repo-relative, `/`-separated path in Git's byte encoding. +/// +/// Invariants the type *intends* (not enforced on construction): no interior +/// `NUL`, `/`-separated. Normalization of `.`/`..` is the caller's job. +#[derive(Clone, Default, PartialEq, Eq, PartialOrd, Ord, Hash)] +pub struct RepoPathBuf(Vec); + +/// Borrowed counterpart of [`RepoPathBuf`]; `&RepoPath` is to `RepoPathBuf` as +/// `&Path` is to `PathBuf`. +#[derive(PartialEq, Eq, PartialOrd, Ord, Hash)] +#[repr(transparent)] +pub struct RepoPath([u8]); + +impl RepoPath { + /// Borrow a byte slice as a repo path. Bytes are taken verbatim. + #[must_use] + pub fn from_bytes(bytes: &[u8]) -> &RepoPath { + // SAFETY: `RepoPath` is `#[repr(transparent)]` over `[u8]`, so `&[u8]` + // and `&RepoPath` have identical layout. This mirrors how `std::path::Path` + // wraps `OsStr`. + unsafe { &*(bytes as *const [u8] as *const RepoPath) } + } + + /// Borrow a `&str` as a repo path (UTF-8 is a subset of valid byte paths). + #[must_use] + #[allow(clippy::should_implement_trait)] // intentional inherent ctor, mirrors `Path::new` + pub fn from_str(s: &str) -> &RepoPath { + RepoPath::from_bytes(s.as_bytes()) + } + + /// The raw bytes of the path. + #[must_use] + pub fn as_bytes(&self) -> &[u8] { + &self.0 + } + + /// `true` if the path is empty. + #[must_use] + pub fn is_empty(&self) -> bool { + self.0.is_empty() + } + + /// The path as `&str` iff it is valid UTF-8, else `None`. Use this for + /// programmatic comparison; never for human display (see [`RepoPath::display`]). + #[must_use] + pub fn to_str(&self) -> Option<&str> { + std::str::from_utf8(&self.0).ok() + } + + /// A lossy `Display` adapter for humans/logs. Invalid bytes render as + /// U+FFFD. User-facing command output that must round-trip should route + /// through `quote_path` instead. + #[must_use] + pub fn display(&self) -> RepoPathDisplay<'_> { + RepoPathDisplay(&self.0) + } + + /// The final component (after the last `/`), or `None` if empty or ends in `/`. + #[must_use] + pub fn file_name(&self) -> Option<&RepoPath> { + if self.0.is_empty() { + return None; + } + match self.0.rfind_byte(b'/') { + Some(i) => { + let last = &self.0[i + 1..]; + if last.is_empty() { + None + } else { + Some(RepoPath::from_bytes(last)) + } + } + None => Some(self), + } + } + + /// The path up to (but not including) the last `/`, or `None` if there is + /// no separator. + #[must_use] + pub fn parent(&self) -> Option<&RepoPath> { + self.0 + .rfind_byte(b'/') + .map(|i| RepoPath::from_bytes(&self.0[..i])) + } + + /// Iterate the `/`-separated components, skipping empty ones. + pub fn components(&self) -> impl Iterator { + self.0 + .split(|&b| b == b'/') + .filter(|c| !c.is_empty()) + .map(RepoPath::from_bytes) + } + + /// Join another repo path onto this one with a `/` separator. + #[must_use] + pub fn join(&self, other: &RepoPath) -> RepoPathBuf { + let mut out = Vec::with_capacity(self.0.len() + 1 + other.0.len()); + out.extend_from_slice(&self.0); + if !self.0.is_empty() && !self.0.ends_with(b"/") { + out.push(b'/'); + } + out.extend_from_slice(&other.0); + RepoPathBuf(out) + } + + /// Resolve this repo-relative path against an on-disk `root`, producing a + /// real filesystem path. This is the single OS boundary: on Unix the bytes + /// map directly to an `OsStr`; on other platforms they go through a lossy + /// UTF-8 conversion (those platforms cannot represent arbitrary bytes in + /// filenames anyway). + #[must_use] + pub fn to_fs_path(&self, root: &Path) -> PathBuf { + #[cfg(unix)] + { + use std::ffi::OsStr; + use std::os::unix::ffi::OsStrExt; + root.join(OsStr::from_bytes(&self.0)) + } + #[cfg(not(unix))] + { + // `Path` accepts `/` as a separator on Windows. + root.join(&*String::from_utf8_lossy(&self.0)) + } + } +} + +impl RepoPathBuf { + /// Take ownership of raw bytes as a repo path. + #[must_use] + pub fn from_bytes(bytes: Vec) -> RepoPathBuf { + RepoPathBuf(bytes) + } + + /// Build a repo path from a UTF-8 `String` (lossless; UTF-8 ⊂ byte paths). + #[must_use] + pub fn from_string(s: String) -> RepoPathBuf { + RepoPathBuf(s.into_bytes()) + } + + /// Build a repo-relative path from a filesystem path. On Unix the `OsStr` + /// bytes are taken verbatim; elsewhere `\` separators are normalized to `/`. + #[must_use] + pub fn from_fs_relative(path: &Path) -> RepoPathBuf { + #[cfg(unix)] + { + use std::os::unix::ffi::OsStrExt; + RepoPathBuf(path.as_os_str().as_bytes().to_vec()) + } + #[cfg(not(unix))] + { + RepoPathBuf(path.to_string_lossy().replace('\\', "/").into_bytes()) + } + } + + /// Consume into the underlying bytes. + #[must_use] + pub fn into_bytes(self) -> Vec { + self.0 + } +} + +impl Deref for RepoPathBuf { + type Target = RepoPath; + + fn deref(&self) -> &RepoPath { + RepoPath::from_bytes(&self.0) + } +} + +impl Borrow for RepoPathBuf { + fn borrow(&self) -> &RepoPath { + self + } +} + +impl ToOwned for RepoPath { + type Owned = RepoPathBuf; + + fn to_owned(&self) -> RepoPathBuf { + RepoPathBuf(self.0.to_vec()) + } +} + +impl AsRef for RepoPath { + fn as_ref(&self) -> &RepoPath { + self + } +} + +impl AsRef for RepoPathBuf { + fn as_ref(&self) -> &RepoPath { + self + } +} + +/// Lossy `Display`/`Debug` adapter returned by [`RepoPath::display`]. +pub struct RepoPathDisplay<'a>(&'a [u8]); + +impl fmt::Display for RepoPathDisplay<'_> { + fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result { + // `BStr`'s Display renders invalid UTF-8 as U+FFFD without allocating. + fmt::Display::fmt(self.0.as_bstr(), f) + } +} + +impl fmt::Debug for RepoPath { + fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result { + write!(f, "RepoPath({:?})", self.0.as_bstr()) + } +} + +impl fmt::Debug for RepoPathBuf { + fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result { + write!(f, "RepoPathBuf({:?})", self.0.as_bstr()) + } +} + +#[cfg(test)] +mod tests { + use super::*; + + /// A non-UTF-8 byte path survives a full round-trip and is reported as + /// non-UTF-8 by `to_str`. + #[test] + fn non_utf8_round_trip() { + let raw = b"caf\xe9/\xff.txt"; + let p = RepoPathBuf::from_bytes(raw.to_vec()); + + assert_eq!(p.as_bytes(), raw); + assert!(p.to_str().is_none(), "invalid UTF-8 must not decode"); + // Lossy display inserts replacement chars but never panics. + assert!(p.display().to_string().contains('\u{fffd}')); + } + + #[test] + fn components_file_name_parent() { + let p = RepoPathBuf::from_string("a/b/c.txt".into()); + let comps: Vec<_> = p.components().map(|c| c.as_bytes().to_vec()).collect(); + assert_eq!(comps, vec![b"a".to_vec(), b"b".to_vec(), b"c.txt".to_vec()]); + assert_eq!(p.file_name().map(RepoPath::as_bytes), Some(&b"c.txt"[..])); + assert_eq!(p.parent().map(RepoPath::as_bytes), Some(&b"a/b"[..])); + + let top = RepoPathBuf::from_string("readme".into()); + assert_eq!(top.file_name().map(RepoPath::as_bytes), Some(&b"readme"[..])); + assert!(top.parent().is_none()); + } + + #[test] + fn join_inserts_single_separator() { + let a = RepoPathBuf::from_string("a/b".into()); + let joined = a.join(RepoPath::from_str("c")); + assert_eq!(joined.as_bytes(), b"a/b/c"); + + // Empty base does not produce a leading separator. + let empty = RepoPathBuf::default(); + assert_eq!(empty.join(RepoPath::from_str("x")).as_bytes(), b"x"); + } + + #[cfg(unix)] + #[test] + fn fs_path_round_trip_unix() { + let raw = b"sub/\xe9dir/file.txt"; + let p = RepoPathBuf::from_bytes(raw.to_vec()); + let fs = p.to_fs_path(Path::new("/tmp/repo")); + // Round-trips back to the same repo-relative bytes when re-derived. + let back = RepoPathBuf::from_fs_relative(Path::new(&fs)); + assert!(back.as_bytes().ends_with(raw)); + } +} From 1097a8ea51de1d2a45693a19cebf68e0ff5ef298 Mon Sep 17 00:00:00 2001 From: Scott Chacon Date: Mon, 22 Jun 2026 12:32:46 +0200 Subject: [PATCH 3/3] =?UTF-8?q?feat(diff):=20byte-true=20paths=20in=20Diff?= =?UTF-8?q?Entry=20=E2=80=94=20Option=20(Phase=201)?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Flip DiffEntry.old_path/new_path from Option to Option so non-UTF-8 Git paths survive verbatim instead of being mangled by from_utf8_lossy (U+FFFD). Tree-to-tree diffs are now fully byte-true, including non-UTF-8 directory prefixes carried through recursion; many worktree FS reads now resolve via RepoPath::to_fs_path. Index/worktree path sources and pathspec/quote-path matching remain lossy for now (marked // TODO(byte-paths)) — deferred to Phase 2 (FS I/O) and Phase 3 (pathspec) per docs/non-utf8-paths-design.md. RepoPath gains a lossy Display (mirrors bstr::BStr) and byte-exact PartialEq. Adds grit-lib/tests/non_utf8_paths.rs covering add/modify/nested-dir round-trips. Workspace builds clean; diff/status/log/stash t-tests pass (pre-existing --graph and ignore-glob failures unrelated). Co-Authored-By: Claude Opus 4.8 (1M context) --- grit-cli/src/commands/add.rs | 4 +- grit-cli/src/commands/diff.rs | 14 +- grit-cli/src/commands/switch.rs | 7 +- grit-cli/src/output.rs | 2 +- grit-cli/src/ui.rs | 10 +- grit-git/src/commands/add.rs | 3 +- grit-git/src/commands/add_interactive.rs | 4 +- grit-git/src/commands/add_patch.rs | 7 +- grit-git/src/commands/commit.rs | 12 +- grit-git/src/commands/diff.rs | 233 +++++++++++++---------- grit-git/src/commands/diff_files.rs | 82 ++++---- grit-git/src/commands/diff_index.rs | 164 ++++++++++------ grit-git/src/commands/diff_pairs.rs | 11 +- grit-git/src/commands/diff_tree.rs | 98 ++++++---- grit-git/src/commands/format_patch.rs | 73 ++++--- grit-git/src/commands/history.rs | 10 +- grit-git/src/commands/log.rs | 99 ++++++---- grit-git/src/commands/merge.rs | 11 +- grit-git/src/commands/rebase.rs | 3 +- grit-git/src/commands/remerge_diff.rs | 53 ++++-- grit-git/src/commands/replay.rs | 7 +- grit-git/src/commands/rev_list.rs | 4 +- grit-git/src/commands/show.rs | 70 ++++--- grit-git/src/commands/stash.rs | 49 +++-- grit-git/src/commands/status.rs | 51 +++-- grit-git/src/commands/submodule.rs | 44 +++-- grit-git/src/main.rs | 2 +- grit-lib/src/commit_graph_file.rs | 2 +- grit-lib/src/diff.rs | 215 +++++++++++---------- grit-lib/src/difftool.rs | 27 +-- grit-lib/src/fast_export.rs | 38 ++-- grit-lib/src/fetch_submodules.rs | 6 +- grit-lib/src/line_log.rs | 9 +- grit-lib/src/merge_diff.rs | 9 +- grit-lib/src/patch_ids.rs | 7 +- grit-lib/src/porcelain/checkout.rs | 10 +- grit-lib/src/porcelain/status.rs | 4 +- grit-lib/src/push_submodules.rs | 7 +- grit-lib/src/repo_path.rs | 77 ++++++++ grit-lib/tests/non_utf8_paths.rs | 91 +++++++++ 40 files changed, 1036 insertions(+), 593 deletions(-) create mode 100644 grit-lib/tests/non_utf8_paths.rs diff --git a/grit-cli/src/commands/add.rs b/grit-cli/src/commands/add.rs index 425e5a127..402a9891f 100644 --- a/grit-cli/src/commands/add.rs +++ b/grit-cli/src/commands/add.rs @@ -69,7 +69,7 @@ pub fn stage(repo: &Repository, selectors: &[String]) -> Result { let mut staged = 0; for entry in &model.unstaged { let path = entry_path(entry); - if !matches(path) { + if !matches(&path) { continue; } if entry.status == DiffStatus::Deleted { @@ -77,7 +77,7 @@ pub fn stage(repo: &Repository, selectors: &[String]) -> Result { staged += 1; } } else { - stage_worktree_file(repo, &work_tree, path, &mut index)?; + stage_worktree_file(repo, &work_tree, &path, &mut index)?; staged += 1; } } diff --git a/grit-cli/src/commands/diff.rs b/grit-cli/src/commands/diff.rs index feefdcd92..ca6065bb8 100644 --- a/grit-cli/src/commands/diff.rs +++ b/grit-cli/src/commands/diff.rs @@ -158,7 +158,7 @@ fn worktree_changes(repo: &grit_lib::repo::Repository) -> Result .map(|e| { let (old_text, old_bin) = blob_text(&repo.odb, &e.old_oid)?; let (new_text, new_bin) = match &e.new_path { - Some(p) => file_text(&work_tree.join(p)), + Some(p) => file_text(&p.to_fs_path(work_tree)), None => (String::new(), false), }; Ok(file_change(e, old_text, new_text, old_bin || new_bin)) @@ -194,15 +194,21 @@ fn sort_entries(entries: &mut [DiffEntry]) { } fn file_change(e: DiffEntry, old_text: String, new_text: String, binary: bool) -> FileChange { + // CLI display boundary: render byte paths lossily into the text-diff model. let path = e .new_path - .clone() - .or_else(|| e.old_path.clone()) + .as_deref() + .or(e.old_path.as_deref()) + .map(|p| p.to_string_lossy().into_owned()) .unwrap_or_default(); // Record the pre-rename path only on a true rename (old differs from the // displayed path). For a deletion the displayed path *is* the old path, so // this must not treat it as a rename. - let old_path = e.old_path.filter(|op| *op != path); + let old_path = e + .old_path + .as_deref() + .map(|p| p.to_string_lossy().into_owned()) + .filter(|op| *op != path); FileChange { path, old_path, diff --git a/grit-cli/src/commands/switch.rs b/grit-cli/src/commands/switch.rs index 68e767038..6d9a5ac11 100644 --- a/grit-cli/src/commands/switch.rs +++ b/grit-cli/src/commands/switch.rs @@ -104,8 +104,11 @@ fn guard_untracked( continue; } if let Some(path) = &change.new_path { - if untracked.contains(path.as_str()) { - bail!("untracked file '{path}' would be overwritten — move or remove it first"); + if path.to_str().is_some_and(|s| untracked.contains(s)) { + bail!( + "untracked file '{}' would be overwritten — move or remove it first", + path.to_string_lossy() + ); } } } diff --git a/grit-cli/src/output.rs b/grit-cli/src/output.rs index 7a36a9585..d5c49e08e 100644 --- a/grit-cli/src/output.rs +++ b/grit-cli/src/output.rs @@ -170,7 +170,7 @@ pub fn change_status_str(status: &DiffStatus) -> &'static str { /// Serialize a [`DiffEntry`] as a [`ChangeJson`]. pub fn change_json(entry: &DiffEntry) -> ChangeJson { ChangeJson { - path: entry_path(entry).to_owned(), + path: entry_path(entry).into_owned(), status: change_status_str(&entry.status).to_owned(), } } diff --git a/grit-cli/src/ui.rs b/grit-cli/src/ui.rs index 92946ccf2..61b0ed9cd 100644 --- a/grit-cli/src/ui.rs +++ b/grit-cli/src/ui.rs @@ -1,8 +1,10 @@ //! Small output-formatting helpers shared by `grit` commands. +use std::borrow::Cow; use std::io::IsTerminal; use grit_lib::diff::{DiffEntry, DiffStatus}; +use grit_lib::repo_path::RepoPath; use crate::context::{self, CommitSummary}; @@ -96,13 +98,15 @@ fn truncate(s: &str, max: usize) -> String { out } -/// The path a diff entry refers to (prefers the new side, falls back to old). -pub fn entry_path(entry: &DiffEntry) -> &str { +/// The path a diff entry refers to (prefers the new side, falls back to old), +/// rendered lossily for display/matching. +pub fn entry_path(entry: &DiffEntry) -> Cow<'_, str> { entry .new_path .as_deref() .or(entry.old_path.as_deref()) - .unwrap_or("?") + .map(RepoPath::to_string_lossy) + .unwrap_or(Cow::Borrowed("?")) } /// A single-character glyph summarizing a change. diff --git a/grit-git/src/commands/add.rs b/grit-git/src/commands/add.rs index 039e2f2b8..519045b66 100644 --- a/grit-git/src/commands/add.rs +++ b/grit-git/src/commands/add.rs @@ -1046,7 +1046,8 @@ fn maybe_emit_interactive_add_sparse_index_trace( ) -> Result<()> { let entries = diff_index_to_worktree(&repo.odb, index, work_tree, false, false)?; if entries.iter().any(|entry| { - entry.status != DiffStatus::Unmerged && path_under_sparse_index_dir(raw_index, entry.path()) + entry.status != DiffStatus::Unmerged + && path_under_sparse_index_dir(raw_index, &entry.path().to_string_lossy()) }) { emit_index_trace_region("ensure_full_index"); } diff --git a/grit-git/src/commands/add_interactive.rs b/grit-git/src/commands/add_interactive.rs index b7cb419bd..fbbdc2edf 100644 --- a/grit-git/src/commands/add_interactive.rs +++ b/grit-git/src/commands/add_interactive.rs @@ -69,7 +69,7 @@ fn read_new_side(odb: &Odb, entry: &DiffEntry, work_tree: &Path, from_worktree: return Vec::new(); } if from_worktree { - let p = work_tree.join(entry.path()); + let p = entry.path().to_fs_path(work_tree); std::fs::read(&p).unwrap_or_default() } else { read_blob(odb, &entry.new_oid) @@ -832,7 +832,7 @@ fn emit_cached_diff(ctx: &mut AddIContext, out: &mut impl Write, paths: &[String let entries = diff_index_to_tree(&ctx.repo.odb, &ctx.index, ctx.head_tree.as_ref(), false)?; let filtered: Vec = entries .into_iter() - .filter(|e| paths.iter().any(|p| p == e.path())) + .filter(|e| paths.iter().any(|p| e.path() == p)) .collect(); let mut buf: Vec = Vec::new(); crate::commands::diff::write_patch_from_pairs(&mut buf, &filtered, ctx.repo) diff --git a/grit-git/src/commands/add_patch.rs b/grit-git/src/commands/add_patch.rs index c5fc7ccfd..ee5e57fa8 100644 --- a/grit-git/src/commands/add_patch.rs +++ b/grit-git/src/commands/add_patch.rs @@ -880,13 +880,13 @@ pub(crate) fn run_add_patch_with_reader( if e.status == DiffStatus::Unmerged { return false; } - patch_path_filter_matches(e.path(), &filter_paths) + patch_path_filter_matches(&e.path().to_string_lossy(), &filter_paths) }); entries.sort_by(|a, b| a.path().cmp(b.path())); if entries .iter() - .any(|entry| path_under_sparse_index_dir(&raw_index, entry.path())) + .any(|entry| path_under_sparse_index_dir(&raw_index, &entry.path().to_string_lossy())) { emit_index_trace_region("ensure_full_index"); } @@ -930,7 +930,8 @@ pub(crate) fn run_add_patch_with_reader( let mut binary_count = 0usize; for entry in entries { - let path_str = entry.path().to_owned(); + // TODO(byte-paths): lossy worktree path until add-patch FS I/O migrates (Phase 2). + let path_str = entry.path().to_string_lossy().into_owned(); let path_bytes = path_str.as_bytes(); let Some(ie) = index.get(path_bytes, 0).cloned() else { diff --git a/grit-git/src/commands/commit.rs b/grit-git/src/commands/commit.rs index 105e4e2e8..7f1594465 100644 --- a/grit-git/src/commands/commit.rs +++ b/grit-git/src/commands/commit.rs @@ -1046,8 +1046,8 @@ pub fn run(mut args: Args) -> Result<()> { }; let unmerged_full = crate::commands::status::unmerged_paths_and_mask(&index); let unmerged_keys: BTreeSet = unmerged_full.keys().cloned().collect(); - staged.retain(|e| !unmerged_keys.contains(e.path())); - unstaged.retain(|e| !unmerged_keys.contains(e.path())); + staged.retain(|e| !unmerged_keys.contains(&*e.path().to_string_lossy())); + unstaged.retain(|e| !unmerged_keys.contains(&*e.path().to_string_lossy())); // `-u` / `--untracked-files`: `no` suppresses the untracked listing entirely (Git prints // "Untracked files not listed (use -u option to show untracked files)" instead). The default // and `normal`/`all` collect untracked files (t7508 commit -uno --dry-run). @@ -2100,7 +2100,7 @@ fn print_dry_run( let mut u = unstaged.to_vec(); let mut extra_ut = Vec::new(); for e in staged_out { - if unstaged_paths.contains(e.path()) { + if unstaged_paths.contains(&*e.path().to_string_lossy()) { continue; } // Git: fully staged paths excluded from the commit are listed like untracked in @@ -2121,7 +2121,7 @@ fn print_dry_run( .into_iter() .filter(|e| { submodule_decisions - .get(e.path()) + .get(&*e.path().to_string_lossy()) .map(|(_, _, suppress_staged)| !suppress_staged) .unwrap_or(true) }) @@ -2130,7 +2130,7 @@ fn print_dry_run( .into_iter() .filter(|e| { submodule_decisions - .get(e.path()) + .get(&*e.path().to_string_lossy()) .map(|(_, suppress_unstaged, _)| !suppress_unstaged) .unwrap_or(true) }) @@ -2185,7 +2185,7 @@ fn print_dry_run( for entry in &unstaged_show { let label = status_label_unstaged(entry.status); let suffix = submodule_decisions - .get(entry.path()) + .get(&*entry.path().to_string_lossy()) .map(|(annotation, _, _)| annotation.as_str()) .unwrap_or(""); writeln!(out, "\t{label}: {}{suffix}", entry.path())?; diff --git a/grit-git/src/commands/diff.rs b/grit-git/src/commands/diff.rs index d89339e44..e4e6d7c79 100644 --- a/grit-git/src/commands/diff.rs +++ b/grit-git/src/commands/diff.rs @@ -37,6 +37,7 @@ use grit_lib::diff::{ DiffStatus, }; use grit_lib::diffstat::{terminal_columns, write_diffstat_block, DiffstatOptions, FileStatInput}; +use grit_lib::repo_path::{RepoPath, RepoPathBuf}; use grit_lib::error::Error; use grit_lib::index::{Index, MODE_GITLINK}; use grit_lib::merge_base::{ @@ -363,7 +364,7 @@ fn submodule_head_oid_for_gitlink_patch(entry: &DiffEntry, work_tree: Option<&Pa return entry.new_oid; } work_tree - .and_then(|wt| grit_lib::diff::read_submodule_head_oid(&wt.join(entry.path()))) + .and_then(|wt| grit_lib::diff::read_submodule_head_oid(&entry.path().to_fs_path(wt))) .unwrap_or_else(zero_oid) } @@ -490,7 +491,13 @@ fn gitlink_suppressed_ignore_all( if cli.is_some() { return false; } - submodule_effective_ignore_token(entry.path(), path_to_name, gm_name_ignore, cfg).as_deref() + submodule_effective_ignore_token( + &entry.path().to_string_lossy(), + path_to_name, + gm_name_ignore, + cfg, + ) + .as_deref() == Some("all") } @@ -512,8 +519,8 @@ fn gitlink_same_oid_suppressed( if cli.is_some() { return false; } - let path = entry.path(); - let token = submodule_effective_ignore_token(path, path_to_name, gm_name_ignore, cfg); + let path = entry.path().to_string_lossy(); + let token = submodule_effective_ignore_token(&path, path_to_name, gm_name_ignore, cfg); if token.as_deref() == Some("none") { return false; } @@ -523,7 +530,7 @@ fn gitlink_same_oid_suppressed( let Some(wt) = work_tree else { return false; }; - let flags = grit_lib::diff::submodule_porcelain_flags(wt, path, entry.old_oid); + let flags = grit_lib::diff::submodule_porcelain_flags(wt, &path, entry.old_oid); if token.as_deref() == Some("untracked") { return !flags.modified && flags.untracked; } @@ -545,9 +552,9 @@ fn submodule_gitlink_patch_plus_suffix( let Some(wt) = work_tree else { return String::new(); }; - let path = entry.path(); - let flags = grit_lib::diff::submodule_porcelain_flags(wt, path, entry.old_oid); - let eff = submodule_effective_ignore_token(path, path_to_name, gm_name_ignore, diff_cfg); + let path = entry.path().to_string_lossy(); + let flags = grit_lib::diff::submodule_porcelain_flags(wt, &path, entry.old_oid); + let eff = submodule_effective_ignore_token(&path, path_to_name, gm_name_ignore, diff_cfg); let cli_none = ignore_submodules_cli.is_some_and(|s| s.eq_ignore_ascii_case("none")); let count_untracked = !submodule_ignore.ignore_untracked && (cli_none || eff.as_deref() == Some("none")); @@ -610,7 +617,9 @@ fn write_submodule_log_lines( writeln!(out, "Submodule {} {}..{}:", entry.path(), old_a, new_a)?; return Ok(()); }; - let Some(sub_repo) = open_submodule_repo_for_log(&repo.git_dir, Some(wt), entry.path()) else { + let Some(sub_repo) = + open_submodule_repo_for_log(&repo.git_dir, Some(wt), &entry.path().to_string_lossy()) + else { writeln!( out, "Submodule {} {}...{} (commits not present)", @@ -2995,10 +3004,11 @@ pub fn run(mut args: Args) -> Result<()> { let z = zero_oid(); names .into_iter() + // TODO(byte-paths): combined_diff_paths yields lossy String paths (Phase 2/4). .map(|p| DiffEntry { status: DiffStatus::Modified, - old_path: Some(p.clone()), - new_path: Some(p), + old_path: Some(RepoPathBuf::from_string(p.clone())), + new_path: Some(RepoPathBuf::from_string(p)), old_mode: "100644".to_string(), new_mode: "100644".to_string(), old_oid: z, @@ -3041,8 +3051,9 @@ pub fn run(mut args: Args) -> Result<()> { } else { vec![DiffEntry { status: DiffStatus::Modified, - old_path: Some(a.path), - new_path: Some(b.path), + // TODO(byte-paths): blob-diff spec paths are lossy String (Phase 2/4). + old_path: Some(RepoPathBuf::from_string(a.path)), + new_path: Some(RepoPathBuf::from_string(b.path)), old_mode: a.mode, new_mode: b.mode, old_oid: a.oid, @@ -3302,24 +3313,29 @@ pub fn run(mut args: Args) -> Result<()> { let old_match = e .old_path .as_ref() - .is_some_and(|p| p.starts_with(pfx.as_str())); + .is_some_and(|p| p.starts_with_bytes(pfx.as_bytes())); let new_match = e .new_path .as_ref() - .is_some_and(|p| p.starts_with(pfx.as_str())); + .is_some_and(|p| p.starts_with_bytes(pfx.as_bytes())); if !old_match && !new_match { return None; } - // Strip prefix from paths, then strip leading '/' - if let Some(ref mut p) = e.old_path { - if let Some(stripped) = p.strip_prefix(pfx.as_str()) { - *p = stripped.trim_start_matches('/').to_owned(); + // Strip prefix from paths, then strip leading '/' (byte-wise). + let strip = |p: &mut RepoPathBuf| { + if let Some(stripped) = p.as_bytes().strip_prefix(pfx.as_bytes()) { + let mut t = stripped; + while let Some(rest) = t.strip_prefix(b"/") { + t = rest; + } + *p = RepoPathBuf::from_bytes(t.to_vec()); } + }; + if let Some(ref mut p) = e.old_path { + strip(p); } if let Some(ref mut p) = e.new_path { - if let Some(stripped) = p.strip_prefix(pfx.as_str()) { - *p = stripped.trim_start_matches('/').to_owned(); - } + strip(p); } Some(e) }) @@ -3509,7 +3525,7 @@ pub fn run(mut args: Args) -> Result<()> { // combined `diff --cc` replaces them only when unified patch is the sole format. let strip_conflict_index_lines = show_unified_patch && !format_besides_unified_patch; if strip_conflict_index_lines { - entries.retain(|e| !conflict_paths.iter().any(|p| p == e.path())); + entries.retain(|e| !conflict_paths.iter().any(|p| e.path() == p.as_str())); } } } @@ -3870,8 +3886,12 @@ pub fn run(mut args: Args) -> Result<()> { let env_cfg_ext = resolve_env_config_external_diff(&diff_config); // Run when env/config OR any attribute driver could apply. let any_attr_driver = entries.iter().any(|e| { - grit_lib::merge_diff::diff_attr_external_driver(&repo.git_dir, &diff_config, e.path()) - .is_some() + grit_lib::merge_diff::diff_attr_external_driver( + &repo.git_dir, + &diff_config, + &e.path().to_string_lossy(), + ) + .is_some() }); if env_cfg_ext.is_some() || any_attr_driver { let mut sink = io::sink(); @@ -3990,8 +4010,9 @@ fn run_diff_blob_vs_file( let entries = vec![DiffEntry { status: DiffStatus::Modified, - old_path: Some(old_path), - new_path: Some(file_path.to_owned()), + // TODO(byte-paths): blob-diff display paths are lossy String (Phase 2/4). + old_path: Some(RepoPathBuf::from_string(old_path)), + new_path: Some(RepoPathBuf::from_string(file_path.to_owned())), old_mode, new_mode: wt_mode, old_oid, @@ -5865,8 +5886,10 @@ fn repo_relative_path_for_relative_display(display: &str, prefix: Option<&str>) /// Repository-relative path for attributes / worktree reads when `--relative` stripped display paths. fn repo_path_for_diff_entry(entry: &DiffEntry, relative_prefix: Option<&str>) -> String { match relative_prefix { - Some(p) if !p.is_empty() => repo_relative_path_for_relative_display(entry.path(), Some(p)), - _ => entry.path().to_owned(), + Some(p) if !p.is_empty() => { + repo_relative_path_for_relative_display(&entry.path().to_string_lossy(), Some(p)) + } + _ => entry.path().to_string_lossy().into_owned(), } } @@ -6212,7 +6235,8 @@ fn filter_by_paths(entries: Vec, paths: &[String]) -> Vec entries .into_iter() .filter(|e| { - let path = e.path(); + // TODO(byte-paths): lossy pathspec matching until Phase 3 byte-correct matchers. + let path = e.path().to_string_lossy(); // Classify the entry from its modes so `dir/` style pathspecs match // gitlinks (t4010: `submod/` must match submodule entry `submod`). let old_ctx = grit_lib::pathspec::context_from_mode_octal(&e.old_mode); @@ -6225,11 +6249,11 @@ fn filter_by_paths(entries: Vec, paths: &[String]) -> Vec if spec == &"." || spec.is_empty() { return true; } - grit_lib::pathspec::matches_pathspec_with_context(spec, path, ctx) + grit_lib::pathspec::matches_pathspec_with_context(spec, &path, ctx) }); let excluded = exclude_inners .iter() - .any(|inner| grit_lib::pathspec::matches_pathspec_with_context(inner, path, ctx)); + .any(|inner| grit_lib::pathspec::matches_pathspec_with_context(inner, &path, ctx)); included && !excluded }) .collect() @@ -6237,7 +6261,7 @@ fn filter_by_paths(entries: Vec, paths: &[String]) -> Vec /// Read content for a diff entry side, falling back to the working tree if /// the OID is not in the ODB (worktree files are hashed but not stored). -fn read_content(odb: &Odb, oid: &ObjectId, work_tree: Option<&Path>, path: &str) -> String { +fn read_content(odb: &Odb, oid: &ObjectId, work_tree: Option<&Path>, path: &RepoPath) -> String { let raw = read_content_raw_or_worktree(odb, oid, work_tree, path); String::from_utf8_lossy(&raw).into_owned() } @@ -6416,8 +6440,8 @@ fn read_content_raw(odb: &Odb, oid: &ObjectId) -> Vec { /// /// For symlinks, returns the link target as UTF-8 bytes (same as blob hashing), not the /// dereferenced file contents (t4011: `git diff` for intent-to-add symlinks vs `*.bin` rules). -fn read_worktree_file_raw(wt: &Path, path: &str) -> Option> { - let full = wt.join(path); +fn read_worktree_file_raw(wt: &Path, path: &RepoPath) -> Option> { + let full = path.to_fs_path(wt); let meta = std::fs::symlink_metadata(&full).ok()?; if meta.file_type().is_symlink() { std::fs::read_link(&full) @@ -6433,13 +6457,13 @@ fn read_content_raw_or_worktree( odb: &Odb, oid: &ObjectId, work_tree: Option<&Path>, - path: &str, + path: &RepoPath, ) -> Vec { if *oid == zero_oid() { // Empty tree / new file side: read from the work tree when available (t1501 tree diffs). if let Some(wt) = work_tree { - if path != "/dev/null" { - let p = wt.join(path); + if path.as_bytes() != b"/dev/null" { + let p = path.to_fs_path(wt); if std::fs::symlink_metadata(&p) .map(|m| m.is_dir()) .unwrap_or(false) @@ -6459,8 +6483,8 @@ fn read_content_raw_or_worktree( } // Fall back to reading from working tree if let Some(wt) = work_tree { - if path != "/dev/null" { - let p = wt.join(path); + if path.as_bytes() != b"/dev/null" { + let p = path.to_fs_path(wt); if std::fs::symlink_metadata(&p) .map(|m| m.is_dir()) .unwrap_or(false) @@ -6541,7 +6565,7 @@ fn stat_ins_del_for_entry( old_content = ws_mode.normalize(&old_content); new_content = ws_mode.normalize(&new_content); } - let (algo, hist) = diff_algorithm_for_path(entry.path(), algo_cli, algo_ctx); + let (algo, hist) = diff_algorithm_for_path(&entry.path().to_string_lossy(), algo_cli, algo_ctx); count_changes_with_algorithm(&old_content, &new_content, algo, hist) } @@ -6677,11 +6701,11 @@ fn write_diff_header_with_prefix( let old_path = entry .old_path .as_deref() - .unwrap_or(entry.new_path.as_deref().unwrap_or("")); + .unwrap_or(entry.new_path.as_deref().unwrap_or(RepoPath::from_str(""))); let new_path = entry .new_path .as_deref() - .unwrap_or(entry.old_path.as_deref().unwrap_or("")); + .unwrap_or(entry.old_path.as_deref().unwrap_or(RepoPath::from_str(""))); let (b, r) = if use_color { (BOLD, RESET) } else { ("", "") }; let old_git = if old_path.is_empty() { @@ -6689,14 +6713,14 @@ fn write_diff_header_with_prefix( } else if old_path == "/dev/null" { "/dev/null".to_string() } else { - format_diff_path_with_prefix(src_prefix, old_path, quote_path_fully) + format_diff_path_with_prefix(src_prefix, &old_path.to_string_lossy(), quote_path_fully) }; let new_git = if new_path.is_empty() { format!("{dst_prefix}") } else if new_path == "/dev/null" { "/dev/null".to_string() } else { - format_diff_path_with_prefix(dst_prefix, new_path, quote_path_fully) + format_diff_path_with_prefix(dst_prefix, &new_path.to_string_lossy(), quote_path_fully) }; writeln!(out, "{b}diff --git {old_git} {new_git}{r}")?; @@ -6776,12 +6800,12 @@ fn write_diff_header_with_prefix( writeln!( out, "{b}rename from {}{r}", - quote_c_style(old_path, quote_path_fully) + quote_c_style(&old_path.to_string_lossy(), quote_path_fully) )?; writeln!( out, "{b}rename to {}{r}", - quote_c_style(new_path, quote_path_fully) + quote_c_style(&new_path.to_string_lossy(), quote_path_fully) )?; if entry.old_oid != entry.new_oid { // Git appends the (shared) mode to the rename's `index` line when both @@ -6810,12 +6834,12 @@ fn write_diff_header_with_prefix( writeln!( out, "{b}copy from {}{r}", - quote_c_style(old_path, quote_path_fully) + quote_c_style(&old_path.to_string_lossy(), quote_path_fully) )?; writeln!( out, "{b}copy to {}{r}", - quote_c_style(new_path, quote_path_fully) + quote_c_style(&new_path.to_string_lossy(), quote_path_fully) )?; if entry.old_oid != entry.new_oid { if entry.old_mode == entry.new_mode { @@ -7565,8 +7589,8 @@ fn write_patch_with_prefix( let mut ext_died: Option = None; let mut ext_counter: usize = 0; for entry in entries { - let old_path = entry.old_path.as_deref().unwrap_or("/dev/null"); - let new_path = entry.new_path.as_deref().unwrap_or("/dev/null"); + let old_path = entry.old_path.as_deref().unwrap_or(RepoPath::from_str("/dev/null")); + let new_path = entry.new_path.as_deref().unwrap_or(RepoPath::from_str("/dev/null")); let path_for_attrs = repo_path_for_diff_entry(entry, relative_prefix); // `git diff --cached` reports an unmerged (conflicted) index entry as a single @@ -7589,7 +7613,7 @@ fn write_patch_with_prefix( { let head = work_tree .and_then(|wt| { - grit_lib::diff::read_submodule_head_oid(&wt.join(entry.path())) + grit_lib::diff::read_submodule_head_oid(&entry.path().to_fs_path(wt)) }) .unwrap_or_else(zero_oid); if head == zero_oid() { @@ -7646,15 +7670,15 @@ fn write_patch_with_prefix( } } - let old_wt_path = repo_path_for_diff_side(old_path, relative_prefix); + let old_wt_path = repo_path_for_diff_side(&old_path.to_string_lossy(), relative_prefix); let old_content_raw = if entry.old_oid == zero_oid() { - read_content_raw_or_worktree(odb, &entry.old_oid, work_tree, &old_wt_path) + read_content_raw_or_worktree(odb, &entry.old_oid, work_tree, RepoPath::from_str(&old_wt_path)) } else { read_content_raw(odb, &entry.old_oid) }; let wt_path = path_for_attrs.clone(); let new_content_raw = - read_content_raw_or_worktree(odb, &entry.new_oid, work_tree, &wt_path); + read_content_raw_or_worktree(odb, &entry.new_oid, work_tree, RepoPath::from_str(&wt_path)); if !no_ext_diff && entry.status != DiffStatus::Unmerged { // Git precedence: a path's `diff=` attribute driver @@ -7693,7 +7717,7 @@ fn write_patch_with_prefix( let new_zero_hex = new_is_worktree; let new_borrow: Option = if new_is_worktree && mode_is_regular_blob_mode_str(&entry.new_mode) { - let p = repo_path_for_diff_side(new_path, relative_prefix); + let p = repo_path_for_diff_side(&new_path.to_string_lossy(), relative_prefix); work_tree.and_then(|wt| { let full = wt.join(&p); full.symlink_metadata().ok().map(|_| p.clone()) @@ -7742,8 +7766,8 @@ fn write_patch_with_prefix( match run_external_diff_for_patch( out, ext, - display, - other, + &display.to_string_lossy(), + other.map(|o| o.to_string_lossy()).as_deref(), &old_conv, &new_conv, &entry.old_oid, @@ -7818,8 +7842,8 @@ fn write_patch_with_prefix( abbrev_len, src_prefix, dst_prefix, - old_path, - new_path, + &old_path.to_string_lossy(), + &new_path.to_string_lossy(), &old_t, &new_t, suppress_blank_empty, @@ -7837,8 +7861,8 @@ fn write_patch_with_prefix( abbrev_len, src_prefix, dst_prefix, - old_path, - new_path, + &old_path.to_string_lossy(), + &new_path.to_string_lossy(), &old_t, &new_t, suppress_blank_empty, @@ -7869,15 +7893,15 @@ fn write_patch_with_prefix( let (old_label, new_label) = match entry.status { DiffStatus::Added => ( "/dev/null".to_owned(), - format_diff_path_with_prefix(dst_prefix, new_path, quote_path_fully), + format_diff_path_with_prefix(dst_prefix, &new_path.to_string_lossy(), quote_path_fully), ), DiffStatus::Deleted => ( - format_diff_path_with_prefix(src_prefix, old_path, quote_path_fully), + format_diff_path_with_prefix(src_prefix, &old_path.to_string_lossy(), quote_path_fully), "/dev/null".to_owned(), ), _ => ( - format_diff_path_with_prefix(src_prefix, old_path, quote_path_fully), - format_diff_path_with_prefix(dst_prefix, new_path, quote_path_fully), + format_diff_path_with_prefix(src_prefix, &old_path.to_string_lossy(), quote_path_fully), + format_diff_path_with_prefix(dst_prefix, &new_path.to_string_lossy(), quote_path_fully), ), }; writeln!(out, "--- {old_label}")?; @@ -7979,19 +8003,19 @@ fn write_patch_with_prefix( out, &old_content_raw, &new_content_raw, - old_path, - new_path, + &old_path.to_string_lossy(), + &new_path.to_string_lossy(), )?; } else { let bo = if old_path == "/dev/null" { "/dev/null".to_string() } else { - format_diff_path_with_prefix(src_prefix, old_path, quote_path_fully) + format_diff_path_with_prefix(src_prefix, &old_path.to_string_lossy(), quote_path_fully) }; let bn = if new_path == "/dev/null" { "/dev/null".to_string() } else { - format_diff_path_with_prefix(dst_prefix, new_path, quote_path_fully) + format_diff_path_with_prefix(dst_prefix, &new_path.to_string_lossy(), quote_path_fully) }; writeln!(out, "Binary files {bo} and {bn} differ")?; } @@ -8069,15 +8093,17 @@ fn write_patch_with_prefix( } // For Added files, show --- /dev/null; for Deleted files, show +++ /dev/null - let display_old = if entry.status == DiffStatus::Added { + let old_path_lossy = old_path.to_string_lossy(); + let new_path_lossy = new_path.to_string_lossy(); + let display_old: &str = if entry.status == DiffStatus::Added { "/dev/null" } else { - old_path + &old_path_lossy }; - let display_new = if entry.status == DiffStatus::Deleted { + let display_new: &str = if entry.status == DiffStatus::Deleted { "/dev/null" } else { - new_path + &new_path_lossy }; if break_rewrites @@ -10150,7 +10176,7 @@ fn mode_str_has_executable_bit(mode_str: &str) -> bool { } fn compact_summary_display_path(entry: &DiffEntry, quote_path_fully: bool) -> String { - let path = grit_lib::quote_path::quote_c_style(entry.path(), quote_path_fully); + let path = grit_lib::quote_path::quote_c_style(&entry.path().to_string_lossy(), quote_path_fully); match entry.status { DiffStatus::Added => format!("{path} (new)"), DiffStatus::Deleted => format!("{path} (gone)"), @@ -10389,11 +10415,11 @@ fn write_stat( .iter() .map(|e| match e.status { DiffStatus::Renamed | DiffStatus::Copied => { - let old = e.old_path.as_deref().unwrap_or(""); - let new = e.new_path.as_deref().unwrap_or(""); - format_rename_display(old, new, quote_path_fully) + let old = e.old_path.as_deref().map(RepoPath::to_string_lossy).unwrap_or_default(); + let new = e.new_path.as_deref().map(RepoPath::to_string_lossy).unwrap_or_default(); + format_rename_display(&old, &new, quote_path_fully) } - _ => grit_lib::quote_path::quote_c_style(e.path(), quote_path_fully), + _ => grit_lib::quote_path::quote_c_style(&e.path().to_string_lossy(), quote_path_fully), }) .collect(); @@ -10536,9 +10562,9 @@ fn write_numstat( algo_ctx, algo_cli, ); - let old = entry.old_path.as_deref().unwrap_or(""); - let new = entry.new_path.as_deref().unwrap_or(""); - let display = format_rename_display(old, new, false); + let old = entry.old_path.as_deref().map(RepoPath::to_string_lossy).unwrap_or_default(); + let new = entry.new_path.as_deref().map(RepoPath::to_string_lossy).unwrap_or_default(); + let display = format_rename_display(&old, &new, false); writeln!(out, "{ins}\t{del}\t{display}")?; } _ if binary_rewrite_numstat => { @@ -10573,16 +10599,16 @@ pub(crate) fn write_diff_summary( for entry in entries { match entry.status { DiffStatus::Renamed => { - let old = entry.old_path.as_deref().unwrap_or(""); - let new = entry.new_path.as_deref().unwrap_or(""); - let display = format_rename_display(old, new, quote_path_fully); + let old = entry.old_path.as_deref().map(RepoPath::to_string_lossy).unwrap_or_default(); + let new = entry.new_path.as_deref().map(RepoPath::to_string_lossy).unwrap_or_default(); + let display = format_rename_display(&old, &new, quote_path_fully); let sim = entry.score.unwrap_or(100); writeln!(out, " rename {display} ({sim}%)")?; } DiffStatus::Copied => { - let old = entry.old_path.as_deref().unwrap_or(""); - let new = entry.new_path.as_deref().unwrap_or(""); - let display = format_rename_display(old, new, quote_path_fully); + let old = entry.old_path.as_deref().map(RepoPath::to_string_lossy).unwrap_or_default(); + let new = entry.new_path.as_deref().map(RepoPath::to_string_lossy).unwrap_or_default(); + let display = format_rename_display(&old, &new, quote_path_fully); let sim = entry.score.unwrap_or(100); writeln!(out, " copy {display} ({sim}%)")?; } @@ -10591,7 +10617,7 @@ pub(crate) fn write_diff_summary( out, " create mode {} {}", entry.new_mode, - grit_lib::quote_path::quote_c_style(entry.path(), quote_path_fully) + grit_lib::quote_path::quote_c_style(&entry.path().to_string_lossy(), quote_path_fully) )?; } DiffStatus::Deleted => { @@ -10599,7 +10625,7 @@ pub(crate) fn write_diff_summary( out, " delete mode {} {}", entry.old_mode, - grit_lib::quote_path::quote_c_style(entry.path(), quote_path_fully) + grit_lib::quote_path::quote_c_style(&entry.path().to_string_lossy(), quote_path_fully) )?; } DiffStatus::Modified => { @@ -10608,7 +10634,7 @@ pub(crate) fn write_diff_summary( writeln!( out, " rewrite {} ({pct}%)", - grit_lib::quote_path::quote_c_style(entry.path(), quote_path_fully) + grit_lib::quote_path::quote_c_style(&entry.path().to_string_lossy(), quote_path_fully) )?; } } @@ -10628,7 +10654,7 @@ fn write_name_only( writeln!( out, "{}", - grit_lib::quote_path::quote_c_style(entry.path(), quote_path_fully) + grit_lib::quote_path::quote_c_style(&entry.path().to_string_lossy(), quote_path_fully) )?; } Ok(()) @@ -10692,8 +10718,8 @@ fn write_raw( match entry.status { DiffStatus::Renamed | DiffStatus::Copied => { let score = entry.score.unwrap_or(100); - let old_path = entry.old_path.as_deref().unwrap_or(""); - let new_path = entry.new_path.as_deref().unwrap_or(""); + let old_path = entry.old_path.as_deref().unwrap_or(RepoPath::from_str("")); + let new_path = entry.new_path.as_deref().unwrap_or(RepoPath::from_str("")); writeln!(out, ":{old_mode} {new_mode} {old_oid} {new_oid} {status}{score:03}\t{old_path}\t{new_path}")?; } DiffStatus::Modified => { @@ -10742,11 +10768,11 @@ fn write_name_status( "R{:03}\t{}\t{}", s, grit_lib::quote_path::quote_c_style( - entry.old_path.as_deref().unwrap_or(""), + &entry.old_path.as_deref().unwrap_or(RepoPath::from_str("")).to_string_lossy(), quote_path_fully, ), grit_lib::quote_path::quote_c_style( - entry.new_path.as_deref().unwrap_or(""), + &entry.new_path.as_deref().unwrap_or(RepoPath::from_str("")).to_string_lossy(), quote_path_fully, ), )?; @@ -10758,11 +10784,11 @@ fn write_name_status( "C{:03}\t{}\t{}", s, grit_lib::quote_path::quote_c_style( - entry.old_path.as_deref().unwrap_or(""), + &entry.old_path.as_deref().unwrap_or(RepoPath::from_str("")).to_string_lossy(), quote_path_fully, ), grit_lib::quote_path::quote_c_style( - entry.new_path.as_deref().unwrap_or(""), + &entry.new_path.as_deref().unwrap_or(RepoPath::from_str("")).to_string_lossy(), quote_path_fully, ), )?; @@ -10772,7 +10798,7 @@ fn write_name_status( out, "{}\t{}", entry.status.letter(), - grit_lib::quote_path::quote_c_style(entry.path(), quote_path_fully) + grit_lib::quote_path::quote_c_style(&entry.path().to_string_lossy(), quote_path_fully) )?; } } @@ -11047,10 +11073,11 @@ pub(crate) fn check_whitespace_errors( continue; } let path = entry.path(); - let marker_size = conflict_marker_size_for_path(merged_attrs, path, ignore_case_attrs); + let path_str = path.to_string_lossy(); + let marker_size = conflict_marker_size_for_path(merged_attrs, &path_str, ignore_case_attrs); let ws_rule = match effective_ws_rule_for_path( merged_attrs, - path, + &path_str, &entry.new_mode, config, ignore_case_attrs, @@ -11088,8 +11115,8 @@ pub(crate) fn check_whitespace_errors( let patch = unified_diff_with_prefix_and_funcname_and_algorithm( &old_content, &new_content, - path, - path, + &path_str, + &path_str, 1, 0, "", diff --git a/grit-git/src/commands/diff_files.rs b/grit-git/src/commands/diff_files.rs index ddc1d8538..617ef25d3 100644 --- a/grit-git/src/commands/diff_files.rs +++ b/grit-git/src/commands/diff_files.rs @@ -16,6 +16,7 @@ use grit_lib::diff::{ rewrite_dissimilarity_index_percent, should_break_rewrite_for_stat, stat_matches, submodule_porcelain_flags, unified_diff, zero_oid, DiffEntry, DiffStatus, }; +use grit_lib::repo_path::{RepoPath, RepoPathBuf}; use grit_lib::diffstat::{terminal_columns, write_diffstat_block, DiffstatOptions, FileStatInput}; use grit_lib::index::{ Index, IndexEntry, MODE_EXECUTABLE, MODE_GITLINK, MODE_REGULAR, MODE_SYMLINK, @@ -85,7 +86,7 @@ pub fn run(mut args: Args) -> Result<()> { Vec::new() } else { let path = entry.new_path.as_deref().unwrap_or(entry.path()); - let abs = work_tree.join(path); + let abs = path.to_fs_path(&work_tree); match fs::read(&abs) { Ok(b) => b, Err(e) if e.kind() == std::io::ErrorKind::NotFound => Vec::new(), @@ -332,15 +333,15 @@ pub fn run(mut args: Args) -> Result<()> { (DiffStatus::Renamed, Some(s)) => { println!( "R{s:03}\t{}\t{}", - entry.old_path.as_deref().unwrap_or(""), - entry.new_path.as_deref().unwrap_or("") + entry.old_path.as_deref().unwrap_or(RepoPath::from_str("")), + entry.new_path.as_deref().unwrap_or(RepoPath::from_str("")) ); } (DiffStatus::Copied, Some(s)) => { println!( "C{s:03}\t{}\t{}", - entry.old_path.as_deref().unwrap_or(""), - entry.new_path.as_deref().unwrap_or("") + entry.old_path.as_deref().unwrap_or(RepoPath::from_str("")), + entry.new_path.as_deref().unwrap_or(RepoPath::from_str("")) ); } _ => { @@ -468,16 +469,18 @@ fn print_diff_files_summary(entries: &[DiffEntry]) -> Result<()> { for entry in entries { match entry.status { DiffStatus::Renamed => { - let old = entry.old_path.as_deref().unwrap_or(""); - let new = entry.new_path.as_deref().unwrap_or(""); - let display = format_rename_summary_display(old, new); + let old = entry.old_path.as_deref().unwrap_or(RepoPath::from_str("")); + let new = entry.new_path.as_deref().unwrap_or(RepoPath::from_str("")); + let display = + format_rename_summary_display(&old.to_string_lossy(), &new.to_string_lossy()); let sim = entry.score.unwrap_or(100); println!(" rename {display} ({sim}%)"); } DiffStatus::Copied => { - let old = entry.old_path.as_deref().unwrap_or(""); - let new = entry.new_path.as_deref().unwrap_or(""); - let display = format_rename_summary_display(old, new); + let old = entry.old_path.as_deref().unwrap_or(RepoPath::from_str("")); + let new = entry.new_path.as_deref().unwrap_or(RepoPath::from_str("")); + let display = + format_rename_summary_display(&old.to_string_lossy(), &new.to_string_lossy()); let sim = entry.score.unwrap_or(100); println!(" copy {display} ({sim}%)"); } @@ -485,14 +488,14 @@ fn print_diff_files_summary(entries: &[DiffEntry]) -> Result<()> { println!( " create mode {} {}", entry.new_mode, - quote_c_style_path(entry.path()) + quote_c_style_path(&entry.path().to_string_lossy()) ); } DiffStatus::Deleted => { println!( " delete mode {} {}", entry.old_mode, - quote_c_style_path(entry.path()) + quote_c_style_path(&entry.path().to_string_lossy()) ); } DiffStatus::TypeChanged => { @@ -500,7 +503,7 @@ fn print_diff_files_summary(entries: &[DiffEntry]) -> Result<()> { " mode change {} => {} {}", entry.old_mode, entry.new_mode, - quote_c_style_path(entry.path()) + quote_c_style_path(&entry.path().to_string_lossy()) ); } DiffStatus::Modified => { @@ -509,11 +512,11 @@ fn print_diff_files_summary(entries: &[DiffEntry]) -> Result<()> { " mode change {} => {} {}", entry.old_mode, entry.new_mode, - quote_c_style_path(entry.path()) + quote_c_style_path(&entry.path().to_string_lossy()) ); } if let Some(pct) = entry.score { - println!(" rewrite {} ({pct}%)", quote_c_style_path(entry.path())); + println!(" rewrite {} ({pct}%)", quote_c_style_path(&entry.path().to_string_lossy())); } } DiffStatus::Unmerged => {} @@ -1236,7 +1239,7 @@ fn change_to_diff_entry(c: &Change) -> DiffEntry { return DiffEntry { status: DiffStatus::Added, old_path: None, - new_path: Some(c.path.clone()), + new_path: Some(RepoPathBuf::from_string(c.path.clone())), old_mode: "000000".to_owned(), new_mode: new_mode_str, old_oid: zero_oid(), @@ -1249,7 +1252,8 @@ fn change_to_diff_entry(c: &Change) -> DiffEntry { match c.status { 'D' => DiffEntry { status: DiffStatus::Deleted, - old_path: Some(c.path.clone()), + // TODO(byte-paths): Change.path is a lossy String; migrate its source (Phase 2/4). + old_path: Some(RepoPathBuf::from_string(c.path.clone())), new_path: None, old_mode: old_mode_str, new_mode: new_mode_str, @@ -1259,8 +1263,9 @@ fn change_to_diff_entry(c: &Change) -> DiffEntry { }, 'U' => DiffEntry { status: DiffStatus::Unmerged, - old_path: Some(c.path.clone()), - new_path: Some(c.path.clone()), + // TODO(byte-paths): Change.path is a lossy String; migrate its source (Phase 2/4). + old_path: Some(RepoPathBuf::from_string(c.path.clone())), + new_path: Some(RepoPathBuf::from_string(c.path.clone())), old_mode: old_mode_str, new_mode: new_mode_str, old_oid: c.old_oid, @@ -1269,8 +1274,9 @@ fn change_to_diff_entry(c: &Change) -> DiffEntry { }, 'T' => DiffEntry { status: DiffStatus::TypeChanged, - old_path: Some(c.path.clone()), - new_path: Some(c.path.clone()), + // TODO(byte-paths): Change.path is a lossy String; migrate its source (Phase 2/4). + old_path: Some(RepoPathBuf::from_string(c.path.clone())), + new_path: Some(RepoPathBuf::from_string(c.path.clone())), old_mode: old_mode_str, new_mode: new_mode_str, old_oid: c.old_oid, @@ -1279,8 +1285,9 @@ fn change_to_diff_entry(c: &Change) -> DiffEntry { }, _ => DiffEntry { status: DiffStatus::Modified, - old_path: Some(c.path.clone()), - new_path: Some(c.path.clone()), + // TODO(byte-paths): Change.path is a lossy String; migrate its source (Phase 2/4). + old_path: Some(RepoPathBuf::from_string(c.path.clone())), + new_path: Some(RepoPathBuf::from_string(c.path.clone())), old_mode: old_mode_str, new_mode: new_mode_str, old_oid: c.old_oid, @@ -1344,10 +1351,10 @@ fn render_raw_diff_entry( let path = match entry.status { DiffStatus::Renamed | DiffStatus::Copied => format!( "{}\t{}", - entry.old_path.as_deref().unwrap_or(""), - entry.new_path.as_deref().unwrap_or("") + entry.old_path.as_deref().unwrap_or(RepoPath::from_str("")), + entry.new_path.as_deref().unwrap_or(RepoPath::from_str("")) ), - _ => entry.path().to_owned(), + _ => entry.path().to_string(), }; Ok(format!( @@ -1393,11 +1400,11 @@ fn print_patch_from_diff_entry( let old_path = entry .old_path .as_deref() - .unwrap_or(entry.new_path.as_deref().unwrap_or("")); + .unwrap_or(entry.new_path.as_deref().unwrap_or(RepoPath::from_str(""))); let new_path = entry .new_path .as_deref() - .unwrap_or(entry.old_path.as_deref().unwrap_or("")); + .unwrap_or(entry.old_path.as_deref().unwrap_or(RepoPath::from_str(""))); let (src_prefix, dst_prefix) = if no_prefix { ("", "") } else { ("a/", "b/") }; let old_label = match entry.status { @@ -1409,7 +1416,7 @@ fn print_patch_from_diff_entry( _ => format!("{dst_prefix}{new_path}"), }; - let display_path = entry.path(); + let display_path = entry.path().to_string_lossy(); let mut header = format!("diff --git {src_prefix}{old_path} {dst_prefix}{new_path}"); match entry.status { DiffStatus::Deleted => { @@ -1485,8 +1492,8 @@ fn print_patch_from_diff_entry( let patch = unified_diff( &old_content, &new_content, - display_path, - display_path, + &display_path, + &display_path, 3, indent_heuristic, quote_path_fully, @@ -1527,7 +1534,10 @@ fn print_stat_from_diff_entries( let (ins, del) = count_changes(&old, &new); total_ins += ins; total_del += del; - println!("{}", format_stat_line(entry.path(), ins, del, max_len)); + println!( + "{}", + format_stat_line(&entry.path().to_string_lossy(), ins, del, max_len) + ); } let files = entries.len(); let mut summary = format!( @@ -1629,7 +1639,7 @@ fn mode_has_executable_bit(mode_str: &str) -> bool { } fn compact_summary_path_display(entry: &DiffEntry) -> String { - let path = entry.path().to_owned(); + let path = entry.path().to_string(); match entry.status { DiffStatus::Added => format!("{path} (new)"), DiffStatus::Deleted => format!("{path} (gone)"), @@ -1666,7 +1676,7 @@ fn load_patch_bytes_for_diff_entry( Vec::new() } else { let path = entry.new_path.as_deref().unwrap_or(entry.path()); - let abs = work_tree.join(path); + let abs = path.to_fs_path(work_tree); match fs::read(&abs) { Ok(b) => b, Err(e) if e.kind() == std::io::ErrorKind::NotFound => Vec::new(), @@ -1764,7 +1774,7 @@ fn load_patch_contents_for_diff_entry( String::new() } else { let path = entry.new_path.as_deref().unwrap_or(entry.path()); - let abs = work_tree.join(path); + let abs = path.to_fs_path(work_tree); match fs::symlink_metadata(&abs) { Ok(meta) if meta.file_type().is_symlink() => { let target = fs::read_link(&abs)?; diff --git a/grit-git/src/commands/diff_index.rs b/grit-git/src/commands/diff_index.rs index 6be1af6ec..5e1f9c04a 100644 --- a/grit-git/src/commands/diff_index.rs +++ b/grit-git/src/commands/diff_index.rs @@ -13,6 +13,7 @@ use grit_lib::index::{ Index, IndexEntry, MODE_EXECUTABLE, MODE_GITLINK, MODE_REGULAR, MODE_SYMLINK, }; use grit_lib::merge_base::{merge_base_for_diff_index, MergeBaseForDiffError}; +use grit_lib::repo_path::{RepoPath, RepoPathBuf}; use grit_lib::objects::{parse_commit, parse_tag, parse_tree, ObjectId, ObjectKind}; use grit_lib::odb::Odb; use grit_lib::pathspec::context_from_mode_bits; @@ -171,8 +172,9 @@ pub fn run(mut args: Args) -> Result<()> { } diff_entries.push(DiffEntry { status: DiffStatus::Modified, - old_path: Some(path.clone()), - new_path: Some(path.clone()), + // TODO(byte-paths): lossy until index_map keys migrate (Phase 2/4) + old_path: Some(RepoPathBuf::from_string(path.clone())), + new_path: Some(RepoPathBuf::from_string(path.clone())), old_mode: "160000".to_owned(), new_mode: "160000".to_owned(), old_oid: snap.oid, @@ -290,14 +292,16 @@ pub fn run(mut args: Args) -> Result<()> { // `D`/`R`. Copy detection may still attribute new blobs to an unmerged source (`-C`). let unmerged_ref = &unmerged_paths; diff_entries.retain(|e| { - if unmerged_ref.contains(e.path()) && e.status == DiffStatus::Deleted { + if unmerged_ref.contains(&*e.path().to_string_lossy()) + && e.status == DiffStatus::Deleted + { return false; } true }); for e in &mut diff_entries { let src = e.old_path.as_deref(); - if src.is_some_and(|p| unmerged_ref.contains(p)) { + if src.is_some_and(|p| unmerged_ref.contains(&*p.to_string_lossy())) { if options.find_copies { // Git `-C`: paths still show as copies from the unmerged source (not renames). if e.status == DiffStatus::Renamed { @@ -346,10 +350,11 @@ pub fn run(mut args: Args) -> Result<()> { .into_iter() .filter_map(|mut e| { let path = e.path().to_owned(); - if !path.starts_with(&rel_prefix) { + if !path.starts_with_bytes(rel_prefix.as_bytes()) { return None; } - let stripped = path[rel_prefix.len()..].to_owned(); + let stripped = + RepoPathBuf::from_bytes(path.as_bytes()[rel_prefix.len()..].to_vec()); if e.old_path.is_some() { e.old_path = Some(stripped.clone()); } @@ -429,7 +434,11 @@ pub fn run(mut args: Args) -> Result<()> { if options.nul_terminated { out.write_all(entry.path().as_bytes())?; } else { - write!(out, "{}", quote_c_style(entry.path(), quote_fully))?; + write!( + out, + "{}", + quote_c_style(&entry.path().to_string_lossy(), quote_fully) + )?; } out.write_all(&[term])?; } @@ -443,7 +452,7 @@ pub fn run(mut args: Args) -> Result<()> { ignore_dirty: options.ignore_dirty_submodules, }; for entry in &diff_entries { - let p = entry.path(); + let p = entry.path().to_string_lossy(); write_patch_entry( &mut out, &repo, @@ -453,7 +462,7 @@ pub fn run(mut args: Args) -> Result<()> { wt, options.submodule_format, sm_ignore, - p, + &p, options.indent_heuristic, )?; } @@ -553,7 +562,7 @@ fn read_entry_raw_contents(repo: &Repository, entry: &DiffEntry) -> (Vec, Ve { if let Some(wt) = repo.work_tree.as_ref() { let path = entry.new_path.as_deref().unwrap_or(entry.path()); - read_worktree_path_raw(&wt.join(path)) + read_worktree_path_raw(&path.to_fs_path(wt)) } else { Vec::new() } @@ -572,7 +581,7 @@ fn worktree_side_is_placeholder(repo: &Repository, entry: &DiffEntry) -> bool { return false; }; let path = entry.new_path.as_deref().unwrap_or(entry.path()); - let abs = wt.join(path); + let abs = path.to_fs_path(wt); match fs::symlink_metadata(&abs) { Ok(meta) => { let mode = canonicalize_mode(meta.permissions().mode()); @@ -1216,8 +1225,9 @@ fn collect_unmerged_index_paths(index: &Index) -> BTreeSet { fn diff_entry_unmerged(path: &str) -> DiffEntry { DiffEntry { status: DiffStatus::Unmerged, - old_path: Some(path.to_owned()), - new_path: Some(path.to_owned()), + // TODO(byte-paths): caller passes a lossy &str (Phase 2/4) + old_path: Some(RepoPathBuf::from_string(path.to_owned())), + new_path: Some(RepoPathBuf::from_string(path.to_owned())), old_mode: "000000".to_owned(), new_mode: "000000".to_owned(), old_oid: zero_oid(), @@ -1766,7 +1776,7 @@ fn apply_diffcore_break_rewrites_split( out.push(e); continue; }; - match fs::read(wt.join(path)) { + match fs::read(path.to_fs_path(wt)) { Ok(b) => b, Err(_) => { out.push(e); @@ -1797,7 +1807,7 @@ fn apply_diffcore_break_rewrites_split( out.push(e); continue; }; - broken_paths.insert(path.clone()); + broken_paths.insert(path.to_string_lossy().into_owned()); out.push(DiffEntry { status: DiffStatus::Deleted, old_path: Some(path.clone()), @@ -1828,8 +1838,8 @@ fn drop_break_delete_superseded_by_rename_dest(entries: Vec) -> Vec = HashSet::new(); for e in &entries { if matches!(e.status, DiffStatus::Renamed | DiffStatus::Copied) { - if let Some(p) = e.new_path.clone() { - targets.insert(p); + if let Some(p) = e.new_path.as_deref() { + targets.insert(p.to_string_lossy().into_owned()); } } } @@ -1842,7 +1852,7 @@ fn drop_break_delete_superseded_by_rename_dest(entries: Vec) -> Vec { merged.push(DiffEntry { status: DiffStatus::Modified, - old_path: Some(path.clone()), - new_path: Some(path), + // TODO(byte-paths): slots keyed by lossy String (Phase 2/4) + old_path: Some(RepoPathBuf::from_string(path.clone())), + new_path: Some(RepoPathBuf::from_string(path)), old_mode: d.old_mode, new_mode: a.new_mode, old_oid: d.old_oid, @@ -1931,15 +1944,16 @@ fn raw_change_to_diff_entry(change: &RawChange) -> DiffEntry { DiffEntry { status, + // TODO(byte-paths): RawChange.path is a lossy String (Phase 2/4) old_path: if change.status == 'A' { None } else { - Some(change.path.clone()) + Some(RepoPathBuf::from_string(change.path.clone())) }, new_path: if change.status == 'D' { None } else { - Some(change.path.clone()) + Some(RepoPathBuf::from_string(change.path.clone())) }, old_mode: format!("{old_mode:06o}"), new_mode: format!("{new_mode:06o}"), @@ -1949,6 +1963,23 @@ fn raw_change_to_diff_entry(change: &RawChange) -> DiffEntry { } } +fn old_path_bytes(e: &DiffEntry) -> &[u8] { + e.old_path.as_deref().map(RepoPath::as_bytes).unwrap_or(b"") +} +fn new_path_bytes(e: &DiffEntry) -> &[u8] { + e.new_path.as_deref().map(RepoPath::as_bytes).unwrap_or(b"") +} +fn old_path_lossy(e: &DiffEntry) -> std::borrow::Cow<'_, str> { + e.old_path + .as_deref() + .map_or(std::borrow::Cow::Borrowed(""), RepoPath::to_string_lossy) +} +fn new_path_lossy(e: &DiffEntry) -> std::borrow::Cow<'_, str> { + e.new_path + .as_deref() + .map_or(std::borrow::Cow::Borrowed(""), RepoPath::to_string_lossy) +} + pub(crate) fn write_diff_index_name_status( out: &mut impl std::io::Write, entries: &[DiffEntry], @@ -1960,32 +1991,32 @@ pub(crate) fn write_diff_index_name_status( (DiffStatus::Renamed, Some(s)) => { if nul { write!(out, "R{s:03}\0")?; - out.write_all(entry.old_path.as_deref().unwrap_or("").as_bytes())?; + out.write_all(old_path_bytes(entry))?; out.write_all(b"\0")?; - out.write_all(entry.new_path.as_deref().unwrap_or("").as_bytes())?; + out.write_all(new_path_bytes(entry))?; out.write_all(b"\0")?; } else { writeln!( out, "R{s:03}\t{}\t{}", - quote_c_style(entry.old_path.as_deref().unwrap_or(""), quote_fully), - quote_c_style(entry.new_path.as_deref().unwrap_or(""), quote_fully), + quote_c_style(&old_path_lossy(entry), quote_fully), + quote_c_style(&new_path_lossy(entry), quote_fully), )?; } } (DiffStatus::Copied, Some(s)) => { if nul { write!(out, "C{s:03}\0")?; - out.write_all(entry.old_path.as_deref().unwrap_or("").as_bytes())?; + out.write_all(old_path_bytes(entry))?; out.write_all(b"\0")?; - out.write_all(entry.new_path.as_deref().unwrap_or("").as_bytes())?; + out.write_all(new_path_bytes(entry))?; out.write_all(b"\0")?; } else { writeln!( out, "C{s:03}\t{}\t{}", - quote_c_style(entry.old_path.as_deref().unwrap_or(""), quote_fully), - quote_c_style(entry.new_path.as_deref().unwrap_or(""), quote_fully), + quote_c_style(&old_path_lossy(entry), quote_fully), + quote_c_style(&new_path_lossy(entry), quote_fully), )?; } } @@ -2001,7 +2032,7 @@ pub(crate) fn write_diff_index_name_status( writeln!( out, "{letter}{s:03}\t{}", - quote_c_style(entry.path(), quote_fully) + quote_c_style(&entry.path().to_string_lossy(), quote_fully) )?; } } @@ -2015,7 +2046,7 @@ pub(crate) fn write_diff_index_name_status( out, "{}\t{}", entry.status.letter(), - quote_c_style(entry.path(), quote_fully) + quote_c_style(&entry.path().to_string_lossy(), quote_fully) )?; } } @@ -2127,9 +2158,9 @@ fn write_raw_diff_entry_z( match entry.status { DiffStatus::Renamed | DiffStatus::Copied => { - out.write_all(entry.old_path.as_deref().unwrap_or("").as_bytes())?; + out.write_all(old_path_bytes(entry))?; out.write_all(b"\0")?; - out.write_all(entry.new_path.as_deref().unwrap_or("").as_bytes())?; + out.write_all(new_path_bytes(entry))?; out.write_all(b"\0")?; } _ => { @@ -2186,11 +2217,11 @@ fn render_raw_diff_entry( DiffStatus::Renamed | DiffStatus::Copied => { format!( "{}\t{}", - quote_c_style(entry.old_path.as_deref().unwrap_or(""), quote_fully), - quote_c_style(entry.new_path.as_deref().unwrap_or(""), quote_fully), + quote_c_style(&old_path_lossy(entry), quote_fully), + quote_c_style(&new_path_lossy(entry), quote_fully), ) } - _ => quote_c_style(entry.path(), quote_fully), + _ => quote_c_style(&entry.path().to_string_lossy(), quote_fully), }; Ok(format!( @@ -2838,14 +2869,15 @@ pub(crate) fn write_patch_entry_inner( .unwrap_or_default() .quote_path_fully(); + let empty = RepoPath::from_str(""); let old_path = entry .old_path .as_deref() - .unwrap_or(entry.new_path.as_deref().unwrap_or("")); + .unwrap_or(entry.new_path.as_deref().unwrap_or(empty)); let new_path = entry .new_path .as_deref() - .unwrap_or(entry.old_path.as_deref().unwrap_or("")); + .unwrap_or(entry.old_path.as_deref().unwrap_or(empty)); let (disp_old, disp_new) = if path_prefix.is_empty() { (old_path.to_string(), new_path.to_string()) @@ -2863,7 +2895,7 @@ pub(crate) fn write_patch_entry_inner( && entry.new_oid == zero_oid() { if let Some(wt) = work_tree { - let new_raw = read_worktree_path_raw(&wt.join(new_path)); + let new_raw = read_worktree_path_raw(&new_path.to_fs_path(wt)); if read_blob_raw(odb, &entry.old_oid) == new_raw { return Ok(()); } @@ -3067,8 +3099,16 @@ pub(crate) fn write_patch_entry_inner( } let sim = entry.score.unwrap_or(100); writeln!(out, "similarity index {sim}%")?; - writeln!(out, "rename from {}", quote_c_style(old_path, quote_fully))?; - writeln!(out, "rename to {}", quote_c_style(new_path, quote_fully))?; + writeln!( + out, + "rename from {}", + quote_c_style(&old_path.to_string_lossy(), quote_fully) + )?; + writeln!( + out, + "rename to {}", + quote_c_style(&new_path.to_string_lossy(), quote_fully) + )?; if entry.old_oid != entry.new_oid { writeln!( out, @@ -3081,8 +3121,16 @@ pub(crate) fn write_patch_entry_inner( DiffStatus::Copied => { let sim = entry.score.unwrap_or(100); writeln!(out, "similarity index {sim}%")?; - writeln!(out, "copy from {}", quote_c_style(old_path, quote_fully))?; - writeln!(out, "copy to {}", quote_c_style(new_path, quote_fully))?; + writeln!( + out, + "copy from {}", + quote_c_style(&old_path.to_string_lossy(), quote_fully) + )?; + writeln!( + out, + "copy to {}", + quote_c_style(&new_path.to_string_lossy(), quote_fully) + )?; if entry.old_oid != entry.new_oid { writeln!( out, @@ -3111,7 +3159,7 @@ pub(crate) fn write_patch_entry_inner( // Zero OID for non-deleted entries means worktree content if let Some(wt) = work_tree { let path = entry.new_path.as_deref().unwrap_or(new_path); - read_worktree_path_raw(&wt.join(path)) + read_worktree_path_raw(&path.to_fs_path(wt)) } else { Vec::new() } @@ -3126,7 +3174,7 @@ pub(crate) fn write_patch_entry_inner( { if let Some(wt) = work_tree { let path = entry.new_path.as_deref().unwrap_or(new_path); - read_worktree_path_raw(&wt.join(path)) + read_worktree_path_raw(&path.to_fs_path(wt)) } else { blob } @@ -3138,18 +3186,18 @@ pub(crate) fn write_patch_entry_inner( // Check for binary content let treat_as_binary_by_driver = !mode_is_symlink(&entry.old_mode) && !mode_is_symlink(&entry.new_mode) - && (is_binary_driver_path(repo, work_tree, old_path) - || is_binary_driver_path(repo, work_tree, new_path)); + && (is_binary_driver_path(repo, work_tree, &old_path.to_string_lossy()) + || is_binary_driver_path(repo, work_tree, &new_path.to_string_lossy())); if treat_as_binary_by_driver || is_binary(&old_raw) || is_binary(&new_raw) { let bo = if entry.status == DiffStatus::Added { "/dev/null".to_owned() } else { - format_diff_path_with_prefix("a/", old_path, quote_fully) + format_diff_path_with_prefix("a/", &old_path.to_string_lossy(), quote_fully) }; let bn = if entry.status == DiffStatus::Deleted { "/dev/null".to_owned() } else { - format_diff_path_with_prefix("b/", new_path, quote_fully) + format_diff_path_with_prefix("b/", &new_path.to_string_lossy(), quote_fully) }; writeln!(out, "Binary files {bo} and {bn} differ")?; return Ok(()); @@ -3252,7 +3300,7 @@ fn validate_patch_entry_oids(entry: &DiffEntry) -> Result<()> { /// Write --stat output for diff-index. fn write_diff_index_stat(entries: &[DiffEntry], odb: &Odb) -> Result<()> { - let mut file_stats: Vec<(&str, usize, usize, bool)> = Vec::new(); + let mut file_stats: Vec<(std::borrow::Cow, usize, usize, bool)> = Vec::new(); let mut total_ins = 0usize; let mut total_del = 0usize; let mut files_changed = 0usize; @@ -3268,7 +3316,7 @@ fn write_diff_index_stat(entries: &[DiffEntry], odb: &Odb) -> Result<()> { let new_content = String::from_utf8_lossy(&new_raw).into_owned(); count_line_changes(&old_content, &new_content) }; - file_stats.push((entry.path(), ins, del, binary)); + file_stats.push((entry.path().to_string_lossy(), ins, del, binary)); total_ins += ins; total_del += del; files_changed += 1; diff --git a/grit-git/src/commands/diff_pairs.rs b/grit-git/src/commands/diff_pairs.rs index 4ed21d650..9f4585389 100644 --- a/grit-git/src/commands/diff_pairs.rs +++ b/grit-git/src/commands/diff_pairs.rs @@ -188,8 +188,15 @@ fn render_raw_entry(pair: &ParsedPair, out: &mut impl std::io::Write) -> Result< fn to_diff_entry(pair: &ParsedPair) -> DiffEntry { DiffEntry { status: pair.status, - old_path: pair.old_path.clone(), - new_path: pair.new_path.clone(), + // TODO(byte-paths): ParsedPair stores lossy String paths; migrate its source to bytes (Phase 2/4). + old_path: pair + .old_path + .clone() + .map(grit_lib::repo_path::RepoPathBuf::from_string), + new_path: pair + .new_path + .clone() + .map(grit_lib::repo_path::RepoPathBuf::from_string), old_mode: pair.old_mode.clone(), new_mode: pair.new_mode.clone(), old_oid: pair.old_oid, diff --git a/grit-git/src/commands/diff_tree.rs b/grit-git/src/commands/diff_tree.rs index 416c3b8f3..ac978e3ad 100644 --- a/grit-git/src/commands/diff_tree.rs +++ b/grit-git/src/commands/diff_tree.rs @@ -17,6 +17,7 @@ use grit_lib::combined_tree_diff::{ }; use grit_lib::config::{ConfigFile, ConfigScope, ConfigSet}; use grit_lib::delta_encode::{encode_lcp_delta, encode_prefix_extension_delta}; +use grit_lib::repo_path::{RepoPath, RepoPathBuf}; use grit_lib::diff::{ count_changes, detect_copies as lib_detect_copies, detect_renames, diff_trees, diff_trees_show_tree_entries, format_raw, format_raw_abbrev, @@ -1511,7 +1512,8 @@ fn filter_max_depth( let mut seen = std::collections::HashSet::new(); let mut result = Vec::new(); for entry in entries { - let path = entry.path().to_owned(); + // TODO(byte-paths): depth-collapse logic runs on the lossy string form (Phase 3). + let path = entry.path().to_string_lossy().into_owned(); // Find the deepest directory-prefix of `path` that git would refuse to // recurse into; if found, the entry collapses to that directory name. match collapse_boundary(&path, &specs, max_depth) { @@ -1519,8 +1521,8 @@ fn filter_max_depth( Some(dir) => { if seen.insert(dir.clone()) { let mut collapsed = entry; - collapsed.old_path = Some(dir.clone()); - collapsed.new_path = Some(dir); + collapsed.old_path = Some(RepoPathBuf::from_string(dir.clone())); + collapsed.new_path = Some(RepoPathBuf::from_string(dir)); result.push(collapsed); } } @@ -1625,13 +1627,12 @@ fn diff_trees_toplevel( while oi < old_entries.len() || ni < new_entries.len() { match (old_entries.get(oi), new_entries.get(ni)) { (Some(o), Some(n)) => { - let o_name = String::from_utf8_lossy(&o.name); - let n_name = String::from_utf8_lossy(&n.name); - match o_name.cmp(&n_name) { + // Byte-true ordering and paths (Git tree names are raw bytes). + match o.name.cmp(&n.name) { std::cmp::Ordering::Less => { result.push(DiffEntry { status: DiffStatus::Deleted, - old_path: Some(o_name.into_owned()), + old_path: Some(RepoPathBuf::from_bytes(o.name.clone())), new_path: None, old_mode: format!("{:06o}", o.mode), new_mode: "000000".to_owned(), @@ -1645,7 +1646,7 @@ fn diff_trees_toplevel( result.push(DiffEntry { status: DiffStatus::Added, old_path: None, - new_path: Some(n_name.into_owned()), + new_path: Some(RepoPathBuf::from_bytes(n.name.clone())), old_mode: "000000".to_owned(), new_mode: format!("{:06o}", n.mode), old_oid: zero, @@ -1656,7 +1657,7 @@ fn diff_trees_toplevel( } std::cmp::Ordering::Equal => { if o.oid != n.oid || o.mode != n.mode { - let path = o_name.into_owned(); + let path = RepoPathBuf::from_bytes(o.name.clone()); let old_type = o.mode & 0o170000; let new_type = n.mode & 0o170000; let old_is_tree = old_type == 0o040000; @@ -1713,7 +1714,7 @@ fn diff_trees_toplevel( (Some(o), None) => { result.push(DiffEntry { status: DiffStatus::Deleted, - old_path: Some(String::from_utf8_lossy(&o.name).into_owned()), + old_path: Some(RepoPathBuf::from_bytes(o.name.clone())), new_path: None, old_mode: format!("{:06o}", o.mode), new_mode: "000000".to_owned(), @@ -1727,7 +1728,7 @@ fn diff_trees_toplevel( result.push(DiffEntry { status: DiffStatus::Added, old_path: None, - new_path: Some(String::from_utf8_lossy(&n.name).into_owned()), + new_path: Some(RepoPathBuf::from_bytes(n.name.clone())), old_mode: "000000".to_owned(), new_mode: format!("{:06o}", n.mode), old_oid: zero, @@ -1897,7 +1898,7 @@ fn print_submodule_log_for_entry( message = Some("(submodule deleted)"); } - let sub_repo = open_submodule_repo(super_git_dir, work_tree, path); + let sub_repo = open_submodule_repo(super_git_dir, work_tree, &path.to_string_lossy()); if sub_repo.is_none() && message.is_none() { message = Some("(commits not present)"); } @@ -2051,8 +2052,9 @@ fn combined_paths_to_first_parent_entries( }; out.push(DiffEntry { status, - old_path: Some(p.path.clone()), - new_path: Some(p.path.clone()), + // TODO(byte-paths): CombinedDiffPath.path is a lossy String (Phase 3/4). + old_path: Some(RepoPathBuf::from_string(p.path.clone())), + new_path: Some(RepoPathBuf::from_string(p.path.clone())), old_mode, new_mode, old_oid, @@ -2358,9 +2360,9 @@ fn write_raw_diff_tree_z( )?; match entry.status { DiffStatus::Renamed | DiffStatus::Copied => { - out.write_all(entry.old_path.as_deref().unwrap_or("").as_bytes())?; + out.write_all(entry.old_path.as_deref().unwrap_or(RepoPath::from_str("")).as_bytes())?; out.write_all(b"\0")?; - out.write_all(entry.new_path.as_deref().unwrap_or("").as_bytes())?; + out.write_all(entry.new_path.as_deref().unwrap_or(RepoPath::from_str("")).as_bytes())?; out.write_all(b"\0")?; } _ => { @@ -2476,7 +2478,8 @@ fn break_then_detect( prepared.push(entry); continue; } - let path = entry.path().to_owned(); + // TODO(byte-paths): break/rename bookkeeping keyed on lossy string (Phase 3). + let path = entry.path().to_string_lossy().into_owned(); broken_paths.insert(path.clone()); // Deleted half (old side). let del = DiffEntry { @@ -2530,13 +2533,13 @@ fn break_then_detect( let surviving_add: HashSet = detected .iter() .filter(|e| e.status == DiffStatus::Added) - .filter_map(|e| e.new_path.clone()) + .filter_map(|e| e.new_path.as_deref().map(|p| p.to_string_lossy().into_owned())) .filter(|p| broken_paths.contains(p)) .collect(); let surviving_del: HashSet = detected .iter() .filter(|e| e.status == DiffStatus::Deleted) - .filter_map(|e| e.old_path.clone()) + .filter_map(|e| e.old_path.as_deref().map(|p| p.to_string_lossy().into_owned())) .filter(|p| broken_paths.contains(p)) .collect(); @@ -2550,7 +2553,7 @@ fn break_then_detect( if entry .old_path .as_deref() - .is_some_and(|p| surviving_add.contains(p)) => + .is_some_and(|p| surviving_add.contains(p.to_string_lossy().as_ref())) => { let mut e = entry; e.status = DiffStatus::Copied; @@ -2564,9 +2567,13 @@ fn break_then_detect( if entry .new_path .as_deref() - .is_some_and(|p| broken_paths.contains(p)) => + .is_some_and(|p| broken_paths.contains(p.to_string_lossy().as_ref())) => { - let path = entry.new_path.clone().unwrap_or_default(); + let path = entry + .new_path + .as_deref() + .map(|p| p.to_string_lossy().into_owned()) + .unwrap_or_default(); if merged_paths.insert(path.clone()) { if let (Some(del), Some(add)) = (broken_old.get(&path), broken_new.get(&path)) { result.push(merge_broken_pair(del, add)); @@ -2577,9 +2584,13 @@ fn break_then_detect( if entry .old_path .as_deref() - .is_some_and(|p| broken_paths.contains(p)) => + .is_some_and(|p| broken_paths.contains(p.to_string_lossy().as_ref())) => { - let path = entry.old_path.clone().unwrap_or_default(); + let path = entry + .old_path + .as_deref() + .map(|p| p.to_string_lossy().into_owned()) + .unwrap_or_default(); // Only emit here if the add-half did NOT survive (otherwise the // Added arm above handles the merge); this covers the symmetric // case where the delete half is the survivor. @@ -2784,7 +2795,7 @@ fn print_diff( out.write_all(entry.path().as_bytes())?; out.write_all(b"\0")?; } else { - writeln!(out, "{}", quote_c_style(entry.path(), quote_fully))?; + writeln!(out, "{}", quote_c_style(&entry.path().to_string_lossy(), quote_fully))?; } } } @@ -2851,8 +2862,8 @@ fn write_summary(out: &mut impl Write, entries: &[DiffEntry]) -> Result<()> { writeln!( out, " rename {} => {} ({sim}%)", - entry.old_path.as_deref().unwrap_or(""), - entry.new_path.as_deref().unwrap_or("") + entry.old_path.as_deref().unwrap_or(RepoPath::from_str("")), + entry.new_path.as_deref().unwrap_or(RepoPath::from_str("")) )?; } DiffStatus::Copied => { @@ -2860,8 +2871,8 @@ fn write_summary(out: &mut impl Write, entries: &[DiffEntry]) -> Result<()> { writeln!( out, " copy {} => {} ({sim}%)", - entry.old_path.as_deref().unwrap_or(""), - entry.new_path.as_deref().unwrap_or("") + entry.old_path.as_deref().unwrap_or(RepoPath::from_str("")), + entry.new_path.as_deref().unwrap_or(RepoPath::from_str("")) )?; } _ => {} @@ -2982,14 +2993,21 @@ pub(crate) fn write_patch_entry( ) -> Result { let (old_content, new_content) = read_blob_pair(odb, entry)?; - let old_path = entry + // TODO(byte-paths): diff-header path quoting is lossy until quote_path goes byte-based (Phase 3). + let old_lossy = entry .old_path .as_deref() - .unwrap_or(entry.new_path.as_deref().unwrap_or("")); - let new_path = entry + .or(entry.new_path.as_deref()) + .map(RepoPath::to_string_lossy) + .unwrap_or(std::borrow::Cow::Borrowed("")); + let old_path: &str = &old_lossy; + let new_lossy = entry .new_path .as_deref() - .unwrap_or(entry.old_path.as_deref().unwrap_or("")); + .or(entry.old_path.as_deref()) + .map(RepoPath::to_string_lossy) + .unwrap_or(std::borrow::Cow::Borrowed("")); + let new_path: &str = &new_lossy; let old_hex = entry.old_oid.to_hex(); let new_hex = entry.new_oid.to_hex(); @@ -3052,11 +3070,12 @@ pub(crate) fn write_patch_entry( DiffStatus::Unmerged => {} } - let path_for_attrs = entry.path(); + // TODO(byte-paths): attribute lookup uses the lossy path until pathspec/attrs go byte-based (Phase 3). + let path_for_attrs = entry.path().to_string_lossy(); let old_raw = old_content.as_bytes(); let new_raw = new_content.as_bytes(); - if is_binary_for_diff(git_dir, path_for_attrs, old_raw) - || is_binary_for_diff(git_dir, path_for_attrs, new_raw) + if is_binary_for_diff(git_dir, &path_for_attrs, old_raw) + || is_binary_for_diff(git_dir, &path_for_attrs, new_raw) { if binary_patch { emit_git_binary_patch(out, old_raw, new_raw)?; @@ -3113,7 +3132,7 @@ fn stat_mode_has_executable_bit(mode_str: &str) -> bool { /// Quote `entry`'s path and, for `--compact-summary`, append git's /// `(new)` / `(gone)` / `(mode +x)` / `(mode -x)` annotation. fn stat_display_path(entry: &DiffEntry, quote_fully: bool, compact: bool) -> String { - let path = quote_c_style(entry.path(), quote_fully); + let path = quote_c_style(&entry.path().to_string_lossy(), quote_fully); if !compact { return path; } @@ -3798,15 +3817,16 @@ fn diff_entry_pathspec_context(entry: &DiffEntry) -> grit_lib::pathspec::Pathspe } fn diff_entry_matches_pathspecs(entry: &DiffEntry, pathspecs: &[String]) -> bool { + // TODO(byte-paths): pathspec matching runs on the lossy string form (Phase 3). let ctx = diff_entry_pathspec_context(entry); if let Some(ref p) = entry.new_path { - if matches_pathspec_list_with_context(p, pathspecs, ctx) { + if matches_pathspec_list_with_context(&p.to_string_lossy(), pathspecs, ctx) { return true; } } if let Some(ref p) = entry.old_path { if entry.new_path.as_ref() != Some(p) - && matches_pathspec_list_with_context(p, pathspecs, ctx) + && matches_pathspec_list_with_context(&p.to_string_lossy(), pathspecs, ctx) { return true; } diff --git a/grit-git/src/commands/format_patch.rs b/grit-git/src/commands/format_patch.rs index 85ed7ac5a..39030e4a6 100644 --- a/grit-git/src/commands/format_patch.rs +++ b/grit-git/src/commands/format_patch.rs @@ -8,6 +8,7 @@ use anyhow::{Context, Result}; use clap::Args as ClapArgs; use grit_lib::config::{parse_bool, ConfigSet}; use grit_lib::diff::{count_changes, diff_trees, unified_diff_with_prefix, zero_oid, DiffStatus}; +use grit_lib::repo_path::{RepoPath, RepoPathBuf}; use grit_lib::diffstat::{ write_diffstat_block, DiffstatOptions, FileStatInput, FORMAT_PATCH_STAT_WIDTH, }; @@ -1492,9 +1493,9 @@ fn diffstat_for_patch_entries( // Renames/copies are displayed in the diffstat as `old => new` (pretty-printed). let path = match entry.status { DiffStatus::Renamed | DiffStatus::Copied => { - let old = entry.old_path.as_deref().unwrap_or(""); - let new = entry.new_path.as_deref().unwrap_or(""); - grit_lib::diff::format_rename_path(old, new) + let old = entry.old_path.as_deref().map(RepoPath::to_string_lossy).unwrap_or_default(); + let new = entry.new_path.as_deref().map(RepoPath::to_string_lossy).unwrap_or_default(); + grit_lib::diff::format_rename_path(&old, &new) } _ => entry.path().to_string(), }; @@ -1539,9 +1540,9 @@ fn diffstat_for_patch_entries( for (file, entry) in files.iter().zip(entries.iter()) { let display = match entry.status { DiffStatus::Renamed | DiffStatus::Copied => { - let old = entry.old_path.as_deref().unwrap_or(""); - let new = entry.new_path.as_deref().unwrap_or(""); - let pretty = grit_lib::diff::format_rename_path(old, new); + let old = entry.old_path.as_deref().map(RepoPath::to_string_lossy).unwrap_or_default(); + let new = entry.new_path.as_deref().map(RepoPath::to_string_lossy).unwrap_or_default(); + let pretty = grit_lib::diff::format_rename_path(&old, &new); grit_lib::quote_path::quote_c_style(&pretty, false) } _ => file.path_display.clone(), @@ -1655,16 +1656,16 @@ fn format_summary_lines(entries: &[grit_lib::diff::DiffEntry]) -> String { let sim = entry.score.unwrap_or(100); out.push_str(&format!( " rename {} => {} ({sim}%)\n", - entry.old_path.as_deref().unwrap_or(""), - entry.new_path.as_deref().unwrap_or("") + entry.old_path.as_deref().unwrap_or(RepoPath::from_str("")), + entry.new_path.as_deref().unwrap_or(RepoPath::from_str("")) )); } DiffStatus::Copied => { let sim = entry.score.unwrap_or(100); out.push_str(&format!( " copy {} => {} ({sim}%)\n", - entry.old_path.as_deref().unwrap_or(""), - entry.new_path.as_deref().unwrap_or("") + entry.old_path.as_deref().unwrap_or(RepoPath::from_str("")), + entry.new_path.as_deref().unwrap_or(RepoPath::from_str("")) )); } _ => {} @@ -1799,7 +1800,7 @@ fn format_cover_letter( patch_opts .pathspec .iter() - .any(|ps| path_matches_spec(e.path(), ps)) + .any(|ps| path_matches_spec(&e.path().to_string_lossy(), ps)) }); } let diff_entries = apply_relative_filter(diff_entries, patch_opts.relative.as_deref()); @@ -2037,7 +2038,7 @@ fn format_single_patch( diff_entries_raw.retain(|e| { opts.pathspec .iter() - .any(|ps| path_matches_spec(e.path(), ps)) + .any(|ps| path_matches_spec(&e.path().to_string_lossy(), ps)) }); } let diff_entries_raw = apply_relative_filter(diff_entries_raw, opts.relative.as_deref()); @@ -2057,15 +2058,23 @@ fn format_single_patch( } for entry in &diff_entries { - let old_path = entry.old_path.as_deref().unwrap_or("/dev/null"); - let new_path = entry.new_path.as_deref().unwrap_or("/dev/null"); + let old_path = entry + .old_path + .as_deref() + .map_or(std::borrow::Cow::Borrowed("/dev/null"), RepoPath::to_string_lossy); + let new_path = entry + .new_path + .as_deref() + .map_or(std::borrow::Cow::Borrowed("/dev/null"), RepoPath::to_string_lossy); write_diff_header_to_string( &mut diff_text, entry, opts.diff_src_prefix, opts.diff_dst_prefix, ); - let path_for_attrs = entry.path(); + // TODO(byte-paths): attribute/textconv lookup is still str-keyed (Phase 3). + let path_for_attrs_owned = entry.path().to_string_lossy(); + let path_for_attrs: &str = &path_for_attrs_owned; let use_textconv = !no_binary; let textconv_patch = use_textconv && diff_textconv_active(git_dir, &config, path_for_attrs); let old_raw = read_blob_bytes(odb, &entry.old_oid); @@ -2118,8 +2127,8 @@ fn format_single_patch( let patch = unified_diff_with_prefix( &old_content, &new_content, - old_path, - new_path, + &old_path, + &new_path, opts.context_lines, 0, opts.diff_src_prefix, @@ -2389,11 +2398,11 @@ fn write_diff_header_to_string( let old_path = entry .old_path .as_deref() - .unwrap_or(entry.new_path.as_deref().unwrap_or("")); + .unwrap_or(entry.new_path.as_deref().unwrap_or(RepoPath::from_str(""))); let new_path = entry .new_path .as_deref() - .unwrap_or(entry.old_path.as_deref().unwrap_or("")); + .unwrap_or(entry.old_path.as_deref().unwrap_or(RepoPath::from_str(""))); if src_prefix.is_empty() && dst_prefix.is_empty() { let _ = writeln!(out, "diff --git {old_path} {new_path}"); @@ -2753,18 +2762,18 @@ fn apply_relative_filter( }; let mut out = Vec::new(); for mut e in entries { - let keep = e.path().starts_with(prefix); + let keep = e.path().starts_with_bytes(prefix.as_bytes()); if !keep { continue; } if let Some(ref p) = e.old_path { - if let Some(stripped) = p.strip_prefix(prefix) { - e.old_path = Some(stripped.to_string()); + if let Some(stripped) = p.as_bytes().strip_prefix(prefix.as_bytes()) { + e.old_path = Some(RepoPathBuf::from_bytes(stripped.to_vec())); } } if let Some(ref p) = e.new_path { - if let Some(stripped) = p.strip_prefix(prefix) { - e.new_path = Some(stripped.to_string()); + if let Some(stripped) = p.as_bytes().strip_prefix(prefix.as_bytes()) { + e.new_path = Some(RepoPathBuf::from_bytes(stripped.to_vec())); } } out.push(e); @@ -2787,7 +2796,7 @@ fn commit_touches_pathspec(repo: &Repository, commit: &CommitData, pathspec: &[S }; entries .iter() - .any(|e| pathspec.iter().any(|ps| path_matches_spec(e.path(), ps))) + .any(|e| pathspec.iter().any(|ps| path_matches_spec(&e.path().to_string_lossy(), ps))) } /// Cover-letter subject/blurb resolved from description settings. @@ -3222,16 +3231,22 @@ fn compute_interdiff( .context("computing interdiff")?; let mut out = String::new(); for entry in &entries { - let old_path = entry.old_path.as_deref().unwrap_or("/dev/null"); - let new_path = entry.new_path.as_deref().unwrap_or("/dev/null"); + let old_path = entry + .old_path + .as_deref() + .map_or(std::borrow::Cow::Borrowed("/dev/null"), RepoPath::to_string_lossy); + let new_path = entry + .new_path + .as_deref() + .map_or(std::borrow::Cow::Borrowed("/dev/null"), RepoPath::to_string_lossy); write_diff_header_to_string(&mut out, entry, opts.diff_src_prefix, opts.diff_dst_prefix); let old_content = read_blob_content(&repo.odb, &entry.old_oid); let new_content = read_blob_content(&repo.odb, &entry.new_oid); let patch = unified_diff_with_prefix( &old_content, &new_content, - old_path, - new_path, + &old_path, + &new_path, opts.context_lines, 0, opts.diff_src_prefix, diff --git a/grit-git/src/commands/history.rs b/grit-git/src/commands/history.rs index 2b4f3da09..783d7d004 100644 --- a/grit-git/src/commands/history.rs +++ b/grit-git/src/commands/history.rs @@ -555,8 +555,8 @@ fn split_commit_interactive( .new_path .as_deref() .or(entry.old_path.as_deref()) - .unwrap_or(""); - if !path_matches_any_pathspec(path, pathspecs) { + .unwrap_or(grit_lib::repo_path::RepoPath::from_str("")); + if !path_matches_any_pathspec(&path.to_string_lossy(), pathspecs) { continue; } @@ -989,12 +989,12 @@ fn append_tree_diff_status( .new_path .as_deref() .or(a.old_path.as_deref()) - .unwrap_or(""); + .unwrap_or(grit_lib::repo_path::RepoPath::from_str("")); let pb = b .new_path .as_deref() .or(b.old_path.as_deref()) - .unwrap_or(""); + .unwrap_or(grit_lib::repo_path::RepoPath::from_str("")); pa.cmp(pb) }); for e in paths { @@ -1002,7 +1002,7 @@ fn append_tree_diff_status( .new_path .as_deref() .or(e.old_path.as_deref()) - .unwrap_or("?"); + .unwrap_or(grit_lib::repo_path::RepoPath::from_str("?")); let label = match e.status { DiffStatus::Added => "new file", DiffStatus::Deleted => "deleted", diff --git a/grit-git/src/commands/log.rs b/grit-git/src/commands/log.rs index a9d758c44..1dc1e06c2 100644 --- a/grit-git/src/commands/log.rs +++ b/grit-git/src/commands/log.rs @@ -35,6 +35,7 @@ use grit_lib::merge_diff::{ }; use grit_lib::objects::{parse_commit, parse_tag, CommitData, ObjectId, ObjectKind}; use grit_lib::odb::Odb; +use grit_lib::repo_path::RepoPath; use grit_lib::porcelain::log::{ collect_decorations, collect_decorations_inner, current_branch_decoration_index, normalize_glob_ref, DecorationFilter, DecorationKind, DecorationMap, @@ -3772,7 +3773,7 @@ fn treesame_parent_for_path_rewrite( let entries = diff_trees(&repo.odb, Some(&parent.tree), Some(&commit.tree), "")?; let differs = entries .iter() - .any(|entry| path_matches(entry.path(), pathspecs)); + .any(|entry| path_matches(&entry.path().to_string_lossy(), pathspecs)); if !differs { treesame.push(*parent_oid); } @@ -7176,8 +7177,8 @@ fn reflog_transition_touches_paths( }; let entries = diff_trees(odb, old_t.as_ref(), Some(to_tree), "")?; Ok(entries.iter().any(|e| { - let path = e.path(); - path_matches(path, pathspecs) + let path = e.path().to_string_lossy(); + path_matches(&path, pathspecs) })) }; @@ -8863,8 +8864,8 @@ fn commit_touches_paths( } let entries = diff_trees(odb, None, Some(&info.tree), "")?; let touches = entries.iter().any(|e| { - let path = e.path(); - path_matches(path, pathspecs) + let path = e.path().to_string_lossy(); + path_matches(&path, pathspecs) }); if bloom_ret == BloomPrecheck::Maybe && !touches { if let Some(stats) = bloom_stats { @@ -8905,8 +8906,8 @@ fn commit_touches_paths( let parent_commit = parse_commit(&parent_obj.data)?; let entries = diff_trees(odb, Some(&parent_commit.tree), Some(&info.tree), "")?; let touches = entries.iter().any(|e| { - let path = e.path(); - path_matches(path, pathspecs) + let path = e.path().to_string_lossy(); + path_matches(&path, pathspecs) }); if bloom_ret == BloomPrecheck::Maybe && !touches { if let Some(stats) = bloom_stats { @@ -8923,8 +8924,8 @@ fn commit_touches_paths( let parent_commit = parse_commit(&parent_obj.data)?; let entries = diff_trees(odb, Some(&parent_commit.tree), Some(&info.tree), "")?; if entries.iter().any(|e| { - let path = e.path(); - path_matches(path, pathspecs) + let path = e.path().to_string_lossy(); + path_matches(&path, pathspecs) }) { return Ok(true); } @@ -9687,7 +9688,9 @@ fn commit_pickaxe_matches( }; for entry in &entries { - let path = entry.path(); + let path_rp = entry.path(); + let path_owned = path_rp.to_string_lossy(); + let path: &str = &path_owned; let old_raw = read_blob_bytes(odb, &entry.old_oid); let new_raw = read_blob_bytes(odb, &entry.new_oid); @@ -9709,8 +9712,8 @@ fn commit_pickaxe_matches( let patch = unified_diff( old_text.as_str(), new_text.as_str(), - entry.old_path.as_deref().unwrap_or(path), - entry.new_path.as_deref().unwrap_or(path), + &entry.old_path.as_deref().unwrap_or(path_rp).to_string_lossy(), + &entry.new_path.as_deref().unwrap_or(path_rp).to_string_lossy(), 3, indent_heuristic_from_config(&config), config.quote_path_fully(), @@ -9790,7 +9793,9 @@ fn pickaxe_matching_paths( }; for entry in entries { - let path = entry.path(); + let path_rp = entry.path(); + let path_owned = path_rp.to_string_lossy(); + let path: &str = &path_owned; let old_raw = read_blob_bytes(odb, &entry.old_oid); let new_raw = read_blob_bytes(odb, &entry.new_oid); @@ -9811,8 +9816,8 @@ fn pickaxe_matching_paths( let patch = unified_diff( old_text.as_str(), new_text.as_str(), - entry.old_path.as_deref().unwrap_or(path), - entry.new_path.as_deref().unwrap_or(path), + &entry.old_path.as_deref().unwrap_or(path_rp).to_string_lossy(), + &entry.new_path.as_deref().unwrap_or(path_rp).to_string_lossy(), 3, indent_heuristic_from_config(&config), config.quote_path_fully(), @@ -12805,7 +12810,7 @@ fn compute_combined_diff_entries(odb: &Odb, info: &CommitInfo) -> Result bool return true; } let paths = [ - entry.path(), - entry.old_path.as_deref().unwrap_or(""), - entry.new_path.as_deref().unwrap_or(""), + entry.path().to_string_lossy(), + entry + .old_path + .as_deref() + .map_or(Cow::Borrowed(""), RepoPath::to_string_lossy), + entry + .new_path + .as_deref() + .map_or(Cow::Borrowed(""), RepoPath::to_string_lossy), ]; for spec in specs { - for p in paths { + for p in &paths { if !p.is_empty() && grit_lib::pathspec::matches_pathspec(spec, p) { return true; } @@ -13251,8 +13262,8 @@ fn write_commit_diff_body( "{}{:03}\t{}\t{}", entry.status.letter(), score, - entry.old_path.as_deref().unwrap_or(""), - entry.new_path.as_deref().unwrap_or(""), + entry.old_path.as_deref().unwrap_or(RepoPath::from_str("")), + entry.new_path.as_deref().unwrap_or(RepoPath::from_str("")), )?; } _ => { @@ -13339,14 +13350,14 @@ fn log_write_combined_patches( let path = entry.path(); let parent_sides = cpaths .iter() - .find(|p| p.path == path) + .find(|p| path == &p.path) .map(|p| p.parents.clone()) .unwrap_or_default(); if let Some(patch) = format_combined_textconv_patch( git_dir, config, odb, - path, + &path.to_string_lossy(), &parent_trees, merge_tree, abbrev, @@ -13428,14 +13439,18 @@ fn log_write_patch_entry( context_lines: usize, indent_heuristic: bool, ) -> Result<()> { - let old_path = entry + let old_path_owned = entry .old_path .as_deref() - .unwrap_or(entry.new_path.as_deref().unwrap_or("")); - let new_path = entry + .or(entry.new_path.as_deref()) + .map_or(Cow::Borrowed(""), RepoPath::to_string_lossy); + let old_path: &str = &old_path_owned; + let new_path_owned = entry .new_path .as_deref() - .unwrap_or(entry.old_path.as_deref().unwrap_or("")); + .or(entry.old_path.as_deref()) + .map_or(Cow::Borrowed(""), RepoPath::to_string_lossy); + let new_path: &str = &new_path_owned; if args.no_prefix { writeln!(out, "diff --git {old_path} {new_path}")?; @@ -13544,7 +13559,8 @@ fn log_write_patch_entry( return Ok(()); } - let path_for_attrs = entry.path(); + let path_for_attrs_owned = entry.path().to_string_lossy(); + let path_for_attrs: &str = &path_for_attrs_owned; let use_textconv = !args.no_textconv; let textconv_patch = use_textconv && diff_textconv_active(git_dir, config, path_for_attrs); let old_raw = read_blob_bytes(odb, &entry.old_oid); @@ -13737,9 +13753,15 @@ fn log_print_stat_summary( for entry in entries { let path_display = match entry.status { DiffStatus::Renamed | DiffStatus::Copied => { - let old = entry.old_path.as_deref().unwrap_or(""); - let new = entry.new_path.as_deref().unwrap_or(""); - grit_lib::diff::format_rename_path(old, new) + let old = entry + .old_path + .as_deref() + .map_or(Cow::Borrowed(""), RepoPath::to_string_lossy); + let new = entry + .new_path + .as_deref() + .map_or(Cow::Borrowed(""), RepoPath::to_string_lossy); + grit_lib::diff::format_rename_path(&old, &new) } _ => entry.path().to_string(), }; @@ -14174,24 +14196,27 @@ fn follow_filter( match entry.status { DiffStatus::Renamed | DiffStatus::Copied => { // The new path is the copy/rename destination; follow it back to the source. - if entry.new_path.as_deref() == Some(tracked_path.as_str()) { + if entry.new_path.as_deref().map(RepoPath::as_bytes) + == Some(tracked_path.as_bytes()) + { touches = true; display_entry = Some(entry.clone()); if let Some(old_path) = entry.old_path.as_deref() { - retarget = Some(old_path.to_string()); + retarget = Some(old_path.to_string_lossy().into_owned()); } } // A *rename* away from the tracked path is a change to it (the // file disappears under this name). A *copy* from the tracked // path leaves the source unchanged, so it does not count. if entry.status == DiffStatus::Renamed - && entry.old_path.as_deref() == Some(tracked_path.as_str()) + && entry.old_path.as_deref().map(RepoPath::as_bytes) + == Some(tracked_path.as_bytes()) { touches = true; } } _ => { - if entry.path() == tracked_path { + if entry.path().as_bytes() == tracked_path.as_bytes() { touches = true; if display_entry.is_none() { display_entry = Some(entry.clone()); diff --git a/grit-git/src/commands/merge.rs b/grit-git/src/commands/merge.rs index 5bd525252..32be65dcc 100644 --- a/grit-git/src/commands/merge.rs +++ b/grit-git/src/commands/merge.rs @@ -6288,14 +6288,13 @@ fn detect_merge_renames( for path in all_paths { let b = base.get(path); let s = side.get(path); - let path_str = String::from_utf8_lossy(path).to_string(); match (b, s) { (Some(be), None) => { // Deleted in side if !rename_sources.contains(path) { entries.push(DiffEntry { status: DiffStatus::Deleted, - old_path: Some(path_str), + old_path: Some(grit_lib::repo_path::RepoPathBuf::from_bytes(path.to_vec())), new_path: None, old_mode: format!("{:06o}", be.mode), new_mode: String::new(), @@ -6312,7 +6311,7 @@ fn detect_merge_renames( entries.push(DiffEntry { status: DiffStatus::Added, old_path: None, - new_path: Some(path_str), + new_path: Some(grit_lib::repo_path::RepoPathBuf::from_bytes(path.to_vec())), old_mode: String::new(), new_mode: format!("{:06o}", se.mode), old_oid: zero_oid, @@ -6329,7 +6328,7 @@ fn detect_merge_renames( // The old content moved away → emit Deleted for rename detection entries.push(DiffEntry { status: DiffStatus::Deleted, - old_path: Some(path_str.clone()), + old_path: Some(grit_lib::repo_path::RepoPathBuf::from_bytes(path.to_vec())), new_path: None, old_mode: format!("{:06o}", be.mode), new_mode: String::new(), @@ -11692,8 +11691,8 @@ fn print_diffstat(repo: &Repository, entries: &[DiffEntry], compact: bool) { .new_path .as_deref() .or(entry.old_path.as_deref()) - .unwrap_or("unknown") - .to_string(); + .map(|p| p.to_string_lossy().into_owned()) + .unwrap_or_else(|| "unknown".to_string()); let is_new = entry.old_oid == zero_oid(); let is_deleted = entry.new_oid == zero_oid(); diff --git a/grit-git/src/commands/rebase.rs b/grit-git/src/commands/rebase.rs index 9739b7cc7..da83e18fd 100644 --- a/grit-git/src/commands/rebase.rs +++ b/grit-git/src/commands/rebase.rs @@ -7076,7 +7076,8 @@ fn print_diffstat_from_entries(repo: &Repository, entries: &[DiffEntry]) { .new_path .as_deref() .or(entry.old_path.as_deref()) - .unwrap_or("unknown"); + .map(|p| p.to_string_lossy().into_owned()) + .unwrap_or_else(|| "unknown".to_string()); let is_new = entry.old_oid == diff::zero_oid(); let is_deleted = entry.new_oid == diff::zero_oid(); diff --git a/grit-git/src/commands/remerge_diff.rs b/grit-git/src/commands/remerge_diff.rs index e976215bf..96ce745eb 100644 --- a/grit-git/src/commands/remerge_diff.rs +++ b/grit-git/src/commands/remerge_diff.rs @@ -6,6 +6,7 @@ use anyhow::{bail, Result}; use grit_lib::config::ConfigSet; use grit_lib::diff::{detect_renames, diff_trees, unified_diff, zero_oid, DiffEntry, DiffStatus}; use grit_lib::merge_diff::{blob_text_for_diff, is_binary_for_diff}; +use grit_lib::repo_path::{RepoPath, RepoPathBuf}; use grit_lib::objects::{parse_tree, ObjectId, ObjectKind}; use grit_lib::odb::Odb; use grit_lib::repo::Repository; @@ -52,9 +53,9 @@ fn conflict_desc_matches_pathspecs(d: &ConflictDescription, pathspecs: &[String] } fn entry_matches_pathspecs(e: &DiffEntry, pathspecs: &[String]) -> bool { - let old = e.old_path.as_deref().unwrap_or(""); - let new = e.new_path.as_deref().unwrap_or(""); - path_matches_pathspecs(pathspecs, old) || path_matches_pathspecs(pathspecs, new) + let old = e.old_path.as_deref().map(RepoPath::to_string_lossy).unwrap_or_default(); + let new = e.new_path.as_deref().map(RepoPath::to_string_lossy).unwrap_or_default(); + path_matches_pathspecs(pathspecs, &old) || path_matches_pathspecs(pathspecs, &new) } fn parse_diff_filter(filter: &str) -> (Vec, Vec) { @@ -168,7 +169,8 @@ fn conflict_header_for_entry<'a>( descs: &'a [ConflictDescription], e: &DiffEntry, ) -> Option<&'a ConflictDescription> { - let primary = e.path(); + let primary_owned = e.path().to_string_lossy(); + let primary: &str = &primary_owned; for d in descs { let anchor = d .remerge_anchor_path @@ -183,9 +185,10 @@ fn conflict_header_for_entry<'a>( return Some(d); } for p in [ - e.old_path.as_deref().unwrap_or(""), - e.new_path.as_deref().unwrap_or(""), + e.old_path.as_deref().map(RepoPath::to_string_lossy).unwrap_or_default(), + e.new_path.as_deref().map(RepoPath::to_string_lossy).unwrap_or_default(), ] { + let p: &str = &p; if path_matches_remerge_anchor(anchor, p) || p == d.subject_path.as_str() { return Some(d); } @@ -331,8 +334,9 @@ pub(crate) fn write_remerge_diff( .remerge_anchor_path .as_deref() .unwrap_or(d.subject_path.as_str()); - let old = e.old_path.as_deref().unwrap_or(""); - let _new = e.new_path.as_deref().unwrap_or(""); + let old = e.old_path.as_deref().map(RepoPath::to_string_lossy).unwrap_or_default(); + let old: &str = &old; + let _new = e.new_path.as_deref().map(RepoPath::to_string_lossy).unwrap_or_default(); match d.kind { "file/directory" => { e.status == DiffStatus::Renamed @@ -391,8 +395,9 @@ pub(crate) fn write_remerge_diff( }; let pseudo = DiffEntry { status: pseudo_status, - old_path: Some(pseudo_old), - new_path: Some(pseudo_new), + // TODO(byte-paths): anchor/subject paths are still String (Phase 2/4). + old_path: Some(RepoPathBuf::from_string(pseudo_old)), + new_path: Some(RepoPathBuf::from_string(pseudo_new)), old_mode: "100644".to_string(), new_mode: "100644".to_string(), old_oid: zero_oid(), @@ -513,7 +518,7 @@ pub(crate) fn write_remerge_diff( continue; } if e.status == DiffStatus::Deleted - && e.old_path.as_deref() == Some(theirs_path.as_str()) + && e.old_path.as_deref().is_some_and(|p| p == theirs_path.as_str()) { used_entry[i] = true; break; @@ -525,7 +530,8 @@ pub(crate) fn write_remerge_diff( if let Ok(blob_oid) = blob_oid_at_path_in_tree(repo, &remerge_tree, &theirs_path) { let del = DiffEntry { status: DiffStatus::Deleted, - old_path: Some(theirs_path.clone()), + // TODO(byte-paths): theirs_path is still String (Phase 2/4). + old_path: Some(RepoPathBuf::from_string(theirs_path.clone())), new_path: None, old_mode: "100644".to_string(), new_mode: "000000".to_string(), @@ -580,14 +586,15 @@ pub(crate) fn write_remerge_diff( } if e.status == DiffStatus::Added { if let Some(np) = e.new_path.as_deref() { - if rename_rr_ours_dests.contains(np) { + if rename_rr_ours_dests.contains(&*np.to_string_lossy()) { continue; } } } if e.status == DiffStatus::Renamed { if let Some(o) = e.old_path.as_deref() { - if has_rename_rename.contains(o) + let o = o.to_string_lossy(); + if has_rename_rename.contains(&*o) || has_rename_rename .iter() .any(|a| o.starts_with(&format!("{a}~"))) @@ -641,8 +648,14 @@ fn emit_patch_for_entry( context_lines: usize, indent_heuristic: bool, ) -> Result<()> { - let old_path = e.old_path.as_deref().unwrap_or("/dev/null"); - let new_path = e.new_path.as_deref().unwrap_or("/dev/null"); + let old_path = e + .old_path + .as_deref() + .map_or(std::borrow::Cow::Borrowed("/dev/null"), RepoPath::to_string_lossy); + let new_path = e + .new_path + .as_deref() + .map_or(std::borrow::Cow::Borrowed("/dev/null"), RepoPath::to_string_lossy); let old_raw = if e.old_oid.is_zero() { Vec::new() } else { @@ -659,7 +672,9 @@ fn emit_patch_for_entry( .map(|o| o.data) .unwrap_or_default() }; - let path_for_attrs = e.path(); + // TODO(byte-paths): attribute/textconv lookup is still str-keyed (Phase 3). + let path_for_attrs_owned = e.path().to_string_lossy(); + let path_for_attrs: &str = &path_for_attrs_owned; if is_binary_for_diff(git_dir, path_for_attrs, &old_raw) || is_binary_for_diff(git_dir, path_for_attrs, &new_raw) { @@ -671,8 +686,8 @@ fn emit_patch_for_entry( let patch = unified_diff( &old_content, &new_content, - old_path, - new_path, + &old_path, + &new_path, context_lines, indent_heuristic, config.quote_path_fully(), diff --git a/grit-git/src/commands/replay.rs b/grit-git/src/commands/replay.rs index 64cbdd888..2ce53afbf 100644 --- a/grit-git/src/commands/replay.rs +++ b/grit-git/src/commands/replay.rs @@ -1197,13 +1197,12 @@ fn detect_side_renames( all_paths.extend(base.keys()); all_paths.extend(side.keys()); for path in all_paths { - let path_str = String::from_utf8_lossy(path).to_string(); match (base.get(path), side.get(path)) { (Some(be), None) => { if !rename_sources.contains(path) { diff_entries.push(DiffEntry { status: DiffStatus::Deleted, - old_path: Some(path_str), + old_path: Some(grit_lib::repo_path::RepoPathBuf::from_bytes(path.to_vec())), new_path: None, old_mode: format!("{:06o}", be.mode), new_mode: String::new(), @@ -1218,7 +1217,7 @@ fn detect_side_renames( diff_entries.push(DiffEntry { status: DiffStatus::Added, old_path: None, - new_path: Some(path_str), + new_path: Some(grit_lib::repo_path::RepoPathBuf::from_bytes(path.to_vec())), old_mode: String::new(), new_mode: format!("{:06o}", se.mode), old_oid: zero_oid, @@ -1231,7 +1230,7 @@ fn detect_side_renames( if rename_sources.contains(path) && be.oid != se.oid { diff_entries.push(DiffEntry { status: DiffStatus::Deleted, - old_path: Some(path_str), + old_path: Some(grit_lib::repo_path::RepoPathBuf::from_bytes(path.to_vec())), new_path: None, old_mode: format!("{:06o}", be.mode), new_mode: String::new(), diff --git a/grit-git/src/commands/rev_list.rs b/grit-git/src/commands/rev_list.rs index b6e93229a..2a4987a47 100644 --- a/grit-git/src/commands/rev_list.rs +++ b/grit-git/src/commands/rev_list.rs @@ -1830,8 +1830,8 @@ fn rev_list_reflog_transition_touches_paths( let tree_diff_touches = |from_tree: Option, to_tree: &ObjectId| -> Result { let entries = diff_trees(odb, from_tree.as_ref(), Some(to_tree), "")?; Ok(entries.iter().any(|e| { - let path = e.path(); - pathspecs.iter().any(|ps| matches_pathspec(ps, path)) + let path = e.path().to_string_lossy(); + pathspecs.iter().any(|ps| matches_pathspec(ps, &path)) })) }; diff --git a/grit-git/src/commands/show.rs b/grit-git/src/commands/show.rs index fe110c3f9..86e9c7cad 100644 --- a/grit-git/src/commands/show.rs +++ b/grit-git/src/commands/show.rs @@ -19,6 +19,7 @@ use grit_lib::diff::{ parse_indent_heuristic_cli_flags, resolve_indent_heuristic, unified_diff, zero_oid, DiffEntry, DiffStatus, }; +use grit_lib::repo_path::RepoPath; use grit_lib::diffstat::{terminal_columns, write_diffstat_block, DiffstatOptions, FileStatInput}; use grit_lib::merge_base::merge_bases_first_vs_rest; use grit_lib::merge_diff::{ @@ -1053,7 +1054,8 @@ fn expand_typechange_entries_for_porcelain(entries: Vec) -> Vec 'A', grit_lib::diff::DiffStatus::Deleted => 'D', @@ -1785,12 +1794,12 @@ fn show_commit( .old_path .as_deref() .or(entry.new_path.as_deref()) - .unwrap_or(""); + .unwrap_or(RepoPath::from_str("")); let new_path = entry .new_path .as_deref() .or(entry.old_path.as_deref()) - .unwrap_or(""); + .unwrap_or(RepoPath::from_str("")); let status_char = match entry.status { grit_lib::diff::DiffStatus::Added => 'A', grit_lib::diff::DiffStatus::Deleted => 'D', @@ -1898,7 +1907,7 @@ fn show_commit( git_dir, &config, odb, - entry.path(), + &entry.path().to_string_lossy(), ptree, &commit.tree, abbrev_len, @@ -2000,8 +2009,17 @@ fn show_commit( // Default: full unified diff (first parent or root) for entry in &diff_entries { - let old_path = entry.old_path.as_deref().unwrap_or("/dev/null"); - let new_path = entry.new_path.as_deref().unwrap_or("/dev/null"); + // TODO(byte-paths): patch headers are &str-based until quote_path/Phase 3. + let old_path_buf = entry + .old_path + .as_deref() + .map_or_else(|| "/dev/null".to_string(), |p| p.to_string_lossy().into_owned()); + let new_path_buf = entry + .new_path + .as_deref() + .map_or_else(|| "/dev/null".to_string(), |p| p.to_string_lossy().into_owned()); + let old_path: &str = &old_path_buf; + let new_path: &str = &new_path_buf; // Print the diff header write_diff_header(out, entry)?; @@ -2029,7 +2047,8 @@ fn show_commit( .unwrap_or_default() }; - let path_for_attrs = entry.path(); + let path_for_attrs_buf = entry.path().to_string_lossy(); + let path_for_attrs: &str = &path_for_attrs_buf; let textconv_patch = use_textconv && diff_textconv_active(git_dir, &config, path_for_attrs); if !textconv_patch && (is_binary_for_diff(git_dir, path_for_attrs, &old_raw) @@ -2134,8 +2153,8 @@ fn write_show_summary_lines( writeln!( out, " rename {} => {} ({sim}%)", - entry.old_path.as_deref().unwrap_or(""), - entry.new_path.as_deref().unwrap_or("") + entry.old_path.as_deref().unwrap_or(RepoPath::from_str("")), + entry.new_path.as_deref().unwrap_or(RepoPath::from_str("")) )?; } DiffStatus::Copied => { @@ -2143,8 +2162,8 @@ fn write_show_summary_lines( writeln!( out, " copy {} => {} ({sim}%)", - entry.old_path.as_deref().unwrap_or(""), - entry.new_path.as_deref().unwrap_or("") + entry.old_path.as_deref().unwrap_or(RepoPath::from_str("")), + entry.new_path.as_deref().unwrap_or(RepoPath::from_str("")) )?; } _ => {} @@ -2189,12 +2208,15 @@ fn write_numstat_line( /// Format path for numstat/stat display (with rename arrow notation). fn format_rename_path(entry: &grit_lib::diff::DiffEntry) -> String { - let old_path = entry.old_path.as_deref().unwrap_or(""); - let new_path = entry.new_path.as_deref().unwrap_or(""); + let old_path = entry.old_path.as_deref().unwrap_or(RepoPath::from_str("")); + let new_path = entry.new_path.as_deref().unwrap_or(RepoPath::from_str("")); match entry.status { grit_lib::diff::DiffStatus::Renamed | grit_lib::diff::DiffStatus::Copied => { // Use compact rename format: common_prefix/{old => new}/common_suffix - grit_lib::diff::format_rename_path(old_path, new_path) + grit_lib::diff::format_rename_path( + &old_path.to_string_lossy(), + &new_path.to_string_lossy(), + ) } _ => new_path.to_string(), } @@ -2309,11 +2331,11 @@ pub(crate) fn write_diff_header_with_remerge( let old_path = entry .old_path .as_deref() - .unwrap_or(entry.new_path.as_deref().unwrap_or("")); + .unwrap_or(entry.new_path.as_deref().unwrap_or(RepoPath::from_str(""))); let new_path = entry .new_path .as_deref() - .unwrap_or(entry.old_path.as_deref().unwrap_or("")); + .unwrap_or(entry.old_path.as_deref().unwrap_or(RepoPath::from_str(""))); writeln!(out, "diff --git a/{old_path} b/{new_path}")?; if let Some(line) = remerge_line { diff --git a/grit-git/src/commands/stash.rs b/grit-git/src/commands/stash.rs index 74cfb3cb5..ce9e31d64 100644 --- a/grit-git/src/commands/stash.rs +++ b/grit-git/src/commands/stash.rs @@ -877,14 +877,12 @@ fn stash_is_intent_to_add_only( return false; } for e in staged { - let p = e.path().to_owned(); - if !ita_paths.contains(&p) { + if !ita_paths.contains(e.path().to_string_lossy().as_ref()) { return false; } } for e in unstaged { - let p = e.path().to_owned(); - if !ita_paths.contains(&p) { + if !ita_paths.contains(e.path().to_string_lossy().as_ref()) { return false; } } @@ -1150,18 +1148,18 @@ fn do_stash_patch_push( let mut candidate_paths: BTreeSet = BTreeSet::new(); for e in &staged { if let Some(p) = e.new_path.as_ref().or(e.old_path.as_ref()) { - if has_pathspec && !matches_pathspec(p, &normalized_pathspec) { + if has_pathspec && !matches_pathspec(&p.to_string_lossy(), &normalized_pathspec) { continue; } - candidate_paths.insert(p.clone()); + candidate_paths.insert(p.to_string_lossy().into_owned()); } } for e in &unstaged { if let Some(p) = e.new_path.as_ref().or(e.old_path.as_ref()) { - if has_pathspec && !matches_pathspec(p, &normalized_pathspec) { + if has_pathspec && !matches_pathspec(&p.to_string_lossy(), &normalized_pathspec) { continue; } - candidate_paths.insert(p.clone()); + candidate_paths.insert(p.to_string_lossy().into_owned()); } } @@ -1742,11 +1740,15 @@ fn do_push_pathspec( // Filter by pathspec let matching_staged: Vec<_> = staged .iter() - .filter(|e| stash_pathspec_matches_worktree(repo, index, work_tree, e.path(), pathspec)) + .filter(|e| { + stash_pathspec_matches_worktree(repo, index, work_tree, &e.path().to_string_lossy(), pathspec) + }) .collect(); let matching_unstaged: Vec<_> = unstaged .iter() - .filter(|e| stash_pathspec_matches_worktree(repo, index, work_tree, e.path(), pathspec)) + .filter(|e| { + stash_pathspec_matches_worktree(repo, index, work_tree, &e.path().to_string_lossy(), pathspec) + }) .collect(); let mut matched_untracked: Vec = untracked_files.to_vec(); @@ -1768,12 +1770,12 @@ fn do_push_pathspec( let mut matched_paths: BTreeSet = BTreeSet::new(); for e in &matching_staged { if let Some(p) = e.new_path.as_ref().or(e.old_path.as_ref()) { - matched_paths.insert(p.clone()); + matched_paths.insert(p.to_string_lossy().into_owned()); } } for e in &matching_unstaged { if let Some(p) = e.new_path.as_ref().or(e.old_path.as_ref()) { - matched_paths.insert(p.clone()); + matched_paths.insert(p.to_string_lossy().into_owned()); } } for u in &matched_untracked { @@ -2043,8 +2045,8 @@ fn do_push_staged( // Revert staged changes in the worktree for change in &staged { if let Some(path) = change.new_path.as_ref().or(change.old_path.as_ref()) { - let file_path = work_tree.join(path); - if !head_paths.contains(path) { + let file_path = path.to_fs_path(work_tree); + if !head_paths.contains(path.to_string_lossy().as_ref()) { // New file (added) — remove from worktree let _ = fs::remove_file(&file_path); if let Some(parent) = file_path.parent() { @@ -2053,7 +2055,7 @@ fn do_push_staged( } else { // Modified file — restore HEAD content for te in &head_tree_entries { - if te.path == *path { + if *path == te.path { let blob = repo.odb.read(&te.oid)?; if te.mode == MODE_SYMLINK { let target = String::from_utf8(blob.data) @@ -2644,13 +2646,22 @@ fn do_show(repo: &Repository, parsed: ParsedStashShow) -> Result<()> { } fn stash_stat_path_display(entry: &DiffEntry) -> String { - let old_path = entry.old_path.as_deref().unwrap_or(""); - let new_path = entry.new_path.as_deref().unwrap_or(""); + // TODO(byte-paths): stat display path is lossy (Phase 3 quoting). + let old_path = entry + .old_path + .as_deref() + .map(|p| p.to_string_lossy().into_owned()) + .unwrap_or_default(); + let new_path = entry + .new_path + .as_deref() + .map(|p| p.to_string_lossy().into_owned()) + .unwrap_or_default(); match entry.status { DiffStatus::Renamed | DiffStatus::Copied => { - grit_lib::diff::format_rename_path(old_path, new_path) + grit_lib::diff::format_rename_path(&old_path, &new_path) } - _ => new_path.to_string(), + _ => new_path, } } diff --git a/grit-git/src/commands/status.rs b/grit-git/src/commands/status.rs index 9ed253e9a..e2a1c6b6e 100644 --- a/grit-git/src/commands/status.rs +++ b/grit-git/src/commands/status.rs @@ -11,6 +11,7 @@ use grit_lib::diff::{ DiffIndexToWorktreeOptions, DiffStatus, }; use grit_lib::error::Error; +use grit_lib::repo_path::{RepoPath, RepoPathBuf}; use grit_lib::index::{Index, IndexEntry, MODE_GITLINK, MODE_TREE}; use grit_lib::objects::{parse_commit, ObjectId}; use grit_lib::porcelain::status::{ @@ -385,7 +386,14 @@ fn should_advise_sparse_index_expanded( } unstaged .iter() - .any(|entry| status_path_outside_sparse_checkout(repo, config, work_tree, entry.path())) + .any(|entry| { + status_path_outside_sparse_checkout( + repo, + config, + work_tree, + &entry.path().to_string_lossy(), + ) + }) || untracked .iter() .chain(ignored_files.iter()) @@ -622,7 +630,7 @@ pub fn run(mut args: Args) -> Result<()> { let staged_raw = diff_index_to_tree(&repo.odb, &index, head_tree.as_ref(), false)?; let staged_raw: Vec = staged_raw .into_iter() - .filter(|entry| status_path_matches(entry.path(), &user_pathspecs)) + .filter(|entry| status_path_matches(&entry.path().to_string_lossy(), &user_pathspecs)) .collect(); // Detect renames among staged entries when enabled. let staged = if let Some(settings) = status_rename_settings { @@ -644,11 +652,13 @@ pub fn run(mut args: Args) -> Result<()> { )?; let unstaged_raw: Vec = unstaged_raw .into_iter() - .filter(|entry| status_path_matches(entry.path(), &user_pathspecs)) + .filter(|entry| status_path_matches(&entry.path().to_string_lossy(), &user_pathspecs)) .filter(|entry| { fsmonitor_query .as_ref() - .is_none_or(|(_, reported)| fsmonitor_reported_path_matches(entry.path(), reported)) + .is_none_or(|(_, reported)| { + fsmonitor_reported_path_matches(&entry.path().to_string_lossy(), reported) + }) }) .collect(); let unstaged = if let Some(settings) = status_rename_settings { @@ -1453,8 +1463,8 @@ fn format_porcelain_v2( let qpath = quote_status_path(path, config, nul); let v2_rename_line = |e: &grit_lib::diff::DiffEntry| -> String { - let old_p = e.old_path.as_deref().unwrap_or(""); - let qold = quote_status_path(old_p, config, nul); + let old_p = e.old_path.as_deref().map(RepoPath::to_string_lossy).unwrap_or_default(); + let qold = quote_status_path(&old_p, config, nul); let score = e.score.unwrap_or(100); let rch = if e.status == DiffStatus::Renamed { 'R' @@ -1750,11 +1760,11 @@ fn format_short( continue; } if entry.status == DiffStatus::Renamed || entry.status == DiffStatus::Copied { - let key = entry.path().to_owned(); + let key = entry.path().to_string_lossy().into_owned(); staged_map.insert(key.clone(), entry.status.letter()); paths.insert(key); } else { - let path = entry.path().to_owned(); + let path = entry.path().to_string_lossy().into_owned(); staged_map.insert(path.clone(), entry.status.letter()); paths.insert(path); } @@ -1764,7 +1774,7 @@ fn format_short( if entry.status == DiffStatus::Unmerged { continue; } - let path = entry.path().to_owned(); + let path = entry.path().to_string_lossy().into_owned(); if let Some(ie) = index.get(path.as_bytes(), 0) { if ie.mode == MODE_GITLINK && submodule_suppressed(&path, ie.oid) { continue; @@ -1832,8 +1842,10 @@ fn format_short( && (e.status == DiffStatus::Renamed || e.status == DiffStatus::Copied) }); if let Some(e) = rename_or_copy { - let old_p = e.old_path.as_deref().unwrap_or(""); - let new_p = e.new_path.as_deref().unwrap_or(""); + let old_p = e.old_path.as_deref().map(RepoPath::to_string_lossy).unwrap_or_default(); + let old_p: &str = &old_p; + let new_p = e.new_path.as_deref().map(RepoPath::to_string_lossy).unwrap_or_default(); + let new_p: &str = &new_p; if args.null_terminated { let new_disp = disp(new_p); let old_disp = disp(old_p); @@ -1851,7 +1863,7 @@ fn format_short( writeln!( out, "{}", - quote_status_short_path(&disp(e.path()), quote_path_cfg) + quote_status_short_path(&disp(&e.path().to_string_lossy()), quote_path_cfg) )?; } } @@ -2910,17 +2922,20 @@ fn format_long( .filter(|e| e.status != DiffStatus::Unmerged) .filter(|e| { submodule_decisions - .get(e.path()) + .get(&*e.path().to_string_lossy()) .map(|(_, _, suppress_staged)| !suppress_staged) .unwrap_or(true) }) .collect(); let unstaged_normal: Vec<&DiffEntry> = unstaged .iter() - .filter(|e| e.status != DiffStatus::Unmerged && !unmerged_map.contains_key(e.path())) + .filter(|e| { + e.status != DiffStatus::Unmerged + && !unmerged_map.contains_key(&*e.path().to_string_lossy()) + }) .filter(|e| { submodule_decisions - .get(e.path()) + .get(&*e.path().to_string_lossy()) .map(|(_, suppress_unstaged, _)| !suppress_unstaged) .unwrap_or(true) }) @@ -3034,7 +3049,7 @@ fn format_long( _ => "changed", }; let suffix = submodule_decisions - .get(entry.path()) + .get(&*entry.path().to_string_lossy()) .map(|(annotation, _, _)| annotation.as_str()) .unwrap_or(""); cpw( @@ -3721,10 +3736,10 @@ fn remap_diff_paths( .map(|e| { let mut new_entry = e.clone(); if let Some(ref p) = e.old_path { - new_entry.old_path = Some(f(p)); + new_entry.old_path = Some(RepoPathBuf::from_string(f(&p.to_string_lossy()))); } if let Some(ref p) = e.new_path { - new_entry.new_path = Some(f(p)); + new_entry.new_path = Some(RepoPathBuf::from_string(f(&p.to_string_lossy()))); } new_entry }) diff --git a/grit-git/src/commands/submodule.rs b/grit-git/src/commands/submodule.rs index 68f6fb75f..c93ac009b 100644 --- a/grit-git/src/commands/submodule.rs +++ b/grit-git/src/commands/submodule.rs @@ -176,6 +176,7 @@ fn run_with_top_opts(top: SubmoduleTopOpts, args: Args) -> Result<()> { } use grit_lib::config::{canonical_key, ConfigFile, ConfigScope, ConfigSet}; use grit_lib::diff::{diff_index_to_tree, format_mode, head_path_states, DiffEntry, DiffStatus}; +use grit_lib::repo_path::RepoPathBuf; use grit_lib::error::Error as LibError; use grit_lib::gitmodules::check_submodule_name; use grit_lib::index::{Index, IndexEntry, MODE_GITLINK}; @@ -4490,8 +4491,12 @@ fn resolve_summary_base_tree(repo: &Repository, commit_spec: &str) -> Result &str { - entry.old_path.as_deref().unwrap_or_else(|| entry.path()) +fn summary_display_path(entry: &DiffEntry) -> std::borrow::Cow<'_, str> { + entry + .old_path + .as_deref() + .unwrap_or_else(|| entry.path()) + .to_string_lossy() } fn pathspec_selected(pathspecs: &[String], sm_path: &str) -> bool { @@ -4736,8 +4741,8 @@ fn run_summary(args: &SummaryArgs, _quiet: bool) -> Result<()> { } out.push(DiffEntry { status: DiffStatus::Modified, - old_path: Some(path_str.clone()), - new_path: Some(path_str), + old_path: Some(RepoPathBuf::from_bytes(ie.path.clone())), + new_path: Some(RepoPathBuf::from_bytes(ie.path.clone())), old_mode: format_mode(MODE_GITLINK), new_mode: format_mode(MODE_GITLINK), old_oid: ie.oid, @@ -4747,8 +4752,8 @@ fn run_summary(args: &SummaryArgs, _quiet: bool) -> Result<()> { } else if let Some(dst_oid) = grit_lib::diff::read_submodule_head_oid(&sub_path) { out.push(DiffEntry { status: DiffStatus::TypeChanged, - old_path: Some(path_str.clone()), - new_path: Some(path_str), + old_path: Some(RepoPathBuf::from_bytes(ie.path.clone())), + new_path: Some(RepoPathBuf::from_bytes(ie.path.clone())), old_mode: format_mode(ie.mode), new_mode: format_mode(MODE_GITLINK), old_oid: ie.oid, @@ -4790,8 +4795,8 @@ fn run_summary(args: &SummaryArgs, _quiet: bool) -> Result<()> { } extra.push(DiffEntry { status: DiffStatus::Modified, - old_path: Some(path_str.clone()), - new_path: Some(path_str), + old_path: Some(RepoPathBuf::from_bytes(ie.path.clone())), + new_path: Some(RepoPathBuf::from_bytes(ie.path.clone())), old_mode: format!("{:o}", MODE_GITLINK), new_mode: format!("{:o}", MODE_GITLINK), old_oid: ie.oid, @@ -4828,8 +4833,9 @@ fn run_summary(args: &SummaryArgs, _quiet: bool) -> Result<()> { }; replacements.push(DiffEntry { status, - old_path: (old_mode != 0).then_some(path_str.clone()), - new_path: Some(path_str), + old_path: (old_mode != 0) + .then(|| RepoPathBuf::from_bytes(ie.path.clone())), + new_path: Some(RepoPathBuf::from_bytes(ie.path.clone())), old_mode: format_mode(old_mode), new_mode: format_mode(MODE_GITLINK), old_oid, @@ -4839,7 +4845,7 @@ fn run_summary(args: &SummaryArgs, _quiet: bool) -> Result<()> { } else if ie.mode == MODE_GITLINK && !work_tree.join(&path_str).exists() { replacements.push(DiffEntry { status: DiffStatus::Deleted, - old_path: Some(path_str.clone()), + old_path: Some(RepoPathBuf::from_bytes(ie.path.clone())), new_path: None, old_mode: format_mode(MODE_GITLINK), new_mode: "000000".to_owned(), @@ -4850,8 +4856,8 @@ fn run_summary(args: &SummaryArgs, _quiet: bool) -> Result<()> { } } for replacement in replacements { - let key = replacement.path().to_string(); - entries.retain(|e| e.path() != key); + let key = replacement.path().to_owned(); + entries.retain(|e| e.path() != &*key); entries.push(replacement); } entries.sort_by(|a, b| a.path().cmp(b.path())); @@ -4867,17 +4873,17 @@ fn run_summary(args: &SummaryArgs, _quiet: bool) -> Result<()> { continue; } let sm_path = summary_display_path(e); - if !pathspec_selected(&pathspecs, sm_path) { + if !pathspec_selected(&pathspecs, &sm_path) { continue; } - let display_path = summary_display_path_from_cwd(work_tree, &cwd, sm_path); + let display_path = summary_display_path_from_cwd(work_tree, &cwd, &sm_path); if args.for_status && e.status != DiffStatus::Added && submodule_ignore_all_for_summary( local_cfg_for_ignore.as_ref(), &ignore_all_for_status, - sm_path, + &sm_path, ) { continue; @@ -4888,7 +4894,7 @@ fn run_summary(args: &SummaryArgs, _quiet: bool) -> Result<()> { let src_gitlink = mode_is_gitlink(&e.old_mode); let dst_gitlink = mode_is_gitlink(&e.new_mode); - let sub_path = submodule_work_tree_for_summary(work_tree, sm_path); + let sub_path = submodule_work_tree_for_summary(work_tree, &sm_path); if !args.cached && !sub_path.join(".git").exists() && !oid_dst.is_zero() @@ -4908,10 +4914,10 @@ fn run_summary(args: &SummaryArgs, _quiet: bool) -> Result<()> { if src_gitlink && dst_gitlink { let _ = submodule_fetch_gitlink_if_missing( - &grit_bin, work_tree, sm_path, &sub_path, &src_hex, + &grit_bin, work_tree, &sm_path, &sub_path, &src_hex, ); let _ = submodule_fetch_gitlink_if_missing( - &grit_bin, work_tree, sm_path, &sub_path, &dst_hex, + &grit_bin, work_tree, &sm_path, &sub_path, &dst_hex, ); } diff --git a/grit-git/src/main.rs b/grit-git/src/main.rs index 34fd01115..6a7a24646 100644 --- a/grit-git/src/main.rs +++ b/grit-git/src/main.rs @@ -2820,7 +2820,7 @@ fn run_test_tool_bloom(rest: &[String]) -> Result<()> { std::collections::BTreeSet::new(); for d in diffs { if let Some(path) = d.new_path.or(d.old_path) { - bloom_collect_paths_with_prefixes(&path, &mut changed_paths); + bloom_collect_paths_with_prefixes(&path.to_string_lossy(), &mut changed_paths); } } diff --git a/grit-lib/src/commit_graph_file.rs b/grit-lib/src/commit_graph_file.rs index 42af2d891..efb10c9ee 100644 --- a/grit-lib/src/commit_graph_file.rs +++ b/grit-lib/src/commit_graph_file.rs @@ -1084,7 +1084,7 @@ pub fn diff_changed_paths_for_bloom( let raw_len = entries.len(); let mut paths = Vec::new(); for e in entries { - let p = e.path().to_string(); + let p = e.path().to_string_lossy().into_owned(); if !p.is_empty() { paths.push(p); } diff --git a/grit-lib/src/diff.rs b/grit-lib/src/diff.rs index 4465ff6bc..dd0440e49 100644 --- a/grit-lib/src/diff.rs +++ b/grit-lib/src/diff.rs @@ -31,7 +31,9 @@ use crate::error::{Error, Result}; use crate::index::{Index, IndexEntry}; use crate::objects::{parse_commit, parse_tree, CommitData, ObjectId, ObjectKind, TreeEntry}; use crate::odb::Odb; +use crate::repo_path::{RepoPath, RepoPathBuf}; use crate::userdiff::FuncnameMatcher; +use std::borrow::Cow; /// Splits imara-diff unified body (concatenated hunks) into per-hunk slices for post-processing. fn imara_unified_hunk_slices(body: &str) -> Vec<&str> { @@ -1369,10 +1371,11 @@ impl DiffStatus { pub struct DiffEntry { /// The status of this change. pub status: DiffStatus, - /// Path in the "old" side (None for Added). - pub old_path: Option, - /// Path in the "new" side (None for Deleted). - pub new_path: Option, + /// Path in the "old" side (None for Added). Byte-true (Git paths are not + /// guaranteed UTF-8); use [`RepoPath::display`]/[`RepoPath::to_str`] to render. + pub old_path: Option, + /// Path in the "new" side (None for Deleted). Byte-true. + pub new_path: Option, /// Old file mode (as octal string, e.g. "100644"). pub old_mode: String, /// New file mode. @@ -1386,32 +1389,45 @@ pub struct DiffEntry { } impl DiffEntry { - /// The primary path for display (new_path for adds, old_path for deletes). + /// The primary path for this entry (new_path for adds, old_path for + /// deletes), as byte-true [`RepoPath`]. Empty if neither side is set. #[must_use] - pub fn path(&self) -> &str { + pub fn path(&self) -> &RepoPath { self.new_path .as_deref() .or(self.old_path.as_deref()) - .unwrap_or("") + .unwrap_or(RepoPath::from_str("")) + } + + /// The primary path rendered lossily as a UTF-8 string. Convenience for the + /// many display/format call sites; non-UTF-8 bytes become U+FFFD. For + /// round-trip-safe output use `quote_path`. + #[must_use] + pub fn path_str(&self) -> Cow<'_, str> { + String::from_utf8_lossy(self.path().as_bytes()) } /// Return a human-oriented path display for this entry. /// /// For renames and copies this returns `old -> new`; for all other entry - /// kinds this returns the primary path. + /// kinds this returns the primary path. Lossy for non-UTF-8 paths. #[must_use] pub fn display_path(&self) -> String { match self.status { DiffStatus::Renamed | DiffStatus::Copied => { - let old = self.old_path.as_deref().unwrap_or(""); - let new = self.new_path.as_deref().unwrap_or(""); + let old = self.old_path.as_deref().map(RepoPath::as_bytes).unwrap_or(b""); + let new = self.new_path.as_deref().map(RepoPath::as_bytes).unwrap_or(b""); if old.is_empty() || new.is_empty() { - self.path().to_owned() + self.path_str().into_owned() } else { - format!("{old} -> {new}") + format!( + "{} -> {}", + String::from_utf8_lossy(old), + String::from_utf8_lossy(new) + ) } } - _ => self.path().to_owned(), + _ => self.path_str().into_owned(), } } } @@ -1457,7 +1473,7 @@ pub fn diff_trees( new_tree_oid: Option<&ObjectId>, prefix: &str, ) -> Result> { - diff_trees_opts(odb, old_tree_oid, new_tree_oid, prefix, false) + diff_trees_opts(odb, old_tree_oid, new_tree_oid, RepoPath::from_str(prefix), false) } /// Like `diff_trees` but with `show_trees` flag: when true, emit entries for @@ -1469,14 +1485,14 @@ pub fn diff_trees_show_tree_entries( new_tree_oid: Option<&ObjectId>, prefix: &str, ) -> Result> { - diff_trees_opts(odb, old_tree_oid, new_tree_oid, prefix, true) + diff_trees_opts(odb, old_tree_oid, new_tree_oid, RepoPath::from_str(prefix), true) } fn diff_trees_opts( odb: &Odb, old_tree_oid: Option<&ObjectId>, new_tree_oid: Option<&ObjectId>, - prefix: &str, + prefix: &RepoPath, show_trees: bool, ) -> Result> { let old_entries = match old_tree_oid { @@ -1517,7 +1533,7 @@ fn diff_tree_entries_opts( odb: &Odb, old: &[TreeEntry], new: &[TreeEntry], - prefix: &str, + prefix: &RepoPath, show_trees: bool, result: &mut Vec, ) -> Result<()> { @@ -1547,8 +1563,7 @@ fn diff_tree_entries_opts( std::cmp::Ordering::Equal => { // Both present — check for changes if o.oid != n.oid || o.mode != n.mode { - let name_str = String::from_utf8_lossy(&o.name); - let path = format_path(prefix, &name_str); + let path = format_path(prefix, &o.name); if is_tree_mode(o.mode) && is_tree_mode(n.mode) { // Both are trees if show_trees { @@ -1625,12 +1640,11 @@ fn diff_tree_entries_opts( fn emit_deleted_opts( odb: &Odb, entry: &TreeEntry, - prefix: &str, + prefix: &RepoPath, show_trees: bool, result: &mut Vec, ) -> Result<()> { - let name_str = String::from_utf8_lossy(&entry.name); - let path = format_path(prefix, &name_str); + let path = format_path(prefix, &entry.name); if is_tree_mode(entry.mode) { if show_trees { result.push(DiffEntry { @@ -1665,12 +1679,11 @@ fn emit_deleted_opts( fn emit_added_opts( odb: &Odb, entry: &TreeEntry, - prefix: &str, + prefix: &RepoPath, show_trees: bool, result: &mut Vec, ) -> Result<()> { - let name_str = String::from_utf8_lossy(&entry.name); - let path = format_path(prefix, &name_str); + let path = format_path(prefix, &entry.name); if is_tree_mode(entry.mode) { if show_trees { result.push(DiffEntry { @@ -1780,8 +1793,8 @@ pub fn diff_index_to_tree( if te.oid != ie.oid || te.mode != ie.mode { result.push(DiffEntry { status: DiffStatus::Modified, - old_path: Some(path.clone()), - new_path: Some(path), + old_path: Some(RepoPathBuf::from_bytes(ie.path.clone())), + new_path: Some(RepoPathBuf::from_bytes(ie.path.clone())), old_mode: format_mode(te.mode), new_mode: format_mode(ie.mode), old_oid: te.oid, @@ -1795,7 +1808,7 @@ pub fn diff_index_to_tree( result.push(DiffEntry { status: DiffStatus::Added, old_path: None, - new_path: Some(path), + new_path: Some(RepoPathBuf::from_bytes(ie.path.clone())), old_mode: "000000".to_owned(), new_mode: format_mode(ie.mode), old_oid: zero_oid(), @@ -1811,10 +1824,11 @@ pub fn diff_index_to_tree( continue; } tree_map.remove(path.as_str()); + // TODO(byte-paths): lossy until index/worktree sources migrate (Phase 2/4) result.push(DiffEntry { status: DiffStatus::Unmerged, - old_path: Some(path.clone()), - new_path: Some(path.clone()), + old_path: Some(RepoPathBuf::from_string(path.clone())), + new_path: Some(RepoPathBuf::from_string(path.clone())), old_mode: "000000".to_owned(), new_mode: format_mode(*mode), old_oid: zero_oid(), @@ -1828,9 +1842,10 @@ pub fn diff_index_to_tree( if ignore_submodules && te.mode == 0o160000 { continue; } + // TODO(byte-paths): lossy until index/worktree sources migrate (Phase 2/4) result.push(DiffEntry { status: DiffStatus::Deleted, - old_path: Some(path.to_owned()), + old_path: Some(RepoPathBuf::from_string(path.to_owned())), new_path: None, old_mode: format_mode(te.mode), new_mode: "000000".to_owned(), @@ -1988,7 +2003,7 @@ pub fn diff_index_to_worktree_with_options( // a placeholder and stays unchanged. Skipped when `simplify_gitlinks` so callers that // only compare recorded HEADs keep their behaviour. (t4060 #50/#51.) if !simplify_gitlinks && sub_head_oid.is_none() && !sub_dir.exists() { - let path_owned = path_str_ref.to_owned(); + let path_owned = RepoPathBuf::from_bytes(ie.path.clone()); result.push(DiffEntry { status: DiffStatus::Deleted, old_path: Some(path_owned.clone()), @@ -2017,7 +2032,7 @@ pub fn diff_index_to_worktree_with_options( }; if simplify_gitlinks { if !ref_matches { - let path_owned = path_str_ref.to_owned(); + let path_owned = RepoPathBuf::from_bytes(ie.path.clone()); let new_oid = sub_head_oid.unwrap_or_else(zero_oid); result.push(DiffEntry { status: DiffStatus::Modified, @@ -2047,7 +2062,7 @@ pub fn diff_index_to_worktree_with_options( } let inner_dirty = flags.modified || flags.untracked; if !ref_matches || inner_dirty { - let path_owned = path_str_ref.to_owned(); + let path_owned = RepoPathBuf::from_bytes(ie.path.clone()); let new_oid = if !ref_matches { sub_head_oid.unwrap_or_else(zero_oid) } else { @@ -2086,7 +2101,7 @@ pub fn diff_index_to_worktree_with_options( result.push(DiffEntry { status: DiffStatus::Added, old_path: None, - new_path: Some(path_str_ref.to_owned()), + new_path: Some(RepoPathBuf::from_bytes(ie.path.clone())), old_mode: "000000".to_owned(), new_mode: format_mode(worktree_mode), // `ita_invisible_in_index`: null OID on the index side for patch output @@ -2102,7 +2117,7 @@ pub fn diff_index_to_worktree_with_options( { result.push(DiffEntry { status: DiffStatus::Deleted, - old_path: Some(path_str_ref.to_owned()), + old_path: Some(RepoPathBuf::from_bytes(ie.path.clone())), new_path: None, old_mode: format_mode(ie.mode), new_mode: "000000".to_owned(), @@ -2121,7 +2136,7 @@ pub fn diff_index_to_worktree_with_options( if dir_symlinks.has_symlink_in_path(work_tree, path_str_ref) { result.push(DiffEntry { status: DiffStatus::Deleted, - old_path: Some(path_str_ref.to_owned()), + old_path: Some(RepoPathBuf::from_bytes(ie.path.clone())), new_path: None, old_mode: format_mode(ie.mode), new_mode: "000000".to_owned(), @@ -2140,7 +2155,7 @@ pub fn diff_index_to_worktree_with_options( // deleted. See t4041/t4060 #13. if file_path.join(".git").exists() { let head = read_submodule_head_oid(&file_path).unwrap_or_else(zero_oid); - let path_owned = path_str_ref.to_owned(); + let path_owned = RepoPathBuf::from_bytes(ie.path.clone()); result.push(DiffEntry { status: DiffStatus::TypeChanged, old_path: Some(path_owned.clone()), @@ -2155,7 +2170,7 @@ pub fn diff_index_to_worktree_with_options( } result.push(DiffEntry { status: DiffStatus::Deleted, - old_path: Some(path_str_ref.to_owned()), + old_path: Some(RepoPathBuf::from_bytes(ie.path.clone())), new_path: None, old_mode: format_mode(ie.mode), new_mode: String::new(), @@ -2169,7 +2184,7 @@ pub fn diff_index_to_worktree_with_options( let stat_same = stat_matches(ie, &meta); // Mode-only change: stat still matches the index entry but executable bit differs. if stat_same && worktree_mode != ie.mode { - let path_owned = path_str_ref.to_owned(); + let path_owned = RepoPathBuf::from_bytes(ie.path.clone()); result.push(DiffEntry { status: DiffStatus::Modified, old_path: Some(path_owned.clone()), @@ -2215,7 +2230,7 @@ pub fn diff_index_to_worktree_with_options( } if eff_oid != ie.oid || worktree_mode != ie.mode { - let path_owned = path_str_ref.to_owned(); + let path_owned = RepoPathBuf::from_bytes(ie.path.clone()); result.push(DiffEntry { status: DiffStatus::Modified, old_path: Some(path_owned.clone()), @@ -2233,7 +2248,7 @@ pub fn diff_index_to_worktree_with_options( // File deleted from working tree (or parent replaced by a file) result.push(DiffEntry { status: DiffStatus::Deleted, - old_path: Some(path_str_ref.to_owned()), + old_path: Some(RepoPathBuf::from_bytes(ie.path.clone())), new_path: None, old_mode: format_mode(ie.mode), new_mode: "000000".to_owned(), @@ -2263,10 +2278,11 @@ pub fn diff_index_to_worktree_with_options( || "000000".to_owned(), |meta| format_mode(mode_from_metadata(meta)), ); + // TODO(byte-paths): lossy until index/worktree sources migrate (Phase 2/4) result.push(DiffEntry { status: DiffStatus::Unmerged, - old_path: Some(path.clone()), - new_path: Some(path.clone()), + old_path: Some(RepoPathBuf::from_string(path.clone())), + new_path: Some(RepoPathBuf::from_string(path.clone())), old_mode: "000000".to_owned(), new_mode, old_oid: zero_oid(), @@ -2287,10 +2303,11 @@ pub fn diff_index_to_worktree_with_options( )?; let wt_mode = mode_from_metadata(&meta); if wt_oid != base_entry.oid || wt_mode != base_entry.mode { + // TODO(byte-paths): lossy until index/worktree sources migrate (Phase 2/4) result.push(DiffEntry { status: DiffStatus::Modified, - old_path: Some(path.clone()), - new_path: Some(path), + old_path: Some(RepoPathBuf::from_string(path.clone())), + new_path: Some(RepoPathBuf::from_string(path)), old_mode: format_mode(base_entry.mode), new_mode: format_mode(wt_mode), old_oid: base_entry.oid, @@ -2762,8 +2779,8 @@ pub fn diff_tree_to_worktree( if sub_head.is_none() && !sub_dir.exists() { result.push(DiffEntry { status: DiffStatus::Deleted, - old_path: Some(path.clone()), - new_path: Some(path.clone()), + old_path: Some(RepoPathBuf::from_string(path.clone())), + new_path: Some(RepoPathBuf::from_string(path.clone())), old_mode: format_mode(te.mode), new_mode: "000000".to_string(), old_oid: te.oid, @@ -2784,8 +2801,8 @@ pub fn diff_tree_to_worktree( let new_oid = if head_differs { zero_oid() } else { te.oid }; result.push(DiffEntry { status: DiffStatus::Modified, - old_path: Some(path.clone()), - new_path: Some(path.clone()), + old_path: Some(RepoPathBuf::from_string(path.clone())), + new_path: Some(RepoPathBuf::from_string(path.clone())), old_mode: format_mode(te.mode), new_mode: format_mode(te.mode), old_oid: te.oid, @@ -2802,8 +2819,8 @@ pub fn diff_tree_to_worktree( let new_oid = read_submodule_head_oid(&sub_dir).unwrap_or(idx_oid); result.push(DiffEntry { status: DiffStatus::Added, - old_path: Some(path.clone()), - new_path: Some(path.clone()), + old_path: Some(RepoPathBuf::from_string(path.clone())), + new_path: Some(RepoPathBuf::from_string(path.clone())), old_mode: "000000".to_string(), new_mode: format_mode(0o160000), old_oid: zero_oid(), @@ -2833,8 +2850,8 @@ pub fn diff_tree_to_worktree( if wt_oid != te.oid || wt_mode != te.mode { result.push(DiffEntry { status: DiffStatus::Modified, - old_path: Some(path.clone()), - new_path: Some(path.clone()), + old_path: Some(RepoPathBuf::from_string(path.clone())), + new_path: Some(RepoPathBuf::from_string(path.clone())), old_mode: format_mode(te.mode), new_mode: format_mode(wt_mode), old_oid: te.oid, @@ -2867,8 +2884,8 @@ pub fn diff_tree_to_worktree( if ie.oid == te.oid && ie.mode != te.mode { result.push(DiffEntry { status: DiffStatus::Modified, - old_path: Some(path.clone()), - new_path: Some(path.clone()), + old_path: Some(RepoPathBuf::from_string(path.clone())), + new_path: Some(RepoPathBuf::from_string(path.clone())), old_mode: format_mode(te.mode), new_mode: format_mode(ie.mode), old_oid: te.oid, @@ -2902,8 +2919,8 @@ pub fn diff_tree_to_worktree( if eff_oid != te.oid { result.push(DiffEntry { status: DiffStatus::Modified, - old_path: Some(path.clone()), - new_path: Some(path.clone()), + old_path: Some(RepoPathBuf::from_string(path.clone())), + new_path: Some(RepoPathBuf::from_string(path.clone())), old_mode: format_mode(te.mode), new_mode: format_mode(wt_mode), old_oid: te.oid, @@ -2913,8 +2930,8 @@ pub fn diff_tree_to_worktree( } else if wt_mode != te.mode { result.push(DiffEntry { status: DiffStatus::Modified, - old_path: Some(path.clone()), - new_path: Some(path.clone()), + old_path: Some(RepoPathBuf::from_string(path.clone())), + new_path: Some(RepoPathBuf::from_string(path.clone())), old_mode: format_mode(te.mode), new_mode: format_mode(wt_mode), old_oid: te.oid, @@ -2940,8 +2957,8 @@ pub fn diff_tree_to_worktree( if eff_oid != te.oid || wt_mode != te.mode { result.push(DiffEntry { status: DiffStatus::Modified, - old_path: Some(path.clone()), - new_path: Some(path.clone()), + old_path: Some(RepoPathBuf::from_string(path.clone())), + new_path: Some(RepoPathBuf::from_string(path.clone())), old_mode: format_mode(te.mode), new_mode: format_mode(wt_mode), old_oid: te.oid, @@ -2954,7 +2971,7 @@ pub fn diff_tree_to_worktree( // In tree but missing from worktree result.push(DiffEntry { status: DiffStatus::Deleted, - old_path: Some(path.clone()), + old_path: Some(RepoPathBuf::from_string(path.clone())), new_path: None, old_mode: format_mode(te.mode), new_mode: "000000".to_owned(), @@ -2979,7 +2996,7 @@ pub fn diff_tree_to_worktree( result.push(DiffEntry { status: DiffStatus::Added, old_path: None, - new_path: Some(path.clone()), + new_path: Some(RepoPathBuf::from_string(path.clone())), old_mode: "000000".to_owned(), new_mode: format_mode(wt_mode), old_oid: zero_oid(), @@ -3009,7 +3026,7 @@ fn read_added_entry_bytes( } let path = entry.new_path.as_deref()?; let root = work_root?; - fs::read(root.join(path)).ok() + fs::read(path.to_fs_path(root)).ok() } fn modified_as_copy_from_sources( @@ -3040,7 +3057,7 @@ fn modified_as_copy_from_sources( if *is_deleted { continue; } - if e.new_path.as_deref() == Some(src_path.as_str()) { + if e.new_path.as_deref().map(RepoPath::as_bytes) == Some(src_path.as_bytes()) { continue; } let src_mode_str = source_tree_entries @@ -3081,7 +3098,8 @@ fn modified_as_copy_from_sources( Some(DiffEntry { status: DiffStatus::Copied, - old_path: Some(src_path.clone()), + // TODO(byte-paths): source paths are lossy String until rename/copy sources migrate (Phase 4) + old_path: Some(RepoPathBuf::from_string(src_path.clone())), new_path: e.new_path.clone(), old_mode: src_mode, new_mode: e.new_mode.clone(), @@ -3285,8 +3303,10 @@ pub fn detect_copies( for entry in &deleted { if let Some(ref path) = entry.old_path { - deleted_source_idx.insert(path.clone(), sources.len()); - sources.push((path.clone(), entry.old_oid, true)); + // TODO(byte-paths): source keys are lossy String until rename/copy sources migrate (Phase 4) + let key = path.to_string_lossy().into_owned(); + deleted_source_idx.insert(key.clone(), sources.len()); + sources.push((key, entry.old_oid, true)); } } @@ -3295,8 +3315,8 @@ pub fn detect_copies( for entry in &others { if matches!(entry.status, DiffStatus::Modified | DiffStatus::TypeChanged) { if let Some(ref old_path) = entry.old_path { - if !sources.iter().any(|(p, _, _)| p == old_path) { - sources.push((old_path.clone(), entry.old_oid, false)); + if !sources.iter().any(|(p, _, _)| p.as_bytes() == old_path.as_bytes()) { + sources.push((old_path.to_string_lossy().into_owned(), entry.old_oid, false)); } } } @@ -3342,7 +3362,7 @@ pub fn detect_copies( for (ai, add) in added.iter().enumerate() { // Never pair a path with itself as copy source (matches Git; avoids // arbitrary tie-breaking when several sources share the same blob). - if add.new_path.as_deref() == Some(src_path.as_str()) { + if add.new_path.as_deref().map(RepoPath::as_bytes) == Some(src_path.as_bytes()) { continue; } let add_oid = if add.new_oid != zero_oid() { @@ -3388,7 +3408,7 @@ pub fn detect_copies( // Git tends to pick the rename as the last alphabetically. let rename_ai = assignments_for_src .iter() - .max_by_key(|(ai, _score)| added[*ai].path().to_string()) + .max_by_key(|(ai, _score)| added[*ai].path().as_bytes().to_vec()) .map(|(ai, _)| *ai); for &(ai, score) in assignments_for_src { @@ -3401,13 +3421,14 @@ pub fn detect_copies( .unwrap_or_else(|| add.old_mode.clone()); let is_rename = Some(ai) == rename_ai; + // TODO(byte-paths): source paths lossy until rename/copy sources migrate (Phase 4) result_entries.push(DiffEntry { status: if is_rename { DiffStatus::Renamed } else { DiffStatus::Copied }, - old_path: Some(src_path.clone()), + old_path: Some(RepoPathBuf::from_string(src_path.clone())), new_path: add.new_path.clone(), old_mode: src_mode, new_mode: add.new_mode.clone(), @@ -3429,9 +3450,10 @@ pub fn detect_copies( .map(|(_, m, _)| m.clone()) .unwrap_or_else(|| add.old_mode.clone()); + // TODO(byte-paths): source paths lossy until rename/copy sources migrate (Phase 4) result_entries.push(DiffEntry { status: DiffStatus::Copied, - old_path: Some(src_path.clone()), + old_path: Some(RepoPathBuf::from_string(src_path.clone())), new_path: add.new_path.clone(), old_mode: src_mode, new_mode: add.new_mode.clone(), @@ -3448,7 +3470,7 @@ pub fn detect_copies( // Keep deleted entries that weren't consumed by a rename. for entry in deleted.into_iter() { if let Some(ref path) = entry.old_path { - if let Some(&si) = deleted_source_idx.get(path) { + if let Some(&si) = deleted_source_idx.get(path.to_string_lossy().as_ref()) { if renamed_deleted.contains(&si) { // This deletion was consumed by a rename; skip it. continue; @@ -3622,11 +3644,12 @@ pub fn format_rename_path(old: &str, new: &str) -> String { /// Check if two entries share the same filename (basename). fn same_basename(del: &DiffEntry, add: &DiffEntry) -> bool { - let old = del.old_path.as_deref().unwrap_or(""); - let new = add.new_path.as_deref().unwrap_or(""); - let old_base = old.rsplit('/').next().unwrap_or(old); - let new_base = new.rsplit('/').next().unwrap_or(new); - old_base == new_base && !old_base.is_empty() + let old_base = del.old_path.as_deref().and_then(RepoPath::file_name); + let new_base = add.new_path.as_deref().and_then(RepoPath::file_name); + match (old_base, new_base) { + (Some(o), Some(n)) => o == n && !o.is_empty(), + _ => false, + } } /// Compute a similarity percentage (0–100) between two byte slices. @@ -3689,11 +3712,11 @@ pub fn format_raw(entry: &DiffEntry) -> String { DiffStatus::Renamed | DiffStatus::Copied => { format!( "{}\t{}", - entry.old_path.as_deref().unwrap_or(""), - entry.new_path.as_deref().unwrap_or("") + entry.old_path.as_deref().map(RepoPath::to_string_lossy).unwrap_or_default(), + entry.new_path.as_deref().map(RepoPath::to_string_lossy).unwrap_or_default() ) } - _ => entry.path().to_owned(), + _ => entry.path_str().into_owned(), }; let status_str = match (entry.status, entry.score) { @@ -3750,10 +3773,10 @@ pub fn format_raw_abbrev(entry: &DiffEntry, abbrev_len: usize) -> String { let path = match entry.status { DiffStatus::Renamed | DiffStatus::Copied => format!( "{}\t{}", - entry.old_path.as_deref().unwrap_or(""), - entry.new_path.as_deref().unwrap_or("") + entry.old_path.as_deref().map(RepoPath::to_string_lossy).unwrap_or_default(), + entry.new_path.as_deref().map(RepoPath::to_string_lossy).unwrap_or_default() ), - _ => entry.path().to_owned(), + _ => entry.path_str().into_owned(), }; let status_str = match (entry.status, entry.score) { (DiffStatus::Renamed, Some(s)) => format!("R{s:03}"), @@ -5640,7 +5663,11 @@ fn flatten_tree(odb: &Odb, tree_oid: &ObjectId, prefix: &str) -> Result bool { } /// Build a path with an optional prefix. -fn format_path(prefix: &str, name: &str) -> String { - if prefix.is_empty() { - name.to_owned() - } else { - format!("{prefix}/{name}") - } +fn format_path(prefix: &RepoPath, name: &[u8]) -> RepoPathBuf { + prefix.join(RepoPath::from_bytes(name)) } /// Format a numeric mode as a zero-padded octal string. @@ -6059,7 +6082,7 @@ fn apply_orderfile( .cloned() .unwrap_or_default(); for (i, pat) in patterns.iter().enumerate() { - if orderfile_pattern_matches(pat, &path) { + if orderfile_pattern_matches(pat, &path.to_string_lossy()) { return i; } } @@ -6165,7 +6188,7 @@ fn apply_rotate_skip_ordered_paths( use std::collections::HashMap; let mut by_path: HashMap = HashMap::new(); for e in entries { - by_path.insert(e.path().to_string(), e); + by_path.insert(e.path().to_string_lossy().into_owned(), e); } // `git log --skip-to`: only list changed paths from the skip point onward (unmodified paths diff --git a/grit-lib/src/difftool.rs b/grit-lib/src/difftool.rs index ae84772e0..6dab143f5 100644 --- a/grit-lib/src/difftool.rs +++ b/grit-lib/src/difftool.rs @@ -555,7 +555,7 @@ fn filter_paths(entries: Vec, paths: &[String]) -> Vec { let p = e.path(); paths .iter() - .any(|f| p == f || p.starts_with(&format!("{f}/"))) + .any(|f| p == f || p.starts_with_bytes(format!("{f}/").as_bytes())) }) .collect() } @@ -615,7 +615,7 @@ fn launch_file_diff( if should_prompt { writeln!(stdout)?; - writeln!(stdout, "Viewing ({counter}/{total}): '{merged}'")?; + writeln!(stdout, "Viewing ({counter}/{total}): '{}'", merged.display())?; let prompt_label = tool.extcmd.as_deref().unwrap_or(&tool.tool_name); write!(stdout, "Launch '{prompt_label}' [Y/n]? ")?; stdout.flush().map_err(Error::Io)?; @@ -629,7 +629,7 @@ fn launch_file_diff( } } - let status = run_tool(tool, &local_path, &remote_path, merged, counter, total)?; + let status = run_tool(tool, &local_path, &remote_path, &merged.to_string_lossy(), counter, total)?; let mut code = status.code().unwrap_or(1); if code == 127 { code = 128; @@ -649,7 +649,7 @@ fn materialize_pair( work_tree: &Path, tmp_dir: &Path, ) -> Result<(PathBuf, PathBuf)> { - let safe_name = entry.path().replace('/', "_"); + let safe_name = entry.path().to_string_lossy().replace('/', "_"); let local_tmp = tmp_dir.join(format!("local_{safe_name}")); let remote_tmp = tmp_dir.join(format!("remote_{safe_name}")); @@ -665,7 +665,7 @@ fn materialize_pair( } _ => { write_blob_or_empty(&repo.odb, &entry.old_oid, &local_tmp)?; - let wt = work_tree.join(entry.path()); + let wt = entry.path().to_fs_path(work_tree); if wt.exists() { Ok((local_tmp, wt)) } else { @@ -876,7 +876,10 @@ fn capture_dir_diff_baseline( let Some(path) = entry.new_path.as_deref().or(entry.old_path.as_deref()) else { continue; }; - baseline.insert(path.to_string(), std::fs::read(right.join(path)).ok()); + baseline.insert( + path.to_string_lossy().into_owned(), + std::fs::read(path.to_fs_path(right)).ok(), + ); } baseline } @@ -931,7 +934,7 @@ fn populate_dir_side( let Some(rel) = path else { return Ok(()); }; - let dest = dir.join(rel); + let dest = rel.to_fs_path(dir); let mode_str = if is_left { &entry.old_mode @@ -961,9 +964,9 @@ fn populate_dir_side( if let Some(parent) = dest.parent() { std::fs::create_dir_all(parent).map_err(Error::Io)?; } - let wt_symlink = work_tree.join(rel); + let wt_symlink = rel.to_fs_path(work_tree); let target = if oid.is_zero() || (!is_left && use_symlinks && wt_symlink.is_symlink()) { - std::fs::read_link(work_tree.join(rel)) + std::fs::read_link(rel.to_fs_path(work_tree)) .map(|p| p.to_string_lossy().into_owned()) .unwrap_or_default() } else { @@ -991,7 +994,7 @@ fn populate_dir_side( // through to copying the work-tree file into the temp dir below. #[cfg(unix)] if !is_left && use_symlinks { - let wt = work_tree.join(rel); + let wt = rel.to_fs_path(work_tree); if wt.is_file() { let _ = std::fs::remove_file(&dest); std::os::unix::fs::symlink(&wt, &dest).map_err(Error::Io)?; @@ -1002,7 +1005,7 @@ fn populate_dir_side( let _ = use_symlinks; if !is_left { - let wt = work_tree.join(rel); + let wt = rel.to_fs_path(work_tree); if wt.is_file() { std::fs::copy(wt, &dest).map_err(Error::Io)?; return Ok(()); @@ -1014,7 +1017,7 @@ fn populate_dir_side( // Copy working-tree modifications for right side when applicable. if !is_left { - let wt = work_tree.join(rel); + let wt = rel.to_fs_path(work_tree); if wt.exists() { if let Ok(bytes) = std::fs::read(&wt) { std::fs::write(&dest, bytes).map_err(Error::Io)?; diff --git a/grit-lib/src/fast_export.rs b/grit-lib/src/fast_export.rs index 31e98611f..37472d8d8 100644 --- a/grit-lib/src/fast_export.rs +++ b/grit-lib/src/fast_export.rs @@ -348,11 +348,11 @@ fn diff_entry_matches_paths(entry: &DiffEntry, paths: &[String]) -> bool { if paths.is_empty() { return true; } - matches_pathspec_list(entry.path(), paths) + matches_pathspec_list(&entry.path().to_string_lossy(), paths) || entry .old_path .as_deref() - .is_some_and(|path| matches_pathspec_list(path, paths)) + .is_some_and(|path| matches_pathspec_list(&path.to_string_lossy(), paths)) } fn export_ref_for_non_all(repo: &Repository) -> Result { @@ -541,28 +541,32 @@ pub fn export_stream( match e.status { DiffStatus::Deleted => { let path = if let Some(a) = anon.as_mut() { - a.anonymize_path(e.path()) + a.anonymize_path(&e.path().to_string_lossy()) } else { - e.path().to_string() + e.path().to_string_lossy().into_owned() }; writeln!(writer, "D {path}")?; - changed.insert(e.path().to_string()); + changed.insert(e.path().to_string_lossy().into_owned()); } DiffStatus::Renamed | DiffStatus::Copied => { - let old_p = e.old_path.as_deref().unwrap_or(""); + let old_p = e + .old_path + .as_deref() + .map(|p| p.to_string_lossy().into_owned()) + .unwrap_or_default(); let skip_modify = e.old_oid == e.new_oid && e.old_mode == e.new_mode - && !changed.contains(old_p); - if !changed.contains(old_p) { + && !changed.contains(&old_p); + if !changed.contains(&old_p) { let op = if let Some(a) = anon.as_mut() { - a.anonymize_path(old_p) + a.anonymize_path(&old_p) } else { - old_p.to_string() + old_p.clone() }; let np = if let Some(a) = anon.as_mut() { - a.anonymize_path(e.path()) + a.anonymize_path(&e.path().to_string_lossy()) } else { - e.path().to_string() + e.path().to_string_lossy().into_owned() }; writeln!(writer, "{} {op} {np}", e.status.letter())?; } @@ -577,8 +581,8 @@ pub fn export_stream( options.no_data, )?; } - changed.insert(old_p.to_string()); - changed.insert(e.path().to_string()); + changed.insert(old_p); + changed.insert(e.path().to_string_lossy().into_owned()); } DiffStatus::Added | DiffStatus::Modified | DiffStatus::TypeChanged => { fallthrough_modify( @@ -590,7 +594,7 @@ pub fn export_stream( options.anonymize, options.no_data, )?; - changed.insert(e.path().to_string()); + changed.insert(e.path().to_string_lossy().into_owned()); } _ => {} } @@ -673,9 +677,9 @@ fn fallthrough_modify( ) -> Result<()> { let mode = u32::from_str_radix(e.new_mode.trim(), 8).unwrap_or(0); let path = if let Some(a) = anon.as_mut() { - a.anonymize_path(e.path()) + a.anonymize_path(&e.path().to_string_lossy()) } else { - e.path().to_string() + e.path().to_string_lossy().into_owned() }; if mode == MODE_GITLINK { let hex = e.new_oid.to_hex(); diff --git a/grit-lib/src/fetch_submodules.rs b/grit-lib/src/fetch_submodules.rs index 31211cb3f..4dae35eda 100644 --- a/grit-lib/src/fetch_submodules.rs +++ b/grit-lib/src/fetch_submodules.rs @@ -217,7 +217,7 @@ pub fn collect_changed_submodules_for_fetch( if !is_gitlink_mode(&e.new_mode) { continue; } - record_gitlink(e.path().to_string(), e.new_oid, &commit.tree)?; + record_gitlink(e.path().to_string_lossy().into_owned(), e.new_oid, &commit.tree)?; } } else if parents.len() == 1 { let pobj = odb.read(&parents[0])?; @@ -247,8 +247,8 @@ pub fn collect_changed_submodules_for_fetch( .new_path .as_deref() .or(e.old_path.as_deref()) - .unwrap_or("") - .to_string(); + .map(|p| p.to_string_lossy().into_owned()) + .unwrap_or_default(); if path.is_empty() { continue; } diff --git a/grit-lib/src/line_log.rs b/grit-lib/src/line_log.rs index 2ead86316..b8a618bb5 100644 --- a/grit-lib/src/line_log.rs +++ b/grit-lib/src/line_log.rs @@ -999,7 +999,7 @@ fn filter_entries_for_paths(entries: Vec, paths: &[String]) -> Vec = - entries.iter().map(|e| e.path().to_string()).collect(); + entries.iter().map(|e| e.path().to_string_lossy().into_owned()).collect(); per_parent.push(paths); } if per_parent.is_empty() { @@ -105,11 +105,14 @@ pub fn combined_merge_parent_blob_paths( if e.status != DiffStatus::Renamed { continue; } - let new_p = e.new_path.as_deref().unwrap_or(""); + let Some(new_p) = e.new_path.as_deref() else { + continue; + }; if new_p != merge_path { continue; } - let old_p = e.old_path.clone()?; + // TODO(byte-paths): merge path interop stays lossy until rename sources migrate (Phase 4) + let old_p = e.old_path.as_ref()?.to_string_lossy().into_owned(); if blob_oid_at_path(odb, t, &old_p).is_some() { if found.is_some() { return None; diff --git a/grit-lib/src/patch_ids.rs b/grit-lib/src/patch_ids.rs index ec95e36c8..96fa6a2ea 100644 --- a/grit-lib/src/patch_ids.rs +++ b/grit-lib/src/patch_ids.rs @@ -22,6 +22,7 @@ use crate::error::Result; use crate::merge_file; use crate::objects::{parse_commit, ObjectId, ObjectKind}; use crate::odb::Odb; +use crate::repo_path::RepoPath; /// How to compute a patch-ID from unified diff text. #[derive(Debug, Clone, Copy, PartialEq, Eq)] @@ -466,12 +467,12 @@ fn compute_patch_id_filtered( .old_path .as_deref() .or(entry.new_path.as_deref()) - .unwrap_or(""); + .unwrap_or(RepoPath::from_str("")); let new_path = entry .new_path .as_deref() .or(entry.old_path.as_deref()) - .unwrap_or(""); + .unwrap_or(RepoPath::from_str("")); let mut old_path_buf = old_path.as_bytes().to_vec(); let mut new_path_buf = new_path.as_bytes().to_vec(); let len1 = remove_space_bytes(&mut old_path_buf); @@ -559,7 +560,7 @@ fn diff_entry_matches_paths(entry: &DiffEntry, paths: &[String]) -> bool { .as_deref() .into_iter() .chain(entry.new_path.as_deref()) - .any(|path| crate::pathspec::matches_pathspec_list(path, paths)) + .any(|path| crate::pathspec::matches_pathspec_list(&path.to_string_lossy(), paths)) } fn parse_mode_u32(mode: &str) -> u32 { diff --git a/grit-lib/src/porcelain/checkout.rs b/grit-lib/src/porcelain/checkout.rs index a9aac228b..291c9c518 100644 --- a/grit-lib/src/porcelain/checkout.rs +++ b/grit-lib/src/porcelain/checkout.rs @@ -48,7 +48,10 @@ pub fn checkout_between_trees( for change in &changes { if change.status == DiffStatus::Deleted { if let Some(path) = &change.old_path { - let abs = work_tree.join(path); + // TODO(byte-paths): worktree write/stat paths route through the &str + // worktree API and stay lossy until Phase 2 migrates working-tree I/O. + let path = path.to_string_lossy(); + let abs = work_tree.join(&*path); let _ = std::fs::remove_file(&abs); remove_empty_parent_dirs(&work_tree, &abs); index.remove(path.as_bytes()); @@ -59,10 +62,11 @@ pub fn checkout_between_trees( let Some(path) = &change.new_path else { continue; }; + let path = path.to_string_lossy(); let mode = u32::from_str_radix(&change.new_mode, 8).unwrap_or(MODE_REGULAR); let object = repo.odb.read(&change.new_oid)?; - write_to_worktree(&work_tree, path, &object.data, mode)?; - let entry = entry_from_stat(&work_tree.join(path), path.as_bytes(), change.new_oid, mode)?; + write_to_worktree(&work_tree, &path, &object.data, mode)?; + let entry = entry_from_stat(&work_tree.join(&*path), path.as_bytes(), change.new_oid, mode)?; index.add_or_replace(entry); } diff --git a/grit-lib/src/porcelain/status.rs b/grit-lib/src/porcelain/status.rs index 313f67101..88c4d7289 100644 --- a/grit-lib/src/porcelain/status.rs +++ b/grit-lib/src/porcelain/status.rs @@ -735,7 +735,7 @@ pub fn status( let mut staged: Vec = crate::diff::diff_index_to_tree(&repo.odb, &index, head_tree.as_ref(), false)? .into_iter() - .filter(|e| status_path_matches(e.path(), &opts.pathspecs)) + .filter(|e| status_path_matches(&e.path().to_string_lossy(), &opts.pathspecs)) .collect(); // Unstaged: worktree vs index, narrowed before rename detection. @@ -749,7 +749,7 @@ pub fn status( }, )? .into_iter() - .filter(|e| status_path_matches(e.path(), &opts.pathspecs)) + .filter(|e| status_path_matches(&e.path().to_string_lossy(), &opts.pathspecs)) .collect(); if let Some(rd) = opts.renames { diff --git a/grit-lib/src/push_submodules.rs b/grit-lib/src/push_submodules.rs index 62c89389b..60e57e65d 100644 --- a/grit-lib/src/push_submodules.rs +++ b/grit-lib/src/push_submodules.rs @@ -180,7 +180,7 @@ pub fn collect_changed_gitlinks_for_push( if !is_gitlink_mode(&e.new_mode) { continue; } - let path = e.path().to_string(); + let path = e.path().to_string_lossy().into_owned(); by_path.entry(path).or_default().push(e.new_oid); } } else if parents.len() == 1 { @@ -211,11 +211,12 @@ pub fn collect_changed_gitlinks_for_push( .new_path .as_deref() .or(e.old_path.as_deref()) - .unwrap_or(""); + .map(|p| p.to_string_lossy().into_owned()) + .unwrap_or_default(); if path.is_empty() { continue; } - by_path.entry(path.to_string()).or_default().push(oid); + by_path.entry(path).or_default().push(oid); } } else { let paths = diff --git a/grit-lib/src/repo_path.rs b/grit-lib/src/repo_path.rs index 802b3eb82..7bb653ca0 100644 --- a/grit-lib/src/repo_path.rs +++ b/grit-lib/src/repo_path.rs @@ -61,6 +61,18 @@ impl RepoPath { self.0.is_empty() } + /// Length of the path in bytes. + #[must_use] + pub fn len(&self) -> usize { + self.0.len() + } + + /// `true` if the path begins with the given byte prefix (raw, not component-aware). + #[must_use] + pub fn starts_with_bytes(&self, prefix: &[u8]) -> bool { + self.0.starts_with(prefix) + } + /// The path as `&str` iff it is valid UTF-8, else `None`. Use this for /// programmatic comparison; never for human display (see [`RepoPath::display`]). #[must_use] @@ -76,6 +88,14 @@ impl RepoPath { RepoPathDisplay(&self.0) } + /// Lossy UTF-8 view as a `Cow` (invalid bytes become U+FFFD). For + /// String-keyed maps and other interop where a byte path must be stringified; + /// prefer [`RepoPath::as_bytes`]/[`RepoPath::to_str`] when losslessness matters. + #[must_use] + pub fn to_string_lossy(&self) -> std::borrow::Cow<'_, str> { + String::from_utf8_lossy(&self.0) + } + /// The final component (after the last `/`), or `None` if empty or ends in `/`. #[must_use] pub fn file_name(&self) -> Option<&RepoPath> { @@ -214,6 +234,45 @@ impl AsRef for RepoPathBuf { } } +// Byte-exact equality against UTF-8 string types. Comparing path bytes to a +// `str`'s UTF-8 bytes is lossless (not a `from_utf8_lossy`), so these are safe +// conveniences for the many `entry.path() == "foo"` call sites. +impl PartialEq for RepoPath { + fn eq(&self, other: &str) -> bool { + &self.0 == other.as_bytes() + } +} + +impl PartialEq<&str> for RepoPath { + fn eq(&self, other: &&str) -> bool { + &self.0 == other.as_bytes() + } +} + +impl PartialEq for RepoPath { + fn eq(&self, other: &String) -> bool { + &self.0 == other.as_bytes() + } +} + +impl PartialEq for RepoPathBuf { + fn eq(&self, other: &str) -> bool { + self.0 == other.as_bytes() + } +} + +impl PartialEq<&str> for RepoPathBuf { + fn eq(&self, other: &&str) -> bool { + self.0 == other.as_bytes() + } +} + +impl PartialEq for RepoPathBuf { + fn eq(&self, other: &String) -> bool { + self.0 == other.as_bytes() + } +} + /// Lossy `Display`/`Debug` adapter returned by [`RepoPath::display`]. pub struct RepoPathDisplay<'a>(&'a [u8]); @@ -224,6 +283,24 @@ impl fmt::Display for RepoPathDisplay<'_> { } } +// Lossy `Display` for the path types themselves (invalid UTF-8 → U+FFFD). +// `std::path::Path` deliberately omits `Display`, but Git paths are displayed +// in hundreds of call sites (diff/log/show/status output); a lossy `Display` +// matches `bstr::BStr` and keeps those sites ergonomic. It does NOT weaken the +// newtype's identity — there is still no implicit coercion *to* a path, and +// round-trip-safe output must still go through `quote_path`/`as_bytes`. +impl fmt::Display for RepoPath { + fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result { + fmt::Display::fmt(self.0.as_bstr(), f) + } +} + +impl fmt::Display for RepoPathBuf { + fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result { + fmt::Display::fmt(self.0.as_bstr(), f) + } +} + impl fmt::Debug for RepoPath { fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result { write!(f, "RepoPath({:?})", self.0.as_bstr()) diff --git a/grit-lib/tests/non_utf8_paths.rs b/grit-lib/tests/non_utf8_paths.rs new file mode 100644 index 000000000..4133d9b67 --- /dev/null +++ b/grit-lib/tests/non_utf8_paths.rs @@ -0,0 +1,91 @@ +//! Phase 1 regression: a diff over a tree whose entry name is **not** valid +//! UTF-8 must preserve the path bytes exactly, rather than mangling them with +//! `String::from_utf8_lossy` (U+FFFD). See `docs/non-utf8-paths-design.md`. + +use grit_lib::diff::{diff_trees, DiffStatus}; +use grit_lib::objects::{serialize_tree, ObjectId, ObjectKind, TreeEntry}; +use grit_lib::odb::Odb; + +/// A filename with a stray Latin-1 byte (`0xE9`, 'é') and a raw `0xFF` — both +/// illegal in UTF-8, both legal in a Git tree on Unix. +const RAW_NAME: &[u8] = b"caf\xe9-\xff.txt"; + +fn write_blob(odb: &Odb, contents: &[u8]) -> ObjectId { + odb.write(ObjectKind::Blob, contents).unwrap() +} + +fn write_tree(odb: &Odb, name: &[u8], blob: ObjectId) -> ObjectId { + let entries = vec![TreeEntry { + mode: 0o100644, + name: name.to_vec(), + oid: blob, + }]; + odb.write(ObjectKind::Tree, &serialize_tree(&entries)).unwrap() +} + +#[test] +fn added_entry_preserves_non_utf8_bytes() { + let tmp = tempfile::tempdir().unwrap(); + let odb = Odb::new(tmp.path()); + + let blob = write_blob(&odb, b"hello\n"); + let tree = write_tree(&odb, RAW_NAME, blob); + + // Empty tree -> our tree: the file is Added. + let entries = diff_trees(&odb, None, Some(&tree), "").unwrap(); + assert_eq!(entries.len(), 1); + let e = &entries[0]; + assert_eq!(e.status, DiffStatus::Added); + + let path = e.new_path.as_ref().expect("added entry has a new_path"); + // The bytes must survive verbatim — no U+FFFD substitution. + assert_eq!(path.as_bytes(), RAW_NAME); + assert!(path.to_str().is_none(), "name is intentionally non-UTF-8"); +} + +#[test] +fn modified_entry_preserves_non_utf8_bytes_on_both_sides() { + let tmp = tempfile::tempdir().unwrap(); + let odb = Odb::new(tmp.path()); + + let old_blob = write_blob(&odb, b"one\n"); + let new_blob = write_blob(&odb, b"two\n"); + let old_tree = write_tree(&odb, RAW_NAME, old_blob); + let new_tree = write_tree(&odb, RAW_NAME, new_blob); + + let entries = diff_trees(&odb, Some(&old_tree), Some(&new_tree), "").unwrap(); + assert_eq!(entries.len(), 1); + let e = &entries[0]; + assert_eq!(e.status, DiffStatus::Modified); + assert_eq!(e.old_path.as_ref().unwrap().as_bytes(), RAW_NAME); + assert_eq!(e.new_path.as_ref().unwrap().as_bytes(), RAW_NAME); +} + +#[test] +fn nested_non_utf8_directory_round_trips() { + let tmp = tempfile::tempdir().unwrap(); + let odb = Odb::new(tmp.path()); + + // A subtree whose *directory* name is non-UTF-8, holding a normal file. + let blob = write_blob(&odb, b"x\n"); + let inner = vec![TreeEntry { + mode: 0o100644, + name: b"file.txt".to_vec(), + oid: blob, + }]; + let inner_oid = odb.write(ObjectKind::Tree, &serialize_tree(&inner)).unwrap(); + let dir_name: &[u8] = b"d\xe9r"; + let outer = vec![TreeEntry { + mode: 0o040000, + name: dir_name.to_vec(), + oid: inner_oid, + }]; + let outer_oid = odb.write(ObjectKind::Tree, &serialize_tree(&outer)).unwrap(); + + let entries = diff_trees(&odb, None, Some(&outer_oid), "").unwrap(); + assert_eq!(entries.len(), 1); + // The non-UTF-8 directory prefix must be carried through the recursion. + let mut expected = dir_name.to_vec(); + expected.extend_from_slice(b"/file.txt"); + assert_eq!(entries[0].new_path.as_ref().unwrap().as_bytes(), &expected[..]); +}