diff --git a/README.md b/README.md index 655a551..e1e46e5 100644 --- a/README.md +++ b/README.md @@ -73,6 +73,16 @@ Issues are grouped into Milestones for organizational purposes. | [`ghqc auth status`](docs/auth.md) | Display stored tokens and auth source resolution for the selected host | | [`ghqc auth token`](docs/auth.md) | Print the resolved token for the selected host | +### Cache + +`ghqc` keeps a small on-disk cache (per-commit file changes, issue comments/events, repo users and labels) under `~/.cache/ghqc/<owner>/<repo>/`. + +| Command | Description | +|---|---| +| [`ghqc cache status`](docs/cache.md) | Show cache root, total size, TTL, and per-element sizes for the current repo | +| [`ghqc cache dir`](docs/cache.md) | Print the cache directory for the current repo (or `--global` for the root) | +| [`ghqc cache remove`](docs/cache.md) | Remove cached data for the current repo, a single element, or globally | + ### Diagnostics | Command | Description | diff --git a/docs/cache.md b/docs/cache.md new file mode 100644 index 0000000..a699667 --- /dev/null +++ b/docs/cache.md @@ -0,0 +1,112 @@ +# Cache + +`ghqc` keeps a small on-disk cache of GitHub data and per-commit file-change records to speed up repeated operations. The cache lives under the system cache directory (typically `~/.cache/ghqc` on Linux/macOS via XDG; honors `XDG_CACHE_HOME`) and is namespaced per repository as `<root>/<owner>/<repo>/`. + +Within a repository's cache directory, data is grouped by **element**: + +| Element | Contents | +|---|---| +| `commits` | Per-commit file-change records (drives the "commits that changed file X" list) | +| `issues` | Issue comments and events | +| `users` | Repo assignees and user details | +| `labels` | Repo labels | + +TTL defaults to 1 hour (3600s). Override with the `GHQC_CACHE_TIMEOUT` environment variable (in seconds). Some entries (issue comments/events, user details) are stored without a TTL and refresh based on GitHub-side timestamps instead. 
+ +## Status + +```shell +ghqc cache status +``` + +Show the cache root, total size, configured TTL, and a per-element table for the current repo. + +### Example output + +``` +── Cache ───────────────────────────────────── +root: /home/user/.cache/ghqc +size: 1.40 MB (87 files) +ttl: 3600s (default; override with GHQC_CACHE_TIMEOUT) +── Repository ──────────────────────────────── +repo: A2-ai/ghqctoolkit +path: /home/user/.cache/ghqc/A2-ai/ghqctoolkit + + element size files + ------- ---- ----- + commits 612.4 KB 12 + issues 780.2 KB 64 + users 3.1 KB 2 + labels — — +``` + +When run outside a git repository, the global section is shown as usual and the Repository section contains only a note that per-repo stats are unavailable. + +## Dir + +```shell +ghqc cache dir [--global] +``` + +Print the cache directory for the current repo (default) or the cache root (`--global`). Useful for piping into other tools, e.g. `du -sh "$(ghqc cache dir)"`. + +Aliases: `ghqc cache directory`. + +### Examples + +```shell +# Per-repo cache directory +ghqc cache dir +# /home/user/.cache/ghqc/A2-ai/ghqctoolkit + +# Cache root +ghqc cache dir --global +# /home/user/.cache/ghqc +``` + +When run outside a git repository, the per-repo form errors; pass `--global` instead. + +## Remove + +```shell +ghqc cache remove [ELEMENT] [--global] +``` + +Remove cached data from disk. The cache is reconstructible — entries will be re-fetched on next use — so deletion is safe. The command prints what was removed and exits 0 even if no matching entries existed. + +Aliases: `ghqc cache rm`. + +### Behavior + +| Invocation | Effect | +|---|---| +| `ghqc cache remove` | Remove the entire per-repo cache for the current repo | +| `ghqc cache remove <element>` | Remove just `<element>` for the current repo | +| `ghqc cache remove --global` | Wipe the entire `ghqc` cache directory (all repos, all elements) | +| `ghqc cache remove <element> --global` | Remove `<element>` for **every** repo under the cache root | + +When run outside a git repository, the repo-scoped forms error; use `--global` instead. 
+ +### Examples + +```shell +# Drop just the commits cache for this repo (e.g. after a force-push or rebase) +ghqc cache remove commits + +# Drop everything cached for this repo +ghqc cache remove + +# Drop the labels cache for every repo on this machine +ghqc cache remove labels --global + +# Wipe the entire ghqc cache +ghqc cache remove --global +``` + +### When to clear + +In normal use the cache refreshes itself on TTL expiry or when GitHub-side timestamps change. You typically only need to clear it when: + +- A `ghqc` upgrade changes how something is cached and you want the new logic applied immediately to already-cached commits/issues. +- A force-push or rebase rewrote history and the per-commit cache no longer reflects the branch. +- You're debugging unexpected behavior and want to rule out a stale cache. diff --git a/src/cache.rs b/src/cache.rs index 6f8824f..c364655 100644 --- a/src/cache.rs +++ b/src/cache.rs @@ -93,8 +93,7 @@ impl DiskCache { /// Create a new DiskCache instance using the system cache directory pub fn new(owner: String, repo: String) -> Result> { - let strategy = etcetera::choose_base_strategy()?; - let root = strategy.cache_dir().join("ghqc"); + let root = cache_root()?; let ttl = default_ttl(); Ok(Self { @@ -191,6 +190,12 @@ impl DiskCache { } } +/// Path of the on-disk cache root (`/ghqc`). 
+pub fn cache_root() -> Result> { + let strategy = etcetera::choose_base_strategy()?; + Ok(strategy.cache_dir().join("ghqc")) +} + /// Get the default cache TTL from environment or use 1 hour default fn default_ttl() -> Duration { let ttl_seconds = std::env::var("GHQC_CACHE_TIMEOUT") diff --git a/src/cli/cache.rs b/src/cli/cache.rs new file mode 100644 index 0000000..52293c0 --- /dev/null +++ b/src/cli/cache.rs @@ -0,0 +1,263 @@ +use std::fs; +use std::path::{Path, PathBuf}; + +use anyhow::{Result, anyhow, bail}; +use clap::{Subcommand, ValueEnum}; + +use crate::cache::cache_root; +use crate::git::GitInfo; +use crate::git::GitRepository; +use crate::utils::StdEnvProvider; + +#[derive(Subcommand)] +pub enum CacheCommands { + /// Remove cached data from disk + #[command(alias = "rm")] + Remove { + /// Which cache element to clear. Omit to clear all caches for the current repo + /// (or, with --global, the entire ghqc cache directory). + #[arg(value_enum)] + element: Option, + + /// Clear across every owner/repo. With : removes that feature for every + /// repo. Without : wipes the entire ghqc cache directory. + #[arg(long)] + global: bool, + }, + /// Print the cache directory for the current repo (or --global for the root) + #[command(alias = "directory")] + Dir { + /// Print the ghqc cache root instead of the per-repo directory. + #[arg(long)] + global: bool, + }, + /// Show cache locations, sizes, and TTL settings + Status, +} + +#[derive(Copy, Clone, Debug, ValueEnum)] +pub enum CacheElement { + /// Per-commit file-change records (drives the "commits that changed file X" list). + Commits, + /// Cached issue comments and events. + Issues, + /// Repo assignees and user details. + Users, + /// Repo labels. 
+ Labels, +} + +impl CacheElement { + fn dir_name(self) -> &'static str { + match self { + CacheElement::Commits => "commits", + CacheElement::Issues => "issues", + CacheElement::Users => "users", + CacheElement::Labels => "labels", + } + } +} + +pub fn handle_cache(cmd: CacheCommands, directory: &Path) -> Result<()> { + match cmd { + CacheCommands::Remove { element, global } => clear(element, global, directory), + CacheCommands::Dir { global } => dir(global, directory), + CacheCommands::Status => status(directory), + } +} + +fn status(directory: &Path) -> Result<()> { + let root = cache_root().map_err(|e| anyhow!("failed to resolve cache root: {e}"))?; + + println!("{}", super::section_header("Cache")); + println!("root: {}", root.display()); + if root.exists() { + let (size, files) = dir_stats(&root)?; + println!( + "size: {} ({} file{})", + format_bytes(size), + files, + if files == 1 { "" } else { "s" } + ); + } else { + println!("size: (cache root does not exist yet)"); + } + println!("ttl: {}", ttl_description()); + + println!("{}", super::section_header("Repository")); + match resolve_repo(directory) { + Ok((owner, repo)) => { + let repo_dir = root.join(&owner).join(&repo); + println!("repo: {}/{}", owner, repo); + println!("path: {}", repo_dir.display()); + if !repo_dir.exists() { + println!("(no cache entries for this repo yet)"); + } else { + println!(); + println!(" {:<10} {:>10} {:>8}", "element", "size", "files"); + println!(" {:<10} {:>10} {:>8}", "-------", "----", "-----"); + for elem in [ + CacheElement::Commits, + CacheElement::Issues, + CacheElement::Users, + CacheElement::Labels, + ] { + let p = repo_dir.join(elem.dir_name()); + let (size, files) = if p.exists() { dir_stats(&p)? 
} else { (0, 0) }; + let size_str = if files == 0 { + "—".to_string() + } else { + format_bytes(size) + }; + let files_str = if files == 0 { + "—".to_string() + } else { + files.to_string() + }; + println!( + " {:<10} {:>10} {:>8}", + elem.dir_name(), + size_str, + files_str + ); + } + } + } + Err(_) => { + println!("(not in a git repository — run from inside a repo for per-repo stats)"); + } + } + + Ok(()) +} + +fn dir_stats(path: &Path) -> Result<(u64, u64)> { + let mut size = 0u64; + let mut files = 0u64; + let mut stack = vec![path.to_path_buf()]; + while let Some(p) = stack.pop() { + let meta = fs::symlink_metadata(&p)?; + if meta.is_dir() { + for entry in fs::read_dir(&p)? { + stack.push(entry?.path()); + } + } else if meta.is_file() { + size += meta.len(); + files += 1; + } + } + Ok((size, files)) +} + +fn format_bytes(n: u64) -> String { + const KB: u64 = 1024; + const MB: u64 = KB * 1024; + const GB: u64 = MB * 1024; + if n >= GB { + format!("{:.2} GB", n as f64 / GB as f64) + } else if n >= MB { + format!("{:.2} MB", n as f64 / MB as f64) + } else if n >= KB { + format!("{:.1} KB", n as f64 / KB as f64) + } else { + format!("{} B", n) + } +} + +fn ttl_description() -> String { + match std::env::var("GHQC_CACHE_TIMEOUT") { + Ok(v) => format!("{}s (from GHQC_CACHE_TIMEOUT)", v), + Err(_) => "3600s (default; override with GHQC_CACHE_TIMEOUT)".to_string(), + } +} + +fn dir(global: bool, directory: &Path) -> Result<()> { + let root = cache_root().map_err(|e| anyhow!("failed to resolve cache root: {e}"))?; + let path = if global { + root + } else { + let (owner, repo) = resolve_repo(directory)?; + root.join(owner).join(repo) + }; + println!("{}", path.display()); + Ok(()) +} + +fn clear(element: Option, global: bool, directory: &Path) -> Result<()> { + let root = cache_root().map_err(|e| anyhow!("failed to resolve cache root: {e}"))?; + + let removed = match (global, element) { + (true, None) => remove_dir(&root)?.into_iter().collect::>(), + (true, Some(f)) => 
clear_feature_global(&root, f)?, + (false, None) => { + let (owner, repo) = resolve_repo(directory)?; + remove_dir(&root.join(&owner).join(&repo))? + .into_iter() + .collect() + } + (false, Some(f)) => { + let (owner, repo) = resolve_repo(directory)?; + remove_dir(&root.join(&owner).join(&repo).join(f.dir_name()))? + .into_iter() + .collect() + } + }; + + if removed.is_empty() { + println!("no cache entries found"); + } else { + for path in &removed { + println!("removed {}", path.display()); + } + println!( + "cleared {} cache director{}", + removed.len(), + if removed.len() == 1 { "y" } else { "ies" } + ); + } + Ok(()) +} + +fn resolve_repo(directory: &Path) -> Result<(String, String)> { + let env = StdEnvProvider; + let git_info = GitInfo::from_path(directory, &env, None).map_err(|e| { + anyhow!( + "not in a git repository (or repo info unavailable): {e}. \ + Use --global to operate on the entire ghqc cache." + ) + })?; + Ok((git_info.owner().to_string(), git_info.repo().to_string())) +} + +fn clear_feature_global(root: &Path, element: CacheElement) -> Result> { + if !root.exists() { + return Ok(Vec::new()); + } + let mut removed = Vec::new(); + for owner_entry in fs::read_dir(root)? { + let owner_path = owner_entry?.path(); + if !owner_path.is_dir() { + continue; + } + for repo_entry in fs::read_dir(&owner_path)? { + let repo_path = repo_entry?.path(); + if !repo_path.is_dir() { + continue; + } + let target = repo_path.join(element.dir_name()); + if let Some(p) = remove_dir(&target)? { + removed.push(p); + } + } + } + Ok(removed) +} + +/// Remove `path` if it exists. Returns the path if something was removed, None otherwise. 
+fn remove_dir(path: &Path) -> Result> { + match fs::remove_dir_all(path) { + Ok(()) => Ok(Some(path.to_path_buf())), + Err(e) if e.kind() == std::io::ErrorKind::NotFound => Ok(None), + Err(e) => bail!("failed to remove {}: {e}", path.display()), + } +} diff --git a/src/cli/mod.rs b/src/cli/mod.rs index 335c7f3..72b0565 100644 --- a/src/cli/mod.rs +++ b/src/cli/mod.rs @@ -1,5 +1,6 @@ mod archive; mod auth; +pub mod cache; mod context; mod file_parser; mod interactive; @@ -27,6 +28,7 @@ pub use archive::{ MilestoneSelectionFilter, generate_archive_name, get_milestone_issue_threads, prompt_archive, }; pub use auth::{gh_auth_login, gh_auth_logout, gh_auth_status, gh_auth_token}; +pub use cache::{CacheCommands, handle_cache}; pub use context::find_issue; pub use file_parser::{ FileCommitPair, FileCommitPairParser, IssueUrlArg, IssueUrlArgParser, RelevantFileArg, diff --git a/src/git/action.rs b/src/git/action.rs index 66e6d4e..57ff6ee 100644 --- a/src/git/action.rs +++ b/src/git/action.rs @@ -187,20 +187,11 @@ impl GitCli for GitCommand { ); let mut cmd = std::process::Command::new("git"); - // --full-history disables history simplification so merge commits that - // introduce changes to the file are included (matching the prior - // tree-diff behaviour which compared every commit against all parents). - // -m causes diffs for merge commits to be computed against each parent - // individually, which is required for --full-history to detect per-file - // changes in merge commits correctly. - cmd.args([ - "-C", - &repo_path.to_string_lossy(), - "log", - "--format=%H", - "--full-history", - "-m", - ]); + // Rely on git's default history simplification: a merge that is + // TREESAME with one parent for this path is dropped, which avoids + // listing merge commits whose resolution matched a parent verbatim + // (i.e. no net content change for the file). 
+ cmd.args(["-C", &repo_path.to_string_lossy(), "log", "--format=%H"]); if let Some(b) = branch { cmd.arg(b); } diff --git a/src/lib.rs b/src/lib.rs index cf64b8c..d3cbd35 100644 --- a/src/lib.rs +++ b/src/lib.rs @@ -40,8 +40,8 @@ pub use auth::{ }; pub use cache::DiskCache; pub use cache::{ - CachedCommit, CachedEvents, FileChangeRecord, create_labels_if_needed, get_issue_comments, - get_issue_events, get_repo_users, + CachedCommit, CachedEvents, FileChangeRecord, cache_root, create_labels_if_needed, + get_issue_comments, get_issue_events, get_repo_users, }; pub use comment::QCComment; pub use comment_system::CommentBody; diff --git a/src/main.rs b/src/main.rs index 7b229c6..329e7d8 100644 --- a/src/main.rs +++ b/src/main.rs @@ -6,12 +6,12 @@ use std::path::PathBuf; use ghqctoolkit::AuthStore; use ghqctoolkit::cli::{ - FileCommitPair, FileCommitPairParser, IssueUrlArg, IssueUrlArgParser, MilestoneSelectionFilter, - RelevantFileArg, RelevantFileArgParser, confirm_rename_noninteractive, find_issue, - generate_archive_name, get_milestone_issue_threads, gh_auth_login, gh_auth_logout, - gh_auth_status, gh_auth_token, interactive_milestone_status, interactive_rename, - interactive_status, milestone_status, prompt_archive, prompt_context_files, - prompt_milestone_record, single_issue_status, + CacheCommands, FileCommitPair, FileCommitPairParser, IssueUrlArg, IssueUrlArgParser, + MilestoneSelectionFilter, RelevantFileArg, RelevantFileArgParser, + confirm_rename_noninteractive, find_issue, generate_archive_name, get_milestone_issue_threads, + gh_auth_login, gh_auth_logout, gh_auth_status, gh_auth_token, handle_cache, + interactive_milestone_status, interactive_rename, interactive_status, milestone_status, + prompt_archive, prompt_context_files, prompt_milestone_record, single_issue_status, }; use ghqctoolkit::utils::StdEnvProvider; use ghqctoolkit::{ @@ -60,6 +60,11 @@ enum Commands { #[command(subcommand)] configuration_command: ConfigurationCommands, }, + /// Cache 
management commands + Cache { + #[command(subcommand)] + cache_command: CacheCommands, + }, /// Authentication management commands Auth { /// GitHub host to use, e.g. github.com or https://ghe.example.com @@ -1136,6 +1141,9 @@ async fn main() -> Result<()> { println!("{}", configuration_status(&configuration, &git_info)) } }, + Commands::Cache { cache_command } => { + handle_cache(cache_command, &cli.directory)?; + } Commands::Auth { host, auth_command } => { let store = auth_store .as_ref()