diff --git a/Cargo.lock b/Cargo.lock index 1c32895..34d88d3 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -452,8 +452,10 @@ dependencies = [ "codebook_config", "env_logger", "fs2", + "glob", "log", "lru", + "owo-colors", "serde", "serde_json", "streaming-iterator", @@ -461,6 +463,7 @@ dependencies = [ "tempfile", "tokio", "tower-lsp", + "walkdir", ] [[package]] @@ -1398,6 +1401,12 @@ dependencies = [ "windows-sys 0.61.2", ] +[[package]] +name = "is_ci" +version = "1.2.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "7655c9839580ee829dfacba1d1278c2b7883e50a277ff7541299489d6bdfdc45" + [[package]] name = "is_terminal_polyfill" version = "1.70.2" @@ -1729,6 +1738,16 @@ version = "0.2.0" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "04744f49eae99ab78e0d5c0b603ab218f515ea8cfe5a456d7629ad883a3b6e7d" +[[package]] +name = "owo-colors" +version = "4.3.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "d211803b9b6b570f68772237e415a029d5a50c65d382910b879fb19d3271f94d" +dependencies = [ + "supports-color 2.1.0", + "supports-color 3.0.2", +] + [[package]] name = "parking" version = "2.2.1" @@ -2481,6 +2500,25 @@ version = "2.6.1" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "13c2bddecc57b384dee18652358fb23172facb8a2c51ccc10d74c157bdea3292" +[[package]] +name = "supports-color" +version = "2.1.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "d6398cde53adc3c4557306a96ce67b302968513830a77a95b2b17305d9719a89" +dependencies = [ + "is-terminal", + "is_ci", +] + +[[package]] +name = "supports-color" +version = "3.0.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "c64fc7232dd8d2e4ac5ce4ef302b1d81e0b80d055b9d77c7c4f51f6aa4c867d6" +dependencies = [ + "is_ci", +] + [[package]] name = "symbolic-common" version = "12.17.2" diff --git a/Cargo.toml b/Cargo.toml index 1e33f56..401d901 100644 --- a/Cargo.toml 
+++ b/Cargo.toml @@ -22,6 +22,7 @@ env_logger = "0.11.6" fs2 = "0.4" git2 = "0.20.0" glob = "0.3" +owo-colors = { version = "4", features = ["supports-colors"] } httpmock = "<0.9.0" lazy_static = "1.5.0" log = "0.4.22" diff --git a/README.md b/README.md index 074e96f..908283b 100644 --- a/README.md +++ b/README.md @@ -117,6 +117,26 @@ Any editor that implements the Language Server Protocol should be compatible wit codebook-lsp serve ``` +### CLI (Lint) + +Codebook can also be used as a standalone command-line spell checker, which is useful for CI pipelines, pre-commit hooks, or one-off checks. + +```sh +# Check specific files +codebook-lsp lint src/main.rs src/lib.rs + +# Check all files in a directory (recursive) +codebook-lsp lint src/ + +# Show spelling suggestions +codebook-lsp lint --suggest src/ + +# Only report each misspelled word once across all files +codebook-lsp lint --unique src/ +``` + +The exit code is **0** if all files are clean, **1** if any spelling errors are found, and **2** if there were unreadable files, invalid UTF-8, etc. + ## About Codebook is a spell checker for code. It binds together the venerable Tree Sitter and the fast spell checker [Spellbook](https://github.com/helix-editor/spellbook). Included is a Language Server for use in (theoretically) any editor. Everything is done in Rust to keep response times snappy and memory usage _low_. 
diff --git a/crates/codebook-lsp/Cargo.toml b/crates/codebook-lsp/Cargo.toml index 7c2a99a..4b30dc5 100644 --- a/crates/codebook-lsp/Cargo.toml +++ b/crates/codebook-lsp/Cargo.toml @@ -28,6 +28,9 @@ env_logger.workspace = true fs2.workspace = true log.workspace = true lru.workspace = true +glob.workspace = true +owo-colors.workspace = true +walkdir.workspace = true serde.workspace = true serde_json.workspace = true string-offsets.workspace = true diff --git a/crates/codebook-lsp/src/lint.rs b/crates/codebook-lsp/src/lint.rs new file mode 100644 index 0000000..3a1b1cc --- /dev/null +++ b/crates/codebook-lsp/src/lint.rs @@ -0,0 +1,396 @@ +use codebook::Codebook; +use codebook_config::CodebookConfigFile; +use owo_colors::{OwoColorize, Stream, Style}; +use std::collections::HashSet; +use std::path::{Path, PathBuf}; +use std::sync::Arc; +use string_offsets::{AllConfig, StringOffsets}; + +const BOLD: Style = Style::new().bold(); +const DIM: Style = Style::new().dimmed(); +const YELLOW: Style = Style::new().yellow(); +const BOLD_RED: Style = Style::new().bold().red(); + +macro_rules! err { + ($($arg:tt)*) => { + eprintln!( + "{} {}", + "error:".if_supports_color(Stream::Stderr, |t| t.style(BOLD_RED)), + format_args!($($arg)*) + ) + }; +} + +macro_rules! paint { + ($val:expr, $stream:expr, $style:expr) => { + $val.if_supports_color($stream, |t| t.style($style)) + }; +} + +fn fatal(msg: impl std::fmt::Display) -> ! { + err!("{msg}"); + std::process::exit(2); +} + +/// Computes a workspace-relative path string for a given file. Falls back to +/// the absolute path if the file is outside the workspace or canonicalization +/// fails. `root_canonical` should be the already-canonicalized workspace root. 
+fn relative_to_root(root_canonical: Option<&Path>, path: &Path) -> String { + root_canonical + .and_then(|root| { + let canon = path.canonicalize().ok()?; + canon + .strip_prefix(root) + .ok() + .map(|rel| rel.to_string_lossy().into_owned()) + }) + .unwrap_or_else(|| path.to_string_lossy().into_owned()) +} + +/// Returns `true` if any spelling errors were found. +/// +/// Exits with code 2 if infrastructure failures occurred (unreadable files, +/// directory errors, unmatched or invalid patterns). +pub fn run_lint(files: &[String], root: &Path, unique: bool, suggest: bool) -> bool { + let config = Arc::new( + CodebookConfigFile::load(Some(root)) + .unwrap_or_else(|e| fatal(format!("failed to load config: {e}"))), + ); + + print_config_source(&config); + eprintln!(); + + let codebook = Codebook::new(config.clone()) + .unwrap_or_else(|e| fatal(format!("failed to initialize: {e}"))); + + // Canonicalize the root once here rather than once per file. + let root_canonical = root.canonicalize().ok(); + + let (resolved, mut had_failure) = resolve_paths(files, root); + + let mut seen_words: HashSet<String> = HashSet::new(); + let mut total_errors = 0usize; + let mut files_with_errors = 0usize; + + for path in &resolved { + let relative = relative_to_root(root_canonical.as_deref(), path); + let (errors, file_failure) = + check_file(path, &relative, &codebook, &mut seen_words, unique, suggest); + had_failure |= file_failure; + if errors > 0 { + total_errors += errors; + files_with_errors += 1; + } + } + + let unique_label = if unique { "unique " } else { "" }; + eprintln!( + "Found {} {unique_label}spelling error(s) in {} file(s).", + paint!(total_errors, Stream::Stderr, BOLD), + paint!(files_with_errors, Stream::Stderr, BOLD), + ); + + if had_failure { + std::process::exit(2); + } + + total_errors > 0 +} + +/// Spell-checks a single file and prints any diagnostics to stdout. +/// +/// Returns `(error_count, had_io_error)`.
`error_count` is 0 if the file was +/// clean; `had_io_error` is true when the file could not be read. `relative` is +/// the workspace-relative path used for display and ignore matching. +fn check_file( + path: &Path, + relative: &str, + codebook: &Codebook, + seen_words: &mut HashSet<String>, + unique: bool, + suggest: bool, +) -> (usize, bool) { + let text = match std::fs::read_to_string(path) { + Ok(t) => t, + Err(e) => { + err!("{}: {e}", path.display()); + return (0, true); + } + }; + + let display = relative.strip_prefix("./").unwrap_or(relative); + + // Build the offset table once per file + let offsets = StringOffsets::<AllConfig>::new(&text); + let mut locations = codebook.spell_check(&text, None, Some(relative)); + // Sort inner locations first (HashSet iteration order is nondeterministic), + // then sort the outer list by first occurrence in the file. + for wl in &mut locations { + wl.locations.sort_by_key(|r| r.start_byte); + } + locations.sort_by_key(|l| l.locations.first().map(|r| r.start_byte).unwrap_or(0)); + + // Collect hits first so we can compute pad_len for column alignment. The + // unique check is per-word, so all ranges of a word are included or skipped + // together. + let mut hits: Vec<(String, &str, Option<Vec<String>>)> = Vec::new(); + for wl in &locations { + if unique && !seen_words.insert(wl.word.to_lowercase()) { + continue; + } + + let mut suggestions = if suggest { + codebook.get_suggestions(wl.word.as_str()) + } else { + None + }; + + // If unique mode: Only emit the first occurrence of each word. + let ranges = if unique { + &wl.locations[..1] + } else { + &wl.locations[..] + }; + + for (i, range) in ranges.iter().enumerate() { + // utf8_to_char_pos returns 0-based line and Unicode-char column. + let pos = offsets.utf8_to_char_pos(range.start_byte.min(text.len())); + + // Move out of `suggestions` on the last iteration to avoid a clone.
+ let sugg = if i + 1 < ranges.len() { + suggestions.clone() + } else { + suggestions.take() + }; + + hits.push(( + format!("{}:{}", pos.line + 1, pos.col + 1), + wl.word.as_str(), + sugg, + )); + } + } + + if hits.is_empty() { + return (0, false); + } + + let pad_len = hits.iter().map(|(lc, _, _)| lc.len()).max().unwrap_or(0); + + println!( + "{}", + display.if_supports_color(Stream::Stdout, |t| t.style(BOLD)) + ); + for (linecol, word, suggestions) in &hits { + let pad = " ".repeat(pad_len - linecol.len()); + print!( + " {}:{}{} {}", + paint!(display, Stream::Stdout, DIM), + paint!(linecol, Stream::Stdout, YELLOW), + pad, + paint!(word, Stream::Stdout, BOLD_RED), + ); + if let Some(s) = suggestions { + let text = format!("→ {}", s.join(", ")); + println!(" {}", paint!(text, Stream::Stdout, DIM)); + } else { + println!(); + } + } + println!(); + + (hits.len(), false) +} + +/// Prints which config file is being used, or notes that the default is active. +fn print_config_source(config: &CodebookConfigFile) { + let cwd = std::env::current_dir().unwrap_or_default(); + let (label, path) = match ( + config.project_config_path().filter(|p| p.is_file()), + config.global_config_path().filter(|p| p.is_file()), + ) { + (Some(p), _) => ("using config", p), + (None, Some(g)) => ("using global config", g), + (None, None) => { + eprintln!("No config found, using default config"); + return; + } + }; + let display = path + .strip_prefix(&cwd) + .unwrap_or(&path) + .display() + .to_string(); + eprintln!( + "{label} {}", + display.if_supports_color(Stream::Stderr, |t| t.style(DIM)) + ); +} + +/// Resolves a mix of file paths, directories, and glob patterns into a sorted, +/// deduplicated list of file paths. Non-absolute patterns are resolved relative +/// to `root`. `Path::join` replaces the base when the argument is absolute, so +/// no explicit `is_absolute` check is needed. +/// +/// Returns `(paths, had_failure)`. 
`had_failure` is true for unmatched +/// patterns, invalid globs, or glob I/O errors. +fn resolve_paths(patterns: &[String], root: &Path) -> (Vec<PathBuf>, bool) { + let mut paths = Vec::new(); + let mut had_failure = false; + + for pattern in patterns { + // root.join() is a no-op when pattern is absolute + let p = root.join(pattern); + if p.is_dir() { + had_failure |= collect_dir(&p, &mut paths); + } else { + let pattern_str = p.to_string_lossy(); + match glob::glob(&pattern_str) { + Ok(entries) => { + let mut matched = false; + for entry in entries { + match entry { + Ok(e) if e.is_file() => { + paths.push(e); + matched = true; + } + Ok(e) if e.is_dir() => { + had_failure |= collect_dir(&e, &mut paths); + matched = true; + } + Ok(_) => {} + Err(e) => { + err!("failed to read glob entry: {e}"); + had_failure = true; + } + } + } + if !matched { + err!("no match for '{pattern_str}'"); + had_failure = true; + } + } + Err(e) => { + err!("invalid pattern '{pattern_str}': {e}"); + had_failure = true; + } + } + } + } + + paths.sort(); + paths.dedup(); + (paths, had_failure) +} + +/// Recursively collects all files under `dir` into `out`. Returns `true` if any +/// directory-entry I/O error occurred.
+fn collect_dir(dir: &Path, out: &mut Vec<PathBuf>) -> bool { + let mut had_failure = false; + for entry in walkdir::WalkDir::new(dir).follow_links(false) { + match entry { + Ok(e) if e.file_type().is_file() => out.push(e.into_path()), + Ok(_) => {} + Err(e) => { + err!( + "failed to read directory entry under '{}': {e}", + dir.display() + ); + had_failure = true; + } + } + } + had_failure +} + +#[cfg(test)] +mod tests { + use super::*; + use codebook::Codebook; + use codebook_config::CodebookConfigMemory; + use std::collections::HashSet; + use std::fs; + use std::sync::Arc; + use tempfile::tempdir; + + #[test] + fn test_path_and_dir_resolution() { + let dir = tempdir().unwrap(); + let sub = dir.path().join("sub"); + fs::create_dir_all(&sub).unwrap(); + + let f1 = dir.path().join("a.rs"); + let f2 = sub.join("b.txt"); + fs::write(&f1, "").unwrap(); + fs::write(&f2, "").unwrap(); + + let root_canon = dir.path().canonicalize().unwrap(); + assert_eq!(relative_to_root(Some(&root_canon), &f1), "a.rs"); + + let pattern = format!("{}/**/*.*", dir.path().display()); + let (paths, err) = resolve_paths(&[pattern], dir.path()); + + assert!(!err); + assert_eq!(paths.len(), 2); + let path_strs: HashSet<_> = paths.iter().map(|p| p.to_string_lossy()).collect(); + assert!(path_strs.iter().any(|s| s.ends_with("a.rs"))); + assert!(path_strs.iter().any(|s| s.ends_with("b.txt"))); + + let (_, err_missing) = resolve_paths(&["nonexistent.rs".into()], dir.path()); + assert!(err_missing); + } + + #[test] + fn test_check_file_logic() { + let dir = tempdir().unwrap(); + let f = dir.path().join("test.txt"); + fs::write(&f, "actualbad\n🦀 actualbad").unwrap(); + + let cb = Codebook::new(Arc::new(CodebookConfigMemory::default())).unwrap(); + let mut seen = HashSet::new(); + + // Test basic flagging and multi-occurrence counting + let (count, err) = check_file(&f, "test.txt", &cb, &mut seen, false, false); + assert_eq!(count, 2); + assert!(!err); + + // Test unique mode + let mut seen_unique =
HashSet::new(); + let (c1, _) = check_file(&f, "f1.txt", &cb, &mut seen_unique, true, false); + let (c2, _) = check_file(&f, "f2.txt", &cb, &mut seen_unique, true, false); + assert_eq!(c1, 1, "Should flag word once"); + assert_eq!(c2, 0, "Should skip already-seen word in second file"); + + // Test IO failure + let (_, err_io) = check_file( + &dir.path().join("missing"), + "!", + &cb, + &mut seen, + false, + false, + ); + assert!(err_io); + } + + #[test] + fn test_unicode_line_col() { + let cases = [ + ("actualbad", 0, 1, 1), // Start + ("ok\nactualbad", 3, 2, 1), // Newline + ("résumé actualbad", 9, 1, 8), // Multi-byte chars (é is 2 bytes) + ("🦀 actualbad", 5, 1, 3), // Emoji (4 bytes, 1 char) + ]; + + for (text, offset, line, col) in cases { + let table = StringOffsets::<AllConfig>::new(text); + let pos = table.utf8_to_char_pos(offset); + assert_eq!( + (pos.line + 1, pos.col + 1), + (line, col), + "Failed on: {}", + text + ); + } + } +} diff --git a/crates/codebook-lsp/src/main.rs b/crates/codebook-lsp/src/main.rs index bc055ee..449c174 100644 --- a/crates/codebook-lsp/src/main.rs +++ b/crates/codebook-lsp/src/main.rs @@ -1,5 +1,6 @@ mod file_cache; mod init_options; +mod lint; mod lsp; mod lsp_logger; @@ -30,14 +31,29 @@ enum Commands { Serve {}, /// Remove server cache Clean {}, + /// Check files for spelling errors + Lint { + /// Files or glob patterns to spell-check + #[arg(required = true)] + files: Vec<String>, + /// Only report each misspelled word once, ignoring duplicates across files + #[arg(short = 'u', long)] + unique: bool, + /// Show spelling suggestions for each misspelled word + #[arg(short = 's', long)] + suggest: bool, + }, } #[tokio::main(flavor = "current_thread")] async fn main() { // Initialize logger early with stderr output and buffering - // Default to INFO level, will be adjusted when LSP client connects + // Default to INFO for LSP, WARN for lint (to suppress LSP-oriented noise) + let is_lint = std::env::args().nth(1).as_deref() == Some("lint"); let
log_level = match env::var("RUST_LOG").as_deref() { Ok("debug") => LevelFilter::Debug, + Ok("info") => LevelFilter::Info, + _ if is_lint => LevelFilter::Warn, _ => LevelFilter::Info, }; LspLogger::init_early(log_level).expect("Failed to initialize early logger"); @@ -58,6 +74,15 @@ async fn main() { info!("Cleaning: {:?}", config.cache_dir); config.clean_cache() } + Some(Commands::Lint { + files, + unique, + suggest, + }) => { + if lint::run_lint(files, root, *unique, *suggest) { + std::process::exit(1); + } + } None => {} } }