From bee444a3c9d6c201997a2d0eb1922b19286a4d9d Mon Sep 17 00:00:00 2001 From: niekdomi Date: Wed, 25 Feb 2026 20:13:37 +0100 Subject: [PATCH 1/8] Added cli --- Cargo.lock | 2 + crates/codebook-lsp/Cargo.toml | 2 + crates/codebook-lsp/src/lint.rs | 215 ++++++++++++++++++++++++++++++++ crates/codebook-lsp/src/main.rs | 15 +++ 4 files changed, 234 insertions(+) create mode 100644 crates/codebook-lsp/src/lint.rs diff --git a/Cargo.lock b/Cargo.lock index a481ac85..2f3d8d55 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -427,6 +427,7 @@ dependencies = [ "codebook_config", "env_logger", "fs2", + "glob", "log", "lru", "serde", @@ -436,6 +437,7 @@ dependencies = [ "tempfile", "tokio", "tower-lsp", + "walkdir", ] [[package]] diff --git a/crates/codebook-lsp/Cargo.toml b/crates/codebook-lsp/Cargo.toml index b621344e..27d1a11a 100644 --- a/crates/codebook-lsp/Cargo.toml +++ b/crates/codebook-lsp/Cargo.toml @@ -28,6 +28,8 @@ env_logger.workspace = true fs2.workspace = true log.workspace = true lru.workspace = true +glob.workspace = true +walkdir.workspace = true serde.workspace = true serde_json.workspace = true string-offsets.workspace = true diff --git a/crates/codebook-lsp/src/lint.rs b/crates/codebook-lsp/src/lint.rs new file mode 100644 index 00000000..3ecd5ab4 --- /dev/null +++ b/crates/codebook-lsp/src/lint.rs @@ -0,0 +1,215 @@ +use codebook::Codebook; +use codebook_config::{CodebookConfig, CodebookConfigFile}; +use std::collections::HashSet; +use std::io::{IsTerminal, Write}; +use std::path::{Path, PathBuf}; +use std::sync::Arc; + +struct Styles { + bold: &'static str, + dim: &'static str, + yellow: &'static str, + bold_red: &'static str, + reset: &'static str, +} + +const STYLES_ON: Styles = Styles { + bold: "\x1b[1m", + dim: "\x1b[2m", + yellow: "\x1b[33m", + bold_red: "\x1b[1;31m", + reset: "\x1b[0m", +}; + +const STYLES_OFF: Styles = Styles { + bold: "", + dim: "", + yellow: "", + bold_red: "", + reset: "", +}; + +fn styles(is_terminal: bool) -> &'static Styles { + if 
is_terminal && std::env::var_os("NO_COLOR").is_none() { + &STYLES_ON + } else { + &STYLES_OFF + } +} + +fn fatal(msg: impl std::fmt::Display, s: &Styles) -> ! { + eprintln!("{}error:{} {msg}", s.bold_red, s.reset); + std::process::exit(2); +} + +pub fn run_lint(files: &[String], root: &Path, unique: bool) -> bool { + let out = styles(std::io::stdout().is_terminal()); + let err = styles(std::io::stderr().is_terminal()); + + let config = Arc::new( + CodebookConfigFile::load(Some(root)) + .unwrap_or_else(|e| fatal(format!("failed to load config: {e}"), err)), + ); + + print_config_source(&config, err); + + let codebook = Codebook::new(config.clone()) + .unwrap_or_else(|e| fatal(format!("failed to initialize: {e}"), err)); + + let resolved = resolve_paths(files); + if resolved.is_empty() { + fatal("no files matched the given patterns", err); + } + + let mut seen_words: HashSet = HashSet::new(); + let mut total_errors = 0usize; + let mut files_with_errors = 0usize; + + for path in &resolved { + if config.should_ignore_path(path) { + continue; + } + + let text = match std::fs::read_to_string(path) { + Ok(t) => t, + Err(e) => { + eprintln!( + "{}error:{} {}: {e}", + err.bold_red, + err.reset, + path.display() + ); + continue; + } + }; + + let mut locations = codebook.spell_check(&text, None, path.to_str()); + locations.sort_by_key(|l| l.locations.first().map(|r| r.start_byte).unwrap_or(0)); + + let hits: Vec<(usize, usize, &str)> = locations + .iter() + .flat_map(|wl| wl.locations.iter().map(move |r| (wl, r))) + .filter_map(|(wl, range)| { + if unique && !seen_words.insert(wl.word.to_lowercase()) { + return None; + } + let (line, col) = byte_offset_to_line_col(&text, range.start_byte); + Some((line, col, wl.word.as_str())) + }) + .collect(); + + if hits.is_empty() { + continue; + } + + let raw = path.to_string_lossy(); + let display = raw.strip_prefix("./").unwrap_or(&raw); + let pad_len = hits + .iter() + .map(|(l, c, _)| format!("{l}:{c}").len()) + .max() + 
.unwrap_or(0); + + println!("{}{display}{}", out.bold, out.reset); + for (line, col, word) in &hits { + let loc = format!("{line}:{col}"); + println!( + " {}{display}{}:{}{loc}{}{pad} {}{}{}", + out.dim, + out.reset, + out.yellow, + out.reset, + out.bold_red, + word, + out.reset, + pad = " ".repeat(pad_len - loc.len()), + ); + } + println!(); + + total_errors += hits.len(); + files_with_errors += 1; + } + + if total_errors > 0 { + let _ = std::io::stdout().flush(); + let unique_label = if unique { "unique " } else { "" }; + eprintln!( + "Found {}{total_errors}{} {unique_label}spelling error(s) in {}{files_with_errors}{} file(s).", + err.bold, err.reset, err.bold, err.reset, + ); + } + + total_errors > 0 +} + +fn print_config_source(config: &CodebookConfigFile, s: &Styles) { + let cwd = std::env::current_dir().unwrap_or_default(); + match ( + config.project_config_path().filter(|p| p.is_file()), + config.global_config_path().filter(|p| p.is_file()), + ) { + (Some(p), _) => eprintln!( + "using config {}{}{}", + s.dim, + p.strip_prefix(&cwd).unwrap_or(&p).display(), + s.reset + ), + (None, Some(g)) => eprintln!( + "using global config {}{}{}", + s.dim, + g.strip_prefix(&cwd).unwrap_or(&g).display(), + s.reset + ), + (None, None) => eprintln!("using default config"), + } +} + +fn resolve_paths(patterns: &[String]) -> Vec { + let mut paths = Vec::new(); + for pattern in patterns { + let p = PathBuf::from(pattern); + if p.is_dir() { + collect_dir(&p, &mut paths); + } else { + match glob::glob(pattern) { + Ok(entries) => { + let mut matched = false; + for entry in entries.flatten() { + if entry.is_file() { + paths.push(entry); + matched = true; + } else if entry.is_dir() { + collect_dir(&entry, &mut paths); + matched = true; + } + } + if !matched { + eprintln!("codebook: no match for '{pattern}'"); + } + } + Err(e) => eprintln!("codebook: invalid pattern '{pattern}': {e}"), + } + } + } + paths.sort(); + paths.dedup(); + paths +} + +fn collect_dir(dir: &Path, out: &mut 
Vec) { + walkdir::WalkDir::new(dir) + .follow_links(false) + .into_iter() + .flatten() + .filter(|e| e.file_type().is_file()) + .for_each(|e| out.push(e.into_path())); +} + +fn byte_offset_to_line_col(text: &str, byte_offset: usize) -> (usize, usize) { + let offset = byte_offset.min(text.len()); + let before = &text[..offset]; + let line = before.bytes().filter(|&b| b == b'\n').count() + 1; + let col = before.rfind('\n').map(|p| offset - p).unwrap_or(offset + 1); + (line, col) +} diff --git a/crates/codebook-lsp/src/main.rs b/crates/codebook-lsp/src/main.rs index bc055ee2..c6898c49 100644 --- a/crates/codebook-lsp/src/main.rs +++ b/crates/codebook-lsp/src/main.rs @@ -1,5 +1,6 @@ mod file_cache; mod init_options; +mod lint; mod lsp; mod lsp_logger; @@ -30,6 +31,15 @@ enum Commands { Serve {}, /// Remove server cache Clean {}, + /// Check files for spelling errors + Lint { + /// Files or glob patterns to spell-check + #[arg(required = true)] + files: Vec, + /// Only report each misspelled word once, ignoring duplicates across files + #[arg(short = 'u', long)] + unique: bool, + }, } #[tokio::main(flavor = "current_thread")] @@ -58,6 +68,11 @@ async fn main() { info!("Cleaning: {:?}", config.cache_dir); config.clean_cache() } + Some(Commands::Lint { files, unique }) => { + if lint::run_lint(files, root, *unique) { + std::process::exit(1); + } + } None => {} } } From 3a4b0d8cedbf5c7ee532d3a597609637dd53cdf4 Mon Sep 17 00:00:00 2001 From: niekdomi Date: Wed, 4 Mar 2026 18:56:01 +0100 Subject: [PATCH 2/8] improved the cli integration Replaces the manual ansi escape approach with owo-colors, which handles NO_COLOR and windows automatically. The code is also way better in general than my previous approach... 
--- Cargo.lock | 36 +++++ Cargo.toml | 1 + crates/codebook-lsp/Cargo.toml | 1 + crates/codebook-lsp/src/lint.rs | 261 ++++++++++++++++---------------- crates/codebook-lsp/src/main.rs | 16 +- 5 files changed, 185 insertions(+), 130 deletions(-) diff --git a/Cargo.lock b/Cargo.lock index 946d94e4..6db3a140 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -430,6 +430,7 @@ dependencies = [ "glob", "log", "lru", + "owo-colors", "serde", "serde_json", "streaming-iterator", @@ -1374,6 +1375,12 @@ dependencies = [ "windows-sys 0.61.2", ] +[[package]] +name = "is_ci" +version = "1.2.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "7655c9839580ee829dfacba1d1278c2b7883e50a277ff7541299489d6bdfdc45" + [[package]] name = "is_terminal_polyfill" version = "1.70.2" @@ -1705,6 +1712,16 @@ version = "0.2.0" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "04744f49eae99ab78e0d5c0b603ab218f515ea8cfe5a456d7629ad883a3b6e7d" +[[package]] +name = "owo-colors" +version = "4.3.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "d211803b9b6b570f68772237e415a029d5a50c65d382910b879fb19d3271f94d" +dependencies = [ + "supports-color 2.1.0", + "supports-color 3.0.2", +] + [[package]] name = "parking" version = "2.2.1" @@ -2451,6 +2468,25 @@ version = "2.6.1" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "13c2bddecc57b384dee18652358fb23172facb8a2c51ccc10d74c157bdea3292" +[[package]] +name = "supports-color" +version = "2.1.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "d6398cde53adc3c4557306a96ce67b302968513830a77a95b2b17305d9719a89" +dependencies = [ + "is-terminal", + "is_ci", +] + +[[package]] +name = "supports-color" +version = "3.0.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "c64fc7232dd8d2e4ac5ce4ef302b1d81e0b80d055b9d77c7c4f51f6aa4c867d6" +dependencies = [ + "is_ci", +] + [[package]] name = 
"symbolic-common" version = "12.17.2" diff --git a/Cargo.toml b/Cargo.toml index 09220040..0ad3137a 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -21,6 +21,7 @@ env_logger = "0.11.6" fs2 = "0.4" git2 = "0.20.0" glob = "0.3" +owo-colors = { version = "4", features = ["supports-colors"] } httpmock = "<0.9.0" lazy_static = "1.5.0" log = "0.4.22" diff --git a/crates/codebook-lsp/Cargo.toml b/crates/codebook-lsp/Cargo.toml index 3398f960..114e3fd7 100644 --- a/crates/codebook-lsp/Cargo.toml +++ b/crates/codebook-lsp/Cargo.toml @@ -29,6 +29,7 @@ fs2.workspace = true log.workspace = true lru.workspace = true glob.workspace = true +owo-colors.workspace = true walkdir.workspace = true serde.workspace = true serde_json.workspace = true diff --git a/crates/codebook-lsp/src/lint.rs b/crates/codebook-lsp/src/lint.rs index 3ecd5ab4..12c9e1a6 100644 --- a/crates/codebook-lsp/src/lint.rs +++ b/crates/codebook-lsp/src/lint.rs @@ -1,170 +1,185 @@ use codebook::Codebook; -use codebook_config::{CodebookConfig, CodebookConfigFile}; +use codebook_config::CodebookConfigFile; +use owo_colors::{OwoColorize, Stream, Style}; use std::collections::HashSet; -use std::io::{IsTerminal, Write}; use std::path::{Path, PathBuf}; use std::sync::Arc; -struct Styles { - bold: &'static str, - dim: &'static str, - yellow: &'static str, - bold_red: &'static str, - reset: &'static str, -} - -const STYLES_ON: Styles = Styles { - bold: "\x1b[1m", - dim: "\x1b[2m", - yellow: "\x1b[33m", - bold_red: "\x1b[1;31m", - reset: "\x1b[0m", -}; - -const STYLES_OFF: Styles = Styles { - bold: "", - dim: "", - yellow: "", - bold_red: "", - reset: "", -}; - -fn styles(is_terminal: bool) -> &'static Styles { - if is_terminal && std::env::var_os("NO_COLOR").is_none() { - &STYLES_ON - } else { - &STYLES_OFF - } -} +const BOLD: Style = Style::new().bold(); +const DIM: Style = Style::new().dimmed(); +const YELLOW: Style = Style::new().yellow(); +const BOLD_RED: Style = Style::new().bold().red(); -fn fatal(msg: impl 
std::fmt::Display, s: &Styles) -> ! { - eprintln!("{}error:{} {msg}", s.bold_red, s.reset); +fn fatal(msg: impl std::fmt::Display) -> ! { + eprintln!( + "{} {msg}", + "error:".if_supports_color(Stream::Stderr, |t| t.style(BOLD_RED)) + ); std::process::exit(2); } -pub fn run_lint(files: &[String], root: &Path, unique: bool) -> bool { - let out = styles(std::io::stdout().is_terminal()); - let err = styles(std::io::stderr().is_terminal()); - +/// Returns `true` if any spelling errors were found. +pub fn run_lint(files: &[String], root: &Path, unique: bool, suggest: bool) -> bool { let config = Arc::new( CodebookConfigFile::load(Some(root)) - .unwrap_or_else(|e| fatal(format!("failed to load config: {e}"), err)), + .unwrap_or_else(|e| fatal(format!("failed to load config: {e}"))), ); - print_config_source(&config, err); + print_config_source(&config); + eprintln!(); let codebook = Codebook::new(config.clone()) - .unwrap_or_else(|e| fatal(format!("failed to initialize: {e}"), err)); + .unwrap_or_else(|e| fatal(format!("failed to initialize: {e}"))); let resolved = resolve_paths(files); - if resolved.is_empty() { - fatal("no files matched the given patterns", err); - } let mut seen_words: HashSet = HashSet::new(); let mut total_errors = 0usize; let mut files_with_errors = 0usize; for path in &resolved { - if config.should_ignore_path(path) { - continue; + let error_count = check_file(path, &codebook, &mut seen_words, unique, suggest); + if error_count > 0 { + total_errors += error_count; + files_with_errors += 1; } + } - let text = match std::fs::read_to_string(path) { - Ok(t) => t, - Err(e) => { - eprintln!( - "{}error:{} {}: {e}", - err.bold_red, - err.reset, - path.display() - ); - continue; - } - }; + if total_errors > 0 { + let unique_label = if unique { "unique " } else { "" }; + eprintln!( + "Found {} {unique_label}spelling error(s) in {} file(s).", + total_errors.if_supports_color(Stream::Stderr, |t| t.style(BOLD)), + 
files_with_errors.if_supports_color(Stream::Stderr, |t| t.style(BOLD)), + ); + } - let mut locations = codebook.spell_check(&text, None, path.to_str()); - locations.sort_by_key(|l| l.locations.first().map(|r| r.start_byte).unwrap_or(0)); + total_errors > 0 +} - let hits: Vec<(usize, usize, &str)> = locations - .iter() - .flat_map(|wl| wl.locations.iter().map(move |r| (wl, r))) - .filter_map(|(wl, range)| { - if unique && !seen_words.insert(wl.word.to_lowercase()) { - return None; - } - let (line, col) = byte_offset_to_line_col(&text, range.start_byte); - Some((line, col, wl.word.as_str())) - }) - .collect(); +/// Spell-checks a single file and prints any diagnostics to stdout. +/// +/// Returns the number of errors found (0 if the file was clean or unreadable). +fn check_file( + path: &Path, + codebook: &Codebook, + seen_words: &mut HashSet, + unique: bool, + suggest: bool, +) -> usize { + let text = match std::fs::read_to_string(path) { + Ok(t) => t, + Err(e) => { + eprintln!( + "{} {}: {e}", + "error:".if_supports_color(Stream::Stderr, |t| t.style(BOLD_RED)), + path.display() + ); + return 0; + } + }; + + let raw = path.to_string_lossy(); + let display = raw.strip_prefix("./").unwrap_or(&raw); + + let mut locations = codebook.spell_check(&text, None, path.to_str()); + // Sort by first occurrence in the file. + locations.sort_by_key(|l| l.locations.first().map(|r| r.start_byte).unwrap_or(0)); - if hits.is_empty() { + // Collect (linecol, word, suggestions) first so we can compute pad_len for alignment. + // unique check is per-word (outer loop) so all ranges of a word are included or skipped together. 
+ let mut hits: Vec<(String, &str, Option>)> = Vec::new(); + for wl in &locations { + if unique && !seen_words.insert(wl.word.to_lowercase()) { continue; } - let raw = path.to_string_lossy(); - let display = raw.strip_prefix("./").unwrap_or(&raw); - let pad_len = hits - .iter() - .map(|(l, c, _)| format!("{l}:{c}").len()) - .max() - .unwrap_or(0); - - println!("{}{display}{}", out.bold, out.reset); - for (line, col, word) in &hits { - let loc = format!("{line}:{col}"); - println!( - " {}{display}{}:{}{loc}{}{pad} {}{}{}", - out.dim, - out.reset, - out.yellow, - out.reset, - out.bold_red, - word, - out.reset, - pad = " ".repeat(pad_len - loc.len()), - ); + let suggestions = if suggest { + codebook.get_suggestions(wl.word.as_str()) + } else { + None + }; + + for range in &wl.locations { + let before = &text[..range.start_byte.min(text.len())]; + let line = before.bytes().filter(|&b| b == b'\n').count() + 1; + let col = before + .rfind('\n') + .map(|p| before.len() - p) + .unwrap_or(before.len() + 1); + hits.push(( + format!("{line}:{col}"), + wl.word.as_str(), + suggestions.clone(), + )); } - println!(); + } - total_errors += hits.len(); - files_with_errors += 1; + if hits.is_empty() { + return 0; } - if total_errors > 0 { - let _ = std::io::stdout().flush(); - let unique_label = if unique { "unique " } else { "" }; - eprintln!( - "Found {}{total_errors}{} {unique_label}spelling error(s) in {}{files_with_errors}{} file(s).", - err.bold, err.reset, err.bold, err.reset, + let pad_len = hits + .iter() + .map(|(linecol, _, _)| linecol.len()) + .max() + .unwrap_or(0); + + println!( + "{}", + display.if_supports_color(Stream::Stdout, |t| t.style(BOLD)) + ); + for (linecol, word, suggestions) in &hits { + let pad = " ".repeat(pad_len - linecol.len()); + print!( + " {}:{}{} {}", + display.if_supports_color(Stream::Stdout, |t| t.style(DIM)), + linecol.if_supports_color(Stream::Stdout, |t| t.style(YELLOW)), + pad, + word.if_supports_color(Stream::Stdout, |t| 
t.style(BOLD_RED)), ); + if let Some(suggestions) = suggestions { + println!( + " {}", + format!("→ {}", suggestions.join(", ")) + .if_supports_color(Stream::Stdout, |t| t.style(DIM)), + ); + } else { + println!(); + } } + println!(); - total_errors > 0 + hits.len() } -fn print_config_source(config: &CodebookConfigFile, s: &Styles) { +/// Prints which config file is being used, or notes that the default is active. +fn print_config_source(config: &CodebookConfigFile) { let cwd = std::env::current_dir().unwrap_or_default(); match ( config.project_config_path().filter(|p| p.is_file()), config.global_config_path().filter(|p| p.is_file()), ) { - (Some(p), _) => eprintln!( - "using config {}{}{}", - s.dim, - p.strip_prefix(&cwd).unwrap_or(&p).display(), - s.reset - ), - (None, Some(g)) => eprintln!( - "using global config {}{}{}", - s.dim, - g.strip_prefix(&cwd).unwrap_or(&g).display(), - s.reset - ), - (None, None) => eprintln!("using default config"), + (Some(p), _) => { + let path = p.strip_prefix(&cwd).unwrap_or(&p).display().to_string(); + eprintln!( + "using config {}", + path.if_supports_color(Stream::Stderr, |t| t.style(DIM)) + ); + } + (None, Some(g)) => { + let path = g.strip_prefix(&cwd).unwrap_or(&g).display().to_string(); + eprintln!( + "using global config {}", + path.if_supports_color(Stream::Stderr, |t| t.style(DIM)) + ); + } + (None, None) => eprintln!("No config found, using default config"), } } +/// Resolves a mix of file paths, directories, and glob patterns into a sorted, +/// deduplicated list of file paths. 
fn resolve_paths(patterns: &[String]) -> Vec { let mut paths = Vec::new(); for pattern in patterns { @@ -205,11 +220,3 @@ fn collect_dir(dir: &Path, out: &mut Vec) { .filter(|e| e.file_type().is_file()) .for_each(|e| out.push(e.into_path())); } - -fn byte_offset_to_line_col(text: &str, byte_offset: usize) -> (usize, usize) { - let offset = byte_offset.min(text.len()); - let before = &text[..offset]; - let line = before.bytes().filter(|&b| b == b'\n').count() + 1; - let col = before.rfind('\n').map(|p| offset - p).unwrap_or(offset + 1); - (line, col) -} diff --git a/crates/codebook-lsp/src/main.rs b/crates/codebook-lsp/src/main.rs index c6898c49..449c174a 100644 --- a/crates/codebook-lsp/src/main.rs +++ b/crates/codebook-lsp/src/main.rs @@ -39,15 +39,21 @@ enum Commands { /// Only report each misspelled word once, ignoring duplicates across files #[arg(short = 'u', long)] unique: bool, + /// Show spelling suggestions for each misspelled word + #[arg(short = 's', long)] + suggest: bool, }, } #[tokio::main(flavor = "current_thread")] async fn main() { // Initialize logger early with stderr output and buffering - // Default to INFO level, will be adjusted when LSP client connects + // Default to INFO for LSP, WARN for lint (to suppress LSP-oriented noise) + let is_lint = std::env::args().nth(1).as_deref() == Some("lint"); let log_level = match env::var("RUST_LOG").as_deref() { Ok("debug") => LevelFilter::Debug, + Ok("info") => LevelFilter::Info, + _ if is_lint => LevelFilter::Warn, _ => LevelFilter::Info, }; LspLogger::init_early(log_level).expect("Failed to initialize early logger"); @@ -68,8 +74,12 @@ async fn main() { info!("Cleaning: {:?}", config.cache_dir); config.clean_cache() } - Some(Commands::Lint { files, unique }) => { - if lint::run_lint(files, root, *unique) { + Some(Commands::Lint { + files, + unique, + suggest, + }) => { + if lint::run_lint(files, root, *unique, *suggest) { std::process::exit(1); } } From e21a221079acb9b7e282b6c0a079ec7a05721d5a Mon Sep 
17 00:00:00 2001 From: niekdomi Date: Wed, 4 Mar 2026 19:21:50 +0100 Subject: [PATCH 3/8] improve --- crates/codebook-lsp/src/lint.rs | 59 +++++++++++++++++++++++---------- 1 file changed, 42 insertions(+), 17 deletions(-) diff --git a/crates/codebook-lsp/src/lint.rs b/crates/codebook-lsp/src/lint.rs index 12c9e1a6..565e4ae2 100644 --- a/crates/codebook-lsp/src/lint.rs +++ b/crates/codebook-lsp/src/lint.rs @@ -18,6 +18,22 @@ fn fatal(msg: impl std::fmt::Display) -> ! { std::process::exit(2); } +/// Computes a workspace-relative path string for a given file. Falls back to +/// the absolute path if the file is outside the workspace or canonicalization fails. +fn relative_to_root(root: &Path, path: &Path) -> String { + let root_canonical = match root.canonicalize() { + Ok(r) => r, + Err(_) => return path.to_string_lossy().to_string(), + }; + match path.canonicalize() { + Ok(canon) => match canon.strip_prefix(&root_canonical) { + Ok(rel) => rel.to_string_lossy().to_string(), + Err(_) => path.to_string_lossy().to_string(), + }, + Err(_) => path.to_string_lossy().to_string(), + } +} + /// Returns `true` if any spelling errors were found. 
pub fn run_lint(files: &[String], root: &Path, unique: bool, suggest: bool) -> bool { let config = Arc::new( @@ -31,28 +47,27 @@ pub fn run_lint(files: &[String], root: &Path, unique: bool, suggest: bool) -> b let codebook = Codebook::new(config.clone()) .unwrap_or_else(|e| fatal(format!("failed to initialize: {e}"))); - let resolved = resolve_paths(files); + let resolved = resolve_paths(files, root); let mut seen_words: HashSet = HashSet::new(); let mut total_errors = 0usize; let mut files_with_errors = 0usize; for path in &resolved { - let error_count = check_file(path, &codebook, &mut seen_words, unique, suggest); + let relative = relative_to_root(root, path); + let error_count = check_file(path, &relative, &codebook, &mut seen_words, unique, suggest); if error_count > 0 { total_errors += error_count; files_with_errors += 1; } } - if total_errors > 0 { - let unique_label = if unique { "unique " } else { "" }; - eprintln!( - "Found {} {unique_label}spelling error(s) in {} file(s).", - total_errors.if_supports_color(Stream::Stderr, |t| t.style(BOLD)), - files_with_errors.if_supports_color(Stream::Stderr, |t| t.style(BOLD)), - ); - } + let unique_label = if unique { "unique " } else { "" }; + eprintln!( + "Found {} {unique_label}spelling error(s) in {} file(s).", + total_errors.if_supports_color(Stream::Stderr, |t| t.style(BOLD)), + files_with_errors.if_supports_color(Stream::Stderr, |t| t.style(BOLD)), + ); total_errors > 0 } @@ -60,8 +75,10 @@ pub fn run_lint(files: &[String], root: &Path, unique: bool, suggest: bool) -> b /// Spell-checks a single file and prints any diagnostics to stdout. /// /// Returns the number of errors found (0 if the file was clean or unreadable). +/// `relative` is the workspace-relative path used for display and ignore matching. 
fn check_file( path: &Path, + relative: &str, codebook: &Codebook, seen_words: &mut HashSet, unique: bool, @@ -79,10 +96,9 @@ fn check_file( } }; - let raw = path.to_string_lossy(); - let display = raw.strip_prefix("./").unwrap_or(&raw); + let display = relative.strip_prefix("./").unwrap_or(relative); - let mut locations = codebook.spell_check(&text, None, path.to_str()); + let mut locations = codebook.spell_check(&text, None, Some(relative)); // Sort by first occurrence in the file. locations.sort_by_key(|l| l.locations.first().map(|r| r.start_byte).unwrap_or(0)); @@ -100,7 +116,14 @@ fn check_file( None }; - for range in &wl.locations { + // In unique mode only emit the first occurrence of each word + let ranges = if unique { + &wl.locations[..1] + } else { + &wl.locations[..] + }; + + for range in ranges { let before = &text[..range.start_byte.min(text.len())]; let line = before.bytes().filter(|&b| b == b'\n').count() + 1; let col = before @@ -179,15 +202,17 @@ fn print_config_source(config: &CodebookConfigFile) { } /// Resolves a mix of file paths, directories, and glob patterns into a sorted, -/// deduplicated list of file paths. -fn resolve_paths(patterns: &[String]) -> Vec { +/// deduplicated list of file paths. Non-absolute patterns are resolved relative to root. 
+fn resolve_paths(patterns: &[String], root: &Path) -> Vec { let mut paths = Vec::new(); for pattern in patterns { let p = PathBuf::from(pattern); + let p = if p.is_absolute() { p } else { root.join(&p) }; if p.is_dir() { collect_dir(&p, &mut paths); } else { - match glob::glob(pattern) { + let pattern = p.to_string_lossy(); + match glob::glob(&pattern) { Ok(entries) => { let mut matched = false; for entry in entries.flatten() { From 87f77c8cef8bd09ecb226ee1684ed03ab4aa508d Mon Sep 17 00:00:00 2001 From: niekdomi <152265924+niekdomi@users.noreply.github.com> Date: Sat, 21 Mar 2026 15:09:00 +0100 Subject: [PATCH 4/8] apply review --- crates/codebook-lsp/src/lint.rs | 145 +++++++++++++++++++++++--------- 1 file changed, 104 insertions(+), 41 deletions(-) diff --git a/crates/codebook-lsp/src/lint.rs b/crates/codebook-lsp/src/lint.rs index 565e4ae2..9a39560b 100644 --- a/crates/codebook-lsp/src/lint.rs +++ b/crates/codebook-lsp/src/lint.rs @@ -4,6 +4,7 @@ use owo_colors::{OwoColorize, Stream, Style}; use std::collections::HashSet; use std::path::{Path, PathBuf}; use std::sync::Arc; +use string_offsets::{AllConfig, StringOffsets}; const BOLD: Style = Style::new().bold(); const DIM: Style = Style::new().dimmed(); @@ -20,21 +21,24 @@ fn fatal(msg: impl std::fmt::Display) -> ! { /// Computes a workspace-relative path string for a given file. Falls back to /// the absolute path if the file is outside the workspace or canonicalization fails. -fn relative_to_root(root: &Path, path: &Path) -> String { - let root_canonical = match root.canonicalize() { - Ok(r) => r, - Err(_) => return path.to_string_lossy().to_string(), +/// `root_canonical` should be the already-canonicalized workspace root. 
+fn relative_to_root(root_canonical: Option<&Path>, path: &Path) -> String { + let Some(root_canonical) = root_canonical else { + return path.to_string_lossy().into_owned(); }; match path.canonicalize() { - Ok(canon) => match canon.strip_prefix(&root_canonical) { - Ok(rel) => rel.to_string_lossy().to_string(), - Err(_) => path.to_string_lossy().to_string(), + Ok(canon) => match canon.strip_prefix(root_canonical) { + Ok(rel) => rel.to_string_lossy().into_owned(), + Err(_) => path.to_string_lossy().into_owned(), }, - Err(_) => path.to_string_lossy().to_string(), + Err(_) => path.to_string_lossy().into_owned(), } } /// Returns `true` if any spelling errors were found. +/// +/// Exits with code 2 if infrastructure failures occurred (unreadable files, +/// directory errors, unmatched or invalid patterns). pub fn run_lint(files: &[String], root: &Path, unique: bool, suggest: bool) -> bool { let config = Arc::new( CodebookConfigFile::load(Some(root)) @@ -47,15 +51,27 @@ pub fn run_lint(files: &[String], root: &Path, unique: bool, suggest: bool) -> b let codebook = Codebook::new(config.clone()) .unwrap_or_else(|e| fatal(format!("failed to initialize: {e}"))); - let resolved = resolve_paths(files, root); + // Canonicalize the root once here rather than once per file. 
+ let root_canonical = root.canonicalize().ok(); + + let mut had_failure = false; + let resolved = resolve_paths(files, root, &mut had_failure); let mut seen_words: HashSet = HashSet::new(); let mut total_errors = 0usize; let mut files_with_errors = 0usize; for path in &resolved { - let relative = relative_to_root(root, path); - let error_count = check_file(path, &relative, &codebook, &mut seen_words, unique, suggest); + let relative = relative_to_root(root_canonical.as_deref(), path); + let error_count = check_file( + path, + &relative, + &codebook, + &mut seen_words, + unique, + suggest, + &mut had_failure, + ); if error_count > 0 { total_errors += error_count; files_with_errors += 1; @@ -69,13 +85,18 @@ pub fn run_lint(files: &[String], root: &Path, unique: bool, suggest: bool) -> b files_with_errors.if_supports_color(Stream::Stderr, |t| t.style(BOLD)), ); + if had_failure { + std::process::exit(2); + } + total_errors > 0 } /// Spell-checks a single file and prints any diagnostics to stdout. /// -/// Returns the number of errors found (0 if the file was clean or unreadable). +/// Returns the number of spelling errors found (0 if the file was clean). /// `relative` is the workspace-relative path used for display and ignore matching. +/// Sets `*had_failure = true` on I/O errors so the caller can exit non-zero. fn check_file( path: &Path, relative: &str, @@ -83,6 +104,7 @@ fn check_file( seen_words: &mut HashSet, unique: bool, suggest: bool, + had_failure: &mut bool, ) -> usize { let text = match std::fs::read_to_string(path) { Ok(t) => t, @@ -92,6 +114,7 @@ fn check_file( "error:".if_supports_color(Stream::Stderr, |t| t.style(BOLD_RED)), path.display() ); + *had_failure = true; return 0; } }; @@ -102,8 +125,13 @@ fn check_file( // Sort by first occurrence in the file. 
locations.sort_by_key(|l| l.locations.first().map(|r| r.start_byte).unwrap_or(0)); + // Build the offset table once per file – O(n) construction, O(1) per lookup – + // and gives Unicode character columns rather than raw byte offsets. + let offsets = StringOffsets::::new(&text); + // Collect (linecol, word, suggestions) first so we can compute pad_len for alignment. - // unique check is per-word (outer loop) so all ranges of a word are included or skipped together. + // The unique check is per-word (outer loop) so all ranges of a word are included + // or skipped together. let mut hits: Vec<(String, &str, Option>)> = Vec::new(); for wl in &locations { if unique && !seen_words.insert(wl.word.to_lowercase()) { @@ -116,7 +144,7 @@ fn check_file( None }; - // In unique mode only emit the first occurrence of each word + // In unique mode only emit the first occurrence of each word. let ranges = if unique { &wl.locations[..1] } else { @@ -124,12 +152,10 @@ fn check_file( }; for range in ranges { - let before = &text[..range.start_byte.min(text.len())]; - let line = before.bytes().filter(|&b| b == b'\n').count() + 1; - let col = before - .rfind('\n') - .map(|p| before.len() - p) - .unwrap_or(before.len() + 1); + // utf8_to_char_pos returns 0-based line and Unicode-char column. + let pos = offsets.utf8_to_char_pos(range.start_byte.min(text.len())); + let line = pos.line + 1; // 0-based → 1-based + let col = pos.col + 1; // 0-based → 1-based hits.push(( format!("{line}:{col}"), wl.word.as_str(), @@ -202,33 +228,57 @@ fn print_config_source(config: &CodebookConfigFile) { } /// Resolves a mix of file paths, directories, and glob patterns into a sorted, -/// deduplicated list of file paths. Non-absolute patterns are resolved relative to root. -fn resolve_paths(patterns: &[String], root: &Path) -> Vec { +/// deduplicated list of file paths. Non-absolute patterns are resolved relative to `root`. 
+/// +/// Sets `*had_failure = true` for unmatched patterns, invalid globs, or glob I/O errors. +fn resolve_paths(patterns: &[String], root: &Path, had_failure: &mut bool) -> Vec { let mut paths = Vec::new(); for pattern in patterns { let p = PathBuf::from(pattern); let p = if p.is_absolute() { p } else { root.join(&p) }; if p.is_dir() { - collect_dir(&p, &mut paths); + collect_dir(&p, &mut paths, had_failure); } else { - let pattern = p.to_string_lossy(); - match glob::glob(&pattern) { + let pattern_str = p.to_string_lossy(); + match glob::glob(&pattern_str) { Ok(entries) => { let mut matched = false; - for entry in entries.flatten() { - if entry.is_file() { - paths.push(entry); - matched = true; - } else if entry.is_dir() { - collect_dir(&entry, &mut paths); - matched = true; + for entry in entries { + match entry { + Ok(e) => { + if e.is_file() { + paths.push(e); + matched = true; + } else if e.is_dir() { + collect_dir(&e, &mut paths, had_failure); + matched = true; + } + } + Err(e) => { + eprintln!( + "{} failed to read glob entry: {e}", + "error:" + .if_supports_color(Stream::Stderr, |t| t.style(BOLD_RED)) + ); + *had_failure = true; + } } } if !matched { - eprintln!("codebook: no match for '{pattern}'"); + eprintln!( + "{} no match for '{pattern_str}'", + "error:".if_supports_color(Stream::Stderr, |t| t.style(BOLD_RED)) + ); + *had_failure = true; } } - Err(e) => eprintln!("codebook: invalid pattern '{pattern}': {e}"), + Err(e) => { + eprintln!( + "{} invalid pattern '{pattern_str}': {e}", + "error:".if_supports_color(Stream::Stderr, |t| t.style(BOLD_RED)) + ); + *had_failure = true; + } } } } @@ -237,11 +287,24 @@ fn resolve_paths(patterns: &[String], root: &Path) -> Vec { paths } -fn collect_dir(dir: &Path, out: &mut Vec) { - walkdir::WalkDir::new(dir) - .follow_links(false) - .into_iter() - .flatten() - .filter(|e| e.file_type().is_file()) - .for_each(|e| out.push(e.into_path())); +/// Recursively collects all files under `dir` into `out`. 
+/// Sets `*had_failure = true` for any directory-entry I/O error (e.g. permission denied). +fn collect_dir(dir: &Path, out: &mut Vec, had_failure: &mut bool) { + for entry in walkdir::WalkDir::new(dir).follow_links(false).into_iter() { + match entry { + Ok(e) => { + if e.file_type().is_file() { + out.push(e.into_path()); + } + } + Err(err) => { + eprintln!( + "{} failed to read directory entry under '{}': {err}", + "error:".if_supports_color(Stream::Stderr, |t| t.style(BOLD_RED)), + dir.display() + ); + *had_failure = true; + } + } + } } From e88087c21cbebf2bbf4d7a9e1c1c665460a72bee Mon Sep 17 00:00:00 2001 From: niekdomi <152265924+niekdomi@users.noreply.github.com> Date: Sat, 21 Mar 2026 16:25:19 +0100 Subject: [PATCH 5/8] test --- crates/codebook-lsp/src/lint.rs | 616 ++++++++++++++++++++++++++------ 1 file changed, 498 insertions(+), 118 deletions(-) diff --git a/crates/codebook-lsp/src/lint.rs b/crates/codebook-lsp/src/lint.rs index 9a39560b..9c2faadd 100644 --- a/crates/codebook-lsp/src/lint.rs +++ b/crates/codebook-lsp/src/lint.rs @@ -11,11 +11,18 @@ const DIM: Style = Style::new().dimmed(); const YELLOW: Style = Style::new().yellow(); const BOLD_RED: Style = Style::new().bold().red(); +macro_rules! err { + ($($arg:tt)*) => { + eprintln!( + "{} {}", + "error:".if_supports_color(Stream::Stderr, |t| t.style(BOLD_RED)), + format_args!($($arg)*) + ) + }; +} + fn fatal(msg: impl std::fmt::Display) -> ! { - eprintln!( - "{} {msg}", - "error:".if_supports_color(Stream::Stderr, |t| t.style(BOLD_RED)) - ); + err!("{msg}"); std::process::exit(2); } @@ -23,16 +30,15 @@ fn fatal(msg: impl std::fmt::Display) -> ! { /// the absolute path if the file is outside the workspace or canonicalization fails. /// `root_canonical` should be the already-canonicalized workspace root. 
fn relative_to_root(root_canonical: Option<&Path>, path: &Path) -> String { - let Some(root_canonical) = root_canonical else { - return path.to_string_lossy().into_owned(); - }; - match path.canonicalize() { - Ok(canon) => match canon.strip_prefix(root_canonical) { - Ok(rel) => rel.to_string_lossy().into_owned(), - Err(_) => path.to_string_lossy().into_owned(), - }, - Err(_) => path.to_string_lossy().into_owned(), - } + root_canonical + .and_then(|root| { + let canon = path.canonicalize().ok()?; + canon + .strip_prefix(root) + .ok() + .map(|rel| rel.to_string_lossy().into_owned()) + }) + .unwrap_or_else(|| path.to_string_lossy().into_owned()) } /// Returns `true` if any spelling errors were found. @@ -54,8 +60,7 @@ pub fn run_lint(files: &[String], root: &Path, unique: bool, suggest: bool) -> b // Canonicalize the root once here rather than once per file. let root_canonical = root.canonicalize().ok(); - let mut had_failure = false; - let resolved = resolve_paths(files, root, &mut had_failure); + let (resolved, mut had_failure) = resolve_paths(files, root); let mut seen_words: HashSet = HashSet::new(); let mut total_errors = 0usize; @@ -63,17 +68,11 @@ pub fn run_lint(files: &[String], root: &Path, unique: bool, suggest: bool) -> b for path in &resolved { let relative = relative_to_root(root_canonical.as_deref(), path); - let error_count = check_file( - path, - &relative, - &codebook, - &mut seen_words, - unique, - suggest, - &mut had_failure, - ); - if error_count > 0 { - total_errors += error_count; + let (errors, file_failure) = + check_file(path, &relative, &codebook, &mut seen_words, unique, suggest); + had_failure |= file_failure; + if errors > 0 { + total_errors += errors; files_with_errors += 1; } } @@ -94,9 +93,9 @@ pub fn run_lint(files: &[String], root: &Path, unique: bool, suggest: bool) -> b /// Spell-checks a single file and prints any diagnostics to stdout. /// -/// Returns the number of spelling errors found (0 if the file was clean). 
+/// Returns `(error_count, had_io_error)`. `error_count` is 0 if the file was +/// clean; `had_io_error` is true when the file could not be read. /// `relative` is the workspace-relative path used for display and ignore matching. -/// Sets `*had_failure = true` on I/O errors so the caller can exit non-zero. fn check_file( path: &Path, relative: &str, @@ -104,41 +103,35 @@ fn check_file( seen_words: &mut HashSet, unique: bool, suggest: bool, - had_failure: &mut bool, -) -> usize { +) -> (usize, bool) { let text = match std::fs::read_to_string(path) { Ok(t) => t, Err(e) => { - eprintln!( - "{} {}: {e}", - "error:".if_supports_color(Stream::Stderr, |t| t.style(BOLD_RED)), - path.display() - ); - *had_failure = true; - return 0; + err!("{}: {e}", path.display()); + return (0, true); } }; let display = relative.strip_prefix("./").unwrap_or(relative); - let mut locations = codebook.spell_check(&text, None, Some(relative)); - // Sort by first occurrence in the file. - locations.sort_by_key(|l| l.locations.first().map(|r| r.start_byte).unwrap_or(0)); - // Build the offset table once per file – O(n) construction, O(1) per lookup – // and gives Unicode character columns rather than raw byte offsets. let offsets = StringOffsets::::new(&text); - // Collect (linecol, word, suggestions) first so we can compute pad_len for alignment. - // The unique check is per-word (outer loop) so all ranges of a word are included - // or skipped together. + let mut locations = codebook.spell_check(&text, None, Some(relative)); + // Sort by first occurrence in the file. + locations.sort_by_key(|l| l.locations.first().map(|r| r.start_byte).unwrap_or(0)); + + // Collect hits first so we can compute pad_len for column alignment. + // The unique check is per-word (outer loop) so all ranges of a word are + // included or skipped together. 
let mut hits: Vec<(String, &str, Option>)> = Vec::new(); for wl in &locations { if unique && !seen_words.insert(wl.word.to_lowercase()) { continue; } - let suggestions = if suggest { + let mut suggestions = if suggest { codebook.get_suggestions(wl.word.as_str()) } else { None @@ -151,28 +144,28 @@ fn check_file( &wl.locations[..] }; - for range in ranges { + for (i, range) in ranges.iter().enumerate() { // utf8_to_char_pos returns 0-based line and Unicode-char column. let pos = offsets.utf8_to_char_pos(range.start_byte.min(text.len())); - let line = pos.line + 1; // 0-based → 1-based - let col = pos.col + 1; // 0-based → 1-based + // Move out of `suggestions` on the last iteration to avoid a clone. + let sugg = if i + 1 < ranges.len() { + suggestions.clone() + } else { + suggestions.take() + }; hits.push(( - format!("{line}:{col}"), + format!("{}:{}", pos.line + 1, pos.col + 1), wl.word.as_str(), - suggestions.clone(), + sugg, )); } } if hits.is_empty() { - return 0; + return (0, false); } - let pad_len = hits - .iter() - .map(|(linecol, _, _)| linecol.len()) - .max() - .unwrap_or(0); + let pad_len = hits.iter().map(|(lc, _, _)| lc.len()).max().unwrap_or(0); println!( "{}", @@ -199,45 +192,50 @@ fn check_file( } println!(); - hits.len() + (hits.len(), false) } /// Prints which config file is being used, or notes that the default is active. 
fn print_config_source(config: &CodebookConfigFile) { let cwd = std::env::current_dir().unwrap_or_default(); - match ( + let (label, path) = match ( config.project_config_path().filter(|p| p.is_file()), config.global_config_path().filter(|p| p.is_file()), ) { - (Some(p), _) => { - let path = p.strip_prefix(&cwd).unwrap_or(&p).display().to_string(); - eprintln!( - "using config {}", - path.if_supports_color(Stream::Stderr, |t| t.style(DIM)) - ); + (Some(p), _) => ("using config", p), + (None, Some(g)) => ("using global config", g), + (None, None) => { + eprintln!("No config found, using default config"); + return; } - (None, Some(g)) => { - let path = g.strip_prefix(&cwd).unwrap_or(&g).display().to_string(); - eprintln!( - "using global config {}", - path.if_supports_color(Stream::Stderr, |t| t.style(DIM)) - ); - } - (None, None) => eprintln!("No config found, using default config"), - } + }; + let display = path + .strip_prefix(&cwd) + .unwrap_or(&path) + .display() + .to_string(); + eprintln!( + "{label} {}", + display.if_supports_color(Stream::Stderr, |t| t.style(DIM)) + ); } /// Resolves a mix of file paths, directories, and glob patterns into a sorted, -/// deduplicated list of file paths. Non-absolute patterns are resolved relative to `root`. +/// deduplicated list of file paths. Non-absolute patterns are resolved relative +/// to `root`. `Path::join` replaces the base when the argument is absolute, so +/// no explicit `is_absolute` check is needed. /// -/// Sets `*had_failure = true` for unmatched patterns, invalid globs, or glob I/O errors. -fn resolve_paths(patterns: &[String], root: &Path, had_failure: &mut bool) -> Vec { +/// Returns `(paths, had_failure)`. `had_failure` is true for unmatched +/// patterns, invalid globs, or glob I/O errors. 
+fn resolve_paths(patterns: &[String], root: &Path) -> (Vec, bool) { let mut paths = Vec::new(); + let mut had_failure = false; + for pattern in patterns { - let p = PathBuf::from(pattern); - let p = if p.is_absolute() { p } else { root.join(&p) }; + // root.join() is a no-op when pattern is absolute (replaces root entirely). + let p = root.join(pattern); if p.is_dir() { - collect_dir(&p, &mut paths, had_failure); + had_failure |= collect_dir(&p, &mut paths); } else { let pattern_str = p.to_string_lossy(); match glob::glob(&pattern_str) { @@ -245,66 +243,448 @@ fn resolve_paths(patterns: &[String], root: &Path, had_failure: &mut bool) -> Ve let mut matched = false; for entry in entries { match entry { - Ok(e) => { - if e.is_file() { - paths.push(e); - matched = true; - } else if e.is_dir() { - collect_dir(&e, &mut paths, had_failure); - matched = true; - } + Ok(e) if e.is_file() => { + paths.push(e); + matched = true; + } + Ok(e) if e.is_dir() => { + had_failure |= collect_dir(&e, &mut paths); + matched = true; } + Ok(_) => {} Err(e) => { - eprintln!( - "{} failed to read glob entry: {e}", - "error:" - .if_supports_color(Stream::Stderr, |t| t.style(BOLD_RED)) - ); - *had_failure = true; + err!("failed to read glob entry: {e}"); + had_failure = true; } } } if !matched { - eprintln!( - "{} no match for '{pattern_str}'", - "error:".if_supports_color(Stream::Stderr, |t| t.style(BOLD_RED)) - ); - *had_failure = true; + err!("no match for '{pattern_str}'"); + had_failure = true; } } Err(e) => { - eprintln!( - "{} invalid pattern '{pattern_str}': {e}", - "error:".if_supports_color(Stream::Stderr, |t| t.style(BOLD_RED)) - ); - *had_failure = true; + err!("invalid pattern '{pattern_str}': {e}"); + had_failure = true; } } } } + paths.sort(); paths.dedup(); - paths + (paths, had_failure) } /// Recursively collects all files under `dir` into `out`. -/// Sets `*had_failure = true` for any directory-entry I/O error (e.g. permission denied). 
-fn collect_dir(dir: &Path, out: &mut Vec, had_failure: &mut bool) { - for entry in walkdir::WalkDir::new(dir).follow_links(false).into_iter() { +/// Returns `true` if any directory-entry I/O error occurred (e.g. permission denied). +fn collect_dir(dir: &Path, out: &mut Vec) -> bool { + let mut had_failure = false; + for entry in walkdir::WalkDir::new(dir).follow_links(false) { match entry { - Ok(e) => { - if e.file_type().is_file() { - out.push(e.into_path()); - } - } - Err(err) => { - eprintln!( - "{} failed to read directory entry under '{}': {err}", - "error:".if_supports_color(Stream::Stderr, |t| t.style(BOLD_RED)), + Ok(e) if e.file_type().is_file() => out.push(e.into_path()), + Ok(_) => {} + Err(e) => { + err!( + "failed to read directory entry under '{}': {e}", dir.display() ); - *had_failure = true; + had_failure = true; } } } + had_failure +} + +#[cfg(test)] +mod tests { + use super::*; + use codebook::Codebook; + use codebook_config::CodebookConfigMemory; + use std::collections::HashSet; + use std::fs; + use std::sync::Arc; + use tempfile::tempdir; + + /// Builds a Codebook backed by the default in-memory config (en_us dictionary). + /// This mirrors the pattern used throughout the rest of the test suite. 
+ fn make_codebook() -> Codebook { + let config = Arc::new(CodebookConfigMemory::default()); + Codebook::new(config).unwrap() + } + + // ── relative_to_root ───────────────────────────────────────────────────── + + #[test] + fn test_relative_to_root_inside_workspace() { + let dir = tempdir().unwrap(); + let subdir = dir.path().join("src"); + fs::create_dir_all(&subdir).unwrap(); + let file = subdir.join("lib.rs"); + fs::write(&file, "").unwrap(); + + let root_canonical = dir.path().canonicalize().unwrap(); + let result = relative_to_root(Some(&root_canonical), &file); + assert_eq!(result, "src/lib.rs"); + } + + #[test] + fn test_relative_to_root_outside_workspace() { + let root = tempdir().unwrap(); + let other = tempdir().unwrap(); + let file = other.path().join("outside.rs"); + fs::write(&file, "").unwrap(); + + let root_canonical = root.path().canonicalize().unwrap(); + let result = relative_to_root(Some(&root_canonical), &file); + // Falls back to a path string since the file is outside the workspace. 
+ assert!(result.contains("outside.rs")); + } + + #[test] + fn test_relative_to_root_none_returns_raw_path() { + let result = relative_to_root(None, Path::new("/some/path/file.rs")); + assert_eq!(result, "/some/path/file.rs"); + } + + // ── collect_dir ─────────────────────────────────────────────────────────── + + #[test] + fn test_collect_dir_empty_dir() { + let dir = tempdir().unwrap(); + let mut out = Vec::new(); + let had_failure = collect_dir(dir.path(), &mut out); + assert!(out.is_empty()); + assert!(!had_failure); + } + + #[test] + fn test_collect_dir_collects_files_recursively() { + let dir = tempdir().unwrap(); + let sub = dir.path().join("sub"); + fs::create_dir_all(&sub).unwrap(); + fs::write(dir.path().join("a.txt"), "").unwrap(); + fs::write(sub.join("b.txt"), "").unwrap(); + + let mut out = Vec::new(); + let had_failure = collect_dir(dir.path(), &mut out); + + assert_eq!(out.len(), 2); + assert!(!had_failure); + } + + #[test] + fn test_collect_dir_only_yields_files_not_directories() { + let dir = tempdir().unwrap(); + let sub = dir.path().join("subdir"); + fs::create_dir_all(&sub).unwrap(); + fs::write(sub.join("c.txt"), "").unwrap(); + + let mut out = Vec::new(); + let had_failure = collect_dir(dir.path(), &mut out); + + assert!( + out.iter().all(|p| p.is_file()), + "only files should be collected, not directories" + ); + assert!(!had_failure); + } + + // ── resolve_paths ───────────────────────────────────────────────────────── + + #[test] + fn test_resolve_paths_single_existing_file() { + let dir = tempdir().unwrap(); + let file = dir.path().join("hello.txt"); + fs::write(&file, "").unwrap(); + + let (result, had_failure) = + resolve_paths(&[file.to_string_lossy().to_string()], dir.path()); + + assert_eq!(result, vec![file]); + assert!(!had_failure); + } + + #[test] + fn test_resolve_paths_missing_file_sets_failure() { + let dir = tempdir().unwrap(); + let nonexistent = dir.path().join("nope.txt"); + + let (result, had_failure) = + 
resolve_paths(&[nonexistent.to_string_lossy().to_string()], dir.path()); + + assert!(result.is_empty()); + assert!(had_failure, "unmatched path should set had_failure"); + } + + #[test] + fn test_resolve_paths_directory_collects_all_files() { + let dir = tempdir().unwrap(); + let sub = dir.path().join("sub"); + fs::create_dir_all(&sub).unwrap(); + fs::write(dir.path().join("a.txt"), "").unwrap(); + fs::write(sub.join("b.txt"), "").unwrap(); + + let (result, had_failure) = + resolve_paths(&[dir.path().to_string_lossy().to_string()], dir.path()); + + assert_eq!(result.len(), 2); + assert!(!had_failure); + } + + #[test] + fn test_resolve_paths_glob_matches_by_extension() { + let dir = tempdir().unwrap(); + fs::write(dir.path().join("foo.rs"), "").unwrap(); + fs::write(dir.path().join("bar.rs"), "").unwrap(); + fs::write(dir.path().join("baz.txt"), "").unwrap(); + + let pattern = format!("{}/*.rs", dir.path().to_string_lossy()); + let (result, had_failure) = resolve_paths(&[pattern], dir.path()); + + assert_eq!(result.len(), 2, "only .rs files should match"); + assert!( + result + .iter() + .all(|p| p.extension().unwrap_or_default() == "rs") + ); + assert!(!had_failure); + } + + #[test] + fn test_resolve_paths_glob_no_match_sets_failure() { + let dir = tempdir().unwrap(); // empty dir — no .rs files + let pattern = format!("{}/*.rs", dir.path().to_string_lossy()); + + let (result, had_failure) = resolve_paths(&[pattern], dir.path()); + + assert!(result.is_empty()); + assert!(had_failure, "unmatched glob should set had_failure"); + } + + #[test] + fn test_resolve_paths_deduplicates_same_file() { + let dir = tempdir().unwrap(); + let file = dir.path().join("hello.txt"); + fs::write(&file, "").unwrap(); + + let path_str = file.to_string_lossy().to_string(); + // List the same file twice — it should appear only once in the output. 
+ let (result, had_failure) = resolve_paths(&[path_str.clone(), path_str], dir.path()); + + assert_eq!(result.len(), 1); + assert!(!had_failure); + } + + #[test] + fn test_resolve_paths_output_is_sorted() { + let dir = tempdir().unwrap(); + // Create files in non-alphabetical order. + fs::write(dir.path().join("c.txt"), "").unwrap(); + fs::write(dir.path().join("a.txt"), "").unwrap(); + fs::write(dir.path().join("b.txt"), "").unwrap(); + + let (result, had_failure) = + resolve_paths(&[dir.path().to_string_lossy().to_string()], dir.path()); + + let names: Vec<_> = result + .iter() + .map(|p| p.file_name().unwrap().to_string_lossy().into_owned()) + .collect(); + assert_eq!(names, ["a.txt", "b.txt", "c.txt"]); + assert!(!had_failure); + } + + #[test] + fn test_resolve_paths_relative_pattern_resolved_against_root() { + let dir = tempdir().unwrap(); + fs::write(dir.path().join("x.txt"), "").unwrap(); + + // "x.txt" is relative — resolve_paths should join it with the root. + let (result, had_failure) = resolve_paths(&["x.txt".to_string()], dir.path()); + + assert_eq!(result.len(), 1); + assert!(!had_failure); + } + + // ── check_file ──────────────────────────────────────────────────────────── + + #[test] + fn test_check_file_unreadable_sets_failure() { + let dir = tempdir().unwrap(); + let nonexistent = dir.path().join("ghost.txt"); + + let codebook = make_codebook(); + let (count, had_failure) = check_file( + &nonexistent, + "ghost.txt", + &codebook, + &mut HashSet::new(), + false, + false, + ); + + assert_eq!(count, 0); + assert!(had_failure, "unreadable file must set had_failure"); + } + + #[test] + fn test_check_file_flags_misspelling() { + let dir = tempdir().unwrap(); + let file = dir.path().join("bad.txt"); + // "actualbad" is not a real English word; the checker should flag it. 
+ fs::write(&file, "actualbad").unwrap(); + + let codebook = make_codebook(); + let (count, had_failure) = check_file( + &file, + "bad.txt", + &codebook, + &mut HashSet::new(), + false, + false, + ); + + assert!( + count > 0, + "expected at least one misspelling to be reported" + ); + assert!(!had_failure); + } + + #[test] + fn test_check_file_unique_counts_each_word_once_per_file() { + let dir = tempdir().unwrap(); + let file = dir.path().join("dup.txt"); + // The checker groups all occurrences of the same misspelled word into one + // WordLocation with multiple TextRanges; unique mode takes only the first. + fs::write(&file, "actualbad actualbad").unwrap(); + + let codebook = make_codebook(); + let mut had_failure = false; + + let (count_all, f) = check_file( + &file, + "dup.txt", + &codebook, + &mut HashSet::new(), + false, // unique = false + false, + ); + had_failure |= f; + + let (count_unique, f) = check_file( + &file, + "dup.txt", + &codebook, + &mut HashSet::new(), + true, // unique = true + false, + ); + had_failure |= f; + + assert_eq!(count_all, 2, "non-unique mode must count every occurrence"); + assert_eq!(count_unique, 1, "unique mode must count each word once"); + assert!(!had_failure); + } + + #[test] + fn test_check_file_unique_deduplicates_across_files() { + let dir = tempdir().unwrap(); + let file1 = dir.path().join("first.txt"); + let file2 = dir.path().join("second.txt"); + fs::write(&file1, "actualbad").unwrap(); + fs::write(&file2, "actualbad").unwrap(); + + let codebook = make_codebook(); + // A single shared `seen_words` set, exactly as run_lint uses it. 
+ let mut seen = HashSet::new(); + + let (count1, f1) = check_file(&file1, "first.txt", &codebook, &mut seen, true, false); + let (count2, f2) = check_file(&file2, "second.txt", &codebook, &mut seen, true, false); + + assert_eq!(count1, 1, "first file should report the misspelling"); + assert_eq!(count2, 0, "second file should skip the already-seen word"); + assert!(!f1); + assert!(!f2); + } + + // ── line/col correctness (pure StringOffsets, no network I/O) ──────────── + // + // These tests exercise the exact same utf8_to_char_pos call that check_file + // uses, confirming that columns are in Unicode characters rather than raw + // bytes and that the 0-based → 1-based conversion is applied correctly. + + #[test] + fn test_linecol_word_at_start_of_file() { + let text = "actualbad rest of line"; + let offsets = StringOffsets::::new(text); + let pos = offsets.utf8_to_char_pos(0); + assert_eq!(pos.line + 1, 1); + assert_eq!(pos.col + 1, 1); + } + + #[test] + fn test_linecol_word_on_second_line() { + // "ok text\n" is 8 bytes; "actualbad" starts at byte 8. + let text = "ok text\nactualbad more"; + let offsets = StringOffsets::::new(text); + let byte_offset = text.find("actualbad").unwrap(); + assert_eq!(byte_offset, 8, "sanity-check byte layout"); + + let pos = offsets.utf8_to_char_pos(byte_offset); + assert_eq!(pos.line + 1, 2, "should be on the second line"); + assert_eq!(pos.col + 1, 1, "should be the first column of that line"); + } + + #[test] + fn test_linecol_mid_line_ascii() { + // "hello " is 6 bytes/chars; "actualbad" starts at byte/char 6. 
+ let text = "hello actualbad"; + let offsets = StringOffsets::::new(text); + let byte_offset = text.find("actualbad").unwrap(); + assert_eq!(byte_offset, 6, "sanity-check byte layout"); + + let pos = offsets.utf8_to_char_pos(byte_offset); + assert_eq!(pos.line + 1, 1); + assert_eq!(pos.col + 1, 7); + } + + /// The old byte-count approach reported col=10 here; StringOffsets correctly + /// reports col=8 because "résumé" is 8 bytes but only 6 Unicode characters. + #[test] + fn test_linecol_multibyte_chars_before_word() { + // r(1) + é(2) + s(1) + u(1) + m(1) + é(2) + space(1) = 9 bytes, 7 chars. + // "actualbad" starts at byte 9, char offset 7 (0-based) → col 8 (1-based). + let text = "résumé actualbad"; + let offsets = StringOffsets::::new(text); + let byte_offset = text.find("actualbad").unwrap(); + assert_eq!(byte_offset, 9, "sanity-check byte layout"); + + let pos = offsets.utf8_to_char_pos(byte_offset); + assert_eq!(pos.line + 1, 1); + assert_eq!( + pos.col + 1, + 8, + "col must count Unicode chars (6 + space = 7, 1-based = 8), not bytes (9, 1-based = 10)" + ); + } + + #[test] + fn test_linecol_emoji_before_word() { + // 🐛 is U+1F41B — 4 UTF-8 bytes, 1 Unicode char. + // "🐛 " = 5 bytes, 2 chars; "actualbad" starts at byte 5, char 2 (0-based) → col 3. 
+ let text = "🐛 actualbad"; + let offsets = StringOffsets::::new(text); + let byte_offset = text.find("actualbad").unwrap(); + assert_eq!(byte_offset, 5, "sanity-check byte layout"); + + let pos = offsets.utf8_to_char_pos(byte_offset); + assert_eq!(pos.line + 1, 1); + assert_eq!( + pos.col + 1, + 3, + "col must count Unicode chars (emoji=1, space=1, 1-based=3), not bytes (5, 1-based=6)" + ); + } } From adcf2787e0c897c53ae8781c92abc3aee943d283 Mon Sep 17 00:00:00 2001 From: niekdomi <152265924+niekdomi@users.noreply.github.com> Date: Sat, 21 Mar 2026 20:19:10 +0100 Subject: [PATCH 6/8] update tests --- crates/codebook-lsp/src/lint.rs | 470 ++++++-------------------------- 1 file changed, 86 insertions(+), 384 deletions(-) diff --git a/crates/codebook-lsp/src/lint.rs b/crates/codebook-lsp/src/lint.rs index 9c2faadd..3f0c8ac5 100644 --- a/crates/codebook-lsp/src/lint.rs +++ b/crates/codebook-lsp/src/lint.rs @@ -21,14 +21,20 @@ macro_rules! err { }; } +macro_rules! paint { + ($val:expr, $stream:expr, $style:expr) => { + $val.if_supports_color($stream, |t| t.style($style)) + }; +} + fn fatal(msg: impl std::fmt::Display) -> ! { err!("{msg}"); std::process::exit(2); } /// Computes a workspace-relative path string for a given file. Falls back to -/// the absolute path if the file is outside the workspace or canonicalization fails. -/// `root_canonical` should be the already-canonicalized workspace root. +/// the absolute path if the file is outside the workspace or canonicalization +/// fails. `root_canonical` should be the already-canonicalized workspace root. 
fn relative_to_root(root_canonical: Option<&Path>, path: &Path) -> String { root_canonical .and_then(|root| { @@ -80,8 +86,8 @@ pub fn run_lint(files: &[String], root: &Path, unique: bool, suggest: bool) -> b let unique_label = if unique { "unique " } else { "" }; eprintln!( "Found {} {unique_label}spelling error(s) in {} file(s).", - total_errors.if_supports_color(Stream::Stderr, |t| t.style(BOLD)), - files_with_errors.if_supports_color(Stream::Stderr, |t| t.style(BOLD)), + paint!(total_errors, Stream::Stderr, BOLD), + paint!(files_with_errors, Stream::Stderr, BOLD), ); if had_failure { @@ -94,8 +100,8 @@ pub fn run_lint(files: &[String], root: &Path, unique: bool, suggest: bool) -> b /// Spell-checks a single file and prints any diagnostics to stdout. /// /// Returns `(error_count, had_io_error)`. `error_count` is 0 if the file was -/// clean; `had_io_error` is true when the file could not be read. -/// `relative` is the workspace-relative path used for display and ignore matching. +/// clean; `had_io_error` is true when the file could not be read. `relative` is +/// the workspace-relative path used for display and ignore matching. fn check_file( path: &Path, relative: &str, @@ -114,17 +120,15 @@ fn check_file( let display = relative.strip_prefix("./").unwrap_or(relative); - // Build the offset table once per file – O(n) construction, O(1) per lookup – - // and gives Unicode character columns rather than raw byte offsets. + // Build the offset table once per file let offsets = StringOffsets::::new(&text); - let mut locations = codebook.spell_check(&text, None, Some(relative)); // Sort by first occurrence in the file. locations.sort_by_key(|l| l.locations.first().map(|r| r.start_byte).unwrap_or(0)); - // Collect hits first so we can compute pad_len for column alignment. - // The unique check is per-word (outer loop) so all ranges of a word are - // included or skipped together. + // Collect hits first so we can compute pad_len for column alignment. 
The + // unique check is per-word, so all ranges of a word are included or skipped + // together. let mut hits: Vec<(String, &str, Option>)> = Vec::new(); for wl in &locations { if unique && !seen_words.insert(wl.word.to_lowercase()) { @@ -137,7 +141,7 @@ fn check_file( None }; - // In unique mode only emit the first occurrence of each word. + // If unique mode: Only emit the first occurrence of each word. let ranges = if unique { &wl.locations[..1] } else { @@ -147,12 +151,14 @@ fn check_file( for (i, range) in ranges.iter().enumerate() { // utf8_to_char_pos returns 0-based line and Unicode-char column. let pos = offsets.utf8_to_char_pos(range.start_byte.min(text.len())); + // Move out of `suggestions` on the last iteration to avoid a clone. let sugg = if i + 1 < ranges.len() { suggestions.clone() } else { suggestions.take() }; + hits.push(( format!("{}:{}", pos.line + 1, pos.col + 1), wl.word.as_str(), @@ -175,17 +181,14 @@ fn check_file( let pad = " ".repeat(pad_len - linecol.len()); print!( " {}:{}{} {}", - display.if_supports_color(Stream::Stdout, |t| t.style(DIM)), - linecol.if_supports_color(Stream::Stdout, |t| t.style(YELLOW)), + paint!(display, Stream::Stdout, DIM), + paint!(linecol, Stream::Stdout, YELLOW), pad, - word.if_supports_color(Stream::Stdout, |t| t.style(BOLD_RED)), + paint!(word, Stream::Stdout, BOLD_RED), ); - if let Some(suggestions) = suggestions { - println!( - " {}", - format!("→ {}", suggestions.join(", ")) - .if_supports_color(Stream::Stdout, |t| t.style(DIM)), - ); + if let Some(s) = suggestions { + let text = format!("→ {}", s.join(", ")); + println!(" {}", paint!(text, Stream::Stdout, DIM)); } else { println!(); } @@ -232,7 +235,7 @@ fn resolve_paths(patterns: &[String], root: &Path) -> (Vec, bool) { let mut had_failure = false; for pattern in patterns { - // root.join() is a no-op when pattern is absolute (replaces root entirely). 
+ // root.join() is a no-op when pattern is absolute let p = root.join(pattern); if p.is_dir() { had_failure |= collect_dir(&p, &mut paths); @@ -276,8 +279,8 @@ fn resolve_paths(patterns: &[String], root: &Path) -> (Vec, bool) { (paths, had_failure) } -/// Recursively collects all files under `dir` into `out`. -/// Returns `true` if any directory-entry I/O error occurred (e.g. permission denied). +/// Recursively collects all files under `dir` into `out`. Returns `true` if any +/// directory-entry I/O error occurred. fn collect_dir(dir: &Path, out: &mut Vec) -> bool { let mut had_failure = false; for entry in walkdir::WalkDir::new(dir).follow_links(false) { @@ -306,385 +309,84 @@ mod tests { use std::sync::Arc; use tempfile::tempdir; - /// Builds a Codebook backed by the default in-memory config (en_us dictionary). - /// This mirrors the pattern used throughout the rest of the test suite. - fn make_codebook() -> Codebook { - let config = Arc::new(CodebookConfigMemory::default()); - Codebook::new(config).unwrap() - } - - // ── relative_to_root ───────────────────────────────────────────────────── - #[test] - fn test_relative_to_root_inside_workspace() { - let dir = tempdir().unwrap(); - let subdir = dir.path().join("src"); - fs::create_dir_all(&subdir).unwrap(); - let file = subdir.join("lib.rs"); - fs::write(&file, "").unwrap(); - - let root_canonical = dir.path().canonicalize().unwrap(); - let result = relative_to_root(Some(&root_canonical), &file); - assert_eq!(result, "src/lib.rs"); - } - - #[test] - fn test_relative_to_root_outside_workspace() { - let root = tempdir().unwrap(); - let other = tempdir().unwrap(); - let file = other.path().join("outside.rs"); - fs::write(&file, "").unwrap(); - - let root_canonical = root.path().canonicalize().unwrap(); - let result = relative_to_root(Some(&root_canonical), &file); - // Falls back to a path string since the file is outside the workspace. 
- assert!(result.contains("outside.rs")); - } - - #[test] - fn test_relative_to_root_none_returns_raw_path() { - let result = relative_to_root(None, Path::new("/some/path/file.rs")); - assert_eq!(result, "/some/path/file.rs"); - } - - // ── collect_dir ─────────────────────────────────────────────────────────── - - #[test] - fn test_collect_dir_empty_dir() { - let dir = tempdir().unwrap(); - let mut out = Vec::new(); - let had_failure = collect_dir(dir.path(), &mut out); - assert!(out.is_empty()); - assert!(!had_failure); - } - - #[test] - fn test_collect_dir_collects_files_recursively() { + fn test_path_and_dir_resolution() { let dir = tempdir().unwrap(); let sub = dir.path().join("sub"); fs::create_dir_all(&sub).unwrap(); - fs::write(dir.path().join("a.txt"), "").unwrap(); - fs::write(sub.join("b.txt"), "").unwrap(); - - let mut out = Vec::new(); - let had_failure = collect_dir(dir.path(), &mut out); - - assert_eq!(out.len(), 2); - assert!(!had_failure); - } - #[test] - fn test_collect_dir_only_yields_files_not_directories() { - let dir = tempdir().unwrap(); - let sub = dir.path().join("subdir"); - fs::create_dir_all(&sub).unwrap(); - fs::write(sub.join("c.txt"), "").unwrap(); + let f1 = dir.path().join("a.rs"); + let f2 = sub.join("b.txt"); + fs::write(&f1, "").unwrap(); + fs::write(&f2, "").unwrap(); - let mut out = Vec::new(); - let had_failure = collect_dir(dir.path(), &mut out); + let root_canon = dir.path().canonicalize().unwrap(); + assert_eq!(relative_to_root(Some(&root_canon), &f1), "a.rs"); - assert!( - out.iter().all(|p| p.is_file()), - "only files should be collected, not directories" - ); - assert!(!had_failure); - } - - // ── resolve_paths ───────────────────────────────────────────────────────── - - #[test] - fn test_resolve_paths_single_existing_file() { - let dir = tempdir().unwrap(); - let file = dir.path().join("hello.txt"); - fs::write(&file, "").unwrap(); - - let (result, had_failure) = - resolve_paths(&[file.to_string_lossy().to_string()], 
dir.path()); - - assert_eq!(result, vec![file]); - assert!(!had_failure); - } - - #[test] - fn test_resolve_paths_missing_file_sets_failure() { - let dir = tempdir().unwrap(); - let nonexistent = dir.path().join("nope.txt"); - - let (result, had_failure) = - resolve_paths(&[nonexistent.to_string_lossy().to_string()], dir.path()); - - assert!(result.is_empty()); - assert!(had_failure, "unmatched path should set had_failure"); - } - - #[test] - fn test_resolve_paths_directory_collects_all_files() { - let dir = tempdir().unwrap(); - let sub = dir.path().join("sub"); - fs::create_dir_all(&sub).unwrap(); - fs::write(dir.path().join("a.txt"), "").unwrap(); - fs::write(sub.join("b.txt"), "").unwrap(); - - let (result, had_failure) = - resolve_paths(&[dir.path().to_string_lossy().to_string()], dir.path()); - - assert_eq!(result.len(), 2); - assert!(!had_failure); - } - - #[test] - fn test_resolve_paths_glob_matches_by_extension() { - let dir = tempdir().unwrap(); - fs::write(dir.path().join("foo.rs"), "").unwrap(); - fs::write(dir.path().join("bar.rs"), "").unwrap(); - fs::write(dir.path().join("baz.txt"), "").unwrap(); - - let pattern = format!("{}/*.rs", dir.path().to_string_lossy()); - let (result, had_failure) = resolve_paths(&[pattern], dir.path()); - - assert_eq!(result.len(), 2, "only .rs files should match"); - assert!( - result - .iter() - .all(|p| p.extension().unwrap_or_default() == "rs") - ); - assert!(!had_failure); - } - - #[test] - fn test_resolve_paths_glob_no_match_sets_failure() { - let dir = tempdir().unwrap(); // empty dir — no .rs files - let pattern = format!("{}/*.rs", dir.path().to_string_lossy()); + let pattern = format!("{}/**/*.*", dir.path().display()); + let (paths, err) = resolve_paths(&[pattern], dir.path()); - let (result, had_failure) = resolve_paths(&[pattern], dir.path()); + assert!(!err); + assert_eq!(paths.len(), 2); + let path_strs: HashSet<_> = paths.iter().map(|p| p.to_string_lossy()).collect(); + assert!(path_strs.iter().any(|s| 
s.ends_with("a.rs"))); + assert!(path_strs.iter().any(|s| s.ends_with("b.txt"))); - assert!(result.is_empty()); - assert!(had_failure, "unmatched glob should set had_failure"); + let (_, err_missing) = resolve_paths(&["nonexistent.rs".into()], dir.path()); + assert!(err_missing); } #[test] - fn test_resolve_paths_deduplicates_same_file() { + fn test_check_file_logic() { let dir = tempdir().unwrap(); - let file = dir.path().join("hello.txt"); - fs::write(&file, "").unwrap(); + let f = dir.path().join("test.txt"); + fs::write(&f, "actualbad\n🦀 actualbad").unwrap(); - let path_str = file.to_string_lossy().to_string(); - // List the same file twice — it should appear only once in the output. - let (result, had_failure) = resolve_paths(&[path_str.clone(), path_str], dir.path()); - - assert_eq!(result.len(), 1); - assert!(!had_failure); - } - - #[test] - fn test_resolve_paths_output_is_sorted() { - let dir = tempdir().unwrap(); - // Create files in non-alphabetical order. - fs::write(dir.path().join("c.txt"), "").unwrap(); - fs::write(dir.path().join("a.txt"), "").unwrap(); - fs::write(dir.path().join("b.txt"), "").unwrap(); - - let (result, had_failure) = - resolve_paths(&[dir.path().to_string_lossy().to_string()], dir.path()); - - let names: Vec<_> = result - .iter() - .map(|p| p.file_name().unwrap().to_string_lossy().into_owned()) - .collect(); - assert_eq!(names, ["a.txt", "b.txt", "c.txt"]); - assert!(!had_failure); - } - - #[test] - fn test_resolve_paths_relative_pattern_resolved_against_root() { - let dir = tempdir().unwrap(); - fs::write(dir.path().join("x.txt"), "").unwrap(); - - // "x.txt" is relative — resolve_paths should join it with the root. 
- let (result, had_failure) = resolve_paths(&["x.txt".to_string()], dir.path()); - - assert_eq!(result.len(), 1); - assert!(!had_failure); - } - - // ── check_file ──────────────────────────────────────────────────────────── - - #[test] - fn test_check_file_unreadable_sets_failure() { - let dir = tempdir().unwrap(); - let nonexistent = dir.path().join("ghost.txt"); - - let codebook = make_codebook(); - let (count, had_failure) = check_file( - &nonexistent, - "ghost.txt", - &codebook, - &mut HashSet::new(), - false, - false, - ); - - assert_eq!(count, 0); - assert!(had_failure, "unreadable file must set had_failure"); - } - - #[test] - fn test_check_file_flags_misspelling() { - let dir = tempdir().unwrap(); - let file = dir.path().join("bad.txt"); - // "actualbad" is not a real English word; the checker should flag it. - fs::write(&file, "actualbad").unwrap(); - - let codebook = make_codebook(); - let (count, had_failure) = check_file( - &file, - "bad.txt", - &codebook, - &mut HashSet::new(), - false, - false, - ); - - assert!( - count > 0, - "expected at least one misspelling to be reported" - ); - assert!(!had_failure); - } + let cb = Codebook::new(Arc::new(CodebookConfigMemory::default())).unwrap(); + let mut seen = HashSet::new(); - #[test] - fn test_check_file_unique_counts_each_word_once_per_file() { - let dir = tempdir().unwrap(); - let file = dir.path().join("dup.txt"); - // The checker groups all occurrences of the same misspelled word into one - // WordLocation with multiple TextRanges; unique mode takes only the first. 
- fs::write(&file, "actualbad actualbad").unwrap(); - - let codebook = make_codebook(); - let mut had_failure = false; - - let (count_all, f) = check_file( - &file, - "dup.txt", - &codebook, - &mut HashSet::new(), - false, // unique = false + // Test basic flagging and multi-occurrence counting + let (count, err) = check_file(&f, "test.txt", &cb, &mut seen, false, false); + assert_eq!(count, 2); + assert!(!err); + + // Test unique mode + let mut seen_unique = HashSet::new(); + let (c1, _) = check_file(&f, "f1.txt", &cb, &mut seen_unique, true, false); + let (c2, _) = check_file(&f, "f2.txt", &cb, &mut seen_unique, true, false); + assert_eq!(c1, 1, "Should flag word once"); + assert_eq!(c2, 0, "Should skip already-seen word in second file"); + + // Test IO failure + let (_, err_io) = check_file( + &dir.path().join("missing"), + "!", + &cb, + &mut seen, false, - ); - had_failure |= f; - - let (count_unique, f) = check_file( - &file, - "dup.txt", - &codebook, - &mut HashSet::new(), - true, // unique = true false, ); - had_failure |= f; - - assert_eq!(count_all, 2, "non-unique mode must count every occurrence"); - assert_eq!(count_unique, 1, "unique mode must count each word once"); - assert!(!had_failure); - } - - #[test] - fn test_check_file_unique_deduplicates_across_files() { - let dir = tempdir().unwrap(); - let file1 = dir.path().join("first.txt"); - let file2 = dir.path().join("second.txt"); - fs::write(&file1, "actualbad").unwrap(); - fs::write(&file2, "actualbad").unwrap(); - - let codebook = make_codebook(); - // A single shared `seen_words` set, exactly as run_lint uses it. 
- let mut seen = HashSet::new(); - - let (count1, f1) = check_file(&file1, "first.txt", &codebook, &mut seen, true, false); - let (count2, f2) = check_file(&file2, "second.txt", &codebook, &mut seen, true, false); - - assert_eq!(count1, 1, "first file should report the misspelling"); - assert_eq!(count2, 0, "second file should skip the already-seen word"); - assert!(!f1); - assert!(!f2); - } - - // ── line/col correctness (pure StringOffsets, no network I/O) ──────────── - // - // These tests exercise the exact same utf8_to_char_pos call that check_file - // uses, confirming that columns are in Unicode characters rather than raw - // bytes and that the 0-based → 1-based conversion is applied correctly. - - #[test] - fn test_linecol_word_at_start_of_file() { - let text = "actualbad rest of line"; - let offsets = StringOffsets::::new(text); - let pos = offsets.utf8_to_char_pos(0); - assert_eq!(pos.line + 1, 1); - assert_eq!(pos.col + 1, 1); - } - - #[test] - fn test_linecol_word_on_second_line() { - // "ok text\n" is 8 bytes; "actualbad" starts at byte 8. - let text = "ok text\nactualbad more"; - let offsets = StringOffsets::::new(text); - let byte_offset = text.find("actualbad").unwrap(); - assert_eq!(byte_offset, 8, "sanity-check byte layout"); - - let pos = offsets.utf8_to_char_pos(byte_offset); - assert_eq!(pos.line + 1, 2, "should be on the second line"); - assert_eq!(pos.col + 1, 1, "should be the first column of that line"); - } - - #[test] - fn test_linecol_mid_line_ascii() { - // "hello " is 6 bytes/chars; "actualbad" starts at byte/char 6. 
- let text = "hello actualbad"; - let offsets = StringOffsets::::new(text); - let byte_offset = text.find("actualbad").unwrap(); - assert_eq!(byte_offset, 6, "sanity-check byte layout"); - - let pos = offsets.utf8_to_char_pos(byte_offset); - assert_eq!(pos.line + 1, 1); - assert_eq!(pos.col + 1, 7); - } - - /// The old byte-count approach reported col=10 here; StringOffsets correctly - /// reports col=8 because "résumé" is 8 bytes but only 6 Unicode characters. - #[test] - fn test_linecol_multibyte_chars_before_word() { - // r(1) + é(2) + s(1) + u(1) + m(1) + é(2) + space(1) = 9 bytes, 7 chars. - // "actualbad" starts at byte 9, char offset 7 (0-based) → col 8 (1-based). - let text = "résumé actualbad"; - let offsets = StringOffsets::::new(text); - let byte_offset = text.find("actualbad").unwrap(); - assert_eq!(byte_offset, 9, "sanity-check byte layout"); - - let pos = offsets.utf8_to_char_pos(byte_offset); - assert_eq!(pos.line + 1, 1); - assert_eq!( - pos.col + 1, - 8, - "col must count Unicode chars (6 + space = 7, 1-based = 8), not bytes (9, 1-based = 10)" - ); + assert!(err_io); } #[test] - fn test_linecol_emoji_before_word() { - // 🐛 is U+1F41B — 4 UTF-8 bytes, 1 Unicode char. - // "🐛 " = 5 bytes, 2 chars; "actualbad" starts at byte 5, char 2 (0-based) → col 3. 
-        let text = "🐛 actualbad";
-        let offsets = StringOffsets::<AllConfig>::new(text);
-        let byte_offset = text.find("actualbad").unwrap();
-        assert_eq!(byte_offset, 5, "sanity-check byte layout");
-
-        let pos = offsets.utf8_to_char_pos(byte_offset);
-        assert_eq!(pos.line + 1, 1);
-        assert_eq!(
-            pos.col + 1,
-            3,
-            "col must count Unicode chars (emoji=1, space=1, 1-based=3), not bytes (5, 1-based=6)"
-        );
+    fn test_unicode_line_col() {
+        let cases = [
+            ("actualbad", 0, 1, 1),        // Start
+            ("ok\nactualbad", 3, 2, 1),    // Newline
+            ("résumé actualbad", 9, 1, 8), // Multi-byte chars (é is 2 bytes)
+            ("🦀 actualbad", 5, 1, 3),     // Emoji (4 bytes, 1 char)
+        ];
+
+        for (text, offset, line, col) in cases {
+            let table = StringOffsets::<AllConfig>::new(text);
+            let pos = table.utf8_to_char_pos(offset);
+            assert_eq!(
+                (pos.line + 1, pos.col + 1),
+                (line, col),
+                "Failed on: {}",
+                text
+            );
+        }
+    }
 }

From 086e983d2d65ee1a0efbf9b870392fd06139752b Mon Sep 17 00:00:00 2001
From: niekdomi <152265924+niekdomi@users.noreply.github.com>
Date: Mon, 23 Mar 2026 10:45:16 +0100
Subject: [PATCH 7/8] Update readme

---
 README.md | 20 ++++++++++++++++++++
 1 file changed, 20 insertions(+)

diff --git a/README.md b/README.md
index 074e96f5..2b1486d0 100644
--- a/README.md
+++ b/README.md
@@ -117,6 +117,26 @@ Any editor that implements the Language Server Protocol should be compatible wit
 codebook-lsp serve
 ```
 
+### CLI (Lint)
+
+Codebook can also be used as a standalone command-line spell checker, which is useful for CI pipelines, pre-commit hooks, or one-off checks.
+ +```sh +# Check specific files +codebook-lsp lint src/main.rs src/lib.rs + +# Check all files in a directory (recursive) +codebook-lsp lint src/ + +# Show spelling suggestions +codebook-lsp lint --suggest src/ + +# Only report each misspelled word once across all files +codebook-lsp lint --unique src/ +``` + +The exit code is **0** if all files are clean, **1** if any spelling errors are found, and **2** if there was an unreadable files, invalid UTF-8, etc... + ## About Codebook is a spell checker for code. It binds together the venerable Tree Sitter and the fast spell checker [Spellbook](https://github.com/helix-editor/spellbook). Included is a Language Server for use in (theoretically) any editor. Everything is done in Rust to keep response times snappy and memory usage _low_. From fc626039190073d1718e220bfc77c216dde72a93 Mon Sep 17 00:00:00 2001 From: niekdomi <152265924+niekdomi@users.noreply.github.com> Date: Mon, 23 Mar 2026 11:43:14 +0100 Subject: [PATCH 8/8] apply copilot review --- README.md | 2 +- crates/codebook-lsp/src/lint.rs | 6 +++++- 2 files changed, 6 insertions(+), 2 deletions(-) diff --git a/README.md b/README.md index 2b1486d0..908283b7 100644 --- a/README.md +++ b/README.md @@ -135,7 +135,7 @@ codebook-lsp lint --suggest src/ codebook-lsp lint --unique src/ ``` -The exit code is **0** if all files are clean, **1** if any spelling errors are found, and **2** if there was an unreadable files, invalid UTF-8, etc... +The exit code is **0** if all files are clean, **1** if any spelling errors are found, and **2** if there were unreadable files, invalid UTF-8, etc. 
## About diff --git a/crates/codebook-lsp/src/lint.rs b/crates/codebook-lsp/src/lint.rs index 3f0c8ac5..3a1b1cc0 100644 --- a/crates/codebook-lsp/src/lint.rs +++ b/crates/codebook-lsp/src/lint.rs @@ -123,7 +123,11 @@ fn check_file( // Build the offset table once per file let offsets = StringOffsets::::new(&text); let mut locations = codebook.spell_check(&text, None, Some(relative)); - // Sort by first occurrence in the file. + // Sort inner locations first (HashSet iteration order is nondeterministic), + // then sort the outer list by first occurrence in the file. + for wl in &mut locations { + wl.locations.sort_by_key(|r| r.start_byte); + } locations.sort_by_key(|l| l.locations.first().map(|r| r.start_byte).unwrap_or(0)); // Collect hits first so we can compute pad_len for column alignment. The