From bee444a3c9d6c201997a2d0eb1922b19286a4d9d Mon Sep 17 00:00:00 2001 From: niekdomi Date: Wed, 25 Feb 2026 20:13:37 +0100 Subject: [PATCH 1/8] Added cli --- Cargo.lock | 2 + crates/codebook-lsp/Cargo.toml | 2 + crates/codebook-lsp/src/lint.rs | 215 ++++++++++++++++++++++++++++++++ crates/codebook-lsp/src/main.rs | 15 +++ 4 files changed, 234 insertions(+) create mode 100644 crates/codebook-lsp/src/lint.rs diff --git a/Cargo.lock b/Cargo.lock index a481ac85..2f3d8d55 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -427,6 +427,7 @@ dependencies = [ "codebook_config", "env_logger", "fs2", + "glob", "log", "lru", "serde", @@ -436,6 +437,7 @@ dependencies = [ "tempfile", "tokio", "tower-lsp", + "walkdir", ] [[package]] diff --git a/crates/codebook-lsp/Cargo.toml b/crates/codebook-lsp/Cargo.toml index b621344e..27d1a11a 100644 --- a/crates/codebook-lsp/Cargo.toml +++ b/crates/codebook-lsp/Cargo.toml @@ -28,6 +28,8 @@ env_logger.workspace = true fs2.workspace = true log.workspace = true lru.workspace = true +glob.workspace = true +walkdir.workspace = true serde.workspace = true serde_json.workspace = true string-offsets.workspace = true diff --git a/crates/codebook-lsp/src/lint.rs b/crates/codebook-lsp/src/lint.rs new file mode 100644 index 00000000..3ecd5ab4 --- /dev/null +++ b/crates/codebook-lsp/src/lint.rs @@ -0,0 +1,215 @@ +use codebook::Codebook; +use codebook_config::{CodebookConfig, CodebookConfigFile}; +use std::collections::HashSet; +use std::io::{IsTerminal, Write}; +use std::path::{Path, PathBuf}; +use std::sync::Arc; + +struct Styles { + bold: &'static str, + dim: &'static str, + yellow: &'static str, + bold_red: &'static str, + reset: &'static str, +} + +const STYLES_ON: Styles = Styles { + bold: "\x1b[1m", + dim: "\x1b[2m", + yellow: "\x1b[33m", + bold_red: "\x1b[1;31m", + reset: "\x1b[0m", +}; + +const STYLES_OFF: Styles = Styles { + bold: "", + dim: "", + yellow: "", + bold_red: "", + reset: "", +}; + +fn styles(is_terminal: bool) -> &'static Styles { + if 
is_terminal && std::env::var_os("NO_COLOR").is_none() { + &STYLES_ON + } else { + &STYLES_OFF + } +} + +fn fatal(msg: impl std::fmt::Display, s: &Styles) -> ! { + eprintln!("{}error:{} {msg}", s.bold_red, s.reset); + std::process::exit(2); +} + +pub fn run_lint(files: &[String], root: &Path, unique: bool) -> bool { + let out = styles(std::io::stdout().is_terminal()); + let err = styles(std::io::stderr().is_terminal()); + + let config = Arc::new( + CodebookConfigFile::load(Some(root)) + .unwrap_or_else(|e| fatal(format!("failed to load config: {e}"), err)), + ); + + print_config_source(&config, err); + + let codebook = Codebook::new(config.clone()) + .unwrap_or_else(|e| fatal(format!("failed to initialize: {e}"), err)); + + let resolved = resolve_paths(files); + if resolved.is_empty() { + fatal("no files matched the given patterns", err); + } + + let mut seen_words: HashSet = HashSet::new(); + let mut total_errors = 0usize; + let mut files_with_errors = 0usize; + + for path in &resolved { + if config.should_ignore_path(path) { + continue; + } + + let text = match std::fs::read_to_string(path) { + Ok(t) => t, + Err(e) => { + eprintln!( + "{}error:{} {}: {e}", + err.bold_red, + err.reset, + path.display() + ); + continue; + } + }; + + let mut locations = codebook.spell_check(&text, None, path.to_str()); + locations.sort_by_key(|l| l.locations.first().map(|r| r.start_byte).unwrap_or(0)); + + let hits: Vec<(usize, usize, &str)> = locations + .iter() + .flat_map(|wl| wl.locations.iter().map(move |r| (wl, r))) + .filter_map(|(wl, range)| { + if unique && !seen_words.insert(wl.word.to_lowercase()) { + return None; + } + let (line, col) = byte_offset_to_line_col(&text, range.start_byte); + Some((line, col, wl.word.as_str())) + }) + .collect(); + + if hits.is_empty() { + continue; + } + + let raw = path.to_string_lossy(); + let display = raw.strip_prefix("./").unwrap_or(&raw); + let pad_len = hits + .iter() + .map(|(l, c, _)| format!("{l}:{c}").len()) + .max() + 
.unwrap_or(0); + + println!("{}{display}{}", out.bold, out.reset); + for (line, col, word) in &hits { + let loc = format!("{line}:{col}"); + println!( + " {}{display}{}:{}{loc}{}{pad} {}{}{}", + out.dim, + out.reset, + out.yellow, + out.reset, + out.bold_red, + word, + out.reset, + pad = " ".repeat(pad_len - loc.len()), + ); + } + println!(); + + total_errors += hits.len(); + files_with_errors += 1; + } + + if total_errors > 0 { + let _ = std::io::stdout().flush(); + let unique_label = if unique { "unique " } else { "" }; + eprintln!( + "Found {}{total_errors}{} {unique_label}spelling error(s) in {}{files_with_errors}{} file(s).", + err.bold, err.reset, err.bold, err.reset, + ); + } + + total_errors > 0 +} + +fn print_config_source(config: &CodebookConfigFile, s: &Styles) { + let cwd = std::env::current_dir().unwrap_or_default(); + match ( + config.project_config_path().filter(|p| p.is_file()), + config.global_config_path().filter(|p| p.is_file()), + ) { + (Some(p), _) => eprintln!( + "using config {}{}{}", + s.dim, + p.strip_prefix(&cwd).unwrap_or(&p).display(), + s.reset + ), + (None, Some(g)) => eprintln!( + "using global config {}{}{}", + s.dim, + g.strip_prefix(&cwd).unwrap_or(&g).display(), + s.reset + ), + (None, None) => eprintln!("using default config"), + } +} + +fn resolve_paths(patterns: &[String]) -> Vec { + let mut paths = Vec::new(); + for pattern in patterns { + let p = PathBuf::from(pattern); + if p.is_dir() { + collect_dir(&p, &mut paths); + } else { + match glob::glob(pattern) { + Ok(entries) => { + let mut matched = false; + for entry in entries.flatten() { + if entry.is_file() { + paths.push(entry); + matched = true; + } else if entry.is_dir() { + collect_dir(&entry, &mut paths); + matched = true; + } + } + if !matched { + eprintln!("codebook: no match for '{pattern}'"); + } + } + Err(e) => eprintln!("codebook: invalid pattern '{pattern}': {e}"), + } + } + } + paths.sort(); + paths.dedup(); + paths +} + +fn collect_dir(dir: &Path, out: &mut 
Vec) { + walkdir::WalkDir::new(dir) + .follow_links(false) + .into_iter() + .flatten() + .filter(|e| e.file_type().is_file()) + .for_each(|e| out.push(e.into_path())); +} + +fn byte_offset_to_line_col(text: &str, byte_offset: usize) -> (usize, usize) { + let offset = byte_offset.min(text.len()); + let before = &text[..offset]; + let line = before.bytes().filter(|&b| b == b'\n').count() + 1; + let col = before.rfind('\n').map(|p| offset - p).unwrap_or(offset + 1); + (line, col) +} diff --git a/crates/codebook-lsp/src/main.rs b/crates/codebook-lsp/src/main.rs index bc055ee2..c6898c49 100644 --- a/crates/codebook-lsp/src/main.rs +++ b/crates/codebook-lsp/src/main.rs @@ -1,5 +1,6 @@ mod file_cache; mod init_options; +mod lint; mod lsp; mod lsp_logger; @@ -30,6 +31,15 @@ enum Commands { Serve {}, /// Remove server cache Clean {}, + /// Check files for spelling errors + Lint { + /// Files or glob patterns to spell-check + #[arg(required = true)] + files: Vec, + /// Only report each misspelled word once, ignoring duplicates across files + #[arg(short = 'u', long)] + unique: bool, + }, } #[tokio::main(flavor = "current_thread")] @@ -58,6 +68,11 @@ async fn main() { info!("Cleaning: {:?}", config.cache_dir); config.clean_cache() } + Some(Commands::Lint { files, unique }) => { + if lint::run_lint(files, root, *unique) { + std::process::exit(1); + } + } None => {} } } From 3a4b0d8cedbf5c7ee532d3a597609637dd53cdf4 Mon Sep 17 00:00:00 2001 From: niekdomi Date: Wed, 4 Mar 2026 18:56:01 +0100 Subject: [PATCH 2/8] improved the cli integration Replaces the manual ansi escape approach with owo-colors, which handles NO_COLOR and windows automatically. The code is also way better in general than my previous approach... 
--- Cargo.lock | 36 +++++ Cargo.toml | 1 + crates/codebook-lsp/Cargo.toml | 1 + crates/codebook-lsp/src/lint.rs | 261 ++++++++++++++++---------------- crates/codebook-lsp/src/main.rs | 16 +- 5 files changed, 185 insertions(+), 130 deletions(-) diff --git a/Cargo.lock b/Cargo.lock index 946d94e4..6db3a140 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -430,6 +430,7 @@ dependencies = [ "glob", "log", "lru", + "owo-colors", "serde", "serde_json", "streaming-iterator", @@ -1374,6 +1375,12 @@ dependencies = [ "windows-sys 0.61.2", ] +[[package]] +name = "is_ci" +version = "1.2.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "7655c9839580ee829dfacba1d1278c2b7883e50a277ff7541299489d6bdfdc45" + [[package]] name = "is_terminal_polyfill" version = "1.70.2" @@ -1705,6 +1712,16 @@ version = "0.2.0" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "04744f49eae99ab78e0d5c0b603ab218f515ea8cfe5a456d7629ad883a3b6e7d" +[[package]] +name = "owo-colors" +version = "4.3.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "d211803b9b6b570f68772237e415a029d5a50c65d382910b879fb19d3271f94d" +dependencies = [ + "supports-color 2.1.0", + "supports-color 3.0.2", +] + [[package]] name = "parking" version = "2.2.1" @@ -2451,6 +2468,25 @@ version = "2.6.1" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "13c2bddecc57b384dee18652358fb23172facb8a2c51ccc10d74c157bdea3292" +[[package]] +name = "supports-color" +version = "2.1.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "d6398cde53adc3c4557306a96ce67b302968513830a77a95b2b17305d9719a89" +dependencies = [ + "is-terminal", + "is_ci", +] + +[[package]] +name = "supports-color" +version = "3.0.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "c64fc7232dd8d2e4ac5ce4ef302b1d81e0b80d055b9d77c7c4f51f6aa4c867d6" +dependencies = [ + "is_ci", +] + [[package]] name = 
"symbolic-common" version = "12.17.2" diff --git a/Cargo.toml b/Cargo.toml index 09220040..0ad3137a 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -21,6 +21,7 @@ env_logger = "0.11.6" fs2 = "0.4" git2 = "0.20.0" glob = "0.3" +owo-colors = { version = "4", features = ["supports-colors"] } httpmock = "<0.9.0" lazy_static = "1.5.0" log = "0.4.22" diff --git a/crates/codebook-lsp/Cargo.toml b/crates/codebook-lsp/Cargo.toml index 3398f960..114e3fd7 100644 --- a/crates/codebook-lsp/Cargo.toml +++ b/crates/codebook-lsp/Cargo.toml @@ -29,6 +29,7 @@ fs2.workspace = true log.workspace = true lru.workspace = true glob.workspace = true +owo-colors.workspace = true walkdir.workspace = true serde.workspace = true serde_json.workspace = true diff --git a/crates/codebook-lsp/src/lint.rs b/crates/codebook-lsp/src/lint.rs index 3ecd5ab4..12c9e1a6 100644 --- a/crates/codebook-lsp/src/lint.rs +++ b/crates/codebook-lsp/src/lint.rs @@ -1,170 +1,185 @@ use codebook::Codebook; -use codebook_config::{CodebookConfig, CodebookConfigFile}; +use codebook_config::CodebookConfigFile; +use owo_colors::{OwoColorize, Stream, Style}; use std::collections::HashSet; -use std::io::{IsTerminal, Write}; use std::path::{Path, PathBuf}; use std::sync::Arc; -struct Styles { - bold: &'static str, - dim: &'static str, - yellow: &'static str, - bold_red: &'static str, - reset: &'static str, -} - -const STYLES_ON: Styles = Styles { - bold: "\x1b[1m", - dim: "\x1b[2m", - yellow: "\x1b[33m", - bold_red: "\x1b[1;31m", - reset: "\x1b[0m", -}; - -const STYLES_OFF: Styles = Styles { - bold: "", - dim: "", - yellow: "", - bold_red: "", - reset: "", -}; - -fn styles(is_terminal: bool) -> &'static Styles { - if is_terminal && std::env::var_os("NO_COLOR").is_none() { - &STYLES_ON - } else { - &STYLES_OFF - } -} +const BOLD: Style = Style::new().bold(); +const DIM: Style = Style::new().dimmed(); +const YELLOW: Style = Style::new().yellow(); +const BOLD_RED: Style = Style::new().bold().red(); -fn fatal(msg: impl 
std::fmt::Display, s: &Styles) -> ! { - eprintln!("{}error:{} {msg}", s.bold_red, s.reset); +fn fatal(msg: impl std::fmt::Display) -> ! { + eprintln!( + "{} {msg}", + "error:".if_supports_color(Stream::Stderr, |t| t.style(BOLD_RED)) + ); std::process::exit(2); } -pub fn run_lint(files: &[String], root: &Path, unique: bool) -> bool { - let out = styles(std::io::stdout().is_terminal()); - let err = styles(std::io::stderr().is_terminal()); - +/// Returns `true` if any spelling errors were found. +pub fn run_lint(files: &[String], root: &Path, unique: bool, suggest: bool) -> bool { let config = Arc::new( CodebookConfigFile::load(Some(root)) - .unwrap_or_else(|e| fatal(format!("failed to load config: {e}"), err)), + .unwrap_or_else(|e| fatal(format!("failed to load config: {e}"))), ); - print_config_source(&config, err); + print_config_source(&config); + eprintln!(); let codebook = Codebook::new(config.clone()) - .unwrap_or_else(|e| fatal(format!("failed to initialize: {e}"), err)); + .unwrap_or_else(|e| fatal(format!("failed to initialize: {e}"))); let resolved = resolve_paths(files); - if resolved.is_empty() { - fatal("no files matched the given patterns", err); - } let mut seen_words: HashSet = HashSet::new(); let mut total_errors = 0usize; let mut files_with_errors = 0usize; for path in &resolved { - if config.should_ignore_path(path) { - continue; + let error_count = check_file(path, &codebook, &mut seen_words, unique, suggest); + if error_count > 0 { + total_errors += error_count; + files_with_errors += 1; } + } - let text = match std::fs::read_to_string(path) { - Ok(t) => t, - Err(e) => { - eprintln!( - "{}error:{} {}: {e}", - err.bold_red, - err.reset, - path.display() - ); - continue; - } - }; + if total_errors > 0 { + let unique_label = if unique { "unique " } else { "" }; + eprintln!( + "Found {} {unique_label}spelling error(s) in {} file(s).", + total_errors.if_supports_color(Stream::Stderr, |t| t.style(BOLD)), + 
files_with_errors.if_supports_color(Stream::Stderr, |t| t.style(BOLD)), + ); + } - let mut locations = codebook.spell_check(&text, None, path.to_str()); - locations.sort_by_key(|l| l.locations.first().map(|r| r.start_byte).unwrap_or(0)); + total_errors > 0 +} - let hits: Vec<(usize, usize, &str)> = locations - .iter() - .flat_map(|wl| wl.locations.iter().map(move |r| (wl, r))) - .filter_map(|(wl, range)| { - if unique && !seen_words.insert(wl.word.to_lowercase()) { - return None; - } - let (line, col) = byte_offset_to_line_col(&text, range.start_byte); - Some((line, col, wl.word.as_str())) - }) - .collect(); +/// Spell-checks a single file and prints any diagnostics to stdout. +/// +/// Returns the number of errors found (0 if the file was clean or unreadable). +fn check_file( + path: &Path, + codebook: &Codebook, + seen_words: &mut HashSet, + unique: bool, + suggest: bool, +) -> usize { + let text = match std::fs::read_to_string(path) { + Ok(t) => t, + Err(e) => { + eprintln!( + "{} {}: {e}", + "error:".if_supports_color(Stream::Stderr, |t| t.style(BOLD_RED)), + path.display() + ); + return 0; + } + }; + + let raw = path.to_string_lossy(); + let display = raw.strip_prefix("./").unwrap_or(&raw); + + let mut locations = codebook.spell_check(&text, None, path.to_str()); + // Sort by first occurrence in the file. + locations.sort_by_key(|l| l.locations.first().map(|r| r.start_byte).unwrap_or(0)); - if hits.is_empty() { + // Collect (linecol, word, suggestions) first so we can compute pad_len for alignment. + // unique check is per-word (outer loop) so all ranges of a word are included or skipped together. 
+ let mut hits: Vec<(String, &str, Option>)> = Vec::new(); + for wl in &locations { + if unique && !seen_words.insert(wl.word.to_lowercase()) { continue; } - let raw = path.to_string_lossy(); - let display = raw.strip_prefix("./").unwrap_or(&raw); - let pad_len = hits - .iter() - .map(|(l, c, _)| format!("{l}:{c}").len()) - .max() - .unwrap_or(0); - - println!("{}{display}{}", out.bold, out.reset); - for (line, col, word) in &hits { - let loc = format!("{line}:{col}"); - println!( - " {}{display}{}:{}{loc}{}{pad} {}{}{}", - out.dim, - out.reset, - out.yellow, - out.reset, - out.bold_red, - word, - out.reset, - pad = " ".repeat(pad_len - loc.len()), - ); + let suggestions = if suggest { + codebook.get_suggestions(wl.word.as_str()) + } else { + None + }; + + for range in &wl.locations { + let before = &text[..range.start_byte.min(text.len())]; + let line = before.bytes().filter(|&b| b == b'\n').count() + 1; + let col = before + .rfind('\n') + .map(|p| before.len() - p) + .unwrap_or(before.len() + 1); + hits.push(( + format!("{line}:{col}"), + wl.word.as_str(), + suggestions.clone(), + )); } - println!(); + } - total_errors += hits.len(); - files_with_errors += 1; + if hits.is_empty() { + return 0; } - if total_errors > 0 { - let _ = std::io::stdout().flush(); - let unique_label = if unique { "unique " } else { "" }; - eprintln!( - "Found {}{total_errors}{} {unique_label}spelling error(s) in {}{files_with_errors}{} file(s).", - err.bold, err.reset, err.bold, err.reset, + let pad_len = hits + .iter() + .map(|(linecol, _, _)| linecol.len()) + .max() + .unwrap_or(0); + + println!( + "{}", + display.if_supports_color(Stream::Stdout, |t| t.style(BOLD)) + ); + for (linecol, word, suggestions) in &hits { + let pad = " ".repeat(pad_len - linecol.len()); + print!( + " {}:{}{} {}", + display.if_supports_color(Stream::Stdout, |t| t.style(DIM)), + linecol.if_supports_color(Stream::Stdout, |t| t.style(YELLOW)), + pad, + word.if_supports_color(Stream::Stdout, |t| 
t.style(BOLD_RED)), ); + if let Some(suggestions) = suggestions { + println!( + " {}", + format!("→ {}", suggestions.join(", ")) + .if_supports_color(Stream::Stdout, |t| t.style(DIM)), + ); + } else { + println!(); + } } + println!(); - total_errors > 0 + hits.len() } -fn print_config_source(config: &CodebookConfigFile, s: &Styles) { +/// Prints which config file is being used, or notes that the default is active. +fn print_config_source(config: &CodebookConfigFile) { let cwd = std::env::current_dir().unwrap_or_default(); match ( config.project_config_path().filter(|p| p.is_file()), config.global_config_path().filter(|p| p.is_file()), ) { - (Some(p), _) => eprintln!( - "using config {}{}{}", - s.dim, - p.strip_prefix(&cwd).unwrap_or(&p).display(), - s.reset - ), - (None, Some(g)) => eprintln!( - "using global config {}{}{}", - s.dim, - g.strip_prefix(&cwd).unwrap_or(&g).display(), - s.reset - ), - (None, None) => eprintln!("using default config"), + (Some(p), _) => { + let path = p.strip_prefix(&cwd).unwrap_or(&p).display().to_string(); + eprintln!( + "using config {}", + path.if_supports_color(Stream::Stderr, |t| t.style(DIM)) + ); + } + (None, Some(g)) => { + let path = g.strip_prefix(&cwd).unwrap_or(&g).display().to_string(); + eprintln!( + "using global config {}", + path.if_supports_color(Stream::Stderr, |t| t.style(DIM)) + ); + } + (None, None) => eprintln!("No config found, using default config"), } } +/// Resolves a mix of file paths, directories, and glob patterns into a sorted, +/// deduplicated list of file paths. 
fn resolve_paths(patterns: &[String]) -> Vec { let mut paths = Vec::new(); for pattern in patterns { @@ -205,11 +220,3 @@ fn collect_dir(dir: &Path, out: &mut Vec) { .filter(|e| e.file_type().is_file()) .for_each(|e| out.push(e.into_path())); } - -fn byte_offset_to_line_col(text: &str, byte_offset: usize) -> (usize, usize) { - let offset = byte_offset.min(text.len()); - let before = &text[..offset]; - let line = before.bytes().filter(|&b| b == b'\n').count() + 1; - let col = before.rfind('\n').map(|p| offset - p).unwrap_or(offset + 1); - (line, col) -} diff --git a/crates/codebook-lsp/src/main.rs b/crates/codebook-lsp/src/main.rs index c6898c49..449c174a 100644 --- a/crates/codebook-lsp/src/main.rs +++ b/crates/codebook-lsp/src/main.rs @@ -39,15 +39,21 @@ enum Commands { /// Only report each misspelled word once, ignoring duplicates across files #[arg(short = 'u', long)] unique: bool, + /// Show spelling suggestions for each misspelled word + #[arg(short = 's', long)] + suggest: bool, }, } #[tokio::main(flavor = "current_thread")] async fn main() { // Initialize logger early with stderr output and buffering - // Default to INFO level, will be adjusted when LSP client connects + // Default to INFO for LSP, WARN for lint (to suppress LSP-oriented noise) + let is_lint = std::env::args().nth(1).as_deref() == Some("lint"); let log_level = match env::var("RUST_LOG").as_deref() { Ok("debug") => LevelFilter::Debug, + Ok("info") => LevelFilter::Info, + _ if is_lint => LevelFilter::Warn, _ => LevelFilter::Info, }; LspLogger::init_early(log_level).expect("Failed to initialize early logger"); @@ -68,8 +74,12 @@ async fn main() { info!("Cleaning: {:?}", config.cache_dir); config.clean_cache() } - Some(Commands::Lint { files, unique }) => { - if lint::run_lint(files, root, *unique) { + Some(Commands::Lint { + files, + unique, + suggest, + }) => { + if lint::run_lint(files, root, *unique, *suggest) { std::process::exit(1); } } From e21a221079acb9b7e282b6c0a079ec7a05721d5a Mon Sep 
17 00:00:00 2001 From: niekdomi Date: Wed, 4 Mar 2026 19:21:50 +0100 Subject: [PATCH 3/8] improve --- crates/codebook-lsp/src/lint.rs | 59 +++++++++++++++++++++++---------- 1 file changed, 42 insertions(+), 17 deletions(-) diff --git a/crates/codebook-lsp/src/lint.rs b/crates/codebook-lsp/src/lint.rs index 12c9e1a6..565e4ae2 100644 --- a/crates/codebook-lsp/src/lint.rs +++ b/crates/codebook-lsp/src/lint.rs @@ -18,6 +18,22 @@ fn fatal(msg: impl std::fmt::Display) -> ! { std::process::exit(2); } +/// Computes a workspace-relative path string for a given file. Falls back to +/// the absolute path if the file is outside the workspace or canonicalization fails. +fn relative_to_root(root: &Path, path: &Path) -> String { + let root_canonical = match root.canonicalize() { + Ok(r) => r, + Err(_) => return path.to_string_lossy().to_string(), + }; + match path.canonicalize() { + Ok(canon) => match canon.strip_prefix(&root_canonical) { + Ok(rel) => rel.to_string_lossy().to_string(), + Err(_) => path.to_string_lossy().to_string(), + }, + Err(_) => path.to_string_lossy().to_string(), + } +} + /// Returns `true` if any spelling errors were found. 
pub fn run_lint(files: &[String], root: &Path, unique: bool, suggest: bool) -> bool { let config = Arc::new( @@ -31,28 +47,27 @@ pub fn run_lint(files: &[String], root: &Path, unique: bool, suggest: bool) -> b let codebook = Codebook::new(config.clone()) .unwrap_or_else(|e| fatal(format!("failed to initialize: {e}"))); - let resolved = resolve_paths(files); + let resolved = resolve_paths(files, root); let mut seen_words: HashSet = HashSet::new(); let mut total_errors = 0usize; let mut files_with_errors = 0usize; for path in &resolved { - let error_count = check_file(path, &codebook, &mut seen_words, unique, suggest); + let relative = relative_to_root(root, path); + let error_count = check_file(path, &relative, &codebook, &mut seen_words, unique, suggest); if error_count > 0 { total_errors += error_count; files_with_errors += 1; } } - if total_errors > 0 { - let unique_label = if unique { "unique " } else { "" }; - eprintln!( - "Found {} {unique_label}spelling error(s) in {} file(s).", - total_errors.if_supports_color(Stream::Stderr, |t| t.style(BOLD)), - files_with_errors.if_supports_color(Stream::Stderr, |t| t.style(BOLD)), - ); - } + let unique_label = if unique { "unique " } else { "" }; + eprintln!( + "Found {} {unique_label}spelling error(s) in {} file(s).", + total_errors.if_supports_color(Stream::Stderr, |t| t.style(BOLD)), + files_with_errors.if_supports_color(Stream::Stderr, |t| t.style(BOLD)), + ); total_errors > 0 } @@ -60,8 +75,10 @@ pub fn run_lint(files: &[String], root: &Path, unique: bool, suggest: bool) -> b /// Spell-checks a single file and prints any diagnostics to stdout. /// /// Returns the number of errors found (0 if the file was clean or unreadable). +/// `relative` is the workspace-relative path used for display and ignore matching. 
fn check_file( path: &Path, + relative: &str, codebook: &Codebook, seen_words: &mut HashSet, unique: bool, @@ -79,10 +96,9 @@ fn check_file( } }; - let raw = path.to_string_lossy(); - let display = raw.strip_prefix("./").unwrap_or(&raw); + let display = relative.strip_prefix("./").unwrap_or(relative); - let mut locations = codebook.spell_check(&text, None, path.to_str()); + let mut locations = codebook.spell_check(&text, None, Some(relative)); // Sort by first occurrence in the file. locations.sort_by_key(|l| l.locations.first().map(|r| r.start_byte).unwrap_or(0)); @@ -100,7 +116,14 @@ fn check_file( None }; - for range in &wl.locations { + // In unique mode only emit the first occurrence of each word + let ranges = if unique { + &wl.locations[..1] + } else { + &wl.locations[..] + }; + + for range in ranges { let before = &text[..range.start_byte.min(text.len())]; let line = before.bytes().filter(|&b| b == b'\n').count() + 1; let col = before @@ -179,15 +202,17 @@ fn print_config_source(config: &CodebookConfigFile) { } /// Resolves a mix of file paths, directories, and glob patterns into a sorted, -/// deduplicated list of file paths. -fn resolve_paths(patterns: &[String]) -> Vec { +/// deduplicated list of file paths. Non-absolute patterns are resolved relative to root. 
+fn resolve_paths(patterns: &[String], root: &Path) -> Vec { let mut paths = Vec::new(); for pattern in patterns { let p = PathBuf::from(pattern); + let p = if p.is_absolute() { p } else { root.join(&p) }; if p.is_dir() { collect_dir(&p, &mut paths); } else { - match glob::glob(pattern) { + let pattern = p.to_string_lossy(); + match glob::glob(&pattern) { Ok(entries) => { let mut matched = false; for entry in entries.flatten() { From 87f77c8cef8bd09ecb226ee1684ed03ab4aa508d Mon Sep 17 00:00:00 2001 From: niekdomi <152265924+niekdomi@users.noreply.github.com> Date: Sat, 21 Mar 2026 15:09:00 +0100 Subject: [PATCH 4/8] apply review --- crates/codebook-lsp/src/lint.rs | 145 +++++++++++++++++++++++--------- 1 file changed, 104 insertions(+), 41 deletions(-) diff --git a/crates/codebook-lsp/src/lint.rs b/crates/codebook-lsp/src/lint.rs index 565e4ae2..9a39560b 100644 --- a/crates/codebook-lsp/src/lint.rs +++ b/crates/codebook-lsp/src/lint.rs @@ -4,6 +4,7 @@ use owo_colors::{OwoColorize, Stream, Style}; use std::collections::HashSet; use std::path::{Path, PathBuf}; use std::sync::Arc; +use string_offsets::{AllConfig, StringOffsets}; const BOLD: Style = Style::new().bold(); const DIM: Style = Style::new().dimmed(); @@ -20,21 +21,24 @@ fn fatal(msg: impl std::fmt::Display) -> ! { /// Computes a workspace-relative path string for a given file. Falls back to /// the absolute path if the file is outside the workspace or canonicalization fails. -fn relative_to_root(root: &Path, path: &Path) -> String { - let root_canonical = match root.canonicalize() { - Ok(r) => r, - Err(_) => return path.to_string_lossy().to_string(), +/// `root_canonical` should be the already-canonicalized workspace root. 
+fn relative_to_root(root_canonical: Option<&Path>, path: &Path) -> String { + let Some(root_canonical) = root_canonical else { + return path.to_string_lossy().into_owned(); }; match path.canonicalize() { - Ok(canon) => match canon.strip_prefix(&root_canonical) { - Ok(rel) => rel.to_string_lossy().to_string(), - Err(_) => path.to_string_lossy().to_string(), + Ok(canon) => match canon.strip_prefix(root_canonical) { + Ok(rel) => rel.to_string_lossy().into_owned(), + Err(_) => path.to_string_lossy().into_owned(), }, - Err(_) => path.to_string_lossy().to_string(), + Err(_) => path.to_string_lossy().into_owned(), } } /// Returns `true` if any spelling errors were found. +/// +/// Exits with code 2 if infrastructure failures occurred (unreadable files, +/// directory errors, unmatched or invalid patterns). pub fn run_lint(files: &[String], root: &Path, unique: bool, suggest: bool) -> bool { let config = Arc::new( CodebookConfigFile::load(Some(root)) @@ -47,15 +51,27 @@ pub fn run_lint(files: &[String], root: &Path, unique: bool, suggest: bool) -> b let codebook = Codebook::new(config.clone()) .unwrap_or_else(|e| fatal(format!("failed to initialize: {e}"))); - let resolved = resolve_paths(files, root); + // Canonicalize the root once here rather than once per file. 
+ let root_canonical = root.canonicalize().ok(); + + let mut had_failure = false; + let resolved = resolve_paths(files, root, &mut had_failure); let mut seen_words: HashSet = HashSet::new(); let mut total_errors = 0usize; let mut files_with_errors = 0usize; for path in &resolved { - let relative = relative_to_root(root, path); - let error_count = check_file(path, &relative, &codebook, &mut seen_words, unique, suggest); + let relative = relative_to_root(root_canonical.as_deref(), path); + let error_count = check_file( + path, + &relative, + &codebook, + &mut seen_words, + unique, + suggest, + &mut had_failure, + ); if error_count > 0 { total_errors += error_count; files_with_errors += 1; @@ -69,13 +85,18 @@ pub fn run_lint(files: &[String], root: &Path, unique: bool, suggest: bool) -> b files_with_errors.if_supports_color(Stream::Stderr, |t| t.style(BOLD)), ); + if had_failure { + std::process::exit(2); + } + total_errors > 0 } /// Spell-checks a single file and prints any diagnostics to stdout. /// -/// Returns the number of errors found (0 if the file was clean or unreadable). +/// Returns the number of spelling errors found (0 if the file was clean). /// `relative` is the workspace-relative path used for display and ignore matching. +/// Sets `*had_failure = true` on I/O errors so the caller can exit non-zero. fn check_file( path: &Path, relative: &str, @@ -83,6 +104,7 @@ fn check_file( seen_words: &mut HashSet, unique: bool, suggest: bool, + had_failure: &mut bool, ) -> usize { let text = match std::fs::read_to_string(path) { Ok(t) => t, @@ -92,6 +114,7 @@ fn check_file( "error:".if_supports_color(Stream::Stderr, |t| t.style(BOLD_RED)), path.display() ); + *had_failure = true; return 0; } }; @@ -102,8 +125,13 @@ fn check_file( // Sort by first occurrence in the file. 
locations.sort_by_key(|l| l.locations.first().map(|r| r.start_byte).unwrap_or(0)); + // Build the offset table once per file – O(n) construction, O(1) per lookup – + // and gives Unicode character columns rather than raw byte offsets. + let offsets = StringOffsets::::new(&text); + // Collect (linecol, word, suggestions) first so we can compute pad_len for alignment. - // unique check is per-word (outer loop) so all ranges of a word are included or skipped together. + // The unique check is per-word (outer loop) so all ranges of a word are included + // or skipped together. let mut hits: Vec<(String, &str, Option>)> = Vec::new(); for wl in &locations { if unique && !seen_words.insert(wl.word.to_lowercase()) { @@ -116,7 +144,7 @@ fn check_file( None }; - // In unique mode only emit the first occurrence of each word + // In unique mode only emit the first occurrence of each word. let ranges = if unique { &wl.locations[..1] } else { @@ -124,12 +152,10 @@ fn check_file( }; for range in ranges { - let before = &text[..range.start_byte.min(text.len())]; - let line = before.bytes().filter(|&b| b == b'\n').count() + 1; - let col = before - .rfind('\n') - .map(|p| before.len() - p) - .unwrap_or(before.len() + 1); + // utf8_to_char_pos returns 0-based line and Unicode-char column. + let pos = offsets.utf8_to_char_pos(range.start_byte.min(text.len())); + let line = pos.line + 1; // 0-based → 1-based + let col = pos.col + 1; // 0-based → 1-based hits.push(( format!("{line}:{col}"), wl.word.as_str(), @@ -202,33 +228,57 @@ fn print_config_source(config: &CodebookConfigFile) { } /// Resolves a mix of file paths, directories, and glob patterns into a sorted, -/// deduplicated list of file paths. Non-absolute patterns are resolved relative to root. -fn resolve_paths(patterns: &[String], root: &Path) -> Vec { +/// deduplicated list of file paths. Non-absolute patterns are resolved relative to `root`. 
+/// +/// Sets `*had_failure = true` for unmatched patterns, invalid globs, or glob I/O errors. +fn resolve_paths(patterns: &[String], root: &Path, had_failure: &mut bool) -> Vec { let mut paths = Vec::new(); for pattern in patterns { let p = PathBuf::from(pattern); let p = if p.is_absolute() { p } else { root.join(&p) }; if p.is_dir() { - collect_dir(&p, &mut paths); + collect_dir(&p, &mut paths, had_failure); } else { - let pattern = p.to_string_lossy(); - match glob::glob(&pattern) { + let pattern_str = p.to_string_lossy(); + match glob::glob(&pattern_str) { Ok(entries) => { let mut matched = false; - for entry in entries.flatten() { - if entry.is_file() { - paths.push(entry); - matched = true; - } else if entry.is_dir() { - collect_dir(&entry, &mut paths); - matched = true; + for entry in entries { + match entry { + Ok(e) => { + if e.is_file() { + paths.push(e); + matched = true; + } else if e.is_dir() { + collect_dir(&e, &mut paths, had_failure); + matched = true; + } + } + Err(e) => { + eprintln!( + "{} failed to read glob entry: {e}", + "error:" + .if_supports_color(Stream::Stderr, |t| t.style(BOLD_RED)) + ); + *had_failure = true; + } } } if !matched { - eprintln!("codebook: no match for '{pattern}'"); + eprintln!( + "{} no match for '{pattern_str}'", + "error:".if_supports_color(Stream::Stderr, |t| t.style(BOLD_RED)) + ); + *had_failure = true; } } - Err(e) => eprintln!("codebook: invalid pattern '{pattern}': {e}"), + Err(e) => { + eprintln!( + "{} invalid pattern '{pattern_str}': {e}", + "error:".if_supports_color(Stream::Stderr, |t| t.style(BOLD_RED)) + ); + *had_failure = true; + } } } } @@ -237,11 +287,24 @@ fn resolve_paths(patterns: &[String], root: &Path) -> Vec { paths } -fn collect_dir(dir: &Path, out: &mut Vec) { - walkdir::WalkDir::new(dir) - .follow_links(false) - .into_iter() - .flatten() - .filter(|e| e.file_type().is_file()) - .for_each(|e| out.push(e.into_path())); +/// Recursively collects all files under `dir` into `out`. 
+/// Sets `*had_failure = true` for any directory-entry I/O error (e.g. permission denied). +fn collect_dir(dir: &Path, out: &mut Vec, had_failure: &mut bool) { + for entry in walkdir::WalkDir::new(dir).follow_links(false).into_iter() { + match entry { + Ok(e) => { + if e.file_type().is_file() { + out.push(e.into_path()); + } + } + Err(err) => { + eprintln!( + "{} failed to read directory entry under '{}': {err}", + "error:".if_supports_color(Stream::Stderr, |t| t.style(BOLD_RED)), + dir.display() + ); + *had_failure = true; + } + } + } } From e88087c21cbebf2bbf4d7a9e1c1c665460a72bee Mon Sep 17 00:00:00 2001 From: niekdomi <152265924+niekdomi@users.noreply.github.com> Date: Sat, 21 Mar 2026 16:25:19 +0100 Subject: [PATCH 5/8] test --- crates/codebook-lsp/src/lint.rs | 616 ++++++++++++++++++++++++++------ 1 file changed, 498 insertions(+), 118 deletions(-) diff --git a/crates/codebook-lsp/src/lint.rs b/crates/codebook-lsp/src/lint.rs index 9a39560b..9c2faadd 100644 --- a/crates/codebook-lsp/src/lint.rs +++ b/crates/codebook-lsp/src/lint.rs @@ -11,11 +11,18 @@ const DIM: Style = Style::new().dimmed(); const YELLOW: Style = Style::new().yellow(); const BOLD_RED: Style = Style::new().bold().red(); +macro_rules! err { + ($($arg:tt)*) => { + eprintln!( + "{} {}", + "error:".if_supports_color(Stream::Stderr, |t| t.style(BOLD_RED)), + format_args!($($arg)*) + ) + }; +} + fn fatal(msg: impl std::fmt::Display) -> ! { - eprintln!( - "{} {msg}", - "error:".if_supports_color(Stream::Stderr, |t| t.style(BOLD_RED)) - ); + err!("{msg}"); std::process::exit(2); } @@ -23,16 +30,15 @@ fn fatal(msg: impl std::fmt::Display) -> ! { /// the absolute path if the file is outside the workspace or canonicalization fails. /// `root_canonical` should be the already-canonicalized workspace root. 
fn relative_to_root(root_canonical: Option<&Path>, path: &Path) -> String { - let Some(root_canonical) = root_canonical else { - return path.to_string_lossy().into_owned(); - }; - match path.canonicalize() { - Ok(canon) => match canon.strip_prefix(root_canonical) { - Ok(rel) => rel.to_string_lossy().into_owned(), - Err(_) => path.to_string_lossy().into_owned(), - }, - Err(_) => path.to_string_lossy().into_owned(), - } + root_canonical + .and_then(|root| { + let canon = path.canonicalize().ok()?; + canon + .strip_prefix(root) + .ok() + .map(|rel| rel.to_string_lossy().into_owned()) + }) + .unwrap_or_else(|| path.to_string_lossy().into_owned()) } /// Returns `true` if any spelling errors were found. @@ -54,8 +60,7 @@ pub fn run_lint(files: &[String], root: &Path, unique: bool, suggest: bool) -> b // Canonicalize the root once here rather than once per file. let root_canonical = root.canonicalize().ok(); - let mut had_failure = false; - let resolved = resolve_paths(files, root, &mut had_failure); + let (resolved, mut had_failure) = resolve_paths(files, root); let mut seen_words: HashSet = HashSet::new(); let mut total_errors = 0usize; @@ -63,17 +68,11 @@ pub fn run_lint(files: &[String], root: &Path, unique: bool, suggest: bool) -> b for path in &resolved { let relative = relative_to_root(root_canonical.as_deref(), path); - let error_count = check_file( - path, - &relative, - &codebook, - &mut seen_words, - unique, - suggest, - &mut had_failure, - ); - if error_count > 0 { - total_errors += error_count; + let (errors, file_failure) = + check_file(path, &relative, &codebook, &mut seen_words, unique, suggest); + had_failure |= file_failure; + if errors > 0 { + total_errors += errors; files_with_errors += 1; } } @@ -94,9 +93,9 @@ pub fn run_lint(files: &[String], root: &Path, unique: bool, suggest: bool) -> b /// Spell-checks a single file and prints any diagnostics to stdout. /// -/// Returns the number of spelling errors found (0 if the file was clean). 
+/// Returns `(error_count, had_io_error)`. `error_count` is 0 if the file was +/// clean; `had_io_error` is true when the file could not be read. /// `relative` is the workspace-relative path used for display and ignore matching. -/// Sets `*had_failure = true` on I/O errors so the caller can exit non-zero. fn check_file( path: &Path, relative: &str, @@ -104,41 +103,35 @@ fn check_file( seen_words: &mut HashSet, unique: bool, suggest: bool, - had_failure: &mut bool, -) -> usize { +) -> (usize, bool) { let text = match std::fs::read_to_string(path) { Ok(t) => t, Err(e) => { - eprintln!( - "{} {}: {e}", - "error:".if_supports_color(Stream::Stderr, |t| t.style(BOLD_RED)), - path.display() - ); - *had_failure = true; - return 0; + err!("{}: {e}", path.display()); + return (0, true); } }; let display = relative.strip_prefix("./").unwrap_or(relative); - let mut locations = codebook.spell_check(&text, None, Some(relative)); - // Sort by first occurrence in the file. - locations.sort_by_key(|l| l.locations.first().map(|r| r.start_byte).unwrap_or(0)); - // Build the offset table once per file – O(n) construction, O(1) per lookup – // and gives Unicode character columns rather than raw byte offsets. let offsets = StringOffsets::::new(&text); - // Collect (linecol, word, suggestions) first so we can compute pad_len for alignment. - // The unique check is per-word (outer loop) so all ranges of a word are included - // or skipped together. + let mut locations = codebook.spell_check(&text, None, Some(relative)); + // Sort by first occurrence in the file. + locations.sort_by_key(|l| l.locations.first().map(|r| r.start_byte).unwrap_or(0)); + + // Collect hits first so we can compute pad_len for column alignment. + // The unique check is per-word (outer loop) so all ranges of a word are + // included or skipped together. 
let mut hits: Vec<(String, &str, Option>)> = Vec::new(); for wl in &locations { if unique && !seen_words.insert(wl.word.to_lowercase()) { continue; } - let suggestions = if suggest { + let mut suggestions = if suggest { codebook.get_suggestions(wl.word.as_str()) } else { None @@ -151,28 +144,28 @@ fn check_file( &wl.locations[..] }; - for range in ranges { + for (i, range) in ranges.iter().enumerate() { // utf8_to_char_pos returns 0-based line and Unicode-char column. let pos = offsets.utf8_to_char_pos(range.start_byte.min(text.len())); - let line = pos.line + 1; // 0-based → 1-based - let col = pos.col + 1; // 0-based → 1-based + // Move out of `suggestions` on the last iteration to avoid a clone. + let sugg = if i + 1 < ranges.len() { + suggestions.clone() + } else { + suggestions.take() + }; hits.push(( - format!("{line}:{col}"), + format!("{}:{}", pos.line + 1, pos.col + 1), wl.word.as_str(), - suggestions.clone(), + sugg, )); } } if hits.is_empty() { - return 0; + return (0, false); } - let pad_len = hits - .iter() - .map(|(linecol, _, _)| linecol.len()) - .max() - .unwrap_or(0); + let pad_len = hits.iter().map(|(lc, _, _)| lc.len()).max().unwrap_or(0); println!( "{}", @@ -199,45 +192,50 @@ fn check_file( } println!(); - hits.len() + (hits.len(), false) } /// Prints which config file is being used, or notes that the default is active. 
fn print_config_source(config: &CodebookConfigFile) { let cwd = std::env::current_dir().unwrap_or_default(); - match ( + let (label, path) = match ( config.project_config_path().filter(|p| p.is_file()), config.global_config_path().filter(|p| p.is_file()), ) { - (Some(p), _) => { - let path = p.strip_prefix(&cwd).unwrap_or(&p).display().to_string(); - eprintln!( - "using config {}", - path.if_supports_color(Stream::Stderr, |t| t.style(DIM)) - ); + (Some(p), _) => ("using config", p), + (None, Some(g)) => ("using global config", g), + (None, None) => { + eprintln!("No config found, using default config"); + return; } - (None, Some(g)) => { - let path = g.strip_prefix(&cwd).unwrap_or(&g).display().to_string(); - eprintln!( - "using global config {}", - path.if_supports_color(Stream::Stderr, |t| t.style(DIM)) - ); - } - (None, None) => eprintln!("No config found, using default config"), - } + }; + let display = path + .strip_prefix(&cwd) + .unwrap_or(&path) + .display() + .to_string(); + eprintln!( + "{label} {}", + display.if_supports_color(Stream::Stderr, |t| t.style(DIM)) + ); } /// Resolves a mix of file paths, directories, and glob patterns into a sorted, -/// deduplicated list of file paths. Non-absolute patterns are resolved relative to `root`. +/// deduplicated list of file paths. Non-absolute patterns are resolved relative +/// to `root`. `Path::join` replaces the base when the argument is absolute, so +/// no explicit `is_absolute` check is needed. /// -/// Sets `*had_failure = true` for unmatched patterns, invalid globs, or glob I/O errors. -fn resolve_paths(patterns: &[String], root: &Path, had_failure: &mut bool) -> Vec { +/// Returns `(paths, had_failure)`. `had_failure` is true for unmatched +/// patterns, invalid globs, or glob I/O errors. 
+fn resolve_paths(patterns: &[String], root: &Path) -> (Vec, bool) { let mut paths = Vec::new(); + let mut had_failure = false; + for pattern in patterns { - let p = PathBuf::from(pattern); - let p = if p.is_absolute() { p } else { root.join(&p) }; + // root.join() is a no-op when pattern is absolute (replaces root entirely). + let p = root.join(pattern); if p.is_dir() { - collect_dir(&p, &mut paths, had_failure); + had_failure |= collect_dir(&p, &mut paths); } else { let pattern_str = p.to_string_lossy(); match glob::glob(&pattern_str) { @@ -245,66 +243,448 @@ fn resolve_paths(patterns: &[String], root: &Path, had_failure: &mut bool) -> Ve let mut matched = false; for entry in entries { match entry { - Ok(e) => { - if e.is_file() { - paths.push(e); - matched = true; - } else if e.is_dir() { - collect_dir(&e, &mut paths, had_failure); - matched = true; - } + Ok(e) if e.is_file() => { + paths.push(e); + matched = true; + } + Ok(e) if e.is_dir() => { + had_failure |= collect_dir(&e, &mut paths); + matched = true; } + Ok(_) => {} Err(e) => { - eprintln!( - "{} failed to read glob entry: {e}", - "error:" - .if_supports_color(Stream::Stderr, |t| t.style(BOLD_RED)) - ); - *had_failure = true; + err!("failed to read glob entry: {e}"); + had_failure = true; } } } if !matched { - eprintln!( - "{} no match for '{pattern_str}'", - "error:".if_supports_color(Stream::Stderr, |t| t.style(BOLD_RED)) - ); - *had_failure = true; + err!("no match for '{pattern_str}'"); + had_failure = true; } } Err(e) => { - eprintln!( - "{} invalid pattern '{pattern_str}': {e}", - "error:".if_supports_color(Stream::Stderr, |t| t.style(BOLD_RED)) - ); - *had_failure = true; + err!("invalid pattern '{pattern_str}': {e}"); + had_failure = true; } } } } + paths.sort(); paths.dedup(); - paths + (paths, had_failure) } /// Recursively collects all files under `dir` into `out`. -/// Sets `*had_failure = true` for any directory-entry I/O error (e.g. permission denied). 
-fn collect_dir(dir: &Path, out: &mut Vec, had_failure: &mut bool) { - for entry in walkdir::WalkDir::new(dir).follow_links(false).into_iter() { +/// Returns `true` if any directory-entry I/O error occurred (e.g. permission denied). +fn collect_dir(dir: &Path, out: &mut Vec) -> bool { + let mut had_failure = false; + for entry in walkdir::WalkDir::new(dir).follow_links(false) { match entry { - Ok(e) => { - if e.file_type().is_file() { - out.push(e.into_path()); - } - } - Err(err) => { - eprintln!( - "{} failed to read directory entry under '{}': {err}", - "error:".if_supports_color(Stream::Stderr, |t| t.style(BOLD_RED)), + Ok(e) if e.file_type().is_file() => out.push(e.into_path()), + Ok(_) => {} + Err(e) => { + err!( + "failed to read directory entry under '{}': {e}", dir.display() ); - *had_failure = true; + had_failure = true; } } } + had_failure +} + +#[cfg(test)] +mod tests { + use super::*; + use codebook::Codebook; + use codebook_config::CodebookConfigMemory; + use std::collections::HashSet; + use std::fs; + use std::sync::Arc; + use tempfile::tempdir; + + /// Builds a Codebook backed by the default in-memory config (en_us dictionary). + /// This mirrors the pattern used throughout the rest of the test suite. 
+ fn make_codebook() -> Codebook { + let config = Arc::new(CodebookConfigMemory::default()); + Codebook::new(config).unwrap() + } + + // ── relative_to_root ───────────────────────────────────────────────────── + + #[test] + fn test_relative_to_root_inside_workspace() { + let dir = tempdir().unwrap(); + let subdir = dir.path().join("src"); + fs::create_dir_all(&subdir).unwrap(); + let file = subdir.join("lib.rs"); + fs::write(&file, "").unwrap(); + + let root_canonical = dir.path().canonicalize().unwrap(); + let result = relative_to_root(Some(&root_canonical), &file); + assert_eq!(result, "src/lib.rs"); + } + + #[test] + fn test_relative_to_root_outside_workspace() { + let root = tempdir().unwrap(); + let other = tempdir().unwrap(); + let file = other.path().join("outside.rs"); + fs::write(&file, "").unwrap(); + + let root_canonical = root.path().canonicalize().unwrap(); + let result = relative_to_root(Some(&root_canonical), &file); + // Falls back to a path string since the file is outside the workspace. 
+ assert!(result.contains("outside.rs")); + } + + #[test] + fn test_relative_to_root_none_returns_raw_path() { + let result = relative_to_root(None, Path::new("/some/path/file.rs")); + assert_eq!(result, "/some/path/file.rs"); + } + + // ── collect_dir ─────────────────────────────────────────────────────────── + + #[test] + fn test_collect_dir_empty_dir() { + let dir = tempdir().unwrap(); + let mut out = Vec::new(); + let had_failure = collect_dir(dir.path(), &mut out); + assert!(out.is_empty()); + assert!(!had_failure); + } + + #[test] + fn test_collect_dir_collects_files_recursively() { + let dir = tempdir().unwrap(); + let sub = dir.path().join("sub"); + fs::create_dir_all(&sub).unwrap(); + fs::write(dir.path().join("a.txt"), "").unwrap(); + fs::write(sub.join("b.txt"), "").unwrap(); + + let mut out = Vec::new(); + let had_failure = collect_dir(dir.path(), &mut out); + + assert_eq!(out.len(), 2); + assert!(!had_failure); + } + + #[test] + fn test_collect_dir_only_yields_files_not_directories() { + let dir = tempdir().unwrap(); + let sub = dir.path().join("subdir"); + fs::create_dir_all(&sub).unwrap(); + fs::write(sub.join("c.txt"), "").unwrap(); + + let mut out = Vec::new(); + let had_failure = collect_dir(dir.path(), &mut out); + + assert!( + out.iter().all(|p| p.is_file()), + "only files should be collected, not directories" + ); + assert!(!had_failure); + } + + // ── resolve_paths ───────────────────────────────────────────────────────── + + #[test] + fn test_resolve_paths_single_existing_file() { + let dir = tempdir().unwrap(); + let file = dir.path().join("hello.txt"); + fs::write(&file, "").unwrap(); + + let (result, had_failure) = + resolve_paths(&[file.to_string_lossy().to_string()], dir.path()); + + assert_eq!(result, vec![file]); + assert!(!had_failure); + } + + #[test] + fn test_resolve_paths_missing_file_sets_failure() { + let dir = tempdir().unwrap(); + let nonexistent = dir.path().join("nope.txt"); + + let (result, had_failure) = + 
resolve_paths(&[nonexistent.to_string_lossy().to_string()], dir.path()); + + assert!(result.is_empty()); + assert!(had_failure, "unmatched path should set had_failure"); + } + + #[test] + fn test_resolve_paths_directory_collects_all_files() { + let dir = tempdir().unwrap(); + let sub = dir.path().join("sub"); + fs::create_dir_all(&sub).unwrap(); + fs::write(dir.path().join("a.txt"), "").unwrap(); + fs::write(sub.join("b.txt"), "").unwrap(); + + let (result, had_failure) = + resolve_paths(&[dir.path().to_string_lossy().to_string()], dir.path()); + + assert_eq!(result.len(), 2); + assert!(!had_failure); + } + + #[test] + fn test_resolve_paths_glob_matches_by_extension() { + let dir = tempdir().unwrap(); + fs::write(dir.path().join("foo.rs"), "").unwrap(); + fs::write(dir.path().join("bar.rs"), "").unwrap(); + fs::write(dir.path().join("baz.txt"), "").unwrap(); + + let pattern = format!("{}/*.rs", dir.path().to_string_lossy()); + let (result, had_failure) = resolve_paths(&[pattern], dir.path()); + + assert_eq!(result.len(), 2, "only .rs files should match"); + assert!( + result + .iter() + .all(|p| p.extension().unwrap_or_default() == "rs") + ); + assert!(!had_failure); + } + + #[test] + fn test_resolve_paths_glob_no_match_sets_failure() { + let dir = tempdir().unwrap(); // empty dir — no .rs files + let pattern = format!("{}/*.rs", dir.path().to_string_lossy()); + + let (result, had_failure) = resolve_paths(&[pattern], dir.path()); + + assert!(result.is_empty()); + assert!(had_failure, "unmatched glob should set had_failure"); + } + + #[test] + fn test_resolve_paths_deduplicates_same_file() { + let dir = tempdir().unwrap(); + let file = dir.path().join("hello.txt"); + fs::write(&file, "").unwrap(); + + let path_str = file.to_string_lossy().to_string(); + // List the same file twice — it should appear only once in the output. 
+ let (result, had_failure) = resolve_paths(&[path_str.clone(), path_str], dir.path()); + + assert_eq!(result.len(), 1); + assert!(!had_failure); + } + + #[test] + fn test_resolve_paths_output_is_sorted() { + let dir = tempdir().unwrap(); + // Create files in non-alphabetical order. + fs::write(dir.path().join("c.txt"), "").unwrap(); + fs::write(dir.path().join("a.txt"), "").unwrap(); + fs::write(dir.path().join("b.txt"), "").unwrap(); + + let (result, had_failure) = + resolve_paths(&[dir.path().to_string_lossy().to_string()], dir.path()); + + let names: Vec<_> = result + .iter() + .map(|p| p.file_name().unwrap().to_string_lossy().into_owned()) + .collect(); + assert_eq!(names, ["a.txt", "b.txt", "c.txt"]); + assert!(!had_failure); + } + + #[test] + fn test_resolve_paths_relative_pattern_resolved_against_root() { + let dir = tempdir().unwrap(); + fs::write(dir.path().join("x.txt"), "").unwrap(); + + // "x.txt" is relative — resolve_paths should join it with the root. + let (result, had_failure) = resolve_paths(&["x.txt".to_string()], dir.path()); + + assert_eq!(result.len(), 1); + assert!(!had_failure); + } + + // ── check_file ──────────────────────────────────────────────────────────── + + #[test] + fn test_check_file_unreadable_sets_failure() { + let dir = tempdir().unwrap(); + let nonexistent = dir.path().join("ghost.txt"); + + let codebook = make_codebook(); + let (count, had_failure) = check_file( + &nonexistent, + "ghost.txt", + &codebook, + &mut HashSet::new(), + false, + false, + ); + + assert_eq!(count, 0); + assert!(had_failure, "unreadable file must set had_failure"); + } + + #[test] + fn test_check_file_flags_misspelling() { + let dir = tempdir().unwrap(); + let file = dir.path().join("bad.txt"); + // "actualbad" is not a real English word; the checker should flag it. 
+ fs::write(&file, "actualbad").unwrap(); + + let codebook = make_codebook(); + let (count, had_failure) = check_file( + &file, + "bad.txt", + &codebook, + &mut HashSet::new(), + false, + false, + ); + + assert!( + count > 0, + "expected at least one misspelling to be reported" + ); + assert!(!had_failure); + } + + #[test] + fn test_check_file_unique_counts_each_word_once_per_file() { + let dir = tempdir().unwrap(); + let file = dir.path().join("dup.txt"); + // The checker groups all occurrences of the same misspelled word into one + // WordLocation with multiple TextRanges; unique mode takes only the first. + fs::write(&file, "actualbad actualbad").unwrap(); + + let codebook = make_codebook(); + let mut had_failure = false; + + let (count_all, f) = check_file( + &file, + "dup.txt", + &codebook, + &mut HashSet::new(), + false, // unique = false + false, + ); + had_failure |= f; + + let (count_unique, f) = check_file( + &file, + "dup.txt", + &codebook, + &mut HashSet::new(), + true, // unique = true + false, + ); + had_failure |= f; + + assert_eq!(count_all, 2, "non-unique mode must count every occurrence"); + assert_eq!(count_unique, 1, "unique mode must count each word once"); + assert!(!had_failure); + } + + #[test] + fn test_check_file_unique_deduplicates_across_files() { + let dir = tempdir().unwrap(); + let file1 = dir.path().join("first.txt"); + let file2 = dir.path().join("second.txt"); + fs::write(&file1, "actualbad").unwrap(); + fs::write(&file2, "actualbad").unwrap(); + + let codebook = make_codebook(); + // A single shared `seen_words` set, exactly as run_lint uses it. 
+ let mut seen = HashSet::new(); + + let (count1, f1) = check_file(&file1, "first.txt", &codebook, &mut seen, true, false); + let (count2, f2) = check_file(&file2, "second.txt", &codebook, &mut seen, true, false); + + assert_eq!(count1, 1, "first file should report the misspelling"); + assert_eq!(count2, 0, "second file should skip the already-seen word"); + assert!(!f1); + assert!(!f2); + } + + // ── line/col correctness (pure StringOffsets, no network I/O) ──────────── + // + // These tests exercise the exact same utf8_to_char_pos call that check_file + // uses, confirming that columns are in Unicode characters rather than raw + // bytes and that the 0-based → 1-based conversion is applied correctly. + + #[test] + fn test_linecol_word_at_start_of_file() { + let text = "actualbad rest of line"; + let offsets = StringOffsets::::new(text); + let pos = offsets.utf8_to_char_pos(0); + assert_eq!(pos.line + 1, 1); + assert_eq!(pos.col + 1, 1); + } + + #[test] + fn test_linecol_word_on_second_line() { + // "ok text\n" is 8 bytes; "actualbad" starts at byte 8. + let text = "ok text\nactualbad more"; + let offsets = StringOffsets::::new(text); + let byte_offset = text.find("actualbad").unwrap(); + assert_eq!(byte_offset, 8, "sanity-check byte layout"); + + let pos = offsets.utf8_to_char_pos(byte_offset); + assert_eq!(pos.line + 1, 2, "should be on the second line"); + assert_eq!(pos.col + 1, 1, "should be the first column of that line"); + } + + #[test] + fn test_linecol_mid_line_ascii() { + // "hello " is 6 bytes/chars; "actualbad" starts at byte/char 6. 
+ let text = "hello actualbad"; + let offsets = StringOffsets::::new(text); + let byte_offset = text.find("actualbad").unwrap(); + assert_eq!(byte_offset, 6, "sanity-check byte layout"); + + let pos = offsets.utf8_to_char_pos(byte_offset); + assert_eq!(pos.line + 1, 1); + assert_eq!(pos.col + 1, 7); + } + + /// The old byte-count approach reported col=10 here; StringOffsets correctly + /// reports col=8 because "résumé" is 8 bytes but only 6 Unicode characters. + #[test] + fn test_linecol_multibyte_chars_before_word() { + // r(1) + é(2) + s(1) + u(1) + m(1) + é(2) + space(1) = 9 bytes, 7 chars. + // "actualbad" starts at byte 9, char offset 7 (0-based) → col 8 (1-based). + let text = "résumé actualbad"; + let offsets = StringOffsets::::new(text); + let byte_offset = text.find("actualbad").unwrap(); + assert_eq!(byte_offset, 9, "sanity-check byte layout"); + + let pos = offsets.utf8_to_char_pos(byte_offset); + assert_eq!(pos.line + 1, 1); + assert_eq!( + pos.col + 1, + 8, + "col must count Unicode chars (6 + space = 7, 1-based = 8), not bytes (9, 1-based = 10)" + ); + } + + #[test] + fn test_linecol_emoji_before_word() { + // 🐛 is U+1F41B — 4 UTF-8 bytes, 1 Unicode char. + // "🐛 " = 5 bytes, 2 chars; "actualbad" starts at byte 5, char 2 (0-based) → col 3. 
+ let text = "🐛 actualbad"; + let offsets = StringOffsets::::new(text); + let byte_offset = text.find("actualbad").unwrap(); + assert_eq!(byte_offset, 5, "sanity-check byte layout"); + + let pos = offsets.utf8_to_char_pos(byte_offset); + assert_eq!(pos.line + 1, 1); + assert_eq!( + pos.col + 1, + 3, + "col must count Unicode chars (emoji=1, space=1, 1-based=3), not bytes (5, 1-based=6)" + ); + } } From adcf2787e0c897c53ae8781c92abc3aee943d283 Mon Sep 17 00:00:00 2001 From: niekdomi <152265924+niekdomi@users.noreply.github.com> Date: Sat, 21 Mar 2026 20:19:10 +0100 Subject: [PATCH 6/8] update tests --- crates/codebook-lsp/src/lint.rs | 470 ++++++-------------------------- 1 file changed, 86 insertions(+), 384 deletions(-) diff --git a/crates/codebook-lsp/src/lint.rs b/crates/codebook-lsp/src/lint.rs index 9c2faadd..3f0c8ac5 100644 --- a/crates/codebook-lsp/src/lint.rs +++ b/crates/codebook-lsp/src/lint.rs @@ -21,14 +21,20 @@ macro_rules! err { }; } +macro_rules! paint { + ($val:expr, $stream:expr, $style:expr) => { + $val.if_supports_color($stream, |t| t.style($style)) + }; +} + fn fatal(msg: impl std::fmt::Display) -> ! { err!("{msg}"); std::process::exit(2); } /// Computes a workspace-relative path string for a given file. Falls back to -/// the absolute path if the file is outside the workspace or canonicalization fails. -/// `root_canonical` should be the already-canonicalized workspace root. +/// the absolute path if the file is outside the workspace or canonicalization +/// fails. `root_canonical` should be the already-canonicalized workspace root. 
fn relative_to_root(root_canonical: Option<&Path>, path: &Path) -> String { root_canonical .and_then(|root| { @@ -80,8 +86,8 @@ pub fn run_lint(files: &[String], root: &Path, unique: bool, suggest: bool) -> b let unique_label = if unique { "unique " } else { "" }; eprintln!( "Found {} {unique_label}spelling error(s) in {} file(s).", - total_errors.if_supports_color(Stream::Stderr, |t| t.style(BOLD)), - files_with_errors.if_supports_color(Stream::Stderr, |t| t.style(BOLD)), + paint!(total_errors, Stream::Stderr, BOLD), + paint!(files_with_errors, Stream::Stderr, BOLD), ); if had_failure { @@ -94,8 +100,8 @@ pub fn run_lint(files: &[String], root: &Path, unique: bool, suggest: bool) -> b /// Spell-checks a single file and prints any diagnostics to stdout. /// /// Returns `(error_count, had_io_error)`. `error_count` is 0 if the file was -/// clean; `had_io_error` is true when the file could not be read. -/// `relative` is the workspace-relative path used for display and ignore matching. +/// clean; `had_io_error` is true when the file could not be read. `relative` is +/// the workspace-relative path used for display and ignore matching. fn check_file( path: &Path, relative: &str, @@ -114,17 +120,15 @@ fn check_file( let display = relative.strip_prefix("./").unwrap_or(relative); - // Build the offset table once per file – O(n) construction, O(1) per lookup – - // and gives Unicode character columns rather than raw byte offsets. + // Build the offset table once per file let offsets = StringOffsets::::new(&text); - let mut locations = codebook.spell_check(&text, None, Some(relative)); // Sort by first occurrence in the file. locations.sort_by_key(|l| l.locations.first().map(|r| r.start_byte).unwrap_or(0)); - // Collect hits first so we can compute pad_len for column alignment. - // The unique check is per-word (outer loop) so all ranges of a word are - // included or skipped together. + // Collect hits first so we can compute pad_len for column alignment. 
The + // unique check is per-word, so all ranges of a word are included or skipped + // together. let mut hits: Vec<(String, &str, Option>)> = Vec::new(); for wl in &locations { if unique && !seen_words.insert(wl.word.to_lowercase()) { @@ -137,7 +141,7 @@ fn check_file( None }; - // In unique mode only emit the first occurrence of each word. + // If unique mode: Only emit the first occurrence of each word. let ranges = if unique { &wl.locations[..1] } else { @@ -147,12 +151,14 @@ fn check_file( for (i, range) in ranges.iter().enumerate() { // utf8_to_char_pos returns 0-based line and Unicode-char column. let pos = offsets.utf8_to_char_pos(range.start_byte.min(text.len())); + // Move out of `suggestions` on the last iteration to avoid a clone. let sugg = if i + 1 < ranges.len() { suggestions.clone() } else { suggestions.take() }; + hits.push(( format!("{}:{}", pos.line + 1, pos.col + 1), wl.word.as_str(), @@ -175,17 +181,14 @@ fn check_file( let pad = " ".repeat(pad_len - linecol.len()); print!( " {}:{}{} {}", - display.if_supports_color(Stream::Stdout, |t| t.style(DIM)), - linecol.if_supports_color(Stream::Stdout, |t| t.style(YELLOW)), + paint!(display, Stream::Stdout, DIM), + paint!(linecol, Stream::Stdout, YELLOW), pad, - word.if_supports_color(Stream::Stdout, |t| t.style(BOLD_RED)), + paint!(word, Stream::Stdout, BOLD_RED), ); - if let Some(suggestions) = suggestions { - println!( - " {}", - format!("→ {}", suggestions.join(", ")) - .if_supports_color(Stream::Stdout, |t| t.style(DIM)), - ); + if let Some(s) = suggestions { + let text = format!("→ {}", s.join(", ")); + println!(" {}", paint!(text, Stream::Stdout, DIM)); } else { println!(); } @@ -232,7 +235,7 @@ fn resolve_paths(patterns: &[String], root: &Path) -> (Vec, bool) { let mut had_failure = false; for pattern in patterns { - // root.join() is a no-op when pattern is absolute (replaces root entirely). 
+ // root.join() is a no-op when pattern is absolute let p = root.join(pattern); if p.is_dir() { had_failure |= collect_dir(&p, &mut paths); @@ -276,8 +279,8 @@ fn resolve_paths(patterns: &[String], root: &Path) -> (Vec, bool) { (paths, had_failure) } -/// Recursively collects all files under `dir` into `out`. -/// Returns `true` if any directory-entry I/O error occurred (e.g. permission denied). +/// Recursively collects all files under `dir` into `out`. Returns `true` if any +/// directory-entry I/O error occurred. fn collect_dir(dir: &Path, out: &mut Vec) -> bool { let mut had_failure = false; for entry in walkdir::WalkDir::new(dir).follow_links(false) { @@ -306,385 +309,84 @@ mod tests { use std::sync::Arc; use tempfile::tempdir; - /// Builds a Codebook backed by the default in-memory config (en_us dictionary). - /// This mirrors the pattern used throughout the rest of the test suite. - fn make_codebook() -> Codebook { - let config = Arc::new(CodebookConfigMemory::default()); - Codebook::new(config).unwrap() - } - - // ── relative_to_root ───────────────────────────────────────────────────── - #[test] - fn test_relative_to_root_inside_workspace() { - let dir = tempdir().unwrap(); - let subdir = dir.path().join("src"); - fs::create_dir_all(&subdir).unwrap(); - let file = subdir.join("lib.rs"); - fs::write(&file, "").unwrap(); - - let root_canonical = dir.path().canonicalize().unwrap(); - let result = relative_to_root(Some(&root_canonical), &file); - assert_eq!(result, "src/lib.rs"); - } - - #[test] - fn test_relative_to_root_outside_workspace() { - let root = tempdir().unwrap(); - let other = tempdir().unwrap(); - let file = other.path().join("outside.rs"); - fs::write(&file, "").unwrap(); - - let root_canonical = root.path().canonicalize().unwrap(); - let result = relative_to_root(Some(&root_canonical), &file); - // Falls back to a path string since the file is outside the workspace. 
- assert!(result.contains("outside.rs")); - } - - #[test] - fn test_relative_to_root_none_returns_raw_path() { - let result = relative_to_root(None, Path::new("/some/path/file.rs")); - assert_eq!(result, "/some/path/file.rs"); - } - - // ── collect_dir ─────────────────────────────────────────────────────────── - - #[test] - fn test_collect_dir_empty_dir() { - let dir = tempdir().unwrap(); - let mut out = Vec::new(); - let had_failure = collect_dir(dir.path(), &mut out); - assert!(out.is_empty()); - assert!(!had_failure); - } - - #[test] - fn test_collect_dir_collects_files_recursively() { + fn test_path_and_dir_resolution() { let dir = tempdir().unwrap(); let sub = dir.path().join("sub"); fs::create_dir_all(&sub).unwrap(); - fs::write(dir.path().join("a.txt"), "").unwrap(); - fs::write(sub.join("b.txt"), "").unwrap(); - - let mut out = Vec::new(); - let had_failure = collect_dir(dir.path(), &mut out); - - assert_eq!(out.len(), 2); - assert!(!had_failure); - } - #[test] - fn test_collect_dir_only_yields_files_not_directories() { - let dir = tempdir().unwrap(); - let sub = dir.path().join("subdir"); - fs::create_dir_all(&sub).unwrap(); - fs::write(sub.join("c.txt"), "").unwrap(); + let f1 = dir.path().join("a.rs"); + let f2 = sub.join("b.txt"); + fs::write(&f1, "").unwrap(); + fs::write(&f2, "").unwrap(); - let mut out = Vec::new(); - let had_failure = collect_dir(dir.path(), &mut out); + let root_canon = dir.path().canonicalize().unwrap(); + assert_eq!(relative_to_root(Some(&root_canon), &f1), "a.rs"); - assert!( - out.iter().all(|p| p.is_file()), - "only files should be collected, not directories" - ); - assert!(!had_failure); - } - - // ── resolve_paths ───────────────────────────────────────────────────────── - - #[test] - fn test_resolve_paths_single_existing_file() { - let dir = tempdir().unwrap(); - let file = dir.path().join("hello.txt"); - fs::write(&file, "").unwrap(); - - let (result, had_failure) = - resolve_paths(&[file.to_string_lossy().to_string()], 
dir.path()); - - assert_eq!(result, vec![file]); - assert!(!had_failure); - } - - #[test] - fn test_resolve_paths_missing_file_sets_failure() { - let dir = tempdir().unwrap(); - let nonexistent = dir.path().join("nope.txt"); - - let (result, had_failure) = - resolve_paths(&[nonexistent.to_string_lossy().to_string()], dir.path()); - - assert!(result.is_empty()); - assert!(had_failure, "unmatched path should set had_failure"); - } - - #[test] - fn test_resolve_paths_directory_collects_all_files() { - let dir = tempdir().unwrap(); - let sub = dir.path().join("sub"); - fs::create_dir_all(&sub).unwrap(); - fs::write(dir.path().join("a.txt"), "").unwrap(); - fs::write(sub.join("b.txt"), "").unwrap(); - - let (result, had_failure) = - resolve_paths(&[dir.path().to_string_lossy().to_string()], dir.path()); - - assert_eq!(result.len(), 2); - assert!(!had_failure); - } - - #[test] - fn test_resolve_paths_glob_matches_by_extension() { - let dir = tempdir().unwrap(); - fs::write(dir.path().join("foo.rs"), "").unwrap(); - fs::write(dir.path().join("bar.rs"), "").unwrap(); - fs::write(dir.path().join("baz.txt"), "").unwrap(); - - let pattern = format!("{}/*.rs", dir.path().to_string_lossy()); - let (result, had_failure) = resolve_paths(&[pattern], dir.path()); - - assert_eq!(result.len(), 2, "only .rs files should match"); - assert!( - result - .iter() - .all(|p| p.extension().unwrap_or_default() == "rs") - ); - assert!(!had_failure); - } - - #[test] - fn test_resolve_paths_glob_no_match_sets_failure() { - let dir = tempdir().unwrap(); // empty dir — no .rs files - let pattern = format!("{}/*.rs", dir.path().to_string_lossy()); + let pattern = format!("{}/**/*.*", dir.path().display()); + let (paths, err) = resolve_paths(&[pattern], dir.path()); - let (result, had_failure) = resolve_paths(&[pattern], dir.path()); + assert!(!err); + assert_eq!(paths.len(), 2); + let path_strs: HashSet<_> = paths.iter().map(|p| p.to_string_lossy()).collect(); + assert!(path_strs.iter().any(|s| 
s.ends_with("a.rs"))); + assert!(path_strs.iter().any(|s| s.ends_with("b.txt"))); - assert!(result.is_empty()); - assert!(had_failure, "unmatched glob should set had_failure"); + let (_, err_missing) = resolve_paths(&["nonexistent.rs".into()], dir.path()); + assert!(err_missing); } #[test] - fn test_resolve_paths_deduplicates_same_file() { + fn test_check_file_logic() { let dir = tempdir().unwrap(); - let file = dir.path().join("hello.txt"); - fs::write(&file, "").unwrap(); + let f = dir.path().join("test.txt"); + fs::write(&f, "actualbad\n🦀 actualbad").unwrap(); - let path_str = file.to_string_lossy().to_string(); - // List the same file twice — it should appear only once in the output. - let (result, had_failure) = resolve_paths(&[path_str.clone(), path_str], dir.path()); - - assert_eq!(result.len(), 1); - assert!(!had_failure); - } - - #[test] - fn test_resolve_paths_output_is_sorted() { - let dir = tempdir().unwrap(); - // Create files in non-alphabetical order. - fs::write(dir.path().join("c.txt"), "").unwrap(); - fs::write(dir.path().join("a.txt"), "").unwrap(); - fs::write(dir.path().join("b.txt"), "").unwrap(); - - let (result, had_failure) = - resolve_paths(&[dir.path().to_string_lossy().to_string()], dir.path()); - - let names: Vec<_> = result - .iter() - .map(|p| p.file_name().unwrap().to_string_lossy().into_owned()) - .collect(); - assert_eq!(names, ["a.txt", "b.txt", "c.txt"]); - assert!(!had_failure); - } - - #[test] - fn test_resolve_paths_relative_pattern_resolved_against_root() { - let dir = tempdir().unwrap(); - fs::write(dir.path().join("x.txt"), "").unwrap(); - - // "x.txt" is relative — resolve_paths should join it with the root. 
- let (result, had_failure) = resolve_paths(&["x.txt".to_string()], dir.path()); - - assert_eq!(result.len(), 1); - assert!(!had_failure); - } - - // ── check_file ──────────────────────────────────────────────────────────── - - #[test] - fn test_check_file_unreadable_sets_failure() { - let dir = tempdir().unwrap(); - let nonexistent = dir.path().join("ghost.txt"); - - let codebook = make_codebook(); - let (count, had_failure) = check_file( - &nonexistent, - "ghost.txt", - &codebook, - &mut HashSet::new(), - false, - false, - ); - - assert_eq!(count, 0); - assert!(had_failure, "unreadable file must set had_failure"); - } - - #[test] - fn test_check_file_flags_misspelling() { - let dir = tempdir().unwrap(); - let file = dir.path().join("bad.txt"); - // "actualbad" is not a real English word; the checker should flag it. - fs::write(&file, "actualbad").unwrap(); - - let codebook = make_codebook(); - let (count, had_failure) = check_file( - &file, - "bad.txt", - &codebook, - &mut HashSet::new(), - false, - false, - ); - - assert!( - count > 0, - "expected at least one misspelling to be reported" - ); - assert!(!had_failure); - } + let cb = Codebook::new(Arc::new(CodebookConfigMemory::default())).unwrap(); + let mut seen = HashSet::new(); - #[test] - fn test_check_file_unique_counts_each_word_once_per_file() { - let dir = tempdir().unwrap(); - let file = dir.path().join("dup.txt"); - // The checker groups all occurrences of the same misspelled word into one - // WordLocation with multiple TextRanges; unique mode takes only the first. 
- fs::write(&file, "actualbad actualbad").unwrap(); - - let codebook = make_codebook(); - let mut had_failure = false; - - let (count_all, f) = check_file( - &file, - "dup.txt", - &codebook, - &mut HashSet::new(), - false, // unique = false + // Test basic flagging and multi-occurrence counting + let (count, err) = check_file(&f, "test.txt", &cb, &mut seen, false, false); + assert_eq!(count, 2); + assert!(!err); + + // Test unique mode + let mut seen_unique = HashSet::new(); + let (c1, _) = check_file(&f, "f1.txt", &cb, &mut seen_unique, true, false); + let (c2, _) = check_file(&f, "f2.txt", &cb, &mut seen_unique, true, false); + assert_eq!(c1, 1, "Should flag word once"); + assert_eq!(c2, 0, "Should skip already-seen word in second file"); + + // Test IO failure + let (_, err_io) = check_file( + &dir.path().join("missing"), + "!", + &cb, + &mut seen, false, - ); - had_failure |= f; - - let (count_unique, f) = check_file( - &file, - "dup.txt", - &codebook, - &mut HashSet::new(), - true, // unique = true false, ); - had_failure |= f; - - assert_eq!(count_all, 2, "non-unique mode must count every occurrence"); - assert_eq!(count_unique, 1, "unique mode must count each word once"); - assert!(!had_failure); - } - - #[test] - fn test_check_file_unique_deduplicates_across_files() { - let dir = tempdir().unwrap(); - let file1 = dir.path().join("first.txt"); - let file2 = dir.path().join("second.txt"); - fs::write(&file1, "actualbad").unwrap(); - fs::write(&file2, "actualbad").unwrap(); - - let codebook = make_codebook(); - // A single shared `seen_words` set, exactly as run_lint uses it. 
- let mut seen = HashSet::new(); - - let (count1, f1) = check_file(&file1, "first.txt", &codebook, &mut seen, true, false); - let (count2, f2) = check_file(&file2, "second.txt", &codebook, &mut seen, true, false); - - assert_eq!(count1, 1, "first file should report the misspelling"); - assert_eq!(count2, 0, "second file should skip the already-seen word"); - assert!(!f1); - assert!(!f2); - } - - // ── line/col correctness (pure StringOffsets, no network I/O) ──────────── - // - // These tests exercise the exact same utf8_to_char_pos call that check_file - // uses, confirming that columns are in Unicode characters rather than raw - // bytes and that the 0-based → 1-based conversion is applied correctly. - - #[test] - fn test_linecol_word_at_start_of_file() { - let text = "actualbad rest of line"; - let offsets = StringOffsets::::new(text); - let pos = offsets.utf8_to_char_pos(0); - assert_eq!(pos.line + 1, 1); - assert_eq!(pos.col + 1, 1); - } - - #[test] - fn test_linecol_word_on_second_line() { - // "ok text\n" is 8 bytes; "actualbad" starts at byte 8. - let text = "ok text\nactualbad more"; - let offsets = StringOffsets::::new(text); - let byte_offset = text.find("actualbad").unwrap(); - assert_eq!(byte_offset, 8, "sanity-check byte layout"); - - let pos = offsets.utf8_to_char_pos(byte_offset); - assert_eq!(pos.line + 1, 2, "should be on the second line"); - assert_eq!(pos.col + 1, 1, "should be the first column of that line"); - } - - #[test] - fn test_linecol_mid_line_ascii() { - // "hello " is 6 bytes/chars; "actualbad" starts at byte/char 6. 
- let text = "hello actualbad"; - let offsets = StringOffsets::::new(text); - let byte_offset = text.find("actualbad").unwrap(); - assert_eq!(byte_offset, 6, "sanity-check byte layout"); - - let pos = offsets.utf8_to_char_pos(byte_offset); - assert_eq!(pos.line + 1, 1); - assert_eq!(pos.col + 1, 7); - } - - /// The old byte-count approach reported col=10 here; StringOffsets correctly - /// reports col=8 because "résumé" is 8 bytes but only 6 Unicode characters. - #[test] - fn test_linecol_multibyte_chars_before_word() { - // r(1) + é(2) + s(1) + u(1) + m(1) + é(2) + space(1) = 9 bytes, 7 chars. - // "actualbad" starts at byte 9, char offset 7 (0-based) → col 8 (1-based). - let text = "résumé actualbad"; - let offsets = StringOffsets::::new(text); - let byte_offset = text.find("actualbad").unwrap(); - assert_eq!(byte_offset, 9, "sanity-check byte layout"); - - let pos = offsets.utf8_to_char_pos(byte_offset); - assert_eq!(pos.line + 1, 1); - assert_eq!( - pos.col + 1, - 8, - "col must count Unicode chars (6 + space = 7, 1-based = 8), not bytes (9, 1-based = 10)" - ); + assert!(err_io); } #[test] - fn test_linecol_emoji_before_word() { - // 🐛 is U+1F41B — 4 UTF-8 bytes, 1 Unicode char. - // "🐛 " = 5 bytes, 2 chars; "actualbad" starts at byte 5, char 2 (0-based) → col 3. 
-        let text = "🐛 actualbad";
-        let offsets = StringOffsets::<AllConfig>::new(text);
-        let byte_offset = text.find("actualbad").unwrap();
-        assert_eq!(byte_offset, 5, "sanity-check byte layout");
-
-        let pos = offsets.utf8_to_char_pos(byte_offset);
-        assert_eq!(pos.line + 1, 1);
-        assert_eq!(
-            pos.col + 1,
-            3,
-            "col must count Unicode chars (emoji=1, space=1, 1-based=3), not bytes (5, 1-based=6)"
-        );
+    fn test_unicode_line_col() {
+        let cases = [
+            ("actualbad", 0, 1, 1),        // Start
+            ("ok\nactualbad", 3, 2, 1),    // Newline
+            ("résumé actualbad", 9, 1, 8), // Multi-byte chars (é is 2 bytes)
+            ("🦀 actualbad", 5, 1, 3),     // Emoji (4 bytes, 1 char)
+        ];
+
+        for (text, offset, line, col) in cases {
+            let table = StringOffsets::<AllConfig>::new(text);
+            let pos = table.utf8_to_char_pos(offset);
+            assert_eq!(
+                (pos.line + 1, pos.col + 1),
+                (line, col),
+                "Failed on: {}",
+                text
+            );
+        }
+    }
 }

From 086e983d2d65ee1a0efbf9b870392fd06139752b Mon Sep 17 00:00:00 2001
From: niekdomi <152265924+niekdomi@users.noreply.github.com>
Date: Mon, 23 Mar 2026 10:45:16 +0100
Subject: [PATCH 7/8] Update readme

---
 README.md | 20 ++++++++++++++++++++
 1 file changed, 20 insertions(+)

diff --git a/README.md b/README.md
index 074e96f5..2b1486d0 100644
--- a/README.md
+++ b/README.md
@@ -117,6 +117,26 @@ Any editor that implements the Language Server Protocol should be compatible wit
 codebook-lsp serve
 ```
 
+### CLI (Lint)
+
+Codebook can also be used as a standalone command-line spell checker, which is useful for CI pipelines, pre-commit hooks, or one-off checks.
+ +```sh +# Check specific files +codebook-lsp lint src/main.rs src/lib.rs + +# Check all files in a directory (recursive) +codebook-lsp lint src/ + +# Show spelling suggestions +codebook-lsp lint --suggest src/ + +# Only report each misspelled word once across all files +codebook-lsp lint --unique src/ +``` + +The exit code is **0** if all files are clean, **1** if any spelling errors are found, and **2** if there was an unreadable files, invalid UTF-8, etc... + ## About Codebook is a spell checker for code. It binds together the venerable Tree Sitter and the fast spell checker [Spellbook](https://github.com/helix-editor/spellbook). Included is a Language Server for use in (theoretically) any editor. Everything is done in Rust to keep response times snappy and memory usage _low_. From fc626039190073d1718e220bfc77c216dde72a93 Mon Sep 17 00:00:00 2001 From: niekdomi <152265924+niekdomi@users.noreply.github.com> Date: Mon, 23 Mar 2026 11:43:14 +0100 Subject: [PATCH 8/8] apply copilot review --- README.md | 2 +- crates/codebook-lsp/src/lint.rs | 6 +++++- 2 files changed, 6 insertions(+), 2 deletions(-) diff --git a/README.md b/README.md index 2b1486d0..908283b7 100644 --- a/README.md +++ b/README.md @@ -135,7 +135,7 @@ codebook-lsp lint --suggest src/ codebook-lsp lint --unique src/ ``` -The exit code is **0** if all files are clean, **1** if any spelling errors are found, and **2** if there was an unreadable files, invalid UTF-8, etc... +The exit code is **0** if all files are clean, **1** if any spelling errors are found, and **2** if there were unreadable files, invalid UTF-8, etc. 
## About diff --git a/crates/codebook-lsp/src/lint.rs b/crates/codebook-lsp/src/lint.rs index 3f0c8ac5..3a1b1cc0 100644 --- a/crates/codebook-lsp/src/lint.rs +++ b/crates/codebook-lsp/src/lint.rs @@ -123,7 +123,11 @@ fn check_file( // Build the offset table once per file let offsets = StringOffsets::::new(&text); let mut locations = codebook.spell_check(&text, None, Some(relative)); - // Sort by first occurrence in the file. + // Sort inner locations first (HashSet iteration order is nondeterministic), + // then sort the outer list by first occurrence in the file. + for wl in &mut locations { + wl.locations.sort_by_key(|r| r.start_byte); + } locations.sort_by_key(|l| l.locations.first().map(|r| r.start_byte).unwrap_or(0)); // Collect hits first so we can compute pad_len for column alignment. The