blopker
diff --git a/.claude/settings.local.json b/.claude/settings.local.json
Lines changed: 2 additions & 1 deletion
diff --git a/CHANGELOG.md b/CHANGELOG.md
Lines changed: 5 additions & 0 deletions
diff --git a/README.md b/README.md
Lines changed: 34 additions & 62 deletions
diff --git a/crates/codebook-config/src/helpers.rs b/crates/codebook-config/src/helpers.rs
Lines changed: 0 additions & 87 deletions
diff --git a/crates/codebook-config/src/lib.rs b/crates/codebook-config/src/lib.rs
Lines changed: 30 additions & 20 deletions
@@ -13,7 +13,8 @@
       "Bash(git remote get-url:*)",
       "Bash(gh issue list:*)",
       "Bash(gh issue view:*)",
-      "Bash(gh repo view:*)"
+      "Bash(gh repo view:*)",
+      "Bash(cargo build:*)"
     ]
   }
 }
@@ -1,3 +1,8 @@
+[Unreleased]
+
+- Add tag-based filtering (`include_tags`/`exclude_tags`) to control which parts of code are spell-checked (comments, strings, identifiers, etc.)
+- Rename tree-sitter capture names to use dot-separated namespace convention (e.g., `@identifier.function` instead of `@func_declaration`)
+
 [0.3.34]
 
 - Fix crash in Termux by falling back to bundled Mozilla CA roots on Android (#230)
 
@@ -293,6 +293,19 @@ ignore_patterns = [
 # Set to 2 to check words with 2 or more characters
 min_word_length = 3
 
+# Filter which parts of your code are spell-checked by tag.
+# Tags use a dot-separated hierarchy (e.g., "comment", "identifier.function").
+# Matching is prefix-based: "comment" matches "comment", "comment.line",
+# "comment.block", etc.
+#
+# Only check these tags (if set, everything else is excluded)
+# Default: [] (empty = check everything)
+include_tags = ["comment", "string"]
+#
+# Exclude these tags from checking (takes precedence over include_tags)
+# Default: []
+exclude_tags = ["string.heredoc"]
+
 # Whether to use global configuration (project config only)
 # Set to false to completely ignore global settings
 # Default: true
@@ -355,6 +368,26 @@ ignore_patterns = [
 
 **Tip**: Include the identifier in your pattern. `'vim\.opt\.[a-z]+'` skips `showmode` in `vim.opt.showmode`, but `'vim\.opt\.'` alone won't (it only matches up to the dot).
 
+### Tag-Based Filtering
+
+Codebook categorizes every piece of text it checks using **tags** — dot-separated labels like `comment`, `string`, `identifier.function`, etc. You can use `include_tags` and `exclude_tags` to control which categories are spell-checked.
+
+Matching is **prefix-based**: `"comment"` matches `comment`, `comment.line`, `comment.block`, etc. `include_tags` narrows what is checked (allowlist), and `exclude_tags` removes from that set (blocklist, takes precedence). This works the same way as `include_paths`/`ignore_paths`.
+
+```toml
+# Only check comments and strings, ignore all identifiers
+include_tags = ["comment", "string"]
+
+# Check everything except variable and parameter names
+exclude_tags = ["identifier.variable", "identifier.parameter"]
+
+# Both can be combined: check comments and strings, but skip heredocs
+include_tags = ["comment", "string"]
+exclude_tags = ["string.heredoc"]
+```
+
+For the full list of available tags, see the [query tag reference](crates/codebook/src/queries/README.md).
+
 ### LSP Initialization Options
 
 Editors can pass `initializationOptions` when starting the Codebook LSP for LSP-specific options. Refer to your editor's documentation for how to apply these options. All values are optional, omit them for the default behavior:
@@ -451,68 +484,7 @@ For plain text dictionaries, use `TextRepo::new()` instead and add to `TEXT_DICT
 
 ## Adding New Programming Language Support
 
-Codebook uses Tree-sitter support additional programming languages. Here's how to add support for a new language:
-
-### 1. Create a Tree-sitter Query
-
-Each language needs a Tree-sitter query file that defines which parts of the code should be checked for spelling issues. The query needs to capture:
-
-- Identifiers (variable names, function names, class names, etc.)
-- String literals
-- Comments
-
-Create a new `.scm` file in `codebook/crates/codebook/src/queries/` named after your language (e.g., `java.scm`).
-
-### 2. Understand the Language's AST
-
-To write an effective query, you need to understand the Abstract Syntax Tree (AST) structure of your language. Use these tools:
-
-- [Tree-sitter Playground](https://tree-sitter.github.io/tree-sitter/7-playground.html): Interactively explore how Tree-sitter parses code
-- [Tree-sitter Visualizer](https://blopker.github.io/ts-visualizer/): Visualize the AST of your code in a more detailed way
-
-A good approach is to:
-
-1. Write sample code with identifiers, strings, and comments
-2. Paste it into the playground/visualizer
-3. Observe the node types used for each element
-4. Create capture patterns that target only definition nodes, not usages
-
-### 3. Update the Language Settings
-
-Add your language to `codebook/crates/codebook/src/queries.rs`:
-
-1. Add a new variant to the `LanguageType` enum
-2. Add a new entry to the `LANGUAGE_SETTINGS` array with:
-   - The language type
-   - File extensions for your language
-   - Language identifiers
-   - Path to your query file
-
-### 4. Add the Tree-sitter Grammar
-
-Make sure the appropriate Tree-sitter grammar is added as a dependency in `Cargo.toml` and update the `language()` function in `queries.rs` to return the correct language parser.
-
-### 5. Test Your Implementation
-
-Run the tests to ensure your query is valid:
-
-```bash
-cargo test -p codebook queries::tests::test_all_queries_are_valid
-```
-
-Additional language tests should go in `codebook/tests`. There are many example tests to copy.
-
-You can also test with real code files to verify that Codebook correctly identifies spelling issues in your language. Example files should go in `examples/` and contain at least one spelling error to pass integration tests.
-
-### Tips for Writing Effective Queries
-
-- Focus on capturing definitions, not usages
-- Include only nodes that contain user-defined text (not keywords)
-- Test with representative code samples
-- Start simple and add complexity as needed
-- Look at existing language queries for patterns
-
-If you've successfully added support for a new language, please consider contributing it back to Codebook with a pull request!
+See the [query development guide](crates/codebook/src/queries/README.md) for instructions on adding Tree-sitter queries for new languages, the tag naming convention, and tips for writing effective queries.
 
 ## Running Tests
 
 
@@ -1,5 +1,3 @@
-use crate::settings::ConfigSettings;
-use glob::Pattern;
 use log::error;
 use regex::{Regex, RegexBuilder};
 use std::env;
@@ -57,86 +55,6 @@ pub(crate) fn unix_cache_dir() -> PathBuf {
     env::temp_dir().join("codebook").join("cache")
 }
 
-/// Insert a word into the allowlist, returning true when it was newly added.
-pub(crate) fn insert_word(settings: &mut ConfigSettings, word: &str) -> bool {
-    let word = word.to_ascii_lowercase();
-    if settings.words.contains(&word) {
-        return false;
-    }
-    settings.words.push(word);
-    settings.words.sort();
-    settings.words.dedup();
-    true
-}
-
-/// Insert a path into the ignore list, returning true when it was newly added.
-pub(crate) fn insert_ignore(settings: &mut ConfigSettings, file: &str) -> bool {
-    let file = file.to_string();
-    if settings.ignore_paths.contains(&file) {
-        return false;
-    }
-    settings.ignore_paths.push(file);
-    settings.ignore_paths.sort();
-    settings.ignore_paths.dedup();
-    true
-}
-
-/// Insert a path into the include list, returning true when it was newly added.
-pub(crate) fn insert_include(settings: &mut ConfigSettings, file: &str) -> bool {
-    let file = file.to_string();
-    if settings.include_paths.contains(&file) {
-        return false;
-    }
-    settings.include_paths.push(file);
-    settings.include_paths.sort();
-    settings.include_paths.dedup();
-    true
-}
-
-/// Resolve configured dictionary IDs, providing a default when none are set.
-pub(crate) fn dictionary_ids(settings: &ConfigSettings) -> Vec<String> {
-    if settings.dictionaries.is_empty() {
-        vec!["en_us".to_string()]
-    } else {
-        settings.dictionaries.clone()
-    }
-}
-
-fn match_pattern(pattern: &[String], path_str: &str) -> bool {
-    pattern.iter().any(|pattern| {
-        Pattern::new(pattern)
-            .map(|p| p.matches(path_str))
-            .unwrap_or(false)
-    })
-}
-
-/// Determine whether a path should be included based on the configured glob patterns.
-pub(crate) fn should_include_path(settings: &ConfigSettings, path: &Path) -> bool {
-    if settings.include_paths.is_empty() {
-        return true;
-    }
-    let path_str = path.to_string_lossy();
-    match_pattern(&settings.include_paths, &path_str)
-}
-
-/// Determine whether a path should be ignored based on the configured glob patterns.
-pub(crate) fn should_ignore_path(settings: &ConfigSettings, path: &Path) -> bool {
-    let path_str = path.to_string_lossy();
-    match_pattern(&settings.ignore_paths, &path_str)
-}
-
-/// Check if a word is explicitly allowed.
-pub(crate) fn is_allowed_word(settings: &ConfigSettings, word: &str) -> bool {
-    let word = word.to_ascii_lowercase();
-    settings.words.iter().any(|w| w == &word)
-}
-
-/// Check if a word should be flagged.
-pub(crate) fn should_flag_word(settings: &ConfigSettings, word: &str) -> bool {
-    let word = word.to_ascii_lowercase();
-    settings.flag_words.iter().any(|w| w == &word)
-}
-
 /// Compile user-provided ignore regex patterns, dropping invalid entries.
 /// Patterns are compiled with multiline mode so `^` and `$` match line boundaries.
 pub(crate) fn build_ignore_regexes(patterns: &[String]) -> Vec<Regex> {
@@ -154,11 +72,6 @@ pub(crate) fn build_ignore_regexes(patterns: &[String]) -> Vec<Regex> {
         .collect()
 }
 
-/// Retrieve the configured minimum word length.
-pub(crate) fn min_word_length(settings: &ConfigSettings) -> usize {
-    settings.min_word_length
-}
-
 pub(crate) fn expand_tilde<P: AsRef<Path>>(path_user_input: P) -> Option<PathBuf> {
     let p = path_user_input.as_ref();
     if !p.starts_with("~") {
 
@@ -2,7 +2,8 @@ mod helpers;
 mod settings;
 mod watched_file;
 use crate::helpers::expand_tilde;
-use crate::settings::ConfigSettings;
+pub use crate::settings::ConfigSettings;
+
 use crate::watched_file::WatchedFile;
 use log::debug;
 use log::info;
@@ -32,6 +33,7 @@ pub trait CodebookConfig: Sync + Send + Debug {
     fn should_flag_word(&self, word: &str) -> bool;
     fn get_ignore_patterns(&self) -> Option<Vec<Regex>>;
     fn get_min_word_length(&self) -> usize;
+    fn should_check_tag(&self, tag: &str) -> bool;
     fn cache_dir(&self) -> &Path;
 }
 
@@ -474,51 +476,51 @@ impl CodebookConfigFile {
 impl CodebookConfig for CodebookConfigFile {
     /// Add a word to the project configs allowlist
     fn add_word(&self, word: &str) -> Result<bool, io::Error> {
-        Ok(self.update_project_settings(|settings| helpers::insert_word(settings, word)))
+        Ok(self.update_project_settings(|settings| settings.insert_word(word)))
     }
     /// Add a word to the global configs allowlist
     fn add_word_global(&self, word: &str) -> Result<bool, io::Error> {
-        Ok(self.update_global_settings(|settings| helpers::insert_word(settings, word)))
+        Ok(self.update_global_settings(|settings| settings.insert_word(word)))
     }
 
     /// Add a file to the ignore list
     fn add_ignore(&self, file: &str) -> Result<bool, io::Error> {
-        Ok(self.update_project_settings(|settings| helpers::insert_ignore(settings, file)))
+        Ok(self.update_project_settings(|settings| settings.insert_ignore(file)))
     }
 
     /// Add a file to the include list
     fn add_include(&self, file: &str) -> Result<bool, io::Error> {
-        Ok(self.update_project_settings(|settings| helpers::insert_include(settings, file)))
+        Ok(self.update_project_settings(|settings| settings.insert_include(file)))
     }
 
     /// Get dictionary IDs from effective configuration
     fn get_dictionary_ids(&self) -> Vec<String> {
         let snapshot = self.snapshot();
-        helpers::dictionary_ids(&snapshot)
+        snapshot.dictionary_ids()
     }
 
     /// Check if a path is included based on the effective configuration
     fn should_include_path(&self, path: &Path) -> bool {
         let snapshot = self.snapshot();
-        helpers::should_include_path(&snapshot, path)
+        snapshot.should_include_path(path)
     }
 
     /// Check if a path should be ignored based on the effective configuration
     fn should_ignore_path(&self, path: &Path) -> bool {
         let snapshot = self.snapshot();
-        helpers::should_ignore_path(&snapshot, path)
+        snapshot.should_ignore_path(path)
     }
 
     /// Check if a word is in the effective allowlist
     fn is_allowed_word(&self, word: &str) -> bool {
         let snapshot = self.snapshot();
-        helpers::is_allowed_word(&snapshot, word)
+        snapshot.is_allowed_word(word)
     }
 
     /// Check if a word should be flagged according to effective configuration
     fn should_flag_word(&self, word: &str) -> bool {
         let snapshot = self.snapshot();
-        helpers::should_flag_word(&snapshot, word)
+        snapshot.should_flag_word(word)
     }
 
     /// Get the list of user-defined ignore patterns
@@ -534,7 +536,11 @@ impl CodebookConfig for CodebookConfigFile {
 
     /// Get the minimum word length which should be checked
     fn get_min_word_length(&self) -> usize {
-        helpers::min_word_length(&self.snapshot())
+        self.snapshot().min_word_length()
+    }
+
+    fn should_check_tag(&self, tag: &str) -> bool {
+        self.snapshot().should_check_tag(tag)
     }
 
     fn cache_dir(&self) -> &Path {
@@ -576,7 +582,7 @@ impl CodebookConfigMemory {
 impl CodebookConfig for CodebookConfigMemory {
     fn add_word(&self, word: &str) -> Result<bool, io::Error> {
         let mut settings = self.settings.write().unwrap();
-        Ok(helpers::insert_word(&mut settings, word))
+        Ok(settings.insert_word(word))
     }
 
     fn add_word_global(&self, word: &str) -> Result<bool, io::Error> {
@@ -585,37 +591,37 @@ impl CodebookConfig for CodebookConfigMemory {
 
     fn add_ignore(&self, file: &str) -> Result<bool, io::Error> {
         let mut settings = self.settings.write().unwrap();
-        Ok(helpers::insert_ignore(&mut settings, file))
+        Ok(settings.insert_ignore(file))
     }
 
     fn add_include(&self, file: &str) -> Result<bool, io::Error> {
         let mut settings = self.settings.write().unwrap();
-        Ok(helpers::insert_include(&mut settings, file))
+        Ok(settings.insert_include(file))
     }
 
     fn get_dictionary_ids(&self) -> Vec<String> {
         let snapshot = self.snapshot();
-        helpers::dictionary_ids(&snapshot)
+        snapshot.dictionary_ids()
     }
 
     fn should_include_path(&self, path: &Path) -> bool {
         let snapshot = self.snapshot();
-        helpers::should_include_path(&snapshot, path)
+        snapshot.should_include_path(path)
     }
 
     fn should_ignore_path(&self, path: &Path) -> bool {
         let snapshot = self.snapshot();
-        helpers::should_ignore_path(&snapshot, path)
+        snapshot.should_ignore_path(path)
     }
 
     fn is_allowed_word(&self, word: &str) -> bool {
         let snapshot = self.snapshot();
-        helpers::is_allowed_word(&snapshot, word)
+        snapshot.is_allowed_word(word)
     }
 
     fn should_flag_word(&self, word: &str) -> bool {
         let snapshot = self.snapshot();
-        helpers::should_flag_word(&snapshot, word)
+        snapshot.should_flag_word(word)
     }
 
     fn get_ignore_patterns(&self) -> Option<Vec<Regex>> {
@@ -624,7 +630,11 @@ impl CodebookConfig for CodebookConfigMemory {
     }
 
     fn get_min_word_length(&self) -> usize {
-        helpers::min_word_length(&self.snapshot())
+        self.snapshot().min_word_length()
+    }
+
+    fn should_check_tag(&self, tag: &str) -> bool {
+        self.snapshot().should_check_tag(tag)
     }
 
     fn cache_dir(&self) -> &Path {
Original file line number	Diff line number	Diff line change
`@@ -13,7 +13,8 @@`
`13`	`13`	`"Bash(git remote get-url:*)",`
`14`	`14`	`"Bash(gh issue list:*)",`
`15`	`15`	`"Bash(gh issue view:*)",`
`16`		`- "Bash(gh repo view:*)"`
	`16`	`+ "Bash(gh repo view:*)",`
	`17`	`+ "Bash(cargo build:*)"`
`17`	`18`	`]`
`18`	`19`	`}`
`19`	`20`	`}`