Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
11 changes: 11 additions & 0 deletions Cargo.lock

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

1 change: 1 addition & 0 deletions Cargo.toml
Original file line number Diff line number Diff line change
Expand Up @@ -57,6 +57,7 @@ tree-sitter-html = "<0.25.0"
tree-sitter-java = "<0.25.0"
tree-sitter-javascript = "<0.26.0"
tree-sitter-lua = "<0.25.0"
tree-sitter-md = "<0.6.0"
tree-sitter-odin-codebook = "1.4.0"
tree-sitter-php = "<0.25.0"
tree-sitter-python = "<0.26.0"
Expand Down
1 change: 1 addition & 0 deletions crates/codebook/Cargo.toml
Original file line number Diff line number Diff line change
Expand Up @@ -42,6 +42,7 @@ tree-sitter-java.workspace = true
tree-sitter-javascript.workspace = true
codebook-tree-sitter-latex.workspace = true
tree-sitter-lua.workspace = true
tree-sitter-md.workspace = true
tree-sitter-odin-codebook.workspace = true
tree-sitter-php.workspace = true
tree-sitter-python.workspace = true
Expand Down
9 changes: 9 additions & 0 deletions crates/codebook/src/queries.rs
Original file line number Diff line number Diff line change
Expand Up @@ -18,6 +18,7 @@ pub enum LanguageType {
Javascript,
Latex,
Lua,
Markdown,
Odin,
Php,
Python,
Expand Down Expand Up @@ -199,6 +200,13 @@ pub static LANGUAGE_SETTINGS: &[LanguageSetting] = &[
query: include_str!("queries/lua.scm"),
extensions: &["lua"],
},
LanguageSetting {
type_: LanguageType::Markdown,
ids: &["markdown"],
dictionary_ids: &[],
query: include_str!("queries/markdown.scm"),
extensions: &["md", "markdown"],
},
LanguageSetting {
type_: LanguageType::Bash,
ids: &["bash", "shellscript", "sh", "shell script"],
Expand Down Expand Up @@ -291,6 +299,7 @@ impl LanguageSetting {
LanguageType::Javascript => Some(tree_sitter_javascript::LANGUAGE.into()),
LanguageType::Latex => Some(codebook_tree_sitter_latex::LANGUAGE.into()),
LanguageType::Lua => Some(tree_sitter_lua::LANGUAGE.into()),
LanguageType::Markdown => Some(tree_sitter_md::LANGUAGE.into()),
LanguageType::Odin => Some(tree_sitter_odin_codebook::LANGUAGE.into()),
LanguageType::Php => Some(tree_sitter_php::LANGUAGE_PHP.into()),
LanguageType::Python => Some(tree_sitter_python::LANGUAGE.into()),
Expand Down
2 changes: 2 additions & 0 deletions crates/codebook/src/queries/markdown.scm
Original file line number Diff line number Diff line change
@@ -0,0 +1,2 @@
; Capture the inline content of every paragraph as @string.
(paragraph (inline) @string)
; Capture the inline content of ATX headings (e.g. "# Title") as @string.
; Only these two patterns are matched; no other node kinds are captured here.
(atx_heading (inline) @string)
136 changes: 136 additions & 0 deletions crates/codebook/tests/test_markdown.rs
Original file line number Diff line number Diff line change
@@ -0,0 +1,136 @@
use codebook::{
parser::{TextRange, WordLocation},
queries::LanguageType,
};

mod utils;

/// A single typo in a plain Markdown paragraph is reported exactly once,
/// together with the byte range it occupies in the input.
#[test]
fn test_markdown_paragraph() {
    utils::init_logging();
    let checker = utils::get_processor();

    let input = "Some paragraph text with a misspeled word.\n";
    // "misspeled" starts at byte 27 of `input` and is 9 bytes long.
    let typo_span = TextRange {
        start_byte: 27,
        end_byte: 36,
    };
    let want = WordLocation::new("misspeled".to_string(), vec![typo_span]);

    let found = checker
        .spell_check(input, Some(LanguageType::Markdown), None)
        .to_vec();
    println!("Misspelled words: {found:?}");

    // Exactly one hit is expected; check its word and locations field by field.
    assert_eq!(found.len(), 1);
    let only_hit = &found[0];
    assert_eq!(only_hit.word, want.word);
    assert_eq!(only_hit.locations, want.locations);
}

/// Both typos placed in an ATX heading must show up in the spell-check results.
#[test]
fn test_markdown_heading() {
    utils::init_logging();
    let checker = utils::get_processor();
    let results = checker
        .spell_check(
            "# A headng with a tyypo\n",
            Some(LanguageType::Markdown),
            None,
        )
        .to_vec();

    // Reduce the results to the bare words so membership checks stay simple.
    let mut flagged: Vec<&str> = Vec::with_capacity(results.len());
    for hit in &results {
        flagged.push(hit.word.as_str());
    }
    println!("Misspelled words: {flagged:?}");

    assert!(flagged.iter().any(|w| *w == "headng"));
    assert!(flagged.iter().any(|w| *w == "tyypo"));
}

/// Words that occur only inside a fenced code block must not be reported.
#[test]
fn test_markdown_fenced_code_block_skipped() {
    utils::init_logging();
    let checker = utils::get_processor();
    let document = r#"# Hello World

Some correct text here.

```bash
mkdir some_dir
badwwword_in_code
```

More correct text here.
"#;
    let results = checker
        .spell_check(document, Some(LanguageType::Markdown), None)
        .to_vec();
    let flagged = results
        .iter()
        .map(|hit| hit.word.as_str())
        .collect::<Vec<&str>>();
    println!("Misspelled words: {flagged:?}");

    // Words inside fenced code blocks should NOT be flagged
    let code_only_words = ["mkdir", "badwwword", "dir"];
    let leaked: Vec<&str> = flagged
        .iter()
        .copied()
        .filter(|w| code_only_words.contains(w))
        .collect();
    assert_eq!(leaked, Vec::<&str>::new());
}

/// Prose typos around a fenced code block are reported, while a typo that
/// appears only inside the fence is not.
#[test]
fn test_markdown_fenced_code_block_with_typo_outside() {
    utils::init_logging();
    let checker = utils::get_processor();
    let document = r#"A paragrap with a tyypo.

```python
def some_functin():
pass
```

Another paragrap with a tyypo.
"#;
    let results = checker
        .spell_check(document, Some(LanguageType::Markdown), None)
        .to_vec();

    let mut flagged: Vec<&str> = Vec::new();
    for hit in results.iter() {
        flagged.push(hit.word.as_str());
    }
    println!("Misspelled words: {flagged:?}");

    // Typos in prose should be flagged
    for prose_typo in ["paragrap", "tyypo"] {
        assert!(flagged.contains(&prose_typo));
    }
    // Typos inside code blocks should NOT be flagged
    assert!(flagged.iter().all(|w| *w != "functin"));
}

/// With several fenced code blocks in one document, prose typos between the
/// fences are still reported and words found only inside the fences are not.
#[test]
fn test_markdown_multiple_code_blocks() {
    utils::init_logging();
    let checker = utils::get_processor();
    let document = r#"Some text with a tyypo.

```bash
mkdir somedir
```

Middle text is corect.

```python
badspel = True
```

End text is also corect.
"#;
    let results = checker
        .spell_check(document, Some(LanguageType::Markdown), None)
        .to_vec();
    let flagged: Vec<&str> = results.iter().map(|hit| hit.word.as_str()).collect();
    println!("Misspelled words: {flagged:?}");

    // Small helper so each expectation below reads as a single predicate.
    let is_flagged = |word: &str| flagged.iter().any(|w| *w == word);

    // Typos in the prose between the fences.
    assert!(is_flagged("tyypo"));
    assert!(is_flagged("corect"));
    // Words that occur only inside the fenced blocks.
    assert!(!is_flagged("mkdir"));
    assert!(!is_flagged("somedir"));
    assert!(!is_flagged("badspel"));
}

/// Typos inside a block quote (a line starting with `>`) must be reported.
#[test]
fn test_markdown_block_quote() {
    utils::init_logging();
    let checker = utils::get_processor();
    let quoted_line = "> A block quoet with a tyypo.\n";
    let results = checker
        .spell_check(quoted_line, Some(LanguageType::Markdown), None)
        .to_vec();

    let flagged = results
        .iter()
        .map(|hit| hit.word.as_str())
        .collect::<Vec<&str>>();
    println!("Misspelled words: {flagged:?}");

    for quoted_typo in ["quoet", "tyypo"] {
        assert!(flagged.contains(&quoted_typo));
    }
}
Loading