From 7a9e0bba7b6b10b1163dda4b588e3b807579bb1d Mon Sep 17 00:00:00 2001 From: Ryan Peach Date: Sun, 3 Aug 2025 00:54:56 -0400 Subject: [PATCH 1/2] Now you only pass the files directly as well as a directory for --fix to put new files in --- .pre-commit-hooks.yaml | 32 ++++++------ src/config.rs | 40 +++++++-------- src/config/cli.rs | 27 +++++----- src/config/file.rs | 68 +++++++++++++++++++++----- src/file.rs | 20 -------- src/lib.rs | 11 +++-- src/rules.rs | 1 - src/rules/broken_wikilink.rs | 2 +- src/rules/unlinked_text.rs | 2 +- src/visitor.rs | 4 +- tests/logseq/broken_wikilink/tests.rs | 20 +++++--- tests/logseq/common.rs | 21 +++----- tests/logseq/duplicate_alias/tests.rs | 20 +++++--- tests/logseq/similar_filename/tests.rs | 29 +++++------ tests/logseq/unlinked_text/tests.rs | 20 +++++--- 15 files changed, 171 insertions(+), 146 deletions(-) diff --git a/.pre-commit-hooks.yaml b/.pre-commit-hooks.yaml index f9dd68c..d769afa 100644 --- a/.pre-commit-hooks.yaml +++ b/.pre-commit-hooks.yaml @@ -1,17 +1,17 @@ -- id: mdlinker - name: Markdown Linker - description: This hook checks that all markdown links are accounted for. - entry: mdlinker - language: rust - pass_filenames: false -- id: enforce-ascii - name: Enforce ASCII Compliance - description: Detects and replaces non-ASCII characters in specified files. - entry: ./bin/enforce-ascii - language: script - types: [text] - pass_filenames: true - always_run: false - args: +- id: mdlinker + name: Markdown Linker + description: This hook checks that all markdown links are accounted for. + entry: mdlinker + language: rust + pass_filenames: false +- id: enforce-ascii + name: Enforce ASCII Compliance + description: Detects and replaces non-ASCII characters in specified files. + entry: ./bin/enforce-ascii + language: script + types: [text] + pass_filenames: true + always_run: false + args: - --fix - additional_dependencies: [] + additional_dependencies: [] diff --git a/src/config.rs b/src/config.rs index e5da7c0..796077a 100644 --- a/src/config.rs +++ b/src/config.rs @@ -37,13 +37,16 @@ pub enum NewConfigError { /// Used to reconcile the two #[derive(Builder)] pub struct Config { + #[allow(clippy::struct_field_names)] file_config: file::Config, + #[allow(clippy::struct_field_names)] cli_config: cli::Config, - /// See [`self::cli::Config::pages_directory`] - pub pages_directory: PathBuf, - /// See [`self::cli::Config::other_directories`] + /// See [`self::cli::Config::files`] #[builder(default=vec![])] - pub other_directories: Vec, + pub files: Vec, + /// See [`self::cli::Config::root_directory`] + #[builder(default=PathBuf::from("."))] + pub new_files_directory: PathBuf, /// See [`self::cli::Config::ngram_size`] #[builder(default = 2)] pub ngram_size: usize, @@ -84,8 +87,8 @@ pub struct Config { /// these can be unioned with one another /// and then we can use that to create the final config pub trait Partial { - fn pages_directory(&self) -> Option; - fn other_directories(&self) -> Option>; + fn files(&self) -> Option>; + fn new_files_directory(&self) -> Option; fn ngram_size(&self) -> Option; fn boundary_pattern(&self) -> Option; fn filename_spacing_pattern(&self) -> Option; @@ -162,18 +165,12 @@ fn combine_partials( }) .maybe_fix(cli_config.fix().or(file_config.fix())) .maybe_allow_dirty(cli_config.allow_dirty().or(file_config.allow_dirty())) - .pages_directory( + .files( cli_config - .pages_directory() - .or(file_config.pages_directory()) + .files() + .or(file_config.files()) .expect("A default is set"), ) - .maybe_other_directories(Some( - cli_config - .other_directories() - .or(file_config.other_directories()) - .expect("A default is set"), - )) .maybe_ignore_word_pairs( cli_config .ignore_word_pairs() @@ -192,9 +189,9 @@ impl Config { /// /// # Errors /// - /// - [`Error::FileDoesNotExistError`] - Config file does not exist - /// - [`Error::FileDoesNotParseError`] - Config file does not parse from toml into the - /// expected format + /// - [`Error::FileDoesNotExistError`] - Config file does not exist + /// - [`Error::FileDoesNotParseError`] - Config file does not parse from toml into the + /// expected format /// pub fn new() -> Result { let cli = cli::Config::parse(); @@ -227,11 +224,8 @@ impl Config { /// Legacy directories function /// Gets all the directories into one vec #[must_use] - pub fn directories(&self) -> Vec { - let mut out = Vec::new(); - out.push(self.pages_directory.clone()); - out.extend(self.other_directories.clone()); - out + pub fn files(&self) -> &[PathBuf] { + &self.files } pub fn add_report_to_ignore(&mut self, report: &impl ReportTrait) { diff --git a/src/config/cli.rs b/src/config/cli.rs index ab5372c..4c272ee 100644 --- a/src/config/cli.rs +++ b/src/config/cli.rs @@ -15,14 +15,14 @@ use super::Partial; #[derive(Parser, Default, Clone)] #[command(version, about, long_about = None)] pub struct Config { - /// The pages directory is the directory where pages are named for their alias - /// and where new pages should be created when running --fix - #[clap(short = 'p', long = "pages")] - pub pages_directory: Option, + /// Globs or paths to relevant files + #[clap()] + pub files: Vec, - /// Other directories to search in - #[clap(short = 'd', long = "dir")] - pub other_directories: Vec, + /// A location to store new files in created by --fix + /// for the [`super::rules::broken_wikilink::BrokenWikilink`] rule + #[clap(short = 'n', long = "newf")] + pub new_files_directory: Option, /// Path to a configuration file #[clap(short = 'c', long = "config", default_value = "mdlinker.toml")] @@ -67,17 +67,16 @@ pub struct Config { } impl Partial for Config { - fn pages_directory(&self) -> Option { - self.pages_directory.clone() - } - fn other_directories(&self) -> Option> { - let out = self.other_directories.clone(); - if out.is_empty() { + fn files(&self) -> Option> { + if self.files.is_empty() { None } else { - Some(out) + Some(self.files.clone()) } } + fn new_files_directory(&self) -> Option { + self.new_files_directory.clone() + } fn ngram_size(&self) -> Option { self.ngram_size } diff --git a/src/config/file.rs b/src/config/file.rs index d3b2309..3e47c0c 100644 --- a/src/config/file.rs +++ b/src/config/file.rs @@ -15,11 +15,13 @@ use super::{Config as MasterConfig, NewConfigError, Partial}; #[derive(Serialize, Deserialize, Debug, Default, Clone)] pub struct Config { - /// See [`super::cli::Config::pages_directory`] - pub pages_directory: PathBuf, + /// See [`super::cli::Config::files`] + #[serde(default)] + pub files: Option>, - /// See [`super::cli::Config::other_directories`] - pub other_directories: Vec, + /// See [`super::cli::Config::new_files_directory`] + #[serde(default)] + pub new_files_directory: Option, /// See [`super::cli::Config::ngram_size`] #[serde(default)] @@ -68,8 +70,14 @@ impl Config { impl From for Config { fn from(value: MasterConfig) -> Self { Self { - pages_directory: value.pages_directory, - other_directories: value.other_directories, + files: Some( + value + .files + .into_iter() + .map(|file| file.to_string_lossy().to_string()) + .collect(), + ), + new_files_directory: Some(value.new_files_directory), ngram_size: Some(value.ngram_size), boundary_pattern: Some(value.boundary_pattern), filename_spacing_pattern: Some(value.filename_spacing_pattern), @@ -83,11 +91,45 @@ impl From for Config { } impl Partial for Config { - fn pages_directory(&self) -> Option { - Some(self.pages_directory.clone()) - } - fn other_directories(&self) -> Option> { - let out = self.other_directories.clone(); + fn files(&self) -> Option> { + let mut out = Vec::new(); + match &self.files { + None => return None, + Some(files) if files.is_empty() => return None, + Some(files) => { + for file in files { + if file.contains('*') { + let pattern = glob::Pattern::new(file); + match pattern { + Ok(_) => { + let globs = glob::glob(file); + match globs { + Ok(globs) => { + for glob in globs { + match glob { + Ok(path) => out.push(path), + Err(e) => { + eprintln!( + "Error processing glob '{file}': {e}", + ); + } + } + } + } + Err(e) => { + eprintln!("Error parsing glob pattern '{file}': {e}"); + } + } + } + Err(e) => { + eprintln!("Error parsing glob pattern '{file}': {e}"); + return None; + } + } + } + } + } + } if out.is_empty() { None } else { @@ -95,6 +137,10 @@ impl Partial for Config { } } + fn new_files_directory(&self) -> Option { + self.new_files_directory.clone() + } + fn ngram_size(&self) -> Option { self.ngram_size } diff --git a/src/file.rs b/src/file.rs index d1a3448..fbc4cd1 100644 --- a/src/file.rs +++ b/src/file.rs @@ -1,24 +1,4 @@ -use std::path::PathBuf; - -use walkdir::WalkDir; - use thiserror::Error; -use std; - pub mod content; pub mod name; - -/// Walk the directories and get just the files -pub fn get_files(dirs: &Vec) -> Vec { - let mut out = Vec::new(); - for path in dirs { - let walk = WalkDir::new(path); - for entry in walk.into_iter().filter_map(Result::ok) { - if entry.file_type().is_file() { - out.push(entry.into_path()); - } - } - } - out -} diff --git a/src/lib.rs b/src/lib.rs index feb7fdc..d0f101d 100644 --- a/src/lib.rs +++ b/src/lib.rs @@ -8,7 +8,7 @@ pub mod sed; pub mod visitor; use console::{style, Emoji}; -use file::{get_files, name::ngrams}; +use file::name::ngrams; use indicatif::ProgressBar; use miette::{Diagnostic, Result}; use ngrams::CalculateError; @@ -114,6 +114,7 @@ fn is_repo_dirty(repo: &Repository) -> Result { } /// Runs [`check`] in a loop until no more fixes can be made +#[allow(clippy::result_large_err)] fn fix(config: &config::Config) -> Result { // Check if the git repo is dirty match git2::Repository::open_from_env() { @@ -145,7 +146,7 @@ fn fix(config: &config::Config) -> Result { style("[1/3]").bold().dim(), CHECK ); - }; + } let mut output_report = check(config)?; @@ -191,7 +192,7 @@ fn fix(config: &config::Config) -> Result { style("[3/3]").bold().dim(), CHECK_AGAIN ); - }; + } output_report = check(config)?; } else if env::var("RUNNING_TESTS").is_err() { println!( @@ -204,12 +205,13 @@ fn fix(config: &config::Config) -> Result { Ok(output_report) } +#[allow(clippy::result_large_err)] fn check(config: &config::Config) -> Result { // Compile our regex patterns let boundary_regex = regex::Regex::new(&config.boundary_pattern)?; let filename_spacing_regex = regex::Regex::new(&config.filename_spacing_pattern)?; - let all_files = get_files(&config.directories()); + let all_files = config.files().to_vec(); let file_ngrams = ngrams( &all_files, config.ngram_size, @@ -329,6 +331,7 @@ fn check(config: &config::Config) -> Result { /// /// Basically if this library fails, this returns an Err /// but if this library runs, even if it finds linting violations, this returns an Ok +#[allow(clippy::result_large_err)] pub fn lib(config: &config::Config) -> Result { if config.fix { fix(config) diff --git a/src/rules.rs b/src/rules.rs index 76c9250..591ced1 100644 --- a/src/rules.rs +++ b/src/rules.rs @@ -80,7 +80,6 @@ fn dedupe_by_code(mut this: Vec) -> Vec { /// Used for filtering out items that start with the exclude code impl VecHasIdExtensions for Vec { - #[must_use] fn finalize(self, excludes: &[ErrorCode]) -> Self { dedupe_by_code(filter_by_excludes(self, excludes)) } diff --git a/src/rules/broken_wikilink.rs b/src/rules/broken_wikilink.rs index cebc4df..73dd6cc 100644 --- a/src/rules/broken_wikilink.rs +++ b/src/rules/broken_wikilink.rs @@ -57,7 +57,7 @@ impl ReportTrait for BrokenWikilink { self.src.name() ); let filename = format!("{}.md", FilenameLowercase::from_alias(&self.alias, config)); - let path = config.pages_directory.join(filename); + let path = config.new_files_directory.join(filename); std::fs::write(path.clone(), "").map_err(|source| FixError::IOError { source, backtrace: Backtrace::force_capture(), diff --git a/src/rules/unlinked_text.rs b/src/rules/unlinked_text.rs index b4d3b8e..9ee50e5 100644 --- a/src/rules/unlinked_text.rs +++ b/src/rules/unlinked_text.rs @@ -56,7 +56,7 @@ impl ReportTrait for UnlinkedText { /// TODO: Be able to handle this in parallel with other reports fn fix(&self, _config: &Config) -> Result, FixError> { let file = self.src.name().to_owned(); - trace!("Fixing unlinked text: {:?}", file); + trace!("Fixing unlinked text: {file:?}"); let mut source = std::fs::read_to_string(&file).map_err(|src| FixError::IOError { source: src, file: file.clone(), diff --git a/src/visitor.rs b/src/visitor.rs index 105bbc6..f818d55 100644 --- a/src/visitor.rs +++ b/src/visitor.rs @@ -58,7 +58,7 @@ pub trait Visitor { /// WARNING: Don't overwrite this, its already written for you. /// Implement [`Self::_finalize_file`] instead fn finalize_file(&mut self, source: &str, path: &Path) -> Result<(), FinalizeError> { - trace!("{:?} finalizing file {:?}", self.name(), path); + trace!("{:?} finalizing file {:?}", self.name(), path.display()); #[allow(clippy::used_underscore_items)] self._finalize_file(source, path) } @@ -118,7 +118,7 @@ pub enum ParseError { /// Parse the source code and visit all the nodes using tree-sitter #[allow(clippy::result_large_err)] pub fn parse(path: &PathBuf, visitors: Vec>>) -> Result<(), ParseError> { - debug!("Parsing file {:?}", path); + debug!("Parsing file {:?}", path.display()); let source = std::fs::read_to_string(path).map_err(|source| ParseError::IoError { file: path.clone(), source, diff --git a/tests/logseq/broken_wikilink/tests.rs b/tests/logseq/broken_wikilink/tests.rs index 40710c1..de01db3 100644 --- a/tests/logseq/broken_wikilink/tests.rs +++ b/tests/logseq/broken_wikilink/tests.rs @@ -1,18 +1,24 @@ -use lazy_static::lazy_static; use mdlinker::rules::broken_wikilink; use crate::common::get_report; +use glob::glob; use log::{debug, info}; use mdlinker::rules::filter_code; +use std::path::PathBuf; use itertools::Itertools; -lazy_static! { - static ref PATHS: Vec = vec![ - "./tests/logseq/broken_wikilink/assets/pages/".to_string(), - "./tests/logseq/broken_wikilink/assets/journals/".to_string() - ]; -} +static PATHS: std::sync::LazyLock> = std::sync::LazyLock::new(|| { + let first: Vec = glob("./tests/logseq/broken_wikilink/assets/pages/**/*.md") + .expect("This is a constant") + .map(|p| p.expect("This is a constant")) + .collect(); + let second: Vec = glob("./tests/logseq/broken_wikilink/assets/journals/**/*.md") + .expect("This is a constant") + .map(|p| p.expect("This is a constant")) + .collect(); + [first, second].concat() +}); #[test] fn number_of_broken_wikilinks() { diff --git a/tests/logseq/common.rs b/tests/logseq/common.rs index df9a117..1bc26db 100644 --- a/tests/logseq/common.rs +++ b/tests/logseq/common.rs @@ -1,5 +1,5 @@ //! Code used in multiple test folders -use std::{path::PathBuf, str::FromStr}; +use std::path::PathBuf; use mdlinker::{ config::{cli::Config as CliConfig, file::Config as FileConfig, Config}, @@ -19,21 +19,14 @@ fn setup() { /// Runs the library and generates the [`mdlinker::OutputReport`] #[must_use] -pub fn get_report(paths: &[String], config: Option) -> mdlinker::OutputReport { +pub fn get_report(paths: &[PathBuf], config: Option) -> mdlinker::OutputReport { setup(); let config: Config = match config { - None => { - let paths: Vec = paths - .iter() - .map(|path| PathBuf::from_str(path).expect("This path exists at compile time.")) - .collect(); - Config::builder() - .pages_directory(paths[0].clone()) - .other_directories(paths[1..].to_vec()) - .cli_config(CliConfig::default()) - .file_config(FileConfig::default()) - .build() - } + None => Config::builder() + .files(paths.to_vec()) + .cli_config(CliConfig::default()) + .file_config(FileConfig::default()) + .build(), Some(config) => config, }; diff --git a/tests/logseq/duplicate_alias/tests.rs b/tests/logseq/duplicate_alias/tests.rs index 624e58c..8fc7e3a 100644 --- a/tests/logseq/duplicate_alias/tests.rs +++ b/tests/logseq/duplicate_alias/tests.rs @@ -1,20 +1,26 @@ -use lazy_static::lazy_static; use mdlinker::rules::duplicate_alias; use mdlinker::rules::duplicate_alias::DuplicateAlias; use mdlinker::rules::filter_code; use crate::common::get_report; +use glob::glob; use log::{debug, info}; +use std::path::PathBuf; use itertools::Itertools; -lazy_static! { - static ref PATHS: Vec = vec![ - "./tests/logseq/duplicate_alias/assets/pages".to_string(), - "./tests/logseq/duplicate_alias/assets/journals".to_string() - ]; -} +static PATHS: std::sync::LazyLock> = std::sync::LazyLock::new(|| { + let first: Vec = glob("./tests/logseq/duplicate_alias/assets/pages/**/*.md") + .expect("This is a constant") + .map(|p| p.expect("This is a constant")) + .collect(); + let second: Vec = glob("./tests/logseq/duplicate_alias/assets/journals/**/*.md") + .expect("This is a constant") + .map(|p| p.expect("This is a constant")) + .collect(); + [first, second].concat() +}); #[test] fn number_of_duplicate_alias() { diff --git a/tests/logseq/similar_filename/tests.rs b/tests/logseq/similar_filename/tests.rs index b780f35..710691f 100644 --- a/tests/logseq/similar_filename/tests.rs +++ b/tests/logseq/similar_filename/tests.rs @@ -1,17 +1,19 @@ use crate::common::get_report; use config::cli::Config as CliConfig; use config::file::Config as FileConfig; -use lazy_static::lazy_static; +use glob::glob; use log::info; use mdlinker::rules::similar_filename::SimilarFilename; use mdlinker::{config, lib}; use regex::Regex; -use std::{path::PathBuf, str::FromStr}; +use std::path::PathBuf; -lazy_static! { - static ref PATHS: Vec = - vec!["./tests/logseq/similar_filename/assets/pages".to_string(),]; -} +static PATHS: std::sync::LazyLock> = std::sync::LazyLock::new(|| { + glob("./tests/logseq/similar_filename/assets/pages/**/*.md") + .expect("This is a constant") + .map(|p| p.expect("This is a constant")) + .collect() +}); /// [`foo.md`](./assets/logseq/pages/foo.md) and [`foo___bar.md`](./assets/logseq/pages/foo___bar.md) should not conflict /// because the word `foo` in `foo/bar` is just a properly used group name. @@ -19,10 +21,7 @@ lazy_static! { fn groups_first_element_same() { info!("groups_first_element_same"); let config = config::Config::builder() - .pages_directory( - PathBuf::from_str("./tests/logseq/similar_filename/assets/pages") - .expect("This is a constant"), - ) + .files(PATHS.to_vec()) .file_config(FileConfig::default()) .cli_config(CliConfig::default()) .filename_match_threshold(1) @@ -43,10 +42,7 @@ fn groups_first_element_same() { fn test_ignore_word_pairs1() { info!("test_ignore_word_pairs"); let config = config::Config::builder() - .pages_directory( - PathBuf::from_str("./tests/logseq/similar_filename/assets/pages") - .expect("This is a constant"), - ) + .files(PATHS.to_vec()) .file_config(FileConfig::default()) .cli_config(CliConfig::default()) .filename_match_threshold(1) @@ -68,10 +64,7 @@ fn test_ignore_word_pairs1() { fn test_ignore_word_pairs2() { info!("test_ignore_word_pairs"); let config = config::Config::builder() - .pages_directory( - PathBuf::from_str("./tests/logseq/similar_filename/assets/pages") - .expect("This is a constant"), - ) + .files(PATHS.to_vec()) .file_config(FileConfig::default()) .cli_config(CliConfig::default()) .filename_match_threshold(1) diff --git a/tests/logseq/unlinked_text/tests.rs b/tests/logseq/unlinked_text/tests.rs index f42de38..7060dd1 100644 --- a/tests/logseq/unlinked_text/tests.rs +++ b/tests/logseq/unlinked_text/tests.rs @@ -1,22 +1,28 @@ use std::fs; -use lazy_static::lazy_static; use mdlinker::rules::unlinked_text; +use glob::glob; use log::{debug, info}; use mdlinker::rules::filter_code; use miette::SourceOffset; +use std::path::PathBuf; use crate::common::get_report; use itertools::Itertools; -lazy_static! { - static ref PATHS: Vec = vec![ - "./tests/logseq/unlinked_text/assets/pages/".to_string(), - "./tests/logseq/unlinked_text/assets/journals/".to_string() - ]; -} +static PATHS: std::sync::LazyLock> = std::sync::LazyLock::new(|| { + let first: Vec = glob("./tests/logseq/unlinked_text/assets/pages/**/*.md") + .expect("This is a constant") + .map(|p| p.expect("This is a constant")) + .collect(); + let second: Vec = glob("./tests/logseq/unlinked_text/assets/journals/**/*.md") + .expect("This is a constant") + .map(|p| p.expect("This is a constant")) + .collect(); + [first, second].concat() +}); #[test] fn number_of_unlinked_texts() { From d82c87d9a40167f997627d201673e28fe2e351fa Mon Sep 17 00:00:00 2001 From: Ryan Peach Date: Sun, 3 Aug 2025 01:13:00 -0400 Subject: [PATCH 2/2] Use stable rust --- rust-toolchain.toml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/rust-toolchain.toml b/rust-toolchain.toml index a2d375e..fdfacc4 100644 --- a/rust-toolchain.toml +++ b/rust-toolchain.toml @@ -1,3 +1,3 @@ [toolchain] -channel = "nightly" +channel = "1.91.0" components = ["clippy", "rustfmt"]