From 5b76fd0a00f93a285b572b3d749fec85dc53004a Mon Sep 17 00:00:00 2001 From: Humberto Yusta Date: Thu, 13 Feb 2025 15:21:54 +0100 Subject: [PATCH 1/5] fix: merge to pathbuf normalize and canonical path Now it is permissive in windows, and works with strange windows prefixes, any amount of / or \, and long paths, in Unix it still does the same --- src/files_blocklist.rs | 4 +- src/files_correction.rs | 154 +++++++++++--------- src/git/operations.rs | 6 +- src/http/routers/v1/git.rs | 2 +- src/integrations/config_chat.rs | 4 +- src/integrations/integr_github.rs | 4 +- src/integrations/integr_gitlab.rs | 4 +- src/integrations/integr_pdb.rs | 2 +- src/integrations/integr_shell.rs | 2 +- src/integrations/setting_up_integrations.rs | 2 +- src/main.rs | 4 +- src/postprocessing/pp_utils.rs | 4 +- src/telemetry/snippets_collection.rs | 6 +- src/tools/tool_tree.rs | 2 +- 14 files changed, 105 insertions(+), 95 deletions(-) diff --git a/src/files_blocklist.rs b/src/files_blocklist.rs index 74f864d8b..d642c09e9 100644 --- a/src/files_blocklist.rs +++ b/src/files_blocklist.rs @@ -223,14 +223,14 @@ fn _load_indexing_yaml_str( }; let indexing_dir_path = PathBuf::from(&expanded_dir); if indexing_dir_path.is_absolute() { - let normalized = crate::files_correction::to_pathbuf_normalize(&expanded_dir) + let normalized = crate::files_correction::canonical_path(&expanded_dir) .to_string_lossy() .into_owned(); additional_indexing_dirs.push(normalized); } else { if let Some(b) = relative_path_base { let joined_path = b.join(&expanded_dir).to_str().unwrap().to_string(); - let normalized = crate::files_correction::to_pathbuf_normalize(&joined_path) + let normalized = crate::files_correction::canonical_path(&joined_path) .to_string_lossy() .into_owned(); additional_indexing_dirs.push(normalized); diff --git a/src/files_correction.rs b/src/files_correction.rs index 6fa7b6287..3406de863 100644 --- a/src/files_correction.rs +++ b/src/files_correction.rs @@ -1,7 +1,9 @@ use std::collections::{HashMap, HashSet}; +use std::ffi::OsString; use std::sync::Arc; use std::time::Instant; -use std::path::{Component, PathBuf}; +use std::path::{Component, PathBuf, Prefix}; +use itertools::Itertools; use serde::Deserialize; use tokio::sync::RwLock as ARwLock; use tracing::info; @@ -119,35 +121,6 @@ pub async fn files_cache_rebuild_as_needed(global_context: Arc PathBuf { - // horrible_path//..\project1\project1/1.cpp - // everything should become an absolute \\?\ path on windows - let parts = p - .to_string() - .replace(r"\\", r"\") - .replace(r"/", r"\") - .split(r"\") - .filter(|s| !s.is_empty()) - .map(|s| s.to_string()) - .collect::>(); - if parts.len() >= 2 && parts[0] == "?" { - canonical_path(&format!(r"\\?\{}", parts[1..].join(r"\"))) - } else if parts.len() > 1 && parts[0].contains(":") { - canonical_path(&format!(r"\\?\{}", parts.join(r"\"))) - } else { - canonical_path(&p.to_string()) - } -} - -pub fn to_pathbuf_normalize(path: &str) -> PathBuf { - if cfg!(target_os = "windows") { - PathBuf::from(winpath_normalize(path)) - } else { - PathBuf::from(canonical_path(path)) - } -} - async fn complete_path_with_project_dir( gcx: Arc>, correction_candidate: &String, @@ -156,7 +129,7 @@ async fn complete_path_with_project_dir( fn path_exists(path: &PathBuf, is_dir: bool) -> bool { (is_dir && path.is_dir()) || (!is_dir && path.is_file()) } - let candidate_path = to_pathbuf_normalize(&correction_candidate); + let candidate_path = canonical_path(correction_candidate); let project_dirs = get_project_dirs(gcx.clone()).await; for p in project_dirs { if path_exists(&candidate_path, is_dir) && candidate_path.starts_with(&p) { @@ -362,52 +335,89 @@ fn _shortify_paths_from_indexed(paths: &Vec, indexed_paths: Arc std::io::Result { - let mut components = path.strip_prefix(".").unwrap_or(path).components(); - let path_os = path.as_os_str().as_encoded_bytes(); - let mut normalized = if path.is_absolute() { - if path_os.starts_with(b"//") && !path_os.starts_with(b"///") { - components.next(); - PathBuf::from("//") - } else { - PathBuf::new() +#[cfg(target_os = "windows")] +/// In Windows, tries to fix the path, permissive about paths like \\?\C:\path, incorrect amount of \ and more. +/// +/// Temporarily remove verbatim, to resolve ., .., symlinks if possible, it will be added again later. +fn preprocess_path_for_normalization(p: String) -> String { + let p = p.replace(r"/", r"\"); + let starting_slashes = p.chars().take_while(|c| *c == '\\').count(); + + let mut parts_iter = p.split(r"\").filter(|part| !part.is_empty()).peekable(); + + match parts_iter.peek() { + Some(&"?") => { + parts_iter.next(); + match parts_iter.peek() { + Some(pref) if pref.contains(":") => parts_iter.join(r"\"), // \\?\C:\path... + Some(pref) if pref.to_lowercase() == "unc" => { // \\?\UNC\server\share\path... + parts_iter.next(); + format!(r"\\{}", parts_iter.join(r"\")) + }, + Some(_) => { // \\?\path... + tracing::warn!("Found a verbatim path that is not UNC nor Disk path: {}, leaving it as-is", p); + p + }, + None => p, // \\?\ + } + }, + Some(&".") if starting_slashes > 0 => { + parts_iter.next(); + format!(r"\\.\{}", parts_iter.join(r"\")) // \\.\path... + }, + Some(pref) if pref.contains(":") => parts_iter.join(r"\"), // C:\path... + Some(_) => { + match starting_slashes { + 0 => parts_iter.join(r"\"), // relative path: folder\file.ext + 1 => format!(r"\{}", parts_iter.join(r"\")), // absolute path from cur disk: \folder\file.ext + _ => format!(r"\\{}", parts_iter.join(r"\")), // standard UNC path: \\server\share\folder\file.ext + } } - } else { - std::env::current_dir()? - }; - normalized.extend(components); - if path_os.ends_with(b"/") { - normalized.push(""); + None => p, // \ } - Ok(normalized) } -pub fn canonical_path(s: &str) -> PathBuf { - let mut res = match PathBuf::from(s).canonicalize() { - Ok(x) => x, - Err(_) => { - let a = absolute(std::path::Path::new(s)).unwrap_or(PathBuf::from(s)); - // warn!("canonical_path: {:?} doesn't work: {}\n using absolute path instead {}", s, e, a.display()); - a - } - }; - let components: Vec = res - .components() - .map(|x| match x { - Component::Normal(c) => c.to_string_lossy().to_string(), - Component::Prefix(c) => { - let lowercase_prefix = c.as_os_str().to_string_lossy().to_string().to_lowercase(); - lowercase_prefix +#[cfg(not(target_os = "windows"))] +/// In Unix, do nothing +fn preprocess_path_for_normalization(p: String) -> String { + p +} + +#[cfg(target_os = "windows")] +/// In Windows, add verbatim prefix to Disk or UNC paths, leave others as-is +fn make_absolute(path: PathBuf) -> PathBuf { + if let Some(Component::Prefix(pref)) = path.components().next() { + match pref.kind() { + Prefix::Disk(_) => { + let mut path_os_str = OsString::from(r"\\?\"); + path_os_str.push(path.as_os_str()); + PathBuf::from(path_os_str) }, - _ => x.as_os_str().to_string_lossy().to_string(), - }) - .collect(); - res = components.iter().fold(PathBuf::new(), |mut acc, x| { - acc.push(x); - acc - }); - // info!("canonical_path:\n{:?}\n{:?}", s, res); - res + Prefix::UNC(_, _) => { + let mut path_os_str = OsString::from(r"\\?\UNC\"); + path_os_str.push(path.strip_prefix(r"\\").unwrap_or(&path).as_os_str()); + PathBuf::from(path_os_str) + }, + _ => path + } + } else { + path + } +} + +#[cfg(not(target_os = "windows"))] +/// In Unix, do nothing +fn make_absolute(path: PathBuf) -> PathBuf { + path +} + +pub fn canonical_path>(p: T) -> PathBuf { + let p: String = p.into(); + let path= PathBuf::from(preprocess_path_for_normalization(p)); + + path.canonicalize() + .or_else(|_| std::path::absolute(&path).map(make_absolute)) + .unwrap_or(path) } pub fn serialize_path(path: &PathBuf, serializer: S) -> Result { diff --git a/src/git/operations.rs b/src/git/operations.rs index dc0b5902c..17de5aa3a 100644 --- a/src/git/operations.rs +++ b/src/git/operations.rs @@ -4,7 +4,7 @@ use git2::{Repository, Branch, DiffOptions, Oid}; use tracing::error; use crate::custom_error::MapErrToString; -use crate::files_correction::to_pathbuf_normalize; +use crate::files_correction::canonical_path; use crate::git::{FileChange, FileChangeStatus, DiffStatusType}; fn status_options(include_unmodified: bool, show: git2::StatusShow) -> git2::StatusOptions { @@ -127,7 +127,7 @@ pub fn get_diff_statuses(diff_status_type: DiffStatusType, repo: &Repository, in result.push(FileChange { status, absolute_path: if include_abs_paths { - to_pathbuf_normalize(&repo_workdir.join(&relative_path).to_string_lossy()) + canonical_path(repo_workdir.join(&relative_path).to_string_lossy().to_string()) } else { PathBuf::new() }, @@ -184,7 +184,7 @@ pub fn get_diff_statuses_workdir_to_head(repository: &Repository) -> Result) -> Result<(String, Option) if s.is_empty() { None } else { - let workdir = crate::files_correction::to_pathbuf_normalize(s); + let workdir = crate::files_correction::canonical_path(s); if !workdir.exists() { return Err("Workdir doesn't exist".to_string()); } else { diff --git a/src/integrations/integr_shell.rs b/src/integrations/integr_shell.rs index 46363f9f9..60c345008 100644 --- a/src/integrations/integr_shell.rs +++ b/src/integrations/integr_shell.rs @@ -256,7 +256,7 @@ fn parse_args(args: &HashMap) -> Result<(String, Option) if s.is_empty() { None } else { - let workdir = crate::files_correction::to_pathbuf_normalize(s); + let workdir = crate::files_correction::canonical_path(s); if !workdir.exists() { return Err("Workdir doesn't exist".to_string()); } else { diff --git a/src/integrations/setting_up_integrations.rs b/src/integrations/setting_up_integrations.rs index fd87d1677..fc1bfac30 100644 --- a/src/integrations/setting_up_integrations.rs +++ b/src/integrations/setting_up_integrations.rs @@ -277,7 +277,7 @@ pub async fn get_vars_for_replacements( let variables_yaml_path = if variables_yaml.is_empty() { config_dir.join("variables.yaml") } else { - crate::files_correction::to_pathbuf_normalize(&variables_yaml) + crate::files_correction::canonical_path(&variables_yaml) }; let mut variables = HashMap::new(); diff --git a/src/main.rs b/src/main.rs index 72b31ee2a..f2f349421 100644 --- a/src/main.rs +++ b/src/main.rs @@ -2,7 +2,7 @@ use std::io::Write; use std::env; use std::panic; -use files_correction::to_pathbuf_normalize; +use files_correction::canonical_path; use tokio::task::JoinHandle; use tracing::{info, Level}; use tracing_appender; @@ -80,7 +80,7 @@ async fn main() { let cpu_num = std::thread::available_parallelism().unwrap().get(); rayon::ThreadPoolBuilder::new().num_threads(cpu_num / 2).build_global().unwrap(); - let home_dir = to_pathbuf_normalize(&home::home_dir().ok_or(()).expect("failed to find home dir").to_string_lossy().to_string()); + let home_dir = canonical_path(home::home_dir().ok_or(()).expect("failed to find home dir").to_string_lossy().to_string()); let cache_dir = home_dir.join(".cache").join("refact"); let config_dir = home_dir.join(".config").join("refact"); let (gcx, ask_shutdown_receiver, cmdline) = global_context::create_global_context(cache_dir.clone(), config_dir.clone()).await; diff --git a/src/postprocessing/pp_utils.rs b/src/postprocessing/pp_utils.rs index c45157cda..3eb1f6c7d 100644 --- a/src/postprocessing/pp_utils.rs +++ b/src/postprocessing/pp_utils.rs @@ -78,7 +78,7 @@ pub async fn pp_ast_markup_files( let path_as_presented = context_file.file_name.clone(); let candidates = crate::files_correction::correct_to_nearest_filename(gcx.clone(), &path_as_presented, false, 5).await; let cpath = match candidates.first() { - Some(c) => crate::files_correction::canonical_path(&c), + Some(c) => crate::files_correction::canonical_path(c), None => crate::files_correction::canonical_path(&path_as_presented) }; context_file.file_name = cpath.to_string_lossy().to_string(); @@ -151,7 +151,7 @@ pub async fn context_msgs_from_paths( // XXX: only used once in a test handler, maybe remove? let mut messages = vec![]; for file_name in files_set { - let path = crate::files_correction::canonical_path(&file_name.clone()); + let path = crate::files_correction::canonical_path(&file_name); let text = match get_file_text_from_memory_or_disk(global_context.clone(), &path).await { Ok(text) => text, Err(e) => { diff --git a/src/telemetry/snippets_collection.rs b/src/telemetry/snippets_collection.rs index 05642d0f9..940d2f5a0 100644 --- a/src/telemetry/snippets_collection.rs +++ b/src/telemetry/snippets_collection.rs @@ -7,7 +7,7 @@ use tracing::debug; use crate::call_validation::CodeCompletionPost; use crate::completion_cache; -use crate::files_correction::to_pathbuf_normalize; +use crate::files_correction::canonical_path; use crate::global_context; use crate::telemetry::basic_comp_counters; use crate::telemetry::basic_robot_human; @@ -115,8 +115,8 @@ pub async fn sources_changed( let mut accepted_snippets = vec![]; for snip in storage_locked.tele_snippets.iter_mut() { - let uri_path = to_pathbuf_normalize(uri).to_string_lossy().to_string(); - let cursor_file_path = to_pathbuf_normalize(&snip.inputs.cursor.file).to_string_lossy().to_string(); + let uri_path = canonical_path(uri).to_string_lossy().to_string(); + let cursor_file_path = canonical_path(&snip.inputs.cursor.file).to_string_lossy().to_string(); if snip.accepted_ts == 0 || !uri_path.ends_with(&cursor_file_path) { continue; } diff --git a/src/tools/tool_tree.rs b/src/tools/tool_tree.rs index c960506d8..35209c1d9 100644 --- a/src/tools/tool_tree.rs +++ b/src/tools/tool_tree.rs @@ -57,7 +57,7 @@ impl Tool for ToolTree { let candidate = return_one_candidate_or_a_good_error( gcx.clone(), &path, &dir_candidates, &project_dirs, true ).await?; - let true_path = crate::files_correction::to_pathbuf_normalize(&candidate); + let true_path = crate::files_correction::canonical_path(candidate); let is_within_project_dirs = project_dirs.iter().any(|p| true_path.starts_with(&p)); if !is_within_project_dirs && !gcx.read().await.cmdline.inside_container { From 2dcfa7d0183e1054333a4ea9a34f6066b32dc887 Mon Sep 17 00:00:00 2001 From: Humberto Yusta Date: Thu, 13 Feb 2025 15:23:28 +0100 Subject: [PATCH 2/5] test: windows normalization logic --- src/files_correction.rs | 117 ++++++++++++++++++++++++++++++++++++++++ 1 file changed, 117 insertions(+) diff --git a/src/files_correction.rs b/src/files_correction.rs index 3406de863..3cc44535e 100644 --- a/src/files_correction.rs +++ b/src/files_correction.rs @@ -503,6 +503,123 @@ mod tests { assert_eq!(result, expected_result, "The result should contain the expected paths, instead it found"); } + + #[cfg(target_os = "windows")] + #[test] + fn test_preprocess_windows_path_for_normalization() { + let test_cases = [ + // Verbatim disk paths + (r"\\\\\\\\?\\\\C:\\\\Windows\\\\System32", r"C:\Windows\System32"), + (r"\?\C:\Model generates this kind of paths", r"C:\Model generates this kind of paths"), + (r"/?/C:/other\\horr.ible/path", r"C:\other\horr.ible\path"), + + // Disk paths + (r"C:\\folder/..\\\\file", r"C:\folder\..\file"), + (r"/D:\\Users/John Doe\\\\.\myfolder/file.ext", r"D:\Users\John Doe\.\myfolder\file.ext"), + + // Verbatim UNC paths + (r"\\?\UNC\server\share/folder//file.ext", r"\\server\share\folder\file.ext"), + (r"\\?\unc\server\share/folder//file.ext", r"\\server\share\folder\file.ext"), + (r"/?/unc/server/share/folder//file.ext", r"\\server\share\folder\file.ext"), + + // Standard UNC paths + (r"\\server\share/folder//file.ext", r"\\server\share\folder\file.ext"), + (r"////server//share//folder//file.ext", r"\\server\share\folder\file.ext"), + (r"//wsl$/Ubuntu/home/yourusername/projects", r"\\wsl$\Ubuntu\home\yourusername\projects"), + + // DeviceNS paths + (r"////./pipe/docker_engine", r"\\.\pipe\docker_engine"), + (r"\\.\pipe\docker_engine", r"\\.\pipe\docker_engine"), + (r"//./pipe/docker_engine", r"\\.\pipe\docker_engine"), + + // Absolute paths without disk + (r"\Windows\System32", r"\Windows\System32"), + (r"/Program Files/Common Files", r"\Program Files\Common Files"), + (r"\Users\Public\Downloads", r"\Users\Public\Downloads"), + (r"\temp/path", r"\temp\path"), + + // Relative paths + (r"folder/file.txt", r"folder\file.txt"), + (r"./current/./folder", r".\current\.\folder"), + (r"project/../src/main.rs", r"project\..\src\main.rs"), + (r"documents\\photos", r"documents\photos"), + (r"some folder/with spaces/file", r"some folder\with spaces\file"), + (r"bin/../lib/./include", r"bin\..\lib\.\include"), + ]; + + for (input, expected) in test_cases { + let result = preprocess_path_for_normalization(input.to_string()); + assert_eq!(result, expected.to_string(), "The result for {} should be {}, got {}", input, expected, result); + } + } + + #[cfg(target_os = "windows")] + #[test] + fn test_canonical_path() + { + let temp_dir = tempfile::tempdir().unwrap(); + let temp_dir_path = temp_dir.path(); + let temp_dir_path_str = temp_dir_path.to_str().unwrap(); + + let long_str = String::from_utf8(vec![b'a'; 600].iter().map(|b| *b).collect()).unwrap(); + let long_dir_path = PathBuf::from(format!("\\\\?\\{temp_dir_path_str}\\{long_str}")); + + let create_dir_cmd = format!( + "powershell.exe -Command \"New-Item -Path '{}' -ItemType Directory -Force\"", + long_dir_path.to_string_lossy().replace("'", "''") + ); + let create_file_cmd = format!( + "powershell.exe -Command \"New-Item -Path '{}' -ItemType File -Force\"", + long_dir_path.join("file.txt").to_string_lossy().replace("'", "''") + ); + std::process::Command::new("cmd") + .args(["/C", &create_dir_cmd]) + .output() + .expect("Failed to create directory"); + std::process::Command::new("cmd") + .args(["/C", &create_file_cmd]) + .output() + .expect("Failed to create file"); + + let long_dir_path_str = format!("{temp_dir_path_str}\\{long_str}\\..\\{long_str}"); + let long_dir_file_str = format!("{temp_dir_path_str}\\{long_str}\\..\\{long_str}\\.\\..\\{long_str}\\file.txt"); + + let test_cases = vec![ + // Disks + (r"C:\\Windows\\System32\\..\\..\\Temp\\conn", PathBuf::from(r"\\?\C:\Temp\conn")), + (r"D:/../..\NUL", PathBuf::from(r"\\.\NUL")), + (r"d:\\A\\B\\C\\D\\..\\..\\..\\..\\E\\F\\G\\..\\..\\H", PathBuf::from(r"\\?\D:\E\H")), + (r"c:\\../Windows", PathBuf::from(r"\\?\C:\Windows")), + (r"d:\\..\\..\\..\\..\\..", PathBuf::from(r"\\?\D:\")), + + // Verbatim Disks + (r"\\\\?\\C:\Very\Long\Path\With\Lots\Of\Subdirectories\..\..\..\CON", PathBuf::from(r"\\?\C:\Very\Long\Path\With\CON")), + (r"//?/d:/Trailing/Dot./.", PathBuf::from(r"\\?\d:\Trailing\Dot")), + (r"\?\c:\Trailing\Space\\ ", PathBuf::from(r"\\?\c:\Trailing\Space\")), + (r"\?/C:/$MFT", PathBuf::from(r"\\?\C:\$MFT")), + + // Devices + (r"\\.\COM1", PathBuf::from(r"\\.\COM1")), + (r"\.\PIPE\SomePipeName", PathBuf::from(r"\\.\PIPE\SomePipeName")), + (r"/?/UNC//./PIPE/AnotherPipe", PathBuf::from(r"\\.\PIPE\AnotherPipe")), + (r"D:\\PRN", PathBuf::from(r"\\?\D:\PRN")), + + // Non-Standard Verbatim + (r"\\?\Volume{12345678-1234-1234-1234-1234567890AB}\Path\To\Some\File", PathBuf::from(r"\\?\Volume{12345678-1234-1234-1234-1234567890AB}\Path\To\Some\File")), + + // UNC Verbatim + (r"\\?\UNC\localhost\C$/Windows/System32\..\System32", PathBuf::from(r"\\?\UNC\localhost\C$\Windows\System32")), + + // Long paths + (&long_dir_path_str, PathBuf::from(format!("\\\\?\\{temp_dir_path_str}\\{long_str}"))), + (&long_dir_file_str, PathBuf::from(format!("\\\\?\\{temp_dir_path_str}\\{long_str}\\file.txt"))), + ]; + + for (input, expected) in test_cases { + let result = canonical_path(input); + assert_eq!(result, expected, "Expected canonical path for {} to be {}, but got {}", input, expected.to_string_lossy(), result.to_string_lossy()); + } + } // cicd works with virtual machine, this test is slow #[cfg(not(all(target_arch = "aarch64", target_os = "linux")))] From 62d9b930d2b7dc3c7f0609eb2c5208c6121fcdc3 Mon Sep 17 00:00:00 2001 From: Humberto Yusta Date: Thu, 13 Feb 2025 15:46:46 +0100 Subject: [PATCH 3/5] fix: only import windows necessary types in windows --- src/files_correction.rs | 18 +++++++++++------- 1 file changed, 11 insertions(+), 7 deletions(-) diff --git a/src/files_correction.rs b/src/files_correction.rs index 3cc44535e..c52cf7f63 100644 --- a/src/files_correction.rs +++ b/src/files_correction.rs @@ -1,12 +1,16 @@ use std::collections::{HashMap, HashSet}; -use std::ffi::OsString; use std::sync::Arc; use std::time::Instant; -use std::path::{Component, PathBuf, Prefix}; -use itertools::Itertools; +use std::path::PathBuf; use serde::Deserialize; use tokio::sync::RwLock as ARwLock; use tracing::info; +#[cfg(windows)] +use itertools::Itertools; +#[cfg(windows)] +use std::path::{Component, Prefix}; +#[cfg(windows)] +use std::ffi::OsString; use crate::files_in_workspace::detect_vcs_for_a_file_path; use crate::global_context::GlobalContext; @@ -335,7 +339,7 @@ fn _shortify_paths_from_indexed(paths: &Vec, indexed_paths: Arc String { } } -#[cfg(not(target_os = "windows"))] +#[cfg(not(windows))] /// In Unix, do nothing fn preprocess_path_for_normalization(p: String) -> String { p } -#[cfg(target_os = "windows")] +#[cfg(windows)] /// In Windows, add verbatim prefix to Disk or UNC paths, leave others as-is fn make_absolute(path: PathBuf) -> PathBuf { if let Some(Component::Prefix(pref)) = path.components().next() { @@ -405,7 +409,7 @@ fn make_absolute(path: PathBuf) -> PathBuf { } } -#[cfg(not(target_os = "windows"))] +#[cfg(not(windows))] /// In Unix, do nothing fn make_absolute(path: PathBuf) -> PathBuf { path From 2319def11de6b12daff62edf439e3ca98d4f2b1a Mon Sep 17 00:00:00 2001 From: Humberto Yusta Date: Thu, 13 Feb 2025 19:22:23 +0100 Subject: [PATCH 4/5] fix: absolute paths not touching filesystem works on linux --- src/files_correction.rs | 105 +++++++++++++++++++++++++++++++--------- 1 file changed, 81 insertions(+), 24 deletions(-) diff --git a/src/files_correction.rs b/src/files_correction.rs index c52cf7f63..cc4dbd12c 100644 --- a/src/files_correction.rs +++ b/src/files_correction.rs @@ -1,19 +1,14 @@ use std::collections::{HashMap, HashSet}; use std::sync::Arc; use std::time::Instant; -use std::path::PathBuf; +use std::path::{PathBuf, Component, Path}; use serde::Deserialize; use tokio::sync::RwLock as ARwLock; use tracing::info; -#[cfg(windows)] -use itertools::Itertools; -#[cfg(windows)] -use std::path::{Component, Prefix}; -#[cfg(windows)] -use std::ffi::OsString; -use crate::files_in_workspace::detect_vcs_for_a_file_path; use crate::global_context::GlobalContext; +use crate::custom_error::MapErrToString; +use crate::files_in_workspace::detect_vcs_for_a_file_path; use crate::fuzzy_search::fuzzy_search; @@ -344,6 +339,8 @@ fn _shortify_paths_from_indexed(paths: &Vec, indexed_paths: Arc String { + use itertools::Itertools; + let p = p.replace(r"/", r"\"); let starting_slashes = p.chars().take_while(|c| *c == '\\').count(); @@ -388,40 +385,70 @@ fn preprocess_path_for_normalization(p: String) -> String { } #[cfg(windows)] -/// In Windows, add verbatim prefix to Disk or UNC paths, leave others as-is -fn make_absolute(path: PathBuf) -> PathBuf { +/// In Windows, call std::path::absolute, then add verbatim prefix to standard Disk or UNC paths +fn absolute(path: &Path) -> Result { + use std::path::Prefix; + use std::ffi::OsString; + + let path = std::path::absolute(path).map_err_to_string()?; + if let Some(Component::Prefix(pref)) = path.components().next() { match pref.kind() { Prefix::Disk(_) => { let mut path_os_str = OsString::from(r"\\?\"); path_os_str.push(path.as_os_str()); - PathBuf::from(path_os_str) + Ok(PathBuf::from(path_os_str)) }, Prefix::UNC(_, _) => { let mut path_os_str = OsString::from(r"\\?\UNC\"); path_os_str.push(path.strip_prefix(r"\\").unwrap_or(&path).as_os_str()); - PathBuf::from(path_os_str) + Ok(PathBuf::from(path_os_str)) }, - _ => path + _ => Ok(path.to_path_buf()) } } else { - path + Ok(path.to_path_buf()) } } #[cfg(not(windows))] -/// In Unix, do nothing -fn make_absolute(path: PathBuf) -> PathBuf { - path +/// In Unix, this method is similar to std::path::absolute, but also resolves .. +fn absolute(path: &Path) -> Result { + let mut components = path.components(); + let path_os = path.as_os_str().as_encoded_bytes(); + + let mut normalized = if path.is_absolute() { + if path_os.starts_with(b"//") && !path_os.starts_with(b"///") { + components.next(); + PathBuf::from("//") + } else { + PathBuf::from("/") + } + } else { + std::env::current_dir().map_err_to_string()? + }; + for component in components { + match component { + Component::Normal(c) => { normalized.push(c); } + Component::ParentDir => { normalized.pop(); } + Component::CurDir => (), + Component::RootDir => (), + Component::Prefix(_) => return Err("Prefix should not occur in Unix".to_string()), + } + } + + if path_os.ends_with(b"/") { + normalized.push(""); + } + + Ok(normalized) } pub fn canonical_path>(p: T) -> PathBuf { let p: String = p.into(); let path= PathBuf::from(preprocess_path_for_normalization(p)); - path.canonicalize() - .or_else(|_| std::path::absolute(&path).map(make_absolute)) - .unwrap_or(path) + path.canonicalize().unwrap_or_else(|_| absolute(&path).unwrap_or(path)) } pub fn serialize_path(path: &PathBuf, serializer: S) -> Result { @@ -508,7 +535,7 @@ mod tests { assert_eq!(result, expected_result, "The result should contain the expected paths, instead it found"); } - #[cfg(target_os = "windows")] + #[cfg(windows)] #[test] fn test_preprocess_windows_path_for_normalization() { let test_cases = [ @@ -557,9 +584,9 @@ mod tests { } } - #[cfg(target_os = "windows")] + #[cfg(windows)] #[test] - fn test_canonical_path() + fn test_canonical_path_windows() { let temp_dir = tempfile::tempdir().unwrap(); let temp_dir_path = temp_dir.path(); @@ -597,7 +624,7 @@ mod tests { (r"d:\\..\\..\\..\\..\\..", PathBuf::from(r"\\?\D:\")), // Verbatim Disks - (r"\\\\?\\C:\Very\Long\Path\With\Lots\Of\Subdirectories\..\..\..\CON", PathBuf::from(r"\\?\C:\Very\Long\Path\With\CON")), + (r"\\\\?\\C:\Very\Long\Path\With\Lots\Of\Subdirectories\..\..\..\LongFile", PathBuf::from(r"\\?\C:\Very\Long\Path\With\LongFile")), (r"//?/d:/Trailing/Dot./.", PathBuf::from(r"\\?\d:\Trailing\Dot")), (r"\?\c:\Trailing\Space\\ ", PathBuf::from(r"\\?\c:\Trailing\Space\")), (r"\?/C:/$MFT", PathBuf::from(r"\\?\C:\$MFT")), @@ -625,6 +652,36 @@ mod tests { } } + #[cfg(not(windows))] + #[test] + fn test_canonical_path_unix() + { + let cur_dir = std::env::current_dir().unwrap(); + + let test_cases = vec![ + // Absolute paths + (r"/home/.././etc/./../usr/bin", PathBuf::from(r"/usr/bin")), + (r"/var/run//.././run//docker.sock", PathBuf::from(r"/run/docker.sock")), + (r"/this_folder_does_not_exist/run/.././run/docker.sock", PathBuf::from(r"/this_folder_does_not_exist/run/docker.sock")), + (r"/../../var", PathBuf::from(r"/var")), + (r"/../../var_n/.", PathBuf::from(r"/var_n")), + (r"///var_n//foo_n/foo_n//./././../bar_n/", PathBuf::from(r"/var_n/foo_n/bar_n/")), + + // Relative paths + (r".", cur_dir.clone()), + (r".//some_not_existing_folder/..", cur_dir.clone()), + (r"./some_not_existing_folder///..//", cur_dir.join("")), + (r"foo_n////var_n", cur_dir.join("foo_n").join("var_n")), + (r"foo_n/../var_n/../cat_n/", cur_dir.join("cat_n")), + (r"./foo_n/././..", cur_dir.clone()), + ]; + + for (input, expected) in test_cases { + let result = canonical_path(input); + assert_eq!(result, expected, "Expected canonical path for {} to be {}, but got {}", input, expected.to_string_lossy(), result.to_string_lossy()); + } + } + // cicd works with virtual machine, this test is slow #[cfg(not(all(target_arch = "aarch64", target_os = "linux")))] #[cfg(not(debug_assertions))] From fffc24965de4b04f47c15f779ceaae27146e63ec Mon Sep 17 00:00:00 2001 From: Humberto Yusta Date: Fri, 14 Feb 2025 18:02:13 +0100 Subject: [PATCH 5/5] fix: replace to pathbuf normalize for canonical path --- src/tools/file_edit/tool_create_textdoc.rs | 4 ++-- src/tools/file_edit/tool_replace_textdoc.rs | 4 ++-- src/tools/file_edit/tool_update_textdoc.rs | 4 ++-- src/tools/file_edit/tool_update_textdoc_regex.rs | 4 ++-- src/tools/tool_mv.rs | 6 +++--- src/tools/tool_rm.rs | 4 ++-- 6 files changed, 13 insertions(+), 13 deletions(-) diff --git a/src/tools/file_edit/tool_create_textdoc.rs b/src/tools/file_edit/tool_create_textdoc.rs index 07156ea5c..b99678d60 100644 --- a/src/tools/file_edit/tool_create_textdoc.rs +++ b/src/tools/file_edit/tool_create_textdoc.rs @@ -11,7 +11,7 @@ use std::collections::HashMap; use std::path::PathBuf; use std::sync::Arc; use tokio::sync::Mutex as AMutex; -use crate::files_correction::to_pathbuf_normalize; +use crate::files_correction::canonical_path; use crate::global_context::GlobalContext; use tokio::sync::RwLock as ARwLock; @@ -25,7 +25,7 @@ pub struct ToolCreateTextDoc; fn parse_args(args: &HashMap) -> Result { let path = match args.get("path") { Some(Value::String(s)) => { - let path = to_pathbuf_normalize(&s.trim().to_string()); + let path = canonical_path(&s.trim().to_string()); if !path.is_absolute() { return Err(format!( "argument 'path' should be an absolute path: {:?}", diff --git a/src/tools/file_edit/tool_replace_textdoc.rs b/src/tools/file_edit/tool_replace_textdoc.rs index 4f5d2a98f..7af0cae65 100644 --- a/src/tools/file_edit/tool_replace_textdoc.rs +++ b/src/tools/file_edit/tool_replace_textdoc.rs @@ -1,6 +1,6 @@ use crate::at_commands::at_commands::AtCommandsContext; use crate::call_validation::{ChatContent, ChatMessage, ContextEnum, DiffChunk}; -use crate::files_correction::to_pathbuf_normalize; +use crate::files_correction::canonical_path; use crate::global_context::GlobalContext; use crate::integrations::integr_abstract::IntegrationConfirmation; use crate::tools::file_edit::auxiliary::{ @@ -25,7 +25,7 @@ pub struct ToolReplaceTextDoc; fn parse_args(args: &HashMap) -> Result { let path = match args.get("path") { Some(Value::String(s)) => { - let path = to_pathbuf_normalize(&s.trim().to_string()); + let path = canonical_path(&s.trim().to_string()); if !path.is_absolute() { return Err(format!( "argument 'path' should be an absolute path: {:?}", diff --git a/src/tools/file_edit/tool_update_textdoc.rs b/src/tools/file_edit/tool_update_textdoc.rs index 0573eb903..6e4205dc5 100644 --- a/src/tools/file_edit/tool_update_textdoc.rs +++ b/src/tools/file_edit/tool_update_textdoc.rs @@ -9,7 +9,7 @@ use std::collections::HashMap; use std::path::PathBuf; use std::sync::Arc; use tokio::sync::Mutex as AMutex; -use crate::files_correction::to_pathbuf_normalize; +use crate::files_correction::canonical_path; use tokio::sync::RwLock as ARwLock; use crate::global_context::GlobalContext; @@ -25,7 +25,7 @@ pub struct ToolUpdateTextDoc; fn parse_args(args: &HashMap) -> Result { let path = match args.get("path") { Some(Value::String(s)) => { - let path = to_pathbuf_normalize(&s.trim().to_string()); + let path = canonical_path(&s.trim().to_string()); if !path.is_absolute() { return Err(format!( "argument 'path' should be an absolute path: {:?}", diff --git a/src/tools/file_edit/tool_update_textdoc_regex.rs b/src/tools/file_edit/tool_update_textdoc_regex.rs index 988da52e8..c40b76241 100644 --- a/src/tools/file_edit/tool_update_textdoc_regex.rs +++ b/src/tools/file_edit/tool_update_textdoc_regex.rs @@ -10,7 +10,7 @@ use std::path::PathBuf; use std::sync::Arc; use regex::Regex; use tokio::sync::Mutex as AMutex; -use crate::files_correction::to_pathbuf_normalize; +use crate::files_correction::canonical_path; use tokio::sync::RwLock as ARwLock; use crate::global_context::GlobalContext; @@ -26,7 +26,7 @@ pub struct ToolUpdateTextDocRegex; fn parse_args(args: &HashMap) -> Result { let path = match args.get("path") { Some(Value::String(s)) => { - let path = to_pathbuf_normalize(&s.trim().to_string()); + let path = canonical_path(&s.trim().to_string()); if !path.is_absolute() { return Err(format!( "argument 'path' should be an absolute path: {:?}", diff --git a/src/tools/tool_mv.rs b/src/tools/tool_mv.rs index 5328ec9d8..5925db182 100644 --- a/src/tools/tool_mv.rs +++ b/src/tools/tool_mv.rs @@ -9,7 +9,7 @@ use tokio::sync::Mutex as AMutex; use crate::at_commands::at_commands::AtCommandsContext; use crate::at_commands::at_file::return_one_candidate_or_a_good_error; use crate::call_validation::{ChatMessage, ChatContent, ContextEnum}; -use crate::files_correction::{get_project_dirs, to_pathbuf_normalize, correct_to_nearest_filename, correct_to_nearest_dir_path}; +use crate::files_correction::{get_project_dirs, canonical_path, correct_to_nearest_filename, correct_to_nearest_dir_path}; use crate::tools::tools_description::{MatchConfirmDeny, MatchConfirmDenyResult, Tool, ToolDesc, ToolParam}; use crate::integrations::integr_abstract::IntegrationConfirmation; @@ -106,8 +106,8 @@ impl Tool for ToolMv { .unwrap_or(dst_str.clone()); let dst_corrected_path = format!("{}/{}", dst_parent_path.trim_end_matches('/'), dst_name); - let src_true_path = to_pathbuf_normalize(&src_corrected_path); - let dst_true_path = to_pathbuf_normalize(&dst_corrected_path); + let src_true_path = canonical_path(&src_corrected_path); + let dst_true_path = canonical_path(&dst_corrected_path); let src_within_project = project_dirs.iter().any(|p| src_true_path.starts_with(p)); let dst_within_project = project_dirs.iter().any(|p| dst_true_path.starts_with(p)); diff --git a/src/tools/tool_rm.rs b/src/tools/tool_rm.rs index 722bf0f1c..436d2ec15 100644 --- a/src/tools/tool_rm.rs +++ b/src/tools/tool_rm.rs @@ -8,7 +8,7 @@ use tokio::sync::Mutex as AMutex; use crate::at_commands::at_commands::AtCommandsContext; use crate::at_commands::at_file::return_one_candidate_or_a_good_error; use crate::call_validation::{ChatMessage, ChatContent, ContextEnum}; -use crate::files_correction::{get_project_dirs, to_pathbuf_normalize, correct_to_nearest_filename, correct_to_nearest_dir_path}; +use crate::files_correction::{get_project_dirs, canonical_path, correct_to_nearest_filename, correct_to_nearest_dir_path}; use crate::tools::tools_description::{MatchConfirmDeny, MatchConfirmDenyResult, Tool, ToolDesc, ToolParam}; use crate::integrations::integr_abstract::IntegrationConfirmation; @@ -127,7 +127,7 @@ impl Tool for ToolRm { return Err(format!("Path '{}' not found", path_str)); }; - let true_path = to_pathbuf_normalize(&corrected_path); + let true_path = canonical_path(&corrected_path); // Check that the true_path is within project directories. let is_within_project = project_dirs.iter().any(|p| true_path.starts_with(p));