diff --git a/refact-agent/engine/src/files_blocklist.rs b/refact-agent/engine/src/files_blocklist.rs index 7a0dff79e..80e73416a 100644 --- a/refact-agent/engine/src/files_blocklist.rs +++ b/refact-agent/engine/src/files_blocklist.rs @@ -43,7 +43,7 @@ impl Default for IndexingSettings { pub struct IndexingEverywhere { pub global: IndexingSettings, - pub vcs_indexing_settings_map: HashMap, + pub vcs_indexing_settings_map: HashMap, pub loaded_ts: u64, } @@ -64,12 +64,11 @@ impl IndexingEverywhere { let mut best_vcs: Option = None; let mut best_pathbuf: Option = None; - for (vcs, vcs_settings) in &self.vcs_indexing_settings_map { - let vcs_pathbuf = PathBuf::from(vcs); - if path.starts_with(&vcs) { - if best_vcs.is_none() || vcs_pathbuf.components().count() > best_pathbuf.clone().unwrap().components().count() { + for (vcs_path, vcs_settings) in &self.vcs_indexing_settings_map { + if path.starts_with(vcs_path) { + if best_vcs.is_none() || vcs_path.components().count() > best_pathbuf.as_ref().unwrap().components().count() { best_vcs = Some(vcs_settings.clone()); - best_pathbuf = Some(vcs_pathbuf); + best_pathbuf = Some(vcs_path.clone()); } } } @@ -85,7 +84,7 @@ impl IndexingEverywhere { pub async fn load_indexing_yaml( indexing_yaml_path: &Path, - relative_path_base: Option<&PathBuf>, + relative_path_base: Option<&Path>, ) -> Result { let content = fs::read_to_string(&indexing_yaml_path) .await @@ -139,13 +138,13 @@ pub async fn reload_indexing_everywhere_if_needed( }; let vcs_dirs: Vec = workspace_vcs_roots.lock().unwrap().iter().cloned().collect(); - let mut vcs_indexing_settings_map: HashMap = HashMap::new(); + let mut vcs_indexing_settings_map = HashMap::new(); for indexing_root in vcs_dirs { let indexing_path = indexing_root.join(".refact").join("indexing.yaml"); if indexing_path.exists() { match load_indexing_yaml(&indexing_path, Some(&indexing_root)).await { Ok(indexing_settings) => { - vcs_indexing_settings_map.insert(indexing_root.to_str().unwrap().to_string(), indexing_settings); + vcs_indexing_settings_map.insert(indexing_root, indexing_settings); }, Err(e) => { tracing::error!("{}, skip", e); @@ -179,7 +178,7 @@ pub fn is_blocklisted(indexing_settings: &IndexingSettings, path: &Path) -> bool fn _load_indexing_yaml_str( indexing_yaml_str: &str, - relative_path_base: Option<&PathBuf>, + relative_path_base: Option<&Path>, ) -> Result { match serde_yaml::from_str::(indexing_yaml_str) { Ok(indexing_settings) => { diff --git a/refact-agent/engine/src/files_in_workspace.rs b/refact-agent/engine/src/files_in_workspace.rs index e5fff8912..a90b1252b 100644 --- a/refact-agent/engine/src/files_in_workspace.rs +++ b/refact-agent/engine/src/files_in_workspace.rs @@ -21,9 +21,7 @@ use crate::file_filter::{is_valid_file, SOURCE_FILE_EXTENSIONS}; use crate::ast::ast_indexer_thread::ast_indexer_enqueue_files; use crate::privacy::{check_file_privacy, load_privacy_if_needed, PrivacySettings, FilePrivacyLevel}; use crate::files_blocklist::{ - IndexingEverywhere, - is_blocklisted, - reload_indexing_everywhere_if_needed, + is_blocklisted, load_indexing_yaml, reload_indexing_everywhere_if_needed, IndexingEverywhere }; use crate::files_correction_cache::PathTrie; use crate::files_in_jsonl::enqueue_all_docs_from_jsonl_but_read_first; @@ -168,6 +166,8 @@ pub struct DocumentsState { pub workspace_folders: Arc>>, pub workspace_files: Arc>>, pub workspace_vcs_roots: Arc>>, + /// .refact folders in workspace dirs + pub dot_refact_folders: Arc>>, pub active_file_path: Option, pub jsonl_files: Arc>>, // document_map on windows: c%3A/Users/user\Documents/file.ext @@ -204,6 +204,7 @@ impl DocumentsState { workspace_folders: Arc::new(StdMutex::new(workspace_dirs)), workspace_files: Arc::new(StdMutex::new(Vec::new())), workspace_vcs_roots: Arc::new(StdMutex::new(Vec::new())), + dot_refact_folders: Arc::new(AMutex::new(Vec::new())), active_file_path: None, jsonl_files: Arc::new(StdMutex::new(Vec::new())), memory_document_map: HashMap::new(), @@ -421,22 +422,18 @@ pub fn get_vcs_type(path: &Path) -> Option<&'static str> { async fn _ls_files_under_version_control_recursive( all_files: &mut Vec, vcs_folders: &mut Vec, - avoid_dups: &mut HashSet, + visited_folders: &mut HashSet, indexing_everywhere: &mut IndexingEverywhere, path: PathBuf, allow_files_in_hidden_folders: bool, ignore_size_thresholds: bool, - check_blocklist: bool, ) { - let mut candidates: Vec = vec![crate::files_correction::canonical_path(&path.to_string_lossy().to_string())]; + let mut candidates: Vec = vec![crate::files_correction::canonical_path(path.to_string_lossy().to_string())]; let mut rejected_reasons: HashMap = HashMap::new(); let mut blocklisted_dirs_cnt: usize = 0; - while !candidates.is_empty() { - let checkme = candidates.pop().unwrap(); + while let Some(checkme) = candidates.pop() { if checkme.is_file() { - let maybe_valid = is_valid_file( - &checkme, allow_files_in_hidden_folders, ignore_size_thresholds); - match maybe_valid { + match is_valid_file(&checkme, allow_files_in_hidden_folders, ignore_size_thresholds) { Ok(_) => { all_files.push(checkme.clone()); } @@ -447,10 +444,10 @@ async fn _ls_files_under_version_control_recursive( } } if checkme.is_dir() { - if avoid_dups.contains(&checkme) { + if visited_folders.contains(&checkme) { continue; } - avoid_dups.insert(checkme.clone()); + visited_folders.insert(checkme.clone()); if get_vcs_type(&checkme).is_some() { vcs_folders.push(checkme.clone()); } @@ -458,13 +455,12 @@ async fn _ls_files_under_version_control_recursive( // Has version control let indexing_yaml_path = checkme.join(".refact").join("indexing.yaml"); if indexing_yaml_path.exists() { - match crate::files_blocklist::load_indexing_yaml(&indexing_yaml_path, Some(&checkme)).await { + match load_indexing_yaml(&indexing_yaml_path, Some(&checkme)).await { Ok(indexing_settings) => { for d in indexing_settings.additional_indexing_dirs.iter() { - let cp = crate::files_correction::canonical_path(d.as_str()); - candidates.push(cp); + candidates.push(canonical_path(d)); } - indexing_everywhere.vcs_indexing_settings_map.insert(checkme.to_string_lossy().to_string(), indexing_settings); + indexing_everywhere.vcs_indexing_settings_map.insert(checkme, indexing_settings); } Err(e) => { tracing::error!("failed to load indexing.yaml in {}: {}", checkme.display(), e); @@ -487,16 +483,15 @@ async fn _ls_files_under_version_control_recursive( } else { // Don't have version control let indexing_settings = indexing_everywhere.indexing_for_path(&checkme); // this effectively only uses global blocklist - if check_blocklist && is_blocklisted(&indexing_settings, &checkme) { + if is_blocklisted(&indexing_settings, &checkme) { blocklisted_dirs_cnt += 1; continue; } - let new_paths: Vec = WalkDir::new(checkme.clone()).max_depth(1) + let new_paths = WalkDir::new(checkme.clone()).max_depth(1) .into_iter() .filter_map(|e| e.ok()) - .map(|e| crate::files_correction::canonical_path(&e.path().to_string_lossy().to_string())) - .filter(|e| e != &checkme) - .collect(); + .map(|e| crate::files_correction::canonical_path(e.path().to_string_lossy().to_string())) + .filter(|e| e != &checkme); candidates.extend(new_paths); } } @@ -512,6 +507,7 @@ async fn _ls_files_under_version_control_recursive( } +/// Returns a tuple of (`all_files`, `vcs_folders`) pub async fn retrieve_files_in_workspace_folders( proj_folders: Vec, indexing_everywhere: &mut IndexingEverywhere, @@ -527,10 +523,9 @@ pub async fn retrieve_files_in_workspace_folders( &mut vcs_folders, &mut avoid_dups, indexing_everywhere, - proj_folder.clone(), + proj_folder, allow_files_in_hidden_folders, ignore_size_thresholds, - true, ).await; } info!("in all workspace folders, VCS roots found:"); @@ -589,6 +584,43 @@ async fn enqueue_some_docs( } } +/// Expects `base_path` to be canonicalized. +async fn recurse_all_dirs( + base_path: &Path, + visited: &mut HashSet, + indexing_everywhere: &mut IndexingEverywhere, +) { + if !base_path.is_dir() { + return; + } + if visited.contains(base_path) { + return; + } + let indexing_settings = indexing_everywhere.indexing_for_path(base_path); + if is_blocklisted(&indexing_settings, base_path) && !base_path.ends_with(".refact") { + return; + } + visited.insert(base_path.to_path_buf()); + + let mut entries = match tokio::fs::read_dir(base_path).await { + Ok(entries) => entries, + Err(e) => { + info!("Failed to read directory {}: {}", base_path.display(), e); + return; + } + }; + + while let Ok(Some(i)) = entries.next_entry().await { + Box::pin( + recurse_all_dirs( + &i.path(), + visited, + indexing_everywhere, + ) + ).await; + } +} + pub async fn enqueue_all_files_from_workspace_folders( gcx: Arc>, wake_up_indexers: bool, @@ -599,7 +631,7 @@ pub async fn enqueue_all_files_from_workspace_folders( info!("enqueue_all_files_from_workspace_folders started files search with {} folders", folders.len()); let mut indexing_everywhere = crate::files_blocklist::reload_global_indexing_only(gcx.clone()).await; let (all_files, vcs_folders) = retrieve_files_in_workspace_folders( - folders, + folders.clone(), &mut indexing_everywhere, false, false @@ -609,17 +641,29 @@ pub async fn enqueue_all_files_from_workspace_folders( let mut old_workspace_files = Vec::new(); let cache_dirty = { - let mut gcx_locked = gcx.write().await; { + let gcx_locked = gcx.read().await; let mut workspace_files = gcx_locked.documents_state.workspace_files.lock().unwrap(); std::mem::swap(&mut *workspace_files, &mut old_workspace_files); workspace_files.extend(all_files.clone()); } { + let mut gcx_locked = gcx.write().await; std::mem::swap(&mut gcx_locked.documents_state.workspace_vcs_roots, &mut workspace_vcs_roots); } - gcx_locked.indexing_everywhere = Arc::new(indexing_everywhere); - gcx_locked.documents_state.cache_dirty.clone() + { + let mut indexing_everywhere = crate::files_blocklist::reload_global_indexing_only(gcx.clone()).await; + let mut visited = HashSet::new(); + for folder in folders.iter() { + recurse_all_dirs(folder, &mut visited, &mut indexing_everywhere).await; + } + let mut gcx_locked = gcx.write().await; + gcx_locked.documents_state.dot_refact_folders = Arc::new(AMutex::new( + visited.into_iter().filter(|p| p.ends_with(".refact")).collect::>() + )); + gcx_locked.indexing_everywhere = Arc::new(indexing_everywhere); + gcx_locked.documents_state.cache_dirty.clone() + } }; *cache_dirty.lock().await = std::time::SystemTime::now().duration_since(std::time::UNIX_EPOCH).unwrap().as_secs_f64(); @@ -868,6 +912,36 @@ pub async fn file_watcher_event(event: Event, gcx_weak: Weak>, event: Event) { + if let Some(gcx) = gcx_weak.clone().upgrade() { + let dot_refact_folders_arc = gcx.read().await.documents_state.dot_refact_folders.clone(); + let mut dot_refact_folders = dot_refact_folders_arc.lock().await; + + for p in &event.paths { + if p.ends_with(".refact") { + let canonical = canonical_path(p.to_string_lossy()); + dot_refact_folders.retain(|x| x != &canonical); + if p.exists() { + dot_refact_folders.push(canonical); + } + } + } + + match event.kind { + EventKind::Create(_) => { + info!("Detected .refact folder creation: {:?}", event.paths); + } + EventKind::Remove(_) => { + info!("Detected .refact folder removal: {:?}", event.paths); + } + EventKind::Modify(_) => { + info!("Detected .refact folder modification: {:?}", event.paths); + } + _ => () + } + } + } + match event.kind { // We may receive specific event that a folder is being added/removed, but not the .git itself, this happens on Unix systems EventKind::Create(CreateKind::Folder) | EventKind::Remove(RemoveKind::Folder) if event.paths.iter().any( @@ -879,6 +953,10 @@ pub async fn file_watcher_event(event: Event, gcx_weak: Weak on_dot_git_dir_change(gcx_weak, event).await, + EventKind::Create(CreateKind::Any | CreateKind::Folder) | EventKind::Modify(_) | EventKind::Remove(RemoveKind::Any | RemoveKind::Folder) + if event.paths.iter().any(|p| p.ends_with(".refact")) => + on_dot_refact_dir_change(gcx_weak, event).await, + EventKind::Create(_) | EventKind::Modify(_) | EventKind::Remove(_) => on_file_change(gcx_weak.clone(), event).await, diff --git a/refact-agent/engine/src/http/routers/v1/v1_integrations.rs b/refact-agent/engine/src/http/routers/v1/v1_integrations.rs index 11634d17b..62cf71ca4 100644 --- a/refact-agent/engine/src/http/routers/v1/v1_integrations.rs +++ b/refact-agent/engine/src/http/routers/v1/v1_integrations.rs @@ -19,7 +19,8 @@ pub async fn handle_v1_integrations( Extension(gcx): Extension>>, _: hyper::body::Bytes, ) -> axum::response::Result, ScratchError> { - let integrations = crate::integrations::setting_up_integrations::integrations_all(gcx.clone(), true).await; + let mut integrations = crate::integrations::setting_up_integrations::integrations_all(gcx.clone(), true).await; + integrations.integrations.sort_by(|a, b| a.integr_name.cmp(&b.integr_name)); let payload = serde_json::to_string_pretty(&integrations).map_err(|e| { ScratchError::new(StatusCode::INTERNAL_SERVER_ERROR, format!("Failed to serialize payload: {}", e)) })?; diff --git a/refact-agent/engine/src/integrations/running_integrations.rs b/refact-agent/engine/src/integrations/running_integrations.rs index 988b3251a..339697da2 100644 --- a/refact-agent/engine/src/integrations/running_integrations.rs +++ b/refact-agent/engine/src/integrations/running_integrations.rs @@ -33,7 +33,7 @@ pub async fn load_integrations( &mut error_log, include_paths_matching, false, - ); + ).await; let mut integrations_map = IndexMap::new(); for rec in records { diff --git a/refact-agent/engine/src/integrations/setting_up_integrations.rs b/refact-agent/engine/src/integrations/setting_up_integrations.rs index 956bd63a3..94d9db81f 100644 --- a/refact-agent/engine/src/integrations/setting_up_integrations.rs +++ b/refact-agent/engine/src/integrations/setting_up_integrations.rs @@ -1,7 +1,7 @@ -use std::fs; -use std::path::PathBuf; -use std::sync::Arc; -use std::collections::HashMap; +use std::{fs, iter}; +use std::path::{Path, PathBuf}; +use std::sync::{Arc, OnceLock}; +use std::collections::{HashMap, HashSet}; use regex::Regex; use serde::Serialize; use serde_json::{json, Value}; @@ -56,7 +56,7 @@ fn parse_and_validate_yaml(path: &str, content: &String) -> Result, global_config_dir: &PathBuf, integrations_yaml_path: &String, @@ -68,10 +68,6 @@ pub fn read_integrations_d( ) -> Vec { let mut result = Vec::new(); - let mut files_to_read = Vec::new(); - let mut project_config_dirs = config_dirs.iter().map(|dir| dir.to_string_lossy().to_string()).collect::>(); - project_config_dirs.push("".to_string()); // global - // 1. Read and parse integrations.yaml (Optional, used for testing) // This reads the file to be used by (2) and (3), it does not create the records yet. // --integrations-yaml flag disables global config dir, except for integrations @@ -113,51 +109,82 @@ pub fn read_integrations_d( } } - // 2. Read each of config_dirs - for project_config_dir in project_config_dirs { - // Read config_folder/integr_name.yaml and make a record, even if the file doesn't exist - let config_dir = if project_config_dir == "" { global_config_dir.clone() } else { PathBuf::from(project_config_dir.clone()) }; - for integr_name in lst.iter() { - let path_str = join_config_path(&config_dir, integr_name); - let path = PathBuf::from(path_str.clone()); - if !include_non_existent_records && !path.exists() { - continue; - } - let (_integr_name, project_path) = match split_path_into_project_and_integration(&path) { - Ok(x) => x, - Err(e) => { - tracing::error!("error deriving project path: {}", e); - continue; + // 2. Read single file integrations_yaml_path, sections in yaml become integrations + if let Some(integrations_yaml_value) = integrations_yaml_value { + let short_yaml = crate::nicer_logs::last_n_chars(integrations_yaml_path, 15); + match integrations_yaml_value.as_mapping() { + Some(mapping) => { + for (key, value) in mapping { + if let Some(key_str) = key.as_str() { + if key_str.starts_with("cmdline_") || key_str.starts_with("service_") { + tracing::info!("{} detected prefix `{}`", short_yaml, key_str); + } else if lst.contains(&key_str) { + tracing::info!("{} has `{}`", short_yaml, key_str); + } else { + tracing::warn!("{} unrecognized section `{}`", short_yaml, key_str); + continue; + } + + result.push(IntegrationRecord { + integr_config_path: integrations_yaml_path.clone(), + integr_name: key_str.to_string(), + icon_path: format!("/integration-icon/{key_str}.png"), + integr_config_exists: true, + config_unparsed: serde_json::to_value(value).unwrap(), + ..Default::default() + }); + } } - }; - files_to_read.push((path_str, integr_name.to_string(), project_path)); + }, + None => { + tracing::warn!("{} is not a mapping", short_yaml); + } } - // Find special files that start with cmdline_* and service_* - if let Ok(entries) = fs::read_dir(config_dir.join("integrations.d")) { - let mut entries: Vec<_> = entries.filter_map(Result::ok).collect(); - entries.sort_by_key(|entry| entry.file_name()); - for entry in entries { - let file_name = entry.file_name(); - let file_name_str = file_name.to_string_lossy(); - if !file_name_str.ends_with(".yaml") { - continue; - } - let file_name_str_no_yaml = file_name_str.trim_end_matches(".yaml").to_string(); - let (_integr_name, project_path) = match split_path_into_project_and_integration(&entry.path()) { - Ok(x) => x, - Err(e) => { - tracing::error!("error deriving project path: {}", e); - continue; - } + } + + // 3. Read each of config_dirs + let mut files_to_read = Vec::new(); + + for config_dir in config_dirs.iter().chain(iter::once(global_config_dir)) { + let project_path = if config_dir == global_config_dir { + String::new() // Global config dir has no project path + } else { + config_dir + .parent().expect("dir to be in form parent/.refact") + .to_string_lossy().to_string() + }; + + let mut integrations_missing: HashSet<&str> = HashSet::from_iter(lst.iter().cloned()); + + // Find integrations present in config_dir/integrations.d + if let Ok(mut entries) = tokio::fs::read_dir(config_dir.join("integrations.d")).await { + while let Ok(Some(entry)) = entries.next_entry().await { + let file_name = if let Some(name) = entry.file_name().to_string_lossy().strip_suffix(".yaml") { + name.to_string() + } else { + continue; }; - if file_name_str.starts_with("cmdline_") || file_name_str.starts_with("service_") || file_name_str.starts_with("mcp_") { - files_to_read.push((entry.path().to_string_lossy().to_string(), file_name_str_no_yaml, project_path)); - } + + if file_name.starts_with("cmdline_") || file_name.starts_with("service_") || file_name.starts_with("mcp_") { + files_to_read.push((entry.path(), file_name, project_path.clone(), true)); + } else if integrations_missing.contains(&file_name.as_str()) { + integrations_missing.remove(file_name.as_str()); + files_to_read.push((entry.path(), file_name, project_path.clone(), true)); + } + } + } + + // If there are integrations that were not found in the config_dir/integrations.d, + // add them as non-existent records. + if include_non_existent_records { + for integr_name in integrations_missing.iter() { + let path = join_config_path(config_dir, integr_name); + files_to_read.push((path, integr_name.to_string(), project_path.clone(), false)); } } } - for (path_str, integr_name, project_path) in files_to_read { + for (path, integr_name, project_path, path_exists) in files_to_read { // If --integrations-yaml is set, ignore the global config folder // except for the list of integrations specified as `globally_allowed_integrations`. if let Some(allowed_integr_list) = &globally_allowed_integration_list { @@ -166,20 +193,24 @@ pub fn read_integrations_d( } } - let path = PathBuf::from(&path_str); if !any_glob_matches_path(include_paths_matching, &path) { continue; } + let path_str = path.to_string_lossy(); + // let short_pp = if project_path.is_empty() { format!("global") } else { crate::nicer_logs::last_n_chars(&project_path, 15) }; - let mut rec: IntegrationRecord = Default::default(); - rec.project_path = project_path.clone(); - rec.integr_name = integr_name.clone(); - rec.icon_path = format!("/integration-icon/{integr_name}.png"); - rec.integr_config_path = path_str.clone(); - rec.integr_config_exists = path.exists(); + let mut rec: IntegrationRecord = IntegrationRecord { + project_path: project_path.clone(), + integr_name: integr_name.clone(), + icon_path: format!("/integration-icon/{integr_name}.png"), + integr_config_path: path_str.to_string(), + integr_config_exists: path_exists, + ..Default::default() + }; + if rec.integr_config_exists { - match fs::read_to_string(&path) { + match tokio::fs::read_to_string(&path).await { Ok(file_content) => match parse_and_validate_yaml(&path_str, &file_content) { Ok(json_value) => { // tracing::info!("{} has {}", short_pp, integr_name); @@ -210,43 +241,6 @@ pub fn read_integrations_d( result.push(rec); } - // 3. Read single file integrations_yaml_path, sections in yaml become integrations - if let Some(integrations_yaml_value) = integrations_yaml_value { - let short_yaml = crate::nicer_logs::last_n_chars(integrations_yaml_path, 15); - match integrations_yaml_value.as_mapping() { - Some(mapping) => { - for (key, value) in mapping { - if let Some(key_str) = key.as_str() { - if key_str.starts_with("cmdline_") || key_str.starts_with("service_") { - let mut rec: IntegrationRecord = Default::default(); - rec.integr_config_path = integrations_yaml_path.clone(); - rec.integr_name = key_str.to_string(); - rec.icon_path = format!("/integration-icon/{key_str}.png"); - rec.integr_config_exists = true; - rec.config_unparsed = serde_json::to_value(value.clone()).unwrap(); - result.push(rec); - tracing::info!("{} detected prefix `{}`", short_yaml, key_str); - } else if lst.contains(&key_str) { - let mut rec: IntegrationRecord = Default::default(); - rec.integr_config_path = integrations_yaml_path.clone(); - rec.integr_name = key_str.to_string(); - rec.icon_path = format!("/integration-icon/{key_str}.png"); - rec.integr_config_exists = true; - rec.config_unparsed = serde_json::to_value(value.clone()).unwrap(); - result.push(rec); - tracing::info!("{} has `{}`", short_yaml, key_str); - } else { - tracing::warn!("{} unrecognized section `{}`", short_yaml, key_str); - } - } - } - }, - None => { - tracing::warn!("{} is not a mapping", short_yaml); - } - } - } - // 4. Replace vars in config_unparsed for rec in &mut result { if let serde_json::Value::Object(map) = &mut rec.config_unparsed { @@ -369,61 +363,93 @@ pub async fn get_vars_for_replacements( variables } -pub fn join_config_path(config_dir: &PathBuf, integr_name: &str) -> String +pub fn join_config_path(config_dir: &PathBuf, integr_name: &str) -> PathBuf { - config_dir.join("integrations.d").join(format!("{}.yaml", integr_name)).to_string_lossy().into_owned() + config_dir.join("integrations.d").join(format!("{}.yaml", integr_name)) } pub async fn get_config_dirs( gcx: Arc>, current_project_path: &Option ) -> (Vec, PathBuf) { - let (global_config_dir, workspace_folders_arc, workspace_vcs_roots_arc, _integrations_yaml) = { + let (global_config_dir, workspace_folders_arc, workspace_vcs_roots_arc, dot_refact_folders_arc) = { let gcx_locked = gcx.read().await; ( gcx_locked.config_dir.clone(), gcx_locked.documents_state.workspace_folders.clone(), gcx_locked.documents_state.workspace_vcs_roots.clone(), - gcx_locked.cmdline.integrations_yaml.clone(), + gcx_locked.documents_state.dot_refact_folders.clone(), ) }; let mut workspace_folders = workspace_folders_arc.lock().unwrap().clone(); - if let Some(current_project_path) = current_project_path { - workspace_folders = workspace_folders.into_iter() - .filter(|folder| current_project_path.starts_with(&folder)).collect::>(); - } + let dot_refact_folders = dot_refact_folders_arc.lock().await.clone(); let workspace_vcs_roots = workspace_vcs_roots_arc.lock().unwrap().clone(); - let mut config_dirs = Vec::new(); + let mut config_dirs = vec![]; - for folder in workspace_folders { - let vcs_roots: Vec = workspace_vcs_roots - .iter() - .filter(|root| root.starts_with(&folder)) - .cloned() - .collect(); + if let Some(current_project_path) = current_project_path { + workspace_folders.retain(|folder| current_project_path.starts_with(folder)); - if !vcs_roots.is_empty() { - // it has any workspace_vcs_roots => take them as projects - for root in vcs_roots { - config_dirs.push(root.join(".refact")); - } + let active_workspace = if !workspace_folders.is_empty() { + workspace_folders.sort(); + workspace_folders.truncate(1); + + &workspace_folders[0] } else { - // it doesn't => use workspace_folder itself - // probably we see this because it's a new project that doesn't have version control yet, but added to the workspace already - config_dirs.push(folder.join(".refact")); + tracing::warn!("No workspace folders found for current project path: {}", current_project_path.display()); + current_project_path + }; + + tracing::info!("Active workspace folder: {}", active_workspace.display()); + + config_dirs.extend(workspace_vcs_roots.into_iter().map(|p| p.join(".refact")).filter(|p| p.starts_with(active_workspace))); + config_dirs.extend(dot_refact_folders.into_iter().filter(|p| p.starts_with(active_workspace))); + + for parent in active_workspace.ancestors() { + if parent.join(".refact").exists() || parent == active_workspace { + config_dirs.push(parent.join(".refact")); + } + } + } else { + config_dirs.extend(workspace_vcs_roots.into_iter().map(|p| p.join(".refact"))); + config_dirs.extend(dot_refact_folders.into_iter()); + + for workspace_folder in workspace_folders { + for parent in workspace_folder.ancestors() { + if parent.join(".refact").exists() || parent == workspace_folder { + config_dirs.push(parent.join(".refact")); + } + } } } config_dirs.sort(); + config_dirs.dedup(); + (config_dirs, global_config_dir) } -pub fn split_path_into_project_and_integration(cfg_path: &PathBuf) -> Result<(String, String), String> { +static RE_PER_PROJECT: OnceLock = OnceLock::new(); +static RE_GLOBAL: OnceLock = OnceLock::new(); + +fn get_re_per_project() -> &'static Regex { + RE_PER_PROJECT.get_or_init(|| { + Regex::new(r"^(.*)[\\/]\.refact[\\/](integrations\.d)[\\/](.+)\.yaml$").unwrap() + }) +} + +fn get_re_global() -> &'static Regex { + RE_GLOBAL.get_or_init(|| { + Regex::new(r"^(.*)[\\/]\.config[\\/](refact[\\/](integrations\.d)[\\/](.+)\.yaml$)").unwrap() + }) +} + +/// Does not validate the path, just extracts the parts based on known patterns. +pub fn split_path_into_project_and_integration(cfg_path: &Path) -> Result<(String, String), String> { let path_str = cfg_path.to_string_lossy(); - let re_per_project = Regex::new(r"^(.*)[\\/]\.refact[\\/](integrations\.d)[\\/](.+)\.yaml$").unwrap(); - let re_global = Regex::new(r"^(.*)[\\/]\.config[\\/](refact[\\/](integrations\.d)[\\/](.+)\.yaml$)").unwrap(); + let re_per_project = get_re_per_project(); + let re_global = get_re_global(); if let Some(caps) = re_per_project.captures(&path_str) { let project_path = caps.get(1).map_or(String::new(), |m| m.as_str().to_string()); @@ -449,7 +475,16 @@ pub async fn integrations_all( let lst: Vec<&str> = crate::integrations::integrations_list(allow_experimental); let mut error_log: Vec = Vec::new(); let vars_for_replacements = get_vars_for_replacements(gcx.clone(), &mut error_log).await; - let integrations = read_integrations_d(&config_dirs, &global_config_dir, &integrations_yaml_path, &vars_for_replacements, &lst, &mut error_log, &["**/*".to_string()], include_non_existent_records); + let integrations = read_integrations_d( + &config_dirs, + &global_config_dir, + &integrations_yaml_path, + &vars_for_replacements, + &lst, + &mut error_log, + &["**/*".to_string()], + include_non_existent_records + ).await; IntegrationResult { integrations, error_log } }