Skip to content

Commit b08a492

Browse files
committed
feat: add cortex_get_rules (3-tier rule engine) + project_path filter for memory retrieval
- Add serde_yaml = '0.9' dependency for YAML rule file parsing
- Add src/rules.rs: get_merged_rules() with 3-tier deep-merge
  - Tier 1: ~/.cortexast/global_rules.yml
  - Tier 2: ~/.cortexast/cluster/{team_cluster_id}_rules.yml (via .cortexast.json)
  - Tier 3: {project}/.cortex_rules.yml (highest priority)
  - Arrays: union semantics; Scalars: last-write-wins (Project > Team > Global)
- Register cortex_get_rules MCP tool in server.rs (input: project_path)
- Add project_path substring filter to cortex_memory_retriever tool and hybrid_search()
- Update all hybrid_search() call sites (memory.rs tests + e2e_memory.rs) for new signature
- All 24 tests pass (14 unit + 8 e2e + 2 MCP smoke)
- E2E integration test verified: CortexSync writes -> CortexAST reads + project filter works
1 parent 9ce7e7b commit b08a492

7 files changed

Lines changed: 302 additions & 18 deletions

File tree

Cargo.lock

Lines changed: 20 additions & 0 deletions
Some generated files are not rendered by default. Learn more about customizing how changed files appear on GitHub.

Cargo.toml

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -35,6 +35,9 @@ xxhash-rust = { version = "0.8", features = ["xxh3"] }
3535
# Phase 3: resolve ~/.cortexast/global_memory.jsonl default path.
3636
dirs = "6.0.0"
3737

38+
# Rules engine: parse YAML rule files for cortex_get_rules deep-merge.
39+
serde_yaml = "0.9"
40+
3841
# Deep-dive inspection (symbol extraction)
3942
tree-sitter = "0.26.5"
4043
tree-sitter-rust = "0.21.0"

src/lib.rs

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -13,6 +13,7 @@ pub mod config;
1313
pub mod inspector;
1414
pub mod mapper;
1515
pub mod memory;
16+
pub mod rules;
1617
pub mod scanner;
1718
pub mod server;
1819
pub mod slicer;

src/memory.rs

Lines changed: 16 additions & 14 deletions
Original file line numberDiff line numberDiff line change
@@ -255,19 +255,21 @@ pub fn hybrid_search(
255255
tokens: &[&str],
256256
top_k: usize,
257257
tag_filter: &[String],
258+
project_path_filter: Option<&str>,
258259
) -> Vec<RankedEntry> {
259-
let indices: Vec<usize> = if tag_filter.is_empty() {
260-
(0..store.entries.len()).collect()
261-
} else {
262-
(0..store.entries.len())
263-
.filter(|&i| {
264-
store.entries[i]
265-
.tags
266-
.iter()
267-
.any(|t| tag_filter.iter().any(|f| f.eq_ignore_ascii_case(t)))
268-
})
269-
.collect()
270-
};
260+
let indices: Vec<usize> = (0..store.entries.len())
261+
.filter(|&i| {
262+
let e = &store.entries[i];
263+
// tag filter
264+
let tag_ok = tag_filter.is_empty()
265+
|| e.tags.iter().any(|t| tag_filter.iter().any(|f| f.eq_ignore_ascii_case(t)));
266+
// project_path filter (substring match so callers can pass partial paths)
267+
let path_ok = project_path_filter
268+
.map(|pf| e.project_path.contains(pf))
269+
.unwrap_or(true);
270+
tag_ok && path_ok
271+
})
272+
.collect();
271273

272274
let mut ranked: Vec<RankedEntry> = indices
273275
.par_iter()
@@ -403,7 +405,7 @@ mod tests {
403405

404406
let store = MemoryStore::load(tmp.path()).expect("store");
405407
let tokens = ["refactor", "parser"];
406-
let results = hybrid_search(&store, None, &tokens, 5, &[]);
408+
let results = hybrid_search(&store, None, &tokens, 5, &[], None);
407409

408410
assert!(!results.is_empty(), "must return results");
409411
assert_eq!(
@@ -424,7 +426,7 @@ mod tests {
424426
writeln!(tmp, "{other}").unwrap();
425427

426428
let store = MemoryStore::load(tmp.path()).expect("store");
427-
let results = hybrid_search(&store, None, &["fix"], 10, &["bugfix".to_string()]);
429+
let results = hybrid_search(&store, None, &["fix"], 10, &["bugfix".to_string()], None);
428430

429431
assert_eq!(results.len(), 1, "only one entry has tag 'bugfix'");
430432
assert_eq!(results[0].entry.id, "id-tagged");

src/rules.rs

Lines changed: 210 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,210 @@
1+
//! # CortexAST — 3-Tier Rule Engine
2+
//!
3+
//! Implements `cortex_get_rules`: deep-merges YAML rule files from three tiers
4+
//! (Global < Team < Project) and returns a unified JSON/YAML object.
5+
//!
6+
//! ## Tier resolution priority (last-write-wins for scalars; arrays are unioned)
7+
//! 1. **Tier 1 — Global** `~/.cortexast/global_rules.yml`
8+
//! 2. **Tier 2 — Team** `~/.cortexast/cluster/{team_cluster_id}_rules.yml`
9+
//! (team_cluster_id sourced from `.cortexast.json` in project root)
10+
//! 3. **Tier 3 — Project** `{project_path}/.cortex_rules.yml`
11+
12+
use anyhow::{Context, Result};
13+
use serde_json::{Map, Value};
14+
use std::path::Path;
15+
16+
// ─────────────────────────────────────────────────────────────────────────────
17+
// Paths
18+
// ─────────────────────────────────────────────────────────────────────────────
19+
20+
fn global_rules_path() -> std::path::PathBuf {
21+
dirs::home_dir()
22+
.unwrap_or_else(|| std::path::PathBuf::from("."))
23+
.join(".cortexast")
24+
.join("global_rules.yml")
25+
}
26+
27+
fn cluster_rules_path(team_cluster_id: &str) -> std::path::PathBuf {
28+
dirs::home_dir()
29+
.unwrap_or_else(|| std::path::PathBuf::from("."))
30+
.join(".cortexast")
31+
.join("cluster")
32+
.join(format!("{team_cluster_id}_rules.yml"))
33+
}
34+
35+
// ─────────────────────────────────────────────────────────────────────────────
36+
// YAML → serde_json::Value
37+
// ─────────────────────────────────────────────────────────────────────────────
38+
39+
/// Parse a YAML file into `serde_json::Value`. Uses the serde_yaml → JSON-string
40+
/// round-trip so that callers only deal with JSON types throughout.
41+
fn read_yaml_as_json(path: &Path) -> Result<Value> {
42+
let content =
43+
std::fs::read_to_string(path).with_context(|| format!("reading {}", path.display()))?;
44+
let yaml_val: serde_yaml::Value =
45+
serde_yaml::from_str(&content).with_context(|| format!("parsing {}", path.display()))?;
46+
// Round-trip through JSON string is safe: serde_yaml implements Serialize.
47+
let json_str = serde_json::to_string(&yaml_val)?;
48+
serde_json::from_str(&json_str).context("converting yaml→json")
49+
}
50+
51+
// ─────────────────────────────────────────────────────────────────────────────
52+
// Deep-merge (last-write-wins for scalars; arrays are unioned without duplicates)
53+
// ─────────────────────────────────────────────────────────────────────────────
54+
55+
/// Recursively merge `src` into `dst`.
56+
///
57+
/// - **Object/map**: keys from `src` are merged into `dst` recursively.
58+
/// - **Array**: items from `src` are appended if not already present in `dst`
59+
/// (union semantics; preserves insertion order, dst items first).
60+
/// - **Scalar** (`bool`, `number`, `string`, `null`): `src` overwrites `dst`.
61+
pub fn deep_merge(dst: &mut Value, src: Value) {
62+
match (dst, src) {
63+
(Value::Object(d), Value::Object(s)) => {
64+
for (k, v) in s {
65+
deep_merge(d.entry(k).or_insert(Value::Null), v);
66+
}
67+
}
68+
(Value::Array(d), Value::Array(s)) => {
69+
// Union: only add items from `src` that are not already in `dst`.
70+
for item in s {
71+
if !d.contains(&item) {
72+
d.push(item);
73+
}
74+
}
75+
}
76+
(dst, src) => *dst = src,
77+
}
78+
}
79+
80+
// ─────────────────────────────────────────────────────────────────────────────
81+
// Public API
82+
// ─────────────────────────────────────────────────────────────────────────────
83+
84+
/// Merge all three rule tiers for the given workspace directory and return the
85+
/// combined rules as a `serde_json::Value` (Object).
86+
///
87+
/// Files that do not exist are silently skipped (tier is treated as empty).
88+
/// Parse errors emit a `[cortex_get_rules] WARN` to stderr but do not abort.
89+
pub fn get_merged_rules(project_path: &str) -> Result<Value> {
90+
let mut merged: Value = Value::Object(Map::new());
91+
let project_dir = Path::new(project_path);
92+
93+
// ── Tier 1: Global ────────────────────────────────────────────────────────
94+
let global_path = global_rules_path();
95+
load_tier_into(&mut merged, &global_path, "global_rules.yml");
96+
97+
// ── Read .cortexast.json → team_cluster_id ────────────────────────────────
98+
let config_path = project_dir.join(".cortexast.json");
99+
let team_cluster_id: Option<String> = if config_path.exists() {
100+
read_team_cluster_id(&config_path)
101+
} else {
102+
None
103+
};
104+
105+
// ── Tier 2: Team/cluster ──────────────────────────────────────────────────
106+
if let Some(ref id) = team_cluster_id {
107+
let cluster_path = cluster_rules_path(id);
108+
load_tier_into(&mut merged, &cluster_path, &format!("{id}_rules.yml"));
109+
}
110+
111+
// ── Tier 3: Project (highest priority) ───────────────────────────────────
112+
let project_rules_path = project_dir.join(".cortex_rules.yml");
113+
load_tier_into(&mut merged, &project_rules_path, ".cortex_rules.yml");
114+
115+
Ok(merged)
116+
}
117+
118+
// ─────────────────────────────────────────────────────────────────────────────
119+
// Helpers
120+
// ─────────────────────────────────────────────────────────────────────────────
121+
122+
fn load_tier_into(dst: &mut Value, path: &Path, label: &str) {
123+
if !path.exists() {
124+
return;
125+
}
126+
match read_yaml_as_json(path) {
127+
Ok(v) => deep_merge(dst, v),
128+
Err(e) => eprintln!("[cortex_get_rules] WARN: {label} parse error: {e}"),
129+
}
130+
}
131+
132+
fn read_team_cluster_id(config_path: &Path) -> Option<String> {
133+
let content = std::fs::read_to_string(config_path).ok()?;
134+
let json: Value = serde_json::from_str(&content).ok()?;
135+
json.get("rules_engine")
136+
.and_then(|r| r.get("team_cluster_id"))
137+
.and_then(|v| v.as_str())
138+
.map(String::from)
139+
}
140+
141+
// ─────────────────────────────────────────────────────────────────────────────
142+
// Tests
143+
// ─────────────────────────────────────────────────────────────────────────────
144+
145+
#[cfg(test)]
mod tests {
    use super::*;
    use tempfile::TempDir;

    /// Write `content` to `dir/name` and return the path of the new file.
    fn write_yaml(dir: &std::path::Path, name: &str, content: &str) -> std::path::PathBuf {
        let p = dir.join(name);
        std::fs::write(&p, content).unwrap();
        p
    }

    #[test]
    fn deep_merge_scalars_overwrite() {
        let mut base = serde_json::json!({"persona": "verbose", "strict": false});
        let overlay = serde_json::json!({"persona": "silent"});
        deep_merge(&mut base, overlay);
        assert_eq!(base["persona"], "silent");
        assert_eq!(base["strict"], false); // untouched
    }

    #[test]
    fn deep_merge_arrays_union() {
        let mut base = serde_json::json!({"banned_tools": ["rm"]});
        let overlay = serde_json::json!({"banned_tools": ["rm", "git push"]});
        deep_merge(&mut base, overlay);
        let arr = base["banned_tools"].as_array().unwrap();
        assert_eq!(arr.len(), 2);
        assert!(arr.contains(&serde_json::json!("rm")));
        assert!(arr.contains(&serde_json::json!("git push")));
    }

    #[test]
    fn get_merged_rules_three_tiers() {
        let tmp = TempDir::new().unwrap();

        // `get_merged_rules` resolves tiers 1 and 2 under the home directory,
        // so this test feeds three stand-in files through `load_tier_into` in
        // the same Global -> Team -> Project order instead.
        let t1_path = write_yaml(
            tmp.path(),
            "global_rules.yml",
            r#"{"banned_tools": ["rm"], "persona": "verbose"}"#,
        );
        let t2_path = write_yaml(
            tmp.path(),
            "team_rules.yml",
            r#"{"banned_tools": ["rm", "git push"], "require_tests": true}"#,
        );
        let t3_path = write_yaml(
            tmp.path(),
            "project_rules.yml",
            r#"{"persona": "silent", "vision_model": "mlx"}"#,
        );

        let mut merged = Value::Object(Map::new());
        load_tier_into(&mut merged, &t1_path, "global");
        load_tier_into(&mut merged, &t2_path, "team");
        load_tier_into(&mut merged, &t3_path, "project");

        assert_eq!(merged["persona"], "silent"); // project overrides global
        let banned = merged["banned_tools"].as_array().unwrap();
        assert_eq!(banned.len(), 2); // union of ["rm"] + ["rm","git push"]
        assert_eq!(merged["require_tests"], true); // contributed by the team tier
        assert_eq!(merged["vision_model"], "mlx");
    }
}

0 commit comments

Comments
 (0)