From 90e38dacec6623938622103a9c66c87651966836 Mon Sep 17 00:00:00 2001 From: fey Date: Mon, 16 Mar 2026 14:07:09 +0000 Subject: [PATCH 1/4] feat(index): resolve parent_id via line-range containment MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Two-pass insert_symbols: first pass inserts with NULL parent_id and collects metadata, second pass resolves parent names by matching name + smallest enclosing line range. Backwards-compatible — plugins that don't emit parent field work exactly as before. Prepares chibi-core for language plugins that emit parent-child symbol hierarchies. --- crates/chibi-core/src/index/indexer.rs | 181 ++++++++++++++++++++++++- 1 file changed, 176 insertions(+), 5 deletions(-) diff --git a/crates/chibi-core/src/index/indexer.rs b/crates/chibi-core/src/index/indexer.rs index 0de96cc9d..5f0949c5a 100644 --- a/crates/chibi-core/src/index/indexer.rs +++ b/crates/chibi-core/src/index/indexer.rs @@ -278,13 +278,29 @@ pub fn update_index( } /// Insert symbols from plugin output into the database. Returns count of symbols added. +/// Insert symbols from plugin output into the database. Returns count of symbols added. +/// +/// Uses a two-pass approach for parent resolution: +/// 1. Insert all symbols with parent_id NULL, collecting (id, name, line_start, line_end, parent_name). +/// 2. For each symbol with a parent name, find the matching parent by name + line-range containment +/// and UPDATE parent_id. fn insert_symbols(conn: &Connection, file_id: i64, output: &serde_json::Value) -> u32 { let symbols = match output.get("symbols").and_then(|v| v.as_array()) { Some(arr) => arr, None => return 0, }; + // First pass: insert all symbols, collect metadata for parent resolution. 
+ struct SymMeta { + id: i64, + name: String, + line_start: i64, + line_end: i64, + parent_name: Option<String>, + } + let mut metas: Vec<SymMeta> = Vec::new(); let mut count = 0u32; + for sym in symbols { let name = sym.get("name").and_then(|v| v.as_str()).unwrap_or(""); let kind = sym.get("kind").and_then(|v| v.as_str()).unwrap_or(""); @@ -292,21 +308,57 @@ fn insert_symbols(conn: &Connection, file_id: i64, output: &serde_json::Value) - let line_end = sym.get("line_end").and_then(|v| v.as_i64()).unwrap_or(0); let signature = sym.get("signature").and_then(|v| v.as_str()); let visibility = sym.get("visibility").and_then(|v| v.as_str()); + let parent_name = sym.get("parent").and_then(|v| v.as_str()).map(|s| s.to_string()); - // Note: parent_id resolution (matching parent name → id) deferred to phase 6 - // when we have a proper protocol. For now, parent_id is NULL. let result = conn.execute( "INSERT INTO symbols (file_id, name, kind, line_start, line_end, signature, visibility) VALUES (?1, ?2, ?3, ?4, ?5, ?6, ?7)", - rusqlite::params![ - file_id, name, kind, line_start, line_end, signature, visibility - ], + rusqlite::params![file_id, name, kind, line_start, line_end, signature, visibility], ); if result.is_ok() { + let id = conn.last_insert_rowid(); + metas.push(SymMeta { + id, + name: name.to_string(), + line_start, + line_end, + parent_name, + }); count += 1; } } + + // Second pass: resolve parent_id via line-range containment. + for meta in &metas { + if let Some(ref parent_name) = meta.parent_name { + // Find the nearest enclosing parent: name matches AND parent's line range contains child. + let parent_id = metas + .iter() + .filter(|p| { + p.name == *parent_name + && p.line_start <= meta.line_start + && p.line_end >= meta.line_end + && p.id != meta.id + }) + // Nearest enclosing = smallest containing range. 
+ .min_by_key(|p| p.line_end - p.line_start) + .map(|p| p.id); + + if let Some(pid) = parent_id { + let _ = conn.execute( + "UPDATE symbols SET parent_id = ?1 WHERE id = ?2", + rusqlite::params![pid, meta.id], + ); + } else { + eprintln!( + "index: unresolved parent \"{}\" for symbol \"{}\" at line {}", + parent_name, meta.name, meta.line_start + ); + } + } + } + count } @@ -577,4 +629,123 @@ mod tests { assert_eq!(detect_language(Path::new(&path)), Some(*expected)); } } + + #[test] + fn insert_symbols_resolves_parent_id() { + let (conn, dir) = setup_temp_project(); + let _ = dir; + + conn.execute( + "INSERT INTO files (path, lang, mtime, size) VALUES ('test.rs', 'rust', 0, 0)", + [], + ) + .unwrap(); + + let output = serde_json::json!({ + "symbols": [ + {"name": "Parser", "kind": "struct", "line_start": 1, "line_end": 10}, + {"name": "input", "kind": "field", "line_start": 2, "line_end": 2, "parent": "Parser"}, + {"name": "Parser", "kind": "impl", "line_start": 12, "line_end": 25}, + {"name": "new", "kind": "function", "line_start": 13, "line_end": 20, "parent": "Parser"} + ] + }); + + insert_symbols(&conn, 1, &output); + + // "input" (field at line 2) should have parent_id pointing to "Parser" (struct at lines 1-10). + let field_parent: Option<i64> = conn + .query_row( + "SELECT parent_id FROM symbols WHERE name = 'input' AND kind = 'field'", + [], + |row| row.get(0), + ) + .unwrap(); + let struct_id: i64 = conn + .query_row( + "SELECT id FROM symbols WHERE name = 'Parser' AND kind = 'struct'", + [], + |row| row.get(0), + ) + .unwrap(); + assert_eq!(field_parent, Some(struct_id)); + + // "new" (function at line 13) should have parent_id pointing to "Parser" (impl at lines 12-25), + // NOT the struct at lines 1-10 (which doesn't contain line 13). 
+ let fn_parent: Option<i64> = conn + .query_row( + "SELECT parent_id FROM symbols WHERE name = 'new' AND kind = 'function'", + [], + |row| row.get(0), + ) + .unwrap(); + let impl_id: i64 = conn + .query_row( + "SELECT id FROM symbols WHERE name = 'Parser' AND kind = 'impl'", + [], + |row| row.get(0), + ) + .unwrap(); + assert_eq!(fn_parent, Some(impl_id)); + } + + #[test] + fn insert_symbols_no_parent_still_works() { + let (conn, dir) = setup_temp_project(); + let _ = dir; + + conn.execute( + "INSERT INTO files (path, lang, mtime, size) VALUES ('test.rs', 'rust', 0, 0)", + [], + ) + .unwrap(); + + let output = serde_json::json!({ + "symbols": [ + {"name": "main", "kind": "function", "line_start": 1, "line_end": 5} + ] + }); + + let count = insert_symbols(&conn, 1, &output); + assert_eq!(count, 1); + + let parent_id: Option<i64> = conn + .query_row( + "SELECT parent_id FROM symbols WHERE name = 'main'", + [], + |row| row.get(0), + ) + .unwrap(); + assert_eq!(parent_id, None); + } + + #[test] + fn insert_symbols_unresolvable_parent_stays_null() { + let (conn, dir) = setup_temp_project(); + let _ = dir; + + conn.execute( + "INSERT INTO files (path, lang, mtime, size) VALUES ('test.rs', 'rust', 0, 0)", + [], + ) + .unwrap(); + + // Parent "Nonexistent" doesn't match any symbol — should gracefully leave parent_id NULL. 
+ let output = serde_json::json!({ + "symbols": [ + {"name": "orphan", "kind": "function", "line_start": 1, "line_end": 5, "parent": "Nonexistent"} + ] + }); + + let count = insert_symbols(&conn, 1, &output); + assert_eq!(count, 1); + + let parent_id: Option<i64> = conn + .query_row( + "SELECT parent_id FROM symbols WHERE name = 'orphan'", + [], + |row| row.get(0), + ) + .unwrap(); + assert_eq!(parent_id, None); + } } From eddc81ebb44482bd9ce7fc27709996ca3be47eb3 Mon Sep 17 00:00:00 2001 From: fey Date: Mon, 16 Mar 2026 14:42:48 +0000 Subject: [PATCH 2/4] docs(agents): add lang_rust + insert_symbols parent resolution quirks --- AGENTS.md | 2 ++ 1 file changed, 2 insertions(+) diff --git a/AGENTS.md b/AGENTS.md index c195852ec..2327933a1 100644 --- a/AGENTS.md +++ b/AGENTS.md @@ -84,3 +84,5 @@ LLM communication is delegated to ratatoskr; `gateway.rs` bridges chibi's types - `(harness docs)` is the canonical import for harness API discovery: `(import (harness docs))` then `(describe hooks-docs)` to list all hook points with payload/return contracts, `(module-doc hooks-docs 'pre_message)` for a specific hook, or `(describe harness-tools-docs)` for the harness tool API (`define-tool`, `call-tool`, `register-hook`, etc.). Both `hooks-docs` and `harness-tools-docs` are also available as top-level bindings (pre-imported in `EVAL_PRELUDE`) but `(harness docs)` is the documented access path. `describe` takes an alist directly — NOT a symbol. - `hooks-docs` is generated at startup from `HOOK_METADATA` (`hooks.rs`) — the single source of truth for all hook contracts. `docs/hooks.md` hook reference is also generated from it via `just generate-docs`. Adding a `HookPoint` variant without a `HOOK_METADATA` entry fails `test_hook_metadata_completeness`. - `(module-exports '(harness docs))` (and `'(harness tools)`, `'(harness hooks)`) errors — runtime-registered modules are absent from tein's build-time `MODULE_EXPORTS` table. 
Use `harness-tools-docs` and `hooks-docs` for API discovery instead. +- `insert_symbols` (`indexer.rs`) now does a two-pass insert for parent resolution: first pass inserts all symbols with `parent_id = NULL`, second pass resolves `parent` names via line-range containment (smallest enclosing range wins). Plugins that don't emit `parent` are unaffected. +- Language plugins (e.g. `lang_rust`): `tree-sitter-rust` exposes visibility as a `visibility_modifier` child kind, not a named field — `child_by_field_name("visibility")` returns `None`. Use `node.children().find(|n| n.kind() == "visibility_modifier")` instead. Also, `use_wildcard` nodes contain the full path text (e.g. `"std::collections::*"`), not just `"*"` — take the full node text rather than constructing `prefix + "::*"`. From 22296202dc2f63c31f8a55633147411cce3c1996 Mon Sep 17 00:00:00 2001 From: fey Date: Mon, 16 Mar 2026 15:05:38 +0000 Subject: [PATCH 3/4] fix(index): remove duplicated doc comment on insert_symbols --- crates/chibi-core/src/index/indexer.rs | 1 - 1 file changed, 1 deletion(-) diff --git a/crates/chibi-core/src/index/indexer.rs b/crates/chibi-core/src/index/indexer.rs index 5f0949c5a..4ee91a519 100644 --- a/crates/chibi-core/src/index/indexer.rs +++ b/crates/chibi-core/src/index/indexer.rs @@ -277,7 +277,6 @@ pub fn update_index( Ok(stats) } -/// Insert symbols from plugin output into the database. Returns count of symbols added. /// Insert symbols from plugin output into the database. Returns count of symbols added. 
/// /// Uses a two-pass approach for parent resolution: From 504bef4df41a80310aafe7cc3655fe9c7d455146 Mon Sep 17 00:00:00 2001 From: fey Date: Mon, 16 Mar 2026 15:09:42 +0000 Subject: [PATCH 4/4] fmt --- crates/chibi-core/src/index/indexer.rs | 9 +++++++-- 1 file changed, 7 insertions(+), 2 deletions(-) diff --git a/crates/chibi-core/src/index/indexer.rs b/crates/chibi-core/src/index/indexer.rs index 4ee91a519..77929ac3e 100644 --- a/crates/chibi-core/src/index/indexer.rs +++ b/crates/chibi-core/src/index/indexer.rs @@ -307,12 +307,17 @@ fn insert_symbols(conn: &Connection, file_id: i64, output: &serde_json::Value) - let line_end = sym.get("line_end").and_then(|v| v.as_i64()).unwrap_or(0); let signature = sym.get("signature").and_then(|v| v.as_str()); let visibility = sym.get("visibility").and_then(|v| v.as_str()); - let parent_name = sym.get("parent").and_then(|v| v.as_str()).map(|s| s.to_string()); + let parent_name = sym + .get("parent") + .and_then(|v| v.as_str()) + .map(|s| s.to_string()); let result = conn.execute( "INSERT INTO symbols (file_id, name, kind, line_start, line_end, signature, visibility) VALUES (?1, ?2, ?3, ?4, ?5, ?6, ?7)", - rusqlite::params![file_id, name, kind, line_start, line_end, signature, visibility], + rusqlite::params![ + file_id, name, kind, line_start, line_end, signature, visibility + ], ); if result.is_ok() {