From 685124041e76c85c54a24d55ff887df925a94291 Mon Sep 17 00:00:00 2001
From: Thibault NORMAND <me@zenithar.org>
Date: Mon, 27 Apr 2026 18:36:47 +0200
Subject: [PATCH] feat(symgraph): improve ingestion pipeline for large
 repositories.

---
 Cargo.toml                  |   1 +
 src/cli/commands.rs         |  10 +-
 src/cli/db_utils.rs         |  74 +++++++-
 src/db/mod.rs               | 365 ++++++++++++++++++++++++++++++++----
 src/extraction/mod.rs       | 140 ++++++++------
 src/lib.rs                  | 304 +++++++++++++++++++++++-------
 src/mcp/handlers/reindex.rs |  92 ++++++++-
 src/mcp/mod.rs              |   6 +-
 src/mcp/types.rs            |   2 +-
 tests/integration_test.rs   | 115 ++++++++++++
 10 files changed, 931 insertions(+), 178 deletions(-)
diff --git a/Cargo.toml b/Cargo.toml
index 3ade409..a8f4f48 100644
--- a/Cargo.toml
+++ b/Cargo.toml
@@ -66,6 +66,7 @@ sha2 = "0.10"
 hex = "0.4"
 tracing = "0.1"
 tracing-subscriber = { version = "0.3", features = ["env-filter"] }
+indicatif = "0.17"
 
 [features]
 default = ["sqlite"]
diff --git a/src/cli/commands.rs b/src/cli/commands.rs
index 895ca83..10129f4 100644
--- a/src/cli/commands.rs
+++ b/src/cli/commands.rs
@@ -5,9 +5,11 @@ use tracing::info;
 
 use crate::context::{format_context_markdown, ContextBuilder, ContextOptions};
 use crate::db::Database;
-use crate::{index_codebase, IndexConfig};
+use crate::IndexConfig;
 
-use super::db_utils::{canonicalize_path, database_path, open_project_database};
+use super::db_utils::{
+    canonicalize_path, database_path, open_project_database, rebuild_project_database,
+};
 
 /// Index a codebase at the given path
 pub fn index_command(path: &str) -> Result<()> {
@@ -16,11 +18,11 @@ pub fn index_command(path: &str) -> Result<()> {
 
     let config = IndexConfig {
         root: project_root.clone(),
+        show_progress: true,
         ..Default::default()
     };
 
-    let stats = index_codebase(&mut db, &config)?;
-
+    let stats = rebuild_project_database(&mut db, &config)?;
     println!("\nIndexing complete!");
     println!("  Files indexed: {}", stats.files);
     println!("  Symbols found: {}", stats.nodes);
diff --git a/src/cli/db_utils.rs b/src/cli/db_utils.rs
index 89ee9fe..34bc69b 100644
--- a/src/cli/db_utils.rs
+++ b/src/cli/db_utils.rs
@@ -1,12 +1,18 @@
 //! Database path and initialization utilities
 
 use anyhow::{Context, Result};
-use std::path::PathBuf;
+use std::{
+    path::{Path, PathBuf},
+    process,
+    time::{SystemTime, UNIX_EPOCH},
+};
 
 use crate::db::Database;
+use crate::{build_full_index, IndexConfig, IndexingStats};
 
 const DB_DIR: &str = ".symgraph";
 const DB_FILE: &str = "index.db";
+const SHADOW_DB_PREFIX: &str = "index.shadow";
 
 /// Get the database path for a project root
 pub fn database_path(project_root: &str) -> PathBuf {
@@ -27,6 +33,72 @@ pub fn open_project_database(project_root: &str) -> Result<Database> {
     Database::open(&db_path)
 }
 
+/// Open a fresh shadow database in the project index directory.
+pub fn open_shadow_database(project_root: &str) -> Result<Database> {
+    let shadow_path = shadow_database_path(project_root)?;
+    Database::cleanup_on_disk_path(&shadow_path)?;
+    Database::open(&shadow_path)
+}
+
+/// Best-effort cleanup for a shadow database path and its SQLite sidecars.
+pub fn cleanup_shadow_database_path(path: &Path) -> Result<()> {
+    Database::cleanup_on_disk_path(path)
+}
+
+/// Best-effort cleanup for a shadow database handle and its SQLite sidecars.
+pub fn cleanup_shadow_database(db: &Database) -> Result<()> {
+    match db.path() {
+        Some(path) => cleanup_shadow_database_path(path),
+        None => Ok(()),
+    }
+}
+
+/// Flush, close, and atomically swap a prepared shadow database into place.
+pub fn swap_shadow_database(live_db: &mut Database, shadow_db: Database) -> Result<()> {
+    let shadow_path = shadow_db.prepare_for_swap()?;
+    live_db.replace_with_shadow(&shadow_path)
+}
+
+/// Build a full shadow index and atomically swap it into the live database handle.
+pub fn rebuild_project_database(
+    live_db: &mut Database,
+    config: &IndexConfig,
+) -> Result<IndexingStats> {
+    let mut shadow_db = open_shadow_database(&config.root)?;
+    let shadow_path = shadow_db
+        .path()
+        .map(Path::to_path_buf)
+        .context("shadow database is not file-backed")?;
+
+    match build_full_index(&mut shadow_db, config) {
+        Ok(stats) => {
+            if let Err(err) = swap_shadow_database(live_db, shadow_db) {
+                let _ = cleanup_shadow_database_path(&shadow_path);
+                Err(err)
+            } else {
+                Ok(stats)
+            }
+        }
+        Err(err) => {
+            let _ = cleanup_shadow_database_path(&shadow_path);
+            Err(err)
+        }
+    }
+}
+
+fn shadow_database_path(project_root: &str) -> Result<PathBuf> {
+    ensure_database_directory(project_root)?;
+    let nonce = SystemTime::now()
+        .duration_since(UNIX_EPOCH)
+        .context("System clock is before Unix epoch")?
+        .as_nanos();
+    Ok(PathBuf::from(project_root).join(DB_DIR).join(format!(
+        "{SHADOW_DB_PREFIX}.{}.{}.db",
+        process::id(),
+        nonce
+    )))
+}
+
 /// Canonicalize and validate a path
 pub fn canonicalize_path(path: &str) -> Result<String> {
     let canonical = std::path::Path::new(path)
diff --git a/src/db/mod.rs b/src/db/mod.rs
index cb7c8e0..f1cbf21 100644
--- a/src/db/mod.rs
+++ b/src/db/mod.rs
@@ -8,46 +8,65 @@
 
 mod schema;
 
-use anyhow::Result;
+use anyhow::{Context, Result};
 use rusqlite::{params, Connection, OptionalExtension};
-use std::path::Path;
+use std::{
+    fs,
+    io::ErrorKind,
+    path::{Path, PathBuf},
+};
 
 use crate::types::{
     Edge, EdgeKind, FileRecord, IndexStats, Language, Node, NodeKind, UnresolvedReference,
     Visibility,
 };
 
+const CONNECTION_PRAGMAS: &str = "PRAGMA foreign_keys = ON; \
+             PRAGMA journal_mode = WAL; \
+             PRAGMA synchronous = NORMAL; \
+             PRAGMA cache_size = -64000;";
+
 /// Database handle for the code graph
 pub struct Database {
     conn: Connection,
+    path: Option<PathBuf>,
 }
 
 impl Database {
     /// Open or create a database at the given path
     pub fn open<P: AsRef<Path>>(path: P) -> Result<Self> {
-        let conn = Connection::open(path)?;
-        Self::from_connection(conn)
+        let path = path.as_ref().to_path_buf();
+        let conn = Connection::open(&path)?;
+        Self::from_connection(conn, Some(path))
     }
 
     /// Create an in-memory database (for testing)
     pub fn in_memory() -> Result<Self> {
         let conn = Connection::open_in_memory()?;
-        Self::from_connection(conn)
+        Self::from_connection(conn, None)
     }
 
     /// Shared initialization: set PRAGMAs and create schema
-    fn from_connection(conn: Connection) -> Result<Self> {
-        conn.execute_batch(
-            "PRAGMA foreign_keys = ON; \
-             PRAGMA journal_mode = WAL; \
-             PRAGMA synchronous = NORMAL; \
-             PRAGMA cache_size = -64000;",
-        )?;
-        let db = Self { conn };
+    fn from_connection(conn: Connection, path: Option<PathBuf>) -> Result<Self> {
+        conn.execute_batch(CONNECTION_PRAGMAS)?;
+        let db = Self { conn, path };
         db.initialize()?;
         Ok(db)
     }
 
+    /// Path to the on-disk database, if this handle is file-backed.
+    pub fn path(&self) -> Option<&Path> {
+        self.path.as_deref()
+    }
+
+    /// Close the underlying SQLite connection.
+    pub fn close(self) -> Result<()> {
+        match self.conn.close() {
+            Ok(()) => Ok(()),
+            Err((_conn, err)) => Err(err.into()),
+        }
+    }
+
     /// Initialize the database schema and run additive migrations.
     fn initialize(&self) -> Result<()> {
         self.conn.execute_batch(schema::SCHEMA)?;
@@ -665,14 +684,57 @@ impl Database {
     // =========================================================================
 
     /// Insert multiple nodes using a single prepared statement.
-    /// Each node's `id` field is set to 0 before insertion (DB assigns the real id).
+    /// Each node's `id` field is updated to the database-assigned row id.
     /// Returns a map from old (extraction-time) id to new (database) id.
     pub fn insert_nodes_batch(
         &self,
         nodes: &mut [Node],
+    ) -> Result<std::collections::HashMap<i64, i64>> {
+        let id_map = self.insert_nodes_base_batch(nodes)?;
+        self.insert_node_fts_rows(nodes)?;
+        self.insert_semantic_fts_rows(nodes)?;
+        Ok(id_map)
+    }
+
+    /// Insert multiple nodes without updating FTS tables.
+    /// Used by full shadow builds, which rebuild FTS once at the end.
+    pub fn insert_nodes_batch_without_fts(
+        &self,
+        nodes: &mut [Node],
+    ) -> Result<std::collections::HashMap<i64, i64>> {
+        self.insert_nodes_base_batch(nodes)
+    }
+
+    /// Rebuild both FTS indexes from the canonical `nodes` table.
+    pub fn rebuild_fts_indexes(&self) -> Result<()> {
+        self.conn
+            .execute_batch("INSERT INTO nodes_fts(nodes_fts) VALUES('rebuild');")
+            .context("rebuild_fts_indexes: nodes_fts")?;
+        self.conn
+            .execute("DELETE FROM nodes_semantic_fts", [])
+            .context("rebuild_fts_indexes: nodes_semantic_fts clear")?;
+
+        let mut select = self
+            .conn
+            .prepare_cached("SELECT * FROM nodes ORDER BY id")?;
+        let rows = select.query_map([], Self::row_to_node)?;
+        let mut insert = self
+            .conn
+            .prepare_cached("INSERT INTO nodes_semantic_fts(rowid, tokens) VALUES (?1, ?2)")?;
+        for node in rows {
+            let node = node?;
+            insert.execute(params![node.id, build_semantic_tokens(&node)])?;
+        }
+
+        self.optimize_fts()
+    }
+
+    fn insert_nodes_base_batch(
+        &self,
+        nodes: &mut [Node],
     ) -> Result<std::collections::HashMap<i64, i64>> {
         let mut id_map = std::collections::HashMap::with_capacity(nodes.len());
-        let mut stmt = self.conn.prepare(
+        let mut stmt = self.conn.prepare_cached(
             r#"
             INSERT INTO nodes (
                 kind, name, qualified_name, file_path, start_line, end_line,
@@ -681,15 +743,8 @@ impl Database {
             ) VALUES (?1, ?2, ?3, ?4, ?5, ?6, ?7, ?8, ?9, ?10, ?11, ?12, ?13, ?14, ?15, ?16, ?17)
             "#,
         )?;
-        let mut fts_stmt = self
-            .conn
-            .prepare("INSERT INTO nodes_fts(rowid, name, qualified_name) VALUES (?1, ?2, ?3)")?;
-        let mut sem_stmt = self
-            .conn
-            .prepare("INSERT INTO nodes_semantic_fts(rowid, tokens) VALUES (?1, ?2)")?;
         for node in nodes.iter_mut() {
             let old_id = node.id;
-            node.id = 0;
             stmt.execute(params![
                 node.kind.as_str(),
                 node.name,
@@ -710,14 +765,32 @@ impl Database {
                 node.language.as_str(),
             ])?;
             let new_id = self.conn.last_insert_rowid();
-            fts_stmt.execute(params![new_id, node.name, node.qualified_name])?;
-            let tokens = build_semantic_tokens(node);
-            sem_stmt.execute(params![new_id, tokens])?;
+            node.id = new_id;
             id_map.insert(old_id, new_id);
         }
         Ok(id_map)
     }
 
+    fn insert_node_fts_rows(&self, nodes: &[Node]) -> Result<()> {
+        let mut stmt = self.conn.prepare_cached(
+            "INSERT INTO nodes_fts(rowid, name, qualified_name) VALUES (?1, ?2, ?3)",
+        )?;
+        for node in nodes {
+            stmt.execute(params![node.id, node.name, node.qualified_name])?;
+        }
+        Ok(())
+    }
+
+    fn insert_semantic_fts_rows(&self, nodes: &[Node]) -> Result<()> {
+        let mut stmt = self
+            .conn
+            .prepare_cached("INSERT INTO nodes_semantic_fts(rowid, tokens) VALUES (?1, ?2)")?;
+        for node in nodes {
+            stmt.execute(params![node.id, build_semantic_tokens(node)])?;
+        }
+        Ok(())
+    }
+
     /// Insert multiple edges using a single prepared statement.
     /// Maps source_id and target_id through `id_map`; edges whose ids
     /// cannot be mapped are silently skipped.
@@ -728,7 +801,7 @@ impl Database {
         id_map: &std::collections::HashMap<i64, i64>,
     ) -> Result<u64> {
         let mut count: u64 = 0;
-        let mut stmt = self.conn.prepare(
+        let mut stmt = self.conn.prepare_cached(
             r#"
             INSERT INTO edges (source_id, target_id, kind, file_path, line, column)
             VALUES (?1, ?2, ?3, ?4, ?5, ?6)
@@ -762,7 +835,7 @@ impl Database {
         refs: &mut [UnresolvedReference],
         id_map: &std::collections::HashMap<i64, i64>,
     ) -> Result<()> {
-        let mut stmt = self.conn.prepare(
+        let mut stmt = self.conn.prepare_cached(
             r#"
             INSERT INTO unresolved_refs (source_node_id, reference_name, kind, file_path, line, column)
             VALUES (?1, ?2, ?3, ?4, ?5, ?6)
@@ -851,21 +924,130 @@ impl Database {
         })
     }
 
+    /// Disable FTS5 automerge on both search tables before a bulk insert.
+    /// Prevents per-insert segment merges; call `optimize_fts` when done.
+    ///
+    /// Note: FTS5 configuration commands like `automerge` require the
+    /// two-column form `INSERT INTO ft(ft, rank) VALUES('option', value)`.
+    /// The single-column form `VALUES('automerge=0')` is only for parameterless
+    /// maintenance commands (`optimize`, `rebuild`, `delete-all`, etc.).
+    pub fn disable_fts_automerge(&self) -> Result<()> {
+        use anyhow::Context;
+        self.conn
+            .execute(
+                "INSERT INTO nodes_fts(nodes_fts, rank) VALUES('automerge', 0)",
+                [],
+            )
+            .context("disable_fts_automerge: nodes_fts")?;
+        self.conn
+            .execute(
+                "INSERT INTO nodes_semantic_fts(nodes_semantic_fts, rank) VALUES('automerge', 0)",
+                [],
+            )
+            .context("disable_fts_automerge: nodes_semantic_fts")?;
+        Ok(())
+    }
+
+    /// Merge all FTS5 segments into one after a bulk insert.
+    /// Much faster than the incremental per-insert merges that `automerge=8`
+    /// (the default) would have done.
+    pub fn optimize_fts(&self) -> Result<()> {
+        use anyhow::Context;
+        self.conn
+            .execute_batch("INSERT INTO nodes_fts(nodes_fts) VALUES('optimize');")
+            .context("optimize_fts: nodes_fts")?;
+        self.conn
+            .execute_batch("INSERT INTO nodes_semantic_fts(nodes_semantic_fts) VALUES('optimize');")
+            .context("optimize_fts: nodes_semantic_fts")?;
+        Ok(())
+    }
+
     /// Begin a transaction
     pub fn begin_transaction(&mut self) -> Result<()> {
-        self.conn.execute("BEGIN TRANSACTION", [])?;
+        use anyhow::Context;
+        self.conn
+            .execute("BEGIN TRANSACTION", [])
+            .context("begin_transaction")?;
         Ok(())
     }
 
-    /// Commit a transaction
-    pub fn commit(&mut self) -> Result<()> {
-        self.conn.execute("COMMIT", [])?;
+    /// Commit a transaction without checkpointing the WAL.
+    pub fn commit_transaction(&mut self) -> Result<()> {
+        self.conn.execute("COMMIT", []).context("commit: COMMIT")?;
         Ok(())
     }
 
+    /// Checkpoint the WAL and truncate it so the database file stays compact.
+    pub fn checkpoint_wal_truncate(&self) -> Result<()> {
+        self.conn
+            .execute_batch("PRAGMA wal_checkpoint(TRUNCATE)")
+            .context("wal_checkpoint")?;
+        Ok(())
+    }
+
+    /// Commit a transaction and checkpoint the WAL so the file stays compact.
+    pub fn commit(&mut self) -> Result<()> {
+        self.commit_transaction()?;
+        self.checkpoint_wal_truncate()
+    }
+
     /// Rollback a transaction
     pub fn rollback(&mut self) -> Result<()> {
-        self.conn.execute("ROLLBACK", [])?;
+        self.conn.execute("ROLLBACK", []).context("rollback")?;
+        Ok(())
+    }
+
+    /// Flush a shadow database to disk and close it before an atomic swap.
+    pub fn prepare_for_swap(self) -> Result<PathBuf> {
+        let path = self
+            .path
+            .clone()
+            .context("prepare_for_swap requires an on-disk database")?;
+        self.checkpoint_wal_truncate()?;
+        self.close()?;
+        cleanup_sqlite_sidecars(&path)?;
+        Ok(path)
+    }
+
+    /// Atomically replace this on-disk database file with a prepared shadow database.
+    pub fn replace_with_shadow<P: AsRef<Path>>(&mut self, shadow_path: P) -> Result<()> {
+        let live_path = self
+            .path
+            .clone()
+            .context("replace_with_shadow requires an on-disk database")?;
+        let shadow_path = shadow_path.as_ref().to_path_buf();
+
+        self.checkpoint_wal_truncate()?;
+
+        let placeholder = Connection::open_in_memory().context("opening placeholder connection")?;
+        let live_conn = std::mem::replace(&mut self.conn, placeholder);
+        if let Err((conn, err)) = live_conn.close() {
+            self.conn = conn;
+            return Err(err.into());
+        }
+
+        cleanup_sqlite_sidecars(&live_path)?;
+
+        if let Err(err) = fs::rename(&shadow_path, &live_path) {
+            self.reopen_from_path(&live_path)?;
+            return Err(err.into());
+        }
+
+        cleanup_sqlite_sidecars(&shadow_path)?;
+        self.reopen_from_path(&live_path)
+    }
+
+    /// Delete a database file and any SQLite sidecars if they exist.
+    pub fn cleanup_on_disk_path<P: AsRef<Path>>(path: P) -> Result<()> {
+        let path = path.as_ref();
+        remove_file_if_exists(path)?;
+        cleanup_sqlite_sidecars(path)
+    }
+
+    fn reopen_from_path(&mut self, path: &Path) -> Result<()> {
+        let reopened = Self::open(path)?;
+        self.conn = reopened.conn;
+        self.path = reopened.path;
         Ok(())
     }
 
@@ -1133,9 +1315,32 @@ fn normalize_query_for_fts(query: &str) -> String {
         .join(" ")
 }
 
+fn sqlite_sidecar_paths(path: &Path) -> [PathBuf; 2] {
+    [
+        PathBuf::from(format!("{}-wal", path.display())),
+        PathBuf::from(format!("{}-shm", path.display())),
+    ]
+}
+
+fn cleanup_sqlite_sidecars(path: &Path) -> Result<()> {
+    for sidecar in sqlite_sidecar_paths(path) {
+        remove_file_if_exists(&sidecar)?;
+    }
+    Ok(())
+}
+
+fn remove_file_if_exists(path: &Path) -> Result<()> {
+    match fs::remove_file(path) {
+        Ok(()) => Ok(()),
+        Err(err) if err.kind() == ErrorKind::NotFound => Ok(()),
+        Err(err) => Err(err.into()),
+    }
+}
+
 #[cfg(test)]
 mod tests {
     use super::*;
+    use tempfile::tempdir;
 
     fn create_test_node(name: &str, kind: NodeKind, file_path: &str) -> Node {
         Node {
@@ -1179,6 +1384,15 @@ mod tests {
         assert!(db.is_ok());
     }
 
+    #[test]
+    fn test_open_tracks_on_disk_path() {
+        let dir = tempdir().unwrap();
+        let path = dir.path().join("tracked.db");
+
+        let db = Database::open(&path).unwrap();
+        assert_eq!(db.path(), Some(path.as_path()));
+    }
+
     #[test]
     fn test_database_stats_empty() {
         let db = Database::in_memory().unwrap();
@@ -1703,6 +1917,93 @@ mod tests {
         assert_eq!(stats.total_nodes, 0);
     }
 
+    #[test]
+    fn test_prepare_for_swap_cleans_sidecars() {
+        let dir = tempdir().unwrap();
+        let path = dir.path().join("shadow.db");
+        let db = Database::open(&path).unwrap();
+        let file = mk_file("shadow.rs");
+
+        db.insert_or_update_file(&file).unwrap();
+        db.insert_node(&create_test_node(
+            "shadow_fn",
+            NodeKind::Function,
+            "shadow.rs",
+        ))
+        .unwrap();
+
+        let prepared_path = db.prepare_for_swap().unwrap();
+        assert_eq!(prepared_path, path);
+        assert!(prepared_path.exists());
+        assert!(!PathBuf::from(format!("{}-wal", prepared_path.display())).exists());
+        assert!(!PathBuf::from(format!("{}-shm", prepared_path.display())).exists());
+    }
+
+    #[test]
+    fn test_replace_with_shadow_reopens_new_contents() {
+        let dir = tempdir().unwrap();
+        let live_path = dir.path().join("live.db");
+        let shadow_path = dir.path().join("shadow.db");
+
+        let mut live = Database::open(&live_path).unwrap();
+        let old_file = mk_file("old.rs");
+        live.insert_or_update_file(&old_file).unwrap();
+        live.insert_node(&create_test_node("old_fn", NodeKind::Function, "old.rs"))
+            .unwrap();
+
+        let shadow = Database::open(&shadow_path).unwrap();
+        let new_file = mk_file("new.rs");
+        shadow.insert_or_update_file(&new_file).unwrap();
+        shadow
+            .insert_node(&create_test_node("new_fn", NodeKind::Function, "new.rs"))
+            .unwrap();
+
+        let prepared_shadow = shadow.prepare_for_swap().unwrap();
+        live.replace_with_shadow(&prepared_shadow).unwrap();
+
+        assert_eq!(live.path(), Some(live_path.as_path()));
+        assert!(live.find_node_by_name("old_fn").unwrap().is_none());
+        assert!(live.find_node_by_name("new_fn").unwrap().is_some());
+        assert!(live.get_file("old.rs").unwrap().is_none());
+        assert!(live.get_file("new.rs").unwrap().is_some());
+        assert!(!prepared_shadow.exists());
+    }
+
+    #[test]
+    fn test_cleanup_on_disk_path_removes_db_and_sidecars() {
+        let dir = tempdir().unwrap();
+        let path = dir.path().join("cleanup.db");
+
+        std::fs::write(&path, b"db").unwrap();
+        std::fs::write(format!("{}-wal", path.display()), b"wal").unwrap();
+        std::fs::write(format!("{}-shm", path.display()), b"shm").unwrap();
+
+        Database::cleanup_on_disk_path(&path).unwrap();
+
+        assert!(!path.exists());
+        assert!(!PathBuf::from(format!("{}-wal", path.display())).exists());
+        assert!(!PathBuf::from(format!("{}-shm", path.display())).exists());
+    }
+
+    #[test]
+    fn test_rebuild_fts_indexes_restores_search() {
+        let db = Database::in_memory().unwrap();
+        let file = mk_file("auth.rs");
+        db.insert_or_update_file(&file).unwrap();
+
+        let mut node = create_test_node("validateToken", NodeKind::Function, "auth.rs");
+        node.docstring = Some("Validate JWT bearer token".to_string());
+        db.insert_nodes_batch_without_fts(&mut [node]).unwrap();
+
+        assert!(db.semantic_search("jwt bearer", 10).unwrap().is_empty());
+
+        db.disable_fts_automerge().unwrap();
+        db.rebuild_fts_indexes().unwrap();
+
+        let hits = db.semantic_search("jwt bearer", 10).unwrap();
+        assert!(hits.iter().any(|n| n.name == "validateToken"));
+    }
+
     #[test]
     fn test_get_hierarchy() {
         let db = Database::in_memory().unwrap();
diff --git a/src/extraction/mod.rs b/src/extraction/mod.rs
index 60d63a2..1fc85d3 100644
--- a/src/extraction/mod.rs
+++ b/src/extraction/mod.rs
@@ -159,32 +159,47 @@ struct ExtractionContext<'a> {
 }
 
 impl<'a> ExtractionContext<'a> {
-    fn traverse_node(&mut self, node: tree_sitter::Node) {
-        let node_type = node.kind();
+    fn traverse_node<'tree>(&mut self, root: tree_sitter::Node<'tree>) {
+        enum Work<'t> {
+            Visit(tree_sitter::Node<'t>),
+            PopStack,
+        }
 
-        // Check if this is a symbol we care about
-        if let Some(kind) = self.config.node_type_to_kind(node_type) {
-            self.extract_symbol(node, kind);
-        } else {
-            // Continue traversing children
+        fn push_children<'t>(node: tree_sitter::Node<'t>, work: &mut Vec<Work<'t>>) {
             let mut cursor = node.walk();
-            for child in node.children(&mut cursor) {
-                self.traverse_node(child);
+            let children: Vec<_> = node.children(&mut cursor).collect();
+            for child in children.into_iter().rev() {
+                work.push(Work::Visit(child));
             }
         }
-    }
 
-    fn extract_symbol(&mut self, node: tree_sitter::Node, kind: NodeKind) {
-        let name = self.extract_name(&node, kind);
-        if name.is_empty() {
-            // Skip anonymous nodes
-            let mut cursor = node.walk();
-            for child in node.children(&mut cursor) {
-                self.traverse_node(child);
+        let mut work: Vec<Work<'tree>> = vec![Work::Visit(root)];
+
+        while let Some(item) = work.pop() {
+            match item {
+                Work::PopStack => {
+                    self.node_stack.pop();
+                }
+                Work::Visit(node) => {
+                    let node_type = node.kind();
+                    if let Some(kind) = self.config.node_type_to_kind(node_type) {
+                        let name = self.extract_name(&node, kind);
+                        if name.is_empty() {
+                            push_children(node, &mut work);
+                        } else {
+                            self.emit_symbol(node, kind, name);
+                            work.push(Work::PopStack);
+                            push_children(node, &mut work);
+                        }
+                    } else {
+                        push_children(node, &mut work);
+                    }
+                }
             }
-            return;
         }
+    }
 
+    fn emit_symbol(&mut self, node: tree_sitter::Node, kind: NodeKind, name: String) {
         let start = node.start_position();
         let end = node.end_position();
 
@@ -214,7 +229,6 @@ impl<'a> ExtractionContext<'a> {
         self.next_id += 1;
         self.result.nodes.push(symbol);
 
-        // Create contains edge from parent
         if let Some(&parent_id) = self.node_stack.last() {
             let edge = Edge {
                 id: 0,
@@ -228,19 +242,8 @@ impl<'a> ExtractionContext<'a> {
             self.result.edges.push(edge);
         }
 
-        // Push this symbol onto the stack and traverse children
         self.node_stack.push(symbol_id);
-
-        // Extract function calls and other references from body
-        self.extract_references(&node, symbol_id);
-
-        // Traverse children for nested definitions
-        let mut cursor = node.walk();
-        for child in node.children(&mut cursor) {
-            self.traverse_node(child);
-        }
-
-        self.node_stack.pop();
+        self.find_calls(node, symbol_id);
     }
 
     fn extract_name(&self, node: &tree_sitter::Node, _kind: NodeKind) -> String {
@@ -282,7 +285,11 @@ impl<'a> ExtractionContext<'a> {
                 // Truncate at opening brace or newline
                 let sig = sig.split('{').next().unwrap_or(sig).trim();
                 if sig.len() > 200 {
-                    Some(format!("{}...", &sig[..200]))
+                    let boundary = (0..=200)
+                        .rev()
+                        .find(|&i| sig.is_char_boundary(i))
+                        .unwrap_or(0);
+                    Some(format!("{}...", &sig[..boundary]))
                 } else {
                     Some(sig.to_string())
                 }
@@ -481,33 +488,27 @@ impl<'a> ExtractionContext<'a> {
         Some(parts.join("::"))
     }
 
-    fn extract_references(&mut self, node: &tree_sitter::Node, source_id: i64) {
-        // Find call expressions within this node
-        self.find_calls(node, source_id);
-    }
-
-    fn find_calls(&mut self, node: &tree_sitter::Node, source_id: i64) {
-        let kind = node.kind();
-
-        if self.config.is_call_node(kind) {
-            if let Some(func_name) = self.extract_call_name(node) {
-                let start = node.start_position();
-                let uref = UnresolvedReference {
-                    source_node_id: source_id,
-                    reference_name: func_name,
-                    kind: EdgeKind::Calls,
-                    file_path: self.file_path.clone(),
-                    line: start.row as u32 + 1,
-                    column: start.column as u32,
-                };
-                self.result.unresolved_refs.push(uref);
+    fn find_calls<'tree>(&mut self, root: tree_sitter::Node<'tree>, source_id: i64) {
+        let mut stack: Vec<tree_sitter::Node<'tree>> = vec![root];
+        while let Some(node) = stack.pop() {
+            if self.config.is_call_node(node.kind()) {
+                if let Some(func_name) = self.extract_call_name(&node) {
+                    let start = node.start_position();
+                    self.result.unresolved_refs.push(UnresolvedReference {
+                        source_node_id: source_id,
+                        reference_name: func_name,
+                        kind: EdgeKind::Calls,
+                        file_path: self.file_path.clone(),
+                        line: start.row as u32 + 1,
+                        column: start.column as u32,
+                    });
+                }
+            }
+            let mut cursor = node.walk();
+            let children: Vec<_> = node.children(&mut cursor).collect();
+            for child in children.into_iter().rev() {
+                stack.push(child);
             }
-        }
-
-        // Recurse into children
-        let mut cursor = node.walk();
-        for child in node.children(&mut cursor) {
-            self.find_calls(&child, source_id);
         }
     }
 
@@ -1258,6 +1259,29 @@ end
         assert!(!is_generated_content("fn foo() { 1 + 1 }"));
     }
 
+    #[test]
+    fn test_deeply_nested_does_not_overflow() {
+        // Regression test: the old recursive traverse_node would stack-overflow
+        // on ASTs deeper than ~500 levels. The iterative rewrite must handle this.
+        let mut extractor = Extractor::new();
+        let depth = 500usize;
+        let mut code = String::new();
+        for i in 0..depth {
+            code.push_str(&format!("fn f{}(){{", i));
+        }
+        code.push_str(&"}".repeat(depth));
+        let result = extractor.extract_file("deep.rs", &code);
+        assert!(result.errors.is_empty());
+        assert_eq!(
+            result
+                .nodes
+                .iter()
+                .filter(|n| n.kind == NodeKind::Function)
+                .count(),
+            depth
+        );
+    }
+
     #[test]
     fn test_test_name_heuristic() {
         assert!(test_name_heuristic("test_user_login"));
diff --git a/src/lib.rs b/src/lib.rs
index 3c7d28b..a1c400e 100644
--- a/src/lib.rs
+++ b/src/lib.rs
@@ -41,6 +41,11 @@ use std::time::SystemTime;
 
 use anyhow::Result;
 use ignore::WalkBuilder;
+use indicatif::{ProgressBar, ProgressStyle};
+
+/// Commit and checkpoint the WAL after this many files during bulk indexing.
+/// Keeps WAL size bounded without requiring a single massive transaction.
+const CHECKPOINT_INTERVAL: usize = 200;
 use rayon::prelude::*;
 use sha2::{Digest, Sha256};
 use tracing::{debug, info, warn};
@@ -63,6 +68,8 @@ pub struct IndexConfig {
     pub respect_gitignore: bool,
     /// Skip the global resolve_references pass (for scoped resolution)
     pub skip_resolve: bool,
+    /// Render progress bars to stderr during indexing (disable for library/server use)
+    pub show_progress: bool,
 }
 
 impl Default for IndexConfig {
@@ -103,6 +110,7 @@ impl Default for IndexConfig {
             ],
             respect_gitignore: true,
             skip_resolve: false,
+            show_progress: false,
         }
     }
 }
@@ -124,24 +132,76 @@ struct ExtractedFile {
     result: ExtractionResult,
 }
 
-/// Index a codebase into the database
+/// Indexing behavior for a storage target.
+#[cfg(feature = "sqlite")]
+#[derive(Clone, Copy)]
+enum IndexMode {
+    Incremental,
+    FullBuild,
+}
+
+/// Incrementally index only changed files into an existing database.
 #[cfg(feature = "sqlite")]
 pub fn index_codebase(db: &mut Database, config: &IndexConfig) -> Result<IndexingStats> {
+    run_index_codebase(db, config, IndexMode::Incremental)
+}
+
+/// Build a complete index into an empty target database.
+#[cfg(feature = "sqlite")]
+pub fn build_full_index(db: &mut Database, config: &IndexConfig) -> Result<IndexingStats> {
+    run_index_codebase(db, config, IndexMode::FullBuild)
+}
+
+#[cfg(feature = "sqlite")]
+fn run_index_codebase(
+    db: &mut Database,
+    config: &IndexConfig,
+    mode: IndexMode,
+) -> Result<IndexingStats> {
     let root = Path::new(&config.root).canonicalize()?;
     info!("Indexing codebase at {}", root.display());
 
     let mut stats = IndexingStats::default();
+    let entries_to_extract = collect_entries(db, config, &root, mode, &mut stats)?;
+    let extracted = extract_entries(entries_to_extract, config.show_progress);
+
+    match mode {
+        IndexMode::Incremental => store_incremental_index(db, config, extracted, &mut stats)?,
+        IndexMode::FullBuild => store_full_index(db, config, extracted, &mut stats)?,
+    }
+
+    info!(
+        "Indexed {} files, {} nodes, {} edges ({} refs resolved)",
+        stats.files, stats.nodes, stats.edges, stats.resolved_refs
+    );
+
+    Ok(stats)
+}
 
-    // Build the walker
-    let mut walker = WalkBuilder::new(&root);
+#[cfg(feature = "sqlite")]
+fn collect_entries(
+    db: &Database,
+    config: &IndexConfig,
+    root: &Path,
+    mode: IndexMode,
+    stats: &mut IndexingStats,
+) -> Result<Vec<FileEntry>> {
+    let mut walker = WalkBuilder::new(root);
     walker
         .hidden(false)
         .git_ignore(config.respect_gitignore)
         .git_global(config.respect_gitignore)
         .git_exclude(config.respect_gitignore);
 
-    // Phase 1a: Sequentially walk directory, read files, compute hashes, check reindex need
-    let mut entries_to_extract: Vec<FileEntry> = Vec::new();
+    let mut entries_to_extract = Vec::new();
+    let scan_pb = if config.show_progress {
+        let pb = ProgressBar::new_spinner();
+        pb.set_prefix("Scanning");
+        pb.enable_steady_tick(std::time::Duration::from_millis(100));
+        pb
+    } else {
+        ProgressBar::hidden()
+    };
 
     for entry in walker.build() {
         let entry = match entry {
@@ -153,26 +213,21 @@ pub fn index_codebase(db: &mut Database, config: &IndexConfig) -> Result<Indexin
         };
 
         let path = entry.path();
-
-        // Skip directories
         if !path.is_file() {
             continue;
         }
 
-        // Check if this is a manifest file (detected by filename)
         let filename = path.file_name().and_then(|n| n.to_str()).unwrap_or("");
         let is_manifest = extraction::manifest::is_manifest_file(filename);
 
-        // Check extension
         let ext = path.extension().and_then(|e| e.to_str()).unwrap_or("");
         if !is_manifest
             && !config.extensions.is_empty()
-            && !config.extensions.iter().any(|e| e == ext)
+            && !config.extensions.iter().any(|allowed| allowed == ext)
         {
             continue;
         }
 
-        // Check if language is supported (manifest files use their ecosystem language)
         let language = if is_manifest {
             extraction::manifest::manifest_language(filename)
         } else {
@@ -182,17 +237,15 @@ pub fn index_codebase(db: &mut Database, config: &IndexConfig) -> Result<Indexin
             continue;
         }
 
-        // Check excluded directories
-        if path.components().any(|c| {
+        if path.components().any(|component| {
             config
                 .exclude_dirs
                 .iter()
-                .any(|d| c.as_os_str() == d.as_str())
+                .any(|dir| component.as_os_str() == dir.as_str())
         }) {
             continue;
         }
 
-        // Read file content
         let content = match std::fs::read_to_string(path) {
             Ok(c) => c,
             Err(err) => {
@@ -202,20 +255,17 @@ pub fn index_codebase(db: &mut Database, config: &IndexConfig) -> Result<Indexin
             }
         };
 
-        // Compute content hash
         let mut hasher = Sha256::new();
         hasher.update(content.as_bytes());
         let content_hash = hex::encode(hasher.finalize());
 
-        // Get relative path
         let rel_path = path
-            .strip_prefix(&root)
+            .strip_prefix(root)
             .unwrap_or(path)
             .display()
             .to_string();
 
-        // Check if file needs reindexing (requires DB access, done sequentially)
-        if !db.needs_reindex(&rel_path, &content_hash)? {
+        if matches!(mode, IndexMode::Incremental) && !db.needs_reindex(&rel_path, &content_hash)? {
             debug!("Skipping unchanged file: {}", rel_path);
             stats.skipped += 1;
             continue;
@@ -229,6 +279,7 @@ pub fn index_codebase(db: &mut Database, config: &IndexConfig) -> Result<Indexin
             .map(|d| d.as_secs() as i64)
             .unwrap_or(0);
 
+        scan_pb.set_message(format!("{} files queued", entries_to_extract.len() + 1));
         entries_to_extract.push(FileEntry {
             rel_path,
             content,
@@ -238,79 +289,192 @@ pub fn index_codebase(db: &mut Database, config: &IndexConfig) -> Result<Indexin
         });
     }
 
-    // Phase 1b: Parallel tree-sitter extraction using rayon
-    // Each thread creates its own Extractor (Parser is not Send)
+    scan_pb.finish_and_clear();
+    Ok(entries_to_extract)
+}
+
+#[cfg(feature = "sqlite")]
+fn extract_entries(entries_to_extract: Vec<FileEntry>, show_progress: bool) -> Vec<ExtractedFile> {
+    let parse_pb = if show_progress {
+        let pb = ProgressBar::new(entries_to_extract.len() as u64);
+        pb.set_style(
+            ProgressStyle::with_template(
+                "  {prefix:<10} [{bar:40.cyan/blue}] {pos:>5}/{len:<5} {msg}",
+            )
+            .unwrap()
+            .progress_chars("=> "),
+        );
+        pb.set_prefix("Parsing");
+        pb
+    } else {
+        ProgressBar::hidden()
+    };
+
     let extracted: Vec<ExtractedFile> = entries_to_extract
         .into_par_iter()
         .map(|entry| {
             let mut extractor = Extractor::new();
             let result = extractor.extract_file(&entry.rel_path, &entry.content);
+            parse_pb.inc(1);
             ExtractedFile { entry, result }
         })
         .collect();
+    parse_pb.finish_and_clear();
+    extracted
+}
 
-    // Phase 2: Sequential database operations inside a transaction
+#[cfg(feature = "sqlite")]
+fn store_incremental_index(
+    db: &mut Database,
+    config: &IndexConfig,
+    extracted: Vec<ExtractedFile>,
+    stats: &mut IndexingStats,
+) -> Result<()> {
+    let total = extracted.len();
     db.begin_transaction()?;
+    db.disable_fts_automerge()?;
 
-    for extracted_file in extracted {
-        let entry = extracted_file.entry;
-        let result = extracted_file.result;
-
-        debug!("Indexing: {}", entry.rel_path);
+    let store_pb = make_store_progress_bar(total, config.show_progress);
+    for (i, extracted_file) in extracted.into_iter().enumerate() {
+        store_extracted_file(db, extracted_file, stats, true, true)?;
+        store_pb.inc(1);
 
-        // Delete existing data for this file
-        db.delete_file(&entry.rel_path)?;
+        let is_last = i + 1 == total;
+        if (i + 1) % CHECKPOINT_INTERVAL == 0 && !is_last {
+            db.commit()?;
+            db.begin_transaction()?;
+        }
+    }
+    store_pb.finish_and_clear();
 
-        // Store file record FIRST (nodes have FK to files)
-        let file_record = FileRecord {
-            path: entry.rel_path.clone(),
-            content_hash: entry.content_hash,
-            language: entry.language,
-            size: entry.content.len() as u64,
-            modified_at: entry.modified_at,
-            indexed_at: SystemTime::now()
-                .duration_since(SystemTime::UNIX_EPOCH)
-                .map(|d| d.as_secs() as i64)
-                .unwrap_or(0),
-            node_count: result.nodes.len() as u32,
-        };
-        db.insert_or_update_file(&file_record)?;
+    db.optimize_fts()?;
+    resolve_references_if_needed(db, config, stats)?;
+    db.commit()?;
+    Ok(())
+}
 
-        // Store nodes (batch: prepare statement once, reuse for all rows)
-        let node_count = file_record.node_count;
-        let mut nodes = result.nodes;
-        let id_map = db.insert_nodes_batch(&mut nodes)?;
+#[cfg(feature = "sqlite")]
+fn store_full_index(
+    db: &mut Database,
+    config: &IndexConfig,
+    extracted: Vec<ExtractedFile>,
+    stats: &mut IndexingStats,
+) -> Result<()> {
+    db.begin_transaction()?;
 
-        // Store edges with mapped IDs (batch)
-        let mut edges = result.edges;
-        let edge_count = db.insert_edges_batch(&mut edges, &id_map)?;
-        stats.edges += edge_count;
+    let store_pb = make_store_progress_bar(extracted.len(), config.show_progress);
+    for extracted_file in extracted {
+        store_extracted_file(db, extracted_file, stats, false, false)?;
+        store_pb.inc(1);
+    }
+    store_pb.finish_and_clear();
 
-        // Store unresolved references with mapped IDs (batch)
-        let mut unresolved_refs = result.unresolved_refs;
-        db.insert_unresolved_refs_batch(&mut unresolved_refs, &id_map)?;
+    resolve_references_if_needed(db, config, stats)?;
+    db.disable_fts_automerge()?;
+    db.rebuild_fts_indexes()?;
+    db.commit_transaction()?;
+    Ok(())
+}
 
-        stats.files += 1;
-        stats.nodes += node_count as u64;
-        stats.errors += result.errors.len() as u64;
+#[cfg(feature = "sqlite")]
+fn make_store_progress_bar(total: usize, show_progress: bool) -> ProgressBar {
+    if show_progress {
+        let pb = ProgressBar::new(total as u64);
+        pb.set_style(
+            ProgressStyle::with_template(
+                "  {prefix:<10} [{bar:40.cyan/blue}] {pos:>5}/{len:<5} {msg}",
+            )
+            .unwrap()
+            .progress_chars("=> "),
+        );
+        pb.set_prefix("Storing");
+        pb
+    } else {
+        ProgressBar::hidden()
     }
+}
 
-    // Resolve references (unless caller will handle scoped resolution)
-    if !config.skip_resolve {
-        info!("Resolving references...");
-        let resolved = db.resolve_references()?;
-        stats.resolved_refs = resolved as u64;
+#[cfg(feature = "sqlite")]
+fn store_extracted_file(
+    db: &Database,
+    extracted_file: ExtractedFile,
+    stats: &mut IndexingStats,
+    delete_existing: bool,
+    maintain_fts: bool,
+) -> Result<()> {
+    let entry = extracted_file.entry;
+    let result = extracted_file.result;
+
+    debug!("Indexing: {}", entry.rel_path);
+    if delete_existing {
+        db.delete_file(&entry.rel_path)?;
     }
 
-    // Commit transaction
-    db.commit()?;
+    let file_record = build_file_record(&entry, result.nodes.len());
+    let node_count = file_record.node_count;
+    let error_count = result.errors.len() as u64;
+    db.insert_or_update_file(&file_record)?;
+
+    let mut nodes = result.nodes;
+    let id_map = if maintain_fts {
+        db.insert_nodes_batch(&mut nodes)?
+    } else {
+        db.insert_nodes_batch_without_fts(&mut nodes)?
+    };
+
+    let mut edges = result.edges;
+    let edge_count = db.insert_edges_batch(&mut edges, &id_map)?;
+    stats.edges += edge_count;
+
+    let mut unresolved_refs = result.unresolved_refs;
+    db.insert_unresolved_refs_batch(&mut unresolved_refs, &id_map)?;
+
+    stats.files += 1;
+    stats.nodes += node_count as u64;
+    stats.errors += error_count;
+    Ok(())
+}
 
-    info!(
-        "Indexed {} files, {} nodes, {} edges ({} refs resolved)",
-        stats.files, stats.nodes, stats.edges, stats.resolved_refs
-    );
+#[cfg(feature = "sqlite")]
+fn build_file_record(entry: &FileEntry, node_count: usize) -> FileRecord {
+    FileRecord {
+        path: entry.rel_path.clone(),
+        content_hash: entry.content_hash.clone(),
+        language: entry.language,
+        size: entry.content.len() as u64,
+        modified_at: entry.modified_at,
+        indexed_at: SystemTime::now()
+            .duration_since(SystemTime::UNIX_EPOCH)
+            .map(|d| d.as_secs() as i64)
+            .unwrap_or(0),
+        node_count: node_count as u32,
+    }
+}
 
-    Ok(stats)
+#[cfg(feature = "sqlite")]
+fn resolve_references_if_needed(
+    db: &Database,
+    config: &IndexConfig,
+    stats: &mut IndexingStats,
+) -> Result<()> {
+    if config.skip_resolve {
+        return Ok(());
+    }
+
+    info!("Resolving references...");
+    let resolve_pb = if config.show_progress {
+        let pb = ProgressBar::new_spinner();
+        pb.set_prefix("Resolving");
+        pb.set_message("references...");
+        pb.enable_steady_tick(std::time::Duration::from_millis(100));
+        pb
+    } else {
+        ProgressBar::hidden()
+    };
+    let resolved = db.resolve_references()?;
+    resolve_pb.finish_and_clear();
+    stats.resolved_refs = resolved as u64;
+    Ok(())
 }
 
 /// Statistics from an indexing operation
diff --git a/src/mcp/handlers/reindex.rs b/src/mcp/handlers/reindex.rs
index 2d7cc37..bb1361f 100644
--- a/src/mcp/handlers/reindex.rs
+++ b/src/mcp/handlers/reindex.rs
@@ -1,5 +1,6 @@
 //! Reindexing handler
 
+use crate::cli::rebuild_project_database;
 use crate::db::Database;
 use crate::mcp::format::normalize_path;
 use crate::mcp::types::ReindexRequest;
@@ -14,7 +15,10 @@ pub fn handle_reindex(
     // If specific files requested, delete and reindex just those
     if let Some(files) = &req.files {
         if files.is_empty() {
-            return Ok("No files specified. Provide file paths or omit the parameter to reindex all changed files.".to_string());
+            return Ok(
+                "No files specified. Provide file paths or omit the parameter to rebuild the full index."
+                    .to_string(),
+            );
         }
 
         let mut errors = Vec::new();
@@ -68,20 +72,90 @@ pub fn handle_reindex(
             Err(e) => Err(format!("Reindex failed: {}", e)),
         }
     } else {
-        // Full incremental reindex
+        // Full shadow rebuild
         let config = IndexConfig {
             root: project_root.to_string(),
             ..Default::default()
         };
 
-        match index_codebase(db, &config) {
-            Ok(stats) => {
-                Ok(format!(
-                    "## Reindex Complete\n\n**Files processed:** {}\n**Files skipped (unchanged):** {}\n**Symbols found:** {}\n**Edges created:** {}\n**References resolved:** {}\n**Errors:** {}\n",
-                    stats.files, stats.skipped, stats.nodes, stats.edges, stats.resolved_refs, stats.errors
-                ))
-            }
+        match rebuild_project_database(db, &config) {
+            Ok(stats) => Ok(format!(
+                "## Reindex Complete\n\n**Mode:** full rebuild\n**Files indexed:** {}\n**Symbols found:** {}\n**Edges created:** {}\n**References resolved:** {}\n**Errors:** {}\n",
+                stats.files, stats.nodes, stats.edges, stats.resolved_refs, stats.errors
+            )),
             Err(e) => Err(format!("Reindex failed: {}", e)),
         }
     }
 }
+
+#[cfg(test)]
+mod tests {
+    use super::*;
+    use std::fs;
+    use tempfile::tempdir;
+
+    use crate::cli::{open_project_database, rebuild_project_database};
+
+    fn write_file(path: &std::path::Path, content: &str) {
+        if let Some(parent) = path.parent() {
+            fs::create_dir_all(parent).unwrap();
+        }
+        fs::write(path, content).unwrap();
+    }
+
+    #[test]
+    fn test_handle_reindex_full_rebuild_reopens_live_handle() {
+        let dir = tempdir().unwrap();
+        let project_root = dir.path().display().to_string();
+        let file_path = dir.path().join("src/lib.rs");
+        write_file(&file_path, "pub fn old_symbol() {}\n");
+
+        let mut db = open_project_database(&project_root).unwrap();
+        let config = IndexConfig {
+            root: project_root.clone(),
+            ..Default::default()
+        };
+        rebuild_project_database(&mut db, &config).unwrap();
+        assert!(db.find_node_by_name("old_symbol").unwrap().is_some());
+
+        write_file(&file_path, "pub fn new_symbol() {}\n");
+        let output =
+            handle_reindex(&mut db, &project_root, &ReindexRequest { files: None }).unwrap();
+
+        assert!(output.contains("**Mode:** full rebuild"));
+        assert!(db.find_node_by_name("old_symbol").unwrap().is_none());
+        assert!(db.find_node_by_name("new_symbol").unwrap().is_some());
+    }
+
+    #[test]
+    fn test_handle_reindex_specific_files_stays_in_place() {
+        let dir = tempdir().unwrap();
+        let project_root = dir.path().display().to_string();
+        let a_path = dir.path().join("src/a.rs");
+        let b_path = dir.path().join("src/b.rs");
+        write_file(&a_path, "pub fn old_a() {}\n");
+        write_file(&b_path, "pub fn stable_b() {}\n");
+
+        let mut db = open_project_database(&project_root).unwrap();
+        let config = IndexConfig {
+            root: project_root.clone(),
+            ..Default::default()
+        };
+        rebuild_project_database(&mut db, &config).unwrap();
+
+        write_file(&a_path, "pub fn new_a() {}\n");
+        let output = handle_reindex(
+            &mut db,
+            &project_root,
+            &ReindexRequest {
+                files: Some(vec!["src/a.rs".to_string()]),
+            },
+        )
+        .unwrap();
+
+        assert!(output.contains("**Files reindexed:** 1"));
+        assert!(db.find_node_by_name("old_a").unwrap().is_none());
+        assert!(db.find_node_by_name("new_a").unwrap().is_some());
+        assert!(db.find_node_by_name("stable_b").unwrap().is_some());
+    }
+}
diff --git a/src/mcp/mod.rs b/src/mcp/mod.rs
index 2c87760..a0b4a47 100644
--- a/src/mcp/mod.rs
+++ b/src/mcp/mod.rs
@@ -200,10 +200,10 @@ impl SymgraphHandler {
         self.with_db(|db| handlers::symbol::handle_references(db, &req))
     }
 
-    /// Trigger incremental reindexing (runs in background, returns immediately)
+    /// Trigger background reindexing (runs in background, returns immediately)
     #[tool(
         name = "symgraph-reindex",
-        description = "Trigger incremental reindexing of the codebase. Only changed files are re-parsed. Runs in background and returns immediately."
+        description = "Trigger reindexing of the codebase. When files are provided, only those files are updated in place; otherwise a full shadow rebuild runs in the background."
     )]
     fn symgraph_reindex(&self, Parameters(req): Parameters<ReindexRequest>) -> String {
         // If a reindex is already running, refuse to start another one.
@@ -241,7 +241,7 @@ impl SymgraphHandler {
                 n
             ),
             None => {
-                "Reindexing all changed files in background. Use symgraph-status to check progress."
+                "Rebuilding the full index in background. Use symgraph-status to check progress."
                     .to_string()
             }
         }
diff --git a/src/mcp/types.rs b/src/mcp/types.rs
index 33c5a31..e5607f5 100644
--- a/src/mcp/types.rs
+++ b/src/mcp/types.rs
@@ -48,7 +48,7 @@ pub struct DefinitionRequest {
 #[derive(Debug, Deserialize, schemars::JsonSchema)]
 pub struct ReindexRequest {
     #[schemars(
-        description = "Optional: specific files to reindex. If empty, reindexes all changed files."
+        description = "Optional: specific files to reindex. If omitted, rebuilds the full index via a shadow database."
     )]
     pub files: Option<Vec<String>>,
 }
diff --git a/tests/integration_test.rs b/tests/integration_test.rs
index 7edc6ee..1417dfb 100644
--- a/tests/integration_test.rs
+++ b/tests/integration_test.rs
@@ -2,10 +2,14 @@
 //!
 //! These tests verify the end-to-end workflow of indexing and querying code.
 
+use std::fs;
+
+use symgraph::cli::{open_project_database, rebuild_project_database};
 use symgraph::db::Database;
 use symgraph::extraction::Extractor;
 use symgraph::graph::Graph;
 use symgraph::types::{EdgeKind, FileRecord, Language, NodeKind};
+use symgraph::{build_full_index, index_codebase, IndexConfig};
 use tempfile::tempdir;
 
 /// Helper to set up a test database with indexed code
@@ -271,6 +275,85 @@ fn test_database_persistence() {
     }
 }
 
+#[test]
+fn test_build_full_index_populates_empty_target_db() {
+    let dir = tempdir().unwrap();
+    let src = dir.path().join("src");
+    fs::create_dir(&src).unwrap();
+    fs::write(
+        src.join("main.rs"),
+        "fn main() { helper(); }\nfn helper() {}\n",
+    )
+    .unwrap();
+
+    let db_path = dir.path().join("full-build.db");
+    let mut db = Database::open(&db_path).unwrap();
+    let config = IndexConfig {
+        root: dir.path().display().to_string(),
+        ..Default::default()
+    };
+
+    let stats = build_full_index(&mut db, &config).unwrap();
+
+    assert_eq!(stats.files, 1);
+    assert_eq!(stats.skipped, 0);
+    assert!(stats.nodes >= 2);
+    assert!(db.find_node_by_name("main").unwrap().is_some());
+    assert!(db.find_node_by_name("helper").unwrap().is_some());
+    assert!(!db.semantic_search("helper", 10).unwrap().is_empty());
+}
+
+#[test]
+fn test_rebuild_project_database_replaces_stale_rows() {
+    let dir = tempdir().unwrap();
+    let src = dir.path().join("src");
+    fs::create_dir(&src).unwrap();
+    fs::write(src.join("old.rs"), "fn old_symbol() {}\n").unwrap();
+
+    let project_root = dir.path().display().to_string();
+    let mut db = open_project_database(&project_root).unwrap();
+    let config = IndexConfig {
+        root: project_root.clone(),
+        ..Default::default()
+    };
+
+    rebuild_project_database(&mut db, &config).unwrap();
+    assert!(db.find_node_by_name("old_symbol").unwrap().is_some());
+
+    fs::remove_file(src.join("old.rs")).unwrap();
+    fs::write(src.join("new.rs"), "fn new_symbol() {}\n").unwrap();
+
+    rebuild_project_database(&mut db, &config).unwrap();
+
+    assert!(db.find_node_by_name("old_symbol").unwrap().is_none());
+    assert!(db.find_node_by_name("new_symbol").unwrap().is_some());
+    assert_eq!(db.get_stats().unwrap().total_files, 1);
+}
+
+#[test]
+fn test_rebuild_project_database_keeps_live_db_on_failure() {
+    let dir = tempdir().unwrap();
+    let src = dir.path().join("src");
+    fs::create_dir(&src).unwrap();
+    fs::write(src.join("live.rs"), "fn live_symbol() {}\n").unwrap();
+
+    let project_root = dir.path().display().to_string();
+    let mut db = open_project_database(&project_root).unwrap();
+    let good_config = IndexConfig {
+        root: project_root.clone(),
+        ..Default::default()
+    };
+    rebuild_project_database(&mut db, &good_config).unwrap();
+
+    let bad_config = IndexConfig {
+        root: src.join("live.rs").display().to_string(),
+        ..Default::default()
+    };
+    assert!(rebuild_project_database(&mut db, &bad_config).is_err());
+
+    assert!(db.find_node_by_name("live_symbol").unwrap().is_some());
+}
+
 #[test]
 fn test_incremental_indexing() {
     let db = Database::in_memory().unwrap();
@@ -667,3 +750,35 @@ fn test_search_performance() {
         duration
     );
 }
+
+/// Stress test: exercise the periodic-commit / WAL-checkpoint branch for
+/// in-place incremental indexing by crossing CHECKPOINT_INTERVAL (200).
+/// Uses an on-disk database to mirror the write path that still batches
+/// commits mid-loop.
+#[test]
+fn test_incremental_index_codebase_periodic_checkpoint() {
+    let dir = tempdir().unwrap();
+    let src = dir.path().join("src");
+    fs::create_dir(&src).unwrap();
+
+    // 250 small Rust files — enough to cross the 200-file checkpoint boundary.
+    for i in 0..250 {
+        let path = src.join(format!("mod_{:03}.rs", i));
+        let body = format!(
+            "pub fn handler_{i}() {{ helper_{i}(); }}\nfn helper_{i}() {{}}\n",
+            i = i
+        );
+        fs::write(&path, body).unwrap();
+    }
+
+    let db_path = dir.path().join("symgraph.db");
+    let mut db = Database::open(&db_path).unwrap();
+    let config = IndexConfig {
+        root: dir.path().display().to_string(),
+        ..Default::default()
+    };
+
+    let stats = index_codebase(&mut db, &config).expect("index_codebase failed");
+    assert_eq!(stats.files, 250);
+    assert!(stats.nodes >= 500);
+}