diff --git a/README.md b/README.md index b48a297f0..6ff5fdcb8 100644 --- a/README.md +++ b/README.md @@ -382,6 +382,17 @@ codebase-memory-mcp cli --raw search_graph '{"label": "Function"}' | jq '.result | `delete_project` | Remove a project and all its graph data. | | `index_status` | Check indexing status of a project. | +`index_status` includes an additive `evidence` object. `evidence.index_snapshot` +reports the timestamp and Git HEAD captured when the last successful graph +snapshot was finalized, the current HEAD, tracked working-tree state, and a +`freshness` value: `current` means the indexed HEAD equals current HEAD and +tracked files are clean; `head_changed` means a later checkout/commit changed +HEAD; `working_tree_changed` means HEAD matches but tracked files are modified; +`unknown` means Git or comparison data is unavailable. Untracked files are not +compared. `evidence.coverage` reports discovered/indexed/excluded/failed file +counters and a `coverage_status`; older indexes report zero counters with +status `unknown` rather than guessing. + ### Querying | Tool | Description | @@ -393,6 +404,14 @@ codebase-memory-mcp cli --raw search_graph '{"label": "Function"}' | jq '.result | `get_graph_schema` | Node/edge counts, relationship patterns, property definitions per label. Run this first. | | `get_code_snippet` | Read source code for a function by qualified name. | | `get_architecture` | Codebase overview: languages, packages, routes, hotspots, clusters, ADR. | + +`trace_path`/`trace_call_path` preserve their existing `callers`/`callees` +arrays and add `edge_evidence`; fields are `null`/`unknown`/`unavailable` when +edge properties lack provenance. Relation `confidence` is source-resolution +confidence from the indexer, not a probability of runtime correctness and not +BM25/semantic search relevance. 
Dynamic behavior such as reflection, +dependency injection, framework wiring, generated code, configuration, HTTP, +async messaging, and cross-repo links may remain inferred or unavailable. | `search_code` | Grep-like text search within indexed project files. | | `manage_adr` | CRUD for Architecture Decision Records. | | `ingest_traces` | Ingest runtime traces to validate HTTP_CALLS edges. | diff --git a/src/cli/cli.c b/src/cli/cli.c index 4228dbaad..19981ac31 100644 --- a/src/cli/cli.c +++ b/src/cli/cli.c @@ -2253,7 +2253,7 @@ static const uint32_t ZIP_MAX_UNCOMP = 500U * 1024U * 1024U; static unsigned char *zip_extract_entry(const unsigned char *file_data, uint16_t method, uint32_t comp_size, uint32_t uncomp_size, int *out_len) { if (method == ZIP_STORED) { - if (comp_size > ZIP_MAX_UNCOMP) { + if ((uint64_t)comp_size > ZIP_MAX_UNCOMP) { return NULL; } unsigned char *out = malloc(comp_size); @@ -2265,7 +2265,7 @@ static unsigned char *zip_extract_entry(const unsigned char *file_data, uint16_t return out; } if (method == ZIP_DEFLATE) { - if (uncomp_size > ZIP_MAX_UNCOMP) { + if ((uint64_t)uncomp_size > ZIP_MAX_UNCOMP) { return NULL; } unsigned char *out = malloc(uncomp_size); diff --git a/src/cypher/cypher.c b/src/cypher/cypher.c index af2b319a9..8dde79f07 100644 --- a/src/cypher/cypher.c +++ b/src/cypher/cypher.c @@ -2400,11 +2400,11 @@ static bool eval_condition(const cbm_condition_t *c, binding_t *b) { /* IS NULL / IS NOT NULL */ if (strcmp(c->op, "IS NULL") == 0) { - result = (!actual || actual[0] == '\0'); + result = actual[0] == '\0'; return c->negated ? !result : result; } if (strcmp(c->op, "IS NOT NULL") == 0) { - result = (actual && actual[0] != '\0'); + result = actual[0] != '\0'; return c->negated ? 
!result : result; } diff --git a/src/git/git_context.c b/src/git/git_context.c index 5f27b9f20..e5660b874 100644 --- a/src/git/git_context.c +++ b/src/git/git_context.c @@ -82,13 +82,13 @@ static int git_capture(const char *repo_path, const char *git_args, char **out) char buf[GIT_OUTPUT_MAX]; if (!fgets(buf, sizeof(buf), fp)) { - cbm_pclose(fp); - return CBM_NOT_FOUND; + buf[0] = '\0'; + } else { + trim_newlines(buf); } - trim_newlines(buf); int rc = cbm_pclose(fp); - if (rc != 0 || buf[0] == '\0') { + if (rc != 0) { return CBM_NOT_FOUND; } @@ -266,6 +266,22 @@ int cbm_git_context_resolve(const char *path, cbm_git_context_t *out) { return 0; } +int cbm_git_tracked_dirty(const char *path, bool *out_dirty) { + if (!out_dirty) { + return CBM_NOT_FOUND; + } + *out_dirty = false; + char *status = NULL; + int rc = git_capture(path, "status --porcelain --untracked-files=no", &status); + if (rc != 0) { + free(status); + return CBM_NOT_FOUND; + } + *out_dirty = status && status[0] != '\0'; + free(status); + return 0; +} + char *cbm_git_context_branch_qn(const char *project_name, const cbm_git_context_t *ctx) { const char *project = project_name && project_name[0] ? project_name : "project"; const char *slug = "working-tree"; diff --git a/src/git/git_context.h b/src/git/git_context.h index 876309eb6..3939fb1c1 100644 --- a/src/git/git_context.h +++ b/src/git/git_context.h @@ -21,6 +21,9 @@ typedef struct { int cbm_git_context_resolve(const char *path, cbm_git_context_t *out); void cbm_git_context_free(cbm_git_context_t *ctx); +/* Returns 0 when tracked working-tree dirtiness was determined, non-zero when + * unavailable. Untracked files are intentionally not compared. 
*/ +int cbm_git_tracked_dirty(const char *path, bool *out_dirty); char *cbm_git_context_branch_qn(const char *project_name, const cbm_git_context_t *ctx); int cbm_git_context_props_json(const cbm_git_context_t *ctx, char *buf, int buf_size); diff --git a/src/mcp/mcp.c b/src/mcp/mcp.c index 8102b1e77..4500befbc 100644 --- a/src/mcp/mcp.c +++ b/src/mcp/mcp.c @@ -893,6 +893,85 @@ static void add_git_context_json(yyjson_mut_doc *doc, yyjson_mut_val *obj, const cbm_git_context_free(&ctx); } +static void add_index_evidence_json(yyjson_mut_doc *doc, yyjson_mut_val *root, + const cbm_project_t *proj) { + yyjson_mut_val *evidence = yyjson_mut_obj(doc); + yyjson_mut_val *snap = yyjson_mut_obj(doc); + const char *indexed_head = (proj && proj->indexed_git_head && proj->indexed_git_head[0]) + ? proj->indexed_git_head + : NULL; + cbm_git_context_t ctx = {0}; + int git_rc = cbm_git_context_resolve(proj ? proj->root_path : NULL, &ctx); + bool dirty = false; + int dirty_rc = (proj && git_rc == 0 && ctx.is_git) + ? cbm_git_tracked_dirty(proj->root_path, &dirty) + : CBM_NOT_FOUND; + + add_git_context_string(doc, snap, "indexed_at", proj ? proj->indexed_at : NULL); + add_git_context_string(doc, snap, "indexed_git_head", indexed_head); + add_git_context_string( + doc, snap, "current_git_head", + (git_rc == 0 && ctx.is_git && ctx.head_sha && ctx.head_sha[0]) ? ctx.head_sha : NULL); + const char *repo_state = "unavailable"; + if (git_rc == 0 && ctx.root_exists && !ctx.is_git) { + repo_state = "not_git"; + } else if (git_rc == 0 && ctx.is_git && dirty_rc == 0) { + repo_state = dirty ? 
"dirty" : "clean"; + } + yyjson_mut_obj_add_str(doc, snap, "repository_state", repo_state); + if (indexed_head && git_rc == 0 && ctx.is_git && ctx.head_sha && ctx.head_sha[0]) { + yyjson_mut_obj_add_bool(doc, snap, "snapshot_matches_current_head", + strcmp(indexed_head, ctx.head_sha) == 0); + } else { + yyjson_mut_obj_add_null(doc, snap, "snapshot_matches_current_head"); + } + if (indexed_head && git_rc == 0 && ctx.is_git && ctx.head_sha && ctx.head_sha[0] && + dirty_rc == 0) { + yyjson_mut_obj_add_bool(doc, snap, "snapshot_matches_working_tree", + strcmp(indexed_head, ctx.head_sha) == 0 && !dirty); + } else { + yyjson_mut_obj_add_null(doc, snap, "snapshot_matches_working_tree"); + } + const char *freshness = "unknown"; + if (indexed_head && git_rc == 0 && ctx.is_git && ctx.head_sha && ctx.head_sha[0] && + dirty_rc == 0) { + if (strcmp(indexed_head, ctx.head_sha) != 0) { + freshness = "head_changed"; + } else { + freshness = dirty ? "working_tree_changed" : "current"; + } + } else if (git_rc == 0 && ctx.root_exists && !ctx.is_git) { + freshness = "unknown"; + } + yyjson_mut_obj_add_str(doc, snap, "freshness", freshness); + yyjson_mut_obj_add_val(doc, evidence, "index_snapshot", snap); + + yyjson_mut_val *cov = yyjson_mut_obj(doc); + int discovered = proj ? proj->files_discovered : 0; + int indexed = proj ? proj->files_indexed : 0; + int excluded = proj ? proj->files_excluded : 0; + int failed = proj ? proj->files_failed : 0; + yyjson_mut_obj_add_int(doc, cov, "files_discovered", discovered); + yyjson_mut_obj_add_int(doc, cov, "files_indexed", indexed); + yyjson_mut_obj_add_int(doc, cov, "files_excluded", excluded); + yyjson_mut_obj_add_int(doc, cov, "files_failed", failed); + yyjson_mut_obj_add_str(doc, cov, "coverage_status", + failed > 0 || excluded > 0 ? "partial" + : discovered > 0 && indexed == discovered ? 
"complete" + : "unknown"); + yyjson_mut_obj_add_val(doc, evidence, "coverage", cov); + yyjson_mut_val *limits = yyjson_mut_arr(doc); + yyjson_mut_val *lim = yyjson_mut_obj(doc); + yyjson_mut_obj_add_str(doc, lim, "code", "UNTRACKED_FILES_NOT_COMPARED"); + yyjson_mut_obj_add_str(doc, lim, "message", + "Working-tree freshness compares current HEAD and tracked " + "modifications; untracked files are not compared."); + yyjson_mut_arr_add_val(limits, lim); + yyjson_mut_obj_add_val(doc, evidence, "limitations", limits); + yyjson_mut_obj_add_val(doc, root, "evidence", evidence); + cbm_git_context_free(&ctx); +} + /* Build a helpful error listing available projects. Caller must free() result. */ static char *build_project_list_error(const char *reason) { char dir_path[CBM_SZ_1K]; @@ -1739,9 +1818,8 @@ static char *handle_index_status(cbm_mcp_server_t *srv, const char *args) { yyjson_mut_obj_add_strcpy(doc, root, "root_path", proj_info.root_path ? proj_info.root_path : ""); add_git_context_json(doc, root, proj_info.root_path); - safe_str_free(&proj_info.name); - safe_str_free(&proj_info.indexed_at); - safe_str_free(&proj_info.root_path); + add_index_evidence_json(doc, root, &proj_info); + cbm_project_free_fields(&proj_info); } if (nodes == 0) { yyjson_mut_obj_add_str( @@ -2244,6 +2322,79 @@ static yyjson_mut_val *bfs_to_json_array(yyjson_mut_doc *doc, cbm_traverse_resul return arr; } +static const char *edge_evidence_status(const char *strategy, double confidence, int candidates) { + if (!strategy || !strategy[0]) { + return "unavailable"; + } + if (candidates > 1) { + return "ambiguous"; + } + if (strstr(strategy, "heur") || strstr(strategy, "fuzzy") || confidence < 0.8) { + return "inferred"; + } + return "verified"; +} + +static const char *edge_resolution_strategy(const char *strategy) { + if (!strategy || !strategy[0]) { + return "unknown"; + } + if (strstr(strategy, "lsp")) { + return "hybrid_lsp"; + } + if (strstr(strategy, "import") || strstr(strategy, 
"same_module") || + strstr(strategy, "receiver")) { + return "direct_ast"; + } + if (strstr(strategy, "fuzzy") || strstr(strategy, "heur")) { + return "heuristic"; + } + return strategy; +} + +static yyjson_mut_val *trace_edges_to_json_array(yyjson_mut_doc *doc, cbm_traverse_result_t *tr) { + yyjson_mut_val *arr = yyjson_mut_arr(doc); + for (int i = 0; i < tr->edge_count; i++) { + yyjson_mut_val *item = yyjson_mut_obj(doc); + yyjson_mut_obj_add_str(doc, item, "from", + tr->edges[i].from_name ? tr->edges[i].from_name : ""); + yyjson_mut_obj_add_str(doc, item, "to", tr->edges[i].to_name ? tr->edges[i].to_name : ""); + yyjson_mut_obj_add_str(doc, item, "type", tr->edges[i].type ? tr->edges[i].type : ""); + yyjson_mut_val *edge = yyjson_mut_obj(doc); + const char *props = tr->edges[i].properties_json; + yyjson_doc *pdoc = props ? yyjson_read(props, strlen(props), 0) : NULL; + yyjson_val *proot = pdoc ? yyjson_doc_get_root(pdoc) : NULL; + yyjson_val *v = proot ? yyjson_obj_get(proot, "strategy") : NULL; + const char *strategy = yyjson_is_str(v) ? yyjson_get_str(v) : NULL; + v = proot ? yyjson_obj_get(proot, "confidence") : NULL; + bool has_conf = yyjson_is_num(v); + double conf = has_conf ? yyjson_get_num(v) : 0.0; + v = proot ? yyjson_obj_get(proot, "candidates") : NULL; + int candidates = yyjson_is_int(v) ? 
(int)yyjson_get_int(v) : 0; + yyjson_mut_obj_add_strcpy(doc, edge, "resolution_strategy", /* may alias pdoc */ + edge_resolution_strategy(strategy)); + if (has_conf) { + yyjson_mut_obj_add_real(doc, edge, "confidence", conf); + } else { + yyjson_mut_obj_add_null(doc, edge, "confidence"); + } + if (candidates > 0) { + yyjson_mut_obj_add_int(doc, edge, "candidate_count", candidates); + } else { + yyjson_mut_obj_add_null(doc, edge, "candidate_count"); + } + yyjson_mut_obj_add_null(doc, edge, "source_location"); + yyjson_mut_obj_add_str(doc, edge, "evidence_status", + edge_evidence_status(strategy, conf, candidates)); + yyjson_mut_obj_add_val(doc, item, "edge", edge); + yyjson_mut_arr_add_val(arr, item); + if (pdoc) { + yyjson_doc_free(pdoc); + } + } + return arr; +} + static char *handle_trace_call_path(cbm_mcp_server_t *srv, const char *args) { char *func_name = cbm_mcp_get_string_arg(args, "function_name"); char *project = cbm_mcp_get_string_arg(args, "project"); @@ -2365,6 +2516,16 @@ static char *handle_trace_call_path(cbm_mcp_server_t *srv, const char *args) { yyjson_mut_obj_add_val(doc, root, "callers", bfs_to_json_array(doc, &tr_in, risk_labels, include_tests)); } + yyjson_mut_val *edge_evidence = yyjson_mut_obj(doc); + if (do_outbound) { + yyjson_mut_obj_add_val(doc, edge_evidence, "outbound", + trace_edges_to_json_array(doc, &tr_out)); + } + if (do_inbound) { + yyjson_mut_obj_add_val(doc, edge_evidence, "inbound", + trace_edges_to_json_array(doc, &tr_in)); + } + yyjson_mut_obj_add_val(doc, root, "edge_evidence", edge_evidence); /* Serialize BEFORE freeing traversal results (yyjson borrows strings) */ char *json = yy_doc_to_str(doc); diff --git a/src/pipeline/pipeline.c b/src/pipeline/pipeline.c index 499c916a5..35170e0c7 100644 --- a/src/pipeline/pipeline.c +++ b/src/pipeline/pipeline.c @@ -848,6 +848,8 @@ static int dump_and_persist_hashes(cbm_pipeline_t *p, const cbm_file_info_t *fil stat_mtime_ns(&fst), fst.st_size); } } + (void)cbm_store_update_project_coverage(hash_store, 
p->project_name, file_count, file_count, + p->excluded_count, 0); /* FTS5 backfill: populate nodes_fts with camelCase-split names. * Contentless FTS5 requires the special 'delete-all' command instead of diff --git a/src/store/store.c b/src/store/store.c index c237332e2..8a8030b7c 100644 --- a/src/store/store.c +++ b/src/store/store.c @@ -62,6 +62,7 @@ enum { #include "foundation/log.h" #include "foundation/compat_regex.h" #include "foundation/str_util.h" +#include "git/git_context.h" #define XXH_INLINE_ALL #include "xxhash/xxhash.h" @@ -217,7 +218,12 @@ static int init_schema(cbm_store_t *s) { "CREATE TABLE IF NOT EXISTS projects (" " name TEXT PRIMARY KEY," " indexed_at TEXT NOT NULL," - " root_path TEXT NOT NULL" + " root_path TEXT NOT NULL," + " indexed_git_head TEXT," + " files_discovered INTEGER NOT NULL DEFAULT 0," + " files_indexed INTEGER NOT NULL DEFAULT 0," + " files_excluded INTEGER NOT NULL DEFAULT 0," + " files_failed INTEGER NOT NULL DEFAULT 0" ");" "CREATE TABLE IF NOT EXISTS file_hashes (" " project TEXT NOT NULL REFERENCES projects(name) ON DELETE CASCADE," @@ -258,6 +264,12 @@ static int init_schema(cbm_store_t *s) { ");"; int rc = exec_sql(s, ddl); + (void)exec_sql(s, "ALTER TABLE projects ADD COLUMN indexed_git_head TEXT;"); + (void)exec_sql(s, + "ALTER TABLE projects ADD COLUMN files_discovered INTEGER NOT NULL DEFAULT 0;"); + (void)exec_sql(s, "ALTER TABLE projects ADD COLUMN files_indexed INTEGER NOT NULL DEFAULT 0;"); + (void)exec_sql(s, "ALTER TABLE projects ADD COLUMN files_excluded INTEGER NOT NULL DEFAULT 0;"); + (void)exec_sql(s, "ALTER TABLE projects ADD COLUMN files_failed INTEGER NOT NULL DEFAULT 0;"); if (rc != CBM_STORE_OK) { return rc; } @@ -944,31 +956,69 @@ int cbm_store_dump_to_file(cbm_store_t *s, const char *dest_path) { int cbm_store_upsert_project(cbm_store_t *s, const char *name, const char *root_path) { sqlite3_stmt *stmt = prepare_cached(s, &s->stmt_upsert_project, - "INSERT INTO projects (name, indexed_at, root_path) 
VALUES (?1, ?2, ?3) " - "ON CONFLICT(name) DO UPDATE SET indexed_at=?2, root_path=?3;"); + "INSERT INTO projects (name, indexed_at, root_path, indexed_git_head) " + "VALUES (?1, ?2, ?3, ?4) " + "ON CONFLICT(name) DO UPDATE SET indexed_at=?2, root_path=?3, " + "indexed_git_head=?4;"); if (!stmt) { return CBM_STORE_ERR; } char ts[CBM_SZ_64]; iso_now(ts, sizeof(ts)); + cbm_git_context_t git = {0}; + const char *head = NULL; + if (cbm_git_context_resolve(root_path, &git) == 0 && git.is_git && git.head_sha && + git.head_sha[0]) { + head = git.head_sha; + } bind_text(stmt, SKIP_ONE, name); bind_text(stmt, ST_COL_2, ts); bind_text(stmt, ST_COL_3, root_path); + if (head) { + bind_text(stmt, ST_COL_4, head); + } else { + sqlite3_bind_null(stmt, ST_COL_4); + } int rc = sqlite3_step(stmt); if (rc != SQLITE_DONE) { store_set_error_sqlite(s, "upsert_project"); + cbm_git_context_free(&git); return CBM_STORE_ERR; } + cbm_git_context_free(&git); return CBM_STORE_OK; } +int cbm_store_update_project_coverage(cbm_store_t *s, const char *name, int files_discovered, + int files_indexed, int files_excluded, int files_failed) { + sqlite3_stmt *stmt = NULL; + int rc = sqlite3_prepare_v2( + s->db, + "UPDATE projects SET files_discovered=?2, files_indexed=?3, files_excluded=?4, " + "files_failed=?5 WHERE name=?1;", + CBM_NOT_FOUND, &stmt, NULL); + if (rc != SQLITE_OK) { + store_set_error_sqlite(s, "update_project_coverage"); + return CBM_STORE_ERR; + } + bind_text(stmt, 1, name); + sqlite3_bind_int(stmt, 2, files_discovered); + sqlite3_bind_int(stmt, 3, files_indexed); + sqlite3_bind_int(stmt, 4, files_excluded); + sqlite3_bind_int(stmt, 5, files_failed); + rc = sqlite3_step(stmt); + sqlite3_finalize(stmt); + return rc == SQLITE_DONE ? 
CBM_STORE_OK : CBM_STORE_ERR; +} + int cbm_store_get_project(cbm_store_t *s, const char *name, cbm_project_t *out) { - sqlite3_stmt *stmt = - prepare_cached(s, &s->stmt_get_project, - "SELECT name, indexed_at, root_path FROM projects WHERE name = ?1;"); + sqlite3_stmt *stmt = prepare_cached( + s, &s->stmt_get_project, + "SELECT name, indexed_at, root_path, indexed_git_head, files_discovered, " + "files_indexed, files_excluded, files_failed FROM projects WHERE name = ?1;"); if (!stmt) { return CBM_STORE_ERR; } @@ -979,6 +1029,11 @@ int cbm_store_get_project(cbm_store_t *s, const char *name, cbm_project_t *out) out->name = heap_strdup((const char *)sqlite3_column_text(stmt, 0)); out->indexed_at = heap_strdup((const char *)sqlite3_column_text(stmt, SKIP_ONE)); out->root_path = heap_strdup((const char *)sqlite3_column_text(stmt, CBM_SZ_2)); + out->indexed_git_head = heap_strdup((const char *)sqlite3_column_text(stmt, ST_COL_3)); + out->files_discovered = sqlite3_column_int(stmt, ST_COL_4); + out->files_indexed = sqlite3_column_int(stmt, 5); + out->files_excluded = sqlite3_column_int(stmt, 6); + out->files_failed = sqlite3_column_int(stmt, 7); return CBM_STORE_OK; } return CBM_STORE_NOT_FOUND; @@ -987,7 +1042,8 @@ int cbm_store_get_project(cbm_store_t *s, const char *name, cbm_project_t *out) int cbm_store_list_projects(cbm_store_t *s, cbm_project_t **out, int *count) { sqlite3_stmt *stmt = prepare_cached(s, &s->stmt_list_projects, - "SELECT name, indexed_at, root_path FROM projects ORDER BY name;"); + "SELECT name, indexed_at, root_path, indexed_git_head, files_discovered, " + "files_indexed, files_excluded, files_failed FROM projects ORDER BY name;"); if (!stmt) { return CBM_STORE_ERR; } @@ -995,16 +1051,22 @@ int cbm_store_list_projects(cbm_store_t *s, cbm_project_t **out, int *count) { /* Collect into dynamic array */ int cap = ST_INIT_CAP_8; int n = 0; - cbm_project_t *arr = malloc(cap * sizeof(cbm_project_t)); + cbm_project_t *arr = calloc((size_t)cap, 
sizeof(cbm_project_t)); while (sqlite3_step(stmt) == SQLITE_ROW) { if (n >= cap) { cap *= ST_GROWTH; arr = safe_realloc(arr, cap * sizeof(cbm_project_t)); + memset(&arr[n], 0, (size_t)(cap - n) * sizeof(cbm_project_t)); } arr[n].name = heap_strdup((const char *)sqlite3_column_text(stmt, 0)); arr[n].indexed_at = heap_strdup((const char *)sqlite3_column_text(stmt, SKIP_ONE)); arr[n].root_path = heap_strdup((const char *)sqlite3_column_text(stmt, CBM_SZ_2)); + arr[n].indexed_git_head = heap_strdup((const char *)sqlite3_column_text(stmt, ST_COL_3)); + arr[n].files_discovered = sqlite3_column_int(stmt, ST_COL_4); + arr[n].files_indexed = sqlite3_column_int(stmt, 5); + arr[n].files_excluded = sqlite3_column_int(stmt, 6); + arr[n].files_failed = sqlite3_column_int(stmt, 7); n++; } @@ -2582,7 +2644,7 @@ static int bfs_collect_edges(cbm_store_t *s, int64_t start_id, const cbm_node_ho char edge_sql[ST_SQL_BUF]; snprintf(edge_sql, sizeof(edge_sql), - "SELECT n1.name, n2.name, e.type " + "SELECT n1.name, n2.name, e.type, e.properties " "FROM edges e " "JOIN nodes n1 ON n1.id = e.source_id " "JOIN nodes n2 ON n2.id = e.target_id " @@ -2618,6 +2680,7 @@ static int bfs_collect_edges(cbm_store_t *s, int64_t start_id, const cbm_node_ho edges[en].from_name = heap_strdup((const char *)sqlite3_column_text(estmt, 0)); edges[en].to_name = heap_strdup((const char *)sqlite3_column_text(estmt, SKIP_ONE)); edges[en].type = heap_strdup((const char *)sqlite3_column_text(estmt, CBM_SZ_2)); + edges[en].properties_json = heap_strdup((const char *)sqlite3_column_text(estmt, ST_COL_3)); edges[en].confidence = (double)SKIP_ONE; en++; } @@ -2771,6 +2834,7 @@ void cbm_store_traverse_free(cbm_traverse_result_t *out) { safe_str_free(&out->edges[i].from_name); safe_str_free(&out->edges[i].to_name); safe_str_free(&out->edges[i].type); + safe_str_free(&out->edges[i].properties_json); } free(out->edges); @@ -5538,6 +5602,7 @@ void cbm_project_free_fields(cbm_project_t *p) { safe_str_free(&p->name); 
safe_str_free(&p->indexed_at); safe_str_free(&p->root_path); + safe_str_free(&p->indexed_git_head); } void cbm_store_free_projects(cbm_project_t *projects, int count) { diff --git a/src/store/store.h b/src/store/store.h index 26b09a5c2..c8eef9bb5 100644 --- a/src/store/store.h +++ b/src/store/store.h @@ -51,6 +51,11 @@ typedef struct { const char *name; const char *indexed_at; /* ISO 8601 */ const char *root_path; + const char *indexed_git_head; /* NULL/empty when non-git or unavailable */ + int files_discovered; + int files_indexed; + int files_excluded; + int files_failed; } cbm_project_t; typedef struct { @@ -147,6 +152,7 @@ typedef struct { const char *from_name; const char *to_name; const char *type; + const char *properties_json; double confidence; } cbm_edge_info_t; @@ -267,6 +273,8 @@ int cbm_store_dump_to_file(cbm_store_t *s, const char *dest_path); /* ── Project CRUD ───────────────────────────────────────────────── */ int cbm_store_upsert_project(cbm_store_t *s, const char *name, const char *root_path); +int cbm_store_update_project_coverage(cbm_store_t *s, const char *name, int files_discovered, + int files_indexed, int files_excluded, int files_failed); int cbm_store_get_project(cbm_store_t *s, const char *name, cbm_project_t *out); int cbm_store_list_projects(cbm_store_t *s, cbm_project_t **out, int *count); int cbm_store_delete_project(cbm_store_t *s, const char *name); diff --git a/tests/test_mcp.c b/tests/test_mcp.c index 152a700cf..5e6708c0f 100644 --- a/tests/test_mcp.c +++ b/tests/test_mcp.c @@ -6,9 +6,11 @@ #include "../src/foundation/compat.h" #include "../src/foundation/compat_fs.h" /* cbm_unlink / cbm_rmdir */ #include "test_framework.h" +#include "test_helpers.h" #include #include #include +#include #include #include @@ -436,6 +438,57 @@ static cbm_mcp_server_t *setup_snippet_server(char *tmp_dir, size_t tmp_sz); static void cleanup_snippet_dir(const char *tmp_dir); static char *extract_text_content(const char *mcp_result); +static int 
run_cmd_quiet(const char *cmd) { + int rc = system(cmd); + return rc == 0 ? 0 : -1; +} + +static int setup_git_repo(char *tmp_dir, size_t tmp_sz) { + snprintf(tmp_dir, tmp_sz, "/tmp/cbm_mcp_git_evidence_XXXXXX"); + if (!cbm_mkdtemp(tmp_dir)) { + return -1; + } + + char file_path[512]; + snprintf(file_path, sizeof(file_path), "%s/main.c", tmp_dir); + FILE *fp = fopen(file_path, "w"); + if (!fp) { + return -1; + } + fputs("int main(void) { return 0; }\n", fp); + fclose(fp); + + char cmd[2048]; + snprintf(cmd, sizeof(cmd), + "git -C \"%s\" init -q && " + "git -C \"%s\" config user.email test@example.com && " + "git -C \"%s\" config user.name Test && " + "git -C \"%s\" add main.c && " + "git -C \"%s\" commit -q -m initial", + tmp_dir, tmp_dir, tmp_dir, tmp_dir, tmp_dir); + return run_cmd_quiet(cmd); +} + +static char *git_head(const char *repo) { + char cmd[1024]; + snprintf(cmd, sizeof(cmd), "git -C \"%s\" rev-parse HEAD 2>/dev/null", repo); + FILE *fp = popen(cmd, "r"); + if (!fp) { + return NULL; + } + char buf[128] = ""; + if (!fgets(buf, sizeof(buf), fp)) { + pclose(fp); + return NULL; + } + pclose(fp); + char *nl = strchr(buf, '\n'); + if (nl) { + *nl = '\0'; + } + return strdup(buf); +} + TEST(tool_search_graph_includes_node_properties) { /* search_graph results must surface each node's properties_json * payload so callers don't have to round-trip through get_code_snippet @@ -520,6 +573,115 @@ TEST(tool_index_status_includes_git_metadata) { PASS(); } +TEST(tool_index_status_evidence_clean_git_repo) { + char tmp[256]; + ASSERT_EQ(setup_git_repo(tmp, sizeof(tmp)), 0); + char *head = git_head(tmp); + ASSERT_NOT_NULL(head); + + cbm_mcp_server_t *srv = cbm_mcp_server_new(NULL); + ASSERT_NOT_NULL(srv); + cbm_store_t *st = cbm_mcp_server_store(srv); + ASSERT_NOT_NULL(st); + cbm_mcp_server_set_project(srv, "git-clean"); + ASSERT_EQ(cbm_store_upsert_project(st, "git-clean", tmp), CBM_STORE_OK); + ASSERT_EQ(cbm_store_update_project_coverage(st, "git-clean", 1, 1, 0, 0), 
CBM_STORE_OK); + + char sql[512]; + snprintf(sql, sizeof(sql), + "UPDATE projects SET indexed_git_head='%s' WHERE name='git-clean';", head); + ASSERT_EQ(cbm_store_exec(st, sql), CBM_STORE_OK); + + char *resp = cbm_mcp_server_handle( + srv, "{\"jsonrpc\":\"2.0\",\"id\":17,\"method\":\"tools/call\"," + "\"params\":{\"name\":\"index_status\"," + "\"arguments\":{\"project\":\"git-clean\"}}}"); + ASSERT_NOT_NULL(resp); + char *inner = extract_text_content(resp); + ASSERT_NOT_NULL(inner); + ASSERT_NOT_NULL(strstr(inner, "\"evidence\"")); + ASSERT_NOT_NULL(strstr(inner, "\"freshness\":\"current\"")); + ASSERT_NOT_NULL(strstr(inner, "\"repository_state\":\"clean\"")); + ASSERT_NOT_NULL(strstr(inner, "\"snapshot_matches_working_tree\":true")); + ASSERT_NOT_NULL(strstr(inner, "\"coverage_status\":\"complete\"")); + + free(inner); + free(resp); + free(head); + cbm_mcp_server_free(srv); + th_cleanup(tmp); + PASS(); +} + +TEST(tool_index_status_evidence_dirty_git_repo) { + char tmp[256]; + ASSERT_EQ(setup_git_repo(tmp, sizeof(tmp)), 0); + char *head = git_head(tmp); + ASSERT_NOT_NULL(head); + + cbm_mcp_server_t *srv = cbm_mcp_server_new(NULL); + ASSERT_NOT_NULL(srv); + cbm_store_t *st = cbm_mcp_server_store(srv); + ASSERT_NOT_NULL(st); + cbm_mcp_server_set_project(srv, "git-dirty"); + ASSERT_EQ(cbm_store_upsert_project(st, "git-dirty", tmp), CBM_STORE_OK); + + char sql[512]; + snprintf(sql, sizeof(sql), + "UPDATE projects SET indexed_git_head='%s' WHERE name='git-dirty';", head); + ASSERT_EQ(cbm_store_exec(st, sql), CBM_STORE_OK); + + char file_path[512]; + snprintf(file_path, sizeof(file_path), "%s/main.c", tmp); + ASSERT_EQ(th_append_file(file_path, "/* changed */\n"), 0); + + char *resp = cbm_mcp_server_handle( + srv, "{\"jsonrpc\":\"2.0\",\"id\":18,\"method\":\"tools/call\"," + "\"params\":{\"name\":\"index_status\"," + "\"arguments\":{\"project\":\"git-dirty\"}}}"); + ASSERT_NOT_NULL(resp); + char *inner = extract_text_content(resp); + ASSERT_NOT_NULL(inner); + 
ASSERT_NOT_NULL(strstr(inner, "\"freshness\":\"working_tree_changed\"")); + ASSERT_NOT_NULL(strstr(inner, "\"repository_state\":\"dirty\"")); + ASSERT_NOT_NULL(strstr(inner, "\"snapshot_matches_current_head\":true")); + ASSERT_NOT_NULL(strstr(inner, "\"snapshot_matches_working_tree\":false")); + + free(inner); + free(resp); + free(head); + cbm_mcp_server_free(srv); + th_cleanup(tmp); + PASS(); +} + +TEST(tool_trace_path_includes_edge_evidence) { + char tmp[256]; + cbm_mcp_server_t *srv = setup_snippet_server(tmp, sizeof(tmp)); + ASSERT_NOT_NULL(srv); + + char *resp = cbm_mcp_server_handle( + srv, "{\"jsonrpc\":\"2.0\",\"id\":19,\"method\":\"tools/call\"," + "\"params\":{\"name\":\"trace_path\"," + "\"arguments\":{\"project\":\"test-project\"," + "\"function_name\":\"HandleRequest\",\"direction\":\"outbound\"," + "\"depth\":1}}}"); + ASSERT_NOT_NULL(resp); + char *inner = extract_text_content(resp); + ASSERT_NOT_NULL(inner); + ASSERT_NOT_NULL(strstr(inner, "\"edge_evidence\"")); + ASSERT_NOT_NULL(strstr(inner, "\"resolution_strategy\":\"hybrid_lsp\"")); + ASSERT_NOT_NULL(strstr(inner, "\"confidence\":0.95")); + ASSERT_NOT_NULL(strstr(inner, "\"candidate_count\":1")); + ASSERT_NOT_NULL(strstr(inner, "\"evidence_status\":\"verified\"")); + + free(inner); + free(resp); + cbm_mcp_server_free(srv); + cleanup_snippet_dir(tmp); + PASS(); +} + /* ══════════════════════════════════════════════════════════════════ * TOOL HANDLERS WITH DATA * ══════════════════════════════════════════════════════════════════ */ @@ -1127,7 +1289,6 @@ TEST(parse_file_uri_invalid) { * SNIPPET TESTS — Port of internal/tools/snippet_test.go * ══════════════════════════════════════════════════════════════════ */ -#include #include #include @@ -1227,7 +1388,12 @@ static cbm_mcp_server_t *setup_snippet_server(char *tmp_dir, size_t tmp_sz) { cbm_store_upsert_node(st, &n_run2); /* Create edges: HandleRequest -> ProcessOrder, HandleRequest -> Run1 */ - cbm_edge_t e1 = {.project = proj_name, .source_id = 
id_hr, .target_id = id_po, .type = "CALLS"}; + cbm_edge_t e1 = {.project = proj_name, + .source_id = id_hr, + .target_id = id_po, + .type = "CALLS", + .properties_json = + "{\"strategy\":\"lsp_exact\",\"confidence\":0.95,\"candidates\":1}"}; cbm_store_insert_edge(st, &e1); cbm_edge_t e2 = { @@ -2065,6 +2231,9 @@ SUITE(mcp) { RUN_TEST(tool_query_graph_basic); RUN_TEST(tool_index_status_no_project); RUN_TEST(tool_index_status_includes_git_metadata); + RUN_TEST(tool_index_status_evidence_clean_git_repo); + RUN_TEST(tool_index_status_evidence_dirty_git_repo); + RUN_TEST(tool_trace_path_includes_edge_evidence); /* Tool handlers with validation */ RUN_TEST(tool_trace_call_path_not_found); diff --git a/tests/test_store_nodes.c b/tests/test_store_nodes.c index 0d84f4c78..2b5b5ca9d 100644 --- a/tests/test_store_nodes.c +++ b/tests/test_store_nodes.c @@ -10,6 +10,7 @@ #include #include #include +#include /* ── Schema / Open / Close ──────────────────────────────────────── */ @@ -111,6 +112,45 @@ TEST(store_project_delete) { PASS(); } +TEST(store_project_migrates_legacy_index_metadata_defaults) { + char path[256]; + snprintf(path, sizeof(path), "/tmp/cbm_store_legacy_project_XXXXXX"); + int fd = mkstemp(path); + ASSERT_TRUE(fd >= 0); + close(fd); + + sqlite3 *db = NULL; + ASSERT_EQ(sqlite3_open(path, &db), SQLITE_OK); + ASSERT_EQ(sqlite3_exec(db, + "CREATE TABLE projects (" + "name TEXT PRIMARY KEY," + "indexed_at TEXT NOT NULL," + "root_path TEXT NOT NULL);" + "INSERT INTO projects(name, indexed_at, root_path) " + "VALUES('legacy', '2024-01-01T00:00:00Z', '/tmp/legacy');", + NULL, NULL, NULL), + SQLITE_OK); + sqlite3_close(db); + + cbm_store_t *s = cbm_store_open_path(path); + ASSERT_NOT_NULL(s); + + cbm_project_t p = {0}; + ASSERT_EQ(cbm_store_get_project(s, "legacy", &p), CBM_STORE_OK); + ASSERT_STR_EQ(p.name, "legacy"); + ASSERT_STR_EQ(p.root_path, "/tmp/legacy"); + ASSERT_TRUE(p.indexed_git_head == NULL || p.indexed_git_head[0] == '\0'); + ASSERT_EQ(p.files_discovered, 
0); + ASSERT_EQ(p.files_indexed, 0); + ASSERT_EQ(p.files_excluded, 0); + ASSERT_EQ(p.files_failed, 0); + + cbm_project_free_fields(&p); + cbm_store_close(s); + unlink(path); + PASS(); +} + /* ── Node CRUD ──────────────────────────────────────────────────── */ TEST(store_node_crud) { @@ -1550,6 +1590,7 @@ SUITE(store_nodes) { RUN_TEST(store_project_crud); RUN_TEST(store_project_update); RUN_TEST(store_project_delete); + RUN_TEST(store_project_migrates_legacy_index_metadata_defaults); RUN_TEST(store_node_crud); RUN_TEST(store_node_dedup); RUN_TEST(store_node_find_by_label);