diff --git a/src/pipeline/pass_calls.c b/src/pipeline/pass_calls.c
index 15d691d3..828bc88b 100644
--- a/src/pipeline/pass_calls.c
+++ b/src/pipeline/pass_calls.c
@@ -364,6 +364,24 @@ static int resolve_single_call(cbm_pipeline_ctx_t *ctx, CBMCall *call,
     cbm_resolution_t res = cbm_registry_resolve(ctx->registry, call->callee_name, module_qn,
                                                 imp_keys, imp_vals, imp_count);
     if (!res.qualified_name || res.qualified_name[0] == '\0') {
+        /* External HTTP/async client (requests, axios, ...) that resolves to no
+         * QN because its library is not indexed. Classify from the raw callee
+         * name and emit a service edge against the source node so cross-repo
+         * matching has an HTTP_CALLS edge to work with. Issue #523. */
+        cbm_svc_kind_t ext_svc = cbm_service_pattern_match(call->callee_name);
+        if ((ext_svc == CBM_SVC_HTTP || ext_svc == CBM_SVC_ASYNC) && call->first_string_arg &&
+            call->first_string_arg[0] != '\0') {
+            cbm_resolution_t ext = {0};
+            ext.qualified_name = call->callee_name;
+            ext.strategy = "external_service";
+            /* source_node is passed as both source and target intentionally: the
+             * external client (requests/axios) has no node in the graph, so there
+             * is no distinct target to point at. emit_http_async_edge only uses
+             * the target for the non-URL CALLS fallback; for a real URL/topic it
+             * creates a Route node and links source -> Route, so the duplicated
+             * source arg is never dereferenced as a separate endpoint here. #523 */
+            emit_http_async_edge(ctx, call, source_node, source_node, &ext, ext_svc);
+        }
         return 0;
     }
     const cbm_gbuf_node_t *target_node = cbm_gbuf_find_by_qn(ctx->gbuf, res.qualified_name);
diff --git a/src/pipeline/pass_cross_repo.c b/src/pipeline/pass_cross_repo.c
index 07f5ca7e..e3f2923e 100644
--- a/src/pipeline/pass_cross_repo.c
+++ b/src/pipeline/pass_cross_repo.c
@@ -83,6 +83,61 @@ static const char *json_str_prop(const char *json, const char *key, char *buf, s
     return buf;
 }
 
+/* Strip scheme and authority from a URL and return its path component.
+ * Inputs that already start with '/' or contain no "://" are returned
+ * unchanged. The authority ends at the first '/', '?' or '#' (RFC 3986
+ * section 3.2); when no path follows it, the root path "/" is returned
+ * instead of the whole URL. NULL is passed through. Otherwise the result
+ * points into the original string, so it stays valid only as long as `url`.
+ *   "https://api.example.com:8443/v2/orders/123" -> "/v2/orders/123"
+ *   "https://api.example.com"                    -> "/"
+ *   "/v2/orders/123"                             -> "/v2/orders/123" (already a path) */
+static const char *cr_url_path(const char *url) {
+    if (!url || url[0] == '/')
+        return url;
+    const char *scheme_end = strstr(url, "://");
+    if (!scheme_end)
+        return url;
+    const char *authority = scheme_end + 3;
+    const char *authority_end = authority + strcspn(authority, "/?#");
+    return *authority_end == '/' ? authority_end : "/";
+}
+
+/* Return 1 if a concrete path matches a route template, 0 otherwise.
+ * Paths are compared segment by segment; leading, trailing and repeated '/'
+ * are ignored. A template segment wrapped in '{' '}' matches any one
+ * non-empty concrete segment; every other segment must match exactly
+ * (case-sensitive). NULL for either argument yields 0.
+ *   "/v2/orders/123" matches "/v2/orders/{id}" -> 1
+ *   "/v2/orders/123" matches "/v2/orders/456"  -> 0 */
+static int cr_path_matches_template(const char *concrete, const char *tmpl) {
+    if (!concrete || !tmpl)
+        return 0;
+    const char *c = concrete;
+    const char *t = tmpl;
+    while (*c || *t) {
+        while (*c == '/')
+            c++;
+        while (*t == '/')
+            t++;
+        if (!*c && !*t)
+            return 1;
+        if (!*c || !*t)
+            return 0;
+        const char *ce = strchr(c, '/');
+        const char *te = strchr(t, '/');
+        size_t clen = ce ? (size_t)(ce - c) : strlen(c);
+        size_t tlen = te ? (size_t)(te - t) : strlen(t);
+        int is_param = (tlen >= 2 && t[0] == '{' && t[tlen - 1] == '}');
+        if (!is_param && (clen != tlen || strncmp(c, t, clen) != 0)) {
+            return 0;
+        }
+        c += clen;
+        t += tlen;
+    }
+    return 1;
+}
+
 /* Build CROSS_* edge properties JSON. */
 static void build_cross_props(char *buf, size_t bufsz, const char *target_project,
                               const char *target_function, const char *target_file,
@@ -114,9 +169,26 @@ static void delete_cross_edges(cbm_store_t *store, const char *project) {
     cbm_store_delete_edges_by_type(store, project, "CROSS_TRPC_CALLS");
 }
 
-/* Insert a CROSS_* edge into a store. */
+/* Insert a CROSS_* edge into a store, skipping if an identical
+ * (source_id, target_id, type) edge already exists. The cross-repo pass can
+ * reach the same caller/route pair from both directions (the entry point
+ * matches src->tgt and tgt->src, and emit_cross_route_bidirectional writes
+ * both DBs), so this guard keeps emission idempotent and prevents duplicate
+ * CROSS_* edges from inflating the counts. Issue #523. */
 static void insert_cross_edge(cbm_store_t *store, const char *project, int64_t from_id,
                               int64_t to_id, const char *edge_type, const char *props) {
+    cbm_edge_t *existing = NULL;
+    int existing_count = 0;
+    if (cbm_store_find_edges_by_source_type(store, from_id, edge_type, &existing,
+                                            &existing_count) == 0) {
+        for (int i = 0; i < existing_count; i++) {
+            if (existing[i].target_id == to_id) {
+                cbm_store_free_edges(existing, existing_count);
+                return; /* already present - do not duplicate */
+            }
+        }
+        cbm_store_free_edges(existing, existing_count);
+    }
     cbm_edge_t edge = {
         .project = project,
         .source_id = from_id,
@@ -207,6 +279,99 @@ static int64_t find_route_handler(cbm_store_t *target_store, const char *route_q
     return handler_id;
 }
 
+/* Fuzzy route lookup: scan the Route nodes in target (lowest id first, so the
+ * result is deterministic) and stop at the first whose path template matches
+ * the concrete norm_path from the consumer.
+ *   method       - if non-NULL, only routes whose QN method equals it or is
+ *                  "ANY" are considered; NULL disables the filter.
+ *   route_qn_out - receives the matched Route QN (for edge emission); left
+ *                  untouched when no route matches.
+ *   handler_name / handler_file - cleared on entry, then filled from the node
+ *                  that has a HANDLES edge to the matched route.
+ * Returns that handler's node id, or 0 when the DB is unavailable, a statement
+ * cannot be prepared, no route matches, or the matched route has no HANDLES
+ * edge. */
+static int64_t find_route_handler_fuzzy(cbm_store_t *target_store, const char *norm_path,
+                                        const char *method, char *route_qn_out, size_t route_qn_sz,
+                                        char *handler_name, size_t name_sz, char *handler_file,
+                                        size_t file_sz) {
+    handler_name[0] = '\0';
+    handler_file[0] = '\0';
+    struct sqlite3 *db = cbm_store_get_db(target_store);
+    if (!db)
+        return 0;
+
+    sqlite3_stmt *s = NULL;
+    if (sqlite3_prepare_v2(db,
+                           "SELECT id, qualified_name, name FROM nodes "
+                           "WHERE label = 'Route' ORDER BY id",
+                           CBM_NOT_FOUND, &s, NULL) != SQLITE_OK) {
+        return 0;
+    }
+
+    int64_t matched_route_id = 0;
+    while (sqlite3_step(s) == SQLITE_ROW) {
+        int64_t rid = sqlite3_column_int64(s, 0);
+        const char *qn = (const char *)sqlite3_column_text(s, SKIP_ONE);
+        const char *name = (const char *)sqlite3_column_text(s, PAIR_LEN);
+        if (!qn || !name)
+            continue;
+
+        /* QN format: __route__METHOD__/path */
+        const char *after_prefix = strstr(qn, "__route__");
+        if (!after_prefix)
+            continue;
+        after_prefix += sizeof("__route__") - 1; /* skip "__route__" */
+        const char *second_sep = strstr(after_prefix, "__");
+        if (!second_sep)
+            continue;
+        const char *tmpl_path = second_sep + 2;
+
+        /* method filter: skip mismatched methods, always accept ANY */
+        if (method) {
+            size_t mlen = (size_t)(second_sep - after_prefix);
+            char qn_method[CBM_SZ_32] = {0};
+            if (mlen >= sizeof(qn_method))
+                mlen = sizeof(qn_method) - 1;
+            /* qn_method is zero-filled and mlen < sizeof(qn_method): stays NUL-terminated */
+            memcpy(qn_method, after_prefix, mlen);
+            if (strcmp(qn_method, "ANY") != 0 && strcmp(qn_method, method) != 0)
+                continue;
+        }
+
+        if (cr_path_matches_template(norm_path, tmpl_path)) {
+            matched_route_id = rid;
+            snprintf(route_qn_out, route_qn_sz, "%s", qn);
+            break;
+        }
+    }
+    sqlite3_finalize(s);
+    if (matched_route_id == 0)
+        return 0;
+
+    /* Follow HANDLES edge to find the handler function */
+    if (sqlite3_prepare_v2(db,
+                           "SELECT n.id, n.name, n.file_path FROM edges e "
+                           "JOIN nodes n ON n.id = e.source_id "
+                           "WHERE e.target_id = ?1 AND e.type = 'HANDLES' LIMIT 1",
+                           CBM_NOT_FOUND, &s, NULL) != SQLITE_OK) {
+        return 0;
+    }
+    sqlite3_bind_int64(s, SKIP_ONE, matched_route_id);
+    int64_t handler_id = 0;
+    if (sqlite3_step(s) == SQLITE_ROW) {
+        handler_id = sqlite3_column_int64(s, 0);
+        const char *n = (const char *)sqlite3_column_text(s, SKIP_ONE);
+        const char *f = (const char *)sqlite3_column_text(s, PAIR_LEN);
+        if (n)
+            snprintf(handler_name, name_sz, "%s", n);
+        if (f)
+            snprintf(handler_file, file_sz, "%s", f);
+    }
+    sqlite3_finalize(s);
+    return handler_id;
+}
+
 /* Emit CROSS_* edge for a route match: forward into source, reverse into target. */
 static void emit_cross_route_bidirectional(cbm_store_t *src_store, const char *src_project,
                                            struct sqlite3 *src_db, int64_t caller_id,
@@ -283,10 +448,15 @@ static int match_http_routes(cbm_store_t *src_store, const char *src_project,
             continue;
         }
 
+        /* Normalise: strip scheme+host+port so a full consumer-side URL is
+         * reduced to its path before it is looked up against the target's
+         * Route QNs ("/v2/orders/{id}"). Issue #523. */
+        const char *norm_path = cr_url_path(url_path);
+
         /* Build the expected Route QN in the target project */
         char route_qn[CR_QN_BUF];
         snprintf(route_qn, sizeof(route_qn), "__route__%s__%s", method[0] ? method : "ANY",
-                 url_path);
+                 norm_path);
 
         char handler_name[CBM_SZ_256] = {0};
         char handler_file[CBM_SZ_512] = {0};
@@ -295,10 +465,17 @@ static int match_http_routes(cbm_store_t *src_store, const char *src_project,
                                                 handler_file, sizeof(handler_file));
         if (handler_id == 0) {
             /* Try without method (ANY) */
-            snprintf(route_qn, sizeof(route_qn), "__route__ANY__%s", url_path);
+            snprintf(route_qn, sizeof(route_qn), "__route__ANY__%s", norm_path);
             handler_id = find_route_handler(tgt_store, route_qn, handler_name, sizeof(handler_name),
                                             handler_file, sizeof(handler_file));
         }
+        if (handler_id == 0) {
+            /* Exact QN lookup missed — try path-param template matching.
+             * Handles concrete vs template mismatch ("123" vs "{id}"). Issue #523. */
+            handler_id = find_route_handler_fuzzy(
+                tgt_store, norm_path, method[0] ? method : NULL, route_qn, sizeof(route_qn),
+                handler_name, sizeof(handler_name), handler_file, sizeof(handler_file));
+        }
         if (handler_id == 0) {
             continue;
         }
@@ -651,6 +828,8 @@ cbm_cross_repo_result_t cbm_cross_repo_match(const char *project, const char **t
         }
 
         result.http_edges += match_http_routes(src_store, project, tgt_store, tgt);
+        /* Reverse direction: callers in tgt matched against routes in this project. */
+        result.http_edges += match_http_routes(tgt_store, tgt, src_store, project);
         result.async_edges += match_async_routes(src_store, project, tgt_store, tgt);
         result.channel_edges += match_channels(src_store, project, tgt_store, tgt);
         result.grpc_edges += match_typed_routes(src_store, project, tgt_store, tgt, "GRPC_CALLS",