diff --git a/internal/cbm/extract_defs.c b/internal/cbm/extract_defs.c
index bfff34fb..55e1fcc8 100644
--- a/internal/cbm/extract_defs.c
+++ b/internal/cbm/extract_defs.c
@@ -256,12 +256,31 @@ static TSNode resolve_ocaml_func_name(TSNode node) {
     return null_node;
 }
 
-// SQL: resolve create_function name from object_reference→identifier or direct identifier.
+// Last identifier (DFS pre-order) under `node`. For a schema-qualified
+// object_reference (schema.table) this is the table name; the schema prefix is
+// ignored. With no identifier, *found is left untouched and `best` is returned.
+static TSNode sql_last_identifier(TSNode node, TSNode best, bool *found) {
+    if (strcmp(ts_node_type(node), "identifier") == 0) {
+        best = node;
+        *found = true;
+    }
+    uint32_t cc = ts_node_child_count(node);
+    for (uint32_t i = 0; i < cc; i++) {
+        best = sql_last_identifier(ts_node_child(node, i), best, found);
+    }
+    return best;
+}
+
+// SQL: resolve create_function / create_table / create_view name. The name sits
+// on an object_reference; for a schema-qualified name (schema.table) take the
+// last identifier (the table), not the first (the schema).
 static TSNode resolve_sql_func_name(TSNode node) {
     TSNode obj_ref = cbm_find_child_by_kind(node, "object_reference");
     if (!ts_node_is_null(obj_ref)) {
-        TSNode id = cbm_find_child_by_kind(obj_ref, "identifier");
-        if (!ts_node_is_null(id)) {
+        bool found = false; // must start false: the walk only ever sets it to true
+        TSNode empty = {0};
+        TSNode id = sql_last_identifier(obj_ref, empty, &found);
+        if (found) {
             return id;
         }
     }
@@ -3020,6 +3039,62 @@ static bool extract_config_class_def(CBMExtractCtx *ctx, TSNode node, const char
     return true;
 }
 
+// Collect FROM/JOIN table references (tree-sitter-sql `relation` nodes) anywhere
+// under `node` and emit them as usages scoped to enclosing_qn. pass_usages then
+// resolves each ref_name to the referenced Table/View def and creates a USAGE
+// lineage edge (e.g. a view -> the tables it selects from). Emitting them here
+// (rather than via the generic identifier walker) sets the correct enclosing
+// scope and bypasses the is_definition_name suppression that drops them.
+static void collect_sql_relation_usages(CBMExtractCtx *ctx, TSNode node, const char *enclosing_qn) {
+    if (strcmp(ts_node_type(node), "relation") == 0) {
+        TSNode nm = resolve_sql_func_name(node); // object_reference -> identifier
+        if (!ts_node_is_null(nm)) {
+            char *tname = cbm_node_text(ctx->arena, nm, ctx->source);
+            if (tname && tname[0]) { // skip NULL / empty names
+                CBMUsage usage = {0}; // zero-init: no field is pushed indeterminate
+                usage.ref_name = tname;
+                usage.enclosing_func_qn = enclosing_qn;
+                cbm_usages_push(&ctx->result->usages, ctx->arena, usage);
+            }
+        }
+    }
+    uint32_t n = ts_node_child_count(node); // always descend, even below a relation
+    for (uint32_t i = 0; i < n; i++) {
+        collect_sql_relation_usages(ctx, ts_node_child(node, i), enclosing_qn);
+    }
+}
+
+// Handle SQL DDL relation defs: CREATE TABLE / VIEW / MATERIALIZED VIEW become
+// first-class Table/View nodes rather than generic Variable nodes. The relation
+// name sits on an object_reference child (the same shape create_function uses),
+// so resolve_sql_func_name locates it. Also emits FROM/JOIN dependencies as
+// usages so lineage edges form. Returns true if handled.
+static bool extract_sql_ddl_class_def(CBMExtractCtx *ctx, TSNode node, const char *kind) {
+    if (ctx->language != CBM_LANG_SQL) { // only SQL files are handled here
+        return false;
+    }
+    const char *label; // def label: "Table" or "View", chosen from the node kind
+    if (strcmp(kind, "create_table") == 0) {
+        label = "Table";
+    } else if (strcmp(kind, "create_view") == 0 || strcmp(kind, "create_materialized_view") == 0) {
+        label = "View";
+    } else {
+        return false; // any other node kind is left to the caller
+    }
+    TSNode name_node = resolve_sql_func_name(node);
+    if (ts_node_is_null(name_node)) {
+        return false;
+    }
+    char *name = cbm_node_text(ctx->arena, name_node, ctx->source);
+    if (!name || !name[0]) { // NULL or empty name: not handled
+        return false;
+    }
+    push_simple_class_def(ctx, node, name, label);
+    const char *qn = cbm_fqn_compute(ctx->arena, ctx->project, ctx->rel_path, name);
+    collect_sql_relation_usages(ctx, node, qn); // FROM/JOIN refs under node, scoped to qn
+    return true;
+}
+
 static void extract_class_def(CBMExtractCtx *ctx, TSNode node, const CBMLangSpec *spec) {
     CBMArena *a = ctx->arena;
     const char *kind = ts_node_type(node);
@@ -3027,6 +3102,9 @@ static void extract_class_def(CBMExtractCtx *ctx, TSNode node, const CBMLangSpec
     if (extract_config_class_def(ctx, node, kind)) {
         return;
     }
+    if (extract_sql_ddl_class_def(ctx, node, kind)) {
+        return;
+    }
 
     TSNode name_node = ts_node_child_by_field_name(node, TS_FIELD("name"));
     // ObjC: class name is first identifier child
diff --git a/internal/cbm/lang_specs.c b/internal/cbm/lang_specs.c
index 26d25b3d..8492ef29 100644
--- a/internal/cbm/lang_specs.c
+++ b/internal/cbm/lang_specs.c
@@ -658,13 +658,14 @@ static const char *hcl_call_types[] = {"function_call", NULL};
 static const char *hcl_var_types[] = {"attribute", NULL};
 
 // ==================== SQL ====================
-static const char *sql_func_types[] = {"create_function", "function_declaration", NULL};
+static const char *sql_func_types[] = {"create_function", "function_declaration",
+                                       "create_procedure", NULL};
 static const char *sql_field_types[] = {"column_definition", NULL};
-static const char *sql_class_types[] = {"custom_type", NULL};
+static const char *sql_class_types[] = {"custom_type", "create_table", "create_view",
+                                        "create_materialized_view", NULL};
 static const char *sql_module_types[] = {"program", NULL};
 static const char *sql_call_types[] = {"function_call", "invocation", "command", NULL};
 static const char *sql_branch_types[] = {"if_statement", "case_expression", NULL};
-static const char *sql_var_types[] = {"create_table", "create_view", NULL};
 
 // ==================== DOCKERFILE ====================
 static const char *dockerfile_module_types[] = {"source_file", NULL};
@@ -1779,7 +1780,7 @@ static const CBMLangSpec lang_specs[CBM_LANG_COUNT] = {
     // CBM_LANG_SQL
     [CBM_LANG_SQL] = {CBM_LANG_SQL, sql_func_types, sql_class_types, sql_field_types,
                       sql_module_types, sql_call_types, empty_types, empty_types, sql_branch_types,
-                      sql_var_types, empty_types, empty_types, NULL, empty_types, NULL, NULL,
+                      empty_types, empty_types, empty_types, NULL, empty_types, NULL, NULL,
                       tree_sitter_sql, NULL},
 
     // CBM_LANG_DOCKERFILE
diff --git a/src/pipeline/pass_definitions.c b/src/pipeline/pass_definitions.c
index 676f1b16..df42d5f2 100644
--- a/src/pipeline/pass_definitions.c
+++ b/src/pipeline/pass_definitions.c
@@ -303,7 +303,8 @@ static void process_def(cbm_pipeline_ctx_t *ctx, const CBMDefinition *def, const
     if (node_id > 0 && def->label &&
         (strcmp(def->label, "Function") == 0 || strcmp(def->label, "Method") == 0 ||
          strcmp(def->label, "Class") == 0 || strcmp(def->label, "Interface") == 0 ||
-         strcmp(def->label, "Variable") == 0 || strcmp(def->label, "Field") == 0)) {
+         strcmp(def->label, "Variable") == 0 || strcmp(def->label, "Field") == 0 ||
+         strcmp(def->label, "Table") == 0 || strcmp(def->label, "View") == 0)) {
         cbm_registry_add(ctx->registry, def->name, def->qualified_name, def->label);
     }
     char *file_qn = cbm_pipeline_fqn_compute(ctx->project_name, rel, "__file__");
diff --git a/src/pipeline/pass_parallel.c b/src/pipeline/pass_parallel.c
index 0471cbe0..e5b88027 100644
--- a/src/pipeline/pass_parallel.c
+++ b/src/pipeline/pass_parallel.c
@@ -826,7 +826,8 @@ static int register_and_link_def(cbm_pipeline_ctx_t *ctx, const CBMDefinition *d
      * Variable/Field defs are registered too so READS/WRITES can resolve. */
     if (strcmp(def->label, "Function") == 0 || strcmp(def->label, "Method") == 0 ||
         strcmp(def->label, "Class") == 0 || strcmp(def->label, "Interface") == 0 ||
-        strcmp(def->label, "Variable") == 0 || strcmp(def->label, "Field") == 0) {
+        strcmp(def->label, "Variable") == 0 || strcmp(def->label, "Field") == 0 ||
+        strcmp(def->label, "Table") == 0 || strcmp(def->label, "View") == 0) {
         cbm_registry_add(ctx->registry, def->name, def->qualified_name, def->label);
         (*reg_entries)++;
     }
diff --git a/tests/test_extraction.c b/tests/test_extraction.c
index d06b2a50..0ded025a 100644
--- a/tests/test_extraction.c
+++ b/tests/test_extraction.c
@@ -1408,6 +1408,58 @@ TEST(sql_function) {
     PASS();
 }
 
+TEST(sql_ddl_node_labels) {
+    CBMFileResult *r = extract("CREATE TABLE users (id INTEGER PRIMARY KEY, name TEXT);\n"
+                               "CREATE VIEW active_users AS SELECT * FROM users;\n",
+                               CBM_LANG_SQL, "t", "schema.sql");
+    ASSERT_NOT_NULL(r);
+    ASSERT_FALSE(r->has_error);
+    ASSERT(has_def(r, "Table", "users"));
+    ASSERT(has_def(r, "View", "active_users"));
+    cbm_free_result(r);
+    PASS();
+}
+
+TEST(sql_view_lineage_usages) {
+    /* A view's FROM/JOIN relations are emitted as usages (ref_name = table),
+     * which pass_usages later resolves into view -> table USAGE lineage edges. */
+    CBMFileResult *r = extract("CREATE TABLE users (id INTEGER);\n"
+                               "CREATE VIEW active_users AS SELECT * FROM users;\n",
+                               CBM_LANG_SQL, "t", "schema.sql");
+    ASSERT_NOT_NULL(r);
+    ASSERT_FALSE(r->has_error);
+    int found_users = 0; /* set once any usage has ref_name "users" */
+    for (int i = 0; i < r->usages.count; i++) {
+        if (r->usages.items[i].ref_name && strcmp(r->usages.items[i].ref_name, "users") == 0) {
+            found_users = 1;
+        }
+    }
+    ASSERT(found_users);
+    cbm_free_result(r);
+    PASS();
+}
+
+TEST(sql_schema_qualified_name) {
+    /* schema-qualified DDL (schema.table) is named by the table, not the schema,
+     * and FROM schema.table resolves to that table for lineage. */
+    CBMFileResult *r = extract("CREATE TABLE app.users (id INTEGER);\n"
+                               "CREATE VIEW app.active AS SELECT * FROM app.users;\n",
+                               CBM_LANG_SQL, "t", "schema.sql");
+    ASSERT_NOT_NULL(r);
+    ASSERT_FALSE(r->has_error);
+    ASSERT(has_def(r, "Table", "users"));
+    ASSERT(has_def(r, "View", "active"));
+    int found_users = 0; /* set once any usage has ref_name "users" (schema prefix dropped) */
+    for (int i = 0; i < r->usages.count; i++) {
+        if (r->usages.items[i].ref_name && strcmp(r->usages.items[i].ref_name, "users") == 0) {
+            found_users = 1;
+        }
+    }
+    ASSERT(found_users);
+    cbm_free_result(r);
+    PASS();
+}
+
 /* --- Meson project --- */
 TEST(meson_project) {
     CBMFileResult *r = extract(
@@ -3062,6 +3114,9 @@ SUITE(extraction) {
     /* Config/Markup */
     RUN_TEST(html_elements);
     RUN_TEST(sql_function);
+    RUN_TEST(sql_ddl_node_labels);
+    RUN_TEST(sql_view_lineage_usages);
+    RUN_TEST(sql_schema_qualified_name);
     RUN_TEST(meson_project);
     RUN_TEST(css_rules);
     RUN_TEST(scss_rules);
diff --git a/tests/test_grammar_labels.c b/tests/test_grammar_labels.c
index 121fc01c..0a516663 100644
--- a/tests/test_grammar_labels.c
+++ b/tests/test_grammar_labels.c
@@ -183,7 +183,7 @@ static const LabelGolden LABEL_GOLDENS[] = {
     {"toml", "Class:1,Module:1,Variable:1"},
     {"ini", "Class:1,Module:1,Variable:1"},
     {"csv", "Module:1"},
-    {"sql", "Module:1,Variable:1"},
+    {"sql", "Module:1,Table:1"},
     {"xml", "Class:2,Module:1"},
     {"html", "Module:1"},
     {"css", "Module:1"},
diff --git a/tests/test_grammar_probe_g.c b/tests/test_grammar_probe_g.c
index 185ca4ba..5e8592f6 100644
--- a/tests/test_grammar_probe_g.c
+++ b/tests/test_grammar_probe_g.c
@@ -157,6 +157,7 @@ typedef struct {
     int modules;
     int classes;
     int variables;
+    int tables; /* gpg_count_label(..., "Table") */
     int sections;
     int imports; /* IMPORTS edges */
     int depends; /* DEPENDS_ON edges */
@@ -172,6 +173,7 @@ static GpgMetrics gpg_metrics_files(const GpgFile *files, int nfiles) {
     m.modules = gpg_count_label(store, lp.project, "Module");
     m.classes = gpg_count_label(store, lp.project, "Class");
     m.variables = gpg_count_label(store, lp.project, "Variable");
+    m.tables = gpg_count_label(store, lp.project, "Table");
     m.sections = gpg_count_label(store, lp.project, "Section");
     m.imports = cbm_store_count_edges_by_type(store, lp.project, "IMPORTS");
     m.depends = cbm_store_count_edges_by_type(store, lp.project, "DEPENDS_ON");
@@ -605,12 +607,12 @@ TEST(probe_csv_module_only) {
 /* ══════════════════════════════════════════════════════════════════
  * GROUP 13 — SQL (.sql)
  *
- * SQL golden histogram: Module:1, Variable:1
- * Table references (e.g. CREATE TABLE / SELECT FROM) produce Variable nodes.
+ * SQL golden histogram: Module:1, Table:1
+ * CREATE TABLE / CREATE VIEW produce first-class Table / View nodes.
  * ══════════════════════════════════════════════════════════════════ */
 
-/* SQL: CREATE TABLE + SELECT → at least 1 Variable node. */
-TEST(probe_sql_variable_node) {
+/* SQL: CREATE TABLE → a first-class Table node. */
+TEST(probe_sql_table_node) {
     GpgMetrics m = gpg_metrics("schema.sql", "CREATE TABLE users (\n"
                                              " id INTEGER PRIMARY KEY,\n"
                                              " name TEXT NOT NULL\n"
@@ -618,8 +620,8 @@ TEST(probe_sql_variable_node) {
                                              "\n"
                                              "SELECT id, name FROM users WHERE id = 1;\n");
     ASSERT_TRUE(m.ok);
-    /* GREEN: SQL table reference produces at least 1 Variable node. */
-    ASSERT_TRUE(m.variables >= 1);
+    /* GREEN: CREATE TABLE produces a first-class Table node (was Variable). */
+    ASSERT_TRUE(m.tables >= 1);
     ASSERT_TRUE(m.modules >= 1);
     PASS();
 }
@@ -1082,7 +1084,7 @@ SUITE(grammar_probe_g) {
     RUN_TEST(probe_csv_module_only);
 
     /* SQL */
-    RUN_TEST(probe_sql_variable_node);
+    RUN_TEST(probe_sql_table_node);
     RUN_TEST(probe_sql_insert_select);
 
     /* SOQL */