From 432f66ecb6b1daf36691aec254b3181af4be87f8 Mon Sep 17 00:00:00 2001 From: Lucas Machado Date: Wed, 27 May 2026 23:03:13 +0200 Subject: [PATCH] feat: support bounded self-referential seeding --- EXAMPLES.md | 11 +- Makefile | 13 +- README.md | 6 +- docs/commands.md | 26 +++- docs/development.md | 11 +- init/init.mysql.sql | 11 +- init/init.sql | 10 +- integration/integration_test.go | 98 +++++++++---- integration/schema_mysql.sql | 9 ++ integration/schema_postgres.sql | 8 ++ internal/cli/gaps.go | 20 ++- internal/cli/generate.go | 20 ++- internal/cli/seed.go | 22 ++- internal/cli/seed_test.go | 157 +++++++++++++++++++++ internal/cli/table_rows.go | 40 ++++++ internal/faker/faker.go | 126 ++++++++++++++++- internal/faker/faker_test.go | 130 ++++++++++++++++- internal/graph/graph.go | 35 ++++- internal/graph/graph_test.go | 43 ++++++ internal/tui/config.go | 14 +- internal/tui/cycle_test.go | 12 ++ internal/tui/execute.go | 8 +- internal/tui/gaps.go | 31 ++-- internal/tui/gaps_test.go | 21 +++ internal/tui/generate.go | 81 +++++++---- internal/tui/generate_test.go | 14 ++ internal/tui/tui.go | 42 +++--- internal/tui/wizard_test.go | 21 +++ internal/web/handlers_api_test.go | 18 +++ internal/web/runners.go | 60 +++++--- internal/web/runners_test.go | 80 +++++++++++ internal/web/static/app.js | 1 + internal/web/templates/workspace.html.tmpl | 4 + 33 files changed, 1051 insertions(+), 152 deletions(-) create mode 100644 internal/cli/seed_test.go create mode 100644 internal/cli/table_rows.go create mode 100644 internal/tui/cycle_test.go diff --git a/EXAMPLES.md b/EXAMPLES.md index 2eb85f4..d38e199 100644 --- a/EXAMPLES.md +++ b/EXAMPLES.md @@ -19,7 +19,7 @@ End-to-end walkthroughs for common seedstorm workflows. All examples assume loca ## 1. Basic Seeding (no AI)
-Demo — introspect + seed a 28-table schema +Demo — introspect + seed a 29-table schema seedstorm introspect + seed @@ -55,7 +55,7 @@ seedstorm seed \ 14:12:15 INFO Seeding table table=users rows=150 14:12:15 INFO Seeding table table=companies rows=50 ... -14:12:16 INFO Seeding complete tables=28 total_rows=1515 duration=316ms +14:12:16 INFO Seeding complete tables=29 total_rows=1540 duration=316ms ```
@@ -398,7 +398,7 @@ Select which tables to seed. Tables are shown in FK-safe order with their depend [ ] employees → departments [✓] wishlists → users - 7 of 28 tables selected + 7 of 29 tables selected ↑/↓ navigate • space toggle • a all • n none • enter confirm • q quit ``` @@ -426,6 +426,7 @@ Set seeding parameters. Tab between fields, space to toggle the truncate checkbo ▸ Rows per table: [50] Batch size: [100] Enum rows (0 = use rows): [0] + Self-ref depth: [2] [ ] Truncate before seeding tab/↑↓ navigate • space toggle • enter confirm • b back • q quit @@ -436,6 +437,7 @@ Set seeding parameters. Tab between fields, space to toggle the truncate checkbo | Rows per table | How many rows to generate for each selected table | | Batch size | Rows per INSERT statement (higher = faster, default 100) | | Enum rows | Rows per enum value for enum tables (0 = use rows count) | +| Self-ref depth | Maximum generated depth for self-referential FK chains | | Truncate | Delete all existing data before seeding (shows warning in review) | @@ -598,7 +600,7 @@ A 3-step wizard: **Tables → Config → Generate** **Step 1 — Table picker:** Same as `seed -i` — select which tables to include. -**Step 2 — Config:** Set rows, choose format (yaml/json/sql with `←`/`→`), and optionally set an output file path. +**Step 2 — Config:** Set rows, self-reference depth, choose format (yaml/json/sql with `←`/`→`), and optionally set an output file path. ``` seedstorm generate interactive ✓ Tables ● Config ○ Generate @@ -606,6 +608,7 @@ A 3-step wizard: **Tables → Config → Generate** Configure generation ▸ Rows per table: [10] + Self-ref depth: [2] Format: [yaml] json sql Output file: [data.json] diff --git a/Makefile b/Makefile index a4f03ef..74c4a73 100644 --- a/Makefile +++ b/Makefile @@ -50,7 +50,18 @@ test: test-integration: dev-up @echo "Waiting for databases to be healthy..." 
- @docker compose wait mysql postgres 2>/dev/null || sleep 10 + @for i in $$(seq 1 60); do \ + pg=$$(docker inspect -f '{{.State.Health.Status}}' seedstorm-postgres-1 2>/dev/null || true); \ + my=$$(docker inspect -f '{{.State.Health.Status}}' seedstorm-mysql-1 2>/dev/null || true); \ + if [ "$$pg" = "healthy" ] && [ "$$my" = "healthy" ]; then \ + break; \ + fi; \ + if [ "$$i" = "60" ]; then \ + docker compose ps; \ + exit 1; \ + fi; \ + sleep 1; \ + done cd integration && go test -v -tags integration -count=1 ./... -timeout 300s lint: diff --git a/README.md b/README.md index 6da69b9..8fd0871 100644 --- a/README.md +++ b/README.md @@ -67,14 +67,14 @@ seedstorm gaps \ ## Features - **Schema self-discovery** — introspects tables, columns, PKs, FKs, enum values, UNIQUE and CHECK constraints; no manual editing required -- **FK-aware seeding** — topological sort guarantees parent tables are seeded before children; handles self-referential FKs, near-cycles, junction tables, and deep multi-level chains +- **FK-aware seeding** — topological sort guarantees parent tables are seeded before children; handles nullable and non-nullable self-referential FKs with bounded depth, near-cycles, junction tables, and deep multi-level chains - **Constraint-aware faker mapping** — UNIQUE → `uuid`, CHECK IN → `randomstring(a,b,c)`, CHECK range → `number(min,max)`; seed data always satisfies your constraints - **Semantic faker** — maps column names (`email`, `first_name`, `price`, `city`…) to realistic `gofakeit` generators automatically - **Enum coverage** — every enum value appears at least `--rows` times, independently per column - **AI enrichment** — Gemini rewrites faker hints for domain-meaningful data; supply `--prompt` for richer context - **Gap analysis** — `gaps` shows which tables are empty with row counts and FK context; `--fill` seeds only the empty ones -- **Interactive TUI** — wizard for table selection, global config, per-table row volumes, and review before seeding -- 
**Web UI** — `seedstorm serve` exposes an interactive graph workspace with click-to-select tables, per-table row overrides, live SSE job logs, multi-DB session switcher, and connection presets in `localStorage` +- **Interactive TUI** — wizard for table selection, global config, self-reference depth, per-table row volumes, and review before seeding +- **Web UI** — `seedstorm serve` exposes an interactive graph workspace with click-to-select tables, self-reference depth, per-table row overrides, live SSE job logs, multi-DB session switcher, and connection presets in `localStorage` - **Dry-run** — preview the seed plan and INSERT SQL without touching the database - **Export** — generate fake data as YAML, JSON, or SQL without a live connection diff --git a/docs/commands.md b/docs/commands.md index e936ad5..99c2240 100644 --- a/docs/commands.md +++ b/docs/commands.md @@ -108,6 +108,22 @@ seedstorm seed \ --schema schema.yaml \ --enum-rows 10 +# Bound generated self-referential chains to 2 levels +seedstorm seed \ + --db postgres \ + --dsn "postgres://..." \ + --schema schema.yaml \ + --self-ref-depth 2 + +# Override specific table volumes from a scripted run +seedstorm seed \ + --db postgres \ + --dsn "postgres://..." \ + --schema schema.yaml \ + --rows 20 \ + --table-rows users=200,orders=500 \ + --table-rows order_items=1000 + # Interactive TUI — pick tables, configure options, review, then seed seedstorm seed \ --db postgres \ @@ -126,7 +142,9 @@ The interactive TUI includes a **Volumes** step after global config. 
Each select | `--db` / `$SEEDSTORM_DB` | `postgres` | Database type | | `--dsn` / `$SEEDSTORM_DSN` | — | Connection string (required) | | `--rows` / `-r` | `100` | Rows per table | +| `--table-rows` | — | Per-table row override, repeatable or comma-separated (`table=rows`) | | `--enum-rows` | `0` | Rows per enum value (0 = use `--rows`) | +| `--self-ref-depth` | `2` | Maximum generated depth for self-referential FK chains | | `--disable-fk` | false | Skip FK ordering | | `--dry-run` / `-n` | false | Print seed plan + SQL, do not execute | | `--truncate` | false | Truncate all tables before seeding (prompts for confirmation) | @@ -196,7 +214,9 @@ Gap Analysis | `--db` / `$SEEDSTORM_DB` | `postgres` | Database type | | `--dsn` / `$SEEDSTORM_DSN` | — | Connection string (required) | | `--rows` / `-r` | `100` | Rows per empty table (when `--fill` is set) | +| `--table-rows` | — | Per-table row override for fill, repeatable or comma-separated (`table=rows`) | | `--enum-rows` | `0` | Rows per enum value for empty enum tables (0 = use `--rows`) | +| `--self-ref-depth` | `2` | Maximum generated depth for self-referential FK chains | | `--fill` | false | Seed all empty tables | | `--dry-run` / `-n` | false | Print SQL without executing (requires `--fill`) | | `--yes` / `-y` | false | Skip confirmation prompt | @@ -213,6 +233,8 @@ Generates fake data without connecting to a database. 
Outputs YAML, JSON, or SQL seedstorm generate --schema schema.yaml --rows 10 --format json --out data.json seedstorm generate --schema schema.yaml --rows 5 --format sql --db postgres seedstorm generate --schema schema.yaml --rows 20 --format yaml +seedstorm generate --schema schema.yaml --rows 20 --self-ref-depth 3 +seedstorm generate --schema schema.yaml --rows 20 --table-rows users=200,orders=500 # Interactive TUI seedstorm generate --schema schema.yaml --interactive @@ -226,6 +248,8 @@ In interactive mode, the **Volumes** step can override row counts per selected t |------|---------|-------------| | `--schema` / `-s` | `schema.yaml` | Schema file | | `--rows` / `-r` | `100` | Rows per table | +| `--table-rows` | — | Per-table row override, repeatable or comma-separated (`table=rows`) | +| `--self-ref-depth` | `2` | Maximum generated depth for self-referential FK chains | | `--format` / `-f` | `yaml` | Output format: `yaml`, `json`, `sql` | | `--out` / `-o` | stdout | Output file (omit for stdout) | | `--db` | `postgres` | DB type (affects SQL placeholder style) | @@ -259,7 +283,7 @@ SEEDSTORM_ADDR=127.0.0.1:9000 seedstorm serve What the UI gives you: -- **Workspace** — Cytoscape DAG of every table; click to select, non-nullable parents auto-lock as a dependency closure (mirrors the TUI). The selected-table panel lets you override row counts per table for **Seed**, **Fill empty**, and workspace **Generate** runs while `Rows` remains the default. Live SSE log stream + status pill. +- **Workspace** — Cytoscape DAG of every table; click to select, non-nullable parents auto-lock as a dependency closure (mirrors the TUI). The selected-table panel lets you override row counts per table for **Seed**, **Fill empty**, and workspace **Generate** runs while `Rows` remains the default. `Self-ref` controls bounded generated depth for self-referential FK chains. Live SSE log stream + status pill. 
- **Connection management** — multi-session: hold several DBs open in one browser and switch from a topbar dropdown. Saved connection presets in `localStorage` with optional password (eye-icon reveal, closed by default). Passwords are kept in process memory only on the server. - **Standalone tools** — `/generate`, `/enrich`, `/export` mirror the CLI commands as forms. diff --git a/docs/development.md b/docs/development.md index c6dd63d..9fb5a07 100644 --- a/docs/development.md +++ b/docs/development.md @@ -62,12 +62,13 @@ go test ./... -v ### Integration tests -Integration tests run the full pipeline against a 28-table real-world schema on both MySQL and PostgreSQL, covering: +Integration tests run the full pipeline against a 29-table real-world schema on both MySQL and PostgreSQL, covering: | Edge case | Tables | |-----------|--------| | Self-referential FK | `categories`, `departments`, `employees` | | Near-cycle (nullable FK breaks it) | `departments.head_employee_id ↔ employees.department_id` | +| Hard self-reference | `hard_self_employees.manager_id → hard_self_employees.id` | | Deep FK chain (5 levels) | `return_requests → order_items → orders → users` | | Many-to-many junctions | `product_tags`, `project_assignments`, `wishlist_items` | | Multiple enums per table | `support_tickets` (status + priority) | @@ -78,8 +79,8 @@ Integration tests run the full pipeline against a 28-table real-world schema on | CHECK range constraint → `number(min,max)` faker | `products.rating` (1–5) | Tests verify: -- All 28 tables receive exactly the requested number of rows -- 38 FK relationships have zero orphans +- All 29 tables receive rows, with enum-coverage tables allowed to exceed the base request so every enum value is represented +- 39 FK relationships have zero orphans, including nullable and non-nullable self-references - 6 value constraints hold (ratings 1–5, prices > 0, quantities ≥ 1, salaries > 0) - Enum values, UNIQUE columns, and CHECK constraints are 
auto-detected correctly @@ -100,7 +101,7 @@ Expected output: brands 25 rows ... audit_logs 25 rows - Total: 700 rows across 28 tables (4.43s) + Total: 1600+ rows across 29 tables (11.30s) --- PASS: TestPostgresIntegration (6.87s) ``` @@ -117,7 +118,7 @@ All tests run automatically on every PR via GitHub Actions (`.github/workflows/p | `validate` | Directory/file structure via structlint | | `test` | `go test ./...` + `make build` | | `lint` | `golangci-lint` | -| `integration` | Full 28-table suite on Postgres 15 + MySQL 8 | +| `integration` | Full 29-table suite on Postgres 15 + MySQL 8 | The integration job in CI uses `--timeout 120s`. Use `300s` locally when running both engines back-to-back. diff --git a/init/init.mysql.sql b/init/init.mysql.sql index e806d66..b51ebb3 100644 --- a/init/init.mysql.sql +++ b/init/init.mysql.sql @@ -1,4 +1,5 @@ DROP TABLE IF EXISTS order_items; +DROP TABLE IF EXISTS hard_self_employees; DROP TABLE IF EXISTS orders; DROP TABLE IF EXISTS products; DROP TABLE IF EXISTS users; @@ -16,6 +17,14 @@ CREATE TABLE products ( price DECIMAL(10, 2) NOT NULL ); +CREATE TABLE hard_self_employees ( + id INT AUTO_INCREMENT PRIMARY KEY, + manager_id INT NOT NULL, + name VARCHAR(255) NOT NULL, + title VARCHAR(255), + FOREIGN KEY (manager_id) REFERENCES hard_self_employees(id) +); + CREATE TABLE orders ( id INT AUTO_INCREMENT PRIMARY KEY, user_id INT NOT NULL, @@ -31,4 +40,4 @@ CREATE TABLE order_items ( quantity INT NOT NULL, FOREIGN KEY (order_id) REFERENCES orders(id), FOREIGN KEY (product_id) REFERENCES products(id) -); \ No newline at end of file +); diff --git a/init/init.sql b/init/init.sql index 3a19cb3..f5d1055 100644 --- a/init/init.sql +++ b/init/init.sql @@ -1,4 +1,5 @@ DROP TABLE IF EXISTS order_items; +DROP TABLE IF EXISTS hard_self_employees; DROP TABLE IF EXISTS orders; DROP TYPE IF EXISTS order_status; CREATE TYPE order_status AS ENUM ('pending', 'processing', 'shipped', 'delivered', 'cancelled'); @@ -16,6 +17,13 @@ CREATE TABLE IF 
NOT EXISTS products ( price NUMERIC(10, 2) NOT NULL ); +CREATE TABLE IF NOT EXISTS hard_self_employees ( + id SERIAL PRIMARY KEY, + manager_id INTEGER NOT NULL REFERENCES hard_self_employees(id), + name VARCHAR(255) NOT NULL, + title VARCHAR(255) +); + CREATE TABLE IF NOT EXISTS orders ( id SERIAL PRIMARY KEY, user_id INTEGER NOT NULL REFERENCES users(id), @@ -28,4 +36,4 @@ CREATE TABLE IF NOT EXISTS order_items ( order_id INTEGER NOT NULL REFERENCES orders(id), product_id INTEGER NOT NULL REFERENCES products(id), quantity INTEGER NOT NULL -); \ No newline at end of file +); diff --git a/integration/integration_test.go b/integration/integration_test.go index 591bea5..e4bb693 100644 --- a/integration/integration_test.go +++ b/integration/integration_test.go @@ -138,6 +138,32 @@ func countRows(t *testing.T, conn *sql.DB, table string) int { return n } +func assertHardSelfRefSeeded(t *testing.T, conn *sql.DB) { + t.Helper() + var total, nulls, orphans int + if err := conn.QueryRowContext(context.Background(), `SELECT COUNT(*) FROM hard_self_employees`).Scan(&total); err != nil { + t.Fatalf("hard self-ref count: %v", err) + } + if total == 0 { + t.Fatal("hard_self_employees: expected seeded rows") + } + if err := conn.QueryRowContext(context.Background(), `SELECT COUNT(*) FROM hard_self_employees WHERE manager_id IS NULL`).Scan(&nulls); err != nil { + t.Fatalf("hard self-ref null check: %v", err) + } + if nulls != 0 { + t.Fatalf("hard_self_employees: found %d NULL manager_id values", nulls) + } + if err := conn.QueryRowContext(context.Background(), ` + SELECT COUNT(*) FROM hard_self_employees c + LEFT JOIN hard_self_employees p ON c.manager_id = p.id + WHERE p.id IS NULL`).Scan(&orphans); err != nil { + t.Fatalf("hard self-ref FK check: %v", err) + } + if orphans != 0 { + t.Fatalf("hard_self_employees: found %d orphaned manager_id values", orphans) + } +} + // buildAndSeed runs the full introspect → build schema → generate → seed pipeline. 
// It prints a summary at the end (not per-row during insert). func buildAndSeed(t *testing.T, label, driver, dsn string, conn *sql.DB) map[string][]map[string]interface{} { @@ -246,7 +272,7 @@ func TestPostgresIntegration(t *testing.T) { t.Run("row counts", func(t *testing.T) { allTables := []string{ // L0 - "brands", "tags", "users", "coupons", "companies", "suppliers", + "brands", "tags", "users", "coupons", "companies", "suppliers", "hard_self_employees", // L1 "categories", "addresses", "departments", "warehouses", "wishlists", // L2 @@ -909,17 +935,18 @@ func TestPostgresIntegration(t *testing.T) { expected := map[string]int{ // existing - "addresses": 1, // user_id - "products": 2, // category_id, brand_id - "product_tags": 2, // product_id, tag_id - "orders": 3, // user_id, address_id, coupon_id - "order_items": 2, // order_id, product_id - "shipments": 1, // order_id - "payments": 1, // order_id - "reviews": 2, // user_id, product_id - "wishlists": 1, // user_id - "wishlist_items": 2, // wishlist_id, product_id - "categories": 1, // parent_id (self-ref) + "addresses": 1, // user_id + "products": 2, // category_id, brand_id + "product_tags": 2, // product_id, tag_id + "orders": 3, // user_id, address_id, coupon_id + "order_items": 2, // order_id, product_id + "shipments": 1, // order_id + "payments": 1, // order_id + "reviews": 2, // user_id, product_id + "wishlists": 1, // user_id + "wishlist_items": 2, // wishlist_id, product_id + "categories": 1, // parent_id (self-ref) + "hard_self_employees": 1, // manager_id (hard self-ref) // new "departments": 3, // company_id, parent_dept_id (self-ref), head_employee_id "employees": 2, // department_id, manager_id (self-ref) @@ -1050,6 +1077,10 @@ func TestPostgresIntegration(t *testing.T) { t.Logf("employees root nodes: %d", roots) }) + t.Run("self-ref: hard_self_employees has valid non-null managers", func(t *testing.T) { + assertHardSelfRefSeeded(t, conn) + }) + // ── Deep chain subtest 
────────────────────────────────────────────────────── t.Run("deep chain: return_requests -> order_items -> orders -> users", func(t *testing.T) { @@ -1393,7 +1424,7 @@ func TestPostgresIntegration(t *testing.T) { t.Fatalf("truncate: %v", err) } allTables := []string{ - "brands", "tags", "users", "coupons", "companies", "suppliers", + "brands", "tags", "users", "coupons", "companies", "suppliers", "hard_self_employees", "categories", "addresses", "departments", "warehouses", "wishlists", "products", "employees", "product_tags", "orders", "projects", "inventory", "purchase_orders", @@ -1414,7 +1445,7 @@ func TestPostgresIntegration(t *testing.T) { } buildAndSeed(t, "postgres (post-truncate)", postgresDriver, dsn, conn) allTables := []string{ - "brands", "tags", "users", "coupons", "companies", "suppliers", + "brands", "tags", "users", "coupons", "companies", "suppliers", "hard_self_employees", "categories", "addresses", "departments", "warehouses", "wishlists", "products", "employees", "product_tags", "orders", "projects", "inventory", "purchase_orders", @@ -1454,7 +1485,7 @@ func TestMySQLIntegration(t *testing.T) { t.Run("row counts", func(t *testing.T) { allTables := []string{ // L0 - "brands", "tags", "users", "coupons", "companies", "suppliers", + "brands", "tags", "users", "coupons", "companies", "suppliers", "hard_self_employees", // L1 "categories", "addresses", "departments", "warehouses", "wishlists", // L2 @@ -2116,17 +2147,18 @@ func TestMySQLIntegration(t *testing.T) { expected := map[string]int{ // existing - "addresses": 1, // user_id - "products": 2, // category_id, brand_id - "product_tags": 2, // product_id, tag_id - "orders": 3, // user_id, address_id, coupon_id - "order_items": 2, // order_id, product_id - "shipments": 1, // order_id - "payments": 1, // order_id - "reviews": 2, // user_id, product_id - "wishlists": 1, // user_id - "wishlist_items": 2, // wishlist_id, product_id - "categories": 1, // parent_id (self-ref) + "addresses": 1, // 
user_id + "products": 2, // category_id, brand_id + "product_tags": 2, // product_id, tag_id + "orders": 3, // user_id, address_id, coupon_id + "order_items": 2, // order_id, product_id + "shipments": 1, // order_id + "payments": 1, // order_id + "reviews": 2, // user_id, product_id + "wishlists": 1, // user_id + "wishlist_items": 2, // wishlist_id, product_id + "categories": 1, // parent_id (self-ref) + "hard_self_employees": 1, // manager_id (hard self-ref) // new "departments": 3, // company_id, parent_dept_id (self-ref), head_employee_id "employees": 2, // department_id, manager_id (self-ref) @@ -2257,6 +2289,10 @@ func TestMySQLIntegration(t *testing.T) { t.Logf("employees root nodes: %d", roots) }) + t.Run("self-ref: hard_self_employees has valid non-null managers", func(t *testing.T) { + assertHardSelfRefSeeded(t, conn) + }) + // ── Deep chain subtest ────────────────────────────────────────────────────── t.Run("deep chain: return_requests -> order_items -> orders -> users", func(t *testing.T) { @@ -2590,7 +2626,7 @@ func TestMySQLIntegration(t *testing.T) { t.Fatalf("truncate: %v", err) } allTables := []string{ - "brands", "tags", "users", "coupons", "companies", "suppliers", + "brands", "tags", "users", "coupons", "companies", "suppliers", "hard_self_employees", "categories", "addresses", "departments", "warehouses", "wishlists", "products", "employees", "product_tags", "orders", "projects", "inventory", "purchase_orders", @@ -2611,7 +2647,7 @@ func TestMySQLIntegration(t *testing.T) { } buildAndSeed(t, "mysql (post-truncate)", mysqlDriver, dsn, conn) allTables := []string{ - "brands", "tags", "users", "coupons", "companies", "suppliers", + "brands", "tags", "users", "coupons", "companies", "suppliers", "hard_self_employees", "categories", "addresses", "departments", "warehouses", "wishlists", "products", "employees", "product_tags", "orders", "projects", "inventory", "purchase_orders", @@ -2642,12 +2678,12 @@ func TestMySQLIntegration(t *testing.T) { // 
5. Idempotent fill: running gap fill a second time when all tables already // have rows adds nothing (no gaps found → no generation). -// gapL0Tables are the root (no FK parents) tables in the 28-table test schema. -var gapL0Tables = []string{"brands", "tags", "users", "coupons", "companies", "suppliers"} +// gapL0Tables are the root tables (no FK parents other than a self-reference) in the 29-table test schema. +var gapL0Tables = []string{"brands", "tags", "users", "coupons", "companies", "suppliers", "hard_self_employees"} // gapAllTables lists every table in the test schema (used for count assertions). var gapAllTables = []string{ - "brands", "tags", "users", "coupons", "companies", "suppliers", + "brands", "tags", "users", "coupons", "companies", "suppliers", "hard_self_employees", "categories", "addresses", "departments", "warehouses", "wishlists", "products", "employees", "product_tags", "orders", "projects", "inventory", "purchase_orders", diff --git a/integration/schema_mysql.sql b/integration/schema_mysql.sql index 0ac1a89..3d3caa2 100644 --- a/integration/schema_mysql.sql +++ b/integration/schema_mysql.sql @@ -2,6 +2,7 @@ SET FOREIGN_KEY_CHECKS = 0; DROP TABLE IF EXISTS return_requests; DROP TABLE IF EXISTS audit_logs; +DROP TABLE IF EXISTS hard_self_employees; DROP TABLE IF EXISTS support_tickets; DROP TABLE IF EXISTS project_assignments; DROP TABLE IF EXISTS purchase_order_items; @@ -92,6 +93,14 @@ CREATE TABLE suppliers ( rating DECIMAL(3,2) ); +CREATE TABLE hard_self_employees ( + id INT AUTO_INCREMENT PRIMARY KEY, + manager_id INT NOT NULL, + name VARCHAR(100) NOT NULL, + title VARCHAR(100), + FOREIGN KEY (manager_id) REFERENCES hard_self_employees(id) +); + -- Level 1: FK to level 0 CREATE TABLE addresses ( id INT AUTO_INCREMENT PRIMARY KEY, diff --git a/integration/schema_postgres.sql b/integration/schema_postgres.sql index 29d8e3e..384a61f 100644 --- a/integration/schema_postgres.sql +++ b/integration/schema_postgres.sql @@ -1,6 +1,7 @@ -- Teardown (always safe to re-run) 
DROP TABLE IF EXISTS return_requests CASCADE; DROP TABLE IF EXISTS audit_logs CASCADE; +DROP TABLE IF EXISTS hard_self_employees CASCADE; DROP TABLE IF EXISTS support_tickets CASCADE; DROP TABLE IF EXISTS project_assignments CASCADE; DROP TABLE IF EXISTS purchase_order_items CASCADE; @@ -120,6 +121,13 @@ CREATE TABLE suppliers ( rating NUMERIC(3,2) ); +CREATE TABLE hard_self_employees ( + id SERIAL PRIMARY KEY, + manager_id INTEGER NOT NULL REFERENCES hard_self_employees(id), + name VARCHAR(100) NOT NULL, + title VARCHAR(100) +); + -- Level 1: FK to level 0 CREATE TABLE addresses ( id SERIAL PRIMARY KEY, diff --git a/internal/cli/gaps.go b/internal/cli/gaps.go index 098838a..85f86f9 100644 --- a/internal/cli/gaps.go +++ b/internal/cli/gaps.go @@ -52,11 +52,20 @@ Use --fill --dry-run to preview the SQL without executing it.`, Usage: "Rows to insert per empty table (when --fill is set)", Value: 100, }, + &cli.StringSliceFlag{ + Name: "table-rows", + Usage: "Per-table row override for fill, repeatable or comma-separated (table=rows)", + }, &cli.IntFlag{ Name: "enum-rows", Usage: "Rows per enum value for empty tables with enum columns (0 = use --rows)", Value: 0, }, + &cli.IntFlag{ + Name: "self-ref-depth", + Usage: "Maximum generated depth for self-referential FK chains", + Value: faker.DefaultSelfRefDepth, + }, &cli.BoolFlag{ Name: "fill", Usage: "Seed all empty tables (populated tables are skipped)", @@ -88,7 +97,12 @@ Use --fill --dry-run to preview the SQL without executing it.`, dbType := normalizeDBType(cmd.String("db")) dsn := cmd.String("dsn") rows := cmd.Int("rows") + tableRows, err := parseTableRows(cmd.StringSlice("table-rows")) + if err != nil { + return err + } enumRows := cmd.Int("enum-rows") + selfRefDepth := cmd.Int("self-ref-depth") fill := cmd.Bool("fill") dryRun := cmd.Bool("dry-run") yes := cmd.Bool("yes") @@ -127,7 +141,7 @@ Use --fill --dry-run to preview the SQL without executing it.`, } if cmd.Bool("interactive") { - return tui.RunGaps(ctx, s, 
dbType, dsn, counts, rows, batchSize, enumRows) + return tui.RunGaps(ctx, s, dbType, dsn, counts, rows, batchSize, enumRows, selfRefDepth) } // Build FK parents map for display: table → []parent tables. @@ -176,7 +190,9 @@ Use --fill --dry-run to preview the SQL without executing it.`, // Generate data for gap tables only; allSorted is used internally to // preload existing PKs from already-populated parent tables. - data, err := faker.GenerateFiltered(s, allSorted, gapTables, rows, enumRows, dbConn, dbType) + data, err := faker.GenerateFilteredWithOptions(s, allSorted, gapTables, rows, enumRows, tableRows, dbConn, dbType, faker.GenerateOptions{ + SelfRefDepth: selfRefDepth, + }) if err != nil { return fmt.Errorf("data generation failed: %w", err) } diff --git a/internal/cli/generate.go b/internal/cli/generate.go index 9aadd73..0cbe549 100644 --- a/internal/cli/generate.go +++ b/internal/cli/generate.go @@ -36,6 +36,15 @@ func generateCmd() *cli.Command { Usage: "Rows per table", Value: 10, }, + &cli.StringSliceFlag{ + Name: "table-rows", + Usage: "Per-table row override, repeatable or comma-separated (table=rows)", + }, + &cli.IntFlag{ + Name: "self-ref-depth", + Usage: "Maximum generated depth for self-referential FK chains", + Value: faker.DefaultSelfRefDepth, + }, &cli.StringFlag{ Name: "format", Aliases: []string{"f"}, @@ -68,6 +77,11 @@ func generateCmd() *cli.Command { log := logging.Log schemaPath := cmd.String("schema") rows := cmd.Int("rows") + tableRows, err := parseTableRows(cmd.StringSlice("table-rows")) + if err != nil { + return err + } + selfRefDepth := cmd.Int("self-ref-depth") format := cmd.String("format") outPath := cmd.String("out") dbType := normalizeDBType(cmd.String("db")) @@ -85,7 +99,7 @@ func generateCmd() *cli.Command { } if cmd.Bool("interactive") { - return tui.RunGenerate(ctx, s, dbType, format, outPath, rows) + return tui.RunGenerate(ctx, s, dbType, format, outPath, rows, selfRefDepth) } log.Info().Msg("Building dependency graph") @@ 
-96,7 +110,9 @@ func generateCmd() *cli.Command { } log.Info().Int("rows", rows).Msg("Generating data") - data, err := faker.Generate(s, sortedTables, rows, 0, nil, dbType) + data, err := faker.GenerateFilteredWithOptions(s, sortedTables, sortedTables, rows, 0, tableRows, nil, dbType, faker.GenerateOptions{ + SelfRefDepth: selfRefDepth, + }) if err != nil { return fmt.Errorf("generation failed: %w", err) } diff --git a/internal/cli/seed.go b/internal/cli/seed.go index 263b785..9d66205 100644 --- a/internal/cli/seed.go +++ b/internal/cli/seed.go @@ -51,11 +51,20 @@ Use --dry-run to print SQL statements without executing them.`, Usage: "Number of rows to insert per table", Value: 100, }, + &cli.StringSliceFlag{ + Name: "table-rows", + Usage: "Per-table row override, repeatable or comma-separated (table=rows)", + }, &cli.IntFlag{ Name: "enum-rows", Usage: "Rows per enum value for tables with enum columns (0 = use --rows)", Value: 0, }, + &cli.IntFlag{ + Name: "self-ref-depth", + Usage: "Maximum generated depth for self-referential FK chains", + Value: faker.DefaultSelfRefDepth, + }, &cli.BoolFlag{ Name: "disable-fk", Usage: "Skip FK ordering (seed in arbitrary order)", @@ -96,7 +105,12 @@ Use --dry-run to print SQL statements without executing them.`, dbType := normalizeDBType(cmd.String("db")) dsn := cmd.String("dsn") rows := cmd.Int("rows") + tableRows, err := parseTableRows(cmd.StringSlice("table-rows")) + if err != nil { + return err + } enumRows := cmd.Int("enum-rows") + selfRefDepth := cmd.Int("self-ref-depth") disableFK := cmd.Bool("disable-fk") dryRun := cmd.Bool("dry-run") truncate := cmd.Bool("truncate") @@ -116,7 +130,7 @@ Use --dry-run to print SQL statements without executing them.`, } if cmd.Bool("interactive") { - return tui.Run(ctx, s, dbType, dsn, rows, batchSize, enumRows, truncate) + return tui.Run(ctx, s, dbType, dsn, rows, batchSize, enumRows, truncate, selfRefDepth) } // Resolve seed order @@ -152,7 +166,7 @@ Use --dry-run to print SQL statements 
without executing them.`, if dryRun { log.Info().Msg("Dry-run mode — SQL will be printed, not executed") - fmt.Print(graph.RenderPlan(s, sortedTables, rows)) + fmt.Print(graph.RenderPlanWithCounts(s, sortedTables, rows, tableRows)) fmt.Println("--- SQL ---") } @@ -176,7 +190,9 @@ Use --dry-run to print SQL statements without executing them.`, // Generate data start := time.Now() log.Info().Int("rows", rows).Msg("Generating fake data") - data, err := faker.Generate(s, sortedTables, rows, enumRows, dbConn, dbType) + data, err := faker.GenerateFilteredWithOptions(s, sortedTables, sortedTables, rows, enumRows, tableRows, dbConn, dbType, faker.GenerateOptions{ + SelfRefDepth: selfRefDepth, + }) if err != nil { return fmt.Errorf("data generation failed: %w", err) } diff --git a/internal/cli/seed_test.go b/internal/cli/seed_test.go new file mode 100644 index 0000000..2f581f8 --- /dev/null +++ b/internal/cli/seed_test.go @@ -0,0 +1,157 @@ +package cli + +import ( + "context" + "encoding/json" + "os" + "path/filepath" + "reflect" + "strings" + "testing" +) + +func TestParseTableRows(t *testing.T) { + got, err := parseTableRows([]string{"users=2, orders=4", "items=7"}) + if err != nil { + t.Fatalf("parseTableRows: %v", err) + } + want := map[string]int{"users": 2, "orders": 4, "items": 7} + if !reflect.DeepEqual(got, want) { + t.Fatalf("parseTableRows = %+v, want %+v", got, want) + } +} + +func TestParseTableRowsRejectsInvalidValues(t *testing.T) { + for _, value := range [][]string{{"users"}, {"users=0"}, {"users=nope"}, {"=2"}} { + if _, err := parseTableRows(value); err == nil { + t.Fatalf("parseTableRows(%v) expected error", value) + } + } +} + +func TestSeedDryRunRejectsHardMultiTableCycleBeforeConnecting(t *testing.T) { + schemaPath := writeTempSchema(t, ` +tables: + a: + columns: + id: + type: integer + pk: true + b_id: + type: integer + fk: b.id + b: + columns: + id: + type: integer + pk: true + a_id: + type: integer + fk: a.id +`) + + err := 
seedCmd().Run(context.Background(), []string{ + "seed", + "--schema", schemaPath, + "--dsn", "postgres://invalid/unused", + "--dry-run", + }) + if err == nil { + t.Fatal("expected hard multi-table cycle to fail before DB connection") + } + if !strings.Contains(err.Error(), "a") || !strings.Contains(err.Error(), "b") { + t.Fatalf("error should name cycle tables, got: %v", err) + } +} + +func TestGenerateCommandHandlesHardSelfReference(t *testing.T) { + schemaPath := writeTempSchema(t, ` +tables: + employees: + columns: + id: + type: integer + pk: true + manager_id: + type: integer + fk: employees.id +`) + outPath := filepath.Join(t.TempDir(), "data.json") + + err := generateCmd().Run(context.Background(), []string{ + "generate", + "--schema", schemaPath, + "--rows", "3", + "--self-ref-depth", "2", + "--format", "json", + "--out", outPath, + }) + if err != nil { + t.Fatalf("generate: %v", err) + } + out, err := os.ReadFile(outPath) + if err != nil { + t.Fatalf("read output: %v", err) + } + if !strings.Contains(string(out), "employees") || !strings.Contains(string(out), "manager_id") { + t.Fatalf("output should include generated hard self-reference data, got %s", string(out)) + } +} + +func TestGenerateCommandAppliesTableRows(t *testing.T) { + schemaPath := writeTempSchema(t, ` +tables: + users: + columns: + id: + type: integer + pk: true + name: + type: varchar + faker: name + orders: + columns: + id: + type: integer + pk: true + user_id: + type: integer + fk: users.id +`) + outPath := filepath.Join(t.TempDir(), "data.json") + + err := generateCmd().Run(context.Background(), []string{ + "generate", + "--schema", schemaPath, + "--rows", "2", + "--table-rows", "orders=5", + "--format", "json", + "--out", outPath, + }) + if err != nil { + t.Fatalf("generate: %v", err) + } + out, err := os.ReadFile(outPath) + if err != nil { + t.Fatalf("read output: %v", err) + } + var data map[string][]map[string]any + if err := json.Unmarshal(out, &data); err != nil { + t.Fatalf("json 
output: %v\n%s", err, string(out)) + } + if got := len(data["users"]); got != 2 { + t.Fatalf("users rows = %d, want default 2", got) + } + if got := len(data["orders"]); got != 5 { + t.Fatalf("orders rows = %d, want override 5", got) + } +} + +func writeTempSchema(t *testing.T, body string) string { + t.Helper() + path := filepath.Join(t.TempDir(), "schema.yaml") + if err := os.WriteFile(path, []byte(body), 0o644); err != nil { + t.Fatalf("write schema: %v", err) + } + return path +} diff --git a/internal/cli/table_rows.go b/internal/cli/table_rows.go new file mode 100644 index 0000000..34c39ed --- /dev/null +++ b/internal/cli/table_rows.go @@ -0,0 +1,40 @@ +package cli + +import ( + "fmt" + "strconv" + "strings" +) + +func parseTableRows(values []string) (map[string]int, error) { + if len(values) == 0 { + return nil, nil + } + rows := make(map[string]int) + for _, value := range values { + for _, part := range strings.Split(value, ",") { + part = strings.TrimSpace(part) + if part == "" { + continue + } + name, countText, ok := strings.Cut(part, "=") + if !ok { + return nil, fmt.Errorf("invalid table row override %q, expected table=rows", part) + } + name = strings.TrimSpace(name) + countText = strings.TrimSpace(countText) + if name == "" || countText == "" { + return nil, fmt.Errorf("invalid table row override %q, expected table=rows", part) + } + count, err := strconv.Atoi(countText) + if err != nil || count < 1 { + return nil, fmt.Errorf("invalid row count for %s: %q", name, countText) + } + rows[name] = count + } + } + if len(rows) == 0 { + return nil, nil + } + return rows, nil +} diff --git a/internal/faker/faker.go b/internal/faker/faker.go index 82726a4..286c981 100644 --- a/internal/faker/faker.go +++ b/internal/faker/faker.go @@ -15,6 +15,23 @@ import ( "github.com/brianvoe/gofakeit/v6" ) +const DefaultSelfRefDepth = 2 + +type GenerateOptions struct { + SelfRefDepth int +} + +func DefaultGenerateOptions() GenerateOptions { + return 
GenerateOptions{SelfRefDepth: DefaultSelfRefDepth} +} + +func normalizeOptions(opts GenerateOptions) GenerateOptions { + if opts.SelfRefDepth < 0 { + opts.SelfRefDepth = 0 + } + return opts +} + // Generate produces fake data rows for each table, respecting FK ordering. // If conn is non-nil, existing PKs are read so FKs can reference them. // dbType is the driver name ("pgx" or "mysql") used to quote SQL identifiers. @@ -22,6 +39,10 @@ func Generate(s *schema.Schema, sortedTables []string, rows, enumRows int, conn return GenerateFiltered(s, sortedTables, sortedTables, rows, enumRows, conn, dbType) } +func GenerateWithOptions(s *schema.Schema, sortedTables []string, rows, enumRows int, conn *sql.DB, dbType string, opts GenerateOptions) (map[string][]map[string]interface{}, error) { + return GenerateFilteredWithOptions(s, sortedTables, sortedTables, rows, enumRows, nil, conn, dbType, opts) +} + // GenerateFiltered is like Generate but separates the two roles of sortedTables: // - allTables: the full set of tables used to pre-load existing PKs from the // database (so FK columns in targetTables can reference already-populated @@ -38,6 +59,13 @@ func GenerateFiltered(s *schema.Schema, allTables, targetTables []string, rows, // GenerateFilteredWithCounts is like GenerateFiltered, but tableRows can // override the default row count for individual target tables. func GenerateFilteredWithCounts(s *schema.Schema, allTables, targetTables []string, rows, enumRows int, tableRows map[string]int, conn *sql.DB, dbType string) (map[string][]map[string]interface{}, error) { + return GenerateFilteredWithOptions(s, allTables, targetTables, rows, enumRows, tableRows, conn, dbType, DefaultGenerateOptions()) +} + +// GenerateFilteredWithOptions is like GenerateFilteredWithCounts, with +// generation guardrails for recursive/self-referential relationships. 
+func GenerateFilteredWithOptions(s *schema.Schema, allTables, targetTables []string, rows, enumRows int, tableRows map[string]int, conn *sql.DB, dbType string, opts GenerateOptions) (map[string][]map[string]interface{}, error) { + opts = normalizeOptions(opts) data := make(map[string][]map[string]interface{}) generatedPKs := make(map[string][]interface{}) @@ -77,6 +105,9 @@ func GenerateFilteredWithCounts(s *schema.Schema, allTables, targetTables []stri } } } + if err := backfillSelfReferences(data[tableName], table, tableName, opts.SelfRefDepth); err != nil { + return nil, fmt.Errorf("table %s self-reference backfill: %w", tableName, err) + } } return data, nil @@ -336,10 +367,15 @@ func generateValue(col schema.Column, colName, tableName string, generatedPKs ma fkTable := parts[0] pks := generatedPKs[fkTable] if len(pks) == 0 { - if fkTable == tableName || col.Nullable { - // Self-referential FK or nullable FK with no parent rows yet: - // insert NULL. For nullable FKs this handles near-cycles where - // the parent table is seeded after this one. + if fkTable == tableName { + // Self-referential FKs are resolved after all rows for the + // table have PKs, so the first row can be safely rooted and + // non-nullable self-FKs can reference an existing generated PK. + return nil, nil + } + if col.Nullable { + // Nullable FK with no parent rows yet: insert NULL. This + // handles near-cycles where the parent table is seeded later. 
return nil, nil } return nil, fmt.Errorf("no PKs available for FK table %s", fkTable) @@ -369,6 +405,88 @@ func generateValue(col schema.Column, colName, tableName string, generatedPKs ma return val, nil } +func backfillSelfReferences(rows []map[string]interface{}, table schema.Table, tableName string, selfRefDepth int) error { + if len(rows) == 0 { + return nil + } + if selfRefDepth < 0 { + selfRefDepth = 0 + } + + colNames := make([]string, 0, len(table.Columns)) + for colName, col := range table.Columns { + if fkTable, _ := splitFK(col.FK); fkTable == tableName { + colNames = append(colNames, colName) + } + } + sort.Strings(colNames) + + for _, colName := range colNames { + col := table.Columns[colName] + _, refCol := splitFK(col.FK) + if refCol == "" { + continue + } + if _, ok := table.Columns[refCol]; !ok { + return fmt.Errorf("%s references missing column %s", colName, refCol) + } + + levels := make([]int, len(rows)) + for i := range rows { + if _, ok := rows[i][refCol]; !ok { + return fmt.Errorf("%s references unavailable generated value %s", colName, refCol) + } + if i == 0 { + if col.Nullable { + rows[i][colName] = nil + } else { + rows[i][colName] = rows[i][refCol] + } + levels[i] = 0 + continue + } + + parentIdx := chooseSelfRefParent(levels, i, selfRefDepth) + if parentIdx < 0 { + if col.Nullable { + rows[i][colName] = nil + levels[i] = 0 + continue + } + rows[i][colName] = rows[i][refCol] + levels[i] = 0 + continue + } + rows[i][colName] = rows[parentIdx][refCol] + levels[i] = levels[parentIdx] + 1 + } + } + return nil +} + +func chooseSelfRefParent(levels []int, rowIdx, maxDepth int) int { + if rowIdx <= 0 { + return -1 + } + if maxDepth <= 0 { + return -1 + } + for i := rowIdx - 1; i >= 0; i-- { + if levels[i] < maxDepth { + return i + } + } + return -1 +} + +func splitFK(fk string) (string, string) { + parts := strings.SplitN(fk, ".", 2) + if len(parts) != 2 { + return "", "" + } + return parts[0], parts[1] +} + // generatePK returns an appropriate 
primary key value based on the column's DB type. // Sequential integers for numeric types, UUIDs for uuid/string types. func generatePK(colType string, existingCount int) (interface{}, error) { diff --git a/internal/faker/faker_test.go b/internal/faker/faker_test.go index c0d73b5..21a3040 100644 --- a/internal/faker/faker_test.go +++ b/internal/faker/faker_test.go @@ -653,15 +653,15 @@ func TestGenerateValue_NullableFKWithNoParents(t *testing.T) { } } -func TestGenerateValue_SelfRefFKReturnsNil(t *testing.T) { - col := schema.Column{Type: "integer", FK: "cats.id"} +func TestGenerateValue_NullableSelfRefFKReturnsNil(t *testing.T) { + col := schema.Column{Type: "integer", FK: "cats.id", Nullable: true} pks := map[string][]interface{}{} // no PKs yet for self val, err := generateValue(col, "parent_id", "cats", pks, nil, "") if err != nil { t.Fatal(err) } if val != nil { - t.Errorf("self-ref FK with no PKs should be nil, got %v", val) + t.Errorf("nullable self-ref FK with no PKs should be nil, got %v", val) } } @@ -834,6 +834,130 @@ func TestGenerateFilteredWithCountsOverrideWinsOverEnumRows(t *testing.T) { } } +func TestGenerateNullableSelfReferenceCreatesRootRow(t *testing.T) { + s := &schema.Schema{ + Tables: map[string]schema.Table{ + "categories": { + Columns: map[string]schema.Column{ + "id": {Type: "integer", PK: true}, + "parent_id": {Type: "integer", FK: "categories.id", Nullable: true}, + }, + }, + }, + } + + data, err := Generate(s, []string{"categories"}, 3, 0, nil, "pgx") + if err != nil { + t.Fatalf("Generate: %v", err) + } + if got := len(data["categories"]); got != 3 { + t.Fatalf("categories rows = %d, want 3", got) + } + if data["categories"][0]["parent_id"] != nil { + t.Fatalf("first self-referential row should be a NULL root, got %v", data["categories"][0]["parent_id"]) + } + if got := maxSelfRefDepth(data["categories"], "id", "parent_id"); got > DefaultSelfRefDepth { + t.Fatalf("self-reference depth = %d, want <= %d", got, DefaultSelfRefDepth) + } 
+} + +func TestGenerateHardSelfReferenceBackfillsValidManagers(t *testing.T) { + s := &schema.Schema{ + Tables: map[string]schema.Table{ + "employees": { + Columns: map[string]schema.Column{ + "id": {Type: "integer", PK: true}, + "manager_id": {Type: "integer", FK: "employees.id"}, + }, + }, + }, + } + + data, err := GenerateWithOptions(s, []string{"employees"}, 5, 0, nil, "pgx", GenerateOptions{SelfRefDepth: 2}) + if err != nil { + t.Fatalf("GenerateWithOptions: %v", err) + } + rows := data["employees"] + if got := len(rows); got != 5 { + t.Fatalf("employees rows = %d, want 5", got) + } + if rows[0]["manager_id"] == nil { + t.Fatal("non-nullable self-reference should be backfilled on first row") + } + if rows[0]["manager_id"] != rows[0]["id"] { + t.Fatalf("first hard self-reference should self-root, got manager_id=%v id=%v", rows[0]["manager_id"], rows[0]["id"]) + } + ids := map[interface{}]bool{} + for _, row := range rows { + ids[row["id"]] = true + } + for i, row := range rows { + if row["manager_id"] == nil { + t.Fatalf("row %d manager_id is nil for non-nullable self-FK", i) + } + if !ids[row["manager_id"]] { + t.Fatalf("row %d manager_id=%v does not reference generated employee IDs %v", i, row["manager_id"], ids) + } + } + if got := maxSelfRefDepth(rows, "id", "manager_id"); got > 2 { + t.Fatalf("self-reference depth = %d, want <= 2", got) + } +} + +func TestGenerateWithOptionsSelfRefDepthZeroDoesNotBuildNullableChain(t *testing.T) { + s := &schema.Schema{ + Tables: map[string]schema.Table{ + "categories": { + Columns: map[string]schema.Column{ + "id": {Type: "integer", PK: true}, + "parent_id": {Type: "integer", FK: "categories.id", Nullable: true}, + }, + }, + }, + } + + data, err := GenerateWithOptions(s, []string{"categories"}, 4, 0, nil, "pgx", GenerateOptions{SelfRefDepth: 0}) + if err != nil { + t.Fatalf("GenerateWithOptions: %v", err) + } + for i, row := range data["categories"] { + if row["parent_id"] != nil { + t.Fatalf("row %d parent_id = %v, want 
nil when self-ref depth is 0", i, row["parent_id"]) + } + } +} + +func maxSelfRefDepth(rows []map[string]interface{}, pkCol, fkCol string) int { + byID := make(map[interface{}]map[string]interface{}, len(rows)) + for _, row := range rows { + byID[row[pkCol]] = row + } + + maxDepth := 0 + for _, row := range rows { + seen := map[interface{}]bool{} + depth := 0 + current := row + for { + fk := current[fkCol] + if fk == nil || seen[fk] { + break + } + seen[fk] = true + next := byID[fk] + if next == nil || next[pkCol] == current[pkCol] { + break + } + depth++ + current = next + } + if depth > maxDepth { + maxDepth = depth + } + } + return maxDepth +} + func TestGenerate_differentSeedsDifferentOutput(t *testing.T) { s := &schema.Schema{ Tables: map[string]schema.Table{ diff --git a/internal/graph/graph.go b/internal/graph/graph.go index 9287f0c..37baa38 100644 --- a/internal/graph/graph.go +++ b/internal/graph/graph.go @@ -48,7 +48,7 @@ func Build(s *schema.Schema) *Graph { } refTable := parts[0] if refTable == tableName { - continue // self-reference: skip + continue } g.edges[refTable] = append(g.edges[refTable], tableName) g.inDegree[tableName]++ @@ -87,12 +87,27 @@ func (g *Graph) TopologicalSort() ([]string, error) { } if len(sorted) != len(g.nodes) { - return nil, fmt.Errorf("circular FK dependency detected — use --disable-fk to bypass") + cycles := make(map[string]bool) + for tableName, degree := range inDegree { + if degree > 0 { + cycles[tableName] = true + } + } + return nil, fmt.Errorf("circular FK dependency detected among %s — make one FK nullable, use deferrable constraints manually, or use --disable-fk to bypass", strings.Join(sortedKeys(cycles), ", ")) } return sorted, nil } +func sortedKeys(values map[string]bool) []string { + keys := make([]string, 0, len(values)) + for key := range values { + keys = append(keys, key) + } + sort.Strings(keys) + return keys +} + // Parents returns the tables that `table` has hard (non-nullable) FK dependencies on. 
func (g *Graph) Parents(table string) []string { var parents []string @@ -119,9 +134,13 @@ func (g *Graph) Children(table string) []string { // order and, per table, which parent tables it depends on (hard dependencies // listed first, nullable/optional ones marked with "?"). func RenderPlan(s *schema.Schema, sortedTables []string, rows int) string { + return RenderPlanWithCounts(s, sortedTables, rows, nil) +} + +func RenderPlanWithCounts(s *schema.Schema, sortedTables []string, rows int, tableRows map[string]int) string { var sb strings.Builder - fmt.Fprintf(&sb, "\n=== Dry Run — Seed Plan (%d tables, %d rows each) ===\n\n", len(sortedTables), rows) + fmt.Fprintf(&sb, "\n=== Dry Run — Seed Plan (%d tables, default %d rows) ===\n\n", len(sortedTables), rows) // Calculate column widths. numWidth := len(fmt.Sprintf("%d", len(sortedTables))) @@ -132,8 +151,8 @@ func RenderPlan(s *schema.Schema, sortedTables []string, rows int) string { } } - fmt.Fprintf(&sb, " %-*s %-*s %s\n", numWidth, "#", tableWidth, "Table", "Depends On") - fmt.Fprintf(&sb, " %s\n", strings.Repeat("─", numWidth+2+tableWidth+2+40)) + fmt.Fprintf(&sb, " %-*s %-*s %-6s %s\n", numWidth, "#", tableWidth, "Table", "Rows", "Depends On") + fmt.Fprintf(&sb, " %s\n", strings.Repeat("─", numWidth+2+tableWidth+2+6+2+40)) for i, tableName := range sortedTables { table := s.Tables[tableName] @@ -168,7 +187,11 @@ func RenderPlan(s *schema.Schema, sortedTables []string, rows int) string { deps = strings.Join(all, ", ") } - fmt.Fprintf(&sb, " %-*d %-*s %s\n", numWidth, i+1, tableWidth, tableName, deps) + tableCount := rows + if override := tableRows[tableName]; override > 0 { + tableCount = override + } + fmt.Fprintf(&sb, " %-*d %-*s %-6d %s\n", numWidth, i+1, tableWidth, tableName, tableCount, deps) } fmt.Fprintln(&sb) diff --git a/internal/graph/graph_test.go b/internal/graph/graph_test.go index 14313a5..b984020 100644 --- a/internal/graph/graph_test.go +++ b/internal/graph/graph_test.go @@ -221,6 +221,28 @@ 
func TestTopologicalSort_cycle_returnsError(t *testing.T) { if !strings.Contains(err.Error(), "circular") { t.Errorf("error should mention circular dependency, got: %v", err) } + for _, tableName := range []string{"a", "b"} { + if !strings.Contains(err.Error(), tableName) { + t.Errorf("error should name cycle table %q, got: %v", tableName, err) + } + } +} + +func TestTopologicalSort_hardSelfReferenceSortsSingleTable(t *testing.T) { + s := makeSchema(map[string]map[string]schema.Column{ + "employees": { + "id": {PK: true}, + "manager_id": {FK: "employees.id"}, + }, + }) + + sorted, err := Build(s).TopologicalSort() + if err != nil { + t.Fatalf("hard self-reference should be handled during generation, got planning error: %v", err) + } + if len(sorted) != 1 || sorted[0] != "employees" { + t.Fatalf("sorted = %v, want [employees]", sorted) + } } // ── RenderPlan ──────────────────────────────────────────────────────────────── @@ -299,6 +321,27 @@ func TestRenderPlan_rootHasDash(t *testing.T) { } } +func TestRenderPlanWithCountsShowsOverrides(t *testing.T) { + s := makeSchema(map[string]map[string]schema.Column{ + "users": {"id": {PK: true}}, + "orders": {"id": {PK: true}, "user_id": {FK: "users.id"}}, + }) + g := Build(s) + sorted, _ := g.TopologicalSort() + out := RenderPlanWithCounts(s, sorted, 2, map[string]int{"orders": 5}) + + lines := strings.Split(out, "\n") + for _, line := range lines { + if strings.Contains(line, "orders") { + if !strings.Contains(line, "5") { + t.Fatalf("orders row should show override 5 rows, got: %q", line) + } + return + } + } + t.Fatalf("orders row not found in plan:\n%s", out) +} + // ── Parents / Children ────────────────────────────────────────────────────── func TestParents_returnsHardFKParents(t *testing.T) { diff --git a/internal/tui/config.go b/internal/tui/config.go index bf69fd0..dba4551 100644 --- a/internal/tui/config.go +++ b/internal/tui/config.go @@ -25,11 +25,16 @@ type configField struct { toggled bool } -func 
newConfig(rows, batchSize, enumRows int, truncate bool) configModel { +func newConfig(rows, batchSize, enumRows int, truncate bool, selfRefDepth ...int) configModel { + depth := 2 + if len(selfRefDepth) > 0 { + depth = selfRefDepth[0] + } fields := []configField{ makeNumericField("Rows per table", rows), makeNumericField("Batch size", batchSize), makeNumericField("Enum rows (0 = use rows)", enumRows), + makeNumericField("Self-ref depth", depth), {label: "Truncate before seeding", isToggle: true, toggled: truncate}, } fields[0].input.Focus() @@ -52,8 +57,11 @@ func (m configModel) BatchSize() int { } return v } -func (m configModel) EnumRows() int { return m.intVal(2, 0) } -func (m configModel) Truncate() bool { return m.fields[3].toggled } +func (m configModel) EnumRows() int { return m.intVal(2, 0) } +func (m configModel) SelfRefDepth() int { + return m.intVal(3, 2) +} +func (m configModel) Truncate() bool { return m.fields[4].toggled } func (m configModel) intVal(idx, fallback int) int { if idx >= len(m.fields) { diff --git a/internal/tui/cycle_test.go b/internal/tui/cycle_test.go new file mode 100644 index 0000000..272884b --- /dev/null +++ b/internal/tui/cycle_test.go @@ -0,0 +1,12 @@ +package tui + +import "github.com/AxeForging/seedstorm/internal/schema" + +func hardSelfReferenceTUISchema() *schema.Schema { + return makeSchema(map[string]map[string]schema.Column{ + "employees": { + "id": {Type: "integer", PK: true}, + "manager_id": {Type: "integer", FK: "employees.id"}, + }, + }) +} diff --git a/internal/tui/execute.go b/internal/tui/execute.go index 7a55630..969f3ac 100644 --- a/internal/tui/execute.go +++ b/internal/tui/execute.go @@ -282,7 +282,9 @@ func startSeed(ctx context.Context, s *seedParams) tea.Cmd { } } - data, err := faker.GenerateFilteredWithCounts(s.schema, s.tables, s.tables, s.rows, s.enumRows, s.tableRows, conn, s.dbType) + data, err := faker.GenerateFilteredWithOptions(s.schema, s.tables, s.tables, s.rows, s.enumRows, s.tableRows, conn, 
s.dbType, faker.GenerateOptions{ + SelfRefDepth: s.selfRefDepth, + }) if err != nil { return seedDoneMsg{err: fmt.Errorf("data generation failed: %w", err)} } @@ -317,7 +319,9 @@ func startSeed(ctx context.Context, s *seedParams) tea.Cmd { // startDryRun returns a tea.Cmd that generates data and builds a summary. func startDryRun(s *seedParams) tea.Cmd { return func() tea.Msg { - data, err := faker.GenerateFilteredWithCounts(s.schema, s.tables, s.tables, s.rows, s.enumRows, s.tableRows, nil, s.dbType) + data, err := faker.GenerateFilteredWithOptions(s.schema, s.tables, s.tables, s.rows, s.enumRows, s.tableRows, nil, s.dbType, faker.GenerateOptions{ + SelfRefDepth: s.selfRefDepth, + }) if err != nil { return dryRunDoneMsg{err: fmt.Errorf("data generation failed: %w", err)} } diff --git a/internal/tui/gaps.go b/internal/tui/gaps.go index 5d5ac16..e0762b9 100644 --- a/internal/tui/gaps.go +++ b/internal/tui/gaps.go @@ -51,7 +51,7 @@ type GapsModel struct { } // RunGaps launches the interactive TUI for the gaps command. 
-func RunGaps(ctx context.Context, s *schema.Schema, dbType, dsn string, counts map[string]int64, defaultRows, defaultBatchSize, defaultEnumRows int) error { +func RunGaps(ctx context.Context, s *schema.Schema, dbType, dsn string, counts map[string]int64, defaultRows, defaultBatchSize, defaultEnumRows int, defaultSelfRefDepth ...int) error { g := graph.Build(s) sortedAll, err := g.TopologicalSort() if err != nil { @@ -84,7 +84,7 @@ func RunGaps(ctx context.Context, s *schema.Schema, dbType, dsn string, counts m dsn: dsn, counts: counts, picker: newGapsPicker(items, counts, 40), - config: newConfig(defaultRows, defaultBatchSize, defaultEnumRows, false), + config: newConfig(defaultRows, defaultBatchSize, defaultEnumRows, false, defaultSelfRefDepth...), height: 40, width: 80, } @@ -255,15 +255,16 @@ func (m GapsModel) updateReview(msg tea.Msg) (tea.Model, tea.Cmd) { } if m.review.done { params := &seedParams{ - schema: m.schema, - tables: m.review.tables, - rows: m.review.rows, - enumRows: m.review.enumRows, - tableRows: m.review.tableRows, - batchSize: m.review.batch, - truncate: false, // gaps never truncates - dbType: m.dbType, - dsn: m.dsn, + schema: m.schema, + tables: m.review.tables, + rows: m.review.rows, + enumRows: m.review.enumRows, + selfRefDepth: m.config.SelfRefDepth(), + tableRows: m.review.tableRows, + batchSize: m.review.batch, + truncate: false, // gaps never truncates + dbType: m.dbType, + dsn: m.dsn, } m.execute = newExecute(len(m.review.tables), m.review.dryRun) m.step = gapsStepExecute @@ -339,7 +340,9 @@ func startGapsFill(ctx context.Context, s *seedParams, allSorted []string) tea.C return seedDoneMsg{err: fmt.Errorf("failed to ping database: %w", err)} } - data, err := faker.GenerateFilteredWithCounts(s.schema, allSorted, s.tables, s.rows, s.enumRows, s.tableRows, conn, s.dbType) + data, err := faker.GenerateFilteredWithOptions(s.schema, allSorted, s.tables, s.rows, s.enumRows, s.tableRows, conn, s.dbType, faker.GenerateOptions{ + 
SelfRefDepth: s.selfRefDepth, + }) if err != nil { return seedDoneMsg{err: fmt.Errorf("data generation failed: %w", err)} } @@ -374,7 +377,9 @@ func startGapsFill(ctx context.Context, s *seedParams, allSorted []string) tea.C // startGapsDryRun generates data for gap tables and returns a preview. func startGapsDryRun(s *seedParams, allSorted []string) tea.Cmd { return func() tea.Msg { - data, err := faker.GenerateFilteredWithCounts(s.schema, allSorted, s.tables, s.rows, s.enumRows, s.tableRows, nil, s.dbType) + data, err := faker.GenerateFilteredWithOptions(s.schema, allSorted, s.tables, s.rows, s.enumRows, s.tableRows, nil, s.dbType, faker.GenerateOptions{ + SelfRefDepth: s.selfRefDepth, + }) if err != nil { return dryRunDoneMsg{err: fmt.Errorf("data generation failed: %w", err)} } diff --git a/internal/tui/gaps_test.go b/internal/tui/gaps_test.go index 95fcae5..3164259 100644 --- a/internal/tui/gaps_test.go +++ b/internal/tui/gaps_test.go @@ -44,6 +44,27 @@ func buildGapsModel() GapsModel { } } +func TestStartGapsDryRunHandlesHardSelfReference(t *testing.T) { + params := &seedParams{ + schema: hardSelfReferenceTUISchema(), + tables: []string{"employees"}, + rows: 3, + selfRefDepth: 2, + dbType: "pgx", + } + msg := startGapsDryRun(params, []string{"employees"})() + done, ok := msg.(dryRunDoneMsg) + if !ok { + t.Fatalf("msg type = %T, want dryRunDoneMsg", msg) + } + if done.err != nil { + t.Fatalf("startGapsDryRun: %v", done.err) + } + if done.total != 3 { + t.Fatalf("total = %d, want 3", done.total) + } +} + func sendGapsKey(m tea.Model, key string) tea.Model { return sendKey(m, key) // reuse from wizard_test.go } diff --git a/internal/tui/generate.go b/internal/tui/generate.go index 3b69fbb..40845be 100644 --- a/internal/tui/generate.go +++ b/internal/tui/generate.go @@ -31,24 +31,34 @@ const ( // genConfigModel extends config with format and output fields. 
type genConfigModel struct { - rowsInput textinput.Model - outInput textinput.Model - formatIdx int // 0=yaml 1=json 2=sql - focused int // 0=rows 1=format 2=output - done bool - back bool - quitting bool + rowsInput textinput.Model + depthInput textinput.Model + outInput textinput.Model + formatIdx int // 0=yaml 1=json 2=sql + focused int // 0=rows 1=depth 2=format 3=output + done bool + back bool + quitting bool } var genFormats = []string{"yaml", "json", "sql"} -func newGenConfig(rows int, format, outPath string) genConfigModel { +func newGenConfig(rows int, format, outPath string, selfRefDepth ...int) genConfigModel { ri := textinput.New() ri.SetValue(fmt.Sprintf("%d", rows)) ri.CharLimit = 10 ri.Width = 12 ri.Focus() + depth := 2 + if len(selfRefDepth) > 0 { + depth = selfRefDepth[0] + } + di := textinput.New() + di.SetValue(fmt.Sprintf("%d", depth)) + di.CharLimit = 10 + di.Width = 12 + oi := textinput.New() oi.SetValue(outPath) oi.CharLimit = 200 @@ -63,9 +73,10 @@ func newGenConfig(rows int, format, outPath string) genConfigModel { } return genConfigModel{ - rowsInput: ri, - outInput: oi, - formatIdx: fmtIdx, + rowsInput: ri, + depthInput: di, + outInput: oi, + formatIdx: fmtIdx, } } @@ -78,6 +89,13 @@ func (m genConfigModel) Rows() int { } func (m genConfigModel) Format() string { return genFormats[m.formatIdx] } func (m genConfigModel) OutPath() string { return strings.TrimSpace(m.outInput.Value()) } +func (m genConfigModel) SelfRefDepth() int { + n, err := strconv.Atoi(strings.TrimSpace(m.depthInput.Value())) + if err != nil || n < 0 { + return 2 + } + return n +} func (m genConfigModel) Update(msg tea.Msg) (genConfigModel, tea.Cmd) { switch msg := msg.(type) { @@ -85,38 +103,44 @@ func (m genConfigModel) Update(msg tea.Msg) (genConfigModel, tea.Cmd) { switch msg.String() { case "tab", "down", "j": m.rowsInput.Blur() + m.depthInput.Blur() m.outInput.Blur() - m.focused = (m.focused + 1) % 3 + m.focused = (m.focused + 1) % 4 switch m.focused { case 0: 
m.rowsInput.Focus() - case 2: + case 1: + m.depthInput.Focus() + case 3: m.outInput.Focus() } return m, nil case "shift+tab", "up", "k": m.rowsInput.Blur() + m.depthInput.Blur() m.outInput.Blur() - m.focused = (m.focused + 2) % 3 + m.focused = (m.focused + 3) % 4 switch m.focused { case 0: m.rowsInput.Focus() - case 2: + case 1: + m.depthInput.Focus() + case 3: m.outInput.Focus() } return m, nil case "left", "h": - if m.focused == 1 { + if m.focused == 2 { m.formatIdx = (m.formatIdx + len(genFormats) - 1) % len(genFormats) return m, nil } case "right", "l": - if m.focused == 1 { + if m.focused == 2 { m.formatIdx = (m.formatIdx + 1) % len(genFormats) return m, nil } case " ": - if m.focused == 1 { + if m.focused == 2 { m.formatIdx = (m.formatIdx + 1) % len(genFormats) return m, nil } @@ -124,7 +148,7 @@ func (m genConfigModel) Update(msg tea.Msg) (genConfigModel, tea.Cmd) { m.done = true return m, nil case "b": - if m.focused == 1 { // only works on format selector (not text inputs) + if m.focused == 2 { // only works on format selector (not text inputs) m.back = true return m, nil } @@ -143,7 +167,11 @@ func (m genConfigModel) Update(msg tea.Msg) (genConfigModel, tea.Cmd) { var cmd tea.Cmd m.rowsInput, cmd = m.rowsInput.Update(msg) return m, cmd - case 2: + case 1: + var cmd tea.Cmd + m.depthInput, cmd = m.depthInput.Update(msg) + return m, cmd + case 3: var cmd tea.Cmd m.outInput, cmd = m.outInput.Update(msg) return m, cmd @@ -161,6 +189,7 @@ func (m genConfigModel) View() string { view string }{ {"Rows per table", m.rowsInput.View()}, + {"Self-ref depth", m.depthInput.View()}, {"Format", m.formatView()}, {"Output file", m.outInput.View()}, } @@ -224,7 +253,7 @@ type GenModel struct { } // RunGenerate launches the interactive TUI for the generate command. 
-func RunGenerate(ctx context.Context, s *schema.Schema, dbType, format, outPath string, defaultRows int) error { +func RunGenerate(ctx context.Context, s *schema.Schema, dbType, format, outPath string, defaultRows int, defaultSelfRefDepth ...int) error { g := graph.Build(s) sortedAll, err := g.TopologicalSort() if err != nil { @@ -244,7 +273,7 @@ func RunGenerate(ctx context.Context, s *schema.Schema, dbType, format, outPath sortedAll: sortedAll, dbType: dbType, picker: newTablePicker(items, 40), - genConfig: newGenConfig(defaultRows, format, outPath), + genConfig: newGenConfig(defaultRows, format, outPath, defaultSelfRefDepth...), height: 40, width: 80, } @@ -367,7 +396,7 @@ func (m GenModel) updateRows(msg tea.Msg) (tea.Model, tea.Cmd) { m.execute.dryRun = true // generate is always a "dry run" (no DB) m.step = genStepExecute - return m, tea.Batch(m.execute.spinner.Tick, startGenerate(m.schema, m.volumes.tables, m.genConfig.Rows(), m.volumes.TableRows(), m.genConfig.Format(), m.genConfig.OutPath(), m.dbType)) + return m, tea.Batch(m.execute.spinner.Tick, startGenerate(m.schema, m.volumes.tables, m.genConfig.Rows(), m.genConfig.SelfRefDepth(), m.volumes.TableRows(), m.genConfig.Format(), m.genConfig.OutPath(), m.dbType)) } return m, cmd } @@ -425,9 +454,11 @@ func (m GenModel) View() string { } // startGenerate generates data and optionally writes to file. 
-func startGenerate(s *schema.Schema, tables []string, rows int, tableRows map[string]int, format, outPath, dbType string) tea.Cmd { +func startGenerate(s *schema.Schema, tables []string, rows, selfRefDepth int, tableRows map[string]int, format, outPath, dbType string) tea.Cmd { return func() tea.Msg { - data, err := faker.GenerateFilteredWithCounts(s, tables, tables, rows, 0, tableRows, nil, dbType) + data, err := faker.GenerateFilteredWithOptions(s, tables, tables, rows, 0, tableRows, nil, dbType, faker.GenerateOptions{ + SelfRefDepth: selfRefDepth, + }) if err != nil { return generateDoneMsg{err: fmt.Errorf("generation failed: %w", err)} } diff --git a/internal/tui/generate_test.go b/internal/tui/generate_test.go index 320be5a..720d147 100644 --- a/internal/tui/generate_test.go +++ b/internal/tui/generate_test.go @@ -34,6 +34,20 @@ func buildGenModel() GenModel { } } +func TestStartGenerateHandlesHardSelfReference(t *testing.T) { + msg := startGenerate(hardSelfReferenceTUISchema(), []string{"employees"}, 3, 2, nil, "yaml", "", "pgx")() + done, ok := msg.(generateDoneMsg) + if !ok { + t.Fatalf("msg type = %T, want generateDoneMsg", msg) + } + if done.err != nil { + t.Fatalf("startGenerate: %v", done.err) + } + if done.total != 3 { + t.Fatalf("total = %d, want 3", done.total) + } +} + func sendGenKey(m tea.Model, key string) tea.Model { return sendKey(m, key) } diff --git a/internal/tui/tui.go b/internal/tui/tui.go index 897f21c..71a5d94 100644 --- a/internal/tui/tui.go +++ b/internal/tui/tui.go @@ -23,15 +23,16 @@ const ( // seedParams holds everything needed to execute the seed operation. 
type seedParams struct { - schema *schema.Schema - tables []string - rows int - enumRows int - tableRows map[string]int - batchSize int - truncate bool - dbType string - dsn string + schema *schema.Schema + tables []string + rows int + enumRows int + selfRefDepth int + tableRows map[string]int + batchSize int + truncate bool + dbType string + dsn string } // Model is the top-level TUI model orchestrating the wizard steps. @@ -57,7 +58,7 @@ type Model struct { } // Run launches the interactive TUI and returns when the user completes or aborts. -func Run(ctx context.Context, s *schema.Schema, dbType, dsn string, defaultRows, defaultBatchSize, defaultEnumRows int, defaultTruncate bool) error { +func Run(ctx context.Context, s *schema.Schema, dbType, dsn string, defaultRows, defaultBatchSize, defaultEnumRows int, defaultTruncate bool, defaultSelfRefDepth ...int) error { g := graph.Build(s) sortedAll, err := g.TopologicalSort() if err != nil { @@ -84,7 +85,7 @@ func Run(ctx context.Context, s *schema.Schema, dbType, dsn string, defaultRows, dbType: dbType, dsn: dsn, picker: newTablePicker(items, 24), - config: newConfig(defaultRows, defaultBatchSize, defaultEnumRows, defaultTruncate), + config: newConfig(defaultRows, defaultBatchSize, defaultEnumRows, defaultTruncate, defaultSelfRefDepth...), height: 24, width: 80, } @@ -247,15 +248,16 @@ func (m Model) updateReview(msg tea.Msg) (tea.Model, tea.Cmd) { } if m.review.done { params := &seedParams{ - schema: m.schema, - tables: m.review.tables, - rows: m.review.rows, - enumRows: m.review.enumRows, - tableRows: m.review.tableRows, - batchSize: m.review.batch, - truncate: m.review.truncate, - dbType: m.dbType, - dsn: m.dsn, + schema: m.schema, + tables: m.review.tables, + rows: m.review.rows, + enumRows: m.review.enumRows, + selfRefDepth: m.config.SelfRefDepth(), + tableRows: m.review.tableRows, + batchSize: m.review.batch, + truncate: m.review.truncate, + dbType: m.dbType, + dsn: m.dsn, } m.execute = 
newExecute(len(m.review.tables), m.review.dryRun) diff --git a/internal/tui/wizard_test.go b/internal/tui/wizard_test.go index 8e81048..d4b54cf 100644 --- a/internal/tui/wizard_test.go +++ b/internal/tui/wizard_test.go @@ -38,6 +38,27 @@ func buildTestModel() Model { } } +func TestStartDryRunHandlesHardSelfReference(t *testing.T) { + params := &seedParams{ + schema: hardSelfReferenceTUISchema(), + tables: []string{"employees"}, + rows: 3, + selfRefDepth: 2, + dbType: "pgx", + } + msg := startDryRun(params)() + done, ok := msg.(dryRunDoneMsg) + if !ok { + t.Fatalf("msg type = %T, want dryRunDoneMsg", msg) + } + if done.err != nil { + t.Fatalf("startDryRun: %v", done.err) + } + if done.total != 3 { + t.Fatalf("total = %d, want 3", done.total) + } +} + func sendKey(m tea.Model, key string) tea.Model { var msg tea.Msg switch key { diff --git a/internal/web/handlers_api_test.go b/internal/web/handlers_api_test.go index cfad470..fa81df8 100644 --- a/internal/web/handlers_api_test.go +++ b/internal/web/handlers_api_test.go @@ -110,6 +110,24 @@ func TestBuildGraphPayload_cycle(t *testing.T) { } } +func TestBuildGraphPayload_hardSelfReferenceIsSeedable(t *testing.T) { + sc := &schema.Schema{ + Tables: map[string]schema.Table{ + "employees": {Columns: map[string]schema.Column{ + "id": {PK: true, Type: "int"}, + "manager_id": {Type: "int", FK: "employees.id"}, + }}, + }, + } + payload := buildGraphPayload(sc, nil) + if payload.Cycle { + t.Fatalf("hard self-reference should be handled during generation, got cycle") + } + if !reflect.DeepEqual(payload.Order, []string{"employees"}) { + t.Fatalf("order = %v, want [employees]", payload.Order) + } +} + func TestHandleTablePreviewJSON_requiresSession(t *testing.T) { s, err := New(Options{Addr: "127.0.0.1:0"}) if err != nil { diff --git a/internal/web/runners.go b/internal/web/runners.go index c32e3b4..2b6e2dd 100644 --- a/internal/web/runners.go +++ b/internal/web/runners.go @@ -40,14 +40,15 @@ func jobLogger(w io.Writer) 
zerolog.Logger { // restricts seeding to the listed tables plus their transitive non-nullable // FK parents. type SeedRequest struct { - Rows int `json:"rows"` - EnumRows int `json:"enumRows"` - BatchSize int `json:"batchSize"` - DisableFK bool `json:"disableFK"` - Truncate bool `json:"truncate"` - DryRun bool `json:"dryRun"` - Tables []string `json:"tables,omitempty"` - TableRows map[string]int `json:"tableRows,omitempty"` + Rows int `json:"rows"` + EnumRows int `json:"enumRows"` + BatchSize int `json:"batchSize"` + SelfRefDepth *int `json:"selfRefDepth,omitempty"` + DisableFK bool `json:"disableFK"` + Truncate bool `json:"truncate"` + DryRun bool `json:"dryRun"` + Tables []string `json:"tables,omitempty"` + TableRows map[string]int `json:"tableRows,omitempty"` } func (s *Server) runSeed(ctx context.Context, sess *Session, req SeedRequest, jc JobControl) (map[string]any, error) { @@ -121,7 +122,9 @@ func (s *Server) runSeed(ctx context.Context, sess *Session, req SeedRequest, jc } // GenerateFiltered preloads PKs from allSorted so target tables can FK-ref // already-populated parents; targetTables alone is what gets generated. - data, err := faker.GenerateFilteredWithCounts(sc, allSorted, targetTables, req.Rows, req.EnumRows, cleanTableRows(req.TableRows), connArg, sess.DBType) + data, err := faker.GenerateFilteredWithOptions(sc, allSorted, targetTables, req.Rows, req.EnumRows, cleanTableRows(req.TableRows), connArg, sess.DBType, faker.GenerateOptions{ + SelfRefDepth: requestSelfRefDepth(req.SelfRefDepth), + }) if err != nil { return nil, fmt.Errorf("generation: %w", err) } @@ -183,13 +186,14 @@ func (s *Server) runSeed(ctx context.Context, sess *Session, req SeedRequest, jc // GapsRequest mirrors the gaps CLI flags. Tables, when set, restricts the // fill phase to the listed empty tables (plus their transitive parents). 
type GapsRequest struct { - Rows int `json:"rows"` - EnumRows int `json:"enumRows"` - BatchSize int `json:"batchSize"` - Fill bool `json:"fill"` - DryRun bool `json:"dryRun"` - Tables []string `json:"tables,omitempty"` - TableRows map[string]int `json:"tableRows,omitempty"` + Rows int `json:"rows"` + EnumRows int `json:"enumRows"` + BatchSize int `json:"batchSize"` + SelfRefDepth *int `json:"selfRefDepth,omitempty"` + Fill bool `json:"fill"` + DryRun bool `json:"dryRun"` + Tables []string `json:"tables,omitempty"` + TableRows map[string]int `json:"tableRows,omitempty"` } func (s *Server) runGaps(ctx context.Context, sess *Session, req GapsRequest, jc JobControl) (map[string]any, error) { @@ -260,7 +264,9 @@ func (s *Server) runGaps(ctx context.Context, sess *Session, req GapsRequest, jc jc.Phase("generate") log.Info().Int("gap_tables", len(gapTables)).Int("rows", req.Rows).Msg("Generating data for empty tables") - data, err := faker.GenerateFilteredWithCounts(sc, allSorted, gapTables, req.Rows, req.EnumRows, cleanTableRows(req.TableRows), conn, sess.DBType) + data, err := faker.GenerateFilteredWithOptions(sc, allSorted, gapTables, req.Rows, req.EnumRows, cleanTableRows(req.TableRows), conn, sess.DBType, faker.GenerateOptions{ + SelfRefDepth: requestSelfRefDepth(req.SelfRefDepth), + }) if err != nil { return nil, err } @@ -294,10 +300,11 @@ func (s *Server) runGaps(ctx context.Context, sess *Session, req GapsRequest, jc // GenerateRequest mirrors the generate CLI flags. Tables, when set, restricts // generation to the listed tables plus their transitive non-nullable parents. 
type GenerateRequest struct { - Rows int `json:"rows"` - Format string `json:"format"` // yaml | json | sql - Tables []string `json:"tables,omitempty"` - TableRows map[string]int `json:"tableRows,omitempty"` + Rows int `json:"rows"` + SelfRefDepth *int `json:"selfRefDepth,omitempty"` + Format string `json:"format"` // yaml | json | sql + Tables []string `json:"tables,omitempty"` + TableRows map[string]int `json:"tableRows,omitempty"` } func (s *Server) runGenerate(ctx context.Context, sess *Session, req GenerateRequest, jc JobControl) (map[string]any, error) { @@ -331,7 +338,9 @@ func (s *Server) runGenerate(ctx context.Context, sess *Session, req GenerateReq jc.Phase("generate") log.Info().Int("rows", req.Rows).Int("tables", len(targetTables)).Msg("Generating fake data") // GenerateFiltered is fine here too: with conn=nil it skips PK preload. - data, err := faker.GenerateFilteredWithCounts(sc, allSorted, targetTables, req.Rows, 0, cleanTableRows(req.TableRows), nil, sess.DBType) + data, err := faker.GenerateFilteredWithOptions(sc, allSorted, targetTables, req.Rows, 0, cleanTableRows(req.TableRows), nil, sess.DBType, faker.GenerateOptions{ + SelfRefDepth: requestSelfRefDepth(req.SelfRefDepth), + }) if err != nil { return nil, err } @@ -379,6 +388,13 @@ func cleanTableRows(rows map[string]int) map[string]int { return clean } +func requestSelfRefDepth(depth *int) int { + if depth == nil { + return faker.DefaultSelfRefDepth + } + return *depth +} + func encodeData(data map[string][]map[string]any, sortedTables []string, format, dbType string) (string, error) { switch strings.ToLower(format) { case "json": diff --git a/internal/web/runners_test.go b/internal/web/runners_test.go index f73c072..6a70c99 100644 --- a/internal/web/runners_test.go +++ b/internal/web/runners_test.go @@ -3,6 +3,7 @@ package web import ( "context" "reflect" + "strings" "testing" "github.com/AxeForging/seedstorm/internal/schema" @@ -123,6 +124,72 @@ func 
TestRunGenerateAppliesTableRowOverridesWithoutBreakingDefaults(t *testing.T } } +func TestRunSeedDryRunHandlesHardSelfReference(t *testing.T) { + srv, err := New(Options{Addr: "127.0.0.1:0"}) + if err != nil { + t.Fatalf("New: %v", err) + } + sess := &Session{ + DBType: "pgx", + schema: hardSelfReferenceSchema(), + } + + depth := 2 + result, err := srv.runSeed(context.Background(), sess, SeedRequest{ + Rows: 3, + BatchSize: 100, + SelfRefDepth: &depth, + DryRun: true, + }, testJobControl{}) + if err != nil { + t.Fatalf("runSeed: %v", err) + } + if got := result["totalRows"]; got != 3 { + t.Fatalf("totalRows = %v, want 3", got) + } + output, _ := result["output"].(string) + if output == "" || !containsAll(output, "employees", "manager_id") { + t.Fatalf("dry-run output should include self-referential insert SQL, got %q", output) + } +} + +func TestRunGenerateHandlesHardSelfReference(t *testing.T) { + srv, err := New(Options{Addr: "127.0.0.1:0"}) + if err != nil { + t.Fatalf("New: %v", err) + } + sess := &Session{ + DBType: "pgx", + schema: hardSelfReferenceSchema(), + } + + depth := 2 + result, err := srv.runGenerate(context.Background(), sess, GenerateRequest{ + Rows: 3, + SelfRefDepth: &depth, + Format: "yaml", + }, testJobControl{}) + if err != nil { + t.Fatalf("runGenerate: %v", err) + } + if got := result["totalRows"]; got != 3 { + t.Fatalf("totalRows = %v, want 3", got) + } + output, _ := result["output"].(string) + if output == "" || !containsAll(output, "employees", "manager_id") { + t.Fatalf("generated output should include self-referential values, got %q", output) + } +} + +func containsAll(value string, parts ...string) bool { + for _, part := range parts { + if !strings.Contains(value, part) { + return false + } + } + return true +} + func runnerRowCountSchema() *schema.Schema { return &schema.Schema{ Tables: map[string]schema.Table{ @@ -141,3 +208,16 @@ func runnerRowCountSchema() *schema.Schema { }, } } + +func hardSelfReferenceSchema() *schema.Schema { 
+ return &schema.Schema{ + Tables: map[string]schema.Table{ + "employees": { + Columns: map[string]schema.Column{ + "id": {Type: "integer", PK: true}, + "manager_id": {Type: "integer", FK: "employees.id"}, + }, + }, + }, + } +} diff --git a/internal/web/static/app.js b/internal/web/static/app.js index c4ce693..53568d8 100644 --- a/internal/web/static/app.js +++ b/internal/web/static/app.js @@ -1564,6 +1564,7 @@ rows: Number(document.getElementById("cfg-rows").value || 0), enumRows: Number(document.getElementById("cfg-enum").value || 0), batchSize: Number(document.getElementById("cfg-batch").value || 0), + selfRefDepth: Number(document.getElementById("cfg-selfref-depth").value || 0), truncate: document.getElementById("cfg-truncate").checked, dryRun: document.getElementById("cfg-dryrun").checked, disableFK: document.getElementById("cfg-disablefk").checked, diff --git a/internal/web/templates/workspace.html.tmpl b/internal/web/templates/workspace.html.tmpl index db9ab0d..f3eb383 100644 --- a/internal/web/templates/workspace.html.tmpl +++ b/internal/web/templates/workspace.html.tmpl @@ -132,6 +132,10 @@ Enum/value +
Options