diff --git a/EXAMPLES.md b/EXAMPLES.md
index 2eb85f4..d38e199 100644
--- a/EXAMPLES.md
+++ b/EXAMPLES.md
@@ -19,7 +19,7 @@ End-to-end walkthroughs for common seedstorm workflows. All examples assume loca
## 1. Basic Seeding (no AI)
-Demo — introspect + seed a 28-table schema
+Demo — introspect + seed a 29-table schema
@@ -55,7 +55,7 @@ seedstorm seed \
14:12:15 INFO Seeding table table=users rows=150
14:12:15 INFO Seeding table table=companies rows=50
...
-14:12:16 INFO Seeding complete tables=28 total_rows=1515 duration=316ms
+14:12:16 INFO Seeding complete tables=29 total_rows=1540 duration=316ms
```
@@ -398,7 +398,7 @@ Select which tables to seed. Tables are shown in FK-safe order with their depend
[ ] employees → departments
[✓] wishlists → users
- 7 of 28 tables selected
+ 7 of 29 tables selected
↑/↓ navigate • space toggle • a all • n none • enter confirm • q quit
```
@@ -426,6 +426,7 @@ Set seeding parameters. Tab between fields, space to toggle the truncate checkbo
▸ Rows per table: [50]
Batch size: [100]
Enum rows (0 = use rows): [0]
+ Self-ref depth: [2]
[ ] Truncate before seeding
tab/↑↓ navigate • space toggle • enter confirm • b back • q quit
@@ -436,6 +437,7 @@ Set seeding parameters. Tab between fields, space to toggle the truncate checkbo
| Rows per table | How many rows to generate for each selected table |
| Batch size | Rows per INSERT statement (higher = faster, default 100) |
| Enum rows | Rows per enum value for enum tables (0 = use rows count) |
+| Self-ref depth | Maximum generated depth for self-referential FK chains |
| Truncate | Delete all existing data before seeding (shows warning in review) |
@@ -598,7 +600,7 @@ A 3-step wizard: **Tables → Config → Generate**
**Step 1 — Table picker:** Same as `seed -i` — select which tables to include.
-**Step 2 — Config:** Set rows, choose format (yaml/json/sql with `←`/`→`), and optionally set an output file path.
+**Step 2 — Config:** Set rows and self-reference depth, choose the format (yaml/json/sql with `←`/`→`), and optionally set an output file path.
```
seedstorm generate interactive ✓ Tables ● Config ○ Generate
@@ -606,6 +608,7 @@ A 3-step wizard: **Tables → Config → Generate**
Configure generation
▸ Rows per table: [10]
+ Self-ref depth: [2]
Format: [yaml] json sql
Output file: [data.json]
diff --git a/Makefile b/Makefile
index a4f03ef..74c4a73 100644
--- a/Makefile
+++ b/Makefile
@@ -50,7 +50,18 @@ test:
test-integration: dev-up
@echo "Waiting for databases to be healthy..."
- @docker compose wait mysql postgres 2>/dev/null || sleep 10
+# Poll container health for up to 60s. Containers are looked up through
+# `docker compose ps -q <service>` so the wait does not depend on the compose
+# project name (container names such as seedstorm-postgres-1 change with the
+# checkout directory).
+	@for i in $$(seq 1 60); do \
+		pg=$$(docker inspect -f '{{.State.Health.Status}}' "$$(docker compose ps -q postgres 2>/dev/null)" 2>/dev/null || true); \
+		my=$$(docker inspect -f '{{.State.Health.Status}}' "$$(docker compose ps -q mysql 2>/dev/null)" 2>/dev/null || true); \
+		if [ "$$pg" = "healthy" ] && [ "$$my" = "healthy" ]; then \
+			break; \
+		fi; \
+		if [ "$$i" = "60" ]; then \
+			echo "Databases not healthy after 60s (postgres=$$pg mysql=$$my)" >&2; \
+			docker compose ps; \
+			exit 1; \
+		fi; \
+		sleep 1; \
+	done
cd integration && go test -v -tags integration -count=1 ./... -timeout 300s
lint:
diff --git a/README.md b/README.md
index 6da69b9..8fd0871 100644
--- a/README.md
+++ b/README.md
@@ -67,14 +67,14 @@ seedstorm gaps \
## Features
- **Schema self-discovery** — introspects tables, columns, PKs, FKs, enum values, UNIQUE and CHECK constraints; no manual editing required
-- **FK-aware seeding** — topological sort guarantees parent tables are seeded before children; handles self-referential FKs, near-cycles, junction tables, and deep multi-level chains
+- **FK-aware seeding** — topological sort guarantees parent tables are seeded before children; handles nullable and non-nullable self-referential FKs with bounded depth, near-cycles, junction tables, and deep multi-level chains
- **Constraint-aware faker mapping** — UNIQUE → `uuid`, CHECK IN → `randomstring(a,b,c)`, CHECK range → `number(min,max)`; seed data always satisfies your constraints
- **Semantic faker** — maps column names (`email`, `first_name`, `price`, `city`…) to realistic `gofakeit` generators automatically
- **Enum coverage** — every enum value appears at least `--rows` times, independently per column
- **AI enrichment** — Gemini rewrites faker hints for domain-meaningful data; supply `--prompt` for richer context
- **Gap analysis** — `gaps` shows which tables are empty with row counts and FK context; `--fill` seeds only the empty ones
-- **Interactive TUI** — wizard for table selection, global config, per-table row volumes, and review before seeding
-- **Web UI** — `seedstorm serve` exposes an interactive graph workspace with click-to-select tables, per-table row overrides, live SSE job logs, multi-DB session switcher, and connection presets in `localStorage`
+- **Interactive TUI** — wizard for table selection, global config, self-reference depth, per-table row volumes, and review before seeding
+- **Web UI** — `seedstorm serve` exposes an interactive graph workspace with click-to-select tables, self-reference depth, per-table row overrides, live SSE job logs, multi-DB session switcher, and connection presets in `localStorage`
- **Dry-run** — preview the seed plan and INSERT SQL without touching the database
- **Export** — generate fake data as YAML, JSON, or SQL without a live connection
diff --git a/docs/commands.md b/docs/commands.md
index e936ad5..99c2240 100644
--- a/docs/commands.md
+++ b/docs/commands.md
@@ -108,6 +108,22 @@ seedstorm seed \
--schema schema.yaml \
--enum-rows 10
+# Bound generated self-referential chains to 2 levels
+seedstorm seed \
+ --db postgres \
+ --dsn "postgres://..." \
+ --schema schema.yaml \
+ --self-ref-depth 2
+
+# Override specific table volumes from a scripted run
+seedstorm seed \
+ --db postgres \
+ --dsn "postgres://..." \
+ --schema schema.yaml \
+ --rows 20 \
+ --table-rows users=200,orders=500 \
+ --table-rows order_items=1000
+
# Interactive TUI — pick tables, configure options, review, then seed
seedstorm seed \
--db postgres \
@@ -126,7 +142,9 @@ The interactive TUI includes a **Volumes** step after global config. Each select
| `--db` / `$SEEDSTORM_DB` | `postgres` | Database type |
| `--dsn` / `$SEEDSTORM_DSN` | — | Connection string (required) |
| `--rows` / `-r` | `100` | Rows per table |
+| `--table-rows` | — | Per-table row override, repeatable or comma-separated (`table=rows`) |
| `--enum-rows` | `0` | Rows per enum value (0 = use `--rows`) |
+| `--self-ref-depth` | `2` | Maximum generated depth for self-referential FK chains |
| `--disable-fk` | false | Skip FK ordering |
| `--dry-run` / `-n` | false | Print seed plan + SQL, do not execute |
| `--truncate` | false | Truncate all tables before seeding (prompts for confirmation) |
@@ -196,7 +214,9 @@ Gap Analysis
| `--db` / `$SEEDSTORM_DB` | `postgres` | Database type |
| `--dsn` / `$SEEDSTORM_DSN` | — | Connection string (required) |
| `--rows` / `-r` | `100` | Rows per empty table (when `--fill` is set) |
+| `--table-rows` | — | Per-table row override for fill, repeatable or comma-separated (`table=rows`) |
| `--enum-rows` | `0` | Rows per enum value for empty enum tables (0 = use `--rows`) |
+| `--self-ref-depth` | `2` | Maximum generated depth for self-referential FK chains |
| `--fill` | false | Seed all empty tables |
| `--dry-run` / `-n` | false | Print SQL without executing (requires `--fill`) |
| `--yes` / `-y` | false | Skip confirmation prompt |
@@ -213,6 +233,8 @@ Generates fake data without connecting to a database. Outputs YAML, JSON, or SQL
seedstorm generate --schema schema.yaml --rows 10 --format json --out data.json
seedstorm generate --schema schema.yaml --rows 5 --format sql --db postgres
seedstorm generate --schema schema.yaml --rows 20 --format yaml
+seedstorm generate --schema schema.yaml --rows 20 --self-ref-depth 3
+seedstorm generate --schema schema.yaml --rows 20 --table-rows users=200,orders=500
# Interactive TUI
seedstorm generate --schema schema.yaml --interactive
@@ -226,6 +248,8 @@ In interactive mode, the **Volumes** step can override row counts per selected t
|------|---------|-------------|
| `--schema` / `-s` | `schema.yaml` | Schema file |
| `--rows` / `-r` | `100` | Rows per table |
+| `--table-rows` | — | Per-table row override, repeatable or comma-separated (`table=rows`) |
+| `--self-ref-depth` | `2` | Maximum generated depth for self-referential FK chains |
| `--format` / `-f` | `yaml` | Output format: `yaml`, `json`, `sql` |
| `--out` / `-o` | stdout | Output file (omit for stdout) |
| `--db` | `postgres` | DB type (affects SQL placeholder style) |
@@ -259,7 +283,7 @@ SEEDSTORM_ADDR=127.0.0.1:9000 seedstorm serve
What the UI gives you:
-- **Workspace** — Cytoscape DAG of every table; click to select, non-nullable parents auto-lock as a dependency closure (mirrors the TUI). The selected-table panel lets you override row counts per table for **Seed**, **Fill empty**, and workspace **Generate** runs while `Rows` remains the default. Live SSE log stream + status pill.
+- **Workspace** — Cytoscape DAG of every table; click to select, non-nullable parents auto-lock as a dependency closure (mirrors the TUI). The selected-table panel lets you override row counts per table for **Seed**, **Fill empty**, and workspace **Generate** runs while `Rows` remains the default. `Self-ref` sets the maximum generated depth for self-referential FK chains. Live SSE log stream + status pill.
- **Connection management** — multi-session: hold several DBs open in one browser and switch from a topbar dropdown. Saved connection presets in `localStorage` with optional password (eye-icon reveal, closed by default). Passwords are kept in process memory only on the server.
- **Standalone tools** — `/generate`, `/enrich`, `/export` mirror the CLI commands as forms.
diff --git a/docs/development.md b/docs/development.md
index c6dd63d..9fb5a07 100644
--- a/docs/development.md
+++ b/docs/development.md
@@ -62,12 +62,13 @@ go test ./... -v
### Integration tests
-Integration tests run the full pipeline against a 28-table real-world schema on both MySQL and PostgreSQL, covering:
+Integration tests run the full pipeline against a 29-table real-world schema on both MySQL and PostgreSQL, covering:
| Edge case | Tables |
|-----------|--------|
| Self-referential FK | `categories`, `departments`, `employees` |
| Near-cycle (nullable FK breaks it) | `departments.head_employee_id ↔ employees.department_id` |
+| Hard self-reference | `hard_self_employees.manager_id → hard_self_employees.id` |
| Deep FK chain (5 levels) | `return_requests → order_items → orders → users` |
| Many-to-many junctions | `product_tags`, `project_assignments`, `wishlist_items` |
| Multiple enums per table | `support_tickets` (status + priority) |
@@ -78,8 +79,8 @@ Integration tests run the full pipeline against a 28-table real-world schema on
| CHECK range constraint → `number(min,max)` faker | `products.rating` (1–5) |
Tests verify:
-- All 28 tables receive exactly the requested number of rows
-- 38 FK relationships have zero orphans
+- All 29 tables receive rows, with enum-coverage tables allowed to exceed the base request so every enum value is represented
+- 39 FK relationships have zero orphans, including nullable and non-nullable self-references
- 6 value constraints hold (ratings 1–5, prices > 0, quantities ≥ 1, salaries > 0)
- Enum values, UNIQUE columns, and CHECK constraints are auto-detected correctly
@@ -100,7 +101,7 @@ Expected output:
brands 25 rows
...
audit_logs 25 rows
- Total: 700 rows across 28 tables (4.43s)
+ Total: 1600+ rows across 29 tables (11.30s)
--- PASS: TestPostgresIntegration (6.87s)
```
@@ -117,7 +118,7 @@ All tests run automatically on every PR via GitHub Actions (`.github/workflows/p
| `validate` | Directory/file structure via structlint |
| `test` | `go test ./...` + `make build` |
| `lint` | `golangci-lint` |
-| `integration` | Full 28-table suite on Postgres 15 + MySQL 8 |
+| `integration` | Full 29-table suite on Postgres 15 + MySQL 8 |
The integration job in CI uses `--timeout 120s`. Use `300s` locally when running both engines back-to-back.
diff --git a/init/init.mysql.sql b/init/init.mysql.sql
index e806d66..b51ebb3 100644
--- a/init/init.mysql.sql
+++ b/init/init.mysql.sql
@@ -1,4 +1,5 @@
DROP TABLE IF EXISTS order_items;
+DROP TABLE IF EXISTS hard_self_employees;
DROP TABLE IF EXISTS orders;
DROP TABLE IF EXISTS products;
DROP TABLE IF EXISTS users;
@@ -16,6 +17,14 @@ CREATE TABLE products (
price DECIMAL(10, 2) NOT NULL
);
+-- hard_self_employees: self-referential table whose manager_id is NOT NULL,
+-- so every row has to reference the id of a row in this same table.
+CREATE TABLE hard_self_employees (
+ id INT AUTO_INCREMENT PRIMARY KEY,
+ manager_id INT NOT NULL,
+ name VARCHAR(255) NOT NULL,
+ title VARCHAR(255),
+ FOREIGN KEY (manager_id) REFERENCES hard_self_employees(id)
+);
+
CREATE TABLE orders (
id INT AUTO_INCREMENT PRIMARY KEY,
user_id INT NOT NULL,
@@ -31,4 +40,4 @@ CREATE TABLE order_items (
quantity INT NOT NULL,
FOREIGN KEY (order_id) REFERENCES orders(id),
FOREIGN KEY (product_id) REFERENCES products(id)
-);
\ No newline at end of file
+);
diff --git a/init/init.sql b/init/init.sql
index 3a19cb3..f5d1055 100644
--- a/init/init.sql
+++ b/init/init.sql
@@ -1,4 +1,5 @@
DROP TABLE IF EXISTS order_items;
+DROP TABLE IF EXISTS hard_self_employees;
DROP TABLE IF EXISTS orders;
DROP TYPE IF EXISTS order_status;
CREATE TYPE order_status AS ENUM ('pending', 'processing', 'shipped', 'delivered', 'cancelled');
@@ -16,6 +17,13 @@ CREATE TABLE IF NOT EXISTS products (
price NUMERIC(10, 2) NOT NULL
);
+-- hard_self_employees: self-referential table whose manager_id is NOT NULL,
+-- so every row has to reference the id of a row in this same table.
+CREATE TABLE IF NOT EXISTS hard_self_employees (
+ id SERIAL PRIMARY KEY,
+ manager_id INTEGER NOT NULL REFERENCES hard_self_employees(id),
+ name VARCHAR(255) NOT NULL,
+ title VARCHAR(255)
+);
+
CREATE TABLE IF NOT EXISTS orders (
id SERIAL PRIMARY KEY,
user_id INTEGER NOT NULL REFERENCES users(id),
@@ -28,4 +36,4 @@ CREATE TABLE IF NOT EXISTS order_items (
order_id INTEGER NOT NULL REFERENCES orders(id),
product_id INTEGER NOT NULL REFERENCES products(id),
quantity INTEGER NOT NULL
-);
\ No newline at end of file
+);
diff --git a/integration/integration_test.go b/integration/integration_test.go
index 591bea5..e4bb693 100644
--- a/integration/integration_test.go
+++ b/integration/integration_test.go
@@ -138,6 +138,32 @@ func countRows(t *testing.T, conn *sql.DB, table string) int {
return n
}
+// assertHardSelfRefSeeded verifies that the hard_self_employees table was
+// seeded consistently. It fails the test immediately when the table is empty,
+// when any manager_id is NULL, or when any manager_id does not match the id of
+// a row in the same table. A failing query is fatal as well.
+func assertHardSelfRefSeeded(t *testing.T, conn *sql.DB) {
+ t.Helper()
+ var total, nulls, orphans int
+ // The table must have received at least one row.
+ if err := conn.QueryRowContext(context.Background(), `SELECT COUNT(*) FROM hard_self_employees`).Scan(&total); err != nil {
+ t.Fatalf("hard self-ref count: %v", err)
+ }
+ if total == 0 {
+ t.Fatal("hard_self_employees: expected seeded rows")
+ }
+ // manager_id is declared NOT NULL in the integration schemas, so any NULL
+ // found here is unexpected.
+ if err := conn.QueryRowContext(context.Background(), `SELECT COUNT(*) FROM hard_self_employees WHERE manager_id IS NULL`).Scan(&nulls); err != nil {
+ t.Fatalf("hard self-ref null check: %v", err)
+ }
+ if nulls != 0 {
+ t.Fatalf("hard_self_employees: found %d NULL manager_id values", nulls)
+ }
+ // Self-join child → parent on manager_id = id; a child row without a
+ // matching parent row is counted as an orphan.
+ if err := conn.QueryRowContext(context.Background(), `
+ SELECT COUNT(*) FROM hard_self_employees c
+ LEFT JOIN hard_self_employees p ON c.manager_id = p.id
+ WHERE p.id IS NULL`).Scan(&orphans); err != nil {
+ t.Fatalf("hard self-ref FK check: %v", err)
+ }
+ if orphans != 0 {
+ t.Fatalf("hard_self_employees: found %d orphaned manager_id values", orphans)
+ }
+}
+
// buildAndSeed runs the full introspect → build schema → generate → seed pipeline.
// It prints a summary at the end (not per-row during insert).
func buildAndSeed(t *testing.T, label, driver, dsn string, conn *sql.DB) map[string][]map[string]interface{} {
@@ -246,7 +272,7 @@ func TestPostgresIntegration(t *testing.T) {
t.Run("row counts", func(t *testing.T) {
allTables := []string{
// L0
- "brands", "tags", "users", "coupons", "companies", "suppliers",
+ "brands", "tags", "users", "coupons", "companies", "suppliers", "hard_self_employees",
// L1
"categories", "addresses", "departments", "warehouses", "wishlists",
// L2
@@ -909,17 +935,18 @@ func TestPostgresIntegration(t *testing.T) {
expected := map[string]int{
// existing
- "addresses": 1, // user_id
- "products": 2, // category_id, brand_id
- "product_tags": 2, // product_id, tag_id
- "orders": 3, // user_id, address_id, coupon_id
- "order_items": 2, // order_id, product_id
- "shipments": 1, // order_id
- "payments": 1, // order_id
- "reviews": 2, // user_id, product_id
- "wishlists": 1, // user_id
- "wishlist_items": 2, // wishlist_id, product_id
- "categories": 1, // parent_id (self-ref)
+ "addresses": 1, // user_id
+ "products": 2, // category_id, brand_id
+ "product_tags": 2, // product_id, tag_id
+ "orders": 3, // user_id, address_id, coupon_id
+ "order_items": 2, // order_id, product_id
+ "shipments": 1, // order_id
+ "payments": 1, // order_id
+ "reviews": 2, // user_id, product_id
+ "wishlists": 1, // user_id
+ "wishlist_items": 2, // wishlist_id, product_id
+ "categories": 1, // parent_id (self-ref)
+ "hard_self_employees": 1, // manager_id (hard self-ref)
// new
"departments": 3, // company_id, parent_dept_id (self-ref), head_employee_id
"employees": 2, // department_id, manager_id (self-ref)
@@ -1050,6 +1077,10 @@ func TestPostgresIntegration(t *testing.T) {
t.Logf("employees root nodes: %d", roots)
})
+ t.Run("self-ref: hard_self_employees has valid non-null managers", func(t *testing.T) {
+ assertHardSelfRefSeeded(t, conn)
+ })
+
// ── Deep chain subtest ──────────────────────────────────────────────────────
t.Run("deep chain: return_requests -> order_items -> orders -> users", func(t *testing.T) {
@@ -1393,7 +1424,7 @@ func TestPostgresIntegration(t *testing.T) {
t.Fatalf("truncate: %v", err)
}
allTables := []string{
- "brands", "tags", "users", "coupons", "companies", "suppliers",
+ "brands", "tags", "users", "coupons", "companies", "suppliers", "hard_self_employees",
"categories", "addresses", "departments", "warehouses", "wishlists",
"products", "employees",
"product_tags", "orders", "projects", "inventory", "purchase_orders",
@@ -1414,7 +1445,7 @@ func TestPostgresIntegration(t *testing.T) {
}
buildAndSeed(t, "postgres (post-truncate)", postgresDriver, dsn, conn)
allTables := []string{
- "brands", "tags", "users", "coupons", "companies", "suppliers",
+ "brands", "tags", "users", "coupons", "companies", "suppliers", "hard_self_employees",
"categories", "addresses", "departments", "warehouses", "wishlists",
"products", "employees",
"product_tags", "orders", "projects", "inventory", "purchase_orders",
@@ -1454,7 +1485,7 @@ func TestMySQLIntegration(t *testing.T) {
t.Run("row counts", func(t *testing.T) {
allTables := []string{
// L0
- "brands", "tags", "users", "coupons", "companies", "suppliers",
+ "brands", "tags", "users", "coupons", "companies", "suppliers", "hard_self_employees",
// L1
"categories", "addresses", "departments", "warehouses", "wishlists",
// L2
@@ -2116,17 +2147,18 @@ func TestMySQLIntegration(t *testing.T) {
expected := map[string]int{
// existing
- "addresses": 1, // user_id
- "products": 2, // category_id, brand_id
- "product_tags": 2, // product_id, tag_id
- "orders": 3, // user_id, address_id, coupon_id
- "order_items": 2, // order_id, product_id
- "shipments": 1, // order_id
- "payments": 1, // order_id
- "reviews": 2, // user_id, product_id
- "wishlists": 1, // user_id
- "wishlist_items": 2, // wishlist_id, product_id
- "categories": 1, // parent_id (self-ref)
+ "addresses": 1, // user_id
+ "products": 2, // category_id, brand_id
+ "product_tags": 2, // product_id, tag_id
+ "orders": 3, // user_id, address_id, coupon_id
+ "order_items": 2, // order_id, product_id
+ "shipments": 1, // order_id
+ "payments": 1, // order_id
+ "reviews": 2, // user_id, product_id
+ "wishlists": 1, // user_id
+ "wishlist_items": 2, // wishlist_id, product_id
+ "categories": 1, // parent_id (self-ref)
+ "hard_self_employees": 1, // manager_id (hard self-ref)
// new
"departments": 3, // company_id, parent_dept_id (self-ref), head_employee_id
"employees": 2, // department_id, manager_id (self-ref)
@@ -2257,6 +2289,10 @@ func TestMySQLIntegration(t *testing.T) {
t.Logf("employees root nodes: %d", roots)
})
+ t.Run("self-ref: hard_self_employees has valid non-null managers", func(t *testing.T) {
+ assertHardSelfRefSeeded(t, conn)
+ })
+
// ── Deep chain subtest ──────────────────────────────────────────────────────
t.Run("deep chain: return_requests -> order_items -> orders -> users", func(t *testing.T) {
@@ -2590,7 +2626,7 @@ func TestMySQLIntegration(t *testing.T) {
t.Fatalf("truncate: %v", err)
}
allTables := []string{
- "brands", "tags", "users", "coupons", "companies", "suppliers",
+ "brands", "tags", "users", "coupons", "companies", "suppliers", "hard_self_employees",
"categories", "addresses", "departments", "warehouses", "wishlists",
"products", "employees",
"product_tags", "orders", "projects", "inventory", "purchase_orders",
@@ -2611,7 +2647,7 @@ func TestMySQLIntegration(t *testing.T) {
}
buildAndSeed(t, "mysql (post-truncate)", mysqlDriver, dsn, conn)
allTables := []string{
- "brands", "tags", "users", "coupons", "companies", "suppliers",
+ "brands", "tags", "users", "coupons", "companies", "suppliers", "hard_self_employees",
"categories", "addresses", "departments", "warehouses", "wishlists",
"products", "employees",
"product_tags", "orders", "projects", "inventory", "purchase_orders",
@@ -2642,12 +2678,12 @@ func TestMySQLIntegration(t *testing.T) {
// 5. Idempotent fill: running gap fill a second time when all tables already
// have rows adds nothing (no gaps found → no generation).
-// gapL0Tables are the root (no FK parents) tables in the 28-table test schema.
-var gapL0Tables = []string{"brands", "tags", "users", "coupons", "companies", "suppliers"}
+// gapL0Tables are the root tables in the 29-table test schema: none has an FK to another table (hard_self_employees references only itself).
+var gapL0Tables = []string{"brands", "tags", "users", "coupons", "companies", "suppliers", "hard_self_employees"}
// gapAllTables lists every table in the test schema (used for count assertions).
var gapAllTables = []string{
- "brands", "tags", "users", "coupons", "companies", "suppliers",
+ "brands", "tags", "users", "coupons", "companies", "suppliers", "hard_self_employees",
"categories", "addresses", "departments", "warehouses", "wishlists",
"products", "employees",
"product_tags", "orders", "projects", "inventory", "purchase_orders",
diff --git a/integration/schema_mysql.sql b/integration/schema_mysql.sql
index 0ac1a89..3d3caa2 100644
--- a/integration/schema_mysql.sql
+++ b/integration/schema_mysql.sql
@@ -2,6 +2,7 @@
SET FOREIGN_KEY_CHECKS = 0;
DROP TABLE IF EXISTS return_requests;
DROP TABLE IF EXISTS audit_logs;
+DROP TABLE IF EXISTS hard_self_employees;
DROP TABLE IF EXISTS support_tickets;
DROP TABLE IF EXISTS project_assignments;
DROP TABLE IF EXISTS purchase_order_items;
@@ -92,6 +93,14 @@ CREATE TABLE suppliers (
rating DECIMAL(3,2)
);
+-- Hard self-reference fixture: manager_id is NOT NULL and points back at this
+-- table's own id, so no row can be stored without a valid manager.
+CREATE TABLE hard_self_employees (
+ id INT AUTO_INCREMENT PRIMARY KEY,
+ manager_id INT NOT NULL,
+ name VARCHAR(100) NOT NULL,
+ title VARCHAR(100),
+ FOREIGN KEY (manager_id) REFERENCES hard_self_employees(id)
+);
+
-- Level 1: FK to level 0
CREATE TABLE addresses (
id INT AUTO_INCREMENT PRIMARY KEY,
diff --git a/integration/schema_postgres.sql b/integration/schema_postgres.sql
index 29d8e3e..384a61f 100644
--- a/integration/schema_postgres.sql
+++ b/integration/schema_postgres.sql
@@ -1,6 +1,7 @@
-- Teardown (always safe to re-run)
DROP TABLE IF EXISTS return_requests CASCADE;
DROP TABLE IF EXISTS audit_logs CASCADE;
+DROP TABLE IF EXISTS hard_self_employees CASCADE;
DROP TABLE IF EXISTS support_tickets CASCADE;
DROP TABLE IF EXISTS project_assignments CASCADE;
DROP TABLE IF EXISTS purchase_order_items CASCADE;
@@ -120,6 +121,13 @@ CREATE TABLE suppliers (
rating NUMERIC(3,2)
);
+-- Hard self-reference fixture: manager_id is NOT NULL and points back at this
+-- table's own id, so no row can be stored without a valid manager.
+CREATE TABLE hard_self_employees (
+ id SERIAL PRIMARY KEY,
+ manager_id INTEGER NOT NULL REFERENCES hard_self_employees(id),
+ name VARCHAR(100) NOT NULL,
+ title VARCHAR(100)
+);
+
-- Level 1: FK to level 0
CREATE TABLE addresses (
id SERIAL PRIMARY KEY,
diff --git a/internal/cli/gaps.go b/internal/cli/gaps.go
index 098838a..85f86f9 100644
--- a/internal/cli/gaps.go
+++ b/internal/cli/gaps.go
@@ -52,11 +52,20 @@ Use --fill --dry-run to preview the SQL without executing it.`,
Usage: "Rows to insert per empty table (when --fill is set)",
Value: 100,
},
+ &cli.StringSliceFlag{
+ Name: "table-rows",
+ Usage: "Per-table row override for fill, repeatable or comma-separated (table=rows)",
+ },
&cli.IntFlag{
Name: "enum-rows",
Usage: "Rows per enum value for empty tables with enum columns (0 = use --rows)",
Value: 0,
},
+ &cli.IntFlag{
+ Name: "self-ref-depth",
+ Usage: "Maximum generated depth for self-referential FK chains",
+ Value: faker.DefaultSelfRefDepth,
+ },
&cli.BoolFlag{
Name: "fill",
Usage: "Seed all empty tables (populated tables are skipped)",
@@ -88,7 +97,12 @@ Use --fill --dry-run to preview the SQL without executing it.`,
dbType := normalizeDBType(cmd.String("db"))
dsn := cmd.String("dsn")
rows := cmd.Int("rows")
+ tableRows, err := parseTableRows(cmd.StringSlice("table-rows"))
+ if err != nil {
+ return err
+ }
enumRows := cmd.Int("enum-rows")
+ selfRefDepth := cmd.Int("self-ref-depth")
fill := cmd.Bool("fill")
dryRun := cmd.Bool("dry-run")
yes := cmd.Bool("yes")
@@ -127,7 +141,7 @@ Use --fill --dry-run to preview the SQL without executing it.`,
}
if cmd.Bool("interactive") {
- return tui.RunGaps(ctx, s, dbType, dsn, counts, rows, batchSize, enumRows)
+ return tui.RunGaps(ctx, s, dbType, dsn, counts, rows, batchSize, enumRows, selfRefDepth)
}
// Build FK parents map for display: table → []parent tables.
@@ -176,7 +190,9 @@ Use --fill --dry-run to preview the SQL without executing it.`,
// Generate data for gap tables only; allSorted is used internally to
// preload existing PKs from already-populated parent tables.
- data, err := faker.GenerateFiltered(s, allSorted, gapTables, rows, enumRows, dbConn, dbType)
+ data, err := faker.GenerateFilteredWithOptions(s, allSorted, gapTables, rows, enumRows, tableRows, dbConn, dbType, faker.GenerateOptions{
+ SelfRefDepth: selfRefDepth,
+ })
if err != nil {
return fmt.Errorf("data generation failed: %w", err)
}
diff --git a/internal/cli/generate.go b/internal/cli/generate.go
index 9aadd73..0cbe549 100644
--- a/internal/cli/generate.go
+++ b/internal/cli/generate.go
@@ -36,6 +36,15 @@ func generateCmd() *cli.Command {
Usage: "Rows per table",
Value: 10,
},
+ &cli.StringSliceFlag{
+ Name: "table-rows",
+ Usage: "Per-table row override, repeatable or comma-separated (table=rows)",
+ },
+ &cli.IntFlag{
+ Name: "self-ref-depth",
+ Usage: "Maximum generated depth for self-referential FK chains",
+ Value: faker.DefaultSelfRefDepth,
+ },
&cli.StringFlag{
Name: "format",
Aliases: []string{"f"},
@@ -68,6 +77,11 @@ func generateCmd() *cli.Command {
log := logging.Log
schemaPath := cmd.String("schema")
rows := cmd.Int("rows")
+ tableRows, err := parseTableRows(cmd.StringSlice("table-rows"))
+ if err != nil {
+ return err
+ }
+ selfRefDepth := cmd.Int("self-ref-depth")
format := cmd.String("format")
outPath := cmd.String("out")
dbType := normalizeDBType(cmd.String("db"))
@@ -85,7 +99,7 @@ func generateCmd() *cli.Command {
}
if cmd.Bool("interactive") {
- return tui.RunGenerate(ctx, s, dbType, format, outPath, rows)
+ return tui.RunGenerate(ctx, s, dbType, format, outPath, rows, selfRefDepth)
}
log.Info().Msg("Building dependency graph")
@@ -96,7 +110,9 @@ func generateCmd() *cli.Command {
}
log.Info().Int("rows", rows).Msg("Generating data")
- data, err := faker.Generate(s, sortedTables, rows, 0, nil, dbType)
+ data, err := faker.GenerateFilteredWithOptions(s, sortedTables, sortedTables, rows, 0, tableRows, nil, dbType, faker.GenerateOptions{
+ SelfRefDepth: selfRefDepth,
+ })
if err != nil {
return fmt.Errorf("generation failed: %w", err)
}
diff --git a/internal/cli/seed.go b/internal/cli/seed.go
index 263b785..9d66205 100644
--- a/internal/cli/seed.go
+++ b/internal/cli/seed.go
@@ -51,11 +51,20 @@ Use --dry-run to print SQL statements without executing them.`,
Usage: "Number of rows to insert per table",
Value: 100,
},
+ &cli.StringSliceFlag{
+ Name: "table-rows",
+ Usage: "Per-table row override, repeatable or comma-separated (table=rows)",
+ },
&cli.IntFlag{
Name: "enum-rows",
Usage: "Rows per enum value for tables with enum columns (0 = use --rows)",
Value: 0,
},
+ &cli.IntFlag{
+ Name: "self-ref-depth",
+ Usage: "Maximum generated depth for self-referential FK chains",
+ Value: faker.DefaultSelfRefDepth,
+ },
&cli.BoolFlag{
Name: "disable-fk",
Usage: "Skip FK ordering (seed in arbitrary order)",
@@ -96,7 +105,12 @@ Use --dry-run to print SQL statements without executing them.`,
dbType := normalizeDBType(cmd.String("db"))
dsn := cmd.String("dsn")
rows := cmd.Int("rows")
+ tableRows, err := parseTableRows(cmd.StringSlice("table-rows"))
+ if err != nil {
+ return err
+ }
enumRows := cmd.Int("enum-rows")
+ selfRefDepth := cmd.Int("self-ref-depth")
disableFK := cmd.Bool("disable-fk")
dryRun := cmd.Bool("dry-run")
truncate := cmd.Bool("truncate")
@@ -116,7 +130,7 @@ Use --dry-run to print SQL statements without executing them.`,
}
if cmd.Bool("interactive") {
- return tui.Run(ctx, s, dbType, dsn, rows, batchSize, enumRows, truncate)
+ return tui.Run(ctx, s, dbType, dsn, rows, batchSize, enumRows, truncate, selfRefDepth)
}
// Resolve seed order
@@ -152,7 +166,7 @@ Use --dry-run to print SQL statements without executing them.`,
if dryRun {
log.Info().Msg("Dry-run mode — SQL will be printed, not executed")
- fmt.Print(graph.RenderPlan(s, sortedTables, rows))
+ fmt.Print(graph.RenderPlanWithCounts(s, sortedTables, rows, tableRows))
fmt.Println("--- SQL ---")
}
@@ -176,7 +190,9 @@ Use --dry-run to print SQL statements without executing them.`,
// Generate data
start := time.Now()
log.Info().Int("rows", rows).Msg("Generating fake data")
- data, err := faker.Generate(s, sortedTables, rows, enumRows, dbConn, dbType)
+ data, err := faker.GenerateFilteredWithOptions(s, sortedTables, sortedTables, rows, enumRows, tableRows, dbConn, dbType, faker.GenerateOptions{
+ SelfRefDepth: selfRefDepth,
+ })
if err != nil {
return fmt.Errorf("data generation failed: %w", err)
}
diff --git a/internal/cli/seed_test.go b/internal/cli/seed_test.go
new file mode 100644
index 0000000..2f581f8
--- /dev/null
+++ b/internal/cli/seed_test.go
@@ -0,0 +1,157 @@
+package cli
+
+import (
+ "context"
+ "encoding/json"
+ "os"
+ "path/filepath"
+ "reflect"
+ "strings"
+ "testing"
+)
+
+// TestParseTableRows checks that comma-separated and repeated flag values are
+// merged into a single table → rows map, ignoring whitespace around each pair.
+func TestParseTableRows(t *testing.T) {
+	input := []string{"users=2, orders=4", "items=7"}
+	expected := map[string]int{"users": 2, "orders": 4, "items": 7}
+
+	actual, err := parseTableRows(input)
+	switch {
+	case err != nil:
+		t.Fatalf("parseTableRows: %v", err)
+	case !reflect.DeepEqual(actual, expected):
+		t.Fatalf("parseTableRows = %+v, want %+v", actual, expected)
+	}
+}
+
+// TestParseTableRowsRejectsInvalidValues checks that malformed overrides — a
+// missing "=", a zero count, a non-numeric count, and an empty table name —
+// each produce an error.
+func TestParseTableRowsRejectsInvalidValues(t *testing.T) {
+	invalid := [][]string{
+		{"users"},
+		{"users=0"},
+		{"users=nope"},
+		{"=2"},
+	}
+	for i := range invalid {
+		_, err := parseTableRows(invalid[i])
+		if err != nil {
+			continue
+		}
+		t.Fatalf("parseTableRows(%v) expected error", invalid[i])
+	}
+}
+
+// TestSeedDryRunRejectsHardMultiTableCycleBeforeConnecting runs `seed --dry-run`
+// against a schema in which tables a and b reference each other and expects
+// the command to return an error.
+//
+// NOTE(review): the DSN is only a placeholder. The test name implies the cycle
+// is reported before any connection attempt, but the assertions below would
+// also accept a connection error — confirm the intended failure is the one
+// being observed.
+func TestSeedDryRunRejectsHardMultiTableCycleBeforeConnecting(t *testing.T) {
+ schemaPath := writeTempSchema(t, `
+tables:
+ a:
+ columns:
+ id:
+ type: integer
+ pk: true
+ b_id:
+ type: integer
+ fk: b.id
+ b:
+ columns:
+ id:
+ type: integer
+ pk: true
+ a_id:
+ type: integer
+ fk: a.id
+`)
+
+ err := seedCmd().Run(context.Background(), []string{
+ "seed",
+ "--schema", schemaPath,
+ "--dsn", "postgres://invalid/unused",
+ "--dry-run",
+ })
+ if err == nil {
+ t.Fatal("expected hard multi-table cycle to fail before DB connection")
+ }
+ // NOTE(review): "a" and "b" are single letters, so this check is satisfied
+ // by almost any error text; more distinctive table names (or matching the
+ // cycle message itself) would make it meaningful.
+ if !strings.Contains(err.Error(), "a") || !strings.Contains(err.Error(), "b") {
+ t.Fatalf("error should name cycle tables, got: %v", err)
+ }
+}
+
+// TestGenerateCommandHandlesHardSelfReference runs `generate` on a schema whose
+// only table, employees, has manager_id referencing employees.id, and expects
+// the command to succeed and write JSON that mentions the table and FK column.
+//
+// NOTE(review): manager_id carries no nullability attribute here; presumably
+// the schema default makes it non-nullable ("hard") — confirm against the
+// schema loader.
+func TestGenerateCommandHandlesHardSelfReference(t *testing.T) {
+ schemaPath := writeTempSchema(t, `
+tables:
+ employees:
+ columns:
+ id:
+ type: integer
+ pk: true
+ manager_id:
+ type: integer
+ fk: employees.id
+`)
+ outPath := filepath.Join(t.TempDir(), "data.json")
+
+ err := generateCmd().Run(context.Background(), []string{
+ "generate",
+ "--schema", schemaPath,
+ "--rows", "3",
+ "--self-ref-depth", "2",
+ "--format", "json",
+ "--out", outPath,
+ })
+ if err != nil {
+ t.Fatalf("generate: %v", err)
+ }
+ out, err := os.ReadFile(outPath)
+ if err != nil {
+ t.Fatalf("read output: %v", err)
+ }
+ // Only the presence of the two names is checked; row count and the
+ // validity of the generated manager_id values are not asserted here.
+ if !strings.Contains(string(out), "employees") || !strings.Contains(string(out), "manager_id") {
+ t.Fatalf("output should include generated hard self-reference data, got %s", string(out))
+ }
+}
+
+// TestGenerateCommandAppliesTableRows checks that --table-rows overrides the
+// row count for the named table only: with --rows 2 and --table-rows orders=5
+// the JSON output must contain 2 users rows and 5 orders rows.
+func TestGenerateCommandAppliesTableRows(t *testing.T) {
+ schemaPath := writeTempSchema(t, `
+tables:
+ users:
+ columns:
+ id:
+ type: integer
+ pk: true
+ name:
+ type: varchar
+ faker: name
+ orders:
+ columns:
+ id:
+ type: integer
+ pk: true
+ user_id:
+ type: integer
+ fk: users.id
+`)
+ outPath := filepath.Join(t.TempDir(), "data.json")
+
+ err := generateCmd().Run(context.Background(), []string{
+ "generate",
+ "--schema", schemaPath,
+ "--rows", "2",
+ "--table-rows", "orders=5",
+ "--format", "json",
+ "--out", outPath,
+ })
+ if err != nil {
+ t.Fatalf("generate: %v", err)
+ }
+ out, err := os.ReadFile(outPath)
+ if err != nil {
+ t.Fatalf("read output: %v", err)
+ }
+ // The output is decoded as table name → list of row objects.
+ var data map[string][]map[string]any
+ if err := json.Unmarshal(out, &data); err != nil {
+ t.Fatalf("json output: %v\n%s", err, string(out))
+ }
+ // users has no override, so it falls back to the --rows value.
+ if got := len(data["users"]); got != 2 {
+ t.Fatalf("users rows = %d, want default 2", got)
+ }
+ if got := len(data["orders"]); got != 5 {
+ t.Fatalf("orders rows = %d, want override 5", got)
+ }
+}
+
+// writeTempSchema stores body as schema.yaml inside a fresh per-test temporary
+// directory and returns the file's path. The test is aborted if the file
+// cannot be written.
+func writeTempSchema(t *testing.T, body string) string {
+	t.Helper()
+	schemaFile := filepath.Join(t.TempDir(), "schema.yaml")
+	err := os.WriteFile(schemaFile, []byte(body), 0o644)
+	if err != nil {
+		t.Fatalf("write schema: %v", err)
+	}
+	return schemaFile
+}
diff --git a/internal/cli/table_rows.go b/internal/cli/table_rows.go
new file mode 100644
index 0000000..34c39ed
--- /dev/null
+++ b/internal/cli/table_rows.go
@@ -0,0 +1,40 @@
+package cli
+
+import (
+ "fmt"
+ "strconv"
+ "strings"
+)
+
+// parseTableRows parses per-table row overrides of the form "table=rows".
+//
+// Each element of values may carry several comma-separated overrides;
+// blank entries are skipped and surrounding whitespace is trimmed. When a
+// table is named more than once, the last count wins. Counts must be
+// integers >= 1. The result is nil when no override was supplied.
+func parseTableRows(values []string) (map[string]int, error) {
+	overrides := make(map[string]int)
+	for _, raw := range values {
+		for _, entry := range strings.Split(raw, ",") {
+			entry = strings.TrimSpace(entry)
+			if entry == "" {
+				continue
+			}
+			table, text, found := strings.Cut(entry, "=")
+			table, text = strings.TrimSpace(table), strings.TrimSpace(text)
+			// A missing "=" and an empty side are reported with the same message.
+			if !found || table == "" || text == "" {
+				return nil, fmt.Errorf("invalid table row override %q, expected table=rows", entry)
+			}
+			n, err := strconv.Atoi(text)
+			if err != nil || n < 1 {
+				return nil, fmt.Errorf("invalid row count for %s: %q", table, text)
+			}
+			overrides[table] = n
+		}
+	}
+	if len(overrides) == 0 {
+		return nil, nil
+	}
+	return overrides, nil
+}
diff --git a/internal/faker/faker.go b/internal/faker/faker.go
index 82726a4..286c981 100644
--- a/internal/faker/faker.go
+++ b/internal/faker/faker.go
@@ -15,6 +15,23 @@ import (
"github.com/brianvoe/gofakeit/v6"
)
+// DefaultSelfRefDepth is the SelfRefDepth value that DefaultGenerateOptions
+// returns.
+const DefaultSelfRefDepth = 2
+
+// GenerateOptions carries optional settings for GenerateWithOptions and
+// GenerateFilteredWithOptions.
+type GenerateOptions struct {
+ // SelfRefDepth is handed to the self-reference backfill as the depth
+ // limit for generated parent chains; negative values are treated as 0.
+ SelfRefDepth int
+}
+
+// DefaultGenerateOptions returns a GenerateOptions value whose SelfRefDepth
+// is set to DefaultSelfRefDepth.
+func DefaultGenerateOptions() GenerateOptions {
+	var opts GenerateOptions
+	opts.SelfRefDepth = DefaultSelfRefDepth
+	return opts
+}
+
+// normalizeOptions returns opts with a negative SelfRefDepth replaced by 0;
+// any other value passes through unchanged.
+func normalizeOptions(opts GenerateOptions) GenerateOptions {
+	if opts.SelfRefDepth >= 0 {
+		return opts
+	}
+	opts.SelfRefDepth = 0
+	return opts
+}
+
// Generate produces fake data rows for each table, respecting FK ordering.
// If conn is non-nil, existing PKs are read so FKs can reference them.
// dbType is the driver name ("pgx" or "mysql") used to quote SQL identifiers.
@@ -22,6 +39,10 @@ func Generate(s *schema.Schema, sortedTables []string, rows, enumRows int, conn
return GenerateFiltered(s, sortedTables, sortedTables, rows, enumRows, conn, dbType)
}
+// GenerateWithOptions is like Generate but takes explicit GenerateOptions.
+// It calls GenerateFilteredWithOptions with sortedTables as both the full
+// and the target table list and with no per-table row overrides.
+func GenerateWithOptions(s *schema.Schema, sortedTables []string, rows, enumRows int, conn *sql.DB, dbType string, opts GenerateOptions) (map[string][]map[string]interface{}, error) {
+ return GenerateFilteredWithOptions(s, sortedTables, sortedTables, rows, enumRows, nil, conn, dbType, opts)
+}
+
// GenerateFiltered is like Generate but separates the two roles of sortedTables:
// - allTables: the full set of tables used to pre-load existing PKs from the
// database (so FK columns in targetTables can reference already-populated
@@ -38,6 +59,13 @@ func GenerateFiltered(s *schema.Schema, allTables, targetTables []string, rows,
// GenerateFilteredWithCounts is like GenerateFiltered, but tableRows can
// override the default row count for individual target tables.
func GenerateFilteredWithCounts(s *schema.Schema, allTables, targetTables []string, rows, enumRows int, tableRows map[string]int, conn *sql.DB, dbType string) (map[string][]map[string]interface{}, error) {
+ return GenerateFilteredWithOptions(s, allTables, targetTables, rows, enumRows, tableRows, conn, dbType, DefaultGenerateOptions())
+}
+
+// GenerateFilteredWithOptions is like GenerateFilteredWithCounts, with
+// generation guardrails for recursive/self-referential relationships.
+func GenerateFilteredWithOptions(s *schema.Schema, allTables, targetTables []string, rows, enumRows int, tableRows map[string]int, conn *sql.DB, dbType string, opts GenerateOptions) (map[string][]map[string]interface{}, error) {
+ opts = normalizeOptions(opts)
data := make(map[string][]map[string]interface{})
generatedPKs := make(map[string][]interface{})
@@ -77,6 +105,9 @@ func GenerateFilteredWithCounts(s *schema.Schema, allTables, targetTables []stri
}
}
}
+ if err := backfillSelfReferences(data[tableName], table, tableName, opts.SelfRefDepth); err != nil {
+ return nil, fmt.Errorf("table %s self-reference backfill: %w", tableName, err)
+ }
}
return data, nil
@@ -336,10 +367,15 @@ func generateValue(col schema.Column, colName, tableName string, generatedPKs ma
fkTable := parts[0]
pks := generatedPKs[fkTable]
if len(pks) == 0 {
- if fkTable == tableName || col.Nullable {
- // Self-referential FK or nullable FK with no parent rows yet:
- // insert NULL. For nullable FKs this handles near-cycles where
- // the parent table is seeded after this one.
+ if fkTable == tableName {
+ // Self-referential FKs are resolved after all rows for the
+ // table have PKs, so the first row can be safely rooted and
+ // non-nullable self-FKs can reference an existing generated PK.
+ return nil, nil
+ }
+ if col.Nullable {
+ // Nullable FK with no parent rows yet: insert NULL. This
+ // handles near-cycles where the parent table is seeded later.
return nil, nil
}
return nil, fmt.Errorf("no PKs available for FK table %s", fkTable)
@@ -369,6 +405,88 @@ func generateValue(col schema.Column, colName, tableName string, generatedPKs ma
return val, nil
}
+// backfillSelfReferences rewrites every column of table whose FK points back
+// at tableName so that each row references a row from the same generated
+// batch.
+//
+// Columns are processed in sorted name order. For each one, rows are walked
+// in order and every row is linked to the parent picked by
+// chooseSelfRefParent, which only offers parents whose level is below
+// selfRefDepth. A row without an eligible parent (always the first row, and
+// every row when selfRefDepth <= 0) becomes a root: NULL when the column is
+// nullable, otherwise a reference to the row's own referenced column. A
+// negative selfRefDepth is treated as 0.
+//
+// An error is returned when the referenced column is not declared on the
+// table or has no generated value in a row. rows is modified in place.
+func backfillSelfReferences(rows []map[string]interface{}, table schema.Table, tableName string, selfRefDepth int) error {
+	if len(rows) == 0 {
+		return nil
+	}
+	if selfRefDepth < 0 {
+		selfRefDepth = 0
+	}
+
+	var selfRefCols []string
+	for name, column := range table.Columns {
+		if target, _ := splitFK(column.FK); target == tableName {
+			selfRefCols = append(selfRefCols, name)
+		}
+	}
+	// Map iteration order is random; sort so columns are handled deterministically.
+	sort.Strings(selfRefCols)
+
+	for _, fkCol := range selfRefCols {
+		column := table.Columns[fkCol]
+		_, refCol := splitFK(column.FK)
+		if refCol == "" {
+			continue
+		}
+		if _, declared := table.Columns[refCol]; !declared {
+			return fmt.Errorf("%s references missing column %s", fkCol, refCol)
+		}
+
+		// depth[i] is the chain level assigned to row i for this column.
+		depth := make([]int, len(rows))
+		for i, row := range rows {
+			refValue, present := row[refCol]
+			if !present {
+				return fmt.Errorf("%s references unavailable generated value %s", fkCol, refCol)
+			}
+
+			// parent is -1 for row 0 and whenever no earlier row may take a child.
+			parent := chooseSelfRefParent(depth, i, selfRefDepth)
+			switch {
+			case parent >= 0:
+				row[fkCol] = rows[parent][refCol]
+				depth[i] = depth[parent] + 1
+			case column.Nullable:
+				row[fkCol] = nil
+				depth[i] = 0
+			default:
+				// Non-nullable root: point the row at itself.
+				row[fkCol] = refValue
+				depth[i] = 0
+			}
+		}
+	}
+	return nil
+}
+
+// chooseSelfRefParent returns the index of the closest row before rowIdx
+// whose level is below maxDepth, so that a child attached to it stays within
+// the depth limit. It returns -1 when rowIdx is the first row, when maxDepth
+// is not positive, or when no earlier row qualifies.
+func chooseSelfRefParent(levels []int, rowIdx, maxDepth int) int {
+	if rowIdx <= 0 || maxDepth <= 0 {
+		return -1
+	}
+	candidate := rowIdx - 1
+	for candidate >= 0 && levels[candidate] >= maxDepth {
+		candidate--
+	}
+	// candidate is -1 here when the scan ran off the front of levels.
+	return candidate
+}
+
+// splitFK splits a foreign-key reference of the form "table.column" at its
+// first dot and returns the two halves. Both results are empty when fk
+// contains no dot.
+func splitFK(fk string) (string, string) {
+	table, column, found := strings.Cut(fk, ".")
+	if !found {
+		return "", ""
+	}
+	return table, column
+}
+
// generatePK returns an appropriate primary key value based on the column's DB type.
// Sequential integers for numeric types, UUIDs for uuid/string types.
func generatePK(colType string, existingCount int) (interface{}, error) {
diff --git a/internal/faker/faker_test.go b/internal/faker/faker_test.go
index c0d73b5..21a3040 100644
--- a/internal/faker/faker_test.go
+++ b/internal/faker/faker_test.go
@@ -653,15 +653,15 @@ func TestGenerateValue_NullableFKWithNoParents(t *testing.T) {
}
}
-func TestGenerateValue_SelfRefFKReturnsNil(t *testing.T) {
- col := schema.Column{Type: "integer", FK: "cats.id"}
+func TestGenerateValue_NullableSelfRefFKReturnsNil(t *testing.T) {
+ col := schema.Column{Type: "integer", FK: "cats.id", Nullable: true}
pks := map[string][]interface{}{} // no PKs yet for self
val, err := generateValue(col, "parent_id", "cats", pks, nil, "")
if err != nil {
t.Fatal(err)
}
if val != nil {
- t.Errorf("self-ref FK with no PKs should be nil, got %v", val)
+ t.Errorf("nullable self-ref FK with no PKs should be nil, got %v", val)
}
}
@@ -834,6 +834,130 @@ func TestGenerateFilteredWithCountsOverrideWinsOverEnumRows(t *testing.T) {
}
}
+// TestGenerateNullableSelfReferenceCreatesRootRow generates 3 rows for a
+// table with a nullable self-referential FK and checks that the first row's
+// parent_id is nil and that parent chains are no deeper than
+// DefaultSelfRefDepth.
+func TestGenerateNullableSelfReferenceCreatesRootRow(t *testing.T) {
+ s := &schema.Schema{
+ Tables: map[string]schema.Table{
+ "categories": {
+ Columns: map[string]schema.Column{
+ "id": {Type: "integer", PK: true},
+ "parent_id": {Type: "integer", FK: "categories.id", Nullable: true},
+ },
+ },
+ },
+ }
+
+ data, err := Generate(s, []string{"categories"}, 3, 0, nil, "pgx")
+ if err != nil {
+ t.Fatalf("Generate: %v", err)
+ }
+ if got := len(data["categories"]); got != 3 {
+ t.Fatalf("categories rows = %d, want 3", got)
+ }
+ if data["categories"][0]["parent_id"] != nil {
+ t.Fatalf("first self-referential row should be a NULL root, got %v", data["categories"][0]["parent_id"])
+ }
+ if got := maxSelfRefDepth(data["categories"], "id", "parent_id"); got > DefaultSelfRefDepth {
+ t.Fatalf("self-reference depth = %d, want <= %d", got, DefaultSelfRefDepth)
+ }
+}
+
+// TestGenerateHardSelfReferenceBackfillsValidManagers generates 5 rows for a
+// table with a non-nullable self-referential FK at SelfRefDepth 2. It checks
+// that the first row references itself, that every manager_id is non-nil and
+// matches a generated id, and that parent chains are at most 2 deep.
+func TestGenerateHardSelfReferenceBackfillsValidManagers(t *testing.T) {
+ s := &schema.Schema{
+ Tables: map[string]schema.Table{
+ "employees": {
+ Columns: map[string]schema.Column{
+ "id": {Type: "integer", PK: true},
+ "manager_id": {Type: "integer", FK: "employees.id"},
+ },
+ },
+ },
+ }
+
+ data, err := GenerateWithOptions(s, []string{"employees"}, 5, 0, nil, "pgx", GenerateOptions{SelfRefDepth: 2})
+ if err != nil {
+ t.Fatalf("GenerateWithOptions: %v", err)
+ }
+ rows := data["employees"]
+ if got := len(rows); got != 5 {
+ t.Fatalf("employees rows = %d, want 5", got)
+ }
+ if rows[0]["manager_id"] == nil {
+ t.Fatal("non-nullable self-reference should be backfilled on first row")
+ }
+ if rows[0]["manager_id"] != rows[0]["id"] {
+ t.Fatalf("first hard self-reference should self-root, got manager_id=%v id=%v", rows[0]["manager_id"], rows[0]["id"])
+ }
+ ids := map[interface{}]bool{}
+ for _, row := range rows {
+ ids[row["id"]] = true
+ }
+ for i, row := range rows {
+ if row["manager_id"] == nil {
+ t.Fatalf("row %d manager_id is nil for non-nullable self-FK", i)
+ }
+ if !ids[row["manager_id"]] {
+ t.Fatalf("row %d manager_id=%v does not reference generated employee IDs %v", i, row["manager_id"], ids)
+ }
+ }
+ if got := maxSelfRefDepth(rows, "id", "manager_id"); got > 2 {
+ t.Fatalf("self-reference depth = %d, want <= 2", got)
+ }
+}
+
+// TestGenerateWithOptionsSelfRefDepthZeroDoesNotBuildNullableChain generates
+// 4 rows for a table with a nullable self-referential FK at SelfRefDepth 0
+// and checks that every row's parent_id is nil.
+func TestGenerateWithOptionsSelfRefDepthZeroDoesNotBuildNullableChain(t *testing.T) {
+ s := &schema.Schema{
+ Tables: map[string]schema.Table{
+ "categories": {
+ Columns: map[string]schema.Column{
+ "id": {Type: "integer", PK: true},
+ "parent_id": {Type: "integer", FK: "categories.id", Nullable: true},
+ },
+ },
+ },
+ }
+
+ data, err := GenerateWithOptions(s, []string{"categories"}, 4, 0, nil, "pgx", GenerateOptions{SelfRefDepth: 0})
+ if err != nil {
+ t.Fatalf("GenerateWithOptions: %v", err)
+ }
+ for i, row := range data["categories"] {
+ if row["parent_id"] != nil {
+ t.Fatalf("row %d parent_id = %v, want nil when self-ref depth is 0", i, row["parent_id"])
+ }
+ }
+}
+
+// maxSelfRefDepth reports the longest parent chain found in rows, where a
+// row's parent is the row whose pkCol value equals its fkCol value. A chain
+// ends at a nil or unknown parent, at a parent with the same pkCol value as
+// the current row, or when a parent value repeats (which stops cycles).
+func maxSelfRefDepth(rows []map[string]interface{}, pkCol, fkCol string) int {
+	index := make(map[interface{}]map[string]interface{}, len(rows))
+	for _, r := range rows {
+		index[r[pkCol]] = r
+	}
+
+	deepest := 0
+	for _, start := range rows {
+		visited := make(map[interface{}]bool)
+		hops := 0
+		// hops is only incremented after a successful step to a parent.
+		for node := start; ; hops++ {
+			parentID := node[fkCol]
+			if parentID == nil || visited[parentID] {
+				break
+			}
+			visited[parentID] = true
+			parent := index[parentID]
+			if parent == nil || parent[pkCol] == node[pkCol] {
+				break
+			}
+			node = parent
+		}
+		if hops > deepest {
+			deepest = hops
+		}
+	}
+	return deepest
+}
+
func TestGenerate_differentSeedsDifferentOutput(t *testing.T) {
s := &schema.Schema{
Tables: map[string]schema.Table{
diff --git a/internal/graph/graph.go b/internal/graph/graph.go
index 9287f0c..37baa38 100644
--- a/internal/graph/graph.go
+++ b/internal/graph/graph.go
@@ -48,7 +48,7 @@ func Build(s *schema.Schema) *Graph {
}
refTable := parts[0]
if refTable == tableName {
- continue // self-reference: skip
+ continue
}
g.edges[refTable] = append(g.edges[refTable], tableName)
g.inDegree[tableName]++
@@ -87,12 +87,27 @@ func (g *Graph) TopologicalSort() ([]string, error) {
}
if len(sorted) != len(g.nodes) {
- return nil, fmt.Errorf("circular FK dependency detected — use --disable-fk to bypass")
+ cycles := make(map[string]bool)
+ for tableName, degree := range inDegree {
+ if degree > 0 {
+ cycles[tableName] = true
+ }
+ }
+ return nil, fmt.Errorf("circular FK dependency detected among %s — make one FK nullable, use deferrable constraints manually, or use --disable-fk to bypass", strings.Join(sortedKeys(cycles), ", "))
}
return sorted, nil
}
+// sortedKeys returns the keys of values sorted in ascending order.
+func sortedKeys(values map[string]bool) []string {
+	names := make([]string, len(values))
+	next := 0
+	for name := range values {
+		names[next] = name
+		next++
+	}
+	sort.Strings(names)
+	return names
+}
+
// Parents returns the tables that `table` has hard (non-nullable) FK dependencies on.
func (g *Graph) Parents(table string) []string {
var parents []string
@@ -119,9 +134,13 @@ func (g *Graph) Children(table string) []string {
// order and, per table, which parent tables it depends on (hard dependencies
// listed first, nullable/optional ones marked with "?").
func RenderPlan(s *schema.Schema, sortedTables []string, rows int) string {
+ return RenderPlanWithCounts(s, sortedTables, rows, nil)
+}
+
+func RenderPlanWithCounts(s *schema.Schema, sortedTables []string, rows int, tableRows map[string]int) string {
var sb strings.Builder
- fmt.Fprintf(&sb, "\n=== Dry Run — Seed Plan (%d tables, %d rows each) ===\n\n", len(sortedTables), rows)
+ fmt.Fprintf(&sb, "\n=== Dry Run — Seed Plan (%d tables, default %d rows) ===\n\n", len(sortedTables), rows)
// Calculate column widths.
numWidth := len(fmt.Sprintf("%d", len(sortedTables)))
@@ -132,8 +151,8 @@ func RenderPlan(s *schema.Schema, sortedTables []string, rows int) string {
}
}
- fmt.Fprintf(&sb, " %-*s %-*s %s\n", numWidth, "#", tableWidth, "Table", "Depends On")
- fmt.Fprintf(&sb, " %s\n", strings.Repeat("─", numWidth+2+tableWidth+2+40))
+ fmt.Fprintf(&sb, " %-*s %-*s %-6s %s\n", numWidth, "#", tableWidth, "Table", "Rows", "Depends On")
+ fmt.Fprintf(&sb, " %s\n", strings.Repeat("─", numWidth+2+tableWidth+2+6+2+40))
for i, tableName := range sortedTables {
table := s.Tables[tableName]
@@ -168,7 +187,11 @@ func RenderPlan(s *schema.Schema, sortedTables []string, rows int) string {
deps = strings.Join(all, ", ")
}
- fmt.Fprintf(&sb, " %-*d %-*s %s\n", numWidth, i+1, tableWidth, tableName, deps)
+ tableCount := rows
+ if override := tableRows[tableName]; override > 0 {
+ tableCount = override
+ }
+ fmt.Fprintf(&sb, " %-*d %-*s %-6d %s\n", numWidth, i+1, tableWidth, tableName, tableCount, deps)
}
fmt.Fprintln(&sb)
diff --git a/internal/graph/graph_test.go b/internal/graph/graph_test.go
index 14313a5..b984020 100644
--- a/internal/graph/graph_test.go
+++ b/internal/graph/graph_test.go
@@ -221,6 +221,28 @@ func TestTopologicalSort_cycle_returnsError(t *testing.T) {
if !strings.Contains(err.Error(), "circular") {
t.Errorf("error should mention circular dependency, got: %v", err)
}
+ for _, tableName := range []string{"a", "b"} {
+ if !strings.Contains(err.Error(), tableName) {
+ t.Errorf("error should name cycle table %q, got: %v", tableName, err)
+ }
+ }
+}
+
+func TestTopologicalSort_hardSelfReferenceSortsSingleTable(t *testing.T) {
+ s := makeSchema(map[string]map[string]schema.Column{
+ "employees": {
+ "id": {PK: true},
+ "manager_id": {FK: "employees.id"},
+ },
+ })
+
+ sorted, err := Build(s).TopologicalSort()
+ if err != nil {
+ t.Fatalf("hard self-reference should be handled during generation, got planning error: %v", err)
+ }
+ if len(sorted) != 1 || sorted[0] != "employees" {
+ t.Fatalf("sorted = %v, want [employees]", sorted)
+ }
}
// ── RenderPlan ────────────────────────────────────────────────────────────────
@@ -299,6 +321,27 @@ func TestRenderPlan_rootHasDash(t *testing.T) {
}
}
+// TestRenderPlanWithCountsShowsOverrides renders a plan with a default of 2
+// rows and an override of 5 for orders, then checks that the first output
+// line mentioning "orders" also contains "5".
+func TestRenderPlanWithCountsShowsOverrides(t *testing.T) {
+ s := makeSchema(map[string]map[string]schema.Column{
+ "users": {"id": {PK: true}},
+ "orders": {"id": {PK: true}, "user_id": {FK: "users.id"}},
+ })
+ g := Build(s)
+ sorted, _ := g.TopologicalSort()
+ out := RenderPlanWithCounts(s, sorted, 2, map[string]int{"orders": 5})
+
+ lines := strings.Split(out, "\n")
+ for _, line := range lines {
+ if strings.Contains(line, "orders") {
+ if !strings.Contains(line, "5") {
+ t.Fatalf("orders row should show override 5 rows, got: %q", line)
+ }
+ return
+ }
+ }
+ t.Fatalf("orders row not found in plan:\n%s", out)
+}
+
// ── Parents / Children ──────────────────────────────────────────────────────
func TestParents_returnsHardFKParents(t *testing.T) {
diff --git a/internal/tui/config.go b/internal/tui/config.go
index bf69fd0..dba4551 100644
--- a/internal/tui/config.go
+++ b/internal/tui/config.go
@@ -25,11 +25,16 @@ type configField struct {
toggled bool
}
-func newConfig(rows, batchSize, enumRows int, truncate bool) configModel {
+func newConfig(rows, batchSize, enumRows int, truncate bool, selfRefDepth ...int) configModel {
+ depth := 2
+ if len(selfRefDepth) > 0 {
+ depth = selfRefDepth[0]
+ }
fields := []configField{
makeNumericField("Rows per table", rows),
makeNumericField("Batch size", batchSize),
makeNumericField("Enum rows (0 = use rows)", enumRows),
+ makeNumericField("Self-ref depth", depth),
{label: "Truncate before seeding", isToggle: true, toggled: truncate},
}
fields[0].input.Focus()
@@ -52,8 +57,11 @@ func (m configModel) BatchSize() int {
}
return v
}
-func (m configModel) EnumRows() int { return m.intVal(2, 0) }
-func (m configModel) Truncate() bool { return m.fields[3].toggled }
+func (m configModel) EnumRows() int { return m.intVal(2, 0) }
+// SelfRefDepth reads the field at index 3 (the "Self-ref depth" input that
+// newConfig places fourth) through intVal, passing 2 as the fallback.
+func (m configModel) SelfRefDepth() int {
+ return m.intVal(3, 2)
+}
+func (m configModel) Truncate() bool { return m.fields[4].toggled }
func (m configModel) intVal(idx, fallback int) int {
if idx >= len(m.fields) {
diff --git a/internal/tui/cycle_test.go b/internal/tui/cycle_test.go
new file mode 100644
index 0000000..272884b
--- /dev/null
+++ b/internal/tui/cycle_test.go
@@ -0,0 +1,12 @@
+package tui
+
+import "github.com/AxeForging/seedstorm/internal/schema"
+
+// hardSelfReferenceTUISchema returns a single-table test schema in which
+// employees.manager_id is an FK back to employees.id and is not marked
+// Nullable.
+func hardSelfReferenceTUISchema() *schema.Schema {
+ return makeSchema(map[string]map[string]schema.Column{
+ "employees": {
+ "id": {Type: "integer", PK: true},
+ "manager_id": {Type: "integer", FK: "employees.id"},
+ },
+ })
+}
diff --git a/internal/tui/execute.go b/internal/tui/execute.go
index 7a55630..969f3ac 100644
--- a/internal/tui/execute.go
+++ b/internal/tui/execute.go
@@ -282,7 +282,9 @@ func startSeed(ctx context.Context, s *seedParams) tea.Cmd {
}
}
- data, err := faker.GenerateFilteredWithCounts(s.schema, s.tables, s.tables, s.rows, s.enumRows, s.tableRows, conn, s.dbType)
+ data, err := faker.GenerateFilteredWithOptions(s.schema, s.tables, s.tables, s.rows, s.enumRows, s.tableRows, conn, s.dbType, faker.GenerateOptions{
+ SelfRefDepth: s.selfRefDepth,
+ })
if err != nil {
return seedDoneMsg{err: fmt.Errorf("data generation failed: %w", err)}
}
@@ -317,7 +319,9 @@ func startSeed(ctx context.Context, s *seedParams) tea.Cmd {
// startDryRun returns a tea.Cmd that generates data and builds a summary.
func startDryRun(s *seedParams) tea.Cmd {
return func() tea.Msg {
- data, err := faker.GenerateFilteredWithCounts(s.schema, s.tables, s.tables, s.rows, s.enumRows, s.tableRows, nil, s.dbType)
+ data, err := faker.GenerateFilteredWithOptions(s.schema, s.tables, s.tables, s.rows, s.enumRows, s.tableRows, nil, s.dbType, faker.GenerateOptions{
+ SelfRefDepth: s.selfRefDepth,
+ })
if err != nil {
return dryRunDoneMsg{err: fmt.Errorf("data generation failed: %w", err)}
}
diff --git a/internal/tui/gaps.go b/internal/tui/gaps.go
index 5d5ac16..e0762b9 100644
--- a/internal/tui/gaps.go
+++ b/internal/tui/gaps.go
@@ -51,7 +51,7 @@ type GapsModel struct {
}
// RunGaps launches the interactive TUI for the gaps command.
-func RunGaps(ctx context.Context, s *schema.Schema, dbType, dsn string, counts map[string]int64, defaultRows, defaultBatchSize, defaultEnumRows int) error {
+func RunGaps(ctx context.Context, s *schema.Schema, dbType, dsn string, counts map[string]int64, defaultRows, defaultBatchSize, defaultEnumRows int, defaultSelfRefDepth ...int) error {
g := graph.Build(s)
sortedAll, err := g.TopologicalSort()
if err != nil {
@@ -84,7 +84,7 @@ func RunGaps(ctx context.Context, s *schema.Schema, dbType, dsn string, counts m
dsn: dsn,
counts: counts,
picker: newGapsPicker(items, counts, 40),
- config: newConfig(defaultRows, defaultBatchSize, defaultEnumRows, false),
+ config: newConfig(defaultRows, defaultBatchSize, defaultEnumRows, false, defaultSelfRefDepth...),
height: 40,
width: 80,
}
@@ -255,15 +255,16 @@ func (m GapsModel) updateReview(msg tea.Msg) (tea.Model, tea.Cmd) {
}
if m.review.done {
params := &seedParams{
- schema: m.schema,
- tables: m.review.tables,
- rows: m.review.rows,
- enumRows: m.review.enumRows,
- tableRows: m.review.tableRows,
- batchSize: m.review.batch,
- truncate: false, // gaps never truncates
- dbType: m.dbType,
- dsn: m.dsn,
+ schema: m.schema,
+ tables: m.review.tables,
+ rows: m.review.rows,
+ enumRows: m.review.enumRows,
+ selfRefDepth: m.config.SelfRefDepth(),
+ tableRows: m.review.tableRows,
+ batchSize: m.review.batch,
+ truncate: false, // gaps never truncates
+ dbType: m.dbType,
+ dsn: m.dsn,
}
m.execute = newExecute(len(m.review.tables), m.review.dryRun)
m.step = gapsStepExecute
@@ -339,7 +340,9 @@ func startGapsFill(ctx context.Context, s *seedParams, allSorted []string) tea.C
return seedDoneMsg{err: fmt.Errorf("failed to ping database: %w", err)}
}
- data, err := faker.GenerateFilteredWithCounts(s.schema, allSorted, s.tables, s.rows, s.enumRows, s.tableRows, conn, s.dbType)
+ data, err := faker.GenerateFilteredWithOptions(s.schema, allSorted, s.tables, s.rows, s.enumRows, s.tableRows, conn, s.dbType, faker.GenerateOptions{
+ SelfRefDepth: s.selfRefDepth,
+ })
if err != nil {
return seedDoneMsg{err: fmt.Errorf("data generation failed: %w", err)}
}
@@ -374,7 +377,9 @@ func startGapsFill(ctx context.Context, s *seedParams, allSorted []string) tea.C
// startGapsDryRun generates data for gap tables and returns a preview.
func startGapsDryRun(s *seedParams, allSorted []string) tea.Cmd {
return func() tea.Msg {
- data, err := faker.GenerateFilteredWithCounts(s.schema, allSorted, s.tables, s.rows, s.enumRows, s.tableRows, nil, s.dbType)
+ data, err := faker.GenerateFilteredWithOptions(s.schema, allSorted, s.tables, s.rows, s.enumRows, s.tableRows, nil, s.dbType, faker.GenerateOptions{
+ SelfRefDepth: s.selfRefDepth,
+ })
if err != nil {
return dryRunDoneMsg{err: fmt.Errorf("data generation failed: %w", err)}
}
diff --git a/internal/tui/gaps_test.go b/internal/tui/gaps_test.go
index 95fcae5..3164259 100644
--- a/internal/tui/gaps_test.go
+++ b/internal/tui/gaps_test.go
@@ -44,6 +44,27 @@ func buildGapsModel() GapsModel {
}
}
+// TestStartGapsDryRunHandlesHardSelfReference runs the startGapsDryRun
+// command for the hard self-reference schema (3 rows, selfRefDepth 2) and
+// checks that it yields a dryRunDoneMsg with no error and a total of 3.
+func TestStartGapsDryRunHandlesHardSelfReference(t *testing.T) {
+ params := &seedParams{
+ schema: hardSelfReferenceTUISchema(),
+ tables: []string{"employees"},
+ rows: 3,
+ selfRefDepth: 2,
+ dbType: "pgx",
+ }
+ msg := startGapsDryRun(params, []string{"employees"})()
+ done, ok := msg.(dryRunDoneMsg)
+ if !ok {
+ t.Fatalf("msg type = %T, want dryRunDoneMsg", msg)
+ }
+ if done.err != nil {
+ t.Fatalf("startGapsDryRun: %v", done.err)
+ }
+ if done.total != 3 {
+ t.Fatalf("total = %d, want 3", done.total)
+ }
+}
+
func sendGapsKey(m tea.Model, key string) tea.Model {
return sendKey(m, key) // reuse from wizard_test.go
}
diff --git a/internal/tui/generate.go b/internal/tui/generate.go
index 3b69fbb..40845be 100644
--- a/internal/tui/generate.go
+++ b/internal/tui/generate.go
@@ -31,24 +31,34 @@ const (
// genConfigModel extends config with format and output fields.
type genConfigModel struct {
- rowsInput textinput.Model
- outInput textinput.Model
- formatIdx int // 0=yaml 1=json 2=sql
- focused int // 0=rows 1=format 2=output
- done bool
- back bool
- quitting bool
+ rowsInput textinput.Model
+ depthInput textinput.Model
+ outInput textinput.Model
+ formatIdx int // 0=yaml 1=json 2=sql
+ focused int // 0=rows 1=depth 2=format 3=output
+ done bool
+ back bool
+ quitting bool
}
var genFormats = []string{"yaml", "json", "sql"}
-func newGenConfig(rows int, format, outPath string) genConfigModel {
+func newGenConfig(rows int, format, outPath string, selfRefDepth ...int) genConfigModel {
ri := textinput.New()
ri.SetValue(fmt.Sprintf("%d", rows))
ri.CharLimit = 10
ri.Width = 12
ri.Focus()
+ depth := 2
+ if len(selfRefDepth) > 0 {
+ depth = selfRefDepth[0]
+ }
+ di := textinput.New()
+ di.SetValue(fmt.Sprintf("%d", depth))
+ di.CharLimit = 10
+ di.Width = 12
+
oi := textinput.New()
oi.SetValue(outPath)
oi.CharLimit = 200
@@ -63,9 +73,10 @@ func newGenConfig(rows int, format, outPath string) genConfigModel {
}
return genConfigModel{
- rowsInput: ri,
- outInput: oi,
- formatIdx: fmtIdx,
+ rowsInput: ri,
+ depthInput: di,
+ outInput: oi,
+ formatIdx: fmtIdx,
}
}
@@ -78,6 +89,13 @@ func (m genConfigModel) Rows() int {
}
func (m genConfigModel) Format() string { return genFormats[m.formatIdx] }
func (m genConfigModel) OutPath() string { return strings.TrimSpace(m.outInput.Value()) }
+// SelfRefDepth returns the self-reference depth typed into the depth input.
+// Input that is not a non-negative integer falls back to
+// faker.DefaultSelfRefDepth, so this fallback cannot drift from the
+// generator's own default.
+func (m genConfigModel) SelfRefDepth() int {
+	depth, err := strconv.Atoi(strings.TrimSpace(m.depthInput.Value()))
+	if err != nil || depth < 0 {
+		return faker.DefaultSelfRefDepth
+	}
+	return depth
+}
func (m genConfigModel) Update(msg tea.Msg) (genConfigModel, tea.Cmd) {
switch msg := msg.(type) {
@@ -85,38 +103,44 @@ func (m genConfigModel) Update(msg tea.Msg) (genConfigModel, tea.Cmd) {
switch msg.String() {
case "tab", "down", "j":
m.rowsInput.Blur()
+ m.depthInput.Blur()
m.outInput.Blur()
- m.focused = (m.focused + 1) % 3
+ m.focused = (m.focused + 1) % 4
switch m.focused {
case 0:
m.rowsInput.Focus()
- case 2:
+ case 1:
+ m.depthInput.Focus()
+ case 3:
m.outInput.Focus()
}
return m, nil
case "shift+tab", "up", "k":
m.rowsInput.Blur()
+ m.depthInput.Blur()
m.outInput.Blur()
- m.focused = (m.focused + 2) % 3
+ m.focused = (m.focused + 3) % 4
switch m.focused {
case 0:
m.rowsInput.Focus()
- case 2:
+ case 1:
+ m.depthInput.Focus()
+ case 3:
m.outInput.Focus()
}
return m, nil
case "left", "h":
- if m.focused == 1 {
+ if m.focused == 2 {
m.formatIdx = (m.formatIdx + len(genFormats) - 1) % len(genFormats)
return m, nil
}
case "right", "l":
- if m.focused == 1 {
+ if m.focused == 2 {
m.formatIdx = (m.formatIdx + 1) % len(genFormats)
return m, nil
}
case " ":
- if m.focused == 1 {
+ if m.focused == 2 {
m.formatIdx = (m.formatIdx + 1) % len(genFormats)
return m, nil
}
@@ -124,7 +148,7 @@ func (m genConfigModel) Update(msg tea.Msg) (genConfigModel, tea.Cmd) {
m.done = true
return m, nil
case "b":
- if m.focused == 1 { // only works on format selector (not text inputs)
+ if m.focused == 2 { // only works on format selector (not text inputs)
m.back = true
return m, nil
}
@@ -143,7 +167,11 @@ func (m genConfigModel) Update(msg tea.Msg) (genConfigModel, tea.Cmd) {
var cmd tea.Cmd
m.rowsInput, cmd = m.rowsInput.Update(msg)
return m, cmd
- case 2:
+ case 1:
+ var cmd tea.Cmd
+ m.depthInput, cmd = m.depthInput.Update(msg)
+ return m, cmd
+ case 3:
var cmd tea.Cmd
m.outInput, cmd = m.outInput.Update(msg)
return m, cmd
@@ -161,6 +189,7 @@ func (m genConfigModel) View() string {
view string
}{
{"Rows per table", m.rowsInput.View()},
+ {"Self-ref depth", m.depthInput.View()},
{"Format", m.formatView()},
{"Output file", m.outInput.View()},
}
@@ -224,7 +253,7 @@ type GenModel struct {
}
// RunGenerate launches the interactive TUI for the generate command.
-func RunGenerate(ctx context.Context, s *schema.Schema, dbType, format, outPath string, defaultRows int) error {
+func RunGenerate(ctx context.Context, s *schema.Schema, dbType, format, outPath string, defaultRows int, defaultSelfRefDepth ...int) error {
g := graph.Build(s)
sortedAll, err := g.TopologicalSort()
if err != nil {
@@ -244,7 +273,7 @@ func RunGenerate(ctx context.Context, s *schema.Schema, dbType, format, outPath
sortedAll: sortedAll,
dbType: dbType,
picker: newTablePicker(items, 40),
- genConfig: newGenConfig(defaultRows, format, outPath),
+ genConfig: newGenConfig(defaultRows, format, outPath, defaultSelfRefDepth...),
height: 40,
width: 80,
}
@@ -367,7 +396,7 @@ func (m GenModel) updateRows(msg tea.Msg) (tea.Model, tea.Cmd) {
m.execute.dryRun = true // generate is always a "dry run" (no DB)
m.step = genStepExecute
- return m, tea.Batch(m.execute.spinner.Tick, startGenerate(m.schema, m.volumes.tables, m.genConfig.Rows(), m.volumes.TableRows(), m.genConfig.Format(), m.genConfig.OutPath(), m.dbType))
+ return m, tea.Batch(m.execute.spinner.Tick, startGenerate(m.schema, m.volumes.tables, m.genConfig.Rows(), m.genConfig.SelfRefDepth(), m.volumes.TableRows(), m.genConfig.Format(), m.genConfig.OutPath(), m.dbType))
}
return m, cmd
}
@@ -425,9 +454,11 @@ func (m GenModel) View() string {
}
// startGenerate generates data and optionally writes to file.
-func startGenerate(s *schema.Schema, tables []string, rows int, tableRows map[string]int, format, outPath, dbType string) tea.Cmd {
+func startGenerate(s *schema.Schema, tables []string, rows, selfRefDepth int, tableRows map[string]int, format, outPath, dbType string) tea.Cmd {
return func() tea.Msg {
- data, err := faker.GenerateFilteredWithCounts(s, tables, tables, rows, 0, tableRows, nil, dbType)
+ data, err := faker.GenerateFilteredWithOptions(s, tables, tables, rows, 0, tableRows, nil, dbType, faker.GenerateOptions{
+ SelfRefDepth: selfRefDepth,
+ })
if err != nil {
return generateDoneMsg{err: fmt.Errorf("generation failed: %w", err)}
}
diff --git a/internal/tui/generate_test.go b/internal/tui/generate_test.go
index 320be5a..720d147 100644
--- a/internal/tui/generate_test.go
+++ b/internal/tui/generate_test.go
@@ -34,6 +34,20 @@ func buildGenModel() GenModel {
}
}
+// TestStartGenerateHandlesHardSelfReference runs the startGenerate command
+// for the hard self-reference schema (3 rows, depth 2, yaml format, empty
+// output path) and checks that it yields a generateDoneMsg with no error and
+// a total of 3.
+func TestStartGenerateHandlesHardSelfReference(t *testing.T) {
+ msg := startGenerate(hardSelfReferenceTUISchema(), []string{"employees"}, 3, 2, nil, "yaml", "", "pgx")()
+ done, ok := msg.(generateDoneMsg)
+ if !ok {
+ t.Fatalf("msg type = %T, want generateDoneMsg", msg)
+ }
+ if done.err != nil {
+ t.Fatalf("startGenerate: %v", done.err)
+ }
+ if done.total != 3 {
+ t.Fatalf("total = %d, want 3", done.total)
+ }
+}
+
func sendGenKey(m tea.Model, key string) tea.Model {
return sendKey(m, key)
}
diff --git a/internal/tui/tui.go b/internal/tui/tui.go
index 897f21c..71a5d94 100644
--- a/internal/tui/tui.go
+++ b/internal/tui/tui.go
@@ -23,15 +23,16 @@ const (
// seedParams holds everything needed to execute the seed operation.
type seedParams struct {
- schema *schema.Schema
- tables []string
- rows int
- enumRows int
- tableRows map[string]int
- batchSize int
- truncate bool
- dbType string
- dsn string
+ schema *schema.Schema
+ tables []string
+ rows int
+ enumRows int
+ selfRefDepth int
+ tableRows map[string]int
+ batchSize int
+ truncate bool
+ dbType string
+ dsn string
}
// Model is the top-level TUI model orchestrating the wizard steps.
@@ -57,7 +58,7 @@ type Model struct {
}
// Run launches the interactive TUI and returns when the user completes or aborts.
-func Run(ctx context.Context, s *schema.Schema, dbType, dsn string, defaultRows, defaultBatchSize, defaultEnumRows int, defaultTruncate bool) error {
+func Run(ctx context.Context, s *schema.Schema, dbType, dsn string, defaultRows, defaultBatchSize, defaultEnumRows int, defaultTruncate bool, defaultSelfRefDepth ...int) error {
g := graph.Build(s)
sortedAll, err := g.TopologicalSort()
if err != nil {
@@ -84,7 +85,7 @@ func Run(ctx context.Context, s *schema.Schema, dbType, dsn string, defaultRows,
dbType: dbType,
dsn: dsn,
picker: newTablePicker(items, 24),
- config: newConfig(defaultRows, defaultBatchSize, defaultEnumRows, defaultTruncate),
+ config: newConfig(defaultRows, defaultBatchSize, defaultEnumRows, defaultTruncate, defaultSelfRefDepth...),
height: 24,
width: 80,
}
@@ -247,15 +248,16 @@ func (m Model) updateReview(msg tea.Msg) (tea.Model, tea.Cmd) {
}
if m.review.done {
params := &seedParams{
- schema: m.schema,
- tables: m.review.tables,
- rows: m.review.rows,
- enumRows: m.review.enumRows,
- tableRows: m.review.tableRows,
- batchSize: m.review.batch,
- truncate: m.review.truncate,
- dbType: m.dbType,
- dsn: m.dsn,
+ schema: m.schema,
+ tables: m.review.tables,
+ rows: m.review.rows,
+ enumRows: m.review.enumRows,
+ selfRefDepth: m.config.SelfRefDepth(),
+ tableRows: m.review.tableRows,
+ batchSize: m.review.batch,
+ truncate: m.review.truncate,
+ dbType: m.dbType,
+ dsn: m.dsn,
}
m.execute = newExecute(len(m.review.tables), m.review.dryRun)
diff --git a/internal/tui/wizard_test.go b/internal/tui/wizard_test.go
index 8e81048..d4b54cf 100644
--- a/internal/tui/wizard_test.go
+++ b/internal/tui/wizard_test.go
@@ -38,6 +38,27 @@ func buildTestModel() Model {
}
}
+// TestStartDryRunHandlesHardSelfReference executes the dry-run command
+// against the hardSelfReferenceTUISchema fixture, restricted to the
+// employees table, and expects a dryRunDoneMsg with no error and a total
+// of three rows.
+func TestStartDryRunHandlesHardSelfReference(t *testing.T) {
+	cmd := startDryRun(&seedParams{
+		schema:       hardSelfReferenceTUISchema(),
+		tables:       []string{"employees"},
+		rows:         3,
+		selfRefDepth: 2,
+		dbType:       "pgx",
+	})
+	result := cmd()
+	done, isDone := result.(dryRunDoneMsg)
+	// Each case aborts the test via Fatalf, so at most one branch ever runs.
+	switch {
+	case !isDone:
+		t.Fatalf("msg type = %T, want dryRunDoneMsg", result)
+	case done.err != nil:
+		t.Fatalf("startDryRun: %v", done.err)
+	case done.total != 3:
+		t.Fatalf("total = %d, want 3", done.total)
+	}
+}
+
func sendKey(m tea.Model, key string) tea.Model {
var msg tea.Msg
switch key {
diff --git a/internal/web/handlers_api_test.go b/internal/web/handlers_api_test.go
index cfad470..fa81df8 100644
--- a/internal/web/handlers_api_test.go
+++ b/internal/web/handlers_api_test.go
@@ -110,6 +110,24 @@ func TestBuildGraphPayload_cycle(t *testing.T) {
}
}
+// TestBuildGraphPayload_hardSelfReferenceIsSeedable checks that a table
+// whose manager_id column is a foreign key to its own id column is not
+// reported as a cycle and appears alone in the payload's order.
+func TestBuildGraphPayload_hardSelfReferenceIsSeedable(t *testing.T) {
+	employees := schema.Table{Columns: map[string]schema.Column{
+		"id":         {PK: true, Type: "int"},
+		"manager_id": {Type: "int", FK: "employees.id"},
+	}}
+	sc := &schema.Schema{Tables: map[string]schema.Table{"employees": employees}}
+
+	got := buildGraphPayload(sc, nil)
+	if got.Cycle {
+		t.Fatalf("hard self-reference should be handled during generation, got cycle")
+	}
+	wantOrder := []string{"employees"}
+	if !reflect.DeepEqual(got.Order, wantOrder) {
+		t.Fatalf("order = %v, want [employees]", got.Order)
+	}
+}
+
func TestHandleTablePreviewJSON_requiresSession(t *testing.T) {
s, err := New(Options{Addr: "127.0.0.1:0"})
if err != nil {
diff --git a/internal/web/runners.go b/internal/web/runners.go
index c32e3b4..2b6e2dd 100644
--- a/internal/web/runners.go
+++ b/internal/web/runners.go
@@ -40,14 +40,15 @@ func jobLogger(w io.Writer) zerolog.Logger {
// restricts seeding to the listed tables plus their transitive non-nullable
// FK parents.
type SeedRequest struct {
- Rows int `json:"rows"`
- EnumRows int `json:"enumRows"`
- BatchSize int `json:"batchSize"`
- DisableFK bool `json:"disableFK"`
- Truncate bool `json:"truncate"`
- DryRun bool `json:"dryRun"`
- Tables []string `json:"tables,omitempty"`
- TableRows map[string]int `json:"tableRows,omitempty"`
+ Rows int `json:"rows"`
+ EnumRows int `json:"enumRows"`
+ BatchSize int `json:"batchSize"`
+ SelfRefDepth *int `json:"selfRefDepth,omitempty"` // nil resolves to faker.DefaultSelfRefDepth via requestSelfRefDepth
+ DisableFK bool `json:"disableFK"`
+ Truncate bool `json:"truncate"`
+ DryRun bool `json:"dryRun"`
+ Tables []string `json:"tables,omitempty"`
+ TableRows map[string]int `json:"tableRows,omitempty"`
}
func (s *Server) runSeed(ctx context.Context, sess *Session, req SeedRequest, jc JobControl) (map[string]any, error) {
@@ -121,7 +122,9 @@ func (s *Server) runSeed(ctx context.Context, sess *Session, req SeedRequest, jc
}
// GenerateFiltered preloads PKs from allSorted so target tables can FK-ref
// already-populated parents; targetTables alone is what gets generated.
- data, err := faker.GenerateFilteredWithCounts(sc, allSorted, targetTables, req.Rows, req.EnumRows, cleanTableRows(req.TableRows), connArg, sess.DBType)
+ data, err := faker.GenerateFilteredWithOptions(sc, allSorted, targetTables, req.Rows, req.EnumRows, cleanTableRows(req.TableRows), connArg, sess.DBType, faker.GenerateOptions{
+ SelfRefDepth: requestSelfRefDepth(req.SelfRefDepth),
+ })
if err != nil {
return nil, fmt.Errorf("generation: %w", err)
}
@@ -183,13 +186,14 @@ func (s *Server) runSeed(ctx context.Context, sess *Session, req SeedRequest, jc
// GapsRequest mirrors the gaps CLI flags. Tables, when set, restricts the
// fill phase to the listed empty tables (plus their transitive parents).
type GapsRequest struct {
- Rows int `json:"rows"`
- EnumRows int `json:"enumRows"`
- BatchSize int `json:"batchSize"`
- Fill bool `json:"fill"`
- DryRun bool `json:"dryRun"`
- Tables []string `json:"tables,omitempty"`
- TableRows map[string]int `json:"tableRows,omitempty"`
+ Rows int `json:"rows"`
+ EnumRows int `json:"enumRows"`
+ BatchSize int `json:"batchSize"`
+ SelfRefDepth *int `json:"selfRefDepth,omitempty"` // nil resolves to faker.DefaultSelfRefDepth via requestSelfRefDepth
+ Fill bool `json:"fill"`
+ DryRun bool `json:"dryRun"`
+ Tables []string `json:"tables,omitempty"`
+ TableRows map[string]int `json:"tableRows,omitempty"`
}
func (s *Server) runGaps(ctx context.Context, sess *Session, req GapsRequest, jc JobControl) (map[string]any, error) {
@@ -260,7 +264,9 @@ func (s *Server) runGaps(ctx context.Context, sess *Session, req GapsRequest, jc
jc.Phase("generate")
log.Info().Int("gap_tables", len(gapTables)).Int("rows", req.Rows).Msg("Generating data for empty tables")
- data, err := faker.GenerateFilteredWithCounts(sc, allSorted, gapTables, req.Rows, req.EnumRows, cleanTableRows(req.TableRows), conn, sess.DBType)
+ data, err := faker.GenerateFilteredWithOptions(sc, allSorted, gapTables, req.Rows, req.EnumRows, cleanTableRows(req.TableRows), conn, sess.DBType, faker.GenerateOptions{
+ SelfRefDepth: requestSelfRefDepth(req.SelfRefDepth),
+ })
if err != nil {
return nil, err
}
@@ -294,10 +300,11 @@ func (s *Server) runGaps(ctx context.Context, sess *Session, req GapsRequest, jc
// GenerateRequest mirrors the generate CLI flags. Tables, when set, restricts
// generation to the listed tables plus their transitive non-nullable parents.
type GenerateRequest struct {
- Rows int `json:"rows"`
- Format string `json:"format"` // yaml | json | sql
- Tables []string `json:"tables,omitempty"`
- TableRows map[string]int `json:"tableRows,omitempty"`
+ Rows int `json:"rows"`
+ SelfRefDepth *int `json:"selfRefDepth,omitempty"` // nil resolves to faker.DefaultSelfRefDepth via requestSelfRefDepth
+ Format string `json:"format"` // yaml | json | sql
+ Tables []string `json:"tables,omitempty"`
+ TableRows map[string]int `json:"tableRows,omitempty"`
}
func (s *Server) runGenerate(ctx context.Context, sess *Session, req GenerateRequest, jc JobControl) (map[string]any, error) {
@@ -331,7 +338,9 @@ func (s *Server) runGenerate(ctx context.Context, sess *Session, req GenerateReq
jc.Phase("generate")
log.Info().Int("rows", req.Rows).Int("tables", len(targetTables)).Msg("Generating fake data")
// GenerateFiltered is fine here too: with conn=nil it skips PK preload.
- data, err := faker.GenerateFilteredWithCounts(sc, allSorted, targetTables, req.Rows, 0, cleanTableRows(req.TableRows), nil, sess.DBType)
+ data, err := faker.GenerateFilteredWithOptions(sc, allSorted, targetTables, req.Rows, 0, cleanTableRows(req.TableRows), nil, sess.DBType, faker.GenerateOptions{
+ SelfRefDepth: requestSelfRefDepth(req.SelfRefDepth),
+ })
if err != nil {
return nil, err
}
@@ -379,6 +388,13 @@ func cleanTableRows(rows map[string]int) map[string]int {
return clean
}
+// requestSelfRefDepth resolves the optional selfRefDepth request field.
+// A nil pointer yields faker.DefaultSelfRefDepth; any other value is
+// returned exactly as sent.
+// NOTE(review): zero and negative values pass through unchanged — confirm
+// the generator validates them.
+func requestSelfRefDepth(depth *int) int {
+	if depth != nil {
+		return *depth
+	}
+	return faker.DefaultSelfRefDepth
+}
+
func encodeData(data map[string][]map[string]any, sortedTables []string, format, dbType string) (string, error) {
switch strings.ToLower(format) {
case "json":
diff --git a/internal/web/runners_test.go b/internal/web/runners_test.go
index f73c072..6a70c99 100644
--- a/internal/web/runners_test.go
+++ b/internal/web/runners_test.go
@@ -3,6 +3,7 @@ package web
import (
"context"
"reflect"
+ "strings"
"testing"
"github.com/AxeForging/seedstorm/internal/schema"
@@ -123,6 +124,72 @@ func TestRunGenerateAppliesTableRowOverridesWithoutBreakingDefaults(t *testing.T
}
}
+// TestRunSeedDryRunHandlesHardSelfReference runs runSeed in dry-run mode on
+// the hardSelfReferenceSchema fixture and expects three rows in total and
+// output that mentions both the employees table and its manager_id column.
+func TestRunSeedDryRunHandlesHardSelfReference(t *testing.T) {
+	srv, err := New(Options{Addr: "127.0.0.1:0"})
+	if err != nil {
+		t.Fatalf("New: %v", err)
+	}
+
+	selfRefDepth := 2
+	req := SeedRequest{
+		Rows:         3,
+		BatchSize:    100,
+		SelfRefDepth: &selfRefDepth,
+		DryRun:       true,
+	}
+	sess := &Session{DBType: "pgx", schema: hardSelfReferenceSchema()}
+
+	result, err := srv.runSeed(context.Background(), sess, req, testJobControl{})
+	if err != nil {
+		t.Fatalf("runSeed: %v", err)
+	}
+	if total := result["totalRows"]; total != 3 {
+		t.Fatalf("totalRows = %v, want 3", total)
+	}
+	// A failed type assertion leaves sql empty, which the check below rejects.
+	sql, _ := result["output"].(string)
+	if sql == "" || !containsAll(sql, "employees", "manager_id") {
+		t.Fatalf("dry-run output should include self-referential insert SQL, got %q", sql)
+	}
+}
+
+// TestRunGenerateHandlesHardSelfReference runs runGenerate with YAML output
+// on the hardSelfReferenceSchema fixture and expects three rows in total and
+// output that mentions both the employees table and its manager_id column.
+func TestRunGenerateHandlesHardSelfReference(t *testing.T) {
+	srv, err := New(Options{Addr: "127.0.0.1:0"})
+	if err != nil {
+		t.Fatalf("New: %v", err)
+	}
+
+	selfRefDepth := 2
+	req := GenerateRequest{
+		Rows:         3,
+		SelfRefDepth: &selfRefDepth,
+		Format:       "yaml",
+	}
+	sess := &Session{DBType: "pgx", schema: hardSelfReferenceSchema()}
+
+	result, err := srv.runGenerate(context.Background(), sess, req, testJobControl{})
+	if err != nil {
+		t.Fatalf("runGenerate: %v", err)
+	}
+	if total := result["totalRows"]; total != 3 {
+		t.Fatalf("totalRows = %v, want 3", total)
+	}
+	// A failed type assertion leaves rendered empty, which the check below rejects.
+	rendered, _ := result["output"].(string)
+	if rendered == "" || !containsAll(rendered, "employees", "manager_id") {
+		t.Fatalf("generated output should include self-referential values, got %q", rendered)
+	}
+}
+
+// containsAll reports whether value contains every substring in parts.
+// With no parts it returns true.
+func containsAll(value string, parts ...string) bool {
+	for i := range parts {
+		if strings.Contains(value, parts[i]) {
+			continue
+		}
+		return false
+	}
+	return true
+}
+
func runnerRowCountSchema() *schema.Schema {
return &schema.Schema{
Tables: map[string]schema.Table{
@@ -141,3 +208,16 @@ func runnerRowCountSchema() *schema.Schema {
},
}
}
+
+// hardSelfReferenceSchema returns a one-table schema in which
+// employees.manager_id is a foreign key to employees.id.
+func hardSelfReferenceSchema() *schema.Schema {
+	columns := map[string]schema.Column{
+		"id":         {Type: "integer", PK: true},
+		"manager_id": {Type: "integer", FK: "employees.id"},
+	}
+	tables := map[string]schema.Table{
+		"employees": {Columns: columns},
+	}
+	return &schema.Schema{Tables: tables}
+}
diff --git a/internal/web/static/app.js b/internal/web/static/app.js
index c4ce693..53568d8 100644
--- a/internal/web/static/app.js
+++ b/internal/web/static/app.js
@@ -1564,6 +1564,7 @@
rows: Number(document.getElementById("cfg-rows").value || 0),
enumRows: Number(document.getElementById("cfg-enum").value || 0),
batchSize: Number(document.getElementById("cfg-batch").value || 0),
+ selfRefDepth: Number(document.getElementById("cfg-selfref-depth").value || 0),
truncate: document.getElementById("cfg-truncate").checked,
dryRun: document.getElementById("cfg-dryrun").checked,
disableFK: document.getElementById("cfg-disablefk").checked,
diff --git a/internal/web/templates/workspace.html.tmpl b/internal/web/templates/workspace.html.tmpl
index db9ab0d..f3eb383 100644
--- a/internal/web/templates/workspace.html.tmpl
+++ b/internal/web/templates/workspace.html.tmpl
@@ -132,6 +132,10 @@
Enum/value
+
Options