From 7770f1c9fb8fb83d46c47d7a1c7bfd933bdca5b4 Mon Sep 17 00:00:00 2001 From: Michael Date: Tue, 26 May 2026 11:37:35 -0700 Subject: [PATCH] fix(hub): dollar-quote-aware SQL migration splitter MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit The previous splitter in hub/src/db/migrate.ts naively split schema.sql on `;`, ignoring PostgreSQL dollar-quoted string syntax. Every `DO $$ BEGIN ... EXCEPTION WHEN ... END $$;` block got split mid-string and rejected by Postgres. Production logs showed 7/142 statements failing on boot, leaving two CHECK constraints (`users_claude_session_threshold_pct_range`, `users_claude_week_threshold_pct_range`) and a re-added `scheduled_task_runs_status_check` constraint un-applied. Fix: replace `split(';')` with a small state-machine splitter `splitSqlStatements()` that respects: - $$...$$ and tagged $tag$...$tag$ dollar-quoted strings - single-quoted strings with '' escape - double-quoted identifiers with "" escape - line comments (-- to newline) - block comments (nested per Postgres semantics) - bare positional params like $1 (not a dollar-quote tag) schema.sql is unchanged — the DO blocks are valid SQL; only the parser was wrong. Tests: 13 new unit tests in hub/test/migrate.test.ts covering each shape, plus a real-file assertion that every DO block in the live schema.sql now parses to a single statement with balanced $$ tokens. Note (separate concern, NOT fixed here): runMigrations() still logs failed statements and continues rather than aborting boot. That latent silent-failure is what allowed this bug to live in production for weeks without an obvious crash signal. Worth a follow-up PR to fail loudly on migration error. Co-Authored-By: Claude Opus 4.7 (1M context) --- hub/src/db/migrate.ts | 165 ++++++++++++++++++++++++++++++++++++--- hub/test/migrate.test.ts | 134 +++++++++++++++++++++++++++++++ 2 files changed, 288 insertions(+), 11 deletions(-) create mode 100644 hub/test/migrate.test.ts diff --git a/hub/src/db/migrate.ts b/hub/src/db/migrate.ts index 4ca6d24..e724695 100644 --- a/hub/src/db/migrate.ts +++ b/hub/src/db/migrate.ts @@ -8,6 +8,159 @@ const SCHEMA_CANDIDATES = [ resolve(import.meta.dir, 'schema.sql'), ] +/** + * Split a SQL script into individual statements on top-level `;`, while + * respecting PostgreSQL syntax that contains semicolons that are NOT + * statement terminators: + * - dollar-quoted strings: $$ ... $$ and the tagged variant $tag$ ... $tag$ + * - single-quoted strings with '' escape + * - double-quoted identifiers with "" escape + * - line comments (-- to newline) + * - block comments (slash-star ... star-slash), nesting supported per Postgres + * + * Returns trimmed, non-empty statements. + */ +export function splitSqlStatements(sqlText: string): string[] { + const statements: string[] = [] + let buf = '' + let i = 0 + const n = sqlText.length + + let inSingle = false + let inDouble = false + let inLineComment = false + let blockCommentDepth = 0 + let dollarTag: string | null = null + + while (i < n) { + const c = sqlText[i] + const c2 = sqlText[i + 1] + + if (inLineComment) { + buf += c + if (c === '\n') inLineComment = false + i++ + continue + } + + if (blockCommentDepth > 0) { + buf += c + if (c === '/' && c2 === '*') { + blockCommentDepth++ + buf += c2 + i += 2 + continue + } + if (c === '*' && c2 === '/') { + blockCommentDepth-- + buf += c2 + i += 2 + continue + } + i++ + continue + } + + if (dollarTag !== null) { + if (c === '$' && sqlText.startsWith(dollarTag, i)) { + buf += dollarTag + i += dollarTag.length + dollarTag = null + continue + } + buf += c + i++ + continue + } + + if (inSingle) { + buf += c + if (c === "'") { + if (c2 === "'") { + buf += c2 + i += 2 + continue + } + inSingle = false + } + i++ + continue + } + + if (inDouble) { + buf += c + if (c === '"') { + if (c2 === '"') { + buf += c2 + i += 2 + continue + } + inDouble = false + } + i++ + continue + } + + if (c === '-' && c2 === '-') { + inLineComment = true + buf += c + buf += c2 + i += 2 + continue + } + + if (c === '/' && c2 === '*') { + blockCommentDepth = 1 + buf += c + buf += c2 + i += 2 + continue + } + + if (c === '$') { + const tagMatch = /^\$[A-Za-z_][A-Za-z0-9_]*\$|^\$\$/.exec(sqlText.slice(i)) + if (tagMatch) { + dollarTag = tagMatch[0] + buf += dollarTag + i += dollarTag.length + continue + } + buf += c + i++ + continue + } + + if (c === "'") { + inSingle = true + buf += c + i++ + continue + } + + if (c === '"') { + inDouble = true + buf += c + i++ + continue + } + + if (c === ';') { + const trimmed = buf.trim() + if (trimmed.length > 0) statements.push(trimmed) + buf = '' + i++ + continue + } + + buf += c + i++ + } + + const tail = buf.trim() + if (tail.length > 0) statements.push(tail) + return statements +} + export async function runMigrations() { const path = SCHEMA_CANDIDATES.find((p) => { try { readFileSync(p, 'utf-8'); return true } catch { return false } @@ -17,17 +170,7 @@ export async function runMigrations() { return } const ddl = readFileSync(path, 'utf-8') - // Strip line comments first, then split on semicolons. - // A previous bug: filtering statements that START WITH `--` dropped statements - // preceded by a comment block (e.g. ALTER TABLE preceded by "-- Migration ..."). - const stripped = ddl - .split('\n') - .filter((line) => !line.trim().startsWith('--')) - .join('\n') - const statements = stripped - .split(/;\s*\n/) - .map((s) => s.trim()) - .filter((s) => s.length > 0) + const statements = splitSqlStatements(ddl) let applied = 0 for (const stmt of statements) { diff --git a/hub/test/migrate.test.ts b/hub/test/migrate.test.ts new file mode 100644 index 0000000..a24abe3 --- /dev/null +++ b/hub/test/migrate.test.ts @@ -0,0 +1,134 @@ +/** + * Unit tests for splitSqlStatements — the dollar-quote-aware SQL splitter + * in hub/src/db/migrate.ts. + * + * Pure parser tests, no DB. + */ +import { describe, test, expect } from 'bun:test' +import { readFileSync } from 'fs' +import { resolve } from 'path' +import { splitSqlStatements } from '../src/db/migrate.ts' + +describe('splitSqlStatements', () => { + test('splits two simple statements', () => { + const out = splitSqlStatements('SELECT 1; SELECT 2;') + expect(out).toEqual(['SELECT 1', 'SELECT 2']) + }) + + test('ignores trailing whitespace and empty trailing statement', () => { + const out = splitSqlStatements('SELECT 1; \n ') + expect(out).toEqual(['SELECT 1']) + }) + + test('DO block with EXCEPTION + inner semicolons → 1 statement', () => { + const sql = `DO $$ BEGIN + ALTER TABLE users ADD CONSTRAINT u_chk CHECK (x > 0); + EXCEPTION WHEN duplicate_object THEN NULL; END $$;` + const out = splitSqlStatements(sql) + expect(out.length).toBe(1) + expect(out[0]).toContain('EXCEPTION WHEN duplicate_object') + expect(out[0]).toContain('ALTER TABLE users') + }) + + test('CREATE TABLE → DO block → CREATE TABLE = 3 statements', () => { + const sql = `CREATE TABLE a (id int); +DO $$ BEGIN + ALTER TABLE a ADD CONSTRAINT a_chk CHECK (id > 0); +EXCEPTION WHEN duplicate_object THEN NULL; END $$; +CREATE TABLE b (id int);` + const out = splitSqlStatements(sql) + expect(out.length).toBe(3) + expect(out[0]).toContain('CREATE TABLE a') + expect(out[1]).toContain('DO $$') + expect(out[1]).toContain('END $$') + expect(out[2]).toContain('CREATE TABLE b') + }) + + test('tagged dollar quote $tag$ ... $tag$ respected', () => { + const sql = `CREATE FUNCTION f() RETURNS void AS $body$ + BEGIN + INSERT INTO t VALUES ('a;b;c'); + PERFORM 1; + END; + $body$ LANGUAGE plpgsql; +SELECT 1;` + const out = splitSqlStatements(sql) + expect(out.length).toBe(2) + expect(out[0]).toContain('$body$') + expect(out[0]).toContain("INSERT INTO t VALUES ('a;b;c')") + expect(out[1]).toBe('SELECT 1') + }) + + test("single-quoted string with embedded semicolon is one statement", () => { + const sql = `INSERT INTO t (v) VALUES ('foo;bar');\nSELECT 2;` + const out = splitSqlStatements(sql) + expect(out.length).toBe(2) + expect(out[0]).toBe("INSERT INTO t (v) VALUES ('foo;bar')") + expect(out[1]).toBe('SELECT 2') + }) + + test("escaped single quote '' inside string preserved", () => { + const sql = `INSERT INTO t (v) VALUES ('it''s; fine');\nSELECT 2;` + const out = splitSqlStatements(sql) + expect(out.length).toBe(2) + expect(out[0]).toContain("'it''s; fine'") + }) + + test('double-quoted identifier with semicolon (pathological) preserved', () => { + const sql = `SELECT "weird;col" FROM t; SELECT 2;` + const out = splitSqlStatements(sql) + expect(out.length).toBe(2) + expect(out[0]).toContain('"weird;col"') + }) + + test('block comment containing ; is ignored as a terminator', () => { + const sql = `/* this; has; semicolons */ SELECT 1; SELECT 2;` + const out = splitSqlStatements(sql) + expect(out.length).toBe(2) + expect(out[0]).toContain('SELECT 1') + expect(out[1]).toBe('SELECT 2') + }) + + test('line comment with ; ignored as a terminator', () => { + const sql = `SELECT 1 -- ignore; this; please\n; SELECT 2;` + const out = splitSqlStatements(sql) + expect(out.length).toBe(2) + expect(out[1]).toBe('SELECT 2') + }) + + test('bare $1 positional param not treated as dollar quote', () => { + const sql = `SELECT * FROM t WHERE id = $1; SELECT 2;` + const out = splitSqlStatements(sql) + expect(out.length).toBe(2) + expect(out[0]).toContain('$1') + }) + + test('nested block comments', () => { + const sql = `/* outer /* inner; */ still in outer; */ SELECT 1;` + const out = splitSqlStatements(sql) + expect(out.length).toBe(1) + expect(out[0]).toContain('SELECT 1') + }) + + test('real schema.sql DO blocks each parse to a single statement', () => { + // Validates the actual fix against the production schema file. + const schemaPath = resolve(import.meta.dir, '../src/db/schema.sql') + const ddl = readFileSync(schemaPath, 'utf-8') + const stmts = splitSqlStatements(ddl) + + // Sanity: nothing was split mid-dollar-quote — i.e. no statement + // contains an unmatched/odd count of $$ tokens. + for (const s of stmts) { + const dollarCount = (s.match(/\$\$/g) || []).length + expect(dollarCount % 2).toBe(0) + } + + // Each DO block should be a single statement that includes both + // its opening "DO $$" and closing "END $$". + const doBlocks = stmts.filter((s) => /\bDO\s+\$\$/i.test(s)) + expect(doBlocks.length).toBeGreaterThan(0) + for (const s of doBlocks) { + expect(s).toMatch(/END\s+\$\$/i) + } + }) +})