Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
165 changes: 154 additions & 11 deletions hub/src/db/migrate.ts
Original file line number Diff line number Diff line change
Expand Up @@ -8,6 +8,159 @@ const SCHEMA_CANDIDATES = [
resolve(import.meta.dir, 'schema.sql'),
]

/**
* Split a SQL script into individual statements on top-level `;`, while
* respecting PostgreSQL syntax that contains semicolons that are NOT
* statement terminators:
* - dollar-quoted strings: $$ ... $$ and the tagged variant $tag$ ... $tag$
* - single-quoted strings with '' escape
* - double-quoted identifiers with "" escape
* - line comments (-- to newline)
* - block comments (slash-star ... star-slash), nesting supported per Postgres
*
* Returns trimmed, non-empty statements.
*/
export function splitSqlStatements(sqlText: string): string[] {
const statements: string[] = []
let buf = ''
let i = 0
const n = sqlText.length

let inSingle = false
let inDouble = false
let inLineComment = false
let blockCommentDepth = 0
let dollarTag: string | null = null

while (i < n) {
const c = sqlText[i]
const c2 = sqlText[i + 1]

if (inLineComment) {
buf += c
if (c === '\n') inLineComment = false
i++
continue
}

if (blockCommentDepth > 0) {
buf += c
if (c === '/' && c2 === '*') {
blockCommentDepth++
buf += c2
i += 2
continue
}
if (c === '*' && c2 === '/') {
blockCommentDepth--
buf += c2
i += 2
continue
}
i++
continue
}

if (dollarTag !== null) {
if (c === '$' && sqlText.startsWith(dollarTag, i)) {
buf += dollarTag
i += dollarTag.length
dollarTag = null
continue
}
buf += c
i++
continue
}

if (inSingle) {
buf += c
if (c === "'") {
if (c2 === "'") {
buf += c2
i += 2
continue
}
inSingle = false
}
i++
continue
}

if (inDouble) {
buf += c
if (c === '"') {
if (c2 === '"') {
buf += c2
i += 2
continue
}
inDouble = false
}
i++
continue
}

if (c === '-' && c2 === '-') {
inLineComment = true
buf += c
buf += c2
i += 2
continue
}

if (c === '/' && c2 === '*') {
blockCommentDepth = 1
buf += c
buf += c2
i += 2
continue
}

if (c === '$') {
const tagMatch = /^\$[A-Za-z_][A-Za-z0-9_]*\$|^\$\$/.exec(sqlText.slice(i))
if (tagMatch) {
dollarTag = tagMatch[0]
buf += dollarTag
i += dollarTag.length
continue
}
buf += c
i++
continue
}

if (c === "'") {
inSingle = true
buf += c
i++
continue
}

if (c === '"') {
inDouble = true
buf += c
i++
continue
}

if (c === ';') {
const trimmed = buf.trim()
if (trimmed.length > 0) statements.push(trimmed)
buf = ''
i++
continue
}

buf += c
i++
}

const tail = buf.trim()
if (tail.length > 0) statements.push(tail)
return statements
}

export async function runMigrations() {
const path = SCHEMA_CANDIDATES.find((p) => {
try { readFileSync(p, 'utf-8'); return true } catch { return false }
Expand All @@ -17,17 +170,7 @@ export async function runMigrations() {
return
}
const ddl = readFileSync(path, 'utf-8')
// Strip line comments first, then split on semicolons.
// A previous bug: filtering statements that START WITH `--` dropped statements
// preceded by a comment block (e.g. ALTER TABLE preceded by "-- Migration ...").
const stripped = ddl
.split('\n')
.filter((line) => !line.trim().startsWith('--'))
.join('\n')
const statements = stripped
.split(/;\s*\n/)
.map((s) => s.trim())
.filter((s) => s.length > 0)
const statements = splitSqlStatements(ddl)

let applied = 0
for (const stmt of statements) {
Expand Down
134 changes: 134 additions & 0 deletions hub/test/migrate.test.ts
Original file line number Diff line number Diff line change
@@ -0,0 +1,134 @@
/**
* Unit tests for splitSqlStatements — the dollar-quote-aware SQL splitter
* in hub/src/db/migrate.ts.
*
* Pure parser tests, no DB.
*/
import { describe, test, expect } from 'bun:test'
import { readFileSync } from 'fs'
import { resolve } from 'path'
import { splitSqlStatements } from '../src/db/migrate.ts'

describe('splitSqlStatements', () => {
test('splits two simple statements', () => {
const out = splitSqlStatements('SELECT 1; SELECT 2;')
expect(out).toEqual(['SELECT 1', 'SELECT 2'])
})

test('ignores trailing whitespace and empty trailing statement', () => {
const out = splitSqlStatements('SELECT 1; \n ')
expect(out).toEqual(['SELECT 1'])
})

test('DO block with EXCEPTION + inner semicolons → 1 statement', () => {
const sql = `DO $$ BEGIN
ALTER TABLE users ADD CONSTRAINT u_chk CHECK (x > 0);
EXCEPTION WHEN duplicate_object THEN NULL; END $$;`
const out = splitSqlStatements(sql)
expect(out.length).toBe(1)
expect(out[0]).toContain('EXCEPTION WHEN duplicate_object')
expect(out[0]).toContain('ALTER TABLE users')
})

test('CREATE TABLE → DO block → CREATE TABLE = 3 statements', () => {
const sql = `CREATE TABLE a (id int);
DO $$ BEGIN
ALTER TABLE a ADD CONSTRAINT a_chk CHECK (id > 0);
EXCEPTION WHEN duplicate_object THEN NULL; END $$;
CREATE TABLE b (id int);`
const out = splitSqlStatements(sql)
expect(out.length).toBe(3)
expect(out[0]).toContain('CREATE TABLE a')
expect(out[1]).toContain('DO $$')
expect(out[1]).toContain('END $$')
expect(out[2]).toContain('CREATE TABLE b')
})

test('tagged dollar quote $tag$ ... $tag$ respected', () => {
const sql = `CREATE FUNCTION f() RETURNS void AS $body$
BEGIN
INSERT INTO t VALUES ('a;b;c');
PERFORM 1;
END;
$body$ LANGUAGE plpgsql;
SELECT 1;`
const out = splitSqlStatements(sql)
expect(out.length).toBe(2)
expect(out[0]).toContain('$body$')
expect(out[0]).toContain("INSERT INTO t VALUES ('a;b;c')")
expect(out[1]).toBe('SELECT 1')
})

test("single-quoted string with embedded semicolon is one statement", () => {
const sql = `INSERT INTO t (v) VALUES ('foo;bar');\nSELECT 2;`
const out = splitSqlStatements(sql)
expect(out.length).toBe(2)
expect(out[0]).toBe("INSERT INTO t (v) VALUES ('foo;bar')")
expect(out[1]).toBe('SELECT 2')
})

test("escaped single quote '' inside string preserved", () => {
const sql = `INSERT INTO t (v) VALUES ('it''s; fine');\nSELECT 2;`
const out = splitSqlStatements(sql)
expect(out.length).toBe(2)
expect(out[0]).toContain("'it''s; fine'")
})

test('double-quoted identifier with semicolon (pathological) preserved', () => {
const sql = `SELECT "weird;col" FROM t; SELECT 2;`
const out = splitSqlStatements(sql)
expect(out.length).toBe(2)
expect(out[0]).toContain('"weird;col"')
})

test('block comment containing ; is ignored as a terminator', () => {
const sql = `/* this; has; semicolons */ SELECT 1; SELECT 2;`
const out = splitSqlStatements(sql)
expect(out.length).toBe(2)
expect(out[0]).toContain('SELECT 1')
expect(out[1]).toBe('SELECT 2')
})

test('line comment with ; ignored as a terminator', () => {
const sql = `SELECT 1 -- ignore; this; please\n; SELECT 2;`
const out = splitSqlStatements(sql)
expect(out.length).toBe(2)
expect(out[1]).toBe('SELECT 2')
})

test('bare $1 positional param not treated as dollar quote', () => {
const sql = `SELECT * FROM t WHERE id = $1; SELECT 2;`
const out = splitSqlStatements(sql)
expect(out.length).toBe(2)
expect(out[0]).toContain('$1')
})

test('nested block comments', () => {
const sql = `/* outer /* inner; */ still in outer; */ SELECT 1;`
const out = splitSqlStatements(sql)
expect(out.length).toBe(1)
expect(out[0]).toContain('SELECT 1')
})

test('real schema.sql DO blocks each parse to a single statement', () => {
// Validates the actual fix against the production schema file.
const schemaPath = resolve(import.meta.dir, '../src/db/schema.sql')
const ddl = readFileSync(schemaPath, 'utf-8')
const stmts = splitSqlStatements(ddl)

// Sanity: nothing was split mid-dollar-quote — i.e. no statement
// contains an unmatched/odd count of $$ tokens.
for (const s of stmts) {
const dollarCount = (s.match(/\$\$/g) || []).length
expect(dollarCount % 2).toBe(0)
}

// Each DO block should be a single statement that includes both
// its opening "DO $$" and closing "END $$".
const doBlocks = stmts.filter((s) => /\bDO\s+\$\$/i.test(s))
expect(doBlocks.length).toBeGreaterThan(0)
for (const s of doBlocks) {
expect(s).toMatch(/END\s+\$\$/i)
}
})
})
Loading