diff --git a/internal/endtoend/testdata/sqlite_unicode_comment/db/db.go b/internal/endtoend/testdata/sqlite_unicode_comment/db/db.go new file mode 100644 index 0000000000..cd5bbb8e08 --- /dev/null +++ b/internal/endtoend/testdata/sqlite_unicode_comment/db/db.go @@ -0,0 +1,31 @@ +// Code generated by sqlc. DO NOT EDIT. +// versions: +// sqlc v1.30.0 + +package db + +import ( + "context" + "database/sql" +) + +type DBTX interface { + ExecContext(context.Context, string, ...interface{}) (sql.Result, error) + PrepareContext(context.Context, string) (*sql.Stmt, error) + QueryContext(context.Context, string, ...interface{}) (*sql.Rows, error) + QueryRowContext(context.Context, string, ...interface{}) *sql.Row +} + +func New(db DBTX) *Queries { + return &Queries{db: db} +} + +type Queries struct { + db DBTX +} + +func (q *Queries) WithTx(tx *sql.Tx) *Queries { + return &Queries{ + db: tx, + } +} diff --git a/internal/endtoend/testdata/sqlite_unicode_comment/db/models.go b/internal/endtoend/testdata/sqlite_unicode_comment/db/models.go new file mode 100644 index 0000000000..b8d77e1021 --- /dev/null +++ b/internal/endtoend/testdata/sqlite_unicode_comment/db/models.go @@ -0,0 +1,10 @@ +// Code generated by sqlc. DO NOT EDIT. +// versions: +// sqlc v1.30.0 + +package db + +type Item struct { + ID int64 + Name string +} diff --git a/internal/endtoend/testdata/sqlite_unicode_comment/db/query.sql.go b/internal/endtoend/testdata/sqlite_unicode_comment/db/query.sql.go new file mode 100644 index 0000000000..2ad41c67c3 --- /dev/null +++ b/internal/endtoend/testdata/sqlite_unicode_comment/db/query.sql.go @@ -0,0 +1,37 @@ +// Code generated by sqlc. DO NOT EDIT. +// versions: +// sqlc v1.30.0 +// source: query.sql + +package db + +import ( + "context" +) + +const getItem = `-- name: GetItem :one +SELECT id, name FROM items WHERE id = ? +` + +func (q *Queries) GetItem(ctx context.Context, id int64) (Item, error) { + row := q.db.QueryRowContext(ctx, getItem, id) + var i Item + err := row.Scan(&i.ID, &i.Name) + return i, err +} + +const updateItem = `-- name: UpdateItem :exec + +UPDATE items SET name = ? WHERE id = ? +` + +type UpdateItemParams struct { + Name string + ID int64 +} + +// section — divider +func (q *Queries) UpdateItem(ctx context.Context, arg UpdateItemParams) error { + _, err := q.db.ExecContext(ctx, updateItem, arg.Name, arg.ID) + return err +} diff --git a/internal/endtoend/testdata/sqlite_unicode_comment/query.sql b/internal/endtoend/testdata/sqlite_unicode_comment/query.sql new file mode 100644 index 0000000000..dd4e5cdc29 --- /dev/null +++ b/internal/endtoend/testdata/sqlite_unicode_comment/query.sql @@ -0,0 +1,7 @@ +-- name: GetItem :one +SELECT id, name FROM items WHERE id = ?; + +-- section — divider + +-- name: UpdateItem :exec +UPDATE items SET name = ? WHERE id = ?; diff --git a/internal/endtoend/testdata/sqlite_unicode_comment/schema.sql b/internal/endtoend/testdata/sqlite_unicode_comment/schema.sql new file mode 100644 index 0000000000..93e4173ad2 --- /dev/null +++ b/internal/endtoend/testdata/sqlite_unicode_comment/schema.sql @@ -0,0 +1 @@ +CREATE TABLE items (id INTEGER PRIMARY KEY, name TEXT NOT NULL); diff --git a/internal/endtoend/testdata/sqlite_unicode_comment/sqlc.json b/internal/endtoend/testdata/sqlite_unicode_comment/sqlc.json new file mode 100644 index 0000000000..cbd787d930 --- /dev/null +++ b/internal/endtoend/testdata/sqlite_unicode_comment/sqlc.json @@ -0,0 +1,16 @@ +{ + "version": "2", + "sql": [ + { + "engine": "sqlite", + "queries": "query.sql", + "schema": "schema.sql", + "gen": { + "go": { + "package": "db", + "out": "db" + } + } + } + ] +} diff --git a/internal/engine/sqlite/parse.go b/internal/engine/sqlite/parse.go index 13425b156e..2a42909e94 100644 --- a/internal/engine/sqlite/parse.go +++ b/internal/engine/sqlite/parse.go @@ -4,6 +4,7 @@ import ( "errors" "fmt" "io" + "unicode/utf8" "github.com/antlr4-go/antlr/v4" "github.com/sqlc-dev/sqlc/internal/engine/sqlite/parser" @@ -42,7 +43,8 @@ func (p *Parser) Parse(r io.Reader) ([]ast.Statement, error) { if err != nil { return nil, err } - input := antlr.NewInputStream(string(blob)) + src := string(blob) + input := antlr.NewInputStream(src) lexer := parser.NewSQLiteLexer(input) stream := antlr.NewCommonTokenStream(lexer, 0) pp := parser.NewSQLiteParser(stream) @@ -57,6 +59,13 @@ func (p *Parser) Parse(r io.Reader) ([]ast.Statement, error) { if !ok { return nil, fmt.Errorf("expected ParserContext; got %T\n", tree) } + + // ANTLR's InputStream operates on characters (runes), so token + // positions are character indices. source.Pluck slices with byte + // offsets. Build a lookup table so we can translate correctly when + // the input contains multi-byte UTF-8 characters (e.g. em-dash). + runeToByteOffset := buildRuneToByteOffsets(src) + var stmts []ast.Statement for _, istmt := range pctx.AllSql_stmt_list() { list, ok := istmt.(*parser.Sql_stmt_listContext) @@ -72,12 +81,13 @@ func (p *Parser) Parse(r io.Reader) ([]ast.Statement, error) { loc = stmt.GetStop().GetStop() + 2 continue } - len := (stmt.GetStop().GetStop() + 1) - loc + byteLoc := runeToByteOffset[loc] + byteEnd := runeToByteOffset[stmt.GetStop().GetStop()+1] stmts = append(stmts, ast.Statement{ Raw: &ast.RawStmt{ Stmt: out, - StmtLocation: loc, - StmtLen: len, + StmtLocation: byteLoc, + StmtLen: byteEnd - byteLoc, }, }) loc = stmt.GetStop().GetStop() + 2 @@ -86,6 +96,19 @@ func (p *Parser) Parse(r io.Reader) ([]ast.Statement, error) { return stmts, nil } +// buildRuneToByteOffsets returns a slice mapping rune index to byte offset. +// Entry i holds the byte offset where rune i begins; the final entry holds +// len(s) so that an exclusive end position can be looked up safely. +func buildRuneToByteOffsets(s string) []int { + n := utf8.RuneCountInString(s) + offsets := make([]int, 0, n+1) + for bytePos := range s { + offsets = append(offsets, bytePos) + } + offsets = append(offsets, len(s)) + return offsets +} + func (p *Parser) CommentSyntax() source.CommentSyntax { return source.CommentSyntax{ Dash: true,