From 5958ccb1375219df78e31a99971e59e3cf656223 Mon Sep 17 00:00:00 2001 From: "Victor M. Varela" Date: Fri, 1 May 2026 11:01:29 +0200 Subject: [PATCH 1/2] feat: add TSV input and output format support Add `tsv` to InputFormat and OutputFormat enums so users can use `--input-format tsv` / `-I tsv` and `--output-format tsv` / `-O tsv`. TSV input reuses the CSV reader with tab as delimiter. TSV output uses the same delimited-text path as CSV but with tab as the separator. `--header` is now also allowed with `--output-format tsv`. Closes #97 --- src/main.zig | 84 +++++++++++++++++++++++++++++++--------------------- 1 file changed, 50 insertions(+), 34 deletions(-) diff --git a/src/main.zig b/src/main.zig index aac0fae..8aa67ae 100644 --- a/src/main.zig +++ b/src/main.zig @@ -61,10 +61,10 @@ const ExitCode = enum(u8) { }; /// Supported input formats. -const InputFormat = enum { csv, json, ndjson }; +const InputFormat = enum { csv, tsv, json, ndjson }; /// Supported output formats. -const OutputFormat = enum { csv, json, ndjson }; +const OutputFormat = enum { csv, tsv, json, ndjson }; /// Parsed command-line arguments. 
const ParsedArgs = struct { @@ -126,8 +126,8 @@ fn printUsage(writer: *std.Io.Writer) !void { \\Options: \\ -d, --delimiter Input field delimiter for CSV (default: ,) \\ --tsv Alias for --delimiter '\t' - \\ -I, --input-format Input format: csv (default), json, ndjson - \\ -O, --output-format Output format: csv (default), json, ndjson + \\ -I, --input-format Input format: csv (default), tsv, json, ndjson + \\ -O, --output-format Output format: csv (default), tsv, json, ndjson \\ --json Alias for --output-format json \\ --no-type-inference Treat all columns as TEXT (CSV input only) \\ -H, --header Print column names as the first output row (CSV output only) @@ -172,9 +172,10 @@ fn parseDelimiter(value: []const u8) SqlPipeError!u8 { /// parseInputFormat(s) → InputFormat /// Pre: s is the format string provided by the user /// Post: result is the matching InputFormat -/// error.InvalidInputFormat when s is not "csv", "json", or "ndjson" +/// error.InvalidInputFormat when s is not "csv", "tsv", "json", or "ndjson" fn parseInputFormat(s: []const u8) SqlPipeError!InputFormat { if (std.mem.eql(u8, s, "csv")) return .csv; + if (std.mem.eql(u8, s, "tsv")) return .tsv; if (std.mem.eql(u8, s, "json")) return .json; if (std.mem.eql(u8, s, "ndjson")) return .ndjson; return error.InvalidInputFormat; @@ -183,9 +184,10 @@ fn parseInputFormat(s: []const u8) SqlPipeError!InputFormat { /// parseOutputFormat(s) → OutputFormat /// Pre: s is the format string provided by the user /// Post: result is the matching OutputFormat -/// error.InvalidOutputFormat when s is not "csv", "json", or "ndjson" +/// error.InvalidOutputFormat when s is not "csv", "tsv", "json", or "ndjson" fn parseOutputFormat(s: []const u8) SqlPipeError!OutputFormat { if (std.mem.eql(u8, s, "csv")) return .csv; + if (std.mem.eql(u8, s, "tsv")) return .tsv; if (std.mem.eql(u8, s, "json")) return .json; if (std.mem.eql(u8, s, "ndjson")) return .ndjson; return error.InvalidOutputFormat; @@ -288,8 +290,8 @@ fn 
parseArgs(args: []const [:0]const u8) SqlPipeError!ArgsResult { } } - // Non-CSV output format is mutually exclusive with --header - if (output_format != .csv and header) + // Non-CSV/TSV output format is mutually exclusive with --header + if (output_format != .csv and output_format != .tsv and header) return error.IncompatibleFlags; // --output is mutually exclusive with --columns (--columns always writes to stdout) @@ -653,27 +655,29 @@ fn insertRowTyped( if (c.sqlite3_step(stmt) != c.SQLITE_DONE) return error.StepFailed; } -/// printRow(stmt, col_count, writer) → !void +/// printRow(stmt, col_count, writer, delimiter) → !void /// Pre: sqlite3_step returned SQLITE_ROW for stmt /// col_count = sqlite3_column_count(stmt) > 0 -/// Post: one comma-separated CSV line written to writer with col_count values; +/// delimiter is the field separator character (e.g. ',' or '\t') +/// Post: one delimited line written to writer with col_count values; /// NULL cells rendered as the literal string "NULL" fn printRow( stmt: *c.sqlite3_stmt, col_count: c_int, writer: *std.Io.Writer, + delimiter: u8, ) !void { - // Loop invariant I: columns 0..i-1 have been written, separated by commas + // Loop invariant I: columns 0..i-1 have been written, separated by delimiter // Bounding function: col_count - i var i: c_int = 0; while (i < col_count) : (i += 1) { - if (i > 0) try writer.writeByte(','); + if (i > 0) try writer.writeByte(delimiter); if (c.sqlite3_column_type(stmt, i) == c.SQLITE_NULL) { try writer.writeAll("NULL"); } else { const ptr = c.sqlite3_column_text(stmt, i); if (ptr != null) { - try writeField(writer, std.mem.span(@as([*:0]const u8, @ptrCast(ptr)))); + try writeField(writer, std.mem.span(@as([*:0]const u8, @ptrCast(ptr))), delimiter); } else { try writer.writeAll("NULL"); } @@ -682,16 +686,17 @@ fn printRow( try writer.writeByte('\n'); } -/// writeField(writer, value) → !void +/// writeField(writer, value, delimiter) → !void /// Pre: writer is a valid writer, value is 
a valid UTF-8 slice -/// Post: value is written to writer as a single CSV field: -/// if value contains comma, double-quote, or newline, it is enclosed +/// delimiter is the field separator character (e.g. ',' or '\t') +/// Post: value is written to writer as a single delimited field: +/// if value contains the delimiter, double-quote, or newline, it is enclosed /// in double-quotes with internal quotes escaped as "" (RFC 4180); /// otherwise it is written verbatim -fn writeField(writer: *std.Io.Writer, value: []const u8) !void { +fn writeField(writer: *std.Io.Writer, value: []const u8, delimiter: u8) !void { var needs_quoting = false; for (value) |ch| { - if (ch == ',' or ch == '"' or ch == '\n' or ch == '\r') { + if (ch == delimiter or ch == '"' or ch == '\n' or ch == '\r') { needs_quoting = true; break; } @@ -708,25 +713,27 @@ fn writeField(writer: *std.Io.Writer, value: []const u8) !void { } } -/// printHeaderRow(stmt, col_count, writer) → !void +/// printHeaderRow(stmt, col_count, writer, delimiter) → !void /// Pre: stmt is a prepared statement, col_count > 0 -/// Post: one CSV line with col_count column names written to writer; +/// delimiter is the field separator character (e.g. 
',' or '\t') +/// Post: one delimited line with col_count column names written to writer; /// names are obtained from sqlite3_column_name (alias or original); /// fields are RFC 4180 quoted when they contain special characters fn printHeaderRow( stmt: *c.sqlite3_stmt, col_count: c_int, writer: *std.Io.Writer, + delimiter: u8, ) !void { - // Loop invariant I: columns 0..i-1 names have been written, separated by commas + // Loop invariant I: columns 0..i-1 names have been written, separated by delimiter // Bounding function: col_count - i var i: c_int = 0; while (i < col_count) : (i += 1) { - if (i > 0) try writer.writeByte(','); + if (i > 0) try writer.writeByte(delimiter); const name_ptr = c.sqlite3_column_name(stmt, i); if (name_ptr != null) { const name = std.mem.span(@as([*:0]const u8, @ptrCast(name_ptr))); - try writeField(writer, name); + try writeField(writer, name, delimiter); } } try writer.writeByte('\n'); @@ -737,10 +744,10 @@ fn printHeaderRow( /// query is a valid SQL string (not null-terminated) /// allocator is valid /// when output_format = .json or .ndjson, header must not be set (caller's responsibility) -/// Post: if output_format = .json, results are written as a JSON array of objects -/// if output_format = .ndjson, results are written as one JSON object per line -/// if header = true (and output_format = .csv), column names written as the first CSV row -/// all result rows written to writer as CSV lines via printRow (when output_format = .csv) +/// Post: if output_format = .json, results are written as a JSON array of objects +/// if output_format = .ndjson, results are written as one JSON object per line +/// if output_format = .csv or .tsv, results are written as delimited text; +/// when header = true, column names are written as the first row /// error.PrepareQueryFailed when sqlite3_prepare_v2 returns non-SQLITE_OK /// propagates any writer I/O error fn execQuery( @@ -795,16 +802,18 @@ fn execQuery( try json.printNdjsonRow(stmt.?, 
col_count, col_names, writer); } }, - .csv => { + .csv, .tsv => { + const out_delim: u8 = if (output_format == .tsv) '\t' else ','; + // When header is requested, print column names before data rows if (header and col_count > 0) { - try printHeaderRow(stmt.?, col_count, writer); + try printHeaderRow(stmt.?, col_count, writer, out_delim); } // Loop invariant I: all SQLITE_ROW results returned so far have been printed // Bounding function: number of remaining rows in the result set (finite) while (c.sqlite3_step(stmt) == c.SQLITE_ROW) { - try printRow(stmt.?, col_count, writer); + try printRow(stmt.?, col_count, writer, out_delim); } }, } @@ -1152,10 +1161,11 @@ fn runColumns( stdout_writer: *std.Io.Writer, ) void { switch (args.input_format) { - .csv => { + .csv, .tsv => { + const col_delim: u8 = if (args.input_format == .tsv) '\t' else args.delimiter; var stdin_buf: [4096]u8 = undefined; var stdin_file_reader = std.Io.File.reader(std.Io.File.stdin(), io, &stdin_buf); - var csv_reader = csv.csvReaderWithDelimiter(allocator, &stdin_file_reader.interface, args.delimiter); + var csv_reader = csv.csvReaderWithDelimiter(allocator, &stdin_file_reader.interface, col_delim); const header_record = csv_reader.nextRecord() catch |err| switch (err) { error.UnterminatedQuotedField => fatal("row 1: unterminated quoted field", stderr_writer, .csv_error, .{}), @@ -1325,6 +1335,12 @@ fn run( // Load input into `t` — dispatch on input format const rows_inserted: usize = switch (parsed.input_format) { .csv => loadCsvInput(allocator, io, db, parsed, stderr_writer), + .tsv => blk: { + // TSV is CSV with tab delimiter; override delimiter and reuse the CSV loader + var tsv_parsed = parsed; + tsv_parsed.delimiter = '\t'; + break :blk loadCsvInput(allocator, io, db, tsv_parsed, stderr_writer); + }, .json => blk: { var stdin_buf: [4096]u8 = undefined; var stdin_reader = std.Io.File.reader(std.Io.File.stdin(), io, &stdin_buf); @@ -1400,14 +1416,14 @@ pub fn main(init: 
std.process.Init.Minimal) void { }, error.InvalidInputFormat => { stderr_writer.writeAll( - "error: unknown input format; supported: csv, json, ndjson\n", + "error: unknown input format; supported: csv, tsv, json, ndjson\n", ) catch |werr| std.log.err("failed to write error message: {}", .{werr}); stderr_writer.flush() catch |ferr| std.log.err("failed to flush: {}", .{ferr}); std.process.exit(@intFromEnum(ExitCode.usage)); }, error.InvalidOutputFormat => { stderr_writer.writeAll( - "error: unknown output format; supported: csv, json, ndjson\n", + "error: unknown output format; supported: csv, tsv, json, ndjson\n", ) catch |werr| std.log.err("failed to write error message: {}", .{werr}); stderr_writer.flush() catch |ferr| std.log.err("failed to flush: {}", .{ferr}); std.process.exit(@intFromEnum(ExitCode.usage)); From 6c9749eed18c958f84ea3c19964331d4db84064b Mon Sep 17 00:00:00 2001 From: "Victor M. Varela" Date: Fri, 1 May 2026 11:13:05 +0200 Subject: [PATCH 2/2] test: add TSV integration tests and fix --header doc/error messages MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit - Add 9 integration tests (62–70) covering --input-format tsv, --output-format tsv, -I/-O short flags, TSV roundtrip, CSV→TSV, --header with TSV output, --columns with TSV, and quoted-tab fields - Fix help text: --header now reads '(CSV/TSV output only)' - Fix doc comment and error message: 'non-CSV' → 'non-CSV/TSV' --- build.zig | 104 +++++++++++++++++++++++++++++++++++++++++++++++++++ src/main.zig | 6 +-- 2 files changed, 107 insertions(+), 3 deletions(-) diff --git a/build.zig b/build.zig index b816a31..a12ecc8 100644 --- a/build.zig +++ b/build.zig @@ -639,6 +639,110 @@ pub fn build(b: *std.Build) void { test_columns_ndjson_input.step.dependOn(b.getInstallStep()); test_step.dependOn(&test_columns_ndjson_input.step); + // ─── TSV input/output integration tests ───────────────────────────────── + + // Integration test 62: --input-format tsv reads 
tab-separated input correctly + const test_tsv_input_format = b.addSystemCommand(&.{ + "bash", "-c", + \\result=$(printf 'name\tage\nAlice\t30\nBob\t25\n' \ + \\ | ./zig-out/bin/sql-pipe --input-format tsv 'SELECT name,age FROM t ORDER BY age') + \\expected=$(printf 'Bob,25\nAlice,30') + \\[ "$result" = "$expected" ] + }); + test_tsv_input_format.step.dependOn(b.getInstallStep()); + test_step.dependOn(&test_tsv_input_format.step); + + // Integration test 63: -I tsv short flag is equivalent to --input-format tsv + const test_tsv_input_short = b.addSystemCommand(&.{ + "bash", "-c", + \\result=$(printf 'name\tage\nAlice\t30\nBob\t25\n' \ + \\ | ./zig-out/bin/sql-pipe -I tsv 'SELECT name FROM t ORDER BY name') + \\expected=$(printf 'Alice\nBob') + \\[ "$result" = "$expected" ] + }); + test_tsv_input_short.step.dependOn(b.getInstallStep()); + test_step.dependOn(&test_tsv_input_short.step); + + // Integration test 64: --output-format tsv produces tab-separated output + const test_tsv_output_format = b.addSystemCommand(&.{ + "bash", "-c", + \\result=$(printf 'name,age\nAlice,30\nBob,25\n' \ + \\ | ./zig-out/bin/sql-pipe --output-format tsv 'SELECT name,age FROM t ORDER BY age') + \\expected=$(printf 'Bob\t25\nAlice\t30') + \\[ "$result" = "$expected" ] + }); + test_tsv_output_format.step.dependOn(b.getInstallStep()); + test_step.dependOn(&test_tsv_output_format.step); + + // Integration test 65: -O tsv short flag is equivalent to --output-format tsv + const test_tsv_output_short = b.addSystemCommand(&.{ + "bash", "-c", + \\result=$(printf 'name,age\nAlice,30\n' \ + \\ | ./zig-out/bin/sql-pipe -O tsv 'SELECT name,age FROM t') + \\expected=$(printf 'Alice\t30') + \\[ "$result" = "$expected" ] + }); + test_tsv_output_short.step.dependOn(b.getInstallStep()); + test_step.dependOn(&test_tsv_output_short.step); + + // Integration test 66: TSV roundtrip (-I tsv -O tsv) + const test_tsv_roundtrip = b.addSystemCommand(&.{ + "bash", "-c", + \\result=$(printf 
'name\tage\nAlice\t30\nBob\t25\n' \ + \\ | ./zig-out/bin/sql-pipe -I tsv -O tsv 'SELECT name,age FROM t ORDER BY name') + \\expected=$(printf 'Alice\t30\nBob\t25') + \\[ "$result" = "$expected" ] + }); + test_tsv_roundtrip.step.dependOn(b.getInstallStep()); + test_step.dependOn(&test_tsv_roundtrip.step); + + // Integration test 67: CSV input, TSV output (cross-format) + const test_csv_to_tsv = b.addSystemCommand(&.{ + "bash", "-c", + \\result=$(printf 'name,age\nAlice,30\nBob,25\n' \ + \\ | ./zig-out/bin/sql-pipe -O tsv 'SELECT name,age FROM t ORDER BY name') + \\expected=$(printf 'Alice\t30\nBob\t25') + \\[ "$result" = "$expected" ] + }); + test_csv_to_tsv.step.dependOn(b.getInstallStep()); + test_step.dependOn(&test_csv_to_tsv.step); + + // Integration test 68: --header works with --output-format tsv + const test_header_tsv_output = b.addSystemCommand(&.{ + "bash", "-c", + \\result=$(printf 'name,age\nAlice,30\nBob,25\n' \ + \\ | ./zig-out/bin/sql-pipe --header -O tsv 'SELECT name,age FROM t ORDER BY age') + \\expected=$(printf 'name\tage\nBob\t25\nAlice\t30') + \\[ "$result" = "$expected" ] + }); + test_header_tsv_output.step.dependOn(b.getInstallStep()); + test_step.dependOn(&test_header_tsv_output.step); + + // Integration test 69: --columns with --input-format tsv + const test_columns_tsv_input_format = b.addSystemCommand(&.{ + "bash", "-c", + \\result=$(printf 'col1\tcol2\tcol3\n' \ + \\ | ./zig-out/bin/sql-pipe --columns --input-format tsv) + \\expected=$(printf 'col1\ncol2\ncol3') + \\[ "$result" = "$expected" ] + }); + test_columns_tsv_input_format.step.dependOn(b.getInstallStep()); + test_step.dependOn(&test_columns_tsv_input_format.step); + + // Integration test 70: TSV input with a quoted field containing a tab + // The enclosing quotes are removed during TSV parsing and the embedded tab is kept; + // the resulting value (hello<TAB>world) is written verbatim in CSV output + // because comma is the output delimiter, not tab. 
+ const test_tsv_quoted_tab = b.addSystemCommand(&.{ + "bash", "-c", + \\result=$(printf 'name\tnotes\nAlice\t"hello\tworld"\n' \ + \\ | ./zig-out/bin/sql-pipe -I tsv 'SELECT name, notes FROM t') + \\expected=$(printf 'Alice,hello\tworld') + \\[ "$result" = "$expected" ] + }); + test_tsv_quoted_tab.step.dependOn(b.getInstallStep()); + test_step.dependOn(&test_tsv_quoted_tab.step); + // Unit tests for the RFC 4180 CSV parser (src/csv.zig) const unit_tests = b.addTest(.{ .root_module = b.createModule(.{ diff --git a/src/main.zig b/src/main.zig index 8aa67ae..a81fcfe 100644 --- a/src/main.zig +++ b/src/main.zig @@ -130,7 +130,7 @@ fn printUsage(writer: *std.Io.Writer) !void { \\ -O, --output-format Output format: csv (default), tsv, json, ndjson \\ --json Alias for --output-format json \\ --no-type-inference Treat all columns as TEXT (CSV input only) - \\ -H, --header Print column names as the first output row (CSV output only) + \\ -H, --header Print column names as the first output row (CSV/TSV output only) \\ --max-rows Stop if more than data rows are read (exit 1) \\ -v, --verbose Force row count to stderr (shown automatically on TTY) \\ With --columns: show inferred type per column @@ -201,7 +201,7 @@ fn parseOutputFormat(s: []const u8) SqlPipeError!OutputFormat { /// result = .help when --help or -h is present /// result = .version when --version or -V is present /// error.MissingQuery when no non-flag argument is found -/// error.IncompatibleFlags when a non-CSV output format is combined with --header +/// error.IncompatibleFlags when a non-CSV/TSV output format is combined with --header fn parseArgs(args: []const [:0]const u8) SqlPipeError!ArgsResult { var query: ?[]const u8 = null; var type_inference = true; @@ -1402,7 +1402,7 @@ pub fn main(init: std.process.Init.Minimal) void { switch (err) { error.IncompatibleFlags => { stderr_writer.writeAll( - "error: --header cannot be combined with non-CSV output format\n", + "error: --header cannot be combined with 
non-CSV/TSV output format\n", ) catch |werr| std.log.err("failed to write error message: {}", .{werr}); stderr_writer.flush() catch |ferr| std.log.err("failed to flush: {}", .{ferr}); std.process.exit(@intFromEnum(ExitCode.usage));