From 84b90b080e406461f653ba924a7c1148bdad94c7 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?=E5=BC=A0=E6=9E=97=E4=BC=9F?= Date: Tue, 21 Apr 2026 16:59:57 +0800 Subject: [PATCH 1/2] Support output formats --- Cargo.toml | 1 + README.md | 84 ++++++++++++++++++++----------- SKILL.md | 19 +++---- src/main.rs | 10 ++++ src/output.rs | 132 +++++++++++++++++++++++++++++++++++++++++++++++++ src/session.rs | 96 +++++++++++------------------------ 6 files changed, 238 insertions(+), 104 deletions(-) create mode 100644 src/output.rs diff --git a/Cargo.toml b/Cargo.toml index 635de87..ada5368 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -15,6 +15,7 @@ arrow-array = "57" arrow-cast = { version = "57", features = ["prettyprint"] } arrow-csv = "57" arrow-flight = { version = "57", features = ["flight-sql-experimental"] } +arrow-json = "57" arrow-schema = "57" atty = "0.2" clap = { version = "4.5", features = ["derive"] } diff --git a/README.md b/README.md index d103150..35dc76b 100644 --- a/README.md +++ b/README.md @@ -7,7 +7,7 @@ ## Overview -arrow_cli is a CLI tool for interacting with server in Flight SQL protocol. +arrow_cli is a CLI tool for interacting with a server that speaks the Flight SQL protocol. ## Install @@ -17,7 +17,7 @@ cargo install arrow_cli ## Usage -``` +```text > arrow_cli --help Usage: arrow_cli [OPTIONS] @@ -29,59 +29,87 @@ Options: --tls --timeout Request timeout in seconds [default: 180] --prepared Execute query using prepared statement + --print-schema Print resultset schema + --output Result output format [default: table] [possible values: table, json, csv, tsv, psv] + -c, --command Execute SQL command and exit -h, --help Print help ``` ## Examples -### REPL -```sql -❯ arrow_cli -h arch -u sundy -p abc --port 8900 -Welcome to Arrow CLI. -Connecting to http://arch:8900/ as user sundy. 
- -arch :) select avg(number) from numbers(10); - -select avg(number) from numbers(10); - +### Single command with table output +```bash +❯ arrow_cli -h arch -u sundy -p abc --port 8900 --output table --command "select avg(number) from numbers(10);" +-------------+ | avg(number) | +-------------+ | 4.5 | +-------------+ -1 rows in set (0.036 sec) - -arch :) show tables like 'c%'; - -show tables like 'c%'; - -+-------------------+ -| tables_in_default | -+-------------------+ -| customer | -+-------------------+ +1 rows in set (tickets received in 0.036 sec, rows received in 0.036 sec) +``` -1 rows in set (0.030 sec) +### Single command with JSON output -arch :) exit -Bye +```bash +❯ arrow_cli -h arch -u sundy -p abc --port 8900 --output json --command "select number from numbers(3)" +{"number":0} +{"number":1} +{"number":2} ``` -### StdIn Pipe +### StdIn pipe with CSV output ```bash -❯ echo "select number from numbers(3)" | arrow_cli -h arch -u sundy -p abc --port 8900 +❯ echo "select number from numbers(3)" | arrow_cli -h arch -u sundy -p abc --port 8900 --output csv 0 1 2 ``` +### StdIn pipe with TSV output + +```bash +❯ echo "select number, concat('v', to_string(number)) from numbers(3)" | arrow_cli -h arch -u sundy -p abc --port 8900 --output tsv +0 v0 +1 v1 +2 v2 +``` + +### Single command with PSV output + +```bash +❯ arrow_cli -h arch -u sundy -p abc --port 8900 --output psv --command "select number, concat('v', to_string(number)) from numbers(3)" +0|v0 +1|v1 +2|v2 +``` + +### Interactive session with JSON output + +```text +❯ arrow_cli -h arch -u sundy -p abc --port 8900 --output json +Welcome to Arrow CLI v0.4.1. +Connecting to http://arch:8900/ as user sundy. 
+ +arch :) select number from numbers(2); + +select number from numbers(2); + +{"number":0} +{"number":1} + +arch :) exit +Bye +``` + ## Features - basic keywords highlight - basic auto-completion - select query support +- output formats: table, json, csv, tsv, psv +- delimited formats use: csv=`,`, tsv=`\t`, psv=`|` - TBD #### License diff --git a/SKILL.md b/SKILL.md index 3d302b1..c6853a7 100644 --- a/SKILL.md +++ b/SKILL.md @@ -45,16 +45,10 @@ Output: echo "select 1" | arrow_cli --host localhost --port 8900 --user admin --password abc ``` -Output: - -```text -1 -``` - ### Option: print the result schema ```bash -arrow_cli --host localhost --port 8900 --user admin --password abc --print-schema --command "select number from numbers(3);" +arrow_cli --host localhost --port 8900 --user admin --password abc --print-schema --command "select 1" ``` Output: @@ -79,5 +73,12 @@ Schema { 1 rows in set (tickets received in 0.008 sec, rows received in 0.010 sec) ``` -## Other notes -- Run `arrow_cli --help` for more details \ No newline at end of file +### Option: output formats + +- `--output table`: pretty table output with row count and timing summary +- `--output json`: line-delimited JSON rows +- `--output csv`: comma-separated rows without headers +- `--output tsv`: tab-separated rows without headers +- `--output psv`: pipe-separated rows without headers + +Run `arrow_cli --help` for more usage details. 
\ No newline at end of file
diff --git a/src/main.rs b/src/main.rs
index 62adc47..e839d03 100644
--- a/src/main.rs
+++ b/src/main.rs
@@ -1,4 +1,5 @@
 mod helper;
+mod output;
 mod session;
 
 use std::time::Duration;
@@ -7,6 +8,7 @@ use arrow_schema::ArrowError;
 
 use atty::Stream;
 use clap::Parser;
+use output::Output;
 use tonic::transport::{ClientTlsConfig, Endpoint};
 
 #[derive(Debug, Parser, PartialEq)]
@@ -44,6 +46,14 @@ struct Args {
     #[clap(long, default_value = "false", help = "Print resultset schema")]
     print_schema: bool,
 
+    #[clap(
+        long,
+        value_enum,
+        default_value_t = Output::Table,
+        help = "Result output format"
+    )]
+    output: Output,
+
     #[clap(short = 'c', long, help = "Execute SQL command and exit")]
     command: Option<String>,
 }
diff --git a/src/output.rs b/src/output.rs
new file mode 100644
index 0000000..80bfa37
--- /dev/null
+++ b/src/output.rs
@@ -0,0 +1,142 @@
+use arrow_array::RecordBatch;
+use arrow_cast::pretty::pretty_format_batches;
+use arrow_csv::WriterBuilder as CsvWriterBuilder;
+use arrow_json::LineDelimitedWriter;
+use arrow_schema::ArrowError;
+use clap::ValueEnum;
+
+/// Result output format selected with `--output`.
+///
+/// `Table` is a pretty-printed table, `Json` is line-delimited JSON, and `Csv`, `Tsv` and
+/// `Psv` are header-less rows delimited by `,`, a tab and `|` respectively.
+// Variants are left without doc comments: clap would surface them as per-value help text.
+#[derive(Clone, Copy, Debug, Eq, PartialEq, ValueEnum)]
+pub enum Output {
+    Table,
+    Json,
+    Csv,
+    Tsv,
+    Psv,
+}
+
+/// Formats `batches` as `output` and writes the result to stdout.
+pub fn print_batches(batches: &[RecordBatch], output: Output) -> Result<(), ArrowError> {
+    let formatted = format_batches(batches, output)?;
+    print!("{formatted}");
+    Ok(())
+}
+
+/// Renders `batches` into a string in the requested `output` format.
+pub fn format_batches(batches: &[RecordBatch], output: Output) -> Result<String, ArrowError> {
+    match output {
+        Output::Table => format_table(batches),
+        Output::Json => format_json(batches),
+        Output::Csv => format_delimited(batches, b','),
+        Output::Tsv => format_delimited(batches, b'\t'),
+        Output::Psv => format_delimited(batches, b'|'),
+    }
+}
+
+/// Pretty-prints `batches` as a table followed by a single newline.
+fn format_table(batches: &[RecordBatch]) -> Result<String, ArrowError> {
+    Ok(format!("{}\n", pretty_format_batches(batches)?))
+}
+
+/// Serializes `batches` as line-delimited JSON.
+fn format_json(batches: &[RecordBatch]) -> Result<String, ArrowError> {
+    let mut bytes = vec![];
+    {
+        let mut writer = LineDelimitedWriter::new(&mut bytes);
+        for batch in batches {
+            writer.write(batch)?;
+        }
+        writer.finish()?;
+    }
+
+    let formatted = String::from_utf8(bytes).map_err(|e| ArrowError::JsonError(e.to_string()))?;
+    Ok(formatted)
+}
+
+/// Serializes `batches` as header-less rows whose fields are separated by `delimiter`.
+fn format_delimited(batches: &[RecordBatch], delimiter: u8) -> Result<String, ArrowError> {
+    let mut bytes = vec![];
+    {
+        let mut writer = CsvWriterBuilder::new()
+            .with_header(false)
+            .with_delimiter(delimiter)
+            .build(&mut bytes);
+        for batch in batches {
+            writer.write(batch)?;
+        }
+    }
+
+    let formatted = String::from_utf8(bytes).map_err(|e| ArrowError::CsvError(e.to_string()))?;
+    Ok(formatted)
+}
+
+#[cfg(test)]
+mod tests {
+    use super::*;
+    use arrow_array::{Int64Array, StringArray};
+    use arrow_schema::{DataType, Field, Schema};
+    use std::sync::Arc;
+
+    fn sample_batch() -> RecordBatch {
+        let schema = Arc::new(Schema::new(vec![
+            Field::new("number", DataType::Int64, false),
+            Field::new("label", DataType::Utf8, false),
+        ]));
+
+        RecordBatch::try_new(
+            schema,
+            vec![
+                Arc::new(Int64Array::from(vec![1, 2])),
+                Arc::new(StringArray::from(vec!["one", "two"])),
+            ],
+        )
+        .unwrap()
+    }
+
+    #[test]
+    fn formats_table_output() {
+        let batches = vec![sample_batch()];
+        let result = format_table(&batches);
+        assert!(result.is_ok());
+        assert!(result.unwrap().contains("| number |"));
+    }
+
+    #[test]
+    fn formats_json_output() {
+        let batches = vec![sample_batch()];
+        let result = format_json(&batches);
+        assert!(result.is_ok());
+        assert_eq!(
+            result.unwrap(),
+            "{\"number\":1,\"label\":\"one\"}\n{\"number\":2,\"label\":\"two\"}\n"
+        );
+    }
+
+    #[test]
+    fn formats_csv_output() {
+        let batches = vec![sample_batch()];
+        let result = format_batches(&batches, Output::Csv);
+        assert!(result.is_ok());
+        assert_eq!(result.unwrap(), "1,one\n2,two\n");
+    }
+
+    #[test]
+    fn formats_tsv_output() {
+        let batches = vec![sample_batch()];
+        let result = format_batches(&batches, Output::Tsv);
+        assert!(result.is_ok());
+        assert_eq!(result.unwrap(), "1\tone\n2\ttwo\n");
+    }
+
+    #[test]
+    fn formats_psv_output() {
+        let batches = vec![sample_batch()];
+        let result = format_batches(&batches, Output::Psv);
+        assert!(result.is_ok());
+        assert_eq!(result.unwrap(), "1|one\n2|two\n");
+    }
+}
diff --git a/src/session.rs b/src/session.rs
index 16a435c..bb7761d 100644
--- a/src/session.rs
+++ b/src/session.rs
@@ -1,6 +1,4 @@
 use arrow_array::RecordBatch;
-use arrow_cast::pretty::pretty_format_batches;
-use arrow_csv::WriterBuilder;
 use arrow_flight::{
     FlightInfo, flight_service_client::FlightServiceClient, sql::client::FlightSqlServiceClient,
 };
@@ -13,7 +11,11 @@ use std::{io::BufRead, time::Duration};
 use tokio::time::Instant;
 use tonic::transport::{Channel, Endpoint};
 
-use crate::{Args, helper::CliHelper};
+use crate::{
+    Args,
+    helper::CliHelper,
+    output::{self, Output},
+};
 
 pub struct Session {
     client: FlightSqlServiceClient<Channel>,
@@ -102,15 +104,8 @@ impl Session {
                     println!("\n{}\n", query);
 
                     if let Err(e) = async {
-                        let (batches, ticket_recv_duration, rows_recv_duration, flight_info) =
-                            self.execute_query(&query).await?;
-                        print_batches(
-                            &batches,
-                            ticket_recv_duration,
-                            rows_recv_duration,
-                            flight_info,
-                            &self.args,
-                        )?;
+                        let result = self.execute_query(&query).await?;
+                        print_query_result(result, &self.args)?;
                         Ok::<_, ArrowError>(())
                     }
                     .await
@@ -127,16 +122,8 @@ impl Session {
 
     pub async fn handle_command(&mut self, command: &str) {
         if let Err(e) = async {
-            let (batches, ticket_recv_duration, rows_recv_duration, flight_info) =
-                self.execute_query(command).await?;
-
-            print_batches(
-                &batches,
-                ticket_recv_duration,
-                rows_recv_duration,
-                flight_info,
-                &self.args,
-            )?;
+            let result = self.execute_query(command).await?;
+            print_query_result(result, &self.args)?;
             Ok::<_, ArrowError>(())
         }
         .await
@@ -151,8 +138,8 @@ impl Session {
         while let Some(Ok(line)) = lines.next() {
             let line = line.trim_end();
             if let Err(e) = async {
-                let (batches, _, _, _) = self.execute_query(line).await?;
-                print_batches_with_sep(batches.as_slice(), b'\t')?;
+                let result = self.execute_query(line).await?;
+                print_query_result(result, &self.args)?;
                 Ok::<_, ArrowError>(())
             }
             .await
@@ -162,18 +149,7 @@ impl Session {
         }
     }
 
-    async fn execute_query(
-        &mut self,
-        query: &str,
-    ) -> Result<
-        (
-            Vec<RecordBatch>,
-            std::time::Duration,
-            std::time::Duration,
-            arrow_flight::FlightInfo,
-        ),
-        ArrowError,
-    > {
+    async fn execute_query(&mut self, query: &str) -> Result<QueryResult, ArrowError> {
         let start = Instant::now();
         let flight_info = if self.args.prepared {
             let mut stmt = self.client.prepare(query.to_string(), None).await?;
@@ -208,54 +184,45 @@ impl Session {
         }
 
         let rows_recv_duration = start.elapsed();
-        Ok((
+        Ok(QueryResult {
             batches,
             ticket_recv_duration,
             rows_recv_duration,
             flight_info,
-        ))
+        })
     }
 }
 
-fn print_batches(
-    batches: &[RecordBatch],
+/// Everything `execute_query` gathers for one statement: rows, timings and flight info.
+struct QueryResult {
+    batches: Vec<RecordBatch>,
     ticket_recv_duration: Duration,
     rows_recv_duration: Duration,
     flight_info: FlightInfo,
-    args: &Args,
-) -> Result<(), ArrowError> {
-    let res = pretty_format_batches(batches)?;
+}
 
-    println!("{res}\n");
+/// Prints `result` in the format selected by `args.output`.
+///
+/// The decoded schema follows when `args.print_schema` is set; the row-count and timing
+/// summary is printed only for `Output::Table`. `result` is taken by value because
+/// `try_decode_schema` consumes the `FlightInfo`, which avoids cloning it.
+fn print_query_result(result: QueryResult, args: &Args) -> Result<(), ArrowError> {
+    output::print_batches(&result.batches, args.output)?;
 
     if args.print_schema {
-        let schema = flight_info.try_decode_schema()?;
+        let schema = result.flight_info.try_decode_schema()?;
         println!("{schema:#?}\n");
     }
 
-    let rows: usize = batches.iter().map(|b| b.num_rows()).sum();
-    println!(
-        "{} rows in set (tickets received in {:.3} sec, rows received in {:.3} sec)\n",
-        rows,
-        ticket_recv_duration.as_secs_f64(),
-        rows_recv_duration.as_secs_f64(),
-    );
-    Ok(())
-}
-
-fn print_batches_with_sep(batches: &[RecordBatch], delimiter: u8) -> Result<(), ArrowError> {
-    let mut bytes = vec![];
-    {
-        let builder = WriterBuilder::new()
-            .with_header(false)
-            .with_delimiter(delimiter);
-        let mut writer = builder.build(&mut bytes);
-        for batch in batches {
-            writer.write(batch)?;
-        }
+    if args.output == Output::Table {
+        let rows: usize = result.batches.iter().map(|b| b.num_rows()).sum();
+        println!(
+            "{} rows in set (tickets received in {:.3} sec, rows received in {:.3} sec)\n",
+            rows,
+            result.ticket_recv_duration.as_secs_f64(),
+            result.rows_recv_duration.as_secs_f64(),
+        );
     }
 
-    let formatted = String::from_utf8(bytes).map_err(|e| ArrowError::CsvError(e.to_string()))?;
-    print!("{formatted}");
     Ok(())
 }
From 586353bea5d96827d1fd88f07572d1ee3ce38162 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?=E5=BC=A0=E6=9E=97=E4=BC=9F?=
Date: Tue, 21 Apr 2026 17:00:42 +0800
Subject: [PATCH 2/2] Bump version

---
 Cargo.toml | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/Cargo.toml b/Cargo.toml
index ada5368..8140323 100644
--- a/Cargo.toml
+++ b/Cargo.toml
@@ -7,7 +7,7 @@ repository = "https://github.com/sundy-li/arrow_cli"
 edition = "2024"
 license = "Apache-2.0"
 name = "arrow_cli"
-version = "0.4.1"
+version = "0.5.0"
 
 [dependencies]
