From 9713d197b451cf6943a54c5fd163f3a230f1e216 Mon Sep 17 00:00:00 2001 From: Daniele Date: Fri, 20 Mar 2026 21:50:39 +0100 Subject: [PATCH 1/2] Release v1.1.0: docs, formats, and refactors Bump version to 1.1.0 and prepare release: update changelog and README, add comprehensive examples, tighten CI, and refactor core modules. Highlights: - Bumped gleam.toml version to 1.1.0 and updated CHANGELOG.md for 1.1.0. - Added many docs/examples (schema, query, repo, types, encode, decode, changeset) and rewrote docs/quickstart.md with runnable examples. - Improved CI: consolidated workflows (removed release.yml and smoke-integration.yml) and made the ClickHouse readiness check fail the job if the service doesn't become ready. - CSV format handler rewritten for RFC 4180 compliance (escaping, parsing, header handling); added helpers for JSON->plain string conversion. - JSONEachRow handler simplified (per-row JSON encoding) and decode utilities made public for reuse by format handlers. - Expr helpers simplified (compact character checks), changeset email validation strengthened, and multiple modules refactored to reduce surface area and improve type safety. - Updated tests to reflect schema changes. See changelog and docs for full details and usage examples. --- .github/workflows/ci.yml | 4 +- .github/workflows/release.yml | 125 ------------- .github/workflows/smoke-integration.yml | 65 ------- CHANGELOG.md | 18 +- README.md | 6 +- docs/examples/changeset.md | 82 ++++++++ docs/examples/decode.md | 79 ++++++++ docs/examples/decode_examples.md | 11 -- docs/examples/encode.md | 100 ++++++++++ docs/examples/query.md | 136 ++++++++++++++ docs/examples/repo.md | 87 +++++++++ docs/examples/schema.md | 113 +++++++++++ docs/examples/schema_examples.md | 64 ------- docs/examples/types.md | 83 +++++++++ docs/examples/types_examples.md | 32 ---- docs/quickstart.md | 45 +++-- gleam.toml | 2 +- src/sparkling/changeset.gleam | 16 +- src/sparkling/decode.gleam | 8 +- src/sparkling/expr.gleam | 127 +------------ src/sparkling/format/csv.gleam | 116 +++++++----- src/sparkling/format/json_each_row.gleam | 53 ++---- src/sparkling/format/registry.gleam | 119 +++--------- src/sparkling/format/tab_separated.gleam | 16 +- src/sparkling/query.gleam | 5 +- src/sparkling/repo.gleam | 49 ++--- src/sparkling/retry.gleam | 40 ++-- src/sparkling/schema.gleam | 42 +++-- src/sparkling/types.gleam | 228 ++++++++++------------- test/sparkling/schema_test.gleam | 3 +- 30 files changed, 1032 insertions(+), 842 deletions(-) delete mode 100644 .github/workflows/release.yml delete mode 100644 .github/workflows/smoke-integration.yml create mode 100644 docs/examples/changeset.md create mode 100644 docs/examples/decode.md delete mode 100644 docs/examples/decode_examples.md create mode 100644 docs/examples/encode.md create mode 100644 docs/examples/query.md create mode 100644 docs/examples/repo.md create mode 100644 docs/examples/schema.md delete mode 100644 docs/examples/schema_examples.md create mode 100644 docs/examples/types.md delete mode 100644 docs/examples/types_examples.md diff --git a/.github/workflows/ci.yml b/.github/workflows/ci.yml index 1cf7819..012563e 100644 --- a/.github/workflows/ci.yml +++ b/.github/workflows/ci.yml @@ -21,10 +21,12 @@ jobs: for i in $(seq 1 60); do if curl -sSf http://localhost:8123/ >/dev/null 2>&1; then echo "ClickHouse is ready" - break + exit 0 fi sleep 2 done + echo "ClickHouse did not become ready in time" + exit 1 - name: Setup Erlang/OTP and Gleam uses: erlef/setup-beam@v1 diff --git a/.github/workflows/release.yml b/.github/workflows/release.yml deleted file mode 100644 index 3b6554a..0000000 --- a/.github/workflows/release.yml +++ /dev/null @@ -1,125 +0,0 @@ -name: Release - -on: - push: - tags: ['v*'] - pull_request: - types: [closed] - branches: [main] - -permissions: - contents: write - packages: write - id-token: write - -jobs: - release: - name: Release to Hex - runs-on: ubuntu-latest - if: >- - contains(github.ref, 'refs/tags/') || (github.event_name == 'pull_request' && github.event.pull_request.merged == true) - steps: - - name: Checkout - uses: actions/checkout@v4 - with: - fetch-depth: 0 - - - name: Start ClickHouse (for integration tests) - run: | - # If you keep the repository's docker-compose.yml with a service named `clickhouse` this will start it. - docker compose up -d clickhouse || true - echo "Waiting for ClickHouse to be ready..." - for i in $(seq 1 60); do - if curl -sSf http://localhost:8123/ >/dev/null 2>&1; then - echo "ClickHouse is ready" - break - fi - sleep 2 - done - - - name: Setup Erlang/OTP and Gleam - uses: erlef/setup-beam@v1 - with: - otp-version: "27.0" - gleam-version: "1.13.0" - - - name: Show versions - run: | - erl -version || true - gleam --version - - - name: Cache Gleam deps and build - uses: actions/cache@v4 - with: - path: | - ~/.cache/gleam - ~/.gleam - ./_gleam_deps - ./build - key: ${{ runner.os }}-gleam-1.13.0-otp27-release-${{ hashFiles('**/gleam.toml') }} - restore-keys: | - ${{ runner.os }}-gleam-1.13.0-otp27-release- - - - name: Install dependencies - run: gleam deps download - - - name: Check formatting - run: gleam format --check src test - - - name: Build - run: gleam build - - - name: Run tests - env: - CLICKHOUSE_USER: test_user - CLICKHOUSE_PASSWORD: test_password - CLICKHOUSE_DB: test_db - CLICKHOUSE_URL: http://localhost:8123 - run: gleam test - - - name: Create tag from gleam.toml (on merged PR) - if: github.event_name == 'pull_request' && github.event.pull_request.merged == true - run: | - set -euo pipefail - VERSION=$(grep '^version' gleam.toml | sed -E 's/.*= *"([^"]+)".*/\1/') - TAG="v${VERSION}" - echo "Derived tag: ${TAG}" - # Ensure we have refs to push - git fetch origin --tags - if git rev-parse -q --verify "refs/tags/${TAG}" >/dev/null; then - echo "Tag ${TAG} already exists, skipping" - exit 0 - fi - - git config user.name "github-actions[bot]" - git config user.email "41898282+github-actions[bot]@users.noreply.github.com" - git tag -a "${TAG}" -m "Release ${TAG}" - # Push using the default authentication provided to the runner (GITHUB_TOKEN) - git push origin "${TAG}" - echo "Pushed tag ${TAG} to origin" - - - name: Publish to Hex - env: - HEXPM_API_KEY: ${{ secrets.HEX_API_KEY }} - run: | - echo "Publishing package to Hex..." - # For versions < 1.0.0, gleam publish requires explicit confirmation - # We pipe the required text to accept publishing 0.x versions - echo "I am not using semantic versioning" | gleam publish --yes - - - name: Create GitHub Release - if: startsWith(github.ref, 'refs/tags/') - uses: softprops/action-gh-release@v1 - with: - body: | - Release ${{ github.ref_name }} - - See [CHANGELOG.md](https://github.com/${{ github.repository }}/blob/${{ github.ref_name }}/CHANGELOG.md) for details. - draft: false - prerelease: false - env: - GITHUB_TOKEN: ${{ secrets.GITHUB_TOKEN }} - - - name: Cleanup ClickHouse - if: always() - run: docker compose down -v || true diff --git a/.github/workflows/smoke-integration.yml b/.github/workflows/smoke-integration.yml deleted file mode 100644 index 5881ff0..0000000 --- a/.github/workflows/smoke-integration.yml +++ /dev/null @@ -1,65 +0,0 @@ -name: Smoke Integration (PR) - -on: - pull_request: - branches: ["main"] - -jobs: - smoke-integration: - name: Smoke Integration (ClickHouse) - runs-on: ubuntu-latest - timeout-minutes: 30 - - steps: - - name: Checkout - uses: actions/checkout@v4 - - - name: Start ClickHouse (docker-compose) - env: - CLICKHOUSE_IMAGE: clickhouse/clickhouse-server:23.7 - run: | - echo "Using CLICKHOUSE_IMAGE=$CLICKHOUSE_IMAGE" - docker compose pull clickhouse || true - docker compose up -d --remove-orphans clickhouse - - - name: Wait for ClickHouse HTTP - run: | - echo "Waiting for ClickHouse HTTP on localhost:8123..." - for i in $(seq 1 40); do - if curl -sSf http://localhost:8123/ >/dev/null 2>&1; then - echo "ClickHouse is up" - break - fi - sleep 2 - done - - - name: Smoke test - SELECT 1 - run: | - set -e - OUT=$(curl -sS -u test_user:test_password "http://localhost:8123/?query=SELECT%201%20as%20result%20FORMAT%20JSONEachRow&database=test_db") - echo "Got: $OUT" - if [ "$OUT" != '{"result":1}' ] && [ "$OUT" != '{"result":1}\n' ]; then - echo "Unexpected response: $OUT" - exit 2 - fi - - - name: Smoke test - create table, insert and select - run: | - set -e - DDL="CREATE TABLE IF NOT EXISTS smoke_test (id UInt32, name String) ENGINE = MergeTree() ORDER BY id" - curl -sS -u test_user:test_password -d "$DDL" "http://localhost:8123/?database=test_db" - - INSERT="INSERT INTO smoke_test (id,name) VALUES (1,'smoke')" - curl -sS -u test_user:test_password -d "$INSERT" "http://localhost:8123/?database=test_db" - - OUT=$(curl -sS -u test_user:test_password "http://localhost:8123/?query=SELECT%20*%20FROM%20smoke_test%20WHERE%20id%3D1%20FORMAT%20JSONEachRow&database=test_db") - echo "Select returned: $OUT" - if [ -z "$OUT" ]; then - echo "Select returned empty" - exit 3 - fi - - - name: Cleanup ClickHouse - if: always() - run: | - docker compose down -v || true diff --git a/CHANGELOG.md b/CHANGELOG.md index bc4a4be..414a40a 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -5,13 +5,18 @@ All notable changes to this project will be documented in this file. The format is based on [Keep a Changelog](https://keepachangelog.com/en/1.0.0/), and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0.html). -## [Unreleased] -### Added +## [1.1.0] - 2026-03-20 + +### Changed -- (Unreleased changes go here) +- Simplified and refactored `types.gleam`, `expr.gleam`, `format/registry.gleam`, `format/json_each_row.gleam`, `repo.gleam`, `retry.gleam`, `schema.gleam`, `changeset.gleam`, `decode.gleam`, `query.gleam` — reduced surface area, improved type safety, removed redundant code +- Rewrote `docs/quickstart.md` with correct, runnable code examples +- Added missing documentation for all core modules: `query`, `repo`, `encode`, `decode`, `changeset`, `types`, `schema` +- Removed `release.yml` and `smoke-integration.yml` workflows; CI consolidated under `ci.yml` +- Fixed stray character and incorrect function call in README -## [0.1.0] - 2025-11-09 +## [1.0.0] - 2025-11-09 ### Added @@ -60,5 +65,6 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0 - Style guide for contributions -[Unreleased]: https://github.com/lupodevelop/sparkling/compare/v0.1.0...HEAD -[0.1.0]: https://github.com/lupodevelop/sparkling/releases/tag/v0.1.0 +[Unreleased]: https://github.com/lupodevelop/sparkling/compare/v1.1.0...HEAD +[1.1.0]: https://github.com/lupodevelop/sparkling/compare/v1.0.0...v1.1.0 +[1.0.0]: https://github.com/lupodevelop/sparkling/compare/v0.1.0...v1.0.0 diff --git a/README.md b/README.md index 10b9261..012ccb1 100644 --- a/README.md +++ b/README.md @@ -7,7 +7,7 @@ [![CI](https://github.com/lupodevelop/sparkling/actions/workflows/ci.yml/badge.svg)](https://github.com/lupodevelop/sparkling/actions/workflows/ci.yml) [![Hex](https://img.shields.io/hexpm/v/sparkling.svg)](https://hex.pm/packages/sparkling) [![License](https://img.shields.io/badge/license-Apache%202.0-yellow.svg)](LICENSE) [![Built with Gleam](https://img.shields.io/badge/Built%20with-Gleam-ffaff3)](https://gleam.run) [![Gleam Version](https://img.shields.io/badge/gleam-%3E%3D1.13.0-ffaff3)](https://gleam.run) **Sparkling** is a *lightweight*, **type-safe** data layer for **ClickHouse** written in Gleam. It provides a small, focused API for defining schemas, building queries, and encoding/decoding ClickHouse formats. -É + *No magic*, just small, composable functions that play nicely in Gleam apps. > Why "Sparkling"? One rainy Tuesday a tiny inflatable rubber duck stole a shooting pink star and decided to become a freelance data wrangler, and it now guides queries through the night, humming 8-bit lullabies. Totally plausible. @@ -24,7 +24,7 @@ import sparkling/repo let r = repo.new("http://localhost:8123") |> repo.with_database("mydb") -case r.execute_sql(r, "SELECT 1 as result FORMAT JSONEachRow") { +case repo.execute_sql(r, "SELECT 1 as result FORMAT JSONEachRow") { Ok(body) -> io.println(body) Error(_) -> io.println("query failed") } @@ -38,7 +38,7 @@ case r.execute_sql(r, "SELECT 1 as result FORMAT JSONEachRow") { - `sparkling/encode` / `sparkling/decode` — format handlers (JSONEachRow default) - `sparkling/types` — helpers for Decimal, DateTime64, UUID, LowCardinality -For more examples see `docs/examples/` and `docs/quickstart.md`. +For more examples see [docs/quickstart.md](docs/quickstart.md) and [docs/examples/](docs/examples/). **Design note:** Sparkling's API and composable query builder were partly inspired by *Ecto*; many ideas about schema definition and query composition borrow from its approach while keeping a small, Gleam-friendly surface. diff --git a/docs/examples/changeset.md b/docs/examples/changeset.md new file mode 100644 index 0000000..3767750 --- /dev/null +++ b/docs/examples/changeset.md @@ -0,0 +1,82 @@ +# Changeset + +`sparkling/changeset` provides Ecto-style data casting and validation. + +## Basic usage + +```gleam +import gleam/option.{None, Some} +import sparkling/changeset + +type UserData { + UserData(name: String, email: String, age: Int) +} + +let data = UserData(name: "", email: "", age: 0) + +let cs = + changeset.new(data) + |> changeset.put_change("name", "Alice") + |> changeset.put_change("email", "alice@example.com") + |> changeset.put_change("age", "28") + |> changeset.validate_required("name") + |> changeset.validate_required("email") + |> changeset.validate_email("email") + |> changeset.validate_length("name", Some(2), Some(100)) + |> changeset.validate_number("age", Some(0), Some(120)) + +case changeset.apply(cs) { + Ok(_) -> io.println("valid!") + Error(errors) -> io.println(changeset.format_errors(errors)) +} +``` + +## Validators + +```gleam +// Required — field must be present in changes +|> changeset.validate_required("name") + +// Length — min/max for string fields +|> changeset.validate_length("name", Some(2), Some(100)) +|> changeset.validate_length("bio", None, Some(500)) // max only + +// Number — min/max for int fields (value parsed from change string) +|> changeset.validate_number("age", Some(0), Some(120)) + +// Email format +|> changeset.validate_email("email") + +// Not empty string +|> changeset.validate_not_empty("name") + +// Custom format check +|> changeset.validate_format("slug", fn(v) { + string.all(v, fn(c) { c == "-" || c >= "a" && c <= "z" || c >= "0" && c <= "9" }) +}, "must contain only lowercase letters, digits, and hyphens") +``` + +## Reading changes and errors + +```gleam +changeset.is_valid(cs) // => True / False +changeset.get_changes(cs) // => Dict(String, String) +changeset.get_errors(cs) // => List(FieldError) + +case changeset.get_change(cs, "name") { + Ok(name) -> io.println("name is: " <> name) + Error(Nil) -> io.println("name not set") +} +``` + +## Manual errors + +```gleam +let cs = + changeset.new(data) + |> changeset.put_change("email", "alice@example.com") + |> changeset.add_error("email", "already taken") + +changeset.format_errors(changeset.get_errors(cs)) +// => "email: already taken" +``` diff --git a/docs/examples/decode.md b/docs/examples/decode.md new file mode 100644 index 0000000..7912052 --- /dev/null +++ b/docs/examples/decode.md @@ -0,0 +1,79 @@ +# Decode + +`sparkling/decode` parses ClickHouse response bodies (JSONEachRow and others). + +## decode_json_each_row + +Parses a newline-separated JSONEachRow response with a custom decoder. + +```gleam +import gleam/dynamic/decode as dyn +import sparkling/decode + +type User { + User(id: Int, name: String) +} + +let user_decoder = + dyn.map2(User, dyn.field("id", dyn.int), dyn.field("name", dyn.string)) + +let body = "{\"id\":1,\"name\":\"Alice\"}\n{\"id\":2,\"name\":\"Bob\"}" + +case decode.decode_json_each_row(body, user_decoder) { + Ok(users) -> io.println(int.to_string(list.length(users)) <> " users") + Error(msg) -> io.println("decode error: " <> msg) +} +``` + +## decode_json_each_row_dict + +Parses each line to `Dict(String, json.Json)` — useful when you don't have a fixed type. + +```gleam +case decode.decode_json_each_row_dict(body) { + Ok(records) -> + list.each(records, fn(row) { + case dict.get(row, "id") { + Ok(id) -> io.println(json.to_string(id)) + Error(_) -> Nil + } + }) + Error(msg) -> io.println("error: " <> msg) +} +``` + +## decode_json_each_row_raw + +Returns raw `json.Json` values for each line. + +```gleam +case decode.decode_json_each_row_raw(body) { + Ok(jsons) -> io.println(int.to_string(list.length(jsons)) <> " values") + Error(msg) -> io.println("error: " <> msg) +} +``` + +## decode_json_each_row_streaming + +Memory-efficient streaming: processes each row via callback instead of building a list. +Returns `Result(Int, String)` where `Int` is the number of rows processed. + +```gleam +case decode.decode_json_each_row_streaming(large_body, user_decoder, fn(user) { + io.println("processing: " <> user.name) +}) { + Ok(count) -> io.println(int.to_string(count) <> " rows processed") + Error(msg) -> io.println("error: " <> msg) +} +``` + +## decode_json + +Parses a single JSON value. + +```gleam +case decode.decode_json("{\"id\":1}", dyn.field("id", dyn.int)) { + Ok(id) -> io.println(int.to_string(id)) + Error(_) -> io.println("decode failed") +} +``` diff --git a/docs/examples/decode_examples.md b/docs/examples/decode_examples.md deleted file mode 100644 index a7ce8aa..0000000 --- a/docs/examples/decode_examples.md +++ /dev/null @@ -1,11 +0,0 @@ -# Examples extracted from sparkling/src/sparkling/decode.gleam - -## decode_json_each_row_streaming (usage example) - -```gleam -let count = decode_json_each_row_streaming( - large_response, - my_decoder, - fn(user) { io.println("Processing: " <> user.name) } -) -``` diff --git a/docs/examples/encode.md b/docs/examples/encode.md new file mode 100644 index 0000000..dde0cf2 --- /dev/null +++ b/docs/examples/encode.md @@ -0,0 +1,100 @@ +# Encode + +`sparkling/encode` converts Gleam values to JSONEachRow (and other formats) for ClickHouse inserts. + +## encode_json_each_row + +Takes a list of `Dict(String, json.Json)` records and produces a newline-separated JSON string. + +```gleam +import gleam/dict +import gleam/json +import sparkling/encode + +let rows = [ + dict.from_list([ + #("id", json.int(1)), + #("name", json.string("Alice")), + #("active", json.bool(True)), + ]), + dict.from_list([ + #("id", json.int(2)), + #("name", json.string("Bob")), + #("active", json.bool(False)), + ]), +] + +let body = encode.encode_json_each_row(rows) +// => {"id":1,"name":"Alice","active":true} +// {"id":2,"name":"Bob","active":false} +``` + +## encode_record + +Encode a single record. + +```gleam +let row = dict.from_list([ + #("id", json.int(42)), + #("status", json.string("active")), +]) + +encode.encode_record(row) +// => {"id":42,"status":"active"} +``` + +## ClickHouse type encoders + +```gleam +import sparkling/types + +// Decimal — encoded as string to preserve precision +let assert Ok(price) = types.decimal("123.456") +encode.decimal(price) // => json.string("123.456") + +// DateTime64 +let assert Ok(ts) = types.datetime64("2024-01-15 10:30:45.123", 3, Some("UTC")) +encode.datetime64(ts) // => json.string("2024-01-15 10:30:45.123") + +// UUID +let assert Ok(uid) = types.uuid("550e8400-e29b-41d4-a716-446655440000") +encode.uuid(uid) // => json.string("550e8400-e29b-41d4-a716-446655440000") + +// LowCardinality +let status = types.low_cardinality_string("active") +encode.low_cardinality_string(status) // => json.string("active") + +// Enum — encode as underlying int +encode.enum_value(1) // => json.int(1) +``` + +## Nullable + +```gleam +encode.nullable(Some(json.int(42))) // => json.int(42) +encode.nullable(None) // => json.null() +``` + +## Arrays + +```gleam +encode.array([1, 2, 3], json.int) +// => json.array([json.int(1), json.int(2), json.int(3)]) +``` + +## Map / Tuple / Nested + +```gleam +// ClickHouse Map +encode.clickhouse_map_from_dict( + dict.from_list([#("key", json.string("value"))]) +) + +// ClickHouse Tuple (encoded as JSON array) +encode.clickhouse_tuple([json.int(1), json.string("hello")]) + +// Nested structure +encode.nested_from_dict( + dict.from_list([#("field", json.string("value"))]) +) +``` diff --git a/docs/examples/query.md b/docs/examples/query.md new file mode 100644 index 0000000..fec3bbe --- /dev/null +++ b/docs/examples/query.md @@ -0,0 +1,136 @@ +# Query Builder + +`sparkling/query` builds SQL SELECT queries immutably. `query.to_sql` validates and returns `Result(String, String)`. + +## Basic SELECT + +```gleam +import sparkling/expr +import sparkling/query + +let q = + query.new() + |> query.from("users") + |> query.select([expr.field("id"), expr.field("name")]) + |> query.limit(10) + +case query.to_sql(q) { + Ok(sql) -> io.println(sql) + // => SELECT `id`, `name` FROM `users` LIMIT 10 + Error(msg) -> io.println("invalid query: " <> msg) +} +``` + +## WHERE conditions + +Conditions added with `query.where` are AND-ed together. + +```gleam +let q = + query.new() + |> query.from("events") + |> query.select([expr.field("id"), expr.field("type")]) + |> query.where(expr.eq(expr.field("type"), expr.string("click"))) + |> query.where(expr.gt(expr.field("created_at"), expr.string("2024-01-01"))) +``` + +Multiple conditions at once: + +```gleam +let q = + query.new() + |> query.from("events") + |> query.where_all([ + expr.eq(expr.field("type"), expr.string("click")), + expr.gt(expr.field("score"), expr.int(50)), + ]) +``` + +## GROUP BY and HAVING + +```gleam +let q = + query.new() + |> query.from("events") + |> query.select([ + expr.field("user_id"), + expr.As(expr.count_all(), "total"), + ]) + |> query.group_by([expr.field("user_id")]) + |> query.having(expr.gt(expr.count_all(), expr.int(5))) +``` + +## ORDER BY + +```gleam +let q = + query.new() + |> query.from("users") + |> query.select([expr.field("id"), expr.field("name")]) + |> query.order_by_desc(expr.field("created_at")) + |> query.order_by_asc(expr.field("name")) +``` + +## DISTINCT, LIMIT, OFFSET + +```gleam +let q = + query.new() + |> query.from("users") + |> query.select([expr.field("country")]) + |> query.distinct() + |> query.limit(20) + |> query.offset(40) +``` + +## Expressions reference + +```gleam +// Literals +expr.int(42) +expr.float(3.14) +expr.string("hello") +expr.bool(True) +expr.null() + +// Column reference +expr.field("user_id") + +// Comparisons +expr.eq(expr.field("status"), expr.string("active")) +expr.ne(expr.field("status"), expr.string("deleted")) +expr.lt(expr.field("age"), expr.int(18)) +expr.le(expr.field("age"), expr.int(18)) +expr.gt(expr.field("score"), expr.float(9.5)) +expr.ge(expr.field("score"), expr.float(9.5)) + +// Logical +expr.and(expr.field("a"), expr.field("b")) +expr.or(expr.field("a"), expr.field("b")) +expr.not(expr.field("a")) + +// NULL checks +expr.IsNull(expr.field("deleted_at")) +expr.IsNotNull(expr.field("name")) + +// IN / BETWEEN +expr.In(expr.field("status"), [expr.string("active"), expr.string("pending")]) +expr.Between(expr.field("age"), expr.int(18), expr.int(65)) + +// Aggregates +expr.count_all() +expr.count(Some(expr.field("user_id"))) +expr.Sum(expr.field("amount")) +expr.Avg(expr.field("score")) +expr.Min(expr.field("price")) +expr.Max(expr.field("price")) + +// Alias +expr.alias(expr.count_all(), "total") + +// Cast +expr.Cast(expr.field("ts"), "DateTime64(3)") + +// Arbitrary function call +expr.FunctionCall("toDate", [expr.field("created_at")]) +``` diff --git a/docs/examples/repo.md b/docs/examples/repo.md new file mode 100644 index 0000000..4b39bda --- /dev/null +++ b/docs/examples/repo.md @@ -0,0 +1,87 @@ +# Repository + +`sparkling/repo` executes SQL against ClickHouse over HTTP. + +## Setup + +```gleam +import sparkling/repo + +let r = + repo.new("http://localhost:8123") + |> repo.with_database("mydb") + |> repo.with_credentials("default", "") +``` + +## execute_sql + +Returns the raw response body as `Result(String, RepoError)`. + +```gleam +case repo.execute_sql(r, "SELECT 1 FORMAT JSONEachRow") { + Ok(body) -> io.println(body) // => {"1":1} + Error(repo.HttpError(msg)) -> io.println("http error: " <> msg) + Error(repo.ParseError(msg)) -> io.println("parse error: " <> msg) + Error(repo.ConnectionError(msg)) -> io.println("connection error: " <> msg) + Error(repo.ClickHouseError(msg, code)) -> + io.println("clickhouse error [" <> int.to_string(option.unwrap(code, 0)) <> "]: " <> msg) +} +``` + +## query (with decoder) + +```gleam +import sparkling/decode + +case repo.query(r, "SELECT id, name FROM users FORMAT JSONEachRow", fn(body) { + decode.decode_json_each_row(body, my_decoder) +}) { + Ok(rows) -> io.println(int.to_string(list.length(rows)) <> " rows") + Error(_) -> io.println("failed") +} +``` + +## Retry config + +```gleam +import sparkling/retry + +// Use built-in configs +let r = repo.new("http://localhost:8123") + |> repo.with_retry_config(retry.network_config()) + +// Or customize +let config = retry.RetryConfig( + max_attempts: 5, + base_delay_ms: 200, + max_delay_ms: 30_000, + jitter_factor: 0.2, +) +let r = repo.new("http://localhost:8123") + |> repo.with_retry_config(config) +``` + +## Observability (event handler) + +```gleam +let r = + repo.new("http://localhost:8123") + |> repo.with_event_handler(fn(event) { + case event { + repo.QueryStart(sql) -> io.println("start: " <> sql) + repo.QueryEnd(sql, duration_ms) -> + io.println("done in " <> int.to_string(duration_ms) <> "ms") + repo.QueryError(sql, err) -> io.println("error: " <> err) + repo.RetryAttempt(sql, attempt, err) -> + io.println("retry #" <> int.to_string(attempt) <> ": " <> err) + } + }) +``` + +## Timeout + +```gleam +let r = + repo.new("http://localhost:8123") + |> repo.with_timeout(5000) // 5 seconds +``` diff --git a/docs/examples/schema.md b/docs/examples/schema.md new file mode 100644 index 0000000..7206879 --- /dev/null +++ b/docs/examples/schema.md @@ -0,0 +1,113 @@ +# Schema + +## Defining a table + +```gleam +import sparkling/schema + +let users_table = schema.table("users", [ + schema.field("id", schema.UInt64), + schema.field("email", schema.String), + schema.field("score", schema.Float64), + schema.field("created_at", schema.DateTime64(3)), +]) +``` + +## FieldType variants + +```gleam +// Integers +schema.UInt8 +schema.UInt16 +schema.UInt32 +schema.UInt64 +schema.Int8 +schema.Int16 +schema.Int32 +schema.Int64 + +// Floats +schema.Float32 +schema.Float64 + +// Strings +schema.String +schema.FixedString(36) + +// Dates and times +schema.Date +schema.Date32 +schema.DateTime +schema.DateTime64(3) // precision 0-9 + +// Decimals +schema.Decimal(18, 2) // precision, scale + +// Boolean and UUID +schema.Bool +schema.UUID + +// Wrappers +schema.Nullable(schema.String) +schema.Array(schema.Int32) +schema.LowCardinality(schema.String) + +// Complex types +schema.Enum8([#("active", 1), #("inactive", 2)]) +schema.Enum16([#("pending", 1), #("done", 2)]) +schema.Tuple([schema.UInt32, schema.String]) +schema.JSON +``` + +## field_type_to_sql + +```gleam +schema.field_type_to_sql(schema.UInt32) +// => "UInt32" + +schema.field_type_to_sql(schema.Nullable(schema.String)) +// => "Nullable(String)" + +schema.field_type_to_sql(schema.Array(schema.Int32)) +// => "Array(Int32)" + +schema.field_type_to_sql(schema.Decimal(18, 2)) +// => "Decimal(18, 2)" + +schema.field_type_to_sql(schema.DateTime64(3)) +// => "DateTime64(3)" + +schema.field_type_to_sql(schema.LowCardinality(schema.String)) +// => "LowCardinality(String)" +``` + +## to_create_table_sql + +```gleam +let tbl = schema.table("events", [ + schema.field("id", schema.UInt64), + schema.field("name", schema.String), + schema.field("occurred_at", schema.DateTime64(3)), +]) + +schema.to_create_table_sql(tbl, "MergeTree() ORDER BY id") +// => "CREATE TABLE events (id UInt64, name String, occurred_at DateTime64(3)) ENGINE = MergeTree() ORDER BY id" +``` + +## find_field / field_names + +```gleam +let tbl = schema.table("users", [ + schema.field("id", schema.UInt64), + schema.field("name", schema.String), +]) + +schema.find_field(tbl, "id") +// => Some(Field("id", UInt64)) + +schema.find_field(tbl, "unknown") +// => None + +schema.field_names(tbl) +// => ["id", "name"] +``` diff --git a/docs/examples/schema_examples.md b/docs/examples/schema_examples.md deleted file mode 100644 index 625fee6..0000000 --- a/docs/examples/schema_examples.md +++ /dev/null @@ -1,64 +0,0 @@ -# Examples extracted from sparkling/src/sparkling/schema.gleam - -## FieldType examples - -```gleam -UInt32 -Nullable(of: String) -Array(of: Int32) -Decimal(precision: 18, scale: 2) -DateTime64(precision: 3) -``` - -## table/field examples - -```gleam -table("users", [ - field("id", UInt64), - field("email", String), - field("created_at", DateTime64(3)), -]) - -field("user_id", UInt64) -field("name", Nullable(of: String)) -``` - -## field_type_to_sql examples - -```gleam -field_type_to_sql(UInt32) -// => "UInt32" - -field_type_to_sql(Nullable(of: String)) -// => "Nullable(String)" - -field_type_to_sql(Array(of: Int32)) -// => "Array(Int32)" -``` - -## to_create_table_sql example - -```gleam -let users_table = table("users", [ - field("id", UInt64), - field("email", String), -]) - -to_create_table_sql(users_table, engine: "MergeTree()") -// => "CREATE TABLE users (id UInt64, email String) ENGINE = MergeTree()" -``` - -## find_field / field_names examples - -```gleam -let tbl = table("users", [field("id", UInt64)]) -find_field(tbl, "id") -// => Some(Field("id", UInt64)) - -find_field(tbl, "unknown") -// => None - -let tbl = table("users", [field("id", UInt64), field("name", String)]) -field_names(tbl) -// => ["id", "name"] -``` diff --git a/docs/examples/types.md b/docs/examples/types.md new file mode 100644 index 0000000..f3a1e71 --- /dev/null +++ b/docs/examples/types.md @@ -0,0 +1,83 @@ +# Types + +Advanced ClickHouse type wrappers in `sparkling/types`. + +## Decimal + +`Decimal` is opaque — construct via `types.decimal/1` (returns `Result`). + +```gleam +import sparkling/types + +// From string (preserves precision) +case types.decimal("123.45") { + Ok(d) -> types.decimal_to_string(d) // => "123.45" + Error(msg) -> panic as msg +} + +// From int +let d = types.decimal_from_int(100) +types.decimal_to_string(d) // => "100" + +// From float +case types.decimal_from_float(3.14) { + Ok(d) -> types.decimal_to_string(d) + Error(msg) -> panic as msg +} +``` + +## DateTime64 + +```gleam +// From string value (validates precision 0-9) +case types.datetime64("2024-01-15 10:30:45.123", 3, Some("UTC")) { + Ok(dt) -> types.datetime64_to_string(dt) // => "2024-01-15 10:30:45.123" + Error(msg) -> panic as msg +} + +// From epoch seconds +case types.datetime64_from_epoch(1_705_316_100, 0, None) { + Ok(dt) -> types.datetime64_to_string(dt) + Error(msg) -> panic as msg +} + +// Read timezone +let tz = types.datetime64_timezone(dt) // => Some("UTC") or None +``` + +## UUID + +`UUID` is opaque — construct via `types.uuid/1`. Validates 8-4-4-4-12 hex format. + +```gleam +case types.uuid("550e8400-e29b-41d4-a716-446655440000") { + Ok(u) -> types.uuid_to_string(u) // => "550e8400-e29b-41d4-a716-446655440000" + Error(msg) -> panic as msg +} + +// Invalid format returns Error +types.uuid("not-a-uuid") +// => Error("Invalid UUID format: ...") +``` + +## LowCardinality + +`LowCardinality` is a storage-optimization hint for low-cardinality string columns. + +```gleam +let status = types.low_cardinality_string("active") +types.low_cardinality_value(status) // => "active" +``` + +## Enum8 / Enum16 + +```gleam +let mappings = [#("active", 1), #("inactive", 2), #("pending", 3)] + +// Lookup by name -> numeric value (validates range [-128, 127] for Enum8) +types.enum8_from_string(mappings, "active") // => Ok(1) +types.enum8_from_string(mappings, "unknown") // => Error("Value not found in enum: unknown") + +// Enum16 supports range [-32768, 32767] +types.enum16_from_string(mappings, "pending") // => Ok(3) +``` diff --git a/docs/examples/types_examples.md b/docs/examples/types_examples.md deleted file mode 100644 index 963c213..0000000 --- a/docs/examples/types_examples.md +++ /dev/null @@ -1,32 +0,0 @@ -# Examples extracted from sparkling/src/sparkling/types.gleam - -## Decimal examples - -```gleam -let price = Decimal("123.45") -let amount = Decimal("0.000001") -``` - -## DateTime64 example - -```gleam -let timestamp = DateTime64("2024-01-15 10:30:45.123", 3, Some("UTC")) -``` - -## UUID example - -```gleam -let id = UUID("550e8400-e29b-41d4-a716-446655440000") -``` - -## LowCardinality example - -```gleam -let status = low_cardinality_string("active") -``` - -## Enum example - -```gleam -let status = Enum8([#("active", 1), #("inactive", 2)]) -``` diff --git a/docs/quickstart.md b/docs/quickstart.md index 53528c2..df276c8 100644 --- a/docs/quickstart.md +++ b/docs/quickstart.md @@ -1,39 +1,52 @@ # Quick Start -This quick start shows a typical workflow using Sparkling's API in Gleam. +A typical workflow: define a schema, build a query, execute it via the repo. ```gleam +import sparkling/expr import sparkling/query +import sparkling/repo import sparkling/schema -import sparkling/types -// 1. Define your schema +// 1. Define the schema let users_table = schema.table("users", [ schema.field("id", schema.UInt32), schema.field("name", schema.String), schema.field("email", schema.String), - schema.field("created_at", schema.DateTime64), + schema.field("created_at", schema.DateTime64(3)), ]) // 2. Create a repository -let repo = repo.new("http://localhost:8123") +let r = + repo.new("http://localhost:8123") |> repo.with_database("mydb") + |> repo.with_credentials("default", "") -// 3. Build and execute queries -let query = query.from(users_table) +// 3. Build a query +let q = + query.new() + |> query.from("users") |> query.select([expr.field("id"), expr.field("name")]) - |> query.where_(expr.gt(expr.field("age"), expr.value("18"))) + |> query.where(expr.gt(expr.field("id"), expr.int(100))) |> query.limit(10) -// 4. Execute with repo -let sql = query.to_sql(query) -case repo.execute_sql(repo, sql) { - Ok(result) -> // parse result - Error(err) -> // handle error +// 4. Generate SQL and execute +case query.to_sql(q) { + Ok(sql) -> + case repo.execute_sql(r, sql) { + Ok(body) -> io.println(body) + Error(_err) -> io.println("query failed") + } + Error(msg) -> io.println("invalid query: " <> msg) } ``` ## Related examples -- `docs/examples/schema_examples.md` -- `docs/examples/types_examples.md` -- `docs/examples/decode_examples.md` + +- [Schema](examples/schema.md) +- [Query builder](examples/query.md) +- [Types](examples/types.md) +- [Repository](examples/repo.md) +- [Encode](examples/encode.md) +- [Decode](examples/decode.md) +- [Changeset](examples/changeset.md) diff --git a/gleam.toml b/gleam.toml index 663b890..85ebf51 100644 --- a/gleam.toml +++ b/gleam.toml @@ -1,5 +1,5 @@ name = "sparkling" -version = "1.0.0" +version = "1.1.0" description = "A fast, type-safe ClickHouse client for Gleam with composable queries" licenses = ["Apache-2.0"] authors = ["Scaratti Daniele aka lupodevelp"] diff --git a/src/sparkling/changeset.gleam b/src/sparkling/changeset.gleam index 4c169de..b0c8ef6 100644 --- a/src/sparkling/changeset.gleam +++ b/src/sparkling/changeset.gleam @@ -203,7 +203,9 @@ pub fn format_errors(errors: List(FieldError)) -> String { |> string.join(", ") } -/// Helper: validate email format (simple check) +/// Helper: validate email format. +/// Checks for a single "@" with non-empty local and domain parts, and at least +/// one "." in the domain part after "@". pub fn validate_email( changeset: Changeset(data), field: String, @@ -211,7 +213,17 @@ pub fn validate_email( validate_format( changeset, field, - fn(email) { string.contains(email, "@") && string.contains(email, ".") }, + fn(email) { + case string.split(email, "@") { + [local, domain] -> + string.length(local) > 0 + && string.contains(domain, ".") + && !string.starts_with(domain, ".") + && !string.ends_with(domain, ".") + && string.length(domain) > 2 + _ -> False + } + }, "Invalid email format", ) } diff --git a/src/sparkling/decode.gleam b/src/sparkling/decode.gleam index de9e237..89b35e7 100644 --- a/src/sparkling/decode.gleam +++ b/src/sparkling/decode.gleam @@ -160,8 +160,9 @@ fn string_preview(s: String) -> String { } } -/// Convert Dynamic to Dict(String, json.Json) - for decode_json_each_row_dict. -fn dynamic_to_string_json_dict( +/// Convert Dynamic to Dict(String, json.Json). +/// Used internally and by format handlers (e.g. format/json_each_row). +pub fn dynamic_to_string_json_dict( dyn: Dynamic, ) -> Result(Dict(String, json.Json), Nil) { case decode.run(dyn, decode.dict(decode.string, decode.dynamic)) { @@ -177,7 +178,8 @@ fn dynamic_to_string_json_dict( /// Convert Dynamic to json.Json with proper structure preservation. /// Handles primitives (String, Int, Float, Bool, Null) and complex types (List, Dict). -fn dynamic_to_json(dyn: Dynamic) -> json.Json { +/// Used internally and by format handlers (e.g. format/json_each_row). +pub fn dynamic_to_json(dyn: Dynamic) -> json.Json { case dynamic.classify(dyn) { "String" -> { case decode.run(dyn, decode.string) { diff --git a/src/sparkling/expr.gleam b/src/sparkling/expr.gleam index 2c2b2d7..7cf2e2c 100644 --- a/src/sparkling/expr.gleam +++ b/src/sparkling/expr.gleam @@ -314,132 +314,15 @@ fn contains_special_chars(name: String) -> Bool { /// Check if a grapheme is a letter (a-z, A-Z) or underscore. fn is_letter_or_underscore(grapheme: String) -> Bool { - case grapheme { - "a" - | "b" - | "c" - | "d" - | "e" - | "f" - | "g" - | "h" - | "i" - | "j" - | "k" - | "l" - | "m" - | "n" - | "o" - | "p" - | "q" - | "r" - | "s" - | "t" - | "u" - | "v" - | "w" - | "x" - | "y" - | "z" - | "A" - | "B" - | "C" - | "D" - | "E" - | "F" - | "G" - | "H" - | "I" - | "J" - | "K" - | "L" - | "M" - | "N" - | "O" - | "P" - | "Q" - | "R" - | "S" - | "T" - | "U" - | "V" - | "W" - | "X" - | "Y" - | "Z" - | "_" -> True - _ -> False - } + string.contains("abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ_", grapheme) } /// Check if a grapheme is alphanumeric (a-z, A-Z, 0-9) or underscore. fn is_alphanumeric_or_underscore(grapheme: String) -> Bool { - case grapheme { - "a" - | "b" - | "c" - | "d" - | "e" - | "f" - | "g" - | "h" - | "i" - | "j" - | "k" - | "l" - | "m" - | "n" - | "o" - | "p" - | "q" - | "r" - | "s" - | "t" - | "u" - | "v" - | "w" - | "x" - | "y" - | "z" - | "A" - | "B" - | "C" - | "D" - | "E" - | "F" - | "G" - | "H" - | "I" - | "J" - | "K" - | "L" - | "M" - | "N" - | "O" - | "P" - | "Q" - | "R" - | "S" - | "T" - | "U" - | "V" - | "W" - | "X" - | "Y" - | "Z" - | "0" - | "1" - | "2" - | "3" - | "4" - | "5" - | "6" - | "7" - | "8" - | "9" - | "_" -> True - _ -> False - } + string.contains( + "abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ0123456789_", + grapheme, + ) } /// Helper: create a field reference diff --git a/src/sparkling/format/csv.gleam b/src/sparkling/format/csv.gleam index 6353563..b03ebc6 100644 --- a/src/sparkling/format/csv.gleam +++ b/src/sparkling/format/csv.gleam @@ -1,5 +1,4 @@ -/// CSV format handler - comma-separated values with optional quoting -/// Standard CSV format with comma delimiters and quote escaping. +/// CSV format handler - comma-separated values with RFC 4180 quoting. import gleam/dict.{type Dict} import gleam/int import gleam/json @@ -7,70 +6,76 @@ import gleam/list import gleam/string import sparkling/format/registry -/// Create CSV format handler +/// Create CSV format handler. pub fn handler() -> registry.FormatHandler { registry.FormatHandler(name: "CSV", encode: encode, decode: decode) } -/// Encode list of records to CSV format +/// Encode list of records to CSV format (header row + data rows). fn encode(records: List(Dict(String, json.Json))) -> Result(String, String) { case records { [] -> Ok("") [first, ..] -> { let keys = dict.keys(first) - let header = encode_csv_row(list.map(keys, json.string)) - + // Column names go as plain strings in the header (not JSON-encoded) + let header = keys |> list.map(csv_escape_string) |> string.join(",") let rows = records |> list.map(fn(record) { encode_record_csv(record, keys) }) |> string.join("\n") - Ok(header <> "\n" <> rows) } } } -/// Encode single record to CSV row +/// Encode single record to CSV row (values extracted from json.Json). fn encode_record_csv( record: Dict(String, json.Json), keys: List(String), ) -> String { keys |> list.map(fn(key) { - case dict.get(record, key) { - Ok(value) -> value - Error(_) -> json.string("") + let value = case dict.get(record, key) { + Ok(v) -> json_to_plain_string(v) + Error(_) -> "" } + csv_escape_string(value) }) - |> encode_csv_row -} - -/// Encode list of json values to CSV row with proper escaping -fn encode_csv_row(values: List(json.Json)) -> String { - values - |> list.map(csv_escape) |> string.join(",") } -/// Escape value for CSV format -fn csv_escape(value: json.Json) -> String { - let str = json.to_string(value) +/// Extract a plain string representation from a json.Json value. +/// JSON strings are unwrapped (quotes removed); other types use their +/// JSON representation (e.g. 42, true, null). +fn json_to_plain_string(value: json.Json) -> String { + let s = json.to_string(value) + // JSON strings are wrapped in double-quotes: strip them + case + string.starts_with(s, "\"") + && string.ends_with(s, "\"") + && string.length(s) >= 2 + { + True -> string.slice(s, 1, string.length(s) - 2) + False -> s + } +} - // If contains comma, quote, or newline, wrap in quotes and escape quotes +/// Escape a plain string for CSV: wrap in double-quotes if it contains +/// commas, double-quotes, or newlines. Double-quotes inside are doubled (""). +fn csv_escape_string(s: String) -> String { case - string.contains(str, ",") - || string.contains(str, "\"") - || string.contains(str, "\n") + string.contains(s, ",") + || string.contains(s, "\"") + || string.contains(s, "\n") + || string.contains(s, "\r") { - True -> { - let escaped = string.replace(str, "\"", "\"\"") - "\"" <> escaped <> "\"" - } - False -> str + True -> "\"" <> string.replace(s, "\"", "\"\"") <> "\"" + False -> s } } -/// Decode CSV format to list of records +/// Decode CSV format to list of records. +/// First row is the header (column names); subsequent rows are data. fn decode(data: String) -> Result(List(Dict(String, json.Json)), String) { let lines = data @@ -81,20 +86,18 @@ fn decode(data: String) -> Result(List(Dict(String, json.Json)), String) { [] -> Ok([]) [header, ..rows] -> { case parse_csv_row(header) { - Ok(keys) -> { + Ok(keys) -> rows |> list.index_map(fn(row, index) { #(row, index) }) |> list.try_map(fn(row_with_index) { decode_csv_row(row_with_index, keys) }) - } Error(err) -> Error("Header parse error: " <> err) } } } } -/// Decode single CSV row fn decode_csv_row( row_with_index: #(String, Int), keys: List(String), @@ -104,15 +107,11 @@ fn decode_csv_row( case parse_csv_row(row) { Ok(values) -> { case list.length(values) == list.length(keys) { - True -> { + True -> list.zip(keys, values) - |> list.map(fn(pair) { - let #(key, value) = pair - #(key, json.string(value)) - }) + |> list.map(fn(pair) { #(pair.0, json.string(pair.1)) }) |> dict.from_list |> Ok - } False -> Error("Row " <> int.to_string(index + 1) <> ": Column count mismatch") } @@ -121,11 +120,36 @@ fn decode_csv_row( } } -/// Parse CSV row handling quoted values -/// Simplified parser: splits by comma. For RFC 4180 full compliance with quoted fields, -/// escaped quotes, and embedded newlines, use a dedicated CSV parsing library. +/// Parse a CSV row according to RFC 4180. +/// Handles quoted fields (with embedded commas, newlines, and escaped quotes ""). fn parse_csv_row(row: String) -> Result(List(String), String) { - // Basic split by comma - adequate for simple CSV without quoted delimiters - // For production with complex CSV (quotes, escapes), integrate a proper CSV parser - Ok(string.split(row, ",") |> list.map(string.trim)) + do_parse_csv(string.to_graphemes(row), False, "", []) +} + +fn do_parse_csv( + chars: List(String), + in_quotes: Bool, + current: String, + acc: List(String), +) -> Result(List(String), String) { + case chars, in_quotes { + // End of input + [], False -> Ok(list.reverse([current, ..acc])) + [], True -> Error("Unterminated quoted field") + // Two consecutive quotes inside a quoted field → escaped quote char + ["\"", "\"", ..rest], True -> + do_parse_csv(rest, True, current <> "\"", acc) + // Opening quote (start of a quoted field) + ["\"", ..rest], False -> + do_parse_csv(rest, True, current, acc) + // Closing quote + ["\"", ..rest], True -> + do_parse_csv(rest, False, current, acc) + // Field separator (outside quotes) + [",", ..rest], False -> + do_parse_csv(rest, False, "", [current, ..acc]) + // Any other character + [c, ..rest], _ -> + do_parse_csv(rest, in_quotes, current <> c, acc) + } } diff --git a/src/sparkling/format/json_each_row.gleam b/src/sparkling/format/json_each_row.gleam index 28565bf..027d2a7 100644 --- a/src/sparkling/format/json_each_row.gleam +++ b/src/sparkling/format/json_each_row.gleam @@ -1,35 +1,32 @@ -/// JSONEachRow format handler - default ClickHouse JSON format +/// JSONEachRow format handler - default ClickHouse JSON format. /// Each row is a separate JSON object on its own line. import gleam/dict.{type Dict} -import gleam/dynamic import gleam/dynamic/decode import gleam/int import gleam/json import gleam/list import gleam/string +import sparkling/decode as sparkling_decode import sparkling/format/registry -/// Create JSONEachRow format handler +/// Create JSONEachRow format handler. pub fn handler() -> registry.FormatHandler { - registry.FormatHandler(name: "JSONEachRow", encode: encode, decode: decode) + registry.FormatHandler(name: "JSONEachRow", encode: encode, decode: do_decode) } -/// Encode list of records to JSONEachRow format +/// Encode list of records to JSONEachRow format (one JSON object per line). fn encode(records: List(Dict(String, json.Json))) -> Result(String, String) { records - |> list.map(encode_record) + |> list.map(fn(record) { + json.object(dict.to_list(record)) + |> json.to_string + }) |> string.join("\n") |> Ok } -/// Encode single record to JSON object string -fn encode_record(record: Dict(String, json.Json)) -> String { - json.object(dict.to_list(record)) - |> json.to_string -} - -/// Decode JSONEachRow format to list of records -fn decode(data: String) -> Result(List(Dict(String, json.Json)), String) { +/// Decode JSONEachRow format to list of records. +fn do_decode(data: String) -> Result(List(Dict(String, json.Json)), String) { data |> string.split("\n") |> list.filter(fn(line) { !string.is_empty(string.trim(line)) }) @@ -37,7 +34,6 @@ fn decode(data: String) -> Result(List(Dict(String, json.Json)), String) { |> list.try_map(decode_line) } -/// Decode single line with error context fn decode_line( line_with_index: #(String, Int), ) -> Result(Dict(String, json.Json), String) { @@ -45,14 +41,13 @@ fn decode_line( case json.parse(from: line, using: decode.dynamic) { Ok(dynamic_value) -> { - // Convert dynamic to Dict(String, json.Json) - case dynamic_to_dict(dynamic_value) { - Ok(dict_value) -> Ok(dict_value) + case sparkling_decode.dynamic_to_string_json_dict(dynamic_value) { + Ok(d) -> Ok(d) Error(_) -> Error( "Line " <> int.to_string(index + 1) - <> ": Failed to convert to dict - " + <> ": Not a JSON object - " <> string_excerpt(line, 100), ) } @@ -67,26 +62,6 @@ fn decode_line( } } -/// Convert dynamic value to Dict(String, json.Json) -/// Uses dynamic.classify to inspect the structure and convert recursively -fn dynamic_to_dict(dyn: dynamic.Dynamic) -> Result(Dict(String, json.Json), Nil) { - // Try to decode as a dict-like structure - // For now we use a simplified approach: serialize to JSON string and re-parse - case dynamic.classify(dyn) { - "Map" | "Dict" -> { - // Dynamic is a map/dict - we need to convert it - // Simplified: return empty dict (full implementation would require iterating keys) - // In practice, json.parse already gives us the right structure - Ok(dict.new()) - } - _ -> { - // Not a dict-like structure - Error(Nil) - } - } -} - -/// Extract first N characters of string for error messages fn string_excerpt(str: String, max_len: Int) -> String { case string.length(str) > max_len { True -> string.slice(str, 0, max_len) <> "..." diff --git a/src/sparkling/format/registry.gleam b/src/sparkling/format/registry.gleam index 20e9f9c..346ed28 100644 --- a/src/sparkling/format/registry.gleam +++ b/src/sparkling/format/registry.gleam @@ -2,11 +2,12 @@ /// Allows registration of custom format handlers for different ClickHouse formats. import gleam/dict.{type Dict} import gleam/json -import gleam/list -import gleam/option.{type Option, None, Some} +import gleam/result import gleam/string -/// Format handler for a specific ClickHouse format +/// Format handler for a specific ClickHouse format. +/// encode: List(Dict) → serialized string +/// decode: serialized string → List(Dict) pub type FormatHandler { FormatHandler( name: String, @@ -15,17 +16,17 @@ pub type FormatHandler { ) } -/// Global registry of format handlers +/// Registry of format handlers (opaque). pub opaque type FormatRegistry { FormatRegistry(handlers: Dict(String, FormatHandler)) } -/// Create a new empty format registry +/// Create a new empty format registry. pub fn new() -> FormatRegistry { FormatRegistry(handlers: dict.new()) } -/// Register a format handler +/// Register a format handler. Overwrites any existing handler with the same name. pub fn register( registry: FormatRegistry, handler: FormatHandler, @@ -33,118 +34,46 @@ pub fn register( FormatRegistry(handlers: dict.insert(registry.handlers, handler.name, handler)) } -/// Get a format handler by name +/// Get a format handler by name. Returns an error string if not found. pub fn get_handler( registry: FormatRegistry, format_name: String, -) -> Option(FormatHandler) { - case dict.get(registry.handlers, format_name) { - Ok(handler) -> Some(handler) - Error(_) -> None - } +) -> Result(FormatHandler, String) { + dict.get(registry.handlers, format_name) + |> result.replace_error( + "Format '" + <> format_name + <> "' not registered. Available: " + <> available_formats_string(registry), + ) } -/// List all registered format names +/// List all registered format names. pub fn list_formats(registry: FormatRegistry) -> List(String) { dict.keys(registry.handlers) } -/// Encode data using the specified format +/// Encode data using the specified format handler. pub fn encode( registry: FormatRegistry, format_name: String, data: List(Dict(String, json.Json)), ) -> Result(String, String) { - case get_handler(registry, format_name) { - Some(handler) -> handler.encode(data) - None -> - Error( - "Format '" - <> format_name - <> "' not registered. Available formats: " - <> format_list_string(registry), - ) - } + use handler <- result.try(get_handler(registry, format_name)) + handler.encode(data) } -/// Decode data using the specified format +/// Decode data using the specified format handler. pub fn decode( registry: FormatRegistry, format_name: String, data: String, ) -> Result(List(Dict(String, json.Json)), String) { - case get_handler(registry, format_name) { - Some(handler) -> handler.decode(data) - None -> - Error( - "Format '" - <> format_name - <> "' not registered. Available formats: " - <> format_list_string(registry), - ) - } -} - -/// Create default registry with all available format handlers -/// Includes: JSONEachRow, TabSeparated, CSV -pub fn default_registry() -> FormatRegistry { - new() - |> register_all_formats -} - -/// Register all available format handlers -fn register_all_formats(registry: FormatRegistry) -> FormatRegistry { - registry - |> register(json_each_row_handler()) - |> register(tab_separated_handler()) - |> register(csv_handler()) -} - -/// JSONEachRow format handler -fn json_each_row_handler() -> FormatHandler { - // Import and use the handler from format/json_each_row module - // For now, return a minimal handler - FormatHandler( - name: "JSONEachRow", - encode: fn(records) { - list.map(records, fn(record) { - json.object(dict.to_list(record)) - |> json.to_string - }) - |> string.join("\n") - |> Ok - }, - decode: fn(_data) { - // Simplified - actual implementation in format/json_each_row - Error("Use decode.decode_json_each_row for full functionality") - }, - ) -} - -/// TabSeparated format handler -fn tab_separated_handler() -> FormatHandler { - FormatHandler( - name: "TabSeparated", - encode: fn(_records) { - Error("TabSeparated encoding - use format/tab_separated module") - }, - decode: fn(_data) { - Error("TabSeparated decoding - use format/tab_separated module") - }, - ) -} - -/// CSV format handler -fn csv_handler() -> FormatHandler { - FormatHandler( - name: "CSV", - encode: fn(_records) { Error("CSV encoding - use format/csv module") }, - decode: fn(_data) { Error("CSV decoding - use format/csv module") }, - ) + use handler <- result.try(get_handler(registry, format_name)) + handler.decode(data) } -/// Helper to format list of available formats -fn format_list_string(registry: FormatRegistry) -> String { +fn available_formats_string(registry: FormatRegistry) -> String { case list_formats(registry) { [] -> "none" formats -> string.join(formats, ", ") diff --git a/src/sparkling/format/tab_separated.gleam b/src/sparkling/format/tab_separated.gleam index cf72af0..b5a9853 100644 --- a/src/sparkling/format/tab_separated.gleam +++ b/src/sparkling/format/tab_separated.gleam @@ -12,18 +12,20 @@ pub fn handler() -> registry.FormatHandler { registry.FormatHandler(name: "TabSeparated", encode: encode, decode: decode) } -/// Encode list of records to TabSeparated format -/// Note: This assumes all records have the same keys in the same order +/// Encode list of records to TabSeparated format. +/// First row is the header (column names); subsequent rows are data. +/// Note: This assumes all records have the same keys in the same order. fn encode(records: List(Dict(String, json.Json))) -> Result(String, String) { case records { [] -> Ok("") [first, ..] -> { let keys = dict.keys(first) - - records - |> list.map(fn(record) { encode_record(record, keys) }) - |> string.join("\n") - |> Ok + let header = string.join(keys, "\t") + let rows = + records + |> list.map(fn(record) { encode_record(record, keys) }) + |> string.join("\n") + Ok(header <> "\n" <> rows) } } } diff --git a/src/sparkling/query.gleam b/src/sparkling/query.gleam index 9c51f24..f2ff452 100644 --- a/src/sparkling/query.gleam +++ b/src/sparkling/query.gleam @@ -1,5 +1,6 @@ /// Immutable query builder for constructing SELECT queries in a type-safe way. /// Follows the design pattern of Ecto.Query with method chaining for ClickHouse. +import gleam/int import gleam/list import gleam/option.{type Option, None, Some} import gleam/string @@ -190,13 +191,13 @@ fn build_sql(query: Query) -> String { // LIMIT clause let limit_clause = case query.limit { None -> "" - Some(n) -> " LIMIT " <> string.inspect(n) + Some(n) -> " LIMIT " <> int.to_string(n) } // OFFSET clause let offset_clause = case query.offset { None -> "" - Some(n) -> " OFFSET " <> string.inspect(n) + Some(n) -> " OFFSET " <> int.to_string(n) } select_clause diff --git a/src/sparkling/repo.gleam b/src/sparkling/repo.gleam index e69c557..072fe16 100644 --- a/src/sparkling/repo.gleam +++ b/src/sparkling/repo.gleam @@ -21,6 +21,9 @@ pub type Repo { password: Option(String), on_event: fn(Event) -> Nil, retry_config: RetryConfig, + /// Timeout in milliseconds. Note: gleam_httpc does not currently expose + /// a per-request timeout option. This field is reserved for future use + /// or custom transport implementations. timeout_ms: Int, ) } @@ -80,23 +83,20 @@ pub fn with_retry_config(repo: Repo, config: RetryConfig) -> Repo { Repo(..repo, retry_config: config) } -/// Set query timeout in milliseconds +/// Set query timeout in milliseconds. +/// Note: timeout enforcement is not yet implemented in the HTTP transport. pub fn with_timeout(repo: Repo, timeout_ms: Int) -> Repo { Repo(..repo, timeout_ms: timeout_ms) } /// Execute a SQL query and return the response body as a string. -/// This is the low-level interface; higher-level functions will parse the response. pub fn execute_sql(repo: Repo, sql: String) -> Result(String, RepoError) { - // Emit QueryStart event let start_time = erlang_monotonic_time() repo.on_event(QueryStart(sql)) - // Build request case build_request(repo, sql) { Ok(req) -> { - // Execute with retries - case execute_with_retries(repo, req) { + case execute_with_retries(repo, req, sql) { Ok(body) -> { let duration = calculate_duration(start_time) repo.on_event(QueryEnd(sql, duration)) @@ -116,7 +116,6 @@ pub fn execute_sql(repo: Repo, sql: String) -> Result(String, RepoError) { } /// Execute a query and parse the response using a decoder. -/// This is a higher-level interface that combines execute_sql with decoding. pub fn query( repo: Repo, sql: String, @@ -129,17 +128,14 @@ pub fn query( /// Build an HTTP request for ClickHouse fn build_request(repo: Repo, sql: String) -> Result(Request(String), RepoError) { - // Parse base URL case uri.parse(repo.base_url) { Error(_) -> Error(ConnectionError("Invalid base URL: " <> repo.base_url)) Ok(base) -> { - // Build query parameters let query_params = case repo.database { Some(db) -> [#("database", db)] None -> [] } - // Create POST request let req = request.new() |> request.set_method(http.Post) @@ -151,19 +147,16 @@ fn build_request(repo: Repo, sql: String) -> Result(Request(String), RepoError) _ -> http.Http }) - // Set port if specified let req = case base.port { Some(port) -> request.set_port(req, port) None -> req } - // Set query parameters if present let req = case query_params { [] -> req params -> request.set_query(req, params) } - // Add authentication if set let req = case repo.user, repo.password { Some(user), Some(pass) -> { let credentials = user <> ":" <> pass @@ -179,23 +172,18 @@ fn build_request(repo: Repo, sql: String) -> Result(Request(String), RepoError) } } -/// Execute request with retry logic +/// Execute request with retry logic, emitting RetryAttempt events fn execute_with_retries( repo: Repo, req: Request(String), + sql: String, ) -> Result(String, RepoError) { let operation = fn() { - // Use direct httpc case httpc.send(req) { Ok(response) -> { - // Check HTTP status code case response.status { - 200 -> { - // Success - return body as string - Ok(response.body) - } + 200 -> Ok(response.body) status -> { - // ClickHouse error - try to extract error message from body let error_msg = case string.is_empty(response.body) { True -> "HTTP " <> int.to_string(status) False -> response.body @@ -212,17 +200,18 @@ fn execute_with_retries( case err { ConnectionError(_) -> True HttpError(_) -> True - // Don't retry ClickHouse errors (4xx/5xx with response) ClickHouseError(_, _) -> False - // Don't retry parse errors ParseError(_) -> False } } - retry.with_retry(repo.retry_config, operation, is_retryable_error) + let on_retry = fn(attempt: Int, err: RepoError) { + repo.on_event(RetryAttempt(sql, attempt, error_to_string(err))) + } + + retry.with_retry(repo.retry_config, operation, is_retryable_error, on_retry) } -/// Convert error to string for logging fn error_to_string(error: RepoError) -> String { case error { HttpError(msg) -> "HTTP error: " <> msg @@ -238,18 +227,14 @@ fn error_to_string(error: RepoError) -> String { } } -/// Get monotonic time for duration calculation +/// Get monotonic time in native units (nanoseconds on OTP 18+) @external(erlang, "erlang", "monotonic_time") fn erlang_monotonic_time() -> Int -/// Calculate duration in milliseconds from start time +/// Calculate duration in milliseconds from a start time (native units / 1_000_000) fn calculate_duration(start_time: Int) -> Int { - let end_time = erlang_monotonic_time() - let duration_native = end_time - start_time - // Convert to milliseconds (native unit is typically nanoseconds) - duration_native / 1_000_000 + { erlang_monotonic_time() - start_time } / 1_000_000 } -/// Base64 encode using Erlang's base64 module @external(erlang, "base64", "encode") fn bit_array_to_base64(input: BitArray) -> String diff --git a/src/sparkling/retry.gleam b/src/sparkling/retry.gleam index 244a867..c6f54ac 100644 --- a/src/sparkling/retry.gleam +++ b/src/sparkling/retry.gleam @@ -1,5 +1,9 @@ //// Retry logic with exponential backoff and jitter for resilient operations. -//// Used by both repo and transport layers to handle transient failures. +//// Used by the repo layer to handle transient failures. +//// +//// Note on time units: erlang:monotonic_time/0 and erlang:system_time/0 return +//// native units. On OTP 18+ (Linux/macOS/Windows) native = nanoseconds. +//// This is a safe assumption for OTP 27+ as used by this library. import gleam/erlang/process import gleam/float @@ -8,11 +12,12 @@ import gleam/int /// Configuration for retry behavior pub type RetryConfig { RetryConfig( - /// Maximum number of retry attempts + /// Maximum total number of attempts (first attempt + retries). + /// E.g. max_attempts=3 → 1 initial try + up to 2 retries = 3 total. max_attempts: Int, /// Base delay in milliseconds for exponential backoff base_delay_ms: Int, - /// Maximum delay between retries + /// Maximum delay between retries in milliseconds max_delay_ms: Int, /// Jitter factor (0.0 = no jitter, 1.0 = full jitter) jitter_factor: Float, @@ -39,34 +44,36 @@ pub fn network_config() -> RetryConfig { ) } -/// Execute an operation with retry logic -/// Returns the result of the first successful attempt, or the last error +/// Execute an operation with retry logic. +/// `on_retry(attempt, error)` is called before each retry, where attempt +/// starts at 1. Returns the first successful result or the last error. pub fn with_retry( config: RetryConfig, operation: fn() -> Result(a, b), is_retryable_error: fn(b) -> Bool, + on_retry: fn(Int, b) -> Nil, ) -> Result(a, b) { - do_retry(config, operation, is_retryable_error, 0) + do_retry(config, operation, is_retryable_error, on_retry, 0) } -/// Internal retry implementation fn do_retry( config: RetryConfig, operation: fn() -> Result(a, b), is_retryable_error: fn(b) -> Bool, + on_retry: fn(Int, b) -> Nil, attempt: Int, ) -> Result(a, b) { case operation() { Ok(result) -> Ok(result) Error(err) -> { - case attempt < config.max_attempts && is_retryable_error(err) { + // attempt + 1 < max_attempts means we still have attempts left + case attempt + 1 < config.max_attempts && is_retryable_error(err) { True -> { - // Calculate delay with exponential backoff and jitter + let next = attempt + 1 + on_retry(next, err) let delay = calculate_delay(config, attempt) process.sleep(delay) - - // Retry with incremented attempt counter - do_retry(config, operation, is_retryable_error, attempt + 1) + do_retry(config, operation, is_retryable_error, on_retry, next) } False -> Error(err) } @@ -76,13 +83,11 @@ fn do_retry( /// Calculate delay with exponential backoff and jitter fn calculate_delay(config: RetryConfig, attempt: Int) -> Int { - // Exponential backoff: base_delay * 2^attempt let exponential_delay = config.base_delay_ms * int_pow(2, attempt) - - // Cap at maximum delay let capped_delay = int.min(exponential_delay, config.max_delay_ms) - // Add jitter to avoid thundering herd + // Jitter: modulo of native system time gives pseudo-random value in + // [0, jitter_range_ms). The result is in ms regardless of the time unit. let jitter_range = float.round(int.to_float(capped_delay) *. config.jitter_factor) let jitter = case jitter_range > 0 { @@ -93,16 +98,13 @@ fn calculate_delay(config: RetryConfig, attempt: Int) -> Int { capped_delay + jitter } -/// Integer power function (simple recursive implementation) fn int_pow(base: Int, exponent: Int) -> Int { case exponent { 0 -> 1 n if n > 0 -> base * int_pow(base, n - 1) _ -> 1 - // Negative exponents not supported } } -/// Get system time for jitter calculation @external(erlang, "erlang", "system_time") fn erlang_system_time() -> Int diff --git a/src/sparkling/schema.gleam b/src/sparkling/schema.gleam index a72daa7..486ef21 100644 --- a/src/sparkling/schema.gleam +++ b/src/sparkling/schema.gleam @@ -1,15 +1,16 @@ /// Schema definition for ClickHouse tables and fields. -/// +/// /// This module provides typed representations of ClickHouse tables and columns, /// supporting the major ClickHouse data types and nullable/array wrappers. -/// +/// /// See: https://clickhouse.com/docs/en/sql-reference/data-types/ +import gleam/int import gleam/list import gleam/option.{type Option} import gleam/string +import sparkling/expr /// ClickHouse field types supported by sparkling. -/// Examples: docs/examples/schema_examples.md pub type FieldType { // Integer types UInt8 @@ -75,19 +76,16 @@ pub type Table { } /// Create a new table definition. -/// Examples: docs/examples/schema_examples.md pub fn table(name: String, fields: List(Field)) -> Table { Table(name: name, fields: fields) } /// Create a new field definition. -/// Examples: docs/examples/schema_examples.md pub fn field(name: String, typ: FieldType) -> Field { Field(name: name, typ: typ) } /// Get the SQL representation of a field type for CREATE TABLE DDL. -/// Examples: docs/examples/schema_examples.md pub fn field_type_to_sql(typ: FieldType) -> String { case typ { UInt8 -> "UInt8" @@ -101,16 +99,16 @@ pub fn field_type_to_sql(typ: FieldType) -> String { Float32 -> "Float32" Float64 -> "Float64" String -> "String" - FixedString(size) -> "FixedString(" <> string.inspect(size) <> ")" + FixedString(size) -> "FixedString(" <> int.to_string(size) <> ")" Date -> "Date" Date32 -> "Date32" DateTime -> "DateTime" - DateTime64(precision) -> "DateTime64(" <> string.inspect(precision) <> ")" + DateTime64(precision) -> "DateTime64(" <> int.to_string(precision) <> ")" Decimal(precision, scale) -> "Decimal(" - <> string.inspect(precision) + <> int.to_string(precision) <> ", " - <> string.inspect(scale) + <> int.to_string(scale) <> ")" Bool -> "Bool" UUID -> "UUID" @@ -129,7 +127,9 @@ pub fn field_type_to_sql(typ: FieldType) -> String { let inner = values |> list.map(fn(pair) { - "'" <> pair.0 <> "' = " <> string.inspect(pair.1) + // Escape single quotes in enum labels using SQL double-quote style ('') + let escaped = string.replace(pair.0, "'", "''") + "'" <> escaped <> "' = " <> int.to_string(pair.1) }) |> string.join(", ") "Enum8(" <> inner <> ")" @@ -138,7 +138,8 @@ pub fn field_type_to_sql(typ: FieldType) -> String { let inner = values |> list.map(fn(pair) { - "'" <> pair.0 <> "' = " <> string.inspect(pair.1) + let escaped = string.replace(pair.0, "'", "''") + "'" <> escaped <> "' = " <> int.to_string(pair.1) }) |> string.join(", ") "Enum16(" <> inner <> ")" @@ -147,21 +148,27 @@ pub fn field_type_to_sql(typ: FieldType) -> String { } /// Generate a CREATE TABLE statement for a table definition. -/// +/// Table and field names are properly escaped for ClickHouse. +/// /// Note: This is a minimal DDL generator. For production use, specify engine, /// partition key, order by, etc., using ClickHouse-specific syntax. -/// Examples: docs/examples/schema_examples.md pub fn to_create_table_sql(tbl: Table, engine engine: String) -> String { let field_defs = tbl.fields - |> list.map(fn(f) { f.name <> " " <> field_type_to_sql(f.typ) }) + |> list.map(fn(f) { + expr.to_sql(expr.Field(f.name)) <> " " <> field_type_to_sql(f.typ) + }) |> string.join(", ") - "CREATE TABLE " <> tbl.name <> " (" <> field_defs <> ") ENGINE = " <> engine + "CREATE TABLE " + <> expr.to_sql(expr.Field(tbl.name)) + <> " (" + <> field_defs + <> ") ENGINE = " + <> engine } /// Find a field in a table by name. -/// Examples: docs/examples/schema_examples.md pub fn find_field(tbl: Table, field_name: String) -> Option(Field) { tbl.fields |> list.find(fn(f) { f.name == field_name }) @@ -169,7 +176,6 @@ pub fn find_field(tbl: Table, field_name: String) -> Option(Field) { } /// Get all field names from a table. -/// Examples: docs/examples/schema_examples.md pub fn field_names(tbl: Table) -> List(String) { tbl.fields |> list.map(fn(f) { f.name }) } diff --git a/src/sparkling/types.gleam b/src/sparkling/types.gleam index 1f0cab7..b08a65c 100644 --- a/src/sparkling/types.gleam +++ b/src/sparkling/types.gleam @@ -1,20 +1,16 @@ /// Advanced ClickHouse type wrappers and conversions. -/// +/// /// This module provides typed wrappers for complex ClickHouse types that don't -/// have direct Gleam equivalents, following the patterns documented -/// +/// have direct Gleam equivalents. +/// /// # Type Mapping Strategy -/// +/// /// - Simple types (Int, Float, String, Bool) → native Gleam types /// - Complex types → opaque wrappers with conversion functions /// - Precision-critical types (Decimal) → string-based with validation /// - Temporal types (DateTime64) → structured with timezone support /// - Collections (Array, Map, Tuple) → typed Gleam collections -/// -/// # Plugin API -/// -/// Custom encoders/decoders can be registered for specialized handling of -/// complex types (e.g., native Decimal libraries, custom timezone handling). +import gleam/float import gleam/int import gleam/list import gleam/option.{type Option} @@ -26,108 +22,71 @@ import gleam/string // ============================================================================ /// Decimal value with precision and scale preservation. -/// +/// /// Stored as string to avoid floating-point precision loss. /// Use helper functions to convert to/from numeric types when needed. -/// Examples: docs/examples/types_examples.md pub opaque type Decimal { Decimal(value: String) } -/// Create a Decimal from a string representation +/// Create a Decimal from a string representation. pub fn decimal(value: String) -> Result(Decimal, String) { - // Basic validation: check for valid decimal format case validate_decimal_string(value) { True -> Ok(Decimal(value)) False -> Error("Invalid decimal format: " <> value) } } -/// Get the string representation of a Decimal +/// Get the string representation of a Decimal. pub fn decimal_to_string(d: Decimal) -> String { d.value } -/// Create a Decimal from an integer +/// Create a Decimal from an integer. pub fn decimal_from_int(value: Int) -> Decimal { Decimal(int.to_string(value)) } -/// Create a Decimal from a float (may lose precision) -pub fn decimal_from_float(value: Float) -> Decimal { - Decimal(float_to_string(value)) +/// Create a Decimal from a float. Returns an error for values that cannot +/// be represented as a plain decimal string (e.g. very large floats). +/// Note: floats may lose precision; prefer decimal/1 with an explicit string. +pub fn decimal_from_float(value: Float) -> Result(Decimal, String) { + decimal(float.to_string(value)) } -/// Validate decimal string format +/// Validate decimal string format: optional leading minus, digits, at most one dot. fn validate_decimal_string(s: String) -> Bool { - // Allow: digits, optional decimal point, optional leading minus - // Reject empty strings and non-numeric characters - case s { + case string.trim(s) { "" -> False - _ -> { - let trimmed = string.trim(s) - case trimmed { - "" -> False - _ -> { - // Check for valid characters: digits, minus, and decimal point - let has_invalid_chars = - string.to_graphemes(trimmed) - |> list.any(fn(char) { - char != "-" && char != "." && !is_digit(char) - }) - - case has_invalid_chars { - True -> False - False -> { - // Further validation: minus only at start, single decimal point - case string.starts_with(trimmed, "-") { - True -> validate_positive_decimal(string.drop_start(trimmed, 1)) - False -> validate_positive_decimal(trimmed) - } - } - } - } + trimmed -> { + let body = case string.starts_with(trimmed, "-") { + True -> string.drop_start(trimmed, 1) + False -> trimmed } + validate_positive_decimal(body) } } } -fn is_digit(char: String) -> Bool { - char == "0" - || char == "1" - || char == "2" - || char == "3" - || char == "4" - || char == "5" - || char == "6" - || char == "7" - || char == "8" - || char == "9" -} - fn validate_positive_decimal(s: String) -> Bool { - case s { - "" -> False - _ -> { - // Check if contains only digits and at most one decimal point - let parts = string.split(s, ".") - case parts { - [integer_part] -> - string.length(integer_part) > 0 && all_digits(integer_part) - [integer_part, decimal_part] -> - string.length(integer_part) > 0 - && string.length(decimal_part) > 0 - && all_digits(integer_part) - && all_digits(decimal_part) - _ -> False - } - } + case string.split(s, ".") { + [integer_part] -> + string.length(integer_part) > 0 && all_digits(integer_part) + [integer_part, decimal_part] -> + string.length(integer_part) > 0 + && string.length(decimal_part) > 0 + && all_digits(integer_part) + && all_digits(decimal_part) + _ -> False } } +fn is_digit(char: String) -> Bool { + string.contains("0123456789", char) +} + fn all_digits(s: String) -> Bool { - string.to_graphemes(s) - |> list.all(is_digit) + string.to_graphemes(s) |> list.all(is_digit) } // ============================================================================ @@ -135,22 +94,14 @@ fn all_digits(s: String) -> Bool { // ============================================================================ /// DateTime64 with configurable precision and timezone. -/// -/// Precision: 0-9 digits after decimal point (subsecond precision) -/// Timezone: optional timezone identifier (e.g., "UTC", "America/New_York") -/// Examples: docs/examples/types_examples.md +/// +/// Precision: 0-9 digits after decimal point (subsecond precision). +/// Timezone: optional timezone identifier (e.g. "UTC", "America/New_York"). pub type DateTime64 { - DateTime64( - /// ISO 8601 timestamp or epoch string - value: String, - /// Precision (0-9 for subsecond digits) - precision: Int, - /// Optional timezone identifier - timezone: Option(String), - ) + DateTime64(value: String, precision: Int, timezone: Option(String)) } -/// Create a DateTime64 from ISO 8601 string +/// Create a DateTime64 from an ISO 8601 string or epoch string. pub fn datetime64( value: String, precision: Int, @@ -163,7 +114,7 @@ pub fn datetime64( } } -/// Create a DateTime64 from Unix epoch (seconds) +/// Create a DateTime64 from Unix epoch (seconds). pub fn datetime64_from_epoch( epoch_seconds: Int, precision: Int, @@ -172,12 +123,12 @@ pub fn datetime64_from_epoch( datetime64(int.to_string(epoch_seconds), precision, timezone) } -/// Get the string representation of a DateTime64 +/// Get the string representation of a DateTime64. pub fn datetime64_to_string(dt: DateTime64) -> String { dt.value } -/// Get the timezone of a DateTime64 +/// Get the timezone of a DateTime64. pub fn datetime64_timezone(dt: DateTime64) -> Option(String) { dt.timezone } @@ -186,15 +137,14 @@ pub fn datetime64_timezone(dt: DateTime64) -> Option(String) { // UUID - universally unique identifier // ============================================================================ -/// UUID type (128-bit identifier) -/// -/// Standard format: 8-4-4-4-12 hex digits (xxxxxxxx-xxxx-xxxx-xxxx-xxxxxxxxxxxx) -/// Examples: docs/examples/types_examples.md +/// UUID type (128-bit identifier). +/// +/// Standard format: 8-4-4-4-12 hex digits (xxxxxxxx-xxxx-xxxx-xxxx-xxxxxxxxxxxx). pub opaque type UUID { UUID(value: String) } -/// Create a UUID from string representation +/// Create a UUID from string representation. pub fn uuid(value: String) -> Result(UUID, String) { case validate_uuid_format(value) { True -> Ok(UUID(value)) @@ -202,45 +152,55 @@ pub fn uuid(value: String) -> Result(UUID, String) { } } -/// Get the string representation of a UUID +/// Get the string representation of a UUID. pub fn uuid_to_string(u: UUID) -> String { u.value } -/// Validate UUID format (basic check) +/// Validate UUID format: 8-4-4-4-12 pattern with hex characters only. fn validate_uuid_format(s: String) -> Bool { - // Check for 8-4-4-4-12 pattern let parts = string.split(s, "-") case parts { [a, b, c, d, e] -> string.length(a) == 8 + && all_hex(a) && string.length(b) == 4 + && all_hex(b) && string.length(c) == 4 + && all_hex(c) && string.length(d) == 4 + && all_hex(d) && string.length(e) == 12 + && all_hex(e) _ -> False } } +fn is_hex_char(char: String) -> Bool { + string.contains("0123456789abcdefABCDEF", char) +} + +fn all_hex(s: String) -> Bool { + string.to_graphemes(s) |> list.all(is_hex_char) +} + // ============================================================================ // LowCardinality - optimized string storage // ============================================================================ /// LowCardinality wrapper for values with low cardinality. -/// +/// /// This is a performance hint to ClickHouse for storage optimization. -/// In Gleam, we wrap the value with its type information. -/// Examples: docs/examples/types_examples.md pub opaque type LowCardinality { LowCardinality(value: String) } -/// Create a LowCardinality from a string value +/// Create a LowCardinality from a string value. pub fn low_cardinality_string(value: String) -> LowCardinality { LowCardinality(value) } -/// Get the value from a LowCardinality wrapper +/// Get the value from a LowCardinality wrapper. pub fn low_cardinality_value(lc: LowCardinality) -> String { lc.value } @@ -249,52 +209,56 @@ pub fn low_cardinality_value(lc: LowCardinality) -> String { // Enum - enumerated values // ============================================================================ -/// Enum8/Enum16 type mapping -/// -/// Maps string values to integer codes. -/// Examples: docs/examples/types_examples.md +/// Enum8 type mapping (Int8 range: -128..127). pub type Enum8 { Enum8(mappings: List(#(String, Int))) } +/// Enum16 type mapping (Int16 range: -32768..32767). pub type Enum16 { Enum16(mappings: List(#(String, Int))) } -/// Create an Enum8 from string value +/// Look up an Enum8 value by string label. +/// Returns an error if the label is not found or the mapped value is out of +/// Int8 range [-128, 127]. pub fn enum8_from_string( mappings: List(#(String, Int)), value: String, ) -> Result(Int, String) { - list.find(mappings, fn(pair) { pair.0 == value }) - |> result.map(fn(pair) { pair.1 }) - |> result.replace_error("Invalid enum value: " <> value) + use code <- result.try(enum_lookup(mappings, value)) + case code >= -128 && code <= 127 { + True -> Ok(code) + False -> + Error( + "Enum8 value out of Int8 range [-128, 127]: " <> int.to_string(code), + ) + } } -/// Create an Enum16 from string value +/// Look up an Enum16 value by string label. +/// Returns an error if the label is not found or the mapped value is out of +/// Int16 range [-32768, 32767]. pub fn enum16_from_string( mappings: List(#(String, Int)), value: String, +) -> Result(Int, String) { + use code <- result.try(enum_lookup(mappings, value)) + case code >= -32_768 && code <= 32_767 { + True -> Ok(code) + False -> + Error( + "Enum16 value out of Int16 range [-32768, 32767]: " + <> int.to_string(code), + ) + } +} + +fn enum_lookup( + mappings: List(#(String, Int)), + value: String, ) -> Result(Int, String) { list.find(mappings, fn(pair) { pair.0 == value }) |> result.map(fn(pair) { pair.1 }) |> result.replace_error("Invalid enum value: " <> value) } - -// ============================================================================ -// Helper functions -// ============================================================================ - -/// Convert float to string (helper for decimal_from_float) -@external(erlang, "erlang", "float_to_list") -fn float_to_list(f: Float) -> List(Int) - -fn float_to_string(f: Float) -> String { - // Convert float to string representation - // This is a simplified implementation; use proper formatting library in production - let chars = float_to_list(f) - list_to_string(chars) -} - -@external(erlang, "erlang", "list_to_binary") -fn list_to_string(chars: List(Int)) -> String diff --git a/test/sparkling/schema_test.gleam b/test/sparkling/schema_test.gleam index b7ddbc4..6805a84 100644 --- a/test/sparkling/schema_test.gleam +++ b/test/sparkling/schema_test.gleam @@ -105,9 +105,10 @@ pub fn to_create_table_sql_test() { ]) let sql = to_create_table_sql(tbl, engine: "MergeTree()") + // "timestamp" is a ClickHouse reserved keyword → correctly escaped with backticks sql |> should.equal( - "CREATE TABLE events (id UInt64, event_type String, timestamp DateTime64(3)) ENGINE = MergeTree()", + "CREATE TABLE events (id UInt64, event_type String, `timestamp` DateTime64(3)) ENGINE = MergeTree()", ) } From d89041b55e967bdf989554c36602535bbc52a72f Mon Sep 17 00:00:00 2001 From: Daniele Date: Fri, 20 Mar 2026 21:52:38 +0100 Subject: [PATCH 2/2] Reformat code in expr and csv modules Adjust formatting for readability in src/sparkling/expr.gleam and src/sparkling/format/csv.gleam: break the string.contains call arguments onto separate lines in is_letter_or_underscore, and condense several match-arm bodies in do_parse_csv into single-line expressions. These are formatting-only changes and do not alter program behavior. --- src/sparkling/expr.gleam | 5 ++++- src/sparkling/format/csv.gleam | 15 +++++---------- 2 files changed, 9 insertions(+), 11 deletions(-) diff --git a/src/sparkling/expr.gleam b/src/sparkling/expr.gleam index 7cf2e2c..d007034 100644 --- a/src/sparkling/expr.gleam +++ b/src/sparkling/expr.gleam @@ -314,7 +314,10 @@ fn contains_special_chars(name: String) -> Bool { /// Check if a grapheme is a letter (a-z, A-Z) or underscore. fn is_letter_or_underscore(grapheme: String) -> Bool { - string.contains("abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ_", grapheme) + string.contains( + "abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ_", + grapheme, + ) } /// Check if a grapheme is alphanumeric (a-z, A-Z, 0-9) or underscore. diff --git a/src/sparkling/format/csv.gleam b/src/sparkling/format/csv.gleam index b03ebc6..eddce71 100644 --- a/src/sparkling/format/csv.gleam +++ b/src/sparkling/format/csv.gleam @@ -137,19 +137,14 @@ fn do_parse_csv( [], False -> Ok(list.reverse([current, ..acc])) [], True -> Error("Unterminated quoted field") // Two consecutive quotes inside a quoted field → escaped quote char - ["\"", "\"", ..rest], True -> - do_parse_csv(rest, True, current <> "\"", acc) + ["\"", "\"", ..rest], True -> do_parse_csv(rest, True, current <> "\"", acc) // Opening quote (start of a quoted field) - ["\"", ..rest], False -> - do_parse_csv(rest, True, current, acc) + ["\"", ..rest], False -> do_parse_csv(rest, True, current, acc) // Closing quote - ["\"", ..rest], True -> - do_parse_csv(rest, False, current, acc) + ["\"", ..rest], True -> do_parse_csv(rest, False, current, acc) // Field separator (outside quotes) - [",", ..rest], False -> - do_parse_csv(rest, False, "", [current, ..acc]) + [",", ..rest], False -> do_parse_csv(rest, False, "", [current, ..acc]) // Any other character - [c, ..rest], _ -> - do_parse_csv(rest, in_quotes, current <> c, acc) + [c, ..rest], _ -> do_parse_csv(rest, in_quotes, current <> c, acc) } }