From d158c51d3d446c663d1e0a3dcb278ca4603ff37d Mon Sep 17 00:00:00 2001 From: Cody Bromley Date: Sun, 15 Feb 2026 18:30:20 -0600 Subject: [PATCH 1/9] feat(openapi): upgrade to v0.2.0 with modular architecture Replace v0.1.4 monolithic codebase with v0.2.0 refactored modules: config, request, response, pagination, column_matching, spec, schema. New features: POST-for-read endpoints, spec_json inline specs, LIMIT-to-page_size pushdown, api_key_location (query/cookie), debug mode, max_pages/max_response_bytes safety limits, OpenAPI 3.1 support. Includes 518 unit tests, benchmarks, 5 real-world examples (NWS, CarAPI, PokeAPI, GitHub, Threads), Docker-based integration test infrastructure with 113 assertions, and performance analysis docs. --- docs/catalog/openapi.md | 115 +- wasm-wrappers/fdw/Cargo.lock | 366 +- wasm-wrappers/fdw/Cargo.toml | 4 + wasm-wrappers/fdw/openapi_fdw/.gitignore | 1 + wasm-wrappers/fdw/openapi_fdw/Cargo.toml | 9 +- wasm-wrappers/fdw/openapi_fdw/Makefile | 19 + wasm-wrappers/fdw/openapi_fdw/PERFORMANCE.md | 285 + wasm-wrappers/fdw/openapi_fdw/README.md | 26 +- .../fdw/openapi_fdw/benches/fdw_benchmarks.rs | 383 ++ .../fdw/openapi_fdw/examples/README.md | 18 + .../fdw/openapi_fdw/examples/carapi/README.md | 413 ++ .../fdw/openapi_fdw/examples/carapi/init.sql | 258 + .../fdw/openapi_fdw/examples/github/README.md | 489 ++ .../fdw/openapi_fdw/examples/github/init.sql | 282 + .../fdw/openapi_fdw/examples/nws/README.md | 425 ++ .../fdw/openapi_fdw/examples/nws/init.sql | 231 + .../openapi_fdw/examples/pokeapi/README.md | 386 ++ .../fdw/openapi_fdw/examples/pokeapi/init.sql | 190 + .../openapi_fdw/examples/threads/README.md | 493 ++ .../fdw/openapi_fdw/examples/threads/init.sql | 301 + .../fdw/openapi_fdw/src/column_matching.rs | 309 + .../openapi_fdw/src/column_matching_tests.rs | 1029 +++ wasm-wrappers/fdw/openapi_fdw/src/config.rs | 270 + .../fdw/openapi_fdw/src/config_tests.rs | 665 ++ wasm-wrappers/fdw/openapi_fdw/src/lib.rs | 1096 +--- 
.../fdw/openapi_fdw/src/lib_tests.rs | 181 + .../fdw/openapi_fdw/src/pagination.rs | 101 + .../fdw/openapi_fdw/src/pagination_tests.rs | 575 ++ wasm-wrappers/fdw/openapi_fdw/src/request.rs | 512 ++ .../fdw/openapi_fdw/src/request_tests.rs | 527 ++ wasm-wrappers/fdw/openapi_fdw/src/response.rs | 168 + .../fdw/openapi_fdw/src/response_tests.rs | 761 +++ wasm-wrappers/fdw/openapi_fdw/src/schema.rs | 122 +- .../fdw/openapi_fdw/src/schema_tests.rs | 1949 ++++++ wasm-wrappers/fdw/openapi_fdw/src/spec.rs | 748 ++- .../fdw/openapi_fdw/src/spec_tests.rs | 5533 +++++++++++++++++ .../fdw/openapi_fdw/test/.env.example | 5 + .../fdw/openapi_fdw/test/benchmark.sh | 407 ++ .../fdw/openapi_fdw/test/docker-compose.yml | 25 + .../fdw/openapi_fdw/test/expectations.json | 896 +++ wasm-wrappers/fdw/openapi_fdw/test/init.sql | 791 +++ .../fdw/openapi_fdw/test/mock-spec.json | 673 ++ .../fdw/openapi_fdw/test/run-examples.sh | 642 ++ wasm-wrappers/fdw/openapi_fdw/test/run.sh | 761 +++ wasm-wrappers/fdw/openapi_fdw/wit/world.wit | 3 +- 45 files changed, 22113 insertions(+), 1330 deletions(-) create mode 100644 wasm-wrappers/fdw/openapi_fdw/.gitignore create mode 100644 wasm-wrappers/fdw/openapi_fdw/Makefile create mode 100644 wasm-wrappers/fdw/openapi_fdw/PERFORMANCE.md create mode 100644 wasm-wrappers/fdw/openapi_fdw/benches/fdw_benchmarks.rs create mode 100644 wasm-wrappers/fdw/openapi_fdw/examples/README.md create mode 100644 wasm-wrappers/fdw/openapi_fdw/examples/carapi/README.md create mode 100644 wasm-wrappers/fdw/openapi_fdw/examples/carapi/init.sql create mode 100644 wasm-wrappers/fdw/openapi_fdw/examples/github/README.md create mode 100644 wasm-wrappers/fdw/openapi_fdw/examples/github/init.sql create mode 100644 wasm-wrappers/fdw/openapi_fdw/examples/nws/README.md create mode 100644 wasm-wrappers/fdw/openapi_fdw/examples/nws/init.sql create mode 100644 wasm-wrappers/fdw/openapi_fdw/examples/pokeapi/README.md create mode 100644 
wasm-wrappers/fdw/openapi_fdw/examples/pokeapi/init.sql create mode 100644 wasm-wrappers/fdw/openapi_fdw/examples/threads/README.md create mode 100644 wasm-wrappers/fdw/openapi_fdw/examples/threads/init.sql create mode 100644 wasm-wrappers/fdw/openapi_fdw/src/column_matching.rs create mode 100644 wasm-wrappers/fdw/openapi_fdw/src/column_matching_tests.rs create mode 100644 wasm-wrappers/fdw/openapi_fdw/src/config.rs create mode 100644 wasm-wrappers/fdw/openapi_fdw/src/config_tests.rs create mode 100644 wasm-wrappers/fdw/openapi_fdw/src/lib_tests.rs create mode 100644 wasm-wrappers/fdw/openapi_fdw/src/pagination.rs create mode 100644 wasm-wrappers/fdw/openapi_fdw/src/pagination_tests.rs create mode 100644 wasm-wrappers/fdw/openapi_fdw/src/request.rs create mode 100644 wasm-wrappers/fdw/openapi_fdw/src/request_tests.rs create mode 100644 wasm-wrappers/fdw/openapi_fdw/src/response.rs create mode 100644 wasm-wrappers/fdw/openapi_fdw/src/response_tests.rs create mode 100644 wasm-wrappers/fdw/openapi_fdw/src/schema_tests.rs create mode 100644 wasm-wrappers/fdw/openapi_fdw/src/spec_tests.rs create mode 100644 wasm-wrappers/fdw/openapi_fdw/test/.env.example create mode 100755 wasm-wrappers/fdw/openapi_fdw/test/benchmark.sh create mode 100644 wasm-wrappers/fdw/openapi_fdw/test/docker-compose.yml create mode 100644 wasm-wrappers/fdw/openapi_fdw/test/expectations.json create mode 100644 wasm-wrappers/fdw/openapi_fdw/test/init.sql create mode 100644 wasm-wrappers/fdw/openapi_fdw/test/mock-spec.json create mode 100755 wasm-wrappers/fdw/openapi_fdw/test/run-examples.sh create mode 100755 wasm-wrappers/fdw/openapi_fdw/test/run.sh diff --git a/docs/catalog/openapi.md b/docs/catalog/openapi.md index e49171bf..1be25372 100644 --- a/docs/catalog/openapi.md +++ b/docs/catalog/openapi.md @@ -12,12 +12,13 @@ tags: [OpenAPI](https://www.openapis.org/) is a specification for describing HTTP APIs. 
The OpenAPI Wrapper is a generic WebAssembly (Wasm) foreign data wrapper that can connect to any REST API with an OpenAPI 3.0+ specification. -This wrapper allows you to query any REST API endpoint as a PostgreSQL foreign table, with support for path parameters, pagination, and automatic schema import. +This wrapper allows you to query any REST API endpoint as a PostgreSQL foreign table, with support for path parameters, pagination, POST-for-read endpoints, and automatic schema import. ## Available Versions | Version | Wasm Package URL | Checksum | Required Wrappers Version | | ------- | ---------------- | -------- | ------------------------- | +| 0.2.0 | `https://github.com/supabase/wrappers/releases/download/wasm_openapi_fdw_v0.2.0/openapi_fdw.wasm` | `{checksum}` | >=0.5.0 | | 0.1.4 | `https://github.com/supabase/wrappers/releases/download/wasm_openapi_fdw_v0.1.4/openapi_fdw.wasm` | `dd434f8565b060b181d1e69e1e4d5c8b9c3ac5ca444056d3c2fb939038d308fe` | >=0.5.0 | ## Preparation @@ -94,12 +95,14 @@ We need to provide Postgres with the credentials to access the API and any addit | Option | Required | Description | | ------ | :------: | ----------- | | `fdw_package_*` | Yes | Standard Wasm FDW package metadata. See [Available Versions](#available-versions). | -| `base_url` | Yes* | Base URL for the API (e.g., `https://api.example.com/v1`). *Optional if `spec_url` provides servers. | -| `spec_url` | No | URL to the OpenAPI specification JSON. Required for `IMPORT FOREIGN SCHEMA`. | +| `base_url` | Yes* | Base URL for the API (e.g., `https://api.example.com/v1`). *Optional if `spec_url` or `spec_json` provides servers. | +| `spec_url` | No | URL to the OpenAPI specification JSON. Required for `IMPORT FOREIGN SCHEMA`. Mutually exclusive with `spec_json`. | +| `spec_json` | No | Inline OpenAPI 3.0+ JSON spec for `IMPORT FOREIGN SCHEMA`. Mutually exclusive with `spec_url`. Useful when the API doesn't publish a spec URL. | | `api_key` | No | API key for authentication. 
| | `api_key_id` | No | Vault secret key ID storing the API key. Use instead of `api_key`. | | `api_key_header` | No | Header name for API key (default: `Authorization`). | | `api_key_prefix` | No | Prefix for API key value (default: `Bearer` for Authorization header). | +| `api_key_location` | No | Where to send the API key: `header` (default), `query`, or `cookie`. | | `bearer_token` | No | Bearer token for authentication (alternative to `api_key`). | | `bearer_token_id` | No | Vault secret key ID storing the bearer token. | | `user_agent` | No | Custom User-Agent header value. | @@ -109,6 +112,9 @@ We need to provide Postgres with the credentials to access the API and any addit | `page_size` | No | Default page size for pagination (0 = no automatic limit). | | `page_size_param` | No | Query parameter name for page size (default: `limit`). | | `cursor_param` | No | Query parameter name for pagination cursor (default: `after`). | +| `max_pages` | No | Maximum pages per scan to prevent infinite pagination loops (default: `1000`). | +| `max_response_bytes` | No | Maximum response body size in bytes (default: `52428800` / 50 MiB). | +| `debug` | No | Emit HTTP request details and scan stats via PostgreSQL INFO messages when set to `'true'` or `'1'`. | ### Create a schema @@ -151,10 +157,12 @@ options ( | `cursor_param` | No | Override server-level cursor parameter name. | | `page_size_param` | No | Override server-level page size parameter name. | | `page_size` | No | Override server-level page size. | +| `method` | No | HTTP method for this endpoint. Use `POST` for read-via-POST endpoints (default: `GET`). | +| `request_body` | No | Request body string for POST endpoints. 
| ### Automatic Schema Import -If you provide a `spec_url` in the server options, you can automatically import table definitions: +If you provide a `spec_url` or `spec_json` in the server options, you can automatically import table definitions: ```sql -- Import all endpoints @@ -244,12 +252,67 @@ select * from openapi.users where status = 'active'; Columns used as query or path parameters always return the value from the WHERE clause, even if the API response contains the same field with different casing. This ensures PostgreSQL's post-filter always passes. +### LIMIT Pushdown + +When your query includes a `LIMIT`, the FDW uses it as the `page_size` for the first API request, reducing unnecessary data transfer: + +```sql +-- Sends GET /users?limit=5 (uses LIMIT as page_size) +select * from openapi.users limit 5; +``` + +## POST-for-Read Endpoints + +Some APIs use POST requests for read operations (e.g., search or query endpoints). Use the `method` and `request_body` table options: + +```sql +create foreign table openapi.search_results ( + id text, + title text, + score real, + attrs jsonb +) +server my_api_server +options ( + endpoint '/search', + method 'POST', + request_body '{"query": "openapi", "limit": 50}' +); + +select id, title, score from openapi.search_results; +``` + +## Debug Mode + +Enable debug mode to see HTTP request details and scan statistics in PostgreSQL INFO messages: + +```sql +create server debug_api + foreign data wrapper wasm_wrapper + options ( + fdw_package_name 'supabase:openapi-fdw', + fdw_package_url '{See: "Available Versions"}', + fdw_package_checksum '{See: "Available Versions"}', + fdw_package_version '{See: "Available Versions"}', + base_url 'https://api.example.com', + debug 'true' + ); +``` + +Debug output includes: + +- HTTP method and URL for each request +- Response status code and body size +- Total rows fetched and pages retrieved +- Pagination details + ## Pagination The FDW automatically handles pagination. 
It supports: 1. **Cursor-based pagination** - Uses `cursor_param` and `cursor_path` 2. **URL-based pagination** - Follows `next` links in response +3. **Offset-based pagination** - Auto-detected from common patterns ### Configuring Pagination @@ -358,10 +421,10 @@ For APIs with very strict rate limits, consider using materialized views to cach create server openapi_server foreign data wrapper wasm_wrapper options ( - fdw_package_url 'https://github.com/supabase/wrappers/releases/download/wasm_openapi_fdw_v0.1.4/openapi_fdw.wasm', + fdw_package_url 'https://github.com/supabase/wrappers/releases/download/wasm_openapi_fdw_v0.2.0/openapi_fdw.wasm', fdw_package_name 'supabase:openapi-fdw', - fdw_package_version '0.1.4', - fdw_package_checksum 'dd434f8565b060b181d1e69e1e4d5c8b9c3ac5ca444056d3c2fb939038d308fe', + fdw_package_version '0.2.0', + fdw_package_checksum '{See: "Available Versions"}', base_url 'https://api.weather.gov', spec_url 'https://api.weather.gov/openapi.json' ); @@ -395,6 +458,26 @@ options ( select id, type from openapi.zone_stations where zone_id = 'AKZ317'; ``` +### POST-for-Read + +```sql +-- Query a search API that uses POST for read operations +create foreign table openapi.search_results ( + id text, + title text, + score real, + attrs jsonb +) +server my_api_server +options ( + endpoint '/search', + method 'POST', + request_body '{"query": "postgresql", "limit": 25}' +); + +select id, title, score from openapi.search_results; +``` + ### Custom Headers ```sql @@ -413,6 +496,24 @@ create server custom_api ); ``` +### API Key Location + +By default, the API key is sent as a header. 
Use `api_key_location` to send it as a query parameter or cookie instead: + +```sql +create server query_auth_api + foreign data wrapper wasm_wrapper + options ( + fdw_package_name 'supabase:openapi-fdw', + fdw_package_url '{See: "Available Versions"}', + fdw_package_checksum '{See: "Available Versions"}', + fdw_package_version '{See: "Available Versions"}', + base_url 'https://api.example.com', + api_key 'sk-your-api-key', + api_key_location 'query' -- sends as ?api_key=sk-... (uses api_key_header as param name) + ); +``` + ### Response Path Extraction For APIs that wrap data in a container object: diff --git a/wasm-wrappers/fdw/Cargo.lock b/wasm-wrappers/fdw/Cargo.lock index bc491082..0ed0d85b 100644 --- a/wasm-wrappers/fdw/Cargo.lock +++ b/wasm-wrappers/fdw/Cargo.lock @@ -2,6 +2,15 @@ # It is not intended for manual editing. version = 4 +[[package]] +name = "aho-corasick" +version = "1.1.4" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "ddd31a130427c27518df266943a5308ed92d4b226cc639f5a8f1002816174301" +dependencies = [ + "memchr", +] + [[package]] name = "android-tzdata" version = "0.1.1" @@ -17,6 +26,18 @@ dependencies = [ "libc", ] +[[package]] +name = "anes" +version = "0.1.6" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "4b46cbb362ab8752921c97e041f5e366ee6297bd428a31275b9fcf1e380f7299" + +[[package]] +name = "anstyle" +version = "1.0.13" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "5192cca8006f1fd4f7237516f40fa183bb07f8fbdfedaa0036de5ea9b0b45e78" + [[package]] name = "autocfg" version = "1.4.0" @@ -47,6 +68,12 @@ dependencies = [ "wit-bindgen-rt", ] +[[package]] +name = "cast" +version = "0.3.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "37b2a672a2cb129a2e41c10b1224bb368f9f37a2b16b612598138befd7b37eb5" + [[package]] name = "cc" version = "1.2.23" @@ -81,9 +108,61 @@ dependencies = [ "js-sys", "num-traits", "wasm-bindgen", - 
"windows-link", + "windows-link 0.1.1", +] + +[[package]] +name = "ciborium" +version = "0.2.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "42e69ffd6f0917f5c029256a24d0161db17cea3997d185db0d35926308770f0e" +dependencies = [ + "ciborium-io", + "ciborium-ll", + "serde", +] + +[[package]] +name = "ciborium-io" +version = "0.2.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "05afea1e0a06c9be33d539b876f1ce3692f4afea2cb41f740e7743225ed1c757" + +[[package]] +name = "ciborium-ll" +version = "0.2.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "57663b653d948a338bfb3eeba9bb2fd5fcfaecb9e199e87e1eda4d9e8b240fd9" +dependencies = [ + "ciborium-io", + "half", ] +[[package]] +name = "clap" +version = "4.5.58" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "63be97961acde393029492ce0be7a1af7e323e6bae9511ebfac33751be5e6806" +dependencies = [ + "clap_builder", +] + +[[package]] +name = "clap_builder" +version = "4.5.58" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "7f13174bda5dfd69d7e947827e5af4b0f2f94a4a3ee92912fba07a66150f21e2" +dependencies = [ + "anstyle", + "clap_lex", +] + +[[package]] +name = "clap_lex" +version = "1.0.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "3a822ea5bc7590f9d40f1ba12c0dc3c2760f3482c6984db1573ad11031420831" + [[package]] name = "clerk_fdw" version = "0.2.2" @@ -98,6 +177,90 @@ version = "0.8.7" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "773648b94d0e5d620f64f280777445740e61fe701025087ec8b57f45c791888b" +[[package]] +name = "criterion" +version = "0.5.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "f2b12d017a929603d80db1831cd3a24082f8137ce19c69e6447f54f5fc8d692f" +dependencies = [ + "anes", + "cast", + "ciborium", + "clap", + "criterion-plot", + "is-terminal", + "itertools", + "num-traits", + 
"once_cell", + "oorandom", + "plotters", + "rayon", + "regex", + "serde", + "serde_derive", + "serde_json", + "tinytemplate", + "walkdir", +] + +[[package]] +name = "criterion-plot" +version = "0.5.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "6b50826342786a51a89e2da3a28f1c32b06e387201bc2d19791f622c673706b1" +dependencies = [ + "cast", + "itertools", +] + +[[package]] +name = "crossbeam-deque" +version = "0.8.6" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "9dd111b7b7f7d55b72c0a6ae361660ee5853c9af73f70c3c2ef6858b950e2e51" +dependencies = [ + "crossbeam-epoch", + "crossbeam-utils", +] + +[[package]] +name = "crossbeam-epoch" +version = "0.9.18" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "5b82ac4a3c2ca9c3460964f020e1402edd5753411d7737aa39c3714ad1b5420e" +dependencies = [ + "crossbeam-utils", +] + +[[package]] +name = "crossbeam-utils" +version = "0.8.21" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "d0a5c400df2834b80a4c3327b3aad3a4c4cd4de0629063962b03235697506a28" + +[[package]] +name = "crunchy" +version = "0.2.4" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "460fbee9c2c2f33933d720630a6a0bac33ba7053db5344fac858d4b8952d77d5" + +[[package]] +name = "either" +version = "1.15.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "48c757948c5ede0e46177b7add2e67155f70e33c07fea8284df6576da70b3719" + +[[package]] +name = "half" +version = "2.7.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "6ea2d84b969582b4b1864a92dc5d27cd2b77b622a8d79306834f1be5ba20d84b" +dependencies = [ + "cfg-if", + "crunchy", + "zerocopy", +] + [[package]] name = "helloworld_fdw" version = "0.2.0" @@ -105,6 +268,12 @@ dependencies = [ "wit-bindgen-rt", ] +[[package]] +name = "hermit-abi" +version = "0.5.2" +source = "registry+https://github.com/rust-lang/crates.io-index" 
+checksum = "fc0fef456e4baa96da950455cd02c081ca953b141298e41db3fc7e36b1da849c" + [[package]] name = "hubspot_fdw" version = "0.2.0" @@ -145,6 +314,26 @@ dependencies = [ "wit-bindgen-rt", ] +[[package]] +name = "is-terminal" +version = "0.4.17" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "3640c1c38b8e4e43584d8df18be5fc6b0aa314ce6ebf51b53313d4306cca8e46" +dependencies = [ + "hermit-abi", + "libc", + "windows-sys", +] + +[[package]] +name = "itertools" +version = "0.10.5" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "b0fd2260e829bddf4cb6ea802289de2f86d6a7a690192fbe91b3f46e0f2c8473" +dependencies = [ + "either", +] + [[package]] name = "itoa" version = "1.0.15" @@ -203,10 +392,17 @@ version = "1.21.3" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "42f5e15c9953c5e4ccceeb2e7382a716482c34515315f7b03532b8b4e8393d2d" +[[package]] +name = "oorandom" +version = "11.1.5" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "d6790f58c7ff633d8771f42965289203411a5e5c68388703c06e14f24770b41e" + [[package]] name = "openapi_fdw" -version = "0.1.4" +version = "0.2.0" dependencies = [ + "criterion", "serde", "serde_json", "urlencoding", @@ -230,6 +426,34 @@ dependencies = [ "wit-bindgen-rt", ] +[[package]] +name = "plotters" +version = "0.3.7" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "5aeb6f403d7a4911efb1e33402027fc44f29b5bf6def3effcc22d7bb75f2b747" +dependencies = [ + "num-traits", + "plotters-backend", + "plotters-svg", + "wasm-bindgen", + "web-sys", +] + +[[package]] +name = "plotters-backend" +version = "0.3.7" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "df42e13c12958a16b3f7f4386b9ab1f3e7933914ecea48da7139435263a4172a" + +[[package]] +name = "plotters-svg" +version = "0.3.7" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = 
"51bae2ac328883f7acdfea3d66a7c35751187f870bc81f94563733a154d7a670" +dependencies = [ + "plotters-backend", +] + [[package]] name = "proc-macro2" version = "1.0.95" @@ -248,6 +472,55 @@ dependencies = [ "proc-macro2", ] +[[package]] +name = "rayon" +version = "1.11.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "368f01d005bf8fd9b1206fb6fa653e6c4a81ceb1466406b81792d87c5677a58f" +dependencies = [ + "either", + "rayon-core", +] + +[[package]] +name = "rayon-core" +version = "1.13.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "22e18b0f0062d30d4230b2e85ff77fdfe4326feb054b9783a3460d8435c8ab91" +dependencies = [ + "crossbeam-deque", + "crossbeam-utils", +] + +[[package]] +name = "regex" +version = "1.12.3" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "e10754a14b9137dd7b1e3e5b0493cc9171fdd105e0ab477f51b72e7f3ac0e276" +dependencies = [ + "aho-corasick", + "memchr", + "regex-automata", + "regex-syntax", +] + +[[package]] +name = "regex-automata" +version = "0.4.14" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "6e1dd4122fc1595e8162618945476892eefca7b88c52820e74af6262213cae8f" +dependencies = [ + "aho-corasick", + "memchr", + "regex-syntax", +] + +[[package]] +name = "regex-syntax" +version = "0.8.9" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "a96887878f22d7bad8a3b6dc5b7440e0ada9a245242924394987b21cf2210a4c" + [[package]] name = "rustversion" version = "1.0.20" @@ -260,6 +533,15 @@ version = "1.0.20" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "28d3b2b1366ec20994f1fd18c3c594f05c5dd4bc44d8bb0c1c632c8d6829481f" +[[package]] +name = "same-file" +version = "1.0.6" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "93fc1dc3aaa9bfed95e02e6eadabb4baf7e3078b0bd1b4d7b6b0b68378900502" +dependencies = [ + "winapi-util", +] + [[package]] name = "serde" version = 
"1.0.219" @@ -335,6 +617,16 @@ dependencies = [ "unicode-ident", ] +[[package]] +name = "tinytemplate" +version = "1.2.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "be4d6b5f19ff7664e8c98d03e2139cb510db9b0a60b55f8e8709b689d939b6bc" +dependencies = [ + "serde", + "serde_json", +] + [[package]] name = "unicode-ident" version = "1.0.18" @@ -347,6 +639,16 @@ version = "2.1.3" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "daf8dba3b7eb870caf1ddeed7bc9d2a049f3cfdfae7cb521b087cc33ae4c49da" +[[package]] +name = "walkdir" +version = "2.5.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "29790946404f91d9c5d06f9874efddea1dc06c5efe94541a7d6863108e3a5e4b" +dependencies = [ + "same-file", + "winapi-util", +] + [[package]] name = "wasm-bindgen" version = "0.2.100" @@ -405,6 +707,25 @@ dependencies = [ "unicode-ident", ] +[[package]] +name = "web-sys" +version = "0.3.77" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "33b6dd2ef9186f1f2072e409e99cd22a975331a6b3591b12c764e0e55c60d5d2" +dependencies = [ + "js-sys", + "wasm-bindgen", +] + +[[package]] +name = "winapi-util" +version = "0.1.11" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "c2a7b1c03c876122aa43f3020e6c3c3ee5c05081c9a00739faf7503aeba10d22" +dependencies = [ + "windows-sys", +] + [[package]] name = "windows-core" version = "0.61.1" @@ -413,7 +734,7 @@ checksum = "46ec44dc15085cea82cf9c78f85a9114c463a369786585ad2882d1ff0b0acf40" dependencies = [ "windows-implement", "windows-interface", - "windows-link", + "windows-link 0.1.1", "windows-result", "windows-strings", ] @@ -446,13 +767,19 @@ version = "0.1.1" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "76840935b766e1b0a05c0066835fb9ec80071d4c09a16f6bd5f7e655e3c14c38" +[[package]] +name = "windows-link" +version = "0.2.1" +source = "registry+https://github.com/rust-lang/crates.io-index" 
+checksum = "f0805222e57f7521d6a62e36fa9163bc891acd422f971defe97d64e70d0a4fe5" + [[package]] name = "windows-result" version = "0.3.3" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "4b895b5356fc36103d0f64dd1e94dfa7ac5633f1c9dd6e80fe9ec4adef69e09d" dependencies = [ - "windows-link", + "windows-link 0.1.1", ] [[package]] @@ -461,7 +788,16 @@ version = "0.4.1" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "2a7ab927b2637c19b3dbe0965e75d8f2d30bdd697a1516191cad2ec4df8fb28a" dependencies = [ - "windows-link", + "windows-link 0.1.1", +] + +[[package]] +name = "windows-sys" +version = "0.61.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "ae137229bcbd6cdf0f7b80a31df61766145077ddf49416a728b02cb3921ff3fc" +dependencies = [ + "windows-link 0.2.1", ] [[package]] @@ -469,3 +805,23 @@ name = "wit-bindgen-rt" version = "0.41.0" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "c4db52a11d4dfb0a59f194c064055794ee6564eb1ced88c25da2cf76e50c5621" + +[[package]] +name = "zerocopy" +version = "0.8.39" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "db6d35d663eadb6c932438e763b262fe1a70987f9ae936e60158176d710cae4a" +dependencies = [ + "zerocopy-derive", +] + +[[package]] +name = "zerocopy-derive" +version = "0.8.39" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "4122cd3169e94605190e77839c9a40d40ed048d305bfdc146e7df40ab0f3e517" +dependencies = [ + "proc-macro2", + "quote", + "syn", +] diff --git a/wasm-wrappers/fdw/Cargo.toml b/wasm-wrappers/fdw/Cargo.toml index e0a9b931..ada7fec0 100644 --- a/wasm-wrappers/fdw/Cargo.toml +++ b/wasm-wrappers/fdw/Cargo.toml @@ -17,6 +17,10 @@ members = [ ] resolver = "2" +[profile.release] +strip = "debuginfo" +lto = true + [workspace.package] edition = "2024" rust-version = "1.88" diff --git a/wasm-wrappers/fdw/openapi_fdw/.gitignore b/wasm-wrappers/fdw/openapi_fdw/.gitignore 
new file mode 100644 index 00000000..325fb06e --- /dev/null +++ b/wasm-wrappers/fdw/openapi_fdw/.gitignore @@ -0,0 +1 @@ +test/.env diff --git a/wasm-wrappers/fdw/openapi_fdw/Cargo.toml b/wasm-wrappers/fdw/openapi_fdw/Cargo.toml index f70dc72b..ffd90d70 100644 --- a/wasm-wrappers/fdw/openapi_fdw/Cargo.toml +++ b/wasm-wrappers/fdw/openapi_fdw/Cargo.toml @@ -1,6 +1,6 @@ [package] name = "openapi_fdw" -version = "0.1.4" +version = "0.2.0" edition = { workspace = true } rust-version = { workspace = true } homepage = { workspace = true } @@ -14,6 +14,13 @@ serde = { version = "1.0", features = ["derive"] } serde_json = "1.0" urlencoding = "2.1" +[dev-dependencies] +criterion = { version = "0.5", features = ["html_reports"] } + +[[bench]] +name = "fdw_benchmarks" +harness = false + [package.metadata.component] package = "supabase:openapi-fdw" diff --git a/wasm-wrappers/fdw/openapi_fdw/Makefile b/wasm-wrappers/fdw/openapi_fdw/Makefile new file mode 100644 index 00000000..df301939 --- /dev/null +++ b/wasm-wrappers/fdw/openapi_fdw/Makefile @@ -0,0 +1,19 @@ +SHELL := /bin/bash +export PATH := $(HOME)/.cargo/bin:$(PATH) +export RUSTUP_HOME := $(HOME)/.rustup + +.PHONY: fmt clippy test build check + +fmt: + cargo fmt + +clippy: + RUSTFLAGS="-D warnings" cargo clippy --all --tests --no-deps + +test: + cargo test + +build: + cargo component build --release --target wasm32-unknown-unknown + +check: fmt clippy test build diff --git a/wasm-wrappers/fdw/openapi_fdw/PERFORMANCE.md b/wasm-wrappers/fdw/openapi_fdw/PERFORMANCE.md new file mode 100644 index 00000000..8e7be064 --- /dev/null +++ b/wasm-wrappers/fdw/openapi_fdw/PERFORMANCE.md @@ -0,0 +1,285 @@ +# OpenAPI FDW Performance Analysis + +## Overview + +This document captures performance characteristics, optimizations, and benchmark results for the OpenAPI FDW. 
+ +## Benchmark Results (Feb 2026) + +### End-to-End Performance (10 iterations each) + +| Scenario | OpenAPI FDW | pg_http | pg_net | Overhead | +| ---------- | ------------- | --------- | -------- | ---------- | +| Simple Array (3 rows) | 183ms | 10ms | 786ms | +173ms | +| Wrapped Response (2 rows) | 178ms | 7ms | 787ms | +171ms | +| Type Coercion (1 row) | 185ms | 10ms | 792ms | +175ms | +| GeoJSON Nested (3 rows) | 188ms | 11ms | 788ms | +177ms | +| POST-for-Read (1 row) | 198ms | 14ms | 785ms | +184ms | + +**Key Finding:** Consistent ~170-180ms overhead regardless of row count, indicating fixed per-query cost. + +### Micro-Benchmark Results + +From Criterion benchmarks (`cargo bench --bench fdw_benchmarks`): + +| Operation | Time | Notes | +| ----------- | ------ | ------- | +| Column sanitization (camelCase→snake_case) | 60-122ns | One-time per column | +| camelCase conversion | 19-123ns | Cached in begin_scan | +| JSON key lookup (HashMap) | 12-16ns | O(1) exact match | +| DateTime normalization (Cow) | **0.98ns** | Zero-copy for valid datetimes | +| DateTime normalization (String) | 13ns | Allocates new string | +| JSON parsing (10 rows) | 2.6µs | ~260ns per row | +| JSON parsing (1000 rows) | 265µs | ~265ns per row | +| Type conversion (primitives) | 0.6-0.7ns | i64, f64, bool | +| Type conversion (string) | 12ns | Allocates | +| URL building (no params) | 15ns | String concat | +| URL building (3 params) | 85ns | With urlencoding | +| URL building (10 params) | 110ns | Scales linearly | + +## Performance Characteristics + +### Per-Query Overhead Breakdown + +The ~170ms overhead is composed of: + +1. **WASM Runtime Initialization** (~100-120ms) + - Component instantiation + - Module loading + - Memory setup + +2. **FDW Framework** (~20-30ms) + - PostgreSQL FDW API calls + - WASM boundary crossings + - Context setup + +3. 
**OpenAPI FDW Logic** (~30-40ms) + - Column metadata caching + - HTTP request setup + - JSON parsing + - Column key mapping + +### Per-Row Costs + +Once initialized, per-row costs are minimal: + +- **JSON parsing**: ~265ns per row (measured) +- **Type conversion**: 0.6-12ns per cell (measured) +- **Column key lookup**: 12-16ns per column (measured) +- **WASM boundary crossing**: ~100ns per cell push (estimated) + +**Total per-row**: ~1-2µs for typical 5-column row + +### Scaling Characteristics + +- ✅ **Excellent**: Row count (1 row vs 1000 rows has minimal impact) +- ✅ **Good**: Column count (O(1) lookups via pre-built key map) +- ⚠️ **Fixed**: Per-query overhead (~170ms regardless of data size) + +## Real-World Performance Impact + +### With Typical API Latency + +Most REST APIs have 100-500ms base latency. Example: + +| API Latency | pg_http Total | OpenAPI FDW Total | Relative Overhead | +| ------------- | --------------- | ------------------- | ------------------- | +| 100ms | 110ms | 280ms | +154% | +| 200ms | 210ms | 380ms | +81% | +| 300ms | 310ms | 480ms | +55% | +| 500ms | 510ms | 680ms | +33% | + +**Takeaway:** With realistic API latency (100-500ms), relative overhead ranges from roughly 33% to 154% and shrinks as API latency grows, not 2000%. + +### When OpenAPI FDW Wins + +Despite the overhead, OpenAPI FDW provides value when: + +1. **Complex JSON structures** - Automatic unwrapping vs manual jsonb queries +2. **Type safety** - Automatic type conversion vs manual casts +3. **Pagination** - Automatic vs manual cursor handling +4. **Schema discovery** - IMPORT FOREIGN SCHEMA vs manual DDL +5. **Maintainability** - 1-line queries vs 10-line jsonb wrangling + +## Optimization History + +### Feb 2026 - Cleanup & Performance Sprint + +1. **Fixed deduplication bug** (schema.rs:88) + - Removed redundant `sanitize_column_name()` call + - Eliminated unwrap() in hot path + +2. **Removed JSON clone** (spec.rs:187) + - Changed `from_json(&JsonValue)` to `from_json(JsonValue)` + - Eliminates clone of entire OpenAPI spec + +3. 
**`Cow` for datetime normalization** + - **13× faster** for already-valid datetimes (0.98ns vs 13ns) + - Eliminates 50% of allocations in date/timestamp columns + +4. **HashMap pre-allocation** + - `substitute_path_params`: 2× quals capacity (injected_params) + - `build_query_params`: quals + 3 capacity + - Eliminates rehashing during URL construction + +5. **Function extraction** + - `build_url()`: 150 lines → 30 lines (extracted helpers) + - `json_to_cell_cached()`: 135 lines → 40 lines (extracted converters) + - Eliminated code duplication in type conversion + +6. **Column metadata caching** (existing optimization) + - Eliminates ~2000 WASM boundary crossings per 100-row scan + - Caches name, type_oid, camelCase, lowercase variants + +7. **Column key pre-resolution** (existing optimization) + - Builds column→JSON key map once per page + - O(1) lookups vs O(N) search per cell + +## Known Bottlenecks + +### 1. WASM Runtime Startup (~100-120ms) + +**Root cause:** Supabase Wrappers recreates the entire wasmtime stack for every query in `wasm_fdw.rs:new()`: + +| Step | Cached? | Est. Cost | +| ------ | --------- | ----------- | +| Engine creation | No | ~20-30ms | +| Component load (from disk) | Yes (file only) | ~10-20ms | +| WASM → native compilation | No | ~40-60ms | +| Linker setup | No | ~5-10ms | +| Component instantiation | No (required per-query) | ~30-50ms | + +Only the WASM binary file is cached on disk. The Engine, compiled native code, and Linker are rebuilt from scratch every time. + +**Tested:** We added Engine caching via `static OnceLock` in the Supabase Wrappers source. Results: + +- First query (cold): ~213ms +- Subsequent queries (same connection): ~179ms avg +- **Savings: ~35ms per query**, but only within the same PostgreSQL backend process + +Since PostgreSQL is multi-process (each connection = separate process), the cache doesn't help across connections. Not worth maintaining a fork for, but a good upstream contribution opportunity. 
+ +**Upstream opportunities** (in Supabase Wrappers): + +- Shared Engine via `OnceLock` (~35ms savings per connection) +- Wasmtime compilation cache to disk (`config.cache_config_load_default()`) for cross-process savings +- Component caching per foreign server + +**Impact:** 60-70% of total overhead + +### 2. WASM Boundary Crossings (suspected ~30-50ms; estimated <1ms) + +**Root cause:** WIT interface serialization for each cell + +Per 100-row × 10-column scan: + +- 1000 `row.push(cell)` calls × ~50ns each = **50µs** +- Column metadata setup: ~100 calls × ~200ns = **20µs** +- Not actually significant! + +**Actual impact:** <1ms (negligible) + +### 3. HTTP Request Clone (~1-2ms) + +**Root cause:** `http::Request` takes ownership, must clone headers and body + +```rust +let req = http::Request { + url, + headers: self.headers.clone(), // Vec<(String, String)> + body: self.request_body.clone(), // String +}; +``` + +**Potential optimizations:** + +- Reuse request structure +- Reference-counted headers (Arc) + +**Impact:** <1% of total overhead + +## Optimization Opportunities + +### High Impact (>10ms savings) + +1. **WASM Module Caching** + - Pre-load module at extension init + - Reuse across queries + - **Potential savings:** 60-80ms + +2. **Lazy Column Initialization** + - Only cache metadata for SELECTed columns + - Skip camelCase conversion for unused columns + - **Potential savings:** 10-20ms + +### Medium Impact (1-10ms savings) + +1. **JSON Parser Optimization** + - Use simd-json instead of serde_json + - **Potential savings:** 5-10ms for large responses + +2. **URL Building Cache** + - Cache built URLs for repeated queries + - **Potential savings:** 1-5ms + +### Low Impact (<1ms savings) + +1. **String Interning** + - Intern repeated enum values + - **Potential savings:** <1ms + +2.
**Remove Header Clone** + - Use `Arc<Vec<(String, String)>>` + - **Potential savings:** <1ms + +## Test Infrastructure + +### Unit Tests: 337 tests + +- 151 spec tests (OpenAPI parsing) +- 52 schema tests (type mapping) +- 134 lib tests (FDW logic) + +### Integration Tests: 80+ assertions + +- Docker-based (PostgreSQL + MockServer) +- Covers all major OpenAPI features +- Tests both typed and raw JSONB queries + +### Benchmarks + +- **Micro**: Criterion-based (`cargo bench`) +- **End-to-end**: Docker-based (`bash test/benchmark.sh`) +- **Comparison**: OpenAPI FDW vs pg_http vs pg_net + +## Conclusion + +The OpenAPI FDW is **well-optimized at the algorithmic level**: + +- O(1) column lookups +- Zero-copy where possible +- Minimal allocations per row +- Pre-cached metadata + +The remaining ~170ms overhead is primarily **WASM runtime initialization** in Supabase Wrappers, which is: + +- One-time per query (not per row) +- Outside our control (requires upstream changes) +- Acceptable given the DX benefits + +For typical REST APIs with 100-500ms latency, the relative overhead ranges from about **33%** (500ms) to **154%** (100ms), which is reasonable given the automatic JSON unwrapping, type conversion, pagination, and schema discovery. + +## Recommendations + +1. **For high-frequency queries**: Consider caching results in materialized views +2. **For low-latency requirements**: Use pg_http with manual JSON extraction +3. **For most use cases**: OpenAPI FDW provides excellent DX/performance trade-off +4.
**For future optimization**: Focus on WASM module caching/reuse + +--- + +Last updated: February 2026 +Benchmark environment: MockServer (near-zero network latency) +PostgreSQL: 15.14 (Supabase distribution) +WASM Target: wasm32-unknown-unknown +Rust: 1.88+ diff --git a/wasm-wrappers/fdw/openapi_fdw/README.md b/wasm-wrappers/fdw/openapi_fdw/README.md index 7c0cb582..391eae2b 100644 --- a/wasm-wrappers/fdw/openapi_fdw/README.md +++ b/wasm-wrappers/fdw/openapi_fdw/README.md @@ -4,7 +4,7 @@ This is a WASM-based Foreign Data Wrapper (FDW) for integrating any OpenAPI 3.0+ Point this at an OpenAPI spec and query the API with SQL. The FDW parses the spec, figures out the endpoints and response schemas, and lets you `IMPORT FOREIGN SCHEMA` to generate tables automatically. -Handles pagination, rate limiting (429 backoff), path parameter substitution from WHERE clauses, and stops fetching early when you use LIMIT. +Handles pagination, rate limiting (429 backoff), path parameter substitution from WHERE clauses, POST-for-read endpoints, and stops fetching early when you use LIMIT. 
## Documentation @@ -17,10 +17,10 @@ Handles pagination, rate limiting (429 backoff), path parameter substitution fro CREATE SERVER my_api_server FOREIGN DATA WRAPPER wasm_wrapper OPTIONS ( - fdw_package_url 'https://github.com/supabase/wrappers/releases/download/wasm_openapi_fdw_v0.1.4/openapi_fdw.wasm', + fdw_package_url 'https://github.com/supabase/wrappers/releases/download/wasm_openapi_fdw_v0.2.0/openapi_fdw.wasm', fdw_package_name 'supabase:openapi-fdw', - fdw_package_version '0.1.4', - fdw_package_checksum 'dd434f8565b060b181d1e69e1e4d5c8b9c3ac5ca444056d3c2fb939038d308fe', + fdw_package_version '0.2.0', + fdw_package_checksum '{see docs for latest checksum}', base_url 'https://api.example.com/v1', spec_url 'https://api.example.com/openapi.json', api_key_id '' @@ -48,23 +48,37 @@ cargo component build --release --target wasm32-unknown-unknown ```bash # Unit tests (run with native target) -cargo test --target aarch64-apple-darwin # or x86_64-unknown-linux-gnu +cargo test + +# Benchmarks +cargo bench --bench fdw_benchmarks + +# Integration tests (Docker-based) +bash test/run.sh # Integration tests (from wrappers directory) cd wrappers cargo pgrx test --features "wasm_fdw pg16" ``` +### Code Quality + +```bash +make check # runs fmt, clippy, test, build +``` + ## Limitations - Read-only (no INSERT/UPDATE/DELETE support) -- Only GET endpoints are supported +- Only GET endpoints are supported (POST-for-read is available via the `method` table option) - Authentication limited to API key and Bearer token (No OAuth2 flow support yet - use pre-obtained tokens) +- Only OpenAPI 3.x specs are supported (Swagger 2.0 is rejected) ## Changelog | Version | Date | Notes | | ------- | ---------- | ---------------------------------------------------- | +| 0.2.0 | 2026-02-15 | Modular architecture, POST-for-read, `spec_json` inline specs, LIMIT pushdown, OpenAPI 3.1 support, security hardening, 337 unit tests, 5 real-world examples | | 0.1.4 | 2026-02-09 | Type coercion, auth 
validation, table naming, URL fixes |
 | 0.1.3 | 2026-02-06 | Avoid cloning JSON response data |
 | 0.1.2 | 2026-02-01 | Fix query param filtering |
diff --git a/wasm-wrappers/fdw/openapi_fdw/benches/fdw_benchmarks.rs b/wasm-wrappers/fdw/openapi_fdw/benches/fdw_benchmarks.rs
new file mode 100644
index 00000000..89ae10e1
--- /dev/null
+++ b/wasm-wrappers/fdw/openapi_fdw/benches/fdw_benchmarks.rs
@@ -0,0 +1,383 @@
+use criterion::{BenchmarkId, Criterion, Throughput, black_box, criterion_group, criterion_main};
+use serde_json::json;
+
+// Note: We can't directly import from lib.rs due to WASM component model,
+// so we'll benchmark the core algorithms that we can extract
+
+/// Benchmark: camelCase to snake_case conversion (used for every column)
+fn bench_sanitize_column_name(c: &mut Criterion) {
+    let mut group = c.benchmark_group("column_name_sanitization");
+
+    let test_cases = vec![
+        ("simpleCase", "simple_case"),
+        ("clusterIP", "cluster_ip"),
+        ("HTMLParser", "html_parser"),
+        ("getHTTPSUrl", "get_https_url"),
+        ("user_id", "user_id"),
+        ("userId", "user_id"),
+        ("@id", "_id"),
+        ("created-at", "created_at"),
+        ("123_start", "_123_start"),
+    ];
+
+    for (input, _expected) in test_cases {
+        group.bench_with_input(BenchmarkId::from_parameter(input), &input, |b, &input| {
+            b.iter(|| {
+                let mut result = String::new();
+                let chars: Vec<char> = input.chars().collect();
+
+                for (i, &c) in chars.iter().enumerate() {
+                    if c.is_uppercase() && i > 0 {
+                        let prev = chars[i - 1];
+                        let next_is_lower = chars.get(i + 1).is_some_and(|n| n.is_lowercase());
+
+                        if prev.is_lowercase()
+                            || prev.is_ascii_digit()
+                            || (prev.is_uppercase() && next_is_lower)
+                        {
+                            result.push('_');
+                        }
+                        result.push(c.to_ascii_lowercase());
+                    } else if c.is_alphanumeric() || c == '_' {
+                        result.push(c.to_ascii_lowercase());
+                    } else {
+                        result.push('_');
+                    }
+                }
+
+                if result.starts_with(|c: char| c.is_ascii_digit()) {
+                    result.insert(0, '_');
+                }
+
+                black_box(result)
+            });
+        });
+    }
+
+    group.finish();
+}
+
+/// Benchmark: snake_case to camelCase (used during column matching)
+fn bench_to_camel_case(c: &mut Criterion) {
+    let mut group = c.benchmark_group("to_camel_case");
+
+    let test_cases = vec![
+        "user_id",
+        "created_at",
+        "cluster_ip",
+        "html_parser",
+        "simple_name",
+        "very_long_column_name_with_many_underscores",
+    ];
+
+    for input in test_cases {
+        group.bench_with_input(BenchmarkId::from_parameter(input), &input, |b, &input| {
+            b.iter(|| {
+                let mut result = String::new();
+                let mut capitalize_next = false;
+
+                for c in input.chars() {
+                    if c == '_' {
+                        capitalize_next = true;
+                    } else if capitalize_next {
+                        result.push(c.to_uppercase().next().unwrap_or(c));
+                        capitalize_next = false;
+                    } else {
+                        result.push(c);
+                    }
+                }
+
+                black_box(result)
+            });
+        });
+    }
+
+    group.finish();
+}
+
+/// Benchmark: JSON object key lookup (happens once per cell)
+fn bench_json_key_lookup(c: &mut Criterion) {
+    let mut group = c.benchmark_group("json_key_lookup");
+    group.throughput(Throughput::Elements(1));
+
+    // Small object (5 keys)
+    let small_obj = json!({
+        "id": 123,
+        "name": "John Doe",
+        "email": "john@example.com",
+        "createdAt": "2024-01-15T10:30:00Z",
+        "isActive": true
+    });
+
+    // Medium object (20 keys)
+    let medium_obj = json!({
+        "id": 123,
+        "name": "John Doe",
+        "email": "john@example.com",
+        "createdAt": "2024-01-15T10:30:00Z",
+        "updatedAt": "2024-01-15T10:30:00Z",
+        "isActive": true,
+        "age": 30,
+        "city": "San Francisco",
+        "country": "USA",
+        "zipCode": "94102",
+        "phoneNumber": "+1234567890",
+        "companyName": "Acme Inc",
+        "jobTitle": "Engineer",
+        "department": "Engineering",
+        "salary": 100000,
+        "startDate": "2020-01-01",
+        "manager": "Jane Smith",
+        "teamSize": 5,
+        "projects": ["project1", "project2"],
+        "skills": ["rust", "sql"]
+    });
+
+    group.bench_function("small_object_exact_match", |b| {
+        b.iter(|| {
+            let obj = small_obj.as_object().unwrap();
+            black_box(obj.get("name"))
+        });
+    });
+
+    group.bench_function("medium_object_exact_match", |b| {
+        b.iter(|| {
+            let obj = medium_obj.as_object().unwrap();
+            black_box(obj.get("name"))
+        });
+    });
+
+    group.bench_function("small_object_case_insensitive", |b| {
+        b.iter(|| {
+            let obj = small_obj.as_object().unwrap();
+            let target = "createdat";
+            black_box(
+                obj.iter()
+                    .find(|(k, _)| k.to_lowercase() == target)
+                    .map(|(_, v)| v),
+            )
+        });
+    });
+
+    group.finish();
+}
+
+/// Benchmark: DateTime normalization (happens for every date/timestamp cell)
+fn bench_normalize_datetime(c: &mut Criterion) {
+    let mut group = c.benchmark_group("normalize_datetime");
+
+    group.bench_function("date_only", |b| {
+        let input = "2024-01-15";
+        b.iter(|| {
+            let result = if input.len() == 10
+                && input.as_bytes().get(4) == Some(&b'-')
+                && input.as_bytes().get(7) == Some(&b'-')
+            {
+                format!("{input}T00:00:00Z")
+            } else {
+                input.to_string()
+            };
+            black_box(result)
+        });
+    });
+
+    group.bench_function("full_datetime", |b| {
+        let input = "2024-01-15T10:30:00Z";
+        b.iter(|| {
+            let result = if input.len() == 10
+                && input.as_bytes().get(4) == Some(&b'-')
+                && input.as_bytes().get(7) == Some(&b'-')
+            {
+                format!("{input}T00:00:00Z")
+            } else {
+                input.to_string()
+            };
+            black_box(result)
+        });
+    });
+
+    group.bench_function("date_only_cow", |b| {
+        use std::borrow::Cow;
+        let input = "2024-01-15";
+        b.iter(|| {
+            let result: Cow<str> = if input.len() == 10
+                && input.as_bytes().get(4) == Some(&b'-')
+                && input.as_bytes().get(7) == Some(&b'-')
+            {
+                Cow::Owned(format!("{input}T00:00:00Z"))
+            } else {
+                Cow::Borrowed(input)
+            };
+            black_box(result)
+        });
+    });
+
+    group.bench_function("full_datetime_cow", |b| {
+        use std::borrow::Cow;
+        let input = "2024-01-15T10:30:00Z";
+        b.iter(|| {
+            let result: Cow<str> = if input.len() == 10
+                && input.as_bytes().get(4) == Some(&b'-')
+                && input.as_bytes().get(7) == Some(&b'-')
+            {
+                Cow::Owned(format!("{input}T00:00:00Z"))
+            } else {
+                Cow::Borrowed(input)
+            };
+            black_box(result)
+        });
+    });
+
+    group.finish();
+}
+
+/// Benchmark: JSON parsing (happens once per page)
+fn bench_json_parsing(c: &mut Criterion) {
+    let mut group = c.benchmark_group("json_parsing");
+
+    // Small response (10 rows)
+    let small_json = serde_json::to_string(&json!({
+        "data": (0..10).map(|i| json!({
+            "id": i,
+            "name": format!("User {}", i),
+            "email": format!("user{}@example.com", i),
+            "createdAt": "2024-01-15T10:30:00Z",
+            "isActive": true
+        })).collect::<Vec<_>>()
+    }))
+    .unwrap();
+
+    // Large response (1000 rows)
+    let large_json = serde_json::to_string(&json!({
+        "data": (0..1000).map(|i| json!({
+            "id": i,
+            "name": format!("User {}", i),
+            "email": format!("user{}@example.com", i),
+            "createdAt": "2024-01-15T10:30:00Z",
+            "isActive": true
+        })).collect::<Vec<_>>()
+    }))
+    .unwrap();
+
+    group.throughput(Throughput::Bytes(small_json.len() as u64));
+    group.bench_function("small_response_10_rows", |b| {
+        b.iter(|| black_box(serde_json::from_str::<serde_json::Value>(&small_json).unwrap()));
+    });
+
+    group.throughput(Throughput::Bytes(large_json.len() as u64));
+    group.bench_function("large_response_1000_rows", |b| {
+        b.iter(|| black_box(serde_json::from_str::<serde_json::Value>(&large_json).unwrap()));
+    });
+
+    group.finish();
+}
+
+/// Benchmark: URL building with query parameters
+fn bench_url_building(c: &mut Criterion) {
+    let mut group = c.benchmark_group("url_building");
+
+    group.bench_function("no_params", |b| {
+        let base = "https://api.example.com/users";
+        let params: Vec<String> = vec![];
+        b.iter(|| {
+            let mut url = base.to_string();
+            if !params.is_empty() {
+                url.push('?');
+                url.push_str(&params.join("&"));
+            }
+            black_box(url)
+        });
+    });
+
+    group.bench_function("few_params_3", |b| {
+        let base = "https://api.example.com/users";
+        let params = vec![
+            "limit=100".to_string(),
+            "offset=0".to_string(),
+            "sort=created_at".to_string(),
+        ];
+        b.iter(|| {
+            let mut url = base.to_string();
+            if !params.is_empty() {
+                url.push('?');
+                url.push_str(&params.join("&"));
+            }
+            black_box(url)
+        });
+    });
+
+    group.bench_function("many_params_10", |b| {
+        let base = "https://api.example.com/users";
+        let params = vec![
+            "limit=100".to_string(),
+            "offset=0".to_string(),
+            "sort=created_at".to_string(),
+            "filter=active".to_string(),
+            "include=profile".to_string(),
+            "fields=id,name,email".to_string(),
+            "page=1".to_string(),
+            "per_page=50".to_string(),
+            "order=desc".to_string(),
+            "search=test".to_string(),
+        ];
+        b.iter(|| {
+            let mut url = base.to_string();
+            if !params.is_empty() {
+                url.push('?');
+                url.push_str(&params.join("&"));
+            }
+            black_box(url)
+        });
+    });
+
+    group.finish();
+}
+
+/// Benchmark: Type conversion (happens for every cell)
+fn bench_type_conversion(c: &mut Criterion) {
+    let mut group = c.benchmark_group("type_conversion");
+
+    group.bench_function("json_to_i64", |b| {
+        let val = json!(12345);
+        b.iter(|| black_box(val.as_i64()));
+    });
+
+    group.bench_function("json_to_f64", |b| {
+        let val = json!(123.45);
+        b.iter(|| black_box(val.as_f64()));
+    });
+
+    group.bench_function("json_to_string", |b| {
+        let val = json!("test string");
+        b.iter(|| black_box(val.as_str().map(|s| s.to_owned())));
+    });
+
+    group.bench_function("json_to_bool", |b| {
+        let val = json!(true);
+        b.iter(|| black_box(val.as_bool()));
+    });
+
+    group.bench_function("json_complex_to_string", |b| {
+        let val = json!({
+            "nested": {
+                "object": "value"
+            }
+        });
+        b.iter(|| black_box(val.to_string()));
+    });
+
+    group.finish();
+}
+
+criterion_group!(
+    benches,
+    bench_sanitize_column_name,
+    bench_to_camel_case,
+    bench_json_key_lookup,
+    bench_normalize_datetime,
+    bench_json_parsing,
+    bench_url_building,
+    bench_type_conversion,
+);
+
+criterion_main!(benches);
diff --git a/wasm-wrappers/fdw/openapi_fdw/examples/README.md b/wasm-wrappers/fdw/openapi_fdw/examples/README.md
new file mode 100644
index 00000000..cd695f0a
--- /dev/null
+++ b/wasm-wrappers/fdw/openapi_fdw/examples/README.md
@@ -0,0 +1,18 @@
+# Examples
+
+Each example shows how to configure the FDW against a
real API, with complete server options, table definitions, and sample queries. + +## No Auth Required + +| Example | API | Features | +| --- | --- | --- | +| [pokeapi](pokeapi/) | [PokéAPI](https://pokeapi.co/) | Offset-based pagination, path params, auto-detected `results` wrapper | +| [carapi](carapi/) | [CarAPI](https://carapi.app/) | Page-based pagination, query pushdown, auto-detected `data` wrapper | +| [nws](nws/) | [National Weather Service](https://www.weather.gov/documentation/services-web-api) | GeoJSON responses, nested path extraction, custom User-Agent | + +## Auth Required + +| Example | API | Auth | Features | +| --- | --- | --- | --- | +| [github](github/) | [GitHub REST API](https://docs.github.com/en/rest) | Bearer token | Path params, custom headers, `items` wrapper, search pushdown | +| [threads](threads/) | [Meta Threads API](https://developers.facebook.com/docs/threads) | OAuth token (query param) | Cursor-based pagination, path params, query pushdown | diff --git a/wasm-wrappers/fdw/openapi_fdw/examples/carapi/README.md b/wasm-wrappers/fdw/openapi_fdw/examples/carapi/README.md new file mode 100644 index 00000000..02f88397 --- /dev/null +++ b/wasm-wrappers/fdw/openapi_fdw/examples/carapi/README.md @@ -0,0 +1,413 @@ +# CarAPI Example + +Query the [CarAPI](https://carapi.app/) vehicle database using SQL. This example demonstrates the OpenAPI FDW against a free, no-auth API with **page-based pagination**, auto-detected `data` wrapper key, and **query parameter pushdown** for filtering by year, make, and model. + +## Server Configuration + +```sql +create server carapi + foreign data wrapper wasm_wrapper + options ( + fdw_package_url 'https://github.com/supabase/wrappers/releases/download/wasm_openapi_fdw_v0.2.0/openapi_fdw.wasm', + fdw_package_name 'supabase:openapi-fdw', + fdw_package_version '0.2.0', + base_url 'https://carapi.app/api' + ); +``` + +--- + +## 1. Makes + +Fetches all car manufacturers. 
Demonstrates **page-based pagination** with auto-detected `data` wrapper key. The CarAPI wraps responses in `{"collection": {...}, "data": [...]}` and the FDW auto-detects the `data` key. + +```sql +create foreign table makes ( + id integer, + name text, + attrs jsonb +) + server carapi + options ( + endpoint '/makes/v2', + rowid_column 'id' + ); +``` + +```sql +SELECT id, name +FROM makes +LIMIT 5; +``` + +| id | name | +| --- | --- | +| 1 | Acura | +| 24 | Alfa Romeo | +| 44 | Aston Martin | +| 2 | Audi | +| 25 | Bentley | + +## 2. Models + +Car models filtered by make and year. Demonstrates **query parameter pushdown** — the WHERE clause values are sent as query parameters to the API, so only matching data is returned. + +```sql +create foreign table models ( + id integer, + make_id integer, + year integer, + make text, + name text, + attrs jsonb +) + server carapi + options ( + endpoint '/models/v2', + rowid_column 'id' + ); +``` + +```sql +SELECT id, name, make +FROM models +WHERE make = 'Toyota' AND year = '2020' +LIMIT 5; +``` + +| id | name | make | +| --- | --- | --- | +| 4841 | 4Runner | Toyota | +| 7245 | 86 | Toyota | +| 5689 | Avalon | Toyota | +| 7308 | C-HR | Toyota | +| 4779 | Camry | Toyota | + +## 3. Trims + +Trim levels with MSRP pricing. Combines query pushdown (year, make, model) with integer type coercion for pricing fields. 
+ +```sql +create foreign table trims ( + id integer, + make_id integer, + model_id integer, + year integer, + make text, + model text, + submodel text, + trim text, + description text, + msrp integer, + invoice integer, + created timestamptz, + modified timestamptz, + attrs jsonb +) + server carapi + options ( + endpoint '/trims/v2', + rowid_column 'id' + ); +``` + +```sql +SELECT trim, msrp, description +FROM trims +WHERE year = '2020' AND make = 'Toyota' AND model = 'Camry' +LIMIT 3; +``` + +| trim | msrp | description | +| --- | --- | --- | +| LE | 28430 | LE 4dr Sedan (2.5L 4cyl gas/electric hybrid CVT) | +| SE | 30130 | SE 4dr Sedan (2.5L 4cyl gas/electric hybrid CVT) | +| XLE | 32730 | XLE 4dr Sedan (2.5L 4cyl gas/electric hybrid CVT) | + +Compare MSRP vs invoice price across trims: + +```sql +SELECT trim, msrp, invoice, msrp - invoice AS dealer_margin +FROM trims +WHERE year = '2020' AND make = 'Toyota' AND model = 'Camry'; +``` + +See when trim data was last updated: + +```sql +SELECT trim, msrp, created, modified +FROM trims +WHERE year = '2020' AND make = 'Honda' AND model = 'Civic' +LIMIT 3; +``` + +## 4. Bodies + +Vehicle body dimensions. Demonstrates mixed types — integer for counts/weights, text for decimal measurements. 
+ +```sql +create foreign table bodies ( + id integer, + year integer, + make text, + model text, + submodel text, + trim text, + type text, + doors integer, + length text, + width text, + height text, + wheel_base text, + ground_clearance text, + cargo_capacity text, + curb_weight integer, + seats integer, + attrs jsonb +) + server carapi + options ( + endpoint '/bodies/v2', + rowid_column 'id' + ); +``` + +```sql +SELECT type, doors, length, curb_weight +FROM bodies +WHERE year = '2020' AND make = 'Toyota' AND model = 'Camry' +LIMIT 3; +``` + +| type | doors | length | curb_weight | +| --- | --- | --- | --- | +| Sedan | 4 | 192.1 | 3472 | +| Sedan | 4 | 192.7 | 3549 | +| Sedan | 4 | 192.1 | 3572 | + +Full dimension breakdown: + +```sql +SELECT type, doors, seats, length, width, height, + wheel_base, ground_clearance, cargo_capacity, curb_weight +FROM bodies +WHERE year = '2020' AND make = 'Toyota' AND model = 'RAV4' +LIMIT 3; +``` + +## 5. Engines + +Engine specifications and performance data. 
+ +```sql +create foreign table engines ( + id integer, + year integer, + make text, + model text, + submodel text, + trim text, + engine_type text, + fuel_type text, + cylinders text, + size text, + horsepower_hp integer, + horsepower_rpm integer, + torque_ft_lbs integer, + torque_rpm integer, + valves integer, + valve_timing text, + cam_type text, + drive_type text, + transmission text, + attrs jsonb +) + server carapi + options ( + endpoint '/engines/v2', + rowid_column 'id' + ); +``` + +```sql +SELECT engine_type, horsepower_hp, cylinders, transmission +FROM engines +WHERE year = '2020' AND make = 'Toyota' AND model = 'Camry' +LIMIT 3; +``` + +| engine_type | horsepower_hp | cylinders | transmission | +| --- | --- | --- | --- | +| hybrid | 208 | I4 | continuously variable-speed automatic | +| hybrid | 208 | I4 | continuously variable-speed automatic | +| hybrid | 208 | I4 | continuously variable-speed automatic | + +Full engine specs with torque, valve config, and drive type: + +```sql +SELECT engine_type, fuel_type, size, cylinders, + horsepower_hp, horsepower_rpm, + torque_ft_lbs, torque_rpm, + valves, valve_timing, cam_type, + drive_type, transmission +FROM engines +WHERE year = '2020' AND make = 'Ford' AND model = 'Mustang' +LIMIT 3; +``` + +## 6. Mileages + +Fuel economy and range data (EPA ratings). 
+ +```sql +create foreign table mileages ( + id integer, + year integer, + make text, + model text, + submodel text, + trim text, + fuel_tank_capacity text, + combined_mpg integer, + epa_city_mpg integer, + epa_highway_mpg integer, + range_city integer, + range_highway integer, + attrs jsonb +) + server carapi + options ( + endpoint '/mileages/v2', + rowid_column 'id' + ); +``` + +```sql +SELECT combined_mpg, epa_city_mpg, epa_highway_mpg, range_city +FROM mileages +WHERE year = '2020' AND make = 'Toyota' AND model = 'Camry' +LIMIT 3; +``` + +| combined_mpg | epa_city_mpg | epa_highway_mpg | range_city | +| --- | --- | --- | --- | +| 52 | 51 | 53 | 673 | +| 46 | 44 | 47 | 581 | +| 46 | 44 | 47 | 581 | + +Include fuel tank capacity and highway range: + +```sql +SELECT trim, fuel_tank_capacity, + combined_mpg, epa_city_mpg, epa_highway_mpg, + range_city, range_highway +FROM mileages +WHERE year = '2020' AND make = 'Honda' AND model = 'Accord' +LIMIT 3; +``` + +## 7. Exterior Colors + +Paint colors with RGB values. + +```sql +create foreign table exterior_colors ( + id integer, + year integer, + make text, + model text, + submodel text, + trim text, + color text, + rgb text, + attrs jsonb +) + server carapi + options ( + endpoint '/exterior-colors/v2', + rowid_column 'id' + ); +``` + +```sql +SELECT color, rgb +FROM exterior_colors +WHERE year = '2020' AND make = 'Toyota' AND model = 'Camry' +LIMIT 5; +``` + +| color | rgb | +| --- | --- | +| Blue Streak Metallic | 0,62,155 | +| Brownstone | 95,85,71 | +| Celestial Silver Metallic | 151,156,160 | +| Galactic Aqua Mica | 37,54,65 | +| Midnight Black Metallic | 23,23,23 | + +## 8. OBD Codes + +OBD-II diagnostic trouble codes. A small dataset available on the free tier. 
+ +```sql +create foreign table obd_codes ( + code text, + description text, + attrs jsonb +) + server carapi + options ( + endpoint '/obd-codes', + rowid_column 'code' + ); +``` + +```sql +SELECT code, description +FROM obd_codes +LIMIT 5; +``` + +| code | description | +| --- | --- | +| P0100 | Mass or Volume Air Flow Sensor A Circuit | +| U1000 | Manufacturer Controlled DTC | + +## 9. Debug Mode + +The `makes_debug` table uses the `carapi_debug` server which has `debug 'true'`. This emits HTTP request details and scan statistics as PostgreSQL INFO messages. + +```sql +SELECT id FROM makes_debug LIMIT 1; +``` + +Look for INFO output like: + +```log +INFO: [openapi_fdw] HTTP GET https://carapi.app/api/makes/v2 -> 200 (1404 bytes) +INFO: [openapi_fdw] Scan complete: 1 rows, 1 columns +``` + +## 10. The `attrs` Column + +Every table includes an `attrs jsonb` column that captures **all fields not mapped to named columns**. This is useful for exploring what data the API returns without defining every column upfront. 
+ +```sql +SELECT name, attrs +FROM makes +LIMIT 1; +``` + +## Features Demonstrated + +| Feature | Table(s) | +| --- | --- | +| Page-based pagination (auto-followed) | `makes`, `models`, `trims`, `bodies`, `engines`, `mileages`, `exterior_colors` | +| Auto-detected `data` wrapper key | All tables | +| Query parameter pushdown | `models`, `trims`, `bodies`, `engines`, `mileages`, `exterior_colors` | +| Integer type coercion | `trims` (msrp), `bodies` (curb_weight), `engines` (horsepower), `mileages` (mpg) | +| `timestamptz` coercion | `trims` (created, modified) | +| LIMIT pushdown | Any table with `LIMIT` | +| Debug mode (`debug`) | `makes_debug` | +| `attrs` catch-all column | All tables | +| `rowid_column` | All tables | +| No authentication required | All servers | diff --git a/wasm-wrappers/fdw/openapi_fdw/examples/carapi/init.sql b/wasm-wrappers/fdw/openapi_fdw/examples/carapi/init.sql new file mode 100644 index 00000000..557b98c7 --- /dev/null +++ b/wasm-wrappers/fdw/openapi_fdw/examples/carapi/init.sql @@ -0,0 +1,258 @@ +-- OpenAPI FDW example: CarAPI (Vehicle Data) +-- Free demo dataset (2015-2020 vehicles), no auth required. +-- See: https://carapi.app/api +-- Note: fdw_package_url uses file:// for local Docker testing. 
In production, use the +-- GitHub release URL: https://github.com/supabase/wrappers/releases/download/wasm_openapi_fdw_v0.2.0/openapi_fdw.wasm + +-- Create supabase_admin role if it doesn't exist (required by wrappers extension) +DO $$ +BEGIN + IF NOT EXISTS (SELECT FROM pg_catalog.pg_roles WHERE rolname = 'supabase_admin') THEN + CREATE ROLE supabase_admin WITH SUPERUSER CREATEDB CREATEROLE LOGIN PASSWORD 'postgres'; + END IF; +END +$$; + +create schema if not exists extensions; +create extension if not exists wrappers with schema extensions; + +set search_path to public, extensions; + +create foreign data wrapper wasm_wrapper + handler wasm_fdw_handler + validator wasm_fdw_validator; + +-- ============================================================ +-- Server 1: carapi — Main CarAPI server (no auth, free demo) +-- Response format: {"collection": {...pagination...}, "data": [...]} +-- The FDW auto-detects the "data" wrapper key. +-- ============================================================ +create server carapi + foreign data wrapper wasm_wrapper + options ( + fdw_package_url 'file:///openapi_fdw.wasm', + fdw_package_name 'supabase:openapi-fdw', + fdw_package_version '0.2.0', + base_url 'https://carapi.app/api' + ); + +-- ============================================================ +-- Server 2: carapi_debug — Same API with debug output +-- ============================================================ +create server carapi_debug + foreign data wrapper wasm_wrapper + options ( + fdw_package_url 'file:///openapi_fdw.wasm', + fdw_package_name 'supabase:openapi-fdw', + fdw_package_version '0.2.0', + base_url 'https://carapi.app/api', + debug 'true' + ); + +-- ============================================================ +-- Table 1: makes +-- All car manufacturers (paginated) +-- Features: auto-detected "data" wrapper, page-based pagination +-- ============================================================ +create foreign table makes ( + id integer, + name text, + 
attrs jsonb +) + server carapi + options ( + endpoint '/makes/v2', + rowid_column 'id' + ); + +-- ============================================================ +-- Table 2: models +-- Car models filtered by make and year +-- Features: query param pushdown (make, year), pagination +-- ============================================================ +create foreign table models ( + id integer, + make_id integer, + year integer, + make text, + name text, + attrs jsonb +) + server carapi + options ( + endpoint '/models/v2', + rowid_column 'id' + ); + +-- ============================================================ +-- Table 3: trims +-- Trim levels with MSRP and descriptions +-- Features: query pushdown (year, make, model), pricing data, +-- integer types, timestamptz coercion +-- ============================================================ +create foreign table trims ( + id integer, + make_id integer, + model_id integer, + year integer, + make text, + model text, + submodel text, + trim text, + description text, + msrp integer, + invoice integer, + created timestamptz, + modified timestamptz, + attrs jsonb +) + server carapi + options ( + endpoint '/trims/v2', + rowid_column 'id' + ); + +-- ============================================================ +-- Table 4: bodies +-- Vehicle body dimensions and specs +-- Features: query pushdown, physical measurements (text for +-- decimal strings), integer for counts/weights +-- ============================================================ +create foreign table bodies ( + id integer, + year integer, + make text, + model text, + submodel text, + trim text, + type text, + doors integer, + length text, + width text, + height text, + wheel_base text, + ground_clearance text, + cargo_capacity text, + curb_weight integer, + seats integer, + attrs jsonb +) + server carapi + options ( + endpoint '/bodies/v2', + rowid_column 'id' + ); + +-- ============================================================ +-- Table 5: engines +-- Engine 
specifications and performance data +-- Features: query pushdown, horsepower/torque as integers, +-- engine type, fuel type, drive type, transmission +-- ============================================================ +create foreign table engines ( + id integer, + year integer, + make text, + model text, + submodel text, + trim text, + engine_type text, + fuel_type text, + cylinders text, + size text, + horsepower_hp integer, + horsepower_rpm integer, + torque_ft_lbs integer, + torque_rpm integer, + valves integer, + valve_timing text, + cam_type text, + drive_type text, + transmission text, + attrs jsonb +) + server carapi + options ( + endpoint '/engines/v2', + rowid_column 'id' + ); + +-- ============================================================ +-- Table 6: mileages +-- Fuel economy and range data (MPG, tank capacity, EV stats) +-- Features: query pushdown, EPA mileage ratings, range data +-- ============================================================ +create foreign table mileages ( + id integer, + year integer, + make text, + model text, + submodel text, + trim text, + fuel_tank_capacity text, + combined_mpg integer, + epa_city_mpg integer, + epa_highway_mpg integer, + range_city integer, + range_highway integer, + attrs jsonb +) + server carapi + options ( + endpoint '/mileages/v2', + rowid_column 'id' + ); + +-- ============================================================ +-- Table 7: exterior_colors +-- Paint colors with RGB values +-- Features: query pushdown, color name + RGB string +-- ============================================================ +create foreign table exterior_colors ( + id integer, + year integer, + make text, + model text, + submodel text, + trim text, + color text, + rgb text, + attrs jsonb +) + server carapi + options ( + endpoint '/exterior-colors/v2', + rowid_column 'id' + ); + +-- ============================================================ +-- Table 8: obd_codes +-- OBD-II diagnostic trouble codes +-- Features: small dataset on 
free tier, code + description +-- ============================================================ +create foreign table obd_codes ( + code text, + description text, + attrs jsonb +) + server carapi + options ( + endpoint '/obd-codes', + rowid_column 'code' + ); + +-- ============================================================ +-- Table 9: makes_debug +-- Same as makes but on the debug server +-- Features: debug output in INFO messages +-- ============================================================ +create foreign table makes_debug ( + id integer, + name text, + attrs jsonb +) + server carapi_debug + options ( + endpoint '/makes/v2', + rowid_column 'id' + ); diff --git a/wasm-wrappers/fdw/openapi_fdw/examples/github/README.md b/wasm-wrappers/fdw/openapi_fdw/examples/github/README.md new file mode 100644 index 00000000..b677faff --- /dev/null +++ b/wasm-wrappers/fdw/openapi_fdw/examples/github/README.md @@ -0,0 +1,489 @@ +# GitHub API Example + +Query the [GitHub REST API](https://docs.github.com/en/rest) using SQL. This example demonstrates bearer token authentication, page-based pagination, path parameter substitution, query parameter pushdown, and custom HTTP headers. + +## Server Configuration + +```sql +create server github + foreign data wrapper wasm_wrapper + options ( + fdw_package_url 'https://github.com/supabase/wrappers/releases/download/wasm_openapi_fdw_v0.2.0/openapi_fdw.wasm', + fdw_package_name 'supabase:openapi-fdw', + fdw_package_version '0.2.0', + base_url 'https://api.github.com', + api_key '<your-github-token>', + user_agent 'openapi-fdw-example/0.2.0', + accept 'application/vnd.github+json', + headers '{"X-GitHub-Api-Version": "2022-11-28"}', + page_size '30', + page_size_param 'per_page' + ); +``` + +--- + +## 1. Your Profile + +Single object response. The FDW returns one row with your GitHub profile info. 
+ +```sql +create foreign table my_profile ( + login text, + id bigint, + name text, + email text, + bio text, + public_repos integer, + public_gists integer, + followers integer, + following integer, + created_at timestamptz, + avatar_url text, + company text, + location text, + blog text, + attrs jsonb +) + server github + options ( + endpoint '/user' + ); +``` + +```sql +SELECT login, name, public_repos, followers +FROM my_profile; +``` + +| login | name | public_repos | followers | +| --- | --- | --- | --- | +| youruser | Your Name | 42 | 150 | + +> Your results will reflect your own GitHub profile. + +Full profile with bio, company, and timestamps: + +```sql +SELECT login, name, email, bio, company, location, blog, + public_repos, public_gists, followers, following, + created_at +FROM my_profile; +``` + +## 2. Your Repositories + +Paginated list of your repos. The FDW auto-detects page-based pagination via `Link` headers. + +```sql +create foreign table my_repos ( + id bigint, + name text, + full_name text, + description text, + private boolean, + fork boolean, + language text, + stargazers_count integer, + forks_count integer, + open_issues_count integer, + created_at timestamptz, + updated_at timestamptz, + pushed_at timestamptz, + html_url text, + default_branch text, + archived boolean, + type text, + sort text, + attrs jsonb +) + server github + options ( + endpoint '/user/repos', + rowid_column 'id' + ); +``` + +```sql +SELECT name, language, stargazers_count, fork +FROM my_repos +LIMIT 5; +``` + +| name | language | stargazers_count | fork | +| --- | --- | --- | --- | +| my-project | TypeScript | 24 | f | +| dotfiles | Shell | 3 | f | +| cool-app | Rust | 12 | f | +| some-fork | | 0 | t | +| api-client | Python | 8 | f | + +> Your results will reflect your own repositories. 
+ +Filter with query pushdown: + +```sql +-- Pushes down to: GET /user/repos?type=owner&sort=updated +SELECT name, language, updated_at +FROM my_repos +WHERE type = 'owner' AND sort = 'updated' +LIMIT 5; +``` + +Full repo details with descriptions, URLs, and activity timestamps: + +```sql +SELECT name, description, language, private, archived, + stargazers_count, forks_count, open_issues_count, + default_branch, html_url, + created_at, updated_at, pushed_at +FROM my_repos +LIMIT 5; +``` + +## 3. Repository Detail (Path Parameters) + +Look up a specific repository. The `{owner}` and `{repo}` placeholders in the endpoint are replaced with values from your WHERE clause. + +```sql +create foreign table repo_detail ( + id bigint, + name text, + full_name text, + description text, + private boolean, + stargazers_count integer, + forks_count integer, + open_issues_count integer, + watchers_count integer, + language text, + default_branch text, + created_at timestamptz, + updated_at timestamptz, + license jsonb, + topics jsonb, + owner text, + repo text, + attrs jsonb +) + server github + options ( + endpoint '/repos/{owner}/{repo}' + ); +``` + +```sql +SELECT name, stargazers_count, forks_count, language +FROM repo_detail +WHERE owner = 'supabase' AND repo = 'wrappers'; +``` + +| name | stargazers_count | forks_count | language | +| --- | --- | --- | --- | +| wrappers | 811 | 92 | Rust | + +Full detail with license, topics, and watcher count: + +```sql +SELECT name, description, language, default_branch, + stargazers_count, forks_count, watchers_count, open_issues_count, + license->>'name' AS license, + topics, + created_at, updated_at +FROM repo_detail +WHERE owner = 'supabase' AND repo = 'wrappers'; +``` + +## 4. Repository Issues + +Issues for a repository. 
Two path parameters plus query pushdown for state filtering: + +```sql +create foreign table repo_issues ( + id bigint, + number integer, + title text, + state text, + body text, + created_at timestamptz, + updated_at timestamptz, + closed_at timestamptz, + comments integer, + user_col jsonb, + labels jsonb, + html_url text, + owner text, + repo text, + attrs jsonb +) + server github + options ( + endpoint '/repos/{owner}/{repo}/issues', + rowid_column 'id' + ); +``` + +```sql +SELECT number, title, state +FROM repo_issues +WHERE owner = 'supabase' AND repo = 'wrappers' +LIMIT 5; +``` + +| number | title | state | +| --- | --- | --- | +| 571 | chore(deps): bump aws-sdk-s3 from 1.109.0 to 1.112.0 in the cargo group across 1 directory | open | +| 549 | feat: add aggregate pushdown support via GetForeignUpperPaths | open | +| 472 | AWS Cognito wrapper, ERROR: HV000: unhandled error | open | +| 461 | Hubspot FDW requires API Keys which are deprecated | open | +| 459 | Auth0 FDW API Key | open | + +Filter by state: + +```sql +SELECT number, title, state +FROM repo_issues +WHERE owner = 'supabase' AND repo = 'wrappers' AND state = 'closed' +LIMIT 5; +``` + +Full issue details with body, timestamps, labels, and comment count: + +```sql +SELECT number, title, state, comments, + body, + user_col->>'login' AS author, + labels, + html_url, + created_at, updated_at, closed_at +FROM repo_issues +WHERE owner = 'supabase' AND repo = 'wrappers' +LIMIT 3; +``` + +## 5. 
Pull Requests + +Pull requests with state filtering via query pushdown: + +```sql +create foreign table repo_pulls ( + id bigint, + number integer, + title text, + state text, + draft boolean, + created_at timestamptz, + updated_at timestamptz, + merged_at timestamptz, + user_col jsonb, + head jsonb, + base jsonb, + html_url text, + owner text, + repo text, + attrs jsonb +) + server github + options ( + endpoint '/repos/{owner}/{repo}/pulls', + rowid_column 'id' + ); +``` + +```sql +SELECT number, title, state +FROM repo_pulls +WHERE owner = 'supabase' AND repo = 'wrappers' AND state = 'closed' +LIMIT 5; +``` + +| number | title | state | +| --- | --- | --- | +| 572 | docs(openapi): update wasm module checksum and improve docs | closed | +| 570 | chore(deps): bump time from 0.3.44 to 0.3.47 in the cargo group across 1 directory | closed | +| 569 | feat: add comprehensive AI assistant guide for Wrappers project | closed | +| 568 | chore(deps): bump bytes from 1.10.1 to 1.11.1 in the cargo group across 1 directory | closed | +| 567 | chore(deps): bump wasmtime from 36.0.3 to 36.0.5 in the cargo group across 1 directory | closed | + +PR details with draft status, branch info, and merge timestamp: + +```sql +SELECT number, title, state, draft, + user_col->>'login' AS author, + head->>'ref' AS source_branch, + base->>'ref' AS target_branch, + html_url, + created_at, merged_at +FROM repo_pulls +WHERE owner = 'supabase' AND repo = 'wrappers' AND state = 'closed' +LIMIT 5; +``` + +Open PRs only: + +```sql +SELECT number, title, draft, + user_col->>'login' AS author, + created_at +FROM repo_pulls +WHERE owner = 'supabase' AND repo = 'wrappers' AND state = 'open' +LIMIT 5; +``` + +## 6. 
Releases + +Paginated list of releases for a repository: + +```sql +create foreign table repo_releases ( + id bigint, + tag_name text, + name text, + body text, + draft boolean, + prerelease boolean, + created_at timestamptz, + published_at timestamptz, + author jsonb, + html_url text, + owner text, + repo text, + attrs jsonb +) + server github + options ( + endpoint '/repos/{owner}/{repo}/releases', + rowid_column 'id' + ); +``` + +```sql +SELECT tag_name, name, prerelease +FROM repo_releases +WHERE owner = 'supabase' AND repo = 'wrappers' +LIMIT 5; +``` + +| tag_name | name | prerelease | +| --- | --- | --- | +| wasm_openapi_fdw_v0.1.4 | wasm_openapi_fdw_v0.1.4 | f | +| wasm_snowflake_fdw_v0.2.1 | wasm_snowflake_fdw_v0.2.1 | f | +| wasm_infura_fdw_v0.1.0 | wasm_infura_fdw_v0.1.0 | f | +| wasm_clerk_fdw_v0.2.1 | wasm_clerk_fdw_v0.2.1 | f | +| v0.5.7 | v0.5.7 | f | + +Full release info with author, publish date, and release notes: + +```sql +SELECT tag_name, name, draft, prerelease, + author->>'login' AS author, + published_at, + left(body, 200) AS release_notes, + html_url +FROM repo_releases +WHERE owner = 'supabase' AND repo = 'wrappers' +LIMIT 3; +``` + +## 7. Search Repositories (Query Pushdown) + +When a WHERE clause references `q`, the FDW sends it as a query parameter to the `/search/repositories` endpoint. The FDW auto-detects the `items` wrapper key in the response. 
+ +```sql +create foreign table search_repos ( + id bigint, + name text, + full_name text, + description text, + stargazers_count integer, + forks_count integer, + language text, + open_issues_count integer, + created_at timestamptz, + html_url text, + topics jsonb, + license jsonb, + q text, + attrs jsonb +) + server github + options ( + endpoint '/search/repositories', + rowid_column 'id' + ); +``` + +```sql +-- Pushes down to: GET /search/repositories?q=openapi+foreign+data+wrapper +SELECT name, full_name, stargazers_count +FROM search_repos +WHERE q = 'openapi foreign data wrapper' +LIMIT 5; +``` + +| name | full_name | stargazers_count | +| --- | --- | --- | +| openapi_fdw | sabino/openapi_fdw | 2 | +| openapi-fdw | user/openapi-fdw | 1 | +| fdw-api | user/fdw-api | 0 | + +Search with full detail — description, license, topics, and timestamps: + +```sql +SELECT name, full_name, description, language, + stargazers_count, forks_count, open_issues_count, + license->>'name' AS license, + topics, + html_url, created_at +FROM search_repos +WHERE q = 'postgres foreign data wrapper rust' +LIMIT 5; +``` + +## 8. Debug Mode + +The `search_repos_debug` table uses the `github_debug` server which has `debug 'true'`. This emits HTTP request details as PostgreSQL INFO messages. 
+ +```sql +SELECT id FROM search_repos_debug WHERE q = 'supabase' LIMIT 1; +``` + +Look for INFO output like: + +```log +INFO: [openapi_fdw] HTTP GET https://api.github.com/search/repositories?per_page=30&q=supabase -> 200 (176333 bytes) +INFO: [openapi_fdw] Scan complete: 1 rows, 2 columns +``` + +## The `attrs` Column + +Every table includes an `attrs jsonb` column that captures all fields not mapped to named columns: + +```sql +SELECT name, attrs->>'visibility' AS visibility, + attrs->>'has_wiki' AS has_wiki +FROM my_repos +LIMIT 3; +``` + +| name | visibility | has_wiki | +| --- | --- | --- | +| my-project | public | true | +| dotfiles | public | false | +| cool-app | public | true | + +## Features Demonstrated + +| Feature | Table(s) | +| --- | --- | +| Bearer token auth (Authorization header) | All tables | +| Custom HTTP headers (X-GitHub-Api-Version) | All tables | +| Page-based pagination (auto-detected) | `my_repos`, `repo_issues`, `repo_pulls`, `repo_releases`, `search_repos` | +| Path parameter substitution | `repo_detail`, `repo_issues`, `repo_pulls`, `repo_releases` | +| Query parameter pushdown | `my_repos` (`type`, `sort`), `repo_issues` (`state`), `repo_pulls` (`state`), `search_repos` (`q`) | +| Single object response | `my_profile`, `repo_detail` | +| Auto-detected wrapper key (`items`) | `search_repos`, `search_repos_debug` | +| Type coercion (timestamptz, boolean, bigint) | All tables | +| Debug mode | `search_repos_debug` | +| `attrs` catch-all column | All tables | +| `rowid_column` | `my_repos`, `repo_issues`, `repo_pulls`, `repo_releases`, `search_repos` | diff --git a/wasm-wrappers/fdw/openapi_fdw/examples/github/init.sql b/wasm-wrappers/fdw/openapi_fdw/examples/github/init.sql new file mode 100644 index 00000000..cd56bcbd --- /dev/null +++ b/wasm-wrappers/fdw/openapi_fdw/examples/github/init.sql @@ -0,0 +1,282 @@ +-- OpenAPI FDW example: GitHub API +-- Requires a GitHub personal access token (set GITHUB_TOKEN env var). 
+-- See: https://docs.github.com/en/authentication/keeping-your-account-and-data-secure/managing-your-personal-access-tokens +-- Note: fdw_package_url uses file:// for local Docker testing. In production, use the +-- GitHub release URL: https://github.com/supabase/wrappers/releases/download/wasm_openapi_fdw_v0.2.0/openapi_fdw.wasm + +-- Create supabase_admin role if it doesn't exist (required by wrappers extension) +DO $$ +BEGIN + IF NOT EXISTS (SELECT FROM pg_catalog.pg_roles WHERE rolname = 'supabase_admin') THEN + CREATE ROLE supabase_admin WITH SUPERUSER CREATEDB CREATEROLE LOGIN PASSWORD 'postgres'; + END IF; +END +$$; + +create schema if not exists extensions; +create extension if not exists wrappers with schema extensions; + +set search_path to public, extensions; + +create foreign data wrapper wasm_wrapper + handler wasm_fdw_handler + validator wasm_fdw_validator; + +-- ============================================================ +-- Server 1: github — Main GitHub API server +-- Bearer token auth via Authorization header (default behavior) +-- Custom headers for GitHub API versioning +-- ============================================================ +create server github + foreign data wrapper wasm_wrapper + options ( + fdw_package_url 'file:///openapi_fdw.wasm', + fdw_package_name 'supabase:openapi-fdw', + fdw_package_version '0.2.0', + base_url 'https://api.github.com', + api_key 'placeholder', + user_agent 'openapi-fdw-example/0.2.0', + accept 'application/vnd.github+json', + headers '{"X-GitHub-Api-Version": "2022-11-28"}', + page_size '30', + page_size_param 'per_page' + ); + +-- ============================================================ +-- Server 2: github_debug — Same API with debug output +-- Emits HTTP request details as INFO messages +-- ============================================================ +create server github_debug + foreign data wrapper wasm_wrapper + options ( + fdw_package_url 'file:///openapi_fdw.wasm', + fdw_package_name 
'supabase:openapi-fdw', + fdw_package_version '0.2.0', + base_url 'https://api.github.com', + api_key 'placeholder', + user_agent 'openapi-fdw-example/0.2.0', + accept 'application/vnd.github+json', + headers '{"X-GitHub-Api-Version": "2022-11-28"}', + page_size '30', + page_size_param 'per_page', + debug 'true' + ); + +-- ============================================================ +-- Table 1: my_profile +-- Authenticated user's profile — GET /user +-- Features: single object response, bearer token auth, custom headers +-- ============================================================ +create foreign table my_profile ( + login text, + id bigint, + name text, + email text, + bio text, + public_repos integer, + public_gists integer, + followers integer, + following integer, + created_at timestamptz, + avatar_url text, + company text, + location text, + blog text, + attrs jsonb +) + server github + options ( + endpoint '/user' + ); + +-- ============================================================ +-- Table 2: my_repos +-- Authenticated user's repositories — GET /user/repos +-- Features: page-based pagination (auto-detected), query pushdown (type, sort) +-- ============================================================ +create foreign table my_repos ( + id bigint, + name text, + full_name text, + description text, + private boolean, + fork boolean, + language text, + stargazers_count integer, + forks_count integer, + open_issues_count integer, + created_at timestamptz, + updated_at timestamptz, + pushed_at timestamptz, + html_url text, + default_branch text, + archived boolean, + type text, + sort text, + attrs jsonb +) + server github + options ( + endpoint '/user/repos', + rowid_column 'id' + ); + +-- ============================================================ +-- Table 3: repo_detail +-- Full repository metadata — GET /repos/{owner}/{repo} +-- Features: two path parameters, single object response +-- ============================================================ 
+create foreign table repo_detail ( + id bigint, + name text, + full_name text, + description text, + private boolean, + stargazers_count integer, + forks_count integer, + open_issues_count integer, + watchers_count integer, + language text, + default_branch text, + created_at timestamptz, + updated_at timestamptz, + license jsonb, + topics jsonb, + owner text, + repo text, + attrs jsonb +) + server github + options ( + endpoint '/repos/{owner}/{repo}' + ); + +-- ============================================================ +-- Table 4: repo_issues +-- Repository issues — GET /repos/{owner}/{repo}/issues +-- Features: two path parameters, page-based pagination, +-- query pushdown (state), timestamptz coercion +-- ============================================================ +create foreign table repo_issues ( + id bigint, + number integer, + title text, + state text, + body text, + created_at timestamptz, + updated_at timestamptz, + closed_at timestamptz, + comments integer, + user_col jsonb, + labels jsonb, + html_url text, + owner text, + repo text, + attrs jsonb +) + server github + options ( + endpoint '/repos/{owner}/{repo}/issues', + rowid_column 'id' + ); + +-- ============================================================ +-- Table 5: repo_pulls +-- Repository pull requests — GET /repos/{owner}/{repo}/pulls +-- Features: two path parameters, page-based pagination, +-- query pushdown (state), boolean + timestamptz coercion +-- ============================================================ +create foreign table repo_pulls ( + id bigint, + number integer, + title text, + state text, + draft boolean, + created_at timestamptz, + updated_at timestamptz, + merged_at timestamptz, + user_col jsonb, + head jsonb, + base jsonb, + html_url text, + owner text, + repo text, + attrs jsonb +) + server github + options ( + endpoint '/repos/{owner}/{repo}/pulls', + rowid_column 'id' + ); + +-- ============================================================ +-- Table 6: repo_releases 
+-- Repository releases — GET /repos/{owner}/{repo}/releases +-- Features: two path parameters, page-based pagination +-- ============================================================ +create foreign table repo_releases ( + id bigint, + tag_name text, + name text, + body text, + draft boolean, + prerelease boolean, + created_at timestamptz, + published_at timestamptz, + author jsonb, + html_url text, + owner text, + repo text, + attrs jsonb +) + server github + options ( + endpoint '/repos/{owner}/{repo}/releases', + rowid_column 'id' + ); + +-- ============================================================ +-- Table 7: search_repos +-- Search repositories — GET /search/repositories +-- Features: query pushdown (q), auto-detected "items" wrapper key +-- ============================================================ +create foreign table search_repos ( + id bigint, + name text, + full_name text, + description text, + stargazers_count integer, + forks_count integer, + language text, + open_issues_count integer, + created_at timestamptz, + html_url text, + topics jsonb, + license jsonb, + q text, + attrs jsonb +) + server github + options ( + endpoint '/search/repositories', + rowid_column 'id' + ); + +-- ============================================================ +-- Table 8: search_repos_debug +-- Same as search_repos but on the debug server +-- Features: debug output in INFO messages +-- ============================================================ +create foreign table search_repos_debug ( + id bigint, + name text, + full_name text, + stargazers_count integer, + q text, + attrs jsonb +) + server github_debug + options ( + endpoint '/search/repositories', + rowid_column 'id' + ); diff --git a/wasm-wrappers/fdw/openapi_fdw/examples/nws/README.md b/wasm-wrappers/fdw/openapi_fdw/examples/nws/README.md new file mode 100644 index 00000000..b35be986 --- /dev/null +++ b/wasm-wrappers/fdw/openapi_fdw/examples/nws/README.md @@ -0,0 +1,425 @@ +# NWS Weather API Example + +Query 
the [National Weather Service API](https://www.weather.gov/documentation/services-web-api) using SQL. This example exercises all major features of the OpenAPI FDW against a real, free, no-auth API. + +## Server Configuration + +```sql +create server nws + foreign data wrapper wasm_wrapper + options ( + fdw_package_url 'https://github.com/supabase/wrappers/releases/download/wasm_openapi_fdw_v0.2.0/openapi_fdw.wasm', + fdw_package_name 'supabase:openapi-fdw', + fdw_package_version '0.2.0', + base_url 'https://api.weather.gov', + user_agent 'openapi-fdw-example/0.2.0', + accept 'application/geo+json' + ); +``` + +--- + +## 1. Weather Stations + +Fetches the full list of US weather stations. Demonstrates **GeoJSON extraction** (`response_path` + `object_path`), **cursor-based pagination** (`cursor_path`), and **camelCase-to-snake_case** column matching (`stationIdentifier` → `station_identifier`). + +```sql +create foreign table stations ( + station_identifier text, + name text, + time_zone text, + elevation jsonb, + attrs jsonb +) + server nws + options ( + endpoint '/stations', + response_path '/features', + object_path '/properties', + rowid_column 'station_identifier', + cursor_path '/pagination/next', + page_size '50', + page_size_param 'limit' + ); +``` + +```sql +SELECT station_identifier, name, time_zone +FROM stations +LIMIT 5; +``` + +| station_identifier | name | time_zone | +| --- | --- | --- | +| 0007W | Montford Middle | America/New_York | +| 000PG | Southside Road | America/Los_Angeles | +| 000SE | SCE South Hills Park | America/Los_Angeles | +| 001AS | Poloa_Wx | Pacific/Pago_Pago | +| 001BH | Tilford | America/Denver | + +The `stations` table paginates automatically — the FDW follows `/pagination/next` cursors. 
Try fetching more: + +```sql +SELECT count(*) FROM stations; +``` + +The `elevation` column is `jsonb` because the API returns a structured object with value and unit: + +```sql +SELECT station_identifier, name, elevation +FROM stations +LIMIT 3; +``` + +| station_identifier | name | elevation | +| --- | --- | --- | +| 0007W | Montford Middle | `{"value": 49.0728, "unitCode": "wmoUnit:m"}` | +| 000PG | Southside Road | `{"value": 129.2352, "unitCode": "wmoUnit:m"}` | +| 000SE | SCE South Hills Park | `{"value": 242.9256, "unitCode": "wmoUnit:m"}` | + +## 2. Active Alerts + +Different GeoJSON shape with **timestamptz coercion** for `onset` and `expires` columns. + +```sql +create foreign table active_alerts ( + id text, + area_desc text, + severity text, + certainty text, + event text, + headline text, + onset timestamptz, + expires timestamptz, + attrs jsonb +) + server nws + options ( + endpoint '/alerts/active', + response_path '/features', + object_path '/properties', + rowid_column 'id' + ); +``` + +```sql +SELECT event, severity, headline, onset, expires +FROM active_alerts +LIMIT 5; +``` + +| event | severity | headline | onset | expires | +| --- | --- | --- | --- | --- | +| Flash Flood Warning | Severe | Flash Flood Warning issued February 13 at 10:07PM CST… | 2026-02-14 04:07:00+00 | 2026-02-14 05:30:00+00 | +| Small Craft Advisory | Minor | Small Craft Advisory issued February 13 at 11:03PM EST… | 2026-02-15 06:00:00+00 | 2026-02-14 18:15:00+00 | + +Full alert details with area, certainty, and timing: + +```sql +SELECT id, event, severity, certainty, area_desc, + headline, onset, expires +FROM active_alerts +LIMIT 5; +``` + +Filter in SQL after fetching: + +```sql +SELECT event, severity, headline +FROM active_alerts +WHERE severity IN ('Severe', 'Extreme') +LIMIT 10; +``` + +## 3. Query Param Pushdown (severity filter) + +When a WHERE clause references a column that isn't a path parameter, the FDW sends it as a **query parameter** to the API. 
The NWS alerts endpoint supports a `severity` filter — and because it echoes `severity` back in every response object, the column is populated naturally: + +```sql +-- Pushes down to: GET /alerts/active?severity=Severe +SELECT event, severity, headline +FROM active_alerts +WHERE severity = 'Severe' +LIMIT 3; +``` + +| event | severity | headline | +| --- | --- | --- | +| Flash Flood Warning | Severe | Flash Flood Warning issued February 13 at 10:07PM CST… | +| Severe Thunderstorm Warning | Severe | Severe Thunderstorm Warning issued February 13 at 10:02PM CST… | +| Winter Storm Watch | Severe | Winter Storm Watch issued February 13 at 7:52PM PST… | + +Try other severity values: `Extreme`, `Moderate`, `Minor`, `Unknown`. + +## 4. Station Observations + +**Path parameter substitution**: the `{station_id}` placeholder in the endpoint is replaced with the value from your WHERE clause. + +```sql +create foreign table station_observations ( + timestamp timestamptz, + text_description text, + temperature jsonb, + wind_speed jsonb, + wind_direction jsonb, + station_id text, + attrs jsonb +) + server nws + options ( + endpoint '/stations/{station_id}/observations', + response_path '/features', + object_path '/properties' + ); +``` + +```sql +-- Pushes down to: GET /stations/KDEN/observations +SELECT timestamp, text_description, temperature +FROM station_observations +WHERE station_id = 'KDEN' +LIMIT 3; +``` + +| timestamp | text_description | temperature | +| --- | --- | --- | +| 2026-02-14 03:45:00+00 | Cloudy | `{"value": 7, "unitCode": "wmoUnit:degC", "qualityControl": "V"}` | +| 2026-02-14 03:40:00+00 | Cloudy | `{"value": 7, "unitCode": "wmoUnit:degC", "qualityControl": "V"}` | +| 2026-02-14 03:35:00+00 | Cloudy | `{"value": 8, "unitCode": "wmoUnit:degC", "qualityControl": "V"}` | + +`KDEN` is Denver International Airport. Try other station IDs: `KJFK` (New York), `KLAX` (Los Angeles), `KORD` (Chicago). 
+ +Temperature and wind values are `jsonb` because the NWS returns them as objects with unit and value: + +```sql +SELECT timestamp, + temperature->>'value' AS temp_c, + wind_speed->>'value' AS wind_mps, + text_description +FROM station_observations +WHERE station_id = 'KDEN' +LIMIT 3; +``` + +| timestamp | temp_c | wind_mps | text_description | +| --- | --- | --- | --- | +| 2026-02-14 03:45:00+00 | 7 | 24.084 | Cloudy | +| 2026-02-14 03:40:00+00 | 7 | 25.92 | Cloudy | +| 2026-02-14 03:35:00+00 | 8 | 25.92 | Cloudy | + +## 5. Current Conditions + +**Single object response** — the `/observations/latest` endpoint returns one GeoJSON Feature (not a FeatureCollection). The FDW auto-detects this and returns a single row. + +```sql +create foreign table latest_observation ( + text_description text, + temperature jsonb, + wind_speed jsonb, + wind_direction jsonb, + barometric_pressure jsonb, + relative_humidity jsonb, + station_id text, + attrs jsonb +) + server nws + options ( + endpoint '/stations/{station_id}/observations/latest', + object_path '/properties' + ); +``` + +```sql +SELECT text_description, + temperature->>'value' AS temp_c, + wind_speed->>'value' AS wind_mps, + wind_direction->>'value' AS wind_deg, + barometric_pressure->>'value' AS pressure_pa, + relative_humidity->>'value' AS humidity_pct +FROM latest_observation +WHERE station_id = 'KDEN'; +``` + +| text_description | temp_c | wind_mps | wind_deg | pressure_pa | humidity_pct | +| --- | --- | --- | --- | --- | --- | +| Cloudy | 7 | 24.084 | 310 | | 65.63 | + +## 6. Point Metadata & Forecast + +This two-step flow demonstrates **composite path parameters** and **nested response extraction**. 
+ +```sql +create foreign table point_metadata ( + grid_id text, + grid_x integer, + grid_y integer, + forecast text, + forecast_hourly text, + relative_location jsonb, + point text, + attrs jsonb +) + server nws + options ( + endpoint '/points/{point}', + object_path '/properties' + ); +``` + +**Step 1:** Look up grid coordinates for a location (Denver: 39.7456,-104.9887): + +```sql +SELECT grid_id, grid_x, grid_y, forecast +FROM point_metadata +WHERE point = '39.7456,-104.9887'; +``` + +| grid_id | grid_x | grid_y | forecast | +| --- | --- | --- | --- | +| BOU | 63 | 62 | | + +The point metadata includes more detail than just the grid coordinates: + +```sql +SELECT grid_id, grid_x, grid_y, + forecast, forecast_hourly, + relative_location->>'city' AS city, + relative_location->>'state' AS state +FROM point_metadata +WHERE point = '39.7456,-104.9887'; +``` + +**Step 2:** Use those grid coordinates to fetch the forecast. This exercises **multiple path parameters** (`wfo`, `x`, `y`) and **nested `response_path`** (`/properties/periods` digs two levels into the response): + +```sql +create foreign table forecast_periods ( + number integer, + name text, + start_time timestamptz, + end_time timestamptz, + is_daytime boolean, + temperature integer, + temperature_unit text, + wind_speed text, + wind_direction text, + short_forecast text, + detailed_forecast text, + wfo text, + x text, + y text, + attrs jsonb +) + server nws + options ( + endpoint '/gridpoints/{wfo}/{x},{y}/forecast', + response_path '/properties/periods' + ); +``` + +```sql +-- Replace wfo/x/y with values from Step 1 +SELECT name, temperature, temperature_unit, + is_daytime, wind_speed, short_forecast +FROM forecast_periods +WHERE wfo = 'BOU' AND x = '63' AND y = '62'; +``` + +| name | temperature | temperature_unit | is_daytime | wind_speed | short_forecast | +| --- | --- | --- | --- | --- | --- | +| Tonight | 35 | F | false | 3 to 7 mph | Rain Showers Likely | +| Saturday | 57 | F | true | 6 mph | Sunny 
| +| Saturday Night | 31 | F | false | 5 mph | Mostly Clear | +| Sunday | 66 | F | true | 6 mph | Mostly Sunny | + +Full forecast with timing, wind, and detailed text: + +```sql +SELECT number, name, start_time, end_time, + is_daytime, temperature, temperature_unit, + wind_speed, wind_direction, + short_forecast, detailed_forecast +FROM forecast_periods +WHERE wfo = 'BOU' AND x = '63' AND y = '62'; +``` + +> Grid coordinates vary by location. Always use Step 1 to find the right values for your area. + +## 7. IMPORT FOREIGN SCHEMA + +Auto-generate table definitions from the NWS OpenAPI spec. The `nws_import` server has a `spec_url` configured. + +```sql +CREATE SCHEMA IF NOT EXISTS nws_auto; + +IMPORT FOREIGN SCHEMA "unused" +FROM SERVER nws_import +INTO nws_auto; +``` + +See what was generated: + +```sql +SELECT foreign_table_name FROM information_schema.foreign_tables +WHERE foreign_table_schema = 'nws_auto'; +``` + +Pick a generated table and query it: + +```sql +SELECT * FROM nws_auto.alerts LIMIT 3; +``` + +## 8. Debug Mode + +The `stations_debug` table uses the `nws_debug` server which has `debug 'true'`. This emits HTTP request details (method, URL, status, response size) and scan statistics (row/column counts) as PostgreSQL INFO messages. + +```sql +SELECT station_identifier, name +FROM stations_debug +LIMIT 5; +``` + +Look for INFO output like: + +```log +INFO: [openapi_fdw] HTTP GET https://api.weather.gov/stations?limit=50 -> 200 (51639 bytes) +INFO: [openapi_fdw] Scan complete: 5 rows, 2 columns +``` + +## 9. The `attrs` Column + +Every table includes an `attrs jsonb` column that captures **all fields not mapped to named columns**. This is useful for exploring what data the API returns without defining every column. 
+ +```sql +SELECT station_identifier, attrs->>'county' AS county +FROM stations +LIMIT 5; +``` + +| station_identifier | county | +| --- | --- | +| 0007W | | +| 000PG | | +| 000SE | | +| 001AS | | +| 001BH | | + +## Features Demonstrated + +| Feature | Table(s) | +| --- | --- | +| GeoJSON extraction (`response_path` + `object_path`) | `stations`, `active_alerts`, `station_observations` | +| Cursor-based pagination (`cursor_path`) | `stations` | +| Path parameter substitution | `station_observations`, `latest_observation`, `point_metadata`, `forecast_periods` | +| Query parameter pushdown | `active_alerts` (with `WHERE severity = ...`) | +| camelCase → snake_case matching | All tables | +| Custom headers (`user_agent`, `accept`) | All servers | +| LIMIT pushdown | Any table with `LIMIT` | +| Debug mode (`debug`) | `stations_debug` | +| IMPORT FOREIGN SCHEMA | `nws_import` server | +| Single object response | `latest_observation`, `point_metadata` | +| Type coercion (timestamptz, jsonb, boolean, integer) | `active_alerts`, `forecast_periods` | +| `attrs` catch-all column | All tables | +| Multiple path parameters | `forecast_periods` | +| Nested response extraction (JSON pointer) | `forecast_periods` | +| `rowid_column` | `stations`, `active_alerts` | diff --git a/wasm-wrappers/fdw/openapi_fdw/examples/nws/init.sql b/wasm-wrappers/fdw/openapi_fdw/examples/nws/init.sql new file mode 100644 index 00000000..26803b11 --- /dev/null +++ b/wasm-wrappers/fdw/openapi_fdw/examples/nws/init.sql @@ -0,0 +1,231 @@ +-- OpenAPI FDW example: National Weather Service API +-- All queries hit the live NWS API (no auth required). +-- Note: fdw_package_url uses file:// for local Docker testing. 
In production, use the +-- GitHub release URL: https://github.com/supabase/wrappers/releases/download/wasm_openapi_fdw_v0.2.0/openapi_fdw.wasm + +-- Create supabase_admin role if it doesn't exist (required by wrappers extension) +DO $$ +BEGIN + IF NOT EXISTS (SELECT FROM pg_catalog.pg_roles WHERE rolname = 'supabase_admin') THEN + CREATE ROLE supabase_admin WITH SUPERUSER CREATEDB CREATEROLE LOGIN PASSWORD 'postgres'; + END IF; +END +$$; + +create schema if not exists extensions; +create extension if not exists wrappers with schema extensions; + +set search_path to public, extensions; + +create foreign data wrapper wasm_wrapper + handler wasm_fdw_handler + validator wasm_fdw_validator; + +-- ============================================================ +-- Server 1: nws — Main NWS API server +-- ============================================================ +create server nws + foreign data wrapper wasm_wrapper + options ( + fdw_package_url 'file:///openapi_fdw.wasm', + fdw_package_name 'supabase:openapi-fdw', + fdw_package_version '0.2.0', + base_url 'https://api.weather.gov', + user_agent 'openapi-fdw-example/0.2.0', + accept 'application/geo+json' + ); + +-- ============================================================ +-- Server 2: nws_debug — Same API with debug output +-- ============================================================ +create server nws_debug + foreign data wrapper wasm_wrapper + options ( + fdw_package_url 'file:///openapi_fdw.wasm', + fdw_package_name 'supabase:openapi-fdw', + fdw_package_version '0.2.0', + base_url 'https://api.weather.gov', + user_agent 'openapi-fdw-example/0.2.0', + accept 'application/geo+json', + debug 'true' + ); + +-- ============================================================ +-- Server 3: nws_import — With spec_url for IMPORT FOREIGN SCHEMA +-- ============================================================ +create server nws_import + foreign data wrapper wasm_wrapper + options ( + fdw_package_url 
'file:///openapi_fdw.wasm', + fdw_package_name 'supabase:openapi-fdw', + fdw_package_version '0.2.0', + base_url 'https://api.weather.gov', + user_agent 'openapi-fdw-example/0.2.0', + accept 'application/geo+json', + spec_url 'https://api.weather.gov/openapi.json', + page_size '50', + page_size_param 'limit' + ); + +-- ============================================================ +-- Table 1: stations +-- GeoJSON FeatureCollection with cursor-based pagination +-- Features: response_path, object_path, cursor_path, page_size, +-- rowid_column, camelCase matching, attrs catch-all +-- ============================================================ +create foreign table stations ( + station_identifier text, + name text, + time_zone text, + elevation jsonb, + attrs jsonb +) + server nws + options ( + endpoint '/stations', + response_path '/features', + object_path '/properties', + rowid_column 'station_identifier', + cursor_path '/pagination/next', + page_size '50', + page_size_param 'limit' + ); + +-- ============================================================ +-- Table 2: active_alerts +-- GeoJSON with different column shape, timestamptz coercion +-- Features: timestamp type coercion, severity/certainty columns +-- ============================================================ +create foreign table active_alerts ( + id text, + area_desc text, + severity text, + certainty text, + event text, + headline text, + onset timestamptz, + expires timestamptz, + attrs jsonb +) + server nws + options ( + endpoint '/alerts/active', + response_path '/features', + object_path '/properties', + rowid_column 'id' + ); + +-- ============================================================ +-- Table 3: station_observations +-- Path parameter substitution: /stations/{station_id}/observations +-- Features: path param, GeoJSON, observation data as jsonb +-- ============================================================ +create foreign table station_observations ( + timestamp timestamptz, + 
text_description text, + temperature jsonb, + wind_speed jsonb, + wind_direction jsonb, + station_id text, + attrs jsonb +) + server nws + options ( + endpoint '/stations/{station_id}/observations', + response_path '/features', + object_path '/properties' + ); + +-- ============================================================ +-- Table 4: latest_observation +-- Single object response (GeoJSON Feature, not FeatureCollection) +-- Features: path param, single object, object_path extraction +-- ============================================================ +create foreign table latest_observation ( + text_description text, + temperature jsonb, + wind_speed jsonb, + wind_direction jsonb, + barometric_pressure jsonb, + relative_humidity jsonb, + station_id text, + attrs jsonb +) + server nws + options ( + endpoint '/stations/{station_id}/observations/latest', + object_path '/properties' + ); + +-- ============================================================ +-- Table 5: point_metadata +-- Composite path parameter: lat,lon as a single value +-- Features: single object, grid coordinate lookup +-- ============================================================ +create foreign table point_metadata ( + grid_id text, + grid_x integer, + grid_y integer, + forecast text, + forecast_hourly text, + relative_location jsonb, + point text, + attrs jsonb +) + server nws + options ( + endpoint '/points/{point}', + object_path '/properties' + ); + +-- ============================================================ +-- Table 6: forecast_periods +-- Multiple path params + nested response extraction +-- Features: 3 path params, response_path into nested array, +-- boolean coercion, integer temperature +-- ============================================================ +create foreign table forecast_periods ( + number integer, + name text, + start_time timestamptz, + end_time timestamptz, + is_daytime boolean, + temperature integer, + temperature_unit text, + wind_speed text, + wind_direction text, + 
short_forecast text, + detailed_forecast text, + wfo text, + x text, + y text, + attrs jsonb +) + server nws + options ( + endpoint '/gridpoints/{wfo}/{x},{y}/forecast', + response_path '/properties/periods' + ); + +-- ============================================================ +-- Table 7: stations_debug +-- Same as stations but on the debug server +-- Features: debug output in INFO messages +-- ============================================================ +create foreign table stations_debug ( + station_identifier text, + name text, + time_zone text, + elevation jsonb, + attrs jsonb +) + server nws_debug + options ( + endpoint '/stations', + response_path '/features', + object_path '/properties', + rowid_column 'station_identifier', + cursor_path '/pagination/next', + page_size '50', + page_size_param 'limit' + ); diff --git a/wasm-wrappers/fdw/openapi_fdw/examples/pokeapi/README.md b/wasm-wrappers/fdw/openapi_fdw/examples/pokeapi/README.md new file mode 100644 index 00000000..f4d73c12 --- /dev/null +++ b/wasm-wrappers/fdw/openapi_fdw/examples/pokeapi/README.md @@ -0,0 +1,386 @@ +# PokéAPI Example + +Query the [PokéAPI](https://pokeapi.co/) using SQL. This example demonstrates the OpenAPI FDW against a free, no-auth API with **offset-based pagination** and auto-detected `results` wrapper key. + +## Server Configuration + +```sql +create server pokeapi + foreign data wrapper wasm_wrapper + options ( + fdw_package_url 'https://github.com/supabase/wrappers/releases/download/wasm_openapi_fdw_v0.2.0/openapi_fdw.wasm', + fdw_package_name 'supabase:openapi-fdw', + fdw_package_version '0.2.0', + base_url 'https://pokeapi.co/api/v2', + page_size '20', + page_size_param 'limit' + ); +``` + +--- + +## 1. Pokemon List + +Fetches the paginated list of all Pokemon (~1350 entries). Demonstrates **offset-based pagination** with auto-detected `results` wrapper key and `limit` page size parameter. 
The FDW automatically follows the `next` URL in each response to fetch subsequent pages. + +```sql +create foreign table pokemon ( + name text, + url text, + attrs jsonb +) + server pokeapi + options ( + endpoint '/pokemon', + rowid_column 'name' + ); +``` + +```sql +SELECT name, url +FROM pokemon +LIMIT 5; +``` + +| name | url | +| --- | --- | +| bulbasaur | | +| ivysaur | | +| venusaur | | +| charmander | | +| charmeleon | | + +List endpoints only return `name` and `url` pairs. Use the detail table to get full data for a specific Pokemon. + +## 2. Pokemon Detail + +**Path parameter substitution**: the `{name}` placeholder in the endpoint is replaced with the value from your WHERE clause. Returns a single object with full Pokemon data. + +```sql +create foreign table pokemon_detail ( + id integer, + name text, + height integer, + weight integer, + base_experience integer, + is_default boolean, + order_num integer, + abilities jsonb, + types jsonb, + stats jsonb, + moves jsonb, + sprites jsonb, + attrs jsonb +) + server pokeapi + options ( + endpoint '/pokemon/{name}', + rowid_column 'id' + ); +``` + +```sql +SELECT id, name, height, weight, base_experience, is_default +FROM pokemon_detail +WHERE name = 'pikachu'; +``` + +| id | name | height | weight | base_experience | is_default | +| --- | --- | --- | --- | --- | --- | +| 25 | pikachu | 4 | 60 | 112 | t | + +Complex nested data like abilities, types, and stats are returned as `jsonb`: + +```sql +SELECT name, types, abilities +FROM pokemon_detail +WHERE name = 'charizard'; +``` + +| name | types | abilities | +| --- | --- | --- | +| charizard | `[{"slot":1,"type":{"name":"fire","url":"..."}},{"slot":2,"type":{"name":"flying","url":"..."}}]` | `[{"slot":1,"ability":{"name":"blaze","url":"..."},"is_hidden":false},...]` | + +Extract specific fields from the jsonb columns: + +```sql +SELECT name, + sprites->>'front_default' AS sprite_url +FROM pokemon_detail +WHERE name = 'eevee'; +``` + +Extract base stats from the 
jsonb column: + +```sql +SELECT name, height, weight, + stats->0->>'base_stat' AS hp, + stats->1->>'base_stat' AS attack, + stats->2->>'base_stat' AS defense, + stats->3->>'base_stat' AS sp_attack, + stats->4->>'base_stat' AS sp_defense, + stats->5->>'base_stat' AS speed +FROM pokemon_detail +WHERE name = 'pikachu'; +``` + +| name | height | weight | hp | attack | defense | sp_attack | sp_defense | speed | +| --- | --- | --- | --- | --- | --- | --- | --- | --- | +| pikachu | 4 | 60 | 35 | 55 | 40 | 50 | 50 | 90 | + +Compare two Pokemon side by side: + +```sql +SELECT name, height, weight, base_experience, is_default, + types, abilities +FROM pokemon_detail +WHERE name IN ('charizard', 'blastoise'); +``` + +Try other Pokemon: `bulbasaur`, `charizard`, `mewtwo`, `snorlax`, `gengar`. + +## 3. Types List + +Fetches all Pokemon types. With only 21 types and a page size of 20, the full list is retrieved in just two small fetches. + +```sql +create foreign table types ( + name text, + url text, + attrs jsonb +) + server pokeapi + options ( + endpoint '/type', + rowid_column 'name' + ); +``` + +```sql +SELECT name, url +FROM types; +``` + +| name | url | +| --- | --- | +| normal | | +| fighting | | +| flying | | +| poison | | +| ground | | +| rock | | +| bug | | +| ghost | | +| steel | | +| fire | | +| water | | +| grass | | +| electric | | +| psychic | | +| ice | | +| dragon | | +| dark | | +| fairy | | +| stellar | | +| unknown | | +| shadow | | + +## 4. Type Detail + +Detailed information about a single type, including **damage relations** (strengths and weaknesses) and a list of all Pokemon of that type. 
+ +```sql +create foreign table type_detail ( + id integer, + name text, + damage_relations jsonb, + pokemon jsonb, + moves jsonb, + attrs jsonb +) + server pokeapi + options ( + endpoint '/type/{name}', + rowid_column 'id' + ); +``` + +```sql +SELECT id, name, damage_relations +FROM type_detail +WHERE name = 'fire'; +``` + +| id | name | damage_relations | +| --- | --- | --- | +| 10 | fire | `{"double_damage_to":[{"name":"grass","url":"..."},{"name":"ice","url":"..."},{"name":"bug","url":"..."},{"name":"steel","url":"..."}],"half_damage_from":[{"name":"fire","url":"..."},...],...}` | + +The `damage_relations` jsonb column contains the full type effectiveness chart. Extract specific matchups: + +```sql +SELECT name, + damage_relations->'double_damage_to' AS super_effective_against +FROM type_detail +WHERE name = 'fire'; +``` + +Get the list of all Pokemon for a given type: + +```sql +SELECT name, + damage_relations->'double_damage_to' AS super_effective, + damage_relations->'half_damage_from' AS resists, + pokemon, + moves +FROM type_detail +WHERE name = 'dragon'; +``` + +Try other types: `water`, `electric`, `dragon`, `fairy`, `ghost`. + +## 5. Berries List + +Fetches all berries (64 items). Demonstrates pagination across multiple pages. + +```sql +create foreign table berries ( + name text, + url text, + attrs jsonb +) + server pokeapi + options ( + endpoint '/berry', + rowid_column 'name' + ); +``` + +```sql +SELECT name, url +FROM berries +LIMIT 5; +``` + +| name | url | +| --- | --- | +| cheri | | +| chesto | | +| pecha | | +| rawst | | +| aspear | | + +## 6. Berry Detail + +Detailed information about a single berry, including growth data, flavors, and natural gift properties. 
+ +```sql +create foreign table berry_detail ( + id integer, + name text, + growth_time integer, + max_harvest integer, + natural_gift_power integer, + size integer, + smoothness integer, + soil_dryness integer, + firmness jsonb, + flavors jsonb, + natural_gift_type jsonb, + attrs jsonb +) + server pokeapi + options ( + endpoint '/berry/{name}', + rowid_column 'id' + ); +``` + +```sql +SELECT id, name, growth_time, max_harvest, natural_gift_power, + size, smoothness, soil_dryness +FROM berry_detail +WHERE name = 'cheri'; +``` + +| id | name | growth_time | max_harvest | natural_gift_power | size | smoothness | soil_dryness | +| --- | --- | --- | --- | --- | --- | --- | --- | +| 1 | cheri | 3 | 5 | 60 | 20 | 25 | 15 | + +Complex data like firmness, flavors, and natural gift type are returned as `jsonb`: + +```sql +SELECT name, + firmness->>'name' AS firmness, + natural_gift_type->>'name' AS gift_type +FROM berry_detail +WHERE name = 'cheri'; +``` + +| name | firmness | gift_type | +| --- | --- | --- | +| cheri | soft | fire | + +Extract all flavor profiles from the jsonb column: + +```sql +SELECT name, growth_time, max_harvest, + firmness->>'name' AS firmness, + natural_gift_type->>'name' AS gift_type, + natural_gift_power, + flavors +FROM berry_detail +WHERE name = 'sitrus'; +``` + +Try other berries: `chesto`, `pecha`, `rawst`, `aspear`, `leppa`, `oran`, `sitrus`. + +## 7. Debug Mode + +The `pokemon_debug` table uses the `pokeapi_debug` server which has `debug 'true'`. This emits HTTP request details (method, URL, status, response size) and scan statistics as PostgreSQL INFO messages. + +```sql +SELECT name, url +FROM pokemon_debug +LIMIT 3; +``` + +Look for INFO output like: + +```log +INFO: [openapi_fdw] HTTP GET https://pokeapi.co/api/v2/pokemon?limit=20 -> 200 (1416 bytes) +INFO: [openapi_fdw] Scan complete: 3 rows, 1 columns +``` + +## 8. 
The `attrs` Column + +Every table includes an `attrs jsonb` column that captures **all fields not mapped to named columns**. This is useful for exploring what data the API returns without defining every column upfront. + +For list endpoints, `attrs` will be mostly empty since the API only returns `name` and `url`. For detail endpoints, `attrs` captures the remaining fields: + +```sql +SELECT name, + attrs->>'location_area_encounters' AS encounters_url +FROM pokemon_detail +WHERE name = 'pikachu'; +``` + +| name | encounters_url | +| --- | --- | +| pikachu | | + +## Features Demonstrated + +| Feature | Table(s) | +| --- | --- | +| Offset-based pagination (auto-followed `next` URL) | `pokemon`, `types`, `berries` | +| Auto-detected `results` wrapper key | All list tables | +| Path parameter substitution | `pokemon_detail`, `type_detail`, `berry_detail` | +| Single object response | `pokemon_detail`, `type_detail`, `berry_detail` | +| Integer type coercion | `pokemon_detail`, `berry_detail` | +| Boolean type coercion | `pokemon_detail` | +| JSONB for complex nested data | `pokemon_detail`, `type_detail`, `berry_detail` | +| LIMIT pushdown | Any table with `LIMIT` | +| Debug mode (`debug`) | `pokemon_debug` | +| `attrs` catch-all column | All tables | +| `rowid_column` | All tables | +| No authentication required | All servers | diff --git a/wasm-wrappers/fdw/openapi_fdw/examples/pokeapi/init.sql b/wasm-wrappers/fdw/openapi_fdw/examples/pokeapi/init.sql new file mode 100644 index 00000000..41879850 --- /dev/null +++ b/wasm-wrappers/fdw/openapi_fdw/examples/pokeapi/init.sql @@ -0,0 +1,190 @@ +-- OpenAPI FDW example: PokéAPI +-- All queries hit the live PokéAPI (no auth required). +-- Note: fdw_package_url uses file:// for local Docker testing. 
In production, use the +-- GitHub release URL: https://github.com/supabase/wrappers/releases/download/wasm_openapi_fdw_v0.2.0/openapi_fdw.wasm + +-- Create supabase_admin role if it doesn't exist (required by wrappers extension) +DO $$ +BEGIN + IF NOT EXISTS (SELECT FROM pg_catalog.pg_roles WHERE rolname = 'supabase_admin') THEN + CREATE ROLE supabase_admin WITH SUPERUSER CREATEDB CREATEROLE LOGIN PASSWORD 'postgres'; + END IF; +END +$$; + +create schema if not exists extensions; +create extension if not exists wrappers with schema extensions; + +set search_path to public, extensions; + +create foreign data wrapper wasm_wrapper + handler wasm_fdw_handler + validator wasm_fdw_validator; + +-- ============================================================ +-- Server 1: pokeapi — Main PokéAPI server +-- ============================================================ +create server pokeapi + foreign data wrapper wasm_wrapper + options ( + fdw_package_url 'file:///openapi_fdw.wasm', + fdw_package_name 'supabase:openapi-fdw', + fdw_package_version '0.2.0', + base_url 'https://pokeapi.co/api/v2', + page_size '20', + page_size_param 'limit' + ); + +-- ============================================================ +-- Server 2: pokeapi_debug — Same API with debug output +-- ============================================================ +create server pokeapi_debug + foreign data wrapper wasm_wrapper + options ( + fdw_package_url 'file:///openapi_fdw.wasm', + fdw_package_name 'supabase:openapi-fdw', + fdw_package_version '0.2.0', + base_url 'https://pokeapi.co/api/v2', + page_size '20', + page_size_param 'limit', + debug 'true' + ); + +-- ============================================================ +-- Table 1: pokemon +-- Paginated list of all Pokémon (~1350 items) +-- Features: offset-based pagination, auto-detected `results` +-- wrapper key, LIMIT pushdown +-- ============================================================ +create foreign table pokemon ( + name text, + url text, + 
attrs jsonb +) + server pokeapi + options ( + endpoint '/pokemon', + rowid_column 'name' + ); + +-- ============================================================ +-- Table 2: pokemon_detail +-- Single Pokémon detail via path parameter +-- Features: path param substitution (WHERE name = 'pikachu'), +-- single object response, integer/boolean coercion, +-- jsonb for complex nested data (abilities, types, etc.) +-- ============================================================ +create foreign table pokemon_detail ( + id integer, + name text, + height integer, + weight integer, + base_experience integer, + is_default boolean, + order_num integer, + abilities jsonb, + types jsonb, + stats jsonb, + moves jsonb, + sprites jsonb, + attrs jsonb +) + server pokeapi + options ( + endpoint '/pokemon/{name}', + rowid_column 'id' + ); + +-- ============================================================ +-- Table 3: types +-- Paginated list of all Pokémon types (21 items) +-- Features: small paginated list, fetched in two pages (server page_size is 20) +-- ============================================================ +create foreign table types ( + name text, + url text, + attrs jsonb +) + server pokeapi + options ( + endpoint '/type', + rowid_column 'name' + ); + +-- ============================================================ +-- Table 4: type_detail +-- Single type detail via path parameter +-- Features: path param, damage relations as jsonb, +-- pokemon list per type +-- ============================================================ +create foreign table type_detail ( + id integer, + name text, + damage_relations jsonb, + pokemon jsonb, + moves jsonb, + attrs jsonb +) + server pokeapi + options ( + endpoint '/type/{name}', + rowid_column 'id' + ); + +-- ============================================================ +-- Table 5: berries +-- Paginated list of all berries (64 items) +-- Features: offset-based pagination, auto-detected `results` +-- ============================================================ 
+create foreign table berries ( + name text, + url text, + attrs jsonb +) + server pokeapi + options ( + endpoint '/berry', + rowid_column 'name' + ); + +-- ============================================================ +-- Table 6: berry_detail +-- Single berry detail via path parameter +-- Features: path param, integer columns for growth/harvest data, +-- jsonb for firmness/flavors/natural_gift_type +-- ============================================================ +create foreign table berry_detail ( + id integer, + name text, + growth_time integer, + max_harvest integer, + natural_gift_power integer, + size integer, + smoothness integer, + soil_dryness integer, + firmness jsonb, + flavors jsonb, + natural_gift_type jsonb, + attrs jsonb +) + server pokeapi + options ( + endpoint '/berry/{name}', + rowid_column 'id' + ); + +-- ============================================================ +-- Table 7: pokemon_debug +-- Same as pokemon but on the debug server +-- Features: debug output in INFO messages +-- ============================================================ +create foreign table pokemon_debug ( + name text, + url text, + attrs jsonb +) + server pokeapi_debug + options ( + endpoint '/pokemon', + rowid_column 'name' + ); diff --git a/wasm-wrappers/fdw/openapi_fdw/examples/threads/README.md b/wasm-wrappers/fdw/openapi_fdw/examples/threads/README.md new file mode 100644 index 00000000..b59a957c --- /dev/null +++ b/wasm-wrappers/fdw/openapi_fdw/examples/threads/README.md @@ -0,0 +1,493 @@ +# Threads API Example + +Query the [Meta Threads API](https://developers.facebook.com/docs/threads) using SQL. This example demonstrates authenticated API access, cursor-based pagination, path parameter substitution, and query param pushdown. 
+ +## Server Configuration + +```sql +create server threads + foreign data wrapper wasm_wrapper + options ( + fdw_package_url 'https://github.com/supabase/wrappers/releases/download/wasm_openapi_fdw_v0.2.0/openapi_fdw.wasm', + fdw_package_name 'supabase:openapi-fdw', + fdw_package_version '0.2.0', + base_url 'https://graph.threads.net', + api_key '<your-access-token>', + api_key_header 'access_token', + api_key_location 'query' + ); +``` + +--- + +## 1. Your Profile + +Single object response. The FDW returns one row with your Threads profile info. + +```sql +create foreign table my_profile ( + id text, + username text, + name text, + threads_profile_picture_url text, + threads_biography text, + is_verified boolean, + attrs jsonb +) + server threads + options ( + endpoint '/me?fields=id,username,name,threads_profile_picture_url,threads_biography,is_verified', + rowid_column 'id' + ); +``` + +```sql +SELECT username, name, threads_biography, is_verified +FROM my_profile; +``` + +| username | name | threads_biography | is_verified | +| --- | --- | --- | --- | +| youruser | Your Name | your bio here | false | + +> Your results will reflect your own Threads profile. + +## 2. Your Threads + +Paginated list of your posts. The FDW auto-detects the `data` wrapper key and follows cursor-based pagination (`paging.cursors.after`). 
+ +```sql +create foreign table my_threads ( + id text, + media_type text, + text text, + permalink text, + username text, + timestamp timestamptz, + shortcode text, + is_quote_post boolean, + topic_tag text, + link_attachment_url text, + is_verified boolean, + attrs jsonb +) + server threads + options ( + endpoint '/me/threads?fields=id,media_type,media_product_type,text,permalink,username,timestamp,shortcode,is_quote_post,topic_tag,link_attachment_url,is_verified', + rowid_column 'id' + ); +``` + +```sql +SELECT id, text, media_type, timestamp +FROM my_threads +LIMIT 5; +``` + +| id | text | media_type | timestamp | +| --- | --- | --- | --- | +| 18555728842018816 | Your latest thread post text here... | TEXT_POST | 2026-02-12 04:46:47+00 | +| 18051838931694754 | | IMAGE | 2026-02-11 14:12:47+00 | +| 18099070105919840 | | REPOST_FACADE | 2026-02-09 00:20:23+00 | + +> Your results will reflect your own posts. + +Full thread details with permalink, shortcode, and quote/topic info: + +```sql +SELECT id, text, media_type, permalink, shortcode, + is_quote_post, topic_tag, link_attachment_url, + is_verified, timestamp +FROM my_threads +LIMIT 5; +``` + +Filter by time in SQL: + +```sql +SELECT text, timestamp, topic_tag +FROM my_threads +WHERE timestamp > '2024-01-01' +LIMIT 5; +``` + +Filter by media type after fetching: + +```sql +SELECT id, text, media_type, timestamp +FROM my_threads +WHERE media_type = 'TEXT_POST' +LIMIT 5; +``` + +## 3. 
Your Replies + +Same pagination pattern as threads, filtered to your replies: + +```sql +create foreign table my_replies ( + id text, + media_type text, + text text, + permalink text, + username text, + timestamp timestamptz, + shortcode text, + is_quote_post boolean, + has_replies boolean, + is_reply boolean, + attrs jsonb +) + server threads + options ( + endpoint '/me/replies?fields=id,media_type,text,permalink,username,timestamp,shortcode,is_quote_post,has_replies,is_reply', + rowid_column 'id' + ); +``` + +```sql +SELECT text, timestamp, is_reply, has_replies +FROM my_replies +LIMIT 5; +``` + +| text | timestamp | is_reply | has_replies | +| --- | --- | --- | --- | +| Your reply text here... | 2026-02-13 19:25:51+00 | true | false | +| Another reply... | 2026-02-13 19:22:01+00 | true | true | + +Full reply details with permalink, media type, and quote status: + +```sql +SELECT id, text, media_type, permalink, username, shortcode, + is_quote_post, has_replies, is_reply, timestamp +FROM my_replies +LIMIT 5; +``` + +## 4. Thread Detail (Path Parameter) + +Look up a specific thread by ID. The `{thread_id}` placeholder in the endpoint is replaced with the value from your WHERE clause. 
+ +```sql +create foreign table thread_detail ( + id text, + media_type text, + text text, + permalink text, + username text, + timestamp timestamptz, + is_quote_post boolean, + has_replies boolean, + topic_tag text, + link_attachment_url text, + reply_audience text, + thread_id text, + attrs jsonb +) + server threads + options ( + endpoint '/{thread_id}?fields=id,media_type,text,permalink,username,timestamp,is_quote_post,has_replies,topic_tag,link_attachment_url,reply_audience', + rowid_column 'id' + ); +``` + +```sql +-- Get a thread ID from your posts first +SELECT id FROM my_threads LIMIT 1; + +-- Then fetch full details +SELECT text, media_type, timestamp, reply_audience +FROM thread_detail +WHERE thread_id = '<thread-id>'; +``` + +| text | media_type | timestamp | reply_audience | +| --- | --- | --- | --- | +| Your thread text... | TEXT_POST | 2026-02-12 04:46:47+00 | EVERYONE | + +## 5. Thread Replies + +Top-level replies to a specific thread. Requires `thread_id` path parameter: + +```sql +create foreign table thread_replies ( + id text, + text text, + username text, + permalink text, + timestamp timestamptz, + media_type text, + has_replies boolean, + is_reply boolean, + hide_status text, + is_verified boolean, + thread_id text, + attrs jsonb +) + server threads + options ( + endpoint '/{thread_id}/replies?fields=id,text,username,permalink,timestamp,media_type,has_replies,is_reply,hide_status,is_verified', + rowid_column 'id' + ); +``` + +```sql +SELECT username, text, timestamp, hide_status +FROM thread_replies +WHERE thread_id = '<thread-id>' +LIMIT 10; +``` + +Full reply metadata with permalink, media type, and verification status: + +```sql +SELECT id, username, text, media_type, permalink, + has_replies, is_reply, hide_status, is_verified, + timestamp +FROM thread_replies +WHERE thread_id = '<thread-id>' +LIMIT 10; +``` + +## 6. 
Thread Conversation + +All replies at all depths, flattened into a single list: + +```sql +create foreign table thread_conversation ( + id text, + text text, + username text, + permalink text, + timestamp timestamptz, + media_type text, + has_replies boolean, + is_reply boolean, + hide_status text, + thread_id text, + attrs jsonb +) + server threads + options ( + endpoint '/{thread_id}/conversation?fields=id,text,username,permalink,timestamp,media_type,has_replies,is_reply,hide_status&reverse=false', + rowid_column 'id' + ); +``` + +```sql +SELECT username, text, timestamp, is_reply +FROM thread_conversation +WHERE thread_id = '<thread-id>' +LIMIT 20; +``` + +Full conversation with media and reply chain info: + +```sql +SELECT id, username, text, media_type, permalink, + has_replies, is_reply, hide_status, + timestamp +FROM thread_conversation +WHERE thread_id = '<thread-id>' +LIMIT 20; +``` + +## 7. Keyword Search (Query Param Pushdown) + +When a WHERE clause references `q`, the FDW sends it as a query parameter to the `/keyword_search` endpoint. Requires the `threads_keyword_search` permission on your app. + +```sql +create foreign table keyword_search ( + id text, + text text, + media_type text, + permalink text, + username text, + timestamp timestamptz, + has_replies boolean, + is_quote_post boolean, + is_reply boolean, + topic_tag text, + q text, + attrs jsonb +) + server threads + options ( + endpoint '/keyword_search?fields=id,text,media_type,permalink,username,timestamp,has_replies,is_quote_post,is_reply,topic_tag', + rowid_column 'id' + ); +``` + +```sql +-- Pushes down to: GET /keyword_search?q=threads +SELECT username, text, timestamp +FROM keyword_search +WHERE q = 'threads' +LIMIT 3; +``` + +| username | text | timestamp | +| --- | --- | --- | +| youruser | A matching post about threads... | 2025-12-25 20:09:53+00 | +| youruser | Another matching result... 
| 2025-11-09 01:47:56+00 | + +Full search results with media type, engagement flags, and topic tags: + +```sql +SELECT id, username, text, media_type, permalink, + has_replies, is_quote_post, is_reply, topic_tag, + timestamp +FROM keyword_search +WHERE q = 'threads' +LIMIT 5; +``` + +## 8. Profile Lookup + +Look up any public profile by username. Requires the `threads_basic` permission. + +```sql +create foreign table profile_lookup ( + username text, + name text, + biography text, + profile_picture_url text, + follower_count bigint, + is_verified boolean, + likes_count bigint, + quotes_count bigint, + reposts_count bigint, + views_count bigint, + attrs jsonb +) + server threads + options ( + endpoint '/profile_lookup', + rowid_column 'username' + ); +``` + +```sql +SELECT name, biography, follower_count, is_verified +FROM profile_lookup +WHERE username = 'threads'; +``` + +| name | biography | follower_count | is_verified | +| --- | --- | --- | --- | +| Threads | | 100000000 | true | + +Full profile with engagement metrics: + +```sql +SELECT username, name, biography, profile_picture_url, + follower_count, likes_count, quotes_count, + reposts_count, views_count, is_verified +FROM profile_lookup +WHERE username = 'threads'; +``` + +## 9. Publishing Limit + +Check your current rate limit usage: + +```sql +create foreign table publishing_limit ( + quota_usage integer, + config jsonb, + reply_quota_usage integer, + reply_config jsonb, + attrs jsonb +) + server threads + options ( + endpoint '/me/threads_publishing_limit?fields=quota_usage,config,reply_quota_usage,reply_config' + ); +``` + +```sql +SELECT quota_usage, config, reply_quota_usage, reply_config +FROM publishing_limit; +``` + +| quota_usage | config | reply_quota_usage | reply_config | +| --- | --- | --- | --- | +| 0 | `{"quota_total": 250, "quota_duration": 86400}` | 0 | `{"quota_total": 1000, "quota_duration": 86400}` | + +## 10. 
Debug Mode + +The `keyword_search_debug` table uses the `threads_debug` server which has `debug 'true'`. This emits HTTP request details as PostgreSQL INFO messages. + +```sql +SELECT id, text FROM keyword_search_debug WHERE q = 'meta' LIMIT 3; +``` + +Look for INFO output like: + +```log +INFO: [openapi_fdw] HTTP GET https://graph.threads.net/keyword_search?... -> 200 (1234 bytes) +INFO: [openapi_fdw] Scan complete: 3 rows, 2 columns +``` + +## 11. IMPORT FOREIGN SCHEMA (Inline `spec_json`) + +Meta's Threads API does not publish an official OpenAPI spec at a public URL. Instead of `spec_url`, this example uses `spec_json` to provide a hand-written spec directly in the server definition. The inline spec describes just the 8 GET endpoints used by this example. + +This approach also works well for APIs that: + +- Don't publish an OpenAPI spec at all (like Threads) +- Publish a spec that's too large, outdated, or inaccurate +- Need a customized subset of endpoints + +The FDW parses the inline JSON the same way it would a fetched spec, auto-generating `CREATE FOREIGN TABLE` statements with correct column names and types. Endpoints with path parameters (`/{thread_id}/replies`, `/{thread_id}/conversation`) are skipped — those need manual table definitions like the ones above. + +Auto-generate table definitions from the inline spec: + +```sql +CREATE SCHEMA IF NOT EXISTS threads_auto; + +IMPORT FOREIGN SCHEMA "unused" +FROM SERVER threads_import +INTO threads_auto; +``` + +See what was generated: + +```sql +SELECT foreign_table_name FROM information_schema.foreign_tables +WHERE foreign_table_schema = 'threads_auto'; +``` + +## 12. 
The `attrs` Column + +Every table includes an `attrs jsonb` column that holds the complete JSON object for each row, so fields without a named column (and the mapped ones too) stay accessible: + +```sql +SELECT id, attrs->>'media_product_type' AS product_type, + attrs->>'shortcode' AS shortcode +FROM my_threads +LIMIT 3; +``` + +| id | product_type | shortcode | +| --- | --- | --- | +| 18555728842018816 | THREADS | ABC123xyz | +| 18051838931694754 | THREADS | DEF456uvw | +| 18099070105919840 | THREADS | GHI789rst | + +## Features Demonstrated + +| Feature | Table(s) | +| --- | --- | +| API key auth (query param) | All tables | +| Cursor-based pagination (auto-detected) | `my_threads`, `my_replies`, `keyword_search` | +| Path parameter substitution | `thread_detail`, `thread_replies`, `thread_conversation` | +| Query parameter pushdown | `keyword_search` (with `WHERE q = ...`), `profile_lookup` (with `WHERE username = ...`) | +| Single object response | `my_profile`, `thread_detail`, `profile_lookup` | +| Endpoint query string (field selection) | All tables except `profile_lookup` | +| Type coercion (timestamptz, boolean, bigint) | `my_threads`, `profile_lookup` | +| Debug mode | `keyword_search_debug` | +| IMPORT FOREIGN SCHEMA | `threads_import` server | +| `attrs` catch-all column | All tables | +| `rowid_column` | `my_threads`, `keyword_search`, `profile_lookup` | diff --git a/wasm-wrappers/fdw/openapi_fdw/examples/threads/init.sql b/wasm-wrappers/fdw/openapi_fdw/examples/threads/init.sql new file mode 100644 index 00000000..c7ddce61 --- /dev/null +++ b/wasm-wrappers/fdw/openapi_fdw/examples/threads/init.sql @@ -0,0 +1,301 @@ +-- OpenAPI FDW example: Threads API (Meta) +-- Requires a Threads access token (set THREADS_ACCESS_TOKEN env var). +-- See: https://developers.facebook.com/docs/threads +-- Note: fdw_package_url uses file:// for local Docker testing.
-- In production, use the
-- GitHub release URL: https://github.com/supabase/wrappers/releases/download/wasm_openapi_fdw_v0.2.0/openapi_fdw.wasm

-- The wrappers extension requires a supabase_admin role.
-- The guard skips creation when the role is already present.
DO $$
BEGIN
  IF NOT EXISTS (
    SELECT 1
    FROM pg_catalog.pg_roles
    WHERE rolname = 'supabase_admin'
  ) THEN
    CREATE ROLE supabase_admin WITH SUPERUSER CREATEDB CREATEROLE LOGIN PASSWORD 'postgres';
  END IF;
END
$$;

-- Install the wrappers extension into its own schema and make it resolvable.
create schema if not exists extensions;
create extension if not exists wrappers with schema extensions;

set search_path to public, extensions;

create foreign data wrapper wasm_wrapper
  handler wasm_fdw_handler
  validator wasm_fdw_validator;

-- ------------------------------------------------------------
-- Server 1: threads (main Threads API server)
--   The key is sent as the `access_token` query parameter.
-- ------------------------------------------------------------
create server threads
  foreign data wrapper wasm_wrapper
  options (
    fdw_package_url     'file:///openapi_fdw.wasm',
    fdw_package_name    'supabase:openapi-fdw',
    fdw_package_version '0.2.0',
    base_url            'https://graph.threads.net',
    api_key             'placeholder',
    api_key_header      'access_token',
    api_key_location    'query'
  );

-- ------------------------------------------------------------
-- Server 2: threads_debug (same API, with debug output enabled)
-- ------------------------------------------------------------
create server threads_debug
  foreign data wrapper wasm_wrapper
  options (
    fdw_package_url     'file:///openapi_fdw.wasm',
    fdw_package_name    'supabase:openapi-fdw',
    fdw_package_version '0.2.0',
    base_url            'https://graph.threads.net',
    api_key             'placeholder',
    api_key_header      'access_token',
    api_key_location    'query',
    debug               'true'
  );

-- ------------------------------------------------------------
-- Server 3: threads_import (inline spec for IMPORT FOREIGN SCHEMA)
-- ------------------------------------------------------------
create server
threads_import + foreign data wrapper wasm_wrapper + options ( + fdw_package_url 'file:///openapi_fdw.wasm', + fdw_package_name 'supabase:openapi-fdw', + fdw_package_version '0.2.0', + base_url 'https://graph.threads.net', + api_key 'placeholder', + api_key_header 'access_token', + api_key_location 'query', + spec_json '{"openapi":"3.0.0","info":{"title":"Threads API","version":"1.0.0"},"servers":[{"url":"https://graph.threads.net"}],"paths":{"/me":{"get":{"parameters":[{"name":"fields","in":"query","schema":{"type":"string"}}],"responses":{"default":{"content":{"application/json":{"schema":{"type":"object","properties":{"id":{"type":"string"},"username":{"type":"string"},"name":{"type":"string"},"threads_profile_picture_url":{"type":"string"},"threads_biography":{"type":"string"}}}}}}}}},"/me/threads":{"get":{"parameters":[{"name":"fields","in":"query","schema":{"type":"string"}},{"name":"since","in":"query","schema":{"type":"string"}},{"name":"until","in":"query","schema":{"type":"string"}},{"name":"limit","in":"query","schema":{"type":"integer"}},{"name":"before","in":"query","schema":{"type":"string"}},{"name":"after","in":"query","schema":{"type":"string"}}],"responses":{"default":{"content":{"application/json":{"schema":{"type":"object","properties":{"data":{"type":"array","items":{"type":"object","properties":{"id":{"type":"string"},"media_product_type":{"type":"string"},"media_type":{"type":"string"},"permalink":{"type":"string"},"owner":{"type":"object","properties":{"id":{"type":"string"}}},"username":{"type":"string"},"text":{"type":"string"},"timestamp":{"type":"string","format":"date-time"},"shortcode":{"type":"string"},"is_quote_post":{"type":"boolean"},"has_replies":{"type":"boolean"},"poll_attachment":{"type":"object","properties":{"option_a":{"type":"string"},"option_b":{"type":"string"},"option_c":{"type":"string"},"option_d":{"type":"string"},"option_a_votes_percentage":{"type":"number"},"option_b_votes_percentage":{"type":"number"},"option_c_vote
s_percentage":{"type":"number"},"option_d_votes_percentage":{"type":"number"},"expiration_timestamp":{"type":"string","format":"date-time"}}}}}},"paging":{"type":"object","properties":{"cursors":{"type":"object","properties":{"before":{"type":"string"},"after":{"type":"string"}}}}}}}}}}}}},"/me/replies":{"get":{"parameters":[{"name":"fields","in":"query","schema":{"type":"string"}},{"name":"since","in":"query","schema":{"type":"string"}},{"name":"until","in":"query","schema":{"type":"string"}},{"name":"limit","in":"query","schema":{"type":"integer"}},{"name":"before","in":"query","schema":{"type":"string"}},{"name":"after","in":"query","schema":{"type":"string"}}],"responses":{"default":{"content":{"application/json":{"schema":{"type":"object","properties":{"data":{"type":"array","items":{"type":"object","properties":{"id":{"type":"string"},"media_product_type":{"type":"string"},"media_type":{"type":"string"},"permalink":{"type":"string"},"username":{"type":"string"},"text":{"type":"string"},"timestamp":{"type":"string","format":"date-time"},"shortcode":{"type":"string"},"is_quote_post":{"type":"boolean"},"has_replies":{"type":"boolean"}}}},"paging":{"type":"object","properties":{"cursors":{"type":"object","properties":{"before":{"type":"string"},"after":{"type":"string"}}}}}}}}}}}}},"/{thread_id}/replies":{"get":{"parameters":[{"name":"fields","in":"query","schema":{"type":"string"}},{"name":"reverse","in":"query","schema":{"type":"boolean"}}],"responses":{"default":{"content":{"application/json":{"schema":{"type":"object","properties":{"data":{"type":"array","items":{"type":"object","properties":{"id":{"type":"string"},"text":{"type":"string"},"timestamp":{"type":"string","format":"date-time"},"media_product_type":{"type":"string"},"media_type":{"type":"string"},"permalink":{"type":"string"},"shortcode":{"type":"string"},"username":{"type":"string"},"is_quote_post":{"type":"boolean"},"has_replies":{"type":"boolean"},"is_reply":{"type":"boolean"},"is_reply_owned_by
_me":{"type":"boolean"},"root_post":{"type":"object","properties":{"id":{"type":"string"}}},"replied_to":{"type":"object","properties":{"id":{"type":"string"}}}}}},"paging":{"type":"object","properties":{"cursors":{"type":"object","properties":{"before":{"type":"string"},"after":{"type":"string"}}}}}}}}}}}}},"/{thread_id}/conversation":{"get":{"parameters":[{"name":"fields","in":"query","schema":{"type":"string"}},{"name":"reverse","in":"query","schema":{"type":"boolean"}}],"responses":{"default":{"content":{"application/json":{"schema":{"type":"object","properties":{"data":{"type":"array","items":{"type":"object","properties":{"id":{"type":"string"},"text":{"type":"string"},"timestamp":{"type":"string","format":"date-time"},"media_product_type":{"type":"string"},"media_type":{"type":"string"},"permalink":{"type":"string"},"shortcode":{"type":"string"},"username":{"type":"string"},"is_quote_post":{"type":"boolean"},"has_replies":{"type":"boolean"},"is_reply":{"type":"boolean"},"is_reply_owned_by_me":{"type":"boolean"},"root_post":{"type":"object","properties":{"id":{"type":"string"}}},"replied_to":{"type":"object","properties":{"id":{"type":"string"}}}}}},"paging":{"type":"object","properties":{"cursors":{"type":"object","properties":{"before":{"type":"string"},"after":{"type":"string"}}}}}}}}}}}}},"/keyword_search":{"get":{"parameters":[{"name":"q","in":"query","schema":{"type":"string"}},{"name":"search_type","in":"query","schema":{"type":"string"}},{"name":"fields","in":"query","schema":{"type":"string"}},{"name":"search_mode","in":"query","schema":{"type":"string"}},{"name":"limit","in":"query","schema":{"type":"integer"}},{"name":"since","in":"query","schema":{"type":"string"}},{"name":"until","in":"query","schema":{"type":"string"}}],"responses":{"default":{"content":{"application/json":{"schema":{"type":"object","properties":{"data":{"type":"array","items":{"type":"object","properties":{"id":{"type":"string"},"media_product_type":{"type":"string"},"media_type
":{"type":"string"},"permalink":{"type":"string"},"username":{"type":"string"},"text":{"type":"string"},"timestamp":{"type":"string","format":"date-time"},"shortcode":{"type":"string"},"is_quote_post":{"type":"boolean"},"has_replies":{"type":"boolean"},"is_reply":{"type":"boolean"},"topic_tag":{"type":"string"}}}},"paging":{"type":"object","properties":{"cursors":{"type":"object","properties":{"before":{"type":"string"},"after":{"type":"string"}}}}}}}}}}}}},"/profile_lookup":{"get":{"parameters":[{"name":"username","in":"query","schema":{"type":"string"}}],"responses":{"default":{"content":{"application/json":{"schema":{"type":"object","properties":{"username":{"type":"string"},"name":{"type":"string"},"threads_profile_picture_url":{"type":"string"},"threads_biography":{"type":"string"},"follower_count":{"type":"integer"},"is_verified":{"type":"boolean"},"likes_count":{"type":"integer"},"quotes_count":{"type":"integer"},"replies_count":{"type":"integer"},"reposts_count":{"type":"integer"},"views_count":{"type":"integer"}}}}}}}}},"/me/threads_publishing_limit":{"get":{"parameters":[{"name":"fields","in":"query","schema":{"type":"string"}}],"responses":{"default":{"content":{"application/json":{"schema":{"type":"object","properties":{"data":{"type":"array","items":{"type":"object","properties":{"quota_usage":{"type":"integer"},"config":{"type":"object","properties":{"quota_total":{"type":"integer"},"quota_duration":{"type":"integer"}}},"reply_quota_usage":{"type":"integer"},"reply_config":{"type":"object","properties":{"quota_total":{"type":"integer"},"quota_duration":{"type":"integer"}}}}}}}}}}}}}}}}' + ); + +-- ============================================================ +-- Table 1: my_profile +-- Single object response — GET /me with profile fields +-- Features: api_key in query param, field selection via endpoint query string +-- ============================================================ +create foreign table my_profile ( + id text, + username text, + name 
text, + threads_profile_picture_url text, + threads_biography text, + is_verified boolean, + attrs jsonb +) + server threads + options ( + endpoint '/me?fields=id,username,name,threads_profile_picture_url,threads_biography,is_verified', + rowid_column 'id' + ); + +-- ============================================================ +-- Table 2: my_threads +-- Paginated list of the authenticated user's posts +-- Features: cursor-based pagination (auto-detected via data/paging), +-- timestamptz coercion, boolean coercion, attrs catch-all +-- ============================================================ +create foreign table my_threads ( + id text, + media_type text, + text text, + permalink text, + username text, + timestamp timestamptz, + shortcode text, + is_quote_post boolean, + topic_tag text, + link_attachment_url text, + is_verified boolean, + attrs jsonb +) + server threads + options ( + endpoint '/me/threads?fields=id,media_type,media_product_type,text,permalink,username,timestamp,shortcode,is_quote_post,topic_tag,link_attachment_url,is_verified', + rowid_column 'id' + ); + +-- ============================================================ +-- Table 3: my_replies +-- Paginated list of the authenticated user's replies +-- Features: same pagination as my_threads, reply-specific fields +-- ============================================================ +create foreign table my_replies ( + id text, + media_type text, + text text, + permalink text, + username text, + timestamp timestamptz, + shortcode text, + is_quote_post boolean, + has_replies boolean, + is_reply boolean, + attrs jsonb +) + server threads + options ( + endpoint '/me/replies?fields=id,media_type,text,permalink,username,timestamp,shortcode,is_quote_post,has_replies,is_reply', + rowid_column 'id' + ); + +-- ============================================================ +-- Table 4: thread_detail +-- Single media object by ID — path parameter substitution +-- Features: path param {thread_id}, single object 
response +-- ============================================================ +create foreign table thread_detail ( + id text, + media_type text, + text text, + permalink text, + username text, + timestamp timestamptz, + is_quote_post boolean, + has_replies boolean, + topic_tag text, + link_attachment_url text, + reply_audience text, + thread_id text, + attrs jsonb +) + server threads + options ( + endpoint '/{thread_id}?fields=id,media_type,text,permalink,username,timestamp,is_quote_post,has_replies,topic_tag,link_attachment_url,reply_audience', + rowid_column 'id' + ); + +-- ============================================================ +-- Table 5: thread_replies +-- Top-level replies to a specific thread — path param + pagination +-- Features: path param, reverse chronological order, reply metadata +-- ============================================================ +create foreign table thread_replies ( + id text, + text text, + username text, + permalink text, + timestamp timestamptz, + media_type text, + has_replies boolean, + is_reply boolean, + hide_status text, + is_verified boolean, + thread_id text, + attrs jsonb +) + server threads + options ( + endpoint '/{thread_id}/replies?fields=id,text,username,permalink,timestamp,media_type,has_replies,is_reply,hide_status,is_verified', + rowid_column 'id' + ); + +-- ============================================================ +-- Table 6: thread_conversation +-- Full flattened conversation (all reply depths) +-- Features: path param, all-depth replies, chronological ordering +-- ============================================================ +create foreign table thread_conversation ( + id text, + text text, + username text, + permalink text, + timestamp timestamptz, + media_type text, + has_replies boolean, + is_reply boolean, + hide_status text, + thread_id text, + attrs jsonb +) + server threads + options ( + endpoint 
'/{thread_id}/conversation?fields=id,text,username,permalink,timestamp,media_type,has_replies,is_reply,hide_status&reverse=false', + rowid_column 'id' + ); + +-- ============================================================ +-- Table 7: keyword_search +-- Search for public threads by keyword or topic tag +-- Features: query param pushdown (q, search_type, search_mode) +-- ============================================================ +create foreign table keyword_search ( + id text, + text text, + media_type text, + permalink text, + username text, + timestamp timestamptz, + has_replies boolean, + is_quote_post boolean, + is_reply boolean, + topic_tag text, + q text, + attrs jsonb +) + server threads + options ( + endpoint '/keyword_search?fields=id,text,media_type,permalink,username,timestamp,has_replies,is_quote_post,is_reply,topic_tag', + rowid_column 'id' + ); + +-- ============================================================ +-- Table 8: profile_lookup +-- Look up a public profile by username +-- Features: query param pushdown (username), single object response +-- ============================================================ +create foreign table profile_lookup ( + username text, + name text, + biography text, + profile_picture_url text, + follower_count bigint, + is_verified boolean, + likes_count bigint, + quotes_count bigint, + reposts_count bigint, + views_count bigint, + attrs jsonb +) + server threads + options ( + endpoint '/profile_lookup', + rowid_column 'username' + ); + +-- ============================================================ +-- Table 9: publishing_limit +-- Check the user's current publishing rate limit +-- Features: nested data array response +-- ============================================================ +create foreign table publishing_limit ( + quota_usage integer, + config jsonb, + reply_quota_usage integer, + reply_config jsonb, + attrs jsonb +) + server threads + options ( + endpoint 
'/me/threads_publishing_limit?fields=quota_usage,config,reply_quota_usage,reply_config'
  );

-- ============================================================
-- Table 10: keyword_search_debug
-- Same as keyword_search but on the debug server
-- Features: debug output in INFO messages
-- ============================================================
create foreign table keyword_search_debug (
  id text,
  text text,
  username text,
  timestamp timestamptz,
  q text,
  attrs jsonb
)
  server threads_debug
  options (
    endpoint '/keyword_search?fields=id,text,username,timestamp',
    rowid_column 'id'
  );
diff --git a/wasm-wrappers/fdw/openapi_fdw/src/column_matching.rs b/wasm-wrappers/fdw/openapi_fdw/src/column_matching.rs
new file mode 100644
index 00000000..4ac490f1
--- /dev/null
+++ b/wasm-wrappers/fdw/openapi_fdw/src/column_matching.rs
@@ -0,0 +1,309 @@
//! Column name matching and JSON-to-Cell conversion
//!
//! Handles the mapping between SQL column names and JSON keys,
//! including camelCase, snake_case, and normalized matching strategies.

use std::borrow::Cow;

use serde_json::Value as JsonValue;

use crate::bindings::supabase::wrappers::{
    time,
    types::{Cell, FdwError, TypeOid},
};
use crate::{OpenApiFdw, extract_effective_row};

/// How a SQL column name was resolved to a JSON key.
///
/// Avoids cloning strings that already exist in [`CachedColumn`] — only the
/// case-insensitive fallback (rare) needs its own allocation.
#[derive(Debug, Clone, PartialEq)]
pub(crate) enum KeyMatch {
    /// JSON key matches `CachedColumn::name` exactly
    Exact,
    /// JSON key matches `CachedColumn::camel_name`
    CamelCase,
    /// JSON key matched case-insensitively (stores the original API key).
    ///
    /// `build_column_key_map` also produces this variant for the normalized
    /// (alphanumeric-only) match, so the stored key can differ from the column
    /// name by more than letter case (e.g. key `@id` for column `_id`).
    CaseInsensitive(String),
}

/// Pre-computed column metadata to avoid repeated WASM boundary crossings.
///
/// During `iter_scan`, each call to `ctx.get_columns()`, `col.name()`, and
/// `col.type_oid()` crosses the WASM boundary.
By caching these once in +/// `begin_scan`, we eliminate ~2000 boundary crossings per 100-row scan. +#[derive(Debug)] +pub(crate) struct CachedColumn { + pub name: String, + pub type_oid: TypeOid, + pub camel_name: String, + pub lower_name: String, + /// Alphanumeric-only lowercase name for normalized matching. + /// Strips `@`, `.`, `-`, `$`, etc. so `@id` → `_id` → `id` can match. + pub alnum_name: String, +} + +/// Convert `snake_case` to `camelCase` +pub(crate) fn to_camel_case(s: &str) -> String { + let mut result = String::with_capacity(s.len()); + let mut capitalize_next = false; + + for c in s.chars() { + if c == '_' { + capitalize_next = true; + } else if capitalize_next { + result.push(c.to_uppercase().next().unwrap_or(c)); + capitalize_next = false; + } else { + result.push(c); + } + } + + result +} + +/// Strip non-alphanumeric chars and lowercase for normalized matching. +/// +/// Used to match JSON keys with special characters (`@id`, `user.name`, `$oid`) +/// to sanitized SQL column names (`_id`, `user_name`, `_oid`). +pub(crate) fn normalize_to_alnum(s: &str) -> String { + s.chars() + .filter(|c| c.is_alphanumeric()) + .collect::() + .to_lowercase() +} + +impl OpenApiFdw { + /// Normalize a date/datetime string for RFC3339 parsing. + /// + /// Handles two non-RFC3339 formats: + /// - Date-only `"2024-01-15"` → `"2024-01-15T00:00:00Z"` + /// - ISO 8601 tz without colon `"2024-01-15T12:00:00+0000"` → `"2024-01-15T12:00:00+00:00"` + /// + /// Returns `Cow` to avoid allocating when the string is already valid. 
+ pub(crate) fn normalize_datetime(s: &str) -> Cow<'_, str> { + // Date-only: exactly 10 chars matching YYYY-MM-DD pattern + if s.len() == 10 && s.as_bytes().get(4) == Some(&b'-') && s.as_bytes().get(7) == Some(&b'-') + { + return Cow::Owned(format!("{s}T00:00:00Z")); + } + + // Fix timezone offset without colon: +0000 → +00:00, -0500 → -05:00 + // ISO 8601 allows ±HHMM but RFC 3339 requires ±HH:MM + let bytes = s.as_bytes(); + let len = bytes.len(); + if len >= 5 { + let sign_pos = len - 4; + if (bytes[sign_pos - 1] == b'+' || bytes[sign_pos - 1] == b'-') + && bytes[sign_pos..].iter().all(|b| b.is_ascii_digit()) + { + let mut fixed = String::with_capacity(len + 1); + fixed.push_str(&s[..sign_pos + 2]); + fixed.push(':'); + fixed.push_str(&s[sign_pos + 2..]); + return Cow::Owned(fixed); + } + } + + Cow::Borrowed(s) + } + + /// Build a map from column index to resolved JSON key, using the first row's keys. + /// + /// This runs the 3-step matching (exact → camelCase → case-insensitive) once per + /// column instead of once per column per row. Called after each `make_request`. + pub(crate) fn build_column_key_map(&mut self) { + if self.cached_columns.is_empty() || self.src_rows.is_empty() { + self.column_key_map = vec![None; self.cached_columns.len()]; + return; + } + + let first_row = &self.src_rows[0]; + let effective_row = extract_effective_row(first_row, self.object_path.as_deref()); + + self.column_key_map = if let Some(obj) = effective_row.as_object() { + self.cached_columns + .iter() + .map(|cc| { + // attrs is special-cased (returns entire row), no key lookup needed + if cc.name == "attrs" { + return None; + } + if obj.contains_key(&cc.name) { + Some(KeyMatch::Exact) + } else if obj.contains_key(&cc.camel_name) { + Some(KeyMatch::CamelCase) + } else if let Some(key) = obj.keys().find(|k| k.to_lowercase() == cc.lower_name) + { + Some(KeyMatch::CaseInsensitive(key.clone())) + } else { + // Normalized match: strip non-alphanumeric chars and compare. 
+ // Handles JSON-LD @-prefixed keys (@id↔_id), dotted names + // (user.name↔user_name), and other special-char properties. + obj.keys() + .find(|k| normalize_to_alnum(k) == cc.alnum_name) + .cloned() + .map(KeyMatch::CaseInsensitive) + } + }) + .collect() + } else { + vec![None; self.cached_columns.len()] + }; + } + + /// Convert a JSON value to a Cell based on the target PostgreSQL type. + /// + /// Handles type coercion, date/time parsing, and numeric conversions. + pub(crate) fn convert_json_to_cell( + src: &JsonValue, + type_oid: &TypeOid, + ) -> Result, FdwError> { + let cell = match type_oid { + TypeOid::Bool => src.as_bool().map(Cell::Bool), + TypeOid::I8 => src + .as_i64() + .and_then(|v| i8::try_from(v).ok()) + .map(Cell::I8), + TypeOid::I16 => src + .as_i64() + .and_then(|v| i16::try_from(v).ok()) + .map(Cell::I16), + TypeOid::I32 => src + .as_i64() + .and_then(|v| i32::try_from(v).ok()) + .map(Cell::I32), + TypeOid::I64 => src.as_i64().map(Cell::I64), + #[allow(clippy::cast_possible_truncation)] + TypeOid::F32 => src.as_f64().map(|v| Cell::F32(v as f32)), + TypeOid::F64 => src.as_f64().map(Cell::F64), + TypeOid::Numeric => src.as_f64().map(Cell::Numeric), + TypeOid::String => Some(Cell::String( + src.as_str() + .map_or_else(|| src.to_string(), ToOwned::to_owned), + )), + TypeOid::Date => { + if let Some(s) = src.as_str() { + let ts = time::parse_from_rfc3339(&Self::normalize_datetime(s))?; + Some(Cell::Date(ts / 1_000_000)) + } else { + // Unix timestamp (seconds since epoch) + src.as_i64().map(Cell::Date) + } + } + TypeOid::Timestamp | TypeOid::Timestamptz => { + let wrap: fn(i64) -> Cell = if matches!(type_oid, TypeOid::Timestamp) { + Cell::Timestamp + } else { + Cell::Timestamptz + }; + if let Some(s) = src.as_str() { + let ts = time::parse_from_rfc3339(&Self::normalize_datetime(s))?; + Some(wrap(ts)) + } else { + // Unix timestamp (seconds since epoch) → microseconds + src.as_i64() + .and_then(|epoch| epoch.checked_mul(1_000_000)) + .map(wrap) + } + 
} + TypeOid::Uuid => src.as_str().map(|v| Cell::Uuid(v.to_owned())), + // Json and unknown types: serialize to JSON string + TypeOid::Json | TypeOid::Other(_) => Some(Cell::Json(src.to_string())), + }; + + Ok(cell) + } + + /// Convert a string value from path/query params to a Cell based on target type. + /// + /// Used for injecting WHERE clause values that were used as URL parameters. + pub(crate) fn convert_string_to_cell(value: &str, type_oid: &TypeOid) -> Option { + match type_oid { + TypeOid::Bool => value.parse::().ok().map(Cell::Bool), + TypeOid::I8 => value.parse::().ok().map(Cell::I8), + TypeOid::I16 => value.parse::().ok().map(Cell::I16), + TypeOid::I32 => value.parse::().ok().map(Cell::I32), + TypeOid::I64 => value.parse::().ok().map(Cell::I64), + #[allow(clippy::cast_possible_truncation)] + TypeOid::F32 => value.parse::().ok().map(|v| Cell::F32(v as f32)), + TypeOid::F64 => value.parse::().ok().map(Cell::F64), + TypeOid::Numeric => value.parse::().ok().map(Cell::Numeric), + TypeOid::Date => time::parse_from_rfc3339(&Self::normalize_datetime(value)) + .ok() + .map(|ts| Cell::Date(ts / 1_000_000)), + TypeOid::Timestamp | TypeOid::Timestamptz => { + let wrap: fn(i64) -> Cell = if matches!(type_oid, TypeOid::Timestamp) { + Cell::Timestamp + } else { + Cell::Timestamptz + }; + time::parse_from_rfc3339(&Self::normalize_datetime(value)) + .ok() + .map(wrap) + } + TypeOid::Json => Some(Cell::Json(value.to_string())), + _ => Some(Cell::String(value.to_string())), + } + } + + /// Convert a JSON value to a Cell using cached column metadata and pre-resolved key map. + /// + /// Uses `CachedColumn` fields instead of WASM resource methods, and the pre-built + /// `column_key_map` for O(1) JSON key lookup instead of per-row 3-step matching. 
+ pub(crate) fn json_to_cell_cached( + &self, + src_row: &JsonValue, + col_idx: usize, + ) -> Result, FdwError> { + let cc = &self.cached_columns[col_idx]; + + // Special handling for 'attrs' column - returns entire row as JSON + if cc.name == "attrs" { + return Ok(Some(Cell::Json(src_row.to_string()))); + } + + // If this column was used as a query/path parameter, inject the WHERE clause + // value directly. Coerce to target column type to avoid type mismatches. + if let Some(value) = self.injected_params.get(&cc.lower_name) { + let cell = Self::convert_string_to_cell(value, &cc.type_oid); + return Ok(cell.or_else(|| Some(Cell::String(value.clone())))); + } + + // Use pre-resolved key from column_key_map for O(1) lookup + let src = src_row.as_object().and_then(|obj| { + match self.column_key_map.get(col_idx) { + Some(Some(KeyMatch::Exact)) => obj.get(&cc.name), + Some(Some(KeyMatch::CamelCase)) => obj.get(&cc.camel_name), + Some(Some(KeyMatch::CaseInsensitive(key))) => obj.get(key), + _ => { + // Fallback: 4-step matching for heterogeneous row shapes + obj.get(&cc.name) + .or_else(|| obj.get(&cc.camel_name)) + .or_else(|| { + obj.iter() + .find(|(k, _)| k.to_lowercase() == cc.lower_name) + .map(|(_, v)| v) + }) + .or_else(|| { + // Normalized: strip non-alnum, compare (handles @-keys, dots, etc.) 
+ obj.iter() + .find(|(k, _)| normalize_to_alnum(k) == cc.alnum_name) + .map(|(_, v)| v) + }) + } + } + }); + + let src = match src { + Some(v) if !v.is_null() => v, + _ => return Ok(None), + }; + + Self::convert_json_to_cell(src, &cc.type_oid) + } +} + +#[cfg(test)] +#[path = "column_matching_tests.rs"] +mod tests; diff --git a/wasm-wrappers/fdw/openapi_fdw/src/column_matching_tests.rs b/wasm-wrappers/fdw/openapi_fdw/src/column_matching_tests.rs new file mode 100644 index 00000000..1e121f75 --- /dev/null +++ b/wasm-wrappers/fdw/openapi_fdw/src/column_matching_tests.rs @@ -0,0 +1,1029 @@ +use super::*; + +// --- to_camel_case tests --- + +#[test] +fn test_to_camel_case() { + assert_eq!(to_camel_case("snake_case"), "snakeCase"); + assert_eq!(to_camel_case("already"), "already"); + assert_eq!(to_camel_case("multi_word_name"), "multiWordName"); + assert_eq!(to_camel_case(""), ""); +} + +#[test] +fn test_to_camel_case_trailing_underscore() { + assert_eq!(to_camel_case("name_"), "name"); +} + +#[test] +fn test_to_camel_case_double_underscore() { + assert_eq!(to_camel_case("a__b"), "aB"); +} + +#[test] +fn test_to_camel_case_single_char() { + assert_eq!(to_camel_case("x"), "x"); +} + +#[test] +fn test_to_camel_case_with_numbers() { + assert_eq!(to_camel_case("field_2_name"), "field2Name"); +} + +#[test] +fn test_to_camel_case_all_uppercase() { + // Already uppercase segments + assert_eq!(to_camel_case("a_b_c"), "aBC"); +} + +// --- normalize_to_alnum tests --- + +#[test] +fn test_normalize_to_alnum_basic() { + assert_eq!(normalize_to_alnum("hello"), "hello"); + assert_eq!(normalize_to_alnum("Hello"), "hello"); + assert_eq!(normalize_to_alnum(""), ""); +} + +#[test] +fn test_normalize_to_alnum_special_chars() { + assert_eq!(normalize_to_alnum("@id"), "id"); + assert_eq!(normalize_to_alnum("$oid"), "oid"); + assert_eq!(normalize_to_alnum("user.name"), "username"); + assert_eq!(normalize_to_alnum("user-name"), "username"); + assert_eq!(normalize_to_alnum("_id"), "id"); +} + 
+#[test] +fn test_normalize_to_alnum_mixed() { + assert_eq!(normalize_to_alnum("user_Name"), "username"); + assert_eq!(normalize_to_alnum("@Type"), "type"); + assert_eq!(normalize_to_alnum("123-abc"), "123abc"); +} + +// --- build_column_key_map tests --- + +/// Helper: create an FDW with cached columns and src_rows, then build key map +fn build_key_map( + col_names: &[&str], + rows: Vec, + object_path: Option<&str>, +) -> Vec> { + let mut fdw = OpenApiFdw { + src_rows: rows, + object_path: object_path.map(String::from), + ..Default::default() + }; + fdw.cached_columns = col_names + .iter() + .map(|name| CachedColumn { + name: name.to_string(), + type_oid: TypeOid::String, + camel_name: to_camel_case(name), + lower_name: name.to_lowercase(), + alnum_name: normalize_to_alnum(name), + }) + .collect(); + fdw.build_column_key_map(); + fdw.column_key_map +} + +#[test] +fn test_build_column_key_map_exact() { + let rows = vec![serde_json::json!({"id": 1, "name": "alice"})]; + let map = build_key_map(&["id", "name"], rows, None); + assert_eq!(map, vec![Some(KeyMatch::Exact), Some(KeyMatch::Exact)]); +} + +#[test] +fn test_build_column_key_map_camel() { + // API returns camelCase, SQL columns are snake_case + let rows = vec![serde_json::json!({"firstName": "Alice", "lastName": "Smith"})]; + let map = build_key_map(&["first_name", "last_name"], rows, None); + assert_eq!( + map, + vec![Some(KeyMatch::CamelCase), Some(KeyMatch::CamelCase)] + ); +} + +#[test] +fn test_build_column_key_map_case_insensitive() { + // API returns PascalCase, SQL columns are lowercase + let rows = vec![serde_json::json!({"Id": 1, "UserName": "alice"})]; + let map = build_key_map(&["id", "username"], rows, None); + assert_eq!( + map, + vec![ + Some(KeyMatch::CaseInsensitive("Id".to_string())), + Some(KeyMatch::CaseInsensitive("UserName".to_string())) + ] + ); +} + +#[test] +fn test_build_column_key_map_empty_rows() { + let map = build_key_map(&["id", "name"], vec![], None); + assert_eq!(map, 
vec![None, None]); +} + +#[test] +fn test_build_column_key_map_missing_column() { + let rows = vec![serde_json::json!({"id": 1, "name": "alice"})]; + let map = build_key_map(&["id", "email"], rows, None); + assert_eq!(map, vec![Some(KeyMatch::Exact), None]); +} + +#[test] +fn test_build_column_key_map_attrs_skipped() { + let rows = vec![serde_json::json!({"id": 1, "name": "alice"})]; + let map = build_key_map(&["id", "attrs"], rows, None); + // attrs should be None (special-cased, not looked up) + assert_eq!(map, vec![Some(KeyMatch::Exact), None]); +} + +#[test] +fn test_build_column_key_map_with_object_path() { + // GeoJSON-style: keys live under /properties + let rows = vec![serde_json::json!({ + "type": "Feature", + "properties": {"name": "Park", "area": 500} + })]; + let map = build_key_map(&["name", "area"], rows, Some("/properties")); + assert_eq!(map, vec![Some(KeyMatch::Exact), Some(KeyMatch::Exact)]); +} + +#[test] +fn test_build_column_key_map_at_prefixed_keys() { + // JSON-LD @-prefixed keys: @id sanitizes to _id. + // The normalized matching step strips non-alnum chars: + // column "_id" → alnum "id", key "@id" → alnum "id" → match! 
+ let rows = vec![serde_json::json!({"@id": "urn:test", "@type": "Feature"})]; + let map = build_key_map(&["_id", "_type"], rows, None); + assert_eq!( + map, + vec![ + Some(KeyMatch::CaseInsensitive("@id".to_string())), + Some(KeyMatch::CaseInsensitive("@type".to_string())) + ] + ); +} + +#[test] +fn test_build_column_key_map_dotted_keys() { + // Dotted property names: "user.name" sanitizes to "user_name" + // Normalized: "username" == "username" → match + let rows = vec![serde_json::json!({"user.name": "Alice", "user.email": "a@b.com"})]; + let map = build_key_map(&["user_name", "user_email"], rows, None); + assert_eq!( + map, + vec![ + Some(KeyMatch::CaseInsensitive("user.name".to_string())), + Some(KeyMatch::CaseInsensitive("user.email".to_string())) + ] + ); +} + +#[test] +fn test_build_column_key_map_dollar_prefixed_keys() { + // MongoDB-style $-prefixed keys: "$oid" sanitizes to "_oid" + let rows = vec![serde_json::json!({"$oid": "abc123"})]; + let map = build_key_map(&["_oid"], rows, None); + assert_eq!( + map, + vec![Some(KeyMatch::CaseInsensitive("$oid".to_string()))] + ); +} + +#[test] +fn test_build_column_key_map_mixed_conventions() { + // Mixed API response: some exact, some camel, some case-insensitive + // Note: case-insensitive compares k.to_lowercase() == col.lower_name, + // so it only works for pure case differences (e.g., "Status" vs "status"), + // not camelCase→snake_case transformations. 
+ let rows = vec![serde_json::json!({ + "id": 1, + "firstName": "Alice", + "Status": "active" + })]; + let map = build_key_map(&["id", "first_name", "status"], rows, None); + assert_eq!( + map, + vec![ + Some(KeyMatch::Exact), + Some(KeyMatch::CamelCase), + Some(KeyMatch::CaseInsensitive("Status".to_string())) + ] + ); +} + +#[test] +fn test_build_column_key_map_k8s_nested_metadata() { + // Kubernetes response: access nested fields via object_path + let rows = vec![serde_json::json!({ + "metadata": {"name": "my-pod", "namespace": "default", "uid": "abc-123"}, + "status": {"phase": "Running"} + })]; + let map = build_key_map(&["name", "namespace", "uid"], rows, Some("/metadata")); + assert_eq!( + map, + vec![ + Some(KeyMatch::Exact), + Some(KeyMatch::Exact), + Some(KeyMatch::Exact) + ] + ); +} + +#[test] +fn test_build_column_key_map_github_mixed_casing() { + // GitHub returns mixed casing: some camelCase, some snake_case + let rows = vec![serde_json::json!({ + "id": 1, + "node_id": "MDQ6VXNlcjE=", + "login": "octocat", + "gravatar_id": "", + "followers_url": "https://api.github.com/users/octocat/followers" + })]; + let map = build_key_map( + &["id", "node_id", "login", "gravatar_id", "followers_url"], + rows, + None, + ); + // All exact match — GitHub uses snake_case for these fields + assert!(map.iter().all(|m| matches!(m, Some(KeyMatch::Exact)))); +} + +#[test] +fn test_build_column_key_map_numeric_keys() { + // APIs that return numeric-like keys + let rows = vec![serde_json::json!({ + "200": {"description": "OK"}, + "404": {"description": "Not Found"} + })]; + // Sanitized: 200 → _200, 404 → _404 + // to_camel_case("_200") = "200", which matches the JSON key "200" → CamelCase match + let map = build_key_map(&["_200", "_404"], rows, None); + assert_eq!( + map, + vec![Some(KeyMatch::CamelCase), Some(KeyMatch::CamelCase)] + ); +} + +#[test] +fn test_hyphen_case_key_matching() { + // REST APIs with hyphen-case keys: "user-id" → normalized match to "user_id" + let 
rows = vec![serde_json::json!({"user-id": "abc", "user-name": "Alice"})]; + let map = build_key_map(&["user_id", "user_name"], rows, None); + // Normalized matching: "userid" matches "userid" (after stripping non-alnum) + assert_eq!( + map, + vec![ + Some(KeyMatch::CaseInsensitive("user-id".to_string())), + Some(KeyMatch::CaseInsensitive("user-name".to_string())) + ] + ); +} + +#[test] +fn test_screaming_snake_case_matching() { + // Legacy APIs with SCREAMING_SNAKE_CASE: "USER_NAME" → case-insensitive match + let rows = vec![serde_json::json!({"USER_NAME": "alice", "USER_ID": 42})]; + let map = build_key_map(&["user_name", "user_id"], rows, None); + assert_eq!( + map, + vec![ + Some(KeyMatch::CaseInsensitive("USER_NAME".to_string())), + Some(KeyMatch::CaseInsensitive("USER_ID".to_string())) + ] + ); +} + +// --- normalize_datetime tests --- + +#[test] +fn test_normalize_datetime_date_only() { + assert_eq!( + OpenApiFdw::normalize_datetime("2024-01-15"), + "2024-01-15T00:00:00Z" + ); +} + +#[test] +fn test_normalize_datetime_full_datetime() { + // Full datetime should pass through unchanged + let dt = "2024-06-15T10:30:00Z"; + assert_eq!(OpenApiFdw::normalize_datetime(dt), dt); +} + +#[test] +fn test_normalize_datetime_with_offset() { + let dt = "2024-06-15T10:30:00+05:00"; + assert_eq!(OpenApiFdw::normalize_datetime(dt), dt); +} + +#[test] +fn test_normalize_datetime_not_date_format() { + // Strings that are 10 chars but not date format should pass through + assert_eq!(OpenApiFdw::normalize_datetime("abcdefghij"), "abcdefghij"); +} + +#[test] +fn test_normalize_datetime_with_milliseconds() { + // ISO 8601 datetime with milliseconds — should pass through + let dt = "2024-06-15T10:30:00.123Z"; + assert_eq!(OpenApiFdw::normalize_datetime(dt), dt); +} + +#[test] +fn test_normalize_datetime_short_string() { + // String shorter than 10 chars — should not be treated as date + assert_eq!(OpenApiFdw::normalize_datetime("2024-01"), "2024-01"); +} + +#[test] +fn 
test_normalize_datetime_long_non_date() { + // Exactly 10 chars but not a date pattern (no dashes at right positions) + assert_eq!(OpenApiFdw::normalize_datetime("1234567890"), "1234567890"); +} + +#[test] +fn test_normalize_datetime_empty_string() { + assert_eq!(OpenApiFdw::normalize_datetime(""), ""); +} + +#[test] +fn test_normalize_datetime_tz_without_colon() { + // Threads API format: +0000 → +00:00 + assert_eq!( + OpenApiFdw::normalize_datetime("2026-02-12T04:46:47+0000"), + "2026-02-12T04:46:47+00:00" + ); +} + +#[test] +fn test_normalize_datetime_negative_tz_without_colon() { + assert_eq!( + OpenApiFdw::normalize_datetime("2024-09-12T23:17:39-0500"), + "2024-09-12T23:17:39-05:00" + ); +} + +#[test] +fn test_normalize_datetime_tz_with_colon_unchanged() { + // Already has colon — should pass through + let dt = "2024-06-15T10:30:00+05:30"; + assert_eq!(OpenApiFdw::normalize_datetime(dt), dt); +} + +// --- json_to_cell_cached tests --- + +/// Helper: build an FDW with cached columns and column_key_map, then call json_to_cell_cached +fn cell_from_json( + col_name: &str, + type_oid: TypeOid, + json_obj: &JsonValue, +) -> Result, String> { + let fdw = OpenApiFdw { + cached_columns: vec![CachedColumn { + name: col_name.to_string(), + type_oid: type_oid.clone(), + camel_name: to_camel_case(col_name), + lower_name: col_name.to_lowercase(), + alnum_name: normalize_to_alnum(col_name), + }], + column_key_map: vec![Some(KeyMatch::Exact)], + ..Default::default() + }; + fdw.json_to_cell_cached(json_obj, 0) +} + +/// Helper: extract string from Cell +fn cell_to_string(cell: &Cell) -> Option { + match cell { + Cell::String(s) => Some(s.clone()), + _ => None, + } +} + +#[test] +fn test_json_to_cell_bool() { + let obj = serde_json::json!({"active": true}); + let cell = cell_from_json("active", TypeOid::Bool, &obj).unwrap(); + assert!(matches!(cell, Some(Cell::Bool(true)))); +} + +#[test] +fn test_json_to_cell_i8() { + let obj = serde_json::json!({"val": 42}); + let cell = 
cell_from_json("val", TypeOid::I8, &obj).unwrap(); + assert!(matches!(cell, Some(Cell::I8(42)))); +} + +#[test] +fn test_json_to_cell_i8_overflow() { + // 200 exceeds i8 range (-128..127) + let obj = serde_json::json!({"val": 200}); + let cell = cell_from_json("val", TypeOid::I8, &obj).unwrap(); + assert!(cell.is_none()); +} + +#[test] +fn test_json_to_cell_i16() { + let obj = serde_json::json!({"val": 1000}); + let cell = cell_from_json("val", TypeOid::I16, &obj).unwrap(); + assert!(matches!(cell, Some(Cell::I16(1000)))); +} + +#[test] +fn test_json_to_cell_i16_overflow() { + // i16 max is 32767 — 40000 should overflow + let obj = serde_json::json!({"val": 40000}); + let cell = cell_from_json("val", TypeOid::I16, &obj).unwrap(); + assert!(cell.is_none()); +} + +#[test] +fn test_json_to_cell_i32() { + let obj = serde_json::json!({"val": 100_000}); + let cell = cell_from_json("val", TypeOid::I32, &obj).unwrap(); + assert!(matches!(cell, Some(Cell::I32(100_000)))); +} + +#[test] +fn test_json_to_cell_i32_overflow() { + // i32 max is 2147483647 — 3 billion should overflow + let obj = serde_json::json!({"val": 3_000_000_000_i64}); + let cell = cell_from_json("val", TypeOid::I32, &obj).unwrap(); + assert!(cell.is_none()); +} + +#[test] +fn test_json_to_cell_i32_from_float_truncates() { + // JSON number 42.9 → i64 returns 42 (as_i64 truncates), then try_from + // serde_json::as_i64() returns None for floats, so this should be None + let obj = serde_json::json!({"val": 42.9}); + let cell = cell_from_json("val", TypeOid::I32, &obj).unwrap(); + assert!(cell.is_none(), "Float value should not coerce to I32"); +} + +#[test] +fn test_json_to_cell_i64() { + let obj = serde_json::json!({"val": 9_000_000_000_i64}); + let cell = cell_from_json("val", TypeOid::I64, &obj).unwrap(); + assert!(matches!(cell, Some(Cell::I64(9_000_000_000)))); +} + +#[test] +fn test_json_to_cell_i64_max() { + // Maximum i64 value + let obj = serde_json::json!({"val": i64::MAX}); + let cell = 
cell_from_json("val", TypeOid::I64, &obj).unwrap(); + assert!(matches!(cell, Some(Cell::I64(v)) if v == i64::MAX)); +} + +#[test] +fn test_json_to_cell_f32() { + let obj = serde_json::json!({"val": 2.78}); + let cell = cell_from_json("val", TypeOid::F32, &obj).unwrap(); + if let Some(Cell::F32(v)) = cell { + assert!((v - 2.78_f32).abs() < 0.001); + } else { + panic!("Expected F32"); + } +} + +#[test] +fn test_json_to_cell_f64() { + let obj = serde_json::json!({"val": 1.234_567_890_123}); + let cell = cell_from_json("val", TypeOid::F64, &obj).unwrap(); + if let Some(Cell::F64(v)) = cell { + assert!((v - 1.234_567_890_123).abs() < f64::EPSILON); + } else { + panic!("Expected F64"); + } +} + +#[test] +fn test_json_to_cell_f64_nan_like() { + // JSON doesn't have NaN — should always be a valid number + let obj = serde_json::json!({"val": 1e308}); + let cell = cell_from_json("val", TypeOid::F64, &obj).unwrap(); + assert!(matches!(cell, Some(Cell::F64(_)))); +} + +#[test] +fn test_json_to_cell_numeric() { + let obj = serde_json::json!({"val": 99.99}); + let cell = cell_from_json("val", TypeOid::Numeric, &obj).unwrap(); + if let Some(Cell::Numeric(v)) = cell { + assert!((v - 99.99).abs() < f64::EPSILON); + } else { + panic!("Expected Numeric"); + } +} + +#[test] +fn test_json_to_cell_numeric_from_string() { + // Numeric column with string value → None (as_f64 returns None for strings) + let obj = serde_json::json!({"price": "29.99"}); + let cell = cell_from_json("price", TypeOid::Numeric, &obj).unwrap(); + assert!(cell.is_none(), "String number should not coerce to Numeric"); +} + +#[test] +fn test_json_to_cell_string_from_string() { + let obj = serde_json::json!({"name": "alice"}); + let cell = cell_from_json("name", TypeOid::String, &obj).unwrap(); + assert_eq!( + cell_to_string(cell.as_ref().unwrap()), + Some("alice".to_string()) + ); +} + +#[test] +fn test_json_to_cell_string_from_number() { + // Non-string values should be serialized to string + let obj = 
serde_json::json!({"name": 42}); + let cell = cell_from_json("name", TypeOid::String, &obj).unwrap(); + assert_eq!( + cell_to_string(cell.as_ref().unwrap()), + Some("42".to_string()) + ); +} + +#[test] +fn test_json_to_cell_string_from_bool() { + let obj = serde_json::json!({"name": true}); + let cell = cell_from_json("name", TypeOid::String, &obj).unwrap(); + assert_eq!( + cell_to_string(cell.as_ref().unwrap()), + Some("true".to_string()) + ); +} + +#[test] +fn test_json_to_cell_string_from_object() { + // When a column expects text but JSON value is an object, serialize it + let obj = serde_json::json!({"info": {"nested": true, "value": 42}}); + let cell = cell_from_json("info", TypeOid::String, &obj).unwrap(); + let s = cell_to_string(cell.as_ref().unwrap()).unwrap(); + assert!(s.contains("\"nested\":true")); + assert!(s.contains("\"value\":42")); +} + +#[test] +fn test_json_to_cell_string_from_array() { + // When a column expects text but JSON value is an array, serialize it + let obj = serde_json::json!({"tags": ["rust", "wasm", "sql"]}); + let cell = cell_from_json("tags", TypeOid::String, &obj).unwrap(); + let s = cell_to_string(cell.as_ref().unwrap()).unwrap(); + assert_eq!(s, r#"["rust","wasm","sql"]"#); +} + +#[test] +fn test_json_to_cell_null_returns_none() { + let obj = serde_json::json!({"val": null}); + let cell = cell_from_json("val", TypeOid::String, &obj).unwrap(); + assert!(cell.is_none()); +} + +#[test] +fn test_json_to_cell_missing_key_returns_none() { + let obj = serde_json::json!({"other": "value"}); + let fdw = OpenApiFdw { + cached_columns: vec![CachedColumn { + name: "missing".to_string(), + type_oid: TypeOid::String, + camel_name: "missing".to_string(), + lower_name: "missing".to_string(), + alnum_name: "missing".to_string(), + }], + column_key_map: vec![None], // no match found + ..Default::default() + }; + let cell = fdw.json_to_cell_cached(&obj, 0).unwrap(); + assert!(cell.is_none()); +} + +#[test] +fn 
test_json_to_cell_bool_from_non_bool() { + // Non-boolean value for boolean column → None + let obj = serde_json::json!({"active": "yes"}); + let cell = cell_from_json("active", TypeOid::Bool, &obj).unwrap(); + assert!(cell.is_none(), "String 'yes' should not coerce to Bool"); +} + +// Note: Date/Timestamp/Timestamptz from string tests are skipped because +// they call `time::parse_from_rfc3339` which is a WASM host import that +// panics outside the WASM runtime. These paths are covered by integration tests. + +#[test] +fn test_json_to_cell_date_from_unix() { + // Unix timestamp as integer → Date (doesn't call parse_from_rfc3339) + let obj = serde_json::json!({"dt": 1718409600}); + let cell = cell_from_json("dt", TypeOid::Date, &obj).unwrap(); + assert!(matches!(cell, Some(Cell::Date(1718409600)))); +} + +#[test] +fn test_json_to_cell_timestamp_from_unix() { + // Unix epoch → microseconds (doesn't call parse_from_rfc3339) + let obj = serde_json::json!({"ts": 1718409600}); + let cell = cell_from_json("ts", TypeOid::Timestamp, &obj).unwrap(); + if let Some(Cell::Timestamp(v)) = cell { + assert_eq!(v, 1_718_409_600_000_000); + } else { + panic!("Expected Timestamp"); + } +} + +#[test] +fn test_json_to_cell_timestamptz_from_unix() { + // Unix epoch → microseconds (doesn't call parse_from_rfc3339) + let obj = serde_json::json!({"ts": 1718409600}); + let cell = cell_from_json("ts", TypeOid::Timestamptz, &obj).unwrap(); + if let Some(Cell::Timestamptz(v)) = cell { + assert_eq!(v, 1_718_409_600_000_000); + } else { + panic!("Expected Timestamptz"); + } +} + +#[test] +fn test_json_to_cell_uuid() { + let obj = serde_json::json!({"uid": "550e8400-e29b-41d4-a716-446655440000"}); + let cell = cell_from_json("uid", TypeOid::Uuid, &obj).unwrap(); + assert!(matches!( + cell, + Some(Cell::Uuid(ref s)) if s == "550e8400-e29b-41d4-a716-446655440000" + )); +} + +#[test] +fn test_json_to_cell_uuid_from_non_string() { + // UUID column with numeric value → None (as_str returns None) + 
let obj = serde_json::json!({"uid": 12345}); + let cell = cell_from_json("uid", TypeOid::Uuid, &obj).unwrap(); + assert!(cell.is_none()); +} + +#[test] +fn test_json_to_cell_json_object() { + let obj = serde_json::json!({"meta": {"key": "val"}}); + let cell = cell_from_json("meta", TypeOid::Json, &obj).unwrap(); + if let Some(Cell::Json(s)) = cell { + assert_eq!(s, r#"{"key":"val"}"#); + } else { + panic!("Expected Json"); + } +} + +#[test] +fn test_json_to_cell_json_array() { + let obj = serde_json::json!({"tags": ["a", "b", "c"]}); + let cell = cell_from_json("tags", TypeOid::Json, &obj).unwrap(); + if let Some(Cell::Json(s)) = cell { + assert_eq!(s, r#"["a","b","c"]"#); + } else { + panic!("Expected Json"); + } +} + +#[test] +fn test_json_to_cell_json_from_null() { + // Null value for JSON column → None (null is filtered before type matching) + let obj = serde_json::json!({"meta": null}); + let cell = cell_from_json("meta", TypeOid::Json, &obj).unwrap(); + assert!(cell.is_none()); +} + +#[test] +fn test_json_to_cell_json_from_primitive() { + // Primitive value serialized as JSON + let obj = serde_json::json!({"val": 42}); + let cell = cell_from_json("val", TypeOid::Json, &obj).unwrap(); + if let Some(Cell::Json(s)) = cell { + assert_eq!(s, "42"); + } else { + panic!("Expected Json cell"); + } +} + +#[test] +fn test_json_to_cell_json_string_value() { + // JSON column with plain string value → serialize as JSON string + let obj = serde_json::json!({"data": "hello"}); + let cell = cell_from_json("data", TypeOid::Json, &obj).unwrap(); + if let Some(Cell::Json(s)) = cell { + assert_eq!(s, r#""hello""#); + } else { + panic!("Expected Json cell"); + } +} + +#[test] +fn test_json_to_cell_json_bool_value() { + // JSON column with bool value + let obj = serde_json::json!({"flag": true}); + let cell = cell_from_json("flag", TypeOid::Json, &obj).unwrap(); + if let Some(Cell::Json(s)) = cell { + assert_eq!(s, "true"); + } else { + panic!("Expected Json cell"); + } +} + 
+#[test] +fn test_json_to_cell_typeoid_other() { + // TypeOid::Other(n) → Json cell (same as TypeOid::Json) + let obj = serde_json::json!({"payload": {"nested": true, "count": 42}}); + let cell = cell_from_json("payload", TypeOid::Other("custom_type".to_string()), &obj).unwrap(); + if let Some(Cell::Json(s)) = cell { + assert!(s.contains("\"nested\":true")); + assert!(s.contains("\"count\":42")); + } else { + panic!("Expected Json cell for TypeOid::Other"); + } +} + +#[test] +fn test_json_to_cell_path_param_injection() { + // When a column is a path param, its value is injected from injected_params + let mut fdw = OpenApiFdw { + cached_columns: vec![CachedColumn { + name: "user_id".to_string(), + type_oid: TypeOid::String, + camel_name: "userId".to_string(), + lower_name: "user_id".to_string(), + alnum_name: "userid".to_string(), + }], + column_key_map: vec![None], + ..Default::default() + }; + fdw.injected_params + .insert("user_id".to_string(), "42".to_string()); + + let obj = serde_json::json!({"title": "Post Title"}); + let cell = fdw.json_to_cell_cached(&obj, 0).unwrap(); + assert_eq!( + cell_to_string(cell.as_ref().unwrap()), + Some("42".to_string()) + ); +} + +#[test] +fn test_json_to_cell_path_param_type_coercion() { + // Path param for an integer column should be coerced + let mut fdw = OpenApiFdw { + cached_columns: vec![CachedColumn { + name: "id".to_string(), + type_oid: TypeOid::I64, + camel_name: "id".to_string(), + lower_name: "id".to_string(), + alnum_name: "id".to_string(), + }], + column_key_map: vec![None], + ..Default::default() + }; + fdw.injected_params + .insert("id".to_string(), "123".to_string()); + + let obj = serde_json::json!({}); + let cell = fdw.json_to_cell_cached(&obj, 0).unwrap(); + assert!(matches!(cell, Some(Cell::I64(123)))); +} + +#[test] +fn test_json_to_cell_path_param_bool_coercion() { + // Path param for boolean column + let mut fdw = OpenApiFdw { + cached_columns: vec![CachedColumn { + name: "active".to_string(), + 
type_oid: TypeOid::Bool, + camel_name: "active".to_string(), + lower_name: "active".to_string(), + alnum_name: "active".to_string(), + }], + column_key_map: vec![None], + ..Default::default() + }; + fdw.injected_params + .insert("active".to_string(), "true".to_string()); + + let obj = serde_json::json!({}); + let cell = fdw.json_to_cell_cached(&obj, 0).unwrap(); + assert!(matches!(cell, Some(Cell::Bool(true)))); +} + +#[test] +fn test_json_to_cell_path_param_f64_coercion() { + // Path param for float column + let mut fdw = OpenApiFdw { + cached_columns: vec![CachedColumn { + name: "lat".to_string(), + type_oid: TypeOid::F64, + camel_name: "lat".to_string(), + lower_name: "lat".to_string(), + alnum_name: "lat".to_string(), + }], + column_key_map: vec![None], + ..Default::default() + }; + fdw.injected_params + .insert("lat".to_string(), "37.7749".to_string()); + + let obj = serde_json::json!({}); + let cell = fdw.json_to_cell_cached(&obj, 0).unwrap(); + if let Some(Cell::F64(v)) = cell { + assert!((v - 37.7749).abs() < f64::EPSILON); + } else { + panic!("Expected F64"); + } +} + +#[test] +fn test_json_to_cell_path_param_invalid_number_fallback() { + // Path param that can't parse as target type → falls back to String + let mut fdw = OpenApiFdw { + cached_columns: vec![CachedColumn { + name: "id".to_string(), + type_oid: TypeOid::I64, + camel_name: "id".to_string(), + lower_name: "id".to_string(), + alnum_name: "id".to_string(), + }], + column_key_map: vec![None], + ..Default::default() + }; + fdw.injected_params + .insert("id".to_string(), "not-a-number".to_string()); + + let obj = serde_json::json!({}); + let cell = fdw.json_to_cell_cached(&obj, 0).unwrap(); + // i64 parse fails → falls back to Cell::String + assert_eq!( + cell_to_string(cell.as_ref().unwrap()), + Some("not-a-number".to_string()) + ); +} + +#[test] +fn test_json_to_cell_path_param_json_type() { + // Path param for JSON column + let mut fdw = OpenApiFdw { + cached_columns: vec![CachedColumn { + name: 
"filter".to_string(), + type_oid: TypeOid::Json, + camel_name: "filter".to_string(), + lower_name: "filter".to_string(), + alnum_name: "filter".to_string(), + }], + column_key_map: vec![None], + ..Default::default() + }; + fdw.injected_params + .insert("filter".to_string(), r#"{"status":"active"}"#.to_string()); + + let obj = serde_json::json!({}); + let cell = fdw.json_to_cell_cached(&obj, 0).unwrap(); + if let Some(Cell::Json(s)) = cell { + assert_eq!(s, r#"{"status":"active"}"#); + } else { + panic!("Expected Json cell"); + } +} + +#[test] +fn test_json_to_cell_attrs_column() { + // The "attrs" column gets the full row as JSON + let fdw = OpenApiFdw { + cached_columns: vec![CachedColumn { + name: "attrs".to_string(), + type_oid: TypeOid::Json, + camel_name: "attrs".to_string(), + lower_name: "attrs".to_string(), + alnum_name: "attrs".to_string(), + }], + column_key_map: vec![None], // attrs is special-cased, no key match + ..Default::default() + }; + + let obj = serde_json::json!({"id": 1, "name": "test"}); + let cell = fdw.json_to_cell_cached(&obj, 0).unwrap(); + assert!(matches!(cell, Some(Cell::Json(_)))); +} + +#[test] +fn test_json_to_cell_fallback_camel_match() { + // When column_key_map has None (heterogeneous rows), fallback to camelCase match + let fdw = OpenApiFdw { + cached_columns: vec![CachedColumn { + name: "first_name".to_string(), + type_oid: TypeOid::String, + camel_name: "firstName".to_string(), + lower_name: "first_name".to_string(), + alnum_name: "firstname".to_string(), + }], + column_key_map: vec![None], // force fallback path + ..Default::default() + }; + + let obj = serde_json::json!({"firstName": "Alice"}); + let cell = fdw.json_to_cell_cached(&obj, 0).unwrap(); + assert_eq!( + cell_to_string(cell.as_ref().unwrap()), + Some("Alice".to_string()) + ); +} + +#[test] +fn test_json_to_cell_fallback_normalized_match() { + // Fallback to normalized (alnum-only) matching for @-prefixed keys + let fdw = OpenApiFdw { + cached_columns: 
vec![CachedColumn { + name: "_id".to_string(), + type_oid: TypeOid::String, + camel_name: "Id".to_string(), + lower_name: "_id".to_string(), + alnum_name: "id".to_string(), + }], + column_key_map: vec![None], // force fallback path + ..Default::default() + }; + + let obj = serde_json::json!({"@id": "urn:test:123"}); + let cell = fdw.json_to_cell_cached(&obj, 0).unwrap(); + assert_eq!( + cell_to_string(cell.as_ref().unwrap()), + Some("urn:test:123".to_string()) + ); +} + +#[test] +fn test_json_to_cell_fallback_case_insensitive() { + // column_key_map=None → fallback 4-step matching with case-insensitive step + let fdw = OpenApiFdw { + cached_columns: vec![CachedColumn { + name: "status".to_string(), + type_oid: TypeOid::String, + camel_name: "status".to_string(), + lower_name: "status".to_string(), + alnum_name: "status".to_string(), + }], + column_key_map: vec![None], // force fallback path + ..Default::default() + }; + + let obj = serde_json::json!({"Status": "active"}); + let cell = fdw.json_to_cell_cached(&obj, 0).unwrap(); + assert_eq!( + cell_to_string(cell.as_ref().unwrap()), + Some("active".to_string()) + ); +} + +#[test] +fn test_json_to_cell_camel_case_key_match() { + // CamelCase key match path + let fdw = OpenApiFdw { + cached_columns: vec![CachedColumn { + name: "created_at".to_string(), + type_oid: TypeOid::String, + camel_name: "createdAt".to_string(), + lower_name: "created_at".to_string(), + alnum_name: "createdat".to_string(), + }], + column_key_map: vec![Some(KeyMatch::CamelCase)], + ..Default::default() + }; + + let obj = serde_json::json!({"createdAt": "2024-01-15T10:00:00Z"}); + let cell = fdw.json_to_cell_cached(&obj, 0).unwrap(); + assert_eq!( + cell_to_string(cell.as_ref().unwrap()), + Some("2024-01-15T10:00:00Z".to_string()) + ); +} + +#[test] +fn test_json_to_cell_case_insensitive_key_match() { + // CaseInsensitive key match path + let fdw = OpenApiFdw { + cached_columns: vec![CachedColumn { + name: "user_name".to_string(), + type_oid: 
TypeOid::String, + camel_name: "userName".to_string(), + lower_name: "user_name".to_string(), + alnum_name: "username".to_string(), + }], + column_key_map: vec![Some(KeyMatch::CaseInsensitive("UserName".to_string()))], + ..Default::default() + }; + + let obj = serde_json::json!({"UserName": "alice"}); + let cell = fdw.json_to_cell_cached(&obj, 0).unwrap(); + assert_eq!( + cell_to_string(cell.as_ref().unwrap()), + Some("alice".to_string()) + ); +} diff --git a/wasm-wrappers/fdw/openapi_fdw/src/config.rs b/wasm-wrappers/fdw/openapi_fdw/src/config.rs new file mode 100644 index 00000000..e8727639 --- /dev/null +++ b/wasm-wrappers/fdw/openapi_fdw/src/config.rs @@ -0,0 +1,270 @@ +//! Server configuration: headers and authentication setup + +use serde_json::{Map as JsonMap, Value as JsonValue}; + +use crate::bindings::supabase::wrappers::{ + types::{FdwResult, Options}, + utils, +}; + +pub(crate) const DEFAULT_MAX_PAGES: usize = 1000; +pub(crate) const DEFAULT_MAX_RESPONSE_BYTES: usize = 50 * 1024 * 1024; // 50 MiB + +/// Server-level configuration. +/// +/// Fields are set once in `init()` from server options. A few fields +/// (`page_size`, `page_size_param`, `cursor_param`) can be overridden +/// per-table in `begin_scan`; call `save_pagination_defaults()` after +/// init and `restore_pagination_defaults()` at the start of each scan. 
+pub(crate) struct ServerConfig { + pub(crate) base_url: String, + pub(crate) headers: Vec<(String, String)>, + pub(crate) spec_url: Option, + pub(crate) spec_json: Option, + pub(crate) api_key_query: Option<(String, String)>, + pub(crate) include_attrs: bool, + pub(crate) page_size: usize, + pub(crate) page_size_param: String, + pub(crate) cursor_param: String, + pub(crate) max_pages: usize, + pub(crate) max_response_bytes: usize, + pub(crate) debug: bool, + + // Server-level defaults (saved after init, restored in begin_scan) + pub(crate) default_page_size: usize, + pub(crate) default_page_size_param: String, + pub(crate) default_cursor_param: String, +} + +/// Manual Debug impl to redact authentication secrets (headers may contain +/// API keys or bearer tokens, and api_key_query contains the raw key value). +impl std::fmt::Debug for ServerConfig { + fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result { + f.debug_struct("ServerConfig") + .field("base_url", &self.base_url) + .field("headers", &format!("[{} header(s)]", self.headers.len())) + .field("spec_url", &self.spec_url) + .field( + "spec_json", + &self + .spec_json + .as_ref() + .map(|s| format!("[{} bytes]", s.len())), + ) + .field( + "api_key_query", + &self + .api_key_query + .as_ref() + .map(|(k, _)| format!("{k}=[REDACTED]")), + ) + .field("include_attrs", &self.include_attrs) + .field("page_size", &self.page_size) + .field("page_size_param", &self.page_size_param) + .field("cursor_param", &self.cursor_param) + .field("max_pages", &self.max_pages) + .field("max_response_bytes", &self.max_response_bytes) + .field("debug", &self.debug) + .finish() + } +} + +impl Default for ServerConfig { + fn default() -> Self { + Self { + base_url: String::new(), + headers: Vec::new(), + spec_url: None, + spec_json: None, + api_key_query: None, + include_attrs: true, + page_size: 0, + page_size_param: String::new(), + cursor_param: String::new(), + max_pages: DEFAULT_MAX_PAGES, + max_response_bytes: 
DEFAULT_MAX_RESPONSE_BYTES, + debug: false, + default_page_size: 0, + default_page_size_param: String::new(), + default_cursor_param: String::new(), + } + } +} + +impl ServerConfig { + /// Snapshot the current pagination fields as server-level defaults. + /// + /// Call once at the end of `init()`, after server options are parsed. + pub(crate) fn save_pagination_defaults(&mut self) { + self.default_page_size = self.page_size; + self.default_page_size_param + .clone_from(&self.page_size_param); + self.default_cursor_param.clone_from(&self.cursor_param); + } + + /// Restore pagination fields to server-level defaults. + /// + /// Call at the start of each `begin_scan()`, before applying table-level overrides. + pub(crate) fn restore_pagination_defaults(&mut self) { + self.page_size = self.default_page_size; + self.page_size_param + .clone_from(&self.default_page_size_param); + self.cursor_param.clone_from(&self.default_cursor_param); + } + + /// Configure request headers from server options + pub(crate) fn configure_headers(&mut self, opts: &Options) -> FdwResult { + self.apply_headers( + opts.get("user_agent"), + opts.get("accept"), + opts.get("headers"), + ) + } + + /// Apply header configuration from extracted option values. + /// + /// Separated from `configure_headers` for testability (Options is a WASM resource). 
+ pub(crate) fn apply_headers( + &mut self, + user_agent: Option, + accept: Option, + headers_json: Option, + ) -> FdwResult { + self.headers + .push(("content-type".to_owned(), "application/json".to_string())); + + if let Some(user_agent) = user_agent { + self.headers.push(("user-agent".to_owned(), user_agent)); + } + + if let Some(accept) = accept { + self.headers.push(("accept".to_owned(), accept)); + } + + if let Some(headers_json) = headers_json { + let headers: JsonMap = serde_json::from_str(&headers_json) + .map_err(|e| format!("Invalid JSON for 'headers' option: {e}"))?; + for (key, value) in headers { + if let Some(v) = value.as_str() { + self.headers.push((key.to_lowercase(), v.to_string())); + } else { + return Err(format!( + "Invalid non-string value for header '{key}' in 'headers' option" + )); + } + } + } + + Ok(()) + } + + /// Configure authentication from server options + pub(crate) fn configure_auth(&mut self, opts: &Options) -> FdwResult { + let api_key = opts.get("api_key").or_else(|| { + opts.get("api_key_id") + .and_then(|key_id| utils::get_vault_secret(&key_id)) + }); + + let bearer_token = opts.get("bearer_token").or_else(|| { + opts.get("bearer_token_id") + .and_then(|token_id| utils::get_vault_secret(&token_id)) + }); + + // Warn on empty credentials (likely vault misconfiguration) + if let Some(ref key) = api_key { + if key.trim().is_empty() { + utils::report_warning( + "[openapi_fdw] api_key is empty. Requests may fail authentication.", + ); + } + } + if let Some(ref token) = bearer_token { + if token.trim().is_empty() { + utils::report_warning( + "[openapi_fdw] bearer_token is empty. 
Requests may fail authentication.", + ); + } + } + + let location = opts.require_or("api_key_location", "header"); + let header = opts.require_or("api_key_header", "Authorization"); + let prefix = opts.get("api_key_prefix"); + + self.apply_auth(api_key, bearer_token, &location, &header, prefix)?; + + // Warn if query auth uses the default header name (likely misconfiguration) + if location == "query" && header == "Authorization" && self.api_key_query.is_some() { + utils::report_warning( + "[openapi_fdw] api_key_location is 'query' but api_key_header \ + is the default 'Authorization'. This will send ?Authorization= \ + as a query parameter, which is likely incorrect. Set api_key_header \ + to the actual query parameter name (e.g., 'api_key' or 'key').", + ); + } + + Ok(()) + } + + /// Apply authentication configuration from extracted option values. + /// + /// Separated from `configure_auth` for testability (Options is a WASM resource). + pub(crate) fn apply_auth( + &mut self, + api_key: Option, + bearer_token: Option, + api_key_location: &str, + api_key_header: &str, + api_key_prefix: Option, + ) -> FdwResult { + // Enforce mutual exclusivity — both would emit duplicate auth headers + if api_key.is_some() && bearer_token.is_some() { + return Err( + "Cannot use both api_key/api_key_id and bearer_token/bearer_token_id. \ + Choose one authentication method." 
+ .to_string(), + ); + } + + if let Some(key) = api_key { + match api_key_location { + "query" => { + // API key sent as query parameter (e.g., ?api_key=xxx) + self.api_key_query = Some((api_key_header.to_string(), key)); + } + "cookie" => { + // API key sent as cookie (e.g., Cookie: session=xxx) + self.headers + .push(("cookie".to_owned(), format!("{api_key_header}={key}"))); + } + "header" => { + // API key sent as header (default) + let header_value = match (api_key_header, &api_key_prefix) { + ("Authorization", None) => format!("Bearer {key}"), + (_, Some(p)) => format!("{p} {key}"), + (_, None) => key, + }; + + self.headers + .push((api_key_header.to_lowercase(), header_value)); + } + other => { + return Err(format!( + "Invalid api_key_location '{other}'. \ + Must be 'header', 'query', or 'cookie'." + )); + } + } + } + + if let Some(token) = bearer_token { + self.headers + .push(("authorization".to_owned(), format!("Bearer {token}"))); + } + + Ok(()) + } +} + +#[cfg(test)] +#[path = "config_tests.rs"] +mod tests; diff --git a/wasm-wrappers/fdw/openapi_fdw/src/config_tests.rs b/wasm-wrappers/fdw/openapi_fdw/src/config_tests.rs new file mode 100644 index 00000000..607f0adf --- /dev/null +++ b/wasm-wrappers/fdw/openapi_fdw/src/config_tests.rs @@ -0,0 +1,665 @@ +use super::{DEFAULT_MAX_PAGES, DEFAULT_MAX_RESPONSE_BYTES, ServerConfig}; + +// --- Default values --- + +#[test] +fn test_default_max_pages() { + let config = ServerConfig::default(); + assert_eq!(config.max_pages, DEFAULT_MAX_PAGES); +} + +#[test] +fn test_default_max_response_bytes() { + let config = ServerConfig::default(); + assert_eq!(config.max_response_bytes, DEFAULT_MAX_RESPONSE_BYTES); +} + +#[test] +fn test_default_page_size_zero() { + let config = ServerConfig::default(); + assert_eq!(config.page_size, 0); +} + +#[test] +fn test_default_no_api_key_query() { + let config = ServerConfig::default(); + assert!(config.api_key_query.is_none()); +} + +#[test] +fn test_default_empty_headers() { + let 
config = ServerConfig::default(); + assert!(config.headers.is_empty()); +} + +#[test] +fn test_default_debug_off() { + let config = ServerConfig::default(); + assert!(!config.debug); +} + +#[test] +fn test_default_include_attrs_on() { + let config = ServerConfig::default(); + assert!(config.include_attrs); +} + +// --- save_pagination_defaults / restore_pagination_defaults --- + +#[test] +fn test_save_and_restore_pagination_defaults() { + let mut config = ServerConfig { + page_size: 50, + page_size_param: "per_page".to_string(), + cursor_param: "cursor".to_string(), + ..Default::default() + }; + config.save_pagination_defaults(); + + // Override with table-level values + config.page_size = 100; + config.page_size_param = "limit".to_string(); + config.cursor_param = "next".to_string(); + + // Restore should bring back server defaults + config.restore_pagination_defaults(); + assert_eq!(config.page_size, 50); + assert_eq!(config.page_size_param, "per_page"); + assert_eq!(config.cursor_param, "cursor"); +} + +#[test] +fn test_restore_before_save_uses_default_zeros() { + let mut config = ServerConfig { + page_size: 100, + page_size_param: "limit".to_string(), + ..Default::default() + }; + + // Restore without save → restores to Default::default() values + config.restore_pagination_defaults(); + assert_eq!(config.page_size, 0); + assert_eq!(config.page_size_param, ""); + assert_eq!(config.cursor_param, ""); +} + +#[test] +fn test_save_pagination_defaults_idempotent() { + let mut config = ServerConfig { + page_size: 25, + page_size_param: "limit".to_string(), + cursor_param: "after".to_string(), + ..Default::default() + }; + + config.save_pagination_defaults(); + config.save_pagination_defaults(); // Second save should be same + + config.page_size = 999; + config.restore_pagination_defaults(); + assert_eq!(config.page_size, 25); +} + +#[test] +fn test_multiple_save_restore_cycles() { + let mut config = ServerConfig { + page_size: 10, + page_size_param: 
"limit".to_string(), + cursor_param: "after".to_string(), + ..Default::default() + }; + config.save_pagination_defaults(); + + // Cycle 1: override and restore + config.page_size = 50; + config.page_size_param = "per_page".to_string(); + config.restore_pagination_defaults(); + assert_eq!(config.page_size, 10); + assert_eq!(config.page_size_param, "limit"); + + // Cycle 2: different override and restore + config.page_size = 200; + config.cursor_param = "next_token".to_string(); + config.restore_pagination_defaults(); + assert_eq!(config.page_size, 10); + assert_eq!(config.cursor_param, "after"); +} + +#[test] +fn test_restore_does_not_affect_non_pagination_fields() { + let mut config = ServerConfig { + base_url: "https://api.example.com".to_string(), + max_pages: 500, + debug: true, + page_size: 25, + ..Default::default() + }; + config.save_pagination_defaults(); + + config.base_url = "https://other.example.com".to_string(); + config.max_pages = 100; + config.debug = false; + + config.restore_pagination_defaults(); + // Non-pagination fields should be unchanged + assert_eq!(config.base_url, "https://other.example.com"); + assert_eq!(config.max_pages, 100); + assert!(!config.debug); + // Pagination fields should be restored + assert_eq!(config.page_size, 25); +} + +// --- apply_headers --- + +#[test] +fn test_apply_headers_content_type_always_added() { + let mut config = ServerConfig::default(); + config.apply_headers(None, None, None).unwrap(); + assert_eq!(config.headers.len(), 1); + assert_eq!(config.headers[0].0, "content-type"); + assert_eq!(config.headers[0].1, "application/json"); +} + +#[test] +fn test_apply_headers_with_user_agent() { + let mut config = ServerConfig::default(); + config + .apply_headers(Some("MyApp/1.0".to_string()), None, None) + .unwrap(); + assert_eq!(config.headers.len(), 2); + assert_eq!(config.headers[1].0, "user-agent"); + assert_eq!(config.headers[1].1, "MyApp/1.0"); +} + +#[test] +fn test_apply_headers_with_accept() { + let mut 
config = ServerConfig::default(); + config + .apply_headers(None, Some("application/geo+json".to_string()), None) + .unwrap(); + assert_eq!(config.headers.len(), 2); + assert_eq!(config.headers[1].0, "accept"); + assert_eq!(config.headers[1].1, "application/geo+json"); +} + +#[test] +fn test_apply_headers_with_user_agent_and_accept() { + let mut config = ServerConfig::default(); + config + .apply_headers( + Some("Bot/2.0".to_string()), + Some("text/xml".to_string()), + None, + ) + .unwrap(); + assert_eq!(config.headers.len(), 3); + assert_eq!(config.headers[1].0, "user-agent"); + assert_eq!(config.headers[1].1, "Bot/2.0"); + assert_eq!(config.headers[2].0, "accept"); + assert_eq!(config.headers[2].1, "text/xml"); +} + +#[test] +fn test_apply_headers_custom_json() { + let mut config = ServerConfig::default(); + config + .apply_headers( + None, + None, + Some(r#"{"X-Custom": "value1", "Feature-Flag": "beta"}"#.to_string()), + ) + .unwrap(); + // content-type + 2 custom headers + assert_eq!(config.headers.len(), 3); + // Custom headers should be lowercased + let custom_headers: Vec<_> = config.headers[1..].to_vec(); + assert!( + custom_headers + .iter() + .any(|(k, v)| k == "x-custom" && v == "value1") + ); + assert!( + custom_headers + .iter() + .any(|(k, v)| k == "feature-flag" && v == "beta") + ); +} + +#[test] +fn test_apply_headers_custom_json_lowercases_keys() { + let mut config = ServerConfig::default(); + config + .apply_headers( + None, + None, + Some(r#"{"X-API-KEY": "secret123"}"#.to_string()), + ) + .unwrap(); + assert_eq!(config.headers[1].0, "x-api-key"); + assert_eq!(config.headers[1].1, "secret123"); +} + +#[test] +fn test_apply_headers_invalid_json() { + let mut config = ServerConfig::default(); + let result = config.apply_headers(None, None, Some("not valid json".to_string())); + assert!(result.is_err()); + assert!(result.unwrap_err().contains("Invalid JSON")); +} + +#[test] +fn test_apply_headers_non_string_value_error() { + let mut config = 
ServerConfig::default(); + let result = config.apply_headers(None, None, Some(r#"{"X-Count": 42}"#.to_string())); + assert!(result.is_err()); + let err = result.unwrap_err(); + assert!(err.contains("non-string value")); + assert!(err.contains("X-Count")); +} + +#[test] +fn test_apply_headers_empty_json_object() { + let mut config = ServerConfig::default(); + config + .apply_headers(None, None, Some("{}".to_string())) + .unwrap(); + // Only content-type, no custom headers + assert_eq!(config.headers.len(), 1); +} + +#[test] +fn test_apply_headers_boolean_value_error() { + let mut config = ServerConfig::default(); + let result = config.apply_headers(None, None, Some(r#"{"X-Debug": true}"#.to_string())); + assert!(result.is_err()); + assert!(result.unwrap_err().contains("non-string value")); +} + +#[test] +fn test_apply_headers_array_value_error() { + let mut config = ServerConfig::default(); + let result = config.apply_headers(None, None, Some(r#"{"X-Tags": ["a", "b"]}"#.to_string())); + assert!(result.is_err()); + assert!(result.unwrap_err().contains("non-string value")); +} + +#[test] +fn test_apply_headers_all_options_combined() { + let mut config = ServerConfig::default(); + config + .apply_headers( + Some("MyApp/1.0".to_string()), + Some("application/json".to_string()), + Some(r#"{"X-Request-Id": "abc123"}"#.to_string()), + ) + .unwrap(); + assert_eq!(config.headers.len(), 4); + assert_eq!( + config.headers[0], + ("content-type".to_string(), "application/json".to_string()) + ); + assert_eq!( + config.headers[1], + ("user-agent".to_string(), "MyApp/1.0".to_string()) + ); + assert_eq!( + config.headers[2], + ("accept".to_string(), "application/json".to_string()) + ); + assert_eq!( + config.headers[3], + ("x-request-id".to_string(), "abc123".to_string()) + ); +} + +// --- apply_auth: API key as header (default) --- + +#[test] +fn test_auth_api_key_default_header_authorization() { + let mut config = ServerConfig::default(); + config + .apply_auth( + 
Some("my-api-key".to_string()), + None, + "header", + "Authorization", + None, + ) + .unwrap(); + assert_eq!(config.headers.len(), 1); + assert_eq!(config.headers[0].0, "authorization"); + assert_eq!(config.headers[0].1, "Bearer my-api-key"); +} + +#[test] +fn test_auth_api_key_custom_header() { + let mut config = ServerConfig::default(); + config + .apply_auth( + Some("key123".to_string()), + None, + "header", + "X-API-Key", + None, + ) + .unwrap(); + assert_eq!(config.headers.len(), 1); + assert_eq!(config.headers[0].0, "x-api-key"); + assert_eq!(config.headers[0].1, "key123"); // No "Bearer" prefix for non-Authorization headers +} + +#[test] +fn test_auth_api_key_custom_header_with_prefix() { + let mut config = ServerConfig::default(); + config + .apply_auth( + Some("key123".to_string()), + None, + "header", + "X-API-Key", + Some("Token".to_string()), + ) + .unwrap(); + assert_eq!(config.headers[0].0, "x-api-key"); + assert_eq!(config.headers[0].1, "Token key123"); +} + +#[test] +fn test_auth_api_key_authorization_with_custom_prefix() { + let mut config = ServerConfig::default(); + config + .apply_auth( + Some("key123".to_string()), + None, + "header", + "Authorization", + Some("Basic".to_string()), + ) + .unwrap(); + assert_eq!(config.headers[0].0, "authorization"); + assert_eq!(config.headers[0].1, "Basic key123"); +} + +// --- apply_auth: API key as query parameter --- + +#[test] +fn test_auth_api_key_query_param() { + let mut config = ServerConfig::default(); + config + .apply_auth(Some("secret".to_string()), None, "query", "api_key", None) + .unwrap(); + assert!(config.headers.is_empty()); // No headers added + assert_eq!( + config.api_key_query, + Some(("api_key".to_string(), "secret".to_string())) + ); +} + +#[test] +fn test_auth_api_key_query_custom_param_name() { + let mut config = ServerConfig::default(); + config + .apply_auth(Some("key123".to_string()), None, "query", "appid", None) + .unwrap(); + assert_eq!( + config.api_key_query, + 
Some(("appid".to_string(), "key123".to_string())) + ); +} + +// --- apply_auth: API key as cookie --- + +#[test] +fn test_auth_api_key_cookie() { + let mut config = ServerConfig::default(); + config + .apply_auth( + Some("session-token-xyz".to_string()), + None, + "cookie", + "session", + None, + ) + .unwrap(); + assert_eq!(config.headers.len(), 1); + assert_eq!(config.headers[0].0, "cookie"); + assert_eq!(config.headers[0].1, "session=session-token-xyz"); +} + +#[test] +fn test_auth_api_key_cookie_default_name() { + let mut config = ServerConfig::default(); + config + .apply_auth(Some("token".to_string()), None, "cookie", "api_key", None) + .unwrap(); + assert_eq!(config.headers[0].1, "api_key=token"); +} + +// --- apply_auth: Bearer token --- + +#[test] +fn test_auth_bearer_token() { + let mut config = ServerConfig::default(); + config + .apply_auth( + None, + Some("eyJhbGciOiJIUzI1NiJ9.test".to_string()), + "header", + "Authorization", + None, + ) + .unwrap(); + assert_eq!(config.headers.len(), 1); + assert_eq!(config.headers[0].0, "authorization"); + assert_eq!(config.headers[0].1, "Bearer eyJhbGciOiJIUzI1NiJ9.test"); +} + +// --- apply_auth: Mutual exclusivity --- + +#[test] +fn test_auth_mutual_exclusivity_error() { + let mut config = ServerConfig::default(); + let result = config.apply_auth( + Some("api-key".to_string()), + Some("bearer-token".to_string()), + "header", + "Authorization", + None, + ); + assert!(result.is_err()); + let err = result.unwrap_err(); + assert!(err.contains("Cannot use both")); + assert!(err.contains("api_key")); + assert!(err.contains("bearer_token")); +} + +#[test] +fn test_auth_mutual_exclusivity_no_side_effects() { + let mut config = ServerConfig::default(); + let _ = config.apply_auth( + Some("api-key".to_string()), + Some("bearer-token".to_string()), + "header", + "Authorization", + None, + ); + // Error should not have added any headers + assert!(config.headers.is_empty()); + assert!(config.api_key_query.is_none()); +} + +// 
--- apply_auth: No auth --- + +#[test] +fn test_auth_no_credentials() { + let mut config = ServerConfig::default(); + config + .apply_auth(None, None, "header", "Authorization", None) + .unwrap(); + assert!(config.headers.is_empty()); + assert!(config.api_key_query.is_none()); +} + +// --- apply_auth: Edge cases --- + +#[test] +fn test_auth_api_key_empty_string() { + let mut config = ServerConfig::default(); + config + .apply_auth(Some(String::new()), None, "header", "Authorization", None) + .unwrap(); + assert_eq!(config.headers[0].1, "Bearer "); +} + +#[test] +fn test_auth_bearer_token_empty_string() { + let mut config = ServerConfig::default(); + config + .apply_auth(None, Some(String::new()), "header", "Authorization", None) + .unwrap(); + assert_eq!(config.headers[0].1, "Bearer "); +} + +#[test] +fn test_auth_api_key_preserves_existing_headers() { + let mut config = ServerConfig::default(); + config + .headers + .push(("content-type".to_string(), "application/json".to_string())); + + config + .apply_auth( + Some("key123".to_string()), + None, + "header", + "X-API-Key", + None, + ) + .unwrap(); + assert_eq!(config.headers.len(), 2); + assert_eq!(config.headers[0].0, "content-type"); + assert_eq!(config.headers[1].0, "x-api-key"); +} + +#[test] +fn test_auth_explicit_header_location() { + let mut config = ServerConfig::default(); + config + .apply_auth( + Some("key".to_string()), + None, + "header", + "Authorization", + None, + ) + .unwrap(); + assert_eq!(config.headers[0].0, "authorization"); + assert_eq!(config.headers[0].1, "Bearer key"); +} + +#[test] +fn test_auth_unknown_location_returns_error() { + let mut config = ServerConfig::default(); + let result = config.apply_auth( + Some("key".to_string()), + None, + "queery", // typo + "api_key", + None, + ); + assert!(result.is_err()); + let err = result.unwrap_err(); + assert!(err.contains("Invalid api_key_location")); + assert!(err.contains("queery")); +} + +#[test] +fn 
test_auth_unknown_location_no_side_effects() { + let mut config = ServerConfig::default(); + let _ = config.apply_auth( + Some("key".to_string()), + None, + "invalid", + "Authorization", + None, + ); + // Error should not have added any headers or query params + assert!(config.headers.is_empty()); + assert!(config.api_key_query.is_none()); +} + +// --- Debug impl redaction --- + +#[test] +fn test_debug_redacts_headers() { + let mut config = ServerConfig::default(); + config.headers.push(( + "authorization".to_string(), + "Bearer secret-token".to_string(), + )); + config + .headers + .push(("x-api-key".to_string(), "my-secret-key".to_string())); + + let debug_output = format!("{config:?}"); + assert!(!debug_output.contains("secret-token")); + assert!(!debug_output.contains("my-secret-key")); + assert!(debug_output.contains("[2 header(s)]")); +} + +#[test] +fn test_debug_redacts_api_key_query() { + let config = ServerConfig { + api_key_query: Some(("api_key".to_string(), "super-secret".to_string())), + ..Default::default() + }; + + let debug_output = format!("{config:?}"); + assert!(!debug_output.contains("super-secret")); + assert!(debug_output.contains("api_key=[REDACTED]")); +} + +#[test] +fn test_debug_shows_non_sensitive_fields() { + let config = ServerConfig { + base_url: "https://api.example.com".to_string(), + max_pages: 500, + debug: true, + ..Default::default() + }; + + let debug_output = format!("{config:?}"); + assert!(debug_output.contains("https://api.example.com")); + assert!(debug_output.contains("500")); + assert!(debug_output.contains("true")); +} + +#[test] +fn test_debug_no_api_key_query_shows_none() { + let config = ServerConfig::default(); + let debug_output = format!("{config:?}"); + assert!(debug_output.contains("api_key_query: None")); +} + +// --- apply_auth: query + default header name --- + +#[test] +fn test_auth_query_with_default_header_still_works() { + // Even though this is likely misconfigured (Authorization as query param name), + 
// the function should still set api_key_query correctly. + // The warning is emitted by configure_auth (WASM layer), not apply_auth. + let mut config = ServerConfig::default(); + config + .apply_auth( + Some("key123".to_string()), + None, + "query", + "Authorization", + None, + ) + .unwrap(); + assert_eq!( + config.api_key_query, + Some(("Authorization".to_string(), "key123".to_string())) + ); +} diff --git a/wasm-wrappers/fdw/openapi_fdw/src/lib.rs b/wasm-wrappers/fdw/openapi_fdw/src/lib.rs index 5545a959..ba18629a 100644 --- a/wasm-wrappers/fdw/openapi_fdw/src/lib.rs +++ b/wasm-wrappers/fdw/openapi_fdw/src/lib.rs @@ -8,64 +8,99 @@ #[allow(warnings)] mod bindings; +mod column_matching; +mod config; +mod pagination; +mod request; +mod response; mod schema; mod spec; -use serde_json::{Map as JsonMap, Value as JsonValue}; +use serde_json::Value as JsonValue; +use std::collections::HashMap; use bindings::{ exports::supabase::wrappers::routines::Guest, supabase::wrappers::{ - http, stats, time, + http, stats, types::{ - Cell, Column, Context, FdwError, FdwResult, ImportForeignSchemaStmt, ImportSchemaType, - OptionsType, Row, TypeOid, Value, + Cell, Context, FdwError, FdwResult, ImportForeignSchemaStmt, ImportSchemaType, + OptionsType, Row, }, utils, }, }; +use column_matching::{CachedColumn, KeyMatch, normalize_to_alnum, to_camel_case}; +use config::ServerConfig; +use pagination::PaginationState; use schema::generate_all_tables; use spec::OpenApiSpec; /// The `OpenAPI` FDW state -#[derive(Debug, Default)] +#[derive(Debug)] struct OpenApiFdw { - // Configuration from server options - base_url: String, - headers: Vec<(String, String)>, + // Server-level configuration (set once in init, some overridden per table) + config: ServerConfig, + + // OpenAPI spec (fetched on demand) spec: Option, - spec_url: Option, // Current operation state (from table options) + method: http::Method, + request_body: String, endpoint: String, response_path: Option, object_path: Option, // 
Extract nested object from each row (e.g., "/properties" for GeoJSON) rowid_col: String, - - // Pagination configuration - cursor_param: String, cursor_path: String, - page_size: usize, - page_size_param: String, - // Pagination state - next_cursor: Option, - next_url: Option, + // Pagination state and loop detection + pagination: PaginationState, - // Schema generation options - include_attrs: bool, - - // Path parameters extracted from WHERE clause (for injecting back into rows) - path_params: std::collections::HashMap, + // Qual values injected as URL path/query params (for injecting back into rows) + injected_params: HashMap, // Data buffers src_rows: Vec, src_idx: usize, + // Cached column metadata (populated in begin_scan, avoids WASM crossings in iter_scan) + cached_columns: Vec, + // Pre-resolved JSON key for each cached column (rebuilt per page in make_request) + column_key_map: Vec>, + // Limit pushdown for early pagination stop src_limit: Option, consumed_row_cnt: i64, + + // Debug row counter (only active when config.debug is true) + scan_row_count: i64, +} + +impl Default for OpenApiFdw { + fn default() -> Self { + Self { + config: ServerConfig::default(), + spec: None, + method: http::Method::Get, + request_body: String::new(), + endpoint: String::new(), + response_path: None, + object_path: None, + rowid_col: String::new(), + cursor_path: String::new(), + pagination: PaginationState::default(), + injected_params: HashMap::new(), + src_rows: Vec::new(), + src_idx: 0, + cached_columns: Vec::new(), + column_key_map: Vec::new(), + src_limit: None, + consumed_row_cnt: 0, + scan_row_count: 0, + } + } } /// Global FDW instance pointer. 
@@ -79,6 +114,50 @@ struct OpenApiFdw { static mut INSTANCE: *mut OpenApiFdw = std::ptr::null_mut::(); static FDW_NAME: &str = "OpenApiFdw"; +const READ_ONLY_ERROR: &str = "OpenAPI FDW is read-only"; +const HOST_VERSION_REQUIREMENT: &str = "^0.1.0"; +const DEFAULT_PAGE_SIZE_PARAM: &str = "limit"; +const DEFAULT_CURSOR_PARAM: &str = "after"; +const DEFAULT_ROWID_COLUMN: &str = "id"; + +/// Validate that a URL starts with `http://` or `https://`. +fn validate_url(url: &str, field_name: &str) -> Result<(), String> { + if !url.starts_with("http://") && !url.starts_with("https://") { + return Err(format!( + "Invalid {field_name}: '{url}'. Must start with http:// or https://" + )); + } + Ok(()) +} + +/// Parse a string option value as `usize`, returning a descriptive error. +fn parse_usize_option(value: &str, field_name: &str) -> Result { + value + .parse() + .map_err(|_| format!("Invalid value for '{field_name}': '{value}'")) +} + +/// Parse an optional string as a boolean flag (`"true"` or `"1"` → true). +fn parse_bool_flag(value: Option<&str>) -> bool { + value.is_some_and(|v| v == "true" || v == "1") +} + +/// Check whether the consumed row count has reached or exceeded the limit. +fn should_stop_scanning(consumed: i64, limit: Option) -> bool { + limit.is_some_and(|l| consumed >= l) +} + +/// Extract the effective row from a JSON value, optionally dereferencing an object path. +/// +/// Used in `iter_scan` and `build_column_key_map` to apply `object_path` +/// (e.g., `"/properties"` for GeoJSON) to each row before column matching. 
+pub(crate) fn extract_effective_row<'a>( + row: &'a JsonValue, + object_path: Option<&str>, +) -> &'a JsonValue { + object_path.map_or(row, |path| row.pointer(path).unwrap_or(row)) +} + impl OpenApiFdw { fn init() { let instance = Self::default(); @@ -99,561 +178,11 @@ impl OpenApiFdw { &mut (*INSTANCE) } } - - /// Fetch and parse the `OpenAPI` spec - fn fetch_spec(&mut self) -> Result<(), FdwError> { - if let Some(ref url) = self.spec_url { - let req = http::Request { - method: http::Method::Get, - url: url.clone(), - headers: self.headers.clone(), - body: String::default(), - }; - let resp = http::get(&req)?; - http::error_for_status(&resp) - .map_err(|err| format!("Failed to fetch OpenAPI spec: {}: {}", err, resp.body))?; - - let spec_json: JsonValue = - serde_json::from_str(&resp.body).map_err(|e| e.to_string())?; - self.spec = Some(OpenApiSpec::from_json(&spec_json)?); - - // Use base_url from spec if not explicitly set - if self.base_url.is_empty() { - if let Some(ref spec) = self.spec { - if let Some(url) = spec.base_url() { - self.base_url = url.trim_end_matches('/').to_string(); - } - } - } - - stats::inc_stats(FDW_NAME, stats::Metric::BytesIn, resp.body.len() as i64); - } - Ok(()) - } - - /// Extract a qual value as a string - fn qual_value_to_string(qual: &bindings::supabase::wrappers::types::Qual) -> Option { - if qual.operator() != "=" { - return None; - } - if let Value::Cell(cell) = qual.value() { - match cell { - Cell::String(s) => Some(s), - Cell::I32(n) => Some(n.to_string()), - Cell::I64(n) => Some(n.to_string()), - Cell::F32(n) => Some(n.to_string()), - Cell::F64(n) => Some(n.to_string()), - Cell::Bool(b) => Some(b.to_string()), - _ => None, - } - } else { - None - } - } - - /// Build the URL for a request, handling path parameters and pagination - /// - /// Supports endpoint templates like: - /// - `/users/{user_id}/posts` - /// - `/projects/{org}/{repo}/issues` - /// - `/resources/{type}/{id}` - /// - /// Path parameters are substituted from 
WHERE clause quals. - /// Returns (url, `path_params`) where `path_params` maps column names to values. - /// - /// # Errors - /// Returns an error if required path parameters are missing from the WHERE clause. - fn build_url( - &self, - ctx: &Context, - ) -> Result<(String, std::collections::HashMap), String> { - // Use next_url for pagination if available - if let Some(ref next_url) = self.next_url { - // Handle relative URLs: - // - Absolute URLs are used as-is - // - Query-only (e.g., "?page=2") resolves against base_url + endpoint - // - Absolute paths (e.g., "/items?page=2") resolve against base_url - // - Bare relative paths (e.g., "page/2") resolve against base_url/ - let url = if next_url.starts_with("http://") || next_url.starts_with("https://") { - next_url.clone() - } else if next_url.starts_with('?') { - let endpoint_base = self.endpoint.split('?').next().unwrap_or(&self.endpoint); - format!("{}{endpoint_base}{next_url}", self.base_url) - } else if next_url.starts_with('/') { - format!("{}{next_url}", self.base_url) - } else { - format!("{}/{next_url}", self.base_url) - }; - return Ok((url, self.path_params.clone())); - } - - let quals = ctx.get_quals(); - let mut extracted_params: std::collections::HashMap = - std::collections::HashMap::new(); - - // Build a map of qual field -> value for path parameter substitution - let mut qual_map: std::collections::HashMap = - std::collections::HashMap::new(); - for qual in &quals { - if let Some(value) = Self::qual_value_to_string(qual) { - // Store both original and lowercase versions for flexible matching - qual_map.insert(qual.field().to_lowercase(), value.clone()); - qual_map.insert(qual.field(), value); - } - } - - // Substitute path parameters in endpoint template - // e.g., /users/{user_id}/posts -> /users/123/posts - let mut endpoint = self.endpoint.clone(); - let mut path_params_used: Vec = Vec::new(); - let mut missing_params: Vec = Vec::new(); - - // Find all {param} patterns and substitute - while 
let Some(start) = endpoint.find('{') { - if let Some(end) = endpoint[start..].find('}') { - let param_name = &endpoint[start + 1..start + end]; - let param_lower = param_name.to_lowercase(); - - // Try to find matching qual (case-insensitive) - let value = qual_map - .get(¶m_lower) - .or_else(|| qual_map.get(param_name)); - - if let Some(val) = value { - path_params_used.push(param_lower.clone()); - // Store the path param for injection into rows (unencoded for PostgreSQL filter) - extracted_params.insert(param_lower.clone(), val.clone()); - endpoint = format!( - "{}{}{}", - &endpoint[..start], - urlencoding::encode(val), - &endpoint[start + end + 1..] - ); - } else { - // Track missing parameter and remove it from the endpoint to continue - missing_params.push(param_name.to_string()); - endpoint = format!("{}{}", &endpoint[..start], &endpoint[start + end + 1..]); - } - } else { - break; - } - } - - // Return error if any required path parameters are missing - if !missing_params.is_empty() { - return Err(format!( - "Missing required path parameter(s) in WHERE clause: {}. 
\ - Add WHERE {} to your query.", - missing_params.join(", "), - missing_params - .iter() - .map(|p| format!("{p} = ''")) - .collect::>() - .join(" AND ") - )); - } - - // Check for rowid pushdown for single-resource access - // Only if endpoint doesn't already have path params and rowid qual exists - if path_params_used.is_empty() { - if let Some(id_qual) = quals.iter().find(|q| { - q.field().to_lowercase() == self.rowid_col.to_lowercase() && q.operator() == "=" - }) { - if let Some(id) = Self::qual_value_to_string(id_qual) { - // Store rowid as path param too - extracted_params.insert(self.rowid_col.to_lowercase(), id.clone()); - return Ok(( - format!("{}{}/{}", self.base_url, endpoint, id), - extracted_params, - )); - } - } - } - - let mut base = format!("{}{}", self.base_url, endpoint); - let mut params = Vec::new(); - - // Add pagination cursor if we have one - if let Some(ref cursor) = self.next_cursor { - params.push(format!( - "{}={}", - self.cursor_param, - urlencoding::encode(cursor) - )); - } - - // Add page size if configured - if self.page_size > 0 && !self.page_size_param.is_empty() { - params.push(format!("{}={}", self.page_size_param, self.page_size)); - } - - // Add remaining quals as query params (exclude path params and rowid) - for qual in &quals { - let field_lower = qual.field().to_lowercase(); - - // Skip if used as path param - if path_params_used.contains(&field_lower) { - continue; - } - - // Skip the rowid column - if field_lower == self.rowid_col.to_lowercase() { - continue; - } - - if let Some(value) = Self::qual_value_to_string(qual) { - // Store query param for injection back into rows - // (so PostgreSQL's WHERE filter passes even if the API doesn't echo it back) - extracted_params.insert(field_lower, value.clone()); - params.push(format!( - "{}={}", - urlencoding::encode(&qual.field()), - urlencoding::encode(&value) - )); - } - } - - if !params.is_empty() { - base.push('?'); - base.push_str(¶ms.join("&")); - } - - Ok((base, 
extracted_params)) - } - - /// Make a request to the API with automatic rate limit handling - fn make_request(&mut self, ctx: &Context) -> FdwResult { - let (url, path_params) = self.build_url(ctx)?; - self.path_params = path_params; - - let req = http::Request { - method: http::Method::Get, - url, - headers: self.headers.clone(), - body: String::default(), - }; - - // Retry loop for rate limiting (HTTP 429) - let mut retry_count = 0; - const MAX_RETRIES: u32 = 3; - - let resp = loop { - let resp = http::get(&req)?; - - // Handle rate limiting (HTTP 429) - if resp.status_code == 429 { - if retry_count >= MAX_RETRIES { - return Err("API rate limit exceeded after max retries".to_string()); - } - - // Try to get retry delay from Retry-After header (case-insensitive) - let delay_ms = resp - .headers - .iter() - .find(|h| h.0.to_lowercase() == "retry-after") - .and_then(|h| h.1.parse::().ok()) - .map(|secs| secs * 1000) - .unwrap_or_else(|| { - // Exponential backoff: 1s, 2s, 4s - 1000 * (1 << retry_count) - }); - - time::sleep(delay_ms); - retry_count += 1; - continue; - } - - break resp; - }; - - // Handle 404 as empty result (no matching resource) - if resp.status_code == 404 { - self.src_rows = Vec::new(); - self.src_idx = 0; - self.next_cursor = None; - self.next_url = None; - return Ok(()); - } - - http::error_for_status(&resp).map_err(|err| format!("{}: {}", err, resp.body))?; - - let mut resp_json: JsonValue = - serde_json::from_str(&resp.body).map_err(|e| e.to_string())?; - - stats::inc_stats(FDW_NAME, stats::Metric::BytesIn, resp.body.len() as i64); - - // Handle pagination before extracting data (borrows resp_json) - self.handle_pagination(&resp_json); - - // Extract data by taking ownership (avoids cloning the array) - self.src_rows = self.extract_data(&mut resp_json)?; - self.src_idx = 0; - - Ok(()) - } - - /// Extract the data array from the response, taking ownership to avoid cloning - fn extract_data(&self, resp: &mut JsonValue) -> Result, FdwError> { - 
// If response_path is specified, use it - if let Some(ref path) = self.response_path { - let data = resp - .pointer_mut(path) - .map(JsonValue::take) - .ok_or_else(|| format!("Response path '{path}' not found in response"))?; - - return Self::json_to_rows(data); - } - - // Direct array response - if resp.is_array() { - return Self::json_to_rows(resp.take()); - } - - // Try common wrapper patterns - if resp.is_object() { - for key in ["data", "results", "items", "records", "entries", "features"] { - if resp.get(key).is_some_and(|d| d.is_array() || d.is_object()) { - return Self::json_to_rows(resp[key].take()); - } - } - - // Single object response - return Ok(vec![resp.take()]); - } - - Err("Unable to extract data from response".to_string()) - } - - /// Convert a JSON value to a vector of row objects (takes ownership, no cloning) - fn json_to_rows(data: JsonValue) -> Result, FdwError> { - match data { - JsonValue::Array(arr) => Ok(arr), - data if data.is_object() => Ok(vec![data]), - _ => Err("Response data is not an array or object".to_string()), - } - } - - /// Handle pagination from the response - fn handle_pagination(&mut self, resp: &JsonValue) { - self.next_cursor = None; - self.next_url = None; - - // Try configured cursor path first - if !self.cursor_path.is_empty() { - if let Some(cursor) = Self::extract_non_empty_string(resp, &self.cursor_path) { - self.next_cursor = Some(cursor); - return; - } - } - - // Only try auto-detection for object responses - if resp.as_object().is_none() { - return; - } - - // Check for next URL in common locations - let next_url_paths = [ - "/meta/pagination/next", - "/meta/pagination/next_url", - "/pagination/next", - "/pagination/next_url", - "/links/next", - "/links/next_url", - "/next", - "/next_url", - "/_links/next/href", - ]; - for path in &next_url_paths { - if let Some(url) = Self::extract_non_empty_string(resp, path) { - self.next_url = Some(url); - return; - } - } - - // Check for has_more flag with cursor - let 
has_more_paths = [ - "/meta/pagination/has_more", - "/has_more", - "/pagination/has_more", - ]; - let has_more = has_more_paths - .iter() - .find_map(|p| resp.pointer(p)) - .and_then(JsonValue::as_bool) - .unwrap_or(false); - - if !has_more { - return; - } - - // Find next cursor - let cursor_paths = [ - "/meta/pagination/next_cursor", - "/pagination/next_cursor", - "/next_cursor", - "/cursor", - ]; - for path in &cursor_paths { - if let Some(cursor) = Self::extract_non_empty_string(resp, path) { - self.next_cursor = Some(cursor); - return; - } - } - } - - /// Extract a non-empty string from a JSON pointer path - fn extract_non_empty_string(json: &JsonValue, path: &str) -> Option { - json.pointer(path) - .and_then(JsonValue::as_str) - .filter(|s| !s.is_empty()) - .map(ToString::to_string) - } - - /// Convert a JSON value to a Cell based on the target column type - fn json_to_cell( - &self, - src_row: &JsonValue, - tgt_col: &Column, - ) -> Result, FdwError> { - let tgt_col_name = tgt_col.name(); - - // Special handling for 'attrs' column - returns entire row as JSON - if tgt_col_name == "attrs" { - return Ok(Some(Cell::Json(src_row.to_string()))); - } - - // If this column was used as a query/path parameter, inject the WHERE clause - // value directly. This ensures PostgreSQL's post-filter passes even when the - // API returns a different case (e.g. API accepts "actual" but returns "Actual"). - // Coerce the string value to the target column type to avoid type mismatches. 
- if let Some(value) = self.path_params.get(&tgt_col_name.to_lowercase()) { - let cell = match tgt_col.type_oid() { - TypeOid::Bool => value.parse::().ok().map(Cell::Bool), - TypeOid::I8 => value.parse::().ok().map(Cell::I8), - TypeOid::I16 => value.parse::().ok().map(Cell::I16), - TypeOid::I32 => value.parse::().ok().map(Cell::I32), - TypeOid::I64 => value.parse::().ok().map(Cell::I64), - #[allow(clippy::cast_possible_truncation)] - TypeOid::F32 => value.parse::().ok().map(|v| Cell::F32(v as f32)), - TypeOid::F64 => value.parse::().ok().map(Cell::F64), - TypeOid::Numeric => value.parse::().ok().map(Cell::Numeric), - TypeOid::Date => time::parse_from_rfc3339(value) - .ok() - .map(|ts| Cell::Date(ts / 1_000_000)), - TypeOid::Timestamp => time::parse_from_rfc3339(value).ok().map(Cell::Timestamp), - TypeOid::Timestamptz => time::parse_from_rfc3339(value).ok().map(Cell::Timestamptz), - TypeOid::Json => Some(Cell::Json(value.clone())), - _ => Some(Cell::String(value.clone())), - }; - return Ok(cell.or_else(|| Some(Cell::String(value.clone())))); - } - - // Handle column name matching with multiple strategies: - // 1. Exact match - // 2. snake_case to camelCase conversion - // 3. 
Case-insensitive match (PostgreSQL lowercases column names) - let src = src_row.as_object().and_then(|obj| { - obj.get(&tgt_col_name) - .or_else(|| { - // Try camelCase version (snake_case to camelCase) - let camel = to_camel_case(&tgt_col_name); - obj.get(&camel) - }) - .or_else(|| { - // Case-insensitive match for when PostgreSQL lowercases column names - obj.iter() - .find(|(k, _)| k.to_lowercase() == tgt_col_name.to_lowercase()) - .map(|(_, v)| v) - }) - }); - - let src = match src { - Some(v) if !v.is_null() => v, - _ => return Ok(None), - }; - - // Type conversion based on target column type - let cell = match tgt_col.type_oid() { - TypeOid::Bool => src.as_bool().map(Cell::Bool), - TypeOid::I8 => src - .as_i64() - .and_then(|v| i8::try_from(v).ok()) - .map(Cell::I8), - TypeOid::I16 => src - .as_i64() - .and_then(|v| i16::try_from(v).ok()) - .map(Cell::I16), - TypeOid::I32 => src - .as_i64() - .and_then(|v| i32::try_from(v).ok()) - .map(Cell::I32), - TypeOid::I64 => src.as_i64().map(Cell::I64), - #[allow(clippy::cast_possible_truncation)] - TypeOid::F32 => src.as_f64().map(|v| Cell::F32(v as f32)), - TypeOid::F64 => src.as_f64().map(Cell::F64), - TypeOid::Numeric => src.as_f64().map(Cell::Numeric), - TypeOid::String => { - // Handle both string and non-string JSON values - Some(Cell::String( - src.as_str() - .map_or_else(|| src.to_string(), ToOwned::to_owned), - )) - } - TypeOid::Date => { - if let Some(s) = src.as_str() { - let ts = time::parse_from_rfc3339(s)?; - Some(Cell::Date(ts / 1_000_000)) - } else { - None - } - } - TypeOid::Timestamp => { - if let Some(s) = src.as_str() { - let ts = time::parse_from_rfc3339(s)?; - Some(Cell::Timestamp(ts)) - } else { - None - } - } - TypeOid::Timestamptz => { - if let Some(s) = src.as_str() { - let ts = time::parse_from_rfc3339(s)?; - Some(Cell::Timestamptz(ts)) - } else { - None - } - } - TypeOid::Uuid => src.as_str().map(|v| Cell::String(v.to_owned())), - // Json and unknown types: serialize to JSON string - 
TypeOid::Json | TypeOid::Other(_) => Some(Cell::Json(src.to_string())), - }; - - Ok(cell) - } -} - -/// Convert `snake_case` to `camelCase` -fn to_camel_case(s: &str) -> String { - let mut result = String::new(); - let mut capitalize_next = false; - - for c in s.chars() { - if c == '_' { - capitalize_next = true; - } else if capitalize_next { - result.push(c.to_uppercase().next().unwrap_or(c)); - capitalize_next = false; - } else { - result.push(c); - } - } - - result } impl Guest for OpenApiFdw { fn host_version_requirement() -> String { - "^0.1.0".to_string() + HOST_VERSION_REQUIREMENT.to_string() } fn init(ctx: &Context) -> FdwResult { @@ -663,119 +192,64 @@ impl Guest for OpenApiFdw { let opts = ctx.get_options(&OptionsType::Server); // Get base_url (optional if spec_url provides servers) - this.base_url = opts + this.config.base_url = opts .get("base_url") .unwrap_or_default() .trim_end_matches('/') .to_string(); // Validate base_url format if provided - if !this.base_url.is_empty() - && !this.base_url.starts_with("http://") - && !this.base_url.starts_with("https://") - { - return Err(format!( - "Invalid base_url: '{}'. Must start with http:// or https://", - this.base_url - )); + if !this.config.base_url.is_empty() { + validate_url(&this.config.base_url, "base_url")?; } - // Get spec_url for import_foreign_schema - this.spec_url = opts.get("spec_url"); + // Get spec_url / spec_json for import_foreign_schema + this.config.spec_url = opts.get("spec_url"); + this.config.spec_json = opts.get("spec_json"); + + // Validate mutual exclusivity + if this.config.spec_url.is_some() && this.config.spec_json.is_some() { + return Err("Cannot use both spec_url and spec_json. 
Choose one.".to_string()); + } // Whether to include an 'attrs' jsonb column in IMPORT FOREIGN SCHEMA output - this.include_attrs = opts + this.config.include_attrs = opts .get("include_attrs") .map(|v| v != "false") .unwrap_or(true); // Validate spec_url format if provided - if let Some(ref spec_url) = this.spec_url { - if !spec_url.starts_with("http://") && !spec_url.starts_with("https://") { - return Err(format!( - "Invalid spec_url: '{spec_url}'. Must start with http:// or https://" - )); - } - } - - // Set up headers - this.headers - .push(("content-type".to_owned(), "application/json".to_string())); - - // Optional User-Agent header (some APIs require this for identification) - let user_agent = opts.require_or("user_agent", "Wrappers OpenAPI FDW"); - this.headers.push(("user-agent".to_owned(), user_agent)); - - // Optional Accept header for content negotiation (JSON, XML, JSON-LD, GeoJSON etc.) - if let Some(accept) = opts.get("accept") { - this.headers.push(("accept".to_owned(), accept)); + if let Some(ref spec_url) = this.config.spec_url { + validate_url(spec_url, "spec_url")?; } - // Custom headers as JSON object: '{"Feature-Flags": "value", "X-Custom": "value"}' - if let Some(headers_json) = opts.get("headers") { - let headers: JsonMap = serde_json::from_str(&headers_json) - .map_err(|e| format!("Invalid JSON for 'headers' option: {e}"))?; - for (key, value) in headers { - if let Some(v) = value.as_str() { - this.headers.push((key.to_lowercase(), v.to_string())); - } else { - return Err(format!( - "Invalid non-string value for header '{key}' in 'headers' option" - )); - } - } - } + this.config.configure_headers(&opts)?; + this.config.configure_auth(&opts)?; - // API Key authentication - let api_key = opts.get("api_key").or_else(|| { - opts.get("api_key_id") - .and_then(|key_id| utils::get_vault_secret(&key_id)) - }); - - // Bearer token authentication (alternative to api_key) - let bearer_token = opts.get("bearer_token").or_else(|| { - 
opts.get("bearer_token_id") - .and_then(|token_id| utils::get_vault_secret(&token_id)) - }); - - // Enforce mutual exclusivity — both would emit duplicate auth headers - if api_key.is_some() && bearer_token.is_some() { - return Err( - "Cannot use both api_key/api_key_id and bearer_token/bearer_token_id. \ - Choose one authentication method." - .to_string(), - ); - } - - if let Some(key) = api_key { - let header_name = opts.require_or("api_key_header", "Authorization"); - let prefix = opts.get("api_key_prefix"); + // Pagination defaults (page_size=0 means no automatic limit parameter) + this.config.page_size = match opts.get("page_size") { + Some(s) => parse_usize_option(&s, "page_size")?, + None => 0, + }; - let header_value = match (header_name.as_str(), prefix) { - ("Authorization", None) => format!("Bearer {key}"), - (_, Some(p)) => format!("{p} {key}"), - (_, None) => key, - }; + this.config.page_size_param = opts.require_or("page_size_param", DEFAULT_PAGE_SIZE_PARAM); + this.config.cursor_param = opts.require_or("cursor_param", DEFAULT_CURSOR_PARAM); - this.headers - .push((header_name.to_lowercase(), header_value)); + // Maximum pages per scan (default 1000, prevents infinite pagination loops) + if let Some(s) = opts.get("max_pages") { + this.config.max_pages = parse_usize_option(&s, "max_pages")?; } - if let Some(token) = bearer_token { - this.headers - .push(("authorization".to_owned(), format!("Bearer {token}"))); + // Maximum response body size (default 50 MiB) + if let Some(s) = opts.get("max_response_bytes") { + this.config.max_response_bytes = parse_usize_option(&s, "max_response_bytes")?; } - // Pagination defaults (page_size=0 means no automatic limit parameter) - this.page_size = match opts.get("page_size") { - Some(s) => s - .parse() - .map_err(|_| format!("Invalid value for 'page_size': '{s}'"))?, - None => 0, - }; + // Debug: emit HTTP details and scan stats via INFO when enabled + this.config.debug = 
parse_bool_flag(opts.get("debug").as_deref()); - this.page_size_param = opts.require_or("page_size_param", "limit"); - this.cursor_param = opts.require_or("cursor_param", "after"); + // Save server-level pagination defaults for restoration in begin_scan + this.config.save_pagination_defaults(); stats::inc_stats(FDW_NAME, stats::Metric::CreateTimes, 1); @@ -788,39 +262,77 @@ impl Guest for OpenApiFdw { // Get table options this.endpoint = opts.require("endpoint")?; - this.rowid_col = opts.require_or("rowid_column", "id"); + this.rowid_col = opts + .require_or("rowid_column", DEFAULT_ROWID_COLUMN) + .to_lowercase(); + + // HTTP method (default GET, case-insensitive) + this.method = match opts.get("method") { + Some(m) if m.eq_ignore_ascii_case("POST") => http::Method::Post, + _ => http::Method::Get, + }; + + // Request body for POST endpoints + this.request_body = opts.get("request_body").unwrap_or_default(); this.response_path = opts.get("response_path"); this.object_path = opts.get("object_path"); // e.g., "/properties" for GeoJSON this.cursor_path = opts.require_or("cursor_path", ""); + // Restore server-level pagination defaults before applying table overrides + this.config.restore_pagination_defaults(); + // Override pagination params if specified at table level if let Some(param) = opts.get("cursor_param") { - this.cursor_param = param; + this.config.cursor_param = param; } if let Some(param) = opts.get("page_size_param") { - this.page_size_param = param; + this.config.page_size_param = param; } if let Some(size) = opts.get("page_size") { match size.parse() { - Ok(parsed) => this.page_size = parsed, + Ok(parsed) => this.config.page_size = parsed, Err(e) => utils::report_warning(&format!( "Invalid page_size '{}': {}. 
Using default value {}.", - size, e, this.page_size + size, e, this.config.page_size )), } } - // Reset pagination state - this.next_cursor = None; - this.next_url = None; + // Reset pagination and path param state + this.pagination.reset(); + this.injected_params.clear(); // Capture limit for early pagination stop // Note: Postgres handles offset locally, so we need offset + count total rows this.src_limit = ctx.get_limit().map(|v| v.offset() + v.count()); this.consumed_row_cnt = 0; + // Cache column metadata once to avoid WASM boundary crossings in iter_scan + this.cached_columns = ctx + .get_columns() + .iter() + .map(|col| { + let name = col.name(); + let camel_name = to_camel_case(&name); + let lower_name = name.to_lowercase(); + let alnum_name = normalize_to_alnum(&name); + CachedColumn { + type_oid: col.type_oid(), + name, + camel_name, + lower_name, + alnum_name, + } + }) + .collect(); + + if this.config.debug { + this.scan_row_count = 0; + } + // Make initial request this.make_request(ctx)?; + this.pagination.record_first_page(); Ok(()) } @@ -834,18 +346,31 @@ impl Guest for OpenApiFdw { stats::inc_stats(FDW_NAME, stats::Metric::RowsOut, this.src_rows.len() as i64); // No more pages to fetch - if this.next_cursor.is_none() && this.next_url.is_none() { + if this.pagination.is_exhausted() { return Ok(None); } // Check if limit is satisfied - stop pagination early - if let Some(limit) = this.src_limit { - if this.consumed_row_cnt >= limit { - return Ok(None); - } + if should_stop_scanning(this.consumed_row_cnt, this.src_limit) { + return Ok(None); + } + + // Pagination safety: detect loops and enforce page limit + if this.pagination.exceeds_limit(this.config.max_pages) { + utils::report_warning(&format!( + "Pagination stopped after {} pages (max_pages limit). 
\ + Increase max_pages server option if needed.", + this.config.max_pages + )); + return Ok(None); + } + if let Some(reason) = this.pagination.detect_loop() { + utils::report_warning(&format!("Pagination stopped: {reason}.")); + return Ok(None); } // Fetch next page + this.pagination.advance(); this.make_request(ctx)?; // If still no data after fetch, we're done @@ -856,54 +381,67 @@ impl Guest for OpenApiFdw { // Convert current row (apply object_path if set, e.g., "/properties" for GeoJSON) let src_row = &this.src_rows[this.src_idx]; - let effective_row = this - .object_path - .as_ref() - .map_or(src_row, |path| src_row.pointer(path).unwrap_or(src_row)); - for tgt_col in ctx.get_columns() { - let cell = this.json_to_cell(effective_row, &tgt_col)?; + let effective_row = extract_effective_row(src_row, this.object_path.as_deref()); + for (col_idx, _) in this.cached_columns.iter().enumerate() { + let cell = this.json_to_cell_cached(effective_row, col_idx)?; row.push(cell.as_ref()); } this.src_idx += 1; this.consumed_row_cnt += 1; + if this.config.debug { + this.scan_row_count += 1; + } Ok(Some(0)) } fn re_scan(ctx: &Context) -> FdwResult { let this = Self::this_mut(); - this.next_cursor = None; - this.next_url = None; + this.pagination.reset(); this.consumed_row_cnt = 0; - this.make_request(ctx) + this.injected_params.clear(); + this.make_request(ctx)?; + this.pagination.record_first_page(); + Ok(()) } fn end_scan(_ctx: &Context) -> FdwResult { let this = Self::this_mut(); + + if this.config.debug { + utils::report_info(&format!( + "[openapi_fdw] Scan complete: {} rows, {} columns", + this.scan_row_count, + this.cached_columns.len() + )); + } + this.src_rows.clear(); this.src_idx = 0; + this.cached_columns.clear(); + this.column_key_map.clear(); Ok(()) } fn begin_modify(_ctx: &Context) -> FdwResult { - Err("OpenAPI FDW is read-only".to_string()) + Err(READ_ONLY_ERROR.to_string()) } fn insert(_ctx: &Context, _row: &Row) -> FdwResult { - Err("OpenAPI FDW is 
read-only".to_string()) + Err(READ_ONLY_ERROR.to_string()) } fn update(_ctx: &Context, _rowid: Cell, _row: &Row) -> FdwResult { - Err("OpenAPI FDW is read-only".to_string()) + Err(READ_ONLY_ERROR.to_string()) } fn delete(_ctx: &Context, _rowid: Cell) -> FdwResult { - Err("OpenAPI FDW is read-only".to_string()) + Err(READ_ONLY_ERROR.to_string()) } fn end_modify(_ctx: &Context) -> FdwResult { - Err("OpenAPI FDW is read-only".to_string()) + Err(READ_ONLY_ERROR.to_string()) } fn import_foreign_schema( @@ -920,7 +458,7 @@ impl Guest for OpenApiFdw { let spec = this .spec .as_ref() - .ok_or("No OpenAPI spec available. Set spec_url in server options.")?; + .ok_or("No OpenAPI spec available. Set spec_url or spec_json in server options.")?; // Determine filter based on import statement let (filter, exclude) = match stmt.list_type { @@ -929,8 +467,13 @@ impl Guest for OpenApiFdw { ImportSchemaType::Except => (Some(stmt.table_list.as_slice()), true), }; - let tables = - generate_all_tables(spec, &stmt.server_name, filter, exclude, this.include_attrs); + let tables = generate_all_tables( + spec, + &stmt.server_name, + filter, + exclude, + this.config.include_attrs, + ); Ok(tables) } @@ -939,172 +482,5 @@ impl Guest for OpenApiFdw { bindings::export!(OpenApiFdw with_types_in bindings); #[cfg(test)] -mod tests { - use super::*; - - // --- json_to_rows tests --- - - #[test] - fn test_json_to_rows_array() { - let data = serde_json::json!([ - {"id": 1, "name": "alice"}, - {"id": 2, "name": "bob"}, - {"id": 3, "name": "charlie"} - ]); - let rows = OpenApiFdw::json_to_rows(data).unwrap(); - assert_eq!(rows.len(), 3); - assert_eq!(rows[0]["id"], 1); - assert_eq!(rows[2]["name"], "charlie"); - } - - #[test] - fn test_json_to_rows_single_object() { - let data = serde_json::json!({"id": 1, "name": "alice"}); - let rows = OpenApiFdw::json_to_rows(data).unwrap(); - assert_eq!(rows.len(), 1); - assert_eq!(rows[0]["name"], "alice"); - } - - #[test] - fn test_json_to_rows_empty_array() { - 
let data = serde_json::json!([]); - let rows = OpenApiFdw::json_to_rows(data).unwrap(); - assert!(rows.is_empty()); - } - - #[test] - fn test_json_to_rows_rejects_primitive() { - let data = serde_json::json!("just a string"); - assert!(OpenApiFdw::json_to_rows(data).is_err()); - } - - // --- extract_data tests --- - - fn fdw_with_response_path(path: Option<&str>) -> OpenApiFdw { - OpenApiFdw { - response_path: path.map(String::from), - ..Default::default() - } - } - - #[test] - fn test_extract_data_with_response_path() { - let fdw = fdw_with_response_path(Some("/features")); - let mut resp = serde_json::json!({ - "type": "FeatureCollection", - "features": [ - {"properties": {"id": "a"}}, - {"properties": {"id": "b"}} - ] - }); - let rows = fdw.extract_data(&mut resp).unwrap(); - assert_eq!(rows.len(), 2); - // Original is taken, not cloned - assert!(resp["features"].is_null()); - } - - #[test] - fn test_extract_data_with_nested_response_path() { - let fdw = fdw_with_response_path(Some("/result/data")); - let mut resp = serde_json::json!({ - "result": { - "data": [{"id": 1}, {"id": 2}, {"id": 3}] - } - }); - let rows = fdw.extract_data(&mut resp).unwrap(); - assert_eq!(rows.len(), 3); - } - - #[test] - fn test_extract_data_invalid_response_path() { - let fdw = fdw_with_response_path(Some("/nonexistent")); - let mut resp = serde_json::json!({"data": [1, 2, 3]}); - assert!(fdw.extract_data(&mut resp).is_err()); - } - - #[test] - fn test_extract_data_direct_array() { - let fdw = fdw_with_response_path(None); - let mut resp = serde_json::json!([{"id": 1}, {"id": 2}]); - let rows = fdw.extract_data(&mut resp).unwrap(); - assert_eq!(rows.len(), 2); - } - - #[test] - fn test_extract_data_auto_detect_data_key() { - let fdw = fdw_with_response_path(None); - let mut resp = serde_json::json!({ - "data": [{"id": 1}, {"id": 2}], - "meta": {"total": 2} - }); - let rows = fdw.extract_data(&mut resp).unwrap(); - assert_eq!(rows.len(), 2); - assert!(resp["data"].is_null()); - } - - 
#[test] - fn test_extract_data_auto_detect_results_key() { - let fdw = fdw_with_response_path(None); - let mut resp = serde_json::json!({ - "results": [{"id": "x"}], - "count": 1 - }); - let rows = fdw.extract_data(&mut resp).unwrap(); - assert_eq!(rows.len(), 1); - assert_eq!(rows[0]["id"], "x"); - } - - #[test] - fn test_extract_data_auto_detect_features_key() { - let fdw = fdw_with_response_path(None); - let mut resp = serde_json::json!({ - "type": "FeatureCollection", - "features": [ - {"type": "Feature", "properties": {"name": "A"}}, - {"type": "Feature", "properties": {"name": "B"}} - ] - }); - let rows = fdw.extract_data(&mut resp).unwrap(); - assert_eq!(rows.len(), 2); - } - - #[test] - fn test_extract_data_single_object_fallback() { - let fdw = fdw_with_response_path(None); - let mut resp = serde_json::json!({ - "id": "abc", - "name": "singleton" - }); - let rows = fdw.extract_data(&mut resp).unwrap(); - assert_eq!(rows.len(), 1); - assert_eq!(rows[0]["id"], "abc"); - } - - #[test] - fn test_extract_data_ownership_no_clone() { - // Verify that extract_data takes ownership rather than cloning: - // after extraction, the original data should be replaced with null - let fdw = fdw_with_response_path(Some("/items")); - let mut resp = serde_json::json!({ - "items": [ - {"id": 1, "payload": "x".repeat(1000)}, - {"id": 2, "payload": "y".repeat(1000)} - ] - }); - let rows = fdw.extract_data(&mut resp).unwrap(); - assert_eq!(rows.len(), 2); - assert_eq!(rows[0]["payload"].as_str().unwrap().len(), 1000); - // The original value was taken, not cloned - assert!(resp.pointer("/items").unwrap().is_null()); - } - - // --- to_camel_case tests --- - - #[test] - fn test_to_camel_case() { - assert_eq!(to_camel_case("snake_case"), "snakeCase"); - assert_eq!(to_camel_case("already"), "already"); - assert_eq!(to_camel_case("multi_word_name"), "multiWordName"); - assert_eq!(to_camel_case(""), ""); - } -} +#[path = "lib_tests.rs"] +mod tests; diff --git 
a/wasm-wrappers/fdw/openapi_fdw/src/lib_tests.rs b/wasm-wrappers/fdw/openapi_fdw/src/lib_tests.rs new file mode 100644 index 00000000..b7a81234 --- /dev/null +++ b/wasm-wrappers/fdw/openapi_fdw/src/lib_tests.rs @@ -0,0 +1,181 @@ +use super::*; +use config::{DEFAULT_MAX_PAGES, DEFAULT_MAX_RESPONSE_BYTES}; + +// --- Cross-cutting default tests --- + +#[test] +fn test_max_response_bytes_default() { + let fdw = OpenApiFdw::default(); + assert_eq!(fdw.config.max_response_bytes, DEFAULT_MAX_RESPONSE_BYTES); +} + +#[test] +fn test_pagination_safety_defaults() { + let fdw = OpenApiFdw::default(); + assert_eq!(fdw.config.max_pages, DEFAULT_MAX_PAGES); + assert_eq!(fdw.pagination.pages_fetched, 0); + assert!(fdw.pagination.previous.is_none()); + assert!(fdw.pagination.next.is_none()); +} + +// --- validate_url --- + +#[test] +fn test_validate_url_https() { + assert!(validate_url("https://api.example.com", "base_url").is_ok()); +} + +#[test] +fn test_validate_url_http() { + assert!(validate_url("http://localhost:8080", "base_url").is_ok()); +} + +#[test] +fn test_validate_url_no_scheme() { + let err = validate_url("api.example.com", "base_url").unwrap_err(); + assert!(err.contains("Invalid base_url")); + assert!(err.contains("api.example.com")); + assert!(err.contains("http://")); +} + +#[test] +fn test_validate_url_ftp_scheme() { + let err = validate_url("ftp://files.example.com", "spec_url").unwrap_err(); + assert!(err.contains("Invalid spec_url")); +} + +#[test] +fn test_validate_url_empty_string() { + let err = validate_url("", "base_url").unwrap_err(); + assert!(err.contains("Invalid base_url")); +} + +// --- parse_usize_option --- + +#[test] +fn test_parse_usize_option_valid() { + assert_eq!(parse_usize_option("100", "page_size").unwrap(), 100); +} + +#[test] +fn test_parse_usize_option_zero() { + assert_eq!(parse_usize_option("0", "page_size").unwrap(), 0); +} + +#[test] +fn test_parse_usize_option_large() { + assert_eq!( + parse_usize_option("52428800", 
"max_response_bytes").unwrap(), + 52_428_800 + ); +} + +#[test] +fn test_parse_usize_option_negative() { + let err = parse_usize_option("-1", "max_pages").unwrap_err(); + assert!(err.contains("Invalid value for 'max_pages'")); + assert!(err.contains("-1")); +} + +#[test] +fn test_parse_usize_option_not_a_number() { + let err = parse_usize_option("abc", "page_size").unwrap_err(); + assert!(err.contains("Invalid value for 'page_size'")); + assert!(err.contains("abc")); +} + +#[test] +fn test_parse_usize_option_float() { + let err = parse_usize_option("3.14", "page_size").unwrap_err(); + assert!(err.contains("Invalid value for 'page_size'")); +} + +// --- parse_bool_flag --- + +#[test] +fn test_parse_bool_flag_true() { + assert!(parse_bool_flag(Some("true"))); +} + +#[test] +fn test_parse_bool_flag_one() { + assert!(parse_bool_flag(Some("1"))); +} + +#[test] +fn test_parse_bool_flag_false() { + assert!(!parse_bool_flag(Some("false"))); +} + +#[test] +fn test_parse_bool_flag_zero() { + assert!(!parse_bool_flag(Some("0"))); +} + +#[test] +fn test_parse_bool_flag_none() { + assert!(!parse_bool_flag(None)); +} + +#[test] +fn test_parse_bool_flag_random_string() { + assert!(!parse_bool_flag(Some("yes"))); +} + +// --- should_stop_scanning --- + +#[test] +fn test_should_stop_scanning_no_limit() { + assert!(!should_stop_scanning(100, None)); +} + +#[test] +fn test_should_stop_scanning_below_limit() { + assert!(!should_stop_scanning(5, Some(10))); +} + +#[test] +fn test_should_stop_scanning_at_limit() { + assert!(should_stop_scanning(10, Some(10))); +} + +#[test] +fn test_should_stop_scanning_above_limit() { + assert!(should_stop_scanning(15, Some(10))); +} + +#[test] +fn test_should_stop_scanning_zero_consumed() { + assert!(!should_stop_scanning(0, Some(10))); +} + +// --- extract_effective_row --- + +#[test] +fn test_extract_effective_row_no_path() { + let row = serde_json::json!({"name": "Alice"}); + let result = extract_effective_row(&row, None); + assert_eq!(result, 
&row); +} + +#[test] +fn test_extract_effective_row_with_path() { + let row = serde_json::json!({"properties": {"name": "Alice"}, "type": "Feature"}); + let result = extract_effective_row(&row, Some("/properties")); + assert_eq!(result, &serde_json::json!({"name": "Alice"})); +} + +#[test] +fn test_extract_effective_row_missing_path() { + let row = serde_json::json!({"name": "Alice"}); + let result = extract_effective_row(&row, Some("/nonexistent")); + // Falls back to the original row when path doesn't exist + assert_eq!(result, &row); +} + +#[test] +fn test_extract_effective_row_nested_path() { + let row = serde_json::json!({"a": {"b": {"c": 42}}}); + let result = extract_effective_row(&row, Some("/a/b")); + assert_eq!(result, &serde_json::json!({"c": 42})); +} diff --git a/wasm-wrappers/fdw/openapi_fdw/src/pagination.rs b/wasm-wrappers/fdw/openapi_fdw/src/pagination.rs new file mode 100644 index 00000000..f212197e --- /dev/null +++ b/wasm-wrappers/fdw/openapi_fdw/src/pagination.rs @@ -0,0 +1,101 @@ +//! Pagination state tracking and loop detection + +/// A pagination token: either a cursor string or a full/partial URL. +#[derive(Debug, Clone, PartialEq, Eq)] +pub(crate) enum PaginationToken { + /// Token-based pagination (e.g., Stripe `next_cursor`) + Cursor(String), + /// Link-based pagination (e.g., GitHub `Link` header, HAL `_links`) + Url(String), +} + +impl PaginationToken { + /// Returns the inner cursor string, or `None` if this is a URL. + pub(crate) fn as_cursor(&self) -> Option<&str> { + match self { + Self::Cursor(s) => Some(s), + Self::Url(_) => None, + } + } + + /// Returns the inner URL string, or `None` if this is a cursor. + pub(crate) fn as_url(&self) -> Option<&str> { + match self { + Self::Url(s) => Some(s), + Self::Cursor(_) => None, + } + } +} + +/// Tracks pagination state across pages within a single scan. +/// +/// Detects infinite loops (duplicate token) and enforces page limits. 
+#[derive(Debug, Default)] +pub(crate) struct PaginationState { + /// Token for the next page (cursor or URL) + pub(crate) next: Option, + /// Token from the previous page (for loop detection) + pub(crate) previous: Option, + /// Number of pages fetched so far + pub(crate) pages_fetched: usize, +} + +impl PaginationState { + /// Reset all pagination state for a new scan. + pub(crate) fn reset(&mut self) { + self.next = None; + self.previous = None; + self.pages_fetched = 0; + } + + /// Returns `true` when there are no more pages to fetch. + pub(crate) fn is_exhausted(&self) -> bool { + self.next.is_none() + } + + /// Detect a pagination loop (duplicate token). + /// + /// Returns a human-readable reason if a loop is detected. + pub(crate) fn detect_loop(&self) -> Option<&'static str> { + match (&self.next, &self.previous) { + (Some(PaginationToken::Cursor(n)), Some(PaginationToken::Cursor(p))) if n == p => { + Some("duplicate cursor detected (possible infinite loop)") + } + (Some(PaginationToken::Url(n)), Some(PaginationToken::Url(p))) if n == p => { + Some("duplicate URL detected (possible infinite loop)") + } + _ => None, + } + } + + /// Returns `true` if the page limit has been reached. + pub(crate) fn exceeds_limit(&self, max_pages: usize) -> bool { + self.pages_fetched >= max_pages + } + + /// Save current next value as previous (for loop detection) and increment page count. + /// + /// Call this before fetching each subsequent page. + pub(crate) fn advance(&mut self) { + self.previous = self.next.clone(); + self.pages_fetched += 1; + } + + /// Record the first page after initial `make_request` in `begin_scan`. + /// + /// Only sets `pages_fetched = 1`. Does NOT copy `next` into `previous` — + /// there was no token sent for the first page, so `previous` must stay + /// `None` to avoid a false-positive loop detection. + pub(crate) fn record_first_page(&mut self) { + self.pages_fetched = 1; + } + + /// Clear next-page token (e.g., on 404 or empty response). 
+ pub(crate) fn clear_next(&mut self) { + self.next = None; + } +} + +#[cfg(test)] +#[path = "pagination_tests.rs"] +mod tests; diff --git a/wasm-wrappers/fdw/openapi_fdw/src/pagination_tests.rs b/wasm-wrappers/fdw/openapi_fdw/src/pagination_tests.rs new file mode 100644 index 00000000..4ff8741e --- /dev/null +++ b/wasm-wrappers/fdw/openapi_fdw/src/pagination_tests.rs @@ -0,0 +1,575 @@ +use super::{PaginationState, PaginationToken}; + +// --- Default state --- + +#[test] +fn test_default_state_is_exhausted() { + let state = PaginationState::default(); + assert!(state.is_exhausted()); +} + +#[test] +fn test_default_state_no_loop() { + let state = PaginationState::default(); + assert!(state.detect_loop().is_none()); +} + +#[test] +fn test_default_state_zero_pages() { + let state = PaginationState::default(); + assert_eq!(state.pages_fetched, 0); +} + +// --- is_exhausted --- + +#[test] +fn test_not_exhausted_with_cursor() { + let state = PaginationState { + next: Some(PaginationToken::Cursor("abc".to_string())), + ..Default::default() + }; + assert!(!state.is_exhausted()); +} + +#[test] +fn test_not_exhausted_with_url() { + let state = PaginationState { + next: Some(PaginationToken::Url( + "https://api.example.com/items?page=2".to_string(), + )), + ..Default::default() + }; + assert!(!state.is_exhausted()); +} + +#[test] +fn test_exhausted_when_none() { + let state = PaginationState { + next: None, + previous: Some(PaginationToken::Cursor("old".to_string())), // prev doesn't matter + pages_fetched: 5, + }; + assert!(state.is_exhausted()); +} + +// --- detect_loop --- + +#[test] +fn test_detect_loop_duplicate_cursor() { + let state = PaginationState { + next: Some(PaginationToken::Cursor("cursor_abc".to_string())), + previous: Some(PaginationToken::Cursor("cursor_abc".to_string())), + ..Default::default() + }; + let reason = state.detect_loop(); + assert!(reason.is_some()); + assert!(reason.unwrap().contains("duplicate cursor")); +} + +#[test] +fn 
test_detect_loop_duplicate_url() { + let state = PaginationState { + next: Some(PaginationToken::Url( + "https://api.example.com/items?page=2".to_string(), + )), + previous: Some(PaginationToken::Url( + "https://api.example.com/items?page=2".to_string(), + )), + ..Default::default() + }; + let reason = state.detect_loop(); + assert!(reason.is_some()); + assert!(reason.unwrap().contains("duplicate URL")); +} + +#[test] +fn test_no_loop_different_cursors() { + let state = PaginationState { + next: Some(PaginationToken::Cursor("cursor_2".to_string())), + previous: Some(PaginationToken::Cursor("cursor_1".to_string())), + ..Default::default() + }; + assert!(state.detect_loop().is_none()); +} + +#[test] +fn test_no_loop_different_urls() { + let state = PaginationState { + next: Some(PaginationToken::Url( + "https://api.example.com/items?page=3".to_string(), + )), + previous: Some(PaginationToken::Url( + "https://api.example.com/items?page=2".to_string(), + )), + ..Default::default() + }; + assert!(state.detect_loop().is_none()); +} + +#[test] +fn test_no_loop_when_next_none() { + // None next should never be a loop + let state = PaginationState { + next: None, + previous: None, + ..Default::default() + }; + assert!(state.detect_loop().is_none()); +} + +#[test] +fn test_no_loop_cursor_set_prev_none() { + // First page: cursor set but no previous yet + let state = PaginationState { + next: Some(PaginationToken::Cursor("cursor_1".to_string())), + previous: None, + ..Default::default() + }; + assert!(state.detect_loop().is_none()); +} + +#[test] +fn test_no_loop_url_set_prev_none() { + let state = PaginationState { + next: Some(PaginationToken::Url( + "https://api.example.com/items?page=2".to_string(), + )), + previous: None, + ..Default::default() + }; + assert!(state.detect_loop().is_none()); +} + +#[test] +fn test_no_loop_different_token_types() { + // Cursor vs URL should never match as a loop + let state = PaginationState { + next: 
Some(PaginationToken::Cursor("same_value".to_string())), + previous: Some(PaginationToken::Url("same_value".to_string())), + ..Default::default() + }; + assert!(state.detect_loop().is_none()); +} + +// --- exceeds_limit --- + +#[test] +fn test_exceeds_limit_at_boundary() { + let state = PaginationState { + pages_fetched: 10, + ..Default::default() + }; + assert!(state.exceeds_limit(10)); +} + +#[test] +fn test_exceeds_limit_over() { + let state = PaginationState { + pages_fetched: 15, + ..Default::default() + }; + assert!(state.exceeds_limit(10)); +} + +#[test] +fn test_does_not_exceed_limit_under() { + let state = PaginationState { + pages_fetched: 9, + ..Default::default() + }; + assert!(!state.exceeds_limit(10)); +} + +#[test] +fn test_does_not_exceed_limit_zero_pages() { + let state = PaginationState::default(); + assert!(!state.exceeds_limit(10)); +} + +#[test] +fn test_exceeds_limit_max_pages_one() { + let state = PaginationState { + pages_fetched: 1, + ..Default::default() + }; + assert!(state.exceeds_limit(1)); +} + +#[test] +fn test_exceeds_limit_zero_max_pages() { + // max_pages=0 means every page count exceeds the limit + let state = PaginationState::default(); + assert!(state.exceeds_limit(0)); +} + +// --- reset --- + +#[test] +fn test_reset_clears_all_state() { + let mut state = PaginationState { + next: Some(PaginationToken::Cursor("cursor_5".to_string())), + previous: Some(PaginationToken::Cursor("cursor_4".to_string())), + pages_fetched: 5, + }; + state.reset(); + assert!(state.next.is_none()); + assert!(state.previous.is_none()); + assert_eq!(state.pages_fetched, 0); +} + +#[test] +fn test_reset_already_default() { + let mut state = PaginationState::default(); + state.reset(); // should be idempotent + assert!(state.is_exhausted()); + assert_eq!(state.pages_fetched, 0); +} + +#[test] +fn test_reset_makes_exhausted() { + let mut state = PaginationState { + next: Some(PaginationToken::Cursor("abc".to_string())), + pages_fetched: 3, + 
..Default::default() + }; + assert!(!state.is_exhausted()); + state.reset(); + assert!(state.is_exhausted()); +} + +// --- advance --- + +#[test] +fn test_advance_copies_cursor_to_prev() { + let mut state = PaginationState { + next: Some(PaginationToken::Cursor("cursor_2".to_string())), + previous: None, + pages_fetched: 1, + }; + state.advance(); + assert_eq!( + state.previous, + Some(PaginationToken::Cursor("cursor_2".to_string())) + ); + assert_eq!( + state.next, + Some(PaginationToken::Cursor("cursor_2".to_string())) + ); // next unchanged + assert_eq!(state.pages_fetched, 2); +} + +#[test] +fn test_advance_copies_url_to_prev() { + let mut state = PaginationState { + next: Some(PaginationToken::Url( + "https://api.example.com/items?page=3".to_string(), + )), + previous: None, + pages_fetched: 1, + }; + state.advance(); + assert_eq!( + state.previous, + Some(PaginationToken::Url( + "https://api.example.com/items?page=3".to_string() + )) + ); + assert_eq!(state.pages_fetched, 2); +} + +#[test] +fn test_advance_increments_page_count() { + let mut state = PaginationState::default(); + assert_eq!(state.pages_fetched, 0); + state.advance(); + assert_eq!(state.pages_fetched, 1); + state.advance(); + assert_eq!(state.pages_fetched, 2); + state.advance(); + assert_eq!(state.pages_fetched, 3); +} + +#[test] +fn test_advance_overwrites_prev() { + let mut state = PaginationState { + next: Some(PaginationToken::Cursor("cursor_3".to_string())), + previous: Some(PaginationToken::Cursor("cursor_1".to_string())), + pages_fetched: 2, + }; + state.advance(); + assert_eq!( + state.previous, + Some(PaginationToken::Cursor("cursor_3".to_string())) + ); +} + +#[test] +fn test_advance_clears_prev_when_next_is_none() { + let mut state = PaginationState { + next: None, + previous: Some(PaginationToken::Cursor("old_cursor".to_string())), + pages_fetched: 1, + }; + state.advance(); + assert!(state.previous.is_none()); + assert_eq!(state.pages_fetched, 2); +} + +// --- record_first_page 
--- + +#[test] +fn test_record_first_page_sets_count_to_one() { + let mut state = PaginationState::default(); + state.record_first_page(); + assert_eq!(state.pages_fetched, 1); +} + +#[test] +fn test_record_first_page_does_not_set_prev() { + let mut state = PaginationState { + next: Some(PaginationToken::Cursor("cursor_1".to_string())), + ..Default::default() + }; + state.record_first_page(); + // prev must stay None to avoid false loop detection on page 2 + assert!(state.previous.is_none()); + assert_eq!(state.pages_fetched, 1); +} + +#[test] +fn test_record_first_page_preserves_next() { + let mut state = PaginationState { + next: Some(PaginationToken::Cursor("cursor_1".to_string())), + ..Default::default() + }; + state.record_first_page(); + assert_eq!( + state.next, + Some(PaginationToken::Cursor("cursor_1".to_string())) + ); +} + +// --- clear_next --- + +#[test] +fn test_clear_next_removes_token() { + let mut state = PaginationState { + next: Some(PaginationToken::Cursor("cursor_5".to_string())), + previous: Some(PaginationToken::Cursor("cursor_4".to_string())), + pages_fetched: 5, + }; + state.clear_next(); + assert!(state.next.is_none()); + // prev and pages_fetched should be untouched + assert_eq!( + state.previous, + Some(PaginationToken::Cursor("cursor_4".to_string())) + ); + assert_eq!(state.pages_fetched, 5); +} + +#[test] +fn test_clear_next_makes_exhausted() { + let mut state = PaginationState { + next: Some(PaginationToken::Cursor("abc".to_string())), + ..Default::default() + }; + assert!(!state.is_exhausted()); + state.clear_next(); + assert!(state.is_exhausted()); +} + +#[test] +fn test_clear_next_already_none() { + let mut state = PaginationState::default(); + state.clear_next(); // should be idempotent + assert!(state.is_exhausted()); +} + +// --- PaginationToken accessors --- + +#[test] +fn test_token_as_cursor() { + let token = PaginationToken::Cursor("abc".to_string()); + assert_eq!(token.as_cursor(), Some("abc")); + assert_eq!(token.as_url(), 
None); +} + +#[test] +fn test_token_as_url() { + let token = PaginationToken::Url("https://example.com".to_string()); + assert_eq!(token.as_url(), Some("https://example.com")); + assert_eq!(token.as_cursor(), None); +} + +// --- State machine sequences --- + +#[test] +fn test_full_pagination_lifecycle_cursor() { + let mut state = PaginationState { + next: Some(PaginationToken::Cursor("cursor_1".to_string())), + ..Default::default() + }; + + // After begin_scan: API returns first page with cursor + state.record_first_page(); + assert_eq!(state.pages_fetched, 1); + assert!(!state.is_exhausted()); + assert!(state.detect_loop().is_none()); // prev is still None + + // Before page 2: advance saves cursor_1 as prev + state.advance(); + assert_eq!( + state.previous, + Some(PaginationToken::Cursor("cursor_1".to_string())) + ); + assert_eq!(state.pages_fetched, 2); + + // Page 2 returns new cursor + state.next = Some(PaginationToken::Cursor("cursor_2".to_string())); + assert!(state.detect_loop().is_none()); // cursor_2 != cursor_1 + + // Before page 3: advance saves cursor_2 as prev + state.advance(); + assert_eq!( + state.previous, + Some(PaginationToken::Cursor("cursor_2".to_string())) + ); + assert_eq!(state.pages_fetched, 3); + + // Page 3 returns same cursor (loop!) 
+ state.next = Some(PaginationToken::Cursor("cursor_2".to_string())); + assert!(state.detect_loop().is_some()); + + // Reset for re_scan + state.reset(); + assert!(state.is_exhausted()); + assert_eq!(state.pages_fetched, 0); + assert!(state.detect_loop().is_none()); +} + +#[test] +fn test_full_pagination_lifecycle_url() { + let mut state = PaginationState { + next: Some(PaginationToken::Url( + "https://api.example.com/items?page=2".to_string(), + )), + ..Default::default() + }; + + // First page: API returns next URL + state.record_first_page(); + assert!(!state.is_exhausted()); + assert!(state.detect_loop().is_none()); + + // Before page 2 + state.advance(); + assert_eq!( + state.previous, + Some(PaginationToken::Url( + "https://api.example.com/items?page=2".to_string() + )) + ); + + // Page 2: next URL points to page 3 + state.next = Some(PaginationToken::Url( + "https://api.example.com/items?page=3".to_string(), + )); + assert!(state.detect_loop().is_none()); + + // Before page 3 + state.advance(); + + // Page 3: no next URL (last page) + state.next = None; + assert!(state.is_exhausted()); + assert!(state.detect_loop().is_none()); +} + +#[test] +fn test_page_limit_enforcement_in_lifecycle() { + let mut state = PaginationState::default(); + let max_pages = 3; + + state.next = Some(PaginationToken::Cursor("c1".to_string())); + state.record_first_page(); + assert!(!state.exceeds_limit(max_pages)); // 1 < 3 + + state.advance(); + state.next = Some(PaginationToken::Cursor("c2".to_string())); + assert!(!state.exceeds_limit(max_pages)); // 2 < 3 + + state.advance(); + state.next = Some(PaginationToken::Cursor("c3".to_string())); + assert!(state.exceeds_limit(max_pages)); // 3 >= 3 +} + +#[test] +fn test_clear_next_after_404() { + let mut state = PaginationState { + next: Some(PaginationToken::Cursor("c1".to_string())), + ..Default::default() + }; + state.record_first_page(); + state.advance(); + + // Simulate 404 response: clear next token + state.next = 
/// Cursors that differ only in the amount of whitespace are different tokens
/// and must not be reported as a pagination loop.
#[test]
fn test_whitespace_cursors_are_distinct() {
    // Explicit escapes keep the one-space / two-space difference visible and
    // immune to whitespace normalisation by editors or text extraction; with
    // two identical literals this assertion would contradict `detect_loop`.
    let state = PaginationState {
        next: Some(PaginationToken::Cursor("\u{20}".to_string())),
        previous: Some(PaginationToken::Cursor("\u{20}\u{20}".to_string())),
        ..Default::default()
    };
    assert!(state.detect_loop().is_none());
}
/// Extract the origin (`scheme://authority`) from a URL for same-origin comparison.
///
/// The authority ends at the first `/`, `?` or `#` after `://`, or at the end
/// of the string, so `https://host?page=2` and `https://host#frag` both yield
/// `https://host`.
///
/// Input without a `://` separator is returned unchanged.
fn extract_origin(url: &str) -> &str {
    match url.find("://") {
        Some(scheme_end) => {
            let authority_start = scheme_end + 3;
            // Length of the authority component, measured from `authority_start`.
            let authority_len = url[authority_start..]
                .find(|c: char| matches!(c, '/' | '?' | '#'))
                .unwrap_or(url.len() - authority_start);
            &url[..authority_start + authority_len]
        }
        None => url,
    }
}
+ .find('&') + .map_or(url.len(), |i| value_start + i); + format!("{}[REDACTED]{}", &url[..value_start], &url[value_end..]) + } else { + url.to_string() + } +} + +impl OpenApiFdw { + /// Fetch and parse the `OpenAPI` spec + pub(crate) fn fetch_spec(&mut self) -> Result<(), FdwError> { + if let Some(ref url) = self.config.spec_url { + let req = http::Request { + method: http::Method::Get, + url: url.clone(), + headers: self.config.headers.clone(), + body: String::default(), + }; + let resp = http::get(&req)?; + http::error_for_status(&resp).map_err(|_| { + // Discard opaque error body — may contain URL with credentials + format!("Failed to fetch OpenAPI spec (HTTP {})", resp.status_code) + })?; + + if resp.body.len() > self.config.max_response_bytes { + return Err(format!( + "OpenAPI spec too large: {} bytes (limit: {} bytes). \ + Increase max_response_bytes server option if needed.", + resp.body.len(), + self.config.max_response_bytes + )); + } + + let spec_json: JsonValue = + serde_json::from_str(&resp.body).map_err(|e| e.to_string())?; + let spec = OpenApiSpec::from_json(spec_json)?; + + // Use base_url from spec if not explicitly set + if self.config.base_url.is_empty() { + if let Some(url) = spec.base_url() { + self.config.base_url = url.trim_end_matches('/').to_string(); + } + } + + self.spec = Some(spec); + stats::inc_stats(FDW_NAME, stats::Metric::BytesIn, resp.body.len() as i64); + } else if let Some(ref raw_json) = self.config.spec_json { + if raw_json.len() > self.config.max_response_bytes { + return Err(format!( + "OpenAPI spec_json too large: {} bytes (limit: {} bytes). 
\ + Increase max_response_bytes server option if needed.", + raw_json.len(), + self.config.max_response_bytes + )); + } + + let spec_json: JsonValue = + serde_json::from_str(raw_json).map_err(|e| format!("Invalid spec_json: {e}"))?; + let spec = OpenApiSpec::from_json(spec_json)?; + + if self.config.base_url.is_empty() { + if let Some(url) = spec.base_url() { + self.config.base_url = url.trim_end_matches('/').to_string(); + } + } + + self.spec = Some(spec); + } + Ok(()) + } + + /// Extract a qual value as a string + pub(crate) fn qual_value_to_string( + qual: &crate::bindings::supabase::wrappers::types::Qual, + ) -> Option { + if qual.operator() != "=" { + return None; + } + if let Value::Cell(cell) = qual.value() { + match cell { + Cell::String(s) => Some(s), + Cell::I32(n) => Some(n.to_string()), + Cell::I64(n) => Some(n.to_string()), + Cell::F32(n) => Some(n.to_string()), + Cell::F64(n) => Some(n.to_string()), + Cell::Bool(b) => Some(b.to_string()), + _ => None, + } + } else { + None + } + } + + /// Resolve a relative or absolute pagination URL against the base URL and endpoint. + /// + /// Handles four forms of `next_url`: + /// - Absolute URLs (`http://...`, `https://...`) → validated against `base_url` origin + /// - Query-only (`?page=2`) → resolves against `base_url + endpoint` + /// - Absolute paths (`/items?page=2`) → resolves against `base_url` + /// - Bare relative paths (`page/2`) → resolves against `base_url/` + /// + /// # Errors + /// Returns an error if an absolute pagination URL points to a different origin + /// than `base_url`, which would leak authentication credentials to a third party. 
+ pub(crate) fn resolve_pagination_url(&self, next_url: &str) -> Result { + if next_url.starts_with("http://") || next_url.starts_with("https://") { + let next_origin = extract_origin(next_url); + let base_origin = extract_origin(&self.config.base_url); + if !next_origin.eq_ignore_ascii_case(base_origin) { + return Err(format!( + "Pagination URL origin mismatch: API returned '{next_origin}' \ + but base_url is '{base_origin}'. Cross-origin pagination URLs are \ + rejected to prevent credential leakage. If this API legitimately \ + uses a different host for pagination, set base_url to match \ + the pagination host." + )); + } + Ok(next_url.to_string()) + } else if next_url.starts_with('?') { + let endpoint_base = self.endpoint.split('?').next().unwrap_or(&self.endpoint); + Ok(format!("{}{endpoint_base}{next_url}", self.config.base_url)) + } else if next_url.starts_with('/') { + Ok(format!("{}{next_url}", self.config.base_url)) + } else { + Ok(format!("{}/{next_url}", self.config.base_url)) + } + } + + /// Substitute path parameters in endpoint template from quals. + /// + /// Writes substituted values into `injected` so they can be re-injected + /// into result rows (ensuring PostgreSQL's post-filter passes). + /// + /// Returns (resolved_endpoint, path_params_used) where path_params_used + /// contains lowercase names of parameters that were substituted. + /// + /// # Errors + /// Returns an error if required path parameters are missing from quals. 
+ pub(crate) fn substitute_path_params( + endpoint: &str, + quals: &[crate::bindings::supabase::wrappers::types::Qual], + injected: &mut HashMap, + ) -> Result<(String, Vec), String> { + if !endpoint.contains('{') { + return Ok((endpoint.to_string(), Vec::new())); + } + + // Build a map of qual field -> value for path parameter substitution + // Pre-allocate for 2 entries per qual (original + lowercase key) + let mut qual_map: HashMap = HashMap::with_capacity(quals.len() * 2); + for qual in quals { + if let Some(value) = Self::qual_value_to_string(qual) { + // Store both original and lowercase versions for flexible matching + qual_map.insert(qual.field().to_lowercase(), value.clone()); + qual_map.insert(qual.field(), value); + } + } + + let mut endpoint = endpoint.to_string(); + let mut path_params_used: Vec = Vec::new(); + let mut missing_params: Vec = Vec::new(); + + // Find all {param} patterns and substitute + while let Some(start) = endpoint.find('{') { + if let Some(end) = endpoint[start..].find('}') { + let param_name = &endpoint[start + 1..start + end]; + let param_lower = param_name.to_lowercase(); + + // Try to find matching qual (case-insensitive) + let value = qual_map + .get(¶m_lower) + .or_else(|| qual_map.get(param_name)); + + if let Some(val) = value { + path_params_used.push(param_lower.clone()); + // Store the path param for injection into rows (unencoded for PostgreSQL filter) + injected.insert(param_lower, val.clone()); + endpoint = format!( + "{}{}{}", + &endpoint[..start], + urlencoding::encode(val), + &endpoint[start + end + 1..] 
+ ); + } else { + // Track missing parameter and remove it from the endpoint to continue + missing_params.push(param_name.to_string()); + endpoint = format!("{}{}", &endpoint[..start], &endpoint[start + end + 1..]); + } + } else { + return Err(format!("Unclosed '{{' in endpoint template: {endpoint}")); + } + } + + // Return error if any required path parameters are missing + if !missing_params.is_empty() { + return Err(format!( + "Missing required path parameter(s) in WHERE clause: {}. \ + Add WHERE {} to your query.", + missing_params.join(", "), + missing_params + .iter() + .map(|p| format!("{p} = ''")) + .collect::>() + .join(" AND ") + )); + } + + Ok((endpoint, path_params_used)) + } + + /// Build query parameters from pagination state, quals, and API key. + /// + /// Returns (url_params, injected_entries) where injected_entries are + /// qual values to merge into `self.injected_params` for row injection. + /// Excludes path parameters and rowid column. + pub(crate) fn build_query_params( + &self, + quals: &[crate::bindings::supabase::wrappers::types::Qual], + path_params_used: &[String], + ) -> (Vec, Vec<(String, String)>) { + // Pre-allocate for cursor + page_size + quals + api_key + let mut params = Vec::with_capacity(quals.len() + 3); + let mut injected_entries = Vec::new(); + + // Add pagination cursor if we have one + if let Some(cursor) = self.pagination.next.as_ref().and_then(|t| t.as_cursor()) { + params.push(format!( + "{}={}", + urlencoding::encode(&self.config.cursor_param), + urlencoding::encode(cursor) + )); + } + + // Add page size if configured, reduced by LIMIT when available + if self.config.page_size > 0 && !self.config.page_size_param.is_empty() { + let effective_size = match self.src_limit { + Some(limit) if limit > 0 => self.config.page_size.min(limit as usize), + _ => self.config.page_size, + }; + params.push(format!( + "{}={}", + urlencoding::encode(&self.config.page_size_param), + effective_size + )); + } + + // Add remaining quals as 
query params (exclude path params and rowid) + for qual in quals { + let field_lower = qual.field().to_lowercase(); + + // Skip if used as path param + if path_params_used.contains(&field_lower) { + continue; + } + + // Skip the rowid column + if field_lower == self.rowid_col { + continue; + } + + if let Some(value) = Self::qual_value_to_string(qual) { + // Track for injection back into rows + // (so PostgreSQL's WHERE filter passes even if the API doesn't echo it back) + injected_entries.push((field_lower, value.clone())); + params.push(format!( + "{}={}", + urlencoding::encode(&qual.field()), + urlencoding::encode(&value) + )); + } + } + + // Add API key as query parameter if configured + if let Some((ref param_name, ref param_value)) = self.config.api_key_query { + params.push(format!( + "{}={}", + urlencoding::encode(param_name), + urlencoding::encode(param_value) + )); + } + + (params, injected_entries) + } + + /// Build the URL for a request, handling path parameters and pagination. + /// + /// Updates `self.injected_params` in place (avoids cloning on pagination). + /// + /// Supports endpoint templates like: + /// - `/users/{user_id}/posts` + /// - `/projects/{org}/{repo}/issues` + /// - `/resources/{type}/{id}` + /// + /// Path parameters are substituted from WHERE clause quals. + /// + /// # Errors + /// Returns an error if required path parameters are missing from the WHERE clause. + pub(crate) fn build_url(&mut self, ctx: &Context) -> Result { + // Use next_url for pagination if available (injected_params unchanged) + if let Some(next_url) = self.pagination.next.as_ref().and_then(|t| t.as_url()) { + let mut url = self.resolve_pagination_url(next_url)?; + if let Some((ref param_name, ref param_value)) = self.config.api_key_query { + let separator = if url.contains('?') { '&' } else { '?' 
}; + url.push(separator); + url.push_str(&format!( + "{}={}", + urlencoding::encode(param_name), + urlencoding::encode(param_value) + )); + } + return Ok(url); + } + + let quals = ctx.get_quals(); + + // Substitute path parameters (no self borrow — takes &mut injected_params directly) + let (endpoint, path_params_used) = + Self::substitute_path_params(&self.endpoint, &quals, &mut self.injected_params)?; + + // Check for rowid pushdown for single-resource access + // Only if endpoint doesn't already have path params and rowid qual exists + if path_params_used.is_empty() { + if let Some(id_qual) = quals + .iter() + .find(|q| q.field().to_lowercase() == self.rowid_col && q.operator() == "=") + { + if let Some(id) = Self::qual_value_to_string(id_qual) { + self.injected_params + .insert(self.rowid_col.clone(), id.clone()); + return Ok(format!( + "{}{}/{}", + self.config.base_url, + endpoint, + urlencoding::encode(&id) + )); + } + } + } + + // Build query parameters + let (params, injected_entries) = self.build_query_params(&quals, &path_params_used); + self.injected_params.extend(injected_entries); + + // Assemble final URL + let mut url = format!("{}{}", self.config.base_url, endpoint); + if !params.is_empty() { + let separator = if url.contains('?') { '&' } else { '?' 
}; + url.push(separator); + url.push_str(¶ms.join("&")); + } + + Ok(url) + } + + /// Make a request to the API with automatic rate limit handling + pub(crate) fn make_request(&mut self, ctx: &Context) -> FdwResult { + let url = self.build_url(ctx)?; + + let req = http::Request { + method: self.method, + url, + headers: self.config.headers.clone(), + body: self.request_body.clone(), + }; + + // Retry loop for rate limiting (HTTP 429) + let mut retry_count = 0; + const MAX_RETRIES: u32 = 3; + const MAX_RETRY_DELAY_MS: u64 = 30_000; + + let resp = loop { + let resp = match req.method { + http::Method::Post => http::post(&req)?, + _ => http::get(&req)?, + }; + + // Handle rate limiting (HTTP 429) + if resp.status_code == 429 { + if retry_count >= MAX_RETRIES { + return Err(format!( + "API rate limit exceeded after {MAX_RETRIES} retries. \ + Consider adding a page_size option to reduce request frequency." + )); + } + + // Try to get retry delay from Retry-After header (case-insensitive), + // capped to prevent absurdly long waits from malicious/buggy servers + let delay_ms = resp + .headers + .iter() + .find(|h| h.0.to_lowercase() == RETRY_AFTER_HEADER) + .and_then(|h| h.1.parse::().ok()) + .map(|secs| secs.saturating_mul(1000).min(MAX_RETRY_DELAY_MS)) + .unwrap_or_else(|| { + // Exponential backoff: 1s, 2s, 4s (capped) + let backoff = 1000u64.saturating_mul(1 << retry_count); + backoff.min(MAX_RETRY_DELAY_MS) + }); + + time::sleep(delay_ms); + retry_count += 1; + continue; + } + + break resp; + }; + + if self.config.debug { + let log_url = match self.config.api_key_query { + Some((ref param_name, _)) => redact_query_param(&req.url, param_name), + None => req.url.clone(), + }; + crate::bindings::supabase::wrappers::utils::report_info(&format!( + "[openapi_fdw] HTTP {} {} -> {} ({} bytes)", + if matches!(req.method, http::Method::Post) { + "POST" + } else { + "GET" + }, + log_url, + resp.status_code, + resp.body.len() + )); + } + + // Handle 404 as empty result (no 
matching resource) + if resp.status_code == 404 { + self.src_rows = Vec::new(); + self.src_idx = 0; + self.pagination.clear_next(); + return Ok(()); + } + + http::error_for_status(&resp).map_err(|_| { + // Discard the opaque error body from error_for_status — it may + // contain the full request URL, which leaks API key query params + // when api_key_location = 'query'. + format!( + "HTTP {} error from API endpoint ({})", + resp.status_code, + self.endpoint.split('?').next().unwrap_or(&self.endpoint) + ) + })?; + + if resp.body.len() > self.config.max_response_bytes { + return Err(format!( + "Response body too large: {} bytes (limit: {} bytes). \ + Increase max_response_bytes server option if needed.", + resp.body.len(), + self.config.max_response_bytes + )); + } + + let mut resp_json: JsonValue = + serde_json::from_str(&resp.body).map_err(|e| e.to_string())?; + + stats::inc_stats(FDW_NAME, stats::Metric::BytesIn, resp.body.len() as i64); + + // Handle pagination before extracting data (borrows resp_json) + self.handle_pagination(&resp_json); + + // Extract data by taking ownership (avoids cloning the array) + self.src_rows = self.extract_data(&mut resp_json)?; + self.src_idx = 0; + + // Build column key map for O(1) lookups during iter_scan + self.build_column_key_map(); + + Ok(()) + } +} + +#[cfg(test)] +#[path = "request_tests.rs"] +mod tests; diff --git a/wasm-wrappers/fdw/openapi_fdw/src/request_tests.rs b/wasm-wrappers/fdw/openapi_fdw/src/request_tests.rs new file mode 100644 index 00000000..ed54388c --- /dev/null +++ b/wasm-wrappers/fdw/openapi_fdw/src/request_tests.rs @@ -0,0 +1,527 @@ +use super::*; +use crate::config::ServerConfig; +use crate::pagination::PaginationState; + +// --- resolve_pagination_url tests --- + +fn make_fdw_for_url(base_url: &str, endpoint: &str) -> OpenApiFdw { + OpenApiFdw { + config: ServerConfig { + base_url: base_url.to_string(), + ..Default::default() + }, + endpoint: endpoint.to_string(), + ..Default::default() + } +} + 
+#[test]
+fn test_resolve_pagination_url_absolute_https() {
+    let fdw = make_fdw_for_url("https://api.example.com", "/items");
+    let url = fdw
+        .resolve_pagination_url("https://api.example.com/items?page=2&limit=10")
+        .unwrap();
+    assert_eq!(url, "https://api.example.com/items?page=2&limit=10");
+}
+
+#[test]
+fn test_resolve_pagination_url_absolute_http() {
+    let fdw = make_fdw_for_url("http://mockserver:1080", "/items");
+    let url = fdw
+        .resolve_pagination_url("http://mockserver:1080/items?page=2")
+        .unwrap();
+    assert_eq!(url, "http://mockserver:1080/items?page=2");
+}
+
+#[test]
+fn test_resolve_pagination_url_query_only() {
+    // "?page=2" should resolve against base_url + endpoint
+    let fdw = make_fdw_for_url("https://api.example.com", "/items");
+    let url = fdw.resolve_pagination_url("?page=2").unwrap();
+    assert_eq!(url, "https://api.example.com/items?page=2");
+}
+
+#[test]
+fn test_resolve_pagination_url_query_only_strips_existing_query() {
+    // If endpoint already has query params, only the path part is used
+    let fdw = make_fdw_for_url("https://api.example.com", "/items?status=active");
+    let url = fdw.resolve_pagination_url("?page=2").unwrap();
+    assert_eq!(url, "https://api.example.com/items?page=2");
+}
+
+#[test]
+fn test_resolve_pagination_url_absolute_path() {
+    // "/items?page=2" should resolve against base_url
+    let fdw = make_fdw_for_url("https://api.example.com", "/old-endpoint");
+    let url = fdw
+        .resolve_pagination_url("/items?page=2&limit=50")
+        .unwrap();
+    assert_eq!(url, "https://api.example.com/items?page=2&limit=50");
+}
+
+#[test]
+fn test_resolve_pagination_url_bare_relative() {
+    // "page/2" should resolve against base_url/
+    let fdw = make_fdw_for_url("https://api.example.com", "/items");
+    let url = fdw.resolve_pagination_url("page/2").unwrap();
+    assert_eq!(url, "https://api.example.com/page/2");
+}
+
+#[test]
+fn test_resolve_pagination_url_empty_string() {
+    // An empty next-link is expected to resolve to the base URL followed by "/"
+    let fdw = make_fdw_for_url("https://api.example.com", "/items");
+    let url = fdw.resolve_pagination_url("").unwrap();
+    assert_eq!(url, "https://api.example.com/");
+}
+
+#[test]
+fn test_resolve_pagination_url_trailing_slash_base() {
+    // base_url is already trimmed of trailing slash in init()
+    let fdw = make_fdw_for_url("https://api.example.com", "/v2/items");
+    let url = fdw.resolve_pagination_url("/v2/items?offset=100").unwrap();
+    assert_eq!(url, "https://api.example.com/v2/items?offset=100");
+}
+
+// --- Cross-origin pagination rejection ---
+
+#[test]
+fn test_resolve_pagination_url_cross_origin_rejected() {
+    // A different host must be refused, and the error text must mention both
+    // the origin mismatch and the credential-leak risk.
+    let fdw = make_fdw_for_url("https://api.example.com", "/items");
+    let result = fdw.resolve_pagination_url("https://evil.com/exfiltrate?token=abc");
+    assert!(result.is_err());
+    let err = result.unwrap_err();
+    assert!(err.contains("origin mismatch"));
+    assert!(err.contains("credential leakage"));
+}
+
+#[test]
+fn test_resolve_pagination_url_cross_origin_different_subdomain() {
+    // Same registrable domain but a different subdomain is still rejected
+    let fdw = make_fdw_for_url("https://api.example.com", "/items");
+    let result = fdw.resolve_pagination_url("https://cdn.example.com/items?page=2");
+    assert!(result.is_err());
+}
+
+#[test]
+fn test_resolve_pagination_url_cross_origin_different_port() {
+    // Same scheme and host, explicit non-matching port → rejected
+    let fdw = make_fdw_for_url("https://api.example.com", "/items");
+    let result = fdw.resolve_pagination_url("https://api.example.com:8443/items?page=2");
+    assert!(result.is_err());
+}
+
+#[test]
+fn test_resolve_pagination_url_cross_origin_http_vs_https() {
+    // Same host, scheme downgraded from https to http → rejected
+    let fdw = make_fdw_for_url("https://api.example.com", "/items");
+    let result = fdw.resolve_pagination_url("http://api.example.com/items?page=2");
+    assert!(result.is_err());
+}
+
+#[test]
+fn test_resolve_pagination_url_same_origin_with_path() {
+    let fdw = make_fdw_for_url("https://api.example.com", "/items");
+    let url = fdw
+        .resolve_pagination_url("https://api.example.com/v2/items?page=2")
+        .unwrap();
+    assert_eq!(url, "https://api.example.com/v2/items?page=2");
+}
+
+#[test]
+fn test_resolve_pagination_url_same_origin_with_port() {
+    let fdw = make_fdw_for_url("http://mockserver:1080", "/items");
+    let url = fdw
+        .resolve_pagination_url("http://mockserver:1080/next?cursor=abc")
+        .unwrap();
+    assert_eq!(url, "http://mockserver:1080/next?cursor=abc");
+}
+
+#[test]
+fn test_resolve_pagination_url_same_origin_case_insensitive() {
+    // base_url is mixed-case here; the lower-case next-link must still be accepted
+    let fdw = make_fdw_for_url("https://API.Example.COM", "/items");
+    let url = fdw
+        .resolve_pagination_url("https://api.example.com/items?page=2")
+        .unwrap();
+    assert_eq!(url, "https://api.example.com/items?page=2");
+}
+
+// --- extract_origin tests ---
+
+#[test]
+fn test_extract_origin_https() {
+    assert_eq!(
+        extract_origin("https://api.example.com/items?page=2"),
+        "https://api.example.com"
+    );
+}
+
+#[test]
+fn test_extract_origin_with_port() {
+    assert_eq!(
+        extract_origin("http://localhost:8080/api/v1"),
+        "http://localhost:8080"
+    );
+}
+
+#[test]
+fn test_extract_origin_no_path() {
+    assert_eq!(
+        extract_origin("https://api.example.com"),
+        "https://api.example.com"
+    );
+}
+
+#[test]
+fn test_extract_origin_no_scheme() {
+    // Input without a scheme is expected back unchanged
+    assert_eq!(
+        extract_origin("api.example.com/items"),
+        "api.example.com/items"
+    );
+}
+
+#[test]
+fn test_extract_origin_trailing_slash() {
+    assert_eq!(
+        extract_origin("https://api.example.com/"),
+        "https://api.example.com"
+    );
+}
+
+// --- redact_query_param tests ---
+
+#[test]
+fn test_redact_query_param_present() {
+    let url = "https://api.example.com/items?api_key=SECRET123&page=2";
+    let redacted = redact_query_param(url, "api_key");
+    assert_eq!(
+        redacted,
+        "https://api.example.com/items?api_key=[REDACTED]&page=2"
+    );
+    assert!(!redacted.contains("SECRET123"));
+}
+
+#[test]
+fn test_redact_query_param_at_end() {
+    let url = "https://api.example.com/items?page=2&api_key=SECRET123";
+    let redacted = redact_query_param(url, "api_key");
+    assert_eq!(
+        redacted,
+        "https://api.example.com/items?page=2&api_key=[REDACTED]"
+    );
+}
+
+#[test]
+fn test_redact_query_param_only_param() {
+    let url = "https://api.example.com/items?api_key=SECRET123";
+    let redacted = redact_query_param(url, "api_key");
+    assert_eq!(redacted, "https://api.example.com/items?api_key=[REDACTED]");
+}
+
+#[test]
+fn test_redact_query_param_not_present() {
+    let url = "https://api.example.com/items?page=2&limit=10";
+    let redacted = redact_query_param(url, "api_key");
+    assert_eq!(redacted, url);
+}
+
+#[test]
+fn test_redact_query_param_no_query_string() {
+    let url = "https://api.example.com/items";
+    let redacted = redact_query_param(url, "api_key");
+    assert_eq!(redacted, url);
+}
+
+#[test]
+fn test_redact_query_param_encoded_name() {
+    // urlencoding::encode("api key") = "api%20key"
+    let url = "https://api.example.com/items?api%20key=SECRET&page=2";
+    let redacted = redact_query_param(url, "api key");
+    assert_eq!(
+        redacted,
+        "https://api.example.com/items?api%20key=[REDACTED]&page=2"
+    );
+}
+
+// --- URL encoding security tests ---
+// NOTE(review): the four tests below call urlencoding::encode directly; they pin
+// the encoder's output, not the FDW code path that builds rowid URLs.
+
+#[test]
+fn test_rowid_url_encoding_path_traversal() {
+    // Verify urlencoding::encode handles path traversal attempts
+    let malicious_id = "../admin";
+    let encoded = urlencoding::encode(malicious_id);
+    assert_eq!(encoded, "..%2Fadmin");
+    // Resulting URL would be /items/..%2Fadmin (safe) not /items/../admin (traversal)
+}
+
+#[test]
+fn test_rowid_url_encoding_query_injection() {
+    // Verify urlencoding::encode handles query injection attempts
+    let malicious_id = "123?admin=true";
+    let encoded = urlencoding::encode(malicious_id);
+    assert_eq!(encoded, "123%3Fadmin%3Dtrue");
+}
+
+#[test]
+fn test_rowid_url_encoding_special_chars() {
+    // Verify urlencoding::encode handles various URL-unsafe chars
+    let special = "id with spaces&more=stuff#fragment";
+    let encoded = urlencoding::encode(special);
+    assert!(!encoded.contains(' '));
+    assert!(!encoded.contains('&'));
+    assert!(!encoded.contains('='));
+    assert!(!encoded.contains('#'));
+}
+
+#[test]
+fn test_rowid_url_encoding_normal_ids() {
+    // Normal IDs should pass through unchanged
+    assert_eq!(urlencoding::encode("123"), "123");
+    assert_eq!(urlencoding::encode("abc-def"), "abc-def");
+    assert_eq!(
+        urlencoding::encode("550e8400-e29b-41d4-a716-446655440000"),
+        "550e8400-e29b-41d4-a716-446655440000"
+    );
+}
+
+// --- Retry delay cap tests ---
+// NOTE(review): these tests recompute the cap expression on local variables;
+// none of them calls into OpenApiFdw, so they would keep passing if the
+// production retry code changed.
+
+#[test]
+fn test_retry_delay_cap_normal_value() {
+    // Normal Retry-After: 5 seconds → 5000ms, well under cap
+    let secs: u64 = 5;
+    let max_delay: u64 = 30_000;
+    let delay = secs.saturating_mul(1000).min(max_delay);
+    assert_eq!(delay, 5000);
+}
+
+#[test]
+fn test_retry_delay_cap_large_value() {
+    // Absurdly large Retry-After: 999999 seconds → capped to 30s
+    let secs: u64 = 999_999;
+    let max_delay: u64 = 30_000;
+    let delay = secs.saturating_mul(1000).min(max_delay);
+    assert_eq!(delay, 30_000);
+}
+
+#[test]
+fn test_retry_delay_cap_u64_max() {
+    // u64::MAX seconds → saturating_mul prevents overflow, then capped
+    let secs: u64 = u64::MAX;
+    let max_delay: u64 = 30_000;
+    let delay = secs.saturating_mul(1000).min(max_delay);
+    assert_eq!(delay, 30_000);
+}
+
+#[test]
+fn test_retry_delay_cap_zero() {
+    // Retry-After: 0 → 0ms (immediate retry)
+    let secs: u64 = 0;
+    let max_delay: u64 = 30_000;
+    let delay = secs.saturating_mul(1000).min(max_delay);
+    assert_eq!(delay, 0);
+}
+
+#[test]
+fn test_retry_backoff_cap() {
+    // Exponential backoff at retry_count=10 would be 1024s, but capped
+    let retry_count: u32 = 10;
+    let max_delay: u64 = 30_000;
+    let backoff = 1000u64.saturating_mul(1 << retry_count);
+    let delay = backoff.min(max_delay);
+    assert_eq!(delay, 30_000);
+}
+
+// --- build_query_params: LIMIT-to-page_size optimization ---
+
+// Builds an FDW whose config uses `per_page` as the page-size parameter, with
+// the given page_size and an optional pushed-down LIMIT in `src_limit`.
+// NOTE(review): `Option` carries no type argument here, which does not compile;
+// the angle-bracket argument appears to have been lost from this copy of the
+// patch. Restore it — TODO confirm the element type against the `src_limit`
+// field of OpenApiFdw.
+fn make_fdw_for_page_size(page_size: usize, src_limit: Option) -> OpenApiFdw {
+    OpenApiFdw {
+        config: ServerConfig {
+            page_size,
+            page_size_param: "per_page".to_string(),
+            ..Default::default()
+        },
+        src_limit,
+        ..Default::default()
+    }
+}
+
+// Returns the first `per_page=...` entry produced by build_query_params with no
+// quals and no columns, or None when no such entry is emitted.
+// NOTE(review): the return type has also lost its type argument; the `.cloned()`
+// result is compared against `Some("...".to_string())` below, so it is
+// presumably `Option<String>` — confirm.
+fn get_page_size_param(fdw: &OpenApiFdw) -> Option {
+    let (params, _) = fdw.build_query_params(&[], &[]);
+    params.iter().find(|p| p.starts_with("per_page=")).cloned()
+}
+
+#[test]
+fn test_page_size_reduced_by_limit() {
+    // LIMIT 5 with page_size=30 → per_page=5
+    let fdw = make_fdw_for_page_size(30, Some(5));
+    assert_eq!(get_page_size_param(&fdw), Some("per_page=5".to_string()));
+}
+
+#[test]
+fn test_page_size_not_increased_by_limit() {
+    // LIMIT 50 with page_size=30 → per_page=30 (limit larger than page_size)
+    let fdw = make_fdw_for_page_size(30, Some(50));
+    assert_eq!(get_page_size_param(&fdw), Some("per_page=30".to_string()));
+}
+
+#[test]
+fn test_page_size_unchanged_without_limit() {
+    // No LIMIT → per_page=30
+    let fdw = make_fdw_for_page_size(30, None);
+    assert_eq!(get_page_size_param(&fdw), Some("per_page=30".to_string()));
+}
+
+#[test]
+fn test_page_size_zero_no_param() {
+    // page_size=0 → no per_page param regardless of LIMIT
+    let fdw = make_fdw_for_page_size(0, Some(5));
+    assert_eq!(get_page_size_param(&fdw), None);
+}
+
+// --- fetch_spec with spec_json tests ---
+
+// Smallest inline spec used by the spec_json tests: one server URL and a single
+// GET /items operation.
+const MINIMAL_SPEC_JSON: &str = r#"{
+    "openapi": "3.0.0",
+    "info": { "title": "Test", "version": "1.0" },
+    "servers": [{ "url": "https://api.example.com" }],
+    "paths": {
+        "/items": {
+            "get": {
+                "responses": { "200": { "description": "OK" } }
+            }
+        }
+    }
+}"#;
+
+#[test]
+fn test_fetch_spec_from_spec_json() {
+    // With no base_url configured, the spec's first server URL is expected to be adopted
+    let mut fdw = OpenApiFdw {
+        config: ServerConfig {
+            spec_json: Some(MINIMAL_SPEC_JSON.to_string()),
+            ..Default::default()
+        },
+        ..Default::default()
+    };
+    fdw.fetch_spec().unwrap();
+    assert!(fdw.spec.is_some());
+    assert_eq!(fdw.config.base_url, "https://api.example.com");
+}
+
+#[test]
+fn test_fetch_spec_from_spec_json_preserves_explicit_base_url() {
+    let mut fdw = OpenApiFdw {
+        config: ServerConfig {
+            base_url: "https://custom.example.com".to_string(),
+            spec_json: Some(MINIMAL_SPEC_JSON.to_string()),
+            ..Default::default()
+        },
+        ..Default::default()
+    };
+    fdw.fetch_spec().unwrap();
+    assert!(fdw.spec.is_some());
+    assert_eq!(fdw.config.base_url, "https://custom.example.com");
+}
+
+#[test]
+fn test_fetch_spec_from_spec_json_invalid_json() {
+    let mut fdw = OpenApiFdw {
+        config: ServerConfig {
+            spec_json: Some("{ not valid json".to_string()),
+            ..Default::default()
+        },
+        ..Default::default()
+    };
+    let err = fdw.fetch_spec().unwrap_err();
+    assert!(err.contains("Invalid spec_json"));
+}
+
+#[test]
+fn test_fetch_spec_from_spec_json_too_large() {
+    // 200-byte payload against a 100-byte max_response_bytes limit
+    let mut fdw = OpenApiFdw {
+        config: ServerConfig {
+            spec_json: Some("x".repeat(200)),
+            max_response_bytes: 100,
+            ..Default::default()
+        },
+        ..Default::default()
+    };
+    let err = fdw.fetch_spec().unwrap_err();
+    assert!(err.contains("spec_json too large"));
+    assert!(err.contains("200 bytes"));
+    assert!(err.contains("limit: 100 bytes"));
+}
+
+#[test]
+fn test_fetch_spec_neither_url_nor_json() {
+    let mut fdw = OpenApiFdw::default();
+    // Neither spec_url nor spec_json set → succeeds but spec stays None
+    fdw.fetch_spec().unwrap();
+    assert!(fdw.spec.is_none());
+}
+
+// --- Fix 1: api_key_query appended to URL-based pagination ---
+
+#[test]
+fn test_resolve_pagination_url_appends_api_key_query() {
+    let fdw = OpenApiFdw {
+        config: ServerConfig {
+            base_url: "https://api.example.com".to_string(),
+            api_key_query: Some(("api_key".to_string(), "secret123".to_string())),
+            ..Default::default()
+        },
+        endpoint: "/items".to_string(),
+        pagination: PaginationState {
+            next: Some(crate::pagination::PaginationToken::Url(
+                "https://api.example.com/items?page=2".to_string(),
+            )),
+            ..Default::default()
+        },
+        ..Default::default()
+    };
+
+    // Simulate what build_url does for URL-based pagination
+    // NOTE(review): the append logic is re-implemented here rather than invoked,
+    // so this test only exercises resolve_pagination_url plus the local copy.
+    let next_url = fdw.pagination.next.as_ref().unwrap().as_url().unwrap();
+    let mut url = fdw.resolve_pagination_url(next_url).unwrap();
+    if let Some((ref param_name, ref param_value)) = fdw.config.api_key_query {
+        let separator = if url.contains('?') { '&' } else { '?' };
+        url.push(separator);
+        url.push_str(&format!(
+            "{}={}",
+            urlencoding::encode(param_name),
+            urlencoding::encode(param_value)
+        ));
+    }
+    assert_eq!(
+        url,
+        "https://api.example.com/items?page=2&api_key=secret123"
+    );
+}
+
+#[test]
+fn test_resolve_pagination_url_no_api_key_unchanged() {
+    let fdw = make_fdw_for_url("https://api.example.com", "/items");
+    let url = fdw
+        .resolve_pagination_url("https://api.example.com/items?page=2")
+        .unwrap();
+    // No api_key_query configured → URL unchanged
+    assert_eq!(url, "https://api.example.com/items?page=2");
+}
+
+// --- Fix 7: Unclosed '{' in endpoint template ---
+
+#[test]
+fn test_substitute_path_params_unclosed_brace_error() {
+    // The error must name the problem and echo the offending template
+    let mut injected = std::collections::HashMap::new();
+    let result = OpenApiFdw::substitute_path_params("/items/{id", &[], &mut injected);
+    assert!(result.is_err());
+    let err = result.unwrap_err();
+    assert!(err.contains("Unclosed '{'"));
+    assert!(err.contains("/items/{id"));
+}
+
+#[test]
+fn test_substitute_path_params_unclosed_brace_after_valid() {
+    let mut injected = std::collections::HashMap::new();
+    // First param is valid, second is unclosed
+    let result =
+        OpenApiFdw::substitute_path_params("/users/{user_id}/posts/{title", &[], &mut injected);
+    assert!(result.is_err());
+    assert!(result.unwrap_err().contains("Unclosed '{'"));
+}
diff --git a/wasm-wrappers/fdw/openapi_fdw/src/response.rs b/wasm-wrappers/fdw/openapi_fdw/src/response.rs
new file mode 100644
index 00000000..fc054ecf
--- /dev/null
+++ b/wasm-wrappers/fdw/openapi_fdw/src/response.rs
@@ -0,0 +1,168 @@
+//! Response parsing: data extraction and pagination handling
+
+use serde_json::Value as JsonValue;
+
+use crate::OpenApiFdw;
+use crate::bindings::supabase::wrappers::types::FdwError;
+use crate::pagination::PaginationToken;
+
+/// Common wrapper keys for auto-detecting the data array in API responses.
+pub(crate) const WRAPPER_KEYS: &[&str] = &[ + "data", "results", "items", "records", "entries", "features", "@graph", +]; + +/// JSON pointer paths to check for a next-page URL. +const NEXT_URL_PATHS: &[&str] = &[ + "/meta/pagination/next", + "/meta/pagination/next_url", + "/pagination/next", + "/pagination/next_url", + "/links/next", + "/links/next_url", + "/next", + "/next_url", + "/_links/next/href", +]; + +/// JSON pointer paths to check for a boolean "has more pages" flag. +const HAS_MORE_PATHS: &[&str] = &[ + "/meta/pagination/has_more", + "/has_more", + "/pagination/has_more", +]; + +/// JSON pointer paths to check for a next-page cursor value. +const CURSOR_PATHS: &[&str] = &[ + "/meta/pagination/next_cursor", + "/pagination/next_cursor", + "/next_cursor", + "/cursor", +]; + +impl OpenApiFdw { + /// Extract the data array from the response, taking ownership to avoid cloning + pub(crate) fn extract_data(&self, resp: &mut JsonValue) -> Result, FdwError> { + // If response_path is specified, use it + if let Some(ref path) = self.response_path { + if let Some(data) = resp.pointer_mut(path).map(JsonValue::take) { + return Self::json_to_rows(data); + } + // response_path not found — fall through to auto-detection + // (common when rowid lookup returns single object instead of collection) + } + + // Direct array response + if resp.is_array() { + return Self::json_to_rows(resp.take()); + } + + // Try common wrapper patterns + if resp.is_object() { + for key in WRAPPER_KEYS { + if resp + .get(*key) + .is_some_and(|d| d.is_array() || d.is_object()) + { + return Self::json_to_rows(resp[*key].take()); + } + } + + // Single object response + return Ok(vec![resp.take()]); + } + + Err(format!( + "Unable to extract data from response (type: {}). 
\ + Expected an array, object with a known wrapper key \ + ({}), \ + or set response_path in table options.", + match resp { + JsonValue::Null => "null", + JsonValue::Bool(_) => "boolean", + JsonValue::Number(_) => "number", + JsonValue::String(_) => "string", + _ => "unknown", + }, + WRAPPER_KEYS.join(", "), + )) + } + + /// Convert a JSON value to a vector of row objects (takes ownership, no cloning) + pub(crate) fn json_to_rows(data: JsonValue) -> Result, FdwError> { + match data { + JsonValue::Array(arr) => Ok(arr), + data if data.is_object() => Ok(vec![data]), + _ => Err(format!( + "Response data is not an array or object (got {})", + match &data { + JsonValue::Null => "null", + JsonValue::Bool(_) => "boolean", + JsonValue::Number(_) => "number", + JsonValue::String(_) => "string", + _ => "unknown", + } + )), + } + } + + /// Handle pagination from the response + pub(crate) fn handle_pagination(&mut self, resp: &JsonValue) { + self.pagination.clear_next(); + + // Try configured cursor path first + if !self.cursor_path.is_empty() { + if let Some(value) = Self::extract_non_empty_string(resp, &self.cursor_path) { + if value.starts_with("http://") || value.starts_with("https://") { + self.pagination.next = Some(PaginationToken::Url(value)); + } else { + self.pagination.next = Some(PaginationToken::Cursor(value)); + } + return; + } + } + + // Only try auto-detection for object responses + if resp.as_object().is_none() { + return; + } + + // Check for next URL in common locations + for path in NEXT_URL_PATHS { + if let Some(url) = Self::extract_non_empty_string(resp, path) { + self.pagination.next = Some(PaginationToken::Url(url)); + return; + } + } + + // Check for has_more flag with cursor + let has_more = HAS_MORE_PATHS + .iter() + .find_map(|p| resp.pointer(p)) + .and_then(JsonValue::as_bool) + .unwrap_or(false); + + if !has_more { + return; + } + + // Find next cursor + for path in CURSOR_PATHS { + if let Some(cursor) = Self::extract_non_empty_string(resp, path) 
{ + self.pagination.next = Some(PaginationToken::Cursor(cursor)); + return; + } + } + } + + /// Extract a non-empty string from a JSON pointer path + pub(crate) fn extract_non_empty_string(json: &JsonValue, path: &str) -> Option { + json.pointer(path) + .and_then(JsonValue::as_str) + .filter(|s| !s.is_empty()) + .map(ToString::to_string) + } +} + +#[cfg(test)] +#[path = "response_tests.rs"] +mod tests; diff --git a/wasm-wrappers/fdw/openapi_fdw/src/response_tests.rs b/wasm-wrappers/fdw/openapi_fdw/src/response_tests.rs new file mode 100644 index 00000000..b016e36b --- /dev/null +++ b/wasm-wrappers/fdw/openapi_fdw/src/response_tests.rs @@ -0,0 +1,761 @@ +use super::*; +use crate::config::ServerConfig; +use crate::pagination::PaginationToken; + +// --- json_to_rows tests --- + +#[test] +fn test_json_to_rows_array() { + let data = serde_json::json!([ + {"id": 1, "name": "alice"}, + {"id": 2, "name": "bob"}, + {"id": 3, "name": "charlie"} + ]); + let rows = OpenApiFdw::json_to_rows(data).unwrap(); + assert_eq!(rows.len(), 3); + assert_eq!(rows[0]["id"], 1); + assert_eq!(rows[2]["name"], "charlie"); +} + +#[test] +fn test_json_to_rows_single_object() { + let data = serde_json::json!({"id": 1, "name": "alice"}); + let rows = OpenApiFdw::json_to_rows(data).unwrap(); + assert_eq!(rows.len(), 1); + assert_eq!(rows[0]["name"], "alice"); +} + +#[test] +fn test_json_to_rows_empty_array() { + let data = serde_json::json!([]); + let rows = OpenApiFdw::json_to_rows(data).unwrap(); + assert!(rows.is_empty()); +} + +#[test] +fn test_json_to_rows_rejects_primitive() { + let data = serde_json::json!("just a string"); + let err = OpenApiFdw::json_to_rows(data).unwrap_err(); + assert!( + err.contains("string"), + "Error should mention the type: {err}" + ); +} + +// --- extract_data tests --- + +fn fdw_with_response_path(path: Option<&str>) -> OpenApiFdw { + OpenApiFdw { + response_path: path.map(String::from), + ..Default::default() + } +} + +#[test] +fn 
test_extract_data_with_response_path() {
+    let fdw = fdw_with_response_path(Some("/features"));
+    let mut resp = serde_json::json!({
+        "type": "FeatureCollection",
+        "features": [
+            {"properties": {"id": "a"}},
+            {"properties": {"id": "b"}}
+        ]
+    });
+    let rows = fdw.extract_data(&mut resp).unwrap();
+    assert_eq!(rows.len(), 2);
+    // Original is taken, not cloned
+    assert!(resp["features"].is_null());
+}
+
+#[test]
+fn test_extract_data_with_nested_response_path() {
+    let fdw = fdw_with_response_path(Some("/result/data"));
+    let mut resp = serde_json::json!({
+        "result": {
+            "data": [{"id": 1}, {"id": 2}, {"id": 3}]
+        }
+    });
+    let rows = fdw.extract_data(&mut resp).unwrap();
+    assert_eq!(rows.len(), 3);
+}
+
+#[test]
+fn test_extract_data_response_path_fallback_single_object() {
+    // response_path /features fails (single object, not FeatureCollection),
+    // falls back to single object auto-detection
+    let fdw = fdw_with_response_path(Some("/features"));
+    let mut resp = serde_json::json!({
+        "@type": "wx:ObservationStation",
+        "stationIdentifier": "KDEN",
+        "name": "Denver International Airport"
+    });
+    let rows = fdw.extract_data(&mut resp).unwrap();
+    assert_eq!(rows.len(), 1);
+    assert_eq!(rows[0]["stationIdentifier"], "KDEN");
+}
+
+#[test]
+fn test_extract_data_response_path_fallback_with_wrapper_key() {
+    // response_path /features fails, falls back to auto-detect "data" wrapper key
+    let fdw = fdw_with_response_path(Some("/features"));
+    let mut resp = serde_json::json!({
+        "data": [{"id": 1}, {"id": 2}],
+        "total": 2
+    });
+    let rows = fdw.extract_data(&mut resp).unwrap();
+    assert_eq!(rows.len(), 2);
+    assert_eq!(rows[0]["id"], 1);
+}
+
+#[test]
+fn test_extract_data_direct_array() {
+    let fdw = fdw_with_response_path(None);
+    let mut resp = serde_json::json!([{"id": 1}, {"id": 2}]);
+    let rows = fdw.extract_data(&mut resp).unwrap();
+    assert_eq!(rows.len(), 2);
+}
+
+#[test]
+fn test_extract_data_auto_detect_data_key() {
+    let fdw = fdw_with_response_path(None);
+    let mut resp = serde_json::json!({
+        "data": [{"id": 1}, {"id": 2}],
+        "meta": {"total": 2}
+    });
+    let rows = fdw.extract_data(&mut resp).unwrap();
+    assert_eq!(rows.len(), 2);
+    // The wrapper value is moved out, leaving null behind
+    assert!(resp["data"].is_null());
+}
+
+#[test]
+fn test_extract_data_auto_detect_results_key() {
+    let fdw = fdw_with_response_path(None);
+    let mut resp = serde_json::json!({
+        "results": [{"id": "x"}],
+        "count": 1
+    });
+    let rows = fdw.extract_data(&mut resp).unwrap();
+    assert_eq!(rows.len(), 1);
+    assert_eq!(rows[0]["id"], "x");
+}
+
+#[test]
+fn test_extract_data_auto_detect_features_key() {
+    let fdw = fdw_with_response_path(None);
+    let mut resp = serde_json::json!({
+        "type": "FeatureCollection",
+        "features": [
+            {"type": "Feature", "properties": {"name": "A"}},
+            {"type": "Feature", "properties": {"name": "B"}}
+        ]
+    });
+    let rows = fdw.extract_data(&mut resp).unwrap();
+    assert_eq!(rows.len(), 2);
+}
+
+#[test]
+fn test_extract_data_single_object_fallback() {
+    let fdw = fdw_with_response_path(None);
+    let mut resp = serde_json::json!({
+        "id": "abc",
+        "name": "singleton"
+    });
+    let rows = fdw.extract_data(&mut resp).unwrap();
+    assert_eq!(rows.len(), 1);
+    assert_eq!(rows[0]["id"], "abc");
+}
+
+#[test]
+fn test_extract_data_ownership_no_clone() {
+    // Verify that extract_data takes ownership rather than cloning:
+    // after extraction, the original data should be replaced with null
+    let fdw = fdw_with_response_path(Some("/items"));
+    let mut resp = serde_json::json!({
+        "items": [
+            {"id": 1, "payload": "x".repeat(1000)},
+            {"id": 2, "payload": "y".repeat(1000)}
+        ]
+    });
+    let rows = fdw.extract_data(&mut resp).unwrap();
+    assert_eq!(rows.len(), 2);
+    assert_eq!(rows[0]["payload"].as_str().unwrap().len(), 1000);
+    // The original value was taken, not cloned
+    assert!(resp.pointer("/items").unwrap().is_null());
+}
+
+#[test]
+fn test_extract_data_auto_detect_records_key() {
+    let fdw = fdw_with_response_path(None);
+    let mut resp = serde_json::json!({
+        "records": [{"id": 1}, {"id": 2}],
+        "total": 2
+    });
+    let rows = fdw.extract_data(&mut resp).unwrap();
+    assert_eq!(rows.len(), 2);
+}
+
+#[test]
+fn test_extract_data_auto_detect_entries_key() {
+    let fdw = fdw_with_response_path(None);
+    let mut resp = serde_json::json!({
+        "entries": [{"id": "a"}, {"id": "b"}]
+    });
+    let rows = fdw.extract_data(&mut resp).unwrap();
+    assert_eq!(rows.len(), 2);
+}
+
+#[test]
+fn test_extract_data_auto_detect_items_key() {
+    let fdw = fdw_with_response_path(None);
+    let mut resp = serde_json::json!({
+        "items": [{"id": 1}],
+        "next_page": null
+    });
+    let rows = fdw.extract_data(&mut resp).unwrap();
+    assert_eq!(rows.len(), 1);
+}
+
+#[test]
+fn test_extract_data_priority_order() {
+    // When response has both "data" and "results", "data" wins (checked first)
+    let fdw = fdw_with_response_path(None);
+    let mut resp = serde_json::json!({
+        "data": [{"id": 1}],
+        "results": [{"id": 2}, {"id": 3}]
+    });
+    let rows = fdw.extract_data(&mut resp).unwrap();
+    assert_eq!(rows.len(), 1);
+    assert_eq!(rows[0]["id"], 1);
+}
+
+#[test]
+fn test_extract_data_non_array_wrapper_becomes_single_row() {
+    // If a wrapper key contains an object (not array), treat as single row
+    let fdw = fdw_with_response_path(None);
+    let mut resp = serde_json::json!({
+        "data": {"id": "single", "name": "test"}
+    });
+    let rows = fdw.extract_data(&mut resp).unwrap();
+    assert_eq!(rows.len(), 1);
+    assert_eq!(rows[0]["id"], "single");
+}
+
+#[test]
+fn test_extract_data_auto_detect_at_graph_key() {
+    // JSON-LD @graph wrapper (NWS API with Accept: application/ld+json)
+    let fdw = fdw_with_response_path(None);
+    let mut resp = serde_json::json!({
+        "@context": {"@version": "1.1", "wx": "https://api.weather.gov/ontology#"},
+        "@graph": [
+            {"@id": "urn:alert:1", "@type": "wx:Alert", "headline": "Storm warning"},
+            {"@id": "urn:alert:2", "@type": "wx:Alert", "headline": "Heat advisory"}
+        ]
+    });
+    let rows = fdw.extract_data(&mut resp).unwrap();
+    assert_eq!(rows.len(), 2);
+    assert_eq!(rows[0]["headline"], "Storm warning");
+}
+
+#[test]
+fn test_extract_data_response_path_to_single_object() {
+    // response_path pointing to a single object (not array) → wrapped as single row
+    let fdw = fdw_with_response_path(Some("/user"));
+    let mut resp = serde_json::json!({
+        "user": {"id": 1, "name": "alice"},
+        "meta": {"request_id": "abc"}
+    });
+    let rows = fdw.extract_data(&mut resp).unwrap();
+    assert_eq!(rows.len(), 1);
+    assert_eq!(rows[0]["id"], 1);
+    assert_eq!(rows[0]["name"], "alice");
+}
+
+#[test]
+fn test_extract_data_deeply_nested_response_path() {
+    // Three-level deep response path
+    let fdw = fdw_with_response_path(Some("/response/body/items"));
+    let mut resp = serde_json::json!({
+        "response": {
+            "body": {
+                "items": [{"id": 1}, {"id": 2}]
+            }
+        }
+    });
+    let rows = fdw.extract_data(&mut resp).unwrap();
+    assert_eq!(rows.len(), 2);
+}
+
+#[test]
+fn test_extract_data_empty_object_is_single_row() {
+    // Empty object {} treated as single row
+    let fdw = fdw_with_response_path(None);
+    let mut resp = serde_json::json!({});
+    let rows = fdw.extract_data(&mut resp).unwrap();
+    assert_eq!(rows.len(), 1);
+}
+
+// --- Pagination tests ---
+
+// Builds an FDW for handle_pagination tests: `cursor_path` is set as given (an
+// empty string means "not configured", which enables auto-detection) and the
+// server's cursor_param is "after".
+fn make_fdw_for_pagination(cursor_path: &str) -> OpenApiFdw {
+    OpenApiFdw {
+        cursor_path: cursor_path.to_string(),
+        config: ServerConfig {
+            cursor_param: "after".to_string(),
+            ..Default::default()
+        },
+        ..Default::default()
+    }
+}
+
+#[test]
+fn test_handle_pagination_cursor_path_token() {
+    let mut fdw = make_fdw_for_pagination("/cursor");
+    let resp = serde_json::json!({"cursor": "abc123", "data": []});
+    fdw.handle_pagination(&resp);
+    assert_eq!(
+        fdw.pagination.next,
+        Some(PaginationToken::Cursor("abc123".to_string()))
+    );
+}
+
+#[test]
+fn test_handle_pagination_cursor_path_full_url() {
+    // A configured cursor_path whose value is an https URL becomes a Url token
+    let mut fdw = make_fdw_for_pagination("/pagination/next");
+    let resp = serde_json::json!({
+        "pagination": {"next": "https://api.example.com/items?cursor=xyz"},
+        "data": []
+    });
+    fdw.handle_pagination(&resp);
+    assert_eq!(
+        fdw.pagination.next,
+        Some(PaginationToken::Url(
+            "https://api.example.com/items?cursor=xyz".to_string()
+        ))
+    );
+}
+
+#[test]
+fn test_handle_pagination_cursor_path_http_url() {
+    let mut fdw = make_fdw_for_pagination("/next");
+    let resp = serde_json::json!({"next": "http://api.example.com/page2"});
+    fdw.handle_pagination(&resp);
+    assert_eq!(
+        fdw.pagination.next,
+        Some(PaginationToken::Url(
+            "http://api.example.com/page2".to_string()
+        ))
+    );
+}
+
+#[test]
+fn test_handle_pagination_cursor_path_missing() {
+    let mut fdw = make_fdw_for_pagination("/cursor");
+    let resp = serde_json::json!({"data": []});
+    fdw.handle_pagination(&resp);
+    assert!(fdw.pagination.next.is_none());
+}
+
+#[test]
+fn test_handle_pagination_auto_detect_next_url() {
+    let mut fdw = make_fdw_for_pagination(""); // no cursor_path configured
+    let resp = serde_json::json!({
+        "pagination": {"next": "https://api.example.com/items?page=2"},
+        "data": []
+    });
+    fdw.handle_pagination(&resp);
+    assert_eq!(
+        fdw.pagination.next,
+        Some(PaginationToken::Url(
+            "https://api.example.com/items?page=2".to_string()
+        ))
+    );
+}
+
+#[test]
+fn test_handle_pagination_auto_detect_links_next() {
+    // HAL-style: /links/next
+    let mut fdw = make_fdw_for_pagination("");
+    let resp = serde_json::json!({
+        "links": {"next": "https://api.example.com/page2"},
+        "data": []
+    });
+    fdw.handle_pagination(&resp);
+    assert_eq!(
+        fdw.pagination.next,
+        Some(PaginationToken::Url(
+            "https://api.example.com/page2".to_string()
+        ))
+    );
+}
+
+#[test]
+fn test_handle_pagination_auto_detect_has_more_with_cursor() {
+    // Stripe-style: has_more + next_cursor
+    let mut fdw = make_fdw_for_pagination("");
+    let resp = serde_json::json!({
+        "has_more": true,
+        "next_cursor": "cursor_xyz",
+        "data": [{"id": 1}]
+    });
+    fdw.handle_pagination(&resp);
+    assert_eq!(
+        fdw.pagination.next,
+        Some(PaginationToken::Cursor("cursor_xyz".to_string()))
+    );
+}
+
+#[test]
+fn test_handle_pagination_has_more_false_stops() {
+    // has_more: false should NOT set any pagination
+    let mut fdw = make_fdw_for_pagination("");
+    let resp = serde_json::json!({
+        "has_more": false,
+        "next_cursor": "stale_cursor",
+        "data": [{"id": 1}]
+    });
+    fdw.handle_pagination(&resp);
+    assert!(fdw.pagination.next.is_none());
+}
+
+#[test]
+fn test_handle_pagination_auto_detect_meta_pagination() {
+    // Nested meta.pagination.next pattern
+    let mut fdw = make_fdw_for_pagination("");
+    let resp = serde_json::json!({
+        "meta": {
+            "pagination": {
+                "next": "https://api.example.com/items?page=3"
+            }
+        },
+        "data": []
+    });
+    fdw.handle_pagination(&resp);
+    assert_eq!(
+        fdw.pagination.next,
+        Some(PaginationToken::Url(
+            "https://api.example.com/items?page=3".to_string()
+        ))
+    );
+}
+
+#[test]
+fn test_handle_pagination_empty_string_next_url_stops() {
+    // Empty string cursor_path value should be treated as "no more pages"
+    let mut fdw = make_fdw_for_pagination("/next");
+    let resp = serde_json::json!({"next": "", "data": []});
+    fdw.handle_pagination(&resp);
+    assert!(fdw.pagination.next.is_none());
+}
+
+#[test]
+fn test_handle_pagination_null_cursor_stops() {
+    // Null cursor should mean end of pagination
+    let mut fdw = make_fdw_for_pagination("/cursor");
+    let resp = serde_json::json!({"cursor": null, "data": []});
+    fdw.handle_pagination(&resp);
+    assert!(fdw.pagination.next.is_none());
+}
+
+#[test]
+fn test_handle_pagination_array_response_no_autodetect() {
+    // Auto-detection should not run on array responses
+    let mut fdw = make_fdw_for_pagination("");
+    let resp = serde_json::json!([{"id": 1}, {"id": 2}]);
+    fdw.handle_pagination(&resp);
+    assert!(fdw.pagination.next.is_none());
+}
+
+#[test]
+fn test_meta_pagination_has_more_nested() {
+    // Paginated APIs: /meta/pagination/has_more + /meta/pagination/next_cursor
+    let mut fdw = make_fdw_for_pagination("");
+    let resp = serde_json::json!({
+        "meta": {
+            "pagination": {
+                "has_more": true,
+                "next_cursor": "cursor_abc123"
+            }
+        },
+        "data": [{"id": 1}, {"id": 2}]
+    });
+    fdw.handle_pagination(&resp);
+    assert_eq!(
+        fdw.pagination.next,
+        Some(PaginationToken::Cursor("cursor_abc123".to_string()))
+    );
+}
+
+#[test]
+fn test_handle_pagination_has_more_true_but_no_cursor() {
+    // has_more: true but no cursor path found — should NOT paginate (avoid infinite loop)
+    let mut fdw = make_fdw_for_pagination("");
+    let resp = serde_json::json!({
+        "has_more": true,
+        "data": [{"id": 1}]
+    });
+    fdw.handle_pagination(&resp);
+    assert!(fdw.pagination.next.is_none());
+}
+
+#[test]
+fn test_handle_pagination_next_url_direct_key() {
+    // Auto-detect: /next_url key directly
+    let mut fdw = make_fdw_for_pagination("");
+    let resp = serde_json::json!({
+        "next_url": "https://api.example.com/items?page=3",
+        "data": [{"id": 1}]
+    });
+    fdw.handle_pagination(&resp);
+    assert_eq!(
+        fdw.pagination.next,
+        Some(PaginationToken::Url(
+            "https://api.example.com/items?page=3".to_string()
+        ))
+    );
+}
+
+#[test]
+fn test_handle_pagination_pagination_next_url() {
+    // Auto-detect: /pagination/next_url key
+    let mut fdw = make_fdw_for_pagination("");
+    let resp = serde_json::json!({
+        "pagination": {
+            "next_url": "https://api.example.com/page/2"
+        },
+        "data": []
+    });
+    fdw.handle_pagination(&resp);
+    assert_eq!(
+        fdw.pagination.next,
+        Some(PaginationToken::Url(
+            "https://api.example.com/page/2".to_string()
+        ))
+    );
+}
+
+#[test]
+fn test_handle_pagination_next_direct() {
+    // Auto-detect: /next key directly (not nested)
+    let mut fdw = make_fdw_for_pagination("");
+    let resp = serde_json::json!({
+        "next": "https://api.example.com/items?cursor=xyz",
+        "data": [{"id": 1}]
+    });
+    fdw.handle_pagination(&resp);
+    assert_eq!(
+        fdw.pagination.next,
+        Some(PaginationToken::Url(
+            "https://api.example.com/items?cursor=xyz".to_string()
+        ))
+    );
+}
+
+#[test]
+fn test_handle_pagination_cursor_path_integer_value() {
+    // Cursor path resolves to an integer — should be treated as non-string, ignored
+    let mut fdw = make_fdw_for_pagination("/cursor");
+    let resp = serde_json::json!({"cursor": 12345, "data": []});
+    fdw.handle_pagination(&resp);
+    // extract_non_empty_string returns None for non-string values
+    assert!(fdw.pagination.next.is_none());
+}
+
+#[test]
+fn test_handle_pagination_pagination_has_more_with_cursor() {
+    // Auto-detect: /pagination/has_more + /pagination/next_cursor
+    let mut fdw = make_fdw_for_pagination("");
+    let resp = serde_json::json!({
+        "pagination": {
+            "has_more": true,
+            "next_cursor": "pg_cursor_99"
+        },
+        "data": [{"id": 1}]
+    });
+    fdw.handle_pagination(&resp);
+    assert_eq!(
+        fdw.pagination.next,
+        Some(PaginationToken::Cursor("pg_cursor_99".to_string()))
+    );
+}
+
+#[test]
+fn test_handle_pagination_meta_next_url() {
+    // Auto-detect: /meta/pagination/next_url (not /next)
+    let mut fdw = make_fdw_for_pagination("");
+    let resp = serde_json::json!({
+        "meta": {
+            "pagination": {
+                "next_url": "https://api.example.com/page/4"
+            }
+        },
+        "data": []
+    });
+    fdw.handle_pagination(&resp);
+    assert_eq!(
+        fdw.pagination.next,
+        Some(PaginationToken::Url(
+            "https://api.example.com/page/4".to_string()
+        ))
+    );
+}
+
+// --- Real-world API pattern tests ---
+
+#[test]
+fn test_stripe_list_response() {
+    // Stripe pattern: {object:"list", data:[...], has_more:true}
+    let fdw = fdw_with_response_path(None);
+    let mut resp = serde_json::json!({
+        "object": "list",
+        "data": [
+            {"id": "ch_1", "amount": 2000, "currency": "usd"},
+            {"id": "ch_2", "amount": 5000, "currency": "eur"}
+        ],
+        "has_more": true,
+        "url": "/v1/charges"
+    });
+    let rows = fdw.extract_data(&mut resp).unwrap();
+    assert_eq!(rows.len(), 2);
+    assert_eq!(rows[0]["id"], "ch_1");
+    assert_eq!(rows[1]["amount"], 5000);
+    // data was taken (ownership), not cloned
+    assert!(resp["data"].is_null());
+}
+
+#[test]
+fn test_github_direct_array() {
+    // GitHub pattern: direct array response + no auto-pagination
+    let fdw = fdw_with_response_path(None);
+    let mut resp = serde_json::json!([
+        {"id": 1, "login": "octocat", "type": "User"},
+        {"id": 2, "login": "hubot", "type": "Bot"}
+    ]);
+    let rows = fdw.extract_data(&mut resp).unwrap();
+    assert_eq!(rows.len(), 2);
+    assert_eq!(rows[0]["login"], "octocat");
+
+    // Array responses should not trigger auto-pagination
+    let mut pagination_fdw = make_fdw_for_pagination("");
+    let array_resp = serde_json::json!([{"id": 1}, {"id": 2}]);
+    pagination_fdw.handle_pagination(&array_resp);
+    assert!(pagination_fdw.pagination.next.is_none());
+}
+
+#[test]
+fn test_hal_links_next_href_pagination() {
+    // HAL pattern: _links/next/href pagination path
+    let mut fdw = make_fdw_for_pagination("");
+    let resp = serde_json::json!({
+        "_embedded": {"items": [{"id": 1}]},
+        "_links": {
+            "self": {"href": "https://api.example.com/items?page=1"},
+            "next": {"href": "https://api.example.com/items?page=2"}
+        }
+    });
+    fdw.handle_pagination(&resp);
+    assert_eq!(
+        fdw.pagination.next,
+        Some(PaginationToken::Url(
+            "https://api.example.com/items?page=2".to_string()
+        ))
+    );
+}
+
+#[test]
+fn test_kubernetes_list_response() {
+    // Kubernetes pattern: {kind, apiVersion, metadata, items:[...]}
+    let fdw = fdw_with_response_path(None);
+    let mut resp = serde_json::json!({
+        "kind": "PodList",
+        "apiVersion": "v1",
+        "metadata": {"resourceVersion": "1234"},
+        "items": [
+            {"metadata": {"name": "pod-1"}, "status": {"phase": "Running"}},
+            {"metadata": {"name": "pod-2"}, "status": {"phase": "Pending"}}
+        ]
+    });
+    let rows = fdw.extract_data(&mut resp).unwrap();
+    assert_eq!(rows.len(), 2);
+    assert_eq!(rows[0]["metadata"]["name"], "pod-1");
+}
+
+#[test]
+fn test_elasticsearch_hits_response() {
+    // Elasticsearch pattern: response_path must be used for non-standard wrapper
+    let fdw = fdw_with_response_path(Some("/hits/hits"));
+    let mut resp = serde_json::json!({
+        "took": 5,
+        "hits": {
+            "total": {"value": 2},
+            "hits": [
+                {"_id": "1", "_source": {"title": "Doc 1"}},
+                {"_id": "2", "_source": {"title": "Doc 2"}}
+            ]
+        }
+    });
+    let rows = fdw.extract_data(&mut resp).unwrap();
+    assert_eq!(rows.len(), 2);
+    assert_eq!(rows[0]["_id"], "1");
+}
+
+#[test]
+fn test_graphql_style_response() {
+    // GraphQL-style: {data: {users: [...]}} — needs response_path
+    let fdw = fdw_with_response_path(Some("/data/users"));
+    let mut resp = serde_json::json!({
+        "data": {
+            "users": [
+                {"id": "1", "name": "Alice"},
+                {"id": "2", "name": "Bob"}
+            ]
+        }
+    });
+    let rows = fdw.extract_data(&mut resp).unwrap();
+    assert_eq!(rows.len(), 2);
+    assert_eq!(rows[0]["name"], "Alice");
+}
+
+#[test]
+fn test_jsonapi_style_response() {
+    // JSON:API pattern: {data: [{type, id, attributes}], meta}
+    let fdw = fdw_with_response_path(None);
+    let mut resp = serde_json::json!({
+        "data": [
+            {"type": "articles", "id": "1", "attributes": {"title": "JSON:API"}},
+            {"type": "articles", "id": "2", "attributes": {"title": "REST"}}
+        ],
+        "meta": {"total-pages": 1}
+    });
+    let rows = fdw.extract_data(&mut resp).unwrap();
+    assert_eq!(rows.len(), 2);
+    assert_eq!(rows[0]["id"], "1");
+}
+
+// --- Error message context tests ---
+
+#[test]
+fn test_extract_data_error_shows_type_for_non_extractable() {
+    // A bare number is neither array nor object; the error must name the type
+    // and point the user at the response_path option.
+    let fdw = fdw_with_response_path(None);
+    let mut resp = serde_json::json!(42);
+    let err = fdw.extract_data(&mut resp).unwrap_err();
+    assert!(
+        err.contains("number"),
+        "Error should mention JSON type: {err}"
+    );
+    assert!(
+        err.contains("response_path"),
+        "Error should suggest response_path: {err}"
+    );
+}
+
+#[test]
+fn test_json_to_rows_error_shows_type() {
+    let err = OpenApiFdw::json_to_rows(serde_json::json!(null)).unwrap_err();
+    assert!(err.contains("null"), "Error should show type: {err}");
+
+    let err = OpenApiFdw::json_to_rows(serde_json::json!(true)).unwrap_err();
+    assert!(err.contains("boolean"), "Error should show type: 
{err}"); + + let err = OpenApiFdw::json_to_rows(serde_json::json!(42)).unwrap_err(); + assert!(err.contains("number"), "Error should show type: {err}"); +} diff --git a/wasm-wrappers/fdw/openapi_fdw/src/schema.rs b/wasm-wrappers/fdw/openapi_fdw/src/schema.rs index 9c6ce0b6..733e93c8 100644 --- a/wasm-wrappers/fdw/openapi_fdw/src/schema.rs +++ b/wasm-wrappers/fdw/openapi_fdw/src/schema.rs @@ -3,26 +3,34 @@ //! This module handles mapping `OpenAPI` types to `PostgreSQL` types //! and generating CREATE FOREIGN TABLE statements. +use std::collections::HashMap; + use crate::spec::{EndpointInfo, OpenApiSpec, Schema}; /// Maps `OpenAPI` schema types to `PostgreSQL` type names pub fn openapi_to_pg_type(schema: &Schema, spec: &OpenApiSpec) -> &'static str { - // First resolve the schema if it's a reference + // Resolve $ref if present; otherwise borrow the original (no clone). + let owned; let resolved = if schema.reference.is_some() { - spec.resolve_schema(schema) + owned = spec.resolve_schema(schema); + &owned } else { - schema.clone() + schema }; match resolved.schema_type.as_deref() { Some("string") => match resolved.format.as_deref() { Some("date") => "date", Some("date-time") => "timestamptz", - // All other string formats map to text + Some("time") => "time", + Some("byte") | Some("binary") => "bytea", + Some("uuid") => "uuid", _ => "text", }, Some("integer") => match resolved.format.as_deref() { Some("int32") => "integer", + // Stripe uses format: "unix-time" for epoch seconds + Some("unix-time") => "timestamptz", // int64 and others default to bigint for safety _ => "bigint", }, @@ -62,12 +70,32 @@ pub fn extract_columns(schema: &Schema, spec: &OpenApiSpec, include_attrs: bool) if let Some(schema) = item_schema { // Check if this is an object with properties if !schema.properties.is_empty() { - for (name, prop_schema) in &schema.properties { + // Track seen names to detect collisions after sanitization + let mut seen: HashMap<String, usize> = HashMap::new(); + + let mut 
sorted_props: Vec<_> = schema.properties.iter().collect(); + sorted_props.sort_by_key(|(name, _)| *name); + + for (name, prop_schema) in sorted_props { + // Skip writeOnly properties (e.g., "password") — not returned in GET responses + if prop_schema.write_only { + continue; + } let pg_type = openapi_to_pg_type(prop_schema, spec); let nullable = !schema.required.contains(name) || prop_schema.nullable; + let base_name = sanitize_column_name(name); + + // Deduplicate: if this sanitized name was already used, append a suffix + let count = seen.entry(base_name.clone()).or_insert(0); + let final_name = if *count > 0 { + format!("{base_name}_{count}") + } else { + base_name + }; + *count += 1; columns.push(ColumnDef { - name: sanitize_column_name(name), + name: final_name, pg_type, nullable, }); @@ -95,12 +123,29 @@ pub fn extract_columns(schema: &Schema, spec: &OpenApiSpec, include_attrs: bool) } /// Sanitize a column name for `PostgreSQL` (converts `camelCase` to `snake_case`) +/// +/// Handles consecutive uppercase (acronyms) correctly: +/// - `clusterIP` → `cluster_ip` (not `cluster_i_p`) +/// - `HTMLParser` → `html_parser` (not `h_t_m_l_parser`) +/// - `getHTTPSUrl` → `get_https_url` fn sanitize_column_name(name: &str) -> String { let mut result = String::new(); + let chars: Vec<char> = name.chars().collect(); - for (i, c) in name.chars().enumerate() { + for (i, &c) in chars.iter().enumerate() { if c.is_uppercase() && i > 0 { - result.push('_'); + let prev = chars[i - 1]; + let next_is_lower = chars.get(i + 1).is_some_and(|n| n.is_lowercase()); + + // Insert '_' before an uppercase letter when: + // 1. Previous char is lowercase/digit (start of new word: "cluster|I|P") + // 2. 
Previous char is uppercase but next is lowercase (end of acronym: "HTM|L|Parser") + if prev.is_lowercase() + || prev.is_ascii_digit() + || (prev.is_uppercase() && next_is_lower) + { + result.push('_'); + } result.push(c.to_ascii_lowercase()); } else if c.is_alphanumeric() || c == '_' { result.push(c.to_ascii_lowercase()); @@ -182,12 +227,18 @@ pub fn generate_foreign_table( // Escape single quotes in option values for SQL let escaped_endpoint = endpoint.path.replace('\'', "''"); - let options = if let Some(rowid) = rowid_col { + let mut option_parts = vec![format!(" endpoint '{escaped_endpoint}'")]; + + if endpoint.method != "GET" { + option_parts.push(format!(" method '{}'", endpoint.method)); + } + + if let Some(rowid) = rowid_col { let escaped_rowid = rowid.replace('\'', "''"); - format!(" endpoint '{escaped_endpoint}',\n rowid_column '{escaped_rowid}'") - } else { - format!(" endpoint '{escaped_endpoint}'") - }; + option_parts.push(format!(" rowid_column '{escaped_rowid}'")); + } + + let options = option_parts.join(",\n"); format!( r"CREATE FOREIGN TABLE IF NOT EXISTS {} ( @@ -228,46 +279,5 @@ pub fn generate_all_tables( } #[cfg(test)] -mod tests { - use super::*; - - #[test] - fn test_sanitize_column_name() { - assert_eq!(sanitize_column_name("userName"), "user_name"); - assert_eq!(sanitize_column_name("user-name"), "user_name"); - assert_eq!(sanitize_column_name("123abc"), "_123abc"); - assert_eq!(sanitize_column_name("already_snake"), "already_snake"); - } - - #[test] - fn test_openapi_to_pg_type() { - let spec = OpenApiSpec::from_str( - r#"{ - "openapi": "3.0.0", - "info": {"title": "Test"}, - "paths": {} - }"#, - ) - .unwrap(); - - let string_schema = Schema { - schema_type: Some("string".to_string()), - ..Default::default() - }; - assert_eq!(openapi_to_pg_type(&string_schema, &spec), "text"); - - let date_schema = Schema { - schema_type: Some("string".to_string()), - format: Some("date-time".to_string()), - ..Default::default() - }; - 
assert_eq!(openapi_to_pg_type(&date_schema, &spec), "timestamptz"); - - let int_schema = Schema { - schema_type: Some("integer".to_string()), - format: Some("int32".to_string()), - ..Default::default() - }; - assert_eq!(openapi_to_pg_type(&int_schema, &spec), "integer"); - } -} +#[path = "schema_tests.rs"] +mod tests; diff --git a/wasm-wrappers/fdw/openapi_fdw/src/schema_tests.rs b/wasm-wrappers/fdw/openapi_fdw/src/schema_tests.rs new file mode 100644 index 00000000..de3ff116 --- /dev/null +++ b/wasm-wrappers/fdw/openapi_fdw/src/schema_tests.rs @@ -0,0 +1,1949 @@ +use super::*; + +#[test] +fn test_sanitize_column_name() { + assert_eq!(sanitize_column_name("userName"), "user_name"); + assert_eq!(sanitize_column_name("user-name"), "user_name"); + assert_eq!(sanitize_column_name("123abc"), "_123abc"); + assert_eq!(sanitize_column_name("already_snake"), "already_snake"); +} + +#[test] +fn test_sanitize_column_name_acronyms() { + // Consecutive uppercase letters (acronyms) should be grouped + assert_eq!(sanitize_column_name("clusterIP"), "cluster_ip"); + assert_eq!(sanitize_column_name("HTMLParser"), "html_parser"); + assert_eq!(sanitize_column_name("getHTTPSUrl"), "get_https_url"); + assert_eq!(sanitize_column_name("IOError"), "io_error"); + assert_eq!(sanitize_column_name("apiURL"), "api_url"); + // Single uppercase still works + assert_eq!(sanitize_column_name("firstName"), "first_name"); +} + +#[test] +fn test_sanitize_column_name_special_chars() { + // @ prefix (JSON-LD) + assert_eq!(sanitize_column_name("@id"), "_id"); + assert_eq!(sanitize_column_name("@type"), "_type"); + // Dots (nested keys) + assert_eq!(sanitize_column_name("user.name"), "user_name"); + // Plus/minus (GitHub reactions) + assert_eq!(sanitize_column_name("+1"), "_1"); + assert_eq!(sanitize_column_name("-1"), "_1"); +} + +#[test] +fn test_openapi_to_pg_type() { + let spec = OpenApiSpec::from_str( + r#"{ + "openapi": "3.0.0", + "info": {"title": "Test"}, + "paths": {} + }"#, + ) + .unwrap(); + + 
let string_schema = Schema { + schema_type: Some("string".to_string()), + ..Default::default() + }; + assert_eq!(openapi_to_pg_type(&string_schema, &spec), "text"); + + let date_schema = Schema { + schema_type: Some("string".to_string()), + format: Some("date-time".to_string()), + ..Default::default() + }; + assert_eq!(openapi_to_pg_type(&date_schema, &spec), "timestamptz"); + + let int_schema = Schema { + schema_type: Some("integer".to_string()), + format: Some("int32".to_string()), + ..Default::default() + }; + assert_eq!(openapi_to_pg_type(&int_schema, &spec), "integer"); +} + +#[test] +fn test_openapi_to_pg_type_unix_time() { + let spec = OpenApiSpec::from_str( + r#"{ + "openapi": "3.0.0", + "info": {"title": "Test"}, + "paths": {} + }"#, + ) + .unwrap(); + + // Stripe's format: "unix-time" should map to timestamptz + let unix_time_schema = Schema { + schema_type: Some("integer".to_string()), + format: Some("unix-time".to_string()), + ..Default::default() + }; + assert_eq!(openapi_to_pg_type(&unix_time_schema, &spec), "timestamptz"); + + // Regular integer without format should still be bigint + let int_schema = Schema { + schema_type: Some("integer".to_string()), + ..Default::default() + }; + assert_eq!(openapi_to_pg_type(&int_schema, &spec), "bigint"); + + // string format: "date" should be date + let date_schema = Schema { + schema_type: Some("string".to_string()), + format: Some("date".to_string()), + ..Default::default() + }; + assert_eq!(openapi_to_pg_type(&date_schema, &spec), "date"); + + // boolean + let bool_schema = Schema { + schema_type: Some("boolean".to_string()), + ..Default::default() + }; + assert_eq!(openapi_to_pg_type(&bool_schema, &spec), "boolean"); + + // number format: "float" → real + let float_schema = Schema { + schema_type: Some("number".to_string()), + format: Some("float".to_string()), + ..Default::default() + }; + assert_eq!(openapi_to_pg_type(&float_schema, &spec), "real"); + + // number without format → double precision + let 
num_schema = Schema { + schema_type: Some("number".to_string()), + ..Default::default() + }; + assert_eq!(openapi_to_pg_type(&num_schema, &spec), "double precision"); + + // array → jsonb + let arr_schema = Schema { + schema_type: Some("array".to_string()), + ..Default::default() + }; + assert_eq!(openapi_to_pg_type(&arr_schema, &spec), "jsonb"); + + // object → jsonb + let obj_schema = Schema { + schema_type: Some("object".to_string()), + ..Default::default() + }; + assert_eq!(openapi_to_pg_type(&obj_schema, &spec), "jsonb"); + + // None type → jsonb (OpenAPI 3.1 type arrays that resolve to None) + let none_schema = Schema { + schema_type: None, + ..Default::default() + }; + assert_eq!(openapi_to_pg_type(&none_schema, &spec), "jsonb"); +} + +#[test] +fn test_openapi_to_pg_type_time_format() { + let spec = OpenApiSpec::from_str( + r#"{ + "openapi": "3.0.0", + "info": {"title": "Test"}, + "paths": {} + }"#, + ) + .unwrap(); + + let time_schema = Schema { + schema_type: Some("string".to_string()), + format: Some("time".to_string()), + ..Default::default() + }; + assert_eq!(openapi_to_pg_type(&time_schema, &spec), "time"); +} + +#[test] +fn test_openapi_to_pg_type_byte_binary_format() { + let spec = OpenApiSpec::from_str( + r#"{ + "openapi": "3.0.0", + "info": {"title": "Test"}, + "paths": {} + }"#, + ) + .unwrap(); + + let byte_schema = Schema { + schema_type: Some("string".to_string()), + format: Some("byte".to_string()), + ..Default::default() + }; + assert_eq!(openapi_to_pg_type(&byte_schema, &spec), "bytea"); + + let binary_schema = Schema { + schema_type: Some("string".to_string()), + format: Some("binary".to_string()), + ..Default::default() + }; + assert_eq!(openapi_to_pg_type(&binary_schema, &spec), "bytea"); +} + +#[test] +fn test_openapi_to_pg_type_uuid_format() { + let spec = OpenApiSpec::from_str( + r#"{ + "openapi": "3.0.0", + "info": {"title": "Test"}, + "paths": {} + }"#, + ) + .unwrap(); + + let uuid_schema = Schema { + schema_type: 
Some("string".to_string()), + format: Some("uuid".to_string()), + ..Default::default() + }; + assert_eq!(openapi_to_pg_type(&uuid_schema, &spec), "uuid"); +} + +#[test] +fn test_column_name_collision_dedup() { + // Properties that collide after sanitization should get suffixed + let spec = OpenApiSpec::from_str( + r#"{ + "openapi": "3.0.0", + "info": {"title": "Test"}, + "paths": {} + }"#, + ) + .unwrap(); + + let mut properties = HashMap::new(); + properties.insert( + "user-name".to_string(), + Schema { + schema_type: Some("string".to_string()), + ..Default::default() + }, + ); + properties.insert( + "userName".to_string(), + Schema { + schema_type: Some("string".to_string()), + ..Default::default() + }, + ); + + let schema = Schema { + schema_type: Some("object".to_string()), + properties, + ..Default::default() + }; + + let columns = extract_columns(&schema, &spec, false); + let names: Vec<&str> = columns.iter().map(|c| c.name.as_str()).collect(); + + // Both should exist, one with a suffix + assert!( + names.contains(&"user_name"), + "Expected user_name in {names:?}", + ); + assert!( + names.contains(&"user_name_1"), + "Expected user_name_1 for collision in {names:?}", + ); +} + +// --- generate_all_tables filter tests --- + +fn make_test_spec() -> OpenApiSpec { + OpenApiSpec::from_str( + r#"{ + "openapi": "3.0.0", + "info": {"title": "Test", "version": "1.0"}, + "paths": { + "/users": { + "get": { + "responses": {"200": {"description": "ok", "content": {"application/json": {"schema": {"type": "array", "items": {"type": "object", "properties": {"id": {"type": "string"}}}}}}}} + } + }, + "/posts": { + "get": { + "responses": {"200": {"description": "ok", "content": {"application/json": {"schema": {"type": "array", "items": {"type": "object", "properties": {"id": {"type": "string"}}}}}}}} + } + }, + "/comments": { + "get": { + "responses": {"200": {"description": "ok", "content": {"application/json": {"schema": {"type": "array", "items": {"type": "object", 
"properties": {"id": {"type": "string"}}}}}}}} + } + } + } + }"#, + ) + .unwrap() +} + +#[test] +fn test_generate_all_tables_no_filter() { + let spec = make_test_spec(); + let tables = generate_all_tables(&spec, "test_server", None, false, false); + assert_eq!(tables.len(), 3); +} + +#[test] +fn test_generate_all_tables_limit_to() { + let spec = make_test_spec(); + let filter = vec!["users".to_string(), "posts".to_string()]; + let tables = generate_all_tables(&spec, "test_server", Some(&filter), false, false); + assert_eq!(tables.len(), 2); + assert!(tables.iter().any(|t| t.contains("\"users\""))); + assert!(tables.iter().any(|t| t.contains("\"posts\""))); + assert!(!tables.iter().any(|t| t.contains("\"comments\""))); +} + +#[test] +fn test_generate_all_tables_except() { + let spec = make_test_spec(); + let filter = vec!["comments".to_string()]; + let tables = generate_all_tables(&spec, "test_server", Some(&filter), true, false); + assert_eq!(tables.len(), 2); + assert!(tables.iter().any(|t| t.contains("\"users\""))); + assert!(tables.iter().any(|t| t.contains("\"posts\""))); + assert!(!tables.iter().any(|t| t.contains("\"comments\""))); +} + +#[test] +fn test_generate_all_tables_limit_to_nonexistent() { + let spec = make_test_spec(); + let filter = vec!["nonexistent".to_string()]; + let tables = generate_all_tables(&spec, "test_server", Some(&filter), false, false); + assert_eq!(tables.len(), 0); +} + +#[test] +fn test_generate_all_tables_include_attrs() { + let spec = make_test_spec(); + let tables = generate_all_tables(&spec, "test_server", None, false, true); + // All tables should have an 'attrs' column + for table in &tables { + assert!( + table.contains("\"attrs\" jsonb"), + "Missing attrs in: {table}" + ); + } +} + +#[test] +fn test_generate_all_tables_exclude_attrs() { + let spec = make_test_spec(); + let tables = generate_all_tables(&spec, "test_server", None, false, false); + // No table should have an 'attrs' column + for table in &tables { + 
assert!(!table.contains("\"attrs\""), "Unexpected attrs in: {table}"); + } +} + +#[test] +fn test_generate_foreign_table_no_schema() { + // Endpoint with no response schema → default id + attrs columns + let spec = make_test_spec(); + let endpoint = crate::spec::EndpointInfo { + path: "/health".to_string(), + method: "GET", + response_schema: None, + }; + let table = generate_foreign_table(&endpoint, &spec, "test_server", true); + assert!(table.contains("\"id\" text NOT NULL")); + assert!(table.contains("\"attrs\" jsonb")); + assert!(table.contains("rowid_column 'id'")); +} + +#[test] +fn test_write_only_properties_filtered() { + let spec = OpenApiSpec::from_str( + r#"{ + "openapi": "3.0.0", + "info": {"title": "Test"}, + "paths": {} + }"#, + ) + .unwrap(); + + let mut properties = HashMap::new(); + properties.insert( + "username".to_string(), + crate::spec::Schema { + schema_type: Some("string".to_string()), + ..Default::default() + }, + ); + properties.insert( + "password".to_string(), + crate::spec::Schema { + schema_type: Some("string".to_string()), + write_only: true, + ..Default::default() + }, + ); + properties.insert( + "email".to_string(), + crate::spec::Schema { + schema_type: Some("string".to_string()), + ..Default::default() + }, + ); + + let schema = crate::spec::Schema { + schema_type: Some("object".to_string()), + properties, + ..Default::default() + }; + + let columns = extract_columns(&schema, &spec, false); + let names: Vec<&str> = columns.iter().map(|c| c.name.as_str()).collect(); + + assert!(names.contains(&"username"), "username should be included"); + assert!(names.contains(&"email"), "email should be included"); + assert!( + !names.contains(&"password"), + "password (writeOnly) should be excluded" + ); + assert_eq!(columns.len(), 2); +} + +#[test] +fn test_generate_foreign_table_post_method() { + let spec = make_test_spec(); + let endpoint = crate::spec::EndpointInfo { + path: "/search".to_string(), + method: "POST", + response_schema: None, 
+ }; + let table = generate_foreign_table(&endpoint, &spec, "test_server", true); + assert!( + table.contains("method 'POST'"), + "POST DDL should include method option: {table}" + ); + assert!(table.contains("endpoint '/search'")); +} + +#[test] +fn test_generate_foreign_table_get_no_method() { + let spec = make_test_spec(); + let endpoint = crate::spec::EndpointInfo { + path: "/items".to_string(), + method: "GET", + response_schema: None, + }; + let table = generate_foreign_table(&endpoint, &spec, "test_server", true); + assert!( + !table.contains("method "), + "GET DDL should NOT include method option: {table}" + ); + assert!(table.contains("endpoint '/items'")); +} + +// --- OpenAPI 3.1 type mapping and DDL generation tests --- + +#[test] +fn test_int64_format_explicit() { + // integer + format: "int64" → bigint + let spec = + OpenApiSpec::from_str(r#"{"openapi": "3.0.0", "info": {"title": "T"}, "paths": {}}"#) + .unwrap(); + + let schema = crate::spec::Schema { + schema_type: Some("integer".to_string()), + format: Some("int64".to_string()), + ..Default::default() + }; + assert_eq!(openapi_to_pg_type(&schema, &spec), "bigint"); +} + +#[test] +fn test_double_format_explicit() { + // number + format: "double" → double precision + let spec = + OpenApiSpec::from_str(r#"{"openapi": "3.0.0", "info": {"title": "T"}, "paths": {}}"#) + .unwrap(); + + let schema = crate::spec::Schema { + schema_type: Some("number".to_string()), + format: Some("double".to_string()), + ..Default::default() + }; + assert_eq!(openapi_to_pg_type(&schema, &spec), "double precision"); +} + +#[test] +fn test_unknown_string_format_fallback() { + // Unknown string formats (email, uri, hostname, ipv4, ipv6, password) → text + let spec = + OpenApiSpec::from_str(r#"{"openapi": "3.0.0", "info": {"title": "T"}, "paths": {}}"#) + .unwrap(); + + for fmt in &["email", "uri", "hostname", "ipv4", "ipv6", "password"] { + let schema = crate::spec::Schema { + schema_type: Some("string".to_string()), + format: 
Some(fmt.to_string()), + ..Default::default() + }; + assert_eq!( + openapi_to_pg_type(&schema, &spec), + "text", + "format '{fmt}' should map to text", + ); + } +} + +#[test] +fn test_extract_columns_github_type_arrays() { + // Nullable via type arrays in column extraction (OpenAPI 3.1) + let spec = + OpenApiSpec::from_str(r#"{"openapi": "3.1.0", "info": {"title": "T"}, "paths": {}}"#) + .unwrap(); + + let mut properties = HashMap::new(); + properties.insert( + "name".to_string(), + crate::spec::Schema { + schema_type: Some("string".to_string()), + nullable: true, // from type: ["string", "null"] + ..Default::default() + }, + ); + properties.insert( + "count".to_string(), + crate::spec::Schema { + schema_type: Some("integer".to_string()), + ..Default::default() + }, + ); + + let schema = crate::spec::Schema { + schema_type: Some("object".to_string()), + properties, + required: vec!["name".to_string(), "count".to_string()], + ..Default::default() + }; + + let columns = extract_columns(&schema, &spec, false); + + let name_col = columns.iter().find(|c| c.name == "name").unwrap(); + // name is required but nullable (from type array) → nullable=true + assert!(name_col.nullable); + assert_eq!(name_col.pg_type, "text"); + + let count_col = columns.iter().find(|c| c.name == "count").unwrap(); + // count is required and not nullable → nullable=false + assert!(!count_col.nullable); + assert_eq!(count_col.pg_type, "bigint"); +} + +#[test] +fn test_rowid_selection_no_id_column() { + // No 'id' column → picks first non-attrs non-jsonb column as rowid + let spec = + OpenApiSpec::from_str(r#"{"openapi": "3.0.0", "info": {"title": "T"}, "paths": {}}"#) + .unwrap(); + + let mut properties = HashMap::new(); + properties.insert( + "name".to_string(), + crate::spec::Schema { + schema_type: Some("string".to_string()), + ..Default::default() + }, + ); + properties.insert( + "metadata".to_string(), + crate::spec::Schema { + schema_type: Some("object".to_string()), + ..Default::default() 
+ }, + ); + + let schema = crate::spec::Schema { + schema_type: Some("object".to_string()), + properties, + ..Default::default() + }; + + let endpoint = crate::spec::EndpointInfo { + path: "/things".to_string(), + method: "GET", + response_schema: Some(schema), + }; + + let table = generate_foreign_table(&endpoint, &spec, "test_server", false); + // 'metadata' is jsonb, so 'name' (text) should be the rowid + assert!( + table.contains("rowid_column 'name'"), + "Expected name as rowid: {table}" + ); +} + +#[test] +fn test_rowid_selection_all_jsonb() { + // All columns are jsonb → omits rowid_column + let spec = + OpenApiSpec::from_str(r#"{"openapi": "3.0.0", "info": {"title": "T"}, "paths": {}}"#) + .unwrap(); + + let mut properties = HashMap::new(); + properties.insert( + "data".to_string(), + crate::spec::Schema { + schema_type: Some("object".to_string()), + ..Default::default() + }, + ); + properties.insert( + "meta".to_string(), + crate::spec::Schema { + schema_type: Some("array".to_string()), + ..Default::default() + }, + ); + + let schema = crate::spec::Schema { + schema_type: Some("object".to_string()), + properties, + ..Default::default() + }; + + let endpoint = crate::spec::EndpointInfo { + path: "/blobs".to_string(), + method: "GET", + response_schema: Some(schema), + }; + + let table = generate_foreign_table(&endpoint, &spec, "test_server", false); + // All columns are jsonb → no suitable rowid column + assert!( + !table.contains("rowid_column"), + "All-jsonb schema should omit rowid_column: {table}" + ); +} + +#[test] +fn test_no_properties_schema_defaults() { + // Empty properties (e.g., additionalProperties-only schema) → only attrs column if enabled + let spec = + OpenApiSpec::from_str(r#"{"openapi": "3.0.0", "info": {"title": "T"}, "paths": {}}"#) + .unwrap(); + + let schema = crate::spec::Schema { + schema_type: Some("object".to_string()), + // No properties + ..Default::default() + }; + + let columns_with_attrs = extract_columns(&schema, &spec, 
true); + assert_eq!(columns_with_attrs.len(), 1); + assert_eq!(columns_with_attrs[0].name, "attrs"); + + let columns_without_attrs = extract_columns(&schema, &spec, false); + assert_eq!(columns_without_attrs.len(), 0); +} + +#[test] +fn test_column_ordering_id_first() { + // id sorts first, rest alphabetical + let spec = + OpenApiSpec::from_str(r#"{"openapi": "3.0.0", "info": {"title": "T"}, "paths": {}}"#) + .unwrap(); + + let mut properties = HashMap::new(); + for name in &["zebra", "id", "alpha", "middle"] { + properties.insert( + name.to_string(), + crate::spec::Schema { + schema_type: Some("string".to_string()), + ..Default::default() + }, + ); + } + + let schema = crate::spec::Schema { + schema_type: Some("object".to_string()), + properties, + ..Default::default() + }; + + let columns = extract_columns(&schema, &spec, false); + let names: Vec<&str> = columns.iter().map(|c| c.name.as_str()).collect(); + assert_eq!(names, vec!["id", "alpha", "middle", "zebra"]); +} + +#[test] +fn test_sanitize_consecutive_special_chars() { + // @@@id → ___id (each special char becomes _) + assert_eq!(sanitize_column_name("@@@id"), "___id"); +} + +#[test] +fn test_sanitize_leading_underscore_preserved() { + // _id stays _id (leading underscore preserved) + assert_eq!(sanitize_column_name("_id"), "_id"); +} + +// --- OpenAPI 3.1 DDL generation and type mapping coverage --- + +#[test] +fn test_extract_columns_from_ref_array_schema() { + // Schema is a $ref to an array of objects — should extract items properties + let spec = OpenApiSpec::from_str( + r##"{ + "openapi": "3.0.0", + "info": {"title": "Test"}, + "paths": {}, + "components": { + "schemas": { + "User": { + "type": "object", + "properties": { + "id": {"type": "integer"}, + "name": {"type": "string"} + }, + "required": ["id"] + }, + "UserList": { + "type": "array", + "items": {"$ref": "#/components/schemas/User"} + } + } + } + }"##, + ) + .unwrap(); + + let schema = 
spec.resolve_ref("#/components/schemas/UserList").unwrap(); + let columns = extract_columns(schema, &spec, false); + + let names: Vec<&str> = columns.iter().map(|c| c.name.as_str()).collect(); + assert!(names.contains(&"id"), "Missing id in {names:?}"); + assert!(names.contains(&"name"), "Missing name in {names:?}"); + + let id_col = columns.iter().find(|c| c.name == "id").unwrap(); + assert_eq!(id_col.pg_type, "bigint"); + assert!(!id_col.nullable); + + let name_col = columns.iter().find(|c| c.name == "name").unwrap(); + assert_eq!(name_col.pg_type, "text"); + assert!(name_col.nullable); // not in required list +} + +#[test] +fn test_extract_columns_from_allof_resolved() { + // Schema with allOf — extract_columns should resolve and merge + let spec = OpenApiSpec::from_str( + r##"{ + "openapi": "3.0.0", + "info": {"title": "Test"}, + "paths": {}, + "components": { + "schemas": { + "Base": { + "type": "object", + "properties": { + "id": {"type": "string"} + }, + "required": ["id"] + }, + "Extended": { + "allOf": [ + {"$ref": "#/components/schemas/Base"}, + { + "type": "object", + "properties": { + "email": {"type": "string", "format": "email"}, + "created_at": {"type": "string", "format": "date-time"} + }, + "required": ["email"] + } + ] + } + } + } + }"##, + ) + .unwrap(); + + let schema = spec.resolve_ref("#/components/schemas/Extended").unwrap(); + let columns = extract_columns(schema, &spec, false); + + let names: Vec<&str> = columns.iter().map(|c| c.name.as_str()).collect(); + assert!(names.contains(&"id")); + assert!(names.contains(&"email")); + assert!(names.contains(&"created_at")); + + // id is required + not nullable → NOT NULL + let id_col = columns.iter().find(|c| c.name == "id").unwrap(); + assert!(!id_col.nullable); + + // email is required + not nullable → NOT NULL + let email_col = columns.iter().find(|c| c.name == "email").unwrap(); + assert!(!email_col.nullable); + assert_eq!(email_col.pg_type, "text"); // format: "email" → text + + // created_at 
is not required → nullable + let created_col = columns.iter().find(|c| c.name == "created_at").unwrap(); + assert!(created_col.nullable); + assert_eq!(created_col.pg_type, "timestamptz"); +} + +#[test] +fn test_generate_foreign_table_single_quote_in_endpoint() { + // Endpoint with single quotes — should be escaped in SQL + let spec = + OpenApiSpec::from_str(r#"{"openapi": "3.0.0", "info": {"title": "T"}, "paths": {}}"#) + .unwrap(); + + let endpoint = crate::spec::EndpointInfo { + path: "/o'reilly/books".to_string(), + method: "GET", + response_schema: None, + }; + let table = generate_foreign_table(&endpoint, &spec, "test_server", false); + // Single quote should be doubled in SQL + assert!( + table.contains("endpoint '/o''reilly/books'"), + "Should escape single quotes: {table}" + ); +} + +#[test] +fn test_quote_identifier_with_double_quote() { + // Table name with double quote — sanitized to underscore by table_name() + let endpoint = crate::spec::EndpointInfo { + path: "/he\"llo".to_string(), + method: "GET", + response_schema: None, + }; + let table_name = endpoint.table_name(); + assert_eq!(table_name, "he_llo"); + + // The DDL uses the sanitized name + let spec = + OpenApiSpec::from_str(r#"{"openapi": "3.0.0", "info": {"title": "T"}, "paths": {}}"#) + .unwrap(); + let ddl = generate_foreign_table(&endpoint, &spec, "test_server", false); + assert!( + ddl.contains(r#""he_llo""#), + "Sanitized table name should appear in DDL: {ddl}" + ); +} + +#[test] +fn test_full_ddl_from_openapi_31_spec() { + // Full end-to-end: OpenAPI 3.1 spec → DDL with correct type arrays + let spec = OpenApiSpec::from_str( + r#"{ + "openapi": "3.1.0", + "info": {"title": "Test 3.1 API", "version": "1.0"}, + "paths": { + "/users": { + "get": { + "responses": { + "200": { + "description": "ok", + "content": { + "application/json": { + "schema": { + "type": "array", + "items": { + "type": "object", + "properties": { + "id": {"type": "integer"}, + "name": {"type": "string"}, + "bio": 
{"type": ["string", "null"]}, + "age": {"type": ["integer", "null"], "format": "int32"}, + "score": {"type": ["number", "null"], "format": "float"}, + "active": {"type": ["boolean", "null"]}, + "tags": {"type": "array", "items": {"type": "string"}}, + "metadata": {"type": "object"} + }, + "required": ["id", "name"] + } + } + } + } + } + } + } + } + } + }"#, + ) + .unwrap(); + + let tables = generate_all_tables(&spec, "api_server", None, false, true); + assert_eq!(tables.len(), 1); + + let ddl = &tables[0]; + + // Check column types + assert!(ddl.contains("\"id\" bigint NOT NULL"), "id: {ddl}"); + assert!(ddl.contains("\"name\" text NOT NULL"), "name: {ddl}"); + assert!( + ddl.contains("\"bio\" text"), + "bio should be nullable text: {ddl}" + ); + assert!( + ddl.contains("\"age\" integer"), + "age should be int32: {ddl}" + ); + assert!( + ddl.contains("\"score\" real"), + "score should be float: {ddl}" + ); + assert!(ddl.contains("\"active\" boolean"), "active: {ddl}"); + assert!(ddl.contains("\"tags\" jsonb"), "tags array → jsonb: {ddl}"); + assert!( + ddl.contains("\"metadata\" jsonb"), + "metadata object → jsonb: {ddl}" + ); + assert!(ddl.contains("\"attrs\" jsonb"), "attrs column: {ddl}"); + + // id column should be rowid + assert!(ddl.contains("rowid_column 'id'"), "rowid: {ddl}"); + + // bio and age should NOT have NOT NULL (they're nullable via type arrays) + assert!( + !ddl.contains("\"bio\" text NOT NULL"), + "bio should be nullable: {ddl}" + ); + assert!( + !ddl.contains("\"age\" integer NOT NULL"), + "age should be nullable: {ddl}" + ); +} + +#[test] +fn test_openapi_to_pg_type_ref_resolved() { + // Type mapping with $ref — should resolve the ref before mapping + let spec = OpenApiSpec::from_str( + r##"{ + "openapi": "3.0.0", + "info": {"title": "Test"}, + "paths": {}, + "components": { + "schemas": { + "UserId": { + "type": "string", + "format": "uuid" + } + } + } + }"##, + ) + .unwrap(); + + let ref_schema = crate::spec::Schema { + reference: 
Some("#/components/schemas/UserId".to_string()), + ..Default::default() + }; + assert_eq!(openapi_to_pg_type(&ref_schema, &spec), "uuid"); +} + +#[test] +fn test_extract_columns_nullable_required_interaction() { + // Column is both required AND nullable (OpenAPI 3.1 type arrays) + // → should be nullable (nullable overrides required) + let spec = + OpenApiSpec::from_str(r#"{"openapi": "3.1.0", "info": {"title": "T"}, "paths": {}}"#) + .unwrap(); + + let mut properties = HashMap::new(); + properties.insert( + "email".to_string(), + crate::spec::Schema { + schema_type: Some("string".to_string()), + nullable: true, // from type: ["string", "null"] + ..Default::default() + }, + ); + + let schema = crate::spec::Schema { + schema_type: Some("object".to_string()), + properties, + required: vec!["email".to_string()], + ..Default::default() + }; + + let columns = extract_columns(&schema, &spec, false); + let email = columns.iter().find(|c| c.name == "email").unwrap(); + // required=true but nullable=true → column should be nullable + assert!(email.nullable, "nullable should override required"); +} + +#[test] +fn test_extract_columns_write_only_in_allof() { + // writeOnly property inside allOf should still be filtered + let spec = OpenApiSpec::from_str( + r##"{ + "openapi": "3.0.0", + "info": {"title": "Test"}, + "paths": {}, + "components": { + "schemas": { + "User": { + "allOf": [ + { + "type": "object", + "properties": { + "id": {"type": "integer"}, + "password": {"type": "string", "writeOnly": true} + } + }, + { + "type": "object", + "properties": { + "name": {"type": "string"} + } + } + ] + } + } + } + }"##, + ) + .unwrap(); + + let user = spec.resolve_ref("#/components/schemas/User").unwrap(); + let columns = extract_columns(user, &spec, false); + let names: Vec<&str> = columns.iter().map(|c| c.name.as_str()).collect(); + + assert!(names.contains(&"id")); + assert!(names.contains(&"name")); + assert!( + !names.contains(&"password"), + "writeOnly in allOf should be 
excluded" + ); +} + +#[test] +fn test_generate_foreign_table_with_attrs_existing() { + // Schema already has an "attrs" property — should not duplicate + let spec = + OpenApiSpec::from_str(r#"{"openapi": "3.0.0", "info": {"title": "T"}, "paths": {}}"#) + .unwrap(); + + let mut properties = HashMap::new(); + properties.insert( + "id".to_string(), + crate::spec::Schema { + schema_type: Some("string".to_string()), + ..Default::default() + }, + ); + properties.insert( + "attrs".to_string(), + crate::spec::Schema { + schema_type: Some("object".to_string()), + ..Default::default() + }, + ); + + let schema = crate::spec::Schema { + schema_type: Some("object".to_string()), + properties, + ..Default::default() + }; + + let columns = extract_columns(&schema, &spec, true); + let attrs_count = columns.iter().filter(|c| c.name == "attrs").count(); + assert_eq!(attrs_count, 1, "Should not duplicate existing attrs column"); +} + +#[test] +fn test_sanitize_column_name_unicode() { + // Unicode letters are alphanumeric, so they pass through (lowercased) + assert_eq!(sanitize_column_name("café"), "café"); + assert_eq!(sanitize_column_name("naïve"), "naïve"); + // Non-letter unicode (e.g., emoji) → underscore + assert_eq!(sanitize_column_name("key→val"), "key_val"); +} + +#[test] +fn test_sanitize_column_name_all_special() { + // All special characters + assert_eq!(sanitize_column_name("@#$"), "___"); +} + +#[test] +fn test_sanitize_column_name_mixed_digits_and_uppercase() { + // Mix of digits and uppercase in camelCase + assert_eq!(sanitize_column_name("ipV4Address"), "ip_v4_address"); + assert_eq!(sanitize_column_name("x509Certificate"), "x509_certificate"); +} + +#[test] +fn test_generate_all_tables_post_method_in_ddl() { + // POST endpoint should include method option in DDL + let spec = OpenApiSpec::from_str( + r#"{ + "openapi": "3.0.0", + "info": {"title": "Test", "version": "1.0"}, + "paths": { + "/search": { + "post": { + "responses": { + "200": { + "description": "ok", + 
"content": { + "application/json": { + "schema": { + "type": "array", + "items": { + "type": "object", + "properties": { + "id": {"type": "string"}, + "score": {"type": "number"} + } + } + } + } + } + } + } + } + } + } + }"#, + ) + .unwrap(); + + let tables = generate_all_tables(&spec, "api_server", None, false, false); + assert_eq!(tables.len(), 1); + let ddl = &tables[0]; + assert!(ddl.contains("\"search_post\""), "Table name: {ddl}"); + assert!(ddl.contains("method 'POST'"), "Method option: {ddl}"); + assert!(ddl.contains("endpoint '/search'"), "Endpoint option: {ddl}"); +} + +// ============================================================================= +// OpenAPI 3.1 DDL generation coverage — end-to-end spec → DDL pipeline +// ============================================================================= + +#[test] +fn test_extract_columns_from_oneof_nullable_properties() { + // oneOf merges properties as nullable — verify column extraction reflects this + let spec = OpenApiSpec::from_str( + r##"{ + "openapi": "3.1.0", + "info": {"title": "Test"}, + "paths": {}, + "components": { + "schemas": { + "Cat": { + "type": "object", + "properties": { + "name": {"type": "string"}, + "indoor": {"type": "boolean"} + }, + "required": ["name"] + }, + "Dog": { + "type": "object", + "properties": { + "name": {"type": "string"}, + "breed": {"type": "string"} + }, + "required": ["name"] + }, + "Pet": { + "oneOf": [ + {"$ref": "#/components/schemas/Cat"}, + {"$ref": "#/components/schemas/Dog"} + ] + } + } + } + }"##, + ) + .unwrap(); + + let pet = spec.resolve_ref("#/components/schemas/Pet").unwrap(); + let columns = extract_columns(pet, &spec, false); + let names: Vec<&str> = columns.iter().map(|c| c.name.as_str()).collect(); + + assert!(names.contains(&"name"), "Missing name in {names:?}"); + assert!(names.contains(&"indoor"), "Missing indoor in {names:?}"); + assert!(names.contains(&"breed"), "Missing breed in {names:?}"); + + // All oneOf properties should be nullable 
(don't know which variant) + for col in &columns { + assert!(col.nullable, "{} should be nullable in oneOf", col.name); + } +} + +#[test] +fn test_extract_columns_31_anyof_ref_and_null() { + // GitHub pattern: anyOf: [$ref, {type: "null"}] — should produce columns from the ref + let spec = OpenApiSpec::from_str( + r##"{ + "openapi": "3.1.0", + "info": {"title": "Test"}, + "paths": {}, + "components": { + "schemas": { + "SimpleUser": { + "type": "object", + "properties": { + "id": {"type": "integer"}, + "login": {"type": "string"}, + "avatar_url": {"type": ["string", "null"], "format": "uri"} + }, + "required": ["id", "login"] + } + } + } + }"##, + ) + .unwrap(); + + // Simulate what get_response_schema returns for anyOf: [$ref, null] + let schema = crate::spec::Schema { + any_of: vec![ + crate::spec::Schema { + reference: Some("#/components/schemas/SimpleUser".to_string()), + ..Default::default() + }, + crate::spec::Schema { + schema_type: Some("null".to_string()), + ..Default::default() + }, + ], + ..Default::default() + }; + + let columns = extract_columns(&schema, &spec, false); + let names: Vec<&str> = columns.iter().map(|c| c.name.as_str()).collect(); + + assert!(names.contains(&"id"), "Missing id in {names:?}"); + assert!(names.contains(&"login"), "Missing login in {names:?}"); + assert!( + names.contains(&"avatar_url"), + "Missing avatar_url in {names:?}" + ); + + // anyOf makes everything nullable + for col in &columns { + assert!(col.nullable, "{} should be nullable in anyOf", col.name); + } + + // avatar_url gets format: "uri" → text (not a special PG type for uri) + let avatar = columns.iter().find(|c| c.name == "avatar_url").unwrap(); + assert_eq!(avatar.pg_type, "text"); +} + +#[test] +fn test_full_ddl_31_anyof_nullable_ref_pattern() { + // End-to-end: 3.1 spec with GitHub-style anyOf nullable ref → correct DDL + let spec = OpenApiSpec::from_str( + r##"{ + "openapi": "3.1.0", + "info": {"title": "GitHub-style API", "version": "1.0"}, + "paths": { + 
"/repos": { + "get": { + "responses": { + "200": { + "description": "ok", + "content": { + "application/json": { + "schema": { + "type": "array", + "items": { + "type": "object", + "properties": { + "id": {"type": "integer"}, + "name": {"type": "string"}, + "owner": { + "anyOf": [ + {"$ref": "#/components/schemas/SimpleUser"}, + {"type": "null"} + ] + }, + "description": {"type": ["string", "null"]}, + "created_at": {"type": "string", "format": "date-time"} + }, + "required": ["id", "name"] + } + } + } + } + } + } + } + } + }, + "components": { + "schemas": { + "SimpleUser": { + "type": "object", + "properties": { + "id": {"type": "integer"}, + "login": {"type": "string"} + }, + "required": ["id", "login"] + } + } + } + }"##, + ) + .unwrap(); + + let tables = generate_all_tables(&spec, "github_server", None, false, true); + assert_eq!(tables.len(), 1); + let ddl = &tables[0]; + + // id and name are required, not nullable + assert!(ddl.contains("\"id\" bigint NOT NULL"), "id: {ddl}"); + assert!(ddl.contains("\"name\" text NOT NULL"), "name: {ddl}"); + // created_at is not required → nullable + assert!( + ddl.contains("\"created_at\" timestamptz"), + "created_at: {ddl}" + ); + assert!( + !ddl.contains("\"created_at\" timestamptz NOT NULL"), + "created_at should be nullable: {ddl}" + ); + // description is nullable via type array + assert!(ddl.contains("\"description\" text"), "description: {ddl}"); + assert!( + !ddl.contains("\"description\" text NOT NULL"), + "description should be nullable: {ddl}" + ); + // owner is anyOf → jsonb (merged from oneOf/anyOf without type) + assert!( + ddl.contains("\"owner\" jsonb"), + "owner should be jsonb: {ddl}" + ); + // attrs column + assert!(ddl.contains("\"attrs\" jsonb"), "attrs: {ddl}"); + // rowid should be id + assert!(ddl.contains("rowid_column 'id'"), "rowid: {ddl}"); +} + +#[test] +fn test_full_ddl_31_allof_inheritance_chain() { + // End-to-end: allOf inheritance with 3.1 type arrays → correct DDL + let spec = 
OpenApiSpec::from_str( + r##"{ + "openapi": "3.1.0", + "info": {"title": "Inheritance API", "version": "1.0"}, + "paths": { + "/tickets": { + "get": { + "responses": { + "200": { + "description": "ok", + "content": { + "application/json": { + "schema": { + "type": "array", + "items": {"$ref": "#/components/schemas/Ticket"} + } + } + } + } + } + } + } + }, + "components": { + "schemas": { + "BaseEntity": { + "type": "object", + "properties": { + "id": {"type": "string", "format": "uuid"}, + "created_at": {"type": "string", "format": "date-time"}, + "updated_at": {"type": ["string", "null"], "format": "date-time"} + }, + "required": ["id", "created_at"] + }, + "Ticket": { + "allOf": [ + {"$ref": "#/components/schemas/BaseEntity"}, + { + "type": "object", + "properties": { + "title": {"type": "string"}, + "priority": {"type": ["integer", "null"], "format": "int32"}, + "assignee": {"type": ["string", "null"]} + }, + "required": ["title"] + } + ] + } + } + } + }"##, + ) + .unwrap(); + + let tables = generate_all_tables(&spec, "api_server", None, false, false); + assert_eq!(tables.len(), 1); + let ddl = &tables[0]; + + // From BaseEntity: id (uuid, required), created_at (timestamptz, required), updated_at (nullable) + assert!(ddl.contains("\"id\" uuid NOT NULL"), "id: {ddl}"); + assert!( + ddl.contains("\"created_at\" timestamptz NOT NULL"), + "created_at: {ddl}" + ); + assert!( + ddl.contains("\"updated_at\" timestamptz"), + "updated_at: {ddl}" + ); + assert!( + !ddl.contains("\"updated_at\" timestamptz NOT NULL"), + "updated_at should be nullable: {ddl}" + ); + + // From Ticket extension: title (required), priority (nullable int32), assignee (nullable) + assert!(ddl.contains("\"title\" text NOT NULL"), "title: {ddl}"); + assert!(ddl.contains("\"priority\" integer"), "priority: {ddl}"); + assert!( + !ddl.contains("\"priority\" integer NOT NULL"), + "priority should be nullable: {ddl}" + ); + assert!(ddl.contains("\"assignee\" text"), "assignee: {ddl}"); + assert!( + 
!ddl.contains("\"assignee\" text NOT NULL"), + "assignee should be nullable: {ddl}" + ); + + // rowid should be id + assert!(ddl.contains("rowid_column 'id'"), "rowid: {ddl}"); +} + +#[test] +fn test_generate_all_tables_31_spec_with_type_arrays() { + // generate_all_tables with a full 3.1 spec — multiple endpoints + let spec = OpenApiSpec::from_str( + r#"{ + "openapi": "3.1.0", + "info": {"title": "3.1 API", "version": "1.0"}, + "paths": { + "/users": { + "get": { + "responses": { + "200": { + "description": "ok", + "content": { + "application/json": { + "schema": { + "type": "array", + "items": { + "type": "object", + "properties": { + "id": {"type": "integer"}, + "email": {"type": ["string", "null"]} + }, + "required": ["id"] + } + } + } + } + } + } + } + }, + "/events": { + "get": { + "responses": { + "200": { + "description": "ok", + "content": { + "application/json": { + "schema": { + "type": "array", + "items": { + "type": "object", + "properties": { + "id": {"type": "string", "format": "uuid"}, + "occurred_at": {"type": "string", "format": "date-time"}, + "payload": {"type": ["object", "null"]} + }, + "required": ["id", "occurred_at"] + } + } + } + } + } + } + } + } + } + }"#, + ) + .unwrap(); + + let tables = generate_all_tables(&spec, "api31", None, false, true); + assert_eq!(tables.len(), 2); + + // Find the users table + let users_ddl = tables.iter().find(|t| t.contains("\"users\"")).unwrap(); + assert!( + users_ddl.contains("\"id\" bigint NOT NULL"), + "users.id: {users_ddl}" + ); + assert!( + users_ddl.contains("\"email\" text"), + "users.email: {users_ddl}" + ); + assert!( + !users_ddl.contains("\"email\" text NOT NULL"), + "users.email nullable: {users_ddl}" + ); + + // Find the events table + let events_ddl = tables.iter().find(|t| t.contains("\"events\"")).unwrap(); + assert!( + events_ddl.contains("\"id\" uuid NOT NULL"), + "events.id: {events_ddl}" + ); + assert!( + events_ddl.contains("\"occurred_at\" timestamptz NOT NULL"), + 
"events.occurred_at: {events_ddl}" + ); + assert!( + events_ddl.contains("\"payload\" jsonb"), + "events.payload: {events_ddl}" + ); + assert!( + !events_ddl.contains("\"payload\" jsonb NOT NULL"), + "events.payload nullable: {events_ddl}" + ); +} + +#[test] +fn test_extract_columns_31_all_nullable_format_types() { + // Every format type mapped through 3.1 nullable type arrays + let spec = OpenApiSpec::from_str( + r#"{ + "openapi": "3.1.0", + "info": {"title": "T"}, + "paths": {} + }"#, + ) + .unwrap(); + + let mut properties = HashMap::new(); + // string formats + properties.insert( + "date_field".to_string(), + crate::spec::Schema { + schema_type: Some("string".to_string()), + format: Some("date".to_string()), + nullable: true, // from type: ["string", "null"] + ..Default::default() + }, + ); + properties.insert( + "datetime_field".to_string(), + crate::spec::Schema { + schema_type: Some("string".to_string()), + format: Some("date-time".to_string()), + nullable: true, + ..Default::default() + }, + ); + properties.insert( + "time_field".to_string(), + crate::spec::Schema { + schema_type: Some("string".to_string()), + format: Some("time".to_string()), + nullable: true, + ..Default::default() + }, + ); + properties.insert( + "uuid_field".to_string(), + crate::spec::Schema { + schema_type: Some("string".to_string()), + format: Some("uuid".to_string()), + nullable: true, + ..Default::default() + }, + ); + properties.insert( + "byte_field".to_string(), + crate::spec::Schema { + schema_type: Some("string".to_string()), + format: Some("byte".to_string()), + nullable: true, + ..Default::default() + }, + ); + properties.insert( + "binary_field".to_string(), + crate::spec::Schema { + schema_type: Some("string".to_string()), + format: Some("binary".to_string()), + nullable: true, + ..Default::default() + }, + ); + // integer formats + properties.insert( + "int32_field".to_string(), + crate::spec::Schema { + schema_type: Some("integer".to_string()), + format: 
Some("int32".to_string()), + nullable: true, + ..Default::default() + }, + ); + properties.insert( + "int64_field".to_string(), + crate::spec::Schema { + schema_type: Some("integer".to_string()), + format: Some("int64".to_string()), + nullable: true, + ..Default::default() + }, + ); + properties.insert( + "unix_time_field".to_string(), + crate::spec::Schema { + schema_type: Some("integer".to_string()), + format: Some("unix-time".to_string()), + nullable: true, + ..Default::default() + }, + ); + // number formats + properties.insert( + "float_field".to_string(), + crate::spec::Schema { + schema_type: Some("number".to_string()), + format: Some("float".to_string()), + nullable: true, + ..Default::default() + }, + ); + properties.insert( + "double_field".to_string(), + crate::spec::Schema { + schema_type: Some("number".to_string()), + format: Some("double".to_string()), + nullable: true, + ..Default::default() + }, + ); + // boolean + properties.insert( + "bool_field".to_string(), + crate::spec::Schema { + schema_type: Some("boolean".to_string()), + nullable: true, + ..Default::default() + }, + ); + + let schema = crate::spec::Schema { + schema_type: Some("object".to_string()), + properties, + // None required — all optional + ..Default::default() + }; + + let columns = extract_columns(&schema, &spec, false); + + let expected = vec![ + ("binary_field", "bytea"), + ("bool_field", "boolean"), + ("byte_field", "bytea"), + ("date_field", "date"), + ("datetime_field", "timestamptz"), + ("double_field", "double precision"), + ("float_field", "real"), + ("int32_field", "integer"), + ("int64_field", "bigint"), + ("time_field", "time"), + ("unix_time_field", "timestamptz"), + ("uuid_field", "uuid"), + ]; + + for (name, pg_type) in &expected { + let col = columns + .iter() + .find(|c| c.name == *name) + .unwrap_or_else(|| panic!("Missing column {name}")); + assert_eq!(col.pg_type, *pg_type, "{name} should be {pg_type}"); + assert!(col.nullable, "{name} should be nullable"); + } 
+} + +#[test] +fn test_full_ddl_31_stripe_expandable_field() { + // Stripe pattern: anyOf [string, $ref] for expandable fields → produces jsonb in DDL + let spec = OpenApiSpec::from_str( + r##"{ + "openapi": "3.1.0", + "info": {"title": "Stripe-style API", "version": "1.0"}, + "paths": { + "/charges": { + "get": { + "responses": { + "200": { + "description": "ok", + "content": { + "application/json": { + "schema": { + "type": "object", + "properties": { + "data": { + "type": "array", + "items": { + "type": "object", + "properties": { + "id": {"type": "string"}, + "amount": {"type": "integer"}, + "customer": { + "anyOf": [ + {"type": "string"}, + {"$ref": "#/components/schemas/Customer"} + ] + }, + "currency": {"type": "string"}, + "created": {"type": "integer", "format": "unix-time"} + }, + "required": ["id", "amount", "currency", "created"] + } + } + } + } + } + } + } + } + } + } + }, + "components": { + "schemas": { + "Customer": { + "type": "object", + "properties": { + "id": {"type": "string"}, + "email": {"type": ["string", "null"]} + } + } + } + } + }"##, + ) + .unwrap(); + + let tables = generate_all_tables(&spec, "stripe_server", None, false, false); + assert_eq!(tables.len(), 1); + let ddl = &tables[0]; + + // The response is an object with a "data" array — extract_columns sees the top-level object, + // which has a "data" property of type array → jsonb column + // This verifies that wrapper objects don't get their inner items extracted at DDL time + // (that's a runtime concern handled by extract_data auto-detection) + assert!(ddl.contains("\"data\" jsonb"), "data wrapper: {ddl}"); +} + +#[test] +fn test_full_ddl_31_response_level_ref() { + // Response-level $ref with 3.1 type arrays in the referenced response schema + let spec = OpenApiSpec::from_str( + r##"{ + "openapi": "3.1.0", + "info": {"title": "Test", "version": "1.0"}, + "paths": { + "/items": { + "get": { + "responses": { + "200": { + "$ref": "#/components/responses/ItemList" + } + } + } + } + 
}, + "components": { + "responses": { + "ItemList": { + "description": "List of items", + "content": { + "application/json": { + "schema": { + "type": "array", + "items": { + "type": "object", + "properties": { + "id": {"type": "string", "format": "uuid"}, + "label": {"type": ["string", "null"]}, + "weight": {"type": ["number", "null"], "format": "float"}, + "active": {"type": ["boolean", "null"]} + }, + "required": ["id"] + } + } + } + } + } + } + } + }"##, + ) + .unwrap(); + + let tables = generate_all_tables(&spec, "test_server", None, false, false); + assert_eq!(tables.len(), 1); + let ddl = &tables[0]; + + assert!(ddl.contains("\"id\" uuid NOT NULL"), "id: {ddl}"); + assert!(ddl.contains("\"label\" text"), "label: {ddl}"); + assert!( + !ddl.contains("\"label\" text NOT NULL"), + "label nullable: {ddl}" + ); + assert!(ddl.contains("\"weight\" real"), "weight: {ddl}"); + assert!( + !ddl.contains("\"weight\" real NOT NULL"), + "weight nullable: {ddl}" + ); + assert!(ddl.contains("\"active\" boolean"), "active: {ddl}"); + assert!( + !ddl.contains("\"active\" boolean NOT NULL"), + "active nullable: {ddl}" + ); +} + +#[test] +fn test_extract_columns_31_writeonly_excluded_with_type_arrays() { + // writeOnly with 3.1 type arrays — should still be filtered + let spec = + OpenApiSpec::from_str(r#"{"openapi": "3.1.0", "info": {"title": "T"}, "paths": {}}"#) + .unwrap(); + + let mut properties = HashMap::new(); + properties.insert( + "username".to_string(), + crate::spec::Schema { + schema_type: Some("string".to_string()), + ..Default::default() + }, + ); + properties.insert( + "password".to_string(), + crate::spec::Schema { + schema_type: Some("string".to_string()), + nullable: true, // type: ["string", "null"] + write_only: true, + ..Default::default() + }, + ); + properties.insert( + "api_key".to_string(), + crate::spec::Schema { + schema_type: Some("string".to_string()), + write_only: true, + ..Default::default() + }, + ); + + let schema = crate::spec::Schema { + 
schema_type: Some("object".to_string()), + properties, + ..Default::default() + }; + + let columns = extract_columns(&schema, &spec, false); + let names: Vec<&str> = columns.iter().map(|c| c.name.as_str()).collect(); + assert_eq!(names, vec!["username"]); +} + +#[test] +fn test_full_ddl_31_post_for_read_with_composition() { + // POST-for-read endpoint with allOf response in 3.1 + let spec = OpenApiSpec::from_str( + r##"{ + "openapi": "3.1.0", + "info": {"title": "Search API", "version": "1.0"}, + "paths": { + "/search": { + "post": { + "responses": { + "200": { + "description": "ok", + "content": { + "application/json": { + "schema": { + "type": "array", + "items": { + "allOf": [ + {"$ref": "#/components/schemas/BaseResult"}, + { + "type": "object", + "properties": { + "relevance": {"type": ["number", "null"], "format": "float"}, + "snippet": {"type": ["string", "null"]} + } + } + ] + } + } + } + } + } + } + } + } + }, + "components": { + "schemas": { + "BaseResult": { + "type": "object", + "properties": { + "id": {"type": "string"}, + "title": {"type": "string"}, + "created_at": {"type": "string", "format": "date-time"} + }, + "required": ["id", "title"] + } + } + } + }"##, + ) + .unwrap(); + + let tables = generate_all_tables(&spec, "search_server", None, false, false); + assert_eq!(tables.len(), 1); + let ddl = &tables[0]; + + // POST table name + assert!(ddl.contains("\"search_post\""), "table name: {ddl}"); + assert!(ddl.contains("method 'POST'"), "method: {ddl}"); + + // Base fields from allOf + assert!(ddl.contains("\"id\" text NOT NULL"), "id: {ddl}"); + assert!(ddl.contains("\"title\" text NOT NULL"), "title: {ddl}"); + assert!( + ddl.contains("\"created_at\" timestamptz"), + "created_at: {ddl}" + ); + + // Extension fields — nullable via 3.1 type arrays + assert!(ddl.contains("\"relevance\" real"), "relevance: {ddl}"); + assert!( + !ddl.contains("\"relevance\" real NOT NULL"), + "relevance nullable: {ddl}" + ); + assert!(ddl.contains("\"snippet\" text"), 
"snippet: {ddl}"); + assert!( + !ddl.contains("\"snippet\" text NOT NULL"), + "snippet nullable: {ddl}" + ); +} diff --git a/wasm-wrappers/fdw/openapi_fdw/src/spec.rs b/wasm-wrappers/fdw/openapi_fdw/src/spec.rs index e6fb9b76..b44066e9 100644 --- a/wasm-wrappers/fdw/openapi_fdw/src/spec.rs +++ b/wasm-wrappers/fdw/openapi_fdw/src/spec.rs @@ -5,8 +5,89 @@ use serde::Deserialize; use serde_json::Value as JsonValue; +use std::cell::Cell; use std::collections::HashMap; +/// Raw schema for deserialization — handles OpenAPI 3.1 type arrays. +/// +/// OpenAPI 3.1 changed `type` from a string to potentially an array: +/// - 3.0: `"type": "string"` with `"nullable": true` +/// - 3.1: `"type": ["string", "null"]` +/// +/// This intermediate struct captures the raw `type` field, then `From` +/// extracts the actual type and sets `nullable` accordingly. +#[derive(Debug, Deserialize)] +struct RawSchema { + #[serde(rename = "type")] + #[serde(default)] + schema_type: Option, + #[serde(default)] + format: Option, + #[serde(default)] + properties: HashMap, + #[serde(default)] + items: Option>, + #[serde(rename = "$ref")] + #[serde(default)] + reference: Option, + #[serde(default)] + required: Vec, + #[serde(default)] + nullable: bool, + #[serde(rename = "writeOnly")] + #[serde(default)] + write_only: bool, + #[serde(rename = "allOf")] + #[serde(default)] + all_of: Vec, + #[serde(rename = "oneOf")] + #[serde(default)] + one_of: Vec, + #[serde(rename = "anyOf")] + #[serde(default)] + any_of: Vec, +} + +impl From for Schema { + fn from(raw: RawSchema) -> Self { + let (schema_type, type_has_null) = match raw.schema_type { + None => (None, false), + Some(JsonValue::String(s)) => (Some(s), false), + Some(JsonValue::Array(arr)) => { + let has_null = arr.iter().any(|v| v.as_str() == Some("null")); + let non_null_types: Vec<&str> = arr + .iter() + .filter_map(|v| v.as_str()) + .filter(|s| *s != "null") + .collect(); + // Multiple non-null types (e.g., ["string", "integer"]) → None (maps to 
jsonb) + let actual = if non_null_types.len() == 1 { + Some(non_null_types[0].to_string()) + } else { + None + }; + (actual, has_null) + } + Some(_) => (None, false), + }; + + Schema { + schema_type, + format: raw.format, + properties: raw.properties, + items: raw.items, + reference: raw.reference, + required: raw.required, + // nullable if explicitly set OR if type array contains "null" + nullable: raw.nullable || type_has_null, + write_only: raw.write_only, + all_of: raw.all_of, + one_of: raw.one_of, + any_of: raw.any_of, + } + } +} + /// Represents an `OpenAPI` 3.0+ specification #[derive(Debug, Deserialize)] pub struct OpenApiSpec { @@ -31,78 +112,80 @@ struct Info { /// Server definition #[derive(Debug, Deserialize)] -pub struct Server { +pub(crate) struct Server { pub url: String, + #[serde(default)] + pub variables: HashMap, +} + +/// Server variable with a default value for URL template substitution +#[derive(Debug, Deserialize)] +pub(crate) struct ServerVariable { + pub default: String, } -/// Path item (only GET operations are used for foreign tables) +/// Path item (GET and POST operations are used for foreign tables) #[derive(Debug, Deserialize)] -pub struct PathItem { +pub(crate) struct PathItem { #[serde(default)] pub get: Option, + #[serde(default)] + pub post: Option, } /// Operation definition #[derive(Debug, Deserialize)] #[serde(rename_all = "camelCase")] -pub struct Operation { +pub(crate) struct Operation { #[serde(default)] pub responses: HashMap, } /// Response definition #[derive(Debug, Deserialize)] -pub struct Response { +pub(crate) struct Response { + #[serde(rename = "$ref")] + #[serde(default)] + pub reference: Option, #[serde(default)] pub content: HashMap, } #[derive(Debug, Deserialize)] -pub struct MediaType { +pub(crate) struct MediaType { #[serde(default)] pub schema: Option, } #[derive(Debug, Clone, Default, Deserialize)] +#[serde(from = "RawSchema")] #[allow(clippy::struct_field_names)] pub struct Schema { - #[serde(rename = 
"type")] - #[serde(default)] pub schema_type: Option, - #[serde(default)] pub format: Option, - #[serde(default)] pub properties: HashMap, - #[serde(default)] pub items: Option>, - #[serde(rename = "$ref")] - #[serde(default)] pub reference: Option, - #[serde(default)] pub required: Vec, - #[serde(default)] pub nullable: bool, - #[serde(rename = "allOf")] - #[serde(default)] + pub write_only: bool, pub all_of: Vec, - #[serde(rename = "oneOf")] - #[serde(default)] pub one_of: Vec, - #[serde(rename = "anyOf")] - #[serde(default)] pub any_of: Vec, } #[derive(Debug, Deserialize)] -pub struct Components { +pub(crate) struct Components { #[serde(default)] pub schemas: HashMap, + #[serde(default)] + pub responses: HashMap, } impl OpenApiSpec { /// Parse an `OpenAPI` spec from a JSON value - pub fn from_json(json: &JsonValue) -> Result { - let spec: Self = serde_json::from_value(json.clone()) + pub fn from_json(json: JsonValue) -> Result { + let spec: Self = serde_json::from_value(json) .map_err(|e| format!("Failed to parse OpenAPI spec: {e}"))?; if !spec.openapi.starts_with("3.") { @@ -131,12 +214,18 @@ impl OpenApiSpec { Ok(spec) } - /// Get the base URL from the spec (first server URL) - pub fn base_url(&self) -> Option<&str> { - self.servers.first().map(|s| s.url.as_str()) + /// Get the base URL from the spec (first server URL), substituting any variables + pub fn base_url(&self) -> Option { + self.servers.first().map(|s| { + let mut url = s.url.clone(); + for (name, var) in &s.variables { + url = url.replace(&format!("{{{name}}}"), &var.default); + } + url + }) } - /// Get all endpoint paths that support GET operations (for querying). + /// Get all endpoint paths that support GET or POST operations (for querying). 
/// /// Parameterized paths (e.g., `/users/{id}`, `/users/{user_id}/posts`) are /// excluded because they require path parameter values from WHERE clauses at @@ -154,464 +243,337 @@ impl OpenApiSpec { } if let Some(ref op) = path_item.get { - let response_schema = Self::get_response_schema(op); + let response_schema = self.get_response_schema(op); + endpoints.push(EndpointInfo { + path: path.clone(), + method: "GET", + response_schema, + }); + } + + if let Some(ref op) = path_item.post { + let response_schema = self.get_response_schema(op); endpoints.push(EndpointInfo { path: path.clone(), + method: "POST", response_schema, }); } } - endpoints.sort_by(|a, b| a.path.cmp(&b.path)); + endpoints.sort_by(|a, b| a.path.cmp(&b.path).then(a.method.cmp(b.method))); endpoints } - /// Get the response schema for a successful response (200, 201, or default) - fn get_response_schema(op: &Operation) -> Option { - let response = op - .responses - .get("200") - .or_else(|| op.responses.get("201")) - .or_else(|| op.responses.get("default"))?; + /// Success response codes to check, in priority order + const SUCCESS_RESPONSE_CODES: &[&str] = &["200", "201", "2XX", "default"]; - let media_type = response + /// Get the response schema for a successful response (200, 201, 2XX, or default) + fn get_response_schema(&self, op: &Operation) -> Option { + let response = Self::SUCCESS_RESPONSE_CODES + .iter() + .find_map(|code| op.responses.get(*code))?; + + // Resolve $ref at the response level (e.g., "$ref": "#/components/responses/Success") + let resolved_response = response + .reference + .as_ref() + .and_then(|r| self.resolve_response_ref(r)) + .unwrap_or(response); + + let media_type = resolved_response .content - .get("application/json") - .or_else(|| response.content.values().next())?; + .iter() + .find(|(k, _)| k.starts_with("application/json")) + .map(|(_, v)| v) + .or_else(|| resolved_response.content.values().next())?; media_type.schema.clone() } + /// Parse a 
`#/components/{section}/{name}` reference, returning the name if it matches. + fn parse_component_ref<'a>(reference: &'a str, section: &str) -> Option<&'a str> { + let path = reference.strip_prefix("#/components/")?; + let name = path.strip_prefix(section)?.strip_prefix('/')?; + // Reject if name contains further slashes (e.g., "#/components/schemas/a/b") + if name.contains('/') { + return None; + } + Some(name) + } + + /// Resolve a $ref to a response in components.responses + fn resolve_response_ref(&self, reference: &str) -> Option<&Response> { + let name = Self::parse_component_ref(reference, "responses")?; + self.components.as_ref()?.responses.get(name) + } + /// Resolve a $ref to its schema pub fn resolve_ref(&self, reference: &str) -> Option<&Schema> { - // Handle refs like "#/components/schemas/User" - let parts: Vec<&str> = reference.trim_start_matches("#/").split('/').collect(); - if parts.len() == 3 && parts[0] == "components" && parts[1] == "schemas" { - self.components.as_ref()?.schemas.get(parts[2]) - } else { - None - } + let name = Self::parse_component_ref(reference, "schemas")?; + self.components.as_ref()?.schemas.get(name) } /// Recursively resolve a schema, following $ref pointers and handling composition. - /// Uses depth limiting to prevent infinite recursion on circular references. + /// Uses depth limiting and a call counter to prevent infinite recursion and + /// exponential blowup on branching schemas. 
pub fn resolve_schema(&self, schema: &Schema) -> Schema { - self.resolve_schema_with_depth(schema, 0) + let call_count = Cell::new(0usize); + self.resolve_schema_internal(schema, 0, &call_count) } /// Maximum depth for schema resolution to prevent stack overflow on circular refs const MAX_RESOLVE_DEPTH: usize = 32; - /// Internal schema resolution with depth tracking - fn resolve_schema_with_depth(&self, schema: &Schema, depth: usize) -> Schema { - // Guard against circular references - if depth > Self::MAX_RESOLVE_DEPTH { + /// Maximum total resolve calls to prevent exponential blowup on branching schemas + const MAX_RESOLVE_CALLS: usize = 10_000; + + /// Internal schema resolution with depth and call-count tracking + fn resolve_schema_internal( + &self, + schema: &Schema, + depth: usize, + call_count: &Cell, + ) -> Schema { + let count = call_count.get() + 1; + call_count.set(count); + + // Guard against circular references and exponential blowup + if depth > Self::MAX_RESOLVE_DEPTH || count > Self::MAX_RESOLVE_CALLS { return schema.clone(); } // First resolve any $ref if let Some(ref reference) = schema.reference { if let Some(resolved) = self.resolve_ref(reference) { - return self.resolve_schema_with_depth(resolved, depth + 1); + let mut result = self.resolve_schema_internal(resolved, depth + 1, call_count); + // Merge non-default siblings (OpenAPI 3.1 $ref with siblings) + if schema.nullable { + result.nullable = true; + } + if schema.write_only { + result.write_only = true; + } + for (name, prop) in &schema.properties { + result.properties.insert(name.clone(), prop.clone()); + } + if !schema.required.is_empty() { + result.required.extend(schema.required.iter().cloned()); + result.required.sort(); + result.required.dedup(); + } + // OpenAPI 3.1: $ref can coexist with composition keywords + if !schema.all_of.is_empty() { + let allof = self.merge_allof_schemas(&schema.all_of, depth + 1, call_count); + for (name, prop) in allof.properties { + 
result.properties.insert(name, prop); + } + result.required.extend(allof.required); + result.required.sort(); + result.required.dedup(); + } + if !schema.one_of.is_empty() { + let oneof = self.merge_union_schemas(&schema.one_of, depth + 1, call_count); + for (name, prop) in oneof.properties { + result.properties.entry(name).or_insert(prop); + } + } + if !schema.any_of.is_empty() { + let anyof = self.merge_union_schemas(&schema.any_of, depth + 1, call_count); + for (name, prop) in anyof.properties { + result.properties.entry(name).or_insert(prop); + } + } + return result; } } // Handle allOf by merging all properties (intersection - all schemas apply) if !schema.all_of.is_empty() { - return self.merge_schemas_with_depth(&schema.all_of, false, depth + 1); + let mut merged = self.merge_allof_schemas(&schema.all_of, depth + 1, call_count); + // Merge parent-level properties/required alongside allOf (OpenAPI 3.1) + Self::merge_parent_siblings(schema, &mut merged); + return merged; } // Handle oneOf by merging all possible properties as nullable (union - one of the schemas) if !schema.one_of.is_empty() { - return self.merge_schemas_with_depth(&schema.one_of, true, depth + 1); + let mut merged = self.merge_union_schemas(&schema.one_of, depth + 1, call_count); + Self::merge_parent_siblings(schema, &mut merged); + return merged; } // Handle anyOf by merging all possible properties as nullable (union - any of the schemas) if !schema.any_of.is_empty() { - return self.merge_schemas_with_depth(&schema.any_of, true, depth + 1); + let mut merged = self.merge_union_schemas(&schema.any_of, depth + 1, call_count); + Self::merge_parent_siblings(schema, &mut merged); + return merged; } schema.clone() } - /// Merge multiple schemas into one with depth tracking. - /// If `make_nullable` is true, all properties become optional (for oneOf/anyOf) - fn merge_schemas_with_depth( + /// Merge allOf schemas: later schemas refine/override earlier ones, required fields preserved. 
+ fn merge_allof_schemas( &self, schemas: &[Schema], - make_nullable: bool, depth: usize, + call_count: &Cell, ) -> Schema { let mut merged = Schema { - schema_type: Some("object".to_string()), properties: HashMap::new(), required: Vec::new(), ..Default::default() }; + let mut has_any_properties = false; + for sub_schema in schemas { - let resolved = self.resolve_schema_with_depth(sub_schema, depth); + let resolved = self.resolve_schema_internal(sub_schema, depth, call_count); - // Merge properties - for (name, mut prop_schema) in resolved.properties { - if make_nullable { - prop_schema.nullable = true; - // For oneOf/anyOf: keep first definition (most permissive) - merged.properties.entry(name).or_insert(prop_schema); - } else { - // For allOf: later schemas refine/override earlier ones - // This follows OpenAPI semantics where allOf combines schemas - // and later definitions can provide more specific types - merged.properties.insert(name, prop_schema); - } + if !resolved.properties.is_empty() { + has_any_properties = true; } - // For allOf, all required fields stay required - // For oneOf/anyOf, nothing is required since we don't know which variant - if !make_nullable { - merged.required.extend(resolved.required); + // Later schemas refine/override earlier ones + for (name, prop_schema) in resolved.properties { + merged.properties.insert(name, prop_schema); } + + merged.required.extend(resolved.required); + } + + if has_any_properties { + merged.schema_type = Some("object".to_string()); } - // Deduplicate required fields merged.required.sort(); merged.required.dedup(); merged } -} -/// Extracted endpoint information for table generation -#[derive(Debug)] -pub struct EndpointInfo { - pub path: String, - pub response_schema: Option, -} - -impl EndpointInfo { - /// Generate a table name from the endpoint path. 
- /// - /// Uses the full path to avoid collisions (e.g., `/v1/users` and `/v2/users` - /// become `v1_users` and `v2_users` instead of both becoming `users`). - pub fn table_name(&self) -> String { - let cleaned = self.path.trim_matches('/'); - - if cleaned.is_empty() { - return "unknown".to_string(); - } - - // Join path segments with '_' and convert kebab-case to snake_case - cleaned.replace(['/', '-'], "_") - } -} - -#[cfg(test)] -mod tests { - use super::*; - - #[test] - fn test_parse_minimal_spec() { - let spec_json = r#"{ - "openapi": "3.0.0", - "info": {"title": "Test API", "version": "1.0"}, - "paths": {} - }"#; - - let spec = OpenApiSpec::from_str(spec_json).unwrap(); - assert_eq!(spec.openapi, "3.0.0"); - assert_eq!(spec.info.title, "Test API"); - } - - #[test] - fn test_endpoint_table_name() { - let endpoint = EndpointInfo { - path: "/api/v1/user-accounts".to_string(), - response_schema: None, - }; - assert_eq!(endpoint.table_name(), "api_v1_user_accounts"); - - // Single segment - let endpoint = EndpointInfo { - path: "/users".to_string(), - response_schema: None, + /// Merge oneOf/anyOf schemas: all properties become nullable, first definition wins. 
+ fn merge_union_schemas( + &self, + schemas: &[Schema], + depth: usize, + call_count: &Cell, + ) -> Schema { + let mut merged = Schema { + properties: HashMap::new(), + required: Vec::new(), + ..Default::default() }; - assert_eq!(endpoint.table_name(), "users"); - // Collision avoidance: different versions produce different names - let v1 = EndpointInfo { - path: "/v1/users".to_string(), - response_schema: None, - }; - let v2 = EndpointInfo { - path: "/v2/users".to_string(), - response_schema: None, - }; - assert_ne!(v1.table_name(), v2.table_name()); + let mut has_any_properties = false; - // Empty path - let endpoint = EndpointInfo { - path: "/".to_string(), - response_schema: None, - }; - assert_eq!(endpoint.table_name(), "unknown"); - } + for sub_schema in schemas { + let resolved = self.resolve_schema_internal(sub_schema, depth, call_count); - #[test] - fn test_resolve_ref() { - let spec_json = r#"{ - "openapi": "3.0.0", - "info": {"title": "Test", "version": "1.0"}, - "paths": {}, - "components": { - "schemas": { - "User": { - "type": "object", - "properties": { - "id": {"type": "string"}, - "name": {"type": "string"} - }, - "required": ["id"] - } - } + if !resolved.properties.is_empty() { + has_any_properties = true; } - }"#; - - let spec = OpenApiSpec::from_str(spec_json).unwrap(); - let user_schema = spec.resolve_ref("#/components/schemas/User").unwrap(); - - assert_eq!(user_schema.schema_type, Some("object".to_string())); - assert!(user_schema.properties.contains_key("id")); - assert!(user_schema.properties.contains_key("name")); - assert!(user_schema.required.contains(&"id".to_string())); - } - #[test] - fn test_allof_merges_properties() { - let spec_json = r##"{ - "openapi": "3.0.0", - "info": {"title": "Test", "version": "1.0"}, - "paths": {}, - "components": { - "schemas": { - "Base": { - "type": "object", - "properties": { - "id": {"type": "string"} - }, - "required": ["id"] - }, - "Extended": { - "allOf": [ - {"$ref": "#/components/schemas/Base"}, 
- { - "type": "object", - "properties": { - "name": {"type": "string"}, - "email": {"type": "string"} - }, - "required": ["name"] - } - ] - } - } + // Keep first definition (most permissive), mark all nullable + for (name, mut prop_schema) in resolved.properties { + prop_schema.nullable = true; + merged.properties.entry(name).or_insert(prop_schema); } - }"##; - let spec = OpenApiSpec::from_str(spec_json).unwrap(); - let extended = spec.resolve_ref("#/components/schemas/Extended").unwrap(); - let resolved = spec.resolve_schema(extended); + // Nothing is required — we don't know which variant applies + } - // Should have all properties from both schemas - assert!(resolved.properties.contains_key("id")); - assert!(resolved.properties.contains_key("name")); - assert!(resolved.properties.contains_key("email")); + // Only set type to "object" if at least one sub-schema has properties. + // Primitive composition (e.g., oneOf: [{type: "string"}, {type: "integer"}]) + // should produce None (→ jsonb), not "object". 
+ if has_any_properties { + merged.schema_type = Some("object".to_string()); + } - // Required from both should be merged - assert!(resolved.required.contains(&"id".to_string())); - assert!(resolved.required.contains(&"name".to_string())); + merged } - #[test] - fn test_oneof_merges_as_nullable() { - let spec_json = r#"{ - "openapi": "3.0.0", - "info": {"title": "Test", "version": "1.0"}, - "paths": {}, - "components": { - "schemas": { - "Response": { - "oneOf": [ - { - "type": "object", - "properties": { - "user_id": {"type": "string"}, - "user_name": {"type": "string"} - }, - "required": ["user_id"] - }, - { - "type": "object", - "properties": { - "org_id": {"type": "string"}, - "org_name": {"type": "string"} - }, - "required": ["org_id"] - } - ] - } - } - } - }"#; - - let spec = OpenApiSpec::from_str(spec_json).unwrap(); - let response = spec.resolve_ref("#/components/schemas/Response").unwrap(); - let resolved = spec.resolve_schema(response); - - // Should have properties from all variants - assert!(resolved.properties.contains_key("user_id")); - assert!(resolved.properties.contains_key("user_name")); - assert!(resolved.properties.contains_key("org_id")); - assert!(resolved.properties.contains_key("org_name")); - - // All properties should be nullable (since we don't know which variant) - assert!(resolved.properties.get("user_id").unwrap().nullable); - assert!(resolved.properties.get("org_id").unwrap().nullable); - - // Nothing should be required for oneOf - assert!(resolved.required.is_empty()); + /// Merge parent-level `properties` and `required` into a composition result. + /// + /// Per OpenAPI 3.1, properties defined alongside `allOf`/`oneOf`/`anyOf` + /// should be merged into the composed schema (parent properties override). 
+ fn merge_parent_siblings(parent: &Schema, merged: &mut Schema) { + for (name, prop) in &parent.properties { + merged.properties.insert(name.clone(), prop.clone()); + } + if !parent.required.is_empty() { + merged.required.extend(parent.required.iter().cloned()); + merged.required.sort(); + merged.required.dedup(); + } + if parent.nullable { + merged.nullable = true; + } + if parent.write_only { + merged.write_only = true; + } + // Promote to object if parent has properties + if !parent.properties.is_empty() && merged.schema_type.is_none() { + merged.schema_type = Some("object".to_string()); + } } +} - #[test] - fn test_anyof_merges_as_nullable() { - let spec_json = r#"{ - "openapi": "3.0.0", - "info": {"title": "Test", "version": "1.0"}, - "paths": {}, - "components": { - "schemas": { - "Flexible": { - "anyOf": [ - { - "type": "object", - "properties": { - "name": {"type": "string"} - } - }, - { - "type": "object", - "properties": { - "title": {"type": "string"} - } - } - ] - } - } - } - }"#; - - let spec = OpenApiSpec::from_str(spec_json).unwrap(); - let flexible = spec.resolve_ref("#/components/schemas/Flexible").unwrap(); - let resolved = spec.resolve_schema(flexible); - - // Should have properties from all variants - assert!(resolved.properties.contains_key("name")); - assert!(resolved.properties.contains_key("title")); +/// Extracted endpoint information for table generation +#[derive(Debug)] +pub struct EndpointInfo { + pub path: String, + pub method: &'static str, + pub response_schema: Option, +} - // All should be nullable - assert!(resolved.properties.get("name").unwrap().nullable); - assert!(resolved.properties.get("title").unwrap().nullable); - } +impl EndpointInfo { + /// Generate a table name from the endpoint path. + /// + /// Uses the full path to avoid collisions (e.g., `/v1/users` and `/v2/users` + /// become `v1_users` and `v2_users` instead of both becoming `users`). + /// POST endpoints get a `_post` suffix to avoid collisions with GET tables. 
+ pub fn table_name(&self) -> String { + let cleaned = self.path.trim_matches('/'); - #[test] - fn test_nested_ref_resolution() { - let spec_json = r##"{ - "openapi": "3.0.0", - "info": {"title": "Test", "version": "1.0"}, - "paths": {}, - "components": { - "schemas": { - "Address": { - "type": "object", - "properties": { - "street": {"type": "string"}, - "city": {"type": "string"} - } - }, - "Person": { - "type": "object", - "properties": { - "name": {"type": "string"}, - "address": {"$ref": "#/components/schemas/Address"} - } + let mut base = if cleaned.is_empty() { + "unknown".to_string() + } else { + // Join path segments with '_' and convert kebab-case to snake_case + let mut name = cleaned.replace(['/', '-'], "_"); + // Replace remaining non-alphanumeric/non-underscore chars with '_' + name = name + .chars() + .map(|c| { + if c.is_alphanumeric() || c == '_' { + c + } else { + '_' } - } + }) + .collect(); + // Collapse consecutive underscores + while name.contains("__") { + name = name.replace("__", "_"); } - }"##; - - let spec = OpenApiSpec::from_str(spec_json).unwrap(); - let person = spec.resolve_ref("#/components/schemas/Person").unwrap(); - let resolved = spec.resolve_schema(person); - - assert!(resolved.properties.contains_key("name")); - assert!(resolved.properties.contains_key("address")); + // Trim trailing underscores + name.trim_end_matches('_').to_string() + }; - // The address property should still have a $ref (we resolve at property level when needed) - let address_prop = resolved.properties.get("address").unwrap(); - assert!(address_prop.reference.is_some() || !address_prop.properties.is_empty()); - } + // Prepend '_' if starts with digit + if base.starts_with(|c: char| c.is_ascii_digit()) { + base.insert(0, '_'); + } - #[test] - fn test_allof_later_schema_overrides_earlier() { - // Test that for allOf, later schemas override earlier ones - // This is important for inheritance patterns where a child refines a parent's type - let spec_json = 
r##"{ - "openapi": "3.0.0", - "info": {"title": "Test", "version": "1.0"}, - "paths": {}, - "components": { - "schemas": { - "Base": { - "type": "object", - "properties": { - "status": {"type": "string"}, - "id": {"type": "integer"} - } - }, - "Refined": { - "allOf": [ - {"$ref": "#/components/schemas/Base"}, - { - "type": "object", - "properties": { - "status": { - "type": "string", - "format": "enum" - }, - "extra": {"type": "boolean"} - } - } - ] - } - } - } - }"##; - - let spec = OpenApiSpec::from_str(spec_json).unwrap(); - let refined = spec.resolve_ref("#/components/schemas/Refined").unwrap(); - let resolved = spec.resolve_schema(refined); - - // Should have all properties - assert!(resolved.properties.contains_key("status")); - assert!(resolved.properties.contains_key("id")); - assert!(resolved.properties.contains_key("extra")); - - // The 'status' property should be from the later schema (has format: "enum") - // The base schema's status has no format, so if we get "enum", the later one won - let status_prop = resolved.properties.get("status").unwrap(); - assert_eq!( - status_prop.format, - Some("enum".to_string()), - "Later allOf schema should override earlier schema's property definition" - ); + if self.method == "POST" { + format!("{base}_post") + } else { + base + } } } + +#[cfg(test)] +#[path = "spec_tests.rs"] +mod tests; diff --git a/wasm-wrappers/fdw/openapi_fdw/src/spec_tests.rs b/wasm-wrappers/fdw/openapi_fdw/src/spec_tests.rs new file mode 100644 index 00000000..deb82cb5 --- /dev/null +++ b/wasm-wrappers/fdw/openapi_fdw/src/spec_tests.rs @@ -0,0 +1,5533 @@ +use super::*; + +#[test] +fn test_parse_minimal_spec() { + let spec_json = r#"{ + "openapi": "3.0.0", + "info": {"title": "Test API", "version": "1.0"}, + "paths": {} + }"#; + + let spec = OpenApiSpec::from_str(spec_json).unwrap(); + assert_eq!(spec.openapi, "3.0.0"); + assert_eq!(spec.info.title, "Test API"); +} + +#[test] +fn test_endpoint_table_name() { + let endpoint = EndpointInfo { + 
path: "/api/v1/user-accounts".to_string(), + method: "GET", + response_schema: None, + }; + assert_eq!(endpoint.table_name(), "api_v1_user_accounts"); + + // Single segment + let endpoint = EndpointInfo { + path: "/users".to_string(), + method: "GET", + response_schema: None, + }; + assert_eq!(endpoint.table_name(), "users"); + + // Collision avoidance: different versions produce different names + let v1 = EndpointInfo { + path: "/v1/users".to_string(), + method: "GET", + response_schema: None, + }; + let v2 = EndpointInfo { + path: "/v2/users".to_string(), + method: "GET", + response_schema: None, + }; + assert_ne!(v1.table_name(), v2.table_name()); + + // Empty path + let endpoint = EndpointInfo { + path: "/".to_string(), + method: "GET", + response_schema: None, + }; + assert_eq!(endpoint.table_name(), "unknown"); +} + +#[test] +fn test_resolve_ref() { + let spec_json = r#"{ + "openapi": "3.0.0", + "info": {"title": "Test", "version": "1.0"}, + "paths": {}, + "components": { + "schemas": { + "User": { + "type": "object", + "properties": { + "id": {"type": "string"}, + "name": {"type": "string"} + }, + "required": ["id"] + } + } + } + }"#; + + let spec = OpenApiSpec::from_str(spec_json).unwrap(); + let user_schema = spec.resolve_ref("#/components/schemas/User").unwrap(); + + assert_eq!(user_schema.schema_type, Some("object".to_string())); + assert!(user_schema.properties.contains_key("id")); + assert!(user_schema.properties.contains_key("name")); + assert!(user_schema.required.contains(&"id".to_string())); +} + +#[test] +fn test_allof_merges_properties() { + let spec_json = r##"{ + "openapi": "3.0.0", + "info": {"title": "Test", "version": "1.0"}, + "paths": {}, + "components": { + "schemas": { + "Base": { + "type": "object", + "properties": { + "id": {"type": "string"} + }, + "required": ["id"] + }, + "Extended": { + "allOf": [ + {"$ref": "#/components/schemas/Base"}, + { + "type": "object", + "properties": { + "name": {"type": "string"}, + "email": {"type": 
"string"} + }, + "required": ["name"] + } + ] + } + } + } + }"##; + + let spec = OpenApiSpec::from_str(spec_json).unwrap(); + let extended = spec.resolve_ref("#/components/schemas/Extended").unwrap(); + let resolved = spec.resolve_schema(extended); + + // Should have all properties from both schemas + assert!(resolved.properties.contains_key("id")); + assert!(resolved.properties.contains_key("name")); + assert!(resolved.properties.contains_key("email")); + + // Required from both should be merged + assert!(resolved.required.contains(&"id".to_string())); + assert!(resolved.required.contains(&"name".to_string())); +} + +#[test] +fn test_oneof_merges_as_nullable() { + let spec_json = r#"{ + "openapi": "3.0.0", + "info": {"title": "Test", "version": "1.0"}, + "paths": {}, + "components": { + "schemas": { + "Response": { + "oneOf": [ + { + "type": "object", + "properties": { + "user_id": {"type": "string"}, + "user_name": {"type": "string"} + }, + "required": ["user_id"] + }, + { + "type": "object", + "properties": { + "org_id": {"type": "string"}, + "org_name": {"type": "string"} + }, + "required": ["org_id"] + } + ] + } + } + } + }"#; + + let spec = OpenApiSpec::from_str(spec_json).unwrap(); + let response = spec.resolve_ref("#/components/schemas/Response").unwrap(); + let resolved = spec.resolve_schema(response); + + // Should have properties from all variants + assert!(resolved.properties.contains_key("user_id")); + assert!(resolved.properties.contains_key("user_name")); + assert!(resolved.properties.contains_key("org_id")); + assert!(resolved.properties.contains_key("org_name")); + + // All properties should be nullable (since we don't know which variant) + assert!(resolved.properties.get("user_id").unwrap().nullable); + assert!(resolved.properties.get("org_id").unwrap().nullable); + + // Nothing should be required for oneOf + assert!(resolved.required.is_empty()); +} + +#[test] +fn test_anyof_merges_as_nullable() { + let spec_json = r#"{ + "openapi": "3.0.0", + 
"info": {"title": "Test", "version": "1.0"}, + "paths": {}, + "components": { + "schemas": { + "Flexible": { + "anyOf": [ + { + "type": "object", + "properties": { + "name": {"type": "string"} + } + }, + { + "type": "object", + "properties": { + "title": {"type": "string"} + } + } + ] + } + } + } + }"#; + + let spec = OpenApiSpec::from_str(spec_json).unwrap(); + let flexible = spec.resolve_ref("#/components/schemas/Flexible").unwrap(); + let resolved = spec.resolve_schema(flexible); + + // Should have properties from all variants + assert!(resolved.properties.contains_key("name")); + assert!(resolved.properties.contains_key("title")); + + // All should be nullable + assert!(resolved.properties.get("name").unwrap().nullable); + assert!(resolved.properties.get("title").unwrap().nullable); +} + +#[test] +fn test_nested_ref_resolution() { + let spec_json = r##"{ + "openapi": "3.0.0", + "info": {"title": "Test", "version": "1.0"}, + "paths": {}, + "components": { + "schemas": { + "Address": { + "type": "object", + "properties": { + "street": {"type": "string"}, + "city": {"type": "string"} + } + }, + "Person": { + "type": "object", + "properties": { + "name": {"type": "string"}, + "address": {"$ref": "#/components/schemas/Address"} + } + } + } + } + }"##; + + let spec = OpenApiSpec::from_str(spec_json).unwrap(); + let person = spec.resolve_ref("#/components/schemas/Person").unwrap(); + let resolved = spec.resolve_schema(person); + + assert!(resolved.properties.contains_key("name")); + assert!(resolved.properties.contains_key("address")); + + // The address property should still have a $ref (we resolve at property level when needed) + let address_prop = resolved.properties.get("address").unwrap(); + assert!(address_prop.reference.is_some() || !address_prop.properties.is_empty()); +} + +#[test] +fn test_allof_later_schema_overrides_earlier() { + // Test that for allOf, later schemas override earlier ones + // This is important for inheritance patterns where a child 
refines a parent's type + let spec_json = r##"{ + "openapi": "3.0.0", + "info": {"title": "Test", "version": "1.0"}, + "paths": {}, + "components": { + "schemas": { + "Base": { + "type": "object", + "properties": { + "status": {"type": "string"}, + "id": {"type": "integer"} + } + }, + "Refined": { + "allOf": [ + {"$ref": "#/components/schemas/Base"}, + { + "type": "object", + "properties": { + "status": { + "type": "string", + "format": "enum" + }, + "extra": {"type": "boolean"} + } + } + ] + } + } + } + }"##; + + let spec = OpenApiSpec::from_str(spec_json).unwrap(); + let refined = spec.resolve_ref("#/components/schemas/Refined").unwrap(); + let resolved = spec.resolve_schema(refined); + + // Should have all properties + assert!(resolved.properties.contains_key("status")); + assert!(resolved.properties.contains_key("id")); + assert!(resolved.properties.contains_key("extra")); + + // The 'status' property should be from the later schema (has format: "enum") + // The base schema's status has no format, so if we get "enum", the later one won + let status_prop = resolved.properties.get("status").unwrap(); + assert_eq!( + status_prop.format, + Some("enum".to_string()), + "Later allOf schema should override earlier schema's property definition" + ); +} + +// --- OpenAPI 3.1 type array tests --- + +#[test] +fn test_openapi_31_type_string_null() { + // OpenAPI 3.1: "type": ["string", "null"] should parse as type=string, nullable=true + let spec_json = r#"{ + "openapi": "3.1.0", + "info": {"title": "Test 3.1", "version": "1.0"}, + "paths": {}, + "components": { + "schemas": { + "User": { + "type": "object", + "properties": { + "name": {"type": "string"}, + "nickname": {"type": ["string", "null"]}, + "age": {"type": ["integer", "null"]} + }, + "required": ["name"] + } + } + } + }"#; + + let spec = OpenApiSpec::from_str(spec_json).unwrap(); + let user = spec.resolve_ref("#/components/schemas/User").unwrap(); + + // name: plain string, not nullable + let name_prop = 
user.properties.get("name").unwrap(); + assert_eq!(name_prop.schema_type, Some("string".to_string())); + assert!(!name_prop.nullable); + + // nickname: ["string", "null"] → string + nullable + let nickname_prop = user.properties.get("nickname").unwrap(); + assert_eq!(nickname_prop.schema_type, Some("string".to_string())); + assert!(nickname_prop.nullable); + + // age: ["integer", "null"] → integer + nullable + let age_prop = user.properties.get("age").unwrap(); + assert_eq!(age_prop.schema_type, Some("integer".to_string())); + assert!(age_prop.nullable); +} + +#[test] +fn test_openapi_31_type_array_without_null() { + // OpenAPI 3.1: "type": ["string"] (single-element array without null) + let schema: Schema = serde_json::from_str(r#"{"type": ["string"]}"#).unwrap(); + assert_eq!(schema.schema_type, Some("string".to_string())); + assert!(!schema.nullable); +} + +#[test] +fn test_openapi_30_type_string_still_works() { + // OpenAPI 3.0: plain string type should still work + let schema: Schema = serde_json::from_str(r#"{"type": "string"}"#).unwrap(); + assert_eq!(schema.schema_type, Some("string".to_string())); + assert!(!schema.nullable); +} + +#[test] +fn test_openapi_30_nullable_flag_still_works() { + // OpenAPI 3.0: nullable as a separate flag + let schema: Schema = serde_json::from_str(r#"{"type": "string", "nullable": true}"#).unwrap(); + assert_eq!(schema.schema_type, Some("string".to_string())); + assert!(schema.nullable); +} + +#[test] +fn test_openapi_31_type_mapping_with_spec() { + // Verify that type arrays produce correct PostgreSQL type mappings + let spec_json = r#"{ + "openapi": "3.1.0", + "info": {"title": "Test", "version": "1.0"}, + "paths": { + "/records": { + "get": { + "responses": { + "200": { + "description": "ok", + "content": { + "application/json": { + "schema": { + "type": "array", + "items": { + "type": "object", + "properties": { + "id": {"type": "integer"}, + "name": {"type": ["string", "null"]}, + "score": {"type": ["number", "null"], 
"format": "float"}, + "active": {"type": ["boolean", "null"]} + }, + "required": ["id"] + } + } + } + } + } + } + } + } + } + }"#; + + let spec = OpenApiSpec::from_str(spec_json).unwrap(); + let endpoints = spec.get_endpoints(); + assert_eq!(endpoints.len(), 1); + + let schema = endpoints[0].response_schema.as_ref().unwrap(); + let resolved = spec.resolve_schema(schema); + let items = resolved.items.as_ref().unwrap(); + + // name should be string type, not jsonb (the old bug) + assert_eq!( + items.properties.get("name").unwrap().schema_type, + Some("string".to_string()) + ); + assert_eq!( + items.properties.get("score").unwrap().schema_type, + Some("number".to_string()) + ); + assert_eq!( + items.properties.get("active").unwrap().schema_type, + Some("boolean".to_string()) + ); +} + +// --- Circular reference tests --- + +#[test] +fn test_circular_ref_depth_limit() { + // Self-referential schema should not stack overflow + let spec_json = r##"{ + "openapi": "3.0.0", + "info": {"title": "Test", "version": "1.0"}, + "paths": {}, + "components": { + "schemas": { + "TreeNode": { + "type": "object", + "properties": { + "id": {"type": "string"}, + "children": { + "type": "array", + "items": {"$ref": "#/components/schemas/TreeNode"} + } + } + } + } + } + }"##; + + let spec = OpenApiSpec::from_str(spec_json).unwrap(); + let node = spec.resolve_ref("#/components/schemas/TreeNode").unwrap(); + // Should not stack overflow — depth limit kicks in + let resolved = spec.resolve_schema(node); + assert!(resolved.properties.contains_key("id")); + assert!(resolved.properties.contains_key("children")); +} + +#[test] +fn test_mutual_circular_refs() { + // A references B which references A + let spec_json = r##"{ + "openapi": "3.0.0", + "info": {"title": "Test", "version": "1.0"}, + "paths": {}, + "components": { + "schemas": { + "SchemaA": { + "type": "object", + "properties": { + "name": {"type": "string"}, + "b_ref": {"$ref": "#/components/schemas/SchemaB"} + } + }, + "SchemaB": { + 
"type": "object", + "properties": { + "value": {"type": "integer"}, + "a_ref": {"$ref": "#/components/schemas/SchemaA"} + } + } + } + } + }"##; + + let spec = OpenApiSpec::from_str(spec_json).unwrap(); + let schema_a = spec.resolve_ref("#/components/schemas/SchemaA").unwrap(); + let resolved = spec.resolve_schema(schema_a); + assert!(resolved.properties.contains_key("name")); + assert!(resolved.properties.contains_key("b_ref")); +} + +// --- Deep allOf chain tests (Box-style inheritance) --- + +#[test] +fn test_deep_allof_chain() { + // FileBase → FileMini → File → FileFull (4-level chain) + let spec_json = r##"{ + "openapi": "3.0.0", + "info": {"title": "Test", "version": "1.0"}, + "paths": {}, + "components": { + "schemas": { + "FileBase": { + "type": "object", + "properties": { + "id": {"type": "string"}, + "type": {"type": "string"} + }, + "required": ["id"] + }, + "FileMini": { + "allOf": [ + {"$ref": "#/components/schemas/FileBase"}, + { + "type": "object", + "properties": { + "name": {"type": "string"}, + "size": {"type": "integer"} + } + } + ] + }, + "File": { + "allOf": [ + {"$ref": "#/components/schemas/FileMini"}, + { + "type": "object", + "properties": { + "content_type": {"type": "string"}, + "created_at": {"type": "string", "format": "date-time"} + } + } + ] + }, + "FileFull": { + "allOf": [ + {"$ref": "#/components/schemas/File"}, + { + "type": "object", + "properties": { + "permissions": {"type": "object"}, + "version_number": {"type": "integer"} + } + } + ] + } + } + } + }"##; + + let spec = OpenApiSpec::from_str(spec_json).unwrap(); + let full = spec.resolve_ref("#/components/schemas/FileFull").unwrap(); + let resolved = spec.resolve_schema(full); + + // Should have all properties from the entire chain + assert!( + resolved.properties.contains_key("id"), + "Missing id from FileBase" + ); + assert!( + resolved.properties.contains_key("type"), + "Missing type from FileBase" + ); + assert!( + resolved.properties.contains_key("name"), + "Missing name 
from FileMini" + ); + assert!( + resolved.properties.contains_key("size"), + "Missing size from FileMini" + ); + assert!( + resolved.properties.contains_key("content_type"), + "Missing content_type from File" + ); + assert!( + resolved.properties.contains_key("created_at"), + "Missing created_at from File" + ); + assert!( + resolved.properties.contains_key("permissions"), + "Missing permissions from FileFull" + ); + assert!( + resolved.properties.contains_key("version_number"), + "Missing version_number from FileFull" + ); + + // id should be required (from FileBase) + assert!(resolved.required.contains(&"id".to_string())); +} + +// --- Swagger 2.0 rejection --- + +#[test] +fn test_swagger_20_rejected() { + let spec_json = r#"{ + "swagger": "2.0", + "info": {"title": "Old API", "version": "1.0"}, + "paths": {} + }"#; + + let result = OpenApiSpec::from_str(spec_json); + assert!(result.is_err()); +} + +// --- Parameterized path exclusion --- + +#[test] +fn test_parameterized_paths_excluded_from_endpoints() { + let spec_json = r#"{ + "openapi": "3.0.0", + "info": {"title": "Test", "version": "1.0"}, + "paths": { + "/items": { + "get": { + "responses": {"200": {"description": "ok"}} + } + }, + "/items/{id}": { + "get": { + "responses": {"200": {"description": "ok"}} + } + }, + "/users/{user_id}/posts": { + "get": { + "responses": {"200": {"description": "ok"}} + } + } + } + }"#; + + let spec = OpenApiSpec::from_str(spec_json).unwrap(); + let endpoints = spec.get_endpoints(); + + // Only /items should be included — parameterized paths are excluded + assert_eq!(endpoints.len(), 1); + assert_eq!(endpoints[0].path, "/items"); +} + +// --- Response schema extraction from non-200 codes --- + +#[test] +fn test_response_schema_from_201() { + let spec_json = r#"{ + "openapi": "3.0.0", + "info": {"title": "Test", "version": "1.0"}, + "paths": { + "/created": { + "get": { + "responses": { + "201": { + "description": "Created", + "content": { + "application/json": { + "schema": { 
+ "type": "object", + "properties": { + "id": {"type": "string"}, + "created": {"type": "boolean"} + } + } + } + } + } + } + } + } + } + }"#; + + let spec = OpenApiSpec::from_str(spec_json).unwrap(); + let endpoints = spec.get_endpoints(); + assert_eq!(endpoints.len(), 1); + + let schema = endpoints[0].response_schema.as_ref().unwrap(); + assert!(schema.properties.contains_key("id")); + assert!(schema.properties.contains_key("created")); +} + +#[test] +fn test_no_schema_endpoint() { + // Endpoint with no content schema should still be returned with None schema + let spec_json = r#"{ + "openapi": "3.0.0", + "info": {"title": "Test", "version": "1.0"}, + "paths": { + "/health": { + "get": { + "responses": { + "200": { + "description": "Healthy" + } + } + } + } + } + }"#; + + let spec = OpenApiSpec::from_str(spec_json).unwrap(); + let endpoints = spec.get_endpoints(); + assert_eq!(endpoints.len(), 1); + assert!(endpoints[0].response_schema.is_none()); +} + +// --- anyOf with null variant (OpenAPI 3.1 pattern) --- + +#[test] +fn test_anyof_with_null_variant() { + // anyOf mixing a primitive and an object variant: [{type: string}, {type: object, properties: {text}}] (note: this fixture has no {type: "null"} variant, despite the test name) + let spec_json = r#"{ + "openapi": "3.1.0", + "info": {"title": "Test", "version": "1.0"}, + "paths": {}, + "components": { + "schemas": { + "User": { + "type": "object", + "properties": { + "name": {"type": "string"}, + "bio": { + "anyOf": [ + {"type": "string"}, + {"type": "object", "properties": {"text": {"type": "string"}}} + ] + } + } + } + } + } + }"#; + + let spec = OpenApiSpec::from_str(spec_json).unwrap(); + let user = spec.resolve_ref("#/components/schemas/User").unwrap(); + let resolved = spec.resolve_schema(user); + + assert!(resolved.properties.contains_key("name")); + // bio is anyOf — the resolver should merge all variant properties + let bio = resolved.properties.get("bio").unwrap(); + let bio_resolved = spec.resolve_schema(bio); + // Should have merged properties from both variants + 
assert!(bio_resolved.properties.contains_key("text") || bio_resolved.schema_type.is_some()); +} + +// --- resolve_schema edge cases --- + +#[test] +fn test_resolve_schema_broken_ref() { + // A $ref pointing to a nonexistent schema should return the schema unchanged + let spec_json = r##"{ + "openapi": "3.0.0", + "info": {"title": "Test", "version": "1.0"}, + "paths": {}, + "components": { + "schemas": { + "Broken": { + "type": "object", + "properties": { + "ref_field": {"$ref": "#/components/schemas/DoesNotExist"} + } + } + } + } + }"##; + + let spec = OpenApiSpec::from_str(spec_json).unwrap(); + let broken = spec.resolve_ref("#/components/schemas/Broken").unwrap(); + let resolved = spec.resolve_schema(broken); + // Should still have the property, just unresolved + assert!(resolved.properties.contains_key("ref_field")); + let ref_field = resolved.properties.get("ref_field").unwrap(); + assert_eq!( + ref_field.reference, + Some("#/components/schemas/DoesNotExist".to_string()) + ); +} + +#[test] +fn test_resolve_ref_invalid_path() { + // Refs that don't match #/components/schemas/X should return None + let spec_json = r#"{ + "openapi": "3.0.0", + "info": {"title": "Test", "version": "1.0"}, + "paths": {} + }"#; + + let spec = OpenApiSpec::from_str(spec_json).unwrap(); + assert!(spec.resolve_ref("#/definitions/User").is_none()); + assert!( + spec.resolve_ref("#/components/responses/NotFound") + .is_none() + ); + assert!(spec.resolve_ref("User").is_none()); + assert!(spec.resolve_ref("").is_none()); +} + +#[test] +fn test_resolve_schema_plain_object_passthrough() { + // A simple object (no $ref, no allOf/oneOf/anyOf) should be returned as-is + let schema = Schema { + schema_type: Some("object".to_string()), + properties: { + let mut map = HashMap::new(); + map.insert( + "id".to_string(), + Schema { + schema_type: Some("string".to_string()), + ..Default::default() + }, + ); + map + }, + ..Default::default() + }; + + let spec_json = r#"{ + "openapi": "3.0.0", + "info": 
{"title": "Test", "version": "1.0"}, + "paths": {} + }"#; + let spec = OpenApiSpec::from_str(spec_json).unwrap(); + let resolved = spec.resolve_schema(&schema); + + assert_eq!(resolved.schema_type, Some("object".to_string())); + assert!(resolved.properties.contains_key("id")); +} + +#[test] +fn test_resolve_schema_allof_with_ref_and_inline() { + // Common pattern: allOf combining a $ref with inline properties + let spec_json = r##"{ + "openapi": "3.0.0", + "info": {"title": "Test", "version": "1.0"}, + "paths": { + "/items": { + "get": { + "responses": { + "200": { + "description": "ok", + "content": { + "application/json": { + "schema": { + "type": "array", + "items": { + "allOf": [ + {"$ref": "#/components/schemas/Base"}, + { + "type": "object", + "properties": { + "extra": {"type": "string"} + } + } + ] + } + } + } + } + } + } + } + } + }, + "components": { + "schemas": { + "Base": { + "type": "object", + "properties": { + "id": {"type": "integer"}, + "name": {"type": "string"} + }, + "required": ["id"] + } + } + } + }"##; + + let spec = OpenApiSpec::from_str(spec_json).unwrap(); + let endpoints = spec.get_endpoints(); + let schema = endpoints[0].response_schema.as_ref().unwrap(); + let items = schema.items.as_ref().unwrap(); + let resolved = spec.resolve_schema(items); + + assert!(resolved.properties.contains_key("id")); + assert!(resolved.properties.contains_key("name")); + assert!(resolved.properties.contains_key("extra")); + assert!(resolved.required.contains(&"id".to_string())); +} + +#[test] +fn test_resolve_schema_oneof_keeps_first_definition() { + // When two variants define the same property, oneOf should keep the first + let spec_json = r#"{ + "openapi": "3.0.0", + "info": {"title": "Test", "version": "1.0"}, + "paths": {}, + "components": { + "schemas": { + "Poly": { + "oneOf": [ + { + "type": "object", + "properties": { + "status": {"type": "string", "format": "v1"} + } + }, + { + "type": "object", + "properties": { + "status": {"type": "string", 
"format": "v2"} + } + } + ] + } + } + } + }"#; + + let spec = OpenApiSpec::from_str(spec_json).unwrap(); + let poly = spec.resolve_ref("#/components/schemas/Poly").unwrap(); + let resolved = spec.resolve_schema(poly); + + // oneOf uses or_insert, so first definition wins + let status = resolved.properties.get("status").unwrap(); + assert_eq!(status.format, Some("v1".to_string())); +} + +#[test] +fn test_resolve_schema_no_components() { + // Spec with no components section — resolve_ref should return None + let spec_json = r#"{ + "openapi": "3.0.0", + "info": {"title": "Test", "version": "1.0"}, + "paths": {} + }"#; + + let spec = OpenApiSpec::from_str(spec_json).unwrap(); + assert!(spec.resolve_ref("#/components/schemas/User").is_none()); +} + +#[test] +fn test_get_response_schema_default_response() { + // When only "default" response exists (no 200 or 201) + let spec_json = r#"{ + "openapi": "3.0.0", + "info": {"title": "Test", "version": "1.0"}, + "paths": { + "/items": { + "get": { + "responses": { + "default": { + "description": "Default response", + "content": { + "application/json": { + "schema": { + "type": "object", + "properties": { + "id": {"type": "string"} + } + } + } + } + } + } + } + } + } + }"#; + + let spec = OpenApiSpec::from_str(spec_json).unwrap(); + let endpoints = spec.get_endpoints(); + assert_eq!(endpoints.len(), 1); + let schema = endpoints[0].response_schema.as_ref().unwrap(); + assert!(schema.properties.contains_key("id")); +} + +#[test] +fn test_get_response_schema_non_json_content_type() { + // When content type is not application/json (e.g., application/xml), + // should still pick up the first available content type + let spec_json = r#"{ + "openapi": "3.0.0", + "info": {"title": "Test", "version": "1.0"}, + "paths": { + "/geo": { + "get": { + "responses": { + "200": { + "description": "ok", + "content": { + "application/geo+json": { + "schema": { + "type": "object", + "properties": { + "type": {"type": "string"}, + "features": 
{"type": "array"} + } + } + } + } + } + } + } + } + } + }"#; + + let spec = OpenApiSpec::from_str(spec_json).unwrap(); + let endpoints = spec.get_endpoints(); + assert_eq!(endpoints.len(), 1); + let schema = endpoints[0].response_schema.as_ref().unwrap(); + assert!(schema.properties.contains_key("type")); + assert!(schema.properties.contains_key("features")); +} + +#[test] +fn test_paths_without_get_include_post() { + // Paths with POST are now included (POST-for-read support) + let spec_json = r#"{ + "openapi": "3.0.0", + "info": {"title": "Test", "version": "1.0"}, + "paths": { + "/items": { + "get": { + "responses": {"200": {"description": "ok"}} + } + }, + "/upload": { + "post": { + "responses": {"201": {"description": "created"}} + } + } + } + }"#; + + let spec = OpenApiSpec::from_str(spec_json).unwrap(); + let endpoints = spec.get_endpoints(); + assert_eq!(endpoints.len(), 2); + assert_eq!(endpoints[0].path, "/items"); + assert_eq!(endpoints[0].method, "GET"); + assert_eq!(endpoints[1].path, "/upload"); + assert_eq!(endpoints[1].method, "POST"); +} + +#[test] +fn test_table_name_deeply_nested_path() { + let endpoint = EndpointInfo { + path: "/api/v2/projects/issues/comments".to_string(), + method: "GET", + response_schema: None, + }; + assert_eq!(endpoint.table_name(), "api_v2_projects_issues_comments"); +} + +#[test] +fn test_openapi_31_type_only_null() { + // Edge case: "type": ["null"] — no actual type, just null + let schema: Schema = serde_json::from_str(r#"{"type": ["null"]}"#).unwrap(); + assert_eq!(schema.schema_type, None); + assert!(schema.nullable); +} + +#[test] +fn test_openapi_31_type_non_standard_value() { + // Edge case: "type" is not a string or array (e.g., number or boolean) + let schema: Schema = serde_json::from_str(r#"{"type": 42}"#).unwrap(); + assert_eq!(schema.schema_type, None); + assert!(!schema.nullable); +} + +#[test] +fn test_schema_no_type() { + // Schema with no type field at all + let schema: Schema = 
serde_json::from_str(r#"{"format": "date-time"}"#).unwrap(); + assert_eq!(schema.schema_type, None); + assert!(!schema.nullable); + assert_eq!(schema.format, Some("date-time".to_string())); +} + +// --- Fix 1: $ref in Response objects --- + +#[test] +fn test_response_ref_resolution() { + // Response $ref should be resolved via components.responses + let spec_json = r##"{ + "openapi": "3.0.0", + "info": {"title": "Test", "version": "1.0"}, + "paths": { + "/items": { + "get": { + "responses": { + "200": {"$ref": "#/components/responses/ItemList"} + } + } + } + }, + "components": { + "responses": { + "ItemList": { + "description": "A list of items", + "content": { + "application/json": { + "schema": { + "type": "array", + "items": { + "type": "object", + "properties": { + "id": {"type": "integer"}, + "name": {"type": "string"} + } + } + } + } + } + } + } + } + }"##; + + let spec = OpenApiSpec::from_str(spec_json).unwrap(); + let endpoints = spec.get_endpoints(); + assert_eq!(endpoints.len(), 1); + let schema = endpoints[0].response_schema.as_ref().unwrap(); + let items = schema.items.as_ref().unwrap(); + assert!(items.properties.contains_key("id")); + assert!(items.properties.contains_key("name")); +} + +// --- Fix 2: 2XX wildcard status codes --- + +#[test] +fn test_response_schema_from_2xx_wildcard() { + let spec_json = r#"{ + "openapi": "3.0.0", + "info": {"title": "Test", "version": "1.0"}, + "paths": { + "/items": { + "get": { + "responses": { + "2XX": { + "description": "Success", + "content": { + "application/json": { + "schema": { + "type": "object", + "properties": { + "id": {"type": "string"}, + "status": {"type": "string"} + } + } + } + } + } + } + } + } + } + }"#; + + let spec = OpenApiSpec::from_str(spec_json).unwrap(); + let endpoints = spec.get_endpoints(); + assert_eq!(endpoints.len(), 1); + let schema = endpoints[0].response_schema.as_ref().unwrap(); + assert!(schema.properties.contains_key("id")); + assert!(schema.properties.contains_key("status")); 
+} + +#[test] +fn test_200_preferred_over_2xx() { + // "200" should be preferred over "2XX" + let spec_json = r#"{ + "openapi": "3.0.0", + "info": {"title": "Test", "version": "1.0"}, + "paths": { + "/items": { + "get": { + "responses": { + "200": { + "description": "OK", + "content": { + "application/json": { + "schema": { + "type": "object", + "properties": { + "from_200": {"type": "string"} + } + } + } + } + }, + "2XX": { + "description": "Wildcard", + "content": { + "application/json": { + "schema": { + "type": "object", + "properties": { + "from_2xx": {"type": "string"} + } + } + } + } + } + } + } + } + } + }"#; + + let spec = OpenApiSpec::from_str(spec_json).unwrap(); + let endpoints = spec.get_endpoints(); + let schema = endpoints[0].response_schema.as_ref().unwrap(); + assert!(schema.properties.contains_key("from_200")); + assert!(!schema.properties.contains_key("from_2xx")); +} + +// --- Fix 3: writeOnly properties --- + +#[test] +fn test_write_only_property_deserialization() { + let schema: Schema = serde_json::from_str(r#"{"type": "string", "writeOnly": true}"#).unwrap(); + assert!(schema.write_only); + assert_eq!(schema.schema_type, Some("string".to_string())); +} + +#[test] +fn test_write_only_default_false() { + let schema: Schema = serde_json::from_str(r#"{"type": "string"}"#).unwrap(); + assert!(!schema.write_only); +} + +// --- Fix 4: Multi-type arrays --- + +#[test] +fn test_multi_type_array_becomes_none() { + // ["string", "integer"] — multiple non-null types → schema_type = None (jsonb) + let schema: Schema = serde_json::from_str(r#"{"type": ["string", "integer"]}"#).unwrap(); + assert_eq!(schema.schema_type, None); + assert!(!schema.nullable); +} + +#[test] +fn test_multi_type_array_with_null() { + // ["string", "integer", "null"] → None type + nullable + let schema: Schema = + serde_json::from_str(r#"{"type": ["string", "integer", "null"]}"#).unwrap(); + assert_eq!(schema.schema_type, None); + assert!(schema.nullable); +} + +#[test] +fn 
test_single_type_array_still_works() { + // ["string", "null"] → exactly one non-null type → Some("string") + let schema: Schema = serde_json::from_str(r#"{"type": ["string", "null"]}"#).unwrap(); + assert_eq!(schema.schema_type, Some("string".to_string())); + assert!(schema.nullable); +} + +// --- Fix 5: Composition on primitives --- + +#[test] +fn test_oneof_primitives_not_object() { + // oneOf with primitive types should NOT produce schema_type = "object" + let spec_json = r#"{ + "openapi": "3.0.0", + "info": {"title": "Test", "version": "1.0"}, + "paths": {}, + "components": { + "schemas": { + "StringOrInt": { + "oneOf": [ + {"type": "string"}, + {"type": "integer"} + ] + } + } + } + }"#; + + let spec = OpenApiSpec::from_str(spec_json).unwrap(); + let schema = spec + .resolve_ref("#/components/schemas/StringOrInt") + .unwrap(); + let resolved = spec.resolve_schema(schema); + // Should be None (→ jsonb), NOT "object" + assert_eq!(resolved.schema_type, None); + assert!(resolved.properties.is_empty()); +} + +#[test] +fn test_oneof_with_objects_stays_object() { + // oneOf with object schemas should still produce "object" + let spec_json = r#"{ + "openapi": "3.0.0", + "info": {"title": "Test", "version": "1.0"}, + "paths": {}, + "components": { + "schemas": { + "UserOrOrg": { + "oneOf": [ + { + "type": "object", + "properties": {"user_id": {"type": "string"}} + }, + { + "type": "object", + "properties": {"org_id": {"type": "string"}} + } + ] + } + } + } + }"#; + + let spec = OpenApiSpec::from_str(spec_json).unwrap(); + let schema = spec.resolve_ref("#/components/schemas/UserOrOrg").unwrap(); + let resolved = spec.resolve_schema(schema); + assert_eq!(resolved.schema_type, Some("object".to_string())); + assert!(resolved.properties.contains_key("user_id")); + assert!(resolved.properties.contains_key("org_id")); +} + +// --- Fix 7: Server URL variable substitution --- + +#[test] +fn test_server_variable_substitution() { + let spec_json = r#"{ + "openapi": "3.0.0", + 
"info": {"title": "Test", "version": "1.0"}, + "servers": [ + { + "url": "https://{region}.api.example.com/v{version}", + "variables": { + "region": {"default": "us-east-1"}, + "version": {"default": "2"} + } + } + ], + "paths": {} + }"#; + + let spec = OpenApiSpec::from_str(spec_json).unwrap(); + assert_eq!( + spec.base_url(), + Some("https://us-east-1.api.example.com/v2".to_string()) + ); +} + +#[test] +fn test_server_no_variables() { + let spec_json = r#"{ + "openapi": "3.0.0", + "info": {"title": "Test", "version": "1.0"}, + "servers": [{"url": "https://api.example.com"}], + "paths": {} + }"#; + + let spec = OpenApiSpec::from_str(spec_json).unwrap(); + assert_eq!(spec.base_url(), Some("https://api.example.com".to_string())); +} + +#[test] +fn test_endpoints_sorted_alphabetically() { + let spec_json = r#"{ + "openapi": "3.0.0", + "info": {"title": "Test", "version": "1.0"}, + "paths": { + "/zebras": {"get": {"responses": {"200": {"description": "ok"}}}}, + "/apples": {"get": {"responses": {"200": {"description": "ok"}}}}, + "/middle": {"get": {"responses": {"200": {"description": "ok"}}}} + } + }"#; + + let spec = OpenApiSpec::from_str(spec_json).unwrap(); + let endpoints = spec.get_endpoints(); + assert_eq!(endpoints.len(), 3); + assert_eq!(endpoints[0].path, "/apples"); + assert_eq!(endpoints[1].path, "/middle"); + assert_eq!(endpoints[2].path, "/zebras"); +} + +#[test] +fn test_response_schema_charset_content_type() { + // Content type with charset parameter should still match + let spec_json = r#"{ + "openapi": "3.0.0", + "info": {"title": "Test", "version": "1.0"}, + "paths": { + "/items": { + "get": { + "responses": { + "200": { + "description": "ok", + "content": { + "application/json; charset=utf-8": { + "schema": { + "type": "object", + "properties": { + "id": {"type": "integer"} + } + } + } + } + } + } + } + } + } + }"#; + + let spec = OpenApiSpec::from_str(spec_json).unwrap(); + let endpoints = spec.get_endpoints(); + assert_eq!(endpoints.len(), 1); + 
let schema = endpoints[0].response_schema.as_ref().unwrap(); + assert!(schema.properties.contains_key("id")); +} + +#[test] +fn test_response_schema_json_preferred_over_xml() { + // When both JSON and XML content types exist, JSON should be preferred + let spec_json = r#"{ + "openapi": "3.0.0", + "info": {"title": "Test", "version": "1.0"}, + "paths": { + "/items": { + "get": { + "responses": { + "200": { + "description": "ok", + "content": { + "application/xml": { + "schema": { + "type": "object", + "properties": { + "xml_field": {"type": "string"} + } + } + }, + "application/json": { + "schema": { + "type": "object", + "properties": { + "json_field": {"type": "string"} + } + } + } + } + } + } + } + } + } + }"#; + + let spec = OpenApiSpec::from_str(spec_json).unwrap(); + let endpoints = spec.get_endpoints(); + assert_eq!(endpoints.len(), 1); + let schema = endpoints[0].response_schema.as_ref().unwrap(); + assert!(schema.properties.contains_key("json_field")); +} + +#[test] +fn test_resolve_schema_ref_with_nullable_sibling() { + // OpenAPI 3.1: $ref with nullable sibling should merge + let spec_json = r#"{ + "openapi": "3.1.0", + "info": {"title": "Test", "version": "1.0"}, + "paths": {}, + "components": { + "schemas": { + "Address": { + "type": "object", + "properties": { + "street": {"type": "string"}, + "city": {"type": "string"} + } + } + } + } + }"#; + + let spec = OpenApiSpec::from_str(spec_json).unwrap(); + + let ref_with_nullable = Schema { + reference: Some("#/components/schemas/Address".to_string()), + nullable: true, + ..Default::default() + }; + + let resolved = spec.resolve_schema(&ref_with_nullable); + assert!(resolved.nullable); + assert!(resolved.properties.contains_key("street")); + assert!(resolved.properties.contains_key("city")); +} + +#[test] +fn test_resolve_schema_ref_with_extra_properties() { + // OpenAPI 3.1: $ref with additional properties sibling should merge + let spec_json = r#"{ + "openapi": "3.1.0", + "info": {"title": "Test", 
"version": "1.0"}, + "paths": {}, + "components": { + "schemas": { + "Base": { + "type": "object", + "required": ["id"], + "properties": { + "id": {"type": "integer"}, + "name": {"type": "string"} + } + } + } + } + }"#; + + let spec = OpenApiSpec::from_str(spec_json).unwrap(); + + let mut extra_props = std::collections::HashMap::new(); + extra_props.insert( + "extra_field".to_string(), + Schema { + schema_type: Some("string".to_string()), + ..Default::default() + }, + ); + + let ref_with_props = Schema { + reference: Some("#/components/schemas/Base".to_string()), + properties: extra_props, + required: vec!["extra_field".to_string()], + ..Default::default() + }; + + let resolved = spec.resolve_schema(&ref_with_props); + assert!(resolved.properties.contains_key("id")); + assert!(resolved.properties.contains_key("name")); + assert!(resolved.properties.contains_key("extra_field")); + assert!(resolved.required.contains(&"id".to_string())); + assert!(resolved.required.contains(&"extra_field".to_string())); +} + +// --- POST-for-read tests --- + +#[test] +fn test_post_endpoint_included() { + let spec_json = r#"{ + "openapi": "3.0.0", + "info": {"title": "Test", "version": "1.0"}, + "paths": { + "/search": { + "post": { + "responses": { + "200": { + "description": "ok", + "content": { + "application/json": { + "schema": { + "type": "array", + "items": { + "type": "object", + "properties": { + "id": {"type": "string"} + } + } + } + } + } + } + } + } + } + } + }"#; + + let spec = OpenApiSpec::from_str(spec_json).unwrap(); + let endpoints = spec.get_endpoints(); + assert_eq!(endpoints.len(), 1); + assert_eq!(endpoints[0].path, "/search"); + assert_eq!(endpoints[0].method, "POST"); + assert!(endpoints[0].response_schema.is_some()); +} + +#[test] +fn test_post_endpoint_table_name_suffix() { + let endpoint = EndpointInfo { + path: "/search".to_string(), + method: "POST", + response_schema: None, + }; + assert_eq!(endpoint.table_name(), "search_post"); + + let get_endpoint = 
EndpointInfo { + path: "/search".to_string(), + method: "GET", + response_schema: None, + }; + assert_eq!(get_endpoint.table_name(), "search"); +} + +#[test] +fn test_get_and_post_same_path() { + let spec_json = r#"{ + "openapi": "3.0.0", + "info": {"title": "Test", "version": "1.0"}, + "paths": { + "/items": { + "get": { + "responses": {"200": {"description": "ok"}} + }, + "post": { + "responses": {"200": {"description": "ok"}} + } + } + } + }"#; + + let spec = OpenApiSpec::from_str(spec_json).unwrap(); + let endpoints = spec.get_endpoints(); + assert_eq!(endpoints.len(), 2); + // Sorted by path then method, so GET comes first + assert_eq!(endpoints[0].method, "GET"); + assert_eq!(endpoints[1].method, "POST"); + // Table names should differ + let names: Vec<String> = endpoints.iter().map(|e| e.table_name()).collect(); + assert_eq!(names[0], "items"); + assert_eq!(names[1], "items_post"); +} + +#[test] +fn test_parameterized_post_excluded() { + let spec_json = r#"{ + "openapi": "3.0.0", + "info": {"title": "Test", "version": "1.0"}, + "paths": { + "/items/{id}/search": { + "post": { + "responses": {"200": {"description": "ok"}} + } + } + } + }"#; + + let spec = OpenApiSpec::from_str(spec_json).unwrap(); + let endpoints = spec.get_endpoints(); + assert_eq!(endpoints.len(), 0); +} + +#[test] +fn test_post_only_path() { + // Path with only POST and no GET should still be included + let spec_json = r#"{ + "openapi": "3.0.0", + "info": {"title": "Test", "version": "1.0"}, + "paths": { + "/search": { + "post": { + "responses": {"200": {"description": "ok"}} + } + }, + "/items": { + "get": { + "responses": {"200": {"description": "ok"}} + } + } + } + }"#; + + let spec = OpenApiSpec::from_str(spec_json).unwrap(); + let endpoints = spec.get_endpoints(); + assert_eq!(endpoints.len(), 2); + assert!( + endpoints + .iter() + .any(|e| e.method == "GET" && e.path == "/items") + ); + assert!( + endpoints + .iter() + .any(|e| e.method == "POST" && e.path == "/search") + ); +} + +// --- 
OpenAPI 3.1 real-world API pattern tests --- + +#[test] +fn test_stripe_expandable_anyof() { + // Stripe pattern: anyOf: [{type: "string"}, {$ref: "..."}] for expandable ID/object fields + let spec_json = r##"{ + "openapi": "3.1.0", + "info": {"title": "Stripe", "version": "1.0"}, + "paths": {}, + "components": { + "schemas": { + "Customer": { + "type": "object", + "properties": { + "id": {"type": "string"}, + "name": {"type": "string"} + } + }, + "Charge": { + "type": "object", + "properties": { + "id": {"type": "string"}, + "customer": { + "anyOf": [ + {"type": "string"}, + {"$ref": "#/components/schemas/Customer"} + ] + } + } + } + } + } + }"##; + + let spec = OpenApiSpec::from_str(spec_json).unwrap(); + let charge = spec.resolve_ref("#/components/schemas/Charge").unwrap(); + let resolved = spec.resolve_schema(charge); + + assert!(resolved.properties.contains_key("id")); + assert!(resolved.properties.contains_key("customer")); + // customer is anyOf → resolver merges; the Customer variant has properties (id, name), + // so merged schema_type = "object". The string variant has no properties. 
+ let customer = resolved.properties.get("customer").unwrap(); + let customer_resolved = spec.resolve_schema(customer); + // Should have merged properties from the Customer $ref + assert!( + customer_resolved.properties.contains_key("id") || customer_resolved.schema_type.is_some() + ); +} + +#[test] +fn test_github_nullable_anyof_null_type() { + // GitHub 3.1 pattern: anyOf: [{$ref: "..."}, {type: "null"}] for nullable refs + let spec_json = r##"{ + "openapi": "3.1.0", + "info": {"title": "GitHub", "version": "1.0"}, + "paths": {}, + "components": { + "schemas": { + "SimpleUser": { + "type": "object", + "properties": { + "login": {"type": "string"}, + "id": {"type": "integer"} + } + }, + "Issue": { + "type": "object", + "properties": { + "id": {"type": "integer"}, + "assignee": { + "anyOf": [ + {"$ref": "#/components/schemas/SimpleUser"}, + {"type": "null"} + ] + } + } + } + } + } + }"##; + + let spec = OpenApiSpec::from_str(spec_json).unwrap(); + let issue = spec.resolve_ref("#/components/schemas/Issue").unwrap(); + let resolved = spec.resolve_schema(issue); + + assert!(resolved.properties.contains_key("assignee")); + let assignee = resolved.properties.get("assignee").unwrap(); + let assignee_resolved = spec.resolve_schema(assignee); + // Should have merged SimpleUser properties + assert!(assignee_resolved.properties.contains_key("login")); + assert!(assignee_resolved.properties.contains_key("id")); +} + +#[test] +fn test_kubernetes_deep_ref_chain_8_levels() { + // Kubernetes-style: 8-level chain of $ref → $ref → ... 
deep resolution + let spec_json = r##"{ + "openapi": "3.0.0", + "info": {"title": "K8s", "version": "1.0"}, + "paths": {}, + "components": { + "schemas": { + "L1": {"$ref": "#/components/schemas/L2"}, + "L2": {"$ref": "#/components/schemas/L3"}, + "L3": {"$ref": "#/components/schemas/L4"}, + "L4": {"$ref": "#/components/schemas/L5"}, + "L5": {"$ref": "#/components/schemas/L6"}, + "L6": {"$ref": "#/components/schemas/L7"}, + "L7": {"$ref": "#/components/schemas/L8"}, + "L8": { + "type": "object", + "properties": { + "value": {"type": "string"} + } + } + } + } + }"##; + + let spec = OpenApiSpec::from_str(spec_json).unwrap(); + let l1 = spec.resolve_ref("#/components/schemas/L1").unwrap(); + let resolved = spec.resolve_schema(l1); + + // Should resolve through all 8 levels + assert!(resolved.properties.contains_key("value")); + assert_eq!( + resolved.properties.get("value").unwrap().schema_type, + Some("string".to_string()) + ); +} + +#[test] +fn test_allof_multiple_refs() { + // Multi-inheritance: allOf: [{$ref: "A"}, {$ref: "B"}, {inline}] + let spec_json = r##"{ + "openapi": "3.0.0", + "info": {"title": "Test", "version": "1.0"}, + "paths": {}, + "components": { + "schemas": { + "Auditable": { + "type": "object", + "properties": { + "created_at": {"type": "string", "format": "date-time"}, + "updated_at": {"type": "string", "format": "date-time"} + } + }, + "Identifiable": { + "type": "object", + "properties": { + "id": {"type": "string"}, + "slug": {"type": "string"} + }, + "required": ["id"] + }, + "Resource": { + "allOf": [ + {"$ref": "#/components/schemas/Identifiable"}, + {"$ref": "#/components/schemas/Auditable"}, + { + "type": "object", + "properties": { + "name": {"type": "string"}, + "status": {"type": "string"} + }, + "required": ["name"] + } + ] + } + } + } + }"##; + + let spec = OpenApiSpec::from_str(spec_json).unwrap(); + let resource = spec.resolve_ref("#/components/schemas/Resource").unwrap(); + let resolved = spec.resolve_schema(resource); + + // 
Should have properties from all three sources + assert!(resolved.properties.contains_key("id")); + assert!(resolved.properties.contains_key("slug")); + assert!(resolved.properties.contains_key("created_at")); + assert!(resolved.properties.contains_key("updated_at")); + assert!(resolved.properties.contains_key("name")); + assert!(resolved.properties.contains_key("status")); + + // Required from both Identifiable and inline + assert!(resolved.required.contains(&"id".to_string())); + assert!(resolved.required.contains(&"name".to_string())); +} + +#[test] +fn test_openapi_31_nullable_array() { + // GitHub pattern: type: ["array", "null"] with items + let schema: Schema = + serde_json::from_str(r#"{"type": ["array", "null"], "items": {"type": "string"}}"#) + .unwrap(); + assert_eq!(schema.schema_type, Some("array".to_string())); + assert!(schema.nullable); + assert!(schema.items.is_some()); + assert_eq!( + schema.items.as_ref().unwrap().schema_type, + Some("string".to_string()) + ); +} + +#[test] +fn test_openapi_31_nullable_boolean() { + // General 3.1 pattern: type: ["boolean", "null"] + let schema: Schema = serde_json::from_str(r#"{"type": ["boolean", "null"]}"#).unwrap(); + assert_eq!(schema.schema_type, Some("boolean".to_string())); + assert!(schema.nullable); +} + +#[test] +fn test_type_array_three_non_null_types() { + // Edge case: type: ["string", "integer", "boolean"] → None (jsonb) + let schema: Schema = + serde_json::from_str(r#"{"type": ["string", "integer", "boolean"]}"#).unwrap(); + assert_eq!(schema.schema_type, None); + assert!(!schema.nullable); +} + +#[test] +fn test_content_type_jsonld() { + // NWS/JSON-LD: application/ld+json picked up via fallback + let spec_json = r#"{ + "openapi": "3.0.0", + "info": {"title": "NWS", "version": "1.0"}, + "paths": { + "/alerts": { + "get": { + "responses": { + "200": { + "description": "ok", + "content": { + "application/ld+json": { + "schema": { + "type": "object", + "properties": { + "@context": {"type": 
"object"}, + "@graph": {"type": "array", "items": {"type": "object"}} + } + } + } + } + } + } + } + } + } + }"#; + + let spec = OpenApiSpec::from_str(spec_json).unwrap(); + let endpoints = spec.get_endpoints(); + assert_eq!(endpoints.len(), 1); + let schema = endpoints[0].response_schema.as_ref().unwrap(); + assert!(schema.properties.contains_key("@graph")); +} + +#[test] +fn test_content_type_jsonapi() { + // JSON:API: application/vnd.api+json picked up via fallback + let spec_json = r#"{ + "openapi": "3.0.0", + "info": {"title": "JSON:API", "version": "1.0"}, + "paths": { + "/articles": { + "get": { + "responses": { + "200": { + "description": "ok", + "content": { + "application/vnd.api+json": { + "schema": { + "type": "object", + "properties": { + "data": {"type": "array"}, + "meta": {"type": "object"} + } + } + } + } + } + } + } + } + } + }"#; + + let spec = OpenApiSpec::from_str(spec_json).unwrap(); + let endpoints = spec.get_endpoints(); + assert_eq!(endpoints.len(), 1); + let schema = endpoints[0].response_schema.as_ref().unwrap(); + assert!(schema.properties.contains_key("data")); + assert!(schema.properties.contains_key("meta")); +} + +#[test] +fn test_ref_with_description_sibling() { + // Common 3.1 pattern: $ref + description sibling (description ignored, ref resolved) + let spec_json = r##"{ + "openapi": "3.1.0", + "info": {"title": "Test", "version": "1.0"}, + "paths": {}, + "components": { + "schemas": { + "Pet": { + "type": "object", + "properties": { + "name": {"type": "string"}, + "tag": {"type": "string"} + } + } + } + } + }"##; + + let spec = OpenApiSpec::from_str(spec_json).unwrap(); + + // Simulate a $ref with a description sibling — description is not in Schema struct, + // so it's implicitly ignored. The key point is $ref still resolves correctly. 
+ let ref_schema = Schema { + reference: Some("#/components/schemas/Pet".to_string()), + ..Default::default() + }; + + let resolved = spec.resolve_schema(&ref_schema); + assert!(resolved.properties.contains_key("name")); + assert!(resolved.properties.contains_key("tag")); +} + +#[test] +fn test_stripe_metadata_additional_properties() { + // Stripe metadata shape: type "object" with no properties (additionalProperties omitted here) → jsonb + let spec_json = r#"{ + "openapi": "3.0.0", + "info": {"title": "Stripe", "version": "1.0"}, + "paths": { + "/charges": { + "get": { + "responses": { + "200": { + "description": "ok", + "content": { + "application/json": { + "schema": { + "type": "object", + "properties": { + "id": {"type": "string"}, + "metadata": { + "type": "object" + } + } + } + } + } + } + } + } + } + } + }"#; + + let spec = OpenApiSpec::from_str(spec_json).unwrap(); + let endpoints = spec.get_endpoints(); + let schema = endpoints[0].response_schema.as_ref().unwrap(); + let metadata = schema.properties.get("metadata").unwrap(); + // type: "object" with no properties → maps to jsonb + assert_eq!(metadata.schema_type, Some("object".to_string())); + assert!(metadata.properties.is_empty()); +} + +#[test] +fn test_discriminator_doesnt_break_parsing() { + // Polymorphic APIs: discriminator field on oneOf is silently ignored + let spec_json = r##"{ + "openapi": "3.0.0", + "info": {"title": "Test", "version": "1.0"}, + "paths": {}, + "components": { + "schemas": { + "Cat": { + "type": "object", + "properties": { + "pet_type": {"type": "string"}, + "purrs": {"type": "boolean"} + } + }, + "Dog": { + "type": "object", + "properties": { + "pet_type": {"type": "string"}, + "barks": {"type": "boolean"} + } + }, + "Pet": { + "oneOf": [ + {"$ref": "#/components/schemas/Cat"}, + {"$ref": "#/components/schemas/Dog"} + ], + "discriminator": { + "propertyName": "pet_type" + } + } + } + } + }"##; + + // Should parse without error (discriminator field is ignored by serde) + let spec =
OpenApiSpec::from_str(spec_json).unwrap(); + let pet = spec.resolve_ref("#/components/schemas/Pet").unwrap(); + let resolved = spec.resolve_schema(pet); + + // oneOf merges all variant properties as nullable + assert!(resolved.properties.contains_key("pet_type")); + assert!(resolved.properties.contains_key("purrs")); + assert!(resolved.properties.contains_key("barks")); +} + +#[test] +fn test_empty_type_array() { + // Edge case: type: [] → None (jsonb) + let schema: Schema = serde_json::from_str(r#"{"type": []}"#).unwrap(); + assert_eq!(schema.schema_type, None); + assert!(!schema.nullable); +} + +// --- OpenAPI 3.1 full READ operation coverage tests --- +// Based on real-world OpenAPI 3.1 schemas from GitHub, Stripe, Kubernetes, and others. + +#[test] +fn test_github_31_nullable_allof_with_ref() { + // GitHub API 3.1 pattern: property is allOf + nullable for merged nullable refs + // Example: pull_request.head.repo is allOf: [{$ref: "#/components/schemas/repository"}] + // with nullable: true on the outer property + let spec_json = r##"{ + "openapi": "3.1.0", + "info": {"title": "GitHub", "version": "1.0"}, + "paths": {}, + "components": { + "schemas": { + "repository": { + "type": "object", + "properties": { + "id": {"type": "integer"}, + "full_name": {"type": "string"}, + "private": {"type": "boolean"} + }, + "required": ["id", "full_name"] + }, + "pull-request": { + "type": "object", + "properties": { + "id": {"type": "integer"}, + "title": {"type": "string"}, + "head_repo": { + "allOf": [ + {"$ref": "#/components/schemas/repository"} + ], + "nullable": true + } + } + } + } + } + }"##; + + let spec = OpenApiSpec::from_str(spec_json).unwrap(); + let pr = spec + .resolve_ref("#/components/schemas/pull-request") + .unwrap(); + let resolved = spec.resolve_schema(pr); + + assert!(resolved.properties.contains_key("head_repo")); + let head_repo = resolved.properties.get("head_repo").unwrap(); + // The nullable flag on the outer property should carry through + // allOf 
with a single $ref should resolve to that ref's properties + let head_resolved = spec.resolve_schema(head_repo); + assert!(head_resolved.properties.contains_key("id")); + assert!(head_resolved.properties.contains_key("full_name")); +} + +#[test] +fn test_github_31_nullable_simple_user_ref() { + // GitHub 3.1: nullable-simple-user is used everywhere as: + // "assignee": { "anyOf": [{"$ref": "..."}, {"type": "null"}] } + // This tests the full resolution chain with nullable merging + let spec_json = r##"{ + "openapi": "3.1.0", + "info": {"title": "GitHub", "version": "1.0"}, + "paths": { + "/repos/issues": { + "get": { + "responses": { + "200": { + "description": "ok", + "content": { + "application/json": { + "schema": { + "type": "array", + "items": {"$ref": "#/components/schemas/issue"} + } + } + } + } + } + } + } + }, + "components": { + "schemas": { + "simple-user": { + "type": "object", + "properties": { + "login": {"type": "string"}, + "id": {"type": "integer"}, + "avatar_url": {"type": "string", "format": "uri"} + }, + "required": ["login", "id", "avatar_url"] + }, + "issue": { + "type": "object", + "properties": { + "id": {"type": "integer"}, + "number": {"type": "integer"}, + "title": {"type": "string"}, + "state": {"type": "string"}, + "assignee": { + "anyOf": [ + {"$ref": "#/components/schemas/simple-user"}, + {"type": "null"} + ] + }, + "labels": { + "type": "array", + "items": { + "type": "object", + "properties": { + "id": {"type": "integer"}, + "name": {"type": "string"}, + "color": {"type": "string"} + } + } + } + }, + "required": ["id", "number", "title", "state"] + } + } + } + }"##; + + let spec = OpenApiSpec::from_str(spec_json).unwrap(); + let endpoints = spec.get_endpoints(); + assert_eq!(endpoints.len(), 1); + + let schema = endpoints[0].response_schema.as_ref().unwrap(); + let resolved = spec.resolve_schema(schema); + let items = resolved.items.as_ref().unwrap(); + let item_resolved = spec.resolve_schema(items); + + 
assert!(item_resolved.properties.contains_key("id")); + assert!(item_resolved.properties.contains_key("title")); + assert!(item_resolved.properties.contains_key("assignee")); + assert!(item_resolved.properties.contains_key("labels")); + + // assignee resolves to simple-user properties via anyOf + let assignee = item_resolved.properties.get("assignee").unwrap(); + let assignee_resolved = spec.resolve_schema(assignee); + assert!(assignee_resolved.properties.contains_key("login")); +} + +#[test] +fn test_kubernetes_allof_with_description_only_ref() { + // Kubernetes pattern: allOf with one $ref and one schema that only has description + // (no properties, no type — just metadata). Should not break resolution. + let spec_json = r##"{ + "openapi": "3.0.0", + "info": {"title": "K8s", "version": "1.0"}, + "paths": {}, + "components": { + "schemas": { + "ObjectMeta": { + "type": "object", + "properties": { + "name": {"type": "string"}, + "namespace": {"type": "string"} + } + }, + "Pod": { + "type": "object", + "properties": { + "metadata": { + "allOf": [ + {"$ref": "#/components/schemas/ObjectMeta"}, + {"description": "Standard object metadata."} + ] + }, + "kind": {"type": "string"} + } + } + } + } + }"##; + + let spec = OpenApiSpec::from_str(spec_json).unwrap(); + let pod = spec.resolve_ref("#/components/schemas/Pod").unwrap(); + let resolved = spec.resolve_schema(pod); + + assert!(resolved.properties.contains_key("metadata")); + assert!(resolved.properties.contains_key("kind")); + + let meta = resolved.properties.get("metadata").unwrap(); + let meta_resolved = spec.resolve_schema(meta); + assert!(meta_resolved.properties.contains_key("name")); + assert!(meta_resolved.properties.contains_key("namespace")); +} + +#[test] +fn test_stripe_31_expandable_field_string_or_ref() { + // Stripe 3.1 uses anyOf: [{maxLength:5000, type:"string"}, {$ref:"..."}] + // for expandable fields (default: string ID, expanded: full object) + let spec_json = r##"{ + "openapi": "3.1.0", + "info": {"title": "Stripe", "version": "1.0"}, +
"paths": { + "/v1/charges": { + "get": { + "responses": { + "200": { + "description": "ok", + "content": { + "application/json": { + "schema": { + "type": "object", + "properties": { + "data": { + "type": "array", + "items": {"$ref": "#/components/schemas/charge"} + }, + "has_more": {"type": "boolean"}, + "url": {"type": "string"} + }, + "required": ["data", "has_more", "url"] + } + } + } + } + } + } + } + }, + "components": { + "schemas": { + "customer": { + "type": "object", + "properties": { + "id": {"type": "string"}, + "email": {"type": ["string", "null"]}, + "name": {"type": ["string", "null"]} + }, + "required": ["id"] + }, + "charge": { + "type": "object", + "properties": { + "id": {"type": "string"}, + "amount": {"type": "integer"}, + "currency": {"type": "string"}, + "created": {"type": "integer", "format": "unix-time"}, + "customer": { + "anyOf": [ + {"type": "string"}, + {"$ref": "#/components/schemas/customer"} + ] + }, + "metadata": {"type": "object"} + }, + "required": ["id", "amount", "currency", "created"] + } + } + } + }"##; + + let spec = OpenApiSpec::from_str(spec_json).unwrap(); + let endpoints = spec.get_endpoints(); + assert_eq!(endpoints.len(), 1); + + let schema = endpoints[0].response_schema.as_ref().unwrap(); + assert!(schema.properties.contains_key("data")); + assert!(schema.properties.contains_key("has_more")); + + // Verify nested charge schema resolution + let data = schema.properties.get("data").unwrap(); + let items = data.items.as_ref().unwrap(); + let charge_resolved = spec.resolve_schema(items); + assert!(charge_resolved.properties.contains_key("id")); + assert!(charge_resolved.properties.contains_key("amount")); + assert!(charge_resolved.properties.contains_key("created")); + + // created has format: unix-time + let created = charge_resolved.properties.get("created").unwrap(); + assert_eq!(created.format, Some("unix-time".to_string())); +} + +#[test] +fn test_ref_with_write_only_sibling() { + // OpenAPI 3.1: $ref + writeOnly 
sibling should merge writeOnly into resolved schema + let spec_json = r##"{ + "openapi": "3.1.0", + "info": {"title": "Test", "version": "1.0"}, + "paths": {}, + "components": { + "schemas": { + "Credentials": { + "type": "object", + "properties": { + "token": {"type": "string"} + } + }, + "User": { + "type": "object", + "properties": { + "name": {"type": "string"}, + "auth": { + "$ref": "#/components/schemas/Credentials", + "writeOnly": true + } + } + } + } + } + }"##; + + let spec = OpenApiSpec::from_str(spec_json).unwrap(); + let user = spec.resolve_ref("#/components/schemas/User").unwrap(); + let resolved = spec.resolve_schema(user); + + let auth = resolved.properties.get("auth").unwrap(); + let auth_resolved = spec.resolve_schema(auth); + assert!(auth_resolved.write_only); + assert!(auth_resolved.properties.contains_key("token")); +} + +#[test] +fn test_openapi_31_version_string_accepted() { + // OpenAPI 3.1.0 should be accepted as valid 3.x + let spec_json = r#"{ + "openapi": "3.1.0", + "info": {"title": "Test 3.1", "version": "1.0"}, + "paths": {} + }"#; + let spec = OpenApiSpec::from_str(spec_json).unwrap(); + assert_eq!(spec.openapi, "3.1.0"); +} + +#[test] +fn test_openapi_31_minor_version_accepted() { + // Future 3.x versions (e.g., 3.2.0) should also be accepted + let spec_json = r#"{ + "openapi": "3.2.0", + "info": {"title": "Future", "version": "1.0"}, + "paths": {} + }"#; + let spec = OpenApiSpec::from_str(spec_json).unwrap(); + assert_eq!(spec.openapi, "3.2.0"); +} + +#[test] +fn test_response_ref_chain_through_components() { + // Response uses $ref to components/responses, which itself contains a schema $ref + let spec_json = r##"{ + "openapi": "3.0.0", + "info": {"title": "Test", "version": "1.0"}, + "paths": { + "/pets": { + "get": { + "responses": { + "200": {"$ref": "#/components/responses/PetList"} + } + } + } + }, + "components": { + "responses": { + "PetList": { + "description": "A list of pets", + "content": { + "application/json": { + 
"schema": { + "type": "array", + "items": {"$ref": "#/components/schemas/Pet"} + } + } + } + } + }, + "schemas": { + "Pet": { + "type": "object", + "properties": { + "id": {"type": "integer"}, + "name": {"type": "string"}, + "species": {"type": "string"} + }, + "required": ["id", "name"] + } + } + } + }"##; + + let spec = OpenApiSpec::from_str(spec_json).unwrap(); + let endpoints = spec.get_endpoints(); + assert_eq!(endpoints.len(), 1); + + let schema = endpoints[0].response_schema.as_ref().unwrap(); + let items = schema.items.as_ref().unwrap(); + let resolved = spec.resolve_schema(items); + assert!(resolved.properties.contains_key("id")); + assert!(resolved.properties.contains_key("name")); + assert!(resolved.properties.contains_key("species")); +} + +#[test] +fn test_openapi_31_nullable_object_type_array() { + // OpenAPI 3.1: type: ["object", "null"] — nullable object + let schema: Schema = serde_json::from_str( + r#"{"type": ["object", "null"], "properties": {"key": {"type": "string"}}}"#, + ) + .unwrap(); + assert_eq!(schema.schema_type, Some("object".to_string())); + assert!(schema.nullable); + assert!(schema.properties.contains_key("key")); +} + +#[test] +fn test_openapi_31_nullable_integer_type_array() { + // OpenAPI 3.1: type: ["integer", "null"] with format + let schema: Schema = + serde_json::from_str(r#"{"type": ["integer", "null"], "format": "int32"}"#).unwrap(); + assert_eq!(schema.schema_type, Some("integer".to_string())); + assert!(schema.nullable); + assert_eq!(schema.format, Some("int32".to_string())); +} + +#[test] +fn test_openapi_31_nullable_number_type_array() { + // OpenAPI 3.1: type: ["number", "null"] with format: double + let schema: Schema = + serde_json::from_str(r#"{"type": ["number", "null"], "format": "double"}"#).unwrap(); + assert_eq!(schema.schema_type, Some("number".to_string())); + assert!(schema.nullable); + assert_eq!(schema.format, Some("double".to_string())); +} + +#[test] +fn test_mixed_allof_and_type_array_31() { + // 
OpenAPI 3.1: allOf with type arrays inside sub-schemas + let spec_json = r#"{ + "openapi": "3.1.0", + "info": {"title": "Test", "version": "1.0"}, + "paths": {}, + "components": { + "schemas": { + "Combined": { + "allOf": [ + { + "type": "object", + "properties": { + "id": {"type": "integer"}, + "name": {"type": ["string", "null"]} + }, + "required": ["id"] + }, + { + "type": "object", + "properties": { + "email": {"type": ["string", "null"]}, + "count": {"type": ["integer", "null"]} + } + } + ] + } + } + } + }"#; + + let spec = OpenApiSpec::from_str(spec_json).unwrap(); + let combined = spec.resolve_ref("#/components/schemas/Combined").unwrap(); + let resolved = spec.resolve_schema(combined); + + assert!(resolved.properties.contains_key("id")); + assert!(resolved.properties.contains_key("name")); + assert!(resolved.properties.contains_key("email")); + assert!(resolved.properties.contains_key("count")); + + // id is required and not nullable + assert!(resolved.required.contains(&"id".to_string())); + + // name should be nullable (from type array) + let name = resolved.properties.get("name").unwrap(); + assert!(name.nullable); + assert_eq!(name.schema_type, Some("string".to_string())); + + // count should be nullable + let count = resolved.properties.get("count").unwrap(); + assert!(count.nullable); + assert_eq!(count.schema_type, Some("integer".to_string())); +} + +#[test] +fn test_twilio_nested_schema_ref_in_response() { + // Twilio pattern: response has inline wrapper with $ref items + let spec_json = r##"{ + "openapi": "3.0.0", + "info": {"title": "Twilio", "version": "1.0"}, + "paths": { + "/Accounts": { + "get": { + "responses": { + "200": { + "description": "ok", + "content": { + "application/json": { + "schema": { + "type": "object", + "properties": { + "accounts": { + "type": "array", + "items": {"$ref": "#/components/schemas/Account"} + }, + "page": {"type": "integer"}, + "page_size": {"type": "integer"} + } + } + } + } + } + } + } + } + }, + "components": 
{ + "schemas": { + "Account": { + "type": "object", + "properties": { + "sid": {"type": "string"}, + "friendly_name": {"type": "string"}, + "status": {"type": "string"}, + "date_created": {"type": "string", "format": "date-time"} + }, + "required": ["sid"] + } + } + } + }"##; + + let spec = OpenApiSpec::from_str(spec_json).unwrap(); + let endpoints = spec.get_endpoints(); + assert_eq!(endpoints.len(), 1); + + let schema = endpoints[0].response_schema.as_ref().unwrap(); + // Twilio wraps in "accounts" key + assert!(schema.properties.contains_key("accounts")); + + let accounts = schema.properties.get("accounts").unwrap(); + let items = accounts.items.as_ref().unwrap(); + let resolved = spec.resolve_schema(items); + assert!(resolved.properties.contains_key("sid")); + assert!(resolved.properties.contains_key("friendly_name")); + assert!(resolved.properties.contains_key("date_created")); +} + +#[test] +fn test_oneof_with_null_type_31() { + // OpenAPI 3.1: oneOf: [{$ref: "..."}, {type: "null"}] — common nullable pattern + // Different from anyOf: semantically "exactly one", but for nullable refs identical behavior + let spec_json = r##"{ + "openapi": "3.1.0", + "info": {"title": "Test", "version": "1.0"}, + "paths": {}, + "components": { + "schemas": { + "Address": { + "type": "object", + "properties": { + "street": {"type": "string"}, + "city": {"type": "string"}, + "zip": {"type": "string"} + } + }, + "Order": { + "type": "object", + "properties": { + "id": {"type": "integer"}, + "shipping_address": { + "oneOf": [ + {"$ref": "#/components/schemas/Address"}, + {"type": "null"} + ] + } + } + } + } + } + }"##; + + let spec = OpenApiSpec::from_str(spec_json).unwrap(); + let order = spec.resolve_ref("#/components/schemas/Order").unwrap(); + let resolved = spec.resolve_schema(order); + + let shipping = resolved.properties.get("shipping_address").unwrap(); + let shipping_resolved = spec.resolve_schema(shipping); + // Should have Address properties from the non-null variant + 
assert!(shipping_resolved.properties.contains_key("street")); + assert!(shipping_resolved.properties.contains_key("city")); + assert!(shipping_resolved.properties.contains_key("zip")); +} + +#[test] +fn test_empty_paths_valid_spec() { + // Valid spec with no paths (e.g., webhook-only APIs) + let spec_json = r#"{ + "openapi": "3.1.0", + "info": {"title": "Webhooks Only", "version": "1.0"}, + "paths": {} + }"#; + + let spec = OpenApiSpec::from_str(spec_json).unwrap(); + let endpoints = spec.get_endpoints(); + assert!(endpoints.is_empty()); +} + +#[test] +fn test_multiple_servers_uses_first() { + // Multiple servers — base_url should come from the first one + let spec_json = r#"{ + "openapi": "3.0.0", + "info": {"title": "Test", "version": "1.0"}, + "servers": [ + {"url": "https://api.production.com"}, + {"url": "https://api.staging.com"}, + {"url": "https://api.dev.com"} + ], + "paths": {} + }"#; + + let spec = OpenApiSpec::from_str(spec_json).unwrap(); + assert_eq!( + spec.base_url(), + Some("https://api.production.com".to_string()) + ); +} + +#[test] +fn test_no_servers_returns_none() { + // No servers array — base_url should be None + let spec_json = r#"{ + "openapi": "3.0.0", + "info": {"title": "Test", "version": "1.0"}, + "paths": {} + }"#; + + let spec = OpenApiSpec::from_str(spec_json).unwrap(); + assert_eq!(spec.base_url(), None); +} + +#[test] +fn test_openapi_31_const_ignored_gracefully() { + // OpenAPI 3.1 introduced "const" (from JSON Schema). Our parser should ignore it + // gracefully since serde skips unknown fields. + let schema: Schema = serde_json::from_str(r#"{"type": "string", "const": "active"}"#).unwrap(); + assert_eq!(schema.schema_type, Some("string".to_string())); +} + +#[test] +fn test_openapi_31_examples_ignored_gracefully() { + // OpenAPI 3.1: "examples" (array) replaces "example" (singular). Should be ignored. 
+ let schema: Schema = + serde_json::from_str(r#"{"type": "string", "examples": ["foo", "bar"]}"#).unwrap(); + assert_eq!(schema.schema_type, Some("string".to_string())); +} + +#[test] +fn test_allof_empty_schemas() { + // Edge case: allOf with empty sub-schemas — should produce empty merged result + let spec_json = r#"{ + "openapi": "3.0.0", + "info": {"title": "Test", "version": "1.0"}, + "paths": {}, + "components": { + "schemas": { + "Empty": { + "allOf": [{}, {}] + } + } + } + }"#; + + let spec = OpenApiSpec::from_str(spec_json).unwrap(); + let empty = spec.resolve_ref("#/components/schemas/Empty").unwrap(); + let resolved = spec.resolve_schema(empty); + assert!(resolved.properties.is_empty()); + // No properties → schema_type should not be "object" + assert_eq!(resolved.schema_type, None); +} + +#[test] +fn test_anyof_all_primitives() { + // anyOf with only primitive types (no objects) → None type (jsonb) + let spec_json = r#"{ + "openapi": "3.0.0", + "info": {"title": "Test", "version": "1.0"}, + "paths": {}, + "components": { + "schemas": { + "Flexible": { + "anyOf": [ + {"type": "string"}, + {"type": "integer"}, + {"type": "boolean"} + ] + } + } + } + }"#; + + let spec = OpenApiSpec::from_str(spec_json).unwrap(); + let flexible = spec.resolve_ref("#/components/schemas/Flexible").unwrap(); + let resolved = spec.resolve_schema(flexible); + // All primitives, no properties → schema_type should be None (jsonb) + assert_eq!(resolved.schema_type, None); + assert!(resolved.properties.is_empty()); +} + +#[test] +fn test_response_schema_from_array_ref() { + // Response schema is a $ref to an array schema (not inlined) + let spec_json = r##"{ + "openapi": "3.0.0", + "info": {"title": "Test", "version": "1.0"}, + "paths": { + "/users": { + "get": { + "responses": { + "200": { + "description": "ok", + "content": { + "application/json": { + "schema": {"$ref": "#/components/schemas/UserList"} + } + } + } + } + } + } + }, + "components": { + "schemas": { + "User": { + 
"type": "object", + "properties": { + "id": {"type": "integer"}, + "login": {"type": "string"} + } + }, + "UserList": { + "type": "array", + "items": {"$ref": "#/components/schemas/User"} + } + } + } + }"##; + + let spec = OpenApiSpec::from_str(spec_json).unwrap(); + let endpoints = spec.get_endpoints(); + assert_eq!(endpoints.len(), 1); + + // The response schema is a $ref to UserList (array type) + let schema = endpoints[0].response_schema.as_ref().unwrap(); + assert!(schema.reference.is_some()); + + let resolved = spec.resolve_schema(schema); + assert_eq!(resolved.schema_type, Some("array".to_string())); + let items = resolved.items.as_ref().unwrap(); + let item_resolved = spec.resolve_schema(items); + assert!(item_resolved.properties.contains_key("id")); + assert!(item_resolved.properties.contains_key("login")); +} + +#[test] +fn test_oneof_mixed_object_and_primitive() { + // oneOf: [{type: "object", properties: {...}}, {type: "string"}] + // Common in APIs that return either a structured error or a simple value + let spec_json = r#"{ + "openapi": "3.0.0", + "info": {"title": "Test", "version": "1.0"}, + "paths": {}, + "components": { + "schemas": { + "Result": { + "oneOf": [ + { + "type": "object", + "properties": { + "data": {"type": "string"}, + "status": {"type": "integer"} + } + }, + {"type": "string"} + ] + } + } + } + }"#; + + let spec = OpenApiSpec::from_str(spec_json).unwrap(); + let result = spec.resolve_ref("#/components/schemas/Result").unwrap(); + let resolved = spec.resolve_schema(result); + + // Should have properties from the object variant + assert!(resolved.properties.contains_key("data")); + assert!(resolved.properties.contains_key("status")); + // schema_type should be "object" because at least one variant has properties + assert_eq!(resolved.schema_type, Some("object".to_string())); +} + +#[test] +fn test_allof_with_required_dedup() { + // allOf where multiple schemas specify the same required field + let spec_json = r##"{ + "openapi": 
"3.0.0", + "info": {"title": "Test", "version": "1.0"}, + "paths": {}, + "components": { + "schemas": { + "Merged": { + "allOf": [ + { + "type": "object", + "properties": {"id": {"type": "string"}}, + "required": ["id"] + }, + { + "type": "object", + "properties": {"name": {"type": "string"}}, + "required": ["id", "name"] + } + ] + } + } + } + }"##; + + let spec = OpenApiSpec::from_str(spec_json).unwrap(); + let merged = spec.resolve_ref("#/components/schemas/Merged").unwrap(); + let resolved = spec.resolve_schema(merged); + + // "id" should appear only once in required (deduplicated) + assert_eq!( + resolved.required.iter().filter(|r| *r == "id").count(), + 1, + "Required should be deduplicated" + ); + assert!(resolved.required.contains(&"name".to_string())); +} + +#[test] +fn test_response_ref_broken_gracefully() { + // Response $ref pointing to nonexistent components/responses — should still work + let spec_json = r##"{ + "openapi": "3.0.0", + "info": {"title": "Test", "version": "1.0"}, + "paths": { + "/items": { + "get": { + "responses": { + "200": {"$ref": "#/components/responses/NonExistent"} + } + } + } + } + }"##; + + let spec = OpenApiSpec::from_str(spec_json).unwrap(); + let endpoints = spec.get_endpoints(); + assert_eq!(endpoints.len(), 1); + // Broken ref → no schema + assert!(endpoints[0].response_schema.is_none()); +} + +#[test] +fn test_allof_with_properties_sibling() { + // K8s-style: allOf + sibling properties → allOf takes priority (sibling properties + // are not in Schema's allOf path, they stay on the outer schema) + let spec_json = r##"{ + "openapi": "3.0.0", + "info": {"title": "Test", "version": "1.0"}, + "paths": {}, + "components": { + "schemas": { + "Base": { + "type": "object", + "properties": { + "id": {"type": "string"} + } + }, + "Extended": { + "allOf": [ + {"$ref": "#/components/schemas/Base"}, + { + "type": "object", + "properties": { + "name": {"type": "string"} + } + } + ], + "properties": { + "sibling_prop": {"type": "boolean"} 
+ } + } + } + } + }"##; + + let spec = OpenApiSpec::from_str(spec_json).unwrap(); + let extended = spec.resolve_ref("#/components/schemas/Extended").unwrap(); + let resolved = spec.resolve_schema(extended); + + // allOf properties should be present + assert!(resolved.properties.contains_key("id")); + assert!(resolved.properties.contains_key("name")); + // Parent-level sibling properties are now merged alongside allOf (OpenAPI 3.1) + assert!( + resolved.properties.contains_key("sibling_prop"), + "sibling_prop should be merged from parent alongside allOf" + ); +} + +#[test] +fn test_response_only_error_codes() { + // Only 4xx/5xx responses → None schema (no success response to extract) + let spec_json = r#"{ + "openapi": "3.0.0", + "info": {"title": "Error API", "version": "1.0"}, + "paths": { + "/errors": { + "get": { + "responses": { + "400": { + "description": "Bad request", + "content": { + "application/json": { + "schema": { + "type": "object", + "properties": { + "error": {"type": "string"} + } + } + } + } + }, + "500": { + "description": "Internal error" + } + } + } + } + } + }"#; + + let spec = OpenApiSpec::from_str(spec_json).unwrap(); + let endpoints = spec.get_endpoints(); + assert_eq!(endpoints.len(), 1); + // No 200/201/2XX/default → response_schema should be None + assert!(endpoints[0].response_schema.is_none()); +} + +// ============================================================================= +// OpenAPI 3.1 READ operation coverage — based on real-world API specifications +// ============================================================================= +// Tests below are derived from actual patterns found in production OpenAPI 3.1 +// specs: GitHub REST API, Stripe API, Kubernetes API, DigitalOcean API, and +// the OpenAPI 3.1 specification itself (JSON Schema 2020-12 alignment). 
+ +// --- GitHub API 3.1: nullable format strings via type arrays --- + +#[test] +fn test_github_31_nullable_datetime_type_array() { + // GitHub API: type: ["string", "null"] with format: "date-time" + // e.g., issue.closed_at, pull_request.merged_at + let schema: Schema = + serde_json::from_str(r#"{"type": ["string", "null"], "format": "date-time"}"#).unwrap(); + assert_eq!(schema.schema_type, Some("string".to_string())); + assert!(schema.nullable); + assert_eq!(schema.format, Some("date-time".to_string())); +} + +#[test] +fn test_github_31_nullable_uri_type_array() { + // GitHub API: type: ["string", "null"] with format: "uri" + // e.g., repository.homepage, issue.body_url + let schema: Schema = + serde_json::from_str(r#"{"type": ["string", "null"], "format": "uri"}"#).unwrap(); + assert_eq!(schema.schema_type, Some("string".to_string())); + assert!(schema.nullable); + assert_eq!(schema.format, Some("uri".to_string())); +} + +#[test] +fn test_github_31_nullable_date_type_array() { + // DigitalOcean/GitHub: type: ["string", "null"] with format: "date" + let schema: Schema = + serde_json::from_str(r#"{"type": ["string", "null"], "format": "date"}"#).unwrap(); + assert_eq!(schema.schema_type, Some("string".to_string())); + assert!(schema.nullable); + assert_eq!(schema.format, Some("date".to_string())); +} + +#[test] +fn test_github_31_nullable_email_type_array() { + // GitHub API: type: ["string", "null"] with format: "email" + // e.g., user.email + let schema: Schema = + serde_json::from_str(r#"{"type": ["string", "null"], "format": "email"}"#).unwrap(); + assert_eq!(schema.schema_type, Some("string".to_string())); + assert!(schema.nullable); + assert_eq!(schema.format, Some("email".to_string())); +} + +// --- GitHub API 3.1: anyOf with all $ref variants (polymorphic events) --- + +#[test] +fn test_github_31_anyof_all_refs() { + // GitHub API: timeline events use anyOf with multiple $ref variants + // e.g., GET /repos/{owner}/{repo}/issues/{issue_number}/timeline + 
let spec_json = r##"{ + "openapi": "3.1.0", + "info": {"title": "GitHub", "version": "1.0"}, + "paths": {}, + "components": { + "schemas": { + "labeled-event": { + "type": "object", + "properties": { + "id": {"type": "integer"}, + "event": {"type": "string"}, + "label": {"type": "object", "properties": {"name": {"type": "string"}}} + }, + "required": ["id", "event"] + }, + "commented-event": { + "type": "object", + "properties": { + "id": {"type": "integer"}, + "event": {"type": "string"}, + "body": {"type": "string"} + }, + "required": ["id", "event"] + }, + "assigned-event": { + "type": "object", + "properties": { + "id": {"type": "integer"}, + "event": {"type": "string"}, + "assignee": {"$ref": "#/components/schemas/simple-user"} + }, + "required": ["id", "event"] + }, + "simple-user": { + "type": "object", + "properties": { + "login": {"type": "string"}, + "id": {"type": "integer"} + }, + "required": ["login", "id"] + }, + "timeline-event": { + "anyOf": [ + {"$ref": "#/components/schemas/labeled-event"}, + {"$ref": "#/components/schemas/commented-event"}, + {"$ref": "#/components/schemas/assigned-event"} + ] + } + } + } + }"##; + + let spec = OpenApiSpec::from_str(spec_json).unwrap(); + let timeline = spec + .resolve_ref("#/components/schemas/timeline-event") + .unwrap(); + let resolved = spec.resolve_schema(timeline); + + // anyOf merges all variant properties as nullable + assert!(resolved.properties.contains_key("id")); + assert!(resolved.properties.contains_key("event")); + assert!(resolved.properties.contains_key("label")); + assert!(resolved.properties.contains_key("body")); + assert!(resolved.properties.contains_key("assignee")); + + // All properties should be nullable (anyOf makes everything nullable) + assert!(resolved.properties.get("id").unwrap().nullable); + assert!(resolved.properties.get("event").unwrap().nullable); + assert!(resolved.properties.get("body").unwrap().nullable); + + // Nothing should be required for anyOf + 
assert!(resolved.required.is_empty()); +} + +// --- GitHub API 3.1: nested anyOf inside allOf properties --- + +#[test] +fn test_github_31_nested_anyof_inside_allof() { + // GitHub API pattern: pull_request uses allOf for inheritance, + // with nested anyOf for nullable ref fields inside the child schema + let spec_json = r##"{ + "openapi": "3.1.0", + "info": {"title": "GitHub", "version": "1.0"}, + "paths": {}, + "components": { + "schemas": { + "simple-user": { + "type": "object", + "properties": { + "login": {"type": "string"}, + "id": {"type": "integer"} + }, + "required": ["login", "id"] + }, + "milestone": { + "type": "object", + "properties": { + "number": {"type": "integer"}, + "title": {"type": "string"}, + "state": {"type": "string"} + }, + "required": ["number", "title"] + }, + "issue-base": { + "type": "object", + "properties": { + "id": {"type": "integer"}, + "title": {"type": "string"}, + "state": {"type": "string"} + }, + "required": ["id", "title", "state"] + }, + "issue-full": { + "allOf": [ + {"$ref": "#/components/schemas/issue-base"}, + { + "type": "object", + "properties": { + "assignee": { + "anyOf": [ + {"$ref": "#/components/schemas/simple-user"}, + {"type": "null"} + ] + }, + "milestone": { + "anyOf": [ + {"$ref": "#/components/schemas/milestone"}, + {"type": "null"} + ] + }, + "body": {"type": ["string", "null"]} + } + } + ] + } + } + } + }"##; + + let spec = OpenApiSpec::from_str(spec_json).unwrap(); + let issue = spec.resolve_ref("#/components/schemas/issue-full").unwrap(); + let resolved = spec.resolve_schema(issue); + + // Should have base properties + assert!(resolved.properties.contains_key("id")); + assert!(resolved.properties.contains_key("title")); + assert!(resolved.properties.contains_key("state")); + + // Should have extended properties + assert!(resolved.properties.contains_key("assignee")); + assert!(resolved.properties.contains_key("milestone")); + assert!(resolved.properties.contains_key("body")); + + // body should be 
nullable (from type array) + let body = resolved.properties.get("body").unwrap(); + assert!(body.nullable); + assert_eq!(body.schema_type, Some("string".to_string())); + + // Required should come from base + assert!(resolved.required.contains(&"id".to_string())); + assert!(resolved.required.contains(&"title".to_string())); +} + +// --- GitHub API 3.1: readOnly properties (should be INCLUDED in responses) --- + +#[test] +fn test_github_31_readonly_property_included() { + // GitHub API: readOnly fields like id, node_id should appear in GET responses. + // Our parser ignores readOnly (it's not in Schema struct), which is correct + // behavior — readOnly properties SHOULD appear in responses. + let spec_json = r#"{ + "openapi": "3.1.0", + "info": {"title": "GitHub", "version": "1.0"}, + "paths": { + "/repos": { + "get": { + "responses": { + "200": { + "description": "ok", + "content": { + "application/json": { + "schema": { + "type": "array", + "items": { + "type": "object", + "properties": { + "id": {"type": "integer", "readOnly": true}, + "node_id": {"type": "string", "readOnly": true}, + "name": {"type": "string"}, + "full_name": {"type": "string", "readOnly": true} + }, + "required": ["id", "name", "full_name"] + } + } + } + } + } + } + } + } + } + }"#; + + let spec = OpenApiSpec::from_str(spec_json).unwrap(); + let endpoints = spec.get_endpoints(); + assert_eq!(endpoints.len(), 1); + + let schema = endpoints[0].response_schema.as_ref().unwrap(); + let items = schema.items.as_ref().unwrap(); + + // readOnly properties should still be present (they appear in responses) + assert!(items.properties.contains_key("id")); + assert!(items.properties.contains_key("node_id")); + assert!(items.properties.contains_key("name")); + assert!(items.properties.contains_key("full_name")); +} + +// --- Stripe API 3.1: enum values in response schemas --- + +#[test] +fn test_stripe_31_enum_string_property() { + // Stripe API: type: "string" with enum values (e.g., charge.status) + // 
Enums should still map to text type (our parser ignores enum values) + let schema: Schema = + serde_json::from_str(r#"{"type": "string", "enum": ["active", "inactive", "pending"]}"#) + .unwrap(); + assert_eq!(schema.schema_type, Some("string".to_string())); + assert!(!schema.nullable); +} + +#[test] +fn test_stripe_31_nullable_enum_type_array() { + // Stripe API 3.1: type: ["string", "null"] with enum + // e.g., subscription.cancel_at_period_end reason + let schema: Schema = serde_json::from_str( + r#"{"type": ["string", "null"], "enum": ["duplicate", "fraudulent", "requested_by_customer", null]}"#, + ) + .unwrap(); + assert_eq!(schema.schema_type, Some("string".to_string())); + assert!(schema.nullable); +} + +// --- Stripe API 3.1: nullable unix timestamps --- + +#[test] +fn test_stripe_31_nullable_unix_time() { + // Stripe API: type: ["integer", "null"] with format: "unix-time" + // e.g., subscription.canceled_at, charge.refunded_at + let schema: Schema = + serde_json::from_str(r#"{"type": ["integer", "null"], "format": "unix-time"}"#).unwrap(); + assert_eq!(schema.schema_type, Some("integer".to_string())); + assert!(schema.nullable); + assert_eq!(schema.format, Some("unix-time".to_string())); +} + +// --- Stripe API: vendor extensions should be ignored --- + +#[test] +fn test_stripe_31_vendor_extensions_ignored() { + // Stripe API uses x-expansionResources, x-resourceId, x-stripeBypassValidation, etc. 
+ // These should be silently ignored by serde's default behavior + let schema: Schema = serde_json::from_str( + r##"{ + "type": "object", + "x-resourceId": "charge", + "x-expansionResources": {"oneOf": [{"$ref": "#/components/schemas/customer"}]}, + "x-stripeBypassValidation": true, + "properties": { + "id": {"type": "string"} + } + }"##, + ) + .unwrap(); + assert_eq!(schema.schema_type, Some("object".to_string())); + assert!(schema.properties.contains_key("id")); +} + +// --- Kubernetes API: schema with no type but with properties --- + +#[test] +fn test_kubernetes_implicit_object_no_type() { + // Kubernetes API: some schemas omit "type" but have "properties" + // e.g., io.k8s.api.core.v1.PodSpec — properties without explicit "type": "object" + let spec_json = r#"{ + "openapi": "3.0.0", + "info": {"title": "K8s", "version": "1.0"}, + "paths": { + "/api/v1/pods": { + "get": { + "responses": { + "200": { + "description": "ok", + "content": { + "application/json": { + "schema": { + "properties": { + "apiVersion": {"type": "string"}, + "kind": {"type": "string"}, + "items": { + "type": "array", + "items": { + "properties": { + "name": {"type": "string"}, + "namespace": {"type": "string"} + } + } + } + } + } + } + } + } + } + } + } + } + }"#; + + let spec = OpenApiSpec::from_str(spec_json).unwrap(); + let endpoints = spec.get_endpoints(); + assert_eq!(endpoints.len(), 1); + + let schema = endpoints[0].response_schema.as_ref().unwrap(); + // Schema has no explicit type but has properties — should still parse + assert!(schema.properties.contains_key("apiVersion")); + assert!(schema.properties.contains_key("kind")); + assert!(schema.properties.contains_key("items")); + // schema_type should be None (no explicit type) + assert_eq!(schema.schema_type, None); +} + +// --- Kubernetes API: numeric format strings --- + +#[test] +fn test_kubernetes_format_int32() { + // Kubernetes API: format: "int32" on integer properties + let schema: Schema = serde_json::from_str(r#"{"type": 
"integer", "format": "int32"}"#).unwrap(); + assert_eq!(schema.schema_type, Some("integer".to_string())); + assert_eq!(schema.format, Some("int32".to_string())); +} + +#[test] +fn test_kubernetes_format_int64() { + // Kubernetes API: format: "int64" on integer properties + let schema: Schema = serde_json::from_str(r#"{"type": "integer", "format": "int64"}"#).unwrap(); + assert_eq!(schema.schema_type, Some("integer".to_string())); + assert_eq!(schema.format, Some("int64".to_string())); +} + +#[test] +fn test_kubernetes_format_double() { + // Kubernetes API: format: "double" on number properties + let schema: Schema = serde_json::from_str(r#"{"type": "number", "format": "double"}"#).unwrap(); + assert_eq!(schema.schema_type, Some("number".to_string())); + assert_eq!(schema.format, Some("double".to_string())); +} + +#[test] +fn test_format_float() { + // General: format: "float" on number properties + let schema: Schema = serde_json::from_str(r#"{"type": "number", "format": "float"}"#).unwrap(); + assert_eq!(schema.schema_type, Some("number".to_string())); + assert_eq!(schema.format, Some("float".to_string())); +} + +// --- Kubernetes API: 3.1 nullable int32/int64 --- + +#[test] +fn test_31_nullable_int32() { + // OpenAPI 3.1: type: ["integer", "null"] with format: "int32" + let schema: Schema = + serde_json::from_str(r#"{"type": ["integer", "null"], "format": "int32"}"#).unwrap(); + assert_eq!(schema.schema_type, Some("integer".to_string())); + assert!(schema.nullable); + assert_eq!(schema.format, Some("int32".to_string())); +} + +#[test] +fn test_31_nullable_int64() { + // OpenAPI 3.1: type: ["integer", "null"] with format: "int64" + let schema: Schema = + serde_json::from_str(r#"{"type": ["integer", "null"], "format": "int64"}"#).unwrap(); + assert_eq!(schema.schema_type, Some("integer".to_string())); + assert!(schema.nullable); + assert_eq!(schema.format, Some("int64".to_string())); +} + +#[test] +fn test_31_nullable_double() { + // OpenAPI 3.1: type: ["number", 
"null"] with format: "double" + // (already tested without null; testing nullable variant) + let schema: Schema = + serde_json::from_str(r#"{"type": ["number", "null"], "format": "double"}"#).unwrap(); + assert_eq!(schema.schema_type, Some("number".to_string())); + assert!(schema.nullable); + assert_eq!(schema.format, Some("double".to_string())); +} + +// --- DigitalOcean API: format: "uuid" --- + +#[test] +fn test_digitalocean_format_uuid() { + // DigitalOcean API: format: "uuid" on string properties + let schema: Schema = serde_json::from_str(r#"{"type": "string", "format": "uuid"}"#).unwrap(); + assert_eq!(schema.schema_type, Some("string".to_string())); + assert_eq!(schema.format, Some("uuid".to_string())); +} + +#[test] +fn test_31_nullable_uuid() { + // OpenAPI 3.1: type: ["string", "null"] with format: "uuid" + let schema: Schema = + serde_json::from_str(r#"{"type": ["string", "null"], "format": "uuid"}"#).unwrap(); + assert_eq!(schema.schema_type, Some("string".to_string())); + assert!(schema.nullable); + assert_eq!(schema.format, Some("uuid".to_string())); +} + +// --- OpenAPI 3.1 JSON Schema 2020-12: keywords that should be ignored --- + +#[test] +fn test_31_defs_ignored() { + // OpenAPI 3.1 (JSON Schema 2020-12): "$defs" replaces "definitions" + // Should be silently ignored by serde + let schema: Schema = serde_json::from_str( + r#"{ + "type": "object", + "properties": { + "name": {"type": "string"} + }, + "$defs": { + "helper": {"type": "string"} + } + }"#, + ) + .unwrap(); + assert_eq!(schema.schema_type, Some("object".to_string())); + assert!(schema.properties.contains_key("name")); +} + +#[test] +fn test_31_comment_ignored() { + // OpenAPI 3.1 (JSON Schema 2020-12): "$comment" for documentation + let schema: Schema = serde_json::from_str( + r#"{"type": "string", "$comment": "This is an internal comment for schema authors"}"#, + ) + .unwrap(); + assert_eq!(schema.schema_type, Some("string".to_string())); +} + +#[test] +fn 
test_31_if_then_else_ignored() { + // OpenAPI 3.1 (JSON Schema 2020-12): if/then/else conditional schemas + // Our parser ignores these (they don't affect type resolution for FDW) + let schema: Schema = serde_json::from_str( + r#"{ + "type": "object", + "properties": { + "type": {"type": "string"}, + "value": {"type": "string"} + }, + "if": {"properties": {"type": {"const": "email"}}}, + "then": {"properties": {"value": {"format": "email"}}}, + "else": {"properties": {"value": {"format": "uri"}}} + }"#, + ) + .unwrap(); + assert_eq!(schema.schema_type, Some("object".to_string())); + assert!(schema.properties.contains_key("type")); + assert!(schema.properties.contains_key("value")); +} + +#[test] +fn test_31_dependent_required_ignored() { + // OpenAPI 3.1 (JSON Schema 2020-12): dependentRequired + let schema: Schema = serde_json::from_str( + r#"{ + "type": "object", + "properties": { + "name": {"type": "string"}, + "credit_card": {"type": "string"}, + "billing_address": {"type": "string"} + }, + "dependentRequired": { + "credit_card": ["billing_address"] + } + }"#, + ) + .unwrap(); + assert!(schema.properties.contains_key("name")); + assert!(schema.properties.contains_key("credit_card")); + assert!(schema.properties.contains_key("billing_address")); +} + +#[test] +fn test_31_prefix_items_ignored() { + // OpenAPI 3.1 (JSON Schema 2020-12): prefixItems for tuple validation + let schema: Schema = serde_json::from_str( + r#"{ + "type": "array", + "prefixItems": [ + {"type": "number"}, + {"type": "string"}, + {"type": "string"} + ] + }"#, + ) + .unwrap(); + assert_eq!(schema.schema_type, Some("array".to_string())); +} + +#[test] +fn test_31_pattern_properties_ignored() { + // OpenAPI 3.1 (JSON Schema 2020-12): patternProperties + let schema: Schema = serde_json::from_str( + r#"{ + "type": "object", + "properties": { + "name": {"type": "string"} + }, + "patternProperties": { + "^x-": {"type": "string"} + } + }"#, + ) + .unwrap(); + assert_eq!(schema.schema_type, 
Some("object".to_string())); + assert!(schema.properties.contains_key("name")); + // patternProperties should not appear in regular properties + assert!(!schema.properties.contains_key("^x-")); +} + +#[test] +fn test_31_unevaluated_properties_ignored() { + // OpenAPI 3.1 (JSON Schema 2020-12): unevaluatedProperties + let schema: Schema = serde_json::from_str( + r#"{ + "type": "object", + "properties": { + "name": {"type": "string"} + }, + "unevaluatedProperties": false + }"#, + ) + .unwrap(); + assert_eq!(schema.schema_type, Some("object".to_string())); + assert!(schema.properties.contains_key("name")); +} + +#[test] +fn test_31_content_media_type_encoding_ignored() { + // OpenAPI 3.1 (JSON Schema 2020-12): contentMediaType and contentEncoding + // for binary/encoded string data + let schema: Schema = serde_json::from_str( + r#"{ + "type": "string", + "contentMediaType": "image/png", + "contentEncoding": "base64" + }"#, + ) + .unwrap(); + assert_eq!(schema.schema_type, Some("string".to_string())); +} + +#[test] +fn test_31_exclusive_min_max_as_numbers_ignored() { + // OpenAPI 3.1 (JSON Schema 2020-12): exclusiveMinimum/exclusiveMaximum are now + // numbers (not booleans as in 3.0). Should be silently ignored. 
+ let schema: Schema = serde_json::from_str( + r#"{"type": "integer", "exclusiveMinimum": 0, "exclusiveMaximum": 100}"#, + ) + .unwrap(); + assert_eq!(schema.schema_type, Some("integer".to_string())); +} + +// --- Deprecated properties and operations --- + +#[test] +fn test_31_deprecated_property_still_included() { + // OpenAPI 3.1: deprecated: true on a property should not exclude it + // (deprecated means "avoid using" not "removed") + let spec_json = r#"{ + "openapi": "3.1.0", + "info": {"title": "Test", "version": "1.0"}, + "paths": { + "/users": { + "get": { + "responses": { + "200": { + "description": "ok", + "content": { + "application/json": { + "schema": { + "type": "object", + "properties": { + "id": {"type": "integer"}, + "login": {"type": "string"}, + "gravatar_id": { + "type": ["string", "null"], + "deprecated": true + } + }, + "required": ["id", "login"] + } + } + } + } + } + } + } + } + }"#; + + let spec = OpenApiSpec::from_str(spec_json).unwrap(); + let endpoints = spec.get_endpoints(); + assert_eq!(endpoints.len(), 1); + + let schema = endpoints[0].response_schema.as_ref().unwrap(); + // Deprecated property should still be present + assert!(schema.properties.contains_key("gravatar_id")); + let gravatar = schema.properties.get("gravatar_id").unwrap(); + assert!(gravatar.nullable); + assert_eq!(gravatar.schema_type, Some("string".to_string())); +} + +#[test] +fn test_31_deprecated_operation_still_included() { + // OpenAPI 3.1: deprecated: true on an operation should still be included + // (the endpoint still works, just discouraged) + let spec_json = r#"{ + "openapi": "3.1.0", + "info": {"title": "Test", "version": "1.0"}, + "paths": { + "/legacy/items": { + "get": { + "deprecated": true, + "responses": { + "200": { + "description": "ok", + "content": { + "application/json": { + "schema": { + "type": "array", + "items": { + "type": "object", + "properties": { + "id": {"type": "string"} + } + } + } + } + } + } + } + } + } + } + }"#; + + let spec = 
OpenApiSpec::from_str(spec_json).unwrap(); + let endpoints = spec.get_endpoints(); + // Deprecated operations should still be discovered + assert_eq!(endpoints.len(), 1); + assert!(endpoints[0].response_schema.is_some()); +} + +// --- POST-for-read with OpenAPI 3.1 type arrays --- + +#[test] +fn test_31_post_for_read_with_type_arrays() { + // POST-for-read (e.g., Elasticsearch-style search) with 3.1 type arrays in response + let spec_json = r#"{ + "openapi": "3.1.0", + "info": {"title": "Search API", "version": "1.0"}, + "paths": { + "/search": { + "post": { + "responses": { + "200": { + "description": "ok", + "content": { + "application/json": { + "schema": { + "type": "object", + "properties": { + "hits": { + "type": "array", + "items": { + "type": "object", + "properties": { + "id": {"type": "string"}, + "score": {"type": ["number", "null"]}, + "matched_at": {"type": ["string", "null"], "format": "date-time"}, + "highlight": {"type": ["object", "null"]} + }, + "required": ["id"] + } + }, + "total": {"type": "integer"}, + "max_score": {"type": ["number", "null"]} + }, + "required": ["hits", "total"] + } + } + } + } + } + } + } + } + }"#; + + let spec = OpenApiSpec::from_str(spec_json).unwrap(); + let endpoints = spec.get_endpoints(); + assert_eq!(endpoints.len(), 1); + assert_eq!(endpoints[0].method, "POST"); + assert_eq!(endpoints[0].table_name(), "search_post"); + + let schema = endpoints[0].response_schema.as_ref().unwrap(); + assert!(schema.properties.contains_key("hits")); + assert!(schema.properties.contains_key("total")); + + let max_score = schema.properties.get("max_score").unwrap(); + assert!(max_score.nullable); + assert_eq!(max_score.schema_type, Some("number".to_string())); +} + +// --- Response $ref to schema that uses allOf (double indirection) --- + +#[test] +fn test_31_response_ref_to_allof_schema() { + // Real pattern: response has $ref to a schema, which itself is an allOf. 
+ // GitHub API uses this for pull-request (allOf of issue + extra fields). + let spec_json = r##"{ + "openapi": "3.1.0", + "info": {"title": "Test", "version": "1.0"}, + "paths": { + "/pulls": { + "get": { + "responses": { + "200": { + "description": "ok", + "content": { + "application/json": { + "schema": { + "type": "array", + "items": {"$ref": "#/components/schemas/pull-request"} + } + } + } + } + } + } + } + }, + "components": { + "schemas": { + "issue-base": { + "type": "object", + "properties": { + "id": {"type": "integer"}, + "title": {"type": "string"}, + "state": {"type": "string"}, + "created_at": {"type": "string", "format": "date-time"} + }, + "required": ["id", "title", "state"] + }, + "pull-request": { + "allOf": [ + {"$ref": "#/components/schemas/issue-base"}, + { + "type": "object", + "properties": { + "merged": {"type": "boolean"}, + "merged_at": {"type": ["string", "null"], "format": "date-time"}, + "commits": {"type": "integer"}, + "additions": {"type": "integer"}, + "deletions": {"type": "integer"} + } + } + ] + } + } + } + }"##; + + let spec = OpenApiSpec::from_str(spec_json).unwrap(); + let endpoints = spec.get_endpoints(); + assert_eq!(endpoints.len(), 1); + + let schema = endpoints[0].response_schema.as_ref().unwrap(); + let items = schema.items.as_ref().unwrap(); + let resolved = spec.resolve_schema(items); + + // Should have all properties from the allOf chain + assert!(resolved.properties.contains_key("id")); + assert!(resolved.properties.contains_key("title")); + assert!(resolved.properties.contains_key("state")); + assert!(resolved.properties.contains_key("created_at")); + assert!(resolved.properties.contains_key("merged")); + assert!(resolved.properties.contains_key("merged_at")); + assert!(resolved.properties.contains_key("commits")); + + // merged_at should be nullable + let merged_at = resolved.properties.get("merged_at").unwrap(); + assert!(merged_at.nullable); + assert_eq!(merged_at.format, Some("date-time".to_string())); + + // 
Required should include base requirements + assert!(resolved.required.contains(&"id".to_string())); + assert!(resolved.required.contains(&"title".to_string())); +} + +// --- anyOf with $ref + inline type + null (3 variants) --- + +#[test] +fn test_31_anyof_three_variants_ref_inline_null() { + // Real pattern: anyOf: [{type: "string"}, {$ref: "..."}, {type: "null"}] + // This appears in APIs where a field can be an ID string, expanded object, or null + let spec_json = r##"{ + "openapi": "3.1.0", + "info": {"title": "Test", "version": "1.0"}, + "paths": {}, + "components": { + "schemas": { + "Account": { + "type": "object", + "properties": { + "id": {"type": "string"}, + "email": {"type": "string"} + }, + "required": ["id"] + }, + "Transaction": { + "type": "object", + "properties": { + "id": {"type": "string"}, + "source": { + "anyOf": [ + {"type": "string"}, + {"$ref": "#/components/schemas/Account"}, + {"type": "null"} + ] + } + } + } + } + } + }"##; + + let spec = OpenApiSpec::from_str(spec_json).unwrap(); + let txn = spec + .resolve_ref("#/components/schemas/Transaction") + .unwrap(); + let resolved = spec.resolve_schema(txn); + + assert!(resolved.properties.contains_key("id")); + assert!(resolved.properties.contains_key("source")); + + // source is anyOf with 3 variants — should merge and resolve + let source = resolved.properties.get("source").unwrap(); + let source_resolved = spec.resolve_schema(source); + // The Account ref has properties, so merge produces object-like schema; NOTE(review): the check below passes if any ONE of its three conditions holds, so it pins the merged result only loosely + assert!( + source_resolved.properties.contains_key("id") + || source_resolved.schema_type.is_some() + || source_resolved.properties.contains_key("email") + ); +} + +// --- Schema with items but no explicit type: "array" --- + +#[test] +fn test_schema_items_without_array_type() { + // Some APIs define items without an explicit type: "array" + // The schema should still parse (items is present even without type) + let schema: Schema = serde_json::from_str( + r#"{ + "items": { + "type":
"object", + "properties": { + "id": {"type": "string"} + } + } + }"#, + ) + .unwrap(); + assert_eq!(schema.schema_type, None); + assert!(schema.items.is_some()); + let items = schema.items.as_ref().unwrap(); + assert!(items.properties.contains_key("id")); +} + +// --- Full end-to-end: OpenAPI 3.1 spec with comprehensive patterns --- + +#[test] +fn test_31_full_e2e_github_style_spec() { + // End-to-end test modeled after GitHub API 3.1.0 structure: + // - Multiple endpoints (GET + POST) + // - Type arrays for nullable + // - $ref resolution through nested component schemas (no allOf in this fixture) + // - anyOf with null for nullable refs + // - Format strings (date-time, uri) + // - Response $ref to components/responses + let spec_json = r##"{ + "openapi": "3.1.0", + "info": {"title": "GitHub-style API", "version": "2024-01-01"}, + "servers": [ + { + "url": "https://api.github.com" + } + ], + "paths": { + "/repos": { + "get": { + "responses": { + "200": { + "description": "List repositories", + "content": { + "application/json": { + "schema": { + "type": "array", + "items": {"$ref": "#/components/schemas/repository"} + } + } + } + } + } + } + }, + "/repos/search": { + "post": { + "responses": { + "200": { + "description": "Search repositories", + "content": { + "application/json": { + "schema": { + "type": "object", + "properties": { + "total_count": {"type": "integer"}, + "incomplete_results": {"type": "boolean"}, + "items": { + "type": "array", + "items": {"$ref": "#/components/schemas/repository"} + } + }, + "required": ["total_count", "incomplete_results", "items"] + } + } + } + } + } + } + }, + "/notifications": { + "get": { + "responses": { + "200": {"$ref": "#/components/responses/NotificationList"} + } + } + } + }, + "components": { + "schemas": { + "simple-user": { + "type": "object", + "properties": { + "login": {"type": "string"}, + "id": {"type": "integer"}, + "avatar_url": {"type": "string", "format": "uri"}, + "html_url": {"type": "string", "format": "uri"} + }, + "required": ["login",
"id", "avatar_url", "html_url"] + }, + "license": { + "type": "object", + "properties": { + "key": {"type": "string"}, + "name": {"type": "string"}, + "spdx_id": {"type": ["string", "null"]}, + "url": {"type": ["string", "null"], "format": "uri"} + }, + "required": ["key", "name"] + }, + "repository": { + "type": "object", + "properties": { + "id": {"type": "integer"}, + "name": {"type": "string"}, + "full_name": {"type": "string"}, + "private": {"type": "boolean"}, + "owner": {"$ref": "#/components/schemas/simple-user"}, + "description": {"type": ["string", "null"]}, + "fork": {"type": "boolean"}, + "html_url": {"type": "string", "format": "uri"}, + "created_at": {"type": "string", "format": "date-time"}, + "updated_at": {"type": "string", "format": "date-time"}, + "pushed_at": {"type": ["string", "null"], "format": "date-time"}, + "homepage": {"type": ["string", "null"], "format": "uri"}, + "size": {"type": "integer"}, + "stargazers_count": {"type": "integer"}, + "language": {"type": ["string", "null"]}, + "archived": {"type": "boolean"}, + "disabled": {"type": "boolean"}, + "license": { + "anyOf": [ + {"$ref": "#/components/schemas/license"}, + {"type": "null"} + ] + }, + "topics": { + "type": "array", + "items": {"type": "string"} + } + }, + "required": ["id", "name", "full_name", "private", "fork"] + }, + "notification-subject": { + "type": "object", + "properties": { + "title": {"type": "string"}, + "type": {"type": "string"}, + "url": {"type": ["string", "null"], "format": "uri"} + }, + "required": ["title", "type"] + }, + "notification": { + "type": "object", + "properties": { + "id": {"type": "string"}, + "unread": {"type": "boolean"}, + "reason": {"type": "string"}, + "updated_at": {"type": "string", "format": "date-time"}, + "subject": {"$ref": "#/components/schemas/notification-subject"}, + "repository": {"$ref": "#/components/schemas/repository"} + }, + "required": ["id", "unread", "reason", "updated_at"] + } + }, + "responses": { + "NotificationList": 
{ + "description": "List of notifications", + "content": { + "application/json": { + "schema": { + "type": "array", + "items": {"$ref": "#/components/schemas/notification"} + } + } + } + } + } + } + }"##; + + let spec = OpenApiSpec::from_str(spec_json).unwrap(); + + // Server URL + assert_eq!(spec.base_url(), Some("https://api.github.com".to_string())); + + // Should have 3 endpoints: GET /repos, POST /repos/search, GET /notifications + let endpoints = spec.get_endpoints(); + assert_eq!(endpoints.len(), 3); + + // Endpoints should be sorted + assert_eq!(endpoints[0].path, "/notifications"); + assert_eq!(endpoints[0].method, "GET"); + assert_eq!(endpoints[1].path, "/repos"); + assert_eq!(endpoints[1].method, "GET"); + assert_eq!(endpoints[2].path, "/repos/search"); + assert_eq!(endpoints[2].method, "POST"); + + // Table names + assert_eq!(endpoints[0].table_name(), "notifications"); + assert_eq!(endpoints[1].table_name(), "repos"); + assert_eq!(endpoints[2].table_name(), "repos_search_post"); + + // --- Verify /repos response schema --- + let repos_schema = endpoints[1].response_schema.as_ref().unwrap(); + let repos_items = repos_schema.items.as_ref().unwrap(); + let repo_resolved = spec.resolve_schema(repos_items); + + assert!(repo_resolved.properties.contains_key("id")); + assert!(repo_resolved.properties.contains_key("name")); + assert!(repo_resolved.properties.contains_key("description")); + assert!(repo_resolved.properties.contains_key("language")); + assert!(repo_resolved.properties.contains_key("license")); + assert!(repo_resolved.properties.contains_key("topics")); + + // description is nullable (type: ["string", "null"]) + let desc = repo_resolved.properties.get("description").unwrap(); + assert!(desc.nullable); + assert_eq!(desc.schema_type, Some("string".to_string())); + + // pushed_at is nullable datetime + let pushed_at = repo_resolved.properties.get("pushed_at").unwrap(); + assert!(pushed_at.nullable); + assert_eq!(pushed_at.format, 
Some("date-time".to_string())); + + // --- Verify /notifications response (via $ref to components/responses) --- + let notif_schema = endpoints[0].response_schema.as_ref().unwrap(); + let notif_items = notif_schema.items.as_ref().unwrap(); + let notif_resolved = spec.resolve_schema(notif_items); + + assert!(notif_resolved.properties.contains_key("id")); + assert!(notif_resolved.properties.contains_key("unread")); + assert!(notif_resolved.properties.contains_key("reason")); + assert!(notif_resolved.properties.contains_key("subject")); + assert!(notif_resolved.properties.contains_key("repository")); + + // --- Verify POST search response --- + let search_schema = endpoints[2].response_schema.as_ref().unwrap(); + assert!(search_schema.properties.contains_key("total_count")); + assert!(search_schema.properties.contains_key("items")); +} + +// --- Stripe-style full spec with 3.1 patterns --- + +#[test] +fn test_31_full_e2e_stripe_style_spec() { + // End-to-end test modeled after Stripe API 3.1.0: + // - List pagination wrapper (data/has_more/url) + // - Expandable fields (anyOf: [string, $ref]) + // - Type arrays for nullable + // - Unix timestamps + // - Metadata as untyped object + let spec_json = r##"{ + "openapi": "3.1.0", + "info": {"title": "Stripe-style API", "version": "2024-01-01"}, + "servers": [{"url": "https://api.stripe.com"}], + "paths": { + "/v1/customers": { + "get": { + "responses": { + "200": { + "description": "ok", + "content": { + "application/json": { + "schema": { + "type": "object", + "properties": { + "object": {"type": "string"}, + "data": { + "type": "array", + "items": {"$ref": "#/components/schemas/customer"} + }, + "has_more": {"type": "boolean"}, + "url": {"type": "string"} + }, + "required": ["object", "data", "has_more", "url"] + } + } + } + } + } + } + } + }, + "components": { + "schemas": { + "address": { + "type": "object", + "properties": { + "city": {"type": ["string", "null"]}, + "country": {"type": ["string", "null"]}, + "line1": 
{"type": ["string", "null"]}, + "line2": {"type": ["string", "null"]}, + "postal_code": {"type": ["string", "null"]}, + "state": {"type": ["string", "null"]} + } + }, + "customer": { + "type": "object", + "properties": { + "id": {"type": "string"}, + "object": {"type": "string"}, + "created": {"type": "integer", "format": "unix-time"}, + "email": {"type": ["string", "null"]}, + "name": {"type": ["string", "null"]}, + "phone": {"type": ["string", "null"]}, + "description": {"type": ["string", "null"]}, + "balance": {"type": "integer"}, + "currency": {"type": ["string", "null"]}, + "delinquent": {"type": ["boolean", "null"]}, + "livemode": {"type": "boolean"}, + "address": { + "anyOf": [ + {"$ref": "#/components/schemas/address"}, + {"type": "null"} + ] + }, + "metadata": {"type": "object"} + }, + "required": ["id", "object", "created", "livemode"] + } + } + } + }"##; + + let spec = OpenApiSpec::from_str(spec_json).unwrap(); + let endpoints = spec.get_endpoints(); + assert_eq!(endpoints.len(), 1); + + let schema = endpoints[0].response_schema.as_ref().unwrap(); + let data = schema.properties.get("data").unwrap(); + let items = data.items.as_ref().unwrap(); + let customer = spec.resolve_schema(items); + + // Verify all properties present + assert!(customer.properties.contains_key("id")); + assert!(customer.properties.contains_key("created")); + assert!(customer.properties.contains_key("email")); + assert!(customer.properties.contains_key("address")); + assert!(customer.properties.contains_key("metadata")); + + // Verify nullable fields + assert!(customer.properties.get("email").unwrap().nullable); + assert!(customer.properties.get("name").unwrap().nullable); + assert!(customer.properties.get("phone").unwrap().nullable); + assert!(customer.properties.get("delinquent").unwrap().nullable); + + // Verify non-nullable fields + assert!(!customer.properties.get("livemode").unwrap().nullable); + assert!(!customer.properties.get("id").unwrap().nullable); + + // Verify created 
has unix-time format + let created = customer.properties.get("created").unwrap(); + assert_eq!(created.format, Some("unix-time".to_string())); + assert_eq!(created.schema_type, Some("integer".to_string())); +} + +// --- oneOf with discriminator + mapping --- + +#[test] +fn test_31_oneof_with_discriminator_and_mapping() { + // OpenAPI 3.1: oneOf with discriminator.mapping for polymorphic types + // Mapping provides URI-based resolution hints (we ignore mapping, just merge variants) + let spec_json = r##"{ + "openapi": "3.1.0", + "info": {"title": "Test", "version": "1.0"}, + "paths": {}, + "components": { + "schemas": { + "CreditCard": { + "type": "object", + "properties": { + "type": {"type": "string"}, + "last_four": {"type": "string"}, + "brand": {"type": "string"} + }, + "required": ["type"] + }, + "BankAccount": { + "type": "object", + "properties": { + "type": {"type": "string"}, + "routing_number": {"type": "string"}, + "account_holder": {"type": "string"} + }, + "required": ["type"] + }, + "PaymentMethod": { + "oneOf": [ + {"$ref": "#/components/schemas/CreditCard"}, + {"$ref": "#/components/schemas/BankAccount"} + ], + "discriminator": { + "propertyName": "type", + "mapping": { + "credit_card": "#/components/schemas/CreditCard", + "bank_account": "#/components/schemas/BankAccount" + } + } + } + } + } + }"##; + + // Should parse without error (discriminator + mapping ignored by serde) + let spec = OpenApiSpec::from_str(spec_json).unwrap(); + let pm = spec + .resolve_ref("#/components/schemas/PaymentMethod") + .unwrap(); + let resolved = spec.resolve_schema(pm); + + // oneOf merges all variant properties as nullable + assert!(resolved.properties.contains_key("type")); + assert!(resolved.properties.contains_key("last_four")); + assert!(resolved.properties.contains_key("brand")); + assert!(resolved.properties.contains_key("routing_number")); + assert!(resolved.properties.contains_key("account_holder")); +} + +// --- allOf where one schema adds only required (no 
new properties) --- + +#[test] +fn test_31_allof_ref_with_additional_required_only() { + // Kubernetes pattern: allOf: [{$ref}, {required: [...]}] + // The second schema only adds required constraints, no new properties + let spec_json = r##"{ + "openapi": "3.1.0", + "info": {"title": "Test", "version": "1.0"}, + "paths": {}, + "components": { + "schemas": { + "Resource": { + "type": "object", + "properties": { + "id": {"type": "string"}, + "name": {"type": "string"}, + "status": {"type": "string"} + } + }, + "StrictResource": { + "allOf": [ + {"$ref": "#/components/schemas/Resource"}, + { + "required": ["id", "name", "status"] + } + ] + } + } + } + }"##; + + let spec = OpenApiSpec::from_str(spec_json).unwrap(); + let strict = spec + .resolve_ref("#/components/schemas/StrictResource") + .unwrap(); + let resolved = spec.resolve_schema(strict); + + assert!(resolved.properties.contains_key("id")); + assert!(resolved.properties.contains_key("name")); + assert!(resolved.properties.contains_key("status")); + assert!(resolved.required.contains(&"id".to_string())); + assert!(resolved.required.contains(&"name".to_string())); + assert!(resolved.required.contains(&"status".to_string())); +} + +// --- Response with only binary content (application/octet-stream) --- + +#[test] +fn test_response_only_binary_content() { + // Some APIs return binary data — schema should still be extracted via fallback + let spec_json = r#"{ + "openapi": "3.0.0", + "info": {"title": "Test", "version": "1.0"}, + "paths": { + "/download": { + "get": { + "responses": { + "200": { + "description": "Binary file", + "content": { + "application/octet-stream": { + "schema": { + "type": "string", + "format": "binary" + } + } + } + } + } + } + } + } + }"#; + + let spec = OpenApiSpec::from_str(spec_json).unwrap(); + let endpoints = spec.get_endpoints(); + assert_eq!(endpoints.len(), 1); + // Should fall back to the only available content type + let schema = endpoints[0].response_schema.as_ref().unwrap(); + 
assert_eq!(schema.schema_type, Some("string".to_string())); + assert_eq!(schema.format, Some("binary".to_string())); +} + +// --- Multiple $ref resolution through response → ref → allOf → ref chain --- + +#[test] +fn test_31_triple_indirection_response_ref_allof_ref() { + // Response $ref → components/responses → schema $ref → allOf → $ref + // This tests the deepest indirection chain common in real APIs + let spec_json = r##"{ + "openapi": "3.1.0", + "info": {"title": "Test", "version": "1.0"}, + "paths": { + "/resources": { + "get": { + "responses": { + "200": {"$ref": "#/components/responses/ResourceList"} + } + } + } + }, + "components": { + "schemas": { + "Timestamps": { + "type": "object", + "properties": { + "created_at": {"type": "string", "format": "date-time"}, + "updated_at": {"type": ["string", "null"], "format": "date-time"} + }, + "required": ["created_at"] + }, + "Resource": { + "allOf": [ + {"$ref": "#/components/schemas/Timestamps"}, + { + "type": "object", + "properties": { + "id": {"type": "string", "format": "uuid"}, + "name": {"type": "string"}, + "status": {"type": "string"} + }, + "required": ["id", "name"] + } + ] + } + }, + "responses": { + "ResourceList": { + "description": "List of resources", + "content": { + "application/json": { + "schema": { + "type": "array", + "items": {"$ref": "#/components/schemas/Resource"} + } + } + } + } + } + } + }"##; + + let spec = OpenApiSpec::from_str(spec_json).unwrap(); + let endpoints = spec.get_endpoints(); + assert_eq!(endpoints.len(), 1); + + let schema = endpoints[0].response_schema.as_ref().unwrap(); + let items = schema.items.as_ref().unwrap(); + let resolved = spec.resolve_schema(items); + + // Should have all properties from the entire chain + assert!(resolved.properties.contains_key("created_at")); + assert!(resolved.properties.contains_key("updated_at")); + assert!(resolved.properties.contains_key("id")); + assert!(resolved.properties.contains_key("name")); + 
assert!(resolved.properties.contains_key("status")); + + // updated_at should be nullable + let updated = resolved.properties.get("updated_at").unwrap(); + assert!(updated.nullable); + + // id should have uuid format + let id = resolved.properties.get("id").unwrap(); + assert_eq!(id.format, Some("uuid".to_string())); + + // Required should include from both + assert!(resolved.required.contains(&"created_at".to_string())); + assert!(resolved.required.contains(&"id".to_string())); + assert!(resolved.required.contains(&"name".to_string())); +} + +// --- Format strings: byte and binary (base64 encoded content) --- + +#[test] +fn test_format_byte_base64() { + // OpenAPI: format: "byte" for base64-encoded binary data + let schema: Schema = serde_json::from_str(r#"{"type": "string", "format": "byte"}"#).unwrap(); + assert_eq!(schema.schema_type, Some("string".to_string())); + assert_eq!(schema.format, Some("byte".to_string())); +} + +#[test] +fn test_format_binary() { + // OpenAPI: format: "binary" for raw binary data + let schema: Schema = serde_json::from_str(r#"{"type": "string", "format": "binary"}"#).unwrap(); + assert_eq!(schema.schema_type, Some("string".to_string())); + assert_eq!(schema.format, Some("binary".to_string())); +} + +// --- Format string: time --- + +#[test] +fn test_format_time() { + // OpenAPI: format: "time" for time-only values + let schema: Schema = serde_json::from_str(r#"{"type": "string", "format": "time"}"#).unwrap(); + assert_eq!(schema.schema_type, Some("string".to_string())); + assert_eq!(schema.format, Some("time".to_string())); +} + +// --- OpenAPI 3.1: $ref with siblings combined (nullable + writeOnly + required) --- + +#[test] +fn test_31_ref_with_multiple_siblings() { + // OpenAPI 3.1: $ref with nullable + additional required fields merged + let spec_json = r##"{ + "openapi": "3.1.0", + "info": {"title": "Test", "version": "1.0"}, + "paths": {}, + "components": { + "schemas": { + "Address": { + "type": "object", + "properties": { + 
"line1": {"type": "string"}, + "city": {"type": "string"}, + "country": {"type": "string"} + }, + "required": ["line1"] + } + } + } + }"##; + + let spec = OpenApiSpec::from_str(spec_json).unwrap(); + + // Construct a $ref with multiple siblings + let mut extra_props = HashMap::new(); + extra_props.insert( + "verified".to_string(), + Schema { + schema_type: Some("boolean".to_string()), + ..Default::default() + }, + ); + + let ref_with_siblings = Schema { + reference: Some("#/components/schemas/Address".to_string()), + nullable: true, + write_only: true, + properties: extra_props, + required: vec!["city".to_string(), "verified".to_string()], + ..Default::default() + }; + + let resolved = spec.resolve_schema(&ref_with_siblings); + + // All original properties + sibling properties + assert!(resolved.properties.contains_key("line1")); + assert!(resolved.properties.contains_key("city")); + assert!(resolved.properties.contains_key("country")); + assert!(resolved.properties.contains_key("verified")); + + // Siblings merged + assert!(resolved.nullable); + assert!(resolved.write_only); + + // Required merged and deduplicated + assert!(resolved.required.contains(&"line1".to_string())); + assert!(resolved.required.contains(&"city".to_string())); + assert!(resolved.required.contains(&"verified".to_string())); +} + +// --- Empty oneOf/anyOf (edge case) --- + +#[test] +fn test_empty_oneof() { + // Edge case: oneOf with no variants + let spec_json = r#"{ + "openapi": "3.0.0", + "info": {"title": "Test", "version": "1.0"}, + "paths": {}, + "components": { + "schemas": { + "Empty": { + "oneOf": [] + } + } + } + }"#; + + let spec = OpenApiSpec::from_str(spec_json).unwrap(); + let empty = spec.resolve_ref("#/components/schemas/Empty").unwrap(); + let resolved = spec.resolve_schema(empty); + assert!(resolved.properties.is_empty()); +} + +#[test] +fn test_empty_anyof() { + // Edge case: anyOf with no variants + let spec_json = r#"{ + "openapi": "3.0.0", + "info": {"title": "Test", 
"version": "1.0"}, + "paths": {}, + "components": { + "schemas": { + "Empty": { + "anyOf": [] + } + } + } + }"#; + + let spec = OpenApiSpec::from_str(spec_json).unwrap(); + let empty = spec.resolve_ref("#/components/schemas/Empty").unwrap(); + let resolved = spec.resolve_schema(empty); + assert!(resolved.properties.is_empty()); +} + +// --- OpenAPI 3.1 response code variants with type arrays --- +// Ensures all response code paths work with 3.1-specific nullable type arrays. + +#[test] +fn test_31_response_201_with_type_arrays() { + // OpenAPI 3.1 endpoint returning 201 with type arrays in schema + // Real pattern: POST creation endpoints that return the created object + let spec_json = r#"{ + "openapi": "3.1.0", + "info": {"title": "Test", "version": "1.0"}, + "paths": { + "/resources": { + "post": { + "responses": { + "201": { + "description": "Created", + "content": { + "application/json": { + "schema": { + "type": "object", + "properties": { + "id": {"type": "string", "format": "uuid"}, + "name": {"type": "string"}, + "description": {"type": ["string", "null"]}, + "created_at": {"type": "string", "format": "date-time"} + }, + "required": ["id", "name", "created_at"] + } + } + } + } + } + } + } + } + }"#; + + let spec = OpenApiSpec::from_str(spec_json).unwrap(); + let endpoints = spec.get_endpoints(); + assert_eq!(endpoints.len(), 1); + + let schema = endpoints[0].response_schema.as_ref().unwrap(); + assert!(schema.properties.contains_key("id")); + let desc = schema.properties.get("description").unwrap(); + assert!(desc.nullable); + assert_eq!(desc.schema_type, Some("string".to_string())); +} + +#[test] +fn test_31_response_2xx_with_type_arrays() { + // OpenAPI 3.1 endpoint using 2XX wildcard with type arrays + let spec_json = r#"{ + "openapi": "3.1.0", + "info": {"title": "Test", "version": "1.0"}, + "paths": { + "/items": { + "get": { + "responses": { + "2XX": { + "description": "Success", + "content": { + "application/json": { + "schema": { + "type": "array", 
+ "items": { + "type": "object", + "properties": { + "id": {"type": "integer"}, + "label": {"type": ["string", "null"]}, + "value": {"type": ["number", "null"], "format": "double"} + }, + "required": ["id"] + } + } + } + } + } + } + } + } + } + }"#; + + let spec = OpenApiSpec::from_str(spec_json).unwrap(); + let endpoints = spec.get_endpoints(); + assert_eq!(endpoints.len(), 1); + + let schema = endpoints[0].response_schema.as_ref().unwrap(); + let items = schema.items.as_ref().unwrap(); + let label = items.properties.get("label").unwrap(); + assert!(label.nullable); + assert_eq!(label.schema_type, Some("string".to_string())); + + let value = items.properties.get("value").unwrap(); + assert!(value.nullable); + assert_eq!(value.format, Some("double".to_string())); +} + +#[test] +fn test_31_response_default_only_with_type_arrays() { + // OpenAPI 3.1 endpoint with only "default" response and type arrays + let spec_json = r#"{ + "openapi": "3.1.0", + "info": {"title": "Test", "version": "1.0"}, + "paths": { + "/status": { + "get": { + "responses": { + "default": { + "description": "Default response", + "content": { + "application/json": { + "schema": { + "type": "object", + "properties": { + "status": {"type": "string"}, + "message": {"type": ["string", "null"]}, + "timestamp": {"type": "integer", "format": "unix-time"} + }, + "required": ["status"] + } + } + } + } + } + } + } + } + }"#; + + let spec = OpenApiSpec::from_str(spec_json).unwrap(); + let endpoints = spec.get_endpoints(); + assert_eq!(endpoints.len(), 1); + + let schema = endpoints[0].response_schema.as_ref().unwrap(); + let msg = schema.properties.get("message").unwrap(); + assert!(msg.nullable); + assert_eq!(msg.schema_type, Some("string".to_string())); +} + +#[test] +fn test_31_response_priority_all_codes_with_type_arrays() { + // OpenAPI 3.1: 200 should be preferred over 201/2XX/default + // Ensures response code priority works correctly with 3.1 schemas + let spec_json = r#"{ + "openapi": "3.1.0", + 
"info": {"title": "Test", "version": "1.0"}, + "paths": { + "/items": { + "get": { + "responses": { + "200": { + "description": "OK", + "content": { + "application/json": { + "schema": { + "type": "object", + "properties": { + "from_200": {"type": ["string", "null"]} + } + } + } + } + }, + "201": { + "description": "Created", + "content": { + "application/json": { + "schema": { + "type": "object", + "properties": { + "from_201": {"type": "string"} + } + } + } + } + }, + "2XX": { + "description": "Wildcard", + "content": { + "application/json": { + "schema": { + "type": "object", + "properties": { + "from_2xx": {"type": "string"} + } + } + } + } + }, + "default": { + "description": "Default", + "content": { + "application/json": { + "schema": { + "type": "object", + "properties": { + "from_default": {"type": "string"} + } + } + } + } + } + } + } + } + } + }"#; + + let spec = OpenApiSpec::from_str(spec_json).unwrap(); + let endpoints = spec.get_endpoints(); + let schema = endpoints[0].response_schema.as_ref().unwrap(); + + // 200 should win + assert!(schema.properties.contains_key("from_200")); + assert!(!schema.properties.contains_key("from_201")); + assert!(!schema.properties.contains_key("from_2xx")); + assert!(!schema.properties.contains_key("from_default")); + + // And the 3.1 type array should be resolved + let field = schema.properties.get("from_200").unwrap(); + assert!(field.nullable); +} + +#[test] +fn test_31_non_json_content_type_with_type_arrays() { + // OpenAPI 3.1 with non-JSON content types containing type arrays + // Tests GeoJSON fallback with 3.1 nullable patterns + let spec_json = r#"{ + "openapi": "3.1.0", + "info": {"title": "Test", "version": "1.0"}, + "paths": { + "/features": { + "get": { + "responses": { + "200": { + "description": "ok", + "content": { + "application/geo+json": { + "schema": { + "type": "object", + "properties": { + "type": {"type": "string"}, + "bbox": {"type": ["array", "null"]}, + "features": { + "type": "array", + 
"items": { + "type": "object", + "properties": { + "id": {"type": ["string", "null"]}, + "geometry": {"type": ["object", "null"]} + } + } + } + } + } + } + } + } + } + } + } + } + }"#; + + let spec = OpenApiSpec::from_str(spec_json).unwrap(); + let endpoints = spec.get_endpoints(); + assert_eq!(endpoints.len(), 1); + + let schema = endpoints[0].response_schema.as_ref().unwrap(); + let bbox = schema.properties.get("bbox").unwrap(); + assert!(bbox.nullable); + assert_eq!(bbox.schema_type, Some("array".to_string())); +} + +#[test] +fn test_31_response_ref_with_type_arrays() { + // OpenAPI 3.1: response-level $ref where the target schema uses type arrays + let spec_json = r##"{ + "openapi": "3.1.0", + "info": {"title": "Test", "version": "1.0"}, + "paths": { + "/events": { + "get": { + "responses": { + "200": {"$ref": "#/components/responses/EventList"} + } + } + } + }, + "components": { + "responses": { + "EventList": { + "description": "List of events", + "content": { + "application/json": { + "schema": { + "type": "array", + "items": { + "type": "object", + "properties": { + "id": {"type": "integer"}, + "type": {"type": "string"}, + "actor": {"type": ["string", "null"]}, + "created_at": {"type": "string", "format": "date-time"}, + "payload": {"type": ["object", "null"]} + }, + "required": ["id", "type", "created_at"] + } + } + } + } + } + } + } + }"##; + + let spec = OpenApiSpec::from_str(spec_json).unwrap(); + let endpoints = spec.get_endpoints(); + assert_eq!(endpoints.len(), 1); + + let schema = endpoints[0].response_schema.as_ref().unwrap(); + let items = schema.items.as_ref().unwrap(); + + let actor = items.properties.get("actor").unwrap(); + assert!(actor.nullable); + assert_eq!(actor.schema_type, Some("string".to_string())); + + let payload = items.properties.get("payload").unwrap(); + assert!(payload.nullable); + assert_eq!(payload.schema_type, Some("object".to_string())); +} + +#[test] +fn test_31_get_post_same_path_different_schemas() { + // OpenAPI 3.1: 
same path with GET and POST returning different schemas, + // both using 3.1 type arrays + let spec_json = r#"{ + "openapi": "3.1.0", + "info": {"title": "Test", "version": "1.0"}, + "paths": { + "/items": { + "get": { + "responses": { + "200": { + "description": "List items", + "content": { + "application/json": { + "schema": { + "type": "array", + "items": { + "type": "object", + "properties": { + "id": {"type": "integer"}, + "summary": {"type": ["string", "null"]} + }, + "required": ["id"] + } + } + } + } + } + } + }, + "post": { + "responses": { + "201": { + "description": "Created", + "content": { + "application/json": { + "schema": { + "type": "object", + "properties": { + "id": {"type": "integer"}, + "summary": {"type": ["string", "null"]}, + "details": {"type": ["string", "null"]}, + "created_at": {"type": "string", "format": "date-time"} + }, + "required": ["id", "created_at"] + } + } + } + } + } + } + } + } + }"#; + + let spec = OpenApiSpec::from_str(spec_json).unwrap(); + let endpoints = spec.get_endpoints(); + assert_eq!(endpoints.len(), 2); + + // GET comes first (alphabetically by method) + assert_eq!(endpoints[0].method, "GET"); + assert_eq!(endpoints[1].method, "POST"); + + // GET has array response with 2 properties + let get_schema = endpoints[0].response_schema.as_ref().unwrap(); + let get_items = get_schema.items.as_ref().unwrap(); + assert_eq!(get_items.properties.len(), 2); + + // POST has object response with 4 properties + let post_schema = endpoints[1].response_schema.as_ref().unwrap(); + assert_eq!(post_schema.properties.len(), 4); + assert!(post_schema.properties.contains_key("details")); + + // Table names should differ + assert_eq!(endpoints[0].table_name(), "items"); + assert_eq!(endpoints[1].table_name(), "items_post"); +} + +#[test] +fn test_branching_allof_completes_within_call_limit() { + // Build a spec where allOf branches exponentially: + // Root -> allOf[A, B, C, D, E] and each of those -> allOf[F, G, H, I, J] + // That's 5*5 = 
25 resolve calls, well within the 10,000 limit. + // This test verifies branching specs complete and produce merged properties. + let spec_json = r##"{ + "openapi": "3.0.0", + "info": {"title": "Branch Test", "version": "1.0"}, + "paths": {}, + "components": { + "schemas": { + "Leaf1": {"type": "object", "properties": {"a": {"type": "string"}}}, + "Leaf2": {"type": "object", "properties": {"b": {"type": "integer"}}}, + "Leaf3": {"type": "object", "properties": {"c": {"type": "boolean"}}}, + "Leaf4": {"type": "object", "properties": {"d": {"type": "number"}}}, + "Leaf5": {"type": "object", "properties": {"e": {"type": "string"}}}, + "Mid1": {"allOf": [{"$ref": "#/components/schemas/Leaf1"}, {"$ref": "#/components/schemas/Leaf2"}]}, + "Mid2": {"allOf": [{"$ref": "#/components/schemas/Leaf3"}, {"$ref": "#/components/schemas/Leaf4"}]}, + "Mid3": {"allOf": [{"$ref": "#/components/schemas/Leaf5"}, {"$ref": "#/components/schemas/Leaf1"}]}, + "Mid4": {"allOf": [{"$ref": "#/components/schemas/Leaf2"}, {"$ref": "#/components/schemas/Leaf3"}]}, + "Mid5": {"allOf": [{"$ref": "#/components/schemas/Leaf4"}, {"$ref": "#/components/schemas/Leaf5"}]}, + "Root": { + "allOf": [ + {"$ref": "#/components/schemas/Mid1"}, + {"$ref": "#/components/schemas/Mid2"}, + {"$ref": "#/components/schemas/Mid3"}, + {"$ref": "#/components/schemas/Mid4"}, + {"$ref": "#/components/schemas/Mid5"} + ] + } + } + } + }"##; + + let spec = OpenApiSpec::from_str(spec_json).unwrap(); + let root = spec.resolve_ref("#/components/schemas/Root").unwrap(); + let resolved = spec.resolve_schema(root); + + // All 5 leaf properties should be merged + assert_eq!(resolved.schema_type, Some("object".to_string())); + assert!(resolved.properties.contains_key("a")); + assert!(resolved.properties.contains_key("b")); + assert!(resolved.properties.contains_key("c")); + assert!(resolved.properties.contains_key("d")); + assert!(resolved.properties.contains_key("e")); +} + +#[test] +fn 
test_excessive_branching_hits_call_limit_gracefully() { + // Build a spec that fans out 4 ways at each of 3 allOf levels (Root -> L2* -> L1* -> L0*): 4^3 = 64 leaf visits. + // The call limit of 10,000 easily handles this, but we verify it doesn't hang. + // A truly exponential spec (4^10 = 1M) would be capped by MAX_RESOLVE_CALLS. + let spec_json = r##"{ + "openapi": "3.0.0", + "info": {"title": "Deep Branch", "version": "1.0"}, + "paths": {}, + "components": { + "schemas": { + "L0a": {"type": "object", "properties": {"x": {"type": "string"}}}, + "L0b": {"type": "object", "properties": {"y": {"type": "string"}}}, + "L0c": {"type": "object", "properties": {"z": {"type": "string"}}}, + "L0d": {"type": "object", "properties": {"w": {"type": "string"}}}, + "L1a": {"allOf": [{"$ref": "#/components/schemas/L0a"}, {"$ref": "#/components/schemas/L0b"}, {"$ref": "#/components/schemas/L0c"}, {"$ref": "#/components/schemas/L0d"}]}, + "L1b": {"allOf": [{"$ref": "#/components/schemas/L0a"}, {"$ref": "#/components/schemas/L0b"}, {"$ref": "#/components/schemas/L0c"}, {"$ref": "#/components/schemas/L0d"}]}, + "L1c": {"allOf": [{"$ref": "#/components/schemas/L0a"}, {"$ref": "#/components/schemas/L0b"}, {"$ref": "#/components/schemas/L0c"}, {"$ref": "#/components/schemas/L0d"}]}, + "L1d": {"allOf": [{"$ref": "#/components/schemas/L0a"}, {"$ref": "#/components/schemas/L0b"}, {"$ref": "#/components/schemas/L0c"}, {"$ref": "#/components/schemas/L0d"}]}, + "L2a": {"allOf": [{"$ref": "#/components/schemas/L1a"}, {"$ref": "#/components/schemas/L1b"}, {"$ref": "#/components/schemas/L1c"}, {"$ref": "#/components/schemas/L1d"}]}, + "L2b": {"allOf": [{"$ref": "#/components/schemas/L1a"}, {"$ref": "#/components/schemas/L1b"}, {"$ref": "#/components/schemas/L1c"}, {"$ref": "#/components/schemas/L1d"}]}, + "L2c": {"allOf": [{"$ref": "#/components/schemas/L1a"}, {"$ref": "#/components/schemas/L1b"}, {"$ref": "#/components/schemas/L1c"}, {"$ref": "#/components/schemas/L1d"}]}, + "L2d": {"allOf": [{"$ref":
"#/components/schemas/L1a"}, {"$ref": "#/components/schemas/L1b"}, {"$ref": "#/components/schemas/L1c"}, {"$ref": "#/components/schemas/L1d"}]}, + "Root": {"allOf": [{"$ref": "#/components/schemas/L2a"}, {"$ref": "#/components/schemas/L2b"}, {"$ref": "#/components/schemas/L2c"}, {"$ref": "#/components/schemas/L2d"}]} + } + } + }"##; + + let spec = OpenApiSpec::from_str(spec_json).unwrap(); + let root = spec.resolve_ref("#/components/schemas/Root").unwrap(); + + // Should complete without hanging — the call limit prevents exponential blowup + let resolved = spec.resolve_schema(root); + + // Should still produce an object with merged properties (from the leaves that resolved) + assert_eq!(resolved.schema_type, Some("object".to_string())); +} + +// --- Parent sibling merging for composition keywords --- + +#[test] +fn test_allof_with_parent_required_siblings() { + // Parent-level `required` alongside allOf should be merged into the result + let spec_json = r##"{ + "openapi": "3.1.0", + "info": {"title": "Test", "version": "1.0"}, + "paths": {}, + "components": { + "schemas": { + "Base": { + "type": "object", + "properties": { + "id": {"type": "string"}, + "name": {"type": "string"} + }, + "required": ["id"] + }, + "Extended": { + "allOf": [ + {"$ref": "#/components/schemas/Base"} + ], + "properties": { + "extra": {"type": "boolean"} + }, + "required": ["name", "extra"] + } + } + } + }"##; + + let spec = OpenApiSpec::from_str(spec_json).unwrap(); + let extended = spec.resolve_ref("#/components/schemas/Extended").unwrap(); + let resolved = spec.resolve_schema(extended); + + assert!(resolved.properties.contains_key("id")); + assert!(resolved.properties.contains_key("name")); + assert!(resolved.properties.contains_key("extra")); + // Required from both Base and parent should be merged + assert!(resolved.required.contains(&"id".to_string())); + assert!(resolved.required.contains(&"name".to_string())); + assert!(resolved.required.contains(&"extra".to_string())); +} + 
+#[test] +fn test_oneof_with_parent_properties() { + // Parent-level properties alongside oneOf should be merged + let spec_json = r##"{ + "openapi": "3.1.0", + "info": {"title": "Test", "version": "1.0"}, + "paths": {}, + "components": { + "schemas": { + "Mixed": { + "oneOf": [ + { + "type": "object", + "properties": { + "variant_a": {"type": "string"} + } + }, + { + "type": "object", + "properties": { + "variant_b": {"type": "integer"} + } + } + ], + "properties": { + "common": {"type": "boolean"} + } + } + } + } + }"##; + + let spec = OpenApiSpec::from_str(spec_json).unwrap(); + let mixed = spec.resolve_ref("#/components/schemas/Mixed").unwrap(); + let resolved = spec.resolve_schema(mixed); + + assert!(resolved.properties.contains_key("variant_a")); + assert!(resolved.properties.contains_key("variant_b")); + assert!( + resolved.properties.contains_key("common"), + "parent-level 'common' should be merged alongside oneOf" + ); +} + +#[test] +fn test_anyof_with_parent_properties() { + // Parent-level properties alongside anyOf should be merged + let spec_json = r##"{ + "openapi": "3.1.0", + "info": {"title": "Test", "version": "1.0"}, + "paths": {}, + "components": { + "schemas": { + "Flexible": { + "anyOf": [ + { + "type": "object", + "properties": { + "opt_a": {"type": "string"} + } + } + ], + "properties": { + "shared": {"type": "number"} + } + } + } + } + }"##; + + let spec = OpenApiSpec::from_str(spec_json).unwrap(); + let flexible = spec.resolve_ref("#/components/schemas/Flexible").unwrap(); + let resolved = spec.resolve_schema(flexible); + + assert!(resolved.properties.contains_key("opt_a")); + assert!( + resolved.properties.contains_key("shared"), + "parent-level 'shared' should be merged alongside anyOf" + ); +} + +#[test] +fn test_ref_with_allof_coexistence() { + // OpenAPI 3.1: $ref can coexist with allOf on the same schema + let spec_json = r##"{ + "openapi": "3.1.0", + "info": {"title": "Test", "version": "1.0"}, + "paths": {}, + "components": { + 
"schemas": { + "Base": { + "type": "object", + "properties": { + "base_field": {"type": "string"} + } + }, + "Mixin": { + "type": "object", + "properties": { + "mixin_field": {"type": "integer"} + } + }, + "Combined": { + "$ref": "#/components/schemas/Base", + "allOf": [ + {"$ref": "#/components/schemas/Mixin"} + ] + } + } + } + }"##; + + let spec = OpenApiSpec::from_str(spec_json).unwrap(); + let combined = spec.resolve_ref("#/components/schemas/Combined").unwrap(); + let resolved = spec.resolve_schema(combined); + + assert!( + resolved.properties.contains_key("base_field"), + "base_field from $ref should be present" + ); + assert!( + resolved.properties.contains_key("mixin_field"), + "mixin_field from allOf should be merged alongside $ref" + ); +} + +#[test] +fn test_ref_with_oneof_coexistence() { + // OpenAPI 3.1: $ref can coexist with oneOf + let spec_json = r##"{ + "openapi": "3.1.0", + "info": {"title": "Test", "version": "1.0"}, + "paths": {}, + "components": { + "schemas": { + "Base": { + "type": "object", + "properties": { + "id": {"type": "string"} + } + }, + "VariantA": { + "type": "object", + "properties": { + "a_field": {"type": "boolean"} + } + }, + "Extended": { + "$ref": "#/components/schemas/Base", + "oneOf": [ + {"$ref": "#/components/schemas/VariantA"} + ] + } + } + } + }"##; + + let spec = OpenApiSpec::from_str(spec_json).unwrap(); + let extended = spec.resolve_ref("#/components/schemas/Extended").unwrap(); + let resolved = spec.resolve_schema(extended); + + assert!(resolved.properties.contains_key("id"), "$ref id present"); + assert!( + resolved.properties.contains_key("a_field"), + "oneOf a_field merged alongside $ref" + ); +} + +#[test] +fn test_parent_nullable_propagates_to_composition() { + // Parent-level nullable should propagate through merge_parent_siblings + let spec_json = r##"{ + "openapi": "3.1.0", + "info": {"title": "Test", "version": "1.0"}, + "paths": {}, + "components": { + "schemas": { + "NullableComposed": { + "nullable": 
true, + "allOf": [ + { + "type": "object", + "properties": { + "field": {"type": "string"} + } + } + ] + } + } + } + }"##; + + let spec = OpenApiSpec::from_str(spec_json).unwrap(); + let schema = spec + .resolve_ref("#/components/schemas/NullableComposed") + .unwrap(); + let resolved = spec.resolve_schema(schema); + + assert!(resolved.properties.contains_key("field")); + assert!(resolved.nullable, "parent nullable should propagate"); +} + +// --- Fix 8: table_name sanitization --- + +#[test] +fn test_table_name_with_dots() { + let endpoint = EndpointInfo { + path: "/api/v1.0/items".to_string(), + method: "GET", + response_schema: None, + }; + assert_eq!(endpoint.table_name(), "api_v1_0_items"); +} + +#[test] +fn test_table_name_with_at_sign() { + let endpoint = EndpointInfo { + path: "/@user/repos".to_string(), + method: "GET", + response_schema: None, + }; + assert_eq!(endpoint.table_name(), "_user_repos"); +} + +#[test] +fn test_table_name_leading_digit() { + let endpoint = EndpointInfo { + path: "/3d-models".to_string(), + method: "GET", + response_schema: None, + }; + assert_eq!(endpoint.table_name(), "_3d_models"); +} + +#[test] +fn test_table_name_post_with_special_chars() { + let endpoint = EndpointInfo { + path: "/search.json".to_string(), + method: "POST", + response_schema: None, + }; + assert_eq!(endpoint.table_name(), "search_json_post"); +} + +#[test] +fn test_table_name_multiple_special_chars() { + let endpoint = EndpointInfo { + path: "/api/v2.1/@me/data".to_string(), + method: "GET", + response_schema: None, + }; + assert_eq!(endpoint.table_name(), "api_v2_1_me_data"); +} diff --git a/wasm-wrappers/fdw/openapi_fdw/test/.env.example b/wasm-wrappers/fdw/openapi_fdw/test/.env.example new file mode 100644 index 00000000..dfc2199c --- /dev/null +++ b/wasm-wrappers/fdw/openapi_fdw/test/.env.example @@ -0,0 +1,5 @@ +# Tokens for authenticated examples (github, threads). +# Copy this file to examples/.env and fill in your values. 
+ +GITHUB_TOKEN=ghp_your-personal-access-token-here +THREADS_ACCESS_TOKEN=your-access-token-here diff --git a/wasm-wrappers/fdw/openapi_fdw/test/benchmark.sh b/wasm-wrappers/fdw/openapi_fdw/test/benchmark.sh new file mode 100755 index 00000000..0c8b197b --- /dev/null +++ b/wasm-wrappers/fdw/openapi_fdw/test/benchmark.sh @@ -0,0 +1,407 @@ +#!/usr/bin/env bash +# Benchmark: openapi-fdw vs pg_http vs pg_net +# Compares query performance for the same MockServer endpoints. +# +# Usage: +# bash test/run.sh # start containers + run tests first +# bash test/benchmark.sh # default 10 iterations +# bash test/benchmark.sh 50 # custom iteration count +set -euo pipefail + +cd "$(dirname "$0")/.." + +# ---- Configuration ---- +ITERATIONS="${1:-10}" +MOCKSERVER_URL="http://mockserver:1080" +COMPOSE_FILE="test/docker-compose.yml" +PGNET_POLL_INTERVAL=0.01 # seconds between pg_net polls +PGNET_MAX_WAIT=5000 # max milliseconds to wait for pg_net response +PGNET_AVAILABLE=false + +# ---- Helpers ---- + +# All psql calls set search_path first via separate -c flag +psql_cmd() { + docker compose -f "$COMPOSE_FILE" exec -T -e PGPASSWORD="${POSTGRES_PASSWORD:-postgres}" db \ + psql -U postgres -P pager=off \ + -c "SET search_path TO public, extensions, net;" \ + "$@" 2>&1 | sed '/^SET$/d' +} + +# Portable nanosecond timestamp (macOS date doesn't support %N) +now_ns() { + if date +%s%N 2>/dev/null | grep -qv N; then + date +%s%N + else + python3 -c "import time; print(int(time.time()*1e9))" + fi +} + +# Compute min/avg/max from a list of numbers +compute_stats() { + echo "$@" | tr ' ' '\n' | awk ' + NR==1 { min=$1; max=$1; sum=0 } + { sum+=$1; if($1<min) min=$1; if($1>max) max=$1 } + END { printf "%.1f %.1f %.1f\n", min, sum/NR, max } + ' +} + +print_row() { + printf " %-18s | %8s | %8s | %8s | %4s\n" "$1" "$2" "$3" "$4" "$5" +} + +print_separator() { + printf " %-18s-+-%8s-+-%8s-+-%8s-+-%4s\n" \ + "------------------" "--------" "--------" "--------" "----" +} + +# Benchmark a synchronous query (openapi-fdw 
or pg_http) +bench_sync() { + local sql="$1" + local iterations="$2" + local times=() + + for _ in $(seq 1 "$iterations"); do + local elapsed + elapsed=$(psql_cmd -c " + DO \$bench\$ + DECLARE + t0 timestamptz := clock_timestamp(); + t1 timestamptz; + BEGIN + PERFORM * FROM ($sql) AS _b; + t1 := clock_timestamp(); + RAISE NOTICE '%', extract(epoch from (t1 - t0)) * 1000; + END \$bench\$; + " | grep 'NOTICE' | head -1 | sed 's/.*NOTICE: *//') + if [ -n "$elapsed" ]; then + times+=("$elapsed") + fi + done + + if [ ${#times[@]} -gt 0 ]; then + compute_stats "${times[@]}" + else + echo "ERR ERR ERR" + fi +} + +# Benchmark pg_net (async: fire request, poll, extract) +bench_pgnet() { + local fire_sql="$1" + local extract_sql_template="$2" # contains REQID placeholder + local iterations="$3" + local times=() + + for _ in $(seq 1 "$iterations"); do + local start_ns + start_ns=$(now_ns) + + # Fire request (auto-commits when psql returns) + local req_id + req_id=$(psql_cmd -t -c "$fire_sql" | tr -d ' \n') + + # Poll until response arrives + local waited=0 + local ready="f" + while [ "$ready" != "t" ] && [ "$waited" -lt "$PGNET_MAX_WAIT" ]; do + sleep "$PGNET_POLL_INTERVAL" + ready=$(psql_cmd -t -c \ + "SELECT EXISTS(SELECT 1 FROM net._http_response WHERE id = $req_id);" \ + | tr -d ' \n') + waited=$((waited + 10)) + done + + # Extract data + local extract_sql="${extract_sql_template//REQID/$req_id}" + psql_cmd -t -c "$extract_sql" > /dev/null || true + + local end_ns + end_ns=$(now_ns) + local elapsed_ms + elapsed_ms=$(echo "scale=1; ($end_ns - $start_ns) / 1000000" | bc) + times+=("$elapsed_ms") + + # Clean up response row + psql_cmd -c "DELETE FROM net._http_response WHERE id = $req_id;" > /dev/null || true + done + + if [ ${#times[@]} -gt 0 ]; then + compute_stats "${times[@]}" + else + echo "ERR ERR ERR" + fi +} + +# Get row count for a query +row_count() { + psql_cmd -t -c "SELECT count(*) FROM ($1) AS _v;" | tr -d ' \n' +} + +# ---- Pre-flight Checks ---- + +echo 
"=============================================================================" +echo " OpenAPI FDW Benchmark" +echo "=============================================================================" +echo "" + +# Check containers +if ! docker compose -f "$COMPOSE_FILE" ps 2>/dev/null | grep -q "db.*Up"; then + echo "ERROR: Containers not running. Start them first:" + echo " bash test/run.sh" + exit 1 +fi + +# Verify FDW is working +echo "Checking openapi-fdw..." +if ! psql_cmd -c "SELECT count(*) FROM mock_items;" > /dev/null; then + echo "ERROR: openapi-fdw not working. Run test/run.sh first." + exit 1 +fi +echo " openapi-fdw: OK" + +# ---- Extension Setup ---- + +echo "Setting up extensions..." + +if psql_cmd -c "CREATE EXTENSION IF NOT EXISTS http WITH SCHEMA extensions;" > /dev/null || \ + psql_cmd -c "CREATE EXTENSION IF NOT EXISTS http;" > /dev/null; then + echo " http: OK" +else + echo "ERROR: Could not create http extension." + exit 1 +fi + +if psql_cmd -c "CREATE EXTENSION IF NOT EXISTS pg_net WITH SCHEMA extensions;" > /dev/null || \ + psql_cmd -c "CREATE EXTENSION IF NOT EXISTS pg_net;" > /dev/null; then + PGNET_AVAILABLE=true + echo " pg_net: OK" +else + echo " pg_net: UNAVAILABLE (skipping)" +fi + +# ---- Warmup ---- + +echo "" +echo "Warming up..." 
+psql_cmd -c "SELECT * FROM mock_items;" > /dev/null +psql_cmd -c "SELECT * FROM extensions.http_get('$MOCKSERVER_URL/items');" > /dev/null +if $PGNET_AVAILABLE; then + psql_cmd -c "SELECT net.http_get('$MOCKSERVER_URL/items');" > /dev/null + sleep 1 # let bg worker process +fi + +# ---- Benchmark ---- + +echo "" +echo "=============================================================================" +echo " openapi-fdw vs pg_http vs pg_net" +echo " Iterations: $ITERATIONS | MockServer: $MOCKSERVER_URL" +echo "=============================================================================" + +# ============================ +# Scenario 1: Simple Array +# ============================ +echo "" +echo "Scenario 1: Simple Array (GET /items -> 3 rows, extract id + name)" +echo "-----------------------------------------------------------------------------" +print_row "Approach" "Min (ms)" "Avg (ms)" "Max (ms)" "Rows" +print_separator + +FDW_SQL="SELECT id, name FROM mock_items" +ROWS=$(row_count "$FDW_SQL") +STATS=$(bench_sync "$FDW_SQL" "$ITERATIONS") +# shellcheck disable=SC2086 +print_row "openapi-fdw" $STATS "$ROWS" + +HTTP_SQL="SELECT (elem->>'id')::bigint AS id, elem->>'name' AS name + FROM (SELECT content FROM extensions.http_get('$MOCKSERVER_URL/items')) r, + jsonb_array_elements(r.content::jsonb) AS elem" +STATS=$(bench_sync "$HTTP_SQL" "$ITERATIONS") +# shellcheck disable=SC2086 +print_row "pg_http" $STATS "$ROWS" + +if $PGNET_AVAILABLE; then + FIRE="SELECT net.http_get('$MOCKSERVER_URL/items')" + EXTRACT="SELECT (elem->>'id')::bigint, elem->>'name' + FROM net._http_response r, + jsonb_array_elements(r.content::jsonb) AS elem + WHERE r.id = REQID" + STATS=$(bench_pgnet "$FIRE" "$EXTRACT" "$ITERATIONS") + # shellcheck disable=SC2086 + print_row "pg_net (async)" $STATS "$ROWS *" +fi + +# ============================ +# Scenario 2: Wrapped Response +# ============================ +echo "" +echo "Scenario 2: Wrapped Response (GET /products -> unwrap 'data' -> 2 rows)" 
+echo "-----------------------------------------------------------------------------" +print_row "Approach" "Min (ms)" "Avg (ms)" "Max (ms)" "Rows" +print_separator + +FDW_SQL="SELECT id, name, price FROM mock_products" +ROWS=$(row_count "$FDW_SQL") +STATS=$(bench_sync "$FDW_SQL" "$ITERATIONS") +# shellcheck disable=SC2086 +print_row "openapi-fdw" $STATS "$ROWS" + +HTTP_SQL="SELECT elem->>'id' AS id, elem->>'name' AS name, + (elem->>'price')::double precision AS price + FROM (SELECT content FROM extensions.http_get('$MOCKSERVER_URL/products')) r, + jsonb_array_elements((r.content::jsonb)->'data') AS elem" +STATS=$(bench_sync "$HTTP_SQL" "$ITERATIONS") +# shellcheck disable=SC2086 +print_row "pg_http" $STATS "$ROWS" + +if $PGNET_AVAILABLE; then + FIRE="SELECT net.http_get('$MOCKSERVER_URL/products')" + EXTRACT="SELECT elem->>'id', elem->>'name', + (elem->>'price')::double precision + FROM net._http_response r, + jsonb_array_elements((r.content::jsonb)->'data') AS elem + WHERE r.id = REQID" + STATS=$(bench_pgnet "$FIRE" "$EXTRACT" "$ITERATIONS") + # shellcheck disable=SC2086 + print_row "pg_net (async)" $STATS "$ROWS *" +fi + +# ============================ +# Scenario 3: Type Coercion +# ============================ +echo "" +echo "Scenario 3: Type Coercion (GET /typed-data -> 1 row, mixed PG types)" +echo "-----------------------------------------------------------------------------" +print_row "Approach" "Min (ms)" "Avg (ms)" "Max (ms)" "Rows" +print_separator + +FDW_SQL="SELECT id, name, score, count, active, created_date, updated_at FROM mock_typed_data" +ROWS=$(row_count "$FDW_SQL") +STATS=$(bench_sync "$FDW_SQL" "$ITERATIONS") +# shellcheck disable=SC2086 +print_row "openapi-fdw" $STATS "$ROWS" + +HTTP_SQL="SELECT (elem->>'id')::bigint AS id, elem->>'name' AS name, + (elem->>'score')::real AS score, (elem->>'count')::integer AS count, + (elem->>'active')::boolean AS active, + (elem->>'created_date')::date AS created_date, + (elem->>'updated_at')::timestamptz 
AS updated_at + FROM (SELECT content FROM extensions.http_get('$MOCKSERVER_URL/typed-data')) r, + jsonb_array_elements(r.content::jsonb) AS elem" +STATS=$(bench_sync "$HTTP_SQL" "$ITERATIONS") +# shellcheck disable=SC2086 +print_row "pg_http" $STATS "$ROWS" + +if $PGNET_AVAILABLE; then + FIRE="SELECT net.http_get('$MOCKSERVER_URL/typed-data')" + EXTRACT="SELECT (elem->>'id')::bigint, elem->>'name', + (elem->>'score')::real, (elem->>'count')::integer, + (elem->>'active')::boolean, (elem->>'created_date')::date, + (elem->>'updated_at')::timestamptz + FROM net._http_response r, + jsonb_array_elements(r.content::jsonb) AS elem + WHERE r.id = REQID" + STATS=$(bench_pgnet "$FIRE" "$EXTRACT" "$ITERATIONS") + # shellcheck disable=SC2086 + print_row "pg_net (async)" $STATS "$ROWS *" +fi + +# ============================ +# Scenario 4: GeoJSON Nested +# ============================ +echo "" +echo "Scenario 4: GeoJSON Nested (GET /features -> features[].properties -> 3 rows)" +echo "-----------------------------------------------------------------------------" +print_row "Approach" "Min (ms)" "Avg (ms)" "Max (ms)" "Rows" +print_separator + +FDW_SQL="SELECT station_id, name, state, elevation FROM mock_stations" +ROWS=$(row_count "$FDW_SQL") +STATS=$(bench_sync "$FDW_SQL" "$ITERATIONS") +# shellcheck disable=SC2086 +print_row "openapi-fdw" $STATS "$ROWS" + +HTTP_SQL="SELECT props->>'station_id' AS station_id, props->>'name' AS name, + props->>'state' AS state, (props->>'elevation')::bigint AS elevation + FROM (SELECT content FROM extensions.http_get('$MOCKSERVER_URL/features')) r, + jsonb_array_elements((r.content::jsonb)->'features') AS feature, + LATERAL (SELECT feature->'properties' AS props) AS sub" +STATS=$(bench_sync "$HTTP_SQL" "$ITERATIONS") +# shellcheck disable=SC2086 +print_row "pg_http" $STATS "$ROWS" + +if $PGNET_AVAILABLE; then + FIRE="SELECT net.http_get('$MOCKSERVER_URL/features')" + EXTRACT="SELECT props->>'station_id', props->>'name', + props->>'state', 
(props->>'elevation')::bigint + FROM net._http_response r, + jsonb_array_elements((r.content::jsonb)->'features') AS feature, + LATERAL (SELECT feature->'properties' AS props) AS sub + WHERE r.id = REQID" + STATS=$(bench_pgnet "$FIRE" "$EXTRACT" "$ITERATIONS") + # shellcheck disable=SC2086 + print_row "pg_net (async)" $STATS "$ROWS *" +fi + +# ============================ +# Scenario 5: POST-for-Read +# ============================ +echo "" +echo "Scenario 5: POST-for-Read (POST /search -> 1 row)" +echo "-----------------------------------------------------------------------------" +print_row "Approach" "Min (ms)" "Avg (ms)" "Max (ms)" "Rows" +print_separator + +FDW_SQL="SELECT id, label FROM mock_search_post" +ROWS=$(row_count "$FDW_SQL") +STATS=$(bench_sync "$FDW_SQL" "$ITERATIONS") +# shellcheck disable=SC2086 +print_row "openapi-fdw" $STATS "$ROWS" + +HTTP_SQL="SELECT (elem->>'id')::bigint AS id, elem->>'label' AS label + FROM (SELECT content FROM extensions.http_post('$MOCKSERVER_URL/search', '{}', 'application/json')) r, + jsonb_array_elements(r.content::jsonb) AS elem" +STATS=$(bench_sync "$HTTP_SQL" "$ITERATIONS") +# shellcheck disable=SC2086 +print_row "pg_http" $STATS "$ROWS" + +if $PGNET_AVAILABLE; then + FIRE="SELECT net.http_post('$MOCKSERVER_URL/search', '{}'::jsonb)" + EXTRACT="SELECT (elem->>'id')::bigint, elem->>'label' + FROM net._http_response r, + jsonb_array_elements(r.content::jsonb) AS elem + WHERE r.id = REQID" + STATS=$(bench_pgnet "$FIRE" "$EXTRACT" "$ITERATIONS") + # shellcheck disable=SC2086 + print_row "pg_net (async)" $STATS "$ROWS *" +fi + +# ---- Ergonomics Comparison ---- + +echo "" +echo "=============================================================================" +echo " SQL Ergonomics Comparison (Scenario 4: GeoJSON Nested)" +echo "=============================================================================" +echo "" +echo " openapi-fdw (1 line):" +echo " SELECT station_id, name, state, elevation FROM mock_stations;" +echo "" 
+echo " pg_http (6 lines):" +echo " SELECT props->>'station_id', props->>'name'," +echo " props->>'state', (props->>'elevation')::bigint" +echo " FROM (SELECT content FROM extensions.http_get('.../features')) r," +echo " jsonb_array_elements((r.content::jsonb)->'features') AS feature," +echo " LATERAL (SELECT feature->'properties' AS props) AS sub;" +echo "" + +# ---- Notes ---- + +echo "=============================================================================" +echo " NOTES" +echo " * pg_net timing includes async dispatch + polling overhead" +echo " - openapi-fdw: automatic JSON unwrapping, type coercion, column mapping" +echo " - pg_http: manual jsonb extraction and type casting required" +echo " - pg_net: async model — response requires polling net._http_response" +echo " - All approaches hit same MockServer (near-zero network latency)" +echo "=============================================================================" diff --git a/wasm-wrappers/fdw/openapi_fdw/test/docker-compose.yml b/wasm-wrappers/fdw/openapi_fdw/test/docker-compose.yml new file mode 100644 index 00000000..1dbfc262 --- /dev/null +++ b/wasm-wrappers/fdw/openapi_fdw/test/docker-compose.yml @@ -0,0 +1,25 @@ +services: + mockserver: + image: mockserver/mockserver:mockserver-5.15.0 + ports: + - "1080:1080" + volumes: + - ./mock-spec.json:/config/mock-spec.json:ro + + db: + image: supabase/postgres:15.14.1.081 + ports: + - "54322:5432" + environment: + POSTGRES_PASSWORD: ${POSTGRES_PASSWORD:-postgres} + POSTGRES_DB: postgres + POSTGRES_USER: postgres + volumes: + - ./init.sql:/docker-entrypoint-initdb.d/99-init.sql:ro + healthcheck: + test: ["CMD-SHELL", "pg_isready -U supabase_admin"] + interval: 2s + timeout: 5s + retries: 10 + depends_on: + - mockserver diff --git a/wasm-wrappers/fdw/openapi_fdw/test/expectations.json b/wasm-wrappers/fdw/openapi_fdw/test/expectations.json new file mode 100644 index 00000000..7904e5e7 --- /dev/null +++ b/wasm-wrappers/fdw/openapi_fdw/test/expectations.json 
@@ -0,0 +1,896 @@ +[ + { + "httpRequest": { + "method": "GET", + "path": "/items" + }, + "httpResponse": { + "statusCode": 200, + "headers": { + "content-type": [ + "application/json" + ] + }, + "body": { + "type": "JSON", + "json": "[{\"id\":1,\"name\":\"Widget\",\"price\":9.99,\"in_stock\":true},{\"id\":2,\"name\":\"Gadget\",\"price\":24.99,\"in_stock\":true},{\"id\":3,\"name\":\"Doohickey\",\"price\":4.99,\"in_stock\":false}]" + } + } + }, + { + "httpRequest": { + "method": "GET", + "path": "/products" + }, + "httpResponse": { + "statusCode": 200, + "headers": { + "content-type": [ + "application/json" + ] + }, + "body": { + "type": "JSON", + "json": "{\"data\":[{\"id\":\"prod-1\",\"name\":\"Laptop\",\"price\":999.99},{\"id\":\"prod-2\",\"name\":\"Mouse\",\"price\":29.99}],\"total\":2}" + } + } + }, + { + "httpRequest": { + "method": "GET", + "path": "/features" + }, + "httpResponse": { + "statusCode": 200, + "headers": { + "content-type": [ + "application/json" + ] + }, + "body": { + "type": "JSON", + "json": "{\"type\":\"FeatureCollection\",\"features\":[{\"type\":\"Feature\",\"geometry\":{\"type\":\"Point\",\"coordinates\":[-104.99,39.74]},\"properties\":{\"station_id\":\"KDEN\",\"name\":\"Denver International\",\"state\":\"CO\",\"elevation\":5431}},{\"type\":\"Feature\",\"geometry\":{\"type\":\"Point\",\"coordinates\":[-118.41,33.94]},\"properties\":{\"station_id\":\"KLAX\",\"name\":\"Los Angeles International\",\"state\":\"CA\",\"elevation\":126}},{\"type\":\"Feature\",\"geometry\":{\"type\":\"Point\",\"coordinates\":[-87.90,41.97]},\"properties\":{\"station_id\":\"KORD\",\"name\":\"O Hare International\",\"state\":\"IL\",\"elevation\":672}}]}" + } + } + }, + { + "httpRequest": { + "method": "GET", + "path": "/paginated-items", + "queryStringParameters": { + "after": [ + "cursor_page2" + ], + "limit": [ + "2" + ] + } + }, + "httpResponse": { + "statusCode": 200, + "headers": { + "content-type": [ + "application/json" + ] + }, + "body": { + "type": "JSON", + 
"json": "{\"data\":[{\"id\":3,\"value\":\"page2-a\"},{\"id\":4,\"value\":\"page2-b\"}],\"cursor\":\"cursor_page3\"}" + } + }, + "priority": 10 + }, + { + "httpRequest": { + "method": "GET", + "path": "/paginated-items", + "queryStringParameters": { + "after": [ + "cursor_page3" + ], + "limit": [ + "2" + ] + } + }, + "httpResponse": { + "statusCode": 200, + "headers": { + "content-type": [ + "application/json" + ] + }, + "body": { + "type": "JSON", + "json": "{\"data\":[{\"id\":5,\"value\":\"page3-a\"}]}" + } + }, + "priority": 10 + }, + { + "httpRequest": { + "method": "GET", + "path": "/paginated-items" + }, + "httpResponse": { + "statusCode": 200, + "headers": { + "content-type": [ + "application/json" + ] + }, + "body": { + "type": "JSON", + "json": "{\"data\":[{\"id\":1,\"value\":\"page1-a\"},{\"id\":2,\"value\":\"page1-b\"}],\"cursor\":\"cursor_page2\"}" + } + }, + "priority": 0 + }, + { + "httpRequest": { + "method": "GET", + "path": "/url-paginated", + "queryStringParameters": { + "page": [ + "2" + ], + "limit": [ + "2" + ] + } + }, + "httpResponse": { + "statusCode": 200, + "headers": { + "content-type": [ + "application/json" + ] + }, + "body": { + "type": "JSON", + "json": "{\"items\":[{\"id\":3,\"label\":\"third\"}],\"pagination\":{\"next\":\"\"}}" + } + }, + "priority": 10 + }, + { + "httpRequest": { + "method": "GET", + "path": "/url-paginated" + }, + "httpResponse": { + "statusCode": 200, + "headers": { + "content-type": [ + "application/json" + ] + }, + "body": { + "type": "JSON", + "json": "{\"items\":[{\"id\":1,\"label\":\"first\"},{\"id\":2,\"label\":\"second\"}],\"pagination\":{\"next\":\"http://mockserver:1080/url-paginated?page=2&limit=2\"}}" + } + }, + "priority": 0 + }, + { + "httpRequest": { + "method": "GET", + "path": "/search", + "queryStringParameters": { + "category": [ + "electronics" + ] + } + }, + "httpResponse": { + "statusCode": 200, + "headers": { + "content-type": [ + "application/json" + ] + }, + "body": { + "type": "JSON", + 
"json": "[{\"id\":1,\"name\":\"Laptop\",\"category\":\"electronics\"},{\"id\":2,\"name\":\"Phone\",\"category\":\"electronics\"}]" + } + } + }, + { + "httpRequest": { + "method": "GET", + "path": "/users/42/posts" + }, + "httpResponse": { + "statusCode": 200, + "headers": { + "content-type": [ + "application/json" + ] + }, + "body": { + "type": "JSON", + "json": "[{\"id\":101,\"title\":\"First Post\",\"body\":\"Hello world\"},{\"id\":102,\"title\":\"Second Post\",\"body\":\"More content\"}]" + } + } + }, + { + "httpRequest": { + "method": "GET", + "path": "/camel-case" + }, + "httpResponse": { + "statusCode": 200, + "headers": { + "content-type": [ + "application/json" + ] + }, + "body": { + "type": "JSON", + "json": "[{\"id\":1,\"firstName\":\"Alice\",\"lastName\":\"Smith\",\"emailAddress\":\"alice@example.com\"},{\"id\":2,\"firstName\":\"Bob\",\"lastName\":\"Jones\",\"emailAddress\":\"bob@example.com\"}]" + } + } + }, + { + "httpRequest": { + "method": "GET", + "path": "/not-found/999" + }, + "httpResponse": { + "statusCode": 404, + "headers": { + "content-type": [ + "application/json" + ] + }, + "body": { + "type": "JSON", + "json": "{\"error\":\"Not found\"}" + } + } + }, + { + "httpRequest": { + "method": "GET", + "path": "/rate-limited" + }, + "httpResponse": { + "statusCode": 429, + "headers": { + "content-type": [ + "application/json" + ], + "Retry-After": [ + "1" + ] + }, + "body": { + "type": "JSON", + "json": "{\"error\":\"Too many requests\"}" + } + }, + "times": { + "remainingTimes": 1, + "unlimited": false + }, + "priority": 10 + }, + { + "httpRequest": { + "method": "GET", + "path": "/rate-limited" + }, + "httpResponse": { + "statusCode": 200, + "headers": { + "content-type": [ + "application/json" + ] + }, + "body": { + "type": "JSON", + "json": "[{\"id\":1,\"status\":\"ok\"}]" + } + }, + "priority": 0 + }, + { + "httpRequest": { + "method": "GET", + "path": "/auth/bearer", + "headers": { + "Authorization": [ + "Bearer test-api-key-123" + ] + } + }, 
+ "httpResponse": { + "statusCode": 200, + "headers": { + "content-type": [ + "application/json" + ] + }, + "body": { + "type": "JSON", + "json": "[{\"id\":1,\"status\":\"authenticated\"}]" + } + } + }, + { + "httpRequest": { + "method": "GET", + "path": "/auth/custom-header", + "headers": { + "X-API-Key": [ + "custom-key-456" + ] + } + }, + "httpResponse": { + "statusCode": 200, + "headers": { + "content-type": [ + "application/json" + ] + }, + "body": { + "type": "JSON", + "json": "[{\"id\":1,\"status\":\"custom_auth\"}]" + } + } + }, + { + "httpRequest": { + "method": "GET", + "path": "/auth/custom-prefix", + "headers": { + "Authorization": [ + "Token prefix-key-789" + ] + } + }, + "httpResponse": { + "statusCode": 200, + "headers": { + "content-type": [ + "application/json" + ] + }, + "body": { + "type": "JSON", + "json": "[{\"id\":1,\"status\":\"prefix_auth\"}]" + } + } + }, + { + "httpRequest": { + "method": "GET", + "path": "/auth/token", + "headers": { + "Authorization": [ + "Bearer bearer-token-abc" + ] + } + }, + "httpResponse": { + "statusCode": 200, + "headers": { + "content-type": [ + "application/json" + ] + }, + "body": { + "type": "JSON", + "json": "[{\"id\":1,\"status\":\"bearer_auth\"}]" + } + } + }, + { + "httpRequest": { + "method": "GET", + "path": "/auth/headers-json", + "headers": { + "X-Custom-One": [ + "value1" + ], + "X-Custom-Two": [ + "value2" + ] + } + }, + "httpResponse": { + "statusCode": 200, + "headers": { + "content-type": [ + "application/json" + ] + }, + "body": { + "type": "JSON", + "json": "[{\"id\":1,\"status\":\"headers_json\"}]" + } + } + }, + { + "httpRequest": { + "method": "GET", + "path": "/typed-data" + }, + "httpResponse": { + "statusCode": 200, + "headers": { + "content-type": [ + "application/json" + ] + }, + "body": { + "type": "JSON", + "json": 
"[{\"id\":1,\"name\":\"typed-row\",\"uuid_field\":\"550e8400-e29b-41d4-a716-446655440000\",\"score\":3.14,\"rating\":2.71828,\"count\":42,\"active\":true,\"created_date\":\"2024-01-15\",\"updated_at\":\"2024-06-15T10:30:00Z\",\"tags\":[\"a\",\"b\",\"c\"],\"metadata\":{\"key\":\"val\"},\"nullable_field\":null}]" + } + } + }, + { + "httpRequest": { + "method": "GET", + "path": "/singleton" + }, + "httpResponse": { + "statusCode": 200, + "headers": { + "content-type": [ + "application/json" + ] + }, + "body": { + "type": "JSON", + "json": "{\"id\":\"only-one\",\"name\":\"singleton\",\"version\":42}" + } + } + }, + { + "httpRequest": { + "method": "GET", + "path": "/empty-results" + }, + "httpResponse": { + "statusCode": 200, + "headers": { + "content-type": [ + "application/json" + ] + }, + "body": { + "type": "JSON", + "json": "[]" + } + } + }, + { + "httpRequest": { + "method": "GET", + "path": "/records-wrapped" + }, + "httpResponse": { + "statusCode": 200, + "headers": { + "content-type": [ + "application/json" + ] + }, + "body": { + "type": "JSON", + "json": "{\"records\":[{\"id\":1,\"value\":\"rec-wrap\"},{\"id\":2,\"value\":\"rec-two\"}],\"total\":2}" + } + } + }, + { + "httpRequest": { + "method": "GET", + "path": "/entries-wrapped" + }, + "httpResponse": { + "statusCode": 200, + "headers": { + "content-type": [ + "application/json" + ] + }, + "body": { + "type": "JSON", + "json": "{\"entries\":[{\"id\":1,\"label\":\"entry-1\"},{\"id\":2,\"label\":\"entry-2\"}]}" + } + } + }, + { + "httpRequest": { + "method": "GET", + "path": "/results-wrapped" + }, + "httpResponse": { + "statusCode": 200, + "headers": { + "content-type": [ + "application/json" + ] + }, + "body": { + "type": "JSON", + "json": "{\"results\":[{\"id\":1,\"value\":\"res-wrap\"}],\"count\":1}" + } + } + }, + { + "httpRequest": { + "method": "GET", + "path": "/pascal-case" + }, + "httpResponse": { + "statusCode": 200, + "headers": { + "content-type": [ + "application/json" + ] + }, + "body": { + 
"type": "JSON", + "json": "[{\"Id\":1,\"Name\":\"PascalAlice\",\"Age\":30},{\"Id\":2,\"Name\":\"PascalBob\",\"Age\":25}]" + } + } + }, + { + "httpRequest": { + "method": "GET", + "path": "/has-more-items", + "queryStringParameters": { + "after": [ + "cursor_hasmore_2" + ] + } + }, + "httpResponse": { + "statusCode": 200, + "headers": { + "content-type": [ + "application/json" + ] + }, + "body": { + "type": "JSON", + "json": "{\"data\":[{\"id\":2,\"value\":\"hm2\"}],\"has_more\":false}" + } + }, + "priority": 10 + }, + { + "httpRequest": { + "method": "GET", + "path": "/has-more-items" + }, + "httpResponse": { + "statusCode": 200, + "headers": { + "content-type": [ + "application/json" + ] + }, + "body": { + "type": "JSON", + "json": "{\"data\":[{\"id\":1,\"value\":\"hm1\"}],\"has_more\":true,\"next_cursor\":\"cursor_hasmore_2\"}" + } + }, + "priority": 0 + }, + { + "httpRequest": { + "method": "GET", + "path": "/relative-paged", + "queryStringParameters": { + "page": [ + "2" + ] + } + }, + "httpResponse": { + "statusCode": 200, + "headers": { + "content-type": [ + "application/json" + ] + }, + "body": { + "type": "JSON", + "json": "{\"data\":[{\"id\":2,\"label\":\"rel2\"}]}" + } + }, + "priority": 10 + }, + { + "httpRequest": { + "method": "GET", + "path": "/relative-paged" + }, + "httpResponse": { + "statusCode": 200, + "headers": { + "content-type": [ + "application/json" + ] + }, + "body": { + "type": "JSON", + "json": "{\"data\":[{\"id\":1,\"label\":\"rel1\"}],\"next\":\"?page=2\"}" + } + }, + "priority": 0 + }, + { + "httpRequest": { + "method": "GET", + "path": "/orgs/acme/repos/widget/issues" + }, + "httpResponse": { + "statusCode": 200, + "headers": { + "content-type": [ + "application/json" + ] + }, + "body": { + "type": "JSON", + "json": "[{\"id\":1,\"title\":\"Bug report\",\"state\":\"open\"},{\"id\":2,\"title\":\"Feature request\",\"state\":\"closed\"}]" + } + } + }, + { + "httpRequest": { + "method": "GET", + "path": "/resources/res-42" + }, + 
"httpResponse": { + "statusCode": 200, + "headers": { + "content-type": [ + "application/json" + ] + }, + "body": { + "type": "JSON", + "json": "{\"id\":\"res-42\",\"name\":\"found-by-id\"}" + } + } + }, + { + "httpRequest": { + "method": "GET", + "path": "/server-error" + }, + "httpResponse": { + "statusCode": 500, + "headers": { + "content-type": [ + "application/json" + ] + }, + "body": { + "type": "JSON", + "json": "{\"error\":\"Internal server error\"}" + } + } + }, + { + "httpRequest": { + "method": "GET", + "path": "/invalid-json" + }, + "httpResponse": { + "statusCode": 200, + "headers": { + "content-type": [ + "application/json" + ] + }, + "body": "not valid json {{" + } + }, + { + "httpRequest": { + "method": "GET", + "path": "/unix-timestamps" + }, + "httpResponse": { + "statusCode": 200, + "headers": { + "content-type": [ + "application/json" + ] + }, + "body": { + "type": "JSON", + "json": "[{\"id\":1,\"name\":\"epoch-row\",\"created_at\":1686816600,\"updated_at\":\"2024-06-15T10:30:00Z\"}]" + } + } + }, + { + "httpRequest": { + "method": "GET", + "path": "/acronym-fields" + }, + "httpResponse": { + "statusCode": 200, + "headers": { + "content-type": [ + "application/json" + ] + }, + "body": { + "type": "JSON", + "json": "[{\"id\":1,\"clusterIP\":\"10.0.0.1\",\"apiURL\":\"https://api.test.com\",\"htmlParser\":\"v2\"}]" + } + } + }, + { + "httpRequest": { + "method": "GET", + "path": "/mixed-types" + }, + "httpResponse": { + "statusCode": 200, + "headers": { + "content-type": [ + "application/json" + ] + }, + "body": { + "type": "JSON", + "json": "[{\"id\":1,\"count\":42,\"enabled\":true,\"ratio\":3.14,\"name\":\"test\"},{\"id\":2,\"count\":null,\"enabled\":null,\"ratio\":null,\"name\":null}]" + } + } + }, + { + "httpRequest": { + "method": "GET", + "path": "/jsonld-graph" + }, + "httpResponse": { + "statusCode": 200, + "headers": { + "content-type": [ + "application/ld+json" + ] + }, + "body": { + "type": "JSON", + "json": 
"{\"@context\":{\"@version\":\"1.1\",\"wx\":\"https://api.weather.gov/ontology#\"},\"@graph\":[{\"@id\":\"urn:alert:1\",\"@type\":\"wx:Alert\",\"headline\":\"Storm warning\",\"severity\":\"Severe\"},{\"@id\":\"urn:alert:2\",\"@type\":\"wx:Alert\",\"headline\":\"Heat advisory\",\"severity\":\"Moderate\"}]}" + } + } + }, + { + "httpRequest": { + "method": "GET", + "path": "/auth/query-key", + "queryStringParameters": { + "api_key": [ + "query-key-123" + ] + } + }, + "httpResponse": { + "statusCode": 200, + "headers": { + "content-type": [ + "application/json" + ] + }, + "body": { + "type": "JSON", + "json": "[{\"id\":1,\"status\":\"query_auth\"}]" + } + } + }, + { + "httpRequest": { + "method": "GET", + "path": "/auth/cookie", + "headers": { + "cookie": [ + "session=cookie-key-abc" + ] + } + }, + "httpResponse": { + "statusCode": 200, + "headers": { + "content-type": [ + "application/json" + ] + }, + "body": { + "type": "JSON", + "json": "[{\"id\":1,\"status\":\"cookie_auth\"}]" + } + } + }, + { + "httpRequest": { + "method": "POST", + "path": "/search" + }, + "httpResponse": { + "statusCode": 200, + "headers": { + "content-type": [ + "application/json" + ] + }, + "body": { + "type": "JSON", + "json": "[{\"id\":1,\"label\":\"found-via-post\"}]" + } + } + }, + { + "httpRequest": { + "method": "GET", + "path": "/jsonld-features" + }, + "httpResponse": { + "statusCode": 200, + "headers": { + "content-type": [ + "application/geo+json" + ] + }, + "body": { + "type": "JSON", + "json": "{\"@context\":[\"https://geojson.org/geojson-ld/geojson-context.jsonld\"],\"type\":\"FeatureCollection\",\"features\":[{\"type\":\"Feature\",\"properties\":{\"@id\":\"https://api.weather.gov/stations/KDEN\",\"@type\":\"wx:ObservationStation\",\"stationIdentifier\":\"KDEN\",\"name\":\"Denver 
International\",\"timeZone\":\"America/Denver\"}},{\"type\":\"Feature\",\"properties\":{\"@id\":\"https://api.weather.gov/stations/KLAX\",\"@type\":\"wx:ObservationStation\",\"stationIdentifier\":\"KLAX\",\"name\":\"Los Angeles International\",\"timeZone\":\"America/Los_Angeles\"}}]}" + } + } + }, + { + "httpRequest": { + "method": "GET", + "path": "/config-test-custom", + "queryStringParameters": { + "limit": [ + "5" + ] + } + }, + "httpResponse": { + "statusCode": 200, + "headers": { + "content-type": [ + "application/json" + ] + }, + "body": { + "type": "JSON", + "json": "[{\"id\":1,\"value\":\"custom-page-size\"}]" + } + } + }, + { + "httpRequest": { + "method": "GET", + "path": "/config-test-default", + "queryStringParameters": { + "limit": [ + "10" + ] + } + }, + "httpResponse": { + "statusCode": 200, + "headers": { + "content-type": [ + "application/json" + ] + }, + "body": { + "type": "JSON", + "json": "[{\"id\":1,\"value\":\"server-default-size\"}]" + } + } + } +] \ No newline at end of file diff --git a/wasm-wrappers/fdw/openapi_fdw/test/init.sql b/wasm-wrappers/fdw/openapi_fdw/test/init.sql new file mode 100644 index 00000000..168bcb66 --- /dev/null +++ b/wasm-wrappers/fdw/openapi_fdw/test/init.sql @@ -0,0 +1,791 @@ +-- OpenAPI FDW integration test setup +-- This runs automatically on container startup. +-- Note: fdw_package_url uses file:// for local Docker testing. 
In production, use the +-- GitHub release URL: https://github.com/supabase/wrappers/releases/download/wasm_openapi_fdw_v0.2.0/openapi_fdw.wasm + +-- Create supabase_admin role if it doesn't exist (required by wrappers extension) +DO $$ +BEGIN + IF NOT EXISTS (SELECT FROM pg_catalog.pg_roles WHERE rolname = 'supabase_admin') THEN + CREATE ROLE supabase_admin WITH SUPERUSER CREATEDB CREATEROLE LOGIN PASSWORD 'postgres'; + END IF; +END +$$; + +create schema if not exists extensions; +create extension if not exists wrappers with schema extensions; + +-- wasm_fdw functions live in the extensions schema +set search_path to public, extensions; + +create foreign data wrapper wasm_wrapper + handler wasm_fdw_handler + validator wasm_fdw_validator; + +-- NWS API server (no auth required, just needs User-Agent) +create server nws_server + foreign data wrapper wasm_wrapper + options ( + fdw_package_url 'file:///openapi_fdw.wasm', + fdw_package_name 'supabase:openapi-fdw', + fdw_package_version '0.2.0', + base_url 'https://api.weather.gov', + user_agent 'openapi-fdw-test/0.2.0', + accept 'application/geo+json' + ); + +-- NWS API server WITH debug enabled for comparison +create server nws_server_debug + foreign data wrapper wasm_wrapper + options ( + fdw_package_url 'file:///openapi_fdw.wasm', + fdw_package_name 'supabase:openapi-fdw', + fdw_package_version '0.2.0', + base_url 'https://api.weather.gov', + user_agent 'openapi-fdw-test/0.2.0', + accept 'application/geo+json', + debug 'true' + ); + +-- Weather stations (GeoJSON FeatureCollection → /features, nested /properties) +create foreign table nws_stations ( + station_identifier text, + name text, + time_zone text, + elevation jsonb, + attrs jsonb +) + server nws_server + options ( + endpoint '/stations', + response_path '/features', + object_path '/properties', + rowid_column 'station_identifier', + cursor_path '/pagination/next', + page_size '100', + page_size_param 'limit' + ); + +-- Same table with debug enabled +create 
foreign table nws_stations_debug ( + station_identifier text, + name text, + time_zone text, + elevation jsonb, + attrs jsonb +) + server nws_server_debug + options ( + endpoint '/stations', + response_path '/features', + object_path '/properties', + rowid_column 'station_identifier', + cursor_path '/pagination/next', + page_size '100', + page_size_param 'limit' + ); + +-- Active weather alerts +create foreign table nws_alerts ( + id text, + area_desc text, + severity text, + certainty text, + event text, + headline text, + description text, + onset text, + expires text, + attrs jsonb +) + server nws_server + options ( + endpoint '/alerts/active', + response_path '/features', + object_path '/properties', + rowid_column 'id' + ); + +-- Stations filtered by state (query parameter pushdown) +create foreign table nws_stations_by_state ( + station_identifier text, + name text, + state text, + time_zone text, + attrs jsonb +) + server nws_server_debug + options ( + endpoint '/stations', + response_path '/features', + object_path '/properties', + rowid_column 'station_identifier', + page_size '50', + page_size_param 'limit' + ); + +-- ============================================================ +-- Mock API server (deterministic local testing via MockServer) +-- ============================================================ + +create server mock_server + foreign data wrapper wasm_wrapper + options ( + fdw_package_url 'file:///openapi_fdw.wasm', + fdw_package_name 'supabase:openapi-fdw', + fdw_package_version '0.2.0', + base_url 'http://mockserver:1080', + page_size '10', + page_size_param 'limit' + ); + +create server mock_server_paginated + foreign data wrapper wasm_wrapper + options ( + fdw_package_url 'file:///openapi_fdw.wasm', + fdw_package_name 'supabase:openapi-fdw', + fdw_package_version '0.2.0', + base_url 'http://mockserver:1080', + page_size '2', + page_size_param 'limit', + cursor_param 'after' + ); + +-- Test 1: Basic JSON array response +create foreign table 
mock_items ( + id bigint, + name text, + price double precision, + in_stock boolean, + attrs jsonb +) + server mock_server + options ( + endpoint '/items', + rowid_column 'id' + ); + +-- Test 2: Wrapped response (auto-detect "data" key) +create foreign table mock_products ( + id text, + name text, + price double precision, + attrs jsonb +) + server mock_server + options ( + endpoint '/products', + rowid_column 'id' + ); + +-- Test 3: GeoJSON-style nested objects +create foreign table mock_stations ( + station_id text, + name text, + state text, + elevation bigint, + attrs jsonb +) + server mock_server + options ( + endpoint '/features', + response_path '/features', + object_path '/properties', + rowid_column 'station_id' + ); + +-- Test 4: Cursor pagination (token-based) +create foreign table mock_paginated_items ( + id bigint, + value text, + attrs jsonb +) + server mock_server_paginated + options ( + endpoint '/paginated-items', + rowid_column 'id', + cursor_path '/cursor' + ); + +-- Test 5: Cursor pagination (full URL — tests the bug fix) +create foreign table mock_url_paginated ( + id bigint, + label text, + attrs jsonb +) + server mock_server_paginated + options ( + endpoint '/url-paginated', + rowid_column 'id', + cursor_path '/pagination/next', + response_path '/items' + ); + +-- Test 6: Query parameter pushdown +create foreign table mock_search ( + id bigint, + name text, + category text, + attrs jsonb +) + server mock_server + options ( + endpoint '/search', + rowid_column 'id' + ); + +-- Test 7: Path parameter substitution +create foreign table mock_user_posts ( + id bigint, + user_id text, + title text, + body text, + attrs jsonb +) + server mock_server + options ( + endpoint '/users/{user_id}/posts', + rowid_column 'id' + ); + +-- Test 8: camelCase column matching +create foreign table mock_camel ( + id bigint, + first_name text, + last_name text, + email_address text, + attrs jsonb +) + server mock_server + options ( + endpoint '/camel-case', + 
rowid_column 'id' + ); + +-- Test 9: 404 returns empty (not error) +create foreign table mock_not_found ( + id text, + name text, + attrs jsonb +) + server mock_server + options ( + endpoint '/not-found', + rowid_column 'id' + ); + +-- Test 10: Rate limiting with 429 retry +create foreign table mock_rate_limited ( + id bigint, + status text, + attrs jsonb +) + server mock_server + options ( + endpoint '/rate-limited', + rowid_column 'id' + ); + +-- ============================================================ +-- Auth servers (each auth mode needs its own server) +-- ============================================================ + +-- Auth: API key with default Authorization: Bearer header +create server mock_auth_bearer_server + foreign data wrapper wasm_wrapper + options ( + fdw_package_url 'file:///openapi_fdw.wasm', + fdw_package_name 'supabase:openapi-fdw', + fdw_package_version '0.2.0', + base_url 'http://mockserver:1080', + api_key 'test-api-key-123' + ); + +-- Auth: API key with custom header (X-API-Key) +create server mock_auth_custom_header_server + foreign data wrapper wasm_wrapper + options ( + fdw_package_url 'file:///openapi_fdw.wasm', + fdw_package_name 'supabase:openapi-fdw', + fdw_package_version '0.2.0', + base_url 'http://mockserver:1080', + api_key 'custom-key-456', + api_key_header 'X-API-Key' + ); + +-- Auth: API key with custom prefix (Token) +create server mock_auth_prefix_server + foreign data wrapper wasm_wrapper + options ( + fdw_package_url 'file:///openapi_fdw.wasm', + fdw_package_name 'supabase:openapi-fdw', + fdw_package_version '0.2.0', + base_url 'http://mockserver:1080', + api_key 'prefix-key-789', + api_key_prefix 'Token' + ); + +-- Auth: Bearer token (separate from api_key) +create server mock_auth_token_server + foreign data wrapper wasm_wrapper + options ( + fdw_package_url 'file:///openapi_fdw.wasm', + fdw_package_name 'supabase:openapi-fdw', + fdw_package_version '0.2.0', + base_url 'http://mockserver:1080', + bearer_token 
'bearer-token-abc' + ); + +-- Auth: API key in query parameter (Fix 6) +create server mock_auth_query_server + foreign data wrapper wasm_wrapper + options ( + fdw_package_url 'file:///openapi_fdw.wasm', + fdw_package_name 'supabase:openapi-fdw', + fdw_package_version '0.2.0', + base_url 'http://mockserver:1080', + api_key 'query-key-123', + api_key_header 'api_key', + api_key_location 'query' + ); + +-- Auth: API key in cookie (Fix 13) +create server mock_auth_cookie_server + foreign data wrapper wasm_wrapper + options ( + fdw_package_url 'file:///openapi_fdw.wasm', + fdw_package_name 'supabase:openapi-fdw', + fdw_package_version '0.2.0', + base_url 'http://mockserver:1080', + api_key 'cookie-key-abc', + api_key_header 'session', + api_key_location 'cookie' + ); + +-- Auth: Custom headers via JSON +create server mock_auth_headers_server + foreign data wrapper wasm_wrapper + options ( + fdw_package_url 'file:///openapi_fdw.wasm', + fdw_package_name 'supabase:openapi-fdw', + fdw_package_version '0.2.0', + base_url 'http://mockserver:1080', + headers '{"X-Custom-One":"value1","X-Custom-Two":"value2"}' + ); + +-- Auth test tables (one per auth server) +create foreign table mock_auth_bearer_data ( + id bigint, + status text, + attrs jsonb +) + server mock_auth_bearer_server + options ( + endpoint '/auth/bearer', + rowid_column 'id' + ); + +create foreign table mock_auth_custom_data ( + id bigint, + status text, + attrs jsonb +) + server mock_auth_custom_header_server + options ( + endpoint '/auth/custom-header', + rowid_column 'id' + ); + +create foreign table mock_auth_prefix_data ( + id bigint, + status text, + attrs jsonb +) + server mock_auth_prefix_server + options ( + endpoint '/auth/custom-prefix', + rowid_column 'id' + ); + +create foreign table mock_auth_token_data ( + id bigint, + status text, + attrs jsonb +) + server mock_auth_token_server + options ( + endpoint '/auth/token', + rowid_column 'id' + ); + +create foreign table mock_auth_query_data ( + id 
bigint, + status text, + attrs jsonb +) + server mock_auth_query_server + options ( + endpoint '/auth/query-key', + rowid_column 'id' + ); + +create foreign table mock_auth_cookie_data ( + id bigint, + status text, + attrs jsonb +) + server mock_auth_cookie_server + options ( + endpoint '/auth/cookie', + rowid_column 'id' + ); + +create foreign table mock_auth_headers_data ( + id bigint, + status text, + attrs jsonb +) + server mock_auth_headers_server + options ( + endpoint '/auth/headers-json', + rowid_column 'id' + ); + +-- ============================================================ +-- Type coercion and data format tests +-- ============================================================ + +-- Type coercion: all supported PostgreSQL types +create foreign table mock_typed_data ( + id bigint, + name text, + uuid_field text, + score real, + rating double precision, + count integer, + active boolean, + created_date date, + updated_at timestamptz, + tags jsonb, + metadata jsonb, + nullable_field text, + attrs jsonb +) + server mock_server + options ( + endpoint '/typed-data', + rowid_column 'id' + ); + +-- Single object response (not wrapped in array) +create foreign table mock_singleton ( + id text, + name text, + version integer, + attrs jsonb +) + server mock_server + options ( + endpoint '/singleton', + rowid_column 'id' + ); + +-- Empty array response +create foreign table mock_empty ( + id text, + name text, + attrs jsonb +) + server mock_server + options ( + endpoint '/empty-results', + rowid_column 'id' + ); + +-- Records wrapper auto-detection +create foreign table mock_records ( + id bigint, + value text, + attrs jsonb +) + server mock_server + options ( + endpoint '/records-wrapped', + rowid_column 'id' + ); + +-- Entries wrapper auto-detection +create foreign table mock_entries ( + id bigint, + label text, + attrs jsonb +) + server mock_server + options ( + endpoint '/entries-wrapped', + rowid_column 'id' + ); + +-- Results wrapper auto-detection +create 
foreign table mock_results ( + id bigint, + value text, + attrs jsonb +) + server mock_server + options ( + endpoint '/results-wrapped', + rowid_column 'id' + ); + +-- ============================================================ +-- Column matching tests +-- ============================================================ + +-- PascalCase → case-insensitive matching +create foreign table mock_pascal ( + id bigint, + name text, + age integer, + attrs jsonb +) + server mock_server + options ( + endpoint '/pascal-case', + rowid_column 'id' + ); + +-- ============================================================ +-- Pagination edge case tests +-- ============================================================ + +-- has_more pattern (auto-detection, no explicit cursor_path) +create foreign table mock_has_more ( + id bigint, + value text, + attrs jsonb +) + server mock_server_paginated + options ( + endpoint '/has-more-items', + rowid_column 'id' + ); + +-- Relative URL pagination (query-only: "?page=2") +create foreign table mock_relative_paged ( + id bigint, + label text, + attrs jsonb +) + server mock_server + options ( + endpoint '/relative-paged', + rowid_column 'id' + ); + +-- ============================================================ +-- URL construction tests +-- ============================================================ + +-- Multiple path parameters +create foreign table mock_multi_path ( + id bigint, + title text, + state text, + org text, + repo text, + attrs jsonb +) + server mock_server + options ( + endpoint '/orgs/{org}/repos/{repo}/issues', + rowid_column 'id' + ); + +-- Rowid pushdown (WHERE id = 'x' → GET /resources/x) +create foreign table mock_resources ( + id text, + name text, + attrs jsonb +) + server mock_server + options ( + endpoint '/resources', + rowid_column 'id' + ); + +-- ============================================================ +-- Error handling tests +-- ============================================================ + +-- HTTP 500 error 
+create foreign table mock_server_error ( + id text, + name text, + attrs jsonb +) + server mock_server + options ( + endpoint '/server-error', + rowid_column 'id' + ); + +-- Invalid JSON response +create foreign table mock_invalid_json ( + id text, + name text, + attrs jsonb +) + server mock_server + options ( + endpoint '/invalid-json', + rowid_column 'id' + ); + +-- ============================================================ +-- Edge case tests: unix timestamps, acronyms, mixed types +-- ============================================================ + +-- Unix timestamps (Stripe-style epoch seconds in timestamptz columns) +create foreign table mock_unix_timestamps ( + id bigint, + name text, + created_at timestamptz, + updated_at timestamptz, + attrs jsonb +) + server mock_server + options ( + endpoint '/unix-timestamps', + rowid_column 'id' + ); + +-- Acronym field names (clusterIP, apiURL, htmlParser) +create foreign table mock_acronym_fields ( + id bigint, + cluster_ip text, + api_url text, + html_parser text, + attrs jsonb +) + server mock_server + options ( + endpoint '/acronym-fields', + rowid_column 'id' + ); + +-- Mixed types with nulls (testing null handling across types) +create foreign table mock_mixed_types ( + id bigint, + count integer, + enabled boolean, + ratio double precision, + name text, + attrs jsonb +) + server mock_server + options ( + endpoint '/mixed-types', + rowid_column 'id' + ); + +-- ============================================================ +-- JSON-LD tests (@-prefixed keys, @graph wrapper) +-- ============================================================ + +-- JSON-LD @graph wrapper (NWS alerts with application/ld+json) +create foreign table mock_jsonld_alerts ( + _id text, + _type text, + headline text, + severity text, + attrs jsonb +) + server mock_server + options ( + endpoint '/jsonld-graph', + rowid_column '_id' + ); + +-- JSON-LD GeoJSON with @-prefixed properties in nested objects +create foreign table 
mock_jsonld_stations ( + _id text, + _type text, + station_identifier text, + name text, + time_zone text, + attrs jsonb +) + server mock_server + options ( + endpoint '/jsonld-features', + response_path '/features', + object_path '/properties', + rowid_column 'station_identifier' + ); + +-- ============================================================ +-- POST-for-read test +-- ============================================================ + +-- POST method for search (data retrieval via POST) +create foreign table mock_search_post ( + id bigint, + label text, + attrs jsonb +) + server mock_server + options ( + endpoint '/search', + method 'POST', + rowid_column 'id' + ); + +-- ============================================================ +-- Bug regression tests: sticky config, re_scan +-- ============================================================ + +-- Bug 2: Table with custom page_size override (server default is 10) +-- Sends limit=5 — MockServer expectation requires exactly limit=5 +create foreign table mock_config_custom_page ( + id bigint, + value text, + attrs jsonb +) + server mock_server + options ( + endpoint '/config-test-custom', + rowid_column 'id', + page_size '5' + ); + +-- Bug 2: Table WITHOUT page_size override (should use server default 10) +-- Sends limit=10 — MockServer expectation requires exactly limit=10 +-- If sticky bug exists: would send limit=5 (leaked from previous scan) → no match → error +create foreign table mock_config_default_page ( + id bigint, + value text, + attrs jsonb +) + server mock_server + options ( + endpoint '/config-test-default', + rowid_column 'id' + ); + +-- ============================================================ +-- OpenAPI spec-driven server (comprehensive custom spec) +-- Tables are created dynamically via IMPORT FOREIGN SCHEMA in run.sh +-- ============================================================ + +create server mock_openapi_server + foreign data wrapper wasm_wrapper + options ( + fdw_package_url 
'file:///openapi_fdw.wasm', + fdw_package_name 'supabase:openapi-fdw', + fdw_package_version '0.2.0', + base_url 'http://mockserver:1080', + spec_url 'http://mockserver:1080/openapi.json', + page_size '10', + page_size_param 'limit' + ); diff --git a/wasm-wrappers/fdw/openapi_fdw/test/mock-spec.json b/wasm-wrappers/fdw/openapi_fdw/test/mock-spec.json new file mode 100644 index 00000000..e1d3a2de --- /dev/null +++ b/wasm-wrappers/fdw/openapi_fdw/test/mock-spec.json @@ -0,0 +1,673 @@ +{ + "openapi": "3.1.0", + "info": { + "title": "Edge Case Test API", + "description": "Comprehensive API spec for testing OpenAPI FDW edge cases: type mappings, $ref resolution, allOf/oneOf composition, parameterized paths, various response shapes.", + "version": "1.0.0" + }, + "servers": [ + { + "url": "https://{region}.api.example.com/v{version}", + "variables": { + "region": { + "default": "us-east-1" + }, + "version": { + "default": "1" + } + } + } + ], + "paths": { + "/typed-records": { + "get": { + "operationId": "getTypedRecords", + "summary": "Get records with all supported types", + "responses": { + "200": { + "description": "Array of typed records", + "content": { + "application/json": { + "schema": { + "type": "array", + "items": { + "$ref": "#/components/schemas/TypedRecord" + } + } + } + } + } + } + } + }, + "/composed-items": { + "get": { + "operationId": "getComposedItems", + "summary": "Get items using allOf composition (inheritance)", + "responses": { + "200": { + "description": "Array of composed items", + "content": { + "application/json": { + "schema": { + "type": "array", + "items": { + "$ref": "#/components/schemas/ComposedItem" + } + } + } + } + } + } + } + }, + "/polymorphic": { + "get": { + "operationId": "getPolymorphic", + "summary": "Get items using oneOf composition (polymorphism)", + "responses": { + "200": { + "description": "Array of polymorphic items", + "content": { + "application/json": { + "schema": { + "type": "array", + "items": { + "$ref": 
"#/components/schemas/PolymorphicItem" + } + } + } + } + } + } + } + }, + "/singleton": { + "get": { + "operationId": "getSingleton", + "summary": "Get a single object (not array)", + "responses": { + "200": { + "description": "Single object response", + "content": { + "application/json": { + "schema": { + "$ref": "#/components/schemas/Singleton" + } + } + } + } + } + } + }, + "/no-schema-endpoint": { + "get": { + "operationId": "getNoSchema", + "summary": "Endpoint with no response schema defined", + "responses": { + "200": { + "description": "Success with no content schema" + } + } + } + }, + "/created-response": { + "get": { + "operationId": "getCreatedResponse", + "summary": "Endpoint with only 201 response (no 200)", + "responses": { + "201": { + "description": "Created", + "content": { + "application/json": { + "schema": { + "type": "object", + "required": [ + "id" + ], + "properties": { + "id": { + "type": "string" + }, + "created": { + "type": "boolean" + } + } + } + } + } + } + } + } + }, + "/xml-only": { + "get": { + "operationId": "getXmlOnly", + "summary": "Endpoint with only XML content type", + "responses": { + "200": { + "description": "XML response", + "content": { + "application/xml": { + "schema": { + "type": "object", + "properties": { + "data": { + "type": "string" + } + } + } + } + } + } + } + } + }, + "/ref-response": { + "get": { + "operationId": "getRefResponse", + "summary": "Endpoint using $ref at the response level (Fix 1)", + "responses": { + "200": { + "$ref": "#/components/responses/ItemListResponse" + } + } + } + }, + "/wildcard-response": { + "get": { + "operationId": "getWildcardResponse", + "summary": "Endpoint using 2XX wildcard status code (Fix 2)", + "responses": { + "2XX": { + "description": "Any success", + "content": { + "application/json": { + "schema": { + "type": "array", + "items": { + "type": "object", + "properties": { + "id": { + "type": "integer", + "format": "int64" + }, + "status": { + "type": "string" + } + } + } + 
} + } + } + } + } + } + }, + "/users-write-only": { + "get": { + "operationId": "getUsersWriteOnly", + "summary": "Endpoint with writeOnly properties that should be excluded (Fix 3)", + "responses": { + "200": { + "description": "User list", + "content": { + "application/json": { + "schema": { + "type": "array", + "items": { + "$ref": "#/components/schemas/UserWithPassword" + } + } + } + } + } + } + } + }, + "/primitive-union": { + "get": { + "operationId": "getPrimitiveUnion", + "summary": "Endpoint with oneOf primitive types in a property (Fix 5)", + "responses": { + "200": { + "description": "Items with flexible value field", + "content": { + "application/json": { + "schema": { + "type": "array", + "items": { + "$ref": "#/components/schemas/FlexibleValue" + } + } + } + } + } + } + } + }, + "/charset-endpoint": { + "get": { + "operationId": "getCharsetEndpoint", + "summary": "Endpoint with charset in content-type (Fix 8)", + "responses": { + "200": { + "description": "JSON with charset", + "content": { + "application/json; charset=utf-8": { + "schema": { + "type": "array", + "items": { + "type": "object", + "required": [ + "id" + ], + "properties": { + "id": { + "type": "integer", + "format": "int64" + }, + "label": { + "type": "string" + } + } + } + } + } + } + } + } + } + }, + "/ref-with-siblings": { + "get": { + "operationId": "getRefWithSiblings", + "summary": "Endpoint with $ref + sibling properties (Fix 12)", + "responses": { + "200": { + "description": "Items using $ref with extra properties", + "content": { + "application/json": { + "schema": { + "type": "array", + "items": { + "$ref": "#/components/schemas/ExtendedEntity" + } + } + } + } + } + } + } + }, + "/search": { + "post": { + "operationId": "searchItems", + "summary": "Search items via POST (POST-for-read)", + "responses": { + "200": { + "description": "Search results", + "content": { + "application/json": { + "schema": { + "type": "array", + "items": { + "$ref": 
"#/components/schemas/RefResponseItem" + } + } + } + } + } + } + } + }, + "/resources/{id}": { + "get": { + "operationId": "getResourceById", + "summary": "Get resource by ID (single path param, should be skipped by IMPORT)", + "parameters": [ + { + "name": "id", + "in": "path", + "required": true, + "schema": { + "type": "string" + } + } + ], + "responses": { + "200": { + "description": "Single resource", + "content": { + "application/json": { + "schema": { + "$ref": "#/components/schemas/TypedRecord" + } + } + } + } + } + } + }, + "/orgs/{org}/repos/{repo}/issues": { + "get": { + "operationId": "getOrgRepoIssues", + "summary": "Get issues (multiple path params, should be skipped by IMPORT)", + "parameters": [ + { + "name": "org", + "in": "path", + "required": true, + "schema": { + "type": "string" + } + }, + { + "name": "repo", + "in": "path", + "required": true, + "schema": { + "type": "string" + } + } + ], + "responses": { + "200": { + "description": "Issues list", + "content": { + "application/json": { + "schema": { + "type": "array", + "items": { + "$ref": "#/components/schemas/Issue" + } + } + } + } + } + } + } + } + }, + "components": { + "schemas": { + "TypedRecord": { + "type": "object", + "required": [ + "id", + "name" + ], + "properties": { + "id": { + "type": "integer", + "format": "int64" + }, + "name": { + "type": "string" + }, + "code": { + "type": "string", + "format": "uuid" + }, + "score": { + "type": "number", + "format": "float" + }, + "rating": { + "type": "number", + "format": "double" + }, + "count": { + "type": "integer", + "format": "int32" + }, + "active": { + "type": "boolean" + }, + "created_date": { + "type": "string", + "format": "date" + }, + "updated_at": { + "type": "string", + "format": "date-time" + }, + "tags": { + "type": "array", + "items": { + "type": "string" + } + }, + "metadata": { + "type": "object", + "additionalProperties": { + "type": "string" + } + }, + "address": { + "$ref": "#/components/schemas/Address" + }, + 
"nullable_field": { + "type": "string", + "nullable": true + }, + "created_epoch": { + "type": "integer", + "format": "unix-time" + }, + "multi_type_field": { + "description": "OpenAPI 3.1 multi-type array — should map to jsonb (Fix 4)", + "type": [ + "string", + "integer" + ] + } + } + }, + "Address": { + "type": "object", + "properties": { + "street": { + "type": "string" + }, + "city": { + "type": "string" + }, + "zip": { + "type": "string" + } + } + }, + "BaseEntity": { + "type": "object", + "required": [ + "id" + ], + "properties": { + "id": { + "type": "integer", + "format": "int64" + }, + "created_at": { + "type": "string", + "format": "date-time" + } + } + }, + "ComposedItem": { + "allOf": [ + { + "$ref": "#/components/schemas/BaseEntity" + }, + { + "type": "object", + "required": [ + "title" + ], + "properties": { + "title": { + "type": "string" + }, + "priority": { + "type": "integer", + "format": "int32" + } + } + } + ] + }, + "PolymorphicItem": { + "oneOf": [ + { + "type": "object", + "properties": { + "user_id": { + "type": "string" + }, + "user_name": { + "type": "string" + } + } + }, + { + "type": "object", + "properties": { + "org_id": { + "type": "string" + }, + "org_name": { + "type": "string" + } + } + } + ] + }, + "Singleton": { + "type": "object", + "properties": { + "id": { + "type": "string" + }, + "name": { + "type": "string" + }, + "version": { + "type": "integer", + "format": "int32" + } + } + }, + "Issue": { + "type": "object", + "required": [ + "id", + "title" + ], + "properties": { + "id": { + "type": "integer", + "format": "int64" + }, + "title": { + "type": "string" + }, + "state": { + "type": "string" + } + } + }, + "RefResponseItem": { + "type": "object", + "required": [ + "id" + ], + "properties": { + "id": { + "type": "integer", + "format": "int64" + }, + "label": { + "type": "string" + } + } + }, + "UserWithPassword": { + "type": "object", + "required": [ + "id", + "username" + ], + "properties": { + "id": { + "type": "integer", 
+ "format": "int64" + }, + "username": { + "type": "string" + }, + "email": { + "type": "string" + }, + "password": { + "type": "string", + "writeOnly": true + }, + "password_hash": { + "type": "string", + "writeOnly": true + } + } + }, + "ExtendedEntity": { + "description": "Uses $ref with sibling properties (OpenAPI 3.1 pattern, Fix 12)", + "$ref": "#/components/schemas/BaseEntity", + "properties": { + "extra_field": { + "type": "string" + } + } + }, + "FlexibleValue": { + "type": "object", + "required": [ + "id" + ], + "properties": { + "id": { + "type": "integer", + "format": "int64" + }, + "name": { + "type": "string" + }, + "value": { + "description": "Can be string or integer — oneOf primitives should map to jsonb", + "oneOf": [ + { + "type": "string" + }, + { + "type": "integer" + } + ] + } + } + } + }, + "responses": { + "ItemListResponse": { + "description": "A list of items (used via $ref from /ref-response)", + "content": { + "application/json": { + "schema": { + "type": "array", + "items": { + "$ref": "#/components/schemas/RefResponseItem" + } + } + } + } + } + } + } +} \ No newline at end of file diff --git a/wasm-wrappers/fdw/openapi_fdw/test/run-examples.sh b/wasm-wrappers/fdw/openapi_fdw/test/run-examples.sh new file mode 100755 index 00000000..2bbfbcb5 --- /dev/null +++ b/wasm-wrappers/fdw/openapi_fdw/test/run-examples.sh @@ -0,0 +1,642 @@ +#!/usr/bin/env bash +# Unified test runner for examples. +# +# Usage: +# ./test/run-examples.sh Run all examples +# ./test/run-examples.sh nws Run a single example +# ./test/run-examples.sh nws pokeapi Run specific examples +# ./test/run-examples.sh --no-cleanup Keep container running after tests +# ./test/run-examples.sh nws --no-cleanup Run one example, keep it running +set -euo pipefail +cd "$(dirname "$0")/.." 
+ +WASM_BIN="../target/wasm32-unknown-unknown/release/openapi_fdw.wasm" + +CLEANUP=true +EXAMPLES=() +ALL_EXAMPLES=(nws carapi pokeapi github threads) +COMPOSE="test/docker-compose.yml" + +while [[ $# -gt 0 ]]; do + case $1 in + --no-cleanup) CLEANUP=false; shift ;; + -h|--help) + echo "Usage: ./test/run-examples.sh [EXAMPLE...] [--no-cleanup]" + echo "" + echo "Examples: ${ALL_EXAMPLES[*]}" + echo "" + echo "Options:" + echo " --no-cleanup Keep Docker container running after tests" + echo "" + echo "Authenticated examples (github, threads) require tokens in test/.env." + echo "See test/.env.example for the template." + exit 0 + ;; + *) + EXAMPLES+=("$1"); shift ;; + esac +done + +if [ ${#EXAMPLES[@]} -eq 0 ]; then + EXAMPLES=("${ALL_EXAMPLES[@]}") +fi + +# Validate example names +for ex in "${EXAMPLES[@]}"; do + found=false + for valid in "${ALL_EXAMPLES[@]}"; do + if [ "$ex" = "$valid" ]; then found=true; break; fi + done + if [ "$found" = false ]; then + echo "ERROR: Unknown example '$ex'. 
Valid: ${ALL_EXAMPLES[*]}" + exit 1 + fi +done + +# Load env vars for authenticated examples +if [ -f "test/.env" ]; then + set -a + source test/.env + set +a +fi + +# --------------------------------------------------------------------------- +# Shared helpers +# --------------------------------------------------------------------------- + +PASS=0 +FAIL=0 + +psql_cmd() { + docker compose -f "$COMPOSE" exec -T -e PGPASSWORD=postgres db psql -U supabase_admin -d postgres -P pager=off "$@" +} + +run_test() { + local name="$1" sql="$2" expected="$3" + printf " %-40s " "$name" + local output + output=$(psql_cmd -c "$sql" 2>&1) || true + if echo "$output" | grep -q "$expected"; then + echo "PASS" + PASS=$((PASS + 1)) + else + echo "FAIL" + echo " Expected: $expected" + echo " Output: $(echo "$output" | head -5)" + FAIL=$((FAIL + 1)) + fi +} + +run_count_test() { + local name="$1" sql="$2" min_count="$3" + printf " %-40s " "$name" + local output + output=$(psql_cmd -t -c "$sql" 2>&1) || true + local count + count=$(echo "$output" | tr -d ' \n') + if [ "$count" -ge "$min_count" ] 2>/dev/null; then + echo "PASS ($count rows)" + PASS=$((PASS + 1)) + else + echo "FAIL (got $count, expected >= $min_count)" + FAIL=$((FAIL + 1)) + fi +} + +# --------------------------------------------------------------------------- +# Container lifecycle +# --------------------------------------------------------------------------- + +CONTAINER_STARTED=false + +cleanup() { + if [ "$CONTAINER_STARTED" = true ] && [ "$CLEANUP" = true ]; then + echo "==> Cleaning up..." + docker compose -f "$COMPOSE" down -v 2>/dev/null || true + elif [ "$CONTAINER_STARTED" = true ]; then + echo "" + echo "Container still running (--no-cleanup). To tear down manually:" + echo " docker compose -f $COMPOSE down -v" + fi +} +trap cleanup EXIT + +check_auth() { + case $1 in + github) + if [ -z "${GITHUB_TOKEN:-}" ]; then + echo "ERROR: GITHUB_TOKEN not set." 
+        echo " cp test/.env.example test/.env # then add your token"
+        exit 1
+      fi
+      ;;
+    threads)
+      if [ -z "${THREADS_ACCESS_TOKEN:-}" ]; then
+        echo "ERROR: THREADS_ACCESS_TOKEN not set."
+        echo " cp test/.env.example test/.env # then add your token"
+        exit 1
+      fi
+      ;;
+  esac
+}
+
+# Recreate the compose stack from scratch, wait until pg_isready succeeds
+# (probing once per second, giving up with exit 1 at the 60th failed probe),
+# then copy the WASM binary into the db container as /openapi_fdw.wasm.
+start_container() {
+  echo "==> Starting PostgreSQL..."
+  docker compose -f "$COMPOSE" down -v 2>/dev/null || true
+  docker compose -f "$COMPOSE" up -d
+  CONTAINER_STARTED=true
+
+  echo "Waiting for PostgreSQL..."
+  i=1
+  until docker compose -f "$COMPOSE" exec -T db pg_isready -U supabase_admin > /dev/null 2>&1; do
+    if [ "$i" -eq 60 ]; then
+      echo "ERROR: PostgreSQL failed to start"
+      exit 1
+    fi
+    sleep 1
+    i=$((i + 1))
+  done
+  echo "PostgreSQL ready after ${i}s"
+  # Fixed extra pause after the first successful probe.
+  sleep 3
+
+  echo "==> Copying WASM binary into container..."
+  local cid
+  cid=$(docker compose -f "$COMPOSE" ps -q db)
+  docker cp "$WASM_BIN" "${cid}:/openapi_fdw.wasm"
+  docker compose -f "$COMPOSE" exec -T db chmod 644 /openapi_fdw.wasm
+}
+
+# Drop all FDW objects so the next example starts clean
+reset_fdw() {
+  psql_cmd -c "DROP FOREIGN DATA WRAPPER IF EXISTS wasm_wrapper CASCADE;" &> /dev/null
+}
+
+load_example() {
+  local name=$1
+
+  echo "==> Loading $name..." 
+  reset_fdw
+  # Output (including any SQL errors) from the example's init script is discarded.
+  psql_cmd < "examples/$name/init.sql" > /dev/null 2>&1
+
+  # The tokens are spliced into SQL string literals below. Double every
+  # embedded single quote (SQL's escape for ' inside '...') so an unusual
+  # token cannot end the literal early.
+  local sq="'" token
+  case $name in
+    github)
+      token=${GITHUB_TOKEN//$sq/$sq$sq}
+      psql_cmd -c "
+        ALTER SERVER github OPTIONS (SET api_key '${token}');
+        ALTER SERVER github_debug OPTIONS (SET api_key '${token}');
+      " > /dev/null 2>&1
+      ;;
+    threads)
+      token=${THREADS_ACCESS_TOKEN//$sq/$sq$sq}
+      psql_cmd -c "
+        ALTER SERVER threads OPTIONS (SET api_key '${token}');
+        ALTER SERVER threads_debug OPTIONS (SET api_key '${token}');
+        ALTER SERVER threads_import OPTIONS (SET api_key '${token}');
+      " > /dev/null 2>&1
+      ;;
+  esac
+}
+
+# ---------------------------------------------------------------------------
+# Verify functions — one per example
+# ---------------------------------------------------------------------------
+
+# Checks for the "nws" example. Each run_test/run_count_test call passes
+# (name, SQL, expected pattern or minimum count).
+verify_nws() {
+  echo "=== NWS Weather API ==="
+  echo ""
+
+  echo "Stations (GeoJSON + pagination):"
+  run_test "Basic query" \
+    "SELECT station_identifier, name, time_zone FROM stations LIMIT 3;" \
+    "station_identifier"
+  run_count_test "Pagination (50+ rows fetched)" \
+    "SELECT count(*) FROM (SELECT 1 FROM stations LIMIT 60) t;" \
+    51
+  run_test "Lookup by rowid_column" \
+    "SELECT station_identifier, name FROM stations WHERE station_identifier = 'KDEN';" \
+    "KDEN"
+
+  echo ""
+  echo "JSONB Columns:"
+  run_test "Elevation as jsonb" \
+    "SELECT station_identifier, elevation->>'value' AS elev, elevation->>'unitCode' AS unit FROM stations LIMIT 3;" \
+    "wmoUnit"
+
+  echo ""
+  echo "camelCase Matching:"
+  run_test "stationIdentifier -> station_identifier" \
+    "SELECT station_identifier FROM stations LIMIT 1;" \
+    "station_identifier"
+  run_test "timeZone -> time_zone" \
+    "SELECT time_zone FROM stations WHERE time_zone IS NOT NULL LIMIT 1;" \
+    "(1 row)"
+
+  echo ""
+  echo "Active Alerts (timestamptz coercion):"
+  run_test "Alerts with timestamps" \
+    "SELECT event, severity, headline, onset FROM active_alerts LIMIT 3;" \
+    "severity"
+  run_test "timestamptz format (onset)" \
+    "SELECT onset FROM active_alerts 
WHERE onset IS NOT NULL LIMIT 1;" \
+    "+00"
+
+  echo ""
+  echo "Query Param Pushdown (severity=Severe):"
+  run_test "Filter by severity" \
+    "SELECT event, severity, headline FROM active_alerts WHERE severity = 'Severe' LIMIT 3;" \
+    "severity"
+
+  echo ""
+  echo "Station Observations (path param):"
+  run_test "KDEN observations" \
+    "SELECT timestamp, text_description, temperature->>'value' AS temp FROM station_observations WHERE station_id = 'KDEN' LIMIT 3;" \
+    "text_description"
+
+  echo ""
+  echo "Latest Observation (single object):"
+  run_test "Single row response" \
+    "SELECT text_description, temperature->>'value' AS temp FROM latest_observation WHERE station_id = 'KDEN';" \
+    "(1 row)"
+
+  echo ""
+  echo "Point Metadata (composite path param):"
+  run_test "Denver coordinates" \
+    "SELECT grid_id, grid_x, grid_y FROM point_metadata WHERE point = '39.7456,-104.9887';" \
+    "BOU"
+
+  echo ""
+  echo "Forecast (multi-path-param + nested response):"
+  run_test "Denver forecast" \
+    "SELECT name, temperature, temperature_unit, is_daytime, short_forecast FROM forecast_periods WHERE wfo = 'BOU' AND x = '63' AND y = '62' LIMIT 3;" \
+    "temperature_unit"
+
+  echo ""
+  echo "Type Coercion:"
+  # Match a whole data row holding a boolean ("t" or "f"). A bare "t" also
+  # matches the column header "is_daytime", so the check could never fail.
+  run_test "Boolean (is_daytime)" \
+    "SELECT is_daytime FROM forecast_periods WHERE wfo = 'BOU' AND x = '63' AND y = '62' LIMIT 1;" \
+    "^ [tf] *$"
+  run_test "Integer (temperature)" \
+    "SELECT temperature FROM forecast_periods WHERE wfo = 'BOU' AND x = '63' AND y = '62' LIMIT 1;" \
+    "(1 row)"
+
+  echo ""
+  echo "LIMIT Pushdown:"
+  # run_count_test checks ">= 3"; the subquery's LIMIT 3 caps the count at 3,
+  # so together they pin the count to exactly 3.
+  run_count_test "LIMIT 3 returns exactly 3" \
+    "SELECT count(*) FROM (SELECT 1 FROM stations LIMIT 3) t;" \
+    3
+
+  echo ""
+  echo "Debug Mode:"
+  run_test "HTTP request details" \
+    "SELECT station_identifier FROM stations_debug LIMIT 1;" \
+    "HTTP GET"
+  run_test "Scan statistics" \
+    "SELECT station_identifier FROM stations_debug LIMIT 1;" \
+    "Scan complete"
+
+  echo ""
+  echo "IMPORT FOREIGN SCHEMA:"
+  psql_cmd -c "DROP SCHEMA IF EXISTS nws_verify 
CASCADE;" > /dev/null 2>&1
+  psql_cmd -c "CREATE SCHEMA nws_verify;" > /dev/null 2>&1
+  run_test "Auto-generate tables" \
+    "IMPORT FOREIGN SCHEMA \"unused\" FROM SERVER nws_import INTO nws_verify;" \
+    "IMPORT FOREIGN SCHEMA"
+  run_count_test "Generated tables" \
+    "SELECT count(*) FROM information_schema.foreign_tables WHERE foreign_table_schema = 'nws_verify';" \
+    1
+  psql_cmd -c "DROP SCHEMA nws_verify CASCADE;" > /dev/null 2>&1
+
+  echo ""
+  echo "Attrs catch-all column:"
+  run_test "Extra fields in attrs" \
+    "SELECT station_identifier, attrs->>'county' AS county FROM stations LIMIT 3;" \
+    "county"
+}
+
+# Checks for the "carapi" example. Each call passes
+# (name, SQL, expected pattern or minimum count).
+verify_carapi() {
+  echo "=== CarAPI ==="
+  echo ""
+
+  echo "Makes (pagination + auto-detected wrapper):"
+  run_test "Basic query" \
+    "SELECT id, name FROM makes LIMIT 5;" \
+    "Acura"
+  run_count_test "Has makes" \
+    "SELECT count(*) FROM (SELECT 1 FROM makes LIMIT 10) t;" \
+    5
+
+  echo ""
+  echo "Models (query pushdown):"
+  run_test "Toyota 2020 models" \
+    "SELECT id, name, make FROM models WHERE make = 'Toyota' AND year = '2020' LIMIT 5;" \
+    "Toyota"
+
+  echo ""
+  echo "Trims (pricing + query pushdown):"
+  run_test "2020 Camry trims" \
+    "SELECT trim, msrp, description FROM trims WHERE year = '2020' AND make = 'Toyota' AND model = 'Camry' LIMIT 3;" \
+    "Sedan"
+
+  echo ""
+  echo "Bodies (dimensions):"
+  run_test "2020 Camry body" \
+    "SELECT type, doors, length, curb_weight FROM bodies WHERE year = '2020' AND make = 'Toyota' AND model = 'Camry' LIMIT 2;" \
+    "Sedan"
+
+  echo ""
+  echo "Engines (performance data):"
+  run_test "2020 Camry engines" \
+    "SELECT engine_type, horsepower_hp, cylinders, transmission FROM engines WHERE year = '2020' AND make = 'Toyota' AND model = 'Camry' LIMIT 2;" \
+    "horsepower_hp"
+
+  echo ""
+  echo "Mileages (fuel economy):"
+  run_test "2020 Camry mileage" \
+    "SELECT combined_mpg, epa_city_mpg, epa_highway_mpg, range_city FROM mileages WHERE year = '2020' AND make = 'Toyota' AND model = 'Camry' LIMIT 2;" \
+    "combined_mpg"
+
+  echo ""
+  echo "Exterior Colors (color data):"
+  run_test "2020 Camry colors" \
+    "SELECT color, rgb FROM exterior_colors WHERE year = '2020' AND make = 'Toyota' AND model = 'Camry' LIMIT 3;" \
+    "color"
+
+  echo ""
+  echo "OBD Codes:"
+  run_test "Fetch codes" \
+    "SELECT code, description FROM obd_codes LIMIT 5;" \
+    "code"
+
+  echo ""
+  echo "Debug Mode:"
+  run_test "HTTP request details" \
+    "SELECT id FROM makes_debug LIMIT 1;" \
+    "HTTP GET"
+}
+
+# Checks for the "pokeapi" example. Each call passes
+# (name, SQL, expected pattern or minimum count).
+verify_pokeapi() {
+  echo "=== PokeAPI ==="
+  echo ""
+
+  echo "Pokemon List (offset-based pagination):"
+  run_test "Basic query" \
+    "SELECT name, url FROM pokemon LIMIT 5;" \
+    "name"
+  # The test name promises more than 20 rows, so require at least 21; a
+  # minimum of 1 would pass even if only a single row came back.
+  # NOTE(review): assumes the pokemon endpoint can supply 25 rows — confirm.
+  run_count_test "Pagination (20+ rows fetched)" \
+    "SELECT count(*) FROM (SELECT 1 FROM pokemon LIMIT 25) t;" \
+    21
+
+  echo ""
+  echo "Pokemon Detail (path param):"
+  run_test "Pikachu lookup" \
+    "SELECT id, name, height, weight, base_experience FROM pokemon_detail WHERE name = 'pikachu';" \
+    "(1 row)"
+
+  echo ""
+  echo "Types List:"
+  run_test "Basic query" \
+    "SELECT name, url FROM types LIMIT 5;" \
+    "name"
+
+  echo ""
+  echo "Type Detail (path param):"
+  run_test "Fire type lookup" \
+    "SELECT id, name FROM type_detail WHERE name = 'fire';" \
+    "(1 row)"
+
+  echo ""
+  echo "Berries List:"
+  run_test "Basic query" \
+    "SELECT name, url FROM berries LIMIT 5;" \
+    "name"
+
+  echo ""
+  echo "Berry Detail (path param):"
+  run_test "Cheri berry lookup" \
+    "SELECT id, name, growth_time, max_harvest FROM berry_detail WHERE name = 'cheri';" \
+    "(1 row)"
+
+  echo ""
+  echo "Debug Mode:"
+  run_test "HTTP request details" \
+    "SELECT name FROM pokemon_debug LIMIT 1;" \
+    "HTTP GET"
+}
+
+verify_github() {
+  echo "=== GitHub API ==="
+  echo ""
+
+  echo "My Profile (single object):"
+  run_test "Fetch profile" \
+    "SELECT login, id, name FROM my_profile;" \
+    "(1 row)"
+  run_test "Has login" \
+    "SELECT login FROM my_profile;" \
+    "login"
+
+  echo ""
+  echo "My Repos (pagination):"
+  run_test "Basic query" \
+    "SELECT 
id, name, language FROM my_repos LIMIT 5;" \
+    "id"
+  # Minimum count 1: passes as soon as a single row is returned.
+  run_count_test "Has repos" \
+    "SELECT count(*) FROM (SELECT 1 FROM my_repos LIMIT 5) t;" \
+    1
+
+  # The remaining GitHub checks all query the supabase/wrappers repository.
+  echo ""
+  echo "Repo Detail (path params):"
+  run_test "Fetch supabase/wrappers" \
+    "SELECT name, stargazers_count, language FROM repo_detail WHERE owner = 'supabase' AND repo = 'wrappers';" \
+    "(1 row)"
+
+  echo ""
+  echo "Repo Issues (path params + pagination):"
+  run_test "Fetch issues" \
+    "SELECT number, title, state FROM repo_issues WHERE owner = 'supabase' AND repo = 'wrappers' LIMIT 5;" \
+    "number"
+
+  echo ""
+  echo "Repo Pulls (path params + state pushdown):"
+  run_test "Fetch closed PRs" \
+    "SELECT number, title, state FROM repo_pulls WHERE owner = 'supabase' AND repo = 'wrappers' AND state = 'closed' LIMIT 5;" \
+    "closed"
+
+  echo ""
+  echo "Repo Releases (path params):"
+  run_test "Fetch releases" \
+    "SELECT tag_name, name, prerelease FROM repo_releases WHERE owner = 'supabase' AND repo = 'wrappers' LIMIT 5;" \
+    "tag_name"
+
+  echo ""
+  echo "Search Repos (query pushdown):"
+  run_test "Search for repos" \
+    "SELECT name, full_name, stargazers_count FROM search_repos WHERE q = 'openapi foreign data wrapper' LIMIT 5;" \
+    "name"
+
+  echo ""
+  echo "Debug Mode:"
+  run_test "HTTP request details" \
+    "SELECT id FROM search_repos_debug WHERE q = 'supabase' LIMIT 1;" \
+    "HTTP GET"
+}
+
+# Checks for the "threads" example. Each call passes
+# (name, SQL, expected pattern or minimum count).
+verify_threads() {
+  echo "=== Threads API ==="
+  echo ""
+
+  echo "My Profile (single object):"
+  run_test "Fetch profile" \
+    "SELECT id, username, name FROM my_profile;" \
+    "(1 row)"
+  run_test "Has username" \
+    "SELECT username FROM my_profile;" \
+    "username"
+
+  echo ""
+  echo "My Threads (pagination + timestamptz):"
+  run_test "Basic query" \
+    "SELECT id, text, media_type, timestamp FROM my_threads LIMIT 5;" \
+    "id"
+  run_count_test "Has posts" \
+    "SELECT count(*) FROM (SELECT 1 FROM my_threads LIMIT 5) t;" \
+    1
+
+  echo ""
+  echo "My Replies:"
+  run_test "Basic query" \
+    "SELECT id, text, timestamp 
FROM my_replies LIMIT 5;" \
+    "id"
+
+  echo ""
+  echo "Thread Detail (path param):"
+  # Take one real thread id to drive the three path-parameter checks below.
+  # `|| true` keeps a failing lookup from aborting the script when errexit and
+  # pipefail are active; an empty id simply makes those checks print SKIP.
+  local thread_id
+  thread_id=$(psql_cmd -t -c "SELECT id FROM my_threads LIMIT 1;" 2>/dev/null | tr -d ' \n') || true
+  if [ -n "$thread_id" ]; then
+    run_test "Fetch by ID" \
+      "SELECT id, text, media_type FROM thread_detail WHERE thread_id = '$thread_id';" \
+      "(1 row)"
+  else
+    echo " SKIP (no threads found)"
+  fi
+
+  echo ""
+  echo "Thread Replies (path param + pagination):"
+  if [ -n "$thread_id" ]; then
+    run_test "Fetch replies" \
+      "SELECT id, text, username FROM thread_replies WHERE thread_id = '$thread_id' LIMIT 5;" \
+      "id"
+  else
+    echo " SKIP (no threads found)"
+  fi
+
+  echo ""
+  echo "Thread Conversation (all-depth replies):"
+  if [ -n "$thread_id" ]; then
+    run_test "Fetch conversation" \
+      "SELECT id, text, username FROM thread_conversation WHERE thread_id = '$thread_id' LIMIT 5;" \
+      "id"
+  else
+    echo " SKIP (no threads found)"
+  fi
+
+  echo ""
+  echo "Keyword Search (query param pushdown):"
+  run_test "Search for 'threads'" \
+    "SELECT id, text, username FROM keyword_search WHERE q = 'threads' LIMIT 5;" \
+    "id"
+
+  echo ""
+  echo "Profile Lookup (query param):"
+  # Three-way outcome, so run_test is not used: PASS when the output mentions
+  # "threads", SKIP (counted as neither pass nor fail) when it looks like an
+  # error or permission problem, FAIL otherwise.
+  printf " %-40s " "Look up @threads"
+  local pl_output
+  pl_output=$(psql_cmd -c "SELECT username, name, is_verified FROM profile_lookup WHERE username = 'threads';" 2>&1) || true
+  if grep -q "threads" <<<"$pl_output"; then
+    echo "PASS"
+    PASS=$((PASS + 1))
+  # -E with plain | is the portable spelling; "\|" in a basic regex is a GNU
+  # extension that other grep implementations need not honour.
+  elif grep -qiE "error|permission|500" <<<"$pl_output"; then
+    echo "SKIP (permission not available)"
+  else
+    echo "FAIL"
+    echo " Expected: threads"
+    echo " Output: $(head -5 <<<"$pl_output")"
+    FAIL=$((FAIL + 1))
+  fi
+
+  echo ""
+  echo "Publishing Limit:"
+  run_test "Fetch quota" \
+    "SELECT quota_usage, config FROM publishing_limit;" \
+    "quota_usage"
+
+  echo ""
+  echo "Debug Mode:"
+  run_test "HTTP request details" \
+    "SELECT id FROM keyword_search_debug WHERE q = 'meta' LIMIT 1;" \
+    "HTTP GET"
+
+  echo ""
+  echo "IMPORT FOREIGN 
SCHEMA:"
+  # Import into a scratch schema that is created here and dropped afterwards.
+  psql_cmd -c "DROP SCHEMA IF EXISTS threads_auto CASCADE;" > /dev/null 2>&1
+  psql_cmd -c "CREATE SCHEMA threads_auto;" > /dev/null 2>&1
+  run_test "Auto-generate tables" \
+    "IMPORT FOREIGN SCHEMA \"unused\" FROM SERVER threads_import INTO threads_auto;" \
+    "IMPORT FOREIGN SCHEMA"
+  run_count_test "Generated tables" \
+    "SELECT count(*) FROM information_schema.foreign_tables WHERE foreign_table_schema = 'threads_auto';" \
+    1
+  psql_cmd -c "DROP SCHEMA threads_auto CASCADE;" > /dev/null 2>&1
+
+  echo ""
+  echo "Attrs catch-all column:"
+  run_test "Extra fields in attrs" \
+    "SELECT id, attrs->>'media_product_type' AS product_type FROM my_threads LIMIT 3;" \
+    "THREADS"
+}
+
+# ---------------------------------------------------------------------------
+# Main
+# ---------------------------------------------------------------------------
+
+echo "==> Building WASM binary..."
+make build
+chmod +r "$WASM_BIN"
+
+start_container
+
+# Per-example tallies: an example counts as passed only if none of its checks failed.
+EXAMPLE_PASS=0
+EXAMPLE_FAIL=0
+
+for example in "${EXAMPLES[@]}"; do
+  # Exits the whole script (status 1) if this example's token is missing.
+  check_auth "$example"
+
+  echo ""
+  echo "============================================"
+  echo " Example: $example"
+  echo "============================================"
+  echo ""
+
+  load_example "$example"
+
+  echo ""
+  # Reset the per-check counters that run_test/run_count_test increment.
+  PASS=0
+  FAIL=0
+  # Dispatch by name: calls verify_nws, verify_carapi, ...
+  "verify_$example"
+
+  echo ""
+  echo "--------------------------------------------"
+  echo " $example: $PASS passed, $FAIL failed"
+  echo "--------------------------------------------"
+
+  if [ "$FAIL" -eq 0 ]; then
+    EXAMPLE_PASS=$((EXAMPLE_PASS + 1))
+  else
+    EXAMPLE_FAIL=$((EXAMPLE_FAIL + 1))
+  fi
+done
+
+echo ""
+echo "============================================"
+echo " Examples: $((EXAMPLE_PASS + EXAMPLE_FAIL)) run, $EXAMPLE_PASS passed, $EXAMPLE_FAIL failed"
+echo "============================================"
+
+# Last command, so the script's exit status is 0 only when no example failed.
+[ "$EXAMPLE_FAIL" -eq 0 ]
diff --git a/wasm-wrappers/fdw/openapi_fdw/test/run.sh b/wasm-wrappers/fdw/openapi_fdw/test/run.sh
new file mode 100755
index 00000000..5e64d62e
--- /dev/null
+++ 
b/wasm-wrappers/fdw/openapi_fdw/test/run.sh
@@ -0,0 +1,761 @@
+#!/usr/bin/env bash
+# Integration test runner for OpenAPI FDW
+# Uses MockServer for deterministic testing (no network dependency).
+# Set RUN_NWS_TESTS=1 to also run live NWS API tests.
+set -euo pipefail
+
+# Work from the FDW crate directory (the parent of this script's directory).
+cd "$(dirname "$0")/.."
+
+WASM_BIN="../target/wasm32-unknown-unknown/release/openapi_fdw.wasm"
+
+PASS=0
+FAIL=0
+
+# Run psql inside the compose "db" service as user postgres; the password
+# comes from POSTGRES_PASSWORD (default "postgres"). All arguments are
+# forwarded to psql.
+psql_cmd() {
+  docker compose -f test/docker-compose.yml exec -T -e PGPASSWORD="${POSTGRES_PASSWORD:-postgres}" db psql -U postgres -P pager=off "$@"
+}
+
+# run_test NAME SQL EXPECTED
+# Run a query, check output matches EXPECTED (grep treats it as a basic
+# regular expression, so e.g. "." matches any character).
+run_test() {
+  local test_name="$1"
+  local sql="$2"
+  local expected="$3"
+
+  echo ""
+  echo "--- $test_name ---"
+  local output
+  # A failing query must not trip errexit; its error text is part of the output.
+  output=$(psql_cmd -c "$sql" 2>&1) || true
+  printf '%s\n' "$output"
+
+  # Here-string instead of `echo ... | grep -q`: under pipefail, grep -q
+  # exiting at the first match can SIGPIPE the writer on large output and
+  # turn a real match into a FAIL.
+  if grep -q -- "$expected" <<<"$output"; then
+    echo "PASS"
+    PASS=$((PASS + 1))
+  else
+    echo "FAIL — expected to find: $expected"
+    FAIL=$((FAIL + 1))
+  fi
+}
+
+# run_count_test NAME SQL EXPECTED_COUNT
+# Run a count query, check exact row count (string comparison after spaces
+# and newlines are removed from the tuples-only output).
+run_count_test() {
+  local test_name="$1"
+  local sql="$2"
+  local expected_count="$3"
+
+  echo ""
+  echo "--- $test_name ---"
+  local output
+  output=$(psql_cmd -t -c "$sql" 2>&1) || true
+  local count
+  count=$(tr -d ' \n' <<<"$output")
+  echo "count=$count"
+
+  if [ "$count" = "$expected_count" ]; then
+    echo "PASS"
+    PASS=$((PASS + 1))
+  else
+    echo "FAIL — expected $expected_count, got $count"
+    FAIL=$((FAIL + 1))
+  fi
+}
+
+# run_error_test NAME SQL EXPECTED
+# Run a query, check output matches EXPECTED case-insensitively (used for
+# queries that are supposed to produce an error).
+run_error_test() {
+  local test_name="$1"
+  local sql="$2"
+  local expected="$3"
+
+  echo ""
+  echo "--- $test_name ---"
+  local output
+  output=$(psql_cmd -c "$sql" 2>&1) || true
+  printf '%s\n' "$output"
+
+  if grep -qi -- "$expected" <<<"$output"; then
+    echo "PASS"
+    PASS=$((PASS + 1))
+  else
+    echo "FAIL — expected error containing: $expected"
+    FAIL=$((FAIL + 1))
+  fi
+}
+
+# ---- Build ----
+
+echo "=== Building WASM binary ==="
+make build
+chmod +r "$WASM_BIN"
+
+# ---- Start services 
---- + +echo "" +echo "=== Starting services ===" +docker compose -f test/docker-compose.yml down -v 2>/dev/null || true +docker compose -f test/docker-compose.yml up -d + +echo "Waiting for MockServer..." +for i in $(seq 1 180); do + if curl -sf --max-time 2 -X PUT http://localhost:1080/mockserver/status > /dev/null 2>&1; then + echo "MockServer ready after ${i}s" + break + fi + sleep 1 +done +if ! curl -sf --max-time 2 -X PUT http://localhost:1080/mockserver/status > /dev/null 2>&1; then + echo "ERROR: MockServer failed to start within 180s" + docker compose -f test/docker-compose.yml logs mockserver + exit 1 +fi + +echo "Loading MockServer expectations..." +curl -sf -X PUT http://localhost:1080/mockserver/expectation -d @test/expectations.json > /dev/null +echo "MockServer expectations loaded" + +echo "Waiting for Postgres..." +for i in $(seq 1 120); do + if docker compose -f test/docker-compose.yml exec -T db pg_isready -U supabase_admin > /dev/null 2>&1; then + echo "Postgres ready after ${i}s" + break + fi + sleep 1 +done +if ! 
docker compose -f test/docker-compose.yml exec -T db pg_isready -U supabase_admin > /dev/null 2>&1; then
+  echo "ERROR: Postgres failed to start within 120s"
+  docker compose -f test/docker-compose.yml logs db
+  exit 1
+fi
+sleep 3  # wait for init scripts
+
+echo ""
+echo "=== Copying WASM binary into container ==="
+# Resolve the db service's container id, copy the binary in, make it world-readable.
+container=$(docker compose -f test/docker-compose.yml ps -q db)
+docker cp "$WASM_BIN" "$container":/openapi_fdw.wasm
+docker compose -f test/docker-compose.yml exec -T db chmod 644 /openapi_fdw.wasm
+
+echo ""
+echo "=== Loading custom spec into MockServer ==="
+# Serve the spec at /openapi.json for the FDW's spec_url
+# The inline Python wraps test/mock-spec.json in a MockServer expectation
+# (GET /openapi.json -> 200 with the spec as the JSON body) and writes it to
+# stdout; curl then uploads that expectation.
+python3 -c "
+import json, sys
+spec = json.load(open('test/mock-spec.json'))
+exp = {
+    'httpRequest': {'method': 'GET', 'path': '/openapi.json'},
+    'httpResponse': {
+        'statusCode': 200,
+        'headers': {'content-type': ['application/json']},
+        'body': json.dumps(spec)
+    }
+}
+sys.stdout.write(json.dumps(exp))
+" | curl -sf -X PUT http://localhost:1080/mockserver/expectation -d @- > /dev/null
+echo "Spec served at /openapi.json"
+
+# Load spec into MockServer OpenAPI mode (auto-generates endpoint responses)
+# May fail if MockServer rejects advanced OpenAPI features (e.g., 2XX wildcards) — non-fatal
+# (running curl as the `if` condition keeps a failure from tripping `set -e`)
+if curl -sf -X PUT http://localhost:1080/mockserver/openapi \
+  -d '{"specUrlOrPayload":"file:/config/mock-spec.json"}' > /dev/null 2>&1; then
+  echo "OpenAPI expectations loaded"
+else
+  echo "Warning: MockServer OpenAPI auto-generation failed (expected with 2XX/3.1 features)"
+  echo "IMPORT FOREIGN SCHEMA tests still work via /openapi.json endpoint"
+fi
+
+# ---- Mock API Tests: Core ----
+# Arguments: run_test NAME SQL EXPECTED_PATTERN, run_count_test NAME SQL EXACT_COUNT.
+
+echo ""
+echo "========================================="
+echo " Mock API Tests — Core"
+echo "========================================="
+
+run_test "Test 1: Basic JSON array" \
+  "SELECT id, name, price, in_stock FROM mock_items;" \
+  "Widget"
+
+run_count_test "Test 1b: Row count" \
+  "SELECT count(*) FROM mock_items;" \
+  "3"
+
+run_test "Test 2: 
Wrapped response (auto-detect 'data' key)" \ + "SELECT id, name, price FROM mock_products;" \ + "Laptop" + +run_count_test "Test 2b: Row count" \ + "SELECT count(*) FROM mock_products;" \ + "2" + +run_test "Test 3: GeoJSON nested objects" \ + "SELECT station_id, name, state, elevation FROM mock_stations;" \ + "Denver International" + +run_count_test "Test 3b: Row count" \ + "SELECT count(*) FROM mock_stations;" \ + "3" + +run_count_test "Test 4: Cursor pagination (token) — all pages" \ + "SELECT count(*) FROM mock_paginated_items;" \ + "5" + +run_test "Test 4b: Verify last page data" \ + "SELECT id, value FROM mock_paginated_items;" \ + "page3-a" + +run_count_test "Test 5: URL-based pagination — all pages" \ + "SELECT count(*) FROM mock_url_paginated;" \ + "3" + +run_test "Test 5b: Verify page 2 data" \ + "SELECT id, label FROM mock_url_paginated;" \ + "third" + +run_test "Test 6: Query parameter pushdown" \ + "SELECT id, name, category FROM mock_search WHERE category = 'electronics';" \ + "Phone" + +run_count_test "Test 6b: Row count" \ + "SELECT count(*) FROM mock_search WHERE category = 'electronics';" \ + "2" + +run_test "Test 7: Path parameter substitution" \ + "SELECT id, title FROM mock_user_posts WHERE user_id = '42';" \ + "First Post" + +run_count_test "Test 7b: Row count" \ + "SELECT count(*) FROM mock_user_posts WHERE user_id = '42';" \ + "2" + +run_test "Test 8: camelCase column matching" \ + "SELECT first_name, last_name, email_address FROM mock_camel;" \ + "Alice" + +run_test "Test 8b: Second row" \ + "SELECT first_name, last_name FROM mock_camel;" \ + "Bob" + +run_count_test "Test 9: 404 returns empty" \ + "SELECT count(*) FROM mock_not_found WHERE id = '999';" \ + "0" + +run_test "Test 10: Rate limiting 429 retry" \ + "SELECT id, status FROM mock_rate_limited;" \ + "ok" + +# ---- Auth Tests ---- + +echo "" +echo "=========================================" +echo " Auth Tests" +echo "=========================================" + +run_test "Test 11: API 
key (default Authorization: Bearer)" \ + "SELECT status FROM mock_auth_bearer_data;" \ + "authenticated" + +run_test "Test 12: API key (custom header X-API-Key)" \ + "SELECT status FROM mock_auth_custom_data;" \ + "custom_auth" + +run_test "Test 13: API key (custom prefix Token)" \ + "SELECT status FROM mock_auth_prefix_data;" \ + "prefix_auth" + +run_test "Test 14: Bearer token" \ + "SELECT status FROM mock_auth_token_data;" \ + "bearer_auth" + +run_test "Test 15: Custom headers JSON" \ + "SELECT status FROM mock_auth_headers_data;" \ + "headers_json" + +run_test "Test 15b: API key in query parameter (Fix 6)" \ + "SELECT status FROM mock_auth_query_data;" \ + "query_auth" + +run_test "Test 15c: API key in cookie (Fix 13)" \ + "SELECT status FROM mock_auth_cookie_data;" \ + "cookie_auth" + +# ---- Type Coercion & Data Format Tests ---- + +echo "" +echo "=========================================" +echo " Type Coercion & Data Format Tests" +echo "=========================================" + +run_test "Test 16: Type coercion — text, bool, int" \ + "SELECT name, active, count FROM mock_typed_data;" \ + "typed-row" + +run_test "Test 16b: Date parsing" \ + "SELECT created_date FROM mock_typed_data;" \ + "2024-01-15" + +run_test "Test 16c: Timestamp parsing" \ + "SELECT updated_at FROM mock_typed_data;" \ + "2024-06-15" + +run_count_test "Test 16d: Null handling" \ + "SELECT count(*) FROM mock_typed_data WHERE nullable_field IS NULL;" \ + "1" + +run_test "Test 16e: UUID as text" \ + "SELECT uuid_field FROM mock_typed_data;" \ + "550e8400" + +run_test "Test 16f: JSONB array column" \ + "SELECT tags FROM mock_typed_data;" \ + "a" + +run_test "Test 16g: Float (real) column" \ + "SELECT score FROM mock_typed_data;" \ + "3.14" + +run_test "Test 17: Single object response" \ + "SELECT id, name, version FROM mock_singleton;" \ + "singleton" + +run_count_test "Test 17b: Singleton row count" \ + "SELECT count(*) FROM mock_singleton;" \ + "1" + +run_count_test "Test 18: Empty array 
response" \ + "SELECT count(*) FROM mock_empty;" \ + "0" + +run_test "Test 19: Records wrapper auto-detect" \ + "SELECT id, value FROM mock_records;" \ + "rec-wrap" + +run_count_test "Test 19b: Records row count" \ + "SELECT count(*) FROM mock_records;" \ + "2" + +run_test "Test 20: Entries wrapper auto-detect" \ + "SELECT id, label FROM mock_entries;" \ + "entry-1" + +run_count_test "Test 20b: Entries row count" \ + "SELECT count(*) FROM mock_entries;" \ + "2" + +run_test "Test 21: Results wrapper auto-detect" \ + "SELECT id, value FROM mock_results;" \ + "res-wrap" + +run_count_test "Test 21b: Results row count" \ + "SELECT count(*) FROM mock_results;" \ + "1" + +# ---- Column Matching Tests ---- + +echo "" +echo "=========================================" +echo " Column Matching Tests" +echo "=========================================" + +run_test "Test 22: PascalCase → case-insensitive match" \ + "SELECT id, name, age FROM mock_pascal;" \ + "PascalAlice" + +run_count_test "Test 22b: PascalCase row count" \ + "SELECT count(*) FROM mock_pascal;" \ + "2" + +# ---- Pagination Edge Cases ---- + +echo "" +echo "=========================================" +echo " Pagination Edge Cases" +echo "=========================================" + +run_count_test "Test 23: has_more pattern — all pages" \ + "SELECT count(*) FROM mock_has_more;" \ + "2" + +run_test "Test 23b: has_more data verification" \ + "SELECT id, value FROM mock_has_more;" \ + "hm2" + +run_count_test "Test 24: Relative URL pagination — all pages" \ + "SELECT count(*) FROM mock_relative_paged;" \ + "2" + +run_test "Test 24b: Relative URL data verification" \ + "SELECT id, label FROM mock_relative_paged;" \ + "rel2" + +# Table-level page_size isolation: query table with override first, then server default. +# MockServer expectations require exact limit= values — if config leaks between scans, +# the second query gets no matching expectation and fails. 
+run_test "Test 47: Table-level page_size override (limit=5)" \ + "SELECT id, value FROM mock_config_custom_page;" \ + "custom-page-size" + +run_test "Test 48: Server default page_size restored after override (limit=10)" \ + "SELECT id, value FROM mock_config_default_page;" \ + "server-default-size" + +# ---- URL Construction Tests ---- + +echo "" +echo "=========================================" +echo " URL Construction Tests" +echo "=========================================" + +run_test "Test 25: Multiple path parameters" \ + "SELECT id, title FROM mock_multi_path WHERE org = 'acme' AND repo = 'widget';" \ + "Bug report" + +run_count_test "Test 25b: Multi-path row count" \ + "SELECT count(*) FROM mock_multi_path WHERE org = 'acme' AND repo = 'widget';" \ + "2" + +run_test "Test 26: Rowid pushdown (single resource)" \ + "SELECT id, name FROM mock_resources WHERE id = 'res-42';" \ + "found-by-id" + +run_count_test "Test 26b: Rowid pushdown count" \ + "SELECT count(*) FROM mock_resources WHERE id = 'res-42';" \ + "1" + +# ---- Error Handling Tests ---- + +echo "" +echo "=========================================" +echo " Error Handling Tests" +echo "=========================================" + +run_error_test "Test 27: HTTP 500 error" \ + "SELECT * FROM mock_server_error;" \ + "error" + +run_error_test "Test 28: Invalid JSON response" \ + "SELECT * FROM mock_invalid_json;" \ + "error" + +# ---- Edge Case Tests ---- + +echo "" +echo "=========================================" +echo " Edge Case Tests" +echo "=========================================" + +run_test "Test 33: Unix timestamp → timestamptz" \ + "SELECT id, name, created_at FROM mock_unix_timestamps;" \ + "epoch-row" + +run_test "Test 33b: Unix timestamp parsed as date" \ + "SELECT created_at FROM mock_unix_timestamps;" \ + "2023" + +run_test "Test 33c: String datetime still works alongside unix" \ + "SELECT updated_at FROM mock_unix_timestamps;" \ + "2024-06-15" + +run_test "Test 34: Acronym field names 
(clusterIP → cluster_ip)" \ + "SELECT cluster_ip, api_url, html_parser FROM mock_acronym_fields;" \ + "10.0.0.1" + +run_test "Test 34b: API URL acronym" \ + "SELECT api_url FROM mock_acronym_fields;" \ + "https://api.test.com" + +run_count_test "Test 35: Mixed types with nulls — row count" \ + "SELECT count(*) FROM mock_mixed_types;" \ + "2" + +run_count_test "Test 35b: Null handling — count non-null names" \ + "SELECT count(*) FROM mock_mixed_types WHERE name IS NOT NULL;" \ + "1" + +run_count_test "Test 35c: Null handling — count null booleans" \ + "SELECT count(*) FROM mock_mixed_types WHERE enabled IS NULL;" \ + "1" + +# ---- POST-for-Read Tests ---- + +echo "" +echo "=========================================" +echo " POST-for-Read Tests" +echo "=========================================" + +run_test "Test 45: POST-for-read — query data via POST" \ + "SELECT id, label FROM mock_search_post;" \ + "found-via-post" + +run_count_test "Test 45b: POST-for-read — row count" \ + "SELECT count(*) FROM mock_search_post;" \ + "1" + +# ---- JSON-LD Tests ---- + +echo "" +echo "=========================================" +echo " JSON-LD Tests" +echo "=========================================" + +run_count_test "Test 36: @graph wrapper auto-detection" \ + "SELECT count(*) FROM mock_jsonld_alerts;" \ + "2" + +run_test "Test 36b: @-prefixed key matching (_id → @id)" \ + "SELECT _id, headline FROM mock_jsonld_alerts;" \ + "urn:alert:1" + +run_test "Test 36c: @type key matching" \ + "SELECT _type FROM mock_jsonld_alerts;" \ + "wx:Alert" + +run_test "Test 36d: Non-@ column alongside @-columns" \ + "SELECT headline, severity FROM mock_jsonld_alerts;" \ + "Storm warning" + +run_count_test "Test 37: JSON-LD GeoJSON with @-keys in properties" \ + "SELECT count(*) FROM mock_jsonld_stations;" \ + "2" + +run_test "Test 37b: @id in nested object_path" \ + "SELECT _id FROM mock_jsonld_stations;" \ + "api.weather.gov" + +run_test "Test 37c: camelCase + @-keys together" \ + "SELECT 
station_identifier, name, time_zone FROM mock_jsonld_stations;" \ + "Denver International" + +# ---- IMPORT FOREIGN SCHEMA Tests ---- + +echo "" +echo "=========================================" +echo " IMPORT FOREIGN SCHEMA Tests" +echo "=========================================" + +# Grant permissions for IMPORT FOREIGN SCHEMA +psql_cmd -U supabase_admin -d postgres \ + -c "GRANT USAGE ON FOREIGN SERVER mock_openapi_server TO postgres;" \ + -c "GRANT ALL ON SCHEMA public TO postgres;" > /dev/null 2>&1 + +run_test "Test 29: IMPORT FOREIGN SCHEMA from custom spec" \ + "IMPORT FOREIGN SCHEMA \"openapi\" FROM SERVER mock_openapi_server INTO public;" \ + "IMPORT FOREIGN SCHEMA" + +run_count_test "Test 29b: Correct number of tables imported (no parameterized paths)" \ + "SELECT count(*) FROM information_schema.foreign_tables WHERE foreign_server_name = 'mock_openapi_server';" \ + "14" + +run_test "Test 29c: typed_records table imported" \ + "SELECT foreign_table_name FROM information_schema.foreign_tables WHERE foreign_server_name = 'mock_openapi_server' ORDER BY foreign_table_name;" \ + "typed_records" + +run_test "Test 29d: composed_items table imported (allOf)" \ + "SELECT foreign_table_name FROM information_schema.foreign_tables WHERE foreign_server_name = 'mock_openapi_server' ORDER BY foreign_table_name;" \ + "composed_items" + +run_test "Test 29e: Parameterized paths excluded" \ + "SELECT foreign_table_name FROM information_schema.foreign_tables WHERE foreign_server_name = 'mock_openapi_server' ORDER BY foreign_table_name;" \ + "singleton" + +# Verify type mappings in typed_records +run_test "Test 30: Type mapping — date column" \ + "SELECT column_name, data_type FROM information_schema.columns WHERE table_name = 'typed_records' AND column_name = 'created_date';" \ + "date" + +run_test "Test 30b: Type mapping — timestamptz column" \ + "SELECT column_name, data_type FROM information_schema.columns WHERE table_name = 'typed_records' AND column_name = 
'updated_at';" \ + "timestamp with time zone" + +run_test "Test 30c: Type mapping — integer (int32) column" \ + "SELECT column_name, data_type FROM information_schema.columns WHERE table_name = 'typed_records' AND column_name = 'count';" \ + "integer" + +run_test "Test 30d: Type mapping — real (float) column" \ + "SELECT column_name, data_type FROM information_schema.columns WHERE table_name = 'typed_records' AND column_name = 'score';" \ + "real" + +run_test "Test 30e: Type mapping — boolean column" \ + "SELECT column_name, data_type FROM information_schema.columns WHERE table_name = 'typed_records' AND column_name = 'active';" \ + "boolean" + +run_test "Test 30f: Type mapping — jsonb (array) column" \ + "SELECT column_name, data_type FROM information_schema.columns WHERE table_name = 'typed_records' AND column_name = 'tags';" \ + "jsonb" + +# Verify allOf merged properties in composed_items +run_test "Test 31: allOf — has base property (created_at)" \ + "SELECT column_name FROM information_schema.columns WHERE table_name = 'composed_items' AND column_name = 'created_at';" \ + "created_at" + +run_test "Test 31b: allOf — has extended property (title)" \ + "SELECT column_name FROM information_schema.columns WHERE table_name = 'composed_items' AND column_name = 'title';" \ + "title" + +run_test "Test 31c: allOf — has extended property (priority)" \ + "SELECT column_name FROM information_schema.columns WHERE table_name = 'composed_items' AND column_name = 'priority';" \ + "priority" + +# Verify oneOf merged properties in polymorphic (all nullable) +run_test "Test 32: oneOf — has variant 1 property" \ + "SELECT column_name FROM information_schema.columns WHERE table_name = 'polymorphic' AND column_name = 'user_id';" \ + "user_id" + +run_test "Test 32b: oneOf — has variant 2 property" \ + "SELECT column_name FROM information_schema.columns WHERE table_name = 'polymorphic' AND column_name = 'org_id';" \ + "org_id" + +run_test "Test 32c: unix-time format maps to 
timestamptz" \ + "SELECT column_name, data_type FROM information_schema.columns WHERE table_name = 'typed_records' AND column_name = 'created_epoch';" \ + "timestamp with time zone" + +# ---- Fix 4: Multi-type arrays → jsonb ---- + +run_test "Test 32d: Multi-type array [\"string\",\"integer\"] → jsonb (Fix 4)" \ + "SELECT column_name, data_type FROM information_schema.columns WHERE table_name = 'typed_records' AND column_name = 'multi_type_field';" \ + "jsonb" + +# ---- Fix 1: $ref in Response objects ---- + +run_test "Test 38: ref_response table imported (\$ref resolution)" \ + "SELECT foreign_table_name FROM information_schema.foreign_tables WHERE foreign_server_name = 'mock_openapi_server' AND foreign_table_name = 'ref_response';" \ + "ref_response" + +run_test "Test 38b: \$ref response — has label column" \ + "SELECT column_name FROM information_schema.columns WHERE table_name = 'ref_response' AND column_name = 'label';" \ + "label" + +run_test "Test 38c: \$ref response — has id column" \ + "SELECT column_name FROM information_schema.columns WHERE table_name = 'ref_response' AND column_name = 'id';" \ + "id" + +# ---- Fix 2: 2XX wildcard status codes ---- + +run_test "Test 39: wildcard_response table imported (2XX)" \ + "SELECT foreign_table_name FROM information_schema.foreign_tables WHERE foreign_server_name = 'mock_openapi_server' AND foreign_table_name = 'wildcard_response';" \ + "wildcard_response" + +run_test "Test 39b: 2XX wildcard — has status column" \ + "SELECT column_name FROM information_schema.columns WHERE table_name = 'wildcard_response' AND column_name = 'status';" \ + "status" + +# ---- Fix 3: writeOnly properties filtered ---- + +run_test "Test 40: users_write_only table imported" \ + "SELECT foreign_table_name FROM information_schema.foreign_tables WHERE foreign_server_name = 'mock_openapi_server' AND foreign_table_name = 'users_write_only';" \ + "users_write_only" + +run_test "Test 40b: writeOnly — has username (non-writeOnly)" \ + "SELECT 
column_name FROM information_schema.columns WHERE table_name = 'users_write_only' AND column_name = 'username';" \ + "username" + +run_test "Test 40c: writeOnly — has email (non-writeOnly)" \ + "SELECT column_name FROM information_schema.columns WHERE table_name = 'users_write_only' AND column_name = 'email';" \ + "email" + +run_count_test "Test 40d: writeOnly — password excluded" \ + "SELECT count(*) FROM information_schema.columns WHERE table_name = 'users_write_only' AND column_name = 'password';" \ + "0" + +run_count_test "Test 40e: writeOnly — password_hash excluded" \ + "SELECT count(*) FROM information_schema.columns WHERE table_name = 'users_write_only' AND column_name = 'password_hash';" \ + "0" + +# ---- Fix 5: Primitive oneOf composition → jsonb ---- + +run_test "Test 41: primitive_union table imported" \ + "SELECT foreign_table_name FROM information_schema.foreign_tables WHERE foreign_server_name = 'mock_openapi_server' AND foreign_table_name = 'primitive_union';" \ + "primitive_union" + +run_test "Test 41b: Primitive oneOf — value column is jsonb" \ + "SELECT column_name, data_type FROM information_schema.columns WHERE table_name = 'primitive_union' AND column_name = 'value';" \ + "jsonb" + +# ---- Fix 8: Content-type with charset ---- + +run_test "Test 42: charset_endpoint table imported (Fix 8)" \ + "SELECT foreign_table_name FROM information_schema.foreign_tables WHERE foreign_server_name = 'mock_openapi_server' AND foreign_table_name = 'charset_endpoint';" \ + "charset_endpoint" + +run_test "Test 42b: charset endpoint — has id column" \ + "SELECT column_name FROM information_schema.columns WHERE table_name = 'charset_endpoint' AND column_name = 'id';" \ + "id" + +run_test "Test 42c: charset endpoint — has label column" \ + "SELECT column_name FROM information_schema.columns WHERE table_name = 'charset_endpoint' AND column_name = 'label';" \ + "label" + +# ---- Fix 11: uuid format maps to uuid PG type ---- + +run_test "Test 43: uuid format → uuid 
type (Fix 11)" \ + "SELECT column_name, data_type FROM information_schema.columns WHERE table_name = 'typed_records' AND column_name = 'code';" \ + "uuid" + +# ---- Fix 12: \$ref with sibling properties ---- + +run_test "Test 44: ref_with_siblings table imported (Fix 12)" \ + "SELECT foreign_table_name FROM information_schema.foreign_tables WHERE foreign_server_name = 'mock_openapi_server' AND foreign_table_name = 'ref_with_siblings';" \ + "ref_with_siblings" + +run_test "Test 44b: \$ref sibling — has base property (id from BaseEntity)" \ + "SELECT column_name FROM information_schema.columns WHERE table_name = 'ref_with_siblings' AND column_name = 'id';" \ + "id" + +run_test "Test 44c: \$ref sibling — has extra_field (from sibling properties)" \ + "SELECT column_name FROM information_schema.columns WHERE table_name = 'ref_with_siblings' AND column_name = 'extra_field';" \ + "extra_field" + +# ---- POST-for-Read IMPORT Tests ---- + +run_test "Test 46: POST table imported (search_post)" \ + "SELECT foreign_table_name FROM information_schema.foreign_tables WHERE foreign_server_name = 'mock_openapi_server' AND foreign_table_name = 'search_post';" \ + "search_post" + +run_test "Test 46b: POST table has method option" \ + "SELECT ftoptions::text FROM pg_foreign_table WHERE ftrelid = 'search_post'::regclass;" \ + "method=POST" + +run_test "Test 46c: POST table has correct columns (id)" \ + "SELECT column_name FROM information_schema.columns WHERE table_name = 'search_post' AND column_name = 'id';" \ + "id" + +run_test "Test 46d: POST table has correct columns (label)" \ + "SELECT column_name FROM information_schema.columns WHERE table_name = 'search_post' AND column_name = 'label';" \ + "label" + +# ---- Summary ---- + +echo "" +echo "=========================================" +echo " Results: $PASS passed, $FAIL failed" +echo "=========================================" + +if [ "$FAIL" -gt 0 ]; then + exit 1 +fi + +# ---- Optional: NWS API Tests ---- + +if [ 
"${RUN_NWS_TESTS:-}" = "1" ]; then + echo "" + echo "=========================================" + echo " NWS API Tests (network-dependent)" + echo "=========================================" + + run_test "NWS: Basic stations query" \ + "SELECT station_identifier, name, time_zone FROM nws_stations LIMIT 5;" \ + "America/" + + run_test "NWS: Debug timing" \ + "SELECT station_identifier, name FROM nws_stations_debug LIMIT 5;" \ + "HTTP fetch" + + run_test "NWS: Active weather alerts" \ + "SELECT id, severity, event FROM nws_alerts LIMIT 5;" \ + "urn:oid" + + echo "" + echo "=========================================" + echo " NWS Results: $PASS passed, $FAIL failed" + echo "=========================================" +fi + +echo "" +echo "=== Done! ===" +echo "Connect manually: psql -h localhost -p 54322 -U postgres" +echo "Tear down: docker compose -f test/docker-compose.yml down -v" diff --git a/wasm-wrappers/fdw/openapi_fdw/wit/world.wit b/wasm-wrappers/fdw/openapi_fdw/wit/world.wit index 22e6baff..b0f0ffa4 100644 --- a/wasm-wrappers/fdw/openapi_fdw/wit/world.wit +++ b/wasm-wrappers/fdw/openapi_fdw/wit/world.wit @@ -1,8 +1,7 @@ -package supabase:openapi-fdw@0.1.4; +package supabase:openapi-fdw@0.2.0; world openapi { import supabase:wrappers/http@0.2.0; - import supabase:wrappers/jwt@0.2.0; import supabase:wrappers/stats@0.2.0; import supabase:wrappers/time@0.2.0; import supabase:wrappers/utils@0.2.0; From 2781b96ee716c0735d9ce802ec2058680123304c Mon Sep 17 00:00:00 2001 From: Cody Bromley Date: Mon, 16 Feb 2026 20:17:30 -0600 Subject: [PATCH 2/9] docs(openapi): rewrite README and fix catalog docs Rewrite README with clearer features list, honest performance section comparing FDW vs pg_http (DX tradeoff with SQL examples and end-to-end benchmarks), and move limitations up for visibility. Consolidate PERFORMANCE.md into README. Update benchmark script to measure full read-to-write lifecycle (INSERT INTO) instead of PERFORM. 
Fix tabbed content indentation in catalog docs for pymdownx.tabbed rendering. --- docs/catalog/openapi.md | 6 +- wasm-wrappers/fdw/openapi_fdw/PERFORMANCE.md | 285 ------------------ wasm-wrappers/fdw/openapi_fdw/README.md | 138 ++++++--- .../fdw/openapi_fdw/test/benchmark.sh | 13 +- 4 files changed, 119 insertions(+), 323 deletions(-) delete mode 100644 wasm-wrappers/fdw/openapi_fdw/PERFORMANCE.md diff --git a/docs/catalog/openapi.md b/docs/catalog/openapi.md index 1be25372..5d25d339 100644 --- a/docs/catalog/openapi.md +++ b/docs/catalog/openapi.md @@ -18,8 +18,8 @@ This wrapper allows you to query any REST API endpoint as a PostgreSQL foreign t | Version | Wasm Package URL | Checksum | Required Wrappers Version | | ------- | ---------------- | -------- | ------------------------- | -| 0.2.0 | `https://github.com/supabase/wrappers/releases/download/wasm_openapi_fdw_v0.2.0/openapi_fdw.wasm` | `{checksum}` | >=0.5.0 | -| 0.1.4 | `https://github.com/supabase/wrappers/releases/download/wasm_openapi_fdw_v0.1.4/openapi_fdw.wasm` | `dd434f8565b060b181d1e69e1e4d5c8b9c3ac5ca444056d3c2fb939038d308fe` | >=0.5.0 | +| 0.2.0 | `https://github.com/supabase/wrappers/releases/download/wasm_openapi_fdw_v0.2.0/openapi_fdw.wasm` | `{checksum}` | >=0.5.0 | +| 0.1.4 | `https://github.com/supabase/wrappers/releases/download/wasm_openapi_fdw_v0.1.4/openapi_fdw.wasm` | `dd434f8565b060b181d1e69e1e4d5c8b9c3ac5ca444056d3c2fb939038d308fe` | >=0.5.0 | ## Preparation @@ -414,6 +414,8 @@ For APIs with very strict rate limits, consider using materialized views to cach ## Examples +For additional real-world examples with multiple tables, pagination, and advanced features, see the **[examples directory on GitHub](https://github.com/supabase/wrappers/tree/main/wasm-wrappers/fdw/openapi_fdw/examples)**. There are step-by-step walkthroughs for querying the NWS Weather API, PokéAPI, CarAPI, GitHub, and Threads. 
+ ### Basic Query ```sql diff --git a/wasm-wrappers/fdw/openapi_fdw/PERFORMANCE.md b/wasm-wrappers/fdw/openapi_fdw/PERFORMANCE.md deleted file mode 100644 index 8e7be064..00000000 --- a/wasm-wrappers/fdw/openapi_fdw/PERFORMANCE.md +++ /dev/null @@ -1,285 +0,0 @@ -# OpenAPI FDW Performance Analysis - -## Overview - -This document captures performance characteristics, optimizations, and benchmark results for the OpenAPI FDW. - -## Benchmark Results (Feb 2026) - -### End-to-End Performance (10 iterations each) - -| Scenario | OpenAPI FDW | pg_http | pg_net | Overhead | -| ---------- | ------------- | --------- | -------- | ---------- | -| Simple Array (3 rows) | 183ms | 10ms | 786ms | +173ms | -| Wrapped Response (2 rows) | 178ms | 7ms | 787ms | +171ms | -| Type Coercion (1 row) | 185ms | 10ms | 792ms | +175ms | -| GeoJSON Nested (3 rows) | 188ms | 11ms | 788ms | +177ms | -| POST-for-Read (1 row) | 198ms | 14ms | 785ms | +184ms | - -**Key Finding:** Consistent ~170-180ms overhead regardless of row count, indicating fixed per-query cost. 
- -### Micro-Benchmark Results - -From Criterion benchmarks (`cargo bench --bench fdw_benchmarks`): - -| Operation | Time | Notes | -| ----------- | ------ | ------- | -| Column sanitization (camelCase→snake_case) | 60-122ns | One-time per column | -| camelCase conversion | 19-123ns | Cached in begin_scan | -| JSON key lookup (HashMap) | 12-16ns | O(1) exact match | -| DateTime normalization (Cow) | **0.98ns** | Zero-copy for valid datetimes | -| DateTime normalization (String) | 13ns | Allocates new string | -| JSON parsing (10 rows) | 2.6µs | ~260ns per row | -| JSON parsing (1000 rows) | 265µs | ~265ns per row | -| Type conversion (primitives) | 0.6-0.7ns | i64, f64, bool | -| Type conversion (string) | 12ns | Allocates | -| URL building (no params) | 15ns | String concat | -| URL building (3 params) | 85ns | With urlencoding | -| URL building (10 params) | 110ns | Scales linearly | - -## Performance Characteristics - -### Per-Query Overhead Breakdown - -The ~170ms overhead is composed of: - -1. **WASM Runtime Initialization** (~100-120ms) - - Component instantiation - - Module loading - - Memory setup - -2. **FDW Framework** (~20-30ms) - - PostgreSQL FDW API calls - - WASM boundary crossings - - Context setup - -3. 
**OpenAPI FDW Logic** (~30-40ms) - - Column metadata caching - - HTTP request setup - - JSON parsing - - Column key mapping - -### Per-Row Costs - -Once initialized, per-row costs are minimal: - -- **JSON parsing**: ~265ns per row (measured) -- **Type conversion**: 0.6-12ns per cell (measured) -- **Column key lookup**: 12-16ns per column (measured) -- **WASM boundary crossing**: ~100ns per cell push (estimated) - -**Total per-row**: ~1-2µs for typical 5-column row - -### Scaling Characteristics - -- ✅ **Excellent**: Row count (1 row vs 1000 rows has minimal impact) -- ✅ **Good**: Column count (O(1) lookups via pre-built key map) -- ⚠️ **Fixed**: Per-query overhead (~170ms regardless of data size) - -## Real-World Performance Impact - -### With Typical API Latency - -Most REST APIs have 100-500ms base latency. Example: - -| API Latency | pg_http Total | OpenAPI FDW Total | Relative Overhead | -| ------------- | --------------- | ------------------- | ------------------- | -| 100ms | 110ms | 280ms | +154% | -| 200ms | 210ms | 380ms | +81% | -| 300ms | 310ms | 480ms | +55% | -| 500ms | 510ms | 680ms | +33% | - -**Takeaway:** With realistic API latency, overhead ranges from 33-80%, not 2000%. - -### When OpenAPI FDW Wins - -Despite the overhead, OpenAPI FDW provides value when: - -1. **Complex JSON structures** - Automatic unwrapping vs manual jsonb queries -2. **Type safety** - Automatic type conversion vs manual casts -3. **Pagination** - Automatic vs manual cursor handling -4. **Schema discovery** - IMPORT FOREIGN SCHEMA vs manual DDL -5. **Maintainability** - 1-line queries vs 10-line jsonb wrangling - -## Optimization History - -### Feb 2026 - Cleanup & Performance Sprint - -1. **Fixed deduplication bug** (schema.rs:88) - - Removed redundant `sanitize_column_name()` call - - Eliminated unwrap() in hot path - -2. **Removed JSON clone** (spec.rs:187) - - Changed `from_json(&JsonValue)` to `from_json(JsonValue)` - - Eliminates clone of entire OpenAPI spec - -3. 
**`Cow` for datetime normalization** - - **13× faster** for already-valid datetimes (0.98ns vs 13ns) - - Eliminates 50% of allocations in date/timestamp columns - -4. **HashMap pre-allocation** - - `substitute_path_params`: 2× quals capacity (injected_params) - - `build_query_params`: quals + 3 capacity - - Eliminates rehashing during URL construction - -5. **Function extraction** - - `build_url()`: 150 lines → 30 lines (extracted helpers) - - `json_to_cell_cached()`: 135 lines → 40 lines (extracted converters) - - Eliminated code duplication in type conversion - -6. **Column metadata caching** (existing optimization) - - Eliminates ~2000 WASM boundary crossings per 100-row scan - - Caches name, type_oid, camelCase, lowercase variants - -7. **Column key pre-resolution** (existing optimization) - - Builds column→JSON key map once per page - - O(1) lookups vs O(N) search per cell - -## Known Bottlenecks - -### 1. WASM Runtime Startup (~100-120ms) - -**Root cause:** Supabase Wrappers recreates the entire wasmtime stack for every query in `wasm_fdw.rs:new()`: - -| Step | Cached? | Est. Cost | -| ------ | --------- | ----------- | -| Engine creation | No | ~20-30ms | -| Component load (from disk) | Yes (file only) | ~10-20ms | -| WASM → native compilation | No | ~40-60ms | -| Linker setup | No | ~5-10ms | -| Component instantiation | No (required per-query) | ~30-50ms | - -Only the WASM binary file is cached on disk. The Engine, compiled native code, and Linker are rebuilt from scratch every time. - -**Tested:** We added Engine caching via `static OnceLock` in the Supabase Wrappers source. Results: - -- First query (cold): ~213ms -- Subsequent queries (same connection): ~179ms avg -- **Savings: ~35ms per query**, but only within the same PostgreSQL backend process - -Since PostgreSQL is multi-process (each connection = separate process), the cache doesn't help across connections. Not worth maintaining a fork for, but a good upstream contribution opportunity. 
- -**Upstream opportunities** (in Supabase Wrappers): - -- Shared Engine via `OnceLock` (~35ms savings per connection) -- Wasmtime compilation cache to disk (`config.cache_config_load_default()`) for cross-process savings -- Component caching per foreign server - -**Impact:** 60-70% of total overhead - -### 2. WASM Boundary Crossings (~30-50ms total) - -**Root cause:** WIT interface serialization for each cell - -Per 100-row × 10-column scan: - -- 1000 `row.push(cell)` calls × ~50ns each = **50µs** -- Column metadata setup: ~100 calls × ~200ns = **20µs** -- Not actually significant! - -**Actual impact:** <1ms (negligible) - -### 3. HTTP Request Clone (~1-2ms) - -**Root cause:** `http::Request` takes ownership, must clone headers and body - -```rust -let req = http::Request { - url, - headers: self.headers.clone(), // Vec<(String, String)> - body: self.request_body.clone(), // String -}; -``` - -**Potential optimizations:** - -- Reuse request structure -- Reference-counted headers (Arc) - -**Impact:** <1% of total overhead - -## Optimization Opportunities - -### High Impact (>10ms savings) - -1. **WASM Module Caching** - - Pre-load module at extension init - - Reuse across queries - - **Potential savings:** 60-80ms - -2. **Lazy Column Initialization** - - Only cache metadata for SELECTed columns - - Skip camelCase conversion for unused columns - - **Potential savings:** 10-20ms - -### Medium Impact (1-10ms savings) - -1. **JSON Parser Optimization** - - Use simd-json instead of serde_json - - **Potential savings:** 5-10ms for large responses - -2. **URL Building Cache** - - Cache built URLs for repeated queries - - **Potential savings:** 1-5ms - -### Low Impact (<1ms savings) - -1. **String Interning** - - Intern repeated enum values - - **Potential savings:** <1ms - -2. 
**Remove Header Clone** - Use Arc<Vec<(String, String)>> - **Potential savings:** <1ms - -## Test Infrastructure - -### Unit Tests: 337 tests - -- 151 spec tests (OpenAPI parsing) -- 52 schema tests (type mapping) -- 134 lib tests (FDW logic) - -### Integration Tests: 80+ assertions - -- Docker-based (PostgreSQL + MockServer) -- Covers all major OpenAPI features -- Tests both typed and raw JSONB queries - -### Benchmarks - -- **Micro**: Criterion-based (`cargo bench`) -- **End-to-end**: Docker-based (`bash test/benchmark.sh`) -- **Comparison**: OpenAPI FDW vs pg_http vs pg_net - -## Conclusion - -The OpenAPI FDW is **well-optimized at the algorithmic level**: - -- O(1) column lookups -- Zero-copy where possible -- Minimal allocations per row -- Pre-cached metadata - -The remaining ~170ms overhead is primarily **WASM runtime initialization** in Supabase Wrappers, which is: - -- One-time per query (not per row) -- Outside our control (requires upstream changes) -- Acceptable given the DX benefits - -For typical REST APIs with 100-500ms latency, the relative overhead is **30-80%**, which is reasonable given the automatic JSON unwrapping, type conversion, pagination, and schema discovery. - -## Recommendations - -1. **For high-frequency queries**: Consider caching results in materialized views -2. **For low-latency requirements**: Use pg_http with manual JSON extraction -3. **For most use cases**: OpenAPI FDW provides excellent DX/performance trade-off -4. 
**For future optimization**: Focus on WASM module caching/reuse - ---- - -Last updated: February 2026 -Benchmark environment: MockServer (near-zero network latency) -PostgreSQL: 15.14 (Supabase distribution) -WASM Target: wasm32-unknown-unknown -Rust: 1.88+ diff --git a/wasm-wrappers/fdw/openapi_fdw/README.md b/wasm-wrappers/fdw/openapi_fdw/README.md index 391eae2b..d183b304 100644 --- a/wasm-wrappers/fdw/openapi_fdw/README.md +++ b/wasm-wrappers/fdw/openapi_fdw/README.md @@ -1,20 +1,40 @@ # OpenAPI WASM Foreign Data Wrapper -This is a WASM-based Foreign Data Wrapper (FDW) for integrating any OpenAPI 3.0+ compliant REST API into PostgreSQL through Supabase Wrappers. +Created by [Cody Bromley](https://github.com/codybrom). -Point this at an OpenAPI spec and query the API with SQL. The FDW parses the spec, figures out the endpoints and response schemas, and lets you `IMPORT FOREIGN SCHEMA` to generate tables automatically. +A WASM-based Foreign Data Wrapper (FDW) that lets you query any REST API with an [OpenAPI 3.0+](https://www.openapis.org/) spec as PostgreSQL foreign tables, built on [Supabase Wrappers](https://github.com/supabase/wrappers). -Handles pagination, rate limiting (429 backoff), path parameter substitution from WHERE clauses, POST-for-read endpoints, and stops fetching early when you use LIMIT. +Point it at an OpenAPI spec and query the API with SQL. The FDW parses the spec for endpoints and response schemas, and lets you run `IMPORT FOREIGN SCHEMA` to generate tables automatically, or create tables manually for parameterized endpoints. 
+ +## Features + +- **Automatic schema import** — Reads OpenAPI 3.0/3.1 specs (via `spec_url` or inline `spec_json`) and generates foreign tables with `IMPORT FOREIGN SCHEMA` +- **Automatic pagination** — Detects and follows cursor-based, offset-based, and Link-header pagination across multiple pages +- **Path parameter support** — Substitutes path parameters from WHERE clauses (e.g., `WHERE user_id = '123'` fills `/users/{user_id}/posts`) +- **Query pushdown** — Forwards non-path WHERE clauses as query parameters to filter at the API level +- **LIMIT pushdown** — Passes `LIMIT` to the API's page-size parameter to avoid over-fetching +- **POST-for-read** — Supports APIs that use POST for search/query endpoints via the `method` table option +- **Rate-limit handling** — Retries automatically with exponential backoff on HTTP 429 responses +- **Type coercion** — Maps JSON types to PostgreSQL types (`text`, `integer`, `boolean`, `timestamptz`, `jsonb`, etc.) +- **camelCase matching** — Matches API field names like `stationIdentifier` to snake_case columns like `station_identifier` +- **Auth support** — API key (header, query param, or cookie) and Bearer token authentication, with Supabase Vault integration +- **Debug mode** — Set `debug 'true'` on the server to log HTTP request/response details as PostgreSQL INFO messages + +## Limitations + +- Read-only (no INSERT/UPDATE/DELETE) +- POST-for-read available via `method` table option, but only GET endpoints are auto-imported +- Auth: API key and Bearer token only (no OAuth2 flows — use pre-obtained tokens) +- OpenAPI 3.x only (Swagger 2.0 is rejected) ## Documentation -[https://fdw.dev/catalog/openapi/](https://fdw.dev/catalog/openapi/) +Full reference: [fdw.dev/catalog/openapi](https://fdw.dev/catalog/openapi/) ## Quick Start ```sql --- Create a server pointing to any OpenAPI-compliant API -CREATE SERVER my_api_server +CREATE SERVER my_api FOREIGN DATA WRAPPER wasm_wrapper OPTIONS ( fdw_package_url 
'https://github.com/supabase/wrappers/releases/download/wasm_openapi_fdw_v0.2.0/openapi_fdw.wasm', @@ -26,61 +46,111 @@ OPTIONS ( api_key_id '' ); --- Import all endpoints as tables -IMPORT FOREIGN SCHEMA openapi -FROM SERVER my_api_server -INTO openapi; +-- Auto-generate tables from the spec +IMPORT FOREIGN SCHEMA openapi FROM SERVER my_api INTO api; + +-- Or create tables manually +CREATE FOREIGN TABLE api.users ( + id text, name text, email text, attrs jsonb +) SERVER my_api OPTIONS (endpoint '/users'); --- Query the API -SELECT * FROM openapi.users WHERE id = '123'; +-- Query +SELECT * FROM api.users WHERE id = '123'; ``` +## Examples + +The [`examples/`](examples/) directory has complete working setups for 5 real APIs: + +| Example | API | Auth | Key Features | +| --- | --- | --- | --- | +| [nws](examples/nws/) | National Weather Service | None | GeoJSON, cursor pagination, path params, IMPORT FOREIGN SCHEMA | +| [pokeapi](examples/pokeapi/) | PokéAPI | None | Offset-based pagination, auto-detected `results` wrapper | +| [carapi](examples/carapi/) | CarAPI | None | Page-based pagination, query pushdown, `data` wrapper | +| [github](examples/github/) | GitHub REST API | Bearer token | Custom headers, search pushdown, `items` wrapper | +| [threads](examples/threads/) | Meta Threads API | OAuth (query param) | Cursor pagination, inline `spec_json` | + +Each includes a `README.md` walkthrough and an `init.sql` you can run directly. 
+ +## Development ### Building ```bash -cd wasm-wrappers/fdw/openapi_fdw cargo component build --release --target wasm32-unknown-unknown ``` -### Running Tests +### Testing ```bash -# Unit tests (run with native target) +# 518 unit tests cargo test -# Benchmarks +# Micro-benchmarks (Criterion) cargo bench --bench fdw_benchmarks -# Integration tests (Docker-based) +# Integration tests (Docker: PostgreSQL + MockServer) bash test/run.sh -# Integration tests (from wrappers directory) -cd wrappers -cargo pgrx test --features "wasm_fdw pg16" +# End-to-end benchmarks (Docker, vs pg_http) +bash test/benchmark.sh + +# Example validation (Docker: runs all 5 examples) +bash test/run-examples.sh ``` ### Code Quality ```bash -make check # runs fmt, clippy, test, build +make check # fmt, clippy, test, build ``` -## Limitations +## Performance + +Using any FDW comes with certain tradeoffs, and this one is no exception when it comes to performance. WASM FDWs in Supabase Wrappers have a relatively fixed ~170-180ms overhead because the Supabase Wrappers WASM runtime needs to start up, compile the module, and initialize the component before any work begins. Once the runtime is up, processing each row is fast (~1-2µs), but the overhead is the same whether you're fetching 1 row or 1,000. + +The best alternative is something like `pg_http`, a native C extension that makes a raw HTTP call and hands you back JSON to parse yourself. Native is always going to be faster than WASM, but part of why it's faster is that it does less for you. Here's what it looks like to sync API data into a local table with each approach. 
+ +With the FDW: + +```sql +INSERT INTO local_stations SELECT * FROM api.stations; +``` + +With `pg_http`: + +```sql +INSERT INTO local_stations +SELECT props->>'station_id', props->>'name', + props->>'state', (props->>'elevation')::bigint +FROM (SELECT content FROM http_get('.../features')) r, + jsonb_array_elements((r.content::jsonb)->'features') AS f, + LATERAL (SELECT f->'properties' AS props) AS sub; +``` + +And that's just one page of results from one endpoint. With `pg_http`, you'd also need to handle pagination loops, rate-limit retries, and response envelope detection yourself. These are all things the OpenAPI FDW handles automatically. + +Here's the raw cost of that convenience. Both approaches benchmarked end-to-end (API call through to local table write) against a local mock server on PostgreSQL 15: + +| Scenario | OpenAPI FDW | pg_http | Overhead | +| --- | --- | --- | --- | +| Simple Array (3 rows) | 188ms | 13ms | +175ms | +| Wrapped Response (2 rows) | 191ms | 8ms | +183ms | +| Type Coercion (1 row) | 188ms | 7ms | +181ms | +| GeoJSON Nested (3 rows) | 192ms | 7ms | +185ms | +| POST-for-Read (1 row) | 195ms | 13ms | +182ms | + +These were measured with near-zero network latency, so they isolate the WASM overhead. With a real API responding in 100-400ms, the gap narrows. If your API call has 200ms of latency, the roundtrip takes ~210ms through `pg_http` and ~375ms through the FDW. The overhead doesn't disappear, but it becomes a lot more reasonable when you factor in the SQL you're not writing or maintaining. 
-- Read-only (no INSERT/UPDATE/DELETE support) -- Only GET endpoints are supported (POST-for-read is available via the `method` table option) -- Authentication limited to API key and Bearer token (No OAuth2 flow support yet - use pre-obtained tokens) -- Only OpenAPI 3.x specs are supported (Swagger 2.0 is rejected) +For queries you run frequently, a [materialized view](https://supabase.com/blog/postgresql-views) can cache the results locally and skip the FDW on subsequent reads. ## Changelog -| Version | Date | Notes | -| ------- | ---------- | ---------------------------------------------------- | -| 0.2.0 | 2026-02-15 | Modular architecture, POST-for-read, `spec_json` inline specs, LIMIT pushdown, OpenAPI 3.1 support, security hardening, 337 unit tests, 5 real-world examples | -| 0.1.4 | 2026-02-09 | Type coercion, auth validation, table naming, URL fixes | -| 0.1.3 | 2026-02-06 | Avoid cloning JSON response data | -| 0.1.2 | 2026-02-01 | Fix query param filtering | -| 0.1.1 | 2026-01-26 | URL encoding, identifier quoting, version validation | -| 0.1.0 | 2026-01-25 | Initial version | +| Version | Date | Notes | +| --- | --- | --- | +| 0.2.0 | 2026-02-15 | Modular architecture, POST-for-read, `spec_json` inline specs, LIMIT pushdown, OpenAPI 3.1 support, security hardening, 518 unit tests, 5 real-world examples | +| 0.1.4 | 2026-02-09 | Type coercion, auth validation, table naming, URL fixes | +| 0.1.3 | 2026-02-06 | Avoid cloning JSON response data | +| 0.1.2 | 2026-02-01 | Fix query param filtering | +| 0.1.1 | 2026-01-26 | URL encoding, identifier quoting, version validation | +| 0.1.0 | 2026-01-25 | Initial version | diff --git a/wasm-wrappers/fdw/openapi_fdw/test/benchmark.sh b/wasm-wrappers/fdw/openapi_fdw/test/benchmark.sh index 0c8b197b..2af903ca 100755 --- a/wasm-wrappers/fdw/openapi_fdw/test/benchmark.sh +++ b/wasm-wrappers/fdw/openapi_fdw/test/benchmark.sh @@ -55,13 +55,20 @@ print_separator() { "------------------" "--------" "--------" "--------" 
"----" } -# Benchmark a synchronous query (openapi-fdw or pg_http) +# Benchmark a synchronous query by inserting results into a temp table. +# This measures the full lifecycle: API call -> parse -> write to local table. bench_sync() { local sql="$1" local iterations="$2" + local table="${3:-_bench_sink}" local times=() + # Create the destination table from the query schema (once) + psql_cmd -c "DROP TABLE IF EXISTS $table;" > /dev/null + psql_cmd -c "CREATE TABLE $table AS $sql LIMIT 0;" > /dev/null + for _ in $(seq 1 "$iterations"); do + psql_cmd -c "TRUNCATE $table;" > /dev/null local elapsed elapsed=$(psql_cmd -c " DO \$bench\$ @@ -69,7 +76,7 @@ bench_sync() { t0 timestamptz := clock_timestamp(); t1 timestamptz; BEGIN - PERFORM * FROM ($sql) AS _b; + INSERT INTO $table $sql; t1 := clock_timestamp(); RAISE NOTICE '%', extract(epoch from (t1 - t0)) * 1000; END \$bench\$; @@ -79,6 +86,8 @@ bench_sync() { fi done + psql_cmd -c "DROP TABLE IF EXISTS $table;" > /dev/null + if [ ${#times[@]} -gt 0 ]; then compute_stats "${times[@]}" else From 55fe1e98b1077956100c90989c47fd09eec135a7 Mon Sep 17 00:00:00 2001 From: Cody Bromley Date: Mon, 16 Feb 2026 20:58:56 -0600 Subject: [PATCH 3/9] chore(openapi): revert workspace release profile and update README MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Remove [profile.release] (strip, lto) from the shared wasm-wrappers workspace Cargo.toml — these affect all wasm FDWs, not just openapi. Revert Cargo.lock to match main. Minor README updates. 
--- wasm-wrappers/fdw/Cargo.lock | 194 ++++++++++++------------ wasm-wrappers/fdw/Cargo.toml | 4 - wasm-wrappers/fdw/openapi_fdw/README.md | 15 +- 3 files changed, 102 insertions(+), 111 deletions(-) diff --git a/wasm-wrappers/fdw/Cargo.lock b/wasm-wrappers/fdw/Cargo.lock index 0ed0d85b..4e40c0d0 100644 --- a/wasm-wrappers/fdw/Cargo.lock +++ b/wasm-wrappers/fdw/Cargo.lock @@ -11,12 +11,6 @@ dependencies = [ "memchr", ] -[[package]] -name = "android-tzdata" -version = "0.1.1" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "e999941b234f3131b00bc13c22d06e8c5ff726d1b6318ac7eb276997bbb4fef0" - [[package]] name = "android_system_properties" version = "0.1.5" @@ -40,15 +34,15 @@ checksum = "5192cca8006f1fd4f7237516f40fa183bb07f8fbdfedaa0036de5ea9b0b45e78" [[package]] name = "autocfg" -version = "1.4.0" +version = "1.5.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "ace50bade8e6234aa140d9a2f552bbee1db4d353f69b8217bc503490fc1a9f26" +checksum = "c08606f8c3cbf4ce6ec8e28fb0014a2c086708fe954eaa885384a6165172e7e8" [[package]] name = "bumpalo" -version = "3.17.0" +version = "3.19.1" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "1628fb46dfa0b37568d12e5edd512553eccf6a22a78e8bde00bb4aed84d5bdbf" +checksum = "5dd9dc738b7a8311c7ade152424974d8115f2cdad61e8dab8dac9f2362298510" [[package]] name = "cal_fdw" @@ -76,10 +70,11 @@ checksum = "37b2a672a2cb129a2e41c10b1224bb368f9f37a2b16b612598138befd7b37eb5" [[package]] name = "cc" -version = "1.2.23" +version = "1.2.56" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "5f4ac86a9e5bc1e2b3449ab9d7d3a6a405e3d1bb28d7b9be8614f55846ae3766" +checksum = "aebf35691d1bfb0ac386a69bac2fde4dd276fb618cf8bf4f5318fe285e821bb2" dependencies = [ + "find-msvc-tools", "shlex", ] @@ -93,22 +88,21 @@ dependencies = [ [[package]] name = "cfg-if" -version = "1.0.0" +version = "1.0.4" source = 
"registry+https://github.com/rust-lang/crates.io-index" -checksum = "baf1de4339761588bc0619e3cbc0120ee582ebb74b53b4efbf79117bd2da40fd" +checksum = "9330f8b2ff13f34540b44e946ef35111825727b38d33286ef986142615121801" [[package]] name = "chrono" -version = "0.4.41" +version = "0.4.43" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "c469d952047f47f91b68d1cba3f10d63c11d73e4636f24f08daf0278abf01c4d" +checksum = "fac4744fb15ae8337dc853fee7fb3f4e48c0fbaa23d0afe49c447b4fab126118" dependencies = [ - "android-tzdata", "iana-time-zone", "js-sys", "num-traits", "wasm-bindgen", - "windows-link 0.1.1", + "windows-link", ] [[package]] @@ -140,18 +134,18 @@ dependencies = [ [[package]] name = "clap" -version = "4.5.58" +version = "4.5.59" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "63be97961acde393029492ce0be7a1af7e323e6bae9511ebfac33751be5e6806" +checksum = "c5caf74d17c3aec5495110c34cc3f78644bfa89af6c8993ed4de2790e49b6499" dependencies = [ "clap_builder", ] [[package]] name = "clap_builder" -version = "4.5.58" +version = "4.5.59" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "7f13174bda5dfd69d7e947827e5af4b0f2f94a4a3ee92912fba07a66150f21e2" +checksum = "370daa45065b80218950227371916a1633217ae42b2715b2287b606dcd618e24" dependencies = [ "anstyle", "clap_lex", @@ -250,6 +244,12 @@ version = "1.15.0" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "48c757948c5ede0e46177b7add2e67155f70e33c07fea8284df6576da70b3719" +[[package]] +name = "find-msvc-tools" +version = "0.1.9" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "5baebc0774151f905a1a2cc41989300b1e6fbb29aff0ceffa1064fdd3088d582" + [[package]] name = "half" version = "2.7.1" @@ -284,9 +284,9 @@ dependencies = [ [[package]] name = "iana-time-zone" -version = "0.1.63" +version = "0.1.65" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = 
"b0c919e5debc312ad217002b8048a17b7d83f80703865bbfcfebb0458b0b27d8" +checksum = "e31bc9ad994ba00e440a8aa5c9ef0ec67d5cb5e5cb0cc7f8b744a35b389cc470" dependencies = [ "android_system_properties", "core-foundation-sys", @@ -336,15 +336,15 @@ dependencies = [ [[package]] name = "itoa" -version = "1.0.15" +version = "1.0.17" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "4a5f13b858c8d314ee3e8f639011f7ccefe71f97f96e50151fb991f267928e2c" +checksum = "92ecc6618181def0457392ccd0ee51198e065e016d1d527a7ac1b6dc7c1f09d2" [[package]] name = "js-sys" -version = "0.3.77" +version = "0.3.85" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "1cfaf33c695fc6e08064efbc1f72ec937429614f25eef83af942d0e227c3a28f" +checksum = "8c942ebf8e95485ca0d52d97da7c5a2c387d0e7f0ba4c35e93bfcaee045955b3" dependencies = [ "once_cell", "wasm-bindgen", @@ -352,21 +352,21 @@ dependencies = [ [[package]] name = "libc" -version = "0.2.172" +version = "0.2.182" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "d750af042f7ef4f724306de029d18836c26c1765a54a6a3f094cbd23a7267ffa" +checksum = "6800badb6cb2082ffd7b6a67e6125bb39f18782f793520caee8cb8846be06112" [[package]] name = "log" -version = "0.4.27" +version = "0.4.29" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "13dc2df351e3202783a1fe0d44375f7295ffb4049267b0f3018346dc122a1d94" +checksum = "5e5032e24019045c762d3c0f28f5b6b8bbf38563a65908389bf7978758920897" [[package]] name = "memchr" -version = "2.7.4" +version = "2.8.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "78ca9ab1a0babb1e7d5695e3530886289c18cf2f87ec19a575a0abdce112e3a3" +checksum = "f8ca58f447f06ed17d5fc4043ce1b10dd205e060fb3ce5b979b8ed8e59ff3f79" [[package]] name = "notion_fdw" @@ -456,18 +456,18 @@ dependencies = [ [[package]] name = "proc-macro2" -version = "1.0.95" +version = "1.0.106" source = "registry+https://github.com/rust-lang/crates.io-index" 
-checksum = "02b3e5e68a3a1a02aad3ec490a98007cbc13c37cbe84a3cd7b8e406d76e7f778" +checksum = "8fd00f0bb2e90d81d1044c2b32617f68fcb9fa3bb7640c23e9c748e53fb30934" dependencies = [ "unicode-ident", ] [[package]] name = "quote" -version = "1.0.40" +version = "1.0.44" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "1885c039570dc00dcb4ff087a89e185fd56bae234ddc7f056a945bf36467248d" +checksum = "21b2ebcf727b7760c461f091f9f0f539b77b8e87f2fd88131e7f1b433b3cece4" dependencies = [ "proc-macro2", ] @@ -523,15 +523,9 @@ checksum = "a96887878f22d7bad8a3b6dc5b7440e0ada9a245242924394987b21cf2210a4c" [[package]] name = "rustversion" -version = "1.0.20" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "eded382c5f5f786b989652c49544c4877d9f015cc22e145a5ea8ea66c2921cd2" - -[[package]] -name = "ryu" -version = "1.0.20" +version = "1.0.22" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "28d3b2b1366ec20994f1fd18c3c594f05c5dd4bc44d8bb0c1c632c8d6829481f" +checksum = "b39cdef0fa800fc44525c84ccb54a029961a8215f9619753635a9c0d2538d46d" [[package]] name = "same-file" @@ -544,18 +538,28 @@ dependencies = [ [[package]] name = "serde" -version = "1.0.219" +version = "1.0.228" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "5f0e2c6ed6606019b4e29e69dbaba95b11854410e5347d525002456dbbb786b6" +checksum = "9a8e94ea7f378bd32cbbd37198a4a91436180c5bb472411e48b5ec2e2124ae9e" +dependencies = [ + "serde_core", + "serde_derive", +] + +[[package]] +name = "serde_core" +version = "1.0.228" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "41d385c7d4ca58e59fc732af25c3983b67ac852c1a25000afe1175de458b67ad" dependencies = [ "serde_derive", ] [[package]] name = "serde_derive" -version = "1.0.219" +version = "1.0.228" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "5b0276cf7f2c73365f7157c8123c21cd9a50fbbd844757af28ca1f5925fc2a00" +checksum = 
"d540f220d3187173da220f885ab66608367b6574e925011a9353e4badda91d79" dependencies = [ "proc-macro2", "quote", @@ -564,14 +568,15 @@ dependencies = [ [[package]] name = "serde_json" -version = "1.0.140" +version = "1.0.149" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "20068b6e96dc6c9bd23e01df8827e6c7e1f2fddd43c21810382803c136b99373" +checksum = "83fc039473c5595ace860d8c4fafa220ff474b3fc6bfdb4293327f1a37e94d86" dependencies = [ "itoa", "memchr", - "ryu", "serde", + "serde_core", + "zmij", ] [[package]] @@ -608,9 +613,9 @@ dependencies = [ [[package]] name = "syn" -version = "2.0.101" +version = "2.0.116" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "8ce2b7fc941b3a24138a0a7cf8e858bfc6a992e7978a068a5c760deb0ed43caf" +checksum = "3df424c70518695237746f84cede799c9c58fcb37450d7b23716568cc8bc69cb" dependencies = [ "proc-macro2", "quote", @@ -629,9 +634,9 @@ dependencies = [ [[package]] name = "unicode-ident" -version = "1.0.18" +version = "1.0.24" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "5a5f39404a5da50712a4c1eecf25e90dd62b613502b7e925fd4e4d19b5c96512" +checksum = "e6e4313cd5fcd3dad5cafa179702e2b244f760991f45397d14d4ebf38247da75" [[package]] name = "urlencoding" @@ -651,35 +656,22 @@ dependencies = [ [[package]] name = "wasm-bindgen" -version = "0.2.100" +version = "0.2.108" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "1edc8929d7499fc4e8f0be2262a241556cfc54a0bea223790e71446f2aab1ef5" +checksum = "64024a30ec1e37399cf85a7ffefebdb72205ca1c972291c51512360d90bd8566" dependencies = [ "cfg-if", "once_cell", "rustversion", "wasm-bindgen-macro", -] - -[[package]] -name = "wasm-bindgen-backend" -version = "0.2.100" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "2f0a0651a5c2bc21487bde11ee802ccaf4c51935d0d3d42a6101f98161700bc6" -dependencies = [ - "bumpalo", - "log", - "proc-macro2", - "quote", - "syn", 
"wasm-bindgen-shared", ] [[package]] name = "wasm-bindgen-macro" -version = "0.2.100" +version = "0.2.108" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "7fe63fc6d09ed3792bd0897b314f53de8e16568c2b3f7982f468c0bf9bd0b407" +checksum = "008b239d9c740232e71bd39e8ef6429d27097518b6b30bdf9086833bd5b6d608" dependencies = [ "quote", "wasm-bindgen-macro-support", @@ -687,31 +679,31 @@ dependencies = [ [[package]] name = "wasm-bindgen-macro-support" -version = "0.2.100" +version = "0.2.108" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "8ae87ea40c9f689fc23f209965b6fb8a99ad69aeeb0231408be24920604395de" +checksum = "5256bae2d58f54820e6490f9839c49780dff84c65aeab9e772f15d5f0e913a55" dependencies = [ + "bumpalo", "proc-macro2", "quote", "syn", - "wasm-bindgen-backend", "wasm-bindgen-shared", ] [[package]] name = "wasm-bindgen-shared" -version = "0.2.100" +version = "0.2.108" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "1a05d73b933a847d6cccdda8f838a22ff101ad9bf93e33684f39c1f5f0eece3d" +checksum = "1f01b580c9ac74c8d8f0c0e4afb04eeef2acf145458e52c03845ee9cd23e3d12" dependencies = [ "unicode-ident", ] [[package]] name = "web-sys" -version = "0.3.77" +version = "0.3.85" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "33b6dd2ef9186f1f2072e409e99cd22a975331a6b3591b12c764e0e55c60d5d2" +checksum = "312e32e551d92129218ea9a2452120f4aabc03529ef03e4d0d82fb2780608598" dependencies = [ "js-sys", "wasm-bindgen", @@ -728,22 +720,22 @@ dependencies = [ [[package]] name = "windows-core" -version = "0.61.1" +version = "0.62.2" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "46ec44dc15085cea82cf9c78f85a9114c463a369786585ad2882d1ff0b0acf40" +checksum = "b8e83a14d34d0623b51dce9581199302a221863196a1dde71a7663a4c2be9deb" dependencies = [ "windows-implement", "windows-interface", - "windows-link 0.1.1", + "windows-link", "windows-result", 
"windows-strings", ] [[package]] name = "windows-implement" -version = "0.60.0" +version = "0.60.2" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "a47fddd13af08290e67f4acabf4b459f647552718f683a7b415d290ac744a836" +checksum = "053e2e040ab57b9dc951b72c264860db7eb3b0200ba345b4e4c3b14f67855ddf" dependencies = [ "proc-macro2", "quote", @@ -752,21 +744,15 @@ dependencies = [ [[package]] name = "windows-interface" -version = "0.59.1" +version = "0.59.3" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "bd9211b69f8dcdfa817bfd14bf1c97c9188afa36f4750130fcdf3f400eca9fa8" +checksum = "3f316c4a2570ba26bbec722032c4099d8c8bc095efccdc15688708623367e358" dependencies = [ "proc-macro2", "quote", "syn", ] -[[package]] -name = "windows-link" -version = "0.1.1" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "76840935b766e1b0a05c0066835fb9ec80071d4c09a16f6bd5f7e655e3c14c38" - [[package]] name = "windows-link" version = "0.2.1" @@ -775,20 +761,20 @@ checksum = "f0805222e57f7521d6a62e36fa9163bc891acd422f971defe97d64e70d0a4fe5" [[package]] name = "windows-result" -version = "0.3.3" +version = "0.4.1" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "4b895b5356fc36103d0f64dd1e94dfa7ac5633f1c9dd6e80fe9ec4adef69e09d" +checksum = "7781fa89eaf60850ac3d2da7af8e5242a5ea78d1a11c49bf2910bb5a73853eb5" dependencies = [ - "windows-link 0.1.1", + "windows-link", ] [[package]] name = "windows-strings" -version = "0.4.1" +version = "0.5.1" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "2a7ab927b2637c19b3dbe0965e75d8f2d30bdd697a1516191cad2ec4df8fb28a" +checksum = "7837d08f69c77cf6b07689544538e017c1bfcf57e34b4c0ff58e6c2cd3b37091" dependencies = [ - "windows-link 0.1.1", + "windows-link", ] [[package]] @@ -797,7 +783,7 @@ version = "0.61.2" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = 
"ae137229bcbd6cdf0f7b80a31df61766145077ddf49416a728b02cb3921ff3fc" dependencies = [ - "windows-link 0.2.1", + "windows-link", ] [[package]] @@ -825,3 +811,9 @@ dependencies = [ "quote", "syn", ] + +[[package]] +name = "zmij" +version = "1.0.21" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "b8848ee67ecc8aedbaf3e4122217aff892639231befc6a1b58d29fff4c2cabaa" diff --git a/wasm-wrappers/fdw/Cargo.toml b/wasm-wrappers/fdw/Cargo.toml index ada7fec0..e0a9b931 100644 --- a/wasm-wrappers/fdw/Cargo.toml +++ b/wasm-wrappers/fdw/Cargo.toml @@ -17,10 +17,6 @@ members = [ ] resolver = "2" -[profile.release] -strip = "debuginfo" -lto = true - [workspace.package] edition = "2024" rust-version = "1.88" diff --git a/wasm-wrappers/fdw/openapi_fdw/README.md b/wasm-wrappers/fdw/openapi_fdw/README.md index d183b304..d006a68c 100644 --- a/wasm-wrappers/fdw/openapi_fdw/README.md +++ b/wasm-wrappers/fdw/openapi_fdw/README.md @@ -34,7 +34,8 @@ Full reference: [fdw.dev/catalog/openapi](https://fdw.dev/catalog/openapi/) ## Quick Start ```sql -CREATE SERVER my_api +-- Create a server pointing to any OpenAPI-compliant API +CREATE SERVER my_api_server FOREIGN DATA WRAPPER wasm_wrapper OPTIONS ( fdw_package_url 'https://github.com/supabase/wrappers/releases/download/wasm_openapi_fdw_v0.2.0/openapi_fdw.wasm', @@ -46,16 +47,18 @@ OPTIONS ( api_key_id '' ); --- Auto-generate tables from the spec -IMPORT FOREIGN SCHEMA openapi FROM SERVER my_api INTO api; +-- Import all endpoints as tables +IMPORT FOREIGN SCHEMA openapi +FROM SERVER my_api_server +INTO openapi; -- Or create tables manually CREATE FOREIGN TABLE api.users ( id text, name text, email text, attrs jsonb ) SERVER my_api OPTIONS (endpoint '/users'); --- Query -SELECT * FROM api.users WHERE id = '123'; +-- Query the API +SELECT * FROM openapi.users WHERE id = '123'; ``` ## Examples @@ -80,7 +83,7 @@ Each includes a `README.md` walkthrough and an `init.sql` you can run directly. 
cargo component build --release --target wasm32-unknown-unknown ``` -### Testing +### Running Tests ```bash # 518 unit tests From b6d3cb0776e685f753ce0a692d851e847f4360f3 Mon Sep 17 00:00:00 2001 From: Cody Bromley Date: Mon, 16 Feb 2026 21:03:26 -0600 Subject: [PATCH 4/9] chore(openapi): remove criterion micro-benchmarks Remove benches/fdw_benchmarks.rs and the criterion dev-dependency. These benchmarks tested re-implemented copies of FDW logic rather than actual code, added ~38 transitive dependencies, and caused build errors on wasm targets. The SQL-level benchmark script (test/benchmark.sh) provides meaningful end-to-end performance analysis. --- wasm-wrappers/fdw/Cargo.lock | 350 ---------------- wasm-wrappers/fdw/openapi_fdw/Cargo.toml | 7 - wasm-wrappers/fdw/openapi_fdw/README.md | 3 - .../fdw/openapi_fdw/benches/fdw_benchmarks.rs | 383 ------------------ 4 files changed, 743 deletions(-) delete mode 100644 wasm-wrappers/fdw/openapi_fdw/benches/fdw_benchmarks.rs diff --git a/wasm-wrappers/fdw/Cargo.lock b/wasm-wrappers/fdw/Cargo.lock index 4e40c0d0..9f8384cc 100644 --- a/wasm-wrappers/fdw/Cargo.lock +++ b/wasm-wrappers/fdw/Cargo.lock @@ -2,15 +2,6 @@ # It is not intended for manual editing. 
version = 4 -[[package]] -name = "aho-corasick" -version = "1.1.4" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "ddd31a130427c27518df266943a5308ed92d4b226cc639f5a8f1002816174301" -dependencies = [ - "memchr", -] - [[package]] name = "android_system_properties" version = "0.1.5" @@ -20,18 +11,6 @@ dependencies = [ "libc", ] -[[package]] -name = "anes" -version = "0.1.6" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "4b46cbb362ab8752921c97e041f5e366ee6297bd428a31275b9fcf1e380f7299" - -[[package]] -name = "anstyle" -version = "1.0.13" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "5192cca8006f1fd4f7237516f40fa183bb07f8fbdfedaa0036de5ea9b0b45e78" - [[package]] name = "autocfg" version = "1.5.0" @@ -62,12 +41,6 @@ dependencies = [ "wit-bindgen-rt", ] -[[package]] -name = "cast" -version = "0.3.0" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "37b2a672a2cb129a2e41c10b1224bb368f9f37a2b16b612598138befd7b37eb5" - [[package]] name = "cc" version = "1.2.56" @@ -105,58 +78,6 @@ dependencies = [ "windows-link", ] -[[package]] -name = "ciborium" -version = "0.2.2" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "42e69ffd6f0917f5c029256a24d0161db17cea3997d185db0d35926308770f0e" -dependencies = [ - "ciborium-io", - "ciborium-ll", - "serde", -] - -[[package]] -name = "ciborium-io" -version = "0.2.2" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "05afea1e0a06c9be33d539b876f1ce3692f4afea2cb41f740e7743225ed1c757" - -[[package]] -name = "ciborium-ll" -version = "0.2.2" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "57663b653d948a338bfb3eeba9bb2fd5fcfaecb9e199e87e1eda4d9e8b240fd9" -dependencies = [ - "ciborium-io", - "half", -] - -[[package]] -name = "clap" -version = "4.5.59" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = 
"c5caf74d17c3aec5495110c34cc3f78644bfa89af6c8993ed4de2790e49b6499" -dependencies = [ - "clap_builder", -] - -[[package]] -name = "clap_builder" -version = "4.5.59" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "370daa45065b80218950227371916a1633217ae42b2715b2287b606dcd618e24" -dependencies = [ - "anstyle", - "clap_lex", -] - -[[package]] -name = "clap_lex" -version = "1.0.0" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "3a822ea5bc7590f9d40f1ba12c0dc3c2760f3482c6984db1573ad11031420831" - [[package]] name = "clerk_fdw" version = "0.2.2" @@ -171,96 +92,12 @@ version = "0.8.7" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "773648b94d0e5d620f64f280777445740e61fe701025087ec8b57f45c791888b" -[[package]] -name = "criterion" -version = "0.5.1" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "f2b12d017a929603d80db1831cd3a24082f8137ce19c69e6447f54f5fc8d692f" -dependencies = [ - "anes", - "cast", - "ciborium", - "clap", - "criterion-plot", - "is-terminal", - "itertools", - "num-traits", - "once_cell", - "oorandom", - "plotters", - "rayon", - "regex", - "serde", - "serde_derive", - "serde_json", - "tinytemplate", - "walkdir", -] - -[[package]] -name = "criterion-plot" -version = "0.5.0" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "6b50826342786a51a89e2da3a28f1c32b06e387201bc2d19791f622c673706b1" -dependencies = [ - "cast", - "itertools", -] - -[[package]] -name = "crossbeam-deque" -version = "0.8.6" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "9dd111b7b7f7d55b72c0a6ae361660ee5853c9af73f70c3c2ef6858b950e2e51" -dependencies = [ - "crossbeam-epoch", - "crossbeam-utils", -] - -[[package]] -name = "crossbeam-epoch" -version = "0.9.18" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "5b82ac4a3c2ca9c3460964f020e1402edd5753411d7737aa39c3714ad1b5420e" -dependencies = 
[ - "crossbeam-utils", -] - -[[package]] -name = "crossbeam-utils" -version = "0.8.21" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "d0a5c400df2834b80a4c3327b3aad3a4c4cd4de0629063962b03235697506a28" - -[[package]] -name = "crunchy" -version = "0.2.4" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "460fbee9c2c2f33933d720630a6a0bac33ba7053db5344fac858d4b8952d77d5" - -[[package]] -name = "either" -version = "1.15.0" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "48c757948c5ede0e46177b7add2e67155f70e33c07fea8284df6576da70b3719" - [[package]] name = "find-msvc-tools" version = "0.1.9" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "5baebc0774151f905a1a2cc41989300b1e6fbb29aff0ceffa1064fdd3088d582" -[[package]] -name = "half" -version = "2.7.1" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "6ea2d84b969582b4b1864a92dc5d27cd2b77b622a8d79306834f1be5ba20d84b" -dependencies = [ - "cfg-if", - "crunchy", - "zerocopy", -] - [[package]] name = "helloworld_fdw" version = "0.2.0" @@ -268,12 +105,6 @@ dependencies = [ "wit-bindgen-rt", ] -[[package]] -name = "hermit-abi" -version = "0.5.2" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "fc0fef456e4baa96da950455cd02c081ca953b141298e41db3fc7e36b1da849c" - [[package]] name = "hubspot_fdw" version = "0.2.0" @@ -314,26 +145,6 @@ dependencies = [ "wit-bindgen-rt", ] -[[package]] -name = "is-terminal" -version = "0.4.17" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "3640c1c38b8e4e43584d8df18be5fc6b0aa314ce6ebf51b53313d4306cca8e46" -dependencies = [ - "hermit-abi", - "libc", - "windows-sys", -] - -[[package]] -name = "itertools" -version = "0.10.5" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "b0fd2260e829bddf4cb6ea802289de2f86d6a7a690192fbe91b3f46e0f2c8473" -dependencies = [ - "either", -] - 
[[package]] name = "itoa" version = "1.0.17" @@ -392,17 +203,10 @@ version = "1.21.3" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "42f5e15c9953c5e4ccceeb2e7382a716482c34515315f7b03532b8b4e8393d2d" -[[package]] -name = "oorandom" -version = "11.1.5" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "d6790f58c7ff633d8771f42965289203411a5e5c68388703c06e14f24770b41e" - [[package]] name = "openapi_fdw" version = "0.2.0" dependencies = [ - "criterion", "serde", "serde_json", "urlencoding", @@ -426,34 +230,6 @@ dependencies = [ "wit-bindgen-rt", ] -[[package]] -name = "plotters" -version = "0.3.7" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "5aeb6f403d7a4911efb1e33402027fc44f29b5bf6def3effcc22d7bb75f2b747" -dependencies = [ - "num-traits", - "plotters-backend", - "plotters-svg", - "wasm-bindgen", - "web-sys", -] - -[[package]] -name = "plotters-backend" -version = "0.3.7" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "df42e13c12958a16b3f7f4386b9ab1f3e7933914ecea48da7139435263a4172a" - -[[package]] -name = "plotters-svg" -version = "0.3.7" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "51bae2ac328883f7acdfea3d66a7c35751187f870bc81f94563733a154d7a670" -dependencies = [ - "plotters-backend", -] - [[package]] name = "proc-macro2" version = "1.0.106" @@ -472,70 +248,12 @@ dependencies = [ "proc-macro2", ] -[[package]] -name = "rayon" -version = "1.11.0" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "368f01d005bf8fd9b1206fb6fa653e6c4a81ceb1466406b81792d87c5677a58f" -dependencies = [ - "either", - "rayon-core", -] - -[[package]] -name = "rayon-core" -version = "1.13.0" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "22e18b0f0062d30d4230b2e85ff77fdfe4326feb054b9783a3460d8435c8ab91" -dependencies = [ - "crossbeam-deque", - "crossbeam-utils", -] - -[[package]] 
-name = "regex" -version = "1.12.3" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "e10754a14b9137dd7b1e3e5b0493cc9171fdd105e0ab477f51b72e7f3ac0e276" -dependencies = [ - "aho-corasick", - "memchr", - "regex-automata", - "regex-syntax", -] - -[[package]] -name = "regex-automata" -version = "0.4.14" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "6e1dd4122fc1595e8162618945476892eefca7b88c52820e74af6262213cae8f" -dependencies = [ - "aho-corasick", - "memchr", - "regex-syntax", -] - -[[package]] -name = "regex-syntax" -version = "0.8.9" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "a96887878f22d7bad8a3b6dc5b7440e0ada9a245242924394987b21cf2210a4c" - [[package]] name = "rustversion" version = "1.0.22" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "b39cdef0fa800fc44525c84ccb54a029961a8215f9619753635a9c0d2538d46d" -[[package]] -name = "same-file" -version = "1.0.6" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "93fc1dc3aaa9bfed95e02e6eadabb4baf7e3078b0bd1b4d7b6b0b68378900502" -dependencies = [ - "winapi-util", -] - [[package]] name = "serde" version = "1.0.228" @@ -622,16 +340,6 @@ dependencies = [ "unicode-ident", ] -[[package]] -name = "tinytemplate" -version = "1.2.1" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "be4d6b5f19ff7664e8c98d03e2139cb510db9b0a60b55f8e8709b689d939b6bc" -dependencies = [ - "serde", - "serde_json", -] - [[package]] name = "unicode-ident" version = "1.0.24" @@ -644,16 +352,6 @@ version = "2.1.3" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "daf8dba3b7eb870caf1ddeed7bc9d2a049f3cfdfae7cb521b087cc33ae4c49da" -[[package]] -name = "walkdir" -version = "2.5.0" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "29790946404f91d9c5d06f9874efddea1dc06c5efe94541a7d6863108e3a5e4b" -dependencies = [ - "same-file", - 
"winapi-util", -] - [[package]] name = "wasm-bindgen" version = "0.2.108" @@ -699,25 +397,6 @@ dependencies = [ "unicode-ident", ] -[[package]] -name = "web-sys" -version = "0.3.85" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "312e32e551d92129218ea9a2452120f4aabc03529ef03e4d0d82fb2780608598" -dependencies = [ - "js-sys", - "wasm-bindgen", -] - -[[package]] -name = "winapi-util" -version = "0.1.11" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "c2a7b1c03c876122aa43f3020e6c3c3ee5c05081c9a00739faf7503aeba10d22" -dependencies = [ - "windows-sys", -] - [[package]] name = "windows-core" version = "0.62.2" @@ -777,41 +456,12 @@ dependencies = [ "windows-link", ] -[[package]] -name = "windows-sys" -version = "0.61.2" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "ae137229bcbd6cdf0f7b80a31df61766145077ddf49416a728b02cb3921ff3fc" -dependencies = [ - "windows-link", -] - [[package]] name = "wit-bindgen-rt" version = "0.41.0" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "c4db52a11d4dfb0a59f194c064055794ee6564eb1ced88c25da2cf76e50c5621" -[[package]] -name = "zerocopy" -version = "0.8.39" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "db6d35d663eadb6c932438e763b262fe1a70987f9ae936e60158176d710cae4a" -dependencies = [ - "zerocopy-derive", -] - -[[package]] -name = "zerocopy-derive" -version = "0.8.39" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "4122cd3169e94605190e77839c9a40d40ed048d305bfdc146e7df40ab0f3e517" -dependencies = [ - "proc-macro2", - "quote", - "syn", -] - [[package]] name = "zmij" version = "1.0.21" diff --git a/wasm-wrappers/fdw/openapi_fdw/Cargo.toml b/wasm-wrappers/fdw/openapi_fdw/Cargo.toml index ffd90d70..5d15b842 100644 --- a/wasm-wrappers/fdw/openapi_fdw/Cargo.toml +++ b/wasm-wrappers/fdw/openapi_fdw/Cargo.toml @@ -14,13 +14,6 @@ serde = { version = "1.0", features = 
["derive"] } serde_json = "1.0" urlencoding = "2.1" -[dev-dependencies] -criterion = { version = "0.5", features = ["html_reports"] } - -[[bench]] -name = "fdw_benchmarks" -harness = false - [package.metadata.component] package = "supabase:openapi-fdw" diff --git a/wasm-wrappers/fdw/openapi_fdw/README.md b/wasm-wrappers/fdw/openapi_fdw/README.md index d006a68c..51d143bc 100644 --- a/wasm-wrappers/fdw/openapi_fdw/README.md +++ b/wasm-wrappers/fdw/openapi_fdw/README.md @@ -89,9 +89,6 @@ cargo component build --release --target wasm32-unknown-unknown # 518 unit tests cargo test -# Micro-benchmarks (Criterion) -cargo bench --bench fdw_benchmarks - # Integration tests (Docker: PostgreSQL + MockServer) bash test/run.sh diff --git a/wasm-wrappers/fdw/openapi_fdw/benches/fdw_benchmarks.rs b/wasm-wrappers/fdw/openapi_fdw/benches/fdw_benchmarks.rs deleted file mode 100644 index 89ae10e1..00000000 --- a/wasm-wrappers/fdw/openapi_fdw/benches/fdw_benchmarks.rs +++ /dev/null @@ -1,383 +0,0 @@ -use criterion::{BenchmarkId, Criterion, Throughput, black_box, criterion_group, criterion_main}; -use serde_json::json; - -// Note: We can't directly import from lib.rs due to WASM component model, -// so we'll benchmark the core algorithms that we can extract - -/// Benchmark: camelCase to snake_case conversion (used for every column) -fn bench_sanitize_column_name(c: &mut Criterion) { - let mut group = c.benchmark_group("column_name_sanitization"); - - let test_cases = vec![ - ("simpleCase", "simple_case"), - ("clusterIP", "cluster_ip"), - ("HTMLParser", "html_parser"), - ("getHTTPSUrl", "get_https_url"), - ("user_id", "user_id"), - ("userId", "user_id"), - ("@id", "_id"), - ("created-at", "created_at"), - ("123_start", "_123_start"), - ]; - - for (input, _expected) in test_cases { - group.bench_with_input(BenchmarkId::from_parameter(input), &input, |b, &input| { - b.iter(|| { - let mut result = String::new(); - let chars: Vec = input.chars().collect(); - - for (i, &c) in 
chars.iter().enumerate() { - if c.is_uppercase() && i > 0 { - let prev = chars[i - 1]; - let next_is_lower = chars.get(i + 1).is_some_and(|n| n.is_lowercase()); - - if prev.is_lowercase() - || prev.is_ascii_digit() - || (prev.is_uppercase() && next_is_lower) - { - result.push('_'); - } - result.push(c.to_ascii_lowercase()); - } else if c.is_alphanumeric() || c == '_' { - result.push(c.to_ascii_lowercase()); - } else { - result.push('_'); - } - } - - if result.starts_with(|c: char| c.is_ascii_digit()) { - result.insert(0, '_'); - } - - black_box(result) - }); - }); - } - - group.finish(); -} - -/// Benchmark: snake_case to camelCase (used during column matching) -fn bench_to_camel_case(c: &mut Criterion) { - let mut group = c.benchmark_group("to_camel_case"); - - let test_cases = vec![ - "user_id", - "created_at", - "cluster_ip", - "html_parser", - "simple_name", - "very_long_column_name_with_many_underscores", - ]; - - for input in test_cases { - group.bench_with_input(BenchmarkId::from_parameter(input), &input, |b, &input| { - b.iter(|| { - let mut result = String::new(); - let mut capitalize_next = false; - - for c in input.chars() { - if c == '_' { - capitalize_next = true; - } else if capitalize_next { - result.push(c.to_uppercase().next().unwrap_or(c)); - capitalize_next = false; - } else { - result.push(c); - } - } - - black_box(result) - }); - }); - } - - group.finish(); -} - -/// Benchmark: JSON object key lookup (happens once per cell) -fn bench_json_key_lookup(c: &mut Criterion) { - let mut group = c.benchmark_group("json_key_lookup"); - group.throughput(Throughput::Elements(1)); - - // Small object (5 keys) - let small_obj = json!({ - "id": 123, - "name": "John Doe", - "email": "john@example.com", - "createdAt": "2024-01-15T10:30:00Z", - "isActive": true - }); - - // Medium object (20 keys) - let medium_obj = json!({ - "id": 123, - "name": "John Doe", - "email": "john@example.com", - "createdAt": "2024-01-15T10:30:00Z", - "updatedAt": 
"2024-01-15T10:30:00Z", - "isActive": true, - "age": 30, - "city": "San Francisco", - "country": "USA", - "zipCode": "94102", - "phoneNumber": "+1234567890", - "companyName": "Acme Inc", - "jobTitle": "Engineer", - "department": "Engineering", - "salary": 100000, - "startDate": "2020-01-01", - "manager": "Jane Smith", - "teamSize": 5, - "projects": ["project1", "project2"], - "skills": ["rust", "sql"] - }); - - group.bench_function("small_object_exact_match", |b| { - b.iter(|| { - let obj = small_obj.as_object().unwrap(); - black_box(obj.get("name")) - }); - }); - - group.bench_function("medium_object_exact_match", |b| { - b.iter(|| { - let obj = medium_obj.as_object().unwrap(); - black_box(obj.get("name")) - }); - }); - - group.bench_function("small_object_case_insensitive", |b| { - b.iter(|| { - let obj = small_obj.as_object().unwrap(); - let target = "createdat"; - black_box( - obj.iter() - .find(|(k, _)| k.to_lowercase() == target) - .map(|(_, v)| v), - ) - }); - }); - - group.finish(); -} - -/// Benchmark: DateTime normalization (happens for every date/timestamp cell) -fn bench_normalize_datetime(c: &mut Criterion) { - let mut group = c.benchmark_group("normalize_datetime"); - - group.bench_function("date_only", |b| { - let input = "2024-01-15"; - b.iter(|| { - let result = if input.len() == 10 - && input.as_bytes().get(4) == Some(&b'-') - && input.as_bytes().get(7) == Some(&b'-') - { - format!("{input}T00:00:00Z") - } else { - input.to_string() - }; - black_box(result) - }); - }); - - group.bench_function("full_datetime", |b| { - let input = "2024-01-15T10:30:00Z"; - b.iter(|| { - let result = if input.len() == 10 - && input.as_bytes().get(4) == Some(&b'-') - && input.as_bytes().get(7) == Some(&b'-') - { - format!("{input}T00:00:00Z") - } else { - input.to_string() - }; - black_box(result) - }); - }); - - group.bench_function("date_only_cow", |b| { - use std::borrow::Cow; - let input = "2024-01-15"; - b.iter(|| { - let result: Cow = if input.len() == 10 - && 
input.as_bytes().get(4) == Some(&b'-') - && input.as_bytes().get(7) == Some(&b'-') - { - Cow::Owned(format!("{input}T00:00:00Z")) - } else { - Cow::Borrowed(input) - }; - black_box(result) - }); - }); - - group.bench_function("full_datetime_cow", |b| { - use std::borrow::Cow; - let input = "2024-01-15T10:30:00Z"; - b.iter(|| { - let result: Cow<str> = if input.len() == 10 - && input.as_bytes().get(4) == Some(&b'-') - && input.as_bytes().get(7) == Some(&b'-') - { - Cow::Owned(format!("{input}T00:00:00Z")) - } else { - Cow::Borrowed(input) - }; - black_box(result) - }); - }); - - group.finish(); -} - -/// Benchmark: JSON parsing (happens once per page) -fn bench_json_parsing(c: &mut Criterion) { - let mut group = c.benchmark_group("json_parsing"); - - // Small response (10 rows) - let small_json = serde_json::to_string(&json!({ - "data": (0..10).map(|i| json!({ - "id": i, - "name": format!("User {}", i), - "email": format!("user{}@example.com", i), - "createdAt": "2024-01-15T10:30:00Z", - "isActive": true - })).collect::<Vec<_>>() - })) - .unwrap(); - - // Large response (1000 rows) - let large_json = serde_json::to_string(&json!({ - "data": (0..1000).map(|i| json!({ - "id": i, - "name": format!("User {}", i), - "email": format!("user{}@example.com", i), - "createdAt": "2024-01-15T10:30:00Z", - "isActive": true - })).collect::<Vec<_>>() - })) - .unwrap(); - - group.throughput(Throughput::Bytes(small_json.len() as u64)); - group.bench_function("small_response_10_rows", |b| { - b.iter(|| black_box(serde_json::from_str::<serde_json::Value>(&small_json).unwrap())); - }); - - group.throughput(Throughput::Bytes(large_json.len() as u64)); - group.bench_function("large_response_1000_rows", |b| { - b.iter(|| black_box(serde_json::from_str::<serde_json::Value>(&large_json).unwrap())); - }); - - group.finish(); -} - -/// Benchmark: URL building with query parameters -fn bench_url_building(c: &mut Criterion) { - let mut group = c.benchmark_group("url_building"); - - group.bench_function("no_params", |b| { - let base = 
"https://api.example.com/users"; - let params: Vec<String> = vec![]; - b.iter(|| { - let mut url = base.to_string(); - if !params.is_empty() { - url.push('?'); - url.push_str(&params.join("&")); - } - black_box(url) - }); - }); - - group.bench_function("few_params_3", |b| { - let base = "https://api.example.com/users"; - let params = vec![ - "limit=100".to_string(), - "offset=0".to_string(), - "sort=created_at".to_string(), - ]; - b.iter(|| { - let mut url = base.to_string(); - if !params.is_empty() { - url.push('?'); - url.push_str(&params.join("&")); - } - black_box(url) - }); - }); - - group.bench_function("many_params_10", |b| { - let base = "https://api.example.com/users"; - let params = vec![ - "limit=100".to_string(), - "offset=0".to_string(), - "sort=created_at".to_string(), - "filter=active".to_string(), - "include=profile".to_string(), - "fields=id,name,email".to_string(), - "page=1".to_string(), - "per_page=50".to_string(), - "order=desc".to_string(), - "search=test".to_string(), - ]; - b.iter(|| { - let mut url = base.to_string(); - if !params.is_empty() { - url.push('?'); - url.push_str(&params.join("&")); - } - black_box(url) - }); - }); - - group.finish(); -} - -/// Benchmark: Type conversion (happens for every cell) -fn bench_type_conversion(c: &mut Criterion) { - let mut group = c.benchmark_group("type_conversion"); - - group.bench_function("json_to_i64", |b| { - let val = json!(12345); - b.iter(|| black_box(val.as_i64())); - }); - - group.bench_function("json_to_f64", |b| { - let val = json!(123.45); - b.iter(|| black_box(val.as_f64())); - }); - - group.bench_function("json_to_string", |b| { - let val = json!("test string"); - b.iter(|| black_box(val.as_str().map(|s| s.to_owned()))); - }); - - group.bench_function("json_to_bool", |b| { - let val = json!(true); - b.iter(|| black_box(val.as_bool())); - }); - - group.bench_function("json_complex_to_string", |b| { - let val = json!({ - "nested": { - "object": "value" - } - }); - b.iter(|| black_box(val.to_string())); - 
}); - - group.finish(); -} - -criterion_group!( - benches, - bench_sanitize_column_name, - bench_to_camel_case, - bench_json_key_lookup, - bench_normalize_datetime, - bench_json_parsing, - bench_url_building, - bench_type_conversion, -); - -criterion_main!(benches); From 61e405d85ef8d2fcd1610869d6eaf6d0c10f5e20 Mon Sep 17 00:00:00 2001 From: Cody Bromley Date: Mon, 16 Feb 2026 21:22:17 -0600 Subject: [PATCH 5/9] fix(openapi): use native target for test and clippy in Makefile Unit tests and clippy can't run on wasm32-unknown-unknown since there's no runtime to execute the binary. Auto-detect the host target via rustc so make test and make clippy work out of the box on any platform. --- wasm-wrappers/fdw/openapi_fdw/Makefile | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/wasm-wrappers/fdw/openapi_fdw/Makefile b/wasm-wrappers/fdw/openapi_fdw/Makefile index df301939..9edc531c 100644 --- a/wasm-wrappers/fdw/openapi_fdw/Makefile +++ b/wasm-wrappers/fdw/openapi_fdw/Makefile @@ -8,10 +8,10 @@ fmt: cargo fmt clippy: - RUSTFLAGS="-D warnings" cargo clippy --all --tests --no-deps + RUSTFLAGS="-D warnings" cargo clippy --all --tests --no-deps --target $$(rustc -vV | awk '/^host:/ {print $$2}') test: - cargo test + cargo test --target $$(rustc -vV | awk '/^host:/ {print $$2}') build: cargo component build --release --target wasm32-unknown-unknown From 60da79cea1f9c32b16c0b136060693acfc494192 Mon Sep 17 00:00:00 2001 From: Cody Bromley Date: Mon, 16 Feb 2026 22:49:45 -0600 Subject: [PATCH 6/9] feat(openapi): add YAML spec support, review fixes, and example improvements Add YAML spec parsing via serde_yaml_ng so spec_url accepts both JSON and YAML OpenAPI specs. Many APIs only publish YAML, so this makes the FDW work out of the box with more APIs. Also addresses PR review items: - Replace deprecated serde_yaml with serde_yaml_ng - debug_assert! -> assert! 
in this_mut() for release safety - Header deduplication prevents duplicate content-type/authorization - Empty/whitespace credentials filtered with warning - Retry on 502/503 in addition to 429, with status-specific hints - RowsOut stats now count rows consumed by PG, not just fetched - Validate max_pages >= 1 - base_url validation for spec-derived server URLs - Improved error messages (show both JSON and YAML parse errors) Example updates: - All 5 examples get IMPORT FOREIGN SCHEMA as section 1 - New import servers with spec_url (or spec_json for Threads) - Threads example shows CREATE SERVER with inline spec_json - PokeAPI highlights YAML spec support --- docs/catalog/openapi.md | 18 +- wasm-wrappers/fdw/Cargo.lock | 48 +++++ wasm-wrappers/fdw/openapi_fdw/Cargo.toml | 1 + wasm-wrappers/fdw/openapi_fdw/README.md | 4 +- .../fdw/openapi_fdw/examples/carapi/README.md | 50 ++++- .../fdw/openapi_fdw/examples/carapi/init.sql | 13 ++ .../fdw/openapi_fdw/examples/github/README.md | 50 ++++- .../fdw/openapi_fdw/examples/github/init.sql | 25 ++- .../fdw/openapi_fdw/examples/nws/README.md | 68 ++++--- .../openapi_fdw/examples/pokeapi/README.md | 47 ++++- .../fdw/openapi_fdw/examples/pokeapi/init.sql | 15 ++ .../openapi_fdw/examples/threads/README.md | 156 +++++++++++---- .../fdw/openapi_fdw/examples/threads/init.sql | 3 +- .../fdw/openapi_fdw/src/column_matching.rs | 38 ++-- wasm-wrappers/fdw/openapi_fdw/src/config.rs | 31 ++- .../fdw/openapi_fdw/src/config_tests.rs | 75 ++++++- wasm-wrappers/fdw/openapi_fdw/src/lib.rs | 40 ++-- .../fdw/openapi_fdw/src/pagination.rs | 20 +- wasm-wrappers/fdw/openapi_fdw/src/request.rs | 104 +++++++--- .../fdw/openapi_fdw/src/request_tests.rs | 81 +++++--- wasm-wrappers/fdw/openapi_fdw/src/schema.rs | 16 +- wasm-wrappers/fdw/openapi_fdw/src/spec.rs | 49 +++-- .../fdw/openapi_fdw/src/spec_tests.rs | 187 ++++++++++++++++++ 23 files changed, 883 insertions(+), 256 deletions(-) diff --git a/docs/catalog/openapi.md b/docs/catalog/openapi.md index 
5d25d339..4570554a 100644 --- a/docs/catalog/openapi.md +++ b/docs/catalog/openapi.md @@ -96,7 +96,7 @@ We need to provide Postgres with the credentials to access the API and any addit | ------ | :------: | ----------- | | `fdw_package_*` | Yes | Standard Wasm FDW package metadata. See [Available Versions](#available-versions). | | `base_url` | Yes* | Base URL for the API (e.g., `https://api.example.com/v1`). *Optional if `spec_url` or `spec_json` provides servers. | -| `spec_url` | No | URL to the OpenAPI specification JSON. Required for `IMPORT FOREIGN SCHEMA`. Mutually exclusive with `spec_json`. | +| `spec_url` | No | URL to the OpenAPI specification (JSON or YAML). Required for `IMPORT FOREIGN SCHEMA`. Mutually exclusive with `spec_json`. | | `spec_json` | No | Inline OpenAPI 3.0+ JSON spec for `IMPORT FOREIGN SCHEMA`. Mutually exclusive with `spec_url`. Useful when the API doesn't publish a spec URL. | | `api_key` | No | API key for authentication. | | `api_key_id` | No | Vault secret key ID storing the API key. Use instead of `api_key`. | @@ -369,18 +369,22 @@ options ( | ------------- | --------- | | text | string | | boolean | boolean | -| smallint | number | +| smallint* | number | | integer | number | | bigint | number | | real | number | | double precision | number | -| numeric | number | +| numeric* | number | | date | string (ISO 8601) | -| timestamp | string (ISO 8601) | +| timestamp* | string (ISO 8601) | | timestamptz | string (ISO 8601) | +| bytea | string (base64) | +| time | string (HH:MM:SS) | | jsonb | object/array | | uuid | string | +\* Types marked with an asterisk work when you define tables manually, but `IMPORT FOREIGN SCHEMA` won't generate columns with these types automatically. 
+ ### The `attrs` Column Any foreign table can include an `attrs` column of type `jsonb` to capture the entire raw JSON response for each row: @@ -402,11 +406,11 @@ options (endpoint '/users'); - **Authentication**: Currently supports API Key and Bearer Token authentication. OAuth flows are not supported. - **OpenAPI version**: Only OpenAPI 3.0+ specifications are supported (not Swagger 2.0). -## Rate Limiting +## Automatic Retries -The FDW automatically handles rate limiting: +The FDW automatically retries transient HTTP errors up to 3 times: -- **HTTP 429 responses**: Automatically retries up to 3 times +- **HTTP 429** (Rate Limit), **502** (Bad Gateway), **503** (Service Unavailable) - **Retry-After header**: Respects server-specified delay when provided - **Exponential backoff**: Falls back to 1s, 2s, 4s delays when no Retry-After header is present diff --git a/wasm-wrappers/fdw/Cargo.lock b/wasm-wrappers/fdw/Cargo.lock index 9f8384cc..4d3021f0 100644 --- a/wasm-wrappers/fdw/Cargo.lock +++ b/wasm-wrappers/fdw/Cargo.lock @@ -92,12 +92,24 @@ version = "0.8.7" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "773648b94d0e5d620f64f280777445740e61fe701025087ec8b57f45c791888b" +[[package]] +name = "equivalent" +version = "1.0.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "877a4ace8713b0bcf2a4e7eec82529c029f1d0619886d18145fea96c3ffe5c0f" + [[package]] name = "find-msvc-tools" version = "0.1.9" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "5baebc0774151f905a1a2cc41989300b1e6fbb29aff0ceffa1064fdd3088d582" +[[package]] +name = "hashbrown" +version = "0.16.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "841d1cc9bed7f9236f321df977030373f4a4163ae1a7dbfe1a51a2c1a51d9100" + [[package]] name = "helloworld_fdw" version = "0.2.0" @@ -137,6 +149,16 @@ dependencies = [ "cc", ] +[[package]] +name = "indexmap" +version = "2.13.0" +source = 
"registry+https://github.com/rust-lang/crates.io-index" +checksum = "7714e70437a7dc3ac8eb7e6f8df75fd8eb422675fc7678aff7364301092b1017" +dependencies = [ + "equivalent", + "hashbrown", +] + [[package]] name = "infura_fdw" version = "0.1.0" @@ -209,6 +231,7 @@ version = "0.2.0" dependencies = [ "serde", "serde_json", + "serde_yaml_ng", "urlencoding", "wit-bindgen-rt", ] @@ -254,6 +277,12 @@ version = "1.0.22" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "b39cdef0fa800fc44525c84ccb54a029961a8215f9619753635a9c0d2538d46d" +[[package]] +name = "ryu" +version = "1.0.23" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "9774ba4a74de5f7b1c1451ed6cd5285a32eddb5cccb8cc655a4e50009e06477f" + [[package]] name = "serde" version = "1.0.228" @@ -297,6 +326,19 @@ dependencies = [ "zmij", ] +[[package]] +name = "serde_yaml_ng" +version = "0.10.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "7b4db627b98b36d4203a7b458cf3573730f2bb591b28871d916dfa9efabfd41f" +dependencies = [ + "indexmap", + "itoa", + "ryu", + "serde", + "unsafe-libyaml", +] + [[package]] name = "shlex" version = "1.3.0" @@ -346,6 +388,12 @@ version = "1.0.24" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "e6e4313cd5fcd3dad5cafa179702e2b244f760991f45397d14d4ebf38247da75" +[[package]] +name = "unsafe-libyaml" +version = "0.2.11" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "673aac59facbab8a9007c7f6108d11f63b603f7cabff99fabf650fea5c32b861" + [[package]] name = "urlencoding" version = "2.1.3" diff --git a/wasm-wrappers/fdw/openapi_fdw/Cargo.toml b/wasm-wrappers/fdw/openapi_fdw/Cargo.toml index 5d15b842..04ea08ac 100644 --- a/wasm-wrappers/fdw/openapi_fdw/Cargo.toml +++ b/wasm-wrappers/fdw/openapi_fdw/Cargo.toml @@ -12,6 +12,7 @@ crate-type = ["cdylib"] wit-bindgen-rt = "0.41.0" serde = { version = "1.0", features = ["derive"] } serde_json = "1.0" +serde_yaml_ng = 
"0.10" urlencoding = "2.1" [package.metadata.component] diff --git a/wasm-wrappers/fdw/openapi_fdw/README.md b/wasm-wrappers/fdw/openapi_fdw/README.md index 51d143bc..86cb444e 100644 --- a/wasm-wrappers/fdw/openapi_fdw/README.md +++ b/wasm-wrappers/fdw/openapi_fdw/README.md @@ -8,7 +8,7 @@ Point it at an OpenAPI spec and query the API with SQL. The FDW parses the spec ## Features -- **Automatic schema import** — Reads OpenAPI 3.0/3.1 specs (via `spec_url` or inline `spec_json`) and generates foreign tables with `IMPORT FOREIGN SCHEMA` +- **Automatic schema import** — Reads OpenAPI 3.0/3.1 specs in JSON or YAML (via `spec_url` or inline `spec_json`) and generates foreign tables with `IMPORT FOREIGN SCHEMA` - **Automatic pagination** — Detects and follows cursor-based, offset-based, and Link-header pagination across multiple pages - **Path parameter support** — Substitutes path parameters from WHERE clauses (e.g., `WHERE user_id = '123'` fills `/users/{user_id}/posts`) - **Query pushdown** — Forwards non-path WHERE clauses as query parameters to filter at the API level @@ -148,7 +148,7 @@ For queries you run frequently, a [materialized view](https://supabase.com/blog/ | Version | Date | Notes | | --- | --- | --- | -| 0.2.0 | 2026-02-15 | Modular architecture, POST-for-read, `spec_json` inline specs, LIMIT pushdown, OpenAPI 3.1 support, security hardening, 518 unit tests, 5 real-world examples | +| 0.2.0 | 2026-02-15 | Modular architecture, POST-for-read, `spec_json` inline specs, YAML spec support, LIMIT pushdown, OpenAPI 3.1 support, security hardening, 531 unit tests, 5 real-world examples | | 0.1.4 | 2026-02-09 | Type coercion, auth validation, table naming, URL fixes | | 0.1.3 | 2026-02-06 | Avoid cloning JSON response data | | 0.1.2 | 2026-02-01 | Fix query param filtering | diff --git a/wasm-wrappers/fdw/openapi_fdw/examples/carapi/README.md b/wasm-wrappers/fdw/openapi_fdw/examples/carapi/README.md index 02f88397..fad3268e 100644 --- 
a/wasm-wrappers/fdw/openapi_fdw/examples/carapi/README.md +++ b/wasm-wrappers/fdw/openapi_fdw/examples/carapi/README.md @@ -17,7 +17,36 @@ create server carapi --- -## 1. Makes +## 1. Quick Start with IMPORT FOREIGN SCHEMA + +The `carapi_import` server has a `spec_url` pointing to the CarAPI OpenAPI spec, so tables can be auto-generated: + +```sql +CREATE SCHEMA IF NOT EXISTS carapi_auto; + +IMPORT FOREIGN SCHEMA "unused" +FROM SERVER carapi_import +INTO carapi_auto; +``` + +See what was generated: + +```sql +SELECT foreign_table_name FROM information_schema.foreign_tables +WHERE foreign_table_schema = 'carapi_auto'; +``` + +Pick a generated table and query it: + +```sql +SELECT * FROM carapi_auto.makes_v2 LIMIT 3; +``` + +The rest of this example uses manually defined tables to demonstrate specific features (query pushdown, type coercion, debug mode, etc.). + +--- + +## 2. Makes Fetches all car manufacturers. Demonstrates **page-based pagination** with auto-detected `data` wrapper key. The CarAPI wraps responses in `{"collection": {...}, "data": [...]}` and the FDW auto-detects the `data` key. @@ -48,7 +77,7 @@ LIMIT 5; | 2 | Audi | | 25 | Bentley | -## 2. Models +## 3. Models Car models filtered by make and year. Demonstrates **query parameter pushdown** — the WHERE clause values are sent as query parameters to the API, so only matching data is returned. @@ -83,7 +112,7 @@ LIMIT 5; | 7308 | C-HR | Toyota | | 4779 | Camry | Toyota | -## 3. Trims +## 4. Trims Trim levels with MSRP pricing. Combines query pushdown (year, make, model) with integer type coercion for pricing fields. @@ -141,7 +170,7 @@ WHERE year = '2020' AND make = 'Honda' AND model = 'Civic' LIMIT 3; ``` -## 4. Bodies +## 5. Bodies Vehicle body dimensions. Demonstrates mixed types — integer for counts/weights, text for decimal measurements. @@ -195,7 +224,7 @@ WHERE year = '2020' AND make = 'Toyota' AND model = 'RAV4' LIMIT 3; ``` -## 5. Engines +## 6. 
Engines Engine specifications and performance data. @@ -255,7 +284,7 @@ WHERE year = '2020' AND make = 'Ford' AND model = 'Mustang' LIMIT 3; ``` -## 6. Mileages +## 7. Mileages Fuel economy and range data (EPA ratings). @@ -306,7 +335,7 @@ WHERE year = '2020' AND make = 'Honda' AND model = 'Accord' LIMIT 3; ``` -## 7. Exterior Colors +## 8. Exterior Colors Paint colors with RGB values. @@ -344,7 +373,7 @@ LIMIT 5; | Galactic Aqua Mica | 37,54,65 | | Midnight Black Metallic | 23,23,23 | -## 8. OBD Codes +## 9. OBD Codes OBD-II diagnostic trouble codes. A small dataset available on the free tier. @@ -372,7 +401,7 @@ LIMIT 5; | P0100 | Mass or Volume Air Flow Sensor A Circuit | | U1000 | Manufacturer Controlled DTC | -## 9. Debug Mode +## 10. Debug Mode The `makes_debug` table uses the `carapi_debug` server which has `debug 'true'`. This emits HTTP request details and scan statistics as PostgreSQL INFO messages. @@ -387,7 +416,7 @@ INFO: [openapi_fdw] HTTP GET https://carapi.app/api/makes/v2 -> 200 (1404 bytes INFO: [openapi_fdw] Scan complete: 1 rows, 1 columns ``` -## 10. The `attrs` Column +## 11. The `attrs` Column Every table includes an `attrs jsonb` column that captures **all fields not mapped to named columns**. This is useful for exploring what data the API returns without defining every column upfront. 
@@ -401,6 +430,7 @@ LIMIT 1; | Feature | Table(s) | | --- | --- | +| IMPORT FOREIGN SCHEMA | `carapi_import` server | | Page-based pagination (auto-followed) | `makes`, `models`, `trims`, `bodies`, `engines`, `mileages`, `exterior_colors` | | Auto-detected `data` wrapper key | All tables | | Query parameter pushdown | `models`, `trims`, `bodies`, `engines`, `mileages`, `exterior_colors` | diff --git a/wasm-wrappers/fdw/openapi_fdw/examples/carapi/init.sql b/wasm-wrappers/fdw/openapi_fdw/examples/carapi/init.sql index 557b98c7..13f2070a 100644 --- a/wasm-wrappers/fdw/openapi_fdw/examples/carapi/init.sql +++ b/wasm-wrappers/fdw/openapi_fdw/examples/carapi/init.sql @@ -49,6 +49,19 @@ create server carapi_debug debug 'true' ); +-- ============================================================ +-- Server 3: carapi_import — With spec_url for IMPORT FOREIGN SCHEMA +-- ============================================================ +create server carapi_import + foreign data wrapper wasm_wrapper + options ( + fdw_package_url 'file:///openapi_fdw.wasm', + fdw_package_name 'supabase:openapi-fdw', + fdw_package_version '0.2.0', + base_url 'https://carapi.app/api', + spec_url 'https://carapi.app/swagger.json' + ); + -- ============================================================ -- Table 1: makes -- All car manufacturers (paginated) diff --git a/wasm-wrappers/fdw/openapi_fdw/examples/github/README.md b/wasm-wrappers/fdw/openapi_fdw/examples/github/README.md index b677faff..de93dd12 100644 --- a/wasm-wrappers/fdw/openapi_fdw/examples/github/README.md +++ b/wasm-wrappers/fdw/openapi_fdw/examples/github/README.md @@ -23,7 +23,38 @@ create server github --- -## 1. Your Profile +## 1. Quick Start with IMPORT FOREIGN SCHEMA + +The `github_import` server has a `spec_url` pointing to the GitHub REST API OpenAPI spec, so tables can be auto-generated: + +> **Note:** The GitHub OpenAPI spec is large (~15 MB). The initial import may take a few seconds to fetch and parse. 
+ +```sql +CREATE SCHEMA IF NOT EXISTS github_auto; + +IMPORT FOREIGN SCHEMA "unused" +FROM SERVER github_import +INTO github_auto; +``` + +See what was generated: + +```sql +SELECT foreign_table_name FROM information_schema.foreign_tables +WHERE foreign_table_schema = 'github_auto'; +``` + +Pick a generated table and query it: + +```sql +SELECT * FROM github_auto.user LIMIT 1; +``` + +The rest of this example uses manually defined tables to demonstrate specific features (path parameters, query pushdown, custom headers, etc.). + +--- + +## 2. Your Profile Single object response. The FDW returns one row with your GitHub profile info. @@ -71,7 +102,7 @@ SELECT login, name, email, bio, company, location, blog, FROM my_profile; ``` -## 2. Your Repositories +## 3. Your Repositories Paginated list of your repos. The FDW auto-detects page-based pagination via `Link` headers. @@ -141,7 +172,7 @@ FROM my_repos LIMIT 5; ``` -## 3. Repository Detail (Path Parameters) +## 4. Repository Detail (Path Parameters) Look up a specific repository. The `{owner}` and `{repo}` placeholders in the endpoint are replaced with values from your WHERE clause. @@ -194,7 +225,7 @@ FROM repo_detail WHERE owner = 'supabase' AND repo = 'wrappers'; ``` -## 4. Repository Issues +## 5. Repository Issues Issues for a repository. Two path parameters plus query pushdown for state filtering: @@ -261,7 +292,7 @@ WHERE owner = 'supabase' AND repo = 'wrappers' LIMIT 3; ``` -## 5. Pull Requests +## 6. Pull Requests Pull requests with state filtering via query pushdown: @@ -330,7 +361,7 @@ WHERE owner = 'supabase' AND repo = 'wrappers' AND state = 'open' LIMIT 5; ``` -## 6. Releases +## 7. Releases Paginated list of releases for a repository: @@ -385,7 +416,7 @@ WHERE owner = 'supabase' AND repo = 'wrappers' LIMIT 3; ``` -## 7. Search Repositories (Query Pushdown) +## 8. 
Search Repositories (Query Pushdown) When a WHERE clause references `q`, the FDW sends it as a query parameter to the `/search/repositories` endpoint. The FDW auto-detects the `items` wrapper key in the response. @@ -440,7 +471,7 @@ WHERE q = 'postgres foreign data wrapper rust' LIMIT 5; ``` -## 8. Debug Mode +## 9. Debug Mode The `search_repos_debug` table uses the `github_debug` server which has `debug 'true'`. This emits HTTP request details as PostgreSQL INFO messages. @@ -455,7 +486,7 @@ INFO: [openapi_fdw] HTTP GET https://api.github.com/search/repositories?per_pag INFO: [openapi_fdw] Scan complete: 1 rows, 2 columns ``` -## The `attrs` Column +## 10. The `attrs` Column Every table includes an `attrs jsonb` column that captures all fields not mapped to named columns: @@ -476,6 +507,7 @@ LIMIT 3; | Feature | Table(s) | | --- | --- | +| IMPORT FOREIGN SCHEMA | `github_import` server | | Bearer token auth (Authorization header) | All tables | | Custom HTTP headers (X-GitHub-Api-Version) | All tables | | Page-based pagination (auto-detected) | `my_repos`, `repo_issues`, `repo_pulls`, `repo_releases`, `search_repos` | diff --git a/wasm-wrappers/fdw/openapi_fdw/examples/github/init.sql b/wasm-wrappers/fdw/openapi_fdw/examples/github/init.sql index cd56bcbd..81b05e26 100644 --- a/wasm-wrappers/fdw/openapi_fdw/examples/github/init.sql +++ b/wasm-wrappers/fdw/openapi_fdw/examples/github/init.sql @@ -1,5 +1,6 @@ -- OpenAPI FDW example: GitHub API --- Requires a GitHub personal access token (set GITHUB_TOKEN env var). +-- Requires a GitHub personal access token. Replace 'placeholder' in the server definitions +-- below with your token. -- See: https://docs.github.com/en/authentication/keeping-your-account-and-data-secure/managing-your-personal-access-tokens -- Note: fdw_package_url uses file:// for local Docker testing. 
In production, use the -- GitHub release URL: https://github.com/supabase/wrappers/releases/download/wasm_openapi_fdw_v0.2.0/openapi_fdw.wasm @@ -62,6 +63,28 @@ create server github_debug debug 'true' ); +-- ============================================================ +-- Server 3: github_import — With spec_url for IMPORT FOREIGN SCHEMA +-- Note: The GitHub spec is large (~15 MB). The FDW's default +-- max_response_bytes (50 MiB) can handle it, but the initial +-- IMPORT may take a few seconds. +-- ============================================================ +create server github_import + foreign data wrapper wasm_wrapper + options ( + fdw_package_url 'file:///openapi_fdw.wasm', + fdw_package_name 'supabase:openapi-fdw', + fdw_package_version '0.2.0', + base_url 'https://api.github.com', + api_key 'placeholder', + user_agent 'openapi-fdw-example/0.2.0', + accept 'application/vnd.github+json', + headers '{"X-GitHub-Api-Version": "2022-11-28"}', + page_size '30', + page_size_param 'per_page', + spec_url 'https://raw.githubusercontent.com/github/rest-api-description/main/descriptions/api.github.com/api.github.com.json' + ); + -- ============================================================ -- Table 1: my_profile -- Authenticated user's profile — GET /user diff --git a/wasm-wrappers/fdw/openapi_fdw/examples/nws/README.md b/wasm-wrappers/fdw/openapi_fdw/examples/nws/README.md index b35be986..efc1b44f 100644 --- a/wasm-wrappers/fdw/openapi_fdw/examples/nws/README.md +++ b/wasm-wrappers/fdw/openapi_fdw/examples/nws/README.md @@ -19,7 +19,36 @@ create server nws --- -## 1. Weather Stations +## 1. 
Quick Start with IMPORT FOREIGN SCHEMA + +The `nws_import` server has a `spec_url` pointing to the NWS OpenAPI spec, so tables can be auto-generated: + +```sql +CREATE SCHEMA IF NOT EXISTS nws_auto; + +IMPORT FOREIGN SCHEMA "unused" +FROM SERVER nws_import +INTO nws_auto; +``` + +See what was generated: + +```sql +SELECT foreign_table_name FROM information_schema.foreign_tables +WHERE foreign_table_schema = 'nws_auto'; +``` + +Pick a generated table and query it: + +```sql +SELECT * FROM nws_auto.alerts LIMIT 3; +``` + +The rest of this example uses manually defined tables to demonstrate specific features (GeoJSON extraction, path parameters, custom pagination, etc.). + +--- + +## 2. Weather Stations Fetches the full list of US weather stations. Demonstrates **GeoJSON extraction** (`response_path` + `object_path`), **cursor-based pagination** (`cursor_path`), and **camelCase-to-snake_case** column matching (`stationIdentifier` → `station_identifier`). @@ -77,7 +106,7 @@ LIMIT 3; | 000PG | Southside Road | `{"value": 129.2352, "unitCode": "wmoUnit:m"}` | | 000SE | SCE South Hills Park | `{"value": 242.9256, "unitCode": "wmoUnit:m"}` | -## 2. Active Alerts +## 3. Active Alerts Different GeoJSON shape with **timestamptz coercion** for `onset` and `expires` columns. @@ -131,7 +160,7 @@ WHERE severity IN ('Severe', 'Extreme') LIMIT 10; ``` -## 3. Query Param Pushdown (severity filter) +## 4. Query Param Pushdown (severity filter) When a WHERE clause references a column that isn't a path parameter, the FDW sends it as a **query parameter** to the API. The NWS alerts endpoint supports a `severity` filter — and because it echoes `severity` back in every response object, the column is populated naturally: @@ -151,7 +180,7 @@ LIMIT 3; Try other severity values: `Extreme`, `Moderate`, `Minor`, `Unknown`. -## 4. Station Observations +## 5. 
Station Observations **Path parameter substitution**: the `{station_id}` placeholder in the endpoint is replaced with the value from your WHERE clause. @@ -207,7 +236,7 @@ LIMIT 3; | 2026-02-14 03:40:00+00 | 7 | 25.92 | Cloudy | | 2026-02-14 03:35:00+00 | 8 | 25.92 | Cloudy | -## 5. Current Conditions +## 6. Current Conditions **Single object response** — the `/observations/latest` endpoint returns one GeoJSON Feature (not a FeatureCollection). The FDW auto-detects this and returns a single row. @@ -244,7 +273,7 @@ WHERE station_id = 'KDEN'; | --- | --- | --- | --- | --- | --- | | Cloudy | 7 | 24.084 | 310 | | 65.63 | -## 6. Point Metadata & Forecast +## 7. Point Metadata & Forecast This two-step flow demonstrates **composite path parameters** and **nested response extraction**. @@ -344,31 +373,6 @@ WHERE wfo = 'BOU' AND x = '63' AND y = '62'; > Grid coordinates vary by location. Always use Step 1 to find the right values for your area. -## 7. IMPORT FOREIGN SCHEMA - -Auto-generate table definitions from the NWS OpenAPI spec. The `nws_import` server has a `spec_url` configured. - -```sql -CREATE SCHEMA IF NOT EXISTS nws_auto; - -IMPORT FOREIGN SCHEMA "unused" -FROM SERVER nws_import -INTO nws_auto; -``` - -See what was generated: - -```sql -SELECT foreign_table_name FROM information_schema.foreign_tables -WHERE foreign_table_schema = 'nws_auto'; -``` - -Pick a generated table and query it: - -```sql -SELECT * FROM nws_auto.alerts LIMIT 3; -``` - ## 8. Debug Mode The `stations_debug` table uses the `nws_debug` server which has `debug 'true'`. This emits HTTP request details (method, URL, status, response size) and scan statistics (row/column counts) as PostgreSQL INFO messages. 
@@ -408,6 +412,7 @@ LIMIT 5; | Feature | Table(s) | | --- | --- | +| IMPORT FOREIGN SCHEMA | `nws_import` server | | GeoJSON extraction (`response_path` + `object_path`) | `stations`, `active_alerts`, `station_observations` | | Cursor-based pagination (`cursor_path`) | `stations` | | Path parameter substitution | `station_observations`, `latest_observation`, `point_metadata`, `forecast_periods` | @@ -416,7 +421,6 @@ LIMIT 5; | Custom headers (`user_agent`, `accept`) | All servers | | LIMIT pushdown | Any table with `LIMIT` | | Debug mode (`debug`) | `stations_debug` | -| IMPORT FOREIGN SCHEMA | `nws_import` server | | Single object response | `latest_observation`, `point_metadata` | | Type coercion (timestamptz, jsonb, boolean, integer) | `active_alerts`, `forecast_periods` | | `attrs` catch-all column | All tables | diff --git a/wasm-wrappers/fdw/openapi_fdw/examples/pokeapi/README.md b/wasm-wrappers/fdw/openapi_fdw/examples/pokeapi/README.md index f4d73c12..366544b7 100644 --- a/wasm-wrappers/fdw/openapi_fdw/examples/pokeapi/README.md +++ b/wasm-wrappers/fdw/openapi_fdw/examples/pokeapi/README.md @@ -19,7 +19,36 @@ create server pokeapi --- -## 1. Pokemon List +## 1. Quick Start with IMPORT FOREIGN SCHEMA + +The `pokeapi_import` server has a `spec_url` pointing to the PokeAPI OpenAPI spec (YAML format — the FDW parses both JSON and YAML), so tables can be auto-generated: + +```sql +CREATE SCHEMA IF NOT EXISTS pokeapi_auto; + +IMPORT FOREIGN SCHEMA "unused" +FROM SERVER pokeapi_import +INTO pokeapi_auto; +``` + +See what was generated: + +```sql +SELECT foreign_table_name FROM information_schema.foreign_tables +WHERE foreign_table_schema = 'pokeapi_auto'; +``` + +Pick a generated table and query it: + +```sql +SELECT * FROM pokeapi_auto.pokemon LIMIT 3; +``` + +The rest of this example uses manually defined tables to demonstrate specific features (path parameters, jsonb extraction, debug mode, etc.). + +--- + +## 2. 
Pokemon List Fetches the paginated list of all Pokemon (~1350 entries). Demonstrates **offset-based pagination** with auto-detected `results` wrapper key and `limit` page size parameter. The FDW automatically follows the `next` URL in each response to fetch subsequent pages. @@ -52,7 +81,7 @@ LIMIT 5; List endpoints only return `name` and `url` pairs. Use the detail table to get full data for a specific Pokemon. -## 2. Pokemon Detail +## 3. Pokemon Detail **Path parameter substitution**: the `{name}` placeholder in the endpoint is replaced with the value from your WHERE clause. Returns a single object with full Pokemon data. @@ -139,7 +168,7 @@ WHERE name IN ('charizard', 'blastoise'); Try other Pokemon: `bulbasaur`, `charizard`, `mewtwo`, `snorlax`, `gengar`. -## 3. Types List +## 4. Types List Fetches all Pokemon types. With only 21 types, this fits within a single page (page size is 20, so it takes two small fetches). @@ -185,7 +214,7 @@ FROM types; | unknown | | | shadow | | -## 4. Type Detail +## 5. Type Detail Detailed information about a single type, including **damage relations** (strengths and weaknesses) and a list of all Pokemon of that type. @@ -238,7 +267,7 @@ WHERE name = 'dragon'; Try other types: `water`, `electric`, `dragon`, `fairy`, `ghost`. -## 5. Berries List +## 6. Berries List Fetches all berries (64 items). Demonstrates pagination across multiple pages. @@ -269,7 +298,7 @@ LIMIT 5; | rawst | | | aspear | | -## 6. Berry Detail +## 7. Berry Detail Detailed information about a single berry, including growth data, flavors, and natural gift properties. @@ -334,7 +363,7 @@ WHERE name = 'sitrus'; Try other berries: `chesto`, `pecha`, `rawst`, `aspear`, `leppa`, `oran`, `sitrus`. -## 7. Debug Mode +## 8. Debug Mode The `pokemon_debug` table uses the `pokeapi_debug` server which has `debug 'true'`. This emits HTTP request details (method, URL, status, response size) and scan statistics as PostgreSQL INFO messages. 
@@ -351,7 +380,7 @@ INFO: [openapi_fdw] HTTP GET https://pokeapi.co/api/v2/pokemon?limit=20 -> 200 INFO: [openapi_fdw] Scan complete: 3 rows, 1 columns ``` -## 8. The `attrs` Column +## 9. The `attrs` Column Every table includes an `attrs jsonb` column that captures **all fields not mapped to named columns**. This is useful for exploring what data the API returns without defining every column upfront. @@ -372,6 +401,8 @@ WHERE name = 'pikachu'; | Feature | Table(s) | | --- | --- | +| IMPORT FOREIGN SCHEMA | `pokeapi_import` server | +| YAML spec support | `pokeapi_import` server (spec is YAML, not JSON) | | Offset-based pagination (auto-followed `next` URL) | `pokemon`, `types`, `berries` | | Auto-detected `results` wrapper key | All list tables | | Path parameter substitution | `pokemon_detail`, `type_detail`, `berry_detail` | diff --git a/wasm-wrappers/fdw/openapi_fdw/examples/pokeapi/init.sql b/wasm-wrappers/fdw/openapi_fdw/examples/pokeapi/init.sql index 41879850..c18eceb9 100644 --- a/wasm-wrappers/fdw/openapi_fdw/examples/pokeapi/init.sql +++ b/wasm-wrappers/fdw/openapi_fdw/examples/pokeapi/init.sql @@ -50,6 +50,21 @@ create server pokeapi_debug debug 'true' ); +-- ============================================================ +-- Server 3: pokeapi_import — With spec_url for IMPORT FOREIGN SCHEMA +-- ============================================================ +create server pokeapi_import + foreign data wrapper wasm_wrapper + options ( + fdw_package_url 'file:///openapi_fdw.wasm', + fdw_package_name 'supabase:openapi-fdw', + fdw_package_version '0.2.0', + base_url 'https://pokeapi.co/api/v2', + spec_url 'https://raw.githubusercontent.com/PokeAPI/pokeapi/master/openapi.yml', + page_size '20', + page_size_param 'limit' + ); + -- ============================================================ -- Table 1: pokemon -- Paginated list of all Pokémon (~1350 items) diff --git a/wasm-wrappers/fdw/openapi_fdw/examples/threads/README.md 
b/wasm-wrappers/fdw/openapi_fdw/examples/threads/README.md index b59a957c..7b8f5430 100644 --- a/wasm-wrappers/fdw/openapi_fdw/examples/threads/README.md +++ b/wasm-wrappers/fdw/openapi_fdw/examples/threads/README.md @@ -2,6 +2,26 @@ Query the [Meta Threads API](https://developers.facebook.com/docs/threads) using SQL. This example demonstrates authenticated API access, cursor-based pagination, path parameter substitution, and query param pushdown. +> The Threads API does not publish a public OpenAPI spec. This example uses `spec_json` to provide an inline spec for `IMPORT FOREIGN SCHEMA`, and hand-written table definitions for endpoints that need path parameters or custom options. + +## Prerequisites + +You need a Meta Threads access token. To get one: + +1. Create or use an existing app at [developers.facebook.com](https://developers.facebook.com/apps/) +2. Add the **Threads API** product to your app +3. Enable the required permissions (see below) +4. Generate an access token from the [Threads API Tools](https://developers.facebook.com/docs/threads/get-started) page + +### Required Permissions + +| Permission | Needed for | +| --- | --- | +| `threads_basic` | All tables (profile, threads, replies, thread detail, profile lookup, publishing limit) | +| `threads_keyword_search` | `keyword_search` and `keyword_search_debug` tables (sections 8 and 11) | + +> Short-lived tokens expire after 1 hour. For longer sessions, [exchange for a long-lived token](https://developers.facebook.com/docs/threads/get-started/long-lived-tokens) (60 days). + ## Server Configuration ```sql @@ -20,7 +40,88 @@ create server threads --- -## 1. Your Profile +## 1. Inline Spec with IMPORT FOREIGN SCHEMA + +The Threads API has no public OpenAPI spec, so this example uses `spec_json` to embed a hand-written spec directly in the server definition. 
The FDW parses the inline JSON the same way it would a fetched `spec_url`, auto-generating `CREATE FOREIGN TABLE` statements with correct column names and types. + +This approach also works well for APIs that publish a spec that's too large, outdated, or inaccurate, or when you want a customized subset of endpoints. + +### Create a server with an inline spec + +Here's a minimal example with two endpoints (`/me` and `/profile_lookup`). The full `threads_import` server in [`init.sql`](init.sql) covers all 8 GET endpoints used by this example. + +```sql +create server threads_import + foreign data wrapper wasm_wrapper + options ( + fdw_package_url '...openapi_fdw.wasm', + fdw_package_name 'supabase:openapi-fdw', + fdw_package_version '0.2.0', + base_url 'https://graph.threads.net', + api_key '', + api_key_header 'access_token', + api_key_location 'query', + spec_json '{ + "openapi": "3.0.0", + "info": { "title": "Threads API", "version": "1.0.0" }, + "paths": { + "/me": { + "get": { + "responses": { "default": { "content": { "application/json": { "schema": { + "type": "object", + "properties": { + "id": { "type": "string" }, + "username": { "type": "string" }, + "name": { "type": "string" } + } + }}}}} + } + }, + "/profile_lookup": { + "get": { + "parameters": [{ "name": "username", "in": "query", "schema": { "type": "string" } }], + "responses": { "default": { "content": { "application/json": { "schema": { + "type": "object", + "properties": { + "username": { "type": "string" }, + "name": { "type": "string" }, + "follower_count": { "type": "integer" }, + "is_verified": { "type": "boolean" } + } + }}}}} + } + } + } + }' + ); +``` + +### Auto-generate tables from the inline spec + +```sql +CREATE SCHEMA IF NOT EXISTS threads_auto; + +IMPORT FOREIGN SCHEMA "unused" +FROM SERVER threads_import +INTO threads_auto; +``` + +### See what was generated + +```sql +SELECT foreign_table_name FROM information_schema.foreign_tables +WHERE foreign_table_schema = 'threads_auto'; 
+``` + +Endpoints with path parameters (`/{thread_id}/replies`, `/{thread_id}/conversation`) are skipped during import and need manual table definitions (see sections 5-7). + +--- + +The rest of this example uses manually defined tables to demonstrate specific features (path parameters, custom pagination, field selection via endpoint query strings, etc.). + +--- + +## 2. Your Profile Single object response. The FDW returns one row with your Threads profile info. @@ -52,7 +153,7 @@ FROM my_profile; > Your results will reflect your own Threads profile. -## 2. Your Threads +## 3. Your Threads Paginated list of your posts. The FDW auto-detects the `data` wrapper key and follows cursor-based pagination (`paging.cursors.after`). @@ -120,7 +221,7 @@ WHERE media_type = 'TEXT_POST' LIMIT 5; ``` -## 3. Your Replies +## 4. Your Replies Same pagination pattern as threads, filtered to your replies: @@ -165,7 +266,7 @@ FROM my_replies LIMIT 5; ``` -## 4. Thread Detail (Path Parameter) +## 5. Thread Detail (Path Parameter) Look up a specific thread by ID. The `{thread_id}` placeholder in the endpoint is replaced with the value from your WHERE clause. @@ -206,7 +307,7 @@ WHERE thread_id = ''; | --- | --- | --- | --- | | Your thread text... | TEXT_POST | 2026-02-12 04:46:47+00 | EVERYONE | -## 5. Thread Replies +## 6. Thread Replies Top-level replies to a specific thread. Requires `thread_id` path parameter: @@ -250,7 +351,7 @@ WHERE thread_id = '' LIMIT 10; ``` -## 6. Thread Conversation +## 7. Thread Conversation All replies at all depths, flattened into a single list: @@ -293,9 +394,9 @@ WHERE thread_id = '' LIMIT 20; ``` -## 7. Keyword Search (Query Param Pushdown) +## 8. Keyword Search (Query Param Pushdown) -When a WHERE clause references `q`, the FDW sends it as a query parameter to the `/keyword_search` endpoint. Requires the `threads_keyword_search` permission on your app. 
+When a WHERE clause references `q`, the FDW sends it as a query parameter to the `/keyword_search` endpoint (requires `threads_keyword_search` permission -- see [Prerequisites](#prerequisites)). ```sql create foreign table keyword_search ( @@ -343,9 +444,9 @@ WHERE q = 'threads' LIMIT 5; ``` -## 8. Profile Lookup +## 9. Profile Lookup -Look up any public profile by username. Requires the `threads_basic` permission. +Look up any public profile by username. ```sql create foreign table profile_lookup ( @@ -388,7 +489,7 @@ FROM profile_lookup WHERE username = 'threads'; ``` -## 9. Publishing Limit +## 10. Publishing Limit Check your current rate limit usage: @@ -415,7 +516,7 @@ FROM publishing_limit; | --- | --- | --- | --- | | 0 | `{"quota_total": 250, "quota_duration": 86400}` | 0 | `{"quota_total": 1000, "quota_duration": 86400}` | -## 10. Debug Mode +## 11. Debug Mode The `keyword_search_debug` table uses the `threads_debug` server which has `debug 'true'`. This emits HTTP request details as PostgreSQL INFO messages. @@ -430,35 +531,6 @@ INFO: [openapi_fdw] HTTP GET https://graph.threads.net/keyword_search?... -> 20 INFO: [openapi_fdw] Scan complete: 3 rows, 2 columns ``` -## 11. IMPORT FOREIGN SCHEMA (Inline `spec_json`) - -Meta's Threads API does not publish an official OpenAPI spec at a public URL. Instead of `spec_url`, this example uses `spec_json` to provide a hand-written spec directly in the server definition. The inline spec describes just the 8 GET endpoints used by this example. - -This approach also works well for APIs that: - -- Don't publish an OpenAPI spec at all (like Threads) -- Publish a spec that's too large, outdated, or inaccurate -- Need a customized subset of endpoints - -The FDW parses the inline JSON the same way it would a fetched spec, auto-generating `CREATE FOREIGN TABLE` statements with correct column names and types. 
Endpoints with path parameters (`/{thread_id}/replies`, `/{thread_id}/conversation`) are skipped — those need manual table definitions like the ones above. - -Auto-generate table definitions from the inline spec: - -```sql -CREATE SCHEMA IF NOT EXISTS threads_auto; - -IMPORT FOREIGN SCHEMA "unused" -FROM SERVER threads_import -INTO threads_auto; -``` - -See what was generated: - -```sql -SELECT foreign_table_name FROM information_schema.foreign_tables -WHERE foreign_table_schema = 'threads_auto'; -``` - ## 12. The `attrs` Column Every table includes an `attrs jsonb` column that captures all fields not mapped to named columns: @@ -480,6 +552,7 @@ LIMIT 3; | Feature | Table(s) | | --- | --- | +| IMPORT FOREIGN SCHEMA (inline `spec_json`) | `threads_import` server | | API key auth (query param) | All tables | | Cursor-based pagination (auto-detected) | `my_threads`, `my_replies`, `keyword_search` | | Path parameter substitution | `thread_detail`, `thread_replies`, `thread_conversation` | @@ -488,6 +561,5 @@ LIMIT 3; | Endpoint query string (field selection) | All tables except `profile_lookup` | | Type coercion (timestamptz, boolean, bigint) | `my_threads`, `profile_lookup` | | Debug mode | `keyword_search_debug` | -| IMPORT FOREIGN SCHEMA | `threads_import` server | | `attrs` catch-all column | All tables | | `rowid_column` | `my_threads`, `keyword_search`, `profile_lookup` | diff --git a/wasm-wrappers/fdw/openapi_fdw/examples/threads/init.sql b/wasm-wrappers/fdw/openapi_fdw/examples/threads/init.sql index c7ddce61..8b185e95 100644 --- a/wasm-wrappers/fdw/openapi_fdw/examples/threads/init.sql +++ b/wasm-wrappers/fdw/openapi_fdw/examples/threads/init.sql @@ -1,5 +1,6 @@ -- OpenAPI FDW example: Threads API (Meta) --- Requires a Threads access token (set THREADS_ACCESS_TOKEN env var). +-- Requires a Threads access token. Replace 'placeholder' in the server definitions below +-- with your token, or see the README for how to obtain one. 
-- See: https://developers.facebook.com/docs/threads -- Note: fdw_package_url uses file:// for local Docker testing. In production, use the -- GitHub release URL: https://github.com/supabase/wrappers/releases/download/wasm_openapi_fdw_v0.2.0/openapi_fdw.wasm diff --git a/wasm-wrappers/fdw/openapi_fdw/src/column_matching.rs b/wasm-wrappers/fdw/openapi_fdw/src/column_matching.rs index 4ac490f1..c515c930 100644 --- a/wasm-wrappers/fdw/openapi_fdw/src/column_matching.rs +++ b/wasm-wrappers/fdw/openapi_fdw/src/column_matching.rs @@ -15,13 +15,13 @@ use crate::{OpenApiFdw, extract_effective_row}; /// How a SQL column name was resolved to a JSON key. /// -/// Avoids cloning strings that already exist in [`CachedColumn`] — only the +/// Avoids cloning strings that already exist in CachedColumn -- only the /// case-insensitive fallback (rare) needs its own allocation. #[derive(Debug, Clone, PartialEq)] pub(crate) enum KeyMatch { - /// JSON key matches `CachedColumn::name` exactly + /// JSON key matches CachedColumn::name exactly Exact, - /// JSON key matches `CachedColumn::camel_name` + /// JSON key matches CachedColumn::camel_name CamelCase, /// JSON key matched case-insensitively (stores the original API key) CaseInsensitive(String), @@ -29,9 +29,9 @@ pub(crate) enum KeyMatch { /// Pre-computed column metadata to avoid repeated WASM boundary crossings. /// -/// During `iter_scan`, each call to `ctx.get_columns()`, `col.name()`, and -/// `col.type_oid()` crosses the WASM boundary. By caching these once in -/// `begin_scan`, we eliminate ~2000 boundary crossings per 100-row scan. +/// During iter_scan, each call to ctx.get_columns(), col.name(), and +/// col.type_oid() crosses the WASM boundary. By caching these once in +/// begin_scan, we eliminate ~2000 boundary crossings per 100-row scan. 
#[derive(Debug)] pub(crate) struct CachedColumn { pub name: String, @@ -39,11 +39,11 @@ pub(crate) struct CachedColumn { pub camel_name: String, pub lower_name: String, /// Alphanumeric-only lowercase name for normalized matching. - /// Strips `@`, `.`, `-`, `$`, etc. so `@id` → `_id` → `id` can match. + /// Strips @, ., -, $ etc. so @id / _id / id can match. pub alnum_name: String, } -/// Convert `snake_case` to `camelCase` +/// Convert snake_case to camelCase pub(crate) fn to_camel_case(s: &str) -> String { let mut result = String::with_capacity(s.len()); let mut capitalize_next = false; @@ -64,8 +64,8 @@ pub(crate) fn to_camel_case(s: &str) -> String { /// Strip non-alphanumeric chars and lowercase for normalized matching. /// -/// Used to match JSON keys with special characters (`@id`, `user.name`, `$oid`) -/// to sanitized SQL column names (`_id`, `user_name`, `_oid`). +/// Used to match JSON keys with special characters (@id, user.name, $oid) +/// to sanitized SQL column names (_id, user_name, _oid). pub(crate) fn normalize_to_alnum(s: &str) -> String { s.chars() .filter(|c| c.is_alphanumeric()) @@ -77,10 +77,10 @@ impl OpenApiFdw { /// Normalize a date/datetime string for RFC3339 parsing. /// /// Handles two non-RFC3339 formats: - /// - Date-only `"2024-01-15"` → `"2024-01-15T00:00:00Z"` - /// - ISO 8601 tz without colon `"2024-01-15T12:00:00+0000"` → `"2024-01-15T12:00:00+00:00"` + /// - Date-only "2024-01-15" becomes "2024-01-15T00:00:00Z" + /// - ISO 8601 tz without colon "2024-01-15T12:00:00+0000" becomes "2024-01-15T12:00:00+00:00" /// - /// Returns `Cow` to avoid allocating when the string is already valid. + /// Returns Cow to avoid allocating when the string is already valid. 
pub(crate) fn normalize_datetime(s: &str) -> Cow<'_, str> { // Date-only: exactly 10 chars matching YYYY-MM-DD pattern if s.len() == 10 && s.as_bytes().get(4) == Some(&b'-') && s.as_bytes().get(7) == Some(&b'-') @@ -110,8 +110,12 @@ impl OpenApiFdw { /// Build a map from column index to resolved JSON key, using the first row's keys. /// - /// This runs the 3-step matching (exact → camelCase → case-insensitive) once per - /// column instead of once per column per row. Called after each `make_request`. + /// Runs the 3-step matching (exact, camelCase, case-insensitive) once per column + /// instead of once per column per row. Called after each make_request. + /// + /// Only the first row of each page is probed. This works because most APIs return + /// rows with the same key shape. If a later row has different keys, unmatched + /// columns fall back to an O(n) scan in json_to_cell_cached (correct but slower). pub(crate) fn build_column_key_map(&mut self) { if self.cached_columns.is_empty() || self.src_rows.is_empty() { self.column_key_map = vec![None; self.cached_columns.len()]; @@ -249,8 +253,8 @@ impl OpenApiFdw { /// Convert a JSON value to a Cell using cached column metadata and pre-resolved key map. /// - /// Uses `CachedColumn` fields instead of WASM resource methods, and the pre-built - /// `column_key_map` for O(1) JSON key lookup instead of per-row 3-step matching. + /// Uses CachedColumn fields instead of WASM resource methods, and the pre-built + /// column_key_map for O(1) JSON key lookup instead of per-row 3-step matching. pub(crate) fn json_to_cell_cached( &self, src_row: &JsonValue, diff --git a/wasm-wrappers/fdw/openapi_fdw/src/config.rs b/wasm-wrappers/fdw/openapi_fdw/src/config.rs index e8727639..0a40cf09 100644 --- a/wasm-wrappers/fdw/openapi_fdw/src/config.rs +++ b/wasm-wrappers/fdw/openapi_fdw/src/config.rs @@ -12,10 +12,10 @@ pub(crate) const DEFAULT_MAX_RESPONSE_BYTES: usize = 50 * 1024 * 1024; // 50 MiB /// Server-level configuration. 
/// -/// Fields are set once in `init()` from server options. A few fields -/// (`page_size`, `page_size_param`, `cursor_param`) can be overridden -/// per-table in `begin_scan`; call `save_pagination_defaults()` after -/// init and `restore_pagination_defaults()` at the start of each scan. +/// Fields are set once in init() from server options. A few fields +/// (page_size, page_size_param, cursor_param) can be overridden +/// per-table in begin_scan; call save_pagination_defaults() after +/// init and restore_pagination_defaults() at the start of each scan. pub(crate) struct ServerConfig { pub(crate) base_url: String, pub(crate) headers: Vec<(String, String)>, @@ -94,7 +94,7 @@ impl Default for ServerConfig { impl ServerConfig { /// Snapshot the current pagination fields as server-level defaults. /// - /// Call once at the end of `init()`, after server options are parsed. + /// Call once at the end of init(), after server options are parsed. pub(crate) fn save_pagination_defaults(&mut self) { self.default_page_size = self.page_size; self.default_page_size_param @@ -104,7 +104,7 @@ impl ServerConfig { /// Restore pagination fields to server-level defaults. /// - /// Call at the start of each `begin_scan()`, before applying table-level overrides. + /// Call at the start of each begin_scan(), before applying table-level overrides. pub(crate) fn restore_pagination_defaults(&mut self) { self.page_size = self.default_page_size; self.page_size_param @@ -123,7 +123,7 @@ impl ServerConfig { /// Apply header configuration from extracted option values. /// - /// Separated from `configure_headers` for testability (Options is a WASM resource). + /// Separated from configure_headers for testability (Options is a WASM resource). 
pub(crate) fn apply_headers( &mut self, user_agent: Option, @@ -146,7 +146,14 @@ impl ServerConfig { .map_err(|e| format!("Invalid JSON for 'headers' option: {e}"))?; for (key, value) in headers { if let Some(v) = value.as_str() { - self.headers.push((key.to_lowercase(), v.to_string())); + let key_lower = key.to_lowercase(); + // Replace existing header with same name to avoid duplicates + // (e.g., custom content-type overrides the default) + if let Some(existing) = self.headers.iter_mut().find(|h| h.0 == key_lower) { + existing.1 = v.to_string(); + } else { + self.headers.push((key_lower, v.to_string())); + } } else { return Err(format!( "Invalid non-string value for header '{key}' in 'headers' option" @@ -207,7 +214,7 @@ impl ServerConfig { /// Apply authentication configuration from extracted option values. /// - /// Separated from `configure_auth` for testability (Options is a WASM resource). + /// Separated from configure_auth for testability (Options is a WASM resource). pub(crate) fn apply_auth( &mut self, api_key: Option, @@ -216,6 +223,12 @@ impl ServerConfig { api_key_header: &str, api_key_prefix: Option, ) -> FdwResult { + // Filter out empty/whitespace-only credentials (likely vault misconfiguration). + // The warning is emitted upstream by configure_auth; here we just skip them + // to avoid sending meaningless auth headers (e.g., "Bearer "). 
+ let api_key = api_key.filter(|k| !k.trim().is_empty()); + let bearer_token = bearer_token.filter(|t| !t.trim().is_empty()); + // Enforce mutual exclusivity — both would emit duplicate auth headers if api_key.is_some() && bearer_token.is_some() { return Err( diff --git a/wasm-wrappers/fdw/openapi_fdw/src/config_tests.rs b/wasm-wrappers/fdw/openapi_fdw/src/config_tests.rs index 607f0adf..9cb024da 100644 --- a/wasm-wrappers/fdw/openapi_fdw/src/config_tests.rs +++ b/wasm-wrappers/fdw/openapi_fdw/src/config_tests.rs @@ -312,6 +312,40 @@ fn test_apply_headers_all_options_combined() { ); } +// --- apply_headers: Deduplication --- + +#[test] +fn test_apply_headers_custom_content_type_replaces_default() { + let mut config = ServerConfig::default(); + config + .apply_headers( + None, + None, + Some(r#"{"Content-Type": "text/xml"}"#.to_string()), + ) + .unwrap(); + // Custom content-type should replace the default, not add a duplicate + assert_eq!(config.headers.len(), 1); + assert_eq!(config.headers[0].0, "content-type"); + assert_eq!(config.headers[0].1, "text/xml"); +} + +#[test] +fn test_apply_headers_custom_header_does_not_duplicate() { + let mut config = ServerConfig::default(); + config + .apply_headers( + Some("MyApp/1.0".to_string()), + None, + Some(r#"{"User-Agent": "CustomBot/2.0"}"#.to_string()), + ) + .unwrap(); + // Custom user-agent should replace the one set via option, not duplicate + assert_eq!(config.headers.len(), 2); // content-type + user-agent + let ua = config.headers.iter().find(|h| h.0 == "user-agent").unwrap(); + assert_eq!(ua.1, "CustomBot/2.0"); +} + // --- apply_auth: API key as header (default) --- #[test] @@ -503,21 +537,54 @@ fn test_auth_no_credentials() { // --- apply_auth: Edge cases --- #[test] -fn test_auth_api_key_empty_string() { +fn test_auth_api_key_empty_string_skipped() { let mut config = ServerConfig::default(); config .apply_auth(Some(String::new()), None, "header", "Authorization", None) .unwrap(); - 
assert_eq!(config.headers[0].1, "Bearer "); + // Empty api_key is filtered out — no auth header should be added + assert!(config.headers.is_empty()); } #[test] -fn test_auth_bearer_token_empty_string() { +fn test_auth_bearer_token_empty_string_skipped() { let mut config = ServerConfig::default(); config .apply_auth(None, Some(String::new()), "header", "Authorization", None) .unwrap(); - assert_eq!(config.headers[0].1, "Bearer "); + // Empty bearer_token is filtered out — no auth header should be added + assert!(config.headers.is_empty()); +} + +#[test] +fn test_auth_api_key_whitespace_only_skipped() { + let mut config = ServerConfig::default(); + config + .apply_auth( + Some(" ".to_string()), + None, + "header", + "Authorization", + None, + ) + .unwrap(); + assert!(config.headers.is_empty()); +} + +#[test] +fn test_auth_both_empty_no_mutual_exclusivity_error() { + // When both credentials are empty, they're filtered out, so no mutual exclusivity error + let mut config = ServerConfig::default(); + config + .apply_auth( + Some(String::new()), + Some(String::new()), + "header", + "Authorization", + None, + ) + .unwrap(); + assert!(config.headers.is_empty()); } #[test] diff --git a/wasm-wrappers/fdw/openapi_fdw/src/lib.rs b/wasm-wrappers/fdw/openapi_fdw/src/lib.rs index ba18629a..43de58fd 100644 --- a/wasm-wrappers/fdw/openapi_fdw/src/lib.rs +++ b/wasm-wrappers/fdw/openapi_fdw/src/lib.rs @@ -1,7 +1,7 @@ -//! `OpenAPI` Foreign Data Wrapper +//! OpenAPI Foreign Data Wrapper //! -//! A generic Wasm FDW that dynamically parses `OpenAPI` 3.0+ specifications -//! and exposes API endpoints as `PostgreSQL` foreign tables. +//! A generic Wasm FDW that dynamically parses OpenAPI 3.0+ specifications +//! and exposes API endpoints as PostgreSQL foreign tables. 
// Allow usize->i64 casts for stats (expected to fit on 64-bit systems) #![allow(clippy::cast_possible_wrap)] @@ -37,7 +37,7 @@ use pagination::PaginationState; use schema::generate_all_tables; use spec::OpenApiSpec; -/// The `OpenAPI` FDW state +/// The OpenAPI FDW state #[derive(Debug)] struct OpenApiFdw { // Server-level configuration (set once in init, some overridden per table) @@ -107,10 +107,10 @@ impl Default for OpenApiFdw { /// /// # Safety /// -/// This `static mut` is safe because Wasm execution is single-threaded: +/// This static mut is safe because Wasm execution is single-threaded: /// - No concurrent access is possible (no data races) -/// - Initialized once in `init()` before any scan/modify methods are called -/// - All access goes through `this_mut()` which returns exclusive `&mut` reference +/// - Initialized once in init() before any scan/modify methods are called +/// - All access goes through this_mut() which returns exclusive &mut reference static mut INSTANCE: *mut OpenApiFdw = std::ptr::null_mut::(); static FDW_NAME: &str = "OpenApiFdw"; @@ -120,7 +120,7 @@ const DEFAULT_PAGE_SIZE_PARAM: &str = "limit"; const DEFAULT_CURSOR_PARAM: &str = "after"; const DEFAULT_ROWID_COLUMN: &str = "id"; -/// Validate that a URL starts with `http://` or `https://`. +/// Validate that a URL starts with http:// or https://. fn validate_url(url: &str, field_name: &str) -> Result<(), String> { if !url.starts_with("http://") && !url.starts_with("https://") { return Err(format!( @@ -130,14 +130,14 @@ fn validate_url(url: &str, field_name: &str) -> Result<(), String> { Ok(()) } -/// Parse a string option value as `usize`, returning a descriptive error. +/// Parse a string option value as usize, returning a descriptive error. fn parse_usize_option(value: &str, field_name: &str) -> Result { value .parse() .map_err(|_| format!("Invalid value for '{field_name}': '{value}'")) } -/// Parse an optional string as a boolean flag (`"true"` or `"1"` → true). 
+/// Parse an optional string as a boolean flag ("true" or "1" means true). fn parse_bool_flag(value: Option<&str>) -> bool { value.is_some_and(|v| v == "true" || v == "1") } @@ -149,8 +149,8 @@ fn should_stop_scanning(consumed: i64, limit: Option) -> bool { /// Extract the effective row from a JSON value, optionally dereferencing an object path. /// -/// Used in `iter_scan` and `build_column_key_map` to apply `object_path` -/// (e.g., `"/properties"` for GeoJSON) to each row before column matching. +/// Used in iter_scan and build_column_key_map to apply object_path +/// (e.g., "/properties" for GeoJSON) to each row before column matching. pub(crate) fn extract_effective_row<'a>( row: &'a JsonValue, object_path: Option<&str>, @@ -174,7 +174,7 @@ impl OpenApiFdw { // methods are called. Wasm is single-threaded, so only one &mut // reference exists at a time (no aliasing). unsafe { - debug_assert!(!INSTANCE.is_null(), "OpenApiFdw not initialized"); + assert!(!INSTANCE.is_null(), "OpenApiFdw not initialized"); &mut (*INSTANCE) } } @@ -213,10 +213,10 @@ impl Guest for OpenApiFdw { } // Whether to include an 'attrs' jsonb column in IMPORT FOREIGN SCHEMA output - this.config.include_attrs = opts + // Default is true; only "false" or "0" explicitly disables it + this.config.include_attrs = !opts .get("include_attrs") - .map(|v| v != "false") - .unwrap_or(true); + .is_some_and(|v| v == "false" || v == "0"); // Validate spec_url format if provided if let Some(ref spec_url) = this.config.spec_url { @@ -237,7 +237,11 @@ impl Guest for OpenApiFdw { // Maximum pages per scan (default 1000, prevents infinite pagination loops) if let Some(s) = opts.get("max_pages") { - this.config.max_pages = parse_usize_option(&s, "max_pages")?; + let val = parse_usize_option(&s, "max_pages")?; + if val == 0 { + return Err("max_pages must be at least 1".to_string()); + } + this.config.max_pages = val; } // Maximum response body size (default 50 MiB) @@ -343,7 +347,6 @@ impl Guest for OpenApiFdw 
{ // Check if we need to fetch more data if this.src_idx >= this.src_rows.len() { stats::inc_stats(FDW_NAME, stats::Metric::RowsIn, this.src_rows.len() as i64); - stats::inc_stats(FDW_NAME, stats::Metric::RowsOut, this.src_rows.len() as i64); // No more pages to fetch if this.pagination.is_exhausted() { @@ -389,6 +392,7 @@ impl Guest for OpenApiFdw { this.src_idx += 1; this.consumed_row_cnt += 1; + stats::inc_stats(FDW_NAME, stats::Metric::RowsOut, 1); if this.config.debug { this.scan_row_count += 1; } diff --git a/wasm-wrappers/fdw/openapi_fdw/src/pagination.rs b/wasm-wrappers/fdw/openapi_fdw/src/pagination.rs index f212197e..ecbfd045 100644 --- a/wasm-wrappers/fdw/openapi_fdw/src/pagination.rs +++ b/wasm-wrappers/fdw/openapi_fdw/src/pagination.rs @@ -3,14 +3,14 @@ /// A pagination token: either a cursor string or a full/partial URL. #[derive(Debug, Clone, PartialEq, Eq)] pub(crate) enum PaginationToken { - /// Token-based pagination (e.g., Stripe `next_cursor`) + /// Token-based pagination (e.g., Stripe next_cursor) Cursor(String), - /// Link-based pagination (e.g., GitHub `Link` header, HAL `_links`) + /// Link-based pagination (e.g., GitHub Link header, HAL _links) Url(String), } impl PaginationToken { - /// Returns the inner cursor string, or `None` if this is a URL. + /// Returns the inner cursor string, or None if this is a URL. pub(crate) fn as_cursor(&self) -> Option<&str> { match self { Self::Cursor(s) => Some(s), @@ -18,7 +18,7 @@ impl PaginationToken { } } - /// Returns the inner URL string, or `None` if this is a cursor. + /// Returns the inner URL string, or None if this is a cursor. pub(crate) fn as_url(&self) -> Option<&str> { match self { Self::Url(s) => Some(s), @@ -48,7 +48,7 @@ impl PaginationState { self.pages_fetched = 0; } - /// Returns `true` when there are no more pages to fetch. + /// Returns true when there are no more pages to fetch. 
pub(crate) fn is_exhausted(&self) -> bool { self.next.is_none() } @@ -68,7 +68,7 @@ impl PaginationState { } } - /// Returns `true` if the page limit has been reached. + /// Returns true if the page limit has been reached. pub(crate) fn exceeds_limit(&self, max_pages: usize) -> bool { self.pages_fetched >= max_pages } @@ -81,11 +81,11 @@ impl PaginationState { self.pages_fetched += 1; } - /// Record the first page after initial `make_request` in `begin_scan`. + /// Record the first page after initial make_request in begin_scan. /// - /// Only sets `pages_fetched = 1`. Does NOT copy `next` into `previous` — - /// there was no token sent for the first page, so `previous` must stay - /// `None` to avoid a false-positive loop detection. + /// Only sets pages_fetched = 1. Does NOT copy next into previous -- + /// there was no token sent for the first page, so previous must stay + /// None to avoid a false-positive loop detection. pub(crate) fn record_first_page(&mut self) { self.pages_fetched = 1; } diff --git a/wasm-wrappers/fdw/openapi_fdw/src/request.rs b/wasm-wrappers/fdw/openapi_fdw/src/request.rs index f6730bd1..ba353b8a 100644 --- a/wasm-wrappers/fdw/openapi_fdw/src/request.rs +++ b/wasm-wrappers/fdw/openapi_fdw/src/request.rs @@ -7,14 +7,26 @@ use serde_json::Value as JsonValue; use crate::bindings::supabase::wrappers::{ http, stats, time, types::{Cell, Context, FdwError, FdwResult, Value}, + utils, }; use crate::spec::OpenApiSpec; use crate::{FDW_NAME, OpenApiFdw}; const RETRY_AFTER_HEADER: &str = "retry-after"; +pub(crate) const MAX_RETRY_DELAY_MS: u64 = 30_000; + +/// Compute retry delay from a Retry-After header value (in seconds), capped to max_delay_ms. +pub(crate) fn retry_delay_from_header(secs: u64, max_delay_ms: u64) -> u64 { + secs.saturating_mul(1000).min(max_delay_ms) +} + +/// Compute exponential backoff delay for a retry attempt, capped to max_delay_ms. 
+pub(crate) fn exponential_backoff_delay(retry_count: u32, max_delay_ms: u64) -> u64 { + 1000u64.saturating_mul(1 << retry_count).min(max_delay_ms) +} /// Extract the origin (scheme://authority) from a URL for same-origin comparison. -/// Returns everything up to (but not including) the first `/` after `://`. +/// Returns everything up to (but not including) the first / after ://. fn extract_origin(url: &str) -> &str { if let Some(scheme_end) = url.find("://") { let rest = &url[scheme_end + 3..]; @@ -29,7 +41,7 @@ fn extract_origin(url: &str) -> &str { } /// Redact a query parameter value from a URL for safe logging. -/// Replaces the value of the named parameter with `[REDACTED]`. +/// Replaces the value of the named parameter with [REDACTED]. fn redact_query_param(url: &str, param_name: &str) -> String { let encoded_prefix = format!("{}=", urlencoding::encode(param_name)); if let Some(start) = url.find(&encoded_prefix) { @@ -44,7 +56,7 @@ fn redact_query_param(url: &str, param_name: &str) -> String { } impl OpenApiFdw { - /// Fetch and parse the `OpenAPI` spec + /// Fetch and parse the OpenAPI spec pub(crate) fn fetch_spec(&mut self) -> Result<(), FdwError> { if let Some(ref url) = self.config.spec_url { let req = http::Request { @@ -68,14 +80,25 @@ impl OpenApiFdw { )); } - let spec_json: JsonValue = - serde_json::from_str(&resp.body).map_err(|e| e.to_string())?; + // Try JSON first, fall back to YAML (many OpenAPI specs are published as YAML) + let spec_json: JsonValue = match serde_json::from_str(&resp.body) { + Ok(v) => v, + Err(json_err) => { + serde_yaml_ng::from_str::(&resp.body).map_err(|yaml_err| { + format!( + "Failed to parse OpenAPI spec as JSON ({json_err}) \ + or YAML ({yaml_err})" + ) + })? 
+ } + }; let spec = OpenApiSpec::from_json(spec_json)?; // Use base_url from spec if not explicitly set if self.config.base_url.is_empty() { if let Some(url) = spec.base_url() { self.config.base_url = url.trim_end_matches('/').to_string(); + crate::validate_url(&self.config.base_url, "base_url (from spec servers)")?; } } @@ -98,6 +121,7 @@ impl OpenApiFdw { if self.config.base_url.is_empty() { if let Some(url) = spec.base_url() { self.config.base_url = url.trim_end_matches('/').to_string(); + crate::validate_url(&self.config.base_url, "base_url (from spec servers)")?; } } @@ -116,11 +140,14 @@ impl OpenApiFdw { if let Value::Cell(cell) = qual.value() { match cell { Cell::String(s) => Some(s), + Cell::I8(n) => Some(n.to_string()), + Cell::I16(n) => Some(n.to_string()), Cell::I32(n) => Some(n.to_string()), Cell::I64(n) => Some(n.to_string()), Cell::F32(n) => Some(n.to_string()), Cell::F64(n) => Some(n.to_string()), Cell::Bool(b) => Some(b.to_string()), + Cell::Uuid(u) => Some(u), _ => None, } } else { @@ -130,15 +157,14 @@ impl OpenApiFdw { /// Resolve a relative or absolute pagination URL against the base URL and endpoint. /// - /// Handles four forms of `next_url`: - /// - Absolute URLs (`http://...`, `https://...`) → validated against `base_url` origin - /// - Query-only (`?page=2`) → resolves against `base_url + endpoint` - /// - Absolute paths (`/items?page=2`) → resolves against `base_url` - /// - Bare relative paths (`page/2`) → resolves against `base_url/` + /// Handles four forms of next_url: + /// - Absolute URLs (http://..., https://...) -- validated against base_url origin + /// - Query-only (?page=2) -- resolves against base_url + endpoint + /// - Absolute paths (/items?page=2) -- resolves against base_url + /// - Bare relative paths (page/2) -- resolves against base_url/ /// - /// # Errors /// Returns an error if an absolute pagination URL points to a different origin - /// than `base_url`, which would leak authentication credentials to a third party. 
+ /// than base_url, which would leak authentication credentials to a third party. pub(crate) fn resolve_pagination_url(&self, next_url: &str) -> Result { if next_url.starts_with("http://") || next_url.starts_with("https://") { let next_origin = extract_origin(next_url); @@ -165,7 +191,7 @@ impl OpenApiFdw { /// Substitute path parameters in endpoint template from quals. /// - /// Writes substituted values into `injected` so they can be re-injected + /// Writes substituted values into injected so they can be re-injected /// into result rows (ensuring PostgreSQL's post-filter passes). /// /// Returns (resolved_endpoint, path_params_used) where path_params_used @@ -219,7 +245,9 @@ impl OpenApiFdw { &endpoint[start + end + 1..] ); } else { - // Track missing parameter and remove it from the endpoint to continue + // Track missing parameter and remove the {param} placeholder to continue + // parsing. This is safe because OpenAPI path params are always separated + // by '/' (e.g., /{a}/{b}), so removing one doesn't mangle the next. missing_params.push(param_name.to_string()); endpoint = format!("{}{}", &endpoint[..start], &endpoint[start + end + 1..]); } @@ -248,7 +276,7 @@ impl OpenApiFdw { /// Build query parameters from pagination state, quals, and API key. /// /// Returns (url_params, injected_entries) where injected_entries are - /// qual values to merge into `self.injected_params` for row injection. + /// qual values to merge into self.injected_params for row injection. /// Excludes path parameters and rowid column. pub(crate) fn build_query_params( &self, @@ -321,12 +349,12 @@ impl OpenApiFdw { /// Build the URL for a request, handling path parameters and pagination. /// - /// Updates `self.injected_params` in place (avoids cloning on pagination). + /// Updates self.injected_params in place (avoids cloning on pagination). 
/// /// Supports endpoint templates like: - /// - `/users/{user_id}/posts` - /// - `/projects/{org}/{repo}/issues` - /// - `/resources/{type}/{id}` + /// - /users/{user_id}/posts + /// - /projects/{org}/{repo}/issues + /// - /resources/{type}/{id} /// /// Path parameters are substituted from WHERE clause quals. /// @@ -400,10 +428,9 @@ impl OpenApiFdw { body: self.request_body.clone(), }; - // Retry loop for rate limiting (HTTP 429) + // Retry loop for transient errors (HTTP 429 rate limit, 502/503 server errors) let mut retry_count = 0; const MAX_RETRIES: u32 = 3; - const MAX_RETRY_DELAY_MS: u64 = 30_000; let resp = loop { let resp = match req.method { @@ -411,12 +438,17 @@ impl OpenApiFdw { _ => http::get(&req)?, }; - // Handle rate limiting (HTTP 429) - if resp.status_code == 429 { + let is_retryable = matches!(resp.status_code, 429 | 502 | 503); + if is_retryable { if retry_count >= MAX_RETRIES { + let hint = if resp.status_code == 429 { + " Consider adding a page_size option to reduce request frequency." + } else { + "" + }; return Err(format!( - "API rate limit exceeded after {MAX_RETRIES} retries. \ - Consider adding a page_size option to reduce request frequency." 
+ "API request failed with HTTP {} after {MAX_RETRIES} retries.{hint}", + resp.status_code )); } @@ -427,12 +459,8 @@ impl OpenApiFdw { .iter() .find(|h| h.0.to_lowercase() == RETRY_AFTER_HEADER) .and_then(|h| h.1.parse::().ok()) - .map(|secs| secs.saturating_mul(1000).min(MAX_RETRY_DELAY_MS)) - .unwrap_or_else(|| { - // Exponential backoff: 1s, 2s, 4s (capped) - let backoff = 1000u64.saturating_mul(1 << retry_count); - backoff.min(MAX_RETRY_DELAY_MS) - }); + .map(|secs| retry_delay_from_header(secs, MAX_RETRY_DELAY_MS)) + .unwrap_or_else(|| exponential_backoff_delay(retry_count, MAX_RETRY_DELAY_MS)); time::sleep(delay_ms); retry_count += 1; @@ -503,6 +531,20 @@ impl OpenApiFdw { // Build column key map for O(1) lookups during iter_scan self.build_column_key_map(); + // Debug: warn once if object_path doesn't match response structure + if self.config.debug { + if let Some(ref path) = self.object_path { + if let Some(first_row) = self.src_rows.first() { + if first_row.pointer(path).is_none() { + utils::report_info(&format!( + "[openapi_fdw] object_path '{path}' not found in response. \ + Falling back to full row object." 
+ )); + } + } + } + } + Ok(()) } } diff --git a/wasm-wrappers/fdw/openapi_fdw/src/request_tests.rs b/wasm-wrappers/fdw/openapi_fdw/src/request_tests.rs index ed54388c..fbe4f41d 100644 --- a/wasm-wrappers/fdw/openapi_fdw/src/request_tests.rs +++ b/wasm-wrappers/fdw/openapi_fdw/src/request_tests.rs @@ -280,52 +280,57 @@ fn test_rowid_url_encoding_normal_ids() { ); } -// --- Retry delay cap tests --- +// --- Retry delay tests (using production functions) --- #[test] -fn test_retry_delay_cap_normal_value() { +fn test_retry_delay_from_header_normal_value() { // Normal Retry-After: 5 seconds → 5000ms, well under cap - let secs: u64 = 5; - let max_delay: u64 = 30_000; - let delay = secs.saturating_mul(1000).min(max_delay); - assert_eq!(delay, 5000); + assert_eq!(retry_delay_from_header(5, MAX_RETRY_DELAY_MS), 5000); } #[test] -fn test_retry_delay_cap_large_value() { +fn test_retry_delay_from_header_large_value() { // Absurdly large Retry-After: 999999 seconds → capped to 30s - let secs: u64 = 999_999; - let max_delay: u64 = 30_000; - let delay = secs.saturating_mul(1000).min(max_delay); - assert_eq!(delay, 30_000); + assert_eq!(retry_delay_from_header(999_999, MAX_RETRY_DELAY_MS), 30_000); } #[test] -fn test_retry_delay_cap_u64_max() { +fn test_retry_delay_from_header_u64_max() { // u64::MAX seconds → saturating_mul prevents overflow, then capped - let secs: u64 = u64::MAX; - let max_delay: u64 = 30_000; - let delay = secs.saturating_mul(1000).min(max_delay); - assert_eq!(delay, 30_000); + assert_eq!( + retry_delay_from_header(u64::MAX, MAX_RETRY_DELAY_MS), + 30_000 + ); } #[test] -fn test_retry_delay_cap_zero() { +fn test_retry_delay_from_header_zero() { // Retry-After: 0 → 0ms (immediate retry) - let secs: u64 = 0; - let max_delay: u64 = 30_000; - let delay = secs.saturating_mul(1000).min(max_delay); - assert_eq!(delay, 0); + assert_eq!(retry_delay_from_header(0, MAX_RETRY_DELAY_MS), 0); +} + +#[test] +fn test_exponential_backoff_first_retry() { + // retry_count=0 → 
1000ms + assert_eq!(exponential_backoff_delay(0, MAX_RETRY_DELAY_MS), 1000); +} + +#[test] +fn test_exponential_backoff_second_retry() { + // retry_count=1 → 2000ms + assert_eq!(exponential_backoff_delay(1, MAX_RETRY_DELAY_MS), 2000); +} + +#[test] +fn test_exponential_backoff_third_retry() { + // retry_count=2 → 4000ms + assert_eq!(exponential_backoff_delay(2, MAX_RETRY_DELAY_MS), 4000); } #[test] -fn test_retry_backoff_cap() { - // Exponential backoff at retry_count=10 would be 1024s, but capped - let retry_count: u32 = 10; - let max_delay: u64 = 30_000; - let backoff = 1000u64.saturating_mul(1 << retry_count); - let delay = backoff.min(max_delay); - assert_eq!(delay, 30_000); +fn test_exponential_backoff_capped() { + // retry_count=10 would be 1024s, but capped to 30s + assert_eq!(exponential_backoff_delay(10, MAX_RETRY_DELAY_MS), 30_000); } // --- build_query_params: LIMIT-to-page_size optimization --- @@ -448,6 +453,26 @@ fn test_fetch_spec_from_spec_json_too_large() { assert!(err.contains("limit: 100 bytes")); } +#[test] +fn test_fetch_spec_from_spec_json_rejects_non_http_base_url() { + let spec_with_bad_server = r#"{ + "openapi": "3.0.0", + "info": { "title": "Test", "version": "1.0" }, + "servers": [{ "url": "file:///etc/passwd" }], + "paths": {} + }"#; + let mut fdw = OpenApiFdw { + config: ServerConfig { + spec_json: Some(spec_with_bad_server.to_string()), + ..Default::default() + }, + ..Default::default() + }; + let err = fdw.fetch_spec().unwrap_err(); + assert!(err.contains("base_url (from spec servers)")); + assert!(err.contains("Must start with http://")); +} + #[test] fn test_fetch_spec_neither_url_nor_json() { let mut fdw = OpenApiFdw::default(); diff --git a/wasm-wrappers/fdw/openapi_fdw/src/schema.rs b/wasm-wrappers/fdw/openapi_fdw/src/schema.rs index 733e93c8..59012820 100644 --- a/wasm-wrappers/fdw/openapi_fdw/src/schema.rs +++ b/wasm-wrappers/fdw/openapi_fdw/src/schema.rs @@ -1,13 +1,13 @@ -//! 
Schema generation and type mapping for `OpenAPI` FDW +//! Schema generation and type mapping for OpenAPI FDW //! -//! This module handles mapping `OpenAPI` types to `PostgreSQL` types +//! This module handles mapping OpenAPI types to PostgreSQL types //! and generating CREATE FOREIGN TABLE statements. use std::collections::HashMap; use crate::spec::{EndpointInfo, OpenApiSpec, Schema}; -/// Maps `OpenAPI` schema types to `PostgreSQL` type names +/// Maps OpenAPI schema types to PostgreSQL type names pub fn openapi_to_pg_type(schema: &Schema, spec: &OpenApiSpec) -> &'static str { // Resolve $ref if present; otherwise borrow the original (no clone). let owned; @@ -53,7 +53,7 @@ pub struct ColumnDef { pub nullable: bool, } -/// Extract column definitions from an `OpenAPI` response schema +/// Extract column definitions from an OpenAPI response schema pub fn extract_columns(schema: &Schema, spec: &OpenApiSpec, include_attrs: bool) -> Vec { let mut columns = Vec::new(); @@ -122,12 +122,12 @@ pub fn extract_columns(schema: &Schema, spec: &OpenApiSpec, include_attrs: bool) columns } -/// Sanitize a column name for `PostgreSQL` (converts `camelCase` to `snake_case`) +/// Sanitize a column name for PostgreSQL (converts camelCase to snake_case) /// /// Handles consecutive uppercase (acronyms) correctly: -/// - `clusterIP` → `cluster_ip` (not `cluster_i_p`) -/// - `HTMLParser` → `html_parser` (not `h_t_m_l_parser`) -/// - `getHTTPSUrl` → `get_https_url` +/// - clusterIP becomes cluster_ip (not cluster_i_p) +/// - HTMLParser becomes html_parser (not h_t_m_l_parser) +/// - getHTTPSUrl becomes get_https_url fn sanitize_column_name(name: &str) -> String { let mut result = String::new(); let chars: Vec = name.chars().collect(); diff --git a/wasm-wrappers/fdw/openapi_fdw/src/spec.rs b/wasm-wrappers/fdw/openapi_fdw/src/spec.rs index b44066e9..d5c370fc 100644 --- a/wasm-wrappers/fdw/openapi_fdw/src/spec.rs +++ b/wasm-wrappers/fdw/openapi_fdw/src/spec.rs @@ -1,6 +1,6 @@ -//! 
`OpenAPI` 3.0+ specification parsing +//! OpenAPI 3.0+ specification parsing //! -//! This module provides types and functions for parsing `OpenAPI` specifications +//! This module provides types and functions for parsing OpenAPI specifications //! and extracting endpoint/schema information for FDW table generation. use serde::Deserialize; @@ -10,12 +10,12 @@ use std::collections::HashMap; /// Raw schema for deserialization — handles OpenAPI 3.1 type arrays. /// -/// OpenAPI 3.1 changed `type` from a string to potentially an array: -/// - 3.0: `"type": "string"` with `"nullable": true` -/// - 3.1: `"type": ["string", "null"]` +/// OpenAPI 3.1 changed type from a string to potentially an array: +/// - 3.0: "type": "string" with "nullable": true +/// - 3.1: "type": ["string", "null"] /// -/// This intermediate struct captures the raw `type` field, then `From` -/// extracts the actual type and sets `nullable` accordingly. +/// This intermediate struct captures the raw type field, then From +/// extracts the actual type and sets nullable accordingly. 
#[derive(Debug, Deserialize)] struct RawSchema { #[serde(rename = "type")] @@ -88,7 +88,7 @@ impl From for Schema { } } -/// Represents an `OpenAPI` 3.0+ specification +/// Represents an OpenAPI 3.0+ specification #[derive(Debug, Deserialize)] pub struct OpenApiSpec { /// OpenAPI version (must be 3.x) @@ -183,7 +183,7 @@ pub(crate) struct Components { } impl OpenApiSpec { - /// Parse an `OpenAPI` spec from a JSON value + /// Parse an OpenAPI spec from a JSON value pub fn from_json(json: JsonValue) -> Result { let spec: Self = serde_json::from_value(json) .map_err(|e| format!("Failed to parse OpenAPI spec: {e}"))?; @@ -198,7 +198,7 @@ impl OpenApiSpec { Ok(spec) } - /// Parse an `OpenAPI` spec from a JSON string (used in tests) + /// Parse an OpenAPI spec from a JSON string (used in tests) #[cfg(test)] pub fn from_str(s: &str) -> Result { let spec: Self = @@ -214,7 +214,18 @@ impl OpenApiSpec { Ok(spec) } - /// Get the base URL from the spec (first server URL), substituting any variables + /// Parse an OpenAPI spec from a YAML string (used in tests) + #[cfg(test)] + pub fn from_yaml_str(s: &str) -> Result { + let json: JsonValue = + serde_yaml_ng::from_str(s).map_err(|e| format!("Failed to parse YAML: {e}"))?; + Self::from_json(json) + } + + /// Get the base URL from the spec's first server entry, substituting any variables. + /// + /// Only the first server is used. Multi-server specs should set the base_url + /// server option explicitly to select a different server. pub fn base_url(&self) -> Option { self.servers.first().map(|s| { let mut url = s.url.clone(); @@ -227,10 +238,10 @@ impl OpenApiSpec { /// Get all endpoint paths that support GET or POST operations (for querying). /// - /// Parameterized paths (e.g., `/users/{id}`, `/users/{user_id}/posts`) are + /// Parameterized paths (e.g., /users/{id}, /users/{user_id}/posts) are /// excluded because they require path parameter values from WHERE clauses at /// query time. 
Users should create these tables manually with the appropriate - /// `endpoint` option containing `{param}` placeholders. See the documentation + /// endpoint option containing {param} placeholders. See the documentation /// for path parameter examples. pub fn get_endpoints(&self) -> Vec { let mut endpoints = Vec::new(); @@ -291,7 +302,7 @@ impl OpenApiSpec { media_type.schema.clone() } - /// Parse a `#/components/{section}/{name}` reference, returning the name if it matches. + /// Parse a #/components/{section}/{name} reference, returning the name if it matches. fn parse_component_ref<'a>(reference: &'a str, section: &str) -> Option<&'a str> { let path = reference.strip_prefix("#/components/")?; let name = path.strip_prefix(section)?.strip_prefix('/')?; @@ -494,9 +505,9 @@ impl OpenApiSpec { merged } - /// Merge parent-level `properties` and `required` into a composition result. + /// Merge parent-level properties and required into a composition result. /// - /// Per OpenAPI 3.1, properties defined alongside `allOf`/`oneOf`/`anyOf` + /// Per OpenAPI 3.1, properties defined alongside allOf/oneOf/anyOf /// should be merged into the composed schema (parent properties override). fn merge_parent_siblings(parent: &Schema, merged: &mut Schema) { for (name, prop) in &parent.properties { @@ -531,9 +542,9 @@ pub struct EndpointInfo { impl EndpointInfo { /// Generate a table name from the endpoint path. /// - /// Uses the full path to avoid collisions (e.g., `/v1/users` and `/v2/users` - /// become `v1_users` and `v2_users` instead of both becoming `users`). - /// POST endpoints get a `_post` suffix to avoid collisions with GET tables. + /// Uses the full path to avoid collisions (e.g., /v1/users and /v2/users + /// become v1_users and v2_users instead of both becoming users). + /// POST endpoints get a _post suffix to avoid collisions with GET tables. 
pub fn table_name(&self) -> String { let cleaned = self.path.trim_matches('/'); diff --git a/wasm-wrappers/fdw/openapi_fdw/src/spec_tests.rs b/wasm-wrappers/fdw/openapi_fdw/src/spec_tests.rs index deb82cb5..0fe50782 100644 --- a/wasm-wrappers/fdw/openapi_fdw/src/spec_tests.rs +++ b/wasm-wrappers/fdw/openapi_fdw/src/spec_tests.rs @@ -5531,3 +5531,190 @@ fn test_table_name_multiple_special_chars() { }; assert_eq!(endpoint.table_name(), "api_v2_1_me_data"); } + +// ── YAML parsing tests ────────────────────────────────────────────── + +#[test] +fn test_parse_yaml_spec() { + let yaml = r#" +openapi: "3.0.0" +info: + title: Test API + version: "1.0" +paths: + /users: + get: + responses: + "200": + content: + application/json: + schema: + type: object + properties: + id: + type: integer + name: + type: string +"#; + let spec = OpenApiSpec::from_yaml_str(yaml).unwrap(); + assert_eq!(spec.openapi, "3.0.0"); + let endpoints = spec.get_endpoints(); + assert_eq!(endpoints.len(), 1); + assert_eq!(endpoints[0].path, "/users"); +} + +#[test] +fn test_parse_yaml_spec_with_refs() { + let yaml = r##" +openapi: "3.0.0" +info: + title: Test API + version: "1.0" +paths: + /pets: + get: + responses: + "200": + content: + application/json: + schema: + type: array + items: + $ref: "#/components/schemas/Pet" +components: + schemas: + Pet: + type: object + properties: + id: + type: integer + name: + type: string + tag: + type: string +"##; + let spec = OpenApiSpec::from_yaml_str(yaml).unwrap(); + let endpoints = spec.get_endpoints(); + assert_eq!(endpoints.len(), 1); + let schema = endpoints[0].response_schema.as_ref().unwrap(); + let items = schema.items.as_ref().unwrap(); + let resolved = spec.resolve_schema(items); + assert!(resolved.properties.contains_key("id")); + assert!(resolved.properties.contains_key("name")); + assert!(resolved.properties.contains_key("tag")); +} + +#[test] +fn test_parse_yaml_spec_31() { + let yaml = r#" +openapi: "3.1.0" +info: + title: Test API + version: 
"1.0" +servers: + - url: https://api.example.com +paths: + /items: + get: + responses: + "200": + content: + application/json: + schema: + type: object + properties: + count: + type: integer + label: + type: + - string + - "null" +"#; + let spec = OpenApiSpec::from_yaml_str(yaml).unwrap(); + assert_eq!(spec.openapi, "3.1.0"); + assert_eq!(spec.base_url(), Some("https://api.example.com".to_string())); + let endpoints = spec.get_endpoints(); + assert_eq!(endpoints.len(), 1); + let schema = endpoints[0].response_schema.as_ref().unwrap(); + let label = schema.properties.get("label").unwrap(); + assert!(label.nullable); +} + +#[test] +fn test_parse_yaml_spec_rejects_swagger_20() { + let yaml = r#" +swagger: "2.0" +info: + title: Old API + version: "1.0" +paths: {} +"#; + // serde_yaml_ng will parse this but it'll fail the openapi version check + // because the "openapi" field is missing + let result = OpenApiSpec::from_yaml_str(yaml); + assert!(result.is_err()); +} + +#[test] +fn test_yaml_json_produce_same_spec() { + let json = r#"{ + "openapi": "3.0.0", + "info": {"title": "Test", "version": "1.0"}, + "paths": { + "/users": { + "get": { + "responses": { + "200": { + "content": { + "application/json": { + "schema": { + "type": "object", + "properties": { + "id": {"type": "integer"}, + "name": {"type": "string"} + } + } + } + } + } + } + } + } + } + }"#; + + let yaml = r#" +openapi: "3.0.0" +info: + title: Test + version: "1.0" +paths: + /users: + get: + responses: + "200": + content: + application/json: + schema: + type: object + properties: + id: + type: integer + name: + type: string +"#; + + let json_spec = OpenApiSpec::from_str(json).unwrap(); + let yaml_spec = OpenApiSpec::from_yaml_str(yaml).unwrap(); + + let json_endpoints = json_spec.get_endpoints(); + let yaml_endpoints = yaml_spec.get_endpoints(); + assert_eq!(json_endpoints.len(), yaml_endpoints.len()); + assert_eq!(json_endpoints[0].path, yaml_endpoints[0].path); + + let json_schema = 
json_endpoints[0].response_schema.as_ref().unwrap(); + let yaml_schema = yaml_endpoints[0].response_schema.as_ref().unwrap(); + assert_eq!(json_schema.properties.len(), yaml_schema.properties.len()); + assert!(json_schema.properties.contains_key("id")); + assert!(yaml_schema.properties.contains_key("id")); +} From 2b1e528898ab82087548f50e25c554716d38b4c3 Mon Sep 17 00:00:00 2001 From: Cody Bromley Date: Mon, 16 Feb 2026 23:47:30 -0600 Subject: [PATCH 7/9] fix(openapi): address PR #573 review feedback from Copilot and CodeRabbit - Fix pagination URL resolution for parameterized endpoints by storing resolved_endpoint after path param substitution - Fix absolute-path pagination to use origin-only base to avoid duplicating path prefixes (e.g. /v1/v1/items) - Map time and byte/binary formats to text (WIT TypeOid has no time/bytea variants) - Fix .env.example copy instructions and README test counts - Replace {checksum} placeholder with descriptive text in catalog docs --- docs/catalog/openapi.md | 4 +- wasm-wrappers/fdw/openapi_fdw/README.md | 4 +- wasm-wrappers/fdw/openapi_fdw/src/lib.rs | 2 + wasm-wrappers/fdw/openapi_fdw/src/request.rs | 20 +++++++++- .../fdw/openapi_fdw/src/request_tests.rs | 37 +++++++++++++++++++ wasm-wrappers/fdw/openapi_fdw/src/schema.rs | 6 ++- .../fdw/openapi_fdw/src/schema_tests.rs | 12 +++--- .../fdw/openapi_fdw/test/.env.example | 2 +- 8 files changed, 71 insertions(+), 16 deletions(-) diff --git a/docs/catalog/openapi.md b/docs/catalog/openapi.md index 4570554a..9700291d 100644 --- a/docs/catalog/openapi.md +++ b/docs/catalog/openapi.md @@ -18,7 +18,7 @@ This wrapper allows you to query any REST API endpoint as a PostgreSQL foreign t | Version | Wasm Package URL | Checksum | Required Wrappers Version | | ------- | ---------------- | -------- | ------------------------- | -| 0.2.0 | `https://github.com/supabase/wrappers/releases/download/wasm_openapi_fdw_v0.2.0/openapi_fdw.wasm` | `{checksum}` | >=0.5.0 | +| 0.2.0 | 
`https://github.com/supabase/wrappers/releases/download/wasm_openapi_fdw_v0.2.0/openapi_fdw.wasm` | _published on release_ | >=0.5.0 | | 0.1.4 | `https://github.com/supabase/wrappers/releases/download/wasm_openapi_fdw_v0.1.4/openapi_fdw.wasm` | `dd434f8565b060b181d1e69e1e4d5c8b9c3ac5ca444056d3c2fb939038d308fe` | >=0.5.0 | ## Preparation @@ -378,8 +378,6 @@ options ( | date | string (ISO 8601) | | timestamp* | string (ISO 8601) | | timestamptz | string (ISO 8601) | -| bytea | string (base64) | -| time | string (HH:MM:SS) | | jsonb | object/array | | uuid | string | diff --git a/wasm-wrappers/fdw/openapi_fdw/README.md b/wasm-wrappers/fdw/openapi_fdw/README.md index 86cb444e..5bc96134 100644 --- a/wasm-wrappers/fdw/openapi_fdw/README.md +++ b/wasm-wrappers/fdw/openapi_fdw/README.md @@ -86,7 +86,7 @@ cargo component build --release --target wasm32-unknown-unknown ### Running Tests ```bash -# 518 unit tests +# 534 unit tests cargo test # Integration tests (Docker: PostgreSQL + MockServer) @@ -148,7 +148,7 @@ For queries you run frequently, a [materialized view](https://supabase.com/blog/ | Version | Date | Notes | | --- | --- | --- | -| 0.2.0 | 2026-02-15 | Modular architecture, POST-for-read, `spec_json` inline specs, YAML spec support, LIMIT pushdown, OpenAPI 3.1 support, security hardening, 531 unit tests, 5 real-world examples | +| 0.2.0 | 2026-02-15 | Modular architecture, POST-for-read, `spec_json` inline specs, YAML spec support, LIMIT pushdown, OpenAPI 3.1 support, security hardening, 534 unit tests, 5 real-world examples | | 0.1.4 | 2026-02-09 | Type coercion, auth validation, table naming, URL fixes | | 0.1.3 | 2026-02-06 | Avoid cloning JSON response data | | 0.1.2 | 2026-02-01 | Fix query param filtering | diff --git a/wasm-wrappers/fdw/openapi_fdw/src/lib.rs b/wasm-wrappers/fdw/openapi_fdw/src/lib.rs index 43de58fd..2b2e8ed0 100644 --- a/wasm-wrappers/fdw/openapi_fdw/src/lib.rs +++ b/wasm-wrappers/fdw/openapi_fdw/src/lib.rs @@ -50,6 +50,7 @@ struct 
OpenApiFdw { method: http::Method, request_body: String, endpoint: String, + resolved_endpoint: String, // endpoint after path param substitution (for pagination) response_path: Option, object_path: Option, // Extract nested object from each row (e.g., "/properties" for GeoJSON) rowid_col: String, @@ -86,6 +87,7 @@ impl Default for OpenApiFdw { method: http::Method::Get, request_body: String::new(), endpoint: String::new(), + resolved_endpoint: String::new(), response_path: None, object_path: None, rowid_col: String::new(), diff --git a/wasm-wrappers/fdw/openapi_fdw/src/request.rs b/wasm-wrappers/fdw/openapi_fdw/src/request.rs index ba353b8a..1c408716 100644 --- a/wasm-wrappers/fdw/openapi_fdw/src/request.rs +++ b/wasm-wrappers/fdw/openapi_fdw/src/request.rs @@ -180,10 +180,22 @@ impl OpenApiFdw { } Ok(next_url.to_string()) } else if next_url.starts_with('?') { - let endpoint_base = self.endpoint.split('?').next().unwrap_or(&self.endpoint); + // Use resolved_endpoint (post path-param substitution) if available, + // falling back to the template for endpoints without path params. + let ep = if self.resolved_endpoint.is_empty() { + &self.endpoint + } else { + &self.resolved_endpoint + }; + let endpoint_base = ep.split('?').next().unwrap_or(ep); Ok(format!("{}{endpoint_base}{next_url}", self.config.base_url)) } else if next_url.starts_with('/') { - Ok(format!("{}{next_url}", self.config.base_url)) + // Use only the origin (scheme://host) to avoid duplicating any + // path prefix that base_url may contain (e.g. /v1). + Ok(format!( + "{}{next_url}", + extract_origin(&self.config.base_url) + )) } else { Ok(format!("{}/{next_url}", self.config.base_url)) } @@ -382,6 +394,10 @@ impl OpenApiFdw { let (endpoint, path_params_used) = Self::substitute_path_params(&self.endpoint, &quals, &mut self.injected_params)?; + // Store resolved endpoint for pagination (query-only URLs need the + // substituted path, not the raw template with {param} placeholders). 
+ self.resolved_endpoint = endpoint.clone(); + // Check for rowid pushdown for single-resource access // Only if endpoint doesn't already have path params and rowid qual exists if path_params_used.is_empty() { diff --git a/wasm-wrappers/fdw/openapi_fdw/src/request_tests.rs b/wasm-wrappers/fdw/openapi_fdw/src/request_tests.rs index fbe4f41d..1f5cd154 100644 --- a/wasm-wrappers/fdw/openapi_fdw/src/request_tests.rs +++ b/wasm-wrappers/fdw/openapi_fdw/src/request_tests.rs @@ -11,6 +11,7 @@ fn make_fdw_for_url(base_url: &str, endpoint: &str) -> OpenApiFdw { ..Default::default() }, endpoint: endpoint.to_string(), + resolved_endpoint: endpoint.to_string(), ..Default::default() } } @@ -142,6 +143,42 @@ fn test_resolve_pagination_url_same_origin_case_insensitive() { assert_eq!(url, "https://api.example.com/items?page=2"); } +// --- Pagination bug fix regression tests --- + +#[test] +fn test_resolve_pagination_url_query_only_with_path_params() { + // Bug fix: query-only pagination should use resolved_endpoint (post-substitution), + // not the raw template with {param} placeholders + let fdw = OpenApiFdw { + config: ServerConfig { + base_url: "https://api.example.com".to_string(), + ..Default::default() + }, + endpoint: "/pets/{pet_id}/toys".to_string(), + resolved_endpoint: "/pets/123/toys".to_string(), + ..Default::default() + }; + let url = fdw.resolve_pagination_url("?page=2").unwrap(); + assert_eq!(url, "https://api.example.com/pets/123/toys?page=2"); +} + +#[test] +fn test_resolve_pagination_url_absolute_path_with_base_path() { + // Bug fix: absolute-path pagination should use only the origin, not the + // full base_url, to avoid duplicating the path prefix + let fdw = make_fdw_for_url("https://api.example.com/v1", "/items"); + let url = fdw.resolve_pagination_url("/v1/items?page=2").unwrap(); + assert_eq!(url, "https://api.example.com/v1/items?page=2"); +} + +#[test] +fn test_resolve_pagination_url_absolute_path_different_path() { + // Absolute path that differs from 
base_url path — uses origin only + let fdw = make_fdw_for_url("https://api.example.com/v1", "/items"); + let url = fdw.resolve_pagination_url("/v2/items?page=2").unwrap(); + assert_eq!(url, "https://api.example.com/v2/items?page=2"); +} + // --- extract_origin tests --- #[test] diff --git a/wasm-wrappers/fdw/openapi_fdw/src/schema.rs b/wasm-wrappers/fdw/openapi_fdw/src/schema.rs index 59012820..d30a2330 100644 --- a/wasm-wrappers/fdw/openapi_fdw/src/schema.rs +++ b/wasm-wrappers/fdw/openapi_fdw/src/schema.rs @@ -22,8 +22,10 @@ pub fn openapi_to_pg_type(schema: &Schema, spec: &OpenApiSpec) -> &'static str { Some("string") => match resolved.format.as_deref() { Some("date") => "date", Some("date-time") => "timestamptz", - Some("time") => "time", - Some("byte") | Some("binary") => "bytea", + // time and bytea are not supported by the WIT type-oid interface, + // so we map them to text (the FDW casts values via JSON at runtime) + Some("time") => "text", + Some("byte") | Some("binary") => "text", Some("uuid") => "uuid", _ => "text", }, diff --git a/wasm-wrappers/fdw/openapi_fdw/src/schema_tests.rs b/wasm-wrappers/fdw/openapi_fdw/src/schema_tests.rs index de3ff116..eecfe60b 100644 --- a/wasm-wrappers/fdw/openapi_fdw/src/schema_tests.rs +++ b/wasm-wrappers/fdw/openapi_fdw/src/schema_tests.rs @@ -158,7 +158,7 @@ fn test_openapi_to_pg_type_time_format() { format: Some("time".to_string()), ..Default::default() }; - assert_eq!(openapi_to_pg_type(&time_schema, &spec), "time"); + assert_eq!(openapi_to_pg_type(&time_schema, &spec), "text"); } #[test] @@ -177,14 +177,14 @@ fn test_openapi_to_pg_type_byte_binary_format() { format: Some("byte".to_string()), ..Default::default() }; - assert_eq!(openapi_to_pg_type(&byte_schema, &spec), "bytea"); + assert_eq!(openapi_to_pg_type(&byte_schema, &spec), "text"); let binary_schema = Schema { schema_type: Some("string".to_string()), format: Some("binary".to_string()), ..Default::default() }; - assert_eq!(openapi_to_pg_type(&binary_schema, 
&spec), "bytea"); + assert_eq!(openapi_to_pg_type(&binary_schema, &spec), "text"); } #[test] @@ -1657,16 +1657,16 @@ fn test_extract_columns_31_all_nullable_format_types() { let columns = extract_columns(&schema, &spec, false); let expected = vec![ - ("binary_field", "bytea"), + ("binary_field", "text"), ("bool_field", "boolean"), - ("byte_field", "bytea"), + ("byte_field", "text"), ("date_field", "date"), ("datetime_field", "timestamptz"), ("double_field", "double precision"), ("float_field", "real"), ("int32_field", "integer"), ("int64_field", "bigint"), - ("time_field", "time"), + ("time_field", "text"), ("unix_time_field", "timestamptz"), ("uuid_field", "uuid"), ]; diff --git a/wasm-wrappers/fdw/openapi_fdw/test/.env.example b/wasm-wrappers/fdw/openapi_fdw/test/.env.example index dfc2199c..f1229e30 100644 --- a/wasm-wrappers/fdw/openapi_fdw/test/.env.example +++ b/wasm-wrappers/fdw/openapi_fdw/test/.env.example @@ -1,5 +1,5 @@ # Tokens for authenticated examples (github, threads). -# Copy this file to examples/.env and fill in your values. +# Copy this file to test/.env and fill in your values. GITHUB_TOKEN=ghp_your-personal-access-token-here THREADS_ACCESS_TOKEN=your-access-token-here From f5bbfe9e438c802e5f7e05c140e28cbb6b65b909 Mon Sep 17 00:00:00 2001 From: Cody Bromley Date: Tue, 17 Feb 2026 08:53:47 -0600 Subject: [PATCH 8/9] docs(openapi): consolidate feature tables and add missing import server blocks Move per-example feature tables into a single Feature Coverage comparison on the main examples README. Add the missing CREATE SERVER blocks for spec_url/spec_json import servers to carapi, pokeapi, github, and nws examples. Rename NWS references to Weather.gov. 
--- .../fdw/openapi_fdw/examples/README.md | 29 ++++++++++++- .../fdw/openapi_fdw/examples/carapi/README.md | 28 ++++++------- .../fdw/openapi_fdw/examples/github/README.md | 35 ++++++++-------- .../fdw/openapi_fdw/examples/nws/README.md | 42 +++++++++---------- .../openapi_fdw/examples/pokeapi/README.md | 35 +++++++--------- .../openapi_fdw/examples/threads/README.md | 16 ------- 6 files changed, 91 insertions(+), 94 deletions(-) diff --git a/wasm-wrappers/fdw/openapi_fdw/examples/README.md b/wasm-wrappers/fdw/openapi_fdw/examples/README.md index cd695f0a..f2d8dc23 100644 --- a/wasm-wrappers/fdw/openapi_fdw/examples/README.md +++ b/wasm-wrappers/fdw/openapi_fdw/examples/README.md @@ -6,9 +6,9 @@ Each example shows how to configure the FDW against a real API, with complete se | Example | API | Features | | --- | --- | --- | -| [pokeapi](pokeapi/) | [PokéAPI](https://pokeapi.co/) | Offset-based pagination, path params, auto-detected `results` wrapper | +| [nws](nws/) | [Weather.gov](https://www.weather.gov/documentation/services-web-api) | GeoJSON responses, nested path extraction, custom User-Agent | | [carapi](carapi/) | [CarAPI](https://carapi.app/) | Page-based pagination, query pushdown, auto-detected `data` wrapper | -| [nws](nws/) | [National Weather Service](https://www.weather.gov/documentation/services-web-api) | GeoJSON responses, nested path extraction, custom User-Agent | +| [pokeapi](pokeapi/) | [PokéAPI](https://pokeapi.co/) | Offset-based pagination, path params, auto-detected `results` wrapper | ## Auth Required @@ -16,3 +16,28 @@ Each example shows how to configure the FDW against a real API, with complete se | --- | --- | --- | --- | | [github](github/) | [GitHub REST API](https://docs.github.com/en/rest) | Bearer token | Path params, custom headers, `items` wrapper, search pushdown | | [threads](threads/) | [Meta Threads API](https://developers.facebook.com/docs/threads) | OAuth token (query param) | Cursor-based pagination, path params, query 
pushdown | + +## Feature Coverage + +| Feature | Weather.gov | CarAPI | PokéAPI | GitHub | Threads | +| --- | :---: | :---: | :---: | :---: | :---: | +| IMPORT FOREIGN SCHEMA (`spec_url`) | ✓ | ✓ | ✓ | ✓ | | +| IMPORT FOREIGN SCHEMA (`spec_json`) | | | | | ✓ | +| YAML spec support | | | ✓ | | | +| Page/offset-based pagination | | ✓ | ✓ | ✓ | | +| Cursor-based pagination | ✓ | | | | ✓ | +| URL-based pagination (auto-detected) | | | ✓ | ✓ | | +| Path parameter substitution | ✓ | | ✓ | ✓ | ✓ | +| Query parameter pushdown | ✓ | ✓ | | ✓ | ✓ | +| LIMIT pushdown | ✓ | ✓ | ✓ | ✓ | ✓ | +| GeoJSON extraction (`object_path`) | ✓ | | | | | +| Nested response path (`response_path`) | ✓ | | | | | +| Bearer token / API key auth | | | | ✓ | | +| Query param auth (`api_key_location`) | | | | | ✓ | +| Custom headers | ✓ | | | ✓ | | +| Type coercion (int, bool, timestamptz) | ✓ | ✓ | ✓ | ✓ | ✓ | +| camelCase to snake_case matching | ✓ | | | | | +| Single object response | ✓ | | ✓ | ✓ | | +| Auto-detected wrapper key | | ✓ | ✓ | ✓ | | +| Debug mode | ✓ | ✓ | ✓ | ✓ | | +| `attrs` catch-all column | ✓ | ✓ | ✓ | ✓ | ✓ | diff --git a/wasm-wrappers/fdw/openapi_fdw/examples/carapi/README.md b/wasm-wrappers/fdw/openapi_fdw/examples/carapi/README.md index fad3268e..b5206b22 100644 --- a/wasm-wrappers/fdw/openapi_fdw/examples/carapi/README.md +++ b/wasm-wrappers/fdw/openapi_fdw/examples/carapi/README.md @@ -21,6 +21,18 @@ create server carapi The `carapi_import` server has a `spec_url` pointing to the CarAPI OpenAPI spec, so tables can be auto-generated: +```sql +create server carapi_import + foreign data wrapper wasm_wrapper + options ( + fdw_package_url 'https://github.com/supabase/wrappers/releases/download/wasm_openapi_fdw_v0.2.0/openapi_fdw.wasm', + fdw_package_name 'supabase:openapi-fdw', + fdw_package_version '0.2.0', + base_url 'https://carapi.app/api', + spec_url 'https://carapi.app/swagger.json' + ); +``` + ```sql CREATE SCHEMA IF NOT EXISTS carapi_auto; @@ -425,19 +437,3 @@ SELECT 
name, attrs FROM makes LIMIT 1; ``` - -## Features Demonstrated - -| Feature | Table(s) | -| --- | --- | -| IMPORT FOREIGN SCHEMA | `carapi_import` server | -| Page-based pagination (auto-followed) | `makes`, `models`, `trims`, `bodies`, `engines`, `mileages`, `exterior_colors` | -| Auto-detected `data` wrapper key | All tables | -| Query parameter pushdown | `models`, `trims`, `bodies`, `engines`, `mileages`, `exterior_colors` | -| Integer type coercion | `trims` (msrp), `bodies` (curb_weight), `engines` (horsepower), `mileages` (mpg) | -| `timestamptz` coercion | `trims` (created, modified) | -| LIMIT pushdown | Any table with `LIMIT` | -| Debug mode (`debug`) | `makes_debug` | -| `attrs` catch-all column | All tables | -| `rowid_column` | All tables | -| No authentication required | All servers | diff --git a/wasm-wrappers/fdw/openapi_fdw/examples/github/README.md b/wasm-wrappers/fdw/openapi_fdw/examples/github/README.md index de93dd12..4c016417 100644 --- a/wasm-wrappers/fdw/openapi_fdw/examples/github/README.md +++ b/wasm-wrappers/fdw/openapi_fdw/examples/github/README.md @@ -29,6 +29,24 @@ The `github_import` server has a `spec_url` pointing to the GitHub REST API Open > **Note:** The GitHub OpenAPI spec is large (~15 MB). The initial import may take a few seconds to fetch and parse. 
+```sql +create server github_import + foreign data wrapper wasm_wrapper + options ( + fdw_package_url 'https://github.com/supabase/wrappers/releases/download/wasm_openapi_fdw_v0.2.0/openapi_fdw.wasm', + fdw_package_name 'supabase:openapi-fdw', + fdw_package_version '0.2.0', + base_url 'https://api.github.com', + api_key '', + user_agent 'openapi-fdw-example/0.2.0', + accept 'application/vnd.github+json', + headers '{"X-GitHub-Api-Version": "2022-11-28"}', + page_size '30', + page_size_param 'per_page', + spec_url 'https://raw.githubusercontent.com/github/rest-api-description/main/descriptions/api.github.com/api.github.com.json' + ); +``` + ```sql CREATE SCHEMA IF NOT EXISTS github_auto; @@ -502,20 +520,3 @@ LIMIT 3; | my-project | public | true | | dotfiles | public | false | | cool-app | public | true | - -## Features Demonstrated - -| Feature | Table(s) | -| --- | --- | -| IMPORT FOREIGN SCHEMA | `github_import` server | -| Bearer token auth (Authorization header) | All tables | -| Custom HTTP headers (X-GitHub-Api-Version) | All tables | -| Page-based pagination (auto-detected) | `my_repos`, `repo_issues`, `repo_pulls`, `repo_releases`, `search_repos` | -| Path parameter substitution | `repo_detail`, `repo_issues`, `repo_pulls`, `repo_releases` | -| Query parameter pushdown | `my_repos` (`type`, `sort`), `repo_issues` (`state`), `repo_pulls` (`state`), `search_repos` (`q`) | -| Single object response | `my_profile`, `repo_detail` | -| Auto-detected wrapper key (`items`) | `search_repos`, `search_repos_debug` | -| Type coercion (timestamptz, boolean, bigint) | All tables | -| Debug mode | `search_repos_debug` | -| `attrs` catch-all column | All tables | -| `rowid_column` | `my_repos`, `repo_issues`, `repo_pulls`, `repo_releases`, `search_repos` | diff --git a/wasm-wrappers/fdw/openapi_fdw/examples/nws/README.md b/wasm-wrappers/fdw/openapi_fdw/examples/nws/README.md index efc1b44f..2d7fcb9c 100644 --- a/wasm-wrappers/fdw/openapi_fdw/examples/nws/README.md +++ 
b/wasm-wrappers/fdw/openapi_fdw/examples/nws/README.md @@ -1,6 +1,6 @@ -# NWS Weather API Example +# Weather.gov API Example -Query the [National Weather Service API](https://www.weather.gov/documentation/services-web-api) using SQL. This example exercises all major features of the OpenAPI FDW against a real, free, no-auth API. +Query the [Weather.gov API](https://www.weather.gov/documentation/services-web-api) using SQL. This example exercises all major features of the OpenAPI FDW against a real, free, no-auth API. ## Server Configuration @@ -21,7 +21,23 @@ create server nws ## 1. Quick Start with IMPORT FOREIGN SCHEMA -The `nws_import` server has a `spec_url` pointing to the NWS OpenAPI spec, so tables can be auto-generated: +The `nws_import` server has a `spec_url` pointing to the Weather.gov OpenAPI spec, so tables can be auto-generated: + +```sql +create server nws_import + foreign data wrapper wasm_wrapper + options ( + fdw_package_url 'https://github.com/supabase/wrappers/releases/download/wasm_openapi_fdw_v0.2.0/openapi_fdw.wasm', + fdw_package_name 'supabase:openapi-fdw', + fdw_package_version '0.2.0', + base_url 'https://api.weather.gov', + user_agent 'openapi-fdw-example/0.2.0', + accept 'application/geo+json', + spec_url 'https://api.weather.gov/openapi.json', + page_size '50', + page_size_param 'limit' + ); +``` ```sql CREATE SCHEMA IF NOT EXISTS nws_auto; @@ -407,23 +423,3 @@ LIMIT 5; | 000SE | | | 001AS | | | 001BH | | - -## Features Demonstrated - -| Feature | Table(s) | -| --- | --- | -| IMPORT FOREIGN SCHEMA | `nws_import` server | -| GeoJSON extraction (`response_path` + `object_path`) | `stations`, `active_alerts`, `station_observations` | -| Cursor-based pagination (`cursor_path`) | `stations` | -| Path parameter substitution | `station_observations`, `latest_observation`, `point_metadata`, `forecast_periods` | -| Query parameter pushdown | `active_alerts` (with `WHERE severity = ...`) | -| camelCase → snake_case matching | All tables | -| 
Custom headers (`user_agent`, `accept`) | All servers | -| LIMIT pushdown | Any table with `LIMIT` | -| Debug mode (`debug`) | `stations_debug` | -| Single object response | `latest_observation`, `point_metadata` | -| Type coercion (timestamptz, jsonb, boolean, integer) | `active_alerts`, `forecast_periods` | -| `attrs` catch-all column | All tables | -| Multiple path parameters | `forecast_periods` | -| Nested response extraction (JSON pointer) | `forecast_periods` | -| `rowid_column` | `stations`, `active_alerts` | diff --git a/wasm-wrappers/fdw/openapi_fdw/examples/pokeapi/README.md b/wasm-wrappers/fdw/openapi_fdw/examples/pokeapi/README.md index 366544b7..890ef90d 100644 --- a/wasm-wrappers/fdw/openapi_fdw/examples/pokeapi/README.md +++ b/wasm-wrappers/fdw/openapi_fdw/examples/pokeapi/README.md @@ -21,7 +21,21 @@ create server pokeapi ## 1. Quick Start with IMPORT FOREIGN SCHEMA -The `pokeapi_import` server has a `spec_url` pointing to the PokeAPI OpenAPI spec (YAML format — the FDW parses both JSON and YAML), so tables can be auto-generated: +The `pokeapi_import` server has a `spec_url` pointing to the PokeAPI OpenAPI spec (YAML format, the FDW parses both JSON and YAML), so tables can be auto-generated: + +```sql +create server pokeapi_import + foreign data wrapper wasm_wrapper + options ( + fdw_package_url 'https://github.com/supabase/wrappers/releases/download/wasm_openapi_fdw_v0.2.0/openapi_fdw.wasm', + fdw_package_name 'supabase:openapi-fdw', + fdw_package_version '0.2.0', + base_url 'https://pokeapi.co/api/v2', + spec_url 'https://raw.githubusercontent.com/PokeAPI/pokeapi/master/openapi.yml', + page_size '20', + page_size_param 'limit' + ); +``` ```sql CREATE SCHEMA IF NOT EXISTS pokeapi_auto; @@ -396,22 +410,3 @@ WHERE name = 'pikachu'; | name | encounters_url | | --- | --- | | pikachu | | - -## Features Demonstrated - -| Feature | Table(s) | -| --- | --- | -| IMPORT FOREIGN SCHEMA | `pokeapi_import` server | -| YAML spec support | `pokeapi_import` 
server (spec is YAML, not JSON) | -| Offset-based pagination (auto-followed `next` URL) | `pokemon`, `types`, `berries` | -| Auto-detected `results` wrapper key | All list tables | -| Path parameter substitution | `pokemon_detail`, `type_detail`, `berry_detail` | -| Single object response | `pokemon_detail`, `type_detail`, `berry_detail` | -| Integer type coercion | `pokemon_detail`, `berry_detail` | -| Boolean type coercion | `pokemon_detail` | -| JSONB for complex nested data | `pokemon_detail`, `type_detail`, `berry_detail` | -| LIMIT pushdown | Any table with `LIMIT` | -| Debug mode (`debug`) | `pokemon_debug` | -| `attrs` catch-all column | All tables | -| `rowid_column` | All tables | -| No authentication required | All servers | diff --git a/wasm-wrappers/fdw/openapi_fdw/examples/threads/README.md b/wasm-wrappers/fdw/openapi_fdw/examples/threads/README.md index 7b8f5430..a40bd2f2 100644 --- a/wasm-wrappers/fdw/openapi_fdw/examples/threads/README.md +++ b/wasm-wrappers/fdw/openapi_fdw/examples/threads/README.md @@ -547,19 +547,3 @@ LIMIT 3; | 18555728842018816 | THREADS | ABC123xyz | | 18051838931694754 | THREADS | DEF456uvw | | 18099070105919840 | THREADS | GHI789rst | - -## Features Demonstrated - -| Feature | Table(s) | -| --- | --- | -| IMPORT FOREIGN SCHEMA (inline `spec_json`) | `threads_import` server | -| API key auth (query param) | All tables | -| Cursor-based pagination (auto-detected) | `my_threads`, `my_replies`, `keyword_search` | -| Path parameter substitution | `thread_detail`, `thread_replies`, `thread_conversation` | -| Query parameter pushdown | `keyword_search` (with `WHERE q = ...`), `profile_lookup` (with `WHERE username = ...`) | -| Single object response | `my_profile`, `thread_detail`, `profile_lookup` | -| Endpoint query string (field selection) | All tables except `profile_lookup` | -| Type coercion (timestamptz, boolean, bigint) | `my_threads`, `profile_lookup` | -| Debug mode | `keyword_search_debug` | -| `attrs` catch-all column 
| All tables | -| `rowid_column` | `my_threads`, `keyword_search`, `profile_lookup` | From b2c8a247540c341e9b5f9f92d997349eca59fe19 Mon Sep 17 00:00:00 2001 From: Cody Bromley Date: Tue, 17 Feb 2026 09:00:18 -0600 Subject: [PATCH 9/9] docs(openapi): fix attrs column description to match actual behavior The attrs column returns the full JSON response object, not just unmapped fields. Updated all example READMEs to accurately describe this, consistent with the catalog docs and every other wasm FDW. --- wasm-wrappers/fdw/openapi_fdw/examples/README.md | 2 +- wasm-wrappers/fdw/openapi_fdw/examples/carapi/README.md | 2 +- wasm-wrappers/fdw/openapi_fdw/examples/github/README.md | 2 +- wasm-wrappers/fdw/openapi_fdw/examples/nws/README.md | 2 +- wasm-wrappers/fdw/openapi_fdw/examples/pokeapi/README.md | 2 +- wasm-wrappers/fdw/openapi_fdw/examples/threads/README.md | 2 +- 6 files changed, 6 insertions(+), 6 deletions(-) diff --git a/wasm-wrappers/fdw/openapi_fdw/examples/README.md b/wasm-wrappers/fdw/openapi_fdw/examples/README.md index f2d8dc23..33071348 100644 --- a/wasm-wrappers/fdw/openapi_fdw/examples/README.md +++ b/wasm-wrappers/fdw/openapi_fdw/examples/README.md @@ -40,4 +40,4 @@ Each example shows how to configure the FDW against a real API, with complete se | Single object response | ✓ | | ✓ | ✓ | | | Auto-detected wrapper key | | ✓ | ✓ | ✓ | | | Debug mode | ✓ | ✓ | ✓ | ✓ | | -| `attrs` catch-all column | ✓ | ✓ | ✓ | ✓ | ✓ | +| `attrs` full JSON column | ✓ | ✓ | ✓ | ✓ | ✓ | diff --git a/wasm-wrappers/fdw/openapi_fdw/examples/carapi/README.md b/wasm-wrappers/fdw/openapi_fdw/examples/carapi/README.md index b5206b22..dfa70e21 100644 --- a/wasm-wrappers/fdw/openapi_fdw/examples/carapi/README.md +++ b/wasm-wrappers/fdw/openapi_fdw/examples/carapi/README.md @@ -430,7 +430,7 @@ INFO: [openapi_fdw] Scan complete: 1 rows, 1 columns ## 11. The `attrs` Column -Every table includes an `attrs jsonb` column that captures **all fields not mapped to named columns**. 
This is useful for exploring what data the API returns without defining every column upfront. +Every table includes an `attrs jsonb` column that captures the full JSON response object for each row. This is useful for exploring what data the API returns without defining every column upfront. ```sql SELECT name, attrs diff --git a/wasm-wrappers/fdw/openapi_fdw/examples/github/README.md b/wasm-wrappers/fdw/openapi_fdw/examples/github/README.md index 4c016417..2f030b23 100644 --- a/wasm-wrappers/fdw/openapi_fdw/examples/github/README.md +++ b/wasm-wrappers/fdw/openapi_fdw/examples/github/README.md @@ -506,7 +506,7 @@ INFO: [openapi_fdw] Scan complete: 1 rows, 2 columns ## 10. The `attrs` Column -Every table includes an `attrs jsonb` column that captures all fields not mapped to named columns: +Every table includes an `attrs jsonb` column that captures the full JSON response object for each row: ```sql SELECT name, attrs->>'visibility' AS visibility, diff --git a/wasm-wrappers/fdw/openapi_fdw/examples/nws/README.md b/wasm-wrappers/fdw/openapi_fdw/examples/nws/README.md index 2d7fcb9c..ede7d05c 100644 --- a/wasm-wrappers/fdw/openapi_fdw/examples/nws/README.md +++ b/wasm-wrappers/fdw/openapi_fdw/examples/nws/README.md @@ -408,7 +408,7 @@ INFO: [openapi_fdw] Scan complete: 5 rows, 2 columns ## 9. The `attrs` Column -Every table includes an `attrs jsonb` column that captures **all fields not mapped to named columns**. This is useful for exploring what data the API returns without defining every column. +Every table includes an `attrs jsonb` column that captures the full JSON response object for each row. This is useful for exploring what data the API returns without defining every column. 
```sql SELECT station_identifier, attrs->>'county' AS county diff --git a/wasm-wrappers/fdw/openapi_fdw/examples/pokeapi/README.md b/wasm-wrappers/fdw/openapi_fdw/examples/pokeapi/README.md index 890ef90d..443b1ebd 100644 --- a/wasm-wrappers/fdw/openapi_fdw/examples/pokeapi/README.md +++ b/wasm-wrappers/fdw/openapi_fdw/examples/pokeapi/README.md @@ -396,7 +396,7 @@ INFO: [openapi_fdw] Scan complete: 3 rows, 1 columns ## 9. The `attrs` Column -Every table includes an `attrs jsonb` column that captures **all fields not mapped to named columns**. This is useful for exploring what data the API returns without defining every column upfront. +Every table includes an `attrs jsonb` column that captures the full JSON response object for each row. This is useful for exploring what data the API returns without defining every column upfront. For list endpoints, `attrs` will be mostly empty since the API only returns `name` and `url`. For detail endpoints, `attrs` captures the remaining fields: diff --git a/wasm-wrappers/fdw/openapi_fdw/examples/threads/README.md b/wasm-wrappers/fdw/openapi_fdw/examples/threads/README.md index a40bd2f2..fc698e26 100644 --- a/wasm-wrappers/fdw/openapi_fdw/examples/threads/README.md +++ b/wasm-wrappers/fdw/openapi_fdw/examples/threads/README.md @@ -533,7 +533,7 @@ INFO: [openapi_fdw] Scan complete: 3 rows, 2 columns ## 12. The `attrs` Column -Every table includes an `attrs jsonb` column that captures all fields not mapped to named columns: +Every table includes an `attrs jsonb` column that captures the full JSON response object for each row. This is useful for accessing fields you haven't defined as named columns: ```sql SELECT id, attrs->>'media_product_type' AS product_type,