From 45595875440845fc63cc981995748c975090ceab Mon Sep 17 00:00:00 2001 From: glasstiger Date: Mon, 15 Jun 2026 19:13:52 +0100 Subject: [PATCH 01/39] feat: OIDC device flow --- .claude/skills/review-pr/SKILL.md | 377 ++++++++++ CHANGELOG.rst | 43 ++ docs/api.rst | 43 ++ docs/auth.rst | 233 ++++++ docs/index.rst | 1 + docs/installation.rst | 8 +- examples/oidc_device_auth.py | 66 ++ setup.py | 2 +- src/questdb/auth/__init__.py | 92 +++ src/questdb/auth/_cache.py | 277 +++++++ src/questdb/auth/_device.py | 614 +++++++++++++++ src/questdb/auth/_discovery.py | 335 +++++++++ src/questdb/auth/_errors.py | 91 +++ src/questdb/auth/_http.py | 234 ++++++ src/questdb/auth/_questdb.py | 305 ++++++++ src/questdb/auth/_render.py | 336 +++++++++ test/test.py | 18 + test/test_auth.py | 1167 +++++++++++++++++++++++++++++ 18 files changed, 4240 insertions(+), 2 deletions(-) create mode 100644 .claude/skills/review-pr/SKILL.md create mode 100644 docs/auth.rst create mode 100644 examples/oidc_device_auth.py create mode 100644 src/questdb/auth/__init__.py create mode 100644 src/questdb/auth/_cache.py create mode 100644 src/questdb/auth/_device.py create mode 100644 src/questdb/auth/_discovery.py create mode 100644 src/questdb/auth/_errors.py create mode 100644 src/questdb/auth/_http.py create mode 100644 src/questdb/auth/_questdb.py create mode 100644 src/questdb/auth/_render.py create mode 100644 test/test_auth.py diff --git a/.claude/skills/review-pr/SKILL.md b/.claude/skills/review-pr/SKILL.md new file mode 100644 index 00000000..6b408b4a --- /dev/null +++ b/.claude/skills/review-pr/SKILL.md @@ -0,0 +1,377 @@ +--- +name: review-pr +description: Review a GitHub pull request against py-questdb-client (Cython + C-ABI) coding standards +argument-hint: [PR number or URL] [--level=0..3] +allowed-tools: Bash(gh *), Bash(git *), Read, Grep, Glob, Agent +--- + +Review the pull request `$ARGUMENTS`. + +## Review mindset + +You are a senior QuestDB engineer performing a blocking code review. 
`py-questdb-client` is mission-critical software: a **Cython** extension that wraps the **`c-questdb-client` (Rust) library** through its **C ABI**, and is used to ingest production data from customer Python applications. A bug here causes data loss, silent data corruption, segfaults that take down the host Python interpreter, reference-count leaks, or native memory leaks. There is zero tolerance for correctness issues, memory unsafety, refcount imbalance, GIL violations, or an FFI binding that disagrees with the C header it calls. Be critical, thorough, and opinionated. Your job is to catch problems before they ship, not to be nice. + +- **Assume nothing is correct until you've verified it.** Read surrounding code to understand context — don't just look at the diff in isolation. +- **The diff is a hint, not the boundary of the review.** The highest-value bugs almost always live at callsites outside the diff that depend on contracts the diff quietly changed (a `cdef` helper's error-return convention, a buffer's ownership, a `qdb_pystr_buf` arena's lifetime). Treat the diff as the entry point, not the scope. +- **Flag every issue you find**, no matter how small. Do not soften language or hedge. Say "this is wrong" not "this might be an issue". +- **Do not praise the code.** Skip "looks good", "nice work", "clever approach". Focus entirely on problems and risks. +- **Think adversarially.** For each change, work through: + - Inputs: which values break this? Empty buffers, zero-length strings, `None`, NaN/inf floats, boundary integers (`INT64_MAX`/`INT64_MIN`), max-length symbols, non-UTF-8 `str`, `bytes` with embedded NULs, huge `int` that overflows `int64_t`. + - Encoding: how does the code behave when a Python `str` contains lone surrogates, astral codepoints, or characters that fail UTF-8 encoding? + - Memory: every `malloc`/`calloc`/`realloc` — is it freed on the error path, the exception path, and the early-return path? 
Every `Py_INCREF` — is there a matching `Py_DECREF`? Every `PyObject_GetBuffer` — a matching `PyBuffer_Release`? + - GIL: does a `with nogil` block touch a Python object or call a CPython API function? Does a `cdef ... nogil` function need the GIL it doesn't hold? + - Failure modes: connection dropping mid-flush, partial write, TLS handshake failure, auth rejection, server rejection — does the buffer/sender end in a usable state, and does native memory get released? + - C-ABI callers: what happens when a C function returns `NULL`, returns an error via its out-param, or hands back a pointer the Cython side must free exactly once? +- **Check what's missing**, not just what's there. Missing tests, missing error handling, missing edge cases, missing `ingress.pyi` stub updates for public API changes, `.pxd` declarations out of sync with the C header. +- **Verify every claim.** If the PR title says "fix", verify the bug actually existed and the fix is correct. If it says "improve performance", look for benchmarks or reason about the change against the per-row hot path. If it says "simplify", verify the new code is actually simpler and doesn't drop behavior (e.g. a dropped `free` on an error branch). Treat the PR description as an unverified hypothesis. +- **Read the full context of changed files** when the diff alone is ambiguous. Use Read/Grep/Glob to inspect surrounding code, callers, and related tests. +- **Assess reachability before reporting.** For every potential bug, trace the actual callers and inputs. If a problem requires physically impossible conditions (a length larger than `SIZE_MAX`, a NUL injected through an API that already rejects it, a panic behind a validation guard), it is not a real finding — drop it. Focus on bugs that real workloads can trigger, not theoretical edge cases. +- **Never review generated or build artifacts.** `src/questdb/ingress.c`, `*.html` (Cython annotation), and `*.so` are build outputs. 
The source of truth is `*.pyx`, `*.pxi`, `*.pxd`, and `*.pyi`. If the diff contains a regenerated `ingress.c`, review the `.pyx`/`.pxi` change that produced it, not the generated C. + +## Review level + +Parse `$ARGUMENTS` for a level token: `--level=N`, `-lN`, or a bare single digit `0`-`3`. **If no level is given, default to 0.** Strip the level token before feeding the remainder (PR number or URL) to `gh` commands. + +The level controls how much of the review below actually runs. Lower levels keep the same review *spirit* — adversarial, blocking, no praise — but cut the breadth of the analysis. Higher levels have significantly higher token cost; reserve level 3 for high-stakes PRs (C-ABI `.pxd` changes, a `c-questdb-client` submodule bump, the dataframe/Arrow ingestion path, `nogil` sections, manual `malloc`/refcount code, ILP wire format, or auth/TLS configuration). + +| Level | What runs | +|-------|-----------| +| **0 (default)** | Steps 1, 2, 4. Skip Step 2.5. Skip Step 3 — no agent spawn; review the diff inline in the main loop, using Read/Grep on demand to resolve ambiguities. Skip Step 3b — verify each finding inline as you write it. Single-pass review covering correctness, Cython memory/refcount/GIL safety, C-ABI binding correctness, tests, and coding standards on the diff itself. | +| **1** | Adds Step 2.5a (semantic delta only — skip 2.5b/2.5c/2.5d). In Step 3, launch only Agent 1 (correctness), Agent 2 (Cython memory & refcount safety), and Agent 7 (tests) in parallel. Skip all other agents. Skip Step 3b — verify findings inline as you draft the report. | +| **2** | Full Step 2.5, but in 2.5b restrict the callsite inventory to public Python symbols (exported in `__all__` / `ingress.pyi`) plus every `cdef`/`cpdef` function and every C-ABI symbol declared in the `.pxd` files. In Step 3, launch Agents 1-8. Skip Agent 9 (cross-context) and Agent 10 (adversarial fresh-context). 
Step 3b uses a single batched verification agent for all findings instead of one per finding. | +| **3** | Every step below as written, all 10 agents, per-finding verification. The full mission-critical pass. | + +State the chosen level in one line at the start of the review so the user knows what they're getting (e.g., "Reviewing PR #141 at level 2"). If the level was defaulted, mention that level 3 exists for full review. + +## Step 1: Gather PR context + +Capture the PR identifier in `$PR` (the part of `$ARGUMENTS` left after stripping the level token), then fetch metadata, diff, and review comments in a single bash call so `$PR` is in scope for all three `gh` invocations: + +```bash +PR='PR_NUMBER_OR_URL'  # placeholder: substitute the part of $ARGUMENTS left after stripping the level token +gh pr view "$PR" --json number,title,body,labels,state +gh pr diff "$PR" +gh pr view "$PR" --comments +``` + +If the diff modifies `c-questdb-client` (the git submodule pointer) or any `.pxd` file, note it now — a submodule bump or binding change is the highest-risk class of change in this repo and forces level-3 scrutiny of the C-ABI surface regardless of the requested level. + +## Step 2: PR title and description + +Check: +- Title is clear and describes the change +- Description speaks to end-user impact, not implementation internals +- If fixing an issue, `Fixes #NNN` or a link to the issue is present +- Tone is level-headed and analytical +- For public API changes (anything in `__all__`, a new/changed method on `Sender`/`Buffer`/`Client`, a new keyword argument, or a changed default), the description calls out the API change explicitly, and `CHANGELOG.rst` is updated +- For a `c-questdb-client` submodule bump, the description states which upstream change is being pulled in and why + +## Step 2.5: Map the change surface + +Before launching review agents, produce a structured change surface map. This step is mandatory and must use Grep/Glob — do not reason about callsites from memory. The output of this step is required input for every agent in Step 3. 
+ +### 2.5a Semantic delta per changed symbol + +For every modified or added function (`def`, `cdef`, `cpdef`), method, class, `cdef class` attribute, module-level constant, enum member, or C-ABI declaration in a `.pxd`, write: + +- **Symbol:** fully-qualified name (e.g., `questdb.ingress.Buffer.column`, `_dataframe`, `c_err_to_py`, `line_sender_buffer_column_f64`) +- **Before:** signature, return type, **Cython exception convention** (`except -1` / `except *` / `except? -1` / `except +` / none / `noexcept`), what it raises and on which inputs, `nogil`-ness, whether it touches Python objects, allocation behavior (`malloc`/`calloc`/`realloc`), refcount effect (does it steal/borrow/own a reference?), C-ABI ownership semantics (who frees returned pointers), thread-safety +- **After:** same fields +- **Delta:** one line stating what semantically changed + +"Refactored", "cleaned up", "improved", "simplified" are not acceptable deltas. State the actual behavioral difference. If nothing semantically changed, write "no behavioral change" — but only after checking, not as a default. + +### 2.5b Callsite inventory + +For every changed symbol that is public (in `__all__` / `ingress.pyi`), `cdef`/`cpdef`, declared in a `.pxd`, or a C-ABI function, run Grep across the repository to find every callsite, override, or reference outside the diff. + +Produce a list grouped by file. 
Search at minimum: + +- **Cython implementation & includes:** `grep -rn 'symbol_name' src/questdb/*.pyx src/questdb/*.pxi` +- **Cython C-ABI / helper declarations:** `grep -rn 'symbol_name' src/questdb/*.pxd` +- **Type stubs:** `grep -rn 'symbol_name' src/questdb/ingress.pyi` +- **C-ABI header (source of truth):** `grep -rn 'symbol_name' c-questdb-client/include/questdb/ingress/` +- **Rust helper crate:** `grep -rn 'symbol_name' rpyutils/src/ rpyutils/include/` +- **Unit & mock-server tests:** `grep -rn 'symbol_name' test/test.py test/mock_server.py test/test_tools.py` +- **System / integration tests:** `grep -rn 'symbol_name' test/system_test.py` +- **DataFrame tests, fuzz tests, leak tests:** `grep -rn 'symbol_name' test/test_dataframe.py test/test_client_dataframe_fuzz.py test/test_dataframe_fuzz.py test/test_dataframe_leaks.py test/test_client_capsule_path.py` +- **Examples:** `grep -rn 'symbol_name' examples/` +- **Docs:** `grep -rn 'symbol_name' docs/` + +A changed public / `cdef` / `.pxd` symbol with zero recorded Grep calls in the trace is a skill violation. The model is not allowed to assert "this is only used here" without showing the search. + +### 2.5c Implicit contract list + +For each changed symbol, walk this checklist and write one line per item, stating before vs after: + +- **Cython exception convention:** does the function return a C type with the right `except` clause? A `cdef` function returning `int`/`void`/a pointer with **no** `except` clause (or `noexcept`, the Cython 3 default for `nogil` functions) **silently swallows any Python exception raised inside it.** Did the convention change, and do all callers still propagate errors correctly? +- **Raises which exceptions on which inputs** (`IngressError`, `ValueError`, `TypeError`, `IngressServerRejectionError`, `UnsupportedDataFrameShapeError`) and which callers catch vs propagate them +- **Native memory:** does the symbol allocate (`malloc`/`calloc`/`realloc`) and who frees it? 
Does it free on every path including the exception path? +- **Reference counting:** does it `Py_INCREF`/`Py_DECREF`, store a borrowed `PyObject*`, hold a weakref/capsule, or return a borrowed vs owned reference? +- **Buffer protocol:** does it call `PyObject_GetBuffer` (and the matching `PyBuffer_Release`)? Does it keep the exporter alive while the raw pointer is in use? +- **GIL:** does it run under `nogil`? Does it release the GIL around a blocking C call (flush/connect)? Does it reacquire to raise? +- **C-ABI ownership:** does it pass a `line_sender_buffer`/`line_sender_utf8`/`qdb_pystr_buf` pointer into Rust, and who owns it afterward? Is a returned `line_sender_error*` freed exactly once (`line_sender_error_free`)? +- **`qdb_pystr_buf` arena lifetime:** are UTF-8 pointers obtained from the arena still valid after a subsequent `clear`/append (which may reallocate and invalidate earlier pointers)? +- **Buffer/sender state on error:** does a failed call leave the `Buffer` half-written, or the `Sender` in an unusable state requiring reconstruction? +- **`.pxd` ↔ C header agreement:** parameter types, `const`-ness, struct layout, enum discriminant order, return type — does the Cython declaration still match `c-questdb-client/include/questdb/ingress/*.h`? +- **`.pyi` ↔ implementation agreement:** does the stub still match the real signature, defaults, and return type? +- **Wire format:** any change to the ILP bytes produced (protocol v1 / v2), timestamp units, or column encoding. + +### 2.5d Cross-context exposure list + +End this step with an explicit list of "places this change is visible from but the diff does not touch". This is the highest-priority input for the bug-hunting agents in Step 3. + +Group the callsites from 2.5b by execution context. 
Typical contexts in this codebase: + +- **C-ABI binding surface:** every C-ABI function declared in `src/questdb/line_sender.pxd` / `conf_str.pxd` / `arrow_c_data_interface.pxd` / `mpdecimal_compat.pxd` / `rpyutils.pxd` that the changed code calls (transitively) +- **Buffer build hot path:** `Buffer.column`, `Buffer.symbol`, `Buffer.row`, `Buffer.at*`, and their `cdef` helpers +- **DataFrame / Arrow ingestion path:** everything in `dataframe.pxi`, the pandas/numpy/pyarrow/polars code paths, Arrow C Data Interface (`ArrowArray`/`ArrowSchema`/`ArrowArrayStream`) consumption and release callbacks, PyCapsule handling +- **Egress / query path:** `egress.pxi`, `QueryResult` +- **Flush path:** `Sender.flush`, `Buffer` → transport, the `with nogil` blocking sections +- **Auto-flush logic:** any callsite that triggers flush implicitly (row count / byte threshold / interval) +- **Configuration parsing:** `Sender.from_conf` / `from_env`, the `conf_str` parser, keyword-argument handling +- **Authentication / TLS:** auth token / basic-auth / TLS-CA configuration paths +- **`nogil` / threading surface:** the `active_senders` registry (`rpyutils/src/active_senders.rs`), any code reachable from multiple threads +- **`qdb_pystr_buf` arena users:** every function that obtains UTF-8 pointers from the per-`Buffer` string arena +- **Python type stubs:** `ingress.pyi` +- **Tests:** `test/test.py`, `test/system_test.py`, `test/test_dataframe.py`, fuzz and leak tests +- **Examples & docs:** `examples/*.py`, `docs/` + +Every entry on this list must be reviewed in Step 3. + +### 2.5e Build & binding profile facts + +**This sub-step runs at every level, including levels 0 and 1 where the rest of Step 2.5 is skipped.** A single Cython directive or a submodule bump can flip the safety story for the entire extension; agents must reason from the actual profile, not from defaults. 
+ +Record, with file:line citations: + +- **Cython compiler directives** at the top of `ingress.pyx` and in `setup.py` (`language_level`, `binding`, and — if set — `boundscheck`, `wraparound`, `cdivision`, `initializedcheck`, `nonecheck`). If `boundscheck=False` / `wraparound=False`, **out-of-range or negative C-array/typed-memoryview indexing is undefined behavior, not an `IndexError`** — agents must treat indexing as a crash surface, not a guarded operation. +- **Cython exception-default fact:** in Cython 3, a `cdef`/`cpdef` function declared `nogil` (or any `cdef` returning a non-object type without an explicit `except` clause) defaults to `noexcept` — it **swallows Python exceptions silently**. Agents 1, 2, and 3 must check the actual `except` clause on every changed `cdef` and not assume exceptions propagate. +- **`c-questdb-client` submodule commit** (`git submodule status`) — if the diff moves it, the pinned commit's headers under `c-questdb-client/include/questdb/ingress/` are the *new* source of truth that every `.pxd` must match. Re-verify the `.pxd` ↔ `.h` agreement against the new commit. +- **`rpyutils` Rust crate:** if `rpyutils/src/**` or `rpyutils/Cargo.toml` changed, note its panic/profile behavior — a panic in `rpyutils` reached across the C ABI aborts the Python process. Its headers (`rpyutils/include/`, generated via `cbindgen.toml`) must match `rpyutils.pxd`. +- **Minimum numpy / Python versions** (`pyproject.toml`: `requires-python`, `numpy>=1.21.0`). Code that uses a newer numpy C-API or Python C-API symbol than the floor breaks the oldest supported build. State the floor. +- **`abort()` is imported** (`from libc.stdlib cimport ... abort`). Any reachable `abort()` call, or any Rust panic that crosses the C ABI, terminates the host interpreter with no traceback. Flag the path. + +A review without this section is incomplete. 
State the relevant facts (directives, exception default, submodule commit) in one line at the top of every Step 3 agent prompt so the agent reasons from the right premise. + +## Step 3: Parallel review + +Every agent receives: +1. The PR diff +2. The full change surface map from Step 2.5 (semantic deltas, callsite inventory, implicit contracts, cross-context exposure list, build & binding profile facts) + +### Anti-anchoring directive (applies to all agents) + +- **Bugs at callsites outside the diff outrank bugs inside the diff.** A confirmed bug in a file the PR did not touch but that calls a changed symbol is a P0 finding. +- **"Looks correct in isolation" is not a valid conclusion.** Before clearing a changed symbol, the agent must walk the callsite inventory from 2.5b and explicitly state, per callsite, whether the new behavior is still correct there. +- **The diff is the entry point, not the scope.** If the change surface map shows the symbol is reachable from N other files, the review covers N+1 files. +- **Project-wide settings affect untouched code.** A change to a Cython directive in `ingress.pyx` or `setup.py` (e.g. flipping `boundscheck` off), a `c-questdb-client` submodule bump, or a `.pxd` declaration change retroactively changes the safety/ABI story for **every** function that compiles under that directive or calls that binding — not just the diff. When directives, `setup.py`, `pyproject.toml`, or `.pxd`/submodule pointers appear in the diff, the review covers the affected surface of the whole extension, not just the touched lines. +- A single finding of the form "in `dataframe.pxi` the new behavior of `Buffer.column` leaks `b.validity` on the exception path" is worth more than five findings inside the diff. + +### Agents + +Launch the following agents in parallel. + +**Agent 1 — Correctness & bugs:** `None`/NULL handling, edge cases, logic errors, off-by-one, operator precedence, error paths. 
Integer correctness across the Python↔C boundary: Python `int` → `int64_t`/`size_t` conversion and overflow, explicit Cython C-type casts (e.g. to `int64_t` or `size_t`) that truncate or wrap, signed/unsigned mismatches, negative-length math. NaN/inf float handling. Timestamp unit conversions (micros vs nanos). Correct ILP wire format (v1 / v2). Cross-reference every changed symbol against its callsite inventory and verify the new behavior is correct at each callsite. + +**Agent 2 — Cython memory, refcount & crash surface:** In a Cython extension, anything that corrupts memory or aborts the native side takes down the host Python interpreter with no traceback. Flag every reachable instance of: + +- **Native memory leaks / double-free / use-after-free:** every `malloc`/`calloc`/`realloc` must be `free`d on **all** paths — success, early `return`, and the exception/`except` path (prefer `try/finally`). A `realloc` whose return value is assigned back to the same pointer leaks the original on failure (it returns `NULL` without freeing). Freeing a pointer twice, or using it after `free`, corrupts the heap. +- **Reference-count errors:** every `Py_INCREF` needs a matching `Py_DECREF` on all paths; a missing `DECREF` leaks, an extra `DECREF` causes a later use-after-free crash. Borrowed references (`PyWeakref_GetObject`, dict/list borrows, `PyObject*` stored without incref) must not outlive their owner. Verify `PyCapsule` and weakref handling. +- **Buffer-protocol imbalance:** every `PyObject_GetBuffer` must have a matching `PyBuffer_Release` on all paths, and the raw pointer must not be used after the exporting object can be collected. +- **Indexing under `boundscheck=False`:** per 2.5e, C-array and typed-memoryview indexing is unchecked — an out-of-range or negative index is UB, not an exception. Verify bounds are established before every index on the hot path. 
+- **Silent exception swallowing:** a `cdef` function returning a C type without the correct `except` clause (or `noexcept`) drops Python exceptions on the floor, turning an error into wrong data. Verify the `except` convention against what the body raises. +- **Direct aborts:** any reachable `abort()` (it is imported), and any **Rust panic crossing the C ABI** (from `c-questdb-client` or `rpyutils`) — both terminate the interpreter. The only defense is that the native side returns an error code/`line_sender_error*`, never panics. +- **Uninitialized memory:** a struct field or `malloc`'d region read before it is written (use `calloc` or explicit init), especially partially-built `pyobj_built_t`-style structs on an error path that then get freed. + +State the relevant build facts (directives, exception default, submodule commit) from 2.5e in the agent's first sentence, and evaluate every finding under the actual settings, not the textbook defaults. + +**Agent 3 — C-ABI boundary safety:** Check every call into the `c-questdb-client` / `rpyutils` C ABI. Verify: +- **`.pxd` matches the C header.** For every changed or called C-ABI symbol, read the actual declaration in `c-questdb-client/include/questdb/ingress/*.h` (or `rpyutils/include/`) and confirm the `.pxd` declaration matches it exactly: parameter types, pointer/`const`-ness, return type, struct field order and types, enum discriminant order. A mismatch is silent memory corruption / ABI breakage. If the submodule pointer moved, verify against the **new** pinned commit. +- **NULL handling:** every pointer returned from a C function checked before dereference; every pointer argument that could be `NULL` handled. +- **Error object lifecycle:** every `line_sender_error*` obtained via an out-param is converted (`c_err_to_py`) and freed exactly once (`line_sender_error_free`) — never leaked, never double-freed, never freed then read. 
+- **Ownership transfer:** `line_sender_buffer`, `line_sender_utf8`, `qdb_pystr_buf`, `line_sender` handles — who allocates, who frees, and is the lifetime correct relative to the owning `cdef class` (`__cinit__`/`__dealloc__`)? +- **`qdb_pystr_buf` arena invalidation:** UTF-8 pointers handed to Rust must remain valid until the buffer write completes and must not be invalidated by an intervening arena `clear`/append. +- **String encoding:** Python `str` → UTF-8 (`line_sender_utf8`), correct length passed, no lone surrogates, embedded-NUL handling, `bytes` vs `str` distinction. + +**Agent 4 — GIL & concurrency:** Verify: +- **`nogil` correctness:** no `with nogil` block (or `cdef ... nogil` function) touches a Python object, calls the CPython C-API, raises a Python exception, or `INCREF`/`DECREF`s — doing so without the GIL is a crash/corruption. Errors discovered under `nogil` must be deferred and raised after reacquiring the GIL. +- **GIL release around blocking calls:** the flush/connect/network C calls should release the GIL (`with nogil`) so other threads run; verify the released region doesn't reference Python state. +- **Thread-safety:** `Sender`, `Buffer`, and the `active_senders` registry (`rpyutils/src/active_senders.rs`) — verify documented thread-safety matches the implementation, and that shared mutable state reachable from multiple threads is synchronized. Cross-reference every callsite from 2.5b for violations of the concurrency contract. +- **Free-threaded build:** if the change assumes the GIL serializes access, note whether it holds under a free-threaded (no-GIL) CPython build (the CI matrix includes `*t` free-threaded targets). + +**Agent 5 — Resource management & lifecycle:** Leaks on all code paths (especially errors). Check `__cinit__`/`__dealloc__` pairing on every `cdef class` (does `__dealloc__` free everything `__cinit__` and methods allocated, and is it safe when `__cinit__` failed partway?). 
Native handle lifecycle (`line_sender`, `line_sender_buffer`, `qdb_pystr_buf`). Socket/connection/TLS teardown on error (handled by Rust, but verify the Cython side calls close/free). **Arrow C Data Interface:** `ArrowArray`/`ArrowSchema`/`ArrowArrayStream` `release` callbacks invoked exactly once; PyCapsule consumption semantics correct; no double-release. Walk every callsite from 2.5b that constructs, owns, or transfers ownership of a native handle and verify cleanup on all paths (success, exception, early return). + +**Agent 6 — Performance & allocations:** Unnecessary work on hot paths — the per-row buffer build (`Buffer.column`/`symbol`/`row`) and the per-column DataFrame loop (`dataframe.pxi`). Flag: Python-level operations (attribute lookups, `dict` access, object boxing, `str` re-encoding) inside the inner per-row/per-cell loop that should be hoisted or done at C level; allocations per row/cell that should be amortized; excessive copying of data that could be zero-copy via the buffer protocol / Arrow; O(n²) patterns over rows or columns. Analyze scaling at realistic volume: millions of rows per flush, hundreds of columns. Setup-path costs (sender construction, config parsing, schema inspection done once per DataFrame) are acceptable; per-row/per-cell costs are not. + +**Agent 7 — Test review & coverage:** Coverage gaps, error-path tests, `None`/edge-case tests, boundary conditions, regression tests, test quality. 
Check: +- Unit / mock-server tests in `test/test.py` (uses `test/mock_server.py`) +- System / integration tests against a real QuestDB in `test/system_test.py` +- DataFrame tests in `test/test_dataframe.py`, fuzz tests in `test/test_client_dataframe_fuzz.py` / `test/test_dataframe_fuzz.py`, and **leak tests** in `test/test_dataframe_leaks.py` (new native-memory or refcount handling should have a leak test) +- Capsule / Arrow path tests in `test/test_client_capsule_path.py` +- Examples in `examples/` still run (and `examples.manifest.yaml` is consistent) + +Cross-reference 2.5d: every cross-context exposure should have a test that exercises the changed symbol from that context. Missing tests for cross-context callsites — especially a new native-memory path without a leak test, or a new C-ABI binding without a system test — is a high-priority finding. + +**Agent 8 — Code quality & API design:** Public API ergonomics and consistency. **`ingress.pyi` stub must match the implementation** (signatures, defaults, return types, new symbols added to `__all__`). Docstrings on public classes/methods. `CHANGELOG.rst` updated for user-visible changes. Backward compatibility of the Python API (renamed/removed kwargs, changed defaults, changed exception types) — breaking changes must be intentional and called out in the PR body. Naming consistent with the codebase. No dead code, no unused `cimport`/`import`. Docs under `docs/` updated for API changes. + +**Agent 9 — Cross-context caller impact:** Walk the callsite inventory from 2.5b. For every callsite, fetch the surrounding code (the calling function plus its callers up two levels) and answer: + +- Does this caller pass inputs the new behavior handles incorrectly? +- Does this caller depend on a contract from the implicit contract list (2.5c) that the change broke — e.g. relying on the old `except` convention, the old ownership of a buffer, the old `qdb_pystr_buf` lifetime, the old refcount behavior? 
+- Is this caller in a context (a `with nogil` block, the per-row hot loop, an auto-flush trigger, an Arrow release callback, a `__dealloc__`, an exception/error path) where the new behavior misbehaves even if the inputs are valid? +- For a changed `cdef`/`cpdef` exception convention: do all callers still detect and propagate the error? +- For a changed C-ABI declaration: does the `.pxd` still match the C header, and do all Cython callers pass the right types/ownership? +- For a changed buffer/sender state machine: do all callers respect the new state transitions (buffer cleared after error before reuse; flush only when flushable)? + +This agent's output is structured per callsite, not per failure mode. Each callsite gets a verdict: SAFE / BROKEN / NEEDS VERIFICATION. Every BROKEN entry is a P0 finding regardless of whether the file is in the diff. + +This agent is not optional even when the diff is small. Small diffs to widely-used symbols (`Buffer.column`, `Sender.flush`, the dataframe entry point, a C-ABI binding) have the largest blast radius. + +**Agent 10 — Fresh-context adversarial:** Dispatched separately from agents 1-9 to escape checklist anchoring. This agent operates under different rules from the rest: + +- It receives ONLY the PR diff and the names of the changed files. It does NOT receive the change surface map from Step 2.5, the implicit contract list, the cross-context exposure list, or any of the review checklists below. +- Its sole instruction: "find ways this code is wrong". No category list, no failure-mode taxonomy, no project-specific style guide. +- It is free to use Read, Grep, and Glob to explore the repository however it wants. +- Findings are not pre-classified by category. Each finding states: what's wrong, why it's wrong, and the code path that demonstrates it. + +The point of this agent is to surface bugs the structured agents cannot see because they are reasoning inside the same frame. 
A finding here that none of agents 1-9 produced is high signal — it means the structured review missed it. A finding here that overlaps with agents 1-9 is corroboration. + +Run this agent in parallel with agents 1-9. It is mandatory regardless of diff size. + +Combine all agent findings into a single deduplicated **draft** report. Do NOT present this draft to the user yet — it goes straight into verification. + +## Step 3b: Verify every finding against source code + +The parallel review agents work from the diff plus the change surface map and frequently produce false positives — especially around native memory ownership, refcounting, GIL boundaries, Cython exception conventions, and C-ABI lifecycle. Every finding MUST be verified before it is reported. + +For each finding in the draft report: + +1. **Read the actual source code** at the exact lines cited (in the `.pyx`/`.pxi`/`.pxd`/`.pyi`, never the generated `ingress.c`). Do not rely on the agent's description alone. +2. **Trace the full code path:** follow callers and `cdef` helpers. Remember Cython's `include` model — `dataframe.pxi` and `egress.pxi` are textually included into `ingress.pyx`, so symbols are shared across them. +3. **Check both sides of the C ABI:** if a finding involves Cython↔Rust interaction, read both the Cython call and the C header in `c-questdb-client/include/questdb/ingress/` (or `rpyutils/include/`). Verify ownership transfer, error propagation, and freeing on both sides. +4. **For native-memory-leak claims:** trace every `malloc`/`calloc`/`realloc` to its `free` on ALL paths (success, early return, `except`/exception unwind). Confirm the intervening code can actually raise before claiming the exception path leaks. +5. **For refcount claims:** count `Py_INCREF`/`Py_DECREF` on every path; confirm borrowed-vs-owned reasoning against the CPython C-API contract of each function used. +6. 
**For exception-swallowing claims:** check the actual `except` clause on the `cdef` and whether the body can raise. Under Cython 3 a `nogil` `cdef` defaults to `noexcept` — confirm whether that's the real declaration. +7. **For GIL claims:** verify the cited code is actually inside a `nogil` region and actually touches a Python object / C-API; a `cdef` function called from `nogil` may itself acquire the GIL. +8. **For C-ABI / `.pxd` mismatch claims:** read the exact declaration in the pinned header and compare field-by-field. A claimed mismatch that actually matches is a false positive. +9. **For numeric overflow/truncation claims:** check reachability at realistic scale — ILP buffers up to a few hundred MB, millions of rows per flush, columns in the tens to low hundreds. Drop overflows that require values beyond that scale. +10. **For performance claims:** confirm the cost is on the per-row/per-cell hot path and measurable relative to surrounding I/O. Downgrade negligible savings to a nit. Exception: a per-row or per-cell allocation / Python-object operation on the buffer-build path is always worth flagging. +11. **For cross-context findings (Agent 9):** re-read the callsite in full, including callers up two levels, and confirm the broken behavior is reachable from production or test paths users will exercise. + +**Classify each finding** as: +- **CONFIRMED in-diff** — the bug is real and inside the diff +- **CONFIRMED at out-of-diff callsite** — the bug is in an unchanged file because the changed symbol is used there in a way that's now broken (cite the file and the contract from 2.5c that was violated) +- **FALSE POSITIVE** — the code is actually correct (explain why) +- **CONFIRMED with nuance** — the issue exists but is less severe than stated (explain) + +**Move false positives to a separate "Downgraded" section** at the end of the report. For each, give a one-line explanation of why it was dismissed. 
This lets the PR author verify the reasoning and catch verification mistakes. + +Launch verification agents in parallel where findings are independent. Each verification agent should read surrounding source files, not just the diff. + +## Review checklists + +Review the diff for: + +### Correctness & bugs +- `None`/NULL handling at API boundaries +- Edge cases and error paths +- Logic errors, off-by-one, incorrect bounds, wrong operator precedence +- Integer overflow/truncation across the Python↔C boundary (`int` → `int64_t`/`size_t`, ``/`` casts, signed/unsigned) +- Float edge cases (NaN, inf), timestamp unit conversions (micros vs nanos) +- Correct ILP wire format (v1 / v2) +- **Reachability expansion:** for each changed symbol, list the new contexts it can appear in (DataFrame path, `nogil` section, auto-flush, Arrow callback, error path) and verify it works in each. + +### Cython memory & refcount safety +- Every `malloc`/`calloc`/`realloc` freed on success, early-return, and exception paths (prefer `try/finally`); no double-free, no use-after-free; `realloc`-failure path doesn't leak the original +- Every `Py_INCREF` matched by `Py_DECREF`; borrowed references not outliving their owner; weakref/capsule handling correct +- Every `PyObject_GetBuffer` matched by `PyBuffer_Release`; exporter kept alive while the pointer is used +- Correct Cython `except` convention on every `cdef`/`cpdef` returning a C type (no silent exception swallowing; `noexcept` is the Cython-3 default for `nogil` `cdef`) +- No reachable `abort()`, and no Rust panic crossing the C ABI (both kill the interpreter) +- Indexing safe under the active `boundscheck`/`wraparound` directives +- No uninitialized struct/heap memory read (use `calloc` or init before use, especially on partially-built error paths) + +### C-ABI boundary +- `.pxd` declarations match `c-questdb-client/include/questdb/ingress/*.h` (and `rpyutils/include/`) exactly — types, `const`, struct layout, enum order, return type — 
against the **pinned** submodule commit +- All pointers returned from C checked for NULL before dereference +- Every `line_sender_error*` freed exactly once (`line_sender_error_free`), never double-freed or leaked +- Ownership semantics clear and correct (who allocates the handle, who frees it, lifetime vs the owning `cdef class`) +- `qdb_pystr_buf` arena pointers stay valid until consumed; not invalidated by an intervening `clear`/append +- String handling: `str` → UTF-8 with correct length, lone-surrogate rejection, embedded-NUL handling, `bytes`/`str` distinction +- ABI stability: a submodule bump that reorders a struct or renumbers an enum requires matching `.pxd` updates + +### GIL & concurrency +- No Python object access / C-API call / refcount op / raise inside a `with nogil` block or `cdef ... nogil` function +- GIL released around blocking network/flush C calls; released region references no Python state; errors deferred and raised after reacquiring +- `Sender`/`Buffer`/`active_senders` thread-safety matches documentation; shared mutable state synchronized +- Assumptions that the GIL serializes access re-checked for the free-threaded CPython build + +### Performance +- No per-row/per-cell Python-level operations (attribute/dict lookups, boxing, `str` re-encoding) in the buffer-build or DataFrame inner loops that belong at C level or hoisted to setup +- No per-row/per-cell allocations that should be amortized +- Zero-copy where possible (buffer protocol, Arrow) instead of copying +- No O(n²) over rows or columns at realistic scale (millions of rows, hundreds of columns) + +### Resource management +- `__cinit__`/`__dealloc__` pair frees everything allocated, and `__dealloc__` is safe after a partially-failed `__cinit__` +- Native handles (`line_sender`, `line_sender_buffer`, `qdb_pystr_buf`) released on all paths +- Socket/connection/TLS cleanup on error (Cython side invokes the Rust close/free) +- Arrow `release` callbacks invoked exactly once; PyCapsule 
consumed correctly; no double-release +- No leak through the C-ABI boundary (ownership documented and consistent) + +### Code quality +- `ingress.pyi` stub matches the implementation (signatures, defaults, return types, `__all__`) +- Public API consistent and ergonomic; backward-compatible (or breaking changes called out in the PR body) +- `CHANGELOG.rst` updated for user-visible changes; `docs/` updated for API changes +- Docstrings on public classes/methods +- Naming consistent with the codebase; no dead code or unused `import`/`cimport` + +### Test review +- **Coverage gaps:** every new/changed code path has a corresponding test; flag missing ones explicitly as "missing test for X" +- **Cross-context coverage:** every entry in the cross-context exposure list (2.5d) has a test exercising the changed symbol from that context +- **Leak coverage:** new native-memory or refcount-handling code has a test in `test/test_dataframe_leaks.py` (or equivalent) +- **Error-path coverage:** failure cases, partial writes, connection drops, TLS/auth failures, server rejections, and edge conditions tested — not just the happy path +- **Edge-case tests:** `None`, empty buffers, zero-length strings, max-length symbols, boundary integers, NaN/inf, non-UTF-8 strings +- **C-ABI / binding changes** covered by a system test in `test/system_test.py` +- **DataFrame / Arrow changes** covered in `test/test_dataframe.py` and the fuzz/capsule tests +- **Test quality:** tests assert the right thing; watch for trivially-passing tests +- **Regression tests:** a bug fix has a test that reproduces the original bug and fails without the fix + +### Unresolved TODOs and FIXMEs +- Scan the diff for `TODO`, `FIXME`, `HACK`, `XXX`, `WORKAROUND`. For each: + - Pre-existing (just moved/reformatted) or newly introduced in this PR? + - If new: unfinished work that should block merge, or an acceptable known limitation? Flag deferred bugs or incomplete implementations. 
+ - If it references a ticket/issue, verify the reference exists. + +### Commit messages +- Plain English titles, under 50 chars +- Active voice, naming the acting subject + +## Step 4: Output + +Present ONLY verified findings (false positives are excluded from Critical/Moderate/Minor). Structure as: + +### Critical +Issues that must be fixed before merge. Each must include: +- Exact file path and line numbers (including out-of-diff files) +- Whether the finding is **in-diff** or **out-of-diff** +- Code path trace showing why the bug is real +- For out-of-diff findings: the contract from 2.5c that was violated and the callsite that triggers it +- Suggested fix + +### Moderate +Issues worth addressing but not blocking. + +### Minor +Style nits and suggestions. + +### Downgraded (false positives) +Findings from the initial review that were dismissed after source code verification. For each, state: +- The original claim (one line) +- Why it was dismissed (one line, citing the specific code that disproves it) + +### Summary +- One-line verdict: approve, request changes, or needs discussion +- Highlight any regressions or tradeoffs +- State how many draft findings were verified vs dropped as false positives (e.g., "8 findings verified, 4 false positives removed") +- State the in-diff vs out-of-diff split (e.g., "5 findings in-diff, 3 findings out-of-diff"). If the diff is non-trivial and out-of-diff is zero, the cross-context pass likely underran — re-invoke Agent 9 with a wider grep before finalizing. 
\ No newline at end of file diff --git a/CHANGELOG.rst b/CHANGELOG.rst index c3deae50..557b131c 100644 --- a/CHANGELOG.rst +++ b/CHANGELOG.rst @@ -5,6 +5,49 @@ Changelog ========= +Unreleased +---------- + +Features +~~~~~~~~ + +OIDC Authentication (:mod:`questdb.auth`) +************************************************ + +New :mod:`questdb.auth` module to sign in interactively to OIDC-secured +QuestDB Enterprise from Python — including from **remote** kernels +(JupyterHub, SageMaker, Colab, VS Code-remote) that have no local browser. + +It runs the OAuth 2.0 Device Authorization Grant (RFC 8628) client-side: you +authorize in any browser (laptop or phone), and the token is presented to +QuestDB over the auth paths it already supports (HTTP ``Bearer`` / PG-wire +``_sso``). No server change is required. + +.. code-block:: python + + from questdb.auth import OidcDeviceAuth, connect + + # Just the token (use it with PG-wire, HTTP, or any client): + auth = OidcDeviceAuth.from_questdb("https://questdb.example.com:9000") + token = auth.token() + + # Or the integrated session (query to a DataFrame, feed adapters): + qdb = connect("https://questdb.example.com:9000") + df = qdb.sql("SELECT * FROM trades LIMIT 10") + +Highlights: + +* Auto-discovery of OIDC config from the QuestDB ``/settings`` endpoint, with a + fallback to the IdP ``.well-known`` document. +* In-process token cache with silent refresh; optional on-disk cache. +* Adapters for pandas (REST ``/exec``), SQLAlchemy, psycopg and the ingestion + ``Sender``. +* ``token()`` / ``headers()`` require no dependencies beyond the standard + library; ``pandas`` / ``sqlalchemy`` / ``psycopg`` / ``qrcode`` / ``IPython`` + are imported lazily. + +See the :ref:`OIDC authentication guide <oidc_auth>` for details.
+ 4.1.0 (2025-11-28) ------------------ diff --git a/docs/api.rst b/docs/api.rst index b3e1f11e..6b428050 100644 --- a/docs/api.rst +++ b/docs/api.rst @@ -67,3 +67,46 @@ questdb.ingress :members: :undoc-members: :show-inheritance: + +questdb.auth +============ + +See the :ref:`oidc_auth` guide for an overview. + +.. autofunction:: questdb.auth.connect + +.. autoclass:: questdb.auth.QuestDB + :members: + :undoc-members: + :show-inheritance: + +.. autoclass:: questdb.auth.OidcDeviceAuth + :members: + :undoc-members: + :show-inheritance: + +.. autoclass:: questdb.auth.OidcConfig + :members: + :undoc-members: + :show-inheritance: + +.. autoexception:: questdb.auth.OidcError + :show-inheritance: + +.. autoexception:: questdb.auth.OidcConfigError + :show-inheritance: + +.. autoexception:: questdb.auth.OidcInteractionRequired + :show-inheritance: + +.. autoexception:: questdb.auth.OidcDeviceFlowError + :show-inheritance: + +.. autoexception:: questdb.auth.OidcTimeoutError + :show-inheritance: + +.. autoexception:: questdb.auth.OidcAuthError + :show-inheritance: + +.. autoexception:: questdb.auth.OidcNetworkError + :show-inheritance: diff --git a/docs/auth.rst b/docs/auth.rst new file mode 100644 index 00000000..7e40ada5 --- /dev/null +++ b/docs/auth.rst @@ -0,0 +1,233 @@ +.. _oidc_auth: + +=================== +OIDC Authentication +=================== + +QuestDB Enterprise can be secured with `OpenID Connect (OIDC) +`_. The :mod:`questdb.auth` module +lets you sign in interactively from Python — including from a **remote** kernel +(JupyterHub, SageMaker, Colab, VS Code-remote, containers) where there is no +local browser. + +It runs the `OAuth 2.0 Device Authorization Grant (RFC 8628) +`_ entirely client-side: you +authorize in **any** browser (your laptop or your phone), while the kernel only +makes outbound calls to your identity provider (IdP). 
The resulting token is +then presented to QuestDB over the auth paths it already supports — HTTP +``Authorization: Bearer`` or PG-wire ``_sso`` — so **no server change is +required**. + +.. note:: + + This feature targets **QuestDB Enterprise with OIDC enabled**. The IdP + client referenced by ``acl.oidc.client.id`` must have the device grant + (``urn:ietf:params:oauth:grant-type:device_code``) enabled and be a public + client. See :ref:`oidc_idp_requirements`. + +Two ways to use it +================== + +You can let the helper drive everything, or you can just take the token and use +it with your own tooling. + +Just the token (PG-wire / HTTP / anything) +------------------------------------------ + +If you connect to QuestDB yourself — over PG-wire, raw HTTP, or any other +client — you only need a valid token. This path has **no extra dependencies**. + +.. code-block:: python + + from questdb.auth import OidcDeviceAuth + + # Discover the OIDC configuration from the QuestDB server: + auth = OidcDeviceAuth.from_questdb("https://questdb.example.com:9000") + + token = auth.token() # runs the device flow on first use, else cached + headers = auth.headers() # {"Authorization": "Bearer "} + + # Use the token however you like, e.g. PG-wire via psycopg: + import psycopg + conn = psycopg.connect( + host="questdb.example.com", port=8812, dbname="qdb", + user="_sso", password=token) + +The integrated session +---------------------- + +The high-level :func:`questdb.auth.connect` returns a :class:`~questdb.auth.QuestDB` +session that signs you in and adapts the token into the common Python access +paths. + +.. code-block:: python + + from questdb.auth import connect + + qdb = connect("https://questdb.example.com:9000") # interactive sign-in + df = qdb.sql("SELECT * FROM trades WHERE ts > dateadd('h', -1, now())") + + # Bring-your-own client, same auto-refreshed token: + engine = qdb.sqlalchemy_engine() # PG-wire, token as _sso + with qdb.psycopg() as conn: # raw psycopg + ... 
+ with qdb.sender() as sender: # ingestion (ILP/HTTP) + sender.row("trades", columns={"price": 101.5}, + at=TimestampNanos.now()) + +On first use you will see a sign-in prompt (rendered as a clickable link in +Jupyter, plain text on a terminal):: + + 🔐 Sign in to QuestDB + Open https://idp.example.com/device and enter code: WDJB-MJHT + (or open directly: https://idp.example.com/device?user_code=WDJB-MJHT) + ⏳ waiting for authorization… (4:51 left) + ✅ Signed in as alice@example.com — token cached, expires in 60 min + +Re-running any cell is silent — the token is cached and refreshed silently on +the next use once it nears expiry. + +How it works +============ + +Configuration discovery +------------------------ + +:meth:`OidcDeviceAuth.from_questdb <questdb.auth.OidcDeviceAuth.from_questdb>` +(and :func:`~questdb.auth.connect`) resolve the OIDC configuration in this +order: + +1. ``GET {url}/settings`` (public, no auth) for the QuestDB-authoritative + values: ``acl.oidc.client.id``, ``acl.oidc.scope``, ``acl.oidc.token.endpoint``, + ``acl.oidc.groups.encoded.in.token`` and (on newer servers) + ``acl.oidc.device.authorization.endpoint``. +2. If the device-authorization endpoint is not advertised, the helper falls + back to the IdP discovery document + (``{issuer}/.well-known/openid-configuration``). The issuer is taken from an + explicit ``issuer=`` / ``discovery_url=`` argument, or derived from the token + endpoint's origin. + +Anything you pass explicitly overrides discovery. You can also skip discovery +entirely: + +.. code-block:: python + + auth = OidcDeviceAuth( + client_id="questdb", + device_authorization_endpoint="https://idp/.../device", + token_endpoint="https://idp/.../token", + scope="openid groups", + groups_in_token=True, # send id_token (True) vs access_token (False) + audience="questdb", # optional; some IdPs need it to set `aud` + cache="memory") + +Which token is sent +------------------- + +The helper mirrors QuestDB's own selection logic +(``groupsEncodedInToken ?
idToken : accessToken``): + +============================================ ================= +``acl.oidc.groups.encoded.in.token`` Helper sends +============================================ ================= +``true`` ``id_token`` +``false`` ``access_token`` +============================================ ================= + +When sending the ``id_token`` the ``openid`` scope is requested automatically. + +Token lifecycle (cache + refresh) +--------------------------------- + +``token()`` returns the cached token while it is valid (with a small clock-skew +margin). When it nears expiry the helper silently refreshes it using the +``refresh_token`` if one was issued. If the refresh token is missing or rejected +(expired/revoked), it re-runs the interactive sign-in; a transient network error +is raised instead, so you can retry without being needlessly re-prompted. A lock +serializes refresh so parallel cells/threads don't double-prompt. + +Cache backends (``cache=`` argument): + +* ``"memory"`` *(default)* — process-global, nothing written to disk. + Re-running cells is silent; a kernel restart re-prompts once. +* ``"file"`` — ``~/.questdb/oidc-cache.json`` (mode ``600``). Survives kernel + restarts and is shared across kernels on the same host. **Security + trade-off:** the refresh token is stored at rest. +* ``None`` — never persist; prompt every time. + +Non-interactive contexts +------------------------- + +Scheduled / non-interactive notebooks (papermill, cron, CI) have no human to +authorize the device. The helper detects this and raises +:class:`~questdb.auth.OidcInteractionRequired` instead of hanging. Use a QuestDB +**service-account REST token** or the **client-credentials** grant there. + +Connection adapters +=================== + +* :meth:`QuestDB.sql <questdb.auth.QuestDB.sql>` — query over REST ``/exec`` to a + pandas DataFrame using ``Authorization: Bearer``. Recommended: there is no + token-length limit (a groups-encoded JWT can be several KB).
+* :meth:`QuestDB.sqlalchemy_engine <questdb.auth.QuestDB.sqlalchemy_engine>` — + PG-wire engine that injects a fresh token as the ``_sso`` password for every + new connection. Requires ``acl.oidc.pg.token.as.password.enabled=true``. +* :meth:`QuestDB.psycopg <questdb.auth.QuestDB.psycopg>` — a raw psycopg / + psycopg2 connection. +* :meth:`QuestDB.sender <questdb.auth.QuestDB.sender>` — a + :class:`~questdb.ingress.Sender` for ingestion (ILP over HTTP). + +.. note:: + + QuestDB validates the token at **authentication** time, not per query. An + already-open PG connection survives token expiry; only **new** connections + need a fresh token — which is why the PG-wire adapter supplies the token + per-connect. + +.. _oidc_idp_requirements: + +IdP requirements +================ + +The OIDC client referenced by ``acl.oidc.client.id`` must: + +* have the **Device Authorization grant** enabled; +* be a **public client** (no secret in a notebook); +* optionally issue **refresh tokens** for the device grant (for silent refresh); +* issue tokens whose ``aud`` matches ``acl.oidc.audience`` (some IdPs need an + ``audience``/``resource`` request parameter); +* include the **groups** claim in the token (``groups.encoded.in.token=true``) + or expose it via the **userinfo** endpoint (``false``), matching the server. + +Security notes +============== + +* No IdP passwords are ever entered in the notebook; MFA/SSO happen at the IdP. +* ``https`` is required. Plaintext ``http`` to a **loopback** address + (``localhost`` / ``127.0.0.1`` / ``::1``) is always allowed — it never leaves + the host. ``insecure=True`` additionally permits plaintext to a non-loopback + **QuestDB** host (local development only); it does **not** downgrade the + **IdP**, so the device code and refresh token are never sent in cleartext + over the network. Certificate verification is never disabled. +* **Endpoint trust.** The device code and the long-lived refresh token are sent + to the device-authorization and token endpoints, which are discovered from + QuestDB ``/settings``.
The helper requires both endpoints to share a single + origin and rejects the configuration otherwise. Because ``/settings`` is + authoritative-by-QuestDB, a compromised server could in principle point them + elsewhere; pass ``issuer=`` (or ``discovery_url=``) to **pin** the IdP so the + endpoints are verified to belong to it and credentials can't be redirected to + another host. +* Adapters avoid logging the token / PG DSN. Avoid logging them yourself. +* Standard proxy / CA settings (``HTTPS_PROXY``, ``REQUESTS_CA_BUNDLE``, + ``SSL_CERT_FILE``) are honoured; you can also pass ``ca_bundle=``. + +Dependencies +============ + +``token()`` / ``headers()`` need nothing beyond the standard library. The +following are imported lazily, only when used: + +* ``pandas`` — for :meth:`QuestDB.sql <questdb.auth.QuestDB.sql>`; +* ``sqlalchemy`` and ``psycopg`` / ``psycopg2`` — for the PG-wire adapters; +* ``qrcode`` — to render a QR code for phone-based authorization (``qr=True``); +* ``IPython`` — for the rich Jupyter prompt (falls back to plain text). diff --git a/docs/index.rst b/docs/index.rst index 4540c9b5..6babf211 100644 --- a/docs/index.rst +++ b/docs/index.rst @@ -14,6 +14,7 @@ Contents installation sender conf + auth examples api troubleshooting diff --git a/docs/installation.rst b/docs/installation.rst index dc2f0405..fccd7599 100644 --- a/docs/installation.rst +++ b/docs/installation.rst @@ -6,7 +6,7 @@ Dependency ========== The Python QuestDB client does not have any additional run-time dependencies and -will run on any version of Python >= 3.9 on most platforms and architectures. +will run on any version of Python >= 3.10 on most platforms and architectures. From version 3.0.0, this library depends on ``numpy>=1.21.0``. @@ -23,6 +23,12 @@ These are bundled as the ``dataframe`` extra. Without this option, you may still ingest data row-by-row. +The :ref:`OIDC authentication helper <oidc_auth>` (:mod:`questdb.auth`) needs +no extra dependencies for ``token()`` / ``headers()``.
Some of its conveniences +import the following lazily, only when used: ``pandas`` (for ``sql()``), +``sqlalchemy`` and ``psycopg`` / ``psycopg2`` (PG-wire adapters), ``qrcode`` +(QR-code prompt) and ``IPython`` (rich Jupyter prompt). + PIP --- diff --git a/examples/oidc_device_auth.py b/examples/oidc_device_auth.py new file mode 100644 index 00000000..2d7d4535 --- /dev/null +++ b/examples/oidc_device_auth.py @@ -0,0 +1,66 @@ +""" +Interactive OIDC sign-in to QuestDB Enterprise from Python (e.g. a notebook). + +Runs the OAuth 2.0 Device Authorization Grant (RFC 8628) client-side: you +authorize in any browser (laptop or phone), while the code runs on a possibly +remote kernel that only makes outbound calls to your identity provider. + +This requires QuestDB Enterprise with OIDC enabled and an IdP client that has +the device grant enabled. It cannot run unattended (there is a human in the +loop), so it is not part of the automated example suite. +""" + +import sys + +from questdb.auth import connect, OidcDeviceAuth, OidcError +from questdb.ingress import TimestampNanos + + +QUESTDB_URL = 'https://questdb.example.com:9000' + + +def integrated(url: str = QUESTDB_URL): + """The high-level path: sign in, then query / ingest with one object.""" + # First call triggers the interactive device-flow sign-in; the token is + # cached, so re-running this is silent until it expires. + qdb = connect(url) + + # Query straight to a pandas DataFrame over REST (Authorization: Bearer). + df = qdb.sql("SELECT * FROM trades WHERE ts > dateadd('h', -1, now())") + print(df) + + # Feed the same auto-refreshed token into your existing tooling: + # engine = qdb.sqlalchemy_engine() # PG-wire, token as _sso password + # with qdb.psycopg() as conn: ... 
# raw psycopg + with qdb.sender() as sender: # ingestion (ILP over HTTP) + sender.row( + 'trades', + symbols={'symbol': 'ETH-USD', 'side': 'sell'}, + columns={'price': 2615.54, 'amount': 0.00044}, + at=TimestampNanos.now()) + + +def bring_your_own_client(url: str = QUESTDB_URL): + """The low-level path: you just want the token (PG-wire / HTTP / anything).""" + auth = OidcDeviceAuth.from_questdb(url) + + token = auth.token() # valid, auto-refreshed id/access token + headers = auth.headers() # {"Authorization": "Bearer "} + print('Authorization header ready:', 'Authorization' in headers) + + # e.g. hand the token to psycopg yourself over PG-wire: + # import psycopg + # conn = psycopg.connect(host='questdb.example.com', port=8812, + # dbname='qdb', user='_sso', password=token) + return token + + +def main(): + try: + integrated() + except OidcError as e: + sys.stderr.write(f'OIDC sign-in failed: {e}\n') + + +if __name__ == '__main__': + main() diff --git a/setup.py b/setup.py index 74438319..2f6ab44a 100755 --- a/setup.py +++ b/setup.py @@ -175,7 +175,7 @@ def readme(): name='questdb', version='4.1.0', platforms=['any'], - python_requires='>=3.8', + python_requires='>=3.10', install_requires=[], ext_modules = cythonize([ingress_extension()], annotate=True), cmdclass={'build_ext': questdb_build_ext}, diff --git a/src/questdb/auth/__init__.py b/src/questdb/auth/__init__.py new file mode 100644 index 00000000..e3768bca --- /dev/null +++ b/src/questdb/auth/__init__.py @@ -0,0 +1,92 @@ +################################################################################ +## ___ _ ____ ____ +## / _ \ _ _ ___ ___| |_| _ \| __ ) +## | | | | | | |/ _ \/ __| __| | | | _ \ +## | |_| | |_| | __/\__ \ |_| |_| | |_) | +## \__\_\\__,_|\___||___/\__|____/|____/ +## +## Copyright (c) 2014-2019 Appsicle +## Copyright (c) 2019-2024 QuestDB +## +## Licensed under the Apache License, Version 2.0 (the "License"); +## you may not use this file except in compliance with the License. 
+## You may obtain a copy of the License at +## +## http://www.apache.org/licenses/LICENSE-2.0 +## +## Unless required by applicable law or agreed to in writing, software +## distributed under the License is distributed on an "AS IS" BASIS, +## WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +## See the License for the specific language governing permissions and +## limitations under the License. +## +################################################################################ + +""" +OIDC authentication helper for QuestDB (Jupyter-first). + +Runs the OAuth 2.0 Device Authorization Grant (RFC 8628) entirely client-side, +obtains a token, and presents it to QuestDB over the auth paths it already +supports (HTTP ``Bearer`` / PG-wire ``_sso``). Designed for data scientists on +local **and remote** kernels (JupyterHub, SageMaker, Colab, VS Code-remote), +where the kernel has no browser: you authorize in any browser (laptop or +phone), the kernel only makes outbound calls to the IdP. + +Two ways to use it, depending on your needs: + +* **Just the token** — works with anything (PG-wire, HTTP, your own tooling):: + + from questdb.auth import OidcDeviceAuth + + auth = OidcDeviceAuth.from_questdb("https://questdb.example.com:9000") + token = auth.token() # device flow on first use + headers = auth.headers() # {"Authorization": "Bearer .."} + +* **The integrated session** — query to a DataFrame and feed adapters:: + + from questdb.auth import connect + + qdb = connect("https://questdb.example.com:9000") + df = qdb.sql("SELECT * FROM trades LIMIT 10") + engine = qdb.sqlalchemy_engine() # PG-wire, token as _sso password + with qdb.sender() as sender: # ingestion (ILP/HTTP) + ... + +Only ``token()`` / ``headers()`` are needed for the bring-your-own-client path, +and they require no optional dependencies. ``pandas`` (for ``sql()``), +``sqlalchemy`` / ``psycopg`` (adapters), ``qrcode`` and ``IPython`` are imported +lazily, only when used. 
+""" + +from ._device import OidcDeviceAuth +from ._discovery import OidcConfig +from ._cache import TokenCache, TokenSet, FileCache, MemoryCache, NullCache +from ._errors import ( + OidcError, + OidcConfigError, + OidcNetworkError, + OidcInteractionRequired, + OidcDeviceFlowError, + OidcTimeoutError, + OidcAuthError, +) +from ._questdb import QuestDB, connect + +__all__ = [ + 'connect', + 'QuestDB', + 'OidcDeviceAuth', + 'OidcConfig', + 'TokenCache', + 'TokenSet', + 'MemoryCache', + 'FileCache', + 'NullCache', + 'OidcError', + 'OidcConfigError', + 'OidcNetworkError', + 'OidcInteractionRequired', + 'OidcDeviceFlowError', + 'OidcTimeoutError', + 'OidcAuthError', +] diff --git a/src/questdb/auth/_cache.py b/src/questdb/auth/_cache.py new file mode 100644 index 00000000..858e113e --- /dev/null +++ b/src/questdb/auth/_cache.py @@ -0,0 +1,277 @@ +################################################################################ +## ___ _ ____ ____ +## / _ \ _ _ ___ ___| |_| _ \| __ ) +## | | | | | | |/ _ \/ __| __| | | | _ \ +## | |_| | |_| | __/\__ \ |_| |_| | |_) | +## \__\_\\__,_|\___||___/\__|____/|____/ +## +## Copyright (c) 2014-2019 Appsicle +## Copyright (c) 2019-2024 QuestDB +## +## Licensed under the Apache License, Version 2.0 (the "License"); +## you may not use this file except in compliance with the License. +## You may obtain a copy of the License at +## +## http://www.apache.org/licenses/LICENSE-2.0 +## +## Unless required by applicable law or agreed to in writing, software +## distributed under the License is distributed on an "AS IS" BASIS, +## WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +## See the License for the specific language governing permissions and +## limitations under the License. 
+##
+################################################################################
+
+"""Token state and cache backends for :mod:`questdb.auth`."""
+
+from __future__ import annotations
+
+import contextlib
+import json
+import os
+import pathlib
+import tempfile
+import threading
+from dataclasses import asdict, dataclass, replace
+from typing import Dict, Optional, Union
+
+from ._errors import OidcConfigError
+
+# Refresh a little before the real expiry to absorb clock skew / latency.
+DEFAULT_SKEW_SECONDS = 30
+
+
+@dataclass
+class TokenSet:
+    """A set of tokens obtained from the IdP, plus their expiry."""
+
+    access_token: Optional[str] = None
+    id_token: Optional[str] = None
+    refresh_token: Optional[str] = None
+    expires_at: float = 0.0  # epoch seconds; 0 == unknown
+    token_type: str = 'Bearer'
+    scope: Optional[str] = None
+    sub: Optional[str] = None
+    issued_at: float = 0.0  # epoch seconds; 0 == unknown
+
+    def is_valid(self, now: float, skew: float = DEFAULT_SKEW_SECONDS) -> bool:
+        """True if expiry is known and ``now`` is not within ``skew`` of it."""
+        if self.expires_at <= 0:
+            return False
+        # Never let the early-refresh skew exceed half the token's own
+        # lifetime, so a short-lived (< 2*skew) token isn't reported expired
+        # the instant it is issued (which would refresh on every call).
+        if self.issued_at:
+            lifetime = self.expires_at - self.issued_at
+            if lifetime > 0:
+                skew = min(skew, lifetime / 2)
+        return now < (self.expires_at - skew)
+
+    def to_dict(self) -> Dict[str, object]:
+        return asdict(self)
+
+    @classmethod
+    def from_dict(cls, d: Dict[str, object]) -> 'TokenSet':
+        known = set(cls.__dataclass_fields__)
+        return cls(**{k: v for k, v in d.items() if k in known})
+
+
+class TokenCache:
+    """Interface for token caches."""
+
+    def load(self, key: str) -> Optional[TokenSet]:  # pragma: no cover
+        raise NotImplementedError
+
+    def store(self, key: str, tokens: TokenSet) -> None:  # pragma: no cover
+        raise NotImplementedError
+
+    def clear(self, key: str) -> None:  # pragma: no cover
+        raise NotImplementedError
+
+
+# Module-global so that re-running a notebook cell (which constructs a fresh
+# ``OidcDeviceAuth``) reuses the already-acquired token instead of re-prompting.
+_MEMORY_STORE: Dict[str, TokenSet] = {}
+_MEMORY_LOCK = threading.Lock()
+
+
+class MemoryCache(TokenCache):
+    """
+    Process-global, in-memory cache (the default).
+
+    Safest backend: nothing is written to disk. Tokens survive for the life
+    of the Python process, so re-running cells is silent, but a kernel
+    restart re-prompts once.
+    """
+
+    def load(self, key: str) -> Optional[TokenSet]:
+        # Return a copy so callers can't mutate the cached entry in place
+        # (the live token is refreshed/rotated independently).
+        with _MEMORY_LOCK:
+            tokens = _MEMORY_STORE.get(key)
+            return replace(tokens) if tokens is not None else None
+
+    def store(self, key: str, tokens: TokenSet) -> None:
+        with _MEMORY_LOCK:
+            _MEMORY_STORE[key] = replace(tokens)
+
+    def clear(self, key: str) -> None:
+        with _MEMORY_LOCK:
+            _MEMORY_STORE.pop(key, None)
+
+
+class NullCache(TokenCache):
+    """Never persists anything; prompts every time."""
+
+    def load(self, key: str) -> Optional[TokenSet]:
+        return None
+
+    def store(self, key: str, tokens: TokenSet) -> None:
+        pass
+
+    def clear(self, key: str) -> None:
+        pass
+
+
+# Cross-process file locking, used to serialize read-modify-write on the
+# shared cache file. fcntl.flock (POSIX) also serializes across threads/
+# instances in one process (locks are per open file description). Where no OS
+# primitive is available it degrades to a best-effort no-op; the atomic
+# os.replace still guarantees readers never see a torn file.
+try:
+    import fcntl
+
+    def _lock_fd(fd: int) -> None:
+        fcntl.flock(fd, fcntl.LOCK_EX)
+
+    def _unlock_fd(fd: int) -> None:
+        fcntl.flock(fd, fcntl.LOCK_UN)
+except ImportError:  # pragma: no cover - non-POSIX (e.g. Windows)
+    try:
+        import msvcrt
+
+        def _lock_fd(fd: int) -> None:
+            try:
+                msvcrt.locking(fd, msvcrt.LK_LOCK, 1)
+            except OSError:
+                pass
+
+        def _unlock_fd(fd: int) -> None:
+            try:
+                msvcrt.locking(fd, msvcrt.LK_UNLCK, 1)
+            except OSError:
+                pass
+    except ImportError:  # pragma: no cover
+        def _lock_fd(fd: int) -> None:
+            pass
+
+        def _unlock_fd(fd: int) -> None:
+            pass
+
+
+@contextlib.contextmanager
+def _interprocess_lock(lock_path: pathlib.Path):
+    """Best-effort exclusive lock via a sidecar lock file."""
+    fd = os.open(str(lock_path), os.O_CREAT | os.O_RDWR, 0o600)
+    try:
+        _lock_fd(fd)
+        try:
+            yield
+        finally:
+            _unlock_fd(fd)
+    finally:
+        os.close(fd)
+
+
+class FileCache(TokenCache):
+    """
+    Opt-in on-disk cache at ``~/.questdb/oidc-cache.json`` (mode ``0600``).
+
+    Survives kernel restarts and is shared across kernels on the same host.
+    Security trade-off: a refresh token is stored at rest. The file is created
+    owner-only (``0600``) from the start via an atomic temp-file replace, and a
+    sidecar lock file serializes concurrent read-modify-writes across kernels
+    so entries are not corrupted or lost.
+    """
+
+    def __init__(self, path: Optional[Union[str, os.PathLike]] = None):
+        if path is None:
+            path = pathlib.Path.home() / '.questdb' / 'oidc-cache.json'
+        self.path = pathlib.Path(path)
+        self._lock_path = self.path.with_name(self.path.name + '.lock')
+
+    def _ensure_dir(self) -> None:
+        parent = self.path.parent
+        if parent.is_dir():
+            return  # pre-existing dir (maybe $HOME or cwd): leave its mode
+        parent.mkdir(mode=0o700, parents=True, exist_ok=True)
+        with contextlib.suppress(OSError):
+            os.chmod(parent, 0o700)  # mkdir's mode is subject to the umask
+
+    def _read_all(self) -> Dict[str, dict]:
+        try:
+            with open(self.path, 'r', encoding='utf-8') as f:
+                data = json.load(f)
+            if isinstance(data, dict):
+                return data
+        except (FileNotFoundError, ValueError, OSError):
+            pass
+        return {}
+
+    def _write_all(self, data: Dict[str, dict]) -> None:
+        # Atomic, owner-only replace. mkstemp creates the file mode 0600 with a
+        # unique name, so concurrent writers never share a temp file and the
+        # refresh token is never group/world-readable, even briefly.
+        fd, tmp = tempfile.mkstemp(
+            dir=str(self.path.parent), prefix='.oidc-', suffix='.tmp')
+        try:
+            with os.fdopen(fd, 'w', encoding='utf-8') as f:
+                json.dump(data, f)
+            os.replace(tmp, self.path)
+        except BaseException:
+            with contextlib.suppress(OSError):
+                os.unlink(tmp)
+            raise
+
+    def load(self, key: str) -> Optional[TokenSet]:
+        # Lock-free: the atomic replace guarantees a complete file is read.
+        entry = self._read_all().get(key)
+        if isinstance(entry, dict):
+            try:
+                return TokenSet.from_dict(entry)
+            except TypeError:
+                return None
+        return None
+
+    def store(self, key: str, tokens: TokenSet) -> None:
+        self._ensure_dir()
+        with _interprocess_lock(self._lock_path):
+            data = self._read_all()
+            data[key] = tokens.to_dict()
+            self._write_all(data)
+
+    def clear(self, key: str) -> None:
+        self._ensure_dir()
+        with _interprocess_lock(self._lock_path):
+            data = self._read_all()
+            if key in data:
+                del data[key]
+                self._write_all(data)
+
+
+_CacheSpec = Union[str, None, TokenCache]
+
+
+def make_cache(spec: _CacheSpec) -> TokenCache:
+    """Resolve a cache spec (``"memory"`` / ``"file"`` / ``None`` / instance)."""
+    if isinstance(spec, TokenCache):
+        return spec
+    if spec is None or spec == 'none':
+        return NullCache()
+    if spec == 'memory':
+        return MemoryCache()
+    if spec == 'file':
+        return FileCache()
+    raise OidcConfigError(
+        f'Unknown cache backend {spec!r}; '
+        "expected 'memory', 'file', None, or a TokenCache instance.")
diff --git a/src/questdb/auth/_device.py b/src/questdb/auth/_device.py
new file mode 100644
index 00000000..45694c64
--- /dev/null
+++ b/src/questdb/auth/_device.py
@@ -0,0 +1,614 @@
+################################################################################
+##     ___                  _   ____  ____
+##    / _ \ _   _  ___  ___| |_|  _ \| __ )
+##   | | | | | | |/ _ \/ __| __| | | |  _ \
+##   | |_| | |_| |  __/\__ \ |_| |_| | |_) |
+##    \__\_\\__,_|\___||___/\__|____/|____/
+##
+##  Copyright (c) 2014-2019 Appsicle
+##  Copyright (c) 2019-2024 QuestDB
+##
+##  Licensed under the Apache License, Version 2.0 (the "License");
+##  you may not use this file except in compliance with the License.
+## You may obtain a copy of the License at +## +## http://www.apache.org/licenses/LICENSE-2.0 +## +## Unless required by applicable law or agreed to in writing, software +## distributed under the License is distributed on an "AS IS" BASIS, +## WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +## See the License for the specific language governing permissions and +## limitations under the License. +## +################################################################################ + +"""The OAuth 2.0 device authorization grant (RFC 8628) token manager.""" + +from __future__ import annotations + +import base64 +import binascii +import json +import threading +import time +import urllib.parse +import webbrowser +from typing import Any, Dict, Optional + +from ._cache import TokenSet, make_cache +from ._discovery import OidcConfig, resolve_config, validate_endpoint_origins +from ._errors import ( + OidcConfigError, + OidcDeviceFlowError, + OidcError, + OidcInteractionRequired, + OidcNetworkError, + OidcTimeoutError, +) +from ._http import build_ssl_context, post_form +from ._render import ( + Renderer, + _safe_link_url, + detect_interactive, + in_ipython_kernel, + make_renderer, +) + +DEVICE_CODE_GRANT = 'urn:ietf:params:oauth:grant-type:device_code' +REFRESH_GRANT = 'refresh_token' + +_VALID_FLOWS = ('auto', 'device', 'loopback') + +# A non-positive expires_in is non-conformant; treat it as "unknown". +_DEFAULT_EXPIRES_IN = 3600 + + +class _SystemClock: + """Real time source; the default for :class:`OidcDeviceAuth`.""" + sleep = staticmethod(time.sleep) + monotonic = staticmethod(time.monotonic) + now = staticmethod(time.time) + + +_SYSTEM_CLOCK = _SystemClock() + + +def _decode_jwt_claims(token: Optional[str]) -> Dict[str, Any]: + """ + Best-effort decode of a JWT payload **without signature verification**. + + Used only to show a friendly identity in the sign-in message. QuestDB + performs the real validation. 
Returns ``{}`` for opaque/invalid tokens. + """ + if not token or token.count('.') < 2: + return {} + try: + payload = token.split('.')[1] + payload += '=' * (-len(payload) % 4) # restore base64 padding + raw = base64.urlsafe_b64decode(payload.encode('ascii')) + claims = json.loads(raw) + return claims if isinstance(claims, dict) else {} + except (ValueError, binascii.Error, UnicodeDecodeError): + return {} + + +def _identity_from_claims(claims: Dict[str, Any]) -> Optional[str]: + for key in ('email', 'preferred_username', 'upn', 'name', 'sub'): + value = claims.get(key) + if value: + return str(value) + return None + + +class OidcDeviceAuth: + """ + Acquire and refresh an OIDC token via the device authorization grant. + + The token is presented to QuestDB over the auth paths it already + supports: HTTP ``Authorization: Bearer`` or PG-wire ``_sso`` (token as + password). The flow runs entirely client-side; QuestDB is never in the + token-acquisition path. + + Most users only ever call :meth:`token` (or :meth:`headers`). The first + call runs the interactive device flow; subsequent calls return the cached + token and refresh it silently (synchronously, on the first call made after + it nears expiry — there is no background thread). Acquisition is + serialized so concurrent callers don't double-prompt, while a valid cached + token is returned without blocking on another thread's in-progress + sign-in. + + .. code-block:: python + + from questdb.auth import OidcDeviceAuth + + # Discover everything from the QuestDB server: + auth = OidcDeviceAuth.from_questdb("https://questdb.example.com:9000") + token = auth.token() # device flow on first use, else cached + + Or fully explicit (no server discovery): + + .. 
code-block:: python + + auth = OidcDeviceAuth( + client_id="questdb", + device_authorization_endpoint="https://idp/.../device", + token_endpoint="https://idp/.../token", + scope="openid groups", + groups_in_token=True, + audience="questdb", + cache="memory") + """ + + def __init__( + self, + client_id: str, + device_authorization_endpoint: str, + token_endpoint: str, + *, + scope: str = 'openid', + groups_in_token: bool = True, + audience: Optional[str] = None, + issuer: Optional[str] = None, + cache: Any = 'memory', + insecure: bool = False, + ca_bundle: Optional[str] = None, + open_browser: bool = False, + interactive: Optional[bool] = None, + qr: bool = False, + renderer: Optional[Renderer] = None, + default_interval: int = 5, + _clock=None): # injectable time source for testing + if not client_id: + raise OidcConfigError('client_id is required') + if not device_authorization_endpoint: + raise OidcConfigError('device_authorization_endpoint is required') + if not token_endpoint: + raise OidcConfigError('token_endpoint is required') + + # Sending the id_token requires the ``openid`` scope. + if groups_in_token and 'openid' not in scope.split(): + scope = ('openid ' + scope).strip() + + self.config = OidcConfig( + client_id=client_id, + token_endpoint=token_endpoint, + device_authorization_endpoint=device_authorization_endpoint, + scope=scope, + groups_in_token=groups_in_token, + audience=audience, + issuer=issuer) + + # Enforce the credential-endpoint co-location / issuer pin on every + # construction path (not just discovery), so the documented guarantee + # holds for the explicit constructor too. + validate_endpoint_origins( + self.config.token_endpoint, + self.config.device_authorization_endpoint, + self.config.issuer) + + # `insecure` permits plaintext http only to QuestDB (e.g. a local dev + # server). 
The IdP is always held to https — or loopback http — by + # _idp_post, so the device code / refresh token are never sent in + # cleartext over the network even when this is set. + self.insecure = insecure + self.open_browser = open_browser + self._interactive = interactive + self._default_interval = default_interval + self._cache = make_cache(cache) + self._ctx = build_ssl_context(ca_bundle) + self._renderer = renderer if renderer is not None else make_renderer(qr=qr) + # Serializes token *acquisition* (a silent refresh or the interactive + # sign-in) only. Concurrent callers are possible via the threaded + # SQLAlchemy/psycopg adapters: without this, several connections + # opening as the token expires would run overlapping refreshes, and + # with refresh-token rotation all but one would fail and force a + # spurious re-prompt. It is NOT held on the fast path, so a caller with + # a valid cached token never blocks behind another thread's sign-in. + self._lock = threading.Lock() + self._tokens: Optional[TokenSet] = None + clock = _clock or _SYSTEM_CLOCK + self._sleep = clock.sleep + self._monotonic = clock.monotonic + self._now = clock.now + + # -- construction ------------------------------------------------------- + + @classmethod + def from_questdb( + cls, + url: str, + *, + client_id: Optional[str] = None, + scope: Optional[str] = None, + audience: Optional[str] = None, + groups_in_token: Optional[bool] = None, + issuer: Optional[str] = None, + discovery_url: Optional[str] = None, + token_endpoint: Optional[str] = None, + device_authorization_endpoint: Optional[str] = None, + flow: str = 'auto', + cache: Any = 'memory', + insecure: bool = False, + ca_bundle: Optional[str] = None, + open_browser: bool = False, + interactive: Optional[bool] = None, + qr: bool = False, + renderer: Optional[Renderer] = None, + _clock=None) -> 'OidcDeviceAuth': # injectable time source + """ + Build an :class:`OidcDeviceAuth` by discovering config from QuestDB. 
+ + Reads ``{url}/settings`` for the OIDC client id, scope, endpoints and + groups mode, falling back to the IdP ``.well-known`` document for the + device-authorization endpoint when QuestDB does not advertise it. + Any explicit keyword overrides discovery. + """ + _validate_flow(flow) + ctx = build_ssl_context(ca_bundle) + cfg = resolve_config( + questdb_url=url, + client_id=client_id, + scope=scope, + audience=audience, + groups_in_token=groups_in_token, + token_endpoint=token_endpoint, + device_authorization_endpoint=device_authorization_endpoint, + issuer=issuer, + discovery_url=discovery_url, + ctx=ctx, + insecure=insecure) + return cls( + client_id=cfg.client_id, + device_authorization_endpoint=cfg.device_authorization_endpoint, + token_endpoint=cfg.token_endpoint, + scope=cfg.scope, + groups_in_token=cfg.groups_in_token, + audience=cfg.audience, + issuer=cfg.issuer, + cache=cache, + insecure=insecure, + ca_bundle=ca_bundle, + open_browser=open_browser, + interactive=interactive, + qr=qr, + renderer=renderer, + _clock=_clock) + + # -- public API --------------------------------------------------------- + + def token(self) -> str: + """ + Return a valid token for QuestDB, acquiring or refreshing as needed. + + Returns the ``id_token`` when the server expects groups encoded in the + token (``acl.oidc.groups.encoded.in.token=true``), otherwise the + ``access_token`` — mirroring QuestDB's own selection logic. + """ + return self._select(self._obtain_tokens()) + + def headers(self) -> Dict[str, str]: + """Return ``{"Authorization": "Bearer "}``.""" + return {'Authorization': f'Bearer {self.token()}'} + + @property + def cache_key(self) -> str: + """ + Identifies the token's security context for caching. + + Two sessions share a cached token only when they would accept the same + one: same IdP token endpoint (**path included**, so multi-tenant realms + sharing a host don't collide), client id, scope *set* (order-insensitive), + and audience. 
The QuestDB URL is deliberately excluded — the same IdP + token is valid against any QuestDB that trusts it. + """ + c = self.config + scope = ' '.join(sorted(c.scope.split())) if c.scope else '' + return '\x1f'.join([ + c.issuer or '', + _normalize_url(c.token_endpoint), + c.client_id, + scope, + c.audience or '']) + + def clear(self) -> None: + """Forget the cached token (forces a fresh sign-in next time).""" + # Serialize against acquisition so a concurrent refresh/sign-in can't + # re-populate the cache right after we clear it. + with self._lock: + self._tokens = None + self._cache.clear(self.cache_key) + + # -- token lifecycle ---------------------------------------------------- + + def _select(self, tokens: TokenSet) -> str: + if self.config.groups_in_token: + if not tokens.id_token: + raise OidcConfigError( + 'Server expects groups encoded in the token but the IdP ' + 'returned no id_token. Ensure the "openid" scope is ' + 'requested (current scope: ' + f'{self.config.scope!r}).') + return tokens.id_token + if not tokens.access_token: + raise OidcConfigError('IdP returned no access_token.') + return tokens.access_token + + def _has_required_token(self, tokens: TokenSet) -> bool: + """ + True if ``tokens`` carries the kind :meth:`_select` will return — the + ``id_token`` when groups are encoded in the token, else the + ``access_token``. The cache gate and the post-refresh check share this + predicate so they can't disagree with ``_select``. + """ + if self.config.groups_in_token: + return bool(tokens.id_token) + return bool(tokens.access_token) + + def _obtain_tokens(self) -> TokenSet: + # Fast path: return a valid cached token without taking the lock, so a + # caller with a usable token never blocks behind another thread's + # in-progress refresh or interactive sign-in. 
+ tokens = self._valid_cached() + if tokens is not None: + return tokens + # Slow path: serialize acquisition so concurrent callers don't run + # overlapping refreshes or double-prompt; the loser re-checks and + # reuses the winner's freshly acquired token. + with self._lock: + tokens = self._valid_cached() + if tokens is not None: + return tokens + return self._acquire() + + def _valid_cached(self) -> Optional[TokenSet]: + tokens = self._tokens + if tokens is None: + tokens = self._cache.load(self.cache_key) + if tokens is not None: + self._tokens = tokens + if (tokens is not None and tokens.is_valid(self._now()) + and self._has_required_token(tokens)): + return tokens + return None + + def _acquire(self) -> TokenSet: + # Called while holding self._lock. Try a silent refresh, else run the + # interactive device flow. + tokens = self._tokens + if tokens is not None and tokens.refresh_token: + try: + refreshed = self._refresh(tokens) + except OidcNetworkError: + # Transient connectivity failure: the refresh token is still + # valid, so re-authenticating won't help (the interactive flow + # needs the same network) and would needlessly re-prompt. + # Surface it — the cached token + refresh_token are kept, so a + # later call retries the refresh. + raise + except OidcError: + # The refresh token was rejected (expired/revoked) or the IdP + # returned an unusable response: fall through to a fresh + # interactive sign-in. + pass + else: + # Only accept a refresh that actually yields the token kind we + # need. Some IdPs don't re-issue the id_token on refresh; such + # a response is unusable, so fall through to the interactive + # flow rather than caching it and looping on every call. 
+ if self._has_required_token(refreshed): + self._store(refreshed) + return refreshed + + fresh = self._run_device_flow() + self._store(fresh) + return fresh + + def _store(self, tokens: TokenSet) -> None: + self._tokens = tokens + self._cache.store(self.cache_key, tokens) + + def _tokenset_from_response(self, body: Dict[str, Any]) -> TokenSet: + try: + expires_in = int(body.get('expires_in', _DEFAULT_EXPIRES_IN)) + except (TypeError, ValueError): + expires_in = _DEFAULT_EXPIRES_IN + if expires_in <= 0: + # A non-positive lifetime would mark a just-issued token as already + # expired, causing refresh/re-prompt churn. Treat it as unknown. + expires_in = _DEFAULT_EXPIRES_IN + claims = (_decode_jwt_claims(body.get('id_token')) + or _decode_jwt_claims(body.get('access_token'))) + now = self._now() + return TokenSet( + access_token=body.get('access_token'), + id_token=body.get('id_token'), + refresh_token=body.get('refresh_token'), + expires_at=now + expires_in, + issued_at=now, + token_type=body.get('token_type', 'Bearer'), + scope=body.get('scope', self.config.scope), + sub=claims.get('sub')) + + def _idp_post(self, url: str, form: Dict[str, Any]): + # IdP POSTs carry the device code / refresh token, so they are always + # required to be https (loopback http is fine for local dev); the + # user's `insecure` flag — which is about the QuestDB link — never + # downgrades them. + return post_form(url, form, ctx=self._ctx, insecure=False) + + def _refresh(self, tokens: TokenSet) -> TokenSet: + status, body = self._idp_post( + self.config.token_endpoint, + { + 'grant_type': REFRESH_GRANT, + 'refresh_token': tokens.refresh_token, + 'client_id': self.config.client_id, + 'scope': self.config.scope, + }) + if status == 200: + refreshed = self._tokenset_from_response(body) + # Many IdPs do not rotate the refresh token; keep the old one. 
+ if not refreshed.refresh_token: + refreshed.refresh_token = tokens.refresh_token + return refreshed + raise OidcDeviceFlowError( + f"Token refresh failed: {body.get('error', 'unknown error')}", + error=body.get('error'), + error_description=body.get('error_description')) + + # -- device flow (RFC 8628) --------------------------------------------- + + def _run_device_flow(self) -> TokenSet: + if not self._is_interactive(): + raise OidcInteractionRequired( + 'Interactive sign-in is required, but no interactive terminal ' + 'or notebook was detected (e.g. papermill / cron / CI). Use a ' + 'QuestDB service-account REST token or the OAuth2 ' + 'client-credentials grant for non-interactive contexts.') + + resp = self._request_device_code() + self._renderer.on_prompt(resp) + self._maybe_open_browser(resp) + tokens = self._poll_for_token(resp) + claims = (_decode_jwt_claims(tokens.id_token) + or _decode_jwt_claims(tokens.access_token)) + identity = _identity_from_claims(claims) + self._renderer.on_success( + identity, max(0.0, tokens.expires_at - self._now())) + return tokens + + def _request_device_code(self) -> Dict[str, Any]: + form = { + 'client_id': self.config.client_id, + 'scope': self.config.scope, + } + if self.config.audience: + form['audience'] = self.config.audience + status, body = self._idp_post( + self.config.device_authorization_endpoint, form) + if status == 200 and body.get('device_code') and body.get('user_code'): + return body + error = body.get('error') + if status in (400, 404, 405) or error in ( + 'invalid_client', 'unauthorized_client', + 'unsupported_grant_type'): + raise OidcDeviceFlowError( + 'The IdP rejected the device-authorization request ' + f'(HTTP {status}, error={error!r}). 
Ensure the OIDC client ' + f'{self.config.client_id!r} has the device grant ' + "('urn:ietf:params:oauth:grant-type:device_code') enabled and " + 'is registered as a public client.', + error=error, + error_description=body.get('error_description')) + raise OidcDeviceFlowError( + f'Device authorization request failed (HTTP {status}): ' + f'{body.get("error_description") or error or body}', + error=error, + error_description=body.get('error_description')) + + def _poll_for_token(self, resp: Dict[str, Any]) -> TokenSet: + device_code = resp['device_code'] + try: + interval = max(1, int(resp.get('interval', self._default_interval))) + except (TypeError, ValueError): + interval = self._default_interval + try: + expires_in = int(resp.get('expires_in', 600)) + except (TypeError, ValueError): + expires_in = 600 + deadline = self._monotonic() + expires_in + + while True: + remaining = deadline - self._monotonic() + if remaining <= 0: + self._renderer.on_failure( + 'Code expired — run the cell again to retry.') + raise OidcTimeoutError( + 'The device code expired before authorization completed. ' + 'Run the sign-in again.', + error='expired_token') + self._renderer.on_waiting(remaining) + self._sleep(interval) + + status, body = self._idp_post( + self.config.token_endpoint, + { + 'grant_type': DEVICE_CODE_GRANT, + 'device_code': device_code, + 'client_id': self.config.client_id, + }) + + if status == 200 and body.get('access_token'): + return self._tokenset_from_response(body) + + error = body.get('error') + if error == 'authorization_pending': + continue + if error == 'slow_down': + interval += 5 + continue + if error == 'expired_token': + self._renderer.on_failure( + 'Code expired — run the cell again to retry.') + raise OidcTimeoutError( + 'The device code expired before authorization completed. ' + 'Run the sign-in again.', + error=error) + # access_denied or any other terminal error. 
+ description = body.get('error_description') or error or 'unknown error' + self._renderer.on_failure(f'Sign-in failed: {description}') + raise OidcDeviceFlowError( + f'Device flow failed: {description}', + error=error, + error_description=body.get('error_description')) + + # -- helpers ------------------------------------------------------------ + + def _is_interactive(self) -> bool: + if self._interactive is not None: + return self._interactive + return detect_interactive() + + def _maybe_open_browser(self, resp: Dict[str, Any]) -> None: + # Never auto-open on a (possibly remote) notebook kernel; only do so + # for an explicitly opted-in local terminal session. + if not self.open_browser or in_ipython_kernel(): + return + # Only open an http(s) URL — never a javascript:/data: scheme from a + # malicious or MITM'd device response. + target = _safe_link_url( + resp.get('verification_uri_complete') + or resp.get('verification_uri') + or resp.get('verification_url')) + if target: + try: + webbrowser.open(target) + except Exception: + pass + + +def _validate_flow(flow: str) -> None: + if flow not in _VALID_FLOWS: + raise OidcConfigError( + f'Unknown flow {flow!r}; expected one of {_VALID_FLOWS}.') + if flow == 'loopback': + raise OidcConfigError( + "The 'loopback' (Authorization Code + PKCE) flow is not yet " + "implemented. Use flow='device' (works on local and remote " + 'kernels alike).') + + +def _normalize_url(url: str) -> str: + # Full URL with scheme/host lower-cased and the default port dropped, but + # the path kept (it distinguishes multi-tenant realms). Used for the cache + # key so trivial spelling differences don't cause a spurious re-prompt. 
+ parts = urllib.parse.urlparse(url) + scheme = (parts.scheme or '').lower() + host = (parts.hostname or '').lower() + default_port = {'https': 443, 'http': 80}.get(scheme) + if parts.port and parts.port != default_port: + netloc = f'{host}:{parts.port}' + else: + netloc = host + query = f'?{parts.query}' if parts.query else '' + return f'{scheme}://{netloc}{parts.path}{query}' diff --git a/src/questdb/auth/_discovery.py b/src/questdb/auth/_discovery.py new file mode 100644 index 00000000..a1cccbe8 --- /dev/null +++ b/src/questdb/auth/_discovery.py @@ -0,0 +1,335 @@ +################################################################################ +## ___ _ ____ ____ +## / _ \ _ _ ___ ___| |_| _ \| __ ) +## | | | | | | |/ _ \/ __| __| | | | _ \ +## | |_| | |_| | __/\__ \ |_| |_| | |_) | +## \__\_\\__,_|\___||___/\__|____/|____/ +## +## Copyright (c) 2014-2019 Appsicle +## Copyright (c) 2019-2024 QuestDB +## +## Licensed under the Apache License, Version 2.0 (the "License"); +## you may not use this file except in compliance with the License. +## You may obtain a copy of the License at +## +## http://www.apache.org/licenses/LICENSE-2.0 +## +## Unless required by applicable law or agreed to in writing, software +## distributed under the License is distributed on an "AS IS" BASIS, +## WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +## See the License for the specific language governing permissions and +## limitations under the License. +## +################################################################################ + +""" +OIDC configuration discovery. + +Resolution order, mirroring the design doc: + +1. ``GET {questdb_url}/settings`` (public, no auth) -> the QuestDB-authoritative + ``acl.oidc.*`` values (client id, scope, endpoints, groups mode). +2. If the device-authorization endpoint is not advertised by QuestDB (today's + servers), fall back to the IdP discovery document + (``{issuer}/.well-known/openid-configuration``). 
+""" + +from __future__ import annotations + +import ssl +import urllib.parse +from dataclasses import dataclass +from typing import Any, Dict, Optional + +from ._errors import OidcConfigError +from ._http import get_json + +# QuestDB /settings keys (see EntPropServerConfiguration.exportConfiguration()). +_K_ENABLED = 'acl.oidc.enabled' +_K_CLIENT_ID = 'acl.oidc.client.id' +_K_SCOPE = 'acl.oidc.scope' +_K_TOKEN_ENDPOINT = 'acl.oidc.token.endpoint' +_K_AUTHORIZATION_ENDPOINT = 'acl.oidc.authorization.endpoint' +_K_DEVICE_ENDPOINT = 'acl.oidc.device.authorization.endpoint' # design §7 (new) +_K_GROUPS_IN_TOKEN = 'acl.oidc.groups.encoded.in.token' +_K_AUDIENCE = 'acl.oidc.audience' +_K_HOST = 'acl.oidc.host' +_K_PORT = 'acl.oidc.port' +_K_TLS_ENABLED = 'acl.oidc.tls.enabled' + + +@dataclass +class OidcConfig: + """Resolved OIDC parameters needed to run the device flow.""" + + client_id: str + token_endpoint: str + device_authorization_endpoint: str + scope: str = 'openid' + groups_in_token: bool = True + audience: Optional[str] = None + issuer: Optional[str] = None + authorization_endpoint: Optional[str] = None + + +def _as_bool(value: Any, default: Optional[bool] = None) -> Optional[bool]: + if value is None: + return default + if isinstance(value, bool): + return value + if isinstance(value, (int, float)): + return bool(value) + if isinstance(value, str): + v = value.strip().lower() + if v in ('true', '1', 'yes', 'on'): + return True + if v in ('false', '0', 'no', 'off', ''): + return False + return default + + +def settings_config(settings: Any) -> Dict[str, Any]: + """ + Return the flat config map from a ``/settings`` response. + + Modern servers nest values under a ``"config"`` object; older ones return + them at the top level. We tolerate both. 
+ """ + if isinstance(settings, dict): + cfg = settings.get('config') + if isinstance(cfg, dict): + return cfg + return settings + return {} + + +def fetch_settings( + questdb_url: str, + *, + ctx: Optional[ssl.SSLContext] = None, + insecure: bool = False, + timeout: float = 30) -> Dict[str, Any]: + """Fetch and return the QuestDB ``/settings`` config map.""" + base = questdb_url.rstrip('/') + data = get_json(base + '/settings', ctx=ctx, insecure=insecure, + timeout=timeout) + return settings_config(data) + + +def _origin(url: str) -> Optional[str]: + parts = urllib.parse.urlparse(url) + if parts.scheme and parts.netloc: + return f'{parts.scheme}://{parts.netloc}' + return None + + +_DEFAULT_PORTS = {'https': 443, 'http': 80} + + +def _normalized_origin(url: str) -> tuple: + """(scheme, host, port) with default ports filled in, for comparison.""" + parts = urllib.parse.urlparse(url) + scheme = (parts.scheme or '').lower() + host = (parts.hostname or '').lower() + port = parts.port or _DEFAULT_PORTS.get(scheme) + return (scheme, host, port) + + +def _origin_str(url: str) -> str: + scheme, host, port = _normalized_origin(url) + return f'{scheme}://{host}:{port}' if port else f'{scheme}://{host}' + + +def validate_endpoint_origins( + token_endpoint: str, + device_authorization_endpoint: str, + issuer: Optional[str] = None) -> None: + """ + Reject an OIDC configuration that would send credentials off-origin. + + The device code and the long-lived refresh token are POSTed to the device- + authorization and token endpoints. 
These come from QuestDB ``/settings`` + (or the IdP ``.well-known``), which the client trusts; this check limits a + tampered or MITM'd configuration from redirecting those credentials to an + attacker-controlled host: + + * the two credential endpoints must share a single origin (they are always + co-located on the authorization server per RFC 8628); and + * when the ``issuer`` is known independently (passed explicitly or resolved + from the IdP ``.well-known``), both endpoints must belong to it. + + Pass ``issuer=`` to pin the IdP explicitly when QuestDB advertises the + endpoints directly (so a compromised server cannot redirect the token POST). + """ + if _normalized_origin(token_endpoint) != _normalized_origin( + device_authorization_endpoint): + raise OidcConfigError( + 'OIDC token and device-authorization endpoints are on different ' + f'origins ({_origin_str(token_endpoint)} vs ' + f'{_origin_str(device_authorization_endpoint)}); refusing to send ' + 'credentials. This indicates a misconfigured or tampered OIDC ' + 'configuration.') + if issuer: + issuer_origin = _normalized_origin(issuer) + for label, url in ( + ('token endpoint', token_endpoint), + ('device-authorization endpoint', + device_authorization_endpoint)): + if _normalized_origin(url) != issuer_origin: + raise OidcConfigError( + f'OIDC {label} origin ({_origin_str(url)}) does not match ' + f'the issuer origin ({_origin_str(issuer)}); refusing to ' + 'send credentials to an endpoint outside the trusted ' + 'issuer.') + + +def _resolve_endpoint(value: Optional[str], cfg: Dict[str, Any]) -> Optional[str]: + """ + Turn a possibly-relative endpoint into a full URL. + + QuestDB usually exports fully-resolved URLs, but some deployments store + only the path (e.g. ``/as/token.oauth2``) alongside ``acl.oidc.host``. 
+ """ + if not value: + return None + if value.startswith('http://') or value.startswith('https://'): + return value + if value.startswith('/'): + host = cfg.get(_K_HOST) + if host: + tls = _as_bool(cfg.get(_K_TLS_ENABLED), default=True) + scheme = 'https' if tls else 'http' + port = cfg.get(_K_PORT) + netloc = f'{host}:{port}' if port else str(host) + return f'{scheme}://{netloc}{value}' + return value + + +def well_known_url(issuer: str) -> str: + return issuer.rstrip('/') + '/.well-known/openid-configuration' + + +def discover_device_endpoint_from_idp( + *, + issuer: Optional[str], + discovery_url: Optional[str], + token_endpoint: Optional[str], + ctx: Optional[ssl.SSLContext] = None, + insecure: bool = False, + timeout: float = 30) -> Dict[str, Any]: + """ + Fetch the IdP ``.well-known/openid-configuration`` and return it. + + The discovery URL is taken from ``discovery_url``, else built from + ``issuer``, else (best effort) from the origin of ``token_endpoint``. + """ + url = discovery_url + if not url and issuer: + url = well_known_url(issuer) + if not url and token_endpoint: + origin = _origin(token_endpoint) + if origin: + url = well_known_url(origin) + if not url: + raise OidcConfigError( + 'Cannot discover the IdP device-authorization endpoint: no ' + 'issuer / discovery_url given and none could be derived. Pass ' + 'issuer=... or device_authorization_endpoint=... 
explicitly.') + return get_json(url, ctx=ctx, insecure=insecure, timeout=timeout) + + +def resolve_config( + *, + questdb_url: Optional[str] = None, + client_id: Optional[str] = None, + scope: Optional[str] = None, + audience: Optional[str] = None, + groups_in_token: Optional[bool] = None, + token_endpoint: Optional[str] = None, + device_authorization_endpoint: Optional[str] = None, + authorization_endpoint: Optional[str] = None, + issuer: Optional[str] = None, + discovery_url: Optional[str] = None, + ctx: Optional[ssl.SSLContext] = None, + insecure: bool = False, + timeout: float = 30) -> OidcConfig: + """ + Resolve a complete :class:`OidcConfig`. + + Explicit keyword arguments always win; anything left ``None`` is filled in + from QuestDB ``/settings`` (if ``questdb_url`` is given) and, as a last + resort for the device endpoint, the IdP discovery document. + """ + cfg: Dict[str, Any] = {} + if questdb_url: + cfg = fetch_settings( + questdb_url, ctx=ctx, insecure=insecure, timeout=timeout) + enabled = _as_bool(cfg.get(_K_ENABLED), default=None) + if enabled is False: + raise OidcConfigError( + f'QuestDB at {questdb_url} reports OIDC is disabled ' + f'({_K_ENABLED}=false). Nothing to authenticate against.') + + client_id = client_id or cfg.get(_K_CLIENT_ID) + if not client_id: + raise OidcConfigError( + 'Missing OIDC client_id. QuestDB did not advertise ' + f'{_K_CLIENT_ID!r} via /settings; pass client_id=... 
explicitly.') + + if scope is None: + scope = cfg.get(_K_SCOPE) or 'openid' + if groups_in_token is None: + groups_in_token = _as_bool(cfg.get(_K_GROUPS_IN_TOKEN), default=True) + if audience is None: + audience = cfg.get(_K_AUDIENCE) or None + + token_endpoint = ( + token_endpoint or _resolve_endpoint(cfg.get(_K_TOKEN_ENDPOINT), cfg)) + authorization_endpoint = ( + authorization_endpoint + or _resolve_endpoint(cfg.get(_K_AUTHORIZATION_ENDPOINT), cfg)) + device_authorization_endpoint = ( + device_authorization_endpoint + or _resolve_endpoint(cfg.get(_K_DEVICE_ENDPOINT), cfg)) + + # Fall back to IdP discovery when QuestDB doesn't advertise the device + # endpoint (and/or the token endpoint). This contacts the IdP, so it is + # held to https/loopback (insecure=False) regardless of the QuestDB flag. + if not device_authorization_endpoint or not token_endpoint: + doc = discover_device_endpoint_from_idp( + issuer=issuer, discovery_url=discovery_url, + token_endpoint=token_endpoint, ctx=ctx, insecure=False, + timeout=timeout) + device_authorization_endpoint = ( + device_authorization_endpoint + or doc.get('device_authorization_endpoint')) + token_endpoint = token_endpoint or doc.get('token_endpoint') + authorization_endpoint = ( + authorization_endpoint or doc.get('authorization_endpoint')) + issuer = issuer or doc.get('issuer') + + if not token_endpoint: + raise OidcConfigError( + 'Could not resolve the OIDC token endpoint from QuestDB /settings ' + 'or IdP discovery. Pass token_endpoint=... explicitly.') + if not device_authorization_endpoint: + raise OidcConfigError( + 'Could not resolve the device-authorization endpoint. The IdP ' + 'discovery document did not contain ' + '"device_authorization_endpoint". Ensure the IdP supports the ' + 'device grant, or pass device_authorization_endpoint=... 
' + 'explicitly.') + + # Note: the credential-endpoint origin check (validate_endpoint_origins) + # is enforced centrally in OidcDeviceAuth.__init__, which every path + # (including the explicit constructor) goes through. + + return OidcConfig( + client_id=client_id, + token_endpoint=token_endpoint, + device_authorization_endpoint=device_authorization_endpoint, + scope=scope, + groups_in_token=bool(groups_in_token), + audience=audience, + issuer=issuer, + authorization_endpoint=authorization_endpoint) diff --git a/src/questdb/auth/_errors.py b/src/questdb/auth/_errors.py new file mode 100644 index 00000000..7262f0cc --- /dev/null +++ b/src/questdb/auth/_errors.py @@ -0,0 +1,91 @@ +################################################################################ +## ___ _ ____ ____ +## / _ \ _ _ ___ ___| |_| _ \| __ ) +## | | | | | | |/ _ \/ __| __| | | | _ \ +## | |_| | |_| | __/\__ \ |_| |_| | |_) | +## \__\_\\__,_|\___||___/\__|____/|____/ +## +## Copyright (c) 2014-2019 Appsicle +## Copyright (c) 2019-2024 QuestDB +## +## Licensed under the Apache License, Version 2.0 (the "License"); +## you may not use this file except in compliance with the License. +## You may obtain a copy of the License at +## +## http://www.apache.org/licenses/LICENSE-2.0 +## +## Unless required by applicable law or agreed to in writing, software +## distributed under the License is distributed on an "AS IS" BASIS, +## WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +## See the License for the specific language governing permissions and +## limitations under the License. 
+## +################################################################################ + +"""Exceptions raised by :mod:`questdb.auth`.""" + +from __future__ import annotations + +from typing import Optional + + +class OidcError(Exception): + """Base class for every error raised by :mod:`questdb.auth`.""" + + +class OidcConfigError(OidcError): + """ + The OIDC configuration could not be resolved or is inconsistent. + + Raised, for example, when QuestDB does not advertise OIDC, when the + IdP device-authorization endpoint cannot be discovered, or when a + required argument is missing. + """ + + +class OidcNetworkError(OidcError): + """A network-level failure while talking to QuestDB or the IdP.""" + + +class OidcInteractionRequired(OidcError): + """ + Interactive sign-in is required but the process is not interactive. + + This is raised instead of hanging forever when the device flow is + started from a context with no human to authorize it (e.g. a + ``papermill`` run, a cron job or CI). Use a QuestDB service-account + REST token or the OAuth2 client-credentials grant in those contexts. + """ + + +class OidcDeviceFlowError(OidcError): + """ + The OAuth 2.0 device authorization grant failed. + + The original IdP ``error``/``error_description`` are preserved on the + exception when available. + """ + + def __init__( + self, + message: str, + *, + error: Optional[str] = None, + error_description: Optional[str] = None): + super().__init__(message) + self.error = error + self.error_description = error_description + + +class OidcTimeoutError(OidcDeviceFlowError): + """The user did not authorize the device in time (the code expired).""" + + +class OidcAuthError(OidcError): + """ + QuestDB rejected the token we presented. + + Typically a ``401``/``403`` from the server. The message includes hints + about the most common causes (scope / ``groups.encoded.in.token`` / + ``audience`` mismatches). 
+ """ diff --git a/src/questdb/auth/_http.py b/src/questdb/auth/_http.py new file mode 100644 index 00000000..fc5b158b --- /dev/null +++ b/src/questdb/auth/_http.py @@ -0,0 +1,234 @@ +################################################################################ +## ___ _ ____ ____ +## / _ \ _ _ ___ ___| |_| _ \| __ ) +## | | | | | | |/ _ \/ __| __| | | | _ \ +## | |_| | |_| | __/\__ \ |_| |_| | |_) | +## \__\_\\__,_|\___||___/\__|____/|____/ +## +## Copyright (c) 2014-2019 Appsicle +## Copyright (c) 2019-2024 QuestDB +## +## Licensed under the Apache License, Version 2.0 (the "License"); +## you may not use this file except in compliance with the License. +## You may obtain a copy of the License at +## +## http://www.apache.org/licenses/LICENSE-2.0 +## +## Unless required by applicable law or agreed to in writing, software +## distributed under the License is distributed on an "AS IS" BASIS, +## WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +## See the License for the specific language governing permissions and +## limitations under the License. +## +################################################################################ + +""" +A tiny HTTP helper built on the standard library. + +OIDC device flow implementation deliberately avoids a hard dependency on ``requests``/``httpx`` +so that ``OidcDeviceAuth.token()`` / ``headers()`` work out of the box with no +extra installs. Only the device flow, discovery and the REST adapter use this +module; the heavier adapters (SQLAlchemy / psycopg / ingestion ``Sender``) bring +their own transports. + +Standard proxy environment variables (``HTTPS_PROXY`` / ``HTTP_PROXY`` / +``NO_PROXY``) are honoured automatically by ``urllib``. A custom CA bundle can be +supplied explicitly or via ``REQUESTS_CA_BUNDLE`` / ``SSL_CERT_FILE``. 
+""" + +from __future__ import annotations + +import ipaddress +import json +import os +import ssl +import urllib.error +import urllib.parse +import urllib.request +from typing import Any, Dict, Mapping, Optional + +from ._errors import OidcConfigError, OidcNetworkError, OidcError + +_DEFAULT_TIMEOUT = 30 +_USER_AGENT = 'questdb-python-client (oidc-auth)' + + +def build_ssl_context(ca_bundle: Optional[str] = None) -> ssl.SSLContext: + """ + Build an SSL context, honouring an explicit CA bundle or the + ``REQUESTS_CA_BUNDLE`` / ``SSL_CERT_FILE`` environment variables + (useful behind a corporate TLS-intercepting proxy). + """ + ca = ( + ca_bundle + or os.environ.get('REQUESTS_CA_BUNDLE') + or os.environ.get('SSL_CERT_FILE')) + if ca: + if os.path.isdir(ca): + return ssl.create_default_context(capath=ca) + return ssl.create_default_context(cafile=ca) + return ssl.create_default_context() + + +class HttpResponse: + """A minimal response wrapper (status + raw body + headers).""" + + __slots__ = ('status', 'body', 'headers') + + def __init__(self, status: int, body: bytes, headers: Mapping[str, str]): + self.status = status + self.body = body + self.headers = dict(headers) + + def text(self) -> str: + return self.body.decode('utf-8', errors='replace') + + def json(self) -> Any: + return json.loads(self.body.decode('utf-8')) + + @property + def ok(self) -> bool: + return 200 <= self.status < 300 + + +def _is_loopback(host: Optional[str]) -> bool: + # Traffic to a loopback address never leaves the host, so plaintext http + # carries no network interception risk and is always permitted. 
+ if not host: + return False + if host.lower() == 'localhost': + return True + try: + return ipaddress.ip_address(host).is_loopback + except ValueError: + return False + + +def _require_secure(url: str, insecure: bool) -> None: + parts = urllib.parse.urlparse(url) + scheme = parts.scheme.lower() + if scheme == 'https': + return + if scheme == 'http': + if _is_loopback(parts.hostname): + return + if insecure: + return + raise OidcConfigError( + f'Refusing to use insecure URL {url!r} (scheme {scheme!r}). Use https ' + '(loopback http is always allowed for local development); pass ' + 'insecure=True only to permit plaintext to a non-loopback host.') + + +def _opener(ctx: Optional[ssl.SSLContext]) -> urllib.request.OpenerDirector: + # build_opener keeps the default ProxyHandler (which reads *_PROXY env + # vars), while letting us pin our own TLS context. + if ctx is None: + return urllib.request.build_opener() + return urllib.request.build_opener(urllib.request.HTTPSHandler(context=ctx)) + + +def request( + method: str, + url: str, + *, + form: Optional[Mapping[str, Any]] = None, + data: Optional[bytes] = None, + headers: Optional[Mapping[str, str]] = None, + timeout: float = _DEFAULT_TIMEOUT, + ctx: Optional[ssl.SSLContext] = None, + insecure: bool = False) -> HttpResponse: + """ + Perform a single HTTP request. + + ``form`` is form-url-encoded into the body (``application/x-www-form- + urlencoded``). HTTP error statuses (``4xx``/``5xx``) are returned as an + :class:`HttpResponse` rather than raised, so callers can inspect OAuth + error bodies (e.g. ``authorization_pending``). Only genuine network + failures raise (:class:`OidcNetworkError`). 
+ """ + _require_secure(url, insecure) + body: Optional[bytes] = data + req_headers = {'User-Agent': _USER_AGENT, 'Accept': 'application/json'} + if form is not None: + body = urllib.parse.urlencode( + {k: v for k, v in form.items() if v is not None}).encode('utf-8') + req_headers['Content-Type'] = 'application/x-www-form-urlencoded' + if headers: + req_headers.update(headers) + + req = urllib.request.Request( + url, data=body, headers=req_headers, method=method.upper()) + try: + with _opener(ctx).open(req, timeout=timeout) as resp: + return HttpResponse( + getattr(resp, 'status', resp.getcode()), + resp.read(), + resp.headers) + except urllib.error.HTTPError as e: + # 4xx/5xx still carry a (possibly JSON) body we want to inspect. + # Map a mid-body read failure to a network error (rather than letting a + # bare OSError escape) and close the error response so its socket isn't + # leaked (the poll loop drives many 400s during a long sign-in). + try: + body = e.read() + except (TimeoutError, OSError) as read_err: + raise OidcNetworkError( + f'Failed to read response from {url}: {read_err}') from read_err + finally: + e.close() + return HttpResponse(e.code, body, e.headers or {}) + except urllib.error.URLError as e: + raise OidcNetworkError(f'Failed to reach {url}: {e.reason}') from e + except (TimeoutError, OSError) as e: + raise OidcNetworkError(f'Failed to reach {url}: {e}') from e + + +def get_json( + url: str, + *, + headers: Optional[Mapping[str, str]] = None, + timeout: float = _DEFAULT_TIMEOUT, + ctx: Optional[ssl.SSLContext] = None, + insecure: bool = False) -> Any: + """GET a URL and parse a JSON response, raising on non-2xx.""" + resp = request( + 'GET', url, headers=headers, timeout=timeout, ctx=ctx, + insecure=insecure) + if not resp.ok: + raise OidcError( + f'HTTP {resp.status} from {url}: {resp.text()[:200]}') + try: + return resp.json() + except (ValueError, UnicodeDecodeError) as e: + raise OidcError(f'Invalid JSON from {url}: {e}') from e + + +def 
post_form( + url: str, + form: Mapping[str, Any], + *, + headers: Optional[Mapping[str, str]] = None, + timeout: float = _DEFAULT_TIMEOUT, + ctx: Optional[ssl.SSLContext] = None, + insecure: bool = False) -> tuple[int, Dict[str, Any]]: + """ + POST a form-url-encoded body and parse the JSON response. + + Returns ``(status, parsed_json)``. Used for the device-authorization and + token endpoints, which return JSON bodies on both success and error. + """ + resp = request( + 'POST', url, form=form, headers=headers, timeout=timeout, ctx=ctx, + insecure=insecure) + try: + parsed = resp.json() + except (ValueError, UnicodeDecodeError): + if resp.ok: + raise OidcError( + f'Expected JSON from {url}, got: {resp.text()[:200]}') + # Non-JSON error body: surface the status + text. + raise OidcError(f'HTTP {resp.status} from {url}: {resp.text()[:200]}') + if not isinstance(parsed, dict): + raise OidcError(f'Unexpected JSON shape from {url}: {parsed!r}') + return resp.status, parsed diff --git a/src/questdb/auth/_questdb.py b/src/questdb/auth/_questdb.py new file mode 100644 index 00000000..2b95d75b --- /dev/null +++ b/src/questdb/auth/_questdb.py @@ -0,0 +1,305 @@ +################################################################################ +## ___ _ ____ ____ +## / _ \ _ _ ___ ___| |_| _ \| __ ) +## | | | | | | |/ _ \/ __| __| | | | _ \ +## | |_| | |_| | __/\__ \ |_| |_| | |_) | +## \__\_\\__,_|\___||___/\__|____/|____/ +## +## Copyright (c) 2014-2019 Appsicle +## Copyright (c) 2019-2024 QuestDB +## +## Licensed under the Apache License, Version 2.0 (the "License"); +## you may not use this file except in compliance with the License. +## You may obtain a copy of the License at +## +## http://www.apache.org/licenses/LICENSE-2.0 +## +## Unless required by applicable law or agreed to in writing, software +## distributed under the License is distributed on an "AS IS" BASIS, +## WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
+## See the License for the specific language governing permissions and +## limitations under the License. +## +################################################################################ + +"""High-level QuestDB session: token, REST queries, and connection adapters.""" + +from __future__ import annotations + +import urllib.parse +from typing import Any, Dict, Optional + +from ._device import OidcDeviceAuth +from ._errors import OidcAuthError, OidcError +from ._http import request + +_DEFAULT_PG_PORT = 8812 +_DEFAULT_DATABASE = 'qdb' + +_AUTH_HINT = ( + 'QuestDB rejected the token (HTTP {status}). Common causes:\n' + " * scope / 'acl.oidc.groups.encoded.in.token' mismatch — the server may " + 'expect the id_token (groups in token) while an access_token was sent, or ' + 'vice-versa;\n' + " * the 'groups'/'sub' claim is missing — check the requested scope;\n" + " * 'aud' mismatch — the token's audience does not match " + "'acl.oidc.audience' (try passing audience=...).") + + +def _import_pandas(): + try: + import pandas # type: ignore + return pandas + except ImportError as e: + raise ImportError( + 'Missing optional dependency `pandas`, required for ' + 'QuestDB.sql(). Install it with `pip install questdb[dataframe]`. 
' + 'See https://py-questdb-client.readthedocs.io/en/latest/' + 'installation.html') from e + + +def _exec_json_to_df(data: Dict[str, Any], pandas): + columns = data.get('columns') or [] + names = [c.get('name') for c in columns] + dataset = data.get('dataset') + if dataset is None: + dataset = data.get('data') or [] + try: + df = pandas.DataFrame(dataset, columns=names or None) + except ValueError as e: + raise OidcError( + f'Unexpected shape in QuestDB /exec response: {e}') from e + for col in columns: + name = col.get('name') + if col.get('type') in ('TIMESTAMP', 'DATE') and name in df.columns: + try: + df[name] = pandas.to_datetime(df[name], errors='coerce') + except Exception: + pass + return df + + +def _pg_module(): + try: + import psycopg # type: ignore # psycopg v3 + return psycopg + except ImportError: + pass + try: + import psycopg2 # type: ignore + return psycopg2 + except ImportError: + raise ImportError( + 'A PostgreSQL driver is required: install `psycopg` (v3) or ' + '`psycopg2-binary`.') + + +class QuestDB: + """ + A thin, authenticated QuestDB session built on an :class:`OidcDeviceAuth`. + + Provides a one-call DataFrame query over REST plus adapters that feed the + same auto-refreshed token into your existing tools (SQLAlchemy / psycopg / + the ingestion ``Sender``). You can also just take :meth:`token` / + :meth:`headers` and wire them up yourself. 
+ """ + + def __init__( + self, + url: str, + auth: OidcDeviceAuth, + *, + insecure: bool = False): + self.url = url.rstrip('/') + self.auth = auth + self._insecure = insecure + self._ctx = auth._ctx + self._parts = urllib.parse.urlparse(self.url) + + # -- token access ------------------------------------------------------- + + def token(self) -> str: + """Return a valid, auto-refreshed token (see :meth:`OidcDeviceAuth.token`).""" + return self.auth.token() + + def headers(self) -> Dict[str, str]: + """Return ``{"Authorization": "Bearer "}``.""" + return self.auth.headers() + + # -- REST query --------------------------------------------------------- + + def sql(self, query: str, *, limit: Optional[str] = None, + timeout: float = 60) -> 'pandas.DataFrame': + """ + Run a SQL query over QuestDB's REST ``/exec`` endpoint and return a + :class:`pandas.DataFrame`. + + Uses ``Authorization: Bearer`` (no token-length limit, unlike PG-wire), + which makes it the recommended path for large groups-encoded JWTs. + + :param query: The SQL query to run. + :param limit: Optional QuestDB ``limit`` (e.g. ``"1,1000"``). + :param timeout: Request timeout in seconds. + """ + pandas = _import_pandas() + params = {'query': query} + if limit is not None: + params['limit'] = limit + url = f'{self.url}/exec?' 
+ urllib.parse.urlencode(params) + resp = request( + 'GET', url, headers=self.headers(), ctx=self._ctx, + insecure=self._insecure, timeout=timeout) + if resp.status in (401, 403): + raise OidcAuthError(_AUTH_HINT.format(status=resp.status)) + if not resp.ok: + detail = resp.text()[:300] + try: + detail = resp.json().get('error', detail) + except Exception: + pass + raise OidcError( + f'QuestDB query failed (HTTP {resp.status}): {detail}') + return _exec_json_to_df(resp.json(), pandas) + + # -- connection adapters ------------------------------------------------ + + def _host(self) -> Optional[str]: + return self._parts.hostname + + def sqlalchemy_engine( + self, + *, + host: Optional[str] = None, + pg_port: int = _DEFAULT_PG_PORT, + database: str = _DEFAULT_DATABASE, + drivername: Optional[str] = None, + **engine_kwargs) -> 'sqlalchemy.engine.Engine': + """ + Build a SQLAlchemy ``Engine`` for QuestDB's PG-wire endpoint. + + Connects as user ``_sso`` and injects a **fresh** token as the password + for every new connection (via a ``do_connect`` listener), so pooled + connections always authenticate with a valid token. Requires + ``acl.oidc.pg.token.as.password.enabled=true`` on the server. 
+ """ + try: + from sqlalchemy import create_engine, event + from sqlalchemy.engine import URL + except ImportError as e: + raise ImportError( + 'SQLAlchemy is required for QuestDB.sqlalchemy_engine(); ' + 'install it with `pip install sqlalchemy`.') from e + + if drivername is None: + mod = _pg_module() + drivername = ( + 'postgresql+psycopg' + if mod.__name__ == 'psycopg' + else 'postgresql+psycopg2') + + url = URL.create( + drivername=drivername, + username='_sso', + host=host or self._host(), + port=pg_port, + database=database) + engine = create_engine(url, **engine_kwargs) + + auth = self.auth + + @event.listens_for(engine, 'do_connect') + def _provide_token(dialect, conn_rec, cargs, cparams): # noqa: ANN001 + cparams['password'] = auth.token() + + return engine + + def psycopg( + self, + *, + host: Optional[str] = None, + pg_port: int = _DEFAULT_PG_PORT, + database: str = _DEFAULT_DATABASE, + **connect_kwargs) -> 'Any': + """ + Open a raw psycopg (v3) or psycopg2 connection to QuestDB's PG-wire + endpoint, authenticating as ``_sso`` with the current token. + + The token is captured at connect time; open a new connection to pick up + a refreshed token. + """ + mod = _pg_module() + return mod.connect( + host=host or self._host(), + port=pg_port, + dbname=database, + user='_sso', + password=self.auth.token(), + **connect_kwargs) + + def sender(self, *, port: Optional[int] = None, + **sender_kwargs) -> 'questdb.ingress.Sender': + """ + Build a :class:`questdb.ingress.Sender` (ILP-over-HTTP) configured with + the current bearer token, for ingestion. + + The token is captured at creation time; create a new sender to pick up + a refreshed token. + """ + try: + from questdb.ingress import Sender + except ImportError as e: + raise ImportError( + 'The compiled `questdb.ingress` module is required for ' + 'QuestDB.sender(). 
Install the full client wheel ' + '(`pip install questdb`).') from e + + scheme = 'https' if self._parts.scheme == 'https' else 'http' + resolved_port = port or self._parts.port or ( + 443 if scheme == 'https' else 9000) + conf = f'{scheme}::addr={self._host()}:{resolved_port};' + return Sender.from_conf(conf, token=self.auth.token(), **sender_kwargs) + + +def connect( + url: str, + *, + flow: str = 'auto', + cache: Any = 'memory', + insecure: bool = False, + eager: bool = True, + **opts) -> QuestDB: + """ + High-level entry point: authenticate to QuestDB and return a + :class:`QuestDB` session. + + .. code-block:: python + + from questdb.auth import connect + + qdb = connect("https://questdb.example.com:9000") # signs in + df = qdb.sql("SELECT * FROM trades LIMIT 10") + + Configuration (OIDC client id, scope, endpoints, groups mode) is discovered + from ``{url}/settings`` and, as needed, the IdP ``.well-known`` document. + Re-running the same call reuses the cached token (no re-prompt). + + :param url: The QuestDB HTTP(S) base URL, e.g. + ``"https://questdb.example.com:9000"``. + :param flow: ``"auto"`` (default), ``"device"`` or ``"loopback"``. Today + ``"auto"`` always resolves to the device flow (works on local and + remote kernels); ``"loopback"`` is reserved for a future release. + :param cache: Token cache backend: ``"memory"`` (default), ``"file"`` or + ``None``. + :param insecure: Allow plaintext ``http://`` URLs (development only). + :param eager: If ``True`` (default), sign in immediately; otherwise defer + until the first call that needs a token. + :param opts: Forwarded to :meth:`OidcDeviceAuth.from_questdb` (e.g. + ``client_id``, ``scope``, ``audience``, ``issuer``, ``open_browser``, + ``qr``, ``ca_bundle``). 
+ """ + auth = OidcDeviceAuth.from_questdb( + url, flow=flow, cache=cache, insecure=insecure, **opts) + qdb = QuestDB(url, auth, insecure=insecure) + if eager: + auth.token() + return qdb diff --git a/src/questdb/auth/_render.py b/src/questdb/auth/_render.py new file mode 100644 index 00000000..8a0fa9fb --- /dev/null +++ b/src/questdb/auth/_render.py @@ -0,0 +1,336 @@ +################################################################################ +## ___ _ ____ ____ +## / _ \ _ _ ___ ___| |_| _ \| __ ) +## | | | | | | |/ _ \/ __| __| | | | _ \ +## | |_| | |_| | __/\__ \ |_| |_| | |_) | +## \__\_\\__,_|\___||___/\__|____/|____/ +## +## Copyright (c) 2014-2019 Appsicle +## Copyright (c) 2019-2024 QuestDB +## +## Licensed under the Apache License, Version 2.0 (the "License"); +## you may not use this file except in compliance with the License. +## You may obtain a copy of the License at +## +## http://www.apache.org/licenses/LICENSE-2.0 +## +## Unless required by applicable law or agreed to in writing, software +## distributed under the License is distributed on an "AS IS" BASIS, +## WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +## See the License for the specific language governing permissions and +## limitations under the License. +## +################################################################################ + +""" +Presentation of the device-flow prompt. + +Renders a clickable link + user code in Jupyter (via ``IPython.display``) and +falls back to plain text on a terminal. Nothing here is required for +``token()`` / ``headers()`` to work; ``IPython`` and ``qrcode`` are imported +lazily and only when actually used. 
+""" + +from __future__ import annotations + +import html +import sys +import urllib.parse +from typing import Any, Dict, Optional, TextIO + + +def in_ipython_kernel() -> bool: + """True when running inside an interactive Jupyter/ZMQ kernel.""" + try: + from IPython import get_ipython # type: ignore + except Exception: + return False + ip = get_ipython() + if ip is None: + return False + # ZMQInteractiveShell == notebook / qtconsole / lab; TerminalInteractive + # Shell == ipython in a terminal (still interactive). + return ip.__class__.__name__ in ( + 'ZMQInteractiveShell', 'TerminalInteractiveShell') + + +def detect_interactive() -> bool: + """ + Best-effort detection of whether a human can complete the sign-in. + + Interactive when attached to a TTY or running in an interactive IPython + shell. This guards against hanging forever in a non-interactive context + (papermill / cron / CI). + """ + if in_ipython_kernel(): + return True + try: + return bool(sys.stdin and sys.stdin.isatty() + and sys.stdout and sys.stdout.isatty()) + except Exception: + return False + + +def _verification_uri(resp: Dict[str, Any]) -> str: + # RFC 8628 uses ``verification_uri``; some IdPs (older Google) use + # ``verification_url``. + return resp.get('verification_uri') or resp.get('verification_url') or '' + + +def _verification_uri_complete(resp: Dict[str, Any]) -> Optional[str]: + return (resp.get('verification_uri_complete') + or resp.get('verification_url_complete')) + + +def _safe_link_url(url: Optional[str]) -> Optional[str]: + """ + Return ``url`` only if it uses an ``http(s)`` scheme, else ``None``. + + The verification URL comes from the IdP's device-authorization response, + which is untrusted input. Embedding it in an HTML ``href`` without a scheme + allowlist would let a malicious/MITM'd response inject a ``javascript:`` or + ``data:`` URL that executes in the notebook DOM when clicked + (``html.escape`` guards markup, not the URL scheme). 
+ """ + if not url: + return None + try: + scheme = urllib.parse.urlparse(url).scheme.lower() + except (ValueError, TypeError): + return None + return url if scheme in ('http', 'https') else None + + +def _render_link(url: Optional[str], *, text: Optional[str] = None) -> str: + """ + Render ``url`` as a clickable link, or as inert escaped text if its scheme + is not ``http(s)``. + + The visible label defaults to the URL itself. When the URL is rejected, the + (escaped) URL is shown as plain text so the user can still see/copy it, but + it is never turned into a clickable/executable link. + """ + safe = _safe_link_url(url) + label = html.escape(text if text is not None else (url or '')) + if safe is None: + return label + return (f'{label}') + + +def format_prompt(resp: Dict[str, Any]) -> str: + """Plain-text sign-in prompt (also used as the notebook fallback).""" + uri = _verification_uri(resp) + code = resp.get('user_code', '') + complete = _verification_uri_complete(resp) + lines = [ + '🔐 Sign in to QuestDB', + f' Open {uri} and enter code: {code}', + ] + if complete: + lines.append(f' (or open directly: {complete})') + return '\n'.join(lines) + + +def _fmt_mmss(seconds: float) -> str: + seconds = max(0, int(seconds)) + return f'{seconds // 60}:{seconds % 60:02d}' + + +class Renderer: + """No-op renderer interface; subclasses present the prompt to the user.""" + + def on_prompt(self, resp: Dict[str, Any]) -> None: + pass + + def on_waiting(self, seconds_left: float) -> None: + pass + + def on_success(self, identity: Optional[str], expires_in: float) -> None: + pass + + def on_failure(self, message: str) -> None: + pass + + +class TerminalRenderer(Renderer): + """Plain-text rendering for terminals (writes to ``stderr`` by default).""" + + def __init__(self, stream: Optional[TextIO] = None, qr: bool = False): + self._stream = stream if stream is not None else sys.stderr + self._qr = qr + self._countdown_active = False + + def _write(self, text: str) -> None: + try: 
+ self._stream.write(text) + self._stream.flush() + except Exception: + pass + + def on_prompt(self, resp: Dict[str, Any]) -> None: + self._write(format_prompt(resp) + '\n') + if self._qr: + target = _verification_uri_complete(resp) or _verification_uri(resp) + art = _qr_ascii(target) + if art: + self._write(art + '\n') + + def on_waiting(self, seconds_left: float) -> None: + self._countdown_active = True + self._write(f'\r ⏳ waiting for authorization… ({_fmt_mmss(seconds_left)} left) ') + + def on_success(self, identity: Optional[str], expires_in: float) -> None: + if self._countdown_active: + self._write('\n') + self._countdown_active = False + who = f' as {identity}' if identity else '' + mins = max(1, int(round(expires_in / 60))) + self._write(f'✅ Signed in{who} — token cached, expires in {mins} min\n') + + def on_failure(self, message: str) -> None: + if self._countdown_active: + self._write('\n') + self._countdown_active = False + self._write(f'❌ {message}\n') + + +class JupyterRenderer(Renderer): + """Rich rendering for Jupyter using an updatable display handle.""" + + def __init__(self, qr: bool = False): + self._qr = qr + self._handle = None + self._resp: Dict[str, Any] = {} + + def _display(self, html_str: str): + from IPython.display import HTML, display # type: ignore + if self._handle is None: + self._handle = display(HTML(html_str), display_id=True) + else: + self._handle.update(HTML(html_str)) + + def _panel(self, body: str) -> str: + return ( + '
' + + body + '
') + + def on_prompt(self, resp: Dict[str, Any]) -> None: + self._resp = resp + uri = _verification_uri(resp) + code = html.escape(str(resp.get('user_code', ''))) + complete = _verification_uri_complete(resp) + body = [ + '
' + '🔐 Sign in to QuestDB
', + f'
Open {_render_link(uri)} and enter code:
', + f'
{code}
', + ] + if _safe_link_url(complete): + body.append( + '
' + _render_link( + complete, text='Click here to authorize directly →') + + '
') + if self._qr: + qr_target = _safe_link_url(complete) or _safe_link_url(uri) + data_uri = _qr_data_uri(qr_target) if qr_target else None + if data_uri: + body.append( + f'QR code') + body.append( + '
' + '⏳ waiting for authorization…
') + self._display(self._panel(''.join(body))) + + def on_waiting(self, seconds_left: float) -> None: + # Re-render the whole panel (cheap) with an updated countdown. + if not self._resp: + return + self._resp = dict(self._resp) + self._render_with_status( + f'⏳ waiting for authorization… ({_fmt_mmss(seconds_left)} left)', + color='#888') + + def on_success(self, identity: Optional[str], expires_in: float) -> None: + who = html.escape(identity) if identity else '' + mins = max(1, int(round(expires_in / 60))) + suffix = f' as {who}' if who else '' + self._render_with_status( + f'✅ Signed in{suffix} — token cached, expires in {mins} min', + color='#2e7d32') + + def on_failure(self, message: str) -> None: + self._render_with_status('❌ ' + html.escape(message), color='#c62828') + + def _render_with_status(self, status_html: str, color: str) -> None: + resp = self._resp + uri = _verification_uri(resp) + code = html.escape(str(resp.get('user_code', ''))) + complete = _verification_uri_complete(resp) + body = [ + '
' + '🔐 Sign in to QuestDB
', + f'
Open {_render_link(uri)} and enter code:
', + f'
{code}
', + ] + if _safe_link_url(complete): + body.append( + '
' + _render_link( + complete, text='Click here to authorize directly →') + + '
') + body.append( + f'
{status_html}
') + self._display(self._panel(''.join(body))) + + +def make_renderer(qr: bool = False) -> Renderer: + """Pick a renderer appropriate for the current environment.""" + if in_ipython_kernel(): + try: + import IPython.display # noqa: F401 # type: ignore + return JupyterRenderer(qr=qr) + except Exception: + pass + return TerminalRenderer(qr=qr) + + +def _qr_ascii(data: str) -> Optional[str]: + if not data: + return None + try: + import qrcode # type: ignore + except Exception: + return None + try: + qr = qrcode.QRCode(border=1) + qr.add_data(data) + qr.make(fit=True) + import io + buf = io.StringIO() + qr.print_ascii(out=buf, invert=True) + return buf.getvalue() + except Exception: + return None + + +def _qr_data_uri(data: str) -> Optional[str]: + if not data: + return None + try: + import qrcode # type: ignore + except Exception: + return None + try: + import base64 + import io + img = qrcode.make(data) + buf = io.BytesIO() + img.save(buf, format='PNG') + b64 = base64.b64encode(buf.getvalue()).decode('ascii') + return f'data:image/png;base64,{b64}' + except Exception: + return None diff --git a/test/test.py b/test/test.py index 18f4461a..6fdd418f 100755 --- a/test/test.py +++ b/test/test.py @@ -33,6 +33,24 @@ from fixture import _parse_version +# OIDC auth tests (pure-Python; no compiled extension required). +# Imported here so they are picked up by ``unittest.main()`` in CI. 
+from test_auth import ( + TestDeviceFlow, + TestNonInteractive, + TestRefresh, + TestFileCache, + TestDiscovery, + TestRestAdapter, + TestAdapters, + TestConcurrency, + TestConfigHelpers, + TestEndpointValidation, + TestCacheKey, + TestTransportSecurity, + TestRendererSecurity, +) + NUMPY_VERSION = _parse_version(np.__version__) try: diff --git a/test/test_auth.py b/test/test_auth.py new file mode 100644 index 00000000..427ccb57 --- /dev/null +++ b/test/test_auth.py @@ -0,0 +1,1167 @@ +#!/usr/bin/env python3 +################################################################################ +## ___ _ ____ ____ +## / _ \ _ _ ___ ___| |_| _ \| __ ) +## | | | | | | |/ _ \/ __| __| | | | _ \ +## | |_| | |_| | __/\__ \ |_| |_| | |_) | +## \__\_\\__,_|\___||___/\__|____/|____/ +## +## Copyright (c) 2014-2019 Appsicle +## Copyright (c) 2019-2024 QuestDB +## +## Licensed under the Apache License, Version 2.0 (the "License"); +## you may not use this file except in compliance with the License. +## You may obtain a copy of the License at +## +## http://www.apache.org/licenses/LICENSE-2.0 +## +## Unless required by applicable law or agreed to in writing, software +## distributed under the License is distributed on an "AS IS" BASIS, +## WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +## See the License for the specific language governing permissions and +## limitations under the License. +## +################################################################################ + +""" +Standalone unit tests for ``questdb.auth``. + +These do not require the compiled ``questdb.ingress`` extension; they exercise +the device flow, discovery, caching, refresh and the REST adapter against an +in-process mock IdP + mock QuestDB server. 
+ +Run directly:: + + python3 test/test_auth.py -v +""" + +import base64 +import importlib.util +import json +import os +import sys +import tempfile +import threading +import types +import unittest +import http.server +import urllib.parse +from unittest import mock + +sys.dont_write_bytecode = True +sys.path.insert(0, os.path.join(os.path.dirname(__file__), '..', 'src')) + +from questdb.auth import ( # noqa: E402 + OidcDeviceAuth, + QuestDB, + connect, + OidcError, + OidcConfigError, + OidcDeviceFlowError, + OidcTimeoutError, + OidcInteractionRequired, + OidcAuthError, + OidcNetworkError, + TokenSet, +) +from questdb.auth._cache import FileCache, MemoryCache, _MEMORY_STORE # noqa: E402 +from questdb.auth._render import Renderer # noqa: E402 + +try: + import pandas as pd +except ImportError: + pd = None + +try: + import fcntl as _fcntl # noqa: F401 + _HAS_FCNTL = True +except ImportError: + _HAS_FCNTL = False + +_HAS_PG_DRIVER = ( + importlib.util.find_spec('psycopg') is not None + or importlib.util.find_spec('psycopg2') is not None) + + +class _FakeAuth: + """A stand-in OidcDeviceAuth for adapter tests (no network).""" + + _ctx = None + + def __init__(self, token='TKN'): + self._token = token + self.calls = 0 + + def token(self): + self.calls += 1 + return self._token + + def headers(self): + return {'Authorization': f'Bearer {self._token}'} + + +def _jwt(claims): + """Build an unsigned JWT-shaped string with the given payload claims.""" + def b64(obj): + raw = json.dumps(obj).encode() + return base64.urlsafe_b64encode(raw).rstrip(b'=').decode() + return f'{b64({"alg": "none"})}.{b64(claims)}.sig' + + +ID_TOKEN = _jwt({'sub': 'user-1', 'email': 'alice@example.com', + 'groups': ['analysts']}) +ACCESS_TOKEN = _jwt({'sub': 'user-1', 'scope': 'openid'}) + + +class FakeClock: + """Deterministic clock: ``sleep`` advances both monotonic and wall time.""" + + def __init__(self): + self.mono = 0.0 + self.wall = 1_000_000.0 + self.sleeps = [] + + def sleep(self, dt): + 
self.sleeps.append(dt) + self.mono += dt + self.wall += dt + + def monotonic(self): + return self.mono + + def now(self): + return self.wall + + +class MockState: + """Scriptable behaviour shared with the request handler.""" + + def __init__(self): + self.settings = {} + self.well_known = None + # FIFO of (status, body) returned for device_code grant polls. + # When exhausted, the last entry repeats. + self.token_script = [(200, None)] # None => default success body + self.refresh_response = None # (status, body) or None + self.device_response = None # override device-auth response body + self.device_status = 200 + self.expected_bearer = None # for /exec auth check + self.exec_response = None + self.exec_status = 200 + # Recording. + self.device_requests = 0 + self.token_requests = [] + self.refresh_requests = 0 + self.exec_requests = [] + + +class _Handler(http.server.BaseHTTPRequestHandler): + def log_message(self, *args): + pass + + @property + def state(self): + return self.server.state + + def _send_json(self, status, obj): + data = json.dumps(obj).encode() + self.send_response(status) + self.send_header('Content-Type', 'application/json') + self.send_header('Content-Length', str(len(data))) + self.end_headers() + self.wfile.write(data) + + def _read_form(self): + length = int(self.headers.get('Content-Length', 0)) + body = self.rfile.read(length).decode() + return {k: v[0] for k, v in urllib.parse.parse_qs(body).items()} + + def do_GET(self): + path = urllib.parse.urlparse(self.path).path + if path == '/settings': + self._send_json(200, self.state.settings) + elif path == '/.well-known/openid-configuration': + if self.state.well_known is None: + self._send_json(404, {'error': 'not found'}) + else: + self._send_json(200, self.state.well_known) + elif path == '/exec': + auth = self.headers.get('Authorization') + if self.state.expected_bearer and auth != ( + 'Bearer ' + self.state.expected_bearer): + self._send_json(401, {'error': 'unauthorized'}) + return + 
self.state.exec_requests.append(self.path) + self._send_json(self.state.exec_status, self.state.exec_response or { + 'columns': [ + {'name': 'ts', 'type': 'TIMESTAMP'}, + {'name': 'price', 'type': 'DOUBLE'}, + ], + 'dataset': [ + ['2021-01-01T00:00:00.000000Z', 1.5], + ['2021-01-02T00:00:00.000000Z', 2.5], + ], + 'count': 2, + }) + else: + self._send_json(404, {'error': 'not found'}) + + def do_POST(self): + path = urllib.parse.urlparse(self.path).path + form = self._read_form() + if path == '/device': + self.state.device_requests += 1 + if self.state.device_status != 200: + self._send_json(self.state.device_status, + self.state.device_response or + {'error': 'invalid_client'}) + return + body = self.state.device_response or { + 'device_code': 'DEV-CODE', + 'user_code': 'WDJB-MJHT', + 'verification_uri': 'https://idp.example.com/device', + 'verification_uri_complete': + 'https://idp.example.com/device?user_code=WDJB-MJHT', + 'expires_in': 600, + 'interval': 5, + } + self._send_json(200, body) + elif path == '/token': + grant = form.get('grant_type') + if grant == 'refresh_token': + self.state.refresh_requests += 1 + status, body = self.state.refresh_response or ( + 200, self._default_token_body()) + self._send_json(status, body) + return + self.state.token_requests.append(form) + idx = min(len(self.state.token_requests) - 1, + len(self.state.token_script) - 1) + status, body = self.state.token_script[idx] + if body is None: + body = self._default_token_body() + self._send_json(status, body) + else: + self._send_json(404, {'error': 'not found'}) + + @staticmethod + def _default_token_body(): + return { + 'access_token': ACCESS_TOKEN, + 'id_token': ID_TOKEN, + 'refresh_token': 'REFRESH-1', + 'token_type': 'Bearer', + 'expires_in': 3600, + 'scope': 'openid groups', + } + + +class _MockServer(http.server.HTTPServer): + def __init__(self): + super().__init__(('127.0.0.1', 0), _Handler) + self.state = MockState() + + +class AuthTestBase(unittest.TestCase): + def 
setUp(self): + _MEMORY_STORE.clear() + self.server = _MockServer() + self.state = self.server.state + self.thread = threading.Thread( + target=lambda: self.server.serve_forever(poll_interval=0.02), + daemon=True) + self.thread.start() + self.base = f'http://127.0.0.1:{self.server.server_port}' + + def tearDown(self): + self.server.shutdown() + self.server.server_close() + self.thread.join(timeout=5) + + def make_auth(self, *, clock=None, groups_in_token=True, cache='memory', + interactive=True, **kw): + clock = clock or FakeClock() + self._clock = clock + return OidcDeviceAuth( + client_id='questdb', + device_authorization_endpoint=self.base + '/device', + token_endpoint=self.base + '/token', + scope='openid groups', + groups_in_token=groups_in_token, + cache=cache, + insecure=True, + interactive=interactive, + renderer=Renderer(), + _clock=clock, + **kw) + + +class TestDeviceFlow(AuthTestBase): + def test_happy_path_returns_id_token(self): + self.state.token_script = [ + (400, {'error': 'authorization_pending'}), + (400, {'error': 'authorization_pending'}), + (200, None), + ] + auth = self.make_auth() + token = auth.token() + self.assertEqual(token, ID_TOKEN) + # 3 token polls, slept 'interval' (5s) before each. + self.assertEqual(len(self.state.token_requests), 3) + self.assertEqual(self._clock.sleeps, [5, 5, 5]) + + def test_access_token_when_groups_not_in_token(self): + auth = self.make_auth(groups_in_token=False) + self.assertEqual(auth.token(), ACCESS_TOKEN) + + def test_headers(self): + auth = self.make_auth() + self.assertEqual(auth.headers(), + {'Authorization': 'Bearer ' + ID_TOKEN}) + + def test_slow_down_backs_off(self): + self.state.token_script = [ + (400, {'error': 'slow_down'}), + (200, None), + ] + auth = self.make_auth() + auth.token() + # interval starts at 5, +5 after slow_down. 
+ self.assertEqual(self._clock.sleeps, [5, 10]) + + def test_timeout_when_never_authorized(self): + self.state.device_response = { + 'device_code': 'DEV-CODE', 'user_code': 'X', + 'verification_uri': 'https://idp/device', + 'expires_in': 10, 'interval': 5, + } + self.state.token_script = [(400, {'error': 'authorization_pending'})] + auth = self.make_auth() + with self.assertRaises(OidcTimeoutError): + auth.token() + + def test_access_denied_is_surfaced(self): + self.state.token_script = [ + (400, {'error': 'access_denied', + 'error_description': 'user said no'}), + ] + auth = self.make_auth() + with self.assertRaises(OidcDeviceFlowError) as cm: + auth.token() + self.assertEqual(cm.exception.error, 'access_denied') + self.assertIn('user said no', str(cm.exception)) + + def test_device_endpoint_rejects_grant(self): + self.state.device_status = 400 + self.state.device_response = {'error': 'invalid_client'} + auth = self.make_auth() + with self.assertRaises(OidcDeviceFlowError) as cm: + auth.token() + self.assertIn('device grant', str(cm.exception)) + + def test_token_caches_in_memory_across_instances(self): + self.make_auth().token() + self.assertEqual(self.state.device_requests, 1) + # A brand-new instance with the same config reuses the cached token. + self.make_auth().token() + self.assertEqual(self.state.device_requests, 1) + + def test_missing_id_token_raises_config_error(self): + self.state.token_script = [(200, { + 'access_token': ACCESS_TOKEN, 'token_type': 'Bearer', + 'expires_in': 3600})] # no id_token + auth = self.make_auth(groups_in_token=True) + with self.assertRaises(OidcConfigError): + auth.token() + + def test_200_without_access_token_is_not_success(self): + # A 200 with no access_token must not be treated as a token. 
+ self.state.token_script = [(200, {'token_type': 'Bearer'})] + auth = self.make_auth() + with self.assertRaises(OidcDeviceFlowError): + auth.token() + + def test_access_token_headers(self): + auth = self.make_auth(groups_in_token=False) + self.assertEqual(auth.headers(), + {'Authorization': 'Bearer ' + ACCESS_TOKEN}) + + def test_clear_forces_resignin(self): + auth = self.make_auth() + auth.token() + self.assertEqual(self.state.device_requests, 1) + auth.clear() + auth.token() + self.assertEqual(self.state.device_requests, 2) # prompted again + + def test_openid_scope_auto_added_for_groups_in_token(self): + # groups-in-token requires an id_token, which needs the openid scope. + auth = OidcDeviceAuth( + client_id='questdb', + device_authorization_endpoint=self.base + '/device', + token_endpoint=self.base + '/token', + scope='groups', groups_in_token=True, # no 'openid' + cache='memory', insecure=True, renderer=Renderer()) + self.assertIn('openid', auth.config.scope.split()) + + def test_zero_expires_in_is_treated_as_unknown(self): + # A non-positive expires_in must not mark the just-issued token expired. + self.state.token_script = [(200, { + 'access_token': ACCESS_TOKEN, 'id_token': ID_TOKEN, + 'token_type': 'Bearer', 'expires_in': 0})] + auth = self.make_auth() + auth.token() + self.assertTrue(auth._tokens.is_valid(self._clock.now())) + + def test_short_lived_token_valid_at_issue(self): + # A small positive expires_in (< 2*skew) must not read as expired the + # instant it is issued (adaptive skew = min(skew, lifetime/2)). 
+ self.state.token_script = [(200, { + 'access_token': ACCESS_TOKEN, 'id_token': ID_TOKEN, + 'token_type': 'Bearer', 'expires_in': 20})] + auth = self.make_auth() + auth.token() + t = auth._tokens + self.assertEqual(round(t.expires_at - t.issued_at), 20) + self.assertTrue(t.is_valid(t.issued_at)) # usable right after issue + self.assertFalse(t.is_valid(t.expires_at)) # but still does expire + + def test_open_browser_rejects_dangerous_scheme(self): + auth = self.make_auth(open_browser=True) + with mock.patch('webbrowser.open') as opener: + auth._maybe_open_browser({'verification_uri': 'javascript:alert(1)'}) + opener.assert_not_called() + auth._maybe_open_browser( + {'verification_uri': 'https://idp.example.com/device'}) + opener.assert_called_once_with('https://idp.example.com/device') + + def test_memory_cache_returns_independent_copy(self): + cache = MemoryCache() + cache.store('k', TokenSet(access_token='a', refresh_token='r', + expires_at=1.0)) + loaded = cache.load('k') + loaded.refresh_token = 'MUTATED' + self.assertEqual(cache.load('k').refresh_token, 'r') + + +class TestNonInteractive(AuthTestBase): + def test_non_interactive_raises_without_polling(self): + auth = self.make_auth(interactive=False) + with self.assertRaises(OidcInteractionRequired): + auth.token() + self.assertEqual(self.state.device_requests, 0) + + +class TestRefresh(AuthTestBase): + def _seed_expired(self, auth, refresh_token='REFRESH-1'): + expired = TokenSet( + access_token='old-access', id_token='old-id', + refresh_token=refresh_token, + expires_at=self._clock.now() - 10) + auth._cache.store(auth.cache_key, expired) + + def test_silent_refresh(self): + auth = self.make_auth() + self._seed_expired(auth) + token = auth.token() + self.assertEqual(token, ID_TOKEN) + self.assertEqual(self.state.refresh_requests, 1) + self.assertEqual(self.state.device_requests, 0) # no re-prompt + + def test_refresh_failure_falls_back_to_device_flow(self): + auth = self.make_auth() + self._seed_expired(auth) 
+ self.state.refresh_response = (400, {'error': 'invalid_grant'}) + token = auth.token() + self.assertEqual(token, ID_TOKEN) + self.assertEqual(self.state.refresh_requests, 1) + self.assertEqual(self.state.device_requests, 1) # re-prompted + + def test_refresh_token_preserved_when_not_rotated(self): + auth = self.make_auth() + self._seed_expired(auth) + self.state.refresh_response = (200, { + 'access_token': ACCESS_TOKEN, 'id_token': ID_TOKEN, + 'token_type': 'Bearer', 'expires_in': 3600}) # no new refresh + auth.token() + self.assertEqual(auth._tokens.refresh_token, 'REFRESH-1') + + def test_refresh_without_id_token_falls_back_to_device_flow(self): + # groups_in_token=True but the IdP's refresh omits the id_token: the + # refresh is unusable, so fall back to the interactive flow rather than + # caching it and looping (the device flow yields a complete token). + auth = self.make_auth(groups_in_token=True) + self._seed_expired(auth) + self.state.refresh_response = (200, { + 'access_token': ACCESS_TOKEN, 'token_type': 'Bearer', + 'expires_in': 3600}) # no id_token + token = auth.token() + self.assertEqual(token, ID_TOKEN) # from the device flow + self.assertEqual(self.state.refresh_requests, 1) + self.assertEqual(self.state.device_requests, 1) # fell back + + def test_refresh_without_id_token_non_interactive_does_not_loop(self): + # Same situation but non-interactive: surface a clear error rather than + # repeatedly re-running a refresh that can never satisfy _select. 
+ auth = self.make_auth(groups_in_token=True, interactive=False) + self._seed_expired(auth) + self.state.refresh_response = (200, { + 'access_token': ACCESS_TOKEN, 'token_type': 'Bearer', + 'expires_in': 3600}) # no id_token + with self.assertRaises(OidcInteractionRequired): + auth.token() + self.assertEqual(self.state.device_requests, 0) + + def test_cached_token_missing_required_kind_is_refreshed(self): + # A cached, non-expired token that lacks the required kind (here: + # access_token in non-groups mode) must not pass the cache gate and + # then hard-fail in _select; it should trigger a refresh instead. + auth = self.make_auth(groups_in_token=False) + auth._cache.store(auth.cache_key, TokenSet( + access_token=None, id_token='id', refresh_token='REFRESH-1', + expires_at=self._clock.now() + 3600)) + token = auth.token() + self.assertEqual(token, ACCESS_TOKEN) + self.assertEqual(self.state.refresh_requests, 1) + self.assertEqual(self.state.device_requests, 0) + + def test_refresh_network_error_propagates_without_reprompt(self): + # Both endpoints point at a closed port (same origin, so the co-location + # check passes), so the refresh POST fails at the transport layer. The + # error must propagate from the *token* endpoint (the refresh), proving + # the flow did NOT fall back to the device flow on a transient blip. 
+ clock = FakeClock() + auth = OidcDeviceAuth( + client_id='questdb', + device_authorization_endpoint='http://127.0.0.1:1/device', + token_endpoint='http://127.0.0.1:1/token', # connection refused + scope='openid groups', groups_in_token=True, cache='memory', + insecure=True, interactive=True, renderer=Renderer(), + _clock=clock) + expired = TokenSet( + access_token='old', id_token='old-id', refresh_token='REFRESH-1', + expires_at=clock.now() - 10) + auth._cache.store(auth.cache_key, expired) + + with self.assertRaises(OidcNetworkError) as cm: + auth.token() + # The error is from the refresh (token endpoint), not a device-flow + # fallback (device endpoint), and the refresh token is kept for a retry. + self.assertIn('/token', str(cm.exception)) + self.assertEqual(auth._tokens.refresh_token, 'REFRESH-1') + + +class TestFileCache(AuthTestBase): + def test_file_cache_works_without_os_lock(self): + # Exercise the no-fcntl/no-msvcrt fallback: with the lock primitives + # no-op'd, the atomic temp-file replace must still keep every entry. + import questdb.auth._cache as cache_mod + tmp = tempfile.mkdtemp() + path = os.path.join(tmp, 'cache.json') + with mock.patch.object(cache_mod, '_lock_fd', lambda fd: None), \ + mock.patch.object(cache_mod, '_unlock_fd', lambda fd: None): + cache = FileCache(path) + cache.store('k1', TokenSet(access_token='a1', expires_at=1.0)) + cache.store('k2', TokenSet(access_token='a2', expires_at=1.0)) + self.assertEqual(cache.load('k1').access_token, 'a1') + self.assertEqual(cache.load('k2').access_token, 'a2') + + def test_file_cache_survives_new_instance(self): + tmp = tempfile.mkdtemp() + path = os.path.join(tmp, 'cache.json') + cache1 = FileCache(path) + self.make_auth(cache=cache1).token() + self.assertEqual(self.state.device_requests, 1) + # New process simulation: fresh memory, load from file. 
+ _MEMORY_STORE.clear() + cache2 = FileCache(path) + token = self.make_auth(cache=cache2).token() + self.assertEqual(token, ID_TOKEN) + self.assertEqual(self.state.device_requests, 1) # no re-prompt + # File is mode 600 where supported. + if os.name == 'posix': + self.assertEqual(os.stat(path).st_mode & 0o777, 0o600) + + @unittest.skipUnless( + _HAS_FCNTL, 'cross-process file lock requires fcntl (POSIX)') + def test_concurrent_writes_preserve_all_entries(self): + # 20 writers (distinct instances, same file, distinct keys) racing: + # the sidecar lock + atomic unique-temp replace must keep every entry + # and never corrupt the file or leave a temp behind. + tmp = tempfile.mkdtemp() + path = os.path.join(tmp, 'cache.json') + + def writer(i): + FileCache(path).store( + f'key-{i}', + TokenSet(access_token=f'a{i}', id_token=f'id{i}', + refresh_token=f'r{i}', expires_at=1.0)) + + threads = [threading.Thread(target=writer, args=(i,)) + for i in range(20)] + for t in threads: + t.start() + for t in threads: + t.join(10) + + final = FileCache(path) + for i in range(20): + ts = final.load(f'key-{i}') + self.assertIsNotNone(ts, f'lost entry key-{i}') + self.assertEqual(ts.access_token, f'a{i}') + leftovers = [n for n in os.listdir(tmp) if n.endswith('.tmp')] + self.assertEqual(leftovers, [], f'temp files left behind: {leftovers}') + + +class TestDiscovery(AuthTestBase): + def test_from_questdb_reads_settings(self): + self.state.settings = {'config': { + 'acl.oidc.enabled': True, + 'acl.oidc.client.id': 'questdb', + 'acl.oidc.scope': 'openid groups', + 'acl.oidc.groups.encoded.in.token': True, + 'acl.oidc.token.endpoint': self.base + '/token', + 'acl.oidc.device.authorization.endpoint': self.base + '/device', + }} + auth = OidcDeviceAuth.from_questdb( + self.base, insecure=True, interactive=True, renderer=Renderer(), + _clock=FakeClock()) + self.assertEqual(auth.config.client_id, 'questdb') + self.assertTrue(auth.config.groups_in_token) + 
self.assertEqual(auth.config.device_authorization_endpoint, + self.base + '/device') + self.assertEqual(auth.token(), ID_TOKEN) + + def test_well_known_fallback_for_device_endpoint(self): + # Settings advertise OIDC + token endpoint but NOT the device endpoint. + self.state.settings = {'config': { + 'acl.oidc.enabled': True, + 'acl.oidc.client.id': 'questdb', + 'acl.oidc.scope': 'openid', + 'acl.oidc.groups.encoded.in.token': False, + 'acl.oidc.token.endpoint': self.base + '/token', + }} + self.state.well_known = { + 'issuer': self.base, + 'token_endpoint': self.base + '/token', + 'device_authorization_endpoint': self.base + '/device', + } + auth = OidcDeviceAuth.from_questdb(self.base, insecure=True, + renderer=Renderer()) + self.assertEqual(auth.config.device_authorization_endpoint, + self.base + '/device') + + def test_oidc_disabled_raises(self): + self.state.settings = {'config': {'acl.oidc.enabled': False}} + with self.assertRaises(OidcConfigError): + OidcDeviceAuth.from_questdb(self.base, insecure=True) + + def test_missing_device_endpoint_raises(self): + self.state.settings = {'config': { + 'acl.oidc.enabled': True, + 'acl.oidc.client.id': 'questdb', + 'acl.oidc.token.endpoint': self.base + '/token', + }} + self.state.well_known = {'issuer': self.base, + 'token_endpoint': self.base + '/token'} + with self.assertRaises(OidcConfigError): + OidcDeviceAuth.from_questdb(self.base, insecure=True) + + def test_loopback_flow_not_implemented(self): + # Reserved-but-unimplemented flow raises an OidcError subclass so it's + # caught by `except OidcError` like other config problems. + with self.assertRaises(OidcConfigError): + OidcDeviceAuth.from_questdb(self.base, flow='loopback', + insecure=True) + + def test_endpoint_origin_mismatch_rejected(self): + # /settings advertises the device endpoint on a different origin than + # the token endpoint: refuse rather than POST credentials off-origin. 
+ self.state.settings = {'config': { + 'acl.oidc.enabled': True, + 'acl.oidc.client.id': 'questdb', + 'acl.oidc.token.endpoint': self.base + '/token', + 'acl.oidc.device.authorization.endpoint': + 'http://127.0.0.2:9/device', # different host:port + }} + with self.assertRaises(OidcConfigError): + OidcDeviceAuth.from_questdb(self.base, insecure=True) + + def test_issuer_pin_rejects_off_origin_endpoints(self): + # Endpoints are internally consistent, but an explicit issuer pins them + # to a different origin -> reject (a compromised /settings can't + # redirect the token POST when the IdP is pinned). + self.state.settings = {'config': { + 'acl.oidc.enabled': True, + 'acl.oidc.client.id': 'questdb', + 'acl.oidc.token.endpoint': self.base + '/token', + 'acl.oidc.device.authorization.endpoint': self.base + '/device', + }} + with self.assertRaises(OidcConfigError): + OidcDeviceAuth.from_questdb( + self.base, issuer='https://idp.attacker.example', + insecure=True) + + def test_issuer_pin_accepts_matching_origin(self): + self.state.settings = {'config': { + 'acl.oidc.enabled': True, + 'acl.oidc.client.id': 'questdb', + 'acl.oidc.token.endpoint': self.base + '/token', + 'acl.oidc.device.authorization.endpoint': self.base + '/device', + }} + auth = OidcDeviceAuth.from_questdb( + self.base, issuer=self.base, insecure=True, renderer=Renderer()) + self.assertEqual(auth.config.device_authorization_endpoint, + self.base + '/device') + + +@unittest.skipIf(pd is None, 'pandas not installed') +class TestRestAdapter(AuthTestBase): + def _connected(self): + self.state.settings = {'config': { + 'acl.oidc.enabled': True, + 'acl.oidc.client.id': 'questdb', + 'acl.oidc.scope': 'openid groups', + 'acl.oidc.groups.encoded.in.token': True, + 'acl.oidc.token.endpoint': self.base + '/token', + 'acl.oidc.device.authorization.endpoint': self.base + '/device', + }} + self.state.expected_bearer = ID_TOKEN + return connect(self.base, insecure=True, renderer=Renderer(), + interactive=True, 
_clock=FakeClock()) + + def test_sql_returns_dataframe(self): + qdb = self._connected() + df = qdb.sql('SELECT * FROM trades') + self.assertEqual(list(df.columns), ['ts', 'price']) + self.assertEqual(len(df), 2) + self.assertEqual(df['price'].tolist(), [1.5, 2.5]) + # TIMESTAMP column coerced to datetime. + self.assertTrue(str(df['ts'].dtype).startswith('datetime64')) + + def test_sql_unauthorized_maps_to_auth_error(self): + qdb = self._connected() + self.state.expected_bearer = 'something-else' # force 401 + with self.assertRaises(OidcAuthError): + qdb.sql('SELECT 1') + + def test_connect_is_eager(self): + qdb = self._connected() + self.assertIsInstance(qdb, QuestDB) + # Sign-in already happened during connect(). + self.assertEqual(self.state.device_requests, 1) + + def test_sql_query_error_maps_to_oidc_error(self): + qdb = self._connected() + self.state.exec_status = 400 + self.state.exec_response = {'error': 'unexpected token', 'position': 5} + with self.assertRaises(OidcError) as cm: + qdb.sql('SELEKT 1') + self.assertIn('unexpected token', str(cm.exception)) + self.assertNotIsInstance(cm.exception, OidcAuthError) + + def test_sql_passes_limit(self): + qdb = self._connected() + qdb.sql('SELECT * FROM trades', limit='1,10') + self.assertTrue(any('limit=1' in p for p in self.state.exec_requests)) + + def test_sql_handles_empty_dataset(self): + qdb = self._connected() + self.state.exec_response = {'ddl': 'OK'} # no columns / dataset + df = qdb.sql('CREATE TABLE x (a INT)') + self.assertEqual(len(df), 0) + + def test_sql_malformed_shape_raises_oidc_error(self): + qdb = self._connected() + self.state.exec_response = { # rows shorter than the column list + 'columns': [{'name': 'a', 'type': 'LONG'}, + {'name': 'b', 'type': 'LONG'}], + 'dataset': [[1]]} + with self.assertRaises(OidcError): + qdb.sql('SELECT a, b FROM t') + + +class TestConcurrency(AuthTestBase): + def test_valid_cached_token_does_not_block_during_signin(self): + # A caller with a valid cached token 
must NOT block behind another + # thread's in-progress sign-in: the fast path takes no lock. + auth = self.make_auth() + valid = TokenSet( + access_token='a', id_token=ID_TOKEN, refresh_token='r', + expires_at=self._clock.now() + 3600) + auth._cache.store(auth.cache_key, valid) + + auth._lock.acquire() # simulate another thread mid-sign-in + try: + result = {} + t = threading.Thread( + target=lambda: result.update(tok=auth.token())) + t.start() + t.join(timeout=5) + self.assertFalse( + t.is_alive(), 'token() blocked behind an in-progress sign-in') + self.assertEqual(result.get('tok'), ID_TOKEN) + finally: + auth._lock.release() + + def test_concurrent_signin_prompts_only_once(self): + # Two threads racing with an empty cache must trigger exactly ONE + # device flow; the loser reuses the winner's token. + auth = self.make_auth() + entered = threading.Event() + release = threading.Event() + + class GatingRenderer(Renderer): + def on_prompt(self, resp): + entered.set() # first thread is now inside the flow + release.wait(5) # ...holding the acquisition lock + + auth._renderer = GatingRenderer() + results = {} + + def call(name): + try: + results[name] = auth.token() + except Exception as e: # noqa: BLE001 + results[name] = e + + t1 = threading.Thread(target=call, args=('a',)) + t1.start() + self.assertTrue(entered.wait(5)) # t1 holds the lock in the flow + t2 = threading.Thread(target=call, args=('b',)) + t2.start() + release.set() # let t1 finish signing in + t1.join(5) + t2.join(5) + self.assertEqual(results.get('a'), ID_TOKEN) + self.assertEqual(results.get('b'), ID_TOKEN) + self.assertEqual(self.state.device_requests, 1) # no second prompt + + +class TestAdapters(unittest.TestCase): + """Connection adapters: tested via injected fake modules (the real + sqlalchemy / psycopg / questdb.ingress need not be installed).""" + + def _qdb(self, url='http://db.example.com:9000', token='TKN'): + return QuestDB(url, _FakeAuth(token), insecure=True) + + def 
test_sender_builds_conf_with_token(self): + qdb = self._qdb('http://db.example.com:9000', token='TKN') + captured = {} + + fake = types.ModuleType('questdb.ingress') + + class Sender: + @staticmethod + def from_conf(conf, *, token=None, **kw): + captured.update(conf=conf, token=token, kw=kw) + return 'SENDER' + + fake.Sender = Sender + with mock.patch.dict(sys.modules, {'questdb.ingress': fake}): + sender = qdb.sender(auto_flush=False) + self.assertEqual(sender, 'SENDER') + self.assertEqual(captured['conf'], 'http::addr=db.example.com:9000;') + self.assertEqual(captured['token'], 'TKN') + self.assertEqual(captured['kw'], {'auto_flush': False}) + + def test_sender_https_defaults_to_443(self): + qdb = self._qdb('https://db.example.com') # no explicit port + captured = {} + fake = types.ModuleType('questdb.ingress') + + class Sender: + @staticmethod + def from_conf(conf, *, token=None, **kw): + captured['conf'] = conf + return 'S' + + fake.Sender = Sender + with mock.patch.dict(sys.modules, {'questdb.ingress': fake}): + qdb.sender() + self.assertEqual(captured['conf'], 'https::addr=db.example.com:443;') + + def test_psycopg_connects_as_sso_with_token(self): + qdb = self._qdb('http://db.example.com:9000', token='TKN') + captured = {} + fake = types.ModuleType('psycopg') + + def connect(**kw): + captured.update(kw) + return 'CONN' + + fake.connect = connect + with mock.patch.dict(sys.modules, {'psycopg': fake}): + conn = qdb.psycopg(connect_timeout=3) + self.assertEqual(conn, 'CONN') + self.assertEqual(captured['user'], '_sso') + self.assertEqual(captured['password'], 'TKN') + self.assertEqual(captured['host'], 'db.example.com') + self.assertEqual(captured['port'], 8812) + self.assertEqual(captured['dbname'], 'qdb') + self.assertEqual(captured['connect_timeout'], 3) + # The token is fetched at connect time (fresh per connection). 
+ self.assertEqual(qdb.auth.calls, 1) + + def test_sqlalchemy_engine_injects_fresh_token_per_connect(self): + auth = _FakeAuth('TKN') + qdb = QuestDB('http://db.example.com:9000', auth, insecure=True) + created = {} + events = {} + engine_obj = object() + + fake_sa = types.ModuleType('sqlalchemy') + fake_sa.__path__ = [] + + def create_engine(url, **kw): + created.update(url=url, engine_kw=kw) + return engine_obj + + class _Event: + @staticmethod + def listens_for(target, name): + def deco(fn): + events.update(name=name, fn=fn) + return fn + return deco + + fake_sa.create_engine = create_engine + fake_sa.event = _Event + + fake_eng = types.ModuleType('sqlalchemy.engine') + + class _URL: + @staticmethod + def create(**kw): + created.update(kw) + return 'URL' + + fake_eng.URL = _URL + fake_pg = types.ModuleType('psycopg') # drives the drivername choice + + with mock.patch.dict(sys.modules, { + 'sqlalchemy': fake_sa, + 'sqlalchemy.engine': fake_eng, + 'psycopg': fake_pg}): + engine = qdb.sqlalchemy_engine(pool_pre_ping=True) + + self.assertIs(engine, engine_obj) + self.assertEqual(created['drivername'], 'postgresql+psycopg') + self.assertEqual(created['username'], '_sso') + self.assertEqual(created['host'], 'db.example.com') + self.assertEqual(created['port'], 8812) + self.assertEqual(created['database'], 'qdb') + self.assertEqual(created['url'], 'URL') + self.assertEqual(created['engine_kw'], {'pool_pre_ping': True}) + self.assertEqual(events['name'], 'do_connect') + # The listener injects a fresh token on each new connection. 
+ before = auth.calls + for _ in range(2): + cparams = {} + events['fn'](None, None, [], cparams) + self.assertEqual(cparams['password'], 'TKN') + self.assertEqual(auth.calls - before, 2) + + def test_sql_missing_pandas_raises(self): + qdb = self._qdb() + with mock.patch.dict(sys.modules, {'pandas': None}): + with self.assertRaises(ImportError): + qdb.sql('SELECT 1') + + @unittest.skipIf(importlib.util.find_spec('sqlalchemy') is not None, + 'sqlalchemy installed') + def test_sqlalchemy_engine_missing_dep_raises(self): + with self.assertRaises(ImportError): + self._qdb().sqlalchemy_engine() + + @unittest.skipIf(_HAS_PG_DRIVER, 'a PostgreSQL driver is installed') + def test_psycopg_missing_dep_raises(self): + with self.assertRaises(ImportError): + self._qdb().psycopg() + + @unittest.skipIf(importlib.util.find_spec('questdb.ingress') is not None, + 'questdb.ingress extension is built') + def test_sender_missing_extension_raises(self): + with self.assertRaises(ImportError): + self._qdb().sender() + + +class TestConfigHelpers(unittest.TestCase): + def test_as_bool_variants(self): + from questdb.auth._discovery import _as_bool + for v in ('true', 'True', '1', 'yes', 'on', True, 1): + self.assertIs(_as_bool(v), True) + for v in ('false', '0', 'no', 'off', '', False, 0): + self.assertIs(_as_bool(v), False) + self.assertIsNone(_as_bool(None)) + self.assertIs(_as_bool(None, default=True), True) + + def test_resolve_endpoint_relative_path(self): + from questdb.auth._discovery import _resolve_endpoint + cfg = {'acl.oidc.host': 'idp.example.com', + 'acl.oidc.tls.enabled': True, 'acl.oidc.port': 443} + self.assertEqual(_resolve_endpoint('/as/token.oauth2', cfg), + 'https://idp.example.com:443/as/token.oauth2') + self.assertEqual(_resolve_endpoint('https://idp/x', cfg), + 'https://idp/x') # absolute is kept verbatim + + def test_settings_config_nesting(self): + from questdb.auth._discovery import settings_config + self.assertEqual(settings_config({'config': {'a': 1}}), {'a': 1}) 
+ self.assertEqual(settings_config({'a': 1}), {'a': 1}) # flat fallback + + +class TestEndpointValidation(unittest.TestCase): + def setUp(self): + from questdb.auth._discovery import validate_endpoint_origins + self._validate = validate_endpoint_origins + + def test_default_port_equivalence_accepted(self): + # https default (443) vs explicit :443 normalize to the same origin. + self._validate('https://idp/token', 'https://idp:443/device') + + def test_ipv6_same_origin_accepted(self): + self._validate('https://[::1]/token', 'https://[::1]/device') + + def test_off_origin_device_rejected(self): + with self.assertRaises(OidcConfigError): + self._validate('https://idp/token', 'https://evil.example/device') + + def test_both_endpoints_off_issuer_rejected(self): + # Endpoints agree with each other but not with the pinned issuer: + # the issuer-pin loop must check both, not just their consistency. + with self.assertRaises(OidcConfigError): + self._validate('https://idp/token', 'https://idp/device', + issuer='https://other-issuer.example') + + def test_explicit_constructor_enforces_co_location(self): + with self.assertRaises(OidcConfigError): + OidcDeviceAuth( + client_id='questdb', + device_authorization_endpoint='https://idp.example.com/device', + token_endpoint='https://attacker.example/token', + renderer=Renderer()) + + +class TestCacheKey(unittest.TestCase): + def _auth(self, **kw): + opts = dict( + client_id='questdb', + device_authorization_endpoint='https://idp.example.com/device', + token_endpoint='https://idp.example.com/token', + scope='openid groups', groups_in_token=True, cache='memory', + renderer=Renderer()) + opts.update(kw) + return OidcDeviceAuth(**opts) + + def test_realm_path_distinguishes_key(self): + # Multi-tenant IdP: same host, different realm path -> distinct keys + # (the old origin-only key collided, leaking one realm's token). 
+ a = self._auth( + token_endpoint='https://idp.example.com/realmA/token', + device_authorization_endpoint='https://idp.example.com/realmA/dev') + b = self._auth( + token_endpoint='https://idp.example.com/realmB/token', + device_authorization_endpoint='https://idp.example.com/realmB/dev') + self.assertNotEqual(a.cache_key, b.cache_key) + + def test_scope_order_does_not_change_key(self): + self.assertEqual( + self._auth(scope='openid groups').cache_key, + self._auth(scope='groups openid').cache_key) + + def test_audience_distinguishes_key(self): + self.assertNotEqual( + self._auth(audience='aud-1').cache_key, + self._auth(audience='aud-2').cache_key) + + def test_default_port_normalized(self): + self.assertEqual( + self._auth(token_endpoint='https://idp.example.com/token').cache_key, + self._auth( + token_endpoint='https://idp.example.com:443/token').cache_key) + + +class TestTransportSecurity(unittest.TestCase): + def test_require_secure_policy(self): + from questdb.auth._http import _require_secure + # https is always fine. + _require_secure('https://idp.example.com/x', insecure=False) + # loopback http never leaves the host -> always allowed. + _require_secure('http://127.0.0.1:9000/x', insecure=False) + _require_secure('http://localhost/x', insecure=False) + _require_secure('http://[::1]:8080/x', insecure=False) + # non-loopback http is refused unless insecure is explicitly set. + with self.assertRaises(OidcConfigError): + _require_secure('http://idp.example.com/x', insecure=False) + _require_secure('http://idp.example.com/x', insecure=True) + + def test_insecure_does_not_downgrade_idp(self): + # insecure=True must NOT permit plaintext to a non-loopback IdP: the + # device code / refresh token must never traverse the network in clear. 
+ auth = OidcDeviceAuth( + client_id='questdb', + device_authorization_endpoint='http://idp.example.com/device', + token_endpoint='http://idp.example.com/token', + scope='openid', groups_in_token=False, cache='memory', + insecure=True, interactive=True, renderer=Renderer(), + _clock=FakeClock()) + with self.assertRaises(OidcConfigError): + auth.token() + + +class TestRendererSecurity(unittest.TestCase): + """The Jupyter prompt must never turn an IdP-supplied URL into a + clickable/executable link unless it uses an http(s) scheme.""" + + def test_safe_link_url_allowlist(self): + from questdb.auth._render import _safe_link_url + self.assertEqual(_safe_link_url('https://idp/x'), 'https://idp/x') + self.assertEqual(_safe_link_url('http://idp/x'), 'http://idp/x') + self.assertEqual(_safe_link_url('HTTPS://idp/x'), 'HTTPS://idp/x') + for bad in ('javascript:alert(1)', 'data:text/html,x', + 'vbscript:x', 'file:///etc/passwd', '', None): + self.assertIsNone(_safe_link_url(bad)) + + def test_render_link_inert_for_dangerous_scheme(self): + from questdb.auth._render import _render_link + safe = _render_link('https://idp/x') + self.assertIn(' Date: Mon, 15 Jun 2026 22:18:44 +0100 Subject: [PATCH 02/39] fix: pandas 3 string dtype in test_parquet_roundtrip The fastparquet -> pyarrow parquet roundtrip decays the categorical column to a plain string column. On pandas >= 3 that reads back as the new default string dtype (StringDtype(na_value=nan)) rather than object, so the hardcoded np.dtype('O') in fallback_exp_dtypes no longer matched and the assertion failed. Derive the expected dtype from pd.Series(['x']).dtype instead of hardcoding it: this is object on pandas < 3 and the new string dtype on pandas >= 3, matching exactly what pyarrow's read_parquet produces, so the test is version-agnostic. 
Co-Authored-By: Claude Opus 4.8 (1M context) --- test/test_dataframe.py | 11 ++++++++--- 1 file changed, 8 insertions(+), 3 deletions(-) diff --git a/test/test_dataframe.py b/test/test_dataframe.py index 0bde05cf..de58758f 100644 --- a/test/test_dataframe.py +++ b/test/test_dataframe.py @@ -1897,14 +1897,19 @@ def df_eq(exp_df, deser_df, exp_dtypes): self.assertTrue(exp_df.equals(deser_df)) # fastparquet doesn't roundtrip with pyarrow parquet properly. - # It decays categories to object and UInt8 to float64. + # It decays categories to plain strings and UInt8 to float64. # We need to set up special case expected results for that. + # The decayed string column comes back as whatever this pandas + # infers for a string column: object on pandas < 3, but the new + # default string dtype (StringDtype(na_value=nan)) on pandas >= 3. + # Derive it instead of hardcoding so the test is version-agnostic. + str_dtype = pd.Series(['x']).dtype fallback_exp_dtypes = [ - np.dtype('O'), + str_dtype, np.dtype('int16'), np.dtype('float64'), np.dtype('float64')] - fallback_df = df.astype({'s': 'object', 'b': 'float64'}) + fallback_df = df.astype({'s': str_dtype, 'b': 'float64'}) df_eq(df, pa2pa_df, exp_dtypes) if fp_wrote: From e394fbdf2e8e24254d48fbc0b6e8cb11e954b82a Mon Sep 17 00:00:00 2001 From: glasstiger Date: Mon, 15 Jun 2026 23:13:06 +0100 Subject: [PATCH 03/39] ci: keep 32-bit wheel tests on the pandas 2 / numpy 1 path Pandas 3 ships no 32-bit wheels. On win32 Python 3.11+ the pandas>=3 install was silently swallowed, fastparquet then pulled in a numpy-1-built pandas 2.0.3 alongside numpy 2, and importing pandas crashed at runtime. test.py tolerated the failed import and silently skipped every pandas test (skip count 39 vs 32), while the 64-bit-only import sanity check never fired to catch it. Gate should_use_pandas3() on a 64-bit interpreter so 32-bit targets stay on the consistent pandas 2 / numpy 1 stack and actually exercise the dataframe tests again. 
64-bit targets keep testing pandas 3 unchanged. Co-Authored-By: Claude Opus 4.8 (1M context) --- ci/pip_install_deps.py | 12 +++++++++--- 1 file changed, 9 insertions(+), 3 deletions(-) diff --git a/ci/pip_install_deps.py b/ci/pip_install_deps.py index d70b9761..e3ee25b4 100644 --- a/ci/pip_install_deps.py +++ b/ci/pip_install_deps.py @@ -77,12 +77,18 @@ def install_pandas3_and_numpy(): def should_use_pandas3(py_version=None): if py_version is None: py_version = sys.version_info[:2] - return py_version >= (3, 11) + # Pandas 3 ships no 32-bit wheels, so only take the pandas 3 / numpy 2 + # path on 64-bit interpreters. On 32-bit (e.g. win32) the pandas 3 install + # would be silently skipped, fastparquet would then drag in a numpy-1-built + # pandas 2.0.3 alongside numpy 2, and importing pandas would crash. + is_64bits = sys.maxsize > 2 ** 32 + return is_64bits and py_version >= (3, 11) def install_default_pandas_and_numpy(): - # Pandas 3 currently requires Python 3.11+, so keep 3.10 wheel tests on - # the pandas 2 / numpy 1.x-compatible path unless explicitly overridden. + # Pandas 3 requires Python 3.11+ and ships only 64-bit wheels, so keep + # 3.10 and all 32-bit wheel tests on the pandas 2 / numpy 1.x-compatible + # path unless explicitly overridden. if should_use_pandas3(): install_pandas3_and_numpy() else: From ae9217881a2fcce1579d1eccc01e27e0a494537b Mon Sep 17 00:00:00 2001 From: glasstiger Date: Tue, 16 Jun 2026 00:39:08 +0100 Subject: [PATCH 04/39] test: silence mock server tracebacks on Windows client disconnect The mock HTTP server only caught BrokenPipeError, but on Windows an abrupt client disconnect raises ConnectionAbortedError/ConnectionResetError -- siblings of BrokenPipeError under the common base ConnectionError, not subclasses of it. The timeout, min-throughput, and retry tests disconnect mid-request on purpose, so these slipped past the handler and the stdlib dumped tracebacks to stderr. The tests still passed, but the CI logs looked broken. 
Broaden the handler except clauses to ConnectionError, and override HTTPServer.handle_error to swallow connection errors that surface in the stdlib keep-alive readline of the next request line -- outside any request handler's try/except. Real errors are still reported. Co-Authored-By: Claude Opus 4.8 (1M context) --- test/mock_server.py | 32 +++++++++++++++++++++++++++++--- 1 file changed, 29 insertions(+), 3 deletions(-) diff --git a/test/mock_server.py b/test/mock_server.py index 6178a4f7..708be7f4 100644 --- a/test/mock_server.py +++ b/test/mock_server.py @@ -3,6 +3,7 @@ import select import re import http.server as hs +import sys import threading import time import struct @@ -121,6 +122,21 @@ def __exit__(self, _ex_type, _ex_value, _ex_tb): SETTINGS_WITH_PROTOCOL_VERSION_V1_V2_V3 = '{"config":{"release.type":"OSS","release.version":"[DEVELOPMENT]","line.proto.support.versions":[1,2,3],"ilp.proto.transports":["tcp","http"],"posthog.enabled":false,"posthog.api.key":null,"cairo.max.file.name.length":127},"preferences.version":0,"preferences":{}}' SETTINGS_WITHOUT_PROTOCOL_VERSION = '{ "release.type": "OSS", "release.version": "[DEVELOPMENT]", "acl.enabled": false, "posthog.enabled": false, "posthog.api.key": null }' +class _QuietHTTPServer(hs.HTTPServer): + """HTTPServer that stays quiet when a client disconnects abruptly. + + Several tests (e.g. the request-timeout and min-throughput cases) drop the + connection mid-request on purpose. The stdlib would otherwise print a + harmless but noisy traceback for the resulting connection error -- most + visibly on Windows, where the keep-alive read of the next request line + raises ConnectionResetError outside of any request handler's try/except. 
+ """ + def handle_error(self, request, client_address): + if isinstance(sys.exc_info()[1], ConnectionError): + return + super().handle_error(request, client_address) + + class HttpServer: def __init__(self, settings=SETTINGS_WITH_PROTOCOL_VERSION_V1_V2_V3, delay_seconds=0): self.delay_seconds = delay_seconds @@ -162,7 +178,12 @@ def do_GET(self): else: self.send_error(404, "Endpoint not found") self.close_connection = False - except BrokenPipeError: + except ConnectionError: + # The client (sender under test) may disconnect mid-request, + # e.g. in the timeout / min-throughput tests. On Windows this + # surfaces as ConnectionAbortedError/ConnectionResetError + # rather than the BrokenPipeError seen on Unix; both derive + # from ConnectionError. pass def do_POST(self): @@ -187,7 +208,12 @@ def do_POST(self): if body: self.wfile.write(body) self.close_connection = False - except BrokenPipeError: + except ConnectionError: + # The client (sender under test) may disconnect mid-request, + # e.g. in the timeout / min-throughput tests. On Windows this + # surfaces as ConnectionAbortedError/ConnectionResetError + # rather than the BrokenPipeError seen on Unix; both derive + # from ConnectionError. pass return IlpHttpHandler @@ -195,7 +221,7 @@ def do_POST(self): def __enter__(self): self._stop_event = threading.Event() handler_class = self.create_handler() - self._http_server = hs.HTTPServer(('', 0), handler_class, bind_and_activate=True) + self._http_server = _QuietHTTPServer(('', 0), handler_class, bind_and_activate=True) self._http_server.timeout = 30 self._http_server_thread = threading.Thread(target=self._serve) self._http_server_thread.start() From 88308d5de98db7738e8522ba03c07d143cff9e34 Mon Sep 17 00:00:00 2001 From: glasstiger Date: Wed, 17 Jun 2026 17:16:03 +0100 Subject: [PATCH 05/39] ci: skip readonly AZP_ENHANCED agent var in Windows wheel build The windows-2025 image injects a new readonly agent variable, AZP_ENHANCED_WORKER_CRASH_HANDLING. 
The Windows "Build wheels" step re-exports the vcvars environment via ##vso[task.setvariable ...], and attempting to set this readonly var made the agent emit an ##[error], marking the task failed even though all wheels built and tests passed. Add AZP_ENHANCED to the exclusion regex (prefix match also covers any future AZP_ENHANCED_* vars) in both windows_i686 and windows_x86_64. Co-Authored-By: Claude Opus 4.8 (1M context) --- ci/cibuildwheel.yaml | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/ci/cibuildwheel.yaml b/ci/cibuildwheel.yaml index c0d31767..f6ca473f 100644 --- a/ci/cibuildwheel.yaml +++ b/ci/cibuildwheel.yaml @@ -107,7 +107,7 @@ stages: cmd /c "call `"$vsPath`" && set > env_vars.txt" Get-Content env_vars.txt | ForEach-Object { - if ($_ -match "^([^=]+?)=(.*)$" -and $matches[1] -notmatch '^(SYSTEM|AGENT|BUILD|RELEASE|VSTS|TASK|USE_|FAIL_|MSDEPLOY|AZP_75787|AZP_AGENT|AZP_ENABLE|AZURE_HTTP|COPYFILESOVERSSHV0|ENABLE_ISSUE_SOURCE_VALIDATION|MODIFY_NUMBER_OF_RETRIES_IN_ROBOCOPY|MSBUILDHELPERS_ENABLE_TELEMETRY|RETIRE_AZURERM_POWERSHELL_MODULE|ROSETTA2_WARNING|AZP_PS_ENABLE)') { + if ($_ -match "^([^=]+?)=(.*)$" -and $matches[1] -notmatch '^(SYSTEM|AGENT|BUILD|RELEASE|VSTS|TASK|USE_|FAIL_|MSDEPLOY|AZP_75787|AZP_AGENT|AZP_ENABLE|AZP_ENHANCED|AZURE_HTTP|COPYFILESOVERSSHV0|ENABLE_ISSUE_SOURCE_VALIDATION|MODIFY_NUMBER_OF_RETRIES_IN_ROBOCOPY|MSBUILDHELPERS_ENABLE_TELEMETRY|RETIRE_AZURERM_POWERSHELL_MODULE|ROSETTA2_WARNING|AZP_PS_ENABLE)') { [System.Environment]::SetEnvironmentVariable($matches[1], $matches[2], "Process") Write-Host "##vso[task.setvariable variable=$($matches[1])]$($matches[2])" } @@ -137,7 +137,7 @@ stages: cmd /c "call `"$vsPath`" && set > env_vars.txt" Get-Content env_vars.txt | ForEach-Object { - if ($_ -match "^([^=]+?)=(.*)$" -and $matches[1] -notmatch 
'^(SYSTEM|AGENT|BUILD|RELEASE|VSTS|TASK|USE_|FAIL_|MSDEPLOY|AZP_75787|AZP_AGENT|AZP_ENABLE|AZURE_HTTP|COPYFILESOVERSSHV0|ENABLE_ISSUE_SOURCE_VALIDATION|MODIFY_NUMBER_OF_RETRIES_IN_ROBOCOPY|MSBUILDHELPERS_ENABLE_TELEMETRY|RETIRE_AZURERM_POWERSHELL_MODULE|ROSETTA2_WARNING|AZP_PS_ENABLE)') { + if ($_ -match "^([^=]+?)=(.*)$" -and $matches[1] -notmatch '^(SYSTEM|AGENT|BUILD|RELEASE|VSTS|TASK|USE_|FAIL_|MSDEPLOY|AZP_75787|AZP_AGENT|AZP_ENABLE|AZP_ENHANCED|AZURE_HTTP|COPYFILESOVERSSHV0|ENABLE_ISSUE_SOURCE_VALIDATION|MODIFY_NUMBER_OF_RETRIES_IN_ROBOCOPY|MSBUILDHELPERS_ENABLE_TELEMETRY|RETIRE_AZURERM_POWERSHELL_MODULE|ROSETTA2_WARNING|AZP_PS_ENABLE)') { [System.Environment]::SetEnvironmentVariable($matches[1], $matches[2], "Process") Write-Host "##vso[task.setvariable variable=$($matches[1])]$($matches[2])" } From d5df18634965f9547be32e601d039c3c4392a495 Mon Sep 17 00:00:00 2001 From: glasstiger Date: Wed, 17 Jun 2026 19:41:24 +0100 Subject: [PATCH 06/39] fix: harden OIDC device-flow auth; drop on-disk FileCache backend Review fixes on the OIDC device-flow auth module, plus a simplification that removes the file cache entirely. Hardening: - device-flow poll gates success on _has_required_token (id_token in groups mode, else access_token) instead of always access_token: a completed grant missing the required kind now fails once with a clear error instead of caching an unusable token or discarding a usable id_token. - QuestDB.sql() guards the 2xx path against non-JSON / non-dict bodies, raising OidcError instead of a raw JSONDecodeError / AttributeError. - discovery requires an explicit issuer= (or discovery_url=) before the IdP .well-known fallback; the discovery origin is never derived from a server-supplied token endpoint, so a tampered /settings can't redirect the device-code / refresh-token POSTs. - PG-wire / ILP adapters bracket IPv6 literals in the ILP addr= and raise a clear error on a host-less URL instead of passing None to the driver. 
- validate_endpoint_origins and cache-key normalization raise OidcConfigError (not a bare ValueError) on a malformed port, via a shared safe_urlparse helper. - the example imports questdb.ingress lazily, so it loads on the pure-Python path with no compiled extension. Simplification: - drop FileCache and its cross-process locking + at-rest refresh token; MemoryCache (process-global, survives notebook cell re-runs) is the only persistent backend, with NullCache for cache=None. This also removes the Windows msvcrt-lock no-op and corrupt-file edge cases entirely. Co-Authored-By: Claude Opus 4.8 (1M context) --- docs/auth.rst | 21 +-- examples/oidc_device_auth.py | 6 +- src/questdb/auth/__init__.py | 3 +- src/questdb/auth/_cache.py | 147 +------------------ src/questdb/auth/_device.py | 47 ++++++- src/questdb/auth/_discovery.py | 59 ++++---- src/questdb/auth/_http.py | 17 +++ src/questdb/auth/_questdb.py | 57 ++++++-- test/test.py | 1 - test/test_auth.py | 250 +++++++++++++++++++++++---------- 10 files changed, 335 insertions(+), 273 deletions(-) diff --git a/docs/auth.rst b/docs/auth.rst index 7e40ada5..d44e6b1e 100644 --- a/docs/auth.rst +++ b/docs/auth.rst @@ -103,9 +103,8 @@ order: ``acl.oidc.device.authorization.endpoint``. 2. If the device-authorization endpoint is not advertised, the helper falls back to the IdP discovery document - (``{issuer}/.well-known/openid-configuration``). The issuer is taken from an - explicit ``issuer=`` / ``discovery_url=`` argument, or derived from the token - endpoint's origin. + (``{issuer}/.well-known/openid-configuration``). This path **requires** an + explicit ``issuer=`` (or ``discovery_url=``) argument. Anything you pass explicitly overrides discovery. You can also skip discovery entirely: @@ -150,11 +149,12 @@ Cache backends (``cache=`` argument): * ``"memory"`` *(default)* — process-global, nothing written to disk. Re-running cells is silent; a kernel restart re-prompts once. 
-* ``"file"`` — ``~/.questdb/oidc-cache.json`` (mode ``600``). Survives kernel - restarts and is shared across kernels on the same host. **Security - trade-off:** the refresh token is stored at rest. * ``None`` — never persist; prompt every time. +Tokens are deliberately never written to disk: a kernel restart re-prompts +(an interactive sign-in is cheap relative to the risk of a refresh token +sitting in a plaintext file at rest). + Non-interactive contexts ------------------------- @@ -214,9 +214,12 @@ Security notes QuestDB ``/settings``. The helper requires both endpoints to share a single origin and rejects the configuration otherwise. Because ``/settings`` is authoritative-by-QuestDB, a compromised server could in principle point them - elsewhere; pass ``issuer=`` (or ``discovery_url=``) to **pin** the IdP so the - endpoints are verified to belong to it and credentials can't be redirected to - another host. + elsewhere; pass ``issuer=`` to **pin** the IdP so the endpoints are verified + to belong to it and credentials can't be redirected to another host. When the + server does not advertise the device-authorization endpoint (so it must be + discovered from the IdP), ``issuer=`` (or ``discovery_url=``) is **required** + for exactly this reason — the helper refuses to guess the discovery origin + from the server-supplied token endpoint. * Adapters avoid logging the token / PG DSN. Avoid logging them yourself. * Standard proxy / CA settings (``HTTPS_PROXY``, ``REQUESTS_CA_BUNDLE``, ``SSL_CERT_FILE``) are honoured; you can also pass ``ca_bundle=``. 
diff --git a/examples/oidc_device_auth.py b/examples/oidc_device_auth.py index 2d7d4535..1691659f 100644 --- a/examples/oidc_device_auth.py +++ b/examples/oidc_device_auth.py @@ -13,7 +13,6 @@ import sys from questdb.auth import connect, OidcDeviceAuth, OidcError -from questdb.ingress import TimestampNanos QUESTDB_URL = 'https://questdb.example.com:9000' @@ -32,6 +31,11 @@ def integrated(url: str = QUESTDB_URL): # Feed the same auto-refreshed token into your existing tooling: # engine = qdb.sqlalchemy_engine() # PG-wire, token as _sso password # with qdb.psycopg() as conn: ... # raw psycopg + # + # questdb.ingress is the compiled extension; import it lazily (only the + # ingestion path needs it) so this module also loads for the pure-Python + # bring_your_own_client() path, which needs no extension. + from questdb.ingress import TimestampNanos with qdb.sender() as sender: # ingestion (ILP over HTTP) sender.row( 'trades', diff --git a/src/questdb/auth/__init__.py b/src/questdb/auth/__init__.py index e3768bca..f6ba9d1f 100644 --- a/src/questdb/auth/__init__.py +++ b/src/questdb/auth/__init__.py @@ -60,7 +60,7 @@ from ._device import OidcDeviceAuth from ._discovery import OidcConfig -from ._cache import TokenCache, TokenSet, FileCache, MemoryCache, NullCache +from ._cache import TokenCache, TokenSet, MemoryCache, NullCache from ._errors import ( OidcError, OidcConfigError, @@ -80,7 +80,6 @@ 'TokenCache', 'TokenSet', 'MemoryCache', - 'FileCache', 'NullCache', 'OidcError', 'OidcConfigError', diff --git a/src/questdb/auth/_cache.py b/src/questdb/auth/_cache.py index 858e113e..be66ee9a 100644 --- a/src/questdb/auth/_cache.py +++ b/src/questdb/auth/_cache.py @@ -26,13 +26,8 @@ from __future__ import annotations -import contextlib -import json -import os -import pathlib -import tempfile import threading -from dataclasses import asdict, dataclass, replace +from dataclasses import dataclass, replace from typing import Dict, Optional, Union from ._errors import OidcConfigError 
@@ -67,14 +62,6 @@ def is_valid(self, now: float, skew: float = DEFAULT_SKEW_SECONDS) -> bool: skew = min(skew, lifetime / 2) return now < (self.expires_at - skew) - def to_dict(self) -> Dict[str, object]: - return asdict(self) - - @classmethod - def from_dict(cls, d: Dict[str, object]) -> 'TokenSet': - known = {f for f in cls.__dataclass_fields__} # noqa: C416 - return cls(**{k: v for k, v in d.items() if k in known}) - class TokenCache: """Interface for token caches.""" @@ -133,145 +120,17 @@ def clear(self, key: str) -> None: pass -# Cross-process file locking, used to serialize read-modify-write on the -# shared cache file. fcntl.flock (POSIX) also serializes across threads/ -# instances in one process (locks are per open file description). Where no OS -# primitive is available it degrades to a best-effort no-op; the atomic -# os.replace still guarantees readers never see a torn file. -try: - import fcntl - - def _lock_fd(fd: int) -> None: - fcntl.flock(fd, fcntl.LOCK_EX) - - def _unlock_fd(fd: int) -> None: - fcntl.flock(fd, fcntl.LOCK_UN) -except ImportError: # pragma: no cover - non-POSIX (e.g. Windows) - try: - import msvcrt - - def _lock_fd(fd: int) -> None: - try: - msvcrt.locking(fd, msvcrt.LK_LOCK, 1) - except OSError: - pass - - def _unlock_fd(fd: int) -> None: - try: - msvcrt.locking(fd, msvcrt.LK_UNLCK, 1) - except OSError: - pass - except ImportError: # pragma: no cover - def _lock_fd(fd: int) -> None: - pass - - def _unlock_fd(fd: int) -> None: - pass - - -@contextlib.contextmanager -def _interprocess_lock(lock_path: pathlib.Path): - """Best-effort exclusive lock via a sidecar lock file.""" - fd = os.open(str(lock_path), os.O_CREAT | os.O_RDWR, 0o600) - try: - _lock_fd(fd) - try: - yield - finally: - _unlock_fd(fd) - finally: - os.close(fd) - - -class FileCache(TokenCache): - """ - Opt-in on-disk cache at ``~/.questdb/oidc-cache.json`` (mode ``600``). - - Survives kernel restarts and is shared across kernels on the same host. 
- Security trade-off: a refresh token is stored at rest. The file is created - owner-only (``0600``) from the start via an atomic temp-file replace, and a - sidecar lock file serializes concurrent read-modify-writes across kernels - so entries are not corrupted or lost. - """ - - def __init__(self, path: Optional[Union[str, os.PathLike]] = None): - if path is None: - path = pathlib.Path.home() / '.questdb' / 'oidc-cache.json' - self.path = pathlib.Path(path) - self._lock_path = self.path.with_name(self.path.name + '.lock') - - def _ensure_dir(self) -> None: - parent = self.path.parent - parent.mkdir(parents=True, exist_ok=True) - try: - os.chmod(parent, 0o700) - except OSError: - pass - - def _read_all(self) -> Dict[str, dict]: - try: - with open(self.path, 'r', encoding='utf-8') as f: - data = json.load(f) - if isinstance(data, dict): - return data - except (FileNotFoundError, ValueError, OSError): - pass - return {} - - def _write_all(self, data: Dict[str, dict]) -> None: - # Atomic, owner-only replace. mkstemp creates the file mode 0600 with a - # unique name, so concurrent writers never share a temp file and the - # refresh token is never group/world-readable, even briefly. - fd, tmp = tempfile.mkstemp( - dir=str(self.path.parent), prefix='.oidc-', suffix='.tmp') - try: - with os.fdopen(fd, 'w', encoding='utf-8') as f: - json.dump(data, f) - os.replace(tmp, self.path) - except BaseException: - with contextlib.suppress(OSError): - os.unlink(tmp) - raise - - def load(self, key: str) -> Optional[TokenSet]: - # Lock-free: the atomic replace guarantees a complete file is read. 
- entry = self._read_all().get(key) - if isinstance(entry, dict): - try: - return TokenSet.from_dict(entry) - except TypeError: - return None - return None - - def store(self, key: str, tokens: TokenSet) -> None: - self._ensure_dir() - with _interprocess_lock(self._lock_path): - data = self._read_all() - data[key] = tokens.to_dict() - self._write_all(data) - - def clear(self, key: str) -> None: - self._ensure_dir() - with _interprocess_lock(self._lock_path): - data = self._read_all() - if key in data: - del data[key] - self._write_all(data) - - _CacheSpec = Union[str, None, TokenCache] def make_cache(spec: _CacheSpec) -> TokenCache: - """Resolve a cache spec (``"memory"`` / ``"file"`` / ``None`` / instance).""" + """Resolve a cache spec (``"memory"`` / ``None`` / a TokenCache instance).""" if isinstance(spec, TokenCache): return spec if spec is None or spec == 'none': return NullCache() if spec == 'memory': return MemoryCache() - if spec == 'file': - return FileCache() raise OidcConfigError( f'Unknown cache backend {spec!r}; ' - "expected 'memory', 'file', None, or a TokenCache instance.") + "expected 'memory', None, or a TokenCache instance.") diff --git a/src/questdb/auth/_device.py b/src/questdb/auth/_device.py index 45694c64..27711814 100644 --- a/src/questdb/auth/_device.py +++ b/src/questdb/auth/_device.py @@ -31,7 +31,6 @@ import json import threading import time -import urllib.parse import webbrowser from typing import Any, Dict, Optional @@ -45,7 +44,7 @@ OidcNetworkError, OidcTimeoutError, ) -from ._http import build_ssl_context, post_form +from ._http import build_ssl_context, post_form, safe_urlparse from ._render import ( Renderer, _safe_link_url, @@ -344,6 +343,24 @@ def _has_required_token(self, tokens: TokenSet) -> bool: return bool(tokens.id_token) return bool(tokens.access_token) + def _missing_required_token_error(self) -> OidcDeviceFlowError: + """ + Build the terminal error for a *completed* grant whose token response + omits the kind 
:meth:`_select` needs (the ``id_token`` in groups mode, + else the ``access_token``). Mirrors :meth:`_select`'s diagnostics, but + is an :class:`OidcDeviceFlowError` — a flow failure — so the device-flow + poll can raise it without first caching an unusable response. + """ + if self.config.groups_in_token: + return OidcDeviceFlowError( + 'Device authorization completed but the IdP returned no ' + 'id_token, which this server requires (it expects groups ' + 'encoded in the token). Ensure the "openid" scope is requested ' + f'(current scope: {self.config.scope!r}).') + return OidcDeviceFlowError( + 'Device authorization completed but the IdP returned no ' + 'access_token.') + def _obtain_tokens(self) -> TokenSet: # Fast path: return a valid cached token without taking the lock, so a # caller with a usable token never blocks behind another thread's @@ -538,8 +555,24 @@ def _poll_for_token(self, resp: Dict[str, Any]) -> TokenSet: 'client_id': self.config.client_id, }) - if status == 200 and body.get('access_token'): - return self._tokenset_from_response(body) + if status == 200: + # A 200 is the RFC 6749 §5.1 token response: the grant + # completed. Accept it only if it actually carries the kind + # _select will hand to QuestDB (the id_token in groups mode, + # else the access_token), using the same predicate as the cache + # gate and the post-refresh check so the three can't disagree. + tokens = self._tokenset_from_response(body) + if self._has_required_token(tokens): + return tokens + # The grant completed but the required kind is absent: a stable + # misconfiguration, not a transient poll state. Raise a clear + # terminal error here instead of caching an unusable token and + # silently re-running the whole interactive flow on every later + # token() call. 
+ self._renderer.on_failure( + 'Sign-in failed: the identity provider did not return the ' + 'token this server requires.') + raise self._missing_required_token_error() error = body.get('error') if error == 'authorization_pending': @@ -602,12 +635,12 @@ def _normalize_url(url: str) -> str: # Full URL with scheme/host lower-cased and the default port dropped, but # the path kept (it distinguishes multi-tenant realms). Used for the cache # key so trivial spelling differences don't cause a spurious re-prompt. - parts = urllib.parse.urlparse(url) + parts, port = safe_urlparse(url) scheme = (parts.scheme or '').lower() host = (parts.hostname or '').lower() default_port = {'https': 443, 'http': 80}.get(scheme) - if parts.port and parts.port != default_port: - netloc = f'{host}:{parts.port}' + if port and port != default_port: + netloc = f'{host}:{port}' else: netloc = host query = f'?{parts.query}' if parts.query else '' diff --git a/src/questdb/auth/_discovery.py b/src/questdb/auth/_discovery.py index a1cccbe8..359391c4 100644 --- a/src/questdb/auth/_discovery.py +++ b/src/questdb/auth/_discovery.py @@ -37,12 +37,11 @@ from __future__ import annotations import ssl -import urllib.parse from dataclasses import dataclass from typing import Any, Dict, Optional from ._errors import OidcConfigError -from ._http import get_json +from ._http import get_json, safe_urlparse # QuestDB /settings keys (see EntPropServerConfiguration.exportConfiguration()). 
_K_ENABLED = 'acl.oidc.enabled' @@ -116,22 +115,15 @@ def fetch_settings( return settings_config(data) -def _origin(url: str) -> Optional[str]: - parts = urllib.parse.urlparse(url) - if parts.scheme and parts.netloc: - return f'{parts.scheme}://{parts.netloc}' - return None - - _DEFAULT_PORTS = {'https': 443, 'http': 80} def _normalized_origin(url: str) -> tuple: """(scheme, host, port) with default ports filled in, for comparison.""" - parts = urllib.parse.urlparse(url) + parts, explicit_port = safe_urlparse(url) scheme = (parts.scheme or '').lower() host = (parts.hostname or '').lower() - port = parts.port or _DEFAULT_PORTS.get(scheme) + port = explicit_port or _DEFAULT_PORTS.get(scheme) return (scheme, host, port) @@ -213,7 +205,6 @@ def discover_device_endpoint_from_idp( *, issuer: Optional[str], discovery_url: Optional[str], - token_endpoint: Optional[str], ctx: Optional[ssl.SSLContext] = None, insecure: bool = False, timeout: float = 30) -> Dict[str, Any]: @@ -221,20 +212,18 @@ def discover_device_endpoint_from_idp( Fetch the IdP ``.well-known/openid-configuration`` and return it. The discovery URL is taken from ``discovery_url``, else built from - ``issuer``, else (best effort) from the origin of ``token_endpoint``. + ``issuer``. One of the two is required: the discovery origin is **never** + derived from a QuestDB-advertised endpoint, because that would let a + tampered ``/settings`` choose where the device code and refresh token are + sent (the resolved issuer and endpoints would then all share the attacker's + origin and pass the co-location / issuer-pin checks trivially). 
""" - url = discovery_url - if not url and issuer: - url = well_known_url(issuer) - if not url and token_endpoint: - origin = _origin(token_endpoint) - if origin: - url = well_known_url(origin) + url = discovery_url or (well_known_url(issuer) if issuer else None) if not url: raise OidcConfigError( - 'Cannot discover the IdP device-authorization endpoint: no ' - 'issuer / discovery_url given and none could be derived. Pass ' - 'issuer=... or device_authorization_endpoint=... explicitly.') + 'Cannot discover the IdP device-authorization endpoint: no issuer ' + 'or discovery_url was given. Pass issuer=... (or ' + 'device_authorization_endpoint=... to skip discovery).') return get_json(url, ctx=ctx, insecure=insecure, timeout=timeout) @@ -296,10 +285,30 @@ def resolve_config( # endpoint (and/or the token endpoint). This contacts the IdP, so it is # held to https/loopback (insecure=False) regardless of the QuestDB flag. if not device_authorization_endpoint or not token_endpoint: + # Require a caller-supplied trust anchor before contacting the IdP for + # discovery. Without issuer= / discovery_url=, the discovery target + # would have to be guessed from the token endpoint that /settings + # supplied; a tampered or MITM'd /settings (reachable in cleartext when + # QuestDB is http:// with insecure=True) could then steer discovery — + # and so the device-code and refresh-token POSTs — to an attacker + # origin, with the co-location and issuer-pin checks passing trivially + # because every value shares that one origin. issuer= is out-of-band, + # so the server cannot forge it. + if not issuer and not discovery_url: + raise OidcConfigError( + 'QuestDB did not advertise the OIDC device-authorization ' + 'endpoint (and/or the token endpoint), so it must be ' + 'discovered from the identity provider, but the IdP is not ' + 'pinned. 
Pass issuer="https://your-idp" (its origin) so a ' + 'tampered or intercepted /settings response cannot redirect ' + 'the device-code and refresh-token requests to an attacker. ' + 'Alternatively pass the endpoint(s) explicitly ' + '(device_authorization_endpoint=..., token_endpoint=...) to ' + 'skip discovery, or discovery_url=... to pin the discovery ' + 'document.') doc = discover_device_endpoint_from_idp( issuer=issuer, discovery_url=discovery_url, - token_endpoint=token_endpoint, ctx=ctx, insecure=False, - timeout=timeout) + ctx=ctx, insecure=False, timeout=timeout) device_authorization_endpoint = ( device_authorization_endpoint or doc.get('device_authorization_endpoint')) diff --git a/src/questdb/auth/_http.py b/src/questdb/auth/_http.py index fc5b158b..a845b749 100644 --- a/src/questdb/auth/_http.py +++ b/src/questdb/auth/_http.py @@ -91,6 +91,23 @@ def ok(self) -> bool: return 200 <= self.status < 300 +def safe_urlparse(url: str) -> tuple: + """ + ``urllib.parse.urlparse(url)`` paired with its port, but with a typed error. + + ``ParseResult.port`` raises a bare ``ValueError`` for a non-integer port + (e.g. ``https://idp:notaport``); re-raise it as :class:`OidcConfigError` so + a malformed endpoint URL stays within the package's error contract instead + of escaping as a raw ``ValueError``. Returns ``(parts, port)``. + """ + parts = urllib.parse.urlparse(url) + try: + return parts, parts.port + except ValueError as e: + raise OidcConfigError( + f'Malformed endpoint URL {url!r}: invalid port.') from e + + def _is_loopback(host: Optional[str]) -> bool: # Traffic to a loopback address never leaves the host, so plaintext http # carries no network interception risk and is always permitted. 
diff --git a/src/questdb/auth/_questdb.py b/src/questdb/auth/_questdb.py index 2b95d75b..ac9aa45b 100644 --- a/src/questdb/auth/_questdb.py +++ b/src/questdb/auth/_questdb.py @@ -30,7 +30,7 @@ from typing import Any, Dict, Optional from ._device import OidcDeviceAuth -from ._errors import OidcAuthError, OidcError +from ._errors import OidcAuthError, OidcConfigError, OidcError from ._http import request _DEFAULT_PG_PORT = 8812 @@ -159,12 +159,51 @@ def sql(self, query: str, *, limit: Optional[str] = None, pass raise OidcError( f'QuestDB query failed (HTTP {resp.status}): {detail}') - return _exec_json_to_df(resp.json(), pandas) + try: + data = resp.json() + except (ValueError, UnicodeDecodeError): + # A 2xx body that isn't JSON (e.g. an HTML error/login page from a + # reverse proxy or captive portal) must surface as a clean + # OidcError, not a raw JSONDecodeError. Mirrors the error path and + # post_form(). + raise OidcError( + 'QuestDB returned a non-JSON success response from /exec: ' + f'{resp.text()[:300]}') + if not isinstance(data, dict): + # Valid JSON but not an object (e.g. a bare list) would make + # _exec_json_to_df fail with AttributeError on .get(); reject it. + raise OidcError( + 'QuestDB /exec returned JSON that is not an object ' + f'(got {type(data).__name__}); cannot build a DataFrame.') + return _exec_json_to_df(data, pandas) # -- connection adapters ------------------------------------------------ - def _host(self) -> Optional[str]: - return self._parts.hostname + def _require_host(self, host: Optional[str] = None) -> str: + """ + Resolve the PG-wire / ILP host: an explicit ``host`` override, else the + host from the QuestDB URL. Raises when neither yields one (e.g. a URL + with no authority such as ``"localhost"`` or ``"questdb:9000"``) instead + of passing a bare ``None`` down to the driver. + + The returned host is *unbracketed* — psycopg and SQLAlchemy take the + address and port as separate arguments. 
:meth:`_ilp_addr` adds the + brackets an IPv6 literal needs in the ILP ``addr=host:port`` form. + """ + resolved = host or self._parts.hostname + if not resolved: + raise OidcConfigError( + f'The QuestDB URL {self.url!r} has no host. Use a URL with an ' + 'explicit host (e.g. "https://questdb.example.com:9000"), or ' + 'pass host=... to the adapter.') + return resolved + + @staticmethod + def _ilp_addr(host: str, port: int) -> str: + # Bracket an IPv6 literal so the ILP conf parser reads host:port + # unambiguously; hostnames and IPv4 addresses never contain ':'. + bracketed = f'[{host}]' if ':' in host else host + return f'{bracketed}:{port}' def sqlalchemy_engine( self, @@ -200,7 +239,7 @@ def sqlalchemy_engine( url = URL.create( drivername=drivername, username='_sso', - host=host or self._host(), + host=self._require_host(host), port=pg_port, database=database) engine = create_engine(url, **engine_kwargs) @@ -229,7 +268,7 @@ def psycopg( """ mod = _pg_module() return mod.connect( - host=host or self._host(), + host=self._require_host(host), port=pg_port, dbname=database, user='_sso', @@ -256,7 +295,8 @@ def sender(self, *, port: Optional[int] = None, scheme = 'https' if self._parts.scheme == 'https' else 'http' resolved_port = port or self._parts.port or ( 443 if scheme == 'https' else 9000) - conf = f'{scheme}::addr={self._host()}:{resolved_port};' + conf = (f'{scheme}::addr=' + f'{self._ilp_addr(self._require_host(), resolved_port)};') return Sender.from_conf(conf, token=self.auth.token(), **sender_kwargs) @@ -288,8 +328,7 @@ def connect( :param flow: ``"auto"`` (default), ``"device"`` or ``"loopback"``. Today ``"auto"`` always resolves to the device flow (works on local and remote kernels); ``"loopback"`` is reserved for a future release. - :param cache: Token cache backend: ``"memory"`` (default), ``"file"`` or - ``None``. + :param cache: Token cache backend: ``"memory"`` (default) or ``None``. 
:param insecure: Allow plaintext ``http://`` URLs (development only). :param eager: If ``True`` (default), sign in immediately; otherwise defer until the first call that needs a token. diff --git a/test/test.py b/test/test.py index 6fdd418f..d6c7808a 100755 --- a/test/test.py +++ b/test/test.py @@ -39,7 +39,6 @@ TestDeviceFlow, TestNonInteractive, TestRefresh, - TestFileCache, TestDiscovery, TestRestAdapter, TestAdapters, diff --git a/test/test_auth.py b/test/test_auth.py index 427ccb57..6a4ed951 100644 --- a/test/test_auth.py +++ b/test/test_auth.py @@ -40,7 +40,6 @@ import json import os import sys -import tempfile import threading import types import unittest @@ -64,7 +63,7 @@ OidcNetworkError, TokenSet, ) -from questdb.auth._cache import FileCache, MemoryCache, _MEMORY_STORE # noqa: E402 +from questdb.auth._cache import MemoryCache, _MEMORY_STORE # noqa: E402 from questdb.auth._render import Renderer # noqa: E402 try: @@ -72,12 +71,6 @@ except ImportError: pd = None -try: - import fcntl as _fcntl # noqa: F401 - _HAS_FCNTL = True -except ImportError: - _HAS_FCNTL = False - _HAS_PG_DRIVER = ( importlib.util.find_spec('psycopg') is not None or importlib.util.find_spec('psycopg2') is not None) @@ -148,6 +141,7 @@ def __init__(self): self.expected_bearer = None # for /exec auth check self.exec_response = None self.exec_status = 200 + self.exec_raw = None # (status, content_type, bytes) override # Recording. 
self.device_requests = 0 self.token_requests = [] @@ -192,6 +186,14 @@ def do_GET(self): self._send_json(401, {'error': 'unauthorized'}) return self.state.exec_requests.append(self.path) + if self.state.exec_raw is not None: + status, ctype, raw = self.state.exec_raw + self.send_response(status) + self.send_header('Content-Type', ctype) + self.send_header('Content-Length', str(len(raw))) + self.end_headers() + self.wfile.write(raw) + return self._send_json(self.state.exec_status, self.state.exec_response or { 'columns': [ {'name': 'ts', 'type': 'TIMESTAMP'}, @@ -366,13 +368,28 @@ def test_token_caches_in_memory_across_instances(self): self.make_auth().token() self.assertEqual(self.state.device_requests, 1) - def test_missing_id_token_raises_config_error(self): + def test_groups_mode_missing_id_token_fails_without_caching(self): + # groups_in_token=True but the completed grant carries only an + # access_token: the poll must reject it as a terminal flow error and + # NOT cache it (otherwise every later token() re-runs the whole + # interactive flow). See M1. self.state.token_script = [(200, { 'access_token': ACCESS_TOKEN, 'token_type': 'Bearer', 'expires_in': 3600})] # no id_token auth = self.make_auth(groups_in_token=True) - with self.assertRaises(OidcConfigError): + with self.assertRaises(OidcDeviceFlowError): auth.token() + self.assertIsNone(auth._tokens) # nothing was cached + + def test_groups_mode_accepts_id_token_without_access_token(self): + # A completed grant that returns only an id_token (no access_token) is + # usable in groups mode and must be returned, not discarded as it was + # when success gated on access_token. See M1. + self.state.token_script = [(200, { + 'id_token': ID_TOKEN, 'token_type': 'Bearer', + 'expires_in': 3600})] # no access_token + auth = self.make_auth(groups_in_token=True) + self.assertEqual(auth.token(), ID_TOKEN) def test_200_without_access_token_is_not_success(self): # A 200 with no access_token must not be treated as a token. 
@@ -551,68 +568,6 @@ def test_refresh_network_error_propagates_without_reprompt(self): self.assertEqual(auth._tokens.refresh_token, 'REFRESH-1') -class TestFileCache(AuthTestBase): - def test_file_cache_works_without_os_lock(self): - # Exercise the no-fcntl/no-msvcrt fallback: with the lock primitives - # no-op'd, the atomic temp-file replace must still keep every entry. - import questdb.auth._cache as cache_mod - tmp = tempfile.mkdtemp() - path = os.path.join(tmp, 'cache.json') - with mock.patch.object(cache_mod, '_lock_fd', lambda fd: None), \ - mock.patch.object(cache_mod, '_unlock_fd', lambda fd: None): - cache = FileCache(path) - cache.store('k1', TokenSet(access_token='a1', expires_at=1.0)) - cache.store('k2', TokenSet(access_token='a2', expires_at=1.0)) - self.assertEqual(cache.load('k1').access_token, 'a1') - self.assertEqual(cache.load('k2').access_token, 'a2') - - def test_file_cache_survives_new_instance(self): - tmp = tempfile.mkdtemp() - path = os.path.join(tmp, 'cache.json') - cache1 = FileCache(path) - self.make_auth(cache=cache1).token() - self.assertEqual(self.state.device_requests, 1) - # New process simulation: fresh memory, load from file. - _MEMORY_STORE.clear() - cache2 = FileCache(path) - token = self.make_auth(cache=cache2).token() - self.assertEqual(token, ID_TOKEN) - self.assertEqual(self.state.device_requests, 1) # no re-prompt - # File is mode 600 where supported. - if os.name == 'posix': - self.assertEqual(os.stat(path).st_mode & 0o777, 0o600) - - @unittest.skipUnless( - _HAS_FCNTL, 'cross-process file lock requires fcntl (POSIX)') - def test_concurrent_writes_preserve_all_entries(self): - # 20 writers (distinct instances, same file, distinct keys) racing: - # the sidecar lock + atomic unique-temp replace must keep every entry - # and never corrupt the file or leave a temp behind. 
- tmp = tempfile.mkdtemp() - path = os.path.join(tmp, 'cache.json') - - def writer(i): - FileCache(path).store( - f'key-{i}', - TokenSet(access_token=f'a{i}', id_token=f'id{i}', - refresh_token=f'r{i}', expires_at=1.0)) - - threads = [threading.Thread(target=writer, args=(i,)) - for i in range(20)] - for t in threads: - t.start() - for t in threads: - t.join(10) - - final = FileCache(path) - for i in range(20): - ts = final.load(f'key-{i}') - self.assertIsNotNone(ts, f'lost entry key-{i}') - self.assertEqual(ts.access_token, f'a{i}') - leftovers = [n for n in os.listdir(tmp) if n.endswith('.tmp')] - self.assertEqual(leftovers, [], f'temp files left behind: {leftovers}') - - class TestDiscovery(AuthTestBase): def test_from_questdb_reads_settings(self): self.state.settings = {'config': { @@ -633,7 +588,8 @@ def test_from_questdb_reads_settings(self): self.assertEqual(auth.token(), ID_TOKEN) def test_well_known_fallback_for_device_endpoint(self): - # Settings advertise OIDC + token endpoint but NOT the device endpoint. + # Settings advertise OIDC + token endpoint but NOT the device endpoint; + # issuer= is pinned, so the IdP .well-known fallback is allowed. self.state.settings = {'config': { 'acl.oidc.enabled': True, 'acl.oidc.client.id': 'questdb', @@ -646,8 +602,47 @@ def test_well_known_fallback_for_device_endpoint(self): 'token_endpoint': self.base + '/token', 'device_authorization_endpoint': self.base + '/device', } - auth = OidcDeviceAuth.from_questdb(self.base, insecure=True, - renderer=Renderer()) + auth = OidcDeviceAuth.from_questdb(self.base, issuer=self.base, + insecure=True, renderer=Renderer()) + self.assertEqual(auth.config.device_authorization_endpoint, + self.base + '/device') + + def test_device_fallback_without_issuer_is_rejected(self): + # M4: QuestDB advertises the token endpoint but not the device + # endpoint, and no issuer is pinned. 
Discovery would otherwise be + # steered by the (possibly tampered) /settings response, so refuse and + # demand an out-of-band issuer pin — even though a usable .well-known + # is reachable here, it must NOT be fetched. + self.state.settings = {'config': { + 'acl.oidc.enabled': True, + 'acl.oidc.client.id': 'questdb', + 'acl.oidc.token.endpoint': self.base + '/token', + }} + self.state.well_known = { + 'issuer': self.base, + 'token_endpoint': self.base + '/token', + 'device_authorization_endpoint': self.base + '/device', + } + with self.assertRaises(OidcConfigError) as cm: + OidcDeviceAuth.from_questdb(self.base, insecure=True) + self.assertIn('issuer', str(cm.exception)) + + def test_device_fallback_with_discovery_url_is_accepted(self): + # discovery_url= is an out-of-band pin too, accepted in lieu of issuer=. + self.state.settings = {'config': { + 'acl.oidc.enabled': True, + 'acl.oidc.client.id': 'questdb', + 'acl.oidc.token.endpoint': self.base + '/token', + }} + self.state.well_known = { + 'issuer': self.base, + 'token_endpoint': self.base + '/token', + 'device_authorization_endpoint': self.base + '/device', + } + auth = OidcDeviceAuth.from_questdb( + self.base, + discovery_url=self.base + '/.well-known/openid-configuration', + insecure=True, renderer=Renderer()) self.assertEqual(auth.config.device_authorization_endpoint, self.base + '/device') @@ -657,6 +652,9 @@ def test_oidc_disabled_raises(self): OidcDeviceAuth.from_questdb(self.base, insecure=True) def test_missing_device_endpoint_raises(self): + # issuer= is pinned (so the fallback is allowed), but the IdP's + # discovery doc carries no device_authorization_endpoint: that is the + # error under test, not the missing-issuer guard above. 
self.state.settings = {'config': { 'acl.oidc.enabled': True, 'acl.oidc.client.id': 'questdb', @@ -664,6 +662,21 @@ def test_missing_device_endpoint_raises(self): }} self.state.well_known = {'issuer': self.base, 'token_endpoint': self.base + '/token'} + with self.assertRaises(OidcConfigError): + OidcDeviceAuth.from_questdb(self.base, issuer=self.base, + insecure=True) + + def test_malformed_endpoint_port_raises_config_error(self): + # /settings advertising a non-integer port in an endpoint must raise + # OidcConfigError (the typed contract), not a bare ValueError that + # callers catching OidcError would miss. See M6. + self.state.settings = {'config': { + 'acl.oidc.enabled': True, + 'acl.oidc.client.id': 'questdb', + 'acl.oidc.token.endpoint': 'https://idp:notaport/token', + 'acl.oidc.device.authorization.endpoint': + 'https://idp:notaport/device', + }} with self.assertRaises(OidcConfigError): OidcDeviceAuth.from_questdb(self.base, insecure=True) @@ -780,6 +793,24 @@ def test_sql_malformed_shape_raises_oidc_error(self): with self.assertRaises(OidcError): qdb.sql('SELECT a, b FROM t') + def test_sql_non_json_2xx_raises_oidc_error(self): + # A 2xx body that isn't JSON (e.g. an HTML page from a reverse proxy) + # must raise a clean OidcError, not a raw JSONDecodeError. See M3. + qdb = self._connected() + self.state.exec_raw = (200, 'text/html', b'proxy') + with self.assertRaises(OidcError) as cm: + qdb.sql('SELECT 1') + self.assertNotIsInstance(cm.exception, OidcAuthError) + + def test_sql_non_dict_json_raises_oidc_error(self): + # A valid-JSON-but-not-an-object 2xx body (e.g. a bare list) must raise + # OidcError, not AttributeError from .get(). See M3. 
+ qdb = self._connected() + self.state.exec_response = ['not', 'an', 'object'] + with self.assertRaises(OidcError) as cm: + qdb.sql('SELECT 1') + self.assertNotIsInstance(cm.exception, OidcAuthError) + class TestConcurrency(AuthTestBase): def test_valid_cached_token_does_not_block_during_signin(self): @@ -962,6 +993,61 @@ def create(**kw): self.assertEqual(cparams['password'], 'TKN') self.assertEqual(auth.calls - before, 2) + def test_sender_brackets_ipv6_addr(self): + # An IPv6 literal must be bracketed in the ILP addr=host:port conf, + # else "::1:9000" is ambiguous to the conf parser. See M5. + qdb = self._qdb('https://[::1]:9000') + captured = {} + fake = types.ModuleType('questdb.ingress') + + class Sender: + @staticmethod + def from_conf(conf, *, token=None, **kw): + captured['conf'] = conf + return 'S' + + fake.Sender = Sender + with mock.patch.dict(sys.modules, {'questdb.ingress': fake}): + qdb.sender() + self.assertEqual(captured['conf'], 'https::addr=[::1]:9000;') + + def test_psycopg_uses_bare_ipv6_host(self): + # psycopg takes host and port separately, so the IPv6 host is passed + # WITHOUT brackets (unlike the ILP addr= form). See M5. + qdb = self._qdb('http://[::1]:9000') + captured = {} + fake = types.ModuleType('psycopg') + + def connect(**kw): + captured.update(kw) + return 'CONN' + + fake.connect = connect + with mock.patch.dict(sys.modules, {'psycopg': fake}): + qdb.psycopg() + self.assertEqual(captured['host'], '::1') + + def test_require_host_rejects_hostless_url(self): + # A URL with no extractable host must raise, not pass None to a driver; + # an explicit host= override still resolves. See M5. 
+ for bad in ('localhost', 'questdb:9000'): + with self.subTest(url=bad): + with self.assertRaises(OidcConfigError): + QuestDB(bad, _FakeAuth(), insecure=True)._require_host() + self.assertEqual( + QuestDB('localhost', _FakeAuth())._require_host('h.example'), + 'h.example') + + def test_sender_hostless_url_raises(self): + # The guard propagates through an adapter (not just the helper): + # sender() on a host-less URL raises OidcConfigError. See M5. + qdb = self._qdb('questdb:9000') + fake = types.ModuleType('questdb.ingress') + fake.Sender = object() # import must succeed so we reach the guard + with mock.patch.dict(sys.modules, {'questdb.ingress': fake}): + with self.assertRaises(OidcConfigError): + qdb.sender() + def test_sql_missing_pandas_raises(self): qdb = self._qdb() with mock.patch.dict(sys.modules, {'pandas': None}): @@ -1034,6 +1120,13 @@ def test_both_endpoints_off_issuer_rejected(self): self._validate('https://idp/token', 'https://idp/device', issuer='https://other-issuer.example') + def test_malformed_port_raises_config_error(self): + # A non-integer port must surface as OidcConfigError, not urllib's bare + # ValueError (which callers catching OidcError would miss). See M6. + with self.assertRaises(OidcConfigError): + self._validate('https://idp:notaport/token', + 'https://idp:notaport/device') + def test_explicit_constructor_enforces_co_location(self): with self.assertRaises(OidcConfigError): OidcDeviceAuth( @@ -1054,6 +1147,13 @@ def _auth(self, **kw): opts.update(kw) return OidcDeviceAuth(**opts) + def test_normalize_url_malformed_port_raises_config_error(self): + # cache_key normalization shares the same typed-port guard: a malformed + # port raises OidcConfigError, not a bare ValueError. See M6. 
+ from questdb.auth._device import _normalize_url + with self.assertRaises(OidcConfigError): + _normalize_url('https://idp:notaport/token') + def test_realm_path_distinguishes_key(self): # Multi-tenant IdP: same host, different realm path -> distinct keys # (the old origin-only key collided, leaking one realm's token). From 5ef892bb45ef515aff7de52a9f25f1d4015713f8 Mon Sep 17 00:00:00 2001 From: glasstiger Date: Thu, 18 Jun 2026 01:00:03 +0100 Subject: [PATCH 07/39] ci: build questdb master's -SNAPSHOT java client via local-client profile The linux-qdb-master job's "Compile QuestDB master" step failed because questdb master depends on a -SNAPSHOT java-questdb-client that is not published to Maven Central. Add a detect-local-client step template (adapted from questdb/questdb's ci/templates/detect-local-client.yml) that reads questdb.client.version from the cloned core/pom.xml: for a -SNAPSHOT client it inits the java-questdb-client submodule and builds it via the `local-client` Maven profile; for a released client it resolves from Maven Central. The "Compile QuestDB master" step appends the resulting $(CLIENT_PROFILE). Co-Authored-By: Claude Opus 4.8 (1M context) --- ci/run_tests_pipeline.yaml | 9 ++++++- ci/templates/detect-local-client.yml | 36 ++++++++++++++++++++++++++++ 2 files changed, 44 insertions(+), 1 deletion(-) create mode 100644 ci/templates/detect-local-client.yml diff --git a/ci/run_tests_pipeline.yaml b/ci/run_tests_pipeline.yaml index 80099fb9..4c444658 100644 --- a/ci/run_tests_pipeline.yaml +++ b/ci/run_tests_pipeline.yaml @@ -63,12 +63,19 @@ stages: git clone --depth 1 https://github.com/questdb/questdb.git displayName: git clone questdb master condition: eq(variables.vsQuestDbMaster, true) + # Decide whether to build java-questdb-client from the bundled + # submodule (-P local-client, for a -SNAPSHOT client not on Maven + # Central) or resolve it from Maven Central. Sets $(CLIENT_PROFILE). 
+ - template: templates/detect-local-client.yml + parameters: + qdbRepoPath: questdb + condition: eq(variables.vsQuestDbMaster, true) - task: Maven@3 displayName: "Compile QuestDB master" inputs: mavenPOMFile: "questdb/pom.xml" jdkVersionOption: "1.17" - options: "-DskipTests -Pbuild-web-console" + options: "-DskipTests -Pbuild-web-console $(CLIENT_PROFILE)" condition: eq(variables.vsQuestDbMaster, true) - script: python3 proj.py test 1 displayName: "Test vs released" diff --git a/ci/templates/detect-local-client.yml b/ci/templates/detect-local-client.yml new file mode 100644 index 00000000..55c9a5fb --- /dev/null +++ b/ci/templates/detect-local-client.yml @@ -0,0 +1,36 @@ +# Adapted from questdb/questdb's ci/templates/detect-local-client.yml. +# +# Decide how a cloned QuestDB checkout resolves its java-questdb-client +# dependency: a -SNAPSHOT client version is not published to Maven Central, so +# build it from the bundled java-questdb-client submodule via the `local-client` +# profile; a released version is taken from Maven Central. Sets the +# CLIENT_PROFILE pipeline variable (``-P local-client`` or empty) for the +# following Maven build, and inits the submodule only when it is needed. +# +# Unlike the upstream template, QuestDB is cloned into a subdirectory here, so +# the repo path is a parameter; ``condition`` lets the caller gate this to the +# matrix leg that builds QuestDB master. 
+parameters: + - name: qdbRepoPath + type: string + default: questdb + - name: condition + type: string + default: succeeded() + +steps: + - bash: | + set -eu + pom="${{ parameters.qdbRepoPath }}/core/pom.xml" + CLIENT_VERSION=$(sed -n 's/.*<questdb.client.version>\(.*\)<\/questdb.client.version>.*/\1/p' "$pom" | head -1) + echo "questdb.client.version=$CLIENT_VERSION" + if echo "$CLIENT_VERSION" | grep -q '\-SNAPSHOT$'; then + echo "SNAPSHOT client detected -> build it locally (local-client profile)" + git -C "${{ parameters.qdbRepoPath }}" submodule update --init java-questdb-client + echo "##vso[task.setvariable variable=CLIENT_PROFILE]-P local-client" + else + echo "Release client detected -> resolve from Maven Central" + echo "##vso[task.setvariable variable=CLIENT_PROFILE]" + fi + displayName: "Detect QuestDB local client profile" + condition: ${{ parameters.condition }} From ae8baa77deb577550a2cae0bc4f6da2fd5088e2f Mon Sep 17 00:00:00 2001 From: glasstiger Date: Thu, 18 Jun 2026 02:47:46 +0100 Subject: [PATCH 08/39] ci: build/run questdb master on JDK 25 QuestDB master bumped its build to Java 25 (javac.target=25). Its maven-enforcer requireJavaVersion reads ${java.enforce.version}, which is only set by the JDK-activated 'java25+' profile ((24,)). Building the linux-qdb-master leg with JDK 17 left that property empty, so the enforcer failed with 'JDK version can't be empty' before compilation. Point the 'Compile QuestDB master' Maven task at $(JAVA_HOME_25_X64) via jdkVersionOption: path, and run 'Test vs master' on the same JDK 25 so the freshly compiled Java 25 bytecode can run. 'Test vs released' stays on JDK 17. Both ubuntu-latest and windows-2025 images preinstall Temurin 25 (JAVA_HOME_25_X64). 
Co-Authored-By: Claude Opus 4.8 (1M context) --- ci/run_tests_pipeline.yaml | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/ci/run_tests_pipeline.yaml b/ci/run_tests_pipeline.yaml index 4c444658..e8a69da0 100644 --- a/ci/run_tests_pipeline.yaml +++ b/ci/run_tests_pipeline.yaml @@ -74,7 +74,8 @@ stages: displayName: "Compile QuestDB master" inputs: mavenPOMFile: "questdb/pom.xml" - jdkVersionOption: "1.17" + jdkVersionOption: "path" + jdkDirectory: "$(JAVA_HOME_25_X64)" options: "-DskipTests -Pbuild-web-console $(CLIENT_PROFILE)" condition: eq(variables.vsQuestDbMaster, true) - script: python3 proj.py test 1 @@ -84,7 +85,7 @@ stages: - script: python3 proj.py test 1 displayName: "Test vs master" env: - JAVA_HOME: $(JAVA_HOME_17_X64) + JAVA_HOME: $(JAVA_HOME_25_X64) QDB_REPO_PATH: "./questdb" condition: eq(variables.vsQuestDbMaster, true) - job: TestsAgainstVariousNumpyVersion1x From a4b41c3fa4c66ab0aa3833a1192b01a53079dc0a Mon Sep 17 00:00:00 2001 From: glasstiger Date: Thu, 18 Jun 2026 03:02:02 +0100 Subject: [PATCH 09/39] ci: invoke Maven directly to build questdb master on JDK 25 MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit The Maven@3 task crashes while parsing JDK 25 ('Cannot read properties of null (reading 'major')') — its JDK selection tops out at 21 and its Node-side version detector returns null for 25. Replace the task with a bash step that exports JAVA_HOME=$(JAVA_HOME_25_X64) and runs mvn directly, mirroring the task defaults (questdb/pom.xml, goal 'package', same -DskipTests -Pbuild-web-console $(CLIENT_PROFILE) options). 
Co-Authored-By: Claude Opus 4.8 (1M context) --- ci/run_tests_pipeline.yaml | 16 ++++++++++------ 1 file changed, 10 insertions(+), 6 deletions(-) diff --git a/ci/run_tests_pipeline.yaml b/ci/run_tests_pipeline.yaml index e8a69da0..c9dfff3f 100644 --- a/ci/run_tests_pipeline.yaml +++ b/ci/run_tests_pipeline.yaml @@ -70,13 +70,17 @@ stages: parameters: qdbRepoPath: questdb condition: eq(variables.vsQuestDbMaster, true) - - task: Maven@3 + # The Maven@3 task crashes parsing JDK 25 ("Cannot read properties of + # null (reading 'major')") since its JDK support tops out at 21, so + # invoke Maven directly on the preinstalled JDK 25 instead. Mirrors the + # task's defaults: POM questdb/pom.xml, goal "package". + - bash: | + set -eu + export JAVA_HOME="$(JAVA_HOME_25_X64)" + export PATH="$JAVA_HOME/bin:$PATH" + java -version + mvn -B -f questdb/pom.xml package -DskipTests -Pbuild-web-console $(CLIENT_PROFILE) displayName: "Compile QuestDB master" - inputs: - mavenPOMFile: "questdb/pom.xml" - jdkVersionOption: "path" - jdkDirectory: "$(JAVA_HOME_25_X64)" - options: "-DskipTests -Pbuild-web-console $(CLIENT_PROFILE)" condition: eq(variables.vsQuestDbMaster, true) - script: python3 proj.py test 1 displayName: "Test vs released" From 61abd4fc15ba6275c45af4a4fe782304621a731a Mon Sep 17 00:00:00 2001 From: glasstiger Date: Thu, 18 Jun 2026 14:21:27 +0100 Subject: [PATCH 10/39] ci: pass JDK 25 module access flags to questdb master server QuestDB master runs as the io.questdb JPMS module and now uses jdk.internal.vm.ContinuationScope, so on JDK 25 the server dies at startup with IllegalAccessError (java.base does not export jdk.internal.vm to io.questdb), plus Unsafe/native-access warnings. The test fixture launches questdb.jar directly rather than via questdb.sh, so the access flags questdb.sh normally supplies are absent. Set JDK_JAVA_OPTIONS on the 'Test vs master' step with the exact module access flags from questdb.sh (all targeting io.questdb). 
Scoped to that step, so the JDK 17 'Test vs released' run is unaffected. Co-Authored-By: Claude Opus 4.8 (1M context) --- ci/run_tests_pipeline.yaml | 12 ++++++++++++ 1 file changed, 12 insertions(+) diff --git a/ci/run_tests_pipeline.yaml b/ci/run_tests_pipeline.yaml index c9dfff3f..3025ab1a 100644 --- a/ci/run_tests_pipeline.yaml +++ b/ci/run_tests_pipeline.yaml @@ -91,6 +91,18 @@ stages: env: JAVA_HOME: $(JAVA_HOME_25_X64) QDB_REPO_PATH: "./questdb" + # QuestDB master runs as the io.questdb JPMS module and needs these + # JDK 25 access flags (mirrors questdb.sh). The test fixture launches + # questdb.jar directly rather than via questdb.sh, so feed them to the + # java launcher through JDK_JAVA_OPTIONS. + JDK_JAVA_OPTIONS: >- + --sun-misc-unsafe-memory-access=allow + --enable-native-access=io.questdb + --add-opens=java.base/java.lang=io.questdb + --add-opens=java.base/java.lang.reflect=io.questdb + --add-opens=java.base/java.nio=io.questdb + --add-opens=java.base/java.time.zone=io.questdb + --add-exports=java.base/jdk.internal.vm=io.questdb condition: eq(variables.vsQuestDbMaster, true) - job: TestsAgainstVariousNumpyVersion1x pool: From 23fb823df08f3f42b174acf436415fb123286ff9 Mon Sep 17 00:00:00 2001 From: glasstiger Date: Thu, 18 Jun 2026 15:31:06 +0100 Subject: [PATCH 11/39] do not follow redirects --- src/questdb/auth/_http.py | 32 +++++++++++++++++++++++---- test/test_auth.py | 46 +++++++++++++++++++++++++++++++++++++++ 2 files changed, 74 insertions(+), 4 deletions(-) diff --git a/src/questdb/auth/_http.py b/src/questdb/auth/_http.py index a845b749..ec29a1e3 100644 --- a/src/questdb/auth/_http.py +++ b/src/questdb/auth/_http.py @@ -137,12 +137,36 @@ def _require_secure(url: str, insecure: bool) -> None: 'insecure=True only to permit plaintext to a non-loopback host.') +class _NoRedirect(urllib.request.HTTPRedirectHandler): + """Refuse to follow HTTP redirects. 
+ + The discovery / device / token / ``/settings`` / ``/exec`` endpoints never + legitimately redirect. Auto-following a ``30x`` is unsafe here because only + the *original* URL is vetted: ``_require_secure`` and + ``validate_endpoint_origins`` never see the redirect target. urllib also + does not strip the ``Authorization`` header on a cross-origin redirect, so a + single ``302`` from ``/exec`` would re-send ``Authorization: Bearer + <token>`` to an attacker-chosen host — including a downgrade to plaintext + ``http`` — leaking the QuestDB token off-origin. + + Returning ``None`` makes urllib stop following and surface the ``30x`` as an + ``HTTPError`` (which :func:`request` turns into a non-2xx + :class:`HttpResponse`), so callers see a clean failure instead of a + silently-followed redirect. + """ + + def redirect_request(self, *args, **kwargs): + return None + + def _opener(ctx: Optional[ssl.SSLContext]) -> urllib.request.OpenerDirector: # build_opener keeps the default ProxyHandler (which reads *_PROXY env - # vars), while letting us pin our own TLS context. - if ctx is None: - return urllib.request.build_opener() - return urllib.request.build_opener(urllib.request.HTTPSHandler(context=ctx)) + # vars), while letting us pin our own TLS context and forbid redirects + # (the credential/token endpoints never legitimately redirect). 
+ handlers: list = [_NoRedirect()] + if ctx is not None: + handlers.append(urllib.request.HTTPSHandler(context=ctx)) + return urllib.request.build_opener(*handlers) def request( diff --git a/test/test_auth.py b/test/test_auth.py index 6a4ed951..9f55b669 100644 --- a/test/test_auth.py +++ b/test/test_auth.py @@ -1209,6 +1209,52 @@ def test_insecure_does_not_downgrade_idp(self): with self.assertRaises(OidcConfigError): auth.token() + def test_redirects_are_not_followed(self): + # A 30x must NOT be followed: urllib would otherwise re-send the + # Authorization: Bearer header (and downgrade to plaintext http) to the + # redirect target, leaking the QuestDB token off-origin (only the + # original URL is vetted, never the redirect target). The redirect must + # surface as a non-2xx response, and the off-origin host must never be + # contacted. See C1. + from questdb.auth import _http + + seen = [] + + class _Redir(http.server.BaseHTTPRequestHandler): + def log_message(self, *a): + pass + + def do_GET(self): + seen.append((self.path, self.headers.get('Authorization'))) + if self.path == '/exec': + self.send_response(302) + self.send_header('Location', attacker + '/stolen') + self.end_headers() + else: + self.send_response(200) + self.send_header('Content-Length', '2') + self.end_headers() + self.wfile.write(b'{}') + + victim = http.server.HTTPServer(('127.0.0.1', 0), _Redir) + thief = http.server.HTTPServer(('127.0.0.1', 0), _Redir) + attacker = f'http://127.0.0.1:{thief.server_port}' + for srv in (victim, thief): + threading.Thread(target=srv.serve_forever, daemon=True).start() + try: + resp = _http.request( + 'GET', f'http://127.0.0.1:{victim.server_port}/exec', + headers={'Authorization': 'Bearer SECRET'}, timeout=5) + finally: + for srv in (victim, thief): + srv.shutdown() + srv.server_close() + + # The redirect surfaced as a non-2xx response, was not followed, and the + # off-origin target never saw the request (or the bearer token). 
+ self.assertEqual(resp.status, 302) + self.assertEqual(seen, [('/exec', 'Bearer SECRET')]) + class TestRendererSecurity(unittest.TestCase): """The Jupyter prompt must never turn an IdP-supplied URL into a From c13cf698614770e9efed139b66d63aa503f76f1c Mon Sep 17 00:00:00 2001 From: glasstiger Date: Thu, 18 Jun 2026 15:41:13 +0100 Subject: [PATCH 12/39] fix: require IdP pin for plaintext /settings When QuestDB is reached over plaintext http to a non-loopback host (only possible with insecure=True), its /settings response is MITM-able. The issuer-pin requirement previously fired only when an IdP endpoint was missing (the discovery path). A tampered /settings advertising BOTH the token and device-authorization endpoints at one attacker origin skipped that path: the co-location check passed trivially (same origin) and the issuer-pin check was vacuous (no issuer), so the device code and the long-lived refresh token were POSTed to the attacker. Require the same out-of-band pin (issuer= / discovery_url=) before trusting /settings-supplied credential endpoints fetched over such an untrusted channel. Endpoints the caller passed explicitly, and endpoints from an authenticated (https / loopback) /settings, are unaffected, so the https happy path and local-dev loopback are unchanged. Co-Authored-By: Claude Opus 4.8 (1M context) --- src/questdb/auth/_discovery.py | 49 ++++++++++++++++++++++- test/test.py | 1 + test/test_auth.py | 71 ++++++++++++++++++++++++++++++++++ 3 files changed, 120 insertions(+), 1 deletion(-) diff --git a/src/questdb/auth/_discovery.py b/src/questdb/auth/_discovery.py index 359391c4..ccd39fce 100644 --- a/src/questdb/auth/_discovery.py +++ b/src/questdb/auth/_discovery.py @@ -41,7 +41,7 @@ from typing import Any, Dict, Optional from ._errors import OidcConfigError -from ._http import get_json, safe_urlparse +from ._http import get_json, safe_urlparse, _is_loopback # QuestDB /settings keys (see EntPropServerConfiguration.exportConfiguration()). 
_K_ENABLED = 'acl.oidc.enabled' @@ -132,6 +132,19 @@ def _origin_str(url: str) -> str: return f'{scheme}://{host}:{port}' if port else f'{scheme}://{host}' +def _settings_channel_is_plaintext(questdb_url: str) -> bool: + """ + True if QuestDB ``/settings`` was fetched over plaintext http to a + non-loopback host — a channel a network MITM can tamper (only reachable + with ``insecure=True``; ``_require_secure`` rejects it otherwise). IdP + endpoints advertised by such an unauthenticated ``/settings`` response must + not be trusted to route credentials without an out-of-band pin. + """ + parts, _ = safe_urlparse(questdb_url) + return (parts.scheme or '').lower() == 'http' and not _is_loopback( + parts.hostname) + + def validate_endpoint_origins( token_endpoint: str, device_authorization_endpoint: str, @@ -272,6 +285,12 @@ def resolve_config( if audience is None: audience = cfg.get(_K_AUDIENCE) or None + # Track which credential endpoints the caller supplied directly. Those are + # trusted; endpoints learned from /settings are only as trustworthy as the + # channel that delivered them (see the insecure-channel guard below). + explicit_token_endpoint = token_endpoint is not None + explicit_device_endpoint = device_authorization_endpoint is not None + token_endpoint = ( token_endpoint or _resolve_endpoint(cfg.get(_K_TOKEN_ENDPOINT), cfg)) authorization_endpoint = ( @@ -281,6 +300,34 @@ def resolve_config( device_authorization_endpoint or _resolve_endpoint(cfg.get(_K_DEVICE_ENDPOINT), cfg)) + # When QuestDB itself was reached over plaintext http to a non-loopback host + # (only possible with insecure=True), its /settings response can be tampered + # in transit. Any IdP credential endpoint it advertises would then route the + # device code and long-lived refresh token to an attacker origin. 
The + # missing-endpoint discovery path below already demands an out-of-band pin, + # but when a tampered /settings advertises BOTH endpoints at one attacker + # origin that path is skipped, the co-location check passes trivially (they + # share that origin) and the issuer-pin check is vacuous (no issuer) — so + # nothing else catches it. Require the same out-of-band pin (issuer= / + # discovery_url=) before trusting /settings-supplied endpoints over such a + # channel. Endpoints the caller passed explicitly, and endpoints from an + # authenticated (https / loopback) /settings, are unaffected. + settings_supplied_credentials = ( + (token_endpoint and not explicit_token_endpoint) + or (device_authorization_endpoint and not explicit_device_endpoint)) + if (questdb_url and settings_supplied_credentials + and not issuer and not discovery_url + and _settings_channel_is_plaintext(questdb_url)): + raise OidcConfigError( + 'QuestDB was reached over plaintext http (insecure=True), so its ' + '/settings response — and the OIDC endpoints it advertises — can be ' + 'tampered in transit and used to redirect the device-code and ' + 'refresh-token requests to an attacker. Pin the identity provider ' + 'out-of-band with issuer="https://your-idp" (or discovery_url=...), ' + 'pass the endpoints explicitly (token_endpoint=..., ' + 'device_authorization_endpoint=...), or connect to QuestDB over ' + 'https so /settings is authenticated.') + # Fall back to IdP discovery when QuestDB doesn't advertise the device # endpoint (and/or the token endpoint). This contacts the IdP, so it is # held to https/loopback (insecure=False) regardless of the QuestDB flag. 
diff --git a/test/test.py b/test/test.py index d6c7808a..04a2dcd3 100755 --- a/test/test.py +++ b/test/test.py @@ -40,6 +40,7 @@ TestNonInteractive, TestRefresh, TestDiscovery, + TestInsecureSettingsGuard, TestRestAdapter, TestAdapters, TestConcurrency, diff --git a/test/test_auth.py b/test/test_auth.py index 9f55b669..d2fb3d99 100644 --- a/test/test_auth.py +++ b/test/test_auth.py @@ -728,6 +728,77 @@ def test_issuer_pin_accepts_matching_origin(self): self.base + '/device') +class TestInsecureSettingsGuard(unittest.TestCase): + """ + M1: a /settings response fetched over plaintext http to a non-loopback host + (only reachable with insecure=True) is MITM-able, so IdP endpoints it + advertises must not be trusted to route the device code / refresh token + without an out-of-band issuer/discovery_url pin — even when BOTH endpoints + are present (so the co-location check would otherwise pass trivially). + """ + + _TAMPERED = { + 'acl.oidc.enabled': True, + 'acl.oidc.client.id': 'questdb', + 'acl.oidc.token.endpoint': 'https://evil.example.com/token', + 'acl.oidc.device.authorization.endpoint': + 'https://evil.example.com/device', + } + + def _resolve(self, settings, **kw): + # Stub the network: /settings returns the given (possibly tampered) map, + # and IdP discovery must never be contacted in these guard paths. + from questdb.auth import _discovery + with mock.patch.object(_discovery, 'fetch_settings', + return_value=settings), \ + mock.patch.object( + _discovery, 'discover_device_endpoint_from_idp', + side_effect=AssertionError('IdP discovery must not run')): + return _discovery.resolve_config(**kw) + + def test_both_endpoints_over_plaintext_without_pin_rejected(self): + # The M1 case: both endpoints present at one (attacker) origin, plaintext + # channel, no pin -> refuse, and never contact the IdP. 
+ with self.assertRaises(OidcConfigError) as cm: + self._resolve(self._TAMPERED, + questdb_url='http://qdb.internal.example:9000', + insecure=True) + self.assertIn('issuer', str(cm.exception)) + + def test_plaintext_guard_does_not_fire_for_loopback(self): + # Loopback http never leaves the host, so /settings is not MITM-able; + # the guard must not fire (the common local-dev path). + cfg = self._resolve(self._TAMPERED, + questdb_url='http://127.0.0.1:9000', insecure=True) + self.assertEqual(cfg.token_endpoint, 'https://evil.example.com/token') + + def test_plaintext_guard_does_not_fire_over_https(self): + # Over https /settings is authenticated by TLS; the documented + # trust-the-server behavior is preserved (issuer= stays optional). + cfg = self._resolve(self._TAMPERED, + questdb_url='https://qdb.example.com:9000') + self.assertEqual(cfg.device_authorization_endpoint, + 'https://evil.example.com/device') + + def test_explicit_endpoints_over_plaintext_are_trusted(self): + # Endpoints the caller passed explicitly are not /settings-supplied, so + # the guard must not force a pin even over a plaintext channel. + cfg = self._resolve( + {'acl.oidc.enabled': True, 'acl.oidc.client.id': 'questdb'}, + questdb_url='http://qdb.internal.example:9000', insecure=True, + token_endpoint='https://idp.example.com/token', + device_authorization_endpoint='https://idp.example.com/device') + self.assertEqual(cfg.token_endpoint, 'https://idp.example.com/token') + + def test_pin_satisfies_guard_over_plaintext(self): + # With an out-of-band issuer pin the guard is satisfied (the actual + # origin validation then happens in OidcDeviceAuth.__init__). 
+ cfg = self._resolve(self._TAMPERED, + questdb_url='http://qdb.internal.example:9000', + insecure=True, issuer='https://evil.example.com') + self.assertEqual(cfg.token_endpoint, 'https://evil.example.com/token') + + @unittest.skipIf(pd is None, 'pandas not installed') class TestRestAdapter(AuthTestBase): def _connected(self): From bb9147c98faec39fb98b09cebc37aac450c513cc Mon Sep 17 00:00:00 2001 From: glasstiger Date: Thu, 18 Jun 2026 16:14:02 +0100 Subject: [PATCH 13/39] fix: clamp device-flow poll timing fields MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit The device-authorization response's expires_in / interval were trusted verbatim, so a hostile or buggy IdP could break or stall the poll loop — which runs under the acquisition lock, so a stall freezes every other thread needing a token on that instance: * expires_in <= 0 set the deadline to "now", timing the flow out before its first poll even though the user could still authorize; * an unbounded interval (or repeated slow_down) produced a single enormous sleep() holding the lock. Clamp both: expires_in <= 0 -> default, capped at a max lifetime; interval to [1s, 60s] (including after slow_down); and never sleep past the deadline. RFC-typical values (interval=5, expires_in=600) are unchanged. Co-Authored-By: Claude Opus 4.8 (1M context) --- src/questdb/auth/_device.py | 32 +++++++++++++++++++++---- test/test_auth.py | 48 +++++++++++++++++++++++++++++++++++++ 2 files changed, 75 insertions(+), 5 deletions(-) diff --git a/src/questdb/auth/_device.py b/src/questdb/auth/_device.py index 27711814..f2797e86 100644 --- a/src/questdb/auth/_device.py +++ b/src/questdb/auth/_device.py @@ -61,6 +61,15 @@ # A non-positive expires_in is non-conformant; treat it as "unknown". _DEFAULT_EXPIRES_IN = 3600 +# Bounds for the device-authorization response's timing fields (RFC 8628). 
The +# device code is short-lived, so the IdP-supplied values are clamped: a hostile +# or buggy response must not be able to time the flow out before its first poll, +# nor pin the polling thread — which holds the acquisition lock — in one +# enormous sleep, nor keep the loop (and the lock) alive indefinitely. +_DEFAULT_DEVICE_CODE_LIFETIME = 600 # expires_in fallback (absent/invalid/<=0) +_MAX_DEVICE_CODE_LIFETIME = 1800 # cap on how long we keep polling +_MAX_POLL_INTERVAL = 60 # cap on the poll interval (incl. slow_down) + class _SystemClock: """Real time source; the default for :class:`OidcDeviceAuth`.""" @@ -526,13 +535,24 @@ def _request_device_code(self) -> Dict[str, Any]: def _poll_for_token(self, resp: Dict[str, Any]) -> TokenSet: device_code = resp['device_code'] try: - interval = max(1, int(resp.get('interval', self._default_interval))) + interval = int(resp.get('interval', self._default_interval)) except (TypeError, ValueError): interval = self._default_interval + # At least 1s (RFC 8628 floor), and capped so a hostile/huge value can't + # pin the polling thread (which holds the acquisition lock) in one + # enormous sleep. + interval = min(_MAX_POLL_INTERVAL, max(1, interval)) try: - expires_in = int(resp.get('expires_in', 600)) + expires_in = int(resp.get('expires_in', _DEFAULT_DEVICE_CODE_LIFETIME)) except (TypeError, ValueError): - expires_in = 600 + expires_in = _DEFAULT_DEVICE_CODE_LIFETIME + # A non-positive lifetime would time the flow out before the first poll + # (the user has already been shown the code); treat it as unknown. Cap + # the upper end so a hostile expires_in can't keep the loop — and the + # lock — alive indefinitely. 
+ if expires_in <= 0: + expires_in = _DEFAULT_DEVICE_CODE_LIFETIME + expires_in = min(expires_in, _MAX_DEVICE_CODE_LIFETIME) deadline = self._monotonic() + expires_in while True: @@ -545,7 +565,9 @@ def _poll_for_token(self, resp: Dict[str, Any]) -> TokenSet: 'Run the sign-in again.', error='expired_token') self._renderer.on_waiting(remaining) - self._sleep(interval) + # Never sleep past the deadline (remaining > 0 here): a clamped + # interval still shouldn't overshoot a short-lived code. + self._sleep(min(interval, remaining)) status, body = self._idp_post( self.config.token_endpoint, @@ -578,7 +600,7 @@ def _poll_for_token(self, resp: Dict[str, Any]) -> TokenSet: if error == 'authorization_pending': continue if error == 'slow_down': - interval += 5 + interval = min(_MAX_POLL_INTERVAL, interval + 5) continue if error == 'expired_token': self._renderer.on_failure( diff --git a/test/test_auth.py b/test/test_auth.py index d2fb3d99..ea0e4658 100644 --- a/test/test_auth.py +++ b/test/test_auth.py @@ -342,6 +342,54 @@ def test_timeout_when_never_authorized(self): with self.assertRaises(OidcTimeoutError): auth.token() + def test_nonpositive_expires_in_still_polls(self): + # A non-positive expires_in in the device-auth response must be treated + # as unknown, not as "already expired" — otherwise the flow times out + # before its first poll even though the user can still authorize. M2. 
+ self.state.device_response = { + 'device_code': 'DEV-CODE', 'user_code': 'X', + 'verification_uri': 'https://idp/device', + 'expires_in': 0, 'interval': 5, + } + self.state.token_script = [(200, None)] # success on the first poll + auth = self.make_auth() + self.assertEqual(auth.token(), ID_TOKEN) + self.assertEqual(len(self.state.token_requests), 1) # it actually polled + + def test_oversized_interval_is_clamped(self): + # A hostile/huge interval must not pin the polling thread (which holds + # the acquisition lock) in one enormous sleep; the per-poll sleep is + # capped at _MAX_POLL_INTERVAL. M2. + from questdb.auth._device import _MAX_POLL_INTERVAL + self.state.device_response = { + 'device_code': 'DEV-CODE', 'user_code': 'X', + 'verification_uri': 'https://idp/device', + 'expires_in': 600, 'interval': 10 ** 9, + } + self.state.token_script = [(200, None)] + auth = self.make_auth() + auth.token() + self.assertTrue(self._clock.sleeps) + self.assertLessEqual(max(self._clock.sleeps), _MAX_POLL_INTERVAL) + + def test_oversized_expires_in_is_capped(self): + # A hostile expires_in must not keep the poll loop (and the lock) alive + # indefinitely; the lifetime is capped so a never-authorized flow still + # terminates promptly rather than looping millions of times. M2. 
+ from questdb.auth._device import ( + _MAX_DEVICE_CODE_LIFETIME, _MAX_POLL_INTERVAL) + self.state.device_response = { + 'device_code': 'DEV-CODE', 'user_code': 'X', + 'verification_uri': 'https://idp/device', + 'expires_in': 10 ** 9, 'interval': 10 ** 9, # interval clamps too + } + self.state.token_script = [(400, {'error': 'authorization_pending'})] + auth = self.make_auth() + with self.assertRaises(OidcTimeoutError): + auth.token() + max_polls = _MAX_DEVICE_CODE_LIFETIME // _MAX_POLL_INTERVAL + 1 + self.assertLessEqual(len(self.state.token_requests), max_polls) + def test_access_denied_is_surfaced(self): self.state.token_script = [ (400, {'error': 'access_denied', From 0edf9dc6c45564eec6f6acbffaae737d08b40882 Mon Sep 17 00:00:00 2001 From: glasstiger Date: Thu, 18 Jun 2026 16:22:47 +0100 Subject: [PATCH 14/39] fix: map malformed inputs to typed OidcError Several malformed-input paths escaped the package's typed-error contract (callers catch OidcError) with a bare ValueError / AttributeError / http.client.InvalidURL: * a non-string OIDC endpoint in /settings -> AttributeError from .startswith(); now treated as absent so resolution raises a clear OidcConfigError; * a /exec "columns" entry that isn't an object -> AttributeError from .get(); now raises OidcError; * a malformed port in the QuestDB URL -> bare ValueError when an adapter read .port; QuestDB now validates it at construction via safe_urlparse; * the same malformed port reaching the /settings or discovery fetch -> http.client.InvalidURL; request() now wraps InvalidURL as OidcConfigError and any other HTTPException as OidcNetworkError, so the single HTTP choke point never leaks a raw http.client exception. 
Co-Authored-By: Claude Opus 4.8 (1M context) --- src/questdb/auth/_discovery.py | 6 ++++++ src/questdb/auth/_http.py | 8 +++++++- src/questdb/auth/_questdb.py | 18 +++++++++++++++--- test/test_auth.py | 32 ++++++++++++++++++++++++++++++++ 4 files changed, 60 insertions(+), 4 deletions(-) diff --git a/src/questdb/auth/_discovery.py b/src/questdb/auth/_discovery.py index ccd39fce..6160ce22 100644 --- a/src/questdb/auth/_discovery.py +++ b/src/questdb/auth/_discovery.py @@ -197,6 +197,12 @@ def _resolve_endpoint(value: Optional[str], cfg: Dict[str, Any]) -> Optional[str """ if not value: return None + if not isinstance(value, str): + # A non-string endpoint from /settings (e.g. a JSON number) is + # malformed; treat it as absent so resolution falls through to a clear + # OidcConfigError (or the IdP-discovery fallback) instead of an + # AttributeError from .startswith() escaping the typed-error contract. + return None if value.startswith('http://') or value.startswith('https://'): return value if value.startswith('/'): diff --git a/src/questdb/auth/_http.py b/src/questdb/auth/_http.py index ec29a1e3..e9f2aa59 100644 --- a/src/questdb/auth/_http.py +++ b/src/questdb/auth/_http.py @@ -38,6 +38,7 @@ from __future__ import annotations +import http.client import ipaddress import json import os @@ -221,7 +222,12 @@ def request( return HttpResponse(e.code, body, e.headers or {}) except urllib.error.URLError as e: raise OidcNetworkError(f'Failed to reach {url}: {e.reason}') from e - except (TimeoutError, OSError) as e: + except http.client.InvalidURL as e: + # A malformed URL (e.g. a non-integer port) can't be turned into a + # request; surface it as a config error rather than letting a raw + # http.client exception escape the package's typed-error contract. 
+ raise OidcConfigError(f'Malformed URL {url!r}: {e}') from e + except (TimeoutError, OSError, http.client.HTTPException) as e: raise OidcNetworkError(f'Failed to reach {url}: {e}') from e diff --git a/src/questdb/auth/_questdb.py b/src/questdb/auth/_questdb.py index ac9aa45b..7ecfdcd5 100644 --- a/src/questdb/auth/_questdb.py +++ b/src/questdb/auth/_questdb.py @@ -31,7 +31,7 @@ from ._device import OidcDeviceAuth from ._errors import OidcAuthError, OidcConfigError, OidcError -from ._http import request +from ._http import request, safe_urlparse _DEFAULT_PG_PORT = 8812 _DEFAULT_DATABASE = 'qdb' @@ -60,6 +60,15 @@ def _import_pandas(): def _exec_json_to_df(data: Dict[str, Any], pandas): columns = data.get('columns') or [] + # /exec returns a list of {"name", "type"} column descriptors. A malformed + # response (a non-list, or entries that aren't objects) must surface as a + # clean OidcError, not an AttributeError from .get() escaping the package's + # typed-error contract. + if not isinstance(columns, list) or not all( + isinstance(c, dict) for c in columns): + raise OidcError( + 'QuestDB /exec returned a malformed "columns" field; ' + 'cannot build a DataFrame.') names = [c.get('name') for c in columns] dataset = data.get('dataset') if dataset is None: @@ -114,7 +123,10 @@ def __init__( self.auth = auth self._insecure = insecure self._ctx = auth._ctx - self._parts = urllib.parse.urlparse(self.url) + # safe_urlparse validates the port up-front, raising OidcConfigError + # (not a bare ValueError) for a malformed one, so the adapters that read + # the port stay within the package's typed-error contract. 
+ self._parts, self._port = safe_urlparse(self.url) # -- token access ------------------------------------------------------- @@ -293,7 +305,7 @@ def sender(self, *, port: Optional[int] = None, '(`pip install questdb`).') from e scheme = 'https' if self._parts.scheme == 'https' else 'http' - resolved_port = port or self._parts.port or ( + resolved_port = port or self._port or ( 443 if scheme == 'https' else 9000) conf = (f'{scheme}::addr=' f'{self._ilp_addr(self._require_host(), resolved_port)};') diff --git a/test/test_auth.py b/test/test_auth.py index ea0e4658..ec88184a 100644 --- a/test/test_auth.py +++ b/test/test_auth.py @@ -930,6 +930,15 @@ def test_sql_non_dict_json_raises_oidc_error(self): qdb.sql('SELECT 1') self.assertNotIsInstance(cm.exception, OidcAuthError) + def test_sql_non_dict_columns_raises_oidc_error(self): + # A /exec body whose "columns" entries aren't objects must raise a clean + # OidcError, not an AttributeError from .get() on the column. See M3. + qdb = self._connected() + self.state.exec_response = {'columns': [None], 'dataset': [[1]]} + with self.assertRaises(OidcError) as cm: + qdb.sql('SELECT 1') + self.assertNotIsInstance(cm.exception, OidcAuthError) + class TestConcurrency(AuthTestBase): def test_valid_cached_token_does_not_block_during_signin(self): @@ -1157,6 +1166,13 @@ def test_require_host_rejects_hostless_url(self): QuestDB('localhost', _FakeAuth())._require_host('h.example'), 'h.example') + def test_malformed_port_url_raises_config_error(self): + # A QuestDB URL with a non-integer port must raise OidcConfigError at + # construction, not a bare ValueError when an adapter reads .port. M3. + with self.assertRaises(OidcConfigError): + QuestDB('https://questdb.example.com:notaport', _FakeAuth(), + insecure=True) + def test_sender_hostless_url_raises(self): # The guard propagates through an adapter (not just the helper): # sender() on a host-less URL raises OidcConfigError. See M5. 
@@ -1210,6 +1226,13 @@ def test_resolve_endpoint_relative_path(self): self.assertEqual(_resolve_endpoint('https://idp/x', cfg), 'https://idp/x') # absolute is kept verbatim + def test_resolve_endpoint_ignores_non_string(self): + # A non-string endpoint from /settings (e.g. a JSON number) must be + # treated as absent, not raise AttributeError from .startswith(). M3. + from questdb.auth._discovery import _resolve_endpoint + self.assertIsNone(_resolve_endpoint(8080, {})) + self.assertIsNone(_resolve_endpoint(True, {})) + def test_settings_config_nesting(self): from questdb.auth._discovery import settings_config self.assertEqual(settings_config({'config': {'a': 1}}), {'a': 1}) @@ -1374,6 +1397,15 @@ def do_GET(self): self.assertEqual(resp.status, 302) self.assertEqual(seen, [('/exec', 'Bearer SECRET')]) + def test_malformed_url_raises_config_error(self): + # A non-integer port must surface as OidcConfigError, not a raw + # http.client.InvalidURL escaping the typed-error contract — this is the + # path the QuestDB /settings / discovery fetches go through. See M3. + from questdb.auth._http import request + with self.assertRaises(OidcConfigError): + request('GET', 'https://questdb.example.com:notaport/settings', + timeout=5) + class TestRendererSecurity(unittest.TestCase): """The Jupyter prompt must never turn an IdP-supplied URL into a From 35c3fbb0eb50e29febc660bc047c683d8cf5e953 Mon Sep 17 00:00:00 2001 From: glasstiger Date: Thu, 18 Jun 2026 16:28:42 +0100 Subject: [PATCH 15/39] fix: sanitize device-flow terminal output MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit The plain-text terminal prompt wrote untrusted device-authorization response fields — verification_uri, user_code, the IdP error_description and the JWT-derived identity — verbatim to the TTY. A hostile or MITM'd response could embed ANSI escape sequences (cursor moves, screen clears) to spoof the sign-in prompt or hide the real verification URL. 
Strip C0/C1 control characters (incl. ESC) from those untrusted strings in format_prompt() and TerminalRenderer.on_success/on_failure before they reach the stream. The Jupyter renderer already html-escapes its output, and the QR path encodes the URL as image data rather than terminal text, so neither needed changes. Co-Authored-By: Claude Opus 4.8 (1M context) --- src/questdb/auth/_render.py | 30 +++++++++++++++++++++++++----- test/test_auth.py | 27 +++++++++++++++++++++++++++ 2 files changed, 52 insertions(+), 5 deletions(-) diff --git a/src/questdb/auth/_render.py b/src/questdb/auth/_render.py index 8a0fa9fb..86fb5716 100644 --- a/src/questdb/auth/_render.py +++ b/src/questdb/auth/_render.py @@ -34,6 +34,7 @@ from __future__ import annotations import html +import re import sys import urllib.parse from typing import Any, Dict, Optional, TextIO @@ -118,11 +119,30 @@ def _render_link(url: Optional[str], *, text: Optional[str] = None) -> str: f'rel="noopener noreferrer">{label}') +_CONTROL_CHARS = re.compile(r'[\x00-\x1f\x7f-\x9f]') + + +def _strip_control(text: Optional[str]) -> str: + """ + Strip C0/C1 control characters (incl. ESC) from an untrusted string before + it is written to a terminal. + + The verification URL, user code and IdP error strings come from the device- + authorization response (untrusted). Writing them verbatim to a TTY would let + a hostile or MITM'd response inject ANSI escape sequences — cursor moves, + screen clears — to spoof the prompt or hide the real sign-in URL. The + Jupyter renderer html-escapes its output; the plain-text path needs this. 
+ """ + if not text: + return '' + return _CONTROL_CHARS.sub('', text) + + def format_prompt(resp: Dict[str, Any]) -> str: """Plain-text sign-in prompt (also used as the notebook fallback).""" - uri = _verification_uri(resp) - code = resp.get('user_code', '') - complete = _verification_uri_complete(resp) + uri = _strip_control(_verification_uri(resp)) + code = _strip_control(str(resp.get('user_code', ''))) + complete = _strip_control(_verification_uri_complete(resp)) lines = [ '🔐 Sign in to QuestDB', f' Open {uri} and enter code: {code}', @@ -184,7 +204,7 @@ def on_success(self, identity: Optional[str], expires_in: float) -> None: if self._countdown_active: self._write('\n') self._countdown_active = False - who = f' as {identity}' if identity else '' + who = f' as {_strip_control(identity)}' if identity else '' mins = max(1, int(round(expires_in / 60))) self._write(f'✅ Signed in{who} — token cached, expires in {mins} min\n') @@ -192,7 +212,7 @@ def on_failure(self, message: str) -> None: if self._countdown_active: self._write('\n') self._countdown_active = False - self._write(f'❌ {message}\n') + self._write(f'❌ {_strip_control(message)}\n') class JupyterRenderer(Renderer): diff --git a/test/test_auth.py b/test/test_auth.py index ec88184a..4404ec49 100644 --- a/test/test_auth.py +++ b/test/test_auth.py @@ -1459,6 +1459,33 @@ def _display(self, html_str): # avoid importing IPython self.assertIn(' Date: Thu, 18 Jun 2026 16:28:48 +0100 Subject: [PATCH 16/39] docs: correct questdb.auth changelog and API reference * CHANGELOG: drop the stale "optional on-disk cache" claim (the FileCache backend was removed; tokens are never written to disk) and note the python_requires bump to 3.10. * docs/auth.rst: import TimestampNanos in the integrated-session snippet so it runs as written. * docs/api.rst: document TokenCache, TokenSet, MemoryCache and NullCache, which are exported in __all__ but were missing from the reference. 
Co-Authored-By: Claude Opus 4.8 (1M context) --- CHANGELOG.rst | 8 +++++++- docs/api.rst | 20 ++++++++++++++++++++ docs/auth.rst | 2 ++ 3 files changed, 29 insertions(+), 1 deletion(-) diff --git a/CHANGELOG.rst b/CHANGELOG.rst index 557b131c..672455bb 100644 --- a/CHANGELOG.rst +++ b/CHANGELOG.rst @@ -39,7 +39,8 @@ Highlights: * Auto-discovery of OIDC config from the QuestDB ``/settings`` endpoint, with a fallback to the IdP ``.well-known`` document. -* In-process token cache with silent refresh; optional on-disk cache. +* In-process token cache with silent refresh (tokens are never written to + disk). * Adapters for pandas (REST ``/exec``), SQLAlchemy, psycopg and the ingestion ``Sender``. * ``token()`` / ``headers()`` require no dependencies beyond the standard @@ -48,6 +49,11 @@ Highlights: See the :ref:`OIDC authentication guide <oidc_auth>` for details. +Python Version Support +~~~~~~~~~~~~~~~~~~~~~~~~ + +* Raised the minimum supported Python version to 3.10. + 4.1.0 (2025-11-28) ------------------ diff --git a/docs/api.rst b/docs/api.rst index 6b428050..9ff4daf3 100644 --- a/docs/api.rst +++ b/docs/api.rst @@ -90,6 +90,26 @@ See the :ref:`oidc_auth` guide for an overview. :undoc-members: :show-inheritance: +.. autoclass:: questdb.auth.TokenCache + :members: + :undoc-members: + :show-inheritance: + +.. autoclass:: questdb.auth.TokenSet + :members: + :undoc-members: + :show-inheritance: + +.. autoclass:: questdb.auth.MemoryCache + :members: + :undoc-members: + :show-inheritance: + +.. autoclass:: questdb.auth.NullCache + :members: + :undoc-members: + :show-inheritance: + .. autoexception:: questdb.auth.OidcError :show-inheritance: diff --git a/docs/auth.rst b/docs/auth.rst index d44e6b1e..d2eedfa1 100644 --- a/docs/auth.rst +++ b/docs/auth.rst @@ -71,6 +71,8 @@ paths. engine = qdb.sqlalchemy_engine() # PG-wire, token as _sso with qdb.psycopg() as conn: # raw psycopg ... 
+ + from questdb.ingress import TimestampNanos # the compiled extension with qdb.sender() as sender: # ingestion (ILP/HTTP) sender.row("trades", columns={"price": 101.5}, at=TimestampNanos.now()) From e043561fc33a47ea7537dbfd680f9d07b64141eb Mon Sep 17 00:00:00 2001 From: glasstiger Date: Thu, 18 Jun 2026 16:36:50 +0100 Subject: [PATCH 17/39] fix: make TokenSet immutable and keep tokens out of repr MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit The lock-free fast path in OidcDeviceAuth reads a published TokenSet without holding a lock, which is only safe because its fields never change after construction — an invariant previously kept by convention alone. Mark TokenSet frozen so any future in-place mutation fails loudly instead of introducing a torn read, and convert the one such mutation (the refresh carry-forward in _refresh) to dataclasses.replace(). Also keep the access/id/refresh tokens out of repr() so a TokenSet that lands in a log line or traceback cannot leak credentials. Co-Authored-By: Claude Opus 4.8 (1M context) --- src/questdb/auth/_cache.py | 21 +++++++++++++++------ src/questdb/auth/_device.py | 5 ++++- test/test_auth.py | 37 ++++++++++++++++++++++++++++++++----- 3 files changed, 51 insertions(+), 12 deletions(-) diff --git a/src/questdb/auth/_cache.py b/src/questdb/auth/_cache.py index be66ee9a..796611c1 100644 --- a/src/questdb/auth/_cache.py +++ b/src/questdb/auth/_cache.py @@ -27,7 +27,7 @@ from __future__ import annotations import threading -from dataclasses import dataclass, replace +from dataclasses import dataclass, field, replace from typing import Dict, Optional, Union from ._errors import OidcConfigError @@ -36,13 +36,22 @@ DEFAULT_SKEW_SECONDS = 30 -@dataclass +@dataclass(frozen=True) class TokenSet: - """A set of tokens obtained from the IdP, plus their expiry.""" + """ + A set of tokens obtained from the IdP, plus their expiry. 
+ + Immutable (``frozen``): the lock-free fast path in + :class:`~questdb.auth._device.OidcDeviceAuth` reads a published ``TokenSet`` + without holding a lock, which is only safe because its fields never change + after construction. Derive a modified copy with :func:`dataclasses.replace` + rather than mutating in place. The three secret fields are kept out of + ``repr`` so a token can't leak into a log line or traceback. + """ - access_token: Optional[str] = None - id_token: Optional[str] = None - refresh_token: Optional[str] = None + access_token: Optional[str] = field(default=None, repr=False) + id_token: Optional[str] = field(default=None, repr=False) + refresh_token: Optional[str] = field(default=None, repr=False) expires_at: float = 0.0 # epoch seconds; 0 == unknown token_type: str = 'Bearer' scope: Optional[str] = None diff --git a/src/questdb/auth/_device.py b/src/questdb/auth/_device.py index f2797e86..39fdcafb 100644 --- a/src/questdb/auth/_device.py +++ b/src/questdb/auth/_device.py @@ -32,6 +32,7 @@ import threading import time import webbrowser +from dataclasses import replace from typing import Any, Dict, Optional from ._cache import TokenSet, make_cache @@ -474,8 +475,10 @@ def _refresh(self, tokens: TokenSet) -> TokenSet: if status == 200: refreshed = self._tokenset_from_response(body) # Many IdPs do not rotate the refresh token; keep the old one. + # TokenSet is frozen, so derive a copy rather than mutating. 
if not refreshed.refresh_token: - refreshed.refresh_token = tokens.refresh_token + refreshed = replace( + refreshed, refresh_token=tokens.refresh_token) return refreshed raise OidcDeviceFlowError( f"Token refresh failed: {body.get('error', 'unknown error')}", diff --git a/test/test_auth.py b/test/test_auth.py index 4404ec49..fc45a629 100644 --- a/test/test_auth.py +++ b/test/test_auth.py @@ -502,11 +502,38 @@ def test_open_browser_rejects_dangerous_scheme(self): def test_memory_cache_returns_independent_copy(self): cache = MemoryCache() - cache.store('k', TokenSet(access_token='a', refresh_token='r', - expires_at=1.0)) - loaded = cache.load('k') - loaded.refresh_token = 'MUTATED' - self.assertEqual(cache.load('k').refresh_token, 'r') + stored = TokenSet(access_token='a', refresh_token='r', expires_at=1.0) + cache.store('k', stored) + # Each load is a distinct copy — never the object handed to store(), nor + # shared between loads — so a cached entry can't be aliased and reused. + first = cache.load('k') + second = cache.load('k') + self.assertIsNot(first, stored) + self.assertIsNot(first, second) + self.assertEqual(first.refresh_token, 'r') + + def test_tokenset_is_frozen(self): + # TokenSet is immutable: the lock-free fast path reads a published + # TokenSet without a lock, which is only safe if its fields never change + # after construction. Mutating one must raise, not silently succeed. + import dataclasses + t = TokenSet(access_token='a', refresh_token='r', expires_at=1.0) + with self.assertRaises(dataclasses.FrozenInstanceError): + t.refresh_token = 'MUTATED' + # Deriving a modified copy is the supported idiom. + t2 = dataclasses.replace(t, refresh_token='r2') + self.assertEqual(t.refresh_token, 'r') + self.assertEqual(t2.refresh_token, 'r2') + + def test_tokenset_repr_redacts_secrets(self): + # The access/id/refresh tokens must never appear in repr() — a TokenSet + # landing in a log line or traceback would otherwise leak credentials. 
+ r = repr(TokenSet(access_token='SECRET-A', id_token='SECRET-I', + refresh_token='SECRET-R', scope='openid')) + self.assertNotIn('SECRET-A', r) + self.assertNotIn('SECRET-I', r) + self.assertNotIn('SECRET-R', r) + self.assertIn('openid', r) # non-secret metadata still shown class TestNonInteractive(AuthTestBase): From 4e629387af1dc3fabf42af19d251864e9e450092 Mon Sep 17 00:00:00 2001 From: glasstiger Date: Thu, 18 Jun 2026 16:45:47 +0100 Subject: [PATCH 18/39] style: sort questdb.auth __all__ to satisfy Ruff RUF022 Reorder the export list using Ruff's isort-style ordering (CamelCase names first, natural-sorted, then lowercase 'connect' last). The public API is unchanged; this only resolves the RUF022 lint warning. Co-Authored-By: Claude Opus 4.8 (1M context) --- src/questdb/auth/__init__.py | 20 ++++++++++---------- 1 file changed, 10 insertions(+), 10 deletions(-) diff --git a/src/questdb/auth/__init__.py b/src/questdb/auth/__init__.py index f6ba9d1f..cfacb6e8 100644 --- a/src/questdb/auth/__init__.py +++ b/src/questdb/auth/__init__.py @@ -73,19 +73,19 @@ from ._questdb import QuestDB, connect __all__ = [ - 'connect', - 'QuestDB', - 'OidcDeviceAuth', - 'OidcConfig', - 'TokenCache', - 'TokenSet', 'MemoryCache', 'NullCache', - 'OidcError', + 'OidcAuthError', + 'OidcConfig', 'OidcConfigError', - 'OidcNetworkError', - 'OidcInteractionRequired', + 'OidcDeviceAuth', 'OidcDeviceFlowError', + 'OidcError', + 'OidcInteractionRequired', + 'OidcNetworkError', 'OidcTimeoutError', - 'OidcAuthError', + 'QuestDB', + 'TokenCache', + 'TokenSet', + 'connect', ] From 4a67cc57a3063cb8e936b59eb76fb18f2c466df5 Mon Sep 17 00:00:00 2001 From: glasstiger Date: Thu, 18 Jun 2026 16:48:47 +0100 Subject: [PATCH 19/39] docs: make review-pr level-0/1 Step 2.5 rules consistent MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit The level table said level 0 skips all of Step 2.5, but Step 2.5e is documented as running at every level — a self-contradictory 
default path. Clarify that levels 0 and 1 skip Steps 2.5a-d while still running Step 2.5e (build & binding profile), aligning the table with the 'runs at every level' rule. Co-Authored-By: Claude Opus 4.8 (1M context) --- .claude/skills/review-pr/SKILL.md | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/.claude/skills/review-pr/SKILL.md b/.claude/skills/review-pr/SKILL.md index 6b408b4a..01841f67 100644 --- a/.claude/skills/review-pr/SKILL.md +++ b/.claude/skills/review-pr/SKILL.md @@ -36,8 +36,8 @@ The level controls how much of the review below actually runs. Lower levels keep | Level | What runs | |-------|-----------| -| **0 (default)** | Steps 1, 2, 4. Skip Step 2.5. Skip Step 3 — no agent spawn; review the diff inline in the main loop, using Read/Grep on demand to resolve ambiguities. Skip Step 3b — verify each finding inline as you write it. Single-pass review covering correctness, Cython memory/refcount/GIL safety, C-ABI binding correctness, tests, and coding standards on the diff itself. | -| **1** | Adds Step 2.5a (semantic delta only — skip 2.5b/2.5c/2.5d). In Step 3, launch only Agent 1 (correctness), Agent 2 (Cython memory & refcount safety), and Agent 7 (tests) in parallel. Skip all other agents. Skip Step 3b — verify findings inline as you draft the report. | +| **0 (default)** | Steps 1, 2, 4. Skip Steps 2.5a-d, but still run Step 2.5e (build & binding profile — mandatory at every level). Skip Step 3 — no agent spawn; review the diff inline in the main loop, using Read/Grep on demand to resolve ambiguities. Skip Step 3b — verify each finding inline as you write it. Single-pass review covering correctness, Cython memory/refcount/GIL safety, C-ABI binding correctness, tests, and coding standards on the diff itself. | +| **1** | Adds Step 2.5a (semantic delta only — skip 2.5b/2.5c/2.5d; Step 2.5e still runs, as at every level). 
In Step 3, launch only Agent 1 (correctness), Agent 2 (Cython memory & refcount safety), and Agent 7 (tests) in parallel. Skip all other agents. Skip Step 3b — verify findings inline as you draft the report. | | **2** | Full Step 2.5, but in 2.5b restrict the callsite inventory to public Python symbols (exported in `__all__` / `ingress.pyi`) plus every `cdef`/`cpdef` function and every C-ABI symbol declared in the `.pxd` files. In Step 3, launch Agents 1-8. Skip Agent 9 (cross-context) and Agent 10 (adversarial fresh-context). Step 3b uses a single batched verification agent for all findings instead of one per finding. | | **3** | Every step below as written, all 10 agents, per-finding verification. The full mission-critical pass. | From 124d4c2afb0b771c12e447964ca4280e50b47c88 Mon Sep 17 00:00:00 2001 From: glasstiger Date: Thu, 18 Jun 2026 16:52:04 +0100 Subject: [PATCH 20/39] docs: exclude Agent 10 from review-pr Step 2.5 input contract The Step 3 preamble and Step 2.5 stated that every Step 3 agent receives the change-surface map and build/binding profile facts, but Agent 10 (the fresh-context adversarial agent) is documented as receiving only the diff and changed file names. Carve Agent 10 out of all three general-rule statements (Steps 2.5, 2.5e, and the Step 3 input contract) so they no longer contradict Agent 10's own section. Co-Authored-By: Claude Opus 4.8 (1M context) --- .claude/skills/review-pr/SKILL.md | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/.claude/skills/review-pr/SKILL.md b/.claude/skills/review-pr/SKILL.md index 01841f67..2ff79a80 100644 --- a/.claude/skills/review-pr/SKILL.md +++ b/.claude/skills/review-pr/SKILL.md @@ -68,7 +68,7 @@ Check: ## Step 2.5: Map the change surface -Before launching review agents, produce a structured change surface map. This step is mandatory and must use Grep/Glob — do not reason about callsites from memory. The output of this step is required input for every agent in Step 3. 
+Before launching review agents, produce a structured change surface map. This step is mandatory and must use Grep/Glob — do not reason about callsites from memory. The output of this step is required input for every Step 3 agent except Agent 10 (the fresh-context adversarial agent, which deliberately works from the diff alone). ### 2.5a Semantic delta per changed symbol @@ -152,11 +152,11 @@ Record, with file:line citations: - **Minimum numpy / Python versions** (`pyproject.toml`: `requires-python`, `numpy>=1.21.0`). Code that uses a newer numpy C-API or Python C-API symbol than the floor breaks the oldest supported build. State the floor. - **`abort()` is imported** (`from libc.stdlib cimport ... abort`). Any reachable `abort()` call, or any Rust panic that crosses the C ABI, terminates the host interpreter with no traceback. Flag the path. -A review without this section is incomplete. State the relevant facts (directives, exception default, submodule commit) in one line at the top of every Step 3 agent prompt so the agent reasons from the right premise. +A review without this section is incomplete. State the relevant facts (directives, exception default, submodule commit) in one line at the top of every Step 3 agent prompt (except Agent 10's, which works from the diff alone) so the agent reasons from the right premise. ## Step 3: Parallel review -Every agent receives: +Every agent except Agent 10 receives: 1. The PR diff 2. 
The full change surface map from Step 2.5 (semantic deltas, callsite inventory, implicit contracts, cross-context exposure list, build & binding profile facts) From a062a0afbd222fa9f14a98467df7f4c6f6a28323 Mon Sep 17 00:00:00 2001 From: glasstiger Date: Thu, 18 Jun 2026 18:08:36 +0100 Subject: [PATCH 21/39] fix: ignore user-writable /settings preferences QuestDB /settings nests server-authoritative values under a top-level "config" object alongside a user-writable "preferences" sibling (the web console persists UI prefs there via PUT /settings). Discovery now reads only "config" and refuses to fall back to the top level of a structured response, so a user who can write a preference cannot smuggle an acl.oidc.* key (e.g. a redirected token endpoint that points the device code / refresh token at an attacker) into the resolved OIDC config. Genuinely flat legacy responses are still tolerated at the top level. Ports the trust model from the Java client (java-questdb-client#52). Co-Authored-By: Claude Opus 4.8 (1M context) --- src/questdb/auth/_discovery.py | 36 ++++++++++++++------ test/test_auth.py | 62 ++++++++++++++++++++++++++++++++++ 2 files changed, 88 insertions(+), 10 deletions(-) diff --git a/src/questdb/auth/_discovery.py b/src/questdb/auth/_discovery.py index 6160ce22..50732e6c 100644 --- a/src/questdb/auth/_discovery.py +++ b/src/questdb/auth/_discovery.py @@ -89,17 +89,33 @@ def _as_bool(value: Any, default: Optional[bool] = None) -> Optional[bool]: def settings_config(settings: Any) -> Dict[str, Any]: """ - Return the flat config map from a ``/settings`` response. - - Modern servers nest values under a ``"config"`` object; older ones return - them at the top level. We tolerate both. + Return the trusted config map from a ``/settings`` response. 
+ + Modern QuestDB nests the server-authoritative values under a top-level + ``"config"`` object, alongside a **user-writable** ``"preferences"`` sibling + (the web console persists UI preferences there via ``PUT /settings``). + Discovery must read only ``"config"`` and never the top level, so a user who + can write a preference cannot smuggle an ``acl.oidc.*`` key — e.g. a + redirected ``token.endpoint`` that points the device code / refresh token at + an attacker — into the resolved OIDC configuration. + + A genuinely flat, legacy ``/settings`` response (no ``"config"`` / + ``"preferences"`` split) is still tolerated at the top level. """ - if isinstance(settings, dict): - cfg = settings.get('config') - if isinstance(cfg, dict): - return cfg - return settings - return {} + if not isinstance(settings, dict): + return {} + cfg = settings.get('config') + if isinstance(cfg, dict): + return cfg + # A structured response carries the user-writable "preferences" sibling + # (and normally the "config" object). If either marker is present, the top + # level is NOT trusted config: read "config" or nothing — so user-writable + # preferences can never be mistaken for server-authoritative config, even + # when "config" is absent or malformed. + if 'config' in settings or 'preferences' in settings: + return {} + # Legacy flat response: no config/preferences split; tolerate top-level keys. + return settings def fetch_settings( diff --git a/test/test_auth.py b/test/test_auth.py index fc45a629..162a6ac5 100644 --- a/test/test_auth.py +++ b/test/test_auth.py @@ -662,6 +662,34 @@ def test_from_questdb_reads_settings(self): self.base + '/device') self.assertEqual(auth.token(), ID_TOKEN) + def test_user_writable_preferences_cannot_override_config(self): + # A user-writable "preferences" sibling in /settings must never override + # the trusted "config" object during discovery: end-to-end, the resolved + # credential endpoints come from "config", not the attacker's prefs. 
+ self.state.settings = { + 'config': { + 'acl.oidc.enabled': True, + 'acl.oidc.client.id': 'questdb', + 'acl.oidc.scope': 'openid groups', + 'acl.oidc.groups.encoded.in.token': True, + 'acl.oidc.token.endpoint': self.base + '/token', + 'acl.oidc.device.authorization.endpoint': self.base + '/device', + }, + 'preferences.version': 1, + 'preferences': { + 'acl.oidc.token.endpoint': 'https://evil.example.com/token', + 'acl.oidc.device.authorization.endpoint': + 'https://evil.example.com/device', + }, + } + auth = OidcDeviceAuth.from_questdb( + self.base, insecure=True, interactive=True, renderer=Renderer(), + _clock=FakeClock()) + self.assertEqual(auth.config.token_endpoint, self.base + '/token') + self.assertEqual(auth.config.device_authorization_endpoint, + self.base + '/device') + self.assertEqual(auth.token(), ID_TOKEN) + def test_well_known_fallback_for_device_endpoint(self): # Settings advertise OIDC + token endpoint but NOT the device endpoint; # issuer= is pinned, so the IdP .well-known fallback is allowed. @@ -1265,6 +1293,40 @@ def test_settings_config_nesting(self): self.assertEqual(settings_config({'config': {'a': 1}}), {'a': 1}) self.assertEqual(settings_config({'a': 1}), {'a': 1}) # flat fallback + def test_settings_config_ignores_user_writable_preferences(self): + # QuestDB /settings nests server-authoritative values under "config" + # alongside a user-writable "preferences" sibling (the web console + # persists UI prefs there). Discovery must read only "config", so a user + # who can write a preference cannot smuggle an acl.oidc.* key in to + # redirect the device code / refresh token. Ported from the Java client. 
+ from questdb.auth._discovery import settings_config + resp = { + 'config': { + 'acl.oidc.client.id': 'questdb', + 'acl.oidc.token.endpoint': 'https://idp.example.com/token'}, + 'preferences.version': 0, + 'preferences': { + 'acl.oidc.token.endpoint': 'https://evil.example.com/token'}, + } + cfg = settings_config(resp) + self.assertEqual(cfg['acl.oidc.token.endpoint'], + 'https://idp.example.com/token') + self.assertNotIn('evil', str(cfg)) + # A structured response (one carrying the user-writable "preferences" + # sibling) must NOT fall back to trusting the top level when "config" is + # absent or malformed: read nothing rather than the top level. + self.assertEqual( + settings_config({'preferences': {'acl.oidc.token.endpoint': 'x'}}), + {}) + self.assertEqual( + settings_config({'config': None, + 'preferences': {'acl.oidc.client.id': 'x'}}), + {}) + # A genuinely flat / legacy response (no config/preferences split) is + # still tolerated at the top level. + self.assertEqual(settings_config({'acl.oidc.client.id': 'q'}), + {'acl.oidc.client.id': 'q'}) + class TestEndpointValidation(unittest.TestCase): def setUp(self): From fe34ffa31a6aac75f5ffdccc1b4f3afb7d52eaa4 Mon Sep 17 00:00:00 2001 From: glasstiger Date: Thu, 18 Jun 2026 19:25:59 +0100 Subject: [PATCH 22/39] fix: reject conf metachars in QuestDB host MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit QuestDB._require_host() passed the URL hostname unsanitized into the ILP conf string sender() builds (addr=host:port;). urlparse keeps ';' and '=' in .hostname, so a crafted or tampered URL such as "https://host;tls_verify=unsafe_off;x=" injected extra conf params — silently disabling the sender's TLS certificate verification (and exposing the bearer token to a MITM), or e.g. auto_flush=off for data loss. 
Reject ';', '=', whitespace and control characters in the resolved host (':' stays allowed for IPv6 literals) at the single chokepoint shared by sender()/psycopg()/sqlalchemy_engine(). Add a regression test. Co-Authored-By: Claude Opus 4.8 (1M context) --- src/questdb/auth/_questdb.py | 18 ++++++++++++++++++ test/test_auth.py | 33 +++++++++++++++++++++++++++++++++ 2 files changed, 51 insertions(+) diff --git a/src/questdb/auth/_questdb.py b/src/questdb/auth/_questdb.py index 7ecfdcd5..8f7d8be3 100644 --- a/src/questdb/auth/_questdb.py +++ b/src/questdb/auth/_questdb.py @@ -26,6 +26,7 @@ from __future__ import annotations +import re import urllib.parse from typing import Any, Dict, Optional @@ -36,6 +37,15 @@ _DEFAULT_PG_PORT = 8812 _DEFAULT_DATABASE = 'qdb' +# A hostname or IP literal never contains the ILP conf-string delimiters (';' +# separates parameters, '=' separates key from value) nor whitespace/control +# characters. Reject them in the resolved host so a crafted or tampered URL +# can't smuggle extra conf parameters — e.g. ';tls_verify=unsafe_off;', which +# silently disables TLS certificate verification — into the 'addr=host:port;' +# string sender() hands to Sender.from_conf. Note ':' is intentionally allowed +# (IPv6 literals contain it; _ilp_addr brackets them). +_ILLEGAL_HOST_CHARS = re.compile(r'[\x00-\x20\x7f;=]') + _AUTH_HINT = ( 'QuestDB rejected the token (HTTP {status}). Common causes:\n' " * scope / 'acl.oidc.groups.encoded.in.token' mismatch — the server may " @@ -208,6 +218,14 @@ def _require_host(self, host: Optional[str] = None) -> str: f'The QuestDB URL {self.url!r} has no host. Use a URL with an ' 'explicit host (e.g. "https://questdb.example.com:9000"), or ' 'pass host=... to the adapter.') + if _ILLEGAL_HOST_CHARS.search(resolved): + raise OidcConfigError( + f'The QuestDB host {resolved!r} contains an illegal character ' + "(';', '=', whitespace or a control character). 
A hostname or " + 'IP address never does; this indicates a malformed or tampered ' + 'URL. (Such a host could otherwise inject ILP conf parameters ' + 'such as "tls_verify=unsafe_off" into the sender, silently ' + 'disabling TLS certificate verification.)') return resolved @staticmethod diff --git a/test/test_auth.py b/test/test_auth.py index 162a6ac5..41305b3a 100644 --- a/test/test_auth.py +++ b/test/test_auth.py @@ -1228,6 +1228,39 @@ def test_malformed_port_url_raises_config_error(self): QuestDB('https://questdb.example.com:notaport', _FakeAuth(), insecure=True) + def test_host_with_conf_metachars_rejected(self): + # C1: a host containing the ILP conf delimiters (';' / '=') or + # whitespace must be rejected, never spliced into the + # `addr=host:port;` conf string. Otherwise a crafted/tampered URL host + # injects extra conf params — e.g. `tls_verify=unsafe_off`, which + # silently disables the sender's TLS certificate verification, or + # `auto_flush=off` (data loss). urlparse() keeps ';'/'=' in .hostname. + for bad in ('https://realhost;tls_verify=unsafe_off;x=', + 'https://a=b'): + with self.subTest(url=bad): + with self.assertRaises(OidcConfigError): + self._qdb(bad)._require_host() + # An explicit host= override goes through the same guard (incl. + # whitespace, which is never valid in a host). + for bad_host in ('evil;tls_verify=unsafe_off', 'a=b', 'h ost'): + with self.subTest(host=bad_host): + with self.assertRaises(OidcConfigError): + self._qdb()._require_host(bad_host) + # A legitimate host (incl. an IPv6 literal, which contains ':') is + # still accepted — the guard must not over-reject. + self.assertEqual(self._qdb()._require_host('::1'), '::1') + self.assertEqual( + self._qdb()._require_host('questdb.example.com'), + 'questdb.example.com') + # The guard fires through the adapter (sender), before the conf string + # is built and handed to Sender.from_conf. 
+ qdb = self._qdb('https://realhost;tls_verify=unsafe_off:9000') + fake = types.ModuleType('questdb.ingress') + fake.Sender = object() # import must succeed so we reach the guard + with mock.patch.dict(sys.modules, {'questdb.ingress': fake}): + with self.assertRaises(OidcConfigError): + qdb.sender() + def test_sender_hostless_url_raises(self): # The guard propagates through an adapter (not just the helper): # sender() on a host-less URL raises OidcConfigError. See M5. From ae4c5ba10095ce22279abffc35243034fd48ba29 Mon Sep 17 00:00:00 2001 From: glasstiger Date: Fri, 19 Jun 2026 11:22:31 +0100 Subject: [PATCH 23/39] fix: harden questdb.auth untrusted-input handling MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit M1 — keep four untrusted-input failures within the OidcError contract instead of leaking a raw stdlib exception: * int(float('inf')) from a JSON Infinity (json.loads accepts it) raised OverflowError — not a ValueError — in the expires_in/interval parses (_device.py); add OverflowError to those handlers. * urllib.parse.urlparse() itself raises ValueError on a malformed IPv6 literal (e.g. "https://[::1") before .port is read. safe_urlparse now wraps the urlparse() call, and _require_secure routes through it, so a bad endpoint from /settings or discovery raises OidcConfigError. * build_ssl_context() leaked FileNotFoundError/ssl.SSLError for a mistyped ca_bundle path (or env var); map to OidcConfigError. * deeply-nested JSON makes json.loads raise RecursionError — not a ValueError; catch it in get_json/post_form and QuestDB.sql. M2 — _strip_control now also strips Unicode bidi-override / zero-width / line-separator ranges (U+200B-200F, U+2028-202E, U+2066-2069, U+FEFF), not just C0/C1. U+202E (RIGHT-TO-LEFT OVERRIDE) in an untrusted device response could otherwise reverse displayed text in the terminal prompt to disguise the real sign-in host. 
Add 8 regression tests (overflow expires_in/interval, malformed-IPv6 at safe_urlparse/_require_secure/_normalize_url/validate_endpoint_origins/ constructor, bad CA bundle, deeply-nested JSON, bidi/zero-width stripping). Co-Authored-By: Claude Opus 4.8 (1M context) --- src/questdb/auth/_device.py | 8 ++- src/questdb/auth/_http.py | 39 ++++++++--- src/questdb/auth/_questdb.py | 9 +-- src/questdb/auth/_render.py | 20 ++++-- test/test_auth.py | 125 +++++++++++++++++++++++++++++++++++ 5 files changed, 177 insertions(+), 24 deletions(-) diff --git a/src/questdb/auth/_device.py b/src/questdb/auth/_device.py index 39fdcafb..e727278b 100644 --- a/src/questdb/auth/_device.py +++ b/src/questdb/auth/_device.py @@ -437,7 +437,9 @@ def _store(self, tokens: TokenSet) -> None: def _tokenset_from_response(self, body: Dict[str, Any]) -> TokenSet: try: expires_in = int(body.get('expires_in', _DEFAULT_EXPIRES_IN)) - except (TypeError, ValueError): + except (TypeError, ValueError, OverflowError): + # OverflowError: a JSON Infinity (json.loads accepts it) → int(inf); + # it is not a ValueError, so list it to keep the typed contract. 
expires_in = _DEFAULT_EXPIRES_IN if expires_in <= 0: # A non-positive lifetime would mark a just-issued token as already @@ -539,7 +541,7 @@ def _poll_for_token(self, resp: Dict[str, Any]) -> TokenSet: device_code = resp['device_code'] try: interval = int(resp.get('interval', self._default_interval)) - except (TypeError, ValueError): + except (TypeError, ValueError, OverflowError): interval = self._default_interval # At least 1s (RFC 8628 floor), and capped so a hostile/huge value can't # pin the polling thread (which holds the acquisition lock) in one @@ -547,7 +549,7 @@ def _poll_for_token(self, resp: Dict[str, Any]) -> TokenSet: interval = min(_MAX_POLL_INTERVAL, max(1, interval)) try: expires_in = int(resp.get('expires_in', _DEFAULT_DEVICE_CODE_LIFETIME)) - except (TypeError, ValueError): + except (TypeError, ValueError, OverflowError): expires_in = _DEFAULT_DEVICE_CODE_LIFETIME # A non-positive lifetime would time the flow out before the first poll # (the user has already been shown the code); treat it as unknown. Cap diff --git a/src/questdb/auth/_http.py b/src/questdb/auth/_http.py index e9f2aa59..3e691159 100644 --- a/src/questdb/auth/_http.py +++ b/src/questdb/auth/_http.py @@ -64,11 +64,21 @@ def build_ssl_context(ca_bundle: Optional[str] = None) -> ssl.SSLContext: ca_bundle or os.environ.get('REQUESTS_CA_BUNDLE') or os.environ.get('SSL_CERT_FILE')) - if ca: + if not ca: + return ssl.create_default_context() + # A missing / unreadable / invalid bundle makes the stdlib raise a raw + # FileNotFoundError or ssl.SSLError; map it to the package's typed error so + # a mistyped ca_bundle path (or env var) fails clearly instead of leaking a + # bare stdlib exception. + try: if os.path.isdir(ca): return ssl.create_default_context(capath=ca) return ssl.create_default_context(cafile=ca) - return ssl.create_default_context() + except (OSError, ssl.SSLError) as e: + raise OidcConfigError( + f'Could not load the CA bundle {ca!r}: {e}. 
Check the path points ' + 'to a readable PEM/DER certificate file (or a directory of them).' + ) from e class HttpResponse: @@ -96,17 +106,18 @@ def safe_urlparse(url: str) -> tuple: """ ``urllib.parse.urlparse(url)`` paired with its port, but with a typed error. - ``ParseResult.port`` raises a bare ``ValueError`` for a non-integer port - (e.g. ``https://idp:notaport``); re-raise it as :class:`OidcConfigError` so - a malformed endpoint URL stays within the package's error contract instead - of escaping as a raw ``ValueError``. Returns ``(parts, port)``. + Both ``urlparse`` itself (e.g. ``https://[::1`` — a malformed IPv6 literal) + and ``ParseResult.port`` (e.g. ``https://idp:notaport`` — a non-integer + port) raise a bare ``ValueError``; re-raise it as :class:`OidcConfigError` + so a malformed endpoint URL stays within the package's error contract + instead of escaping as a raw ``ValueError``. Returns ``(parts, port)``. """ - parts = urllib.parse.urlparse(url) try: + parts = urllib.parse.urlparse(url) return parts, parts.port except ValueError as e: raise OidcConfigError( - f'Malformed endpoint URL {url!r}: invalid port.') from e + f'Malformed endpoint URL {url!r}: {e}.') from e def _is_loopback(host: Optional[str]) -> bool: @@ -123,7 +134,9 @@ def _is_loopback(host: Optional[str]) -> bool: def _require_secure(url: str, insecure: bool) -> None: - parts = urllib.parse.urlparse(url) + # safe_urlparse maps a malformed URL (bad IPv6 literal / non-integer port) + # to OidcConfigError instead of letting a bare ValueError escape. 
+ parts, _ = safe_urlparse(url) scheme = parts.scheme.lower() if scheme == 'https': return @@ -247,7 +260,9 @@ def get_json( f'HTTP {resp.status} from {url}: {resp.text()[:200]}') try: return resp.json() - except (ValueError, UnicodeDecodeError) as e: + except (ValueError, UnicodeDecodeError, RecursionError) as e: + # RecursionError: deeply-nested JSON exhausts the decoder's stack; it is + # not a ValueError, so catch it explicitly to keep the typed contract. raise OidcError(f'Invalid JSON from {url}: {e}') from e @@ -270,7 +285,9 @@ def post_form( insecure=insecure) try: parsed = resp.json() - except (ValueError, UnicodeDecodeError): + except (ValueError, UnicodeDecodeError, RecursionError): + # RecursionError: deeply-nested JSON exhausts the decoder's stack; not a + # ValueError, so catch it explicitly to keep the typed contract. if resp.ok: raise OidcError( f'Expected JSON from {url}, got: {resp.text()[:200]}') diff --git a/src/questdb/auth/_questdb.py b/src/questdb/auth/_questdb.py index 8f7d8be3..56caf88c 100644 --- a/src/questdb/auth/_questdb.py +++ b/src/questdb/auth/_questdb.py @@ -183,11 +183,12 @@ def sql(self, query: str, *, limit: Optional[str] = None, f'QuestDB query failed (HTTP {resp.status}): {detail}') try: data = resp.json() - except (ValueError, UnicodeDecodeError): + except (ValueError, UnicodeDecodeError, RecursionError): # A 2xx body that isn't JSON (e.g. an HTML error/login page from a - # reverse proxy or captive portal) must surface as a clean - # OidcError, not a raw JSONDecodeError. Mirrors the error path and - # post_form(). + # reverse proxy or captive portal), or deeply-nested JSON that + # exhausts the decoder's stack (RecursionError, not a ValueError), + # must surface as a clean OidcError, not a raw decoder exception. + # Mirrors the error path and post_form(). 
raise OidcError( 'QuestDB returned a non-JSON success response from /exec: ' f'{resp.text()[:300]}') diff --git a/src/questdb/auth/_render.py b/src/questdb/auth/_render.py index 86fb5716..121456b8 100644 --- a/src/questdb/auth/_render.py +++ b/src/questdb/auth/_render.py @@ -119,19 +119,27 @@ def _render_link(url: Optional[str], *, text: Optional[str] = None) -> str: f'rel="noopener noreferrer">{label}') -_CONTROL_CHARS = re.compile(r'[\x00-\x1f\x7f-\x9f]') +# C0/C1 control chars (incl. ESC, which drives ANSI escape sequences) plus the +# Unicode bidi-control, zero-width and line/paragraph-separator ranges. All can +# spoof a terminal prompt: U+202E (RIGHT-TO-LEFT OVERRIDE) reverses displayed +# text to disguise a URL's host; U+2028/U+2029 inject fake lines; zero-width +# chars hide content. Stripped from untrusted device-response fields. +_CONTROL_CHARS = re.compile( + r'[\x00-\x1f\x7f-\x9f\u200b-\u200f\u2028-\u202e\u2066-\u2069\ufeff]') def _strip_control(text: Optional[str]) -> str: """ - Strip C0/C1 control characters (incl. ESC) from an untrusted string before - it is written to a terminal. + Strip control / format characters from an untrusted string before it is + written to a terminal. The verification URL, user code and IdP error strings come from the device- authorization response (untrusted). Writing them verbatim to a TTY would let - a hostile or MITM'd response inject ANSI escape sequences — cursor moves, - screen clears — to spoof the prompt or hide the real sign-in URL. The - Jupyter renderer html-escapes its output; the plain-text path needs this. + a hostile or MITM'd response inject ANSI escape sequences (C0/C1 control + chars — cursor moves, screen clears) or Unicode bidi overrides / zero-width + / line separators to spoof the prompt or hide the real sign-in URL (e.g. + U+202E visually reverses the displayed host). The Jupyter renderer + html-escapes its output; the plain-text path needs this. 
""" if not text: return '' diff --git a/test/test_auth.py b/test/test_auth.py index 41305b3a..ca30b4b1 100644 --- a/test/test_auth.py +++ b/test/test_auth.py @@ -491,6 +491,28 @@ def test_short_lived_token_valid_at_issue(self): self.assertTrue(t.is_valid(t.issued_at)) # usable right after issue self.assertFalse(t.is_valid(t.expires_at)) # but still does expire + def test_overflow_expires_in_treated_as_unknown(self): + # A non-finite expires_in (JSON Infinity, which json.loads accepts and + # int(inf) turns into an OverflowError — not a ValueError) must not + # crash; treat it as unknown so the token stays usable. See M1. + self.state.token_script = [(200, { + 'access_token': ACCESS_TOKEN, 'id_token': ID_TOKEN, + 'token_type': 'Bearer', 'expires_in': float('inf')})] + auth = self.make_auth() + self.assertEqual(auth.token(), ID_TOKEN) + self.assertTrue(auth._tokens.is_valid(self._clock.now())) + + def test_overflow_device_timing_fields_do_not_crash(self): + # Non-finite interval / expires_in in the device-auth response (JSON + # Infinity) must be treated as unknown, not raise OverflowError. See M1. + self.state.device_response = { + 'device_code': 'DEV-CODE', 'user_code': 'X', + 'verification_uri': 'https://idp/device', + 'expires_in': float('inf'), 'interval': float('inf')} + self.state.token_script = [(200, None)] # success on the first poll + auth = self.make_auth() + self.assertEqual(auth.token(), ID_TOKEN) + def test_open_browser_rejects_dangerous_scheme(self): auth = self.make_auth(open_browser=True) with mock.patch('webbrowser.open') as opener: @@ -1391,6 +1413,19 @@ def test_malformed_port_raises_config_error(self): self._validate('https://idp:notaport/token', 'https://idp:notaport/device') + def test_malformed_ipv6_endpoint_raises_config_error(self): + # A malformed IPv6 literal makes urllib.parse.urlparse() itself raise + # ValueError (before .port is read); it must surface as OidcConfigError, + # not a bare ValueError escaping the typed-error contract. 
See M1. + with self.assertRaises(OidcConfigError): + self._validate('https://[::1', 'https://[::1') + with self.assertRaises(OidcConfigError): + OidcDeviceAuth( + client_id='questdb', + device_authorization_endpoint='https://[::1', + token_endpoint='https://[::1', + renderer=Renderer()) + def test_explicit_constructor_enforces_co_location(self): with self.assertRaises(OidcConfigError): OidcDeviceAuth( @@ -1418,6 +1453,14 @@ def test_normalize_url_malformed_port_raises_config_error(self): with self.assertRaises(OidcConfigError): _normalize_url('https://idp:notaport/token') + def test_normalize_url_malformed_ipv6_raises_config_error(self): + # cache_key normalization must also map a malformed IPv6 literal (which + # makes urlparse itself raise) to OidcConfigError, not a bare + # ValueError. See M1. + from questdb.auth._device import _normalize_url + with self.assertRaises(OidcConfigError): + _normalize_url('https://[::1') + def test_realm_path_distinguishes_key(self): # Multi-tenant IdP: same host, different realm path -> distinct keys # (the old origin-only key collided, leaking one realm's token). @@ -1528,6 +1571,69 @@ def test_malformed_url_raises_config_error(self): request('GET', 'https://questdb.example.com:notaport/settings', timeout=5) + def test_require_secure_rejects_malformed_ipv6(self): + # _require_secure routes through safe_urlparse, so a malformed IPv6 + # endpoint raises OidcConfigError instead of a bare ValueError (urlparse + # raises before the scheme is even inspected). See M1. + from questdb.auth._http import _require_secure + with self.assertRaises(OidcConfigError): + _require_secure('https://[::1', insecure=False) + # A well-formed IPv6 URL is still accepted (loopback http is allowed). + _require_secure('http://[::1]:8080/x', insecure=False) + + def test_bad_ca_bundle_raises_config_error(self): + # A missing or invalid CA bundle path (explicit or via env) must surface + # as OidcConfigError, not a raw FileNotFoundError / ssl.SSLError. See M1. 
+ import tempfile + from questdb.auth._http import build_ssl_context + with self.assertRaises(OidcConfigError): + build_ssl_context('/no/such/path/ca.pem') + with tempfile.NamedTemporaryFile('w', suffix='.pem', delete=False) as f: + f.write('not a certificate') + bad = f.name + try: + with self.assertRaises(OidcConfigError): + build_ssl_context(bad) + finally: + os.unlink(bad) + + def test_deeply_nested_json_raises_oidc_error(self): + # Deeply-nested JSON makes json.loads raise RecursionError (not a + # ValueError); get_json / post_form must map it to OidcError rather than + # let it escape the typed-error contract. See M1. + from questdb.auth import _http + deep = (b'[' * 100000) + (b']' * 100000) + + class _Deep(http.server.BaseHTTPRequestHandler): + def log_message(self, *a): + pass + + def _send(self): + self.send_response(200) + self.send_header('Content-Type', 'application/json') + self.send_header('Content-Length', str(len(deep))) + self.end_headers() + self.wfile.write(deep) + + def do_GET(self): + self._send() + + def do_POST(self): + self.rfile.read(int(self.headers.get('Content-Length', 0))) + self._send() + + srv = http.server.HTTPServer(('127.0.0.1', 0), _Deep) + threading.Thread(target=srv.serve_forever, daemon=True).start() + base = f'http://127.0.0.1:{srv.server_port}' + try: + with self.assertRaises(OidcError): + _http.get_json(base + '/x', timeout=5) + with self.assertRaises(OidcError): + _http.post_form(base + '/x', {'a': 'b'}, timeout=5) + finally: + srv.shutdown() + srv.server_close() + class TestRendererSecurity(unittest.TestCase): """The Jupyter prompt must never turn an IdP-supplied URL into a @@ -1608,6 +1714,25 @@ def test_terminal_prompt_strips_control_chars(self): self.assertNotIn('\x1b', out) self.assertNotIn('\x07', out) + def test_strip_control_removes_bidi_and_zero_width(self): + # Beyond C0/C1, untrusted device-response fields must have Unicode + # bidi-override / zero-width / line-separator characters stripped before + # they reach 
a TTY: U+202E (RIGHT-TO-LEFT OVERRIDE) can visually reverse + # a URL to spoof the sign-in host. chr(cp) avoids embedding the + # (invisible) characters in the test source. See M2. + from questdb.auth._render import _strip_control, format_prompt + for cp in (0x202e, 0x202d, 0x2066, 0x2069, 0x200b, 0x200f, + 0x2028, 0x2029, 0xfeff): + self.assertEqual(_strip_control('a' + chr(cp) + 'b'), 'ab', + f'U+{cp:04X} not stripped') + # Legitimate text (incl. accents / CJK / printable ASCII) is preserved. + self.assertEqual(_strip_control('café 北京 user-1'), 'café 北京 user-1') + text = format_prompt({ + 'user_code': 'WD' + chr(0x202e) + 'JB', + 'verification_uri': 'https://idp.example.com/' + chr(0x202e)}) + self.assertNotIn(chr(0x202e), text) + self.assertIn('idp.example.com', text) + if __name__ == '__main__': unittest.main() From afbd8088e334d84db1b247c49f3e271268b44083 Mon Sep 17 00:00:00 2001 From: glasstiger Date: Fri, 19 Jun 2026 12:01:48 +0100 Subject: [PATCH 24/39] fix: bound IdP timeout; broaden auth edge tests MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit M3 — thread a per-request `timeout` (default 30s, unchanged) through every IdP call (device-code request, each poll, refresh) via _idp_post, and through the /settings + discovery fetches in from_questdb. It bounds how long a single network leg can hold the token-acquisition lock when the IdP stalls, so an IdP outage no longer freezes other acquiring threads (e.g. SQLAlchemy pool connections) for the urllib default per leg. Exposed on OidcDeviceAuth.__init__ / from_questdb and forwarded by connect(**opts): lower it for tighter lock-hold, raise it for a slow IdP. Total interactive poll duration stays capped by _MAX_DEVICE_CODE_LIFETIME. 
M4 — add the remaining edge / error-path tests: * device-code / poll / refresh use the configured timeout (M3) * connect(eager=False) defers sign-in until the first token use * IdP .well-known 404 -> OidcError (discovery non-2xx path) * token/device non-JSON 2xx and non-dict JSON -> OidcError; /settings and discovery non-2xx / non-JSON -> OidcError (only /exec had this before) * make_cache resolves 'memory' / None / 'none' / a TokenCache and rejects an unknown spec with OidcConfigError * QR helpers degrade to None when `qrcode` is absent or data is empty Co-Authored-By: Claude Opus 4.8 (1M context) --- src/questdb/auth/_device.py | 19 ++++- src/questdb/auth/_questdb.py | 4 +- test/test_auth.py | 147 +++++++++++++++++++++++++++++++++++ 3 files changed, 166 insertions(+), 4 deletions(-) diff --git a/src/questdb/auth/_device.py b/src/questdb/auth/_device.py index e727278b..c3f66f73 100644 --- a/src/questdb/auth/_device.py +++ b/src/questdb/auth/_device.py @@ -166,6 +166,7 @@ def __init__( qr: bool = False, renderer: Optional[Renderer] = None, default_interval: int = 5, + timeout: float = 30, _clock=None): # injectable time source for testing if not client_id: raise OidcConfigError('client_id is required') @@ -203,6 +204,13 @@ def __init__( self.open_browser = open_browser self._interactive = interactive self._default_interval = default_interval + # Per-request network timeout for every IdP call (device-code request, + # each poll, refresh). It bounds how long a single network leg can pin + # the acquisition lock if the IdP stalls: lower it to reduce lock-hold + # (and connection-pool starvation) during an IdP outage; raise it for a + # slow IdP. The total interactive-poll duration is separately capped by + # _MAX_DEVICE_CODE_LIFETIME. 
+ self._timeout = timeout self._cache = make_cache(cache) self._ctx = build_ssl_context(ca_bundle) self._renderer = renderer if renderer is not None else make_renderer(qr=qr) @@ -243,6 +251,7 @@ def from_questdb( interactive: Optional[bool] = None, qr: bool = False, renderer: Optional[Renderer] = None, + timeout: float = 30, _clock=None) -> 'OidcDeviceAuth': # injectable time source """ Build an :class:`OidcDeviceAuth` by discovering config from QuestDB. @@ -265,7 +274,8 @@ def from_questdb( issuer=issuer, discovery_url=discovery_url, ctx=ctx, - insecure=insecure) + insecure=insecure, + timeout=timeout) return cls( client_id=cfg.client_id, device_authorization_endpoint=cfg.device_authorization_endpoint, @@ -281,6 +291,7 @@ def from_questdb( interactive=interactive, qr=qr, renderer=renderer, + timeout=timeout, _clock=_clock) # -- public API --------------------------------------------------------- @@ -462,8 +473,10 @@ def _idp_post(self, url: str, form: Dict[str, Any]): # IdP POSTs carry the device code / refresh token, so they are always # required to be https (loopback http is fine for local dev); the # user's `insecure` flag — which is about the QuestDB link — never - # downgrades them. - return post_form(url, form, ctx=self._ctx, insecure=False) + # downgrades them. The timeout bounds how long this leg can hold the + # acquisition lock if the IdP stalls. + return post_form( + url, form, ctx=self._ctx, insecure=False, timeout=self._timeout) def _refresh(self, tokens: TokenSet) -> TokenSet: status, body = self._idp_post( diff --git a/src/questdb/auth/_questdb.py b/src/questdb/auth/_questdb.py index 56caf88c..034cc8e6 100644 --- a/src/questdb/auth/_questdb.py +++ b/src/questdb/auth/_questdb.py @@ -365,7 +365,9 @@ def connect( until the first call that needs a token. :param opts: Forwarded to :meth:`OidcDeviceAuth.from_questdb` (e.g. ``client_id``, ``scope``, ``audience``, ``issuer``, ``open_browser``, - ``qr``, ``ca_bundle``). 
+ ``qr``, ``ca_bundle``, ``timeout`` — the per-request IdP network + timeout, which also bounds how long a stalled IdP can hold the + token-acquisition lock). """ auth = OidcDeviceAuth.from_questdb( url, flow=flow, cache=cache, insecure=insecure, **opts) diff --git a/test/test_auth.py b/test/test_auth.py index ca30b4b1..270351b0 100644 --- a/test/test_auth.py +++ b/test/test_auth.py @@ -36,6 +36,7 @@ """ import base64 +import contextlib import importlib.util import json import os @@ -106,6 +107,41 @@ def b64(obj): ACCESS_TOKEN = _jwt({'sub': 'user-1', 'scope': 'openid'}) +@contextlib.contextmanager +def _raw_response_server(status, content_type, body): + """A throwaway HTTP server that returns one fixed (status, type, body). + + Used to exercise the transport's handling of responses the scripted mock + IdP can't produce (non-JSON 2xx, non-dict JSON, non-2xx) on the token / + device / settings / discovery endpoints. Yields the base URL. + """ + class _H(http.server.BaseHTTPRequestHandler): + def log_message(self, *a): + pass + + def _send(self): + self.send_response(status) + self.send_header('Content-Type', content_type) + self.send_header('Content-Length', str(len(body))) + self.end_headers() + self.wfile.write(body) + + def do_GET(self): + self._send() + + def do_POST(self): + self.rfile.read(int(self.headers.get('Content-Length', 0))) + self._send() + + srv = http.server.HTTPServer(('127.0.0.1', 0), _H) + threading.Thread(target=srv.serve_forever, daemon=True).start() + try: + yield f'http://127.0.0.1:{srv.server_port}' + finally: + srv.shutdown() + srv.server_close() + + class FakeClock: """Deterministic clock: ``sleep`` advances both monotonic and wall time.""" @@ -513,6 +549,47 @@ def test_overflow_device_timing_fields_do_not_crash(self): auth = self.make_auth() self.assertEqual(auth.token(), ID_TOKEN) + def test_idp_requests_use_configured_timeout(self): + # The device-code / poll / refresh POSTs must use the configured + # timeout, so a stalled IdP can't 
pin the acquisition lock for the + # urllib default (30s) per network leg. See M3. + seen = [] + + def fake_post_form(url, form, *, ctx=None, insecure=False, + timeout=None): + seen.append(timeout) + if url.endswith('/device'): + return 200, {'device_code': 'D', 'user_code': 'U', + 'verification_uri': 'https://idp/d', + 'expires_in': 600, 'interval': 5} + return 200, {'access_token': ACCESS_TOKEN, 'id_token': ID_TOKEN, + 'token_type': 'Bearer', 'expires_in': 3600} + + from questdb.auth import _device + auth = self.make_auth(timeout=3) + with mock.patch.object(_device, 'post_form', fake_post_form): + self.assertEqual(auth.token(), ID_TOKEN) + self.assertTrue(seen) + self.assertTrue( + all(t == 3 for t in seen), + f'IdP POSTs did not all use the configured timeout: {seen}') + + def test_connect_lazy_defers_signin(self): + # eager=False must return a session WITHOUT running the device flow; the + # first token-needing call then triggers exactly one sign-in. See M4. + self.state.settings = {'config': { + 'acl.oidc.enabled': True, + 'acl.oidc.client.id': 'questdb', + 'acl.oidc.scope': 'openid groups', + 'acl.oidc.groups.encoded.in.token': True, + 'acl.oidc.token.endpoint': self.base + '/token', + 'acl.oidc.device.authorization.endpoint': self.base + '/device'}} + qdb = connect(self.base, insecure=True, eager=False, + renderer=Renderer(), interactive=True, _clock=FakeClock()) + self.assertEqual(self.state.device_requests, 0) # deferred + self.assertEqual(qdb.token(), ID_TOKEN) # first use signs in + self.assertEqual(self.state.device_requests, 1) + def test_open_browser_rejects_dangerous_scheme(self): auth = self.make_auth(open_browser=True) with mock.patch('webbrowser.open') as opener: @@ -852,6 +929,20 @@ def test_issuer_pin_accepts_matching_origin(self): self.assertEqual(auth.config.device_authorization_endpoint, self.base + '/device') + def test_well_known_404_raises_oidc_error(self): + # issuer pinned (so the IdP fallback is allowed), but the .well-known + # 
document 404s: get_json maps the non-2xx to OidcError rather than a + # silent miss that would later masquerade as a missing-endpoint error. + # See M4. + self.state.settings = {'config': { + 'acl.oidc.enabled': True, + 'acl.oidc.client.id': 'questdb', + 'acl.oidc.token.endpoint': self.base + '/token'}} + self.state.well_known = None # the handler returns 404 for /.well-known + with self.assertRaises(OidcError): + OidcDeviceAuth.from_questdb(self.base, issuer=self.base, + insecure=True) + class TestInsecureSettingsGuard(unittest.TestCase): """ @@ -1382,6 +1473,18 @@ def test_settings_config_ignores_user_writable_preferences(self): self.assertEqual(settings_config({'acl.oidc.client.id': 'q'}), {'acl.oidc.client.id': 'q'}) + def test_make_cache_variants(self): + # The cache factory resolves the documented specs and rejects an + # unknown one with a typed error. See M4. + from questdb.auth._cache import make_cache, MemoryCache, NullCache + self.assertIsInstance(make_cache('memory'), MemoryCache) + self.assertIsInstance(make_cache(None), NullCache) + self.assertIsInstance(make_cache('none'), NullCache) + custom = MemoryCache() + self.assertIs(make_cache(custom), custom) # a TokenCache passes through + with self.assertRaises(OidcConfigError): + make_cache('disk') + class TestEndpointValidation(unittest.TestCase): def setUp(self): @@ -1634,6 +1737,40 @@ def do_POST(self): srv.shutdown() srv.server_close() + def test_post_form_non_json_2xx_raises_oidc_error(self): + # A 2xx body from the token/device endpoint that isn't JSON (e.g. an + # HTML login page from a proxy in front of the IdP) must surface as + # OidcError, not a raw decoder error. Only /exec had this before. M4. 
+ from questdb.auth import _http + with _raw_response_server(200, 'text/html', b'login') as b: + with self.assertRaises(OidcError): + _http.post_form(b + '/token', {'a': 'b'}, timeout=5) + + def test_post_form_non_dict_json_raises_oidc_error(self): + # A 2xx JSON array (valid JSON but not an object) from the token + # endpoint must surface as OidcError. See M4. + from questdb.auth import _http + with _raw_response_server(200, 'application/json', b'[1, 2, 3]') as b: + with self.assertRaises(OidcError): + _http.post_form(b + '/token', {'a': 'b'}, timeout=5) + + def test_get_json_non_2xx_raises_oidc_error(self): + # A non-2xx /settings or discovery response must surface as OidcError. + # See M4. + from questdb.auth import _http + with _raw_response_server(500, 'text/plain', b'boom') as b: + with self.assertRaises(OidcError): + _http.get_json(b + '/settings', timeout=5) + + def test_get_json_non_json_2xx_raises_oidc_error(self): + # A 2xx /settings or discovery body that isn't JSON must surface as + # OidcError, not a raw JSONDecodeError. See M4. + from questdb.auth import _http + with _raw_response_server(200, 'text/html', b'x') as b: + with self.assertRaises(OidcError): + _http.get_json( + b + '/.well-known/openid-configuration', timeout=5) + class TestRendererSecurity(unittest.TestCase): """The Jupyter prompt must never turn an IdP-supplied URL into a @@ -1733,6 +1870,16 @@ def test_strip_control_removes_bidi_and_zero_width(self): self.assertNotIn(chr(0x202e), text) self.assertIn('idp.example.com', text) + def test_qr_helpers_degrade_without_qrcode(self): + # The QR helpers must degrade gracefully (return None), never raise, + # when `qrcode` is absent or the data is empty. See M4. 
+ from questdb.auth import _render + with mock.patch.dict(sys.modules, {'qrcode': None}): + self.assertIsNone(_render._qr_ascii('https://idp/x')) + self.assertIsNone(_render._qr_data_uri('https://idp/x')) + self.assertIsNone(_render._qr_ascii('')) + self.assertIsNone(_render._qr_data_uri('')) + if __name__ == '__main__': unittest.main() From 63edfcea52369fe8e6550b10e4968030c0a99cab Mon Sep 17 00:00:00 2001 From: glasstiger Date: Fri, 19 Jun 2026 12:35:17 +0100 Subject: [PATCH 25/39] fix: address 3 minor questdb.auth review nits MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit * cache_key now includes the token-kind mode (groups_in_token): two sessions differing ONLY in that mode no longer collide on one in-memory cache entry and repeatedly evict each other's token. _select gates the served kind so no wrong token was ever returned, but the collision caused avoidable refreshes / re-prompts. (_device.py) * _resolve_endpoint returns None for a path-only endpoint ("/as/token") when acl.oidc.host is absent, so resolution fails with a clear config error (pin the IdP / pass the endpoint explicitly) — or recovers via IdP discovery when issuer= is pinned — instead of passing a scheme-less "/path" downstream that surfaced as a confusing "insecure/malformed URL". (_discovery.py) * _pg_module chains the underlying ImportError (raise ... from e) so the traceback preserves the real cause. (_questdb.py) Add 3 regression tests. 
Co-Authored-By: Claude Opus 4.8 (1M context) --- src/questdb/auth/_device.py | 12 ++++++++++-- src/questdb/auth/_discovery.py | 19 +++++++++++++------ src/questdb/auth/_questdb.py | 4 ++-- test/test_auth.py | 28 ++++++++++++++++++++++++++++ 4 files changed, 53 insertions(+), 10 deletions(-) diff --git a/src/questdb/auth/_device.py b/src/questdb/auth/_device.py index c3f66f73..db793427 100644 --- a/src/questdb/auth/_device.py +++ b/src/questdb/auth/_device.py @@ -318,8 +318,15 @@ def cache_key(self) -> str: Two sessions share a cached token only when they would accept the same one: same IdP token endpoint (**path included**, so multi-tenant realms sharing a host don't collide), client id, scope *set* (order-insensitive), - and audience. The QuestDB URL is deliberately excluded — the same IdP + audience, and token-kind mode (``groups_in_token`` — id_token vs + access_token). The QuestDB URL is deliberately excluded — the same IdP token is valid against any QuestDB that trusts it. + + ``groups_in_token`` is part of the key because it selects which token + kind :meth:`_select` returns; without it two sessions that differ only + in that mode would collide on one entry and repeatedly evict each + other's token (the gate self-corrects, but at the cost of avoidable + refreshes / re-prompts). 
""" c = self.config scope = ' '.join(sorted(c.scope.split())) if c.scope else '' @@ -328,7 +335,8 @@ def cache_key(self) -> str: _normalize_url(c.token_endpoint), c.client_id, scope, - c.audience or '']) + c.audience or '', + 'groups' if c.groups_in_token else 'access']) def clear(self) -> None: """Forget the cached token (forces a fresh sign-in next time).""" diff --git a/src/questdb/auth/_discovery.py b/src/questdb/auth/_discovery.py index 50732e6c..4b44645b 100644 --- a/src/questdb/auth/_discovery.py +++ b/src/questdb/auth/_discovery.py @@ -223,12 +223,19 @@ def _resolve_endpoint(value: Optional[str], cfg: Dict[str, Any]) -> Optional[str return value if value.startswith('/'): host = cfg.get(_K_HOST) - if host: - tls = _as_bool(cfg.get(_K_TLS_ENABLED), default=True) - scheme = 'https' if tls else 'http' - port = cfg.get(_K_PORT) - netloc = f'{host}:{port}' if port else str(host) - return f'{scheme}://{netloc}{value}' + if not host: + # A path-only endpoint with no acl.oidc.host to resolve it against + # can't be turned into a URL. Treat it as absent (return None) so + # resolution fails with the clear "could not resolve the ... + # endpoint" error rather than passing a scheme-less "/path" + # downstream, where it surfaces as a confusing "insecure/malformed + # URL" instead. 
+ return None + tls = _as_bool(cfg.get(_K_TLS_ENABLED), default=True) + scheme = 'https' if tls else 'http' + port = cfg.get(_K_PORT) + netloc = f'{host}:{port}' if port else str(host) + return f'{scheme}://{netloc}{value}' return value diff --git a/src/questdb/auth/_questdb.py b/src/questdb/auth/_questdb.py index 034cc8e6..338ff0fd 100644 --- a/src/questdb/auth/_questdb.py +++ b/src/questdb/auth/_questdb.py @@ -107,10 +107,10 @@ def _pg_module(): try: import psycopg2 # type: ignore return psycopg2 - except ImportError: + except ImportError as e: raise ImportError( 'A PostgreSQL driver is required: install `psycopg` (v3) or ' - '`psycopg2-binary`.') + '`psycopg2-binary`.') from e class QuestDB: diff --git a/test/test_auth.py b/test/test_auth.py index 270351b0..84d71f1b 100644 --- a/test/test_auth.py +++ b/test/test_auth.py @@ -1401,6 +1401,15 @@ def test_psycopg_missing_dep_raises(self): with self.assertRaises(ImportError): self._qdb().psycopg() + @unittest.skipIf(_HAS_PG_DRIVER, 'a PostgreSQL driver is installed') + def test_pg_module_missing_chains_cause(self): + # The "no PG driver" ImportError chains the underlying import failure + # (raise ... from e) so the traceback preserves the real cause. + from questdb.auth._questdb import _pg_module + with self.assertRaises(ImportError) as cm: + _pg_module() + self.assertIsInstance(cm.exception.__cause__, ImportError) + @unittest.skipIf(importlib.util.find_spec('questdb.ingress') is not None, 'questdb.ingress extension is built') def test_sender_missing_extension_raises(self): @@ -1434,6 +1443,16 @@ def test_resolve_endpoint_ignores_non_string(self): self.assertIsNone(_resolve_endpoint(8080, {})) self.assertIsNone(_resolve_endpoint(True, {})) + def test_resolve_endpoint_relative_path_without_host_is_none(self): + # A path-only endpoint with no acl.oidc.host can't be resolved; it must + # be treated as absent (None) so resolution fails with a clear "could + # not resolve the ... 
endpoint" error rather than a scheme-less "/path" + # that later surfaces as a confusing "insecure/malformed URL". + from questdb.auth._discovery import _resolve_endpoint + self.assertIsNone(_resolve_endpoint('/as/token.oauth2', {})) + self.assertIsNone( # port present but host missing -> still unresolved + _resolve_endpoint('/as/token.oauth2', {'acl.oidc.port': 443})) + def test_settings_config_nesting(self): from questdb.auth._discovery import settings_config self.assertEqual(settings_config({'config': {'a': 1}}), {'a': 1}) @@ -1591,6 +1610,15 @@ def test_default_port_normalized(self): self._auth( token_endpoint='https://idp.example.com:443/token').cache_key) + def test_groups_in_token_distinguishes_key(self): + # groups_in_token selects which token kind _select returns, so two + # sessions differing ONLY in that mode must not collide on one cache + # entry (and evict each other). scope already has 'openid' here, so the + # keys can differ only by the mode. + self.assertNotEqual( + self._auth(groups_in_token=True).cache_key, + self._auth(groups_in_token=False).cache_key) + class TestTransportSecurity(unittest.TestCase): def test_require_secure_policy(self): From c869e31e772128db5e829dbbc20507e6c0f67355 Mon Sep 17 00:00:00 2001 From: glasstiger Date: Fri, 19 Jun 2026 13:34:32 +0100 Subject: [PATCH 26/39] fix: address 4 follow-up auth review findings MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit M1 (issuer pin path-scope) — the issuer pin now scopes /settings-advertised credential endpoints to the issuer PATH (segment-aware), not just its origin, so a tampered /settings can't steer the device code / refresh token to a different realm on a path-based IdP (Keycloak issuers are https://host/realms/{realm}). 
The origin check (validate_endpoint_origins) stays universal; the path check lives in resolve_config and applies ONLY to /settings-supplied endpoints — caller-explicit and IdP-discovered endpoints are authoritative and not path-restricted, so IdPs that place endpoints outside the issuer path (e.g. Azure AD) still work. M2 (forward CA to Sender) — QuestDB.sender() now forwards the private CA (explicit ca_bundle=, else REQUESTS_CA_BUNDLE / SSL_CERT_FILE, same precedence as build_ssl_context) to the ILP Sender as tls_roots for an https QuestDB, so REST queries and ILP ingestion trust the same roots. PEM file only; caller can override via tls_roots=/tls_ca=. ca_bundle is now stored on OidcDeviceAuth and read by QuestDB. M4 (token-field race) — the lock-free fast path is now strictly READ-ONLY: the cache->field promotion of self._tokens moved into the locked slow-path re-check (and an expired token is still promoted there so _acquire can refresh it). Every write to self._tokens is now serialized; the lock-free read of the frozen TokenSet is kept. M5 (default_interval) — from_questdb (and thus connect(**opts)) now accepts and forwards default_interval; connect(default_interval=...) previously raised TypeError. (M3, malformed-IPv6 -> OidcConfigError, was already fixed in ae4c5ba.) docs/auth.rst: issuer pin documented as origin+path with the Azure caveat; sender documented to inherit the CA bundle. Add 6 regression tests. Co-Authored-By: Claude Opus 4.8 (1M context) --- docs/auth.rst | 24 ++++-- src/questdb/auth/_device.py | 30 ++++++-- src/questdb/auth/_discovery.py | 56 +++++++++++++- src/questdb/auth/_questdb.py | 20 +++++ test/test_auth.py | 129 +++++++++++++++++++++++++++++++++ 5 files changed, 247 insertions(+), 12 deletions(-) diff --git a/docs/auth.rst b/docs/auth.rst index d2eedfa1..fbef0b3e 100644 --- a/docs/auth.rst +++ b/docs/auth.rst @@ -217,14 +217,26 @@ Security notes origin and rejects the configuration otherwise. 
Because ``/settings`` is authoritative-by-QuestDB, a compromised server could in principle point them elsewhere; pass ``issuer=`` to **pin** the IdP so the endpoints are verified - to belong to it and credentials can't be redirected to another host. When the - server does not advertise the device-authorization endpoint (so it must be - discovered from the IdP), ``issuer=`` (or ``discovery_url=``) is **required** - for exactly this reason — the helper refuses to guess the discovery origin - from the server-supplied token endpoint. + to belong to it and credentials can't be redirected to another host. The pin + checks both the **origin** and, for endpoints advertised by ``/settings``, the + issuer **path** — so on a path-based multi-tenant IdP (e.g. Keycloak issuers + ``https://host/realms/{realm}``) a tampered ``/settings`` cannot redirect the + device code / refresh token to a *different realm on the same host*. (Caller- + supplied endpoints and endpoints from the IdP's own discovery document are + trusted as-is and not path-restricted, since some IdPs — e.g. Azure AD — place + their endpoints outside the issuer path; pass such endpoints explicitly or let + discovery resolve them.) When the server does not advertise the device- + authorization endpoint (so it must be discovered from the IdP), ``issuer=`` + (or ``discovery_url=``) is **required** for exactly this reason — the helper + refuses to guess the discovery origin from the server-supplied token endpoint. * Adapters avoid logging the token / PG DSN. Avoid logging them yourself. * Standard proxy / CA settings (``HTTPS_PROXY``, ``REQUESTS_CA_BUNDLE``, - ``SSL_CERT_FILE``) are honoured; you can also pass ``ca_bundle=``. + ``SSL_CERT_FILE``) are honoured; you can also pass ``ca_bundle=``. The same + private CA is forwarded to the ingestion :meth:`~questdb.auth.QuestDB.sender` + (as the ILP ``tls_roots``) for an ``https`` QuestDB, so REST queries and ILP + ingestion trust the same roots. 
(Only a PEM **file** is forwarded this way; + for a CA *directory*, or to override, pass ``tls_roots=``/``tls_ca=`` to + ``sender()``.) Dependencies =========== diff --git a/src/questdb/auth/_device.py b/src/questdb/auth/_device.py index db793427..a836626f 100644 --- a/src/questdb/auth/_device.py +++ b/src/questdb/auth/_device.py @@ -202,6 +202,10 @@ def __init__( # cleartext over the network even when this is set. self.insecure = insecure self.open_browser = open_browser + # Kept so adapters that build their own transport (QuestDB.sender's ILP + # Sender) can forward the same private CA the urllib _ctx uses, instead + # of falling back to the default trust roots. See QuestDB.sender. + self._ca_bundle = ca_bundle self._interactive = interactive self._default_interval = default_interval # Per-request network timeout for every IdP call (device-code request, @@ -251,6 +255,7 @@ def from_questdb( interactive: Optional[bool] = None, qr: bool = False, renderer: Optional[Renderer] = None, + default_interval: int = 5, timeout: float = 30, _clock=None) -> 'OidcDeviceAuth': # injectable time source """ @@ -291,6 +296,7 @@ def from_questdb( interactive=interactive, qr=qr, renderer=renderer, + default_interval=default_interval, timeout=timeout, _clock=_clock) @@ -391,9 +397,13 @@ def _missing_required_token_error(self) -> OidcDeviceFlowError: 'access_token.') def _obtain_tokens(self) -> TokenSet: - # Fast path: return a valid cached token without taking the lock, so a - # caller with a usable token never blocks behind another thread's - # in-progress refresh or interactive sign-in. + # Fast path: return a valid token without taking the lock, so a caller + # with a usable token never blocks behind another thread's in-progress + # refresh or interactive sign-in. This path is READ-ONLY: it never + # writes self._tokens (M4). 
Every write to that field happens under the + # lock (the promotion below, plus _store and clear), so the lock-free + # reader can't race a concurrent write / lose an update / resurrect a + # just-cleared token. tokens = self._valid_cached() if tokens is not None: return tokens @@ -401,17 +411,27 @@ def _obtain_tokens(self) -> TokenSet: # overlapping refreshes or double-prompt; the loser re-checks and # reuses the winner's freshly acquired token. with self._lock: + # Promote a cached token into the field under the lock (even an + # expired one, so _acquire can reuse its refresh_token for a silent + # refresh). Done here, not on the lock-free fast path, so every + # write to self._tokens stays serialized. + if self._tokens is None: + cached = self._cache.load(self.cache_key) + if cached is not None: + self._tokens = cached tokens = self._valid_cached() if tokens is not None: return tokens return self._acquire() def _valid_cached(self) -> Optional[TokenSet]: + # Read-only: reads the published field, falling back to a read of the + # shared cache backend. It never writes self._tokens — that write is + # done only under the lock (in _obtain_tokens' slow path / _store / + # clear) — so it is safe to call on the lock-free fast path. tokens = self._tokens if tokens is None: tokens = self._cache.load(self.cache_key) - if tokens is not None: - self._tokens = tokens if (tokens is not None and tokens.is_valid(self._now()) and self._has_required_token(tokens)): return tokens diff --git a/src/questdb/auth/_discovery.py b/src/questdb/auth/_discovery.py index 4b44645b..f55d3aba 100644 --- a/src/questdb/auth/_discovery.py +++ b/src/questdb/auth/_discovery.py @@ -161,6 +161,24 @@ def _settings_channel_is_plaintext(questdb_url: str) -> bool: parts.hostname) +def _endpoint_path_under_issuer(endpoint: str, issuer: str) -> bool: + """ + True if ``endpoint``'s path is the issuer's path or a sub-path of it. + + Segment-aware, so ``/realms/prod`` does not match ``/realms/production``. 
A + root issuer (no path, e.g. ``https://idp.example.com``) constrains the + origin only and matches any path. Used to keep a tampered ``/settings`` from + redirecting credentials to a different tenant on a path-based multi-tenant + IdP (Keycloak issuers are ``https://host/realms/{realm}``), which an + origin-only check can't catch. + """ + base = (safe_urlparse(issuer)[0].path or '').rstrip('/') + if not base: + return True + ep = safe_urlparse(endpoint)[0].path or '' + return ep == base or ep.startswith(base + '/') + + def validate_endpoint_origins( token_endpoint: str, device_authorization_endpoint: str, @@ -177,7 +195,16 @@ def validate_endpoint_origins( * the two credential endpoints must share a single origin (they are always co-located on the authorization server per RFC 8628); and * when the ``issuer`` is known independently (passed explicitly or resolved - from the IdP ``.well-known``), both endpoints must belong to it. + from the IdP ``.well-known``), both endpoints must share its **origin**. + + This is an origin-level check: it does **not**, on its own, isolate + path-based multi-tenant realms (e.g. Keycloak issuers + ``https://host/realms/{realm}``, where every realm shares one origin). That + path-scoping is enforced separately in :func:`resolve_config`, and only for + endpoints advertised by the (untrusted) QuestDB ``/settings`` — endpoints + from IdP discovery (the issuer's own ``.well-known``) and caller-explicit + endpoints are authoritative and are not path-restricted (some IdPs, e.g. + Azure AD, legitimately place endpoints outside the issuer path). Pass ``issuer=`` to pin the IdP explicitly when QuestDB advertises the endpoints directly (so a compromised server cannot redirect the token POST). 
@@ -357,6 +384,33 @@ def resolve_config( 'device_authorization_endpoint=...), or connect to QuestDB over ' 'https so /settings is authenticated.') + # When the credential endpoints came from QuestDB /settings (not the + # caller) and an issuer is pinned out-of-band, require each to sit under the + # issuer's PATH, not merely its origin. Path-based IdPs put every tenant on + # one origin (Keycloak issuers are https://host/realms/{realm}), so the + # origin check alone (validate_endpoint_origins) can't stop a tampered + # /settings from steering the device code / refresh token to a different + # realm on the same host. The issuer is out-of-band, so the server can't + # forge it. Caller-explicit endpoints, and endpoints from IdP discovery (the + # issuer's own .well-known), are authoritative and skip this — some IdPs + # (e.g. Azure AD) legitimately place endpoints outside the issuer path. + if issuer: + for label, url, from_settings in ( + ('token endpoint', token_endpoint, + not explicit_token_endpoint), + ('device-authorization endpoint', + device_authorization_endpoint, not explicit_device_endpoint)): + if url and from_settings and not _endpoint_path_under_issuer( + url, issuer): + raise OidcConfigError( + f'The OIDC {label} advertised by QuestDB /settings ' + f'({url!r}) is not under the pinned issuer ({issuer!r}); ' + 'refusing to send credentials to an endpoint outside the ' + 'trusted issuer (e.g. a different realm on the same host). ' + 'If your IdP places endpoints outside the issuer path, pass ' + 'them explicitly (token_endpoint=..., ' + 'device_authorization_endpoint=...).') + # Fall back to IdP discovery when QuestDB doesn't advertise the device # endpoint (and/or the token endpoint). This contacts the IdP, so it is # held to https/loopback (insecure=False) regardless of the QuestDB flag. 
diff --git a/src/questdb/auth/_questdb.py b/src/questdb/auth/_questdb.py index 338ff0fd..98c6c72a 100644 --- a/src/questdb/auth/_questdb.py +++ b/src/questdb/auth/_questdb.py @@ -26,6 +26,7 @@ from __future__ import annotations +import os import re import urllib.parse from typing import Any, Dict, Optional @@ -133,6 +134,10 @@ def __init__( self.auth = auth self._insecure = insecure self._ctx = auth._ctx + # Same private CA bundle the auth/REST transport uses, so sender() can + # forward it to the ILP Sender (which has its own TLS stack). getattr + # keeps test doubles that only set _ctx working. + self._ca_bundle = getattr(auth, '_ca_bundle', None) # safe_urlparse validates the port up-front, raising OidcConfigError # (not a bare ValueError) for a malformed one, so the adapters that read # the port stay within the package's typed-error contract. @@ -328,6 +333,21 @@ def sender(self, *, port: Optional[int] = None, 443 if scheme == 'https' else 9000) conf = (f'{scheme}::addr=' f'{self._ilp_addr(self._require_host(), resolved_port)};') + # Forward the private CA bundle (explicit ca_bundle=, else the + # REQUESTS_CA_BUNDLE / SSL_CERT_FILE env vars — same precedence as + # build_ssl_context) to the Sender's own TLS stack as tls_roots, so an + # https Sender against a private-CA QuestDB trusts the same roots the + # REST/IdP paths do. Only a PEM file works here (tls_roots is a file; + # the Sender has no capath equivalent), and only over https. The caller + # can still override via tls_roots=/tls_ca= in **sender_kwargs. 
+ if (scheme == 'https' + and 'tls_roots' not in sender_kwargs + and 'tls_ca' not in sender_kwargs): + ca = (self._ca_bundle + or os.environ.get('REQUESTS_CA_BUNDLE') + or os.environ.get('SSL_CERT_FILE')) + if ca and os.path.isfile(ca): + sender_kwargs['tls_roots'] = ca return Sender.from_conf(conf, token=self.auth.token(), **sender_kwargs) diff --git a/test/test_auth.py b/test/test_auth.py index 84d71f1b..e2f43d74 100644 --- a/test/test_auth.py +++ b/test/test_auth.py @@ -943,6 +943,18 @@ def test_well_known_404_raises_oidc_error(self): OidcDeviceAuth.from_questdb(self.base, issuer=self.base, insecure=True) + def test_connect_forwards_default_interval(self): + # M5: connect(**opts) routes through from_questdb; default_interval must + # be accepted (it previously raised TypeError) and reach the auth. + self.state.settings = {'config': { + 'acl.oidc.enabled': True, + 'acl.oidc.client.id': 'questdb', + 'acl.oidc.token.endpoint': self.base + '/token', + 'acl.oidc.device.authorization.endpoint': self.base + '/device'}} + qdb = connect(self.base, insecure=True, eager=False, default_interval=9, + renderer=Renderer(), interactive=True, _clock=FakeClock()) + self.assertEqual(qdb.auth._default_interval, 9) + class TestInsecureSettingsGuard(unittest.TestCase): """ @@ -1014,6 +1026,48 @@ def test_pin_satisfies_guard_over_plaintext(self): insecure=True, issuer='https://evil.example.com') self.assertEqual(cfg.token_endpoint, 'https://evil.example.com/token') + def test_issuer_path_scopes_settings_endpoints(self): + # M1: a tampered /settings advertising a DIFFERENT realm's endpoints on + # the SAME host (Keycloak path-based multi-tenancy) is rejected when the + # issuer is pinned to a specific realm — the origin check alone can't + # catch it because both realms share one origin. 
+ kc = 'https://idp.example.com/realms' + evil = { + 'acl.oidc.enabled': True, 'acl.oidc.client.id': 'questdb', + 'acl.oidc.token.endpoint': + kc + '/EVIL/protocol/openid-connect/token', + 'acl.oidc.device.authorization.endpoint': + kc + '/EVIL/protocol/openid-connect/auth/device'} + with self.assertRaises(OidcConfigError) as cm: + self._resolve(evil, questdb_url='https://qdb.example.com:9000', + issuer=kc + '/prod') + self.assertIn('issuer', str(cm.exception).lower()) + # The pinned realm's own endpoints are accepted. + good = { + 'acl.oidc.enabled': True, 'acl.oidc.client.id': 'questdb', + 'acl.oidc.token.endpoint': + kc + '/prod/protocol/openid-connect/token', + 'acl.oidc.device.authorization.endpoint': + kc + '/prod/protocol/openid-connect/auth/device'} + cfg = self._resolve(good, questdb_url='https://qdb.example.com:9000', + issuer=kc + '/prod') + self.assertEqual(cfg.token_endpoint, + kc + '/prod/protocol/openid-connect/token') + + def test_issuer_path_scope_skips_explicit_endpoints(self): + # Caller-explicit endpoints are trusted and NOT path-checked, so an IdP + # that places endpoints outside the issuer path (e.g. Azure AD) still + # works when the endpoints are passed explicitly. + cfg = self._resolve( + {'acl.oidc.enabled': True, 'acl.oidc.client.id': 'questdb'}, + questdb_url='https://qdb.example.com:9000', + issuer='https://idp.example.com/realms/prod', + token_endpoint='https://idp.example.com/oauth2/v2.0/token', + device_authorization_endpoint=( + 'https://idp.example.com/oauth2/v2.0/devicecode')) + self.assertEqual(cfg.token_endpoint, + 'https://idp.example.com/oauth2/v2.0/token') + @unittest.skipIf(pd is None, 'pandas not installed') class TestRestAdapter(AuthTestBase): @@ -1164,6 +1218,19 @@ def call(name): self.assertEqual(results.get('b'), ID_TOKEN) self.assertEqual(self.state.device_requests, 1) # no second prompt + def test_fast_path_does_not_write_tokens_field(self): + # M4: the lock-free fast path must be READ-ONLY. 
Serving a valid token + # from the shared cache must not write self._tokens — only the locked + # slow path (and _store/clear) write it — so the lock-free reader can't + # race a concurrent write (lost update / clear() resurrection). + auth = self.make_auth() + valid = TokenSet(access_token='a', id_token=ID_TOKEN, refresh_token='r', + expires_at=self._clock.now() + 3600) + auth._cache.store(auth.cache_key, valid) + self.assertIsNone(auth._tokens) # nothing published yet + self.assertEqual(auth.token(), ID_TOKEN) # served via the fast path + self.assertIsNone(auth._tokens) # fast path did not write it + class TestAdapters(unittest.TestCase): """Connection adapters: tested via injected fake modules (the real @@ -1307,6 +1374,52 @@ def from_conf(conf, *, token=None, **kw): qdb.sender() self.assertEqual(captured['conf'], 'https::addr=[::1]:9000;') + def test_sender_forwards_ca_bundle_as_tls_roots(self): + # M2: an https Sender must inherit the private CA bundle (as tls_roots) + # so it trusts the same roots as the REST/IdP paths; http does not, and + # an explicit tls_roots= is never overridden. + import tempfile + + def captured_conf_kwargs(url, *, ca_bundle, **sender_kwargs): + auth = _FakeAuth('TKN') + auth._ca_bundle = ca_bundle + qdb = QuestDB(url, auth, insecure=True) + captured = {} + fake = types.ModuleType('questdb.ingress') + + class Sender: + @staticmethod + def from_conf(conf, *, token=None, **kw): + captured['kw'] = kw + return 'S' + + fake.Sender = Sender + with mock.patch.dict(sys.modules, {'questdb.ingress': fake}): + qdb.sender(**sender_kwargs) + return captured['kw'] + + with tempfile.NamedTemporaryFile('w', suffix='.pem', delete=False) as f: + f.write('-----dummy-----') + ca = f.name + try: + # https + a real CA file -> forwarded as tls_roots. + self.assertEqual( + captured_conf_kwargs('https://db.example.com:9000', + ca_bundle=ca).get('tls_roots'), ca) + # http -> never forwarded (TLS roots are irrelevant). 
+ self.assertNotIn( + 'tls_roots', + captured_conf_kwargs('http://db.example.com:9000', + ca_bundle=ca)) + # An explicit tls_roots= wins over the inherited bundle. + self.assertEqual( + captured_conf_kwargs('https://db.example.com:9000', + ca_bundle=ca, + tls_roots='/other/ca.pem').get('tls_roots'), + '/other/ca.pem') + finally: + os.unlink(ca) + def test_psycopg_uses_bare_ipv6_host(self): # psycopg takes host and port separately, so the IPv6 host is passed # WITHOUT brackets (unlike the ILP addr= form). See M5. @@ -1556,6 +1669,22 @@ def test_explicit_constructor_enforces_co_location(self): token_endpoint='https://attacker.example/token', renderer=Renderer()) + def test_endpoint_path_under_issuer(self): + # M1: segment-aware path containment used to isolate path-based realms. + from questdb.auth._discovery import _endpoint_path_under_issuer as under + iss = 'https://idp.example.com/realms/prod' + self.assertTrue(under(iss + '/protocol/openid-connect/token', iss)) + self.assertTrue(under(iss, iss)) # exact path + self.assertTrue(under(iss + '/', iss)) # trailing slash + self.assertFalse(under('https://idp.example.com/realms/EVIL/token', iss)) + self.assertFalse( # not a *segment* prefix: prod != production + under('https://idp.example.com/realms/production/token', iss)) + # A root issuer (no path) constrains the origin only -> any path is in. 
+ self.assertTrue( + under('https://idp.example.com/anything', 'https://idp.example.com')) + self.assertTrue( + under('https://idp.example.com/x', 'https://idp.example.com/')) + class TestCacheKey(unittest.TestCase): def _auth(self, **kw): From bcfebd9b3a2020cd82e874376d22df0685bbfe6b Mon Sep 17 00:00:00 2001 From: glasstiger Date: Fri, 19 Jun 2026 15:01:03 +0100 Subject: [PATCH 27/39] fix: address 4 moderate questdb.auth review findings - _decode_jwt_claims: catch RecursionError so a deeply-nested JWT payload from a hostile/buggy IdP degrades to no-identity instead of crashing token()/refresh with a raw exception (mirrors _http/_questdb guards). - _endpoint_path_under_issuer: reject '.'/'..' path segments (decoding %2e first) so a tampered /settings can't steer the device code and refresh token to a different realm via a path the IdP normalizes after the prefix check passes. - _render: coerce non-string verification_uri/_complete to str/None and harden _safe_link_url, so an untrusted device response can't crash the prompt renderer with a bare TypeError/AttributeError. - OidcDeviceAuth docstring: document that a missing/expired-token caller blocks behind an in-progress interactive sign-in, and that connect( eager=True) (the default) is the mitigation. Adds regression tests for each (full auth suite: 127 pass, 11 skipped). 
Co-Authored-By: Claude Opus 4.8 (1M context) --- src/questdb/auth/_device.py | 16 ++++++++- src/questdb/auth/_discovery.py | 12 +++++++ src/questdb/auth/_render.py | 19 ++++++++--- test/test_auth.py | 60 ++++++++++++++++++++++++++++++++++ 4 files changed, 101 insertions(+), 6 deletions(-) diff --git a/src/questdb/auth/_device.py b/src/questdb/auth/_device.py index a836626f..966bed5c 100644 --- a/src/questdb/auth/_device.py +++ b/src/questdb/auth/_device.py @@ -97,7 +97,11 @@ def _decode_jwt_claims(token: Optional[str]) -> Dict[str, Any]: raw = base64.urlsafe_b64decode(payload.encode('ascii')) claims = json.loads(raw) return claims if isinstance(claims, dict) else {} - except (ValueError, binascii.Error, UnicodeDecodeError): + except (ValueError, binascii.Error, UnicodeDecodeError, RecursionError): + # RecursionError: a deeply-nested JSON payload exhausts the decoder's + # stack; it is not a ValueError, so list it explicitly so a hostile or + # buggy token response can't crash token()/refresh with a raw exception + # here (mirrors the guards in _http.get_json / post_form / QuestDB.sql). return {} @@ -126,6 +130,16 @@ class OidcDeviceAuth: token is returned without blocking on another thread's in-progress sign-in. + **Concurrency note.** The serialization lock is held for the whole of an + interactive sign-in (up to the device-code lifetime, ~30 min). A caller + that already holds a *valid* cached token never blocks, but a caller whose + token is missing or expired blocks behind whoever is signing in; if that + sign-in is abandoned, each waiter then re-prompts in turn. When several + threads share one auth object (e.g. a SQLAlchemy / psycopg connection + pool), sign in once up front — :func:`questdb.auth.connect` does this for + you with ``eager=True`` (the default), so the interactive flow runs a + single time on the main thread before the pool opens connections. + .. 
code-block:: python from questdb.auth import OidcDeviceAuth diff --git a/src/questdb/auth/_discovery.py b/src/questdb/auth/_discovery.py index f55d3aba..b5f60ae9 100644 --- a/src/questdb/auth/_discovery.py +++ b/src/questdb/auth/_discovery.py @@ -37,6 +37,7 @@ from __future__ import annotations import ssl +import urllib.parse from dataclasses import dataclass from typing import Any, Dict, Optional @@ -171,11 +172,22 @@ def _endpoint_path_under_issuer(endpoint: str, issuer: str) -> bool: redirecting credentials to a different tenant on a path-based multi-tenant IdP (Keycloak issuers are ``https://host/realms/{realm}``), which an origin-only check can't catch. + + A ``.`` / ``..`` path segment is rejected outright: urllib puts the dotted + path on the wire verbatim, but the IdP (or a reverse proxy in front of it) + normalizes it, so ``/realms/prod/../attacker/token`` would satisfy a naive + prefix test yet resolve server-side to a *different* realm — defeating the + very isolation this check exists to provide. Percent-encoded dot segments + (``%2e``) are decoded before the segment scan, since a server may unescape + before normalizing; a legitimate endpoint path never contains dot segments. """ base = (safe_urlparse(issuer)[0].path or '').rstrip('/') if not base: return True ep = safe_urlparse(endpoint)[0].path or '' + decoded_segments = urllib.parse.unquote(ep).split('/') + if '.' in decoded_segments or '..' in decoded_segments: + return False return ep == base or ep.startswith(base + '/') diff --git a/src/questdb/auth/_render.py b/src/questdb/auth/_render.py index 121456b8..86dd2338 100644 --- a/src/questdb/auth/_render.py +++ b/src/questdb/auth/_render.py @@ -74,13 +74,20 @@ def detect_interactive() -> bool: def _verification_uri(resp: Dict[str, Any]) -> str: # RFC 8628 uses ``verification_uri``; some IdPs (older Google) use - # ``verification_url``. - return resp.get('verification_uri') or resp.get('verification_url') or '' + # ``verification_url``. 
The device response is untrusted: coerce to str so a + # non-string value (e.g. a JSON number) can't crash the renderer + # (``re.sub`` / ``html.escape``) with a raw TypeError before the prompt is + # even shown — matching the defensive ``str(user_code)`` at the call sites. + uri = resp.get('verification_uri') or resp.get('verification_url') or '' + return uri if isinstance(uri, str) else '' def _verification_uri_complete(resp: Dict[str, Any]) -> Optional[str]: - return (resp.get('verification_uri_complete') - or resp.get('verification_url_complete')) + # Coerce to str / None for the same untrusted-input reason as + # _verification_uri (a non-string would crash the renderer / _safe_link_url). + uri = (resp.get('verification_uri_complete') + or resp.get('verification_url_complete')) + return uri if isinstance(uri, str) else None def _safe_link_url(url: Optional[str]) -> Optional[str]: @@ -93,7 +100,9 @@ def _safe_link_url(url: Optional[str]) -> Optional[str]: ``data:`` URL that executes in the notebook DOM when clicked (``html.escape`` guards markup, not the URL scheme). """ - if not url: + if not url or not isinstance(url, str): + # A non-string (e.g. a JSON number from an untrusted device response) + # has no scheme to vet and would make urlparse raise; treat it as unsafe. 
return None try: scheme = urllib.parse.urlparse(url).scheme.lower() diff --git a/test/test_auth.py b/test/test_auth.py index e2f43d74..92d2c3d0 100644 --- a/test/test_auth.py +++ b/test/test_auth.py @@ -549,6 +549,20 @@ def test_overflow_device_timing_fields_do_not_crash(self): auth = self.make_auth() self.assertEqual(auth.token(), ID_TOKEN) + def test_deeply_nested_jwt_payload_does_not_crash(self): + # A hostile/buggy IdP returning an id_token whose payload base64-decodes + # to deeply-nested JSON must not crash token() with a raw RecursionError + # from the best-effort identity decode (RecursionError is not a + # ValueError); the decode degrades to no-identity and the token is still + # returned. See _decode_jwt_claims. + payload = base64.urlsafe_b64encode( + (('[' * 60000) + (']' * 60000)).encode()).rstrip(b'=').decode() + nested = f'aaa.{payload}.sig' + self.state.token_script = [(200, { + 'id_token': nested, 'token_type': 'Bearer', 'expires_in': 3600})] + auth = self.make_auth() + self.assertEqual(auth.token(), nested) + def test_idp_requests_use_configured_timeout(self): # The device-code / poll / refresh POSTs must use the configured # timeout, so a stalled IdP can't pin the acquisition lock for the @@ -1068,6 +1082,24 @@ def test_issuer_path_scope_skips_explicit_endpoints(self): self.assertEqual(cfg.token_endpoint, 'https://idp.example.com/oauth2/v2.0/token') + def test_issuer_path_scope_rejects_dot_segment_traversal(self): + # A tampered /settings can't slip a different realm past the issuer-path + # scope with a '..' segment: '/realms/prod/../EVIL/...' satisfies a naive + # prefix test but the IdP normalizes it to the EVIL realm. The dotted + # path must be rejected (even percent-encoded). See + # _endpoint_path_under_issuer. 
+ kc = 'https://idp.example.com/realms' + for ep in (kc + '/prod/../EVIL/protocol/openid-connect', + kc + '/prod/%2e%2e/EVIL/protocol/openid-connect'): + evil = { + 'acl.oidc.enabled': True, 'acl.oidc.client.id': 'questdb', + 'acl.oidc.token.endpoint': ep + '/token', + 'acl.oidc.device.authorization.endpoint': ep + '/auth/device'} + with self.assertRaises(OidcConfigError) as cm: + self._resolve(evil, questdb_url='https://qdb.example.com:9000', + issuer=kc + '/prod') + self.assertIn('issuer', str(cm.exception).lower()) + @unittest.skipIf(pd is None, 'pandas not installed') class TestRestAdapter(AuthTestBase): @@ -1684,6 +1716,12 @@ def test_endpoint_path_under_issuer(self): under('https://idp.example.com/anything', 'https://idp.example.com')) self.assertTrue( under('https://idp.example.com/x', 'https://idp.example.com/')) + # A '.' / '..' segment (even percent-encoded) is rejected: urllib sends + # the dotted path verbatim and the IdP / proxy normalizes it to a + # DIFFERENT realm, which an origin check can't catch. + self.assertFalse(under(iss + '/../EVIL/protocol/token', iss)) + self.assertFalse(under(iss + '/%2e%2e/EVIL/token', iss)) + self.assertFalse(under(iss + '/./token', iss)) class TestCacheKey(unittest.TestCase): @@ -2037,6 +2075,28 @@ def test_qr_helpers_degrade_without_qrcode(self): self.assertIsNone(_render._qr_ascii('')) self.assertIsNone(_render._qr_data_uri('')) + def test_non_string_verification_uri_does_not_crash(self): + # A hostile/buggy device response with a non-string verification_uri / + # _complete (e.g. a JSON number or list) must not crash the renderer + # with a raw TypeError/AttributeError before the prompt is shown; the + # field is coerced away. See _verification_uri / _safe_link_url. 
+ import io + from questdb.auth._render import ( + format_prompt, TerminalRenderer, JupyterRenderer) + resp = {'user_code': 'WDJB-MJHT', 'verification_uri': 12345, + 'verification_uri_complete': ['not', 'a', 'str'], + 'expires_in': 600, 'interval': 5} + self.assertIn('WDJB-MJHT', format_prompt(resp)) # plain-text path + TerminalRenderer(stream=io.StringIO()).on_prompt(resp) # must not raise + captured = {} + + class _Cap(JupyterRenderer): + def _display(self, html_str): + captured['html'] = html_str + + _Cap().on_prompt(resp) # must not raise + self.assertNotIn(' Date: Fri, 19 Jun 2026 15:47:25 +0100 Subject: [PATCH 28/39] fix: make deeply-nested-JSON auth test robust on Python 3.14 test_deeply_nested_json_raises_oidc_error sent a fixed-depth (100000) JSON body and relied on json.loads raising RecursionError. Python 3.14's C json scanner parses far deeper than 3.13 (and the limit ignores sys.setrecursionlimit), so the body parses and get_json returns instead of raising -> 'OidcError not raised' on the cp314 wheel-test leg. Inject the RecursionError via mock so the test deterministically exercises the get_json / post_form -> OidcError mapping (the actual contract) on every Python version, instead of depending on a version-specific nesting depth. The library guard is unchanged and still correct for <=3.13. Co-Authored-By: Claude Opus 4.8 (1M context) --- test/test_auth.py | 42 ++++++++++++------------------------------ 1 file changed, 12 insertions(+), 30 deletions(-) diff --git a/test/test_auth.py b/test/test_auth.py index 92d2c3d0..73d67604 100644 --- a/test/test_auth.py +++ b/test/test_auth.py @@ -1896,41 +1896,23 @@ def test_bad_ca_bundle_raises_config_error(self): os.unlink(bad) def test_deeply_nested_json_raises_oidc_error(self): - # Deeply-nested JSON makes json.loads raise RecursionError (not a - # ValueError); get_json / post_form must map it to OidcError rather than - # let it escape the typed-error contract. See M1. 
+ # A RecursionError from json.loads (a deeply-nested JSON body exhausts + # the decoder's stack) must be mapped to OidcError, not escape the + # typed-error contract. The depth at which json actually raises is a + # Python-version detail — the C scanner ignores sys.setrecursionlimit, + # and 3.14 parses far deeper than 3.13, so a fixed-depth body no longer + # raises there — so inject the RecursionError directly to test the + # mapping deterministically across versions. See M1. from questdb.auth import _http - deep = (b'[' * 100000) + (b']' * 100000) - - class _Deep(http.server.BaseHTTPRequestHandler): - def log_message(self, *a): - pass - - def _send(self): - self.send_response(200) - self.send_header('Content-Type', 'application/json') - self.send_header('Content-Length', str(len(deep))) - self.end_headers() - self.wfile.write(deep) - - def do_GET(self): - self._send() - - def do_POST(self): - self.rfile.read(int(self.headers.get('Content-Length', 0))) - self._send() - - srv = http.server.HTTPServer(('127.0.0.1', 0), _Deep) - threading.Thread(target=srv.serve_forever, daemon=True).start() - base = f'http://127.0.0.1:{srv.server_port}' - try: + with _raw_response_server( + 200, 'application/json', b'{"ok": true}') as base, \ + mock.patch.object( + _http.json, 'loads', + side_effect=RecursionError('nesting too deep')): with self.assertRaises(OidcError): _http.get_json(base + '/x', timeout=5) with self.assertRaises(OidcError): _http.post_form(base + '/x', {'a': 'b'}, timeout=5) - finally: - srv.shutdown() - srv.server_close() def test_post_form_non_json_2xx_raises_oidc_error(self): # A 2xx body from the token/device endpoint that isn't JSON (e.g. 
an From a9c58c8b503e803459d802b547eca755673243ea Mon Sep 17 00:00:00 2001 From: glasstiger Date: Fri, 19 Jun 2026 15:59:46 +0100 Subject: [PATCH 29/39] fix: make questdb.auth clear() reliable across shared-cache instances MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit OidcDeviceAuth.clear() took the per-instance self._lock, but the default MemoryCache is process-global. Two instances with the same cache_key have separate locks, so instance A's in-flight _store could resurrect a token instance B had just cleared — clear()'s 'a concurrent refresh/sign-in can't re-populate the cache' guarantee held only within a single instance. Add a per-key generation counter to MemoryCache: clear() bumps it, and the new store_if_current() drops a write whose captured generation is stale. _obtain_tokens captures the generation before the cache read / IdP round-trip and threads it through _acquire -> _store, so a clear() on any instance sharing the global store invalidates a racing store instead of being undone. Backends without the hooks (NullCache / custom TokenCache) store unconditionally as before, so the public TokenCache interface is unchanged. clear()'s docstring/comment now scope the guarantee honestly (local/process cache reset, not an IdP-side revocation). Adds a deterministic end-to-end regression test (clear() on instance B during instance A's sign-in) plus a unit test for the CAS primitive; verified the end-to-end test fails without the fix. Full auth suite: 129 pass. 
Co-Authored-By: Claude Opus 4.8 (1M context) --- src/questdb/auth/_cache.py | 35 +++++++++++++++++++++++++++ src/questdb/auth/_device.py | 48 ++++++++++++++++++++++++++++++------- test/test_auth.py | 41 ++++++++++++++++++++++++++++++- 3 files changed, 114 insertions(+), 10 deletions(-) diff --git a/src/questdb/auth/_cache.py b/src/questdb/auth/_cache.py index 796611c1..b3e0ff95 100644 --- a/src/questdb/auth/_cache.py +++ b/src/questdb/auth/_cache.py @@ -88,6 +88,12 @@ def clear(self, key: str) -> None: # pragma: no cover # Module-global so that re-running a notebook cell (which constructs a fresh # ``OidcDeviceAuth``) reuses the already-acquired token instead of re-prompting. _MEMORY_STORE: Dict[str, TokenSet] = {} +# Per-key counter bumped on every clear(). store_if_current() uses it to drop a +# write from an acquisition that began before a concurrent clear() — including a +# clear() on a *different* OidcDeviceAuth that shares this process-global store, +# whose per-instance lock does not serialize against this one — so clear() can't +# be silently undone by an in-flight sign-in / refresh. +_MEMORY_GENERATION: Dict[str, int] = {} _MEMORY_LOCK = threading.Lock() @@ -114,6 +120,35 @@ def store(self, key: str, tokens: TokenSet) -> None: def clear(self, key: str) -> None: with _MEMORY_LOCK: _MEMORY_STORE.pop(key, None) + _MEMORY_GENERATION[key] = _MEMORY_GENERATION.get(key, 0) + 1 + + def generation(self, key: str) -> int: + """ + Current clear()-generation for ``key``. + + Captured before an acquisition's IdP round-trip and handed back to + :meth:`store_if_current`, which drops the write if a ``clear()`` bumped + the counter meanwhile (see :meth:`store_if_current`). + """ + with _MEMORY_LOCK: + return _MEMORY_GENERATION.get(key, 0) + + def store_if_current( + self, key: str, tokens: TokenSet, generation: int) -> bool: + """ + Store ``tokens`` only if no :meth:`clear` happened since ``generation``. 
+ + If a concurrent ``clear()`` — on this or any other + :class:`~questdb.auth.OidcDeviceAuth` sharing this process-global store — + bumped the counter after ``generation`` was captured, the write is + dropped (returns ``False``) so the just-cleared entry is not resurrected + with a now-stale token. Returns ``True`` when the token was stored. + """ + with _MEMORY_LOCK: + if _MEMORY_GENERATION.get(key, 0) != generation: + return False + _MEMORY_STORE[key] = replace(tokens) + return True class NullCache(TokenCache): diff --git a/src/questdb/auth/_device.py b/src/questdb/auth/_device.py index 966bed5c..71060b17 100644 --- a/src/questdb/auth/_device.py +++ b/src/questdb/auth/_device.py @@ -360,8 +360,12 @@ def cache_key(self) -> str: def clear(self) -> None: """Forget the cached token (forces a fresh sign-in next time).""" - # Serialize against acquisition so a concurrent refresh/sign-in can't - # re-populate the cache right after we clear it. + # self._lock serializes against THIS instance's acquisition; the shared + # MemoryCache additionally bumps a per-key generation here, so an + # in-flight acquisition on ANOTHER OidcDeviceAuth that shares the + # process-global store can't repopulate the entry after this clear (its + # _store sees the bumped generation and drops the write). This resets the + # local / process cache only — it does not revoke the token at the IdP. with self._lock: self._tokens = None self._cache.clear(self.cache_key) @@ -425,6 +429,12 @@ def _obtain_tokens(self) -> TokenSet: # overlapping refreshes or double-prompt; the loser re-checks and # reuses the winner's freshly acquired token. with self._lock: + # Capture the cache generation before reading or acquiring, so a + # clear() that races this acquisition — including one on another + # OidcDeviceAuth that shares the process-global MemoryCache (whose + # per-instance lock does not serialize against ours) — invalidates + # the store below instead of resurrecting the just-cleared entry. 
+ generation = self._cache_generation() # Promote a cached token into the field under the lock (even an # expired one, so _acquire can reuse its refresh_token for a silent # refresh). Done here, not on the lock-free fast path, so every @@ -436,7 +446,7 @@ def _obtain_tokens(self) -> TokenSet: tokens = self._valid_cached() if tokens is not None: return tokens - return self._acquire() + return self._acquire(generation) def _valid_cached(self) -> Optional[TokenSet]: # Read-only: reads the published field, falling back to a read of the @@ -451,9 +461,11 @@ def _valid_cached(self) -> Optional[TokenSet]: return tokens return None - def _acquire(self) -> TokenSet: + def _acquire(self, generation: int) -> TokenSet: # Called while holding self._lock. Try a silent refresh, else run the - # interactive device flow. + # interactive device flow. `generation` was captured before the cache + # read in _obtain_tokens; _store drops its write if a concurrent clear() + # has bumped it since (see _store / _cache_generation). tokens = self._tokens if tokens is not None and tokens.refresh_token: try: @@ -476,16 +488,34 @@ def _acquire(self) -> TokenSet: # a response is unusable, so fall through to the interactive # flow rather than caching it and looping on every call. if self._has_required_token(refreshed): - self._store(refreshed) + self._store(refreshed, generation) return refreshed fresh = self._run_device_flow() - self._store(fresh) + self._store(fresh, generation) return fresh - def _store(self, tokens: TokenSet) -> None: + def _store(self, tokens: TokenSet, generation: int) -> None: + # self._tokens is this instance's own view, so always set it — the + # caller uses the token it just acquired. The shared-cache write is + # conditional: a clear() (here or on another instance sharing the + # process-global store) that bumped the generation since it was captured + # drops the write, so clear() is not silently undone. 
Backends without + # generation support (NullCache / a custom TokenCache) store + # unconditionally, exactly as before. self._tokens = tokens - self._cache.store(self.cache_key, tokens) + store_if_current = getattr(self._cache, 'store_if_current', None) + if store_if_current is not None: + store_if_current(self.cache_key, tokens, generation) + else: + self._cache.store(self.cache_key, tokens) + + def _cache_generation(self) -> int: + # MemoryCache tracks a per-key clear()-generation for the cross-instance + # CAS in _store; other backends don't, so default to 0 (the store is + # then unconditional, matching the pre-existing behavior). + generation = getattr(self._cache, 'generation', None) + return generation(self.cache_key) if generation is not None else 0 def _tokenset_from_response(self, body: Dict[str, Any]) -> TokenSet: try: diff --git a/test/test_auth.py b/test/test_auth.py index 73d67604..0f79bc1e 100644 --- a/test/test_auth.py +++ b/test/test_auth.py @@ -64,7 +64,8 @@ OidcNetworkError, TokenSet, ) -from questdb.auth._cache import MemoryCache, _MEMORY_STORE # noqa: E402 +from questdb.auth._cache import ( # noqa: E402 + MemoryCache, _MEMORY_GENERATION, _MEMORY_STORE) from questdb.auth._render import Renderer # noqa: E402 try: @@ -303,6 +304,7 @@ def __init__(self): class AuthTestBase(unittest.TestCase): def setUp(self): _MEMORY_STORE.clear() + _MEMORY_GENERATION.clear() self.server = _MockServer() self.state = self.server.state self.thread = threading.Thread( @@ -1263,6 +1265,43 @@ def test_fast_path_does_not_write_tokens_field(self): self.assertEqual(auth.token(), ID_TOKEN) # served via the fast path self.assertIsNone(auth._tokens) # fast path did not write it + def test_clear_on_other_instance_survives_inflight_acquire(self): + # Two OidcDeviceAuth instances share the process-global MemoryCache + # (same cache_key) but have separate per-instance locks. 
If instance B + # clears the entry while instance A's sign-in is in flight, A's store + # must NOT resurrect it: the per-key generation A captured before its + # round-trip no longer matches, so the write is dropped and the cache + # stays cleared (the next fresh load re-prompts, honoring clear()). A + # still returns the token it just acquired. See store_if_current. + a = self.make_auth() + b = self.make_auth() + self.assertEqual(a.cache_key, b.cache_key) + + class _ClearMidFlow(Renderer): + def on_prompt(self, resp): + b.clear() # concurrent clear during A's sign-in + + a._renderer = _ClearMidFlow() + self.assertEqual(a.token(), ID_TOKEN) # A still gets its token + # A's store was dropped, so the shared cache is NOT repopulated; a fresh + # instance therefore re-signs in rather than reusing the cleared token. + self.assertNotIn(a.cache_key, _MEMORY_STORE) + + def test_store_if_current_drops_write_after_concurrent_clear(self): + # Unit cover for the CAS primitive the cross-instance guard relies on: + # a generation captured before a clear() must not be allowed to store. 
+ cache = MemoryCache() + key = 'k' + gen = cache.generation(key) # captured before clear + cache.clear(key) # concurrent clear + self.assertFalse( + cache.store_if_current(key, TokenSet(access_token='T1'), gen)) + self.assertIsNone(cache.load(key)) # write dropped + gen2 = cache.generation(key) # unraced store succeeds + self.assertTrue( + cache.store_if_current(key, TokenSet(access_token='T2'), gen2)) + self.assertIsNotNone(cache.load(key)) + class TestAdapters(unittest.TestCase): """Connection adapters: tested via injected fake modules (the real From 7ed6f05946e2d26d7c4fd4155715b28fd4296bab Mon Sep 17 00:00:00 2001 From: glasstiger Date: Fri, 19 Jun 2026 16:04:01 +0100 Subject: [PATCH 30/39] fix: tolerate non-string acl.oidc.* from QuestDB /settings and IdP discovery resolve_config read acl.oidc.scope / client.id / audience from /settings, and the token / device / authorization endpoints and issuer from the IdP .well-known discovery document, without a type check. A non-string value (a JSON list/number from a buggy or tampered server/IdP) flowed through to scope.split(), safe_urlparse() and the cache-key '\x1f'.join, escaping the typed-error contract with a bare AttributeError / TypeError. Add _str_setting() (mirroring _resolve_endpoint, which already drops a non-string endpoint): accept a non-empty string, else treat as absent. Apply it to both the /settings reads and the IdP-discovery doc values. scope falls back to 'openid', audience/issuer drop to None, and a non-string client.id or discovered endpoint surfaces the existing clear OidcConfigError instead of crashing later. Adds unit + end-to-end regression tests for both the /settings and discovery paths; verified each fails without the fix. Full auth suite: 132 pass. 
Co-Authored-By: Claude Opus 4.8 (1M context) --- src/questdb/auth/_discovery.py | 40 ++++++++++++++---- test/test_auth.py | 75 ++++++++++++++++++++++++++++++++++ 2 files changed, 108 insertions(+), 7 deletions(-) diff --git a/src/questdb/auth/_discovery.py b/src/questdb/auth/_discovery.py index b5f60ae9..4fea20c5 100644 --- a/src/questdb/auth/_discovery.py +++ b/src/questdb/auth/_discovery.py @@ -88,6 +88,20 @@ def _as_bool(value: Any, default: Optional[bool] = None) -> Optional[bool]: return default +def _str_setting(value: Any) -> Optional[str]: + """ + A ``/settings`` value as a non-empty string, else ``None``. + + ``/settings`` is server-controlled (and tamperable over a plaintext insecure + channel). A non-string ``acl.oidc.*`` value — a JSON list/number from a buggy + or hostile server — must not reach ``scope.split()`` or the cache-key join as + a raw object, where it would escape the package's typed-error contract with a + bare ``AttributeError`` / ``TypeError``. Mirrors :func:`_resolve_endpoint`, + which already drops a non-string endpoint. + """ + return value if isinstance(value, str) and value else None + + def settings_config(settings: Any) -> Dict[str, Any]: """ Return the trusted config map from a ``/settings`` response. @@ -340,18 +354,22 @@ def resolve_config( f'QuestDB at {questdb_url} reports OIDC is disabled ' f'({_K_ENABLED}=false). Nothing to authenticate against.') - client_id = client_id or cfg.get(_K_CLIENT_ID) + # _str_setting drops a non-string /settings value (e.g. a JSON list) so it + # can't reach scope.split() / the cache-key join as a raw object and escape + # the typed-error contract; a non-string client.id thus reads as absent and + # surfaces the clear "Missing client_id" error below. + client_id = client_id or _str_setting(cfg.get(_K_CLIENT_ID)) if not client_id: raise OidcConfigError( 'Missing OIDC client_id. QuestDB did not advertise ' f'{_K_CLIENT_ID!r} via /settings; pass client_id=... 
explicitly.') if scope is None: - scope = cfg.get(_K_SCOPE) or 'openid' + scope = _str_setting(cfg.get(_K_SCOPE)) or 'openid' if groups_in_token is None: groups_in_token = _as_bool(cfg.get(_K_GROUPS_IN_TOKEN), default=True) if audience is None: - audience = cfg.get(_K_AUDIENCE) or None + audience = _str_setting(cfg.get(_K_AUDIENCE)) # Track which credential endpoints the caller supplied directly. Those are # trusted; endpoints learned from /settings are only as trustworthy as the @@ -451,13 +469,21 @@ def resolve_config( doc = discover_device_endpoint_from_idp( issuer=issuer, discovery_url=discovery_url, ctx=ctx, insecure=False, timeout=timeout) + # The IdP discovery document is untrusted too: coerce its values the + # same way as /settings values. A non-string endpoint / issuer (a JSON + # number/list from a buggy or hostile IdP) must read as absent — the + # clear "could not resolve" OidcConfigError below, or no issuer pin — + # rather than reach safe_urlparse / the cache-key join as a raw object + # and escape the typed-error contract with a bare AttributeError. 
device_authorization_endpoint = ( device_authorization_endpoint - or doc.get('device_authorization_endpoint')) - token_endpoint = token_endpoint or doc.get('token_endpoint') + or _str_setting(doc.get('device_authorization_endpoint'))) + token_endpoint = ( + token_endpoint or _str_setting(doc.get('token_endpoint'))) authorization_endpoint = ( - authorization_endpoint or doc.get('authorization_endpoint')) - issuer = issuer or doc.get('issuer') + authorization_endpoint + or _str_setting(doc.get('authorization_endpoint'))) + issuer = issuer or _str_setting(doc.get('issuer')) if not token_endpoint: raise OidcConfigError( diff --git a/test/test_auth.py b/test/test_auth.py index 0f79bc1e..56ec2e8c 100644 --- a/test/test_auth.py +++ b/test/test_auth.py @@ -1627,6 +1627,81 @@ def test_resolve_endpoint_ignores_non_string(self): self.assertIsNone(_resolve_endpoint(8080, {})) self.assertIsNone(_resolve_endpoint(True, {})) + def test_str_setting_ignores_non_string(self): + # A non-empty string passes through; anything else (a JSON list / + # number / dict, None, empty string) reads as absent so it can't reach + # scope.split() / the cache-key join as a raw object. + from questdb.auth._discovery import _str_setting + self.assertEqual(_str_setting('openid email'), 'openid email') + for bad in (['openid'], 12345, {'x': 1}, True, '', None): + self.assertIsNone(_str_setting(bad)) + + def test_non_string_settings_do_not_crash_resolution(self): + # A buggy/tampered /settings advertising non-string acl.oidc.* values + # must stay within the typed-error contract instead of crashing later + # with a bare AttributeError / TypeError (scope.split() / the cache-key + # join). scope falls back to 'openid', audience drops to None, and a + # non-string client.id reads as absent -> clear OidcConfigError. 
+ from questdb.auth import _discovery + base = { + 'acl.oidc.enabled': True, 'acl.oidc.client.id': 'questdb', + 'acl.oidc.token.endpoint': 'https://idp.example.com/token', + 'acl.oidc.device.authorization.endpoint': + 'https://idp.example.com/device'} + + def from_settings(settings): + with mock.patch.object(_discovery, 'fetch_settings', + return_value=settings): + return OidcDeviceAuth.from_questdb( + 'https://qdb.example.com:9000', renderer=Renderer()) + + auth = from_settings({**base, 'acl.oidc.scope': ['openid', 'groups'], + 'acl.oidc.audience': {'x': 1}}) + self.assertEqual(auth.config.scope, 'openid') # non-string -> default + self.assertIsNone(auth.config.audience) # non-string -> dropped + self.assertTrue(auth.cache_key) # crash site now safe + # A non-string client.id reads as absent -> clear typed error. + with self.assertRaises(OidcConfigError): + from_settings({**base, 'acl.oidc.client.id': 12345}) + + def test_non_string_idp_discovery_values_do_not_crash(self): + # The IdP .well-known discovery document is untrusted too: a non-string + # endpoint / issuer (a JSON number/list from a buggy or hostile IdP) + # must read as absent -> a clear OidcConfigError, not a bare + # AttributeError from safe_urlparse later. See resolve_config discovery. + from questdb.auth import _discovery + settings = {'acl.oidc.enabled': True, 'acl.oidc.client.id': 'questdb'} + + def from_discovery(well_known, **kw): + with mock.patch.object(_discovery, 'fetch_settings', + return_value=settings), \ + mock.patch.object( + _discovery, 'discover_device_endpoint_from_idp', + return_value=well_known): + return OidcDeviceAuth.from_questdb( + 'https://qdb.example.com:9000', renderer=Renderer(), **kw) + + # Non-string token / device endpoint -> absent -> clear typed error. 
+ with self.assertRaises(OidcConfigError): + from_discovery( + {'device_authorization_endpoint': 'https://idp.example.com/device', + 'token_endpoint': 12345}, + issuer='https://idp.example.com') + with self.assertRaises(OidcConfigError): + from_discovery( + {'device_authorization_endpoint': ['nope'], + 'token_endpoint': 'https://idp.example.com/token'}, + issuer='https://idp.example.com') + # A non-string discovered issuer is dropped (no pin); valid endpoints + # still resolve and the cache key builds (the former crash site). + auth = from_discovery( + {'device_authorization_endpoint': 'https://idp.example.com/device', + 'token_endpoint': 'https://idp.example.com/token', + 'issuer': ['not', 'a', 'string']}, + discovery_url='https://idp.example.com/.well-known/openid-configuration') + self.assertIsNone(auth.config.issuer) + self.assertTrue(auth.cache_key) + def test_resolve_endpoint_relative_path_without_host_is_none(self): # A path-only endpoint with no acl.oidc.host can't be resolved; it must # be treated as absent (None) so resolution fails with a clear "could From 63a3f7aa53fd2c55168e196ddabac335036e6360 Mon Sep 17 00:00:00 2001 From: glasstiger Date: Sun, 21 Jun 2026 21:04:54 +0100 Subject: [PATCH 31/39] fix: keep device-flow poll alive through transient IdP errors MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit The RFC 8628 poll loop aborted the whole sign-in on any transient failure during polling — a dropped connection, DNS blip, or per-request timeout (OidcNetworkError), an HTML 502/503/504 from a proxy/LB in front of the IdP (a bare OidcError from post_form), or a 5xx/429 carrying a JSON body (which hit the terminal error branch). Because the device flow targets flaky remote kernels and the failure lands after the user has already authorized in the browser, a single blip discarded a completed sign-in and forced a full restart. 
Treat these as transient and keep polling until the device-code deadline (RFC 8628 section 3.4): wrap the per-poll request in try/except OidcError -> continue, and continue on a 5xx/429 status, backing off the interval on a 429 rate-limit. Success, authorization_pending, slow_down, expired_token, and genuine OAuth rejections are unchanged; the deadline still bounds the total wait. Add regression tests covering a transient network error mid-poll and a 503 -> 429 -> 200 sequence (both fail against the pre-fix code). Co-Authored-By: Claude Opus 4.8 (1M context) --- src/questdb/auth/_device.py | 36 ++++++++++++++++++++++++++------- test/test_auth.py | 40 +++++++++++++++++++++++++++++++++++++ 2 files changed, 69 insertions(+), 7 deletions(-) diff --git a/src/questdb/auth/_device.py b/src/questdb/auth/_device.py index 71060b17..41a07ef7 100644 --- a/src/questdb/auth/_device.py +++ b/src/questdb/auth/_device.py @@ -659,13 +659,26 @@ def _poll_for_token(self, resp: Dict[str, Any]) -> TokenSet: # interval still shouldn't overshoot a short-lived code. self._sleep(min(interval, remaining)) - status, body = self._idp_post( - self.config.token_endpoint, - { - 'grant_type': DEVICE_CODE_GRANT, - 'device_code': device_code, - 'client_id': self.config.client_id, - }) + try: + status, body = self._idp_post( + self.config.token_endpoint, + { + 'grant_type': DEVICE_CODE_GRANT, + 'device_code': device_code, + 'client_id': self.config.client_id, + }) + except OidcError: + # Transient failure mid-poll, not a terminal OAuth decision: a + # dropped connection / DNS blip / per-request timeout + # (OidcNetworkError), or a non-2xx response with a non-JSON body + # such as an HTML 502/503/504 from a proxy or load balancer in + # front of the IdP (a bare OidcError from post_form). The user + # may already have authorized in the browser, and RFC 8628 §3.4 + # expects polling to continue until the device code expires, so + # poll again instead of discarding the in-progress sign-in. 
The + # deadline check at the top of the loop bounds the total wait; a + # genuine rejection arrives as a JSON error body (handled below). + continue if status == 200: # A 200 is the RFC 6749 §5.1 token response: the grant @@ -686,6 +699,15 @@ def _poll_for_token(self, resp: Dict[str, Any]) -> TokenSet: 'token this server requires.') raise self._missing_required_token_error() + # A 5xx or 429 that did carry a JSON body is also transient (a + # server-side error or a rate-limit) rather than a terminal OAuth + # rejection: back off on a rate-limit and keep polling until the + # deadline, matching the connection-failure handling above. + if status >= 500 or status == 429: + if status == 429: + interval = min(_MAX_POLL_INTERVAL, interval + 5) + continue + error = body.get('error') if error == 'authorization_pending': continue diff --git a/test/test_auth.py b/test/test_auth.py index 56ec2e8c..6d604e08 100644 --- a/test/test_auth.py +++ b/test/test_auth.py @@ -369,6 +369,46 @@ def test_slow_down_backs_off(self): # interval starts at 5, +5 after slow_down. self.assertEqual(self._clock.sleeps, [5, 10]) + def test_transient_network_error_during_poll_keeps_polling(self): + # A dropped connection / DNS blip / timeout on a single poll must not + # abort a sign-in the user may already have completed in the browser: + # the loop keeps polling until the deadline (RFC 8628 §3.4). M1. + self.state.token_script = [(200, None)] # success once actually polled + auth = self.make_auth() + real_idp_post = auth._idp_post + token_polls = {'n': 0} + + def flaky(url, form): + # Fail only the first poll of the token endpoint; pass the device- + # code request and later polls through to the real transport. 
+ if url == auth.config.token_endpoint: + token_polls['n'] += 1 + if token_polls['n'] == 1: + raise OidcNetworkError('connection reset mid-poll') + return real_idp_post(url, form) + + auth._idp_post = flaky + self.assertEqual(auth.token(), ID_TOKEN) + # First poll raised (transient, retried); second poll reached the IdP. + self.assertEqual(token_polls['n'], 2) + self.assertEqual(len(self.state.token_requests), 1) + self.assertEqual(self._clock.sleeps, [5, 5]) + + def test_transient_5xx_and_429_during_poll_keep_polling(self): + # A 5xx server error or a 429 rate-limit (even carrying a JSON body) is + # transient, not a terminal OAuth rejection: keep polling, backing off + # on the rate-limit. M1. + self.state.token_script = [ + (503, {'error': 'server_error'}), + (429, {'error': 'slow_down'}), + (200, None), + ] + auth = self.make_auth() + self.assertEqual(auth.token(), ID_TOKEN) + self.assertEqual(len(self.state.token_requests), 3) + # 503 polled at the base interval; 429 bumps the interval by 5. + self.assertEqual(self._clock.sleeps, [5, 5, 10]) + def test_timeout_when_never_authorized(self): self.state.device_response = { 'device_code': 'DEV-CODE', 'user_code': 'X', From 954f8a87499279dced16cbfc7675f83f60d4a79a Mon Sep 17 00:00:00 2001 From: glasstiger Date: Sun, 21 Jun 2026 21:09:35 +0100 Subject: [PATCH 32/39] fix: map malformed IdP discovery / /exec payloads to OidcError Two server-payload paths escaped the package's typed-error contract with a raw exception instead of an OidcError: * A valid-JSON-but-not-an-object IdP discovery document (a list/null/ number/string from a captive portal, a misconfigured proxy, or a hostile IdP) reached resolve_config's doc.get(...) calls and raised a bare AttributeError. discover_device_endpoint_from_idp now coerces a non-dict document to {} so resolution fails with the clear 'could not resolve the ... endpoint' OidcConfigError, mirroring settings_config. 
* A /exec column descriptor with a non-hashable name (a JSON list/object) plus a TIMESTAMP/DATE type raised 'TypeError: unhashable type' from 'name in df.columns' during timestamp coercion. _exec_json_to_df now rejects a non-string column name in its columns guard, and also catches TypeError (not only ValueError) from the DataFrame constructor. Both surface as OidcError now; add regression tests (each fails against the pre-fix code with the raw AttributeError / TypeError). Co-Authored-By: Claude Opus 4.8 (1M context) --- src/questdb/auth/_discovery.py | 11 ++++++++++- src/questdb/auth/_questdb.py | 16 +++++++++++----- test/test_auth.py | 30 ++++++++++++++++++++++++++++++ 3 files changed, 51 insertions(+), 6 deletions(-) diff --git a/src/questdb/auth/_discovery.py b/src/questdb/auth/_discovery.py index 4fea20c5..21e3c8db 100644 --- a/src/questdb/auth/_discovery.py +++ b/src/questdb/auth/_discovery.py @@ -319,7 +319,16 @@ def discover_device_endpoint_from_idp( 'Cannot discover the IdP device-authorization endpoint: no issuer ' 'or discovery_url was given. Pass issuer=... (or ' 'device_authorization_endpoint=... to skip discovery).') - return get_json(url, ctx=ctx, insecure=insecure, timeout=timeout) + doc = get_json(url, ctx=ctx, insecure=insecure, timeout=timeout) + # get_json guarantees valid JSON, not a JSON *object*. A discovery document + # that is valid-JSON-but-not-a-dict (a list/null/number/string from a + # captive portal, a misconfigured proxy, or a hostile IdP) must not reach + # resolve_config's doc.get(...) calls as a raw object, where it would escape + # the package's typed-error contract with a bare AttributeError. Coerce it + # to empty so resolution fails with the clear "could not resolve the ... + # endpoint" OidcConfigError instead — mirroring settings_config, which + # applies the same guard to the QuestDB /settings response. 
+ return doc if isinstance(doc, dict) else {} def resolve_config( diff --git a/src/questdb/auth/_questdb.py b/src/questdb/auth/_questdb.py index 98c6c72a..a2186442 100644 --- a/src/questdb/auth/_questdb.py +++ b/src/questdb/auth/_questdb.py @@ -72,11 +72,15 @@ def _import_pandas(): def _exec_json_to_df(data: Dict[str, Any], pandas): columns = data.get('columns') or [] # /exec returns a list of {"name", "type"} column descriptors. A malformed - # response (a non-list, or entries that aren't objects) must surface as a - # clean OidcError, not an AttributeError from .get() escaping the package's - # typed-error contract. + # response — a non-list, entries that aren't objects, or a non-string name — + # must surface as a clean OidcError, not a raw AttributeError from .get(), + # nor a TypeError from `name in df.columns` below when a name is + # non-hashable (a JSON list/object), escaping the package's typed-error + # contract. A real QuestDB column name is always a string. if not isinstance(columns, list) or not all( - isinstance(c, dict) for c in columns): + isinstance(c, dict) + and isinstance(c.get('name'), (str, type(None))) + for c in columns): raise OidcError( 'QuestDB /exec returned a malformed "columns" field; ' 'cannot build a DataFrame.') @@ -86,7 +90,9 @@ def _exec_json_to_df(data: Dict[str, Any], pandas): dataset = data.get('data') or [] try: df = pandas.DataFrame(dataset, columns=names or None) - except ValueError as e: + except (ValueError, TypeError) as e: + # TypeError too: a hostile/malformed dataset shape can make the pandas + # constructor raise it (not only ValueError); keep it within OidcError. 
raise OidcError( f'Unexpected shape in QuestDB /exec response: {e}') from e for col in columns: diff --git a/test/test_auth.py b/test/test_auth.py index 6d604e08..ca35d723 100644 --- a/test/test_auth.py +++ b/test/test_auth.py @@ -924,6 +924,22 @@ def test_missing_device_endpoint_raises(self): OidcDeviceAuth.from_questdb(self.base, issuer=self.base, insecure=True) + def test_non_dict_well_known_doc_raises_config_error(self): + # M2: an IdP discovery document that is valid JSON but not an object + # (a list/null/number/string from a captive portal, a misconfigured + # proxy, or a hostile IdP) must surface as a typed OidcConfigError, not + # a raw AttributeError from doc.get(...). issuer= is pinned so the + # fallback is allowed; the doc's shape is the error under test. + self.state.settings = {'config': { + 'acl.oidc.enabled': True, + 'acl.oidc.client.id': 'questdb', + 'acl.oidc.token.endpoint': self.base + '/token', + }} + self.state.well_known = [] # valid JSON, but not an object + with self.assertRaises(OidcConfigError): + OidcDeviceAuth.from_questdb(self.base, issuer=self.base, + insecure=True) + def test_malformed_endpoint_port_raises_config_error(self): # /settings advertising a non-integer port in an endpoint must raise # OidcConfigError (the typed contract), not a bare ValueError that @@ -1235,6 +1251,20 @@ def test_sql_non_dict_columns_raises_oidc_error(self): qdb.sql('SELECT 1') self.assertNotIsInstance(cm.exception, OidcAuthError) + def test_sql_non_string_column_name_raises_oidc_error(self): + # M2: a column descriptor with a non-hashable name (a JSON list/object) + # and a TIMESTAMP/DATE type must raise a clean OidcError, not a raw + # TypeError ("unhashable type") from `name in df.columns` during the + # timestamp coercion. 
+ qdb = self._connected() + self.state.exec_response = { + 'columns': [{'name': ['evil'], 'type': 'TIMESTAMP'}, + {'name': 'b', 'type': 'LONG'}], + 'dataset': [['2021-01-01T00:00:00.000000Z', 2]]} + with self.assertRaises(OidcError) as cm: + qdb.sql('SELECT 1') + self.assertNotIsInstance(cm.exception, OidcAuthError) + class TestConcurrency(AuthTestBase): def test_valid_cached_token_does_not_block_during_signin(self): From 6e2970bc295399f90920c44b2d4094fae7f50acd Mon Sep 17 00:00:00 2001 From: glasstiger Date: Sun, 21 Jun 2026 23:18:43 +0100 Subject: [PATCH 33/39] fix: sanitize untrusted device fields on the Jupyter prompt path MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit _strip_control (control / bidi / zero-width removal) was applied only on the terminal renderer. The Jupyter renderer — the module's primary surface — relied on html.escape alone, which neutralizes markup but NOT a U+202E bidi override or zero-width chars. A hostile or MITM'd IdP could put such chars in user_code, the verification URI, the JWT-derived identity, or error_description to visually spoof the sign-in prompt in the notebook DOM (the exact attack _strip_control exists to prevent). Route every untrusted Jupyter field through _strip_control before html.escape: factor the shared header/link/code into _prompt_head() (so on_prompt and _render_with_status can't diverge), and strip identity in on_success and the message in on_failure. Also complete _CONTROL_CHARS with the format/bidi code points it implied but missed (U+00AD, U+061C, U+115F, U+180E, U+2060-2064, U+FFF9-FFFB) and correct the _strip_control docstring claim that html-escaping suffices for Jupyter. Add a Jupyter-path regression test and extend the strip test to the new code points (both fail against the pre-fix code; the former shows U+202E reaching the rendered DOM). 
Co-Authored-By: Claude Opus 4.8 (1M context) --- src/questdb/auth/_render.py | 73 ++++++++++++++++++++++--------------- test/test_auth.py | 38 ++++++++++++++++++- 2 files changed, 80 insertions(+), 31 deletions(-) diff --git a/src/questdb/auth/_render.py b/src/questdb/auth/_render.py index 86dd2338..283153b7 100644 --- a/src/questdb/auth/_render.py +++ b/src/questdb/auth/_render.py @@ -128,13 +128,17 @@ def _render_link(url: Optional[str], *, text: Optional[str] = None) -> str: f'rel="noopener noreferrer">{label}') -# C0/C1 control chars (incl. ESC, which drives ANSI escape sequences) plus the -# Unicode bidi-control, zero-width and line/paragraph-separator ranges. All can -# spoof a terminal prompt: U+202E (RIGHT-TO-LEFT OVERRIDE) reverses displayed -# text to disguise a URL's host; U+2028/U+2029 inject fake lines; zero-width -# chars hide content. Stripped from untrusted device-response fields. +# C0/C1 control chars (incl. ESC, which drives ANSI escape sequences), the +# Unicode bidi controls, the zero-width / invisible-format chars, the +# line/paragraph separators and the interlinear-annotation controls. All can +# spoof a prompt: U+202E (RIGHT-TO-LEFT OVERRIDE) reverses displayed text to +# disguise a URL's host; U+2028/U+2029 inject fake lines; zero-width / invisible +# chars hide or join content. Stripped from untrusted device-response fields on +# BOTH the terminal and the Jupyter path (html.escape neutralizes markup, not +# these). Covers the dangerous Unicode Cc/Cf code points for our inputs. 
_CONTROL_CHARS = re.compile( - r'[\x00-\x1f\x7f-\x9f\u200b-\u200f\u2028-\u202e\u2066-\u2069\ufeff]') + r'[\x00-\x1f\x7f-\x9f\u00ad\u061c\u115f\u180e\u200b-\u200f' + r'\u2028-\u202e\u2060-\u2064\u2066-\u2069\ufeff\ufff9-\ufffb]') def _strip_control(text: Optional[str]) -> str: @@ -147,8 +151,9 @@ def _strip_control(text: Optional[str]) -> str: a hostile or MITM'd response inject ANSI escape sequences (C0/C1 control chars — cursor moves, screen clears) or Unicode bidi overrides / zero-width / line separators to spoof the prompt or hide the real sign-in URL (e.g. - U+202E visually reverses the displayed host). The Jupyter renderer - html-escapes its output; the plain-text path needs this. + U+202E visually reverses the displayed host). Needed on BOTH paths: the + plain-text terminal path (raw bytes to the TTY) and the Jupyter path — + ``html.escape`` neutralizes markup, not bidi/zero-width spoofing. """ if not text: return '' @@ -253,11 +258,25 @@ def _panel(self, body: str) -> str: 'padding:12px 16px;font-family:sans-serif;max-width:520px">' + body + '') - def on_prompt(self, resp: Dict[str, Any]) -> None: - self._resp = resp - uri = _verification_uri(resp) - code = html.escape(str(resp.get('user_code', ''))) + def _prompt_head(self): + """Header + sanitized verification link and user code. + + Shared by :meth:`on_prompt` and :meth:`_render_with_status` so the + sanitization can't be applied to one path and forgotten on the other. + ``verification_uri`` / ``user_code`` / ``verification_uri_complete`` are + untrusted device-response fields: strip control / bidi / zero-width + chars (which ``html.escape`` does NOT remove) before rendering, so a + hostile or MITM'd response can't inject a U+202E bidi override or + zero-width chars to visually spoof the prompt in the notebook DOM. + ``_render_link`` additionally html-escapes and scheme-vets the URL. 
+ Returns ``(body, uri, complete)`` — the sanitized URLs are handed back + so the QR target isn't re-derived (and re-sanitized). + """ + resp = self._resp + uri = _strip_control(_verification_uri(resp)) + code = html.escape(_strip_control(str(resp.get('user_code', '')))) complete = _verification_uri_complete(resp) + complete = _strip_control(complete) if complete else None body = [ '
' '🔐 Sign in to QuestDB
', @@ -270,6 +289,11 @@ def on_prompt(self, resp: Dict[str, Any]) -> None: '
' + _render_link( complete, text='Click here to authorize directly →') + '
') + return body, uri, complete + + def on_prompt(self, resp: Dict[str, Any]) -> None: + self._resp = resp + body, uri, complete = self._prompt_head() if self._qr: qr_target = _safe_link_url(complete) or _safe_link_url(uri) data_uri = _qr_data_uri(qr_target) if qr_target else None @@ -292,7 +316,9 @@ def on_waiting(self, seconds_left: float) -> None: color='#888') def on_success(self, identity: Optional[str], expires_in: float) -> None: - who = html.escape(identity) if identity else '' + # identity is derived from the (untrusted) JWT claims — strip control / + # bidi chars before html-escaping, as for the other rendered fields. + who = html.escape(_strip_control(identity)) if identity else '' mins = max(1, int(round(expires_in / 60))) suffix = f' as {who}' if who else '' self._render_with_status( @@ -300,25 +326,12 @@ def on_success(self, identity: Optional[str], expires_in: float) -> None: color='#2e7d32') def on_failure(self, message: str) -> None: - self._render_with_status('❌ ' + html.escape(message), color='#c62828') + # message may interpolate the IdP's (untrusted) error_description. + self._render_with_status( + '❌ ' + html.escape(_strip_control(message)), color='#c62828') def _render_with_status(self, status_html: str, color: str) -> None: - resp = self._resp - uri = _verification_uri(resp) - code = html.escape(str(resp.get('user_code', ''))) - complete = _verification_uri_complete(resp) - body = [ - '
' - '🔐 Sign in to QuestDB
', - f'
Open {_render_link(uri)} and enter code:
', - f'
{code}
', - ] - if _safe_link_url(complete): - body.append( - '
' + _render_link( - complete, text='Click here to authorize directly →') - + '
') + body, _uri, _complete = self._prompt_head() body.append( f'
{status_html}
') self._display(self._panel(''.join(body))) diff --git a/test/test_auth.py b/test/test_auth.py index ca35d723..17553051 100644 --- a/test/test_auth.py +++ b/test/test_auth.py @@ -2185,6 +2185,40 @@ def _display(self, html_str): # avoid importing IPython self.assertIn(' Date: Sun, 21 Jun 2026 23:36:44 +0100 Subject: [PATCH 34/39] fix: bind discovery_url pin to the IdP origin; close auth test gaps MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit M4: discovery_url= is documented as an out-of-band IdP pin, but when it was supplied without issuer=, resolve_config trusted the discovery document's self-declared issuer as the anchor that validate_endpoint_origins checks the endpoints against. A hostile/confused/multi-tenant discovery host (or one returning no/absent issuer) could declare attacker endpoints all on one origin and pass the co-location + issuer-origin checks vacuously, redirecting the device-code / refresh-token POSTs. Anchor to the caller-pinned discovery_url itself instead: require the discovered credential endpoints to share its origin (OIDC Discovery §4.3 / RFC 8414 §3). This also closes the absent/non-string-issuer variant, and keeps the legitimate case (endpoints on the discovery origin, issuer dropped) working. M5: close auth test-coverage gaps flagged in review: * 401/403 -> OidcAuthError mapping in QuestDB.sql now has a pandas-independent test (new TestRestAdapterAuthErrors, registered in test.py) so it runs on every CI leg, not only where pandas is installed (the status check precedes any DataFrame build); adds the previously-missing 403 case. * test_concurrent_signin_prompts_only_once now asserts both threads finished (is_alive() == False) so a deadlock regression fails loudly instead of leaking a hung thread and passing on a stale result. * adds coverage for the IdP returning error=expired_token during the poll (distinct from the local deadline) and for a rotated refresh token being stored. 
Adds a regression test for the M4 fix (off-origin endpoints via a pinned discovery_url are refused; fails against the pre-fix code). Co-Authored-By: Claude Opus 4.8 (1M context) --- src/questdb/auth/_discovery.py | 30 +++++++++++ test/test.py | 1 + test/test_auth.py | 93 ++++++++++++++++++++++++++++++++++ 3 files changed, 124 insertions(+) diff --git a/src/questdb/auth/_discovery.py b/src/questdb/auth/_discovery.py index 21e3c8db..de95b58f 100644 --- a/src/questdb/auth/_discovery.py +++ b/src/questdb/auth/_discovery.py @@ -492,6 +492,36 @@ def resolve_config( authorization_endpoint = ( authorization_endpoint or _str_setting(doc.get('authorization_endpoint'))) + # OIDC Discovery §4.3 / RFC 8414 §3: when the IdP is pinned ONLY by + # discovery_url (no out-of-band issuer=), the document's self-declared + # issuer would otherwise be the trust anchor validate_endpoint_origins + # (in OidcDeviceAuth.__init__) checks the endpoints against — but that + # issuer comes from the same (possibly hostile, confused, or + # multi-tenant) document, so the check is vacuous, and an absent or + # non-string issuer makes it vacuous too (a document declaring attacker + # endpoints all on one origin would pass co-location trivially). Anchor + # instead to the caller-pinned discovery_url itself: require the + # credential endpoints to live on its origin, so a document can't + # redirect the device-code / refresh-token POSTs to an attacker origin. + # Origin-level, matching validate_endpoint_origins; pass issuer= and the + # endpoints explicitly if your IdP serves discovery and tokens from + # different origins. 
+ if discovery_url and not issuer: + discovery_origin = _normalized_origin(discovery_url) + for label, url in ( + ('token endpoint', token_endpoint), + ('device-authorization endpoint', + device_authorization_endpoint)): + if url and _normalized_origin(url) != discovery_origin: + raise OidcConfigError( + f'The OIDC {label} ({url!r}) discovered via the pinned ' + f'discovery_url ({discovery_url!r}) is on a different ' + 'origin; refusing to let a discovery document redirect ' + 'credentials off the pinned IdP origin (OIDC Discovery ' + '§4.3). Pin the IdP with issuer="https://your-idp" and ' + 'pass token_endpoint=/device_authorization_endpoint= ' + 'explicitly if it serves discovery and tokens from ' + 'different origins.') issuer = issuer or _str_setting(doc.get('issuer')) if not token_endpoint: diff --git a/test/test.py b/test/test.py index 04a2dcd3..054efe5e 100755 --- a/test/test.py +++ b/test/test.py @@ -42,6 +42,7 @@ TestDiscovery, TestInsecureSettingsGuard, TestRestAdapter, + TestRestAdapterAuthErrors, TestAdapters, TestConcurrency, TestConfigHelpers, diff --git a/test/test_auth.py b/test/test_auth.py index 17553051..336ef6a1 100644 --- a/test/test_auth.py +++ b/test/test_auth.py @@ -420,6 +420,16 @@ def test_timeout_when_never_authorized(self): with self.assertRaises(OidcTimeoutError): auth.token() + def test_idp_expired_token_error_raises_timeout(self): + # The token endpoint can itself answer a poll with error=expired_token + # (RFC 8628) — distinct from the local-deadline timeout. It must surface + # as OidcTimeoutError carrying that error, not loop or mis-classify it. 
+ self.state.token_script = [(400, {'error': 'expired_token'})] + auth = self.make_auth() + with self.assertRaises(OidcTimeoutError) as cm: + auth.token() + self.assertEqual(cm.exception.error, 'expired_token') + def test_nonpositive_expires_in_still_polls(self): # A non-positive expires_in in the device-auth response must be treated # as unknown, not as "already expired" — otherwise the flow times out @@ -733,6 +743,20 @@ def test_refresh_token_preserved_when_not_rotated(self): auth.token() self.assertEqual(auth._tokens.refresh_token, 'REFRESH-1') + def test_rotated_refresh_token_is_stored(self): + # When the IdP DOES rotate the refresh token, the new one must replace + # the old in the cached token set — else an IdP with one-time-use + # refresh tokens breaks on the NEXT refresh. + auth = self.make_auth() + self._seed_expired(auth) + self.state.refresh_response = (200, { + 'access_token': ACCESS_TOKEN, 'id_token': ID_TOKEN, + 'refresh_token': 'REFRESH-2', # rotated + 'token_type': 'Bearer', 'expires_in': 3600}) + auth.token() + self.assertEqual(auth._tokens.refresh_token, 'REFRESH-2') + self.assertEqual(self.state.device_requests, 0) # no re-prompt + def test_refresh_without_id_token_falls_back_to_device_flow(self): # groups_in_token=True but the IdP's refresh omits the id_token: the # refresh is unusable, so fall back to the interactive flow rather than @@ -904,6 +928,30 @@ def test_device_fallback_with_discovery_url_is_accepted(self): self.assertEqual(auth.config.device_authorization_endpoint, self.base + '/device') + def test_discovery_url_rejects_off_origin_issuer_in_doc(self): + # M4: discovery_url= is advertised as an out-of-band pin, but the doc it + # points to could declare an attacker issuer AND endpoints all on one + # (attacker) origin — which passes co-location / issuer-origin vacuously. + # The discovered issuer must share the pinned discovery_url origin (OIDC + # Discovery §4.3), else refuse. 
/settings advertises NO endpoints, so + # both come from the (hostile) doc — the exact gap the fix closes. + self.state.settings = {'config': { + 'acl.oidc.enabled': True, + 'acl.oidc.client.id': 'questdb', + }} + self.state.well_known = { + 'issuer': 'https://attacker.example.net', + 'token_endpoint': 'https://attacker.example.net/token', + 'device_authorization_endpoint': + 'https://attacker.example.net/device', + } + with self.assertRaises(OidcConfigError) as cm: + OidcDeviceAuth.from_questdb( + self.base, + discovery_url=self.base + '/.well-known/openid-configuration', + insecure=True) + self.assertIn('origin', str(cm.exception).lower()) + def test_oidc_disabled_raises(self): self.state.settings = {'config': {'acl.oidc.enabled': False}} with self.assertRaises(OidcConfigError): @@ -1266,6 +1314,47 @@ def test_sql_non_string_column_name_raises_oidc_error(self): self.assertNotIsInstance(cm.exception, OidcAuthError) +class TestRestAdapterAuthErrors(AuthTestBase): + """QuestDB.sql maps 401/403 to OidcAuthError BEFORE it builds a DataFrame, + so the mapping is testable without a real pandas. Kept out of the + pandas-gated TestRestAdapter so this security-relevant mapping runs on EVERY + CI leg, not just the ones where pandas is installed. M5.""" + + def _connected(self): + self.state.settings = {'config': { + 'acl.oidc.enabled': True, + 'acl.oidc.client.id': 'questdb', + 'acl.oidc.scope': 'openid groups', + 'acl.oidc.groups.encoded.in.token': True, + 'acl.oidc.token.endpoint': self.base + '/token', + 'acl.oidc.device.authorization.endpoint': self.base + '/device', + }} + self.state.expected_bearer = ID_TOKEN + return connect(self.base, insecure=True, renderer=Renderer(), + interactive=True, _clock=FakeClock()) + + @staticmethod + def _stub_pandas(): + # sql() reaches the 401/403 check before it touches pandas, so a bare + # stub module is enough to exercise the mapping without the real + # (possibly absent) dependency. 
+ return mock.patch.dict( + sys.modules, {'pandas': types.ModuleType('pandas')}) + + def test_sql_401_maps_to_auth_error_without_pandas(self): + qdb = self._connected() + self.state.expected_bearer = 'something-else' # force 401 + with self._stub_pandas(), self.assertRaises(OidcAuthError): + qdb.sql('SELECT 1') + + def test_sql_403_maps_to_auth_error_without_pandas(self): + qdb = self._connected() + self.state.exec_status = 403 # bearer matches; server forbids + self.state.exec_response = {'error': 'forbidden'} + with self._stub_pandas(), self.assertRaises(OidcAuthError): + qdb.sql('SELECT 1') + + class TestConcurrency(AuthTestBase): def test_valid_cached_token_does_not_block_during_signin(self): # A caller with a valid cached token must NOT block behind another @@ -1318,6 +1407,10 @@ def call(name): release.set() # let t1 finish signing in t1.join(5) t2.join(5) + # Fail loudly on a deadlock regression: a hung thread would otherwise + # leak and let the assertions below pass on a stale/half-filled dict. + self.assertFalse(t1.is_alive(), 'sign-in thread deadlocked') + self.assertFalse(t2.is_alive(), 'waiter thread deadlocked') self.assertEqual(results.get('a'), ID_TOKEN) self.assertEqual(results.get('b'), ID_TOKEN) self.assertEqual(self.state.device_requests, 1) # no second prompt From 20bf2cd7ba9f15f270daa30607afc6a93bd2fc4d Mon Sep 17 00:00:00 2001 From: glasstiger Date: Mon, 22 Jun 2026 11:28:12 +0100 Subject: [PATCH 35/39] fix: classify IdP token-endpoint errors transient vs terminal The device-flow poll loop and the silent refresh both POST to the IdP token endpoint but disagreed on which failures are transient, causing two user-visible bugs: - A non-JSON 4xx during polling (a WAF/proxy HTML error page, or a non-conformant IdP) was treated as transient, so the flow polled until the device code expired and reported a misleading "code expired" instead of failing fast (M1). 
- A transient 5xx/429 during a silent refresh tore the session down and re-ran the interactive sign-in -- hard-failing as OidcInteractionRequired in non-interactive pools/CI -- even though the refresh token was still valid and a retry would succeed (M2). Preserve the HTTP status on the error (OidcError.status, set by post_form for a non-JSON body) and classify both paths the same way: 4xx => terminal, 5xx/429/network => transient. Add regression tests for both paths plus the status-propagation contract they rely on. Co-Authored-By: Claude Opus 4.8 (1M context) --- src/questdb/auth/_device.py | 87 ++++++++++++++++++++++++++++++------- src/questdb/auth/_errors.py | 9 ++++ src/questdb/auth/_http.py | 12 +++-- test/test_auth.py | 74 +++++++++++++++++++++++++++++++ 4 files changed, 164 insertions(+), 18 deletions(-) diff --git a/src/questdb/auth/_device.py b/src/questdb/auth/_device.py index 41a07ef7..376ca27d 100644 --- a/src/questdb/auth/_device.py +++ b/src/questdb/auth/_device.py @@ -113,6 +113,25 @@ def _identity_from_claims(claims: Dict[str, Any]) -> Optional[str]: return None +def _http_status_is_terminal_4xx(status: Optional[int]) -> bool: + """ + True for a client-error HTTP status that is a definitive rejection. + + A non-JSON response body carrying such a status (e.g. an HTML/plain ``403`` + from a WAF or reverse proxy in front of the IdP, or a non-conformant IdP) is + never a RFC-conformant ``authorization_pending`` / ``slow_down`` — those are + always JSON — so the device-flow poll must fail fast rather than keep + retrying to a misleading "code expired". ``429`` is excluded: it is a + rate-limit, handled as transient with back-off. 
+ """ + return status is not None and 400 <= status < 500 and status != 429 + + +def _http_status_is_transient(status: Optional[int]) -> bool: + """True for a server-side (5xx) or rate-limit (429) status worth retrying.""" + return status is not None and (status >= 500 or status == 429) + + class OidcDeviceAuth: """ Acquire and refresh an OIDC token via the device authorization grant. @@ -551,14 +570,29 @@ def _idp_post(self, url: str, form: Dict[str, Any]): url, form, ctx=self._ctx, insecure=False, timeout=self._timeout) def _refresh(self, tokens: TokenSet) -> TokenSet: - status, body = self._idp_post( - self.config.token_endpoint, - { - 'grant_type': REFRESH_GRANT, - 'refresh_token': tokens.refresh_token, - 'client_id': self.config.client_id, - 'scope': self.config.scope, - }) + try: + status, body = self._idp_post( + self.config.token_endpoint, + { + 'grant_type': REFRESH_GRANT, + 'refresh_token': tokens.refresh_token, + 'client_id': self.config.client_id, + 'scope': self.config.scope, + }) + except OidcNetworkError: + # Already transient (socket drop / DNS / per-request timeout): + # propagate so _acquire keeps the still-valid refresh token and + # retries later instead of re-prompting. + raise + except OidcError as e: + # Non-JSON HTTP error body (e.g. an HTML 5xx from a proxy in front + # of the IdP). A 5xx / 429 is a transient hiccup — re-raise as a + # network error so _acquire keeps the refresh token; a 4xx is a + # genuine rejection, so let it fall through (as an OidcError) to a + # fresh interactive sign-in. + if _http_status_is_transient(getattr(e, 'status', None)): + raise OidcNetworkError(str(e)) from e + raise if status == 200: refreshed = self._tokenset_from_response(body) # Many IdPs do not rotate the refresh token; keep the old one. 
@@ -567,6 +601,16 @@ def _refresh(self, tokens: TokenSet) -> TokenSet: refreshed = replace( refreshed, refresh_token=tokens.refresh_token) return refreshed + # A transient IdP error (5xx / 429) during a silent refresh must not + # tear down the session: the refresh token is still valid, so surface it + # as a network error and let _acquire keep it and retry later — matching + # the poll loop, which also treats 5xx/429 as transient. Only a genuine + # rejection (an expired/revoked refresh token, a 4xx invalid_grant) + # falls through to a fresh interactive sign-in. + if _http_status_is_transient(status): + raise OidcNetworkError( + f'Token refresh hit a transient IdP error (HTTP {status}); ' + 'the refresh token is still valid — retry later.') raise OidcDeviceFlowError( f"Token refresh failed: {body.get('error', 'unknown error')}", error=body.get('error'), @@ -667,17 +711,30 @@ def _poll_for_token(self, resp: Dict[str, Any]) -> TokenSet: 'device_code': device_code, 'client_id': self.config.client_id, }) - except OidcError: - # Transient failure mid-poll, not a terminal OAuth decision: a - # dropped connection / DNS blip / per-request timeout - # (OidcNetworkError), or a non-2xx response with a non-JSON body - # such as an HTML 502/503/504 from a proxy or load balancer in - # front of the IdP (a bare OidcError from post_form). The user + except OidcError as e: + # A non-JSON 4xx is a terminal rejection (e.g. an HTML/plain + # error page from a WAF or reverse proxy in front of the IdP, or + # a non-conformant IdP): a conformant OAuth error is JSON, so it + # can never be authorization_pending / slow_down. Fail fast + # instead of polling on to a misleading "code expired". 
+ if _http_status_is_terminal_4xx(getattr(e, 'status', None)): + self._renderer.on_failure( + 'Sign-in failed: the identity provider rejected the ' + 'request.') + raise OidcDeviceFlowError( + f'Device flow failed: the IdP rejected the token ' + f'request ({e}).') from e + # Otherwise transient, not a terminal OAuth decision: a dropped + # connection / DNS blip / per-request timeout (OidcNetworkError), + # or a non-JSON 5xx/429 such as an HTML 502/503/504 from a proxy + # in front of the IdP (a bare OidcError from post_form). The user # may already have authorized in the browser, and RFC 8628 §3.4 # expects polling to continue until the device code expires, so # poll again instead of discarding the in-progress sign-in. The # deadline check at the top of the loop bounds the total wait; a - # genuine rejection arrives as a JSON error body (handled below). + # genuine JSON rejection arrives as a JSON error body (below). + if getattr(e, 'status', None) == 429: + interval = min(_MAX_POLL_INTERVAL, interval + 5) continue if status == 200: diff --git a/src/questdb/auth/_errors.py b/src/questdb/auth/_errors.py index 7262f0cc..b4b7f55f 100644 --- a/src/questdb/auth/_errors.py +++ b/src/questdb/auth/_errors.py @@ -32,6 +32,15 @@ class OidcError(Exception): """Base class for every error raised by :mod:`questdb.auth`.""" + def __init__(self, *args, status: Optional[int] = None): + super().__init__(*args) + # HTTP status that produced this error, when it originated from a + # non-JSON HTTP response (else None). Lets the device-flow poll loop and + # the silent refresh tell a terminal 4xx rejection (e.g. a WAF/proxy + # error page) from a transient 5xx/429/network blip even when the body + # was not a conformant JSON OAuth error. 
+ self.status = status + class OidcConfigError(OidcError): """ diff --git a/src/questdb/auth/_http.py b/src/questdb/auth/_http.py index 3e691159..f43afeca 100644 --- a/src/questdb/auth/_http.py +++ b/src/questdb/auth/_http.py @@ -290,9 +290,15 @@ def post_form( # ValueError, so catch it explicitly to keep the typed contract. if resp.ok: raise OidcError( - f'Expected JSON from {url}, got: {resp.text()[:200]}') - # Non-JSON error body: surface the status + text. - raise OidcError(f'HTTP {resp.status} from {url}: {resp.text()[:200]}') + f'Expected JSON from {url}, got: {resp.text()[:200]}', + status=resp.status) + # Non-JSON error body: surface the status + text. Attach the HTTP status + # so callers (the device-flow poll loop / silent refresh) can tell a + # terminal 4xx rejection from a transient 5xx/429 even though the body + # was not a conformant JSON OAuth error. + raise OidcError( + f'HTTP {resp.status} from {url}: {resp.text()[:200]}', + status=resp.status) if not isinstance(parsed, dict): raise OidcError(f'Unexpected JSON shape from {url}: {parsed!r}') return resp.status, parsed diff --git a/test/test_auth.py b/test/test_auth.py index 336ef6a1..b9a5588b 100644 --- a/test/test_auth.py +++ b/test/test_auth.py @@ -409,6 +409,27 @@ def test_transient_5xx_and_429_during_poll_keep_polling(self): # 503 polled at the base interval; 429 bumps the interval by 5. self.assertEqual(self._clock.sleeps, [5, 5, 10]) + def test_non_json_4xx_during_poll_is_terminal(self): + # A non-JSON 4xx during polling (an HTML/plain error page from a WAF or + # reverse proxy in front of the IdP, or a non-conformant IdP) is a + # terminal rejection: a conformant OAuth error is JSON, so it can't be + # authorization_pending / slow_down. Fail fast with a device-flow error + # instead of polling on to a misleading "code expired". M1. 
+ auth = self.make_auth() + with _raw_response_server( + 403, 'text/html', b'denied') as raw: + # Point only the poll (token) endpoint at the non-JSON 403; the + # device-code request still hits the JSON mock IdP. Set it post- + # construction so the (already-satisfied) co-location check isn't + # re-run against the throwaway origin. + auth.config.token_endpoint = raw + '/token' + with self.assertRaises(OidcDeviceFlowError) as cm: + auth.token() + # Terminal on the first poll: not a timeout, and it did not keep polling + # to the device-code deadline. + self.assertNotIsInstance(cm.exception, OidcTimeoutError) + self.assertLessEqual(len(self._clock.sleeps), 1) + def test_timeout_when_never_authorized(self): self.state.device_response = { 'device_code': 'DEV-CODE', 'user_code': 'X', @@ -821,6 +842,44 @@ def test_refresh_network_error_propagates_without_reprompt(self): self.assertIn('/token', str(cm.exception)) self.assertEqual(auth._tokens.refresh_token, 'REFRESH-1') + def test_refresh_transient_5xx_kept_for_retry(self): + # A transient IdP error (5xx) during a silent refresh must NOT tear the + # session down and re-prompt: the refresh token is still valid, so it is + # surfaced as a retryable OidcNetworkError and the cached token (with its + # refresh token) is kept for a later retry — matching the poll loop, + # which also treats 5xx/429 as transient. M2. + auth = self.make_auth() + self._seed_expired(auth) + self.state.refresh_response = (503, {'error': 'temporarily_unavailable'}) + with self.assertRaises(OidcNetworkError): + auth.token() + self.assertEqual(self.state.refresh_requests, 1) + self.assertEqual(self.state.device_requests, 0) # NOT re-prompted + self.assertEqual(auth._tokens.refresh_token, 'REFRESH-1') # kept + + def test_refresh_transient_429_kept_for_retry(self): + # Same as the 5xx case for a 429 rate-limit. M2. 
+ auth = self.make_auth() + self._seed_expired(auth) + self.state.refresh_response = (429, {'error': 'slow_down'}) + with self.assertRaises(OidcNetworkError): + auth.token() + self.assertEqual(self.state.device_requests, 0) + self.assertEqual(auth._tokens.refresh_token, 'REFRESH-1') + + def test_refresh_transient_5xx_non_interactive_does_not_hard_fail(self): + # The worst case: in a non-interactive context (papermill / cron / CI) a + # transient refresh error must surface as a retryable OidcNetworkError, + # NOT escalate to OidcInteractionRequired — which a fall-through to the + # device flow would raise, hard-failing a session whose refresh token is + # still valid and would succeed on the next attempt. M2. + auth = self.make_auth(interactive=False) + self._seed_expired(auth) + self.state.refresh_response = (503, {'error': 'temporarily_unavailable'}) + with self.assertRaises(OidcNetworkError): + auth.token() + self.assertEqual(self.state.device_requests, 0) + class TestDiscovery(AuthTestBase): def test_from_questdb_reads_settings(self): @@ -2078,6 +2137,21 @@ def test_require_secure_policy(self): _require_secure('http://idp.example.com/x', insecure=False) _require_secure('http://idp.example.com/x', insecure=True) + def test_post_form_attaches_status_to_non_json_error(self): + # The device-flow poll loop and the silent refresh classify a non-JSON + # token-endpoint failure (4xx terminal vs 5xx/429 transient) by the HTTP + # status, so post_form must attach it to the raised OidcError. M1/M2. + from questdb.auth._http import post_form + with _raw_response_server(403, 'text/plain', b'forbidden') as raw: + with self.assertRaises(OidcError) as cm: + post_form(raw + '/token', {'grant_type': 'x'}) + self.assertEqual(cm.exception.status, 403) + # A non-JSON 5xx likewise carries its status (classified as transient). + with _raw_response_server(503, 'text/html', b'

bad gw

') as raw: + with self.assertRaises(OidcError) as cm: + post_form(raw + '/token', {'grant_type': 'x'}) + self.assertEqual(cm.exception.status, 503) + def test_insecure_does_not_downgrade_idp(self): # insecure=True must NOT permit plaintext to a non-loopback IdP: the # device code / refresh token must never traverse the network in clear. From dcd44bc3040b371b7150cd5fc87bbf8875a09181 Mon Sep 17 00:00:00 2001 From: glasstiger Date: Mon, 22 Jun 2026 11:38:14 +0100 Subject: [PATCH 36/39] fix: keep the device-flow prompt visible on a non-UTF-8 terminal TerminalRenderer._write swallowed every write error, so on a stream that can't encode the prompt's emoji -- a legacy code-page Windows console, an ascii PYTHONIOENCODING, or a redirected stderr -- the UnicodeEncodeError discarded the whole prompt, including the verification URL and user code. The sign-in then polled invisibly and looked like a silent hang. Catch UnicodeEncodeError and retry with the stream's own encoding using errors='replace', so only the un-encodable glyphs degrade while the ASCII URL and code still reach the user. The "never raises" contract is kept via the outer swallow. Add a regression test driving the renderer at an ascii-only stream. Co-Authored-By: Claude Opus 4.8 (1M context) --- src/questdb/auth/_render.py | 13 ++++++++++++- test/test_auth.py | 38 +++++++++++++++++++++++++++++++++++++ 2 files changed, 50 insertions(+), 1 deletion(-) diff --git a/src/questdb/auth/_render.py b/src/questdb/auth/_render.py index 283153b7..9263b556 100644 --- a/src/questdb/auth/_render.py +++ b/src/questdb/auth/_render.py @@ -205,7 +205,18 @@ def __init__(self, stream: Optional[TextIO] = None, qr: bool = False): def _write(self, text: str) -> None: try: - self._stream.write(text) + try: + self._stream.write(text) + except UnicodeEncodeError: + # The stream's encoding can't represent some characters (e.g. + # the emoji on a legacy code-page Windows console, an ``ascii`` + # PYTHONIOENCODING, or a redirected stderr). 
Degrade only those + # characters instead of letting the whole prompt — including the + # verification URL and user code — vanish, which would make the + # sign-in look like a silent hang. + enc = getattr(self._stream, 'encoding', None) or 'ascii' + self._stream.write( + text.encode(enc, 'replace').decode(enc, 'replace')) self._stream.flush() except Exception: pass diff --git a/test/test_auth.py b/test/test_auth.py index b9a5588b..1fc5cf0f 100644 --- a/test/test_auth.py +++ b/test/test_auth.py @@ -2413,6 +2413,44 @@ def test_terminal_prompt_strips_control_chars(self): self.assertNotIn('\x1b', out) self.assertNotIn('\x07', out) + def test_terminal_prompt_survives_unencodable_stream(self): + # On a stream whose encoding can't represent the prompt's emoji (a + # legacy code-page Windows console, an `ascii` PYTHONIOENCODING, or a + # redirected stderr), the decorative glyphs must degrade but the + # verification URL and user code must STILL reach the user — not vanish + # into a silent hang. M3. + from questdb.auth._render import TerminalRenderer + + class _AsciiStream: + encoding = 'ascii' + + def __init__(self): + self.parts = [] + + def write(self, s): + s.encode(self.encoding) # raises UnicodeEncodeError, like a TTY + self.parts.append(s) + + def flush(self): + pass + + stream = _AsciiStream() + r = TerminalRenderer(stream=stream) + r.on_prompt({ + 'user_code': 'WDJB-MJHT', + 'verification_uri': 'https://idp.example.com/device', + }) + r.on_success('alice@example.com', 3600) + r.on_failure('access denied') + out = ''.join(stream.parts) + # The essential content survived (only the un-encodable glyphs were + # replaced); nothing was blackholed and no exception escaped. 
+ self.assertIn('https://idp.example.com/device', out) + self.assertIn('WDJB-MJHT', out) + self.assertIn('alice@example.com', out) + self.assertIn('access denied', out) + out.encode('ascii') # the whole transcript is ascii-encodable + def test_strip_control_removes_bidi_and_zero_width(self): # Beyond C0/C1, untrusted device-response fields must have Unicode # bidi-override / zero-width / line-separator characters stripped before From 86cf83f25cebcbce2f5cf550c7daf57bcdfa4ae0 Mon Sep 17 00:00:00 2001 From: glasstiger Date: Mon, 22 Jun 2026 11:52:01 +0100 Subject: [PATCH 37/39] fix: close issuer-path pin bypass via encoded/backslash/;params traversal _endpoint_path_under_issuer scanned the path for dot segments after a single percent-decode but did the containment check on the raw string, so three traversals slipped past the issuer-path pin and could steer the device code / refresh token to a different realm on a path-based IdP (Keycloak host/realms/{realm}): * double-encoded dots /realms/prod/%252e%252e/EVIL/token * backslash separator /realms/prod/..\EVIL/token * last-segment ;params /realms/prod/token;..%2f..%2fEVIL (urllib splits ;params off .path, so its dots were never scanned, and a server that unescapes twice or folds a backslash to '/' resolves the others to a different realm.) Compare fully-decoded segments instead of the raw string: a new _decode_path_segments unquotes until stable, folds backslash to '/', and splits on '/'; fold the last segment's ;params back in, reject any '.'/'..' segment, then do a segment-wise prefix check. Legitimate non-traversal escapes (/some%20path, ;jsessionid=abc) are still accepted. Extend the unit and end-to-end (resolve_config) tests. 
Co-Authored-By: Claude Opus 4.8 (1M context) --- src/questdb/auth/_discovery.py | 56 +++++++++++++++++++++++++++------- test/test_auth.py | 16 +++++++++- 2 files changed, 60 insertions(+), 12 deletions(-) diff --git a/src/questdb/auth/_discovery.py b/src/questdb/auth/_discovery.py index de95b58f..eeb839c2 100644 --- a/src/questdb/auth/_discovery.py +++ b/src/questdb/auth/_discovery.py @@ -176,6 +176,29 @@ def _settings_channel_is_plaintext(questdb_url: str) -> bool: parts.hostname) +def _decode_path_segments(path: str) -> list: + """ + Fully percent-decode a URL path and split it into ``/`` segments. + + Decoding is repeated until stable so a double/triple-encoded dot segment + (``%252e%252e`` -> ``%2e%2e`` -> ``..``), or an encoded slash (``%2f``) that + splits a segment, is unmasked — a server or reverse proxy may unescape more + than once before it normalizes. A backslash is treated as a separator, since + some proxies fold ``\\`` to ``/`` before routing. The returned segments are + what the containment check compares, never the raw string urllib puts on the + wire, so an encoding the server later undoes can't smuggle a ``..`` past the + scan. The loop is bounded (a real path needs 0-1 passes; more layers than a + server would itself decode can't resolve to a traversal anyway). + """ + decoded = path + for _ in range(10): # bounded; each pass peels one percent-encoding layer + nxt = urllib.parse.unquote(decoded) + if nxt == decoded: + break + decoded = nxt + return decoded.replace('\\', '/').split('/') + + def _endpoint_path_under_issuer(endpoint: str, issuer: str) -> bool: """ True if ``endpoint``'s path is the issuer's path or a sub-path of it. @@ -187,22 +210,33 @@ def _endpoint_path_under_issuer(endpoint: str, issuer: str) -> bool: IdP (Keycloak issuers are ``https://host/realms/{realm}``), which an origin-only check can't catch. 
- A ``.`` / ``..`` path segment is rejected outright: urllib puts the dotted - path on the wire verbatim, but the IdP (or a reverse proxy in front of it) - normalizes it, so ``/realms/prod/../attacker/token`` would satisfy a naive - prefix test yet resolve server-side to a *different* realm — defeating the - very isolation this check exists to provide. Percent-encoded dot segments - (``%2e``) are decoded before the segment scan, since a server may unescape - before normalizing; a legitimate endpoint path never contains dot segments. + The comparison is done on the fully *decoded* path segments, never the raw + string urllib sends. A ``.`` / ``..`` segment is rejected outright: urllib + puts the dotted path on the wire verbatim, but the IdP (or a reverse proxy + in front of it) normalizes it, so ``/realms/prod/../attacker/token`` would + satisfy a naive prefix test yet resolve server-side to a *different* realm — + defeating the very isolation this check exists to provide. Encoded dot + segments are unmasked first — including double-encoded (``%252e``) and + encoded slashes (``%2f``) a server may unescape more than once — a backslash + is treated as a separator, and the last segment's ``;params`` (which urllib + splits off ``.path``) is folded back in, so none of those can smuggle a + traversal past the segment scan. A legitimate endpoint path never contains + dot segments. """ base = (safe_urlparse(issuer)[0].path or '').rstrip('/') if not base: return True - ep = safe_urlparse(endpoint)[0].path or '' - decoded_segments = urllib.parse.unquote(ep).split('/') - if '.' in decoded_segments or '..' in decoded_segments: + base_segs = _decode_path_segments(base) + eparts = safe_urlparse(endpoint)[0] + # urllib splits the last segment's ;params off .path; fold it back so a + # traversal hidden there (…/token;..%2f..%2fEVIL) can't slip past the scan. 
+ ep_path = eparts.path or '' + if eparts.params: + ep_path = f'{ep_path};{eparts.params}' + ep_segs = _decode_path_segments(ep_path) + if '.' in ep_segs or '..' in ep_segs: return False - return ep == base or ep.startswith(base + '/') + return ep_segs[:len(base_segs)] == base_segs def validate_endpoint_origins( diff --git a/test/test_auth.py b/test/test_auth.py index 1fc5cf0f..25c2ace3 100644 --- a/test/test_auth.py +++ b/test/test_auth.py @@ -1255,7 +1255,10 @@ def test_issuer_path_scope_rejects_dot_segment_traversal(self): # _endpoint_path_under_issuer. kc = 'https://idp.example.com/realms' for ep in (kc + '/prod/../EVIL/protocol/openid-connect', - kc + '/prod/%2e%2e/EVIL/protocol/openid-connect'): + kc + '/prod/%2e%2e/EVIL/protocol/openid-connect', + # double-encoded: a server that unescapes twice resolves the + # '..' the old single-decode check missed (M4). + kc + '/prod/%252e%252e/EVIL/protocol/openid-connect'): evil = { 'acl.oidc.enabled': True, 'acl.oidc.client.id': 'questdb', 'acl.oidc.token.endpoint': ep + '/token', @@ -2058,6 +2061,17 @@ def test_endpoint_path_under_issuer(self): self.assertFalse(under(iss + '/../EVIL/protocol/token', iss)) self.assertFalse(under(iss + '/%2e%2e/EVIL/token', iss)) self.assertFalse(under(iss + '/./token', iss)) + # Encodings/escapes the old decode-once-then-compare-raw check let + # through (M4): a server that unescapes more than once, folds a + # backslash to '/', or normalizes the last segment's ;params would + # resolve these to a DIFFERENT realm, so they must be rejected too. + self.assertFalse(under(iss + '/%252e%252e/EVIL/token', iss)) # 2x-enc + self.assertFalse(under(iss + '/..\\EVIL/token', iss)) # backslash + self.assertFalse(under(iss + '/token;..%2f..%2fEVIL', iss)) # ;params + # A legitimate sub-path with a (non-traversal) percent-escape or matrix + # param is still accepted — only dot traversal is rejected. 
+ self.assertTrue(under(iss + '/some%20path/token', iss)) + self.assertTrue(under(iss + '/token;jsessionid=abc', iss)) class TestCacheKey(unittest.TestCase): From 0001e4d21938698d65fe1f2c1c517b897e5b7058 Mon Sep 17 00:00:00 2001 From: glasstiger Date: Mon, 22 Jun 2026 12:26:27 +0100 Subject: [PATCH 38/39] fix: tighten device-auth, host/port handling, and refresh audience Four small robustness/ergonomics fixes surfaced in review: * _request_device_code: a 200 response missing device_code/user_code now raises a clear "non-conformant 200 response" error instead of the self-contradictory "Device authorization request failed (HTTP 200)". * _resolve_endpoint: a non-string acl.oidc.host (a JSON number/list from a buggy or hostile /settings) is dropped via _str_setting instead of being interpolated raw into the netloc (https://12345:9000/...); a non-numeric port is likewise dropped. * QuestDB.sender(): coerce the port kwarg to int (before the extension import, so it fails fast) so a string like "9000;tls_verify=unsafe_off" can't smuggle ILP conf parameters into the addr= string -- the same injection _require_host() already blocks for the host. * _refresh: re-send the audience on refresh (mirroring the device-auth request) so an IdP that scopes `aud` per request keeps it on the rotated token instead of minting one QuestDB rejects after a silent refresh; omitted when unconfigured. Each fix lands with a regression test. 
Co-Authored-By: Claude Opus 4.8 (1M context) --- src/questdb/auth/_device.py | 18 ++++++++++ src/questdb/auth/_discovery.py | 16 +++++++-- src/questdb/auth/_questdb.py | 18 ++++++++-- test/test_auth.py | 65 ++++++++++++++++++++++++++++++++++ 4 files changed, 112 insertions(+), 5 deletions(-) diff --git a/src/questdb/auth/_device.py b/src/questdb/auth/_device.py index 376ca27d..33d8d397 100644 --- a/src/questdb/auth/_device.py +++ b/src/questdb/auth/_device.py @@ -578,6 +578,13 @@ def _refresh(self, tokens: TokenSet) -> TokenSet: 'refresh_token': tokens.refresh_token, 'client_id': self.config.client_id, 'scope': self.config.scope, + # Re-send the audience on refresh too, mirroring the + # device-authorization request: some IdPs (e.g. Auth0) need + # it to keep the rotated access token's `aud`, and would + # otherwise mint a token QuestDB rejects only AFTER a silent + # refresh. IdPs that don't use it ignore the param; post_form + # drops it entirely when audience is None (not configured). + 'audience': self.config.audience, }) except OidcNetworkError: # Already transient (socket drop / DNS / per-request timeout): @@ -649,6 +656,17 @@ def _request_device_code(self) -> Dict[str, Any]: if status == 200 and body.get('device_code') and body.get('user_code'): return body error = body.get('error') + if status == 200: + # 200 but the success guard above failed: the response is missing + # device_code/user_code. That is a non-conformant body, not an + # HTTP-level failure — say so plainly rather than the contradictory + # "Device authorization request failed (HTTP 200)". 
+ raise OidcDeviceFlowError( + 'The IdP returned a 200 device-authorization response that is ' + 'missing the required "device_code"/"user_code" fields; cannot ' + 'start the device flow.', + error=error, + error_description=body.get('error_description')) if status in (400, 404, 405) or error in ( 'invalid_client', 'unauthorized_client', 'unsupported_grant_type'): diff --git a/src/questdb/auth/_discovery.py b/src/questdb/auth/_discovery.py index eeb839c2..01010b5e 100644 --- a/src/questdb/auth/_discovery.py +++ b/src/questdb/auth/_discovery.py @@ -309,7 +309,12 @@ def _resolve_endpoint(value: Optional[str], cfg: Dict[str, Any]) -> Optional[str if value.startswith('http://') or value.startswith('https://'): return value if value.startswith('/'): - host = cfg.get(_K_HOST) + # _str_setting drops a non-string acl.oidc.host (a JSON number/list from + # a buggy or hostile /settings) so it can't be interpolated raw into the + # netloc — e.g. https://12345:9000/path — and instead reads as absent, + # mirroring how endpoint values above are coerced. (safe_urlparse would + # otherwise reject the bogus URL only incidentally, downstream.) + host = _str_setting(cfg.get(_K_HOST)) if not host: # A path-only endpoint with no acl.oidc.host to resolve it against # can't be turned into a URL. Treat it as absent (return None) so @@ -320,8 +325,15 @@ def _resolve_endpoint(value: Optional[str], cfg: Dict[str, Any]) -> Optional[str return None tls = _as_bool(cfg.get(_K_TLS_ENABLED), default=True) scheme = 'https' if tls else 'http' + # A usable port is an int or a digit string; anything else (a JSON + # list/object, a bool, or a non-numeric string) would corrupt the + # netloc, so drop it and resolve host-only. 
port = cfg.get(_K_PORT) - netloc = f'{host}:{port}' if port else str(host) + if isinstance(port, bool) or not ( + isinstance(port, int) + or (isinstance(port, str) and port.isdigit())): + port = None + netloc = f'{host}:{port}' if port else host return f'{scheme}://{netloc}{value}' return value diff --git a/src/questdb/auth/_questdb.py b/src/questdb/auth/_questdb.py index a2186442..76a6979b 100644 --- a/src/questdb/auth/_questdb.py +++ b/src/questdb/auth/_questdb.py @@ -326,6 +326,21 @@ def sender(self, *, port: Optional[int] = None, The token is captured at creation time; create a new sender to pick up a refreshed token. """ + scheme = 'https' if self._parts.scheme == 'https' else 'http' + resolved_port = port or self._port or ( + 443 if scheme == 'https' else 9000) + # Coerce to int (before the heavy import, so bad input fails fast) so a + # stray non-integer port kwarg can't smuggle ILP conf parameters — e.g. + # "9000;tls_verify=unsafe_off" — into the addr= string via _ilp_addr, + # the same injection _require_host() blocks for the host. The + # URL-derived self._port is already an int. + try: + resolved_port = int(resolved_port) + except (TypeError, ValueError): + raise OidcConfigError( + f'Invalid port {resolved_port!r} for QuestDB.sender(); expected ' + 'an integer.') + try: from questdb.ingress import Sender except ImportError as e: @@ -334,9 +349,6 @@ def sender(self, *, port: Optional[int] = None, 'QuestDB.sender(). 
Install the full client wheel ' '(`pip install questdb`).') from e - scheme = 'https' if self._parts.scheme == 'https' else 'http' - resolved_port = port or self._port or ( - 443 if scheme == 'https' else 9000) conf = (f'{scheme}::addr=' f'{self._ilp_addr(self._require_host(), resolved_port)};') # Forward the private CA bundle (explicit ca_bundle=, else the diff --git a/test/test_auth.py b/test/test_auth.py index 25c2ace3..1a3afb96 100644 --- a/test/test_auth.py +++ b/test/test_auth.py @@ -183,6 +183,7 @@ def __init__(self): self.device_requests = 0 self.token_requests = [] self.refresh_requests = 0 + self.refresh_forms = [] self.exec_requests = [] @@ -269,6 +270,7 @@ def do_POST(self): grant = form.get('grant_type') if grant == 'refresh_token': self.state.refresh_requests += 1 + self.state.refresh_forms.append(form) status, body = self.state.refresh_response or ( 200, self._default_token_body()) self._send_json(status, body) @@ -430,6 +432,21 @@ def test_non_json_4xx_during_poll_is_terminal(self): self.assertNotIsInstance(cm.exception, OidcTimeoutError) self.assertLessEqual(len(self._clock.sleeps), 1) + def test_device_200_without_codes_is_rejected_clearly(self): + # A 200 device-authorization response missing device_code/user_code is + # a non-conformant body, not an HTTP failure: the error must say so + # plainly (NOT the self-contradictory "failed (HTTP 200)") and the flow + # must never start polling. 
+ self.state.device_status = 200 + self.state.device_response = {'verification_uri': 'https://idp/device'} + auth = self.make_auth() + with self.assertRaises(OidcDeviceFlowError) as cm: + auth.token() + msg = str(cm.exception) + self.assertNotIn('HTTP 200', msg) + self.assertIn('device_code', msg) + self.assertEqual(self.state.token_requests, []) # never polled + def test_timeout_when_never_authorized(self): self.state.device_response = { 'device_code': 'DEV-CODE', 'user_code': 'X', @@ -867,6 +884,27 @@ def test_refresh_transient_429_kept_for_retry(self): self.assertEqual(self.state.device_requests, 0) self.assertEqual(auth._tokens.refresh_token, 'REFRESH-1') + def test_refresh_includes_audience_when_configured(self): + # The audience is re-sent on refresh (mirroring the device-auth + # request), so an IdP that scopes `aud` per request keeps it on the + # rotated token instead of minting one QuestDB rejects after a silent + # refresh. When no audience is configured the param is omitted. + auth = self.make_auth(audience='questdb-api') + self._seed_expired(auth) + self.assertEqual(auth.token(), ID_TOKEN) + self.assertEqual(self.state.refresh_requests, 1) + self.assertEqual( + self.state.refresh_forms[-1].get('audience'), 'questdb-api') + + # Without an audience, the refresh form carries no audience key. 
+ _MEMORY_STORE.clear() + _MEMORY_GENERATION.clear() + self.state.refresh_forms.clear() + auth2 = self.make_auth() # no audience + self._seed_expired(auth2) + auth2.token() + self.assertNotIn('audience', self.state.refresh_forms[-1]) + def test_refresh_transient_5xx_non_interactive_does_not_hard_fail(self): # The worst case: in a non-interactive context (papermill / cron / CI) a # transient refresh error must surface as a retryable OidcNetworkError, @@ -1825,6 +1863,17 @@ def test_sender_missing_extension_raises(self): with self.assertRaises(ImportError): self._qdb().sender() + def test_sender_rejects_non_integer_port(self): + # A non-integer port kwarg must be rejected before it can be + # interpolated into the addr= conf string, where ";tls_verify= + # unsafe_off" would silently disable TLS verification — the same + # injection _require_host() blocks for the host. The coercion runs + # before the extension import, so this fails cleanly even without it. + qdb = self._qdb('https://db.example.com:9000') + for bad in ('9000;tls_verify=unsafe_off', 'notaport', ['9000']): + with self.assertRaises(OidcConfigError): + qdb.sender(port=bad) + class TestConfigHelpers(unittest.TestCase): def test_as_bool_variants(self): @@ -1937,6 +1986,22 @@ def test_resolve_endpoint_relative_path_without_host_is_none(self): self.assertIsNone( # port present but host missing -> still unresolved _resolve_endpoint('/as/token.oauth2', {'acl.oidc.port': 443})) + def test_resolve_endpoint_ignores_non_string_host(self): + # A non-string acl.oidc.host (a JSON number/list from a buggy or hostile + # /settings) must not be interpolated raw into the netloc (e.g. + # https://12345:9000/path); treat it as absent so a path-only endpoint + # reads as unresolvable, mirroring how endpoint values are coerced. 
+ from questdb.auth._discovery import _resolve_endpoint + for bad_host in (12345, ['idp'], {'h': 'idp'}, True): + self.assertIsNone( + _resolve_endpoint('/as/token', {'acl.oidc.host': bad_host})) + # A non-numeric port is dropped rather than corrupting the netloc. + self.assertEqual( + _resolve_endpoint('/as/token', { + 'acl.oidc.host': 'idp', 'acl.oidc.tls.enabled': True, + 'acl.oidc.port': ['x']}), + 'https://idp/as/token') + def test_settings_config_nesting(self): from questdb.auth._discovery import settings_config self.assertEqual(settings_config({'config': {'a': 1}}), {'a': 1}) From 99c05ace3d3fa6034a2642810a1173afceb98172 Mon Sep 17 00:00:00 2001 From: glasstiger Date: Mon, 22 Jun 2026 12:42:47 +0100 Subject: [PATCH 39/39] docs: condense the questdb.auth comments and docstrings MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit The new module's comments and docstrings were thorough but verbose (inline threat-model essays). Tighten them across all eight files — each long rationale becomes a one-to-two-line "risk + defense" statement — without losing any security/concurrency "why". Comments and docstrings only: code and every non-docstring string (exception messages, rendered prompt text, regexes, dict keys) are unchanged, verified by an AST-equivalence check against the prior revision (docstrings stripped) and the full test suite (152 pass). Net -177 lines of commentary. 
Co-Authored-By: Claude Opus 4.8 (1M context) --- src/questdb/auth/__init__.py | 20 +- src/questdb/auth/_cache.py | 49 ++--- src/questdb/auth/_device.py | 391 +++++++++++++++------------------ src/questdb/auth/_discovery.py | 291 ++++++++++-------------- src/questdb/auth/_errors.py | 37 ++-- src/questdb/auth/_http.py | 111 ++++------ src/questdb/auth/_questdb.py | 102 ++++----- src/questdb/auth/_render.py | 102 ++++----- 8 files changed, 463 insertions(+), 640 deletions(-) diff --git a/src/questdb/auth/__init__.py b/src/questdb/auth/__init__.py index cfacb6e8..a06acaf5 100644 --- a/src/questdb/auth/__init__.py +++ b/src/questdb/auth/__init__.py @@ -25,16 +25,12 @@ """ OIDC authentication helper for QuestDB (Jupyter-first). -Runs the OAuth 2.0 Device Authorization Grant (RFC 8628) entirely client-side, -obtains a token, and presents it to QuestDB over the auth paths it already -supports (HTTP ``Bearer`` / PG-wire ``_sso``). Designed for data scientists on -local **and remote** kernels (JupyterHub, SageMaker, Colab, VS Code-remote), -where the kernel has no browser: you authorize in any browser (laptop or -phone), the kernel only makes outbound calls to the IdP. +Runs the OAuth 2.0 Device Authorization Grant (RFC 8628) client-side and +presents the token to QuestDB (HTTP ``Bearer`` / PG-wire ``_sso``). Works on +browserless local and remote kernels (JupyterHub, SageMaker, Colab, +VS Code-remote): authorize in any browser, the kernel only calls the IdP. -Two ways to use it, depending on your needs: - -* **Just the token** — works with anything (PG-wire, HTTP, your own tooling):: +* **Just the token** — works with anything; no optional dependencies:: from questdb.auth import OidcDeviceAuth @@ -52,10 +48,8 @@ with qdb.sender() as sender: # ingestion (ILP/HTTP) ... -Only ``token()`` / ``headers()`` are needed for the bring-your-own-client path, -and they require no optional dependencies. 
``pandas`` (for ``sql()``), -``sqlalchemy`` / ``psycopg`` (adapters), ``qrcode`` and ``IPython`` are imported -lazily, only when used. +Optional deps (``pandas``, ``sqlalchemy``/``psycopg``, ``qrcode``, ``IPython``) +are imported lazily, only when used. """ from ._device import OidcDeviceAuth diff --git a/src/questdb/auth/_cache.py b/src/questdb/auth/_cache.py index b3e0ff95..b8373fb6 100644 --- a/src/questdb/auth/_cache.py +++ b/src/questdb/auth/_cache.py @@ -39,14 +39,13 @@ @dataclass(frozen=True) class TokenSet: """ - A set of tokens obtained from the IdP, plus their expiry. + IdP tokens plus their expiry. - Immutable (``frozen``): the lock-free fast path in + ``frozen`` because the lock-free fast path in :class:`~questdb.auth._device.OidcDeviceAuth` reads a published ``TokenSet`` - without holding a lock, which is only safe because its fields never change - after construction. Derive a modified copy with :func:`dataclasses.replace` - rather than mutating in place. The three secret fields are kept out of - ``repr`` so a token can't leak into a log line or traceback. + without a lock, which is safe only if its fields never change; use + :func:`dataclasses.replace` for a modified copy. The secret fields are + excluded from ``repr`` so a token can't leak into a log or traceback. """ access_token: Optional[str] = field(default=None, repr=False) @@ -62,9 +61,8 @@ def is_valid(self, now: float, skew: float = DEFAULT_SKEW_SECONDS) -> bool: """True if the token is present and not within ``skew`` of expiry.""" if self.expires_at <= 0: return False - # Never let the early-refresh skew exceed half the token's own - # lifetime, so a short-lived (< 2*skew) token isn't reported expired - # the instant it is issued (which would refresh on every call). + # Cap skew at half the token lifetime, so a short-lived (< 2*skew) + # token isn't reported expired the instant it's issued. 
if self.issued_at: lifetime = self.expires_at - self.issued_at if lifetime > 0: @@ -85,14 +83,13 @@ def clear(self, key: str) -> None: # pragma: no cover raise NotImplementedError -# Module-global so that re-running a notebook cell (which constructs a fresh -# ``OidcDeviceAuth``) reuses the already-acquired token instead of re-prompting. +# Module-global so a re-run notebook cell (fresh ``OidcDeviceAuth``) reuses the +# acquired token instead of re-prompting. _MEMORY_STORE: Dict[str, TokenSet] = {} -# Per-key counter bumped on every clear(). store_if_current() uses it to drop a -# write from an acquisition that began before a concurrent clear() — including a -# clear() on a *different* OidcDeviceAuth that shares this process-global store, -# whose per-instance lock does not serialize against this one — so clear() can't -# be silently undone by an in-flight sign-in / refresh. +# Per-key counter bumped on every clear(); store_if_current() uses it to drop a +# write from an acquisition that began before a concurrent clear() — even a +# clear() on a different OidcDeviceAuth sharing this store, whose per-instance +# lock doesn't serialize against this one — so clear() can't be silently undone. _MEMORY_GENERATION: Dict[str, int] = {} _MEMORY_LOCK = threading.Lock() @@ -101,14 +98,12 @@ class MemoryCache(TokenCache): """ Process-global, in-memory cache (the default). - Safest backend: nothing is written to disk. Tokens survive for the life - of the Python process, so re-running cells is silent, but a kernel - restart re-prompts once. + Safest backend: nothing hits disk. Tokens live for the life of the process, + so re-running cells is silent; a kernel restart re-prompts once. """ def load(self, key: str) -> Optional[TokenSet]: - # Return a copy so callers can't mutate the cached entry in place - # (the live token is refreshed/rotated independently). + # Return a copy so callers can't mutate the cached entry in place. 
with _MEMORY_LOCK: tokens = _MEMORY_STORE.get(key) return replace(tokens) if tokens is not None else None @@ -126,9 +121,8 @@ def generation(self, key: str) -> int: """ Current clear()-generation for ``key``. - Captured before an acquisition's IdP round-trip and handed back to - :meth:`store_if_current`, which drops the write if a ``clear()`` bumped - the counter meanwhile (see :meth:`store_if_current`). + Capture before an IdP round-trip and pass to :meth:`store_if_current`, + which drops the write if a ``clear()`` bumped the counter meanwhile. """ with _MEMORY_LOCK: return _MEMORY_GENERATION.get(key, 0) @@ -138,11 +132,10 @@ def store_if_current( """ Store ``tokens`` only if no :meth:`clear` happened since ``generation``. - If a concurrent ``clear()`` — on this or any other - :class:`~questdb.auth.OidcDeviceAuth` sharing this process-global store — + If a concurrent ``clear()`` (on any OidcDeviceAuth sharing this store) bumped the counter after ``generation`` was captured, the write is - dropped (returns ``False``) so the just-cleared entry is not resurrected - with a now-stale token. Returns ``True`` when the token was stored. + dropped (``False``) so the cleared entry isn't resurrected with a stale + token; returns ``True`` when stored. """ with _MEMORY_LOCK: if _MEMORY_GENERATION.get(key, 0) != generation: diff --git a/src/questdb/auth/_device.py b/src/questdb/auth/_device.py index 33d8d397..a3db901a 100644 --- a/src/questdb/auth/_device.py +++ b/src/questdb/auth/_device.py @@ -62,11 +62,10 @@ # A non-positive expires_in is non-conformant; treat it as "unknown". _DEFAULT_EXPIRES_IN = 3600 -# Bounds for the device-authorization response's timing fields (RFC 8628). 
The -# device code is short-lived, so the IdP-supplied values are clamped: a hostile -# or buggy response must not be able to time the flow out before its first poll, -# nor pin the polling thread — which holds the acquisition lock — in one -# enormous sleep, nor keep the loop (and the lock) alive indefinitely. +# Clamp the device-authorization timing fields (RFC 8628): a hostile/buggy +# response must not time the flow out before its first poll, pin the polling +# thread (which holds the acquisition lock) in one huge sleep, or keep the loop +# (and lock) alive indefinitely. _DEFAULT_DEVICE_CODE_LIFETIME = 600 # expires_in fallback (absent/invalid/<=0) _MAX_DEVICE_CODE_LIFETIME = 1800 # cap on how long we keep polling _MAX_POLL_INTERVAL = 60 # cap on the poll interval (incl. slow_down) @@ -86,8 +85,8 @@ def _decode_jwt_claims(token: Optional[str]) -> Dict[str, Any]: """ Best-effort decode of a JWT payload **without signature verification**. - Used only to show a friendly identity in the sign-in message. QuestDB - performs the real validation. Returns ``{}`` for opaque/invalid tokens. + Used only to show a friendly identity in the sign-in message; QuestDB does + the real validation. Returns ``{}`` for opaque/invalid tokens. """ if not token or token.count('.') < 2: return {} @@ -98,10 +97,9 @@ def _decode_jwt_claims(token: Optional[str]) -> Dict[str, Any]: claims = json.loads(raw) return claims if isinstance(claims, dict) else {} except (ValueError, binascii.Error, UnicodeDecodeError, RecursionError): - # RecursionError: a deeply-nested JSON payload exhausts the decoder's - # stack; it is not a ValueError, so list it explicitly so a hostile or - # buggy token response can't crash token()/refresh with a raw exception - # here (mirrors the guards in _http.get_json / post_form / QuestDB.sql). + # RecursionError (deeply-nested JSON exhausts the decoder stack) isn't a + # ValueError, so list it explicitly: a hostile token must not crash + # token()/refresh here. 
return {} @@ -115,14 +113,12 @@ def _identity_from_claims(claims: Dict[str, Any]) -> Optional[str]: def _http_status_is_terminal_4xx(status: Optional[int]) -> bool: """ - True for a client-error HTTP status that is a definitive rejection. - - A non-JSON response body carrying such a status (e.g. an HTML/plain ``403`` - from a WAF or reverse proxy in front of the IdP, or a non-conformant IdP) is - never a RFC-conformant ``authorization_pending`` / ``slow_down`` — those are - always JSON — so the device-flow poll must fail fast rather than keep - retrying to a misleading "code expired". ``429`` is excluded: it is a - rate-limit, handled as transient with back-off. + True for a 4xx that is a definitive rejection. + + A non-JSON body with such a status (e.g. an HTML ``403`` from a WAF/proxy or + non-conformant IdP) is never an ``authorization_pending`` / ``slow_down`` + (those are always JSON), so the poll must fail fast rather than retry to a + misleading "code expired". ``429`` is excluded — it's a transient rate-limit. """ return status is not None and 400 <= status < 500 and status != 429 @@ -136,28 +132,23 @@ class OidcDeviceAuth: """ Acquire and refresh an OIDC token via the device authorization grant. - The token is presented to QuestDB over the auth paths it already - supports: HTTP ``Authorization: Bearer`` or PG-wire ``_sso`` (token as - password). The flow runs entirely client-side; QuestDB is never in the - token-acquisition path. - - Most users only ever call :meth:`token` (or :meth:`headers`). The first - call runs the interactive device flow; subsequent calls return the cached - token and refresh it silently (synchronously, on the first call made after - it nears expiry — there is no background thread). Acquisition is - serialized so concurrent callers don't double-prompt, while a valid cached - token is returned without blocking on another thread's in-progress - sign-in. 
- - **Concurrency note.** The serialization lock is held for the whole of an - interactive sign-in (up to the device-code lifetime, ~30 min). A caller - that already holds a *valid* cached token never blocks, but a caller whose - token is missing or expired blocks behind whoever is signing in; if that - sign-in is abandoned, each waiter then re-prompts in turn. When several - threads share one auth object (e.g. a SQLAlchemy / psycopg connection - pool), sign in once up front — :func:`questdb.auth.connect` does this for - you with ``eager=True`` (the default), so the interactive flow runs a - single time on the main thread before the pool opens connections. + The token is presented to QuestDB over the auth paths it already supports: + HTTP ``Authorization: Bearer`` or PG-wire ``_sso`` (token as password). The + flow runs entirely client-side; QuestDB is never in the acquisition path. + + Most users only call :meth:`token` (or :meth:`headers`). The first call runs + the interactive device flow; later calls return the cached token, refreshing + it silently and synchronously once it nears expiry (no background thread). + Acquisition is serialized so concurrent callers don't double-prompt, while a + valid cached token is returned without blocking on another's sign-in. + + **Concurrency note.** The lock is held for a whole interactive sign-in (up + to the device-code lifetime, ~30 min): a caller with a *valid* cached token + never blocks, but one whose token is missing/expired waits behind the + signer. So when threads share an auth object (e.g. a SQLAlchemy/psycopg + pool), sign in once up front — :func:`questdb.auth.connect` does this via + ``eager=True`` (the default), running the flow once on the main thread before + the pool opens connections. .. 
code-block:: python @@ -221,43 +212,38 @@ def __init__( audience=audience, issuer=issuer) - # Enforce the credential-endpoint co-location / issuer pin on every - # construction path (not just discovery), so the documented guarantee - # holds for the explicit constructor too. + # Enforce the credential-endpoint co-location / issuer pin here too (not + # just on the discovery path), so the guarantee holds for this + # constructor as well. validate_endpoint_origins( self.config.token_endpoint, self.config.device_authorization_endpoint, self.config.issuer) - # `insecure` permits plaintext http only to QuestDB (e.g. a local dev - # server). The IdP is always held to https — or loopback http — by - # _idp_post, so the device code / refresh token are never sent in - # cleartext over the network even when this is set. + # `insecure` permits plaintext http only to QuestDB (e.g. local dev). + # _idp_post always holds the IdP to https (or loopback http), so the + # device code / refresh token are never sent in cleartext even when set. self.insecure = insecure self.open_browser = open_browser - # Kept so adapters that build their own transport (QuestDB.sender's ILP - # Sender) can forward the same private CA the urllib _ctx uses, instead - # of falling back to the default trust roots. See QuestDB.sender. + # Kept so adapters with their own transport (QuestDB.sender's ILP Sender) + # can forward the same private CA as _ctx rather than the default roots. self._ca_bundle = ca_bundle self._interactive = interactive self._default_interval = default_interval - # Per-request network timeout for every IdP call (device-code request, - # each poll, refresh). It bounds how long a single network leg can pin - # the acquisition lock if the IdP stalls: lower it to reduce lock-hold - # (and connection-pool starvation) during an IdP outage; raise it for a - # slow IdP. 
The total interactive-poll duration is separately capped by + # Per-request network timeout for every IdP call (device-code, each poll, + # refresh). Bounds how long one network leg pins the acquisition lock if + # the IdP stalls; the total poll duration is separately capped by # _MAX_DEVICE_CODE_LIFETIME. self._timeout = timeout self._cache = make_cache(cache) self._ctx = build_ssl_context(ca_bundle) self._renderer = renderer if renderer is not None else make_renderer(qr=qr) - # Serializes token *acquisition* (a silent refresh or the interactive - # sign-in) only. Concurrent callers are possible via the threaded - # SQLAlchemy/psycopg adapters: without this, several connections - # opening as the token expires would run overlapping refreshes, and - # with refresh-token rotation all but one would fail and force a - # spurious re-prompt. It is NOT held on the fast path, so a caller with - # a valid cached token never blocks behind another thread's sign-in. + # Serializes token *acquisition* (silent refresh or interactive sign-in) + # only. Without it, threaded SQLAlchemy/psycopg connections opening as + # the token expires would run overlapping refreshes — and with + # refresh-token rotation all but one would fail and re-prompt. NOT held + # on the fast path, so a valid cached token never blocks behind a + # sign-in. self._lock = threading.Lock() self._tokens: Optional[TokenSet] = None clock = _clock or _SYSTEM_CLOCK @@ -296,8 +282,8 @@ def from_questdb( Reads ``{url}/settings`` for the OIDC client id, scope, endpoints and groups mode, falling back to the IdP ``.well-known`` document for the - device-authorization endpoint when QuestDB does not advertise it. - Any explicit keyword overrides discovery. + device-authorization endpoint when QuestDB doesn't advertise it. Any + explicit keyword overrides discovery. 
""" _validate_flow(flow) ctx = build_ssl_context(ca_bundle) @@ -340,7 +326,7 @@ def token(self) -> str: Return a valid token for QuestDB, acquiring or refreshing as needed. Returns the ``id_token`` when the server expects groups encoded in the - token (``acl.oidc.groups.encoded.in.token=true``), otherwise the + token (``acl.oidc.groups.encoded.in.token=true``), else the ``access_token`` — mirroring QuestDB's own selection logic. """ return self._select(self._obtain_tokens()) @@ -354,18 +340,17 @@ def cache_key(self) -> str: """ Identifies the token's security context for caching. - Two sessions share a cached token only when they would accept the same - one: same IdP token endpoint (**path included**, so multi-tenant realms - sharing a host don't collide), client id, scope *set* (order-insensitive), + Two sessions share a cached token only when they'd accept the same one: + same IdP token endpoint (**path included**, so multi-tenant realms on one + host don't collide), client id, scope *set* (order-insensitive), audience, and token-kind mode (``groups_in_token`` — id_token vs - access_token). The QuestDB URL is deliberately excluded — the same IdP - token is valid against any QuestDB that trusts it. - - ``groups_in_token`` is part of the key because it selects which token - kind :meth:`_select` returns; without it two sessions that differ only - in that mode would collide on one entry and repeatedly evict each - other's token (the gate self-corrects, but at the cost of avoidable - refreshes / re-prompts). + access_token). The QuestDB URL is excluded — the same IdP token is valid + against any QuestDB that trusts it. + + ``groups_in_token`` is keyed because it selects the token kind + :meth:`_select` returns; otherwise two sessions differing only in that + mode would collide and repeatedly evict each other's token (self- + correcting, but at the cost of avoidable refreshes / re-prompts). 
""" c = self.config scope = ' '.join(sorted(c.scope.split())) if c.scope else '' @@ -380,11 +365,10 @@ def cache_key(self) -> str: def clear(self) -> None: """Forget the cached token (forces a fresh sign-in next time).""" # self._lock serializes against THIS instance's acquisition; the shared - # MemoryCache additionally bumps a per-key generation here, so an - # in-flight acquisition on ANOTHER OidcDeviceAuth that shares the - # process-global store can't repopulate the entry after this clear (its - # _store sees the bumped generation and drops the write). This resets the - # local / process cache only — it does not revoke the token at the IdP. + # MemoryCache also bumps a per-key generation, so an in-flight acquire on + # ANOTHER instance sharing the process-global store can't repopulate the + # entry (its _store sees the bumped generation and drops the write). + # Resets the local/process cache only — does not revoke at the IdP. with self._lock: self._tokens = None self._cache.clear(self.cache_key) @@ -406,10 +390,10 @@ def _select(self, tokens: TokenSet) -> str: def _has_required_token(self, tokens: TokenSet) -> bool: """ - True if ``tokens`` carries the kind :meth:`_select` will return — the - ``id_token`` when groups are encoded in the token, else the - ``access_token``. The cache gate and the post-refresh check share this - predicate so they can't disagree with ``_select``. + True if ``tokens`` carries the kind :meth:`_select` will return (the + ``id_token`` in groups mode, else the ``access_token``). The cache gate + and post-refresh check share this predicate so they can't disagree with + ``_select``. 
""" if self.config.groups_in_token: return bool(tokens.id_token) @@ -417,11 +401,10 @@ def _has_required_token(self, tokens: TokenSet) -> bool: def _missing_required_token_error(self) -> OidcDeviceFlowError: """ - Build the terminal error for a *completed* grant whose token response - omits the kind :meth:`_select` needs (the ``id_token`` in groups mode, - else the ``access_token``). Mirrors :meth:`_select`'s diagnostics, but - is an :class:`OidcDeviceFlowError` — a flow failure — so the device-flow - poll can raise it without first caching an unusable response. + Terminal error for a *completed* grant whose response omits the kind + :meth:`_select` needs. Mirrors :meth:`_select`'s diagnostics but as an + :class:`OidcDeviceFlowError`, so the poll can raise it without first + caching an unusable response. """ if self.config.groups_in_token: return OidcDeviceFlowError( @@ -434,29 +417,26 @@ def _missing_required_token_error(self) -> OidcDeviceFlowError: 'access_token.') def _obtain_tokens(self) -> TokenSet: - # Fast path: return a valid token without taking the lock, so a caller - # with a usable token never blocks behind another thread's in-progress - # refresh or interactive sign-in. This path is READ-ONLY: it never - # writes self._tokens (M4). Every write to that field happens under the - # lock (the promotion below, plus _store and clear), so the lock-free - # reader can't race a concurrent write / lose an update / resurrect a - # just-cleared token. + # Fast path: return a valid token without the lock, so a caller with a + # usable token never blocks behind another thread's refresh/sign-in. + # READ-ONLY — never writes self._tokens; every write to that field is + # under the lock (the promotion below, _store, clear), so this lock-free + # reader can't race a write or resurrect a just-cleared token. 
tokens = self._valid_cached() if tokens is not None: return tokens - # Slow path: serialize acquisition so concurrent callers don't run - # overlapping refreshes or double-prompt; the loser re-checks and - # reuses the winner's freshly acquired token. + # Slow path: serialize acquisition so concurrent callers don't overlap + # refreshes or double-prompt; the loser re-checks and reuses the + # winner's token. with self._lock: - # Capture the cache generation before reading or acquiring, so a - # clear() that races this acquisition — including one on another - # OidcDeviceAuth that shares the process-global MemoryCache (whose - # per-instance lock does not serialize against ours) — invalidates - # the store below instead of resurrecting the just-cleared entry. + # Capture the generation before reading/acquiring, so a racing + # clear() — including on another instance sharing the process-global + # MemoryCache (whose per-instance lock doesn't serialize against + # ours) — invalidates the store below instead of resurrecting the + # cleared entry. generation = self._cache_generation() - # Promote a cached token into the field under the lock (even an - # expired one, so _acquire can reuse its refresh_token for a silent - # refresh). Done here, not on the lock-free fast path, so every + # Promote a cached token under the lock (even expired, so _acquire + # can reuse its refresh_token). Here, not on the fast path, so every # write to self._tokens stays serialized. if self._tokens is None: cached = self._cache.load(self.cache_key) @@ -468,10 +448,9 @@ def _obtain_tokens(self) -> TokenSet: return self._acquire(generation) def _valid_cached(self) -> Optional[TokenSet]: - # Read-only: reads the published field, falling back to a read of the - # shared cache backend. It never writes self._tokens — that write is - # done only under the lock (in _obtain_tokens' slow path / _store / - # clear) — so it is safe to call on the lock-free fast path. 
+ # Read-only: reads the published field, falling back to the shared cache + # backend. Never writes self._tokens (that's lock-only), so it's safe on + # the lock-free fast path. tokens = self._tokens if tokens is None: tokens = self._cache.load(self.cache_key) @@ -481,31 +460,26 @@ def _valid_cached(self) -> Optional[TokenSet]: return None def _acquire(self, generation: int) -> TokenSet: - # Called while holding self._lock. Try a silent refresh, else run the - # interactive device flow. `generation` was captured before the cache - # read in _obtain_tokens; _store drops its write if a concurrent clear() - # has bumped it since (see _store / _cache_generation). + # Holds self._lock. Try a silent refresh, else run the device flow. + # `generation` was captured before the cache read in _obtain_tokens; + # _store drops its write if a concurrent clear() bumped it since. tokens = self._tokens if tokens is not None and tokens.refresh_token: try: refreshed = self._refresh(tokens) except OidcNetworkError: - # Transient connectivity failure: the refresh token is still - # valid, so re-authenticating won't help (the interactive flow - # needs the same network) and would needlessly re-prompt. - # Surface it — the cached token + refresh_token are kept, so a - # later call retries the refresh. + # Transient: the refresh token is still valid, so the interactive + # flow (same network) wouldn't help and would needlessly + # re-prompt. Surface it; the cached token is kept for a retry. raise except OidcError: - # The refresh token was rejected (expired/revoked) or the IdP - # returned an unusable response: fall through to a fresh - # interactive sign-in. + # Refresh token rejected (expired/revoked) or unusable response: + # fall through to a fresh interactive sign-in. pass else: - # Only accept a refresh that actually yields the token kind we - # need. 
Some IdPs don't re-issue the id_token on refresh; such - # a response is unusable, so fall through to the interactive - # flow rather than caching it and looping on every call. + # Accept only a refresh that yields the kind we need: some IdPs + # don't re-issue the id_token on refresh, so fall through rather + # than cache an unusable response and loop on every call. if self._has_required_token(refreshed): self._store(refreshed, generation) return refreshed @@ -515,13 +489,12 @@ def _acquire(self, generation: int) -> TokenSet: return fresh def _store(self, tokens: TokenSet, generation: int) -> None: - # self._tokens is this instance's own view, so always set it — the - # caller uses the token it just acquired. The shared-cache write is - # conditional: a clear() (here or on another instance sharing the - # process-global store) that bumped the generation since it was captured - # drops the write, so clear() is not silently undone. Backends without - # generation support (NullCache / a custom TokenCache) store - # unconditionally, exactly as before. + # self._tokens is this instance's own view, so always set it (the caller + # uses what it just acquired). The shared-cache write is conditional: a + # clear() (here or on another instance sharing the store) that bumped the + # generation drops the write, so clear() isn't silently undone. Backends + # without generation support (NullCache / custom TokenCache) store + # unconditionally. self._tokens = tokens store_if_current = getattr(self._cache, 'store_if_current', None) if store_if_current is not None: @@ -531,8 +504,8 @@ def _store(self, tokens: TokenSet, generation: int) -> None: def _cache_generation(self) -> int: # MemoryCache tracks a per-key clear()-generation for the cross-instance - # CAS in _store; other backends don't, so default to 0 (the store is - # then unconditional, matching the pre-existing behavior). + # CAS in _store; other backends don't, so default to 0 (unconditional + # store). 
generation = getattr(self._cache, 'generation', None) return generation(self.cache_key) if generation is not None else 0 @@ -540,12 +513,12 @@ def _tokenset_from_response(self, body: Dict[str, Any]) -> TokenSet: try: expires_in = int(body.get('expires_in', _DEFAULT_EXPIRES_IN)) except (TypeError, ValueError, OverflowError): - # OverflowError: a JSON Infinity (json.loads accepts it) → int(inf); - # it is not a ValueError, so list it to keep the typed contract. + # OverflowError: a JSON Infinity (json.loads accepts it) → int(inf) + # isn't a ValueError, so list it to keep the typed contract. expires_in = _DEFAULT_EXPIRES_IN if expires_in <= 0: - # A non-positive lifetime would mark a just-issued token as already - # expired, causing refresh/re-prompt churn. Treat it as unknown. + # A non-positive lifetime marks a just-issued token as expired, + # causing refresh/re-prompt churn. Treat it as unknown. expires_in = _DEFAULT_EXPIRES_IN claims = (_decode_jwt_claims(body.get('id_token')) or _decode_jwt_claims(body.get('access_token'))) @@ -561,11 +534,10 @@ def _tokenset_from_response(self, body: Dict[str, Any]) -> TokenSet: sub=claims.get('sub')) def _idp_post(self, url: str, form: Dict[str, Any]): - # IdP POSTs carry the device code / refresh token, so they are always - # required to be https (loopback http is fine for local dev); the - # user's `insecure` flag — which is about the QuestDB link — never - # downgrades them. The timeout bounds how long this leg can hold the - # acquisition lock if the IdP stalls. + # IdP POSTs carry the device code / refresh token, so always https + # (loopback http is fine for local dev); the user's `insecure` flag (the + # QuestDB link) never downgrades them. The timeout bounds how long this + # leg can hold the acquisition lock if the IdP stalls. 
return post_form( url, form, ctx=self._ctx, insecure=False, timeout=self._timeout) @@ -578,42 +550,38 @@ def _refresh(self, tokens: TokenSet) -> TokenSet: 'refresh_token': tokens.refresh_token, 'client_id': self.config.client_id, 'scope': self.config.scope, - # Re-send the audience on refresh too, mirroring the - # device-authorization request: some IdPs (e.g. Auth0) need - # it to keep the rotated access token's `aud`, and would - # otherwise mint a token QuestDB rejects only AFTER a silent - # refresh. IdPs that don't use it ignore the param; post_form - # drops it entirely when audience is None (not configured). + # Re-send the audience (mirroring the device-authorization + # request): some IdPs (e.g. Auth0) need it to keep the + # rotated token's `aud`, else they mint one QuestDB rejects + # only after a silent refresh. Others ignore it; post_form + # drops it when audience is None. 'audience': self.config.audience, }) except OidcNetworkError: - # Already transient (socket drop / DNS / per-request timeout): - # propagate so _acquire keeps the still-valid refresh token and - # retries later instead of re-prompting. + # Already transient (socket drop / DNS / timeout): propagate so + # _acquire keeps the still-valid refresh token and retries later. raise except OidcError as e: - # Non-JSON HTTP error body (e.g. an HTML 5xx from a proxy in front - # of the IdP). A 5xx / 429 is a transient hiccup — re-raise as a - # network error so _acquire keeps the refresh token; a 4xx is a - # genuine rejection, so let it fall through (as an OidcError) to a - # fresh interactive sign-in. + # Non-JSON HTTP error body (e.g. an HTML 5xx from a proxy). 5xx/429 + # is transient → re-raise as a network error so _acquire keeps the + # refresh token; a 4xx is a genuine rejection, so let it fall through + # to a fresh interactive sign-in. 
if _http_status_is_transient(getattr(e, 'status', None)): raise OidcNetworkError(str(e)) from e raise if status == 200: refreshed = self._tokenset_from_response(body) - # Many IdPs do not rotate the refresh token; keep the old one. - # TokenSet is frozen, so derive a copy rather than mutating. + # Many IdPs don't rotate the refresh token; keep the old one. + # TokenSet is frozen, so derive a copy. if not refreshed.refresh_token: refreshed = replace( refreshed, refresh_token=tokens.refresh_token) return refreshed - # A transient IdP error (5xx / 429) during a silent refresh must not - # tear down the session: the refresh token is still valid, so surface it - # as a network error and let _acquire keep it and retry later — matching - # the poll loop, which also treats 5xx/429 as transient. Only a genuine - # rejection (an expired/revoked refresh token, a 4xx invalid_grant) - # falls through to a fresh interactive sign-in. + # A transient 5xx/429 during a silent refresh must not tear down the + # session: the refresh token is still valid, so surface it as a network + # error for _acquire to retry — matching the poll loop. Only a genuine + # rejection (expired/revoked token, 4xx invalid_grant) falls through to a + # fresh sign-in. if _http_status_is_transient(status): raise OidcNetworkError( f'Token refresh hit a transient IdP error (HTTP {status}); ' @@ -657,10 +625,9 @@ def _request_device_code(self) -> Dict[str, Any]: return body error = body.get('error') if status == 200: - # 200 but the success guard above failed: the response is missing - # device_code/user_code. That is a non-conformant body, not an - # HTTP-level failure — say so plainly rather than the contradictory - # "Device authorization request failed (HTTP 200)". + # 200 but the guard above failed: device_code/user_code missing. + # A non-conformant body, not an HTTP failure — say so plainly rather + # than a contradictory "failed (HTTP 200)". 
raise OidcDeviceFlowError( 'The IdP returned a 200 device-authorization response that is ' 'missing the required "device_code"/"user_code" fields; cannot ' @@ -690,18 +657,16 @@ def _poll_for_token(self, resp: Dict[str, Any]) -> TokenSet: interval = int(resp.get('interval', self._default_interval)) except (TypeError, ValueError, OverflowError): interval = self._default_interval - # At least 1s (RFC 8628 floor), and capped so a hostile/huge value can't - # pin the polling thread (which holds the acquisition lock) in one - # enormous sleep. + # At least 1s (RFC 8628 floor), capped so a hostile value can't pin the + # polling thread (which holds the lock) in one enormous sleep. interval = min(_MAX_POLL_INTERVAL, max(1, interval)) try: expires_in = int(resp.get('expires_in', _DEFAULT_DEVICE_CODE_LIFETIME)) except (TypeError, ValueError, OverflowError): expires_in = _DEFAULT_DEVICE_CODE_LIFETIME - # A non-positive lifetime would time the flow out before the first poll - # (the user has already been shown the code); treat it as unknown. Cap - # the upper end so a hostile expires_in can't keep the loop — and the - # lock — alive indefinitely. + # A non-positive lifetime would time out before the first poll (the code + # is already shown); treat it as unknown. Cap the upper end so a hostile + # value can't keep the loop — and the lock — alive indefinitely. if expires_in <= 0: expires_in = _DEFAULT_DEVICE_CODE_LIFETIME expires_in = min(expires_in, _MAX_DEVICE_CODE_LIFETIME) @@ -717,8 +682,7 @@ def _poll_for_token(self, resp: Dict[str, Any]) -> TokenSet: 'Run the sign-in again.', error='expired_token') self._renderer.on_waiting(remaining) - # Never sleep past the deadline (remaining > 0 here): a clamped - # interval still shouldn't overshoot a short-lived code. + # Never sleep past the deadline (remaining > 0 here). 
self._sleep(min(interval, remaining)) try: @@ -730,11 +694,10 @@ def _poll_for_token(self, resp: Dict[str, Any]) -> TokenSet: 'client_id': self.config.client_id, }) except OidcError as e: - # A non-JSON 4xx is a terminal rejection (e.g. an HTML/plain - # error page from a WAF or reverse proxy in front of the IdP, or - # a non-conformant IdP): a conformant OAuth error is JSON, so it - # can never be authorization_pending / slow_down. Fail fast - # instead of polling on to a misleading "code expired". + # A non-JSON 4xx is a terminal rejection (e.g. an HTML error page + # from a WAF/proxy, or a non-conformant IdP): a conformant OAuth + # error is JSON, so it can never be authorization_pending / + # slow_down. Fail fast instead of polling on to "code expired". if _http_status_is_terminal_4xx(getattr(e, 'status', None)): self._renderer.on_failure( 'Sign-in failed: the identity provider rejected the ' @@ -742,42 +705,36 @@ def _poll_for_token(self, resp: Dict[str, Any]) -> TokenSet: raise OidcDeviceFlowError( f'Device flow failed: the IdP rejected the token ' f'request ({e}).') from e - # Otherwise transient, not a terminal OAuth decision: a dropped - # connection / DNS blip / per-request timeout (OidcNetworkError), - # or a non-JSON 5xx/429 such as an HTML 502/503/504 from a proxy - # in front of the IdP (a bare OidcError from post_form). The user - # may already have authorized in the browser, and RFC 8628 §3.4 - # expects polling to continue until the device code expires, so - # poll again instead of discarding the in-progress sign-in. The - # deadline check at the top of the loop bounds the total wait; a - # genuine JSON rejection arrives as a JSON error body (below). + # Otherwise transient: a dropped connection / DNS blip / timeout + # (OidcNetworkError) or a non-JSON 5xx/429 from a proxy (bare + # OidcError). 
The user may already have authorized, and RFC 8628 + # §3.4 expects polling to continue until the code expires, so + # poll again rather than discard the sign-in (the deadline bounds + # the total wait; a genuine JSON rejection arrives below). if getattr(e, 'status', None) == 429: interval = min(_MAX_POLL_INTERVAL, interval + 5) continue if status == 200: - # A 200 is the RFC 6749 §5.1 token response: the grant - # completed. Accept it only if it actually carries the kind - # _select will hand to QuestDB (the id_token in groups mode, - # else the access_token), using the same predicate as the cache - # gate and the post-refresh check so the three can't disagree. + # The RFC 6749 §5.1 token response: the grant completed. Accept + # it only if it carries the kind _select hands to QuestDB, using + # the same predicate as the cache gate and post-refresh check so + # the three can't disagree. tokens = self._tokenset_from_response(body) if self._has_required_token(tokens): return tokens - # The grant completed but the required kind is absent: a stable - # misconfiguration, not a transient poll state. Raise a clear - # terminal error here instead of caching an unusable token and - # silently re-running the whole interactive flow on every later - # token() call. + # Grant completed but the required kind is absent: a stable + # misconfiguration, not a transient poll state. Raise a terminal + # error rather than cache an unusable token and silently re-run + # the whole flow on every later token() call. self._renderer.on_failure( 'Sign-in failed: the identity provider did not return the ' 'token this server requires.') raise self._missing_required_token_error() - # A 5xx or 429 that did carry a JSON body is also transient (a - # server-side error or a rate-limit) rather than a terminal OAuth - # rejection: back off on a rate-limit and keep polling until the - # deadline, matching the connection-failure handling above. 
+ # A 5xx/429 with a JSON body is also transient (server error or + # rate-limit), not a terminal rejection: back off on 429 and keep + # polling until the deadline, as above. if status >= 500 or status == 429: if status == 429: interval = min(_MAX_POLL_INTERVAL, interval + 5) @@ -812,12 +769,12 @@ def _is_interactive(self) -> bool: return detect_interactive() def _maybe_open_browser(self, resp: Dict[str, Any]) -> None: - # Never auto-open on a (possibly remote) notebook kernel; only do so - # for an explicitly opted-in local terminal session. + # Never auto-open on a (possibly remote) notebook kernel; only on an + # opted-in local terminal. if not self.open_browser or in_ipython_kernel(): return - # Only open an http(s) URL — never a javascript:/data: scheme from a - # malicious or MITM'd device response. + # Only http(s) — never a javascript:/data: scheme from a malicious or + # MITM'd device response. target = _safe_link_url( resp.get('verification_uri_complete') or resp.get('verification_uri') @@ -841,9 +798,9 @@ def _validate_flow(flow: str) -> None: def _normalize_url(url: str) -> str: - # Full URL with scheme/host lower-cased and the default port dropped, but - # the path kept (it distinguishes multi-tenant realms). Used for the cache - # key so trivial spelling differences don't cause a spurious re-prompt. + # Full URL with scheme/host lower-cased and default port dropped, but path + # kept (it distinguishes multi-tenant realms). Used for the cache key so + # trivial spelling differences don't cause a spurious re-prompt. parts, port = safe_urlparse(url) scheme = (parts.scheme or '').lower() host = (parts.hostname or '').lower() diff --git a/src/questdb/auth/_discovery.py b/src/questdb/auth/_discovery.py index 01010b5e..68fe7301 100644 --- a/src/questdb/auth/_discovery.py +++ b/src/questdb/auth/_discovery.py @@ -25,13 +25,12 @@ """ OIDC configuration discovery. -Resolution order, mirroring the design doc: +Resolution order: -1. 
``GET {questdb_url}/settings`` (public, no auth) -> the QuestDB-authoritative +1. ``GET {questdb_url}/settings`` (public) -> QuestDB-authoritative ``acl.oidc.*`` values (client id, scope, endpoints, groups mode). -2. If the device-authorization endpoint is not advertised by QuestDB (today's - servers), fall back to the IdP discovery document - (``{issuer}/.well-known/openid-configuration``). +2. If QuestDB doesn't advertise the device-authorization endpoint, fall back to + the IdP discovery document (``{issuer}/.well-known/openid-configuration``). """ from __future__ import annotations @@ -92,12 +91,10 @@ def _str_setting(value: Any) -> Optional[str]: """ A ``/settings`` value as a non-empty string, else ``None``. - ``/settings`` is server-controlled (and tamperable over a plaintext insecure - channel). A non-string ``acl.oidc.*`` value — a JSON list/number from a buggy - or hostile server — must not reach ``scope.split()`` or the cache-key join as - a raw object, where it would escape the package's typed-error contract with a - bare ``AttributeError`` / ``TypeError``. Mirrors :func:`_resolve_endpoint`, - which already drops a non-string endpoint. + Drops a non-string ``acl.oidc.*`` value (a JSON list/number from a buggy or + hostile server) so it can't reach ``scope.split()`` / the cache-key join as a + raw object and escape the typed-error contract with an ``AttributeError`` / + ``TypeError``. Mirrors :func:`_resolve_endpoint`. """ return value if isinstance(value, str) and value else None @@ -106,27 +103,20 @@ def settings_config(settings: Any) -> Dict[str, Any]: """ Return the trusted config map from a ``/settings`` response. - Modern QuestDB nests the server-authoritative values under a top-level - ``"config"`` object, alongside a **user-writable** ``"preferences"`` sibling - (the web console persists UI preferences there via ``PUT /settings``). 
- Discovery must read only ``"config"`` and never the top level, so a user who - can write a preference cannot smuggle an ``acl.oidc.*`` key — e.g. a - redirected ``token.endpoint`` that points the device code / refresh token at - an attacker — into the resolved OIDC configuration. - - A genuinely flat, legacy ``/settings`` response (no ``"config"`` / - ``"preferences"`` split) is still tolerated at the top level. + Modern QuestDB nests server-authoritative values under ``"config"``, + alongside a **user-writable** ``"preferences"`` sibling (written via + ``PUT /settings``). Read only ``"config"`` so a user who can write a + preference can't smuggle an ``acl.oidc.*`` key (e.g. a redirected + ``token.endpoint``) into the resolved config. A genuinely flat legacy + response (no ``config`` / ``preferences`` split) is still tolerated. """ if not isinstance(settings, dict): return {} cfg = settings.get('config') if isinstance(cfg, dict): return cfg - # A structured response carries the user-writable "preferences" sibling - # (and normally the "config" object). If either marker is present, the top - # level is NOT trusted config: read "config" or nothing — so user-writable - # preferences can never be mistaken for server-authoritative config, even - # when "config" is absent or malformed. + # Either marker present => structured response: read "config" or nothing, + # never the user-writable top level — even when "config" is absent/malformed. if 'config' in settings or 'preferences' in settings: return {} # Legacy flat response: no config/preferences split; tolerate top-level keys. @@ -166,10 +156,9 @@ def _origin_str(url: str) -> str: def _settings_channel_is_plaintext(questdb_url: str) -> bool: """ True if QuestDB ``/settings`` was fetched over plaintext http to a - non-loopback host — a channel a network MITM can tamper (only reachable - with ``insecure=True``; ``_require_secure`` rejects it otherwise). 
IdP - endpoints advertised by such an unauthenticated ``/settings`` response must - not be trusted to route credentials without an out-of-band pin. + non-loopback host — a MITM-tamperable channel (only reachable with + ``insecure=True``). Endpoints advertised over it must not route credentials + without an out-of-band pin. """ parts, _ = safe_urlparse(questdb_url) return (parts.scheme or '').lower() == 'http' and not _is_loopback( @@ -180,15 +169,12 @@ def _decode_path_segments(path: str) -> list: """ Fully percent-decode a URL path and split it into ``/`` segments. - Decoding is repeated until stable so a double/triple-encoded dot segment - (``%252e%252e`` -> ``%2e%2e`` -> ``..``), or an encoded slash (``%2f``) that - splits a segment, is unmasked — a server or reverse proxy may unescape more - than once before it normalizes. A backslash is treated as a separator, since - some proxies fold ``\\`` to ``/`` before routing. The returned segments are - what the containment check compares, never the raw string urllib puts on the - wire, so an encoding the server later undoes can't smuggle a ``..`` past the - scan. The loop is bounded (a real path needs 0-1 passes; more layers than a - server would itself decode can't resolve to a traversal anyway). + Decoding repeats until stable so a multiply-encoded dot segment + (``%252e%252e`` -> ``..``) or encoded slash (``%2f``) — which a server/proxy + may unescape more than once before normalizing — is unmasked. Backslash is a + separator (some proxies fold ``\\`` to ``/``). The containment check compares + these decoded segments, not the raw wire string, so an encoding the server + later undoes can't hide a ``..``. Loop bounded: a real path needs 0-1 passes. """ decoded = path for _ in range(10): # bounded; each pass peels one percent-encoding layer @@ -204,32 +190,26 @@ def _endpoint_path_under_issuer(endpoint: str, issuer: str) -> bool: True if ``endpoint``'s path is the issuer's path or a sub-path of it. 
Segment-aware, so ``/realms/prod`` does not match ``/realms/production``. A - root issuer (no path, e.g. ``https://idp.example.com``) constrains the - origin only and matches any path. Used to keep a tampered ``/settings`` from - redirecting credentials to a different tenant on a path-based multi-tenant - IdP (Keycloak issuers are ``https://host/realms/{realm}``), which an - origin-only check can't catch. - - The comparison is done on the fully *decoded* path segments, never the raw - string urllib sends. A ``.`` / ``..`` segment is rejected outright: urllib - puts the dotted path on the wire verbatim, but the IdP (or a reverse proxy - in front of it) normalizes it, so ``/realms/prod/../attacker/token`` would - satisfy a naive prefix test yet resolve server-side to a *different* realm — - defeating the very isolation this check exists to provide. Encoded dot - segments are unmasked first — including double-encoded (``%252e``) and - encoded slashes (``%2f``) a server may unescape more than once — a backslash - is treated as a separator, and the last segment's ``;params`` (which urllib - splits off ``.path``) is folded back in, so none of those can smuggle a - traversal past the segment scan. A legitimate endpoint path never contains - dot segments. + root issuer (no path) constrains the origin only and matches any path. Stops + a tampered ``/settings`` from redirecting credentials to a different tenant + on a path-based multi-tenant IdP (Keycloak issuers are + ``https://host/realms/{realm}``), which an origin-only check can't catch. + + Compared on fully *decoded* path segments, not the raw wire string. A ``.`` / + ``..`` segment is rejected outright: the server normalizes it, so + ``/realms/prod/../attacker/token`` passes a naive prefix test yet resolves to + a *different* realm. ``_decode_path_segments`` unmasks encoded dot segments, + and the last segment's ``;params`` (which urllib splits off ``.path``) is + folded back, so neither can hide a traversal. 
Legitimate paths have no dot + segments. """ base = (safe_urlparse(issuer)[0].path or '').rstrip('/') if not base: return True base_segs = _decode_path_segments(base) eparts = safe_urlparse(endpoint)[0] - # urllib splits the last segment's ;params off .path; fold it back so a - # traversal hidden there (…/token;..%2f..%2fEVIL) can't slip past the scan. + # Fold the last segment's ;params back into the path so a traversal hidden + # there (…/token;..%2f..%2fEVIL) can't slip past the scan. ep_path = eparts.path or '' if eparts.params: ep_path = f'{ep_path};{eparts.params}' @@ -246,28 +226,24 @@ def validate_endpoint_origins( """ Reject an OIDC configuration that would send credentials off-origin. - The device code and the long-lived refresh token are POSTed to the device- - authorization and token endpoints. These come from QuestDB ``/settings`` - (or the IdP ``.well-known``), which the client trusts; this check limits a - tampered or MITM'd configuration from redirecting those credentials to an - attacker-controlled host: - - * the two credential endpoints must share a single origin (they are always - co-located on the authorization server per RFC 8628); and - * when the ``issuer`` is known independently (passed explicitly or resolved - from the IdP ``.well-known``), both endpoints must share its **origin**. - - This is an origin-level check: it does **not**, on its own, isolate - path-based multi-tenant realms (e.g. Keycloak issuers - ``https://host/realms/{realm}``, where every realm shares one origin). That - path-scoping is enforced separately in :func:`resolve_config`, and only for - endpoints advertised by the (untrusted) QuestDB ``/settings`` — endpoints - from IdP discovery (the issuer's own ``.well-known``) and caller-explicit - endpoints are authoritative and are not path-restricted (some IdPs, e.g. - Azure AD, legitimately place endpoints outside the issuer path). 
- - Pass ``issuer=`` to pin the IdP explicitly when QuestDB advertises the - endpoints directly (so a compromised server cannot redirect the token POST). + The device code and long-lived refresh token are POSTed to the device- + authorization and token endpoints. This limits a tampered or MITM'd config + from steering those credentials to an attacker host: + + * the two credential endpoints must share a single origin (always co-located + on the authorization server per RFC 8628); and + * when ``issuer`` is known independently (explicit or from the IdP + ``.well-known``), both endpoints must share its **origin**. + + Origin-level only: it does **not** isolate path-based multi-tenant realms + (e.g. Keycloak ``https://host/realms/{realm}``, one shared origin). That + path-scoping lives in :func:`resolve_config`, and only for endpoints from the + untrusted QuestDB ``/settings``; endpoints from IdP discovery or the caller + are authoritative and not path-restricted (some IdPs, e.g. Azure AD, + legitimately place endpoints outside the issuer path). + + Pass ``issuer=`` to pin the IdP when QuestDB advertises the endpoints + directly, so a compromised server cannot redirect the token POST. """ if _normalized_origin(token_endpoint) != _normalized_origin( device_authorization_endpoint): @@ -301,33 +277,25 @@ def _resolve_endpoint(value: Optional[str], cfg: Dict[str, Any]) -> Optional[str if not value: return None if not isinstance(value, str): - # A non-string endpoint from /settings (e.g. a JSON number) is - # malformed; treat it as absent so resolution falls through to a clear - # OidcConfigError (or the IdP-discovery fallback) instead of an - # AttributeError from .startswith() escaping the typed-error contract. + # Non-string endpoint (e.g. a JSON number): treat as absent so resolution + # yields a clear OidcConfigError instead of an AttributeError from + # .startswith() escaping the typed-error contract.
return None if value.startswith('http://') or value.startswith('https://'): return value if value.startswith('/'): - # _str_setting drops a non-string acl.oidc.host (a JSON number/list from - # a buggy or hostile /settings) so it can't be interpolated raw into the - # netloc — e.g. https://12345:9000/path — and instead reads as absent, - # mirroring how endpoint values above are coerced. (safe_urlparse would - # otherwise reject the bogus URL only incidentally, downstream.) + # _str_setting drops a non-string acl.oidc.host so it can't be + # interpolated raw into the netloc (e.g. https://12345:9000/path). host = _str_setting(cfg.get(_K_HOST)) if not host: - # A path-only endpoint with no acl.oidc.host to resolve it against - # can't be turned into a URL. Treat it as absent (return None) so - # resolution fails with the clear "could not resolve the ... - # endpoint" error rather than passing a scheme-less "/path" - # downstream, where it surfaces as a confusing "insecure/malformed - # URL" instead. + # Path-only endpoint with no host to resolve against: treat as absent + # for the clear "could not resolve" error, rather than passing a + # scheme-less "/path" on to a confusing "malformed URL" downstream. return None tls = _as_bool(cfg.get(_K_TLS_ENABLED), default=True) scheme = 'https' if tls else 'http' - # A usable port is an int or a digit string; anything else (a JSON - # list/object, a bool, or a non-numeric string) would corrupt the - # netloc, so drop it and resolve host-only. + # A usable port is an int or digit string; anything else would corrupt + # the netloc, so drop it and resolve host-only. port = cfg.get(_K_PORT) if isinstance(port, bool) or not ( isinstance(port, int) @@ -352,12 +320,11 @@ def discover_device_endpoint_from_idp( """ Fetch the IdP ``.well-known/openid-configuration`` and return it. - The discovery URL is taken from ``discovery_url``, else built from - ``issuer``. 
One of the two is required: the discovery origin is **never** - derived from a QuestDB-advertised endpoint, because that would let a - tampered ``/settings`` choose where the device code and refresh token are - sent (the resolved issuer and endpoints would then all share the attacker's - origin and pass the co-location / issuer-pin checks trivially). + The discovery URL comes from ``discovery_url``, else built from ``issuer``; + one is required. The discovery origin is **never** derived from a + QuestDB-advertised endpoint — that would let a tampered ``/settings`` choose + where credentials are sent, with the co-location / issuer-pin checks passing + trivially because every value would share the attacker's origin. """ url = discovery_url or (well_known_url(issuer) if issuer else None) if not url: @@ -366,14 +333,10 @@ def discover_device_endpoint_from_idp( 'or discovery_url was given. Pass issuer=... (or ' 'device_authorization_endpoint=... to skip discovery).') doc = get_json(url, ctx=ctx, insecure=insecure, timeout=timeout) - # get_json guarantees valid JSON, not a JSON *object*. A discovery document - # that is valid-JSON-but-not-a-dict (a list/null/number/string from a - # captive portal, a misconfigured proxy, or a hostile IdP) must not reach - # resolve_config's doc.get(...) calls as a raw object, where it would escape - # the package's typed-error contract with a bare AttributeError. Coerce it - # to empty so resolution fails with the clear "could not resolve the ... - # endpoint" OidcConfigError instead — mirroring settings_config, which - # applies the same guard to the QuestDB /settings response. + # get_json guarantees valid JSON, not a JSON *object*. Coerce a non-dict + # document (from a captive portal, bad proxy, or hostile IdP) to empty so + # resolve_config's doc.get(...) yields a clear "could not resolve" error + # rather than an AttributeError. Mirrors settings_config. 
return doc if isinstance(doc, dict) else {} @@ -409,10 +372,8 @@ def resolve_config( f'QuestDB at {questdb_url} reports OIDC is disabled ' f'({_K_ENABLED}=false). Nothing to authenticate against.') - # _str_setting drops a non-string /settings value (e.g. a JSON list) so it - # can't reach scope.split() / the cache-key join as a raw object and escape - # the typed-error contract; a non-string client.id thus reads as absent and - # surfaces the clear "Missing client_id" error below. + # _str_setting drops a non-string /settings value so a non-string client.id + # reads as absent and hits the clear "Missing client_id" error below. client_id = client_id or _str_setting(cfg.get(_K_CLIENT_ID)) if not client_id: raise OidcConfigError( @@ -426,9 +387,9 @@ def resolve_config( if audience is None: audience = _str_setting(cfg.get(_K_AUDIENCE)) - # Track which credential endpoints the caller supplied directly. Those are - # trusted; endpoints learned from /settings are only as trustworthy as the - # channel that delivered them (see the insecure-channel guard below). + # Track caller-supplied credential endpoints: those are trusted, whereas + # /settings endpoints are only as trustworthy as the channel that delivered + # them (see the insecure-channel guard below). explicit_token_endpoint = token_endpoint is not None explicit_device_endpoint = device_authorization_endpoint is not None @@ -441,18 +402,13 @@ def resolve_config( device_authorization_endpoint or _resolve_endpoint(cfg.get(_K_DEVICE_ENDPOINT), cfg)) - # When QuestDB itself was reached over plaintext http to a non-loopback host - # (only possible with insecure=True), its /settings response can be tampered - # in transit. Any IdP credential endpoint it advertises would then route the - # device code and long-lived refresh token to an attacker origin. 
The - # missing-endpoint discovery path below already demands an out-of-band pin, - # but when a tampered /settings advertises BOTH endpoints at one attacker - # origin that path is skipped, the co-location check passes trivially (they - # share that origin) and the issuer-pin check is vacuous (no issuer) — so - # nothing else catches it. Require the same out-of-band pin (issuer= / - # discovery_url=) before trusting /settings-supplied endpoints over such a - # channel. Endpoints the caller passed explicitly, and endpoints from an - # authenticated (https / loopback) /settings, are unaffected. + # Over a plaintext-http /settings channel (insecure=True, non-loopback), a + # tampered response can advertise BOTH credential endpoints at one attacker + # origin: the discovery path below is skipped, co-location passes trivially + # (shared origin) and the issuer-pin check is vacuous (no issuer), so nothing + # else catches it. Demand the same out-of-band pin (issuer= / discovery_url=) + # before trusting /settings endpoints here. Caller-explicit endpoints and + # those from an authenticated (https / loopback) /settings are unaffected. settings_supplied_credentials = ( (token_endpoint and not explicit_token_endpoint) or (device_authorization_endpoint and not explicit_device_endpoint)) @@ -469,16 +425,13 @@ def resolve_config( 'device_authorization_endpoint=...), or connect to QuestDB over ' 'https so /settings is authenticated.') - # When the credential endpoints came from QuestDB /settings (not the - # caller) and an issuer is pinned out-of-band, require each to sit under the - # issuer's PATH, not merely its origin. Path-based IdPs put every tenant on - # one origin (Keycloak issuers are https://host/realms/{realm}), so the - # origin check alone (validate_endpoint_origins) can't stop a tampered - # /settings from steering the device code / refresh token to a different - # realm on the same host. The issuer is out-of-band, so the server can't - # forge it. 
Caller-explicit endpoints, and endpoints from IdP discovery (the - # issuer's own .well-known), are authoritative and skip this — some IdPs - # (e.g. Azure AD) legitimately place endpoints outside the issuer path. + # For /settings endpoints with an out-of-band issuer, require each under the + # issuer's PATH, not just its origin: path-based IdPs put all tenants on one + # origin (Keycloak https://host/realms/{realm}), so the origin check alone + # can't stop a tampered /settings steering credentials to a different realm. + # The out-of-band issuer can't be forged. Caller-explicit endpoints and those + # from IdP discovery are authoritative and skip this — some IdPs (e.g. Azure + # AD) legitimately place endpoints outside the issuer path. if issuer: for label, url, from_settings in ( ('token endpoint', token_endpoint, @@ -497,18 +450,13 @@ def resolve_config( 'device_authorization_endpoint=...).') # Fall back to IdP discovery when QuestDB doesn't advertise the device - # endpoint (and/or the token endpoint). This contacts the IdP, so it is - # held to https/loopback (insecure=False) regardless of the QuestDB flag. + # (and/or token) endpoint. This contacts the IdP, so it is held to + # https/loopback (insecure=False) regardless of the QuestDB flag. if not device_authorization_endpoint or not token_endpoint: - # Require a caller-supplied trust anchor before contacting the IdP for - # discovery. Without issuer= / discovery_url=, the discovery target - # would have to be guessed from the token endpoint that /settings - # supplied; a tampered or MITM'd /settings (reachable in cleartext when - # QuestDB is http:// with insecure=True) could then steer discovery — - # and so the device-code and refresh-token POSTs — to an attacker - # origin, with the co-location and issuer-pin checks passing trivially - # because every value shares that one origin. issuer= is out-of-band, - # so the server cannot forge it. + # Require an out-of-band trust anchor first.
Otherwise the discovery + # target would be guessed from the /settings token endpoint, so a + # tampered /settings could steer discovery (and the credential POSTs) to + # an attacker origin with co-location / issuer-pin passing trivially. if not issuer and not discovery_url: raise OidcConfigError( 'QuestDB did not advertise the OIDC device-authorization ' @@ -524,12 +472,10 @@ def resolve_config( doc = discover_device_endpoint_from_idp( issuer=issuer, discovery_url=discovery_url, ctx=ctx, insecure=False, timeout=timeout) - # The IdP discovery document is untrusted too: coerce its values the - # same way as /settings values. A non-string endpoint / issuer (a JSON - # number/list from a buggy or hostile IdP) must read as absent — the - # clear "could not resolve" OidcConfigError below, or no issuer pin — - # rather than reach safe_urlparse / the cache-key join as a raw object - # and escape the typed-error contract with a bare AttributeError. + # The discovery document is untrusted too: coerce its values like + # /settings. A non-string endpoint / issuer reads as absent (clear + # "could not resolve" below, or no issuer pin) instead of reaching + # safe_urlparse / the cache-key join as a raw object. 
device_authorization_endpoint = ( device_authorization_endpoint or _str_setting(doc.get('device_authorization_endpoint'))) @@ -538,20 +484,13 @@ def resolve_config( authorization_endpoint = ( authorization_endpoint or _str_setting(doc.get('authorization_endpoint'))) - # OIDC Discovery §4.3 / RFC 8414 §3: when the IdP is pinned ONLY by - # discovery_url (no out-of-band issuer=), the document's self-declared - # issuer would otherwise be the trust anchor validate_endpoint_origins - # (in OidcDeviceAuth.__init__) checks the endpoints against — but that - # issuer comes from the same (possibly hostile, confused, or - # multi-tenant) document, so the check is vacuous, and an absent or - # non-string issuer makes it vacuous too (a document declaring attacker - # endpoints all on one origin would pass co-location trivially). Anchor - # instead to the caller-pinned discovery_url itself: require the - # credential endpoints to live on its origin, so a document can't - # redirect the device-code / refresh-token POSTs to an attacker origin. - # Origin-level, matching validate_endpoint_origins; pass issuer= and the - # endpoints explicitly if your IdP serves discovery and tokens from - # different origins. + # OIDC Discovery §4.3 / RFC 8414 §3: when pinned ONLY by discovery_url, + # the document's self-declared issuer (the anchor + # validate_endpoint_origins would use) comes from that same untrusted + # document, so it's a vacuous check. Anchor to the caller-pinned + # discovery_url instead: require the credential endpoints on its origin + # so the document can't redirect the POSTs off it. Origin-level; pass + # issuer= and explicit endpoints if discovery and tokens differ in origin. if discovery_url and not issuer: discovery_origin = _normalized_origin(discovery_url) for label, url in ( @@ -582,9 +521,9 @@ def resolve_config( 'device grant, or pass device_authorization_endpoint=... 
' 'explicitly.') - # Note: the credential-endpoint origin check (validate_endpoint_origins) - # is enforced centrally in OidcDeviceAuth.__init__, which every path - # (including the explicit constructor) goes through. + # The credential-endpoint origin check (validate_endpoint_origins) is + # enforced centrally in OidcDeviceAuth.__init__, which every path goes + # through. return OidcConfig( client_id=client_id, diff --git a/src/questdb/auth/_errors.py b/src/questdb/auth/_errors.py index b4b7f55f..17b83e9e 100644 --- a/src/questdb/auth/_errors.py +++ b/src/questdb/auth/_errors.py @@ -34,21 +34,17 @@ class OidcError(Exception): def __init__(self, *args, status: Optional[int] = None): super().__init__(*args) - # HTTP status that produced this error, when it originated from a - # non-JSON HTTP response (else None). Lets the device-flow poll loop and - # the silent refresh tell a terminal 4xx rejection (e.g. a WAF/proxy - # error page) from a transient 5xx/429/network blip even when the body - # was not a conformant JSON OAuth error. + # HTTP status behind a non-JSON HTTP response (else None), so the poll + # loop and silent refresh can tell a terminal 4xx (e.g. a WAF error + # page) from a transient 5xx/429/network blip. self.status = status class OidcConfigError(OidcError): """ - The OIDC configuration could not be resolved or is inconsistent. - - Raised, for example, when QuestDB does not advertise OIDC, when the - IdP device-authorization endpoint cannot be discovered, or when a - required argument is missing. + The OIDC configuration could not be resolved or is inconsistent (e.g. + QuestDB does not advertise OIDC, the IdP device-authorization endpoint + cannot be discovered, or a required argument is missing). """ @@ -58,21 +54,16 @@ class OidcNetworkError(OidcError): class OidcInteractionRequired(OidcError): """ - Interactive sign-in is required but the process is not interactive. 
- - This is raised instead of hanging forever when the device flow is - started from a context with no human to authorize it (e.g. a - ``papermill`` run, a cron job or CI). Use a QuestDB service-account - REST token or the OAuth2 client-credentials grant in those contexts. + Interactive sign-in is required; this is raised instead of hanging in a + non-interactive context (``papermill``, cron, CI). Use a QuestDB + service-account REST token or the OAuth2 client-credentials grant there. """ class OidcDeviceFlowError(OidcError): """ - The OAuth 2.0 device authorization grant failed. - - The original IdP ``error``/``error_description`` are preserved on the - exception when available. + The OAuth 2.0 device authorization grant failed; the IdP + ``error``/``error_description`` are preserved when available. """ def __init__( @@ -92,9 +83,7 @@ class OidcTimeoutError(OidcDeviceFlowError): class OidcAuthError(OidcError): """ - QuestDB rejected the token we presented. - - Typically a ``401``/``403`` from the server. The message includes hints - about the most common causes (scope / ``groups.encoded.in.token`` / + QuestDB rejected the token (typically a ``401``/``403`` from the server); + the message hints at common causes (scope / ``groups.encoded.in.token`` / ``audience`` mismatches). """ diff --git a/src/questdb/auth/_http.py b/src/questdb/auth/_http.py index f43afeca..a07444ed 100644 --- a/src/questdb/auth/_http.py +++ b/src/questdb/auth/_http.py @@ -23,17 +23,16 @@ ################################################################################ """ -A tiny HTTP helper built on the standard library. +A tiny stdlib-only HTTP helper. -OIDC device flow implementation deliberately avoids a hard dependency on ``requests``/``httpx`` -so that ``OidcDeviceAuth.token()`` / ``headers()`` work out of the box with no -extra installs.
Only the device flow, discovery and the REST adapter use this -module; the heavier adapters (SQLAlchemy / psycopg / ingestion ``Sender``) bring -their own transports. +Avoids a hard dependency on ``requests``/``httpx`` so ``OidcDeviceAuth.token()`` +/ ``headers()`` work with no extra installs. Only the device flow, discovery and +the REST adapter use this; heavier adapters (SQLAlchemy / psycopg / ingestion +``Sender``) bring their own transports. -Standard proxy environment variables (``HTTPS_PROXY`` / ``HTTP_PROXY`` / -``NO_PROXY``) are honoured automatically by ``urllib``. A custom CA bundle can be -supplied explicitly or via ``REQUESTS_CA_BUNDLE`` / ``SSL_CERT_FILE``. +``urllib`` honours the standard proxy env vars (``HTTPS_PROXY`` / ``HTTP_PROXY`` +/ ``NO_PROXY``); a custom CA bundle can come from ``REQUESTS_CA_BUNDLE`` / +``SSL_CERT_FILE``. """ from __future__ import annotations @@ -56,9 +55,8 @@ def build_ssl_context(ca_bundle: Optional[str] = None) -> ssl.SSLContext: """ - Build an SSL context, honouring an explicit CA bundle or the - ``REQUESTS_CA_BUNDLE`` / ``SSL_CERT_FILE`` environment variables - (useful behind a corporate TLS-intercepting proxy). + Build an SSL context from an explicit CA bundle or the ``REQUESTS_CA_BUNDLE`` + / ``SSL_CERT_FILE`` env vars (useful behind a TLS-intercepting proxy). """ ca = ( ca_bundle @@ -66,10 +64,8 @@ def build_ssl_context(ca_bundle: Optional[str] = None) -> ssl.SSLContext: or os.environ.get('SSL_CERT_FILE')) if not ca: return ssl.create_default_context() - # A missing / unreadable / invalid bundle makes the stdlib raise a raw - # FileNotFoundError or ssl.SSLError; map it to the package's typed error so - # a mistyped ca_bundle path (or env var) fails clearly instead of leaking a - # bare stdlib exception. + # Map the raw FileNotFoundError / ssl.SSLError from a missing/invalid bundle + # to a typed error so a mistyped path fails clearly. 
try: if os.path.isdir(ca): return ssl.create_default_context(capath=ca) @@ -104,13 +100,12 @@ def ok(self) -> bool: def safe_urlparse(url: str) -> tuple: """ - ``urllib.parse.urlparse(url)`` paired with its port, but with a typed error. + ``urlparse(url)`` paired with its port, but with a typed error. - Both ``urlparse`` itself (e.g. ``https://[::1`` — a malformed IPv6 literal) - and ``ParseResult.port`` (e.g. ``https://idp:notaport`` — a non-integer - port) raise a bare ``ValueError``; re-raise it as :class:`OidcConfigError` - so a malformed endpoint URL stays within the package's error contract - instead of escaping as a raw ``ValueError``. Returns ``(parts, port)``. + Both ``urlparse`` (malformed IPv6 literal) and ``ParseResult.port`` + (non-integer port) raise a bare ``ValueError``; re-raise as + :class:`OidcConfigError` to keep a malformed URL within the error contract. + Returns ``(parts, port)``. """ try: parts = urllib.parse.urlparse(url) @@ -121,8 +116,7 @@ def safe_urlparse(url: str) -> tuple: def _is_loopback(host: Optional[str]) -> bool: - # Traffic to a loopback address never leaves the host, so plaintext http - # carries no network interception risk and is always permitted. + # Loopback traffic never leaves the host, so plaintext http is safe here. if not host: return False if host.lower() == 'localhost': @@ -134,8 +128,7 @@ def _is_loopback(host: Optional[str]) -> bool: def _require_secure(url: str, insecure: bool) -> None: - # safe_urlparse maps a malformed URL (bad IPv6 literal / non-integer port) - # to OidcConfigError instead of letting a bare ValueError escape. + # safe_urlparse maps a malformed URL to OidcConfigError, not a bare ValueError. parts, _ = safe_urlparse(url) scheme = parts.scheme.lower() if scheme == 'https': @@ -154,19 +147,16 @@ def _require_secure(url: str, insecure: bool) -> None: class _NoRedirect(urllib.request.HTTPRedirectHandler): """Refuse to follow HTTP redirects. 
- The discovery / device / token / ``/settings`` / ``/exec`` endpoints never - legitimately redirect. Auto-following a ``30x`` is unsafe here because only - the *original* URL is vetted: ``_require_secure`` and - ``validate_endpoint_origins`` never see the redirect target. urllib also - does not strip the ``Authorization`` header on a cross-origin redirect, so a - single ``302`` from ``/exec`` would re-send ``Authorization: Bearer - `` to an attacker-chosen host — including a downgrade to plaintext - ``http`` — leaking the QuestDB token off-origin. - - Returning ``None`` makes urllib stop following and surface the ``30x`` as an - ``HTTPError`` (which :func:`request` turns into a non-2xx - :class:`HttpResponse`), so callers see a clean failure instead of a - silently-followed redirect. + These endpoints never legitimately redirect, and auto-following is unsafe: + only the *original* URL is vetted (``_require_secure`` / + ``validate_endpoint_origins`` never see the target), and urllib does not + strip ``Authorization`` on a cross-origin redirect — so one ``302`` from + ``/exec`` could re-send the bearer token to an attacker host, even + downgrading to plaintext ``http``. + + Returning ``None`` surfaces the ``30x`` as an ``HTTPError`` (which + :func:`request` turns into a non-2xx :class:`HttpResponse`), giving callers a + clean failure. """ def redirect_request(self, *args, **kwargs): @@ -174,9 +164,8 @@ def redirect_request(self, *args, **kwargs): def _opener(ctx: Optional[ssl.SSLContext]) -> urllib.request.OpenerDirector: - # build_opener keeps the default ProxyHandler (which reads *_PROXY env - # vars), while letting us pin our own TLS context and forbid redirects - # (the credential/token endpoints never legitimately redirect). + # build_opener keeps the default ProxyHandler (reads *_PROXY env vars) while + # letting us pin our own TLS context and forbid redirects. 
handlers: list = [_NoRedirect()] if ctx is not None: handlers.append(urllib.request.HTTPSHandler(context=ctx)) @@ -196,11 +185,11 @@ def request( """ Perform a single HTTP request. - ``form`` is form-url-encoded into the body (``application/x-www-form- - urlencoded``). HTTP error statuses (``4xx``/``5xx``) are returned as an - :class:`HttpResponse` rather than raised, so callers can inspect OAuth - error bodies (e.g. ``authorization_pending``). Only genuine network - failures raise (:class:`OidcNetworkError`). + ``form`` is encoded into the body as ``application/x-www-form-urlencoded``. + HTTP error statuses (``4xx``/``5xx``) are returned as an + :class:`HttpResponse`, not raised, so callers can inspect OAuth error bodies + (e.g. ``authorization_pending``); only genuine network failures raise + (:class:`OidcNetworkError`). """ _require_secure(url, insecure) body: Optional[bytes] = data @@ -221,10 +210,9 @@ def request( resp.read(), resp.headers) except urllib.error.HTTPError as e: - # 4xx/5xx still carry a (possibly JSON) body we want to inspect. - # Map a mid-body read failure to a network error (rather than letting a - # bare OSError escape) and close the error response so its socket isn't - # leaked (the poll loop drives many 400s during a long sign-in). + # 4xx/5xx still carry a (possibly JSON) body to inspect. Map a mid-body + # read failure to a network error, and close the response so its socket + # isn't leaked (the poll loop drives many 400s during a long sign-in). try: body = e.read() except (TimeoutError, OSError) as read_err: @@ -236,9 +224,8 @@ def request( except urllib.error.URLError as e: raise OidcNetworkError(f'Failed to reach {url}: {e.reason}') from e except http.client.InvalidURL as e: - # A malformed URL (e.g. a non-integer port) can't be turned into a - # request; surface it as a config error rather than letting a raw - # http.client exception escape the package's typed-error contract. + # A malformed URL (e.g. 
non-integer port) can't become a request; + # surface it as a config error, not a raw http.client exception. raise OidcConfigError(f'Malformed URL {url!r}: {e}') from e except (TimeoutError, OSError, http.client.HTTPException) as e: raise OidcNetworkError(f'Failed to reach {url}: {e}') from e @@ -261,8 +248,8 @@ def get_json( try: return resp.json() except (ValueError, UnicodeDecodeError, RecursionError) as e: - # RecursionError: deeply-nested JSON exhausts the decoder's stack; it is - # not a ValueError, so catch it explicitly to keep the typed contract. + # RecursionError (deeply-nested JSON) isn't a ValueError, so catch it + # explicitly to keep the typed contract. raise OidcError(f'Invalid JSON from {url}: {e}') from e @@ -278,7 +265,7 @@ def post_form( POST a form-url-encoded body and parse the JSON response. Returns ``(status, parsed_json)``. Used for the device-authorization and - token endpoints, which return JSON bodies on both success and error. + token endpoints, which return JSON on both success and error. """ resp = request( 'POST', url, form=form, headers=headers, timeout=timeout, ctx=ctx, @@ -286,16 +273,14 @@ def post_form( try: parsed = resp.json() except (ValueError, UnicodeDecodeError, RecursionError): - # RecursionError: deeply-nested JSON exhausts the decoder's stack; not a - # ValueError, so catch it explicitly to keep the typed contract. + # RecursionError (deeply-nested JSON) isn't a ValueError, so catch it + # explicitly to keep the typed contract. if resp.ok: raise OidcError( f'Expected JSON from {url}, got: {resp.text()[:200]}', status=resp.status) - # Non-JSON error body: surface the status + text. Attach the HTTP status - # so callers (the device-flow poll loop / silent refresh) can tell a - # terminal 4xx rejection from a transient 5xx/429 even though the body - # was not a conformant JSON OAuth error. 
+ # Non-JSON error body: attach the HTTP status so callers (poll loop / + # silent refresh) can tell a terminal 4xx from a transient 5xx/429. raise OidcError( f'HTTP {resp.status} from {url}: {resp.text()[:200]}', status=resp.status) diff --git a/src/questdb/auth/_questdb.py b/src/questdb/auth/_questdb.py index 76a6979b..2cb7da05 100644 --- a/src/questdb/auth/_questdb.py +++ b/src/questdb/auth/_questdb.py @@ -38,12 +38,10 @@ _DEFAULT_PG_PORT = 8812 _DEFAULT_DATABASE = 'qdb' -# A hostname or IP literal never contains the ILP conf-string delimiters (';' -# separates parameters, '=' separates key from value) nor whitespace/control -# characters. Reject them in the resolved host so a crafted or tampered URL -# can't smuggle extra conf parameters — e.g. ';tls_verify=unsafe_off;', which -# silently disables TLS certificate verification — into the 'addr=host:port;' -# string sender() hands to Sender.from_conf. Note ':' is intentionally allowed +# Reject ILP conf-string delimiters (';', '=') and whitespace/control chars in +# the host: a real hostname/IP never has them, so their presence means a +# tampered URL trying to inject conf params like ';tls_verify=unsafe_off;' +# (which disables TLS verification) into the addr= string. ':' is allowed # (IPv6 literals contain it; _ilp_addr brackets them). _ILLEGAL_HOST_CHARS = re.compile(r'[\x00-\x20\x7f;=]') @@ -71,12 +69,10 @@ def _import_pandas(): def _exec_json_to_df(data: Dict[str, Any], pandas): columns = data.get('columns') or [] - # /exec returns a list of {"name", "type"} column descriptors. A malformed - # response — a non-list, entries that aren't objects, or a non-string name — - # must surface as a clean OidcError, not a raw AttributeError from .get(), - # nor a TypeError from `name in df.columns` below when a name is - # non-hashable (a JSON list/object), escaping the package's typed-error - # contract. A real QuestDB column name is always a string. + # /exec returns a list of {"name", "type"} descriptors. 
Validate the shape + # (a real column name is always a string) so a malformed response raises a + # clean OidcError rather than a raw AttributeError from .get() or a + # TypeError from `name in df.columns` on a non-hashable name. if not isinstance(columns, list) or not all( isinstance(c, dict) and isinstance(c.get('name'), (str, type(None))) @@ -91,8 +87,8 @@ def _exec_json_to_df(data: Dict[str, Any], pandas): try: df = pandas.DataFrame(dataset, columns=names or None) except (ValueError, TypeError) as e: - # TypeError too: a hostile/malformed dataset shape can make the pandas - # constructor raise it (not only ValueError); keep it within OidcError. + # A malformed dataset shape can make the pandas constructor raise + # ValueError or TypeError; keep both within OidcError. raise OidcError( f'Unexpected shape in QuestDB /exec response: {e}') from e for col in columns: @@ -124,10 +120,9 @@ class QuestDB: """ A thin, authenticated QuestDB session built on an :class:`OidcDeviceAuth`. - Provides a one-call DataFrame query over REST plus adapters that feed the - same auto-refreshed token into your existing tools (SQLAlchemy / psycopg / - the ingestion ``Sender``). You can also just take :meth:`token` / - :meth:`headers` and wire them up yourself. + Offers a one-call DataFrame query over REST plus adapters that feed the + same auto-refreshed token into SQLAlchemy / psycopg / the ingestion + ``Sender``, or take :meth:`token` / :meth:`headers` and wire it up yourself. """ def __init__( @@ -141,12 +136,11 @@ def __init__( self._insecure = insecure self._ctx = auth._ctx # Same private CA bundle the auth/REST transport uses, so sender() can - # forward it to the ILP Sender (which has its own TLS stack). getattr - # keeps test doubles that only set _ctx working. + # forward it to the ILP Sender's own TLS stack. getattr keeps test + # doubles that only set _ctx working. 
self._ca_bundle = getattr(auth, '_ca_bundle', None) # safe_urlparse validates the port up-front, raising OidcConfigError - # (not a bare ValueError) for a malformed one, so the adapters that read - # the port stay within the package's typed-error contract. + # (not a bare ValueError) for a malformed one. self._parts, self._port = safe_urlparse(self.url) # -- token access ------------------------------------------------------- @@ -168,7 +162,7 @@ def sql(self, query: str, *, limit: Optional[str] = None, :class:`pandas.DataFrame`. Uses ``Authorization: Bearer`` (no token-length limit, unlike PG-wire), - which makes it the recommended path for large groups-encoded JWTs. + so it's the recommended path for large groups-encoded JWTs. :param query: The SQL query to run. :param limit: Optional QuestDB ``limit`` (e.g. ``"1,1000"``). @@ -195,17 +189,15 @@ def sql(self, query: str, *, limit: Optional[str] = None, try: data = resp.json() except (ValueError, UnicodeDecodeError, RecursionError): - # A 2xx body that isn't JSON (e.g. an HTML error/login page from a - # reverse proxy or captive portal), or deeply-nested JSON that - # exhausts the decoder's stack (RecursionError, not a ValueError), - # must surface as a clean OidcError, not a raw decoder exception. - # Mirrors the error path and post_form(). + # A 2xx body that isn't JSON (e.g. an HTML page from a proxy/captive + # portal) or deeply-nested JSON that exhausts the decoder's stack + # (RecursionError) surfaces as a clean OidcError. Mirrors post_form(). raise OidcError( 'QuestDB returned a non-JSON success response from /exec: ' f'{resp.text()[:300]}') if not isinstance(data, dict): - # Valid JSON but not an object (e.g. a bare list) would make - # _exec_json_to_df fail with AttributeError on .get(); reject it. + # Valid JSON but not an object (e.g. a bare list) would break + # _exec_json_to_df's .get(); reject it. 
raise OidcError( 'QuestDB /exec returned JSON that is not an object ' f'(got {type(data).__name__}); cannot build a DataFrame.') @@ -216,13 +208,13 @@ def sql(self, query: str, *, limit: Optional[str] = None, def _require_host(self, host: Optional[str] = None) -> str: """ Resolve the PG-wire / ILP host: an explicit ``host`` override, else the - host from the QuestDB URL. Raises when neither yields one (e.g. a URL - with no authority such as ``"localhost"`` or ``"questdb:9000"``) instead - of passing a bare ``None`` down to the driver. + host from the QuestDB URL. Raises (rather than passing a bare ``None`` + to the driver) when neither yields one, e.g. a URL with no authority + such as ``"localhost"`` or ``"questdb:9000"``. - The returned host is *unbracketed* — psycopg and SQLAlchemy take the - address and port as separate arguments. :meth:`_ilp_addr` adds the - brackets an IPv6 literal needs in the ILP ``addr=host:port`` form. + The returned host is *unbracketed* — psycopg and SQLAlchemy take address + and port separately. :meth:`_ilp_addr` adds the brackets an IPv6 literal + needs in the ILP ``addr=host:port`` form. """ resolved = host or self._parts.hostname if not resolved: @@ -242,8 +234,8 @@ def _require_host(self, host: Optional[str] = None) -> str: @staticmethod def _ilp_addr(host: str, port: int) -> str: - # Bracket an IPv6 literal so the ILP conf parser reads host:port - # unambiguously; hostnames and IPv4 addresses never contain ':'. + # Bracket an IPv6 literal (it contains ':', unlike hostnames/IPv4) so + # the ILP conf parser reads host:port unambiguously. bracketed = f'[{host}]' if ':' in host else host return f'{bracketed}:{port}' @@ -258,8 +250,8 @@ def sqlalchemy_engine( """ Build a SQLAlchemy ``Engine`` for QuestDB's PG-wire endpoint. 
- Connects as user ``_sso`` and injects a **fresh** token as the password - for every new connection (via a ``do_connect`` listener), so pooled + Connects as user ``_sso``, injecting a **fresh** token as the password + on every new connection (via a ``do_connect`` listener) so pooled connections always authenticate with a valid token. Requires ``acl.oidc.pg.token.as.password.enabled=true`` on the server. """ @@ -305,8 +297,8 @@ def psycopg( Open a raw psycopg (v3) or psycopg2 connection to QuestDB's PG-wire endpoint, authenticating as ``_sso`` with the current token. - The token is captured at connect time; open a new connection to pick up - a refreshed token. + The token is captured at connect time; reconnect to pick up a refreshed + token. """ mod = _pg_module() return mod.connect( @@ -320,8 +312,8 @@ def psycopg( def sender(self, *, port: Optional[int] = None, **sender_kwargs) -> 'questdb.ingress.Sender': """ - Build a :class:`questdb.ingress.Sender` (ILP-over-HTTP) configured with - the current bearer token, for ingestion. + Build a :class:`questdb.ingress.Sender` (ILP-over-HTTP) for ingestion, + configured with the current bearer token. The token is captured at creation time; create a new sender to pick up a refreshed token. @@ -330,8 +322,8 @@ def sender(self, *, port: Optional[int] = None, resolved_port = port or self._port or ( 443 if scheme == 'https' else 9000) # Coerce to int (before the heavy import, so bad input fails fast) so a - # stray non-integer port kwarg can't smuggle ILP conf parameters — e.g. - # "9000;tls_verify=unsafe_off" — into the addr= string via _ilp_addr, + # stray non-integer port can't inject conf params like + # "9000;tls_verify=unsafe_off" into the addr= string via _ilp_addr — # the same injection _require_host() blocks for the host. The # URL-derived self._port is already an int. 
try: @@ -353,11 +345,10 @@ def sender(self, *, port: Optional[int] = None, f'{self._ilp_addr(self._require_host(), resolved_port)};') # Forward the private CA bundle (explicit ca_bundle=, else the # REQUESTS_CA_BUNDLE / SSL_CERT_FILE env vars — same precedence as - # build_ssl_context) to the Sender's own TLS stack as tls_roots, so an - # https Sender against a private-CA QuestDB trusts the same roots the - # REST/IdP paths do. Only a PEM file works here (tls_roots is a file; - # the Sender has no capath equivalent), and only over https. The caller - # can still override via tls_roots=/tls_ca= in **sender_kwargs. + # build_ssl_context) to the Sender as tls_roots, so an https Sender + # against a private-CA QuestDB trusts the same roots the REST/IdP paths + # do. Only a PEM file works (tls_roots takes a file, no capath), only + # over https, and the caller can still override via tls_roots=/tls_ca=. if (scheme == 'https' and 'tls_roots' not in sender_kwargs and 'tls_ca' not in sender_kwargs): @@ -395,17 +386,16 @@ def connect( :param url: The QuestDB HTTP(S) base URL, e.g. ``"https://questdb.example.com:9000"``. :param flow: ``"auto"`` (default), ``"device"`` or ``"loopback"``. Today - ``"auto"`` always resolves to the device flow (works on local and - remote kernels); ``"loopback"`` is reserved for a future release. + ``"auto"`` resolves to the device flow (works on local and remote + kernels); ``"loopback"`` is reserved for a future release. :param cache: Token cache backend: ``"memory"`` (default) or ``None``. :param insecure: Allow plaintext ``http://`` URLs (development only). :param eager: If ``True`` (default), sign in immediately; otherwise defer until the first call that needs a token. :param opts: Forwarded to :meth:`OidcDeviceAuth.from_questdb` (e.g. 
``client_id``, ``scope``, ``audience``, ``issuer``, ``open_browser``, - ``qr``, ``ca_bundle``, ``timeout`` — the per-request IdP network - timeout, which also bounds how long a stalled IdP can hold the - token-acquisition lock). + ``qr``, ``ca_bundle``, ``timeout`` — the per-request IdP network timeout, + which also bounds how long a stalled IdP can hold the token lock). """ auth = OidcDeviceAuth.from_questdb( url, flow=flow, cache=cache, insecure=insecure, **opts) diff --git a/src/questdb/auth/_render.py b/src/questdb/auth/_render.py index 9263b556..85dc29c0 100644 --- a/src/questdb/auth/_render.py +++ b/src/questdb/auth/_render.py @@ -25,10 +25,9 @@ """ Presentation of the device-flow prompt. -Renders a clickable link + user code in Jupyter (via ``IPython.display``) and -falls back to plain text on a terminal. Nothing here is required for -``token()`` / ``headers()`` to work; ``IPython`` and ``qrcode`` are imported -lazily and only when actually used. +Renders a clickable link + user code in Jupyter (via ``IPython.display``), +falling back to plain text on a terminal. Not required for ``token()`` / +``headers()``; ``IPython`` and ``qrcode`` are imported lazily. """ from __future__ import annotations @@ -49,8 +48,8 @@ def in_ipython_kernel() -> bool: ip = get_ipython() if ip is None: return False - # ZMQInteractiveShell == notebook / qtconsole / lab; TerminalInteractive - # Shell == ipython in a terminal (still interactive). + # ZMQInteractiveShell == notebook/qtconsole/lab; TerminalInteractiveShell + # == ipython in a terminal. return ip.__class__.__name__ in ( 'ZMQInteractiveShell', 'TerminalInteractiveShell') @@ -59,9 +58,8 @@ def detect_interactive() -> bool: """ Best-effort detection of whether a human can complete the sign-in. - Interactive when attached to a TTY or running in an interactive IPython - shell. This guards against hanging forever in a non-interactive context - (papermill / cron / CI). 
+ Interactive when attached to a TTY or an interactive IPython shell; guards + against hanging forever in a non-interactive context (papermill/cron/CI). """ if in_ipython_kernel(): return True @@ -74,17 +72,14 @@ def detect_interactive() -> bool: def _verification_uri(resp: Dict[str, Any]) -> str: # RFC 8628 uses ``verification_uri``; some IdPs (older Google) use - # ``verification_url``. The device response is untrusted: coerce to str so a - # non-string value (e.g. a JSON number) can't crash the renderer - # (``re.sub`` / ``html.escape``) with a raw TypeError before the prompt is - # even shown — matching the defensive ``str(user_code)`` at the call sites. + # ``verification_url``. Coerce to str: the device response is untrusted, and + # a non-string (e.g. a JSON number) would crash the renderer. uri = resp.get('verification_uri') or resp.get('verification_url') or '' return uri if isinstance(uri, str) else '' def _verification_uri_complete(resp: Dict[str, Any]) -> Optional[str]: - # Coerce to str / None for the same untrusted-input reason as - # _verification_uri (a non-string would crash the renderer / _safe_link_url). + # Coerce to str/None for the same untrusted-input reason as _verification_uri. uri = (resp.get('verification_uri_complete') or resp.get('verification_url_complete')) return uri if isinstance(uri, str) else None @@ -94,15 +89,13 @@ def _safe_link_url(url: Optional[str]) -> Optional[str]: """ Return ``url`` only if it uses an ``http(s)`` scheme, else ``None``. - The verification URL comes from the IdP's device-authorization response, - which is untrusted input. Embedding it in an HTML ``href`` without a scheme - allowlist would let a malicious/MITM'd response inject a ``javascript:`` or - ``data:`` URL that executes in the notebook DOM when clicked - (``html.escape`` guards markup, not the URL scheme). 
+ The verification URL is untrusted (from the IdP's device-authorization + response); the scheme allowlist blocks a ``javascript:`` / ``data:`` href + from executing in the notebook DOM (``html.escape`` guards markup, not the + scheme). """ if not url or not isinstance(url, str): - # A non-string (e.g. a JSON number from an untrusted device response) - # has no scheme to vet and would make urlparse raise; treat it as unsafe. + # A non-string has no scheme to vet and would make urlparse raise. return None try: scheme = urllib.parse.urlparse(url).scheme.lower() @@ -116,9 +109,8 @@ def _render_link(url: Optional[str], *, text: Optional[str] = None) -> str: Render ``url`` as a clickable link, or as inert escaped text if its scheme is not ``http(s)``. - The visible label defaults to the URL itself. When the URL is rejected, the - (escaped) URL is shown as plain text so the user can still see/copy it, but - it is never turned into a clickable/executable link. + The label defaults to the URL itself. A rejected URL is shown as escaped + plain text (still visible/copyable) but never made clickable. """ safe = _safe_link_url(url) label = html.escape(text if text is not None else (url or '')) @@ -128,14 +120,10 @@ def _render_link(url: Optional[str], *, text: Optional[str] = None) -> str: f'rel="noopener noreferrer">{label}</a>
') -# C0/C1 control chars (incl. ESC, which drives ANSI escape sequences), the -# Unicode bidi controls, the zero-width / invisible-format chars, the -# line/paragraph separators and the interlinear-annotation controls. All can -# spoof a prompt: U+202E (RIGHT-TO-LEFT OVERRIDE) reverses displayed text to -# disguise a URL's host; U+2028/U+2029 inject fake lines; zero-width / invisible -# chars hide or join content. Stripped from untrusted device-response fields on -# BOTH the terminal and the Jupyter path (html.escape neutralizes markup, not -# these). Covers the dangerous Unicode Cc/Cf code points for our inputs. +# Strips C0/C1/ESC, bidi overrides, zero-width and line/paragraph separators +# — all can spoof the prompt (e.g. U+202E reverses a URL's host). Applied to +# untrusted device-response fields on both paths; html.escape would not catch +# these. _CONTROL_CHARS = re.compile( r'[\x00-\x1f\x7f-\x9f\u00ad\u061c\u115f\u180e\u200b-\u200f' r'\u2028-\u202e\u2060-\u2064\u2066-\u2069\ufeff\ufff9-\ufffb]') @@ -143,17 +131,12 @@ def _render_link(url: Optional[str], *, text: Optional[str] = None) -> str: def _strip_control(text: Optional[str]) -> str: """ - Strip control / format characters from an untrusted string before it is - written to a terminal. - - The verification URL, user code and IdP error strings come from the device- - authorization response (untrusted). Writing them verbatim to a TTY would let - a hostile or MITM'd response inject ANSI escape sequences (C0/C1 control - chars — cursor moves, screen clears) or Unicode bidi overrides / zero-width - / line separators to spoof the prompt or hide the real sign-in URL (e.g. - U+202E visually reverses the displayed host). Needed on BOTH paths: the - plain-text terminal path (raw bytes to the TTY) and the Jupyter path — - ``html.escape`` neutralizes markup, not bidi/zero-width spoofing. + Strip control / format characters from an untrusted string before display. 
+ + The verification URL, user code and IdP error strings are untrusted; raw + ANSI escapes or bidi/zero-width/line-separator chars could spoof the prompt + or hide the real sign-in URL. Needed on both paths — ``html.escape`` does + not catch bidi/zero-width spoofing. """ if not text: return '' @@ -208,12 +191,10 @@ def _write(self, text: str) -> None: try: self._stream.write(text) except UnicodeEncodeError: - # The stream's encoding can't represent some characters (e.g. - # the emoji on a legacy code-page Windows console, an ``ascii`` - # PYTHONIOENCODING, or a redirected stderr). Degrade only those - # characters instead of letting the whole prompt — including the - # verification URL and user code — vanish, which would make the - # sign-in look like a silent hang. + # The stream's encoding can't represent some chars (e.g. the + # emoji on a legacy Windows console or ascii PYTHONIOENCODING). + # Degrade only those, so the URL/code don't vanish and look like + # a silent hang. enc = getattr(self._stream, 'encoding', None) or 'ascii' self._stream.write( text.encode(enc, 'replace').decode(enc, 'replace')) @@ -272,16 +253,12 @@ def _panel(self, body: str) -> str: def _prompt_head(self): """Header + sanitized verification link and user code. - Shared by :meth:`on_prompt` and :meth:`_render_with_status` so the - sanitization can't be applied to one path and forgotten on the other. - ``verification_uri`` / ``user_code`` / ``verification_uri_complete`` are - untrusted device-response fields: strip control / bidi / zero-width - chars (which ``html.escape`` does NOT remove) before rendering, so a - hostile or MITM'd response can't inject a U+202E bidi override or - zero-width chars to visually spoof the prompt in the notebook DOM. - ``_render_link`` additionally html-escapes and scheme-vets the URL. - Returns ``(body, uri, complete)`` — the sanitized URLs are handed back - so the QR target isn't re-derived (and re-sanitized). 
+ Shared by :meth:`on_prompt` and :meth:`_render_with_status` so + sanitization is applied on both paths, never forgotten on one. The + untrusted device-response fields are stripped of control/bidi/zero-width + chars (which ``html.escape`` does NOT remove) before rendering; + ``_render_link`` also html-escapes and scheme-vets the URL. Returns + ``(body, uri, complete)`` so the QR target isn't re-derived. """ resp = self._resp uri = _strip_control(_verification_uri(resp)) @@ -327,8 +304,7 @@ def on_waiting(self, seconds_left: float) -> None: color='#888') def on_success(self, identity: Optional[str], expires_in: float) -> None: - # identity is derived from the (untrusted) JWT claims — strip control / - # bidi chars before html-escaping, as for the other rendered fields. + # identity comes from untrusted JWT claims: strip then html-escape. who = html.escape(_strip_control(identity)) if identity else '' mins = max(1, int(round(expires_in / 60))) suffix = f' as {who}' if who else '' @@ -337,7 +313,7 @@ def on_success(self, identity: Optional[str], expires_in: float) -> None: color='#2e7d32') def on_failure(self, message: str) -> None: - # message may interpolate the IdP's (untrusted) error_description. + # message may interpolate the IdP's untrusted error_description. self._render_with_status( '❌ ' + html.escape(_strip_control(message)), color='#c62828')