From a1b3b010fa06eb8d7525d17acf213f6ae1eb6989 Mon Sep 17 00:00:00 2001 From: Christian Chwala Date: Tue, 31 Mar 2026 23:02:48 +0200 Subject: [PATCH 1/7] feat(db): create roles and enable RLS on base tables - Creates user1_role (parser + webserver for user1 data) and webserver_role (read-all for admin queries; SET ROLE for scoped reads). - Grants SELECT/INSERT/UPDATE on cml_data, cml_metadata, cml_stats to user1_role; SELECT-only to webserver_role. - Grants SELECT on cml_data_1h and EXECUTE on update_cml_stats() to user1_role. - Enables RLS on the three base tables and creates per-role isolation policies (user_id = 'user1'). - webserver_role gets a permissive (USING true) read-all policy; scoped reads are achieved via SET ROLE user1_role. Backward-compatible: myuser (superuser) bypasses RLS, so the existing parser and webserver continue to work without changes until PR3 and PR5 wire up the new role credentials. Note: cml_data_1h (TimescaleDB continuous aggregate / materialized view) does not support RLS at the DB level; application queries must always include WHERE user_id = ?. Part of multi-user RLS rollout (issue #31). --- database/MIGRATION.md | 108 ++++++++++++++++ database/init.sql | 78 +++++++++++- database/migrations/004_add_roles_rls.sql | 144 ++++++++++++++++++++++ 3 files changed, 329 insertions(+), 1 deletion(-) create mode 100644 database/migrations/004_add_roles_rls.sql diff --git a/database/MIGRATION.md b/database/MIGRATION.md index b24e189..b9afd32 100644 --- a/database/MIGRATION.md +++ b/database/MIGRATION.md @@ -2,6 +2,114 @@ --- +## PR `feat/db-roles-rls` — Create database roles and enable Row-Level Security + +**Branch:** `feat/db-roles-rls` + +`init.sql` only runs on a fresh database volume, so when deploying this branch +to a machine that already has data you must apply the migration file below +**after** migrations 001–003 from `feat/db-add-user-id` have already been applied. 
+ +### Changes + +| File | What it does | +|------|-------------| +| `migrations/004_add_roles_rls.sql` | Creates `user1_role` and `webserver_role`; grants table/function permissions; enables RLS on `cml_data`, `cml_metadata`, `cml_stats`; creates per-role isolation policies | + +### Backward compatibility + +This migration is **fully backward-compatible** with the existing services: + +- `myuser` (PostgreSQL superuser) bypasses RLS by default. The parser and + webserver still connect as `myuser` and see all data unchanged until + PR3 (`feat/parser-user-id`) and PR5 (`feat/webserver-auth`) wire up the + new role credentials. +- No table schema changes — only roles, grants, and policies are added. +- Rollback is possible: revoke grants, drop policies, then drop roles (see + Rollback section below). + +### Note on `cml_data_1h` (continuous aggregate) + +PostgreSQL RLS cannot be applied to materialized views, so `cml_data_1h` is +**not** automatically row-filtered. Queries to this view **must** always +include a `WHERE user_id = ?` predicate. The webserver (PR5) and Grafana +panels enforce this. All raw-data queries route through `cml_data`, which +**is** protected by RLS. + +### Steps + +**1. Back up the database** + +```bash +docker compose exec database pg_dump -U myuser -d mydatabase \ + > backup_pre_roles_rls_$(date +%Y%m%d_%H%M%S).sql +``` + +**2. Pull and rebuild** + +```bash +git pull origin feat/db-roles-rls # or merge to main first +docker compose up -d --build +``` + +**3. Apply the migration** + +```bash +docker compose exec -T database psql -U myuser -d mydatabase \ + < database/migrations/004_add_roles_rls.sql +``` + +**4. 
Verify** + +```bash +# List the new roles +docker compose exec database psql -U myuser -d mydatabase \ + -c "\du user1_role webserver_role" + +# Confirm RLS is enabled on all three tables +docker compose exec database psql -U myuser -d mydatabase \ + -c "SELECT relname, relrowsecurity FROM pg_class \ + WHERE relname IN ('cml_data','cml_metadata','cml_stats');" + +# Smoke-test: user1_role should see its own rows and nothing else +docker compose exec database psql \ + -U user1_role -d mydatabase \ + -c "SELECT count(*) FROM cml_data;" +``` + +**Rollback:** + +```bash +docker compose exec database psql -U myuser -d mydatabase -c " +-- Drop policies +DROP POLICY IF EXISTS user1_cml_data_policy ON cml_data; +DROP POLICY IF EXISTS user1_cml_metadata_policy ON cml_metadata; +DROP POLICY IF EXISTS user1_cml_stats_policy ON cml_stats; +DROP POLICY IF EXISTS webserver_cml_data_policy ON cml_data; +DROP POLICY IF EXISTS webserver_cml_metadata_policy ON cml_metadata; +DROP POLICY IF EXISTS webserver_cml_stats_policy ON cml_stats; + +-- Disable RLS +ALTER TABLE cml_data DISABLE ROW LEVEL SECURITY; +ALTER TABLE cml_metadata DISABLE ROW LEVEL SECURITY; +ALTER TABLE cml_stats DISABLE ROW LEVEL SECURITY; + +-- Revoke grants +REVOKE ALL ON cml_data, cml_metadata, cml_stats, cml_data_1h + FROM user1_role, webserver_role; +REVOKE EXECUTE ON FUNCTION update_cml_stats(TEXT, TEXT) + FROM user1_role; +REVOKE user1_role FROM webserver_role; +REVOKE USAGE ON SCHEMA public FROM user1_role, webserver_role; + +-- Drop roles +DROP ROLE IF EXISTS user1_role; +DROP ROLE IF EXISTS webserver_role; +" +``` + +--- + ## PR `feat/db-add-user-id` — Add `user_id` for multi-user RLS support **Branch:** `feat/db-add-user-id` diff --git a/database/init.sql b/database/init.sql index b600dff..1a998bc 100644 --- a/database/init.sql +++ b/database/init.sql @@ -164,4 +164,80 @@ ALTER TABLE cml_data SET ( timescaledb.compress_orderby = 'time DESC' ); -SELECT add_compression_policy('cml_data', INTERVAL '7 days'); 
\ No newline at end of file +SELECT add_compression_policy('cml_data', INTERVAL '7 days'); + +-- --------------------------------------------------------------------------- +-- Database roles and Row-Level Security (PR feat/db-roles-rls) +-- +-- user1_role: used by the user1 parser instance (writes) and by the +-- webserver (reads via SET ROLE) for user1's scoped data. +-- webserver_role: used by the webserver process. Has a read-all RLS policy +-- for aggregate/admin queries; SET ROLEs to a user role for scoped reads. +-- +-- Passwords shown here are development defaults. +-- Override them via environment variables or a secrets manager in production. +-- +-- Note on cml_data_1h: +-- PostgreSQL RLS cannot be applied to materialized views, so queries to +-- cml_data_1h MUST include a WHERE user_id = ? predicate at the +-- application layer. All raw-data queries route through the RLS-protected +-- base table (cml_data) and are automatically filtered. +-- --------------------------------------------------------------------------- + +CREATE ROLE user1_role LOGIN PASSWORD 'user1password'; +CREATE ROLE webserver_role LOGIN PASSWORD 'webserverpassword'; + +-- Allow webserver_role to impersonate user roles (SET ROLE user1_role). +GRANT user1_role TO webserver_role; + +-- Schema access. +GRANT USAGE ON SCHEMA public TO user1_role, webserver_role; + +-- Table permissions. +GRANT SELECT, INSERT, UPDATE ON cml_data TO user1_role; +GRANT SELECT, INSERT, UPDATE ON cml_metadata TO user1_role; +GRANT SELECT, INSERT, UPDATE ON cml_stats TO user1_role; + +GRANT SELECT ON cml_data TO webserver_role; +GRANT SELECT ON cml_metadata TO webserver_role; +GRANT SELECT ON cml_stats TO webserver_role; + +-- Continuous aggregate — application must add WHERE user_id = ? filter. +GRANT SELECT ON cml_data_1h TO user1_role, webserver_role; + +-- Parser calls update_cml_stats() to upsert per-CML statistics. 
+GRANT EXECUTE ON FUNCTION update_cml_stats(TEXT, TEXT) TO user1_role; + +-- Enable Row-Level Security on base tables. +ALTER TABLE cml_data ENABLE ROW LEVEL SECURITY; +ALTER TABLE cml_metadata ENABLE ROW LEVEL SECURITY; +ALTER TABLE cml_stats ENABLE ROW LEVEL SECURITY; + +-- RLS policies for user1_role. +CREATE POLICY user1_cml_data_policy ON cml_data + FOR ALL TO user1_role + USING (user_id = 'user1') + WITH CHECK (user_id = 'user1'); + +CREATE POLICY user1_cml_metadata_policy ON cml_metadata + FOR ALL TO user1_role + USING (user_id = 'user1') + WITH CHECK (user_id = 'user1'); + +CREATE POLICY user1_cml_stats_policy ON cml_stats + FOR ALL TO user1_role + USING (user_id = 'user1') + WITH CHECK (user_id = 'user1'); + +-- RLS policies for webserver_role (read-all; scoped reads use SET ROLE). +CREATE POLICY webserver_cml_data_policy ON cml_data + FOR SELECT TO webserver_role + USING (true); + +CREATE POLICY webserver_cml_metadata_policy ON cml_metadata + FOR SELECT TO webserver_role + USING (true); + +CREATE POLICY webserver_cml_stats_policy ON cml_stats + FOR SELECT TO webserver_role + USING (true); \ No newline at end of file diff --git a/database/migrations/004_add_roles_rls.sql b/database/migrations/004_add_roles_rls.sql new file mode 100644 index 0000000..15c6247 --- /dev/null +++ b/database/migrations/004_add_roles_rls.sql @@ -0,0 +1,144 @@ +-- Migration 004: Create database roles and enable Row-Level Security +-- +-- Part of PR feat/db-roles-rls (multi-user RLS support, Phase 2). +-- Run this AFTER migrations 001–003 (feat/db-add-user-id). +-- +-- What this migration does: +-- 1. Creates user1_role (parser + webserver for user1's data). +-- 2. Creates webserver_role (read-all for admin/aggregate queries; +-- can SET ROLE to a user role for scoped reads). +-- 3. Grants table and function permissions to each role. +-- 4. Enables Row-Level Security on cml_data, cml_metadata, cml_stats. +-- 5. Creates per-role RLS policies on those three tables. +-- 6. 
Grants SELECT on cml_data_1h (continuous aggregate). +-- +-- Backward-compatibility: +-- myuser (superuser) bypasses RLS by default, so the existing parser +-- and webserver — which both still connect as myuser — continue to work +-- without any changes until PR3 (feat/parser-user-id) and +-- PR5 (feat/webserver-auth) wire up the new role credentials. +-- +-- Note on cml_data_1h (continuous aggregate): +-- PostgreSQL RLS cannot be applied to materialized views, so cml_data_1h +-- has no automatic row filtering. Queries to this view MUST always include +-- a WHERE user_id = ? predicate. The webserver (PR5) and Grafana enforce +-- this at the application layer. All raw-data queries go through the +-- RLS-protected base table (cml_data) and ARE automatically filtered. +-- +-- Passwords shown here are development defaults. Override them via +-- environment variables or a secrets manager before going to production. +-- +-- Apply with: +-- docker compose exec -T database psql -U myuser -d mydatabase \ +-- < database/migrations/004_add_roles_rls.sql +-- +-- Rollback: see MIGRATION.md — drop the roles after revoking all grants. + +-- --------------------------------------------------------------------------- +-- Step 1: Create roles (idempotent via DO block) +-- --------------------------------------------------------------------------- + +DO $$ +BEGIN + IF NOT EXISTS (SELECT FROM pg_roles WHERE rolname = 'user1_role') THEN + CREATE ROLE user1_role LOGIN PASSWORD 'user1password'; + END IF; +END +$$; + +DO $$ +BEGIN + IF NOT EXISTS (SELECT FROM pg_roles WHERE rolname = 'webserver_role') THEN + CREATE ROLE webserver_role LOGIN PASSWORD 'webserverpassword'; + END IF; +END +$$; + +-- Allow webserver_role to impersonate user roles (SET ROLE user1_role). +-- This replaces the connection-level role switch so the webserver can scope +-- all queries to the logged-in user's data without reconnecting. 
+GRANT user1_role TO webserver_role; + +-- --------------------------------------------------------------------------- +-- Step 2: Schema access +-- --------------------------------------------------------------------------- + +GRANT USAGE ON SCHEMA public TO user1_role, webserver_role; + +-- --------------------------------------------------------------------------- +-- Step 3: Table permissions +-- --------------------------------------------------------------------------- + +-- user1_role: INSERT/SELECT/UPDATE on the three data tables. +-- No DELETE: raw data is never deleted by design. +GRANT SELECT, INSERT, UPDATE ON cml_data TO user1_role; +GRANT SELECT, INSERT, UPDATE ON cml_metadata TO user1_role; +GRANT SELECT, INSERT, UPDATE ON cml_stats TO user1_role; + +-- webserver_role: read-only on base tables. +-- It switches to a user role (SET ROLE) for write operations triggered +-- via the web UI; those operations then use the user role's permissions. +GRANT SELECT ON cml_data TO webserver_role; +GRANT SELECT ON cml_metadata TO webserver_role; +GRANT SELECT ON cml_stats TO webserver_role; + +-- Continuous aggregate view. +-- RLS cannot be enforced on the aggregate directly (see header note). +-- Queries must always filter by user_id at the application layer. +GRANT SELECT ON cml_data_1h TO user1_role, webserver_role; + +-- Parser uses update_cml_stats() to upsert per-CML statistics. +-- Grant execute so user1_role can call it without superuser privileges. 
+GRANT EXECUTE ON FUNCTION update_cml_stats(TEXT, TEXT) TO user1_role; + +-- --------------------------------------------------------------------------- +-- Step 4: Enable Row-Level Security on base tables +-- --------------------------------------------------------------------------- + +ALTER TABLE cml_data ENABLE ROW LEVEL SECURITY; +ALTER TABLE cml_metadata ENABLE ROW LEVEL SECURITY; +ALTER TABLE cml_stats ENABLE ROW LEVEL SECURITY; + +-- --------------------------------------------------------------------------- +-- Step 5: RLS policies for user1_role +-- +-- Each policy binds the role to rows where user_id = 'user1'. +-- USING: applied on SELECT / UPDATE / DELETE (which rows are visible). +-- WITH CHECK: applied on INSERT / UPDATE (which rows can be written). +-- --------------------------------------------------------------------------- + +CREATE POLICY user1_cml_data_policy ON cml_data + FOR ALL TO user1_role + USING (user_id = 'user1') + WITH CHECK (user_id = 'user1'); + +CREATE POLICY user1_cml_metadata_policy ON cml_metadata + FOR ALL TO user1_role + USING (user_id = 'user1') + WITH CHECK (user_id = 'user1'); + +CREATE POLICY user1_cml_stats_policy ON cml_stats + FOR ALL TO user1_role + USING (user_id = 'user1') + WITH CHECK (user_id = 'user1'); + +-- --------------------------------------------------------------------------- +-- Step 6: RLS policies for webserver_role +-- +-- webserver_role has a permissive (USING true) read-all policy so it can +-- execute admin / aggregate queries without role-switching overhead. +-- For per-user scoped reads the webserver does SET ROLE user1_role, which +-- causes user1_role's policies above to take effect instead. 
+-- --------------------------------------------------------------------------- + +CREATE POLICY webserver_cml_data_policy ON cml_data + FOR SELECT TO webserver_role + USING (true); + +CREATE POLICY webserver_cml_metadata_policy ON cml_metadata + FOR SELECT TO webserver_role + USING (true); + +CREATE POLICY webserver_cml_stats_policy ON cml_stats + FOR SELECT TO webserver_role + USING (true); From bd2be490960d676b0efdcf49b89128dc0c3d4e09 Mon Sep 17 00:00:00 2001 From: Christian Chwala Date: Tue, 31 Mar 2026 23:15:38 +0200 Subject: [PATCH 2/7] refactor(db): use current_user RLS + security-barrier view for aggregate MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Replace the initial per-user-role design with a cleaner approach: - Rename user1_role → user1 (role name = user_id value = current_user). This one change unlocks two improvements: - Generic current_user RLS policies: a single policy per base table covers all users; no per-user policy is needed when onboarding new users. - cml_data_1h_secure: a security_barrier view over the continuous aggregate using WHERE user_id = current_user. User roles query this view for fully DB-enforced per-user filtering with no application-level WHERE clause needed. webserver_role retains direct access to cml_data_1h for admin/cross-user aggregate queries. Also updates docs/multi-user-architecture.md to reflect the new role naming convention throughout. 
--- database/init.sql | 84 +++++++----- database/migrations/004_add_roles_rls.sql | 160 +++++++++++++--------- docs/multi-user-architecture.md | 77 ++++++----- 3 files changed, 186 insertions(+), 135 deletions(-) diff --git a/database/init.sql b/database/init.sql index 1a998bc..202d94c 100644 --- a/database/init.sql +++ b/database/init.sql @@ -169,67 +169,63 @@ SELECT add_compression_policy('cml_data', INTERVAL '7 days'); -- --------------------------------------------------------------------------- -- Database roles and Row-Level Security (PR feat/db-roles-rls) -- --- user1_role: used by the user1 parser instance (writes) and by the --- webserver (reads via SET ROLE) for user1's scoped data. +-- Role naming convention: PG login role name = user_id value in the data. +-- "user1" role ↔ user_id = 'user1' — enables current_user-based RLS +-- policies and the cml_data_1h_secure security-barrier view below. +-- +-- user1: used by the user1 parser instance (writes) and by the webserver +-- (via SET ROLE) for DB-enforced scoped reads. -- webserver_role: used by the webserver process. Has a read-all RLS policy --- for aggregate/admin queries; SET ROLEs to a user role for scoped reads. +-- for admin/aggregate queries; SET ROLEs to a user role for scoped reads. -- -- Passwords shown here are development defaults. -- Override them via environment variables or a secrets manager in production. --- --- Note on cml_data_1h: --- PostgreSQL RLS cannot be applied to materialized views, so queries to --- cml_data_1h MUST include a WHERE user_id = ? predicate at the --- application layer. All raw-data queries route through the RLS-protected --- base table (cml_data) and are automatically filtered. 
-- --------------------------------------------------------------------------- -CREATE ROLE user1_role LOGIN PASSWORD 'user1password'; +CREATE ROLE user1 LOGIN PASSWORD 'user1password'; CREATE ROLE webserver_role LOGIN PASSWORD 'webserverpassword'; --- Allow webserver_role to impersonate user roles (SET ROLE user1_role). -GRANT user1_role TO webserver_role; +-- Allow webserver_role to impersonate user roles (SET ROLE user1). +GRANT user1 TO webserver_role; -- Schema access. -GRANT USAGE ON SCHEMA public TO user1_role, webserver_role; +GRANT USAGE ON SCHEMA public TO user1, webserver_role; -- Table permissions. -GRANT SELECT, INSERT, UPDATE ON cml_data TO user1_role; -GRANT SELECT, INSERT, UPDATE ON cml_metadata TO user1_role; -GRANT SELECT, INSERT, UPDATE ON cml_stats TO user1_role; +GRANT SELECT, INSERT, UPDATE ON cml_data TO user1; +GRANT SELECT, INSERT, UPDATE ON cml_metadata TO user1; +GRANT SELECT, INSERT, UPDATE ON cml_stats TO user1; GRANT SELECT ON cml_data TO webserver_role; GRANT SELECT ON cml_metadata TO webserver_role; GRANT SELECT ON cml_stats TO webserver_role; --- Continuous aggregate — application must add WHERE user_id = ? filter. -GRANT SELECT ON cml_data_1h TO user1_role, webserver_role; - -- Parser calls update_cml_stats() to upsert per-CML statistics. -GRANT EXECUTE ON FUNCTION update_cml_stats(TEXT, TEXT) TO user1_role; +GRANT EXECUTE ON FUNCTION update_cml_stats(TEXT, TEXT) TO user1; -- Enable Row-Level Security on base tables. ALTER TABLE cml_data ENABLE ROW LEVEL SECURITY; ALTER TABLE cml_metadata ENABLE ROW LEVEL SECURITY; ALTER TABLE cml_stats ENABLE ROW LEVEL SECURITY; --- RLS policies for user1_role. -CREATE POLICY user1_cml_data_policy ON cml_data - FOR ALL TO user1_role - USING (user_id = 'user1') - WITH CHECK (user_id = 'user1'); +-- Generic current_user policies (one per table covers all users). +-- Because role name = user_id value, no per-user policy is needed. 
+CREATE POLICY user_cml_data_policy ON cml_data + FOR ALL + USING (user_id = current_user) + WITH CHECK (user_id = current_user); -CREATE POLICY user1_cml_metadata_policy ON cml_metadata - FOR ALL TO user1_role - USING (user_id = 'user1') - WITH CHECK (user_id = 'user1'); +CREATE POLICY user_cml_metadata_policy ON cml_metadata + FOR ALL + USING (user_id = current_user) + WITH CHECK (user_id = current_user); -CREATE POLICY user1_cml_stats_policy ON cml_stats - FOR ALL TO user1_role - USING (user_id = 'user1') - WITH CHECK (user_id = 'user1'); +CREATE POLICY user_cml_stats_policy ON cml_stats + FOR ALL + USING (user_id = current_user) + WITH CHECK (user_id = current_user); --- RLS policies for webserver_role (read-all; scoped reads use SET ROLE). +-- Permissive read-all policies for webserver_role (admin / cross-user use). CREATE POLICY webserver_cml_data_policy ON cml_data FOR SELECT TO webserver_role USING (true); @@ -240,4 +236,22 @@ CREATE POLICY webserver_cml_metadata_policy ON cml_metadata CREATE POLICY webserver_cml_stats_policy ON cml_stats FOR SELECT TO webserver_role - USING (true); \ No newline at end of file + USING (true); + +-- Security-barrier view over cml_data_1h (continuous aggregate). +-- +-- PostgreSQL cannot apply RLS to materialized views. This view wraps +-- cml_data_1h with WHERE user_id = current_user and security_barrier, +-- providing DB-enforced per-user filtering with no application WHERE clause. +-- +-- User roles query cml_data_1h_secure (auto-filtered). +-- webserver_role queries cml_data_1h_secure after SET ROLE for user pages; +-- queries cml_data_1h directly (as webserver_role) for admin/cross-user +-- aggregates — those paths still need WHERE user_id = ? in the app. 
+CREATE VIEW cml_data_1h_secure WITH (security_barrier) AS +SELECT * FROM cml_data_1h +WHERE user_id = current_user; + +GRANT SELECT ON cml_data_1h_secure TO user1; +GRANT SELECT ON cml_data_1h TO webserver_role; +GRANT SELECT ON cml_data_1h_secure TO webserver_role; \ No newline at end of file diff --git a/database/migrations/004_add_roles_rls.sql b/database/migrations/004_add_roles_rls.sql index 15c6247..5acc93f 100644 --- a/database/migrations/004_add_roles_rls.sql +++ b/database/migrations/004_add_roles_rls.sql @@ -3,14 +3,28 @@ -- Part of PR feat/db-roles-rls (multi-user RLS support, Phase 2). -- Run this AFTER migrations 001–003 (feat/db-add-user-id). -- +-- Role naming convention: +-- PostgreSQL login role names intentionally match the user_id values +-- stored in the data tables (e.g. role "user1" ↔ user_id = 'user1'). +-- This allows RLS policies to use current_user for automatic filtering, +-- and lets cml_data_1h_secure (a security-barrier view over the +-- continuous aggregate) enforce per-user isolation at the DB level — +-- eliminating the need for application-level WHERE user_id = ? clauses +-- on the aggregate. +-- -- What this migration does: --- 1. Creates user1_role (parser + webserver for user1's data). --- 2. Creates webserver_role (read-all for admin/aggregate queries; --- can SET ROLE to a user role for scoped reads). --- 3. Grants table and function permissions to each role. +-- 1. Creates user1 login role (parser + webserver for user1's data). +-- 2. Creates webserver_role (read-all for admin queries; +-- SET ROLE user1 for DB-enforced scoped reads). +-- 3. Grants table/function permissions to each role. -- 4. Enables Row-Level Security on cml_data, cml_metadata, cml_stats. --- 5. Creates per-role RLS policies on those three tables. --- 6. Grants SELECT on cml_data_1h (continuous aggregate). +-- 5. Creates a single generic current_user policy per base table +-- (works for all users; no per-user policy needed at onboarding). +-- 6. 
Creates cml_data_1h_secure — a security_barrier view over the +-- continuous aggregate with WHERE user_id = current_user. +-- User roles get SELECT only on this view (not the raw aggregate). +-- webserver_role retains direct SELECT on cml_data_1h for admin +-- queries, and also on cml_data_1h_secure when it SET ROLEs. -- -- Backward-compatibility: -- myuser (superuser) bypasses RLS by default, so the existing parser @@ -18,13 +32,6 @@ -- without any changes until PR3 (feat/parser-user-id) and -- PR5 (feat/webserver-auth) wire up the new role credentials. -- --- Note on cml_data_1h (continuous aggregate): --- PostgreSQL RLS cannot be applied to materialized views, so cml_data_1h --- has no automatic row filtering. Queries to this view MUST always include --- a WHERE user_id = ? predicate. The webserver (PR5) and Grafana enforce --- this at the application layer. All raw-data queries go through the --- RLS-protected base table (cml_data) and ARE automatically filtered. --- -- Passwords shown here are development defaults. Override them via -- environment variables or a secrets manager before going to production. -- @@ -35,13 +42,16 @@ -- Rollback: see MIGRATION.md — drop the roles after revoking all grants. -- --------------------------------------------------------------------------- --- Step 1: Create roles (idempotent via DO block) +-- Step 1: Create roles (idempotent via DO blocks) +-- +-- Role "user1" matches user_id = 'user1' in the data, enabling the +-- current_user-based RLS policies and security-barrier view below. 
-- --------------------------------------------------------------------------- DO $$ BEGIN - IF NOT EXISTS (SELECT FROM pg_roles WHERE rolname = 'user1_role') THEN - CREATE ROLE user1_role LOGIN PASSWORD 'user1password'; + IF NOT EXISTS (SELECT FROM pg_roles WHERE rolname = 'user1') THEN + CREATE ROLE user1 LOGIN PASSWORD 'user1password'; END IF; END $$; @@ -54,42 +64,36 @@ BEGIN END $$; --- Allow webserver_role to impersonate user roles (SET ROLE user1_role). --- This replaces the connection-level role switch so the webserver can scope --- all queries to the logged-in user's data without reconnecting. -GRANT user1_role TO webserver_role; +-- Allow webserver_role to impersonate user roles (SET ROLE user1). +-- After SET ROLE user1 the session current_user becomes 'user1', so the +-- generic RLS policies and cml_data_1h_secure both filter automatically. +GRANT user1 TO webserver_role; -- --------------------------------------------------------------------------- -- Step 2: Schema access -- --------------------------------------------------------------------------- -GRANT USAGE ON SCHEMA public TO user1_role, webserver_role; +GRANT USAGE ON SCHEMA public TO user1, webserver_role; -- --------------------------------------------------------------------------- -- Step 3: Table permissions -- --------------------------------------------------------------------------- --- user1_role: INSERT/SELECT/UPDATE on the three data tables. +-- user1: INSERT/SELECT/UPDATE on the three data tables. -- No DELETE: raw data is never deleted by design. -GRANT SELECT, INSERT, UPDATE ON cml_data TO user1_role; -GRANT SELECT, INSERT, UPDATE ON cml_metadata TO user1_role; -GRANT SELECT, INSERT, UPDATE ON cml_stats TO user1_role; +GRANT SELECT, INSERT, UPDATE ON cml_data TO user1; +GRANT SELECT, INSERT, UPDATE ON cml_metadata TO user1; +GRANT SELECT, INSERT, UPDATE ON cml_stats TO user1; -- webserver_role: read-only on base tables. 
--- It switches to a user role (SET ROLE) for write operations triggered --- via the web UI; those operations then use the user role's permissions. -GRANT SELECT ON cml_data TO webserver_role; -GRANT SELECT ON cml_metadata TO webserver_role; -GRANT SELECT ON cml_stats TO webserver_role; +-- Per-user scoped reads are done via SET ROLE user1; the user role's +-- permissions and RLS policies then take effect automatically. +GRANT SELECT ON cml_data TO webserver_role; +GRANT SELECT ON cml_metadata TO webserver_role; +GRANT SELECT ON cml_stats TO webserver_role; --- Continuous aggregate view. --- RLS cannot be enforced on the aggregate directly (see header note). --- Queries must always filter by user_id at the application layer. -GRANT SELECT ON cml_data_1h TO user1_role, webserver_role; - --- Parser uses update_cml_stats() to upsert per-CML statistics. --- Grant execute so user1_role can call it without superuser privileges. -GRANT EXECUTE ON FUNCTION update_cml_stats(TEXT, TEXT) TO user1_role; +-- Parser calls update_cml_stats() to upsert per-CML statistics. +GRANT EXECUTE ON FUNCTION update_cml_stats(TEXT, TEXT) TO user1; -- --------------------------------------------------------------------------- -- Step 4: Enable Row-Level Security on base tables @@ -100,37 +104,36 @@ ALTER TABLE cml_metadata ENABLE ROW LEVEL SECURITY; ALTER TABLE cml_stats ENABLE ROW LEVEL SECURITY; -- --------------------------------------------------------------------------- --- Step 5: RLS policies for user1_role +-- Step 5: Generic current_user RLS policies for user roles +-- +-- Because role name = user_id value, a single policy per table covers +-- every user — no per-user policy is needed when onboarding new users. -- --- Each policy binds the role to rows where user_id = 'user1'. --- USING: applied on SELECT / UPDATE / DELETE (which rows are visible). --- WITH CHECK: applied on INSERT / UPDATE (which rows can be written). +-- USING: applied on SELECT / UPDATE / DELETE (visible rows). 
+-- WITH CHECK: applied on INSERT / UPDATE (writable rows). +-- +-- webserver_role has a separate permissive (USING true) read-all policy +-- so it can run admin / cross-user aggregate queries without SET ROLE. +-- When it does SET ROLE user1, current_user becomes user1 and this +-- generic policy takes over instead. -- --------------------------------------------------------------------------- -CREATE POLICY user1_cml_data_policy ON cml_data - FOR ALL TO user1_role - USING (user_id = 'user1') - WITH CHECK (user_id = 'user1'); - -CREATE POLICY user1_cml_metadata_policy ON cml_metadata - FOR ALL TO user1_role - USING (user_id = 'user1') - WITH CHECK (user_id = 'user1'); +CREATE POLICY user_cml_data_policy ON cml_data + FOR ALL + USING (user_id = current_user) + WITH CHECK (user_id = current_user); -CREATE POLICY user1_cml_stats_policy ON cml_stats - FOR ALL TO user1_role - USING (user_id = 'user1') - WITH CHECK (user_id = 'user1'); +CREATE POLICY user_cml_metadata_policy ON cml_metadata + FOR ALL + USING (user_id = current_user) + WITH CHECK (user_id = current_user); --- --------------------------------------------------------------------------- --- Step 6: RLS policies for webserver_role --- --- webserver_role has a permissive (USING true) read-all policy so it can --- execute admin / aggregate queries without role-switching overhead. --- For per-user scoped reads the webserver does SET ROLE user1_role, which --- causes user1_role's policies above to take effect instead. --- --------------------------------------------------------------------------- +CREATE POLICY user_cml_stats_policy ON cml_stats + FOR ALL + USING (user_id = current_user) + WITH CHECK (user_id = current_user); +-- Permissive read-all policies for webserver_role (admin / cross-user use). 
CREATE POLICY webserver_cml_data_policy ON cml_data FOR SELECT TO webserver_role USING (true); @@ -142,3 +145,34 @@ CREATE POLICY webserver_cml_metadata_policy ON cml_metadata CREATE POLICY webserver_cml_stats_policy ON cml_stats FOR SELECT TO webserver_role USING (true); + +-- --------------------------------------------------------------------------- +-- Step 6: Security-barrier view over cml_data_1h (continuous aggregate) +-- +-- PostgreSQL cannot apply RLS to materialized views, so cml_data_1h itself +-- has no automatic row filtering. cml_data_1h_secure wraps it with +-- WHERE user_id = current_user and the security_barrier option, which +-- prevents the planner from evaluating caller-supplied predicates before +-- the security filter. +-- +-- Usage pattern: +-- User roles (e.g. user1) query cml_data_1h_secure — DB-enforced, +-- no WHERE clause needed in the application. +-- +-- webserver_role queries cml_data_1h_secure after SET ROLE user1 for +-- user-scoped pages (fully DB-enforced). For admin / cross-user +-- aggregate queries it queries cml_data_1h directly as webserver_role; +-- those queries must include WHERE user_id = ? at the application layer, +-- but that is acceptable for internal admin paths. +-- --------------------------------------------------------------------------- + +CREATE VIEW cml_data_1h_secure WITH (security_barrier) AS +SELECT * FROM cml_data_1h +WHERE user_id = current_user; + +-- User roles: access only the secure view, not the underlying aggregate. +GRANT SELECT ON cml_data_1h_secure TO user1; + +-- webserver_role: both views (see usage pattern above). 
+GRANT SELECT ON cml_data_1h TO webserver_role; +GRANT SELECT ON cml_data_1h_secure TO webserver_role; diff --git a/docs/multi-user-architecture.md b/docs/multi-user-architecture.md index f32c86f..0bd368a 100644 --- a/docs/multi-user-architecture.md +++ b/docs/multi-user-architecture.md @@ -66,7 +66,7 @@ This document describes the architecture for supporting multiple users with stro │ ↓ ↓ ↓ │ │ ┌──────────────┐ ┌──────────────┐ ┌──────────────┐ │ │ │ Parser 1 │ │ Parser 2 │ │ Parser 3 │ │ -│ │ (user1_role) │ │ (user2_role) │ │ (user3_role) │ │ +│ │ (user1) │ │ (user2) │ │ (user3) │ │ │ │ HTTP trigger │ │ HTTP trigger │ │ HTTP trigger │ │ │ │ endpoint │ │ endpoint │ │ endpoint │ │ │ └──────────────┘ └──────────────┘ └──────────────┘ │ @@ -144,7 +144,7 @@ CREATE POLICY user1_isolation ON cml_data WITH CHECK (user_id = 'user1'); ``` -When `user1_role` executes `SELECT * FROM cml_data`, PostgreSQL automatically adds `WHERE user_id = 'user1'`. +When `user1` executes `SELECT * FROM cml_data`, PostgreSQL automatically adds `WHERE user_id = 'user1'` via the `current_user`-based policy. ### Authentication Flow @@ -158,7 +158,7 @@ User uploads → SSH key authentication → Chroot to /home/user1/uploads/ #### 2. Parser Layer (Data Processing) ``` -Parser watches /home/user1/uploads/ → Connects as user1_role → Inserts with user_id='user1' +Parser watches /home/user1/uploads/ → Connects as user1 → Inserts with user_id='user1' ``` - Each parser instance watches only their user's volume - Connects to database with unique role credentials @@ -330,16 +330,18 @@ SELECT create_hypertable('cml_data', 'time'); ### Database Roles ```sql --- Parser roles (can insert and read own data) -CREATE ROLE user1_role LOGIN PASSWORD 'secure_password_1'; -CREATE ROLE user2_role LOGIN PASSWORD 'secure_password_2'; -CREATE ROLE user3_role LOGIN PASSWORD 'secure_password_3'; - --- Webserver role (can switch to user roles) +-- User login roles — role name intentionally matches user_id value in the data. 
+-- This allows a single current_user-based RLS policy to cover all users, +-- and lets cml_data_1h_secure filter the aggregate without any app WHERE clause. +CREATE ROLE user1 LOGIN PASSWORD 'secure_password_1'; +CREATE ROLE user2 LOGIN PASSWORD 'secure_password_2'; +CREATE ROLE user3 LOGIN PASSWORD 'secure_password_3'; + +-- Webserver role (can switch to user roles via SET ROLE) CREATE ROLE webserver_role LOGIN PASSWORD 'webserver_password'; -- Grant role switching capability -GRANT user1_role, user2_role, user3_role TO webserver_role; +GRANT user1, user2, user3 TO webserver_role; ``` ### RLS Policies @@ -349,29 +351,19 @@ GRANT user1_role, user2_role, user3_role TO webserver_role; ALTER TABLE cml_data ENABLE ROW LEVEL SECURITY; ALTER TABLE cml_metadata ENABLE ROW LEVEL SECURITY; --- User 1 policies -CREATE POLICY user1_data_policy ON cml_data - FOR ALL TO user1_role - USING (user_id = 'user1') - WITH CHECK (user_id = 'user1'); +-- Single generic policy per table — works for all users because role name = user_id. +-- No per-user policy needed; onboarding a new user only requires CREATE ROLE. 
+CREATE POLICY user_data_policy ON cml_data + FOR ALL + USING (user_id = current_user) + WITH CHECK (user_id = current_user); -CREATE POLICY user1_metadata_policy ON cml_metadata - FOR ALL TO user1_role - USING (user_id = 'user1') - WITH CHECK (user_id = 'user1'); +CREATE POLICY user_metadata_policy ON cml_metadata + FOR ALL + USING (user_id = current_user) + WITH CHECK (user_id = current_user); --- User 2 policies (repeat pattern) -CREATE POLICY user2_data_policy ON cml_data - FOR ALL TO user2_role - USING (user_id = 'user2') - WITH CHECK (user_id = 'user2'); - -CREATE POLICY user2_metadata_policy ON cml_metadata - FOR ALL TO user2_role - USING (user_id = 'user2') - WITH CHECK (user_id = 'user2'); - --- Webserver policies (can read all data, controlled by SET ROLE) +-- Webserver policies (read-all for admin queries; scoped reads use SET ROLE) CREATE POLICY webserver_read_policy ON cml_data FOR SELECT TO webserver_role USING (true); @@ -380,9 +372,20 @@ CREATE POLICY webserver_read_metadata ON cml_metadata FOR SELECT TO webserver_role USING (true); --- Grant table permissions -GRANT SELECT, INSERT, UPDATE, DELETE ON cml_data, cml_metadata TO user1_role, user2_role, user3_role; +-- Grant table permissions (no DELETE — raw data is never deleted by design) +GRANT SELECT, INSERT, UPDATE ON cml_data, cml_metadata TO user1, user2, user3; GRANT SELECT ON cml_data, cml_metadata TO webserver_role; + +-- Security-barrier view over the continuous aggregate. +-- PostgreSQL cannot apply RLS to materialized views, so cml_data_1h itself +-- is not row-filtered. This view enforces per-user isolation at the DB level. 
+CREATE VIEW cml_data_1h_secure WITH (security_barrier) AS +SELECT * FROM cml_data_1h +WHERE user_id = current_user; + +GRANT SELECT ON cml_data_1h_secure TO user1, user2, user3; +GRANT SELECT ON cml_data_1h TO webserver_role; -- direct for admin queries +GRANT SELECT ON cml_data_1h_secure TO webserver_role; -- via SET ROLE for user pages ``` ## Docker Compose Configuration @@ -445,7 +448,7 @@ services: parser_user1: build: ./parser # Could be custom build per user environment: - - DATABASE_URL=postgresql://user1_role:user1_password@database:5432/mydatabase + - DATABASE_URL=postgresql://user1:user1_password@database:5432/mydatabase - USER_ID=user1 # Used to insert user_id in data - PARSER_INCOMING_DIR=/app/data/incoming - PARSER_ARCHIVED_DIR=/app/data/archived @@ -459,7 +462,7 @@ services: parser_user2: build: ./parser_user2 # Different parser code if needed environment: - - DATABASE_URL=postgresql://user2_role:user2_password@database:5432/mydatabase + - DATABASE_URL=postgresql://user2:user2_password@database:5432/mydatabase - USER_ID=user2 - PARSER_INCOMING_DIR=/app/data/incoming - PARSER_ARCHIVED_DIR=/app/data/archived @@ -572,7 +575,7 @@ with open('/app/users.json', 'r') as f: # Format: { # "user1": { # "password_hash": "bcrypt_hash", - # "db_role": "user1_role", + # "db_role": "user1", # "db_password": "user1_db_password" # } # } @@ -595,7 +598,7 @@ def get_db_connection(): # Create connection URL with user's role credentials base_url = os.getenv("DATABASE_URL") # Replace credentials in URL - # postgresql://webserver_role:pass@host/db → postgresql://user1_role:pass@host/db + # postgresql://webserver_role:pass@host/db → postgresql://user1:pass@host/db user_db_url = base_url.replace( 'webserver_role:' + os.getenv('WEBSERVER_PASSWORD', 'webserver_password'), f"{user_config['db_role']}:{user_config['db_password']}" From f8189fc9e5de2c4068c91fa015b07c4abd6cf31a Mon Sep 17 00:00:00 2001 From: Christian Chwala Date: Tue, 31 Mar 2026 23:23:06 +0200 Subject: [PATCH 3/7] 
fix(db): enable RLS on cml_data before compression is configured TimescaleDB rejects ALTER TABLE ... ENABLE ROW LEVEL SECURITY on a hypertable that already has timescaledb.compress set. init.sql (fresh install): Move ALTER TABLE cml_data ENABLE ROW LEVEL SECURITY to immediately before the compression block. The restriction is purely ordering: RLS must be enabled before the compress option is applied. migration 004 (live DB): On an existing deployment (after migration 002) compression is already enabled. Apply the same decompress/recompress pattern used in migration 002: decompress all compressed chunks, ENABLE ROW LEVEL SECURITY, then re-compress chunks older than the policy threshold. cml_metadata and cml_stats are plain tables and are unaffected. --- database/init.sql | 9 ++++++-- database/migrations/004_add_roles_rls.sql | 25 ++++++++++++++++++++++- 2 files changed, 31 insertions(+), 3 deletions(-) diff --git a/database/init.sql b/database/init.sql index 202d94c..14e855b 100644 --- a/database/init.sql +++ b/database/init.sql @@ -158,6 +158,11 @@ SELECT add_continuous_aggregate_policy('cml_data_1h', -- The current uncompressed week chunk is left untouched so real-time ingestion -- and detail-view queries on recent data have no decompression overhead. -- --------------------------------------------------------------------------- +-- Enable RLS on cml_data BEFORE setting up compression. +-- TimescaleDB does not allow ENABLE ROW LEVEL SECURITY on a hypertable +-- that already has timescaledb.compress set — so the order is mandatory. +ALTER TABLE cml_data ENABLE ROW LEVEL SECURITY; + ALTER TABLE cml_data SET ( timescaledb.compress, timescaledb.compress_segmentby = 'user_id, cml_id, sublink_id', @@ -203,8 +208,8 @@ GRANT SELECT ON cml_stats TO webserver_role; -- Parser calls update_cml_stats() to upsert per-CML statistics. GRANT EXECUTE ON FUNCTION update_cml_stats(TEXT, TEXT) TO user1; --- Enable Row-Level Security on base tables. 
-ALTER TABLE cml_data ENABLE ROW LEVEL SECURITY; +-- Enable Row-Level Security on cml_metadata and cml_stats. +-- (cml_data was already enabled above, before compression was configured.) ALTER TABLE cml_metadata ENABLE ROW LEVEL SECURITY; ALTER TABLE cml_stats ENABLE ROW LEVEL SECURITY; diff --git a/database/migrations/004_add_roles_rls.sql b/database/migrations/004_add_roles_rls.sql index 5acc93f..4e4129a 100644 --- a/database/migrations/004_add_roles_rls.sql +++ b/database/migrations/004_add_roles_rls.sql @@ -97,9 +97,32 @@ GRANT EXECUTE ON FUNCTION update_cml_stats(TEXT, TEXT) TO user1; -- --------------------------------------------------------------------------- -- Step 4: Enable Row-Level Security on base tables +-- +-- TimescaleDB does not allow ENABLE ROW LEVEL SECURITY on a hypertable +-- that has timescaledb.compress set. Work around this by decompressing +-- all currently-compressed cml_data chunks, enabling RLS, then +-- re-compressing — the same decompress/recompress pattern used in +-- migration 002. No data is lost if the process is interrupted. +-- +-- cml_metadata and cml_stats are plain tables (no compression), so they +-- can be altered directly. -- --------------------------------------------------------------------------- -ALTER TABLE cml_data ENABLE ROW LEVEL SECURITY; +-- Decompress any compressed cml_data chunks. +SELECT decompress_chunk( + format('%I.%I', chunk_schema, chunk_name)::regclass +) +FROM timescaledb_information.chunks +WHERE hypertable_name = 'cml_data' + AND is_compressed = true; + +ALTER TABLE cml_data ENABLE ROW LEVEL SECURITY; + +-- Re-compress chunks older than the compression policy threshold (7 days). +SELECT compress_chunk(c) +FROM show_chunks('cml_data', older_than => INTERVAL '7 days') c; + +-- Plain tables: no compression constraint. 
ALTER TABLE cml_metadata ENABLE ROW LEVEL SECURITY; ALTER TABLE cml_stats ENABLE ROW LEVEL SECURITY; From 2443cd320057ccf837261b50cca68ed25d8ab2bc Mon Sep 17 00:00:00 2001 From: Christian Chwala Date: Tue, 31 Mar 2026 23:34:53 +0200 Subject: [PATCH 4/7] =?UTF-8?q?fix(db):=20remove=20RLS=20from=20cml=5Fdata?= =?UTF-8?q?=20=E2=80=94=20incompatible=20with=20TimescaleDB=20compression?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit TimescaleDB does not allow ENABLE ROW LEVEL SECURITY on a compressed hypertable, and compression cannot be set on an RLS-enabled table. These two features are mutually exclusive; no ordering of statements works around this. Resolution: - Remove ENABLE ROW LEVEL SECURITY from cml_data entirely (keep compression). - Apply full RLS only to cml_metadata and cml_stats (plain tables, no compression restriction). - Add cml_data_secure: a security_barrier view over cml_data with WHERE user_id = current_user and WITH CHECK OPTION, using the same pattern already established for cml_data_1h_secure. This provides SQL injection protection (optimizer cannot push predicates above the filter) and write-path enforcement via the view. Verified locally: fresh-volume docker compose up now completes without errors (PostgreSQL init process complete). --- database/init.sql | 43 ++++++----- database/migrations/004_add_roles_rls.sql | 89 ++++++++++------------- 2 files changed, 62 insertions(+), 70 deletions(-) diff --git a/database/init.sql b/database/init.sql index 14e855b..cd4028a 100644 --- a/database/init.sql +++ b/database/init.sql @@ -158,11 +158,10 @@ SELECT add_continuous_aggregate_policy('cml_data_1h', -- The current uncompressed week chunk is left untouched so real-time ingestion -- and detail-view queries on recent data have no decompression overhead. -- --------------------------------------------------------------------------- --- Enable RLS on cml_data BEFORE setting up compression. 
--- TimescaleDB does not allow ENABLE ROW LEVEL SECURITY on a hypertable --- that already has timescaledb.compress set — so the order is mandatory. -ALTER TABLE cml_data ENABLE ROW LEVEL SECURITY; - +-- Note: TimescaleDB does not allow ENABLE ROW LEVEL SECURITY on a compressed +-- hypertable, and compression cannot be set on an RLS-enabled table. These +-- two features are mutually exclusive on the same hypertable. Per-user +-- isolation for cml_data is provided by the cml_data_secure view below. ALTER TABLE cml_data SET ( timescaledb.compress, timescaledb.compress_segmentby = 'user_id, cml_id, sublink_id', @@ -208,18 +207,16 @@ GRANT SELECT ON cml_stats TO webserver_role; -- Parser calls update_cml_stats() to upsert per-CML statistics. GRANT EXECUTE ON FUNCTION update_cml_stats(TEXT, TEXT) TO user1; --- Enable Row-Level Security on cml_metadata and cml_stats. --- (cml_data was already enabled above, before compression was configured.) +-- Row-Level Security on cml_metadata and cml_stats. +-- cml_data is excluded: TimescaleDB does not allow RLS on compressed +-- hypertables (and compression cannot be set on an RLS-enabled table). +-- Per-user isolation for raw cml_data queries is provided by the +-- cml_data_secure security-barrier view defined below. ALTER TABLE cml_metadata ENABLE ROW LEVEL SECURITY; ALTER TABLE cml_stats ENABLE ROW LEVEL SECURITY; --- Generic current_user policies (one per table covers all users). --- Because role name = user_id value, no per-user policy is needed. -CREATE POLICY user_cml_data_policy ON cml_data - FOR ALL - USING (user_id = current_user) - WITH CHECK (user_id = current_user); - +-- Generic current_user policies for cml_metadata and cml_stats. +-- Because role name = user_id value, one policy per table covers all users. 
CREATE POLICY user_cml_metadata_policy ON cml_metadata FOR ALL USING (user_id = current_user) @@ -231,10 +228,6 @@ CREATE POLICY user_cml_stats_policy ON cml_stats WITH CHECK (user_id = current_user); -- Permissive read-all policies for webserver_role (admin / cross-user use). -CREATE POLICY webserver_cml_data_policy ON cml_data - FOR SELECT TO webserver_role - USING (true); - CREATE POLICY webserver_cml_metadata_policy ON cml_metadata FOR SELECT TO webserver_role USING (true); @@ -243,6 +236,20 @@ CREATE POLICY webserver_cml_stats_policy ON cml_stats FOR SELECT TO webserver_role USING (true); +-- Security-barrier view over cml_data (compressed hypertable). +-- Provides per-user isolation for raw cml_data queries via +-- WHERE user_id = current_user. The security_barrier option prevents the +-- planner from evaluating caller-supplied predicates before this filter +-- (leaky-function protection). WITH CHECK OPTION rejects writes through +-- this view where user_id != current_user. +CREATE VIEW cml_data_secure WITH (security_barrier) AS +SELECT * FROM cml_data +WHERE user_id = current_user +WITH CHECK OPTION; + +GRANT SELECT ON cml_data_secure TO user1; +GRANT SELECT ON cml_data_secure TO webserver_role; + -- Security-barrier view over cml_data_1h (continuous aggregate). -- -- PostgreSQL cannot apply RLS to materialized views.
This view wraps diff --git a/database/migrations/004_add_roles_rls.sql b/database/migrations/004_add_roles_rls.sql index 4e4129a..90c739a 100644 --- a/database/migrations/004_add_roles_rls.sql +++ b/database/migrations/004_add_roles_rls.sql @@ -96,33 +96,18 @@ GRANT SELECT ON cml_stats TO webserver_role; GRANT EXECUTE ON FUNCTION update_cml_stats(TEXT, TEXT) TO user1; -- --------------------------------------------------------------------------- --- Step 4: Enable Row-Level Security on base tables +-- Step 4: Enable Row-Level Security on cml_metadata and cml_stats -- --- TimescaleDB does not allow ENABLE ROW LEVEL SECURITY on a hypertable --- that has timescaledb.compress set. Work around this by decompressing --- all currently-compressed cml_data chunks, enabling RLS, then --- re-compressing — the same decompress/recompress pattern used in --- migration 002. No data is lost if the process is interrupted. +-- cml_data is excluded: TimescaleDB does not allow RLS on compressed +-- hypertables (and compression cannot be set on an RLS-enabled table). +-- These two features are mutually exclusive on the same hypertable. +-- Per-user isolation for raw cml_data queries is provided by the +-- cml_data_secure security-barrier view in Step 6 below. -- -- cml_metadata and cml_stats are plain tables (no compression), so they --- can be altered directly. +-- support RLS without restriction. -- --------------------------------------------------------------------------- --- Decompress any compressed cml_data chunks. -SELECT decompress_chunk( - format('%I.%I', chunk_schema, chunk_name)::regclass -) -FROM timescaledb_information.chunks -WHERE hypertable_name = 'cml_data' - AND is_compressed = true; - -ALTER TABLE cml_data ENABLE ROW LEVEL SECURITY; - --- Re-compress chunks older than the compression policy threshold (7 days). -SELECT compress_chunk(c) -FROM show_chunks('cml_data', older_than => INTERVAL '7 days') c; - --- Plain tables: no compression constraint. 
ALTER TABLE cml_metadata ENABLE ROW LEVEL SECURITY; ALTER TABLE cml_stats ENABLE ROW LEVEL SECURITY; @@ -141,11 +126,9 @@ ALTER TABLE cml_stats ENABLE ROW LEVEL SECURITY; -- generic policy takes over instead. -- --------------------------------------------------------------------------- -CREATE POLICY user_cml_data_policy ON cml_data - FOR ALL - USING (user_id = current_user) - WITH CHECK (user_id = current_user); - +-- Generic current_user policies for cml_metadata and cml_stats. +-- Because role name = user_id value, one policy per table covers all users. +-- (cml_data has no RLS policy — see Step 4 for the reason.) CREATE POLICY user_cml_metadata_policy ON cml_metadata FOR ALL USING (user_id = current_user) @@ -156,11 +139,7 @@ CREATE POLICY user_cml_stats_policy ON cml_stats USING (user_id = current_user) WITH CHECK (user_id = current_user); --- Permissive read-all policies for webserver_role (admin / cross-user use). -CREATE POLICY webserver_cml_data_policy ON cml_data - FOR SELECT TO webserver_role - USING (true); - +-- Permissive read-all policies for webserver_role on the RLS-protected tables. CREATE POLICY webserver_cml_metadata_policy ON cml_metadata FOR SELECT TO webserver_role USING (true); @@ -170,32 +149,38 @@ CREATE POLICY webserver_cml_stats_policy ON cml_stats USING (true); -- --------------------------------------------------------------------------- --- Step 6: Security-barrier view over cml_data_1h (continuous aggregate) --- --- PostgreSQL cannot apply RLS to materialized views, so cml_data_1h itself --- has no automatic row filtering. cml_data_1h_secure wraps it with --- WHERE user_id = current_user and the security_barrier option, which --- prevents the planner from pushing attacker-controlled predicates above --- the security filter. --- --- Usage pattern: --- User roles (e.g. user1) query cml_data_1h_secure — DB-enforced, --- no WHERE clause needed in the application. 
--- -- webserver_role queries cml_data_1h_secure after SET ROLE user1 for --- user-scoped pages (fully DB-enforced). For admin / cross-user --- aggregate queries it queries cml_data_1h directly as webserver_role; --- those queries must include WHERE user_id = ? at the application layer, --- but that is acceptable for internal admin paths. +-- Step 6: Security-barrier views over cml_data and cml_data_1h +-- +-- Since RLS is not available on cml_data (compressed hypertable), both the +-- raw table and the continuous aggregate are exposed through +-- security_barrier views that restrict rows to current_user. +-- The security_barrier option prevents the planner from evaluating +-- caller-supplied predicates before the view's filter (leaky-function protection). +-- WITH CHECK OPTION rejects writes through the view where user_id != current_user. +-- +-- Usage: +-- User roles query cml_data_secure / cml_data_1h_secure — automatically +-- filtered to current_user, no WHERE clause needed in the application. +-- +-- webserver_role uses the secure views after SET ROLE user1 for user-scoped +-- pages. For admin / cross-user queries it queries the raw tables directly +-- as webserver_role; those paths still need WHERE user_id = ? in the app. -- --------------------------------------------------------------------------- +-- Raw hypertable view. +CREATE VIEW cml_data_secure WITH (security_barrier) AS +SELECT * FROM cml_data +WHERE user_id = current_user +WITH CHECK OPTION; + +GRANT SELECT ON cml_data_secure TO user1; +GRANT SELECT ON cml_data_secure TO webserver_role; + +-- Continuous aggregate view. CREATE VIEW cml_data_1h_secure WITH (security_barrier) AS SELECT * FROM cml_data_1h WHERE user_id = current_user; --- User roles: access only the secure view, not the underlying aggregate. GRANT SELECT ON cml_data_1h_secure TO user1; - --- webserver_role: both views (see usage pattern above).
GRANT SELECT ON cml_data_1h TO webserver_role; GRANT SELECT ON cml_data_1h_secure TO webserver_role; From 2b40dc7dd598426d57d5a092cc1f50fbfc7205d0 Mon Sep 17 00:00:00 2001 From: Christian Chwala Date: Wed, 1 Apr 2026 16:22:11 +0200 Subject: [PATCH 5/7] perf(db): drop sublink_id from compress_segmentby Change compress_segmentby from 'user_id, cml_id, sublink_id' to 'user_id, cml_id'. With ~80% of CMLs having 2 sublinks and ~15% having 4, keeping sublinks in the same segment reduces the number of chunks that need to be decompressed per CML query roughly 2-4x. Sublinks of the same CML share correlated RSL/TSL ranges so they compress well together with no meaningful loss in compression ratio. Add migration 005 to apply the change on live databases via a decompress -> alter compress options -> recompress cycle. Update migration 002 comment to cross-reference 005. --- database/MIGRATION.md | 15 ++++++- database/init.sql | 12 +++-- .../002_update_compression_segmentby.sql | 6 ++- .../005_drop_sublink_from_segmentby.sql | 45 +++++++++++++++++++ 4 files changed, 70 insertions(+), 8 deletions(-) create mode 100644 database/migrations/005_drop_sublink_from_segmentby.sql diff --git a/database/MIGRATION.md b/database/MIGRATION.md index b9afd32..2c4a58c 100644 --- a/database/MIGRATION.md +++ b/database/MIGRATION.md @@ -14,7 +14,8 @@ to a machine that already has data you must apply the migration file below | File | What it does | |------|-------------| -| `migrations/004_add_roles_rls.sql` | Creates `user1_role` and `webserver_role`; grants table/function permissions; enables RLS on `cml_data`, `cml_metadata`, `cml_stats`; creates per-role isolation policies | +| `migrations/004_add_roles_rls.sql` | Creates `user1` and `webserver_role` login roles; grants table/function permissions; enables RLS on `cml_metadata` and `cml_stats`; creates `current_user`-based policies; creates `cml_data_secure` and `cml_data_1h_secure` security-barrier views | +| 
`migrations/005_drop_sublink_from_segmentby.sql` | Removes `sublink_id` from `compress_segmentby`; new setting is `'user_id, cml_id'`; reduces average decompression work per CML query by ~2–4× | ### Backward compatibility @@ -28,6 +29,13 @@ This migration is **fully backward-compatible** with the existing services: - Rollback is possible: revoke grants, drop policies, then drop roles (see Rollback section below). +### Note on `cml_data` isolation + +TimescaleDB does not allow RLS on a compressed hypertable (and compression +cannot be set on an RLS-enabled table — they are mutually exclusive). +`cml_data` keeps compression; per-user isolation is provided by +`cml_data_secure` and `cml_data_1h_secure` security-barrier views. + ### Note on `cml_data_1h` (continuous aggregate) PostgreSQL RLS cannot be applied to materialized views, so `cml_data_1h` is @@ -52,11 +60,14 @@ git pull origin feat/db-roles-rls # or merge to main first docker compose up -d --build ``` -**3. Apply the migration** +**3. Apply the migrations in order** ```bash docker compose exec -T database psql -U myuser -d mydatabase \ < database/migrations/004_add_roles_rls.sql + +docker compose exec -T database psql -U myuser -d mydatabase \ + < database/migrations/005_drop_sublink_from_segmentby.sql ``` **4. Verify** diff --git a/database/init.sql b/database/init.sql index cd4028a..556e911 100644 --- a/database/init.sql +++ b/database/init.sql @@ -147,9 +147,13 @@ SELECT add_continuous_aggregate_policy('cml_data_1h', -- --------------------------------------------------------------------------- -- Compression for cml_data chunks older than 7 days. -- --- compress_segmentby: each compressed segment contains one (cml_id, sublink_id) --- pair, so a query filtered to a single CML decompresses only ~1/728th of a --- chunk — not the whole thing. +-- compress_segmentby: one compressed segment per (user_id, cml_id). +-- user_id is the leading key so a per-user query skips all other users' +-- segments entirely. 
sublink_id is intentionally omitted: ~80% of CMLs +-- have 2 sublinks and ~15% have 4; keeping sublinks together in one +-- segment roughly halves decompression work per CML query vs. splitting +-- by sublink. Filtering to a specific sublink after decompression is a +-- trivial CPU operation on already-decompressed columnar data. -- compress_orderby: matches the query pattern (time range scans), allowing -- skip-scan decompression for narrow time windows within a segment. -- @@ -164,7 +168,7 @@ SELECT add_continuous_aggregate_policy('cml_data_1h', -- isolation for cml_data is provided by the cml_data_secure view below. ALTER TABLE cml_data SET ( timescaledb.compress, - timescaledb.compress_segmentby = 'user_id, cml_id, sublink_id', + timescaledb.compress_segmentby = 'user_id, cml_id', timescaledb.compress_orderby = 'time DESC' ); diff --git a/database/migrations/002_update_compression_segmentby.sql b/database/migrations/002_update_compression_segmentby.sql index 059e87f..b814861 100644 --- a/database/migrations/002_update_compression_segmentby.sql +++ b/database/migrations/002_update_compression_segmentby.sql @@ -3,8 +3,10 @@ -- Part of PR feat/db-add-user-id. -- Run this AFTER 001_add_user_id.sql. -- --- Adds user_id to compress_segmentby so that per-user range scans --- decompress only the relevant segment instead of the full chunk. +-- Adds user_id as the leading segmentby key so that per-user range scans +-- decompress only the relevant segments instead of the full chunk. +-- sublink_id is included here alongside cml_id; it was later dropped in +-- migration 005 (feat/db-roles-rls) — see that file for the rationale. -- The decompress → alter → recompress cycle is non-destructive; no data -- is lost if the process is interrupted (TimescaleDB keeps the original -- uncompressed chunks until recompression succeeds). 
diff --git a/database/migrations/005_drop_sublink_from_segmentby.sql b/database/migrations/005_drop_sublink_from_segmentby.sql new file mode 100644 index 0000000..3c5ef31 --- /dev/null +++ b/database/migrations/005_drop_sublink_from_segmentby.sql @@ -0,0 +1,45 @@ +-- Migration 005: Drop sublink_id from cml_data compression segmentby +-- +-- Part of PR feat/db-roles-rls. +-- Run this AFTER migration 004_add_roles_rls.sql. +-- +-- Rationale: +-- Migration 002 set compress_segmentby = 'user_id, cml_id, sublink_id'. +-- Keeping sublink_id as a segment key means a query for one CML +-- requires decompressing one segment per sublink. Given the expected +-- data distribution (~80% of CMLs have 2 sublinks, ~15% have 4), +-- removing sublink_id reduces average decompression work per CML query +-- by roughly 2–4×. Filtering to a specific sublink after decompression +-- is a trivial CPU operation on already-decompressed columnar data, so +-- there is no meaningful cost on that side. +-- +-- Sublinks of the same CML share similar RSL/TSL value ranges (same +-- physical link), so they continue to compress well together within one +-- (user_id, cml_id) segment. +-- +-- The decompress → alter → recompress cycle is non-destructive; no data +-- is lost if the process is interrupted (TimescaleDB keeps the original +-- uncompressed chunks until recompression succeeds). +-- +-- Apply with: +-- docker compose exec -T database psql -U myuser -d mydatabase \ +-- < database/migrations/005_drop_sublink_from_segmentby.sql + +-- Step 1: Decompress all currently-compressed chunks. +SELECT decompress_chunk( + format('%I.%I', chunk_schema, chunk_name)::regclass +) +FROM timescaledb_information.chunks +WHERE hypertable_name = 'cml_data' + AND is_compressed = true; + +-- Step 2: Update compress_segmentby to (user_id, cml_id) — drop sublink_id. 
+ALTER TABLE cml_data SET ( + timescaledb.compress, + timescaledb.compress_segmentby = 'user_id, cml_id', + timescaledb.compress_orderby = 'time DESC' +); + +-- Step 3: Re-compress chunks older than the policy threshold (7 days). +SELECT compress_chunk(c) +FROM show_chunks('cml_data', older_than => INTERVAL '7 days') c; From 4f1d3b49828a9d921a5ff52102fbd86f629327a1 Mon Sep 17 00:00:00 2001 From: Christian Chwala Date: Wed, 1 Apr 2026 16:25:40 +0200 Subject: [PATCH 6/7] =?UTF-8?q?docs(db):=20fix=20stale=20note=20on=20cml?= =?UTF-8?q?=5Fdata=5F1h=20=E2=80=94=20cml=5Fdata=5F1h=5Fsecure=20view=20pr?= =?UTF-8?q?ovides=20isolation?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- database/MIGRATION.md | 11 ++++++----- 1 file changed, 6 insertions(+), 5 deletions(-) diff --git a/database/MIGRATION.md b/database/MIGRATION.md index 2c4a58c..7261e8b 100644 --- a/database/MIGRATION.md +++ b/database/MIGRATION.md @@ -38,11 +38,12 @@ cannot be set on an RLS-enabled table — they are mutually exclusive). ### Note on `cml_data_1h` (continuous aggregate) -PostgreSQL RLS cannot be applied to materialized views, so `cml_data_1h` is -**not** automatically row-filtered. Queries to this view **must** always -include a `WHERE user_id = ?` predicate. The webserver (PR5) and Grafana -panels enforce this. All raw-data queries route through `cml_data`, which -**is** protected by RLS. +PostgreSQL RLS cannot be applied to materialized views, so `cml_data_1h` itself +cannot carry row-level policies. The same security-barrier view trick used for +`cml_data` is applied here too: `cml_data_1h_secure` is a `security_barrier` +view that filters `WHERE user_id = current_user`, providing the same automatic +per-user isolation. User roles (e.g. `user1`) can read only `cml_data_1h_secure`; +`webserver_role` can also read the underlying `cml_data_1h` for admin queries.
### Steps From 21ce1f44d2b42657d485e4207e531e86e8e65c0a Mon Sep 17 00:00:00 2001 From: Christian Chwala Date: Wed, 1 Apr 2026 16:35:04 +0200 Subject: [PATCH 7/7] docs(db): fix stale role names and policy names in verify/rollback sections MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit - Verify: \du user1_role -> user1; smoke-test uses user1 role and cml_data_secure view (cml_data direct gives all rows — no RLS on it); RLS check comment clarifies cml_data shows f intentionally - Rollback: drop cml_data_secure/cml_data_1h_secure views; correct policy names to user_cml_metadata_policy / user_cml_stats_policy / webserver_cml_metadata_policy / webserver_cml_stats_policy; remove DISABLE RLS on cml_data (never enabled); fix user1_role -> user1 throughout REVOKE and DROP ROLE statements - migration 004 header: step 4 description corrects 'cml_data, cml_metadata, cml_stats' -> 'cml_metadata and cml_stats' --- database/MIGRATION.md | 36 ++++++++++++----------- database/migrations/004_add_roles_rls.sql | 3 +- 2 files changed, 21 insertions(+), 18 deletions(-) diff --git a/database/MIGRATION.md b/database/MIGRATION.md index 7261e8b..397a704 100644 --- a/database/MIGRATION.md +++ b/database/MIGRATION.md @@ -76,46 +76,48 @@ docker compose exec -T database psql -U myuser -d mydatabase \ ```bash # List the new roles docker compose exec database psql -U myuser -d mydatabase \ - -c "\du user1_role webserver_role" + -c "\du user1 webserver_role" -# Confirm RLS is enabled on all three tables +# Confirm RLS is enabled on cml_metadata and cml_stats +# (cml_data intentionally shows f — compression and RLS are mutually exclusive) docker compose exec database psql -U myuser -d mydatabase \ -c "SELECT relname, relrowsecurity FROM pg_class \ WHERE relname IN ('cml_data','cml_metadata','cml_stats');" -# Smoke-test: user1_role should see its own rows and nothing else +# Smoke-test: user1 should see only their own rows via the secure view docker compose 
exec database psql \ - -U user1_role -d mydatabase \ - -c "SELECT count(*) FROM cml_data;" + -U user1 -d mydatabase \ + -c "SELECT count(*) FROM cml_data_secure;" ``` **Rollback:** ```bash docker compose exec database psql -U myuser -d mydatabase -c " --- Drop policies -DROP POLICY IF EXISTS user1_cml_data_policy ON cml_data; -DROP POLICY IF EXISTS user1_cml_metadata_policy ON cml_metadata; -DROP POLICY IF EXISTS user1_cml_stats_policy ON cml_stats; -DROP POLICY IF EXISTS webserver_cml_data_policy ON cml_data; +-- Drop security-barrier views +DROP VIEW IF EXISTS cml_data_secure; +DROP VIEW IF EXISTS cml_data_1h_secure; + +-- Drop policies (only cml_metadata and cml_stats have RLS) +DROP POLICY IF EXISTS user_cml_metadata_policy ON cml_metadata; +DROP POLICY IF EXISTS user_cml_stats_policy ON cml_stats; DROP POLICY IF EXISTS webserver_cml_metadata_policy ON cml_metadata; DROP POLICY IF EXISTS webserver_cml_stats_policy ON cml_stats; --- Disable RLS -ALTER TABLE cml_data DISABLE ROW LEVEL SECURITY; +-- Disable RLS (cml_data was never RLS-enabled) ALTER TABLE cml_metadata DISABLE ROW LEVEL SECURITY; ALTER TABLE cml_stats DISABLE ROW LEVEL SECURITY; -- Revoke grants REVOKE ALL ON cml_data, cml_metadata, cml_stats, cml_data_1h - FROM user1_role, webserver_role; + FROM user1, webserver_role; REVOKE EXECUTE ON FUNCTION update_cml_stats(TEXT, TEXT) - FROM user1_role; -REVOKE user1_role FROM webserver_role; -REVOKE USAGE ON SCHEMA public FROM user1_role, webserver_role; + FROM user1; +REVOKE user1 FROM webserver_role; +REVOKE USAGE ON SCHEMA public FROM user1, webserver_role; -- Drop roles -DROP ROLE IF EXISTS user1_role; +DROP ROLE IF EXISTS user1; DROP ROLE IF EXISTS webserver_role; " ``` diff --git a/database/migrations/004_add_roles_rls.sql b/database/migrations/004_add_roles_rls.sql index 90c739a..551eab6 100644 --- a/database/migrations/004_add_roles_rls.sql +++ b/database/migrations/004_add_roles_rls.sql @@ -17,7 +17,8 @@ -- 2. 
Creates webserver_role (read-all for admin queries; -- SET ROLE user1 for DB-enforced scoped reads). -- 3. Grants table/function permissions to each role. --- 4. Enables Row-Level Security on cml_data, cml_metadata, cml_stats. +-- 4. Enables Row-Level Security on cml_metadata and cml_stats. +-- (cml_data is excluded — see Step 4 comment for the reason.) -- 5. Creates a single generic current_user policy per base table -- (works for all users; no per-user policy needed at onboarding). -- 6. Creates cml_data_1h_secure — a security_barrier view over the