From 20a29925b93317545aa8c26f12039c6a016eea1b Mon Sep 17 00:00:00 2001 From: Christian Chwala Date: Tue, 31 Mar 2026 22:18:59 +0200 Subject: [PATCH 1/2] feat(db): add user_id columns, update aggregate and compression segmentby Part of multi-user RLS architecture (PR1 of 7). Changes to init.sql (fresh deployments): - cml_data: add user_id TEXT NOT NULL DEFAULT 'user1' - cml_metadata: add user_id, PRIMARY KEY (cml_id, sublink_id, user_id), keep UNIQUE (cml_id, sublink_id) for backward compat until PR3 - cml_stats: add user_id, PRIMARY KEY (cml_id, user_id) - update_cml_stats: add optional target_user_id param (DEFAULT 'user1') so existing single-arg call sites keep working - cml_data_1h: add user_id to SELECT and GROUP BY - compress_segmentby: add user_id as leading key - Add idx_cml_data_user_id and idx_cml_metadata_user_id indexes Migration files for existing deployments: - database/migrations/001_add_user_id.sql - database/migrations/002_update_compression_segmentby.sql - database/migrations/003_update_aggregate_user_id.sql Backward compatible: DEFAULT 'user1' + retained UNIQUE constraint mean the unmodified parser continues to work without any code changes. --- database/MIGRATION.md | 74 +++++++++++ database/init.sql | 84 ++++++++---- database/migrations/001_add_user_id.sql | 123 ++++++++++++++++++ .../002_update_compression_segmentby.sql | 34 +++++ .../003_update_aggregate_user_id.sql | 49 +++++++ 5 files changed, 337 insertions(+), 27 deletions(-) create mode 100644 database/migrations/001_add_user_id.sql create mode 100644 database/migrations/002_update_compression_segmentby.sql create mode 100644 database/migrations/003_update_aggregate_user_id.sql diff --git a/database/MIGRATION.md b/database/MIGRATION.md index b899064..b24e189 100644 --- a/database/MIGRATION.md +++ b/database/MIGRATION.md @@ -1,5 +1,79 @@ # Database Migration Guide +--- + +## PR `feat/db-add-user-id` — Add `user_id` for multi-user RLS support + +**Branch:** `feat/db-add-user-id` + +`init.sql` only runs on a fresh database volume, so when deploying this branch +to a machine that already has data you must apply the three migration files in +order. + +### Changes + +| File | What it does | +|------|-------------| +| `migrations/001_add_user_id.sql` | Adds `user_id TEXT NOT NULL DEFAULT 'user1'` to `cml_data`, `cml_metadata`, `cml_stats`; updates primary keys; adds per-user indexes; updates `update_cml_stats` to accept an optional `target_user_id` (DEFAULT `'user1'`) | +| `migrations/002_update_compression_segmentby.sql` | Decompresses existing chunks, adds `user_id` as leading key in `compress_segmentby`, re-compresses old chunks | +| `migrations/003_update_aggregate_user_id.sql` | Drops and recreates `cml_data_1h` with `user_id` in `SELECT` and `GROUP BY` | + +### Backward compatibility + +All three migrations are **backward-compatible** with the existing single-user +parser: + +- `DEFAULT 'user1'` on each `user_id` column means un-modified `INSERT` + statements (no `user_id` column supplied) keep writing to `user1`. +- A `UNIQUE (cml_id, sublink_id)` constraint is kept on `cml_metadata` so the + parser's `ON CONFLICT (cml_id, sublink_id)` clause stays valid. + *(This constraint is dropped in PR `feat/parser-user-id`.)* +- `update_cml_stats(cml_id)` — the existing single-argument call — still works + because `target_user_id` defaults to `'user1'`. + +### Steps + +**1. Back up the database** + +```bash +docker compose exec database pg_dump -U myuser -d mydatabase \ + > backup_pre_multiuser_$(date +%Y%m%d_%H%M%S).sql +``` + +**2. Pull and rebuild** + +```bash +git pull origin feat/db-add-user-id # or merge to main first +docker compose up -d --build +``` + +**3. Apply the migrations in order** + +```bash +docker compose exec -T database psql -U myuser -d mydatabase \ + < database/migrations/001_add_user_id.sql + +docker compose exec -T database psql -U myuser -d mydatabase \ + < database/migrations/002_update_compression_segmentby.sql + +docker compose exec -T database psql -U myuser -d mydatabase \ + < database/migrations/003_update_aggregate_user_id.sql +``` + +**4. (Optional) Trigger an immediate aggregate backfill** + +The refresh policy will backfill `cml_data_1h` within the next hour. +To do it immediately: + +```bash +docker compose exec database psql -U myuser -d mydatabase -c \ + "CALL refresh_continuous_aggregate('cml_data_1h', NULL, NULL);" +``` + +**Rollback:** restore from the backup taken in step 1. + +--- + ## `cml_data_1h` continuous aggregate **Branch:** `feature/performance-and-grafana-improvements` diff --git a/database/init.sql b/database/init.sql index 0a05351..b600dff 100644 --- a/database/init.sql +++ b/database/init.sql @@ -3,7 +3,8 @@ CREATE TABLE cml_data ( cml_id TEXT NOT NULL, sublink_id TEXT NOT NULL, rsl REAL, - tsl REAL + tsl REAL, + user_id TEXT NOT NULL DEFAULT 'user1' ); CREATE TABLE cml_metadata ( @@ -16,11 +17,16 @@ CREATE TABLE cml_metadata ( frequency REAL, polarization TEXT, length REAL, - PRIMARY KEY (cml_id, sublink_id) + user_id TEXT NOT NULL DEFAULT 'user1', + PRIMARY KEY (cml_id, sublink_id, user_id), + -- Backward-compat constraint: keeps the parser's ON CONFLICT (cml_id, sublink_id) + -- clause valid until PR3 (feat/parser-user-id) updates it. + UNIQUE (cml_id, sublink_id) ); CREATE TABLE cml_stats ( - cml_id TEXT PRIMARY KEY, + cml_id TEXT NOT NULL, + user_id TEXT NOT NULL DEFAULT 'user1', total_records BIGINT, valid_records BIGINT, null_records BIGINT, @@ -30,13 +36,22 @@ CREATE TABLE cml_stats ( mean_rsl REAL, stddev_rsl REAL, last_rsl REAL, - last_update TIMESTAMPTZ DEFAULT NOW() + last_update TIMESTAMPTZ DEFAULT NOW(), + PRIMARY KEY (cml_id, user_id) ); -CREATE OR REPLACE FUNCTION update_cml_stats(target_cml_id TEXT) RETURNS VOID AS $$ +-- update_cml_stats(target_cml_id, target_user_id) +-- +-- target_user_id defaults to 'user1' so the existing single-argument call +-- sites in the parser continue to work until PR3 updates them. +CREATE OR REPLACE FUNCTION update_cml_stats( + target_cml_id TEXT, + target_user_id TEXT DEFAULT 'user1' +) RETURNS VOID AS $$ BEGIN INSERT INTO cml_stats ( cml_id, + user_id, total_records, valid_records, null_records, @@ -50,35 +65,49 @@ BEGIN ) SELECT cd.cml_id::text, - COUNT(*) as total_records, - COUNT(CASE WHEN cd.rsl IS NOT NULL THEN 1 END) as valid_records, - COUNT(CASE WHEN cd.rsl IS NULL THEN 1 END) as null_records, - ROUND(100.0 * COUNT(CASE WHEN cd.rsl IS NOT NULL THEN 1 END) / COUNT(*), 2) as completeness_percent, - MIN(cd.rsl) as min_rsl, - MAX(cd.rsl) as max_rsl, - ROUND(AVG(cd.rsl)::numeric, 2) as mean_rsl, - ROUND(STDDEV(cd.rsl)::numeric, 2) as stddev_rsl, - (SELECT rsl FROM cml_data WHERE cml_id = cd.cml_id ORDER BY time DESC LIMIT 1) as last_rsl, + target_user_id, + COUNT(*) AS total_records, + COUNT(CASE WHEN cd.rsl IS NOT NULL THEN 1 END) AS valid_records, + COUNT(CASE WHEN cd.rsl IS NULL THEN 1 END) AS null_records, + ROUND( + 100.0 * COUNT(CASE WHEN cd.rsl IS NOT NULL THEN 1 END) / COUNT(*), + 2 + ) AS completeness_percent, + MIN(cd.rsl) AS min_rsl, + MAX(cd.rsl) AS max_rsl, + ROUND(AVG(cd.rsl)::numeric, 2) AS mean_rsl, + ROUND(STDDEV(cd.rsl)::numeric, 2) AS stddev_rsl, + ( + SELECT rsl FROM cml_data + WHERE cml_id = cd.cml_id + AND user_id = target_user_id + ORDER BY time DESC LIMIT 1 + ) AS last_rsl, NOW() FROM cml_data cd - WHERE cd.cml_id = target_cml_id + WHERE cd.cml_id = target_cml_id + AND cd.user_id = target_user_id GROUP BY cd.cml_id - ON CONFLICT (cml_id) DO UPDATE SET - total_records = EXCLUDED.total_records, - valid_records = EXCLUDED.valid_records, - null_records = EXCLUDED.null_records, + ON CONFLICT (cml_id, user_id) DO UPDATE SET + total_records = EXCLUDED.total_records, + valid_records = EXCLUDED.valid_records, + null_records = EXCLUDED.null_records, completeness_percent = EXCLUDED.completeness_percent, - min_rsl = EXCLUDED.min_rsl, - max_rsl = EXCLUDED.max_rsl, - mean_rsl = EXCLUDED.mean_rsl, - stddev_rsl = EXCLUDED.stddev_rsl, - last_rsl = EXCLUDED.last_rsl, - last_update = EXCLUDED.last_update; + min_rsl = EXCLUDED.min_rsl, + max_rsl = EXCLUDED.max_rsl, + mean_rsl = EXCLUDED.mean_rsl, + stddev_rsl = EXCLUDED.stddev_rsl, + last_rsl = EXCLUDED.last_rsl, + last_update = EXCLUDED.last_update; END; $$ LANGUAGE plpgsql; SELECT create_hypertable('cml_data', 'time'); +-- Per-user lookup indexes. +CREATE INDEX idx_cml_data_user_id ON cml_data (user_id); +CREATE INDEX idx_cml_metadata_user_id ON cml_metadata (user_id); + -- Index is created by the archive_loader service after bulk data load (faster COPY). -- If no archive data is loaded, create it manually: -- CREATE INDEX idx_cml_data_cml_id ON cml_data (cml_id, time DESC); @@ -93,6 +122,7 @@ CREATE MATERIALIZED VIEW cml_data_1h WITH (timescaledb.continuous) AS SELECT time_bucket('1 hour', time) AS bucket, + user_id, cml_id, sublink_id, MIN(rsl) AS rsl_min, @@ -102,7 +132,7 @@ SELECT MAX(tsl) AS tsl_max, AVG(tsl) AS tsl_avg FROM cml_data -GROUP BY bucket, cml_id, sublink_id +GROUP BY bucket, user_id, cml_id, sublink_id WITH NO DATA; -- Automatically refresh every hour, covering up to 2 days of history. @@ -130,7 +160,7 @@ SELECT add_continuous_aggregate_policy('cml_data_1h', -- --------------------------------------------------------------------------- ALTER TABLE cml_data SET ( timescaledb.compress, - timescaledb.compress_segmentby = 'cml_id, sublink_id', + timescaledb.compress_segmentby = 'user_id, cml_id, sublink_id', timescaledb.compress_orderby = 'time DESC' ); diff --git a/database/migrations/001_add_user_id.sql b/database/migrations/001_add_user_id.sql new file mode 100644 index 0000000..2153ca8 --- /dev/null +++ b/database/migrations/001_add_user_id.sql @@ -0,0 +1,123 @@ +-- Migration 001: Add user_id columns to all tables +-- +-- Part of PR feat/db-add-user-id (multi-user RLS support, Phase 1). +-- Backward-compatible: existing parser continues to work unchanged. +-- - DEFAULT 'user1' on each user_id column means un-modified INSERT +-- statements (no user_id supplied) keep writing to the single user. +-- - UNIQUE (cml_id, sublink_id) on cml_metadata keeps the parser's +-- ON CONFLICT (cml_id, sublink_id) clause valid until PR3 updates it. +-- - update_cml_stats gains a second parameter (DEFAULT 'user1') so the +-- existing single-argument call site still compiles and runs correctly. +-- +-- Apply this to an existing deployment with: +-- docker compose exec database psql -U myuser -d mydatabase \ +-- -f /path/to/001_add_user_id.sql +-- +-- Rollback: restore from the backup taken before running this file. + +-- --------------------------------------------------------------------------- +-- Step 1: Add nullable user_id column to each table, back-fill existing rows +-- to 'user1', then tighten to NOT NULL with a DEFAULT for new rows. +-- --------------------------------------------------------------------------- + +ALTER TABLE cml_data ADD COLUMN IF NOT EXISTS user_id TEXT; +ALTER TABLE cml_metadata ADD COLUMN IF NOT EXISTS user_id TEXT; +ALTER TABLE cml_stats ADD COLUMN IF NOT EXISTS user_id TEXT; + +UPDATE cml_data SET user_id = 'user1' WHERE user_id IS NULL; +UPDATE cml_metadata SET user_id = 'user1' WHERE user_id IS NULL; +UPDATE cml_stats SET user_id = 'user1' WHERE user_id IS NULL; + +ALTER TABLE cml_data ALTER COLUMN user_id SET NOT NULL; +ALTER TABLE cml_data ALTER COLUMN user_id SET DEFAULT 'user1'; +ALTER TABLE cml_metadata ALTER COLUMN user_id SET NOT NULL; +ALTER TABLE cml_metadata ALTER COLUMN user_id SET DEFAULT 'user1'; +ALTER TABLE cml_stats ALTER COLUMN user_id SET NOT NULL; +ALTER TABLE cml_stats ALTER COLUMN user_id SET DEFAULT 'user1'; + +-- --------------------------------------------------------------------------- +-- Step 2: Update primary keys on cml_metadata and cml_stats to be +-- (…, user_id). A UNIQUE (cml_id, sublink_id) index is kept on +-- cml_metadata so the parser's existing ON CONFLICT clause stays +-- valid; it will be dropped in PR3 once the parser is updated. +-- --------------------------------------------------------------------------- + +ALTER TABLE cml_metadata DROP CONSTRAINT IF EXISTS cml_metadata_pkey; +ALTER TABLE cml_metadata ADD PRIMARY KEY (cml_id, sublink_id, user_id); +-- Keep for backward compat with parser's ON CONFLICT (cml_id, sublink_id). +-- TODO: drop this constraint in PR3 (feat/parser-user-id). +ALTER TABLE cml_metadata + ADD CONSTRAINT cml_metadata_sublink_unique UNIQUE (cml_id, sublink_id); + +ALTER TABLE cml_stats DROP CONSTRAINT IF EXISTS cml_stats_pkey; +ALTER TABLE cml_stats ADD PRIMARY KEY (cml_id, user_id); + +-- --------------------------------------------------------------------------- +-- Step 3: Add indexes for per-user query performance. +-- --------------------------------------------------------------------------- + +CREATE INDEX IF NOT EXISTS idx_cml_data_user_id ON cml_data (user_id); +CREATE INDEX IF NOT EXISTS idx_cml_metadata_user_id ON cml_metadata (user_id); + +-- --------------------------------------------------------------------------- +-- Step 4: Replace update_cml_stats with a version that accepts an optional +-- target_user_id (DEFAULT 'user1') for backward compatibility. +-- --------------------------------------------------------------------------- + +CREATE OR REPLACE FUNCTION update_cml_stats( + target_cml_id TEXT, + target_user_id TEXT DEFAULT 'user1' +) RETURNS VOID AS $$ +BEGIN + INSERT INTO cml_stats ( + cml_id, + user_id, + total_records, + valid_records, + null_records, + completeness_percent, + min_rsl, + max_rsl, + mean_rsl, + stddev_rsl, + last_rsl, + last_update + ) + SELECT + cd.cml_id::text, + target_user_id, + COUNT(*) AS total_records, + COUNT(CASE WHEN cd.rsl IS NOT NULL THEN 1 END) AS valid_records, + COUNT(CASE WHEN cd.rsl IS NULL THEN 1 END) AS null_records, + ROUND( + 100.0 * COUNT(CASE WHEN cd.rsl IS NOT NULL THEN 1 END) / COUNT(*), + 2 + ) AS completeness_percent, + MIN(cd.rsl) AS min_rsl, + MAX(cd.rsl) AS max_rsl, + ROUND(AVG(cd.rsl)::numeric, 2) AS mean_rsl, + ROUND(STDDEV(cd.rsl)::numeric, 2) AS stddev_rsl, + ( + SELECT rsl FROM cml_data + WHERE cml_id = cd.cml_id + AND user_id = target_user_id + ORDER BY time DESC LIMIT 1 + ) AS last_rsl, + NOW() + FROM cml_data cd + WHERE cd.cml_id = target_cml_id + AND cd.user_id = target_user_id + GROUP BY cd.cml_id + ON CONFLICT (cml_id, user_id) DO UPDATE SET + total_records = EXCLUDED.total_records, + valid_records = EXCLUDED.valid_records, + null_records = EXCLUDED.null_records, + completeness_percent = EXCLUDED.completeness_percent, + min_rsl = EXCLUDED.min_rsl, + max_rsl = EXCLUDED.max_rsl, + mean_rsl = EXCLUDED.mean_rsl, + stddev_rsl = EXCLUDED.stddev_rsl, + last_rsl = EXCLUDED.last_rsl, + last_update = EXCLUDED.last_update; +END; +$$ LANGUAGE plpgsql; diff --git a/database/migrations/002_update_compression_segmentby.sql b/database/migrations/002_update_compression_segmentby.sql new file mode 100644 index 0000000..b8e5609 --- /dev/null +++ b/database/migrations/002_update_compression_segmentby.sql @@ -0,0 +1,34 @@ +-- Migration 002: Update cml_data compression segmentby to include user_id +-- +-- Part of PR feat/db-add-user-id. +-- Run this AFTER 001_add_user_id.sql. +-- +-- Adds user_id to compress_segmentby so that per-user range scans +-- decompress only the relevant segment instead of the full chunk. +-- The decompress → alter → recompress cycle is non-destructive; no data +-- is lost if the process is interrupted (TimescaleDB keeps the original +-- uncompressed chunks until recompression succeeds). +-- +-- Apply with: +-- docker compose exec database psql -U myuser -d mydatabase \ +-- -f /path/to/002_update_compression_segmentby.sql + +-- Step 1: Decompress all currently-compressed chunks so that the +-- compress_segmentby setting can be changed. +SELECT decompress_chunk(c) +FROM show_chunks('cml_data') c +WHERE _timescaledb_internal.is_compressed_chunk(c); + +-- Step 2: Update the compression settings to include user_id as the +-- leading segment key. user_id first ensures that a query for a +-- single user decompresses only their segments. +ALTER TABLE cml_data SET ( + timescaledb.compress, + timescaledb.compress_segmentby = 'user_id, cml_id, sublink_id', + timescaledb.compress_orderby = 'time DESC' +); + +-- Step 3: Re-compress chunks that were already old enough for compression +-- (i.e. older than 7 days per the existing policy). +SELECT compress_chunk(c) +FROM show_chunks('cml_data', older_than => INTERVAL '7 days') c; diff --git a/database/migrations/003_update_aggregate_user_id.sql b/database/migrations/003_update_aggregate_user_id.sql new file mode 100644 index 0000000..77943e7 --- /dev/null +++ b/database/migrations/003_update_aggregate_user_id.sql @@ -0,0 +1,49 @@ +-- Migration 003: Recreate cml_data_1h continuous aggregate with user_id +-- +-- Part of PR feat/db-add-user-id. +-- Run this AFTER 001_add_user_id.sql. Order relative to 002 is not critical. +-- +-- The existing cml_data_1h view does not include user_id in its GROUP BY, +-- so RLS policies (added in PR2) cannot be applied to it. The view must be +-- dropped and recreated; this is a non-destructive operation because the +-- continuous aggregate is re-materialised from the underlying raw cml_data. +-- +-- A brief gap in Grafana's hourly-aggregate data is expected while the +-- refresh policy backfills the view (~1 refresh cycle, up to 1 hour). +-- Queries that fall in the gap automatically fall through to raw cml_data. +-- +-- Apply with: +-- docker compose exec database psql -U myuser -d mydatabase \ +-- -f /path/to/003_update_aggregate_user_id.sql + +-- Step 1: Remove the old view and its dependent policy + grants. +DROP MATERIALIZED VIEW IF EXISTS cml_data_1h CASCADE; + +-- Step 2: Recreate with user_id in SELECT and GROUP BY. +CREATE MATERIALIZED VIEW cml_data_1h +WITH (timescaledb.continuous) AS +SELECT + time_bucket('1 hour', time) AS bucket, + user_id, + cml_id, + sublink_id, + MIN(rsl) AS rsl_min, + MAX(rsl) AS rsl_max, + AVG(rsl) AS rsl_avg, + MIN(tsl) AS tsl_min, + MAX(tsl) AS tsl_max, + AVG(tsl) AS tsl_avg +FROM cml_data +GROUP BY bucket, user_id, cml_id, sublink_id +WITH NO DATA; + +-- Step 3: Restore the refresh policy (same parameters as before). +SELECT add_continuous_aggregate_policy('cml_data_1h', + start_offset => INTERVAL '2 days', + end_offset => INTERVAL '1 hour', + schedule_interval => INTERVAL '1 hour' +); + +-- Step 4: Optional — trigger an immediate backfill rather than waiting for +-- the next scheduled refresh. Remove the leading '--' to enable. +-- CALL refresh_continuous_aggregate('cml_data_1h', NULL, NULL); From 99d7dda54524a454be6b07b493a8e237134bc5e4 Mon Sep 17 00:00:00 2001 From: Christian Chwala Date: Tue, 31 Mar 2026 22:28:30 +0200 Subject: [PATCH 2/2] fix(db): use timescaledb_information.chunks to detect compressed chunks _timescaledb_internal.is_compressed_chunk() does not exist in all TimescaleDB versions. Use the stable public view instead. --- .../migrations/002_update_compression_segmentby.sql | 10 +++++++--- 1 file changed, 7 insertions(+), 3 deletions(-) diff --git a/database/migrations/002_update_compression_segmentby.sql b/database/migrations/002_update_compression_segmentby.sql index b8e5609..059e87f 100644 --- a/database/migrations/002_update_compression_segmentby.sql +++ b/database/migrations/002_update_compression_segmentby.sql @@ -15,9 +15,13 @@ -- Step 1: Decompress all currently-compressed chunks so that the -- compress_segmentby setting can be changed. -SELECT decompress_chunk(c) -FROM show_chunks('cml_data') c -WHERE _timescaledb_internal.is_compressed_chunk(c); +-- Uses timescaledb_information.chunks (works across all TimescaleDB versions). +SELECT decompress_chunk( + format('%I.%I', chunk_schema, chunk_name)::regclass +) +FROM timescaledb_information.chunks +WHERE hypertable_name = 'cml_data' + AND is_compressed = true; -- Step 2: Update the compression settings to include user_id as the -- leading segment key. user_id first ensures that a query for a