74 changes: 74 additions & 0 deletions database/MIGRATION.md
@@ -1,5 +1,79 @@
# Database Migration Guide

---

## PR `feat/db-add-user-id` — Add `user_id` for multi-user RLS support

**Branch:** `feat/db-add-user-id`

`init.sql` runs only on a fresh database volume, so when deploying this branch
to a machine that already has data, you must apply the three migration files in
order.

### Changes

| File | What it does |
|------|-------------|
| `migrations/001_add_user_id.sql` | Adds `user_id TEXT NOT NULL DEFAULT 'user1'` to `cml_data`, `cml_metadata`, `cml_stats`; updates primary keys; adds per-user indexes; updates `update_cml_stats` to accept an optional `target_user_id` (DEFAULT `'user1'`) |
| `migrations/002_update_compression_segmentby.sql` | Decompresses existing chunks, adds `user_id` as leading key in `compress_segmentby`, re-compresses old chunks |
| `migrations/003_update_aggregate_user_id.sql` | Drops and recreates `cml_data_1h` with `user_id` in `SELECT` and `GROUP BY` |

### Backward compatibility

All three migrations are **backward-compatible** with the existing single-user
parser:

- `DEFAULT 'user1'` on each `user_id` column means unmodified `INSERT`
  statements (those that supply no `user_id` column) keep writing to `user1`.
- A `UNIQUE (cml_id, sublink_id)` constraint is kept on `cml_metadata` so the
parser's `ON CONFLICT (cml_id, sublink_id)` clause stays valid.
*(This constraint is dropped in PR `feat/parser-user-id`.)*
- `update_cml_stats(cml_id)` — the existing single-argument call — still works
because `target_user_id` defaults to `'user1'`.
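
After the migration, the existing call and the explicit two-argument form are
equivalent (`cml_042` below is a made-up illustrative id):

```sql
-- Existing single-argument call: target_user_id falls back to 'user1'.
SELECT update_cml_stats('cml_042');

-- Equivalent explicit form after the migration:
SELECT update_cml_stats('cml_042', 'user1');
```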

### Steps

**1. Back up the database**

```bash
docker compose exec database pg_dump -U myuser -d mydatabase \
> backup_pre_multiuser_$(date +%Y%m%d_%H%M%S).sql
```

**2. Pull and rebuild**

```bash
git pull origin feat/db-add-user-id # or merge to main first
docker compose up -d --build
```

**3. Apply the migrations in order**

```bash
docker compose exec -T database psql -U myuser -d mydatabase \
< database/migrations/001_add_user_id.sql

docker compose exec -T database psql -U myuser -d mydatabase \
< database/migrations/002_update_compression_segmentby.sql

docker compose exec -T database psql -U myuser -d mydatabase \
< database/migrations/003_update_aggregate_user_id.sql
```

**4. (Optional) Trigger an immediate aggregate backfill**

The refresh policy will backfill `cml_data_1h` within the next hour.
To do it immediately:

```bash
docker compose exec database psql -U myuser -d mydatabase -c \
"CALL refresh_continuous_aggregate('cml_data_1h', NULL, NULL);"
```
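
**5. (Optional) Sanity checks**

A few illustrative checks (a sketch; adjust names to your deployment —
`pg_constraint` and `pg_get_constraintdef` are standard PostgreSQL):

```sql
-- All pre-existing rows should have been back-filled to 'user1'.
SELECT user_id, COUNT(*) FROM cml_data GROUP BY user_id;

-- The cml_stats primary key should now be (cml_id, user_id).
SELECT conname, pg_get_constraintdef(oid)
FROM pg_constraint
WHERE conrelid = 'cml_stats'::regclass AND contype = 'p';
```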

**Rollback:** restore from the backup taken in step 1.

---

## `cml_data_1h` continuous aggregate

**Branch:** `feature/performance-and-grafana-improvements`
84 changes: 57 additions & 27 deletions database/init.sql
@@ -3,7 +3,8 @@ CREATE TABLE cml_data (
cml_id TEXT NOT NULL,
sublink_id TEXT NOT NULL,
rsl REAL,
tsl REAL
tsl REAL,
user_id TEXT NOT NULL DEFAULT 'user1'
);

CREATE TABLE cml_metadata (
@@ -16,11 +17,16 @@ CREATE TABLE cml_metadata (
frequency REAL,
polarization TEXT,
length REAL,
PRIMARY KEY (cml_id, sublink_id)
user_id TEXT NOT NULL DEFAULT 'user1',
PRIMARY KEY (cml_id, sublink_id, user_id),
-- Backward-compat constraint: keeps the parser's ON CONFLICT (cml_id, sublink_id)
-- clause valid until PR3 (feat/parser-user-id) updates it.
UNIQUE (cml_id, sublink_id)
);

CREATE TABLE cml_stats (
cml_id TEXT PRIMARY KEY,
cml_id TEXT NOT NULL,
user_id TEXT NOT NULL DEFAULT 'user1',
total_records BIGINT,
valid_records BIGINT,
null_records BIGINT,
@@ -30,13 +36,22 @@ CREATE TABLE cml_stats (
mean_rsl REAL,
stddev_rsl REAL,
last_rsl REAL,
last_update TIMESTAMPTZ DEFAULT NOW()
last_update TIMESTAMPTZ DEFAULT NOW(),
PRIMARY KEY (cml_id, user_id)
);

CREATE OR REPLACE FUNCTION update_cml_stats(target_cml_id TEXT) RETURNS VOID AS $$
-- update_cml_stats(target_cml_id, target_user_id)
--
-- target_user_id defaults to 'user1' so the existing single-argument call
-- sites in the parser continue to work until PR3 updates them.
CREATE OR REPLACE FUNCTION update_cml_stats(
target_cml_id TEXT,
target_user_id TEXT DEFAULT 'user1'
) RETURNS VOID AS $$
BEGIN
INSERT INTO cml_stats (
cml_id,
user_id,
total_records,
valid_records,
null_records,
@@ -50,35 +65,49 @@ BEGIN
)
SELECT
cd.cml_id::text,
COUNT(*) as total_records,
COUNT(CASE WHEN cd.rsl IS NOT NULL THEN 1 END) as valid_records,
COUNT(CASE WHEN cd.rsl IS NULL THEN 1 END) as null_records,
ROUND(100.0 * COUNT(CASE WHEN cd.rsl IS NOT NULL THEN 1 END) / COUNT(*), 2) as completeness_percent,
MIN(cd.rsl) as min_rsl,
MAX(cd.rsl) as max_rsl,
ROUND(AVG(cd.rsl)::numeric, 2) as mean_rsl,
ROUND(STDDEV(cd.rsl)::numeric, 2) as stddev_rsl,
(SELECT rsl FROM cml_data WHERE cml_id = cd.cml_id ORDER BY time DESC LIMIT 1) as last_rsl,
target_user_id,
COUNT(*) AS total_records,
COUNT(CASE WHEN cd.rsl IS NOT NULL THEN 1 END) AS valid_records,
COUNT(CASE WHEN cd.rsl IS NULL THEN 1 END) AS null_records,
ROUND(
100.0 * COUNT(CASE WHEN cd.rsl IS NOT NULL THEN 1 END) / COUNT(*),
2
) AS completeness_percent,
MIN(cd.rsl) AS min_rsl,
MAX(cd.rsl) AS max_rsl,
ROUND(AVG(cd.rsl)::numeric, 2) AS mean_rsl,
ROUND(STDDEV(cd.rsl)::numeric, 2) AS stddev_rsl,
(
SELECT rsl FROM cml_data
WHERE cml_id = cd.cml_id
AND user_id = target_user_id
ORDER BY time DESC LIMIT 1
) AS last_rsl,
NOW()
FROM cml_data cd
WHERE cd.cml_id = target_cml_id
WHERE cd.cml_id = target_cml_id
AND cd.user_id = target_user_id
GROUP BY cd.cml_id
ON CONFLICT (cml_id) DO UPDATE SET
total_records = EXCLUDED.total_records,
valid_records = EXCLUDED.valid_records,
null_records = EXCLUDED.null_records,
ON CONFLICT (cml_id, user_id) DO UPDATE SET
total_records = EXCLUDED.total_records,
valid_records = EXCLUDED.valid_records,
null_records = EXCLUDED.null_records,
completeness_percent = EXCLUDED.completeness_percent,
min_rsl = EXCLUDED.min_rsl,
max_rsl = EXCLUDED.max_rsl,
mean_rsl = EXCLUDED.mean_rsl,
stddev_rsl = EXCLUDED.stddev_rsl,
last_rsl = EXCLUDED.last_rsl,
last_update = EXCLUDED.last_update;
min_rsl = EXCLUDED.min_rsl,
max_rsl = EXCLUDED.max_rsl,
mean_rsl = EXCLUDED.mean_rsl,
stddev_rsl = EXCLUDED.stddev_rsl,
last_rsl = EXCLUDED.last_rsl,
last_update = EXCLUDED.last_update;
END;
$$ LANGUAGE plpgsql;

SELECT create_hypertable('cml_data', 'time');

-- Per-user lookup indexes.
CREATE INDEX idx_cml_data_user_id ON cml_data (user_id);
CREATE INDEX idx_cml_metadata_user_id ON cml_metadata (user_id);

-- Index is created by the archive_loader service after bulk data load (faster COPY).
-- If no archive data is loaded, create it manually:
-- CREATE INDEX idx_cml_data_cml_id ON cml_data (cml_id, time DESC);
@@ -93,6 +122,7 @@ CREATE MATERIALIZED VIEW cml_data_1h
WITH (timescaledb.continuous) AS
SELECT
time_bucket('1 hour', time) AS bucket,
user_id,
cml_id,
sublink_id,
MIN(rsl) AS rsl_min,
@@ -102,7 +132,7 @@ SELECT
MAX(tsl) AS tsl_max,
AVG(tsl) AS tsl_avg
FROM cml_data
GROUP BY bucket, cml_id, sublink_id
GROUP BY bucket, user_id, cml_id, sublink_id
WITH NO DATA;

-- Automatically refresh every hour, covering up to 2 days of history.
@@ -130,7 +160,7 @@ SELECT add_continuous_aggregate_policy('cml_data_1h',
-- ---------------------------------------------------------------------------
ALTER TABLE cml_data SET (
timescaledb.compress,
timescaledb.compress_segmentby = 'cml_id, sublink_id',
timescaledb.compress_segmentby = 'user_id, cml_id, sublink_id',
timescaledb.compress_orderby = 'time DESC'
);

123 changes: 123 additions & 0 deletions database/migrations/001_add_user_id.sql
@@ -0,0 +1,123 @@
-- Migration 001: Add user_id columns to all tables
--
-- Part of PR feat/db-add-user-id (multi-user RLS support, Phase 1).
-- Backward-compatible: existing parser continues to work unchanged.
-- - DEFAULT 'user1' on each user_id column means unmodified INSERT
--   statements (no user_id supplied) keep writing to the single user.
-- - UNIQUE (cml_id, sublink_id) on cml_metadata keeps the parser's
-- ON CONFLICT (cml_id, sublink_id) clause valid until PR3 updates it.
-- - update_cml_stats gains a second parameter (DEFAULT 'user1') so the
-- existing single-argument call site still compiles and runs correctly.
--
-- Apply this to an existing deployment with:
-- docker compose exec database psql -U myuser -d mydatabase \
-- -f /path/to/001_add_user_id.sql
--
-- Rollback: restore from the backup taken before running this file.

-- ---------------------------------------------------------------------------
-- Step 1: Add nullable user_id column to each table, back-fill existing rows
-- to 'user1', then tighten to NOT NULL with a DEFAULT for new rows.
-- ---------------------------------------------------------------------------

ALTER TABLE cml_data ADD COLUMN IF NOT EXISTS user_id TEXT;
ALTER TABLE cml_metadata ADD COLUMN IF NOT EXISTS user_id TEXT;
ALTER TABLE cml_stats ADD COLUMN IF NOT EXISTS user_id TEXT;

UPDATE cml_data SET user_id = 'user1' WHERE user_id IS NULL;
UPDATE cml_metadata SET user_id = 'user1' WHERE user_id IS NULL;
UPDATE cml_stats SET user_id = 'user1' WHERE user_id IS NULL;

ALTER TABLE cml_data ALTER COLUMN user_id SET NOT NULL;
ALTER TABLE cml_data ALTER COLUMN user_id SET DEFAULT 'user1';
ALTER TABLE cml_metadata ALTER COLUMN user_id SET NOT NULL;
ALTER TABLE cml_metadata ALTER COLUMN user_id SET DEFAULT 'user1';
ALTER TABLE cml_stats ALTER COLUMN user_id SET NOT NULL;
ALTER TABLE cml_stats ALTER COLUMN user_id SET DEFAULT 'user1';

-- ---------------------------------------------------------------------------
-- Step 2: Update primary keys on cml_metadata and cml_stats to be
-- (…, user_id). A UNIQUE (cml_id, sublink_id) index is kept on
-- cml_metadata so the parser's existing ON CONFLICT clause stays
-- valid; it will be dropped in PR3 once the parser is updated.
-- ---------------------------------------------------------------------------

ALTER TABLE cml_metadata DROP CONSTRAINT IF EXISTS cml_metadata_pkey;
ALTER TABLE cml_metadata ADD PRIMARY KEY (cml_id, sublink_id, user_id);
-- Keep for backward compat with parser's ON CONFLICT (cml_id, sublink_id).
-- TODO: drop this constraint in PR3 (feat/parser-user-id).
ALTER TABLE cml_metadata
ADD CONSTRAINT cml_metadata_sublink_unique UNIQUE (cml_id, sublink_id);

ALTER TABLE cml_stats DROP CONSTRAINT IF EXISTS cml_stats_pkey;
ALTER TABLE cml_stats ADD PRIMARY KEY (cml_id, user_id);

-- ---------------------------------------------------------------------------
-- Step 3: Add indexes for per-user query performance.
-- ---------------------------------------------------------------------------

CREATE INDEX IF NOT EXISTS idx_cml_data_user_id ON cml_data (user_id);
CREATE INDEX IF NOT EXISTS idx_cml_metadata_user_id ON cml_metadata (user_id);

-- ---------------------------------------------------------------------------
-- Step 4: Replace update_cml_stats with a version that accepts an optional
-- target_user_id (DEFAULT 'user1') for backward compatibility.
-- ---------------------------------------------------------------------------

CREATE OR REPLACE FUNCTION update_cml_stats(
target_cml_id TEXT,
target_user_id TEXT DEFAULT 'user1'
) RETURNS VOID AS $$
BEGIN
INSERT INTO cml_stats (
cml_id,
user_id,
total_records,
valid_records,
null_records,
completeness_percent,
min_rsl,
max_rsl,
mean_rsl,
stddev_rsl,
last_rsl,
last_update
)
SELECT
cd.cml_id::text,
target_user_id,
COUNT(*) AS total_records,
COUNT(CASE WHEN cd.rsl IS NOT NULL THEN 1 END) AS valid_records,
COUNT(CASE WHEN cd.rsl IS NULL THEN 1 END) AS null_records,
ROUND(
100.0 * COUNT(CASE WHEN cd.rsl IS NOT NULL THEN 1 END) / COUNT(*),
2
) AS completeness_percent,
MIN(cd.rsl) AS min_rsl,
MAX(cd.rsl) AS max_rsl,
ROUND(AVG(cd.rsl)::numeric, 2) AS mean_rsl,
ROUND(STDDEV(cd.rsl)::numeric, 2) AS stddev_rsl,
(
SELECT rsl FROM cml_data
WHERE cml_id = cd.cml_id
AND user_id = target_user_id
ORDER BY time DESC LIMIT 1
) AS last_rsl,
NOW()
FROM cml_data cd
WHERE cd.cml_id = target_cml_id
AND cd.user_id = target_user_id
GROUP BY cd.cml_id
ON CONFLICT (cml_id, user_id) DO UPDATE SET
total_records = EXCLUDED.total_records,
valid_records = EXCLUDED.valid_records,
null_records = EXCLUDED.null_records,
completeness_percent = EXCLUDED.completeness_percent,
min_rsl = EXCLUDED.min_rsl,
max_rsl = EXCLUDED.max_rsl,
mean_rsl = EXCLUDED.mean_rsl,
stddev_rsl = EXCLUDED.stddev_rsl,
last_rsl = EXCLUDED.last_rsl,
last_update = EXCLUDED.last_update;
END;
$$ LANGUAGE plpgsql;
38 changes: 38 additions & 0 deletions database/migrations/002_update_compression_segmentby.sql
@@ -0,0 +1,38 @@
-- Migration 002: Update cml_data compression segmentby to include user_id
--
-- Part of PR feat/db-add-user-id.
-- Run this AFTER 001_add_user_id.sql.
--
-- Adds user_id to compress_segmentby so that per-user range scans
-- decompress only the relevant segment instead of the full chunk.
-- The decompress → alter → recompress cycle is non-destructive: the rows
-- stay queryable in the hypertable throughout. If the run is interrupted,
-- affected chunks are simply left uncompressed, and re-running this file is
-- safe because Step 1 only touches chunks that are still compressed.
--
-- Apply with:
-- docker compose exec database psql -U myuser -d mydatabase \
-- -f /path/to/002_update_compression_segmentby.sql

-- Step 1: Decompress all currently-compressed chunks so that the
-- compress_segmentby setting can be changed.
-- Uses timescaledb_information.chunks (available since TimescaleDB 2.0).
SELECT decompress_chunk(
format('%I.%I', chunk_schema, chunk_name)::regclass
)
FROM timescaledb_information.chunks
WHERE hypertable_name = 'cml_data'
AND is_compressed = true;

-- Step 2: Update the compression settings to include user_id as the
-- leading segment key. user_id first ensures that a query for a
-- single user decompresses only their segments.
ALTER TABLE cml_data SET (
timescaledb.compress,
timescaledb.compress_segmentby = 'user_id, cml_id, sublink_id',
timescaledb.compress_orderby = 'time DESC'
);

-- Step 3: Re-compress chunks that were already old enough for compression
-- (i.e. older than 7 days per the existing policy).
SELECT compress_chunk(c)
FROM show_chunks('cml_data', older_than => INTERVAL '7 days') c;
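
-- (Optional) Sanity checks after applying this file. A sketch: the
-- timescaledb_information views referenced below exist in TimescaleDB 2.x.
--
--   -- Expect user_id, cml_id, sublink_id as segmentby columns:
--   SELECT attname, segmentby_column_index
--   FROM timescaledb_information.compression_settings
--   WHERE hypertable_name = 'cml_data'
--   ORDER BY segmentby_column_index;
--
--   -- Compressed vs uncompressed chunk counts:
--   SELECT is_compressed, COUNT(*)
--   FROM timescaledb_information.chunks
--   WHERE hypertable_name = 'cml_data'
--   GROUP BY is_compressed;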