diff --git a/database/init.sql b/database/init.sql
index ce79455..0a05351 100644
--- a/database/init.sql
+++ b/database/init.sql
@@ -112,4 +112,27 @@ SELECT add_continuous_aggregate_policy('cml_data_1h',
     start_offset => INTERVAL '2 days',
     end_offset => INTERVAL '1 hour',
     schedule_interval => INTERVAL '1 hour'
-);
\ No newline at end of file
+);
+
+-- ---------------------------------------------------------------------------
+-- Compression for cml_data chunks older than 7 days.
+--
+-- compress_segmentby: each compressed segment contains one (cml_id, sublink_id)
+-- pair, so a query filtered to a single CML decompresses only ~1/728th of a
+-- chunk — not the whole thing.
+-- compress_orderby: matches the query pattern (time range scans), allowing
+-- skip-scan decompression for narrow time windows within a segment.
+--
+-- At an estimated ~10-20x compression ratio, the last month of data should fit
+-- in shared_buffers after a cache warm-up; re-check this as streams are added.
+-- The current uncompressed week chunk is left untouched so real-time ingestion
+-- and detail-view queries on recent data have no decompression overhead.
+-- ---------------------------------------------------------------------------
+ALTER TABLE cml_data SET (
+    timescaledb.compress,
+    timescaledb.compress_segmentby = 'cml_id, sublink_id',
+    timescaledb.compress_orderby = 'time DESC'
+);
+
+-- if_not_exists: re-running this script must not fail on an existing policy.
+SELECT add_compression_policy('cml_data', INTERVAL '7 days', if_not_exists => true);
diff --git a/docker-compose.yml b/docker-compose.yml
index e844737..e4a779f 100644
--- a/docker-compose.yml
+++ b/docker-compose.yml
@@ -73,6 +73,19 @@ services:
     build: ./database
     ports:
       - "5432:5432"
+    # Tune PostgreSQL memory for an 8 GB VM.
+    # shared_buffers: keep recently-used chunks in RAM (default is only 128 MB).
+    # effective_cache_size: hints to the planner how much OS page cache is available.
+    # work_mem: memory per sort/hash operation (not per connection); speeds up ORDER BY on large result sets.
+    # maintenance_work_mem: memory for maintenance operations such as VACUUM and CREATE INDEX.
+    # random_page_cost: tell the planner data is effectively cached, prefer index scans.
+    command: >
+      postgres
+      -c shared_buffers=2GB
+      -c effective_cache_size=4GB
+      -c work_mem=64MB
+      -c maintenance_work_mem=256MB
+      -c random_page_cost=1.1
     healthcheck:
       test: ["CMD-SHELL", "pg_isready -U myuser -d mydatabase"]
       interval: 5s
diff --git a/grafana/provisioning/dashboards/definitions/cml-realtime.json b/grafana/provisioning/dashboards/definitions/cml-realtime.json
index 831d2ca..dff6b04 100644
--- a/grafana/provisioning/dashboards/definitions/cml-realtime.json
+++ b/grafana/provisioning/dashboards/definitions/cml-realtime.json
@@ -105,7 +105,7 @@
             "scaleDistribution": {
               "type": "linear"
             },
-            "showPoints": "auto",
+            "showPoints": "never",
             "spanNulls": false,
             "stacking": {
               "group": "A",
@@ -373,7 +373,7 @@
           },
           "format": "time_series",
           "rawQuery": true,
-          "rawSql": "SELECT\n bucket AS \"time\",\n sublink_id || ' min' AS metric,\n rsl_min AS value\nFROM cml_data_1h\nWHERE cml_id = '${cml_id}'\n AND bucket >= $__timeFrom()::timestamptz\n AND bucket <= $__timeTo()::timestamptz\nORDER BY 1 ASC",
+          "rawSql": "SELECT\n bucket AS \"time\",\n sublink_id || ' min' AS metric,\n rsl_min AS value\nFROM cml_data_1h\nWHERE cml_id = '${cml_id}'\n AND '${interval}' = 'auto'\n AND EXTRACT(EPOCH FROM ($__timeTo()::timestamptz - $__timeFrom()::timestamptz)) > 259200\n AND bucket >= $__timeFrom()::timestamptz\n AND bucket <= $__timeTo()::timestamptz\nORDER BY 1 ASC",
           "refId": "A"
         },
         {
@@ -383,7 +383,7 @@
           },
           "format": "time_series",
           "rawQuery": true,
-          "rawSql": "SELECT\n bucket AS \"time\",\n sublink_id || ' max' AS metric,\n rsl_max AS value\nFROM cml_data_1h\nWHERE cml_id = '${cml_id}'\n AND bucket >= $__timeFrom()::timestamptz\n AND bucket <= $__timeTo()::timestamptz\nORDER BY 1 ASC",
+          "rawSql": "SELECT\n bucket AS \"time\",\n sublink_id || ' max' AS metric,\n rsl_max AS value\nFROM cml_data_1h\nWHERE cml_id = '${cml_id}'\n AND '${interval}' = 'auto'\n AND EXTRACT(EPOCH FROM ($__timeTo()::timestamptz - $__timeFrom()::timestamptz)) > 259200\n AND bucket >= $__timeFrom()::timestamptz\n AND bucket <= $__timeTo()::timestamptz\nORDER BY 1 ASC",
           "refId": "B"
         },
         {
@@ -403,8 +403,18 @@
           },
           "format": "time_series",
           "rawQuery": true,
-          "rawSql": "SELECT\n time AS \"time\",\n sublink_id AS metric,\n rsl AS value\nFROM cml_data\nWHERE cml_id = '${cml_id}'\n AND (\n ('${interval}' = 'auto' AND EXTRACT(EPOCH FROM ($__timeTo()::timestamptz - $__timeFrom()::timestamptz)) <= 259200)\n OR '${interval}' = 'raw'\n )\n AND time >= $__timeFrom()::timestamptz\n AND time <= $__timeTo()::timestamptz\nORDER BY 1 ASC",
+          "rawSql": "WITH bucketed AS (\n SELECT\n time_bucket('$__interval', time) AS bucket,\n sublink_id,\n MIN(rsl) AS rsl_min,\n MAX(rsl) AS rsl_max,\n AVG(rsl) AS rsl_avg\n FROM cml_data\n WHERE cml_id = '${cml_id}'\n AND '${interval}' = 'auto'\n AND EXTRACT(EPOCH FROM ($__timeTo()::timestamptz - $__timeFrom()::timestamptz)) <= 259200\n AND time >= $__timeFrom()::timestamptz\n AND time <= $__timeTo()::timestamptz\n GROUP BY 1, 2\n)\nSELECT bucket AS \"time\", sublink_id || ' min' AS metric, rsl_min AS value FROM bucketed\nUNION ALL\nSELECT bucket AS \"time\", sublink_id || ' max' AS metric, rsl_max AS value FROM bucketed\nUNION ALL\nSELECT bucket AS \"time\", sublink_id || ' avg' AS metric, rsl_avg AS value FROM bucketed\nORDER BY 1 ASC",
           "refId": "D"
+        },
+        {
+          "datasource": {
+            "type": "grafana-postgresql-datasource",
+            "uid": "PostgreSQL"
+          },
+          "format": "time_series",
+          "rawQuery": true,
+          "rawSql": "SELECT\n time AS \"time\",\n sublink_id AS metric,\n rsl AS value\nFROM cml_data\nWHERE cml_id = '${cml_id}'\n AND '${interval}' = 'raw'\n AND time >= $__timeFrom()::timestamptz\n AND time <= $__timeTo()::timestamptz\nORDER BY 1 ASC",
+          "refId": "E"
         }
       ],
       "title": "CML Time Series - Received Signal Level",
@@ -708,7 +718,7 @@
           },
           "format": "time_series",
           "rawQuery": true,
-          "rawSql": "SELECT\n bucket AS \"time\",\n sublink_id || ' min' AS metric,\n tsl_min AS value\nFROM cml_data_1h\nWHERE cml_id = '${cml_id}'\n AND bucket >= $__timeFrom()::timestamptz\n AND bucket <= $__timeTo()::timestamptz\nORDER BY 1 ASC",
+          "rawSql": "SELECT\n bucket AS \"time\",\n sublink_id || ' min' AS metric,\n tsl_min AS value\nFROM cml_data_1h\nWHERE cml_id = '${cml_id}'\n AND '${interval}' = 'auto'\n AND EXTRACT(EPOCH FROM ($__timeTo()::timestamptz - $__timeFrom()::timestamptz)) > 259200\n AND bucket >= $__timeFrom()::timestamptz\n AND bucket <= $__timeTo()::timestamptz\nORDER BY 1 ASC",
           "refId": "A"
         },
         {
@@ -718,7 +728,7 @@
           },
           "format": "time_series",
           "rawQuery": true,
-          "rawSql": "SELECT\n bucket AS \"time\",\n sublink_id || ' max' AS metric,\n tsl_max AS value\nFROM cml_data_1h\nWHERE cml_id = '${cml_id}'\n AND bucket >= $__timeFrom()::timestamptz\n AND bucket <= $__timeTo()::timestamptz\nORDER BY 1 ASC",
+          "rawSql": "SELECT\n bucket AS \"time\",\n sublink_id || ' max' AS metric,\n tsl_max AS value\nFROM cml_data_1h\nWHERE cml_id = '${cml_id}'\n AND '${interval}' = 'auto'\n AND EXTRACT(EPOCH FROM ($__timeTo()::timestamptz - $__timeFrom()::timestamptz)) > 259200\n AND bucket >= $__timeFrom()::timestamptz\n AND bucket <= $__timeTo()::timestamptz\nORDER BY 1 ASC",
           "refId": "B"
         },
         {
@@ -738,8 +748,18 @@
           },
           "format": "time_series",
           "rawQuery": true,
-          "rawSql": "SELECT\n time AS \"time\",\n sublink_id AS metric,\n tsl AS value\nFROM cml_data\nWHERE cml_id = '${cml_id}'\n AND (\n ('${interval}' = 'auto' AND EXTRACT(EPOCH FROM ($__timeTo()::timestamptz - $__timeFrom()::timestamptz)) <= 259200)\n OR '${interval}' = 'raw'\n )\n AND time >= $__timeFrom()::timestamptz\n AND time <= $__timeTo()::timestamptz\nORDER BY 1 ASC",
+          "rawSql": "SELECT\n time_bucket('$__interval', time) AS \"time\",\n sublink_id AS metric,\n AVG(tsl) AS value\nFROM cml_data\nWHERE cml_id = '${cml_id}'\n AND '${interval}' = 'auto'\n AND EXTRACT(EPOCH FROM ($__timeTo()::timestamptz - $__timeFrom()::timestamptz)) <= 259200\n AND time >= $__timeFrom()::timestamptz\n AND time <= $__timeTo()::timestamptz\nGROUP BY 1, 2\nORDER BY 1 ASC",
           "refId": "D"
+        },
+        {
+          "datasource": {
+            "type": "grafana-postgresql-datasource",
+            "uid": "PostgreSQL"
+          },
+          "format": "time_series",
+          "rawQuery": true,
+          "rawSql": "SELECT\n time AS \"time\",\n sublink_id AS metric,\n tsl AS value\nFROM cml_data\nWHERE cml_id = '${cml_id}'\n AND '${interval}' = 'raw'\n AND time >= $__timeFrom()::timestamptz\n AND time <= $__timeTo()::timestamptz\nORDER BY 1 ASC",
+          "refId": "E"
         }
       ],
       "title": "CML Time Series - Transmitted Signal Level",