diff --git a/README.md b/README.md index e14c0ae..b9d6fad 100644 --- a/README.md +++ b/README.md @@ -109,3 +109,67 @@ COMPOSE_PROJECT_NAME=quantumleap COMPOSE_DOMAIN=quantumleap.srvitkiotlab.itkdev.dk COMPOSE_FILES=docker-compose.yml,docker-compose.prod.yml ``` + +## Test data + +``` shell +task test-data:load +``` + +## Data types + +QuantumLeap tries to guess data types, but sometimes guesses wrong and may need a little help, e.g. + +``` sql +# The first "temperature" data may have been integral. +ALTER TABLE "etrefrigerator-sensor" ALTER COLUMN temperature TYPE DOUBLE PRECISION; +ALTER TABLE "etrefrigerator-sensor" ALTER COLUMN battery TYPE DOUBLE PRECISION; +``` + + + + + + +* +* +* + + + +``` sql +quantumleap=> CREATE INDEX ON public."etrefrigerator-sensor" (department); +CREATE INDEX + +quantumleap=> EXPLAIN ANALYSE SELECT * FROM "etrefrigerator-sensor" WHERE department = 'test'; + QUERY PLAN +------------------------------------------------------------------------------------------------------------------------------------------------------------------ + Index Scan using "etrefrigerator-sensor_department_idx" on "etrefrigerator-sensor" (cost=0.42..8.44 rows=1 width=322) (actual time=0.072..0.073 rows=0 loops=1) + Index Cond: (department = 'test'::text) + Planning Time: 0.623 ms + Execution Time: 0.142 ms +(4 rows) + +quantumleap=> DROP INDEX "etrefrigerator-sensor_department_idx"; +DROP INDEX + +quantumleap=> EXPLAIN ANALYSE SELECT * FROM "etrefrigerator-sensor" WHERE department = 'test'; + QUERY PLAN +------------------------------------------------------------------------------------------------------------------------------------- + Gather (cost=1000.00..8122.64 rows=1 width=322) (actual time=32.369..36.594 rows=0 loops=1) + Workers Planned: 2 + Workers Launched: 2 + -> Parallel Seq Scan on "etrefrigerator-sensor" (cost=0.00..7122.54 rows=1 width=322) (actual time=8.708..8.709 rows=0 loops=3) + Filter: (department = 'test'::text) + Rows Removed by Filter: 
105123 + Planning Time: 0.263 ms + Execution Time: 36.625 ms +(8 rows) + +quantumleap=> SELECT * FROM pg_indexes WHERE tablename = 'etrefrigerator-sensor'; + schemaname | tablename | indexname | tablespace | indexdef +------------+-----------------------+--------------------------------------+------------+------------------------------------------------------------------------------------------------------------------------------- + public | etrefrigerator-sensor | etrefrigerator-sensor_time_index_idx | | CREATE INDEX "etrefrigerator-sensor_time_index_idx" ON public."etrefrigerator-sensor" USING btree (time_index DESC) + public | etrefrigerator-sensor | ix_etrefrigerator-sensor_eid_and_tx | | CREATE INDEX "ix_etrefrigerator-sensor_eid_and_tx" ON public."etrefrigerator-sensor" USING btree (entity_id, time_index DESC) +(2 rows) +``` diff --git a/Taskfile.yml b/Taskfile.yml index 1086be1..3d0bc0d 100644 --- a/Taskfile.yml +++ b/Taskfile.yml @@ -11,6 +11,11 @@ dotenv: vars: DOCKER_COMPOSE: '{{.TASK_DOCKER_COMPOSE | default "docker compose"}}' +includes: + test-data: + taskfile: ./test-data/Taskfile.yml + dir: ./test-data + tasks: grafana:reset-database: desc: Reset Grafana database @@ -47,3 +52,18 @@ tasks: compose: cmds: - '{{.DOCKER_COMPOSE}} {{.TASK_ARGS}} {{.CLI_ARGS}}' + + default: + silent: true + cmd: task --list + + coding-standards:apply: + cmds: + # https://docs.astral.sh/ruff/installation/ + - docker run -v .:/io --rm ghcr.io/astral-sh/ruff check --fix + + coding-standards:check: + cmds: + - task: coding-standards:apply + # https://docs.astral.sh/ruff/installation/ + - docker run -v .:/io --rm ghcr.io/astral-sh/ruff check diff --git a/test-data/Taskfile.yml b/test-data/Taskfile.yml new file mode 100644 index 0000000..ed86ffc --- /dev/null +++ b/test-data/Taskfile.yml @@ -0,0 +1,43 @@ +# yaml-language-server: $schema=https://taskfile.dev/schema.json + +version: '3' + +vars: + TEST_DATA_SERVICE_NAME: test-data-timescale + +tasks: + load: + desc: Load test data 
+    prompt: Really load test data (and reset the current test data)?
+    cmds:
+      - task: :compose
+        vars:
+          TASK_ARGS: rm --force --stop {{.TEST_DATA_SERVICE_NAME}}
+      - task: :compose
+        vars:
+          TASK_ARGS: up {{.TEST_DATA_SERVICE_NAME}} --detach --wait
+      - task: info
+
+  info:
+    desc: Show test data info
+    cmds:
+      - task: sql:query
+        vars:
+          TASK_ARGS: '\dt'
+      - task: sql:query
+        vars:
+          TASK_ARGS: SELECT COUNT(*), MIN(time_index), MAX(time_index) FROM "etrefrigerator-sensor"
+
+  sql:query:
+    desc: "Run SQL query on test database, example: task {{.TASK}} -- 'SELECT COUNT(*), MIN(time_index), MAX(time_index) FROM \"etrefrigerator-sensor\"'"
+    cmds:
+      - task: :compose
+        vars:
+          TASK_ARGS: exec {{.TEST_DATA_SERVICE_NAME}} psql quantumleap quantumleap --command {{if .TASK_ARGS}}'{{.TASK_ARGS}}'{{end}}
+
+  sql:cli:
+    desc: "Run interactive SQL cli on test database"
+    cmds:
+      - task: :compose
+        vars:
+          TASK_ARGS: exec {{.TEST_DATA_SERVICE_NAME}} psql quantumleap quantumleap
diff --git a/test-data/docker-compose.yml b/test-data/docker-compose.yml
new file mode 100644
index 0000000..5c9095e
--- /dev/null
+++ b/test-data/docker-compose.yml
@@ -0,0 +1,16 @@
+services:
+  test-data-timescale:
+    image: timescale/timescaledb-ha:${TIMESCALE_VERSION:-pg17.9-ts2.25.2-oss}
+    volumes:
+      # https://www.w3tutorials.net/blog/how-to-create-user-database-in-script-for-docker-postgres/
+      - ./test-data/initdb.d/:/docker-entrypoint-initdb.d/
+      - ./test-data/:/test-data/
+    environment:
+      - POSTGRES_PASSWORD=*
+    networks:
+      - quantumleap
+    healthcheck:
+      test: ["CMD-SHELL", "pg_isready -U quantumleap"]
+      interval: 10s
+      timeout: 5s
+      retries: 5
diff --git a/test-data/generate-data.py b/test-data/generate-data.py
new file mode 100755
index 0000000..1739bc7
--- /dev/null
+++ b/test-data/generate-data.py
@@ -0,0 +1,82 @@
+#!/usr/bin/env python3
+
+# Note: This script is run inside the `test-data-timescale` service (using the
+# timescale/timescaledb-ha image), i.e. 
we can only import the packages listed
+by
+#
+#     task compose -- exec test-data-timescale pip list
+#
+# Test the script with an incantation like
+#
+#     task compose -- exec test-data-timescale /test-data/generate-data.py
+
+import csv
+import datetime as dt
+import random
+import sys
+import argparse
+
+parser = argparse.ArgumentParser()
+parser.add_argument('values', metavar='value', nargs='+', help='Examples: {time.isoformat()}, {random.randint(0, 100)}, {round(random.uniform(0.00, 100.00), 2)}')
+parser.add_argument('--start-time', help='A time in ISO 8601 format, e.g. "2021-01-01"')
+parser.add_argument('--end-time', help='A time in ISO 8601 format, e.g. "2021-01-01"')
+parser.add_argument('--number-of-rows', type=int)
+parser.add_argument('--interval', type=int, default=60*60, help='Interval in seconds')
+parser.add_argument('--interval-wobble', type=int, default=0, help='If set, a random number between -interval-wobble and interval-wobble will be added to the time')
+parser.add_argument('--random-seed', type=int, default=0, help='Seed for the random number generator')
+parser.add_argument('--debug', action='store_true')
+args = parser.parse_args()
+
+if args.random_seed != 0:
+    random.seed(args.random_seed)
+
+number_of_rows = args.number_of_rows
+values = args.values
+start_time = dt.datetime.fromisoformat(args.start_time).astimezone() if args.start_time is not None else None
+end_time = dt.datetime.fromisoformat(args.end_time).astimezone() if args.end_time is not None else None
+interval = args.interval
+interval_wobble = args.interval_wobble
+
+if interval < 1:
+    raise ValueError("Interval must be greater than 0")
+
+if interval_wobble is not None and interval_wobble < 0:
+    raise ValueError("Interval wobble must not be negative")
+
+if number_of_rows is not None and number_of_rows < 0:
+    raise ValueError("Number of rows must not be negative")
+
+delta = dt.timedelta(seconds=interval)
+
+if start_time is None and end_time is None:
+    raise ValueError("A 
start time or end time must be specified")
+elif start_time is None and number_of_rows is None:
+    raise ValueError("Number of rows must be specified along with end time")
+elif end_time is None and number_of_rows is None:
+    raise ValueError("Number of rows must be specified along with start time")
+
+if start_time is not None and end_time is None and number_of_rows > 0:
+    end_time = start_time + number_of_rows * delta
+
+if start_time is None and end_time is not None and number_of_rows > 0:
+    start_time = end_time - number_of_rows * delta
+
+# https://stackoverflow.com/a/57597617
+def generate_values(values):
+    return map(lambda template: eval(f"f'{template}'"), values)
+
+writer = csv.writer(sys.stdout)
+debug_writer = csv.writer(sys.stderr)
+
+time = start_time
+index = 0
+while time < end_time:
+    time += delta
+    if interval_wobble is not None and interval_wobble > 0:
+        time += dt.timedelta(seconds=random.randint(-interval_wobble, interval_wobble))
+
+    writer.writerow(row := list(generate_values(values)))
+    if args.debug:
+        debug_writer.writerow(row)
+
+    index += 1
diff --git a/test-data/initdb.d/.gitignore b/test-data/initdb.d/.gitignore
new file mode 100644
index 0000000..afed073
--- /dev/null
+++ b/test-data/initdb.d/.gitignore
@@ -0,0 +1 @@
+*.csv
diff --git a/test-data/initdb.d/01-init.sql b/test-data/initdb.d/01-init.sql
new file mode 100644
index 0000000..43357ab
--- /dev/null
+++ b/test-data/initdb.d/01-init.sql
@@ -0,0 +1,10 @@
+-- # @todo Rewrite to use environment variables (cf. 
https://stackoverflow.com/a/70976611) +CREATE ROLE quantumleap LOGIN PASSWORD '*'; +GRANT pg_execute_server_program TO quantumleap; + +CREATE DATABASE quantumleap OWNER quantumleap ENCODING 'UTF8'; + +\connect quantumleap + +CREATE EXTENSION IF NOT EXISTS postgis CASCADE; +CREATE EXTENSION IF NOT EXISTS timescaledb CASCADE; diff --git a/test-data/initdb.d/02-load-data.sql b/test-data/initdb.d/02-load-data.sql new file mode 100644 index 0000000..351db45 --- /dev/null +++ b/test-data/initdb.d/02-load-data.sql @@ -0,0 +1,147 @@ +-- pg_dump --schema-only --schema="public" --table=etrefrigerator-sensor quantumleap + +\connect quantumleap + +-- +-- Name: etrefrigerator-sensor; Type: TABLE; Schema: public; Owner: quantumleap +-- + +CREATE TABLE public."etrefrigerator-sensor" ( + entity_id text, + entity_type text, + time_index timestamp with time zone NOT NULL, + fiware_servicepath text, + __original_ngsi_entity__ jsonb, + instanceid text, + appliance text, + battery bigint, + department text, + floor text, + humidity double precision, + name text, + room text, + temperature double precision +); + + +ALTER TABLE public."etrefrigerator-sensor" OWNER TO quantumleap; + + + +-- https://oneuptime.com/blog/post/2026-01-25-load-millions-rows-copy-postgresql/view + +-- https://oneuptime.com/blog/post/2026-01-25-load-millions-rows-copy-postgresql/view#4-adjust-wal-settings-for-loading +-- Increase checkpoint distance (apply to session or system) +-- SET checkpoint_timeout = '30min'; +-- SET max_wal_size = '10GB'; + +-- Reduce WAL level for this session +SET synchronous_commit = off; + +-- https://oneuptime.com/blog/post/2026-01-25-load-millions-rows-copy-postgresql/view#1-drop-indexes-before-loading +-- Load data before adding indexes! 
+
+COPY "etrefrigerator-sensor" (
+    entity_id,
+    entity_type,
+    appliance,
+    name,
+    room,
+    department,
+    floor,
+    time_index,
+    battery,
+    humidity,
+    temperature
+)
+FROM PROGRAM 'python3 /test-data/generate-data.py \
+    "refrigerator-sensor:00000000-Milesight" \
+    "refrigerator-sensor" \
+    "Fryser" \
+    "" \
+    "" \
+    "" \
+    "" \
+    "{time.isoformat()}" \
+    "{random.randint(0, 100)}" \
+    "{round(random.uniform(0.00, 100.00), 2)}" \
+    "{round(random.uniform(0.00, 100.00), 2)}" \
+    --start-time "2025-01-01" --end-time "2027-01-01" --interval 600 --interval-wobble 30
+' WITH (FORMAT csv, LOG_VERBOSITY verbose);
+
+COPY "etrefrigerator-sensor" (
+    entity_id,
+    entity_type,
+    appliance,
+    name,
+    room,
+    department,
+    floor,
+    time_index,
+    battery,
+    humidity,
+    temperature
+)
+FROM PROGRAM 'python3 /test-data/generate-data.py \
+    "refrigerator-sensor:11111111-Milesight" \
+    "refrigerator-sensor" \
+    "Fryser" \
+    "" \
+    "" \
+    "" \
+    "" \
+    "{time.isoformat()}" \
+    "{random.randint(0, 100)}" \
+    "{round(random.uniform(0.00, 100.00), 2)}" \
+    "{round(random.uniform(0.00, 100.00), 2)}" \
+    --start-time "2025-01-01" --end-time "2027-01-01" --interval 600 --interval-wobble 30
+' WITH (FORMAT csv, LOG_VERBOSITY verbose);
+
+COPY "etrefrigerator-sensor" (
+    entity_id,
+    entity_type,
+    appliance,
+    name,
+    room,
+    department,
+    floor,
+    time_index,
+    battery,
+    humidity,
+    temperature
+)
+FROM PROGRAM 'python3 /test-data/generate-data.py \
+    "refrigerator-sensor:22222222-Milesight" \
+    "refrigerator-sensor" \
+    "Fryser" \
+    "" \
+    "" \
+    "" \
+    "" \
+    "{time.isoformat()}" \
+    "{random.randint(0, 100)}" \
+    "{round(random.uniform(0.00, 100.00), 2)}" \
+    "{round(random.uniform(0.00, 100.00), 2)}" \
+    --start-time "2025-01-01" --end-time "2027-01-01" --interval 600 --interval-wobble 30
+' WITH (FORMAT csv, LOG_VERBOSITY verbose);
+
+
+-- https://oneuptime.com/blog/post/2026-01-25-load-millions-rows-copy-postgresql/view#4-adjust-wal-settings-for-loading
+-- Reset to defaults 
+RESET synchronous_commit; + +-- +-- Name: etrefrigerator-sensor_time_index_idx; Type: INDEX; Schema: public; Owner: quantumleap +-- + +CREATE INDEX "etrefrigerator-sensor_time_index_idx" ON public."etrefrigerator-sensor" USING btree (time_index DESC); + + +-- +-- Name: ix_etrefrigerator-sensor_eid_and_tx; Type: INDEX; Schema: public; Owner: quantumleap +-- + +CREATE INDEX "ix_etrefrigerator-sensor_eid_and_tx" ON public."etrefrigerator-sensor" USING btree (entity_id, time_index DESC); + + +SELECT COUNT(*), MIN(time_index), MAX(time_index) FROM "etrefrigerator-sensor";