From deaf5e15ee0c19b4ecbbab1b7aa1b06b4e5ff6b4 Mon Sep 17 00:00:00 2001 From: Mikkel Ricky Date: Thu, 26 Mar 2026 10:23:58 +0100 Subject: [PATCH 1/3] Added test data --- README.md | 6 +++ Taskfile.yml | 9 ++++ test-data/Taskfile.yml | 38 +++++++++++++++ test-data/docker-compose.yml | 17 +++++++ test-data/generate-etrefrigerator-sensor.py | 53 +++++++++++++++++++++ test-data/initdb.d/.gitignore | 1 + test-data/initdb.d/01-init.sql | 9 ++++ test-data/initdb.d/02-load-data.sql | 46 ++++++++++++++++++ 8 files changed, 179 insertions(+) create mode 100644 test-data/Taskfile.yml create mode 100644 test-data/docker-compose.yml create mode 100644 test-data/generate-etrefrigerator-sensor.py create mode 100644 test-data/initdb.d/.gitignore create mode 100644 test-data/initdb.d/01-init.sql create mode 100644 test-data/initdb.d/02-load-data.sql diff --git a/README.md b/README.md index e14c0ae..0cced6e 100644 --- a/README.md +++ b/README.md @@ -109,3 +109,9 @@ COMPOSE_PROJECT_NAME=quantumleap COMPOSE_DOMAIN=quantumleap.srvitkiotlab.itkdev.dk COMPOSE_FILES=docker-compose.yml,docker-compose.prod.yml ``` + +## Test data + +``` shell +task test-data:load +``` diff --git a/Taskfile.yml b/Taskfile.yml index 1086be1..62aa621 100644 --- a/Taskfile.yml +++ b/Taskfile.yml @@ -11,6 +11,11 @@ dotenv: vars: DOCKER_COMPOSE: '{{.TASK_DOCKER_COMPOSE | default "docker compose"}}' +includes: + test-data: + taskfile: ./test-data/Taskfile.yml + dir: ./test-data + tasks: grafana:reset-database: desc: Reset Grafana database @@ -47,3 +52,7 @@ tasks: compose: cmds: - '{{.DOCKER_COMPOSE}} {{.TASK_ARGS}} {{.CLI_ARGS}}' + + default: + silent: true + cmd: task --list diff --git a/test-data/Taskfile.yml b/test-data/Taskfile.yml new file mode 100644 index 0000000..370ecbf --- /dev/null +++ b/test-data/Taskfile.yml @@ -0,0 +1,38 @@ +# yaml-language-server: $schema=https://taskfile.dev/schema.json + +version: '3' + +tasks: + load: + desc: Load test data + prompt: Really load test data (and reset the current 
test data)? + cmds: + - task: :compose + vars: + TASK_ARGS: rm --force --stop test-data-timescale + - task: generate-data:etrefrigerator-sensor + - task: :compose + vars: + TASK_ARGS: up test-data-timescale --detach --wait + - task: info + + info: + desc: Show test data info + cmds: + - task: :compose + vars: + TASK_ARGS: exec test-data-timescale psql quantumleap quantumleap --command '\dt' + - task: :compose + vars: + TASK_ARGS: exec test-data-timescale psql quantumleap quantumleap --command 'SELECT COUNT(*) FROM "etrefrigerator-sensor"' + + generate-data:etrefrigerator-sensor: + desc: Generate etrefrigerator-sensor.csv + cmds: + - task: python + vars: + TASK_ARGS: generate-etrefrigerator-sensor.py + + python: + internal: true + cmd: docker run --rm --volume $PWD:/app --workdir /app python:3 python {{.TASK_ARGS}} diff --git a/test-data/docker-compose.yml b/test-data/docker-compose.yml new file mode 100644 index 0000000..1a2519f --- /dev/null +++ b/test-data/docker-compose.yml @@ -0,0 +1,17 @@ +services: + test-data-timescale: + image: timescale/timescaledb-ha:${TIMESCALE_VERSION:-pg17.9-ts2.25.2-oss} + volumes: + # https://www.w3tutorials.net/blog/how-to-create-user-database-in-script-for-docker-postgres/ + - ./test-data/initdb.d/:/docker-entrypoint-initdb.d/ + # https://github.com/timescale/timescaledb-docker-ha/blob/master/Dockerfile + # - ./.docker/data/testdata-timescale:/home/postgres/pgdata/data + environment: + - POSTGRES_PASSWORD=* + networks: + - quantumleap + healthcheck: + test: ["CMD-SHELL", "pg_isready -U quantumleap"] + interval: 10s + timeout: 5s + retries: 5 diff --git a/test-data/generate-etrefrigerator-sensor.py b/test-data/generate-etrefrigerator-sensor.py new file mode 100644 index 0000000..ffc8dd3 --- /dev/null +++ b/test-data/generate-etrefrigerator-sensor.py @@ -0,0 +1,53 @@ +import csv +import datetime as dt +import random +import uuid + +random.seed(19750523) + +number_of_rows=1_000_000 + +filename='initdb.d/etrefrigerator-sensor.csv' + 
+entity_type = 'refrigerator-sensor' +entity_id = 'refrigerator-sensor:5e318760-Milesight' +fiware_servicepath = '' +instanceid = f'urn:ngsi-ld:{uuid.UUID(int=random.getrandbits(128))}' +appliance = 'Fryser' +department = 'Department' +floor = 'floor' +name = 'name' +room = 'room' + +with open(filename, 'w', newline='') as csvfile: + writer = csv.writer(csvfile, delimiter=',', quotechar='"', quoting=csv.QUOTE_MINIMAL) + + time = dt.datetime(1990, 1, 1).astimezone() + + for i in range(number_of_rows): + time_index = 0 + __original_ngsi_entity__ = None # '{}' + battery = random.randint(0, 100) + humidity = round(random.uniform(0.00, 100.00), 2) + temperature = round(random.uniform(-10, 20), 2) + + writer.writerow([ + entity_id, + entity_type, + time.isoformat(), + fiware_servicepath, + __original_ngsi_entity__, + instanceid, + appliance, + battery, + department, + floor, + humidity, + name, + room, + temperature, + ]) + + time += dt.timedelta(seconds=1) + +print(f'{number_of_rows} rows written to file {filename}.') diff --git a/test-data/initdb.d/.gitignore b/test-data/initdb.d/.gitignore new file mode 100644 index 0000000..afed073 --- /dev/null +++ b/test-data/initdb.d/.gitignore @@ -0,0 +1 @@ +*.csv diff --git a/test-data/initdb.d/01-init.sql b/test-data/initdb.d/01-init.sql new file mode 100644 index 0000000..0e25c81 --- /dev/null +++ b/test-data/initdb.d/01-init.sql @@ -0,0 +1,9 @@ +-- # @todo Reqrite to use environment variables (cf. 
https://stackoverflow.com/a/70976611) +CREATE ROLE quantumleap LOGIN PASSWORD '*'; + +CREATE DATABASE quantumleap OWNER quantumleap ENCODING 'UTF8'; + +\connect quantumleap + +CREATE EXTENSION IF NOT EXISTS postgis CASCADE; +CREATE EXTENSION IF NOT EXISTS timescaledb CASCADE; diff --git a/test-data/initdb.d/02-load-data.sql b/test-data/initdb.d/02-load-data.sql new file mode 100644 index 0000000..0716b19 --- /dev/null +++ b/test-data/initdb.d/02-load-data.sql @@ -0,0 +1,46 @@ +-- pg_dump --schema-only --schema="public" --table=etrefrigerator-sensor quantumleap + +\connect quantumleap + +-- +-- Name: etrefrigerator-sensor; Type: TABLE; Schema: public; Owner: quantumleap +-- + +CREATE TABLE public."etrefrigerator-sensor" ( + entity_id text, + entity_type text, + time_index timestamp with time zone NOT NULL, + fiware_servicepath text, + __original_ngsi_entity__ jsonb, + instanceid text, + appliance text, + battery bigint, + department text, + floor text, + humidity double precision, + name text, + room text, + temperature double precision +); + + +ALTER TABLE public."etrefrigerator-sensor" OWNER TO quantumleap; + +-- +-- Name: etrefrigerator-sensor_time_index_idx; Type: INDEX; Schema: public; Owner: quantumleap +-- + +CREATE INDEX "etrefrigerator-sensor_time_index_idx" ON public."etrefrigerator-sensor" USING btree (time_index DESC); + + +-- +-- Name: ix_etrefrigerator-sensor_eid_and_tx; Type: INDEX; Schema: public; Owner: quantumleap +-- + +CREATE INDEX "ix_etrefrigerator-sensor_eid_and_tx" ON public."etrefrigerator-sensor" USING btree (entity_id, time_index DESC); + + +-- docker run --rm --volume $PWD:/app --workdir /app python:3 python generate-etrefrigerator-sensor.py + +COPY public."etrefrigerator-sensor" (entity_id, entity_type, time_index, fiware_servicepath, __original_ngsi_entity__, instanceid, appliance, battery, department, floor, humidity, name, room, temperature) +FROM '/docker-entrypoint-initdb.d/etrefrigerator-sensor.csv' DELIMITER ',' CSV; From 
5124ae3b52d33b55aa5b9349696d27a89330f2f4 Mon Sep 17 00:00:00 2001 From: Mikkel Ricky Date: Thu, 26 Mar 2026 12:48:38 +0100 Subject: [PATCH 2/3] Cleaned up test data --- test-data/Taskfile.yml | 3 +- test-data/docker-compose.yml | 3 +- test-data/generate-etrefrigerator-sensor.py | 64 ++++++++++----------- test-data/initdb.d/01-init.sql | 3 +- test-data/initdb.d/02-load-data.sql | 2 +- 5 files changed, 37 insertions(+), 38 deletions(-) diff --git a/test-data/Taskfile.yml b/test-data/Taskfile.yml index 370ecbf..06bc58b 100644 --- a/test-data/Taskfile.yml +++ b/test-data/Taskfile.yml @@ -10,7 +10,6 @@ tasks: - task: :compose vars: TASK_ARGS: rm --force --stop test-data-timescale - - task: generate-data:etrefrigerator-sensor - task: :compose vars: TASK_ARGS: up test-data-timescale --detach --wait @@ -24,7 +23,7 @@ tasks: TASK_ARGS: exec test-data-timescale psql quantumleap quantumleap --command '\dt' - task: :compose vars: - TASK_ARGS: exec test-data-timescale psql quantumleap quantumleap --command 'SELECT COUNT(*) FROM "etrefrigerator-sensor"' + TASK_ARGS: exec test-data-timescale psql quantumleap quantumleap --command 'SELECT COUNT(*), MIN(time_index), MAX(time_index) FROM "etrefrigerator-sensor"' generate-data:etrefrigerator-sensor: desc: Generate etrefrigerator-sensor.csv diff --git a/test-data/docker-compose.yml b/test-data/docker-compose.yml index 1a2519f..5c9095e 100644 --- a/test-data/docker-compose.yml +++ b/test-data/docker-compose.yml @@ -4,8 +4,7 @@ services: volumes: # https://www.w3tutorials.net/blog/how-to-create-user-database-in-script-for-docker-postgres/ - ./test-data/initdb.d/:/docker-entrypoint-initdb.d/ - # https://github.com/timescale/timescaledb-docker-ha/blob/master/Dockerfile - # - ./.docker/data/testdata-timescale:/home/postgres/pgdata/data + - ./test-data/:/test-data/ environment: - POSTGRES_PASSWORD=* networks: diff --git a/test-data/generate-etrefrigerator-sensor.py b/test-data/generate-etrefrigerator-sensor.py index ffc8dd3..e360b82 100644 
--- a/test-data/generate-etrefrigerator-sensor.py +++ b/test-data/generate-etrefrigerator-sensor.py @@ -1,10 +1,12 @@ import csv import datetime as dt import random +import sys import uuid random.seed(19750523) +number_of_rows=100_000_000 number_of_rows=1_000_000 filename='initdb.d/etrefrigerator-sensor.csv' @@ -19,35 +21,33 @@ name = 'name' room = 'room' -with open(filename, 'w', newline='') as csvfile: - writer = csv.writer(csvfile, delimiter=',', quotechar='"', quoting=csv.QUOTE_MINIMAL) - - time = dt.datetime(1990, 1, 1).astimezone() - - for i in range(number_of_rows): - time_index = 0 - __original_ngsi_entity__ = None # '{}' - battery = random.randint(0, 100) - humidity = round(random.uniform(0.00, 100.00), 2) - temperature = round(random.uniform(-10, 20), 2) - - writer.writerow([ - entity_id, - entity_type, - time.isoformat(), - fiware_servicepath, - __original_ngsi_entity__, - instanceid, - appliance, - battery, - department, - floor, - humidity, - name, - room, - temperature, - ]) - - time += dt.timedelta(seconds=1) - -print(f'{number_of_rows} rows written to file {filename}.') +writer = csv.writer(sys.stdout) +time = dt.datetime(2025, 1, 1).astimezone() + +for i in range(number_of_rows): + time_index = 0 + __original_ngsi_entity__ = None # '{}' + battery = random.randint(0, 100) + humidity = round(random.uniform(0.00, 100.00), 2) + temperature = round(random.uniform(-10, 20), 2) + + writer.writerow([ + entity_id, + entity_type, + time.isoformat(), + fiware_servicepath, + __original_ngsi_entity__, + instanceid, + appliance, + battery, + department, + floor, + humidity, + name, + room, + temperature, + ]) + + time += dt.timedelta(minutes=10) + +# print(f'{number_of_rows} rows written to file {filename}.') diff --git a/test-data/initdb.d/01-init.sql b/test-data/initdb.d/01-init.sql index 0e25c81..43357ab 100644 --- a/test-data/initdb.d/01-init.sql +++ b/test-data/initdb.d/01-init.sql @@ -1,5 +1,6 @@ --- # @todo Reqrite to use environment variables (cf. 
https://stackoverflow.com/a/70976611) +-- # @todo Rewrite to use environment variables (cf. https://stackoverflow.com/a/70976611) CREATE ROLE quantumleap LOGIN PASSWORD '*'; +GRANT pg_execute_server_program TO quantumleap; CREATE DATABASE quantumleap OWNER quantumleap ENCODING 'UTF8'; diff --git a/test-data/initdb.d/02-load-data.sql b/test-data/initdb.d/02-load-data.sql index 0716b19..806311a 100644 --- a/test-data/initdb.d/02-load-data.sql +++ b/test-data/initdb.d/02-load-data.sql @@ -43,4 +43,4 @@ CREATE INDEX "ix_etrefrigerator-sensor_eid_and_tx" ON public."etrefrigerator-sen -- docker run --rm --volume $PWD:/app --workdir /app python:3 python generate-etrefrigerator-sensor.py COPY public."etrefrigerator-sensor" (entity_id, entity_type, time_index, fiware_servicepath, __original_ngsi_entity__, instanceid, appliance, battery, department, floor, humidity, name, room, temperature) -FROM '/docker-entrypoint-initdb.d/etrefrigerator-sensor.csv' DELIMITER ',' CSV; +FROM PROGRAM 'python3 /test-data/generate-etrefrigerator-sensor.py' CSV; From 8d37312792b7c20f9567f14dc19ad4167d20daa0 Mon Sep 17 00:00:00 2001 From: Mikkel Ricky Date: Fri, 17 Apr 2026 14:19:08 +0200 Subject: [PATCH 3/3] Test data --- README.md | 58 +++++++++++ Taskfile.yml | 11 ++ test-data/Taskfile.yml | 32 +++--- test-data/generate-data.py | 82 +++++++++++++++ test-data/generate-etrefrigerator-sensor.py | 53 ---------- test-data/initdb.d/02-load-data.sql | 109 +++++++++++++++++++- 6 files changed, 275 insertions(+), 70 deletions(-) create mode 100755 test-data/generate-data.py delete mode 100644 test-data/generate-etrefrigerator-sensor.py diff --git a/README.md b/README.md index 0cced6e..b9d6fad 100644 --- a/README.md +++ b/README.md @@ -115,3 +115,61 @@ COMPOSE_FILES=docker-compose.yml,docker-compose.prod.yml ``` shell task test-data:load ``` + +## Data types + +QuantumLeap tries to guess data types, but sometimes guesses wrong and may need a little help, e.g. 
+ +``` sql +-- The first "temperature" data may have been integral. +ALTER TABLE "etrefrigerator-sensor" ALTER COLUMN temperature TYPE DOUBLE PRECISION; +ALTER TABLE "etrefrigerator-sensor" ALTER COLUMN battery TYPE DOUBLE PRECISION; +``` + + + + + +* +* +* + + + +``` sql +quantumleap=> CREATE INDEX ON public."etrefrigerator-sensor" (department); +CREATE INDEX + +quantumleap=> EXPLAIN ANALYSE SELECT * FROM "etrefrigerator-sensor" WHERE department = 'test'; + QUERY PLAN +------------------------------------------------------------------------------------------------------------------------------------------------------------------ + Index Scan using "etrefrigerator-sensor_department_idx" on "etrefrigerator-sensor" (cost=0.42..8.44 rows=1 width=322) (actual time=0.072..0.073 rows=0 loops=1) + Index Cond: (department = 'test'::text) + Planning Time: 0.623 ms + Execution Time: 0.142 ms +(4 rows) + +quantumleap=> DROP INDEX "etrefrigerator-sensor_department_idx"; +DROP INDEX + +quantumleap=> EXPLAIN ANALYSE SELECT * FROM "etrefrigerator-sensor" WHERE department = 'test'; + QUERY PLAN +------------------------------------------------------------------------------------------------------------------------------------- + Gather (cost=1000.00..8122.64 rows=1 width=322) (actual time=32.369..36.594 rows=0 loops=1) + Workers Planned: 2 + Workers Launched: 2 + -> Parallel Seq Scan on "etrefrigerator-sensor" (cost=0.00..7122.54 rows=1 width=322) (actual time=8.708..8.709 rows=0 loops=3) + Filter: (department = 'test'::text) + Rows Removed by Filter: 105123 + Planning Time: 0.263 ms + Execution Time: 36.625 ms +(8 rows) + +quantumleap=> SELECT * FROM pg_indexes WHERE tablename = 'etrefrigerator-sensor'; + schemaname | tablename | indexname | tablespace | indexdef +------------+-----------------------+--------------------------------------+------------+------------------------------------------------------------------------------------------------------------------------------- +
public | etrefrigerator-sensor | etrefrigerator-sensor_time_index_idx | | CREATE INDEX "etrefrigerator-sensor_time_index_idx" ON public."etrefrigerator-sensor" USING btree (time_index DESC) + public | etrefrigerator-sensor | ix_etrefrigerator-sensor_eid_and_tx | | CREATE INDEX "ix_etrefrigerator-sensor_eid_and_tx" ON public."etrefrigerator-sensor" USING btree (entity_id, time_index DESC) +(2 rows) +``` diff --git a/Taskfile.yml b/Taskfile.yml index 62aa621..3d0bc0d 100644 --- a/Taskfile.yml +++ b/Taskfile.yml @@ -56,3 +56,14 @@ tasks: default: silent: true cmd: task --list + + coding-standards:apply: + cmds: + # https://docs.astral.sh/ruff/installation/ + - docker run -v .:/io --rm ghcr.io/astral-sh/ruff check --fix + + coding-standards:check: + cmds: + - task: coding-standards:apply + # https://docs.astral.sh/ruff/installation/ + - docker run -v .:/io --rm ghcr.io/astral-sh/ruff check diff --git a/test-data/Taskfile.yml b/test-data/Taskfile.yml index 06bc58b..ed86ffc 100644 --- a/test-data/Taskfile.yml +++ b/test-data/Taskfile.yml @@ -2,6 +2,9 @@ version: '3' +vars: + TEST_DATA_SERVICE_NAME: test-data-timescale + tasks: load: desc: Load test data @@ -9,29 +12,32 @@ tasks: cmds: - task: :compose vars: - TASK_ARGS: rm --force --stop test-data-timescale + TASK_ARGS: rm --force --stop {{.TEST_DATA_SERVICE_NAME}} - task: :compose vars: - TASK_ARGS: up test-data-timescale --detach --wait + TASK_ARGS: up {{.TEST_DATA_SERVICE_NAME}} --detach --wait - task: info info: desc: Show test data info cmds: - - task: :compose + - task: sql:query vars: - TASK_ARGS: exec test-data-timescale psql quantumleap quantumleap --command '\dt' - - task: :compose + TASK_ARGS: '\dt' + - task: sql:query vars: - TASK_ARGS: exec test-data-timescale psql quantumleap quantumleap --command 'SELECT COUNT(*), MIN(time_index), MAX(time_index) FROM "etrefrigerator-sensor"' + TASK_ARGS: SELECT COUNT(*), MIN(time_index), MAX(time_index) FROM "etrefrigerator-sensor" - generate-data:etrefrigerator-sensor: - 
desc: Generate etrefrigerator-sensor.csv + sql:query: + desc: "Run SQL query on test database, example: task {{.TASK}} -- 'SELECT COUNT(*), MIN(time_index), MAX(time_index) FROM \"etrefrigerator-sensor\"'" cmds: - - task: python + - task: :compose vars: - TASK_ARGS: generate-etrefrigerator-sensor.py + TASK_ARGS: exec {{.TEST_DATA_SERVICE_NAME}} psql quantumleap quantumleap --command {{if .TASK_ARGS}}'{{.TASK_ARGS}}'{{end}} - python: - internal: true - cmd: docker run --rm --volume $PWD:/app --workdir /app python:3 python {{.TASK_ARGS}} + sql:cli: + desc: "Run interactive SQL cli on test database" + cmds: + - task: :compose + vars: + TASK_ARGS: exec {{.TEST_DATA_SERVICE_NAME}} psql quantumleap quantumleap diff --git a/test-data/generate-data.py b/test-data/generate-data.py new file mode 100755 index 0000000..1739bc7 --- /dev/null +++ b/test-data/generate-data.py @@ -0,0 +1,82 @@ +#!/usr/bin/env python3 + +# Note: This script is run inside the `test-data-timescale` service (using the +# timescale/timescaledb-ha image), i.e.
we can only import the packages listed +# by +# +# task compose -- exec test-data-timescale pip list +# +# Test the script with an incantation like +# +# task compose -- exec test-data-timescale /test-data/generate-data.py + +import csv +import datetime as dt +import random +import sys +import argparse + +parser = argparse.ArgumentParser() +parser.add_argument('values', metavar='value', nargs='+', help='Examples: {time.isoformat()}, {random.randint(0, 100)}, {round(random.uniform(0.00, 100.00), 2)}') +parser.add_argument('--start-time', help='A time in ISO 8601 format, e.g "2021-01-01"') +parser.add_argument('--end-time', help='A time in ISO 8601 format, e.g "2021-01-01"') +parser.add_argument('--number-of-rows', type=int) +parser.add_argument('--interval', type=int, default=60*60, help='Interval in seconds') +parser.add_argument('--interval-wobble', type=int, default=0, help='If set, a random number between -interval-wobble and interval-wobble will be added to the time') +parser.add_argument('--random-seed', type=int, default=0, help='Seed for the random number generator') +parser.add_argument('--debug', action='store_true') +args = parser.parse_args() + +if args.random_seed != 0: + random.seed(args.random_seed) + +number_of_rows = args.number_of_rows +values = args.values +start_time = dt.datetime.fromisoformat(args.start_time).astimezone() if args.start_time is not None else None +end_time = dt.datetime.fromisoformat(args.end_time).astimezone() if args.end_time is not None else None +interval = args.interval +interval_wobble = args.interval_wobble + +if interval < 1: + raise ValueError("Interval must be greater than 0") + +if interval_wobble is not None and interval_wobble < 0: + raise ValueError("Interval wobble must not be negative") + +if number_of_rows is not None and number_of_rows < 0: + raise ValueError("Number of rows must not be negative") + +delta = dt.timedelta(seconds=interval) + +if start_time is None and end_time is None: + raise ValueError("A
start time or end time must be specified") +elif start_time is None and number_of_rows is None: + raise ValueError("Number of rows must be specified when only an end time is given") +elif end_time is None and number_of_rows is None: + raise ValueError("Number of rows must be specified when only a start time is given") + +if start_time is not None and end_time is None: + end_time = start_time + number_of_rows * delta + +if start_time is None and end_time is not None: + start_time = end_time - number_of_rows * delta + +# Templates are evaluated as f-strings via eval() (cf. https://stackoverflow.com/a/57597617); only pass trusted templates. +def generate_values(values): + return map(lambda template: eval(f"f'{template}'"), values) + +writer = csv.writer(sys.stdout) +debug_writer = csv.writer(sys.stderr) + +time = start_time +index = 0 +while time < end_time: + time += delta + if interval_wobble is not None and interval_wobble > 0: + time += dt.timedelta(seconds=random.randint(-interval_wobble, interval_wobble)) + + row = list(generate_values(values)) + writer.writerow(row) + if args.debug: + debug_writer.writerow(row) + index += 1 diff --git a/test-data/generate-etrefrigerator-sensor.py b/test-data/generate-etrefrigerator-sensor.py deleted file mode 100644 index e360b82..0000000 --- a/test-data/generate-etrefrigerator-sensor.py +++ /dev/null @@ -1,53 +0,0 @@ -import csv -import datetime as dt -import random -import sys -import uuid - -random.seed(19750523) - -number_of_rows=100_000_000 -number_of_rows=1_000_000 - -filename='initdb.d/etrefrigerator-sensor.csv' - -entity_type = 'refrigerator-sensor' -entity_id = 'refrigerator-sensor:5e318760-Milesight' -fiware_servicepath = '' -instanceid = f'urn:ngsi-ld:{uuid.UUID(int=random.getrandbits(128))}' -appliance = 'Fryser' -department = 'Department' -floor = 'floor' -name = 'name' -room = 'room' -writer = csv.writer(sys.stdout) -time = dt.datetime(2025, 1, 1).astimezone() - -for i in range(number_of_rows): - time_index = 0 - __original_ngsi_entity__ = None # '{}' - battery =
random.randint(0, 100) - humidity = round(random.uniform(0.00, 100.00), 2) - temperature = round(random.uniform(-10, 20), 2) - - writer.writerow([ - entity_id, - entity_type, - time.isoformat(), - fiware_servicepath, - __original_ngsi_entity__, - instanceid, - appliance, - battery, - department, - floor, - humidity, - name, - room, - temperature, - ]) - - time += dt.timedelta(minutes=10) - -# print(f'{number_of_rows} rows written to file {filename}.') diff --git a/test-data/initdb.d/02-load-data.sql b/test-data/initdb.d/02-load-data.sql index 806311a..351db45 100644 --- a/test-data/initdb.d/02-load-data.sql +++ b/test-data/initdb.d/02-load-data.sql @@ -26,6 +26,110 @@ CREATE TABLE public."etrefrigerator-sensor" ( ALTER TABLE public."etrefrigerator-sensor" OWNER TO quantumleap; + + +-- https://oneuptime.com/blog/post/2026-01-25-load-millions-rows-copy-postgresql/view + +-- https://oneuptime.com/blog/post/2026-01-25-load-millions-rows-copy-postgresql/view#4-adjust-wal-settings-for-loading +-- Increase checkpoint distance (apply to session or system) +-- SET checkpoint_timeout = '30min'; +-- SET max_wal_size = '10GB'; + +-- Reduce WAL level for this session +SET synchronous_commit = off; + +-- https://oneuptime.com/blog/post/2026-01-25-load-millions-rows-copy-postgresql/view#1-drop-indexes-before-loading +-- Load data before adding indexes! 
+ +COPY "etrefrigerator-sensor" ( + entity_id, + entity_type, + appliance, + name, + room, + department, + floor, + time_index, + battery, + humidity, + temperature +) +FROM PROGRAM 'python3 /test-data/generate-data.py \ + "refrigerator-sensor:00000000-Milesight" \ + "refrigerator-sensor" \ + "Fryser" \ + "" \ + "" \ + "" \ + "" \ + "{time.isoformat()}" \ + "{random.randint(0, 100)}" \ + "{round(random.uniform(0.00, 100.00), 2)}" \ + "{round(random.uniform(0.00, 100.00), 2)}" \ + --start-time "2025-01-01" --end-time "2027-01-01" --interval 600 --interval-wobble 30 +' WITH (FORMAT csv, LOG_VERBOSITY verbose); + +COPY "etrefrigerator-sensor" ( + entity_id, + entity_type, + appliance, + name, + room, + department, + floor, + time_index, + battery, + humidity, + temperature +) +FROM PROGRAM 'python3 /test-data/generate-data.py \ + "refrigerator-sensor:11111111-Milesight" \ + "refrigerator-sensor" \ + "Fryser" \ + "" \ + "" \ + "" \ + "" \ + "{time.isoformat()}" \ + "{random.randint(0, 100)}" \ + "{round(random.uniform(0.00, 100.00), 2)}" \ + "{round(random.uniform(0.00, 100.00), 2)}" \ + --start-time "2025-01-01" --end-time "2027-01-01" --interval 600 --interval-wobble 30 +' WITH (FORMAT csv, LOG_VERBOSITY verbose); + +COPY "etrefrigerator-sensor" ( + entity_id, + entity_type, + appliance, + name, + room, + department, + floor, + time_index, + battery, + humidity, + temperature +) +FROM PROGRAM 'python3 /test-data/generate-data.py \ + "refrigerator-sensor:22222222-Milesight" \ + "refrigerator-sensor" \ + "Fryser" \ + "" \ + "" \ + "" \ + "" \ + "{time.isoformat()}" \ + "{random.randint(0, 100)}" \ + "{round(random.uniform(0.00, 100.00), 2)}" \ + "{round(random.uniform(0.00, 100.00), 2)}" \ + --start-time "2025-01-01" --end-time "2027-01-01" --interval 600 --interval-wobble 30 +' WITH (FORMAT csv, LOG_VERBOSITY verbose); + + +-- https://oneuptime.com/blog/post/2026-01-25-load-millions-rows-copy-postgresql/view#4-adjust-wal-settings-for-loading +-- Reset to defaults
+RESET synchronous_commit; + -- -- Name: etrefrigerator-sensor_time_index_idx; Type: INDEX; Schema: public; Owner: quantumleap -- @@ -40,7 +144,4 @@ CREATE INDEX "etrefrigerator-sensor_time_index_idx" ON public."etrefrigerator-se CREATE INDEX "ix_etrefrigerator-sensor_eid_and_tx" ON public."etrefrigerator-sensor" USING btree (entity_id, time_index DESC); --- docker run --rm --volume $PWD:/app --workdir /app python:3 python generate-etrefrigerator-sensor.py - -COPY public."etrefrigerator-sensor" (entity_id, entity_type, time_index, fiware_servicepath, __original_ngsi_entity__, instanceid, appliance, battery, department, floor, humidity, name, room, temperature) -FROM PROGRAM 'python3 /test-data/generate-etrefrigerator-sensor.py' CSV; +SELECT COUNT(*), MIN(time_index), MAX(time_index) FROM "etrefrigerator-sensor";