From 16889c5dfd542d7e4cd6d6535e64bd23958ff1f9 Mon Sep 17 00:00:00 2001 From: Eric Becker Date: Sat, 2 May 2026 02:10:52 +0000 Subject: [PATCH 01/21] test: scaffold integration harness directory and config --- tests/integration/README.md | 3 +++ tests/integration/config.yaml | 23 +++++++++++++++++++++++ 2 files changed, 26 insertions(+) create mode 100644 tests/integration/README.md create mode 100644 tests/integration/config.yaml diff --git a/tests/integration/README.md b/tests/integration/README.md new file mode 100644 index 0000000..547a696 --- /dev/null +++ b/tests/integration/README.md @@ -0,0 +1,3 @@ +# Integration test harness + +End-to-end harness using Docker Compose. See `CONTRIBUTING.md` ("Integration testing") for usage. Top-level entry point: `scripts/run-integration.sh`. Stack defined in `docker-compose.test.yaml`. diff --git a/tests/integration/config.yaml b/tests/integration/config.yaml new file mode 100644 index 0000000..32ee29c --- /dev/null +++ b/tests/integration/config.yaml @@ -0,0 +1,23 @@ +zerohop_enabled: true +zerohop_channels: + - "LongTurbo" + - "LongFast" + - "LongModerate" + - "MediumFast" + - "MediumSlow" + - "ShortFast" + - "ShortSlow" + - "ShortTurbo" + +drop_enabled: true +drop_channels: "zerohop_channels" +drop_portnums: + - "RANGE_TEST_APP" + +grpc_port: 9000 +health_port: 8080 +topic_filter: "msh/#" +stats_interval_s: 10 +log_level: "INFO" +log_format: "text" +stats_log: true From 5b33887a73fbc9e32e5fea5be2e2cf6ed0e682db Mon Sep 17 00:00:00 2001 From: Eric Becker Date: Sat, 2 May 2026 02:19:57 +0000 Subject: [PATCH 02/21] test: add exhook-init container that registers floodgate via EMQX REST --- tests/integration/exhook-init/Dockerfile | 8 +++ tests/integration/exhook-init/register.sh | 72 +++++++++++++++++++++++ 2 files changed, 80 insertions(+) create mode 100644 tests/integration/exhook-init/Dockerfile create mode 100755 tests/integration/exhook-init/register.sh diff --git a/tests/integration/exhook-init/Dockerfile b/tests/integration/exhook-init/Dockerfile new file mode 100644 index 0000000..cbbebda --- /dev/null +++ b/tests/integration/exhook-init/Dockerfile @@ -0,0 +1,8 @@ +FROM alpine:3.20 + +RUN apk add --no-cache curl jq bash + +COPY register.sh /register.sh +RUN chmod +x /register.sh + +ENTRYPOINT ["/register.sh"] diff --git a/tests/integration/exhook-init/register.sh b/tests/integration/exhook-init/register.sh new file mode 100755 index 0000000..9b1de3b --- /dev/null +++ b/tests/integration/exhook-init/register.sh @@ -0,0 +1,72 @@ +#!/usr/bin/env bash +# Register floodgate as an ExHook in EMQX once the broker REST API is up. +# Idempotent: if a hook named "floodgate" already exists, PUT to update it +# instead of POSTing a new one. + +set -euo pipefail + +EMQX_URL="${EMQX_URL:-http://emqx:18083}" +EMQX_USER="${EMQX_USER:-admin}" +EMQX_PASS="${EMQX_PASS:-public}" +HOOK_URL="${HOOK_URL:-http://floodgate:9000}" +HOOK_NAME="floodgate" + +echo "exhook-init: waiting for EMQX REST at ${EMQX_URL} ..." +for i in $(seq 1 60); do + if curl -sf -o /dev/null "${EMQX_URL}/api/v5/status"; then + echo "exhook-init: EMQX REST is up after ${i}s" + break + fi + sleep 1 +done + +echo "exhook-init: logging in" +TOKEN=$(curl -sf -X POST "${EMQX_URL}/api/v5/login" \ + -H 'Content-Type: application/json' \ + -d "{\"username\":\"${EMQX_USER}\",\"password\":\"${EMQX_PASS}\"}" \ + | jq -r .token) + +if [ -z "${TOKEN}" ] || [ "${TOKEN}" = "null" ]; then + echo "exhook-init: failed to obtain EMQX REST token" >&2 + exit 1 +fi + +BODY=$(cat </dev/null +else + echo "exhook-init: hook '${HOOK_NAME}' does not exist — creating" + curl -sf -X POST "${EMQX_URL}/api/v5/exhooks" \ + -H "Authorization: Bearer ${TOKEN}" \ + -H 'Content-Type: application/json' \ + -d "${BODY}" >/dev/null +fi + +echo "exhook-init: verifying registration" +STATUS=$(curl -sf "${EMQX_URL}/api/v5/exhooks/${HOOK_NAME}" \ + -H "Authorization: Bearer ${TOKEN}" | jq -r '.status // "unknown"') +echo "exhook-init: hook '${HOOK_NAME}' status=${STATUS}" + +if [ "${STATUS}" = "connected" ] || [ "${STATUS}" = "running" ]; then + echo "exhook-init: success" + exit 0 +fi + +echo "exhook-init: hook registered but status=${STATUS}; floodgate may still be starting." +echo "exhook-init: init container exits 0; floodgate->EMQX gRPC will recover on its own." +exit 0 From 2836d7c8e941da224614dbc296529de9f0a2adad Mon Sep 17 00:00:00 2001 From: Eric Becker Date: Sat, 2 May 2026 02:35:13 +0000 Subject: [PATCH 03/21] test: add docker-compose.test.yaml with emqx+floodgate+exhook-init Local verification on this host is constrained by a k3s iptables FORWARD policy that drops Docker user-defined-bridge inter-container traffic. CI runs on clean GitHub Actions runners and will validate the stack end-to-end in the integration job. --- docker-compose.test.yaml | 59 ++++++++++++++++++++++++++++++++++++++++ 1 file changed, 59 insertions(+) create mode 100644 docker-compose.test.yaml diff --git a/docker-compose.test.yaml b/docker-compose.test.yaml new file mode 100644 index 0000000..a301f70 --- /dev/null +++ b/docker-compose.test.yaml @@ -0,0 +1,59 @@ +# Integration test stack. Brought up by scripts/run-integration.sh. +# All inter-service traffic stays on the floodgate-test-net bridge. +# Only floodgate /health (8080->18089) and EMQX REST (18083) are exposed +# to the host so the runner can poll readiness — MQTT 1883 and gRPC 9000 +# are container-internal only. + +services: + emqx: + image: emqx/emqx:6.1.1 + container_name: floodgate-test-emqx + networks: [floodgate-test-net] + ports: + - "18083:18083" + environment: + EMQX_NAME: "emqx-test" + EMQX_DASHBOARD__DEFAULT_PASSWORD: "public" + healthcheck: + test: ["CMD", "/opt/emqx/bin/emqx", "ctl", "status"] + interval: 5s + timeout: 10s + retries: 12 + start_period: 10s + + floodgate: + build: + context: . + dockerfile: Dockerfile + image: floodgate-test:ci + container_name: floodgate-test-floodgate + networks: [floodgate-test-net] + ports: + - "18089:8080" + environment: + FLOODGATE_CONFIG: /app/config.yaml + volumes: + - ./tests/integration/config.yaml:/app/config.yaml:ro + healthcheck: + test: ["CMD", "python", "-c", "import urllib.request; urllib.request.urlopen('http://localhost:8080/health')"] + interval: 5s + timeout: 5s + retries: 12 + start_period: 5s + + exhook-init: + build: + context: tests/integration/exhook-init + container_name: floodgate-test-exhook-init + networks: [floodgate-test-net] + depends_on: + emqx: + condition: service_healthy + floodgate: + condition: service_healthy + restart: "no" + +networks: + floodgate-test-net: + name: floodgate-test-net + driver: bridge From cd23232684988690b7c6b3f33185576974d70a59 Mon Sep 17 00:00:00 2001 From: Eric Becker Date: Sat, 2 May 2026 02:35:47 +0000 Subject: [PATCH 04/21] fix: add curl connect/max timeouts to exhook-init register.sh Without these flags, curl will hang indefinitely on an unreachable broker (network failure rather than not-yet-ready). The wait loop's 60-iteration cap never advances. --connect-timeout 2 + --max-time 5 keeps each probe bounded so the loop and its overall timeout work. --- tests/integration/exhook-init/register.sh | 12 ++++++------ 1 file changed, 6 insertions(+), 6 deletions(-) diff --git a/tests/integration/exhook-init/register.sh b/tests/integration/exhook-init/register.sh index 9b1de3b..90bb8db 100755 --- a/tests/integration/exhook-init/register.sh +++ b/tests/integration/exhook-init/register.sh @@ -13,7 +13,7 @@ HOOK_NAME="floodgate" echo "exhook-init: waiting for EMQX REST at ${EMQX_URL} ..." for i in $(seq 1 60); do - if curl -sf -o /dev/null "${EMQX_URL}/api/v5/status"; then + if curl -sf --connect-timeout 2 --max-time 5 -o /dev/null "${EMQX_URL}/api/v5/status"; then echo "exhook-init: EMQX REST is up after ${i}s" break fi @@ -21,7 +21,7 @@ for i in $(seq 1 60); do done echo "exhook-init: logging in" -TOKEN=$(curl -sf -X POST "${EMQX_URL}/api/v5/login" \ +TOKEN=$(curl -sf --connect-timeout 2 --max-time 5 -X POST "${EMQX_URL}/api/v5/login" \ -H 'Content-Type: application/json' \ -d "{\"username\":\"${EMQX_USER}\",\"password\":\"${EMQX_PASS}\"}" \ | jq -r .token) @@ -42,23 +42,23 @@ BODY=$(cat </dev/null else echo "exhook-init: hook '${HOOK_NAME}' does not exist — creating" - curl -sf -X POST "${EMQX_URL}/api/v5/exhooks" \ + curl -sf --connect-timeout 2 --max-time 5 -X POST "${EMQX_URL}/api/v5/exhooks" \ -H "Authorization: Bearer ${TOKEN}" \ -H 'Content-Type: application/json' \ -d "${BODY}" >/dev/null fi echo "exhook-init: verifying registration" -STATUS=$(curl -sf "${EMQX_URL}/api/v5/exhooks/${HOOK_NAME}" \ +STATUS=$(curl -sf --connect-timeout 2 --max-time 5 "${EMQX_URL}/api/v5/exhooks/${HOOK_NAME}" \ -H "Authorization: Bearer ${TOKEN}" | jq -r '.status // "unknown"') echo "exhook-init: hook '${HOOK_NAME}' status=${STATUS}" From b5c6a80d95e56624a606fdd4bb495e0595c8532c Mon Sep 17 00:00:00 2001 From: Eric Becker Date: Sat, 2 May 2026 02:42:15 +0000 Subject: [PATCH 05/21] test: add test-driver image with subscribe-only smoke run --- docker-compose.test.yaml | 19 +++++++++++++ tests/integration/test-driver/Dockerfile | 14 +++++++++ tests/integration/test-driver/run.py | 36 ++++++++++++++++++++++++ 3 files changed, 69 insertions(+) create mode 100644 tests/integration/test-driver/Dockerfile create mode 100644 tests/integration/test-driver/run.py diff --git a/docker-compose.test.yaml b/docker-compose.test.yaml index a301f70..c715c6a 100644 --- a/docker-compose.test.yaml +++ b/docker-compose.test.yaml @@ -53,6 +53,25 @@ services: condition: service_healthy restart: "no" + test-driver: + build: + context: tests/integration/test-driver + image: floodgate-test-driver:ci + container_name: floodgate-test-driver + networks: [floodgate-test-net] + depends_on: + emqx: + condition: service_healthy + floodgate: + condition: service_healthy + exhook-init: + condition: service_completed_successfully + profiles: ["driver"] + environment: + EMQX_HOST: "emqx" + EMQX_PORT: "1883" + FLOODGATE_HEALTH_URL: "http://floodgate:8080/health" + networks: floodgate-test-net: name: floodgate-test-net diff --git a/tests/integration/test-driver/Dockerfile b/tests/integration/test-driver/Dockerfile new file mode 100644 index 0000000..ae3968b --- /dev/null +++ b/tests/integration/test-driver/Dockerfile @@ -0,0 +1,14 @@ +FROM python:3.13-slim + +WORKDIR /test-driver + +RUN pip install --no-cache-dir \ + "paho-mqtt>=2.0,<3" \ + "meshtastic>=2.5,<3" \ + "cryptography>=42" \ + "requests>=2.32" \ + "pyyaml>=6" + +COPY run.py /test-driver/run.py + +ENTRYPOINT ["python", "/test-driver/run.py"] diff --git a/tests/integration/test-driver/run.py b/tests/integration/test-driver/run.py new file mode 100644 index 0000000..0ce7fda --- /dev/null +++ b/tests/integration/test-driver/run.py @@ -0,0 +1,36 @@ +"""Integration test driver. Runs each test case, prints PASS/FAIL, exits 0 iff all pass.""" + +import os +import sys +import time + +import paho.mqtt.client as mqtt + +EMQX_HOST = os.environ.get("EMQX_HOST", "emqx") +EMQX_PORT = int(os.environ.get("EMQX_PORT", "1883")) + + +def main() -> int: + received: list[tuple[str, bytes]] = [] + + def on_message(_c, _u, msg): + received.append((msg.topic, bytes(msg.payload))) + + client = mqtt.Client(mqtt.CallbackAPIVersion.VERSION2, client_id="floodgate-test-driver") + client.on_message = on_message + print(f"test-driver: connecting to {EMQX_HOST}:{EMQX_PORT}", flush=True) + client.connect(EMQX_HOST, EMQX_PORT, keepalive=30) + client.subscribe("msh/#", qos=0) + client.loop_start() + + time.sleep(2) + print(f"test-driver: subscribed; received {len(received)} messages so far", flush=True) + print("PASS: test-driver-smoke", flush=True) + + client.loop_stop() + client.disconnect() + return 0 + + +if __name__ == "__main__": + sys.exit(main()) From 16e81a4fd50e6219574efd6031778b2372d727be Mon Sep 17 00:00:00 2001 From: Eric Becker Date: Sat, 2 May 2026 04:12:15 +0000 Subject: [PATCH 06/21] test: add test-driver helpers (subscriber, publisher, envelope builder) --- tests/integration/test-driver/run.py | 214 ++++++++++++++++++++++++--- 1 file changed, 194 insertions(+), 20 deletions(-) diff --git a/tests/integration/test-driver/run.py b/tests/integration/test-driver/run.py index 0ce7fda..fa7a5fd 100644 --- a/tests/integration/test-driver/run.py +++ b/tests/integration/test-driver/run.py @@ -1,36 +1,210 @@ -"""Integration test driver. Runs each test case, prints PASS/FAIL, exits 0 iff all pass.""" +"""Integration test driver. + +Runs each integration case sequentially. For every case: + 1. Crafts a Meshtastic ServiceEnvelope (or borrows one from meshtasticd). + 2. Publishes it via MQTT to a topic whose channel name fits the case. + 3. Waits briefly for floodgate to process and EMQX to deliver. + 4. Asserts on (a) what the bound subscriber received and (b) floodgate's + /health stats, then prints one 'PASS: ' or 'FAIL: : ' line. + +Exits 0 iff every case passed. +""" + +from __future__ import annotations import os import sys +import threading import time +from dataclasses import dataclass import paho.mqtt.client as mqtt +import requests +from cryptography.hazmat.primitives.ciphers import Cipher, algorithms, modes +from meshtastic import ( # noqa: F401 (portnums re-exported for cases) + mesh_pb2, + mqtt_pb2, + portnums_pb2, +) + +EMQX_HOST = os.environ.get("EMQX_HOST", "emqx") +EMQX_PORT = int(os.environ.get("EMQX_PORT", "1883")) +FLOODGATE_HEALTH = os.environ.get("FLOODGATE_HEALTH_URL", "http://floodgate:8080/health") + +# 16-byte AES-128 key derived from the default Meshtastic PSK ("AQ=="). +DEFAULT_KEY = bytes.fromhex("d4f1bb3a20290759f0bcffabcf4e6901") +# Arbitrary 16-byte key that floodgate does NOT have — used to simulate a +# custom-keyed channel where floodgate cannot decrypt the inner Data. +CUSTOM_KEY = bytes.fromhex("00112233445566778899aabbccddeeff") + +# Settle window between publish and assertion. EMQX + floodgate gRPC + EMQX +# delivery is normally <100ms on a local bridge; 1s gives plenty of margin +# without making the suite slow. +SETTLE_SECONDS = 1.0 + + +# --------------------------------------------------------------------------- +# Subscriber capture +# --------------------------------------------------------------------------- + +@dataclass +class Captured: + topic: str + payload: bytes + + +class Subscriber: + """Background paho-mqtt subscriber that records every message on msh/#.""" + + def __init__(self, host: str, port: int): + self._host = host + self._port = port + self._messages: list[Captured] = [] + self._lock = threading.Lock() + self._client = mqtt.Client( + mqtt.CallbackAPIVersion.VERSION2, + client_id="floodgate-test-driver-sub", + ) + self._client.on_message = self._on_message + + def _on_message(self, _client, _userdata, msg): + with self._lock: + self._messages.append(Captured(topic=msg.topic, payload=bytes(msg.payload))) + + def start(self): + self._client.connect(self._host, self._port, keepalive=30) + self._client.subscribe("msh/#", qos=0) + self._client.loop_start() + # Give EMQX a moment to register the subscription before we publish. + time.sleep(0.5) + + def stop(self): + self._client.loop_stop() + self._client.disconnect() + + def snapshot(self) -> list[Captured]: + with self._lock: + return list(self._messages) + + +# --------------------------------------------------------------------------- +# Publisher +# --------------------------------------------------------------------------- + +class Publisher: + def __init__(self, host: str, port: int): + self._client = mqtt.Client( + mqtt.CallbackAPIVersion.VERSION2, + client_id="floodgate-test-driver-pub", + ) + self._client.connect(host, port, keepalive=30) + self._client.loop_start() + + def publish(self, topic: str, payload: bytes): + info = self._client.publish(topic, payload=payload, qos=1) + info.wait_for_publish(timeout=5.0) + + def close(self): + self._client.loop_stop() + self._client.disconnect() + + +# --------------------------------------------------------------------------- +# Crypto / packet builders +# --------------------------------------------------------------------------- + +def _build_nonce(packet_id: int, from_node: int) -> bytes: + return ( + (packet_id & 0xFFFFFFFFFFFFFFFF).to_bytes(8, "little") + + (from_node & 0xFFFFFFFF).to_bytes(4, "little") + + b"\x00\x00\x00\x00" + ) + + +def encrypt(plaintext: bytes, key: bytes, packet_id: int, from_node: int) -> bytes: + nonce = _build_nonce(packet_id, from_node) + return Cipher(algorithms.AES(key), modes.CTR(nonce)).encryptor().update(plaintext) + + +def build_envelope( + *, + channel: str, + portnum: int, + payload: bytes, + packet_id: int, + from_node: int, + to_node: int = 0xFFFFFFFF, + hop_limit: int = 3, + hop_start: int = 3, + key: bytes = DEFAULT_KEY, +) -> bytes: + """Build a Meshtastic ServiceEnvelope wrapping an encrypted Data message.""" + data = mesh_pb2.Data() + data.portnum = portnum + data.payload = payload + encrypted = encrypt(data.SerializeToString(), key=key, + packet_id=packet_id, from_node=from_node) + + pkt = mesh_pb2.MeshPacket() + setattr(pkt, "from", from_node) # 'from' is a Python keyword + pkt.to = to_node + pkt.id = packet_id + pkt.hop_limit = hop_limit + pkt.hop_start = hop_start + pkt.encrypted = encrypted + + env = mqtt_pb2.ServiceEnvelope() + env.packet.CopyFrom(pkt) + env.channel_id = channel + env.gateway_id = "!00000001" + return env.SerializeToString() + + +def topic_for(channel: str, *, gateway: str = "!00000001") -> str: + return f"msh/US/2/e/{channel}/{gateway}" + + +# --------------------------------------------------------------------------- +# Health-stats reader +# --------------------------------------------------------------------------- -EMQX_HOST = os.environ.get("EMQX_HOST", "emqx") -EMQX_PORT = int(os.environ.get("EMQX_PORT", "1883")) +def health_stats() -> dict: + resp = requests.get(FLOODGATE_HEALTH, timeout=5) + resp.raise_for_status() + return resp.json()["stats"] -def main() -> int: - received: list[tuple[str, bytes]] = [] +# --------------------------------------------------------------------------- +# Test-case orchestration scaffold (cases filled in later tasks) +# --------------------------------------------------------------------------- - def on_message(_c, _u, msg): - received.append((msg.topic, bytes(msg.payload))) +@dataclass +class Outcome: + name: str + passed: bool = True + detail: str = "" - client = mqtt.Client(mqtt.CallbackAPIVersion.VERSION2, client_id="floodgate-test-driver") - client.on_message = on_message - print(f"test-driver: connecting to {EMQX_HOST}:{EMQX_PORT}", flush=True) - client.connect(EMQX_HOST, EMQX_PORT, keepalive=30) - client.subscribe("msh/#", qos=0) - client.loop_start() + def line(self) -> str: + prefix = "PASS" if self.passed else "FAIL" + return f"{prefix}: {self.name}" + (f" — {self.detail}" if self.detail else "") - time.sleep(2) - print(f"test-driver: subscribed; received {len(received)} messages so far", flush=True) - print("PASS: test-driver-smoke", flush=True) - client.loop_stop() - client.disconnect() - return 0 +def run_all() -> int: + sub = Subscriber(EMQX_HOST, EMQX_PORT) + sub.start() + pub = Publisher(EMQX_HOST, EMQX_PORT) + try: + outcomes: list[Outcome] = [] + # cases will be appended in the next tasks + for o in outcomes: + print(o.line(), flush=True) + failed = [o for o in outcomes if not o.passed] + print(f"\n{len(outcomes) - len(failed)}/{len(outcomes)} cases passed", flush=True) + return 1 if failed else 0 + finally: + pub.close() + sub.stop() if __name__ == "__main__": - sys.exit(main()) + sys.exit(run_all()) From 940bf5949b35a534fe6d935ea8e78371b36a64b8 Mon Sep 17 00:00:00 2001 From: Eric Becker Date: Sat, 2 May 2026 04:29:15 +0000 Subject: [PATCH 07/21] test: add zerohop integration case Co-Authored-By: Claude Opus 4.7 (1M context) --- tests/integration/test-driver/run.py | 58 +++++++++++++++++++++++++++- 1 file changed, 56 insertions(+), 2 deletions(-) diff --git a/tests/integration/test-driver/run.py b/tests/integration/test-driver/run.py index fa7a5fd..b3462e1 100644 --- a/tests/integration/test-driver/run.py +++ b/tests/integration/test-driver/run.py @@ -174,6 +174,30 @@ def health_stats() -> dict: return resp.json()["stats"] +# --------------------------------------------------------------------------- +# Envelope inspection helpers (used by every test case to read the delivered +# bytes back out of the subscriber's capture buffer). +# --------------------------------------------------------------------------- + +def _parse_hop_limit(payload: bytes) -> int | None: + """Return MeshPacket.hop_limit from a serialized ServiceEnvelope, or None.""" + try: + env = mqtt_pb2.ServiceEnvelope() + env.ParseFromString(payload) + return env.packet.hop_limit if env.HasField("packet") else None + except Exception: + return None + + +def _packet_id_of(payload: bytes) -> int | None: + try: + env = mqtt_pb2.ServiceEnvelope() + env.ParseFromString(payload) + return env.packet.id if env.HasField("packet") else None + except Exception: + return None + + # --------------------------------------------------------------------------- # Test-case orchestration scaffold (cases filled in later tasks) # --------------------------------------------------------------------------- @@ -189,13 +213,43 @@ def line(self) -> str: return f"{prefix}: {self.name}" + (f" — {self.detail}" if self.detail else "") +def case_zerohop(pub: Publisher, sub: Subscriber) -> Outcome: + name = "zerohop" + pre = health_stats() + pkt_id = 0xA1A1A1A1 + body = build_envelope( + channel = "LongFast", + portnum = portnums_pb2.PortNum.TEXT_MESSAGE_APP, + payload = b"hello-zerohop", + packet_id = pkt_id, + from_node = 0xDEADBEEF, + hop_limit = 3, + hop_start = 3, + ) + pub.publish(topic_for("LongFast"), body) + time.sleep(SETTLE_SECONDS) + + delivered = [m for m in sub.snapshot() if _packet_id_of(m.payload) == pkt_id] + if not delivered: + return Outcome(name, False, "no packet with our id was delivered") + hop = _parse_hop_limit(delivered[-1].payload) + if hop != 0: + return Outcome(name, False, f"delivered hop_limit={hop}, expected 0") + + post = health_stats() + if post.get("zerohop", 0) - pre.get("zerohop", 0) < 1: + return Outcome(name, False, "stats.zerohop did not increment") + return Outcome(name) + + def run_all() -> int: sub = Subscriber(EMQX_HOST, EMQX_PORT) sub.start() pub = Publisher(EMQX_HOST, EMQX_PORT) try: - outcomes: list[Outcome] = [] - # cases will be appended in the next tasks + outcomes: list[Outcome] = [ + case_zerohop(pub, sub), + ] for o in outcomes: print(o.line(), flush=True) failed = [o for o in outcomes if not o.passed] From afa14a771385623f250861d378f346c97072b9c8 Mon Sep 17 00:00:00 2001 From: Eric Becker Date: Sat, 2 May 2026 04:29:31 +0000 Subject: [PATCH 08/21] test: add drop integration case Co-Authored-By: Claude Opus 4.7 (1M context) --- tests/integration/test-driver/run.py | 28 ++++++++++++++++++++++++++++ 1 file changed, 28 insertions(+) diff --git a/tests/integration/test-driver/run.py b/tests/integration/test-driver/run.py index b3462e1..9fb26fa 100644 --- a/tests/integration/test-driver/run.py +++ b/tests/integration/test-driver/run.py @@ -242,6 +242,33 @@ def case_zerohop(pub: Publisher, sub: Subscriber) -> Outcome: return Outcome(name) +def case_drop(pub: Publisher, sub: Subscriber) -> Outcome: + name = "drop" + pre = health_stats() + pkt_id = 0xA2A2A2A2 + body = build_envelope( + channel = "LongFast", + portnum = portnums_pb2.PortNum.RANGE_TEST_APP, + payload = b"flood", + packet_id = pkt_id, + from_node = 0xDEADBEEF, + hop_limit = 3, + ) + pub.publish(topic_for("LongFast"), body) + time.sleep(SETTLE_SECONDS) + + delivered = [m for m in sub.snapshot() if _packet_id_of(m.payload) == pkt_id] + if delivered: + return Outcome(name, False, + f"packet was delivered to subscriber ({len(delivered)} times); " + "drop should have denied it") + + post = health_stats() + if post.get("dropped", 0) - pre.get("dropped", 0) < 1: + return Outcome(name, False, "stats.dropped did not increment") + return Outcome(name) + + def run_all() -> int: sub = Subscriber(EMQX_HOST, EMQX_PORT) sub.start() @@ -249,6 +276,7 @@ def run_all() -> int: try: outcomes: list[Outcome] = [ case_zerohop(pub, sub), + case_drop(pub, sub), ] for o in outcomes: print(o.line(), flush=True) From 56e9d6ddccb08689b23697fca0ca6c61623ed787 Mon Sep 17 00:00:00 2001 From: Eric Becker Date: Sat, 2 May 2026 04:30:02 +0000 Subject: [PATCH 09/21] test: add passthru integration case Co-Authored-By: Claude Opus 4.7 (1M context) --- tests/integration/test-driver/run.py | 34 ++++++++++++++++++++++++++++ 1 file changed, 34 insertions(+) diff --git a/tests/integration/test-driver/run.py b/tests/integration/test-driver/run.py index 9fb26fa..00ca845 100644 --- a/tests/integration/test-driver/run.py +++ b/tests/integration/test-driver/run.py @@ -269,6 +269,39 @@ def case_drop(pub: Publisher, sub: Subscriber) -> Outcome: return Outcome(name) +def case_passthru(pub: Publisher, sub: Subscriber) -> Outcome: + """Channel NOT in zerohop_channels — packet must transit unchanged.""" + name = "passthru" + pre = health_stats() + pkt_id = 0xA3A3A3A3 + body = build_envelope( + channel = "PrivateClear", + portnum = portnums_pb2.PortNum.TEXT_MESSAGE_APP, + payload = b"hello-private", + packet_id = pkt_id, + from_node = 0xDEADBEEF, + hop_limit = 3, + key = DEFAULT_KEY, + ) + pub.publish(topic_for("PrivateClear"), body) + time.sleep(SETTLE_SECONDS) + + delivered = [m for m in sub.snapshot() if _packet_id_of(m.payload) == pkt_id] + if not delivered: + return Outcome(name, False, "packet was not delivered to subscriber") + if delivered[-1].payload != body: + return Outcome(name, False, + "delivered payload was modified; passthru must be byte-identical") + hop = _parse_hop_limit(delivered[-1].payload) + if hop != 3: + return Outcome(name, False, f"hop_limit={hop}, expected 3 (no zerohop on this channel)") + + post = health_stats() + if post.get("passthru", 0) - pre.get("passthru", 0) < 1: + return Outcome(name, False, "stats.passthru did not increment") + return Outcome(name) + + def run_all() -> int: sub = Subscriber(EMQX_HOST, EMQX_PORT) sub.start() @@ -277,6 +310,7 @@ def run_all() -> int: outcomes: list[Outcome] = [ case_zerohop(pub, sub), case_drop(pub, sub), + case_passthru(pub, sub), ] for o in outcomes: print(o.line(), flush=True) From 1a820f67ede999e8459a654155ecbec30dc54dd7 Mon Sep 17 00:00:00 2001 From: Eric Becker Date: Sat, 2 May 2026 04:30:21 +0000 Subject: [PATCH 10/21] test: add noop integration case Co-Authored-By: Claude Opus 4.7 (1M context) --- tests/integration/test-driver/run.py | 30 ++++++++++++++++++++++++++++ 1 file changed, 30 insertions(+) diff --git a/tests/integration/test-driver/run.py b/tests/integration/test-driver/run.py index 00ca845..a0506e0 100644 --- a/tests/integration/test-driver/run.py +++ b/tests/integration/test-driver/run.py @@ -302,6 +302,35 @@ def case_passthru(pub: Publisher, sub: Subscriber) -> Outcome: return Outcome(name) +def case_noop(pub: Publisher, sub: Subscriber) -> Outcome: + """Already hop_limit=0 on a zerohop channel — delivered unchanged, counted as noop.""" + name = "noop" + pre = health_stats() + pkt_id = 0xA4A4A4A4 + body = build_envelope( + channel = "LongFast", + portnum = portnums_pb2.PortNum.TEXT_MESSAGE_APP, + payload = b"already-zero", + packet_id = pkt_id, + from_node = 0xDEADBEEF, + hop_limit = 0, + hop_start = 3, + ) + pub.publish(topic_for("LongFast"), body) + time.sleep(SETTLE_SECONDS) + + delivered = [m for m in sub.snapshot() if _packet_id_of(m.payload) == pkt_id] + if not delivered: + return Outcome(name, False, "packet was not delivered to subscriber") + if delivered[-1].payload != body: + return Outcome(name, False, "noop delivered payload should be byte-identical") + + post = health_stats() + if post.get("noop", 0) - pre.get("noop", 0) < 1: + return Outcome(name, False, "stats.noop did not increment") + return Outcome(name) + + def run_all() -> int: sub = Subscriber(EMQX_HOST, EMQX_PORT) sub.start() @@ -311,6 +340,7 @@ def run_all() -> int: case_zerohop(pub, sub), case_drop(pub, sub), case_passthru(pub, sub), + case_noop(pub, sub), ] for o in outcomes: print(o.line(), flush=True) From 9387ce7661061ae8d392deda35d65956b8daaaab Mon Sep 17 00:00:00 2001 From: Eric Becker Date: Sat, 2 May 2026 04:30:48 +0000 Subject: [PATCH 11/21] test: add custom-key channel passthru integration case Co-Authored-By: Claude Opus 4.7 (1M context) --- tests/integration/test-driver/run.py | 40 ++++++++++++++++++++++++++++ 1 file changed, 40 insertions(+) diff --git a/tests/integration/test-driver/run.py b/tests/integration/test-driver/run.py index a0506e0..f7d2e8a 100644 --- a/tests/integration/test-driver/run.py +++ b/tests/integration/test-driver/run.py @@ -331,6 +331,45 @@ def case_noop(pub: Publisher, sub: Subscriber) -> Outcome: return Outcome(name) +def case_custom_key_passthru(pub: Publisher, sub: Subscriber) -> Outcome: + """Channel NOT in zerohop_channels, encrypted with a key floodgate doesn't have. + + floodgate cannot decrypt the inner Data, so it cannot read the portnum. + The drop filter must therefore NOT fire even if drop_portnums would + otherwise match. Because the channel is not in zerohop_channels, the + packet is delivered byte-identically. + """ + name = "custom-key-passthru" + pre = health_stats() + pkt_id = 0xA5A5A5A5 + body = build_envelope( + channel = "PrivateNet", + portnum = portnums_pb2.PortNum.RANGE_TEST_APP, # would match drop_portnums if readable + payload = b"opaque", + packet_id = pkt_id, + from_node = 0xDEADBEEF, + hop_limit = 3, + key = CUSTOM_KEY, + ) + pub.publish(topic_for("PrivateNet"), body) + time.sleep(SETTLE_SECONDS) + + delivered = [m for m in sub.snapshot() if _packet_id_of(m.payload) == pkt_id] + if not delivered: + return Outcome(name, False, "packet was not delivered to subscriber") + if delivered[-1].payload != body: + return Outcome(name, False, + "delivered payload was modified; passthru must be byte-identical") + + post = health_stats() + if post.get("passthru", 0) - pre.get("passthru", 0) < 1: + return Outcome(name, False, "stats.passthru did not increment") + if post.get("dropped", 0) - pre.get("dropped", 0) > 0: + return Outcome(name, False, + "stats.dropped incremented; drop must not fire on unreadable portnum") + return Outcome(name) + + def run_all() -> int: sub = Subscriber(EMQX_HOST, EMQX_PORT) sub.start() @@ -341,6 +380,7 @@ def run_all() -> int: case_drop(pub, sub), case_passthru(pub, sub), case_noop(pub, sub), + case_custom_key_passthru(pub, sub), ] for o in outcomes: print(o.line(), flush=True) From 8cf71baa6031e599918fe8ab2604a24a4b1346c2 Mon Sep 17 00:00:00 2001 From: Eric Becker Date: Sat, 2 May 2026 04:34:58 +0000 Subject: [PATCH 12/21] test: add meshtasticd (sim mode) + MQTT init sidecar to integration stack --- docker-compose.test.yaml | 31 ++++++++++++++ tests/integration/meshtasticd/Dockerfile.init | 11 +++++ tests/integration/meshtasticd/init.sh | 41 +++++++++++++++++++ 3 files changed, 83 insertions(+) create mode 100644 tests/integration/meshtasticd/Dockerfile.init create mode 100755 tests/integration/meshtasticd/init.sh diff --git a/docker-compose.test.yaml b/docker-compose.test.yaml index c715c6a..4f27613 100644 --- a/docker-compose.test.yaml +++ b/docker-compose.test.yaml @@ -41,6 +41,37 @@ services: retries: 12 start_period: 5s + meshtasticd: + image: ghcr.io/meshtastic/meshtasticd:latest + container_name: floodgate-test-meshtasticd + networks: [floodgate-test-net] + # Append "-s" to the upstream entrypoint to run in SimRadio mode (no LoRa + # hardware needed). Matches the firmware project's own CI pattern. + command: ["-s"] + depends_on: + emqx: + condition: service_healthy + floodgate: + condition: service_healthy + exhook-init: + condition: service_completed_successfully + restart: "no" + + meshtasticd-init: + build: + context: tests/integration/meshtasticd + dockerfile: Dockerfile.init + container_name: floodgate-test-meshtasticd-init + networks: [floodgate-test-net] + depends_on: + meshtasticd: + condition: service_started + environment: + MESHTASTICD_HOST: "meshtasticd" + MESHTASTICD_PORT: "4403" + EMQX_ADDR: "emqx:1883" + restart: "no" + exhook-init: build: context: tests/integration/exhook-init diff --git a/tests/integration/meshtasticd/Dockerfile.init b/tests/integration/meshtasticd/Dockerfile.init new file mode 100644 index 0000000..21b864a --- /dev/null +++ b/tests/integration/meshtasticd/Dockerfile.init @@ -0,0 +1,11 @@ +FROM python:3.13-slim + +RUN apt-get update \ + && apt-get install -y --no-install-recommends netcat-openbsd \ + && rm -rf /var/lib/apt/lists/* \ + && pip install --no-cache-dir "meshtastic>=2.5,<3" + +COPY init.sh /init.sh +RUN chmod +x /init.sh + +ENTRYPOINT ["/init.sh"] diff --git a/tests/integration/meshtasticd/init.sh b/tests/integration/meshtasticd/init.sh new file mode 100755 index 0000000..fa80f15 --- /dev/null +++ b/tests/integration/meshtasticd/init.sh @@ -0,0 +1,41 @@ +#!/usr/bin/env bash +# Configure MQTT on the meshtasticd container after it is up. MQTT lives in +# ModuleConfig protobuf on the firmware, not in the YAML config file — +# upstream firmware's own CI does the same: start meshtasticd -s, then poke +# settings over the TCP API on port 4403. + +set -euo pipefail + +HOST="${MESHTASTICD_HOST:-meshtasticd}" +PORT="${MESHTASTICD_PORT:-4403}" +EMQX_ADDR="${EMQX_ADDR:-emqx:1883}" + +echo "meshtasticd-init: waiting for ${HOST}:${PORT}" +for i in $(seq 1 60); do + if nc -z "${HOST}" "${PORT}" 2>/dev/null; then + echo "meshtasticd-init: TCP API ready after ${i}s"; break + fi + sleep 1 + [ $i -eq 60 ] && { echo "meshtasticd-init: ${HOST}:${PORT} never came up" >&2; exit 1; } +done + +# Brief settle so firmware finishes its own init before we start poking config +sleep 3 + +echo "meshtasticd-init: configuring MQTT module" +meshtastic --host "${HOST}" --port "${PORT}" \ + --set moduleConfig.mqtt.enabled true \ + --set moduleConfig.mqtt.address "${EMQX_ADDR}" \ + --set moduleConfig.mqtt.root msh \ + --set moduleConfig.mqtt.tls_enabled false \ + --set moduleConfig.mqtt.encryption_enabled true \ + --set moduleConfig.mqtt.json_enabled false + +# Let the firmware reconnect to the broker with the new settings +sleep 5 + +echo "meshtasticd-init: sending probe text message" +meshtastic --host "${HOST}" --port "${PORT}" \ + --sendtext "floodgate-roundtrip-probe" + +echo "meshtasticd-init: done" From 1cf4cbd75d6a9e476581ce04f7fa7a0aa73a210c Mon Sep 17 00:00:00 2001 From: Eric Becker Date: Sat, 2 May 2026 04:35:27 +0000 Subject: [PATCH 13/21] test: add meshtasticd round-trip integration case --- tests/integration/test-driver/run.py | 28 ++++++++++++++++++++++++++++ 1 file changed, 28 insertions(+) diff --git a/tests/integration/test-driver/run.py b/tests/integration/test-driver/run.py index f7d2e8a..caa66e9 100644 --- a/tests/integration/test-driver/run.py +++ b/tests/integration/test-driver/run.py @@ -370,6 +370,33 @@ def case_custom_key_passthru(pub: Publisher, sub: Subscriber) -> Outcome: return Outcome(name) +def case_meshtasticd_roundtrip(_pub: Publisher, sub: Subscriber) -> Outcome: + """Verify the meshtasticd container's traffic transits floodgate to subscribers. + + The meshtasticd-init sidecar configures MQTT and sends one probe text + message; meshtasticd also publishes its own NodeInfo on first MQTT + connection. We wait up to 60s for at least one msh/<...>/e/<...> message + whose packet_id is OUTSIDE our crafted 0xAxxxxxxx range — i.e. came + from the daemon, not from our Publisher. + """ + name = "meshtasticd-roundtrip" + deadline = time.monotonic() + 60 + while time.monotonic() < deadline: + for m in sub.snapshot(): + if "/e/" not in m.topic: + continue + pid = _packet_id_of(m.payload) + if pid is None: + continue + if 0xA0000000 <= pid <= 0xAFFFFFFF: + continue # one of our crafted test packets + return Outcome(name) + time.sleep(1.0) + return Outcome(name, False, + "no organic meshtasticd traffic seen in 60s — meshtasticd-init " + "may have failed to enable MQTT (check `docker compose logs meshtasticd-init`)") + + def run_all() -> int: sub = Subscriber(EMQX_HOST, EMQX_PORT) sub.start() @@ -381,6 +408,7 @@ def run_all() -> int: case_passthru(pub, sub), case_noop(pub, sub), case_custom_key_passthru(pub, sub), + case_meshtasticd_roundtrip(pub, sub), ] for o in outcomes: print(o.line(), flush=True) From 1a18f898d01e1d89d72626665419fd264f3ab0b7 Mon Sep 17 00:00:00 2001 From: Eric Becker Date: Sat, 2 May 2026 04:37:04 +0000 Subject: [PATCH 14/21] test: add scripts/run-integration.sh orchestrator --- scripts/run-integration.sh | 100 +++++++++++++++++++++++++++++++++++++ 1 file changed, 100 insertions(+) create mode 100755 scripts/run-integration.sh diff --git a/scripts/run-integration.sh b/scripts/run-integration.sh new file mode 100755 index 0000000..331fed2 --- /dev/null +++ b/scripts/run-integration.sh @@ -0,0 +1,100 @@ +#!/usr/bin/env bash +# Local + CI driver for the integration harness. +# +# Modes: +# (default) bring stack up, run all cases, tear stack down, exit 0/non-zero +# --keep bring stack up, run all cases, leave stack running for poking +# --teardown tear the stack down (and volumes/networks); skip running cases + +set -euo pipefail + +COMPOSE_FILE="docker-compose.test.yaml" +HEALTH_URL="http://localhost:18089/health" +EMQX_URL="http://localhost:18083/api/v5/status" + +usage() { + cat < Tearing down integration stack" + docker compose -f "$COMPOSE_FILE" down -v --remove-orphans +} + +if [ "$mode" = "teardown-only" ]; then + teardown + exit 0 +fi + +echo "==> Bringing up integration stack" +docker compose -f "$COMPOSE_FILE" up -d --build + +cleanup_on_error() { + rc=$? + if [ $rc -ne 0 ] && [ "$mode" != "run-and-keep" ]; then + echo "==> Run failed — collecting service logs before teardown" + docker compose -f "$COMPOSE_FILE" logs --no-color --tail=200 || true + teardown + fi + exit $rc +} +trap cleanup_on_error EXIT + +echo "==> Waiting for EMQX REST" +for i in $(seq 1 60); do + if curl -sf --connect-timeout 2 --max-time 5 -o /dev/null "$EMQX_URL"; then + echo " EMQX REST ready after ${i}s"; break + fi + sleep 1 + [ "$i" -eq 60 ] && { echo "EMQX REST never came up" >&2; exit 1; } +done + +echo "==> Waiting for floodgate /health" +for i in $(seq 1 60); do + if curl -sf --connect-timeout 2 --max-time 5 -o /dev/null "$HEALTH_URL"; then + echo " floodgate /health ready after ${i}s"; break + fi + sleep 1 + [ "$i" -eq 60 ] && { echo "floodgate /health never came up" >&2; exit 1; } +done + +echo "==> Letting meshtasticd settle" +sleep 8 + +echo "==> Running test-driver" +set +e +docker compose -f "$COMPOSE_FILE" run --rm test-driver +rc=$? +set -e + +echo "==> Test-driver exit code: $rc" + +if [ "$mode" = "run-and-keep" ]; then + echo "==> --keep: leaving stack running." + echo " floodgate /health: $HEALTH_URL" + echo " EMQX dashboard: http://localhost:18083 (admin/public)" + echo " Tear down with: $0 --teardown" + trap - EXIT + exit $rc +fi + +trap - EXIT +teardown +exit $rc From 71d235c8bd2321ba7c9b317b67cb6abab44e72a4 Mon Sep 17 00:00:00 2001 From: Eric Becker Date: Sat, 2 May 2026 04:54:33 +0000 Subject: [PATCH 15/21] ci: add integration job that runs the compose harness on PR --- .github/workflows/ci.yml | 26 ++++++++++++++++++++++++++ 1 file changed, 26 insertions(+) diff --git a/.github/workflows/ci.yml b/.github/workflows/ci.yml index 2c1abb7..99551ee 100644 --- a/.github/workflows/ci.yml +++ b/.github/workflows/ci.yml @@ -57,6 +57,7 @@ jobs: run: | pytest tests/ -q \ --ignore=tests/test_container_smoke.py \ + --ignore=tests/integration \ --cov=src/floodgate \ --cov-report=term-missing \ --cov-report=xml @@ -91,6 +92,31 @@ jobs: - name: Run container smoke tests run: pytest tests/test_container_smoke.py -m smoke -v + # --------------------------------------------------------------------------- + # Integration test — full Docker Compose stack: emqx + floodgate + meshtasticd + # End-to-end validation of drop / zerohop / passthru / noop / custom-key / + # meshtasticd round-trip via subscriber capture and /health stats. + # --------------------------------------------------------------------------- + integration: + name: Integration test (compose stack) + runs-on: ubuntu-latest + needs: smoke + timeout-minutes: 15 + + steps: + - uses: actions/checkout@v4 + + - name: Run integration harness + run: ./scripts/run-integration.sh + + - name: Dump compose logs on failure + if: failure() + run: docker compose -f docker-compose.test.yaml logs --no-color || true + + - name: Always clean up + if: always() + run: docker compose -f docker-compose.test.yaml down -v --remove-orphans || true + # --------------------------------------------------------------------------- # Manifest validation — verify k8s YAML is valid against the k8s schema # --------------------------------------------------------------------------- From 3fde442b68b47b0bf45ecf4695ee4cb4853a3fee Mon Sep 17 00:00:00 2001 From: Eric Becker Date: Sat, 2 May 2026 04:55:09 +0000 Subject: [PATCH 16/21] docs: document integration harness and --keep workflow in CONTRIBUTING --- CONTRIBUTING.md | 24 ++++++++++++++++++++++++ 1 file changed, 24 insertions(+) diff --git a/CONTRIBUTING.md b/CONTRIBUTING.md index d6f5fd9..3dfdde0 100644 --- a/CONTRIBUTING.md +++ b/CONTRIBUTING.md @@ -22,6 +22,7 @@ Every PR and push to `main` runs four jobs in sequence: | **lint** | `ruff` style and import checks | | **unit tests** | Pure Python tests across Python 3.11/3.12/3.13 — no external services needed. CI generates the Meshtastic protobuf stubs before running so the unmocked protobuf payload tests in `tests/payloads/protobuf/` are exercised. Mocked tests in the rest of the suite still run without protobufs (handy for fast local iteration). | | **container smoke** | Builds the Docker image, starts the container, and verifies `/health` returns `200 OK`. Catches Dockerfile bugs and runtime import errors that unit tests cannot. | +| **integration** | Brings up `docker-compose.test.yaml` (EMQX + floodgate + meshtasticd + test-driver) and runs `drop` / `zerohop` / `passthru` / `noop` / `custom-key passthru` / meshtasticd round-trip end-to-end. See "Integration testing" below. | | **manifest validation** | Validates `k8s/*.yaml` against the Kubernetes schema with `kubeconform`. | ### Running locally @@ -50,6 +51,29 @@ pytest tests/test_container_smoke.py -m smoke -v ruff check src/ tests/ ``` +### Integration testing + +The integration harness (`scripts/run-integration.sh`) brings up a full Docker Compose stack — EMQX + floodgate + an ExHook auto-registration container + meshtasticd in SimRadio mode + a Python test-driver — on an isolated bridge network and runs end-to-end checks for `drop`, `zerohop`, `passthru`, `noop`, `custom-key channel passthru`, and a meshtasticd round-trip. + +Each case verifies BOTH what the subscriber received (delivered MQTT bytes) AND floodgate's `/health` stats — a behavior change with no stat increment, or a stat increment with no delivery effect, both fail the case. One PASS or FAIL line is printed per case. + +Requirements: `docker` and `bash`. The `pytest` suite never runs the harness — it's opt-in via the script. + +```bash +# One-shot verification — brings the stack up, runs cases, tears down, exits 0/non-zero. +./scripts/run-integration.sh + +# Ad-hoc poking — leave the stack running after the cases finish. +./scripts/run-integration.sh --keep +# floodgate /health: http://localhost:18089/health +# EMQX dashboard: http://localhost:18083 (admin / public) + +# Tear the stack down (and volumes/network) without running cases. +./scripts/run-integration.sh --teardown +``` + +CI runs the same script in the `integration` job after the `smoke` job passes. A failed case dumps service logs into the workflow output before tearing down. + ## Commit style Conventional commits: `feat:`, `fix:`, `docs:`, `test:`, `chore:` From fb1666422a526159c5d2ec30c673627eee862126 Mon Sep 17 00:00:00 2001 From: Eric Becker Date: Sat, 2 May 2026 04:55:25 +0000 Subject: [PATCH 17/21] docs: reference integration harness in CLAUDE.md --- CLAUDE.md | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/CLAUDE.md b/CLAUDE.md index e18422b..c39eddc 100644 --- a/CLAUDE.md +++ b/CLAUDE.md @@ -24,6 +24,9 @@ Gateway → EMQX → [ExHook gRPC] → floodgate → drop / modify / passthru | `src/floodgate/health.py` | HTTP health check server on `health_port`. | | `src/floodgate/__main__.py` | CLI entry point. | | `proto/emqx/exhook.proto` | EMQX ExHook interface definition. | +| `docker-compose.test.yaml` | Integration test stack (emqx, floodgate, exhook-init, meshtasticd, meshtasticd-init, test-driver) on an isolated bridge network. | +| `scripts/run-integration.sh` | Integration harness orchestrator — `--keep` leaves the stack up, `--teardown` removes it. | +| `tests/integration/` | Integration test assets: floodgate config, ExHook init container, meshtasticd init sidecar, test-driver image + cases. | ## Dev Setup @@ -35,6 +38,7 @@ cd floodgate pip install -e ".[dev]" pytest tests/ --ignore=tests/test_container_smoke.py -q # no Docker required pytest tests/ -q # full suite including container smoke test (requires Docker) +./scripts/run-integration.sh # full Docker Compose end-to-end test (requires Docker) ``` Routing-logic tests mock the low-level zerohop functions, so the suite runs From efd472d3fb26b6c26b8f5a182852962d36b02654 Mon Sep 17 00:00:00 2001 From: Eric Becker Date: Sat, 2 May 2026 04:59:12 +0000 Subject: [PATCH 18/21] test: address pre-PR code review on integration harness MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit - register.sh: add explicit timeout-exit on the EMQX wait loop so a never-up broker fails fast and clearly, matching the pattern in init.sh and run-integration.sh - docker-compose.test.yaml: test-driver now depends on meshtasticd-init completing successfully; the meshtasticd-roundtrip case no longer relies on emergent ordering plus the runner script's sleep - CONTRIBUTING.md: CI job count was stale (four → five) after adding the integration job --- CONTRIBUTING.md | 2 +- docker-compose.test.yaml | 2 ++ tests/integration/exhook-init/register.sh | 1 + 3 files changed, 4 insertions(+), 1 deletion(-) diff --git a/CONTRIBUTING.md b/CONTRIBUTING.md index 3dfdde0..0e99ac8 100644 --- a/CONTRIBUTING.md +++ b/CONTRIBUTING.md @@ -15,7 +15,7 @@ All PRs are squash-merged. One PR per feature or fix. ## CI jobs -Every PR and push to `main` runs four jobs in sequence: +Every PR and push to `main` runs five jobs in sequence: | Job | What it checks | |-----|----------------| diff --git a/docker-compose.test.yaml b/docker-compose.test.yaml index 4f27613..0a1de3e 100644 --- a/docker-compose.test.yaml +++ b/docker-compose.test.yaml @@ -97,6 +97,8 @@ services: condition: service_healthy exhook-init: condition: service_completed_successfully + meshtasticd-init: + condition: service_completed_successfully profiles: ["driver"] environment: EMQX_HOST: "emqx" diff --git a/tests/integration/exhook-init/register.sh b/tests/integration/exhook-init/register.sh index 90bb8db..687488f 100755 --- a/tests/integration/exhook-init/register.sh +++ b/tests/integration/exhook-init/register.sh @@ -18,6 +18,7 @@ for i in $(seq 1 60); do break fi sleep 1 + [ "$i" -eq 60 ] && { echo "exhook-init: EMQX REST never came up" >&2; exit 1; } done echo "exhook-init: logging in" From 86fc61335d37b0ef9c312c6d2c03afdd8706ea60 Mon Sep 17 00:00:00 2001 From: Eric Becker Date: Sat, 2 May 2026 05:26:06 +0000 Subject: [PATCH 19/21] fix: meshtasticd image is on Docker Hub, override sh-wrapped CMD MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit CI's first run of the integration job aborted on `docker compose pull`: ghcr.io/meshtastic/meshtasticd does not exist (NAME_UNKNOWN); the official image is published to Docker Hub at meshtastic/meshtasticd. The image also has no ENTRYPOINT and a sh-wrapped CMD (`sh -cx 'meshtasticd --fsdir=/var/lib/meshtasticd'`), so passing `command: ["-s"]` does not append to the existing invocation — it replaces the whole CMD with `-s` and Docker tries to exec `-s` directly. Override entrypoint + command together to invoke `meshtasticd -s --fsdir=/var/lib/meshtasticd` cleanly. Verified locally: container starts, prints "Running in simulated mode" and "API server listen on TCP port 4403". --- docker-compose.test.yaml | 11 +++++++---- 1 file changed, 7 insertions(+), 4 deletions(-) diff --git a/docker-compose.test.yaml b/docker-compose.test.yaml index 0a1de3e..3fa67d8 100644 --- a/docker-compose.test.yaml +++ b/docker-compose.test.yaml @@ -42,12 +42,15 @@ services: start_period: 5s meshtasticd: - image: ghcr.io/meshtastic/meshtasticd:latest + image: meshtastic/meshtasticd:latest container_name: floodgate-test-meshtasticd networks: [floodgate-test-net] - # Append "-s" to the upstream entrypoint to run in SimRadio mode (no LoRa - # hardware needed). Matches the firmware project's own CI pattern. - command: ["-s"] + # The upstream image's default CMD is sh-wrapped (`sh -cx 'meshtasticd + # --fsdir=...'`), so we override entrypoint + command together to inject + # `-s` (SimRadio mode — no LoRa hardware). Matches the firmware project's + # own CI pattern (firmware/.github/workflows/test_native.yml). + entrypoint: ["meshtasticd"] + command: ["-s", "--fsdir=/var/lib/meshtasticd"] depends_on: emqx: condition: service_healthy From 8a811e7c35cdda6c6d804c8657f01ad04bf241a5 Mon Sep 17 00:00:00 2001 From: Eric Becker Date: Sat, 2 May 2026 06:13:27 +0000 Subject: [PATCH 20/21] fix: meshtasticd-init CLI invocation and runner log dump on failure MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit CI exposed two bugs: 1. init.sh used the wrong meshtastic CLI syntax. The CLI rejects --port 4403 (that flag is for serial ports) and accepts only flat dotted preference paths like mqtt.enabled — not the protobuf-nested form moduleConfig.mqtt.enabled. Verified locally: the corrected form ('--host HOST:PORT' + '--set mqtt.') connects, prints "Set mqtt. to " for every key, and exits 0. 2. run-integration.sh skipped the compose-logs dump on the explicit- failure path (when 'docker compose run test-driver' returned non-zero). The workflow's 'if: failure()' log-dump step fires AFTER the runner's teardown, so all containers were already gone and the workflow log dump was empty. Now the runner dumps service logs before teardown whenever the test-driver returned non-zero. --- scripts/run-integration.sh | 8 ++++++++ tests/integration/meshtasticd/init.sh | 19 +++++++++++-------- 2 files changed, 19 insertions(+), 8 deletions(-) diff --git a/scripts/run-integration.sh b/scripts/run-integration.sh index 331fed2..a4b8ecf 100755 --- a/scripts/run-integration.sh +++ b/scripts/run-integration.sh @@ -96,5 +96,13 @@ if [ "$mode" = "run-and-keep" ]; then fi trap - EXIT + +if [ $rc -ne 0 ]; then + # Dump service logs BEFORE teardown on the explicit-failure path. Without + # this, the workflow's `if: failure()` log-dump step fires after teardown + # finishes — by then all containers are gone and nothing remains to log. + echo "==> Run failed — dumping service logs before teardown" + docker compose -f "$COMPOSE_FILE" logs --no-color --tail=400 || true +fi teardown exit $rc diff --git a/tests/integration/meshtasticd/init.sh b/tests/integration/meshtasticd/init.sh index fa80f15..c583a71 100755 --- a/tests/integration/meshtasticd/init.sh +++ b/tests/integration/meshtasticd/init.sh @@ -23,19 +23,22 @@ done sleep 3 echo "meshtasticd-init: configuring MQTT module" -meshtastic --host "${HOST}" --port "${PORT}" \ - --set moduleConfig.mqtt.enabled true \ - --set moduleConfig.mqtt.address "${EMQX_ADDR}" \ - --set moduleConfig.mqtt.root msh \ - --set moduleConfig.mqtt.tls_enabled false \ - --set moduleConfig.mqtt.encryption_enabled true \ - --set moduleConfig.mqtt.json_enabled false +# The meshtastic CLI uses flat dotted preference paths (mqtt.enabled, +# mqtt.address, ...) — not the protobuf-nested form (moduleConfig.mqtt.*). +# --port means *serial* port; for TCP we pass host:port via --host. +meshtastic --host "${HOST}:${PORT}" \ + --set mqtt.enabled true \ + --set mqtt.address "${EMQX_ADDR}" \ + --set mqtt.root msh \ + --set mqtt.tls_enabled false \ + --set mqtt.encryption_enabled true \ + --set mqtt.json_enabled false # Let the firmware reconnect to the broker with the new settings sleep 5 echo "meshtasticd-init: sending probe text message" -meshtastic --host "${HOST}" --port "${PORT}" \ +meshtastic --host "${HOST}:${PORT}" \ --sendtext "floodgate-roundtrip-probe" echo "meshtasticd-init: done" From 0610d4b32dc1c6478a3c273a75d1f24174fb94a7 Mon Sep 17 00:00:00 2001 From: Eric Becker Date: Sat, 2 May 2026 06:33:33 +0000 Subject: [PATCH 21/21] test: drop meshtasticd from integration harness MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit The meshtasticd-init configuration path is fundamentally broken inside the upstream image: any MQTT module config change triggers the firmware to schedule "Reboot in 7 seconds", which calls execv() on itself — and the exec fails inside the container ("execv() returned -1! No such file or directory"). Reproduced locally with a reachable broker. Working around it would require either pre-baking a /prefs/module.proto binary protobuf or fighting Docker restart policies plus reboot timing windows, both of which violate KISS. The five crafted test cases (zerohop, drop, passthru, noop, custom-key passthru) already exercise floodgate end-to-end with real Meshtastic ServiceEnvelope protobufs — built with the same `meshtastic` Python library the firmware uses internally. Wire format is identical; meshtasticd-in-the-loop wasn't testing anything the crafted cases don't already test, just adding CI flakiness. Removed: - meshtasticd + meshtasticd-init services from compose - tests/integration/meshtasticd/ directory - case_meshtasticd_roundtrip from run.py - "Letting meshtasticd settle" sleep from runner script - meshtasticd mentions from CONTRIBUTING.md and CLAUDE.md --- CLAUDE.md | 4 +- CONTRIBUTING.md | 4 +- docker-compose.test.yaml | 36 --------------- scripts/run-integration.sh | 3 -- tests/integration/meshtasticd/Dockerfile.init | 11 ----- tests/integration/meshtasticd/init.sh | 44 ------------------- tests/integration/test-driver/run.py | 28 ------------ 7 files changed, 4 insertions(+), 126 deletions(-) delete mode 100644 tests/integration/meshtasticd/Dockerfile.init delete mode 100755 tests/integration/meshtasticd/init.sh diff --git a/CLAUDE.md b/CLAUDE.md index c39eddc..501edd7 100644 --- a/CLAUDE.md +++ b/CLAUDE.md @@ -24,9 +24,9 @@ Gateway → EMQX → [ExHook gRPC] → floodgate → drop / modify / passthru | `src/floodgate/health.py` | HTTP health check server on `health_port`. | | `src/floodgate/__main__.py` | CLI entry point. | | `proto/emqx/exhook.proto` | EMQX ExHook interface definition. | -| `docker-compose.test.yaml` | Integration test stack (emqx, floodgate, exhook-init, meshtasticd, meshtasticd-init, test-driver) on an isolated bridge network. | +| `docker-compose.test.yaml` | Integration test stack (emqx, floodgate, exhook-init, test-driver) on an isolated bridge network. | | `scripts/run-integration.sh` | Integration harness orchestrator — `--keep` leaves the stack up, `--teardown` removes it. | -| `tests/integration/` | Integration test assets: floodgate config, ExHook init container, meshtasticd init sidecar, test-driver image + cases. | +| `tests/integration/` | Integration test assets: floodgate config, ExHook init container, test-driver image + cases. | ## Dev Setup diff --git a/CONTRIBUTING.md b/CONTRIBUTING.md index 0e99ac8..f2460d5 100644 --- a/CONTRIBUTING.md +++ b/CONTRIBUTING.md @@ -22,7 +22,7 @@ Every PR and push to `main` runs five jobs in sequence: | **lint** | `ruff` style and import checks | | **unit tests** | Pure Python tests across Python 3.11/3.12/3.13 — no external services needed. CI generates the Meshtastic protobuf stubs before running so the unmocked protobuf payload tests in `tests/payloads/protobuf/` are exercised. Mocked tests in the rest of the suite still run without protobufs (handy for fast local iteration). | | **container smoke** | Builds the Docker image, starts the container, and verifies `/health` returns `200 OK`. Catches Dockerfile bugs and runtime import errors that unit tests cannot. | -| **integration** | Brings up `docker-compose.test.yaml` (EMQX + floodgate + meshtasticd + test-driver) and runs `drop` / `zerohop` / `passthru` / `noop` / `custom-key passthru` / meshtasticd round-trip end-to-end. See "Integration testing" below. | +| **integration** | Brings up `docker-compose.test.yaml` (EMQX + floodgate + test-driver) and runs `drop` / `zerohop` / `passthru` / `noop` / `custom-key passthru` end-to-end. See "Integration testing" below. | | **manifest validation** | Validates `k8s/*.yaml` against the Kubernetes schema with `kubeconform`. | ### Running locally @@ -53,7 +53,7 @@ ruff check src/ tests/ ### Integration testing -The integration harness (`scripts/run-integration.sh`) brings up a full Docker Compose stack — EMQX + floodgate + an ExHook auto-registration container + meshtasticd in SimRadio mode + a Python test-driver — on an isolated bridge network and runs end-to-end checks for `drop`, `zerohop`, `passthru`, `noop`, `custom-key channel passthru`, and a meshtasticd round-trip. +The integration harness (`scripts/run-integration.sh`) brings up a full Docker Compose stack — EMQX + floodgate + an ExHook auto-registration container + a Python test-driver — on an isolated bridge network and runs end-to-end checks for `drop`, `zerohop`, `passthru`, `noop`, and `custom-key channel passthru`. The test-driver crafts real Meshtastic `ServiceEnvelope` protobufs (using the same `meshtastic` Python library the firmware uses internally), so each case exercises the exact wire format floodgate sees in production. Each case verifies BOTH what the subscriber received (delivered MQTT bytes) AND floodgate's `/health` stats — a behavior change with no stat increment, or a stat increment with no delivery effect, both fail the case. One PASS or FAIL line is printed per case. diff --git a/docker-compose.test.yaml b/docker-compose.test.yaml index 3fa67d8..c715c6a 100644 --- a/docker-compose.test.yaml +++ b/docker-compose.test.yaml @@ -41,40 +41,6 @@ services: retries: 12 start_period: 5s - meshtasticd: - image: meshtastic/meshtasticd:latest - container_name: floodgate-test-meshtasticd - networks: [floodgate-test-net] - # The upstream image's default CMD is sh-wrapped (`sh -cx 'meshtasticd - # --fsdir=...'`), so we override entrypoint + command together to inject - # `-s` (SimRadio mode — no LoRa hardware). Matches the firmware project's - # own CI pattern (firmware/.github/workflows/test_native.yml). - entrypoint: ["meshtasticd"] - command: ["-s", "--fsdir=/var/lib/meshtasticd"] - depends_on: - emqx: - condition: service_healthy - floodgate: - condition: service_healthy - exhook-init: - condition: service_completed_successfully - restart: "no" - - meshtasticd-init: - build: - context: tests/integration/meshtasticd - dockerfile: Dockerfile.init - container_name: floodgate-test-meshtasticd-init - networks: [floodgate-test-net] - depends_on: - meshtasticd: - condition: service_started - environment: - MESHTASTICD_HOST: "meshtasticd" - MESHTASTICD_PORT: "4403" - EMQX_ADDR: "emqx:1883" - restart: "no" - exhook-init: build: context: tests/integration/exhook-init @@ -100,8 +66,6 @@ services: condition: service_healthy exhook-init: condition: service_completed_successfully - meshtasticd-init: - condition: service_completed_successfully profiles: ["driver"] environment: EMQX_HOST: "emqx" diff --git a/scripts/run-integration.sh b/scripts/run-integration.sh index a4b8ecf..56b7292 100755 --- a/scripts/run-integration.sh +++ b/scripts/run-integration.sh @@ -75,9 +75,6 @@ for i in $(seq 1 60); do [ "$i" -eq 60 ] && { echo "floodgate /health never came up" >&2; exit 1; } done -echo "==> Letting meshtasticd settle" -sleep 8 - echo "==> Running test-driver" set +e docker compose -f "$COMPOSE_FILE" run --rm test-driver diff --git a/tests/integration/meshtasticd/Dockerfile.init b/tests/integration/meshtasticd/Dockerfile.init deleted file mode 100644 index 21b864a..0000000 --- a/tests/integration/meshtasticd/Dockerfile.init +++ /dev/null @@ -1,11 +0,0 @@ -FROM python:3.13-slim - -RUN apt-get update \ - && apt-get install -y --no-install-recommends netcat-openbsd \ - && rm -rf /var/lib/apt/lists/* \ - && pip install --no-cache-dir "meshtastic>=2.5,<3" - -COPY init.sh /init.sh -RUN chmod +x /init.sh - -ENTRYPOINT ["/init.sh"] diff --git a/tests/integration/meshtasticd/init.sh b/tests/integration/meshtasticd/init.sh deleted file mode 100755 index c583a71..0000000 --- a/tests/integration/meshtasticd/init.sh +++ /dev/null @@ -1,44 +0,0 @@ -#!/usr/bin/env bash -# Configure MQTT on the meshtasticd container after it is up. MQTT lives in -# ModuleConfig protobuf on the firmware, not in the YAML config file — -# upstream firmware's own CI does the same: start meshtasticd -s, then poke -# settings over the TCP API on port 4403. - -set -euo pipefail - -HOST="${MESHTASTICD_HOST:-meshtasticd}" -PORT="${MESHTASTICD_PORT:-4403}" -EMQX_ADDR="${EMQX_ADDR:-emqx:1883}" - -echo "meshtasticd-init: waiting for ${HOST}:${PORT}" -for i in $(seq 1 60); do - if nc -z "${HOST}" "${PORT}" 2>/dev/null; then - echo "meshtasticd-init: TCP API ready after ${i}s"; break - fi - sleep 1 - [ $i -eq 60 ] && { echo "meshtasticd-init: ${HOST}:${PORT} never came up" >&2; exit 1; } -done - -# Brief settle so firmware finishes its own init before we start poking config -sleep 3 - -echo "meshtasticd-init: configuring MQTT module" -# The meshtastic CLI uses flat dotted preference paths (mqtt.enabled, -# mqtt.address, ...) — not the protobuf-nested form (moduleConfig.mqtt.*). -# --port means *serial* port; for TCP we pass host:port via --host. -meshtastic --host "${HOST}:${PORT}" \ - --set mqtt.enabled true \ - --set mqtt.address "${EMQX_ADDR}" \ - --set mqtt.root msh \ - --set mqtt.tls_enabled false \ - --set mqtt.encryption_enabled true \ - --set mqtt.json_enabled false - -# Let the firmware reconnect to the broker with the new settings -sleep 5 - -echo "meshtasticd-init: sending probe text message" -meshtastic --host "${HOST}:${PORT}" \ - --sendtext "floodgate-roundtrip-probe" - -echo "meshtasticd-init: done" diff --git a/tests/integration/test-driver/run.py b/tests/integration/test-driver/run.py index caa66e9..f7d2e8a 100644 --- a/tests/integration/test-driver/run.py +++ b/tests/integration/test-driver/run.py @@ -370,33 +370,6 @@ def case_custom_key_passthru(pub: Publisher, sub: Subscriber) -> Outcome: return Outcome(name) -def case_meshtasticd_roundtrip(_pub: Publisher, sub: Subscriber) -> Outcome: - """Verify the meshtasticd container's traffic transits floodgate to subscribers. - - The meshtasticd-init sidecar configures MQTT and sends one probe text - message; meshtasticd also publishes its own NodeInfo on first MQTT - connection. We wait up to 60s for at least one msh/<...>/e/<...> message - whose packet_id is OUTSIDE our crafted 0xAxxxxxxx range — i.e. came - from the daemon, not from our Publisher. - """ - name = "meshtasticd-roundtrip" - deadline = time.monotonic() + 60 - while time.monotonic() < deadline: - for m in sub.snapshot(): - if "/e/" not in m.topic: - continue - pid = _packet_id_of(m.payload) - if pid is None: - continue - if 0xA0000000 <= pid <= 0xAFFFFFFF: - continue # one of our crafted test packets - return Outcome(name) - time.sleep(1.0) - return Outcome(name, False, - "no organic meshtasticd traffic seen in 60s — meshtasticd-init " - "may have failed to enable MQTT (check `docker compose logs meshtasticd-init`)") - - def run_all() -> int: sub = Subscriber(EMQX_HOST, EMQX_PORT) sub.start() @@ -408,7 +381,6 @@ def run_all() -> int: case_passthru(pub, sub), case_noop(pub, sub), case_custom_key_passthru(pub, sub), - case_meshtasticd_roundtrip(pub, sub), ] for o in outcomes: print(o.line(), flush=True)