From 9534c302c253c62d58f93c69824b7c2e131b0edf Mon Sep 17 00:00:00 2001 From: Lars Kroehl Date: Thu, 21 May 2026 01:00:40 +0200 Subject: [PATCH] =?UTF-8?q?feat(db):=20discovery=5Fsnapshots=20table=20?= =?UTF-8?q?=E2=80=94=20daily=20Discovery-Tracking=20storage?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Migration per Discovery-Tracking-Baseline SPEC (PR #54 merged 2026-05-20) §3.5 + §4 + §6 P2. Schema: - BIGSERIAL primary key - DATE-unique (one row per day; UPSERT-safe) - JSONB payload (schema-flexible — V1 fields can grow without ALTER TABLE) - source_run_status enum (ok/partial/failed) for daily-cron-health visibility Idempotent (IF NOT EXISTS on table + index). Baseline row for 2026-05-21 will be INSERTed manually post-migration-apply (SPEC §3.6 — Pflicht-Termin heute Abend für Delta-Messbarkeit morgen). Cron-Script (scripts/discovery_snapshot.py) folgt in P3. --- .../2026-05-21_create_discovery_snapshots.sql | 30 +++++++++++++++++++ 1 file changed, 30 insertions(+) create mode 100644 migrations/2026-05-21_create_discovery_snapshots.sql diff --git a/migrations/2026-05-21_create_discovery_snapshots.sql b/migrations/2026-05-21_create_discovery_snapshots.sql new file mode 100644 index 0000000..ff5c8c8 --- /dev/null +++ b/migrations/2026-05-21_create_discovery_snapshots.sql @@ -0,0 +1,30 @@ +-- Discovery-Tracking-Baseline P2 (SPEC docs/specs/2026-05-21_discovery-tracking-baseline-SPEC.md §3.5 + §4) +-- +-- Daily snapshot table for Discovery surfaces: +-- - GSC indexed URLs / impressions / clicks / crawl frequency +-- - nginx bot-hits per User-Agent × endpoint-class +-- - GitHub Stars/Forks/Clones/Views for MoltyCel/* repos +-- - Self-Probe pass/fail (sitemap.xml, llms.txt, /guard/openapi.json, /extendedAgentCard) +-- +-- Schema: BIGSERIAL pk + DATE-unique + JSONB payload (schema-flexible for V1 +-- iteration without ALTER TABLE migrations). 
+--
+-- Run via P3 cron 00:30 UTC daily (scripts/discovery_snapshot.py — separate PR).
+-- Baseline row for 2026-05-21 inserted manually after this migration applies.
+
+CREATE TABLE IF NOT EXISTS discovery_snapshots (
+  id BIGSERIAL PRIMARY KEY,
+  snapshot_at DATE NOT NULL UNIQUE,                 -- one row per calendar day; UNIQUE also provides the btree index
+  generated_at TIMESTAMPTZ NOT NULL DEFAULT NOW(),  -- defaults to the inserting transaction's start time
+  payload JSONB NOT NULL,                           -- shape per SPEC §3.5
+  source_run_status TEXT NOT NULL DEFAULT 'ok'
+    CHECK (source_run_status IN ('ok','partial','failed'))
+);
+
+-- No separate index on snapshot_at: the UNIQUE constraint above already maintains
+-- a btree on that column, and PostgreSQL scans it backward for ORDER BY ... DESC.
+
+COMMENT ON TABLE discovery_snapshots IS
+  'Daily Discovery-Tracking snapshots. One row per day. payload-JSONB shape per SPEC §3.5.';
+COMMENT ON COLUMN discovery_snapshots.source_run_status IS  -- NOTE(review): file header lists four surfaces, text says 5 — confirm against SPEC
+  'ok = all 5 sources captured · partial = some sources failed (see payload.errors) · failed = none captured';