From 08cceeaa6fa1a452cd4c9c7989ac5dd67eaf3f12 Mon Sep 17 00:00:00 2001 From: spideystreet Date: Sat, 2 May 2026 13:24:35 +0200 Subject: [PATCH 1/2] feat(api): add postgres-backed api_db tests and CI job - Add tests/api_db with database marker; skip locally when DATABASE_URL is unset - GHA postgres-db job: pgvector, prisma migrate deploy + seed, pytest tests/api_db - LINKER_SKIP_SEMANTIC_INIT skips sentence-transformers in FastAPI lifespan - Consolidate SQL result mapping in row_mapping module - make test-database target; default Postgres bind to loopback via POSTGRES_BIND_ADDR - Host Dagster dev uses workspace.host.yaml; document verification tiers (AGENTS.md, README) --- .env.example | 4 + .github/workflows/quality-checks.yml | 69 +++++ .gitignore | 1 + AGENTS.md | 44 ++- Makefile | 10 +- README.md | 2 + docker-compose.override.yml | 4 +- ...2026-05-01-quality-architecture-backlog.md | 280 ++++++++++++++++++ pyproject.toml | 1 + src/services/api/main.py | 10 +- src/services/api/routes/projects.py | 27 +- src/services/api/routes/recommendations.py | 5 +- src/services/api/routes/references.py | 14 +- src/services/api/row_mapping.py | 28 ++ tests/api_db/__init__.py | 0 tests/api_db/conftest.py | 19 ++ tests/api_db/test_health_live.py | 8 + tests/api_db/test_references_live.py | 13 + tests/conftest.py | 20 +- tests/unit/test_api_lifespan_semantic_skip.py | 52 ++++ tests/unit/test_core_ml_embed_users.py | 69 +++++ ...test_raw_github_extract_projects_guards.py | 31 ++ workspace.host.yaml | 5 + 23 files changed, 674 insertions(+), 42 deletions(-) create mode 100644 docs/audits/2026-05-01-quality-architecture-backlog.md create mode 100644 src/services/api/row_mapping.py create mode 100644 tests/api_db/__init__.py create mode 100644 tests/api_db/conftest.py create mode 100644 tests/api_db/test_health_live.py create mode 100644 tests/api_db/test_references_live.py create mode 100644 tests/unit/test_api_lifespan_semantic_skip.py create mode 100644 
tests/unit/test_core_ml_embed_users.py create mode 100644 tests/unit/test_raw_github_extract_projects_guards.py create mode 100644 workspace.host.yaml diff --git a/.env.example b/.env.example index 868008bc..0b94ba19 100644 --- a/.env.example +++ b/.env.example @@ -10,6 +10,10 @@ POSTGRES_PASSWORD="" POSTGRES_DB="" POSTGRES_PORT="" POSTGRES_HOST="localhost" + +# Postgres bind on host (compose dev db). Default 127.0.0.1 = loopback-only. Use 0.0.0.0 for LAN/Tailscale DBeaver (trusted networks). +# POSTGRES_BIND_ADDR=127.0.0.1 + DATABASE_URL="postgresql://:@:/" # Dagster (local: absolute path to .../dagster_home; Docker: /app/dagster_home) diff --git a/.github/workflows/quality-checks.yml b/.github/workflows/quality-checks.yml index 8a993272..e1f1c389 100644 --- a/.github/workflows/quality-checks.yml +++ b/.github/workflows/quality-checks.yml @@ -21,6 +21,7 @@ jobs: prisma_schema: ${{ steps.filter.outputs.prisma_schema }} ost_docs_paths: ${{ steps.filter.outputs.ost_docs_paths }} dagster_cfg: ${{ steps.filter.outputs.dagster_cfg }} + postgres_suite: ${{ steps.filter.outputs.postgres_suite }} steps: - name: Checkout uses: actions/checkout@v4 @@ -60,6 +61,11 @@ jobs: - 'scripts/docker-entrypoint.sh' prisma_schema: - 'prisma/**' + postgres_suite: + - 'prisma/**' + - 'tests/api_db/**' + - 'tests/conftest.py' + - '.github/workflows/quality-checks.yml' ost_docs_paths: - 'ost-docs/**' - '.gitmodules' @@ -114,6 +120,69 @@ jobs: mkdir -p "$DAGSTER_STORAGE_DIR" "$DAGSTER_LOGS_DIR" uv run pytest -m integration -k test_dagster_startup --no-cov + postgres-db: + needs: changes + if: >- + github.event_name == 'push' + || needs.changes.outputs.workflows == 'true' + || needs.changes.outputs.postgres_suite == 'true' + || needs.changes.outputs.python == 'true' + || needs.changes.outputs.prisma_schema == 'true' + runs-on: ubuntu-latest + services: + postgres: + image: ankane/pgvector:v0.4.1 + env: + POSTGRES_USER: linker_ci + POSTGRES_PASSWORD: linker_ci + POSTGRES_DB: linker_ci + ports: 
+ - 5432:5432 + options: >- + --health-cmd "pg_isready -U linker_ci -d linker_ci" + --health-interval 5s + --health-timeout 5s + --health-retries 20 + env: + DATABASE_URL: postgresql://linker_ci:linker_ci@localhost:5432/linker_ci + LINKER_SKIP_SEMANTIC_INIT: "true" + OST_LINKER_REQUIRE_SERVICE_TOKEN: "false" + steps: + - name: Checkout + uses: actions/checkout@v4 + + - name: Install uv + uses: astral-sh/setup-uv@v5 + with: + enable-cache: true + + - name: Set up Python + uses: actions/setup-python@v5 + with: + python-version: "3.11" + + - name: Install Python deps + run: uv sync --frozen + + - name: Set up Node + uses: actions/setup-node@v4 + with: + node-version: "20" + cache: npm + cache-dependency-path: package-lock.json + + - name: Install Node deps for Prisma CLI + seed + run: npm ci + + - name: Deploy migrations + run: npx prisma migrate deploy + + - name: Seed taxonomy rows + run: ./node_modules/.bin/ts-node --compiler-options '{"module":"CommonJS"}' prisma/seed/seed.ts + + - name: Database-tier pytest + run: uv run pytest tests/api_db --no-cov -v --tb=short + dbt-check: needs: changes if: >- diff --git a/.gitignore b/.gitignore index 78bb6ff9..87423e24 100644 --- a/.gitignore +++ b/.gitignore @@ -84,6 +84,7 @@ tmp_dagster/ # Maintainer-only / local audit notes (do not commit) docs/READINESS-AUDIT.md +docs/audit # Local .actrc diff --git a/AGENTS.md b/AGENTS.md index 212f1ec1..f15292a7 100644 --- a/AGENTS.md +++ b/AGENTS.md @@ -28,7 +28,7 @@ If `npx ts-node` fails on your Node version, use the `ts-node` line above from t ### Python / Dagster ```bash uv sync # Install Python dependencies -dagster dev -h 0.0.0.0 -p 3000 # Run Dagster locally (outside Docker) +make dev # Dagster UI on :3000 (uses workspace.host.yaml) ``` ### REST API (FastAPI) @@ -38,6 +38,32 @@ pytest -m api # Run API tests ``` The API is a lightweight, read-only service consumed by the [ost-mcp](https://github.com/opensource-together/ost-mcp) MCP server. 
It exposes project search, similarity, trending recommendations, and reference data. +### FastAPI service token (`OST_LINKER_*`) + +Exact behavior (see `src/services/api/auth.py` and `lifespan` in `src/services/api/main.py`; covered by `pytest -m api` in `tests/api/test_service_token.py`): + +| `OST_LINKER_REQUIRE_SERVICE_TOKEN` | `OST_LINKER_SERVICE_TOKEN` | Protected routes (`/projects`, `/references`, `/recommendations`, …) | `/health` | +| ---------------------------------- | -------------------------- | -------------------------------------------------------------------- | --------- | +| `false` or unset | unset or empty | Open (no `X-Service-Token` required) | Open | +| `false` or unset | set | **401** unless `X-Service-Token` matches | Open | +| `true` | unset or empty | **Startup fails** (`RuntimeError` in lifespan) | n/a | +| `true` | set | **401** unless header matches | Open | + +**MCP-facing production:** set strict mode and a strong shared token; keep transport on a private network or TLS-terminated path so the header is not leaked. + +### Postgres host bind (dev override) + +Compose maps the dev database as `${POSTGRES_BIND_ADDR:-127.0.0.1}:${POSTGRES_PORT:-5433}:5432` (loopback-only by default, on host port **5433** unless you override). Use `POSTGRES_BIND_ADDR=0.0.0.0` **only on trusted LANs** (e.g. DBeaver from another machine on Tailscale) and rely on `POSTGRES_PASSWORD` strength — see `.env.example`. + +### Dagster: Docker vs host + +- **Containers** use `-w /app/workspace.yaml` with `working_directory: /app` (bind-mounted tree). +- **Host** `make dev` uses `workspace.host.yaml` with `working_directory: .` so `src.linker.definitions` loads from your checkout. Keep both YAML files aligned if you rename modules. + +### Ingestion / Dagster regression coverage + +Not every ingestion asset ships full deterministic unit tests against Go binaries. 
After changing subprocess wiring (`raw_github__extract_projects`, trending, etc.), run a Dagster materialization smoke in dev or document manual rehearsal on the PR. + ### dbt Target `local` in `dbt/profiles.yml` uses `POSTGRES_HOST`, `POSTGRES_USER`, `POSTGRES_PASSWORD`, `POSTGRES_PORT`, `POSTGRES_DB` (defaults **ci_user** / **ci_pass** / **5433** if unset — wrong for your Docker DB). **Load the repo `.env` before running dbt:** @@ -63,10 +89,22 @@ mypy src/ # Type check (strict mode) ```bash pytest # Run all tests (coverage included via --cov=src) pytest tests/test_foo.py -k test_bar # Run a single test -pytest -m unit # Run by marker (unit/integration/performance/api) +pytest -m unit # Run by marker (unit/integration/performance/api/database) pytest -m integration # Dagster startup smoke test +pytest -m database # Only `tests/api_db/` (requires DATABASE_URL; skipped if unset) ``` -`make ci-check` runs ruff (check + format), mypy, unit tests, API tests, and the Dagster smoke — aligned with `.github/workflows/quality-checks.yml`. +`make ci-check` runs ruff (check + format), mypy, unit tests, API tests, and the Dagster smoke — aligned with `.github/workflows/quality-checks.yml`. It does **not** run the Postgres tier; use **`make test-database`** when **`DATABASE_URL`** points at a migrated, seeded DB. + +#### Verification tiers (CI vs local Postgres) + +| Tier | Command | Needs | +| ---- | ------- | ----- | +| **Unit** | `pytest -m unit --cov-fail-under=50` | Python only | +| **API mocks** | `pytest -m api` | Python only (mocked DB + semantic) | +| **Integration (Dagster)** | `pytest -m integration -k test_dagster_startup --no-cov` | Dagster env dirs (see workflow) | +| **Database (`api_db`)** | `DATABASE_URL=... 
LINKER_SKIP_SEMANTIC_INIT=true make test-database` | Compose **db** (`ankane/pgvector` in docker-compose override), `npx prisma migrate deploy`, Prisma seed | + +**`LINKER_SKIP_SEMANTIC_INIT`** — When set to **`true`**, FastAPI skips loading **`sentence-transformers`** (used in **GitHub Actions `postgres-db`** and **`tests/api_db`**). Routes that call **`get_semantic()`** (e.g. **`/projects`** embedding search) stay untested in that mode. Test config is in `pyproject.toml` under `[tool.pytest.ini_options]`. Tests use class-based style (`class TestXxx`). diff --git a/Makefile b/Makefile index 0cbe0c29..a24dfcdd 100644 --- a/Makefile +++ b/Makefile @@ -5,9 +5,9 @@ setup: uv sync $(MAKE) build-go -## Dev — run Dagster dev server locally +## Dev — run Dagster dev server locally (host paths; see workspace.host.yaml) dev: - uv run dagster dev -h 0.0.0.0 -p 3000 + uv run dagster dev -h 0.0.0.0 -p 3000 -w workspace.host.yaml ## Test — run pytest with coverage test: @@ -62,6 +62,10 @@ ci-check: lint uv run pytest -m api --no-cov uv run pytest -m integration -k test_dagster_startup --no-cov +## Test-database — Postgres-backed FastAPI tier (DATABASE_URL required) +test-database: + uv run pytest tests/api_db --no-cov -v + ## Clean — remove Dagster storage and Python caches clean: bash scripts/clean_dagster.sh @@ -74,4 +78,4 @@ help: @echo "" @grep -E '^## ' $(MAKEFILE_LIST) | sed 's/## / /' -.PHONY: setup dev test lint format typecheck build-go docker-up docker-down db-init dbt-build clean help doctor ci-check +.PHONY: setup dev test lint format typecheck build-go docker-up docker-down db-init dbt-build clean help doctor ci-check test-database diff --git a/README.md b/README.md index 59a5536d..86d3c18e 100644 --- a/README.md +++ b/README.md @@ -28,6 +28,8 @@ make db-init # Prisma schema + seed make ci-check # Python parity with CI quality job (before a PR); full CI is broader — see AGENTS.md ``` +See [AGENTS.md](AGENTS.md) for **API service-token behavior**, **Postgres host 
bind**, and **Dagster host vs Docker workspaces** (`workspace.host.yaml`, `Makefile` **`make dev`**). + ## Contributing See [CONTRIBUTING.md](CONTRIBUTING.md) (branch flow, conventions, **`make ci-check`**). For command cheat-sheets (**dbt**, API, Docker overrides), see [AGENTS.md](AGENTS.md). diff --git a/docker-compose.override.yml b/docker-compose.override.yml index 8713d2f8..5ebfb5a9 100644 --- a/docker-compose.override.yml +++ b/docker-compose.override.yml @@ -51,9 +51,9 @@ services: POSTGRES_USER: ${POSTGRES_USER} POSTGRES_PASSWORD: ${POSTGRES_PASSWORD} POSTGRES_DB: ${POSTGRES_DB} - # Bind on all host interfaces so peers can reach Postgres via Tailscale or LAN (e.g. DBeaver from another Mac). Use a strong POSTGRES_PASSWORD. + # Default: loopback only (POSTGRES_BIND_ADDR=127.0.0.1). Set POSTGRES_BIND_ADDR=0.0.0.0 for LAN/Tailscale (trusted networks only). Use a strong POSTGRES_PASSWORD. ports: - - "${POSTGRES_PORT:-5433}:5432" + - "${POSTGRES_BIND_ADDR:-127.0.0.1}:${POSTGRES_PORT:-5433}:5432" volumes: - postgres_data:/var/lib/postgresql/data healthcheck: diff --git a/docs/audits/2026-05-01-quality-architecture-backlog.md b/docs/audits/2026-05-01-quality-architecture-backlog.md new file mode 100644 index 00000000..619eff09 --- /dev/null +++ b/docs/audits/2026-05-01-quality-architecture-backlog.md @@ -0,0 +1,280 @@ +# ost-linker — quality & architecture backlog (phase 1) + +**Audit execution date:** 2026-05-01 +**Design spec (workspace meta):** `docs/superpowers/specs/2026-05-01-ost-linker-quality-architecture-audit-design.md` + +## Evidence runs + +### `make ci-check` + +- _Command (from repo root `ost-linker/`):_ `cd /Users/spidey/Developer/git/Ost/ost-linker && make ci-check` (delegates per `Makefile` to `lint` → `uv run ruff format --check src/`, then `make typecheck`, then `uv run pytest -m unit --cov-fail-under=50`, `uv run pytest -m api --no-cov`, `uv run pytest -m integration -k test_dagster_startup --no-cov`) + +- _Exit code:_ **0** + +- _Result 
excerpt:_ + +``` +uv run ruff check src/ +All checks passed! +uv run ruff format --check src/ +46 files already formatted +... +uv run mypy src/ +Success: no issues found in 46 source files +... +uv run pytest -m unit --cov-fail-under=50 +... 132 passed, 51 deselected ... +Required test coverage of 50% reached. Total coverage: 60.17% +... +uv run pytest -m api --no-cov +====================== 50 passed, 133 deselected in 4.29s ====================== +uv run pytest -m integration -k test_dagster_startup --no-cov +====================== 1 passed, 182 deselected in 12.99s ======================= +``` + +*(Full transcript on audit machine: `/tmp/linker-ci-check.log`.)* + +### Go unit tests (`fetcher`) + +- _Command:_ `cd /Users/spidey/Developer/git/Ost/ost-linker/src/services/go/fetcher && go test ./...` + +- _Excerpt:_ `ok ost-fetcher 0.510s` — **exit 0** + +### Go unit tests (`scraper`) + +- _Command:_ `cd /Users/spidey/Developer/git/Ost/ost-linker/src/services/go/scraper && go test ./...` + +- _Excerpt:_ `ok github.com/opensource-together/ost-ai-engine/github-scraper 0.268s` — **exit 0** + +### Docker Compose + API `/health` + +- _Compose context:_ stack `ost-linker` was already running on audit host (`docker compose ps` from repo root). + +- _Probe:_ `curl -sS -w "\nHTTP_CODE:%{http_code}\n" "http://127.0.0.1:8010/health"` + *(Port **8010** matches `0.0.0.0:8010->8000/tcp` mapping for `ost-linker-api` — see `docker compose ps`.)* + +- _Excerpt / BLOCKED:_ + +``` +{"status":"ok"} +HTTP_CODE:200 +``` + +--- + +## Architecture inventory & layer map + +The `ost-linker` stack is **multi-process**: **Dagster** (`webserver` + `daemon`) runs the batch/ML pipeline; a **FastAPI** `api` service exposes the read-only HTTP surface consumed by **ost-mcp**; **Postgres + pgvector** is added in **`docker-compose.override.yml`** as `db` (dev) with `POSTGRES_PORT` defaulting to **5433** on the host unless overridden (this audit host used **5435**). 
Published ports follow env overrides: **`DAGSTER_HOST_PORT`** (here **3033→3000**), **`LINKER_API_HOST_PORT`** (**8010→8000**). `workspace.yaml` loads **`src.linker.definitions:defs`** with Docker-centric `working_directory: /app`, matching the image layout where `./scripts/init.sh` waits for Postgres, runs **dbt** parse/build for Dagster manifest materialization, and starts Dagster or **uvicorn**. The **api** container intentionally carries a **minimal env** (database URL, rate limit, service-token flags, OpenAPI toggle) and skips dbt init per `scripts/init.sh`. **Prisma** (`make db-init`, `prisma/schema.prisma`) and **dbt** (`dbt/`, `Settings.dbt_project_dir` in `src/linker/settings.py`) are primarily maintained on the **host** for schema push and analytics builds, while compose shares `dbt_target` with Dagster for manifest reuse. **Go** scraper/fetcher binaries are compiled to fixed paths (`GO_*_PATH` in compose `x-common-env`, e.g. `/usr/local/bin/ost-scraper`) for Dagster assets to shell out. + +--- + +## Vertical trace — MCP-oriented read path + +- **App bootstrap:** `src/services/api/main.py` builds `FastAPI` with **`lifespan`**: validates service-token config when `OST_LINKER_REQUIRE_SERVICE_TOKEN` is true, then **`init_db` + `init_semantic`** before serving. + +- **Public health:** `GET /health` is mounted from `routes/health.py` **without** `Depends(require_service_token)`; it runs `SELECT 1` via SQLAlchemy session (`Depends(get_db)`), so it reflects **DB** reachability, not just process liveness. + +- **Protected read APIs:** `references`, `projects`, and `recommendations` routers are included with **`dependencies=[Depends(require_service_token)]`**. `auth.require_service_token` compares `X-Service-Token` with `OST_LINKER_SERVICE_TOKEN` using `secrets.compare_digest`, but **returns early (allows)** when the expected token env var is **unset**—so misconfiguration can leave business routes open whenever strict mode is off. 
+ +- **Docs / OpenAPI:** `_openapi_urls()` disables `/openapi.json`, `/docs`, `/redoc` when `API_ENABLE_OPENAPI` is false at import time (container start). + +- **Rate limits:** `slowapi` limiter is attached to `app.state` with a JSON 429 handler on `RateLimitExceeded` (per-route decorators; no global SlowAPI middleware). + +--- + +## Vertical trace — ingestion / warehouse + +- **Dagster registry:** `src/linker/definitions.py` composes **scraper** assets via `load_assets_from_modules` over modules such as `raw_github__extract_projects`, `raw_github__extract_trending`, and `core_github__fetch_*`, then registers **`@dbt_assets`** `dbt_project_assets` running `dbt build --indirect-selection cautious` through `DbtCliResource`, plus Python assets **`core_match__classify_projects`**, **`core_public__sync_projects`**, **`core_ml__embed_projects`**, **`core_ml__embed_users`**. + +- **Operational jobs/schedules:** `build_jobs()` exposes `cleanup_dagster_history_job`, `project_enrichment_job`, `run_all_job`, `user_recommendation_job`; `build_schedules()` wires enrichment and recommendation schedules (plus cleanup schedule). **Sensors list is empty** today. + +- **Warehouse contract:** dbt sources in `dbt/models/sources.yml` declare **freshness** and **meta.dagster.asset_key** links for `github` schema tables (e.g. `raw_github_project`) into Dagster groups—this is the documented bridge from raw ingestion tables to dbt/Dagster observability. + +- **Resources:** `build_resources()` binds `DATABASE_URL`, GitHub token, Go binary paths, LLM and embedding resources, dbt CLI, and Postgres pandas IO managers—so asset failures often chain from **env completeness** and **Go binary presence** more than from Python import errors. 
+ +--- + +## Vertical trace — configuration glue + +- **Split deployment surface:** `docker-compose.yml` **`api`** service lists only DB + API tuning env vars, while **`x-common-env`** for Dagster services also carries `GITHUB_ACCESS_TOKEN`, `MISTRAL_API_KEY`, `GO_*_PATH`, `DBT_TARGET`, model paths, etc. `docker-compose.override.yml` rewrites `DATABASE_URL` to `postgresql://…@db:5432/…` for dev and bind-mounts `src/`, `dbt/`, `scripts/` into Dagster containers (api mounts **`src` only**). + +- **Role switch:** `DAGSTER_ROLE` is set to **`daemon`** for the daemon, **`api`** for FastAPI; `scripts/init.sh` branches: daemon may `dbt parse` if manifest missing; **api skips dbt entirely**. + +- **Host vs container dbt target:** Compose sets `DBT_TARGET: docker` in common env; local dbt guidance in `AGENTS.md` stresses loading `.env` and choosing `local` vs `docker` profiles—auditors should treat **profile/target mismatch** as a recurring footgun when reproducing pipeline issues. + +- **Dagster instance config:** Dev override bind-mounts `./dagster.yaml` into `/app/dagster_home/dagster.yaml` (SQLite storage paths via env); production is expected to diverge (see comments in override file). + +--- + +## Pillar review notes + +### pipeline-data + +- **Source freshness:** `dbt/models/sources.yml` defines **warn/error windows** across `github`, `match`, `ml`, and `public.Project` sources—healthy pattern; running `dbt source freshness` was **not executed** in this pass (would need credentialed DB + time); treat as optional follow-up when DB mirrors prod-like volume. + +- **Asset test coverage imbalance:** `make ci-check` unit coverage report shows **low line coverage** on several scraper and sync assets (`raw_github__extract_*`, `core_public__sync_projects`, README/topic/language fetch assets). That signals **pipeline regressions may slip** until integration runs or fuller unit suites exist. 
+ +- **embed_users vs embed_projects:** `core_ml__embed_users.py` registers far fewer exercised lines under unit runs than `core_ml__embed_projects.py` — user embedding/reco path may be under-tested compared to projects. + +### ops-security + +- **Service token semantics:** With `OST_LINKER_REQUIRE_SERVICE_TOKEN=false` (compose default) **and** an unset or empty shared token, `require_service_token` lets every request through, so business routes rely on network isolation—acceptable for MCP server-to-server if perimeter holds, but **easy to misunderstand** across environments. + +- **Postgres exposure (dev override):** `db` binds `${POSTGRES_PORT:-5433}:5432`; comment warns about LAN exposure — ensure **staging/prod** compose files never copy this verbatim without controls. + +- **OpenAPI suppression:** Controlled by **`API_ENABLE_OPENAPI`** — confirm operators set **`false`** in environments that must not advertise schemas publicly. + +### maintainability + +- **Entrypoints verified (Makefile vs CI vs docs):** + + - `Makefile` **`ci-check`** runs: `uv run ruff check src/`; **`ci-check`** also invokes `uv run ruff format --check src/`; **`make typecheck`** → **`uv run mypy src/`**; **`pytest -m unit --cov-fail-under=50`**, **`pytest -m api --no-cov`**, **`pytest -m integration -k test_dagster_startup --no-cov`**. + + - Pytest **`markers`** in `pyproject.toml`: `unit`, `integration`, `performance`, `api` (strict-markers enabled). Only **one** integration test module is marked (`tests/integration/test_dagster_startup.py`), so **`ci-check` ≅ full integration suite** today. + + - `.github/workflows/quality-checks.yml` uses **`dorny/paths-filter`** to **scope PR jobs** versus **always running on push**; maintainers invoking only `make ci-check` locally should remember **path-filter semantics differ** from a full staging push pipeline. + +- **Dagster local vs compose:** Compose commands reference **`workspace.yaml`** with **`working_directory: /app`** (valid in-container). 
**`Makefile`** `dev` runs `uv run dagster dev -h 0.0.0.0 -p 3000` **without** `-w workspace.yaml`; developers may unknowingly diverge from container definitions—document explicitly or unify workspace selection. + +--- + +## Consolidated findings (paste as GitHub issues) + +Each finding follows the design-spec template. + +### FINDING-001 + +**Title:** Document or enforce clearer FastAPI service-token modes for MCP-facing deployments + +**Pillar:** ops-security + +**Severity:** P2 + +**Summary:** `require_service_token` **no-ops** when `OST_LINKER_SERVICE_TOKEN` is unset (`src/services/api/auth.py`), while `lifespan` enforces completeness only when `OST_LINKER_REQUIRE_SERVICE_TOKEN` is true. Operators can assume routes are authenticated whenever a token header is documented, yet remain fully open if env-based auth is accidentally disabled or unset in a reachable network. + +**Evidence:** + +- `src/services/api/auth.py` lines 11–13 return early when `expected` env is falsy. +- `src/services/api/main.py` includes protected routers behind `Depends(require_service_token)` and documents strict startup checks for mismatching strict flag + missing token (`lifespan` block). + +**Recommendation:** Extend `README.md` / `AGENTS.md` with an explicit truth table (**strict flag × token-present × caller headers**); optionally add **`pytest -m api`** coverage for default compose env vs strict env; consider **WARN log** once per process when routes are unsecured. + +**Acceptance criteria:** + +- [ ] Documented behavior matches code for dev/staging/production expectations. +- [ ] API tests pin at least **two** cases: strict-required failure at startup vs permissive unsecured mode with asserted 401 absent header when token configured. + +**Suggested phase:** `1-documentation-follow-up` (follow-on `2-tooling-follow-up` if WARN log/tests added). 
+ +--- + +### FINDING-002 + +**Title:** Raise unit/integration coverage or add targeted pytest for ingestion and sync Dagster assets + +**Pillar:** pipeline-data (secondary: maintainability) + +**Severity:** P2 + +**Summary:** **`make ci-check`** shows **below-threshold confidence** on several ingestion-critical modules (scraper/raw extract assets, Postgres sync asset) versus well-covered IO and classification helpers. Pipeline refactors risk regression without narrower tests or recorded integration proofs. + +**Evidence:** + +- `make ci-check` coverage table excerpt (unit stage): scraper/sync assets logged **12–48% covered** branches (see `/tmp/linker-ci-check.log`, entries `raw_github__extract_projects.py`, `core_public__sync_projects.py`, helper fetch README/topics/languages). + +**Recommendation:** Add **`pytest`** units using existing patterns in `tests/unit/test_io_manager.py` / asset harnesses OR document mandatory manual Dagster runbook steps per release; prioritize **`core_public__sync_projects`** and **`raw_github__extract_projects`** smoke tests. + +**Acceptance criteria:** + +- [ ] Each prioritized asset carries either **automated regression tests** OR an **explicit waiver** documenting the manual verification cadence and owner. + +**Suggested phase:** `2-tooling-follow-up` + +--- + +### FINDING-003 + +**Title:** Clarify Postgres host port publishing policy for dev/staging/production compose variants + +**Pillar:** ops-security + +**Severity:** P2 + +**Summary:** Dev override binds Postgres on `${POSTGRES_PORT:-5433}:5432` with comment encouraging LAN access. This accelerates DX but **must not leak** to internet-adjacent environments without guardrails. + +**Evidence:** + +- `docker-compose.override.yml` `db.ports` and inline comment on bind scope. + +**Recommendation:** Add **compose profile** separating `desktop-dev` exposure vs **`loopback-only`** profile; mirror guidance in **`AGENTS.md`**. 
+ +**Acceptance criteria:** + +- [ ] Default dev path documented; hardened variant exists or checklist calls out firewall expectations. + +**Suggested phase:** `1-documentation-follow-up` (profiles may elevate to tooling). + +--- + +### FINDING-004 + +**Title:** Align Dagster developer entrypoints (`make dev` vs `workspace.yaml` / compose) + +**Pillar:** maintainability + +**Severity:** P2 + +**Summary:** Containers start Dagster via **`workspace.yaml`** with **`working_directory: /app`**, tuned for Docker bind mounts under `/app/src`. **`Makefile`** `dev` invokes `uv run dagster dev` **without** explicitly binding that workspace file, risking divergent code-location resolution compared to orchestrated environments. + +**Evidence:** + +- `workspace.yaml` `working_directory` path. +- `Makefile` **`dev`** target command string (no `-w workspace.yaml`). +- `docker-compose.yml` webserver/daemon commands pass `-w /app/workspace.yaml`. + +**Recommendation:** Either document **authoritative** workflows (“always use compose for Dagster fidelity”) OR update **`make dev`** to pass `-w workspace.yaml` plus a **`workspace.local.yaml`** when `/app` is invalid on hosts (if Dagster rejects container path locally—validate on macOS/Linux fresh clones). + +**Acceptance criteria:** + +- [ ] Maintainer consensus recorded; instructions updated accordingly; optional smoke **`make dev`** + asset import captured in README. + +**Suggested phase:** `1-documentation-follow-up` + +--- + +### FINDING-005 + +**Title:** Strengthen regression tests around user embedding Dagster asset + +**Pillar:** pipeline-data + +**Severity:** P3 + +**Summary:** **`core_ml__embed_users`** shows thinner coverage than **`core_ml__embed_projects`** in **`make ci-check`** output—user personalization paths deserve parity if product roadmap stresses user-level vectors. + +**Evidence:** + +- Unit coverage excerpt in `/tmp/linker-ci-check.log` (`core_ml__embed_users.py` ~24% lines covered vs embed projects ~90%). 
+ +**Recommendation:** Extend `tests/unit` with chunked streaming stubs similar to **`test_embed_projects_streaming`** for user batches. + +**Acceptance criteria:** + +- [ ] New tests fail on intentional regression stub and **`make ci-check` remains green.** + +**Suggested phase:** `2-tooling-follow-up` + +--- + +## Appendix — audit execution meta + +| Item | Value | +| ----- | ----- | +| `docker compose ps` snapshot | `api` healthy on `8010`, `webserver` on `3033`, `db` on `5435` (audit host overrides) | +| Full `make ci-check` log | `/tmp/linker-ci-check.log` on executor machine | + +--- + +## Subagent-driven development note + +**Task 1** followed the **implementer → spec review → backlog-quality review** checklist in-thread. **Tasks 2–14** were executed **sequentially by the orchestrating agent** to avoid **42** sub-agent round-trips while preserving checklist coverage—re-open any task for isolated subagent rerun if desired. diff --git a/pyproject.toml b/pyproject.toml index e682b5db..8d04f6dd 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -159,4 +159,5 @@ markers = [ "integration: Integration tests", "performance: Performance tests", "api: API tests", + "database: Tests requiring reachable PostgreSQL (DATABASE_URL); see AGENTS.md", ] diff --git a/src/services/api/main.py b/src/services/api/main.py index 79376fff..f895fb45 100644 --- a/src/services/api/main.py +++ b/src/services/api/main.py @@ -19,7 +19,7 @@ def _get_config() -> APIConfig: @asynccontextmanager async def lifespan(app: FastAPI) -> AsyncGenerator[None, None]: - """Startup: init DB + load semantic search model. Shutdown: dispose DB.""" + """Startup: init DB; optionally load semantic model. 
Shutdown: dispose DB.""" config = _get_config() token_ok = config.service_token and config.service_token.strip() if config.require_service_token and not token_ok: @@ -30,7 +30,13 @@ async def lifespan(app: FastAPI) -> AsyncGenerator[None, None]: ) raise RuntimeError(msg) init_db(config.database_url) - init_semantic() + skip_semantic = os.environ.get("LINKER_SKIP_SEMANTIC_INIT", "").strip().lower() in ( + "1", + "true", + "yes", + ) + if not skip_semantic: + init_semantic() yield close_db() diff --git a/src/services/api/routes/projects.py b/src/services/api/routes/projects.py index f23b46ae..d60de95e 100644 --- a/src/services/api/routes/projects.py +++ b/src/services/api/routes/projects.py @@ -2,11 +2,11 @@ from fastapi import APIRouter, Depends, HTTPException, Query, Request from sqlalchemy import text -from sqlalchemy.engine import Result from sqlalchemy.orm import Session from src.services.api.dependencies import get_db, get_semantic from src.services.api.rate_limit import RATE_LIMIT, limiter +from src.services.api.row_mapping import mapping_row_first, mapping_rows from src.services.api.schemas import ProjectOut, ProjectSemanticOut, ProjectSimilarOut from src.services.api.semantic import SemanticSearchService @@ -15,15 +15,6 @@ MAX_LIMIT = 50 -def _rows(result: Result[Any]) -> list[dict[str, Any]]: - return [dict(row) for row in result.mappings().all()] - - -def _row_or_none(result: Result[Any]) -> dict[str, Any] | None: - row = result.mappings().first() - return dict(row) if row is not None else None - - @router.get("/search", response_model=list[ProjectOut]) @limiter.limit(RATE_LIMIT) def search_projects( @@ -70,7 +61,7 @@ def search_projects( params["limit"] = limit result = db.execute(text(query), params) - return _rows(result) + return mapping_rows(result) @router.get("/search-natural", response_model=list[ProjectSemanticOut]) @@ -133,7 +124,7 @@ def search_natural( params["limit"] = limit result = db.execute(text(sql), params) - return _rows(result) + 
return mapping_rows(result) @router.get("/{project_id}", response_model=ProjectOut) @@ -144,7 +135,7 @@ def get_project( db: Session = Depends(get_db), ) -> dict[str, Any]: """Get full project details by ID.""" - project = _row_or_none( + project = mapping_row_first( db.execute( text( """SELECT id, title, description, "repoUrl" AS repo_url, @@ -158,7 +149,7 @@ def get_project( if not project: raise HTTPException(status_code=404, detail="Project not found") - categories = _rows( + categories = mapping_rows( db.execute( text( """SELECT c.id, c.name FROM public."Category" c @@ -169,7 +160,7 @@ def get_project( ) ) - domains = _rows( + domains = mapping_rows( db.execute( text( """SELECT d.id, d.name FROM public."Domain" d @@ -180,7 +171,7 @@ def get_project( ) ) - tech_stacks = _rows( + tech_stacks = mapping_rows( db.execute( text( """SELECT ts.id, ts.name, ts."iconUrl" AS icon_url, @@ -208,7 +199,7 @@ def find_similar( db: Session = Depends(get_db), ) -> list[dict[str, Any]]: """Find similar projects using pgvector cosine similarity.""" - embedding = _row_or_none( + embedding = mapping_row_first( db.execute( text( """SELECT vector FROM ml.embd_github_project @@ -234,4 +225,4 @@ def find_similar( ), {"project_id": project_id, "limit": limit}, ) - return _rows(result) + return mapping_rows(result) diff --git a/src/services/api/routes/recommendations.py b/src/services/api/routes/recommendations.py index 6a070f41..92c97808 100644 --- a/src/services/api/routes/recommendations.py +++ b/src/services/api/routes/recommendations.py @@ -6,6 +6,7 @@ from src.services.api.dependencies import get_db from src.services.api.rate_limit import RATE_LIMIT, limiter +from src.services.api.row_mapping import mapping_rows from src.services.api.schemas import GithubTrendingProjectOut, TrendingProjectOut router = APIRouter(prefix="/recommendations") @@ -34,7 +35,7 @@ def get_github_trending( ), {"limit": limit}, ) - rows = [dict(row) for row in result.mappings().all()] + rows = 
mapping_rows(result) results = [] for row in rows: @@ -74,4 +75,4 @@ def get_trending( ), {"limit": limit}, ) - return [dict(row) for row in result.mappings().all()] + return mapping_rows(result) diff --git a/src/services/api/routes/references.py b/src/services/api/routes/references.py index 5d897bbf..62e7e760 100644 --- a/src/services/api/routes/references.py +++ b/src/services/api/routes/references.py @@ -1,27 +1,21 @@ -from typing import Any - from fastapi import APIRouter, Depends, Request from sqlalchemy import text -from sqlalchemy.engine import Result from sqlalchemy.orm import Session from src.services.api.dependencies import get_db from src.services.api.rate_limit import RATE_LIMIT, limiter +from src.services.api.row_mapping import mapping_rows from src.services.api.schemas import CategoryOut, DomainOut, TechStackOut router = APIRouter() -def _rows(result: Result[Any]) -> list[dict[str, Any]]: - return [dict(row) for row in result.mappings().all()] - - @router.get("/categories", response_model=list[CategoryOut]) @limiter.limit(RATE_LIMIT) def list_categories(request: Request, db: Session = Depends(get_db)) -> list[dict]: """List all project categories.""" result = db.execute(text('SELECT id, name FROM public."Category" ORDER BY name')) - return _rows(result) + return mapping_rows(result) @router.get("/domains", response_model=list[DomainOut]) @@ -29,7 +23,7 @@ def list_categories(request: Request, db: Session = Depends(get_db)) -> list[dic def list_domains(request: Request, db: Session = Depends(get_db)) -> list[dict]: """List all project domains.""" result = db.execute(text('SELECT id, name FROM public."Domain" ORDER BY name')) - return _rows(result) + return mapping_rows(result) @router.get("/techstacks", response_model=list[TechStackOut]) @@ -43,4 +37,4 @@ def list_techstacks(request: Request, db: Session = Depends(get_db)) -> list[dic ORDER BY name""" ) ) - return _rows(result) + return mapping_rows(result) diff --git a/src/services/api/row_mapping.py 
b/src/services/api/row_mapping.py new file mode 100644 index 00000000..71819dcf --- /dev/null +++ b/src/services/api/row_mapping.py @@ -0,0 +1,28 @@ +"""Normalize SQLAlchemy mapping rows for FastAPI/Pydantic (UUID → str).""" + +from typing import Any +from uuid import UUID + +from sqlalchemy.engine import Result + + +def mapping_rows(result: Result[Any]) -> list[dict[str, Any]]: + rows: list[dict[str, Any]] = [] + for row in result.mappings().all(): + d = dict(row) + for key, val in list(d.items()): + if isinstance(val, UUID): + d[key] = str(val) + rows.append(d) + return rows + + +def mapping_row_first(result: Result[Any]) -> dict[str, Any] | None: + row = result.mappings().first() + if row is None: + return None + d = dict(row) + for key, val in list(d.items()): + if isinstance(val, UUID): + d[key] = str(val) + return d diff --git a/tests/api_db/__init__.py b/tests/api_db/__init__.py new file mode 100644 index 00000000..e69de29b diff --git a/tests/api_db/conftest.py b/tests/api_db/conftest.py new file mode 100644 index 00000000..ff6cb77d --- /dev/null +++ b/tests/api_db/conftest.py @@ -0,0 +1,19 @@ +import os + +import pytest +from fastapi.testclient import TestClient + + +@pytest.fixture(scope="module") +def client_db() -> TestClient: + """FastAPI TestClient against real DATABASE_URL.""" + assert os.environ.get("DATABASE_URL"), ( + "DATABASE_URL must be set for database-tier tests." 
+ ) + os.environ.setdefault("LINKER_SKIP_SEMANTIC_INIT", "true") + os.environ.setdefault("OST_LINKER_REQUIRE_SERVICE_TOKEN", "false") + + from src.services.api.main import app + + with TestClient(app) as client: + yield client diff --git a/tests/api_db/test_health_live.py b/tests/api_db/test_health_live.py new file mode 100644 index 00000000..5af2db9f --- /dev/null +++ b/tests/api_db/test_health_live.py @@ -0,0 +1,8 @@ +"""Live DB checks for FastAPI `/health`.""" + + +class TestHealthLive: + def test_health_returns_ok_with_real_db(self, client_db) -> None: + resp = client_db.get("/health") + assert resp.status_code == 200 + assert resp.json() == {"status": "ok"} diff --git a/tests/api_db/test_references_live.py b/tests/api_db/test_references_live.py new file mode 100644 index 00000000..7897de90 --- /dev/null +++ b/tests/api_db/test_references_live.py @@ -0,0 +1,13 @@ +"""Live taxonomy endpoints after prisma seed.""" + +import pytest + + +class TestReferencesTaxonomyLive: + @pytest.mark.parametrize("path", ["/categories", "/domains", "/techstacks"]) + def test_list_nonempty(self, client_db, path: str) -> None: + r = client_db.get(path) + assert r.status_code == 200, r.text + data = r.json() + assert isinstance(data, list) + assert len(data) >= 1, f"{path} expected seed data" diff --git a/tests/conftest.py b/tests/conftest.py index 1c4a3669..70fb35e9 100644 --- a/tests/conftest.py +++ b/tests/conftest.py @@ -1,13 +1,29 @@ +import os + import pytest def pytest_collection_modifyitems(items: list[pytest.Item]) -> None: - """Auto-apply markers based on test directory.""" + """Auto-apply markers based on path; skip live DB suite when DATABASE_URL unset.""" + skip_live = pytest.mark.skip( + reason=( + "Database-tier tests skipped: export DATABASE_URL to a reachable Postgres " + "(see ost-linker AGENTS.md verification tiers)." 
+ ), + ) + for item in items: path = str(item.fspath) + + if "/api_db/" in path: + item.add_marker(pytest.mark.database) + if "/unit/" in path: item.add_marker(pytest.mark.unit) elif "/integration/" in path: item.add_marker(pytest.mark.integration) - elif "/api/" in path: + elif "/api/" in path and "/api_db/" not in path: item.add_marker(pytest.mark.api) + + if "/api_db/" in path and not os.environ.get("DATABASE_URL"): + item.add_marker(skip_live) diff --git a/tests/unit/test_api_lifespan_semantic_skip.py b/tests/unit/test_api_lifespan_semantic_skip.py new file mode 100644 index 00000000..dcc3c905 --- /dev/null +++ b/tests/unit/test_api_lifespan_semantic_skip.py @@ -0,0 +1,52 @@ +"""Lifespan: LINKER_SKIP_SEMANTIC_INIT avoids eager semantic model load.""" + +from unittest.mock import MagicMock, patch + +import pytest +from fastapi.testclient import TestClient + + +class TestLifespanSemanticSkipEnv: + def test_when_skip_semantic_true_init_semantic_not_called( + self, monkeypatch: pytest.MonkeyPatch + ) -> None: + monkeypatch.setenv("LINKER_SKIP_SEMANTIC_INIT", "true") + monkeypatch.setenv("DATABASE_URL", "postgresql://ci:ci@127.0.0.1:5432/ci") + monkeypatch.setenv("OST_LINKER_REQUIRE_SERVICE_TOKEN", "false") + monkeypatch.delenv("OST_LINKER_SERVICE_TOKEN", raising=False) + + stub_semantic = MagicMock() + + with ( + patch("src.services.api.main.init_db"), + patch("src.services.api.main.init_semantic", stub_semantic), + patch("src.services.api.main.close_db"), + ): + from src.services.api.main import app + + with TestClient(app): + pass + + assert stub_semantic.call_count == 0 + + def test_when_skip_semantic_unset_init_semantic_called_once( + self, monkeypatch: pytest.MonkeyPatch + ) -> None: + monkeypatch.delenv("LINKER_SKIP_SEMANTIC_INIT", raising=False) + monkeypatch.setenv("DATABASE_URL", "postgresql://ci:ci@127.0.0.1:5432/ci") + monkeypatch.setenv("OST_LINKER_REQUIRE_SERVICE_TOKEN", "false") + monkeypatch.delenv("OST_LINKER_SERVICE_TOKEN", raising=False) + + 
stub_semantic = MagicMock() + + with ( + patch("src.services.api.main.init_db"), + patch("src.services.api.main.init_semantic", stub_semantic), + patch("src.services.api.main.close_db"), + ): + from src.services.api.main import app + + with TestClient(app): + pass + + assert stub_semantic.call_count == 1 diff --git a/tests/unit/test_core_ml_embed_users.py b/tests/unit/test_core_ml_embed_users.py new file mode 100644 index 00000000..ce2c2cc7 --- /dev/null +++ b/tests/unit/test_core_ml_embed_users.py @@ -0,0 +1,69 @@ +"""Regression tests for user embedding Dagster asset (audit FINDING-005).""" + +from __future__ import annotations + +from unittest.mock import MagicMock, patch + +import pandas as pd +import pytest +from dagster import FilesystemIOManager, Output, build_asset_context + +from src.linker.assets.embedding.core_ml__embed_users import core_ml__embed_users + + +@pytest.fixture +def sample_user_df() -> pd.DataFrame: + return pd.DataFrame( + [{"user_id": "u1", "user_context": "Alice contributes to Rust parsers"}] + ) + + +class TestCoreMlEmbedUsers: + def test_empty_user_df_returns_zero_count(self, tmp_path) -> None: + model = MagicMock() + context = build_asset_context( + resources={ + "sentence_transformer": model, + "io_manager": FilesystemIOManager(base_dir=str(tmp_path)), + }, + ) + empty = pd.DataFrame(columns=["user_id", "user_context"]) + + output = core_ml__embed_users(context=context, user_df=empty) + + assert isinstance(output, Output) + assert output.metadata["count"].value == 0 + model.encode_batch.assert_not_called() + + @patch( + "src.linker.assets.embedding.core_ml__embed_users.get_db_cursor", + ) + def test_writes_one_row_via_cursor( + self, + mock_get_cursor: MagicMock, + sample_user_df: pd.DataFrame, + tmp_path, + ) -> None: + fake_cur = MagicMock() + cursor_cm = MagicMock() + cursor_cm.__enter__.return_value = fake_cur + cursor_cm.__exit__.return_value = False + mock_get_cursor.return_value = cursor_cm + + model = MagicMock() + 
model.encode_batch.return_value = [[[0.1, 0.2, 0.3]]] + + context = build_asset_context( + resources={ + "sentence_transformer": model, + "io_manager": FilesystemIOManager(base_dir=str(tmp_path)), + }, + ) + + output = core_ml__embed_users(context=context, user_df=sample_user_df) + + model.encode_batch.assert_called_once_with( + ["Alice contributes to Rust parsers"] + ) + fake_cur.execute.assert_called_once() + assert output.metadata["count"].value == 1 diff --git a/tests/unit/test_raw_github_extract_projects_guards.py b/tests/unit/test_raw_github_extract_projects_guards.py new file mode 100644 index 00000000..b125b1a7 --- /dev/null +++ b/tests/unit/test_raw_github_extract_projects_guards.py @@ -0,0 +1,31 @@ +"""Guard-rail smoke tests for raw GitHub project scraper asset (audit FINDING-002).""" + +from __future__ import annotations + +from unittest.mock import patch + +import pytest +from dagster import build_asset_context + +from src.linker.assets.scraper.raw_github__extract_projects import ( + raw_github__extract_projects, +) +from src.linker.resources.cfg_resource import PipelineConfig + + +class TestRawGithubExtractProjectsGuards: + def test_runtime_error_when_scraper_missing(self) -> None: + cfg = PipelineConfig( + db_url="postgresql://u:p@localhost:5432/db", + github_token="test-token", + github_scraping_query="", + go_scraper_path="/nonexistent/bin/ost-scraper", + go_fetcher_path="/nonexistent/bin/ost-fetcher", + go_trending_path="/nonexistent/bin/ost-trending", + ) + + context = build_asset_context(resources={"config": cfg}) + + with patch("os.path.exists", return_value=False): + with pytest.raises(RuntimeError, match="binary not found"): + raw_github__extract_projects(context=context) diff --git a/workspace.host.yaml b/workspace.host.yaml new file mode 100644 index 00000000..a6db4973 --- /dev/null +++ b/workspace.host.yaml @@ -0,0 +1,5 @@ +load_from: + - python_module: + module_name: src.linker.definitions + attribute: defs + working_directory: . 
From dfd8b9234e3bdb50096e46d5a66480caeed18d15 Mon Sep 17 00:00:00 2001 From: spideystreet Date: Sat, 2 May 2026 13:29:59 +0200 Subject: [PATCH 2/2] fix(ci): create match.project_classification_failure before drop-fk migration Empty databases never had DDL for ProjectClassificationFailure, so prisma migrate deploy failed at 20260421130000. Insert 20260421121500 between prompt-version and drop-fk so the table exists (no FK). --- .../migration.sql | 17 +++++++++++++++++ 1 file changed, 17 insertions(+) create mode 100644 prisma/migrations/20260421121500_create_match_project_classification_failure/migration.sql diff --git a/prisma/migrations/20260421121500_create_match_project_classification_failure/migration.sql b/prisma/migrations/20260421121500_create_match_project_classification_failure/migration.sql new file mode 100644 index 00000000..bb74d99c --- /dev/null +++ b/prisma/migrations/20260421121500_create_match_project_classification_failure/migration.sql @@ -0,0 +1,17 @@ +-- ProjectClassificationFailure DLQ (schema prisma `ProjectClassificationFailure`). +-- Intentionally no FK to public.Project (classifier may enqueue before sync). +-- Subsequent migration drops the FK if an older DDL path created it anyway. +CREATE TABLE IF NOT EXISTS "match"."project_classification_failure" ( + "id" UUID NOT NULL DEFAULT uuid_generate_v4(), + "projectId" UUID NOT NULL, + "attempts" INTEGER NOT NULL DEFAULT 1, + "lastError" TEXT NOT NULL, + "lastAttemptAt" TIMESTAMP(3) NOT NULL DEFAULT CURRENT_TIMESTAMP, + "nextRetryAt" TIMESTAMP(3) NOT NULL DEFAULT CURRENT_TIMESTAMP, + + CONSTRAINT "project_classification_failure_pkey" PRIMARY KEY ("id") +); + +CREATE UNIQUE INDEX IF NOT EXISTS "project_classification_failure_projectId_key" ON "match"."project_classification_failure"("projectId"); + +CREATE INDEX IF NOT EXISTS "project_classification_failure_nextRetryAt_idx" ON "match"."project_classification_failure"("nextRetryAt");