diff --git a/.env.example b/.env.example index ce4d72c..842c80a 100644 --- a/.env.example +++ b/.env.example @@ -18,29 +18,24 @@ API_PORT=8000 LOG_LEVEL=info APP_ENV=dev JWT_SECRET_KEY=change-me-generate-with-openssl-rand-hex-32 -AUTH_COOKIE_NAME=bracc_session -AUTH_COOKIE_SECURE=false -AUTH_COOKIE_SAMESITE=lax -TRUST_PROXY_HEADERS=false INVITE_CODE= CORS_ORIGINS=http://localhost:3000 PRODUCT_TIER=community PATTERNS_ENABLED=false -PUBLIC_MODE=true +PUBLIC_MODE=false PUBLIC_ALLOW_PERSON=false PUBLIC_ALLOW_ENTITY_LOOKUP=false PUBLIC_ALLOW_INVESTIGATIONS=false PATTERN_SPLIT_THRESHOLD_VALUE=80000 PATTERN_SPLIT_MIN_COUNT=3 -PATTERN_SHARE_THRESHOLD=0.60 +PATTERN_SHARE_THRESHOLD=0.6 PATTERN_SRP_MIN_ORGS=5 PATTERN_INEXIG_MIN_RECURRENCE=3 PATTERN_MAX_EVIDENCE_REFS=50 -SHARE_TOKEN_TTL_HOURS=168 # Frontend (dev only — production uses Caddy reverse proxy with relative paths) VITE_API_URL=http://localhost:8000 -VITE_PUBLIC_MODE=true +VITE_PUBLIC_MODE=false VITE_PATTERNS_ENABLED=false # Optional: Google Cloud (for Base dos Dados / TSE BigQuery) diff --git a/.github/ISSUE_TEMPLATE/config.yml b/.github/ISSUE_TEMPLATE/config.yml index d6257a9..b026312 100644 --- a/.github/ISSUE_TEMPLATE/config.yml +++ b/.github/ISSUE_TEMPLATE/config.yml @@ -1,5 +1,5 @@ blank_issues_enabled: false contact_links: - name: Security vulnerability report - url: https://github.com/World-Open-Graph/br-acc/security/advisories/new + url: https://github.com/brunoclz/world-transparency-graph/security/advisories/new about: Use GitHub Security Advisories for private vulnerability disclosure. diff --git a/.github/claude-automerge-policy.json b/.github/claude-automerge-policy.json index 94212a1..e917665 100644 --- a/.github/claude-automerge-policy.json +++ b/.github/claude-automerge-policy.json @@ -10,8 +10,8 @@ "README.md", "CONTRIBUTING.md", "frontend/src/**", - "api/src/icarus/queries/**", - "api/src/icarus/models/**", + "api/src/bracc/queries/**", + "api/src/bracc/models/**", "api/tests/**", "etl/tests/**", "frontend/src/**/*.test.*" diff --git a/.github/workflows/publish-release.yml b/.github/workflows/publish-release.yml index 3cbab87..1ef4336 100644 --- a/.github/workflows/publish-release.yml +++ b/.github/workflows/publish-release.yml @@ -23,26 +23,6 @@ on: description: "Release title (EN)" required: true type: string - highlights_pt: - description: "PT highlights (separate bullets with |)" - required: true - type: string - highlights_en: - description: "EN highlights (separate bullets with |)" - required: true - type: string - patterns_included: - description: "Comma-separated pattern IDs included in this release (use 'none' if not applicable)" - required: true - type: string - technical_changes_pt: - description: "PT technical changes (separate bullets with |)" - required: true - type: string - technical_changes_en: - description: "EN technical changes (separate bullets with |)" - required: true - type: string permissions: contents: write @@ -124,116 +104,63 @@ jobs: COMPARE_URL: ${{ steps.validate.outputs.compare_url }} TITLE_PT: ${{ inputs.title_pt }} TITLE_EN: ${{ inputs.title_en }} - HIGHLIGHTS_PT: ${{ inputs.highlights_pt }} - HIGHLIGHTS_EN: ${{ inputs.highlights_en }} - PATTERNS_INCLUDED: ${{ inputs.patterns_included }} - TECHNICAL_CHANGES_PT: ${{ inputs.technical_changes_pt }} - TECHNICAL_CHANGES_EN: ${{ inputs.technical_changes_en }} run: | set -euo pipefail DATE_UTC="$(date -u +"%Y-%m-%d")" export DATE_UTC - python - <<'PY' - import json - import os - from textwrap import dedent - - def split_pipe(raw: str) -> 
list[str]:
-        normalized = raw.replace("\r\n", "\n").replace("\n", "|")
-        return [item.strip(" -\t") for item in normalized.split("|") if item.strip()]
+          cat > release_notes.md <<NOTES
+          ## PT-BR
-
-    def split_csv(raw: str) -> list[str]:
-        value = raw.strip()
-        if value.lower() in {"none", "n/a", "na", "-"}:
-            return []
-        return [item.strip() for item in value.split(",") if item.strip()]
+
+          ${TITLE_PT}
-
-    def bullets(items: list[str], fallback: str) -> str:
-        if not items:
-            return f"- {fallback}"
-        return "\n".join(f"- {item}" for item in items)
+
+          ### Escopo
+          - Release publicada por marco.
+          - Mudanças detalhadas por categorias no histórico desta versão.
-
-    highlights_pt = split_pipe(os.environ["HIGHLIGHTS_PT"])
-    highlights_en = split_pipe(os.environ["HIGHLIGHTS_EN"])
-    technical_changes_pt = split_pipe(os.environ["TECHNICAL_CHANGES_PT"])
-    technical_changes_en = split_pipe(os.environ["TECHNICAL_CHANGES_EN"])
-    patterns = split_csv(os.environ["PATTERNS_INCLUDED"])
+
+          ### Integridade pública
+          Os sinais e padrões refletem coocorrências em bases públicas e não constituem prova legal.
-
-    release_notes = dedent(
-        f"""
-        ## PT-BR
+
+          ## EN
-
-        {os.environ["TITLE_PT"]}
+
+          ${TITLE_EN}
-
-        ### Escopo
-        - Release publicada por marco.
-        - Mudanças listadas de forma específica para facilitar auditoria pública.
+
+          ### Scope
+          - Milestone-based release publication.
+          - Detailed changes grouped by category in this version history.
-
-        ### Destaques
-        {bullets(highlights_pt, "Sem destaques declarados.")}
+
+          ### Public integrity
+          Signals and patterns reflect co-occurrence in public records and are not legal proof.
-
-        ### Padrões incluídos
-        {bullets(patterns, "Sem novos padrões nesta release.")}
+
+          ## Compatibility
-
-        ### Mudanças técnicas
-        {bullets(technical_changes_pt, "Sem mudanças técnicas declaradas.")}
+
+          - Breaking changes: declare explicitly when applicable.
+          - Migration required: declare explicitly when applicable.
-
-        ### Integridade pública
-        Os sinais e padrões refletem coocorrências em bases públicas e não constituem prova legal.
+
+          ## Compare
-
-        ## EN
+
+          ${COMPARE_URL}
-
-        {os.environ["TITLE_EN"]}
+
+          ## Metadata
-
-        ### Scope
-        - Milestone-based release publication.
-        - Changes are listed explicitly for public traceability.
+
+          - Version: ${VERSION}
+          - Target SHA: ${TARGET_SHA}
+          - Previous tag: ${PREVIOUS_TAG}
+          - Date (UTC): ${DATE_UTC}
+          NOTES
-
-        ### Highlights
-        {bullets(highlights_en, "No highlights declared.")}
-
-        ### Included patterns
-        {bullets(patterns, "No new patterns in this release.")}
-
-        ### Technical changes
-        {bullets(technical_changes_en, "No technical changes declared.")}
-
-        ### Public integrity
-        Signals and patterns reflect co-occurrence in public records and are not legal proof.
-
-        ## Compatibility
-
-        - Breaking changes: none declared.
-        - Migration required: no.
- - ## Compare - - {os.environ.get("COMPARE_URL", "")} - - ## Metadata - - - Version: {os.environ["VERSION"]} - - Target SHA: {os.environ["TARGET_SHA"]} - - Previous tag: {os.environ["PREVIOUS_TAG"]} - - Date (UTC): {os.environ.get("DATE_UTC", "")} - """ - ).strip() + "\n" - - with open("release_notes.md", "w", encoding="utf-8") as fh: - fh.write(release_notes) + python - <<'PY' + import json + import os payload = { "version": os.environ["VERSION"], "date": os.environ.get("DATE_UTC", ""), - "highlights_pt": highlights_pt, - "highlights_en": highlights_en, + "highlights_pt": [os.environ["TITLE_PT"]], + "highlights_en": [os.environ["TITLE_EN"]], "api_changes": [], "data_changes": [], "privacy_compliance_changes": [], - "patterns_included": patterns, - "technical_changes_pt": technical_changes_pt, - "technical_changes_en": technical_changes_en, "breaking_changes": False, "migration_required": False, "compare_url": os.environ.get("COMPARE_URL", ""), diff --git a/.github/workflows/security.yml b/.github/workflows/security.yml index 3e02a4c..af879d5 100644 --- a/.github/workflows/security.yml +++ b/.github/workflows/security.yml @@ -6,18 +6,10 @@ on: pull_request: branches: [main] -concurrency: - group: ${{ github.workflow }}-${{ github.event.pull_request.number || github.ref }} - cancel-in-progress: true - -permissions: - contents: read - jobs: gitleaks: name: Gitleaks runs-on: ubuntu-latest - timeout-minutes: 15 steps: - uses: actions/checkout@v4 with: @@ -37,7 +29,6 @@ jobs: bandit: name: Bandit (Python) runs-on: ubuntu-latest - timeout-minutes: 15 steps: - uses: actions/checkout@v4 @@ -57,7 +48,6 @@ jobs: pip-audit: name: Pip Audit (Python deps) runs-on: ubuntu-latest - timeout-minutes: 20 steps: - uses: actions/checkout@v4 @@ -69,14 +59,6 @@ jobs: with: python-version: "3.12" - - name: Cache uv - uses: actions/cache@v4 - with: - path: ~/.cache/uv - key: ${{ runner.os }}-uv-security-${{ hashFiles('api/uv.lock', 'etl/uv.lock') }} - restore-keys: | - ${{ runner.os }}-uv-security- - - name: Export lock-compatible requirement sets run: | cd api @@ -93,7 +75,6 @@ jobs: public-privacy-gate: name: Public Privacy Gate runs-on: ubuntu-latest - timeout-minutes: 15 steps: - uses: actions/checkout@v4 @@ -107,7 +88,6 @@ jobs: compliance-pack-gate: name: Compliance Pack Gate runs-on: ubuntu-latest - timeout-minutes: 15 steps: - uses: actions/checkout@v4 @@ -120,9 +100,8 @@ jobs: public-boundary-gate: name: Public Boundary Gate - if: vars.PUBLIC_BOUNDARY_GATE_ENABLED == 'true' + if: github.repository == 'brunoclz/world-transparency-graph' runs-on: ubuntu-latest - timeout-minutes: 15 steps: - uses: actions/checkout@v4 @@ -136,7 +115,6 @@ jobs: internal-instruction-boundary: name: Internal Instruction Boundary runs-on: ubuntu-latest - timeout-minutes: 15 steps: - uses: actions/checkout@v4 diff --git a/.gitignore b/.gitignore index d1565ed..03039bc 100644 --- a/.gitignore +++ b/.gitignore @@ -75,7 +75,6 @@ scripts/audit-prompts/ # Local report artifacts in repository root /*.pdf /*.html -gitleaks-report*.json # Playwright MCP cache .playwright-mcp/ @@ -91,7 +90,7 @@ data/tse/ # Local MCP runtime config (keep example only) .mcp.json -# Internal assistant instructions (must never be published) +# Internal assistant instruction files (must never be published) CLAUDE.md AGENTS.md AGENTS*.md diff --git a/Makefile b/Makefile index 994a40d..23ff510 100644 --- a/Makefile +++ b/Makefile @@ -1,14 +1,125 @@ -.PHONY: dev stop seed bootstrap-demo bootstrap-full bootstrap-all bootstrap-all-noninteractive 
bootstrap-all-report check-public-claims check-source-urls check-pipeline-contracts check-pipeline-inputs generate-pipeline-status generate-source-summary generate-reference-metrics check neutrality
+.PHONY: dev stop api etl frontend lint type-check test test-api test-etl test-frontend test-integration-api test-integration-etl test-integration check neutrality seed clean download-cnpj download-cnpj-all download-tse download-transparencia download-sanctions download-all etl-cnpj etl-cnpj-dev etl-cnpj-stream etl-tse etl-tse-dev etl-transparencia etl-transparencia-dev etl-sanctions etl-all link-persons bootstrap-demo bootstrap-full bootstrap-all bootstrap-all-noninteractive bootstrap-all-report check-public-claims check-source-urls check-pipeline-contracts check-pipeline-inputs generate-pipeline-status generate-source-summary generate-reference-metrics
+
+# ── Development ─────────────────────────────────────────
 dev:
-	docker compose -f infra/docker-compose.yml up -d
+	docker compose up -d

 stop:
-	docker compose -f infra/docker-compose.yml down
+	docker compose down
+
+# ── API ─────────────────────────────────────────────────
+api:
+	cd api && uv run uvicorn bracc.main:app --reload --host 0.0.0.0 --port 8000
+
+# ── ETL ─────────────────────────────────────────────────
+etl:
+	cd etl && uv run bracc-etl --help

 seed:
 	bash infra/scripts/seed-dev.sh

+# ── CNPJ Data ──────────────────────────────────────────
+download-cnpj:
+	cd etl && uv run python scripts/download_cnpj.py --reference-only
+	cd etl && uv run python scripts/download_cnpj.py --files 1
+
+download-cnpj-all:
+	cd etl && uv run python scripts/download_cnpj.py --files 10
+
+etl-cnpj:
+	cd etl && uv run bracc-etl run --source cnpj --neo4j-password "$${NEO4J_PASSWORD}" --data-dir ../data
+
+etl-cnpj-dev:
+	cd etl && uv run bracc-etl run --source cnpj --neo4j-password "$${NEO4J_PASSWORD}" --data-dir ../data --limit 10000
+
+etl-cnpj-stream:
+	cd etl && uv run bracc-etl run --source cnpj --neo4j-password "$${NEO4J_PASSWORD}" --data-dir ../data --streaming
+
+# ── TSE Data ──────────────────────────────────────────
+download-tse:
+	cd etl && uv run python scripts/download_tse.py --years 2024
+
+etl-tse:
+	cd etl && uv run bracc-etl run --source tse --neo4j-password "$${NEO4J_PASSWORD}" --data-dir ../data
+
+etl-tse-dev:
+	cd etl && uv run bracc-etl run --source tse --neo4j-password "$${NEO4J_PASSWORD}" --data-dir ../data --limit 10000
+
+# ── Transparencia Data ────────────────────────────────
+download-transparencia:
+	cd etl && uv run python scripts/download_transparencia.py --year 2025
+
+etl-transparencia:
+	cd etl && uv run bracc-etl run --source transparencia --neo4j-password "$${NEO4J_PASSWORD}" --data-dir ../data
+
+etl-transparencia-dev:
+	cd etl && uv run bracc-etl run --source transparencia --neo4j-password "$${NEO4J_PASSWORD}" --data-dir ../data --limit 10000
+
+# ── Sanctions Data ────────────────────────────────────
+download-sanctions:
+	cd etl && uv run python scripts/download_sanctions.py
+
+etl-sanctions:
+	cd etl && uv run bracc-etl run --source sanctions --neo4j-password "$${NEO4J_PASSWORD}" --data-dir ../data
+
+# ── All Data ──────────────────────────────────────────
+download-all: download-cnpj download-tse download-transparencia download-sanctions
+
+etl-all: etl-cnpj etl-tse etl-transparencia etl-sanctions
+
+# ── Entity Resolution ────────────────────────────────────
+link-persons:
+	docker compose exec neo4j cypher-shell -u neo4j -p "$${NEO4J_PASSWORD}" -f /scripts/link_persons.cypher
+
+# ── Frontend ────────────────────────────────────────────
+frontend:
+	cd frontend && npm run dev
+
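As a usage sketch, the per-source download and load targets above compose into the same flow from plain Python; the CLI flags simply mirror the recipes, and NEO4J_PASSWORD is assumed to be exported already (the $$ in the recipes defers expansion from make to the shell):

    # Hypothetical driver script mirroring download-cnpj followed by etl-cnpj-dev.
    import os
    import subprocess

    def run(args: list[str], cwd: str = "etl") -> None:
        # check=True mirrors make's stop-on-first-error behavior.
        subprocess.run(args, cwd=cwd, check=True)

    run(["uv", "run", "python", "scripts/download_cnpj.py", "--reference-only"])
    run(["uv", "run", "python", "scripts/download_cnpj.py", "--files", "1"])
    run([
        "uv", "run", "bracc-etl", "run",
        "--source", "cnpj",
        "--neo4j-password", os.environ["NEO4J_PASSWORD"],
        "--data-dir", "../data",
        "--limit", "10000",  # dev-sized load, as in etl-cnpj-dev
    ])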
+# ── Quality ─────────────────────────────────────────────
+lint:
+	cd api && uv run ruff check src/ tests/
+	cd etl && uv run ruff check src/ tests/
+	cd frontend && npm run lint
+
+type-check:
+	cd api && uv run mypy src/
+	cd etl && uv run mypy src/
+	cd frontend && npm run type-check
+
+test-api:
+	cd api && uv run pytest
+
+test-etl:
+	cd etl && uv run pytest
+
+test-frontend:
+	cd frontend && npm test
+
+test: test-api test-etl test-frontend
+
+# ── Integration tests ─────────────────────────────────
+test-integration-api:
+	cd api && uv run pytest -m integration
+
+test-integration-etl:
+	cd etl && uv run pytest -m integration
+
+test-integration: test-integration-api test-integration-etl
+
+# ── Full check (run before commit) ─────────────────────
+check: lint type-check test
+	@echo "All checks passed."
+
+# ── Neutrality audit ───────────────────────────────────
+neutrality:
+	@! grep -rn \
+		"suspicious\|corrupt\|criminal\|fraudulent\|illegal\|guilty\|CRITICAL\|HIGH.*severity\|MEDIUM.*severity\|LOW.*severity" \
+		api/src/ etl/src/ frontend/src/ \
+		--include="*.py" --include="*.ts" --include="*.tsx" --include="*.json" \
+		|| (echo "NEUTRALITY VIOLATION FOUND" && exit 1)
+	@echo "Neutrality check passed."
+
+# ── Bootstrap ───────────────────────────────────────────
 bootstrap-demo:
 	bash scripts/bootstrap_public_demo.sh --profile demo

@@ -24,6 +135,7 @@ bootstrap-all-noninteractive:
 bootstrap-all-report:
 	python3 scripts/run_bootstrap_all.py --repo-root . --report-latest

+# ── Quality checks ──────────────────────────────────────
 check-public-claims:
 	python3 scripts/check_public_claims.py --repo-root .

@@ -36,22 +148,20 @@ check-pipeline-contracts:
 check-pipeline-inputs:
 	python3 scripts/check_pipeline_inputs.py

+# ── Generators ──────────────────────────────────────────
 generate-pipeline-status:
-	python3 scripts/generate_pipeline_status.py --registry-path docs/source_registry_br_v1.csv --output docs/pipeline_status.md
+	python3 scripts/generate_pipeline_status.py

 generate-source-summary:
-	python3 scripts/generate_data_sources_summary.py --registry-path docs/source_registry_br_v1.csv --docs-path docs/data-sources.md
+	python3 scripts/generate_data_sources_summary.py

 generate-reference-metrics:
-	python3 scripts/generate_reference_metrics.py --json-output audit-results/public-trust/latest/neo4j-reference-metrics.json --doc-output docs/reference_metrics.md
-
-check:
-	cd api && bash ../scripts/ci/python_quality.sh
-	cd etl && bash ../scripts/ci/python_quality.sh
-	cd frontend && bash ../scripts/ci/frontend_quality.sh
-
-neutrality:
-	@! grep -rn "suspicious\|corrupt\|criminal\|fraudulent\|illegal\|guilty" \
-		api/src/ etl/src/ frontend/src/ \
-		--include="*.py" --include="*.ts" --include="*.tsx" --include="*.json" \
-		|| (echo "NEUTRALITY VIOLATION: banned words found in source" && exit 1)
+	python3 scripts/generate_reference_metrics.py
+
+# ── Cleanup ─────────────────────────────────────────────
+clean:
+	find . -type d -name __pycache__ -exec rm -rf {} + 2>/dev/null || true
+	find . -type d -name .pytest_cache -exec rm -rf {} + 2>/dev/null || true
+	find . -type d -name .mypy_cache -exec rm -rf {} + 2>/dev/null || true
+	find . -type d -name .ruff_cache -exec rm -rf {} + 2>/dev/null || true
+	rm -rf frontend/dist
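The neutrality target above relies on grep's BRE alternation (the escaped pipes) and is case-sensitive, so "suspicious" only matches lowercase while "CRITICAL" only matches uppercase. A minimal Python sketch of the same scan, for running it outside make; the roots and suffixes mirror the target, and the behavioral details are assumptions rather than a contract:

    import re
    import sys
    from pathlib import Path

    # Same alternation as the grep pattern, kept case-sensitive on purpose.
    BANNED = re.compile(
        r"suspicious|corrupt|criminal|fraudulent|illegal|guilty"
        r"|CRITICAL|HIGH.*severity|MEDIUM.*severity|LOW.*severity"
    )
    SUFFIXES = {".py", ".ts", ".tsx", ".json"}

    violations = []
    for root in ("api/src", "etl/src", "frontend/src"):
        for path in Path(root).rglob("*"):
            if path.is_file() and path.suffix in SUFFIXES:
                for lineno, line in enumerate(path.read_text(errors="ignore").splitlines(), 1):
                    if BANNED.search(line):
                        violations.append(f"{path}:{lineno}: {line.strip()}")

    if violations:
        print("NEUTRALITY VIOLATION FOUND")
        print("\n".join(violations))
        sys.exit(1)
    print("Neutrality check passed.")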
diff --git a/api/pyproject.toml b/api/pyproject.toml
index 9933c58..a606287 100644
--- a/api/pyproject.toml
+++ b/api/pyproject.toml
@@ -1,7 +1,7 @@
 [project]
 name = "bracc-api"
 version = "0.1.0"
-description = "BRACC API — Brazilian public data anti-corruption graph tool"
+description = "BR-ACC API — Brazilian public data anti-corruption graph tool"
 requires-python = ">=3.12"
 license = "AGPL-3.0-or-later"
 dependencies = [
diff --git a/api/src/bracc/config.py b/api/src/bracc/config.py
index 02a7e80..76bc98e 100644
--- a/api/src/bracc/config.py
+++ b/api/src/bracc/config.py
@@ -1,5 +1,6 @@
 from typing import Literal

+from pydantic import Field
 from pydantic_settings import BaseSettings

@@ -17,14 +18,15 @@ class Settings(BaseSettings):
     jwt_secret_key: str = "change-me-in-production"
     jwt_algorithm: str = "HS256"
     jwt_expire_minutes: int = 1440
-    auth_cookie_name: str = "bracc_session"
-    auth_cookie_secure: bool = False
-    auth_cookie_samesite: Literal["lax", "strict", "none"] = "lax"
-    trust_proxy_headers: bool = False
     rate_limit_anon: str = "60/minute"
     rate_limit_auth: str = "300/minute"
     invite_code: str = ""
     cors_origins: str = "http://localhost:3000"
+    auth_cookie_name: str = "bracc_session"
+    auth_cookie_secure: bool = False
+    auth_cookie_samesite: Literal["lax", "strict", "none"] = "lax"
+    trust_proxy_headers: bool = False
+    share_token_ttl_hours: int = 168  # 7 days
     product_tier: str = "community"
     patterns_enabled: bool = False
     public_mode: bool = False
@@ -37,7 +39,16 @@
     pattern_srp_min_orgs: int = 5
     pattern_inexig_min_recurrence: int = 3
     pattern_max_evidence_refs: int = 50
-    share_token_ttl_hours: int = 168
+
+    # Pattern hardening defaults (decision-complete contract)
+    pattern_temporal_window_years: int = Field(default=4, ge=1, le=20)
+    pattern_min_contract_value: float = Field(default=100000.0, ge=0)
+    pattern_min_contract_count: int = Field(default=2, ge=1)
+    pattern_min_debt_value: float = Field(default=50000.0, ge=0)
+    pattern_same_as_min_confidence: float = Field(default=0.85, ge=0, le=1)
+    pattern_pep_min_confidence: float = Field(default=0.85, ge=0, le=1)
+    pattern_min_recurrence: int = Field(default=2, ge=1)
+    pattern_min_discrepancy_ratio: float = Field(default=0.30, ge=0, le=1)

     model_config = {"env_prefix": "", "env_file": ".env"}
diff --git a/api/src/bracc/dependencies.py b/api/src/bracc/dependencies.py
index 9f4b1a1..6f6db6e 100644
--- a/api/src/bracc/dependencies.py
+++ b/api/src/bracc/dependencies.py
@@ -35,7 +35,12 @@ async def close_driver() -> None:

 async def get_driver(request: Request) -> AsyncDriver:
-    driver: AsyncDriver = request.app.state.neo4j_driver
+    driver: AsyncDriver | None = getattr(request.app.state, "neo4j_driver", None)
+    if driver is None:
+        raise HTTPException(
+            status_code=status.HTTP_503_SERVICE_UNAVAILABLE,
+            detail="Database connection not available",
+        )
     return driver
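The Field bounds added to Settings above mean a malformed override fails at application startup rather than deep inside a query. A minimal sketch of that behavior, using a standalone model so it runs without the app; the field names mirror config.py, and the env-var spelling relies on pydantic-settings' default case-insensitive mapping:

    from pydantic import Field, ValidationError
    from pydantic_settings import BaseSettings

    class PatternSettings(BaseSettings):
        pattern_same_as_min_confidence: float = Field(default=0.85, ge=0, le=1)
        pattern_temporal_window_years: int = Field(default=4, ge=1, le=20)

    try:
        # Equivalent to exporting PATTERN_SAME_AS_MIN_CONFIDENCE=1.7 before startup.
        PatternSettings(pattern_same_as_min_confidence=1.7)
    except ValidationError as exc:
        # Reports "less than or equal to 1" instead of silently accepting the value.
        print(exc.errors()[0]["msg"])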
title="BR-ACC API", description="Brazilian public data graph analysis tool", version="0.1.0", lifespan=lifespan, @@ -85,5 +85,5 @@ async def lifespan(app: FastAPI) -> AsyncIterator[None]: @app.get("/health") -async def health(request: Request) -> dict[str, str]: - return {"status": "ok", "version": request.app.version} +async def health() -> dict[str, str]: + return {"status": "ok"} diff --git a/api/src/bracc/middleware/cpf_masking.py b/api/src/bracc/middleware/cpf_masking.py index 8bb4c1d..ec4c79b 100644 --- a/api/src/bracc/middleware/cpf_masking.py +++ b/api/src/bracc/middleware/cpf_masking.py @@ -53,7 +53,7 @@ def _is_pep_record(record: dict[str, Any]) -> bool: for field in ("role", "cargo"): value = record.get(field) - if isinstance(value, str) and value.strip().lower() in PEP_ROLES: + if isinstance(value, str) and any(kw in value.strip().lower() for kw in PEP_ROLES): return True return False diff --git a/api/src/bracc/middleware/rate_limit.py b/api/src/bracc/middleware/rate_limit.py index b8caa8e..087b322 100644 --- a/api/src/bracc/middleware/rate_limit.py +++ b/api/src/bracc/middleware/rate_limit.py @@ -6,37 +6,20 @@ from bracc.services.auth_service import decode_access_token -def _extract_token(request: Request) -> str | None: +def _get_rate_limit_key(request: Request) -> str: + """Extract user_id from JWT (Bearer or cookie) for rate limiting, fallback to IP.""" auth = request.headers.get("authorization", "") if auth.startswith("Bearer "): - return auth[7:].strip() + token = auth[7:] + user_id = decode_access_token(token) + if user_id: + return f"user:{user_id}" cookie_token = request.cookies.get(settings.auth_cookie_name) if isinstance(cookie_token, str) and cookie_token.strip(): - return cookie_token.strip() - return None - - -def _resolve_client_ip(request: Request) -> str: - if settings.trust_proxy_headers: - forwarded = request.headers.get("x-forwarded-for", "") - if forwarded: - first_hop = forwarded.split(",", 1)[0].strip() - if first_hop: - return first_hop - real_ip = request.headers.get("x-real-ip", "").strip() - if real_ip: - return real_ip - return get_remote_address(request) - - -def _get_rate_limit_key(request: Request) -> str: - """Extract user_id from JWT for rate limiting, fallback to IP.""" - token = _extract_token(request) - if token: - user_id = decode_access_token(token) + user_id = decode_access_token(cookie_token.strip()) if user_id: return f"user:{user_id}" - return _resolve_client_ip(request) + return get_remote_address(request) limiter = Limiter( diff --git a/api/src/bracc/queries/entity_connections.cypher b/api/src/bracc/queries/entity_connections.cypher index 772e651..15f4093 100644 --- a/api/src/bracc/queries/entity_connections.cypher +++ b/api/src/bracc/queries/entity_connections.cypher @@ -1,27 +1,15 @@ -MATCH (center) WHERE elementId(center) = $entity_id +MATCH (center) +WHERE elementId(center) = $entity_id AND (center:Person OR center:Partner OR center:Company OR center:Contract OR center:Sanction OR center:Election OR center:Amendment OR center:Finance OR center:Embargo OR center:Health OR center:Education OR center:Convenio OR center:LaborStats OR center:PublicOffice) -WITH center, - CASE - WHEN coalesce($include_probable, false) THEN - "SOCIO_DE|DOOU|CANDIDATO_EM|VENCEU|AUTOR_EMENDA|SANCIONADA|OPERA_UNIDADE|DEVE|RECEBEU_EMPRESTIMO|EMBARGADA|MANTEDORA_DE|BENEFICIOU|GEROU_CONVENIO|SAME_AS|POSSIBLE_SAME_AS" - ELSE - 
"SOCIO_DE|DOOU|CANDIDATO_EM|VENCEU|AUTOR_EMENDA|SANCIONADA|OPERA_UNIDADE|DEVE|RECEBEU_EMPRESTIMO|EMBARGADA|MANTEDORA_DE|BENEFICIOU|GEROU_CONVENIO|SAME_AS" - END AS relationship_filter -CALL apoc.path.subgraphAll(center, { - relationshipFilter: relationship_filter, - labelFilter: "-User|-Investigation|-Annotation|-Tag", - maxLevel: $depth, - limit: 200 -}) -YIELD nodes, relationships -WITH center, nodes, relationships -UNWIND relationships AS r -WITH center, - startNode(r) AS src, - endNode(r) AS tgt, - r +OPTIONAL MATCH p=(center)-[:SOCIO_DE|DOOU|CANDIDATO_EM|VENCEU|AUTOR_EMENDA|SANCIONADA|OPERA_UNIDADE|DEVE|RECEBEU_EMPRESTIMO|EMBARGADA|MANTEDORA_DE|BENEFICIOU|GEROU_CONVENIO|SAME_AS|POSSIBLE_SAME_AS*1..4]-(connected) +WHERE length(p) <= $depth + AND all(x IN nodes(p) WHERE NOT (x:User OR x:Investigation OR x:Annotation OR x:Tag)) +WITH center, p +UNWIND CASE WHEN p IS NULL THEN [] ELSE relationships(p) END AS r +WITH DISTINCT center, r, startNode(r) AS src, endNode(r) AS tgt +WHERE coalesce($include_probable, false) OR type(r) <> "POSSIBLE_SAME_AS" RETURN center AS e, r, CASE WHEN elementId(src) = elementId(center) THEN tgt ELSE src END AS connected, diff --git a/api/src/bracc/queries/graph_expand.cypher b/api/src/bracc/queries/graph_expand.cypher index 733a293..b807d2a 100644 --- a/api/src/bracc/queries/graph_expand.cypher +++ b/api/src/bracc/queries/graph_expand.cypher @@ -1,14 +1,21 @@ -MATCH (center) WHERE elementId(center) = $entity_id +MATCH (center) +WHERE elementId(center) = $entity_id AND (center:Person OR center:Company OR center:Contract OR center:Sanction OR center:Election OR center:Amendment OR center:Finance OR center:Embargo OR center:Health OR center:Education OR center:Convenio OR center:LaborStats OR center:PublicOffice OR center:OffshoreEntity OR center:OffshoreOfficer OR center:GlobalPEP OR center:CVMProceeding OR center:Expense) -CALL apoc.path.subgraphAll(center, { - relationshipFilter: "SOCIO_DE|DOOU|CANDIDATO_EM|VENCEU|AUTOR_EMENDA|SANCIONADA|OPERA_UNIDADE|DEVE|RECEBEU_EMPRESTIMO|EMBARGADA|MANTEDORA_DE|BENEFICIOU|GEROU_CONVENIO|SAME_AS|POSSIBLY_SAME_AS|OFFICER_OF|INTERMEDIARY_OF|GLOBAL_PEP_MATCH|CVM_SANCIONADA|GASTOU|FORNECEU", - labelFilter: $label_filter, - maxLevel: $depth, - limit: 200 -}) -YIELD nodes, relationships -RETURN nodes, relationships, elementId(center) AS center_id \ No newline at end of file +OPTIONAL MATCH p=(center)-[:SOCIO_DE|DOOU|CANDIDATO_EM|VENCEU|AUTOR_EMENDA|SANCIONADA|OPERA_UNIDADE|DEVE|RECEBEU_EMPRESTIMO|EMBARGADA|MANTEDORA_DE|BENEFICIOU|GEROU_CONVENIO|SAME_AS|POSSIBLY_SAME_AS|OFFICER_OF|INTERMEDIARY_OF|GLOBAL_PEP_MATCH|CVM_SANCIONADA|GASTOU|FORNECEU*1..4]-(n) +WHERE length(p) <= $depth + AND all(x IN nodes(p) WHERE NOT (x:User OR x:Investigation OR x:Annotation OR x:Tag)) +WITH center, collect(p) AS paths +WITH center, + reduce(ns = [center], p IN paths | ns + CASE WHEN p IS NULL THEN [] ELSE nodes(p) END) AS raw_nodes, + reduce(rs = [], p IN paths | rs + CASE WHEN p IS NULL THEN [] ELSE relationships(p) END) AS raw_rels +UNWIND raw_nodes AS n +WITH center, collect(DISTINCT n) AS nodes, raw_rels +UNWIND CASE WHEN size(raw_rels) = 0 THEN [NULL] ELSE raw_rels END AS r +WITH center, nodes, collect(DISTINCT r) AS rels +RETURN nodes, + [x IN rels WHERE x IS NOT NULL] AS relationships, + elementId(center) AS center_id diff --git a/api/src/bracc/queries/investigation_by_token.cypher b/api/src/bracc/queries/investigation_by_token.cypher index ba9ff3d..6065b57 100644 --- a/api/src/bracc/queries/investigation_by_token.cypher +++ 
b/api/src/bracc/queries/investigation_by_token.cypher @@ -1,6 +1,4 @@ -MATCH (i:Investigation) -WHERE i.share_token = $token - AND (i.share_expires_at IS NULL OR i.share_expires_at > datetime()) +MATCH (i:Investigation {share_token: $token}) OPTIONAL MATCH (i)-[:INCLUDES]->(e) WITH i, collect(coalesce(e.cpf, e.cnpj, e.contract_id, e.sanction_id, e.amendment_id, e.cnes_code, e.finance_id, e.embargo_id, e.school_id, e.convenio_id, e.stats_id, elementId(e))) AS eids RETURN i.id AS id, @@ -9,5 +7,4 @@ RETURN i.id AS id, i.created_at AS created_at, i.updated_at AS updated_at, i.share_token AS share_token, - i.share_expires_at AS share_expires_at, [x IN eids WHERE x IS NOT NULL] AS entity_ids diff --git a/api/src/bracc/queries/investigation_create.cypher b/api/src/bracc/queries/investigation_create.cypher index b9bf3f3..a583340 100644 --- a/api/src/bracc/queries/investigation_create.cypher +++ b/api/src/bracc/queries/investigation_create.cypher @@ -4,8 +4,7 @@ CREATE (i:Investigation { description: $description, created_at: datetime(), updated_at: datetime(), - share_token: null, - share_expires_at: null + share_token: null }) WITH i MATCH (u:User {id: $user_id}) @@ -16,5 +15,4 @@ RETURN i.id AS id, i.created_at AS created_at, i.updated_at AS updated_at, i.share_token AS share_token, - i.share_expires_at AS share_expires_at, [] AS entity_ids diff --git a/api/src/bracc/queries/investigation_get.cypher b/api/src/bracc/queries/investigation_get.cypher index 25a54bd..1599b20 100644 --- a/api/src/bracc/queries/investigation_get.cypher +++ b/api/src/bracc/queries/investigation_get.cypher @@ -7,5 +7,4 @@ RETURN i.id AS id, i.created_at AS created_at, i.updated_at AS updated_at, i.share_token AS share_token, - i.share_expires_at AS share_expires_at, [x IN eids WHERE x IS NOT NULL] AS entity_ids diff --git a/api/src/bracc/queries/investigation_list.cypher b/api/src/bracc/queries/investigation_list.cypher index 0fe6e48..9310ad6 100644 --- a/api/src/bracc/queries/investigation_list.cypher +++ b/api/src/bracc/queries/investigation_list.cypher @@ -13,5 +13,4 @@ RETURN total, i.created_at AS created_at, i.updated_at AS updated_at, i.share_token AS share_token, - i.share_expires_at AS share_expires_at, [x IN eids WHERE x IS NOT NULL] AS entity_ids diff --git a/api/src/bracc/queries/investigation_share.cypher b/api/src/bracc/queries/investigation_share.cypher index 1115e83..ffb0594 100644 --- a/api/src/bracc/queries/investigation_share.cypher +++ b/api/src/bracc/queries/investigation_share.cypher @@ -1,7 +1,5 @@ MATCH (u:User {id: $user_id})-[:OWNS]->(i:Investigation {id: $id}) SET i.share_token = $share_token, - i.share_expires_at = $share_expires_at, i.updated_at = datetime() RETURN i.id AS id, - i.share_token AS share_token, - i.share_expires_at AS share_expires_at + i.share_token AS share_token diff --git a/api/src/bracc/queries/investigation_update.cypher b/api/src/bracc/queries/investigation_update.cypher index c6cfa05..540fc17 100644 --- a/api/src/bracc/queries/investigation_update.cypher +++ b/api/src/bracc/queries/investigation_update.cypher @@ -11,5 +11,4 @@ RETURN i.id AS id, i.created_at AS created_at, i.updated_at AS updated_at, i.share_token AS share_token, - i.share_expires_at AS share_expires_at, [x IN eids WHERE x IS NOT NULL] AS entity_ids diff --git a/api/src/bracc/queries/node_degree.cypher b/api/src/bracc/queries/node_degree.cypher index c751492..f16f291 100644 --- a/api/src/bracc/queries/node_degree.cypher +++ b/api/src/bracc/queries/node_degree.cypher @@ -1,5 +1,6 @@ -MATCH (n) WHERE 
elementId(n) = $entity_id +MATCH (n) +WHERE elementId(n) = $entity_id AND (n:Person OR n:Company OR n:Contract OR n:Sanction OR n:Election OR n:Amendment OR n:Finance OR n:Embargo OR n:Health OR n:Education OR n:Convenio OR n:LaborStats OR n:PublicOffice) -RETURN apoc.node.degree(n) AS degree \ No newline at end of file +RETURN COUNT { (n)--() } AS degree diff --git a/api/src/bracc/queries/public_graph_company.cypher b/api/src/bracc/queries/public_graph_company.cypher index 7ae6e16..3358f65 100644 --- a/api/src/bracc/queries/public_graph_company.cypher +++ b/api/src/bracc/queries/public_graph_company.cypher @@ -2,11 +2,31 @@ MATCH (center:Company) WHERE elementId(center) = $company_id OR center.cnpj = $company_identifier OR center.cnpj = $company_identifier_formatted -CALL apoc.path.subgraphAll(center, { - relationshipFilter: "SOCIO_DE|VENCEU|SANCIONADA|DEVE|RECEBEU_EMPRESTIMO|BENEFICIOU|GEROU_CONVENIO|MUNICIPAL_VENCEU|MUNICIPAL_LICITOU", - labelFilter: "+Company|+Contract|+Sanction|+Finance|+Amendment|+Convenio|+Bid|+MunicipalContract|+MunicipalBid|-Person|-Partner|-User|-Investigation|-Annotation|-Tag", - maxLevel: $depth, - limit: 200 -}) -YIELD nodes, relationships -RETURN nodes, relationships, elementId(center) AS center_id +OPTIONAL MATCH p=(center)-[:SOCIO_DE|VENCEU|SANCIONADA|DEVE|RECEBEU_EMPRESTIMO|BENEFICIOU|GEROU_CONVENIO|MUNICIPAL_VENCEU|MUNICIPAL_LICITOU*1..4]-(n) +WHERE length(p) <= $depth + AND all( + x IN nodes(p) + WHERE NOT ( + "Person" IN labels(x) + OR "Partner" IN labels(x) + OR "User" IN labels(x) + OR "Investigation" IN labels(x) + OR "Annotation" IN labels(x) + OR "Tag" IN labels(x) + ) + ) + AND ( + n:Company OR n:Contract OR n:Sanction OR n:Finance OR n:Amendment OR n:Convenio + OR n:Bid OR n:MunicipalContract OR n:MunicipalBid OR n IS NULL + ) +WITH center, collect(p) AS paths +WITH center, + reduce(ns = [center], p IN paths | ns + CASE WHEN p IS NULL THEN [] ELSE nodes(p) END) AS raw_nodes, + reduce(rs = [], p IN paths | rs + CASE WHEN p IS NULL THEN [] ELSE relationships(p) END) AS raw_rels +UNWIND raw_nodes AS n +WITH center, collect(DISTINCT n) AS nodes, raw_rels +UNWIND CASE WHEN size(raw_rels) = 0 THEN [NULL] ELSE raw_rels END AS r +WITH center, nodes, collect(DISTINCT r) AS rels +RETURN nodes, + [x IN rels WHERE x IS NOT NULL] AS relationships, + elementId(center) AS center_id diff --git a/api/src/bracc/queries/schema_init.cypher b/api/src/bracc/queries/schema_init.cypher index f19611b..8ea17e9 100644 --- a/api/src/bracc/queries/schema_init.cypher +++ b/api/src/bracc/queries/schema_init.cypher @@ -1,4 +1,4 @@ -// BRACC Neo4j Schema — Constraints and Indexes +// BR-ACC Neo4j Schema — Constraints and Indexes // Applied on database initialization // ── Uniqueness Constraints ────────────────────────────── diff --git a/api/src/bracc/routers/baseline.py b/api/src/bracc/routers/baseline.py index 2951ca1..8928ba1 100644 --- a/api/src/bracc/routers/baseline.py +++ b/api/src/bracc/routers/baseline.py @@ -6,6 +6,7 @@ from bracc.dependencies import get_session from bracc.models.baseline import BaselineResponse from bracc.services.baseline_service import BASELINE_QUERIES, run_all_baselines, run_baseline +from bracc.services.public_guard import enforce_entity_lookup_enabled router = APIRouter(prefix="/api/v1/baseline", tags=["baseline"]) @@ -16,6 +17,7 @@ async def get_baseline_for_entity( session: Annotated[AsyncSession, Depends(get_session)], dimension: Annotated[str | None, Query()] = None, ) -> BaselineResponse: + enforce_entity_lookup_enabled() if dimension: if 
dimension not in BASELINE_QUERIES: available = list(BASELINE_QUERIES.keys()) diff --git a/api/src/bracc/routers/entity.py b/api/src/bracc/routers/entity.py index e425086..d13e6e8 100644 --- a/api/src/bracc/routers/entity.py +++ b/api/src/bracc/routers/entity.py @@ -182,7 +182,7 @@ async def get_entity_timeline( date=event_date, label=str(label), entity_type=entity_type, - properties=sanitize_props(props), + properties=sanitize_public_properties(sanitize_props(props)), sources=[SourceAttribution(database="neo4j_graph")], )) diff --git a/api/src/bracc/routers/investigation.py b/api/src/bracc/routers/investigation.py index d7a0589..ea07581 100644 --- a/api/src/bracc/routers/investigation.py +++ b/api/src/bracc/routers/investigation.py @@ -311,7 +311,7 @@ async def export_investigation_pdf( cpf_val = node.get("cpf") if cpf_val and isinstance(cpf_val, str): role = str(node.get("role", node.get("cargo", ""))).lower() - is_pep = role in PEP_ROLES + is_pep = any(kw in role for kw in PEP_ROLES) if not is_pep: if "." in document and "-" in document: document = mask_formatted_cpf(document) diff --git a/api/src/bracc/routers/meta.py b/api/src/bracc/routers/meta.py index a3e9422..24155eb 100644 --- a/api/src/bracc/routers/meta.py +++ b/api/src/bracc/routers/meta.py @@ -6,6 +6,7 @@ from bracc.dependencies import get_session from bracc.services.neo4j_service import execute_query_single +from bracc.services.public_guard import should_hide_person_entities from bracc.services.source_registry import load_source_registry, source_registry_summary router = APIRouter(prefix="/api/v1/meta", tags=["meta"]) @@ -40,7 +41,9 @@ async def database_stats( result = { "total_nodes": record["total_nodes"] if record else 0, "total_relationships": record["total_relationships"] if record else 0, - "person_count": record["person_count"] if record else 0, + "person_count": ( + 0 if should_hide_person_entities() else (record["person_count"] if record else 0) + ), "company_count": record["company_count"] if record else 0, "health_count": record["health_count"] if record else 0, "finance_count": record["finance_count"] if record else 0, diff --git a/api/src/bracc/routers/public.py b/api/src/bracc/routers/public.py index 81b8695..716bddf 100644 --- a/api/src/bracc/routers/public.py +++ b/api/src/bracc/routers/public.py @@ -57,12 +57,6 @@ async def public_meta( return { "product": "World Transparency Graph", "mode": "public_safe", - "dataset_scope": { - "local_default": "demo_local", - "ingestion_mode": "byo_ingestion", - "reference_metrics": "reference_production_snapshot", - }, - "metrics_as_of_utc": "2026-03-01T23:05:00Z", "total_nodes": record["total_nodes"] if record else 0, "total_relationships": record["total_relationships"] if record else 0, "company_count": record["company_count"] if record else 0, diff --git a/api/src/bracc/routers/search.py b/api/src/bracc/routers/search.py index 953f436..78a4771 100644 --- a/api/src/bracc/routers/search.py +++ b/api/src/bracc/routers/search.py @@ -61,9 +61,9 @@ async def search_entities( { "query": _escape_lucene(q), "entity_type": type_filter, - "hide_person_entities": hide_person_entities, "skip": skip, "limit": size, + "hide_person_entities": hide_person_entities, }, ) total_record = await execute_query_single( diff --git a/api/tests/integration/conftest.py b/api/tests/integration/conftest.py index 177c091..e221791 100644 --- a/api/tests/integration/conftest.py +++ b/api/tests/integration/conftest.py @@ -9,6 +9,17 @@ from bracc.main import app +def _iter_cypher_statements(path: Path) 
-> list[str]: + # Strip comment-only lines before splitting to avoid dropping statements + # that are preceded by section headers. + filtered_lines = [ + line for line in path.read_text().splitlines() + if line.strip() and not line.strip().startswith("//") + ] + text = "\n".join(filtered_lines) + return [stmt.strip() for stmt in text.split(";") if stmt.strip()] + + @pytest.fixture(scope="session") def neo4j_container() -> Neo4jContainer: # type: ignore[misc] """Start a Neo4j container for integration tests.""" @@ -25,32 +36,43 @@ def neo4j_uri(neo4j_container: Neo4jContainer) -> str: @pytest.fixture(scope="session") def neo4j_auth(neo4j_container: Neo4jContainer) -> tuple[str, str]: - return ("neo4j", neo4j_container.NEO4J_ADMIN_PASSWORD) + # testcontainers.neo4j API changed: older versions exposed NEO4J_ADMIN_PASSWORD, + # newer versions expose username/password attributes. + username = getattr(neo4j_container, "username", "neo4j") + password = getattr( + neo4j_container, + "password", + getattr(neo4j_container, "NEO4J_ADMIN_PASSWORD", None), + ) + if password is None: + msg = "Could not resolve Neo4j testcontainer password" + raise RuntimeError(msg) + return (username, password) -@pytest.fixture(scope="session") +@pytest.fixture async def neo4j_driver( neo4j_uri: str, neo4j_auth: tuple[str, str] ) -> AsyncIterator[AsyncDriver]: + # Function-scoped driver avoids loop affinity issues between async tests. driver = AsyncGraphDatabase.driver(neo4j_uri, auth=neo4j_auth) + async with driver.session() as session: + # Keep tests deterministic across function scope by resetting test data. + await session.run("MATCH (n) DETACH DELETE n") # Apply schema schema_path = Path(__file__).parent.parent.parent.parent / "infra" / "neo4j" / "init.cypher" if schema_path.exists(): async with driver.session() as session: - for statement in schema_path.read_text().split(";"): - stmt = statement.strip() - if stmt and not stmt.startswith("//"): - await session.run(stmt) + for stmt in _iter_cypher_statements(schema_path): + await session.run(stmt) # Seed dev data seed_path = ( Path(__file__).parent.parent.parent.parent / "infra" / "scripts" / "seed-dev.cypher" ) if seed_path.exists(): async with driver.session() as session: - for statement in seed_path.read_text().split(";"): - stmt = statement.strip() - if stmt and not stmt.startswith("//"): - await session.run(stmt) + for stmt in _iter_cypher_statements(seed_path): + await session.run(stmt) yield driver await driver.close() diff --git a/api/tests/unit/test_auth.py b/api/tests/unit/test_auth.py index db51e5b..c532b58 100644 --- a/api/tests/unit/test_auth.py +++ b/api/tests/unit/test_auth.py @@ -34,7 +34,11 @@ def _setup_mock_session(driver: MagicMock, records: list[MagicMock]) -> AsyncMoc @pytest.mark.anyio -async def test_register_success(client: AsyncClient) -> None: +async def test_register_success(client: AsyncClient, monkeypatch: pytest.MonkeyPatch) -> None: + from bracc.config import settings + + monkeypatch.setattr(settings, "invite_code", "") + record = _mock_record({ "id": "user-uuid", "email": "test@example.com", @@ -56,19 +60,15 @@ async def test_register_success(client: AsyncClient) -> None: @pytest.mark.anyio -async def test_register_bad_invite(client: AsyncClient) -> None: +async def test_register_bad_invite(client: AsyncClient, monkeypatch: pytest.MonkeyPatch) -> None: from bracc.config import settings - original = settings.invite_code - try: - settings.invite_code = "secret-code" - response = await client.post( - "/api/v1/auth/register", - 
json={"email": "test@example.com", "password": "password123", "invite_code": "wrong"}, - ) - assert response.status_code == 403 - finally: - settings.invite_code = original + monkeypatch.setattr(settings, "invite_code", "secret-code") + response = await client.post( + "/api/v1/auth/register", + json={"email": "test@example.com", "password": "password123", "invite_code": "wrong"}, + ) + assert response.status_code == 403 @pytest.mark.anyio @@ -155,16 +155,25 @@ async def test_me_invalid_token(client: AsyncClient) -> None: @pytest.mark.anyio -async def test_register_duplicate_email(client: AsyncClient) -> None: +async def test_register_duplicate_email( + client: AsyncClient, + monkeypatch: pytest.MonkeyPatch, +) -> None: + from neo4j.exceptions import ConstraintError + + from bracc.config import settings from bracc.main import app + monkeypatch.setattr(settings, "invite_code", "") + driver = app.state.neo4j_driver mock_session = AsyncMock() - mock_session.run = AsyncMock(side_effect=Exception("Constraint violation")) + mock_session.run = AsyncMock(side_effect=ConstraintError("Node already exists")) driver.session.return_value.__aenter__ = AsyncMock(return_value=mock_session) - with pytest.raises(Exception, match="Constraint violation"): - await client.post( - "/api/v1/auth/register", - json={"email": "duplicate@example.com", "password": "password123"}, - ) + response = await client.post( + "/api/v1/auth/register", + json={"email": "duplicate@example.com", "password": "password123"}, + ) + assert response.status_code == 409 + assert response.json()["detail"] == "Email already registered" diff --git a/api/tests/unit/test_auth_service.py b/api/tests/unit/test_auth_service.py index 3071b4d..1cc6997 100644 --- a/api/tests/unit/test_auth_service.py +++ b/api/tests/unit/test_auth_service.py @@ -61,7 +61,9 @@ def test_decode_access_token_invalid() -> None: @pytest.mark.anyio -async def test_register_user_success() -> None: +async def test_register_user_success(monkeypatch: pytest.MonkeyPatch) -> None: + monkeypatch.setattr(settings, "invite_code", "") + mock_record = _mock_record({ "id": "user-uuid", "email": "test@example.com", @@ -80,15 +82,11 @@ async def test_register_user_success() -> None: @pytest.mark.anyio -async def test_register_user_bad_invite() -> None: - original = settings.invite_code - try: - settings.invite_code = "secret-code" - session = AsyncMock() - with pytest.raises(ValueError, match="Invalid invite code"): - await register_user(session, "test@example.com", "password123", "wrong-code") - finally: - settings.invite_code = original +async def test_register_user_bad_invite(monkeypatch: pytest.MonkeyPatch) -> None: + monkeypatch.setattr(settings, "invite_code", "secret-code") + session = AsyncMock() + with pytest.raises(ValueError, match="Invalid invite code"): + await register_user(session, "test@example.com", "password123", "wrong-code") @pytest.mark.anyio diff --git a/api/tests/unit/test_cpf_masking.py b/api/tests/unit/test_cpf_masking.py index 271df24..12721cb 100644 --- a/api/tests/unit/test_cpf_masking.py +++ b/api/tests/unit/test_cpf_masking.py @@ -68,6 +68,28 @@ def test_political_role(self, role: str) -> None: def test_cargo_field(self) -> None: assert _is_pep_record({"name": "X", "cpf": "11111111111", "cargo": "Deputado"}) + @pytest.mark.parametrize( + "role", + [ + "Deputado Federal", + "deputado federal", + "DEPUTADO FEDERAL", + "Senador da Republica", + "senadora da republica", + "Vereador Suplente", + "Ministro de Estado", + "Governadora do Estado de Sao Paulo", + 
"Presidente da Republica", + ], + ) + def test_compound_role_detected_as_pep(self, role: str) -> None: + """Compound PEP roles like 'deputado federal' must be detected via substring match.""" + assert _is_pep_record({"name": "X", "cpf": "11111111111", "role": role}) + + def test_compound_cargo_detected_as_pep(self) -> None: + """Compound PEP cargo like 'Deputado Federal' must be detected via substring match.""" + assert _is_pep_record({"name": "X", "cpf": "11111111111", "cargo": "Deputado Federal"}) + def test_non_pep_role(self) -> None: assert not _is_pep_record({"name": "X", "cpf": "11111111111", "role": "assessor"}) @@ -99,6 +121,18 @@ def test_deeply_nested(self) -> None: data = {"a": {"b": {"c": [{"cpf": "33333333333", "is_pep": True}]}}} assert "33333333333" in _collect_pep_cpfs(data) + def test_compound_role_collected(self) -> None: + """Compound roles like 'Deputado Federal' must be recognized in the walk.""" + data = { + "results": [ + {"cpf": "11111111111", "role": "Deputado Federal"}, + {"cpf": "22222222222", "role": "assessor parlamentar"}, + ] + } + peps = _collect_pep_cpfs(data) + assert "11111111111" in peps + assert "22222222222" not in peps + # --------------------------------------------------------------------------- # Unit tests for mask_cpfs_in_json @@ -205,4 +239,4 @@ async def test_health_not_masked(client: AsyncClient) -> None: """Non-CPF JSON responses pass through unchanged.""" resp = await client.get("/health") assert resp.status_code == 200 - assert resp.json()["status"] == "ok" and "version" in resp.json() + assert resp.json() == {"status": "ok"} diff --git a/api/tests/unit/test_health.py b/api/tests/unit/test_health.py index d250005..320e27b 100644 --- a/api/tests/unit/test_health.py +++ b/api/tests/unit/test_health.py @@ -8,9 +8,7 @@ async def test_health_returns_ok(client: AsyncClient) -> None: response = await client.get("/health") assert response.status_code == 200 - data = response.json() - assert data["status"] == "ok" - assert "version" in data + assert response.json() == {"status": "ok"} assert response.headers["x-content-type-options"] == "nosniff" assert response.headers["x-frame-options"] == "DENY" assert response.headers["referrer-policy"] == "no-referrer" diff --git a/api/tests/unit/test_patterns.py b/api/tests/unit/test_patterns.py deleted file mode 100644 index f87d026..0000000 --- a/api/tests/unit/test_patterns.py +++ /dev/null @@ -1,120 +0,0 @@ -from unittest.mock import AsyncMock, patch - -import pytest -from httpx import AsyncClient - -from bracc.config import settings -from bracc.models.pattern import PATTERN_METADATA -from bracc.services.intelligence_provider import COMMUNITY_PATTERN_IDS, COMMUNITY_PATTERN_QUERIES -from bracc.services.neo4j_service import CypherLoader - - -@pytest.fixture(autouse=True) -def _enable_patterns(monkeypatch: pytest.MonkeyPatch) -> None: - monkeypatch.setattr(settings, "patterns_enabled", True) - - -def test_all_community_patterns_have_metadata() -> None: - for pattern_id in COMMUNITY_PATTERN_IDS: - assert pattern_id in PATTERN_METADATA, f"Missing metadata for {pattern_id}" - - -def test_all_community_patterns_have_query_files() -> None: - for query_name in COMMUNITY_PATTERN_QUERIES.values(): - try: - CypherLoader.load(query_name) - except FileNotFoundError: - pytest.fail(f"Missing .cypher file for query {query_name}.cypher") - finally: - CypherLoader.clear_cache() - - -def test_pattern_metadata_has_required_fields() -> None: - for pid, meta in PATTERN_METADATA.items(): - assert "name_pt" in meta, f"{pid} 
missing name_pt" - assert "name_en" in meta, f"{pid} missing name_en" - assert "desc_pt" in meta, f"{pid} missing desc_pt" - assert "desc_en" in meta, f"{pid} missing desc_en" - - -@pytest.mark.anyio -async def test_list_patterns_endpoint(client: AsyncClient) -> None: - response = await client.get("/api/v1/patterns/") - assert response.status_code == 200 - data = response.json() - assert "patterns" in data - assert len(data["patterns"]) == 8 - - ids = {row["id"] for row in data["patterns"]} - assert ids == set(COMMUNITY_PATTERN_IDS) - - -@pytest.mark.anyio -async def test_patterns_endpoint_returns_503_when_disabled( - client: AsyncClient, - monkeypatch: pytest.MonkeyPatch, -) -> None: - monkeypatch.setattr(settings, "patterns_enabled", False) - response = await client.get("/api/v1/patterns/") - assert response.status_code == 503 - assert "temporarily unavailable" in response.json()["detail"] - - -@pytest.mark.anyio -async def test_invalid_pattern_returns_404(client: AsyncClient) -> None: - response = await client.get("/api/v1/patterns/test-id/nonexistent_pattern") - assert response.status_code == 404 - assert "Pattern not found" in response.json()["detail"] - - -@pytest.mark.anyio -async def test_patterns_endpoint_forwards_include_probable(client: AsyncClient) -> None: - with patch("bracc.routers.patterns.run_all_patterns", new_callable=AsyncMock) as mock_run_all: - mock_run_all.return_value = [] - response = await client.get("/api/v1/patterns/test-id?include_probable=true") - - assert response.status_code == 200 - mock_run_all.assert_awaited_once() - _driver, entity_id, _lang = mock_run_all.await_args.args - assert entity_id == "test-id" - assert mock_run_all.await_args.kwargs["include_probable"] is True - - -@pytest.mark.anyio -async def test_specific_pattern_endpoint_forwards_include_probable(client: AsyncClient) -> None: - with patch("bracc.routers.patterns.run_pattern", new_callable=AsyncMock) as mock_run_one: - mock_run_one.return_value = [] - response = await client.get( - "/api/v1/patterns/test-id/debtor_contracts?include_probable=true", - ) - - assert response.status_code == 200 - mock_run_one.assert_awaited_once() - _session, pattern_name, entity_id, _lang = mock_run_one.await_args.args - assert pattern_name == "debtor_contracts" - assert entity_id == "test-id" - assert mock_run_one.await_args.kwargs["include_probable"] is True - - -def test_community_queries_use_bind_params() -> None: - for query_name in COMMUNITY_PATTERN_QUERIES.values(): - try: - cypher = CypherLoader.load(query_name) - finally: - CypherLoader.clear_cache() - assert "$company_id" in cypher, f"{query_name}.cypher missing $company_id" - assert "$company_identifier" in cypher, f"{query_name}.cypher missing $company_identifier" - assert "$company_identifier_formatted" in cypher, ( - f"{query_name}.cypher missing $company_identifier_formatted" - ) - assert "${" not in cypher, f"{query_name}.cypher uses unsafe string interpolation" - - -def test_no_banned_words_in_pattern_metadata() -> None: - banned = {"suspicious", "corrupt", "criminal", "fraudulent", "illegal", "guilty"} - for pid, meta in PATTERN_METADATA.items(): - for key, value in meta.items(): - for word in banned: - assert word not in value.lower(), ( - f"Banned word '{word}' in {pid}.{key}: {value}" - ) diff --git a/api/tests/unit/test_patterns_new.py b/api/tests/unit/test_patterns_new.py deleted file mode 100644 index 0b8f919..0000000 --- a/api/tests/unit/test_patterns_new.py +++ /dev/null @@ -1,79 +0,0 @@ -"""Community public-safe pattern registry and 
query contract tests.""" - -import pytest - -from bracc.models.pattern import PATTERN_METADATA -from bracc.services.intelligence_provider import COMMUNITY_PATTERN_IDS, COMMUNITY_PATTERN_QUERIES -from bracc.services.neo4j_service import CypherLoader - - -def test_community_pattern_registry_exact_ids() -> None: - assert len(COMMUNITY_PATTERN_IDS) == 8 - assert set(COMMUNITY_PATTERN_IDS) == { - "sanctioned_still_receiving", - "amendment_beneficiary_contracts", - "split_contracts_below_threshold", - "contract_concentration", - "embargoed_receiving", - "debtor_contracts", - "srp_multi_org_hitchhiking", - "inexigibility_recurrence", - } - - -def test_community_pattern_query_mapping_is_complete() -> None: - assert set(COMMUNITY_PATTERN_QUERIES.keys()) == set(COMMUNITY_PATTERN_IDS) - for query_name in COMMUNITY_PATTERN_QUERIES.values(): - assert query_name.startswith("public_pattern_") - - -@pytest.mark.parametrize("query_name", COMMUNITY_PATTERN_QUERIES.values()) -def test_public_pattern_query_files_load(query_name: str) -> None: - try: - CypherLoader.load(query_name) - finally: - CypherLoader.clear_cache() - - -@pytest.mark.parametrize("query_name", COMMUNITY_PATTERN_QUERIES.values()) -def test_public_pattern_query_required_return_aliases(query_name: str) -> None: - try: - cypher = CypherLoader.load(query_name) - finally: - CypherLoader.clear_cache() - - for required_alias in ( - " AS pattern_id", - " AS risk_signal", - " AS amount_total", - " AS window_start", - " AS window_end", - " AS evidence_refs", - " AS evidence_count", - ): - assert required_alias in cypher, f"{query_name}.cypher missing alias: {required_alias}" - - -@pytest.mark.parametrize("pattern_id", COMMUNITY_PATTERN_IDS) -def test_community_pattern_metadata_is_present(pattern_id: str) -> None: - meta = PATTERN_METADATA.get(pattern_id) - assert meta is not None - assert meta.get("name_pt") - assert meta.get("name_en") - assert meta.get("desc_pt") - assert meta.get("desc_en") - - -def test_threshold_params_used_in_threshold_patterns() -> None: - query_params = { - "public_pattern_split_contracts_below_threshold": "$pattern_split_threshold_value", - "public_pattern_contract_concentration": "$pattern_share_threshold", - "public_pattern_srp_multi_org_hitchhiking": "$pattern_srp_min_orgs", - "public_pattern_inexigibility_recurrence": "$pattern_inexig_min_recurrence", - } - for query_name, required_param in query_params.items(): - try: - cypher = CypherLoader.load(query_name) - finally: - CypherLoader.clear_cache() - assert required_param in cypher, f"{query_name}.cypher missing {required_param}" diff --git a/api/tests/unit/test_public_mode.py b/api/tests/unit/test_public_mode.py index 72506ab..71c85a4 100644 --- a/api/tests/unit/test_public_mode.py +++ b/api/tests/unit/test_public_mode.py @@ -225,6 +225,135 @@ async def test_public_graph_company_filters_person_nodes(client: AsyncClient) -> assert len(payload["edges"]) == 0 +@pytest.mark.anyio +async def test_baseline_disabled_in_public_mode( + client: AsyncClient, + monkeypatch: pytest.MonkeyPatch, +) -> None: + monkeypatch.setattr(settings, "public_mode", True) + monkeypatch.setattr(settings, "public_allow_entity_lookup", False) + response = await client.get("/api/v1/baseline/test-id") + assert response.status_code == 403 + assert "disabled in public mode" in response.json()["detail"] + + +@pytest.mark.anyio +async def test_stats_hides_person_count_in_public_mode( + client: AsyncClient, + monkeypatch: pytest.MonkeyPatch, +) -> None: + monkeypatch.setattr(settings, "public_mode", True) 
+ monkeypatch.setattr(settings, "public_allow_person", False) + # Clear stats cache to ensure fresh computation + import bracc.routers.meta as meta_mod + monkeypatch.setattr(meta_mod, "_stats_cache", None) + + fake_record = { + "total_nodes": 100, + "total_relationships": 200, + "person_count": 999, + "company_count": 50, + "health_count": 10, + "finance_count": 5, + "contract_count": 20, + "sanction_count": 3, + "election_count": 7, + "amendment_count": 4, + "embargo_count": 2, + "education_count": 6, + "convenio_count": 8, + "laborstats_count": 9, + "offshore_entity_count": 1, + "offshore_officer_count": 2, + "global_pep_count": 3, + "cvm_proceeding_count": 4, + "expense_count": 11, + "pep_record_count": 12, + "expulsion_count": 13, + "leniency_count": 14, + "international_sanction_count": 15, + "gov_card_expense_count": 16, + "gov_travel_count": 17, + "bid_count": 18, + "fund_count": 19, + "dou_act_count": 20, + "tax_waiver_count": 21, + "municipal_finance_count": 22, + "declared_asset_count": 23, + "party_membership_count": 24, + "barred_ngo_count": 25, + "bcb_penalty_count": 26, + "labor_movement_count": 27, + "legal_case_count": 28, + "judicial_case_count": 29, + "source_document_count": 30, + "ingestion_run_count": 31, + "temporal_violation_count": 32, + "cpi_count": 33, + "inquiry_requirement_count": 34, + "inquiry_session_count": 35, + "municipal_bid_count": 36, + "municipal_contract_count": 37, + "municipal_gazette_act_count": 38, + } + with patch( + "bracc.routers.meta.execute_query_single", + new_callable=AsyncMock, + return_value=fake_record, + ), patch( + "bracc.routers.meta.load_source_registry", + return_value=[], + ), patch( + "bracc.routers.meta.source_registry_summary", + return_value={ + "universe_v1_sources": 0, + "implemented_sources": 0, + "loaded_sources": 0, + "healthy_sources": 0, + "stale_sources": 0, + "blocked_external_sources": 0, + "quality_fail_sources": 0, + "discovered_uningested_sources": 0, + }, + ): + response = await client.get("/api/v1/meta/stats") + + assert response.status_code == 200 + payload = response.json() + assert payload["person_count"] == 0 + assert payload["company_count"] == 50 # non-person counts preserved + + +@pytest.mark.anyio +async def test_timeline_sanitizes_properties_in_public_mode( + client: AsyncClient, + monkeypatch: pytest.MonkeyPatch, +) -> None: + monkeypatch.setattr(settings, "public_mode", True) + monkeypatch.setattr(settings, "public_allow_entity_lookup", True) + mock_records = [ + { + "lbls": ["Contract"], + "props": {"type": "licitacao", "cpf": "12345678900", "value": 50000.0}, + "event_date": "2024-01-15", + "id": "evt-1", + }, + ] + with patch( + "bracc.routers.entity.execute_query", + new_callable=AsyncMock, + return_value=mock_records, + ): + response = await client.get("/api/v1/entity/test-id/timeline") + + assert response.status_code == 200 + payload = response.json() + assert len(payload["events"]) == 1 + event_props = payload["events"][0]["properties"] + assert "cpf" not in event_props + assert event_props["value"] == 50000.0 + + @pytest.mark.anyio async def test_investigations_disabled_in_public_mode( client: AsyncClient, diff --git a/api/tests/unit/test_rate_limit.py b/api/tests/unit/test_rate_limit.py index dc6a781..f401a52 100644 --- a/api/tests/unit/test_rate_limit.py +++ b/api/tests/unit/test_rate_limit.py @@ -1,24 +1,15 @@ from unittest.mock import MagicMock -from bracc.config import settings from bracc.middleware.rate_limit import _get_rate_limit_key, limiter from bracc.services.auth_service import 
create_access_token -def _make_request( - auth_header: str | None = None, - client_ip: str = "127.0.0.1", - cookie_token: str | None = None, - x_forwarded_for: str | None = None, -) -> MagicMock: +def _make_request(auth_header: str | None = None, client_ip: str = "127.0.0.1") -> MagicMock: request = MagicMock() headers: dict[str, str] = {} if auth_header: headers["authorization"] = auth_header - if x_forwarded_for: - headers["x-forwarded-for"] = x_forwarded_for request.headers = headers - request.cookies = {settings.auth_cookie_name: cookie_token} if cookie_token else {} request.client = MagicMock() request.client.host = client_ip return request @@ -43,23 +34,5 @@ def test_key_func_invalid_token_fallback() -> None: assert key == "10.0.0.1" -def test_key_func_extracts_user_from_cookie_token() -> None: - token = create_access_token("cookie-user-1") - request = _make_request(cookie_token=token) - key = _get_rate_limit_key(request) - assert key == "user:cookie-user-1" - - -def test_key_func_uses_forwarded_ip_when_enabled() -> None: - original = settings.trust_proxy_headers - try: - settings.trust_proxy_headers = True - request = _make_request(client_ip="127.0.0.1", x_forwarded_for="203.0.113.9, 10.0.0.4") - key = _get_rate_limit_key(request) - assert key == "203.0.113.9" - finally: - settings.trust_proxy_headers = original - - def test_limiter_instance_exists() -> None: assert limiter is not None diff --git a/api/tests/unit/test_search.py b/api/tests/unit/test_search.py index 494171c..bdc9fe8 100644 --- a/api/tests/unit/test_search.py +++ b/api/tests/unit/test_search.py @@ -1,21 +1,6 @@ import pytest from httpx import AsyncClient -from bracc.routers.search import _escape_lucene - - -def test_escape_lucene_cnpj() -> None: - assert _escape_lucene("00.000.000/0001-00") == "00.000.000\\/0001\\-00" - - -def test_escape_lucene_plain_text() -> None: - assert _escape_lucene("silva construcoes") == "silva construcoes" - - -def test_escape_lucene_all_special_chars() -> None: - for ch in r'+-&|!(){}[]^"~*?:\/': - assert f"\\{ch}" in _escape_lucene(ch) - @pytest.mark.anyio async def test_search_rejects_short_query(client: AsyncClient) -> None: diff --git a/api/uv.lock b/api/uv.lock index 1cae4a8..ddb9dbc 100644 --- a/api/uv.lock +++ b/api/uv.lock @@ -103,6 +103,56 @@ wheels = [ { url = "https://files.pythonhosted.org/packages/27/44/d2ef5e87509158ad2187f4dd0852df80695bb1ee0cfe0a684727b01a69e0/bcrypt-5.0.0-cp39-abi3-win_arm64.whl", hash = "sha256:f2347d3534e76bf50bca5500989d6c1d05ed64b440408057a37673282c654927", size = 144953, upload-time = "2025-09-25T19:50:37.32Z" }, ] +[[package]] +name = "bracc-api" +version = "0.1.0" +source = { editable = "." 
} +dependencies = [ + { name = "bcrypt" }, + { name = "fastapi" }, + { name = "jinja2" }, + { name = "neo4j" }, + { name = "pydantic" }, + { name = "pydantic-settings" }, + { name = "pyjwt", extra = ["crypto"] }, + { name = "python-multipart" }, + { name = "slowapi" }, + { name = "uvicorn", extra = ["standard"] }, + { name = "weasyprint" }, +] + +[package.optional-dependencies] +dev = [ + { name = "httpx" }, + { name = "mypy" }, + { name = "pytest" }, + { name = "pytest-asyncio" }, + { name = "ruff" }, + { name = "testcontainers", extra = ["neo4j"] }, +] + +[package.metadata] +requires-dist = [ + { name = "bcrypt", specifier = ">=4.0.0" }, + { name = "fastapi", specifier = ">=0.115.0" }, + { name = "httpx", marker = "extra == 'dev'", specifier = ">=0.28.0" }, + { name = "jinja2", specifier = ">=3.1.0" }, + { name = "mypy", marker = "extra == 'dev'", specifier = ">=1.14.0" }, + { name = "neo4j", specifier = ">=5.27.0" }, + { name = "pydantic", specifier = ">=2.10.0" }, + { name = "pydantic-settings", specifier = ">=2.7.0" }, + { name = "pyjwt", extras = ["crypto"], specifier = ">=2.9.0" }, + { name = "pytest", marker = "extra == 'dev'", specifier = ">=8.0" }, + { name = "pytest-asyncio", marker = "extra == 'dev'", specifier = ">=0.24.0" }, + { name = "python-multipart", specifier = ">=0.0.18" }, + { name = "ruff", marker = "extra == 'dev'", specifier = ">=0.9.0" }, + { name = "slowapi", specifier = ">=0.1.9" }, + { name = "testcontainers", extras = ["neo4j"], marker = "extra == 'dev'", specifier = ">=4.0" }, + { name = "uvicorn", extras = ["standard"], specifier = ">=0.34.0" }, + { name = "weasyprint", specifier = ">=62.0" }, +] +provides-extras = ["dev"] + [[package]] name = "brotli" version = "1.2.0" @@ -523,56 +573,6 @@ wheels = [ { url = "https://files.pythonhosted.org/packages/2a/39/e50c7c3a983047577ee07d2a9e53faf5a69493943ec3f6a384bdc792deb2/httpx-0.28.1-py3-none-any.whl", hash = "sha256:d909fcccc110f8c7faf814ca82a9a4d816bc5a6dbfea25d6591d6985b8ba59ad", size = 73517, upload-time = "2024-12-06T15:37:21.509Z" }, ] -[[package]] -name = "bracc-api" -version = "0.1.0" -source = { editable = "." 
} -dependencies = [ - { name = "bcrypt" }, - { name = "fastapi" }, - { name = "jinja2" }, - { name = "neo4j" }, - { name = "pydantic" }, - { name = "pydantic-settings" }, - { name = "pyjwt", extra = ["crypto"] }, - { name = "python-multipart" }, - { name = "slowapi" }, - { name = "uvicorn", extra = ["standard"] }, - { name = "weasyprint" }, -] - -[package.optional-dependencies] -dev = [ - { name = "httpx" }, - { name = "mypy" }, - { name = "pytest" }, - { name = "pytest-asyncio" }, - { name = "ruff" }, - { name = "testcontainers", extra = ["neo4j"] }, -] - -[package.metadata] -requires-dist = [ - { name = "bcrypt", specifier = ">=4.0.0" }, - { name = "fastapi", specifier = ">=0.115.0" }, - { name = "httpx", marker = "extra == 'dev'", specifier = ">=0.28.0" }, - { name = "jinja2", specifier = ">=3.1.0" }, - { name = "mypy", marker = "extra == 'dev'", specifier = ">=1.14.0" }, - { name = "neo4j", specifier = ">=5.27.0" }, - { name = "pydantic", specifier = ">=2.10.0" }, - { name = "pydantic-settings", specifier = ">=2.7.0" }, - { name = "pyjwt", extras = ["crypto"], specifier = ">=2.9.0" }, - { name = "pytest", marker = "extra == 'dev'", specifier = ">=8.0" }, - { name = "pytest-asyncio", marker = "extra == 'dev'", specifier = ">=0.24.0" }, - { name = "python-multipart", specifier = ">=0.0.18" }, - { name = "ruff", marker = "extra == 'dev'", specifier = ">=0.9.0" }, - { name = "slowapi", specifier = ">=0.1.9" }, - { name = "testcontainers", extras = ["neo4j"], marker = "extra == 'dev'", specifier = ">=4.0" }, - { name = "uvicorn", extras = ["standard"], specifier = ">=0.34.0" }, - { name = "weasyprint", specifier = ">=62.0" }, -] -provides-extras = ["dev"] - [[package]] name = "idna" version = "3.11" diff --git a/data/.gitkeep b/data/.gitkeep new file mode 100644 index 0000000..e69de29 diff --git a/data/cnpj/extracted/.gitkeep b/data/cnpj/extracted/.gitkeep new file mode 100644 index 0000000..e69de29 diff --git a/data/cnpj/raw/.gitkeep b/data/cnpj/raw/.gitkeep new file mode 100644 index 0000000..e69de29 diff --git a/data/cnpj/reference/.gitkeep b/data/cnpj/reference/.gitkeep new file mode 100644 index 0000000..e69de29 diff --git a/docs/brand/bracc-header.png b/docs/brand/bracc-header.png deleted file mode 100644 index 7cbd43b..0000000 Binary files a/docs/brand/bracc-header.png and /dev/null differ diff --git a/docs/brand/wtg-header.png b/docs/brand/wtg-header.png deleted file mode 100644 index 5e2fa56..0000000 Binary files a/docs/brand/wtg-header.png and /dev/null differ diff --git a/docs/data-sources.md b/docs/data-sources.md index 8c1ea34..cd35c72 100644 --- a/docs/data-sources.md +++ b/docs/data-sources.md @@ -1,24 +1,13 @@ -# BRACC Data Source Catalog +# ICARUS Data Source Catalog - -**Generated from `docs/source_registry_br_v1.csv` (as-of UTC: 2026-03-01T23:05:00Z)** - -- Universe v1 sources: 108 -- Implemented pipelines: 45 -- Loaded sources (load_state=loaded): 36 -- Partial sources (load_state=partial): 8 -- Not loaded sources (load_state=not_loaded): 64 -- Status counts: loaded=36, partial=5, stale=3, blocked_external=1, not_built=63 - - -Catalog note: counts and status labels are generated from the public registry (`docs/source_registry_br_v1.csv`). -This document includes reference production inventory context and backlog discovery; it is not a guarantee that every listed source is currently loaded in your local environment. +**38 loaded | 3 pipelines pending data | 60+ not yet built** +Last updated: 2026-02-26 --- -## 1. 
Reference Production Snapshot (Loaded/Implemented Inventory) +## 1. LOADED (38 sources) -The table below is a timestamped reference snapshot and should be interpreted together with the generated summary block above. +All sources below have working ETL pipelines in `etl/src/icarus_etl/pipelines/` and are loaded into production Neo4j. | # | Source | Pipeline | Nodes Created | Rels Created | Notes | |---|--------|----------|---------------|--------------|-------| diff --git a/docs/demo/dataset-contract.md b/docs/demo/dataset-contract.md deleted file mode 100644 index d03eda7..0000000 --- a/docs/demo/dataset-contract.md +++ /dev/null @@ -1,29 +0,0 @@ -# Demo Dataset Contract (WTG Open) - -## Objective -Provide a reproducible, public-safe demo graph with synthetic records only. - -## Safety rules -- Synthetic data only. No real CPF, no real personal names, no real personal addresses. -- Company identifiers may use synthetic CNPJ-like values reserved for demonstration. -- Demo graph cannot include `Person` or `Partner` labels. -- Demo exports must never include private or operational metadata. - -## Required files -- `data/demo/synthetic_graph.json` -- `data/demo/README.md` -- `scripts/generate_demo_dataset.py` - -## JSON schema (minimum) -- `nodes[]`: `{id, label, type, properties}` -- `edges[]`: `{id, source, target, type, properties}` -- `meta`: `{generated_at_utc, generator_version, source: "synthetic"}` - -## Acceptance checks -- No field name contains `cpf`, `doc_partial`, or `doc_raw`. -- No node label equals `Person` or `Partner`. -- CI privacy gate passes. - -## Runtime target -- Dedicated demo Neo4j instance (non-production). -- Public API served with `PUBLIC_MODE=true`. diff --git a/docs/release/community_announcement_template.md b/docs/release/community_announcement_template.md index f4ec9a7..18379cc 100644 --- a/docs/release/community_announcement_template.md +++ b/docs/release/community_announcement_template.md @@ -14,7 +14,6 @@ Resumo: Release notes: {release_url} Observação de integridade: os sinais refletem coocorrências em bases públicas e não constituem prova legal. -Divulgação obrigatória: o repositório público entrega engine + demo + fluxo BYO-data; métricas de escala são snapshots de referência com timestamp. ## Short post (EN) @@ -28,7 +27,6 @@ Summary: Release notes: {release_url} Integrity note: signals reflect co-occurrence in public records and are not legal proof. -Mandatory disclosure: the public repo ships engine + demo + BYO-data workflow; production-scale metrics are timestamped reference snapshots. 
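The demo dataset contract removed above fixed a minimal JSON shape (`nodes[]`, `edges[]`, `meta`) plus privacy acceptance checks for `data/demo/synthetic_graph.json`. A minimal sketch of a validator enforcing those checks, assuming plain dict input — names here are illustrative, not the repo's actual `scripts/generate_demo_dataset.py`:

```python
import json
from datetime import datetime, timezone

# Illustrative constants taken from the contract's acceptance checks.
FORBIDDEN_FIELDS = {"cpf", "doc_partial", "doc_raw"}
FORBIDDEN_LABELS = {"Person", "Partner"}


def validate_demo_graph(graph: dict) -> list[str]:
    """Return contract violations; an empty list means the graph passes."""
    errors: list[str] = []
    for node in graph.get("nodes", []):
        if node.get("label") in FORBIDDEN_LABELS:
            errors.append(f"forbidden label on node {node.get('id')}")
        if FORBIDDEN_FIELDS & set(node.get("properties", {})):
            errors.append(f"forbidden field on node {node.get('id')}")
    for edge in graph.get("edges", []):
        if FORBIDDEN_FIELDS & set(edge.get("properties", {})):
            errors.append(f"forbidden field on edge {edge.get('id')}")
    if graph.get("meta", {}).get("source") != "synthetic":
        errors.append("meta.source must be 'synthetic'")
    return errors


# Smallest graph satisfying the contract: one synthetic company, no people.
demo = {
    "nodes": [
        {"id": "n1", "label": "Company", "type": "company",
         "properties": {"name": "Synthetic Co", "cnpj": "00000000000100"}},
    ],
    "edges": [],
    "meta": {
        "generated_at_utc": datetime.now(timezone.utc).isoformat(),
        "generator_version": "0.0.1",
        "source": "synthetic",
    },
}
assert validate_demo_graph(demo) == []
print(json.dumps(demo, indent=2))
```

A check along these lines mirrors what the CI privacy gate is described as enforcing: no `Person`/`Partner` labels and no `cpf`-like field names anywhere in the demo export.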
## Discord/Telegram long form (PT+EN) @@ -44,11 +42,6 @@ Mandatory disclosure: the public repo ships engine + demo + BYO-data workflow; p **Compatibilidade** - {pt_compat} -**Reproducibility Reality Check** -- Funciona agora: {pt_works_now} -- Requer ingestão de dados: {pt_requires_ingestion} -- Não incluído por padrão: {pt_not_included} - **Link** - {release_url} @@ -64,10 +57,5 @@ Mandatory disclosure: the public repo ships engine + demo + BYO-data workflow; p **Compatibility** - {en_compat} -**Reproducibility Reality Check** -- Works now: {en_works_now} -- Requires data ingestion: {en_requires_ingestion} -- Not included by default: {en_not_included} - **Link** - {release_url} diff --git a/docs/release/public_boundary_matrix.csv b/docs/release/public_boundary_matrix.csv index 9f4ff1a..e200cad 100644 --- a/docs/release/public_boundary_matrix.csv +++ b/docs/release/public_boundary_matrix.csv @@ -7,8 +7,8 @@ docs/**,PUBLIC with review,Keep public documentation and legal pack,include revi .github/workflows/**,PUBLIC,CI and security transparency,include scripts/**,PUBLIC with review,Keep public utilities and gates,include reviewed subset data/demo/**,PUBLIC,Synthetic demo dataset only,include -api/src/bracc/services/pattern_service.py,REMOVE_FROM_PUBLIC,Pattern engine disabled pending validation,exclude -api/src/bracc/queries/pattern_*.cypher,REMOVE_FROM_PUBLIC,Pattern query engine disabled pending validation,exclude +api/src/icarus/services/pattern_service.py,REMOVE_FROM_PUBLIC,Pattern engine disabled pending validation,exclude +api/src/icarus/queries/pattern_*.cypher,REMOVE_FROM_PUBLIC,Pattern query engine disabled pending validation,exclude scripts/auto_finalize_pncp_backfill.sh,REMOVE_FROM_PUBLIC,Production operational script with server-specific assumptions,exclude docs/shadow_rollout_runbook.md,REMOVE_FROM_PUBLIC,Production operational runbook details,exclude docs/ingestion_priority_runbook.md,REMOVE_FROM_PUBLIC,Production operational runbook details,exclude diff --git a/docs/release/public_repo_release_checklist.md b/docs/release/public_repo_release_checklist.md index 73c4f6c..8799cf8 100644 --- a/docs/release/public_repo_release_checklist.md +++ b/docs/release/public_repo_release_checklist.md @@ -1,56 +1,78 @@ -# Public Repo Release Checklist — `World-Open-Graph/br-acc` - -## 1) Pre-release gate - -1. Confirm target merge commit exists on `main`. -2. Confirm CI + Security + Public gates are green on that commit. -3. Confirm PR is merged with exactly one release label. - -## 2) Public boundary checks +# Public Repo Release Checklist — World Transparency Graph +## 1) Prepare sanitized snapshot ```bash -python scripts/check_public_privacy.py --repo-root . -python scripts/check_compliance_pack.py --repo-root . -python scripts/check_open_core_boundary.py --repo-root . +bash scripts/prepare_public_snapshot.sh /Users/brunoclz/CORRUPTOS /tmp/world-transparency-graph-public ``` -Expected: all `PASS`. - -## 3) Snapshot hygiene (optional verification) - +## 2) Initialize clean-history repo from snapshot ```bash -bash scripts/prepare_public_snapshot.sh . /tmp/br-acc-public -python /tmp/br-acc-public/scripts/check_public_privacy.py --repo-root /tmp/br-acc-public -python /tmp/br-acc-public/scripts/check_compliance_pack.py --repo-root /tmp/br-acc-public -python /tmp/br-acc-public/scripts/check_open_core_boundary.py --repo-root /tmp/br-acc-public +cd /tmp/world-transparency-graph-public +git init +git add . +git commit -m "Initial public release (WTG)" ``` -Expected in snapshot: - -- No `CLAUDE.md`. 
-- No `AGENTS.md` or `AGENTS*.md`. -- No private operational runbooks outside public scope. - -## 4) Publish release (manual workflow) +## 3) Create GitHub repository (manual) +- Owner: `brunoclz` +- Name: `world-transparency-graph` +- Visibility: Public +- Do not auto-add README/License (already present) -In GitHub Actions, run **Publish Release** with: - -- `version`: SemVer tag (e.g. `v0.3.0`, `v0.3.1-rc.1`) -- `target_sha`: merge commit on `main` -- `prerelease`: `false` (stable) or `true` (RC) -- `title_pt`: release title PT-BR -- `title_en`: release title EN - -## 5) Verify outputs - -1. Tag exists in repository. -2. Release page published under `/releases`. -3. Notes include PT+EN and non-accusatory disclaimer. -4. `release_manifest.json` asset is attached. -5. Compare link is valid (`previous_tag...new_tag`). - -## 6) Community communication +## 4) Push initial release +```bash +git branch -M main +git remote add origin https://github.com/brunoclz/world-transparency-graph.git +git push -u origin main +``` -1. Use `docs/release/community_announcement_template.md`. -2. Publish short PT+EN summary with release URL. -3. Keep wording factual: “signals/co-occurrence”, never accusatory language. +## 5) Configure branch protection (GitHub UI) +Require all checks: +- `API (Python)` +- `ETL (Python)` +- `Frontend (TypeScript)` +- `Neutrality Audit` +- `Gitleaks` +- `Bandit (Python)` +- `Pip Audit (Python deps)` +- `Public Privacy Gate` +- `Compliance Pack Gate` +- `Public Boundary Gate` + +## 6) Configure environment defaults +- Set public deployment environment vars: + - `PRODUCT_TIER=community` + - `PUBLIC_MODE=true` + - `PUBLIC_ALLOW_PERSON=false` + - `PUBLIC_ALLOW_ENTITY_LOOKUP=false` + - `PUBLIC_ALLOW_INVESTIGATIONS=false` + - `PATTERNS_ENABLED=false` + - `VITE_PUBLIC_MODE=true` + - `VITE_PATTERNS_ENABLED=false` + +## 7) Final checks before launch +- `python scripts/check_public_privacy.py --repo-root .` => `PASS` +- `python scripts/check_compliance_pack.py --repo-root .` => `PASS` +- `python scripts/check_open_core_boundary.py --repo-root .` => `PASS` +- Confirm no internal runbooks in public repo +- Confirm demo data is synthetic (`data/demo/synthetic_graph.json`) +- Confirm all legal docs exist in root: + - `ETHICS.md` + - `LGPD.md` + - `PRIVACY.md` + - `TERMS.md` + - `DISCLAIMER.md` + - `SECURITY.md` + - `ABUSE_RESPONSE.md` + +## 8) Launch communication split +- Publish product announcement as **WTG** +- Publish movement announcement as **BRCC** +- Mention methodology limits and non-accusatory policy + +## 9) Release system bootstrap +- Ensure `.github/release.yml` exists for auto-notes categories. +- Ensure `.github/release-drafter.yml` + workflow are active. +- Ensure `publish-release.yml` workflow is present and dispatchable. +- Ensure release label taxonomy is documented and applied to PRs. +- Publish first policy-compliant tag from this stream (`v0.3.0`). diff --git a/docs/release/release_policy.md b/docs/release/release_policy.md index 7f9bfc3..de6ca83 100644 --- a/docs/release/release_policy.md +++ b/docs/release/release_policy.md @@ -48,11 +48,10 @@ A release can only be published from a commit on `main` where all required gates Every release must include PT-BR and EN sections with: 1. Scope summary. -2. Notable changes (explicit bullet points). -3. Included pattern IDs when release contains pattern/signal changes. -4. Compatibility/breaking notes. -5. Privacy/compliance notes when applicable. -6. Non-accusatory disclaimer. +2. Notable changes. +3. 
Compatibility/breaking notes. +4. Privacy/compliance notes when applicable. +5. Non-accusatory disclaimer. ## Artifacts diff --git a/docs/release/release_runbook.md b/docs/release/release_runbook.md index 33ca74b..491f11a 100644 --- a/docs/release/release_runbook.md +++ b/docs/release/release_runbook.md @@ -37,19 +37,6 @@ For validation cycles use RC: - `prerelease`: `true` for RC, `false` for stable - `title_pt`: short PT-BR title - `title_en`: short EN title -- `highlights_pt`: PT highlights separated by `|` -- `highlights_en`: EN highlights separated by `|` -- `patterns_included`: comma-separated pattern IDs (use `none` when not applicable) -- `technical_changes_pt`: PT technical changes separated by `|` -- `technical_changes_en`: EN technical changes separated by `|` - -Example inputs for a pattern release: - -- `highlights_pt`: `Port de 8 padrões públicos factuais | Padronização de payload público` -- `highlights_en`: `Port of 8 factual public-safe patterns | Public payload standardization` -- `patterns_included`: `sanctioned_still_receiving,amendment_beneficiary_contracts,split_contracts_below_threshold,contract_concentration,embargoed_receiving,debtor_contracts,srp_multi_org_hitchhiking,inexigibility_recurrence` -- `technical_changes_pt`: `Provider community de 4 para 8 padrões | ETL criou relação Contract-REFERENTE_A-Bid` -- `technical_changes_en`: `Community provider expanded from 4 to 8 patterns | ETL created Contract-REFERENTE_A-Bid linkage` ## 4) Workflow validations performed @@ -65,7 +52,7 @@ The workflow blocks publication when: On success the workflow: 1. Creates and pushes an annotated tag. -2. Creates GitHub Release (PT+EN notes) with explicit highlights, patterns, and technical changes. +2. Creates GitHub Release (PT+EN notes). 3. Uploads `release_manifest.json` asset. ## 6) Post-release checklist @@ -73,7 +60,6 @@ On success the workflow: 1. Open the release page and confirm: - version tag is correct, - PT+EN notes are present, -- included patterns are explicitly listed (or marked as none), - non-accusatory disclaimer line is present, - `release_manifest.json` is attached. 2. Share release link in community channels. diff --git a/docs/source_onboarding_contract.md b/docs/source_onboarding_contract.md deleted file mode 100644 index c58602c..0000000 --- a/docs/source_onboarding_contract.md +++ /dev/null @@ -1,67 +0,0 @@ -# Source Onboarding Contract (Brazil Coverage v1) - -This contract is mandatory for every new source before `shadow -> promote`. - -## 1. Source Identity -- `source_id`: -- `name`: -- `category`: -- `tier`: -- `owner_agent`: -- `primary_url`: -- `access_mode` (`file|api|bigquery|web`): -- `public_access_mode` (`open|open_with_rate_limit|registration|credentialed_public`): -- `discovery_status` (`discovered|discovered_uningested|monitored|unreachable`): -- `last_seen_url`: -- `cadence_expected`: -- `cadence_observed`: -- `quality_status` (`healthy|stale|quality_fail|blocked_external|not_built|partial|loaded`): - -## 2. Access and Legal -- Credential required: -- Secret name/path: -- License or usage restriction: -- LGPD/privacy considerations: -- `blocked_external` criteria: - -## 3. Data Contract -- Downloader script: `etl/scripts/download_.py` -- Canonical output files: -- Manifest file: -- Manifest mandatory fields (`run_id`, `source_id`, `window_start`, `window_end`, `rows`, `error`, `checksum`, `retrieved_at_utc`): -- Update cadence: -- Expected row volume: -- Partition/window strategy: - -## 4. 
Graph Contract -- Node labels introduced: -- Relationship types introduced: -- Natural key(s) per node: -- Merge key strategy: -- Relationship quality tier (`strong|probable`): -- Provenance fields (`method`, `confidence`, `source_ref`, `run_id`): - -## 5. Index and Constraint Contract -- Required uniqueness constraints: -- Required date indexes: -- Required lookup indexes: -- Required fulltext indexes (if text-heavy): - -## 6. Quality Gates (Hard Stop/Go) -- Identity integrity preserved (`Person.cpf` masked = 0, 14-digit = 0): -- Freshness SLA threshold: -- Temporal sanity (`<= now + 365d`): -- Null/duplicate key thresholds: -- Mandatory non-zero nodes/rels: - -## 7. Operational Flow -- Shadow load command: -- Gate runner commands: -- API smoke checks: -- Promote command: -- Rollback command: - -## 8. Acceptance -- Evidence bundle path in `audit-results/`: -- Final status: `resolved | resolved_full | blocked_external | quality_fail` -- Reviewer sign-off: diff --git a/docs/source_registry_br_v1.csv b/docs/source_registry_br_v1.csv index 1789a8e..2e941f2 100644 --- a/docs/source_registry_br_v1.csv +++ b/docs/source_registry_br_v1.csv @@ -1,109 +1,109 @@ -source_id,name,category,tier,status,implementation_state,load_state,frequency,in_universe_v1,primary_url,pipeline_id,owner_agent,access_mode,notes,public_access_mode,discovery_status,last_seen_url,cadence_expected,cadence_observed,quality_status,last_verified_utc,verification_status -cnpj,Receita Federal CNPJ,identity,P0,loaded,implemented,loaded,monthly,true,https://dadosabertos.rfb.gov.br/CNPJ/,cnpj,Agent A,file,http://dadosabertos.rfb.gov.br,,monitored,,,,healthy,2026-03-01T23:11:31.444615+00:00,transient_error -tse,TSE elections and donations,electoral,P0,loaded,implemented,loaded,biennial,true,https://dadosabertos.tse.jus.br/,tse,Agent E,file,Core electoral data loaded,,monitored,,,,healthy,2026-03-01T23:11:31.444615+00:00,ok -transparencia,Portal da Transparencia contracts,contracts,P0,loaded,implemented,loaded,monthly,true,https://portaldatransparencia.gov.br/download-de-dados,transparencia,Agent C,file,Federal contracts and servants,,monitored,,,,healthy,2026-03-01T23:11:31.444615+00:00,ok -sanctions,CEIS CNEP sanctions,sanctions,P0,loaded,implemented,loaded,monthly,true,https://portaldatransparencia.gov.br/sancoes/consulta,sanctions,Agent C,file,Administrative sanctions,,monitored,,,,healthy,2026-03-01T23:11:31.444615+00:00,ok -pep_cgu,CGU PEP list,integrity,P1,loaded,implemented,loaded,monthly,true,https://portaldatransparencia.gov.br/download-de-dados/pep,pep_cgu,Agent A,file,PEP baseline,,monitored,,,,healthy,2026-03-01T23:11:31.444615+00:00,ok -bndes,BNDES financings,finance,P1,loaded,implemented,loaded,monthly,true,https://www.bndes.gov.br/wps/portal/site/home/transparencia/dados,bndes,Agent G,file,Loan relationships,,monitored,,,,healthy,2026-03-01T23:11:31.444615+00:00,ok -pgfn,PGFN divida ativa,fiscal,P0,loaded,implemented,loaded,monthly,true,https://www.regularize.pgfn.gov.br/dados-abertos,pgfn,Agent C,file,Debt risk core,,monitored,,,,healthy,2026-03-01T23:11:31.444615+00:00,ok -ibama,IBAMA embargos,environment,P1,loaded,implemented,loaded,monthly,true,https://servicos.ibama.gov.br/ctf/publico/areasembargadas/,ibama,Agent F,file,Environmental enforcement,,monitored,,,,healthy,2026-03-01T23:11:31.444615+00:00,ok -comprasnet,ComprasNet contracts,contracts,P0,stale,implemented,partial,monthly,true,https://dados.gov.br/dados/conjuntos-dados/comprasnet-contratos,comprasnet,Agent C,file,Needs freshness 
backfill,,monitored,,,,stale,2026-03-01T23:11:31.444615+00:00,ok -tcu,TCU sanctions,audit,P1,loaded,implemented,loaded,monthly,true,https://contas.tcu.gov.br/ords/f?p=INIDONEAS:INIDONEAS,tcu,Agent C,file,Inidoneidade sanctions,,monitored,,,,healthy,2026-03-01T23:11:31.444615+00:00,ok -transferegov,TransfereGov emendas e convenios,transfers,P0,loaded,implemented,loaded,monthly,true,https://www.transferegov.sistema.gov.br/portal/download-de-dados,transferegov,Agent C,file,Transfer relationships,,monitored,,,,healthy,2026-03-01T23:11:31.444615+00:00,transient_error -rais,RAIS aggregated labor,labor,P1,loaded,implemented,loaded,annual,true,https://basedosdados.org/dataset/br-me-rais,rais,Agent H,bigquery,Aggregate mode only,,monitored,,,,healthy,2026-03-01T23:11:31.444615+00:00,ok -inep,INEP school census,education,P2,loaded,implemented,loaded,annual,true,https://www.gov.br/inep/pt-br/acesso-a-informacao/dados-abertos/microdados/censo-escolar,inep,Agent H,file,Education coverage,,monitored,,,,healthy,2026-03-01T23:11:31.444615+00:00,auth_or_rate_limited -dou,Diario Oficial da Uniao,gazette,P0,loaded,implemented,loaded,daily,true,https://www.in.gov.br/leiturajornal,dou,Agent E,bigquery,National acts ingestion,,monitored,,,,healthy,2026-03-01T23:11:31.444615+00:00,auth_or_rate_limited -datasus,DATASUS CNES,health,P1,loaded,implemented,loaded,monthly,true,https://opendatasus.saude.gov.br/,datasus,Agent H,file,Health establishments,,monitored,,,,healthy,2026-03-01T23:11:31.444615+00:00,ok -icij,ICIJ offshore leaks,offshore,P1,loaded,implemented,loaded,yearly,true,https://offshoreleaks.icij.org/pages/database,icij,Agent G,file,Offshore entities and officers,,monitored,,,,healthy,2026-03-01T23:11:31.444615+00:00,ok -opensanctions,OpenSanctions global PEP,sanctions,P1,loaded,implemented,loaded,monthly,true,https://www.opensanctions.org/datasets/peps/,opensanctions,Agent G,file,Global PEP matching,,monitored,,,,healthy,2026-03-01T23:11:31.444615+00:00,ok -cvm,CVM proceedings,market,P1,loaded,implemented,loaded,monthly,true,https://dados.cvm.gov.br/,cvm,Agent G,file,Proceedings loaded,,monitored,,,,healthy,2026-03-01T23:11:31.444615+00:00,ok -cvm_funds,CVM fund registry,market,P1,loaded,implemented,loaded,monthly,true,https://dados.cvm.gov.br/dados/FI/,cvm_funds,Agent G,file,Fund baseline,,monitored,,,,healthy,2026-03-01T23:11:31.444615+00:00,ok -camara,Camara CEAP expenses,legislative,P1,loaded,implemented,loaded,monthly,true,https://dadosabertos.camara.leg.br/,camara,Agent E,api,Expense reimbursement,,monitored,,,,healthy,2026-03-01T23:11:31.444615+00:00,ok -camara_inquiries,Camara inquiries and requirements,legislative,P0,partial,implemented,partial,daily,true,https://dadosabertos.camara.leg.br/,camara_inquiries,Agent E,api,Sessions still low,,monitored,,,,partial,2026-03-01T23:11:31.444615+00:00,ok -senado,Senado CEAPS expenses,legislative,P1,loaded,implemented,loaded,monthly,true,https://www12.senado.leg.br/dados-abertos,senado,Agent E,api,Expense data loaded,,monitored,,,,healthy,2026-03-01T23:11:31.444615+00:00,ok -ceaf,CEAF expelled servants,integrity,P1,loaded,implemented,loaded,monthly,true,https://portaldatransparencia.gov.br/download-de-dados/ceaf,ceaf,Agent A,file,Expulsion evidence,,monitored,,,,healthy,2026-03-01T23:11:31.444615+00:00,ok -cepim,CEPIM barred NGOs,integrity,P1,loaded,implemented,loaded,monthly,true,https://portaldatransparencia.gov.br/download-de-dados/cepim,cepim,Agent A,file,NGO restrictions,,monitored,,,,healthy,2026-03-01T23:11:31.444615+00:00,ok -cpgf,CPGF gov card 
expenses,spending,P2,loaded,implemented,loaded,monthly,true,https://portaldatransparencia.gov.br/download-de-dados/cpgf,cpgf,Agent H,file,Masked CPF source,,monitored,,,,healthy,2026-03-01T23:11:31.444615+00:00,ok -leniency,Acordos de leniencia,integrity,P0,loaded,implemented,loaded,monthly,true,https://portaldatransparencia.gov.br/download-de-dados/acordos-leniencia,leniency,Agent A,file,High signal low volume,,monitored,,,,healthy,2026-03-01T23:11:31.444615+00:00,ok -ofac,OFAC sanctions,sanctions,P1,loaded,implemented,loaded,monthly,true,https://home.treasury.gov/policy-issues/financial-sanctions/sdn-list-data-files,ofac,Agent G,file,International sanctions,,monitored,,,,healthy,2026-03-01T23:11:31.444615+00:00,ok -holdings,Brasil IO holdings,ownership,P1,loaded,implemented,loaded,monthly,true,https://brasil.io/dataset/socios-brasil/,holdings,Agent G,file,Ownership enrichment,,monitored,,,,healthy,2026-03-01T23:11:31.444615+00:00,ok -viagens,Viagens a servico,spending,P2,loaded,implemented,loaded,monthly,true,https://portaldatransparencia.gov.br/download-de-dados/viagens,viagens,Agent H,file,Travel spend baseline,,monitored,,,,healthy,2026-03-01T23:11:31.444615+00:00,ok -siop,SIOP emendas,budget,P0,partial,implemented,partial,annual,true,https://www.siop.planejamento.gov.br/,siop,Agent C,api,Author linkage limited,,monitored,,,,partial,2026-03-01T23:11:31.444615+00:00,auth_or_rate_limited -pncp,PNCP bids and contracts,contracts,P0,stale,implemented,partial,monthly,true,https://pncp.gov.br/api/consulta/v1/contratacoes/publicacao,pncp,Agent C,api,Freshness SLA pending,,monitored,,,,stale,2026-03-01T23:11:31.444615+00:00,transient_error -renuncias,Renuncias fiscais,fiscal,P1,loaded,implemented,loaded,annual,true,https://www.gov.br/receitafederal/pt-br/acesso-a-informacao/dados-abertos,renuncias,Agent G,file,Tax waiver baseline,,monitored,,,,healthy,2026-03-01T23:11:31.444615+00:00,auth_or_rate_limited -siconfi,SICONFI municipal finance,fiscal,P1,partial,implemented,partial,annual,true,https://apidatalake.tesouro.gov.br/docs/siconfi/,siconfi,Agent C,api,No CNPJ direct links,,monitored,,,,partial,2026-03-01T23:11:31.444615+00:00,ok -tse_bens,TSE candidate assets,electoral,P1,loaded,implemented,loaded,biennial,true,https://dadosabertos.tse.jus.br/api/3/action/package_search?q=bens,tse_bens,Agent E,file,Patrimony baseline,,monitored,,,,healthy,2026-03-01T23:11:31.444615+00:00,ok -tse_filiados,TSE party memberships,electoral,P1,loaded,implemented,loaded,monthly,true,https://dadosabertos.tse.jus.br/api/3/action/package_search?q=filiacao,tse_filiados,Agent E,file,Party network,,monitored,,,,healthy,2026-03-01T23:11:31.444615+00:00,ok -bcb,BCB penalties,finance,P1,loaded,implemented,loaded,monthly,true,https://dadosabertos.bcb.gov.br/,bcb,Agent G,file,Bank penalties loaded,,monitored,,,,healthy,2026-03-01T23:11:31.444615+00:00,ok -stf,STF court data,judiciary,P1,loaded,implemented,loaded,monthly,true,https://basedosdados.org/dataset/br-stf-corte-aberta,stf,Agent D,bigquery,Supreme court coverage,,monitored,,,,healthy,2026-03-01T23:11:31.444615+00:00,ok -caged,CAGED labor movements,labor,P1,stale,implemented,partial,monthly,true,https://ftp.mtps.gov.br/pdet/microdados/NOVO%20CAGED/,caged,Agent H,file,Aggregate-only implementation,,monitored,,,,stale,2026-03-01T23:11:31.444615+00:00,transient_error -eu_sanctions,EU 
sanctions,sanctions,P1,loaded,implemented,loaded,monthly,true,https://data.europa.eu/data/datasets/consolidated-list-of-persons-groups-and-entities-subject-to-eu-financial-sanctions,eu_sanctions,Agent G,file,International sanctions,,monitored,,,,healthy,2026-03-01T23:11:31.444615+00:00,ok -un_sanctions,UN sanctions,sanctions,P1,loaded,implemented,loaded,monthly,true,https://scsanctions.un.org/resources/xml/en/consolidated.xml,un_sanctions,Agent G,file,International sanctions,,monitored,,,,healthy,2026-03-01T23:11:31.444615+00:00,transient_error -world_bank,World Bank debarment,sanctions,P1,loaded,implemented,loaded,monthly,true,https://www.worldbank.org/en/projects-operations/procurement/debarred-firms,world_bank,Agent G,file,International sanctions,,monitored,,,,healthy,2026-03-01T23:11:31.444615+00:00,ok -senado_cpis,Senado CPIs,legislative,P0,partial,implemented,partial,yearly,true,https://www12.senado.leg.br/dados-abertos,senado_cpis,Agent E,api,Needs richer sessions and requirements,,monitored,,,,partial,2026-03-01T23:11:31.444615+00:00,ok -mides,MiDES municipal procurement,municipal,P0,loaded,implemented,loaded,daily,true,https://basedosdados.org/dataset/world-wb-mides,mides,Agent H,bigquery,Operational after access fix,,monitored,,,,healthy,2026-03-01T23:11:31.444615+00:00,ok -querido_diario,Querido Diario gazettes,municipal,P1,partial,implemented,partial,daily,true,https://queridodiario.ok.org.br/api,querido_diario,Agent H,api,Text availability gap,,monitored,,,,partial,2026-03-01T23:11:31.444615+00:00,ok -datajud,CNJ DataJud,judiciary,P0,blocked_external,implemented,not_loaded,monthly,true,https://api-publica.datajud.cnj.jus.br/,datajud,Agent D,api,Credentials not fully operational in prod,,monitored,,,,blocked_external,2026-03-01T23:11:31.444615+00:00,auth_or_rate_limited -bolsa_familia_bpc,Bolsa Familia and BPC,social,P3,not_built,not_implemented,not_loaded,monthly,true,https://portaldatransparencia.gov.br/download-de-dados/bolsa-familia-pagamentos,,Agent H,file,High volume masked identities,,discovered_uningested,,,,not_built,2026-03-01T23:11:31.444615+00:00,ok -estban,BCB ESTBAN balances,finance,P3,not_built,not_implemented,not_loaded,monthly,true,https://dadosabertos.bcb.gov.br/,,Agent G,file,Banking aggregates,,discovered_uningested,,,,not_built,2026-03-01T23:11:31.444615+00:00,ok -if_data,BCB IF data indicators,finance,P3,not_built,not_implemented,not_loaded,quarterly,true,https://dadosabertos.bcb.gov.br/,,Agent G,file,Institution KPIs,,discovered_uningested,,,,not_built,2026-03-01T23:11:31.444615+00:00,ok -bcb_liquidacao,BCB bank liquidation acts,finance,P2,not_built,not_implemented,not_loaded,monthly,true,https://dadosabertos.bcb.gov.br/,,Agent G,file,Regulatory actions,,discovered_uningested,,,,not_built,2026-03-01T23:11:31.444615+00:00,ok -stj_dados_abertos,STJ open data,judiciary,P1,not_built,not_implemented,not_loaded,monthly,true,https://dadosabertos.stj.jus.br/,,Agent D,api,Superior court decisions,,discovered_uningested,,,,not_built,2026-03-01T23:11:31.444615+00:00,transient_error -cnciai_improbidade,CNIAI improbidade,judiciary,P1,not_built,not_implemented,not_loaded,monthly,true,https://www.cnj.jus.br/sistemas/datajud/,,Agent D,api,Misconduct convictions,,discovered_uningested,,,,not_built,2026-03-01T23:11:31.444615+00:00,ok -carf_tax_appeals,CARF tax appeals,judiciary,P2,not_built,not_implemented,not_loaded,monthly,true,https://carf.economia.gov.br/dados-abertos,,Agent D,file,Tax litigation,,discovered_uningested,,,,not_built,2026-03-01T23:11:31.444615+00:00,ok 
-anp_royalties,ANP royalties and fuel,regulatory,P2,not_built,not_implemented,not_loaded,monthly,true,https://dados.gov.br/dados/conjuntos-dados/anp,,Agent F,api,Oil and gas royalties,,discovered_uningested,,,,not_built,2026-03-01T23:11:31.444615+00:00,ok -aneel_concessions,ANEEL concessions,regulatory,P2,not_built,not_implemented,not_loaded,monthly,true,https://dadosabertos.aneel.gov.br/,,Agent F,api,Energy concessions,,discovered_uningested,,,,not_built,2026-03-01T23:11:31.444615+00:00,ok -anm_mining_rights,ANM mining rights,regulatory,P1,not_built,not_implemented,not_loaded,monthly,true,https://dados.gov.br/dados/conjuntos-dados/anm,,Agent F,api,Mining rights and permits,,discovered_uningested,,,,not_built,2026-03-01T23:11:31.444615+00:00,ok -antt_transport_concessions,ANTT concessions,regulatory,P3,not_built,not_implemented,not_loaded,monthly,true,https://dados.gov.br/dados/conjuntos-dados/antt,,Agent F,api,Transport concessions,,discovered_uningested,,,,not_built,2026-03-01T23:11:31.444615+00:00,ok -ans_health_plans,ANS operators,regulatory,P3,not_built,not_implemented,not_loaded,monthly,true,https://dados.gov.br/dados/conjuntos-dados/ans,,Agent H,api,Health insurance operators,,discovered_uningested,,,,not_built,2026-03-01T23:11:31.444615+00:00,ok -anvisa_registrations,ANVISA products,regulatory,P3,not_built,not_implemented,not_loaded,monthly,true,https://dados.gov.br/dados/conjuntos-dados/anvisa,,Agent H,api,Regulatory registrations,,discovered_uningested,,,,not_built,2026-03-01T23:11:31.444615+00:00,ok -anac_aviation_concessions,ANAC concessions,regulatory,P3,not_built,not_implemented,not_loaded,monthly,true,https://dados.gov.br/dados/conjuntos-dados/anac,,Agent F,api,Aviation contracts,,discovered_uningested,,,,not_built,2026-03-01T23:11:31.444615+00:00,ok -antaq_port_contracts,ANTAQ contracts,regulatory,P3,not_built,not_implemented,not_loaded,monthly,true,https://dados.gov.br/dados/conjuntos-dados/antaq,,Agent F,api,Port concessions,,discovered_uningested,,,,not_built,2026-03-01T23:11:31.444615+00:00,ok -ana_water_grants,ANA water grants,regulatory,P3,not_built,not_implemented,not_loaded,monthly,true,https://dados.gov.br/dados/conjuntos-dados/ana,,Agent F,api,Water use rights,,discovered_uningested,,,,not_built,2026-03-01T23:11:31.444615+00:00,ok -anatel_telecom_licenses,ANATEL licenses,regulatory,P3,not_built,not_implemented,not_loaded,monthly,true,https://dados.gov.br/dados/conjuntos-dados/anatel,,Agent G,api,Telecom operators,,discovered_uningested,,,,not_built,2026-03-01T23:11:31.444615+00:00,ok -susep_insurance_market,SUSEP insurance market,regulatory,P3,not_built,not_implemented,not_loaded,monthly,true,https://dados.gov.br/dados/conjuntos-dados/susep,,Agent G,file,Insurance entities,,discovered_uningested,,,,not_built,2026-03-01T23:11:31.444615+00:00,ok -cvm_full_ownership_chain,CVM ownership chains,market,P1,not_built,not_implemented,not_loaded,monthly,true,https://dados.cvm.gov.br/,,Agent G,file,Shareholder graph expansion,,discovered_uningested,,,,not_built,2026-03-01T23:11:31.444615+00:00,ok -receita_dirbi,Receita DIRBI,tax,P1,not_built,not_implemented,not_loaded,monthly,true,https://dados.gov.br/dados/conjuntos-dados/declaracao-dirbi,,Agent G,file,Tax benefit declarations,,discovered_uningested,,,,not_built,2026-03-01T23:11:31.444615+00:00,ok -mapbiomas_alertas,MapBiomas Alerta,environment,P1,not_built,not_implemented,not_loaded,monthly,true,https://alerta.mapbiomas.org/api,,Agent F,api,Deforestation 
alerts,,discovered_uningested,,,,not_built,2026-03-01T23:11:31.444615+00:00,ok -sicar_rural_registry,SiCAR rural registry,environment,P1,not_built,not_implemented,not_loaded,quarterly,true,https://www.car.gov.br/publico/municipios/downloads,,Agent F,file,Property boundaries and owners,,discovered_uningested,,,,not_built,2026-03-01T23:11:31.444615+00:00,transient_error -icmbio_cnuc,ICMBio CNUC units,environment,P2,not_built,not_implemented,not_loaded,monthly,true,https://www.gov.br/icmbio/pt-br,,Agent F,file,Protected areas,,discovered_uningested,,,,not_built,2026-03-01T23:11:31.444615+00:00,auth_or_rate_limited -tesouro_emendas,Tesouro emendas,budget,P0,not_built,not_implemented,not_loaded,monthly,true,https://www.tesourotransparente.gov.br/,,Agent C,file,Budget execution,,discovered_uningested,,,,not_built,2026-03-01T23:11:31.444615+00:00,ok -siga_brasil,SIGA Brasil,budget,P0,not_built,not_implemented,not_loaded,monthly,true,https://www12.senado.leg.br/orcamento/sigabrasil,,Agent C,file,Federal budget traces,,discovered_uningested,,,,not_built,2026-03-01T23:11:31.444615+00:00,ok -camara_votes_bills,Camara votes and bills,legislative,P1,not_built,not_implemented,not_loaded,daily,true,https://dadosabertos.camara.leg.br/api/v2,,Agent E,api,Legislative behavior,,discovered_uningested,,,,not_built,2026-03-01T23:11:31.444615+00:00,ok -senado_votes_bills,Senado votes and bills,legislative,P1,not_built,not_implemented,not_loaded,daily,true,https://legis.senado.leg.br/dadosabertos,,Agent E,api,Legislative behavior,,discovered_uningested,,,,not_built,2026-03-01T23:11:31.444615+00:00,ok -interpol_red_notices,Interpol red notices,international,P2,not_built,not_implemented,not_loaded,weekly,true,https://www.interpol.int/How-we-work/Notices/Red-Notices,,Agent G,api,Requires key,,discovered_uningested,,,,not_built,2026-03-01T23:11:31.444615+00:00,ok -tce_sp,TCE Sao Paulo,state,P2,not_built,not_implemented,not_loaded,monthly,true,https://transparencia.tce.sp.gov.br/,,Agent H,api,State audit procurement,,discovered_uningested,,,,not_built,2026-03-01T23:11:31.444615+00:00,ok -tce_pe,TCE Pernambuco,state,P2,not_built,not_implemented,not_loaded,monthly,true,https://sistemas.tce.pe.gov.br/,,Agent H,api,State audit procurement,,discovered_uningested,,,,not_built,2026-03-01T23:11:31.444615+00:00,auth_or_rate_limited -tce_rj,TCE Rio de Janeiro,state,P2,not_built,not_implemented,not_loaded,monthly,true,https://dados.tce.rj.gov.br/,,Agent H,api,State audit procurement,,discovered_uningested,,,,not_built,2026-03-01T23:11:31.444615+00:00,transient_error -tce_rs,TCE Rio Grande do Sul,state,P2,not_built,not_implemented,not_loaded,monthly,true,https://tcers.tc.br/fiscalizado/,,Agent H,file,State audit procurement,,discovered_uningested,,,,not_built,2026-03-01T23:11:31.444615+00:00,ok -tce_mg,TCE Minas Gerais,state,P2,not_built,not_implemented,not_loaded,monthly,true,https://www.tce.mg.gov.br/,,Agent H,web,State audit procurement,,discovered_uningested,,,,not_built,2026-03-01T23:11:31.444615+00:00,transient_error -tce_ba,TCE Bahia,state,P3,not_built,not_implemented,not_loaded,monthly,true,https://www.tce.ba.gov.br/,,Agent H,web,State audit procurement,,discovered_uningested,,,,not_built,2026-03-01T23:11:31.444615+00:00,ok -tce_ce,TCE Ceara,state,P3,not_built,not_implemented,not_loaded,monthly,true,https://www.tce.ce.gov.br/,,Agent H,web,State audit procurement,,discovered_uningested,,,,not_built,2026-03-01T23:11:31.444615+00:00,ok -tce_go,TCE 
Goias,state,P3,not_built,not_implemented,not_loaded,monthly,true,https://portal.tce.go.gov.br/,,Agent H,web,State audit procurement,,discovered_uningested,,,,not_built,2026-03-01T23:11:31.444615+00:00,ok -tce_pr,TCE Parana,state,P3,not_built,not_implemented,not_loaded,monthly,true,https://www1.tce.pr.gov.br/,,Agent H,web,State audit procurement,,discovered_uningested,,,,not_built,2026-03-01T23:11:31.444615+00:00,transient_error -tce_sc,TCE Santa Catarina,state,P3,not_built,not_implemented,not_loaded,monthly,true,https://www.tcesc.tc.br/,,Agent H,web,State audit procurement,,discovered_uningested,,,,not_built,2026-03-01T23:11:31.444615+00:00,ok -tce_es,TCE Espirito Santo,state,P3,not_built,not_implemented,not_loaded,monthly,true,https://www.tcees.tc.br/,,Agent H,web,State audit procurement,,discovered_uningested,,,,not_built,2026-03-01T23:11:31.444615+00:00,ok -tce_mt,TCE Mato Grosso,state,P3,not_built,not_implemented,not_loaded,monthly,true,https://www.tce.mt.gov.br/,,Agent H,web,State audit procurement,,discovered_uningested,,,,not_built,2026-03-01T23:11:31.444615+00:00,ok -tce_ms,TCE Mato Grosso do Sul,state,P3,not_built,not_implemented,not_loaded,monthly,true,https://www.tce.ms.gov.br/,,Agent H,web,State audit procurement,,discovered_uningested,,,,not_built,2026-03-01T23:11:31.444615+00:00,ok -tce_am,TCE Amazonas,state,P3,not_built,not_implemented,not_loaded,monthly,true,https://www.tce.am.gov.br/,,Agent H,web,State audit procurement,,discovered_uningested,,,,not_built,2026-03-01T23:11:31.444615+00:00,ok -tce_pa,TCE Para,state,P3,not_built,not_implemented,not_loaded,monthly,true,https://www.tcepa.tc.br/,,Agent H,web,State audit procurement,,discovered_uningested,,,,not_built,2026-03-01T23:11:31.444615+00:00,ok -tce_ro,TCE Rondonia,state,P3,not_built,not_implemented,not_loaded,monthly,true,https://www.tce.ro.gov.br/,,Agent H,web,State audit procurement,,discovered_uningested,,,,not_built,2026-03-01T23:11:31.444615+00:00,ok -tce_rr,TCE Roraima,state,P3,not_built,not_implemented,not_loaded,monthly,true,https://www.tcerr.tc.br/,,Agent H,web,State audit procurement,,discovered_uningested,,,,not_built,2026-03-01T23:11:31.444615+00:00,ok -tce_ap,TCE Amapa,state,P3,not_built,not_implemented,not_loaded,monthly,true,https://www.tce.ap.gov.br/,,Agent H,web,State audit procurement,,discovered_uningested,,,,not_built,2026-03-01T23:11:31.444615+00:00,transient_error -tce_to,TCE Tocantins,state,P3,not_built,not_implemented,not_loaded,monthly,true,https://www.tceto.tc.br/,,Agent H,web,State audit procurement,,discovered_uningested,,,,not_built,2026-03-01T23:11:31.444615+00:00,ok -tce_ma,TCE Maranhao,state,P3,not_built,not_implemented,not_loaded,monthly,true,https://www.tcema.tc.br/,,Agent H,web,State audit procurement,,discovered_uningested,,,,not_built,2026-03-01T23:11:31.444615+00:00,ok -tce_pi,TCE Piaui,state,P3,not_built,not_implemented,not_loaded,monthly,true,https://www.tce.pi.gov.br/,,Agent H,web,State audit procurement,,discovered_uningested,,,,not_built,2026-03-01T23:11:31.444615+00:00,ok -tce_rn,TCE Rio Grande do Norte,state,P3,not_built,not_implemented,not_loaded,monthly,true,https://www.tce.rn.gov.br/,,Agent H,web,State audit procurement,,discovered_uningested,,,,not_built,2026-03-01T23:11:31.444615+00:00,ok -tce_pb,TCE Paraiba,state,P3,not_built,not_implemented,not_loaded,monthly,true,https://tce.pb.gov.br/,,Agent H,web,State audit procurement,,discovered_uningested,,,,not_built,2026-03-01T23:11:31.444615+00:00,ok -tce_al,TCE 
Alagoas,state,P3,not_built,not_implemented,not_loaded,monthly,true,https://www.tceal.tc.br/,,Agent H,web,State audit procurement,,discovered_uningested,,,,not_built,2026-03-01T23:11:31.444615+00:00,ok -tce_se,TCE Sergipe,state,P3,not_built,not_implemented,not_loaded,monthly,true,https://www.tce.se.gov.br/,,Agent H,web,State audit procurement,,discovered_uningested,,,,not_built,2026-03-01T23:11:31.444615+00:00,ok -state_portal_sp,Sao Paulo transparency portal,state,P2,not_built,not_implemented,not_loaded,monthly,true,https://www.transparencia.sp.gov.br/,,Agent H,api,State expenses and contracts,,discovered_uningested,,,,not_built,2026-03-01T23:11:31.444615+00:00,ok -state_portal_mg,Minas Gerais transparency portal,state,P2,not_built,not_implemented,not_loaded,monthly,true,https://www.transparencia.mg.gov.br/,,Agent H,web,State expenses and contracts,,discovered_uningested,,,,not_built,2026-03-01T23:11:31.444615+00:00,ok -state_portal_ba,Bahia transparency portal,state,P2,not_built,not_implemented,not_loaded,monthly,true,https://www.transparencia.ba.gov.br/,,Agent H,web,State expenses and contracts,,discovered_uningested,,,,not_built,2026-03-01T23:11:31.444615+00:00,ok -state_portal_ce,Ceara transparency portal,state,P2,not_built,not_implemented,not_loaded,monthly,true,https://www.transparencia.ce.gov.br/,,Agent H,web,State expenses and contracts,,discovered_uningested,,,,not_built,2026-03-01T23:11:31.444615+00:00,transient_error -state_portal_go,Goias transparency portal,state,P2,not_built,not_implemented,not_loaded,monthly,true,https://www.transparencia.go.gov.br/,,Agent H,web,State expenses and contracts,,discovered_uningested,,,,not_built,2026-03-01T23:11:31.444615+00:00,ok -state_portal_pr,Parana transparency portal,state,P2,not_built,not_implemented,not_loaded,monthly,true,https://www.transparencia.pr.gov.br/,,Agent H,web,State expenses and contracts,,discovered_uningested,,,,not_built,2026-03-01T23:11:31.444615+00:00,ok -state_portal_sc,Santa Catarina transparency portal,state,P2,not_built,not_implemented,not_loaded,monthly,true,https://www.transparencia.sc.gov.br/,,Agent H,web,State expenses and contracts,,discovered_uningested,,,,not_built,2026-03-01T23:11:31.444615+00:00,ok -state_portal_rs,Rio Grande do Sul transparency portal,state,P2,not_built,not_implemented,not_loaded,monthly,true,https://www.transparencia.rs.gov.br/,,Agent H,web,State expenses and contracts,,discovered_uningested,,,,not_built,2026-03-01T23:11:31.444615+00:00,ok -state_portal_pe,Pernambuco transparency portal,state,P2,not_built,not_implemented,not_loaded,monthly,true,https://www.transparencia.pe.gov.br/,,Agent H,web,State expenses and contracts,,discovered_uningested,,,,not_built,2026-03-01T23:11:31.444615+00:00,ok -state_portal_rj,Rio de Janeiro transparency portal,state,P2,not_built,not_implemented,not_loaded,monthly,true,https://www.transparencia.rj.gov.br/,,Agent H,web,State expenses and contracts,,discovered_uningested,,,,not_built,2026-03-01T23:11:31.444615+00:00,transient_error +source_id,name,category,tier,status,implementation_state,load_state,frequency,in_universe_v1,primary_url,pipeline_id,owner_agent,access_mode,notes,public_access_mode,discovery_status,last_seen_url,cadence_expected,cadence_observed,quality_status +cnpj,Receita Federal CNPJ,identity,P0,loaded,implemented,loaded,monthly,true,https://dadosabertos.rfb.gov.br/CNPJ/dados_abertos_cnpj/,cnpj,Agent A,file,http://dadosabertos.rfb.gov.br,,monitored,,,,healthy +tse,TSE elections and 
donations,electoral,P0,loaded,implemented,loaded,biennial,true,https://dadosabertos.tse.jus.br/,tse,Agent E,file,Core electoral data loaded,,monitored,,,,healthy +transparencia,Portal da Transparencia contracts,contracts,P0,loaded,implemented,loaded,monthly,true,https://portaldatransparencia.gov.br/download-de-dados,transparencia,Agent C,file,Federal contracts and servants,,monitored,,,,healthy +sanctions,CEIS CNEP sanctions,sanctions,P0,loaded,implemented,loaded,monthly,true,https://portaldatransparencia.gov.br/sancoes/consulta,sanctions,Agent C,file,Administrative sanctions,,monitored,,,,healthy +pep_cgu,CGU PEP list,integrity,P1,loaded,implemented,loaded,monthly,true,https://portaldatransparencia.gov.br/download-de-dados/pep,pep_cgu,Agent A,file,PEP baseline,,monitored,,,,healthy +bndes,BNDES financings,finance,P1,loaded,implemented,loaded,monthly,true,https://www.bndes.gov.br/wps/portal/site/home/transparencia/dados,bndes,Agent G,file,Loan relationships,,monitored,,,,healthy +pgfn,PGFN divida ativa,fiscal,P0,loaded,implemented,loaded,monthly,true,https://www.regularize.pgfn.gov.br/dados-abertos,pgfn,Agent C,file,Debt risk core,,monitored,,,,healthy +ibama,IBAMA embargos,environment,P1,loaded,implemented,loaded,monthly,true,https://servicos.ibama.gov.br/ctf/publico/areasembargadas/,ibama,Agent F,file,Environmental enforcement,,monitored,,,,healthy +comprasnet,ComprasNet contracts,contracts,P0,stale,implemented,partial,monthly,true,https://dados.gov.br/dados/conjuntos-dados/comprasnet-contratos,comprasnet,Agent C,file,Needs freshness backfill,,monitored,,,,stale +tcu,TCU sanctions,audit,P1,loaded,implemented,loaded,monthly,true,https://contas.tcu.gov.br/ords/f?p=INIDONEAS:INIDONEAS,tcu,Agent C,file,Inidoneidade sanctions,,monitored,,,,healthy +transferegov,TransfereGov emendas e convenios,transfers,P0,loaded,implemented,loaded,monthly,true,https://www.transferegov.sistema.gov.br/portal/download-de-dados,transferegov,Agent C,file,Transfer relationships,,monitored,,,,healthy +rais,RAIS aggregated labor,labor,P1,loaded,implemented,loaded,annual,true,https://basedosdados.org/dataset/br-me-rais,rais,Agent H,bigquery,Aggregate mode only,,monitored,,,,healthy +inep,INEP school census,education,P2,loaded,implemented,loaded,annual,true,https://www.gov.br/inep/pt-br/acesso-a-informacao/dados-abertos/microdados/censo-escolar,inep,Agent H,file,Education coverage,,monitored,,,,healthy +dou,Diario Oficial da Uniao,gazette,P0,loaded,implemented,loaded,daily,true,https://www.in.gov.br/leiturajornal,dou,Agent E,bigquery,National acts ingestion,,monitored,,,,healthy +datasus,DATASUS CNES,health,P1,loaded,implemented,loaded,monthly,true,https://opendatasus.saude.gov.br/,datasus,Agent H,file,Health establishments,,monitored,,,,healthy +icij,ICIJ offshore leaks,offshore,P1,loaded,implemented,loaded,yearly,true,https://offshoreleaks.icij.org/pages/database,icij,Agent G,file,Offshore entities and officers,,monitored,,,,healthy +opensanctions,OpenSanctions global PEP,sanctions,P1,loaded,implemented,loaded,monthly,true,https://www.opensanctions.org/datasets/peps/,opensanctions,Agent G,file,Global PEP matching,,monitored,,,,healthy +cvm,CVM proceedings,market,P1,loaded,implemented,loaded,monthly,true,https://dados.cvm.gov.br/,cvm,Agent G,file,Proceedings loaded,,monitored,,,,healthy +cvm_funds,CVM fund registry,market,P1,loaded,implemented,loaded,monthly,true,https://dados.cvm.gov.br/dados/FI/,cvm_funds,Agent G,file,Fund baseline,,monitored,,,,healthy +camara,Camara CEAP 
expenses,legislative,P1,loaded,implemented,loaded,monthly,true,https://dadosabertos.camara.leg.br/,camara,Agent E,api,Expense reimbursement,,monitored,,,,healthy +camara_inquiries,Camara inquiries and requirements,legislative,P0,partial,implemented,partial,daily,true,https://dadosabertos.camara.leg.br/,camara_inquiries,Agent E,api,Sessions still low,,monitored,,,,partial +senado,Senado CEAPS expenses,legislative,P1,loaded,implemented,loaded,monthly,true,https://www12.senado.leg.br/dados-abertos,senado,Agent E,api,Expense data loaded,,monitored,,,,healthy +ceaf,CEAF expelled servants,integrity,P1,loaded,implemented,loaded,monthly,true,https://portaldatransparencia.gov.br/download-de-dados/ceaf,ceaf,Agent A,file,Expulsion evidence,,monitored,,,,healthy +cepim,CEPIM barred NGOs,integrity,P1,loaded,implemented,loaded,monthly,true,https://portaldatransparencia.gov.br/download-de-dados/cepim,cepim,Agent A,file,NGO restrictions,,monitored,,,,healthy +cpgf,CPGF gov card expenses,spending,P2,loaded,implemented,loaded,monthly,true,https://portaldatransparencia.gov.br/download-de-dados/cpgf,cpgf,Agent H,file,Masked CPF source,,monitored,,,,healthy +leniency,Acordos de leniencia,integrity,P0,loaded,implemented,loaded,monthly,true,https://portaldatransparencia.gov.br/download-de-dados/acordos-leniencia,leniency,Agent A,file,High signal low volume,,monitored,,,,healthy +ofac,OFAC sanctions,sanctions,P1,loaded,implemented,loaded,monthly,true,https://home.treasury.gov/policy-issues/financial-sanctions/sdn-list-data-files,ofac,Agent G,file,International sanctions,,monitored,,,,healthy +holdings,Brasil IO holdings,ownership,P1,loaded,implemented,loaded,monthly,true,https://brasil.io/dataset/socios-brasil/holding/,holdings,Agent G,file,Ownership enrichment,,monitored,,,,healthy +viagens,Viagens a servico,spending,P2,loaded,implemented,loaded,monthly,true,https://portaldatransparencia.gov.br/download-de-dados/viagens,viagens,Agent H,file,Travel spend baseline,,monitored,,,,healthy +siop,SIOP emendas,budget,P0,partial,implemented,partial,annual,true,https://www.siop.planejamento.gov.br/,siop,Agent C,api,Author linkage limited,,monitored,,,,partial +pncp,PNCP bids and contracts,contracts,P0,stale,implemented,partial,monthly,true,https://pncp.gov.br/api/consulta/v1/,pncp,Agent C,api,Freshness SLA pending,,monitored,,,,stale +renuncias,Renuncias fiscais,fiscal,P1,loaded,implemented,loaded,annual,true,https://www.gov.br/receitafederal/pt-br/acesso-a-informacao/dados-abertos,renuncias,Agent G,file,Tax waiver baseline,,monitored,,,,healthy +siconfi,SICONFI municipal finance,fiscal,P1,partial,implemented,partial,annual,true,https://apidatalake.tesouro.gov.br/docs/siconfi/,siconfi,Agent C,api,No CNPJ direct links,,monitored,,,,partial +tse_bens,TSE candidate assets,electoral,P1,loaded,implemented,loaded,biennial,true,https://dadosabertos.tse.jus.br/dataset/bens-candidato,tse_bens,Agent E,file,Patrimony baseline,,monitored,,,,healthy +tse_filiados,TSE party memberships,electoral,P1,loaded,implemented,loaded,monthly,true,https://dadosabertos.tse.jus.br/dataset/filiados-partidos,tse_filiados,Agent E,file,Party network,,monitored,,,,healthy +bcb,BCB penalties,finance,P1,loaded,implemented,loaded,monthly,true,https://dadosabertos.bcb.gov.br/dataset/penalidades,bcb,Agent G,file,Bank penalties loaded,,monitored,,,,healthy +stf,STF court data,judiciary,P1,loaded,implemented,loaded,monthly,true,https://basedosdados.org/dataset/br-stf-corte-aberta,stf,Agent D,bigquery,Supreme court coverage,,monitored,,,,healthy +caged,CAGED 
labor movements,labor,P1,stale,implemented,partial,monthly,true,https://ftp.mtps.gov.br/pdet/microdados/NOVO%20CAGED/,caged,Agent H,file,Aggregate-only implementation,,monitored,,,,stale +eu_sanctions,EU sanctions,sanctions,P1,loaded,implemented,loaded,monthly,true,https://data.europa.eu/data/datasets/consolidated-list-of-persons-groups-and-entities-subject-to-eu-financial-sanctions,eu_sanctions,Agent G,file,International sanctions,,monitored,,,,healthy +un_sanctions,UN sanctions,sanctions,P1,loaded,implemented,loaded,monthly,true,https://scsanctions.un.org/resources/xml/en/consolidated.xml,un_sanctions,Agent G,file,International sanctions,,monitored,,,,healthy +world_bank,World Bank debarment,sanctions,P1,loaded,implemented,loaded,monthly,true,https://www.worldbank.org/en/projects-operations/procurement/debarred-firms,world_bank,Agent G,file,International sanctions,,monitored,,,,healthy +senado_cpis,Senado CPIs,legislative,P0,partial,implemented,partial,yearly,true,https://www12.senado.leg.br/dados-abertos,senado_cpis,Agent E,api,Needs richer sessions and requirements,,monitored,,,,partial +mides,MiDES municipal procurement,municipal,P0,loaded,implemented,loaded,daily,true,https://basedosdados.org/dataset/world-wb-mides,mides,Agent H,bigquery,Operational after access fix,,monitored,,,,healthy +querido_diario,Querido Diario gazettes,municipal,P1,partial,implemented,partial,daily,true,https://queridodiario.ok.org.br/api,querido_diario,Agent H,api,Text availability gap,,monitored,,,,partial +datajud,CNJ DataJud,judiciary,P0,blocked_external,implemented,not_loaded,monthly,true,https://api-publica.datajud.cnj.jus.br/,datajud,Agent D,api,Credentials not fully operational in prod,,monitored,,,,blocked_external +bolsa_familia_bpc,Bolsa Familia and BPC,social,P3,not_built,not_implemented,not_loaded,monthly,true,https://portaldatransparencia.gov.br/download-de-dados/bolsa-familia-pagamentos,,Agent H,file,High volume masked identities,,discovered_uningested,,,,not_built +estban,BCB ESTBAN balances,finance,P3,not_built,not_implemented,not_loaded,monthly,true,https://dadosabertos.bcb.gov.br/dataset/estban,,Agent G,file,Banking aggregates,,discovered_uningested,,,,not_built +if_data,BCB IF data indicators,finance,P3,not_built,not_implemented,not_loaded,quarterly,true,https://dadosabertos.bcb.gov.br/dataset/if-data,,Agent G,file,Institution KPIs,,discovered_uningested,,,,not_built +bcb_liquidacao,BCB bank liquidation acts,finance,P2,not_built,not_implemented,not_loaded,monthly,true,https://dadosabertos.bcb.gov.br/dataset/intervencao-e-liquidacao,,Agent G,file,Regulatory actions,,discovered_uningested,,,,not_built +stj_dados_abertos,STJ open data,judiciary,P1,not_built,not_implemented,not_loaded,monthly,true,https://dadosabertos.stj.jus.br/,,Agent D,api,Superior court decisions,,discovered_uningested,,,,not_built +cnciai_improbidade,CNIAI improbidade,judiciary,P1,not_built,not_implemented,not_loaded,monthly,true,https://www.cnj.jus.br/sistemas/cnciai/,,Agent D,api,Misconduct convictions,,discovered_uningested,,,,not_built +carf_tax_appeals,CARF tax appeals,judiciary,P2,not_built,not_implemented,not_loaded,monthly,true,https://carf.economia.gov.br/dados-abertos,,Agent D,file,Tax litigation,,discovered_uningested,,,,not_built +anp_royalties,ANP royalties and fuel,regulatory,P2,not_built,not_implemented,not_loaded,monthly,true,https://dados.gov.br/dados/conjuntos-dados/anp,,Agent F,api,Oil and gas royalties,,discovered_uningested,,,,not_built +aneel_concessions,ANEEL 
concessions,regulatory,P2,not_built,not_implemented,not_loaded,monthly,true,https://dadosabertos.aneel.gov.br/,,Agent F,api,Energy concessions,,discovered_uningested,,,,not_built +anm_mining_rights,ANM mining rights,regulatory,P1,not_built,not_implemented,not_loaded,monthly,true,https://dados.gov.br/dados/conjuntos-dados/anm,,Agent F,api,Mining rights and permits,,discovered_uningested,,,,not_built +antt_transport_concessions,ANTT concessions,regulatory,P3,not_built,not_implemented,not_loaded,monthly,true,https://dados.gov.br/dados/conjuntos-dados/antt,,Agent F,api,Transport concessions,,discovered_uningested,,,,not_built +ans_health_plans,ANS operators,regulatory,P3,not_built,not_implemented,not_loaded,monthly,true,https://dados.gov.br/dados/conjuntos-dados/ans,,Agent H,api,Health insurance operators,,discovered_uningested,,,,not_built +anvisa_registrations,ANVISA products,regulatory,P3,not_built,not_implemented,not_loaded,monthly,true,https://dados.gov.br/dados/conjuntos-dados/anvisa,,Agent H,api,Regulatory registrations,,discovered_uningested,,,,not_built +anac_aviation_concessions,ANAC concessions,regulatory,P3,not_built,not_implemented,not_loaded,monthly,true,https://dados.gov.br/dados/conjuntos-dados/anac,,Agent F,api,Aviation contracts,,discovered_uningested,,,,not_built +antaq_port_contracts,ANTAQ contracts,regulatory,P3,not_built,not_implemented,not_loaded,monthly,true,https://dados.gov.br/dados/conjuntos-dados/antaq,,Agent F,api,Port concessions,,discovered_uningested,,,,not_built +ana_water_grants,ANA water grants,regulatory,P3,not_built,not_implemented,not_loaded,monthly,true,https://dados.gov.br/dados/conjuntos-dados/ana,,Agent F,api,Water use rights,,discovered_uningested,,,,not_built +anatel_telecom_licenses,ANATEL licenses,regulatory,P3,not_built,not_implemented,not_loaded,monthly,true,https://dados.gov.br/dados/conjuntos-dados/anatel,,Agent G,api,Telecom operators,,discovered_uningested,,,,not_built +susep_insurance_market,SUSEP insurance market,regulatory,P3,not_built,not_implemented,not_loaded,monthly,true,https://dados.gov.br/dados/conjuntos-dados/susep,,Agent G,file,Insurance entities,,discovered_uningested,,,,not_built +cvm_full_ownership_chain,CVM ownership chains,market,P1,not_built,not_implemented,not_loaded,monthly,true,https://dados.cvm.gov.br/,,Agent G,file,Shareholder graph expansion,,discovered_uningested,,,,not_built +receita_dirbi,Receita DIRBI,tax,P1,not_built,not_implemented,not_loaded,monthly,true,https://dados.gov.br/dados/conjuntos-dados/declaracao-dirbi,,Agent G,file,Tax benefit declarations,,discovered_uningested,,,,not_built +mapbiomas_alertas,MapBiomas Alerta,environment,P1,not_built,not_implemented,not_loaded,monthly,true,https://alerta.mapbiomas.org/api,,Agent F,api,Deforestation alerts,,discovered_uningested,,,,not_built +sicar_rural_registry,SiCAR rural registry,environment,P1,not_built,not_implemented,not_loaded,quarterly,true,https://www.car.gov.br/publico/municipios/downloads,,Agent F,file,Property boundaries and owners,,discovered_uningested,,,,not_built +icmbio_cnuc,ICMBio CNUC units,environment,P2,not_built,not_implemented,not_loaded,monthly,true,https://www.icmbio.gov.br/portal/faunabrasileira/cadastro-nacional-de-unidades-de-conservacao,,Agent F,file,Protected areas,,discovered_uningested,,,,not_built +tesouro_emendas,Tesouro emendas,budget,P0,not_built,not_implemented,not_loaded,monthly,true,https://www.tesourotransparente.gov.br/,,Agent C,file,Budget execution,,discovered_uningested,,,,not_built +siga_brasil,SIGA 
Brasil,budget,P0,not_built,not_implemented,not_loaded,monthly,true,https://www12.senado.leg.br/orcamento/sigabrasil,,Agent C,file,Federal budget traces,,discovered_uningested,,,,not_built +camara_votes_bills,Camara votes and bills,legislative,P1,not_built,not_implemented,not_loaded,daily,true,https://dadosabertos.camara.leg.br/api/v2,,Agent E,api,Legislative behavior,,discovered_uningested,,,,not_built +senado_votes_bills,Senado votes and bills,legislative,P1,not_built,not_implemented,not_loaded,daily,true,https://legis.senado.leg.br/dadosabertos,,Agent E,api,Legislative behavior,,discovered_uningested,,,,not_built +interpol_red_notices,Interpol red notices,international,P2,not_built,not_implemented,not_loaded,weekly,true,https://www.interpol.int/How-we-work/Notices/Red-Notices,,Agent G,api,Requires key,,discovered_uningested,,,,not_built +tce_sp,TCE Sao Paulo,state,P2,not_built,not_implemented,not_loaded,monthly,true,https://transparencia.tce.sp.gov.br/,,Agent H,api,State audit procurement,,discovered_uningested,,,,not_built +tce_pe,TCE Pernambuco,state,P2,not_built,not_implemented,not_loaded,monthly,true,https://sistemas.tce.pe.gov.br/,,Agent H,api,State audit procurement,,discovered_uningested,,,,not_built +tce_rj,TCE Rio de Janeiro,state,P2,not_built,not_implemented,not_loaded,monthly,true,https://dados.tce.rj.gov.br/,,Agent H,api,State audit procurement,,discovered_uningested,,,,not_built +tce_rs,TCE Rio Grande do Sul,state,P2,not_built,not_implemented,not_loaded,monthly,true,https://portal.tce.rs.gov.br/,,Agent H,file,State audit procurement,,discovered_uningested,,,,not_built +tce_mg,TCE Minas Gerais,state,P2,not_built,not_implemented,not_loaded,monthly,true,https://www.tce.mg.gov.br/,,Agent H,web,State audit procurement,,discovered_uningested,,,,not_built +tce_ba,TCE Bahia,state,P3,not_built,not_implemented,not_loaded,monthly,true,https://www.tce.ba.gov.br/,,Agent H,web,State audit procurement,,discovered_uningested,,,,not_built +tce_ce,TCE Ceara,state,P3,not_built,not_implemented,not_loaded,monthly,true,https://www.tce.ce.gov.br/,,Agent H,web,State audit procurement,,discovered_uningested,,,,not_built +tce_go,TCE Goias,state,P3,not_built,not_implemented,not_loaded,monthly,true,https://portal.tce.go.gov.br/,,Agent H,web,State audit procurement,,discovered_uningested,,,,not_built +tce_pr,TCE Parana,state,P3,not_built,not_implemented,not_loaded,monthly,true,https://www1.tce.pr.gov.br/,,Agent H,web,State audit procurement,,discovered_uningested,,,,not_built +tce_sc,TCE Santa Catarina,state,P3,not_built,not_implemented,not_loaded,monthly,true,https://www.tcesc.tc.br/,,Agent H,web,State audit procurement,,discovered_uningested,,,,not_built +tce_es,TCE Espirito Santo,state,P3,not_built,not_implemented,not_loaded,monthly,true,https://www.tcees.tc.br/,,Agent H,web,State audit procurement,,discovered_uningested,,,,not_built +tce_mt,TCE Mato Grosso,state,P3,not_built,not_implemented,not_loaded,monthly,true,https://www.tce.mt.gov.br/,,Agent H,web,State audit procurement,,discovered_uningested,,,,not_built +tce_ms,TCE Mato Grosso do Sul,state,P3,not_built,not_implemented,not_loaded,monthly,true,https://www.tce.ms.gov.br/,,Agent H,web,State audit procurement,,discovered_uningested,,,,not_built +tce_am,TCE Amazonas,state,P3,not_built,not_implemented,not_loaded,monthly,true,https://www.tce.am.gov.br/,,Agent H,web,State audit procurement,,discovered_uningested,,,,not_built +tce_pa,TCE Para,state,P3,not_built,not_implemented,not_loaded,monthly,true,https://www.tcepa.tc.br/,,Agent H,web,State audit 
procurement,,discovered_uningested,,,,not_built +tce_ro,TCE Rondonia,state,P3,not_built,not_implemented,not_loaded,monthly,true,https://www.tce.ro.gov.br/,,Agent H,web,State audit procurement,,discovered_uningested,,,,not_built +tce_rr,TCE Roraima,state,P3,not_built,not_implemented,not_loaded,monthly,true,https://www.tcerr.tc.br/,,Agent H,web,State audit procurement,,discovered_uningested,,,,not_built +tce_ap,TCE Amapa,state,P3,not_built,not_implemented,not_loaded,monthly,true,https://www.tce.ap.gov.br/,,Agent H,web,State audit procurement,,discovered_uningested,,,,not_built +tce_to,TCE Tocantins,state,P3,not_built,not_implemented,not_loaded,monthly,true,https://www.tceto.tc.br/,,Agent H,web,State audit procurement,,discovered_uningested,,,,not_built +tce_ma,TCE Maranhao,state,P3,not_built,not_implemented,not_loaded,monthly,true,https://www.tcema.tc.br/,,Agent H,web,State audit procurement,,discovered_uningested,,,,not_built +tce_pi,TCE Piaui,state,P3,not_built,not_implemented,not_loaded,monthly,true,https://www.tce.pi.gov.br/,,Agent H,web,State audit procurement,,discovered_uningested,,,,not_built +tce_rn,TCE Rio Grande do Norte,state,P3,not_built,not_implemented,not_loaded,monthly,true,https://www.tce.rn.gov.br/,,Agent H,web,State audit procurement,,discovered_uningested,,,,not_built +tce_pb,TCE Paraiba,state,P3,not_built,not_implemented,not_loaded,monthly,true,https://tce.pb.gov.br/,,Agent H,web,State audit procurement,,discovered_uningested,,,,not_built +tce_al,TCE Alagoas,state,P3,not_built,not_implemented,not_loaded,monthly,true,https://www.tceal.tc.br/,,Agent H,web,State audit procurement,,discovered_uningested,,,,not_built +tce_se,TCE Sergipe,state,P3,not_built,not_implemented,not_loaded,monthly,true,https://www.tce.se.gov.br/,,Agent H,web,State audit procurement,,discovered_uningested,,,,not_built +state_portal_sp,Sao Paulo transparency portal,state,P2,not_built,not_implemented,not_loaded,monthly,true,https://www.transparencia.sp.gov.br/,,Agent H,api,State expenses and contracts,,discovered_uningested,,,,not_built +state_portal_mg,Minas Gerais transparency portal,state,P2,not_built,not_implemented,not_loaded,monthly,true,https://www.transparencia.mg.gov.br/,,Agent H,web,State expenses and contracts,,discovered_uningested,,,,not_built +state_portal_ba,Bahia transparency portal,state,P2,not_built,not_implemented,not_loaded,monthly,true,https://www.transparencia.ba.gov.br/,,Agent H,web,State expenses and contracts,,discovered_uningested,,,,not_built +state_portal_ce,Ceara transparency portal,state,P2,not_built,not_implemented,not_loaded,monthly,true,https://www.transparencia.ce.gov.br/,,Agent H,web,State expenses and contracts,,discovered_uningested,,,,not_built +state_portal_go,Goias transparency portal,state,P2,not_built,not_implemented,not_loaded,monthly,true,https://www.transparencia.go.gov.br/,,Agent H,web,State expenses and contracts,,discovered_uningested,,,,not_built +state_portal_pr,Parana transparency portal,state,P2,not_built,not_implemented,not_loaded,monthly,true,https://www.transparencia.pr.gov.br/,,Agent H,web,State expenses and contracts,,discovered_uningested,,,,not_built +state_portal_sc,Santa Catarina transparency portal,state,P2,not_built,not_implemented,not_loaded,monthly,true,https://www.transparencia.sc.gov.br/,,Agent H,web,State expenses and contracts,,discovered_uningested,,,,not_built +state_portal_rs,Rio Grande do Sul transparency portal,state,P2,not_built,not_implemented,not_loaded,monthly,true,https://www.transparencia.rs.gov.br/,,Agent H,web,State 
expenses and contracts,,discovered_uningested,,,,not_built +state_portal_pe,Pernambuco transparency portal,state,P2,not_built,not_implemented,not_loaded,monthly,true,https://www.transparencia.pe.gov.br/,,Agent H,web,State expenses and contracts,,discovered_uningested,,,,not_built +state_portal_rj,Rio de Janeiro transparency portal,state,P2,not_built,not_implemented,not_loaded,monthly,true,https://www.transparencia.rj.gov.br/,,Agent H,web,State expenses and contracts,,discovered_uningested,,,,not_built diff --git a/etl/pyproject.toml b/etl/pyproject.toml index f54aa99..ff31967 100644 --- a/etl/pyproject.toml +++ b/etl/pyproject.toml @@ -1,7 +1,7 @@ [project] name = "bracc-etl" version = "0.1.0" -description = "BRACC ETL — Data ingestion pipelines for Brazilian public data" +description = "BR-ACC ETL — Data ingestion pipelines for Brazilian public data" requires-python = ">=3.12" license = "AGPL-3.0-or-later" dependencies = [ @@ -9,10 +9,11 @@ dependencies = [ "pandas>=2.2.0", "httpx>=0.28.0", "click>=8.1.0", - "defusedxml>=0.7.1", "pydantic>=2.10.0", "pydantic-settings>=2.7.0", "pypdf>=5.2.0", + "defusedxml>=0.7.0", + "pandera>=0.21.0", ] [project.optional-dependencies] diff --git a/etl/scripts/_download_utils.py b/etl/scripts/_download_utils.py index ed0a67f..14261ea 100644 --- a/etl/scripts/_download_utils.py +++ b/etl/scripts/_download_utils.py @@ -3,8 +3,6 @@ from __future__ import annotations import logging -import shutil -import stat import zipfile from pathlib import Path @@ -38,12 +36,21 @@ def download_file(url: str, dest: Path, *, timeout: int = 600) -> bool: response.raise_for_status() + # If we requested a range but server returned full content (200 vs 206), + # start fresh to avoid corruption + if start_byte > 0 and response.status_code != 206: + logger.warning( + "Server ignored Range header for %s, restarting download", + dest.name, + ) + start_byte = 0 + total = response.headers.get("content-length") total_mb = f"{int(total) / 1e6:.1f} MB" if total else "unknown size" logger.info("Downloading %s (%s)...", dest.name, total_mb) - mode = "ab" if start_byte > 0 else "wb" - downloaded = start_byte + mode = "ab" if start_byte > 0 and response.status_code == 206 else "wb" + downloaded = start_byte if mode == "ab" else 0 with open(partial, mode) as f: for chunk in response.iter_bytes(chunk_size=65_536): f.write(chunk) @@ -58,24 +65,49 @@ def download_file(url: str, dest: Path, *, timeout: int = 600) -> bool: return False -def extract_zip(zip_path: Path, output_dir: Path) -> list[Path]: - """Extract ZIP and return list of extracted files. +def safe_extract_zip( + zip_path: Path, + output_dir: Path, + *, + max_total_bytes: int = 50 * 1024**3, # 50GB default (CNPJ zips are huge) +) -> list[Path]: + """Safely extract ZIP with path traversal and bomb guards. Deletes corrupted ZIPs for re-download. 
""" try: with zipfile.ZipFile(zip_path, "r") as zf: - extracted = safe_extract_zip(zf, output_dir) - logger.info("Extracted %d files from %s", len(extracted), zip_path.name) - return extracted + # Check for path traversal + resolved_output = output_dir.resolve() + for info in zf.infolist(): + target = (output_dir / info.filename).resolve() + if not target.is_relative_to(resolved_output): + raise ValueError( + f"Path traversal detected in {zip_path.name}: {info.filename}" + ) + + # Check total uncompressed size (zip bomb guard) + total_size = sum(info.file_size for info in zf.infolist()) + if total_size > max_total_bytes: + raise ValueError( + f"ZIP bomb guard: {zip_path.name} would extract to " + f"{total_size / 1e9:.1f}GB (limit: {max_total_bytes / 1e9:.1f}GB)" + ) + + names = zf.namelist() + zf.extractall(output_dir) + + logger.info("Extracted %d files from %s", len(names), zip_path.name) + return [output_dir / n for n in names] except zipfile.BadZipFile: logger.warning("Bad ZIP file: %s — deleting for re-download", zip_path.name) zip_path.unlink() return [] - except ValueError as exc: - logger.warning("Unsafe ZIP file %s: %s — deleting", zip_path.name, exc) - zip_path.unlink(missing_ok=True) - return [] + + +def extract_zip(zip_path: Path, output_dir: Path) -> list[Path]: + """Extract ZIP and return list of extracted files.""" + return safe_extract_zip(zip_path, output_dir) def validate_csv( @@ -111,60 +143,3 @@ def validate_csv( except Exception as e: logger.warning("Validation failed for %s: %s", path.name, e) return False - - -def safe_extract_zip( - archive: zipfile.ZipFile, - output_dir: Path, - *, - max_members: int = 50_000, - max_uncompressed_bytes: int = 5_000_000_000, -) -> list[Path]: - """Safely extract a ZIP archive. - - Blocks path traversal, symlinks, and oversized archives. - """ - output_root = output_dir.resolve() - infos = archive.infolist() - if len(infos) > max_members: - msg = f"ZIP has too many entries ({len(infos)} > {max_members})" - raise ValueError(msg) - - extracted: list[Path] = [] - uncompressed_total = 0 - for info in infos: - member_name = info.filename.replace("\\", "/") - if not member_name: - continue - - # Reject symlink entries. - mode = info.external_attr >> 16 - if stat.S_ISLNK(mode): - msg = f"ZIP contains symlink entry: {member_name}" - raise ValueError(msg) - - target = (output_dir / member_name).resolve() - try: - target.relative_to(output_root) - except ValueError as exc: - msg = f"Path traversal detected: {member_name}" - raise ValueError(msg) from exc - - if info.is_dir(): - target.mkdir(parents=True, exist_ok=True) - continue - - uncompressed_total += info.file_size - if uncompressed_total > max_uncompressed_bytes: - msg = ( - f"ZIP exceeds max extracted size " - f"({uncompressed_total} > {max_uncompressed_bytes})" - ) - raise ValueError(msg) - - target.parent.mkdir(parents=True, exist_ok=True) - with archive.open(info, "r") as source, target.open("wb") as destination: - shutil.copyfileobj(source, destination) - extracted.append(target) - - return extracted diff --git a/etl/scripts/download_caged.py b/etl/scripts/download_caged.py index 329943a..3d62382 100644 --- a/etl/scripts/download_caged.py +++ b/etl/scripts/download_caged.py @@ -5,9 +5,9 @@ resumability and memory management on large datasets. 
Usage: - python etl/scripts/download_caged.py --billing-project bracc-corruptos - python etl/scripts/download_caged.py --billing-project bracc-corruptos --start-year 2024 - python etl/scripts/download_caged.py --billing-project bracc-corruptos --skip-existing + python etl/scripts/download_caged.py --billing-project icarus-corruptos + python etl/scripts/download_caged.py --billing-project icarus-corruptos --start-year 2024 + python etl/scripts/download_caged.py --billing-project icarus-corruptos --skip-existing """ from __future__ import annotations diff --git a/etl/scripts/download_camara_inquiries.py b/etl/scripts/download_camara_inquiries.py index 02813c8..691c244 100644 --- a/etl/scripts/download_camara_inquiries.py +++ b/etl/scripts/download_camara_inquiries.py @@ -413,7 +413,7 @@ def _write_manifest( ) @click.option( "--billing-project", - default="bracc-corruptos", + default="icarus-corruptos", help="GCP billing project for BQ mode.", ) @click.option( diff --git a/etl/scripts/download_cnpj.py b/etl/scripts/download_cnpj.py index aff1d58..62dabd2 100644 --- a/etl/scripts/download_cnpj.py +++ b/etl/scripts/download_cnpj.py @@ -6,15 +6,21 @@ python etl/scripts/download_cnpj.py --reference-only # reference tables only (tiny) python etl/scripts/download_cnpj.py --files 1 # just first file of each type python etl/scripts/download_cnpj.py --types Empresas # specific type only + python etl/scripts/download_cnpj.py --release 2026-03 # pin to specific monthly release """ from __future__ import annotations +import hashlib +import json import logging +import os import sys +from datetime import datetime, timezone from pathlib import Path import click +import httpx sys.path.insert(0, str(Path(__file__).parent)) from _download_utils import download_file, extract_zip, validate_csv @@ -22,7 +28,13 @@ logging.basicConfig(level=logging.INFO, format="%(asctime)s %(levelname)s %(message)s") logger = logging.getLogger(__name__) -BASE_URL = "https://dadosabertos.rfb.gov.br/CNPJ/" +# Receita Federal Nextcloud (primary since Jan 2026) +NEXTCLOUD_BASE = "https://arquivos.receitafederal.gov.br/s/{token}/download?path=%2F&files=" +KNOWN_TOKENS = ["gn672Ad4CF8N6TK", "YggdBLfdninEJX9"] + +# Legacy URLs (dadosabertos.rfb.gov.br decommissioned Jan 2026) +LEGACY_NEW_BASE_PATTERN = "https://dadosabertos.rfb.gov.br/CNPJ/dados_abertos_cnpj/{year_month}/" +LEGACY_BASE_URL = "https://dadosabertos.rfb.gov.br/CNPJ/" MAIN_TYPES = ["Empresas", "Socios", "Estabelecimentos"] REFERENCE_FILES = [ @@ -48,6 +60,126 @@ } +def _previous_month(year: int, month: int) -> tuple[int, int]: + """Return (year, month) for the previous month.""" + if month == 1: + return year - 1, 12 + return year, month - 1 + + +def _check_url_accessible(url: str, timeout: int = 30) -> bool: + """Send HTTP HEAD to verify a URL is accessible (2xx).""" + try: + resp = httpx.head(url, follow_redirects=True, timeout=timeout) + return resp.status_code < 400 + except httpx.HTTPError: + return False + + +def _check_nextcloud_token(token: str, timeout: int = 30) -> bool: + """Verify a Nextcloud share token is valid via HEAD request.""" + share_url = f"https://arquivos.receitafederal.gov.br/s/{token}" + try: + resp = httpx.head(share_url, follow_redirects=True, timeout=timeout) + return resp.status_code < 400 + except httpx.HTTPError: + return False + + +def resolve_rf_release(year_month: str | None = None) -> str: + """Resolve the Receita Federal CNPJ release URL. + + Strategy: + 1. Try Nextcloud share (primary since Jan 2026): + a. 
Check CNPJ_SHARE_TOKEN env var first. + b. Then try each known token. + 2. Fall back to legacy dadosabertos.rfb.gov.br paths. + 3. Raise RuntimeError if nothing works (fail-closed). + + Returns the resolved base URL. For Nextcloud, files are fetched via + ``{base_url}{filename}``. + """ + now = datetime.now(timezone.utc) + + # --- Nextcloud (primary) --- + tokens_to_try: list[str] = [] + + env_token = os.environ.get("CNPJ_SHARE_TOKEN") + if env_token: + tokens_to_try.append(env_token) + + for t in KNOWN_TOKENS: + if t not in tokens_to_try: + tokens_to_try.append(t) + + for token in tokens_to_try: + logger.info("Probing Nextcloud token: %s...", token[:6]) + if _check_nextcloud_token(token): + base_url = NEXTCLOUD_BASE.format(token=token) + logger.info("Resolved CNPJ via Nextcloud (token %s...)", token[:6]) + return base_url + + # --- Legacy dadosabertos.rfb.gov.br --- + if year_month is not None: + candidates = [year_month] + else: + current = f"{now.year:04d}-{now.month:02d}" + prev_y, prev_m = _previous_month(now.year, now.month) + previous = f"{prev_y:04d}-{prev_m:02d}" + candidates = [current, previous] + + for ym in candidates: + url = LEGACY_NEW_BASE_PATTERN.format(year_month=ym) + logger.info("Probing legacy release URL: %s", url) + if _check_url_accessible(url): + logger.info("Resolved CNPJ release (legacy new path): %s", url) + return url + + logger.info("Trying legacy flat URL: %s", LEGACY_BASE_URL) + if _check_url_accessible(LEGACY_BASE_URL): + logger.info("Resolved CNPJ release (legacy flat): %s", LEGACY_BASE_URL) + return LEGACY_BASE_URL + + tried = ", ".join(candidates) + raise RuntimeError( + f"Could not resolve CNPJ release. Tried Nextcloud tokens, " + f"legacy months [{tried}], and legacy flat path. " + "Receita Federal portal may be down or the URL structure has changed." 
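+        # Fail-closed: no silent fallback to a stale mirror. Operators can
+        # set CNPJ_SHARE_TOKEN to prefer a specific Nextcloud share, or pass
+        # --release YYYY-MM to pin the legacy month probe; Nextcloud tokens
+        # are always tried before any legacy path.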
+ ) + + +def _write_manifest( + output_dir: Path, + base_url: str, + resolved_release: str, + file_results: list[dict], + started_at: str, +) -> Path: + """Write download manifest JSON after download completes.""" + finished_at = datetime.now(timezone.utc).isoformat() + + # Compute an aggregate checksum over all successful file names + sizes + hasher = hashlib.sha256() + for fr in sorted(file_results, key=lambda x: x["name"]): + hasher.update(f"{fr['name']}:{fr['size_bytes']}:{fr['status']}".encode()) + checksum = f"sha256:{hasher.hexdigest()}" + + manifest = { + "source": "receita_federal_cnpj", + "resolved_release": resolved_release, + "base_url": base_url, + "files": file_results, + "started_at": started_at, + "finished_at": finished_at, + "checksum": checksum, + } + + manifest_path = output_dir / "download_manifest.json" + manifest_path.write_text(json.dumps(manifest, indent=2, ensure_ascii=False), encoding="utf-8") + logger.info("Manifest written: %s", manifest_path) + return manifest_path + + @click.command() @click.option("--output-dir", default="./data/cnpj", help="Base output directory") @click.option("--files", type=int, default=10, help="Number of files per type (0-9)") @@ -56,6 +188,7 @@ @click.option("--skip-existing/--no-skip-existing", default=True, help="Skip already downloaded files") @click.option("--skip-extract", is_flag=True, help="Skip extraction after download") @click.option("--timeout", type=int, default=600, help="Download timeout in seconds") +@click.option("--release", default=None, help="Pin to specific monthly release (YYYY-MM format)") def main( output_dir: str, files: int, @@ -64,8 +197,20 @@ def main( skip_existing: bool, skip_extract: bool, timeout: int, + release: str | None, ) -> None: """Download and extract CNPJ data from Receita Federal.""" + started_at = datetime.now(timezone.utc).isoformat() + + base_url = resolve_rf_release(release) + # Extract the release identifier from the resolved URL + resolved_release = release or "legacy" + if "arquivos.receitafederal.gov.br" in base_url: + resolved_release = "nextcloud" + elif "/dados_abertos_cnpj/" in base_url: + # Extract YYYY-MM from URL + resolved_release = base_url.rstrip("/").rsplit("/", 1)[-1] + base = Path(output_dir) raw_dir = base / "raw" extract_dir = base / "extracted" @@ -73,14 +218,26 @@ def main( for d in [raw_dir, extract_dir, ref_dir]: d.mkdir(parents=True, exist_ok=True) + file_results: list[dict] = [] + # --- Reference tables (always download, they're tiny) --- logger.info("=== Reference tables ===") for filename in REFERENCE_FILES: dest = raw_dir / filename if skip_existing and dest.exists(): logger.info("Skipping (exists): %s", filename) + file_results.append({ + "name": filename, + "status": "skipped", + "size_bytes": dest.stat().st_size, + }) else: - download_file(f"{BASE_URL}{filename}", dest, timeout=timeout) + success = download_file(f"{base_url}{filename}", dest, timeout=timeout) + file_results.append({ + "name": filename, + "status": "ok" if success else "failed", + "size_bytes": dest.stat().st_size if dest.exists() else 0, + }) if not skip_extract and dest.exists(): extracted = extract_zip(dest, ref_dir) @@ -90,7 +247,8 @@ def main( validate_csv(f, expected_cols=expected) if reference_only: - logger.info("Reference-only mode — done.") + logger.info("Reference-only mode -- done.") + _write_manifest(base, base_url, resolved_release, file_results, started_at) return # --- Main data files --- @@ -102,10 +260,25 @@ def main( dest = raw_dir / filename if skip_existing and 
dest.exists(): logger.info("Skipping (exists): %s", filename) + file_results.append({ + "name": filename, + "status": "skipped", + "size_bytes": dest.stat().st_size, + }) else: - success = download_file(f"{BASE_URL}{filename}", dest, timeout=timeout) + success = download_file(f"{base_url}{filename}", dest, timeout=timeout) if not success: + file_results.append({ + "name": filename, + "status": "failed", + "size_bytes": 0, + }) continue + file_results.append({ + "name": filename, + "status": "ok", + "size_bytes": dest.stat().st_size if dest.exists() else 0, + }) if not skip_extract and dest.exists(): extracted = extract_zip(dest, extract_dir) @@ -120,6 +293,7 @@ def main( logger.info("=== Download complete ===") _print_summary(raw_dir, extract_dir, ref_dir) + _write_manifest(base, base_url, resolved_release, file_results, started_at) def _print_summary(raw_dir: Path, extract_dir: Path, ref_dir: Path) -> None: diff --git a/etl/scripts/download_cnpj_bq.py b/etl/scripts/download_cnpj_bq.py index 8357bb3..d685db5 100644 --- a/etl/scripts/download_cnpj_bq.py +++ b/etl/scripts/download_cnpj_bq.py @@ -10,8 +10,8 @@ - download_manifest.json Usage: - python etl/scripts/download_cnpj_bq.py --billing-project bracc-corruptos - python etl/scripts/download_cnpj_bq.py --billing-project bracc-corruptos --tables socios + python etl/scripts/download_cnpj_bq.py --billing-project icarus-corruptos + python etl/scripts/download_cnpj_bq.py --billing-project icarus-corruptos --tables socios """ from __future__ import annotations @@ -105,6 +105,44 @@ PAGE_SIZE = 100_000 +def _run_bigquery_precheck( + *, + billing_project: str, + source_project: str, + source_dataset: str, + snapshot_start: str | None, +) -> None: + """Run explicit auth/ACL prechecks before starting large table downloads.""" + from google.cloud import bigquery + + client = bigquery.Client(project=billing_project) + logger.info("Running BigQuery precheck: SELECT 1") + list(client.query("SELECT 1 AS ok").result()) + + socios_table = f"{source_project}.{source_dataset}.socios" + if snapshot_start: + precheck_sql = ( + f"SELECT COUNT(1) AS n FROM `{socios_table}` " + "WHERE data >= @snapshot_start" + ) + query_params = [ + bigquery.ScalarQueryParameter("snapshot_start", "DATE", snapshot_start), + ] + else: + precheck_sql = f"SELECT COUNT(1) AS n FROM `{socios_table}`" + query_params = [] + + logger.info("Running BigQuery precheck: %s", precheck_sql) + rows = list( + client.query( + precheck_sql, + job_config=bigquery.QueryJobConfig(query_parameters=query_params), + ).result(), + ) + check_value = rows[0].n if rows else 0 + logger.info("BigQuery precheck OK: socios_count=%s", check_value) + + def _sha256_file(path: Path) -> str: digest = hashlib.sha256() with path.open("rb") as f: @@ -292,6 +330,19 @@ def main( ) source_project, source_dataset = dataset.split(".", 1) + try: + _run_bigquery_precheck( + billing_project=billing_project, + source_project=source_project, + source_dataset=source_dataset, + snapshot_start=snapshot_start, + ) + except Exception as exc: + raise click.ClickException( + "BigQuery precheck failed. 
Configure a non-interactive service account " + "(GOOGLE_APPLICATION_CREDENTIALS) with dataset ACL and billing access.", + ) from exc + selected = list(tables) if tables else list(TABLES.keys()) run_id = f"cnpj-bq-{datetime.now(UTC).strftime('%Y%m%d%H%M%S')}-{uuid.uuid4().hex[:8]}" logger.info( diff --git a/etl/scripts/download_dou.py b/etl/scripts/download_dou.py index 4d85096..4a0759f 100644 --- a/etl/scripts/download_dou.py +++ b/etl/scripts/download_dou.py @@ -22,7 +22,6 @@ import click import httpx -from _download_utils import safe_extract_zip logging.basicConfig( level=logging.INFO, format="%(asctime)s %(levelname)s %(message)s" @@ -90,15 +89,24 @@ def _download_zip( xml_count = 0 try: + resolved_dir = section_dir.resolve() with zipfile.ZipFile(BytesIO(resp.content)) as zf: - extracted = safe_extract_zip(zf, section_dir) - xml_count = sum(1 for path in extracted if path.suffix.lower() == ".xml") + for member in zf.namelist(): + # Path traversal guard + target = (section_dir / member).resolve() + if not target.is_relative_to(resolved_dir): + logger.warning( + "Path traversal detected in %s: %s — skipping", + zip_name, + member, + ) + continue + if member.lower().endswith(".xml"): + zf.extract(member, section_dir) + xml_count += 1 except zipfile.BadZipFile: logger.warning("Bad ZIP file: %s", zip_name) return 0 - except ValueError as exc: - logger.warning("Unsafe ZIP file %s: %s", zip_name, exc) - return 0 if xml_count > 0: marker.write_text(str(xml_count)) diff --git a/etl/scripts/download_mides.py b/etl/scripts/download_mides.py index 8a51a4e..0617568 100644 --- a/etl/scripts/download_mides.py +++ b/etl/scripts/download_mides.py @@ -71,7 +71,7 @@ def _write_manifest(out_dir: Path, tables: list[dict[str, Any]]) -> Path: @click.command() -@click.option("--billing-project", default="bracc-corruptos", help="GCP billing project") +@click.option("--billing-project", default="icarus-corruptos", help="GCP billing project") @click.option( "--dataset", default=WORLD_WB_DATASET, diff --git a/etl/scripts/download_pncp.py b/etl/scripts/download_pncp.py index 4ac59c5..061536e 100644 --- a/etl/scripts/download_pncp.py +++ b/etl/scripts/download_pncp.py @@ -439,7 +439,7 @@ def main( client = httpx.Client( timeout=timeout, follow_redirects=True, - headers={"User-Agent": "BRACC-ETL/1.0 (public data research)"}, + headers={"User-Agent": "BR-ACC-ETL/1.0 (public data research)"}, ) total_records = 0 diff --git a/etl/scripts/download_renuncias.py b/etl/scripts/download_renuncias.py index fae7bc2..2fbef69 100644 --- a/etl/scripts/download_renuncias.py +++ b/etl/scripts/download_renuncias.py @@ -8,10 +8,12 @@ import argparse import logging -import zipfile +import sys from pathlib import Path import httpx + +sys.path.insert(0, str(Path(__file__).parent)) from _download_utils import safe_extract_zip logger = logging.getLogger(__name__) @@ -34,15 +36,14 @@ def download_year(output_dir: Path, year: int) -> None: url, follow_redirects=True, timeout=300, - headers={"User-Agent": "BRACC-ETL/1.0"}, + headers={"User-Agent": "BR-ACC-ETL/1.0"}, ) response.raise_for_status() dest_zip.write_bytes(response.content) logger.info("Downloaded: %s (%d bytes)", dest_zip.name, len(response.content)) - with zipfile.ZipFile(dest_zip, "r") as zf: - extracted = safe_extract_zip(zf, output_dir) - logger.info("Extracted %d files", len(extracted)) + extracted = safe_extract_zip(dest_zip, output_dir) + logger.info("Extracted %d files", len(extracted)) except httpx.HTTPError: logger.warning("Failed to download renuncias for %d", year) 
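# A minimal usage sketch of how the updated download helpers compose,
# assuming the repo root as the working directory; the archive name
# "Empresas0.zip" and the output paths are illustrative, not taken
# from the patch above.
import sys
from pathlib import Path

sys.path.insert(0, "etl/scripts")
from _download_utils import download_file, safe_extract_zip
from download_cnpj import resolve_rf_release

# Probes Nextcloud shares first, then legacy monthly/flat paths, and
# raises RuntimeError if nothing resolves (fail-closed).
base_url = resolve_rf_release()
dest = Path("./data/cnpj/raw/Empresas0.zip")  # hypothetical file name
if download_file(f"{base_url}Empresas0.zip", dest, timeout=600):
    # download_file resumes partial downloads via Range (restarting when the
    # server ignores the header); safe_extract_zip blocks path traversal,
    # caps total uncompressed size, and deletes corrupt archives so the
    # next run re-downloads them.
    extracted = safe_extract_zip(dest, Path("./data/cnpj/extracted"))
    print(f"Extracted {len(extracted)} entries")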
diff --git a/etl/scripts/download_senado_cpis.py b/etl/scripts/download_senado_cpis.py index d16832a..2a4570f 100644 --- a/etl/scripts/download_senado_cpis.py +++ b/etl/scripts/download_senado_cpis.py @@ -16,13 +16,13 @@ import json import logging import re +import defusedxml.ElementTree as ET from datetime import UTC, datetime from pathlib import Path from typing import Any import click import httpx -from defusedxml import ElementTree as ET from download_senado_cpi_archive import fetch_archive_historical logger = logging.getLogger(__name__) diff --git a/etl/scripts/download_siconfi.py b/etl/scripts/download_siconfi.py index c105c3b..6ce1687 100644 --- a/etl/scripts/download_siconfi.py +++ b/etl/scripts/download_siconfi.py @@ -44,7 +44,7 @@ def get_all_entities() -> list[dict]: url, params={"offset": offset, "limit": limit}, timeout=60, - headers={"User-Agent": "BRACC-ETL/1.0"}, + headers={"User-Agent": "BR-ACC-ETL/1.0"}, ) response.raise_for_status() data = response.json() @@ -125,7 +125,7 @@ def download_year( header_written = partial.exists() and partial.stat().st_size > 0 with ( - httpx.Client(headers={"User-Agent": "BRACC-ETL/1.0"}) as client, + httpx.Client(headers={"User-Agent": "BR-ACC-ETL/1.0"}) as client, open(partial, "a", newline="", encoding="utf-8") as f, ): writer: csv.DictWriter | None = None diff --git a/etl/scripts/download_stf.py b/etl/scripts/download_stf.py index ddb13f3..99e4048 100644 --- a/etl/scripts/download_stf.py +++ b/etl/scripts/download_stf.py @@ -5,9 +5,9 @@ Requires `google-cloud-bigquery` and an authenticated GCP project. Usage: - python etl/scripts/download_stf.py --billing-project bracc-corruptos - python etl/scripts/download_stf.py --billing-project bracc-corruptos --skip-existing - python etl/scripts/download_stf.py --billing-project bracc-corruptos --output-dir ./data/stf + python etl/scripts/download_stf.py --billing-project icarus-corruptos + python etl/scripts/download_stf.py --billing-project icarus-corruptos --skip-existing + python etl/scripts/download_stf.py --billing-project icarus-corruptos --output-dir ./data/stf """ from __future__ import annotations diff --git a/etl/scripts/download_tse_bens.py b/etl/scripts/download_tse_bens.py index 24cbf87..954e0c1 100644 --- a/etl/scripts/download_tse_bens.py +++ b/etl/scripts/download_tse_bens.py @@ -5,9 +5,9 @@ Requires `google-cloud-bigquery` and an authenticated GCP project. Usage: - python etl/scripts/download_tse_bens.py --billing-project bracc-corruptos - python etl/scripts/download_tse_bens.py --billing-project bracc-corruptos --start-year 2018 - python etl/scripts/download_tse_bens.py --billing-project bracc-corruptos --skip-existing + python etl/scripts/download_tse_bens.py --billing-project icarus-corruptos + python etl/scripts/download_tse_bens.py --billing-project icarus-corruptos --start-year 2018 + python etl/scripts/download_tse_bens.py --billing-project icarus-corruptos --skip-existing """ from __future__ import annotations diff --git a/etl/scripts/download_tse_filiados.py b/etl/scripts/download_tse_filiados.py index 7c26323..f8d2531 100644 --- a/etl/scripts/download_tse_filiados.py +++ b/etl/scripts/download_tse_filiados.py @@ -7,9 +7,9 @@ Requires `google-cloud-bigquery` and an authenticated GCP project. 
Usage: - python etl/scripts/download_tse_filiados.py --billing-project bracc-corruptos - python etl/scripts/download_tse_filiados.py --billing-project bracc-corruptos --skip-existing - python etl/scripts/download_tse_filiados.py --billing-project bracc-corruptos --all-statuses + python etl/scripts/download_tse_filiados.py --billing-project icarus-corruptos + python etl/scripts/download_tse_filiados.py --billing-project icarus-corruptos --skip-existing + python etl/scripts/download_tse_filiados.py --billing-project icarus-corruptos --all-statuses """ from __future__ import annotations diff --git a/etl/scripts/download_un_sanctions.py b/etl/scripts/download_un_sanctions.py index 9ec5af1..ddfe443 100644 --- a/etl/scripts/download_un_sanctions.py +++ b/etl/scripts/download_un_sanctions.py @@ -14,10 +14,10 @@ import json import logging import sys +import defusedxml.ElementTree as ET from pathlib import Path import click -from defusedxml import ElementTree as ET # Allow imports from scripts/ directory sys.path.insert(0, str(Path(__file__).parent)) diff --git a/etl/src/bracc_etl/base.py b/etl/src/bracc_etl/base.py index 2309a6a..effd05e 100644 --- a/etl/src/bracc_etl/base.py +++ b/etl/src/bracc_etl/base.py @@ -21,12 +21,16 @@ def __init__( limit: int | None = None, chunk_size: int = 50_000, neo4j_database: str | None = None, + history: bool = False, ) -> None: self.driver = driver self.data_dir = data_dir self.limit = limit self.chunk_size = chunk_size self.neo4j_database = neo4j_database or os.getenv("NEO4J_DATABASE", "neo4j") + self.history = history + self.rows_in: int = 0 + self.rows_loaded: int = 0 source_key = getattr(self, "source_id", getattr(self, "name", "unknown_source")) self.run_id = f"{source_key}_{datetime.now(tz=UTC).strftime('%Y%m%d%H%M%S')}" @@ -87,8 +91,8 @@ def _upsert_ingestion_run( " r.started_at = coalesce($started_at, r.started_at), " " r.finished_at = coalesce($finished_at, r.finished_at), " " r.error = coalesce($error, r.error), " - " r.rows_in = coalesce(r.rows_in, 0), " - " r.rows_loaded = coalesce(r.rows_loaded, 0)" + " r.rows_in = $rows_in, " + " r.rows_loaded = $rows_loaded" ) run_id = getattr(self, "run_id", f"{source_id}_manual") params = { @@ -98,6 +102,8 @@ def _upsert_ingestion_run( "started_at": started_at, "finished_at": finished_at, "error": error, + "rows_in": self.rows_in, + "rows_loaded": self.rows_loaded, } try: with self.driver.session(database=self.neo4j_database) as session: diff --git a/etl/src/bracc_etl/entity_resolution/config.py b/etl/src/bracc_etl/entity_resolution/config.py index 6ff0518..34f65a5 100644 --- a/etl/src/bracc_etl/entity_resolution/config.py +++ b/etl/src/bracc_etl/entity_resolution/config.py @@ -13,7 +13,7 @@ def get_person_settings() -> dict[str, Any]: """ try: import splink.comparison_library as cl # type: ignore[import-not-found] - from splink import SettingsCreator + from splink import SettingsCreator # type: ignore[import-not-found,unused-ignore] except ImportError as exc: raise ImportError( "splink is required for entity resolution. 
" diff --git a/etl/src/bracc_etl/pipelines/bcb.py b/etl/src/bracc_etl/pipelines/bcb.py index a52b790..bbb59d2 100644 --- a/etl/src/bracc_etl/pipelines/bcb.py +++ b/etl/src/bracc_etl/pipelines/bcb.py @@ -51,8 +51,9 @@ def __init__( data_dir: str = "./data", limit: int | None = None, chunk_size: int = 50_000, + **kwargs: Any, ) -> None: - super().__init__(driver, data_dir, limit=limit, chunk_size=chunk_size) + super().__init__(driver, data_dir, limit=limit, chunk_size=chunk_size, **kwargs) self._raw: pd.DataFrame = pd.DataFrame() self.penalties: list[dict[str, Any]] = [] self.company_rels: list[dict[str, Any]] = [] diff --git a/etl/src/bracc_etl/pipelines/bndes.py b/etl/src/bracc_etl/pipelines/bndes.py index ac9fbf4..49cb86a 100644 --- a/etl/src/bracc_etl/pipelines/bndes.py +++ b/etl/src/bracc_etl/pipelines/bndes.py @@ -33,8 +33,9 @@ def __init__( data_dir: str = "./data", limit: int | None = None, chunk_size: int = 50_000, + **kwargs: Any, ) -> None: - super().__init__(driver, data_dir, limit=limit, chunk_size=chunk_size) + super().__init__(driver, data_dir, limit=limit, chunk_size=chunk_size, **kwargs) self._raw: pd.DataFrame = pd.DataFrame() self.finances: list[dict[str, Any]] = [] self.relationships: list[dict[str, Any]] = [] @@ -51,8 +52,15 @@ def _parse_value(self, value: str) -> float: def extract(self) -> None: bndes_dir = Path(self.data_dir) / "bndes" + if not bndes_dir.exists(): + logger.warning("[%s] Data directory not found: %s", self.name, bndes_dir) + return + csv_path = bndes_dir / "operacoes-nao-automaticas.csv" + if not csv_path.exists(): + logger.warning("[%s] CSV file not found: %s", self.name, csv_path) + return self._raw = pd.read_csv( - bndes_dir / "operacoes-nao-automaticas.csv", + csv_path, dtype=str, delimiter=";", encoding="latin-1", diff --git a/etl/src/bracc_etl/pipelines/caged.py b/etl/src/bracc_etl/pipelines/caged.py index 38f46b1..f0d47eb 100644 --- a/etl/src/bracc_etl/pipelines/caged.py +++ b/etl/src/bracc_etl/pipelines/caged.py @@ -88,8 +88,9 @@ def __init__( data_dir: str = "./data", limit: int | None = None, chunk_size: int = 50_000, + **kwargs: Any, ) -> None: - super().__init__(driver, data_dir, limit=limit, chunk_size=chunk_size) + super().__init__(driver, data_dir, limit=limit, chunk_size=chunk_size, **kwargs) self._csv_files: list[Path] = [] def extract(self) -> None: diff --git a/etl/src/bracc_etl/pipelines/camara.py b/etl/src/bracc_etl/pipelines/camara.py index 68d8c4e..4050a36 100644 --- a/etl/src/bracc_etl/pipelines/camara.py +++ b/etl/src/bracc_etl/pipelines/camara.py @@ -60,8 +60,9 @@ def __init__( data_dir: str = "./data", limit: int | None = None, chunk_size: int = 50_000, + **kwargs: Any, ) -> None: - super().__init__(driver, data_dir, limit=limit, chunk_size=chunk_size) + super().__init__(driver, data_dir, limit=limit, chunk_size=chunk_size, **kwargs) self._raw: pd.DataFrame = pd.DataFrame() self.expenses: list[dict[str, Any]] = [] self.deputies: list[dict[str, Any]] = [] diff --git a/etl/src/bracc_etl/pipelines/camara_inquiries.py b/etl/src/bracc_etl/pipelines/camara_inquiries.py index 30e611f..8966910 100644 --- a/etl/src/bracc_etl/pipelines/camara_inquiries.py +++ b/etl/src/bracc_etl/pipelines/camara_inquiries.py @@ -66,8 +66,9 @@ def __init__( data_dir: str = "./data", limit: int | None = None, chunk_size: int = 50_000, + **kwargs: Any, ) -> None: - super().__init__(driver, data_dir, limit=limit, chunk_size=chunk_size) + super().__init__(driver, data_dir, limit=limit, chunk_size=chunk_size, **kwargs) self._raw_inquiries: pd.DataFrame = 
pd.DataFrame() self._raw_requirements: pd.DataFrame = pd.DataFrame() diff --git a/etl/src/bracc_etl/pipelines/ceaf.py b/etl/src/bracc_etl/pipelines/ceaf.py index d83a4f2..1895ba0 100644 --- a/etl/src/bracc_etl/pipelines/ceaf.py +++ b/etl/src/bracc_etl/pipelines/ceaf.py @@ -31,8 +31,9 @@ def __init__( data_dir: str = "./data", limit: int | None = None, chunk_size: int = 50_000, + **kwargs: Any, ) -> None: - super().__init__(driver, data_dir, limit=limit, chunk_size=chunk_size) + super().__init__(driver, data_dir, limit=limit, chunk_size=chunk_size, **kwargs) self._raw: pd.DataFrame = pd.DataFrame() self.expulsions: list[dict[str, Any]] = [] self.person_rels: list[dict[str, Any]] = [] diff --git a/etl/src/bracc_etl/pipelines/cepim.py b/etl/src/bracc_etl/pipelines/cepim.py index bac6a1a..fc7a914 100644 --- a/etl/src/bracc_etl/pipelines/cepim.py +++ b/etl/src/bracc_etl/pipelines/cepim.py @@ -37,8 +37,9 @@ def __init__( data_dir: str = "./data", limit: int | None = None, chunk_size: int = 50_000, + **kwargs: Any, ) -> None: - super().__init__(driver, data_dir, limit=limit, chunk_size=chunk_size) + super().__init__(driver, data_dir, limit=limit, chunk_size=chunk_size, **kwargs) self._raw: pd.DataFrame = pd.DataFrame() self.ngos: list[dict[str, Any]] = [] self.company_rels: list[dict[str, Any]] = [] diff --git a/etl/src/bracc_etl/pipelines/cnpj.py b/etl/src/bracc_etl/pipelines/cnpj.py index 5eb713e..ce9d3e9 100644 --- a/etl/src/bracc_etl/pipelines/cnpj.py +++ b/etl/src/bracc_etl/pipelines/cnpj.py @@ -216,9 +216,11 @@ def __init__( limit: int | None = None, chunk_size: int = 50_000, history: bool = False, + **kwargs: Any, ) -> None: - super().__init__(driver, data_dir, limit=limit, chunk_size=chunk_size) - self.history = history + super().__init__( + driver, data_dir, limit=limit, chunk_size=chunk_size, history=history, **kwargs, + ) self.run_id = f"cnpj-{datetime.now(UTC).strftime('%Y%m%d%H%M%S')}" self._raw_empresas: pd.DataFrame = pd.DataFrame() self._raw_socios: pd.DataFrame = pd.DataFrame() diff --git a/etl/src/bracc_etl/pipelines/comprasnet.py b/etl/src/bracc_etl/pipelines/comprasnet.py index bd6386c..373b33b 100644 --- a/etl/src/bracc_etl/pipelines/comprasnet.py +++ b/etl/src/bracc_etl/pipelines/comprasnet.py @@ -63,7 +63,7 @@ class ComprasnetPipeline(Pipeline): """ETL pipeline for PNCP federal procurement contracts.""" name = "comprasnet" - source_id = "pncp" + source_id = "comprasnet" def __init__( self, @@ -71,8 +71,9 @@ def __init__( data_dir: str = "./data", limit: int | None = None, chunk_size: int = 50_000, + **kwargs: Any, ) -> None: - super().__init__(driver, data_dir, limit=limit, chunk_size=chunk_size) + super().__init__(driver, data_dir, limit=limit, chunk_size=chunk_size, **kwargs) self.contracts: list[dict[str, Any]] = [] def extract(self) -> None: diff --git a/etl/src/bracc_etl/pipelines/cpgf.py b/etl/src/bracc_etl/pipelines/cpgf.py index e757b71..122e341 100644 --- a/etl/src/bracc_etl/pipelines/cpgf.py +++ b/etl/src/bracc_etl/pipelines/cpgf.py @@ -84,8 +84,9 @@ def __init__( data_dir: str = "./data", limit: int | None = None, chunk_size: int = 50_000, + **kwargs: Any, ) -> None: - super().__init__(driver, data_dir, limit=limit, chunk_size=chunk_size) + super().__init__(driver, data_dir, limit=limit, chunk_size=chunk_size, **kwargs) self._raw: pd.DataFrame = pd.DataFrame() self.expenses: list[dict[str, Any]] = [] self.cardholders: list[dict[str, Any]] = [] diff --git a/etl/src/bracc_etl/pipelines/cvm.py b/etl/src/bracc_etl/pipelines/cvm.py index 7593a95..8f45cc4 100644 --- 
a/etl/src/bracc_etl/pipelines/cvm.py +++ b/etl/src/bracc_etl/pipelines/cvm.py @@ -38,8 +38,9 @@ def __init__( data_dir: str = "./data", limit: int | None = None, chunk_size: int = 50_000, + **kwargs: Any, ) -> None: - super().__init__(driver, data_dir, limit=limit, chunk_size=chunk_size) + super().__init__(driver, data_dir, limit=limit, chunk_size=chunk_size, **kwargs) self._raw_processos: pd.DataFrame = pd.DataFrame() self._raw_acusados: pd.DataFrame = pd.DataFrame() self.proceedings: list[dict[str, Any]] = [] diff --git a/etl/src/bracc_etl/pipelines/cvm_funds.py b/etl/src/bracc_etl/pipelines/cvm_funds.py index 4c205bd..dbb320d 100644 --- a/etl/src/bracc_etl/pipelines/cvm_funds.py +++ b/etl/src/bracc_etl/pipelines/cvm_funds.py @@ -43,8 +43,9 @@ def __init__( data_dir: str = "./data", limit: int | None = None, chunk_size: int = 50_000, + **kwargs: Any, ) -> None: - super().__init__(driver, data_dir, limit=limit, chunk_size=chunk_size) + super().__init__(driver, data_dir, limit=limit, chunk_size=chunk_size, **kwargs) self._raw: pd.DataFrame = pd.DataFrame() self.funds: list[dict[str, Any]] = [] self.admin_rels: list[dict[str, Any]] = [] diff --git a/etl/src/bracc_etl/pipelines/datajud.py b/etl/src/bracc_etl/pipelines/datajud.py index 3805a2f..3410525 100644 --- a/etl/src/bracc_etl/pipelines/datajud.py +++ b/etl/src/bracc_etl/pipelines/datajud.py @@ -50,8 +50,9 @@ def __init__( data_dir: str = "./data", limit: int | None = None, chunk_size: int = 50_000, + **kwargs: Any, ) -> None: - super().__init__(driver, data_dir, limit=limit, chunk_size=chunk_size) + super().__init__(driver, data_dir, limit=limit, chunk_size=chunk_size, **kwargs) self._raw_cases: pd.DataFrame = pd.DataFrame() self._raw_parties: pd.DataFrame = pd.DataFrame() diff --git a/etl/src/bracc_etl/pipelines/datasus.py b/etl/src/bracc_etl/pipelines/datasus.py index 6f4f7a1..a4a9269 100644 --- a/etl/src/bracc_etl/pipelines/datasus.py +++ b/etl/src/bracc_etl/pipelines/datasus.py @@ -29,8 +29,9 @@ def __init__( data_dir: str = "./data", limit: int | None = None, chunk_size: int = 50_000, + **kwargs: Any, ) -> None: - super().__init__(driver, data_dir, limit=limit, chunk_size=chunk_size) + super().__init__(driver, data_dir, limit=limit, chunk_size=chunk_size, **kwargs) self._raw: pd.DataFrame = pd.DataFrame() self.facilities: list[dict[str, Any]] = [] self.company_links: list[dict[str, Any]] = [] diff --git a/etl/src/bracc_etl/pipelines/dou.py b/etl/src/bracc_etl/pipelines/dou.py index 9e83cee..cded552 100644 --- a/etl/src/bracc_etl/pipelines/dou.py +++ b/etl/src/bracc_etl/pipelines/dou.py @@ -17,7 +17,10 @@ from pathlib import Path from typing import TYPE_CHECKING, Any -from defusedxml import ElementTree # type: ignore[import-untyped] +from defusedxml.ElementTree import ParseError as _XmlParseError # type: ignore[import-untyped] +from defusedxml.ElementTree import ( + parse as _safe_xml_parse, # type: ignore[import-untyped,unused-ignore] +) from bracc_etl.base import Pipeline from bracc_etl.loader import Neo4jBatchLoader @@ -141,8 +144,9 @@ def __init__( data_dir: str = "./data", limit: int | None = None, chunk_size: int = 50_000, + **kwargs: Any, ) -> None: - super().__init__(driver, data_dir, limit=limit, chunk_size=chunk_size) + super().__init__(driver, data_dir, limit=limit, chunk_size=chunk_size, **kwargs) self._raw_acts: list[dict[str, str]] = [] self.acts: list[dict[str, Any]] = [] self.person_rels: list[dict[str, Any]] = [] @@ -227,8 +231,8 @@ def _extract_xml(self, xml_files: list[Path]) -> None: """Extract acts from 
Imprensa Nacional XML dumps.""" for f in xml_files: try: - tree = ElementTree.parse(f) # noqa: S314 - except ElementTree.ParseError: + tree = _safe_xml_parse(f) + except _XmlParseError: logger.warning("[dou] Failed to parse XML: %s", f.name) continue diff --git a/etl/src/bracc_etl/pipelines/eu_sanctions.py b/etl/src/bracc_etl/pipelines/eu_sanctions.py index 2bb60a7..5d3b11e 100644 --- a/etl/src/bracc_etl/pipelines/eu_sanctions.py +++ b/etl/src/bracc_etl/pipelines/eu_sanctions.py @@ -76,8 +76,9 @@ def __init__( data_dir: str = "./data", limit: int | None = None, chunk_size: int = 50_000, + **kwargs: Any, ) -> None: - super().__init__(driver, data_dir, limit=limit, chunk_size=chunk_size) + super().__init__(driver, data_dir, limit=limit, chunk_size=chunk_size, **kwargs) self._raw: pd.DataFrame = pd.DataFrame() self.sanctions: list[dict[str, Any]] = [] self.person_rels: list[dict[str, Any]] = [] diff --git a/etl/src/bracc_etl/pipelines/holdings.py b/etl/src/bracc_etl/pipelines/holdings.py index 801b776..e9afd90 100644 --- a/etl/src/bracc_etl/pipelines/holdings.py +++ b/etl/src/bracc_etl/pipelines/holdings.py @@ -36,8 +36,9 @@ def __init__( data_dir: str = "./data", limit: int | None = None, chunk_size: int = 50_000, + **kwargs: Any, ) -> None: - super().__init__(driver, data_dir, limit=limit, chunk_size=chunk_size) + super().__init__(driver, data_dir, limit=limit, chunk_size=chunk_size, **kwargs) self._raw: pd.DataFrame = pd.DataFrame() self.holding_rels: list[dict[str, Any]] = [] diff --git a/etl/src/bracc_etl/pipelines/ibama.py b/etl/src/bracc_etl/pipelines/ibama.py index be1f1c6..3256d34 100644 --- a/etl/src/bracc_etl/pipelines/ibama.py +++ b/etl/src/bracc_etl/pipelines/ibama.py @@ -40,8 +40,9 @@ def __init__( data_dir: str = "./data", limit: int | None = None, chunk_size: int = 50_000, + **kwargs: Any, ) -> None: - super().__init__(driver, data_dir, limit=limit, chunk_size=chunk_size) + super().__init__(driver, data_dir, limit=limit, chunk_size=chunk_size, **kwargs) self._raw: pd.DataFrame = pd.DataFrame() self.embargoes: list[dict[str, Any]] = [] self.companies: list[dict[str, Any]] = [] @@ -65,7 +66,13 @@ def _primary_biome(self, value: str) -> str: def extract(self) -> None: ibama_dir = Path(self.data_dir) / "ibama" + if not ibama_dir.exists(): + logger.warning("[%s] Data directory not found: %s", self.name, ibama_dir) + return csv_path = ibama_dir / "areas_embargadas.csv" + if not csv_path.exists(): + logger.warning("[%s] CSV file not found: %s", self.name, csv_path) + return logger.info("[ibama] Reading %s", csv_path) self._raw = pd.read_csv( csv_path, diff --git a/etl/src/bracc_etl/pipelines/icij.py b/etl/src/bracc_etl/pipelines/icij.py index b025f1e..e1fede6 100644 --- a/etl/src/bracc_etl/pipelines/icij.py +++ b/etl/src/bracc_etl/pipelines/icij.py @@ -42,8 +42,9 @@ def __init__( data_dir: str = "./data", limit: int | None = None, chunk_size: int = 50_000, + **kwargs: Any, ) -> None: - super().__init__(driver, data_dir, limit=limit, chunk_size=chunk_size) + super().__init__(driver, data_dir, limit=limit, chunk_size=chunk_size, **kwargs) self._entities_raw: pd.DataFrame = pd.DataFrame() self._officers_raw: pd.DataFrame = pd.DataFrame() self._intermediaries_raw: pd.DataFrame = pd.DataFrame() diff --git a/etl/src/bracc_etl/pipelines/inep.py b/etl/src/bracc_etl/pipelines/inep.py index fd0d1c4..0ff9d35 100644 --- a/etl/src/bracc_etl/pipelines/inep.py +++ b/etl/src/bracc_etl/pipelines/inep.py @@ -42,8 +42,9 @@ def __init__( data_dir: str = "./data", limit: int | None = None, chunk_size: 
int = 50_000, + **kwargs: Any, ) -> None: - super().__init__(driver, data_dir, limit=limit, chunk_size=chunk_size) + super().__init__(driver, data_dir, limit=limit, chunk_size=chunk_size, **kwargs) self.schools: list[dict[str, Any]] = [] self.school_company_links: list[dict[str, Any]] = [] diff --git a/etl/src/bracc_etl/pipelines/leniency.py b/etl/src/bracc_etl/pipelines/leniency.py index 6076664..6b4573d 100644 --- a/etl/src/bracc_etl/pipelines/leniency.py +++ b/etl/src/bracc_etl/pipelines/leniency.py @@ -31,8 +31,9 @@ def __init__( data_dir: str = "./data", limit: int | None = None, chunk_size: int = 50_000, + **kwargs: Any, ) -> None: - super().__init__(driver, data_dir, limit=limit, chunk_size=chunk_size) + super().__init__(driver, data_dir, limit=limit, chunk_size=chunk_size, **kwargs) self._raw: pd.DataFrame = pd.DataFrame() self.agreements: list[dict[str, Any]] = [] self.company_rels: list[dict[str, Any]] = [] diff --git a/etl/src/bracc_etl/pipelines/mides.py b/etl/src/bracc_etl/pipelines/mides.py index 62033c3..7a9f520 100644 --- a/etl/src/bracc_etl/pipelines/mides.py +++ b/etl/src/bracc_etl/pipelines/mides.py @@ -74,8 +74,9 @@ def __init__( data_dir: str = "./data", limit: int | None = None, chunk_size: int = 50_000, + **kwargs: Any, ) -> None: - super().__init__(driver, data_dir, limit=limit, chunk_size=chunk_size) + super().__init__(driver, data_dir, limit=limit, chunk_size=chunk_size, **kwargs) self._raw_bids: pd.DataFrame = pd.DataFrame() self._raw_contracts: pd.DataFrame = pd.DataFrame() diff --git a/etl/src/bracc_etl/pipelines/ofac.py b/etl/src/bracc_etl/pipelines/ofac.py index 4b64a62..da7b3a2 100644 --- a/etl/src/bracc_etl/pipelines/ofac.py +++ b/etl/src/bracc_etl/pipelines/ofac.py @@ -63,8 +63,9 @@ def __init__( data_dir: str = "./data", limit: int | None = None, chunk_size: int = 50_000, + **kwargs: Any, ) -> None: - super().__init__(driver, data_dir, limit=limit, chunk_size=chunk_size) + super().__init__(driver, data_dir, limit=limit, chunk_size=chunk_size, **kwargs) self._raw: pd.DataFrame = pd.DataFrame() self.sanctions: list[dict[str, Any]] = [] diff --git a/etl/src/bracc_etl/pipelines/opensanctions.py b/etl/src/bracc_etl/pipelines/opensanctions.py index fa76dff..4b51290 100644 --- a/etl/src/bracc_etl/pipelines/opensanctions.py +++ b/etl/src/bracc_etl/pipelines/opensanctions.py @@ -81,8 +81,9 @@ def __init__( data_dir: str = "./data", limit: int | None = None, chunk_size: int = 50_000, + **kwargs: Any, ) -> None: - super().__init__(driver, data_dir, limit=limit, chunk_size=chunk_size) + super().__init__(driver, data_dir, limit=limit, chunk_size=chunk_size, **kwargs) self._raw_entities: list[dict[str, Any]] = [] self.global_peps: list[dict[str, Any]] = [] self.pep_match_rels: list[dict[str, Any]] = [] diff --git a/etl/src/bracc_etl/pipelines/pep_cgu.py b/etl/src/bracc_etl/pipelines/pep_cgu.py index b50ffb3..141f665 100644 --- a/etl/src/bracc_etl/pipelines/pep_cgu.py +++ b/etl/src/bracc_etl/pipelines/pep_cgu.py @@ -84,8 +84,9 @@ def __init__( data_dir: str = "./data", limit: int | None = None, chunk_size: int = 50_000, + **kwargs: Any, ) -> None: - super().__init__(driver, data_dir, limit=limit, chunk_size=chunk_size) + super().__init__(driver, data_dir, limit=limit, chunk_size=chunk_size, **kwargs) self._raw: pd.DataFrame = pd.DataFrame() self.pep_records: list[dict[str, Any]] = [] self.person_links: list[dict[str, Any]] = [] diff --git a/etl/src/bracc_etl/pipelines/pgfn.py b/etl/src/bracc_etl/pipelines/pgfn.py index 62f6eeb..2d0bf09 100644 --- 
a/etl/src/bracc_etl/pipelines/pgfn.py +++ b/etl/src/bracc_etl/pipelines/pgfn.py @@ -38,8 +38,9 @@ def __init__( data_dir: str = "./data", limit: int | None = None, chunk_size: int = 50_000, + **kwargs: Any, ) -> None: - super().__init__(driver, data_dir, limit=limit, chunk_size=chunk_size) + super().__init__(driver, data_dir, limit=limit, chunk_size=chunk_size, **kwargs) self._csv_files: list[Path] = [] self.finances: list[dict[str, Any]] = [] self.relationships: list[dict[str, Any]] = [] @@ -56,10 +57,13 @@ def _parse_value(self, value: str) -> float: def extract(self) -> None: pgfn_dir = Path(self.data_dir) / "pgfn" + if not pgfn_dir.exists(): + logger.warning("[%s] Data directory not found: %s", self.name, pgfn_dir) + return self._csv_files = sorted(pgfn_dir.glob("arquivo_lai_SIDA_*_*.csv")) if not self._csv_files: - msg = f"No PGFN CSV files found in {pgfn_dir}" - raise FileNotFoundError(msg) + logger.warning("[%s] No PGFN CSV files found in %s", self.name, pgfn_dir) + return logger.info("[pgfn] Found %d CSV files to process", len(self._csv_files)) def transform(self) -> None: diff --git a/etl/src/bracc_etl/pipelines/pncp.py b/etl/src/bracc_etl/pipelines/pncp.py index 845c29c..37757f6 100644 --- a/etl/src/bracc_etl/pipelines/pncp.py +++ b/etl/src/bracc_etl/pipelines/pncp.py @@ -68,8 +68,9 @@ def __init__( data_dir: str = "./data", limit: int | None = None, chunk_size: int = 50_000, + **kwargs: Any, ) -> None: - super().__init__(driver, data_dir, limit=limit, chunk_size=chunk_size) + super().__init__(driver, data_dir, limit=limit, chunk_size=chunk_size, **kwargs) self._raw_records: list[dict[str, Any]] = [] self.bids: list[dict[str, Any]] = [] self.coverage_start: str = "" @@ -97,8 +98,8 @@ def _infer_coverage( self.coverage_end = str(manifest.get("coverage_end", "")).strip() self.coverage_complete = bool(manifest.get("coverage_complete", False)) return - except Exception: - logger.warning("Invalid PNCP coverage manifest: %s", manifest_path) + except Exception as exc: + logger.warning("Invalid PNCP coverage manifest %s: %s", manifest_path, exc) dates: list[str] = [] for rec in records: @@ -135,8 +136,12 @@ def extract(self) -> None: all_records: list[dict[str, Any]] = [] for f in json_files: - raw = f.read_text(encoding="utf-8") - payload = json.loads(raw, strict=False) + try: + raw = f.read_text(encoding="utf-8") + payload = json.loads(raw, strict=False) + except (json.JSONDecodeError, OSError) as exc: + logger.warning("Failed to parse JSON from %s: %s", f, exc) + continue # Handle both wrapped (API response) and flat (list) formats if isinstance(payload, dict) and "data" in payload: diff --git a/etl/src/bracc_etl/pipelines/querido_diario.py b/etl/src/bracc_etl/pipelines/querido_diario.py index 3bb3762..846e0f0 100644 --- a/etl/src/bracc_etl/pipelines/querido_diario.py +++ b/etl/src/bracc_etl/pipelines/querido_diario.py @@ -64,8 +64,9 @@ def __init__( data_dir: str = "./data", limit: int | None = None, chunk_size: int = 50_000, + **kwargs: Any, ) -> None: - super().__init__(driver, data_dir, limit=limit, chunk_size=chunk_size) + super().__init__(driver, data_dir, limit=limit, chunk_size=chunk_size, **kwargs) self._raw_acts: list[dict[str, str]] = [] self.acts: list[dict[str, Any]] = [] self.company_mentions: list[dict[str, Any]] = [] diff --git a/etl/src/bracc_etl/pipelines/rais.py b/etl/src/bracc_etl/pipelines/rais.py index 92945ec..5d84a35 100644 --- a/etl/src/bracc_etl/pipelines/rais.py +++ b/etl/src/bracc_etl/pipelines/rais.py @@ -45,8 +45,9 @@ def __init__( data_dir: str = 
"./data", limit: int | None = None, chunk_size: int = 50_000, + **kwargs: Any, ) -> None: - super().__init__(driver, data_dir, limit=limit, chunk_size=chunk_size) + super().__init__(driver, data_dir, limit=limit, chunk_size=chunk_size, **kwargs) self.labor_stats: list[dict[str, Any]] = [] def extract(self) -> None: diff --git a/etl/src/bracc_etl/pipelines/renuncias.py b/etl/src/bracc_etl/pipelines/renuncias.py index ef4b948..639810a 100644 --- a/etl/src/bracc_etl/pipelines/renuncias.py +++ b/etl/src/bracc_etl/pipelines/renuncias.py @@ -47,8 +47,9 @@ def __init__( data_dir: str = "./data", limit: int | None = None, chunk_size: int = 50_000, + **kwargs: Any, ) -> None: - super().__init__(driver, data_dir, limit=limit, chunk_size=chunk_size) + super().__init__(driver, data_dir, limit=limit, chunk_size=chunk_size, **kwargs) self._raw: pd.DataFrame = pd.DataFrame() self.waivers: list[dict[str, Any]] = [] self.company_rels: list[dict[str, Any]] = [] diff --git a/etl/src/bracc_etl/pipelines/sanctions.py b/etl/src/bracc_etl/pipelines/sanctions.py index c8b6d72..c1ac5e5 100644 --- a/etl/src/bracc_etl/pipelines/sanctions.py +++ b/etl/src/bracc_etl/pipelines/sanctions.py @@ -1,5 +1,6 @@ from __future__ import annotations +import logging from pathlib import Path from typing import TYPE_CHECKING, Any @@ -19,6 +20,8 @@ strip_document, ) +logger = logging.getLogger(__name__) + class SanctionsPipeline(Pipeline): """ETL pipeline for CEIS/CNEP sanctions data.""" @@ -32,8 +35,9 @@ def __init__( data_dir: str = "./data", limit: int | None = None, chunk_size: int = 50_000, + **kwargs: Any, ) -> None: - super().__init__(driver, data_dir, limit=limit, chunk_size=chunk_size) + super().__init__(driver, data_dir, limit=limit, chunk_size=chunk_size, **kwargs) self._raw_ceis: pd.DataFrame = pd.DataFrame() self._raw_cnep: pd.DataFrame = pd.DataFrame() self.sanctions: list[dict[str, Any]] = [] @@ -41,17 +45,19 @@ def __init__( def extract(self) -> None: sanctions_dir = Path(self.data_dir) / "sanctions" + if not sanctions_dir.exists(): + logger.warning("[%s] Data directory not found: %s", self.name, sanctions_dir) + return + ceis_path = sanctions_dir / "ceis.csv" + cnep_path = sanctions_dir / "cnep.csv" + if not ceis_path.exists() or not cnep_path.exists(): + logger.warning("[%s] Required CSV files not found in %s", self.name, sanctions_dir) + return self._raw_ceis = pd.read_csv( - sanctions_dir / "ceis.csv", - dtype=str, - encoding="latin-1", - keep_default_na=False, + ceis_path, dtype=str, encoding="latin-1", keep_default_na=False, ) self._raw_cnep = pd.read_csv( - sanctions_dir / "cnep.csv", - dtype=str, - encoding="latin-1", - keep_default_na=False, + cnep_path, dtype=str, encoding="latin-1", keep_default_na=False, ) def _process_rows( diff --git a/etl/src/bracc_etl/pipelines/senado.py b/etl/src/bracc_etl/pipelines/senado.py index fd16117..5c55181 100644 --- a/etl/src/bracc_etl/pipelines/senado.py +++ b/etl/src/bracc_etl/pipelines/senado.py @@ -64,8 +64,9 @@ def __init__( data_dir: str = "./data", limit: int | None = None, chunk_size: int = 50_000, + **kwargs: Any, ) -> None: - super().__init__(driver, data_dir, limit=limit, chunk_size=chunk_size) + super().__init__(driver, data_dir, limit=limit, chunk_size=chunk_size, **kwargs) self._raw: pd.DataFrame = pd.DataFrame() self._senator_lookup: dict[str, dict[str, str]] = {} self.expenses: list[dict[str, Any]] = [] diff --git a/etl/src/bracc_etl/pipelines/senado_cpis.py b/etl/src/bracc_etl/pipelines/senado_cpis.py index 680d3fb..9b1e953 100644 --- 
diff --git a/etl/src/bracc_etl/pipelines/senado_cpis.py b/etl/src/bracc_etl/pipelines/senado_cpis.py
index 680d3fb..9b1e953 100644
--- a/etl/src/bracc_etl/pipelines/senado_cpis.py
+++ b/etl/src/bracc_etl/pipelines/senado_cpis.py
@@ -108,8 +108,9 @@ def __init__(
         data_dir: str = "./data",
         limit: int | None = None,
         chunk_size: int = 50_000,
+        **kwargs: Any,
     ) -> None:
-        super().__init__(driver, data_dir, limit=limit, chunk_size=chunk_size)
+        super().__init__(driver, data_dir, limit=limit, chunk_size=chunk_size, **kwargs)
         self._raw_inquiries: pd.DataFrame = pd.DataFrame()
         self._raw: pd.DataFrame = pd.DataFrame()
diff --git a/etl/src/bracc_etl/pipelines/siconfi.py b/etl/src/bracc_etl/pipelines/siconfi.py
index 472ce2f..128d8a8 100644
--- a/etl/src/bracc_etl/pipelines/siconfi.py
+++ b/etl/src/bracc_etl/pipelines/siconfi.py
@@ -39,8 +39,9 @@ def __init__(
         data_dir: str = "./data",
         limit: int | None = None,
         chunk_size: int = 50_000,
+        **kwargs: Any,
     ) -> None:
-        super().__init__(driver, data_dir, limit=limit, chunk_size=chunk_size)
+        super().__init__(driver, data_dir, limit=limit, chunk_size=chunk_size, **kwargs)
         self._raw: list[dict[str, Any]] = []
         self.finances: list[dict[str, Any]] = []
         self.municipality_rels: list[dict[str, Any]] = []
diff --git a/etl/src/bracc_etl/pipelines/siop.py b/etl/src/bracc_etl/pipelines/siop.py
index c33b0d0..cb9508b 100644
--- a/etl/src/bracc_etl/pipelines/siop.py
+++ b/etl/src/bracc_etl/pipelines/siop.py
@@ -67,8 +67,9 @@ def __init__(
         data_dir: str = "./data",
         limit: int | None = None,
         chunk_size: int = 50_000,
+        **kwargs: Any,
     ) -> None:
-        super().__init__(driver, data_dir, limit=limit, chunk_size=chunk_size)
+        super().__init__(driver, data_dir, limit=limit, chunk_size=chunk_size, **kwargs)
         self._raw: pd.DataFrame = pd.DataFrame()
         self.amendments: list[dict[str, Any]] = []
         self.authors: list[dict[str, Any]] = []
diff --git a/etl/src/bracc_etl/pipelines/stf.py b/etl/src/bracc_etl/pipelines/stf.py
index 3969574..193ba8b 100644
--- a/etl/src/bracc_etl/pipelines/stf.py
+++ b/etl/src/bracc_etl/pipelines/stf.py
@@ -39,8 +39,9 @@ def __init__(
         data_dir: str = "./data",
         limit: int | None = None,
         chunk_size: int = 50_000,
+        **kwargs: Any,
     ) -> None:
-        super().__init__(driver, data_dir, limit=limit, chunk_size=chunk_size)
+        super().__init__(driver, data_dir, limit=limit, chunk_size=chunk_size, **kwargs)
         self._raw: pd.DataFrame = pd.DataFrame()
         self.cases: list[dict[str, Any]] = []
         self.rapporteur_rels: list[dict[str, Any]] = []
diff --git a/etl/src/bracc_etl/pipelines/tcu.py b/etl/src/bracc_etl/pipelines/tcu.py
index 1b34397..007db8e 100644
--- a/etl/src/bracc_etl/pipelines/tcu.py
+++ b/etl/src/bracc_etl/pipelines/tcu.py
@@ -42,8 +42,9 @@ def __init__(
         data_dir: str = "./data",
         limit: int | None = None,
         chunk_size: int = 50_000,
+        **kwargs: Any,
     ) -> None:
-        super().__init__(driver, data_dir, limit=limit, chunk_size=chunk_size)
+        super().__init__(driver, data_dir, limit=limit, chunk_size=chunk_size, **kwargs)
         self._raw_inabilitados: pd.DataFrame = pd.DataFrame()
         self._raw_inidoneos: pd.DataFrame = pd.DataFrame()
         self._raw_irregulares: pd.DataFrame = pd.DataFrame()
pd.DataFrame = pd.DataFrame() self._raw_favorecidos: pd.DataFrame = pd.DataFrame() self._raw_convenios: pd.DataFrame = pd.DataFrame() diff --git a/etl/src/bracc_etl/pipelines/transparencia.py b/etl/src/bracc_etl/pipelines/transparencia.py index 0f9c4ee..5d67d4c 100644 --- a/etl/src/bracc_etl/pipelines/transparencia.py +++ b/etl/src/bracc_etl/pipelines/transparencia.py @@ -1,6 +1,7 @@ from __future__ import annotations import hashlib +import logging import re from pathlib import Path from typing import TYPE_CHECKING, Any @@ -21,6 +22,8 @@ strip_document, ) +logger = logging.getLogger(__name__) + # Classified contracts (Polícia Federal etc.) use this sentinel CNPJ. _SIGILOSO_CNPJ = "-11" @@ -78,8 +81,9 @@ def __init__( data_dir: str = "./data", limit: int | None = None, chunk_size: int = 50_000, + **kwargs: Any, ) -> None: - super().__init__(driver, data_dir, limit=limit, chunk_size=chunk_size) + super().__init__(driver, data_dir, limit=limit, chunk_size=chunk_size, **kwargs) self._raw_contratos: pd.DataFrame = pd.DataFrame() self._raw_servidores: pd.DataFrame = pd.DataFrame() self._raw_emendas: pd.DataFrame = pd.DataFrame() @@ -89,24 +93,30 @@ def __init__( def extract(self) -> None: src_dir = Path(self.data_dir) / "transparencia" - self._raw_contratos = pd.read_csv( - src_dir / "contratos.csv", - dtype=str, - keep_default_na=False, - encoding="utf-8", - ) - self._raw_servidores = pd.read_csv( - src_dir / "servidores.csv", - dtype=str, - keep_default_na=False, - encoding="utf-8", - ) - self._raw_emendas = pd.read_csv( - src_dir / "emendas.csv", - dtype=str, - keep_default_na=False, - encoding="utf-8", - ) + if not src_dir.exists(): + logger.warning("[%s] Data directory not found: %s", self.name, src_dir) + return + contratos_path = src_dir / "contratos.csv" + servidores_path = src_dir / "servidores.csv" + emendas_path = src_dir / "emendas.csv" + if not contratos_path.exists(): + logger.warning("[%s] contratos.csv not found in %s", self.name, src_dir) + else: + self._raw_contratos = pd.read_csv( + contratos_path, dtype=str, keep_default_na=False, encoding="utf-8", + ) + if not servidores_path.exists(): + logger.warning("[%s] servidores.csv not found in %s", self.name, src_dir) + else: + self._raw_servidores = pd.read_csv( + servidores_path, dtype=str, keep_default_na=False, encoding="utf-8", + ) + if not emendas_path.exists(): + logger.warning("[%s] emendas.csv not found in %s", self.name, src_dir) + else: + self._raw_emendas = pd.read_csv( + emendas_path, dtype=str, keep_default_na=False, encoding="utf-8", + ) def transform(self) -> None: contracts: list[dict[str, Any]] = [] diff --git a/etl/src/bracc_etl/pipelines/tse.py b/etl/src/bracc_etl/pipelines/tse.py index b59de31..c539052 100644 --- a/etl/src/bracc_etl/pipelines/tse.py +++ b/etl/src/bracc_etl/pipelines/tse.py @@ -1,5 +1,6 @@ from __future__ import annotations +import logging from pathlib import Path from typing import TYPE_CHECKING, Any @@ -18,6 +19,8 @@ strip_document, ) +logger = logging.getLogger(__name__) + # TSE 2024 masks ALL candidate CPFs as "-4". After strip_document → "4", # format_cpf → "4" — every candidate MERGEs into one ghost node. # We use SQ_CANDIDATO (unique sequential ID per candidate per election) instead. 
@@ -36,20 +39,33 @@ def __init__(
         data_dir: str = "./data",
         limit: int | None = None,
         chunk_size: int = 50_000,
+        **kwargs: Any,
     ) -> None:
-        super().__init__(driver, data_dir, limit=limit, chunk_size=chunk_size)
+        super().__init__(driver, data_dir, limit=limit, chunk_size=chunk_size, **kwargs)
         self.candidates: list[dict[str, Any]] = []
         self.donations: list[dict[str, Any]] = []
         self.elections: list[dict[str, Any]] = []
 
     def extract(self) -> None:
         tse_dir = Path(self.data_dir) / "tse"
+        if not tse_dir.exists():
+            logger.warning("[%s] Data directory not found: %s", self.name, tse_dir)
+            self._raw_candidatos = pd.DataFrame()
+            self._raw_doacoes = pd.DataFrame()
+            return
+        candidatos_path = tse_dir / "candidatos.csv"
+        doacoes_path = tse_dir / "doacoes.csv"
+        if not candidatos_path.exists() or not doacoes_path.exists():
+            logger.warning("[%s] Required CSV files not found in %s", self.name, tse_dir)
+            self._raw_candidatos = pd.DataFrame()
+            self._raw_doacoes = pd.DataFrame()
+            return
         self._raw_candidatos = pd.read_csv(
-            tse_dir / "candidatos.csv", encoding="latin-1", dtype=str,
+            candidatos_path, encoding="latin-1", dtype=str,
             nrows=self.limit,
         )
         self._raw_doacoes = pd.read_csv(
-            tse_dir / "doacoes.csv", encoding="latin-1", dtype=str,
+            doacoes_path, encoding="latin-1", dtype=str,
             nrows=self.limit,
         )
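The comment at the top of tse.py above is the reason candidates MERGE on SQ_CANDIDATO rather than CPF. A toy illustration of the ghost-node collapse (sample values hypothetical; the real matching happens in Cypher MERGE statements):

# Toy illustration of the ghost-node collapse described in tse.py's comment.
def merge_key(candidate: dict[str, str], use_sq: bool) -> str:
    # "-4" is the TSE 2024 CPF mask; stripping non-digits leaves "4" for everyone.
    masked_cpf = "".join(ch for ch in candidate["cpf"] if ch.isdigit())
    return candidate["sq_candidato"] if use_sq else masked_cpf


candidates = [
    {"sq_candidato": "250001", "cpf": "-4", "name": "A"},
    {"sq_candidato": "250002", "cpf": "-4", "name": "B"},
]

assert len({merge_key(c, use_sq=False) for c in candidates}) == 1  # one ghost node
assert len({merge_key(c, use_sq=True) for c in candidates}) == 2   # distinct nodes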
diff --git a/etl/src/bracc_etl/pipelines/tse_bens.py b/etl/src/bracc_etl/pipelines/tse_bens.py
index 6806759..ac1dee1 100644
--- a/etl/src/bracc_etl/pipelines/tse_bens.py
+++ b/etl/src/bracc_etl/pipelines/tse_bens.py
@@ -51,8 +51,9 @@ def __init__(
         data_dir: str = "./data",
         limit: int | None = None,
         chunk_size: int = 50_000,
+        **kwargs: Any,
     ) -> None:
-        super().__init__(driver, data_dir, limit=limit, chunk_size=chunk_size)
+        super().__init__(driver, data_dir, limit=limit, chunk_size=chunk_size, **kwargs)
         self._raw: pd.DataFrame = pd.DataFrame()
         self.assets: list[dict[str, Any]] = []
         self.person_rels: list[dict[str, Any]] = []
diff --git a/etl/src/bracc_etl/pipelines/tse_filiados.py b/etl/src/bracc_etl/pipelines/tse_filiados.py
index 12a18ef..dd154cc 100644
--- a/etl/src/bracc_etl/pipelines/tse_filiados.py
+++ b/etl/src/bracc_etl/pipelines/tse_filiados.py
@@ -50,8 +50,9 @@ def __init__(
         data_dir: str = "./data",
         limit: int | None = None,
         chunk_size: int = 50_000,
+        **kwargs: Any,
     ) -> None:
-        super().__init__(driver, data_dir, limit=limit, chunk_size=chunk_size)
+        super().__init__(driver, data_dir, limit=limit, chunk_size=chunk_size, **kwargs)
         self._raw: pd.DataFrame = pd.DataFrame()
         self.memberships: list[dict[str, Any]] = []
         self.person_rels: list[dict[str, Any]] = []
diff --git a/etl/src/bracc_etl/pipelines/un_sanctions.py b/etl/src/bracc_etl/pipelines/un_sanctions.py
index 7218a15..9b382a4 100644
--- a/etl/src/bracc_etl/pipelines/un_sanctions.py
+++ b/etl/src/bracc_etl/pipelines/un_sanctions.py
@@ -44,8 +44,9 @@ def __init__(
         data_dir: str = "./data",
         limit: int | None = None,
         chunk_size: int = 50_000,
+        **kwargs: Any,
     ) -> None:
-        super().__init__(driver, data_dir, limit=limit, chunk_size=chunk_size)
+        super().__init__(driver, data_dir, limit=limit, chunk_size=chunk_size, **kwargs)
         self._raw: list[dict[str, Any]] = []
         self.sanctions: list[dict[str, Any]] = []
         self.person_rels: list[dict[str, Any]] = []
diff --git a/etl/src/bracc_etl/pipelines/viagens.py b/etl/src/bracc_etl/pipelines/viagens.py
index d3e5abd..cdc7cfe 100644
--- a/etl/src/bracc_etl/pipelines/viagens.py
+++ b/etl/src/bracc_etl/pipelines/viagens.py
@@ -104,8 +104,9 @@ def __init__(
         data_dir: str = "./data",
         limit: int | None = None,
         chunk_size: int = 50_000,
+        **kwargs: Any,
     ) -> None:
-        super().__init__(driver, data_dir, limit=limit, chunk_size=chunk_size)
+        super().__init__(driver, data_dir, limit=limit, chunk_size=chunk_size, **kwargs)
         self._raw: pd.DataFrame = pd.DataFrame()
         self.travels: list[dict[str, Any]] = []
         self.person_rels: list[dict[str, Any]] = []
diff --git a/etl/src/bracc_etl/pipelines/world_bank.py b/etl/src/bracc_etl/pipelines/world_bank.py
index 0e94a47..746e50d 100644
--- a/etl/src/bracc_etl/pipelines/world_bank.py
+++ b/etl/src/bracc_etl/pipelines/world_bank.py
@@ -42,8 +42,9 @@ def __init__(
         data_dir: str = "./data",
         limit: int | None = None,
         chunk_size: int = 50_000,
+        **kwargs: Any,
     ) -> None:
-        super().__init__(driver, data_dir, limit=limit, chunk_size=chunk_size)
+        super().__init__(driver, data_dir, limit=limit, chunk_size=chunk_size, **kwargs)
         self._raw: pd.DataFrame = pd.DataFrame()
         self.sanctions: list[dict[str, Any]] = []
+ """ + from datetime import UTC, datetime + + import httpx + + # --- Nextcloud (primary) --- + nextcloud_dl = "https://arquivos.receitafederal.gov.br/s/{token}/download?path=%2F&files=" + tokens: list[str] = [] + env_token = os.environ.get("CNPJ_SHARE_TOKEN") + if env_token: + tokens.append(env_token) + tokens.extend(["gn672Ad4CF8N6TK", "YggdBLfdninEJX9"]) + + for token in tokens: + share_url = f"https://arquivos.receitafederal.gov.br/s/{token}" + try: + resp = httpx.head(share_url, follow_redirects=True, timeout=30) + if resp.status_code < 400: + return nextcloud_dl.format(token=token) + except httpx.HTTPError: + pass + + # --- Legacy dadosabertos (fallback) --- + new_base = "https://dadosabertos.rfb.gov.br/CNPJ/dados_abertos_cnpj/{ym}/" + legacy_url = "https://dadosabertos.rfb.gov.br/CNPJ/" + + now = datetime.now(UTC) + if year_month is not None: + candidates = [year_month] else: - pipeline.run() + current = f"{now.year:04d}-{now.month:02d}" + prev_m = now.month - 1 if now.month > 1 else 12 + prev_y = now.year if now.month > 1 else now.year - 1 + candidates = [current, f"{prev_y:04d}-{prev_m:02d}"] + + for ym in candidates: + url = new_base.format(ym=ym) + try: + resp = httpx.head(url, follow_redirects=True, timeout=30) + if resp.status_code < 400: + return url + except httpx.HTTPError: + pass - run_post_load_hooks( - driver=driver, - source=source, - neo4j_database=neo4j_database, - linking_tier=linking_tier, - ) + try: + resp = httpx.head(legacy_url, follow_redirects=True, timeout=30) + if resp.status_code < 400: + return legacy_url + except httpx.HTTPError: + pass - driver.close() + tried = ", ".join(candidates) + msg = f"Could not resolve CNPJ release. Tried Nextcloud tokens, months [{tried}], and legacy." + raise RuntimeError(msg) @cli.command() @click.option("--output-dir", default="./data/cnpj", help="Output directory") @click.option("--files", type=int, default=10, help="Number of files per type (0-9)") @click.option("--skip-existing/--no-skip-existing", default=True) -def download(output_dir: str, files: int, skip_existing: bool) -> None: +@click.option("--release", default=None, help="Pin to specific monthly release (YYYY-MM)") +def download(output_dir: str, files: int, skip_existing: bool, release: str | None) -> None: """Download CNPJ data from Receita Federal.""" - import shutil - import stat import zipfile from pathlib import Path @@ -194,56 +246,13 @@ def download(output_dir: str, files: int, skip_existing: bool) -> None: logger = logging.getLogger(__name__) - base_url = "https://dadosabertos.rfb.gov.br/CNPJ/" + base_url = _resolve_rf_release_inline(release) + logger.info("Using CNPJ release URL: %s", base_url) file_types = ["Empresas", "Socios", "Estabelecimentos"] out = Path(output_dir) out.mkdir(parents=True, exist_ok=True) - def _safe_extract_zip( - archive: zipfile.ZipFile, - output_root: Path, - *, - max_members: int = 50_000, - max_uncompressed_bytes: int = 5_000_000_000, - ) -> None: - base = output_root.resolve() - infos = archive.infolist() - if len(infos) > max_members: - raise click.ClickException( - f"Unsafe ZIP archive: too many entries ({len(infos)} > {max_members})", - ) - - uncompressed_total = 0 - for info in infos: - if not info.filename: - continue - member = info.filename.replace("\\", "/") - mode = info.external_attr >> 16 - if stat.S_ISLNK(mode): - raise click.ClickException(f"Unsafe ZIP member (symlink): {member}") - - target = (output_root / member).resolve() - try: - target.relative_to(base) - except ValueError as exc: - raise 
click.ClickException(f"Unsafe ZIP member path: {member}") from exc - - if info.is_dir(): - target.mkdir(parents=True, exist_ok=True) - continue - - uncompressed_total += info.file_size - if uncompressed_total > max_uncompressed_bytes: - raise click.ClickException( - "Unsafe ZIP archive: exceeds max extracted size " - f"({uncompressed_total} > {max_uncompressed_bytes})", - ) - - target.parent.mkdir(parents=True, exist_ok=True) - with archive.open(info, "r") as source, target.open("wb") as destination: - shutil.copyfileobj(source, destination) - for file_type in file_types: for i in range(min(files, 10)): filename = f"{file_type}{i}.zip" @@ -264,17 +273,84 @@ def _safe_extract_zip( logger.info("Extracting %s...", dest.name) with zipfile.ZipFile(dest, "r") as zf: - _safe_extract_zip(zf, out) + # Path traversal guard + out_resolved = out.resolve() + safe = True + for info in zf.infolist(): + target = (out / info.filename).resolve() + if not target.is_relative_to(out_resolved): + logger.warning( + "Path traversal in %s: %s — skipping archive", + dest.name, + info.filename, + ) + safe = False + break + if not safe: + continue + # Zip bomb guard (50 GB limit for CNPJ data) + total = sum(i.file_size for i in zf.infolist()) + if total > 50 * 1024**3: + logger.warning( + "Uncompressed size too large: %s (%.1f GB) — skipping", + dest.name, + total / 1e9, + ) + continue + zf.extractall(out) except httpx.HTTPError: logger.warning("Failed to download %s (may not exist)", filename) @cli.command() -def sources() -> None: +@click.option("--status", "show_status", is_flag=True, help="Show ingestion status from Neo4j") +@click.option("--neo4j-uri", default="bolt://localhost:7687", help="Neo4j URI") +@click.option("--neo4j-user", default="neo4j") +@click.option("--neo4j-password", default=None) +def sources(show_status: bool, neo4j_uri: str, neo4j_user: str, neo4j_password: str | None) -> None: """List available data sources.""" - click.echo("Available pipelines:") - for name in sorted(PIPELINES): - click.echo(f" {name}") + if not show_status: + click.echo("Available pipelines:") + for name in sorted(PIPELINES): + click.echo(f" {name}") + return + + if not neo4j_password: + neo4j_password = os.environ.get("NEO4J_PASSWORD", "") + if not neo4j_password: + raise click.ClickException( + "--neo4j-password or NEO4J_PASSWORD env var required for --status" + ) + + driver = GraphDatabase.driver(neo4j_uri, auth=(neo4j_user, neo4j_password)) + try: + with driver.session() as session: + result = session.run( + "MATCH (r:IngestionRun) " + "WITH r ORDER BY r.started_at DESC " + "WITH r.source_id AS sid, collect(r)[0] AS latest " + "RETURN latest ORDER BY sid" + ) + runs = {r["latest"]["source_id"]: dict(r["latest"]) for r in result} + + click.echo( + f"{'Source':<20} {'Status':<15} {'Rows In':>10} {'Loaded':>10} " + f"{'Started':<20} {'Finished':<20}" + ) + click.echo("-" * 100) + + for name in sorted(PIPELINES): + run = runs.get(name, {}) + click.echo( + f"{name:<20} " + f"{run.get('status', '-'):<15} " + f"{run.get('rows_in', 0):>10,} " + f"{run.get('rows_loaded', 0):>10,} " + f"{str(run.get('started_at', '-')):<20} " + f"{str(run.get('finished_at', '-')):<20}" + ) + finally: + driver.close() if __name__ == "__main__": diff --git a/etl/src/bracc_etl/schemas/__init__.py b/etl/src/bracc_etl/schemas/__init__.py new file mode 100644 index 0000000..5a7315b --- /dev/null +++ b/etl/src/bracc_etl/schemas/__init__.py @@ -0,0 +1,5 @@ +"""Pandera DataFrameSchema definitions for ETL data quality validation.""" + +from 
diff --git a/etl/src/bracc_etl/schemas/__init__.py b/etl/src/bracc_etl/schemas/__init__.py
new file mode 100644
index 0000000..5a7315b
--- /dev/null
+++ b/etl/src/bracc_etl/schemas/__init__.py
@@ -0,0 +1,5 @@
+"""Pandera DataFrameSchema definitions for ETL data quality validation."""
+
+from bracc_etl.schemas.validator import validate_dataframe, validate_dataframe_sampled
+
+__all__ = ["validate_dataframe", "validate_dataframe_sampled"]
diff --git a/etl/src/bracc_etl/schemas/cnpj.py b/etl/src/bracc_etl/schemas/cnpj.py
new file mode 100644
index 0000000..879e9c6
--- /dev/null
+++ b/etl/src/bracc_etl/schemas/cnpj.py
@@ -0,0 +1,129 @@
+"""Pandera schemas for CNPJ (Receita Federal Company Registry) pipeline.
+
+Validates the core DataFrames produced by CNPJPipeline.transform():
+- empresas: Company nodes (cnpj, razao_social, capital_social, uf, etc.)
+- socios (PF strong): Person nodes keyed by CPF
+- socios (PF partial): Partner nodes keyed by partner_id hash
+plus the SOCIO_DE relationship rows covering all partner variants.
+
+Column definitions derived from cnpj.py _transform_empresas_rf/simple
+and _transform_socios_rf/simple output dictionaries.
+"""
+
+import pandera.pandas as pa
+
+# ------------------------------------------------------------------
+# Empresas (Company nodes)
+# Output columns: cnpj, razao_social, natureza_juridica, cnae_principal,
+# capital_social, uf, municipio, porte_empresa
+# ------------------------------------------------------------------
+empresas_schema = pa.DataFrameSchema(
+    columns={
+        "cnpj": pa.Column(
+            str,
+            nullable=True,
+            coerce=True,
+            checks=[
+                # Formatted CNPJ: XX.XXX.XXX/XXXX-XX (18 chars) or raw digits
+                pa.Check.str_matches(
+                    r"^(\d{2}\.\d{3}\.\d{3}/\d{4}-\d{2}|\d{8,14})$",
+                    error="CNPJ must be formatted (XX.XXX.XXX/XXXX-XX) or 8-14 digits",
+                ),
+            ],
+        ),
+        "razao_social": pa.Column(str, nullable=True, coerce=True),
+        "natureza_juridica": pa.Column(str, nullable=True, coerce=True),
+        "cnae_principal": pa.Column(str, nullable=True, coerce=True),
+        "capital_social": pa.Column(float, nullable=True, coerce=True, checks=[
+            pa.Check.ge(0, error="capital_social must be >= 0"),
+        ]),
+        "uf": pa.Column(str, nullable=True, coerce=True, checks=[
+            # Brazilian UF: 2 uppercase letters or empty
+            pa.Check.str_matches(
+                r"^([A-Z]{2})?$",
+                error="UF must be 2 uppercase letters or empty",
+            ),
+        ]),
+        "municipio": pa.Column(str, nullable=True, coerce=True),
+        "porte_empresa": pa.Column(str, nullable=True, coerce=True),
+    },
+    coerce=True,
+    strict=False,  # Allow extra columns
+)
+
+
+# ------------------------------------------------------------------
+# Socios PF (Person nodes with strong CPF identity)
+# Output columns: name, cpf, tipo_socio
+# ------------------------------------------------------------------
+socios_pf_schema = pa.DataFrameSchema(
+    columns={
+        "name": pa.Column(str, nullable=True, coerce=True),
+        "cpf": pa.Column(
+            str,
+            nullable=True,
+            coerce=True,
+            checks=[
+                # Formatted CPF: XXX.XXX.XXX-XX (14 chars) or raw 11 digits
+                pa.Check.str_matches(
+                    r"^(\d{3}\.\d{3}\.\d{3}-\d{2}|\d{11})$",
+                    error="CPF must be formatted (XXX.XXX.XXX-XX) or 11 digits",
+                ),
+            ],
+        ),
+        "tipo_socio": pa.Column(str, nullable=True, coerce=True),
+    },
+    coerce=True,
+    strict=False,
+)
+
+
+# ------------------------------------------------------------------
+# Socios PF partial (Partner nodes with masked/invalid docs)
+# Output columns: partner_id, name, doc_raw, doc_digits, doc_partial,
+# doc_type, tipo_socio, identity_quality, source
+# ------------------------------------------------------------------
+socios_partial_schema = pa.DataFrameSchema(
+    columns={
+        "partner_id": pa.Column(str, nullable=False, coerce=True, checks=[
+            pa.Check.str_length(min_value=1, error="partner_id must not be empty"),
+        ]),
+        "name": pa.Column(str, nullable=True, coerce=True),
+        "doc_raw": pa.Column(str, nullable=True, coerce=True),
+        "doc_digits": pa.Column(str, nullable=True, coerce=True),
+        "doc_partial": pa.Column(str, nullable=True, coerce=True),
+        "doc_type": pa.Column(str, nullable=True, coerce=True),
+        "tipo_socio": pa.Column(str, nullable=True, coerce=True),
+        "identity_quality": pa.Column(str, nullable=True, coerce=True, checks=[
+            pa.Check.isin(
+                ["partial", "unknown", ""],
+                error="identity_quality must be partial, unknown, or empty",
+            ),
+        ]),
+        "source": pa.Column(str, nullable=True, coerce=True),
+    },
+    coerce=True,
+    strict=False,
+)
+
+
+# ------------------------------------------------------------------
+# SOCIO_DE relationships (all variants: PF, partial, PJ)
+# Output columns: source_key, target_key, tipo_socio, qualificacao,
+# data_entrada, snapshot_date
+# ------------------------------------------------------------------
+socio_relationship_schema = pa.DataFrameSchema(
+    columns={
+        "source_key": pa.Column(str, nullable=False, coerce=True, checks=[
+            pa.Check.str_length(min_value=1, error="source_key must not be empty"),
+        ]),
+        "target_key": pa.Column(str, nullable=False, coerce=True, checks=[
+            pa.Check.str_length(min_value=1, error="target_key must not be empty"),
+        ]),
+        "tipo_socio": pa.Column(str, nullable=True, coerce=True),
+        "qualificacao": pa.Column(str, nullable=True, coerce=True),
+        "data_entrada": pa.Column(str, nullable=True, coerce=True),
+        "snapshot_date": pa.Column(str, nullable=True, coerce=True),
+    },
+    coerce=True,
+    strict=False,
+)
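The document checks accept both punctuated and raw-digit forms. A quick sanity check of the CNPJ pattern used in empresas_schema (sample values hypothetical):

import re

# The same pattern used in empresas_schema's CNPJ check.
CNPJ_RE = re.compile(r"^(\d{2}\.\d{3}\.\d{3}/\d{4}-\d{2}|\d{8,14})$")

assert CNPJ_RE.match("12.345.678/0001-95")   # formatted, 18 chars
assert CNPJ_RE.match("12345678000195")       # raw 14 digits
assert CNPJ_RE.match("12345678")             # 8-digit CNPJ root also passes
assert not CNPJ_RE.match("12.345.678/0001")  # truncated form is rejected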
+""" + +import pandera.pandas as pa + +# ------------------------------------------------------------------ +# Acts (DOUAct nodes) +# Output keys: act_id, title, act_type, date, section, agency, +# category, text_excerpt, url, source +# ------------------------------------------------------------------ +acts_schema = pa.DataFrameSchema( + columns={ + "act_id": pa.Column(str, nullable=False, coerce=True, checks=[ + pa.Check.str_length(min_value=1, error="act_id must not be empty"), + ]), + "title": pa.Column(str, nullable=True, coerce=True), + "act_type": pa.Column(str, nullable=False, coerce=True, checks=[ + pa.Check.isin( + ["nomeacao", "exoneracao", "contrato", "penalidade", "outro"], + error="act_type must be one of the classified types", + ), + ]), + "date": pa.Column(str, nullable=True, coerce=True), + "section": pa.Column(str, nullable=True, coerce=True), + "agency": pa.Column(str, nullable=True, coerce=True), + "category": pa.Column(str, nullable=True, coerce=True), + "text_excerpt": pa.Column(str, nullable=True, coerce=True, checks=[ + pa.Check.str_length(max_value=500, error="text_excerpt must be <= 500 chars"), + ]), + "url": pa.Column(str, nullable=True, coerce=True, checks=[ + pa.Check.str_matches( + r"^https?://", + error="url must start with http:// or https://", + ), + ]), + "source": pa.Column(str, nullable=False, coerce=True, checks=[ + pa.Check.isin(["imprensa_nacional"], error="source must be 'imprensa_nacional'"), + ]), + }, + coerce=True, + strict=False, +) + + +# ------------------------------------------------------------------ +# Person relationships (PUBLICOU: Person -> DOUAct) +# Output keys: source_key (CPF), target_key (act_id) +# ------------------------------------------------------------------ +person_rels_schema = pa.DataFrameSchema( + columns={ + "source_key": pa.Column( + str, + nullable=False, + coerce=True, + checks=[ + pa.Check.str_matches( + r"^(\d{3}\.\d{3}\.\d{3}-\d{2}|\d{11})$", + error="source_key must be a formatted CPF", + ), + ], + ), + "target_key": pa.Column(str, nullable=False, coerce=True, checks=[ + pa.Check.str_length(min_value=1, error="target_key must not be empty"), + ]), + }, + coerce=True, + strict=False, +) + + +# ------------------------------------------------------------------ +# Company relationships (MENCIONOU: Company -> DOUAct) +# Output keys: source_key (CNPJ), target_key (act_id) +# ------------------------------------------------------------------ +company_rels_schema = pa.DataFrameSchema( + columns={ + "source_key": pa.Column( + str, + nullable=False, + coerce=True, + checks=[ + pa.Check.str_matches( + r"^(\d{2}\.\d{3}\.\d{3}/\d{4}-\d{2}|\d{14})$", + error="source_key must be a formatted CNPJ", + ), + ], + ), + "target_key": pa.Column(str, nullable=False, coerce=True, checks=[ + pa.Check.str_length(min_value=1, error="target_key must not be empty"), + ]), + }, + coerce=True, + strict=False, +) diff --git a/etl/src/bracc_etl/schemas/pgfn.py b/etl/src/bracc_etl/schemas/pgfn.py new file mode 100644 index 0000000..3a6bd69 --- /dev/null +++ b/etl/src/bracc_etl/schemas/pgfn.py @@ -0,0 +1,80 @@ +"""Pandera schemas for PGFN (Tax Debt / Divida Ativa) pipeline. + +Validates the two entity lists produced by PgfnPipeline.transform(): +- finances: Finance nodes (finance_id, type, inscription_number, value, etc.) +- relationships: DEVE relationships (source_key=CNPJ, target_key=finance_id) + +Column definitions derived from pgfn.py transform() output dictionaries. 
diff --git a/etl/src/bracc_etl/schemas/pgfn.py b/etl/src/bracc_etl/schemas/pgfn.py
new file mode 100644
index 0000000..3a6bd69
--- /dev/null
+++ b/etl/src/bracc_etl/schemas/pgfn.py
@@ -0,0 +1,80 @@
+"""Pandera schemas for PGFN (Tax Debt / Divida Ativa) pipeline.
+
+Validates the two entity lists produced by PgfnPipeline.transform():
+- finances: Finance nodes (finance_id, type, inscription_number, value, etc.)
+- relationships: DEVE relationships (source_key=CNPJ, target_key=finance_id)
+
+Column definitions derived from pgfn.py transform() output dictionaries.
+Only company (PJ) debtors with PRINCIPAL debtor type are loaded; person
+records are pre-filtered due to LGPD CPF masking by PGFN.
+"""
+
+import pandera.pandas as pa
+
+# ------------------------------------------------------------------
+# Finances (Finance nodes)
+# Output keys: finance_id, type, inscription_number, value, date,
+# situation, revenue_type, court_action, source
+# ------------------------------------------------------------------
+finances_schema = pa.DataFrameSchema(
+    columns={
+        "finance_id": pa.Column(str, nullable=False, coerce=True, checks=[
+            pa.Check.str_matches(
+                r"^pgfn_\S+$",
+                error="finance_id must start with 'pgfn_' followed by inscription number",
+            ),
+        ]),
+        "type": pa.Column(str, nullable=False, coerce=True, checks=[
+            pa.Check.isin(["divida_ativa"], error="type must be 'divida_ativa'"),
+        ]),
+        "inscription_number": pa.Column(str, nullable=False, coerce=True, checks=[
+            pa.Check.str_length(min_value=1, error="inscription_number must not be empty"),
+        ]),
+        "value": pa.Column(float, nullable=True, coerce=True, checks=[
+            pa.Check.ge(0, error="value must be >= 0"),
+        ]),
+        "date": pa.Column(str, nullable=True, coerce=True),
+        "situation": pa.Column(str, nullable=True, coerce=True),
+        "revenue_type": pa.Column(str, nullable=True, coerce=True),
+        "court_action": pa.Column(str, nullable=True, coerce=True),
+        "source": pa.Column(str, nullable=False, coerce=True, checks=[
+            pa.Check.isin(["pgfn"], error="source must be 'pgfn'"),
+        ]),
+    },
+    coerce=True,
+    strict=False,
+)
+
+
+# ------------------------------------------------------------------
+# DEVE relationships (Company -> Finance)
+# Output keys: source_key, target_key, value, date, company_name
+# ------------------------------------------------------------------
+deve_relationship_schema = pa.DataFrameSchema(
+    columns={
+        "source_key": pa.Column(
+            str,
+            nullable=False,
+            coerce=True,
+            checks=[
+                pa.Check.str_matches(
+                    r"^(\d{2}\.\d{3}\.\d{3}/\d{4}-\d{2}|\d{14})$",
+                    error="source_key must be a formatted CNPJ",
+                ),
+            ],
+        ),
+        "target_key": pa.Column(str, nullable=False, coerce=True, checks=[
+            pa.Check.str_matches(
+                r"^pgfn_\S+$",
+                error="target_key must be a pgfn_ finance_id",
+            ),
+        ]),
+        "value": pa.Column(float, nullable=True, coerce=True, checks=[
+            pa.Check.ge(0, error="value must be >= 0"),
+        ]),
+        "date": pa.Column(str, nullable=True, coerce=True),
+        "company_name": pa.Column(str, nullable=True, coerce=True),
+    },
+    coerce=True,
+    strict=False,
+)
+""" + +import pandera.pandas as pa + +# ------------------------------------------------------------------ +# Contracts (Contract nodes + Company VENCEU relationship) +# Output keys: contract_id, object, value, contracting_org, date, cnpj, razao_social +# ------------------------------------------------------------------ +contracts_schema = pa.DataFrameSchema( + columns={ + "contract_id": pa.Column(str, nullable=False, coerce=True, checks=[ + pa.Check.str_length(min_value=1, error="contract_id must not be empty"), + ]), + "object": pa.Column(str, nullable=True, coerce=True), + "value": pa.Column(float, nullable=True, coerce=True, checks=[ + pa.Check.ge(0, error="value must be >= 0"), + ]), + "contracting_org": pa.Column(str, nullable=True, coerce=True), + "date": pa.Column(str, nullable=True, coerce=True), + "cnpj": pa.Column( + str, + nullable=True, + coerce=True, + checks=[ + pa.Check.str_matches( + r"^(\d{2}\.\d{3}\.\d{3}/\d{4}-\d{2}|\d{14})$", + error="CNPJ must be formatted (XX.XXX.XXX/XXXX-XX) or 14 digits", + ), + ], + ), + "razao_social": pa.Column(str, nullable=True, coerce=True), + }, + coerce=True, + strict=False, +) + + +# ------------------------------------------------------------------ +# Offices (PublicOffice nodes + Person RECEBEU_SALARIO relationship) +# Output keys: office_id, servidor_id, cpf_partial, name, org, salary +# ------------------------------------------------------------------ +offices_schema = pa.DataFrameSchema( + columns={ + "office_id": pa.Column(str, nullable=False, coerce=True, checks=[ + pa.Check.str_length(min_value=1, error="office_id must not be empty"), + ]), + "servidor_id": pa.Column(str, nullable=False, coerce=True, checks=[ + pa.Check.str_length(min_value=1, error="servidor_id must not be empty"), + ]), + # cpf_partial: 6 middle digits from LGPD-masked CPF, or None + "cpf_partial": pa.Column(str, nullable=True, coerce=True), + "name": pa.Column(str, nullable=True, coerce=True), + "org": pa.Column(str, nullable=True, coerce=True), + "salary": pa.Column(float, nullable=True, coerce=True, checks=[ + pa.Check.ge(0, error="salary must be >= 0"), + ]), + }, + coerce=True, + strict=False, +) + + +# ------------------------------------------------------------------ +# Amendments (Amendment nodes + Person AUTOR_EMENDA relationship) +# Output keys: amendment_id, author_key, name, object, value +# ------------------------------------------------------------------ +amendments_schema = pa.DataFrameSchema( + columns={ + "amendment_id": pa.Column(str, nullable=False, coerce=True, checks=[ + pa.Check.str_length(min_value=1, error="amendment_id must not be empty"), + ]), + "author_key": pa.Column(str, nullable=False, coerce=True, checks=[ + pa.Check.str_length(min_value=1, error="author_key must not be empty"), + ]), + "name": pa.Column(str, nullable=True, coerce=True), + "object": pa.Column(str, nullable=True, coerce=True), + "value": pa.Column(float, nullable=True, coerce=True, checks=[ + pa.Check.ge(0, error="value must be >= 0"), + ]), + }, + coerce=True, + strict=False, +) diff --git a/etl/src/bracc_etl/schemas/tse.py b/etl/src/bracc_etl/schemas/tse.py new file mode 100644 index 0000000..4103cbe --- /dev/null +++ b/etl/src/bracc_etl/schemas/tse.py @@ -0,0 +1,104 @@ +"""Pandera schemas for TSE (Electoral Donations) pipeline. 
diff --git a/etl/src/bracc_etl/schemas/tse.py b/etl/src/bracc_etl/schemas/tse.py
new file mode 100644
index 0000000..4103cbe
--- /dev/null
+++ b/etl/src/bracc_etl/schemas/tse.py
@@ -0,0 +1,104 @@
+"""Pandera schemas for TSE (Electoral Donations) pipeline.
+
+Validates the three entity lists produced by TSEPipeline.transform():
+- candidates: Person nodes (sq_candidato, name, cpf, partido, uf)
+- elections: Election nodes (year, cargo, uf, municipio, candidate_sq)
+- donations: DOOU relationships (candidate_sq, donor_doc, valor, year, etc.)
+
+Column definitions derived from tse.py _transform_candidates and
+_transform_donations output dictionaries.
+
+Note: TSE 2024 masks ALL candidate CPFs as "-4". After stripping,
+candidates without real CPFs omit the 'cpf' key entirely. The cpf
+column is therefore nullable.
+"""
+
+import pandera.pandas as pa
+
+# ------------------------------------------------------------------
+# Candidates (Person nodes)
+# Output keys: sq_candidato, name, partido, uf, cpf (optional)
+# ------------------------------------------------------------------
+candidates_schema = pa.DataFrameSchema(
+    columns={
+        "sq_candidato": pa.Column(str, nullable=False, coerce=True, checks=[
+            pa.Check.str_length(min_value=1, error="sq_candidato must not be empty"),
+        ]),
+        "name": pa.Column(str, nullable=True, coerce=True),
+        "partido": pa.Column(str, nullable=True, coerce=True),
+        "uf": pa.Column(str, nullable=True, coerce=True, checks=[
+            pa.Check.str_matches(
+                r"^[A-Z]{2}$",
+                error="UF must be 2 uppercase letters",
+            ),
+        ]),
+        # cpf is optional — absent for masked candidates (TSE sentinel "-4")
+        "cpf": pa.Column(
+            str,
+            nullable=True,
+            coerce=True,
+            required=False,
+            checks=[
+                pa.Check.str_matches(
+                    r"^(\d{3}\.\d{3}\.\d{3}-\d{2}|\d{11})$",
+                    error="CPF must be formatted or 11 digits",
+                ),
+            ],
+        ),
+    },
+    coerce=True,
+    strict=False,
+)
+
+
+# ------------------------------------------------------------------
+# Elections (Election nodes)
+# Output keys: year, cargo, uf, municipio, candidate_sq
+# ------------------------------------------------------------------
+elections_schema = pa.DataFrameSchema(
+    columns={
+        "year": pa.Column(int, nullable=False, coerce=True, checks=[
+            pa.Check.in_range(1945, 2030, error="year must be between 1945 and 2030"),
+        ]),
+        "cargo": pa.Column(str, nullable=True, coerce=True),
+        "uf": pa.Column(str, nullable=True, coerce=True),
+        "municipio": pa.Column(str, nullable=True, coerce=True),
+        "candidate_sq": pa.Column(str, nullable=False, coerce=True),
+    },
+    coerce=True,
+    strict=False,
+)
+
+
+# ------------------------------------------------------------------
+# Donations (DOOU relationships)
+# Output keys: candidate_sq, donor_doc, donor_name, donor_is_company,
+# valor, year
+# ------------------------------------------------------------------
+donations_schema = pa.DataFrameSchema(
+    columns={
+        "candidate_sq": pa.Column(str, nullable=False, coerce=True),
+        "donor_doc": pa.Column(
+            str,
+            nullable=True,
+            coerce=True,
+            checks=[
+                # Formatted CPF or CNPJ (11 or 14 digits, with or without punctuation)
+                pa.Check.str_matches(
+                    r"^(\d{3}\.\d{3}\.\d{3}-\d{2}|\d{2}\.\d{3}\.\d{3}/\d{4}-\d{2}|\d{11}|\d{14})$",
+                    error="donor_doc must be formatted CPF or CNPJ",
+                ),
+            ],
+        ),
+        "donor_name": pa.Column(str, nullable=True, coerce=True),
+        "donor_is_company": pa.Column(bool, nullable=False, coerce=True),
+        "valor": pa.Column(float, nullable=False, coerce=True, checks=[
+            pa.Check.ge(0, error="valor must be >= 0"),
+        ]),
+        "year": pa.Column(int, nullable=False, coerce=True, checks=[
+            pa.Check.in_range(1945, 2030, error="year must be between 1945 and 2030"),
+        ]),
+    },
+    coerce=True,
+    strict=False,
+)
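The schema modules above validate with pandera. A small smoke test of candidates_schema under lazy validation (sample rows hypothetical; lazy=True collects every violation instead of stopping at the first):

import pandas as pd
import pandera as pa

from bracc_etl.schemas.tse import candidates_schema  # module added above

df = pd.DataFrame({
    "sq_candidato": ["250001", "250002"],
    "name": ["FULANO DE TAL", "BELTRANA DA SILVA"],  # hypothetical rows
    "partido": ["XYZ", "ABC"],
    "uf": ["SP", "rj"],  # lowercase UF violates ^[A-Z]{2}$
})

try:
    candidates_schema.validate(df, lazy=True)
except pa.errors.SchemaErrors as exc:
    print(exc.failure_cases[["column", "failure_case"]])  # one row: uf / "rj"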
diff --git a/etl/src/bracc_etl/schemas/validator.py b/etl/src/bracc_etl/schemas/validator.py
new file mode 100644
index 0000000..7f70ede
--- /dev/null
+++ b/etl/src/bracc_etl/schemas/validator.py
@@ -0,0 +1,80 @@
+"""Schema validation utility with configurable strictness."""
+
+import logging
+import os
+from typing import Any, cast
+
+import pandas as pd
+
+logger = logging.getLogger(__name__)
+
+
+def _get_validation_mode() -> str:
+    """Get validation mode from env: 'warn' (default), 'strict', or 'off'."""
+    return os.environ.get("BRACC_SCHEMA_VALIDATION", "warn").lower()
+
+
+def validate_dataframe(
+    df: pd.DataFrame,
+    schema: Any,  # pa.DataFrameSchema
+    source_name: str,
+) -> pd.DataFrame:
+    """Validate a DataFrame against a Pandera schema.
+
+    Behavior controlled by BRACC_SCHEMA_VALIDATION env var:
+    - 'off': skip validation entirely
+    - 'warn': validate, log warnings, return original df
+    - 'strict': validate, raise on failure
+    """
+    mode = _get_validation_mode()
+    if mode == "off":
+        return df
+
+    try:
+        import pandera as pa
+
+        validated = schema.validate(df, lazy=True)
+        logger.info("[%s] Schema validation passed: %d rows OK", source_name, len(df))
+        return cast("pd.DataFrame", validated)
+    except pa.errors.SchemaErrors as exc:
+        n_failures = len(exc.failure_cases)
+        logger.warning(
+            "[%s] Schema validation: %d failures in %d rows",
+            source_name,
+            n_failures,
+            len(df),
+        )
+        for _, row in exc.failure_cases.head(10).iterrows():
+            logger.warning(
+                "  %s: column=%s check=%s",
+                source_name,
+                row.get("column"),
+                row.get("check"),
+            )
+
+        if mode == "strict":
+            raise
+        return df  # warn mode: return original
+    except ImportError:
+        logger.warning("[%s] pandera not installed, skipping validation", source_name)
+        return df
+
+
+def validate_dataframe_sampled(
+    df: pd.DataFrame,
+    schema: Any,
+    source_name: str,
+    sample_size: int = 10_000,
+) -> pd.DataFrame:
+    """Validate a random sample of a large DataFrame (e.g., CNPJ).
+
+    For DataFrames larger than sample_size, validates only a random sample
+    to keep validation fast on multi-million-row datasets. Always returns
+    the full original DataFrame.
+    """
+    if len(df) <= sample_size:
+        return validate_dataframe(df, schema, source_name)
+
+    sample = df.sample(n=sample_size, random_state=42)
+    validate_dataframe(sample, schema, f"{source_name}[sample={sample_size}]")
+    return df  # Always return full df
diff --git a/etl/src/bracc_etl/transforms/date_formatting.py b/etl/src/bracc_etl/transforms/date_formatting.py
index 3afca17..0776ec0 100644
--- a/etl/src/bracc_etl/transforms/date_formatting.py
+++ b/etl/src/bracc_etl/transforms/date_formatting.py
@@ -1,10 +1,16 @@
+import logging
+
 import pandas as pd
 
+logger = logging.getLogger(__name__)
+
 
 def parse_date(value: str) -> str:
     """Parse a date string to ISO format (YYYY-MM-DD) or empty string.
 
     Handles: DD/MM/YYYY, DD/MM/YYYY HH:MM:SS, YYYY-MM-DD, YYYYMMDD.
+    Returns empty string when all format attempts fail (prevents garbage
+    dates from reaching Neo4j).
     """
     value = value.strip()
     if not value:
@@ -14,4 +20,5 @@ def parse_date(value: str) -> str:
             return str(pd.to_datetime(value, format=fmt).strftime("%Y-%m-%d"))
         except ValueError:
             continue
-    return value
+    logger.debug("Could not parse date: %r", value)
+    return ""
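The parse_date change means unparseable input now maps to an empty string rather than passing through unchanged. A short check of the contract, assuming the import path shown in this diff:

from bracc_etl.transforms.date_formatting import parse_date

assert parse_date("31/12/2024") == "2024-12-31"
assert parse_date("31/12/2024 14:30:00") == "2024-12-31"
assert parse_date("20241231") == "2024-12-31"
assert parse_date("invalid") == ""  # previously returned "invalid" unchanged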
diff --git a/etl/tests/integration/test_link_persons.py b/etl/tests/integration/test_link_persons.py
deleted file mode 100644
index 1c90101..0000000
--- a/etl/tests/integration/test_link_persons.py
+++ /dev/null
@@ -1,307 +0,0 @@
-"""Integration tests for link_persons.cypher.
-
-Runs against a real Neo4j testcontainer to verify SAME_AS relationships
-are created with correct confidence, method, and uniqueness guards.
-"""
-
-from __future__ import annotations
-
-from pathlib import Path
-from typing import TYPE_CHECKING
-
-import pytest
-
-if TYPE_CHECKING:
-    from neo4j import Driver
-
-LINK_SCRIPT = (
-    Path(__file__).parent.parent.parent.parent
-    / "scripts"
-    / "link_persons.cypher"
-)
-
-
-def _parse_phases() -> dict[int, str]:
-    """Parse link_persons.cypher into phase number → Cypher blocks."""
-    text = LINK_SCRIPT.read_text()
-    blocks: dict[int, str] = {}
-    current_phase: int | None = None
-    lines: list[str] = []
-    for line in text.splitlines():
-        if line.startswith("// ── Phase "):
-            if current_phase is not None:
-                blocks[current_phase] = "\n".join(lines)
-            phase_str = line.split("Phase ")[1].split(":")[0]
-            current_phase = int(phase_str)
-            lines = []
-        else:
-            lines.append(line)
-    if current_phase is not None:
-        blocks[current_phase] = "\n".join(lines)
-    return blocks
-
-
-PHASE_BLOCKS = _parse_phases()
-
-
-def _strip_comments(cypher: str) -> str:
-    """Remove // comment lines from a Cypher block."""
-    return "\n".join(
-        line for line in cypher.splitlines()
-        if not line.strip().startswith("//")
-    )
-
-
-def _run_cypher(driver: Driver, cypher: str) -> None:
-    """Run one or more semicolon-separated Cypher statements."""
-    with driver.session() as session:
-        for stmt in cypher.split(";"):
-            stmt = _strip_comments(stmt).strip()
-            if stmt:
-                session.run(stmt).consume()
-
-
-def _run_phases(driver: Driver, phases: list[int]) -> None:
-    """Run specific phases from link_persons.cypher."""
-    for phase in phases:
-        _run_cypher(driver, PHASE_BLOCKS[phase])
-
-
-def _clear_db(driver: Driver) -> None:
-    _run_cypher(driver, "MATCH (n) DETACH DELETE n")
-
-
-def _setup(driver: Driver, *statements: str) -> None:
-    """Run setup Cypher statements, each in its own auto-commit tx."""
-    for stmt in statements:
-        _run_cypher(driver, stmt)
-
-
-def _count_same_as(
-    driver: Driver, method: str | None = None,
-) -> int:
-    """Count SAME_AS relationships, optionally filtered by method."""
-    q = "MATCH ()-[r:SAME_AS]->() "
-    if method:
-        q += f"WHERE r.method = '{method}' "
-    q += "RETURN count(r) AS cnt"
-    with driver.session() as session:
-        result = session.run(q)
-        record = result.single()
-        return record["cnt"] if record else 0
-
-
-# ── Phase 4 tests ──────────────────────────────────────────────────
-
-
-@pytest.mark.integration
-def test_phase4_partial_cpf_name_match(neo4j_driver: Driver) -> None:
-    """Phase 4 is disabled: partial CPF matching must not create SAME_AS."""
-    _clear_db(neo4j_driver)
-    _setup(
-        neo4j_driver,
-        "CREATE (:Person {cpf: '026.005.602-20', name: 'JOSE DIAS TOFFOLI'})",
-    )
-    _run_phases(neo4j_driver, [0])
-    _setup(
-        neo4j_driver,
-        "CREATE (:Person {"
-        "cpf: '005602', cpf_partial: '005602', "
-        "name: 'JOSE DIAS TOFFOLI'})",
-    )
-
-    _run_phases(neo4j_driver, [4])
-
-    assert _count_same_as(neo4j_driver) == 0
-
-
-@pytest.mark.integration
-def test_phase4_no_match_different_name(neo4j_driver: Driver) -> None:
-    """Same cpf_partial/cpf_middle6 but different names -> no match."""
-    _clear_db(neo4j_driver)
-    _setup(
-        neo4j_driver,
-        "CREATE (:Person {cpf: '026.005.602-20', name: 'JOSE DIAS TOFFOLI'})",
-    )
-    _run_phases(neo4j_driver, [0])
-    _setup(
-        neo4j_driver,
-        "CREATE (:Person {"
-        "cpf: '005602', cpf_partial: '005602', "
-        "name: 'MARIA DA SILVA'})",
-    )
-
-    _run_phases(neo4j_driver, [4])
-
-    assert _count_same_as(neo4j_driver) == 0
-
-
-@pytest.mark.integration
-def test_phase4_no_duplicate_if_already_linked(
-    neo4j_driver: Driver,
-) -> None:
-    """Running disabled Phase 4 twice must still create no SAME_AS."""
-    _clear_db(neo4j_driver)
-    _setup(
-        neo4j_driver,
-        "CREATE (:Person {cpf: '026.005.602-20', name: 'JOSE DIAS TOFFOLI'})",
-    )
-    _run_phases(neo4j_driver, [0])
-    _setup(
-        neo4j_driver,
-        "CREATE (:Person {"
-        "cpf: '005602', cpf_partial: '005602', "
-        "name: 'JOSE DIAS TOFFOLI'})",
-    )
-
-    _run_phases(neo4j_driver, [4])
-    _run_phases(neo4j_driver, [4])  # idempotent
-
-    assert _count_same_as(neo4j_driver) == 0
-
-
-# ── Phase 5 tests ──────────────────────────────────────────────────
-
-
-@pytest.mark.integration
-def test_phase5_unique_name_match(neo4j_driver: Driver) -> None:
-    """Unique-name servidor (blank cpf_partial) matches unique person."""
-    _clear_db(neo4j_driver)
-    _setup(
-        neo4j_driver,
-        "CREATE (:Person {cpf: '026.005.602-20', name: 'JOSE DIAS TOFFOLI'})",
-    )
-    _run_phases(neo4j_driver, [0])
-    _setup(
-        neo4j_driver,
-        "CREATE (:Person {name: 'JOSE DIAS TOFFOLI'})"
-        "-[:RECEBEU_SALARIO]->(:PublicOffice {cpf: 'classified_1'})",
-    )
-
-    _run_phases(neo4j_driver, [5])
-
-    with neo4j_driver.session() as s:
-        result = s.run(
-            "MATCH ()-[r:SAME_AS]->() "
-            "RETURN r.confidence AS conf, r.method AS method"
-        )
-        records = list(result)
-        assert len(records) == 1
-        assert records[0]["conf"] == 0.85
-        assert records[0]["method"] == "unique_name_match_servidor"
-
-
-@pytest.mark.integration
-def test_phase5_common_name_servidor_side_no_match(
-    neo4j_driver: Driver,
-) -> None:
-    """Two servidores with same name and blank cpf_partial -> no match."""
-    _clear_db(neo4j_driver)
-    _setup(
-        neo4j_driver,
-        "CREATE (:Person {cpf: '111.222.333-44', name: 'JOSE DA SILVA'})",
-    )
-    _run_phases(neo4j_driver, [0])
-    _setup(
-        neo4j_driver,
-        "CREATE (:Person {name: 'JOSE DA SILVA'})"
-        "-[:RECEBEU_SALARIO]->(:PublicOffice {cpf: 'classified_a'})",
-        "CREATE (:Person {name: 'JOSE DA SILVA'})"
-        "-[:RECEBEU_SALARIO]->(:PublicOffice {cpf: 'classified_b'})",
-    )
-
-    _run_phases(neo4j_driver, [5])
-
-    assert _count_same_as(neo4j_driver) == 0
-
-
-@pytest.mark.integration
-def test_phase5_common_name_person_side_no_match(
-    neo4j_driver: Driver,
-) -> None:
-    """Unique servidor but two full-CPF persons share the name -> no match."""
-    _clear_db(neo4j_driver)
-    _setup(
-        neo4j_driver,
-        "CREATE (:Person {cpf: '111.222.333-44', name: 'MARIA OLIVEIRA'})",
-        "CREATE (:Person {cpf: '555.666.777-88', name: 'MARIA OLIVEIRA'})",
-    )
-    _run_phases(neo4j_driver, [0])
-    _setup(
-        neo4j_driver,
-        "CREATE (:Person {name: 'MARIA OLIVEIRA'})"
-        "-[:RECEBEU_SALARIO]->(:PublicOffice {cpf: 'classified_x'})",
-    )
-
-    _run_phases(neo4j_driver, [5])
-
-    assert _count_same_as(neo4j_driver) == 0
-
-
-@pytest.mark.integration
-def test_phase5_requires_recebeu_salario(neo4j_driver: Driver) -> None:
-    """Person without RECEBEU_SALARIO should not match in Phase 5."""
-    _clear_db(neo4j_driver)
-    _setup(
-        neo4j_driver,
-        "CREATE (:Person {cpf: '026.005.602-20', name: 'JOSE DIAS TOFFOLI'})",
-    )
-    _run_phases(neo4j_driver, [0])
-    _setup(
-        neo4j_driver,
-        # Amendment author — no RECEBEU_SALARIO relationship
-        "CREATE (:Person {name: 'JOSE DIAS TOFFOLI', author_key: 'toffoli'})",
-    )
-
-    _run_phases(neo4j_driver, [5])
-
-    assert _count_same_as(neo4j_driver, "unique_name_match_servidor") == 0
-
-
-@pytest.mark.integration
-def test_phase5_no_duplicate_if_already_linked(
-    neo4j_driver: Driver,
-) -> None:
-    """Running Phase 5 twice should not create duplicate SAME_AS."""
duplicate SAME_AS.""" - _clear_db(neo4j_driver) - _setup( - neo4j_driver, - "CREATE (:Person {cpf: '026.005.602-20', name: 'JOSE DIAS TOFFOLI'})", - ) - _run_phases(neo4j_driver, [0]) - _setup( - neo4j_driver, - "CREATE (:Person {name: 'JOSE DIAS TOFFOLI'})" - "-[:RECEBEU_SALARIO]->(:PublicOffice {cpf: 'classified_1'})", - ) - - _run_phases(neo4j_driver, [5]) - _run_phases(neo4j_driver, [5]) # idempotent - - assert _count_same_as(neo4j_driver) == 1 - - -@pytest.mark.integration -def test_phase5_servidor_with_cpf_partial_skipped( - neo4j_driver: Driver, -) -> None: - """Servidor with cpf_partial IS NOT NULL should not match in Phase 5. - - Phase 5 only handles blank-CPF servidores (cpf_partial IS NULL). - Partial-CPF matching is intentionally disabled. - """ - _clear_db(neo4j_driver) - _setup( - neo4j_driver, - "CREATE (:Person {cpf: '026.005.602-20', name: 'JOSE DIAS TOFFOLI'})", - ) - _run_phases(neo4j_driver, [0]) - _setup( - neo4j_driver, - "CREATE (:Person {name: 'JOSE DIAS TOFFOLI', cpf_partial: '005602'})" - "-[:RECEBEU_SALARIO]->(:PublicOffice {cpf: 'partial_1'})", - ) - - _run_phases(neo4j_driver, [5]) - - assert _count_same_as(neo4j_driver, "unique_name_match_servidor") == 0 diff --git a/etl/tests/test_base.py b/etl/tests/test_base.py index bbc64de..ee21673 100644 --- a/etl/tests/test_base.py +++ b/etl/tests/test_base.py @@ -8,8 +8,7 @@ class DummyPipeline(Pipeline): source_id = "test" def __init__(self) -> None: - self.driver = MagicMock() - self.data_dir = "./data" + super().__init__(driver=MagicMock(), data_dir="./data") self.extracted = False self.transformed = False self.loaded = False diff --git a/etl/tests/test_comprasnet_pipeline.py b/etl/tests/test_comprasnet_pipeline.py index 0134aef..b8665e3 100644 --- a/etl/tests/test_comprasnet_pipeline.py +++ b/etl/tests/test_comprasnet_pipeline.py @@ -23,7 +23,7 @@ def _extract_from_fixtures(pipeline: ComprasnetPipeline) -> None: def test_pipeline_name_and_source_id() -> None: pipeline = _make_pipeline() assert pipeline.name == "comprasnet" - assert pipeline.source_id == "pncp" + assert pipeline.source_id == "comprasnet" def test_transform_produces_correct_contracts() -> None: diff --git a/etl/tests/test_date_formatting.py b/etl/tests/test_date_formatting.py index 265f2d9..17cee75 100644 --- a/etl/tests/test_date_formatting.py +++ b/etl/tests/test_date_formatting.py @@ -14,7 +14,7 @@ ("31/12/2024 14:30:00", "2024-12-31"), ("", ""), (" ", ""), - ("invalid", "invalid"), + ("invalid", ""), ("01/01/2000", "2000-01-01"), ], ) diff --git a/etl/tests/test_download_cnpj.py b/etl/tests/test_download_cnpj.py new file mode 100644 index 0000000..eb3d9e4 --- /dev/null +++ b/etl/tests/test_download_cnpj.py @@ -0,0 +1,210 @@ +"""Tests for etl/scripts/download_cnpj.py — release resolution and manifest.""" + +from __future__ import annotations + +import importlib.util +import json +from pathlib import Path +from typing import TYPE_CHECKING +from unittest.mock import MagicMock, patch + +if TYPE_CHECKING: + from types import ModuleType + +import httpx +import pytest + + +def _load_script_module() -> ModuleType: + """Load download_cnpj.py as a module without running it.""" + scripts_dir = Path(__file__).resolve().parents[1] / "scripts" + script_path = scripts_dir / "download_cnpj.py" + spec = importlib.util.spec_from_file_location("download_cnpj", script_path) + assert spec is not None + assert spec.loader is not None + module = importlib.util.module_from_spec(spec) + spec.loader.exec_module(module) + return module + + +@pytest.fixture() +def mod() -> 
ModuleType: + return _load_script_module() + + +# ---- resolve_rf_release tests ---- + + +def test_resolve_rf_release_nextcloud_success(mod: ModuleType) -> None: + """When Nextcloud token probe returns 200, use Nextcloud URL.""" + def _fake_head(url: str, **kwargs) -> MagicMock: # type: ignore[no-untyped-def] + resp = MagicMock() + resp.status_code = 200 + return resp + + with ( + patch.object(httpx, "head", side_effect=_fake_head), + patch.dict("os.environ", {}, clear=False), + ): + result = mod.resolve_rf_release() + + assert "arquivos.receitafederal.gov.br" in result + assert "download?path=" in result + + +def test_resolve_rf_release_nextcloud_env_token_priority(mod: ModuleType) -> None: + """CNPJ_SHARE_TOKEN env var is tried before known tokens.""" + probed_urls: list[str] = [] + + def _fake_head(url: str, **kwargs) -> MagicMock: # type: ignore[no-untyped-def] + probed_urls.append(url) + resp = MagicMock() + resp.status_code = 200 + return resp + + with ( + patch.object(httpx, "head", side_effect=_fake_head), + patch.dict("os.environ", {"CNPJ_SHARE_TOKEN": "customToken123"}, clear=False), + ): + result = mod.resolve_rf_release() + + # First probe should use the env token + assert "customToken123" in probed_urls[0] + assert "customToken123" in result + + +def test_resolve_rf_release_fallback_to_legacy_when_nextcloud_down(mod: ModuleType) -> None: + """When all Nextcloud tokens fail, fall back to legacy paths.""" + def _fake_head(url: str, **kwargs) -> MagicMock: # type: ignore[no-untyped-def] + resp = MagicMock() + if "arquivos.receitafederal.gov.br" in url: + resp.status_code = 404 + elif "dados_abertos_cnpj" in url: + resp.status_code = 200 + else: + resp.status_code = 404 + return resp + + with ( + patch.object(httpx, "head", side_effect=_fake_head), + patch.dict("os.environ", {}, clear=False), + ): + result = mod.resolve_rf_release() + + assert "dados_abertos_cnpj" in result + + +def test_resolve_rf_release_explicit_override_legacy(mod: ModuleType) -> None: + """When year_month is provided and Nextcloud down, use legacy with that month.""" + def _fake_head(url: str, **kwargs) -> MagicMock: # type: ignore[no-untyped-def] + resp = MagicMock() + if "arquivos.receitafederal.gov.br" in url: + resp.status_code = 404 + else: + resp.status_code = 200 + return resp + + with ( + patch.object(httpx, "head", side_effect=_fake_head), + patch.dict("os.environ", {}, clear=False), + ): + result = mod.resolve_rf_release("2026-01") + + assert "2026-01" in result + assert result == "https://dadosabertos.rfb.gov.br/CNPJ/dados_abertos_cnpj/2026-01/" + + +def test_resolve_rf_release_all_fail_raises(mod: ModuleType) -> None: + """When all candidates (Nextcloud + legacy) return 404, raise RuntimeError.""" + def _fake_head(url: str, **kwargs) -> MagicMock: # type: ignore[no-untyped-def] + resp = MagicMock() + resp.status_code = 404 + return resp + + with ( + patch.object(httpx, "head", side_effect=_fake_head), + patch.dict("os.environ", {}, clear=False), + pytest.raises(RuntimeError, match="Could not resolve CNPJ release"), + ): + mod.resolve_rf_release() + + +def test_resolve_rf_release_legacy_flat_fallback(mod: ModuleType) -> None: + """When Nextcloud + legacy new paths fail, fall back to legacy flat URL.""" + call_urls: list[str] = [] + + def _fake_head(url: str, **kwargs) -> MagicMock: # type: ignore[no-untyped-def] + call_urls.append(url) + resp = MagicMock() + # Everything fails except legacy flat URL + if url == "https://dadosabertos.rfb.gov.br/CNPJ/": + resp.status_code = 200 + else: + 
resp.status_code = 404 + return resp + + with ( + patch.object(httpx, "head", side_effect=_fake_head), + patch.dict("os.environ", {}, clear=False), + ): + result = mod.resolve_rf_release() + + assert result == "https://dadosabertos.rfb.gov.br/CNPJ/" + # Should have tried Nextcloud tokens + legacy new paths + legacy flat + assert len(call_urls) >= 5 # 2 Nextcloud + 2 monthly + 1 flat + + +# ---- manifest test ---- + + +def test_manifest_written_after_download(mod: ModuleType, tmp_path: Path) -> None: + """Verify download_manifest.json is created with expected structure.""" + from click.testing import CliRunner + + # Patch resolve_rf_release to avoid HTTP calls + def _fake_resolve(year_month: str | None = None) -> str: + return "https://dadosabertos.rfb.gov.br/CNPJ/dados_abertos_cnpj/2026-03/" + + # Patch download_file to simulate successful downloads + def _fake_download(url: str, dest: Path, **kwargs) -> bool: # type: ignore[no-untyped-def] + dest.parent.mkdir(parents=True, exist_ok=True) + dest.write_bytes(b"fake-zip-content") + return True + + # Patch extract_zip to no-op + def _fake_extract(zip_path: Path, output_dir: Path) -> list[Path]: + return [] + + with ( + patch.object(mod, "resolve_rf_release", side_effect=_fake_resolve), + patch.object(mod, "download_file", side_effect=_fake_download), + patch.object(mod, "extract_zip", side_effect=_fake_extract), + ): + runner = CliRunner() + result = runner.invoke( + mod.main, + [ + "--output-dir", str(tmp_path), + "--files", "1", + "--skip-extract", + ], + ) + + assert result.exit_code == 0, result.output + + manifest_path = tmp_path / "download_manifest.json" + assert manifest_path.exists(), f"Manifest not found. Output:\n{result.output}" + + manifest = json.loads(manifest_path.read_text(encoding="utf-8")) + assert manifest["source"] == "receita_federal_cnpj" + assert manifest["resolved_release"] == "2026-03" + assert manifest["base_url"] == "https://dadosabertos.rfb.gov.br/CNPJ/dados_abertos_cnpj/2026-03/" + assert "checksum" in manifest + assert manifest["checksum"].startswith("sha256:") + assert "started_at" in manifest + assert "finished_at" in manifest + + # Should have reference files + main files (1 per type = 3 main + 6 reference) + assert len(manifest["files"]) == 9 + statuses = {f["status"] for f in manifest["files"]} + assert statuses <= {"ok", "skipped", "failed"} diff --git a/etl/tests/test_download_cnpj_bq.py b/etl/tests/test_download_cnpj_bq.py index dea7294..c390fdb 100644 --- a/etl/tests/test_download_cnpj_bq.py +++ b/etl/tests/test_download_cnpj_bq.py @@ -47,12 +47,13 @@ def _fake_download(*args, **kwargs): # type: ignore[no-untyped-def] } monkeypatch.setattr(module, "_download_table", _fake_download) + monkeypatch.setattr(module, "_run_bigquery_precheck", lambda **kw: None) runner = CliRunner() result = runner.invoke( module.main, [ "--billing-project", - "bracc-corruptos", + "icarus-corruptos", "--output-dir", str(tmp_path), "--dataset", @@ -94,12 +95,13 @@ def _fail_on_socios(*args, **kwargs): # type: ignore[no-untyped-def] } monkeypatch.setattr(module, "_download_table", _fail_on_socios) + monkeypatch.setattr(module, "_run_bigquery_precheck", lambda **kw: None) runner = CliRunner() result = runner.invoke( module.main, [ "--billing-project", - "bracc-corruptos", + "icarus-corruptos", "--output-dir", str(tmp_path), "--dataset", diff --git a/etl/uv.lock b/etl/uv.lock index 800f331..37f031f 100644 --- a/etl/uv.lock +++ b/etl/uv.lock @@ -70,6 +70,7 @@ dependencies = [ { name = "httpx" }, { name = "neo4j" }, { name = 
"pandas" }, + { name = "pandera" }, { name = "pydantic" }, { name = "pydantic-settings" }, { name = "pypdf" }, @@ -107,7 +108,7 @@ dev = [ requires-dist = [ { name = "click", specifier = ">=8.1.0" }, { name = "db-dtypes", marker = "extra == 'bigquery'", specifier = ">=1.3.0" }, - { name = "defusedxml", specifier = ">=0.7.1" }, + { name = "defusedxml", specifier = ">=0.7.0" }, { name = "google-cloud-bigquery", marker = "extra == 'bigquery'", specifier = ">=3.25.0" }, { name = "google-cloud-bigquery-storage", marker = "extra == 'bigquery'", specifier = ">=2.27.0" }, { name = "httpx", specifier = ">=0.28.0" }, @@ -115,6 +116,7 @@ requires-dist = [ { name = "neo4j", specifier = ">=5.27.0" }, { name = "pandas", specifier = ">=2.2.0" }, { name = "pandas-stubs", marker = "extra == 'dev'", specifier = ">=2.2.0" }, + { name = "pandera", specifier = ">=0.21.0" }, { name = "pyarrow", marker = "extra == 'bigquery'", specifier = ">=17.0.0" }, { name = "pydantic", specifier = ">=2.10.0" }, { name = "pydantic-settings", specifier = ">=2.7.0" }, @@ -1014,6 +1016,22 @@ wheels = [ { url = "https://files.pythonhosted.org/packages/7c/2f/f91e4eee21585ff548e83358332d5632ee49f6b2dcd96cb5dca4e0468951/pandas_stubs-3.0.0.260204-py3-none-any.whl", hash = "sha256:5ab9e4d55a6e2752e9720828564af40d48c4f709e6a2c69b743014a6fcb6c241", size = 168540, upload-time = "2026-02-04T15:17:15.615Z" }, ] +[[package]] +name = "pandera" +version = "0.29.0" +source = { registry = "https://pypi.org/simple" } +dependencies = [ + { name = "packaging" }, + { name = "pydantic" }, + { name = "typeguard" }, + { name = "typing-extensions" }, + { name = "typing-inspect" }, +] +sdist = { url = "https://files.pythonhosted.org/packages/80/ee/8e0d40dad2c0947b933fc9c0959b2c17cc3419ccdf50df683216f37a3f96/pandera-0.29.0.tar.gz", hash = "sha256:06bc4fc1e4ff02534dd44482a9bc704fb2e58fe3fbb11be906aa714f7f5ec801", size = 575324, upload-time = "2026-01-29T02:49:36.891Z" } +wheels = [ + { url = "https://files.pythonhosted.org/packages/75/7b/03299e4ccc5e3cfb0f9e234207ac43ef08b3ba6c4c2882c890e550ceadba/pandera-0.29.0-py3-none-any.whl", hash = "sha256:b3b25d6c00d7c100fbab96aff0e81e52d3dae543a880d24135cca705fa97c516", size = 295876, upload-time = "2026-01-29T02:49:34.812Z" }, +] + [[package]] name = "pathspec" version = "1.0.4" @@ -1538,6 +1556,18 @@ wheels = [ { url = "https://files.pythonhosted.org/packages/24/99/4772b8e00a136f3e01236de33b0efda31ee7077203ba5967fcc76da94d65/texttable-1.7.0-py2.py3-none-any.whl", hash = "sha256:72227d592c82b3d7f672731ae73e4d1f88cd8e2ef5b075a7a7f01a23a3743917", size = 10768, upload-time = "2023-10-03T09:48:10.434Z" }, ] +[[package]] +name = "typeguard" +version = "4.5.1" +source = { registry = "https://pypi.org/simple" } +dependencies = [ + { name = "typing-extensions" }, +] +sdist = { url = "https://files.pythonhosted.org/packages/2b/e8/66e25efcc18542d58706ce4e50415710593721aae26e794ab1dec34fb66f/typeguard-4.5.1.tar.gz", hash = "sha256:f6f8ecbbc819c9bc749983cc67c02391e16a9b43b8b27f15dc70ed7c4a007274", size = 80121, upload-time = "2026-02-19T16:09:03.392Z" } +wheels = [ + { url = "https://files.pythonhosted.org/packages/91/88/b55b3117287a8540b76dbdd87733808d4d01c8067a3b339408c250bb3600/typeguard-4.5.1-py3-none-any.whl", hash = "sha256:44d2bf329d49a244110a090b55f5f91aa82d9a9834ebfd30bcc73651e4a8cc40", size = 36745, upload-time = "2026-02-19T16:09:01.6Z" }, +] + [[package]] name = "typing-extensions" version = "4.15.0" @@ -1547,6 +1577,19 @@ wheels = [ { url = 
"https://files.pythonhosted.org/packages/18/67/36e9267722cc04a6b9f15c7f3441c2363321a3ea07da7ae0c0707beb2a9c/typing_extensions-4.15.0-py3-none-any.whl", hash = "sha256:f0fa19c6845758ab08074a0cfa8b7aecb71c999ca73d62883bc25cc018c4e548", size = 44614, upload-time = "2025-08-25T13:49:24.86Z" }, ] +[[package]] +name = "typing-inspect" +version = "0.9.0" +source = { registry = "https://pypi.org/simple" } +dependencies = [ + { name = "mypy-extensions" }, + { name = "typing-extensions" }, +] +sdist = { url = "https://files.pythonhosted.org/packages/dc/74/1789779d91f1961fa9438e9a8710cdae6bd138c80d7303996933d117264a/typing_inspect-0.9.0.tar.gz", hash = "sha256:b23fc42ff6f6ef6954e4852c1fb512cdd18dbea03134f91f856a95ccc9461f78", size = 13825, upload-time = "2023-05-24T20:25:47.612Z" } +wheels = [ + { url = "https://files.pythonhosted.org/packages/65/f3/107a22063bf27bdccf2024833d3445f4eea42b2e598abfbd46f6a63b6cb0/typing_inspect-0.9.0-py3-none-any.whl", hash = "sha256:9ee6fc59062311ef8547596ab6b955e1b8aa46242d854bfc78f4f6b0eff35f9f", size = 8827, upload-time = "2023-05-24T20:25:45.287Z" }, +] + [[package]] name = "typing-inspection" version = "0.4.2" diff --git a/frontend/index.html b/frontend/index.html index 11780c0..783b12e 100644 --- a/frontend/index.html +++ b/frontend/index.html @@ -4,7 +4,7 @@ - BRACC + BR-ACC diff --git a/frontend/package-lock.json b/frontend/package-lock.json index 52db820..5fcbabc 100644 --- a/frontend/package-lock.json +++ b/frontend/package-lock.json @@ -1675,9 +1675,9 @@ "license": "MIT" }, "node_modules/@rollup/rollup-android-arm-eabi": { - "version": "4.59.0", - "resolved": "https://registry.npmjs.org/@rollup/rollup-android-arm-eabi/-/rollup-android-arm-eabi-4.59.0.tgz", - "integrity": "sha512-upnNBkA6ZH2VKGcBj9Fyl9IGNPULcjXRlg0LLeaioQWueH30p6IXtJEbKAgvyv+mJaMxSm1l6xwDXYjpEMiLMg==", + "version": "4.58.0", + "resolved": "https://registry.npmjs.org/@rollup/rollup-android-arm-eabi/-/rollup-android-arm-eabi-4.58.0.tgz", + "integrity": "sha512-mr0tmS/4FoVk1cnaeN244A/wjvGDNItZKR8hRhnmCzygyRXYtKF5jVDSIILR1U97CTzAYmbgIj/Dukg62ggG5w==", "cpu": [ "arm" ], @@ -1689,9 +1689,9 @@ ] }, "node_modules/@rollup/rollup-android-arm64": { - "version": "4.59.0", - "resolved": "https://registry.npmjs.org/@rollup/rollup-android-arm64/-/rollup-android-arm64-4.59.0.tgz", - "integrity": "sha512-hZ+Zxj3SySm4A/DylsDKZAeVg0mvi++0PYVceVyX7hemkw7OreKdCvW2oQ3T1FMZvCaQXqOTHb8qmBShoqk69Q==", + "version": "4.58.0", + "resolved": "https://registry.npmjs.org/@rollup/rollup-android-arm64/-/rollup-android-arm64-4.58.0.tgz", + "integrity": "sha512-+s++dbp+/RTte62mQD9wLSbiMTV+xr/PeRJEc/sFZFSBRlHPNPVaf5FXlzAL77Mr8FtSfQqCN+I598M8U41ccQ==", "cpu": [ "arm64" ], @@ -1703,9 +1703,9 @@ ] }, "node_modules/@rollup/rollup-darwin-arm64": { - "version": "4.59.0", - "resolved": "https://registry.npmjs.org/@rollup/rollup-darwin-arm64/-/rollup-darwin-arm64-4.59.0.tgz", - "integrity": "sha512-W2Psnbh1J8ZJw0xKAd8zdNgF9HRLkdWwwdWqubSVk0pUuQkoHnv7rx4GiF9rT4t5DIZGAsConRE3AxCdJ4m8rg==", + "version": "4.58.0", + "resolved": "https://registry.npmjs.org/@rollup/rollup-darwin-arm64/-/rollup-darwin-arm64-4.58.0.tgz", + "integrity": "sha512-MFWBwTcYs0jZbINQBXHfSrpSQJq3IUOakcKPzfeSznONop14Pxuqa0Kg19GD0rNBMPQI2tFtu3UzapZpH0Uc1Q==", "cpu": [ "arm64" ], @@ -1717,9 +1717,9 @@ ] }, "node_modules/@rollup/rollup-darwin-x64": { - "version": "4.59.0", - "resolved": "https://registry.npmjs.org/@rollup/rollup-darwin-x64/-/rollup-darwin-x64-4.59.0.tgz", - "integrity": 
"sha512-ZW2KkwlS4lwTv7ZVsYDiARfFCnSGhzYPdiOU4IM2fDbL+QGlyAbjgSFuqNRbSthybLbIJ915UtZBtmuLrQAT/w==", + "version": "4.58.0", + "resolved": "https://registry.npmjs.org/@rollup/rollup-darwin-x64/-/rollup-darwin-x64-4.58.0.tgz", + "integrity": "sha512-yiKJY7pj9c9JwzuKYLFaDZw5gma3fI9bkPEIyofvVfsPqjCWPglSHdpdwXpKGvDeYDms3Qal8qGMEHZ1M/4Udg==", "cpu": [ "x64" ], @@ -1731,9 +1731,9 @@ ] }, "node_modules/@rollup/rollup-freebsd-arm64": { - "version": "4.59.0", - "resolved": "https://registry.npmjs.org/@rollup/rollup-freebsd-arm64/-/rollup-freebsd-arm64-4.59.0.tgz", - "integrity": "sha512-EsKaJ5ytAu9jI3lonzn3BgG8iRBjV4LxZexygcQbpiU0wU0ATxhNVEpXKfUa0pS05gTcSDMKpn3Sx+QB9RlTTA==", + "version": "4.58.0", + "resolved": "https://registry.npmjs.org/@rollup/rollup-freebsd-arm64/-/rollup-freebsd-arm64-4.58.0.tgz", + "integrity": "sha512-x97kCoBh5MOevpn/CNK9W1x8BEzO238541BGWBc315uOlN0AD/ifZ1msg+ZQB05Ux+VF6EcYqpiagfLJ8U3LvQ==", "cpu": [ "arm64" ], @@ -1745,9 +1745,9 @@ ] }, "node_modules/@rollup/rollup-freebsd-x64": { - "version": "4.59.0", - "resolved": "https://registry.npmjs.org/@rollup/rollup-freebsd-x64/-/rollup-freebsd-x64-4.59.0.tgz", - "integrity": "sha512-d3DuZi2KzTMjImrxoHIAODUZYoUUMsuUiY4SRRcJy6NJoZ6iIqWnJu9IScV9jXysyGMVuW+KNzZvBLOcpdl3Vg==", + "version": "4.58.0", + "resolved": "https://registry.npmjs.org/@rollup/rollup-freebsd-x64/-/rollup-freebsd-x64-4.58.0.tgz", + "integrity": "sha512-Aa8jPoZ6IQAG2eIrcXPpjRcMjROMFxCt1UYPZZtCxRV68WkuSigYtQ/7Zwrcr2IvtNJo7T2JfDXyMLxq5L4Jlg==", "cpu": [ "x64" ], @@ -1759,9 +1759,9 @@ ] }, "node_modules/@rollup/rollup-linux-arm-gnueabihf": { - "version": "4.59.0", - "resolved": "https://registry.npmjs.org/@rollup/rollup-linux-arm-gnueabihf/-/rollup-linux-arm-gnueabihf-4.59.0.tgz", - "integrity": "sha512-t4ONHboXi/3E0rT6OZl1pKbl2Vgxf9vJfWgmUoCEVQVxhW6Cw/c8I6hbbu7DAvgp82RKiH7TpLwxnJeKv2pbsw==", + "version": "4.58.0", + "resolved": "https://registry.npmjs.org/@rollup/rollup-linux-arm-gnueabihf/-/rollup-linux-arm-gnueabihf-4.58.0.tgz", + "integrity": "sha512-Ob8YgT5kD/lSIYW2Rcngs5kNB/44Q2RzBSPz9brf2WEtcGR7/f/E9HeHn1wYaAwKBni+bdXEwgHvUd0x12lQSA==", "cpu": [ "arm" ], @@ -1773,9 +1773,9 @@ ] }, "node_modules/@rollup/rollup-linux-arm-musleabihf": { - "version": "4.59.0", - "resolved": "https://registry.npmjs.org/@rollup/rollup-linux-arm-musleabihf/-/rollup-linux-arm-musleabihf-4.59.0.tgz", - "integrity": "sha512-CikFT7aYPA2ufMD086cVORBYGHffBo4K8MQ4uPS/ZnY54GKj36i196u8U+aDVT2LX4eSMbyHtyOh7D7Zvk2VvA==", + "version": "4.58.0", + "resolved": "https://registry.npmjs.org/@rollup/rollup-linux-arm-musleabihf/-/rollup-linux-arm-musleabihf-4.58.0.tgz", + "integrity": "sha512-K+RI5oP1ceqoadvNt1FecL17Qtw/n9BgRSzxif3rTL2QlIu88ccvY+Y9nnHe/cmT5zbH9+bpiJuG1mGHRVwF4Q==", "cpu": [ "arm" ], @@ -1787,9 +1787,9 @@ ] }, "node_modules/@rollup/rollup-linux-arm64-gnu": { - "version": "4.59.0", - "resolved": "https://registry.npmjs.org/@rollup/rollup-linux-arm64-gnu/-/rollup-linux-arm64-gnu-4.59.0.tgz", - "integrity": "sha512-jYgUGk5aLd1nUb1CtQ8E+t5JhLc9x5WdBKew9ZgAXg7DBk0ZHErLHdXM24rfX+bKrFe+Xp5YuJo54I5HFjGDAA==", + "version": "4.58.0", + "resolved": "https://registry.npmjs.org/@rollup/rollup-linux-arm64-gnu/-/rollup-linux-arm64-gnu-4.58.0.tgz", + "integrity": "sha512-T+17JAsCKUjmbopcKepJjHWHXSjeW7O5PL7lEFaeQmiVyw4kkc5/lyYKzrv6ElWRX/MrEWfPiJWqbTvfIvjM1Q==", "cpu": [ "arm64" ], @@ -1801,9 +1801,9 @@ ] }, "node_modules/@rollup/rollup-linux-arm64-musl": { - "version": "4.59.0", - "resolved": "https://registry.npmjs.org/@rollup/rollup-linux-arm64-musl/-/rollup-linux-arm64-musl-4.59.0.tgz", - "integrity": 
"sha512-peZRVEdnFWZ5Bh2KeumKG9ty7aCXzzEsHShOZEFiCQlDEepP1dpUl/SrUNXNg13UmZl+gzVDPsiCwnV1uI0RUA==", + "version": "4.58.0", + "resolved": "https://registry.npmjs.org/@rollup/rollup-linux-arm64-musl/-/rollup-linux-arm64-musl-4.58.0.tgz", + "integrity": "sha512-cCePktb9+6R9itIJdeCFF9txPU7pQeEHB5AbHu/MKsfH/k70ZtOeq1k4YAtBv9Z7mmKI5/wOLYjQ+B9QdxR6LA==", "cpu": [ "arm64" ], @@ -1815,9 +1815,9 @@ ] }, "node_modules/@rollup/rollup-linux-loong64-gnu": { - "version": "4.59.0", - "resolved": "https://registry.npmjs.org/@rollup/rollup-linux-loong64-gnu/-/rollup-linux-loong64-gnu-4.59.0.tgz", - "integrity": "sha512-gbUSW/97f7+r4gHy3Jlup8zDG190AuodsWnNiXErp9mT90iCy9NKKU0Xwx5k8VlRAIV2uU9CsMnEFg/xXaOfXg==", + "version": "4.58.0", + "resolved": "https://registry.npmjs.org/@rollup/rollup-linux-loong64-gnu/-/rollup-linux-loong64-gnu-4.58.0.tgz", + "integrity": "sha512-iekUaLkfliAsDl4/xSdoCJ1gnnIXvoNz85C8U8+ZxknM5pBStfZjeXgB8lXobDQvvPRCN8FPmmuTtH+z95HTmg==", "cpu": [ "loong64" ], @@ -1829,9 +1829,9 @@ ] }, "node_modules/@rollup/rollup-linux-loong64-musl": { - "version": "4.59.0", - "resolved": "https://registry.npmjs.org/@rollup/rollup-linux-loong64-musl/-/rollup-linux-loong64-musl-4.59.0.tgz", - "integrity": "sha512-yTRONe79E+o0FWFijasoTjtzG9EBedFXJMl888NBEDCDV9I2wGbFFfJQQe63OijbFCUZqxpHz1GzpbtSFikJ4Q==", + "version": "4.58.0", + "resolved": "https://registry.npmjs.org/@rollup/rollup-linux-loong64-musl/-/rollup-linux-loong64-musl-4.58.0.tgz", + "integrity": "sha512-68ofRgJNl/jYJbxFjCKE7IwhbfxOl1muPN4KbIqAIe32lm22KmU7E8OPvyy68HTNkI2iV/c8y2kSPSm2mW/Q9Q==", "cpu": [ "loong64" ], @@ -1843,9 +1843,9 @@ ] }, "node_modules/@rollup/rollup-linux-ppc64-gnu": { - "version": "4.59.0", - "resolved": "https://registry.npmjs.org/@rollup/rollup-linux-ppc64-gnu/-/rollup-linux-ppc64-gnu-4.59.0.tgz", - "integrity": "sha512-sw1o3tfyk12k3OEpRddF68a1unZ5VCN7zoTNtSn2KndUE+ea3m3ROOKRCZxEpmT9nsGnogpFP9x6mnLTCaoLkA==", + "version": "4.58.0", + "resolved": "https://registry.npmjs.org/@rollup/rollup-linux-ppc64-gnu/-/rollup-linux-ppc64-gnu-4.58.0.tgz", + "integrity": "sha512-dpz8vT0i+JqUKuSNPCP5SYyIV2Lh0sNL1+FhM7eLC457d5B9/BC3kDPp5BBftMmTNsBarcPcoz5UGSsnCiw4XQ==", "cpu": [ "ppc64" ], @@ -1857,9 +1857,9 @@ ] }, "node_modules/@rollup/rollup-linux-ppc64-musl": { - "version": "4.59.0", - "resolved": "https://registry.npmjs.org/@rollup/rollup-linux-ppc64-musl/-/rollup-linux-ppc64-musl-4.59.0.tgz", - "integrity": "sha512-+2kLtQ4xT3AiIxkzFVFXfsmlZiG5FXYW7ZyIIvGA7Bdeuh9Z0aN4hVyXS/G1E9bTP/vqszNIN/pUKCk/BTHsKA==", + "version": "4.58.0", + "resolved": "https://registry.npmjs.org/@rollup/rollup-linux-ppc64-musl/-/rollup-linux-ppc64-musl-4.58.0.tgz", + "integrity": "sha512-4gdkkf9UJ7tafnweBCR/mk4jf3Jfl0cKX9Np80t5i78kjIH0ZdezUv/JDI2VtruE5lunfACqftJ8dIMGN4oHew==", "cpu": [ "ppc64" ], @@ -1871,9 +1871,9 @@ ] }, "node_modules/@rollup/rollup-linux-riscv64-gnu": { - "version": "4.59.0", - "resolved": "https://registry.npmjs.org/@rollup/rollup-linux-riscv64-gnu/-/rollup-linux-riscv64-gnu-4.59.0.tgz", - "integrity": "sha512-NDYMpsXYJJaj+I7UdwIuHHNxXZ/b/N2hR15NyH3m2qAtb/hHPA4g4SuuvrdxetTdndfj9b1WOmy73kcPRoERUg==", + "version": "4.58.0", + "resolved": "https://registry.npmjs.org/@rollup/rollup-linux-riscv64-gnu/-/rollup-linux-riscv64-gnu-4.58.0.tgz", + "integrity": "sha512-YFS4vPnOkDTD/JriUeeZurFYoJhPf9GQQEF/v4lltp3mVcBmnsAdjEWhr2cjUCZzZNzxCG0HZOvJU44UGHSdzw==", "cpu": [ "riscv64" ], @@ -1885,9 +1885,9 @@ ] }, "node_modules/@rollup/rollup-linux-riscv64-musl": { - "version": "4.59.0", - "resolved": 
"https://registry.npmjs.org/@rollup/rollup-linux-riscv64-musl/-/rollup-linux-riscv64-musl-4.59.0.tgz", - "integrity": "sha512-nLckB8WOqHIf1bhymk+oHxvM9D3tyPndZH8i8+35p/1YiVoVswPid2yLzgX7ZJP0KQvnkhM4H6QZ5m0LzbyIAg==", + "version": "4.58.0", + "resolved": "https://registry.npmjs.org/@rollup/rollup-linux-riscv64-musl/-/rollup-linux-riscv64-musl-4.58.0.tgz", + "integrity": "sha512-x2xgZlFne+QVNKV8b4wwaCS8pwq3y14zedZ5DqLzjdRITvreBk//4Knbcvm7+lWmms9V9qFp60MtUd0/t/PXPw==", "cpu": [ "riscv64" ], @@ -1899,9 +1899,9 @@ ] }, "node_modules/@rollup/rollup-linux-s390x-gnu": { - "version": "4.59.0", - "resolved": "https://registry.npmjs.org/@rollup/rollup-linux-s390x-gnu/-/rollup-linux-s390x-gnu-4.59.0.tgz", - "integrity": "sha512-oF87Ie3uAIvORFBpwnCvUzdeYUqi2wY6jRFWJAy1qus/udHFYIkplYRW+wo+GRUP4sKzYdmE1Y3+rY5Gc4ZO+w==", + "version": "4.58.0", + "resolved": "https://registry.npmjs.org/@rollup/rollup-linux-s390x-gnu/-/rollup-linux-s390x-gnu-4.58.0.tgz", + "integrity": "sha512-jIhrujyn4UnWF8S+DHSkAkDEO3hLX0cjzxJZPLF80xFyzyUIYgSMRcYQ3+uqEoyDD2beGq7Dj7edi8OnJcS/hg==", "cpu": [ "s390x" ], @@ -1913,9 +1913,9 @@ ] }, "node_modules/@rollup/rollup-linux-x64-gnu": { - "version": "4.59.0", - "resolved": "https://registry.npmjs.org/@rollup/rollup-linux-x64-gnu/-/rollup-linux-x64-gnu-4.59.0.tgz", - "integrity": "sha512-3AHmtQq/ppNuUspKAlvA8HtLybkDflkMuLK4DPo77DfthRb71V84/c4MlWJXixZz4uruIH4uaa07IqoAkG64fg==", + "version": "4.58.0", + "resolved": "https://registry.npmjs.org/@rollup/rollup-linux-x64-gnu/-/rollup-linux-x64-gnu-4.58.0.tgz", + "integrity": "sha512-+410Srdoh78MKSJxTQ+hZ/Mx+ajd6RjjPwBPNd0R3J9FtL6ZA0GqiiyNjCO9In0IzZkCNrpGymSfn+kgyPQocg==", "cpu": [ "x64" ], @@ -1927,9 +1927,9 @@ ] }, "node_modules/@rollup/rollup-linux-x64-musl": { - "version": "4.59.0", - "resolved": "https://registry.npmjs.org/@rollup/rollup-linux-x64-musl/-/rollup-linux-x64-musl-4.59.0.tgz", - "integrity": "sha512-2UdiwS/9cTAx7qIUZB/fWtToJwvt0Vbo0zmnYt7ED35KPg13Q0ym1g442THLC7VyI6JfYTP4PiSOWyoMdV2/xg==", + "version": "4.58.0", + "resolved": "https://registry.npmjs.org/@rollup/rollup-linux-x64-musl/-/rollup-linux-x64-musl-4.58.0.tgz", + "integrity": "sha512-ZjMyby5SICi227y1MTR3VYBpFTdZs823Rs/hpakufleBoufoOIB6jtm9FEoxn/cgO7l6PM2rCEl5Kre5vX0QrQ==", "cpu": [ "x64" ], @@ -1941,9 +1941,9 @@ ] }, "node_modules/@rollup/rollup-openbsd-x64": { - "version": "4.59.0", - "resolved": "https://registry.npmjs.org/@rollup/rollup-openbsd-x64/-/rollup-openbsd-x64-4.59.0.tgz", - "integrity": "sha512-M3bLRAVk6GOwFlPTIxVBSYKUaqfLrn8l0psKinkCFxl4lQvOSz8ZrKDz2gxcBwHFpci0B6rttydI4IpS4IS/jQ==", + "version": "4.58.0", + "resolved": "https://registry.npmjs.org/@rollup/rollup-openbsd-x64/-/rollup-openbsd-x64-4.58.0.tgz", + "integrity": "sha512-ds4iwfYkSQ0k1nb8LTcyXw//ToHOnNTJtceySpL3fa7tc/AsE+UpUFphW126A6fKBGJD5dhRvg8zw1rvoGFxmw==", "cpu": [ "x64" ], @@ -1955,9 +1955,9 @@ ] }, "node_modules/@rollup/rollup-openharmony-arm64": { - "version": "4.59.0", - "resolved": "https://registry.npmjs.org/@rollup/rollup-openharmony-arm64/-/rollup-openharmony-arm64-4.59.0.tgz", - "integrity": "sha512-tt9KBJqaqp5i5HUZzoafHZX8b5Q2Fe7UjYERADll83O4fGqJ49O1FsL6LpdzVFQcpwvnyd0i+K/VSwu/o/nWlA==", + "version": "4.58.0", + "resolved": "https://registry.npmjs.org/@rollup/rollup-openharmony-arm64/-/rollup-openharmony-arm64-4.58.0.tgz", + "integrity": "sha512-fd/zpJniln4ICdPkjWFhZYeY/bpnaN9pGa6ko+5WD38I0tTqk9lXMgXZg09MNdhpARngmxiCg0B0XUamNw/5BQ==", "cpu": [ "arm64" ], @@ -1969,9 +1969,9 @@ ] }, "node_modules/@rollup/rollup-win32-arm64-msvc": { - "version": "4.59.0", - "resolved": 
"https://registry.npmjs.org/@rollup/rollup-win32-arm64-msvc/-/rollup-win32-arm64-msvc-4.59.0.tgz", - "integrity": "sha512-V5B6mG7OrGTwnxaNUzZTDTjDS7F75PO1ae6MJYdiMu60sq0CqN5CVeVsbhPxalupvTX8gXVSU9gq+Rx1/hvu6A==", + "version": "4.58.0", + "resolved": "https://registry.npmjs.org/@rollup/rollup-win32-arm64-msvc/-/rollup-win32-arm64-msvc-4.58.0.tgz", + "integrity": "sha512-YpG8dUOip7DCz3nr/JUfPbIUo+2d/dy++5bFzgi4ugOGBIox+qMbbqt/JoORwvI/C9Kn2tz6+Bieoqd5+B1CjA==", "cpu": [ "arm64" ], @@ -1983,9 +1983,9 @@ ] }, "node_modules/@rollup/rollup-win32-ia32-msvc": { - "version": "4.59.0", - "resolved": "https://registry.npmjs.org/@rollup/rollup-win32-ia32-msvc/-/rollup-win32-ia32-msvc-4.59.0.tgz", - "integrity": "sha512-UKFMHPuM9R0iBegwzKF4y0C4J9u8C6MEJgFuXTBerMk7EJ92GFVFYBfOZaSGLu6COf7FxpQNqhNS4c4icUPqxA==", + "version": "4.58.0", + "resolved": "https://registry.npmjs.org/@rollup/rollup-win32-ia32-msvc/-/rollup-win32-ia32-msvc-4.58.0.tgz", + "integrity": "sha512-b9DI8jpFQVh4hIXFr0/+N/TzLdpBIoPzjt0Rt4xJbW3mzguV3mduR9cNgiuFcuL/TeORejJhCWiAXe3E/6PxWA==", "cpu": [ "ia32" ], @@ -1997,9 +1997,9 @@ ] }, "node_modules/@rollup/rollup-win32-x64-gnu": { - "version": "4.59.0", - "resolved": "https://registry.npmjs.org/@rollup/rollup-win32-x64-gnu/-/rollup-win32-x64-gnu-4.59.0.tgz", - "integrity": "sha512-laBkYlSS1n2L8fSo1thDNGrCTQMmxjYY5G0WFWjFFYZkKPjsMBsgJfGf4TLxXrF6RyhI60L8TMOjBMvXiTcxeA==", + "version": "4.58.0", + "resolved": "https://registry.npmjs.org/@rollup/rollup-win32-x64-gnu/-/rollup-win32-x64-gnu-4.58.0.tgz", + "integrity": "sha512-CSrVpmoRJFN06LL9xhkitkwUcTZtIotYAF5p6XOR2zW0Zz5mzb3IPpcoPhB02frzMHFNo1reQ9xSF5fFm3hUsQ==", "cpu": [ "x64" ], @@ -2011,9 +2011,9 @@ ] }, "node_modules/@rollup/rollup-win32-x64-msvc": { - "version": "4.59.0", - "resolved": "https://registry.npmjs.org/@rollup/rollup-win32-x64-msvc/-/rollup-win32-x64-msvc-4.59.0.tgz", - "integrity": "sha512-2HRCml6OztYXyJXAvdDXPKcawukWY2GpR5/nxKp4iBgiO3wcoEGkAaqctIbZcNB6KlUQBIqt8VYkNSj2397EfA==", + "version": "4.58.0", + "resolved": "https://registry.npmjs.org/@rollup/rollup-win32-x64-msvc/-/rollup-win32-x64-msvc-4.58.0.tgz", + "integrity": "sha512-QFsBgQNTnh5K0t/sBsjJLq24YVqEIVkGpfN2VHsnN90soZyhaiA9UUHufcctVNL4ypJY0wrwad0wslx2KJQ1/w==", "cpu": [ "x64" ], @@ -2413,24 +2413,37 @@ "typescript": ">=4.8.4 <6.0.0" } }, + "node_modules/@typescript-eslint/typescript-estree/node_modules/balanced-match": { + "version": "4.0.3", + "resolved": "https://registry.npmjs.org/balanced-match/-/balanced-match-4.0.3.tgz", + "integrity": "sha512-1pHv8LX9CpKut1Zp4EXey7Z8OfH11ONNH6Dhi2WDUt31VVZFXZzKwXcysBgqSumFCmR+0dqjMK5v5JiFHzi0+g==", + "dev": true, + "license": "MIT", + "engines": { + "node": "20 || >=22" + } + }, "node_modules/@typescript-eslint/typescript-estree/node_modules/brace-expansion": { - "version": "2.0.2", - "resolved": "https://registry.npmjs.org/brace-expansion/-/brace-expansion-2.0.2.tgz", - "integrity": "sha512-Jt0vHyM+jmUBqojB7E1NIYadt0vI0Qxjxd2TErW94wDz+E2LAm5vKMXXwg6ZZBTHPuUlDgQHKXvjGBdfcF1ZDQ==", + "version": "5.0.2", + "resolved": "https://registry.npmjs.org/brace-expansion/-/brace-expansion-5.0.2.tgz", + "integrity": "sha512-Pdk8c9poy+YhOgVWw1JNN22/HcivgKWwpxKq04M/jTmHyCZn12WPJebZxdjSa5TmBqISrUSgNYU3eRORljfCCw==", "dev": true, "license": "MIT", "dependencies": { - "balanced-match": "^1.0.0" + "balanced-match": "^4.0.2" + }, + "engines": { + "node": "20 || >=22" } }, "node_modules/@typescript-eslint/typescript-estree/node_modules/minimatch": { - "version": "9.0.9", - "resolved": "https://registry.npmjs.org/minimatch/-/minimatch-9.0.9.tgz", - 
"integrity": "sha512-OBwBN9AL4dqmETlpS2zasx+vTeWclWzkblfZk7KTA5j3jeOONz/tRCnZomUyvNg83wL5Zv9Ss6HMJXAgL8R2Yg==", + "version": "9.0.6", + "resolved": "https://registry.npmjs.org/minimatch/-/minimatch-9.0.6.tgz", + "integrity": "sha512-kQAVowdR33euIqeA0+VZTDqU+qo1IeVY+hrKYtZMio3Pg0P0vuh/kwRylLUddJhB6pf3q/botcOvRtx4IN1wqQ==", "dev": true, "license": "ISC", "dependencies": { - "brace-expansion": "^2.0.2" + "brace-expansion": "^5.0.2" }, "engines": { "node": ">=16 || 14 >=14.17" @@ -4537,9 +4550,9 @@ } }, "node_modules/minimatch": { - "version": "3.1.5", - "resolved": "https://registry.npmjs.org/minimatch/-/minimatch-3.1.5.tgz", - "integrity": "sha512-VgjWUsnnT6n+NUk6eZq77zeFdpW2LWDzP6zFGrCbHXiYNul5Dzqk2HHQ5uFH2DNW5Xbp8+jVzaeNt94ssEEl4w==", + "version": "3.1.3", + "resolved": "https://registry.npmjs.org/minimatch/-/minimatch-3.1.3.tgz", + "integrity": "sha512-M2GCs7Vk83NxkUyQV1bkABc4yxgz9kILhHImZiBPAZ9ybuvCb0/H7lEl5XvIg3g+9d4eNotkZA5IWwYl0tibaA==", "dev": true, "license": "ISC", "dependencies": { @@ -5067,9 +5080,9 @@ } }, "node_modules/rollup": { - "version": "4.59.0", - "resolved": "https://registry.npmjs.org/rollup/-/rollup-4.59.0.tgz", - "integrity": "sha512-2oMpl67a3zCH9H79LeMcbDhXW/UmWG/y2zuqnF2jQq5uq9TbM9TVyXvA4+t+ne2IIkBdrLpAaRQAvo7YI/Yyeg==", + "version": "4.58.0", + "resolved": "https://registry.npmjs.org/rollup/-/rollup-4.58.0.tgz", + "integrity": "sha512-wbT0mBmWbIvvq8NeEYWWvevvxnOyhKChir47S66WCxw1SXqhw7ssIYejnQEVt7XYQpsj2y8F9PM+Cr3SNEa0gw==", "dev": true, "license": "MIT", "dependencies": { @@ -5083,31 +5096,31 @@ "npm": ">=8.0.0" }, "optionalDependencies": { - "@rollup/rollup-android-arm-eabi": "4.59.0", - "@rollup/rollup-android-arm64": "4.59.0", - "@rollup/rollup-darwin-arm64": "4.59.0", - "@rollup/rollup-darwin-x64": "4.59.0", - "@rollup/rollup-freebsd-arm64": "4.59.0", - "@rollup/rollup-freebsd-x64": "4.59.0", - "@rollup/rollup-linux-arm-gnueabihf": "4.59.0", - "@rollup/rollup-linux-arm-musleabihf": "4.59.0", - "@rollup/rollup-linux-arm64-gnu": "4.59.0", - "@rollup/rollup-linux-arm64-musl": "4.59.0", - "@rollup/rollup-linux-loong64-gnu": "4.59.0", - "@rollup/rollup-linux-loong64-musl": "4.59.0", - "@rollup/rollup-linux-ppc64-gnu": "4.59.0", - "@rollup/rollup-linux-ppc64-musl": "4.59.0", - "@rollup/rollup-linux-riscv64-gnu": "4.59.0", - "@rollup/rollup-linux-riscv64-musl": "4.59.0", - "@rollup/rollup-linux-s390x-gnu": "4.59.0", - "@rollup/rollup-linux-x64-gnu": "4.59.0", - "@rollup/rollup-linux-x64-musl": "4.59.0", - "@rollup/rollup-openbsd-x64": "4.59.0", - "@rollup/rollup-openharmony-arm64": "4.59.0", - "@rollup/rollup-win32-arm64-msvc": "4.59.0", - "@rollup/rollup-win32-ia32-msvc": "4.59.0", - "@rollup/rollup-win32-x64-gnu": "4.59.0", - "@rollup/rollup-win32-x64-msvc": "4.59.0", + "@rollup/rollup-android-arm-eabi": "4.58.0", + "@rollup/rollup-android-arm64": "4.58.0", + "@rollup/rollup-darwin-arm64": "4.58.0", + "@rollup/rollup-darwin-x64": "4.58.0", + "@rollup/rollup-freebsd-arm64": "4.58.0", + "@rollup/rollup-freebsd-x64": "4.58.0", + "@rollup/rollup-linux-arm-gnueabihf": "4.58.0", + "@rollup/rollup-linux-arm-musleabihf": "4.58.0", + "@rollup/rollup-linux-arm64-gnu": "4.58.0", + "@rollup/rollup-linux-arm64-musl": "4.58.0", + "@rollup/rollup-linux-loong64-gnu": "4.58.0", + "@rollup/rollup-linux-loong64-musl": "4.58.0", + "@rollup/rollup-linux-ppc64-gnu": "4.58.0", + "@rollup/rollup-linux-ppc64-musl": "4.58.0", + "@rollup/rollup-linux-riscv64-gnu": "4.58.0", + "@rollup/rollup-linux-riscv64-musl": "4.58.0", + "@rollup/rollup-linux-s390x-gnu": "4.58.0", + 
"@rollup/rollup-linux-x64-gnu": "4.58.0", + "@rollup/rollup-linux-x64-musl": "4.58.0", + "@rollup/rollup-openbsd-x64": "4.58.0", + "@rollup/rollup-openharmony-arm64": "4.58.0", + "@rollup/rollup-win32-arm64-msvc": "4.58.0", + "@rollup/rollup-win32-ia32-msvc": "4.58.0", + "@rollup/rollup-win32-x64-gnu": "4.58.0", + "@rollup/rollup-win32-x64-msvc": "4.58.0", "fsevents": "~2.3.2" } }, diff --git a/frontend/src/App.test.tsx b/frontend/src/App.test.tsx index d8472cd..4ddde49 100644 --- a/frontend/src/App.test.tsx +++ b/frontend/src/App.test.tsx @@ -1,4 +1,4 @@ -import { render, screen } from "@testing-library/react"; +import { act, render, screen, waitFor } from "@testing-library/react"; import { MemoryRouter } from "react-router"; import { describe, expect, it, vi } from "vitest"; @@ -22,24 +22,37 @@ vi.mock("./stores/auth", () => ({ ), })); +// Keep App route test deterministic without Landing async effects. +vi.mock("./pages/Landing", () => ({ + Landing: () =>
<div>BR-ACC</div>,
+}));
+
 import { App } from "./App";

 describe("App", () => {
-  it("renders the landing page with title", () => {
-    render(
-      <MemoryRouter>
-        <App />
-      </MemoryRouter>,
-    );
-    expect(screen.getAllByText("BRACC").length).toBeGreaterThan(0);
+  it("renders the landing page with title", async () => {
+    await act(async () => {
+      render(
+        <MemoryRouter>
+          <App />
+        </MemoryRouter>,
+      );
+    });
+    await waitFor(() => {
+      expect(screen.getAllByText("BR-ACC").length).toBeGreaterThan(0);
+    });
   });

-  it("renders login page at /login", () => {
-    render(
-      <MemoryRouter initialEntries={["/login"]}>
-        <App />
-      </MemoryRouter>,
-    );
-    expect(screen.getByLabelText(/e-mail/i)).toBeInTheDocument();
+  it("renders login page at /login", async () => {
+    await act(async () => {
+      render(
+        <MemoryRouter initialEntries={["/login"]}>
+          <App />
+        </MemoryRouter>,
+      );
+    });
+    await waitFor(() => {
+      expect(screen.getByLabelText(/e-mail/i)).toBeInTheDocument();
+    });
   });
 });
diff --git a/frontend/src/api/client.ts b/frontend/src/api/client.ts
index a0ba6b3..a7ea25d 100644
--- a/frontend/src/api/client.ts
+++ b/frontend/src/api/client.ts
@@ -12,14 +12,13 @@ export class ApiError extends Error {
 export async function apiFetch<T>(path: string, init?: RequestInit): Promise<T> {
   const url = `${API_BASE}${path}`;
-  const headers = new Headers(init?.headers);
-  if (!headers.has("content-type")) {
-    headers.set("Content-Type", "application/json");
-  }
   const response = await fetch(url, {
     credentials: "include",
     ...init,
-    headers,
+    headers: {
+      "Content-Type": "application/json",
+      ...init?.headers,
+    },
   });

   if (!response.ok) {
@@ -33,6 +32,24 @@ export async function apiFetch<T>(path: string, init?: RequestInit): Promise<T>
   return response.json() as Promise<T>;
 }

+async function apiFetchBlob(path: string): Promise<Blob> {
+  const url = `${API_BASE}${path}`;
+  const response = await fetch(url, { credentials: "include" });
+
+  if (!response.ok) {
+    let detail = response.statusText;
+    try {
+      const err = await response.json();
+      detail = err.detail || detail;
+    } catch {
+      // response wasn't JSON
+    }
+    throw new ApiError(response.status, detail);
+  }
+
+  return response.blob();
+}
+
 export interface SourceAttribution {
   database: string;
   record_id?: string | null;
@@ -201,7 +218,7 @@ export interface Investigation {
   updated_at: string;
   entity_ids: string[];
   share_token: string | null;
-  share_expires_at?: string | null;
+  share_expires_at: string | null;
 }

 export interface InvestigationListResponse {
@@ -342,19 +359,22 @@ export function getSharedInvestigation(token: string): Promise<Investigation> {
 export function generateShareLink(
   investigationId: string,
-): Promise<{ share_token: string; share_expires_at?: string | null }> {
-  return apiFetch<{ share_token: string; share_expires_at?: string | null }>(
+): Promise<{ share_token: string; share_expires_at: string }> {
+  return apiFetch<{ share_token: string; share_expires_at: string }>(
     `/api/v1/investigations/${encodeURIComponent(investigationId)}/share`,
     { method: "POST" },
   );
 }

+export function revokeShareLink(investigationId: string): Promise<void> {
+  return apiFetch(
+    `/api/v1/investigations/${encodeURIComponent(investigationId)}/share`,
+    { method: "DELETE" },
+  );
+}
+
 export function exportInvestigation(investigationId: string): Promise<Blob> {
-  const url = `${API_BASE}/api/v1/investigations/${encodeURIComponent(investigationId)}/export`;
-  return fetch(url, { credentials: "include" }).then((res) => {
-    if (!res.ok) throw new ApiError(res.status, `API error: ${res.statusText}`);
-    return res.blob();
-  });
+  return apiFetchBlob(`/api/v1/investigations/${encodeURIComponent(investigationId)}/export`);
 }

 // --- Stats ---
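Reviewer note on the `apiFetch` header change above: spreading `init?.headers` into an object literal only works when callers pass a plain record. A `Headers` instance has no enumerable own properties (it would spread to nothing), and a `[key, value][]` array would spread to numeric keys. A minimal normalization sketch, assuming callers may pass any `HeadersInit`; the `toHeaderRecord` helper is illustrative and not part of this diff:

```typescript
// Illustrative only — not in the diff. Normalizes any HeadersInit into a
// plain record so it can be safely spread after the JSON default.
function toHeaderRecord(headers?: HeadersInit): Record<string, string> {
  if (!headers) return {};
  if (headers instanceof Headers) {
    // Headers instances have no enumerable own properties; {...headers} yields {}.
    return Object.fromEntries(headers.entries());
  }
  if (Array.isArray(headers)) {
    // [key, value][] form.
    return Object.fromEntries(headers);
  }
  return headers; // already a plain Record<string, string>
}

// Inside apiFetch, the headers option could then read:
//   headers: { "Content-Type": "application/json", ...toHeaderRecord(init?.headers) },
```

As written, the diff implicitly assumes every call site passes a plain object, which appears to hold for the current codebase; the sketch is only relevant if that assumption changes.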
@@ -444,9 +464,7 @@ export function exportInvestigationPDF(
   lang = "pt",
 ): Promise<Blob> {
   const params = new URLSearchParams({ lang });
-  const url = `${API_BASE}/api/v1/investigations/${encodeURIComponent(investigationId)}/export/pdf?${params}`;
-  return fetch(url, { credentials: "include" }).then((res) => {
-    if (!res.ok) throw new ApiError(res.status, `API error: ${res.statusText}`);
-    return res.blob();
-  });
+  return apiFetchBlob(
+    `/api/v1/investigations/${encodeURIComponent(investigationId)}/export/pdf?${params}`,
+  );
 }
diff --git a/frontend/src/components/common/AppShell.tsx b/frontend/src/components/common/AppShell.tsx
index 852249e..3b46cfe 100644
--- a/frontend/src/components/common/AppShell.tsx
+++ b/frontend/src/components/common/AppShell.tsx
@@ -144,7 +144,7 @@ export function AppShell() {
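For reviewers of the blob-returning helpers consolidated above: the intended browser-side consumption is the standard object-URL download pattern. A minimal sketch under that assumption; `downloadBlob`, `handleExportPdf`, the import path, and the filename are illustrative names, not code from this diff:

```typescript
// Illustrative consumer of the blob-based export API added in client.ts.
// Assumes a browser environment; none of these names exist in the diff itself.
import { exportInvestigationPDF } from "./api/client";

function downloadBlob(blob: Blob, filename: string): void {
  const url = URL.createObjectURL(blob);
  const anchor = document.createElement("a");
  anchor.href = url;
  anchor.download = filename;
  anchor.click();
  // Release the object URL once the click has dispatched the download.
  URL.revokeObjectURL(url);
}

export async function handleExportPdf(investigationId: string): Promise<void> {
  const blob = await exportInvestigationPDF(investigationId, "pt");
  downloadBlob(blob, `investigation-${investigationId}.pdf`);
}
```

Centralizing the error handling in `apiFetchBlob` means callers like this sketch get a typed `ApiError` on failure instead of a raw `fetch` rejection, matching the JSON path's behavior.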