From edaa997011da2ce9f3695232ec3ded319fa0f6d1 Mon Sep 17 00:00:00 2001 From: Samah Naji Date: Wed, 11 Mar 2026 15:26:45 -0400 Subject: [PATCH 1/3] issue pour init db --- .env.docker | 2 +- Dockerfile.api | 2 +- Dockerfile.app | 2 +- docker/mssql-init.sh | 34 ++- migrations/v2.1.0_to_v2.1.1_auth_mssql.sql | 42 ++++ schema_dictionary/tables/UserAccount.yaml | 52 +++++ scripts/init_db.py | 256 ++++++--------------- sql/init.sql | 10 +- 8 files changed, 210 insertions(+), 190 deletions(-) create mode 100644 migrations/v2.1.0_to_v2.1.1_auth_mssql.sql create mode 100644 schema_dictionary/tables/UserAccount.yaml diff --git a/.env.docker b/.env.docker index 3f87740..07ec785 100644 --- a/.env.docker +++ b/.env.docker @@ -3,4 +3,4 @@ DB_PORT=1433 DB_NAME=open_dateaubase DB_USER=SA DB_PASSWORD=StrongPwd123! - +DB_DRIVER=ODBC Driver 18 for SQL Server diff --git a/Dockerfile.api b/Dockerfile.api index 0a56cc0..20ea8ea 100644 --- a/Dockerfile.api +++ b/Dockerfile.api @@ -25,7 +25,7 @@ RUN curl -fsSL https://packages.microsoft.com/keys/microsoft.asc \ RUN pip install --no-cache-dir uv # Copy dependency files first for layer caching -COPY pyproject.toml uv.lock ./ +COPY pyproject.toml ./ COPY src/ ./src/ # Install API dependencies (fastapi, uvicorn, pyodbc, python-dotenv, local package) diff --git a/Dockerfile.app b/Dockerfile.app index 9864f49..af6bc8a 100644 --- a/Dockerfile.app +++ b/Dockerfile.app @@ -14,7 +14,7 @@ RUN apt-get update && apt-get install -y --no-install-recommends \ RUN pip install --no-cache-dir uv # Copy dependency files first for layer caching -COPY pyproject.toml uv.lock ./ +COPY pyproject.toml ./ COPY src/ ./src/ # Install app dependencies (streamlit, httpx, plotly — no pyodbc) diff --git a/docker/mssql-init.sh b/docker/mssql-init.sh index 03bb156..d67563d 100755 --- a/docker/mssql-init.sh +++ b/docker/mssql-init.sh @@ -5,16 +5,42 @@ set -e /opt/mssql/bin/sqlservr & SQL_PID=$! 
+SQLCMD="" + +for candidate in \ + /opt/mssql-tools/bin/sqlcmd \ + /opt/mssql-tools18/bin/sqlcmd \ + /usr/bin/sqlcmd \ + /usr/local/bin/sqlcmd +do + if [ -x "$candidate" ]; then + SQLCMD="$candidate" + break + fi +done + +if [ -z "$SQLCMD" ]; then + echo "ERROR: sqlcmd not found in container." + echo "Looked in:" + echo " /opt/mssql-tools/bin/sqlcmd" + echo " /opt/mssql-tools18/bin/sqlcmd" + echo " /usr/bin/sqlcmd" + echo " /usr/local/bin/sqlcmd" + wait $SQL_PID + exit 1 +fi + +echo "Using sqlcmd at: $SQLCMD" echo "Waiting for SQL Server to be ready..." + for i in $(seq 1 60); do - /opt/mssql-tools/bin/sqlcmd -S localhost -U SA -P "$MSSQL_SA_PASSWORD" \ - -Q "SELECT 1" > /dev/null 2>&1 && break + "$SQLCMD" -S localhost -U SA -P "$MSSQL_SA_PASSWORD" -Q "SELECT 1" > /dev/null 2>&1 && break sleep 2 done echo "Running init.sql..." -/opt/mssql-tools/bin/sqlcmd -S localhost -U SA -P "$MSSQL_SA_PASSWORD" \ +"$SQLCMD" -S localhost -U SA -P "$MSSQL_SA_PASSWORD" \ -b -V 16 -r 1 -i /sql/init.sql echo "Database initialized." 
-wait $SQL_PID +wait $SQL_PID \ No newline at end of file diff --git a/migrations/v2.1.0_to_v2.1.1_auth_mssql.sql b/migrations/v2.1.0_to_v2.1.1_auth_mssql.sql new file mode 100644 index 0000000..6781a5a --- /dev/null +++ b/migrations/v2.1.0_to_v2.1.1_auth_mssql.sql @@ -0,0 +1,42 @@ +SET NOCOUNT ON; +SET XACT_ABORT ON; +SET QUOTED_IDENTIFIER ON; +GO + +IF OBJECT_ID('dbo.UserAccount') IS NOT NULL + RAISERROR('UserAccount table already exists.', 16, 1); +GO + +BEGIN TRANSACTION; +GO + +CREATE TABLE [dbo].[UserAccount] ( + [UserAccount_ID] INT IDENTITY(1,1) NOT NULL, + [Email] NVARCHAR(255) NOT NULL, + [FullName] NVARCHAR(255) NOT NULL, + [PasswordHash] NVARCHAR(255) NOT NULL, + [IsActive] BIT NOT NULL CONSTRAINT [DF_UserAccount_IsActive] DEFAULT 1, + [IsVerified] BIT NOT NULL CONSTRAINT [DF_UserAccount_IsVerified] DEFAULT 1, + [CreatedAt] DATETIME2(7) NOT NULL CONSTRAINT [DF_UserAccount_CreatedAt] DEFAULT SYSUTCDATETIME(), + [UpdatedAt] DATETIME2(7) NOT NULL CONSTRAINT [DF_UserAccount_UpdatedAt] DEFAULT SYSUTCDATETIME(), + CONSTRAINT [PK_UserAccount] PRIMARY KEY ([UserAccount_ID]), + CONSTRAINT [UQ_UserAccount_Email] UNIQUE ([Email]) +); +GO + +CREATE INDEX [IX_UserAccount_Email] ON [dbo].[UserAccount] ([Email]); +GO + +INSERT INTO [dbo].[SchemaVersion] ([Version], [AppliedDateTime], [Description], [MigrationScript]) +VALUES ( + '2.1.1', + SYSUTCDATETIME(), + 'Add UserAccount table for application authentication', + 'v2.1.0_to_v2.1.1_auth_mssql.sql' +); +GO + +COMMIT TRANSACTION; +GO + +PRINT 'Migration to v2.1.1 completed successfully.'; \ No newline at end of file diff --git a/schema_dictionary/tables/UserAccount.yaml b/schema_dictionary/tables/UserAccount.yaml new file mode 100644 index 0000000..b4dcc07 --- /dev/null +++ b/schema_dictionary/tables/UserAccount.yaml @@ -0,0 +1,52 @@ +_format_version: "1.0" +table: + name: UserAccount + schema: dbo + description: "Stores authentication accounts used to access the application." 
+ + columns: + - name: UserAccount_ID + logical_type: integer + nullable: false + identity: true + description: "Surrogate primary key for the user account" + + - name: Email + logical_type: string + max_length: 255 + nullable: false + description: "Unique e-mail address used to sign in" + + - name: FullName + logical_type: string + max_length: 255 + nullable: false + description: "Full name of the user" + + - name: PasswordHash + logical_type: string + max_length: 255 + nullable: false + description: "Hashed password for authentication" + + - name: IsActive + logical_type: boolean + nullable: false + description: "Indicates whether the account is active" + + - name: IsVerified + logical_type: boolean + nullable: false + description: "Indicates whether the account has been verified" + + - name: CreatedAt + logical_type: datetime + nullable: false + description: "UTC timestamp when the account was created" + + - name: UpdatedAt + logical_type: datetime + nullable: false + description: "UTC timestamp when the account was last updated" + + primary_key: [UserAccount_ID] \ No newline at end of file diff --git a/scripts/init_db.py b/scripts/init_db.py index e8dd6be..09ff9b0 100644 --- a/scripts/init_db.py +++ b/scripts/init_db.py @@ -1,202 +1,98 @@ -#!/usr/bin/env python3 -"""Initialize the open_dateaubase schema and seed data. - -Applies the v1.0.0 baseline and the consolidated v1.0.0→v2.1.0 migration, -then seeds test data. Safe to re-run: skips steps already recorded in -SchemaVersion. - -Usage ------ - uv run python scripts/init_db.py [--server localhost,14330] - -Environment variables (override defaults) ------------------------------------------- - DB_SERVER e.g. "localhost,14330" (default: localhost,14330) - DB_NAME e.g. "open_dateaubase" (default: open_dateaubase) - DB_USER e.g. "SA" (default: SA) - DB_PASSWORD e.g. "StrongPwd123!" (default: StrongPwd123!) 
-""" - from __future__ import annotations -import os import re import sys import time from pathlib import Path -ROOT = Path(__file__).parent.parent - -# --------------------------------------------------------------------------- -# Connection helpers -# --------------------------------------------------------------------------- - -_SERVER = os.getenv("DB_SERVER", "localhost,14330") -_DB = os.getenv("DB_NAME", "open_dateaubase") -_USER = os.getenv("DB_USER", "SA") -_PWD = os.getenv("DB_PASSWORD", "StrongPwd123!") - -_MASTER_DSN = ( - f"DRIVER={{ODBC Driver 18 for SQL Server}};" - f"SERVER={_SERVER};" - f"DATABASE=master;" - f"UID={_USER};" - f"PWD={_PWD};" - f"TrustServerCertificate=yes;" -) - -_APP_DSN = ( - f"DRIVER={{ODBC Driver 18 for SQL Server}};" - f"SERVER={_SERVER};" - f"DATABASE={_DB};" - f"UID={_USER};" - f"PWD={_PWD};" - f"TrustServerCertificate=yes;" -) - - -def _connect(dsn: str, retries: int = 30, delay: float = 2.0): - """Connect with retries (SQL Server takes a few seconds to start).""" - import pyodbc - - last_exc: Exception | None = None - for attempt in range(1, retries + 1): - try: - conn = pyodbc.connect(dsn, timeout=5, autocommit=False) - return conn - except Exception as exc: - last_exc = exc - if attempt < retries: - print(f" waiting for SQL Server ({attempt}/{retries})…", flush=True) - time.sleep(delay) - raise RuntimeError(f"Cannot connect after {retries} attempts") from last_exc - - -def _run_sql_file(path: Path, conn) -> None: - """Execute a .sql file against *conn*, splitting on GO.""" - sql = path.read_text(encoding="utf-8") - batches = re.split(r"^\s*GO\s*$", sql, flags=re.MULTILINE | re.IGNORECASE) - cursor = conn.cursor() - for batch in batches: - batch = batch.strip() - if batch: - cursor.execute(batch) - conn.commit() +import pyodbc +ROOT = Path("/workspace") +INIT_FILE = ROOT / "sql" / "init.sql" -# --------------------------------------------------------------------------- -# Migration steps — (label, migration_file, seed_file | 
None) -# SchemaVersion is created by the v2.1.0 migration, so we check for it -# after applying that step. -# --------------------------------------------------------------------------- +DB_HOST = "db" +DB_PORT = 1433 +DB_NAME = "master" +DB_USER = "SA" +DB_PASSWORD = "StrongPwd123!" +DB_DRIVER = "ODBC Driver 18 for SQL Server" -_BASELINE = ROOT / "migrations" / "v1.0.0_create_mssql.sql" -_MIGRATION = ROOT / "migrations" / "v1.0.0_to_v2.1.0_mssql.sql" -_SEED = ROOT / "sql" / "seed_v2.1.0.sql" - -TARGET_VERSION = "2.1.0" +def get_connection(database: str = DB_NAME) -> pyodbc.Connection: + conn_str = ( + f"DRIVER={{{DB_DRIVER}}};" + f"SERVER={DB_HOST},{DB_PORT};" + f"DATABASE={database};" + f"UID={DB_USER};" + f"PWD={DB_PASSWORD};" + "Encrypt=no;" + "TrustServerCertificate=yes;" + ) + return pyodbc.connect(conn_str, autocommit=True) -# --------------------------------------------------------------------------- -# Main logic -# --------------------------------------------------------------------------- -def main() -> None: - try: - import pyodbc # noqa: F401 - except ImportError: - print("ERROR: pyodbc not installed. Run: uv sync --extra db", file=sys.stderr) - sys.exit(1) - - # ------------------------------------------------------------------ - # 1. Ensure the database exists (connect to master) - # ------------------------------------------------------------------ - print(f"Connecting to SQL Server at {_SERVER}…") - master_conn = _connect(_MASTER_DSN) - master_conn.autocommit = True - cursor = master_conn.cursor() - cursor.execute( - f"IF DB_ID(N'{_DB}') IS NULL CREATE DATABASE [{_DB}];" - ) - master_conn.close() - print(f"Database '{_DB}' is ready.") - - # ------------------------------------------------------------------ - # 2. Connect to the application database - # ------------------------------------------------------------------ - conn = _connect(_APP_DSN) - cursor = conn.cursor() - - # ------------------------------------------------------------------ - # 3. 
Check current schema version - # ------------------------------------------------------------------ - applied: set[str] = set() - try: - cursor.execute("SELECT [Version] FROM [dbo].[SchemaVersion]") - applied = {row[0] for row in cursor.fetchall()} - print(f"Already applied: {sorted(applied)}") - except Exception: - print("SchemaVersion table not found; applying from scratch.") - conn.commit() - - if TARGET_VERSION in applied: - print(f"Schema is already at v{TARGET_VERSION}. Nothing to do.") - else: - # ------------------------------------------------------------------ - # 4a. Apply v1.0.0 baseline if SchemaVersion not yet present - # (means we're starting from an empty DB) - # ------------------------------------------------------------------ - if not applied: - print(f" apply baseline v1.0.0…") - if _BASELINE.exists(): - _run_sql_file(_BASELINE, conn) - print(f" baseline done.") - else: - print(f" ERROR: baseline file not found: {_BASELINE}", file=sys.stderr) - sys.exit(1) - - # ------------------------------------------------------------------ - # 4b. Apply the consolidated v1.0.0 → v2.1.0 migration - # ------------------------------------------------------------------ - print(f" apply migration v1.0.0 → v{TARGET_VERSION}…") - if _MIGRATION.exists(): - _run_sql_file(_MIGRATION, conn) - print(f" migration done.") - else: - print(f" ERROR: migration file not found: {_MIGRATION}", file=sys.stderr) - sys.exit(1) - - # ------------------------------------------------------------------ - # 4c. Apply seed data - # ------------------------------------------------------------------ - print(f" apply seed data…") - if _SEED.exists(): - _run_sql_file(_SEED, conn) - print(f" seed done.") +def wait_for_db(max_attempts: int = 60, delay: int = 2) -> None: + for attempt in range(1, max_attempts + 1): + try: + with get_connection(): + print(f"Database ready after {attempt} attempt(s).") + return + except Exception as e: + print(f"Waiting for DB... 
({attempt}/{max_attempts}) -> {e}") + time.sleep(delay) + + raise RuntimeError("Database did not become ready in time.") + + +def expand_includes(file_path: Path) -> str: + lines: list[str] = [] + for raw_line in file_path.read_text(encoding="utf-8").splitlines(): + stripped = raw_line.strip() + if stripped.lower().startswith(":r "): + include_path = stripped[3:].strip() + include_file = (ROOT / include_path.lstrip("/")).resolve() + if not include_file.exists(): + raise FileNotFoundError(f"Included SQL file not found: {include_file}") + lines.append(expand_includes(include_file)) else: - print(f" WARNING: seed file not found: {_SEED}") + lines.append(raw_line) + return "\n".join(lines) + + +def split_batches(sql_text: str) -> list[str]: + parts = re.split(r"(?im)^\s*GO\s*;?\s*$", sql_text) + return [part.strip() for part in parts if part.strip()] + - # ------------------------------------------------------------------ - # 5. Report final state - # ------------------------------------------------------------------ - cursor = conn.cursor() - cursor.execute("SELECT [Version], [AppliedAt], [Description] FROM [dbo].[SchemaVersion] ORDER BY [AppliedAt]") - versions = [(r[0], r[1], r[2]) for r in cursor.fetchall()] +def execute_batches(sql_text: str) -> None: + with get_connection("master") as conn: + cursor = conn.cursor() + batches = split_batches(sql_text) - cursor.execute("SELECT COUNT(*) FROM [dbo].[Channel]") - n_channel = cursor.fetchone()[0] + for i, batch in enumerate(batches, start=1): + print(f"Executing batch {i}/{len(batches)}...") + cursor.execute(batch) + + cursor.close() + + +def main() -> int: + try: + print("Waiting for SQL Server...") + wait_for_db() - cursor.execute("SELECT COUNT(*) FROM [dbo].[Value]") - n_val = cursor.fetchone()[0] + print(f"Loading SQL from {INIT_FILE}") + sql_text = expand_includes(INIT_FILE) - conn.close() + print("Executing init.sql with expanded includes...") + execute_batches(sql_text) - print(f"\nSchema versions applied:") - 
for v, applied_at, desc in versions: - print(f" {v} ({applied_at}) {desc}") - print(f"\nChannel rows: {n_channel} | Value rows: {n_val}") - print("Database initialised successfully.") + print("Database initialization completed successfully.") + return 0 + except Exception as e: + print(f"Database initialization failed: {e}", file=sys.stderr) + return 1 if __name__ == "__main__": - main() + raise SystemExit(main()) \ No newline at end of file diff --git a/sql/init.sql b/sql/init.sql index 8c3238d..b67d069 100644 --- a/sql/init.sql +++ b/sql/init.sql @@ -15,11 +15,15 @@ GO :r /migrations/v1.0.0_to_v2.1.0_mssql.sql GO --- Step 3: Load test seed data for the Quebec City monitoring scenario +-- Step 3: Apply authentication migration to v2.1.1 +:r /migrations/v2.1.0_to_v2.1.1_auth_mssql.sql +GO + +-- Step 4: Load test seed data for the Quebec City monitoring scenario :r /sql/seed_v2.1.0.sql GO --- Step 4: Load Explore page demonstration data (Feb 2026, all four value types) +-- Step 5: Load Explore page demonstration data (Feb 2026, all four value types) :r /sql/seed_explore.sql GO @@ -30,4 +34,4 @@ SELECT 'value' AS t, COUNT(*) AS n FROM dbo.[Value]; SELECT 'valuevector' AS t, COUNT(*) AS n FROM dbo.ValueVector; SELECT 'valuematrix' AS t, COUNT(*) AS n FROM dbo.ValueMatrix; SELECT 'valueimage' AS t, COUNT(*) AS n FROM dbo.ValueImage; -GO +GO \ No newline at end of file From 6e97916a7cf21ac93ad060cff023d6da1f57f63d Mon Sep 17 00:00:00 2001 From: Samah Naji Date: Thu, 12 Mar 2026 02:50:36 -0400 Subject: [PATCH 2/3] set up login, funnel to access the app --- api/v1/endpoints/auth.py | 68 +++++++++ api/v1/repositories/auth_repository.py | 123 +++++++++++++++++ api/v1/router.py | 2 + api/v1/schemas/auth.py | 34 +++++ api/v1/services/auth_service.py | 184 +++++++++++++++++++++++++ app/Home.py | 137 +++++++++++------- app/api_client.py | 49 ++++++- app/auth.py | 124 +++++++++++++---- app/pages/10_Lab_Ingest.py | 6 - app/pages/1_Sites.py | 6 - app/pages/2_Equipment.py | 6 - 
app/pages/3_Campaigns.py | 6 - app/pages/4_Channels.py | 6 - app/pages/5_Annotations.py | 6 - app/pages/5_Units.py | 6 - app/pages/6_Equipment_Models.py | 6 - app/pages/7_Parameters.py | 6 - app/pages/8_Binning_Axes.py | 7 - app/pages/9_Sensor_Ingest.py | 6 - docker-compose.yml | 16 ++- 20 files changed, 660 insertions(+), 144 deletions(-) create mode 100644 api/v1/endpoints/auth.py create mode 100644 api/v1/repositories/auth_repository.py create mode 100644 api/v1/schemas/auth.py create mode 100644 api/v1/services/auth_service.py diff --git a/api/v1/endpoints/auth.py b/api/v1/endpoints/auth.py new file mode 100644 index 0000000..62543af --- /dev/null +++ b/api/v1/endpoints/auth.py @@ -0,0 +1,68 @@ +"""Authentication API endpoints.""" + +from __future__ import annotations + +from fastapi import APIRouter, Depends, Header, HTTPException, status + +from api.database import get_db +from ..repositories.auth_repository import AuthRepository +from ..schemas.auth import AuthResponse, LoginRequest, SignupRequest, UserOut +from ..services.auth_service import AuthService + +router = APIRouter() + + +def get_auth_repo(conn=Depends(get_db)): + """Dependency to get auth repository.""" + return AuthRepository(conn) + + +def get_auth_service(repo=Depends(get_auth_repo)): + """Dependency to get auth service.""" + return AuthService(repo) + + +def get_current_user( + authorization: str | None = Header(default=None), + service: AuthService = Depends(get_auth_service), +): + """Extract and validate bearer token from Authorization header.""" + if authorization is None: + raise HTTPException( + status_code=status.HTTP_401_UNAUTHORIZED, + detail="Missing Authorization header.", + ) + + scheme, _, token = authorization.partition(" ") + if scheme.lower() != "bearer" or not token: + raise HTTPException( + status_code=status.HTTP_401_UNAUTHORIZED, + detail="Invalid Authorization header.", + ) + + return service.get_current_user_from_token(token) + + +@router.post("/signup", 
response_model=AuthResponse) +def signup(payload: SignupRequest, service=Depends(get_auth_service)): + """Create a new user account and return a bearer token.""" + return service.signup( + email=payload.email, + full_name=payload.full_name, + password=payload.password, + ) + + +@router.post("/login", response_model=AuthResponse) +def login(payload: LoginRequest, service=Depends(get_auth_service)): + """Authenticate a user and return a bearer token.""" + return service.login( + email=payload.email, + password=payload.password, + ) + + +@router.get("/me", response_model=UserOut) +def me(current_user=Depends(get_current_user)): + """Return the currently authenticated user.""" + return current_user \ No newline at end of file diff --git a/api/v1/repositories/auth_repository.py b/api/v1/repositories/auth_repository.py new file mode 100644 index 0000000..5274c89 --- /dev/null +++ b/api/v1/repositories/auth_repository.py @@ -0,0 +1,123 @@ +"""Repository for authentication queries.""" + +from __future__ import annotations + +from typing import Optional + +import pyodbc + + +class AuthRepository: + """Repository for authentication-related database access.""" + + def __init__(self, conn: pyodbc.Connection): + self.conn = conn + + def get_user_by_email(self, email: str) -> Optional[dict]: + cursor = self.conn.cursor() + cursor.execute( + """ + SELECT + [UserAccount_ID], + [Email], + [FullName], + [PasswordHash], + [IsActive], + [IsVerified], + [CreatedAt], + [UpdatedAt] + FROM dbo.[UserAccount] + WHERE [Email] = ? 
+ """, + email, + ) + row = cursor.fetchone() + cursor.close() + + if not row: + return None + + return { + "user_id": row[0], + "email": row[1], + "full_name": row[2], + "password_hash": row[3], + "is_active": bool(row[4]), + "is_verified": bool(row[5]), + "created_at": row[6], + "updated_at": row[7], + } + + def get_user_by_id(self, user_id: int) -> Optional[dict]: + cursor = self.conn.cursor() + cursor.execute( + """ + SELECT + [UserAccount_ID], + [Email], + [FullName], + [PasswordHash], + [IsActive], + [IsVerified], + [CreatedAt], + [UpdatedAt] + FROM dbo.[UserAccount] + WHERE [UserAccount_ID] = ? + """, + user_id, + ) + row = cursor.fetchone() + cursor.close() + + if not row: + return None + + return { + "user_id": row[0], + "email": row[1], + "full_name": row[2], + "password_hash": row[3], + "is_active": bool(row[4]), + "is_verified": bool(row[5]), + "created_at": row[6], + "updated_at": row[7], + } + + def create_user(self, email: str, full_name: str, password_hash: str) -> dict: + cursor = self.conn.cursor() + cursor.execute( + """ + INSERT INTO dbo.[UserAccount] ( + [Email], + [FullName], + [PasswordHash], + [IsActive], + [IsVerified] + ) + OUTPUT + INSERTED.[UserAccount_ID], + INSERTED.[Email], + INSERTED.[FullName], + INSERTED.[IsActive], + INSERTED.[IsVerified], + INSERTED.[CreatedAt], + INSERTED.[UpdatedAt] + VALUES (?, ?, ?, 1, 1) + """, + email, + full_name, + password_hash, + ) + row = cursor.fetchone() + self.conn.commit() + cursor.close() + + return { + "user_id": row[0], + "email": row[1], + "full_name": row[2], + "is_active": bool(row[3]), + "is_verified": bool(row[4]), + "created_at": row[5], + "updated_at": row[6], + } \ No newline at end of file diff --git a/api/v1/router.py b/api/v1/router.py index 07c8e8f..fc1e9a0 100644 --- a/api/v1/router.py +++ b/api/v1/router.py @@ -3,6 +3,7 @@ from __future__ import annotations from fastapi import APIRouter +from .endpoints.auth import router as auth_router from .endpoints.health import router as 
health_router from .endpoints.sites import router as sites_router @@ -23,6 +24,7 @@ router = APIRouter() +router.include_router(auth_router, prefix="/auth", tags=["auth"]) router.include_router(health_router, tags=["health"]) router.include_router(sites_router, prefix="/sites", tags=["sites"]) router.include_router(channels_router, prefix="/channels", tags=["channels"]) diff --git a/api/v1/schemas/auth.py b/api/v1/schemas/auth.py new file mode 100644 index 0000000..134875e --- /dev/null +++ b/api/v1/schemas/auth.py @@ -0,0 +1,34 @@ +"""Pydantic schemas for authentication.""" + +from __future__ import annotations + +from datetime import datetime + +from pydantic import BaseModel, Field + + +class SignupRequest(BaseModel): + email: str + full_name: str = Field(min_length=1, max_length=255) + password: str = Field(min_length=8, max_length=255) + + +class LoginRequest(BaseModel): + email: str + password: str = Field(min_length=1, max_length=255) + + +class UserOut(BaseModel): + user_id: int + email: str + full_name: str + is_active: bool + is_verified: bool + created_at: datetime + updated_at: datetime + + +class AuthResponse(BaseModel): + access_token: str + token_type: str = "bearer" + user: UserOut \ No newline at end of file diff --git a/api/v1/services/auth_service.py b/api/v1/services/auth_service.py new file mode 100644 index 0000000..56973e9 --- /dev/null +++ b/api/v1/services/auth_service.py @@ -0,0 +1,184 @@ +"""Service layer for authentication.""" + +from __future__ import annotations + +import base64 +import hashlib +import hmac +import json +import os +import secrets +from datetime import UTC, datetime, timedelta + +from fastapi import HTTPException, status + +from ..repositories.auth_repository import AuthRepository + + +class AuthService: + """Business logic for signup, login, and token validation.""" + + def __init__(self, repo: AuthRepository): + self.repo = repo + self.secret = os.getenv("APP_AUTH_SECRET", "dev-only-auth-secret-change-me") + 
self.token_ttl_hours = int(os.getenv("APP_AUTH_TOKEN_TTL_HOURS", "24")) + + def signup(self, email: str, full_name: str, password: str) -> dict: + existing_user = self.repo.get_user_by_email(email) + if existing_user is not None: + raise HTTPException( + status_code=status.HTTP_400_BAD_REQUEST, + detail="An account with this email already exists.", + ) + + password_hash = self._hash_password(password) + user = self.repo.create_user(email=email, full_name=full_name, password_hash=password_hash) + token = self._generate_token(user["user_id"], user["email"]) + + return { + "access_token": token, + "token_type": "bearer", + "user": user, + } + + def login(self, email: str, password: str) -> dict: + user = self.repo.get_user_by_email(email) + if user is None or not self._verify_password(password, user["password_hash"]): + raise HTTPException( + status_code=status.HTTP_401_UNAUTHORIZED, + detail="Invalid email or password.", + ) + + if not user["is_active"]: + raise HTTPException( + status_code=status.HTTP_403_FORBIDDEN, + detail="This account is inactive.", + ) + + token = self._generate_token(user["user_id"], user["email"]) + + return { + "access_token": token, + "token_type": "bearer", + "user": self._public_user(user), + } + + def get_current_user_from_token(self, token: str) -> dict: + payload = self._decode_token(token) + + exp = payload.get("exp") + user_id = payload.get("sub") + + if exp is None or user_id is None: + raise HTTPException( + status_code=status.HTTP_401_UNAUTHORIZED, + detail="Invalid authentication token.", + ) + + if datetime.now(UTC).timestamp() > exp: + raise HTTPException( + status_code=status.HTTP_401_UNAUTHORIZED, + detail="Authentication token has expired.", + ) + + user = self.repo.get_user_by_id(int(user_id)) + if user is None: + raise HTTPException( + status_code=status.HTTP_401_UNAUTHORIZED, + detail="User not found.", + ) + + if not user["is_active"]: + raise HTTPException( + status_code=status.HTTP_403_FORBIDDEN, + detail="This account 
is inactive.", + ) + + return self._public_user(user) + + def _public_user(self, user: dict) -> dict: + return { + "user_id": user["user_id"], + "email": user["email"], + "full_name": user["full_name"], + "is_active": user["is_active"], + "is_verified": user["is_verified"], + "created_at": user["created_at"], + "updated_at": user["updated_at"], + } + + def _hash_password(self, password: str) -> str: + iterations = 100_000 + salt = secrets.token_hex(16) + derived = hashlib.pbkdf2_hmac( + "sha256", + password.encode("utf-8"), + salt.encode("utf-8"), + iterations, + ) + digest = base64.urlsafe_b64encode(derived).decode("utf-8") + return f"pbkdf2_sha256${iterations}${salt}${digest}" + + def _verify_password(self, password: str, stored_hash: str) -> bool: + try: + algorithm, iterations_str, salt, expected_digest = stored_hash.split("$", 3) + if algorithm != "pbkdf2_sha256": + return False + + iterations = int(iterations_str) + derived = hashlib.pbkdf2_hmac( + "sha256", + password.encode("utf-8"), + salt.encode("utf-8"), + iterations, + ) + actual_digest = base64.urlsafe_b64encode(derived).decode("utf-8") + return hmac.compare_digest(actual_digest, expected_digest) + except Exception: + return False + + def _generate_token(self, user_id: int, email: str) -> str: + expires_at = datetime.now(UTC) + timedelta(hours=self.token_ttl_hours) + payload = { + "sub": user_id, + "email": email, + "exp": expires_at.timestamp(), + } + + payload_bytes = json.dumps(payload, separators=(",", ":")).encode("utf-8") + payload_b64 = base64.urlsafe_b64encode(payload_bytes).decode("utf-8").rstrip("=") + + signature = hmac.new( + self.secret.encode("utf-8"), + payload_b64.encode("utf-8"), + hashlib.sha256, + ).digest() + signature_b64 = base64.urlsafe_b64encode(signature).decode("utf-8").rstrip("=") + + return f"{payload_b64}.{signature_b64}" + + def _decode_token(self, token: str) -> dict: + try: + payload_b64, signature_b64 = token.split(".", 1) + except ValueError as exc: + raise 
HTTPException( + status_code=status.HTTP_401_UNAUTHORIZED, + detail="Invalid authentication token.", + ) from exc + + expected_signature = hmac.new( + self.secret.encode("utf-8"), + payload_b64.encode("utf-8"), + hashlib.sha256, + ).digest() + expected_signature_b64 = base64.urlsafe_b64encode(expected_signature).decode("utf-8").rstrip("=") + + if not hmac.compare_digest(signature_b64, expected_signature_b64): + raise HTTPException( + status_code=status.HTTP_401_UNAUTHORIZED, + detail="Invalid authentication token.", + ) + + padded_payload = payload_b64 + "=" * (-len(payload_b64) % 4) + payload_bytes = base64.urlsafe_b64decode(padded_payload.encode("utf-8")) + return json.loads(payload_bytes.decode("utf-8")) \ No newline at end of file diff --git a/app/Home.py b/app/Home.py index f229418..9418051 100644 --- a/app/Home.py +++ b/app/Home.py @@ -1,4 +1,4 @@ -"""Home page — Streamlit multipage entry point. +"""App entrypoint and router. Run with: uv run streamlit run app/Home.py """ @@ -7,7 +7,6 @@ import sys from pathlib import Path -# Ensure project root is on sys.path when Streamlit adds app/ to sys.path instead. 
_project_root = str(Path(__file__).resolve().parent.parent) if _project_root not in sys.path: sys.path.insert(0, _project_root) @@ -15,58 +14,98 @@ import streamlit as st from app.api_client import APIError, get_health -from app.auth import get_current_user, logout, require_auth +from app.auth import get_current_user, is_authenticated, logout, require_auth from app.config import settings -require_auth() - st.set_page_config(page_title=settings.APP_TITLE, page_icon="💧", layout="wide") -# Sidebar -with st.sidebar: - user = get_current_user() - if user: - st.write(f"Logged in as: **{user['name']}**") - if st.button("Sign out"): - logout() - -# Main content -st.header("open_datEAUbase") -st.subheader("Water quality data management") - -col_status, col_nav = st.columns(2) - -with col_status: - st.markdown("### API Status") - try: - health = get_health() - st.metric("API version", health.get("version", "—")) - st.metric("Database", health.get("db_status", health.get("status", "—"))) - schema = health.get("schema_version") or health.get("db_schema_version") - if schema: - st.metric("Schema version", schema) - except APIError as e: - if e.status_code == 503 and "Cannot reach API" not in e.message: - st.error(f"API is running but the database is unavailable: {e.message}") - else: - st.error( - f"Cannot reach API at {settings.API_BASE_URL}. " - "Make sure the API server is running." 
- ) - -with col_nav: - st.markdown("### Quick navigation") - st.markdown( - """ + +def login_page() -> None: + require_auth(show_login=True) + + +def dashboard_page() -> None: + st.header("open_datEAUbase") + st.subheader("Water quality data management") + + col_status, col_nav = st.columns(2) + + with col_status: + st.markdown("### API Status") + try: + health = get_health() + st.metric("API version", health.get("version", "—")) + st.metric("Database", health.get("db_status", health.get("status", "—"))) + schema = health.get("schema_version") or health.get("db_schema_version") + if schema: + st.metric("Schema version", schema) + except APIError as e: + if e.status_code == 503 and "Cannot reach API" not in e.message: + st.error(f"API is running but the database is unavailable: {e.message}") + else: + st.error( + f"Cannot reach API at {settings.API_BASE_URL}. " + "Make sure the API server is running." + ) + + with col_nav: + st.markdown("### Quick navigation") + st.markdown( + """ Use the sidebar to navigate between sections: -- **Sites** — monitoring locations where equipment is deployed -- **Equipment** — sensors and instruments collecting measurements -- **Channels** — individual measurement streams (e.g., pH at Site A) -- **Campaigns** — sampling campaigns for lab analyses -- **Timeseries** — view and explore time-stamped measurements -- **Annotations** — notes and flags attached to measurements - """ +- **Sites** +- **Equipment** +- **Campaigns** +- **Channels** +- **Annotations** +- **Units** +- **Equipment Models** +- **Parameters** +- **Binning Axes** +- **Sensor Ingest** +- **Lab Ingest** +- **Explore** + """ + ) + + st.caption("Use the sidebar to navigate between sections.") + + +if not is_authenticated(): + pg = st.navigation( + [ + st.Page(login_page, title="Login", icon="🔐", default=True), + ], + position="hidden", + ) +else: + with st.sidebar: + user = get_current_user() + if user: + st.write(f"Logged in as: **{user['full_name']}**") + 
st.caption(user["email"]) + st.divider() + if st.button("Sign out"): + logout() + + pg = st.navigation( + [ + st.Page(dashboard_page, title="Home", default=True), + st.Page("pages/1_Sites.py", title="Sites"), + st.Page("pages/2_Equipment.py", title="Equipment"), + st.Page("pages/3_Campaigns.py", title="Campaigns"), + st.Page("pages/4_Channels.py", title="Channels"), + st.Page("pages/5_Annotations.py", title="Annotations"), + st.Page("pages/5_Units.py", title="Units"), + st.Page("pages/6_Equipment_Models.py", title="Equipment Models"), + st.Page("pages/7_Parameters.py", title="Parameters"), + st.Page("pages/8_Binning_Axes.py", title="Binning Axes"), + st.Page("pages/9_Sensor_Ingest.py", title="Sensor Ingest"), + st.Page("pages/10_Lab_Ingest.py", title="Lab Ingest"), + st.Page("pages/11_Explore.py", title="Explore"), + ], + position="sidebar", ) -st.caption("Use the sidebar to navigate between sections.") +pg.run() \ No newline at end of file diff --git a/app/api_client.py b/app/api_client.py index 57e2ff8..5c7e787 100644 --- a/app/api_client.py +++ b/app/api_client.py @@ -10,7 +10,7 @@ import httpx from app.config import settings - +import streamlit as st class APIError(Exception): """Raised when the API returns a non-2xx response or is unreachable.""" @@ -22,7 +22,11 @@ def __init__(self, status_code: int, message: str) -> None: def _get_client() -> httpx.Client: - return httpx.Client(base_url=settings.API_BASE_URL, timeout=30) + headers = {} + token = st.session_state.get("access_token") + if token: + headers["Authorization"] = f"Bearer {token}" + return httpx.Client(base_url=settings.API_BASE_URL, timeout=30, headers=headers) def _raise_for_status(response: httpx.Response) -> None: @@ -48,6 +52,47 @@ def get_health() -> dict: _raise_for_status(r) return r.json() +def signup(email: str, full_name: str, password: str) -> dict: + try: + with _get_client() as client: + r = client.post( + "/auth/signup", + json={ + "email": email, + "full_name": full_name, + 
"password": password, + }, + ) + except httpx.ConnectError: + raise APIError(503, "Cannot reach API") + _raise_for_status(r) + return r.json() + + +def login(email: str, password: str) -> dict: + try: + with _get_client() as client: + r = client.post( + "/auth/login", + json={ + "email": email, + "password": password, + }, + ) + except httpx.ConnectError: + raise APIError(503, "Cannot reach API") + _raise_for_status(r) + return r.json() + + +def get_me() -> dict: + try: + with _get_client() as client: + r = client.get("/auth/me") + except httpx.ConnectError: + raise APIError(503, "Cannot reach API") + _raise_for_status(r) + return r.json() # --------------------------------------------------------------------------- # Sites diff --git a/app/auth.py b/app/auth.py index ff983e3..27bf336 100644 --- a/app/auth.py +++ b/app/auth.py @@ -1,44 +1,120 @@ -"""Session-state authentication stub for the Streamlit app. +"""Authentication helpers for the Streamlit app.""" -Real auth: replace the credential check block marked with TODO in _show_login_page(). 
-""" from __future__ import annotations + import streamlit as st +from app.api_client import APIError, get_me, login, signup + + +def _ensure_auth_state() -> None: + st.session_state.setdefault("authenticated", False) + st.session_state.setdefault("access_token", None) + st.session_state.setdefault("user", None) + + +def is_authenticated() -> bool: + _ensure_auth_state() + return bool(st.session_state.get("authenticated") and st.session_state.get("user")) -def require_auth() -> None: - """Guard: halt page render if user is not logged in.""" - if st.session_state.get("user"): + +def require_auth(show_login: bool = False) -> None: + """Guard pages that require authentication.""" + _ensure_auth_state() + + if is_authenticated(): return - _show_login_page() + + if show_login: + _show_auth_page() + else: + st.title("Authentication required") + st.info("Please sign in from the Home page to access the application.") st.stop() def get_current_user() -> dict | None: - """Return the current user dict {"name": str} or None if not logged in.""" + _ensure_auth_state() return st.session_state.get("user") def logout() -> None: - """Clear the session and rerun.""" - st.session_state.pop("user", None) + st.session_state["authenticated"] = False + st.session_state["access_token"] = None + st.session_state["user"] = None st.rerun() -def _show_login_page() -> None: - """Render login form. 
Called by require_auth() before st.stop().""" +def _complete_auth(auth_response: dict) -> None: + st.session_state["access_token"] = auth_response["access_token"] + st.session_state["authenticated"] = True + st.session_state["user"] = auth_response["user"] + + +def _show_auth_page() -> None: + _ensure_auth_state() + st.title("open_datEAUbase") st.caption("Water quality data management") - with st.form("login_form"): - username = st.text_input("Username") - password = st.text_input("Password", type="password") - submitted = st.form_submit_button("Sign in") - - if submitted: - # TODO: Replace this block with real credential check (JWT, LDAP, etc.) - if username and password: # stub: any non-empty credentials work - st.session_state["user"] = {"name": username} - st.rerun() - else: - st.error("Please enter username and password.") + tab_login, tab_signup = st.tabs(["Sign in", "Sign up"]) + + with tab_login: + with st.form("login_form"): + email = st.text_input("Email", key="login_email") + password = st.text_input("Password", type="password", key="login_password") + submitted = st.form_submit_button("Sign in") + + if submitted: + if not email or not password: + st.error("Please enter your email and password.") + else: + try: + auth_response = login(email=email, password=password) + _complete_auth(auth_response) + st.rerun() + except APIError as e: + st.error(e.message) + + with tab_signup: + with st.form("signup_form"): + full_name = st.text_input("Full name", key="signup_full_name") + email = st.text_input("Email", key="signup_email") + password = st.text_input("Password", type="password", key="signup_password") + confirm_password = st.text_input( + "Confirm password", type="password", key="signup_confirm_password" + ) + submitted = st.form_submit_button("Create account") + + if submitted: + if not full_name or not email or not password or not confirm_password: + st.error("Please fill in all fields.") + elif password != confirm_password: + st.error("Passwords do not 
match.") + elif len(password) < 8: + st.error("Password must be at least 8 characters long.") + else: + try: + auth_response = signup( + email=email, + full_name=full_name, + password=password, + ) + _complete_auth(auth_response) + st.rerun() + except APIError as e: + st.error(e.message) + + +def refresh_current_user() -> None: + _ensure_auth_state() + token = st.session_state.get("access_token") + if not token: + return + + try: + user = get_me() + st.session_state["user"] = user + st.session_state["authenticated"] = True + except APIError: + logout() \ No newline at end of file diff --git a/app/pages/10_Lab_Ingest.py b/app/pages/10_Lab_Ingest.py index e1968f4..9eedff6 100644 --- a/app/pages/10_Lab_Ingest.py +++ b/app/pages/10_Lab_Ingest.py @@ -29,12 +29,6 @@ require_auth() -with st.sidebar: - user = get_current_user() - if user: - st.write(f"Logged in as: **{user['name']}**") - if st.button("Sign out"): - logout() st.title("Lab Analysis Ingest") st.markdown( diff --git a/app/pages/1_Sites.py b/app/pages/1_Sites.py index 53cf77f..1c8c16c 100644 --- a/app/pages/1_Sites.py +++ b/app/pages/1_Sites.py @@ -24,12 +24,6 @@ require_auth() -with st.sidebar: - user = get_current_user() - if user: - st.write(f"Logged in as: **{user['name']}**") - if st.button("Sign out"): - logout() st.title("Sites") diff --git a/app/pages/2_Equipment.py b/app/pages/2_Equipment.py index b29a481..ddaee85 100644 --- a/app/pages/2_Equipment.py +++ b/app/pages/2_Equipment.py @@ -25,12 +25,6 @@ require_auth() -with st.sidebar: - user = get_current_user() - if user: - st.write(f"Logged in as: **{user['name']}**") - if st.button("Sign out"): - logout() st.title("Equipment") diff --git a/app/pages/3_Campaigns.py b/app/pages/3_Campaigns.py index 9f70ca3..01f4671 100644 --- a/app/pages/3_Campaigns.py +++ b/app/pages/3_Campaigns.py @@ -114,12 +114,6 @@ def add_deployment_dialog( require_auth() -with st.sidebar: - user = get_current_user() - if user: - st.write(f"Logged in as: **{user['name']}**") - if 
st.button("Sign out"): - logout() st.title("Campaigns") diff --git a/app/pages/4_Channels.py b/app/pages/4_Channels.py index 584cb30..46dd402 100644 --- a/app/pages/4_Channels.py +++ b/app/pages/4_Channels.py @@ -27,12 +27,6 @@ require_auth() -with st.sidebar: - user = get_current_user() - if user: - st.write(f"Logged in as: **{user['name']}**") - if st.button("Sign out"): - logout() st.title("Channels") diff --git a/app/pages/5_Annotations.py b/app/pages/5_Annotations.py index 75485c4..3e9da7f 100644 --- a/app/pages/5_Annotations.py +++ b/app/pages/5_Annotations.py @@ -28,12 +28,6 @@ require_auth() -with st.sidebar: - user = get_current_user() - if user: - st.write(f"Logged in as: **{user['name']}**") - if st.button("Sign out"): - logout() st.title("Annotations") diff --git a/app/pages/5_Units.py b/app/pages/5_Units.py index 910f45b..1a65ad2 100644 --- a/app/pages/5_Units.py +++ b/app/pages/5_Units.py @@ -24,12 +24,6 @@ require_auth() -with st.sidebar: - user = get_current_user() - if user: - st.write(f"Logged in as: **{user['name']}**") - if st.button("Sign out"): - logout() st.title("Units") diff --git a/app/pages/6_Equipment_Models.py b/app/pages/6_Equipment_Models.py index 2ca5f2c..ae7d76c 100644 --- a/app/pages/6_Equipment_Models.py +++ b/app/pages/6_Equipment_Models.py @@ -24,12 +24,6 @@ require_auth() -with st.sidebar: - user = get_current_user() - if user: - st.write(f"Logged in as: **{user['name']}**") - if st.button("Sign out"): - logout() st.title("Equipment Models") diff --git a/app/pages/7_Parameters.py b/app/pages/7_Parameters.py index 028a460..4e357ec 100644 --- a/app/pages/7_Parameters.py +++ b/app/pages/7_Parameters.py @@ -25,12 +25,6 @@ require_auth() -with st.sidebar: - user = get_current_user() - if user: - st.write(f"Logged in as: **{user['name']}**") - if st.button("Sign out"): - logout() st.title("Parameters") diff --git a/app/pages/8_Binning_Axes.py b/app/pages/8_Binning_Axes.py index e3dd77b..98710a5 100644 --- 
a/app/pages/8_Binning_Axes.py +++ b/app/pages/8_Binning_Axes.py @@ -24,13 +24,6 @@ require_auth() -with st.sidebar: - user = get_current_user() - if user: - st.write(f"Logged in as: **{user['name']}**") - if st.button("Sign out"): - logout() - st.title("Measurement Axes (Binning)") st.markdown( """ diff --git a/app/pages/9_Sensor_Ingest.py b/app/pages/9_Sensor_Ingest.py index e619338..ffbdc42 100644 --- a/app/pages/9_Sensor_Ingest.py +++ b/app/pages/9_Sensor_Ingest.py @@ -32,12 +32,6 @@ require_auth() -with st.sidebar: - user = get_current_user() - if user: - st.write(f"Logged in as: **{user['name']}**") - if st.button("Sign out"): - logout() st.title("Sensor Data Ingest") st.markdown( diff --git a/docker-compose.yml b/docker-compose.yml index 750a3da..379e3b8 100644 --- a/docker-compose.yml +++ b/docker-compose.yml @@ -11,8 +11,6 @@ services: volumes: - mssql_data:/var/opt/mssql - ./sql:/sql - - ./docker/mssql-init.sh:/mssql-init.sh - command: /bin/bash /mssql-init.sh restart: unless-stopped api: @@ -42,6 +40,20 @@ services: depends_on: - api restart: unless-stopped + db-init: + build: + context: . 
+ dockerfile: Dockerfile.api + container_name: open_dateaubase_db_init + depends_on: + - db + env_file: + - .env.docker + volumes: + - .:/workspace + working_dir: /workspace + command: ["python", "scripts/init_db.py"] + restart: "no" volumes: mssql_data: From 4cef59a47e3fff7d8cedf214b9bd2e4496306e34 Mon Sep 17 00:00:00 2001 From: Samah Naji Date: Thu, 12 Mar 2026 03:46:30 -0400 Subject: [PATCH 3/3] fix CI part --- generate_dictionary_reference.py | 33 ++++ generate_erd.py | 39 +++++ generate_sql.py | 41 +++++ scripts/generate_from_yaml.py | 7 +- scripts/legacy/generate_sql.py | 8 +- scripts/orchestrate_docs.py | 2 +- sql/seed_v1.0.0.sql | 264 +++++++++++++++++++++++++++++++ tests/integration/conftest.py | 31 ++-- 8 files changed, 406 insertions(+), 19 deletions(-) create mode 100644 generate_dictionary_reference.py create mode 100644 generate_erd.py create mode 100644 generate_sql.py create mode 100644 sql/seed_v1.0.0.sql diff --git a/generate_dictionary_reference.py b/generate_dictionary_reference.py new file mode 100644 index 0000000..1ed1141 --- /dev/null +++ b/generate_dictionary_reference.py @@ -0,0 +1,33 @@ +"""Compatibility wrapper for legacy dictionary reference generator.""" + +from __future__ import annotations + +import importlib.util +from pathlib import Path + +_module_path = ( + Path(__file__).resolve().parent + / "scripts" + / "legacy" + / "generate_dictionary_reference.py" +) + +_spec = importlib.util.spec_from_file_location( + "_legacy_generate_dictionary_reference", + _module_path, +) +_module = importlib.util.module_from_spec(_spec) +assert _spec is not None and _spec.loader is not None +_spec.loader.exec_module(_module) + +parse_parts_json = _module.parse_parts_json +generate_tables_markdown = _module.generate_tables_markdown +generate_value_sets_markdown = _module.generate_value_sets_markdown +generate_views_markdown = _module.generate_views_markdown + +__all__ = [ + "parse_parts_json", + "generate_tables_markdown", + 
"generate_value_sets_markdown", + "generate_views_markdown", +] \ No newline at end of file diff --git a/generate_erd.py b/generate_erd.py new file mode 100644 index 0000000..06eca11 --- /dev/null +++ b/generate_erd.py @@ -0,0 +1,39 @@ +"""Compatibility wrapper for legacy ERD generator.""" + +from __future__ import annotations + +import importlib.util +from pathlib import Path + +_module_path = ( + Path(__file__).resolve().parent + / "scripts" + / "legacy" + / "generate_erd.py" +) + +_spec = importlib.util.spec_from_file_location( + "_legacy_generate_erd", + _module_path, +) +_module = importlib.util.module_from_spec(_spec) +assert _spec is not None and _spec.loader is not None +_spec.loader.exec_module(_module) + +parse_erd_json = _module.parse_erd_json +generate_erd_data = _module.generate_erd_data +generate_erd_html = _module.generate_erd_html +generate_erd_files = _module.generate_erd_files +ERDTable = _module.ERDTable +ERDField = _module.ERDField +ERDRelationship = _module.ERDRelationship + +__all__ = [ + "parse_erd_json", + "generate_erd_data", + "generate_erd_html", + "generate_erd_files", + "ERDTable", + "ERDField", + "ERDRelationship", +] \ No newline at end of file diff --git a/generate_sql.py b/generate_sql.py new file mode 100644 index 0000000..697cc9b --- /dev/null +++ b/generate_sql.py @@ -0,0 +1,41 @@ +"""Compatibility wrapper for legacy SQL generator.""" + +from __future__ import annotations + +import importlib.util +from pathlib import Path + +_module_path = ( + Path(__file__).resolve().parent + / "scripts" + / "legacy" + / "generate_sql.py" +) + +_spec = importlib.util.spec_from_file_location( + "_legacy_generate_sql", + _module_path, +) +_module = importlib.util.module_from_spec(_spec) +assert _spec is not None and _spec.loader is not None +_spec.loader.exec_module(_module) + +parse_parts_json = _module.parse_parts_json +generate_sql_schema = _module.generate_sql_schema +generate_field_definition = _module.generate_field_definition 
+generate_foreign_key_constraint = _module.generate_foreign_key_constraint +validate_no_circular_fks = _module.validate_no_circular_fks +get_db_config = _module.get_db_config +extract_field_name = _module.extract_field_name +generate_sql_schemas = _module.generate_sql_schemas + +__all__ = [ + "parse_parts_json", + "generate_sql_schema", + "generate_field_definition", + "generate_foreign_key_constraint", + "validate_no_circular_fks", + "get_db_config", + "extract_field_name", + "generate_sql_schemas", +] \ No newline at end of file diff --git a/scripts/generate_from_yaml.py b/scripts/generate_from_yaml.py index 777a528..6e61826 100644 --- a/scripts/generate_from_yaml.py +++ b/scripts/generate_from_yaml.py @@ -31,8 +31,11 @@ scripts_dir = Path(__file__).parent sys.path.insert(0, str(scripts_dir)) -from legacy.generate_dictionary_reference import generate_tables_markdown, generate_views_markdown -from legacy.generate_erd import generate_erd_data, generate_erd_html +from generate_dictionary_reference import ( + generate_tables_markdown, + generate_views_markdown, +) +from generate_erd import generate_erd_data, generate_erd_html def parse_yaml_for_docs( diff --git a/scripts/legacy/generate_sql.py b/scripts/legacy/generate_sql.py index 985149d..f408394 100644 --- a/scripts/legacy/generate_sql.py +++ b/scripts/legacy/generate_sql.py @@ -13,7 +13,7 @@ import json from pathlib import Path from datetime import datetime -from importlib.metadata import version +from importlib.metadata import PackageNotFoundError, version # Add src to path to import models project_root = Path(__file__).parent.parent @@ -21,7 +21,11 @@ from open_dateaubase.data_model.models import Dictionary, ViewPart, ViewColumnPart -package_version = version("open-dateaubase") + +try: + package_version = version("open-dateaubase") +except PackageNotFoundError: + package_version = "0.1.0-dev" def parse_parts_json(json_path): diff --git a/scripts/orchestrate_docs.py b/scripts/orchestrate_docs.py index 
6e6c245..a497777 100644 --- a/scripts/orchestrate_docs.py +++ b/scripts/orchestrate_docs.py @@ -20,7 +20,7 @@ sys.path.insert(0, str(scripts_dir)) from generate_from_yaml import generate_all_from_yaml -from legacy.generate_dictionary_reference import ( +from generate_dictionary_reference import ( parse_parts_json, generate_value_sets_markdown, ) diff --git a/sql/seed_v1.0.0.sql b/sql/seed_v1.0.0.sql new file mode 100644 index 0000000..7874307 --- /dev/null +++ b/sql/seed_v1.0.0.sql @@ -0,0 +1,264 @@ +-- Seed data for schema v1.0.0 +-- Scenario: Wastewater and stormwater monitoring in Quebec City +-- All timestamps are Unix epoch INTs (v1.0.0 format) + +-- ============================================================================= +-- Tier 1: Tables with no foreign keys +-- ============================================================================= + +-- Units of measurement +INSERT INTO [dbo].[Unit] ([Unit]) VALUES ('mg/L'); -- ID 1 +INSERT INTO [dbo].[Unit] ([Unit]) VALUES ('NTU'); -- ID 2 +INSERT INTO [dbo].[Unit] ([Unit]) VALUES ('pH units'); -- ID 3 +INSERT INTO [dbo].[Unit] ([Unit]) VALUES (N'°C'); -- ID 4 +INSERT INTO [dbo].[Unit] ([Unit]) VALUES ('mS/cm'); -- ID 5 + +-- Watersheds +INSERT INTO [dbo].[Watershed] ([name], [Description], [Surface_area], [Concentration_time], [Impervious_surface]) +VALUES ('Riviere Saint-Charles', 'Urban catchment in Quebec City', 550.0, 180, 35.5); -- ID 1 + +INSERT INTO [dbo].[Watershed] ([name], [Description], [Surface_area], [Concentration_time], [Impervious_surface]) +VALUES ('Riviere Montmorency', 'Rural reference watershed north of Quebec City', 1150.0, 420, 8.2); -- ID 2 + +-- Weather conditions +INSERT INTO [dbo].[WeatherCondition] ([Weather_condition], [Description]) +VALUES ('Dry', 'No precipitation in the last 48 hours'); -- ID 1 + +INSERT INTO [dbo].[WeatherCondition] ([Weather_condition], [Description]) +VALUES ('Rain', 'Active rainfall event'); -- ID 2 + +INSERT INTO [dbo].[WeatherCondition] 
([Weather_condition], [Description]) +VALUES ('Snowmelt', 'Spring snowmelt conditions'); -- ID 3 + +-- Equipment models +INSERT INTO [dbo].[EquipmentModel] ([Equipment_model], [Method], [Functions], [Manufacturer], [Manual_location]) +VALUES ('ISCO 6712', 'Automatic sampling', 'Portable autosampler for wastewater and stormwater', 'Teledyne ISCO', '/manuals/isco_6712.pdf'); -- ID 1 + +INSERT INTO [dbo].[EquipmentModel] ([Equipment_model], [Method], [Functions], [Manufacturer], [Manual_location]) +VALUES ('YSI ProDSS', 'Multi-parameter probe', 'pH, temperature, conductivity, dissolved oxygen', 'YSI/Xylem', '/manuals/ysi_prodss.pdf'); -- ID 2 + +INSERT INTO [dbo].[EquipmentModel] ([Equipment_model], [Method], [Functions], [Manufacturer], [Manual_location]) +VALUES ('Hach 2100Q', 'Nephelometric', 'Portable turbidity meter', 'Hach', '/manuals/hach_2100q.pdf'); -- ID 3 + +-- Procedures +INSERT INTO [dbo].[Procedures] ([Procedure_name], [Procedure_type], [Description], [Procedure_location]) +VALUES ('Grab sampling', 'Sampling', 'Manual grab sample collected at water surface', '/procedures/grab_sampling.pdf'); -- ID 1 + +INSERT INTO [dbo].[Procedures] ([Procedure_name], [Procedure_type], [Description], [Procedure_location]) +VALUES ('24h composite', 'Sampling', 'Time-weighted 24-hour composite sample via autosampler', '/procedures/composite_24h.pdf'); -- ID 2 + +INSERT INTO [dbo].[Procedures] ([Procedure_name], [Procedure_type], [Description], [Procedure_location]) +VALUES ('Online continuous', 'Measurement', 'Continuous in-situ measurement with data logging', '/procedures/online_continuous.pdf'); -- ID 3 + +-- Projects +INSERT INTO [dbo].[Project] ([name], [Description]) +VALUES ('WWTP Inlet Monitoring 2024', 'Routine monitoring of wastewater treatment plant influent quality'); -- ID 1 + +INSERT INTO [dbo].[Project] ([name], [Description]) +VALUES ('CSO Event Study 2024', 'Combined sewer overflow characterization during rain events'); -- ID 2 + +-- Purposes +INSERT INTO 
[dbo].[Purpose] ([Purpose], [Description]) +VALUES ('Routine monitoring', 'Regular scheduled sampling for compliance and process control'); -- ID 1 + +INSERT INTO [dbo].[Purpose] ([Purpose], [Description]) +VALUES ('Event-based sampling', 'Triggered sampling during wet weather or snowmelt events'); -- ID 2 + +-- Comments (QA/QC notes) +INSERT INTO [dbo].[Comments] ([Comment]) VALUES ('Sample collected under normal conditions'); -- ID 1 +INSERT INTO [dbo].[Comments] ([Comment]) VALUES ('High turbidity observed - possible equipment drift'); -- ID 2 +INSERT INTO [dbo].[Comments] ([Comment]) VALUES ('Duplicate sample collected for QA/QC'); -- ID 3 +INSERT INTO [dbo].[Comments] ([Comment]) VALUES (NULL); -- ID 4 (no comment) + +-- Hydrological characteristics (one per watershed) +INSERT INTO [dbo].[HydrologicalCharacteristics] ([Urban_area], [Forest], [Wetlands], [Cropland], [Meadow], [Grassland]) +VALUES (35.5, 25.0, 5.0, 10.0, 12.5, 12.0); -- Watershed 1 (urban) + +INSERT INTO [dbo].[HydrologicalCharacteristics] ([Urban_area], [Forest], [Wetlands], [Cropland], [Meadow], [Grassland]) +VALUES (8.2, 55.0, 12.0, 15.0, 5.0, 4.8); -- Watershed 2 (rural) + +-- Urban characteristics (one per watershed) +INSERT INTO [dbo].[UrbanCharacteristics] ([Commercial], [Green_spaces], [Industrial], [Institutional], [Residential], [Agricultural], [Recreational]) +VALUES (15.0, 8.0, 12.0, 5.0, 45.0, 5.0, 10.0); -- Watershed 1 (urban) + +INSERT INTO [dbo].[UrbanCharacteristics] ([Commercial], [Green_spaces], [Industrial], [Institutional], [Residential], [Agricultural], [Recreational]) +VALUES (2.0, 3.0, 1.0, 1.0, 60.0, 28.0, 5.0); -- Watershed 2 (rural) + +-- ============================================================================= +-- Tier 2: Tables with FKs to Tier 1 +-- ============================================================================= + +-- Contacts +INSERT INTO [dbo].[Contact] ([Last_name], [First_name], [Company], [Status], [Function], [Office_number], [Email], 
[Phone], [City], [Zip_code], [Country]) +VALUES ('Tremblay', 'Marie', N'Universite Laval - modelEAU', 'Active', 'Research Associate', 'PLT-2910', 'marie.tremblay@ulaval.ca', '418-555-0101', N'Quebec', 'G1V 0A6', 'Canada'); -- ID 1 + +INSERT INTO [dbo].[Contact] ([Last_name], [First_name], [Company], [Status], [Function], [Office_number], [Email], [Phone], [City], [Zip_code], [Country]) +VALUES ('Gagnon', 'Pierre', N'Universite Laval - modelEAU', 'Active', 'PhD Student', 'PLT-2912', 'pierre.gagnon@ulaval.ca', '418-555-0102', N'Quebec', 'G1V 0A6', 'Canada'); -- ID 2 + +-- Equipment instances +INSERT INTO [dbo].[Equipment] ([model_ID], [identifier], [Serial_number], [Owner], [Storage_location], [Purchase_date]) +VALUES (1, 'ISCO-001', 'SN-6712-2021-001', N'modelEAU Lab', 'PLT-2900 Storage', '2021-03-15'); -- ID 1 + +INSERT INTO [dbo].[Equipment] ([model_ID], [identifier], [Serial_number], [Owner], [Storage_location], [Purchase_date]) +VALUES (2, 'YSI-001', 'SN-PRODSS-2022-045', N'modelEAU Lab', 'PLT-2900 Storage', '2022-06-01'); -- ID 2 + +INSERT INTO [dbo].[Equipment] ([model_ID], [identifier], [Serial_number], [Owner], [Storage_location], [Purchase_date]) +VALUES (3, 'HACH-001', 'SN-2100Q-2020-112', N'modelEAU Lab', 'PLT-2900 Storage', '2020-09-20'); -- ID 3 + +-- Parameters (water quality) +INSERT INTO [dbo].[Parameter] ([Unit_ID], [Parameter], [Description]) +VALUES (1, 'TSS', 'Total suspended solids'); -- ID 1 + +INSERT INTO [dbo].[Parameter] ([Unit_ID], [Parameter], [Description]) +VALUES (1, 'COD', 'Chemical oxygen demand'); -- ID 2 + +INSERT INTO [dbo].[Parameter] ([Unit_ID], [Parameter], [Description]) +VALUES (3, 'pH', 'Hydrogen ion concentration'); -- ID 3 + +INSERT INTO [dbo].[Parameter] ([Unit_ID], [Parameter], [Description]) +VALUES (4, 'Temperature', 'Water temperature'); -- ID 4 + +INSERT INTO [dbo].[Parameter] ([Unit_ID], [Parameter], [Description]) +VALUES (5, 'Conductivity', 'Electrical conductivity'); -- ID 5 + +-- Sites +INSERT INTO [dbo].[Site] 
([Watershed_ID], [name], [type], [Description], [Street_number], [Street_name], [City], [Province], [Country]) +VALUES (1, 'WWTP Est Inlet', 'Wastewater treatment plant', 'Main inlet of the eastern WWTP', '500', 'Boulevard des Capucins', N'Quebec', N'Quebec', 'Canada'); -- ID 1 + +INSERT INTO [dbo].[Site] ([Watershed_ID], [name], [type], [Description], [Street_number], [Street_name], [City], [Province], [Country]) +VALUES (1, 'CSO Outfall 12', 'Combined sewer overflow', 'CSO outfall discharging to Riviere Saint-Charles', '120', 'Rue du Pont', N'Quebec', N'Quebec', 'Canada'); -- ID 2 + +-- ============================================================================= +-- Tier 3: Tables with FKs to Tier 2 +-- ============================================================================= + +-- Sampling points +INSERT INTO [dbo].[SamplingPoints] ([Site_ID], [Sampling_point], [Sampling_location], [Latitude_GPS], [Longitude_GPS], [Description]) +VALUES (1, 'WWTP-IN-01', 'Inlet channel after screening', '46.8310', '-71.2080', 'Primary sampling point at plant inlet'); -- ID 1 + +INSERT INTO [dbo].[SamplingPoints] ([Site_ID], [Sampling_point], [Sampling_location], [Latitude_GPS], [Longitude_GPS], [Description]) +VALUES (1, 'WWTP-OUT-01', 'Final effluent discharge', '46.8315', '-71.2075', 'Effluent sampling point after disinfection'); -- ID 2 + +INSERT INTO [dbo].[SamplingPoints] ([Site_ID], [Sampling_point], [Sampling_location], [Latitude_GPS], [Longitude_GPS], [Description]) +VALUES (2, 'CSO-12-OUT', 'Overflow pipe outlet', '46.8200', '-71.2250', 'CSO overflow discharge point'); -- ID 3 + +-- Junction tables: Equipment model capabilities +INSERT INTO [dbo].[EquipmentModelHasParameter] ([Equipment_model_ID], [Parameter_ID]) VALUES (1, 1); -- ISCO -> TSS (collects samples for TSS) +INSERT INTO [dbo].[EquipmentModelHasParameter] ([Equipment_model_ID], [Parameter_ID]) VALUES (1, 2); -- ISCO -> COD +INSERT INTO [dbo].[EquipmentModelHasParameter] ([Equipment_model_ID], 
[Parameter_ID]) VALUES (2, 3); -- YSI -> pH +INSERT INTO [dbo].[EquipmentModelHasParameter] ([Equipment_model_ID], [Parameter_ID]) VALUES (2, 4); -- YSI -> Temperature +INSERT INTO [dbo].[EquipmentModelHasParameter] ([Equipment_model_ID], [Parameter_ID]) VALUES (2, 5); -- YSI -> Conductivity + +-- Junction tables: Equipment model procedures +INSERT INTO [dbo].[EquipmentModelHasProcedures] ([Equipment_model_ID], [Procedure_ID]) VALUES (1, 2); -- ISCO -> 24h composite +INSERT INTO [dbo].[EquipmentModelHasProcedures] ([Equipment_model_ID], [Procedure_ID]) VALUES (2, 3); -- YSI -> Online continuous +INSERT INTO [dbo].[EquipmentModelHasProcedures] ([Equipment_model_ID], [Procedure_ID]) VALUES (3, 1); -- Hach -> Grab sampling + +-- Junction tables: Parameter procedures +INSERT INTO [dbo].[ParameterHasProcedures] ([Procedure_ID], [Parameter_ID]) VALUES (1, 1); -- Grab -> TSS +INSERT INTO [dbo].[ParameterHasProcedures] ([Procedure_ID], [Parameter_ID]) VALUES (2, 2); -- 24h composite -> COD +INSERT INTO [dbo].[ParameterHasProcedures] ([Procedure_ID], [Parameter_ID]) VALUES (3, 3); -- Online -> pH +INSERT INTO [dbo].[ParameterHasProcedures] ([Procedure_ID], [Parameter_ID]) VALUES (3, 4); -- Online -> Temperature + +-- ============================================================================= +-- Tier 4: MetaData (central hub) +-- ============================================================================= + +-- MetaData row 1: WWTP inlet TSS, routine monitoring, grab sample +INSERT INTO [dbo].[MetaData] ([Project_ID], [Contact_ID], [Equipment_ID], [Parameter_ID], [Procedure_ID], [Unit_ID], [Purpose_ID], [Sampling_point_ID], [Condition_ID]) +VALUES (1, 1, 1, 1, 1, 1, 1, 1, 1); -- ID 1 + +-- MetaData row 2: WWTP inlet COD, routine monitoring, 24h composite +INSERT INTO [dbo].[MetaData] ([Project_ID], [Contact_ID], [Equipment_ID], [Parameter_ID], [Procedure_ID], [Unit_ID], [Purpose_ID], [Sampling_point_ID], [Condition_ID]) +VALUES (1, 1, 1, 2, 2, 1, 1, 1, 1); -- ID 2 + +-- 
MetaData row 3: WWTP inlet pH, online continuous (some FKs NULL) +INSERT INTO [dbo].[MetaData] ([Project_ID], [Contact_ID], [Equipment_ID], [Parameter_ID], [Procedure_ID], [Unit_ID], [Purpose_ID], [Sampling_point_ID], [Condition_ID]) +VALUES (1, 2, 2, 3, 3, 3, 1, 1, NULL); -- ID 3 (no weather condition) + +-- MetaData row 4: CSO overflow TSS, event-based sampling +INSERT INTO [dbo].[MetaData] ([Project_ID], [Contact_ID], [Equipment_ID], [Parameter_ID], [Procedure_ID], [Unit_ID], [Purpose_ID], [Sampling_point_ID], [Condition_ID]) +VALUES (2, 2, 1, 1, 1, 1, 2, 3, 2); -- ID 4 + +-- MetaData row 5: WWTP effluent TSS (minimal FKs to test NULLs) +INSERT INTO [dbo].[MetaData] ([Project_ID], [Contact_ID], [Equipment_ID], [Parameter_ID], [Procedure_ID], [Unit_ID], [Purpose_ID], [Sampling_point_ID], [Condition_ID]) +VALUES (1, NULL, NULL, 1, 1, 1, 1, 2, NULL); -- ID 5 (no contact, no equipment) + +-- MetaData row 6: Temperature, online continuous +INSERT INTO [dbo].[MetaData] ([Project_ID], [Contact_ID], [Equipment_ID], [Parameter_ID], [Procedure_ID], [Unit_ID], [Purpose_ID], [Sampling_point_ID], [Condition_ID]) +VALUES (1, 2, 2, 4, 3, 4, 1, 1, 1); -- ID 6 + +-- Junction tables: Project relationships +INSERT INTO [dbo].[ProjectHasContact] ([Contact_ID], [Project_ID]) VALUES (1, 1); +INSERT INTO [dbo].[ProjectHasContact] ([Contact_ID], [Project_ID]) VALUES (2, 1); +INSERT INTO [dbo].[ProjectHasContact] ([Contact_ID], [Project_ID]) VALUES (2, 2); + +INSERT INTO [dbo].[ProjectHasEquipment] ([Equipment_ID], [Project_ID]) VALUES (1, 1); +INSERT INTO [dbo].[ProjectHasEquipment] ([Equipment_ID], [Project_ID]) VALUES (2, 1); +INSERT INTO [dbo].[ProjectHasEquipment] ([Equipment_ID], [Project_ID]) VALUES (3, 1); +INSERT INTO [dbo].[ProjectHasEquipment] ([Equipment_ID], [Project_ID]) VALUES (1, 2); + +INSERT INTO [dbo].[ProjectHasSamplingPoints] ([Project_ID], [Sampling_point_ID]) VALUES (1, 1); +INSERT INTO [dbo].[ProjectHasSamplingPoints] ([Project_ID], [Sampling_point_ID]) VALUES 
(1, 2); +INSERT INTO [dbo].[ProjectHasSamplingPoints] ([Project_ID], [Sampling_point_ID]) VALUES (2, 3); + +-- ============================================================================= +-- Tier 5: Value (measurements) +-- Timestamps are Unix epoch seconds (INT) for v1.0.0 +-- ============================================================================= + +-- TSS at WWTP inlet (Metadata_ID=1), dry weather +-- 2024-01-15 08:00 EST = 1705320000 +-- 2024-01-15 14:00 EST = 1705341600 +-- 2024-01-16 08:00 EST = 1705406400 +INSERT INTO [dbo].[Value] ([Comment_ID], [Metadata_ID], [Value], [Number_of_experiment], [Timestamp]) +VALUES (1, 1, 185.0, 1, 1705320000); -- ID 1 +INSERT INTO [dbo].[Value] ([Comment_ID], [Metadata_ID], [Value], [Number_of_experiment], [Timestamp]) +VALUES (NULL, 1, 210.5, 2, 1705341600); -- ID 2 +INSERT INTO [dbo].[Value] ([Comment_ID], [Metadata_ID], [Value], [Number_of_experiment], [Timestamp]) +VALUES (1, 1, 192.3, 3, 1705406400); -- ID 3 + +-- COD at WWTP inlet (Metadata_ID=2), 24h composite +-- 2024-01-15 00:00 to 2024-01-16 00:00 +INSERT INTO [dbo].[Value] ([Comment_ID], [Metadata_ID], [Value], [Number_of_experiment], [Timestamp]) +VALUES (NULL, 2, 450.0, 1, 1705291200); -- ID 4 (2024-01-15 00:00 EST) +INSERT INTO [dbo].[Value] ([Comment_ID], [Metadata_ID], [Value], [Number_of_experiment], [Timestamp]) +VALUES (NULL, 2, 520.8, 2, 1705377600); -- ID 5 (2024-01-16 00:00 EST) + +-- pH online continuous (Metadata_ID=3) +-- 2024-01-15 every 4 hours +INSERT INTO [dbo].[Value] ([Comment_ID], [Metadata_ID], [Value], [Number_of_experiment], [Timestamp]) +VALUES (NULL, 3, 7.2, 1, 1705291200); -- ID 6 +INSERT INTO [dbo].[Value] ([Comment_ID], [Metadata_ID], [Value], [Number_of_experiment], [Timestamp]) +VALUES (NULL, 3, 7.1, 2, 1705305600); -- ID 7 +INSERT INTO [dbo].[Value] ([Comment_ID], [Metadata_ID], [Value], [Number_of_experiment], [Timestamp]) +VALUES (NULL, 3, 6.9, 3, 1705320000); -- ID 8 +INSERT INTO [dbo].[Value] ([Comment_ID], 
[Metadata_ID], [Value], [Number_of_experiment], [Timestamp]) +VALUES (2, 3, 7.8, 4, 1705334400); -- ID 9 (flagged: equipment drift) + +-- CSO overflow TSS during rain (Metadata_ID=4) +-- 2024-03-20 rain event +INSERT INTO [dbo].[Value] ([Comment_ID], [Metadata_ID], [Value], [Number_of_experiment], [Timestamp]) +VALUES (NULL, 4, 350.0, 1, 1710936000); -- ID 10 (2024-03-20 12:00 EST) +INSERT INTO [dbo].[Value] ([Comment_ID], [Metadata_ID], [Value], [Number_of_experiment], [Timestamp]) +VALUES (NULL, 4, 580.2, 2, 1710943200); -- ID 11 (2024-03-20 14:00 EST) +INSERT INTO [dbo].[Value] ([Comment_ID], [Metadata_ID], [Value], [Number_of_experiment], [Timestamp]) +VALUES (3, 4, 345.0, 3, 1710943200); -- ID 12 (duplicate for QA/QC) + +-- WWTP effluent TSS (Metadata_ID=5) +INSERT INTO [dbo].[Value] ([Comment_ID], [Metadata_ID], [Value], [Number_of_experiment], [Timestamp]) +VALUES (NULL, 5, 12.5, 1, 1705320000); -- ID 13 +INSERT INTO [dbo].[Value] ([Comment_ID], [Metadata_ID], [Value], [Number_of_experiment], [Timestamp]) +VALUES (NULL, 5, 15.0, 2, 1705406400); -- ID 14 + +-- Temperature online (Metadata_ID=6) +INSERT INTO [dbo].[Value] ([Comment_ID], [Metadata_ID], [Value], [Number_of_experiment], [Timestamp]) +VALUES (NULL, 6, 12.3, 1, 1705291200); -- ID 15 +INSERT INTO [dbo].[Value] ([Comment_ID], [Metadata_ID], [Value], [Number_of_experiment], [Timestamp]) +VALUES (NULL, 6, 12.1, 2, 1705305600); -- ID 16 +INSERT INTO [dbo].[Value] ([Comment_ID], [Metadata_ID], [Value], [Number_of_experiment], [Timestamp]) +VALUES (NULL, 6, 11.8, 3, 1705320000); -- ID 17 + +-- Value with NULL timestamp (edge case for migration testing) +INSERT INTO [dbo].[Value] ([Comment_ID], [Metadata_ID], [Value], [Number_of_experiment], [Timestamp]) +VALUES (4, 1, 200.0, NULL, NULL); -- ID 18 (no timestamp, no experiment number) diff --git a/tests/integration/conftest.py b/tests/integration/conftest.py index a6c2451..cb11138 100644 --- a/tests/integration/conftest.py +++ 
b/tests/integration/conftest.py @@ -4,6 +4,7 @@ Tests are automatically skipped if the container is not available. """ +from pathlib import Path import re import struct import uuid @@ -30,11 +31,12 @@ def _handle_datetimeoffset(dto_value: bytes) -> datetime: """ tup = struct.unpack("<6hI2h", dto_value) year, month, day, hour, minute, second = tup[:6] - microsecond = tup[6] // 1000 # nanoseconds → microseconds + microsecond = tup[6] // 1000 tz_hour, tz_minute = tup[7], tup[8] tz = timezone(timedelta(hours=tz_hour, minutes=tz_minute)) return datetime(year, month, day, hour, minute, second, microsecond, tz) + # Connection parameters matching docker-compose.yml MSSQL_HOST = "127.0.0.1" MSSQL_PORT = 14330 @@ -43,8 +45,6 @@ def _handle_datetimeoffset(dto_value: bytes) -> datetime: MSSQL_DRIVER = "{ODBC Driver 18 for SQL Server}" # Paths to SQL files (relative to project root) -from pathlib import Path - PROJECT_ROOT = Path(__file__).parent.parent.parent MIGRATIONS_DIR = PROJECT_ROOT / "migrations" @@ -99,7 +99,6 @@ def run_sql_file(conn: "pyodbc.Connection", filepath: Path) -> None: """ sql = filepath.read_text(encoding="utf-8") - # Split on GO as a standalone batch separator (line by itself or with whitespace) batches = re.split(r"^\s*GO\s*$", sql, flags=re.MULTILINE | re.IGNORECASE) cursor = conn.cursor() @@ -107,7 +106,6 @@ def run_sql_file(conn: "pyodbc.Connection", filepath: Path) -> None: batch = batch.strip() if not batch: continue - # Skip sqlcmd directives (:r, :setvar, etc.) 
if batch.startswith(":"): continue cursor.execute(batch) @@ -129,9 +127,7 @@ def get_table_names(conn: "pyodbc.Connection") -> set[str]: return {row[0] for row in cursor.fetchall()} -def get_column_type( - conn: "pyodbc.Connection", table: str, column: str -) -> str: +def get_column_type(conn: "pyodbc.Connection", table: str, column: str) -> str: cursor = conn.cursor() cursor.execute( "SELECT DATA_TYPE FROM INFORMATION_SCHEMA.COLUMNS " @@ -154,6 +150,16 @@ def column_exists(conn: "pyodbc.Connection", table: str, column: str) -> bool: return cursor.fetchone() is not None +def _require_sql_files(keys: list[str]) -> None: + """Skip integration tests if required legacy SQL assets are not present.""" + missing = [str(SQL_FILES[key]) for key in keys if not SQL_FILES[key].exists()] + if missing: + pytest.skip( + "Legacy integration SQL assets are missing from this repo: " + + ", ".join(missing) + ) + + # --------------------------------------------------------------------------- # Fixtures # --------------------------------------------------------------------------- @@ -179,7 +185,6 @@ def connect(database: str = "master") -> "pyodbc.Connection": conn = pyodbc.connect( _connect_string(database), timeout=10, autocommit=True ) - # pyodbc does not natively support DATETIMEOFFSET (ODBC type -155) conn.add_output_converter(-155, _handle_datetimeoffset) return conn @@ -198,7 +203,6 @@ def fresh_db(mssql_engine): yield db_conn, db_name db_conn.close() - # Force-close any remaining connections before dropping master_conn.execute( f"ALTER DATABASE [{db_name}] SET SINGLE_USER WITH ROLLBACK IMMEDIATE" ) @@ -206,10 +210,9 @@ def fresh_db(mssql_engine): master_conn.close() -def _apply_schema_and_seeds( - conn: "pyodbc.Connection", steps: list[str] -) -> None: +def _apply_schema_and_seeds(conn: "pyodbc.Connection", steps: list[str]) -> None: """Apply a sequence of SQL file keys from SQL_FILES.""" + _require_sql_files(steps) for key in steps: run_sql_file(conn, SQL_FILES[key]) @@ -398,4 
+401,4 @@ def db_at_v160(fresh_db): "seed_v1.6.0", ], ) - yield conn, db_name + yield conn, db_name