From 358f6754df210db93caed45938dddde53a8210f8 Mon Sep 17 00:00:00 2001 From: Aaron Alijani Date: Fri, 8 May 2026 20:06:14 +1000 Subject: [PATCH 1/3] feat(backend): add ControlVerificationTemplate table and CRUD endpoints Phase 1 of the semi-automated manual verification system. Adds a per-control template that stores auditor instructions, keywords, severity, and expected evidence type for the 14 manual CIS controls. - New table control_verification_template (Alembic 8a7b91ea95d9) - Pydantic schemas: Create, Update, Read - CRUD endpoints under /v1/verification-templates - Write operations (POST/PATCH/DELETE) gated to admin role - Read operations available to any authenticated user - Unique constraint on control_id with 409 Conflict on duplicates Smoke-tested locally: POST creates row, GET returns it, duplicate POST returns 409. Refs: Khan's product brief on manual control verification. --- backend-api/alembic/env.py | 1 + ...a95d9_add_control_verification_template.py | 41 +++++ backend-api/app/api/v1/router.py | 4 + .../app/api/v1/verification_templates.py | 167 ++++++++++++++++++ backend-api/app/models/__init__.py | 3 + .../models/control_verification_template.py | 61 +++++++ .../schemas/control_verification_template.py | 47 +++++ 7 files changed, 324 insertions(+) create mode 100644 backend-api/alembic/versions/8a7b91ea95d9_add_control_verification_template.py create mode 100644 backend-api/app/api/v1/verification_templates.py create mode 100644 backend-api/app/models/control_verification_template.py create mode 100644 backend-api/app/schemas/control_verification_template.py diff --git a/backend-api/alembic/env.py b/backend-api/alembic/env.py index b94188d88..c973ff509 100644 --- a/backend-api/alembic/env.py +++ b/backend-api/alembic/env.py @@ -20,6 +20,7 @@ from app.models.gcp_connection import GCPConnection # noqa from app.models.aws_connection import AWSConnection # noqa from app.models.user_settings import UserSettings # noqa +from 
app.models.control_verification_template import ControlVerificationTemplate # noqa from app.core.config import get_settings # this is the Alembic Config object, which provides diff --git a/backend-api/alembic/versions/8a7b91ea95d9_add_control_verification_template.py b/backend-api/alembic/versions/8a7b91ea95d9_add_control_verification_template.py new file mode 100644 index 000000000..ccaae9add --- /dev/null +++ b/backend-api/alembic/versions/8a7b91ea95d9_add_control_verification_template.py @@ -0,0 +1,41 @@ +"""add control verification template + +Revision ID: 8a7b91ea95d9 +Revises: j1k2l3m4n567 +Create Date: 2026-05-08 09:22:25.327975 + +""" +from typing import Sequence, Union + +from alembic import op +import sqlalchemy as sa +from sqlalchemy.dialects import postgresql + +# revision identifiers, used by Alembic. +revision: str = '8a7b91ea95d9' +down_revision: Union[str, Sequence[str], None] = 'j1k2l3m4n567' +branch_labels: Union[str, Sequence[str], None] = None +depends_on: Union[str, Sequence[str], None] = None + + +def upgrade() -> None: + """Upgrade schema.""" + op.create_table('control_verification_template', + sa.Column('id', sa.Integer(), nullable=False), + sa.Column('control_id', sa.String(length=50), nullable=False), + sa.Column('title', sa.String(length=200), nullable=False), + sa.Column('instructions', sa.Text(), nullable=False), + sa.Column('keywords', postgresql.JSONB(astext_type=sa.Text()), nullable=False), + sa.Column('severity', sa.String(length=20), nullable=False), + sa.Column('evidence_type', sa.String(length=50), nullable=True), + sa.Column('created_at', sa.DateTime(), server_default=sa.text('now()'), nullable=False), + sa.Column('updated_at', sa.DateTime(), server_default=sa.text('now()'), nullable=False), + sa.PrimaryKeyConstraint('id') + ) + op.create_index(op.f('ix_control_verification_template_control_id'), 'control_verification_template', ['control_id'], unique=True) + + +def downgrade() -> None: + """Downgrade schema.""" + 
op.drop_index(op.f('ix_control_verification_template_control_id'), table_name='control_verification_template') + op.drop_table('control_verification_template') \ No newline at end of file diff --git a/backend-api/app/api/v1/router.py b/backend-api/app/api/v1/router.py index c648004af..c3b9214b5 100644 --- a/backend-api/app/api/v1/router.py +++ b/backend-api/app/api/v1/router.py @@ -9,6 +9,7 @@ scans, settings, test, + verification_templates, ) api_router = APIRouter() @@ -39,3 +40,6 @@ # User settings routes api_router.include_router(settings.router) + +# Manual control verification template routes +api_router.include_router(verification_templates.router) diff --git a/backend-api/app/api/v1/verification_templates.py b/backend-api/app/api/v1/verification_templates.py new file mode 100644 index 000000000..5c16f00f2 --- /dev/null +++ b/backend-api/app/api/v1/verification_templates.py @@ -0,0 +1,167 @@ +"""CRUD endpoints for ControlVerificationTemplate. + +Powers the manual control verification workflow: admins maintain the +templates that auditors see when verifying pending manual controls. + +Authorisation: +- POST / PATCH / DELETE: admin only (templates are GRC content; auditors + consume them but do not author them). +- GET endpoints: any authenticated user (auditors and viewers need read + access to surface instructions in the UI). 
+""" + +from fastapi import APIRouter, Depends, HTTPException, status +from sqlalchemy import select +from sqlalchemy.ext.asyncio import AsyncSession + +from app.core.auth import get_current_user +from app.core.permissions import require_admin +from app.db.session import get_async_session +from app.models.control_verification_template import ControlVerificationTemplate +from app.models.user import User +from app.schemas.control_verification_template import ( + ControlVerificationTemplateCreate, + ControlVerificationTemplateRead, + ControlVerificationTemplateUpdate, +) + +router = APIRouter( + prefix="/verification-templates", + tags=["Verification Templates"], +) + + +@router.post( + "/", + response_model=ControlVerificationTemplateRead, + status_code=status.HTTP_201_CREATED, + summary="Create a verification template (admin only)", +) +async def create_template( + data: ControlVerificationTemplateCreate, + db: AsyncSession = Depends(get_async_session), + _: User = Depends(require_admin), +) -> ControlVerificationTemplate: + """Create a new verification template for a manual control. + + Returns 409 Conflict if a template for the given control_id already exists. 
+ """ + result = await db.execute( + select(ControlVerificationTemplate).where( + ControlVerificationTemplate.control_id == data.control_id + ) + ) + if result.scalar_one_or_none() is not None: + raise HTTPException( + status_code=status.HTTP_409_CONFLICT, + detail=f"Template for control_id '{data.control_id}' already exists", + ) + + template = ControlVerificationTemplate(**data.model_dump()) + db.add(template) + await db.commit() + await db.refresh(template) + return template + + +@router.get( + "/", + response_model=list[ControlVerificationTemplateRead], + summary="List all verification templates", +) +async def list_templates( + db: AsyncSession = Depends(get_async_session), + _: User = Depends(get_current_user), +) -> list[ControlVerificationTemplate]: + """List every verification template, ordered by control_id.""" + result = await db.execute( + select(ControlVerificationTemplate).order_by( + ControlVerificationTemplate.control_id + ) + ) + return list(result.scalars().all()) + + +@router.get( + "/{control_id}", + response_model=ControlVerificationTemplateRead, + summary="Get a verification template by control_id", +) +async def get_template( + control_id: str, + db: AsyncSession = Depends(get_async_session), + _: User = Depends(get_current_user), +) -> ControlVerificationTemplate: + """Fetch the verification template for the given control_id.""" + result = await db.execute( + select(ControlVerificationTemplate).where( + ControlVerificationTemplate.control_id == control_id + ) + ) + template = result.scalar_one_or_none() + if template is None: + raise HTTPException( + status_code=status.HTTP_404_NOT_FOUND, + detail=f"Template for control_id '{control_id}' not found", + ) + return template + + +@router.patch( + "/{control_id}", + response_model=ControlVerificationTemplateRead, + summary="Partially update a verification template (admin only)", +) +async def update_template( + control_id: str, + data: ControlVerificationTemplateUpdate, + db: AsyncSession = 
Depends(get_async_session), + _: User = Depends(require_admin), +) -> ControlVerificationTemplate: + """Partially update a template. Only fields provided in the body are applied.""" + result = await db.execute( + select(ControlVerificationTemplate).where( + ControlVerificationTemplate.control_id == control_id + ) + ) + template = result.scalar_one_or_none() + if template is None: + raise HTTPException( + status_code=status.HTTP_404_NOT_FOUND, + detail=f"Template for control_id '{control_id}' not found", + ) + + update_fields = data.model_dump(exclude_unset=True) + for field, value in update_fields.items(): + setattr(template, field, value) + + await db.commit() + await db.refresh(template) + return template + + +@router.delete( + "/{control_id}", + status_code=status.HTTP_204_NO_CONTENT, + summary="Delete a verification template (admin only)", +) +async def delete_template( + control_id: str, + db: AsyncSession = Depends(get_async_session), + _: User = Depends(require_admin), +) -> None: + """Delete the verification template for the given control_id.""" + result = await db.execute( + select(ControlVerificationTemplate).where( + ControlVerificationTemplate.control_id == control_id + ) + ) + template = result.scalar_one_or_none() + if template is None: + raise HTTPException( + status_code=status.HTTP_404_NOT_FOUND, + detail=f"Template for control_id '{control_id}' not found", + ) + + await db.delete(template) + await db.commit() \ No newline at end of file diff --git a/backend-api/app/models/__init__.py b/backend-api/app/models/__init__.py index 27c476e4d..97a7fafbc 100644 --- a/backend-api/app/models/__init__.py +++ b/backend-api/app/models/__init__.py @@ -12,6 +12,8 @@ from app.models.evidence_validation import EvidenceValidation from app.models.contact import ContactSubmission, SubmissionNote, SubmissionHistory from app.models.user_settings import UserSettings +from app.models.user_settings import UserSettings +from app.models.control_verification_template import 
ControlVerificationTemplate __all__ = [ "User", @@ -29,4 +31,5 @@ "SubmissionNote", "SubmissionHistory", "UserSettings", + "ControlVerificationTemplate", ] diff --git a/backend-api/app/models/control_verification_template.py b/backend-api/app/models/control_verification_template.py new file mode 100644 index 000000000..cb857aa81 --- /dev/null +++ b/backend-api/app/models/control_verification_template.py @@ -0,0 +1,61 @@ +"""ControlVerificationTemplate model. + +Stores per-control auditor instructions, keywords, and severity for the 14 +manual controls that AutoAudit cannot automate via the M365 collectors. +Each row corresponds to one CIS control_id (e.g. "1.1.2") and powers the +semi-automated manual verification workflow: the auditor opens a pending +manual control, sees the instructions, uploads evidence, and the validator +matches the keywords to suggest a verdict. + +Keywords are stored as JSONB to match the patterns already used by +EvidenceValidation.matches_json and ScanResult.evidence. +""" + +from datetime import datetime +from typing import Optional + +from sqlalchemy import DateTime, Integer, String, Text +from sqlalchemy.dialects.postgresql import JSONB +from sqlalchemy.orm import Mapped, mapped_column +from sqlalchemy.sql import func + +from app.db.base import Base + + +class ControlVerificationTemplate(Base): + """Verification template for a single manual CIS control. + + One row per control_id (enforced via unique constraint). + """ + + __tablename__ = "control_verification_template" + + id: Mapped[int] = mapped_column(Integer, primary_key=True) + + # CIS control identifier, e.g. "1.1.2". Must match scan_result.control_id. + control_id: Mapped[str] = mapped_column( + String(50), unique=True, nullable=False, index=True + ) + + # Human-readable control title from the CIS benchmark. + title: Mapped[str] = mapped_column(String(200), nullable=False) + + # Numbered, portal-specific auditor instructions. 
+ instructions: Mapped[str] = mapped_column(Text, nullable=False) + + # List of keywords expected to appear in compliant evidence. + # JSONB so we can index/query individual keywords later if needed. + keywords: Mapped[list] = mapped_column(JSONB, nullable=False) + + # Risk severity: "high" | "medium" | "low". + severity: Mapped[str] = mapped_column(String(20), nullable=False) + + # Expected evidence format: "screenshot" | "pdf_export" | "comment_only". + evidence_type: Mapped[Optional[str]] = mapped_column(String(50), nullable=True) + + created_at: Mapped[datetime] = mapped_column( + DateTime, nullable=False, server_default=func.now() + ) + updated_at: Mapped[datetime] = mapped_column( + DateTime, nullable=False, server_default=func.now(), onupdate=func.now() + ) \ No newline at end of file diff --git a/backend-api/app/schemas/control_verification_template.py b/backend-api/app/schemas/control_verification_template.py new file mode 100644 index 000000000..b340598f4 --- /dev/null +++ b/backend-api/app/schemas/control_verification_template.py @@ -0,0 +1,47 @@ +"""Pydantic schemas for ControlVerificationTemplate.""" + +from datetime import datetime + +from pydantic import BaseModel, Field + + +class ControlVerificationTemplateCreate(BaseModel): + """Schema for creating a new verification template.""" + + control_id: str = Field(..., max_length=50) + title: str = Field(..., max_length=200) + instructions: str + keywords: list[str] + severity: str = Field(..., max_length=20) + evidence_type: str | None = Field(None, max_length=50) + + +class ControlVerificationTemplateUpdate(BaseModel): + """Schema for partial update of a verification template. + + All fields optional — only provided fields are applied. + control_id is intentionally not updatable (it's the resource key). 
+ """ + + title: str | None = Field(None, max_length=200) + instructions: str | None = None + keywords: list[str] | None = None + severity: str | None = Field(None, max_length=20) + evidence_type: str | None = Field(None, max_length=50) + + +class ControlVerificationTemplateRead(BaseModel): + """Schema for reading a verification template.""" + + id: int + control_id: str + title: str + instructions: str + keywords: list[str] + severity: str + evidence_type: str | None + created_at: datetime + updated_at: datetime + + class Config: + from_attributes = True \ No newline at end of file From 24c12162fa843bbf19ff69bda8ba853c6b6142ae Mon Sep 17 00:00:00 2001 From: Aaron Alijani Date: Thu, 14 May 2026 13:06:38 +1000 Subject: [PATCH 2/3] feat: key on (framework, benchmark, version, control_id) tuple --- ...a95d9_add_control_verification_template.py | 61 ++++-- .../app/api/v1/verification_templates.py | 186 +++++++++++------- backend-api/app/models/__init__.py | 1 - .../models/control_verification_template.py | 83 ++++---- .../schemas/control_verification_template.py | 101 +++++++--- 5 files changed, 279 insertions(+), 153 deletions(-) diff --git a/backend-api/alembic/versions/8a7b91ea95d9_add_control_verification_template.py b/backend-api/alembic/versions/8a7b91ea95d9_add_control_verification_template.py index ccaae9add..91ed48291 100644 --- a/backend-api/alembic/versions/8a7b91ea95d9_add_control_verification_template.py +++ b/backend-api/alembic/versions/8a7b91ea95d9_add_control_verification_template.py @@ -1,41 +1,60 @@ """add control verification template - + Revision ID: 8a7b91ea95d9 Revises: j1k2l3m4n567 Create Date: 2026-05-08 09:22:25.327975 - + """ from typing import Sequence, Union - + from alembic import op import sqlalchemy as sa from sqlalchemy.dialects import postgresql - + # revision identifiers, used by Alembic. 
revision: str = '8a7b91ea95d9' down_revision: Union[str, Sequence[str], None] = 'j1k2l3m4n567' branch_labels: Union[str, Sequence[str], None] = None depends_on: Union[str, Sequence[str], None] = None - - + + def upgrade() -> None: """Upgrade schema.""" - op.create_table('control_verification_template', - sa.Column('id', sa.Integer(), nullable=False), - sa.Column('control_id', sa.String(length=50), nullable=False), - sa.Column('title', sa.String(length=200), nullable=False), - sa.Column('instructions', sa.Text(), nullable=False), - sa.Column('keywords', postgresql.JSONB(astext_type=sa.Text()), nullable=False), - sa.Column('severity', sa.String(length=20), nullable=False), - sa.Column('evidence_type', sa.String(length=50), nullable=True), - sa.Column('created_at', sa.DateTime(), server_default=sa.text('now()'), nullable=False), - sa.Column('updated_at', sa.DateTime(), server_default=sa.text('now()'), nullable=False), - sa.PrimaryKeyConstraint('id') + op.create_table( + 'control_verification_template', + sa.Column('id', sa.Integer(), nullable=False), + sa.Column('framework', sa.String(length=50), nullable=False), + sa.Column('benchmark', sa.String(length=100), nullable=False), + sa.Column('version', sa.String(length=20), nullable=False), + sa.Column('control_id', sa.String(length=50), nullable=False), + sa.Column('title', sa.String(length=200), nullable=False), + sa.Column('instructions', sa.Text(), nullable=False), + sa.Column('keywords', postgresql.JSONB(astext_type=sa.Text()), nullable=False), + sa.Column('severity', sa.String(length=20), nullable=False), + sa.Column('evidence_type', sa.String(length=50), nullable=True), + sa.Column('created_at', sa.DateTime(), server_default=sa.text('now()'), nullable=False), + sa.Column('updated_at', sa.DateTime(), server_default=sa.text('now()'), nullable=False), + sa.PrimaryKeyConstraint('id'), + sa.UniqueConstraint( + 'framework', + 'benchmark', + 'version', + 'control_id', + 
name='uq_template_framework_benchmark_version_control', + ), ) - op.create_index(op.f('ix_control_verification_template_control_id'), 'control_verification_template', ['control_id'], unique=True) - - + op.create_index( + op.f('ix_control_verification_template_control_id'), + 'control_verification_template', + ['control_id'], + unique=False, + ) + + def downgrade() -> None: """Downgrade schema.""" - op.drop_index(op.f('ix_control_verification_template_control_id'), table_name='control_verification_template') + op.drop_index( + op.f('ix_control_verification_template_control_id'), + table_name='control_verification_template', + ) op.drop_table('control_verification_template') \ No newline at end of file diff --git a/backend-api/app/api/v1/verification_templates.py b/backend-api/app/api/v1/verification_templates.py index 5c16f00f2..94fe2db50 100644 --- a/backend-api/app/api/v1/verification_templates.py +++ b/backend-api/app/api/v1/verification_templates.py @@ -1,19 +1,26 @@ """CRUD endpoints for ControlVerificationTemplate. - + Powers the manual control verification workflow: admins maintain the templates that auditors see when verifying pending manual controls. - + +Identity: + Each template is uniquely identified by the + (framework, benchmark, version, control_id) tuple. This matches the + same tuple Scan already uses (see app/api/v1/scans.py) so the lookup + pattern is consistent end-to-end and the same control_id can carry + different instructions across benchmark versions and across frameworks. + Authorisation: - POST / PATCH / DELETE: admin only (templates are GRC content; auditors consume them but do not author them). - GET endpoints: any authenticated user (auditors and viewers need read access to surface instructions in the UI). 
""" - + from fastapi import APIRouter, Depends, HTTPException, status from sqlalchemy import select from sqlalchemy.ext.asyncio import AsyncSession - + from app.core.auth import get_current_user from app.core.permissions import require_admin from app.db.session import get_async_session @@ -24,13 +31,46 @@ ControlVerificationTemplateRead, ControlVerificationTemplateUpdate, ) - + router = APIRouter( prefix="/verification-templates", tags=["Verification Templates"], ) - - + + +# ── Internal helpers ──────────────────────────────────────────────────── + + +async def _get_template_or_404( + db: AsyncSession, + framework: str, + benchmark: str, + version: str, + control_id: str, +) -> ControlVerificationTemplate: + """Fetch the template for the given tuple or raise 404.""" + result = await db.execute( + select(ControlVerificationTemplate).where( + ControlVerificationTemplate.framework == framework, + ControlVerificationTemplate.benchmark == benchmark, + ControlVerificationTemplate.version == version, + ControlVerificationTemplate.control_id == control_id, + ) + ) + template = result.scalar_one_or_none() + if template is None: + raise HTTPException( + status_code=status.HTTP_404_NOT_FOUND, + detail=( + f"Template for {framework}/{benchmark}/{version}/{control_id} not found" + ), + ) + return template + + +# ── Endpoints ─────────────────────────────────────────────────────────── + + @router.post( "/", response_model=ControlVerificationTemplateRead, @@ -43,125 +83,131 @@ async def create_template( _: User = Depends(require_admin), ) -> ControlVerificationTemplate: """Create a new verification template for a manual control. - - Returns 409 Conflict if a template for the given control_id already exists. + + Returns 409 Conflict if a template for the given + (framework, benchmark, version, control_id) tuple already exists. 
""" result = await db.execute( select(ControlVerificationTemplate).where( - ControlVerificationTemplate.control_id == data.control_id + ControlVerificationTemplate.framework == data.framework, + ControlVerificationTemplate.benchmark == data.benchmark, + ControlVerificationTemplate.version == data.version, + ControlVerificationTemplate.control_id == data.control_id, ) ) if result.scalar_one_or_none() is not None: raise HTTPException( status_code=status.HTTP_409_CONFLICT, - detail=f"Template for control_id '{data.control_id}' already exists", + detail=( + f"Template for {data.framework}/{data.benchmark}/" + f"{data.version}/{data.control_id} already exists" + ), ) - + template = ControlVerificationTemplate(**data.model_dump()) db.add(template) await db.commit() await db.refresh(template) return template - - + + @router.get( "/", response_model=list[ControlVerificationTemplateRead], - summary="List all verification templates", + summary="List verification templates", ) async def list_templates( db: AsyncSession = Depends(get_async_session), _: User = Depends(get_current_user), + framework: str | None = None, + benchmark: str | None = None, + version: str | None = None, ) -> list[ControlVerificationTemplate]: - """List every verification template, ordered by control_id.""" - result = await db.execute( - select(ControlVerificationTemplate).order_by( - ControlVerificationTemplate.control_id - ) + """List verification templates, optionally filtered by framework/benchmark/version. + + Sorted by (framework, benchmark, version, control_id) so output is stable + across calls. Phase 2 seeding scripts can pass the tuple to fetch only + the templates for a single benchmark version. 
+ """ + query = select(ControlVerificationTemplate) + if framework is not None: + query = query.where(ControlVerificationTemplate.framework == framework) + if benchmark is not None: + query = query.where(ControlVerificationTemplate.benchmark == benchmark) + if version is not None: + query = query.where(ControlVerificationTemplate.version == version) + query = query.order_by( + ControlVerificationTemplate.framework, + ControlVerificationTemplate.benchmark, + ControlVerificationTemplate.version, + ControlVerificationTemplate.control_id, ) + + result = await db.execute(query) return list(result.scalars().all()) - - + + @router.get( - "/{control_id}", + "/{framework}/{benchmark}/{version}/{control_id}", response_model=ControlVerificationTemplateRead, - summary="Get a verification template by control_id", + summary="Get a verification template by (framework, benchmark, version, control_id)", ) async def get_template( + framework: str, + benchmark: str, + version: str, control_id: str, db: AsyncSession = Depends(get_async_session), _: User = Depends(get_current_user), ) -> ControlVerificationTemplate: - """Fetch the verification template for the given control_id.""" - result = await db.execute( - select(ControlVerificationTemplate).where( - ControlVerificationTemplate.control_id == control_id - ) - ) - template = result.scalar_one_or_none() - if template is None: - raise HTTPException( - status_code=status.HTTP_404_NOT_FOUND, - detail=f"Template for control_id '{control_id}' not found", - ) - return template - - + """Fetch the verification template for the given tuple.""" + return await _get_template_or_404(db, framework, benchmark, version, control_id) + + @router.patch( - "/{control_id}", + "/{framework}/{benchmark}/{version}/{control_id}", response_model=ControlVerificationTemplateRead, summary="Partially update a verification template (admin only)", ) async def update_template( + framework: str, + benchmark: str, + version: str, control_id: str, data: 
ControlVerificationTemplateUpdate, db: AsyncSession = Depends(get_async_session), _: User = Depends(require_admin), ) -> ControlVerificationTemplate: - """Partially update a template. Only fields provided in the body are applied.""" - result = await db.execute( - select(ControlVerificationTemplate).where( - ControlVerificationTemplate.control_id == control_id - ) - ) - template = result.scalar_one_or_none() - if template is None: - raise HTTPException( - status_code=status.HTTP_404_NOT_FOUND, - detail=f"Template for control_id '{control_id}' not found", - ) - + """Partially update a template. Only fields provided in the body are applied. + + Explicit null values for non-nullable fields are rejected by the schema + validator (422), not allowed through to the DB. + """ + template = await _get_template_or_404(db, framework, benchmark, version, control_id) + update_fields = data.model_dump(exclude_unset=True) for field, value in update_fields.items(): setattr(template, field, value) - + await db.commit() await db.refresh(template) return template - - + + @router.delete( - "/{control_id}", + "/{framework}/{benchmark}/{version}/{control_id}", status_code=status.HTTP_204_NO_CONTENT, summary="Delete a verification template (admin only)", ) async def delete_template( + framework: str, + benchmark: str, + version: str, control_id: str, db: AsyncSession = Depends(get_async_session), _: User = Depends(require_admin), ) -> None: - """Delete the verification template for the given control_id.""" - result = await db.execute( - select(ControlVerificationTemplate).where( - ControlVerificationTemplate.control_id == control_id - ) - ) - template = result.scalar_one_or_none() - if template is None: - raise HTTPException( - status_code=status.HTTP_404_NOT_FOUND, - detail=f"Template for control_id '{control_id}' not found", - ) - + """Delete the verification template for the given tuple.""" + template = await _get_template_or_404(db, framework, benchmark, version, control_id) await 
db.delete(template) await db.commit() \ No newline at end of file diff --git a/backend-api/app/models/__init__.py b/backend-api/app/models/__init__.py index 97a7fafbc..31a2accfd 100644 --- a/backend-api/app/models/__init__.py +++ b/backend-api/app/models/__init__.py @@ -12,7 +12,6 @@ from app.models.evidence_validation import EvidenceValidation from app.models.contact import ContactSubmission, SubmissionNote, SubmissionHistory from app.models.user_settings import UserSettings -from app.models.user_settings import UserSettings from app.models.control_verification_template import ControlVerificationTemplate __all__ = [ diff --git a/backend-api/app/models/control_verification_template.py b/backend-api/app/models/control_verification_template.py index cb857aa81..567fe7492 100644 --- a/backend-api/app/models/control_verification_template.py +++ b/backend-api/app/models/control_verification_template.py @@ -1,61 +1,78 @@ """ControlVerificationTemplate model. - -Stores per-control auditor instructions, keywords, and severity for the 14 -manual controls that AutoAudit cannot automate via the M365 collectors. -Each row corresponds to one CIS control_id (e.g. "1.1.2") and powers the -semi-automated manual verification workflow: the auditor opens a pending -manual control, sees the instructions, uploads evidence, and the validator -matches the keywords to suggest a verdict. - + +Stores per-control auditor instructions, keywords, and severity for the manual +controls that AutoAudit cannot automate via the M365 collectors. Each row is +uniquely identified by the (framework, benchmark, version, control_id) tuple +so that the same control_id can carry different instructions across CIS +benchmark versions (e.g. "1.1.2" in v3.1.0 vs v6.0.0) and across frameworks +(e.g. CIS M365 vs future Azure benchmarks). 
+ +Powers the semi-automated manual verification workflow: the auditor opens a +pending manual control, sees the instructions, uploads evidence, and the +validator matches the keywords to suggest a verdict. + Keywords are stored as JSONB to match the patterns already used by EvidenceValidation.matches_json and ScanResult.evidence. """ - + from datetime import datetime from typing import Optional - -from sqlalchemy import DateTime, Integer, String, Text + +from sqlalchemy import DateTime, Integer, String, Text, UniqueConstraint from sqlalchemy.dialects.postgresql import JSONB from sqlalchemy.orm import Mapped, mapped_column from sqlalchemy.sql import func - + from app.db.base import Base - - + + class ControlVerificationTemplate(Base): - """Verification template for a single manual CIS control. - - One row per control_id (enforced via unique constraint). - """ - + """Verification template for a single manual control within a framework/benchmark/version.""" + __tablename__ = "control_verification_template" - + id: Mapped[int] = mapped_column(Integer, primary_key=True) - - # CIS control identifier, e.g. "1.1.2". Must match scan_result.control_id. - control_id: Mapped[str] = mapped_column( - String(50), unique=True, nullable=False, index=True - ) - - # Human-readable control title from the CIS benchmark. + + # Framework/benchmark/version tuple — mirrors Scan.framework/benchmark/version + # so the lookup pattern (and column types) are consistent across the schema. + framework: Mapped[str] = mapped_column(String(50), nullable=False) + benchmark: Mapped[str] = mapped_column(String(100), nullable=False) + version: Mapped[str] = mapped_column(String(20), nullable=False) + + # CIS control identifier within the (framework, benchmark, version) above, + # e.g. "1.1.2". Must match scan_result.control_id for the same tuple. + control_id: Mapped[str] = mapped_column(String(50), nullable=False, index=True) + + # Human-readable control title from the benchmark. 
title: Mapped[str] = mapped_column(String(200), nullable=False) - + # Numbered, portal-specific auditor instructions. instructions: Mapped[str] = mapped_column(Text, nullable=False) - + # List of keywords expected to appear in compliant evidence. # JSONB so we can index/query individual keywords later if needed. keywords: Mapped[list] = mapped_column(JSONB, nullable=False) - - # Risk severity: "high" | "medium" | "low". + + # Risk severity: "high" | "medium" | "low". Enforced at the API layer via + # the Pydantic Literal type on the Create/Update schemas. severity: Mapped[str] = mapped_column(String(20), nullable=False) - + # Expected evidence format: "screenshot" | "pdf_export" | "comment_only". evidence_type: Mapped[Optional[str]] = mapped_column(String(50), nullable=True) - + created_at: Mapped[datetime] = mapped_column( DateTime, nullable=False, server_default=func.now() ) updated_at: Mapped[datetime] = mapped_column( DateTime, nullable=False, server_default=func.now(), onupdate=func.now() + ) + + __table_args__ = ( + UniqueConstraint( + "framework", + "benchmark", + "version", + "control_id", + name="uq_template_framework_benchmark_version_control", + ), ) \ No newline at end of file diff --git a/backend-api/app/schemas/control_verification_template.py b/backend-api/app/schemas/control_verification_template.py index b340598f4..1706d8318 100644 --- a/backend-api/app/schemas/control_verification_template.py +++ b/backend-api/app/schemas/control_verification_template.py @@ -1,47 +1,92 @@ """Pydantic schemas for ControlVerificationTemplate.""" - + from datetime import datetime - -from pydantic import BaseModel, Field - - +from typing import Literal, Optional + +from pydantic import BaseModel, ConfigDict, Field, model_validator + + +# Allowed severity values. Enforced at the API layer; the DB still stores +# this as String(20) for forward compatibility. 
+Severity = Literal["high", "medium", "low"] + +# Allowed evidence_type values for templates that require uploaded evidence. +EvidenceType = Literal["screenshot", "pdf_export", "comment_only"] + + class ControlVerificationTemplateCreate(BaseModel): - """Schema for creating a new verification template.""" - + """Schema for creating a new verification template. + + The (framework, benchmark, version, control_id) tuple uniquely identifies + the template. A duplicate tuple returns 409 Conflict at the endpoint. + """ + + framework: str = Field(..., max_length=50) + benchmark: str = Field(..., max_length=100) + version: str = Field(..., max_length=20) control_id: str = Field(..., max_length=50) title: str = Field(..., max_length=200) instructions: str keywords: list[str] - severity: str = Field(..., max_length=20) - evidence_type: str | None = Field(None, max_length=50) - - + severity: Severity + evidence_type: Optional[EvidenceType] = None + + class ControlVerificationTemplateUpdate(BaseModel): """Schema for partial update of a verification template. - - All fields optional — only provided fields are applied. - control_id is intentionally not updatable (it's the resource key). + + Only fields provided are applied. The (framework, benchmark, version, + control_id) tuple is the resource key and is therefore not updatable — + callers wanting to relocate a template should DELETE and POST. + + Non-nullable columns (title, instructions, keywords, severity) must not + be set to None explicitly; doing so returns 422 rather than letting the + DB raise a 500 on the integrity error. 
""" - - title: str | None = Field(None, max_length=200) - instructions: str | None = None - keywords: list[str] | None = None - severity: str | None = Field(None, max_length=20) - evidence_type: str | None = Field(None, max_length=50) - - + + title: Optional[str] = Field(None, max_length=200) + instructions: Optional[str] = None + keywords: Optional[list[str]] = None + severity: Optional[Severity] = None + evidence_type: Optional[EvidenceType] = None + + @model_validator(mode="after") + def reject_explicit_nulls_on_required_fields(self) -> "ControlVerificationTemplateUpdate": + """Reject explicit null values for fields that are non-nullable in the DB. + + Pydantic's exclude_unset behaviour treats `{"title": null}` and + `{}` differently — the former is "set to None", the latter is "not + set". The endpoint's update loop uses exclude_unset=True, so an + explicit null would attempt to write NULL to a NOT NULL column and + surface as a 500. Rejecting at validation time gives the client a + clean 422 instead. 
+ """ + explicit_nulls = [ + name + for name in ("title", "instructions", "keywords", "severity") + if name in self.model_fields_set and getattr(self, name) is None + ] + if explicit_nulls: + raise ValueError( + "Fields cannot be set to null: " + ", ".join(explicit_nulls) + ) + return self + + class ControlVerificationTemplateRead(BaseModel): """Schema for reading a verification template.""" - + + model_config = ConfigDict(from_attributes=True) + id: int + framework: str + benchmark: str + version: str control_id: str title: str instructions: str keywords: list[str] severity: str - evidence_type: str | None + evidence_type: Optional[str] created_at: datetime - updated_at: datetime - - class Config: - from_attributes = True \ No newline at end of file + updated_at: datetime \ No newline at end of file From d6a7b531d0f11eda74674a3ce606dca0d9ba1c96 Mon Sep 17 00:00:00 2001 From: Aaron Alijani Date: Thu, 4 Jun 2026 17:06:26 +1000 Subject: [PATCH 3/3] docs: add manual control verification user guide --- .../manual_control_verification_Guide.md | 259 ++++++++++++++++++ 1 file changed, 259 insertions(+) create mode 100644 docs/features/manual_control_verification_Guide.md diff --git a/docs/features/manual_control_verification_Guide.md b/docs/features/manual_control_verification_Guide.md new file mode 100644 index 000000000..2d0ebe2b2 --- /dev/null +++ b/docs/features/manual_control_verification_Guide.md @@ -0,0 +1,259 @@ +# Manual Control Verification - User Guide + +**Feature owner:** Aaron Alijani (backend/DevOps) + +--- + +## Overview + +AutoAudit scans Microsoft 365 tenants against the CIS M365 Foundations Benchmark +and writes a pass, fail, or pending result for each control. Most controls are +resolved automatically through the Microsoft Graph API. However, 14 controls +cannot be checked programmatically, either because Microsoft provides no API +for the setting or because the required API is not yet integrated into AutoAudit. 
+ +This feature gives auditors a structured, consistent workflow for handling those +14 pending controls manually, and lays the backend foundation for semi-automated +verification in Phase 2. + +--- + +## The 14 Manual Controls + +Not all 14 controls are manual for the same reason. They fall into two groups. + +### Truly manual- no API exists + +These controls have no public Microsoft API and are only reachable through internal +Microsoft APIs that app registrations cannot access, or require a human judgment +call that cannot be expressed as a configuration value. They will remain manual +until Microsoft exposes the relevant API surface. + +| Control ID | Severity | Service | Why manual | +| ---------- | -------- | -------- | ------------------------------------------------------------------------------------------------------------ | +| 1.1.2 | High | Entra ID | Which accounts are designated as break-glass is an organisational policy decision; no API can read this | +| 1.3.8 | Medium | Sway | Microsoft provides no API for Sway external sharing settings | +| 2.2.1 | High | Entra ID | Defining which accounts to monitor is a human decision; no API can confirm it is set up correctly | +| 2.4.3 | Medium | Defender | MCAS configuration must be verified through the security portal; no stable API exposes its enabled state | +| 5.1.2.1 | Medium | Entra ID | The relevant endpoint only exists in the Microsoft Graph beta API, which is not stable enough for production | +| 5.1.2.4 | Medium | Entra ID | Only accessible through an internal Azure API, not available to app registrations | +| 5.1.2.5 | Low | Entra ID | Microsoft does not expose this setting through the Graph API | +| 5.1.2.6 | Low | Entra ID | Same as 5.1.2.4, internal Azure API only | +| 5.1.8.1 | High | Entra ID | Verifying password hash sync requires checking on-premises AD Connect directly, no cloud API exists | +| 5.2.4.1 | Medium | Entra ID | SSPR settings are not exposed through the Graph API | +| 8.4.1 | 
Medium | Teams | Teams app permission policy configuration is only accessible through the Teams admin portal | + +### Automation candidates- manual for now + +These controls can eventually be automated once outstanding blockers are resolved. + +| Control ID | Severity | Service | Blocker | When it can be automated | +| ------------ | -------- | ---------- | ------------------------------------------------------------------------------------------------- | --------------------------------------------------------------------------------------------------------------- | +| 7.2.8 | Medium | SharePoint | The `sharepoint.spo_tenant` collector exists in the codebase but raises `NotImplementedError` | Once the collector implementation is completed, no other changes are needed. | +| 9.1.1–9.1.12 | Medium | Fabric | AutoAudit has not yet confirmed that app-only authentication works against Fabric admin endpoints | Once Fabric app-only auth is validated, all 12 controls can be automated in one go. | + +--- + +## How it works - the two database tables + +The feature is split across two tables that work together. + +### `manual_scan_result_detail` (PR #166) + +This is the **record-keeping** side. When an auditor manually verifies a pending +control, this table records that they did it. + +| Field | Purpose | +| --------------------------- | ---------------------------------------------------------------------------- | +| `scan_result_id` | Links one-to-one to the existing `scan_result` row for this control | +| `user_id` | Which auditor performed the verification | +| `comment` | An optional free-text note that the auditor leaves explaining their decision | +| `created_at` / `updated_at` | Timestamps | + +`scan_result` remains the single source of truth for the outcome (pass/fail/pending). +This table only adds the manual-specific fields that `scan_result` does not have. + +**Ownership rule:** An auditor can only read, update, or delete their own verification +records. 
Attempting to access another user's record returns HTTP 403. + +### `control_verification_template` (PR #229) + +This is the **instruction** side. For each of the 14 manual controls, this table +stores what the auditor needs to check and what compliant evidence looks like. + +| Field | Purpose | +| --------------- | --------------------------------------------------------------------------- | +| `framework` | e.g. `CIS` | +| `benchmark` | e.g. `CIS M365 Foundations` | +| `version` | e.g. `v6.0.0` | +| `control_id` | e.g. `1.1.2` - matches `scan_result.control_id` | +| `title` | Human-readable control title from the benchmark | +| `instructions` | Numbered, portal-specific steps telling the auditor where to look | +| `keywords` | JSONB list of terms expected to appear in compliant evidence | +| `severity` | `high`, `medium`, or `low` - drives confidence scoring thresholds in Phase 2 | +| `evidence_type` | Expected evidence format: `screenshot`, `pdf_export`, or `comment_only` | + +The combination of `(framework, benchmark, version, control_id)` is unique. This +means the same control ID can carry different instructions across CIS benchmark +versions (e.g. `1.1.2` in v3.1.0 versus v6.0.0) without collision. When Phase 2 +seeds the 14 templates, this design prevents unique constraint failures. + +--- + +## End-to-end flow + +**Step 1: Scan runs** +AutoAudit runs a benchmark scan against the tenant. For each of the 14 +manual controls, a row is written to `scan_result` with status `pending`. +Automated controls resolve immediately; these 14 wait for an auditor. + +**Step 2: Auditor opens a pending control** +The auditor sees the pending controls in the frontend. When they open one, +the frontend calls +`GET /v1/verification-templates/{framework}/{benchmark}/{version}/{control_id}` +to fetch the template for that control. + +**Step 3: Instructions displayed** +The template returns the numbered portal-specific instructions and the +keyword list for that control.
The auditor sees exactly where to go in the +M365 admin portal and what compliant evidence should contain. + +**Step 4: Auditor uploads screenshot** +The auditor follows the instructions, navigates to the relevant M365 portal +screen, captures a screenshot or PDF export, and uploads it as evidence. + +**Step 5: OCR extracts text** +The existing evidence scanner in `/security` runs OCR on the uploaded file +and extracts the text content. + +**Step 6: Auditor reviews and confirms** +The auditor records their verdict via `POST /v1/manual-verification/` — pass +or fail, with an optional comment. Phase 2 will add confidence scoring here: +the scanner matches the extracted text against the template keywords and +suggests a verdict with a confidence score before the auditor confirms. + +**Step 7: Status propagation** +The backend updates `scan_result.status` from `pending` to `pass` or `fail`. +The `finalize_scan_if_complete` function fires to check whether all controls +in the scan are now resolved. + +**Step 8: Scan completes** +Once all 14 manual controls have a verdict, the scan reaches completed state. + +--- + +## API endpoints + +All endpoints require a valid JWT bearer token. Obtain one via `POST /v1/auth/login`. 
+ +### Manual Verification - `/v1/manual-verification` + +| Method | Path | Description | Auth required | +| ------ | --------------------------------------------------------- | ---------------------------------------------- | ----------------------- | +| POST | `/v1/manual-verification/` | Submit a manual verification for a scan result | Auditor (own scan only) | +| GET | `/v1/manual-verification/{id}` | Get a verification by ID | Owner only | +| GET | `/v1/manual-verification/by-scan-result/{scan_result_id}` | Get a verification by scan result | Owner only | +| PATCH | `/v1/manual-verification/{id}` | Update the comment on a verification | Owner only | +| DELETE | `/v1/manual-verification/{id}` | Delete a verification record | Owner only | + +### Verification Templates - `/v1/verification-templates` + +| Method | Path | Description | Auth required | +| ------ | --------------------------------------------------------------------------- | ------------------------------ | ---------------------- | +| POST | `/v1/verification-templates/` | Create a template (admin only) | Admin | +| GET | `/v1/verification-templates/` | List all templates | Any authenticated user | +| GET | `/v1/verification-templates/{framework}/{benchmark}/{version}/{control_id}` | Get a specific template | Any authenticated user | +| PATCH | `/v1/verification-templates/{framework}/{benchmark}/{version}/{control_id}` | Update a template (admin only) | Admin | +| DELETE | `/v1/verification-templates/{framework}/{benchmark}/{version}/{control_id}` | Delete a template (admin only) | Admin | + +All endpoints are visible in Swagger at `http://localhost:8000/docs` under the +**Verification Templates** section. + +--- + +## Confidence scoring - how Phase 2 will suggest verdicts + +When an auditor uploads evidence in Phase 2, the existing evidence scanner +(OCR + keyword matching in `/security`) will extract text from the file and check +how many of the template's keywords appear in it.
The match percentage is turned +into a verdict suggestion based on the severity of the control. + +### Base thresholds + +| Match % | Suggestion | Meaning | +| ------- | --------------- | ------------------------------------------------------------ | +| ≥ 80% | Suggest pass | Enough keywords found: compliant evidence likely captured | +| 50–79% | Flag for review | Some keywords matched, but not enough to be confident | +| < 50% | Suggest fail | Too few keywords: evidence does not clearly show compliance | + +### Thresholds adjusted by severity + +Higher-severity controls demand more keyword matches before suggesting pass, +because getting them wrong in the pass direction has serious real-world consequences. + +| Severity | Suggest pass | Flag for review | Suggest fail | +| -------- | ------------ | --------------- | ------------ | +| Critical | ≥ 90% | 60–89% | < 60% | +| High | ≥ 80% | 50–79% | < 50% | +| Medium | ≥ 70% | 40–69% | < 40% | +| Low | ≥ 60% | 30–59% | < 30% | + +**Example: why severity matters** + +Control `1.1.2` (high) checks that two emergency break-glass accounts exist. +If the algorithm wrongly suggests pass when no break-glass accounts are configured, +the tenant has no fallback if all primary admin accounts are locked out. The higher +threshold exists to guard against this. + +Control `5.1.2.5` (low) checks whether the "stay signed in" prompt is hidden on +the login page. If the algorithm wrongly flags it as failed, an admin spends a few +minutes double-checking a low-risk setting. A lower threshold is appropriate here. + +Confidence scoring thresholds are implemented in +`backend-api/app/services/confidence_scorer.py` in the `PASS_THRESHOLDS` and +`REVIEW_THRESHOLDS` dictionaries. If the team adjusts any values after testing, +update both the code and this document at the same time. + +The auditor always has the final say: the system suggests a verdict, and the auditor +confirms or overrides it.
+ +--- + +## Delivery phases + +### Phase 1 - Backend foundation (this trimester, complete) + +- `control_verification_template` table and CRUD API (PR #229) +- `manual_scan_result_detail` table and CRUD API (PR #166) +- Composite unique key enabling multi-version benchmark support +- Confidence scoring threshold design documented (see above) + +### Phase 2 - Semi-automated verification + +- Connect evidence uploads to `scan_result` for manual controls +- Run the evidence scanner against template keywords +- Return a confidence score and verdict suggestion to the auditor UI +- Auditor confirms or overrides the suggestion + +### Phase 3 - Status propagation and scan finalisation + +- Auditor verdict updates `scan_result.status` from pending to pass or fail +- Scan finalisation logic: a benchmark scan is only complete when all pending + manual controls have been resolved +- Reporting: manual verdicts included in compliance reports alongside automated results + +--- + +## Related files + +| File | Purpose | +| --------------------------------------------------------- | --------------------------------------- | +| `backend-api/app/models/control_verification_template.py` | SQLAlchemy model - PR #229 | +| `backend-api/app/models/manual_scan_result_detail.py` | SQLAlchemy model - PR #166 | +| `backend-api/app/api/v1/verification_templates.py` | API endpoints - PR #229 | +| `backend-api/app/api/v1/manual_verification.py` | API endpoints - PR #166 | +| `backend-api/alembic/versions/8a7b91ea95d9_*.py` | Migration - PR #229 | +| `backend-api/alembic/versions/ccf7645372fc_*.py` | Migration - PR #166 | +| `backend-api/app/services/confidence_scorer.py` | Confidence scoring thresholds - Phase 2 | +| `docs/grc/manual_control_classification.md` | Classification of the 14 controls | +| `docs/grc/confidence_threshold_justification.md` | Threshold justification per severity |