From bc2d6848325ebb4f69cbb5332a1ba1c3a322bdc4 Mon Sep 17 00:00:00 2001 From: stewartshea Date: Tue, 12 May 2026 08:05:20 -0400 Subject: [PATCH 01/13] Implement CodeCollection Visibility and Image Sync Features - Added optional visibility field to CodeCollection model, allowing collections to be marked as 'public' or 'hidden'. - Updated codecollections.yaml to include image_source and image_registry fields for better image tracking. - Introduced a new scheduled task for syncing image tags from OCI registries, enhancing image catalog management. - Refactored various database queries to respect visibility settings, ensuring hidden collections are excluded from public-facing endpoints while still being accessible for internal processes. - Enhanced logging to reflect visibility status during collection creation and updates. --- .../004_add_image_metadata_and_visibility.py | 96 +++++++ cc-registry-v2/backend/app/core/visibility.py | 38 +++ cc-registry-v2/backend/app/main.py | 149 +++++++---- .../backend/app/models/code_collection.py | 6 + cc-registry-v2/backend/app/models/version.py | 18 +- .../backend/app/routers/cc_catalog.py | 240 ++++++++++++++++++ .../backend/app/routers/versions.py | 26 +- .../backend/app/schemas/__init__.py | 0 .../backend/app/schemas/cc_catalog.py | 74 ++++++ .../backend/app/sources/__init__.py | 17 ++ cc-registry-v2/backend/app/sources/base.py | 80 ++++++ cc-registry-v2/backend/app/sources/oci.py | 220 ++++++++++++++++ .../backend/app/sources/registry.py | 62 +++++ cc-registry-v2/backend/app/sources/static.py | 117 +++++++++ .../backend/app/tasks/celery_app.py | 1 + .../backend/app/tasks/image_sync_tasks.py | 203 +++++++++++++++ .../backend/app/tasks/registry_tasks.py | 16 +- cc-registry-v2/schedules.yaml | 17 ++ codecollections.yaml | 18 ++ 19 files changed, 1334 insertions(+), 64 deletions(-) create mode 100644 cc-registry-v2/backend/alembic/versions/004_add_image_metadata_and_visibility.py create mode 100644 
cc-registry-v2/backend/app/core/visibility.py create mode 100644 cc-registry-v2/backend/app/routers/cc_catalog.py create mode 100644 cc-registry-v2/backend/app/schemas/__init__.py create mode 100644 cc-registry-v2/backend/app/schemas/cc_catalog.py create mode 100644 cc-registry-v2/backend/app/sources/__init__.py create mode 100644 cc-registry-v2/backend/app/sources/base.py create mode 100644 cc-registry-v2/backend/app/sources/oci.py create mode 100644 cc-registry-v2/backend/app/sources/registry.py create mode 100644 cc-registry-v2/backend/app/sources/static.py create mode 100644 cc-registry-v2/backend/app/tasks/image_sync_tasks.py diff --git a/cc-registry-v2/backend/alembic/versions/004_add_image_metadata_and_visibility.py b/cc-registry-v2/backend/alembic/versions/004_add_image_metadata_and_visibility.py new file mode 100644 index 000000000000..de99d452c3cd --- /dev/null +++ b/cc-registry-v2/backend/alembic/versions/004_add_image_metadata_and_visibility.py @@ -0,0 +1,96 @@ +"""add image metadata to codecollection_versions and visibility to codecollections + +Adds the columns needed to track versioned OCI image artifacts per ref so the +RunWhen platform (PAPI) can consume a built-image catalog directly from the +codecollection-registry instead of running its own corestate-operator. + +Also adds a `visibility` column on `codecollections` so a CC can be tracked +for image consumption but kept out of the public registry website / MCP / +AI search (e.g. customer-private, internal, deprecated CCs). + +Revision ID: 004 +Revises: 003 +Create Date: 2026-05-11 +""" +from alembic import op +import sqlalchemy as sa + + +# revision identifiers, used by Alembic. 
+revision = "004" +down_revision = "003" +branch_labels = None +depends_on = None + + +def upgrade() -> None: + # --- image metadata on codecollection_versions --- + op.execute( + """ + DO $$ + BEGIN + IF NOT EXISTS ( + SELECT 1 FROM information_schema.columns + WHERE table_name = 'codecollection_versions' + AND column_name = 'image_registry' + ) THEN + ALTER TABLE codecollection_versions + ADD COLUMN image_registry VARCHAR(500), + ADD COLUMN image_tag VARCHAR(200), + ADD COLUMN image_digest VARCHAR(80), + ADD COLUMN commit_hash VARCHAR(40), + ADD COLUMN rt_revision VARCHAR(40), + ADD COLUMN image_built_at TIMESTAMP; + END IF; + END $$; + """ + ) + + # Index for PAPI's "latest ref for this CC" lookups. + op.execute( + """ + CREATE INDEX IF NOT EXISTS ix_ccv_collection_image_tag + ON codecollection_versions (codecollection_id, image_tag); + """ + ) + + # --- visibility on codecollections --- + op.execute( + """ + DO $$ + BEGIN + IF NOT EXISTS ( + SELECT 1 FROM information_schema.columns + WHERE table_name = 'codecollections' + AND column_name = 'visibility' + ) THEN + ALTER TABLE codecollections + ADD COLUMN visibility VARCHAR(20) NOT NULL DEFAULT 'public'; + END IF; + END $$; + """ + ) + op.execute( + """ + CREATE INDEX IF NOT EXISTS ix_cc_visibility + ON codecollections (visibility); + """ + ) + + +def downgrade() -> None: + op.execute("DROP INDEX IF EXISTS ix_cc_visibility") + op.execute("ALTER TABLE codecollections DROP COLUMN IF EXISTS visibility") + + op.execute("DROP INDEX IF EXISTS ix_ccv_collection_image_tag") + op.execute( + """ + ALTER TABLE codecollection_versions + DROP COLUMN IF EXISTS image_built_at, + DROP COLUMN IF EXISTS rt_revision, + DROP COLUMN IF EXISTS commit_hash, + DROP COLUMN IF EXISTS image_digest, + DROP COLUMN IF EXISTS image_tag, + DROP COLUMN IF EXISTS image_registry; + """ + ) diff --git a/cc-registry-v2/backend/app/core/visibility.py b/cc-registry-v2/backend/app/core/visibility.py new file mode 100644 index 000000000000..192430d91381 
--- /dev/null +++ b/cc-registry-v2/backend/app/core/visibility.py @@ -0,0 +1,38 @@ +""" +Visibility filter helpers. + +A CodeCollection's `visibility` flag controls whether it appears on +public-audience surfaces: + + - 'public' – default. Shown on the registry website, MCP, AI search, etc. + - 'hidden' – tracked for PAPI consumption but excluded from public lists. + +This is a UX/discovery toggle, NOT a security boundary. Image-level +access control still lives in the OCI registry. + +Centralizing the filter here keeps the rule consistent across endpoints — +if we ever add a third visibility tier (e.g. 'archived'), we change one +place rather than auditing every router. +""" +from __future__ import annotations + +from sqlalchemy.orm import Query + +from app.models import CodeCollection + +PUBLIC_VISIBILITY = "public" +HIDDEN_VISIBILITY = "hidden" + + +def public_only(query: Query) -> Query: + """ + Apply `visibility = 'public'` to a SQLAlchemy query that selects from + or joins to `codecollections`. Use this on every public-audience + endpoint (anything PAPI / corestate would NOT call). 
+ """ + return query.filter(CodeCollection.visibility == PUBLIC_VISIBILITY) + + +def is_public(cc: CodeCollection) -> bool: + """Predicate version for code paths that already have a loaded row.""" + return (cc.visibility or PUBLIC_VISIBILITY) == PUBLIC_VISIBILITY diff --git a/cc-registry-v2/backend/app/main.py b/cc-registry-v2/backend/app/main.py index 2a0c2b88fcd8..501771847277 100644 --- a/cc-registry-v2/backend/app/main.py +++ b/cc-registry-v2/backend/app/main.py @@ -99,7 +99,7 @@ async def health_check(): } # Include routers -from app.routers import admin, tasks, raw_data, admin_crud, task_execution_admin, versions, task_management, admin_inventory, helm_charts, mcp_chat, chat_debug, github_issues, schedule_config, analytics, vector_search, intake +from app.routers import admin, tasks, raw_data, admin_crud, task_execution_admin, versions, task_management, admin_inventory, helm_charts, mcp_chat, chat_debug, github_issues, schedule_config, analytics, vector_search, intake, cc_catalog app.include_router(admin.router) app.include_router(tasks.router) app.include_router(raw_data.router) @@ -116,6 +116,9 @@ async def health_check(): app.include_router(analytics.router) app.include_router(vector_search.router) app.include_router(intake.router) +# CodeCollection catalog (PAPI-facing). Sees both public AND hidden CCs by +# design — PAPI needs to resolve image refs even for hidden collections. 
+app.include_router(cc_catalog.router) @app.get("/api/v1/registry/collections") async def list_collections(): @@ -127,8 +130,11 @@ async def list_collections(): db = SessionLocal() try: - collections = db.query(CodeCollection).filter(CodeCollection.is_active == True).all() - + from app.core.visibility import public_only + collections = public_only( + db.query(CodeCollection).filter(CodeCollection.is_active == True) + ).all() + result = [] for collection in collections: # Calculate statistics for each collection @@ -181,12 +187,16 @@ async def get_collection_by_slug(collection_slug: str): db = SessionLocal() try: - # Find the collection - collection = db.query(CodeCollection).filter( - CodeCollection.slug == collection_slug, - CodeCollection.is_active == True + from app.core.visibility import public_only + # Find the collection — hidden CCs are treated as 404 on the + # public website even though PAPI can still see them via /catalog. + collection = public_only( + db.query(CodeCollection).filter( + CodeCollection.slug == collection_slug, + CodeCollection.is_active == True, + ) ).first() - + if not collection: return JSONResponse( status_code=404, @@ -252,12 +262,20 @@ async def get_all_tasks( db = SessionLocal() try: - # Build the query - query = db.query(Codebundle).filter(Codebundle.is_active == True) - + from app.core.visibility import public_only + # Build the query — always join CodeCollection so we can scope to + # public-visibility collections (hidden CCs and their codebundles + # do not appear on the public registry website). 
+ query = ( + db.query(Codebundle) + .join(CodeCollection, Codebundle.codecollection_id == CodeCollection.id) + .filter(Codebundle.is_active == True) + ) + query = public_only(query) + # Filter by collection if specified if collection_slug: - query = query.join(CodeCollection).filter(CodeCollection.slug == collection_slug) + query = query.filter(CodeCollection.slug == collection_slug) # Filter by support tags if specified (multiple tags) if support_tags: @@ -426,9 +444,16 @@ async def list_codebundles( db = SessionLocal() try: - # Build base query - query = db.query(Codebundle).filter(Codebundle.is_active == True) - + from app.core.visibility import public_only + # Build base query — join the parent CC so we can scope to + # public-visibility collections. + query = ( + db.query(Codebundle) + .join(CodeCollection, Codebundle.codecollection_id == CodeCollection.id) + .filter(Codebundle.is_active == True) + ) + query = public_only(query) + # Apply search filter — supports natural language queries # by splitting into keywords and matching word-by-word if search: @@ -627,12 +652,16 @@ async def get_codebundle_by_slug(collection_slug: str, codebundle_slug: str): db = SessionLocal() try: - # First find the collection - collection = db.query(CodeCollection).filter( - CodeCollection.slug == collection_slug, - CodeCollection.is_active == True + from app.core.visibility import public_only + # First find the collection. Hidden CCs are treated as 404 from + # the public website even though PAPI can still resolve them. 
+ collection = public_only( + db.query(CodeCollection).filter( + CodeCollection.slug == collection_slug, + CodeCollection.is_active == True, + ) ).first() - + if not collection: return JSONResponse( status_code=404, @@ -720,13 +749,17 @@ async def get_recent_codebundles(): db = SessionLocal() try: - # Get recent codebundles ordered by git_updated_at only, excluding rw-generic-codecollection - codebundles = db.query(Codebundle).join( - CodeCollection, Codebundle.codecollection_id == CodeCollection.id - ).filter( - Codebundle.is_active == True, - Codebundle.git_updated_at.isnot(None), # Only codebundles with git dates - CodeCollection.slug != 'rw-generic-codecollection' # Exclude generics + from app.core.visibility import public_only + # Get recent codebundles ordered by git_updated_at only, + # excluding rw-generic-codecollection and any hidden CCs. + codebundles = public_only( + db.query(Codebundle).join( + CodeCollection, Codebundle.codecollection_id == CodeCollection.id + ).filter( + Codebundle.is_active == True, + Codebundle.git_updated_at.isnot(None), + CodeCollection.slug != 'rw-generic-codecollection', + ) ).order_by( desc(Codebundle.git_updated_at) ).limit(20).all() @@ -769,17 +802,21 @@ async def get_recent_tasks(): db = SessionLocal() try: - # Get codebundles with tasks, ordered by git_updated_at, excluding rw-generic-codecollection - codebundles = db.query(Codebundle).join( - CodeCollection, Codebundle.codecollection_id == CodeCollection.id - ).filter( - Codebundle.is_active == True, - Codebundle.git_updated_at.isnot(None), - Codebundle.tasks.isnot(None), - CodeCollection.slug != 'rw-generic-codecollection' # Exclude generics + from app.core.visibility import public_only + # Get codebundles with tasks, ordered by git_updated_at, + # excluding rw-generic-codecollection and hidden CCs. 
+ codebundles = public_only( + db.query(Codebundle).join( + CodeCollection, Codebundle.codecollection_id == CodeCollection.id + ).filter( + Codebundle.is_active == True, + Codebundle.git_updated_at.isnot(None), + Codebundle.tasks.isnot(None), + CodeCollection.slug != 'rw-generic-codecollection', + ) ).order_by( desc(Codebundle.git_updated_at) - ).limit(100).all() # Get more codebundles to extract tasks from + ).limit(100).all() result = [] for cb in codebundles: @@ -882,20 +919,32 @@ async def get_registry_stats(): db = SessionLocal() try: - # Count collections - collections_count = db.query(CodeCollection).filter(CodeCollection.is_active == True).count() - - # Count codebundles - codebundles_count = db.query(Codebundle).filter(Codebundle.is_active == True).count() - - # Count tasks and SLIs using the authoritative integer fields (task_count, sli_count) - # set by the canonical parser. This is both more efficient (SQL SUM vs loading all - # records) and more reliable than counting JSON array lengths, which could drift - # if a competing code path updates the arrays without updating the counts. - stats = db.query( - func.coalesce(func.sum(Codebundle.task_count), 0).label('total_tasks'), - func.coalesce(func.sum(Codebundle.sli_count), 0).label('total_slis') - ).filter(Codebundle.is_active == True).first() + from app.core.visibility import public_only + # Count public collections only — homepage stats shouldn't expose + # the existence of hidden CCs. + collections_count = public_only( + db.query(CodeCollection).filter(CodeCollection.is_active == True) + ).count() + + # Count codebundles belonging to public collections. + cb_query = ( + db.query(Codebundle) + .join(CodeCollection, Codebundle.codecollection_id == CodeCollection.id) + .filter(Codebundle.is_active == True) + ) + codebundles_count = public_only(cb_query).count() + + # Count tasks and SLIs using the authoritative integer fields + # (task_count, sli_count) set by the canonical parser. 
Scoped to + # public CCs so the homepage stays consistent. + stats = public_only( + db.query( + func.coalesce(func.sum(Codebundle.task_count), 0).label('total_tasks'), + func.coalesce(func.sum(Codebundle.sli_count), 0).label('total_slis') + ) + .join(CodeCollection, Codebundle.codecollection_id == CodeCollection.id) + .filter(Codebundle.is_active == True) + ).first() total_tasks = int(stats.total_tasks) total_slis = int(stats.total_slis) diff --git a/cc-registry-v2/backend/app/models/code_collection.py b/cc-registry-v2/backend/app/models/code_collection.py index b33d2690d428..ef6e2dd4420c 100644 --- a/cc-registry-v2/backend/app/models/code_collection.py +++ b/cc-registry-v2/backend/app/models/code_collection.py @@ -18,6 +18,12 @@ class CodeCollection(Base): git_ref = Column(String(50), default="main") last_synced = Column(DateTime) is_active = Column(Boolean, default=True) + # 'public' – shown on registry website, MCP, AI search, etc. + # 'hidden' – CC is still synced & its images tracked for PAPI consumption, + # but it is excluded from all public-facing registry endpoints. + # NOTE: 'hidden' is a UX/discovery toggle, NOT a security boundary. The + # OCI registry remains the source of truth for image access control. 
+ visibility = Column(String(20), nullable=False, default="public", index=True) created_at = Column(DateTime, default=datetime.utcnow) updated_at = Column(DateTime, default=datetime.utcnow, onupdate=datetime.utcnow) diff --git a/cc-registry-v2/backend/app/models/version.py b/cc-registry-v2/backend/app/models/version.py index edbafe2ecd40..dd771d613487 100644 --- a/cc-registry-v2/backend/app/models/version.py +++ b/cc-registry-v2/backend/app/models/version.py @@ -30,7 +30,23 @@ class CodeCollectionVersion(Base): # Sync metadata synced_at = Column(DateTime) # When this version was last synced is_active = Column(Boolean, default=True) # Whether this version is available - + + # ------------------------------------------------------------------ + # Image catalog metadata (populated by image_sync_tasks) + # ------------------------------------------------------------------ + # Where the built image lives, e.g. "ghcr.io/runwhen-contrib/rw-cli-codecollection" + image_registry = Column(String(500)) + # Concrete pullable tag, e.g. "main-c1a2b3d-e4f5a6b" (PAPI uses this verbatim). + image_tag = Column(String(200), index=True) + # Optional content-addressable digest for stronger pinning. + image_digest = Column(String(80)) + # Full commit sha this image was built from (codecollection repo). + commit_hash = Column(String(40)) + # platform-robot-runtime sha embedded at build time (encoded in tag suffix). + rt_revision = Column(String(40)) + # When the build pushed this image, parsed from OCI manifest where available. 
+ image_built_at = Column(DateTime) + # Timestamps created_at = Column(DateTime, default=datetime.utcnow) updated_at = Column(DateTime, default=datetime.utcnow, onupdate=datetime.utcnow) diff --git a/cc-registry-v2/backend/app/routers/cc_catalog.py b/cc-registry-v2/backend/app/routers/cc_catalog.py new file mode 100644 index 000000000000..d292b1df6fd5 --- /dev/null +++ b/cc-registry-v2/backend/app/routers/cc_catalog.py @@ -0,0 +1,240 @@ +""" +PAPI-facing CodeCollection catalog API. + +These endpoints intentionally bypass the `visibility = 'public'` filter +that protects the registry website: PAPI needs to see hidden CCs so it +can still resolve their image refs for workspaces that use them. + +Surface area: + + GET /api/v1/catalog/codecollections + GET /api/v1/catalog/codecollections/{slug} + GET /api/v1/catalog/codecollections/{slug}/refs + GET /api/v1/catalog/codecollections/{slug}/refs/{ref} + GET /api/v1/catalog/codecollections/{slug}/resolve?pointer=latest|stable + GET /api/v1/catalog/codecollections/{slug}/resolve?ref= + +Everything is read-only. Writes happen only through the image-sync +Celery task — there is no public write API and no auth needed. 
+""" +from __future__ import annotations + +from typing import Optional + +from fastapi import APIRouter, Depends, HTTPException, Query +from sqlalchemy.orm import Session + +from app.core.database import get_db +from app.models import CodeCollection +from app.models.version import CodeCollectionVersion +from app.schemas.cc_catalog import ( + CatalogEntry, + CatalogEntryDetail, + ImageRef, + ResolveResponse, +) + +router = APIRouter(prefix="/api/v1/catalog", tags=["catalog"]) + + +# --------------------------------------------------------------------------- +# helpers +# --------------------------------------------------------------------------- +def _to_image_ref(v: CodeCollectionVersion) -> ImageRef: + return ImageRef( + ref=v.version_name, + ref_type=v.version_type or "branch", + image_registry=v.image_registry, + image_tag=v.image_tag, + image_digest=v.image_digest, + commit_hash=v.commit_hash, + rt_revision=v.rt_revision, + image_built_at=v.image_built_at, + is_latest=bool(v.is_latest), + is_prerelease=bool(v.is_prerelease), + is_active=bool(v.is_active), + synced_at=v.synced_at, + ) + + +def _entry_pointers(versions: list[CodeCollectionVersion]) -> tuple[Optional[str], Optional[str], Optional[str]]: + """Pull (latest_tag, stable_tag, image_registry) out of a CC's versions.""" + latest_tag: Optional[str] = None + stable_tag: Optional[str] = None + image_registry: Optional[str] = None + for v in versions: + if v.image_registry and not image_registry: + image_registry = v.image_registry + if v.is_latest and v.image_tag: + latest_tag = v.image_tag + # `stable` = the highest semver tag (mirrors OCISource.resolve_stable). + if ( + v.image_tag + and v.version_type == "tag" + and (stable_tag is None or v.version_name > stable_tag) + ): + stable_tag = v.image_tag + # Fall back to `latest` if no semver tag is present. 
+ return latest_tag, (stable_tag or latest_tag), image_registry + + +# --------------------------------------------------------------------------- +# endpoints +# --------------------------------------------------------------------------- +@router.get("/codecollections", response_model=list[CatalogEntry]) +def list_catalog( + visibility: Optional[str] = Query( + None, + description="Filter by visibility ('public' | 'hidden'). Omit to see all.", + ), + only_with_image: bool = Query( + True, + description="If true (default), only return CCs that have at least one tracked image.", + ), + db: Session = Depends(get_db), +) -> list[CatalogEntry]: + """List every CodeCollection PAPI may need to resolve.""" + q = db.query(CodeCollection).filter(CodeCollection.is_active.is_(True)) + if visibility: + q = q.filter(CodeCollection.visibility == visibility) + collections = q.order_by(CodeCollection.slug).all() + + entries: list[CatalogEntry] = [] + for cc in collections: + versions = [v for v in cc.versions if v.is_active] + if only_with_image and not any(v.image_tag for v in versions): + continue + latest_tag, stable_tag, image_registry = _entry_pointers(versions) + entries.append( + CatalogEntry( + slug=cc.slug, + name=cc.name, + git_url=cc.git_url, + visibility=cc.visibility or "public", + latest_image_tag=latest_tag, + stable_image_tag=stable_tag, + image_registry=image_registry, + last_synced=cc.last_synced, + ) + ) + return entries + + +@router.get("/codecollections/{slug}", response_model=CatalogEntryDetail) +def get_catalog_entry(slug: str, db: Session = Depends(get_db)) -> CatalogEntryDetail: + cc = ( + db.query(CodeCollection) + .filter(CodeCollection.slug == slug, CodeCollection.is_active.is_(True)) + .first() + ) + if cc is None: + raise HTTPException(status_code=404, detail=f"unknown codecollection: {slug}") + versions = [v for v in cc.versions if v.is_active and v.image_tag] + latest_tag, stable_tag, image_registry = _entry_pointers(versions) + return 
CatalogEntryDetail( + slug=cc.slug, + name=cc.name, + git_url=cc.git_url, + visibility=cc.visibility or "public", + latest_image_tag=latest_tag, + stable_image_tag=stable_tag, + image_registry=image_registry, + last_synced=cc.last_synced, + refs=[_to_image_ref(v) for v in versions], + ) + + +@router.get("/codecollections/{slug}/refs", response_model=list[ImageRef]) +def list_refs( + slug: str, + include_inactive: bool = Query(False), + db: Session = Depends(get_db), +) -> list[ImageRef]: + cc = db.query(CodeCollection).filter(CodeCollection.slug == slug).first() + if cc is None: + raise HTTPException(status_code=404, detail=f"unknown codecollection: {slug}") + versions = list(cc.versions) + if not include_inactive: + versions = [v for v in versions if v.is_active] + versions = [v for v in versions if v.image_tag] + return [_to_image_ref(v) for v in versions] + + +@router.get("/codecollections/{slug}/refs/{ref}", response_model=ImageRef) +def get_ref(slug: str, ref: str, db: Session = Depends(get_db)) -> ImageRef: + row = ( + db.query(CodeCollectionVersion) + .join(CodeCollection, CodeCollectionVersion.codecollection_id == CodeCollection.id) + .filter(CodeCollection.slug == slug, CodeCollectionVersion.version_name == ref) + .first() + ) + if row is None or not row.image_tag: + raise HTTPException( + status_code=404, detail=f"no image for {slug}@{ref}" + ) + return _to_image_ref(row) + + +@router.get("/codecollections/{slug}/resolve", response_model=ResolveResponse) +def resolve_image( + slug: str, + pointer: Optional[str] = Query( + None, regex="^(latest|stable)$", + description="Resolve a named pointer ('latest' or 'stable').", + ), + ref: Optional[str] = Query( + None, description="Resolve a specific git ref name (branch/tag)." + ), + db: Session = Depends(get_db), +) -> ResolveResponse: + """ + Resolve a pointer or git ref to a concrete OCI image tag. Exactly one of + `pointer` or `ref` must be supplied. 
This is the endpoint PAPI calls + on the workspace reconcile path. + """ + if bool(pointer) == bool(ref): + raise HTTPException( + status_code=400, + detail="exactly one of 'pointer' or 'ref' must be provided", + ) + + cc = ( + db.query(CodeCollection) + .filter(CodeCollection.slug == slug, CodeCollection.is_active.is_(True)) + .first() + ) + if cc is None: + raise HTTPException(status_code=404, detail=f"unknown codecollection: {slug}") + + versions = [v for v in cc.versions if v.is_active and v.image_tag] + if not versions: + raise HTTPException(status_code=404, detail=f"no images tracked for {slug}") + + selected: Optional[CodeCollectionVersion] = None + if pointer == "latest": + latest_tag, _, _ = _entry_pointers(versions) + selected = next((v for v in versions if v.image_tag == latest_tag), None) + requested = "latest" + elif pointer == "stable": + _, stable_tag, _ = _entry_pointers(versions) + selected = next((v for v in versions if v.image_tag == stable_tag), None) + requested = "stable" + else: + selected = next((v for v in versions if v.version_name == ref), None) + requested = ref or "" + + if selected is None: + raise HTTPException( + status_code=404, + detail=f"could not resolve {requested!r} for {slug}", + ) + + return ResolveResponse( + slug=slug, + requested=requested, + image_tag=selected.image_tag, + image_registry=selected.image_registry, + image_digest=selected.image_digest, + commit_hash=selected.commit_hash, + rt_revision=selected.rt_revision, + ) diff --git a/cc-registry-v2/backend/app/routers/versions.py b/cc-registry-v2/backend/app/routers/versions.py index 741e218cb3cf..f84cf822e1e7 100644 --- a/cc-registry-v2/backend/app/routers/versions.py +++ b/cc-registry-v2/backend/app/routers/versions.py @@ -7,6 +7,7 @@ from sqlalchemy import desc from app.core.database import get_db +from app.core.visibility import public_only from app.models.code_collection import CodeCollection from app.models.version import CodeCollectionVersion, VersionCodebundle 
@@ -22,10 +23,13 @@ async def get_collections_with_versions( Get all CodeCollections with their versions (tags and branches). """ try: - query = db.query(CodeCollection).options( - joinedload(CodeCollection.versions) + # Public website endpoint -- hidden CCs are excluded. + query = public_only( + db.query(CodeCollection).options( + joinedload(CodeCollection.versions) + ) ).order_by(CodeCollection.name) - + if limit: query = query.offset(offset).limit(limit) @@ -85,8 +89,8 @@ async def get_collection_versions( """ Get all versions for a specific CodeCollection. """ - collection = db.query(CodeCollection).filter( - CodeCollection.slug == collection_slug + collection = public_only( + db.query(CodeCollection).filter(CodeCollection.slug == collection_slug) ).first() if not collection: @@ -133,8 +137,8 @@ async def get_version_by_name( """ Get a specific version by collection slug and version name. """ - collection = db.query(CodeCollection).filter( - CodeCollection.slug == collection_slug + collection = public_only( + db.query(CodeCollection).filter(CodeCollection.slug == collection_slug) ).first() if not collection: @@ -178,8 +182,8 @@ async def get_latest_version( """ Get the latest version for a CodeCollection. """ - collection = db.query(CodeCollection).filter( - CodeCollection.slug == collection_slug + collection = public_only( + db.query(CodeCollection).filter(CodeCollection.slug == collection_slug) ).first() if not collection: @@ -229,8 +233,8 @@ async def get_version_codebundles( """ Get all codebundles for a specific version. 
""" - collection = db.query(CodeCollection).filter( - CodeCollection.slug == collection_slug + collection = public_only( + db.query(CodeCollection).filter(CodeCollection.slug == collection_slug) ).first() if not collection: diff --git a/cc-registry-v2/backend/app/schemas/__init__.py b/cc-registry-v2/backend/app/schemas/__init__.py new file mode 100644 index 000000000000..e69de29bb2d1 diff --git a/cc-registry-v2/backend/app/schemas/cc_catalog.py b/cc-registry-v2/backend/app/schemas/cc_catalog.py new file mode 100644 index 000000000000..49935a7b8530 --- /dev/null +++ b/cc-registry-v2/backend/app/schemas/cc_catalog.py @@ -0,0 +1,74 @@ +""" +Pydantic response models for the PAPI-facing CodeCollection catalog API. + +These shapes are part of the contract PAPI (and any other consumer) +depends on. Keep field names stable; add new fields rather than renaming. +""" +from __future__ import annotations + +from datetime import datetime +from typing import Optional + +from pydantic import BaseModel, Field + + +class ImageRef(BaseModel): + """One built image ref (1:1 with a CodeCollectionVersion row).""" + + ref: str = Field(..., description="Git ref this build represents (branch/tag).") + ref_type: str = Field(..., description="'branch' | 'tag' | 'release'.") + image_registry: Optional[str] = Field( + None, description="OCI repository, e.g. 'ghcr.io/runwhen-contrib/rw-cli-codecollection'." + ) + image_tag: str = Field(..., description="Concrete OCI tag, pullable verbatim.") + image_digest: Optional[str] = Field( + None, description="sha256 digest when available; pin to this for reproducibility." + ) + commit_hash: Optional[str] = Field( + None, description="Full codecollection commit sha this image was built from." + ) + rt_revision: Optional[str] = Field( + None, description="platform-robot-runtime sha at build time." 
+ ) + image_built_at: Optional[datetime] = None + is_latest: bool = False + is_prerelease: bool = False + is_active: bool = True + synced_at: Optional[datetime] = None + + +class CatalogEntry(BaseModel): + """A single CodeCollection plus its currently-resolved pointers.""" + + slug: str + name: str + git_url: str + visibility: str = Field( + "public", + description=( + "'public' or 'hidden'. PAPI returns both; public-audience surfaces " + "(website/MCP/AI) filter to public only." + ), + ) + latest_image_tag: Optional[str] = None + stable_image_tag: Optional[str] = None + image_registry: Optional[str] = None + last_synced: Optional[datetime] = None + + +class CatalogEntryDetail(CatalogEntry): + """Catalog entry with the full set of known refs attached.""" + + refs: list[ImageRef] = Field(default_factory=list) + + +class ResolveResponse(BaseModel): + """`/resolve` endpoint: ref-or-pointer -> concrete image.""" + + slug: str + requested: str = Field(..., description="The pointer or ref the caller asked for.") + image_tag: str + image_registry: Optional[str] = None + image_digest: Optional[str] = None + commit_hash: Optional[str] = None + rt_revision: Optional[str] = None diff --git a/cc-registry-v2/backend/app/sources/__init__.py b/cc-registry-v2/backend/app/sources/__init__.py new file mode 100644 index 000000000000..3d2a6140ec9b --- /dev/null +++ b/cc-registry-v2/backend/app/sources/__init__.py @@ -0,0 +1,17 @@ +""" +Image source plugin system. + +Each `ImageSource` implementation knows how to discover the set of built +image refs for a given CodeCollection and pick the `latest` / `stable` +pointers. Built-in sources: + + - oci – polls an OCI Distribution v2 registry (GHCR, GAR, Quay, ECR, ...) + - static – reads a hand-curated JSON file (useful for vendored / signed-off images) + +Add a new source by writing a class that satisfies `ImageSource` and +registering it in `SOURCE_REGISTRY` (see `registry.py`). 
+""" +from .base import ImageSource, DiscoveredImageRef +from .registry import SOURCE_REGISTRY, get_source + +__all__ = ["ImageSource", "DiscoveredImageRef", "SOURCE_REGISTRY", "get_source"] diff --git a/cc-registry-v2/backend/app/sources/base.py b/cc-registry-v2/backend/app/sources/base.py new file mode 100644 index 000000000000..3396d933b39f --- /dev/null +++ b/cc-registry-v2/backend/app/sources/base.py @@ -0,0 +1,80 @@ +""" +ImageSource abstract base + DiscoveredImageRef value object. + +The image-sync task drives sources in three phases: + + refs = source.discover_refs(cc) + latest = source.resolve_latest(cc, refs) + stable = source.resolve_stable(cc, refs) + +`discover_refs` is the only mandatory remote call; the resolvers should be +pure functions over the discovered list so they're easy to unit test. +""" +from __future__ import annotations + +from abc import ABC, abstractmethod +from dataclasses import dataclass, field +from datetime import datetime +from typing import Optional + + +@dataclass(frozen=True) +class DiscoveredImageRef: + """ + One concrete image build that a source has found for a CodeCollection. + + A "ref" maps 1:1 to an OCI image tag we will track in the catalog. For + example a CC built from `main` at commit `c1a2b3d…` against a runtime + at `e4f5a6b…` produces a single `DiscoveredImageRef`: + + ref = "main" + ref_type = "branch" + commit = "c1a2b3d…" + rt_revision = "e4f5a6b…" + image_tag = "main-c1a2b3d-e4f5a6b" + """ + ref: str # git ref this build represents (branch/tag name) + ref_type: str # "branch" | "tag" | "release" + commit: str # full sha of the codecollection commit + rt_revision: str # full sha of the runtime used at build time + image_tag: str # concrete OCI tag (pullable) + image_digest: Optional[str] = None # sha256:... 
when available from manifest + built_at: Optional[datetime] = None + extra: dict = field(default_factory=dict) # source-specific overflow (free form) + + +class ImageSource(ABC): + """ + Abstract source of CodeCollection image metadata. + + Subclasses must be safe to call on a schedule from a Celery worker: + no global mutable state, network errors should raise rather than + swallow so the sync task can record them. + """ + + name: str # unique key registered in SOURCE_REGISTRY (e.g. "oci") + + @abstractmethod + def discover_refs(self, cc: dict) -> list[DiscoveredImageRef]: + """ + Return every image build currently known for this CodeCollection. + + `cc` is the raw mapping from `codecollections.yaml` (slug, git_url, + image_registry, image_source, etc.) so a source can read any + source-specific fields it needs without a separate config layer. + """ + ... + + @abstractmethod + def resolve_latest( + self, cc: dict, refs: list[DiscoveredImageRef] + ) -> Optional[str]: + """Return the image_tag that should be considered `latest`, or None.""" + ... + + @abstractmethod + def resolve_stable( + self, cc: dict, refs: list[DiscoveredImageRef] + ) -> Optional[str]: + """Return the image_tag that should be considered `stable`, or None.""" + ... diff --git a/cc-registry-v2/backend/app/sources/oci.py b/cc-registry-v2/backend/app/sources/oci.py new file mode 100644 index 000000000000..06f5818fdfb5 --- /dev/null +++ b/cc-registry-v2/backend/app/sources/oci.py @@ -0,0 +1,220 @@ +""" +OCI Distribution v2 image source. + +Lists every tag in a public OCI repository and shapes each into a +`DiscoveredImageRef`. 
The expected tag schema matches the one emitted by +the codecollection build workflows in this design: + + -- + +For example: + + main-c1a2b3d-e4f5a6b + pr-42-9988aabb-e4f5a6b + v1.2.0-aabbccd-e4f5a6b + +`latest` resolution: among tags whose ref-portion is `main`, pick the +newest (by manifest `created` if available, otherwise the lexicographically +last — tags are time-monotonic given the sha suffix). + +`stable` resolution: prefer the highest semver-looking ref (`v\\d+...`) if +one exists; otherwise fall back to `latest`. + +NOTE: this source intentionally treats the registry as the source of +truth. It never mutates the registry; the cc-registry-v2 catalog is a +read-only mirror that powers PAPI lookups. +""" +from __future__ import annotations + +import logging +import re +from datetime import datetime, timezone +from typing import Optional + +import requests + +from .base import DiscoveredImageRef, ImageSource + +logger = logging.getLogger(__name__) + + +# Tag schema: --. The ref portion may itself contain +# hyphens (e.g. "pr-42"), so we anchor on the two trailing 7-char sha groups. +TAG_PATTERN = re.compile( + r"^(?P.+?)-(?P[0-9a-f]{7,40})-(?P[0-9a-f]{7,40})$" +) + +SEMVER_TAG = re.compile(r"^v?\d+\.\d+(\.\d+)?") + + +class OCISource(ImageSource): + name = "oci" + + def __init__(self, timeout: float = 10.0, max_pages: int = 50): + # Defensive caps: a single CC shouldn't paginate forever, and + # individual HTTP calls shouldn't hang a Celery worker. 
+ self.timeout = timeout + self.max_pages = max_pages + + # ------------------------------------------------------------------ + # public API + # ------------------------------------------------------------------ + def discover_refs(self, cc: dict) -> list[DiscoveredImageRef]: + registry_url = cc.get("image_registry") + if not registry_url: + logger.warning( + "oci source skipping %s: no image_registry configured", + cc.get("slug"), + ) + return [] + + host, repo = self._split_registry_url(registry_url) + tags = self._list_tags(host, repo) + + discovered: list[DiscoveredImageRef] = [] + for tag in tags: + ref = self._parse_tag(tag) + if ref is None: + continue + discovered.append(ref) + logger.info( + "oci source: %s -> %d tags, %d matched build schema", + cc.get("slug"), + len(tags), + len(discovered), + ) + return discovered + + def resolve_latest( + self, cc: dict, refs: list[DiscoveredImageRef] + ) -> Optional[str]: + # Prefer the newest build of the configured default branch. + default_ref = cc.get("default_ref", "main") + candidates = [r for r in refs if r.ref == default_ref] + if not candidates: + return None + candidates.sort( + key=lambda r: (r.built_at or datetime.min.replace(tzinfo=timezone.utc), r.image_tag) + ) + return candidates[-1].image_tag + + def resolve_stable( + self, cc: dict, refs: list[DiscoveredImageRef] + ) -> Optional[str]: + # Highest semver-looking ref wins; fall back to latest. 
+ semver_refs = [r for r in refs if SEMVER_TAG.match(r.ref)] + if semver_refs: + semver_refs.sort(key=lambda r: self._semver_key(r.ref)) + return semver_refs[-1].image_tag + return self.resolve_latest(cc, refs) + + # ------------------------------------------------------------------ + # helpers + # ------------------------------------------------------------------ + @staticmethod + def _split_registry_url(url: str) -> tuple[str, str]: + """ + "ghcr.io/runwhen-contrib/rw-cli-codecollection" + -> ("ghcr.io", "runwhen-contrib/rw-cli-codecollection") + """ + url = url.strip().rstrip("/") + if "/" not in url: + raise ValueError(f"image_registry must include a repo path: {url}") + host, _, repo = url.partition("/") + return host, repo + + def _list_tags(self, host: str, repo: str) -> list[str]: + """Walk the v2 tags endpoint with Link-header pagination.""" + url = f"https://{host}/v2/{repo}/tags/list" + params = {"n": 200} + all_tags: list[str] = [] + for _ in range(self.max_pages): + resp = self._get_with_token(host, repo, url, params) + resp.raise_for_status() + payload = resp.json() + all_tags.extend(payload.get("tags") or []) + link = resp.headers.get("Link") or "" + next_url = self._parse_next_link(link, host) + if not next_url: + break + url, params = next_url, {} + return all_tags + + def _get_with_token(self, host: str, repo: str, url: str, params: dict): + """ + Some public registries (GHCR, Docker Hub) require an anonymous + bearer token even for public reads. Handle the 401 -> token -> + retry dance once. 
+ """ + resp = requests.get(url, params=params, timeout=self.timeout) + if resp.status_code != 401: + return resp + + www_auth = resp.headers.get("WWW-Authenticate", "") + m = re.search(r'Bearer realm="([^"]+)"', www_auth) + realm = m.group(1) if m else None + if not realm: + return resp # nothing we can do, let caller raise + service_match = re.search(r'service="([^"]+)"', www_auth) + token_params = { + "scope": f"repository:{repo}:pull", + } + if service_match: + token_params["service"] = service_match.group(1) + token_resp = requests.get(realm, params=token_params, timeout=self.timeout) + token_resp.raise_for_status() + token = token_resp.json().get("token") or token_resp.json().get("access_token") + if not token: + return resp + return requests.get( + url, + params=params, + timeout=self.timeout, + headers={"Authorization": f"Bearer {token}"}, + ) + + @staticmethod + def _parse_next_link(link_header: str, host: str) -> Optional[str]: + # Link: ; rel="next" + m = re.search(r'<([^>]+)>;\s*rel="next"', link_header or "") + if not m: + return None + path = m.group(1) + if path.startswith("http"): + return path + return f"https://{host}{path}" + + @staticmethod + def _parse_tag(tag: str) -> Optional[DiscoveredImageRef]: + m = TAG_PATTERN.match(tag) + if not m: + return None + ref = m.group("ref") + return DiscoveredImageRef( + ref=ref, + ref_type=_classify_ref(ref), + commit=m.group("cc_sha"), + rt_revision=m.group("rt_sha"), + image_tag=tag, + ) + + @staticmethod + def _semver_key(ref: str) -> tuple: + # Cheap semver sort key; non-numeric suffixes sort last. 
+ ref = ref.lstrip("v") + parts = re.split(r"[.\-+]", ref) + key: list = [] + for p in parts: + if p.isdigit(): + key.append((0, int(p))) + else: + key.append((1, p)) + return tuple(key) + + +def _classify_ref(ref: str) -> str: + if ref.startswith("pr-"): + return "branch" + if SEMVER_TAG.match(ref): + return "tag" + return "branch" diff --git a/cc-registry-v2/backend/app/sources/registry.py b/cc-registry-v2/backend/app/sources/registry.py new file mode 100644 index 000000000000..795c702766e9 --- /dev/null +++ b/cc-registry-v2/backend/app/sources/registry.py @@ -0,0 +1,62 @@ +""" +Source registry / loader. + +Built-in sources live in this package and are loaded eagerly. + +Third-party / customer-specific sources can be registered via the +`CC_REGISTRY_EXTRA_SOURCES` environment variable, a colon-separated list of +import paths to modules that expose a top-level `SOURCE` instance. This +lets self-hosted operators plug in custom discovery logic (e.g. an internal +Harbor with non-standard tag schemas) without forking the catalog. 
+ +Example: + + CC_REGISTRY_EXTRA_SOURCES=mycorp.harbor:mycorp.gerrit +""" +from __future__ import annotations + +import importlib +import logging +import os +from typing import Dict + +from .base import ImageSource +from .oci import OCISource +from .static import StaticSource + +logger = logging.getLogger(__name__) + +SOURCE_REGISTRY: Dict[str, ImageSource] = { + OCISource.name: OCISource(), + StaticSource.name: StaticSource(), +} + + +def _load_extra_sources() -> None: + paths = os.environ.get("CC_REGISTRY_EXTRA_SOURCES", "").strip() + if not paths: + return + for module_path in paths.split(":"): + module_path = module_path.strip() + if not module_path: + continue + try: + mod = importlib.import_module(module_path) + source = getattr(mod, "SOURCE", None) + if not isinstance(source, ImageSource): + logger.warning( + "extra source %s did not expose a SOURCE: ImageSource", + module_path, + ) + continue + SOURCE_REGISTRY[source.name] = source + logger.info("registered extra image source: %s", source.name) + except Exception: # pragma: no cover - defensive logging + logger.exception("failed to load extra image source %s", module_path) + + +_load_extra_sources() + + +def get_source(name: str) -> ImageSource | None: + return SOURCE_REGISTRY.get(name) diff --git a/cc-registry-v2/backend/app/sources/static.py b/cc-registry-v2/backend/app/sources/static.py new file mode 100644 index 000000000000..e65b65bee620 --- /dev/null +++ b/cc-registry-v2/backend/app/sources/static.py @@ -0,0 +1,117 @@ +""" +Static JSON image source. + +Useful for: + + - Customer self-hosted catalogs where image discovery happens in the + customer's own pipeline and is dropped into a checked-in JSON file. + - Tests / fixtures. + - Pinning a CC to a known-good set of refs without polling. 
+ +Expected file shape: + + { + "default_ref": "main", + "stable_ref": "v1.2.0", + "refs": [ + { + "ref": "main", + "ref_type": "branch", + "commit": "c1a2b3d…", + "rt_revision": "e4f5a6b…", + "image_tag": "main-c1a2b3d-e4f5a6b", + "image_digest": "sha256:…", // optional + "built_at": "2026-05-11T20:00:00Z" // optional + }, + ... + ] + } + +The CC entry in `codecollections.yaml` points at the file via `static_path`. +""" +from __future__ import annotations + +import json +import logging +import os +from datetime import datetime +from typing import Optional + +from .base import DiscoveredImageRef, ImageSource + +logger = logging.getLogger(__name__) + + +class StaticSource(ImageSource): + name = "static" + + def discover_refs(self, cc: dict) -> list[DiscoveredImageRef]: + path = cc.get("static_path") + if not path or not os.path.exists(path): + logger.warning( + "static source skipping %s: static_path missing (%r)", + cc.get("slug"), + path, + ) + return [] + with open(path, "r") as f: + payload = json.load(f) + + refs: list[DiscoveredImageRef] = [] + for entry in payload.get("refs", []): + built_at = entry.get("built_at") + built_at_dt: Optional[datetime] = None + if built_at: + try: + built_at_dt = datetime.fromisoformat(built_at.replace("Z", "+00:00")) + except ValueError: + built_at_dt = None + refs.append( + DiscoveredImageRef( + ref=entry["ref"], + ref_type=entry.get("ref_type", "branch"), + commit=entry["commit"], + rt_revision=entry["rt_revision"], + image_tag=entry["image_tag"], + image_digest=entry.get("image_digest"), + built_at=built_at_dt, + ) + ) + # Stash the explicit pointers for the resolvers. 
+ return [ + DiscoveredImageRef( + ref=r.ref, + ref_type=r.ref_type, + commit=r.commit, + rt_revision=r.rt_revision, + image_tag=r.image_tag, + image_digest=r.image_digest, + built_at=r.built_at, + extra={ + "default_ref": payload.get("default_ref", "main"), + "stable_ref": payload.get("stable_ref"), + }, + ) + for r in refs + ] + + def resolve_latest( + self, cc: dict, refs: list[DiscoveredImageRef] + ) -> Optional[str]: + if not refs: + return None + default_ref = refs[0].extra.get("default_ref", "main") + matches = [r for r in refs if r.ref == default_ref] + return matches[-1].image_tag if matches else None + + def resolve_stable( + self, cc: dict, refs: list[DiscoveredImageRef] + ) -> Optional[str]: + if not refs: + return None + stable_ref = refs[0].extra.get("stable_ref") + if stable_ref: + for r in refs: + if r.ref == stable_ref: + return r.image_tag + return self.resolve_latest(cc, refs) diff --git a/cc-registry-v2/backend/app/tasks/celery_app.py b/cc-registry-v2/backend/app/tasks/celery_app.py index 69d371b74b32..9bfa001cfeb0 100644 --- a/cc-registry-v2/backend/app/tasks/celery_app.py +++ b/cc-registry-v2/backend/app/tasks/celery_app.py @@ -71,6 +71,7 @@ def _configure_broker_url(): "app.tasks.workflow_tasks", "app.tasks.analytics_tasks", "app.tasks.indexing_tasks", + "app.tasks.image_sync_tasks", ] ) diff --git a/cc-registry-v2/backend/app/tasks/image_sync_tasks.py b/cc-registry-v2/backend/app/tasks/image_sync_tasks.py new file mode 100644 index 000000000000..facec3a031c4 --- /dev/null +++ b/cc-registry-v2/backend/app/tasks/image_sync_tasks.py @@ -0,0 +1,203 @@ +""" +Image-tag sync tasks. + +Periodically reads `codecollections.yaml`, asks each CC's configured +`ImageSource` for every known build, and upserts `CodeCollectionVersion` +rows so PAPI (and any other consumer) can resolve refs to concrete image +tags without ever talking to a git server or running a CRD reconciler. 
+ +Design notes: + + - This task is the single writer for image metadata in the catalog. It + is intentionally idempotent: re-running it converges the DB onto + whatever the OCI registry reports, including marking gone-from-registry + versions inactive. + - It does NOT push to any registry. The registry remains the source of + truth for whether an image exists. + - It runs on a regular celery-beat schedule (see schedules.yaml) and is + also exposed manually via the admin/task UI for on-demand refreshes. +""" +from __future__ import annotations + +import logging +import os +from datetime import datetime +from typing import Optional + +import yaml + +from app.core.database import SessionLocal +from app.models import CodeCollection +from app.models.version import CodeCollectionVersion +from app.sources import DiscoveredImageRef, get_source +from app.tasks.celery_app import celery_app + +logger = logging.getLogger(__name__) + + +def _load_codecollections_yaml() -> list[dict]: + """Locate codecollections.yaml in the same order other tasks do.""" + candidate_paths = [ + "/app/codecollections.yaml", + os.path.join( + os.path.dirname(os.path.dirname(os.path.dirname(__file__))), + "..", + "codecollections.yaml", + ), + "/workspaces/codecollection-registry/codecollections.yaml", + ] + for path in candidate_paths: + if os.path.exists(path): + with open(path, "r") as f: + data = yaml.safe_load(f) or {} + return data.get("codecollections", []) or [] + logger.error("codecollections.yaml not found in any known location") + return [] + + +@celery_app.task(bind=True, name="app.tasks.image_sync_tasks.sync_image_tags_task") +def sync_image_tags_task(self): + """ + For every CC with an `image_source` configured, discover its image + refs and upsert one CodeCollectionVersion row per ref. 
+ """ + logger.info("Starting sync_image_tags_task %s", self.request.id) + + collections = _load_codecollections_yaml() + summary = { + "collections_processed": 0, + "refs_upserted": 0, + "refs_deactivated": 0, + "errors": [], + } + + db = SessionLocal() + try: + for cc_yaml in collections: + source_name = cc_yaml.get("image_source") + if not source_name: + continue # CC opted out of image tracking + + slug = cc_yaml.get("slug") + if not slug: + logger.warning("Skipping CC without slug: %s", cc_yaml) + continue + + cc_row = ( + db.query(CodeCollection) + .filter(CodeCollection.slug == slug) + .first() + ) + if not cc_row: + # Image sync runs after collection sync, so a missing row + # almost always means the YAML edit hasn't reached the DB + # yet — bail rather than create a half-formed row. + logger.warning( + "Skipping image sync for %s: collection not yet in DB", slug + ) + continue + + source = get_source(source_name) + if source is None: + summary["errors"].append( + {"slug": slug, "error": f"unknown image_source {source_name!r}"} + ) + continue + + try: + refs = source.discover_refs(cc_yaml) + latest_tag = source.resolve_latest(cc_yaml, refs) + stable_tag = source.resolve_stable(cc_yaml, refs) + except Exception as exc: # pragma: no cover - logged for ops + logger.exception("source %s failed for %s", source_name, slug) + summary["errors"].append({"slug": slug, "error": str(exc)}) + continue + + upserted, deactivated = _upsert_versions( + db, + cc_row, + cc_yaml.get("image_registry"), + refs, + latest_tag, + stable_tag, + ) + db.commit() + summary["collections_processed"] += 1 + summary["refs_upserted"] += upserted + summary["refs_deactivated"] += deactivated + + logger.info("sync_image_tags_task finished: %s", summary) + return {"status": "success", **summary} + finally: + db.close() + + +def _upsert_versions( + db, + cc_row: CodeCollection, + image_registry: Optional[str], + refs: list[DiscoveredImageRef], + latest_tag: Optional[str], + stable_tag: 
Optional[str], +) -> tuple[int, int]: + """ + Mirror the discovered refs onto codecollection_versions. + + Strategy: + - Each (cc, ref) maps to a CodeCollectionVersion keyed by version_name=ref. + - Versions that exist in the DB but no longer appear in the source are + marked is_active=False (we keep the row for history rather than + deleting it — PAPI may still reference a now-gone image). + - `is_latest` is set ONLY on the latest-tag row; `is_prerelease` is + flipped off the stable row. + """ + upserted = 0 + deactivated = 0 + now = datetime.utcnow() + + refs_by_name = {r.ref: r for r in refs} + + existing_versions = ( + db.query(CodeCollectionVersion) + .filter(CodeCollectionVersion.codecollection_id == cc_row.id) + .all() + ) + existing_by_name = {v.version_name: v for v in existing_versions} + + # Deactivate rows that no longer appear in the source. + for name, row in existing_by_name.items(): + if name not in refs_by_name and row.is_active: + row.is_active = False + row.updated_at = now + deactivated += 1 + + # Upsert each discovered ref. + for name, ref in refs_by_name.items(): + is_latest_row = (latest_tag is not None and ref.image_tag == latest_tag) + is_stable_row = (stable_tag is not None and ref.image_tag == stable_tag) + row = existing_by_name.get(name) + if row is None: + row = CodeCollectionVersion( + codecollection_id=cc_row.id, + version_name=name, + git_ref=name, + display_name=name, + version_type=ref.ref_type, + ) + db.add(row) + + row.image_registry = image_registry + row.image_tag = ref.image_tag + row.image_digest = ref.image_digest + row.commit_hash = ref.commit + row.rt_revision = ref.rt_revision + row.image_built_at = ref.built_at + row.is_active = True + row.is_latest = is_latest_row + # Treat anything that isn't the stable pointer (and isn't semver) as a prerelease. 
+ row.is_prerelease = not (is_stable_row or ref.ref_type == "tag") + row.synced_at = now + row.updated_at = now + upserted += 1 + + return upserted, deactivated diff --git a/cc-registry-v2/backend/app/tasks/registry_tasks.py b/cc-registry-v2/backend/app/tasks/registry_tasks.py index 9e4707d1a9c8..bb4e774232fe 100644 --- a/cc-registry-v2/backend/app/tasks/registry_tasks.py +++ b/cc-registry-v2/backend/app/tasks/registry_tasks.py @@ -82,6 +82,16 @@ def sync_all_collections_task(self): CodeCollection.slug == collection_slug ).first() + # Visibility defaults to 'public' if omitted; only ever + # take on the values declared in YAML so a CC can be + # toggled hidden/public by re-deploying config alone. + visibility = collection_data.get('visibility', 'public') + if visibility not in ('public', 'hidden'): + logger.warning( + f"Unknown visibility {visibility!r} for {collection_slug}, defaulting to 'public'" + ) + visibility = 'public' + if not collection: collection = CodeCollection( name=collection_data.get('name', collection_slug), @@ -92,16 +102,18 @@ def sync_all_collections_task(self): owner_email=collection_data.get('owner_email', ''), owner_icon=collection_data.get('owner_icon', ''), git_ref=collection_data.get('git_ref', 'main'), + visibility=visibility, is_active=True ) db.add(collection) - logger.info(f"Created collection: {collection_slug}") + logger.info(f"Created collection: {collection_slug} (visibility={visibility})") else: collection.name = collection_data.get('name', collection_slug) collection.git_url = git_url collection.description = collection_data.get('description', '') + collection.visibility = visibility collection.is_active = True - logger.info(f"Updated collection: {collection_slug}") + logger.info(f"Updated collection: {collection_slug} (visibility={visibility})") db.commit() collections_synced += 1 diff --git a/cc-registry-v2/schedules.yaml b/cc-registry-v2/schedules.yaml index cfbe388c2bf5..74ba11464123 100644 --- 
a/cc-registry-v2/schedules.yaml +++ b/cc-registry-v2/schedules.yaml @@ -73,6 +73,23 @@ schedules: minute: 0 enabled: false + # ============================================================================= + # IMAGE CATALOG SYNC (PAPI-facing CodeCollection image tracking) + # ============================================================================= + # Polls each CodeCollection's configured `image_source` (e.g. its OCI + # registry) and mirrors the discovered image tags into + # codecollection_versions. PAPI reads from this catalog instead of + # running the corestate-operator. The task is idempotent and fast (one + # HTTP listing per CC), so we run it frequently. + + - name: sync-image-tags + task: app.tasks.image_sync_tasks.sync_image_tags_task + description: "Poll OCI registries for each CC and refresh the image catalog" + schedule_type: interval + interval: + minutes: 5 + enabled: true + # ============================================================================= # METRICS & STATISTICS # ============================================================================= diff --git a/codecollections.yaml b/codecollections.yaml index e0d4e37e4de6..8bb304653f4f 100644 --- a/codecollections.yaml +++ b/codecollections.yaml @@ -1,3 +1,17 @@ +# ---------------------------------------------------------------------- +# Registry-tracked CodeCollections. +# +# Optional fields: +# visibility - 'public' (default) or 'hidden'. Hidden CCs are still +# synced and their image tags tracked for PAPI, but they +# are excluded from public-facing endpoints (website, +# MCP, AI search). 'hidden' is a UX toggle; OCI registry +# ACLs remain the source of truth for image access. +# image_source - which ImageSource plugin to drive ('oci' | 'static' +# | custom). Omit to skip image tracking entirely. +# image_registry - OCI repo path used by the 'oci' source. +# default_ref - branch the 'latest' pointer should follow (default 'main'). 
+# ---------------------------------------------------------------------- codecollections: - name: RunWhen Public CodeCollection slug: rw-public-codecollection @@ -7,6 +21,8 @@ codecollections: owner_icon: https://assets-global.website-files.com/64f9646ad0f39e9ee5c116c4/659f80c7391d64a0ec2a840e_icon_rw-platform.svg owner_email: shea.stewart@runwhen.com description: Python based CodeCollections that do not leverage a command line binary or bash script + image_source: oci + image_registry: ghcr.io/runwhen-contrib/rw-public-codecollection - name: RunWhen CLI CodeCollection slug: rw-cli-codecollection git_url: https://github.com/runwhen-contrib/rw-cli-codecollection @@ -14,6 +30,8 @@ codecollections: owner_icon: https://assets-global.website-files.com/64f9646ad0f39e9ee5c116c4/659f80c7391d64a0ec2a840e_icon_rw-platform.svg owner_email: shea.stewart@runwhen.com description: CodeCollections based on command line binaries and bash scripts + image_source: oci + image_registry: ghcr.io/runwhen-contrib/rw-cli-codecollection - name: RunWhen Generic CodeCollection slug: rw-generic-codecollection git_url: https://github.com/runwhen-contrib/rw-generic-codecollection From 3f02df757a5c65f7adba495c55449432bdd1d477 Mon Sep 17 00:00:00 2001 From: stewartshea Date: Tue, 12 May 2026 08:58:55 -0400 Subject: [PATCH 02/13] Register CCV image sources for 6 codecollections MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Wires the OCI image source into codecollections.yaml for every repo that now has a per-repo GitHub Actions build producing catalog-shaped tags (--): - rw-public-codecollection (already registered) - rw-cli-codecollection (already registered) - rw-generic-codecollection (new) - rw-workspace-utils (new) - aws-c7n-codecollection (new) - azure-c7n-codecollection (new) ternary-codecollection is deliberately left without an image_source — no Dockerfile + build-push workflow yet. Documented inline. 
Once PR #114 is merged, sync_image_tags_task will discover each of these on its 5-minute beat schedule and upsert one CodeCollectionVersion row per discovered ref. Co-authored-by: Cursor --- codecollections.yaml | 15 ++++++++++++++- 1 file changed, 14 insertions(+), 1 deletion(-) diff --git a/codecollections.yaml b/codecollections.yaml index 8bb304653f4f..61b98e43021a 100644 --- a/codecollections.yaml +++ b/codecollections.yaml @@ -39,6 +39,8 @@ codecollections: owner_icon: https://assets-global.website-files.com/64f9646ad0f39e9ee5c116c4/659f80c7391d64a0ec2a840e_icon_rw-platform.svg owner_email: shea.stewart@runwhen.com description: Run Generic CLI Commands with User Input + image_source: oci + image_registry: ghcr.io/runwhen-contrib/rw-generic-codecollection - name: RunWhen Workspace Utilities CodeCollection slug: rw-workspace-utils git_url: https://github.com/runwhen-contrib/rw-workspace-utils @@ -46,6 +48,8 @@ codecollections: owner_icon: https://assets-global.website-files.com/64f9646ad0f39e9ee5c116c4/659f80c7391d64a0ec2a840e_icon_rw-platform.svg owner_email: shea.stewart@runwhen.com description: Workspace Utilies such as Webhook integrations + image_source: oci + image_registry: ghcr.io/runwhen-contrib/rw-workspace-utils - name: AWS CloudCustodian CodeCollection slug: aws-c7n-codecollection git_url: https://github.com/runwhen-contrib/aws-c7n-codecollection @@ -53,6 +57,8 @@ codecollections: owner_icon: https://assets-global.website-files.com/64f9646ad0f39e9ee5c116c4/659f80c7391d64a0ec2a840e_icon_rw-platform.svg owner_email: shea.stewart@runwhen.com description: AWS CloudCustodian CodeBundles + image_source: oci + image_registry: ghcr.io/runwhen-contrib/aws-c7n-codecollection - name: Azure CloudCustodian CodeCollection slug: azure-c7n-codecollection git_url: https://github.com/runwhen-contrib/azure-c7n-codecollection @@ -60,10 +66,17 @@ codecollections: owner_icon: 
https://assets-global.website-files.com/64f9646ad0f39e9ee5c116c4/659f80c7391d64a0ec2a840e_icon_rw-platform.svg owner_email: shea.stewart@runwhen.com description: Azure CloudCustodian CodeBundles + image_source: oci + image_registry: ghcr.io/runwhen-contrib/azure-c7n-codecollection - name: Ternary CodeCollection slug: ternary-codecollection git_url: https://github.com/runwhen-contrib/ternary-codecollection owner: RunWhen owner_icon: https://assets-global.website-files.com/64f9646ad0f39e9ee5c116c4/659f80c7391d64a0ec2a840e_icon_rw-platform.svg owner_email: shea.stewart@runwhen.com - description: Ternary CodeBundles \ No newline at end of file + description: Ternary CodeBundles + # No image_source set: ternary-codecollection has not yet been + # migrated to the per-repo GitHub Actions build (no Dockerfile + workflow + # added). The image catalog will simply skip it on each poll. Add + # `image_source: oci` + `image_registry: ghcr.io/...` once the repo + # has a build-push.yaml workflow producing catalog-shaped tags. \ No newline at end of file From ebfdba0f47e4c5042c26671bd26f568d82613594 Mon Sep 17 00:00:00 2001 From: stewartshea Date: Tue, 12 May 2026 09:01:22 -0400 Subject: [PATCH 03/13] Remove stale duplicate codecollections.yaml copies These two byte-identical orphans were left behind by 4e1e0fc (Registry v2 Initial Release). Verified no code path references them: - All Celery tasks (registry_tasks.py, image_sync_tasks.py, sync_tasks.py) load from "/app/codecollections.yaml" (mounted from the repo-root file via docker-compose: ../../codecollections.yaml:/app/codecollections.yaml:ro). - mcp-server/indexer.py resolves via self.base_dir.parent.parent / "codecollections.yaml", which also lands on the repo-root file. - mcp-server/docker-compose.yml mounts the repo-root file at /app. - Frontend/Admin/openapi.yaml all reference "/app/codecollections.yaml". Removing now to prevent future accidental edits to a non-live config. 
Co-authored-by: Cursor --- cc-registry-v2/backend/codecollections.yaml | 51 --------------------- mcp-server/codecollections.yaml | 51 --------------------- 2 files changed, 102 deletions(-) delete mode 100644 cc-registry-v2/backend/codecollections.yaml delete mode 100644 mcp-server/codecollections.yaml diff --git a/cc-registry-v2/backend/codecollections.yaml b/cc-registry-v2/backend/codecollections.yaml deleted file mode 100644 index e0d4e37e4de6..000000000000 --- a/cc-registry-v2/backend/codecollections.yaml +++ /dev/null @@ -1,51 +0,0 @@ -codecollections: - - name: RunWhen Public CodeCollection - slug: rw-public-codecollection - git_url: https://github.com/runwhen-contrib/rw-public-codecollection - git_ref: main - owner: RunWhen - owner_icon: https://assets-global.website-files.com/64f9646ad0f39e9ee5c116c4/659f80c7391d64a0ec2a840e_icon_rw-platform.svg - owner_email: shea.stewart@runwhen.com - description: Python based CodeCollections that do not leverage a command line binary or bash script - - name: RunWhen CLI CodeCollection - slug: rw-cli-codecollection - git_url: https://github.com/runwhen-contrib/rw-cli-codecollection - owner: RunWhen - owner_icon: https://assets-global.website-files.com/64f9646ad0f39e9ee5c116c4/659f80c7391d64a0ec2a840e_icon_rw-platform.svg - owner_email: shea.stewart@runwhen.com - description: CodeCollections based on command line binaries and bash scripts - - name: RunWhen Generic CodeCollection - slug: rw-generic-codecollection - git_url: https://github.com/runwhen-contrib/rw-generic-codecollection - owner: RunWhen - owner_icon: https://assets-global.website-files.com/64f9646ad0f39e9ee5c116c4/659f80c7391d64a0ec2a840e_icon_rw-platform.svg - owner_email: shea.stewart@runwhen.com - description: Run Generic CLI Commands with User Input - - name: RunWhen Workspace Utilities CodeCollection - slug: rw-workspace-utils - git_url: https://github.com/runwhen-contrib/rw-workspace-utils - owner: RunWhen - owner_icon: 
https://assets-global.website-files.com/64f9646ad0f39e9ee5c116c4/659f80c7391d64a0ec2a840e_icon_rw-platform.svg - owner_email: shea.stewart@runwhen.com - description: Workspace Utilies such as Webhook integrations - - name: AWS CloudCustodian CodeCollection - slug: aws-c7n-codecollection - git_url: https://github.com/runwhen-contrib/aws-c7n-codecollection - owner: RunWhen - owner_icon: https://assets-global.website-files.com/64f9646ad0f39e9ee5c116c4/659f80c7391d64a0ec2a840e_icon_rw-platform.svg - owner_email: shea.stewart@runwhen.com - description: AWS CloudCustodian CodeBundles - - name: Azure CloudCustodian CodeCollection - slug: azure-c7n-codecollection - git_url: https://github.com/runwhen-contrib/azure-c7n-codecollection - owner: RunWhen - owner_icon: https://assets-global.website-files.com/64f9646ad0f39e9ee5c116c4/659f80c7391d64a0ec2a840e_icon_rw-platform.svg - owner_email: shea.stewart@runwhen.com - description: Azure CloudCustodian CodeBundles - - name: Ternary CodeCollection - slug: ternary-codecollection - git_url: https://github.com/runwhen-contrib/ternary-codecollection - owner: RunWhen - owner_icon: https://assets-global.website-files.com/64f9646ad0f39e9ee5c116c4/659f80c7391d64a0ec2a840e_icon_rw-platform.svg - owner_email: shea.stewart@runwhen.com - description: Ternary CodeBundles \ No newline at end of file diff --git a/mcp-server/codecollections.yaml b/mcp-server/codecollections.yaml deleted file mode 100644 index e0d4e37e4de6..000000000000 --- a/mcp-server/codecollections.yaml +++ /dev/null @@ -1,51 +0,0 @@ -codecollections: - - name: RunWhen Public CodeCollection - slug: rw-public-codecollection - git_url: https://github.com/runwhen-contrib/rw-public-codecollection - git_ref: main - owner: RunWhen - owner_icon: https://assets-global.website-files.com/64f9646ad0f39e9ee5c116c4/659f80c7391d64a0ec2a840e_icon_rw-platform.svg - owner_email: shea.stewart@runwhen.com - description: Python based CodeCollections that do not leverage a command line binary or 
bash script - - name: RunWhen CLI CodeCollection - slug: rw-cli-codecollection - git_url: https://github.com/runwhen-contrib/rw-cli-codecollection - owner: RunWhen - owner_icon: https://assets-global.website-files.com/64f9646ad0f39e9ee5c116c4/659f80c7391d64a0ec2a840e_icon_rw-platform.svg - owner_email: shea.stewart@runwhen.com - description: CodeCollections based on command line binaries and bash scripts - - name: RunWhen Generic CodeCollection - slug: rw-generic-codecollection - git_url: https://github.com/runwhen-contrib/rw-generic-codecollection - owner: RunWhen - owner_icon: https://assets-global.website-files.com/64f9646ad0f39e9ee5c116c4/659f80c7391d64a0ec2a840e_icon_rw-platform.svg - owner_email: shea.stewart@runwhen.com - description: Run Generic CLI Commands with User Input - - name: RunWhen Workspace Utilities CodeCollection - slug: rw-workspace-utils - git_url: https://github.com/runwhen-contrib/rw-workspace-utils - owner: RunWhen - owner_icon: https://assets-global.website-files.com/64f9646ad0f39e9ee5c116c4/659f80c7391d64a0ec2a840e_icon_rw-platform.svg - owner_email: shea.stewart@runwhen.com - description: Workspace Utilies such as Webhook integrations - - name: AWS CloudCustodian CodeCollection - slug: aws-c7n-codecollection - git_url: https://github.com/runwhen-contrib/aws-c7n-codecollection - owner: RunWhen - owner_icon: https://assets-global.website-files.com/64f9646ad0f39e9ee5c116c4/659f80c7391d64a0ec2a840e_icon_rw-platform.svg - owner_email: shea.stewart@runwhen.com - description: AWS CloudCustodian CodeBundles - - name: Azure CloudCustodian CodeCollection - slug: azure-c7n-codecollection - git_url: https://github.com/runwhen-contrib/azure-c7n-codecollection - owner: RunWhen - owner_icon: https://assets-global.website-files.com/64f9646ad0f39e9ee5c116c4/659f80c7391d64a0ec2a840e_icon_rw-platform.svg - owner_email: shea.stewart@runwhen.com - description: Azure CloudCustodian CodeBundles - - name: Ternary CodeCollection - slug: ternary-codecollection - 
git_url: https://github.com/runwhen-contrib/ternary-codecollection - owner: RunWhen - owner_icon: https://assets-global.website-files.com/64f9646ad0f39e9ee5c116c4/659f80c7391d64a0ec2a840e_icon_rw-platform.svg - owner_email: shea.stewart@runwhen.com - description: Ternary CodeBundles \ No newline at end of file From e132d97d21d79cd9a65e17faf43a317b2d8ac42e Mon Sep 17 00:00:00 2001 From: stewartshea Date: Tue, 12 May 2026 09:04:00 -0400 Subject: [PATCH 04/13] Add dry-run script for OCI image source validation MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit `scripts/dry_run_oci_sources.py` exercises the real `app.sources` plugins against every CC in `codecollections.yaml` without touching the database, Celery, or FastAPI. For each CC with `image_source` configured it: 1. Loads the source via `get_source(name)` 2. Calls `discover_refs(cc)` against the live registry 3. Calls `resolve_latest(cc, refs)` / `resolve_stable(cc, refs)` …and prints a per-CC summary plus (with `-v`) every parsed `DiscoveredImageRef`. Use cases: - Pre-flight check before flipping a new CC's `image_source` to "oci" - Catching tag-schema regressions (build workflows changing the suffix format would show up as zero parsed refs) - Surfacing transient registry flakiness vs. real misconfiguration Exit codes are intentionally distinct so this can drop into CI later: 0 = healthy 1 = source raised (network / auth / parser error) 2 = source returned zero refs (likely a tag-schema mismatch) First run today flagged three useful real-world findings: - rw-cli, aws-c7n, azure-c7n have only feature-branch / pr-* tags in GHCR — no `main--` build yet, so `resolve_latest` correctly returns `(none)`. These will resolve once the open ccv/* PRs merge. - rw-public, rw-generic, rw-workspace-utils all parse and resolve to a clean `main--` ref pair, exactly as designed. No new runtime deps (uses requests + pyyaml that the backend already pins). 
Co-authored-by: Cursor --- .../backend/scripts/dry_run_oci_sources.py | 274 ++++++++++++++++++ 1 file changed, 274 insertions(+) create mode 100644 cc-registry-v2/backend/scripts/dry_run_oci_sources.py diff --git a/cc-registry-v2/backend/scripts/dry_run_oci_sources.py b/cc-registry-v2/backend/scripts/dry_run_oci_sources.py new file mode 100644 index 000000000000..5f5006b4e962 --- /dev/null +++ b/cc-registry-v2/backend/scripts/dry_run_oci_sources.py @@ -0,0 +1,274 @@ +#!/usr/bin/env python3 +""" +Dry-run the image-sync pipeline against the live registries listed in +codecollections.yaml — without a database, Celery, or the FastAPI app. + +For every CodeCollection that has `image_source` configured, this script: + + 1. Loads the configured `ImageSource` from `app.sources` (oci, static, ...). + 2. Calls `source.discover_refs(cc)` to fetch the full tag list and parse + it into the catalog's `DiscoveredImageRef` shape. + 3. Calls `source.resolve_latest(cc, refs)` and `source.resolve_stable(cc, refs)` + so you can see what the catalog would expose as the canonical pointers. + +It prints a per-CC summary and (with `--verbose`) every parsed ref. It is +purely read-only — no DB writes, no Celery dispatch, no side effects. 
+
+Usage:
+
+    # Default: read ../../codecollections.yaml relative to this file
+    python scripts/dry_run_oci_sources.py
+
+    # Point at a specific config (useful for testing draft changes)
+    python scripts/dry_run_oci_sources.py --config /path/to/codecollections.yaml
+
+    # Filter by slug
+    python scripts/dry_run_oci_sources.py --only rw-cli-codecollection
+
+    # Filter by source type
+    python scripts/dry_run_oci_sources.py --source oci
+
+    # Show every parsed ref, not just the summary
+    python scripts/dry_run_oci_sources.py --verbose
+
+Exit codes:
+    0 = every configured CC discovered at least one ref successfully
+    1 = one or more sources errored (network / auth / parse)
+    2 = one or more sources returned zero refs (likely a tag-schema mismatch)
+
+The last case is the one we care about most before flipping image_source
+from unset -> "oci" in production: it usually means the registry has tags
+but they don't match `<ref>-<cc_sha>-<rt_sha>`. Re-run with
+`--verbose` to see which tags were rejected.
+"""
+from __future__ import annotations
+
+import argparse
+import logging
+import os
+import sys
+from collections import Counter
+from pathlib import Path
+from typing import Optional
+
+# Make `app.sources` importable when this script is invoked directly.
+# Layout: cc-registry-v2/backend/scripts/dry_run_oci_sources.py
+#         cc-registry-v2/backend/app/sources/...
+HERE = Path(__file__).resolve() +BACKEND_DIR = HERE.parent.parent # cc-registry-v2/backend +sys.path.insert(0, str(BACKEND_DIR)) + +import yaml # noqa: E402 (after sys.path tweak) + +from app.sources import DiscoveredImageRef, get_source # noqa: E402 + +logger = logging.getLogger("dry_run_oci_sources") + + +# --------------------------------------------------------------------------- +# config loading +# --------------------------------------------------------------------------- + + +def _default_config_path() -> Path: + """ + Resolve codecollections.yaml the same way the Celery tasks do, but + starting from this file's location instead of `/app/...`. + + Search order (first hit wins): + 1. Explicit env var CC_REGISTRY_YAML + 2. /codecollections.yaml (devcontainer / local checkout) + 3. /app/codecollections.yaml (running inside the backend image) + 4. /workspaces/codecollection-registry/codecollections.yaml + """ + env = os.environ.get("CC_REGISTRY_YAML") + if env: + return Path(env) + + repo_root = BACKEND_DIR.parent.parent # cc-registry-v2/.. = repo root + candidates = [ + repo_root / "codecollections.yaml", + Path("/app/codecollections.yaml"), + Path("/workspaces/codecollection-registry/codecollections.yaml"), + ] + for c in candidates: + if c.exists(): + return c + # Fall back to the first candidate even if it doesn't exist; the caller + # will hit a clearer error from yaml.safe_load. 
+ return candidates[0] + + +def _load_codecollections(config_path: Path) -> list[dict]: + with open(config_path, "r") as f: + data = yaml.safe_load(f) or {} + return data.get("codecollections", []) or [] + + +# --------------------------------------------------------------------------- +# rendering +# --------------------------------------------------------------------------- + + +def _fmt_ref(r: DiscoveredImageRef) -> str: + built = r.built_at.isoformat() if r.built_at else "-" + return ( + f" • {r.image_tag:<60s} ref={r.ref!r:<14s} " + f"type={r.ref_type:<7s} cc={r.commit[:7]} rt={r.rt_revision[:7]} built={built}" + ) + + +def _print_cc_result( + cc: dict, + refs: list[DiscoveredImageRef], + latest: Optional[str], + stable: Optional[str], + verbose: bool, +) -> None: + slug = cc.get("slug", "") + source = cc.get("image_source") + registry = cc.get("image_registry", "-") + default_ref = cc.get("default_ref", "main") + + by_type = Counter(r.ref_type for r in refs) + type_summary = ", ".join(f"{k}={v}" for k, v in sorted(by_type.items())) or "-" + + print(f" source={source} registry={registry} default_ref={default_ref}") + print(f" discovered={len(refs)} refs ({type_summary})") + print(f" latest -> {latest or '(none)'}") + print(f" stable -> {stable or '(none)'}") + if verbose and refs: + print(" ----") + # Sort by ref then tag for readability + for r in sorted(refs, key=lambda r: (r.ref, r.image_tag)): + print(_fmt_ref(r)) + if not refs: + print( + " WARN: source returned 0 refs — likely a tag-schema mismatch. 
" + "Re-run with --verbose against a less restrictive parser, or " + "inspect the registry directly:" + ) + if registry and registry != "-": + host, _, repo = registry.partition("/") + print(f" curl -s https://{host}/v2/{repo}/tags/list?n=20 | jq .") + print() + + +# --------------------------------------------------------------------------- +# main +# --------------------------------------------------------------------------- + + +def main() -> int: + parser = argparse.ArgumentParser( + description=__doc__, formatter_class=argparse.RawDescriptionHelpFormatter + ) + parser.add_argument( + "--config", + type=Path, + default=None, + help="Path to codecollections.yaml (default: auto-discover)", + ) + parser.add_argument( + "--only", + type=str, + default=None, + help="Comma-separated list of CC slugs to include (default: all configured)", + ) + parser.add_argument( + "--source", + type=str, + default=None, + help='Only run CCs whose image_source matches (e.g. "oci", "static")', + ) + parser.add_argument( + "-v", "--verbose", action="store_true", help="List every parsed ref" + ) + parser.add_argument( + "-q", "--quiet", action="store_true", help="Suppress per-source INFO logs" + ) + args = parser.parse_args() + + logging.basicConfig( + level=logging.WARNING if args.quiet else logging.INFO, + format="%(levelname)s %(name)s: %(message)s", + ) + + config_path = args.config or _default_config_path() + if not config_path.exists(): + print(f"ERROR: codecollections.yaml not found at {config_path}", file=sys.stderr) + return 1 + print(f"# Using config: {config_path}\n") + + all_ccs = _load_codecollections(config_path) + if args.only: + wanted = {s.strip() for s in args.only.split(",") if s.strip()} + all_ccs = [c for c in all_ccs if c.get("slug") in wanted] + if args.source: + all_ccs = [c for c in all_ccs if c.get("image_source") == args.source] + + configured = [c for c in all_ccs if c.get("image_source")] + unconfigured = [c for c in all_ccs if not c.get("image_source")] + 
+ if unconfigured and not args.source: + print( + f"# Skipping {len(unconfigured)} CC(s) without image_source: " + + ", ".join(sorted(c.get("slug", "?") for c in unconfigured)) + + "\n" + ) + + if not configured: + print("# No CodeCollections matched the filters; nothing to dry-run.") + return 0 + + errors: list[str] = [] + empty: list[str] = [] + + for cc in configured: + slug = cc.get("slug", "") + source_name = cc.get("image_source") + print(f"== {slug} ==") + + source = get_source(source_name) + if source is None: + msg = f"unknown image_source '{source_name}' for {slug}" + print(f" ERROR: {msg}\n") + errors.append(msg) + continue + + try: + refs = source.discover_refs(cc) + latest = source.resolve_latest(cc, refs) + stable = source.resolve_stable(cc, refs) + except Exception as e: # noqa: BLE001 - we want to keep going + msg = f"{slug}: {type(e).__name__}: {e}" + print(f" ERROR: {msg}\n") + errors.append(msg) + continue + + _print_cc_result(cc, refs, latest, stable, verbose=args.verbose) + if not refs: + empty.append(slug) + + # ---------------- summary ---------------- + print("=" * 70) + print(f"Summary: {len(configured)} CC(s) dry-run, " + f"{len(errors)} error(s), {len(empty)} returned zero refs.") + if errors: + print("\nErrors:") + for e in errors: + print(f" - {e}") + if empty: + print("\nZero-ref CCs (likely tag-schema mismatch):") + for s in empty: + print(f" - {s}") + + if errors: + return 1 + if empty: + return 2 + return 0 + + +if __name__ == "__main__": + sys.exit(main()) From 01bc8d463f03c27dd16eb625a12e152fa3196407 Mon Sep 17 00:00:00 2001 From: stewartshea Date: Tue, 12 May 2026 10:50:12 -0400 Subject: [PATCH 05/13] Enhance GitHub Actions Workflows with GitHub App Token Retrieval - Added steps to retrieve a GitHub App token in both `build-images.yaml` and `release.yaml` workflows. 
- Updated the token usage in the image update steps to utilize the newly retrieved token instead of the previous Personal Access Token, improving security and access management. --- .github/workflows/build-images.yaml | 11 ++++++++++- .github/workflows/release.yaml | 11 ++++++++++- 2 files changed, 20 insertions(+), 2 deletions(-) diff --git a/.github/workflows/build-images.yaml b/.github/workflows/build-images.yaml index e93f42a844a3..b3b1b267a631 100644 --- a/.github/workflows/build-images.yaml +++ b/.github/workflows/build-images.yaml @@ -223,10 +223,19 @@ jobs: needs: [generate-tag, build-backend, build-frontend, build-worker, build-mcp-server] if: needs.generate-tag.outputs.should_push == 'true' steps: + - name: Get GitHub App token + id: app-token + uses: actions/create-github-app-token@v3 + with: + app-id: ${{ secrets.CI_GITHUB_APP_ID }} + private-key: ${{ secrets.CI_GITHUB_APP_PRIVATE_KEY }} + owner: runwhen + repositories: infra-flux-nonprod-shared + - name: Trigger registry-test image update uses: actions/github-script@v7 with: - github-token: ${{ secrets.PAT }} + github-token: ${{ steps.app-token.outputs.token }} script: | const tag = '${{ needs.generate-tag.outputs.tag }}'; await github.rest.actions.createWorkflowDispatch({ diff --git a/.github/workflows/release.yaml b/.github/workflows/release.yaml index add6d166afff..c6f5ce4284c3 100644 --- a/.github/workflows/release.yaml +++ b/.github/workflows/release.yaml @@ -234,10 +234,19 @@ jobs: runs-on: ubuntu-latest needs: [create-release, build-backend, build-frontend, build-worker, build-mcp-server] steps: + - name: Get GitHub App token + id: app-token + uses: actions/create-github-app-token@v3 + with: + app-id: ${{ secrets.CI_GITHUB_APP_ID }} + private-key: ${{ secrets.CI_GITHUB_APP_PRIVATE_KEY }} + owner: runwhen + repositories: infra-flux-nonprod-shared + - name: Trigger registry-prod image update uses: actions/github-script@v7 with: - github-token: ${{ secrets.PAT }} + github-token: ${{ 
steps.app-token.outputs.token }} script: | const tag = '${{ needs.create-release.outputs.release_tag }}'; await github.rest.actions.createWorkflowDispatch({ From 3e9a34164f044e82e79dd75e752c222064afb290 Mon Sep 17 00:00:00 2001 From: stewartshea Date: Tue, 12 May 2026 11:22:11 -0400 Subject: [PATCH 06/13] Document the CCV catalog (tag contract, API, ops) Adds cc-registry-v2/docs/CCV.md as the canonical reference for the PAPI-facing CodeCollection image catalog. Covers: - End-to-end flow (per-repo CI -> OCI registry -> 5-min poll -> codecollection_versions -> /api/v1/catalog -> PAPI) - codecollections.yaml schema (image_source, image_registry, default_ref, static_path, visibility) with public/hidden semantics pinned to "UX toggle, not security boundary" - The -- tag-schema contract that build pipelines must honor, including the parser regex and worked examples - latest/stable pointer resolution (newest build on default_ref; highest semver-looking ref with fallback) - Full API reference with copy-pasteable curl examples for /codecollections, /{slug}, /{slug}/refs[/{ref}], /{slug}/resolve - Pluggable sources: oci (default), static (JSON file for self-hosted catalogs + tests), and CC_REGISTRY_EXTRA_SOURCES for custom plugins - Sync cadence (5-min Celery beat) + how to trigger a manual run - Operational tooling: dry-run script with its exit-code contract - Troubleshooting matrix (zero refs, null latest, transient timeouts, unknown source) with the actual curl commands to diagnose each - How PAPI consumes the catalog vs. the previous corestate-operator flow (CRD-less, one HTTP read per workspace reconcile) Also threads the new doc through both indexes (repo-root README and cc-registry-v2/docs/README.md). 
Co-authored-by: Cursor --- README.md | 1 + cc-registry-v2/docs/CCV.md | 406 ++++++++++++++++++++++++++++++++++ cc-registry-v2/docs/README.md | 1 + 3 files changed, 408 insertions(+) create mode 100644 cc-registry-v2/docs/CCV.md diff --git a/README.md b/README.md index 3fb8986f5ee9..808435badeb9 100644 --- a/README.md +++ b/README.md @@ -46,6 +46,7 @@ See [mcp-server/README.md](mcp-server/README.md) for details. | Topic | Location | |---|---| | Architecture | [cc-registry-v2/docs/ARCHITECTURE.md](cc-registry-v2/docs/ARCHITECTURE.md) | +| CCV catalog (image tracking + PAPI API) | [cc-registry-v2/docs/CCV.md](cc-registry-v2/docs/CCV.md) | | Configuration | [cc-registry-v2/docs/CONFIGURATION.md](cc-registry-v2/docs/CONFIGURATION.md) | | Indexing pipeline | [cc-registry-v2/docs/MCP_WORKFLOW.md](cc-registry-v2/docs/MCP_WORKFLOW.md) | | Deployment | [cc-registry-v2/docs/DEPLOYMENT_GUIDE.md](cc-registry-v2/docs/DEPLOYMENT_GUIDE.md) | diff --git a/cc-registry-v2/docs/CCV.md b/cc-registry-v2/docs/CCV.md new file mode 100644 index 000000000000..27c05ce5b63e --- /dev/null +++ b/cc-registry-v2/docs/CCV.md @@ -0,0 +1,406 @@ +# CCV Catalog (CodeCollection Versions) + +The **CCV catalog** is the read-only mirror of every published image tag for +every tracked CodeCollection. PAPI calls into it on the workspace-reconcile +path to translate a logical reference like `rw-cli-codecollection@main` or +`rw-cli-codecollection@stable` into a concrete, pullable OCI image tag. This +document covers everything you need to: + +- Add a new CodeCollection to the catalog +- Understand the contract a CC's build pipeline must honor +- Query the catalog over HTTP +- Diagnose why a CC is showing zero refs or stale pointers + +If you just need a one-screen summary, skip to [TL;DR](#tldr). 
+ +--- + +## TL;DR + +``` + ┌──────────────────────────────┐ + per-repo CI │ codecollection build-push.yaml│ → pushes ghcr.io/.../:-- + └──────────────────────────────┘ + │ + ▼ (every 5 min — Celery beat) + ┌──────────────────────────────┐ + cc-registry-v2│ sync_image_tags_task │ → ImageSource.discover_refs(cc) + │ (per CC in codecollections.yaml)│ resolve_latest / resolve_stable + └──────────────────────────────┘ + │ + ▼ + ┌──────────────────────────────┐ + │ codecollection_versions (DB) │ ← one row per discovered image tag + └──────────────────────────────┘ + │ + ▼ read-only, no auth + ┌──────────────────────────────┐ + PAPI│ GET /api/v1/catalog/... │ + └──────────────────────────────┘ +``` + +The catalog **does not build images** and **does not push tags**. Codecollection +repos own their builds (via their own `.github/workflows/build-push.yaml`); the +catalog polls each repo's registry every 5 minutes and mirrors what it finds. + +--- + +## Registering a CodeCollection + +Catalog membership is declared in [`codecollections.yaml`](../../codecollections.yaml) +at the repo root. + +### Minimal entry (catalog-tracked) + +```yaml +- name: My CodeCollection + slug: my-codecollection + git_url: https://github.com/runwhen-contrib/my-codecollection + owner: RunWhen + owner_icon: https://.../icon.svg + owner_email: you@example.com + description: Short blurb + image_source: oci # turn on tracking + image_registry: ghcr.io/runwhen-contrib/my-codecollection # OCI repo path +``` + +Omit `image_source` to keep the CC indexed by the website but skip image +catalog polling — useful for CCs that don't yet have a build workflow. + +### All optional fields the image catalog honors + +| Field | Default | Effect | +|---|---|---| +| `image_source` | _(unset)_ | Which plugin drives discovery: `oci`, `static`, or a name registered via `CC_REGISTRY_EXTRA_SOURCES`. If unset, the CC is skipped on every poll. 
| 
+| `image_registry` | _(required for `oci`)_ | The OCI repo path the `oci` source lists tags from (e.g. `ghcr.io/runwhen-contrib/rw-cli-codecollection`). No scheme, no trailing slash. |
+| `default_ref` | `main` | Git ref whose newest build is considered `latest`. |
+| `static_path` | _(required for `static`)_ | Filesystem path to a JSON file with refs (see [Sources → static](#static)). |
+| `visibility` | `public` | `public` (default) or `hidden`. Hidden CCs are still catalogued for PAPI but excluded from website / MCP / AI search surfaces. **OCI ACLs remain the source of truth for who can pull the image.** This is a UX toggle, not a security boundary. |
+
+---
+
+## The tag-schema contract
+
+The default `oci` source parses tags with this regex:
+
+```
+^(?P<ref>.+?)-(?P<cc_sha>[0-9a-f]{7,40})-(?P<rt_sha>[0-9a-f]{7,40})$
+```
+
+Translated to English: every tag must end in two hex SHA groups (7-40 chars
+each) separated by hyphens, and everything before those is the git ref.
+
+| Tag | Parsed `ref` | Parsed `cc_sha` | Parsed `rt_sha` |
+|---|---|---|---|
+| `main-c1a2b3d-e4f5a6b` | `main` | `c1a2b3d` | `e4f5a6b` |
+| `pr-42-9988aab-e4f5a6b` | `pr-42` | `9988aab` | `e4f5a6b` |
+| `v1.2.0-aabbccd-e4f5a6b` | `v1.2.0` | `aabbccd` | `e4f5a6b` |
+| `latest` | _(rejected)_ | _(rejected)_ | _(rejected)_ |
+| `main-only` | _(rejected — missing rt_sha)_ |
+
+**`cc_sha`** is the codecollection commit. **`rt_sha`** is the
+`rw-base-runtime` commit that was baked in at build time. The dual-sha shape
+is what lets the catalog reason about which runtime + which CC produced a
+given image without manifest introspection.
+
+Tags that don't match the regex are silently ignored. That's a feature, not a
+bug — it lets `:latest` and other floating tags coexist on the registry
+without confusing the catalog. The trade-off is that **misconfigured
+pipelines produce zero parsed refs**, which the catalog reports but cannot
+auto-recover from. See [troubleshooting](#troubleshooting).
+ +### Pointers + +After tags are parsed, two pointers are resolved per CC: + +- **`latest`** — newest build whose `ref` equals `default_ref` (defaults to + `main`). "Newest" is by `built_at` from the OCI manifest when available, or + lexicographic tag order otherwise (the dual-sha suffix makes this + monotonic in practice). +- **`stable`** — highest semver-looking `ref` (`v?\d+\.\d+(\.\d+)?...`). Falls + back to `latest` if no semver tag exists. + +--- + +## API reference + +Base path: `/api/v1/catalog`. All endpoints are GET-only and unauthenticated. +Responses are JSON. Pretty-print is up to the caller. + +### `GET /codecollections` + +List every tracked CC plus its currently-resolved pointers. + +| Query param | Default | Meaning | +|---|---|---| +| `visibility` | _(none)_ | Filter to `public` or `hidden`. Omit to see all. | +| `only_with_image` | `true` | Skip CCs that have no parsed image tags yet. Set `false` to see opted-in CCs that haven't built anything. | + +```bash +curl https://registry.runwhen.com/api/v1/catalog/codecollections | jq '.[0]' +``` + +```json +{ + "slug": "rw-cli-codecollection", + "name": "RunWhen CLI CodeCollection", + "git_url": "https://github.com/runwhen-contrib/rw-cli-codecollection", + "visibility": "public", + "latest_image_tag": "main-c1a2b3d-e4f5a6b", + "stable_image_tag": "v1.2.0-aabbccd-e4f5a6b", + "image_registry": "ghcr.io/runwhen-contrib/rw-cli-codecollection", + "last_synced": "2026-05-12T14:55:03Z" +} +``` + +### `GET /codecollections/{slug}` + +Same as above but includes the full list of known refs in `.refs[]`. + +### `GET /codecollections/{slug}/refs` + +Just the refs for one CC. Add `?include_inactive=true` to see refs that +were tracked previously but have since disappeared from the registry. 
+ +```bash +curl https://registry.runwhen.com/api/v1/catalog/codecollections/rw-cli-codecollection/refs | jq '.[] | .ref' | sort -u +``` + +### `GET /codecollections/{slug}/refs/{ref}` + +Look up a single ref by name (branch or tag). Returns 404 if the ref isn't +tracked or has no image. + +### `GET /codecollections/{slug}/resolve` + +The endpoint PAPI hits on the workspace-reconcile path. Pass **exactly one** +of `pointer` or `ref`: + +```bash +# Named pointer +curl 'https://registry.runwhen.com/api/v1/catalog/codecollections/rw-cli-codecollection/resolve?pointer=latest' + +# Specific git ref +curl 'https://registry.runwhen.com/api/v1/catalog/codecollections/rw-cli-codecollection/resolve?ref=v1.2.0' +``` + +Response: + +```json +{ + "slug": "rw-cli-codecollection", + "requested": "latest", + "image_tag": "main-c1a2b3d-e4f5a6b", + "image_registry": "ghcr.io/runwhen-contrib/rw-cli-codecollection", + "image_digest": null, + "commit_hash": "c1a2b3def123...", + "rt_revision": "e4f5a6b789..." +} +``` + +The full pull reference for the workspace's runner is +`{image_registry}:{image_tag}` (or `{image_registry}@{image_digest}` once +digest pinning rolls in). + +### Visibility filter + +`/codecollections` and `/codecollections/{slug}` **intentionally bypass** the +`visibility = 'public'` filter that protects the registry website. PAPI +needs to resolve images for hidden CCs (workspaces use them), so it sees +the full list. The website / MCP / AI search go through different routers +(`versions.py`, `cc.py`, etc.) which apply `public_only()`. + +--- + +## Sources + +Each `image_source` is a plugin under +[`backend/app/sources/`](../backend/app/sources/) implementing +`ImageSource.discover_refs / resolve_latest / resolve_stable`. Built-ins: + +### `oci` + +Walks the OCI Distribution v2 `/v2//tags/list` endpoint with +`Link`-header pagination. Handles the anonymous-bearer-token dance for GHCR +and Docker Hub. No auth secrets required — the catalog only reads public +listings. 
+
+Configure with `image_registry: <registry-host>/<repo-path>`.
+
+### `static`
+
+Reads refs from a checked-in JSON file. Use for:
+- Customer self-hosted catalogs where image discovery happens in the
+  customer's own pipeline and lands as a committed file
+- Test fixtures
+- Pinning a CC to a known-good ref set without polling a registry
+
+Configure with `image_source: static` + `static_path: /path/to/refs.json`.
+JSON shape is documented inline in
+[`sources/static.py`](../backend/app/sources/static.py).
+
+### Custom (third-party) sources
+
+Set the env var `CC_REGISTRY_EXTRA_SOURCES` to a colon-separated list of
+import paths. Each module must expose a top-level `SOURCE` of type
+`ImageSource`. Useful for self-hosted Harbor / internal registries with
+non-standard tag schemas, without forking the catalog.
+
+```bash
+CC_REGISTRY_EXTRA_SOURCES=mycorp.harbor_source:mycorp.gerrit_source
+```
+
+---
+
+## Sync schedule
+
+The Celery beat schedule lives in
+[`cc-registry-v2/schedules.yaml`](../schedules.yaml). The relevant entry:
+
+```yaml
+- name: sync-image-tags
+  task: app.tasks.image_sync_tasks.sync_image_tags_task
+  description: "Poll OCI registries for each CC and refresh the image catalog"
+  schedule_type: interval
+  interval:
+    minutes: 5
+  enabled: true
+```
+
+The task is idempotent (one HTTP listing per CC, upsert by `(cc_id, version_name)`)
+and fast (the slow leg is the network call, capped at a 10s read timeout per
+CC). A CC that errors does not block the others — errors are logged and
+included in the task's return summary.
+
+### Manually triggering a sync
+
+From inside the backend container:
+
+```bash
+docker compose exec backend python -c "from app.tasks.image_sync_tasks import sync_image_tags_task; print(sync_image_tags_task.delay())"
+```
+
+Or via the admin task-runner UI in the registry frontend (Admin → Tasks).
+ +--- + +## Operational tools + +### `scripts/dry_run_oci_sources.py` — offline catalog validation + +Exercises the real source plugins against every CC in `codecollections.yaml` +without touching the database, Celery, or FastAPI. Useful for: + +- Pre-flight check before flipping a new CC's `image_source` to `oci` +- Catching tag-schema regressions (a pipeline emitting `-` + with no `rt_sha` would show up as zero parsed refs) +- Separating transient registry flakiness from real misconfiguration + +```bash +cd cc-registry-v2/backend +python scripts/dry_run_oci_sources.py # full pre-flight +python scripts/dry_run_oci_sources.py --only rw-cli-codecollection -v +python scripts/dry_run_oci_sources.py --source oci -q +``` + +Exit codes: +- `0` — every configured CC discovered ≥1 ref +- `1` — one or more sources raised (network / auth / parse error) +- `2` — one or more sources returned 0 refs (likely a tag-schema mismatch) + +The non-zero exit codes are designed so you can drop this into a CI job +later. See the script's module docstring for the full CLI. + +--- + +## Troubleshooting + +### A CC shows 0 refs in the catalog + +Most likely the build pipeline isn't producing tags that match the +`--` regex. Verify: + +```bash +# Public GHCR repos do not require auth for tag listing +curl -s https://ghcr.io/v2/runwhen-contrib//tags/list?n=50 | jq .tags +``` + +If the tags look like `latest`, `main`, `2026-05-12`, or `sha-<7chars>`, +they will not match. The build workflow has to emit catalog-shaped tags +on top of (or instead of) those. See the +[`rw-cli-codecollection/.github/workflows/build-push.yaml`](https://github.com/runwhen-contrib/rw-cli-codecollection/blob/main/.github/workflows/build-push.yaml) +template; the `prepare` job computes `tags:` with the canonical schema. + +The dry-run script will surface this case explicitly with exit code 2. + +### `latest_image_tag` is null but refs exist + +The CC has builds, but none of them are on `default_ref`. 
Run:
+
+```bash
+curl https://registry.runwhen.com/api/v1/catalog/codecollections/<slug>/refs | jq '[.[].ref] | unique'
+```
+
+If you only see `pr-*` or feature-branch refs, no PR has merged to `main`
+yet. Once it does, the next sync (≤ 5 min) will populate `latest`.
+
+### `stable_image_tag` equals `latest_image_tag`
+
+That's by design — `stable` falls back to `latest` when no semver-looking
+ref (`v1.2.3`, etc.) exists for the CC. Tag a release on the codecollection
+repo to populate `stable` independently.
+
+### Transient `ReadTimeout` against a registry
+
+The default per-CC timeout is 10s. GHCR occasionally goes slow on a
+specific repo for ~30s at a time; the next sync picks up where this one
+left off. If you see persistent timeouts, raise it via `OCISource(timeout=...)`
+in `app/sources/registry.py`.
+
+### "Unknown image_source" errors
+
+A typo in `codecollections.yaml` or an `image_source: my-custom` that
+isn't registered. Check the `SOURCE_REGISTRY` in
+[`backend/app/sources/registry.py`](../backend/app/sources/registry.py)
+and `CC_REGISTRY_EXTRA_SOURCES` env if you're plugging in a custom one.
+
+---
+
+## How PAPI consumes the catalog
+
+PAPI does not run a registry of its own. On the workspace-reconcile path
+it makes one HTTP call per used CC:
+
+```
+GET /api/v1/catalog/codecollections/<slug>/resolve?pointer=<latest|stable>
+```
+
+…and rewrites the workspace's container manifest to use `{image_registry}:{image_tag}`
+returned by that call. Workspaces that pin to a specific git ref instead
+of a pointer go through `?ref=<git-ref>`.
+ +This is the radically-simple replacement for the previous +`corestate-operator` flow that ran inside each remote cluster: + +| Before | After | +|---|---| +| `corestate-operator` per cluster | one HTTP read per workspace reconcile | +| CRDs in remote clusters | none (zero remote install footprint) | +| build-manager polls & pushes to registry | each CC repo owns its build via GitHub Actions | +| custom image-listing service | OCI Distribution v2 (vendor standard) | + +See the [radically-simple design doc](/docs/migration/radically-simple-design.md) +in `platform-robot-runtime` for the full rationale. + +--- + +## Related docs + +- [`ARCHITECTURE.md`](ARCHITECTURE.md) — services, data flow, DB schema +- [`CONFIGURATION.md`](CONFIGURATION.md) — environment variables for the + backend container +- [`SCHEDULES.md`](SCHEDULES.md) — Celery beat schedule semantics +- [`../backend/app/sources/oci.py`](../backend/app/sources/oci.py) — + reference implementation of the tag parser +- [`../backend/scripts/dry_run_oci_sources.py`](../backend/scripts/dry_run_oci_sources.py) + — offline catalog validation diff --git a/cc-registry-v2/docs/README.md b/cc-registry-v2/docs/README.md index f6f54457e018..157996f1065e 100644 --- a/cc-registry-v2/docs/README.md +++ b/cc-registry-v2/docs/README.md @@ -5,6 +5,7 @@ All project documentation, organized by topic. 
## Architecture and Design - **[ARCHITECTURE.md](ARCHITECTURE.md)** - System architecture: services, data flow, PostgreSQL + pgvector, MCP server, Celery tasks +- **[CCV.md](CCV.md)** - CodeCollection Version catalog: tag-schema contract, image-source plugins, `/api/v1/catalog` endpoints, PAPI integration - **[MCP_WORKFLOW.md](MCP_WORKFLOW.md)** - Document indexing pipeline, embedding generation, vector store, and search flow - **[CHAT.md](CHAT.md)** - Chat system architecture, dual search pipeline, LLM synthesis, follow-up detection From ff89b6d2fe8fe32f8e07c58c7e1e53bc689cba64 Mon Sep 17 00:00:00 2001 From: stewartshea Date: Tue, 12 May 2026 11:32:24 -0400 Subject: [PATCH 07/13] Expose API docs at /api/docs + add manual catalog sync trigger MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Two operability fixes prompted by hitting registry-test: 1) /docs / /redoc / /openapi.json were unreachable through the production ingress. The frontend SPA owns /, and only /api/* routes to the backend, so FastAPI's default docs URLs (mounted at the container root) were being served as the SPA's index.html. Move them under /api/: - docs_url -> /api/docs - redoc_url -> /api/redoc - openapi_url -> /api/openapi.json - hand-written -> /api/openapi.yaml The local-dev URLs change correspondingly (localhost:8001/api/docs); updated cc-registry-v2/README.md, start.sh, and docs/CONFIGURATION.md to match. MCP server (port 8000) is unaffected — separate FastAPI app. 2) Added POST /api/v1/tasks/sync-image-tags so operators can kick the CCV image catalog sync on demand instead of waiting up to 5 minutes for the Celery beat. Mirrors the existing /sync-collections pattern (admin bearer auth, returns the Celery task_id for status polling). This is the same task the scheduler runs every 5 min — useful immediately after a deploy or when validating a new image_source config. CCV.md updated with both the live Swagger UI URL and the new curl recipe. 
Co-authored-by: Cursor --- cc-registry-v2/README.md | 7 +++-- cc-registry-v2/backend/app/main.py | 29 +++++++++++++------- cc-registry-v2/backend/app/routers/tasks.py | 30 +++++++++++++++++++++ cc-registry-v2/docs/CCV.md | 28 +++++++++++++++++-- cc-registry-v2/docs/CONFIGURATION.md | 2 +- cc-registry-v2/start.sh | 2 +- 6 files changed, 83 insertions(+), 15 deletions(-) diff --git a/cc-registry-v2/README.md b/cc-registry-v2/README.md index e5f12fa1401e..0c06b6e5c719 100644 --- a/cc-registry-v2/README.md +++ b/cc-registry-v2/README.md @@ -78,7 +78,7 @@ For full architecture details, see [docs/ARCHITECTURE.md](docs/ARCHITECTURE.md). 3. **Access the application** - Frontend: http://localhost:3000 - Backend API: http://localhost:8001 - - API Documentation: http://localhost:8001/docs + - API Documentation: http://localhost:8001/api/docs - Task Monitor (Flower): http://localhost:5555 - MCP Server: http://localhost:8000 (optional - see [MCP Server Integration](MCP_SERVER_INTEGRATION.md)) @@ -213,7 +213,10 @@ codecollection-registry/ ## API Documentation -Visit http://localhost:8001/docs for interactive API documentation. +Visit http://localhost:8001/api/docs for interactive API documentation. In +production the same path is exposed through the ingress at +`https:///api/docs` (the frontend SPA owns `/`, so the backend's +Swagger UI is intentionally mounted under `/api/`). ## Configuration diff --git a/cc-registry-v2/backend/app/main.py b/cc-registry-v2/backend/app/main.py index 501771847277..7266ceac6f35 100644 --- a/cc-registry-v2/backend/app/main.py +++ b/cc-registry-v2/backend/app/main.py @@ -17,13 +17,20 @@ # Database tables are now managed via Alembic migrations # Migrations run automatically on container startup via run_migrations.py -# Create FastAPI app +# Create FastAPI app. 
+# +# IMPORTANT: docs / redoc / openapi.json are mounted under /api/ so they +# are reachable through the production ingress, which only routes /api/* +# to the backend (everything else falls through to the frontend SPA). +# Changing these paths is a breaking change for anyone who has bookmarked +# /docs against a local-only backend; update bookmarks to /api/docs. app = FastAPI( title=settings.PROJECT_NAME, - description="Interactive CodeCollection Registry API — see /openapi.yaml for the full spec.", + description="Interactive CodeCollection Registry API — see /api/openapi.yaml for the full spec.", version="2.0.0", - docs_url="/docs", - redoc_url="/redoc", + docs_url="/api/docs", + redoc_url="/api/redoc", + openapi_url="/api/openapi.json", ) # Add middleware @@ -53,7 +60,7 @@ async def global_exception_handler(request: Request, exc: Exception): content={"detail": "Internal server error"} ) -@app.get("/openapi.yaml", include_in_schema=False) +@app.get("/api/openapi.yaml", include_in_schema=False) async def openapi_yaml(): """Serve the hand-written OpenAPI spec as YAML.""" from pathlib import Path @@ -64,15 +71,19 @@ async def openapi_yaml(): raise HTTPException(status_code=404, detail="openapi.yaml not found") -@app.get("/") +# / is owned by the frontend SPA in production; this handler is mostly a +# convenience for direct backend access during local development. 
+@app.get("/", include_in_schema=False) +@app.get("/api", include_in_schema=False) async def root(): """Root endpoint""" return { "message": "CodeCollection Registry API", "version": "2.0.0", - "docs": "/docs", - "redoc": "/redoc", - "openapi_yaml": "/openapi.yaml", + "docs": "/api/docs", + "redoc": "/api/redoc", + "openapi_json": "/api/openapi.json", + "openapi_yaml": "/api/openapi.yaml", "health": "/api/v1/health" } diff --git a/cc-registry-v2/backend/app/routers/tasks.py b/cc-registry-v2/backend/app/routers/tasks.py index b26ae0dee2af..8cacb6fba714 100644 --- a/cc-registry-v2/backend/app/routers/tasks.py +++ b/cc-registry-v2/backend/app/routers/tasks.py @@ -128,6 +128,36 @@ async def trigger_sync_single_collection( raise HTTPException(status_code=500, detail=str(e)) +@router.post("/sync-image-tags", response_model=TaskResponse) +async def trigger_sync_image_tags( + _: dict = Depends(verify_admin_token) +): + """SYNC: Refresh the CCV image catalog by polling each CC's image_source. + + Reads codecollections.yaml, runs the configured ImageSource plugin + (e.g. OCI) per CC, and upserts CodeCollectionVersion rows with the + discovered image_tag / image_registry / commit_hash / rt_revision. + + Idempotent. Safe to call on demand from the admin UI when waiting on + the 5-minute beat schedule is too slow, or when debugging why + /api/v1/catalog is missing image data. + + See docs/CCV.md for the full pipeline. 
+ """ + try: + from app.tasks.image_sync_tasks import sync_image_tags_task + + task = sync_image_tags_task.apply_async() + + return TaskResponse( + task_id=task.id, + status="started", + message="Image-tag catalog sync started", + ) + except Exception as e: + raise HTTPException(status_code=500, detail=str(e)) + + @router.post("/parse-codebundles", response_model=TaskResponse) async def trigger_parse_codebundles( _: dict = Depends(verify_admin_token) diff --git a/cc-registry-v2/docs/CCV.md b/cc-registry-v2/docs/CCV.md index 27c05ce5b63e..a287e46f0587 100644 --- a/cc-registry-v2/docs/CCV.md +++ b/cc-registry-v2/docs/CCV.md @@ -127,6 +127,13 @@ After tags are parsed, two pointers are resolved per CC: Base path: `/api/v1/catalog`. All endpoints are GET-only and unauthenticated. Responses are JSON. Pretty-print is up to the caller. +> **Interactive Swagger UI:** `https:///api/docs` (e.g. +> `https://registry-test.shared.runwhen.com/api/docs`). The OpenAPI JSON +> schema lives at `/api/openapi.json` and the hand-written YAML mirror at +> `/api/openapi.yaml`. Note the `/api/` prefix — the frontend SPA owns `/` +> at the public hostname, so the backend's docs UI is intentionally mounted +> under `/api/`. + ### `GET /codecollections` List every tracked CC plus its currently-resolved pointers. @@ -274,13 +281,30 @@ included in the task's return summary. ### Manually triggering a sync -From inside the backend container: +**Over HTTP** (requires the admin bearer token — any value starting with +`admin-` per `verify_admin_token`): + +```bash +curl -X POST \ + -H "Authorization: Bearer admin-" \ + https://registry-test.shared.runwhen.com/api/v1/tasks/sync-image-tags +``` + +Response is a Celery task id you can poll at +`GET /api/v1/tasks/status/{task_id}`. 
+ +**From inside the backend container** (no auth needed): ```bash docker compose exec backend python -c "from app.tasks.image_sync_tasks import sync_image_tags_task; print(sync_image_tags_task.delay())" ``` -Or via the admin task-runner UI in the registry frontend (Admin → Tasks). +**From the admin UI:** Admin → Tasks → "Sync Image Tags" (same endpoint +under the hood). + +Use the HTTP path when the 5-minute beat schedule is too slow, or when +debugging why `/api/v1/catalog` is missing image data after a fresh +deploy. --- diff --git a/cc-registry-v2/docs/CONFIGURATION.md b/cc-registry-v2/docs/CONFIGURATION.md index 6ed84fa64666..5e2a18fd337c 100644 --- a/cc-registry-v2/docs/CONFIGURATION.md +++ b/cc-registry-v2/docs/CONFIGURATION.md @@ -24,7 +24,7 @@ The file is loaded by `docker-compose.yml` as `env_file` for the backend, worker |---|---|---| | Frontend | 3000 | http://localhost:3000 | | Backend API | 8001 | http://localhost:8001/api/v1/ | -| Backend Swagger | 8001 | http://localhost:8001/docs | +| Backend Swagger | 8001 | http://localhost:8001/api/docs | | MCP Server | 8000 | http://localhost:8000 | | MCP Server Docs | 8000 | http://localhost:8000/docs | | PostgreSQL | 5432 | `postgresql://user:password@localhost:5432/codecollection_registry` | diff --git a/cc-registry-v2/start.sh b/cc-registry-v2/start.sh index 0e15af998027..960fe634a6c9 100755 --- a/cc-registry-v2/start.sh +++ b/cc-registry-v2/start.sh @@ -25,7 +25,7 @@ echo "" echo "📱 Once ready, access the application at:" echo " Frontend: http://localhost:3000" echo " Backend API: http://localhost:8001" -echo " API Docs: http://localhost:8001/docs" +echo " API Docs: http://localhost:8001/api/docs" echo " Task Monitor: http://localhost:5555" echo "" echo "🔧 Useful commands:" From 2fd8dfeb37f73a3c56213c3f0a326a3b584a8d17 Mon Sep 17 00:00:00 2001 From: stewartshea Date: Tue, 12 May 2026 11:44:43 -0400 Subject: [PATCH 08/13] Refactor Redis URL configuration in settings and Celery broker setup - Updated the 
logic for constructing the REDIS_URL to prioritize Redis Sentinel when REDIS_SENTINEL_HOSTS is set, ensuring proper handling of both REDIS_URL and Sentinel configurations. - Enhanced the _configure_broker_url function to clarify precedence rules and added guardrails to prevent misconfigurations that could lead to connection errors. - Improved logging to provide clearer information about the chosen Redis configuration, aiding in troubleshooting and deployment clarity. --- cc-registry-v2/backend/app/core/config.py | 40 +++++++---- .../backend/app/tasks/celery_app.py | 67 +++++++++++++------ 2 files changed, 72 insertions(+), 35 deletions(-) diff --git a/cc-registry-v2/backend/app/core/config.py b/cc-registry-v2/backend/app/core/config.py index 12a39e90e953..931345516743 100644 --- a/cc-registry-v2/backend/app/core/config.py +++ b/cc-registry-v2/backend/app/core/config.py @@ -84,20 +84,32 @@ def construct_urls(self): # Fallback to default for development self.DATABASE_URL = "postgresql://user:password@database:5432/codecollection_registry" - # Build REDIS_URL from Sentinel config or components if not provided - if not self.REDIS_URL: - if self.REDIS_SENTINEL_HOSTS: - # For Redis Sentinel, we'll use a sentinel:// URL format - # Format: sentinel://[:password@]host1:port1,host2:port2/service_name/db_number - auth = f":{self.REDIS_PASSWORD}@" if self.REDIS_PASSWORD else "" - self.REDIS_URL = f"sentinel://{auth}{self.REDIS_SENTINEL_HOSTS}/{self.REDIS_SENTINEL_MASTER}/{self.REDIS_DB}" - - # Important: Don't let REDIS_URL override our explicit REDIS_DB setting - # When using Sentinel, REDIS_DB must remain as the integer/string we set explicitly - logger.info(f"Constructed Sentinel URL. REDIS_DB remains: {self.REDIS_DB} (type: {type(self.REDIS_DB)})") - else: - # Fallback to default for development - self.REDIS_URL = "redis://redis:6379/0" + # Build REDIS_URL. Sentinel ALWAYS wins when REDIS_SENTINEL_HOSTS + # is set, even if a REDIS_URL was also provided via env. 
Helm + # charts commonly set both, and pointing a Redis client at a + # Sentinel data-plane port (26379) results in "Only HELLO + # messages are accepted" errors on every command. See + # tasks/celery_app.py::_configure_broker_url for the matching + # Celery-side precedence rule. + if self.REDIS_SENTINEL_HOSTS: + if self.REDIS_URL: + logger.info( + "Both REDIS_SENTINEL_HOSTS and REDIS_URL are set; " + "preferring Sentinel. Remove REDIS_URL from the deployment " + "to silence this notice." + ) + # Format: sentinel://[:password@]host1:port1,host2:port2/service_name/db_number + auth = f":{self.REDIS_PASSWORD}@" if self.REDIS_PASSWORD else "" + self.REDIS_URL = ( + f"sentinel://{auth}{self.REDIS_SENTINEL_HOSTS}/" + f"{self.REDIS_SENTINEL_MASTER}/{self.REDIS_DB}" + ) + logger.info( + f"Constructed Sentinel URL. REDIS_DB remains: {self.REDIS_DB} " + f"(type: {type(self.REDIS_DB)})" + ) + elif not self.REDIS_URL: + self.REDIS_URL = "redis://redis:6379/0" # Validate REDIS_DB is correct type if self.REDIS_SENTINEL_HOSTS: diff --git a/cc-registry-v2/backend/app/tasks/celery_app.py b/cc-registry-v2/backend/app/tasks/celery_app.py index 9bfa001cfeb0..e60abaa0681a 100644 --- a/cc-registry-v2/backend/app/tasks/celery_app.py +++ b/cc-registry-v2/backend/app/tasks/celery_app.py @@ -14,31 +14,45 @@ logger = logging.getLogger(__name__) def _configure_broker_url(): - """Configure broker URL for Redis or Redis Sentinel""" - if settings.REDIS_SENTINEL_HOSTS and not (settings.REDIS_URL and settings.REDIS_URL.startswith('redis://')): - # For Sentinel, we use sentinel:// URL which Kombu/redis-py supports - # Format: sentinel://[:password@]host1:port1;host2:port2;host3:port3 - # Then master_name and db are passed via transport_options - - # Parse and convert to semicolon-separated (Kombu format) - sentinel_hosts = [] - for host_port in settings.REDIS_SENTINEL_HOSTS.split(','): - sentinel_hosts.append(host_port.strip()) - + """Configure broker URL for Redis or Redis Sentinel. 
+ + Precedence rules: + 1. If REDIS_SENTINEL_HOSTS is set, always use the sentinel:// scheme + with proper transport_options. This wins even when REDIS_URL is + also set, because Helm charts commonly set both and silently + speaking Redis protocol to a Sentinel endpoint (port 26379) is a + fail-closed nightmare: Sentinel rejects every non-HELLO command, + which makes the entire Celery beat/worker fan unable to dispatch. + 2. Otherwise fall back to REDIS_URL. + + Guardrail: if REDIS_URL targets port 26379 (the standard Sentinel + port) and REDIS_SENTINEL_HOSTS is *not* set, refuse to start with a + clear error — we'd rather crash fast than connect, fail every + command, and pretend everything is fine. + """ + if settings.REDIS_SENTINEL_HOSTS: + if settings.REDIS_URL: + logger.info( + "Both REDIS_SENTINEL_HOSTS and REDIS_URL are set; " + "preferring Sentinel. (Remove REDIS_URL from the deployment " + "to silence this notice.)" + ) + + # Parse and convert to semicolon-separated (Kombu format). + sentinel_hosts = [hp.strip() for hp in settings.REDIS_SENTINEL_HOSTS.split(',')] sentinel_hosts_str = ';'.join(sentinel_hosts) password_part = f":{settings.REDIS_PASSWORD}@" if settings.REDIS_PASSWORD else "" - - # sentinel:// URL with master_name and db in transport_options + + # sentinel:// URL with master_name and db in transport_options. broker_url = f"sentinel://{password_part}{sentinel_hosts_str}" - - # Transport options tell Kombu which master and db to use - # Ensure REDIS_DB is properly converted to int, handling string inputs + + # Ensure REDIS_DB is an int; tolerate stringy env-var inputs. 
try: redis_db = int(settings.REDIS_DB) if isinstance(settings.REDIS_DB, (str, int)) else 0 except (ValueError, TypeError): logger.warning(f"Invalid REDIS_DB value '{settings.REDIS_DB}', defaulting to 0") redis_db = 0 - + transport_options = { 'master_name': settings.REDIS_SENTINEL_MASTER, 'db': redis_db, @@ -48,11 +62,22 @@ def _configure_broker_url(): 'password': settings.REDIS_PASSWORD if settings.REDIS_PASSWORD else None, }, } - return broker_url, transport_options - else: - # Use regular Redis URL - return settings.REDIS_URL, {} + + # Guardrail: catch REDIS_URL=redis://...:26379/... when Sentinel + # hosts are unset. That's the misconfiguration that causes "Only + # HELLO messages are accepted by Sentinel instances" in worker logs. + if settings.REDIS_URL and ':26379/' in settings.REDIS_URL: + raise RuntimeError( + "REDIS_URL points at port 26379 (standard Sentinel port) but " + "REDIS_SENTINEL_HOSTS is not set. Either set REDIS_SENTINEL_HOSTS " + "(recommended — the broker will speak Sentinel protocol properly), " + "or change REDIS_URL to target the actual Redis master/replica " + "on its data-plane port (typically 6379). Refusing to start to " + "avoid silently producing broker errors on every task dispatch." + ) + + return settings.REDIS_URL, {} broker_url, transport_options = _configure_broker_url() From fd4a8a5ab1e024ed413d2e18719d7bcb4ded4fe3 Mon Sep 17 00:00:00 2001 From: stewartshea Date: Tue, 12 May 2026 12:00:28 -0400 Subject: [PATCH 09/13] Enhance Celery task schedule loading and Flower configuration - Improved error handling in `load_schedules_from_yaml` to manage empty or comment-only YAML files, preventing crashes and ensuring a default empty schedule is used. - Updated the Flower deployment configuration to utilize the backend image, streamlining the broker URL setup and eliminating redundant shell commands for Redis Sentinel configuration. 
- Simplified the command for starting Flower, enhancing clarity and maintainability of the deployment script. --- .../backend/app/tasks/celery_app.py | 22 +++++++-- cc-registry-v2/k8s/scheduler-deployment.yaml | 49 ++++++------------- 2 files changed, 35 insertions(+), 36 deletions(-) diff --git a/cc-registry-v2/backend/app/tasks/celery_app.py b/cc-registry-v2/backend/app/tasks/celery_app.py index e60abaa0681a..ca20af6706a8 100644 --- a/cc-registry-v2/backend/app/tasks/celery_app.py +++ b/cc-registry-v2/backend/app/tasks/celery_app.py @@ -154,11 +154,27 @@ def load_schedules_from_yaml(): try: with open(schedules_file, 'r') as f: config = yaml.safe_load(f) - + + # `yaml.safe_load` returns None for empty files or files containing + # only comments. Treat that as "no schedules" instead of crashing + # with `'NoneType' object has no attribute 'get'` (which would + # leave the scheduler running with a silent empty beat schedule). + if config is None: + logger.warning( + f"{schedules_file} is empty or contains only comments; " + f"using empty beat schedule" + ) + return {} + logger.info(f"Loaded schedules from {schedules_file}") - + beat_schedule = {} - for schedule_config in config.get('schedules', []): + # Tolerate both a missing `schedules:` key and an explicit `schedules: null`. + schedules_list = config.get('schedules') or [] + for schedule_config in schedules_list: + # Skip empty list items (e.g. a `- ` with nothing after it). 
+ if not schedule_config: + continue if not schedule_config.get('enabled', True): logger.info(f"Skipping disabled schedule: {schedule_config['name']}") continue diff --git a/cc-registry-v2/k8s/scheduler-deployment.yaml b/cc-registry-v2/k8s/scheduler-deployment.yaml index 8d5fc065b13a..755b2a28c9d2 100644 --- a/cc-registry-v2/k8s/scheduler-deployment.yaml +++ b/cc-registry-v2/k8s/scheduler-deployment.yaml @@ -298,45 +298,28 @@ spec: spec: containers: - name: flower - image: mher/flower:2.0 - imagePullPolicy: IfNotPresent + # Use the backend image so Flower loads our `app.tasks.celery_app` + # module, which has the Sentinel-aware broker URL builder. This + # avoids duplicating broker config in a shell entrypoint (which + # historically produced `redis://...@redis-sentinel:26379/0` and + # spammed "Only HELLO messages are accepted by Sentinel + # instances" on every event capture). + image: us-docker.pkg.dev/runwhen-nonprod-shared/public-images/cc-registry-v2-backend:latest + imagePullPolicy: Always command: - - sh - - -c - - | - set -ex - - echo "=== Flower Sentinel Configuration ===" - echo "REDIS_SENTINEL_HOSTS: $REDIS_SENTINEL_HOSTS" - echo "REDIS_SENTINEL_MASTER: $REDIS_SENTINEL_MASTER" - echo "REDIS_DB: $REDIS_DB" - - # Extract first sentinel for the broker URL placeholder - SENTINEL_HOST=$(echo "$REDIS_SENTINEL_HOSTS" | cut -d',' -f1 | cut -d':' -f1 | tr -d ' ') - SENTINEL_PORT=$(echo "$REDIS_SENTINEL_HOSTS" | cut -d',' -f1 | cut -d':' -f2 | tr -d ' ') - SENTINEL_PORT="${SENTINEL_PORT:-26379}" - DB="${REDIS_DB:-0}" - - # Construct broker URL - Celery/Kombu will use transport_options for Sentinel discovery - if [ ! 
-z "$REDIS_PASSWORD" ]; then - BROKER_URL="redis://:$REDIS_PASSWORD@$SENTINEL_HOST:$SENTINEL_PORT/$DB" - else - BROKER_URL="redis://$SENTINEL_HOST:$SENTINEL_PORT/$DB" - fi - - echo "Broker URL: redis://:**@$SENTINEL_HOST:$SENTINEL_PORT/$DB" - - # Pass Sentinel configuration via environment for Celery to pick up - export CELERY_BROKER_TRANSPORT_OPTIONS='{"master_name":"'$REDIS_SENTINEL_MASTER'","sentinels":[["'$SENTINEL_HOST'",'$SENTINEL_PORT']],"password":"'$REDIS_PASSWORD'","db":'$DB',"socket_timeout":1.0,"socket_connect_timeout":1.0}' - - echo "Transport options configured for Sentinel" - - exec celery --broker="$BROKER_URL" flower + - celery + - -A + - app.tasks.celery_app + - flower + - --port=5555 ports: - containerPort: 5555 name: http protocol: TCP env: + # These match the scheduler/worker exactly; the Sentinel branch + # in _configure_broker_url() consumes them to build a proper + # sentinel:// URL + transport_options. - name: REDIS_SENTINEL_HOSTS value: "redis-sentinel:26379" - name: REDIS_SENTINEL_MASTER From eeb70893dbe3194bc1549cc2b46afac1919658f1 Mon Sep 17 00:00:00 2001 From: stewartshea Date: Tue, 12 May 2026 12:29:48 -0400 Subject: [PATCH 10/13] Improve error handling in Celery tasks and enhance logging - Refactored exception handling in multiple Celery tasks to utilize `logger.exception`, capturing full tracebacks and ensuring tasks are marked as FAILURE in case of errors. - Removed error message returns in favor of raising exceptions, aligning with Celery's error handling practices. - Updated YAML data loading tasks to raise exceptions for missing configurations, preventing silent failures and improving task reliability. - Added a new AdminCCVersions component to the frontend for better management of CodeCollection versions. 
--- .../backend/app/tasks/analytics_tasks.py | 13 +- .../backend/app/tasks/parse_user_vars_only.py | 13 +- .../backend/app/tasks/raw_data_tasks.py | 11 +- .../backend/app/tasks/registry_tasks.py | 36 +- cc-registry-v2/frontend/src/pages/Admin.tsx | 6 + .../frontend/src/pages/AdminCCVersions.tsx | 582 ++++++++++++++++++ cc-registry-v2/frontend/src/services/api.ts | 16 + 7 files changed, 648 insertions(+), 29 deletions(-) create mode 100644 cc-registry-v2/frontend/src/pages/AdminCCVersions.tsx diff --git a/cc-registry-v2/backend/app/tasks/analytics_tasks.py b/cc-registry-v2/backend/app/tasks/analytics_tasks.py index 17fd9bb25494..4f14254acf13 100644 --- a/cc-registry-v2/backend/app/tasks/analytics_tasks.py +++ b/cc-registry-v2/backend/app/tasks/analytics_tasks.py @@ -216,12 +216,13 @@ def compute_task_growth_analytics(self): "months_generated": len(months) } - except Exception as e: - logger.error(f"Error computing task growth analytics: {e}", exc_info=True) + except Exception: + # logger.exception captures the full traceback. Bare `raise` + # re-throws so Celery marks the task FAILURE — task_executions + # then records error_message + traceback via task_monitor (see + # task_failure_handler in celery_app.py). 
+ logger.exception("Error computing task growth analytics") db.rollback() - return { - "status": "error", - "message": str(e) - } + raise finally: db.close() diff --git a/cc-registry-v2/backend/app/tasks/parse_user_vars_only.py b/cc-registry-v2/backend/app/tasks/parse_user_vars_only.py index 361419ec9863..e1ffa9b62c5f 100644 --- a/cc-registry-v2/backend/app/tasks/parse_user_vars_only.py +++ b/cc-registry-v2/backend/app/tasks/parse_user_vars_only.py @@ -92,12 +92,13 @@ def parse_user_variables_task(self): 'message': f'Successfully parsed user variables from {processed} files, updated {updated} codebundles' } - except Exception as e: - logger.error(f"Failed to parse user variables: {e}", exc_info=True) + except Exception: + # logger.exception captures the full traceback. Bare `raise` + # re-throws so Celery marks the task FAILURE instead of + # SUCCESS-with-error-payload (which task_executions and the + # Admin UI both interpret as a successful run). + logger.exception("Failed to parse user variables") db.rollback() - return { - 'status': 'error', - 'message': str(e) - } + raise finally: db.close() diff --git a/cc-registry-v2/backend/app/tasks/raw_data_tasks.py b/cc-registry-v2/backend/app/tasks/raw_data_tasks.py index ef4245dba5d6..b0a24a0b0381 100644 --- a/cc-registry-v2/backend/app/tasks/raw_data_tasks.py +++ b/cc-registry-v2/backend/app/tasks/raw_data_tasks.py @@ -28,15 +28,20 @@ def store_yaml_data_task(self, yaml_data: Dict[str, Any] = None): try: logger.info(f"Starting YAML data storage task {self.request.id}") - # Use provided YAML data or load from file as fallback + # Use provided YAML data or load from file as fallback. Missing + # config is a hard failure — re-raise so the task is recorded as + # FAILURE in Celery + task_executions instead of silently + # returning SUCCESS with an error payload nobody checks. 
if not yaml_data: yaml_path = "/app/codecollections.yaml" try: with open(yaml_path, 'r') as file: yaml_data = yaml.safe_load(file) except FileNotFoundError: - logger.error("No YAML data provided and file not found") - return {'status': 'error', 'message': 'No YAML data available'} + logger.exception( + f"No YAML data provided and file not found at {yaml_path}" + ) + raise db = SessionLocal() try: diff --git a/cc-registry-v2/backend/app/tasks/registry_tasks.py b/cc-registry-v2/backend/app/tasks/registry_tasks.py index bb4e774232fe..48c5c9711b2e 100644 --- a/cc-registry-v2/backend/app/tasks/registry_tasks.py +++ b/cc-registry-v2/backend/app/tasks/registry_tasks.py @@ -53,12 +53,15 @@ def sync_all_collections_task(self): """ try: logger.info(f"Starting sync_all_collections_task {self.request.id}") - - # Load YAML + + # Load YAML. Missing config is a hard failure — raise so the task + # is recorded as FAILURE in Celery + task_executions, rather than + # silently returning SUCCESS with an error payload that nobody + # checks. See AGENTS.md "task error handling". yaml_path = "/app/codecollections.yaml" if not os.path.exists(yaml_path): - return {"status": "error", "message": f"YAML file not found: {yaml_path}"} - + raise FileNotFoundError(f"codecollections.yaml not found at {yaml_path}") + with open(yaml_path, 'r') as file: yaml_data = yaml.safe_load(file) @@ -120,13 +123,18 @@ def sync_all_collections_task(self): logger.info(f"Synced {collections_synced} collections") return {"status": "success", "collections_synced": collections_synced} - + finally: db.close() - - except Exception as e: - logger.error(f"sync_all_collections_task failed: {e}") - return {"status": "error", "message": str(e)} + + except Exception: + # logger.exception captures the full traceback into the log. 
+ # The bare `raise` re-throws the original exception so Celery + # marks the task FAILURE (which task_failure_handler in + # celery_app.py persists to task_executions.error_message + + # task_executions.traceback via task_monitor.update_task_status). + logger.exception("sync_all_collections_task failed") + raise @celery_app.task(bind=True) def parse_all_codebundles_task(self): @@ -284,13 +292,13 @@ def parse_all_codebundles_task(self): "codebundles_created": codebundles_created, "codebundles_updated": codebundles_updated } - + finally: db.close() - - except Exception as e: - logger.error(f"parse_all_codebundles_task failed: {e}") - return {"status": "error", "message": str(e)} + + except Exception: + logger.exception("parse_all_codebundles_task failed") + raise def _get_git_last_commit_date(repo_path: str, folder_path: str) -> Optional[datetime]: """Get the last commit date for files in a folder, excluding meta.yml""" diff --git a/cc-registry-v2/frontend/src/pages/Admin.tsx b/cc-registry-v2/frontend/src/pages/Admin.tsx index 959cabea9b8d..bef421031ff0 100644 --- a/cc-registry-v2/frontend/src/pages/Admin.tsx +++ b/cc-registry-v2/frontend/src/pages/Admin.tsx @@ -16,6 +16,7 @@ import { } from '@mui/material'; import { apiService } from '../services/api'; import AdminInventory from './AdminInventory'; +import AdminCCVersions from './AdminCCVersions'; const Admin: React.FC = () => { const [token, setToken] = useState('admin-dev-token'); @@ -175,6 +176,7 @@ const Admin: React.FC = () => { + {currentTab === 0 && ( @@ -492,6 +494,10 @@ const Admin: React.FC = () => { )} )} + + {currentTab === 3 && ( + + )} ); }; diff --git a/cc-registry-v2/frontend/src/pages/AdminCCVersions.tsx b/cc-registry-v2/frontend/src/pages/AdminCCVersions.tsx new file mode 100644 index 000000000000..2ce8cdaf0d22 --- /dev/null +++ b/cc-registry-v2/frontend/src/pages/AdminCCVersions.tsx @@ -0,0 +1,582 @@ +/** + * AdminCCVersions — admin view of the PAPI-facing CodeCollection + * image catalog. 
Renders the data exposed at: + * + * GET /api/v1/catalog/codecollections + * GET /api/v1/catalog/codecollections/{slug} + * + * The catalog is populated by `sync_image_tags_task` (see CCV.md), which + * polls each CC's configured OCI registry on a schedule and upserts a + * CodeCollectionVersion row per discovered ref. This page is purely + * read-only — for manual sync triggers, use the Schedules tab or the + * Data Management tab. + */ +import React, { useState, useEffect, useCallback } from 'react'; +import { + Box, + Container, + Typography, + Card, + CardContent, + Table, + TableBody, + TableCell, + TableContainer, + TableHead, + TableRow, + Paper, + Chip, + Button, + TextField, + Select, + MenuItem, + FormControl, + FormControlLabel, + Switch, + InputLabel, + GridLegacy as Grid, + Alert, + CircularProgress, + Dialog, + DialogTitle, + DialogContent, + DialogActions, + IconButton, + Tooltip, + Link, +} from '@mui/material'; +import { + FilterList as FilterIcon, + Search as SearchIcon, + Refresh as RefreshIcon, + Visibility as ViewIcon, + Inventory2 as CatalogIcon, + OpenInNew as ExternalLinkIcon, +} from '@mui/icons-material'; +import { apiService } from '../services/api'; + +// Mirrors the Pydantic schemas in app/schemas/cc_catalog.py. Keep field +// names aligned with backend — these are part of the PAPI contract. 
+interface ImageRef { + ref: string; + ref_type: string; + image_registry: string | null; + image_tag: string; + image_digest: string | null; + commit_hash: string | null; + rt_revision: string | null; + image_built_at: string | null; + is_latest: boolean; + is_prerelease: boolean; + is_active: boolean; + synced_at: string | null; +} + +interface CatalogEntry { + slug: string; + name: string; + git_url: string; + visibility: string; + latest_image_tag: string | null; + stable_image_tag: string | null; + image_registry: string | null; + last_synced: string | null; +} + +interface CatalogEntryDetail extends CatalogEntry { + refs: ImageRef[]; +} + +const formatTime = (iso: string | null): string => { + if (!iso) return '—'; + try { + const d = new Date(iso); + return d.toLocaleString(); + } catch { + return iso; + } +}; + +const truncate = (s: string | null | undefined, n = 12): string => { + if (!s) return '—'; + return s.length > n ? `${s.slice(0, n)}…` : s; +}; + +const AdminCCVersions: React.FC = () => { + const [entries, setEntries] = useState([]); + const [loading, setLoading] = useState(false); + const [error, setError] = useState(null); + + // Filters + const [searchTerm, setSearchTerm] = useState(''); + const [visibilityFilter, setVisibilityFilter] = useState<'' | 'public' | 'hidden'>(''); + const [onlyWithImage, setOnlyWithImage] = useState(true); + + // Detail dialog + const [detail, setDetail] = useState(null); + const [detailLoading, setDetailLoading] = useState(false); + const [detailOpen, setDetailOpen] = useState(false); + + const loadEntries = useCallback(async () => { + try { + setLoading(true); + setError(null); + const params: Record = { + only_with_image: onlyWithImage, + }; + if (visibilityFilter) params.visibility = visibilityFilter; + const data: CatalogEntry[] = await apiService.getCatalogList(params); + setEntries(data); + } catch (err: unknown) { + const msg = err instanceof Error ? 
err.message : String(err); + console.error('Failed to load catalog:', err); + setError(`Failed to load image catalog: ${msg}`); + } finally { + setLoading(false); + } + }, [onlyWithImage, visibilityFilter]); + + useEffect(() => { + loadEntries(); + }, [loadEntries]); + + const filteredEntries = entries.filter((e) => { + if (!searchTerm) return true; + const q = searchTerm.toLowerCase(); + return ( + e.slug.toLowerCase().includes(q) || + e.name.toLowerCase().includes(q) || + (e.image_registry ?? '').toLowerCase().includes(q) + ); + }); + + // --------------------------------------------------------------------------- + // Derived stats + // --------------------------------------------------------------------------- + const totalTracked = entries.length; + const publicCount = entries.filter((e) => e.visibility === 'public').length; + const hiddenCount = entries.filter((e) => e.visibility === 'hidden').length; + const mostRecentSync = entries.reduce((acc, e) => { + if (!e.last_synced) return acc; + if (!acc) return e.last_synced; + return new Date(e.last_synced) > new Date(acc) ? e.last_synced : acc; + }, null); + + const openDetail = async (slug: string) => { + try { + setDetailLoading(true); + setDetail(null); + setDetailOpen(true); + const data: CatalogEntryDetail = await apiService.getCatalogDetail(slug); + setDetail(data); + } catch (err: unknown) { + const msg = err instanceof Error ? 
err.message : String(err); + console.error('Failed to load CC detail:', err); + setError(`Failed to load details for ${slug}: ${msg}`); + setDetailOpen(false); + } finally { + setDetailLoading(false); + } + }; + + const closeDetail = () => { + setDetailOpen(false); + setDetail(null); + }; + + const refTypeColor = (t: string) => { + switch (t) { + case 'tag': + case 'release': + return 'success'; + case 'branch': + return 'info'; + default: + return 'default'; + } + }; + + return ( + + {/* Header */} + + + + CodeCollection Image Catalog + + + PAPI-facing image catalog: tracked OCI tags per CodeCollection, populated by{' '} + sync_image_tags_task. Read-only view of{' '} + /api/v1/catalog/codecollections. + + + + {/* Stats */} + + + + + + Tracked CCs + + {totalTracked} + + {onlyWithImage ? 'with ≥1 image' : 'all'} + + + + + + + + + Public + + {publicCount} + + + + + + + + Hidden + + {hiddenCount} + + synced for PAPI only + + + + + + + + + Most recent sync + + + {formatTime(mostRecentSync)} + + + + + + + {/* Filters */} + + + + + Filters + + + + setSearchTerm(e.target.value)} + InputProps={{ + endAdornment: ( + + + + ), + }} + /> + + + + Visibility + + + + + setOnlyWithImage(e.target.checked)} + /> + } + label="Only with image" + /> + + + + + + + + + {/* Error / Loading / Empty / Table */} + {error && ( + setError(null)}> + {error} + + )} + + {loading ? ( + + + + ) : filteredEntries.length === 0 ? ( + + No catalog entries match the current filters. If you expect rows here, + confirm that sync_image_tags_task has run successfully + (Schedules tab → manual trigger). + + ) : ( + + + + + CodeCollection + Visibility + Image Registry + Latest + Stable + Last Synced + Actions + + + + {filteredEntries.map((e) => ( + + + + {e.name} + + + {e.slug} + + + + + + + {e.image_registry ? ( + + {e.image_registry} + + ) : ( + + — + + )} + + + {e.latest_image_tag ? ( + + ) : ( + '—' + )} + + + {e.stable_image_tag ? 
( + + ) : ( + '—' + )} + + + {formatTime(e.last_synced)} + + + + openDetail(e.slug)}> + + + + {e.git_url && ( + + + + + + )} + + + ))} + +
+
+ )} + + {/* Detail dialog: full ref list for the selected CC */} + + + {detail ? ( + <> + {detail.name}{' '} + + ({detail.slug}) + + + ) : ( + 'Loading…' + )} + + + {detailLoading || !detail ? ( + + + + ) : ( + <> + + + + Registry:{' '} + + {detail.image_registry ?? '—'} + + + + + + Latest: {detail.latest_image_tag ?? '—'} + + + + + Stable: {detail.stable_image_tag ?? '—'} + + + + + Last synced: {formatTime(detail.last_synced)} · {detail.refs.length} refs + + + + + + + + + Ref + Type + Image Tag + Digest + Commit + Runtime + Built + Flags + + + + {detail.refs.map((r) => ( + + {r.ref} + + + + {r.image_tag} + + + + {truncate(r.image_digest, 14)} + + + + + + + {truncate(r.commit_hash, 7)} + + + + + + + {truncate(r.rt_revision, 7)} + + + + + + {formatTime(r.image_built_at)} + + + + + {r.is_latest && ( + + )} + {r.is_prerelease && ( + + )} + {!r.is_active && ( + + )} + + + + ))} + +
+
+ + )} +
+ + + +
+
+ ); +}; + +export default AdminCCVersions; diff --git a/cc-registry-v2/frontend/src/services/api.ts b/cc-registry-v2/frontend/src/services/api.ts index 39b8fa85f2ac..e3c1e71e2fc2 100644 --- a/cc-registry-v2/frontend/src/services/api.ts +++ b/cc-registry-v2/frontend/src/services/api.ts @@ -622,6 +622,22 @@ export const apiService = { return response.data; }, + // CodeCollection Image Catalog (PAPI-facing, public/unauthenticated). + // These mirror app/routers/cc_catalog.py — read-only views of what + // the image-sync task has discovered in each CC's OCI registry. + async getCatalogList(params: { + visibility?: 'public' | 'hidden'; + only_with_image?: boolean; + } = {}) { + const response = await api.get('/catalog/codecollections', { params }); + return response.data; + }, + + async getCatalogDetail(slug: string) { + const response = await api.get(`/catalog/codecollections/${slug}`); + return response.data; + }, + // Version management endpoints async triggerSyncAllVersions(token: string) { const response = await api.post('/admin/sync-all-versions', {}, { From c24a8da8bcc208d23dfbec2698cf28b02ef79eb4 Mon Sep 17 00:00:00 2001 From: stewartshea Date: Tue, 12 May 2026 17:10:25 -0400 Subject: [PATCH 11/13] Refactor task management and configuration handling - Removed the legacy task management endpoints and integrated task triggering through the new schedules API, enhancing clarity and maintainability. - Updated the application to utilize environment-driven configuration for YAML file paths, allowing for directory-mounted ConfigMaps in Kubernetes, which auto-propagate updates without requiring pod restarts. - Cleaned up the frontend by removing the Task Manager page and adjusting navigation, as task triggering is now handled within the Schedules tab. - Enhanced documentation to reflect the new task management flow and configuration settings. 
--- cc-registry-v2/backend/app/core/config.py | 30 +- cc-registry-v2/backend/app/main.py | 3 +- cc-registry-v2/backend/app/routers/admin.py | 2 +- .../backend/app/routers/admin_crud.py | 5 +- .../backend/app/routers/raw_data.py | 2 +- cc-registry-v2/backend/app/routers/tasks.py | 319 -------- .../app/services/data_migration_service.py | 2 +- .../services/documentation_source_loader.py | 9 +- .../backend/app/tasks/celery_app.py | 19 +- .../backend/app/tasks/data_tasks.py | 3 +- .../backend/app/tasks/image_sync_tasks.py | 9 +- .../backend/app/tasks/raw_data_tasks.py | 2 +- .../backend/app/tasks/registry_tasks.py | 91 ++- cc-registry-v2/frontend/src/App.tsx | 9 - .../frontend/src/components/Header.tsx | 3 - cc-registry-v2/frontend/src/pages/Admin.tsx | 23 +- .../src/pages/AdminTaskOperations.tsx | 450 +++++++++++ .../frontend/src/pages/TaskManager.tsx | 735 ------------------ cc-registry-v2/frontend/src/services/api.ts | 78 +- cc-registry-v2/k8s/backend-deployment.yaml | 13 +- cc-registry-v2/k8s/scheduler-deployment.yaml | 16 +- cc-registry-v2/k8s/worker-deployment.yaml | 13 +- 22 files changed, 643 insertions(+), 1193 deletions(-) delete mode 100644 cc-registry-v2/backend/app/routers/tasks.py create mode 100644 cc-registry-v2/frontend/src/pages/AdminTaskOperations.tsx delete mode 100644 cc-registry-v2/frontend/src/pages/TaskManager.tsx diff --git a/cc-registry-v2/backend/app/core/config.py b/cc-registry-v2/backend/app/core/config.py index 931345516743..439a3b3e3b9e 100644 --- a/cc-registry-v2/backend/app/core/config.py +++ b/cc-registry-v2/backend/app/core/config.py @@ -69,7 +69,35 @@ class Settings(BaseSettings): # API Settings API_V1_STR: str = "/api/v1" PROJECT_NAME: str = "CodeCollection Registry" - + + # Config-file paths (codecollections.yaml, schedules.yaml, sources.yaml). 
+ # + # Historically these were hardcoded to /app/<name>.yaml and mounted from + # ConfigMaps using `subPath`, which has a critical limitation: subPath + # mounts do NOT receive ConfigMap updates from the kubelet, so any + # change to the ConfigMap requires a pod restart to take effect. This + # is the bug that made the stewartshea typo "stick" in registry-test + # even after the ConfigMap was fixed. + # + # By exposing these as env-driven settings, k8s deployments can mount + # each ConfigMap as a *directory* (no subPath) and point the env vars + # at the resulting paths (e.g. /etc/cc-registry/codecollections/codecollections.yaml). + # Directory mounts auto-update; subPath mounts don't. + # + # Defaults preserve the legacy /app/<name>.yaml behavior so local dev 
+ CODECOLLECTIONS_FILE: str = "/app/codecollections.yaml" + SCHEDULES_FILE: str = "/app/schedules.yaml" + SOURCES_FILE: str = "/app/sources.yaml" + @model_validator(mode='after') def construct_urls(self): """Construct DATABASE_URL and REDIS_URL from components if not provided""" diff --git a/cc-registry-v2/backend/app/main.py b/cc-registry-v2/backend/app/main.py index 7266ceac6f35..d332ba43587c 100644 --- a/cc-registry-v2/backend/app/main.py +++ b/cc-registry-v2/backend/app/main.py @@ -110,9 +110,8 @@ async def health_check(): } # Include routers -from app.routers import admin, tasks, raw_data, admin_crud, task_execution_admin, versions, task_management, admin_inventory, helm_charts, mcp_chat, chat_debug, github_issues, schedule_config, analytics, vector_search, intake, cc_catalog +from app.routers import admin, raw_data, admin_crud, task_execution_admin, versions, task_management, admin_inventory, helm_charts, mcp_chat, chat_debug, github_issues, schedule_config, analytics, vector_search, intake, cc_catalog app.include_router(admin.router) -app.include_router(tasks.router) app.include_router(raw_data.router) app.include_router(admin_crud.router) app.include_router(task_execution_admin.router, prefix="/api/v1") diff --git a/cc-registry-v2/backend/app/routers/admin.py b/cc-registry-v2/backend/app/routers/admin.py index 20b4ad8630c7..3fe05b7e36c2 100644 --- a/cc-registry-v2/backend/app/routers/admin.py +++ b/cc-registry-v2/backend/app/routers/admin.py @@ -66,7 +66,7 @@ async def trigger_data_population(token: str = Depends(verify_admin_token)): logger.info("Starting data population triggered by admin") # Step 1: Load YAML data - yaml_path = "/app/codecollections.yaml" + yaml_path = settings.CODECOLLECTIONS_FILE if not os.path.exists(yaml_path): raise HTTPException(status_code=404, detail=f"YAML file not found: {yaml_path}") diff --git a/cc-registry-v2/backend/app/routers/admin_crud.py b/cc-registry-v2/backend/app/routers/admin_crud.py index dc3cc683da34..484d11218b1f 
100644 --- a/cc-registry-v2/backend/app/routers/admin_crud.py +++ b/cc-registry-v2/backend/app/routers/admin_crud.py @@ -8,6 +8,7 @@ from pydantic import BaseModel from datetime import datetime, timedelta +from app.core.config import settings from app.core.database import get_db from app.models import CodeCollection, Codebundle, CodeCollectionMetrics, SystemMetrics from fastapi.security import HTTPBearer, HTTPAuthorizationCredentials @@ -333,7 +334,7 @@ async def get_metrics( @router.post("/seed-from-yaml") async def seed_database_from_yaml( - yaml_file_path: str = "/app/codecollections.yaml", + yaml_file_path: str = settings.CODECOLLECTIONS_FILE, db: Session = Depends(get_db), _: dict = Depends(verify_admin_token) ): @@ -353,7 +354,7 @@ async def seed_database_from_yaml( @router.post("/validate-yaml-seed") async def validate_yaml_seed( - yaml_file_path: str = "/app/codecollections.yaml", + yaml_file_path: str = settings.CODECOLLECTIONS_FILE, db: Session = Depends(get_db), _: dict = Depends(verify_admin_token) ): diff --git a/cc-registry-v2/backend/app/routers/raw_data.py b/cc-registry-v2/backend/app/routers/raw_data.py index 10845fcaf934..0729dda19efa 100644 --- a/cc-registry-v2/backend/app/routers/raw_data.py +++ b/cc-registry-v2/backend/app/routers/raw_data.py @@ -34,7 +34,7 @@ async def store_yaml_data(db: Session = Depends(get_db)): logger.info("Starting store_yaml_data endpoint") # Load YAML data - yaml_path = "/app/codecollections.yaml" + yaml_path = settings.CODECOLLECTIONS_FILE logger.info(f"Reading YAML file from: {yaml_path}") if not os.path.exists(yaml_path): diff --git a/cc-registry-v2/backend/app/routers/tasks.py b/cc-registry-v2/backend/app/routers/tasks.py deleted file mode 100644 index 8cacb6fba714..000000000000 --- a/cc-registry-v2/backend/app/routers/tasks.py +++ /dev/null @@ -1,319 +0,0 @@ -""" -Registry Task Management Endpoints -- Register CodeCollections, index repositories, parse codebundles -- Enhance metadata with AI, generate system 
metrics -""" -from fastapi import APIRouter, Depends, HTTPException -from fastapi.security import HTTPBearer, HTTPAuthorizationCredentials -from typing import Dict, Any, List, Optional -from pydantic import BaseModel -from celery.result import AsyncResult - -from app.tasks.registry_tasks import celery_app -from app.services.task_monitoring_service import task_monitor - -# Simple token-based auth for now -security = HTTPBearer() - -def verify_admin_token(credentials: HTTPAuthorizationCredentials = Depends(security)): - """Verify admin token - in production, use proper JWT or OAuth""" - if not credentials.credentials.startswith('admin-'): - raise HTTPException(status_code=401, detail="Invalid admin token") - return credentials.credentials - -router = APIRouter(prefix="/api/v1/tasks", tags=["registry-tasks"]) - - -class TaskRequest(BaseModel): - collection_ids: Optional[List[int]] = None - collection_slugs: Optional[List[str]] = None - - -class TaskResponse(BaseModel): - task_id: str - status: str - message: str - - -class TaskStatusResponse(BaseModel): - task_id: str - status: str - result: Optional[Dict[str, Any]] = None - progress: Optional[Dict[str, Any]] = None - error: Optional[str] = None - - -@router.post("/seed-database", response_model=TaskResponse) -async def trigger_seed_database( - yaml_file_path: str = "/app/codecollections.yaml", - _: dict = Depends(verify_admin_token) -): - """SEED: Trigger the canonical sync workflow (sync → parse → enhance)""" - try: - from app.tasks.workflow_tasks import sync_parse_enhance_workflow_task - - task = sync_parse_enhance_workflow_task.apply_async() - - return TaskResponse( - task_id=task.id, - status="started", - message="Database seeding started (sync → parse → enhance workflow)" - ) - except Exception as e: - raise HTTPException(status_code=500, detail=str(e)) - - -@router.post("/validate-yaml", response_model=TaskResponse) -async def trigger_validate_yaml( - yaml_file_path: str = "/app/codecollections.yaml", - _: dict 
= Depends(verify_admin_token) -): - """VALIDATE: Ensure YAML entries exist in database by syncing collections""" - try: - from app.tasks.registry_tasks import sync_all_collections_task - - task = sync_all_collections_task.apply_async() - - return TaskResponse( - task_id=task.id, - status="started", - message="YAML validation started (syncing collections from YAML)" - ) - except Exception as e: - raise HTTPException(status_code=500, detail=str(e)) - - -@router.post("/sync-collections", response_model=TaskResponse) -async def trigger_sync_collections( - _: dict = Depends(verify_admin_token) -): - """SYNC: Read from database and sync all active collections""" - try: - from app.tasks.registry_tasks import sync_all_collections_task - - task = sync_all_collections_task.apply_async() - - # Don't create task record here - the task creates its own record - # (Avoids duplicate key constraint violation) - - return TaskResponse( - task_id=task.id, - status="started", - message="Database-driven collection sync started" - ) - except Exception as e: - raise HTTPException(status_code=500, detail=str(e)) - - -@router.post("/sync-collection/{collection_id}", response_model=TaskResponse) -async def trigger_sync_single_collection( - collection_id: int, - _: dict = Depends(verify_admin_token) -): - """SYNC: Sync a single collection by ID""" - try: - from app.tasks.registry_tasks import sync_single_collection_task - - task = sync_single_collection_task.apply_async(args=[collection_id]) - - # Don't create task record here - the task creates its own record - # (Avoids duplicate key constraint violation) - - return TaskResponse( - task_id=task.id, - status="started", - message=f"Sync started for collection ID {collection_id}" - ) - except Exception as e: - raise HTTPException(status_code=500, detail=str(e)) - - -@router.post("/sync-image-tags", response_model=TaskResponse) -async def trigger_sync_image_tags( - _: dict = Depends(verify_admin_token) -): - """SYNC: Refresh the CCV image 
catalog by polling each CC's image_source. - - Reads codecollections.yaml, runs the configured ImageSource plugin - (e.g. OCI) per CC, and upserts CodeCollectionVersion rows with the - discovered image_tag / image_registry / commit_hash / rt_revision. - - Idempotent. Safe to call on demand from the admin UI when waiting on - the 5-minute beat schedule is too slow, or when debugging why - /api/v1/catalog is missing image data. - - See docs/CCV.md for the full pipeline. - """ - try: - from app.tasks.image_sync_tasks import sync_image_tags_task - - task = sync_image_tags_task.apply_async() - - return TaskResponse( - task_id=task.id, - status="started", - message="Image-tag catalog sync started", - ) - except Exception as e: - raise HTTPException(status_code=500, detail=str(e)) - - -@router.post("/parse-codebundles", response_model=TaskResponse) -async def trigger_parse_codebundles( - _: dict = Depends(verify_admin_token) -): - """PARSE: Parse codebundles from stored repository data""" - try: - from app.tasks.registry_tasks import parse_all_codebundles_task - - task = parse_all_codebundles_task.apply_async() - - # Don't create task record here - the task creates its own record - # (Avoids duplicate key constraint violation) - - return TaskResponse( - task_id=task.id, - status="started", - message="Codebundle parsing from database started" - ) - except Exception as e: - raise HTTPException(status_code=500, detail=str(e)) - - -@router.post("/parse-collection/{collection_id}", response_model=TaskResponse) -async def trigger_parse_collection_codebundles( - collection_id: int, - _: dict = Depends(verify_admin_token) -): - """PARSE: Parse codebundles for a single collection""" - try: - from app.tasks.registry_tasks import parse_collection_codebundles_task - - task = parse_collection_codebundles_task.apply_async(args=[collection_id]) - - return TaskResponse( - task_id=task.id, - status="started", - message=f"Codebundle parsing started for collection ID {collection_id}" - ) - 
except Exception as e: - raise HTTPException(status_code=500, detail=str(e)) - - -@router.post("/enhance-codebundles", response_model=TaskResponse) -async def trigger_enhance_codebundles( - _: dict = Depends(verify_admin_token) -): - """ENHANCE: Use AI to enhance all codebundle metadata""" - try: - from app.tasks.registry_tasks import enhance_all_codebundles_task - - task = enhance_all_codebundles_task.apply_async() - - return TaskResponse( - task_id=task.id, - status="started", - message="AI enhancement of codebundles started" - ) - except Exception as e: - raise HTTPException(status_code=500, detail=str(e)) - - -@router.post("/enhance-codebundle/{codebundle_id}", response_model=TaskResponse) -async def trigger_enhance_single_codebundle( - codebundle_id: int, - _: dict = Depends(verify_admin_token) -): - """ENHANCE: Use AI to enhance a single codebundle""" - try: - from app.tasks.registry_tasks import enhance_single_codebundle_task - - task = enhance_single_codebundle_task.apply_async(args=[codebundle_id]) - - return TaskResponse( - task_id=task.id, - status="started", - message=f"AI enhancement started for codebundle ID {codebundle_id}" - ) - except Exception as e: - raise HTTPException(status_code=500, detail=str(e)) - - -@router.post("/generate-metrics", response_model=TaskResponse) -async def trigger_generate_metrics( - _: dict = Depends(verify_admin_token) -): - """METRICS: Generate system and collection metrics""" - try: - from app.tasks.registry_tasks import generate_metrics_task - - task = generate_metrics_task.apply_async() - - return TaskResponse( - task_id=task.id, - status="started", - message="Metrics generation started" - ) - except Exception as e: - raise HTTPException(status_code=500, detail=str(e)) - - -@router.get("/status/{task_id}", response_model=TaskStatusResponse) -async def get_task_status( - task_id: str, - _: dict = Depends(verify_admin_token) -): - """Get status of a specific task""" - try: - result = AsyncResult(task_id, app=celery_app) 
- - response = TaskStatusResponse( - task_id=task_id, - status=result.status - ) - - if result.ready(): - if result.successful(): - response.result = result.result - else: - response.error = str(result.result) - else: - # Get progress info if available - if hasattr(result, 'info') and isinstance(result.info, dict): - response.progress = result.info - - return response - - except Exception as e: - raise HTTPException(status_code=500, detail=str(e)) - - -@router.get("/health") -async def health_check(): - """Health check for registry task system""" - try: - # Test Celery connection - inspect = celery_app.control.inspect() - stats = inspect.stats() - - if stats: - return { - "status": "healthy", - "celery_status": "connected", - "workers": list(stats.keys()), - "task_system": "registry_tasks" - } - else: - return { - "status": "unhealthy", - "celery_status": "no_workers", - "task_system": "registry_tasks" - } - - except Exception as e: - return { - "status": "unhealthy", - "celery_status": "disconnected", - "error": str(e), - "task_system": "database_driven" - } \ No newline at end of file diff --git a/cc-registry-v2/backend/app/services/data_migration_service.py b/cc-registry-v2/backend/app/services/data_migration_service.py index 2b3a98f728a6..2c79da75c68c 100644 --- a/cc-registry-v2/backend/app/services/data_migration_service.py +++ b/cc-registry-v2/backend/app/services/data_migration_service.py @@ -41,7 +41,7 @@ def populate_registry_data(self) -> Dict[str, Any]: def _read_codecollections_yaml(self) -> Dict[str, Any]: """Read the original codecollections.yaml file""" - yaml_path = "/app/codecollections.yaml" + yaml_path = settings.CODECOLLECTIONS_FILE if not os.path.exists(yaml_path): raise FileNotFoundError(f"codecollections.yaml not found at {yaml_path}") diff --git a/cc-registry-v2/backend/app/services/documentation_source_loader.py b/cc-registry-v2/backend/app/services/documentation_source_loader.py index 7ac51336354a..e1a53930fcd8 100644 --- 
a/cc-registry-v2/backend/app/services/documentation_source_loader.py +++ b/cc-registry-v2/backend/app/services/documentation_source_loader.py @@ -1,8 +1,10 @@ """ Load documentation sources from sources.yaml and crawl their content. -sources.yaml lives in cc-registry-v2/ alongside schedules.yaml and is -mounted into the backend container at /app/sources.yaml. +sources.yaml lives in cc-registry-v2/ alongside schedules.yaml. The path +is configurable via settings.SOURCES_FILE (env: SOURCES_FILE) so k8s +deployments can directory-mount the ConfigMap (no subPath) and pick up +changes without restarting the pod. """ import logging from pathlib import Path @@ -10,12 +12,13 @@ import yaml +from app.core.config import settings from app.services.web_crawler import WebCrawler logger = logging.getLogger(__name__) SOURCES_PATHS = [ - Path("/app/sources.yaml"), + Path(settings.SOURCES_FILE), Path("/workspaces/codecollection-registry/cc-registry-v2/sources.yaml"), ] diff --git a/cc-registry-v2/backend/app/tasks/celery_app.py b/cc-registry-v2/backend/app/tasks/celery_app.py index ca20af6706a8..d9c210aa93be 100644 --- a/cc-registry-v2/backend/app/tasks/celery_app.py +++ b/cc-registry-v2/backend/app/tasks/celery_app.py @@ -132,14 +132,21 @@ def _configure_broker_url(): # Load schedules from YAML file def load_schedules_from_yaml(): - """Load schedule configuration from schedules.yaml""" - # Try multiple possible locations for the YAML file + """Load schedule configuration from schedules.yaml. + + Path resolution: + 1. settings.SCHEDULES_FILE — usually /app/schedules.yaml in containers + (set via env var SCHEDULES_FILE in k8s to point at a directory-mounted + ConfigMap path like /etc/cc-registry/schedules/schedules.yaml). + 2. Relative fallback for running directly from a source checkout. + 3. /workspaces dev fallback for the in-tree dev container. 
+ """ possible_paths = [ - '/app/schedules.yaml', # Docker container path - os.path.join(os.path.dirname(os.path.dirname(os.path.dirname(__file__))), 'schedules.yaml'), # Relative to backend/app/tasks - '/workspaces/codecollection-registry/cc-registry-v2/schedules.yaml', # Development path + settings.SCHEDULES_FILE, + os.path.join(os.path.dirname(os.path.dirname(os.path.dirname(__file__))), 'schedules.yaml'), + '/workspaces/codecollection-registry/cc-registry-v2/schedules.yaml', ] - + schedules_file = None for path in possible_paths: if os.path.exists(path): diff --git a/cc-registry-v2/backend/app/tasks/data_tasks.py b/cc-registry-v2/backend/app/tasks/data_tasks.py index 94fd6b65f14a..1e9c1f0b9e2e 100644 --- a/cc-registry-v2/backend/app/tasks/data_tasks.py +++ b/cc-registry-v2/backend/app/tasks/data_tasks.py @@ -6,6 +6,7 @@ from typing import Dict, Any, List from celery import current_task from app.tasks.celery_app import celery_app +from app.core.config import settings from app.core.database import SessionLocal from app.models import RawYamlData, RawRepositoryData, CodeCollection, Codebundle from app.services.robot_parser import parse_all_robot_files @@ -24,7 +25,7 @@ def store_yaml_data_task(self, yaml_content: str = None): # Read codecollections.yaml if not provided if not yaml_content: - yaml_file_path = "/app/codecollections.yaml" + yaml_file_path = settings.CODECOLLECTIONS_FILE logger.info(f"Reading YAML file from: {yaml_file_path}") if not os.path.exists(yaml_file_path): logger.error(f"YAML file not found: {yaml_file_path}") diff --git a/cc-registry-v2/backend/app/tasks/image_sync_tasks.py b/cc-registry-v2/backend/app/tasks/image_sync_tasks.py index facec3a031c4..164d3f8d397c 100644 --- a/cc-registry-v2/backend/app/tasks/image_sync_tasks.py +++ b/cc-registry-v2/backend/app/tasks/image_sync_tasks.py @@ -26,6 +26,7 @@ import yaml +from app.core.config import settings from app.core.database import SessionLocal from app.models import CodeCollection from 
app.models.version import CodeCollectionVersion @@ -36,9 +37,13 @@ def _load_codecollections_yaml() -> list[dict]: - """Locate codecollections.yaml in the same order other tasks do.""" + """Locate codecollections.yaml. + + Primary path is `settings.CODECOLLECTIONS_FILE` (env-overridable); + fallbacks cover source-checkout and devcontainer layouts. + """ candidate_paths = [ - "/app/codecollections.yaml", + settings.CODECOLLECTIONS_FILE, os.path.join( os.path.dirname(os.path.dirname(os.path.dirname(__file__))), "..", diff --git a/cc-registry-v2/backend/app/tasks/raw_data_tasks.py b/cc-registry-v2/backend/app/tasks/raw_data_tasks.py index b0a24a0b0381..10bb968464c4 100644 --- a/cc-registry-v2/backend/app/tasks/raw_data_tasks.py +++ b/cc-registry-v2/backend/app/tasks/raw_data_tasks.py @@ -33,7 +33,7 @@ def store_yaml_data_task(self, yaml_data: Dict[str, Any] = None): # FAILURE in Celery + task_executions instead of silently # returning SUCCESS with an error payload nobody checks. if not yaml_data: - yaml_path = "/app/codecollections.yaml" + yaml_path = settings.CODECOLLECTIONS_FILE try: with open(yaml_path, 'r') as file: yaml_data = yaml.safe_load(file) diff --git a/cc-registry-v2/backend/app/tasks/registry_tasks.py b/cc-registry-v2/backend/app/tasks/registry_tasks.py index 48c5c9711b2e..104045b7853c 100644 --- a/cc-registry-v2/backend/app/tasks/registry_tasks.py +++ b/cc-registry-v2/backend/app/tasks/registry_tasks.py @@ -12,7 +12,9 @@ from typing import Dict, Any, List, Optional from datetime import datetime from git import Repo +import requests +from app.core.config import settings from app.core.database import SessionLocal from app.models import CodeCollection, Codebundle, RawRepositoryData from app.models.version import CodeCollectionVersion @@ -43,6 +45,41 @@ def _parse_robot_file_content(content: str, file_path: str, collection_slug: str from app.tasks.fixed_parser import parse_robot_file_content return parse_robot_file_content(content, file_path, 
collection_slug) + +def _validate_git_url(git_url: str, collection_slug: str) -> Optional[str]: + """Sanity-check a CC's git_url at YAML ingestion time. + + Returns None if the URL resolves (HTTP 200/301/302), or an + error string if it doesn't. The check is intentionally lenient: + + - HEAD before GET (cheap; GitHub returns the same status codes). + - Short timeout (5s) — sync_all_collections_task runs ~30 CCs and + we don't want one DNS hang to stall the whole sync. + - Network errors are reported but never fatal: a transient outage + shouldn't make a sync fail. We log + collect the error and let + the rest of the task continue. + + This is the guard that would have caught the stewartshea typo + (`rw-cli-codecollectionn`) at ingestion time, before image-sync + started silently skipping the bad entry. + """ + if not git_url: + return "missing git_url" + if not git_url.startswith(("http://", "https://")): + return f"git_url {git_url!r} is not an http(s) URL" + try: + resp = requests.head(git_url, allow_redirects=True, timeout=5) + if resp.status_code in (200, 301, 302): + return None + if resp.status_code == 404: + return ( + f"git_url returned 404 — repository does not exist or is " + f"private without auth (got {git_url})" + ) + return f"git_url returned HTTP {resp.status_code} for {git_url}" + except requests.RequestException as e: + return f"git_url request failed: {type(e).__name__}: {e}" + @celery_app.task(bind=True) def sync_all_collections_task(self): """ @@ -50,6 +87,15 @@ def sync_all_collections_task(self): - Load codecollections.yaml - Create/update CodeCollection records in DB - Clone repositories to temp directory for parsing + + NOT updated here: `image_registry`. That field is not a column on + CodeCollection; image refs are stored per-version on + CodeCollectionVersion by `sync_image_tags_task`. 
That task reads + `image_registry` directly from codecollections.yaml on every run, so + edits to the YAML's `image_registry` field take effect the next time + `sync_image_tags_task` runs — no `CodeCollection` row update is + needed (or possible) here. See docs/CCV.md for the image-catalog + pipeline. """ try: logger.info(f"Starting sync_all_collections_task {self.request.id}") @@ -58,7 +104,7 @@ def sync_all_collections_task(self): # is recorded as FAILURE in Celery + task_executions, rather than # silently returning SUCCESS with an error payload that nobody # checks. See AGENTS.md "task error handling". - yaml_path = "/app/codecollections.yaml" + yaml_path = settings.CODECOLLECTIONS_FILE if not os.path.exists(yaml_path): raise FileNotFoundError(f"codecollections.yaml not found at {yaml_path}") @@ -67,19 +113,43 @@ def sync_all_collections_task(self): collections_data = yaml_data.get('codecollections', []) logger.info(f"Loaded {len(collections_data)} collections from YAML") - + collections_synced = 0 + # Collect per-CC ingestion warnings so the task result surfaces + # them in one place (task_executions.result), instead of forcing + # operators to scrape worker logs. Non-fatal: a single CC with a + # bad git_url shouldn't block the rest of the sync. + ingestion_warnings: List[Dict[str, str]] = [] db = SessionLocal() - + try: for collection_data in collections_data: collection_slug = collection_data.get('slug') git_url = collection_data.get('git_url') - + if not collection_slug or not git_url: logger.warning(f"Skipping collection with missing slug or git_url") + ingestion_warnings.append({ + "slug": collection_slug or "", + "error": "missing slug or git_url in codecollections.yaml", + }) continue - + + # Validate git_url is reachable. This catches typos in + # codecollections.yaml (e.g. `rw-cli-codecollectionn`) + # at the earliest possible point, instead of letting + # them silently break image-sync and other downstream + # consumers. 
Non-fatal — we still write the row so the + # admin UI surfaces the CC alongside its error. + url_err = _validate_git_url(git_url, collection_slug) + if url_err: + logger.warning(f"[{collection_slug}] git_url validation failed: {url_err}") + ingestion_warnings.append({ + "slug": collection_slug, + "git_url": git_url, + "error": url_err, + }) + # Create/update collection in DB collection = db.query(CodeCollection).filter( CodeCollection.slug == collection_slug @@ -121,8 +191,15 @@ def sync_all_collections_task(self): db.commit() collections_synced += 1 - logger.info(f"Synced {collections_synced} collections") - return {"status": "success", "collections_synced": collections_synced} + logger.info( + f"Synced {collections_synced} collections " + f"({len(ingestion_warnings)} ingestion warning(s))" + ) + return { + "status": "success", + "collections_synced": collections_synced, + "ingestion_warnings": ingestion_warnings, + } finally: db.close() diff --git a/cc-registry-v2/frontend/src/App.tsx b/cc-registry-v2/frontend/src/App.tsx index 7a93231db9f9..3307e0e82825 100644 --- a/cc-registry-v2/frontend/src/App.tsx +++ b/cc-registry-v2/frontend/src/App.tsx @@ -12,7 +12,6 @@ import VersionDetail from './pages/VersionDetail'; import AllTasks from './pages/AllTasks'; import TestAPI from './pages/TestAPI'; import Admin from './pages/Admin'; -import TaskManager from './pages/TaskManager'; import Login from './pages/Login'; import ConfigBuilder from './pages/ConfigBuilder'; import Chat from './pages/Chat'; @@ -60,14 +59,6 @@ function AppContent() { } /> - - - - } - /> } diff --git a/cc-registry-v2/frontend/src/components/Header.tsx b/cc-registry-v2/frontend/src/components/Header.tsx index 436afa5d6abd..8842d68b40ce 100644 --- a/cc-registry-v2/frontend/src/components/Header.tsx +++ b/cc-registry-v2/frontend/src/components/Header.tsx @@ -310,9 +310,6 @@ const Header: React.FC = () => { handleMenuNavigate('/chat-debug')}> Chat Debug - handleMenuNavigate('/tasks')}> - Task Manager - 
)} diff --git a/cc-registry-v2/frontend/src/pages/Admin.tsx b/cc-registry-v2/frontend/src/pages/Admin.tsx index bef421031ff0..647d70189fdb 100644 --- a/cc-registry-v2/frontend/src/pages/Admin.tsx +++ b/cc-registry-v2/frontend/src/pages/Admin.tsx @@ -17,6 +17,7 @@ import { import { apiService } from '../services/api'; import AdminInventory from './AdminInventory'; import AdminCCVersions from './AdminCCVersions'; +import AdminTaskOperations from './AdminTaskOperations'; const Admin: React.FC = () => { const [token, setToken] = useState('admin-dev-token'); @@ -144,31 +145,22 @@ const Admin: React.FC = () => { Admin Panel - {/* Quick Navigation */} + {/* Quick Navigation — Task Manager removed; task triggering lives in the + "Schedules" tab below, observability lives in the "Operations" tab. */} Quick Navigation - - - - Access specialized admin interfaces for task management and chat quality debugging - @@ -177,6 +169,7 @@ const Admin: React.FC = () => { + {currentTab === 0 && ( @@ -498,6 +491,10 @@ const Admin: React.FC = () => { {currentTab === 3 && ( )} + + {currentTab === 4 && ( + + )} ); }; diff --git a/cc-registry-v2/frontend/src/pages/AdminTaskOperations.tsx b/cc-registry-v2/frontend/src/pages/AdminTaskOperations.tsx new file mode 100644 index 000000000000..34cdc0e3ec76 --- /dev/null +++ b/cc-registry-v2/frontend/src/pages/AdminTaskOperations.tsx @@ -0,0 +1,450 @@ +/** + * AdminTaskOperations — task observability inside /admin. + * + * This is the read-side counterpart to the schedules-tab "Run Now" + * trigger surface. It shows: + * - Currently-running Celery tasks (auto-refreshing) + * - Recent task history with failure tracebacks expandable inline + * + * Triggering tasks lives EXCLUSIVELY on the Schedules tab — every + * triggerable task is declared in schedules.yaml (set `enabled: false` + * to keep it manual-only) and the Schedules tab auto-generates a + * "Run Now" button for each. There is intentionally no hardcoded + * trigger list here. 
+ */ +import React, { useCallback, useEffect, useState } from 'react'; +import { + Alert, + Box, + Button, + Card, + CardContent, + Chip, + CircularProgress, + Collapse, + Dialog, + DialogActions, + DialogContent, + DialogTitle, + IconButton, + Paper, + Table, + TableBody, + TableCell, + TableContainer, + TableHead, + TableRow, + Typography, +} from '@mui/material'; +import { + BugReport, + CheckCircle, + Error as ErrorIcon, + KeyboardArrowDown, + KeyboardArrowUp, + Refresh, + Schedule, +} from '@mui/icons-material'; +import { apiService } from '../services/api'; +import { useAuth, getAuthToken } from '../contexts/AuthContext'; + +interface RunningTask { + task_id: string; + task_name?: string; + status?: string; + progress?: number; + current_step?: string; + started_at?: string; + duration_seconds?: number; +} + +interface HistoryTask { + task_id: string; + task_name?: string; + status?: string; + is_successful?: boolean; + is_failed?: boolean; + started_at?: string; + completed_at?: string; + duration_seconds?: number; + triggered_by?: string; + error_message?: string; + traceback?: string; +} + +const statusIcon = (status?: string) => { + switch (status) { + case 'SUCCESS': + return ; + case 'FAILURE': + return ; + case 'PROGRESS': + return ; + case 'PENDING': + case 'STARTED': + return ; + default: + return ; + } +}; + +const statusColor = (status?: string, is_failed?: boolean, is_successful?: boolean) => { + if (is_successful) return 'success'; + if (is_failed) return 'error'; + switch (status) { + case 'SUCCESS': + return 'success'; + case 'FAILURE': + return 'error'; + case 'STARTED': + case 'PROGRESS': + return 'info'; + case 'PENDING': + return 'warning'; + default: + return 'default'; + } +}; + +const AdminTaskOperations: React.FC = () => { + const { isAuthenticated } = useAuth(); + const [running, setRunning] = useState([]); + const [history, setHistory] = useState([]); + const [loading, setLoading] = useState(false); + const [autoRefresh, 
setAutoRefresh] = useState(true); + const [error, setError] = useState(null); + const [expandedTask, setExpandedTask] = useState(null); + + const [tracebackTask, setTracebackTask] = useState(null); + + const refresh = useCallback(async () => { + if (!isAuthenticated) return; + const token = getAuthToken(); + if (!token) { + setError('Not authenticated'); + return; + } + + try { + setLoading(true); + setError(null); + + const [runningData, historyData] = await Promise.all([ + apiService.getRunningTasks(token), + apiService.getTaskHistory(token, 50, 0), + ]); + + setRunning(runningData.running_tasks || []); + setHistory(historyData.tasks || []); + } catch (err: unknown) { + const msg = err instanceof Error ? err.message : String(err); + setError(`Failed to load task ops: ${msg}`); + } finally { + setLoading(false); + } + }, [isAuthenticated]); + + useEffect(() => { + refresh(); + if (!autoRefresh) return; + const interval = setInterval(refresh, 5000); + return () => clearInterval(interval); + }, [refresh, autoRefresh]); + + return ( + + {/* Controls */} + + + + + Trigger tasks from the Schedules tab — this view is + observability only. + + + + {error && ( + setError(null)}> + {error} + + )} + + {/* Running tasks */} + + + + Currently Running ({running.length}) + + {running.length === 0 ? ( + No tasks running right now. + ) : ( + + + + + Task + Status + Progress + Step + Started + Duration + + + + {running.map((t) => ( + + + + {t.task_name || t.task_id} + + + {t.task_id} + + + + + + + + + + {Math.round(t.progress ?? 0)}% + + + + + {t.current_step || '—'} + + + + {t.started_at ? new Date(t.started_at).toLocaleString() : '—'} + + + + + {t.duration_seconds != null + ? `${Math.round(t.duration_seconds)}s` + : t.started_at + ? `${Math.round( + (Date.now() - new Date(t.started_at).getTime()) / 1000, + )}s` + : '—'} + + + + ))} + +
+
+ )} +
+
+ + {/* History */} + + + + Recent History ({history.length}) + + {history.length === 0 ? ( + No completed task runs yet. + ) : ( + + + + + + Task + Status + Started + Duration + Triggered by + + + + {history.map((t) => { + const hasError = t.is_failed && t.error_message; + return ( + + *': { borderBottom: 'unset' } } : undefined} + > + + {hasError && ( + + setExpandedTask( + expandedTask === t.task_id ? null : t.task_id, + ) + } + > + {expandedTask === t.task_id ? ( + + ) : ( + + )} + + )} + + + + {t.task_name || t.task_id} + + + {t.task_id} + + + + + + + + {t.started_at ? new Date(t.started_at).toLocaleString() : '—'} + + + + + {t.duration_seconds != null + ? `${Math.round(t.duration_seconds)}s` + : '—'} + + + + + {t.triggered_by || 'system'} + + + + {hasError && ( + + + + + + + + Error + + + + {t.error_message} + + {t.traceback && ( + + )} + + + + + )} + + ); + })} + +
+
+ )} +
+
+ + {/* Traceback dialog */} + setTracebackTask(null)} + maxWidth="md" + fullWidth + > + + {tracebackTask?.task_name || 'Task'} — traceback + + + + {tracebackTask?.traceback || 'No traceback recorded.'} + + + + + + +
+ ); +}; + +export default AdminTaskOperations; diff --git a/cc-registry-v2/frontend/src/pages/TaskManager.tsx b/cc-registry-v2/frontend/src/pages/TaskManager.tsx deleted file mode 100644 index b80fe8806b0c..000000000000 --- a/cc-registry-v2/frontend/src/pages/TaskManager.tsx +++ /dev/null @@ -1,735 +0,0 @@ -import React, { useState, useEffect } from 'react'; -import { - Box, - Container, - Typography, - Button, - Card, - CardContent, - Alert, - CircularProgress, - Chip, - Table, - TableBody, - TableCell, - TableContainer, - TableHead, - TableRow, - Paper, - Tabs, - Tab, - Dialog, - DialogTitle, - DialogContent, - DialogActions, - IconButton, - Collapse, -} from '@mui/material'; -import { - PlayArrow, - Refresh, - CheckCircle, - Error as ErrorIcon, - Schedule, - KeyboardArrowDown, - KeyboardArrowUp, - BugReport, -} from '@mui/icons-material'; -import { apiService } from '../services/api'; -import { useAuth, getAuthToken } from '../contexts/AuthContext'; - -interface TaskStatus { - task_id: string; - status: string; - result?: any; - progress?: number; - current_step?: string; - started_at?: string; - is_ready?: boolean; - is_successful?: boolean; - is_failed?: boolean; - error?: string; - name?: string; - worker?: string; - eta?: string; -} - -interface TabPanelProps { - children?: React.ReactNode; - index: number; - value: number; -} - -function TabPanel(props: TabPanelProps) { - const { children, value, index, ...other } = props; - return ( - - ); -} - -const TaskManager: React.FC = () => { - const { user, isAuthenticated } = useAuth(); - const [activeTab, setActiveTab] = useState(0); - const [taskHistory, setTaskHistory] = useState([]); - const [runningTasks, setRunningTasks] = useState([]); - const [taskStats, setTaskStats] = useState(null); - const [taskMetrics, setTaskMetrics] = useState(null); - const [workerStats, setWorkerStats] = useState(null); - const [loading, setLoading] = useState(false); - const [error, setError] = useState(null); - const [message, 
setMessage] = useState(null); - const [autoRefresh, setAutoRefresh] = useState(true); - const [expandedTask, setExpandedTask] = useState(null); - const [errorDialogOpen, setErrorDialogOpen] = useState(false); - const [selectedTaskError, setSelectedTaskError] = useState<{ - task_name: string; - error_message?: string; - traceback?: string; - } | null>(null); - - const handleTabChange = (event: React.SyntheticEvent, newValue: number) => { - setActiveTab(newValue); - }; - - const refreshData = async () => { - if (!isAuthenticated) { - setError('Authentication required'); - return; - } - - try { - setLoading(true); - setError(null); - - const token = getAuthToken(); - if (!token) { - setError('No authentication token available'); - return; - } - - // Get live task data, task health and admin metrics - const [taskHistoryData, runningTasksData, taskStatsData, taskHealth, adminMetrics] = await Promise.all([ - apiService.getTaskHistory(token, 50, 0), - apiService.getRunningTasks(token), - apiService.getTaskStats(token, 7), - apiService.getTaskHealth(token), - apiService.getAdminMetrics(token, 7) - ]); - - setTaskHistory(taskHistoryData.tasks || []); - setRunningTasks(runningTasksData.running_tasks || []); - setTaskStats(taskStatsData); - setTaskMetrics(adminMetrics); - setWorkerStats(taskHealth); - - } catch (err) { - setError(`Failed to refresh data: ${err}`); - } finally { - setLoading(false); - } - }; - - const triggerTask = async (taskType: string, params: any = {}) => { - if (!isAuthenticated) { - setError('Authentication required'); - return; - } - - const token = getAuthToken(); - if (!token) { - setError('No authentication token available'); - return; - } - - try { - setLoading(true); - setError(null); - setMessage(null); - - let response: any; - switch (taskType) { - case 'seed-database': - response = await apiService.triggerSeedDatabase(token); - break; - case 'validate-yaml': - response = await apiService.triggerValidateYaml(token); - break; - case 
'sync-collections': - response = await apiService.triggerSyncCollections(token); - break; - case 'parse-codebundles': - response = await apiService.triggerParseCodebundles(token); - break; - case 'enhance-codebundles': - response = await apiService.triggerEnhanceCodebundles(token); - break; - case 'generate-metrics': - response = await apiService.triggerGenerateMetrics(token); - break; - default: - throw new Error(`Unknown task type: ${taskType}`); - } - - setMessage(`Task started: ${taskType} (ID: ${response.task_id})`); - - // Refresh to get updated task list - setTimeout(() => refreshData(), 1000); - - } catch (err) { - setError(`Failed to trigger task: ${err}`); - } finally { - setLoading(false); - } - }; - - // Auto-refresh effect - useEffect(() => { - refreshData(); // Initial load - - if (autoRefresh) { - const interval = setInterval(() => { - refreshData(); - }, 5000); // Refresh every 5 seconds - - return () => clearInterval(interval); - } - }, [isAuthenticated, autoRefresh]); - - // Note: Cancel and retry functions removed - not available in database-driven task system - // Tasks are managed through the new database-driven architecture - - const getStatusIcon = (status: string) => { - switch (status) { - case 'SUCCESS': - return ; - case 'FAILURE': - return ; - case 'PENDING': - return ; - case 'PROGRESS': - return ; - default: - return ; - } - }; - - const getStatusColor = (status: string) => { - switch (status) { - case 'SUCCESS': - return 'success'; - case 'FAILURE': - return 'error'; - case 'PENDING': - return 'warning'; - case 'PROGRESS': - return 'info'; - default: - return 'default'; - } - }; - - useEffect(() => { - if (isAuthenticated) { - refreshData(); - } - }, [isAuthenticated]); // Remove refreshData from dependencies to prevent infinite loop - - // Show loading if not authenticated (should be handled by ProtectedRoute, but just in case) - if (!isAuthenticated) { - return ( - - - - - - ); - } - - return ( - - - Task Manager - - - {/* User Info 
and Refresh */} - - - - - - Welcome, {user?.name} - - - {user?.email} • {user?.roles?.join(', ')} - - - - - - - - {/* Messages */} - {message && ( - - {message} - - )} - - {error && ( - - {error} - - )} - - {/* Tabs */} - - - - - - - - - - - {/* Auto-refresh toggle */} - - - {autoRefresh && ( - - Refreshing every 5 seconds - - )} - - - {/* Running Tasks Tab */} - - - - Currently Running Tasks ({runningTasks.length}) - - - {runningTasks.length === 0 ? ( - No tasks are currently running - ) : ( - - - - - Task Name - Status - Progress - Current Step - Started - Duration - - - - {runningTasks.map((task) => ( - - - - - {task.task_name} - - - {task.task_id} - - - - - - - - - - - {Math.round(task.progress || 0)}% - - - - - - {task.current_step || 'N/A'} - - - - - {task.started_at ? new Date(task.started_at).toLocaleString() : 'N/A'} - - - - - {task.duration_seconds ? `${Math.round(task.duration_seconds)}s` : - task.started_at ? `${Math.round((Date.now() - new Date(task.started_at).getTime()) / 1000)}s` : 'N/A'} - - - - ))} - -
-
- )} -
-
- - {/* Task History Tab */} - - - - Task History ({taskHistory.length}) - - - {taskHistory.length === 0 ? ( - No task history available - ) : ( - - - - - Task Name - Status - Started - Completed - Duration - Triggered By - - - - {taskHistory.map((task) => ( - - *': { borderBottom: 'unset' } }}> - - - {task.is_failed && task.error_message && ( - setExpandedTask(expandedTask === task.task_id ? null : task.task_id)} - > - {expandedTask === task.task_id ? : } - - )} - - - {task.task_name} - - - {task.task_id} - - - - - - - - - - {task.started_at ? new Date(task.started_at).toLocaleString() : 'N/A'} - - - - - {task.completed_at ? new Date(task.completed_at).toLocaleString() : 'N/A'} - - - - - {task.duration_seconds ? `${Math.round(task.duration_seconds)}s` : 'N/A'} - - - - - {task.triggered_by || 'system'} - - - - {task.is_failed && task.error_message && ( - - - - - - - - Error Details - - - - {task.error_message} - - {task.traceback && ( - - )} - - - - - )} - - ))} - -
-
- )} -
-
- - {/* Trigger Tasks Tab */} - - - - Trigger New Tasks - - - - - - - - - - - - - - {/* System Info Tab */} - - - System Information - - - - - Task System Status - - - - Status: {workerStats?.status || 'Unknown'} - - - Celery Status: {workerStats?.celery_status || 'Unknown'} - - - Task System: {workerStats?.task_system || 'Database-driven'} - - {workerStats?.workers && ( - - Workers: {workerStats.workers.join(', ')} - - )} - - - {taskMetrics && ( - <> - - System Metrics - - - Total Collections: {taskMetrics.system_metrics?.total_collections || 0} - - - Total Codebundles: {taskMetrics.system_metrics?.total_codebundles || 0} - - - Total Tasks: {taskMetrics.system_metrics?.total_tasks || 0} - - - )} - - - - - - {/* Metrics & Stats Tab */} - - - - - - - Task Metrics - - {taskMetrics && ( - - Total Tasks: {taskMetrics.total_tasks_executed || 0} - Successful: {taskMetrics.successful_tasks || 0} - Failed: {taskMetrics.failed_tasks || 0} - Success Rate: {(taskMetrics.success_rate * 100).toFixed(1)}% - Avg Execution Time: {taskMetrics.average_execution_time || 0}s - - )} - - - - - - - - - Worker Stats - - {workerStats && ( - - Total Workers: {workerStats.total_workers || 0} - Total Tasks: {workerStats.total_tasks || 0} - Active Workers: {Object.keys(workerStats.workers || {}).length} - - )} - - - - - - - {/* Error Details Dialog */} - setErrorDialogOpen(false)} - maxWidth="md" - fullWidth - > - - - - Task Error Details - - - - {selectedTaskError && ( - - - Task: {selectedTaskError.task_name} - - - - Error Message: - - - - {selectedTaskError.error_message} - - - - {selectedTaskError.traceback && ( - <> - - Traceback: - - - - {selectedTaskError.traceback} - - - - )} - - )} - - - - - -
- ); -}; - -export default TaskManager; - diff --git a/cc-registry-v2/frontend/src/services/api.ts b/cc-registry-v2/frontend/src/services/api.ts index e3c1e71e2fc2..688e3de77457 100644 --- a/cc-registry-v2/frontend/src/services/api.ts +++ b/cc-registry-v2/frontend/src/services/api.ts @@ -467,80 +467,10 @@ export const apiService = { return response.data; }, - // Database-driven task management endpoints - async triggerSeedDatabase(token: string, yamlPath: string = '/app/codecollections.yaml') { - const response = await api.post('/tasks/seed-database', null, { - params: { yaml_file_path: yamlPath }, - headers: { Authorization: `Bearer ${token}` } - }); - return response.data; - }, - - async triggerValidateYaml(token: string, yamlPath: string = '/app/codecollections.yaml') { - const response = await api.post('/tasks/validate-yaml', null, { - params: { yaml_file_path: yamlPath }, - headers: { Authorization: `Bearer ${token}` } - }); - return response.data; - }, - - async triggerSyncCollections(token: string) { - const response = await api.post('/tasks/sync-collections', {}, { - headers: { Authorization: `Bearer ${token}` } - }); - return response.data; - }, - - async triggerSyncCollection(token: string, collectionId: number) { - const response = await api.post(`/tasks/sync-collection/${collectionId}`, {}, { - headers: { Authorization: `Bearer ${token}` } - }); - return response.data; - }, - - async triggerParseCodebundles(token: string) { - const response = await api.post('/tasks/parse-codebundles', {}, { - headers: { Authorization: `Bearer ${token}` } - }); - return response.data; - }, - - async triggerParseCollection(token: string, collectionId: number) { - const response = await api.post(`/tasks/parse-collection/${collectionId}`, {}, { - headers: { Authorization: `Bearer ${token}` } - }); - return response.data; - }, - - async triggerEnhanceCodebundles(token: string) { - const response = await api.post('/tasks/enhance-codebundles', {}, { - headers: { Authorization: 
`Bearer ${token}` } - }); - return response.data; - }, - - async triggerEnhanceCodebundle(token: string, codebundleId: number) { - const response = await api.post(`/tasks/enhance-codebundle/${codebundleId}`, {}, { - headers: { Authorization: `Bearer ${token}` } - }); - return response.data; - }, - - async triggerGenerateMetrics(token: string) { - const response = await api.post('/tasks/generate-metrics', {}, { - headers: { Authorization: `Bearer ${token}` } - }); - return response.data; - }, - - // Old getTaskStatus method removed - now using task management endpoint - - async getTaskHealth(token: string) { - const response = await api.get('/tasks/health', { - headers: { Authorization: `Bearer ${token}` } - }); - return response.data; - }, + // NOTE: The legacy /api/v1/tasks/* trigger surface was removed. + // All task triggers now flow through /api/v1/schedules/{name}/trigger + // (configured in schedules.yaml + exposed by the Schedules admin tab). + // For task observability use /api/v1/task-management/tasks/{running,history}. // Admin CRUD endpoints async getCollections(token: string, includeInactive: boolean = false) { diff --git a/cc-registry-v2/k8s/backend-deployment.yaml b/cc-registry-v2/k8s/backend-deployment.yaml index 36bfca2d82f9..35a503f633bd 100644 --- a/cc-registry-v2/k8s/backend-deployment.yaml +++ b/cc-registry-v2/k8s/backend-deployment.yaml @@ -62,6 +62,13 @@ spec: value: "azure-openai" - name: AI_ENHANCEMENT_ENABLED value: "true" + # Directory-mounted ConfigMap paths (no subPath) so the kubelet + # auto-propagates ConfigMap updates without a pod restart. See + # scheduler-deployment.yaml for full rationale. 
+ - name: SCHEDULES_FILE + value: /etc/cc-registry/schedules/schedules.yaml + - name: SOURCES_FILE + value: /etc/cc-registry/sources/sources.yaml envFrom: - secretRef: name: azure-openai-credentials @@ -70,12 +77,10 @@ spec: optional: true volumeMounts: - name: schedules-config - mountPath: /app/schedules.yaml - subPath: schedules.yaml + mountPath: /etc/cc-registry/schedules readOnly: true - name: documentation-sources - mountPath: /app/sources.yaml - subPath: sources.yaml + mountPath: /etc/cc-registry/sources readOnly: true # - name: tag-icons # mountPath: /app/map-tag-icons.yaml diff --git a/cc-registry-v2/k8s/scheduler-deployment.yaml b/cc-registry-v2/k8s/scheduler-deployment.yaml index 755b2a28c9d2..76dd205e13f6 100644 --- a/cc-registry-v2/k8s/scheduler-deployment.yaml +++ b/cc-registry-v2/k8s/scheduler-deployment.yaml @@ -218,17 +218,25 @@ spec: value: "azure-openai" - name: AI_ENHANCEMENT_ENABLED value: "true" + # Point the app at directory-mounted ConfigMap paths instead of + # the legacy /app/.yaml subPath mounts. Directory mounts + # auto-propagate ConfigMap updates from the kubelet (subPath + # mounts don't). schedules.yaml is still only re-read on Celery + # beat startup, but codecollections.yaml + sources.yaml are now + # picked up within ~60s of a `kubectl apply` with no pod restart. 
+ - name: SCHEDULES_FILE + value: /etc/cc-registry/schedules/schedules.yaml + - name: SOURCES_FILE + value: /etc/cc-registry/sources/sources.yaml envFrom: - secretRef: name: azure-openai-credentials volumeMounts: - name: schedules-config - mountPath: /app/schedules.yaml - subPath: schedules.yaml + mountPath: /etc/cc-registry/schedules readOnly: true - name: documentation-sources - mountPath: /app/sources.yaml - subPath: sources.yaml + mountPath: /etc/cc-registry/sources readOnly: true - name: celerybeat-schedule mountPath: /tmp diff --git a/cc-registry-v2/k8s/worker-deployment.yaml b/cc-registry-v2/k8s/worker-deployment.yaml index c8c3dfa4ecc7..acf97e3c139f 100644 --- a/cc-registry-v2/k8s/worker-deployment.yaml +++ b/cc-registry-v2/k8s/worker-deployment.yaml @@ -32,6 +32,13 @@ spec: env: - name: ENVIRONMENT value: "production" + # Directory-mounted ConfigMap paths (no subPath) so the kubelet + # auto-propagates ConfigMap updates without a pod restart. See + # scheduler-deployment.yaml for full rationale. + - name: SCHEDULES_FILE + value: /etc/cc-registry/schedules/schedules.yaml + - name: SOURCES_FILE + value: /etc/cc-registry/sources/sources.yaml envFrom: # Load all database and Redis configuration (supports both URL and component-based config) - secretRef: @@ -44,12 +51,10 @@ spec: mountPath: /app readOnly: true - name: schedules-config - mountPath: /app/schedules.yaml - subPath: schedules.yaml + mountPath: /etc/cc-registry/schedules readOnly: true - name: documentation-sources - mountPath: /app/sources.yaml - subPath: sources.yaml + mountPath: /etc/cc-registry/sources readOnly: true resources: requests: From 57fa08da8af55d3870108156f6f50c0e54b850b3 Mon Sep 17 00:00:00 2001 From: stewartshea Date: Tue, 12 May 2026 18:34:39 -0400 Subject: [PATCH 12/13] Remove hourly statistics update task and related configurations - Deleted the `update-statistics-hourly` task from the schedules and YAML configurations, as it was deemed unnecessary. 
- Removed references to the task in the backend code, including the Celery task imports and API responses. - Updated documentation to reflect the removal of the hourly statistics update from the scheduling and architecture sections. --- cc-registry-v2/backend/app/main.py | 31 +++++------- .../backend/app/routers/schedule_config.py | 1 - cc-registry-v2/backend/app/tasks/__init__.py | 1 - .../backend/app/tasks/celery_app.py | 1 - .../app/tasks/data_population_tasks.py | 48 ------------------- .../backend/app/tasks/registry_tasks.py | 11 ++++- cc-registry-v2/backend/openapi.yaml | 3 -- cc-registry-v2/docs/ARCHITECTURE.md | 2 - cc-registry-v2/docs/MCP_INDEXING_SCHEDULE.md | 1 - cc-registry-v2/docs/SCHEDULES.md | 1 - cc-registry-v2/frontend/src/pages/Home.tsx | 1 - cc-registry-v2/frontend/src/services/api.ts | 4 +- cc-registry-v2/k8s/scheduler-deployment.yaml | 9 ---- cc-registry-v2/schedules.yaml | 10 ---- 14 files changed, 24 insertions(+), 100 deletions(-) delete mode 100644 cc-registry-v2/backend/app/tasks/data_population_tasks.py diff --git a/cc-registry-v2/backend/app/main.py b/cc-registry-v2/backend/app/main.py index d332ba43587c..ca6b4bf403f3 100644 --- a/cc-registry-v2/backend/app/main.py +++ b/cc-registry-v2/backend/app/main.py @@ -959,36 +959,27 @@ async def get_registry_stats(): total_tasks = int(stats.total_tasks) total_slis = int(stats.total_slis) total_items = total_tasks + total_slis - - # Get tasks over time (by collection for now - simulated growth data) - # In production, you'd track this in a separate table - tasks_over_time = [ - {"month": "Jan 2024", "tasks": int(total_items * 0.4)}, - {"month": "Feb 2024", "tasks": int(total_items * 0.5)}, - {"month": "Mar 2024", "tasks": int(total_items * 0.6)}, - {"month": "Apr 2024", "tasks": int(total_items * 0.7)}, - {"month": "May 2024", "tasks": int(total_items * 0.75)}, - {"month": "Jun 2024", "tasks": int(total_items * 0.8)}, - {"month": "Jul 2024", "tasks": int(total_items * 0.85)}, - {"month": "Aug 
2024", "tasks": int(total_items * 0.9)}, - {"month": "Sep 2024", "tasks": int(total_items * 0.92)}, - {"month": "Oct 2024", "tasks": int(total_items * 0.95)}, - {"month": "Nov 2024", "tasks": int(total_items * 0.98)}, - {"month": "Dec 2024", "tasks": int(total_items)}, - ] - + + # NOTE: This endpoint intentionally does NOT return a + # `tasks_over_time` field. Historical task growth is served + # by /api/v1/analytics/tasks-by-week(-cached), backed by the + # `compute_task_growth_analytics` Celery task which derives + # real first-commit dates from each CC's git history. The + # homepage's TaskGrowthChart consumes that endpoint directly. + # The old `tasks_over_time` array here was a synthetic + # 0.4×/0.5×/0.6× ramp left over from scaffolding and was + # never displayed in the UI. return { "collections": collections_count, "codebundles": codebundles_count, "tasks": total_items, "slis": total_slis, - "tasks_over_time": tasks_over_time } finally: db.close() except Exception as e: logger.error(f"Error getting stats: {e}") - return {"collections": 0, "codebundles": 0, "tasks": 0, "slis": 0, "tasks_over_time": []} + return {"collections": 0, "codebundles": 0, "tasks": 0, "slis": 0} if __name__ == "__main__": diff --git a/cc-registry-v2/backend/app/routers/schedule_config.py b/cc-registry-v2/backend/app/routers/schedule_config.py index 82eb8987f716..df5c283ea49e 100644 --- a/cc-registry-v2/backend/app/routers/schedule_config.py +++ b/cc-registry-v2/backend/app/routers/schedule_config.py @@ -157,7 +157,6 @@ def format_time_val(val): 'parse-codebundles-daily': 'Parse all codebundles from cloned repositories', 'enhance-codebundles-weekly': 'Run AI enhancement on all codebundles', 'generate-metrics-daily': 'Generate daily metrics and statistics', - 'update-statistics-hourly': 'Update collection statistics', 'health-check': 'System health check', 'scheduled-sync': 'Full registry population (clone, parse, sync)', 'cleanup-old-tasks': 'Clean up old completed tasks', diff --git 
a/cc-registry-v2/backend/app/tasks/__init__.py b/cc-registry-v2/backend/app/tasks/__init__.py index 2d97b8e5249b..2073f6141e9f 100644 --- a/cc-registry-v2/backend/app/tasks/__init__.py +++ b/cc-registry-v2/backend/app/tasks/__init__.py @@ -4,7 +4,6 @@ from . import ( sync_tasks, registry_tasks, - data_population_tasks, ai_enhancement_tasks, workflow_tasks, task_monitoring, diff --git a/cc-registry-v2/backend/app/tasks/celery_app.py b/cc-registry-v2/backend/app/tasks/celery_app.py index d9c210aa93be..1e4928ca2b30 100644 --- a/cc-registry-v2/backend/app/tasks/celery_app.py +++ b/cc-registry-v2/backend/app/tasks/celery_app.py @@ -91,7 +91,6 @@ def _configure_broker_url(): "app.tasks.sync_tasks", "app.tasks.registry_tasks", "app.tasks.ai_enhancement_tasks", - "app.tasks.data_population_tasks", "app.tasks.task_monitoring", "app.tasks.workflow_tasks", "app.tasks.analytics_tasks", diff --git a/cc-registry-v2/backend/app/tasks/data_population_tasks.py b/cc-registry-v2/backend/app/tasks/data_population_tasks.py deleted file mode 100644 index 1a2254c236af..000000000000 --- a/cc-registry-v2/backend/app/tasks/data_population_tasks.py +++ /dev/null @@ -1,48 +0,0 @@ -""" -Data Population Tasks - Statistics updates and collection management - -Note: The primary sync/parse workflow is in registry_tasks.py and workflow_tasks.py. -This file contains supplementary tasks (statistics updates, etc.). - -DO NOT define sync_all_collections_task or parse_all_codebundles_task here. -Those canonical tasks live in registry_tasks.py to avoid duplicate registrations. -""" -import logging -from datetime import datetime - -from app.core.database import SessionLocal -from app.models import CodeCollection - -logger = logging.getLogger(__name__) - -# Use the shared Celery app (single instance for the entire application) -from app.tasks.celery_app import celery_app - - -@celery_app.task(bind=True) -def update_collection_statistics_task(self): - """ - Update collection statistics and metrics. 
- Referenced by schedules.yaml as 'update-statistics-hourly'. - """ - try: - logger.info(f"Starting statistics update task {self.request.id}") - - db = SessionLocal() - try: - # Update statistics for all collections - collections = db.query(CodeCollection).all() - for collection in collections: - collection.last_synced = datetime.utcnow() - - db.commit() - - finally: - db.close() - - logger.info(f"Statistics update task {self.request.id} completed") - return {'status': 'success', 'statistics_updated': True} - - except Exception as e: - logger.error(f"Statistics update task {self.request.id} failed: {e}") - raise diff --git a/cc-registry-v2/backend/app/tasks/registry_tasks.py b/cc-registry-v2/backend/app/tasks/registry_tasks.py index 104045b7853c..3897a42a03b3 100644 --- a/cc-registry-v2/backend/app/tasks/registry_tasks.py +++ b/cc-registry-v2/backend/app/tasks/registry_tasks.py @@ -165,6 +165,13 @@ def sync_all_collections_task(self): ) visibility = 'public' + # `last_synced` is OWNED by this task. It means + # "when was this CC last (re-)ingested from + # codecollections.yaml". Per-version image refreshes + # (sync_image_tags_task, every 5 min) and stats reads + # do NOT bump it — only an actual YAML→DB sync does. 
+ now = datetime.utcnow() + if not collection: collection = CodeCollection( name=collection_data.get('name', collection_slug), @@ -176,7 +183,8 @@ def sync_all_collections_task(self): owner_icon=collection_data.get('owner_icon', ''), git_ref=collection_data.get('git_ref', 'main'), visibility=visibility, - is_active=True + is_active=True, + last_synced=now, ) db.add(collection) logger.info(f"Created collection: {collection_slug} (visibility={visibility})") @@ -186,6 +194,7 @@ def sync_all_collections_task(self): collection.description = collection_data.get('description', '') collection.visibility = visibility collection.is_active = True + collection.last_synced = now logger.info(f"Updated collection: {collection_slug} (visibility={visibility})") db.commit() diff --git a/cc-registry-v2/backend/openapi.yaml b/cc-registry-v2/backend/openapi.yaml index 8896650edfc4..3adb2942d08d 100644 --- a/cc-registry-v2/backend/openapi.yaml +++ b/cc-registry-v2/backend/openapi.yaml @@ -319,9 +319,6 @@ paths: codebundles: { type: integer } tasks: { type: integer } slis: { type: integer } - tasks_over_time: - type: array - items: { type: object } # ========================================================================= # Chat diff --git a/cc-registry-v2/docs/ARCHITECTURE.md b/cc-registry-v2/docs/ARCHITECTURE.md index 347b26acfb46..02ab783af7d3 100644 --- a/cc-registry-v2/docs/ARCHITECTURE.md +++ b/cc-registry-v2/docs/ARCHITECTURE.md @@ -223,7 +223,6 @@ The server is **stateless**. 
`server_http.py` runs as a FastAPI app, registers M | `registry_tasks` | `sync_all_collections_task`, `parse_all_codebundles_task` | Steps 1-2 of the pipeline | | `ai_enhancement_tasks` | `enhance_pending_codebundles_task` | Step 3: AI metadata enhancement | | `indexing_tasks` | `index_codebundles_task`, `index_documentation_task`, `reindex_all_task` | Step 4: embedding generation + pgvector storage | -| `data_population_tasks` | `update_collection_statistics_task` | Hourly stats refresh | | `analytics_tasks` | `compute_task_growth_analytics` | Daily analytics | | `task_monitoring` | `cleanup_old_tasks_task`, `health_check_tasks_task` | Maintenance | | `mcp_tasks` | *(deprecated stubs)* | Redirect to `indexing_tasks` | @@ -237,7 +236,6 @@ All schedules are defined in `schedules.yaml` and loaded by Celery Beat. | `scheduled-sync` | Every 6 hours | Full pipeline: sync → parse → enhance → embed | | `index-documentation-daily` | Daily 3 AM | Crawl documentation URLs, generate embeddings | | `reindex-vectors-weekly` | Sunday 2 AM | Full rebuild of all vector embeddings | -| `update-statistics-hourly` | Hourly | Refresh collection statistics | | `compute-task-growth-analytics` | Daily 2:30 AM | Git history analysis for task growth | | `health-check` | Every 5 min | System health check | | `cleanup-old-tasks` | Daily 12:30 AM | Purge old task execution records | diff --git a/cc-registry-v2/docs/MCP_INDEXING_SCHEDULE.md b/cc-registry-v2/docs/MCP_INDEXING_SCHEDULE.md index 489afef54a13..03e75d73d6e3 100644 --- a/cc-registry-v2/docs/MCP_INDEXING_SCHEDULE.md +++ b/cc-registry-v2/docs/MCP_INDEXING_SCHEDULE.md @@ -52,7 +52,6 @@ Rebuilds all vector tables from scratch (codebundles + codecollections + documen | Schedule | Frequency | Task | Purpose | |---|---|---|---| | `validate-yaml-seed-daily` | Daily 1 AM | `sync_all_collections_task` | Ensure all YAML-defined collections exist in the database | -| `update-statistics-hourly` | Hourly | `update_collection_statistics_task` | 
Refresh collection statistics | | `compute-task-growth-analytics` | Daily 2:30 AM | `compute_task_growth_analytics` | Analyze git history for task growth | | `health-check` | Every 5 min | `health_check_task` | System health check | | `health-check-tasks` | Every 10 min | `health_check_tasks_task` | Task queue health check | diff --git a/cc-registry-v2/docs/SCHEDULES.md b/cc-registry-v2/docs/SCHEDULES.md index e337a1686b6d..3ee3671fe8f4 100644 --- a/cc-registry-v2/docs/SCHEDULES.md +++ b/cc-registry-v2/docs/SCHEDULES.md @@ -106,7 +106,6 @@ crontab: | validate-yaml-seed-daily | Daily at 01:00 | Validates YAML consistency | | generate-metrics-daily | Daily at 05:00 | Generates system metrics | | scheduled-sync | Daily at 06:00 | Legacy full sync | -| update-statistics-hourly | Every hour | Updates collection statistics | | health-check | Every 5 minutes | System health check | | cleanup-old-tasks | Daily at 00:30 | Cleans old task records | | health-check-tasks | Every 10 minutes | Task queue health check | diff --git a/cc-registry-v2/frontend/src/pages/Home.tsx b/cc-registry-v2/frontend/src/pages/Home.tsx index 90abf8be3834..a88d3d3b14e2 100644 --- a/cc-registry-v2/frontend/src/pages/Home.tsx +++ b/cc-registry-v2/frontend/src/pages/Home.tsx @@ -37,7 +37,6 @@ interface RegistryStats { codebundles: number; tasks: number; slis: number; - tasks_over_time: Array<{ month: string; tasks: number }>; } interface RecentCodebundle { diff --git a/cc-registry-v2/frontend/src/services/api.ts b/cc-registry-v2/frontend/src/services/api.ts index 688e3de77457..a682eea99246 100644 --- a/cc-registry-v2/frontend/src/services/api.ts +++ b/cc-registry-v2/frontend/src/services/api.ts @@ -309,12 +309,14 @@ export const apiService = { }, // Registry Stats + // NOTE: historical task growth is served separately by + // /api/v1/analytics/tasks-by-week (see getTasksByWeek below), backed + // by real git-history analysis. This endpoint is just live counts. 
async getRegistryStats(): Promise<{ collections: number; codebundles: number; tasks: number; slis: number; - tasks_over_time: Array<{ month: string; tasks: number }>; }> { const response = await api.get('/registry/stats'); return response.data; diff --git a/cc-registry-v2/k8s/scheduler-deployment.yaml b/cc-registry-v2/k8s/scheduler-deployment.yaml index 76dd205e13f6..4d23ca76c3a1 100644 --- a/cc-registry-v2/k8s/scheduler-deployment.yaml +++ b/cc-registry-v2/k8s/scheduler-deployment.yaml @@ -80,15 +80,6 @@ data: minute: 0 enabled: false - - name: update-statistics-hourly - task: app.tasks.data_population_tasks.update_collection_statistics_task - description: Update collection statistics - schedule_type: crontab - crontab: - hour: null - minute: 0 - enabled: true - # ============================================================================= # HEALTH CHECKS & CLEANUP # ============================================================================= diff --git a/cc-registry-v2/schedules.yaml b/cc-registry-v2/schedules.yaml index 74ba11464123..cd71091a3c89 100644 --- a/cc-registry-v2/schedules.yaml +++ b/cc-registry-v2/schedules.yaml @@ -105,16 +105,6 @@ schedules: minute: 0 enabled: false - # Statistics Update - Update collection statistics every hour - - name: update-statistics-hourly - task: app.tasks.data_population_tasks.update_collection_statistics_task - description: Update collection statistics - schedule_type: crontab - crontab: - hour: null # null = every hour - minute: 0 - enabled: true - # ============================================================================= # HEALTH CHECKS & CLEANUP # ============================================================================= From cd5d9d8874585018976007466fb165564d46932a Mon Sep 17 00:00:00 2001 From: stewartshea Date: Tue, 12 May 2026 19:23:57 -0400 Subject: [PATCH 13/13] Enhance analytics tasks for task growth metrics - Expanded the documentation in `analytics_tasks.py` to clarify the purpose and methodology of 
the task growth analytics computation. - Implemented a new function to extract task names from Robot Framework files, improving the accuracy of task attribution based on git history. - Updated the `compute_task_growth_analytics` function to refine the algorithm for calculating monthly cumulative task counts, ensuring recent additions are accurately reflected in the growth metrics. - Enhanced logging and error handling to improve the reliability of the analytics computation process. --- .../backend/app/tasks/analytics_tasks.py | 482 ++++++++++++------ 1 file changed, 328 insertions(+), 154 deletions(-) diff --git a/cc-registry-v2/backend/app/tasks/analytics_tasks.py b/cc-registry-v2/backend/app/tasks/analytics_tasks.py index 4f14254acf13..a293a59fc9ae 100644 --- a/cc-registry-v2/backend/app/tasks/analytics_tasks.py +++ b/cc-registry-v2/backend/app/tasks/analytics_tasks.py @@ -1,5 +1,30 @@ """ -Analytics computation tasks +Analytics computation tasks. + +The Task Library Growth chart on the homepage is fed by the +`task_growth_metrics` table, which this module populates. The historical +data is reconstructed by walking the git history of each codebundle's +`runbook.robot` / `sli.robot` and recording the first commit where each +*currently-existing* task name appears. + +Why per-task attribution (and not per-codebundle "directory created" date)? +------------------------------------------------------------------------- +The previous implementation timestamped every codebundle once — using the +commit that first added the bundle's directory — and then attributed the +codebundle's *current* `task_count + sli_count` to that single date. That +produced a smooth-but-wrong ramp: 100 tasks added to existing codebundles +last month were back-dated to those codebundles' original creation months +(sometimes years ago), so genuine bursts of growth were invisible on the +chart. 
We now bucket by per-task first-introduction date so recent +additions to long-lived bundles surface in the correct month. + +Semantic note +------------- +"Cumulative at month M" = count of CURRENTLY-existing tasks/SLIs whose +first appearance in git history is on-or-before M. Tasks renamed in git +are counted from the rename commit (the new name didn't exist before +that). Tasks deleted from the codebase don't appear in this curve at all. +This keeps the chart monotonic over time and stable across re-runs. """ import logging import tempfile @@ -8,214 +33,363 @@ import time from datetime import datetime, timedelta from collections import defaultdict -from typing import Dict, Any +from typing import Dict, Set from app.tasks.celery_app import celery_app from app.core.database import SessionLocal +from app.core.visibility import PUBLIC_VISIBILITY from app.models import CodeCollection, Codebundle, TaskGrowthMetric logger = logging.getLogger(__name__) +# Robot Framework section headers that contain task definitions. +# (`*** Tasks ***` is the modern spelling, `*** Test Cases ***` is the +# legacy spelling; both are accepted by the runner.) +_TASK_SECTION_NAMES = frozenset({"tasks", "test cases"}) + + +def _extract_task_names_from_robot(content: str) -> Set[str]: + """ + Parse a Robot Framework file and return the set of task/test-case + names defined in it. + + A task name in Robot Framework is any line inside a `*** Tasks ***` + (or `*** Test Cases ***`) section that: + - starts at column 0 (no leading whitespace), + - is not a comment (`#`), + - is not itself a section header. + + Lines belonging to the task body are indented and therefore ignored. + """ + names: Set[str] = set() + in_task_section = False + for raw_line in content.splitlines(): + if not raw_line.strip(): + continue + # Section headers can have leading spaces in some files; normalize. 
+ stripped = raw_line.lstrip() + if stripped.startswith("***"): + header = stripped.strip("* \t").lower() + in_task_section = header in _TASK_SECTION_NAMES + continue + if not in_task_section: + continue + # Task body lines are indented; comments start with '#'. + if raw_line[0] in (" ", "\t"): + continue + if stripped.startswith("#"): + continue + names.add(stripped.rstrip()) + return names + + +def _first_introduction_dates( + repo_path: str, + file_path: str, + target_names: Set[str], +) -> Dict[str, datetime]: + """ + For each name in `target_names` that exists in `file_path` at some + point in git history, return the timestamp of the earliest commit + where it appears. + + Walks commits oldest-first; stops as soon as every target name has + been attributed. Names that never appear in any committed version + are simply omitted from the result. + """ + if not target_names: + return {} + + log = subprocess.run( + ["git", "log", "--reverse", "--format=%ct|%H", "--", file_path], + cwd=repo_path, + capture_output=True, + text=True, + timeout=30, + ) + if log.returncode != 0 or not log.stdout.strip(): + return {} + + seen: Dict[str, datetime] = {} + remaining = set(target_names) + for entry in log.stdout.strip().splitlines(): + if not remaining: + break + try: + ts_str, sha = entry.split("|", 1) + commit_time = datetime.fromtimestamp(int(ts_str)) + except (ValueError, OSError): + continue + + show = subprocess.run( + ["git", "show", f"{sha}:{file_path}"], + cwd=repo_path, + capture_output=True, + text=True, + timeout=10, + ) + if show.returncode != 0: + continue + + names_at_commit = _extract_task_names_from_robot(show.stdout) + newly_seen = remaining & names_at_commit + if newly_seen: + for name in newly_seen: + seen[name] = commit_time + remaining -= newly_seen + return seen + + @celery_app.task(bind=True) def compute_task_growth_analytics(self): """ - Compute task growth analytics using git history. - - This task: - 1. Clones all codecollection repositories - 2. 
Analyzes git history to find when each codebundle folder first appeared - 3. Calculates monthly cumulative task counts for last 18 months - 4. Stores results in task_growth_metrics table - - Runs as background job (scheduled via Celery Beat). + Recompute monthly cumulative task-library growth and persist to + `task_growth_metrics`. + + Algorithm: + 1. For every PUBLIC, active codecollection: clone the repo. + 2. For every active codebundle in that codecollection: union the + current task names (`cb.tasks`) and SLI names (`cb.slis`). + 3. Walk the git history of the codebundle's `runbook.robot` and + `sli.robot` and find the earliest commit containing each name. + 4. Bucket each (codebundle, name) introduction date by month. + 5. Generate a cumulative series for the last 18 months and the + historical pre-window total. Store in `task_growth_metrics`. + + Excludes: + - CodeCollections with `visibility = 'hidden'` (PAPI-only entries + like internal/private codecollections — they must never feed any + public-audience surface, including this chart). + - Inactive codecollections / inactive codebundles. """ db = SessionLocal() start_time = time.time() - + try: - logger.info(f"Starting task growth analytics computation (task {self.request.id})") - - # Calculate date 18 months ago - eighteen_months_ago = datetime.now() - timedelta(days=18*30) - - # Get all active codebundles with their collections - codebundles = db.query(Codebundle).join(CodeCollection).filter( - Codebundle.is_active == True, - CodeCollection.is_active == True - ).all() - - logger.info(f"Analyzing {len(codebundles)} codebundles for first-commit dates") - - # Group by collection to minimize git operations - collections_map = {} + logger.info( + f"Starting task growth analytics computation (task {self.request.id})" + ) + + eighteen_months_ago = datetime.now() - timedelta(days=18 * 30) + + # Only PUBLIC, active codecollections and their active codebundles. 
+ # Hidden codecollections exist for PAPI but must not skew public + # registry analytics. See app.core.visibility for context. + codebundles = ( + db.query(Codebundle) + .join(CodeCollection) + .filter( + Codebundle.is_active.is_(True), + CodeCollection.is_active.is_(True), + CodeCollection.visibility == PUBLIC_VISIBILITY, + ) + .all() + ) + + logger.info( + f"Analyzing {len(codebundles)} codebundles " + "(public + active) for per-task introduction dates" + ) + + # Group by collection so each repo is cloned exactly once. + collections_map: Dict[int, Dict] = {} for cb in codebundles: - if cb.codecollection_id not in collections_map: - collections_map[cb.codecollection_id] = { - 'collection': cb.codecollection, - 'codebundles': [] - } - collections_map[cb.codecollection_id]['codebundles'].append(cb) - - # Extract FIRST-commit dates from git history - codebundle_first_dates = [] - + entry = collections_map.setdefault( + cb.codecollection_id, + {"collection": cb.codecollection, "codebundles": []}, + ) + entry["codebundles"].append(cb) + + # `attribution_dates` is a flat list of (introduction_date) entries, + # one per (codebundle, name). Each contributes exactly 1 to its + # bucket month; the cumulative line is the running sum. 
+ attribution_dates = [] + per_name_attributed = 0 + per_name_fallback = 0 + with tempfile.TemporaryDirectory() as tmp_dir: for coll_id, data in collections_map.items(): - collection = data['collection'] - codebundles_list = data['codebundles'] - - try: - # Clone repository - repo_path = os.path.join(tmp_dir, collection.slug) - logger.info(f"Cloning {collection.git_url} to analyze first-commit dates") - - subprocess.run( - ['git', 'clone', '--quiet', collection.git_url, repo_path], - capture_output=True, - text=True, - timeout=60 + collection: CodeCollection = data["collection"] + bundles = data["codebundles"] + repo_path = os.path.join(tmp_dir, collection.slug) + + logger.info( + f"Cloning {collection.git_url} to attribute task introductions" + ) + clone = subprocess.run( + [ + "git", + "clone", + "--quiet", + "--no-checkout", + "--filter=blob:none", + collection.git_url, + repo_path, + ], + capture_output=True, + text=True, + timeout=120, + ) + clone_ok = clone.returncode == 0 + + if not clone_ok: + logger.warning( + f"Clone failed for {collection.slug}: " + f"{(clone.stderr or '').strip()[:200]}" ) - - # For each codebundle, get the FIRST commit date - for cb in codebundles_list: - bundle_path = f"codebundles/{cb.slug}" - - # Get first commit when directory was added - result = subprocess.run( - ['git', 'log', '--format=%ct', '--reverse', '--diff-filter=A', '--', bundle_path], - cwd=repo_path, - capture_output=True, - text=True, - timeout=10 - ) - - if result.returncode == 0 and result.stdout.strip(): - timestamps = result.stdout.strip().split('\n') - if timestamps: - first_commit = int(timestamps[0]) - first_date = datetime.fromtimestamp(first_commit) - - task_count = (cb.task_count or 0) + (cb.sli_count or 0) - - codebundle_first_dates.append({ - 'date': first_date, - 'task_count': task_count, - 'codebundle': cb.slug, - 'collection': collection.slug - }) + + for cb in bundles: + current_task_names = set(cb.tasks or []) + current_sli_names = set(cb.slis or []) 
+ all_names = current_task_names | current_sli_names + if not all_names: + continue + + # Map each name to the most likely file it lives in. + # We try runbook first for tasks, sli for SLIs, and + # fall back to "either file" to be tolerant of layout + # drift. Files are repo-relative because that's how + # git log expects them. + runbook_rel = (cb.runbook_path or "").lstrip("/") or None + sli_rel = (cb.sli_path or "").lstrip("/") or None + + # Collect introduction dates per name, preferring the + # canonical file but falling back to the other if the + # name only shows up there. + found: Dict[str, datetime] = {} + if clone_ok: + if runbook_rel: + found.update( + _first_introduction_dates( + repo_path, runbook_rel, current_task_names + ) + ) + if sli_rel: + found.update( + _first_introduction_dates( + repo_path, sli_rel, current_sli_names + ) + ) + # Edge case: a name we didn't find in its canonical + # file might exist in the other one (some bundles + # mix tasks+slis in a single .robot historically). 
+ missing = all_names - set(found.keys()) + if missing and runbook_rel and sli_rel: + for fallback_path in (sli_rel, runbook_rel): + if not missing: + break + extra = _first_introduction_dates( + repo_path, fallback_path, missing + ) + found.update(extra) + missing -= set(extra.keys()) + + for name in all_names: + if name in found: + attribution_dates.append(found[name]) + per_name_attributed += 1 else: - # Fallback to created_at - fallback_date = cb.created_at - task_count = (cb.task_count or 0) + (cb.sli_count or 0) - codebundle_first_dates.append({ - 'date': fallback_date, - 'task_count': task_count, - 'codebundle': cb.slug, - 'collection': collection.slug - }) - - except Exception as e: - logger.error(f"Error analyzing git history for {collection.slug}: {e}") - # Fallback: use created_at from database - for cb in codebundles_list: - fallback_date = cb.created_at - task_count = (cb.task_count or 0) + (cb.sli_count or 0) - codebundle_first_dates.append({ - 'date': fallback_date, - 'task_count': task_count, - 'codebundle': cb.slug, - 'collection': collection.slug - }) - - # Sort by date - codebundle_first_dates.sort(key=lambda x: x['date']) - - if not codebundle_first_dates: + # Couldn't attribute via git — fall back to the + # codebundle's database created_at, which is at + # worst as wrong as the old algorithm was. + fallback = cb.created_at or datetime.utcnow() + attribution_dates.append(fallback) + per_name_fallback += 1 + + if not attribution_dates: logger.warning("No codebundle data found") return {"status": "no_data", "message": "No codebundles found"} - - # Build MONTHLY aggregates for all time - monthly_data = defaultdict(int) - for entry in codebundle_first_dates: - entry_date = entry['date'] - month_key = entry_date.strftime('%Y-%m-01') - monthly_data[month_key] += entry['task_count'] - - # Generate cumulative counts for last 18 months + + attribution_dates.sort() + + # Bucket by calendar month. 
+ monthly_data: Dict[str, int] = defaultdict(int) + for ts in attribution_dates: + month_key = ts.strftime("%Y-%m-01") + monthly_data[month_key] += 1 + months = [] cumulative = [] - + start_month = eighteen_months_ago.replace(day=1) latest = datetime.now().replace(day=1) - - # Calculate total tasks before 18 months ago + + # Pre-window cumulative (everything older than the visible range). running_total = 0 - earliest_date = codebundle_first_dates[0]['date'].replace(day=1) - temp_month = earliest_date - - while temp_month < start_month: - month_key = temp_month.strftime('%Y-%m-01') - running_total += monthly_data.get(month_key, 0) - # Move to next month - if temp_month.month == 12: - temp_month = temp_month.replace(year=temp_month.year + 1, month=1) + earliest = attribution_dates[0].replace(day=1) + cursor = earliest + while cursor < start_month: + running_total += monthly_data.get(cursor.strftime("%Y-%m-01"), 0) + if cursor.month == 12: + cursor = cursor.replace(year=cursor.year + 1, month=1) else: - temp_month = temp_month.replace(month=temp_month.month + 1) - - # Generate visible data (last 18 months) - current_month = start_month - while current_month <= latest: - month_key = current_month.strftime('%Y-%m-01') + cursor = cursor.replace(month=cursor.month + 1) + + cursor = start_month + while cursor <= latest: + month_key = cursor.strftime("%Y-%m-01") running_total += monthly_data.get(month_key, 0) - months.append(month_key) cumulative.append(running_total) - - # Move to next month - if current_month.month == 12: - current_month = current_month.replace(year=current_month.year + 1, month=1) + if cursor.month == 12: + cursor = cursor.replace(year=cursor.year + 1, month=1) else: - current_month = current_month.replace(month=current_month.month + 1) - - # Store results in database + cursor = cursor.replace(month=cursor.month + 1) + result_data = { "months": months, "cumulative": cumulative, - "total_tasks": running_total + "total_tasks": running_total, } - + duration 
= int(time.time() - start_time) - - # Delete old metrics for this type + db.query(TaskGrowthMetric).filter( TaskGrowthMetric.metric_type == "monthly_growth", - TaskGrowthMetric.time_period == "18_months" + TaskGrowthMetric.time_period == "18_months", ).delete() - - # Create new metric + + attributed_pct = ( + int(100 * per_name_attributed / (per_name_attributed + per_name_fallback)) + if (per_name_attributed + per_name_fallback) + else 0 + ) + metric = TaskGrowthMetric( metric_type="monthly_growth", time_period="18_months", data=result_data, computation_duration_seconds=duration, codebundles_analyzed=len(codebundles), - notes=f"Analyzed {len(codebundle_first_dates)} codebundles across {len(collections_map)} collections" + notes=( + f"Per-task git attribution across {len(codebundles)} codebundles " + f"in {len(collections_map)} public codecollections; " + f"{per_name_attributed} names dated from git, " + f"{per_name_fallback} fell back to codebundle.created_at " + f"({attributed_pct}% git-attributed)" + ), ) - + db.add(metric) db.commit() - - logger.info(f"Task growth analytics computed successfully in {duration}s: {running_total} total tasks") - + + logger.info( + f"Task growth analytics computed successfully in {duration}s: " + f"{running_total} total tasks; {per_name_attributed} git-attributed, " + f"{per_name_fallback} fallback" + ) + return { "status": "success", "duration_seconds": duration, "codebundles_analyzed": len(codebundles), + "names_git_attributed": per_name_attributed, + "names_fallback_attributed": per_name_fallback, "total_tasks": running_total, - "months_generated": len(months) + "months_generated": len(months), } - + except Exception: # logger.exception captures the full traceback. Bare `raise` # re-throws so Celery marks the task FAILURE — task_executions