diff --git a/cc-registry-v2/Taskfile.yml b/cc-registry-v2/Taskfile.yml index 096ba89cd90..bc052c25dc1 100644 --- a/cc-registry-v2/Taskfile.yml +++ b/cc-registry-v2/Taskfile.yml @@ -15,7 +15,7 @@ tasks: desc: Start all services cmds: - echo "🚀 Starting all services..." - - docker-compose up -d --build + - source az.secret && docker-compose up -d --build stop: desc: Stop all services @@ -27,7 +27,7 @@ tasks: desc: Restart all services cmds: - echo "🔄 Restarting all services..." - - docker-compose restart + - source az.secret && docker-compose restart # Monitoring commands logs: @@ -83,7 +83,7 @@ tasks: desc: Start only backend services (db, redis, backend) cmds: - echo "🔧 Starting backend services..." - - docker-compose up -d database redis backend + - source az.secret && docker-compose up -d database redis backend frontend: desc: Start only frontend (requires backend) @@ -95,14 +95,14 @@ tasks: desc: Start worker services cmds: - echo "🔄 Starting worker services..." - - docker-compose up -d worker scheduler flower + - source az.secret && docker-compose up -d worker scheduler flower # Development commands dev: desc: Start development environment (backend + frontend) cmds: - echo "💻 Starting development environment..." 
- - docker-compose up -d database redis backend frontend + - source az.secret && docker-compose up -d database redis backend frontend dev:logs: desc: Show logs for development services diff --git a/cc-registry-v2/backend/alembic/versions/004_add_image_metadata_and_visibility.py b/cc-registry-v2/backend/alembic/versions/004_add_image_metadata_and_visibility.py new file mode 100644 index 00000000000..de99d452c3c --- /dev/null +++ b/cc-registry-v2/backend/alembic/versions/004_add_image_metadata_and_visibility.py @@ -0,0 +1,96 @@ +"""add image metadata to codecollection_versions and visibility to codecollections + +Adds the columns needed to track versioned OCI image artifacts per ref so the +RunWhen platform (PAPI) can consume a built-image catalog directly from the +codecollection-registry instead of running its own corestate-operator. + +Also adds a `visibility` column on `codecollections` so a CC can be tracked +for image consumption but kept out of the public registry website / MCP / +AI search (e.g. customer-private, internal, deprecated CCs). + +Revision ID: 004 +Revises: 003 +Create Date: 2026-05-11 +""" +from alembic import op +import sqlalchemy as sa + + +# revision identifiers, used by Alembic. +revision = "004" +down_revision = "003" +branch_labels = None +depends_on = None + + +def upgrade() -> None: + # --- image metadata on codecollection_versions --- + op.execute( + """ + DO $$ + BEGIN + IF NOT EXISTS ( + SELECT 1 FROM information_schema.columns + WHERE table_name = 'codecollection_versions' + AND column_name = 'image_registry' + ) THEN + ALTER TABLE codecollection_versions + ADD COLUMN image_registry VARCHAR(500), + ADD COLUMN image_tag VARCHAR(200), + ADD COLUMN image_digest VARCHAR(80), + ADD COLUMN commit_hash VARCHAR(40), + ADD COLUMN rt_revision VARCHAR(40), + ADD COLUMN image_built_at TIMESTAMP; + END IF; + END $$; + """ + ) + + # Index for PAPI's "latest ref for this CC" lookups. 
+ op.execute( + """ + CREATE INDEX IF NOT EXISTS ix_ccv_collection_image_tag + ON codecollection_versions (codecollection_id, image_tag); + """ + ) + + # --- visibility on codecollections --- + op.execute( + """ + DO $$ + BEGIN + IF NOT EXISTS ( + SELECT 1 FROM information_schema.columns + WHERE table_name = 'codecollections' + AND column_name = 'visibility' + ) THEN + ALTER TABLE codecollections + ADD COLUMN visibility VARCHAR(20) NOT NULL DEFAULT 'public'; + END IF; + END $$; + """ + ) + op.execute( + """ + CREATE INDEX IF NOT EXISTS ix_cc_visibility + ON codecollections (visibility); + """ + ) + + +def downgrade() -> None: + op.execute("DROP INDEX IF EXISTS ix_cc_visibility") + op.execute("ALTER TABLE codecollections DROP COLUMN IF EXISTS visibility") + + op.execute("DROP INDEX IF EXISTS ix_ccv_collection_image_tag") + op.execute( + """ + ALTER TABLE codecollection_versions + DROP COLUMN IF EXISTS image_built_at, + DROP COLUMN IF EXISTS rt_revision, + DROP COLUMN IF EXISTS commit_hash, + DROP COLUMN IF EXISTS image_digest, + DROP COLUMN IF EXISTS image_tag, + DROP COLUMN IF EXISTS image_registry; + """ + ) diff --git a/cc-registry-v2/backend/app/core/config.py b/cc-registry-v2/backend/app/core/config.py index 073e2816011..12a39e90e95 100644 --- a/cc-registry-v2/backend/app/core/config.py +++ b/cc-registry-v2/backend/app/core/config.py @@ -17,6 +17,14 @@ class Settings(BaseSettings): GITHUB_WEBHOOK_SECRET: str = "your_webhook_secret_here" GITHUB_OWNER: str = "runwhen-contrib" GITHUB_REPO: str = "codecollection-registry" + + # GitHub App Authentication (preferred over GITHUB_TOKEN when configured) + GITHUB_APP_ID: Optional[str] = None + GITHUB_APP_PRIVATE_KEY: Optional[str] = None + GITHUB_APP_INSTALLATION_ID: Optional[int] = None + + # Target repo for intake wizard issue creation + GITHUB_INTAKE_REPO: str = "runwhen-contrib/codecollection-registry" # Security SECRET_KEY: str = "your-secret-key-change-in-production" diff --git 
a/cc-registry-v2/backend/app/core/visibility.py b/cc-registry-v2/backend/app/core/visibility.py new file mode 100644 index 00000000000..192430d9138 --- /dev/null +++ b/cc-registry-v2/backend/app/core/visibility.py @@ -0,0 +1,38 @@ +""" +Visibility filter helpers. + +A CodeCollection's `visibility` flag controls whether it appears on +public-audience surfaces: + + - 'public' – default. Shown on the registry website, MCP, AI search, etc. + - 'hidden' – tracked for PAPI consumption but excluded from public lists. + +This is a UX/discovery toggle, NOT a security boundary. Image-level +access control still lives in the OCI registry. + +Centralizing the filter here keeps the rule consistent across endpoints — +if we ever add a third visibility tier (e.g. 'archived'), we change one +place rather than auditing every router. +""" +from __future__ import annotations + +from sqlalchemy.orm import Query + +from app.models import CodeCollection + +PUBLIC_VISIBILITY = "public" +HIDDEN_VISIBILITY = "hidden" + + +def public_only(query: Query) -> Query: + """ + Apply `visibility = 'public'` to a SQLAlchemy query that selects from + or joins to `codecollections`. Use this on every public-audience + endpoint (anything PAPI / corestate would NOT call). 
+ """ + return query.filter(CodeCollection.visibility == PUBLIC_VISIBILITY) + + +def is_public(cc: CodeCollection) -> bool: + """Predicate version for code paths that already have a loaded row.""" + return (cc.visibility or PUBLIC_VISIBILITY) == PUBLIC_VISIBILITY diff --git a/cc-registry-v2/backend/app/main.py b/cc-registry-v2/backend/app/main.py index dcd7f24c117..50177184727 100644 --- a/cc-registry-v2/backend/app/main.py +++ b/cc-registry-v2/backend/app/main.py @@ -99,7 +99,7 @@ async def health_check(): } # Include routers -from app.routers import admin, tasks, raw_data, admin_crud, task_execution_admin, versions, task_management, admin_inventory, helm_charts, mcp_chat, chat_debug, github_issues, schedule_config, analytics, vector_search +from app.routers import admin, tasks, raw_data, admin_crud, task_execution_admin, versions, task_management, admin_inventory, helm_charts, mcp_chat, chat_debug, github_issues, schedule_config, analytics, vector_search, intake, cc_catalog app.include_router(admin.router) app.include_router(tasks.router) app.include_router(raw_data.router) @@ -115,6 +115,10 @@ async def health_check(): app.include_router(github_issues.router, prefix="/api/v1") app.include_router(analytics.router) app.include_router(vector_search.router) +app.include_router(intake.router) +# CodeCollection catalog (PAPI-facing). Sees both public AND hidden CCs by +# design — PAPI needs to resolve image refs even for hidden collections. 
+app.include_router(cc_catalog.router) @app.get("/api/v1/registry/collections") async def list_collections(): @@ -126,8 +130,11 @@ async def list_collections(): db = SessionLocal() try: - collections = db.query(CodeCollection).filter(CodeCollection.is_active == True).all() - + from app.core.visibility import public_only + collections = public_only( + db.query(CodeCollection).filter(CodeCollection.is_active == True) + ).all() + result = [] for collection in collections: # Calculate statistics for each collection @@ -180,12 +187,16 @@ async def get_collection_by_slug(collection_slug: str): db = SessionLocal() try: - # Find the collection - collection = db.query(CodeCollection).filter( - CodeCollection.slug == collection_slug, - CodeCollection.is_active == True + from app.core.visibility import public_only + # Find the collection — hidden CCs are treated as 404 on the + # public website even though PAPI can still see them via /catalog. + collection = public_only( + db.query(CodeCollection).filter( + CodeCollection.slug == collection_slug, + CodeCollection.is_active == True, + ) ).first() - + if not collection: return JSONResponse( status_code=404, @@ -251,12 +262,20 @@ async def get_all_tasks( db = SessionLocal() try: - # Build the query - query = db.query(Codebundle).filter(Codebundle.is_active == True) - + from app.core.visibility import public_only + # Build the query — always join CodeCollection so we can scope to + # public-visibility collections (hidden CCs and their codebundles + # do not appear on the public registry website). 
+ query = ( + db.query(Codebundle) + .join(CodeCollection, Codebundle.codecollection_id == CodeCollection.id) + .filter(Codebundle.is_active == True) + ) + query = public_only(query) + # Filter by collection if specified if collection_slug: - query = query.join(CodeCollection).filter(CodeCollection.slug == collection_slug) + query = query.filter(CodeCollection.slug == collection_slug) # Filter by support tags if specified (multiple tags) if support_tags: @@ -425,9 +444,16 @@ async def list_codebundles( db = SessionLocal() try: - # Build base query - query = db.query(Codebundle).filter(Codebundle.is_active == True) - + from app.core.visibility import public_only + # Build base query — join the parent CC so we can scope to + # public-visibility collections. + query = ( + db.query(Codebundle) + .join(CodeCollection, Codebundle.codecollection_id == CodeCollection.id) + .filter(Codebundle.is_active == True) + ) + query = public_only(query) + # Apply search filter — supports natural language queries # by splitting into keywords and matching word-by-word if search: @@ -626,12 +652,16 @@ async def get_codebundle_by_slug(collection_slug: str, codebundle_slug: str): db = SessionLocal() try: - # First find the collection - collection = db.query(CodeCollection).filter( - CodeCollection.slug == collection_slug, - CodeCollection.is_active == True + from app.core.visibility import public_only + # First find the collection. Hidden CCs are treated as 404 from + # the public website even though PAPI can still resolve them. 
+ collection = public_only( + db.query(CodeCollection).filter( + CodeCollection.slug == collection_slug, + CodeCollection.is_active == True, + ) ).first() - + if not collection: return JSONResponse( status_code=404, @@ -719,13 +749,17 @@ async def get_recent_codebundles(): db = SessionLocal() try: - # Get recent codebundles ordered by git_updated_at only, excluding rw-generic-codecollection - codebundles = db.query(Codebundle).join( - CodeCollection, Codebundle.codecollection_id == CodeCollection.id - ).filter( - Codebundle.is_active == True, - Codebundle.git_updated_at.isnot(None), # Only codebundles with git dates - CodeCollection.slug != 'rw-generic-codecollection' # Exclude generics + from app.core.visibility import public_only + # Get recent codebundles ordered by git_updated_at only, + # excluding rw-generic-codecollection and any hidden CCs. + codebundles = public_only( + db.query(Codebundle).join( + CodeCollection, Codebundle.codecollection_id == CodeCollection.id + ).filter( + Codebundle.is_active == True, + Codebundle.git_updated_at.isnot(None), + CodeCollection.slug != 'rw-generic-codecollection', + ) ).order_by( desc(Codebundle.git_updated_at) ).limit(20).all() @@ -768,17 +802,21 @@ async def get_recent_tasks(): db = SessionLocal() try: - # Get codebundles with tasks, ordered by git_updated_at, excluding rw-generic-codecollection - codebundles = db.query(Codebundle).join( - CodeCollection, Codebundle.codecollection_id == CodeCollection.id - ).filter( - Codebundle.is_active == True, - Codebundle.git_updated_at.isnot(None), - Codebundle.tasks.isnot(None), - CodeCollection.slug != 'rw-generic-codecollection' # Exclude generics + from app.core.visibility import public_only + # Get codebundles with tasks, ordered by git_updated_at, + # excluding rw-generic-codecollection and hidden CCs. 
+ codebundles = public_only( + db.query(Codebundle).join( + CodeCollection, Codebundle.codecollection_id == CodeCollection.id + ).filter( + Codebundle.is_active == True, + Codebundle.git_updated_at.isnot(None), + Codebundle.tasks.isnot(None), + CodeCollection.slug != 'rw-generic-codecollection', + ) ).order_by( desc(Codebundle.git_updated_at) - ).limit(100).all() # Get more codebundles to extract tasks from + ).limit(100).all() result = [] for cb in codebundles: @@ -881,20 +919,32 @@ async def get_registry_stats(): db = SessionLocal() try: - # Count collections - collections_count = db.query(CodeCollection).filter(CodeCollection.is_active == True).count() - - # Count codebundles - codebundles_count = db.query(Codebundle).filter(Codebundle.is_active == True).count() - - # Count tasks and SLIs using the authoritative integer fields (task_count, sli_count) - # set by the canonical parser. This is both more efficient (SQL SUM vs loading all - # records) and more reliable than counting JSON array lengths, which could drift - # if a competing code path updates the arrays without updating the counts. - stats = db.query( - func.coalesce(func.sum(Codebundle.task_count), 0).label('total_tasks'), - func.coalesce(func.sum(Codebundle.sli_count), 0).label('total_slis') - ).filter(Codebundle.is_active == True).first() + from app.core.visibility import public_only + # Count public collections only — homepage stats shouldn't expose + # the existence of hidden CCs. + collections_count = public_only( + db.query(CodeCollection).filter(CodeCollection.is_active == True) + ).count() + + # Count codebundles belonging to public collections. + cb_query = ( + db.query(Codebundle) + .join(CodeCollection, Codebundle.codecollection_id == CodeCollection.id) + .filter(Codebundle.is_active == True) + ) + codebundles_count = public_only(cb_query).count() + + # Count tasks and SLIs using the authoritative integer fields + # (task_count, sli_count) set by the canonical parser. 
Scoped to + # public CCs so the homepage stays consistent. + stats = public_only( + db.query( + func.coalesce(func.sum(Codebundle.task_count), 0).label('total_tasks'), + func.coalesce(func.sum(Codebundle.sli_count), 0).label('total_slis') + ) + .join(CodeCollection, Codebundle.codecollection_id == CodeCollection.id) + .filter(Codebundle.is_active == True) + ).first() total_tasks = int(stats.total_tasks) total_slis = int(stats.total_slis) diff --git a/cc-registry-v2/backend/app/models/code_collection.py b/cc-registry-v2/backend/app/models/code_collection.py index b33d2690d42..ef6e2dd4420 100644 --- a/cc-registry-v2/backend/app/models/code_collection.py +++ b/cc-registry-v2/backend/app/models/code_collection.py @@ -18,6 +18,12 @@ class CodeCollection(Base): git_ref = Column(String(50), default="main") last_synced = Column(DateTime) is_active = Column(Boolean, default=True) + # 'public' – shown on registry website, MCP, AI search, etc. + # 'hidden' – CC is still synced & its images tracked for PAPI consumption, + # but it is excluded from all public-facing registry endpoints. + # NOTE: 'hidden' is a UX/discovery toggle, NOT a security boundary. The + # OCI registry remains the source of truth for image access control. 
+ visibility = Column(String(20), nullable=False, default="public", index=True) created_at = Column(DateTime, default=datetime.utcnow) updated_at = Column(DateTime, default=datetime.utcnow, onupdate=datetime.utcnow) diff --git a/cc-registry-v2/backend/app/models/version.py b/cc-registry-v2/backend/app/models/version.py index edbafe2ecd4..dd771d61348 100644 --- a/cc-registry-v2/backend/app/models/version.py +++ b/cc-registry-v2/backend/app/models/version.py @@ -30,7 +30,23 @@ class CodeCollectionVersion(Base): # Sync metadata synced_at = Column(DateTime) # When this version was last synced is_active = Column(Boolean, default=True) # Whether this version is available - + + # ------------------------------------------------------------------ + # Image catalog metadata (populated by image_sync_tasks) + # ------------------------------------------------------------------ + # Where the built image lives, e.g. "ghcr.io/runwhen-contrib/rw-cli-codecollection" + image_registry = Column(String(500)) + # Concrete pullable tag, e.g. "main-c1a2b3d-e4f5a6b" (PAPI uses this verbatim). + image_tag = Column(String(200), index=True) + # Optional content-addressable digest for stronger pinning. + image_digest = Column(String(80)) + # Full commit sha this image was built from (codecollection repo). + commit_hash = Column(String(40)) + # platform-robot-runtime sha embedded at build time (encoded in tag suffix). + rt_revision = Column(String(40)) + # When the build pushed this image, parsed from OCI manifest where available. 
+ image_built_at = Column(DateTime) + # Timestamps created_at = Column(DateTime, default=datetime.utcnow) updated_at = Column(DateTime, default=datetime.utcnow, onupdate=datetime.utcnow) diff --git a/cc-registry-v2/backend/app/routers/cc_catalog.py b/cc-registry-v2/backend/app/routers/cc_catalog.py new file mode 100644 index 00000000000..d292b1df6fd --- /dev/null +++ b/cc-registry-v2/backend/app/routers/cc_catalog.py @@ -0,0 +1,240 @@ +""" +PAPI-facing CodeCollection catalog API. + +These endpoints intentionally bypass the `visibility = 'public'` filter +that protects the registry website: PAPI needs to see hidden CCs so it +can still resolve their image refs for workspaces that use them. + +Surface area: + + GET /api/v1/catalog/codecollections + GET /api/v1/catalog/codecollections/{slug} + GET /api/v1/catalog/codecollections/{slug}/refs + GET /api/v1/catalog/codecollections/{slug}/refs/{ref} + GET /api/v1/catalog/codecollections/{slug}/resolve?pointer=latest|stable + GET /api/v1/catalog/codecollections/{slug}/resolve?ref= + +Everything is read-only. Writes happen only through the image-sync +Celery task — there is no public write API and no auth needed. 
+""" +from __future__ import annotations + +from typing import Optional + +from fastapi import APIRouter, Depends, HTTPException, Query +from sqlalchemy.orm import Session + +from app.core.database import get_db +from app.models import CodeCollection +from app.models.version import CodeCollectionVersion +from app.schemas.cc_catalog import ( + CatalogEntry, + CatalogEntryDetail, + ImageRef, + ResolveResponse, +) + +router = APIRouter(prefix="/api/v1/catalog", tags=["catalog"]) + + +# --------------------------------------------------------------------------- +# helpers +# --------------------------------------------------------------------------- +def _to_image_ref(v: CodeCollectionVersion) -> ImageRef: + return ImageRef( + ref=v.version_name, + ref_type=v.version_type or "branch", + image_registry=v.image_registry, + image_tag=v.image_tag, + image_digest=v.image_digest, + commit_hash=v.commit_hash, + rt_revision=v.rt_revision, + image_built_at=v.image_built_at, + is_latest=bool(v.is_latest), + is_prerelease=bool(v.is_prerelease), + is_active=bool(v.is_active), + synced_at=v.synced_at, + ) + + +def _entry_pointers(versions: list[CodeCollectionVersion]) -> tuple[Optional[str], Optional[str], Optional[str]]: + """Pull (latest_tag, stable_tag, image_registry) out of a CC's versions.""" + latest_tag: Optional[str] = None + stable_tag: Optional[str] = None + image_registry: Optional[str] = None + for v in versions: + if v.image_registry and not image_registry: + image_registry = v.image_registry + if v.is_latest and v.image_tag: + latest_tag = v.image_tag + # `stable` = the highest semver tag (mirrors OCISource.resolve_stable). + if ( + v.image_tag + and v.version_type == "tag" + and (stable_tag is None or v.version_name > stable_tag) + ): + stable_tag = v.image_tag + # Fall back to `latest` if no semver tag is present. 
+ return latest_tag, (stable_tag or latest_tag), image_registry + + +# --------------------------------------------------------------------------- +# endpoints +# --------------------------------------------------------------------------- +@router.get("/codecollections", response_model=list[CatalogEntry]) +def list_catalog( + visibility: Optional[str] = Query( + None, + description="Filter by visibility ('public' | 'hidden'). Omit to see all.", + ), + only_with_image: bool = Query( + True, + description="If true (default), only return CCs that have at least one tracked image.", + ), + db: Session = Depends(get_db), +) -> list[CatalogEntry]: + """List every CodeCollection PAPI may need to resolve.""" + q = db.query(CodeCollection).filter(CodeCollection.is_active.is_(True)) + if visibility: + q = q.filter(CodeCollection.visibility == visibility) + collections = q.order_by(CodeCollection.slug).all() + + entries: list[CatalogEntry] = [] + for cc in collections: + versions = [v for v in cc.versions if v.is_active] + if only_with_image and not any(v.image_tag for v in versions): + continue + latest_tag, stable_tag, image_registry = _entry_pointers(versions) + entries.append( + CatalogEntry( + slug=cc.slug, + name=cc.name, + git_url=cc.git_url, + visibility=cc.visibility or "public", + latest_image_tag=latest_tag, + stable_image_tag=stable_tag, + image_registry=image_registry, + last_synced=cc.last_synced, + ) + ) + return entries + + +@router.get("/codecollections/{slug}", response_model=CatalogEntryDetail) +def get_catalog_entry(slug: str, db: Session = Depends(get_db)) -> CatalogEntryDetail: + cc = ( + db.query(CodeCollection) + .filter(CodeCollection.slug == slug, CodeCollection.is_active.is_(True)) + .first() + ) + if cc is None: + raise HTTPException(status_code=404, detail=f"unknown codecollection: {slug}") + versions = [v for v in cc.versions if v.is_active and v.image_tag] + latest_tag, stable_tag, image_registry = _entry_pointers(versions) + return 
CatalogEntryDetail( + slug=cc.slug, + name=cc.name, + git_url=cc.git_url, + visibility=cc.visibility or "public", + latest_image_tag=latest_tag, + stable_image_tag=stable_tag, + image_registry=image_registry, + last_synced=cc.last_synced, + refs=[_to_image_ref(v) for v in versions], + ) + + +@router.get("/codecollections/{slug}/refs", response_model=list[ImageRef]) +def list_refs( + slug: str, + include_inactive: bool = Query(False), + db: Session = Depends(get_db), +) -> list[ImageRef]: + cc = db.query(CodeCollection).filter(CodeCollection.slug == slug).first() + if cc is None: + raise HTTPException(status_code=404, detail=f"unknown codecollection: {slug}") + versions = list(cc.versions) + if not include_inactive: + versions = [v for v in versions if v.is_active] + versions = [v for v in versions if v.image_tag] + return [_to_image_ref(v) for v in versions] + + +@router.get("/codecollections/{slug}/refs/{ref}", response_model=ImageRef) +def get_ref(slug: str, ref: str, db: Session = Depends(get_db)) -> ImageRef: + row = ( + db.query(CodeCollectionVersion) + .join(CodeCollection, CodeCollectionVersion.codecollection_id == CodeCollection.id) + .filter(CodeCollection.slug == slug, CodeCollectionVersion.version_name == ref) + .first() + ) + if row is None or not row.image_tag: + raise HTTPException( + status_code=404, detail=f"no image for {slug}@{ref}" + ) + return _to_image_ref(row) + + +@router.get("/codecollections/{slug}/resolve", response_model=ResolveResponse) +def resolve_image( + slug: str, + pointer: Optional[str] = Query( + None, regex="^(latest|stable)$", + description="Resolve a named pointer ('latest' or 'stable').", + ), + ref: Optional[str] = Query( + None, description="Resolve a specific git ref name (branch/tag)." + ), + db: Session = Depends(get_db), +) -> ResolveResponse: + """ + Resolve a pointer or git ref to a concrete OCI image tag. Exactly one of + `pointer` or `ref` must be supplied. 
This is the endpoint PAPI calls + on the workspace reconcile path. + """ + if bool(pointer) == bool(ref): + raise HTTPException( + status_code=400, + detail="exactly one of 'pointer' or 'ref' must be provided", + ) + + cc = ( + db.query(CodeCollection) + .filter(CodeCollection.slug == slug, CodeCollection.is_active.is_(True)) + .first() + ) + if cc is None: + raise HTTPException(status_code=404, detail=f"unknown codecollection: {slug}") + + versions = [v for v in cc.versions if v.is_active and v.image_tag] + if not versions: + raise HTTPException(status_code=404, detail=f"no images tracked for {slug}") + + selected: Optional[CodeCollectionVersion] = None + if pointer == "latest": + latest_tag, _, _ = _entry_pointers(versions) + selected = next((v for v in versions if v.image_tag == latest_tag), None) + requested = "latest" + elif pointer == "stable": + _, stable_tag, _ = _entry_pointers(versions) + selected = next((v for v in versions if v.image_tag == stable_tag), None) + requested = "stable" + else: + selected = next((v for v in versions if v.version_name == ref), None) + requested = ref or "" + + if selected is None: + raise HTTPException( + status_code=404, + detail=f"could not resolve {requested!r} for {slug}", + ) + + return ResolveResponse( + slug=slug, + requested=requested, + image_tag=selected.image_tag, + image_registry=selected.image_registry, + image_digest=selected.image_digest, + commit_hash=selected.commit_hash, + rt_revision=selected.rt_revision, + ) diff --git a/cc-registry-v2/backend/app/routers/github_issues.py b/cc-registry-v2/backend/app/routers/github_issues.py index 510099fd259..4c54bb272d6 100644 --- a/cc-registry-v2/backend/app/routers/github_issues.py +++ b/cc-registry-v2/backend/app/routers/github_issues.py @@ -8,6 +8,7 @@ import requests from app.core.config import settings +from app.services.github_auth import get_github_auth logger = logging.getLogger(__name__) router = APIRouter(prefix="/github", tags=["github"]) @@ -32,19 +33,18 @@ 
class IssueResponse(BaseModel): async def create_task_request_issue(request: TaskRequestIssue): """Create a GitHub issue requesting new tasks for the registry""" - if settings.GITHUB_TOKEN == "your_github_token_here": + gh = get_github_auth() + if not gh.is_configured: raise HTTPException( status_code=status.HTTP_501_NOT_IMPLEMENTED, - detail="GitHub integration not configured. Please set GITHUB_TOKEN in environment variables." + detail="GitHub integration not configured. Set GITHUB_APP_ID/GITHUB_APP_PRIVATE_KEY or GITHUB_TOKEN.", ) - + try: - # Create issue title title = f"Task Request: {request.user_query}" - - # Create issue body + body_parts = [ - "## 🚀 New Task Request", + "## New Task Request", "", f"**User Query:** {request.user_query}", "", @@ -58,12 +58,10 @@ async def create_task_request_issue(request: TaskRequestIssue): f"- **Platform:** {request.platform}", f"- **Priority:** {request.priority}", ] - + if request.user_email: - body_parts.extend([ - f"- **Requested by:** {request.user_email}", - ]) - + body_parts.append(f"- **Requested by:** {request.user_email}") + body_parts.extend([ "", "### Acceptance Criteria", @@ -75,51 +73,52 @@ async def create_task_request_issue(request: TaskRequestIssue): "---", "*This issue was automatically created from the CodeCollection Registry chat interface.*" ]) - + body = "\n".join(body_parts) - - # Create GitHub issue + github_api_url = f"https://api.github.com/repos/{settings.GITHUB_OWNER}/{settings.GITHUB_REPO}/issues" - + headers = { - "Authorization": f"token {settings.GITHUB_TOKEN}", + **gh.auth_header(), "Accept": "application/vnd.github.v3+json", - "Content-Type": "application/json" + "Content-Type": "application/json", } - + issue_data = { "title": title, "body": body, - "labels": ["enhancement", "task-request", f"platform:{request.platform.lower()}", f"priority:{request.priority}"] + "labels": ["enhancement", "task-request", f"platform:{request.platform.lower()}", f"priority:{request.priority}"], } - + 
response = requests.post(github_api_url, json=issue_data, headers=headers) - + if response.status_code == 201: issue_info = response.json() return IssueResponse( issue_url=issue_info["html_url"], issue_number=issue_info["number"], - message=f"Successfully created GitHub issue #{issue_info['number']}" + message=f"Successfully created GitHub issue #{issue_info['number']}", ) else: logger.error(f"GitHub API error: {response.status_code} - {response.text}") raise HTTPException( status_code=status.HTTP_500_INTERNAL_SERVER_ERROR, - detail=f"Failed to create GitHub issue: {response.text}" + detail=f"GitHub API error: {response.text}", ) - + + except HTTPException: + raise except requests.RequestException as e: logger.error(f"Error creating GitHub issue: {e}") raise HTTPException( status_code=status.HTTP_500_INTERNAL_SERVER_ERROR, - detail=f"Error communicating with GitHub: {str(e)}" + detail=f"Error communicating with GitHub: {str(e)}", ) except Exception as e: logger.error(f"Unexpected error creating GitHub issue: {e}") raise HTTPException( status_code=status.HTTP_500_INTERNAL_SERVER_ERROR, - detail=f"Unexpected error: {str(e)}" + detail=f"Unexpected error: {str(e)}", ) diff --git a/cc-registry-v2/backend/app/routers/intake.py b/cc-registry-v2/backend/app/routers/intake.py new file mode 100644 index 00000000000..2ccac21ff46 --- /dev/null +++ b/cc-registry-v2/backend/app/routers/intake.py @@ -0,0 +1,505 @@ +""" +CodeBundle Intake Wizard Router + +Guides users through a conversational flow to define CodeBundle requirements, +searches existing coverage via the MCP server, and files structured issues +to the codebundle-farm repository. 
+""" +import logging +from typing import Dict, List, Optional, Any +from datetime import datetime, timezone +from fastapi import APIRouter, HTTPException, status +from pydantic import BaseModel +import requests + +from app.core.config import settings +from app.services.github_auth import get_github_auth +from app.services.mcp_client import get_mcp_client, MCPError + +logger = logging.getLogger(__name__) + +router = APIRouter(prefix="/api/v1/intake", tags=["intake"]) + +CODEBUNDLE_FARM_REPO = settings.GITHUB_INTAKE_REPO + +PLATFORMS = [ + "Kubernetes", "AWS", "Azure", "GCP", "Linux", + "Database", "Terraform", "Docker", "GitHub", "Other", +] + + +# ============================================================================= +# Request/Response Models +# ============================================================================= + +class SearchRequest(BaseModel): + """Step 1–2: User describes their need and we search for existing coverage.""" + description: str + platform: Optional[str] = None + + +class SearchMatch(BaseModel): + display_name: str + slug: str + collection_slug: str + platform: str + description: str + tasks: List[str] = [] + tags: List[str] = [] + relevance_score: float = 0.0 + source_url: str = "" + + +class ExistingRequest(BaseModel): + number: int + title: str + url: str + created_at: str + + +class SearchResponse(BaseModel): + matches: List[SearchMatch] + existing_requests: List[ExistingRequest] + suggested_platform: str + query_used: str + + +class DesignSpecDraft(BaseModel): + """Legacy: structured Design Spec. 
@router.post("/search", response_model=SearchResponse)
async def search_existing_coverage(req: SearchRequest):
    """
    Search for existing CodeBundles and open requests that match the
    user's description. Called during wizard steps 1–3.

    The MCP lookups are best-effort: any failure is logged as a warning and
    the response is still returned with whatever was collected so far
    (possibly empty lists).

    Args:
        req: The user's free-text description and optional platform.

    Returns:
        SearchResponse with matching CodeBundles, overlapping open requests,
        the platform (given, or inferred from the description), and the
        query string that was used.
    """
    mcp = get_mcp_client()
    matches: List[SearchMatch] = []
    existing_requests: List[ExistingRequest] = []
    suggested_platform = req.platform or ""

    try:
        if await mcp.is_available():
            # Search existing CodeBundles via MCP
            args: Dict[str, Any] = {"query": req.description, "max_results": 8}
            if req.platform:
                args["platform"] = req.platform

            result = await mcp.call_tool("find_codebundle", args)
            if result and result.get("success"):
                matches = _parse_codebundle_results(result.get("result", ""))

            # Check for open requests. Prefer the explicit platform; otherwise
            # fall back to the first word of the description. The fallback is
            # parenthesised on purpose: without the parentheses the
            # conditional applies to the whole `or` expression and a supplied
            # platform is thrown away whenever the description is empty.
            # Splitting first also keeps a whitespace-only description from
            # raising IndexError.
            words = req.description.split()
            search_term = req.platform or (words[0] if words else "")
            if search_term:
                req_result = await mcp.call_tool(
                    "check_existing_requests", {"search_term": search_term}
                )
                if req_result and req_result.get("success"):
                    existing_requests = _parse_existing_requests(req_result.get("result", ""))
    except MCPError as e:
        logger.warning(f"MCP search failed, continuing without results: {e}")
    except Exception as e:
        # Deliberate catch-all: search is advisory and must not fail the wizard.
        logger.warning(f"Unexpected error during MCP search: {e}")

    # Infer platform from description if not provided
    if not suggested_platform:
        suggested_platform = _infer_platform(req.description)

    return SearchResponse(
        matches=matches,
        existing_requests=existing_requests,
        suggested_platform=suggested_platform,
        query_used=req.description,
    )
@router.post("/submit", response_model=SubmitResponse)
async def submit_intake(req: SubmitRequest):
    """
    Create a GitHub Issue in codebundle-farm. Requires only title + description.
    Search results (matches, existing_requests) are included in the issue body
    so the designer can see existing coverage and avoid duplication.

    Args:
        req: Intake payload (title, description, optional context/contact,
            and the search results shown to the user).

    Returns:
        SubmitResponse with the created issue's URL and number.

    Raises:
        HTTPException: 501 when no GitHub credentials are configured; 500 when
            GitHub answers with anything other than 201 or the HTTP call
            itself fails (including a timeout).
    """
    gh = get_github_auth()
    if not gh.is_configured:
        raise HTTPException(
            status_code=status.HTTP_501_NOT_IMPLEMENTED,
            detail="GitHub integration not configured. Set GITHUB_APP_ID/GITHUB_APP_PRIVATE_KEY or GITHUB_TOKEN.",
        )

    # Only the first 100 characters of the user's title go into the issue title.
    title = f"[intake] {req.title[:100]}"
    body = _build_minimal_issue_body(req)
    platform = _infer_platform(req.description)
    labels = ["intake", "needs-architect"]
    if platform:
        labels.append(f"platform:{platform.lower()}")

    try:
        api_url = f"https://api.github.com/repos/{CODEBUNDLE_FARM_REPO}/issues"
        headers = {
            **gh.auth_header(),
            "Accept": "application/vnd.github.v3+json",
        }
        issue_data = {"title": title, "body": body, "labels": labels}

        # requests has no default timeout; without one a stalled connection
        # would block this handler indefinitely. A timeout raises
        # requests.Timeout, which is a RequestException and is reported below.
        response = requests.post(api_url, json=issue_data, headers=headers, timeout=15)

        if response.status_code == 201:
            info = response.json()
            return SubmitResponse(
                issue_url=info["html_url"],
                issue_number=info["number"],
                message=f"Created issue #{info['number']} in {CODEBUNDLE_FARM_REPO}",
            )
        else:
            logger.error(f"GitHub API error: {response.status_code} - {response.text}")
            raise HTTPException(
                status_code=status.HTTP_500_INTERNAL_SERVER_ERROR,
                detail=f"GitHub API error: {response.text}",
            )
    except HTTPException:
        # Raised just above; let FastAPI handle it unchanged.
        raise
    except requests.RequestException as e:
        logger.error(f"Error creating GitHub issue: {e}")
        raise HTTPException(
            status_code=status.HTTP_500_INTERNAL_SERVER_ERROR,
            detail=f"Error communicating with GitHub: {e}",
        )
Existing Coverage (from registry search)", + "", + "The following CodeBundles may overlap with this request. Designer: consider reusing, extending, or differentiating.", + "", + ]) + for i, m in enumerate(req.matches, 1): + score_str = f" ({int(m.relevance_score * 100)}% match)" if m.relevance_score > 0 else "" + parts.append(f"### {i}. {m.display_name}{score_str}") + parts.append("") + parts.append(f"- **Collection:** `{m.collection_slug}`") + parts.append(f"- **Platform:** {m.platform or '—'}") + parts.append(f"- **Description:** {m.description[:500]}{'…' if len(m.description) > 500 else ''}") + if m.tasks: + parts.append(f"- **Tasks:** {', '.join(m.tasks[:8])}{'…' if len(m.tasks) > 8 else ''}") + if m.tags: + parts.append(f"- **Tags:** {', '.join(m.tags[:10])}") + if m.source_url: + parts.append(f"- **Link:** {m.source_url}") + parts.append("") + + # Existing open requests — designer can consolidate + if req.existing_requests: + parts.extend([ + "---", + "## Open Requests (may overlap)", + "", + "Consider commenting on an existing issue instead of duplicating work.", + "", + ]) + for r in req.existing_requests: + parts.append(f"- [#{r.number} {r.title}]({r.url})") + parts.append("") + + if req.contact_email: + parts.append(f"**Contact:** {req.contact_email}") + parts.append("") + if req.contact_ok: + parts.append("**Contact OK:** Yes, please reach out.") + parts.append("") + + parts.extend([ + "---", + f"*Created via CodeCollection Registry intake at {datetime.now(timezone.utc).strftime('%Y-%m-%d %H:%M UTC')}.*", + ]) + return "\n".join(parts) + + +def _build_issue_body(spec: DesignSpecDraft, contact_email: Optional[str]) -> str: + """Build the GitHub Issue body with the Design Spec in YAML.""" + tasks_yaml = "" + for t in spec.tasks: + tasks_yaml += f' - name: "{t.get("name", "")}"\n' + tasks_yaml += f' checks: "{t.get("checks", "")}"\n' + + env_yaml = "" + for v in spec.env_vars: + env_yaml += f' - name: "{v.get("name", "")}"\n' + env_yaml += f' description: 
"{v.get("description", "")}"\n' + env_yaml += f' example: "{v.get("example", "")}"\n' + + secrets_yaml = "" + for s in spec.secrets: + secrets_yaml += f' - name: "{s.get("name", "")}"\n' + secrets_yaml += f' description: "{s.get("description", "")}"\n' + + parts = [ + "## Original Request", + "", + f"> {spec.user_description}", + "", + ] + + if spec.coverage_notes: + parts.extend([ + "## Existing Coverage Notes", + "", + spec.coverage_notes, + "", + ]) + + parts.extend([ + "## Design Spec (draft)", + "", + "```yaml", + f"codebundle_name: {spec.codebundle_name}", + f"target_collection: {spec.target_collection}", + f"platform: {spec.platform}", + f'purpose: "{spec.purpose}"', + "", + "tasks:", + tasks_yaml.rstrip(), + "", + "resource_types:", + ]) + for r in spec.resource_types: + parts.append(f" - {r}") + + if spec.env_vars: + parts.extend(["", "env_vars:", env_yaml.rstrip()]) + if spec.secrets: + parts.extend(["", "secrets:", secrets_yaml.rstrip()]) + if spec.tools_required: + parts.append("") + parts.append("tools_required:") + for t in spec.tools_required: + parts.append(f" - {t}") + if spec.related_bundles: + parts.append("") + parts.append("related_bundles:") + for b in spec.related_bundles: + parts.append(f" - {b}") + + parts.extend(["```", ""]) + + if contact_email: + parts.append(f"**Contact**: {contact_email}") + parts.append("") + + parts.extend([ + "---", + f"*Created via the CodeCollection Registry intake wizard at {datetime.now(timezone.utc).strftime('%Y-%m-%d %H:%M UTC')}.*", + ]) + + return "\n".join(parts) + + +def _infer_platform(description: str) -> str: + """Best-effort platform detection from free text.""" + d = description.lower() + mapping = [ + (["kubernetes", "k8s", "kubectl", "pod", "deployment", "namespace", "helm"], "Kubernetes"), + (["aws", "amazon", "s3", "ec2", "lambda", "cloudwatch", "iam"], "AWS"), + (["azure", "az ", "aks", "app service", "resource group"], "Azure"), + (["gcp", "google cloud", "gke", "bigquery", "pubsub"], "GCP"), + 
(["terraform", "tfstate", "hcl"], "Terraform"), + (["docker", "container", "dockerfile"], "Docker"), + (["github", "gh ", "actions", "repository"], "GitHub"), + (["postgres", "mysql", "redis", "database", "sql"], "Database"), + (["linux", "ssh", "systemd", "cron"], "Linux"), + ] + for keywords, platform in mapping: + if any(kw in d for kw in keywords): + return platform + return "" + + +def _generate_bundle_name(platform: str, description: str) -> str: + """Generate a bundle name slug from platform and description.""" + prefix = platform.lower().replace(" ", "-") if platform else "generic" + words = description.lower().split()[:4] + slug = "-".join(w for w in words if len(w) > 2 and w.isalnum()) + if not slug: + slug = "healthcheck" + return f"{prefix}-{slug}" + + +def _slugify_task(description: str) -> str: + """Turn a task description into a Robot Framework task name.""" + words = description.strip().split()[:8] + return " ".join(w.capitalize() for w in words) + + +def _parse_codebundle_results(markdown: str) -> List[SearchMatch]: + """Parse the markdown output from MCP find_codebundle into structured matches.""" + matches = [] + current: Dict[str, Any] = {} + + for line in markdown.split("\n"): + line = line.strip() + if line.startswith("## ") and "**" in line: + if current.get("display_name"): + matches.append(SearchMatch(**current)) + name = line.split("**")[1] if "**" in line else line[3:] + current = { + "display_name": name.strip(), + "slug": "", + "collection_slug": "", + "platform": "", + "description": "", + "tasks": [], + "tags": [], + "relevance_score": 0.0, + "source_url": "", + } + elif line.startswith("**Collection:**"): + current["collection_slug"] = line.split("**Collection:**")[1].strip() + elif line.startswith("**Platform:**"): + current["platform"] = line.split("**Platform:**")[1].strip() + elif line.startswith("**Description:**"): + current["description"] = line.split("**Description:**")[1].strip() + elif line.startswith("**Relevance:**"): + 
try: + score_str = line.split("**Relevance:**")[1].strip().rstrip("%") + current["relevance_score"] = float(score_str) / 100 + except (ValueError, IndexError): + pass + elif line.startswith("**Tags:**"): + tags_str = line.split("**Tags:**")[1].strip() + current["tags"] = [t.strip() for t in tags_str.split(",") if t.strip()] + elif line.startswith("**Source:**"): + # Extract slug from source link + if "/codebundles/" in line: + slug_part = line.split("/codebundles/")[-1].rstrip(")") + current["slug"] = slug_part + current["source_url"] = line.split("(")[-1].rstrip(")") if "(" in line else "" + elif line.startswith("- ") and current.get("display_name"): + task = line[2:].strip() + if task and "tasks" in current: + current["tasks"].append(task) + + if current.get("display_name"): + matches.append(SearchMatch(**current)) + + return matches + + +def _parse_existing_requests(markdown: str) -> List[ExistingRequest]: + """Parse the markdown output from MCP check_existing_requests.""" + requests_list = [] + for line in markdown.split("\n"): + line = line.strip() + if line.startswith("- **#"): + try: + number = int(line.split("**#")[1].split("**")[0]) + title = line.split("[")[1].split("]")[0] if "[" in line else "" + url = line.split("(")[1].split(")")[0] if "(" in line else "" + created = "" + if "Created:" in line: + created = line.split("Created:")[1].strip() + requests_list.append(ExistingRequest( + number=number, title=title, url=url, created_at=created, + )) + except (IndexError, ValueError): + continue + return requests_list diff --git a/cc-registry-v2/backend/app/routers/versions.py b/cc-registry-v2/backend/app/routers/versions.py index 741e218cb3c..f84cf822e1e 100644 --- a/cc-registry-v2/backend/app/routers/versions.py +++ b/cc-registry-v2/backend/app/routers/versions.py @@ -7,6 +7,7 @@ from sqlalchemy import desc from app.core.database import get_db +from app.core.visibility import public_only from app.models.code_collection import CodeCollection from 
app.models.version import CodeCollectionVersion, VersionCodebundle @@ -22,10 +23,13 @@ async def get_collections_with_versions( Get all CodeCollections with their versions (tags and branches). """ try: - query = db.query(CodeCollection).options( - joinedload(CodeCollection.versions) + # Public website endpoint -- hidden CCs are excluded. + query = public_only( + db.query(CodeCollection).options( + joinedload(CodeCollection.versions) + ) ).order_by(CodeCollection.name) - + if limit: query = query.offset(offset).limit(limit) @@ -85,8 +89,8 @@ async def get_collection_versions( """ Get all versions for a specific CodeCollection. """ - collection = db.query(CodeCollection).filter( - CodeCollection.slug == collection_slug + collection = public_only( + db.query(CodeCollection).filter(CodeCollection.slug == collection_slug) ).first() if not collection: @@ -133,8 +137,8 @@ async def get_version_by_name( """ Get a specific version by collection slug and version name. """ - collection = db.query(CodeCollection).filter( - CodeCollection.slug == collection_slug + collection = public_only( + db.query(CodeCollection).filter(CodeCollection.slug == collection_slug) ).first() if not collection: @@ -178,8 +182,8 @@ async def get_latest_version( """ Get the latest version for a CodeCollection. """ - collection = db.query(CodeCollection).filter( - CodeCollection.slug == collection_slug + collection = public_only( + db.query(CodeCollection).filter(CodeCollection.slug == collection_slug) ).first() if not collection: @@ -229,8 +233,8 @@ async def get_version_codebundles( """ Get all codebundles for a specific version. 
""" - collection = db.query(CodeCollection).filter( - CodeCollection.slug == collection_slug + collection = public_only( + db.query(CodeCollection).filter(CodeCollection.slug == collection_slug) ).first() if not collection: diff --git a/cc-registry-v2/backend/app/schemas/__init__.py b/cc-registry-v2/backend/app/schemas/__init__.py new file mode 100644 index 00000000000..e69de29bb2d diff --git a/cc-registry-v2/backend/app/schemas/cc_catalog.py b/cc-registry-v2/backend/app/schemas/cc_catalog.py new file mode 100644 index 00000000000..49935a7b853 --- /dev/null +++ b/cc-registry-v2/backend/app/schemas/cc_catalog.py @@ -0,0 +1,74 @@ +""" +Pydantic response models for the PAPI-facing CodeCollection catalog API. + +These shapes are part of the contract PAPI (and any other consumer) +depends on. Keep field names stable; add new fields rather than renaming. +""" +from __future__ import annotations + +from datetime import datetime +from typing import Optional + +from pydantic import BaseModel, Field + + +class ImageRef(BaseModel): + """One built image ref (1:1 with a CodeCollectionVersion row).""" + + ref: str = Field(..., description="Git ref this build represents (branch/tag).") + ref_type: str = Field(..., description="'branch' | 'tag' | 'release'.") + image_registry: Optional[str] = Field( + None, description="OCI repository, e.g. 'ghcr.io/runwhen-contrib/rw-cli-codecollection'." + ) + image_tag: str = Field(..., description="Concrete OCI tag, pullable verbatim.") + image_digest: Optional[str] = Field( + None, description="sha256 digest when available; pin to this for reproducibility." + ) + commit_hash: Optional[str] = Field( + None, description="Full codecollection commit sha this image was built from." + ) + rt_revision: Optional[str] = Field( + None, description="platform-robot-runtime sha at build time." 
+ ) + image_built_at: Optional[datetime] = None + is_latest: bool = False + is_prerelease: bool = False + is_active: bool = True + synced_at: Optional[datetime] = None + + +class CatalogEntry(BaseModel): + """A single CodeCollection plus its currently-resolved pointers.""" + + slug: str + name: str + git_url: str + visibility: str = Field( + "public", + description=( + "'public' or 'hidden'. PAPI returns both; public-audience surfaces " + "(website/MCP/AI) filter to public only." + ), + ) + latest_image_tag: Optional[str] = None + stable_image_tag: Optional[str] = None + image_registry: Optional[str] = None + last_synced: Optional[datetime] = None + + +class CatalogEntryDetail(CatalogEntry): + """Catalog entry with the full set of known refs attached.""" + + refs: list[ImageRef] = Field(default_factory=list) + + +class ResolveResponse(BaseModel): + """`/resolve` endpoint: ref-or-pointer -> concrete image.""" + + slug: str + requested: str = Field(..., description="The pointer or ref the caller asked for.") + image_tag: str + image_registry: Optional[str] = None + image_digest: Optional[str] = None + commit_hash: Optional[str] = None + rt_revision: Optional[str] = None diff --git a/cc-registry-v2/backend/app/services/github_auth.py b/cc-registry-v2/backend/app/services/github_auth.py new file mode 100644 index 00000000000..46d96e3f9ba --- /dev/null +++ b/cc-registry-v2/backend/app/services/github_auth.py @@ -0,0 +1,182 @@ +""" +GitHub App authentication service. + +Generates short-lived installation access tokens from a GitHub App's +private key. Falls back to the static GITHUB_TOKEN PAT when App +credentials are not configured. 
class GitHubAuth:
    """Manages GitHub authentication via App credentials or PAT fallback.

    When both GITHUB_APP_ID and a decodable GITHUB_APP_PRIVATE_KEY are set,
    tokens are short-lived installation tokens minted from an App JWT and
    cached on the instance. Otherwise the static GITHUB_TOKEN is used.
    """

    # Seconds a freshly minted installation token is treated as valid.
    # NOTE(review): presumably chosen to sit safely under GitHub's
    # installation-token lifetime; confirm against GitHub's documentation.
    _TOKEN_TTL_SECONDS = 3500

    def __init__(self) -> None:
        self._private_key: Optional[str] = None
        self._app_id: Optional[str] = None
        self._installation_id: Optional[int] = settings.GITHUB_APP_INSTALLATION_ID
        self._token: Optional[str] = None
        self._token_expires_at: float = 0
        self._lock = threading.Lock()

        raw_key = settings.GITHUB_APP_PRIVATE_KEY
        if raw_key and settings.GITHUB_APP_ID:
            self._app_id = settings.GITHUB_APP_ID
            self._private_key = self._decode_key(raw_key)
            if self._private_key:
                logger.info("GitHub App authentication configured (app_id=%s)", self._app_id)
            else:
                logger.warning("GITHUB_APP_PRIVATE_KEY could not be decoded; falling back to PAT")

    @property
    def is_app_auth(self) -> bool:
        """True when an App id and a usable private key are both available."""
        return bool(self._app_id and self._private_key)

    @property
    def is_configured(self) -> bool:
        """True when App auth is usable or a non-placeholder PAT is set."""
        if self.is_app_auth:
            return True
        return bool(settings.GITHUB_TOKEN and settings.GITHUB_TOKEN != "your_github_token_here")

    # ------------------------------------------------------------------
    # Public API
    # ------------------------------------------------------------------

    def get_token(self) -> str:
        """Return a valid GitHub token (installation token or PAT).

        Raises:
            RuntimeError: App auth is configured but no installation token
                could be obtained.
        """
        if self.is_app_auth:
            return self._get_installation_token()
        return settings.GITHUB_TOKEN

    def auth_header(self) -> dict:
        """Return an Authorization header dict ready for requests."""
        token = self.get_token()
        if self.is_app_auth:
            return {"Authorization": f"Bearer {token}"}
        return {"Authorization": f"token {token}"}

    # ------------------------------------------------------------------
    # Internals
    # ------------------------------------------------------------------

    @staticmethod
    def _decode_key(raw: str) -> Optional[str]:
        """Accept either a PEM string or a base64-encoded PEM.

        Returns the PEM text, or None (after logging an error) when *raw* is
        neither.
        """
        if raw.startswith("-----BEGIN"):
            return raw
        try:
            decoded = base64.b64decode(raw).decode("utf-8")
        except ValueError:
            # binascii.Error (bad base64) and UnicodeDecodeError are both
            # ValueError subclasses; anything else is a real bug and should
            # not be hidden here.
            decoded = ""
        if "-----BEGIN" in decoded:
            return decoded
        logger.error("GITHUB_APP_PRIVATE_KEY is not a valid PEM or base64-encoded PEM")
        return None

    def _make_jwt(self) -> str:
        """Create a short-lived JWT signed with the App's private key."""
        now = int(time.time())
        payload = {
            "iat": now - 60,  # issued-at backdated by a minute
            "exp": now + (10 * 60),
            "iss": self._app_id,
        }
        return jose_jwt.encode(payload, self._private_key, algorithm="RS256")

    def _discover_installation_id(self, app_jwt: str) -> Optional[int]:
        """Find the first installation of the App.

        Returns None (after logging) on any HTTP or parsing failure.
        """
        try:
            resp = httpx.get(
                f"{GITHUB_API}/app/installations",
                headers={
                    "Authorization": f"Bearer {app_jwt}",
                    "Accept": "application/vnd.github+json",
                },
                timeout=15,
            )
            if resp.status_code != 200:
                logger.error("Failed to list installations: %s %s", resp.status_code, resp.text)
                return None
            installations = resp.json()
            if not installations:
                logger.error("No installations found for GitHub App %s", self._app_id)
                return None
            inst_id = installations[0]["id"]
            logger.info("Auto-discovered GitHub App installation_id=%s", inst_id)
            return inst_id
        except Exception as exc:
            logger.error("Error discovering installation: %s", exc)
            return None

    def _request_installation_token(self, app_jwt: str, installation_id: int) -> Optional[str]:
        """Exchange the JWT for an installation access token.

        Pure request/response: this method does not touch the cached token or
        its expiry. The caller publishes both together while holding the lock.
        Returns None (after logging) on any failure.
        """
        try:
            resp = httpx.post(
                f"{GITHUB_API}/app/installations/{installation_id}/access_tokens",
                headers={
                    "Authorization": f"Bearer {app_jwt}",
                    "Accept": "application/vnd.github+json",
                },
                timeout=15,
            )
            if resp.status_code != 201:
                logger.error("Failed to create installation token: %s %s", resp.status_code, resp.text)
                return None
            return resp.json()["token"]
        except Exception as exc:
            logger.error("Error requesting installation token: %s", exc)
            return None

    def _get_installation_token(self) -> str:
        """Return a cached installation token, refreshing if needed.

        Raises:
            RuntimeError: the installation could not be discovered, or the
                token exchange failed.
        """
        # Lock-free fast path for the common case of a still-valid token.
        if self._token and time.time() < self._token_expires_at:
            return self._token

        with self._lock:
            # Another thread may have refreshed while we waited for the lock.
            if self._token and time.time() < self._token_expires_at:
                return self._token

            app_jwt = self._make_jwt()

            if not self._installation_id:
                self._installation_id = self._discover_installation_id(app_jwt)
            if not self._installation_id:
                raise RuntimeError("Cannot discover GitHub App installation; set GITHUB_APP_INSTALLATION_ID")

            token = self._request_installation_token(app_jwt, self._installation_id)
            if not token:
                raise RuntimeError("Failed to obtain GitHub App installation token")

            # Publish the token first and its expiry second. The fast path
            # above reads without the lock; if the expiry were advanced while
            # the old token was still in place, a concurrent caller could be
            # handed the expired token.
            self._token = token
            self._token_expires_at = time.time() + self._TOKEN_TTL_SECONDS
            return token
+ - static – reads a hand-curated JSON file (useful for vendored / signed-off images) + +Add a new source by writing a class that satisfies `ImageSource` and +registering it in `SOURCE_REGISTRY` (see `registry.py`). +""" +from .base import ImageSource, DiscoveredImageRef +from .registry import SOURCE_REGISTRY, get_source + +__all__ = ["ImageSource", "DiscoveredImageRef", "SOURCE_REGISTRY", "get_source"] diff --git a/cc-registry-v2/backend/app/sources/base.py b/cc-registry-v2/backend/app/sources/base.py new file mode 100644 index 00000000000..3396d933b39 --- /dev/null +++ b/cc-registry-v2/backend/app/sources/base.py @@ -0,0 +1,80 @@ +""" +ImageSource abstract base + DiscoveredImageRef value object. + +The image-sync task drives sources in three phases: + + refs = source.discover_refs(cc) + latest = source.resolve_latest(cc, refs) + stable = source.resolve_stable(cc, refs) + +`discover_refs` is the only mandatory remote call; the resolvers should be +pure functions over the discovered list so they're easy to unit test. +""" +from __future__ import annotations + +from abc import ABC, abstractmethod +from dataclasses import dataclass, field +from datetime import datetime +from typing import Optional + + +@dataclass(frozen=True) +class DiscoveredImageRef: + """ + One concrete image build that a source has found for a CodeCollection. + + A "ref" maps 1:1 to an OCI image tag we will track in the catalog. 
For + example a CC built from `main` at commit `c1a2b3d…` against a runtime + at `e4f5a6b…` produces a single `DiscoveredImageRef`: + + ref = "main" + ref_type = "branch" + commit = "c1a2b3d…" + rt_revision = "e4f5a6b…" + image_tag = "main-c1a2b3d-e4f5a6b" + """ + ref: str # git ref this build represents (branch/tag name) + ref_type: str # "branch" | "tag" | "release" + commit: str # full sha of the codecollection commit + rt_revision: str # full sha of the runtime used at build time + image_tag: str # concrete OCI tag (pullable) + image_digest: Optional[str] = None # sha256:... when available from manifest + built_at: Optional[datetime] = None + extra: dict = field(default_factory=dict) # source-specific overflow (free form) + + +class ImageSource(ABC): + """ + Abstract source of CodeCollection image metadata. + + Subclasses must be safe to call on a schedule from a Celery worker: + no global mutable state, network errors should raise rather than + swallow so the sync task can record them. + """ + + name: str # unique key registered in SOURCE_REGISTRY (e.g. "oci") + + @abstractmethod + def discover_refs(self, cc: dict) -> list[DiscoveredImageRef]: + """ + Return every image build currently known for this CodeCollection. + + `cc` is the raw mapping from `codecollections.yaml` (slug, git_url, + image_registry, image_source, etc.) so a source can read any + source-specific fields it needs without a separate config layer. + """ + ... + + @abstractmethod + def resolve_latest( + self, cc: dict, refs: list[DiscoveredImageRef] + ) -> Optional[str]: + """Return the image_tag that should be considered `latest`, or None.""" + ... + + @abstractmethod + def resolve_stable( + self, cc: dict, refs: list[DiscoveredImageRef] + ) -> Optional[str]: + """Return the image_tag that should be considered `stable`, or None.""" + ... 
diff --git a/cc-registry-v2/backend/app/sources/oci.py b/cc-registry-v2/backend/app/sources/oci.py new file mode 100644 index 00000000000..06f5818fdfb --- /dev/null +++ b/cc-registry-v2/backend/app/sources/oci.py @@ -0,0 +1,220 @@ +""" +OCI Distribution v2 image source. + +Lists every tag in a public OCI repository and shapes each into a +`DiscoveredImageRef`. The expected tag schema matches the one emitted by +the codecollection build workflows in this design: + + -- + +For example: + + main-c1a2b3d-e4f5a6b + pr-42-9988aabb-e4f5a6b + v1.2.0-aabbccd-e4f5a6b + +`latest` resolution: among tags whose ref-portion is `main`, pick the +newest (by manifest `created` if available, otherwise the lexicographically +last — tags are time-monotonic given the sha suffix). + +`stable` resolution: prefer the highest semver-looking ref (`v\\d+...`) if +one exists; otherwise fall back to `latest`. + +NOTE: this source intentionally treats the registry as the source of +truth. It never mutates the registry; the cc-registry-v2 catalog is a +read-only mirror that powers PAPI lookups. +""" +from __future__ import annotations + +import logging +import re +from datetime import datetime, timezone +from typing import Optional + +import requests + +from .base import DiscoveredImageRef, ImageSource + +logger = logging.getLogger(__name__) + + +# Tag schema: --. The ref portion may itself contain +# hyphens (e.g. "pr-42"), so we anchor on the two trailing 7-char sha groups. +TAG_PATTERN = re.compile( + r"^(?P.+?)-(?P[0-9a-f]{7,40})-(?P[0-9a-f]{7,40})$" +) + +SEMVER_TAG = re.compile(r"^v?\d+\.\d+(\.\d+)?") + + +class OCISource(ImageSource): + name = "oci" + + def __init__(self, timeout: float = 10.0, max_pages: int = 50): + # Defensive caps: a single CC shouldn't paginate forever, and + # individual HTTP calls shouldn't hang a Celery worker. 
+ self.timeout = timeout + self.max_pages = max_pages + + # ------------------------------------------------------------------ + # public API + # ------------------------------------------------------------------ + def discover_refs(self, cc: dict) -> list[DiscoveredImageRef]: + registry_url = cc.get("image_registry") + if not registry_url: + logger.warning( + "oci source skipping %s: no image_registry configured", + cc.get("slug"), + ) + return [] + + host, repo = self._split_registry_url(registry_url) + tags = self._list_tags(host, repo) + + discovered: list[DiscoveredImageRef] = [] + for tag in tags: + ref = self._parse_tag(tag) + if ref is None: + continue + discovered.append(ref) + logger.info( + "oci source: %s -> %d tags, %d matched build schema", + cc.get("slug"), + len(tags), + len(discovered), + ) + return discovered + + def resolve_latest( + self, cc: dict, refs: list[DiscoveredImageRef] + ) -> Optional[str]: + # Prefer the newest build of the configured default branch. + default_ref = cc.get("default_ref", "main") + candidates = [r for r in refs if r.ref == default_ref] + if not candidates: + return None + candidates.sort( + key=lambda r: (r.built_at or datetime.min.replace(tzinfo=timezone.utc), r.image_tag) + ) + return candidates[-1].image_tag + + def resolve_stable( + self, cc: dict, refs: list[DiscoveredImageRef] + ) -> Optional[str]: + # Highest semver-looking ref wins; fall back to latest. 
+ semver_refs = [r for r in refs if SEMVER_TAG.match(r.ref)] + if semver_refs: + semver_refs.sort(key=lambda r: self._semver_key(r.ref)) + return semver_refs[-1].image_tag + return self.resolve_latest(cc, refs) + + # ------------------------------------------------------------------ + # helpers + # ------------------------------------------------------------------ + @staticmethod + def _split_registry_url(url: str) -> tuple[str, str]: + """ + "ghcr.io/runwhen-contrib/rw-cli-codecollection" + -> ("ghcr.io", "runwhen-contrib/rw-cli-codecollection") + """ + url = url.strip().rstrip("/") + if "/" not in url: + raise ValueError(f"image_registry must include a repo path: {url}") + host, _, repo = url.partition("/") + return host, repo + + def _list_tags(self, host: str, repo: str) -> list[str]: + """Walk the v2 tags endpoint with Link-header pagination.""" + url = f"https://{host}/v2/{repo}/tags/list" + params = {"n": 200} + all_tags: list[str] = [] + for _ in range(self.max_pages): + resp = self._get_with_token(host, repo, url, params) + resp.raise_for_status() + payload = resp.json() + all_tags.extend(payload.get("tags") or []) + link = resp.headers.get("Link") or "" + next_url = self._parse_next_link(link, host) + if not next_url: + break + url, params = next_url, {} + return all_tags + + def _get_with_token(self, host: str, repo: str, url: str, params: dict): + """ + Some public registries (GHCR, Docker Hub) require an anonymous + bearer token even for public reads. Handle the 401 -> token -> + retry dance once. 
+ """ + resp = requests.get(url, params=params, timeout=self.timeout) + if resp.status_code != 401: + return resp + + www_auth = resp.headers.get("WWW-Authenticate", "") + m = re.search(r'Bearer realm="([^"]+)"', www_auth) + realm = m.group(1) if m else None + if not realm: + return resp # nothing we can do, let caller raise + service_match = re.search(r'service="([^"]+)"', www_auth) + token_params = { + "scope": f"repository:{repo}:pull", + } + if service_match: + token_params["service"] = service_match.group(1) + token_resp = requests.get(realm, params=token_params, timeout=self.timeout) + token_resp.raise_for_status() + token = token_resp.json().get("token") or token_resp.json().get("access_token") + if not token: + return resp + return requests.get( + url, + params=params, + timeout=self.timeout, + headers={"Authorization": f"Bearer {token}"}, + ) + + @staticmethod + def _parse_next_link(link_header: str, host: str) -> Optional[str]: + # Link: ; rel="next" + m = re.search(r'<([^>]+)>;\s*rel="next"', link_header or "") + if not m: + return None + path = m.group(1) + if path.startswith("http"): + return path + return f"https://{host}{path}" + + @staticmethod + def _parse_tag(tag: str) -> Optional[DiscoveredImageRef]: + m = TAG_PATTERN.match(tag) + if not m: + return None + ref = m.group("ref") + return DiscoveredImageRef( + ref=ref, + ref_type=_classify_ref(ref), + commit=m.group("cc_sha"), + rt_revision=m.group("rt_sha"), + image_tag=tag, + ) + + @staticmethod + def _semver_key(ref: str) -> tuple: + # Cheap semver sort key; non-numeric suffixes sort last. 
+ ref = ref.lstrip("v") + parts = re.split(r"[.\-+]", ref) + key: list = [] + for p in parts: + if p.isdigit(): + key.append((0, int(p))) + else: + key.append((1, p)) + return tuple(key) + + +def _classify_ref(ref: str) -> str: + if ref.startswith("pr-"): + return "branch" + if SEMVER_TAG.match(ref): + return "tag" + return "branch" diff --git a/cc-registry-v2/backend/app/sources/registry.py b/cc-registry-v2/backend/app/sources/registry.py new file mode 100644 index 00000000000..795c702766e --- /dev/null +++ b/cc-registry-v2/backend/app/sources/registry.py @@ -0,0 +1,62 @@ +""" +Source registry / loader. + +Built-in sources live in this package and are loaded eagerly. + +Third-party / customer-specific sources can be registered via the +`CC_REGISTRY_EXTRA_SOURCES` environment variable, a colon-separated list of +import paths to modules that expose a top-level `SOURCE` instance. This +lets self-hosted operators plug in custom discovery logic (e.g. an internal +Harbor with non-standard tag schemas) without forking the catalog. 
+ +Example: + + CC_REGISTRY_EXTRA_SOURCES=mycorp.harbor:mycorp.gerrit +""" +from __future__ import annotations + +import importlib +import logging +import os +from typing import Dict + +from .base import ImageSource +from .oci import OCISource +from .static import StaticSource + +logger = logging.getLogger(__name__) + +SOURCE_REGISTRY: Dict[str, ImageSource] = { + OCISource.name: OCISource(), + StaticSource.name: StaticSource(), +} + + +def _load_extra_sources() -> None: + paths = os.environ.get("CC_REGISTRY_EXTRA_SOURCES", "").strip() + if not paths: + return + for module_path in paths.split(":"): + module_path = module_path.strip() + if not module_path: + continue + try: + mod = importlib.import_module(module_path) + source = getattr(mod, "SOURCE", None) + if not isinstance(source, ImageSource): + logger.warning( + "extra source %s did not expose a SOURCE: ImageSource", + module_path, + ) + continue + SOURCE_REGISTRY[source.name] = source + logger.info("registered extra image source: %s", source.name) + except Exception: # pragma: no cover - defensive logging + logger.exception("failed to load extra image source %s", module_path) + + +_load_extra_sources() + + +def get_source(name: str) -> ImageSource | None: + return SOURCE_REGISTRY.get(name) diff --git a/cc-registry-v2/backend/app/sources/static.py b/cc-registry-v2/backend/app/sources/static.py new file mode 100644 index 00000000000..e65b65bee62 --- /dev/null +++ b/cc-registry-v2/backend/app/sources/static.py @@ -0,0 +1,117 @@ +""" +Static JSON image source. + +Useful for: + + - Customer self-hosted catalogs where image discovery happens in the + customer's own pipeline and is dropped into a checked-in JSON file. + - Tests / fixtures. + - Pinning a CC to a known-good set of refs without polling. 
+ +Expected file shape: + + { + "default_ref": "main", + "stable_ref": "v1.2.0", + "refs": [ + { + "ref": "main", + "ref_type": "branch", + "commit": "c1a2b3d…", + "rt_revision": "e4f5a6b…", + "image_tag": "main-c1a2b3d-e4f5a6b", + "image_digest": "sha256:…", // optional + "built_at": "2026-05-11T20:00:00Z" // optional + }, + ... + ] + } + +The CC entry in `codecollections.yaml` points at the file via `static_path`. +""" +from __future__ import annotations + +import json +import logging +import os +from datetime import datetime +from typing import Optional + +from .base import DiscoveredImageRef, ImageSource + +logger = logging.getLogger(__name__) + + +class StaticSource(ImageSource): + name = "static" + + def discover_refs(self, cc: dict) -> list[DiscoveredImageRef]: + path = cc.get("static_path") + if not path or not os.path.exists(path): + logger.warning( + "static source skipping %s: static_path missing (%r)", + cc.get("slug"), + path, + ) + return [] + with open(path, "r") as f: + payload = json.load(f) + + refs: list[DiscoveredImageRef] = [] + for entry in payload.get("refs", []): + built_at = entry.get("built_at") + built_at_dt: Optional[datetime] = None + if built_at: + try: + built_at_dt = datetime.fromisoformat(built_at.replace("Z", "+00:00")) + except ValueError: + built_at_dt = None + refs.append( + DiscoveredImageRef( + ref=entry["ref"], + ref_type=entry.get("ref_type", "branch"), + commit=entry["commit"], + rt_revision=entry["rt_revision"], + image_tag=entry["image_tag"], + image_digest=entry.get("image_digest"), + built_at=built_at_dt, + ) + ) + # Stash the explicit pointers for the resolvers. 
+ return [ + DiscoveredImageRef( + ref=r.ref, + ref_type=r.ref_type, + commit=r.commit, + rt_revision=r.rt_revision, + image_tag=r.image_tag, + image_digest=r.image_digest, + built_at=r.built_at, + extra={ + "default_ref": payload.get("default_ref", "main"), + "stable_ref": payload.get("stable_ref"), + }, + ) + for r in refs + ] + + def resolve_latest( + self, cc: dict, refs: list[DiscoveredImageRef] + ) -> Optional[str]: + if not refs: + return None + default_ref = refs[0].extra.get("default_ref", "main") + matches = [r for r in refs if r.ref == default_ref] + return matches[-1].image_tag if matches else None + + def resolve_stable( + self, cc: dict, refs: list[DiscoveredImageRef] + ) -> Optional[str]: + if not refs: + return None + stable_ref = refs[0].extra.get("stable_ref") + if stable_ref: + for r in refs: + if r.ref == stable_ref: + return r.image_tag + return self.resolve_latest(cc, refs) diff --git a/cc-registry-v2/backend/app/tasks/celery_app.py b/cc-registry-v2/backend/app/tasks/celery_app.py index 69d371b74b3..9bfa001cfeb 100644 --- a/cc-registry-v2/backend/app/tasks/celery_app.py +++ b/cc-registry-v2/backend/app/tasks/celery_app.py @@ -71,6 +71,7 @@ def _configure_broker_url(): "app.tasks.workflow_tasks", "app.tasks.analytics_tasks", "app.tasks.indexing_tasks", + "app.tasks.image_sync_tasks", ] ) diff --git a/cc-registry-v2/backend/app/tasks/image_sync_tasks.py b/cc-registry-v2/backend/app/tasks/image_sync_tasks.py new file mode 100644 index 00000000000..facec3a031c --- /dev/null +++ b/cc-registry-v2/backend/app/tasks/image_sync_tasks.py @@ -0,0 +1,203 @@ +""" +Image-tag sync tasks. + +Periodically reads `codecollections.yaml`, asks each CC's configured +`ImageSource` for every known build, and upserts `CodeCollectionVersion` +rows so PAPI (and any other consumer) can resolve refs to concrete image +tags without ever talking to a git server or running a CRD reconciler. 
+ +Design notes: + + - This task is the single writer for image metadata in the catalog. It + is intentionally idempotent: re-running it converges the DB onto + whatever the OCI registry reports, including marking gone-from-registry + versions inactive. + - It does NOT push to any registry. The registry remains the source of + truth for whether an image exists. + - It runs on a regular celery-beat schedule (see schedules.yaml) and is + also exposed manually via the admin/task UI for on-demand refreshes. +""" +from __future__ import annotations + +import logging +import os +from datetime import datetime +from typing import Optional + +import yaml + +from app.core.database import SessionLocal +from app.models import CodeCollection +from app.models.version import CodeCollectionVersion +from app.sources import DiscoveredImageRef, get_source +from app.tasks.celery_app import celery_app + +logger = logging.getLogger(__name__) + + +def _load_codecollections_yaml() -> list[dict]: + """Locate codecollections.yaml in the same order other tasks do.""" + candidate_paths = [ + "/app/codecollections.yaml", + os.path.join( + os.path.dirname(os.path.dirname(os.path.dirname(__file__))), + "..", + "codecollections.yaml", + ), + "/workspaces/codecollection-registry/codecollections.yaml", + ] + for path in candidate_paths: + if os.path.exists(path): + with open(path, "r") as f: + data = yaml.safe_load(f) or {} + return data.get("codecollections", []) or [] + logger.error("codecollections.yaml not found in any known location") + return [] + + +@celery_app.task(bind=True, name="app.tasks.image_sync_tasks.sync_image_tags_task") +def sync_image_tags_task(self): + """ + For every CC with an `image_source` configured, discover its image + refs and upsert one CodeCollectionVersion row per ref. 
+ """ + logger.info("Starting sync_image_tags_task %s", self.request.id) + + collections = _load_codecollections_yaml() + summary = { + "collections_processed": 0, + "refs_upserted": 0, + "refs_deactivated": 0, + "errors": [], + } + + db = SessionLocal() + try: + for cc_yaml in collections: + source_name = cc_yaml.get("image_source") + if not source_name: + continue # CC opted out of image tracking + + slug = cc_yaml.get("slug") + if not slug: + logger.warning("Skipping CC without slug: %s", cc_yaml) + continue + + cc_row = ( + db.query(CodeCollection) + .filter(CodeCollection.slug == slug) + .first() + ) + if not cc_row: + # Image sync runs after collection sync, so a missing row + # almost always means the YAML edit hasn't reached the DB + # yet — bail rather than create a half-formed row. + logger.warning( + "Skipping image sync for %s: collection not yet in DB", slug + ) + continue + + source = get_source(source_name) + if source is None: + summary["errors"].append( + {"slug": slug, "error": f"unknown image_source {source_name!r}"} + ) + continue + + try: + refs = source.discover_refs(cc_yaml) + latest_tag = source.resolve_latest(cc_yaml, refs) + stable_tag = source.resolve_stable(cc_yaml, refs) + except Exception as exc: # pragma: no cover - logged for ops + logger.exception("source %s failed for %s", source_name, slug) + summary["errors"].append({"slug": slug, "error": str(exc)}) + continue + + upserted, deactivated = _upsert_versions( + db, + cc_row, + cc_yaml.get("image_registry"), + refs, + latest_tag, + stable_tag, + ) + db.commit() + summary["collections_processed"] += 1 + summary["refs_upserted"] += upserted + summary["refs_deactivated"] += deactivated + + logger.info("sync_image_tags_task finished: %s", summary) + return {"status": "success", **summary} + finally: + db.close() + + +def _upsert_versions( + db, + cc_row: CodeCollection, + image_registry: Optional[str], + refs: list[DiscoveredImageRef], + latest_tag: Optional[str], + stable_tag: 
Optional[str], +) -> tuple[int, int]: + """ + Mirror the discovered refs onto codecollection_versions. + + Strategy: + - Each (cc, ref) maps to a CodeCollectionVersion keyed by version_name=ref. + - Versions that exist in the DB but no longer appear in the source are + marked is_active=False (we keep the row for history rather than + deleting it — PAPI may still reference a now-gone image). + - `is_latest` is set ONLY on the latest-tag row; `is_prerelease` is + flipped off the stable row. + """ + upserted = 0 + deactivated = 0 + now = datetime.utcnow() + + refs_by_name = {r.ref: r for r in refs} + + existing_versions = ( + db.query(CodeCollectionVersion) + .filter(CodeCollectionVersion.codecollection_id == cc_row.id) + .all() + ) + existing_by_name = {v.version_name: v for v in existing_versions} + + # Deactivate rows that no longer appear in the source. + for name, row in existing_by_name.items(): + if name not in refs_by_name and row.is_active: + row.is_active = False + row.updated_at = now + deactivated += 1 + + # Upsert each discovered ref. + for name, ref in refs_by_name.items(): + is_latest_row = (latest_tag is not None and ref.image_tag == latest_tag) + is_stable_row = (stable_tag is not None and ref.image_tag == stable_tag) + row = existing_by_name.get(name) + if row is None: + row = CodeCollectionVersion( + codecollection_id=cc_row.id, + version_name=name, + git_ref=name, + display_name=name, + version_type=ref.ref_type, + ) + db.add(row) + + row.image_registry = image_registry + row.image_tag = ref.image_tag + row.image_digest = ref.image_digest + row.commit_hash = ref.commit + row.rt_revision = ref.rt_revision + row.image_built_at = ref.built_at + row.is_active = True + row.is_latest = is_latest_row + # Treat anything that isn't the stable pointer (and isn't semver) as a prerelease. 
+ row.is_prerelease = not (is_stable_row or ref.ref_type == "tag") + row.synced_at = now + row.updated_at = now + upserted += 1 + + return upserted, deactivated diff --git a/cc-registry-v2/backend/app/tasks/registry_tasks.py b/cc-registry-v2/backend/app/tasks/registry_tasks.py index 9e4707d1a9c..bb4e774232f 100644 --- a/cc-registry-v2/backend/app/tasks/registry_tasks.py +++ b/cc-registry-v2/backend/app/tasks/registry_tasks.py @@ -82,6 +82,16 @@ def sync_all_collections_task(self): CodeCollection.slug == collection_slug ).first() + # Visibility defaults to 'public' if omitted; only ever + # take on the values declared in YAML so a CC can be + # toggled hidden/public by re-deploying config alone. + visibility = collection_data.get('visibility', 'public') + if visibility not in ('public', 'hidden'): + logger.warning( + f"Unknown visibility {visibility!r} for {collection_slug}, defaulting to 'public'" + ) + visibility = 'public' + if not collection: collection = CodeCollection( name=collection_data.get('name', collection_slug), @@ -92,16 +102,18 @@ def sync_all_collections_task(self): owner_email=collection_data.get('owner_email', ''), owner_icon=collection_data.get('owner_icon', ''), git_ref=collection_data.get('git_ref', 'main'), + visibility=visibility, is_active=True ) db.add(collection) - logger.info(f"Created collection: {collection_slug}") + logger.info(f"Created collection: {collection_slug} (visibility={visibility})") else: collection.name = collection_data.get('name', collection_slug) collection.git_url = git_url collection.description = collection_data.get('description', '') + collection.visibility = visibility collection.is_active = True - logger.info(f"Updated collection: {collection_slug}") + logger.info(f"Updated collection: {collection_slug} (visibility={visibility})") db.commit() collections_synced += 1 diff --git a/cc-registry-v2/frontend/src/App.tsx b/cc-registry-v2/frontend/src/App.tsx index 5777e1d3e8e..7a93231db9f 100644 --- 
a/cc-registry-v2/frontend/src/App.tsx +++ b/cc-registry-v2/frontend/src/App.tsx @@ -17,6 +17,7 @@ import Login from './pages/Login'; import ConfigBuilder from './pages/ConfigBuilder'; import Chat from './pages/Chat'; import ChatDebug from './pages/ChatDebug'; +import IntakeWizard from './pages/IntakeWizard'; import Footer from './components/Footer'; import { CartProvider } from './contexts/CartContext'; import { AuthProvider } from './contexts/AuthContext'; @@ -38,6 +39,7 @@ function AppContent() { } /> } /> } /> + } /> } /> } /> diff --git a/cc-registry-v2/frontend/src/components/Header.tsx b/cc-registry-v2/frontend/src/components/Header.tsx index 6548f574b1c..9e62ce0249e 100644 --- a/cc-registry-v2/frontend/src/components/Header.tsx +++ b/cc-registry-v2/frontend/src/components/Header.tsx @@ -9,6 +9,7 @@ import { Menu, MenuItem, Divider, + Tooltip, } from '@mui/material'; import { Link, useLocation, useNavigate } from 'react-router-dom'; import { @@ -17,6 +18,7 @@ import { MoreVert as MoreVertIcon, DarkMode as DarkModeIcon, LightMode as LightModeIcon, + ChatBubbleOutline as ChatIcon, } from '@mui/icons-material'; import { useCart } from '../contexts/CartContext'; import { useAuth } from '../contexts/AuthContext'; @@ -126,7 +128,22 @@ const Header: React.FC = () => { - + + + + + {/* Browse Dropdown */} diff --git a/cc-registry-v2/frontend/src/pages/Chat.tsx b/cc-registry-v2/frontend/src/pages/Chat.tsx index c06fae03f0e..df6485ead0e 100644 --- a/cc-registry-v2/frontend/src/pages/Chat.tsx +++ b/cc-registry-v2/frontend/src/pages/Chat.tsx @@ -12,10 +12,6 @@ import { Link, Avatar, Button, - Dialog, - DialogTitle, - DialogContent, - DialogActions } from '@mui/material'; import { Send as SendIcon, @@ -30,8 +26,8 @@ import { } from '@mui/icons-material'; import ReactMarkdown from 'react-markdown'; import remarkGfm from 'remark-gfm'; -import { useLocation } from 'react-router-dom'; -import { chatApi, githubApi, ChatResponse, ExampleQueries, TaskRequestIssue } from 
'../services/api'; +import { useLocation, useNavigate } from 'react-router-dom'; +import { chatApi, ChatResponse, ExampleQueries } from '../services/api'; interface ChatMessage { id: string; @@ -45,6 +41,7 @@ interface ChatMessage { const Chat: React.FC = () => { const location = useLocation(); + const navigate = useNavigate(); const [messages, setMessages] = useState([]); const [inputValue, setInputValue] = useState(''); const [loading, setLoading] = useState(false); @@ -53,10 +50,6 @@ const Chat: React.FC = () => { const [examples, setExamples] = useState(null); const [copiedId, setCopiedId] = useState(null); const [expandedTasks, setExpandedTasks] = useState>(new Set()); - const [requestDialogOpen, setRequestDialogOpen] = useState(false); - const [requestContext, setRequestContext] = useState(''); - const [currentRequestQuery, setCurrentRequestQuery] = useState(''); - const [submittingRequest, setSubmittingRequest] = useState(false); const [initialQueryProcessed, setInitialQueryProcessed] = useState(false); const [codebundleContext, setCodebundleContext] = useState(null); const messagesEndRef = useRef(null); @@ -240,43 +233,8 @@ const Chat: React.FC = () => { } }; - const handleOpenRequestDialog = (userQuery: string) => { - setCurrentRequestQuery(userQuery); - setRequestContext(''); - setRequestDialogOpen(true); - }; - - const handleCloseRequestDialog = () => { - setRequestDialogOpen(false); - setRequestContext(''); - setCurrentRequestQuery(''); - }; - - const handleSubmitRequest = async () => { - setSubmittingRequest(true); - try { - // Combine the original query with additional context - const fullQuery = requestContext.trim() - ? 
`${currentRequestQuery}\n\nAdditional context: ${requestContext}` - : currentRequestQuery; - - const template = await githubApi.getIssueTemplate(fullQuery); - const issueData: TaskRequestIssue = { - user_query: template.user_query, - task_description: template.task_description, - use_case: template.use_case, - platform: template.platform, - priority: template.priority - }; - const result = await githubApi.createTaskRequest(issueData); - window.open(result.issue_url, '_blank'); - handleCloseRequestDialog(); - } catch (error: any) { - console.error('Error creating GitHub issue:', error); - alert(error.response?.data?.detail || 'Error creating GitHub issue'); - } finally { - setSubmittingRequest(false); - } + const handleRequestCodeBundle = (userQuery: string) => { + navigate('/intake', { state: { initialQuery: userQuery } }); }; const handleKeyPress = (event: React.KeyboardEvent) => { @@ -559,7 +517,7 @@ const Chat: React.FC = () => { size="small" variant="outlined" startIcon={} - onClick={() => handleOpenRequestDialog(message.userQuery || message.content)} + onClick={() => handleRequestCodeBundle(message.userQuery || message.content)} sx={{ textTransform: 'none', fontSize: '0.75rem', @@ -624,7 +582,7 @@ const Chat: React.FC = () => { variant="contained" size="small" startIcon={} - onClick={() => handleOpenRequestDialog(message.userQuery || message.content)} + onClick={() => handleRequestCodeBundle(message.userQuery || message.content)} sx={{ backgroundColor: 'primary.main', color: 'white', @@ -845,74 +803,6 @@ const Chat: React.FC = () => { - {/* Request CodeBundle Dialog */} - - - Request a CodeBundle - - - - - Your original query: - - - - {currentRequestQuery} - - - - - Add more context to help us understand your needs (optional): - - setRequestContext(e.target.value)} - placeholder="e.g., I need this to work with AWS EKS clusters, integrate with PagerDuty alerts, and run every 5 minutes..." 
- variant="outlined" - /> - - - This will create a GitHub issue with your request. The more details you provide, the better we can help! - - - - - - - - ); }; diff --git a/cc-registry-v2/frontend/src/pages/IntakeWizard.tsx b/cc-registry-v2/frontend/src/pages/IntakeWizard.tsx new file mode 100644 index 00000000000..08611da262e --- /dev/null +++ b/cc-registry-v2/frontend/src/pages/IntakeWizard.tsx @@ -0,0 +1,310 @@ +import React, { useState, useEffect } from 'react'; +import { useLocation } from 'react-router-dom'; +import { + Box, Container, Typography, TextField, Button, Paper, + CircularProgress, Alert, Collapse, FormControlLabel, Checkbox, + ToggleButton, ToggleButtonGroup, +} from '@mui/material'; +import { + CheckCircle as CheckIcon, + OpenInNew as ExternalLinkIcon, + ExpandMore as ExpandMoreIcon, + ExpandLess as ExpandLessIcon, + RocketLaunch as RocketIcon, + Chat as ChatIcon, + Tune as TuneIcon, +} from '@mui/icons-material'; +import { intakeApi, IntakeSearchMatch, IntakeExistingRequest } from '../services/api'; + +type Mode = 'simple' | 'explicit'; + +export default function IntakeWizard() { + const location = useLocation(); + const [mode, setMode] = useState('simple'); + + // Core answers (question-driven) + const [problemDescription, setProblemDescription] = useState(''); + const [platform, setPlatform] = useState(''); + const [healthyLooksLike, setHealthyLooksLike] = useState(''); + const [anythingElse, setAnythingElse] = useState(''); + + // Explicit mode only + const [explicitTasks, setExplicitTasks] = useState(''); + const [explicitVariables, setExplicitVariables] = useState(''); + + // Meta + const [contactEmail, setContactEmail] = useState(''); + const [contactOk, setContactOk] = useState(false); + const [showContact, setShowContact] = useState(false); + + const [submitting, setSubmitting] = useState(false); + const [submitResult, setSubmitResult] = useState<{ url: string; number: number } | null>(null); + const [error, setError] = useState(''); + + 
// Pre-fill from Chat "Request CodeBundle" or other entry points + useEffect(() => { + const state = location.state as { initialQuery?: string } | null; + if (state?.initialQuery?.trim()) { + setProblemDescription(state.initialQuery.trim()); + } + }, [location.state]); + + const buildTitle = () => { + const firstLine = problemDescription.split('\n')[0].trim(); + return firstLine.length > 60 ? firstLine.slice(0, 57) + '...' : firstLine || 'CodeBundle request'; + }; + + const buildDescription = () => { + const parts: string[] = [problemDescription]; + if (platform.trim()) parts.push(`\n**Platform:** ${platform.trim()}`); + if (healthyLooksLike.trim()) parts.push(`\n**What healthy looks like:** ${healthyLooksLike.trim()}`); + if (anythingElse.trim()) parts.push(`\n**Additional context:** ${anythingElse.trim()}`); + if (mode === 'explicit' && (explicitTasks.trim() || explicitVariables.trim())) { + if (explicitTasks.trim()) parts.push(`\n**Suggested tasks:**\n${explicitTasks.trim()}`); + if (explicitVariables.trim()) parts.push(`\n**Variables/config:**\n${explicitVariables.trim()}`); + } + return parts.join('\n'); + }; + + const handleSubmit = async () => { + setSubmitting(true); + setError(''); + try { + const description = buildDescription(); + let matches: IntakeSearchMatch[] = []; + let existingRequests: IntakeExistingRequest[] = []; + try { + const searchRes = await intakeApi.search(description); + matches = searchRes.matches; + existingRequests = searchRes.existing_requests; + } catch { + // Search failed — continue + } + + const res = await intakeApi.submit({ + title: buildTitle(), + description, + extra_context: undefined, + contact_email: contactEmail.trim() || undefined, + contact_ok: contactOk, + matches, + existing_requests: existingRequests, + }); + setSubmitResult({ url: res.issue_url, number: res.issue_number }); + } catch (err: any) { + setError(err?.response?.data?.detail || 'Failed to create request. 
Please try again.'); + } finally { + setSubmitting(false); + } + }; + + const canSubmit = problemDescription.trim().length > 0; + + if (submitResult) { + return ( + + + + + Request Submitted + + + Issue #{submitResult.number} has been created. The designer will review + your request and any existing coverage we found. + + + + + + ); + } + + return ( + + + + Request a CodeBundle + + + Describe the problem you're solving. The designer will figure out the rest. + + + + {error && ( + setError('')}> + {error} + + )} + + + {/* Mode toggle */} + + v && setMode(v)} + size="small" + > + + + Simple + + + + Explicit + + + + {mode === 'simple' + ? 'Describe naturally — the designer has autonomy' + : 'Specify tasks and variables if you know them'} + + + + {/* Question 1: Core */} + + What problem are you trying to solve? + + + Describe the infrastructure task, health check, or troubleshooting scenario in your own words. + + setProblemDescription(e.target.value)} + sx={{ mb: 3 }} + /> + + {/* Question 2: Platform */} + + What platform or infrastructure does this involve? + + + Optional — we can often infer this from your description. + + setPlatform(e.target.value)} + sx={{ mb: 3 }} + /> + + {/* Question 3: Healthy */} + + What does "healthy" or "working" look like? How would you know something is wrong? + + + Optional — helps the designer scope the checks. + + setHealthyLooksLike(e.target.value)} + sx={{ mb: 3 }} + /> + + {/* Question 4: Anything else */} + + Anything else the designer should know? + + setAnythingElse(e.target.value)} + sx={{ mb: 3 }} + /> + + {/* Explicit mode: tasks and variables */} + + + + Only fill these if you already know the structure. Otherwise leave blank. 
+ + setExplicitTasks(e.target.value)} + sx={{ mb: 2 }} + /> + setExplicitVariables(e.target.value)} + /> + + + + {/* Contact (collapsible) */} + + + + + setContactEmail(e.target.value)} + sx={{ mb: 1 }} + /> + setContactOk(e.target.checked)} />} + label="It's OK to reach out for clarification" + /> + + + + + + + We search existing CodeBundles first, then create your request with the results attached for the designer. + + + + ); +} diff --git a/cc-registry-v2/frontend/src/services/api.ts b/cc-registry-v2/frontend/src/services/api.ts index 773bd7f5664..39b8fa85f2a 100644 --- a/cc-registry-v2/frontend/src/services/api.ts +++ b/cc-registry-v2/frontend/src/services/api.ts @@ -1031,6 +1031,69 @@ export const chatApi = { } }; +// ============================================================================= +// Intake Wizard API +// ============================================================================= + +export interface IntakeSearchMatch { + display_name: string; + slug: string; + collection_slug: string; + platform: string; + description: string; + tasks: string[]; + tags: string[]; + relevance_score: number; + source_url: string; +} + +export interface IntakeExistingRequest { + number: number; + title: string; + url: string; + created_at: string; +} + +export interface IntakeSearchResponse { + matches: IntakeSearchMatch[]; + existing_requests: IntakeExistingRequest[]; + suggested_platform: string; + query_used: string; +} + +export interface IntakeSubmitRequest { + title: string; + description: string; + extra_context?: string; + contact_email?: string; + contact_ok?: boolean; + matches: IntakeSearchMatch[]; + existing_requests: IntakeExistingRequest[]; +} + +export interface IntakeSubmitResponse { + issue_url: string; + issue_number: number; + message: string; +} + +export const intakeApi = { + async getPlatforms(): Promise<{ platforms: string[] }> { + const response = await api.get('/intake/platforms'); + return response.data; + }, + + async 
search(description: string, platform?: string): Promise { + const response = await api.post('/intake/search', { description, platform }); + return response.data; + }, + + async submit(payload: IntakeSubmitRequest): Promise { + const response = await api.post('/intake/submit', payload); + return response.data; + }, +}; + export const githubApi = { // Get issue template for a query async getIssueTemplate(userQuery: string): Promise { diff --git a/cc-registry-v2/schedules.yaml b/cc-registry-v2/schedules.yaml index cfbe388c2bf..74ba1146412 100644 --- a/cc-registry-v2/schedules.yaml +++ b/cc-registry-v2/schedules.yaml @@ -73,6 +73,23 @@ schedules: minute: 0 enabled: false + # ============================================================================= + # IMAGE CATALOG SYNC (PAPI-facing CodeCollection image tracking) + # ============================================================================= + # Polls each CodeCollection's configured `image_source` (e.g. its OCI + # registry) and mirrors the discovered image tags into + # codecollection_versions. PAPI reads from this catalog instead of + # running the corestate-operator. The task is idempotent and fast (one + # HTTP listing per CC), so we run it frequently. + + - name: sync-image-tags + task: app.tasks.image_sync_tasks.sync_image_tags_task + description: "Poll OCI registries for each CC and refresh the image catalog" + schedule_type: interval + interval: + minutes: 5 + enabled: true + # ============================================================================= # METRICS & STATISTICS # ============================================================================= diff --git a/codecollections.yaml b/codecollections.yaml index e0d4e37e4de..8bb304653f4 100644 --- a/codecollections.yaml +++ b/codecollections.yaml @@ -1,3 +1,17 @@ +# ---------------------------------------------------------------------- +# Registry-tracked CodeCollections. +# +# Optional fields: +# visibility - 'public' (default) or 'hidden'. 
Hidden CCs are still +# synced and their image tags tracked for PAPI, but they +# are excluded from public-facing endpoints (website, +# MCP, AI search). 'hidden' is a UX toggle; OCI registry +# ACLs remain the source of truth for image access. +# image_source - which ImageSource plugin to drive ('oci' | 'static' +# | custom). Omit to skip image tracking entirely. +# image_registry - OCI repo path used by the 'oci' source. +# default_ref - branch the 'latest' pointer should follow (default 'main'). +# ---------------------------------------------------------------------- codecollections: - name: RunWhen Public CodeCollection slug: rw-public-codecollection @@ -7,6 +21,8 @@ codecollections: owner_icon: https://assets-global.website-files.com/64f9646ad0f39e9ee5c116c4/659f80c7391d64a0ec2a840e_icon_rw-platform.svg owner_email: shea.stewart@runwhen.com description: Python based CodeCollections that do not leverage a command line binary or bash script + image_source: oci + image_registry: ghcr.io/runwhen-contrib/rw-public-codecollection - name: RunWhen CLI CodeCollection slug: rw-cli-codecollection git_url: https://github.com/runwhen-contrib/rw-cli-codecollection @@ -14,6 +30,8 @@ codecollections: owner_icon: https://assets-global.website-files.com/64f9646ad0f39e9ee5c116c4/659f80c7391d64a0ec2a840e_icon_rw-platform.svg owner_email: shea.stewart@runwhen.com description: CodeCollections based on command line binaries and bash scripts + image_source: oci + image_registry: ghcr.io/runwhen-contrib/rw-cli-codecollection - name: RunWhen Generic CodeCollection slug: rw-generic-codecollection git_url: https://github.com/runwhen-contrib/rw-generic-codecollection diff --git a/mcp-server/requirements.txt b/mcp-server/requirements.txt index 3b220f2ae9f..0b526a4e4f5 100644 --- a/mcp-server/requirements.txt +++ b/mcp-server/requirements.txt @@ -5,5 +5,8 @@ uvicorn[standard]>=0.24.0 pyyaml>=6.0.0 httpx>=0.25.0 +# GitHub App authentication +PyJWT[crypto]>=2.8.0 + # Utilities 
class _AppTokenManager:
    """Manage the GitHub App JWT -> installation access token lifecycle.

    An app JWT is signed with the app's private key and exchanged for an
    installation access token. The token is cached on the instance and
    reused until the cached expiry passes; refreshes are serialized with a
    lock so concurrent callers trigger a single exchange.
    """

    # Seconds a freshly issued installation token is treated as valid.
    # GitHub documents a one-hour lifetime; refreshing ~100 s early avoids
    # sending a token that expires mid-request.
    _TOKEN_TTL_SECONDS = 3500
    # Lifetime of the signed app JWT (GitHub allows at most 10 minutes).
    _JWT_TTL_SECONDS = 600
    # "iat" is back-dated by this much to tolerate clock drift.
    _JWT_CLOCK_SKEW_SECONDS = 60

    def __init__(self, app_id: str, private_key_raw: str, installation_id: Optional[int] = None):
        """Store the app credentials and start with an empty token cache.

        Args:
            app_id: GitHub App identifier, used as the JWT issuer.
            private_key_raw: PEM private key, or a base64-encoded PEM.
            installation_id: Installation to mint tokens for. When omitted it
                is discovered on the first ``get_token`` call.
        """
        self._app_id = app_id
        self._installation_id = installation_id
        self._private_key = self._decode_key(private_key_raw)
        self._token: Optional[str] = None
        self._expires_at: float = 0
        self._lock = threading.Lock()

    @property
    def available(self) -> bool:
        """True when a usable private key was decoded."""
        return bool(self._private_key)

    @staticmethod
    def _decode_key(raw: str) -> Optional[str]:
        """Normalize the configured private key to PEM text.

        Accepts either a PEM string or a base64-encoded PEM. Surrounding
        whitespace is ignored, and literal backslash-n sequences inside a PEM
        (a common artifact of passing a key through an environment variable)
        are converted to real newlines.

        Returns:
            The PEM text, or None (after logging an error) when the value is
            neither a PEM nor a base64-encoded PEM.
        """
        candidate = (raw or "").strip()
        if candidate.startswith("-----BEGIN"):
            # A PEM never contains a backslash, so this only undoes escaping.
            return candidate.replace("\\n", "\n")
        try:
            decoded = base64.b64decode(candidate).decode("utf-8")
        except ValueError:
            # Covers binascii.Error (bad base64) and UnicodeDecodeError.
            decoded = ""
        if "-----BEGIN" in decoded:
            return decoded
        logger.error("GITHUB_APP_PRIVATE_KEY is not a valid PEM or base64-encoded PEM")
        return None

    def _make_jwt(self) -> str:
        """Return an RS256-signed app JWT.

        Raises:
            RuntimeError: If no private key is available to sign with.
        """
        if not self._private_key:
            raise RuntimeError("GitHub App private key is not available")
        # Kept function-local so importing this module does not require PyJWT.
        import jwt as pyjwt

        now = int(time.time())
        payload = {
            "iat": now - self._JWT_CLOCK_SKEW_SECONDS,
            "exp": now + self._JWT_TTL_SECONDS,
            "iss": self._app_id,
        }
        return pyjwt.encode(payload, self._private_key, algorithm="RS256")

    def _discover_installation(self, app_jwt: str) -> Optional[int]:
        """Look up the app's installation id via ``GET /app/installations``.

        Best effort: any failure is logged and reported as None. When the app
        has several installations the first one listed is used.
        """
        try:
            resp = httpx.get(
                f"{GITHUB_API_BASE}/app/installations",
                headers={"Authorization": f"Bearer {app_jwt}", "Accept": "application/vnd.github+json"},
                timeout=15,
            )
            if resp.status_code != 200:
                logger.error("Failed to list installations: %s %s", resp.status_code, resp.text)
                return None
            installs = resp.json()
            if not installs:
                logger.error("No installations for GitHub App %s", self._app_id)
                return None
            if len(installs) > 1:
                # Make the otherwise silent choice visible to operators.
                logger.warning(
                    "GitHub App %s has %d installations; using the first one. "
                    "Set GITHUB_APP_INSTALLATION_ID to select a specific installation.",
                    self._app_id,
                    len(installs),
                )
            return installs[0]["id"]
        except Exception as exc:
            logger.error("Error discovering installation: %s", exc)
            return None

    def _cached_token(self) -> Optional[str]:
        """Return the cached token if it has not reached its cached expiry."""
        if self._token and time.time() < self._expires_at:
            return self._token
        return None

    def get_token(self) -> str:
        """Return an installation access token, minting a new one if needed.

        Raises:
            RuntimeError: If the private key is unavailable, the installation
                cannot be discovered, or GitHub does not answer 201 to the
                token request.
        """
        token = self._cached_token()
        if token:
            return token
        with self._lock:
            # Re-check under the lock: another thread may have refreshed
            # while this one was waiting.
            token = self._cached_token()
            if token:
                return token
            app_jwt = self._make_jwt()
            if not self._installation_id:
                self._installation_id = self._discover_installation(app_jwt)
            if not self._installation_id:
                raise RuntimeError("Cannot discover GitHub App installation")
            resp = httpx.post(
                f"{GITHUB_API_BASE}/app/installations/{self._installation_id}/access_tokens",
                headers={"Authorization": f"Bearer {app_jwt}", "Accept": "application/vnd.github+json"},
                timeout=15,
            )
            if resp.status_code != 201:
                raise RuntimeError(f"Failed to get installation token: {resp.status_code} {resp.text}")
            self._token = resp.json()["token"]
            self._expires_at = time.time() + self._TOKEN_TTL_SECONDS
            return self._token
""" - + def __init__(self): + self._app_mgr: Optional[_AppTokenManager] = None self.token = os.getenv("GITHUB_TOKEN") + + app_id = os.getenv("GITHUB_APP_ID") + app_key = os.getenv("GITHUB_APP_PRIVATE_KEY") + inst_id = os.getenv("GITHUB_APP_INSTALLATION_ID") + if app_id and app_key: + mgr = _AppTokenManager(app_id, app_key, int(inst_id) if inst_id else None) + if mgr.available: + self._app_mgr = mgr + logger.info("GitHub App authentication configured (app_id=%s)", app_id) + self.client = httpx.Client( base_url=GITHUB_API_BASE, headers={ "Accept": "application/vnd.github+json", "X-GitHub-Api-Version": "2022-11-28", }, - timeout=30.0 + timeout=30.0, ) + self._refresh_auth() + + def _refresh_auth(self): + """Set the Authorization header using the best available credential.""" + if self._app_mgr: + try: + token = self._app_mgr.get_token() + self.client.headers["Authorization"] = f"Bearer {token}" + return + except Exception as exc: + logger.warning("App token refresh failed, falling back to PAT: %s", exc) if self.token: self.client.headers["Authorization"] = f"Bearer {self.token}" - + def is_configured(self) -> bool: - """Check if GitHub token is configured.""" - return bool(self.token) + """Check if any GitHub credential is available.""" + return bool(self._app_mgr and self._app_mgr.available) or bool(self.token) def _format_tasks(self, tasks: List[str]) -> str: """Format tasks as a numbered list.""" @@ -108,9 +210,10 @@ def create_issue(self, request: CodeBundleRequest) -> Dict[str, Any]: if not self.is_configured(): return { "success": False, - "error": "GitHub token not configured. Set GITHUB_TOKEN environment variable." + "error": "GitHub not configured. Set GITHUB_APP_ID/GITHUB_APP_PRIVATE_KEY or GITHUB_TOKEN." } + self._refresh_auth() title = self._generate_title(request) body = self._build_issue_body(request) @@ -160,8 +263,8 @@ def check_existing_issues(self, search_term: str) -> List[Dict[str, Any]]: Returns list of potentially related open issues. 
""" + self._refresh_auth() try: - # Search for open issues with the search term query = f"repo:{REPO_OWNER}/{REPO_NAME} is:issue is:open {search_term}" response = self.client.get( "/search/issues",