-
Notifications
You must be signed in to change notification settings - Fork 0
Open
Description
Summary
Create a registry system to store, retrieve, and auto-detect schemas based on URLs.
Design
# fetcharoo/schemas/registry.py
from typing import Dict, Optional, List
from .base import SiteSchema
# Module-level registry mapping each schema's ``name`` to the schema object.
# Every function in this module reads or mutates this single shared dict.
_SCHEMAS: Dict[str, SiteSchema] = {}
def register_schema(schema: SiteSchema) -> None:
    """Add ``schema`` to the module-wide registry under its ``name``.

    Raises:
        ValueError: If a schema with the same name is already registered.
    """
    if schema.name not in _SCHEMAS:
        _SCHEMAS[schema.name] = schema
        return
    # Duplicate names are rejected rather than silently overwritten.
    raise ValueError(f"Schema '{schema.name}' already registered")
def get_schema(name: str) -> Optional[SiteSchema]:
    """Return the schema registered as ``name``, or ``None`` if there is none."""
    found = _SCHEMAS.get(name, None)
    return found
def detect_schema(url: str) -> Optional[SiteSchema]:
    """Return the first registered schema whose ``matches(url)`` is truthy.

    Schemas are tried in the registry's iteration order and the search stops
    at the first hit. ``None`` is returned when no schema matches.
    """
    candidates = (entry for entry in _SCHEMAS.values() if entry.matches(url))
    return next(candidates, None)
def list_schemas() -> List[str]:
    """Return a new list holding the name of every registered schema."""
    names = [registered_name for registered_name in _SCHEMAS]
    return names
def get_all_schemas() -> Dict[str, SiteSchema]:
    """Return a shallow copy of the registry (name -> schema).

    Mutating the returned dict does not affect the registry itself; the
    schema objects inside it are shared, not copied.
    """
    snapshot = dict(_SCHEMAS)
    return snapshot
def clear_registry() -> None:
    """Remove every registered schema; intended mainly for test isolation."""
    # Emptied in place so the module-level dict object itself is preserved.
    _SCHEMAS.clear()
# Decorator for easy class-based registration
def schema(cls):
    """Register a schema via decorator syntax and return the argument unchanged.

    A class is instantiated with no arguments and that instance is
    registered; any non-class object is registered as-is.
    """
    if isinstance(cls, type):
        instance = cls()
    else:
        instance = cls
    register_schema(instance)
    # Hand back the original object so the decorated name stays usable.
    return cls

Usage
# NOTE(review): assumes the `schema` decorator is exported from
# fetcharoo.schemas alongside the other helpers - confirm against the package.
from fetcharoo.schemas import register_schema, detect_schema, schema, SiteSchema

# Register directly
my_schema = SiteSchema(name="mysite", url_pattern=r"https://mysite\.com/.*")
register_schema(my_schema)


# Or use decorator. A distinct name is used because register_schema() raises
# ValueError when the same name is registered a second time.
@schema
class OtherSiteSchema(SiteSchema):
    name = "othersite"
    url_pattern = r"https://othersite\.com/.*"


# Auto-detect. The result is bound to a new name so the `schema` decorator
# imported above is not overwritten.
matched = detect_schema("https://mysite.com/docs")

Tasks
- Implement registry module with `_SCHEMAS` dict
- Add `register_schema()`, `get_schema()`, `list_schemas()`
- Implement `detect_schema()` with URL matching
- Add `@schema` decorator for class-based registration
- Add `clear_registry()` for test isolation
- Export from `fetcharoo.schemas`
- Add unit tests for all registry functions
Acceptance Criteria
- Can register schemas by instance or decorator
- `detect_schema()` returns correct schema for matching URLs
- Returns `None` for unrecognized URLs
- `list_schemas()` shows all registered names
Dependencies
- Create SiteSchema base dataclass #11 (SiteSchema base class)
Part of
Parent issue: #10
Reactions are currently unavailable
Metadata
Metadata
Assignees
Labels
No labels