Summary
Create a registry system to store, retrieve, and auto-detect schemas based on URLs.
Design
# fetcharoo/schemas/registry.py
from typing import Dict, Optional, List
from .base import SiteSchema
# Module-level registry: maps each schema's ``name`` to the registered
# SiteSchema object. The functions in this module read and mutate this one dict.
_SCHEMAS: Dict[str, SiteSchema] = {}
def register_schema(schema: SiteSchema) -> None:
    """Add *schema* to the module-level registry, keyed by ``schema.name``.

    Raises:
        ValueError: If a schema with the same name has already been registered.
    """
    if schema.name not in _SCHEMAS:
        _SCHEMAS[schema.name] = schema
        return
    # Duplicate names are rejected rather than silently overwritten.
    raise ValueError(f"Schema '{schema.name}' already registered")
def get_schema(name: str) -> Optional[SiteSchema]:
    """Look up a registered schema by *name*; return ``None`` if it is absent."""
    try:
        return _SCHEMAS[name]
    except KeyError:
        return None
def detect_schema(url: str) -> Optional[SiteSchema]:
    """Return the first registered schema whose ``matches(url)`` is truthy.

    Schemas are tried in the iteration order of the registry's values; the
    search stops at the first match. Returns ``None`` when nothing matches.
    """
    matching = (candidate for candidate in _SCHEMAS.values() if candidate.matches(url))
    return next(matching, None)
def list_schemas() -> List[str]:
    """Return the names of all registered schemas as a new list."""
    # Iterating a dict yields its keys, so this is a snapshot of the names.
    return list(_SCHEMAS)
def get_all_schemas() -> Dict[str, SiteSchema]:
    """Return a shallow copy of the registry (name -> schema).

    Mutating the returned dict does not affect the registry itself; the
    schema objects inside it are the same objects that were registered.
    """
    return dict(_SCHEMAS)
def clear_registry() -> None:
    """Remove every schema from the registry.

    Intended mainly for tests, so that each test can start from an empty
    registry.
    """
    # Empties the dict in place, so existing references to it stay valid.
    _SCHEMAS.clear()
# Decorator for easy class-based registration
def schema(cls):
    """Register a schema via decorator syntax and return *cls* unchanged.

    If *cls* is a class, it is instantiated with no arguments and that
    instance is registered; any other object is registered as-is. Registration
    goes through ``register_schema``.
    """
    if isinstance(cls, type):
        to_register = cls()
    else:
        to_register = cls
    register_schema(to_register)
    return cls
Usage
from fetcharoo.schemas import register_schema, detect_schema, schema, SiteSchema

# Register directly
my_schema = SiteSchema(name="mysite", url_pattern=r"https://mysite\.com/.*")
register_schema(my_schema)

# Or use decorator (an alternative to the direct registration above, not an
# addition: registering the name "mysite" a second time raises ValueError)
@schema
class MySiteSchema(SiteSchema):
    name = "mysite"
    url_pattern = r"https://mysite\.com/.*"

# Auto-detect (bound to a new name so the `schema` decorator is not shadowed)
detected = detect_schema("https://mysite.com/docs")
Tasks
Acceptance Criteria
- Can register schemas by instance or decorator
- `detect_schema()` returns correct schema for matching URLs
- Returns `None` for unrecognized URLs
- `list_schemas()` shows all registered names
Dependencies
Part of
Parent issue: #10
Summary
Create a registry system to store, retrieve, and auto-detect schemas based on URLs.
Design
Usage
Tasks
- `_SCHEMAS` dict
- `register_schema()`, `get_schema()`, `list_schemas()`
- `detect_schema()` with URL matching
- `@schema` decorator for class-based registration
- `clear_registry()` for test isolation
- Export from `fetcharoo.schemas`

Acceptance Criteria
- Can register schemas by instance or decorator
- `detect_schema()` returns correct schema for matching URLs
- Returns `None` for unrecognized URLs
- `list_schemas()` shows all registered names

Dependencies
Part of
Parent issue: #10