Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
9 changes: 5 additions & 4 deletions frontend/osa-chat-widget.js
Original file line number Diff line number Diff line change
Expand Up @@ -68,15 +68,16 @@
}

// Default model options for settings dropdown
// Last updated: 2026-03
// Each entry pairs a model identifier (`value`) with the text shown to the
// user (`label`).
// NOTE(review): this listing appears to interleave removed and added lines
// from a diff (one identifier is listed twice and one entry is not followed
// by a comma) - confirm the final list against the real file before use.
const DEFAULT_MODELS = [
{ value: 'anthropic/claude-sonnet-4.6', label: 'Claude Sonnet 4.6' },
{ value: 'anthropic/claude-haiku-4.5', label: 'Claude Haiku 4.5' },
{ value: 'openai/gpt-5.2-chat', label: 'GPT-5.2 Chat' },
{ value: 'openai/gpt-5-mini', label: 'GPT-5 Mini' },
{ value: 'anthropic/claude-haiku-4.5', label: 'Claude Haiku 4.5' },
{ value: 'anthropic/claude-sonnet-4.5', label: 'Claude Sonnet 4.5' },
{ value: 'google/gemini-3-flash-preview', label: 'Gemini 3 Flash' },
{ value: 'google/gemini-3-pro-preview', label: 'Gemini 3 Pro' },
{ value: 'moonshotai/kimi-k2-0905', label: 'Kimi K2' },
{ value: 'qwen/qwen3-235b-a22b-2507', label: 'Qwen3 235B' }
{ value: 'deepseek/deepseek-v3.2', label: 'DeepSeek V3.2' },
{ value: 'qwen/qwen3.5-397b-a17b', label: 'Qwen 3.5 397B' }
];

// Helper to get human-readable label for a model
Expand Down
58 changes: 58 additions & 0 deletions src/api/main.py
Original file line number Diff line number Diff line change
Expand Up @@ -19,15 +19,23 @@
create_community_router,
metrics_public_router,
metrics_router,
mirrors_router,
sync_router,
)
from src.api.routers.health import router as health_router
from src.api.routers.widget_test import router as widget_test_router
from src.api.scheduler import start_scheduler, stop_scheduler
from src.assistants import discover_assistants, registry
from src.core.logging import configure_secure_logging
from src.knowledge.db import reset_active_mirror, set_active_mirror
from src.knowledge.mirror import CorruptMirrorError, get_mirror
from src.metrics.db import init_metrics_db
from src.metrics.middleware import MetricsMiddleware

# Must run before any getLogger() calls to ensure handlers with
# SecureFormatter are installed on the root logger first.
configure_secure_logging()

logger = logging.getLogger(__name__)

# Discover assistants at module load time to populate registry
Expand Down Expand Up @@ -181,6 +189,53 @@ def create_app() -> FastAPI:
# Metrics middleware - captures request timing and logs to metrics DB
app.add_middleware(MetricsMiddleware)

# Mirror routing middleware - sets ContextVar for transparent DB routing
@app.middleware("http")
async def mirror_routing_middleware(request: Any, call_next: Any) -> Any:
    """Route database access to mirror when X-Mirror-ID header is present.

    Requests without the header (or with an empty value) are passed straight
    to the next handler. Otherwise the mirror is looked up via ``get_mirror``
    and the request is either rejected with a JSON error or executed with the
    mirror marked active for its duration.

    Args:
        request: Incoming HTTP request; only its headers are read here.
        call_next: Callable that forwards the request down the stack.

    Returns:
        The downstream response, or a ``JSONResponse`` with status:
        400 when ``get_mirror`` raises ``ValueError``,
        500 when it raises ``CorruptMirrorError`` or ``OSError``,
        404 when it returns a falsy value,
        410 when the mirror reports itself expired.
    """
    from fastapi.responses import JSONResponse

    mirror_id = request.headers.get("x-mirror-id")
    if not mirror_id:
        return await call_next(request)

    try:
        info = get_mirror(mirror_id)
    except ValueError:
        # Invalid mirror ID format (path traversal attempt, etc.)
        return JSONResponse(
            status_code=400,
            content={"detail": f"Invalid mirror ID format: '{mirror_id}'"},
        )
    except CorruptMirrorError:
        # Log like the OSError branch so a 500 is never returned silently.
        logger.error("Corrupt metadata for mirror %s", mirror_id, exc_info=True)
        return JSONResponse(
            status_code=500,
            content={"detail": f"Mirror '{mirror_id}' has corrupt metadata"},
        )
    except OSError:
        logger.error("Filesystem error reading mirror %s", mirror_id, exc_info=True)
        return JSONResponse(
            status_code=500,
            content={"detail": f"Failed to read mirror '{mirror_id}' metadata"},
        )

    if not info:
        return JSONResponse(
            status_code=404,
            content={"detail": f"Mirror '{mirror_id}' not found"},
        )
    if info.is_expired():
        return JSONResponse(
            status_code=410,
            content={"detail": f"Mirror '{mirror_id}' has expired"},
        )

    # Activate the mirror only for this request; the token lets the finally
    # clause undo the change even when the downstream handler raises.
    token = set_active_mirror(mirror_id)
    try:
        return await call_next(request)
    finally:
        reset_active_mirror(token)

# Register routes
register_routes(app)

Expand All @@ -207,6 +262,9 @@ def register_routes(app: FastAPI) -> None:
# Sync router (not community-specific)
app.include_router(sync_router)

# Mirror management router
app.include_router(mirrors_router)

# Metrics routers (admin + public)
app.include_router(metrics_router)
app.include_router(metrics_public_router)
Expand Down
2 changes: 2 additions & 0 deletions src/api/routers/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -4,12 +4,14 @@
from src.api.routers.community import create_community_router
from src.api.routers.metrics import router as metrics_router
from src.api.routers.metrics_public import router as metrics_public_router
from src.api.routers.mirrors import router as mirrors_router
from src.api.routers.sync import router as sync_router

__all__ = [
"communities_router",
"create_community_router",
"metrics_public_router",
"metrics_router",
"mirrors_router",
"sync_router",
]
59 changes: 58 additions & 1 deletion src/api/routers/community.py
Original file line number Diff line number Diff line change
Expand Up @@ -34,7 +34,7 @@
from src.assistants.registry import AssistantInfo
from src.core.config.community import WidgetConfig
from src.core.services.litellm_llm import create_openrouter_llm
from src.metrics.cost import estimate_cost
from src.metrics.cost import COST_BLOCK_THRESHOLD, COST_WARN_THRESHOLD, MODEL_PRICING, estimate_cost
from src.metrics.db import (
RequestLogEntry,
extract_token_usage,
Expand Down Expand Up @@ -602,6 +602,59 @@ def _select_model(
return (default_model, default_provider)


def _check_model_cost(model: str, key_source: str) -> None:
"""Check if a model's cost exceeds platform thresholds.

Only enforced when using platform or community API keys (not BYOK).
Logs a warning for moderately expensive models and blocks very expensive ones.

Args:
model: Model identifier (e.g., "openai/gpt-4o").
key_source: One of "byok", "community", or "platform".

Raises:
HTTPException(403): If model cost exceeds the block threshold.
"""
if key_source == "byok":
return

pricing = MODEL_PRICING.get(model)
if pricing is None:
logger.error(
"Model %s not in pricing table; blocking on platform/community key. "
"Add this model to MODEL_PRICING in src/metrics/cost.py.",
model,
)
raise HTTPException(
status_code=403,
detail=(
f"Model '{model}' is not in the approved pricing list and cannot be used "
"with platform or community keys. To use this model, provide your own "
"API key via the X-OpenRouter-Key header."
),
)
input_rate = pricing.input_per_1m

if input_rate >= COST_BLOCK_THRESHOLD:
raise HTTPException(
status_code=403,
detail=(
f"Model '{model}' costs ${input_rate:.2f}/1M input tokens, "
f"which exceeds the platform limit of ${COST_BLOCK_THRESHOLD:.2f}/1M. "
"To use expensive models, provide your own API key via the "
"X-OpenRouter-Key header. Get a key at: https://openrouter.ai/keys"
),
)

if input_rate >= COST_WARN_THRESHOLD:
logger.warning(
"Model %s costs $%.2f/1M input tokens (warn threshold: $%.2f)",
model,
input_rate,
COST_WARN_THRESHOLD,
)


def _derive_user_id(token: str) -> str:
"""Derive a stable user ID from API token for cache optimization.

Expand Down Expand Up @@ -717,6 +770,10 @@ def create_community_assistant(
selected_model, selected_provider = _select_model(
community_info, requested_model, has_byok=bool(byok)
)

# Block expensive models on platform/community keys
_check_model_cost(selected_model, key_source)

logger.debug(
"Using model %s",
selected_model,
Expand Down
Loading