Harden LLM access: secrets only in server .env, no URL in repo.
Require LLM_BASE_URL and LLM_API_KEY for automatic generation, add per-user rate limits, stop publishing backend/LLM settings in docker-compose, and document secure deployment.
This commit is contained in:
@@ -18,9 +18,22 @@ class Settings(BaseSettings):
|
||||
rate_limit_window_seconds: int = Field(default=60, ge=1)
|
||||
max_request_bytes: int = Field(default=1_048_576, ge=1_024)
|
||||
llm_api_key: str | None = None
|
||||
llm_base_url: str = ""
|
||||
llm_base_url: str = Field(
|
||||
default="",
|
||||
description="URL base del LLM (solo servidor). No incluir en el repositorio.",
|
||||
)
|
||||
llm_model: str = "qwen3.5:35b"
|
||||
llm_timeout_seconds: int = Field(default=180, ge=5)
|
||||
llm_generate_rate_limit_requests: int = Field(
|
||||
default=5,
|
||||
ge=1,
|
||||
description="Máximo de POST /exam/generate por usuario y ventana.",
|
||||
)
|
||||
llm_generate_rate_limit_window_seconds: int = Field(
|
||||
default=3600,
|
||||
ge=60,
|
||||
description="Ventana en segundos para el límite de generación con LLM.",
|
||||
)
|
||||
jwt_secret_key: str = Field(min_length=32)
|
||||
jwt_algorithm: str = "HS256"
|
||||
jwt_expire_minutes: int = Field(default=60 * 24, ge=5)
|
||||
@@ -56,6 +69,13 @@ class Settings(BaseSettings):
|
||||
def trusted_hosts_list(self) -> list[str]:
    """Return the comma-separated ``trusted_hosts`` value as a list.

    Each entry is stripped of surrounding whitespace, and entries that are
    empty after stripping are dropped.
    """
    hosts: list[str] = []
    for raw_host in self.trusted_hosts.split(","):
        cleaned = raw_host.strip()
        if cleaned:
            hosts.append(cleaned)
    return hosts
|
||||
|
||||
@property
def llm_ready(self) -> bool:
    """Whether both the LLM base URL and the LLM API key are configured.

    A value that is empty or contains only whitespace counts as missing,
    and so does an API key of ``None``.
    """
    if not self.llm_base_url.strip():
        return False
    api_key = self.llm_api_key
    return bool(api_key and api_key.strip())
|
||||
|
||||
|
||||
@lru_cache
|
||||
def get_settings() -> Settings:
|
||||
|
||||
@@ -51,9 +51,14 @@ def error_payload(code: str, message: str, details: object | None = None) -> dic
|
||||
def register_exception_handlers(app: FastAPI) -> None:
|
||||
@app.exception_handler(AppError)
async def app_error_handler(_: Request, exc: AppError) -> ORJSONResponse:
    """Render an ``AppError`` as a JSON error response.

    The response uses the exception's status code and a body built by
    ``error_payload`` from its code and message. When the exception has a
    non-``None`` ``retry_after`` attribute, its string form is sent as the
    ``Retry-After`` header.
    """
    # getattr with a default tolerates errors without a retry_after attribute.
    delay = getattr(exc, "retry_after", None)
    extra_headers: dict[str, str] | None = (
        None if delay is None else {"Retry-After": str(delay)}
    )
    body = error_payload(exc.code, exc.message)
    return ORJSONResponse(
        content=body,
        status_code=exc.status_code,
        headers=extra_headers,
    )
|
||||
|
||||
@app.exception_handler(StarletteHTTPException)
|
||||
|
||||
@@ -0,0 +1,35 @@
|
||||
import time
|
||||
from collections import defaultdict, deque
|
||||
from threading import Lock
|
||||
from uuid import UUID
|
||||
|
||||
from app.core.config import Settings
|
||||
from app.core.errors import AppError
|
||||
|
||||
_lock = Lock()
|
||||
_buckets: dict[str, deque[float]] = defaultdict(deque)
|
||||
|
||||
|
||||
class LLMRateLimitError(AppError):
    """Error signalling that the LLM generation rate limit was hit.

    Built with HTTP status 429 and the error code ``llm_rate_limited``.
    """

    def __init__(self, retry_after: int) -> None:
        """Initialise the error and store ``retry_after`` on the instance.

        Args:
            retry_after: Delay, in seconds, to advertise to the client.
        """
        super().__init__(
            code="llm_rate_limited",
            status_code=429,
            message="Too many AI generation requests. Try again later.",
        )
        self.retry_after: int = retry_after
|
||||
|
||||
|
||||
def enforce_llm_rate_limit(user_id: UUID, settings: Settings) -> None:
    """Count one LLM generation request for a user, or reject it.

    Keeps a sliding window of request timestamps per user in the
    module-level ``_buckets`` mapping, guarded by ``_lock``.

    NOTE(review): the counters live in this process's memory only; confirm
    whether the deployment runs more than one worker process.

    Args:
        user_id: User the request is counted against.
        settings: Supplies ``llm_generate_rate_limit_requests`` (maximum
            requests per window) and
            ``llm_generate_rate_limit_window_seconds`` (window length).

    Raises:
        LLMRateLimitError: If the user already has the maximum number of
            requests inside the current window. ``retry_after`` is the
            number of whole seconds until the oldest counted request has
            left the window, not the full window length.
    """
    key = str(user_id)
    # Monotonic clock, so wall-clock adjustments cannot shift the window.
    now = time.monotonic()
    limit = settings.llm_generate_rate_limit_requests
    window = settings.llm_generate_rate_limit_window_seconds

    with _lock:
        bucket = _buckets[key]
        # Drop timestamps that have aged out of the window.
        while bucket and now - bucket[0] > window:
            bucket.popleft()
        if len(bucket) >= limit:
            # A slot frees up when the oldest entry expires. Fall back to the
            # full window if the bucket is empty (only possible if limit <= 0).
            remaining = window - (now - bucket[0]) if bucket else window
            # Round up past `remaining` so that, after waiting this long, the
            # oldest entry is strictly older than the window and is evicted.
            raise LLMRateLimitError(retry_after=int(remaining) + 1)
        bucket.append(now)
|
||||
Reference in New Issue
Block a user