Harden LLM access: secrets only in server .env, no URL in repo.

Require LLM_BASE_URL and LLM_API_KEY for automatic generation, add per-user rate limits, stop publishing backend/LLM settings in docker-compose, and document secure deployment.
2026-06-04 13:24:40 +02:00
parent 182eae1e36
commit 4d2ced85a3
11 changed files with 487 additions and 169 deletions
@@ -18,9 +18,22 @@ class Settings(BaseSettings):
    rate_limit_window_seconds: int = Field(default=60, ge=1)
    max_request_bytes: int = Field(default=1_048_576, ge=1_024)
    llm_api_key: str | None = None
-    llm_base_url: str = ""
+    llm_base_url: str = Field(
+        default="",
+        description="URL base del LLM (solo servidor). No incluir en el repositorio.",
+    )
    llm_model: str = "qwen3.5:35b"
    llm_timeout_seconds: int = Field(default=180, ge=5)
+    llm_generate_rate_limit_requests: int = Field(
+        default=5,
+        ge=1,
+        description="Máximo de POST /exam/generate por usuario y ventana.",
+    )
+    llm_generate_rate_limit_window_seconds: int = Field(
+        default=3600,
+        ge=60,
+        description="Ventana en segundos para el límite de generación con LLM.",
+    )
    jwt_secret_key: str = Field(min_length=32)
    jwt_algorithm: str = "HS256"
    jwt_expire_minutes: int = Field(default=60 * 24, ge=5)
@@ -56,6 +69,13 @@ class Settings(BaseSettings):
    def trusted_hosts_list(self) -> list[str]:
        return [host.strip() for host in self.trusted_hosts.split(",") if host.strip()]

+    @property
+    def llm_ready(self) -> bool:
+        """Return True only when both the LLM base URL and the API key are non-blank."""
+        base_url = self.llm_base_url.strip()
+        api_key = (self.llm_api_key or "").strip()
+        return bool(base_url and api_key)
+

@lru_cache
 def get_settings() -> Settings:
@@ -51,9 +51,14 @@ def error_payload(code: str, message: str, details: object | None = None) -> dic
 def register_exception_handlers(app: FastAPI) -> None:
    @app.exception_handler(AppError)
    async def app_error_handler(_: Request, exc: AppError) -> ORJSONResponse:
+        headers: dict[str, str] | None = None
+        retry_after = getattr(exc, "retry_after", None)
+        if retry_after is not None:
+            headers = {"Retry-After": str(retry_after)}
        return ORJSONResponse(
            status_code=exc.status_code,
            content=error_payload(exc.code, exc.message),
+            headers=headers,
        )

    @app.exception_handler(StarletteHTTPException)
@@ -0,0 +1,35 @@
+import math
+import time
+from collections import defaultdict, deque
+from threading import Lock
+from uuid import UUID
+
+from app.core.config import Settings
+from app.core.errors import AppError
+
+# Held by enforce_llm_rate_limit while it reads or mutates _buckets.
+_lock = Lock()
+# Request timestamps (time.monotonic() values) per str(user_id), kept in this
+# process's memory only.
+_buckets: dict[str, deque[float]] = defaultdict(deque)
+
+
+class LLMRateLimitError(AppError):
+    """HTTP 429 error for a user who exceeded the LLM generation quota.
+
+    Args:
+        retry_after: Seconds the client should wait before retrying; stored
+            on the instance as ``retry_after``.
+    """
+
+    def __init__(self, retry_after: int) -> None:
+        super().__init__(
+            status_code=429,
+            code="llm_rate_limited",
+            message="Too many AI generation requests. Try again later.",
+        )
+        self.retry_after = retry_after
+
+
+def enforce_llm_rate_limit(user_id: UUID, settings: Settings) -> None:
+    """Count one LLM generation request for ``user_id`` or reject it.
+
+    Keeps a sliding window of request timestamps per user in module-level
+    memory, so the count is local to this process. This function never
+    removes a user's bucket once it has been created.
+
+    Args:
+        user_id: User the request is counted against.
+        settings: Supplies ``llm_generate_rate_limit_requests`` (requests
+            allowed per window) and
+            ``llm_generate_rate_limit_window_seconds`` (window length).
+
+    Raises:
+        LLMRateLimitError: The user already used every request allowed in
+            the current window. ``retry_after`` is the number of whole
+            seconds until the oldest counted request leaves the window.
+    """
+    key = str(user_id)
+    limit = settings.llm_generate_rate_limit_requests
+    window = settings.llm_generate_rate_limit_window_seconds
+
+    with _lock:
+        # Read the clock under the lock so timestamps are appended in
+        # non-decreasing order and the oldest one is always at the left.
+        now = time.monotonic()
+        bucket = _buckets[key]
+        while bucket and now - bucket[0] > window:
+            bucket.popleft()
+        if len(bucket) >= limit:
+            # A slot frees up when the oldest timestamp expires, not a full
+            # window from now. Round up and never advertise 0 seconds.
+            remaining = window - (now - bucket[0]) if bucket else window
+            raise LLMRateLimitError(retry_after=max(1, math.ceil(remaining)))
+        bucket.append(now)