Harden LLM access: secrets only in server .env, no URL in repo.

Require LLM_BASE_URL and LLM_API_KEY for automatic generation, add per-user rate limits, stop publishing backend/LLM settings in docker-compose, and document secure deployment.
This commit is contained in:
Mireya Cueto Garrido
2026-06-04 13:24:40 +02:00
parent 182eae1e36
commit 4d2ced85a3
11 changed files with 487 additions and 169 deletions
+7 -4
View File
@@ -48,10 +48,13 @@ JWT_EXPIRE_MINUTES=1440
# El frontend obtiene un id_token con Google Identity Services y lo envía a POST /auth/google.
GOOGLE_CLIENT_ID=123456789012-abcdefghijklmnopqrstuvwxyz123456.apps.googleusercontent.com
# --- LLM (Sinbad2IA UJA — sin clave) ---
# URL base del servidor; el cliente llama a {LLM_BASE_URL}/api/chat
# --- LLM (solo servidor; NO commitear valores reales) ---
# Obligatorias para POST /exam/generate. Sin ellas, la generación automática queda desactivada.
# La URL no debe aparecer en el repositorio: configúrala solo en el .env del servidor.
LLM_BASE_URL=
LLM_API_KEY=
LLM_MODEL=qwen3.5:35b
LLM_TIMEOUT_SECONDS=180
# Opcional, solo si el servidor exige autenticación:
# LLM_API_KEY=
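# Límite por usuario para la generación automática: máximo de peticiones (REQUESTS) permitidas dentro de cada ventana de tiempo (WINDOW_SECONDS, en segundos)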
LLM_GENERATE_RATE_LIMIT_REQUESTS=5
LLM_GENERATE_RATE_LIMIT_WINDOW_SECONDS=3600
+20
View File
@@ -0,0 +1,20 @@
from typing import Annotated
from fastapi import Depends
from app.core.auth import get_current_user
from app.core.config import Settings, get_settings
from app.core.errors import LLMUnavailableError
from app.core.llm_rate_limit import enforce_llm_rate_limit
from app.models.user import User
def require_llm_generation(
    settings: Annotated[Settings, Depends(get_settings)],
    current_user: Annotated[User, Depends(get_current_user)],
) -> User:
    """Gate automatic generation behind LLM configuration and a per-user quota.

    Args:
        settings: Application settings, injected by FastAPI.
        current_user: The authenticated user, injected by FastAPI.

    Returns:
        The authenticated user, once the request has been counted against
        that user's generation quota.

    Raises:
        LLMUnavailableError: If ``settings.llm_ready`` is false.

    Whatever ``enforce_llm_rate_limit`` raises propagates unchanged.
    """
    if settings.llm_ready:
        # Only count the request once we know generation is actually enabled.
        enforce_llm_rate_limit(current_user.id, settings)
        return current_user
    raise LLMUnavailableError("Automatic AI generation is not available")
+2 -1
View File
@@ -4,6 +4,7 @@ from typing import Annotated
from fastapi import APIRouter, Depends
from app.api.dependencies import get_exam_service, get_llm_client
from app.api.llm_guard import require_llm_generation
from app.core.auth import get_current_user
from app.models.user import User
from app.schemas.exam import (
@@ -37,7 +38,7 @@ def build_prompt(
@router.post("/generate", response_model=ParsedQuestionsResponse)
async def generate_exam(
payload: GenerateExamRequest,
current_user: Annotated[User, Depends(get_current_user)],
current_user: Annotated[User, Depends(require_llm_generation)],
service: Annotated[ExamService, Depends(get_exam_service)],
llm_client: Annotated[LLMClient, Depends(get_llm_client)],
) -> ParsedQuestionsResponse:
+21 -1
View File
@@ -18,9 +18,22 @@ class Settings(BaseSettings):
rate_limit_window_seconds: int = Field(default=60, ge=1)
max_request_bytes: int = Field(default=1_048_576, ge=1_024)
llm_api_key: str | None = None
llm_base_url: str = ""
llm_base_url: str = Field(
default="",
description="URL base del LLM (solo servidor). No incluir en el repositorio.",
)
llm_model: str = "qwen3.5:35b"
llm_timeout_seconds: int = Field(default=180, ge=5)
llm_generate_rate_limit_requests: int = Field(
default=5,
ge=1,
description="Máximo de POST /exam/generate por usuario y ventana.",
)
llm_generate_rate_limit_window_seconds: int = Field(
default=3600,
ge=60,
description="Ventana en segundos para el límite de generación con LLM.",
)
jwt_secret_key: str = Field(min_length=32)
jwt_algorithm: str = "HS256"
jwt_expire_minutes: int = Field(default=60 * 24, ge=5)
@@ -56,6 +69,13 @@ class Settings(BaseSettings):
def trusted_hosts_list(self) -> list[str]:
return [host.strip() for host in self.trusted_hosts.split(",") if host.strip()]
@property
def llm_ready(self) -> bool:
    """Return True only when both the LLM base URL and API key are non-blank.

    Whitespace-only values count as missing, and so does an API key of
    ``None``.
    """
    base_url = self.llm_base_url.strip()
    api_key = (self.llm_api_key or "").strip()
    return bool(base_url) and bool(api_key)
@lru_cache
def get_settings() -> Settings:
+5
View File
@@ -51,9 +51,14 @@ def error_payload(code: str, message: str, details: object | None = None) -> dic
def register_exception_handlers(app: FastAPI) -> None:
@app.exception_handler(AppError)
async def app_error_handler(_: Request, exc: AppError) -> ORJSONResponse:
    """Convert an ``AppError`` into a JSON error response.

    The status code comes from the exception and the body from
    ``error_payload(exc.code, exc.message)``. If the exception carries a
    non-None ``retry_after`` attribute, it is sent as a ``Retry-After``
    header; otherwise no extra headers are set.
    """
    # Not every AppError defines retry_after, hence the getattr default.
    retry_after = getattr(exc, "retry_after", None)
    headers: dict[str, str] | None = (
        None if retry_after is None else {"Retry-After": str(retry_after)}
    )
    return ORJSONResponse(
        content=error_payload(exc.code, exc.message),
        status_code=exc.status_code,
        headers=headers,
    )
@app.exception_handler(StarletteHTTPException)
+35
View File
@@ -0,0 +1,35 @@
import time
from collections import defaultdict, deque
from threading import Lock
from uuid import UUID
from app.core.config import Settings
from app.core.errors import AppError
_lock = Lock()
_buckets: dict[str, deque[float]] = defaultdict(deque)
class LLMRateLimitError(AppError):
    """Error signalling that too many AI generation requests were made.

    Built with HTTP status 429 and error code ``llm_rate_limited``.
    ``retry_after`` is stored on the instance.
    """

    def __init__(self, retry_after: int) -> None:
        """Create the error.

        Args:
            retry_after: Value stored as ``self.retry_after``.
        """
        super().__init__(
            code="llm_rate_limited",
            status_code=429,
            message="Too many AI generation requests. Try again later.",
        )
        self.retry_after = retry_after
def enforce_llm_rate_limit(user_id: UUID, settings: Settings) -> None:
    """Count one LLM generation request for ``user_id`` or reject it.

    Each user has a deque of request timestamps (``time.monotonic()``).
    Timestamps older than the configured window are discarded, and the
    request is accepted only if fewer than the configured number remain.

    State lives in the module-level ``_buckets`` dict guarded by ``_lock``,
    so the quota is tracked in memory, per process.

    Args:
        user_id: Identifier of the user making the request.
        settings: Provides ``llm_generate_rate_limit_requests`` (maximum
            requests) and ``llm_generate_rate_limit_window_seconds``
            (window length in seconds).

    Raises:
        LLMRateLimitError: If the user already has the maximum number of
            requests inside the window. ``retry_after`` is the number of
            whole seconds until the oldest counted request leaves the
            window (at least 1), not the full window length.
    """
    from math import ceil  # local import: keeps this block self-contained

    key = str(user_id)
    now = time.monotonic()
    limit = settings.llm_generate_rate_limit_requests
    window = settings.llm_generate_rate_limit_window_seconds
    with _lock:
        bucket = _buckets[key]
        # Drop timestamps that have fallen out of the window.
        while bucket and now - bucket[0] > window:
            bucket.popleft()
        if len(bucket) >= limit:
            # A slot frees up when the oldest remaining request expires, so
            # report that delay. The fallback to `now` covers an empty bucket
            # (only possible with a non-positive limit) and yields the full
            # window, as before.
            oldest = bucket[0] if bucket else now
            retry_after = max(1, ceil(window - (now - oldest)))
            raise LLMRateLimitError(retry_after=retry_after)
        bucket.append(now)
+3
View File
@@ -17,6 +17,9 @@ class LLMClient:
return f"{base}/api/chat"
async def generate(self, prompt: str) -> str:
if not self.settings.llm_ready:
raise LLMUnavailableError("Automatic AI generation is not available")
payload = {
"model": self.settings.llm_model,
"messages": [