Harden LLM access: secrets only in server .env, no URL in repo.
Require LLM_BASE_URL and LLM_API_KEY for automatic generation, add per-user rate limits, stop publishing backend/LLM settings in docker-compose, and document secure deployment.
This commit is contained in:
@@ -48,10 +48,13 @@ JWT_EXPIRE_MINUTES=1440
|
||||
# El frontend obtiene un id_token con Google Identity Services y lo envía a POST /auth/google.
|
||||
GOOGLE_CLIENT_ID=123456789012-abcdefghijklmnopqrstuvwxyz123456.apps.googleusercontent.com
|
||||
|
||||
# --- LLM (Sinbad2IA UJA — sin clave) ---
|
||||
# URL base del servidor; el cliente llama a {LLM_BASE_URL}/api/chat
|
||||
# --- LLM (solo servidor; NO commitear valores reales) ---
|
||||
# Obligatorias para POST /exam/generate. Sin ellas, la generación automática queda desactivada.
|
||||
# La URL no debe aparecer en el repositorio: configúrala solo en el .env del servidor.
|
||||
LLM_BASE_URL=
|
||||
LLM_API_KEY=
|
||||
LLM_MODEL=qwen3.5:35b
|
||||
LLM_TIMEOUT_SECONDS=180
|
||||
# Opcional, solo si el servidor exige autenticación:
|
||||
# LLM_API_KEY=
|
||||
# Límite por usuario (generación automática)
|
||||
LLM_GENERATE_RATE_LIMIT_REQUESTS=5
|
||||
LLM_GENERATE_RATE_LIMIT_WINDOW_SECONDS=3600
|
||||
|
||||
@@ -0,0 +1,20 @@
|
||||
from typing import Annotated
|
||||
|
||||
from fastapi import Depends
|
||||
|
||||
from app.core.auth import get_current_user
|
||||
from app.core.config import Settings, get_settings
|
||||
from app.core.errors import LLMUnavailableError
|
||||
from app.core.llm_rate_limit import enforce_llm_rate_limit
|
||||
from app.models.user import User
|
||||
|
||||
|
||||
def require_llm_generation(
    settings: Annotated[Settings, Depends(get_settings)],
    current_user: Annotated[User, Depends(get_current_user)],
) -> User:
    """Allow automatic generation only when the LLM is configured via the environment.

    Intended for use as a FastAPI dependency: it resolves the authenticated
    user, checks ``settings.llm_ready`` and then applies the per-user
    generation rate limit.

    Args:
        settings: Application settings, injected through ``get_settings``.
        current_user: Authenticated user, injected through ``get_current_user``.

    Returns:
        The same ``current_user`` once every check has passed.

    Raises:
        LLMUnavailableError: If ``settings.llm_ready`` is false.

    Any exception raised by ``enforce_llm_rate_limit`` propagates unchanged.
    """
    if settings.llm_ready:
        # Quota is only charged once the LLM is known to be configured.
        enforce_llm_rate_limit(current_user.id, settings)
        return current_user
    raise LLMUnavailableError("Automatic AI generation is not available")
|
||||
@@ -4,6 +4,7 @@ from typing import Annotated
|
||||
from fastapi import APIRouter, Depends
|
||||
|
||||
from app.api.dependencies import get_exam_service, get_llm_client
|
||||
from app.api.llm_guard import require_llm_generation
|
||||
from app.core.auth import get_current_user
|
||||
from app.models.user import User
|
||||
from app.schemas.exam import (
|
||||
@@ -37,7 +38,7 @@ def build_prompt(
|
||||
@router.post("/generate", response_model=ParsedQuestionsResponse)
|
||||
async def generate_exam(
|
||||
payload: GenerateExamRequest,
|
||||
current_user: Annotated[User, Depends(get_current_user)],
|
||||
current_user: Annotated[User, Depends(require_llm_generation)],
|
||||
service: Annotated[ExamService, Depends(get_exam_service)],
|
||||
llm_client: Annotated[LLMClient, Depends(get_llm_client)],
|
||||
) -> ParsedQuestionsResponse:
|
||||
|
||||
@@ -18,9 +18,22 @@ class Settings(BaseSettings):
|
||||
rate_limit_window_seconds: int = Field(default=60, ge=1)
|
||||
max_request_bytes: int = Field(default=1_048_576, ge=1_024)
|
||||
llm_api_key: str | None = None
|
||||
llm_base_url: str = ""
|
||||
llm_base_url: str = Field(
|
||||
default="",
|
||||
description="URL base del LLM (solo servidor). No incluir en el repositorio.",
|
||||
)
|
||||
llm_model: str = "qwen3.5:35b"
|
||||
llm_timeout_seconds: int = Field(default=180, ge=5)
|
||||
llm_generate_rate_limit_requests: int = Field(
|
||||
default=5,
|
||||
ge=1,
|
||||
description="Máximo de POST /exam/generate por usuario y ventana.",
|
||||
)
|
||||
llm_generate_rate_limit_window_seconds: int = Field(
|
||||
default=3600,
|
||||
ge=60,
|
||||
description="Ventana en segundos para el límite de generación con LLM.",
|
||||
)
|
||||
jwt_secret_key: str = Field(min_length=32)
|
||||
jwt_algorithm: str = "HS256"
|
||||
jwt_expire_minutes: int = Field(default=60 * 24, ge=5)
|
||||
@@ -56,6 +69,13 @@ class Settings(BaseSettings):
|
||||
def trusted_hosts_list(self) -> list[str]:
|
||||
return [host.strip() for host in self.trusted_hosts.split(",") if host.strip()]
|
||||
|
||||
@property
def llm_ready(self) -> bool:
    """Report whether both the LLM base URL and API key are set to non-blank values."""
    if not self.llm_base_url.strip():
        return False
    api_key = self.llm_api_key
    # ``api_key`` may be None, so test it before stripping whitespace.
    return bool(api_key and api_key.strip())
|
||||
|
||||
|
||||
@lru_cache
|
||||
def get_settings() -> Settings:
|
||||
|
||||
@@ -51,9 +51,14 @@ def error_payload(code: str, message: str, details: object | None = None) -> dic
|
||||
def register_exception_handlers(app: FastAPI) -> None:
|
||||
@app.exception_handler(AppError)
|
||||
async def app_error_handler(_: Request, exc: AppError) -> ORJSONResponse:
|
||||
headers: dict[str, str] | None = None
|
||||
retry_after = getattr(exc, "retry_after", None)
|
||||
if retry_after is not None:
|
||||
headers = {"Retry-After": str(retry_after)}
|
||||
return ORJSONResponse(
|
||||
status_code=exc.status_code,
|
||||
content=error_payload(exc.code, exc.message),
|
||||
headers=headers,
|
||||
)
|
||||
|
||||
@app.exception_handler(StarletteHTTPException)
|
||||
|
||||
@@ -0,0 +1,35 @@
|
||||
import time
|
||||
from collections import defaultdict, deque
|
||||
from threading import Lock
|
||||
from uuid import UUID
|
||||
|
||||
from app.core.config import Settings
|
||||
from app.core.errors import AppError
|
||||
|
||||
_lock = Lock()
|
||||
_buckets: dict[str, deque[float]] = defaultdict(deque)
|
||||
|
||||
|
||||
class LLMRateLimitError(AppError):
    """Application error signalling that the LLM generation quota was exceeded.

    The error is created with HTTP status 429 and the code
    ``llm_rate_limited``. The ``retry_after`` value given to the constructor
    is kept on the instance so that error handlers can read it.
    """

    def __init__(self, retry_after: int) -> None:
        """Build the error and remember *retry_after* on the instance.

        Args:
            retry_after: Delay hint stored as ``self.retry_after``
                (presumably seconds; confirm against the caller).
        """
        super().__init__(
            code="llm_rate_limited",
            status_code=429,
            message="Too many AI generation requests. Try again later.",
        )
        self.retry_after = retry_after
|
||||
|
||||
|
||||
def enforce_llm_rate_limit(user_id: UUID, settings: Settings) -> None:
    """Record one generation attempt for *user_id*, or reject it when over quota.

    Keeps a per-user sliding window of ``time.monotonic()`` timestamps in the
    module-level ``_buckets`` mapping, guarded by ``_lock``.

    Args:
        user_id: Identifier of the requesting user; its string form is the
            bucket key.
        settings: Supplies ``llm_generate_rate_limit_requests`` (maximum
            attempts) and ``llm_generate_rate_limit_window_seconds`` (window
            length in seconds).

    Raises:
        LLMRateLimitError: If the user already has ``limit`` attempts inside
            the window. ``retry_after`` is the number of whole seconds until
            the oldest recorded attempt leaves the window (at least 1),
            rather than the full window length.
    """
    import math  # local import keeps this block self-contained

    key = str(user_id)
    now = time.monotonic()
    limit = settings.llm_generate_rate_limit_requests
    window = settings.llm_generate_rate_limit_window_seconds

    with _lock:
        bucket = _buckets[key]
        # Drop timestamps that have aged out of the window.
        while bucket and now - bucket[0] > window:
            bucket.popleft()
        if len(bucket) >= limit:
            # A slot frees up when the oldest surviving attempt expires, so
            # advertise that delay instead of always the whole window.
            oldest = bucket[0] if bucket else now
            remaining = window - (now - oldest)
            raise LLMRateLimitError(retry_after=max(1, math.ceil(remaining)))
        bucket.append(now)
|
||||
@@ -17,6 +17,9 @@ class LLMClient:
|
||||
return f"{base}/api/chat"
|
||||
|
||||
async def generate(self, prompt: str) -> str:
|
||||
if not self.settings.llm_ready:
|
||||
raise LLMUnavailableError("Automatic AI generation is not available")
|
||||
|
||||
payload = {
|
||||
"model": self.settings.llm_model,
|
||||
"messages": [
|
||||
|
||||
Reference in New Issue
Block a user