feat: enhance backend security and configuration

- Updated Dockerfile to improve security with a non-root user and added health checks.
- Modified docker-compose.yml to set containers as read-only, restrict ports to localhost, and implement health checks.
- Enhanced .env.example with additional environment variables for security and configuration.
- Improved FastAPI application with middleware for security headers, CORS, and body size limits.
- Refactored authentication flow in auth.py to include state validation and improved error handling.
- Added rate limiting to various endpoints to prevent abuse.
- Updated researcher and publication handling to ensure better validation and error management.
This commit is contained in:
Mireya Cueto Garrido
2026-05-08 11:19:52 +02:00
parent 96e58dbd16
commit af1b8e9956
37 changed files with 1375 additions and 282 deletions
View File
+35
View File
@@ -0,0 +1,35 @@
"""
Middleware that caps the maximum size of the request body.
It guards against memory/CPU exhaustion attacks that send huge bodies to
POST endpoints. The check is based on the declared Content-Length header and
runs before FastAPI deserializes the JSON.
"""
from __future__ import annotations
from starlette.middleware.base import BaseHTTPMiddleware
from starlette.requests import Request
from starlette.responses import JSONResponse, Response
class BodySizeLimitMiddleware(BaseHTTPMiddleware):
    """Reject requests whose declared ``Content-Length`` exceeds a byte limit.

    Only the ``Content-Length`` header is inspected; the body itself is never
    read or counted here. Requests that omit the header (for example chunked
    uploads) are forwarded unchanged.
    """

    def __init__(self, app, *, max_bytes: int):
        """Wrap ``app`` and remember the largest accepted body size in bytes."""
        super().__init__(app)
        self._max_bytes = max_bytes

    @staticmethod
    def _parse_content_length(raw: str) -> int | None:
        """Return the header value as a non-negative int, or ``None`` if malformed."""
        value = raw.strip()
        # int() alone would also accept "-1", "+5" or "1_000"; HTTP allows digits only.
        if not (value.isascii() and value.isdigit()):
            return None
        try:
            return int(value)
        except ValueError:
            # CPython may refuse to convert absurdly long digit strings.
            return None

    async def dispatch(self, request: Request, call_next) -> Response:
        """Validate ``Content-Length`` and forward the request when acceptable.

        Returns:
            A 400 JSON response when the header is not a plain run of ASCII
            digits, a 413 JSON response when the declared size is larger than
            the configured limit, otherwise the downstream response.
        """
        content_length = request.headers.get("content-length")
        if content_length is not None:
            declared = self._parse_content_length(content_length)
            if declared is None:
                return JSONResponse(
                    status_code=400,
                    content={"detail": "Invalid Content-Length header"},
                )
            if declared > self._max_bytes:
                return JSONResponse(
                    status_code=413,
                    content={"detail": "Request body too large"},
                )
        return await call_next(request)
+182
View File
@@ -0,0 +1,182 @@
"""
Configuración tipada y validada del backend.
Centraliza la lectura de variables de entorno, valida secretos críticos al
arranque y evita fallbacks inseguros (p. ej. JWT_SECRET="change_me") en
entornos productivos.
"""
from __future__ import annotations
import os
from functools import lru_cache
from pathlib import Path
from typing import List, Literal
from urllib.parse import urlparse
from dotenv import load_dotenv
from pydantic import Field, field_validator, model_validator
from pydantic_settings import BaseSettings, SettingsConfigDict
# Location of the .env file: two directory levels above the folder that contains this module.
_ENV_PATH = Path(__file__).resolve().parents[2] / ".env"
# Load that file into the process environment at import time; with override=False
# variables that are already set in the environment are left untouched.
load_dotenv(dotenv_path=_ENV_PATH, override=False)
def _split_csv(value: str | List[str] | None) -> List[str]:
if value is None:
return []
if isinstance(value, list):
return [str(v).strip().rstrip("/") for v in value if str(v).strip()]
return [v.strip().rstrip("/") for v in value.split(",") if v.strip()]
class Settings(BaseSettings):
    """Typed application settings read from the environment and the ``.env`` file.

    When ``ENVIRONMENT`` is ``production`` stricter checks run after loading:
    - JWT_SECRET must not be a known weak value and needs at least 32 characters.
    - CORS_ALLOWED_ORIGINS must be non-empty and must not contain "*".
    - API_KEY_VALUE needs at least 24 characters.
    - ORCID_CLIENT_ID and ORCID_CLIENT_SECRET must not be blank.
    - TRUSTED_HOSTS must not contain the "*" wildcard.

    In every environment each CORS origin must be an http(s) URL with a host.
    """

    model_config = SettingsConfigDict(
        env_file=str(_ENV_PATH),
        env_file_encoding="utf-8",
        extra="ignore",
        case_sensitive=False,
    )

    # --- Runtime environment ---
    ENVIRONMENT: Literal["development", "staging", "production"] = "development"
    DEBUG: bool = False

    # --- Data stores and base URL ---
    DATABASE_URL: str = Field(...)
    REDIS_URL: str | None = None
    BASE_URL: str = "http://localhost:8000/api"

    # --- JWT ---
    JWT_SECRET: str = Field(...)
    JWT_ALGORITHM: str = "HS256"
    JWT_EXPIRES_MINUTES: int = 720
    JWT_ISSUER: str = "orcid-sword-backend"
    JWT_AUDIENCE: str = "orcid-sword-frontend"

    # --- API key ---
    API_KEY_NAME: str = "X-API-Key"
    API_KEY_VALUE: str = Field(...)

    # --- ORCID OAuth ---
    ORCID_CLIENT_ID: str = Field(...)
    ORCID_CLIENT_SECRET: str = Field(...)
    ORCID_REDIRECT_URI: str = "http://localhost:8000/api/auth/orcid/callback"
    ORCID_OAUTH_STATE_ENABLED: bool = True
    ORCID_OAUTH_STATE_COOKIE: str = "orcid_oauth_state"
    ORCID_OAUTH_STATE_TTL_SECONDS: int = 600

    # --- CORS / trusted hosts ---
    # NOTE(review): pydantic-settings may JSON-decode list-typed fields coming
    # from env sources before "before" validators run, in which case a plain
    # comma-separated value would be rejected there — confirm against the
    # installed version and the documented .env format.
    CORS_ALLOWED_ORIGINS: List[str] = Field(default_factory=list)
    TRUSTED_HOSTS: List[str] = Field(default_factory=lambda: ["*"])

    # --- Rate limits (limit strings such as "60/minute") ---
    RATE_LIMIT_DEFAULT: str = "60/minute"
    RATE_LIMIT_AUTH: str = "10/minute"
    RATE_LIMIT_SEARCH_ANON: str = "5/minute"
    RATE_LIMIT_SEARCH_AUTH: str = "30/minute"
    RATE_LIMIT_EXPORT: str = "20/minute"
    RATE_LIMIT_SYNC: str = "5/minute"

    # --- Request size / batch caps ---
    MAX_ORCID_BATCH: int = 25
    MAX_PUB_IDS_BATCH: int = 500
    MAX_REQUEST_BODY_BYTES: int = 1_048_576  # 1 MiB

    # --- API docs and HSTS ---
    DOCS_ENABLED: bool = True
    SECURITY_HSTS_SECONDS: int = 31_536_000
    SECURITY_HSTS_INCLUDE_SUBDOMAINS: bool = True
    SECURITY_HSTS_PRELOAD: bool = False

    @field_validator("CORS_ALLOWED_ORIGINS", mode="before")
    @classmethod
    def _parse_cors(cls, v):
        """Normalise the raw value into a list of origins via ``_split_csv``."""
        return _split_csv(v)

    @field_validator("TRUSTED_HOSTS", mode="before")
    @classmethod
    def _parse_trusted_hosts(cls, v):
        """Keep a list as given, split anything else; an empty result becomes ``["*"]``."""
        parsed = _split_csv(v) if not isinstance(v, list) else v
        return parsed or ["*"]

    @model_validator(mode="after")
    def _validate_security(self) -> "Settings":
        """Run the production-only hardening checks and validate CORS origins.

        Raises:
            ValueError: when a production requirement is not met or when a
                CORS origin is not an http(s) URL with a network location.
        """
        if self.ENVIRONMENT == "production":
            weak = {"change_me", "changeme", "secret", "password", ""}
            if self.JWT_SECRET.strip().lower() in weak:
                raise ValueError(
                    "JWT_SECRET es débil o está sin configurar. "
                    "Define un secreto aleatorio fuerte (>= 32 bytes)."
                )
            if len(self.JWT_SECRET) < 32:
                raise ValueError(
                    "JWT_SECRET debe tener al menos 32 caracteres en producción."
                )
            if "*" in self.CORS_ALLOWED_ORIGINS:
                raise ValueError(
                    "CORS_ALLOWED_ORIGINS no puede contener '*' en producción."
                )
            if not self.CORS_ALLOWED_ORIGINS:
                raise ValueError(
                    "CORS_ALLOWED_ORIGINS debe definirse explícitamente en producción."
                )
            if not self.API_KEY_VALUE or len(self.API_KEY_VALUE) < 24:
                raise ValueError(
                    "API_KEY_VALUE debe tener al menos 24 caracteres en producción."
                )
            # The fields are required, but a blank value would still load.
            if not self.ORCID_CLIENT_ID.strip() or not self.ORCID_CLIENT_SECRET.strip():
                raise ValueError(
                    "ORCID_CLIENT_ID y ORCID_CLIENT_SECRET deben definirse en producción."
                )
            # Any wildcard entry disables host checking, not only the exact list ["*"].
            if "*" in self.TRUSTED_HOSTS:
                raise ValueError(
                    "TRUSTED_HOSTS debe definirse explícitamente en producción."
                )
        for origin in self.CORS_ALLOWED_ORIGINS:
            parsed = urlparse(origin)
            if parsed.scheme not in {"http", "https"} or not parsed.netloc:
                raise ValueError(f"Origen CORS inválido: {origin!r}")
        return self

    @property
    def is_production(self) -> bool:
        """Whether ``ENVIRONMENT`` is ``"production"``."""
        return self.ENVIRONMENT == "production"

    @property
    def docs_url(self) -> str | None:
        """Swagger UI path, or ``None`` when docs are disabled."""
        return "/docs" if self.DOCS_ENABLED else None

    @property
    def redoc_url(self) -> str | None:
        """ReDoc path, or ``None`` when docs are disabled."""
        return "/redoc" if self.DOCS_ENABLED else None

    @property
    def openapi_url(self) -> str | None:
        """OpenAPI schema path, or ``None`` when docs are disabled."""
        return "/openapi.json" if self.DOCS_ENABLED else None
@lru_cache(maxsize=1)
def get_settings() -> Settings:
    """Return the shared ``Settings`` instance.

    The result is memoised, so the environment and the ``.env`` file are only
    read on the first call (or after the cache has been cleared).
    """
    instance = Settings()  # type: ignore[call-arg]
    return instance
# Module-level instance, built when this module is imported; any validation
# error raised while constructing Settings therefore surfaces at import time.
settings = get_settings()
def reload_settings_for_tests() -> Settings:
    """Test helper: drop the cached settings and build a fresh instance.

    The module-level ``settings`` name is rebound to the new object, which is
    also returned. Names imported earlier with ``from ... import settings``
    in other modules keep pointing at the previous object.
    """
    global settings
    get_settings.cache_clear()
    settings = get_settings()
    return settings
# Names exported by `from <this module> import *`.
__all__ = ["Settings", "get_settings", "reload_settings_for_tests", "settings"]
+67
View File
@@ -0,0 +1,67 @@
"""
Error handlers that do NOT leak sensitive information.
- Unhandled exceptions always return a generic message together with an `error_id`.
- Outside production, and only when DEBUG is enabled, the exception class name is added as `type` for debugging (no tracebacks).
- Validation errors are returned with FastAPI's standard 422 status.
"""
from __future__ import annotations
import logging
import uuid
from fastapi import HTTPException, Request
from fastapi.exceptions import RequestValidationError
from fastapi.responses import JSONResponse
from sqlalchemy.exc import SQLAlchemyError
from app.core.config import settings
# Dedicated logger used by the error handlers in this module.
logger = logging.getLogger("app.error")
async def http_exception_handler(request: Request, exc: HTTPException) -> JSONResponse:
    """Render an ``HTTPException`` as JSON.

    The response reuses the exception's status code, places its ``detail``
    under the ``"detail"`` key and forwards its ``headers`` when present.
    """
    extra_headers = getattr(exc, "headers", None)
    body = {"detail": exc.detail}
    return JSONResponse(
        content=body,
        status_code=exc.status_code,
        headers=extra_headers,
    )
async def validation_exception_handler(
    request: Request, exc: RequestValidationError
) -> JSONResponse:
    """Return request validation errors as a 422 response.

    Only the ``loc``, ``msg`` and ``type`` entries of each error are copied
    into the response; every other key of the error is left out.
    """
    exposed_keys = ("loc", "msg", "type")
    sanitized = [
        {key: problem.get(key) for key in exposed_keys}
        for problem in exc.errors()
    ]
    return JSONResponse(content={"detail": sanitized}, status_code=422)
async def sqlalchemy_exception_handler(
    request: Request, exc: SQLAlchemyError
) -> JSONResponse:
    """Log a database error and answer with a generic 500 response.

    A random ``error_id`` is generated, written to the log together with the
    request method and path, and echoed in the response body so the log entry
    can be found later. No detail of the exception is sent to the client.
    """
    error_id = str(uuid.uuid4())
    # Pass the exception explicitly so the traceback is logged even when the
    # handler is not running inside an active ``except`` block.
    logger.exception(
        "DB error [%s] on %s %s",
        error_id,
        request.method,
        request.url.path,
        exc_info=exc,
    )
    return JSONResponse(
        status_code=500,
        content={"detail": "Database error", "error_id": error_id},
    )
async def unhandled_exception_handler(request: Request, exc: Exception) -> JSONResponse:
    """Log an unexpected exception and answer with a generic 500 response.

    A random ``error_id`` is logged with the request method and path and
    returned in the body. The exception class name is added under ``"type"``
    only when the environment is not production and ``DEBUG`` is enabled.
    """
    error_id = str(uuid.uuid4())
    # Pass the exception explicitly so the traceback is logged even when the
    # handler is not running inside an active ``except`` block.
    logger.exception(
        "Unhandled error [%s] on %s %s",
        error_id,
        request.method,
        request.url.path,
        exc_info=exc,
    )
    payload: dict = {"detail": "Internal server error", "error_id": error_id}
    if not settings.is_production and settings.DEBUG:
        payload["type"] = exc.__class__.__name__
    return JSONResponse(status_code=500, content=payload)
+28
View File
@@ -0,0 +1,28 @@
"""
Minimal, uniformly formatted logging configuration.
- Formats records with timestamp, level and logger name.
- Uses DEBUG level when `settings.DEBUG` is true and INFO otherwise.
- Quiets noisy third-party loggers so that headers are not leaked.
"""
from __future__ import annotations
import logging
from app.core.config import settings
# Record layout: timestamp, level name, logger name, then the message after "::".
_LOG_FORMAT = "%(asctime)s %(levelname)s %(name)s :: %(message)s"
def configure_logging() -> None:
    """Configure the root logger and tune a few named loggers.

    The base level is DEBUG when ``settings.DEBUG`` is true and INFO
    otherwise. ``httpx``, ``httpcore``, ``sqlalchemy.engine.Engine`` and
    ``uvicorn.access`` are raised to WARNING; ``uvicorn.error`` follows the
    base level.
    """
    if settings.DEBUG:
        base_level = logging.DEBUG
    else:
        base_level = logging.INFO
    logging.basicConfig(format=_LOG_FORMAT, level=base_level)

    # Applied in insertion order.
    per_logger_levels = {
        "httpx": logging.WARNING,
        "httpcore": logging.WARNING,
        "sqlalchemy.engine.Engine": logging.WARNING,
        "uvicorn.error": base_level,
        "uvicorn.access": logging.WARNING,
    }
    for logger_name, logger_level in per_logger_levels.items():
        logging.getLogger(logger_name).setLevel(logger_level)
+60
View File
@@ -0,0 +1,60 @@
"""
Rate limiting based on SlowAPI.
- Uses `REDIS_URL` as the storage backend when it is defined (shared across workers).
- When `REDIS_URL` is not set, no storage URI is passed and SlowAPI's default storage is used (expected to be local memory, suitable for development).
- Clients are keyed by remote IP; when `request.state.researcher` is set, the key is that researcher's `orcid_id` (or `id`) instead,
so that an authenticated attacker does not share a quota with their IP.
"""
from __future__ import annotations
from typing import Optional
from slowapi import Limiter
from slowapi.errors import RateLimitExceeded
from slowapi.util import get_remote_address
from starlette.requests import Request
from starlette.responses import JSONResponse
from app.core.config import settings
def _key_func(request: Request) -> str:
"""
Devuelve la clave de rate limit para el request.
- Si hay un investigador autenticado en el state, usa su orcid_id.
- En caso contrario, usa la IP remota.
"""
researcher = getattr(request.state, "researcher", None)
if researcher is not None:
return f"user:{getattr(researcher, 'orcid_id', None) or researcher.id}"
return f"ip:{get_remote_address(request)}"
def _build_limiter() -> Limiter:
    """Create the SlowAPI ``Limiter`` from the application settings.

    The limiter keys requests with ``_key_func``, applies
    ``RATE_LIMIT_DEFAULT`` as its default limit, enables rate-limit response
    headers and stores counters at ``REDIS_URL`` (which may be ``None``).
    """
    # NOTE(review): presumably SlowAPI uses in-memory storage when storage_uri
    # is None — confirm. Also verify that the "fixed-window-elastic-expiry"
    # strategy is still offered by the installed `limits` release.
    limiter_options = dict(
        key_func=_key_func,
        default_limits=[settings.RATE_LIMIT_DEFAULT],
        storage_uri=settings.REDIS_URL,
        headers_enabled=True,
        strategy="fixed-window-elastic-expiry",
    )
    return Limiter(**limiter_options)
# Module-level limiter instance, created once when this module is imported.
limiter = _build_limiter()
def rate_limit_exceeded_handler(request: Request, exc: RateLimitExceeded) -> JSONResponse:
    """Build the uniform 429 response sent when a rate limit is exceeded.

    The body carries a fixed message and ``Retry-After`` is always ``60``;
    nothing from ``exc`` is included, so the exact limit is not echoed back
    by this handler.
    """
    body = {"detail": "Too many requests, slow down."}
    retry_headers = {"Retry-After": "60"}
    return JSONResponse(content=body, status_code=429, headers=retry_headers)
+88
View File
@@ -0,0 +1,88 @@
"""
Middleware de cabeceras de seguridad HTTP.
Aplica un perfil seguro por defecto:
- Strict-Transport-Security (HSTS) — fuerza HTTPS en navegadores compatibles.
- X-Content-Type-Options: nosniff
- X-Frame-Options: DENY (clickjacking)
- Referrer-Policy: strict-origin-when-cross-origin
- Permissions-Policy: bloquea APIs sensibles por defecto
- Cross-Origin-Opener-Policy / Resource-Policy: aislamiento del navegador
- Content-Security-Policy laxa para Swagger/OpenAPI (CDN), restrictiva para el resto.
NOTA: El frontend SPA tiene su propia CSP en su servidor. Aquí
endurecemos lo que sirve el backend (JSON, XML, ZIP, /docs, /redoc, etc.).
"""
from __future__ import annotations
from starlette.middleware.base import BaseHTTPMiddleware
from starlette.requests import Request
from starlette.responses import Response
from app.core.config import Settings
# Request paths that receive the relaxed documentation CSP; the middleware
# compares them for exact equality with request.url.path.
_DOCS_PATHS = ("/docs", "/redoc", "/openapi.json")
# Default CSP for every other response: nothing may be loaded, framed or submitted.
_BASE_CSP = (
"default-src 'none'; "
"frame-ancestors 'none'; "
"base-uri 'none'; "
"form-action 'none'"
)
# CSP for the documentation paths: same-origin resources plus the jsDelivr CDN
# (scripts, styles, fonts), inline scripts/styles and the FastAPI site for images.
_SWAGGER_CSP = (
"default-src 'self'; "
"img-src 'self' data: https://fastapi.tiangolo.com; "
"script-src 'self' https://cdn.jsdelivr.net 'unsafe-inline'; "
"style-src 'self' https://cdn.jsdelivr.net 'unsafe-inline'; "
"font-src 'self' data: https://cdn.jsdelivr.net; "
"connect-src 'self'; "
"frame-ancestors 'none'; "
"base-uri 'self'; "
"form-action 'self'"
)
class SecurityHeadersMiddleware(BaseHTTPMiddleware):
    """Add security-related HTTP headers to every response.

    Headers are written with ``setdefault``, so a value already set by the
    downstream application is kept.
    """

    def __init__(self, app, settings: Settings):
        """Wrap ``app`` and keep the settings used to build the HSTS header."""
        super().__init__(app)
        self._settings = settings

    def _hsts_value(self) -> str:
        """Build the Strict-Transport-Security value from the HSTS settings."""
        hsts = f"max-age={self._settings.SECURITY_HSTS_SECONDS}"
        if self._settings.SECURITY_HSTS_INCLUDE_SUBDOMAINS:
            hsts += "; includeSubDomains"
        if self._settings.SECURITY_HSTS_PRELOAD:
            hsts += "; preload"
        return hsts

    async def dispatch(self, request: Request, call_next) -> Response:
        """Call the downstream app and decorate its response with security headers.

        The documentation paths get the relaxed Swagger CSP, everything else
        the restrictive base CSP. HSTS is added for HTTPS requests and always
        in production. ``Server`` and ``X-Powered-By`` are removed when the
        response carries them.
        """
        response: Response = await call_next(request)
        headers = response.headers

        headers.setdefault("X-Content-Type-Options", "nosniff")
        headers.setdefault("X-Frame-Options", "DENY")
        headers.setdefault("Referrer-Policy", "strict-origin-when-cross-origin")
        headers.setdefault(
            "Permissions-Policy",
            "geolocation=(), microphone=(), camera=(), payment=(), usb=(), "
            "accelerometer=(), gyroscope=(), magnetometer=(), interest-cohort=()",
        )
        headers.setdefault("Cross-Origin-Opener-Policy", "same-origin")
        headers.setdefault("Cross-Origin-Resource-Policy", "same-site")
        headers.setdefault("X-Permitted-Cross-Domain-Policies", "none")

        if request.url.path in _DOCS_PATHS:
            headers.setdefault("Content-Security-Policy", _SWAGGER_CSP)
        else:
            headers.setdefault("Content-Security-Policy", _BASE_CSP)

        if request.url.scheme == "https" or self._settings.is_production:
            headers.setdefault("Strict-Transport-Security", self._hsts_value())

        # Starlette's MutableHeaders has no ``pop``; delete explicitly instead.
        # NOTE(review): a Server header appended later by the ASGI server
        # itself would not be affected here — confirm the server configuration.
        for banner in ("Server", "X-Powered-By"):
            if banner in headers:
                del headers[banner]
        return response