Files
ORCID2SWORD/backend/app/core/config.py
T
Mireya Cueto Garrido 1dd1096744 feat: enhance error handling and configuration in backend
- Added ORCID_REDIRECT_URI to docker-compose for OAuth callback.
- Refactored CORS and trusted hosts settings in configuration for better clarity.
- Introduced a new function to validate publication IDs and provide explicit error messages for researcher IDs.
- Updated rate limiting strategy to simplify configuration.
- Improved security headers middleware to safely remove sensitive headers.
2026-05-08 12:13:05 +02:00

184 lines
5.7 KiB
Python

"""
Configuración tipada y validada del backend.
Centraliza la lectura de variables de entorno, valida secretos críticos al
arranque y evita fallbacks inseguros (p. ej. JWT_SECRET="change_me") en
entornos productivos.
"""
from __future__ import annotations
import os
from functools import lru_cache
from pathlib import Path
from typing import List, Literal
from urllib.parse import urlparse
from dotenv import load_dotenv
from pydantic import Field, field_validator, model_validator
from pydantic_settings import BaseSettings, SettingsConfigDict
# Location of the ".env" file: three directory levels above this file
# (parents[0] is this file's own directory, parents[2] is two levels above it).
_ENV_PATH = Path(__file__).resolve().parents[2] / ".env"
# Load the file's variables into the process environment at import time.
# override=False: variables already present in the environment are kept.
load_dotenv(dotenv_path=_ENV_PATH, override=False)
def _split_csv(value: str | List[str] | None) -> List[str]:
    """Turn a comma-separated string (or a list) into a cleaned list of strings.

    Each item is converted to text and stripped of surrounding whitespace;
    items that are blank at that point are dropped. Trailing "/" characters
    are then removed from the items that remain.

    Args:
        value: A comma-separated string, a list of items, or None.

    Returns:
        The cleaned items in their original order; an empty list for None.
    """
    if value is None:
        return []

    # A list is used as-is; anything else is treated as a CSV string.
    raw_items = value if isinstance(value, list) else value.split(",")

    cleaned: List[str] = []
    for raw in raw_items:
        text = str(raw).strip()
        if text:
            cleaned.append(text.rstrip("/"))
    return cleaned
class Settings(BaseSettings):
    """
    Typed, validated settings for the whole application.

    Values are read from environment variables and from the ".env" file at
    ``_ENV_PATH``. Variable names are matched case-insensitively and unknown
    variables are ignored (see ``model_config``).

    DATABASE_URL, JWT_SECRET, API_KEY_VALUE, ORCID_CLIENT_ID and
    ORCID_CLIENT_SECRET have no default, so they are required in every
    environment.

    When ENVIRONMENT is "production" stricter checks apply:
    - JWT_SECRET must not be a weak/default value and must have >= 32 chars.
    - CORS_ALLOWED_ORIGINS must be set and must not contain "*".
    - API_KEY_VALUE must have at least 24 characters.
    - TRUSTED_HOSTS must be set explicitly and must not contain "*".

    NOTE(review): ``model_config`` does not set ``frozen``, so instances are
    not enforced to be immutable.
    """

    model_config = SettingsConfigDict(
        env_file=str(_ENV_PATH),
        env_file_encoding="utf-8",
        extra="ignore",
        case_sensitive=False,
    )

    # --- Runtime environment ---
    ENVIRONMENT: Literal["development", "staging", "production"] = "development"
    DEBUG: bool = False

    # --- Backing services / base URL ---
    DATABASE_URL: str = Field(...)
    REDIS_URL: str | None = None
    BASE_URL: str = "http://localhost:8000/api"

    # --- JWT ---
    JWT_SECRET: str = Field(...)
    JWT_ALGORITHM: str = "HS256"
    JWT_EXPIRES_MINUTES: int = 720  # 12 hours
    JWT_ISSUER: str = "orcid-sword-backend"
    JWT_AUDIENCE: str = "orcid-sword-frontend"

    # --- API key ---
    API_KEY_NAME: str = "X-API-Key"
    API_KEY_VALUE: str = Field(...)

    # --- ORCID OAuth ---
    ORCID_CLIENT_ID: str = Field(...)
    ORCID_CLIENT_SECRET: str = Field(...)
    ORCID_REDIRECT_URI: str = "http://localhost:8000/api/auth/orcid/callback"
    ORCID_OAUTH_STATE_ENABLED: bool = True
    ORCID_OAUTH_STATE_COOKIE: str = "orcid_oauth_state"
    ORCID_OAUTH_STATE_TTL_SECONDS: int = 600

    # --- CORS / trusted hosts ---
    # Stored as raw comma-separated strings; use the `cors_allowed_origins`
    # and `trusted_hosts` properties to get them as lists.
    CORS_ALLOWED_ORIGINS: str = ""
    TRUSTED_HOSTS: str = "*"

    # --- Rate limits ---
    # NOTE(review): "<count>/<period>" strings, presumably parsed by the rate
    # limiter configured elsewhere - confirm the accepted format there.
    RATE_LIMIT_DEFAULT: str = "60/minute"
    RATE_LIMIT_AUTH: str = "10/minute"
    RATE_LIMIT_SEARCH_ANON: str = "5/minute"
    RATE_LIMIT_SEARCH_AUTH: str = "30/minute"
    RATE_LIMIT_EXPORT: str = "20/minute"
    RATE_LIMIT_SYNC: str = "5/minute"

    # --- Request size / batch limits ---
    MAX_ORCID_BATCH: int = 25
    MAX_PUB_IDS_BATCH: int = 500
    MAX_REQUEST_BODY_BYTES: int = 1_048_576  # 1 MiB

    # --- API documentation and HSTS ---
    DOCS_ENABLED: bool = True
    SECURITY_HSTS_SECONDS: int = 31_536_000  # 365 days
    SECURITY_HSTS_INCLUDE_SUBDOMAINS: bool = True
    SECURITY_HSTS_PRELOAD: bool = False

    @model_validator(mode="after")
    def _validate_security(self) -> "Settings":
        """Validate security-sensitive settings once all fields are loaded.

        Returns:
            The validated instance itself.

        Raises:
            ValueError: If a production-only requirement is not met, or if a
                configured CORS origin is not an http(s) URL with a host.
                Pydantic reports it as a validation error when the settings
                are instantiated.
        """
        cors_origins = self.cors_allowed_origins
        trusted_hosts = self.trusted_hosts

        if self.ENVIRONMENT == "production":
            weak = {"change_me", "changeme", "secret", "password", ""}
            # Use the stripped secret for BOTH checks so that whitespace
            # padding cannot be used to satisfy the minimum length.
            jwt_secret = self.JWT_SECRET.strip()
            if jwt_secret.lower() in weak:
                raise ValueError(
                    "JWT_SECRET es débil o está sin configurar. "
                    "Define un secreto aleatorio fuerte (>= 32 bytes)."
                )
            if len(jwt_secret) < 32:
                raise ValueError(
                    "JWT_SECRET debe tener al menos 32 caracteres en producción."
                )
            if "*" in cors_origins:
                raise ValueError(
                    "CORS_ALLOWED_ORIGINS no puede contener '*' en producción."
                )
            if not cors_origins:
                raise ValueError(
                    "CORS_ALLOWED_ORIGINS debe definirse explícitamente en producción."
                )
            if not self.API_KEY_VALUE or len(self.API_KEY_VALUE) < 24:
                raise ValueError(
                    "API_KEY_VALUE debe tener al menos 24 caracteres en producción."
                )
            # Membership test (not equality): a wildcard mixed with explicit
            # hosts, e.g. "*,api.example.com", must be rejected as well.
            if "*" in trusted_hosts:
                raise ValueError(
                    "TRUSTED_HOSTS debe definirse explícitamente en producción."
                )

        for origin in cors_origins:
            # The wildcard is not a URL. Production has already rejected it
            # above; elsewhere it is allowed and must not fail the shape check.
            if origin == "*":
                continue
            parsed = urlparse(origin)
            if parsed.scheme not in {"http", "https"} or not parsed.netloc:
                raise ValueError(f"Origen CORS inválido: {origin!r}")

        return self

    @property
    def is_production(self) -> bool:
        """Whether ENVIRONMENT is "production"."""
        return self.ENVIRONMENT == "production"

    @property
    def cors_allowed_origins(self) -> List[str]:
        """CORS_ALLOWED_ORIGINS parsed into a list (empty when unset)."""
        return _split_csv(self.CORS_ALLOWED_ORIGINS)

    @property
    def trusted_hosts(self) -> List[str]:
        """TRUSTED_HOSTS parsed into a list; falls back to ["*"] when empty."""
        parsed = _split_csv(self.TRUSTED_HOSTS)
        return parsed or ["*"]

    @property
    def docs_url(self) -> str | None:
        """"/docs" when DOCS_ENABLED is true, otherwise None."""
        return "/docs" if self.DOCS_ENABLED else None

    @property
    def redoc_url(self) -> str | None:
        """"/redoc" when DOCS_ENABLED is true, otherwise None."""
        return "/redoc" if self.DOCS_ENABLED else None

    @property
    def openapi_url(self) -> str | None:
        """"/openapi.json" when DOCS_ENABLED is true, otherwise None."""
        return "/openapi.json" if self.DOCS_ENABLED else None
@lru_cache(maxsize=1)
def get_settings() -> Settings:
    """
    Return the shared configuration instance.

    The result is cached, so the environment and the ".env" file are read
    only on the first call (or on the first call after the cache is
    cleared), not on every request.
    """
    # Required fields are filled from the environment rather than from
    # constructor arguments, hence the type-checker suppression.
    instance = Settings()  # type: ignore[call-arg]
    return instance
# Module-level settings instance. Built when this module is imported, so a
# configuration that fails validation raises at import time.
settings = get_settings()
def reload_settings_for_tests() -> Settings:
    """
    Test helper: drop the cached settings and build a fresh instance.

    Clears the ``get_settings`` cache, rebinds this module's ``settings``
    global to the newly built instance, and returns it.

    Note: modules that already ran ``from ... import settings`` keep their
    own reference to the previous object; only this module's global is
    rebound.
    """
    global settings
    get_settings.cache_clear()
    settings = get_settings()
    return settings
# Names exported by `from <this module> import *`.
__all__ = ["Settings", "get_settings", "reload_settings_for_tests", "settings"]