diff --git a/.gitignore b/.gitignore
index 1cc1173..6932fd8 100644
--- a/.gitignore
+++ b/.gitignore
@@ -4,6 +4,7 @@
.env.*
!.env.example
+
# --- PYTHON BACKEND ---
__pycache__/
*.pyc
@@ -22,6 +23,13 @@ ENV/
# FastAPI / Uvicorn
*.pid
+# Test / type checkers
+.pytest_cache/
+.mypy_cache/
+.ruff_cache/
+.coverage
+htmlcov/
+
# --- NODE FRONTEND ---
node_modules/
dist/
diff --git a/README.md b/README.md
new file mode 100644
index 0000000..1e6e5da
--- /dev/null
+++ b/README.md
@@ -0,0 +1,334 @@
+# ORCID SWORD System
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+ Full-stack platform for ORCID authentication, researcher synchronization, and publication export in SWORD XML / ZIP formats.
+
+
+---
+
+## 1. Overview: What this project is for
+
+`orcid-system` is designed for research workflows where ORCID data must be ingested, normalized, and exported.
+
+Core capabilities:
+
+- ORCID OAuth 3-legged login
+- researcher search and synchronization against ORCID
+- publication export by selection or by researcher
+- export formats: **SWORD XML** and **ZIP**
+- dual access model: user JWT or service API key (for export)
+
+> [!NOTE]
+> The stack is local-first with Docker, but includes production-oriented hardening (CORS policy, trusted hosts, security headers, rate limiting, etc.).
+
+---
+
+## 2. Tech Stack
+
+### Backend
+- FastAPI
+- SQLAlchemy
+- PostgreSQL
+- Redis
+- python-jose (JWT)
+- slowapi (rate limit)
+- APScheduler
+- httpx
+
+### Frontend
+- React 19
+- Vite 8
+- React Router
+- TailwindCSS 4
+- Sonner (notifications)
+
+### Infrastructure
+- Docker / Docker Compose
+
+---
+
+## 3. Quick Start
+
+From the project root:
+
+```bash
+docker compose down
+docker compose up --build
+```
+
+Default local URLs:
+
+- Frontend: `http://localhost:5173`
+- Backend: `http://localhost:8000`
+
+> [!IMPORTANT]
+> Current compose mapping uses loopback binding:
+> - `127.0.0.1:5173:5173`
+> - `127.0.0.1:8000:8000`
+> As a result, the services are reachable only from the host machine and are not exposed publicly by default.
+
+---
+
+## 4. Environment Configuration
+
+Backend:
+- Main file: `backend/.env`
+- Reference: `backend/.env.example`
+
+Frontend:
+- Compose/dev file: `frontend/.env`
+- Optional local override for host dev: `frontend/.env.local`
+
+Important backend variables:
+
+- `ORCID_CLIENT_ID`
+- `ORCID_CLIENT_SECRET`
+- `ORCID_REDIRECT_URI`
+- `JWT_SECRET`
+- `API_KEY_NAME`
+- `API_KEY_VALUE`
+- `CORS_ALLOWED_ORIGINS`
+- `TRUSTED_HOSTS`
+- `DATABASE_URL`
+- `REDIS_URL`
+
+Important frontend variables:
+
+- `VITE_API_URL` (empty in Docker setup, so Vite proxy handles `/api`)
+- `VITE_API_PROXY_TARGET` (defaults to `http://backend:8000` in compose network)
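+- `VITE_API_KEY` (must match backend `API_KEY_VALUE` when using API key mode; note that `VITE_*` values are embedded in the client bundle and are therefore visible to anyone who loads the frontend)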
+- `VITE_USE_MOCKS`
+- `VITE_ORCID_PUBLIC_API_BASE` (optional override)
+
+> [!WARNING]
+> Never commit real production secrets. Rotate `JWT_SECRET`, `API_KEY_VALUE`, and `ORCID_CLIENT_SECRET` before deployment.
+
+---
+
+## 5. ngrok Bridge for Local OAuth Callback
+
+To test OAuth callback from ORCID in local environments, compose can inject a public callback URL:
+
+```yaml
+environment:
+ ORCID_REDIRECT_URI: https://jargon-supreme-palpable.ngrok-free.dev/callback
+```
+
+> [!NOTE]
+> Values under `docker-compose.yml -> services.backend.environment` override `backend/.env` inside the container.
+
+---
+
+## 6. API Endpoints
+
+Base backend URL: `http://localhost:8000`
+
+| Module | Method | Endpoint | Auth | Parameters | Body | Notes |
+| :--- | :---: | :--- | :--- | :--- | :--- | ---: |
+| Health | `GET` | `/health` | None | None | None | Liveness check |
+| ORCID Auth | `GET` | `/api/auth/orcid/authorize` | None | None | None | Redirects to ORCID |
+| ORCID Auth | `GET` | `/api/auth/orcid/callback` | None | `code`, `state` | None | Exchanges OAuth code for backend JWT |
+| ORCID Auth | `GET` | `/callback` | None | `code`, `state` | None | Alias for callback flow |
+| Researchers | `POST` | `/api/researchers/search` | Optional Bearer | None | `{"orcid_ids":[...]}` | Batch search/sync |
+| Researchers | `POST` | `/api/researchers/{orcid_id}/sync` | Optional Bearer | `orcid_id` | None | Full sync of one researcher |
+| Export SWORD | `POST` | `/api/export/sword/publications` | Bearer or API key | None | `["publication_uuid", ...]` | Export selected publications |
+| Export SWORD | `GET` | `/api/export/sword/researcher/{orcid_id}` | Bearer or API key | `orcid_id` | None | Export all by researcher |
+| Export ZIP | `POST` | `/api/export/zip/publications` | Bearer or API key | None | `["publication_uuid", ...]` | Export selected publications |
+| Export ZIP | `GET` | `/api/export/zip/researcher/{orcid_id}` | Bearer or API key | `orcid_id` | None | Export all by researcher |
+
+> [!IMPORTANT]
+> `.../publications` endpoints require **publication IDs**, not `researcher.id`.
+
+---
+
+## 7. Request Examples
+
+### Health
+```bash
+curl http://localhost:8000/health
+```
+
+### Search one or more researchers
+```bash
+curl -X POST "http://localhost:8000/api/researchers/search" \
+ -H "Content-Type: application/json" \
+ -d "{\"orcid_ids\":[\"0009-0000-0793-5376\"]}"
+```
+
+### Sync one researcher
+```bash
+curl -X POST "http://localhost:8000/api/researchers/0009-0000-0793-5376/sync"
+```
+
+### Export SWORD by researcher (API key mode)
+```bash
+curl "http://localhost:8000/api/export/sword/researcher/0009-0000-0793-5376" \
+ -H "X-API-Key: YOUR_API_KEY" \
+ -o sword.xml
+```
+
+### Export ZIP by publication IDs (Bearer mode)
+```bash
+curl -X POST "http://localhost:8000/api/export/zip/publications" \
+ -H "Authorization: Bearer YOUR_JWT" \
+ -H "Content-Type: application/json" \
+ -d "[\"04f6a2a6-b753-4432-982b-b88160f627fe\"]" \
+ -o export.zip
+```
+
+---
+
+## 8. Security Controls
+
+Implemented controls in backend:
+
+- strict CORS allowlist
+- trusted host filtering
+- request body size limit
+- OAuth `state` validation
+- JWT validation with issuer/audience claims
+- API key validation via constant-time comparison
+- rate limiting
+- security headers middleware
+- non-root container and reduced privileges
+
+> [!WARNING]
+> For production, enforce HTTPS behind a reverse proxy and set concrete values for `CORS_ALLOWED_ORIGINS` and `TRUSTED_HOSTS`.
+
+---
+
+## 9. Frontend Details
+
+The frontend is a React SPA using route-based navigation and a centralized API client.
+
+### Frontend routing
+
+- `/` → landing page
+- `/dashboard/:orcid` → researcher dashboard
+- `/group` → multi-researcher results
+- `/callback` → OAuth callback handler
+
+### Frontend API behavior (`frontend/src/services/api.js`)
+
+- central HTTP wrapper with `ApiError`
+- includes `X-API-Key` on requests when configured
+- includes `Authorization: Bearer <token>` when a token exists in `localStorage`
+- supports mock mode through `VITE_USE_MOCKS`
+- supports Vite proxy mode when `VITE_API_URL` is empty
+
+### Vite proxy (`frontend/vite.config.js`)
+
+- `/api` proxied to `VITE_API_PROXY_TARGET` (`http://backend:8000` in compose)
+- `/health` proxied to same target
+- dev host settings allow tunnel scenarios (ngrok callback testing)
+
+---
+
+## 10. Project Structure
+
+```text
+orcid-system/
+├── backend/
+│ ├── app/
+│ │ ├── api/
+│ │ │ ├── auth.py
+│ │ │ ├── researchers.py
+│ │ │ └── export.py
+│ │ ├── core/
+│ │ │ ├── config.py
+│ │ │ ├── rate_limit.py
+│ │ │ ├── security_headers.py
+│ │ │ ├── error_handlers.py
+│ │ │ └── body_size.py
+│ │ ├── db/
+│ │ │ ├── models.py
+│ │ │ ├── session.py
+│ │ │ └── repositories/
+│ │ ├── security/
+│ │ │ ├── jwt.py
+│ │ │ ├── api_key.py
+│ │ │ └── oauth_state.py
+│ │ ├── services/
+│ │ │ ├── orcid_client.py
+│ │ │ ├── sword_generator.py
+│ │ │ ├── zip_generator.py
+│ │ │ └── sync_service.py
+│ │ ├── utils/
+│ │ └── main.py
+│ ├── .env
+│ ├── .env.example
+│ ├── .env.production
+│ ├── Dockerfile
+│ └── requirements.txt
+├── frontend/
+│ ├── src/
+│ │ ├── components/
+│ │ │ ├── dashboard/
+│ │ │ ├── layout/
+│ │ │ └── ui/
+│ │ ├── contexts/
+│ │ │ └── AuthContext.jsx
+│ │ ├── pages/
+│ │ │ ├── LandingPage.jsx
+│ │ │ ├── DashboardPage.jsx
+│ │ │ ├── GroupResultsPage.jsx
+│ │ │ └── AuthCallbackPage.jsx
+│ │ ├── services/
+│ │ │ ├── api.js
+│ │ │ └── mocks.js
+│ │ ├── utils/
+│ │ ├── App.jsx
+│ │ └── main.jsx
+│ ├── .env
+│ ├── package.json
+│ ├── vite.config.js
+│ └── eslint.config.js
+├── docker-compose.yml
+└── README.md
+```
+
+---
+
+## 11. Production Checklist
+
+- [ ] `ENVIRONMENT=production`
+- [ ] `DEBUG=false`
+- [ ] rotate all secrets
+- [ ] define strict `CORS_ALLOWED_ORIGINS`
+- [ ] define strict `TRUSTED_HOSTS`
+- [ ] enforce HTTPS via reverse proxy
+- [ ] keep DB/Redis private
+- [ ] configure monitoring and backups
+
+---
+
+## 12. Authors and Team
+
+This project is the result of a collaboration with the **University of Jaén**.
+
+| Role | Developer | GitHub |
+| :--- | :--- | :--- |
+| **Frontend** | Alexis López Moral | [@AlexisLopez-Dev](https://github.com/AlexisLopez-Dev) |
+| **Backend** | Mireya Cueto Garrido | [@MireyaCueto](https://github.com/MireyaCueto) |
+
+### Direction
+* **Project Supervisor:** Luis Martínez López
+
+---
+
+
+ Built with professional care and ❤️ for secure research data workflows at the University of Jaén.
+
diff --git a/backend/.dockerignore b/backend/.dockerignore
new file mode 100644
index 0000000..42336ce
--- /dev/null
+++ b/backend/.dockerignore
@@ -0,0 +1,20 @@
+.env
+.env.*
+!.env.example
+__pycache__/
+*.pyc
+*.pyo
+*.pyd
+*.log
+*.sqlite3
+*.db
+.pytest_cache/
+.mypy_cache/
+.ruff_cache/
+.venv/
+venv/
+.git/
+.gitignore
+README.md
+docs/
+tests/
diff --git a/backend/.env.example b/backend/.env.example
index be02114..c6b9fe5 100644
--- a/backend/.env.example
+++ b/backend/.env.example
@@ -1,19 +1,81 @@
-ORCID_CLIENT_ID=123412341234
-ORCID_CLIENT_SECRET=123412341234
-
-API_KEY_NAME=X-API-Key
-API_KEY_VALUE=123412341234
+# ============================================================
+# ENVIRONMENT
+# ============================================================
+ENVIRONMENT=development
+DEBUG=false
+# ============================================================
+# DATABASE / CACHE
+# ============================================================
DATABASE_URL=postgresql://postgres:postgres@db:5432/orcid_db
REDIS_URL=redis://redis:6379/0
+# ============================================================
+# BASE URL (used internally by the scheduler)
+# ============================================================
BASE_URL=http://localhost:8000/api
+# ============================================================
+# CORS — strict allowlist, comma-separated
+# Never use "*" when allow_credentials=true.
+# ============================================================
+CORS_ALLOWED_ORIGINS=http://localhost:5173
+
+# ============================================================
+# Trusted Hosts — guards against Host-header injection (list hosts explicitly in production)
+# ============================================================
+TRUSTED_HOSTS=*
+
+# ============================================================
# JWT (login ORCID)
-JWT_SECRET=change_me
+# Generate a strong secret: `openssl rand -base64 64`
+# ============================================================
+JWT_SECRET=change_me_to_a_long_random_value_at_least_32_chars
JWT_ALGORITHM=HS256
JWT_EXPIRES_MINUTES=720
+JWT_ISSUER=orcid-sword-backend
+JWT_AUDIENCE=orcid-sword-frontend
+# ============================================================
+# Machine-to-machine API key (internal scheduler)
+# Generate with: `python -c "import secrets;print(secrets.token_urlsafe(48))"`
+# ============================================================
+API_KEY_NAME=X-API-Key
+API_KEY_VALUE=replace_with_a_strong_random_value_min_24_chars
+
+# ============================================================
# ORCID OAuth 3-legged (authorization code)
-# Debe coincidir exactamente con el redirect URI configurado en tu app ORCID.
-ORCID_REDIRECT_URI=http://localhost:8000/api/auth/orcid/callback
\ No newline at end of file
+# ============================================================
+ORCID_CLIENT_ID=APP-XXXXXXXXXXXXXXXX
+ORCID_CLIENT_SECRET=replace_me
+ORCID_REDIRECT_URI=http://localhost:8000/api/auth/orcid/callback
+ORCID_OAUTH_STATE_ENABLED=true
+
+# ============================================================
+# Rate limits (slowapi format: "<count>/<period>", e.g. "60/minute")
+# ============================================================
+RATE_LIMIT_DEFAULT=60/minute
+RATE_LIMIT_AUTH=10/minute
+RATE_LIMIT_SEARCH_ANON=5/minute
+RATE_LIMIT_SEARCH_AUTH=30/minute
+RATE_LIMIT_EXPORT=20/minute
+RATE_LIMIT_SYNC=5/minute
+
+# ============================================================
+# Size limits (anti-DoS)
+# ============================================================
+MAX_ORCID_BATCH=25
+MAX_PUB_IDS_BATCH=500
+MAX_REQUEST_BODY_BYTES=1048576
+
+# ============================================================
+# Interactive API documentation (disable in production if not needed)
+# ============================================================
+DOCS_ENABLED=true
+
+# ============================================================
+# HSTS
+# ============================================================
+SECURITY_HSTS_SECONDS=31536000
+SECURITY_HSTS_INCLUDE_SUBDOMAINS=true
+SECURITY_HSTS_PRELOAD=false
diff --git a/backend/Dockerfile b/backend/Dockerfile
index e3f2064..5251f77 100644
--- a/backend/Dockerfile
+++ b/backend/Dockerfile
@@ -1,10 +1,36 @@
-FROM python:3.12-slim
+FROM python:3.12-slim AS base
+
+ENV PYTHONDONTWRITEBYTECODE=1 \
+ PYTHONUNBUFFERED=1 \
+ PIP_NO_CACHE_DIR=1 \
+ PIP_DISABLE_PIP_VERSION_CHECK=1
+
+RUN apt-get update \
+ && apt-get install -y --no-install-recommends curl \
+ && rm -rf /var/lib/apt/lists/*
+
+RUN groupadd --system --gid 1001 app \
+ && useradd --system --uid 1001 --gid app --home /app --shell /usr/sbin/nologin app
WORKDIR /app
-COPY requirements.txt .
+COPY requirements.txt ./
RUN pip install --no-cache-dir -r requirements.txt
COPY app ./app
-CMD ["uvicorn", "app.main:app", "--host", "0.0.0.0", "--port", "8000"]
+RUN chown -R app:app /app
+
+USER app
+
+EXPOSE 8000
+
+HEALTHCHECK --interval=30s --timeout=5s --start-period=15s --retries=3 \
+ CMD curl -fsS http://127.0.0.1:8000/health || exit 1
+
+CMD ["uvicorn", "app.main:app", \
+ "--host", "0.0.0.0", \
+ "--port", "8000", \
+ "--proxy-headers", \
+ "--forwarded-allow-ips", "*", \
+ "--no-server-header"]
diff --git a/backend/app/api/auth.py b/backend/app/api/auth.py
index 205cb95..89ecc96 100644
--- a/backend/app/api/auth.py
+++ b/backend/app/api/auth.py
@@ -1,64 +1,68 @@
+import logging
+
import httpx
-import os
-from pathlib import Path
-from dotenv import load_dotenv
-from fastapi import APIRouter, Depends, HTTPException, status
-from fastapi.responses import RedirectResponse
+from fastapi import APIRouter, Depends, HTTPException, Request, status
+from fastapi.responses import JSONResponse, RedirectResponse
from sqlalchemy.orm import Session
+from app.core.config import settings
+from app.core.rate_limit import limiter
from app.db.models import Researcher
from app.db.session import get_db
from app.schema.auth import OrcidLoginResponseSchema
from app.security.jwt import create_access_token
+from app.security.oauth_state import (
+ attach_state_cookie,
+ clear_state_cookie,
+ generate_state,
+ validate_state,
+)
from app.services.orcid_client import ORCIDClient
from app.utils.orcid_validator import is_valid_orcid
-# Asegura que al ejecutar `uvicorn` local también se carga `backend/.env`.
-_ENV_PATH = Path(__file__).resolve().parents[2] / ".env"
-load_dotenv(dotenv_path=_ENV_PATH, override=False)
-
router = APIRouter(prefix="/auth", tags=["auth"])
+logger = logging.getLogger("app.auth")
def _extract_display_name(record: dict) -> str | None:
person = (record or {}).get("person") or {}
name = person.get("name") or {}
- given = ((name.get("given-names") or {}).get("value")) if isinstance(name.get("given-names"), dict) else None
- family = ((name.get("family-name") or {}).get("value")) if isinstance(name.get("family-name"), dict) else None
- full = " ".join([p for p in [given, family] if p])
+ given_obj = name.get("given-names")
+ family_obj = name.get("family-name")
+ given = given_obj.get("value") if isinstance(given_obj, dict) else None
+ family = family_obj.get("value") if isinstance(family_obj, dict) else None
+ full = " ".join(p for p in [given, family] if p)
return full or None
def _orcid_redirect_uri() -> str:
- # Debe coincidir con el `redirect_uri` registrado en tu integración ORCID.
- return os.getenv("ORCID_REDIRECT_URI") or "http://localhost:8000/api/auth/orcid/callback"
+ return settings.ORCID_REDIRECT_URI
def _complete_oauth_login(*, code: str, db: Session) -> OrcidLoginResponseSchema:
"""
- Completa el login OAuth:
- 1) intercambio del `code` en ORCID (server-side)
- 2) crea/actualiza el investigador
- 3) emite nuestro JWT
+ 1) Exchanges the `code` with ORCID (server-side).
+ 2) Creates/updates the researcher.
+ 3) Issues our own JWT.
"""
- if not code:
- raise HTTPException(status_code=status.HTTP_400_BAD_REQUEST, detail="Missing ORCID authorization code")
+ if not code or len(code) > 256:
+ raise HTTPException(status_code=status.HTTP_400_BAD_REQUEST, detail="Invalid ORCID authorization code")
client = ORCIDClient()
- redirect_uri = _orcid_redirect_uri()
-
try:
- token_data = client.exchange_authorization_code(code=code, redirect_uri=redirect_uri)
+ token_data = client.exchange_authorization_code(code=code, redirect_uri=_orcid_redirect_uri())
except httpx.HTTPStatusError as exc:
+ logger.warning("ORCID token exchange failed: %s", exc.response.status_code)
raise HTTPException(
status_code=status.HTTP_502_BAD_GATEWAY,
- detail=f"ORCID token error ({exc.response.status_code})",
- )
- except httpx.TimeoutException:
- raise HTTPException(status_code=status.HTTP_504_GATEWAY_TIMEOUT, detail="ORCID timeout")
- except Exception:
- raise HTTPException(status_code=status.HTTP_502_BAD_GATEWAY, detail="ORCID unavailable")
+ detail="ORCID token exchange failed",
+ ) from exc
+ except httpx.TimeoutException as exc:
+ raise HTTPException(status_code=status.HTTP_504_GATEWAY_TIMEOUT, detail="ORCID timeout") from exc
+ except Exception as exc:
+ logger.exception("Unexpected error during ORCID token exchange")
+ raise HTTPException(status_code=status.HTTP_502_BAD_GATEWAY, detail="ORCID unavailable") from exc
orcid_id = (token_data.get("orcid") or "").strip()
if not is_valid_orcid(orcid_id):
@@ -66,7 +70,6 @@ def _complete_oauth_login(*, code: str, db: Session) -> OrcidLoginResponseSchema
display_name = token_data.get("name")
if not display_name:
- # Fallback si ORCID no devuelve `name` en el token response.
try:
record = client.fetch_record(orcid_id)
display_name = _extract_display_name(record)
@@ -89,21 +92,54 @@ def _complete_oauth_login(*, code: str, db: Session) -> OrcidLoginResponseSchema
return OrcidLoginResponseSchema(access_token=token)
-@router.get("/orcid/authorize")
-def authorize_orcid():
+def complete_oauth_login_response(
+ *, request: Request, code: str, state: str | None, db: Session
+) -> JSONResponse:
"""
- Inicia el flujo OAuth 3-legged (authorization code) hacia ORCID.
+ Validates `state`, completes the login and clears the state cookie.
+ Returns the JSONResponse directly (so the cookie can be deleted on it).
+ """
+ validate_state(request, state)
+ payload = _complete_oauth_login(code=code, db=db)
+ json_resp = JSONResponse(content=payload.model_dump())
+ clear_state_cookie(json_resp)
+ return json_resp
+
+
+# ---------------------------------------------------------
+# ENDPOINT 1: Start the 3-legged OAuth flow towards ORCID
+# ---------------------------------------------------------
+
+@router.get("/orcid/authorize")
+@limiter.limit(settings.RATE_LIMIT_AUTH)
+def authorize_orcid(request: Request):
+ """
+ Builds the ORCID authorization URL and stores the `state` in an HttpOnly
+ cookie so it can be validated in the callback (anti-CSRF).
"""
client = ORCIDClient()
+ state = generate_state() if settings.ORCID_OAUTH_STATE_ENABLED else None
authorize_url = client.build_authorize_url(
redirect_uri=_orcid_redirect_uri(),
- # Solo necesitamos el Authenticated iD del usuario.
scope="/authenticate",
+ state=state,
)
- return RedirectResponse(authorize_url)
+ response = RedirectResponse(authorize_url)
+ if state:
+ attach_state_cookie(response, state)
+ return response
+# ---------------------------------------------------------
+# ENDPOINT 2: 3-legged OAuth callback from ORCID
+# ---------------------------------------------------------
+
@router.get("/orcid/callback", response_model=OrcidLoginResponseSchema)
-def orcid_callback(code: str, db: Session = Depends(get_db)):
- return _complete_oauth_login(code=code, db=db)
-
+@limiter.limit(settings.RATE_LIMIT_AUTH)
+def orcid_callback(
+ request: Request,
+ code: str,
+ state: str | None = None,
+ db: Session = Depends(get_db),
+):
+ return complete_oauth_login_response(request=request, code=code, state=state, db=db)
diff --git a/backend/app/api/export.py b/backend/app/api/export.py
index 2152105..c3a9a6a 100644
--- a/backend/app/api/export.py
+++ b/backend/app/api/export.py
@@ -1,167 +1,208 @@
-from fastapi import APIRouter, Depends, HTTPException
-from fastapi.responses import Response
-from sqlalchemy.orm import Session
+from typing import Iterable, List
from uuid import UUID
+from fastapi import APIRouter, Body, Depends, HTTPException, Path, Request
+from fastapi.responses import Response
+from sqlalchemy.orm import Session
+
+from app.core.config import settings
+from app.core.rate_limit import limiter
+from app.db.models import Publication, PublicationDownload, Researcher
from app.db.session import get_db
-from app.db.models import Publication, Researcher, PublicationDownload
from app.security.api_key import get_api_key_optional
from app.security.jwt import get_optional_current_researcher
from app.services.sword_generator import SWORDGenerator
from app.services.zip_generator import ZIPGenerator
+from app.utils.orcid_validator import ORCID_PATTERN, is_valid_orcid
+
router = APIRouter(prefix="/export")
-def validate_uuid_list(pub_ids: list[str]) -> list[UUID]:
- valid_ids = []
- for pid in pub_ids:
- try:
- valid_ids.append(UUID(pid))
- except Exception:
- raise HTTPException(
- status_code=400,
- detail=f"Invalid publication ID (not UUID): {pid}"
- )
- return valid_ids
+def _ensure_credentials(api_key: str | None, current: Researcher | None) -> None:
+ """Raise HTTP 401 ("Authentication required") when neither an API key nor a current researcher is present."""
+ if not api_key and not current:
+ raise HTTPException(status_code=401, detail="Authentication required")
+def _record_downloads(db: Session, current: Researcher, pubs: Iterable[Publication]) -> None:
+ """
+ Insert download markers (researcher_id, publication_id).
+
+ - Looks up the existing downloads with a SINGLE query.
+ - Only adds the ones that are missing.
+ """
+ pub_ids = [p.id for p in pubs]
+ if not pub_ids:
+ return
+
+ existing_ids = {
+ row[0]
+ for row in (
+ db.query(PublicationDownload.publication_id)
+ .filter(
+ PublicationDownload.researcher_id == current.id,
+ PublicationDownload.publication_id.in_(pub_ids),
+ )
+ .all()
+ )
+ }
+
+ new_rows = [
+ PublicationDownload(researcher_id=current.id, publication_id=pid)
+ for pid in pub_ids
+ if pid not in existing_ids
+ ]
+ if new_rows:
+ db.add_all(new_rows)
+ db.commit()
+
+
+def _validate_pub_ids(pub_ids: List[UUID]) -> List[UUID]:
+ """Return `pub_ids` unchanged; raise HTTP 413 if it holds more than `settings.MAX_PUB_IDS_BATCH` IDs."""
+ if len(pub_ids) > settings.MAX_PUB_IDS_BATCH:
+ raise HTTPException(status_code=413, detail="Too many publication IDs")
+ return pub_ids
+
+
+def _raise_clear_error_if_researcher_id_was_used(db: Session, pub_ids: List[UUID]) -> None:
+ """
+ If the client mistakenly sends a researcher's UUID to the publications
+ endpoint, respond with an explicit message that guides correct usage.
+ """
+ # Only a request carrying exactly one ID is checked for this mistake.
+ if len(pub_ids) != 1:
+ return
+
+ researcher = db.query(Researcher).filter(Researcher.id == pub_ids[0]).first()
+ if researcher:
+ raise HTTPException(
+ status_code=400,
+ detail=(
+ "The provided UUID belongs to a researcher, not a publication. "
+ "Use publication IDs for this endpoint, or call "
+ f"/api/export/sword/researcher/{researcher.orcid_id} "
+ f"(or /api/export/zip/researcher/{researcher.orcid_id})."
+ ),
+ )
+
+
+# ---------------------------------------------------------
+# ENDPOINT 1: SWORD export of multiple publications
+# ---------------------------------------------------------
+
@router.post("/sword/publications")
+@limiter.limit(settings.RATE_LIMIT_EXPORT)
async def export_multiple_sword(
- pub_ids: list[str],
+ request: Request,
+ pub_ids: List[UUID] = Body(..., min_length=1, max_length=settings.MAX_PUB_IDS_BATCH),
db: Session = Depends(get_db),
api_key: str | None = Depends(get_api_key_optional),
current: Researcher | None = Depends(get_optional_current_researcher),
):
- if not api_key and not current:
- raise HTTPException(status_code=401, detail="Missing credentials")
- validate_uuid_list(pub_ids)
+ _ensure_credentials(api_key, current)
+ _validate_pub_ids(pub_ids)
pubs = db.query(Publication).filter(Publication.id.in_(pub_ids)).all()
-
if not pubs:
+ _raise_clear_error_if_researcher_id_was_used(db, pub_ids)
raise HTTPException(status_code=404, detail="No publications found")
researcher = db.query(Researcher).filter_by(id=pubs[0].researcher_id).first()
xml_bytes = SWORDGenerator.generate_feed_xml(researcher, pubs)
- # Registrar descarga solo si hay usuario logueado
if current:
- for p in pubs:
- exists = (
- db.query(PublicationDownload)
- .filter(
- PublicationDownload.researcher_id == current.id,
- PublicationDownload.publication_id == p.id,
- )
- .first()
- )
- if not exists:
- db.add(PublicationDownload(researcher_id=current.id, publication_id=p.id))
- db.commit()
+ _record_downloads(db, current, pubs)
+
return Response(content=xml_bytes, media_type="application/xml")
+# ---------------------------------------------------------
+# ENDPOINT 2: SWORD export by researcher
+# ---------------------------------------------------------
+
@router.get("/sword/researcher/{orcid_id}")
+@limiter.limit(settings.RATE_LIMIT_EXPORT)
async def export_researcher_sword(
- orcid_id: str,
+ request: Request,
+ orcid_id: str = Path(min_length=19, max_length=19, pattern=ORCID_PATTERN),
db: Session = Depends(get_db),
api_key: str | None = Depends(get_api_key_optional),
current: Researcher | None = Depends(get_optional_current_researcher),
):
- if not api_key and not current:
- raise HTTPException(status_code=401, detail="Missing credentials")
+ _ensure_credentials(api_key, current)
+ if not is_valid_orcid(orcid_id):
+ raise HTTPException(status_code=400, detail="Invalid ORCID iD")
+
researcher = db.query(Researcher).filter_by(orcid_id=orcid_id).first()
if not researcher:
raise HTTPException(status_code=404, detail="Researcher not found")
pubs = db.query(Publication).filter_by(researcher_id=researcher.id).all()
-
if not pubs:
raise HTTPException(status_code=404, detail="No publications found for this researcher")
xml_bytes = SWORDGenerator.generate_feed_xml(researcher, pubs)
if current:
- for p in pubs:
- exists = (
- db.query(PublicationDownload)
- .filter(
- PublicationDownload.researcher_id == current.id,
- PublicationDownload.publication_id == p.id,
- )
- .first()
- )
- if not exists:
- db.add(PublicationDownload(researcher_id=current.id, publication_id=p.id))
- db.commit()
+ _record_downloads(db, current, pubs)
+
return Response(content=xml_bytes, media_type="application/xml")
+# ---------------------------------------------------------
+# ENDPOINT 3: ZIP export of multiple publications
+# ---------------------------------------------------------
+
@router.post("/zip/publications")
+@limiter.limit(settings.RATE_LIMIT_EXPORT)
async def export_multiple_zip(
- pub_ids: list[str],
+ request: Request,
+ pub_ids: List[UUID] = Body(..., min_length=1, max_length=settings.MAX_PUB_IDS_BATCH),
db: Session = Depends(get_db),
api_key: str | None = Depends(get_api_key_optional),
current: Researcher | None = Depends(get_optional_current_researcher),
):
- if not api_key and not current:
- raise HTTPException(status_code=401, detail="Missing credentials")
- validate_uuid_list(pub_ids)
+ _ensure_credentials(api_key, current)
+ _validate_pub_ids(pub_ids)
pubs = db.query(Publication).filter(Publication.id.in_(pub_ids)).all()
-
if not pubs:
+ _raise_clear_error_if_researcher_id_was_used(db, pub_ids)
raise HTTPException(status_code=404, detail="No publications found")
researcher = db.query(Researcher).filter_by(id=pubs[0].researcher_id).first()
zip_bytes = ZIPGenerator.generate_zip(researcher, pubs)
if current:
- for p in pubs:
- exists = (
- db.query(PublicationDownload)
- .filter(
- PublicationDownload.researcher_id == current.id,
- PublicationDownload.publication_id == p.id,
- )
- .first()
- )
- if not exists:
- db.add(PublicationDownload(researcher_id=current.id, publication_id=p.id))
- db.commit()
+ _record_downloads(db, current, pubs)
+
return Response(content=zip_bytes, media_type="application/zip")
+# ---------------------------------------------------------
+# ENDPOINT 4: ZIP export by researcher
+# ---------------------------------------------------------
+
@router.get("/zip/researcher/{orcid_id}")
+@limiter.limit(settings.RATE_LIMIT_EXPORT)
async def export_researcher_zip(
- orcid_id: str,
+ request: Request,
+ orcid_id: str = Path(min_length=19, max_length=19, pattern=ORCID_PATTERN),
db: Session = Depends(get_db),
api_key: str | None = Depends(get_api_key_optional),
current: Researcher | None = Depends(get_optional_current_researcher),
):
- if not api_key and not current:
- raise HTTPException(status_code=401, detail="Missing credentials")
+ _ensure_credentials(api_key, current)
+ if not is_valid_orcid(orcid_id):
+ raise HTTPException(status_code=400, detail="Invalid ORCID iD")
+
researcher = db.query(Researcher).filter_by(orcid_id=orcid_id).first()
if not researcher:
raise HTTPException(status_code=404, detail="Researcher not found")
pubs = db.query(Publication).filter_by(researcher_id=researcher.id).all()
-
if not pubs:
raise HTTPException(status_code=404, detail="No publications found for this researcher")
zip_bytes = ZIPGenerator.generate_zip(researcher, pubs)
if current:
- for p in pubs:
- exists = (
- db.query(PublicationDownload)
- .filter(
- PublicationDownload.researcher_id == current.id,
- PublicationDownload.publication_id == p.id,
- )
- .first()
- )
- if not exists:
- db.add(PublicationDownload(researcher_id=current.id, publication_id=p.id))
- db.commit()
+ _record_downloads(db, current, pubs)
+
return Response(content=zip_bytes, media_type="application/zip")
diff --git a/backend/app/api/researchers.py b/backend/app/api/researchers.py
index 927aafa..82859df 100644
--- a/backend/app/api/researchers.py
+++ b/backend/app/api/researchers.py
@@ -2,11 +2,14 @@ from datetime import datetime
from typing import List
import httpx
-from fastapi import APIRouter, Depends, HTTPException
+from fastapi import APIRouter, Depends, HTTPException, Path, Request
from sqlalchemy.orm import Session
-from app.db.models import Publication, Researcher
+from app.core.config import settings
+from app.core.rate_limit import limiter
+from app.db.models import Publication, PublicationDownload, Researcher
from app.db.session import get_db
+from app.schema.publication import PublicationSchema
from app.schema.researcher import (
ResearcherBatchSearchRequestSchema,
ResearcherBatchSearchResponseSchema,
@@ -14,18 +17,15 @@ from app.schema.researcher import (
ResearcherStatsSchema,
ResearcherWithPublicationsSchema,
)
-from app.services.normalizer import PublicationNormalizer
-from app.services.orcid_client import get_works_summary, get_work_detail
-from app.schema.publication import PublicationSchema
-from app.db.models import PublicationDownload
from app.security.jwt import get_optional_current_researcher
+from app.services.normalizer import PublicationNormalizer
+from app.services.orcid_client import get_display_name, get_work_detail, get_works_summary
+from app.utils.orcid_validator import ORCID_PATTERN, is_valid_orcid
+
router = APIRouter(prefix="/researchers", tags=["researchers"])
-# ---------------------------------------------------------
-# Función auxiliar: detectar si una publicación ha cambiado
-# ---------------------------------------------------------
def publication_changed(existing: Publication, data: dict) -> bool:
fields = [
"title", "subtitle", "type", "journal",
@@ -33,18 +33,13 @@ def publication_changed(existing: Publication, data: dict) -> bool:
"doi", "url", "short_description",
"citation_type", "citation_value",
"language_code", "country",
- "external_ids", "contributors"
+ "external_ids", "contributors",
]
-
- for f in fields:
- if getattr(existing, f) != data[f]:
- return True
- return False
+ return any(getattr(existing, f) != data[f] for f in fields)
def build_researcher_stats(publications: list) -> ResearcherStatsSchema:
publication_types: dict[str, int] = {}
-
for publication in publications:
pub_type = getattr(publication, "type", None) or "unknown"
publication_types[pub_type] = publication_types.get(pub_type, 0) + 1
@@ -98,7 +93,7 @@ def _upsert_researcher_publications(
"doi", "url", "short_description",
"citation_type", "citation_value",
"language_code", "country",
- "external_ids", "contributors"
+ "external_ids", "contributors",
]:
setattr(existing, field, data[field])
existing.last_modified = datetime.utcnow()
@@ -142,12 +137,17 @@ def _decorate_downloaded_by_me(
out: List[PublicationSchema] = []
for p in publications:
out.append(
- PublicationSchema.model_validate(p).model_copy(update={"downloaded_by_me": p.id in downloaded_ids})
+ PublicationSchema.model_validate(p).model_copy(
+ update={"downloaded_by_me": p.id in downloaded_ids}
+ )
)
return out
def build_search_response(orcid_id: str, db: Session, current: Researcher | None) -> ResearcherWithPublicationsSchema:
+ if not is_valid_orcid(orcid_id):
+ raise HTTPException(status_code=400, detail="Invalid ORCID iD")
+
researcher = db.query(Researcher).filter(Researcher.orcid_id == orcid_id).first()
if not researcher:
researcher = Researcher(
@@ -159,6 +159,12 @@ def build_search_response(orcid_id: str, db: Session, current: Researcher | None
db.add(researcher)
db.flush()
+ if not researcher.name:
+ display_name = get_display_name(orcid_id)
+ if display_name:
+ researcher.name = display_name
+ db.flush()
+
publications = _upsert_researcher_publications(researcher, orcid_id, db)
publications_out = _decorate_downloaded_by_me(db=db, current=current, publications=publications)
stats = build_researcher_stats(publications_out)
@@ -175,10 +181,18 @@ def build_search_response(orcid_id: str, db: Session, current: Researcher | None
# ---------------------------------------------------------
-# ENDPOINT 1: SEARCH + SYNC (sin contadores)
+# ENDPOINT 1: SEARCH + SYNC
# ---------------------------------------------------------
-@router.post("/search", response_model=ResearcherBatchSearchResponseSchema, response_model_exclude_none=True)
+
+
+@router.post(
+ "/search",
+ response_model=ResearcherBatchSearchResponseSchema,
+ response_model_exclude_none=True,
+)
+@limiter.limit(settings.RATE_LIMIT_SEARCH_ANON)
def search_and_sync_researchers(
+ request: Request,
payload: ResearcherBatchSearchRequestSchema,
db: Session = Depends(get_db),
current: Researcher | None = Depends(get_optional_current_researcher),
@@ -186,26 +200,33 @@ def search_and_sync_researchers(
results: List[ResearcherWithPublicationsSchema] = []
errors: List[ResearcherSearchErrorSchema] = []
- # Evita llamadas duplicadas a ORCID conservando el orden de entrada.
unique_orcid_ids = list(dict.fromkeys(payload.orcid_ids))
for orcid_id in unique_orcid_ids:
try:
results.append(build_search_response(orcid_id, db, current))
+ except HTTPException as exc:
+ db.rollback()
+ errors.append(
+ ResearcherSearchErrorSchema(
+ orcid_id=orcid_id,
+ detail=str(exc.detail),
+ )
+ )
except httpx.HTTPStatusError as exc:
db.rollback()
errors.append(
ResearcherSearchErrorSchema(
orcid_id=orcid_id,
- detail=f"ORCID devolvió {exc.response.status_code} para {orcid_id}.",
+ detail=f"ORCID returned {exc.response.status_code}",
)
)
- except Exception as exc:
+ except Exception:
db.rollback()
errors.append(
ResearcherSearchErrorSchema(
orcid_id=orcid_id,
- detail=str(exc),
+ detail="Unexpected error while processing ORCID iD",
)
)
@@ -218,14 +239,24 @@ def search_and_sync_researchers(
# ---------------------------------------------------------
-# ENDPOINT 2: SYNC COMPLETO (con contadores + status)
+# ENDPOINT 2: SYNC COMPLETO (usa get_optional_current_researcher: autenticación opcional)
# ---------------------------------------------------------
-@router.post("/{orcid_id}/sync", response_model=ResearcherWithPublicationsSchema, response_model_exclude_none=True)
+
+@router.post(
+ "/{orcid_id}/sync",
+ response_model=ResearcherWithPublicationsSchema,
+ response_model_exclude_none=True,
+)
+@limiter.limit(settings.RATE_LIMIT_SYNC)
def sync_researcher(
- orcid_id: str,
+ request: Request,
+ orcid_id: str = Path(min_length=19, max_length=19, pattern=ORCID_PATTERN),
db: Session = Depends(get_db),
current: Researcher | None = Depends(get_optional_current_researcher),
):
+ if not is_valid_orcid(orcid_id):
+ raise HTTPException(status_code=400, detail="Invalid ORCID iD")
+
researcher = db.query(Researcher).filter_by(orcid_id=orcid_id).first()
if not researcher:
raise HTTPException(status_code=404, detail="Researcher not found")
@@ -234,7 +265,6 @@ def sync_researcher(
groups = works.get("group", [])
publications_output = []
-
new_count = 0
updated_count = 0
unchanged_count = 0
@@ -267,21 +297,17 @@ def sync_researcher(
if existing:
if publication_changed(existing, data):
- # updated
for field in data:
setattr(existing, field, data[field])
existing.last_modified = datetime.utcnow()
existing.status = "updated"
updated_count += 1
else:
- # unchanged
existing.status = "unchanged"
unchanged_count += 1
pub = existing
-
else:
- # new
pub = Publication(
researcher_id=researcher.id,
**data,
diff --git a/backend/app/core/__init__.py b/backend/app/core/__init__.py
new file mode 100644
index 0000000..e69de29
diff --git a/backend/app/core/body_size.py b/backend/app/core/body_size.py
new file mode 100644
index 0000000..323e01f
--- /dev/null
+++ b/backend/app/core/body_size.py
@@ -0,0 +1,35 @@
+"""
+Middleware que limita el tamaño máximo del cuerpo de la petición.
+
+Mitiga ataques de agotamiento de memoria/CPU mediante bodies enormes enviados a endpoints POST.
+Solo valida la cabecera Content-Length (sin ella la petición pasa sin límite). Se aplica antes de que FastAPI deserialice el JSON.
+"""
+
+from __future__ import annotations
+
+from starlette.middleware.base import BaseHTTPMiddleware
+from starlette.requests import Request
+from starlette.responses import JSONResponse, Response
+
+
+class BodySizeLimitMiddleware(BaseHTTPMiddleware):
+ def __init__(self, app, *, max_bytes: int):
+ super().__init__(app)
+ self._max_bytes = max_bytes
+
+ async def dispatch(self, request: Request, call_next) -> Response:
+ content_length = request.headers.get("content-length")
+ if content_length is not None:
+ try:
+ if int(content_length) > self._max_bytes:
+ return JSONResponse(
+ status_code=413,
+ content={"detail": "Request body too large"},
+ )
+ except ValueError:
+ return JSONResponse(
+ status_code=400,
+ content={"detail": "Invalid Content-Length header"},
+ )
+
+ return await call_next(request)
diff --git a/backend/app/core/config.py b/backend/app/core/config.py
new file mode 100644
index 0000000..b77834f
--- /dev/null
+++ b/backend/app/core/config.py
@@ -0,0 +1,183 @@
+"""
+Configuración tipada y validada del backend.
+
+Centraliza la lectura de variables de entorno, valida secretos críticos al
+arranque y evita fallbacks inseguros (p. ej. JWT_SECRET="change_me") en
+entornos productivos.
+"""
+
+from __future__ import annotations
+
+import os
+from functools import lru_cache
+from pathlib import Path
+from typing import List, Literal
+from urllib.parse import urlparse
+
+from dotenv import load_dotenv
+from pydantic import Field, field_validator, model_validator
+from pydantic_settings import BaseSettings, SettingsConfigDict
+
+
+_ENV_PATH = Path(__file__).resolve().parents[2] / ".env"
+load_dotenv(dotenv_path=_ENV_PATH, override=False)
+
+
+def _split_csv(value: str | List[str] | None) -> List[str]:
+ if value is None:
+ return []
+ if isinstance(value, list):
+ return [str(v).strip().rstrip("/") for v in value if str(v).strip()]
+ return [v.strip().rstrip("/") for v in value.split(",") if v.strip()]
+
+
+class Settings(BaseSettings):
+ """
+ Settings compartidos por toda la aplicación (no se declaran `frozen`; trátalos como de solo lectura).
+
+ En `production` se aplican validaciones más estrictas:
+ - JWT_SECRET no puede ser un valor débil ni por defecto.
+ - CORS_ALLOWED_ORIGINS no puede contener "*".
+ - Se exige API_KEY_VALUE (>= 24 caracteres) y TRUSTED_HOSTS explícito; ORCID_CLIENT_ID/SECRET son obligatorios en todos los entornos.
+ """
+
+ model_config = SettingsConfigDict(
+ env_file=str(_ENV_PATH),
+ env_file_encoding="utf-8",
+ extra="ignore",
+ case_sensitive=False,
+ )
+
+ ENVIRONMENT: Literal["development", "staging", "production"] = "development"
+ DEBUG: bool = False
+
+ DATABASE_URL: str = Field(...)
+ REDIS_URL: str | None = None
+ BASE_URL: str = "http://localhost:8000/api"
+
+ JWT_SECRET: str = Field(...)
+ JWT_ALGORITHM: str = "HS256"
+ JWT_EXPIRES_MINUTES: int = 720
+ JWT_ISSUER: str = "orcid-sword-backend"
+ JWT_AUDIENCE: str = "orcid-sword-frontend"
+
+ API_KEY_NAME: str = "X-API-Key"
+ API_KEY_VALUE: str = Field(...)
+
+ ORCID_CLIENT_ID: str = Field(...)
+ ORCID_CLIENT_SECRET: str = Field(...)
+ ORCID_REDIRECT_URI: str = "http://localhost:8000/api/auth/orcid/callback"
+ ORCID_OAUTH_STATE_ENABLED: bool = True
+ ORCID_OAUTH_STATE_COOKIE: str = "orcid_oauth_state"
+ ORCID_OAUTH_STATE_TTL_SECONDS: int = 600
+
+ CORS_ALLOWED_ORIGINS: str = ""
+
+ TRUSTED_HOSTS: str = "*"
+
+ RATE_LIMIT_DEFAULT: str = "60/minute"
+ RATE_LIMIT_AUTH: str = "10/minute"
+ RATE_LIMIT_SEARCH_ANON: str = "5/minute"
+ RATE_LIMIT_SEARCH_AUTH: str = "30/minute"
+ RATE_LIMIT_EXPORT: str = "20/minute"
+ RATE_LIMIT_SYNC: str = "5/minute"
+
+ MAX_ORCID_BATCH: int = 25
+ MAX_PUB_IDS_BATCH: int = 500
+ MAX_REQUEST_BODY_BYTES: int = 1_048_576 # 1 MiB
+
+ DOCS_ENABLED: bool = True
+
+ SECURITY_HSTS_SECONDS: int = 31_536_000
+ SECURITY_HSTS_INCLUDE_SUBDOMAINS: bool = True
+ SECURITY_HSTS_PRELOAD: bool = False
+
+ @model_validator(mode="after")
+ def _validate_security(self) -> "Settings":
+ cors_origins = self.cors_allowed_origins
+ trusted_hosts = self.trusted_hosts
+
+ if self.ENVIRONMENT == "production":
+ weak = {"change_me", "changeme", "secret", "password", ""}
+ if self.JWT_SECRET.strip().lower() in weak:
+ raise ValueError(
+ "JWT_SECRET es débil o está sin configurar. "
+ "Define un secreto aleatorio fuerte (>= 32 bytes)."
+ )
+ if len(self.JWT_SECRET) < 32:
+ raise ValueError(
+ "JWT_SECRET debe tener al menos 32 caracteres en producción."
+ )
+ if "*" in cors_origins:
+ raise ValueError(
+ "CORS_ALLOWED_ORIGINS no puede contener '*' en producción."
+ )
+ if not cors_origins:
+ raise ValueError(
+ "CORS_ALLOWED_ORIGINS debe definirse explícitamente en producción."
+ )
+ if not self.API_KEY_VALUE or len(self.API_KEY_VALUE) < 24:
+ raise ValueError(
+ "API_KEY_VALUE debe tener al menos 24 caracteres en producción."
+ )
+ if trusted_hosts == ["*"]:
+ raise ValueError(
+ "TRUSTED_HOSTS debe definirse explícitamente en producción."
+ )
+
+ for origin in cors_origins:
+ parsed = urlparse(origin)
+ if parsed.scheme not in {"http", "https"} or not parsed.netloc:
+ raise ValueError(f"Origen CORS inválido: {origin!r}")
+
+ return self
+
+ @property
+ def is_production(self) -> bool:
+ return self.ENVIRONMENT == "production"
+
+ @property
+ def cors_allowed_origins(self) -> List[str]:
+ return _split_csv(self.CORS_ALLOWED_ORIGINS)
+
+ @property
+ def trusted_hosts(self) -> List[str]:
+ parsed = _split_csv(self.TRUSTED_HOSTS)
+ return parsed or ["*"]
+
+ @property
+ def docs_url(self) -> str | None:
+ return "/docs" if self.DOCS_ENABLED else None
+
+ @property
+ def redoc_url(self) -> str | None:
+ return "/redoc" if self.DOCS_ENABLED else None
+
+ @property
+ def openapi_url(self) -> str | None:
+ return "/openapi.json" if self.DOCS_ENABLED else None
+
+
+@lru_cache(maxsize=1)
+def get_settings() -> Settings:
+ """
+ Devuelve la instancia única de configuración.
+
+ Se cachea para no releer entorno/archivos en cada request.
+ """
+ return Settings() # type: ignore[call-arg]
+
+
+settings = get_settings()
+
+
+def reload_settings_for_tests() -> Settings:
+ """
+ Helper para tests: invalida la caché y recarga settings.
+ """
+ get_settings.cache_clear()
+ globals()["settings"] = get_settings()
+ return globals()["settings"]
+
+
+__all__ = ["Settings", "get_settings", "reload_settings_for_tests", "settings"]
diff --git a/backend/app/core/error_handlers.py b/backend/app/core/error_handlers.py
new file mode 100644
index 0000000..52803fc
--- /dev/null
+++ b/backend/app/core/error_handlers.py
@@ -0,0 +1,67 @@
+"""
+Manejadores de errores que NO filtran información sensible.
+
+- En producción, las excepciones no controladas devuelven un mensaje genérico.
+- Fuera de producción y con DEBUG activo, se incluye `type` para depurar (sin trazas).
+- Errores de validación se devuelven con 422 estándar de FastAPI.
+"""
+
+from __future__ import annotations
+
+import logging
+import uuid
+
+from fastapi import HTTPException, Request
+from fastapi.exceptions import RequestValidationError
+from fastapi.responses import JSONResponse
+from sqlalchemy.exc import SQLAlchemyError
+
+from app.core.config import settings
+
+
+logger = logging.getLogger("app.error")
+
+
+async def http_exception_handler(request: Request, exc: HTTPException) -> JSONResponse:
+ return JSONResponse(
+ status_code=exc.status_code,
+ content={"detail": exc.detail},
+ headers=getattr(exc, "headers", None),
+ )
+
+
+async def validation_exception_handler(
+ request: Request, exc: RequestValidationError
+) -> JSONResponse:
+ safe_errors = []
+ for err in exc.errors():
+ safe_errors.append(
+ {
+ "loc": err.get("loc"),
+ "msg": err.get("msg"),
+ "type": err.get("type"),
+ }
+ )
+ return JSONResponse(status_code=422, content={"detail": safe_errors})
+
+
+async def sqlalchemy_exception_handler(
+ request: Request, exc: SQLAlchemyError
+) -> JSONResponse:
+ error_id = str(uuid.uuid4())
+ logger.exception("DB error [%s] on %s %s", error_id, request.method, request.url.path)
+ return JSONResponse(
+ status_code=500,
+ content={"detail": "Database error", "error_id": error_id},
+ )
+
+
+async def unhandled_exception_handler(request: Request, exc: Exception) -> JSONResponse:
+ error_id = str(uuid.uuid4())
+ logger.exception(
+ "Unhandled error [%s] on %s %s", error_id, request.method, request.url.path
+ )
+ payload: dict = {"detail": "Internal server error", "error_id": error_id}
+ if not settings.is_production and settings.DEBUG:
+ payload["type"] = exc.__class__.__name__
+ return JSONResponse(status_code=500, content=payload)
diff --git a/backend/app/core/logging_config.py b/backend/app/core/logging_config.py
new file mode 100644
index 0000000..0dd3f7c
--- /dev/null
+++ b/backend/app/core/logging_config.py
@@ -0,0 +1,28 @@
+"""
+Configuración de logging estructurada y minimalista.
+
+- Formatea con timestamp, nivel y logger.
+- Usa nivel DEBUG cuando `settings.DEBUG` está activo; en caso contrario, INFO.
+- Silencia logs ruidosos de librerías externas para no filtrar headers.
+"""
+
+from __future__ import annotations
+
+import logging
+
+from app.core.config import settings
+
+
+_LOG_FORMAT = "%(asctime)s %(levelname)s %(name)s :: %(message)s"
+
+
+def configure_logging() -> None:
+ level = logging.DEBUG if settings.DEBUG else logging.INFO
+
+ logging.basicConfig(level=level, format=_LOG_FORMAT)
+
+ for noisy in ("httpx", "httpcore", "sqlalchemy.engine.Engine"):
+ logging.getLogger(noisy).setLevel(logging.WARNING)
+
+ logging.getLogger("uvicorn.error").setLevel(level)
+ logging.getLogger("uvicorn.access").setLevel(logging.WARNING)
diff --git a/backend/app/core/rate_limit.py b/backend/app/core/rate_limit.py
new file mode 100644
index 0000000..942bb2b
--- /dev/null
+++ b/backend/app/core/rate_limit.py
@@ -0,0 +1,66 @@
+"""
+Rate limiting basado en SlowAPI.
+
+- Usa Redis como backend si `REDIS_URL` está definido (compartido entre workers).
+- Cae a memoria local cuando `REDIS_URL` no está definido.
+- Identifica al cliente por `orcid_id` cuando hay un investigador en `request.state`; si no, por IP,
+ para que un atacante autenticado no comparta cupo con su IP.
+"""
+
+from __future__ import annotations
+
+from typing import Optional
+
+from slowapi import Limiter
+from slowapi.errors import RateLimitExceeded
+from slowapi.util import get_remote_address
+from starlette.requests import Request
+from starlette.responses import JSONResponse
+
+from app.core.config import settings
+
+
+def _key_func(request: Request) -> str:
+ """
+ Devuelve la clave de rate limit para el request.
+
+ - Si hay un investigador autenticado en el state, usa su orcid_id.
+ - Si hay cabecera X-Forwarded-For (ngrok, nginx, cualquier proxy inverso),
+ usa la primera IP de la cadena (la del cliente real solo si un proxy de confianza fija la cabecera; el cliente puede falsificarla).
+ - En caso contrario, usa la IP remota del socket.
+ """
+ researcher = getattr(request.state, "researcher", None)
+ if researcher is not None:
+ return f"user:{getattr(researcher, 'orcid_id', None) or researcher.id}"
+ forwarded_for = request.headers.get("x-forwarded-for")
+ if forwarded_for:
+ client_ip = forwarded_for.split(",")[0].strip()
+ return f"ip:{client_ip}"
+ return f"ip:{get_remote_address(request)}"
+
+
+def _build_limiter() -> Limiter:
+ storage_uri: Optional[str] = settings.REDIS_URL
+ return Limiter(
+ key_func=_key_func,
+ default_limits=[settings.RATE_LIMIT_DEFAULT],
+ storage_uri=storage_uri,
+ headers_enabled=False,
+ strategy="fixed-window",
+ )
+
+
+limiter = _build_limiter()
+
+
+def rate_limit_exceeded_handler(request: Request, exc: RateLimitExceeded) -> JSONResponse:
+ """
+ Respuesta uniforme cuando se supera el límite.
+
+ No revela los límites internos exactos, para no dar a un atacante un oráculo con el que calibrarlos.
+ """
+ return JSONResponse(
+ status_code=429,
+ content={"detail": "Too many requests, slow down."},
+ headers={"Retry-After": "60"},
+ )
diff --git a/backend/app/core/security_headers.py b/backend/app/core/security_headers.py
new file mode 100644
index 0000000..9a20ea8
--- /dev/null
+++ b/backend/app/core/security_headers.py
@@ -0,0 +1,75 @@
+from __future__ import annotations
+
+from starlette.middleware.base import BaseHTTPMiddleware
+from starlette.requests import Request
+from starlette.responses import Response
+
+from app.core.config import Settings
+
+
+_DOCS_PATHS = ("/docs", "/redoc", "/openapi.json")
+
+_BASE_CSP = (
+ "default-src 'none'; "
+ "frame-ancestors 'none'; "
+ "base-uri 'none'; "
+ "form-action 'none'"
+)
+
+_SWAGGER_CSP = (
+ "default-src 'self'; "
+ "img-src 'self' data: https://fastapi.tiangolo.com; "
+ "script-src 'self' https://cdn.jsdelivr.net 'unsafe-inline'; "
+ "style-src 'self' https://cdn.jsdelivr.net 'unsafe-inline'; "
+ "font-src 'self' data: https://cdn.jsdelivr.net; "
+ "connect-src 'self'; "
+ "frame-ancestors 'none'; "
+ "base-uri 'self'; "
+ "form-action 'self'"
+)
+
+
+class SecurityHeadersMiddleware(BaseHTTPMiddleware):
+ """
+ Inserta cabeceras de seguridad en cada respuesta.
+ """
+
+ def __init__(self, app, settings: Settings):
+ super().__init__(app)
+ self._settings = settings
+
+ async def dispatch(self, request: Request, call_next) -> Response:
+ response: Response = await call_next(request)
+
+ response.headers.setdefault("X-Content-Type-Options", "nosniff")
+ response.headers.setdefault("X-Frame-Options", "DENY")
+ response.headers.setdefault("Referrer-Policy", "strict-origin-when-cross-origin")
+ response.headers.setdefault(
+ "Permissions-Policy",
+ "geolocation=(), microphone=(), camera=(), payment=(), usb=(), "
+ "accelerometer=(), gyroscope=(), magnetometer=(), interest-cohort=()",
+ )
+
+ response.headers.setdefault("Cross-Origin-Opener-Policy", "same-origin-allow-popups")
+ response.headers.setdefault("Cross-Origin-Resource-Policy", "same-site")
+ response.headers.setdefault("X-Permitted-Cross-Domain-Policies", "none")
+
+ if request.url.path in _DOCS_PATHS:
+ response.headers.setdefault("Content-Security-Policy", _SWAGGER_CSP)
+ else:
+ response.headers.setdefault("Content-Security-Policy", _BASE_CSP)
+
+ if request.url.scheme == "https" or self._settings.is_production:
+ hsts = f"max-age={self._settings.SECURITY_HSTS_SECONDS}"
+ if self._settings.SECURITY_HSTS_INCLUDE_SUBDOMAINS:
+ hsts += "; includeSubDomains"
+ if self._settings.SECURITY_HSTS_PRELOAD:
+ hsts += "; preload"
+ response.headers.setdefault("Strict-Transport-Security", hsts)
+
+ if "server" in response.headers:
+ del response.headers["server"]
+ if "x-powered-by" in response.headers:
+ del response.headers["x-powered-by"]
+
+ return response
diff --git a/backend/app/db/base.py b/backend/app/db/base.py
index 59be703..d350806 100644
--- a/backend/app/db/base.py
+++ b/backend/app/db/base.py
@@ -1,3 +1,7 @@
from sqlalchemy.orm import declarative_base
+# ---------------------------------------------------------
+# Base de datos
+# ---------------------------------------------------------
+
Base = declarative_base()
diff --git a/backend/app/db/models.py b/backend/app/db/models.py
index ae61527..e4138b4 100644
--- a/backend/app/db/models.py
+++ b/backend/app/db/models.py
@@ -6,6 +6,9 @@ from datetime import datetime
from app.db.session import Base
+# ---------------------------------------------------------
+# Modelo de investigador
+# ---------------------------------------------------------
class Researcher(Base):
__tablename__ = "researchers"
@@ -18,6 +21,9 @@ class Researcher(Base):
publications = relationship("Publication", back_populates="researcher", cascade="all, delete-orphan")
+# ---------------------------------------------------------
+# Modelo de publicación
+# ---------------------------------------------------------
class Publication(Base):
__tablename__ = "publications"
@@ -65,6 +71,9 @@ class Publication(Base):
# Legacy: descargado global (deprecado). Mantener por compatibilidad de DB.
downloaded = Column(Boolean, nullable=False, default=False)
+# ---------------------------------------------------------
+# Modelo de descarga de publicación
+# ---------------------------------------------------------
class PublicationDownload(Base):
"""
diff --git a/backend/app/db/repositories/publication_repository.py b/backend/app/db/repositories/publication_repository.py
index 590010b..ca23694 100644
--- a/backend/app/db/repositories/publication_repository.py
+++ b/backend/app/db/repositories/publication_repository.py
@@ -1,8 +1,16 @@
from sqlalchemy.orm import Session
from app.db.models import Publication
+# ---------------------------------------------------------
+# Repositorio de publicaciones
+# ---------------------------------------------------------
+
class PublicationRepository:
+ # ---------------------------------------------------------
+ # Función auxiliar: obtener publicación por put_code
+ # ---------------------------------------------------------
+
@staticmethod
def get_by_put_code(db: Session, researcher_id: str, put_code: int):
"""
@@ -17,6 +25,10 @@ class PublicationRepository:
.first()
)
+ # ---------------------------------------------------------
+ # Función auxiliar: crear una nueva publicación
+ # ---------------------------------------------------------
+
@staticmethod
def create(db: Session, researcher_id: str, data: dict):
"""
@@ -37,6 +49,10 @@ class PublicationRepository:
db.refresh(pub)
return pub
+ # ---------------------------------------------------------
+ # Función auxiliar: actualizar una publicación existente
+ # ---------------------------------------------------------
+
@staticmethod
def update(db: Session, publication: Publication, data: dict):
"""
@@ -53,6 +69,10 @@ class PublicationRepository:
db.refresh(publication)
return publication
+ # ---------------------------------------------------------
+ # Función auxiliar: listar publicaciones de un investigador
+ # ---------------------------------------------------------
+
@staticmethod
def list_by_researcher(db: Session, researcher_id: str):
"""
diff --git a/backend/app/db/repositories/researcher_repository.py b/backend/app/db/repositories/researcher_repository.py
index 4aba7af..1b8c3b2 100644
--- a/backend/app/db/repositories/researcher_repository.py
+++ b/backend/app/db/repositories/researcher_repository.py
@@ -2,13 +2,24 @@ from sqlalchemy.orm import Session
from app.db.models import Researcher
from sqlalchemy.sql import func
+# ---------------------------------------------------------
+# Repositorio de investigadores
+# ---------------------------------------------------------
class ResearcherRepository:
+ # ---------------------------------------------------------
+ # Función auxiliar: obtener investigador por ORCID ID
+ # ---------------------------------------------------------
+
@staticmethod
def get_by_orcid(db: Session, orcid_id: str):
return db.query(Researcher).filter(Researcher.orcid_id == orcid_id).first()
+ # ---------------------------------------------------------
+ # Función auxiliar: crear un nuevo investigador
+ # ---------------------------------------------------------
+
@staticmethod
def create(db: Session, orcid_id: str, name: str = None):
researcher = Researcher(orcid_id=orcid_id, name=name)
@@ -17,6 +28,10 @@ class ResearcherRepository:
db.refresh(researcher)
return researcher
+ # ---------------------------------------------------------
+ # Función auxiliar: actualizar la última sincronización
+ # ---------------------------------------------------------
+
@staticmethod
def update_last_sync(db: Session, researcher: Researcher):
researcher.last_sync_at = func.now()
diff --git a/backend/app/db/repositories/syncjob_repository.py b/backend/app/db/repositories/syncjob_repository.py
index 1cb00a1..7860789 100644
--- a/backend/app/db/repositories/syncjob_repository.py
+++ b/backend/app/db/repositories/syncjob_repository.py
@@ -2,9 +2,16 @@ from sqlalchemy.orm import Session
from app.db.models import SyncJob
from sqlalchemy.sql import func
+# ---------------------------------------------------------
+# Repositorio de trabajos de sincronización
+# ---------------------------------------------------------
class SyncJobRepository:
+ # ---------------------------------------------------------
+ # Función auxiliar: iniciar un nuevo trabajo de sincronización
+ # ---------------------------------------------------------
+
@staticmethod
def start_job(db: Session, researcher_id: str):
job = SyncJob(
@@ -17,6 +24,10 @@ class SyncJobRepository:
db.refresh(job)
return job
+ # ---------------------------------------------------------
+ # Función auxiliar: finalizar un trabajo de sincronización
+ # ---------------------------------------------------------
+
@staticmethod
def finish_job(db: Session, job: SyncJob, new_records: int, updated_records: int):
job.status = "finished"
diff --git a/backend/app/db/session.py b/backend/app/db/session.py
index 37b271e..afdd051 100644
--- a/backend/app/db/session.py
+++ b/backend/app/db/session.py
@@ -9,6 +9,7 @@ load_dotenv()
# -----------------------------
# DATABASE URL
# -----------------------------
+
DATABASE_URL = os.getenv("DATABASE_URL")
engine = create_engine(
@@ -29,6 +30,7 @@ Base = declarative_base()
# -----------------------------
# DB SESSION DEPENDENCY
# -----------------------------
+
def get_db():
db = SessionLocal()
try:
@@ -40,17 +42,25 @@ def get_db():
# -----------------------------
# INIT DB (CREA TABLAS)
# -----------------------------
+
def init_db():
+
# Importa modelos para que SQLAlchemy los registre
+
import app.db.models # noqa
# Crea todas las tablas si no existen
+
Base.metadata.create_all(bind=engine)
# Pequeñas migraciones "best-effort" para entornos sin Alembic.
# (create_all no altera tablas existentes)
+
_ensure_columns()
+# ---------------------------------------------------------
+# Función auxiliar: asegurar columnas existentes
+# ---------------------------------------------------------
def _ensure_columns():
insp = inspect(engine)
diff --git a/backend/app/main.py b/backend/app/main.py
index 1e5d6c8..fe98b86 100644
--- a/backend/app/main.py
+++ b/backend/app/main.py
@@ -1,68 +1,154 @@
-from fastapi import Depends, FastAPI
+"""
+Entry point del backend FastAPI.
+
+Aplica un perfil de seguridad por defecto:
+- Configuración tipada (Pydantic Settings) que falla rápido en producción.
+- TrustedHostMiddleware (anti Host-header injection).
+- CORS con lista blanca estricta (sin `*`).
+- Body size limit (anti DoS por payload).
+- Cabeceras de seguridad HTTP.
+- Rate limiting (slowapi) con backend Redis si está configurado.
+- Error handlers que NO filtran trazas ni internals.
+"""
+
+from __future__ import annotations
+
+import logging
+
+from fastapi import Depends, FastAPI, HTTPException, Request
+from fastapi.exceptions import RequestValidationError
from fastapi.middleware.cors import CORSMiddleware
+from fastapi.middleware.trustedhost import TrustedHostMiddleware
+from slowapi.errors import RateLimitExceeded
+from sqlalchemy.exc import SQLAlchemyError
from sqlalchemy.orm import Session
-from app.db.session import init_db
-from app.db.session import get_db
-from app.api.researchers import router as researchers_router
+from app.api.auth import complete_oauth_login_response, router as auth_router
from app.api.export import router as export_router
-from app.api.auth import router as auth_router
-from app.api.auth import _complete_oauth_login
-from app.schema.auth import OrcidLoginResponseSchema
+from app.api.researchers import router as researchers_router
+from app.core.body_size import BodySizeLimitMiddleware
+from app.core.config import settings
+from app.core.error_handlers import (
+ http_exception_handler,
+ sqlalchemy_exception_handler,
+ unhandled_exception_handler,
+ validation_exception_handler,
+)
+from app.core.logging_config import configure_logging
+from app.core.rate_limit import limiter, rate_limit_exceeded_handler
+from app.core.security_headers import SecurityHeadersMiddleware
+from app.db.session import get_db, init_db
from app.scheduler.sync_scheduler import start_scheduler
+from app.schema.auth import OrcidLoginResponseSchema
+
+
+configure_logging()
+logger = logging.getLogger("app.main")
-# ---------------------------------------------------------
-# Crear instancia principal de FastAPI
-# ---------------------------------------------------------
app = FastAPI(
title="ORCID SWORD Backend",
description="Backend para sincronización ORCID y exportación SWORD",
- version="1.0.0"
+ version="1.0.0",
+ docs_url=settings.docs_url,
+ redoc_url=settings.redoc_url,
+ openapi_url=settings.openapi_url,
)
# ---------------------------------------------------------
-# Crear tablas al iniciar la aplicación
+# Middlewares (orden importa: el último añadido es el más externo)
# ---------------------------------------------------------
+
+app.state.limiter = limiter
+app.add_exception_handler(RateLimitExceeded, rate_limit_exceeded_handler)
+
+app.add_middleware(SecurityHeadersMiddleware, settings=settings)
+
+app.add_middleware(
+ BodySizeLimitMiddleware,
+ max_bytes=settings.MAX_REQUEST_BODY_BYTES,
+)
+
+app.add_middleware(
+ CORSMiddleware,
+ allow_origins=settings.cors_allowed_origins,
+ allow_credentials=True,
+ allow_methods=["GET", "POST", "PUT", "PATCH", "DELETE", "OPTIONS"],
+ allow_headers=[
+ "Authorization",
+ "Content-Type",
+ "Accept",
+ "Origin",
+ "X-Requested-With",
+ settings.API_KEY_NAME,
+ ],
+ expose_headers=["Content-Disposition", "X-RateLimit-Remaining", "X-RateLimit-Reset"],
+ max_age=600,
+)
+
+app.add_middleware(
+ TrustedHostMiddleware,
+ allowed_hosts=settings.trusted_hosts,
+)
+
+
+# ---------------------------------------------------------
+# Exception handlers
+# ---------------------------------------------------------
+
+app.add_exception_handler(HTTPException, http_exception_handler)
+app.add_exception_handler(RequestValidationError, validation_exception_handler)
+app.add_exception_handler(SQLAlchemyError, sqlalchemy_exception_handler)
+app.add_exception_handler(Exception, unhandled_exception_handler)
+
+
+# ---------------------------------------------------------
+# Lifecycle
+# ---------------------------------------------------------
+
@app.on_event("startup")
-def startup_event():
- init_db() # 🔥 CREA TABLAS
- start_scheduler() # 🔥 INICIA SCHEDULER
+def on_startup() -> None:
+ init_db()
+ start_scheduler()
+ logger.info(
+ "Backend ready (env=%s, docs=%s)",
+ settings.ENVIRONMENT,
+ bool(settings.DOCS_ENABLED),
+ )
# ---------------------------------------------------------
# Healthcheck
# ---------------------------------------------------------
+
@app.get("/health")
-def health():
+def health() -> dict[str, str]:
return {"status": "ok"}
+# ---------------------------------------------------------
+# Alias del callback OAuth (mismo flujo, mismo endurecimiento)
+# ---------------------------------------------------------
+
@app.get("/callback", response_model=OrcidLoginResponseSchema)
-def oauth_callback_root(code: str, db: Session = Depends(get_db)):
+def oauth_callback_root(
+ request: Request,
+ code: str,
+ state: str | None = None,
+ db: Session = Depends(get_db),
+):
"""
- Alias para probar redirect URIs como `https://127.0.0.1/callback` en local.
- Intercambia el code con ORCID y emite el JWT.
+ Alias para integraciones que registran un redirect_uri tipo
+ `https://{host}/callback` en ORCID.
"""
- return _complete_oauth_login(code=code, db=db)
+ return complete_oauth_login_response(request=request, code=code, state=state, db=db)
# ---------------------------------------------------------
-# Registrar routers
+# Routers
# ---------------------------------------------------------
+
app.include_router(researchers_router, prefix="/api")
app.include_router(export_router, prefix="/api")
app.include_router(auth_router, prefix="/api")
-
-
-# ---------------------------------------------------------
-# CORS
-# ---------------------------------------------------------
-app.add_middleware(
- CORSMiddleware,
- allow_origins=["*"], # en producción limitar
- allow_credentials=True,
- allow_methods=["*"],
- allow_headers=["*"],
-)
diff --git a/backend/app/scheduler/sync_scheduler.py b/backend/app/scheduler/sync_scheduler.py
index 586e054..69ce594 100644
--- a/backend/app/scheduler/sync_scheduler.py
+++ b/backend/app/scheduler/sync_scheduler.py
@@ -9,9 +9,16 @@ import os
# Cargar variables del .env
load_dotenv()
+# ---------------------------------------------------------
+# Variables de entorno
+# ---------------------------------------------------------
+
API_KEY = os.getenv("API_KEY_VALUE")
BASE_URL = os.getenv("BASE_URL")
+# ---------------------------------------------------------
+# Función auxiliar: ejecutar sincronización mensual
+# ---------------------------------------------------------
def run_monthly_sync():
db = SessionLocal()
@@ -36,6 +43,9 @@ def run_monthly_sync():
db.close()
+# ---------------------------------------------------------
+# Función auxiliar: iniciar el scheduler
+# ---------------------------------------------------------
def start_scheduler():
scheduler = BackgroundScheduler()
diff --git a/backend/app/schema/auth.py b/backend/app/schema/auth.py
index 869fde1..bd09626 100644
--- a/backend/app/schema/auth.py
+++ b/backend/app/schema/auth.py
@@ -1,11 +1,17 @@
from pydantic import BaseModel, Field
+# ---------------------------------------------------------
+# Modelo de solicitud de login OAuth
+# ---------------------------------------------------------
class OrcidLoginRequestSchema(BaseModel):
# `code` is the authorization code returned by ORCID OAuth after the user signs in.
# Exchanging it for tokens must happen server-side.
code: str = Field(..., examples=["Q70Y3A"])
+# ---------------------------------------------------------
+# Modelo de respuesta de login OAuth
+# ---------------------------------------------------------
class OrcidLoginResponseSchema(BaseModel):
access_token: str
diff --git a/backend/app/schema/export.py b/backend/app/schema/export.py
new file mode 100644
index 0000000..18ef6f7
--- /dev/null
+++ b/backend/app/schema/export.py
@@ -0,0 +1,23 @@
+"""
+Schemas de los endpoints de export.
+
+El backend recibe `pub_ids` como UUIDs en formato string. Pydantic ya los
+valida y convierte; aquí además aplicamos un tope de tamaño para impedir
+peticiones gigantes.
+"""
+
+from __future__ import annotations
+
+from typing import List
+from uuid import UUID
+
+from pydantic import BaseModel, Field
+
+from app.core.config import settings
+
+
+class PublicationIdsRequestSchema(BaseModel):
+ pub_ids: List[UUID] = Field(
+ min_length=1,
+ max_length=settings.MAX_PUB_IDS_BATCH,
+ )
diff --git a/backend/app/schema/publication.py b/backend/app/schema/publication.py
index a36c813..45bb473 100644
--- a/backend/app/schema/publication.py
+++ b/backend/app/schema/publication.py
@@ -3,6 +3,10 @@ from uuid import UUID
from typing import Optional, List, Any
from datetime import datetime
+# ---------------------------------------------------------
+# Modelo de publicación
+# ---------------------------------------------------------
+
class PublicationSchema(BaseModel):
id: UUID
put_code: int | None = None
diff --git a/backend/app/schema/researcher.py b/backend/app/schema/researcher.py
index 2be69a4..8753bc6 100644
--- a/backend/app/schema/researcher.py
+++ b/backend/app/schema/researcher.py
@@ -1,13 +1,18 @@
-from pydantic import BaseModel, Field
-from uuid import UUID
-from typing import Optional, List, Dict
from datetime import datetime
+from typing import Dict, List, Optional
+from uuid import UUID
+
+from pydantic import BaseModel, Field, field_validator
+
+from app.core.config import settings
from app.schema.publication import PublicationSchema
+from app.utils.orcid_validator import ORCID_PATTERN, is_valid_orcid
+
class ResearcherSchema(BaseModel):
id: UUID
- orcid_id: str
- name: Optional[str]
+ orcid_id: str = Field(min_length=19, max_length=19, pattern=ORCID_PATTERN)
+ name: Optional[str] = Field(default=None, max_length=255)
authenticated: bool
last_sync_at: Optional[datetime]
@@ -33,7 +38,25 @@ class ResearcherWithPublicationsSchema(BaseModel):
class ResearcherBatchSearchRequestSchema(BaseModel):
- orcid_ids: List[str] = Field(min_length=1)
+ orcid_ids: List[str] = Field(
+ min_length=1,
+ max_length=settings.MAX_ORCID_BATCH,
+ )
+
+ @field_validator("orcid_ids")
+ @classmethod
+ def _validate_each(cls, value: List[str]) -> List[str]:
+ deduped: List[str] = []
+ seen = set()
+ for v in value:
+ if not isinstance(v, str):
+ raise ValueError("ORCID iD debe ser string")
+ if not is_valid_orcid(v):
+ raise ValueError(f"ORCID iD inválido: {v}")
+ if v not in seen:
+ seen.add(v)
+ deduped.append(v)
+ return deduped
class ResearcherSearchErrorSchema(BaseModel):
diff --git a/backend/app/security/api_key.py b/backend/app/security/api_key.py
index 7dc9197..c4b4336 100644
--- a/backend/app/security/api_key.py
+++ b/backend/app/security/api_key.py
@@ -1,43 +1,52 @@
-import os
-from dotenv import load_dotenv
+"""
+Autenticación por API key (uso máquina-a-máquina, p. ej. el scheduler interno).
+
+Endurecimiento:
+- Comparación en tiempo constante (`hmac.compare_digest`) para evitar timing attacks.
+- No se loggea el valor de la cabecera bajo ninguna circunstancia.
+- Se separa este mecanismo del JWT de usuario; la API key NO debe usarse como
+ prueba de identidad de un investigador.
+"""
+
+from __future__ import annotations
+
+import hmac
+
from fastapi import Depends, HTTPException, status
from fastapi.security import APIKeyHeader
-# Cargar variables del .env
-load_dotenv()
-
-API_KEY_NAME = os.getenv("API_KEY_NAME")
-API_KEY_VALUE = os.getenv("API_KEY_VALUE")
-
-if not API_KEY_NAME:
- raise RuntimeError("ERROR: La variable API_KEY_NAME no está definida en el .env")
-
-if not API_KEY_VALUE:
- raise RuntimeError("ERROR: La variable API_KEY_VALUE no está definida en el .env")
-
-api_key_header = APIKeyHeader(name=API_KEY_NAME, auto_error=False)
+from app.core.config import settings
-def get_api_key(api_key: str = Depends(api_key_header)):
- if api_key != API_KEY_VALUE:
+api_key_header = APIKeyHeader(name=settings.API_KEY_NAME, auto_error=False)
+
+
+def _is_valid_key(provided: str | None) -> bool:
+ if not provided or not settings.API_KEY_VALUE:
+ return False
+ return hmac.compare_digest(provided.encode("utf-8"), settings.API_KEY_VALUE.encode("utf-8"))
+
+
+def get_api_key(api_key: str | None = Depends(api_key_header)) -> str:
+ if not _is_valid_key(api_key):
raise HTTPException(
status_code=status.HTTP_401_UNAUTHORIZED,
- detail="API key inválida o ausente."
+ detail="Invalid or missing API key",
)
- return api_key
+ return api_key # type: ignore[return-value]
-def get_api_key_optional(api_key: str = Depends(api_key_header)) -> str | None:
+def get_api_key_optional(api_key: str | None = Depends(api_key_header)) -> str | None:
"""
- Devuelve la API key si está presente y es correcta.
- - Si no está presente: None
- - Si está presente pero incorrecta: 401
+ - Si no llega cabecera: None.
+ - Si llega y es válida: la devuelve.
+ - Si llega pero es inválida: 401.
"""
if api_key is None:
return None
- if api_key != API_KEY_VALUE:
+ if not _is_valid_key(api_key):
raise HTTPException(
status_code=status.HTTP_401_UNAUTHORIZED,
- detail="API key inválida."
+ detail="Invalid API key",
)
return api_key
diff --git a/backend/app/security/jwt.py b/backend/app/security/jwt.py
index e8a930c..7edab3d 100644
--- a/backend/app/security/jwt.py
+++ b/backend/app/security/jwt.py
@@ -1,75 +1,138 @@
-import os
+"""
+Emisión y verificación de JWT.
+
+Endurecimiento aplicado:
+- Sin fallback de secreto débil: si la configuración no es válida, falla al arranque.
+- `iss` y `aud` obligatorios.
+- `nbf` (not-before) y `iat` validados.
+- `typ=access` para evitar mezclar tipos de token.
+- Algoritmo fijo (no se acepta "none" ni cambios por payload).
+- Errores opacos: nunca se expone el motivo del fallo de verificación al cliente.
+"""
+
+from __future__ import annotations
+
from datetime import datetime, timedelta, timezone
from typing import Any
+from uuid import uuid4
-from fastapi import Depends, HTTPException, status
+from fastapi import Depends, HTTPException, Request, status
from fastapi.security import HTTPAuthorizationCredentials, HTTPBearer
from jose import JWTError, jwt
from sqlalchemy.orm import Session
-from dotenv import load_dotenv
+from app.core.config import settings
from app.db.models import Researcher
from app.db.session import get_db
-
-load_dotenv()
+from app.utils.orcid_validator import is_valid_orcid
_bearer = HTTPBearer(auto_error=False)
-def _settings() -> tuple[str, str, int]:
- # Fallback de desarrollo para evitar 500 por configuración ausente.
- secret = os.getenv("JWT_SECRET") or "change_me"
- algorithm = os.getenv("JWT_ALGORITHM") or "HS256"
- expires_minutes = int(os.getenv("JWT_EXPIRES_MINUTES") or "720")
- return secret, algorithm, expires_minutes
-
-
def create_access_token(*, subject: str, extra: dict[str, Any] | None = None) -> str:
- secret, algorithm, expires_minutes = _settings()
+ """
+ Issue an access token signed with `settings.JWT_SECRET` using
+ `settings.JWT_ALGORITHM`.
+
+ `subject` is expected to be the researcher's verified ORCID iD; a value
+ that fails `is_valid_orcid` raises ValueError.
+
+ `extra` may carry additional claims. Reserved claims (iss, aud, sub, iat,
+ nbf, exp, jti, typ) present in `extra` are ignored so they cannot override
+ the values set here. The caller's dict is never modified.
+ """
+ if not is_valid_orcid(subject):
+ raise ValueError("subject must be a valid ORCID iD")
+
now = datetime.now(timezone.utc)
payload: dict[str, Any] = {
+ "iss": settings.JWT_ISSUER,
+ "aud": settings.JWT_AUDIENCE,
"sub": subject,
"iat": int(now.timestamp()),
- "exp": int((now + timedelta(minutes=expires_minutes)).timestamp()),
+ "nbf": int(now.timestamp()),
+ "exp": int((now + timedelta(minutes=settings.JWT_EXPIRES_MINUTES)).timestamp()),
+ "jti": uuid4().hex,
+ "typ": "access",
}
if extra:
+ # Filter into a new dict instead of popping keys: popping would mutate the caller's argument.
+ reserved = ("iss", "aud", "sub", "iat", "nbf", "exp", "jti", "typ")
+ extra = {key: value for key, value in extra.items() if key not in reserved}
payload.update(extra)
- return jwt.encode(payload, secret, algorithm=algorithm)
+
+ return jwt.encode(payload, settings.JWT_SECRET, algorithm=settings.JWT_ALGORITHM)
+
+
+def _decode_token(token: str) -> dict[str, Any]:
+ try:
+ return jwt.decode(
+ token,
+ settings.JWT_SECRET,
+ algorithms=[settings.JWT_ALGORITHM],
+ audience=settings.JWT_AUDIENCE,
+ issuer=settings.JWT_ISSUER,
+ options={
+ "require_iat": True,
+ "require_nbf": True,
+ "require_exp": True,
+ "require_aud": True,
+ "require_iss": True,
+ },
+ )
+ except JWTError as exc:
+ raise HTTPException(
+ status_code=status.HTTP_401_UNAUTHORIZED,
+ detail="Invalid or expired token",
+ headers={"WWW-Authenticate": "Bearer"},
+ ) from exc
def get_current_researcher(
- creds: HTTPAuthorizationCredentials = Depends(_bearer),
+ request: Request,
+ creds: HTTPAuthorizationCredentials | None = Depends(_bearer),
db: Session = Depends(get_db),
) -> Researcher:
if not creds or not creds.credentials:
- raise HTTPException(status_code=status.HTTP_401_UNAUTHORIZED, detail="Missing bearer token")
+ raise HTTPException(
+ status_code=status.HTTP_401_UNAUTHORIZED,
+ detail="Missing bearer token",
+ headers={"WWW-Authenticate": "Bearer"},
+ )
- secret, algorithm, _ = _settings()
- try:
- payload = jwt.decode(creds.credentials, secret, algorithms=[algorithm])
- except JWTError:
- raise HTTPException(status_code=status.HTTP_401_UNAUTHORIZED, detail="Invalid token")
+ payload = _decode_token(creds.credentials)
+
+ if payload.get("typ") != "access":
+ raise HTTPException(
+ status_code=status.HTTP_401_UNAUTHORIZED,
+ detail="Invalid token type",
+ headers={"WWW-Authenticate": "Bearer"},
+ )
orcid_id = payload.get("sub")
- if not isinstance(orcid_id, str) or not orcid_id:
- raise HTTPException(status_code=status.HTTP_401_UNAUTHORIZED, detail="Invalid token subject")
+ if not isinstance(orcid_id, str) or not is_valid_orcid(orcid_id):
+ raise HTTPException(
+ status_code=status.HTTP_401_UNAUTHORIZED,
+ detail="Invalid token subject",
+ headers={"WWW-Authenticate": "Bearer"},
+ )
researcher = db.query(Researcher).filter(Researcher.orcid_id == orcid_id).first()
if not researcher or not researcher.authenticated:
- raise HTTPException(status_code=status.HTTP_401_UNAUTHORIZED, detail="Researcher not authenticated")
+ raise HTTPException(
+ status_code=status.HTTP_401_UNAUTHORIZED,
+ detail="Researcher not authenticated",
+ headers={"WWW-Authenticate": "Bearer"},
+ )
+
+ request.state.researcher = researcher
return researcher
def get_optional_current_researcher(
- creds: HTTPAuthorizationCredentials = Depends(_bearer),
+ request: Request,
+ creds: HTTPAuthorizationCredentials | None = Depends(_bearer),
db: Session = Depends(get_db),
) -> Researcher | None:
"""
- Devuelve el investigador autenticado si hay Bearer token.
- Si no hay token, devuelve None.
- Si hay token inválido, lanza 401.
+ Devuelve el investigador autenticado si hay Bearer válido.
+ Si no hay Bearer, devuelve None.
+ Si hay Bearer inválido, lanza 401 (no se acepta como anónimo).
"""
if not creds or not creds.credentials:
return None
- return get_current_researcher(creds=creds, db=db)
+ return get_current_researcher(request=request, creds=creds, db=db)
diff --git a/backend/app/security/oauth_state.py b/backend/app/security/oauth_state.py
new file mode 100644
index 0000000..92475b8
--- /dev/null
+++ b/backend/app/security/oauth_state.py
@@ -0,0 +1,76 @@
+"""
+OAuth state anti-CSRF para el flujo de login con ORCID.
+
+El parámetro `state` se genera en `/auth/orcid/authorize`, se guarda en una
+cookie HttpOnly + SameSite=Lax con TTL corto, y se valida en el callback.
+
+Si el `state` falta, no coincide o ha expirado, el login se rechaza.
+"""
+
+from __future__ import annotations
+
+import hmac
+import secrets
+from datetime import datetime, timezone
+
+from fastapi import HTTPException, status
+from starlette.requests import Request
+from starlette.responses import Response
+
+from app.core.config import settings
+
+
+_STATE_BYTES = 32
+
+
+def generate_state() -> str:
+ return secrets.token_urlsafe(_STATE_BYTES)
+
+
+def attach_state_cookie(response: Response, state: str) -> None:
+ """
+ Stores `state` on `response` as an HttpOnly, SameSite=Lax cookie (Secure when `settings.is_production` is true). Returns nothing.
+ """
+ response.set_cookie(
+ key=settings.ORCID_OAUTH_STATE_COOKIE,
+ value=state,
+ max_age=settings.ORCID_OAUTH_STATE_TTL_SECONDS,
+ secure=settings.is_production,
+ httponly=True,
+ samesite="lax",
+ path="/",
+ )
+
+
+def clear_state_cookie(response: Response) -> None:
+ response.delete_cookie(
+ key=settings.ORCID_OAUTH_STATE_COOKIE,
+ path="/",
+ )
+
+
+def validate_state(request: Request, received_state: str | None) -> None:
+ """
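+ Compares the state received on the callback with the one stored in the cookie.
+
+ Does nothing when `settings.ORCID_OAUTH_STATE_ENABLED` is falsy. Otherwise raises
+ HTTP 400 if either value is missing or they differ (constant-time comparison).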
+ """
+ if not settings.ORCID_OAUTH_STATE_ENABLED:
+ return
+
+ cookie_value = request.cookies.get(settings.ORCID_OAUTH_STATE_COOKIE)
+ if not cookie_value or not received_state:
+ raise HTTPException(
+ status_code=status.HTTP_400_BAD_REQUEST,
+ detail="OAuth state missing",
+ )
+
+ if not hmac.compare_digest(cookie_value.encode("utf-8"), received_state.encode("utf-8")):
+ raise HTTPException(
+ status_code=status.HTTP_400_BAD_REQUEST,
+ detail="OAuth state mismatch",
+ )
+
+
+def now_ts() -> int:
+ return int(datetime.now(timezone.utc).timestamp())
diff --git a/backend/app/services/normalizer.py b/backend/app/services/normalizer.py
index fbe41bb..c9e9688 100644
--- a/backend/app/services/normalizer.py
+++ b/backend/app/services/normalizer.py
@@ -1,5 +1,8 @@
from typing import List
+# ---------------------------------------------------------
+# Función auxiliar: obtener valor de un diccionario
+# ---------------------------------------------------------
def _get(d: dict | None, *keys, default=None):
cur = d or {}
@@ -11,6 +14,9 @@ def _get(d: dict | None, *keys, default=None):
return default
return cur
+# ---------------------------------------------------------
+# Clase de normalización de publicaciones
+# ---------------------------------------------------------
class PublicationNormalizer:
@staticmethod
diff --git a/backend/app/services/orcid_client.py b/backend/app/services/orcid_client.py
index 2e15add..c143aeb 100644
--- a/backend/app/services/orcid_client.py
+++ b/backend/app/services/orcid_client.py
@@ -14,8 +14,14 @@ BASE_URL_SANDBOX = "https://pub.sandbox.orcid.org/v3.0"
# TOKEN_URL_PROD = "https://orcid.org/oauth/token"
# BASE_URL_PROD = "https://pub.orcid.org/v3.0"
+# ---------------------------------------------------------
+# Clase de cliente de ORCID
+# ---------------------------------------------------------
class ORCIDClient:
+ # ---------------------------------------------------------
+ # Función auxiliar: inicializar el cliente de ORCID
+ # ---------------------------------------------------------
def __init__(self):
# Asegura que al ejecutar `uvicorn` local también se carga `backend/.env`.
# (En docker `ORCID_REDIRECT_URI` y secretos llegan por env_file, así que esto no molesta.)
@@ -115,6 +121,10 @@ class ORCIDClient:
params["state"] = state
return f"{self.authorization_url}?{urllib.parse.urlencode(params)}"
+ # ---------------------------------------------------------
+ # Función auxiliar: intercambiar código de autorización
+ # ---------------------------------------------------------
+
def exchange_authorization_code(
self,
*,
@@ -148,3 +158,50 @@ def get_works_summary(orcid_id: str) -> dict:
def get_work_detail(orcid_id: str, put_code: int) -> dict | None:
client = ORCIDClient()
return client.fetch_work_detail(orcid_id, put_code)
+
+
+def get_record(orcid_id: str) -> dict:
+ client = ORCIDClient()
+ return client.fetch_record(orcid_id)
+
+
+def extract_display_name(record: dict | None) -> str | None:
+ """
+ Devuelve un nombre legible a partir de la respuesta de `/record` de ORCID.
+
+ Prioriza `credit-name` (el nombre tal y como el investigador prefiere mostrarlo);
+ si no está disponible, compone `given-names` + `family-name`.
+ """
+ if not record:
+ return None
+
+ name = (record.get("person") or {}).get("name") or {}
+
+ credit = name.get("credit-name")
+ if isinstance(credit, dict):
+ credit_value = credit.get("value")
+ if credit_value:
+ return credit_value
+
+ given_obj = name.get("given-names")
+ family_obj = name.get("family-name")
+ given = given_obj.get("value") if isinstance(given_obj, dict) else None
+ family = family_obj.get("value") if isinstance(family_obj, dict) else None
+
+ full = " ".join(part for part in (given, family) if part)
+ return full or None
+
+
+def get_display_name(orcid_id: str) -> str | None:
+ """
+ Obtiene el nombre público del investigador desde ORCID.
+
+ Devuelve `None` (sin propagar la excepción) si la API de ORCID no responde
+ o el `record` no contiene un nombre utilizable, para no romper el flujo de
+ búsqueda cuando solo falla la resolución del nombre.
+ """
+ try:
+ record = get_record(orcid_id)
+ except Exception:
+ return None
+ return extract_display_name(record)
diff --git a/backend/app/services/sword_generator.py b/backend/app/services/sword_generator.py
index a6a0f58..b1ce806 100644
--- a/backend/app/services/sword_generator.py
+++ b/backend/app/services/sword_generator.py
@@ -6,6 +6,9 @@ ATOM_NS = "http://www.w3.org/2005/Atom"
DC_NS = "http://purl.org/dc/elements/1.1/"
EXTRA_NS = "http://example.org/orcid-extra" # namespace para campos extendidos
+# ---------------------------------------------------------
+# Clase de generador de feed SWORD
+# ---------------------------------------------------------
class SWORDGenerator:
diff --git a/backend/app/services/sync_service.py b/backend/app/services/sync_service.py
index baf4b23..911d048 100644
--- a/backend/app/services/sync_service.py
+++ b/backend/app/services/sync_service.py
@@ -8,12 +8,23 @@ from app.db.repositories.researcher_repository import ResearcherRepository
from app.db.repositories.publication_repository import PublicationRepository
from app.db.repositories.syncjob_repository import SyncJobRepository
+# ---------------------------------------------------------
+# Clase de servicio de sincronización
+# ---------------------------------------------------------
class SyncService:
+ # ---------------------------------------------------------
+ # Función auxiliar: inicializar el servicio de sincronización
+ # ---------------------------------------------------------
+
def __init__(self):
self.orcid_client = ORCIDClient()
+ # ---------------------------------------------------------
+ # Función auxiliar: sincronizar las publicaciones de un investigador
+ # ---------------------------------------------------------
+
def sync_researcher(self, db: Session, orcid_id: str):
"""
Sincroniza las publicaciones de un investigador con manejo robusto de errores.
@@ -109,6 +120,10 @@ class SyncService:
"total": new_records + updated_records
}
+ # ---------------------------------------------------------
+ # Función auxiliar: sincronizar y obtener investigador + publicaciones
+ # ---------------------------------------------------------
+
def sync_and_get_full(self, db: Session, orcid_id: str):
"""
Sincroniza (si es necesario) y devuelve investigador + publicaciones.
diff --git a/backend/app/services/zip_generator.py b/backend/app/services/zip_generator.py
index f37e8fc..e0ed31b 100644
--- a/backend/app/services/zip_generator.py
+++ b/backend/app/services/zip_generator.py
@@ -7,12 +7,16 @@ from xml.etree.ElementTree import Element, SubElement, tostring
from app.db.models import Publication, Researcher
from app.services.sword_generator import SWORDGenerator
+# ---------------------------------------------------------
+# Clase de generador de ZIP
+# ---------------------------------------------------------
class ZIPGenerator:
# ---------------------------------------------------------
- # MANIFEST.TXT — más completo
+ # Función auxiliar: generar manifest.txt
# ---------------------------------------------------------
+
@staticmethod
def generate_manifest(researcher, publications):
lines = [
diff --git a/backend/app/utils/orcid_validator.py b/backend/app/utils/orcid_validator.py
index 235a88b..7eb9f4d 100644
--- a/backend/app/utils/orcid_validator.py
+++ b/backend/app/utils/orcid_validator.py
@@ -2,27 +2,38 @@ import re
ORCID_REGEX = re.compile(r"^\d{4}-\d{4}-\d{4}-\d{3}[0-9X]$")
+ORCID_PATTERN = r"^\d{4}-\d{4}-\d{4}-\d{3}[0-9X]$"
-def is_valid_orcid(orcid_id: str) -> bool:
+
+def is_valid_orcid(orcid_id: str | None) -> bool:
"""
Valida un ORCID ID:
- Formato: 0000-0000-0000-0000
- Dígito de control según ISO 7064 Mod 11-2
"""
+ if not isinstance(orcid_id, str):
+ return False
if not ORCID_REGEX.match(orcid_id):
return False
- # Quitar guiones
digits = orcid_id.replace("-", "")
total = 0
- # Los primeros 15 dígitos
for char in digits[:-1]:
total = (total + int(char)) * 2
- # Resto
remainder = total % 11
result = (12 - remainder) % 11
check_digit = "X" if result == 10 else str(result)
return digits[-1] == check_digit
+
+
+def assert_valid_orcid(orcid_id: str) -> str:
+ """
+ Devuelve el ORCID si es válido. Lanza ValueError si no.
+ Útil para usar como Pydantic validator.
+ """
+ if not is_valid_orcid(orcid_id):
+ raise ValueError("ORCID iD inválido")
+ return orcid_id
diff --git a/backend/requirements.txt b/backend/requirements.txt
index 39dcb09..9c4863f 100644
--- a/backend/requirements.txt
+++ b/backend/requirements.txt
@@ -1,14 +1,16 @@
fastapi
-uvicorn
+uvicorn[standard]
sqlalchemy
psycopg2-binary
httpx
pydantic
+pydantic-settings
python-dotenv
lxml
-apscheduler
+defusedxml
+APScheduler==3.10.4
authlib
redis
-APScheduler==3.10.4
requests
python-jose[cryptography]
+slowapi
diff --git a/frontend/src/contexts/AuthContext.jsx b/frontend/src/contexts/AuthContext.jsx
index 4e4c833..5648c96 100644
--- a/frontend/src/contexts/AuthContext.jsx
+++ b/frontend/src/contexts/AuthContext.jsx
@@ -78,6 +78,20 @@ export function AuthProvider({ children }) {
return () => window.removeEventListener("message", handleMessage);
}, []);
+ // Fallback when postMessage cannot reach the opener (e.g. browser policy
+ // severs window.opener during the OAuth redirect chain). localStorage is
+ // shared between same-origin windows, so the popup's `setItem(...)` fires
+ // a storage event in this window and we can pick up the new token.
+ useEffect(() => {
+ function handleStorage(event) {
+ if (event.key !== STORAGE_KEY) return;
+ if (event.newValue) setToken(event.newValue);
+ else setToken(null);
+ }
+ window.addEventListener("storage", handleStorage);
+ return () => window.removeEventListener("storage", handleStorage);
+ }, []);
+
/**
* Stores a JWT directly (used by AuthCallbackPage).
* Does NOT trigger any network request.
diff --git a/frontend/src/pages/AuthCallbackPage.jsx b/frontend/src/pages/AuthCallbackPage.jsx
index ae5bb8a..21d852e 100644
--- a/frontend/src/pages/AuthCallbackPage.jsx
+++ b/frontend/src/pages/AuthCallbackPage.jsx
@@ -36,6 +36,7 @@ export function AuthCallbackPage() {
hasHandledCodeRef.current = true;
const code = searchParams.get("code");
+ const state = searchParams.get("state");
const oauthError = searchParams.get("error");
const errorDescription = searchParams.get("error_description");
@@ -69,7 +70,7 @@ export function AuthCallbackPage() {
}
sessionStorage.setItem(consumedKey, "1");
- exchangeOrcidCode(code)
+ exchangeOrcidCode(code, { state })
.then(({ access_token }) => {
storeToken(access_token);
setStatus("success");
@@ -87,16 +88,29 @@ export function AuthCallbackPage() {
// eslint-disable-next-line react-hooks/exhaustive-deps
}, []);
- // After a short delay, redirect to home if we're NOT in a popup
- // (fallback for browsers that block window.open).
+ // After a short delay, always attempt window.close():
+ // - If we're in the OAuth popup (opened by window.open()), the browser
+ // allows close() and the window disappears.
+ // - If the window doesn't close (browser blocked it, or the user opened
+ // /callback directly as a plain tab), we detect that via window.closed
+ // and fall back to navigating to "/" so the user sees the landing page.
+ //
+ // NOTE: Neither window.opener nor window.name is reliable here.
+ // - window.name is cleared by Chrome on cross-origin navigation
+ // (our domain → sandbox.orcid.org → our domain clears the name).
+ // - window.opener is severed by ORCID Sandbox's own COOP header
+ // while the popup passes through their domain.
useEffect(() => {
- if (status === "success" || status === "error") {
- const isPopup = Boolean(window.opener);
- if (!isPopup) {
- const timer = setTimeout(() => navigate("/"), 2000);
- return () => clearTimeout(timer);
- }
- }
+ if (status !== "success" && status !== "error") return;
+ // Keep a handle on the nested fallback timer so cleanup can cancel it too.
+ let inner;
+ const outer = setTimeout(() => {
+ window.close();
+ // Give the browser a tick to process the close. If the window
+ // is still open, we're in a plain tab — navigate to home instead.
+ inner = setTimeout(() => {
+ if (!window.closed) navigate("/");
+ }, 300);
+ }, 1500);
+ // Cancel both timers on unmount / status change; otherwise the inner
+ // fallback could still call navigate("/") after this effect is torn down.
+ return () => {
+ clearTimeout(outer);
+ clearTimeout(inner);
+ };
}, [status, navigate]);
return (
@@ -154,9 +168,16 @@ export function AuthCallbackPage() {
/* ─────────────────────────── Helpers ───────────────────────────── */
/**
- * If running in a popup, posts a message to the opener and closes the
- * window. If not in a popup (e.g. browser blocked it), the message is
- * irrelevant — the useEffect above handles the redirect to "/".
+ * If running in a popup, posts a message to the opener so the parent
+ * window can update its auth state without waiting for the storage event
+ * fallback in AuthContext. The actual `window.close()` is handled by the
+ * delayed effect above so we don't race with the success/error UI.
+ *
+ * `window.opener` may be `null` here when the browser severed the opener
+ * relationship during the OAuth redirect chain (some COOP combinations
+ * trigger this). In that case AuthContext picks up the new token via the
+ * `storage` event instead — that's why we still call `storeToken()` even
+ * when we can't postMessage.
*/
function notifyAndClose(message) {
if (window.opener && !window.opener.closed) {
@@ -165,8 +186,6 @@ function notifyAndClose(message) {
} catch {
/* opener may have navigated away */
}
- // Small delay so the user sees the success/error state before close.
- setTimeout(() => window.close(), 1200);
}
}
diff --git a/frontend/src/services/api.js b/frontend/src/services/api.js
index b6bde80..a192494 100644
--- a/frontend/src/services/api.js
+++ b/frontend/src/services/api.js
@@ -237,9 +237,11 @@ export function getOrcidAuthorizeUrl() {
* Intercambia el authorization code (recibido de ORCID tras el OAuth)
* por un JWT propio del backend. Devuelve `{ access_token, token_type }`.
*/
-export async function exchangeOrcidCode(code, { signal } = {}) {
+export async function exchangeOrcidCode(code, { state, signal } = {}) {
+ const params = { code };
+ if (state) params.state = state;
return request(
- `/auth/orcid/callback?${new URLSearchParams({ code }).toString()}`,
+ `/auth/orcid/callback?${new URLSearchParams(params).toString()}`,
{ signal },
);
}