feat: enhance backend security and configuration

- Updated Dockerfile to improve security with a non-root user and added health checks.
- Modified docker-compose.yml to set containers as read-only, restrict ports to localhost, and implement health checks.
- Enhanced .env.example with additional environment variables for security and configuration.
- Improved FastAPI application with middleware for security headers, CORS, and body size limits.
- Refactored authentication flow in auth.py to include state validation and improved error handling.
- Added rate limiting to various endpoints to prevent abuse.
- Updated researcher and publication handling to ensure better validation and error management.
This commit is contained in:
Mireya Cueto Garrido
2026-05-08 11:19:52 +02:00
parent 96e58dbd16
commit af1b8e9956
37 changed files with 1375 additions and 282 deletions
+100 -82
View File
@@ -1,115 +1,146 @@
from fastapi import APIRouter, Depends, HTTPException
from fastapi.responses import Response
from sqlalchemy.orm import Session
from typing import Iterable, List
from uuid import UUID
from fastapi import APIRouter, Body, Depends, HTTPException, Path, Request
from fastapi.responses import Response
from sqlalchemy.orm import Session
from app.core.config import settings
from app.core.rate_limit import limiter
from app.db.models import Publication, PublicationDownload, Researcher
from app.db.session import get_db
from app.db.models import Publication, Researcher, PublicationDownload
from app.security.api_key import get_api_key_optional
from app.security.jwt import get_optional_current_researcher
from app.services.sword_generator import SWORDGenerator
from app.services.zip_generator import ZIPGenerator
from app.utils.orcid_validator import ORCID_PATTERN, is_valid_orcid
router = APIRouter(prefix="/export")
def validate_uuid_list(pub_ids: list[str]) -> list[UUID]:
valid_ids = []
for pid in pub_ids:
try:
valid_ids.append(UUID(pid))
except Exception:
raise HTTPException(
status_code=400,
detail=f"Invalid publication ID (not UUID): {pid}"
)
return valid_ids
def _ensure_credentials(api_key: str | None, current: Researcher | None) -> None:
if not api_key and not current:
raise HTTPException(status_code=401, detail="Authentication required")
def _record_downloads(db: Session, current: Researcher, pubs: Iterable[Publication]) -> None:
"""
Inserta marcadores de descarga (researcher_id, publication_id).
- Resuelve descargas existentes con UNA sola query.
- Solo añade las que faltan.
"""
pub_ids = [p.id for p in pubs]
if not pub_ids:
return
existing_ids = {
row[0]
for row in (
db.query(PublicationDownload.publication_id)
.filter(
PublicationDownload.researcher_id == current.id,
PublicationDownload.publication_id.in_(pub_ids),
)
.all()
)
}
new_rows = [
PublicationDownload(researcher_id=current.id, publication_id=pid)
for pid in pub_ids
if pid not in existing_ids
]
if new_rows:
db.add_all(new_rows)
db.commit()
def _validate_pub_ids(pub_ids: List[UUID]) -> List[UUID]:
if len(pub_ids) > settings.MAX_PUB_IDS_BATCH:
raise HTTPException(status_code=413, detail="Too many publication IDs")
return pub_ids
# ---------------------------------------------------------
# ENDPOINT 1: SWORD múltiples publicaciones
# ---------------------------------------------------------
@router.post("/sword/publications")
@limiter.limit(settings.RATE_LIMIT_EXPORT)
async def export_multiple_sword(
pub_ids: list[str],
request: Request,
pub_ids: List[UUID] = Body(..., min_length=1, max_length=settings.MAX_PUB_IDS_BATCH),
db: Session = Depends(get_db),
api_key: str | None = Depends(get_api_key_optional),
current: Researcher | None = Depends(get_optional_current_researcher),
):
if not api_key and not current:
raise HTTPException(status_code=401, detail="Missing credentials")
validate_uuid_list(pub_ids)
_ensure_credentials(api_key, current)
_validate_pub_ids(pub_ids)
pubs = db.query(Publication).filter(Publication.id.in_(pub_ids)).all()
if not pubs:
raise HTTPException(status_code=404, detail="No publications found")
researcher = db.query(Researcher).filter_by(id=pubs[0].researcher_id).first()
xml_bytes = SWORDGenerator.generate_feed_xml(researcher, pubs)
# Registrar descarga solo si hay usuario logueado
if current:
for p in pubs:
exists = (
db.query(PublicationDownload)
.filter(
PublicationDownload.researcher_id == current.id,
PublicationDownload.publication_id == p.id,
)
.first()
)
if not exists:
db.add(PublicationDownload(researcher_id=current.id, publication_id=p.id))
db.commit()
_record_downloads(db, current, pubs)
return Response(content=xml_bytes, media_type="application/xml")
# ---------------------------------------------------------
# ENDPOINT 2: SWORD por investigador
# ---------------------------------------------------------
@router.get("/sword/researcher/{orcid_id}")
@limiter.limit(settings.RATE_LIMIT_EXPORT)
async def export_researcher_sword(
orcid_id: str,
request: Request,
orcid_id: str = Path(min_length=19, max_length=19, pattern=ORCID_PATTERN),
db: Session = Depends(get_db),
api_key: str | None = Depends(get_api_key_optional),
current: Researcher | None = Depends(get_optional_current_researcher),
):
if not api_key and not current:
raise HTTPException(status_code=401, detail="Missing credentials")
_ensure_credentials(api_key, current)
if not is_valid_orcid(orcid_id):
raise HTTPException(status_code=400, detail="Invalid ORCID iD")
researcher = db.query(Researcher).filter_by(orcid_id=orcid_id).first()
if not researcher:
raise HTTPException(status_code=404, detail="Researcher not found")
pubs = db.query(Publication).filter_by(researcher_id=researcher.id).all()
if not pubs:
raise HTTPException(status_code=404, detail="No publications found for this researcher")
xml_bytes = SWORDGenerator.generate_feed_xml(researcher, pubs)
if current:
for p in pubs:
exists = (
db.query(PublicationDownload)
.filter(
PublicationDownload.researcher_id == current.id,
PublicationDownload.publication_id == p.id,
)
.first()
)
if not exists:
db.add(PublicationDownload(researcher_id=current.id, publication_id=p.id))
db.commit()
_record_downloads(db, current, pubs)
return Response(content=xml_bytes, media_type="application/xml")
# ---------------------------------------------------------
# ENDPOINT 3: ZIP múltiples publicaciones
# ---------------------------------------------------------
@router.post("/zip/publications")
@limiter.limit(settings.RATE_LIMIT_EXPORT)
async def export_multiple_zip(
pub_ids: list[str],
request: Request,
pub_ids: List[UUID] = Body(..., min_length=1, max_length=settings.MAX_PUB_IDS_BATCH),
db: Session = Depends(get_db),
api_key: str | None = Depends(get_api_key_optional),
current: Researcher | None = Depends(get_optional_current_researcher),
):
if not api_key and not current:
raise HTTPException(status_code=401, detail="Missing credentials")
validate_uuid_list(pub_ids)
_ensure_credentials(api_key, current)
_validate_pub_ids(pub_ids)
pubs = db.query(Publication).filter(Publication.id.in_(pub_ids)).all()
if not pubs:
raise HTTPException(status_code=404, detail="No publications found")
@@ -117,51 +148,38 @@ async def export_multiple_zip(
zip_bytes = ZIPGenerator.generate_zip(researcher, pubs)
if current:
for p in pubs:
exists = (
db.query(PublicationDownload)
.filter(
PublicationDownload.researcher_id == current.id,
PublicationDownload.publication_id == p.id,
)
.first()
)
if not exists:
db.add(PublicationDownload(researcher_id=current.id, publication_id=p.id))
db.commit()
_record_downloads(db, current, pubs)
return Response(content=zip_bytes, media_type="application/zip")
# ---------------------------------------------------------
# ENDPOINT 4: ZIP por investigador
# ---------------------------------------------------------
@router.get("/zip/researcher/{orcid_id}")
@limiter.limit(settings.RATE_LIMIT_EXPORT)
async def export_researcher_zip(
orcid_id: str,
request: Request,
orcid_id: str = Path(min_length=19, max_length=19, pattern=ORCID_PATTERN),
db: Session = Depends(get_db),
api_key: str | None = Depends(get_api_key_optional),
current: Researcher | None = Depends(get_optional_current_researcher),
):
if not api_key and not current:
raise HTTPException(status_code=401, detail="Missing credentials")
_ensure_credentials(api_key, current)
if not is_valid_orcid(orcid_id):
raise HTTPException(status_code=400, detail="Invalid ORCID iD")
researcher = db.query(Researcher).filter_by(orcid_id=orcid_id).first()
if not researcher:
raise HTTPException(status_code=404, detail="Researcher not found")
pubs = db.query(Publication).filter_by(researcher_id=researcher.id).all()
if not pubs:
raise HTTPException(status_code=404, detail="No publications found for this researcher")
zip_bytes = ZIPGenerator.generate_zip(researcher, pubs)
if current:
for p in pubs:
exists = (
db.query(PublicationDownload)
.filter(
PublicationDownload.researcher_id == current.id,
PublicationDownload.publication_id == p.id,
)
.first()
)
if not exists:
db.add(PublicationDownload(researcher_id=current.id, publication_id=p.id))
db.commit()
_record_downloads(db, current, pubs)
return Response(content=zip_bytes, media_type="application/zip")