feat: enhance authentication and publication download tracking

- Added JWT authentication support with a configurable secret and expiration.
- Introduced optional API key validation for endpoints.
- Implemented tracking of publication downloads by researchers, storing records in a new PublicationDownload model.
- Updated export endpoints to register downloads only when the request comes from an authenticated user.
- Enhanced the researcher search response to indicate whether each publication was downloaded by the current user.
- Updated the environment configuration to include the new JWT settings.
2026-04-29 10:27:17 +02:00
parent 579a23e2f9
commit fec26089ed
13 changed files with 426 additions and 30 deletions
@@ -0,0 +1,97 @@
+import httpx
+import os
+from fastapi import APIRouter, Depends, HTTPException, status
+from fastapi.responses import RedirectResponse
+from sqlalchemy.orm import Session
+
+from app.db.models import Researcher
+from app.db.session import get_db
+from app.schema.auth import OrcidLoginResponseSchema
+from app.security.jwt import create_access_token
+from app.services.orcid_client import ORCIDClient
+from app.utils.orcid_validator import is_valid_orcid
+
+
+router = APIRouter(prefix="/auth", tags=["auth"])
+
+
+def _extract_display_name(record: dict | None) -> str | None:
+    """Build a "<given> <family>" display name from an ORCID record payload.
+
+    Reads ``record["person"]["name"]["given-names"]["value"]`` and the matching
+    ``"family-name"`` entry. Any level that is missing or is not a dict, and any
+    value that is not a non-blank string, is simply skipped.
+
+    Args:
+        record: Parsed ORCID record, or ``None``.
+
+    Returns:
+        The available name parts joined by a single space, or ``None`` when
+        neither part is present.
+    """
+
+    def _name_part(container: object, key: str) -> str | None:
+        # Each hop is type-checked so a malformed payload yields None
+        # instead of raising AttributeError/TypeError.
+        node = container.get(key) if isinstance(container, dict) else None
+        raw = node.get("value") if isinstance(node, dict) else None
+        if not isinstance(raw, str):
+            return None
+        return raw.strip() or None
+
+    person = record.get("person") if isinstance(record, dict) else None
+    name = person.get("name") if isinstance(person, dict) else None
+    parts = [_name_part(name, "given-names"), _name_part(name, "family-name")]
+    full = " ".join(part for part in parts if part)
+    return full or None
+
+
+def _orcid_redirect_uri() -> str:
+    """Return the OAuth redirect URI sent to ORCID.
+
+    Uses the ``ORCID_REDIRECT_URI`` environment variable when it is set to a
+    non-empty value, otherwise a localhost default. The value must match the
+    ``redirect_uri`` registered in the ORCID integration.
+    """
+    configured = os.environ.get("ORCID_REDIRECT_URI")
+    if configured:
+        return configured
+    return "http://localhost:8000/api/auth/orcid/callback"
+
+
+@router.get("/orcid/authorize")
+def authorize_orcid():
+    """Start the three-legged OAuth (authorization code) flow against ORCID.
+
+    Responds with a redirect to the authorize URL built by ``ORCIDClient``.
+    """
+    # Only the user's authenticated iD is needed, hence the "/authenticate" scope.
+    # NOTE(review): no OAuth `state` value is passed here — confirm whether
+    # CSRF protection for the callback is handled elsewhere.
+    target_url = ORCIDClient().build_authorize_url(
+        redirect_uri=_orcid_redirect_uri(),
+        scope="/authenticate",
+    )
+    return RedirectResponse(target_url)
+
+
+@router.get("/orcid/callback", response_model=OrcidLoginResponseSchema)
+def orcid_callback(code: str, db: Session = Depends(get_db)):
+    """Complete the ORCID OAuth flow and issue this service's JWT.
+
+    Exchanges the ``code`` returned by ORCID for tokens on the server, creates
+    or updates the matching ``Researcher`` row (marking it authenticated) and
+    returns an access token whose subject is the ORCID iD found in the token
+    response.
+
+    Args:
+        code: Authorization code ORCID appended to the redirect.
+        db: Database session.
+
+    Raises:
+        HTTPException: 400 when ``code`` is empty; 502 when the token exchange
+            fails; 504 when it times out; 401 when the token response does not
+            carry an ORCID iD accepted by ``is_valid_orcid``.
+    """
+    if not code:
+        raise HTTPException(status_code=status.HTTP_400_BAD_REQUEST, detail="Missing ORCID authorization code")
+
+    client = ORCIDClient()
+    redirect_uri = _orcid_redirect_uri()
+
+    # Every branch chains the original error (`from exc`) so the upstream
+    # cause is kept on the raised HTTPException instead of being discarded.
+    try:
+        token_data = client.exchange_authorization_code(code=code, redirect_uri=redirect_uri)
+    except httpx.HTTPStatusError as exc:
+        raise HTTPException(
+            status_code=status.HTTP_502_BAD_GATEWAY,
+            detail=f"ORCID token error ({exc.response.status_code})",
+        ) from exc
+    except httpx.TimeoutException as exc:
+        raise HTTPException(status_code=status.HTTP_504_GATEWAY_TIMEOUT, detail="ORCID timeout") from exc
+    except Exception as exc:
+        raise HTTPException(status_code=status.HTTP_502_BAD_GATEWAY, detail="ORCID unavailable") from exc
+
+    # A payload that is not a dict, or whose "orcid" is not a string, is
+    # handled like a missing iD (401 below) rather than an AttributeError.
+    if not isinstance(token_data, dict):
+        token_data = {}
+    raw_orcid = token_data.get("orcid")
+    orcid_id = raw_orcid.strip() if isinstance(raw_orcid, str) else ""
+    if not is_valid_orcid(orcid_id):
+        raise HTTPException(status_code=status.HTTP_401_UNAUTHORIZED, detail="Invalid ORCID returned by OAuth")
+
+    display_name = token_data.get("name")
+    if not display_name:
+        # Fallback when the token response has no `name`. Deliberately
+        # best-effort: a failed record lookup must not block the login.
+        try:
+            record = client.fetch_record(orcid_id)
+            display_name = _extract_display_name(record)
+        except Exception:
+            display_name = None
+
+    researcher = db.query(Researcher).filter(Researcher.orcid_id == orcid_id).first()
+    if not researcher:
+        researcher = Researcher(orcid_id=orcid_id, name=display_name, authenticated=True)
+        db.add(researcher)
+    else:
+        researcher.authenticated = True
+        # Never overwrite a name that is already stored.
+        if display_name and not researcher.name:
+            researcher.name = display_name
+
+    db.commit()
+    db.refresh(researcher)
+
+    token = create_access_token(subject=orcid_id, extra={"rid": str(researcher.id)})
+    return OrcidLoginResponseSchema(access_token=token)
+
@@ -4,8 +4,9 @@ from sqlalchemy.orm import Session
 from uuid import UUID

 from app.db.session import get_db
-from app.db.models import Publication, Researcher
-from app.security.api_key import get_api_key
+from app.db.models import Publication, Researcher, PublicationDownload
+from app.security.api_key import get_api_key_optional
+from app.security.jwt import get_optional_current_researcher
 from app.services.sword_generator import SWORDGenerator
 from app.services.zip_generator import ZIPGenerator

@@ -29,8 +30,11 @@ def validate_uuid_list(pub_ids: list[str]) -> list[UUID]:
 async def export_multiple_sword(
    pub_ids: list[str],
    db: Session = Depends(get_db),
-    api_key: str = Depends(get_api_key)
+    api_key: str | None = Depends(get_api_key_optional),
+    current: Researcher | None = Depends(get_optional_current_researcher),
 ):
+    if not api_key and not current:
+        raise HTTPException(status_code=401, detail="Missing credentials")
    validate_uuid_list(pub_ids)

    pubs = db.query(Publication).filter(Publication.id.in_(pub_ids)).all()
@@ -41,6 +45,20 @@ async def export_multiple_sword(
    researcher = db.query(Researcher).filter_by(id=pubs[0].researcher_id).first()

    xml_bytes = SWORDGenerator.generate_feed_xml(researcher, pubs)
+    # Registrar descarga solo si hay usuario logueado
+    if current:
+        for p in pubs:
+            exists = (
+                db.query(PublicationDownload)
+                .filter(
+                    PublicationDownload.researcher_id == current.id,
+                    PublicationDownload.publication_id == p.id,
+                )
+                .first()
+            )
+            if not exists:
+                db.add(PublicationDownload(researcher_id=current.id, publication_id=p.id))
+        db.commit()
    return Response(content=xml_bytes, media_type="application/xml")


@@ -48,8 +66,11 @@ async def export_multiple_sword(
 async def export_researcher_sword(
    orcid_id: str,
    db: Session = Depends(get_db),
-    api_key: str = Depends(get_api_key)
+    api_key: str | None = Depends(get_api_key_optional),
+    current: Researcher | None = Depends(get_optional_current_researcher),
 ):
+    if not api_key and not current:
+        raise HTTPException(status_code=401, detail="Missing credentials")
    researcher = db.query(Researcher).filter_by(orcid_id=orcid_id).first()
    if not researcher:
        raise HTTPException(status_code=404, detail="Researcher not found")
@@ -60,6 +81,19 @@ async def export_researcher_sword(
        raise HTTPException(status_code=404, detail="No publications found for this researcher")

    xml_bytes = SWORDGenerator.generate_feed_xml(researcher, pubs)
+    if current:
+        for p in pubs:
+            exists = (
+                db.query(PublicationDownload)
+                .filter(
+                    PublicationDownload.researcher_id == current.id,
+                    PublicationDownload.publication_id == p.id,
+                )
+                .first()
+            )
+            if not exists:
+                db.add(PublicationDownload(researcher_id=current.id, publication_id=p.id))
+        db.commit()
    return Response(content=xml_bytes, media_type="application/xml")


@@ -67,8 +101,11 @@ async def export_researcher_sword(
 async def export_multiple_zip(
    pub_ids: list[str],
    db: Session = Depends(get_db),
-    api_key: str = Depends(get_api_key)
+    api_key: str | None = Depends(get_api_key_optional),
+    current: Researcher | None = Depends(get_optional_current_researcher),
 ):
+    if not api_key and not current:
+        raise HTTPException(status_code=401, detail="Missing credentials")
    validate_uuid_list(pub_ids)

    pubs = db.query(Publication).filter(Publication.id.in_(pub_ids)).all()
@@ -79,6 +116,19 @@ async def export_multiple_zip(
    researcher = db.query(Researcher).filter_by(id=pubs[0].researcher_id).first()

    zip_bytes = ZIPGenerator.generate_zip(researcher, pubs)
+    if current:
+        for p in pubs:
+            exists = (
+                db.query(PublicationDownload)
+                .filter(
+                    PublicationDownload.researcher_id == current.id,
+                    PublicationDownload.publication_id == p.id,
+                )
+                .first()
+            )
+            if not exists:
+                db.add(PublicationDownload(researcher_id=current.id, publication_id=p.id))
+        db.commit()
    return Response(content=zip_bytes, media_type="application/zip")


@@ -86,8 +136,11 @@ async def export_multiple_zip(
 async def export_researcher_zip(
    orcid_id: str,
    db: Session = Depends(get_db),
-    api_key: str = Depends(get_api_key)
+    api_key: str | None = Depends(get_api_key_optional),
+    current: Researcher | None = Depends(get_optional_current_researcher),
 ):
+    if not api_key and not current:
+        raise HTTPException(status_code=401, detail="Missing credentials")
    researcher = db.query(Researcher).filter_by(orcid_id=orcid_id).first()
    if not researcher:
        raise HTTPException(status_code=404, detail="Researcher not found")
@@ -98,4 +151,17 @@ async def export_researcher_zip(
        raise HTTPException(status_code=404, detail="No publications found for this researcher")

    zip_bytes = ZIPGenerator.generate_zip(researcher, pubs)
+    if current:
+        for p in pubs:
+            exists = (
+                db.query(PublicationDownload)
+                .filter(
+                    PublicationDownload.researcher_id == current.id,
+                    PublicationDownload.publication_id == p.id,
+                )
+                .first()
+            )
+            if not exists:
+                db.add(PublicationDownload(researcher_id=current.id, publication_id=p.id))
+        db.commit()
    return Response(content=zip_bytes, media_type="application/zip")
@@ -16,6 +16,9 @@ from app.schema.researcher import (
 )
 from app.services.normalizer import PublicationNormalizer
 from app.services.orcid_client import get_works_summary, get_work_detail
+from app.schema.publication import PublicationSchema
+from app.db.models import PublicationDownload
+from app.security.jwt import get_optional_current_researcher

 router = APIRouter(prefix="/researchers", tags=["researchers"])

@@ -39,11 +42,11 @@ def publication_changed(existing: Publication, data: dict) -> bool:
    return False


-def build_researcher_stats(publications: List[Publication]) -> ResearcherStatsSchema:
+def build_researcher_stats(publications: list) -> ResearcherStatsSchema:
    publication_types: dict[str, int] = {}

    for publication in publications:
-        pub_type = publication.type or "unknown"
+        pub_type = getattr(publication, "type", None) or "unknown"
        publication_types[pub_type] = publication_types.get(pub_type, 0) + 1

    return ResearcherStatsSchema(
@@ -118,7 +121,33 @@ def _upsert_researcher_publications(
    return publications


-def build_search_response(orcid_id: str, db: Session) -> ResearcherWithPublicationsSchema:
+def _decorate_downloaded_by_me(
+    *,
+    db: Session,
+    current: Researcher | None,
+    publications: List[Publication],
+) -> List[PublicationSchema] | List[Publication]:
+    """Flag each publication with whether ``current`` has downloaded it.
+
+    Without a current researcher the input list is returned untouched.
+    Otherwise each publication is converted to a ``PublicationSchema`` whose
+    ``downloaded_by_me`` field is true when a ``PublicationDownload`` row
+    exists for ``current`` and that publication.
+    """
+    if not current:
+        return publications
+
+    # One query for every publication id this researcher has downloaded.
+    download_rows = (
+        db.query(PublicationDownload.publication_id)
+        .filter(PublicationDownload.researcher_id == current.id)
+        .all()
+    )
+    seen_ids = {publication_id for (publication_id,) in download_rows}
+
+    return [
+        PublicationSchema.model_validate(pub).model_copy(update={"downloaded_by_me": pub.id in seen_ids})
+        for pub in publications
+    ]
+
+
+def build_search_response(orcid_id: str, db: Session, current: Researcher | None) -> ResearcherWithPublicationsSchema:
    researcher = db.query(Researcher).filter(Researcher.orcid_id == orcid_id).first()
    if not researcher:
        researcher = Researcher(
@@ -131,31 +160,28 @@ def build_search_response(orcid_id: str, db: Session) -> ResearcherWithPublicati
        db.flush()

    publications = _upsert_researcher_publications(researcher, orcid_id, db)
-    stats = build_researcher_stats(publications)
+    publications_out = _decorate_downloaded_by_me(db=db, current=current, publications=publications)
+    stats = build_researcher_stats(publications_out)

    return ResearcherWithPublicationsSchema(
        researcher=researcher,
-        publications=publications,
+        publications=publications_out,
        stats=stats,
        new_records=0,
        updated_records=0,
        unchanged_records=0,
-        total_records=len(publications),
+        total_records=len(publications_out),
    )


 # ---------------------------------------------------------
 # ENDPOINT 1: SEARCH + SYNC (sin contadores)
 # ---------------------------------------------------------
-@router.get("/search/{orcid_id}", response_model=ResearcherWithPublicationsSchema)
-def search_and_sync_researcher(orcid_id: str, db: Session = Depends(get_db)):
-    return build_search_response(orcid_id, db)
-
-
-@router.post("/search", response_model=ResearcherBatchSearchResponseSchema)
+@router.post("/search", response_model=ResearcherBatchSearchResponseSchema, response_model_exclude_none=True)
 def search_and_sync_researchers(
    payload: ResearcherBatchSearchRequestSchema,
    db: Session = Depends(get_db),
+    current: Researcher | None = Depends(get_optional_current_researcher),
 ):
    results: List[ResearcherWithPublicationsSchema] = []
    errors: List[ResearcherSearchErrorSchema] = []
@@ -165,7 +191,7 @@ def search_and_sync_researchers(

    for orcid_id in unique_orcid_ids:
        try:
-            results.append(build_search_response(orcid_id, db))
+            results.append(build_search_response(orcid_id, db, current))
        except httpx.HTTPStatusError as exc:
            db.rollback()
            errors.append(
@@ -194,8 +220,12 @@ def search_and_sync_researchers(
 # ---------------------------------------------------------
 # ENDPOINT 2: SYNC COMPLETO (con contadores + status)
 # ---------------------------------------------------------
-@router.post("/{orcid_id}/sync", response_model=ResearcherWithPublicationsSchema)
-def sync_researcher(orcid_id: str, db: Session = Depends(get_db)):
+@router.post("/{orcid_id}/sync", response_model=ResearcherWithPublicationsSchema, response_model_exclude_none=True)
+def sync_researcher(
+    orcid_id: str,
+    db: Session = Depends(get_db),
+    current: Researcher | None = Depends(get_optional_current_researcher),
+):
    researcher = db.query(Researcher).filter_by(orcid_id=orcid_id).first()
    if not researcher:
        raise HTTPException(status_code=404, detail="Researcher not found")
@@ -268,10 +298,12 @@ def sync_researcher(orcid_id: str, db: Session = Depends(get_db)):
    db.commit()
    db.refresh(researcher)

+    publications_out = _decorate_downloaded_by_me(db=db, current=current, publications=publications_output)
+
    return ResearcherWithPublicationsSchema(
        researcher=researcher,
-        publications=publications_output,
-        stats=build_researcher_stats(publications_output),
+        publications=publications_out,
+        stats=build_researcher_stats(publications_out),
        new_records=new_count,
        updated_records=updated_count,
        unchanged_records=unchanged_count,