feat: enhance authentication and publication download tracking

- Added JWT authentication support with configurable secret and expiration.
- Introduced optional API key validation for endpoints.
- Implemented tracking of publication downloads by researchers, storing records in a new PublicationDownload model.
- Updated export endpoints to conditionally register downloads based on user authentication.
- Enhanced researcher search response to indicate if publications were downloaded by the current user.
- Updated environment configuration to include new JWT settings.
This commit is contained in:
Mireya Cueto Garrido
2026-04-29 10:27:17 +02:00
parent 579a23e2f9
commit fec26089ed
13 changed files with 426 additions and 30 deletions
+97
View File
@@ -0,0 +1,97 @@
import httpx
import os
from fastapi import APIRouter, Depends, HTTPException, status
from fastapi.responses import RedirectResponse
from sqlalchemy.orm import Session
from app.db.models import Researcher
from app.db.session import get_db
from app.schema.auth import OrcidLoginResponseSchema
from app.security.jwt import create_access_token
from app.services.orcid_client import ORCIDClient
from app.utils.orcid_validator import is_valid_orcid
router = APIRouter(prefix="/auth", tags=["auth"])
def _extract_display_name(record: dict) -> str | None:
person = (record or {}).get("person") or {}
name = person.get("name") or {}
given = ((name.get("given-names") or {}).get("value")) if isinstance(name.get("given-names"), dict) else None
family = ((name.get("family-name") or {}).get("value")) if isinstance(name.get("family-name"), dict) else None
full = " ".join([p for p in [given, family] if p])
return full or None
def _orcid_redirect_uri() -> str:
# Debe coincidir con el `redirect_uri` registrado en tu integración ORCID.
return os.getenv("ORCID_REDIRECT_URI") or "http://localhost:8000/api/auth/orcid/callback"
@router.get("/orcid/authorize")
def authorize_orcid():
"""
Inicia el flujo OAuth 3-legged (authorization code) hacia ORCID.
"""
client = ORCIDClient()
authorize_url = client.build_authorize_url(
redirect_uri=_orcid_redirect_uri(),
# Solo necesitamos el Authenticated iD del usuario.
scope="/authenticate",
)
return RedirectResponse(authorize_url)
@router.get("/orcid/callback", response_model=OrcidLoginResponseSchema)
def orcid_callback(code: str, db: Session = Depends(get_db)):
"""
Recibe el `code` devuelto por ORCID, lo intercambia por tokens en el servidor
y emite nuestro JWT solo para el ORCID autenticado por ORCID.
"""
if not code:
raise HTTPException(status_code=status.HTTP_400_BAD_REQUEST, detail="Missing ORCID authorization code")
client = ORCIDClient()
redirect_uri = _orcid_redirect_uri()
try:
token_data = client.exchange_authorization_code(code=code, redirect_uri=redirect_uri)
except httpx.HTTPStatusError as exc:
raise HTTPException(
status_code=status.HTTP_502_BAD_GATEWAY,
detail=f"ORCID token error ({exc.response.status_code})",
)
except httpx.TimeoutException:
raise HTTPException(status_code=status.HTTP_504_GATEWAY_TIMEOUT, detail="ORCID timeout")
except Exception:
raise HTTPException(status_code=status.HTTP_502_BAD_GATEWAY, detail="ORCID unavailable")
orcid_id = (token_data.get("orcid") or "").strip()
if not is_valid_orcid(orcid_id):
raise HTTPException(status_code=status.HTTP_401_UNAUTHORIZED, detail="Invalid ORCID returned by OAuth")
display_name = token_data.get("name")
if not display_name:
# Fallback si ORCID no devuelve `name` en el token response.
try:
record = client.fetch_record(orcid_id)
display_name = _extract_display_name(record)
except Exception:
display_name = None
researcher = db.query(Researcher).filter(Researcher.orcid_id == orcid_id).first()
if not researcher:
researcher = Researcher(orcid_id=orcid_id, name=display_name, authenticated=True)
db.add(researcher)
else:
researcher.authenticated = True
if display_name and not researcher.name:
researcher.name = display_name
db.commit()
db.refresh(researcher)
token = create_access_token(subject=orcid_id, extra={"rid": str(researcher.id)})
return OrcidLoginResponseSchema(access_token=token)
+72 -6
View File
@@ -4,8 +4,9 @@ from sqlalchemy.orm import Session
from uuid import UUID
from app.db.session import get_db
from app.db.models import Publication, Researcher
from app.security.api_key import get_api_key
from app.db.models import Publication, Researcher, PublicationDownload
from app.security.api_key import get_api_key_optional
from app.security.jwt import get_optional_current_researcher
from app.services.sword_generator import SWORDGenerator
from app.services.zip_generator import ZIPGenerator
@@ -29,8 +30,11 @@ def validate_uuid_list(pub_ids: list[str]) -> list[UUID]:
async def export_multiple_sword(
pub_ids: list[str],
db: Session = Depends(get_db),
api_key: str = Depends(get_api_key)
api_key: str | None = Depends(get_api_key_optional),
current: Researcher | None = Depends(get_optional_current_researcher),
):
if not api_key and not current:
raise HTTPException(status_code=401, detail="Missing credentials")
validate_uuid_list(pub_ids)
pubs = db.query(Publication).filter(Publication.id.in_(pub_ids)).all()
@@ -41,6 +45,20 @@ async def export_multiple_sword(
researcher = db.query(Researcher).filter_by(id=pubs[0].researcher_id).first()
xml_bytes = SWORDGenerator.generate_feed_xml(researcher, pubs)
# Registrar descarga solo si hay usuario logueado
if current:
for p in pubs:
exists = (
db.query(PublicationDownload)
.filter(
PublicationDownload.researcher_id == current.id,
PublicationDownload.publication_id == p.id,
)
.first()
)
if not exists:
db.add(PublicationDownload(researcher_id=current.id, publication_id=p.id))
db.commit()
return Response(content=xml_bytes, media_type="application/xml")
@@ -48,8 +66,11 @@ async def export_multiple_sword(
async def export_researcher_sword(
orcid_id: str,
db: Session = Depends(get_db),
api_key: str = Depends(get_api_key)
api_key: str | None = Depends(get_api_key_optional),
current: Researcher | None = Depends(get_optional_current_researcher),
):
if not api_key and not current:
raise HTTPException(status_code=401, detail="Missing credentials")
researcher = db.query(Researcher).filter_by(orcid_id=orcid_id).first()
if not researcher:
raise HTTPException(status_code=404, detail="Researcher not found")
@@ -60,6 +81,19 @@ async def export_researcher_sword(
raise HTTPException(status_code=404, detail="No publications found for this researcher")
xml_bytes = SWORDGenerator.generate_feed_xml(researcher, pubs)
if current:
for p in pubs:
exists = (
db.query(PublicationDownload)
.filter(
PublicationDownload.researcher_id == current.id,
PublicationDownload.publication_id == p.id,
)
.first()
)
if not exists:
db.add(PublicationDownload(researcher_id=current.id, publication_id=p.id))
db.commit()
return Response(content=xml_bytes, media_type="application/xml")
@@ -67,8 +101,11 @@ async def export_researcher_sword(
async def export_multiple_zip(
pub_ids: list[str],
db: Session = Depends(get_db),
api_key: str = Depends(get_api_key)
api_key: str | None = Depends(get_api_key_optional),
current: Researcher | None = Depends(get_optional_current_researcher),
):
if not api_key and not current:
raise HTTPException(status_code=401, detail="Missing credentials")
validate_uuid_list(pub_ids)
pubs = db.query(Publication).filter(Publication.id.in_(pub_ids)).all()
@@ -79,6 +116,19 @@ async def export_multiple_zip(
researcher = db.query(Researcher).filter_by(id=pubs[0].researcher_id).first()
zip_bytes = ZIPGenerator.generate_zip(researcher, pubs)
if current:
for p in pubs:
exists = (
db.query(PublicationDownload)
.filter(
PublicationDownload.researcher_id == current.id,
PublicationDownload.publication_id == p.id,
)
.first()
)
if not exists:
db.add(PublicationDownload(researcher_id=current.id, publication_id=p.id))
db.commit()
return Response(content=zip_bytes, media_type="application/zip")
@@ -86,8 +136,11 @@ async def export_multiple_zip(
async def export_researcher_zip(
orcid_id: str,
db: Session = Depends(get_db),
api_key: str = Depends(get_api_key)
api_key: str | None = Depends(get_api_key_optional),
current: Researcher | None = Depends(get_optional_current_researcher),
):
if not api_key and not current:
raise HTTPException(status_code=401, detail="Missing credentials")
researcher = db.query(Researcher).filter_by(orcid_id=orcid_id).first()
if not researcher:
raise HTTPException(status_code=404, detail="Researcher not found")
@@ -98,4 +151,17 @@ async def export_researcher_zip(
raise HTTPException(status_code=404, detail="No publications found for this researcher")
zip_bytes = ZIPGenerator.generate_zip(researcher, pubs)
if current:
for p in pubs:
exists = (
db.query(PublicationDownload)
.filter(
PublicationDownload.researcher_id == current.id,
PublicationDownload.publication_id == p.id,
)
.first()
)
if not exists:
db.add(PublicationDownload(researcher_id=current.id, publication_id=p.id))
db.commit()
return Response(content=zip_bytes, media_type="application/zip")
+49 -17
View File
@@ -16,6 +16,9 @@ from app.schema.researcher import (
)
from app.services.normalizer import PublicationNormalizer
from app.services.orcid_client import get_works_summary, get_work_detail
from app.schema.publication import PublicationSchema
from app.db.models import PublicationDownload
from app.security.jwt import get_optional_current_researcher
router = APIRouter(prefix="/researchers", tags=["researchers"])
@@ -39,11 +42,11 @@ def publication_changed(existing: Publication, data: dict) -> bool:
return False
def build_researcher_stats(publications: List[Publication]) -> ResearcherStatsSchema:
def build_researcher_stats(publications: list) -> ResearcherStatsSchema:
publication_types: dict[str, int] = {}
for publication in publications:
pub_type = publication.type or "unknown"
pub_type = getattr(publication, "type", None) or "unknown"
publication_types[pub_type] = publication_types.get(pub_type, 0) + 1
return ResearcherStatsSchema(
@@ -118,7 +121,33 @@ def _upsert_researcher_publications(
return publications
def build_search_response(orcid_id: str, db: Session) -> ResearcherWithPublicationsSchema:
def _decorate_downloaded_by_me(
*,
db: Session,
current: Researcher | None,
publications: List[Publication],
) -> List[PublicationSchema] | List[Publication]:
if not current:
return publications
downloaded_ids = {
row[0]
for row in (
db.query(PublicationDownload.publication_id)
.filter(PublicationDownload.researcher_id == current.id)
.all()
)
}
out: List[PublicationSchema] = []
for p in publications:
out.append(
PublicationSchema.model_validate(p).model_copy(update={"downloaded_by_me": p.id in downloaded_ids})
)
return out
def build_search_response(orcid_id: str, db: Session, current: Researcher | None) -> ResearcherWithPublicationsSchema:
researcher = db.query(Researcher).filter(Researcher.orcid_id == orcid_id).first()
if not researcher:
researcher = Researcher(
@@ -131,31 +160,28 @@ def build_search_response(orcid_id: str, db: Session) -> ResearcherWithPublicati
db.flush()
publications = _upsert_researcher_publications(researcher, orcid_id, db)
stats = build_researcher_stats(publications)
publications_out = _decorate_downloaded_by_me(db=db, current=current, publications=publications)
stats = build_researcher_stats(publications_out)
return ResearcherWithPublicationsSchema(
researcher=researcher,
publications=publications,
publications=publications_out,
stats=stats,
new_records=0,
updated_records=0,
unchanged_records=0,
total_records=len(publications),
total_records=len(publications_out),
)
# ---------------------------------------------------------
# ENDPOINT 1: SEARCH + SYNC (sin contadores)
# ---------------------------------------------------------
@router.get("/search/{orcid_id}", response_model=ResearcherWithPublicationsSchema)
def search_and_sync_researcher(orcid_id: str, db: Session = Depends(get_db)):
return build_search_response(orcid_id, db)
@router.post("/search", response_model=ResearcherBatchSearchResponseSchema)
@router.post("/search", response_model=ResearcherBatchSearchResponseSchema, response_model_exclude_none=True)
def search_and_sync_researchers(
payload: ResearcherBatchSearchRequestSchema,
db: Session = Depends(get_db),
current: Researcher | None = Depends(get_optional_current_researcher),
):
results: List[ResearcherWithPublicationsSchema] = []
errors: List[ResearcherSearchErrorSchema] = []
@@ -165,7 +191,7 @@ def search_and_sync_researchers(
for orcid_id in unique_orcid_ids:
try:
results.append(build_search_response(orcid_id, db))
results.append(build_search_response(orcid_id, db, current))
except httpx.HTTPStatusError as exc:
db.rollback()
errors.append(
@@ -194,8 +220,12 @@ def search_and_sync_researchers(
# ---------------------------------------------------------
# ENDPOINT 2: SYNC COMPLETO (con contadores + status)
# ---------------------------------------------------------
@router.post("/{orcid_id}/sync", response_model=ResearcherWithPublicationsSchema)
def sync_researcher(orcid_id: str, db: Session = Depends(get_db)):
@router.post("/{orcid_id}/sync", response_model=ResearcherWithPublicationsSchema, response_model_exclude_none=True)
def sync_researcher(
orcid_id: str,
db: Session = Depends(get_db),
current: Researcher | None = Depends(get_optional_current_researcher),
):
researcher = db.query(Researcher).filter_by(orcid_id=orcid_id).first()
if not researcher:
raise HTTPException(status_code=404, detail="Researcher not found")
@@ -268,10 +298,12 @@ def sync_researcher(orcid_id: str, db: Session = Depends(get_db)):
db.commit()
db.refresh(researcher)
publications_out = _decorate_downloaded_by_me(db=db, current=current, publications=publications_output)
return ResearcherWithPublicationsSchema(
researcher=researcher,
publications=publications_output,
stats=build_researcher_stats(publications_output),
publications=publications_out,
stats=build_researcher_stats(publications_out),
new_records=new_count,
updated_records=updated_count,
unchanged_records=unchanged_count,