Merge pull request #8 from uja-dev-practices/feature/backend-v4

feat: enhance authentication and publication download tracking
This commit is contained in:
Mireya Cueto Garrido
2026-04-29 10:28:02 +02:00
committed by GitHub
13 changed files with 426 additions and 30 deletions
+9
View File
@@ -8,3 +8,12 @@ DATABASE_URL=postgresql://postgres:postgres@db:5432/orcid_db
REDIS_URL=redis://redis:6379/0
BASE_URL=http://localhost:8000/api
# JWT (login ORCID)
JWT_SECRET=change_me
JWT_ALGORITHM=HS256
JWT_EXPIRES_MINUTES=720
# ORCID OAuth 3-legged (authorization code)
# Debe coincidir exactamente con el redirect URI configurado en tu app ORCID.
ORCID_REDIRECT_URI=http://localhost:8000/api/auth/orcid/callback
+97
View File
@@ -0,0 +1,97 @@
import httpx
import os
from fastapi import APIRouter, Depends, HTTPException, status
from fastapi.responses import RedirectResponse
from sqlalchemy.orm import Session
from app.db.models import Researcher
from app.db.session import get_db
from app.schema.auth import OrcidLoginResponseSchema
from app.security.jwt import create_access_token
from app.services.orcid_client import ORCIDClient
from app.utils.orcid_validator import is_valid_orcid
# Router for the authentication endpoints; every route registered on it gets the "/auth" prefix
# and is grouped under the "auth" tag.
router = APIRouter(prefix="/auth", tags=["auth"])
def _extract_display_name(record: dict) -> str | None:
person = (record or {}).get("person") or {}
name = person.get("name") or {}
given = ((name.get("given-names") or {}).get("value")) if isinstance(name.get("given-names"), dict) else None
family = ((name.get("family-name") or {}).get("value")) if isinstance(name.get("family-name"), dict) else None
full = " ".join([p for p in [given, family] if p])
return full or None
def _orcid_redirect_uri() -> str:
    """Return the OAuth redirect URI used for the ORCID flow.

    The value comes from the ``ORCID_REDIRECT_URI`` environment variable; when
    it is unset or empty the local development callback URL is used. It must
    match the redirect URI registered in the ORCID integration.
    """
    configured = os.getenv("ORCID_REDIRECT_URI")
    if configured:
        return configured
    return "http://localhost:8000/api/auth/orcid/callback"
@router.get("/orcid/authorize")
def authorize_orcid():
    """Start the three-legged OAuth (authorization code) flow against ORCID.

    Returns:
        A redirect response pointing at the ORCID authorization URL.
    """
    # NOTE(review): no `state` value is passed to build_authorize_url, so the
    # callback has nothing to correlate with this request (OAuth CSRF
    # protection). Adding it needs a matching check in the callback — confirm.
    target_url = ORCIDClient().build_authorize_url(
        # Only the user's authenticated iD is needed.
        scope="/authenticate",
        redirect_uri=_orcid_redirect_uri(),
    )
    return RedirectResponse(target_url)
@router.get("/orcid/callback", response_model=OrcidLoginResponseSchema)
def orcid_callback(code: str, db: Session = Depends(get_db)):
    """Finish the ORCID login: exchange ``code`` server-side and issue our JWT.

    Args:
        code: Authorization code returned by ORCID.
        db: Database session.

    Returns:
        ``OrcidLoginResponseSchema`` carrying the access token issued for the
        ORCID iD that ORCID authenticated.

    Raises:
        HTTPException: 400 when ``code`` is empty; 502 when ORCID answers with
            an error status, is unreachable, or returns a malformed token
            payload; 504 on timeout; 401 when the returned ORCID iD is not
            valid.
    """
    # NOTE(review): no OAuth `state` parameter is accepted or verified here.
    if not code:
        raise HTTPException(status_code=status.HTTP_400_BAD_REQUEST, detail="Missing ORCID authorization code")

    client = ORCIDClient()
    redirect_uri = _orcid_redirect_uri()

    try:
        token_data = client.exchange_authorization_code(code=code, redirect_uri=redirect_uri)
    except httpx.HTTPStatusError as exc:
        raise HTTPException(
            status_code=status.HTTP_502_BAD_GATEWAY,
            detail=f"ORCID token error ({exc.response.status_code})",
        ) from exc
    except httpx.TimeoutException as exc:
        raise HTTPException(status_code=status.HTTP_504_GATEWAY_TIMEOUT, detail="ORCID timeout") from exc
    except Exception as exc:
        # Boundary with an external service: anything else is reported as a
        # gateway failure, keeping the original cause chained for the logs.
        raise HTTPException(status_code=status.HTTP_502_BAD_GATEWAY, detail="ORCID unavailable") from exc

    # A non-object JSON body would otherwise fail on `.get` with a 500.
    if not isinstance(token_data, dict):
        raise HTTPException(status_code=status.HTTP_502_BAD_GATEWAY, detail="Invalid ORCID token response")

    raw_orcid = token_data.get("orcid")
    orcid_id = raw_orcid.strip() if isinstance(raw_orcid, str) else ""
    if not is_valid_orcid(orcid_id):
        raise HTTPException(status_code=status.HTTP_401_UNAUTHORIZED, detail="Invalid ORCID returned by OAuth")

    display_name = token_data.get("name")
    if not display_name:
        # Fallback when ORCID does not return `name` in the token response.
        # Best-effort on purpose: a failed lookup must not block the login.
        try:
            record = client.fetch_record(orcid_id)
            display_name = _extract_display_name(record)
        except Exception:
            display_name = None

    researcher = db.query(Researcher).filter(Researcher.orcid_id == orcid_id).first()
    if researcher is None:
        researcher = Researcher(orcid_id=orcid_id, name=display_name, authenticated=True)
        db.add(researcher)
    else:
        researcher.authenticated = True
        # An already stored name is never overwritten.
        if display_name and not researcher.name:
            researcher.name = display_name

    db.commit()
    db.refresh(researcher)

    token = create_access_token(subject=orcid_id, extra={"rid": str(researcher.id)})
    return OrcidLoginResponseSchema(access_token=token)
+72 -6
View File
@@ -4,8 +4,9 @@ from sqlalchemy.orm import Session
from uuid import UUID
from app.db.session import get_db
from app.db.models import Publication, Researcher
from app.security.api_key import get_api_key
from app.db.models import Publication, Researcher, PublicationDownload
from app.security.api_key import get_api_key_optional
from app.security.jwt import get_optional_current_researcher
from app.services.sword_generator import SWORDGenerator
from app.services.zip_generator import ZIPGenerator
@@ -29,8 +30,11 @@ def validate_uuid_list(pub_ids: list[str]) -> list[UUID]:
async def export_multiple_sword(
pub_ids: list[str],
db: Session = Depends(get_db),
api_key: str = Depends(get_api_key)
api_key: str | None = Depends(get_api_key_optional),
current: Researcher | None = Depends(get_optional_current_researcher),
):
if not api_key and not current:
raise HTTPException(status_code=401, detail="Missing credentials")
validate_uuid_list(pub_ids)
pubs = db.query(Publication).filter(Publication.id.in_(pub_ids)).all()
@@ -41,6 +45,20 @@ async def export_multiple_sword(
researcher = db.query(Researcher).filter_by(id=pubs[0].researcher_id).first()
xml_bytes = SWORDGenerator.generate_feed_xml(researcher, pubs)
# Registrar descarga solo si hay usuario logueado
if current:
for p in pubs:
exists = (
db.query(PublicationDownload)
.filter(
PublicationDownload.researcher_id == current.id,
PublicationDownload.publication_id == p.id,
)
.first()
)
if not exists:
db.add(PublicationDownload(researcher_id=current.id, publication_id=p.id))
db.commit()
return Response(content=xml_bytes, media_type="application/xml")
@@ -48,8 +66,11 @@ async def export_multiple_sword(
async def export_researcher_sword(
orcid_id: str,
db: Session = Depends(get_db),
api_key: str = Depends(get_api_key)
api_key: str | None = Depends(get_api_key_optional),
current: Researcher | None = Depends(get_optional_current_researcher),
):
if not api_key and not current:
raise HTTPException(status_code=401, detail="Missing credentials")
researcher = db.query(Researcher).filter_by(orcid_id=orcid_id).first()
if not researcher:
raise HTTPException(status_code=404, detail="Researcher not found")
@@ -60,6 +81,19 @@ async def export_researcher_sword(
raise HTTPException(status_code=404, detail="No publications found for this researcher")
xml_bytes = SWORDGenerator.generate_feed_xml(researcher, pubs)
if current:
for p in pubs:
exists = (
db.query(PublicationDownload)
.filter(
PublicationDownload.researcher_id == current.id,
PublicationDownload.publication_id == p.id,
)
.first()
)
if not exists:
db.add(PublicationDownload(researcher_id=current.id, publication_id=p.id))
db.commit()
return Response(content=xml_bytes, media_type="application/xml")
@@ -67,8 +101,11 @@ async def export_researcher_sword(
async def export_multiple_zip(
pub_ids: list[str],
db: Session = Depends(get_db),
api_key: str = Depends(get_api_key)
api_key: str | None = Depends(get_api_key_optional),
current: Researcher | None = Depends(get_optional_current_researcher),
):
if not api_key and not current:
raise HTTPException(status_code=401, detail="Missing credentials")
validate_uuid_list(pub_ids)
pubs = db.query(Publication).filter(Publication.id.in_(pub_ids)).all()
@@ -79,6 +116,19 @@ async def export_multiple_zip(
researcher = db.query(Researcher).filter_by(id=pubs[0].researcher_id).first()
zip_bytes = ZIPGenerator.generate_zip(researcher, pubs)
if current:
for p in pubs:
exists = (
db.query(PublicationDownload)
.filter(
PublicationDownload.researcher_id == current.id,
PublicationDownload.publication_id == p.id,
)
.first()
)
if not exists:
db.add(PublicationDownload(researcher_id=current.id, publication_id=p.id))
db.commit()
return Response(content=zip_bytes, media_type="application/zip")
@@ -86,8 +136,11 @@ async def export_multiple_zip(
async def export_researcher_zip(
orcid_id: str,
db: Session = Depends(get_db),
api_key: str = Depends(get_api_key)
api_key: str | None = Depends(get_api_key_optional),
current: Researcher | None = Depends(get_optional_current_researcher),
):
if not api_key and not current:
raise HTTPException(status_code=401, detail="Missing credentials")
researcher = db.query(Researcher).filter_by(orcid_id=orcid_id).first()
if not researcher:
raise HTTPException(status_code=404, detail="Researcher not found")
@@ -98,4 +151,17 @@ async def export_researcher_zip(
raise HTTPException(status_code=404, detail="No publications found for this researcher")
zip_bytes = ZIPGenerator.generate_zip(researcher, pubs)
if current:
for p in pubs:
exists = (
db.query(PublicationDownload)
.filter(
PublicationDownload.researcher_id == current.id,
PublicationDownload.publication_id == p.id,
)
.first()
)
if not exists:
db.add(PublicationDownload(researcher_id=current.id, publication_id=p.id))
db.commit()
return Response(content=zip_bytes, media_type="application/zip")
+49 -17
View File
@@ -16,6 +16,9 @@ from app.schema.researcher import (
)
from app.services.normalizer import PublicationNormalizer
from app.services.orcid_client import get_works_summary, get_work_detail
from app.schema.publication import PublicationSchema
from app.db.models import PublicationDownload
from app.security.jwt import get_optional_current_researcher
router = APIRouter(prefix="/researchers", tags=["researchers"])
@@ -39,11 +42,11 @@ def publication_changed(existing: Publication, data: dict) -> bool:
return False
def build_researcher_stats(publications: List[Publication]) -> ResearcherStatsSchema:
def build_researcher_stats(publications: list) -> ResearcherStatsSchema:
publication_types: dict[str, int] = {}
for publication in publications:
pub_type = publication.type or "unknown"
pub_type = getattr(publication, "type", None) or "unknown"
publication_types[pub_type] = publication_types.get(pub_type, 0) + 1
return ResearcherStatsSchema(
@@ -118,7 +121,33 @@ def _upsert_researcher_publications(
return publications
def build_search_response(orcid_id: str, db: Session) -> ResearcherWithPublicationsSchema:
def _decorate_downloaded_by_me(
    *,
    db: Session,
    current: Researcher | None,
    publications: List[Publication],
) -> List[PublicationSchema] | List[Publication]:
    """Attach the per-user ``downloaded_by_me`` flag to each publication.

    Args:
        db: Database session used to read the download marks.
        current: Logged-in researcher, or ``None`` for anonymous requests.
        publications: Publications to decorate.

    Returns:
        ``publications`` itself when there is no logged-in researcher;
        otherwise a new list of ``PublicationSchema`` objects whose
        ``downloaded_by_me`` says whether ``current`` has a download row for
        that publication.
    """
    if not current:
        return publications

    # NOTE(review): this loads every publication id the researcher has
    # downloaded, not only the ids present in `publications`.
    rows = (
        db.query(PublicationDownload.publication_id)
        .filter(PublicationDownload.researcher_id == current.id)
        .all()
    )
    downloaded_ids = set()
    for row in rows:
        downloaded_ids.add(row[0])

    return [
        PublicationSchema.model_validate(pub).model_copy(
            update={"downloaded_by_me": pub.id in downloaded_ids}
        )
        for pub in publications
    ]
def build_search_response(orcid_id: str, db: Session, current: Researcher | None) -> ResearcherWithPublicationsSchema:
researcher = db.query(Researcher).filter(Researcher.orcid_id == orcid_id).first()
if not researcher:
researcher = Researcher(
@@ -131,31 +160,28 @@ def build_search_response(orcid_id: str, db: Session) -> ResearcherWithPublicati
db.flush()
publications = _upsert_researcher_publications(researcher, orcid_id, db)
stats = build_researcher_stats(publications)
publications_out = _decorate_downloaded_by_me(db=db, current=current, publications=publications)
stats = build_researcher_stats(publications_out)
return ResearcherWithPublicationsSchema(
researcher=researcher,
publications=publications,
publications=publications_out,
stats=stats,
new_records=0,
updated_records=0,
unchanged_records=0,
total_records=len(publications),
total_records=len(publications_out),
)
# ---------------------------------------------------------
# ENDPOINT 1: SEARCH + SYNC (sin contadores)
# ---------------------------------------------------------
@router.get("/search/{orcid_id}", response_model=ResearcherWithPublicationsSchema)
def search_and_sync_researcher(orcid_id: str, db: Session = Depends(get_db)):
return build_search_response(orcid_id, db)
@router.post("/search", response_model=ResearcherBatchSearchResponseSchema)
@router.post("/search", response_model=ResearcherBatchSearchResponseSchema, response_model_exclude_none=True)
def search_and_sync_researchers(
payload: ResearcherBatchSearchRequestSchema,
db: Session = Depends(get_db),
current: Researcher | None = Depends(get_optional_current_researcher),
):
results: List[ResearcherWithPublicationsSchema] = []
errors: List[ResearcherSearchErrorSchema] = []
@@ -165,7 +191,7 @@ def search_and_sync_researchers(
for orcid_id in unique_orcid_ids:
try:
results.append(build_search_response(orcid_id, db))
results.append(build_search_response(orcid_id, db, current))
except httpx.HTTPStatusError as exc:
db.rollback()
errors.append(
@@ -194,8 +220,12 @@ def search_and_sync_researchers(
# ---------------------------------------------------------
# ENDPOINT 2: SYNC COMPLETO (con contadores + status)
# ---------------------------------------------------------
@router.post("/{orcid_id}/sync", response_model=ResearcherWithPublicationsSchema)
def sync_researcher(orcid_id: str, db: Session = Depends(get_db)):
@router.post("/{orcid_id}/sync", response_model=ResearcherWithPublicationsSchema, response_model_exclude_none=True)
def sync_researcher(
orcid_id: str,
db: Session = Depends(get_db),
current: Researcher | None = Depends(get_optional_current_researcher),
):
researcher = db.query(Researcher).filter_by(orcid_id=orcid_id).first()
if not researcher:
raise HTTPException(status_code=404, detail="Researcher not found")
@@ -268,10 +298,12 @@ def sync_researcher(orcid_id: str, db: Session = Depends(get_db)):
db.commit()
db.refresh(researcher)
publications_out = _decorate_downloaded_by_me(db=db, current=current, publications=publications_output)
return ResearcherWithPublicationsSchema(
researcher=researcher,
publications=publications_output,
stats=build_researcher_stats(publications_output),
publications=publications_out,
stats=build_researcher_stats(publications_out),
new_records=new_count,
updated_records=updated_count,
unchanged_records=unchanged_count,
+21 -1
View File
@@ -1,4 +1,4 @@
from sqlalchemy import Column, String, Integer, Boolean, DateTime, ForeignKey
from sqlalchemy import Column, String, Integer, Boolean, DateTime, ForeignKey, UniqueConstraint
from sqlalchemy.dialects.postgresql import UUID, JSONB
from sqlalchemy.orm import relationship
import uuid
@@ -61,3 +61,23 @@ class Publication(Base):
# Tu campo existente
hash_fingerprint = Column(String, nullable=True)
last_modified = Column(DateTime, nullable=True, default=None)
# Legacy: descargado global (deprecado). Mantener por compatibilidad de DB.
downloaded = Column(Boolean, nullable=False, default=False)
class PublicationDownload(Base):
    """
    Per-user (researcher) download mark on any publication.

    One row per (researcher_id, publication_id) pair, enforced by the
    ``uq_publication_download`` unique constraint.
    """

    __tablename__ = "publication_downloads"
    __table_args__ = (
        UniqueConstraint(
            "researcher_id",
            "publication_id",
            name="uq_publication_download",
        ),
    )

    # Primary key; `uuid.uuid4` supplies the value when none is given.
    id = Column(
        UUID(as_uuid=True),
        primary_key=True,
        default=uuid.uuid4,
    )
    # Researcher who downloaded the publication (FK to researchers.id).
    researcher_id = Column(
        UUID(as_uuid=True),
        ForeignKey("researchers.id"),
        nullable=False,
        index=True,
    )
    # Publication that was downloaded (FK to publications.id).
    publication_id = Column(
        UUID(as_uuid=True),
        ForeignKey("publications.id"),
        nullable=False,
        index=True,
    )
    # NOTE(review): `datetime.utcnow` produces naive UTC timestamps and is
    # deprecated since Python 3.12.
    downloaded_at = Column(
        DateTime,
        nullable=False,
        default=datetime.utcnow,
    )
+20 -1
View File
@@ -1,6 +1,10 @@
from sqlalchemy import create_engine
from sqlalchemy import create_engine, inspect, text
from sqlalchemy.orm import sessionmaker, declarative_base
import os
from dotenv import load_dotenv
# Cargar variables del .env para ejecuciones locales (en Docker ya vendrán por entorno).
load_dotenv()
# -----------------------------
# DATABASE URL
@@ -42,3 +46,18 @@ def init_db():
# Crea todas las tablas si no existen
Base.metadata.create_all(bind=engine)
# Pequeñas migraciones "best-effort" para entornos sin Alembic.
# (create_all no altera tablas existentes)
_ensure_columns()
def _ensure_columns():
    """Add columns that ``create_all`` cannot add to already existing tables.

    Best-effort migration for environments without Alembic: when the
    ``publications`` table exists but has no ``downloaded`` column, the column
    is added as ``BOOLEAN NOT NULL DEFAULT FALSE``.
    """
    insp = inspect(engine)
    if "publications" not in insp.get_table_names():
        return

    existing = {col["name"] for col in insp.get_columns("publications")}
    if "downloaded" in existing:
        # Fast path: nothing to do, so no ALTER TABLE lock is taken.
        return

    # `IF NOT EXISTS` closes the window between the inspection above and the
    # ALTER: if another process added the column in the meantime, the statement
    # becomes a no-op instead of failing on a duplicate column.
    # NOTE(review): this clause is PostgreSQL syntax (9.6+); the configured
    # DATABASE_URL is PostgreSQL — confirm no other backend is used.
    with engine.begin() as conn:
        conn.execute(
            text(
                "ALTER TABLE publications "
                "ADD COLUMN IF NOT EXISTS downloaded BOOLEAN NOT NULL DEFAULT FALSE"
            )
        )
+2
View File
@@ -4,6 +4,7 @@ from fastapi.middleware.cors import CORSMiddleware
from app.db.session import init_db
from app.api.researchers import router as researchers_router
from app.api.export import router as export_router
from app.api.auth import router as auth_router
from app.scheduler.sync_scheduler import start_scheduler
@@ -39,6 +40,7 @@ def health():
# ---------------------------------------------------------
app.include_router(researchers_router, prefix="/api")
app.include_router(export_router, prefix="/api")
app.include_router(auth_router, prefix="/api")
# ---------------------------------------------------------
+12
View File
@@ -0,0 +1,12 @@
from pydantic import BaseModel, Field
class OrcidLoginRequestSchema(BaseModel):
    # Request payload carrying an ORCID authorization code.
    # `code` is the authorization code returned by ORCID OAuth after the user signs in.
    # Exchanging it for tokens must happen server-side.
    # `code` is required (no default); "Q70Y3A" is only an example value for the schema.
    code: str = Field(..., examples=["Q70Y3A"])
class OrcidLoginResponseSchema(BaseModel):
    # Response payload for a successful ORCID login.
    # `access_token` is the token issued to the client; `token_type` defaults to "bearer".
    access_token: str
    token_type: str = "bearer"
+2 -3
View File
@@ -25,6 +25,5 @@ class PublicationSchema(BaseModel):
hash_fingerprint: str | None = None
last_modified: datetime | None = None
status: str | None = None
class Config:
from_attributes = True
downloaded_by_me: bool | None = None
model_config = {"from_attributes": True}
+16
View File
@@ -25,3 +25,19 @@ def get_api_key(api_key: str = Depends(api_key_header)):
detail="API key inválida o ausente."
)
return api_key
def get_api_key_optional(api_key: str = Depends(api_key_header)) -> str | None:
    """Return the API key when it is present and correct.

    - Header absent: returns ``None``.
    - Header present but wrong: raises 401.

    Args:
        api_key: Value extracted by the ``api_key_header`` dependency.

    Returns:
        The validated API key, or ``None`` when no key was sent.

    Raises:
        HTTPException: 401 when a key was sent but does not match.
    """
    # Local import keeps this block self-contained.
    import hmac

    if api_key is None:
        return None

    expected = API_KEY_VALUE
    # Constant-time comparison so response timing does not reveal how much of
    # the key matched. Both sides are encoded to bytes because compare_digest
    # rejects non-ASCII str; a missing/non-string configured key never matches.
    matches = isinstance(expected, str) and hmac.compare_digest(
        api_key.encode("utf-8"),
        expected.encode("utf-8"),
    )
    if not matches:
        raise HTTPException(
            status_code=status.HTTP_401_UNAUTHORIZED,
            detail="API key inválida."
        )
    return api_key
+75
View File
@@ -0,0 +1,75 @@
import os
from datetime import datetime, timedelta, timezone
from typing import Any
from fastapi import Depends, HTTPException, status
from fastapi.security import HTTPAuthorizationCredentials, HTTPBearer
from jose import JWTError, jwt
from sqlalchemy.orm import Session
from dotenv import load_dotenv
from app.db.models import Researcher
from app.db.session import get_db
# Load variables from a local .env file (if any) into the environment before settings are read.
load_dotenv()
# Bearer-token extractor. auto_error=False makes a missing Authorization header arrive as None
# instead of an automatic error, so each dependency below decides how to treat missing credentials.
_bearer = HTTPBearer(auto_error=False)
def _settings() -> tuple[str, str, int]:
    """Read the JWT settings from the environment.

    Development fallbacks are used for missing values so that absent
    configuration does not turn into a 500.

    Returns:
        ``(secret, algorithm, expires_minutes)`` taken from ``JWT_SECRET``,
        ``JWT_ALGORITHM`` and ``JWT_EXPIRES_MINUTES``; the fallbacks are
        ``"change_me"``, ``"HS256"`` and ``720``.
    """
    # Local import keeps this block self-contained.
    import warnings

    default_secret = "change_me"
    default_minutes = 720

    secret = os.getenv("JWT_SECRET") or default_secret
    if secret == default_secret:
        # The fallback stays for local development, but it must not go
        # unnoticed: anyone who knows the placeholder can forge tokens.
        warnings.warn(
            "JWT_SECRET is unset or still the placeholder value; "
            "tokens are signed with a publicly known key.",
            RuntimeWarning,
            stacklevel=2,
        )

    algorithm = os.getenv("JWT_ALGORITHM") or "HS256"

    # A malformed or non-positive lifetime falls back to the default instead of
    # raising ValueError (or issuing already-expired tokens) on every call.
    try:
        expires_minutes = int(os.getenv("JWT_EXPIRES_MINUTES") or default_minutes)
    except ValueError:
        expires_minutes = default_minutes
    if expires_minutes <= 0:
        expires_minutes = default_minutes

    return secret, algorithm, expires_minutes
def create_access_token(*, subject: str, extra: dict[str, Any] | None = None) -> str:
    """Create a signed JWT for ``subject``.

    Args:
        subject: Value stored in the ``sub`` claim.
        extra: Optional additional claims. They cannot replace the registered
            ``sub``, ``iat`` and ``exp`` claims set by this function.

    Returns:
        The encoded token, signed with the secret and algorithm from
        ``_settings()`` and expiring after the configured number of minutes.
    """
    secret, algorithm, expires_minutes = _settings()
    issued_at = datetime.now(timezone.utc)
    expires_at = issued_at + timedelta(minutes=expires_minutes)

    # Start from the extra claims and write the registered claims last, so a
    # caller-supplied "sub"/"iat"/"exp" can never override identity or expiry.
    payload: dict[str, Any] = dict(extra or {})
    payload.update(
        {
            "sub": subject,
            "iat": int(issued_at.timestamp()),
            "exp": int(expires_at.timestamp()),
        }
    )
    return jwt.encode(payload, secret, algorithm=algorithm)
def _unauthorized(detail: str) -> HTTPException:
    """Build a 401 error that advertises the Bearer authentication scheme."""
    return HTTPException(
        status_code=status.HTTP_401_UNAUTHORIZED,
        detail=detail,
        headers={"WWW-Authenticate": "Bearer"},
    )


def get_current_researcher(
    creds: HTTPAuthorizationCredentials = Depends(_bearer),
    db: Session = Depends(get_db),
) -> Researcher:
    """Resolve the authenticated researcher from the Bearer token.

    Args:
        creds: Bearer credentials extracted from the request, if any.
        db: Database session used to look the researcher up.

    Returns:
        The ``Researcher`` whose ``orcid_id`` equals the token's ``sub`` claim.

    Raises:
        HTTPException: 401 when the token is missing, cannot be decoded, has
            no usable ``sub`` claim, or the researcher does not exist or is
            not marked as authenticated.
    """
    if not creds or not creds.credentials:
        raise _unauthorized("Missing bearer token")

    secret, algorithm, _ = _settings()
    try:
        payload = jwt.decode(creds.credentials, secret, algorithms=[algorithm])
    except JWTError as exc:
        # Chain explicitly so the decoding failure stays visible in tracebacks.
        raise _unauthorized("Invalid token") from exc

    orcid_id = payload.get("sub")
    if not isinstance(orcid_id, str) or not orcid_id:
        raise _unauthorized("Invalid token subject")

    researcher = db.query(Researcher).filter(Researcher.orcid_id == orcid_id).first()
    if not researcher or not researcher.authenticated:
        raise _unauthorized("Researcher not authenticated")
    return researcher
def get_optional_current_researcher(
    creds: HTTPAuthorizationCredentials = Depends(_bearer),
    db: Session = Depends(get_db),
) -> Researcher | None:
    """
    Return the authenticated researcher when a Bearer token is sent.

    Without a token the result is None; with a token, validation is delegated
    to ``get_current_researcher``, which raises 401 for an invalid one.
    """
    has_token = bool(creds and creds.credentials)
    if has_token:
        return get_current_researcher(creds=creds, db=db)
    return None
+49 -1
View File
@@ -1,9 +1,11 @@
import os
from typing import Optional
import urllib.parse
from typing import Any, Optional
import httpx
TOKEN_URL_SANDBOX = "https://sandbox.orcid.org/oauth/token"
AUTHORIZATION_URL_SANDBOX = "https://sandbox.orcid.org/oauth/authorize"
BASE_URL_SANDBOX = "https://pub.sandbox.orcid.org/v3.0"
# Si en algún momento pasas a producción, cambiarías a:
@@ -17,6 +19,7 @@ class ORCIDClient:
self.client_secret = os.getenv("ORCID_CLIENT_SECRET")
self._token_cache: Optional[str] = None
self.token_url = TOKEN_URL_SANDBOX
self.authorization_url = AUTHORIZATION_URL_SANDBOX
self.base_url = BASE_URL_SANDBOX
# ---------------------------------------------------------
@@ -81,6 +84,51 @@ class ORCIDClient:
return None
return response.json()
# ---------------------------------------------------------
# OAuth 3-legged (authorization code)
# ---------------------------------------------------------
def build_authorize_url(
self,
*,
redirect_uri: str,
scope: str = "/authenticate",
state: str | None = None,
) -> str:
"""
Creates the ORCID authorization URL (user signs in at ORCID and returns an auth code).
"""
params: dict[str, Any] = {
"client_id": self.client_id,
"response_type": "code",
# Scope(s) are space-separated in the authorize URL.
"scope": scope,
"redirect_uri": redirect_uri,
}
if state:
params["state"] = state
return f"{self.authorization_url}?{urllib.parse.urlencode(params)}"
def exchange_authorization_code(
    self,
    *,
    code: str,
    redirect_uri: str,
) -> dict:
    """
    Exchange an authorization code for tokens, server-side.

    Posts the client credentials, ``code`` and ``redirect_uri`` to the token
    URL as form data and returns the decoded JSON body. The response includes
    at least `orcid` and usually `name`. An error status raises through
    ``raise_for_status()``.
    """
    form = dict(
        client_id=self.client_id,
        client_secret=self.client_secret,
        grant_type="authorization_code",
        code=code,
        redirect_uri=redirect_uri,
    )
    json_headers = {"Accept": "application/json"}
    with httpx.Client(timeout=20.0) as http:
        token_response = http.post(self.token_url, data=form, headers=json_headers)
        token_response.raise_for_status()
        return token_response.json()
# -------------------------------------------------------------------
# Funciones de módulo usadas en researchers.py
+1
View File
@@ -11,3 +11,4 @@ authlib
redis
APScheduler==3.10.4
requests
python-jose[cryptography]