238742bda4
Reutiliza cliente HTTP ORCID, reduce consultas DB al sincronizar works y añade SYNC_* en settings y scheduler (cron mensual o intervalo). Actualiza backend/.env, frontend/.env y .env.example para despliegue.
335 lines
10 KiB
Python
335 lines
10 KiB
Python
import logging
from datetime import datetime
from typing import List

import httpx
from fastapi import APIRouter, Depends, HTTPException, Path, Request
from sqlalchemy.orm import Session

from app.core.config import settings
from app.core.rate_limit import limiter
from app.db.models import Publication, PublicationDownload, Researcher
from app.db.session import get_db
from app.schema.publication import PublicationSchema
from app.schema.researcher import (
    ResearcherBatchSearchRequestSchema,
    ResearcherBatchSearchResponseSchema,
    ResearcherSearchErrorSchema,
    ResearcherStatsSchema,
    ResearcherWithPublicationsSchema,
)
from app.security.jwt import get_optional_current_researcher
from app.services.normalizer import PublicationNormalizer
from app.services.orcid_client import get_display_name, get_orcid_client
from app.utils.orcid_validator import ORCID_PATTERN, is_valid_orcid
|
|
|
|
|
|
# Router for the researcher endpoints; routes registered on it are served
# under the "/researchers" prefix and tagged "researchers".
router = APIRouter(prefix="/researchers", tags=["researchers"])
|
|
|
|
|
|
def publication_changed(existing: Publication, data: dict) -> bool:
    """Tell whether a stored publication differs from freshly normalized data.

    Args:
        existing: Stored publication whose attributes are compared.
        data: Mapping that holds a value for every compared field name.

    Returns:
        True as soon as one compared field differs, False when all match.
        Keys of ``data`` outside the compared set are ignored.

    Raises:
        KeyError: If ``data`` lacks one of the compared field names.
    """
    compared_fields = (
        "title", "subtitle", "type", "journal",
        "pub_year", "pub_month", "pub_day",
        "doi", "url", "short_description",
        "citation_type", "citation_value",
        "language_code", "country",
        "external_ids", "contributors",
    )
    for name in compared_fields:
        if getattr(existing, name) != data[name]:
            return True
    return False
|
|
|
|
|
|
def build_researcher_stats(publications: list) -> ResearcherStatsSchema:
    """Summarize a list of publications into a stats schema.

    Args:
        publications: Objects that may expose a ``type`` attribute. A missing
            or falsy ``type`` is counted under ``"unknown"``.

    Returns:
        A ``ResearcherStatsSchema`` with the total number of publications and
        a per-type count.
    """
    counts_by_type: dict[str, int] = {}
    for item in publications:
        label = getattr(item, "type", None) or "unknown"
        if label in counts_by_type:
            counts_by_type[label] += 1
        else:
            counts_by_type[label] = 1

    total = len(publications)
    return ResearcherStatsSchema(
        publication_types=counts_by_type,
        total_publications=total,
    )
|
|
|
|
|
|
def _upsert_researcher_publications(
    researcher: Researcher,
    orcid_id: str,
    db: Session,
) -> List[Publication]:
    """Fetch a researcher's ORCID works and insert or overwrite them locally.

    For each work group returned by the ORCID client, the first work summary
    is normalized (together with its detail record when that can be fetched)
    and written to the ``Publication`` with the same put-code, or to a newly
    created one. ``researcher.last_sync_at`` is then stamped, the session is
    committed and the researcher is refreshed.

    Args:
        researcher: Owner of the publications; its ``id`` scopes the lookup
            of existing rows and is set on new ones.
        orcid_id: ORCID iD passed to the ORCID client.
        db: Session used for the lookup, the inserts and the commit.

    Returns:
        The publications touched by this run, in ORCID group order, each with
        ``status`` set to ``None``.
    """
    log = logging.getLogger(__name__)
    synced_fields = (
        "title", "subtitle", "type", "journal",
        "pub_year", "pub_month", "pub_day",
        "doi", "url", "short_description",
        "citation_type", "citation_value",
        "language_code", "country",
        "external_ids", "contributors",
    )

    orcid_client = get_orcid_client()
    works = orcid_client.fetch_works(orcid_id)
    # `or []` also covers a payload where "group" is present but null.
    groups = works.get("group") or []

    # Load every existing row once so the loop needs no per-work query.
    existing_by_put_code = {
        publication.put_code: publication
        for publication in db.query(Publication)
        .filter(Publication.researcher_id == researcher.id)
        .all()
    }

    publications: List[Publication] = []
    for group in groups:
        summaries = group.get("work-summary") or []
        if not summaries:
            continue

        # Only the first summary of each group is used.
        summary = summaries[0]
        put_code = summary.get("put-code")
        if put_code is None:
            continue

        try:
            detail = orcid_client.fetch_work_detail(orcid_id, put_code)
        except Exception:
            # Best effort: a failed detail lookup must not abort the whole
            # sync, but it should leave a trace instead of vanishing.
            log.warning(
                "Could not fetch ORCID work detail (orcid_id=%s, put_code=%s); "
                "normalizing from the summary only",
                orcid_id,
                put_code,
                exc_info=True,
            )
            detail = None

        data = PublicationNormalizer.normalize(summary, detail)
        existing = existing_by_put_code.get(data["put_code"])

        if existing:
            # NOTE(review): fields and last_modified are overwritten even when
            # nothing changed, unlike sync_researcher, which checks
            # publication_changed first - confirm this is intended.
            for field in synced_fields:
                setattr(existing, field, data[field])
            existing.last_modified = datetime.utcnow()
            existing.status = None
            publications.append(existing)
        else:
            pub = Publication(
                researcher_id=researcher.id,
                **data,
                last_modified=datetime.utcnow(),
            )
            pub.status = None
            db.add(pub)
            publications.append(pub)
            # Register the new row so a repeated put-code in a later group
            # updates it instead of inserting a duplicate.
            existing_by_put_code[data["put_code"]] = pub

    researcher.last_sync_at = datetime.utcnow()
    db.commit()
    db.refresh(researcher)

    return publications
|
|
|
|
|
|
def _decorate_downloaded_by_me(
    *,
    db: Session,
    current: Researcher | None,
    publications: List[Publication],
) -> List[PublicationSchema] | List[Publication]:
    """Flag which publications the current researcher has already downloaded.

    Args:
        db: Session used to read the researcher's download records.
        current: Researcher making the request, or ``None``.
        publications: Publications to decorate.

    Returns:
        ``publications`` itself, untouched, when ``current`` is falsy.
        Otherwise a new list of ``PublicationSchema`` copies whose
        ``downloaded_by_me`` is True exactly when the publication id appears
        among the researcher's ``PublicationDownload`` rows.
    """
    if current:
        download_rows = (
            db.query(PublicationDownload.publication_id)
            .filter(PublicationDownload.researcher_id == current.id)
            .all()
        )
        # Each row carries a single column: the publication id.
        seen_ids = {row[0] for row in download_rows}

        return [
            PublicationSchema.model_validate(pub).model_copy(
                update={"downloaded_by_me": pub.id in seen_ids}
            )
            for pub in publications
        ]

    return publications
|
|
|
|
|
|
def build_search_response(orcid_id: str, db: Session, current: Researcher | None) -> ResearcherWithPublicationsSchema:
    """Resolve one ORCID iD into a researcher with freshly synced publications.

    The researcher row is created (unauthenticated, unnamed, never synced)
    when it does not exist yet. A missing name is filled from
    ``get_display_name`` when that returns a value. The publications are then
    upserted and decorated for the requesting researcher.

    Args:
        orcid_id: ORCID iD to look up.
        db: Session used for every read and write.
        current: Researcher making the request, or ``None``.

    Returns:
        The response schema. The new/updated/unchanged counters are always 0
        on this path; ``total_records`` is the number of publications
        returned.

    Raises:
        HTTPException: 400 when ``is_valid_orcid`` rejects ``orcid_id``.
    """
    if not is_valid_orcid(orcid_id):
        raise HTTPException(status_code=400, detail="Invalid ORCID iD")

    record = db.query(Researcher).filter(Researcher.orcid_id == orcid_id).first()
    if not record:
        record = Researcher(
            orcid_id=orcid_id,
            name=None,
            authenticated=False,
            last_sync_at=None,
        )
        db.add(record)
        db.flush()

    # The display name is only requested while the stored name is empty.
    if not record.name and (fetched_name := get_display_name(orcid_id)):
        record.name = fetched_name
        db.flush()

    synced = _upsert_researcher_publications(record, orcid_id, db)
    decorated = _decorate_downloaded_by_me(db=db, current=current, publications=synced)
    summary = build_researcher_stats(decorated)

    return ResearcherWithPublicationsSchema(
        researcher=record,
        publications=decorated,
        stats=summary,
        new_records=0,
        updated_records=0,
        unchanged_records=0,
        total_records=len(decorated),
    )
|
|
|
|
|
|
# ---------------------------------------------------------
|
|
# ENDPOINT 1: SEARCH + SYNC
|
|
# ---------------------------------------------------------
|
|
|
|
|
|
@router.post(
    "/search",
    response_model=ResearcherBatchSearchResponseSchema,
    response_model_exclude_none=True,
)
@limiter.limit(settings.RATE_LIMIT_SEARCH_ANON)
def search_and_sync_researchers(
    request: Request,
    payload: ResearcherBatchSearchRequestSchema,
    db: Session = Depends(get_db),
    current: Researcher | None = Depends(get_optional_current_researcher),
):
    """Search and sync a batch of ORCID iDs.

    Duplicate iDs are processed once, in first-seen order. A failure on one
    iD is reported in ``errors`` and does not stop the rest of the batch.
    """
    # `request` is not read here; presumably the rate-limit decorator needs it
    # in the signature - confirm against app.core.rate_limit.
    log = logging.getLogger(__name__)

    results: List[ResearcherWithPublicationsSchema] = []
    errors: List[ResearcherSearchErrorSchema] = []

    # dict.fromkeys de-duplicates while keeping the caller's order.
    unique_orcid_ids = list(dict.fromkeys(payload.orcid_ids))

    for orcid_id in unique_orcid_ids:
        try:
            response = build_search_response(orcid_id, db, current)
        except HTTPException as exc:
            detail = str(exc.detail)
        except httpx.HTTPStatusError as exc:
            detail = f"ORCID returned {exc.response.status_code}"
        except Exception:
            # The client only gets a generic message, so keep the traceback
            # in the server log; otherwise the cause is unrecoverable.
            log.exception("Unexpected error while processing ORCID iD %s", orcid_id)
            detail = "Unexpected error while processing ORCID iD"
        else:
            results.append(response)
            continue

        # Failure path: discard this iD's pending changes before moving on.
        db.rollback()
        errors.append(
            ResearcherSearchErrorSchema(
                orcid_id=orcid_id,
                detail=detail,
            )
        )

    return ResearcherBatchSearchResponseSchema(
        results=results,
        errors=errors,
        total_requested=len(unique_orcid_ids),
        total_processed=len(results),
    )
|
|
|
|
|
|
# ---------------------------------------------------------
|
|
# ENDPOINT 2: FULL SYNC (meant to require authentication; the handler currently accepts an optional researcher)
|
|
# ---------------------------------------------------------
|
|
|
|
@router.post(
    "/{orcid_id}/sync",
    response_model=ResearcherWithPublicationsSchema,
    response_model_exclude_none=True,
)
@limiter.limit(settings.RATE_LIMIT_SYNC)
def sync_researcher(
    request: Request,
    orcid_id: str = Path(min_length=19, max_length=19, pattern=ORCID_PATTERN),
    db: Session = Depends(get_db),
    current: Researcher | None = Depends(get_optional_current_researcher),
):
    """Run a full sync of an existing researcher's ORCID works.

    Every work is classified as new, updated or unchanged against the stored
    publications, and the counts are returned with the publications.
    Responds 400 for an invalid ORCID iD and 404 when the researcher is not
    stored yet.
    """
    # NOTE(review): `current` comes from the optional-auth dependency, so an
    # anonymous caller reaches this handler - confirm whether authentication
    # should be enforced here.
    # `request` is not read here; presumably the rate-limit decorator needs it
    # in the signature - confirm against app.core.rate_limit.
    log = logging.getLogger(__name__)

    if not is_valid_orcid(orcid_id):
        raise HTTPException(status_code=400, detail="Invalid ORCID iD")

    researcher = db.query(Researcher).filter_by(orcid_id=orcid_id).first()
    if not researcher:
        raise HTTPException(status_code=404, detail="Researcher not found")

    orcid_client = get_orcid_client()
    works = orcid_client.fetch_works(orcid_id)
    # `or []` also covers a payload where "group" is present but null.
    groups = works.get("group") or []

    publications_output = []
    new_count = 0
    updated_count = 0
    unchanged_count = 0

    # Load every existing row once so the loop needs no per-work query.
    existing_by_put_code = {
        publication.put_code: publication
        for publication in db.query(Publication)
        .filter(Publication.researcher_id == researcher.id)
        .all()
    }

    for group in groups:
        summaries = group.get("work-summary") or []
        if not summaries:
            continue

        # Only the first summary of each group is used.
        summary = summaries[0]
        put_code = summary.get("put-code")
        if put_code is None:
            continue

        try:
            detail = orcid_client.fetch_work_detail(orcid_id, put_code)
        except Exception:
            # Best effort: a failed detail lookup must not abort the whole
            # sync, but it should leave a trace instead of vanishing.
            log.warning(
                "Could not fetch ORCID work detail (orcid_id=%s, put_code=%s); "
                "normalizing from the summary only",
                orcid_id,
                put_code,
                exc_info=True,
            )
            detail = None

        data = PublicationNormalizer.normalize(summary, detail)
        existing = existing_by_put_code.get(data["put_code"])

        if existing:
            if publication_changed(existing, data):
                # Copy every normalized key, not only the compared fields.
                for field, value in data.items():
                    setattr(existing, field, value)
                existing.last_modified = datetime.utcnow()
                existing.status = "updated"
                updated_count += 1
            else:
                existing.status = "unchanged"
                unchanged_count += 1

            pub = existing
        else:
            pub = Publication(
                researcher_id=researcher.id,
                **data,
                last_modified=datetime.utcnow(),
            )
            pub.status = "new"
            db.add(pub)
            new_count += 1
            # Register the new row so a repeated put-code in a later group
            # is compared against it instead of being inserted again.
            existing_by_put_code[data["put_code"]] = pub

        publications_output.append(pub)

    researcher.last_sync_at = datetime.utcnow()
    db.commit()
    db.refresh(researcher)

    publications_out = _decorate_downloaded_by_me(db=db, current=current, publications=publications_output)

    return ResearcherWithPublicationsSchema(
        researcher=researcher,
        publications=publications_out,
        stats=build_researcher_stats(publications_out),
        new_records=new_count,
        updated_records=updated_count,
        unchanged_records=unchanged_count,
        total_records=new_count + updated_count + unchanged_count,
    )
|