209 lines
6.3 KiB
Python
209 lines
6.3 KiB
Python
import logging
from datetime import datetime
from typing import List

from fastapi import APIRouter, Depends, HTTPException
from sqlalchemy.orm import Session

from app.db.models import Publication, Researcher
from app.db.session import get_db
from app.schema.researcher import ResearcherWithPublicationsSchema
from app.services.normalizer import PublicationNormalizer
from app.services.orcid_client import get_works_summary, get_work_detail
|
|
|
|
router = APIRouter(prefix="/researchers", tags=["researchers"])
|
|
|
|
|
|
# ---------------------------------------------------------
# Helper: detect whether a publication has changed
# ---------------------------------------------------------
|
|
def publication_changed(existing: Publication, data: dict) -> bool:
    """Return True when any tracked field of *existing* differs from *data*.

    Only the content fields listed below are compared; other keys in *data*
    (for example ``put_code``) are ignored.

    A tracked key that is absent from *data* is treated as "not supplied" and
    never counts as a change. ``sync_researcher`` only writes the keys that
    are present in *data*, so an absent key could not produce an update
    anyway; previously it raised ``KeyError``.

    Args:
        existing: Stored publication; must expose every tracked field as an
            attribute.
        data: Normalized publication values keyed by field name.

    Returns:
        True if at least one supplied tracked field differs, otherwise False.
    """
    tracked_fields = (
        "title", "subtitle", "type", "journal",
        "pub_year", "pub_month", "pub_day",
        "doi", "url", "short_description",
        "citation_type", "citation_value",
        "language_code", "country",
        "external_ids", "contributors",
    )
    # any() short-circuits on the first differing field, like the original loop.
    return any(
        name in data and getattr(existing, name) != data[name]
        for name in tracked_fields
    )
|
|
|
|
|
|
# ---------------------------------------------------------
# ENDPOINT 1: SEARCH + SYNC (no counters)
# ---------------------------------------------------------
|
|
@router.get("/search/{orcid_id}", response_model=ResearcherWithPublicationsSchema)
def search_and_sync_researcher(orcid_id: str, db: Session = Depends(get_db)):
    """Find (or create) a researcher by ORCID iD and upsert their works.

    Each work group in the ORCID works summary is normalized and stored as a
    ``Publication`` of the researcher: existing rows (matched by researcher
    and ``put_code``) are overwritten, missing ones are inserted. This
    endpoint does not classify records: ``status`` is set to ``None`` on
    every publication and the new/updated/unchanged counters are returned
    as zero.

    Args:
        orcid_id: ORCID iD taken from the URL path.
        db: SQLAlchemy session injected through ``get_db``.

    Returns:
        ``ResearcherWithPublicationsSchema`` holding the researcher, the
        publications processed in this call and ``total_records`` set to
        their count.
    """
    # Find the researcher, or create a placeholder row for it.
    researcher = db.query(Researcher).filter(Researcher.orcid_id == orcid_id).first()
    if not researcher:
        researcher = Researcher(
            orcid_id=orcid_id,
            name=None,
            authenticated=False,
            last_sync_at=None,
        )
        db.add(researcher)
        # Flush now: researcher.id is used below to look up and build publications.
        db.flush()

    # Fetch the works summary from ORCID. Tolerate a missing payload or a
    # null "group" the same way a null "work-summary" is tolerated below.
    works = get_works_summary(orcid_id)
    groups = (works or {}).get("group") or []

    # Fields copied from the normalized data onto an existing publication.
    updatable_fields = (
        "title", "subtitle", "type", "journal",
        "pub_year", "pub_month", "pub_day",
        "doi", "url", "short_description",
        "citation_type", "citation_value",
        "language_code", "country",
        "external_ids", "contributors",
    )

    publications: List[Publication] = []

    for group in groups:
        summaries = group.get("work-summary") or []
        if not summaries:
            continue

        # Only the first summary of each group is used.
        summary = summaries[0]
        put_code = summary.get("put-code")
        if put_code is None:
            continue

        # Fetch the work detail. This is best-effort: on failure the work is
        # normalized from the summary alone, but the error is logged instead
        # of being dropped silently.
        try:
            detail = get_work_detail(orcid_id, put_code)
        except Exception:
            logging.getLogger(__name__).warning(
                "Could not fetch ORCID work detail (orcid_id=%s, put_code=%s); "
                "falling back to summary data",
                orcid_id,
                put_code,
                exc_info=True,
            )
            detail = None

        # Normalize the ORCID payload into Publication field values.
        data = PublicationNormalizer.normalize(summary, detail)

        # Check whether this publication is already stored.
        existing = (
            db.query(Publication)
            .filter(
                Publication.researcher_id == researcher.id,
                Publication.put_code == data["put_code"],
            )
            .first()
        )

        if existing:
            for field in updatable_fields:
                setattr(existing, field, data[field])
            # NOTE(review): last_modified is bumped even when nothing changed,
            # unlike the /sync endpoint — confirm this is intended.
            existing.last_modified = datetime.utcnow()
            existing.status = None
            publications.append(existing)
        else:
            pub = Publication(
                researcher_id=researcher.id,
                **data,
                last_modified=datetime.utcnow(),
            )
            pub.status = None
            db.add(pub)
            publications.append(pub)

    researcher.last_sync_at = datetime.utcnow()
    db.commit()
    db.refresh(researcher)

    return ResearcherWithPublicationsSchema(
        researcher=researcher,
        publications=publications,
        new_records=0,
        updated_records=0,
        unchanged_records=0,
        total_records=len(publications),
    )
|
|
|
|
|
|
# ---------------------------------------------------------
# ENDPOINT 2: FULL SYNC (with counters + status)
# ---------------------------------------------------------
|
|
@router.post("/{orcid_id}/sync", response_model=ResearcherWithPublicationsSchema)
def sync_researcher(orcid_id: str, db: Session = Depends(get_db)):
    """Synchronize an existing researcher's publications with ORCID.

    Each work group in the ORCID works summary is normalized and compared
    with the stored ``Publication`` that has the same ``put_code``. The
    publication's ``status`` is set to ``"new"``, ``"updated"`` or
    ``"unchanged"`` accordingly, and one counter per status is returned.

    Args:
        orcid_id: ORCID iD taken from the URL path.
        db: SQLAlchemy session injected through ``get_db``.

    Returns:
        ``ResearcherWithPublicationsSchema`` holding the researcher, the
        processed publications and the new/updated/unchanged/total counters.

    Raises:
        HTTPException: 404 when no researcher with this ORCID iD is stored.
    """
    researcher = db.query(Researcher).filter_by(orcid_id=orcid_id).first()
    if not researcher:
        raise HTTPException(status_code=404, detail="Researcher not found")

    # Tolerate a missing payload or a null "group" the same way a null
    # "work-summary" is tolerated below.
    works = get_works_summary(orcid_id)
    groups = (works or {}).get("group") or []

    publications_output = []
    new_count = 0
    updated_count = 0
    unchanged_count = 0

    for group in groups:
        summaries = group.get("work-summary") or []
        if not summaries:
            continue

        # Only the first summary of each group is used.
        summary = summaries[0]
        put_code = summary.get("put-code")
        if put_code is None:
            continue

        # The detail fetch is best-effort: on failure the work is normalized
        # from the summary alone, but the error is logged instead of being
        # dropped silently.
        try:
            detail = get_work_detail(orcid_id, put_code)
        except Exception:
            logging.getLogger(__name__).warning(
                "Could not fetch ORCID work detail (orcid_id=%s, put_code=%s); "
                "falling back to summary data",
                orcid_id,
                put_code,
                exc_info=True,
            )
            detail = None

        data = PublicationNormalizer.normalize(summary, detail)

        existing = (
            db.query(Publication)
            .filter(
                Publication.researcher_id == researcher.id,
                Publication.put_code == data["put_code"],
            )
            .first()
        )

        if existing:
            if publication_changed(existing, data):
                # Updated: copy every normalized value onto the stored row.
                for field, value in data.items():
                    setattr(existing, field, value)
                existing.last_modified = datetime.utcnow()
                existing.status = "updated"
                updated_count += 1
            else:
                # Unchanged: only the status is set.
                existing.status = "unchanged"
                unchanged_count += 1
            pub = existing
        else:
            # New: insert a publication for this researcher.
            pub = Publication(
                researcher_id=researcher.id,
                **data,
                last_modified=datetime.utcnow(),
            )
            pub.status = "new"
            db.add(pub)
            new_count += 1

        # Push pending changes to the database after each work.
        db.flush()
        publications_output.append(pub)

    researcher.last_sync_at = datetime.utcnow()
    db.commit()
    db.refresh(researcher)

    return ResearcherWithPublicationsSchema(
        researcher=researcher,
        publications=publications_output,
        new_records=new_count,
        updated_records=updated_count,
        unchanged_records=unchanged_count,
        total_records=new_count + updated_count + unchanged_count,
    )
|