97 lines
3.4 KiB
Python
97 lines
3.4 KiB
Python
from sqlalchemy.orm import Session
|
|
from app.services.orcid_client import ORCIDClient
|
|
from app.services.normalizer import PublicationNormalizer
|
|
from app.repositories.researcher_repository import ResearcherRepository
|
|
from app.repositories.publication_repository import PublicationRepository
|
|
from app.repositories.syncjob_repository import SyncJobRepository
|
|
import httpx
|
|
|
|
|
|
class SyncService:
    """Synchronize a researcher's ORCID works into the local database."""

    def __init__(self):
        # Client used for every ORCID API call made by this service.
        self.orcid_client = ORCIDClient()

    @staticmethod
    def _extract_given_names(record):
        """Return ``person.name.given-names.value`` from *record*, or None.

        Every level is read with ``or {}`` instead of ``.get(key, {})``:
        a key that is present with a ``None`` (JSON null) value bypasses
        the ``.get`` default and would raise ``AttributeError`` on the
        next lookup.
        """
        person = (record or {}).get("person") or {}
        name = person.get("name") or {}
        given_names = name.get("given-names") or {}
        return given_names.get("value")

    @staticmethod
    def _first_summaries(works_raw):
        """Yield the first ``work-summary`` entry of each works group.

        Groups whose ``work-summary`` is missing, ``None`` or empty are
        skipped rather than raising ``KeyError``/``IndexError``; a missing
        or ``None`` ``group`` list yields nothing.
        """
        for group in (works_raw or {}).get("group") or []:
            summaries = group.get("work-summary") or []
            if summaries:
                yield summaries[0]

    # The Session annotation is a string so it is only needed by type
    # checkers and is never evaluated when the class is defined.
    def sync_researcher(self, db: "Session", orcid_id: str):
        """Synchronize the publications of one researcher.

        Steps: get (or create from the ORCID record) the researcher, start
        a SyncJob, fetch the works, create/update one publication per works
        group (matched by ``put_code``), finish the job and update the
        researcher's last-sync marker.

        Args:
            db: Database session handed to every repository call.
            orcid_id: ORCID identifier of the researcher to synchronize.

        Returns:
            A dict with ``"status"`` (``"ok"`` or ``"error"``) and
            ``"message"``. Successful results also carry ``"new_records"``,
            ``"updated_records"`` and ``"total"``; the full-sync result
            additionally carries ``"researcher"``.

        Only ``httpx.HTTPStatusError`` is converted into an error result;
        any other exception propagates to the caller.
        """
        # 1. Get or create the researcher.
        try:
            researcher = ResearcherRepository.get_by_orcid(db, orcid_id)

            if not researcher:
                record = self.orcid_client.fetch_record(orcid_id)
                name = self._extract_given_names(record)
                researcher = ResearcherRepository.create(db, orcid_id, name)

        except httpx.HTTPStatusError as e:
            if e.response.status_code == 404:
                return {
                    "status": "error",
                    "message": f"El ORCID {orcid_id} no existe en Sandbox."
                }
            return {"status": "error", "message": str(e)}

        # 2. Create the SyncJob.
        job = SyncJobRepository.start_job(db, researcher.id)

        # 3. Fetch the works.
        try:
            works_raw = self.orcid_client.fetch_works(orcid_id)
        except httpx.HTTPStatusError as e:
            if e.response.status_code == 404:
                # A 404 here is treated as "no public works": the job is
                # closed with zero records and the sync counts as done.
                SyncJobRepository.finish_job(db, job, 0, 0)
                ResearcherRepository.update_last_sync(db, researcher)
                return {
                    "status": "ok",
                    "message": "El ORCID existe pero no tiene publicaciones públicas.",
                    "new_records": 0,
                    "updated_records": 0,
                    "total": 0
                }
            # NOTE(review): the job started above is left unfinished on this
            # path -- confirm whether SyncJobRepository can mark it failed.
            return {"status": "error", "message": str(e)}

        new_records = 0
        updated_records = 0

        # 4. Process the works (first summary of each group).
        for summary in self._first_summaries(works_raw):
            normalized = PublicationNormalizer.normalize_work(summary)

            # Duplicates are detected by put_code.
            existing = PublicationRepository.get_by_put_code(
                db, researcher.id, normalized["put_code"]
            )

            if existing:
                PublicationRepository.update(db, existing, normalized)
                updated_records += 1
            else:
                PublicationRepository.create(db, researcher.id, normalized)
                new_records += 1

        # 5. Finish the SyncJob.
        SyncJobRepository.finish_job(db, job, new_records, updated_records)

        # 6. Update last_sync_at.
        ResearcherRepository.update_last_sync(db, researcher)

        return {
            "status": "ok",
            "message": "Sincronización completada correctamente.",
            "researcher": researcher.orcid_id,
            "new_records": new_records,
            "updated_records": updated_records,
            "total": new_records + updated_records
        }
|