Files
ORCID2SWORD/backend/app/services/sync_service.py
T

97 lines
3.4 KiB
Python

from sqlalchemy.orm import Session
from app.services.orcid_client import ORCIDClient
from app.services.normalizer import PublicationNormalizer
from app.repositories.researcher_repository import ResearcherRepository
from app.repositories.publication_repository import PublicationRepository
from app.repositories.syncjob_repository import SyncJobRepository
import httpx
class SyncService:
    """Synchronize a researcher's ORCID works into the local database.

    The service fetches data through ``ORCIDClient`` and persists it through
    the researcher, publication and sync-job repositories.
    """

    def __init__(self):
        self.orcid_client = ORCIDClient()

    @staticmethod
    def _extract_given_name(record):
        """Return ``person.name.given-names.value`` from *record*, or None.

        Each level is guarded with ``or {}`` because a key that is present
        with a null value bypasses the default of ``dict.get`` and would
        otherwise raise ``AttributeError`` on the next lookup.
        """
        person = (record or {}).get("person") or {}
        name = person.get("name") or {}
        given_names = name.get("given-names") or {}
        return given_names.get("value")

    @staticmethod
    def _first_summary(group):
        """Return the first entry of the group's ``work-summary``, or None.

        None is returned when the key is missing, null, or an empty list so
        that the caller can skip the group instead of crashing.
        """
        summaries = (group or {}).get("work-summary") or []
        return summaries[0] if summaries else None

    def sync_researcher(self, db: Session, orcid_id: str):
        """Synchronize the publications of one researcher.

        Steps: look up the researcher (creating it from the ORCID record when
        it is not stored yet), start a sync job, fetch the works, create or
        update one publication per work group, then finish the job and update
        the researcher's last-sync marker.

        Args:
            db: Database session handed to every repository call.
            orcid_id: ORCID identifier of the researcher to synchronize.

        Returns:
            A dict with a ``"status"`` of ``"ok"`` or ``"error"`` and a
            ``"message"``. Successful results also carry ``"new_records"``,
            ``"updated_records"`` and ``"total"``; a full run additionally
            carries ``"researcher"``.

        Only ``httpx.HTTPStatusError`` is converted into an error result;
        any other exception propagates to the caller.
        """
        # 1. Get or create the researcher.
        try:
            researcher = ResearcherRepository.get_by_orcid(db, orcid_id)
            if not researcher:
                record = self.orcid_client.fetch_record(orcid_id)
                name = self._extract_given_name(record)
                researcher = ResearcherRepository.create(db, orcid_id, name)
        except httpx.HTTPStatusError as e:
            if e.response.status_code == 404:
                return {
                    "status": "error",
                    "message": f"El ORCID {orcid_id} no existe en Sandbox."
                }
            return {"status": "error", "message": str(e)}

        # 2. Create the sync job.
        job = SyncJobRepository.start_job(db, researcher.id)

        # 3. Fetch the works.
        try:
            works_raw = self.orcid_client.fetch_works(orcid_id)
        except httpx.HTTPStatusError as e:
            if e.response.status_code == 404:
                # A 404 here is treated as "no public works", not a failure:
                # the job is closed with zero records.
                SyncJobRepository.finish_job(db, job, 0, 0)
                ResearcherRepository.update_last_sync(db, researcher)
                return {
                    "status": "ok",
                    "message": "El ORCID existe pero no tiene publicaciones públicas.",
                    "new_records": 0,
                    "updated_records": 0,
                    "total": 0
                }
            # NOTE(review): the job started above is not closed on this path;
            # confirm whether SyncJobRepository offers a way to mark it failed.
            return {"status": "error", "message": str(e)}

        # A null "group" value must behave like a missing one.
        groups = (works_raw or {}).get("group") or []
        new_records = 0
        updated_records = 0

        # 4. Process the works.
        for group in groups:
            summary = self._first_summary(group)
            if summary is None:
                # Skip a group without summaries rather than aborting the
                # whole sync and leaving the job unfinished.
                continue
            normalized = PublicationNormalizer.normalize_work(summary)

            # Duplicates are detected by put_code.
            existing = PublicationRepository.get_by_put_code(
                db, researcher.id, normalized["put_code"]
            )
            if existing:
                PublicationRepository.update(db, existing, normalized)
                updated_records += 1
            else:
                PublicationRepository.create(db, researcher.id, normalized)
                new_records += 1

        # 5. Finish the sync job.
        SyncJobRepository.finish_job(db, job, new_records, updated_records)

        # 6. Update the researcher's last-sync marker.
        ResearcherRepository.update_last_sync(db, researcher)

        return {
            "status": "ok",
            "message": "Sincronización completada correctamente.",
            "researcher": researcher.orcid_id,
            "new_records": new_records,
            "updated_records": updated_records,
            "total": new_records + updated_records
        }