feat: implement researcher and publication management with sync functionality
This commit is contained in:
@@ -0,0 +1,96 @@
|
||||
from sqlalchemy.orm import Session
|
||||
from app.services.orcid_client import ORCIDClient
|
||||
from app.services.normalizer import PublicationNormalizer
|
||||
from app.repositories.researcher_repository import ResearcherRepository
|
||||
from app.repositories.publication_repository import PublicationRepository
|
||||
from app.repositories.syncjob_repository import SyncJobRepository
|
||||
import httpx
|
||||
|
||||
|
||||
class SyncService:
    """Synchronize a researcher's ORCID works into the local database.

    The service fetches data through ``ORCIDClient`` and persists it through
    the researcher, publication and sync-job repositories.
    """

    def __init__(self):
        self.orcid_client = ORCIDClient()

    @staticmethod
    def _extract_given_name(record):
        """Return ``person.name.given-names.value`` from *record*, or None.

        Each nesting level is treated as optional: a key that is missing or
        explicitly ``None`` yields ``None`` instead of raising
        ``AttributeError`` (``dict.get(key, {})`` only protects against a
        missing key, not against a key whose value is ``None``).
        """
        node = record or {}
        for key in ("person", "name", "given-names"):
            node = node.get(key) or {}
        return node.get("value")

    @staticmethod
    def _first_summary(group):
        """Return the first ``work-summary`` entry of *group*, or None.

        None is returned when the key is missing, ``None`` or an empty list,
        so the caller can skip the group instead of crashing mid-sync.
        """
        summaries = (group or {}).get("work-summary") or []
        return summaries[0] if summaries else None

    def sync_researcher(self, db: Session, orcid_id: str):
        """Synchronize the publications of one researcher.

        Args:
            db: Database session handed to every repository call.
            orcid_id: ORCID identifier of the researcher to synchronize.

        Returns:
            A dict with a ``"status"`` key (``"ok"`` or ``"error"``) and a
            ``"message"``. Successful results also carry ``"new_records"``,
            ``"updated_records"`` and ``"total"``; the full-sync result
            additionally carries ``"researcher"``.

        Only ``httpx.HTTPStatusError`` is handled here; any other exception
        raised by the client or the repositories propagates to the caller.
        """
        # 1. Get the researcher, creating it from the ORCID record if unknown.
        try:
            researcher = ResearcherRepository.get_by_orcid(db, orcid_id)

            if not researcher:
                record = self.orcid_client.fetch_record(orcid_id)
                name = self._extract_given_name(record)
                researcher = ResearcherRepository.create(db, orcid_id, name)

        except httpx.HTTPStatusError as e:
            if e.response.status_code == 404:
                return {
                    "status": "error",
                    "message": f"El ORCID {orcid_id} no existe en Sandbox."
                }
            return {"status": "error", "message": str(e)}

        # 2. Open a SyncJob for this run.
        job = SyncJobRepository.start_job(db, researcher.id)

        # 3. Fetch the works.
        try:
            works_raw = self.orcid_client.fetch_works(orcid_id)
        except httpx.HTTPStatusError as e:
            if e.response.status_code == 404:
                # A 404 on works is treated as "no public publications":
                # the job is closed with zero counts and reported as ok.
                SyncJobRepository.finish_job(db, job, 0, 0)
                ResearcherRepository.update_last_sync(db, researcher)
                return {
                    "status": "ok",
                    "message": "El ORCID existe pero no tiene publicaciones públicas.",
                    "new_records": 0,
                    "updated_records": 0,
                    "total": 0
                }
            # NOTE(review): the SyncJob started above is not finished on this
            # path; confirm whether it should be marked as failed.
            return {"status": "error", "message": str(e)}

        # "group" may be missing or None; both mean there is nothing to process.
        groups = works_raw.get("group") or []

        new_records = 0
        updated_records = 0

        # 4. Process the works: one publication per group, taken from the
        #    group's first work summary.
        for group in groups:
            summary = self._first_summary(group)
            if summary is None:
                # A group without summaries has nothing to normalize.
                continue

            normalized = PublicationNormalizer.normalize_work(summary)

            # Duplicates are detected by put_code within the researcher.
            existing = PublicationRepository.get_by_put_code(
                db, researcher.id, normalized["put_code"]
            )

            if existing:
                PublicationRepository.update(db, existing, normalized)
                updated_records += 1
            else:
                PublicationRepository.create(db, researcher.id, normalized)
                new_records += 1

        # 5. Close the SyncJob with the final counters.
        SyncJobRepository.finish_job(db, job, new_records, updated_records)

        # 6. Record the time of this sync on the researcher.
        ResearcherRepository.update_last_sync(db, researcher)

        return {
            "status": "ok",
            "message": "Sincronización completada correctamente.",
            "researcher": researcher.orcid_id,
            "new_records": new_records,
            "updated_records": updated_records,
            "total": new_records + updated_records
        }
|
||||
Reference in New Issue
Block a user