feat: implement researcher and publication management with sync functionality
This commit is contained in:
@@ -0,0 +1,120 @@
|
|||||||
|
from fastapi import APIRouter, Depends, HTTPException
|
||||||
|
from fastapi.responses import Response
|
||||||
|
from sqlalchemy.orm import Session
|
||||||
|
from app.schema.publication import PublicationSchema
|
||||||
|
from app.db.session import get_db
|
||||||
|
from app.repositories.researcher_repository import ResearcherRepository
|
||||||
|
from app.repositories.publication_repository import PublicationRepository
|
||||||
|
from app.services.sync_service import SyncService
|
||||||
|
from app.services.sword_exporter import SWORDExporter
|
||||||
|
from app.utils.orcid_validator import is_valid_orcid
|
||||||
|
|
||||||
|
router = APIRouter(prefix="/researchers", tags=["researchers"])
|
||||||
|
|
||||||
|
|
||||||
|
def validate_orcid_or_400(orcid_id: str):
    """Reject an ORCID iD that ``is_valid_orcid`` does not accept.

    Args:
        orcid_id: The identifier received from the request.

    Raises:
        HTTPException: With status 400 when the identifier is rejected.
    """
    if is_valid_orcid(orcid_id):
        return

    message = f"ORCID ID '{orcid_id}' no es válido según el formato y dígito de control."
    raise HTTPException(status_code=400, detail=message)
|
||||||
|
|
||||||
|
|
||||||
|
@router.post("/", response_model=dict)
def create_researcher(orcid_id: str, db: Session = Depends(get_db)):
    """Register a researcher by ORCID iD, or report the one already stored.

    Args:
        orcid_id: ORCID iD to register; validated before any database access.
        db: Database session supplied by the ``get_db`` dependency.

    Returns:
        A dict with ``status``, ``message``, ``orcid_id`` and ``id``. The
        message tells whether the row already existed or was just created.

    Raises:
        HTTPException: 400 when the ORCID iD fails validation.
    """
    validate_orcid_or_400(orcid_id)

    researcher = ResearcherRepository.get_by_orcid(db, orcid_id)
    if researcher:
        message = "Researcher ya existe."
    else:
        # The ORCID iD could optionally be checked against the ORCID API here.
        researcher = ResearcherRepository.create(db, orcid_id, name=None)
        message = "Researcher creado correctamente."

    return {
        "status": "ok",
        "message": message,
        "orcid_id": researcher.orcid_id,
        "id": researcher.id,
    }
|
||||||
|
|
||||||
|
|
||||||
|
@router.get("/{orcid_id}", response_model=dict)
def get_researcher(orcid_id: str, db: Session = Depends(get_db)):
    """Return the stored attributes of one researcher.

    Args:
        orcid_id: ORCID iD taken from the URL path.
        db: Database session supplied by the ``get_db`` dependency.

    Returns:
        A dict with the researcher's ``orcid_id``, ``name``, ``authenticated``,
        ``access_token``, ``id`` and ``last_sync_at``.

    Raises:
        HTTPException: 400 for an invalid ORCID iD, 404 when no row matches.
    """
    validate_orcid_or_400(orcid_id)

    researcher = ResearcherRepository.get_by_orcid(db, orcid_id)
    if not researcher:
        raise HTTPException(status_code=404, detail="Researcher not found")

    # NOTE(review): access_token is handed back to whoever calls this route and
    # no auth dependency is declared on it here -- confirm this is intended.
    exposed_fields = (
        "orcid_id",
        "name",
        "authenticated",
        "access_token",
        "id",
        "last_sync_at",
    )
    return {field: getattr(researcher, field) for field in exposed_fields}
|
||||||
|
|
||||||
|
|
||||||
|
@router.post("/{orcid_id}/sync", response_model=dict)
def sync_researcher(orcid_id: str, db: Session = Depends(get_db)):
    """Run a synchronization for one researcher and return its result dict.

    Args:
        orcid_id: ORCID iD taken from the URL path.
        db: Database session supplied by the ``get_db`` dependency.

    Returns:
        Whatever ``SyncService.sync_researcher`` returns, unchanged.

    Raises:
        HTTPException: 400 when the ORCID iD fails validation.
    """
    validate_orcid_or_400(orcid_id)

    # A fresh service is built for every request.
    return SyncService().sync_researcher(db, orcid_id)
|
||||||
|
|
||||||
|
|
||||||
|
@router.get("/{orcid_id}/publications", response_model=list[PublicationSchema])
def get_publications(orcid_id: str, db: Session = Depends(get_db)):
    """List the publications stored for one researcher.

    Args:
        orcid_id: ORCID iD taken from the URL path.
        db: Database session supplied by the ``get_db`` dependency.

    Returns:
        The researcher's ``publications`` relationship, serialized through
        ``PublicationSchema``.

    Raises:
        HTTPException: 400 for an invalid ORCID iD, 404 when no row matches.
    """
    # Validate first, like every other /{orcid_id} route in this router, so a
    # malformed identifier is answered with 400 instead of a database lookup.
    validate_orcid_or_400(orcid_id)

    researcher = ResearcherRepository.get_by_orcid(db, orcid_id)
    if not researcher:
        raise HTTPException(status_code=404, detail="Researcher not found")

    # The route-level tags argument was dropped: the router already tags every
    # route with "researchers".
    return researcher.publications
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
@router.get("/{orcid_id}/export/sword.xml")
def export_sword_xml(orcid_id: str, db: Session = Depends(get_db)):
    """Download the researcher's publications as a single XML feed.

    Args:
        orcid_id: ORCID iD taken from the URL path.
        db: Database session supplied by the ``get_db`` dependency.

    Returns:
        A ``Response`` with media type ``application/xml`` sent as an
        attachment named ``sword_<orcid_id>.xml``.

    Raises:
        HTTPException: 400 for an invalid ORCID iD, 404 when no row matches.
    """
    validate_orcid_or_400(orcid_id)

    researcher = ResearcherRepository.get_by_orcid(db, orcid_id)
    if not researcher:
        raise HTTPException(status_code=404, detail="Researcher not found")

    publications = PublicationRepository.list_by_researcher(db, researcher.id)
    payload = SWORDExporter.export_feed_xml(researcher, publications)

    disposition = f'attachment; filename="sword_{orcid_id}.xml"'
    return Response(
        content=payload,
        media_type="application/xml",
        headers={"Content-Disposition": disposition},
    )
|
||||||
|
|
||||||
|
|
||||||
|
@router.get("/{orcid_id}/export/sword.zip")
def export_sword_zip(orcid_id: str, db: Session = Depends(get_db)):
    """Download the researcher's publications as a ZIP package.

    Args:
        orcid_id: ORCID iD taken from the URL path.
        db: Database session supplied by the ``get_db`` dependency.

    Returns:
        A ``Response`` with media type ``application/zip`` sent as an
        attachment named ``sword_<orcid_id>.zip``.

    Raises:
        HTTPException: 400 for an invalid ORCID iD, 404 when no row matches.
    """
    validate_orcid_or_400(orcid_id)

    researcher = ResearcherRepository.get_by_orcid(db, orcid_id)
    if not researcher:
        raise HTTPException(status_code=404, detail="Researcher not found")

    publications = PublicationRepository.list_by_researcher(db, researcher.id)
    payload = SWORDExporter.export_zip(researcher, publications)

    disposition = f'attachment; filename="sword_{orcid_id}.zip"'
    return Response(
        content=payload,
        media_type="application/zip",
        headers={"Content-Disposition": disposition},
    )
|
||||||
@@ -1,9 +1,11 @@
|
|||||||
from sqlalchemy import Column, String, Boolean, Integer, DateTime, Text
|
from sqlalchemy import Column, String, Boolean, Integer, DateTime, Text, ForeignKey
|
||||||
from sqlalchemy.dialects.postgresql import UUID
|
from sqlalchemy.dialects.postgresql import UUID
|
||||||
from sqlalchemy.sql import func
|
from sqlalchemy.sql import func
|
||||||
|
from sqlalchemy.orm import relationship
|
||||||
from .session import Base
|
from .session import Base
|
||||||
import uuid
|
import uuid
|
||||||
|
|
||||||
|
|
||||||
class Researcher(Base):
|
class Researcher(Base):
|
||||||
__tablename__ = "researchers"
|
__tablename__ = "researchers"
|
||||||
|
|
||||||
@@ -11,18 +13,48 @@ class Researcher(Base):
|
|||||||
orcid_id = Column(String(19), unique=True, nullable=False)
|
orcid_id = Column(String(19), unique=True, nullable=False)
|
||||||
name = Column(Text)
|
name = Column(Text)
|
||||||
authenticated = Column(Boolean, default=False)
|
authenticated = Column(Boolean, default=False)
|
||||||
access_token = Column(Text)
|
access_token = Column(Text, nullable=True)
|
||||||
last_sync_at = Column(DateTime)
|
last_sync_at = Column(DateTime(timezone=True), server_default=func.now())
|
||||||
|
|
||||||
|
publications = relationship(
|
||||||
|
"Publication",
|
||||||
|
back_populates="researcher",
|
||||||
|
cascade="all, delete-orphan"
|
||||||
|
)
|
||||||
|
|
||||||
|
sync_jobs = relationship(
|
||||||
|
"SyncJob",
|
||||||
|
back_populates="researcher",
|
||||||
|
cascade="all, delete-orphan"
|
||||||
|
)
|
||||||
|
|
||||||
|
|
||||||
class Publication(Base):
|
class Publication(Base):
|
||||||
__tablename__ = "publications"
|
__tablename__ = "publications"
|
||||||
|
|
||||||
id = Column(UUID(as_uuid=True), primary_key=True, default=uuid.uuid4)
|
id = Column(UUID(as_uuid=True), primary_key=True, default=uuid.uuid4)
|
||||||
researcher_id = Column(UUID(as_uuid=True))
|
researcher_id = Column(UUID(as_uuid=True), ForeignKey("researchers.id"))
|
||||||
put_code = Column(Integer)
|
put_code = Column(Integer)
|
||||||
title = Column(Text)
|
title = Column(Text)
|
||||||
|
journal = Column(Text)
|
||||||
doi = Column(Text)
|
doi = Column(Text)
|
||||||
pub_year = Column(Integer)
|
pub_year = Column(Integer)
|
||||||
type = Column(Text)
|
type = Column(Text)
|
||||||
hash_fingerprint = Column(Text)
|
hash_fingerprint = Column(Text)
|
||||||
last_modified = Column(DateTime)
|
last_modified = Column(DateTime(timezone=True))
|
||||||
|
|
||||||
|
researcher = relationship("Researcher", back_populates="publications")
|
||||||
|
|
||||||
|
|
||||||
|
class SyncJob(Base):
    """ORM mapping for one synchronization run, stored in ``sync_jobs``."""

    __tablename__ = "sync_jobs"

    # Primary key; a uuid4 is generated on the Python side when none is given.
    id = Column(UUID(as_uuid=True), primary_key=True, default=uuid.uuid4)
    # Owning researcher (foreign key to researchers.id).
    researcher_id = Column(UUID(as_uuid=True), ForeignKey("researchers.id"))
    # Free-form state label, at most 20 characters.
    status = Column(String(20))
    # Counters for the run; both default to 0.
    new_records = Column(Integer, default=0)
    updated_records = Column(Integer, default=0)
    # Timezone-aware timestamps; started_at defaults to the database clock.
    started_at = Column(DateTime(timezone=True), server_default=func.now())
    finished_at = Column(DateTime(timezone=True))

    # Other side of Researcher.sync_jobs.
    researcher = relationship("Researcher", back_populates="sync_jobs")
|
||||||
|
|||||||
@@ -4,7 +4,23 @@ import os
|
|||||||
|
|
||||||
DATABASE_URL = os.getenv("DATABASE_URL")
|
DATABASE_URL = os.getenv("DATABASE_URL")
|
||||||
|
|
||||||
engine = create_engine(DATABASE_URL)
|
engine = create_engine(
|
||||||
SessionLocal = sessionmaker(bind=engine, autocommit=False, autoflush=False)
|
DATABASE_URL,
|
||||||
|
future=True,
|
||||||
|
echo=False
|
||||||
|
)
|
||||||
|
|
||||||
|
SessionLocal = sessionmaker(
|
||||||
|
autocommit=False,
|
||||||
|
autoflush=False,
|
||||||
|
bind=engine
|
||||||
|
)
|
||||||
|
|
||||||
Base = declarative_base()
|
Base = declarative_base()
|
||||||
|
|
||||||
|
def get_db():
    """Yield a session from ``SessionLocal`` and close it afterwards.

    Written as a generator so it can be used as a FastAPI dependency; the
    ``finally`` clause closes the session even if the consumer raises.

    Yields:
        The session created for this call.
    """
    session = SessionLocal()
    try:
        yield session
    finally:
        session.close()
|
||||||
|
|||||||
+29
-6
@@ -1,9 +1,32 @@
|
|||||||
from fastapi import FastAPI
|
from fastapi import FastAPI
|
||||||
from app.services.orcid_client import ORCIDClient
|
from app.api.researchers import router as researchers_router
|
||||||
|
from app.db.session import Base, engine
|
||||||
|
|
||||||
app = FastAPI()
|
|
||||||
|
|
||||||
@app.get("/orcid/{orcid_id}/works")
|
app = FastAPI(
|
||||||
def test_works(orcid_id: str):
|
title="ORCID SWORD Backend",
|
||||||
client = ORCIDClient()
|
description="Backend para sincronización ORCID y exportación SWORD",
|
||||||
return client.fetch_works(orcid_id)
|
version="1.0.0"
|
||||||
|
)
|
||||||
|
|
||||||
|
|
||||||
|
# ---------------------------------------------------------
|
||||||
|
# Crear tablas al iniciar la aplicación
|
||||||
|
# ---------------------------------------------------------
|
||||||
|
@app.on_event("startup")
def startup_event():
    """Create every table registered on ``Base`` when the application starts."""
    metadata = Base.metadata
    metadata.create_all(bind=engine)
|
||||||
|
|
||||||
|
|
||||||
|
# ---------------------------------------------------------
|
||||||
|
# Healthcheck
|
||||||
|
# ---------------------------------------------------------
|
||||||
|
@app.get("/health")
def health():
    """Liveness probe: always answers ``{"status": "ok"}``."""
    payload = {"status": "ok"}
    return payload
|
||||||
|
|
||||||
|
|
||||||
|
# ---------------------------------------------------------
|
||||||
|
# Registrar routers
|
||||||
|
# ---------------------------------------------------------
|
||||||
|
app.include_router(researchers_router)
|
||||||
|
|||||||
@@ -0,0 +1,67 @@
|
|||||||
|
from sqlalchemy.orm import Session
|
||||||
|
from app.db.models import Publication
|
||||||
|
|
||||||
|
|
||||||
|
class PublicationRepository:
    """Database access helpers for ``Publication`` rows."""

    @staticmethod
    def get_by_put_code(db: Session, researcher_id: str, put_code: int):
        """Return a researcher's publication with the given put_code.

        Args:
            db: Open database session.
            researcher_id: Identifier of the owning researcher.
            put_code: put_code to look for.

        Returns:
            The first matching ``Publication``, or ``None`` when there is none.
        """
        matches = db.query(Publication).filter(
            Publication.researcher_id == researcher_id,
            Publication.put_code == put_code,
        )
        return matches.first()

    @staticmethod
    def create(db: Session, researcher_id: str, data: dict):
        """Insert a publication built from a normalized work dict.

        Args:
            db: Open database session.
            researcher_id: Identifier of the owning researcher.
            data: Dict holding ``put_code``, ``title``, ``journal``, ``doi``,
                ``pub_year``, ``type`` and ``hash_fingerprint``.

        Returns:
            The committed and refreshed ``Publication``.

        Raises:
            KeyError: When ``data`` lacks one of the expected keys.
        """
        copied_fields = (
            "put_code",
            "title",
            "journal",
            "doi",
            "pub_year",
            "type",
            "hash_fingerprint",
        )
        values = {field: data[field] for field in copied_fields}
        record = Publication(researcher_id=researcher_id, **values)

        db.add(record)
        db.commit()
        db.refresh(record)
        return record

    @staticmethod
    def update(db: Session, publication: Publication, data: dict):
        """Overwrite a publication's descriptive fields from a normalized dict.

        ``put_code`` and ``researcher_id`` are left untouched.

        Args:
            db: Open database session.
            publication: Row to modify.
            data: Dict holding ``title``, ``journal``, ``doi``, ``pub_year``,
                ``type`` and ``hash_fingerprint``.

        Returns:
            The same ``Publication`` after commit and refresh.

        Raises:
            KeyError: When ``data`` lacks one of the expected keys.
        """
        refreshed_fields = (
            "title",
            "journal",
            "doi",
            "pub_year",
            "type",
            "hash_fingerprint",
        )
        for field in refreshed_fields:
            setattr(publication, field, data[field])

        db.commit()
        db.refresh(publication)
        return publication

    @staticmethod
    def list_by_researcher(db: Session, researcher_id: str):
        """Return all publications of a researcher, newest year first.

        Rows without a publication year are placed last.

        Args:
            db: Open database session.
            researcher_id: Identifier of the owning researcher.

        Returns:
            A list of ``Publication`` rows.
        """
        newest_first = Publication.pub_year.desc().nullslast()
        owned = db.query(Publication).filter(
            Publication.researcher_id == researcher_id
        )
        return owned.order_by(newest_first).all()
|
||||||
@@ -0,0 +1,25 @@
|
|||||||
|
from sqlalchemy.orm import Session
|
||||||
|
from app.db.models import Researcher
|
||||||
|
from sqlalchemy.sql import func
|
||||||
|
|
||||||
|
|
||||||
|
class ResearcherRepository:
    """Database access helpers for ``Researcher`` rows."""

    @staticmethod
    def get_by_orcid(db: Session, orcid_id: str):
        """Return the researcher with the given ORCID iD, or ``None``."""
        lookup = db.query(Researcher).filter(Researcher.orcid_id == orcid_id)
        return lookup.first()

    @staticmethod
    def create(db: Session, orcid_id: str, name: str = None):
        """Insert a researcher and return the committed, refreshed row.

        Args:
            db: Open database session.
            orcid_id: ORCID iD of the new researcher.
            name: Optional display name.
        """
        record = Researcher(orcid_id=orcid_id, name=name)
        db.add(record)
        db.commit()
        db.refresh(record)
        return record

    @staticmethod
    def update_last_sync(db: Session, researcher: Researcher):
        """Stamp ``last_sync_at`` with the database's ``now()`` and persist it.

        Returns:
            The same ``Researcher`` after commit and refresh.
        """
        # func.now() is a SQL expression; the refresh below loads the value
        # that the database actually stored.
        setattr(researcher, "last_sync_at", func.now())
        db.commit()
        db.refresh(researcher)
        return researcher
|
||||||
@@ -0,0 +1,28 @@
|
|||||||
|
from sqlalchemy.orm import Session
|
||||||
|
from app.db.models import SyncJob
|
||||||
|
from sqlalchemy.sql import func
|
||||||
|
|
||||||
|
|
||||||
|
class SyncJobRepository:
    """Database access helpers for ``SyncJob`` rows."""

    @staticmethod
    def start_job(db: Session, researcher_id: str):
        """Insert a job in state ``"running"`` for the given researcher.

        Returns:
            The committed and refreshed ``SyncJob``.
        """
        initial_state = {
            "researcher_id": researcher_id,
            "status": "running",
            "started_at": func.now(),
        }
        record = SyncJob(**initial_state)
        db.add(record)
        db.commit()
        db.refresh(record)
        return record

    @staticmethod
    def finish_job(db: Session, job: SyncJob, new_records: int, updated_records: int):
        """Mark a job as ``"finished"`` and store its counters.

        Args:
            db: Open database session.
            job: Row to close.
            new_records: Number of publications created during the run.
            updated_records: Number of publications updated during the run.

        Returns:
            The same ``SyncJob`` after commit and refresh.
        """
        closing_state = {
            "status": "finished",
            "new_records": new_records,
            "updated_records": updated_records,
            "finished_at": func.now(),
        }
        for attribute, value in closing_state.items():
            setattr(job, attribute, value)

        db.commit()
        db.refresh(job)
        return job
|
||||||
@@ -0,0 +1,16 @@
|
|||||||
|
from datetime import datetime
from uuid import UUID

from pydantic import BaseModel
|
||||||
|
|
||||||
|
class PublicationSchema(BaseModel):
    """Response schema for one publication, populated from ORM attributes."""

    id: UUID
    put_code: int | None = None
    title: str
    journal: str | None = None
    doi: str | None = None
    pub_year: int | None = None
    type: str | None = None
    hash_fingerprint: str | None = None
    # Typed as datetime because the Publication model declares this column as
    # DateTime(timezone=True). A ``str`` annotation is not filled from a
    # datetime value, so a row with a timestamp would fail validation.
    last_modified: datetime | None = None

    class Config:
        # Read field values from object attributes, not only from dicts.
        from_attributes = True
|
||||||
@@ -0,0 +1,74 @@
|
|||||||
|
class PublicationNormalizer:
    """Flatten a work-summary dict into the flat dict stored as a publication."""

    @staticmethod
    def safe_get_title(summary):
        """Extract the title string from the shapes a summary may use.

        Supported shapes of ``summary["title"]``:

        1. ``{"title": {"value": "..."}}`` -> the nested ``value``
        2. ``{"title": "..."}``            -> the nested string
        3. ``"..."``                       -> the string itself
        4. ``{"value": "..."}``            -> the ``value``

        Args:
            summary: Work-summary dict.

        Returns:
            The title, or ``None`` when it is missing or has another shape.
        """
        raw = summary.get("title")

        if isinstance(raw, str):
            return raw
        if not isinstance(raw, dict):
            return None

        nested = raw.get("title")
        if isinstance(nested, dict):
            return nested.get("value")
        if isinstance(nested, str):
            return nested

        return raw.get("value")

    @staticmethod
    def normalize_work(summary: dict) -> dict:
        """Build the normalized publication dict for one work summary.

        Keys that are missing *or explicitly set to None* are treated the
        same way, so a sparse summary never raises.

        Args:
            summary: Work-summary dict.

        Returns:
            A dict with ``put_code``, ``title`` (``"Untitled"`` when none was
            found), ``journal``, ``doi``, ``pub_year``, ``type`` and
            ``hash_fingerprint``.
        """
        title = PublicationNormalizer.safe_get_title(summary)

        # Journal title: either a plain value or a dict carrying value/title.
        journal_raw = summary.get("journal-title")
        if isinstance(journal_raw, dict):
            journal = journal_raw.get("value") or journal_raw.get("title")
        else:
            journal = journal_raw

        # DOI: value of the first external id whose type is "doi".
        # ``or`` is used instead of a .get() default because a default only
        # covers a missing key, not a key that is present with value None.
        ext_ids = (summary.get("external-ids") or {}).get("external-id") or []
        doi = next(
            (
                ext.get("external-id-value")
                for ext in ext_ids
                if isinstance(ext, dict) and ext.get("external-id-type") == "doi"
            ),
            None,
        )

        # Publication year, passed through exactly as found in the summary.
        publication_date = summary.get("publication-date") or {}
        pub_year = (publication_date.get("year") or {}).get("value")

        work_type = summary.get("type")
        put_code = summary.get("put-code")

        # Fingerprint: lower-cased, space-free join of the identifying fields.
        # Missing parts render as "none", which keeps the value stable.
        fingerprint = f"{title}-{doi}-{pub_year}-{work_type}".lower().replace(" ", "")

        return {
            "put_code": put_code,
            "title": title or "Untitled",
            "journal": journal,
            "doi": doi,
            "pub_year": pub_year,
            "type": work_type,
            "hash_fingerprint": fingerprint,
        }
|
||||||
@@ -2,18 +2,13 @@ import httpx
|
|||||||
import os
|
import os
|
||||||
from typing import Optional
|
from typing import Optional
|
||||||
|
|
||||||
|
|
||||||
class ORCIDClient:
|
class ORCIDClient:
|
||||||
"""
|
|
||||||
Cliente para interactuar con la Public API de ORCID.
|
|
||||||
Permite:
|
|
||||||
- Obtener token público
|
|
||||||
- Consultar /record
|
|
||||||
- Consultar /works
|
|
||||||
"""
|
|
||||||
|
|
||||||
TOKEN_URL = "https://orcid.org/oauth/token"
|
TOKEN_URL = "https://sandbox.orcid.org/oauth/token"
|
||||||
BASE_URL = "https://pub.orcid.org/v3.0"
|
BASE_URL = "https://pub.sandbox.orcid.org/v3.0"
|
||||||
|
|
||||||
|
# TOKEN_URL = "https://orcid.org/oauth/token"
|
||||||
|
# BASE_URL = "https://pub.orcid.org/v3.0"
|
||||||
|
|
||||||
def __init__(self):
|
def __init__(self):
|
||||||
self.client_id = os.getenv("ORCID_CLIENT_ID")
|
self.client_id = os.getenv("ORCID_CLIENT_ID")
|
||||||
|
|||||||
@@ -0,0 +1,155 @@
|
|||||||
|
from datetime import datetime
|
||||||
|
from xml.etree.ElementTree import Element, SubElement, tostring
|
||||||
|
from io import BytesIO
|
||||||
|
import zipfile
|
||||||
|
import json
|
||||||
|
|
||||||
|
|
||||||
|
class SWORDExporter:
    """Build the export artifacts for a researcher and their publications.

    Produces an XML feed, a plain-text manifest, a JSON metadata document, a
    simple METS document, and a ZIP archive bundling all four.
    """

    ATOM_NS = "http://www.w3.org/2005/Atom"
    DC_NS = "http://purl.org/dc/elements/1.1/"

    # ---------------------------------------------------------
    # Private helpers
    # ---------------------------------------------------------
    @staticmethod
    def _text_child(parent, tag, text):
        """Append a ``tag`` child holding ``text`` to ``parent`` and return it."""
        child = SubElement(parent, tag)
        child.text = text
        return child

    @staticmethod
    def _dc(local_name):
        """Return ``local_name`` qualified with the Dublin Core namespace."""
        return f"{{{SWORDExporter.DC_NS}}}{local_name}"

    @staticmethod
    def _utc_stamp():
        """Return the current UTC time in ISO format with a trailing ``Z``."""
        return datetime.utcnow().isoformat() + "Z"

    # ---------------------------------------------------------
    # 1) Main XML (sword.xml)
    # ---------------------------------------------------------
    @staticmethod
    def export_feed_xml(researcher, publications) -> bytes:
        """Serialize the researcher's publications as one XML feed.

        The feed carries a title, author name (``"Unknown"`` when empty),
        timestamp and id, then one ``entry`` per publication with its id,
        timestamp and Dublin Core title. DOI, year, type and journal are
        added only when they are truthy.

        Args:
            researcher: Object exposing ``orcid_id``, ``name`` and ``id``.
            publications: Iterable of objects exposing ``id``, ``title``,
                ``doi``, ``pub_year``, ``type`` and ``journal``.

        Returns:
            UTF-8 encoded XML including the XML declaration.
        """
        add = SWORDExporter._text_child
        dc = SWORDExporter._dc

        root = Element("feed", {"xmlns": SWORDExporter.ATOM_NS})
        add(root, "title", f"Publications for {researcher.orcid_id}")
        add(SubElement(root, "author"), "name", researcher.name or "Unknown")
        add(root, "updated", SWORDExporter._utc_stamp())
        add(root, "id", f"urn:uuid:{researcher.id}")

        for pub in publications:
            entry = SubElement(root, "entry")
            add(entry, "id", f"urn:uuid:{pub.id}")
            add(entry, "updated", SWORDExporter._utc_stamp())
            add(entry, dc("title"), pub.title)

            # (element name, value deciding presence, text to write)
            optional_fields = (
                ("identifier", pub.doi, f"doi:{pub.doi}"),
                ("date", pub.pub_year, str(pub.pub_year)),
                ("type", pub.type, pub.type),
                ("source", pub.journal, pub.journal),
            )
            for local_name, raw_value, text in optional_fields:
                if raw_value:
                    add(entry, dc(local_name), text)

        return tostring(root, encoding="utf-8", xml_declaration=True)

    # ---------------------------------------------------------
    # 2) manifest.txt
    # ---------------------------------------------------------
    @staticmethod
    def generate_manifest(researcher, publications) -> str:
        """Return the newline-joined plain-text manifest of the package.

        Args:
            researcher: Object exposing ``orcid_id`` and ``name``.
            publications: Sized iterable of objects exposing ``title``,
                ``pub_year`` and ``doi``.
        """
        header = [
            "SWORD Deposit Package",
            "----------------------",
            f"Researcher ORCID: {researcher.orcid_id}",
            f"Researcher Name: {researcher.name or 'Unknown'}",
            f"Total Publications: {len(publications)}",
            f"Generated At: {SWORDExporter._utc_stamp()}",
            "",
            "Publications:",
        ]
        listing = [
            f"- {pub.title} ({pub.pub_year}) DOI={pub.doi}" for pub in publications
        ]
        return "\n".join(header + listing)

    # ---------------------------------------------------------
    # 3) metadata.json
    # ---------------------------------------------------------
    @staticmethod
    def generate_metadata_json(researcher, publications) -> str:
        """Return the package metadata as a JSON string indented by 4 spaces.

        Args:
            researcher: Object exposing ``orcid_id``, ``name`` and ``id``.
            publications: Iterable of objects exposing ``id``, ``title``,
                ``doi``, ``pub_year``, ``type`` and ``journal``.
        """

        def describe(pub):
            return {
                "id": str(pub.id),
                "title": pub.title,
                "doi": pub.doi,
                "year": pub.pub_year,
                "type": pub.type,
                "journal": pub.journal,
            }

        researcher_info = {
            "orcid_id": researcher.orcid_id,
            "name": researcher.name,
            "id": str(researcher.id),
        }
        document = {
            "researcher": researcher_info,
            "generated_at": SWORDExporter._utc_stamp(),
            "publications": [describe(pub) for pub in publications],
        }
        return json.dumps(document, indent=4)

    # ---------------------------------------------------------
    # 4) mets.xml (simple version)
    # ---------------------------------------------------------
    @staticmethod
    def generate_mets_xml(researcher, publications) -> bytes:
        """Return a minimal METS document listing titles and DOIs.

        ``researcher`` is accepted for signature parity but not read.

        Args:
            researcher: Unused.
            publications: Iterable of objects exposing ``title`` and ``doi``.

        Returns:
            UTF-8 encoded XML including the XML declaration.
        """
        add = SWORDExporter._text_child
        dc = SWORDExporter._dc

        root = Element("mets", {"xmlns": "http://www.loc.gov/METS/"})

        header = SubElement(root, "metsHdr")
        agent = SubElement(header, "agent", {"ROLE": "CREATOR", "TYPE": "OTHER"})
        add(agent, "name", "ORCID Exporter System")

        section = SubElement(root, "dmdSec", {"ID": "dmd1"})
        wrapper = SubElement(section, "mdWrap", {"MDTYPE": "DC"})
        container = SubElement(wrapper, "xmlData")

        for pub in publications:
            add(container, dc("title"), pub.title)
            if pub.doi:
                add(container, dc("identifier"), f"doi:{pub.doi}")

        return tostring(root, encoding="utf-8", xml_declaration=True)

    # ---------------------------------------------------------
    # 5) Final ZIP
    # ---------------------------------------------------------
    @staticmethod
    def export_zip(researcher, publications) -> bytes:
        """Bundle the four artifacts into an in-memory deflated ZIP archive.

        Members, in order: ``sword.xml``, ``manifest.txt``,
        ``metadata.json``, ``mets.xml``.

        Returns:
            The raw bytes of the archive.
        """
        members = (
            ("sword.xml", SWORDExporter.export_feed_xml(researcher, publications)),
            ("manifest.txt", SWORDExporter.generate_manifest(researcher, publications)),
            ("metadata.json", SWORDExporter.generate_metadata_json(researcher, publications)),
            ("mets.xml", SWORDExporter.generate_mets_xml(researcher, publications)),
        )

        buffer = BytesIO()
        with zipfile.ZipFile(buffer, mode="w", compression=zipfile.ZIP_DEFLATED) as archive:
            for member_name, payload in members:
                archive.writestr(member_name, payload)

        return buffer.getvalue()
|
||||||
@@ -0,0 +1,96 @@
|
|||||||
|
from sqlalchemy.orm import Session
|
||||||
|
from app.services.orcid_client import ORCIDClient
|
||||||
|
from app.services.normalizer import PublicationNormalizer
|
||||||
|
from app.repositories.researcher_repository import ResearcherRepository
|
||||||
|
from app.repositories.publication_repository import PublicationRepository
|
||||||
|
from app.repositories.syncjob_repository import SyncJobRepository
|
||||||
|
import httpx
|
||||||
|
|
||||||
|
|
||||||
|
class SyncService:
    """Fetch a researcher's works through ``ORCIDClient`` and store them."""

    def __init__(self):
        self.orcid_client = ORCIDClient()

    def sync_researcher(self, db: Session, orcid_id: str):
        """Synchronize the publications of one researcher.

        Steps: look the researcher up (creating the row from the fetched
        record when absent), open a sync job, fetch the works, create or
        update one publication per work group, close the job and stamp the
        researcher's last sync time.

        Args:
            db: Open database session.
            orcid_id: ORCID iD of the researcher to synchronize.

        Returns:
            A dict with ``status`` (``"ok"`` or ``"error"``) and ``message``.
            Successful runs also carry ``new_records``, ``updated_records``
            and ``total``; a full run additionally carries ``researcher``.
            HTTP status errors raised by the client are reported through the
            ``"error"`` dict instead of propagating.
        """
        # 1. Get or create the researcher.
        try:
            researcher = ResearcherRepository.get_by_orcid(db, orcid_id)

            if not researcher:
                record = self.orcid_client.fetch_record(orcid_id)
                # ``or {}`` rather than a .get() default: a default only
                # covers a missing key, while a key present with value None
                # would raise AttributeError on the next .get() call.
                person = (record or {}).get("person") or {}
                name_block = person.get("name") or {}
                given_names = name_block.get("given-names") or {}
                name = given_names.get("value")
                researcher = ResearcherRepository.create(db, orcid_id, name)

        except httpx.HTTPStatusError as e:
            if e.response.status_code == 404:
                return {
                    "status": "error",
                    "message": f"El ORCID {orcid_id} no existe en Sandbox."
                }
            return {"status": "error", "message": str(e)}

        # 2. Open the sync job.
        job = SyncJobRepository.start_job(db, researcher.id)

        # 3. Fetch the works.
        try:
            works_raw = self.orcid_client.fetch_works(orcid_id)
        except httpx.HTTPStatusError as e:
            if e.response.status_code == 404:
                # A 404 here is treated as "no public works": the job is
                # closed with zero counters and the run counts as a success.
                SyncJobRepository.finish_job(db, job, 0, 0)
                ResearcherRepository.update_last_sync(db, researcher)
                return {
                    "status": "ok",
                    "message": "El ORCID existe pero no tiene publicaciones públicas.",
                    "new_records": 0,
                    "updated_records": 0,
                    "total": 0
                }
            # NOTE(review): on this path the job opened above is never closed
            # and stays in "running" -- consider recording a failed state.
            return {"status": "error", "message": str(e)}

        groups = (works_raw or {}).get("group") or []

        new_records = 0
        updated_records = 0

        # 4. Process the works: the first summary of each group is used.
        for group in groups:
            summaries = group.get("work-summary") or []
            if not summaries:
                # Nothing to normalize; skip instead of raising on [0].
                continue

            normalized = PublicationNormalizer.normalize_work(summaries[0])

            # Existing rows are matched by put_code within this researcher.
            existing = PublicationRepository.get_by_put_code(
                db, researcher.id, normalized["put_code"]
            )

            if existing:
                PublicationRepository.update(db, existing, normalized)
                updated_records += 1
            else:
                PublicationRepository.create(db, researcher.id, normalized)
                new_records += 1

        # 5. Close the sync job.
        SyncJobRepository.finish_job(db, job, new_records, updated_records)

        # 6. Stamp last_sync_at.
        ResearcherRepository.update_last_sync(db, researcher)

        return {
            "status": "ok",
            "message": "Sincronización completada correctamente.",
            "researcher": researcher.orcid_id,
            "new_records": new_records,
            "updated_records": updated_records,
            "total": new_records + updated_records
        }
|
||||||
@@ -0,0 +1,28 @@
|
|||||||
|
import re
|
||||||
|
|
||||||
|
# ASCII classes on purpose: in a str pattern ``\d`` also matches non-ASCII
# decimal digits, which int() happily converts, so such input could pass the
# checksum below.
ORCID_REGEX = re.compile(r"^[0-9]{4}-[0-9]{4}-[0-9]{4}-[0-9]{3}[0-9X]$")


def is_valid_orcid(orcid_id: str) -> bool:
    """Check the format and check digit of an ORCID iD.

    The value must consist of four dash-separated groups of four ASCII
    digits, where the very last character may be ``X``, and that last
    character must equal the ISO 7064 Mod 11-2 check digit of the first
    15 digits.

    Args:
        orcid_id: Candidate identifier, e.g. ``"0000-0002-1825-0097"``.

    Returns:
        ``True`` when both the format and the check digit are correct;
        ``False`` otherwise, including for non-string input.
    """
    # fullmatch (not match) so that nothing may follow the identifier, not
    # even a trailing newline.
    if not isinstance(orcid_id, str) or not ORCID_REGEX.fullmatch(orcid_id):
        return False

    digits = orcid_id.replace("-", "")

    # Mod 11-2 over the first 15 digits.
    total = 0
    for char in digits[:-1]:
        total = (total + int(char)) * 2

    result = (12 - total % 11) % 11
    # A result of 10 is written as the letter X.
    check_digit = "X" if result == 10 else str(result)

    return digits[-1] == check_digit
|
||||||
Reference in New Issue
Block a user