Versión 3 Backend - Endpoints finales corregidos
This commit is contained in:
@@ -0,0 +1,101 @@
|
||||
from fastapi import APIRouter, Depends, HTTPException
|
||||
from fastapi.responses import Response
|
||||
from sqlalchemy.orm import Session
|
||||
from uuid import UUID
|
||||
|
||||
from app.db.session import get_db
|
||||
from app.db.models import Publication, Researcher
|
||||
from app.security.api_key import get_api_key
|
||||
from app.services.sword_generator import SWORDGenerator
|
||||
from app.services.zip_generator import ZIPGenerator
|
||||
|
||||
router = APIRouter(prefix="/export")
|
||||
|
||||
|
||||
def validate_uuid_list(pub_ids: list[str]) -> list[UUID]:
    """Parse every publication ID into a ``UUID``.

    Args:
        pub_ids: Publication identifiers as received from the client.

    Returns:
        The parsed ``UUID`` objects, in the same order as ``pub_ids``.

    Raises:
        HTTPException: 400 for the first entry that is not a valid UUID.
    """
    valid_ids = []
    for pid in pub_ids:
        try:
            valid_ids.append(UUID(pid))
        # UUID() raises ValueError for malformed text and TypeError /
        # AttributeError for non-string input; anything else is a real bug
        # and must not be reported to the client as a bad ID.
        except (ValueError, TypeError, AttributeError) as exc:
            raise HTTPException(
                status_code=400,
                detail=f"Invalid publication ID (not UUID): {pid}"
            ) from exc
    return valid_ids
|
||||
|
||||
|
||||
@router.post("/sword/publications")
async def export_multiple_sword(
    pub_ids: list[str],
    db: Session = Depends(get_db),
    api_key: str = Depends(get_api_key)
):
    """Export the requested publications as one SWORD XML feed.

    Args:
        pub_ids: Publication IDs supplied by the client (UUID strings).
        db: Database session provided by the ``get_db`` dependency.
        api_key: Value resolved by the ``get_api_key`` dependency; it is not
            used in the body, the dependency only gates access.

    Returns:
        A ``Response`` with media type ``application/xml``.

    Raises:
        HTTPException: 400 if an ID is not a UUID, 404 if no publication
            matches.
    """
    # Query with the parsed UUID objects instead of the raw request strings:
    # Publication.id is declared as UUID(as_uuid=True).
    publication_ids = validate_uuid_list(pub_ids)

    pubs = db.query(Publication).filter(Publication.id.in_(publication_ids)).all()

    if not pubs:
        raise HTTPException(status_code=404, detail="No publications found")

    # NOTE(review): the feed is attributed to the researcher of the first
    # publication returned; IDs belonging to several researchers are not
    # separated -- confirm this is intended.
    researcher = db.query(Researcher).filter_by(id=pubs[0].researcher_id).first()

    xml_bytes = SWORDGenerator.generate_feed_xml(researcher, pubs)
    return Response(content=xml_bytes, media_type="application/xml")
|
||||
|
||||
|
||||
@router.get("/sword/researcher/{orcid_id}")
async def export_researcher_sword(
    orcid_id: str,
    db: Session = Depends(get_db),
    api_key: str = Depends(get_api_key)
):
    """Export every stored publication of one researcher as SWORD XML.

    Looks the researcher up by ``orcid_id`` and answers 404 when either the
    researcher or their publications are missing.
    """
    owner = db.query(Researcher).filter_by(orcid_id=orcid_id).first()
    if not owner:
        raise HTTPException(status_code=404, detail="Researcher not found")

    publications = db.query(Publication).filter_by(researcher_id=owner.id).all()
    if not publications:
        raise HTTPException(status_code=404, detail="No publications found for this researcher")

    return Response(
        content=SWORDGenerator.generate_feed_xml(owner, publications),
        media_type="application/xml",
    )
|
||||
|
||||
|
||||
@router.post("/zip/publications")
async def export_multiple_zip(
    pub_ids: list[str],
    db: Session = Depends(get_db),
    api_key: str = Depends(get_api_key)
):
    """Export the requested publications as one ZIP archive.

    Args:
        pub_ids: Publication IDs supplied by the client (UUID strings).
        db: Database session provided by the ``get_db`` dependency.
        api_key: Value resolved by the ``get_api_key`` dependency; it is not
            used in the body, the dependency only gates access.

    Returns:
        A ``Response`` with media type ``application/zip``.

    Raises:
        HTTPException: 400 if an ID is not a UUID, 404 if no publication
            matches.
    """
    # Query with the parsed UUID objects instead of the raw request strings:
    # Publication.id is declared as UUID(as_uuid=True).
    publication_ids = validate_uuid_list(pub_ids)

    pubs = db.query(Publication).filter(Publication.id.in_(publication_ids)).all()

    if not pubs:
        raise HTTPException(status_code=404, detail="No publications found")

    # NOTE(review): the archive is attributed to the researcher of the first
    # publication returned; IDs belonging to several researchers are not
    # separated -- confirm this is intended.
    researcher = db.query(Researcher).filter_by(id=pubs[0].researcher_id).first()

    zip_bytes = ZIPGenerator.generate_zip(researcher, pubs)
    return Response(content=zip_bytes, media_type="application/zip")
|
||||
|
||||
|
||||
@router.get("/zip/researcher/{orcid_id}")
async def export_researcher_zip(
    orcid_id: str,
    db: Session = Depends(get_db),
    api_key: str = Depends(get_api_key)
):
    """Export every stored publication of one researcher as a ZIP archive.

    Looks the researcher up by ``orcid_id`` and answers 404 when either the
    researcher or their publications are missing.
    """
    owner = db.query(Researcher).filter_by(orcid_id=orcid_id).first()
    if not owner:
        raise HTTPException(status_code=404, detail="Researcher not found")

    publications = db.query(Publication).filter_by(researcher_id=owner.id).all()
    if not publications:
        raise HTTPException(status_code=404, detail="No publications found for this researcher")

    return Response(
        content=ZIPGenerator.generate_zip(owner, publications),
        media_type="application/zip",
    )
|
||||
+189
-101
@@ -1,120 +1,208 @@
|
||||
from datetime import datetime
|
||||
from typing import List
|
||||
|
||||
from fastapi import APIRouter, Depends, HTTPException
|
||||
from fastapi.responses import Response
|
||||
from sqlalchemy.orm import Session
|
||||
from app.schema.publication import PublicationSchema
|
||||
|
||||
from app.db.models import Publication, Researcher
|
||||
from app.db.session import get_db
|
||||
from app.repositories.researcher_repository import ResearcherRepository
|
||||
from app.repositories.publication_repository import PublicationRepository
|
||||
from app.services.sync_service import SyncService
|
||||
from app.services.sword_exporter import SWORDExporter
|
||||
from app.utils.orcid_validator import is_valid_orcid
|
||||
from app.schema.researcher import ResearcherWithPublicationsSchema
|
||||
from app.services.normalizer import PublicationNormalizer
|
||||
from app.services.orcid_client import get_works_summary, get_work_detail
|
||||
|
||||
router = APIRouter(prefix="/researchers", tags=["researchers"])
|
||||
|
||||
|
||||
def validate_orcid_or_400(orcid_id: str):
|
||||
if not is_valid_orcid(orcid_id):
|
||||
raise HTTPException(
|
||||
status_code=400,
|
||||
detail=f"ORCID ID '{orcid_id}' no es válido según el formato y dígito de control."
|
||||
# ---------------------------------------------------------
|
||||
# Función auxiliar: detectar si una publicación ha cambiado
|
||||
# ---------------------------------------------------------
|
||||
def publication_changed(existing: Publication, data: dict) -> bool:
    """Report whether any tracked field differs between record and payload.

    Args:
        existing: The stored publication whose attributes are compared.
        data: Normalized values keyed by field name; every tracked field
            must be present (a missing key raises ``KeyError``).

    Returns:
        ``True`` as soon as one tracked field differs, ``False`` otherwise.
        Keys of ``data`` outside the tracked set are ignored.
    """
    tracked = (
        "title", "subtitle", "type", "journal",
        "pub_year", "pub_month", "pub_day",
        "doi", "url", "short_description",
        "citation_type", "citation_value",
        "language_code", "country",
        "external_ids", "contributors",
    )
    return any(getattr(existing, name) != data[name] for name in tracked)
|
||||
|
||||
|
||||
# ---------------------------------------------------------
|
||||
# ENDPOINT 1: SEARCH + SYNC (sin contadores)
|
||||
# ---------------------------------------------------------
|
||||
@router.get("/search/{orcid_id}", response_model=ResearcherWithPublicationsSchema)
|
||||
def search_and_sync_researcher(orcid_id: str, db: Session = Depends(get_db)):
|
||||
# Buscar o crear Researcher
|
||||
researcher = db.query(Researcher).filter(Researcher.orcid_id == orcid_id).first()
|
||||
if not researcher:
|
||||
researcher = Researcher(
|
||||
orcid_id=orcid_id,
|
||||
name=None,
|
||||
authenticated=False,
|
||||
last_sync_at=None,
|
||||
)
|
||||
db.add(researcher)
|
||||
db.flush()
|
||||
|
||||
# Obtener works summary desde ORCID
|
||||
works = get_works_summary(orcid_id)
|
||||
groups = works.get("group", [])
|
||||
|
||||
publications: List[Publication] = []
|
||||
|
||||
for g in groups:
|
||||
summaries = g.get("work-summary") or []
|
||||
if not summaries:
|
||||
continue
|
||||
|
||||
summary = summaries[0]
|
||||
put_code = summary.get("put-code")
|
||||
if put_code is None:
|
||||
continue
|
||||
|
||||
# Obtener detalle del work
|
||||
try:
|
||||
detail = get_work_detail(orcid_id, put_code)
|
||||
except Exception:
|
||||
detail = None
|
||||
|
||||
# Normalizar datos
|
||||
data = PublicationNormalizer.normalize(summary, detail)
|
||||
|
||||
# Ver si ya existe la publicación
|
||||
existing = (
|
||||
db.query(Publication)
|
||||
.filter(
|
||||
Publication.researcher_id == researcher.id,
|
||||
Publication.put_code == data["put_code"],
|
||||
)
|
||||
.first()
|
||||
)
|
||||
|
||||
if existing:
|
||||
for field in [
|
||||
"title", "subtitle", "type", "journal",
|
||||
"pub_year", "pub_month", "pub_day",
|
||||
"doi", "url", "short_description",
|
||||
"citation_type", "citation_value",
|
||||
"language_code", "country",
|
||||
"external_ids", "contributors"
|
||||
]:
|
||||
setattr(existing, field, data[field])
|
||||
existing.last_modified = datetime.utcnow()
|
||||
existing.status = None
|
||||
publications.append(existing)
|
||||
else:
|
||||
pub = Publication(
|
||||
researcher_id=researcher.id,
|
||||
**data,
|
||||
last_modified=datetime.utcnow(),
|
||||
)
|
||||
pub.status = None
|
||||
db.add(pub)
|
||||
publications.append(pub)
|
||||
|
||||
@router.post("/", response_model=dict)
|
||||
def create_researcher(orcid_id: str, db: Session = Depends(get_db)):
|
||||
validate_orcid_or_400(orcid_id)
|
||||
researcher.last_sync_at = datetime.utcnow()
|
||||
db.commit()
|
||||
db.refresh(researcher)
|
||||
|
||||
existing = ResearcherRepository.get_by_orcid(db, orcid_id)
|
||||
if existing:
|
||||
return {
|
||||
"status": "ok",
|
||||
"message": "Researcher ya existe.",
|
||||
"orcid_id": existing.orcid_id,
|
||||
"id": existing.id
|
||||
}
|
||||
|
||||
# Aquí podrías opcionalmente validar que el ORCID existe en ORCID API
|
||||
researcher = ResearcherRepository.create(db, orcid_id, name=None)
|
||||
|
||||
return {
|
||||
"status": "ok",
|
||||
"message": "Researcher creado correctamente.",
|
||||
"orcid_id": researcher.orcid_id,
|
||||
"id": researcher.id
|
||||
}
|
||||
return ResearcherWithPublicationsSchema(
|
||||
researcher=researcher,
|
||||
publications=publications,
|
||||
new_records=0,
|
||||
updated_records=0,
|
||||
unchanged_records=0,
|
||||
total_records=len(publications),
|
||||
)
|
||||
|
||||
|
||||
@router.get("/{orcid_id}", response_model=dict)
def get_researcher(orcid_id: str, db: Session = Depends(get_db)):
    """Return the stored attributes of the researcher with this ORCID iD.

    The iD is first checked with ``validate_orcid_or_400``; an unknown
    researcher yields a 404.
    """
    validate_orcid_or_400(orcid_id)

    researcher = ResearcherRepository.get_by_orcid(db, orcid_id)
    if not researcher:
        raise HTTPException(status_code=404, detail="Researcher not found")

    # NOTE(review): "access_token" is returned to the caller and this route
    # declares no API-key dependency -- confirm the token should be exposed.
    exposed = (
        "orcid_id",
        "name",
        "authenticated",
        "access_token",
        "id",
        "last_sync_at",
    )
    return {field: getattr(researcher, field) for field in exposed}
|
||||
|
||||
|
||||
@router.post("/{orcid_id}/sync", response_model=dict)
|
||||
# ---------------------------------------------------------
|
||||
# ENDPOINT 2: SYNC COMPLETO (con contadores + status)
|
||||
# ---------------------------------------------------------
|
||||
@router.post("/{orcid_id}/sync", response_model=ResearcherWithPublicationsSchema)
|
||||
def sync_researcher(orcid_id: str, db: Session = Depends(get_db)):
|
||||
validate_orcid_or_400(orcid_id)
|
||||
|
||||
service = SyncService()
|
||||
result = service.sync_researcher(db, orcid_id)
|
||||
return result
|
||||
|
||||
|
||||
@router.get("/{orcid_id}/publications", response_model=list[PublicationSchema], tags=["researchers"])
def get_publications(orcid_id: str, db: Session = Depends(get_db)):
    """List the publications attached to the researcher with this ORCID iD.

    Raises a 404 when no researcher is stored for ``orcid_id``.
    """
    found = ResearcherRepository.get_by_orcid(db, orcid_id)
    if found:
        return found.publications
    raise HTTPException(status_code=404, detail="Researcher not found")
|
||||
|
||||
|
||||
|
||||
@router.get("/{orcid_id}/export/sword.xml")
|
||||
def export_sword_xml(orcid_id: str, db: Session = Depends(get_db)):
|
||||
validate_orcid_or_400(orcid_id)
|
||||
|
||||
researcher = ResearcherRepository.get_by_orcid(db, orcid_id)
|
||||
researcher = db.query(Researcher).filter_by(orcid_id=orcid_id).first()
|
||||
if not researcher:
|
||||
raise HTTPException(status_code=404, detail="Researcher not found")
|
||||
|
||||
pubs = PublicationRepository.list_by_researcher(db, researcher.id)
|
||||
xml_bytes = SWORDExporter.export_feed_xml(researcher, pubs)
|
||||
works = get_works_summary(orcid_id)
|
||||
groups = works.get("group", [])
|
||||
|
||||
return Response(
|
||||
content=xml_bytes,
|
||||
media_type="application/xml",
|
||||
headers={
|
||||
"Content-Disposition": f'attachment; filename="sword_{orcid_id}.xml"'
|
||||
}
|
||||
)
|
||||
|
||||
|
||||
@router.get("/{orcid_id}/export/sword.zip")
|
||||
def export_sword_zip(orcid_id: str, db: Session = Depends(get_db)):
|
||||
validate_orcid_or_400(orcid_id)
|
||||
|
||||
researcher = ResearcherRepository.get_by_orcid(db, orcid_id)
|
||||
if not researcher:
|
||||
raise HTTPException(status_code=404, detail="Researcher not found")
|
||||
|
||||
pubs = PublicationRepository.list_by_researcher(db, researcher.id)
|
||||
zip_bytes = SWORDExporter.export_zip(researcher, pubs)
|
||||
|
||||
return Response(
|
||||
content=zip_bytes,
|
||||
media_type="application/zip",
|
||||
headers={
|
||||
"Content-Disposition": f'attachment; filename="sword_{orcid_id}.zip"'
|
||||
}
|
||||
publications_output = []
|
||||
|
||||
new_count = 0
|
||||
updated_count = 0
|
||||
unchanged_count = 0
|
||||
|
||||
for g in groups:
|
||||
summaries = g.get("work-summary") or []
|
||||
if not summaries:
|
||||
continue
|
||||
|
||||
summary = summaries[0]
|
||||
put_code = summary.get("put-code")
|
||||
if put_code is None:
|
||||
continue
|
||||
|
||||
try:
|
||||
detail = get_work_detail(orcid_id, put_code)
|
||||
except Exception:
|
||||
detail = None
|
||||
|
||||
data = PublicationNormalizer.normalize(summary, detail)
|
||||
|
||||
existing = (
|
||||
db.query(Publication)
|
||||
.filter(
|
||||
Publication.researcher_id == researcher.id,
|
||||
Publication.put_code == data["put_code"],
|
||||
)
|
||||
.first()
|
||||
)
|
||||
|
||||
if existing:
|
||||
if publication_changed(existing, data):
|
||||
# updated
|
||||
for field in data:
|
||||
setattr(existing, field, data[field])
|
||||
existing.last_modified = datetime.utcnow()
|
||||
existing.status = "updated"
|
||||
updated_count += 1
|
||||
else:
|
||||
# unchanged
|
||||
existing.status = "unchanged"
|
||||
unchanged_count += 1
|
||||
|
||||
pub = existing
|
||||
|
||||
else:
|
||||
# new
|
||||
pub = Publication(
|
||||
researcher_id=researcher.id,
|
||||
**data,
|
||||
last_modified=datetime.utcnow(),
|
||||
)
|
||||
pub.status = "new"
|
||||
db.add(pub)
|
||||
new_count += 1
|
||||
|
||||
db.flush()
|
||||
publications_output.append(pub)
|
||||
|
||||
researcher.last_sync_at = datetime.utcnow()
|
||||
db.commit()
|
||||
db.refresh(researcher)
|
||||
|
||||
return ResearcherWithPublicationsSchema(
|
||||
researcher=researcher,
|
||||
publications=publications_output,
|
||||
new_records=new_count,
|
||||
updated_records=updated_count,
|
||||
unchanged_records=unchanged_count,
|
||||
total_records=new_count + updated_count + unchanged_count,
|
||||
)
|
||||
|
||||
@@ -0,0 +1,3 @@
|
||||
from sqlalchemy.orm import declarative_base
|
||||
|
||||
Base = declarative_base()
|
||||
+41
-38
@@ -1,60 +1,63 @@
|
||||
from sqlalchemy import Column, String, Boolean, Integer, DateTime, Text, ForeignKey
|
||||
from sqlalchemy.dialects.postgresql import UUID
|
||||
from sqlalchemy.sql import func
|
||||
from sqlalchemy import Column, String, Integer, Boolean, DateTime, ForeignKey
|
||||
from sqlalchemy.dialects.postgresql import UUID, JSONB
|
||||
from sqlalchemy.orm import relationship
|
||||
from .session import Base
|
||||
import uuid
|
||||
from datetime import datetime
|
||||
|
||||
from app.db.session import Base
|
||||
|
||||
|
||||
class Researcher(Base):
|
||||
__tablename__ = "researchers"
|
||||
|
||||
id = Column(UUID(as_uuid=True), primary_key=True, default=uuid.uuid4)
|
||||
orcid_id = Column(String(19), unique=True, nullable=False)
|
||||
name = Column(Text)
|
||||
orcid_id = Column(String, unique=True, index=True, nullable=False)
|
||||
name = Column(String, nullable=True)
|
||||
authenticated = Column(Boolean, default=False)
|
||||
access_token = Column(Text, nullable=True)
|
||||
last_sync_at = Column(DateTime(timezone=True), server_default=func.now())
|
||||
last_sync_at = Column(DateTime, nullable=True)
|
||||
|
||||
publications = relationship(
|
||||
"Publication",
|
||||
back_populates="researcher",
|
||||
cascade="all, delete-orphan"
|
||||
)
|
||||
|
||||
sync_jobs = relationship(
|
||||
"SyncJob",
|
||||
back_populates="researcher",
|
||||
cascade="all, delete-orphan"
|
||||
)
|
||||
publications = relationship("Publication", back_populates="researcher", cascade="all, delete-orphan")
|
||||
|
||||
|
||||
class Publication(Base):
|
||||
__tablename__ = "publications"
|
||||
|
||||
id = Column(UUID(as_uuid=True), primary_key=True, default=uuid.uuid4)
|
||||
researcher_id = Column(UUID(as_uuid=True), ForeignKey("researchers.id"))
|
||||
put_code = Column(Integer)
|
||||
title = Column(Text)
|
||||
journal = Column(Text)
|
||||
doi = Column(Text)
|
||||
pub_year = Column(Integer)
|
||||
type = Column(Text)
|
||||
hash_fingerprint = Column(Text)
|
||||
last_modified = Column(DateTime(timezone=True))
|
||||
|
||||
researcher_id = Column(UUID(as_uuid=True), ForeignKey("researchers.id"), nullable=False)
|
||||
researcher = relationship("Researcher", back_populates="publications")
|
||||
|
||||
# ORCID core
|
||||
put_code = Column(Integer, index=True, nullable=False)
|
||||
title = Column(String, nullable=True)
|
||||
subtitle = Column(String, nullable=True)
|
||||
type = Column(String, nullable=True)
|
||||
|
||||
class SyncJob(Base):
|
||||
__tablename__ = "sync_jobs"
|
||||
# Journal / container
|
||||
journal = Column(String, nullable=True)
|
||||
|
||||
id = Column(UUID(as_uuid=True), primary_key=True, default=uuid.uuid4)
|
||||
researcher_id = Column(UUID(as_uuid=True), ForeignKey("researchers.id"))
|
||||
status = Column(String(20))
|
||||
new_records = Column(Integer, default=0)
|
||||
updated_records = Column(Integer, default=0)
|
||||
started_at = Column(DateTime(timezone=True), server_default=func.now())
|
||||
finished_at = Column(DateTime(timezone=True))
|
||||
# Dates
|
||||
pub_year = Column(Integer, nullable=True)
|
||||
pub_month = Column(Integer, nullable=True)
|
||||
pub_day = Column(Integer, nullable=True)
|
||||
|
||||
researcher = relationship("Researcher", back_populates="sync_jobs")
|
||||
# Identifiers / links
|
||||
doi = Column(String, nullable=True)
|
||||
url = Column(String, nullable=True)
|
||||
|
||||
# Description / citation
|
||||
short_description = Column(String, nullable=True)
|
||||
citation_type = Column(String, nullable=True)
|
||||
citation_value = Column(String, nullable=True)
|
||||
|
||||
# Language / country
|
||||
language_code = Column(String, nullable=True)
|
||||
country = Column(String, nullable=True)
|
||||
|
||||
# Extra structured data
|
||||
external_ids = Column(JSONB, nullable=True) # lista de external-id normalizados
|
||||
contributors = Column(JSONB, nullable=True) # lista de autores/roles
|
||||
|
||||
# Tu campo existente
|
||||
hash_fingerprint = Column(String, nullable=True)
|
||||
last_modified = Column(DateTime, nullable=True, default=None)
|
||||
|
||||
-1
@@ -1,7 +1,6 @@
|
||||
from sqlalchemy.orm import Session
|
||||
from app.db.models import Publication
|
||||
|
||||
|
||||
class PublicationRepository:
|
||||
|
||||
@staticmethod
|
||||
@@ -2,6 +2,9 @@ from sqlalchemy import create_engine
|
||||
from sqlalchemy.orm import sessionmaker, declarative_base
|
||||
import os
|
||||
|
||||
# -----------------------------
|
||||
# DATABASE URL
|
||||
# -----------------------------
|
||||
DATABASE_URL = os.getenv("DATABASE_URL")
|
||||
|
||||
engine = create_engine(
|
||||
@@ -18,9 +21,24 @@ SessionLocal = sessionmaker(
|
||||
|
||||
Base = declarative_base()
|
||||
|
||||
|
||||
# -----------------------------
|
||||
# DB SESSION DEPENDENCY
|
||||
# -----------------------------
|
||||
def get_db():
    """Yield a fresh ``SessionLocal`` session and close it afterwards.

    Written as a generator so it can be used as a dependency; the
    ``finally`` clause closes the session even if the consumer raises.
    """
    session = SessionLocal()
    try:
        yield session
    finally:
        session.close()
|
||||
|
||||
|
||||
# -----------------------------
|
||||
# INIT DB (CREA TABLAS)
|
||||
# -----------------------------
|
||||
def init_db():
    """Create every table known to ``Base.metadata`` on the configured engine."""
    # Importing the models module is done only for its side effect: it
    # defines the mapped classes before create_all() runs.
    from app.db import models as _models  # noqa: F401

    Base.metadata.create_all(bind=engine)
|
||||
|
||||
+24
-3
@@ -1,8 +1,15 @@
|
||||
from fastapi import FastAPI
|
||||
from fastapi.middleware.cors import CORSMiddleware
|
||||
|
||||
from app.db.session import init_db
|
||||
from app.api.researchers import router as researchers_router
|
||||
from app.db.session import Base, engine
|
||||
from app.api.export import router as export_router
|
||||
from app.scheduler.sync_scheduler import start_scheduler
|
||||
|
||||
|
||||
# ---------------------------------------------------------
|
||||
# Crear instancia principal de FastAPI
|
||||
# ---------------------------------------------------------
|
||||
app = FastAPI(
|
||||
title="ORCID SWORD Backend",
|
||||
description="Backend para sincronización ORCID y exportación SWORD",
|
||||
@@ -15,7 +22,8 @@ app = FastAPI(
|
||||
# ---------------------------------------------------------
|
||||
@app.on_event("startup")
|
||||
def startup_event():
|
||||
Base.metadata.create_all(bind=engine)
|
||||
init_db() # 🔥 CREA TABLAS
|
||||
start_scheduler() # 🔥 INICIA SCHEDULER
|
||||
|
||||
|
||||
# ---------------------------------------------------------
|
||||
@@ -29,4 +37,17 @@ def health():
|
||||
# ---------------------------------------------------------
|
||||
# Registrar routers
|
||||
# ---------------------------------------------------------
|
||||
app.include_router(researchers_router)
|
||||
app.include_router(researchers_router, prefix="/api")
|
||||
app.include_router(export_router, prefix="/api")
|
||||
|
||||
|
||||
# ---------------------------------------------------------
|
||||
# CORS
|
||||
# ---------------------------------------------------------
|
||||
app.add_middleware(
    CORSMiddleware,
    allow_origins=["*"],  # restrict to the known front-end origins in production
    # Credentials must not be combined with wildcard origins/methods/headers
    # (see the FastAPI CORS documentation); enable this only together with
    # an explicit origin list.
    allow_credentials=False,
    allow_methods=["*"],
    allow_headers=["*"],
)
|
||||
|
||||
@@ -0,0 +1,43 @@
|
||||
import requests
|
||||
from apscheduler.schedulers.background import BackgroundScheduler
|
||||
from app.db.session import SessionLocal
|
||||
from app.db.repositories.researcher_repository import ResearcherRepository
|
||||
from dotenv import load_dotenv
|
||||
import os
|
||||
|
||||
|
||||
# Cargar variables del .env
|
||||
load_dotenv()
|
||||
|
||||
API_KEY = os.getenv("API_KEY_VALUE")
|
||||
BASE_URL = os.getenv("BASE_URL")
|
||||
|
||||
|
||||
def run_monthly_sync():
    """Trigger a sync over HTTP for every stored researcher.

    Each researcher is synced by POSTing to
    ``{BASE_URL}/researchers/<orcid_id>/sync``. A failure for one researcher
    is printed and does not stop the remaining ones (best-effort job).
    """
    db = SessionLocal()
    try:
        researchers = ResearcherRepository.get_all(db)

        for r in researchers:
            try:
                # NOTE(review): assumes BASE_URL already contains any router
                # mount prefix -- confirm against the deployed routes.
                url = f"{BASE_URL}/researchers/{r.orcid_id}/sync"
                response = requests.post(
                    url,
                    headers={"X-API-Key": API_KEY},
                    # Without a timeout a stalled server would block the
                    # scheduler thread forever: (connect, read) in seconds.
                    timeout=(10, 300),
                )

                if response.status_code != 200:
                    print(f"[ERROR] Sync failed for {r.orcid_id}: {response.text}")
                else:
                    print(f"[OK] Synced {r.orcid_id}")

            # Deliberately broad: one failing researcher must not abort the job.
            except Exception as e:
                print(f"[EXCEPTION] Error syncing {r.orcid_id}: {e}")
    finally:
        # Release the session even if listing researchers or the loop fails.
        db.close()
|
||||
|
||||
|
||||
def start_scheduler():
    """Start a background scheduler that runs ``run_monthly_sync``.

    The job uses a cron trigger with ``day=1, hour=3`` (day 1 at 03:00).
    """
    monthly = BackgroundScheduler()
    monthly.add_job(run_monthly_sync, "cron", day=1, hour=3)
    monthly.start()
|
||||
@@ -1,16 +1,30 @@
|
||||
from pydantic import BaseModel
|
||||
from uuid import UUID
|
||||
from typing import Optional, List, Any
|
||||
from datetime import datetime
|
||||
|
||||
class PublicationSchema(BaseModel):
|
||||
id: UUID
|
||||
put_code: int | None = None
|
||||
title: str
|
||||
title: str | None = None
|
||||
subtitle: str | None = None
|
||||
journal: str | None = None
|
||||
doi: str | None = None
|
||||
pub_year: int | None = None
|
||||
pub_month: int | None = None
|
||||
pub_day: int | None = None
|
||||
type: str | None = None
|
||||
url: str | None = None
|
||||
short_description: str | None = None
|
||||
citation_type: str | None = None
|
||||
citation_value: str | None = None
|
||||
language_code: str | None = None
|
||||
country: str | None = None
|
||||
external_ids: List[Any] | None = None
|
||||
contributors: List[Any] | None = None
|
||||
hash_fingerprint: str | None = None
|
||||
last_modified: str | None = None
|
||||
last_modified: datetime | None = None
|
||||
status: str | None = None
|
||||
|
||||
class Config:
|
||||
from_attributes = True
|
||||
|
||||
@@ -0,0 +1,27 @@
|
||||
from pydantic import BaseModel
|
||||
from uuid import UUID
|
||||
from typing import Optional, List
|
||||
from datetime import datetime
|
||||
from app.schema.publication import PublicationSchema
|
||||
|
||||
class ResearcherSchema(BaseModel):
    """Serialized view of a researcher, readable from attribute-style objects."""

    # Allows building the schema from objects exposing these as attributes.
    model_config = {"from_attributes": True}

    id: UUID
    orcid_id: str
    name: str | None
    authenticated: bool
    last_sync_at: datetime | None
|
||||
|
||||
|
||||
class ResearcherWithPublicationsSchema(BaseModel):
    """A researcher together with their publications and sync counters."""

    # Allows building the schema from objects exposing these as attributes.
    model_config = {"from_attributes": True}

    researcher: ResearcherSchema
    publications: list[PublicationSchema]

    # Counters describing the outcome of a sync
    new_records: int
    updated_records: int
    unchanged_records: int
    total_records: int
|
||||
@@ -0,0 +1,27 @@
|
||||
import os
|
||||
from dotenv import load_dotenv
|
||||
from fastapi import Depends, HTTPException, status
|
||||
from fastapi.security import APIKeyHeader
|
||||
|
||||
# Cargar variables del .env
|
||||
load_dotenv()
|
||||
|
||||
API_KEY_NAME = os.getenv("API_KEY_NAME")
|
||||
API_KEY_VALUE = os.getenv("API_KEY_VALUE")
|
||||
|
||||
if not API_KEY_NAME:
|
||||
raise RuntimeError("ERROR: La variable API_KEY_NAME no está definida en el .env")
|
||||
|
||||
if not API_KEY_VALUE:
|
||||
raise RuntimeError("ERROR: La variable API_KEY_VALUE no está definida en el .env")
|
||||
|
||||
api_key_header = APIKeyHeader(name=API_KEY_NAME, auto_error=False)
|
||||
|
||||
|
||||
def get_api_key(api_key: str = Depends(api_key_header)):
    """Validate the API key taken from the configured request header.

    Args:
        api_key: Header value resolved by ``api_key_header``. That scheme is
            created with ``auto_error=False``, so a missing header is
            expected to arrive here as ``None`` rather than raising earlier.

    Returns:
        The supplied key when it matches ``API_KEY_VALUE``.

    Raises:
        HTTPException: 401 when the key is missing or does not match.
    """
    import secrets  # stdlib; imported locally because only this check needs it

    # Compare in constant time so response timing does not reveal how much
    # of the key matched. Bytes are used because compare_digest rejects
    # non-ASCII str; a missing header is treated as an empty key.
    supplied = (api_key or "").encode("utf-8")
    expected = API_KEY_VALUE.encode("utf-8")
    if not secrets.compare_digest(supplied, expected):
        raise HTTPException(
            status_code=status.HTTP_401_UNAUTHORIZED,
            detail="API key inválida o ausente."
        )
    return api_key
|
||||
@@ -1,74 +1,111 @@
|
||||
from typing import List
|
||||
|
||||
|
||||
def _get(d: dict | None, *keys, default=None):
|
||||
cur = d or {}
|
||||
for k in keys:
|
||||
if not isinstance(cur, dict):
|
||||
return default
|
||||
cur = cur.get(k)
|
||||
if cur is None:
|
||||
return default
|
||||
return cur
|
||||
|
||||
|
||||
class PublicationNormalizer:
|
||||
|
||||
@staticmethod
|
||||
def safe_get_title(summary):
|
||||
t = summary.get("title")
|
||||
def normalize(summary: dict, detail: dict | None = None) -> dict:
|
||||
"""
|
||||
summary: work-summary de ORCID
|
||||
detail: work completo (puede ser None si la llamada falla)
|
||||
"""
|
||||
|
||||
if t is None:
|
||||
return None
|
||||
# --- Core desde summary ---
|
||||
put_code = summary.get("put-code")
|
||||
|
||||
# Caso 1: {"title": {"value": "..."}}
|
||||
if isinstance(t, dict) and "title" in t and isinstance(t["title"], dict):
|
||||
return t["title"].get("value")
|
||||
title = _get(summary, "title", "title", "value")
|
||||
type_ = summary.get("type")
|
||||
|
||||
# Caso 2: {"title": {"title": "..."}} (muy común en /works)
|
||||
if isinstance(t, dict) and "title" in t and isinstance(t["title"], str):
|
||||
return t["title"]
|
||||
journal = _get(summary, "journal-title", "value")
|
||||
|
||||
# Caso 3: {"title": "string"}
|
||||
if isinstance(t, str):
|
||||
return t
|
||||
year = _get(summary, "publication-date", "year", "value")
|
||||
month = _get(summary, "publication-date", "month", "value")
|
||||
day = _get(summary, "publication-date", "day", "value")
|
||||
|
||||
# Caso 4: {"value": "..."}
|
||||
if isinstance(t, dict) and "value" in t:
|
||||
return t["value"]
|
||||
url = _get(summary, "url", "value")
|
||||
short_description = summary.get("short-description")
|
||||
|
||||
return None
|
||||
|
||||
@staticmethod
|
||||
def normalize_work(summary: dict) -> dict:
|
||||
|
||||
title = PublicationNormalizer.safe_get_title(summary)
|
||||
|
||||
# Journal title
|
||||
journal_raw = summary.get("journal-title")
|
||||
if isinstance(journal_raw, dict):
|
||||
journal = journal_raw.get("value") or journal_raw.get("title")
|
||||
else:
|
||||
journal = journal_raw
|
||||
|
||||
# DOI
|
||||
# DOI desde summary (external-ids)
|
||||
doi = None
|
||||
ext_ids = summary.get("external-ids", {}).get("external-id", [])
|
||||
for ext in ext_ids:
|
||||
external_ids_list: List[dict] = _get(
|
||||
summary, "external-ids", "external-id", default=[]
|
||||
) or []
|
||||
for ext in external_ids_list:
|
||||
if ext.get("external-id-type") == "doi":
|
||||
doi = ext.get("external-id-value")
|
||||
break
|
||||
|
||||
# Publication year
|
||||
pub_year = (
|
||||
summary.get("publication-date", {})
|
||||
.get("year", {})
|
||||
.get("value")
|
||||
)
|
||||
# --- Si tenemos detail, enriquecemos ---
|
||||
subtitle = None
|
||||
citation_type = None
|
||||
citation_value = None
|
||||
language_code = None
|
||||
country = None
|
||||
external_ids_full: List[dict] | None = None
|
||||
contributors: List[dict] | None = None
|
||||
|
||||
# Type
|
||||
work_type = summary.get("type")
|
||||
if detail:
|
||||
# Subtitle
|
||||
subtitle = _get(detail, "title", "subtitle", "value") or subtitle
|
||||
|
||||
# put-code
|
||||
put_code = summary.get("put-code")
|
||||
# Citation
|
||||
citation_type = _get(detail, "citation", "citation-type")
|
||||
citation_value = _get(detail, "citation", "citation-value")
|
||||
|
||||
# Fingerprint
|
||||
fingerprint = f"{title}-{doi}-{pub_year}-{work_type}"
|
||||
if fingerprint:
|
||||
fingerprint = fingerprint.lower().replace(" ", "")
|
||||
# Language
|
||||
language_code = detail.get("language-code")
|
||||
|
||||
# Country
|
||||
country = _get(detail, "country", "value")
|
||||
|
||||
# External IDs completos
|
||||
external_ids_full = _get(
|
||||
detail, "external-ids", "external-id", default=[]
|
||||
) or []
|
||||
|
||||
# Contributors
|
||||
raw_contributors = _get(
|
||||
detail, "contributors", "contributor", default=[]
|
||||
) or []
|
||||
contributors = []
|
||||
for c in raw_contributors:
|
||||
contributors.append(
|
||||
{
|
||||
"name": _get(c, "credit-name", "value"),
|
||||
"orcid": _get(c, "contributor-orcid", "path"),
|
||||
"role": _get(
|
||||
c, "contributor-attributes", "contributor-role"
|
||||
),
|
||||
}
|
||||
)
|
||||
|
||||
return {
|
||||
"put_code": put_code,
|
||||
"title": title or "Untitled",
|
||||
"title": title,
|
||||
"subtitle": subtitle,
|
||||
"type": type_,
|
||||
"journal": journal,
|
||||
"pub_year": int(year) if year is not None else None,
|
||||
"pub_month": int(month) if month is not None else None,
|
||||
"pub_day": int(day) if day is not None else None,
|
||||
"doi": doi,
|
||||
"pub_year": pub_year,
|
||||
"type": work_type,
|
||||
"hash_fingerprint": fingerprint
|
||||
"url": url,
|
||||
"short_description": short_description,
|
||||
"citation_type": citation_type,
|
||||
"citation_value": citation_value,
|
||||
"language_code": language_code,
|
||||
"country": country,
|
||||
"external_ids": external_ids_full,
|
||||
"contributors": contributors,
|
||||
"hash_fingerprint": None,
|
||||
}
|
||||
|
||||
@@ -1,28 +1,28 @@
|
||||
import httpx
|
||||
import os
|
||||
from typing import Optional
|
||||
|
||||
import httpx
|
||||
|
||||
TOKEN_URL_SANDBOX = "https://sandbox.orcid.org/oauth/token"
|
||||
BASE_URL_SANDBOX = "https://pub.sandbox.orcid.org/v3.0"
|
||||
|
||||
# Si en algún momento pasas a producción, cambiarías a:
|
||||
# TOKEN_URL_PROD = "https://orcid.org/oauth/token"
|
||||
# BASE_URL_PROD = "https://pub.orcid.org/v3.0"
|
||||
|
||||
|
||||
class ORCIDClient:
|
||||
|
||||
TOKEN_URL = "https://sandbox.orcid.org/oauth/token"
|
||||
BASE_URL = "https://pub.sandbox.orcid.org/v3.0"
|
||||
|
||||
# TOKEN_URL = "https://orcid.org/oauth/token"
|
||||
# BASE_URL = "https://pub.orcid.org/v3.0"
|
||||
|
||||
def __init__(self):
|
||||
self.client_id = os.getenv("ORCID_CLIENT_ID")
|
||||
self.client_secret = os.getenv("ORCID_CLIENT_SECRET")
|
||||
self._token_cache: Optional[str] = None
|
||||
self.token_url = TOKEN_URL_SANDBOX
|
||||
self.base_url = BASE_URL_SANDBOX
|
||||
|
||||
# ---------------------------------------------------------
|
||||
# 1. Obtener token público
|
||||
# ---------------------------------------------------------
|
||||
def get_public_token(self) -> str:
|
||||
"""
|
||||
Obtiene un token público de ORCID (scope: /read-public).
|
||||
Se cachea en memoria para evitar pedirlo cada vez.
|
||||
"""
|
||||
if self._token_cache:
|
||||
return self._token_cache
|
||||
|
||||
@@ -30,11 +30,11 @@ class ORCIDClient:
|
||||
"client_id": self.client_id,
|
||||
"client_secret": self.client_secret,
|
||||
"grant_type": "client_credentials",
|
||||
"scope": "/read-public"
|
||||
"scope": "/read-public",
|
||||
}
|
||||
|
||||
with httpx.Client(timeout=20.0) as client:
|
||||
response = client.post(self.TOKEN_URL, data=data)
|
||||
response = client.post(self.token_url, data=data)
|
||||
response.raise_for_status()
|
||||
token = response.json()["access_token"]
|
||||
self._token_cache = token
|
||||
@@ -43,29 +43,53 @@ class ORCIDClient:
|
||||
# ---------------------------------------------------------
|
||||
# Headers comunes
|
||||
# ---------------------------------------------------------
|
||||
def _headers(self):
|
||||
def _headers(self) -> dict:
|
||||
token = self.get_public_token()
|
||||
return {
|
||||
"Accept": "application/json",
|
||||
"Authorization": f"Bearer {token}"
|
||||
"Authorization": f"Bearer {token}",
|
||||
}
|
||||
|
||||
# ---------------------------------------------------------
|
||||
# 2. Consultar /record
|
||||
# ---------------------------------------------------------
|
||||
def fetch_record(self, orcid_id: str) -> dict:
    """Fetch the ``/record`` resource of an ORCID iD.

    Args:
        orcid_id: The ORCID iD placed in the request path.

    Returns:
        The decoded JSON body of the response.

    Raises:
        httpx.HTTPStatusError: If the API answers with an error status
            (raised by ``raise_for_status()``).
    """
    # Use the per-instance base URL chosen in __init__.
    url = f"{self.base_url}/{orcid_id}/record"
    with httpx.Client(timeout=20.0) as client:
        response = client.get(url, headers=self._headers())
        response.raise_for_status()
        return response.json()
|
||||
|
||||
# ---------------------------------------------------------
|
||||
# 3. Consultar /works
|
||||
# 3. Consultar /works (summary)
|
||||
# ---------------------------------------------------------
|
||||
def fetch_works(self, orcid_id: str) -> dict:
    """Fetch the ``/works`` summary listing of an ORCID iD.

    Args:
        orcid_id: The ORCID iD placed in the request path.

    Returns:
        The decoded JSON body of the response.

    Raises:
        httpx.HTTPStatusError: If the API answers with an error status
            (raised by ``raise_for_status()``).
    """
    # Use the per-instance base URL chosen in __init__.
    url = f"{self.base_url}/{orcid_id}/works"
    with httpx.Client(timeout=20.0) as client:
        response = client.get(url, headers=self._headers())
        response.raise_for_status()
        return response.json()
|
||||
|
||||
# ---------------------------------------------------------
|
||||
# 4. Consultar /work/{put_code} (detalle)
|
||||
# ---------------------------------------------------------
|
||||
def fetch_work_detail(self, orcid_id: str, put_code: int) -> dict | None:
    """Fetch the full detail of a single work.

    Args:
        orcid_id: The ORCID iD placed in the request path.
        put_code: Put-code of the work placed in the request path.

    Returns:
        The decoded JSON body when the API answers 200; ``None`` for any
        other status code (no exception is raised in that case).
    """
    endpoint = f"{self.base_url}/{orcid_id}/work/{put_code}"
    with httpx.Client(timeout=20.0) as http:
        reply = http.get(endpoint, headers=self._headers())
        # Any non-200 answer is reported as "no detail" rather than an error.
        return reply.json() if reply.status_code == 200 else None
|
||||
|
||||
|
||||
# -------------------------------------------------------------------
|
||||
# Funciones de módulo usadas en researchers.py
|
||||
# -------------------------------------------------------------------
|
||||
# Lazily created client shared by the module-level helpers below, so the
# token cached inside ORCIDClient is actually reused between calls.
_shared_client = None


def _get_shared_client():
    """Return the module-wide ORCIDClient, creating it on first use."""
    global _shared_client
    if _shared_client is None:
        _shared_client = ORCIDClient()
    return _shared_client


def get_works_summary(orcid_id: str) -> dict:
    """Return the ``/works`` summary for ``orcid_id``.

    Delegates to ``ORCIDClient.fetch_works`` on the shared client.
    """
    return _get_shared_client().fetch_works(orcid_id)


def get_work_detail(orcid_id: str, put_code: int) -> dict | None:
    """Return the detail of one work, or ``None`` if it cannot be fetched.

    Delegates to ``ORCIDClient.fetch_work_detail`` on the shared client.
    """
    return _get_shared_client().fetch_work_detail(orcid_id, put_code)
|
||||
|
||||
@@ -1,155 +0,0 @@
|
||||
from datetime import datetime
|
||||
from xml.etree.ElementTree import Element, SubElement, tostring
|
||||
from io import BytesIO
|
||||
import zipfile
|
||||
import json
|
||||
|
||||
|
||||
class SWORDExporter:
    """Builds the files of a SWORD deposit package and zips them in memory.

    All methods are static. ``researcher`` and each item of ``publications``
    are only read through attributes (``orcid_id``, ``name``, ``id`` /
    ``id``, ``title``, ``doi``, ``pub_year``, ``type``, ``journal``).
    """

    ATOM_NS = "http://www.w3.org/2005/Atom"
    DC_NS = "http://purl.org/dc/elements/1.1/"

    # ---------------------------------------------------------
    # 1) MAIN XML (sword.xml)
    # ---------------------------------------------------------
    @staticmethod
    def export_feed_xml(researcher, publications) -> bytes:
        """Serialize the researcher and publications as an Atom feed.

        Returns:
            UTF-8 encoded XML including the XML declaration.
        """
        dc = f"{{{SWORDExporter.DC_NS}}}"
        # One timestamp for the whole document so the feed and all of its
        # entries agree on "updated".
        stamp = datetime.utcnow().isoformat() + "Z"

        feed = Element("feed", xmlns=SWORDExporter.ATOM_NS)
        SubElement(feed, "title").text = f"Publications for {researcher.orcid_id}"
        author = SubElement(feed, "author")
        SubElement(author, "name").text = researcher.name or "Unknown"
        SubElement(feed, "updated").text = stamp
        SubElement(feed, "id").text = f"urn:uuid:{researcher.id}"

        for pub in publications:
            entry = SubElement(feed, "entry")
            SubElement(entry, "id").text = f"urn:uuid:{pub.id}"
            SubElement(entry, "updated").text = stamp
            SubElement(entry, f"{dc}title").text = pub.title

            # Optional fields are only emitted when they hold a value.
            if pub.doi:
                SubElement(entry, f"{dc}identifier").text = f"doi:{pub.doi}"
            if pub.pub_year:
                SubElement(entry, f"{dc}date").text = str(pub.pub_year)
            if pub.type:
                SubElement(entry, f"{dc}type").text = pub.type
            if pub.journal:
                SubElement(entry, f"{dc}source").text = pub.journal

        return tostring(feed, encoding="utf-8", xml_declaration=True)

    # ---------------------------------------------------------
    # 2) manifest.txt
    # ---------------------------------------------------------
    @staticmethod
    def generate_manifest(researcher, publications) -> str:
        """Return a plain-text summary of the package, one line per publication."""
        lines = [
            "SWORD Deposit Package",
            "----------------------",
            f"Researcher ORCID: {researcher.orcid_id}",
            f"Researcher Name: {researcher.name or 'Unknown'}",
            f"Total Publications: {len(publications)}",
            f"Generated At: {datetime.utcnow().isoformat()}Z",
            "",
            "Publications:",
        ]
        lines.extend(
            f"- {pub.title} ({pub.pub_year}) DOI={pub.doi}" for pub in publications
        )
        return "\n".join(lines)

    # ---------------------------------------------------------
    # 3) metadata.json
    # ---------------------------------------------------------
    @staticmethod
    def generate_metadata_json(researcher, publications) -> str:
        """Return the researcher and publication fields as an indented JSON string."""
        data = {
            "researcher": {
                "orcid_id": researcher.orcid_id,
                "name": researcher.name,
                "id": str(researcher.id),
            },
            "generated_at": datetime.utcnow().isoformat() + "Z",
            "publications": [
                {
                    "id": str(pub.id),
                    "title": pub.title,
                    "doi": pub.doi,
                    "year": pub.pub_year,
                    "type": pub.type,
                    "journal": pub.journal,
                }
                for pub in publications
            ],
        }
        return json.dumps(data, indent=4)

    # ---------------------------------------------------------
    # 4) mets.xml (simple version)
    # ---------------------------------------------------------
    @staticmethod
    def generate_mets_xml(researcher, publications) -> bytes:
        """Return a minimal METS document wrapping Dublin Core title/identifier.

        The fields of every publication are appended to one shared
        ``xmlData`` element. ``researcher`` is accepted for signature
        symmetry but not read.
        """
        dc = f"{{{SWORDExporter.DC_NS}}}"
        mets = Element("mets", xmlns="http://www.loc.gov/METS/")

        header = SubElement(mets, "metsHdr")
        agent = SubElement(header, "agent", ROLE="CREATOR", TYPE="OTHER")
        SubElement(agent, "name").text = "ORCID Exporter System"

        dmd_sec = SubElement(mets, "dmdSec", ID="dmd1")
        md_wrap = SubElement(dmd_sec, "mdWrap", MDTYPE="DC")
        xml_data = SubElement(md_wrap, "xmlData")

        for pub in publications:
            SubElement(xml_data, f"{dc}title").text = pub.title
            if pub.doi:
                SubElement(xml_data, f"{dc}identifier").text = f"doi:{pub.doi}"

        return tostring(mets, encoding="utf-8", xml_declaration=True)

    # ---------------------------------------------------------
    # 5) FINAL ZIP
    # ---------------------------------------------------------
    @staticmethod
    def export_zip(researcher, publications) -> bytes:
        """Return the bytes of a ZIP holding the four generated files.

        Archive members, in order: ``sword.xml``, ``manifest.txt``,
        ``metadata.json``, ``mets.xml``.
        """
        members = (
            ("sword.xml", SWORDExporter.export_feed_xml(researcher, publications)),
            ("manifest.txt", SWORDExporter.generate_manifest(researcher, publications)),
            ("metadata.json", SWORDExporter.generate_metadata_json(researcher, publications)),
            ("mets.xml", SWORDExporter.generate_mets_xml(researcher, publications)),
        )

        mem_file = BytesIO()
        with zipfile.ZipFile(mem_file, mode="w", compression=zipfile.ZIP_DEFLATED) as zf:
            for member_name, payload in members:
                zf.writestr(member_name, payload)

        # getvalue() returns the whole buffer regardless of the stream position.
        return mem_file.getvalue()
|
||||
@@ -0,0 +1,112 @@
|
||||
from datetime import datetime
|
||||
from xml.etree.ElementTree import Element, SubElement, tostring
|
||||
from app.db.models import Publication, Researcher
|
||||
|
||||
ATOM_NS = "http://www.w3.org/2005/Atom"
DC_NS = "http://purl.org/dc/elements/1.1/"
EXTRA_NS = "http://example.org/orcid-extra"  # namespace for the extended (non-DC) fields


class SWORDGenerator:
    """Serializes a researcher and its publications as an Atom feed."""

    @staticmethod
    def _to_text(value):
        """Return ``value`` as element text: ``None`` stays ``None``, the rest is str()."""
        return None if value is None else str(value)

    @staticmethod
    def _format_date(year, month, day):
        """Return ``YYYY``, ``YYYY-MM`` or ``YYYY-MM-DD``.

        The day is only appended when a month is present, so a record with
        a year and a day but no month yields just the year instead of an
        ambiguous ``YYYY-DD``.
        """
        date_str = str(year)
        if month:
            date_str += f"-{int(month):02d}"
            if day:
                date_str += f"-{int(day):02d}"
        return date_str

    @staticmethod
    def generate_feed_xml(researcher: "Researcher", publications: "list[Publication]") -> bytes:
        """Build the SWORD feed for ``researcher``.

        Args:
            researcher: Object exposing ``orcid_id``, ``name`` and ``id``.
            publications: Objects exposing the publication attributes read
                below (``id``, ``title``, ``subtitle``, ``doi``, ...).
                ``status`` is optional on them.

        Returns:
            UTF-8 encoded, well-formed XML including the XML declaration.
        """
        # Function-scope import: the module only imports Element, SubElement
        # and tostring.
        from xml.etree.ElementTree import register_namespace

        # Let ElementTree declare the prefixes itself. Writing "xmlns:dc" by
        # hand on the root as well produced the attribute twice, because
        # ElementTree already emits that declaration for Clark-notation tags.
        register_namespace("dc", DC_NS)
        register_namespace("extra", EXTRA_NS)

        to_text = SWORDGenerator._to_text
        dc = f"{{{DC_NS}}}"
        extra = f"{{{EXTRA_NS}}}"
        # One timestamp shared by the feed and all of its entries.
        stamp = datetime.utcnow().isoformat() + "Z"

        # The default namespace is still set by hand: the Atom elements are
        # created with unqualified tags.
        feed = Element("feed", {"xmlns": ATOM_NS})
        SubElement(feed, "title").text = f"Publications for {researcher.orcid_id}"
        author = SubElement(feed, "author")
        SubElement(author, "name").text = researcher.name or "Unknown"
        SubElement(feed, "updated").text = stamp
        SubElement(feed, "id").text = f"urn:uuid:{researcher.id}"

        for pub in publications:
            entry = SubElement(feed, "entry")
            SubElement(entry, "id").text = f"urn:uuid:{pub.id}"
            SubElement(entry, "updated").text = stamp

            # Title
            SubElement(entry, f"{dc}title").text = pub.title or "Untitled"

            # Subtitle
            if pub.subtitle:
                SubElement(entry, f"{extra}subtitle").text = pub.subtitle

            # DOI
            if pub.doi:
                SubElement(entry, f"{dc}identifier").text = f"doi:{pub.doi}"

            # Journal
            if pub.journal:
                SubElement(entry, f"{dc}source").text = pub.journal

            # URL
            if pub.url:
                SubElement(entry, f"{dc}relation").text = pub.url

            # Short description
            if pub.short_description:
                SubElement(entry, f"{dc}description").text = pub.short_description

            # Citation
            if pub.citation_value:
                citation = SubElement(entry, f"{extra}citation")
                SubElement(citation, "type").text = pub.citation_type or "unknown"
                SubElement(citation, "value").text = pub.citation_value

            # Language
            if pub.language_code:
                SubElement(entry, f"{dc}language").text = pub.language_code

            # Country
            if pub.country:
                SubElement(entry, f"{extra}country").text = pub.country

            # External IDs: each key of an external-id dict becomes a child
            # element; {"value": ...} wrappers are unwrapped.
            if pub.external_ids:
                ext_ids_el = SubElement(entry, f"{extra}external_ids")
                for ext in pub.external_ids:
                    ext_el = SubElement(ext_ids_el, "external_id")
                    for key, value in ext.items():
                        if isinstance(value, dict) and "value" in value:
                            value = value["value"]
                        # Element text must be str or None for tostring().
                        SubElement(ext_el, key).text = to_text(value)

            # Contributors
            if pub.contributors:
                contribs_el = SubElement(entry, f"{extra}contributors")
                for contributor in pub.contributors:
                    c_el = SubElement(contribs_el, "contributor")
                    for field in ("name", "orcid", "role"):
                        SubElement(c_el, field).text = to_text(contributor.get(field))

            # Date
            if pub.pub_year:
                SubElement(entry, f"{dc}date").text = SWORDGenerator._format_date(
                    pub.pub_year, pub.pub_month, pub.pub_day
                )

            # Type
            if pub.type:
                SubElement(entry, f"{dc}type").text = pub.type

            # Status (new / updated / unchanged); the attribute may be absent.
            status = getattr(pub, "status", None)
            if status:
                SubElement(entry, f"{extra}status").text = status

            # Last modified
            if pub.last_modified:
                SubElement(entry, f"{extra}last_modified").text = pub.last_modified.isoformat()

        return tostring(feed, encoding="utf-8", xml_declaration=True)
|
||||
@@ -1,10 +1,12 @@
|
||||
from sqlalchemy.orm import Session
|
||||
import httpx
|
||||
|
||||
from app.services.orcid_client import ORCIDClient
|
||||
from app.services.normalizer import PublicationNormalizer
|
||||
from app.repositories.researcher_repository import ResearcherRepository
|
||||
from app.repositories.publication_repository import PublicationRepository
|
||||
from app.repositories.syncjob_repository import SyncJobRepository
|
||||
import httpx
|
||||
|
||||
from app.db.repositories.researcher_repository import ResearcherRepository
|
||||
from app.db.repositories.publication_repository import PublicationRepository
|
||||
from app.db.repositories.syncjob_repository import SyncJobRepository
|
||||
|
||||
|
||||
class SyncService:
|
||||
@@ -16,8 +18,6 @@ class SyncService:
|
||||
"""
|
||||
Sincroniza las publicaciones de un investigador con manejo robusto de errores.
|
||||
"""
|
||||
|
||||
# 1. Obtener o crear investigador
|
||||
try:
|
||||
researcher = ResearcherRepository.get_by_orcid(db, orcid_id)
|
||||
|
||||
@@ -35,14 +35,23 @@ class SyncService:
|
||||
if e.response.status_code == 404:
|
||||
return {
|
||||
"status": "error",
|
||||
"message": f"El ORCID {orcid_id} no existe en Sandbox."
|
||||
"code": 404,
|
||||
"message": f"El ORCID {orcid_id} no existe en ORCID."
|
||||
}
|
||||
return {"status": "error", "message": str(e)}
|
||||
return {
|
||||
"status": "error",
|
||||
"code": e.response.status_code,
|
||||
"message": f"Error al consultar ORCID: {str(e)}"
|
||||
}
|
||||
except Exception as e:
|
||||
return {
|
||||
"status": "error",
|
||||
"code": 500,
|
||||
"message": f"Error interno durante la sincronización: {str(e)}"
|
||||
}
|
||||
|
||||
# 2. Crear SyncJob
|
||||
job = SyncJobRepository.start_job(db, researcher.id)
|
||||
|
||||
# 3. Obtener works
|
||||
try:
|
||||
works_raw = self.orcid_client.fetch_works(orcid_id)
|
||||
except httpx.HTTPStatusError as e:
|
||||
@@ -56,19 +65,27 @@ class SyncService:
|
||||
"updated_records": 0,
|
||||
"total": 0
|
||||
}
|
||||
return {"status": "error", "message": str(e)}
|
||||
return {
|
||||
"status": "error",
|
||||
"code": e.response.status_code,
|
||||
"message": f"Error al obtener works de ORCID: {str(e)}"
|
||||
}
|
||||
except Exception as e:
|
||||
return {
|
||||
"status": "error",
|
||||
"code": 500,
|
||||
"message": f"Error interno al obtener works: {str(e)}"
|
||||
}
|
||||
|
||||
groups = works_raw.get("group", [])
|
||||
|
||||
new_records = 0
|
||||
updated_records = 0
|
||||
|
||||
# 4. Procesar works
|
||||
for group in groups:
|
||||
summary = group["work-summary"][0]
|
||||
normalized = PublicationNormalizer.normalize_work(summary)
|
||||
|
||||
# 🔥 AHORA SE DETECTAN DUPLICADOS POR put_code
|
||||
existing = PublicationRepository.get_by_put_code(
|
||||
db, researcher.id, normalized["put_code"]
|
||||
)
|
||||
@@ -80,17 +97,40 @@ class SyncService:
|
||||
PublicationRepository.create(db, researcher.id, normalized)
|
||||
new_records += 1
|
||||
|
||||
# 5. Finalizar SyncJob
|
||||
SyncJobRepository.finish_job(db, job, new_records, updated_records)
|
||||
|
||||
# 6. Actualizar last_sync_at
|
||||
ResearcherRepository.update_last_sync(db, researcher)
|
||||
|
||||
return {
|
||||
"status": "ok",
|
||||
"message": "Sincronización completada correctamente.",
|
||||
"researcher": researcher.orcid_id,
|
||||
"researcher_id": researcher.id,
|
||||
"new_records": new_records,
|
||||
"updated_records": updated_records,
|
||||
"total": new_records + updated_records
|
||||
}
|
||||
|
||||
def sync_and_get_full(self, db: Session, orcid_id: str):
    """Synchronize a researcher and return it together with its publications.

    Intended for the search box, so that a single request is enough.

    Args:
        db: Database session handed to the repositories.
        orcid_id: ORCID iD of the researcher to synchronize.

    Returns:
        The error dict produced by ``sync_researcher`` when the sync fails;
        a 500 error dict when the researcher cannot be loaded afterwards;
        otherwise a dict with ``status``, ``researcher`` and ``publications``.
    """
    outcome = self.sync_researcher(db, orcid_id)
    if outcome.get("status") == "error":
        # Pass the sync failure through untouched.
        return outcome

    researcher = ResearcherRepository.get_by_orcid(db, orcid_id)
    if not researcher:
        return {
            "status": "error",
            "code": 500,
            "message": "Error interno: investigador no encontrado tras sincronización."
        }

    return {
        "status": "ok",
        "researcher": researcher,
        "publications": PublicationRepository.list_by_researcher(db, researcher.id)
    }
|
||||
|
||||
@@ -0,0 +1,165 @@
|
||||
import io
|
||||
import zipfile
|
||||
import json
|
||||
from datetime import datetime
|
||||
from xml.etree.ElementTree import Element, SubElement, tostring
|
||||
|
||||
from app.db.models import Publication, Researcher
|
||||
from app.services.sword_generator import SWORDGenerator
|
||||
|
||||
|
||||
class ZIPGenerator:
    """Builds the in-memory ZIP deposit package for a researcher.

    All methods are static. ``researcher`` and each item of ``publications``
    are only read through attributes.
    """

    _DC_NS = "http://purl.org/dc/elements/1.1/"

    @staticmethod
    def _format_date(year, month, day):
        """Return ``YYYY``, ``YYYY-MM`` or ``YYYY-MM-DD``.

        The day is only appended when a month is present, so a record with
        a year and a day but no month yields just the year instead of an
        ambiguous ``YYYY-DD``.
        """
        date_str = str(year)
        if month:
            date_str += f"-{int(month):02d}"
            if day:
                date_str += f"-{int(day):02d}"
        return date_str

    # ---------------------------------------------------------
    # MANIFEST.TXT
    # ---------------------------------------------------------
    @staticmethod
    def generate_manifest(researcher, publications):
        """Return a plain-text summary of the package, one line per publication.

        A missing researcher name is written as "Unknown" and a missing
        title as "Untitled", matching the fallbacks used in the SWORD feed.
        """
        lines = [
            "SWORD Deposit Package",
            "----------------------",
            f"Researcher ORCID: {researcher.orcid_id}",
            f"Researcher Name: {researcher.name or 'Unknown'}",
            f"Researcher UUID: {researcher.id}",
            f"Total Publications: {len(publications)}",
            f"Generated At: {datetime.utcnow().isoformat()}Z",
            "",
            "Publications:",
        ]
        lines.extend(
            f"- {pub.title or 'Untitled'} ({pub.pub_year or 'Unknown'})"
            f" | DOI={pub.doi} | TYPE={pub.type}"
            for pub in publications
        )
        return "\n".join(lines)

    # ---------------------------------------------------------
    # METADATA.JSON (every stored field)
    # ---------------------------------------------------------
    @staticmethod
    def generate_metadata_json(researcher, publications):
        """Return the researcher and all publication fields as indented JSON.

        Datetimes are written in ISO format (``None`` when absent);
        ``status`` is ``None`` for publications without that attribute.
        """
        last_sync = researcher.last_sync_at
        data = {
            "researcher": {
                "orcid_id": researcher.orcid_id,
                "name": researcher.name,
                "id": str(researcher.id),
                "last_sync_at": last_sync.isoformat() if last_sync else None,
            },
            "generated_at": datetime.utcnow().isoformat() + "Z",
            "publications": [
                {
                    "id": str(pub.id),
                    "put_code": pub.put_code,
                    "title": pub.title,
                    "subtitle": pub.subtitle,
                    "doi": pub.doi,
                    "journal": pub.journal,
                    "type": pub.type,
                    "url": pub.url,
                    "short_description": pub.short_description,
                    "citation_type": pub.citation_type,
                    "citation_value": pub.citation_value,
                    "language_code": pub.language_code,
                    "country": pub.country,
                    "pub_year": pub.pub_year,
                    "pub_month": pub.pub_month,
                    "pub_day": pub.pub_day,
                    "external_ids": pub.external_ids,
                    "contributors": pub.contributors,
                    "hash_fingerprint": pub.hash_fingerprint,
                    "last_modified": pub.last_modified.isoformat() if pub.last_modified else None,
                    "status": getattr(pub, "status", None),
                }
                for pub in publications
            ],
        }
        return json.dumps(data, indent=4)

    # ---------------------------------------------------------
    # METS.XML
    # ---------------------------------------------------------
    @staticmethod
    def generate_mets_xml(researcher, publications):
        """Return a METS document wrapping Dublin Core fields.

        The fields of every publication are appended to one shared
        ``xmlData`` element. ``researcher`` is accepted for signature
        symmetry but not read.

        Returns:
            UTF-8 encoded XML including the XML declaration.
        """
        dc = f"{{{ZIPGenerator._DC_NS}}}"
        mets = Element("mets", xmlns="http://www.loc.gov/METS/")

        header = SubElement(mets, "metsHdr")
        agent = SubElement(header, "agent", ROLE="CREATOR", TYPE="OTHER")
        SubElement(agent, "name").text = "ORCID Exporter System"

        dmd_sec = SubElement(mets, "dmdSec", ID="dmd1")
        md_wrap = SubElement(dmd_sec, "mdWrap", MDTYPE="DC")
        xml_data = SubElement(md_wrap, "xmlData")

        for pub in publications:
            # Title
            SubElement(xml_data, f"{dc}title").text = pub.title

            # Subtitle (carried as a dc:description)
            if pub.subtitle:
                SubElement(xml_data, f"{dc}description").text = pub.subtitle

            # DOI
            if pub.doi:
                SubElement(xml_data, f"{dc}identifier").text = f"doi:{pub.doi}"

            # Journal
            if pub.journal:
                SubElement(xml_data, f"{dc}source").text = pub.journal

            # URL
            if pub.url:
                SubElement(xml_data, f"{dc}relation").text = pub.url

            # Description
            if pub.short_description:
                SubElement(xml_data, f"{dc}description").text = pub.short_description

            # Citation
            # NOTE(review): bibliographicCitation is a DCMI Terms property,
            # not one of the DC 1.1 elements; confirm consumers accept it
            # under this namespace.
            if pub.citation_value:
                SubElement(xml_data, f"{dc}bibliographicCitation").text = pub.citation_value

            # Language
            if pub.language_code:
                SubElement(xml_data, f"{dc}language").text = pub.language_code

            # Country
            if pub.country:
                SubElement(xml_data, f"{dc}coverage").text = pub.country

            # Date
            if pub.pub_year:
                SubElement(xml_data, f"{dc}date").text = ZIPGenerator._format_date(
                    pub.pub_year, pub.pub_month, pub.pub_day
                )

            # Type
            if pub.type:
                SubElement(xml_data, f"{dc}type").text = pub.type

        return tostring(mets, encoding="utf-8", xml_declaration=True)

    # ---------------------------------------------------------
    # FINAL ZIP
    # ---------------------------------------------------------
    @staticmethod
    def generate_zip(researcher, publications):
        """Return the bytes of a ZIP holding the four generated files.

        Archive members, in order: ``sword.xml`` (from
        ``SWORDGenerator.generate_feed_xml``), ``manifest.txt``,
        ``metadata.json``, ``mets.xml``.
        """
        members = (
            ("sword.xml", SWORDGenerator.generate_feed_xml(researcher, publications)),
            ("manifest.txt", ZIPGenerator.generate_manifest(researcher, publications)),
            ("metadata.json", ZIPGenerator.generate_metadata_json(researcher, publications)),
            ("mets.xml", ZIPGenerator.generate_mets_xml(researcher, publications)),
        )

        mem_file = io.BytesIO()
        with zipfile.ZipFile(mem_file, "w", zipfile.ZIP_DEFLATED) as zf:
            for member_name, payload in members:
                zf.writestr(member_name, payload)

        # getvalue() returns the whole buffer regardless of the stream position.
        return mem_file.getvalue()
|
||||
@@ -8,4 +8,6 @@ python-dotenv
|
||||
lxml
|
||||
apscheduler
|
||||
authlib
|
||||
redis
|
||||
redis
|
||||
APScheduler==3.10.4
|
||||
requests
|
||||
|
||||
Reference in New Issue
Block a user