Versión 3 Backend - Endpoints finales corregidos

2026-04-27 13:39:32 +02:00
parent a286c2e3ae
commit 96f01c0126
4343 changed files with 1046097 additions and 465 deletions
@@ -0,0 +1,101 @@
+from fastapi import APIRouter, Depends, HTTPException
+from fastapi.responses import Response
+from sqlalchemy.orm import Session
+from uuid import UUID
+
+from app.db.session import get_db
+from app.db.models import Publication, Researcher
+from app.security.api_key import get_api_key
+from app.services.sword_generator import SWORDGenerator
+from app.services.zip_generator import ZIPGenerator
+
+router = APIRouter(prefix="/export")
+
+
+def validate_uuid_list(pub_ids: list[str]) -> list[UUID]:
+    """Parse every publication ID into a ``UUID``, failing on the first bad one.
+
+    Args:
+        pub_ids: Publication identifiers as received from the client.
+
+    Returns:
+        The parsed ``UUID`` objects, in the same order as ``pub_ids``.
+
+    Raises:
+        HTTPException: 400 naming the first entry that is not a valid UUID.
+    """
+    valid_ids = []
+    for pid in pub_ids:
+        try:
+            valid_ids.append(UUID(pid))
+        except (ValueError, TypeError, AttributeError):
+            # Only UUID parsing failures become a client error; anything
+            # else is a real bug and must not be reported as a bad ID.
+            raise HTTPException(
+                status_code=400,
+                detail=f"Invalid publication ID (not UUID): {pid}"
+            ) from None
+    return valid_ids
+
+
+@router.post("/sword/publications")
+async def export_multiple_sword(
+    pub_ids: list[str],
+    db: Session = Depends(get_db),
+    api_key: str = Depends(get_api_key)
+):
+    """Export the requested publications as a single SWORD XML feed.
+
+    Responds 400 when any ID is not a UUID and 404 when none of the IDs
+    matches a stored publication.
+    """
+    # Query with the parsed UUID objects rather than the raw strings, so the
+    # values bound to the UUID column are exactly the ones that were validated.
+    valid_ids = validate_uuid_list(pub_ids)
+
+    pubs = db.query(Publication).filter(Publication.id.in_(valid_ids)).all()
+
+    if not pubs:
+        raise HTTPException(status_code=404, detail="No publications found")
+
+    # NOTE(review): the feed is attributed to the researcher of the first
+    # publication only; confirm mixed-researcher requests are not expected.
+    researcher = db.query(Researcher).filter_by(id=pubs[0].researcher_id).first()
+
+    xml_bytes = SWORDGenerator.generate_feed_xml(researcher, pubs)
+    return Response(content=xml_bytes, media_type="application/xml")
+
+
+@router.get("/sword/researcher/{orcid_id}")
+async def export_researcher_sword(
+    orcid_id: str,
+    db: Session = Depends(get_db),
+    api_key: str = Depends(get_api_key)
+):
+    # Resolve the researcher from the ORCID iD in the path.
+    owner = db.query(Researcher).filter(Researcher.orcid_id == orcid_id).first()
+    if owner is None:
+        raise HTTPException(status_code=404, detail="Researcher not found")
+
+    # Every stored publication of that researcher goes into the feed.
+    owned_pubs = (
+        db.query(Publication)
+        .filter(Publication.researcher_id == owner.id)
+        .all()
+    )
+    if not owned_pubs:
+        raise HTTPException(status_code=404, detail="No publications found for this researcher")
+
+    return Response(
+        content=SWORDGenerator.generate_feed_xml(owner, owned_pubs),
+        media_type="application/xml",
+    )
+
+
+@router.post("/zip/publications")
+async def export_multiple_zip(
+    pub_ids: list[str],
+    db: Session = Depends(get_db),
+    api_key: str = Depends(get_api_key)
+):
+    """Export the requested publications as a single ZIP archive.
+
+    Responds 400 when any ID is not a UUID and 404 when none of the IDs
+    matches a stored publication.
+    """
+    # Query with the parsed UUID objects rather than the raw strings, so the
+    # values bound to the UUID column are exactly the ones that were validated.
+    valid_ids = validate_uuid_list(pub_ids)
+
+    pubs = db.query(Publication).filter(Publication.id.in_(valid_ids)).all()
+
+    if not pubs:
+        raise HTTPException(status_code=404, detail="No publications found")
+
+    # NOTE(review): the archive is attributed to the researcher of the first
+    # publication only; confirm mixed-researcher requests are not expected.
+    researcher = db.query(Researcher).filter_by(id=pubs[0].researcher_id).first()
+
+    zip_bytes = ZIPGenerator.generate_zip(researcher, pubs)
+    return Response(content=zip_bytes, media_type="application/zip")
+
+
+@router.get("/zip/researcher/{orcid_id}")
+async def export_researcher_zip(
+    orcid_id: str,
+    db: Session = Depends(get_db),
+    api_key: str = Depends(get_api_key)
+):
+    # Resolve the researcher from the ORCID iD in the path.
+    owner = db.query(Researcher).filter(Researcher.orcid_id == orcid_id).first()
+    if owner is None:
+        raise HTTPException(status_code=404, detail="Researcher not found")
+
+    # Every stored publication of that researcher goes into the archive.
+    owned_pubs = (
+        db.query(Publication)
+        .filter(Publication.researcher_id == owner.id)
+        .all()
+    )
+    if not owned_pubs:
+        raise HTTPException(status_code=404, detail="No publications found for this researcher")
+
+    return Response(
+        content=ZIPGenerator.generate_zip(owner, owned_pubs),
+        media_type="application/zip",
+    )
@@ -1,120 +1,208 @@
+from datetime import datetime
+from typing import List
+
 from fastapi import APIRouter, Depends, HTTPException
-from fastapi.responses import Response
 from sqlalchemy.orm import Session
-from app.schema.publication import PublicationSchema
+
+from app.db.models import Publication, Researcher
 from app.db.session import get_db
-from app.repositories.researcher_repository import ResearcherRepository
-from app.repositories.publication_repository import PublicationRepository
-from app.services.sync_service import SyncService
-from app.services.sword_exporter import SWORDExporter
-from app.utils.orcid_validator import is_valid_orcid
+from app.schema.researcher import ResearcherWithPublicationsSchema
+from app.services.normalizer import PublicationNormalizer
+from app.services.orcid_client import get_works_summary, get_work_detail

 router = APIRouter(prefix="/researchers", tags=["researchers"])


-def validate_orcid_or_400(orcid_id: str):
-    if not is_valid_orcid(orcid_id):
-        raise HTTPException(
-            status_code=400,
-            detail=f"ORCID ID '{orcid_id}' no es válido según el formato y dígito de control."
+# ---------------------------------------------------------
+# Función auxiliar: detectar si una publicación ha cambiado
+# ---------------------------------------------------------
+def publication_changed(existing: Publication, data: dict) -> bool:
+    """Return True when any tracked column of ``existing`` differs from ``data``.
+
+    ``data`` must contain a key for every tracked column; keys outside the
+    tracked set are ignored.
+    """
+    tracked = (
+        "title", "subtitle", "type", "journal",
+        "pub_year", "pub_month", "pub_day",
+        "doi", "url", "short_description",
+        "citation_type", "citation_value",
+        "language_code", "country",
+        "external_ids", "contributors",
+    )
+    # any() stops at the first differing column, like the explicit loop did.
+    return any(getattr(existing, name) != data[name] for name in tracked)
+
+
+# ---------------------------------------------------------
+# ENDPOINT 1: SEARCH + SYNC (sin contadores)
+# ---------------------------------------------------------
+@router.get("/search/{orcid_id}", response_model=ResearcherWithPublicationsSchema)
+def search_and_sync_researcher(orcid_id: str, db: Session = Depends(get_db)):
+    # Buscar o crear Researcher
+    researcher = db.query(Researcher).filter(Researcher.orcid_id == orcid_id).first()
+    if not researcher:
+        researcher = Researcher(
+            orcid_id=orcid_id,
+            name=None,
+            authenticated=False,
+            last_sync_at=None,
+        )
+        db.add(researcher)
+        db.flush()
+
+    # Obtener works summary desde ORCID
+    works = get_works_summary(orcid_id)
+    groups = works.get("group", [])
+
+    publications: List[Publication] = []
+
+    for g in groups:
+        summaries = g.get("work-summary") or []
+        if not summaries:
+            continue
+
+        summary = summaries[0]
+        put_code = summary.get("put-code")
+        if put_code is None:
+            continue
+
+        # Obtener detalle del work
+        try:
+            detail = get_work_detail(orcid_id, put_code)
+        except Exception:
+            detail = None
+
+        # Normalizar datos
+        data = PublicationNormalizer.normalize(summary, detail)
+
+        # Ver si ya existe la publicación
+        existing = (
+            db.query(Publication)
+            .filter(
+                Publication.researcher_id == researcher.id,
+                Publication.put_code == data["put_code"],
+            )
+            .first()
        )

+        if existing:
+            for field in [
+                "title", "subtitle", "type", "journal",
+                "pub_year", "pub_month", "pub_day",
+                "doi", "url", "short_description",
+                "citation_type", "citation_value",
+                "language_code", "country",
+                "external_ids", "contributors"
+            ]:
+                setattr(existing, field, data[field])
+            existing.last_modified = datetime.utcnow()
+            existing.status = None
+            publications.append(existing)
+        else:
+            pub = Publication(
+                researcher_id=researcher.id,
+                **data,
+                last_modified=datetime.utcnow(),
+            )
+            pub.status = None
+            db.add(pub)
+            publications.append(pub)

-@router.post("/", response_model=dict)
-def create_researcher(orcid_id: str, db: Session = Depends(get_db)):
-    validate_orcid_or_400(orcid_id)
+    researcher.last_sync_at = datetime.utcnow()
+    db.commit()
+    db.refresh(researcher)

-    existing = ResearcherRepository.get_by_orcid(db, orcid_id)
-    if existing:
-        return {
-            "status": "ok",
-            "message": "Researcher ya existe.",
-            "orcid_id": existing.orcid_id,
-            "id": existing.id
-        }
-
-    # Aquí podrías opcionalmente validar que el ORCID existe en ORCID API
-    researcher = ResearcherRepository.create(db, orcid_id, name=None)
-
-    return {
-        "status": "ok",
-        "message": "Researcher creado correctamente.",
-        "orcid_id": researcher.orcid_id,
-        "id": researcher.id
-    }
+    return ResearcherWithPublicationsSchema(
+        researcher=researcher,
+        publications=publications,
+        new_records=0,
+        updated_records=0,
+        unchanged_records=0,
+        total_records=len(publications),
+    )


-@router.get("/{orcid_id}", response_model=dict)
-def get_researcher(orcid_id: str, db: Session = Depends(get_db)):
-    validate_orcid_or_400(orcid_id)
-
-    researcher = ResearcherRepository.get_by_orcid(db, orcid_id)
-    if not researcher:
-        raise HTTPException(status_code=404, detail="Researcher not found")
-
-    return {
-        "orcid_id": researcher.orcid_id,
-        "name": researcher.name,
-        "authenticated": researcher.authenticated,
-        "access_token": researcher.access_token,
-        "id": researcher.id,
-        "last_sync_at": researcher.last_sync_at,
-    }
-
-
-@router.post("/{orcid_id}/sync", response_model=dict)
+# ---------------------------------------------------------
+# ENDPOINT 2: SYNC COMPLETO (con contadores + status)
+# ---------------------------------------------------------
+@router.post("/{orcid_id}/sync", response_model=ResearcherWithPublicationsSchema)
 def sync_researcher(orcid_id: str, db: Session = Depends(get_db)):
-    validate_orcid_or_400(orcid_id)
-
-    service = SyncService()
-    result = service.sync_researcher(db, orcid_id)
-    return result
-
-
-@router.get("/{orcid_id}/publications", response_model=list[PublicationSchema], tags=["researchers"])
-def get_publications(orcid_id: str, db: Session = Depends(get_db)):
-    researcher = ResearcherRepository.get_by_orcid(db, orcid_id)
-    if not researcher:
-        raise HTTPException(status_code=404, detail="Researcher not found")
-    return researcher.publications
-
-
-
-@router.get("/{orcid_id}/export/sword.xml")
-def export_sword_xml(orcid_id: str, db: Session = Depends(get_db)):
-    validate_orcid_or_400(orcid_id)
-
-    researcher = ResearcherRepository.get_by_orcid(db, orcid_id)
+    researcher = db.query(Researcher).filter_by(orcid_id=orcid_id).first()
    if not researcher:
        raise HTTPException(status_code=404, detail="Researcher not found")

-    pubs = PublicationRepository.list_by_researcher(db, researcher.id)
-    xml_bytes = SWORDExporter.export_feed_xml(researcher, pubs)
+    works = get_works_summary(orcid_id)
+    groups = works.get("group", [])

-    return Response(
-        content=xml_bytes,
-        media_type="application/xml",
-        headers={
-            "Content-Disposition": f'attachment; filename="sword_{orcid_id}.xml"'
-        }
-    )
-
-
-@router.get("/{orcid_id}/export/sword.zip")
-def export_sword_zip(orcid_id: str, db: Session = Depends(get_db)):
-    validate_orcid_or_400(orcid_id)
-
-    researcher = ResearcherRepository.get_by_orcid(db, orcid_id)
-    if not researcher:
-        raise HTTPException(status_code=404, detail="Researcher not found")
-
-    pubs = PublicationRepository.list_by_researcher(db, researcher.id)
-    zip_bytes = SWORDExporter.export_zip(researcher, pubs)
-
-    return Response(
-        content=zip_bytes,
-        media_type="application/zip",
-        headers={
-            "Content-Disposition": f'attachment; filename="sword_{orcid_id}.zip"'
-        }
+    publications_output = []
+
+    new_count = 0
+    updated_count = 0
+    unchanged_count = 0
+
+    for g in groups:
+        summaries = g.get("work-summary") or []
+        if not summaries:
+            continue
+
+        summary = summaries[0]
+        put_code = summary.get("put-code")
+        if put_code is None:
+            continue
+
+        try:
+            detail = get_work_detail(orcid_id, put_code)
+        except Exception:
+            detail = None
+
+        data = PublicationNormalizer.normalize(summary, detail)
+
+        existing = (
+            db.query(Publication)
+            .filter(
+                Publication.researcher_id == researcher.id,
+                Publication.put_code == data["put_code"],
+            )
+            .first()
+        )
+
+        if existing:
+            if publication_changed(existing, data):
+                # updated
+                for field in data:
+                    setattr(existing, field, data[field])
+                existing.last_modified = datetime.utcnow()
+                existing.status = "updated"
+                updated_count += 1
+            else:
+                # unchanged
+                existing.status = "unchanged"
+                unchanged_count += 1
+
+            pub = existing
+
+        else:
+            # new
+            pub = Publication(
+                researcher_id=researcher.id,
+                **data,
+                last_modified=datetime.utcnow(),
+            )
+            pub.status = "new"
+            db.add(pub)
+            new_count += 1
+
+        db.flush()
+        publications_output.append(pub)
+
+    researcher.last_sync_at = datetime.utcnow()
+    db.commit()
+    db.refresh(researcher)
+
+    return ResearcherWithPublicationsSchema(
+        researcher=researcher,
+        publications=publications_output,
+        new_records=new_count,
+        updated_records=updated_count,
+        unchanged_records=unchanged_count,
+        total_records=new_count + updated_count + unchanged_count,
    )
@@ -0,0 +1,3 @@
+from sqlalchemy.orm import declarative_base
+
+# Declarative base created by this new module.
+# NOTE(review): app.db.session also creates its own ``Base`` and the models
+# import that one; confirm this second base is really needed, because tables
+# are only created from the metadata of the base the models inherit from.
+Base = declarative_base()
@@ -1,60 +1,63 @@
-from sqlalchemy import Column, String, Boolean, Integer, DateTime, Text, ForeignKey
-from sqlalchemy.dialects.postgresql import UUID
-from sqlalchemy.sql import func
+from sqlalchemy import Column, String, Integer, Boolean, DateTime, ForeignKey
+from sqlalchemy.dialects.postgresql import UUID, JSONB
 from sqlalchemy.orm import relationship
-from .session import Base
 import uuid
+from datetime import datetime
+
+from app.db.session import Base


 class Researcher(Base):
    __tablename__ = "researchers"

    id = Column(UUID(as_uuid=True), primary_key=True, default=uuid.uuid4)
-    orcid_id = Column(String(19), unique=True, nullable=False)
-    name = Column(Text)
+    orcid_id = Column(String, unique=True, index=True, nullable=False)
+    name = Column(String, nullable=True)
    authenticated = Column(Boolean, default=False)
-    access_token = Column(Text, nullable=True)
-    last_sync_at = Column(DateTime(timezone=True), server_default=func.now())
+    last_sync_at = Column(DateTime, nullable=True)

-    publications = relationship(
-        "Publication",
-        back_populates="researcher",
-        cascade="all, delete-orphan"
-    )
-
-    sync_jobs = relationship(
-        "SyncJob",
-        back_populates="researcher",
-        cascade="all, delete-orphan"
-    )
+    publications = relationship("Publication", back_populates="researcher", cascade="all, delete-orphan")


 class Publication(Base):
    __tablename__ = "publications"

    id = Column(UUID(as_uuid=True), primary_key=True, default=uuid.uuid4)
-    researcher_id = Column(UUID(as_uuid=True), ForeignKey("researchers.id"))
-    put_code = Column(Integer)
-    title = Column(Text)
-    journal = Column(Text)
-    doi = Column(Text)
-    pub_year = Column(Integer)
-    type = Column(Text)
-    hash_fingerprint = Column(Text)
-    last_modified = Column(DateTime(timezone=True))

+    researcher_id = Column(UUID(as_uuid=True), ForeignKey("researchers.id"), nullable=False)
    researcher = relationship("Researcher", back_populates="publications")

+    # ORCID core
+    put_code = Column(Integer, index=True, nullable=False)
+    title = Column(String, nullable=True)
+    subtitle = Column(String, nullable=True)
+    type = Column(String, nullable=True)

-class SyncJob(Base):
-    __tablename__ = "sync_jobs"
+    # Journal / container
+    journal = Column(String, nullable=True)

-    id = Column(UUID(as_uuid=True), primary_key=True, default=uuid.uuid4)
-    researcher_id = Column(UUID(as_uuid=True), ForeignKey("researchers.id"))
-    status = Column(String(20))
-    new_records = Column(Integer, default=0)
-    updated_records = Column(Integer, default=0)
-    started_at = Column(DateTime(timezone=True), server_default=func.now())
-    finished_at = Column(DateTime(timezone=True))
+    # Dates
+    pub_year = Column(Integer, nullable=True)
+    pub_month = Column(Integer, nullable=True)
+    pub_day = Column(Integer, nullable=True)

-    researcher = relationship("Researcher", back_populates="sync_jobs")
+    # Identifiers / links
+    doi = Column(String, nullable=True)
+    url = Column(String, nullable=True)
+
+    # Description / citation
+    short_description = Column(String, nullable=True)
+    citation_type = Column(String, nullable=True)
+    citation_value = Column(String, nullable=True)
+
+    # Language / country
+    language_code = Column(String, nullable=True)
+    country = Column(String, nullable=True)
+
+    # Extra structured data
+    external_ids = Column(JSONB, nullable=True)   # lista de external-id normalizados
+    contributors = Column(JSONB, nullable=True)   # lista de autores/roles
+
+    # Tu campo existente
+    hash_fingerprint = Column(String, nullable=True)
+    last_modified = Column(DateTime, nullable=True, default=None)
@@ -1,7 +1,6 @@
 from sqlalchemy.orm import Session
 from app.db.models import Publication

-
 class PublicationRepository:

    @staticmethod
@@ -2,6 +2,9 @@ from sqlalchemy import create_engine
 from sqlalchemy.orm import sessionmaker, declarative_base
 import os

+# -----------------------------
+# DATABASE URL
+# -----------------------------
 DATABASE_URL = os.getenv("DATABASE_URL")

 engine = create_engine(
@@ -18,9 +21,24 @@ SessionLocal = sessionmaker(

 Base = declarative_base()

+
+# -----------------------------
+# DB SESSION DEPENDENCY
+# -----------------------------
 def get_db():
    db = SessionLocal()
    try:
        yield db
    finally:
        db.close()
+
+
+# -----------------------------
+# INIT DB (CREA TABLAS)
+# -----------------------------
+def init_db():
+    """Create every table known to ``Base.metadata`` that does not exist yet."""
+    # Imported only for its side effect: loading the models module so that
+    # SQLAlchemy registers the mapped classes before create_all runs.
+    from app.db import models  # noqa: F401
+
+    Base.metadata.create_all(engine)
@@ -1,8 +1,15 @@
 from fastapi import FastAPI
+from fastapi.middleware.cors import CORSMiddleware
+
+from app.db.session import init_db
 from app.api.researchers import router as researchers_router
-from app.db.session import Base, engine
+from app.api.export import router as export_router
+from app.scheduler.sync_scheduler import start_scheduler


+# ---------------------------------------------------------
+# Crear instancia principal de FastAPI
+# ---------------------------------------------------------
 app = FastAPI(
    title="ORCID SWORD Backend",
    description="Backend para sincronización ORCID y exportación SWORD",
@@ -15,7 +22,8 @@ app = FastAPI(
 # ---------------------------------------------------------
@app.on_event("startup")
 def startup_event():
-    Base.metadata.create_all(bind=engine)
+    init_db()          # 🔥 CREA TABLAS
+    start_scheduler()  # 🔥 INICIA SCHEDULER


 # ---------------------------------------------------------
@@ -29,4 +37,17 @@ def health():
 # ---------------------------------------------------------
 # Registrar routers
 # ---------------------------------------------------------
-app.include_router(researchers_router)
+app.include_router(researchers_router, prefix="/api")
+app.include_router(export_router, prefix="/api")
+
+
+# ---------------------------------------------------------
+# CORS
+# ---------------------------------------------------------
+app.add_middleware(
+    CORSMiddleware,
+    # NOTE(review): wildcard origins are combined with allow_credentials=True.
+    # The FastAPI CORS docs say the wildcard must not be used when credentials
+    # are allowed; confirm whether credentials are needed at all, or list the
+    # allowed origins explicitly.
+    allow_origins=["*"],  # restrict in production
+    allow_credentials=True,
+    allow_methods=["*"],
+    allow_headers=["*"],
+)
@@ -0,0 +1,43 @@
+import requests
+from apscheduler.schedulers.background import BackgroundScheduler
+from app.db.session import SessionLocal
+from app.db.repositories.researcher_repository import ResearcherRepository
+from dotenv import load_dotenv
+import os
+
+
+# Cargar variables del .env
+load_dotenv()
+
+API_KEY = os.getenv("API_KEY_VALUE")
+BASE_URL = os.getenv("BASE_URL")
+
+
+def run_monthly_sync():
+    """Trigger the sync endpoint once for every researcher in the repository.
+
+    Each researcher is synced with a POST to
+    ``{BASE_URL}/researchers/{orcid_id}/sync``. A failure for one researcher
+    is printed and the loop continues with the next one.
+    """
+    db = SessionLocal()
+    try:
+        researchers = ResearcherRepository.get_all(db)
+
+        for r in researchers:
+            try:
+                url = f"{BASE_URL}/researchers/{r.orcid_id}/sync"
+                # NOTE(review): the header name is hard-coded here while the
+                # API-key dependency reads its header name from API_KEY_NAME;
+                # confirm both agree.
+                response = requests.post(
+                    url,
+                    headers={"X-API-Key": API_KEY},
+                    # requests has no default timeout; bound the call so one
+                    # stalled request cannot block the scheduler job forever.
+                    timeout=(10, 300),
+                )
+
+                if response.status_code != 200:
+                    print(f"[ERROR] Sync failed for {r.orcid_id}: {response.text}")
+                else:
+                    print(f"[OK] Synced {r.orcid_id}")
+
+            except Exception as e:
+                # Deliberately broad: this is a best-effort batch and a single
+                # failing researcher must not abort the remaining ones.
+                print(f"[EXCEPTION] Error syncing {r.orcid_id}: {e}")
+    finally:
+        # Release the session even when listing the researchers raises.
+        db.close()
+
+
+def start_scheduler():
+    """Start a background scheduler that runs ``run_monthly_sync`` monthly."""
+    monthly = BackgroundScheduler()
+    # Cron trigger: day 1 of the month at 03:00.
+    monthly.add_job(run_monthly_sync, trigger="cron", day=1, hour=3)
+    monthly.start()
@@ -1,16 +1,30 @@
 from pydantic import BaseModel
 from uuid import UUID
+from typing import Optional, List, Any
+from datetime import datetime

 class PublicationSchema(BaseModel):
    id: UUID
    put_code: int | None = None
-    title: str
+    title: str | None = None
+    subtitle: str | None = None
    journal: str | None = None
    doi: str | None = None
    pub_year: int | None = None
+    pub_month: int | None = None
+    pub_day: int | None = None
    type: str | None = None
+    url: str | None = None
+    short_description: str | None = None
+    citation_type: str | None = None
+    citation_value: str | None = None
+    language_code: str | None = None
+    country: str | None = None
+    external_ids: List[Any] | None = None
+    contributors: List[Any] | None = None
    hash_fingerprint: str | None = None
-    last_modified: str | None = None
+    last_modified: datetime | None = None
+    status: str | None = None

    class Config:
        from_attributes = True
@@ -0,0 +1,27 @@
+from pydantic import BaseModel
+from uuid import UUID
+from typing import Optional, List
+from datetime import datetime
+from app.schema.publication import PublicationSchema
+
+class ResearcherSchema(BaseModel):
+    # Serialized view of a researcher record.
+    id: UUID
+    orcid_id: str
+    # Optional fields have no default here, so a value (possibly None) must
+    # always be supplied when the schema is built.
+    name: Optional[str]
+    authenticated: bool
+    last_sync_at: Optional[datetime]
+
+    # Allows building the schema from an object's attributes (e.g. an ORM row).
+    model_config = {"from_attributes": True}
+
+
+class ResearcherWithPublicationsSchema(BaseModel):
+    # Response body pairing a researcher with its publications and the
+    # counters reported by the sync endpoints.
+    researcher: ResearcherSchema
+    publications: List[PublicationSchema]
+
+    # New fields: counters for the sync that produced this response.
+    new_records: int
+    updated_records: int
+    unchanged_records: int
+    total_records: int
+
+    # Allows building the schema from an object's attributes.
+    model_config = {"from_attributes": True}
@@ -0,0 +1,27 @@
+import os
+from dotenv import load_dotenv
+from fastapi import Depends, HTTPException, status
+from fastapi.security import APIKeyHeader
+
+# Cargar variables del .env
+load_dotenv()
+
+API_KEY_NAME = os.getenv("API_KEY_NAME")
+API_KEY_VALUE = os.getenv("API_KEY_VALUE")
+
+if not API_KEY_NAME:
+    raise RuntimeError("ERROR: La variable API_KEY_NAME no está definida en el .env")
+
+if not API_KEY_VALUE:
+    raise RuntimeError("ERROR: La variable API_KEY_VALUE no está definida en el .env")
+
+api_key_header = APIKeyHeader(name=API_KEY_NAME, auto_error=False)
+
+
+def get_api_key(api_key: str = Depends(api_key_header)):
+    """Validate the API key sent in the configured header.
+
+    Args:
+        api_key: Header value extracted by ``api_key_header``; it is ``None``
+            when the header is absent because ``auto_error`` is disabled.
+
+    Returns:
+        The key that was supplied, once it has been verified.
+
+    Raises:
+        HTTPException: 401 when the key is missing or does not match.
+    """
+    import secrets  # local import keeps this fix self-contained
+
+    # Compare as bytes in constant time so the response time does not reveal
+    # how much of the secret matched. A missing header is treated as an empty
+    # key, which can never equal the non-empty configured value.
+    supplied = (api_key or "").encode("utf-8")
+    expected = API_KEY_VALUE.encode("utf-8")
+    if not secrets.compare_digest(supplied, expected):
+        raise HTTPException(
+            status_code=status.HTTP_401_UNAUTHORIZED,
+            detail="API key inválida o ausente."
+        )
+    return api_key
@@ -1,74 +1,111 @@
+from typing import List
+
+
+def _get(d: dict | None, *keys, default=None):
+    """Walk nested dicts along ``keys``, returning ``default`` on any dead end.
+
+    A dead end is a non-dict value met before the path is finished, a
+    missing key, or a value that is ``None``.
+    """
+    current = d or {}
+    for key in keys:
+        # A non-dict node cannot be descended into; treat it like a miss.
+        nxt = current.get(key) if isinstance(current, dict) else None
+        if nxt is None:
+            return default
+        current = nxt
+    return current
+
+
 class PublicationNormalizer:
-
    @staticmethod
-    def safe_get_title(summary):
-        t = summary.get("title")
+    def normalize(summary: dict, detail: dict | None = None) -> dict:
+        """
+        summary: work-summary de ORCID
+        detail: work completo (puede ser None si la llamada falla)
+        """

-        if t is None:
-            return None
+        # --- Core desde summary ---
+        put_code = summary.get("put-code")

-        # Caso 1: {"title": {"value": "..."}}
-        if isinstance(t, dict) and "title" in t and isinstance(t["title"], dict):
-            return t["title"].get("value")
+        title = _get(summary, "title", "title", "value")
+        type_ = summary.get("type")

-        # Caso 2: {"title": {"title": "..."}} (muy común en /works)
-        if isinstance(t, dict) and "title" in t and isinstance(t["title"], str):
-            return t["title"]
+        journal = _get(summary, "journal-title", "value")

-        # Caso 3: {"title": "string"}
-        if isinstance(t, str):
-            return t
+        year = _get(summary, "publication-date", "year", "value")
+        month = _get(summary, "publication-date", "month", "value")
+        day = _get(summary, "publication-date", "day", "value")

-        # Caso 4: {"value": "..."}
-        if isinstance(t, dict) and "value" in t:
-            return t["value"]
+        url = _get(summary, "url", "value")
+        short_description = summary.get("short-description")

-        return None
-
-    @staticmethod
-    def normalize_work(summary: dict) -> dict:
-
-        title = PublicationNormalizer.safe_get_title(summary)
-
-        # Journal title
-        journal_raw = summary.get("journal-title")
-        if isinstance(journal_raw, dict):
-            journal = journal_raw.get("value") or journal_raw.get("title")
-        else:
-            journal = journal_raw
-
-        # DOI
+        # DOI desde summary (external-ids)
        doi = None
-        ext_ids = summary.get("external-ids", {}).get("external-id", [])
-        for ext in ext_ids:
+        external_ids_list: List[dict] = _get(
+            summary, "external-ids", "external-id", default=[]
+        ) or []
+        for ext in external_ids_list:
            if ext.get("external-id-type") == "doi":
                doi = ext.get("external-id-value")
                break

-        # Publication year
-        pub_year = (
-            summary.get("publication-date", {})
-                   .get("year", {})
-                   .get("value")
-        )
+        # --- Si tenemos detail, enriquecemos ---
+        subtitle = None
+        citation_type = None
+        citation_value = None
+        language_code = None
+        country = None
+        external_ids_full: List[dict] | None = None
+        contributors: List[dict] | None = None

-        # Type
-        work_type = summary.get("type")
+        if detail:
+            # Subtitle
+            subtitle = _get(detail, "title", "subtitle", "value") or subtitle

-        # put-code
-        put_code = summary.get("put-code")
+            # Citation
+            citation_type = _get(detail, "citation", "citation-type")
+            citation_value = _get(detail, "citation", "citation-value")

-        # Fingerprint
-        fingerprint = f"{title}-{doi}-{pub_year}-{work_type}"
-        if fingerprint:
-            fingerprint = fingerprint.lower().replace(" ", "")
+            # Language
+            language_code = detail.get("language-code")
+
+            # Country
+            country = _get(detail, "country", "value")
+
+            # External IDs completos
+            external_ids_full = _get(
+                detail, "external-ids", "external-id", default=[]
+            ) or []
+
+            # Contributors
+            raw_contributors = _get(
+                detail, "contributors", "contributor", default=[]
+            ) or []
+            contributors = []
+            for c in raw_contributors:
+                contributors.append(
+                    {
+                        "name": _get(c, "credit-name", "value"),
+                        "orcid": _get(c, "contributor-orcid", "path"),
+                        "role": _get(
+                            c, "contributor-attributes", "contributor-role"
+                        ),
+                    }
+                )

        return {
            "put_code": put_code,
-            "title": title or "Untitled",
+            "title": title,
+            "subtitle": subtitle,
+            "type": type_,
            "journal": journal,
+            "pub_year": int(year) if year is not None else None,
+            "pub_month": int(month) if month is not None else None,
+            "pub_day": int(day) if day is not None else None,
            "doi": doi,
-            "pub_year": pub_year,
-            "type": work_type,
-            "hash_fingerprint": fingerprint
+            "url": url,
+            "short_description": short_description,
+            "citation_type": citation_type,
+            "citation_value": citation_value,
+            "language_code": language_code,
+            "country": country,
+            "external_ids": external_ids_full,
+            "contributors": contributors,
+            "hash_fingerprint": None,
        }
@@ -1,28 +1,28 @@
-import httpx
 import os
 from typing import Optional

+import httpx
+
+TOKEN_URL_SANDBOX = "https://sandbox.orcid.org/oauth/token"
+BASE_URL_SANDBOX = "https://pub.sandbox.orcid.org/v3.0"
+
+# Si en algún momento pasas a producción, cambiarías a:
+# TOKEN_URL_PROD = "https://orcid.org/oauth/token"
+# BASE_URL_PROD = "https://pub.orcid.org/v3.0"
+
+
 class ORCIDClient:
-    
-    TOKEN_URL = "https://sandbox.orcid.org/oauth/token"
-    BASE_URL = "https://pub.sandbox.orcid.org/v3.0"
-
-    # TOKEN_URL = "https://orcid.org/oauth/token"
-    # BASE_URL = "https://pub.orcid.org/v3.0"
-
    def __init__(self):
        self.client_id = os.getenv("ORCID_CLIENT_ID")
        self.client_secret = os.getenv("ORCID_CLIENT_SECRET")
        self._token_cache: Optional[str] = None
+        self.token_url = TOKEN_URL_SANDBOX
+        self.base_url = BASE_URL_SANDBOX

    # ---------------------------------------------------------
    # 1. Obtener token público
    # ---------------------------------------------------------
    def get_public_token(self) -> str:
-        """
-        Obtiene un token público de ORCID (scope: /read-public).
-        Se cachea en memoria para evitar pedirlo cada vez.
-        """
        if self._token_cache:
            return self._token_cache

@@ -30,11 +30,11 @@ class ORCIDClient:
            "client_id": self.client_id,
            "client_secret": self.client_secret,
            "grant_type": "client_credentials",
-            "scope": "/read-public"
+            "scope": "/read-public",
        }

        with httpx.Client(timeout=20.0) as client:
-            response = client.post(self.TOKEN_URL, data=data)
+            response = client.post(self.token_url, data=data)
            response.raise_for_status()
            token = response.json()["access_token"]
            self._token_cache = token
@@ -43,29 +43,53 @@ class ORCIDClient:
    # ---------------------------------------------------------
    # Headers comunes
    # ---------------------------------------------------------
-    def _headers(self):
+    def _headers(self) -> dict:
+        """Return the HTTP headers shared by every API request.
+
+        Asks for a JSON response and authenticates with the bearer token
+        returned by ``get_public_token``.
+        """
        token = self.get_public_token()
        return {
            "Accept": "application/json",
-            "Authorization": f"Bearer {token}"
+            "Authorization": f"Bearer {token}",
        }

    # ---------------------------------------------------------
    # 2. Consultar /record
    # ---------------------------------------------------------
    def fetch_record(self, orcid_id: str) -> dict:
+        """Fetch the ``/record`` resource for an ORCID iD.
+
+        Args:
+            orcid_id: ORCID iD placed in the request path.
+
+        Returns:
+            The decoded JSON body of the response.
+
+        Raises:
+            httpx.HTTPStatusError: if the response has an error status
+                (raised by ``raise_for_status``).
+        """
-        url = f"{self.BASE_URL}/{orcid_id}/record"
+        url = f"{self.base_url}/{orcid_id}/record"
        with httpx.Client(timeout=20.0) as client:
            response = client.get(url, headers=self._headers())
            response.raise_for_status()
            return response.json()

    # ---------------------------------------------------------
-    # 3. Consultar /works
+    # 3. Consultar /works (summary)
    # ---------------------------------------------------------
    def fetch_works(self, orcid_id: str) -> dict:
+        """Fetch the ``/works`` resource (work summaries) for an ORCID iD.
+
+        Args:
+            orcid_id: ORCID iD placed in the request path.
+
+        Returns:
+            The decoded JSON body of the response.
+
+        Raises:
+            httpx.HTTPStatusError: if the response has an error status
+                (raised by ``raise_for_status``).
+        """
-        url = f"{self.BASE_URL}/{orcid_id}/works"
+        url = f"{self.base_url}/{orcid_id}/works"
        with httpx.Client(timeout=20.0) as client:
            response = client.get(url, headers=self._headers())
            response.raise_for_status()
            return response.json()
+
+    # ---------------------------------------------------------
+    # 4. Consultar /work/{put_code} (detalle)
+    # ---------------------------------------------------------
+    def fetch_work_detail(self, orcid_id: str, put_code: int) -> dict | None:
+        """Fetch the full record of one work, identified by its put-code.
+
+        Args:
+            orcid_id: ORCID iD placed in the request path.
+            put_code: Put-code of the work to retrieve.
+
+        Returns:
+            The decoded JSON body when the response status is 200, otherwise
+            ``None`` (no exception is raised for error statuses).
+        """
+        detail_url = f"{self.base_url}/{orcid_id}/work/{put_code}"
+        with httpx.Client(timeout=20.0) as http:
+            reply = http.get(detail_url, headers=self._headers())
+            # Any status other than 200 is reported to the caller as "no detail".
+            return reply.json() if reply.status_code == 200 else None
+
+
+# -------------------------------------------------------------------
+# Module-level helpers used by researchers.py
+# -------------------------------------------------------------------
+# ORCIDClient remembers its token per instance, so building a new client on
+# every call would request a fresh OAuth token each time (one extra round trip
+# per work when fetching details). A single lazily created client is reused.
+_shared_client: ORCIDClient | None = None
+
+
+def _get_shared_client() -> ORCIDClient:
+    """Return the module-wide ORCIDClient, creating it on first use."""
+    global _shared_client
+    if _shared_client is None:
+        _shared_client = ORCIDClient()
+    return _shared_client
+
+
+def get_works_summary(orcid_id: str) -> dict:
+    """Return the ``/works`` summary payload for the given ORCID iD.
+
+    Raises:
+        httpx.HTTPStatusError: propagated from ``ORCIDClient.fetch_works``.
+    """
+    return _get_shared_client().fetch_works(orcid_id)
+
+
+def get_work_detail(orcid_id: str, put_code: int) -> dict | None:
+    """Return the detail payload of one work, or ``None`` on a non-200 reply."""
+    return _get_shared_client().fetch_work_detail(orcid_id, put_code)
@@ -1,155 +0,0 @@
-from datetime import datetime
-from xml.etree.ElementTree import Element, SubElement, tostring
-from io import BytesIO
-import zipfile
-import json
-
-
-class SWORDExporter:
-
-    ATOM_NS = "http://www.w3.org/2005/Atom"
-    DC_NS = "http://purl.org/dc/elements/1.1/"
-
-    # ---------------------------------------------------------
-    # 1) XML PRINCIPAL (sword.xml)
-    # ---------------------------------------------------------
-    @staticmethod
-    def export_feed_xml(researcher, publications) -> bytes:
-        feed = Element("feed", xmlns=SWORDExporter.ATOM_NS)
-
-        title = SubElement(feed, "title")
-        title.text = f"Publications for {researcher.orcid_id}"
-
-        author = SubElement(feed, "author")
-        name = SubElement(author, "name")
-        name.text = researcher.name or "Unknown"
-
-        updated = SubElement(feed, "updated")
-        updated.text = datetime.utcnow().isoformat() + "Z"
-
-        feed_id = SubElement(feed, "id")
-        feed_id.text = f"urn:uuid:{researcher.id}"
-
-        for pub in publications:
-            entry = SubElement(feed, "entry")
-
-            entry_id = SubElement(entry, "id")
-            entry_id.text = f"urn:uuid:{pub.id}"
-
-            entry_updated = SubElement(entry, "updated")
-            entry_updated.text = datetime.utcnow().isoformat() + "Z"
-
-            dc_title = SubElement(entry, f"{{{SWORDExporter.DC_NS}}}title")
-            dc_title.text = pub.title
-
-            if pub.doi:
-                dc_identifier = SubElement(entry, f"{{{SWORDExporter.DC_NS}}}identifier")
-                dc_identifier.text = f"doi:{pub.doi}"
-
-            if pub.pub_year:
-                dc_date = SubElement(entry, f"{{{SWORDExporter.DC_NS}}}date")
-                dc_date.text = str(pub.pub_year)
-
-            if pub.type:
-                dc_type = SubElement(entry, f"{{{SWORDExporter.DC_NS}}}type")
-                dc_type.text = pub.type
-
-            if pub.journal:
-                dc_source = SubElement(entry, f"{{{SWORDExporter.DC_NS}}}source")
-                dc_source.text = pub.journal
-
-        xml_bytes = tostring(feed, encoding="utf-8", xml_declaration=True)
-        return xml_bytes
-
-    # ---------------------------------------------------------
-    # 2) manifest.txt
-    # ---------------------------------------------------------
-    @staticmethod
-    def generate_manifest(researcher, publications) -> str:
-        lines = [
-            "SWORD Deposit Package",
-            "----------------------",
-            f"Researcher ORCID: {researcher.orcid_id}",
-            f"Researcher Name: {researcher.name or 'Unknown'}",
-            f"Total Publications: {len(publications)}",
-            f"Generated At: {datetime.utcnow().isoformat()}Z",
-            "",
-            "Publications:",
-        ]
-
-        for pub in publications:
-            lines.append(f"- {pub.title} ({pub.pub_year}) DOI={pub.doi}")
-
-        return "\n".join(lines)
-
-    # ---------------------------------------------------------
-    # 3) metadata.json
-    # ---------------------------------------------------------
-    @staticmethod
-    def generate_metadata_json(researcher, publications) -> str:
-        data = {
-            "researcher": {
-                "orcid_id": researcher.orcid_id,
-                "name": researcher.name,
-                "id": str(researcher.id),
-            },
-            "generated_at": datetime.utcnow().isoformat() + "Z",
-            "publications": [
-                {
-                    "id": str(pub.id),
-                    "title": pub.title,
-                    "doi": pub.doi,
-                    "year": pub.pub_year,
-                    "type": pub.type,
-                    "journal": pub.journal,
-                }
-                for pub in publications
-            ],
-        }
-        return json.dumps(data, indent=4)
-
-    # ---------------------------------------------------------
-    # 4) mets.xml (versión simple)
-    # ---------------------------------------------------------
-    @staticmethod
-    def generate_mets_xml(researcher, publications) -> bytes:
-        mets = Element("mets", xmlns="http://www.loc.gov/METS/")
-
-        header = SubElement(mets, "metsHdr")
-        agent = SubElement(header, "agent", ROLE="CREATOR", TYPE="OTHER")
-        name = SubElement(agent, "name")
-        name.text = "ORCID Exporter System"
-
-        dmd_sec = SubElement(mets, "dmdSec", ID="dmd1")
-        md_wrap = SubElement(dmd_sec, "mdWrap", MDTYPE="DC")
-        xml_data = SubElement(md_wrap, "xmlData")
-
-        for pub in publications:
-            dc_title = SubElement(xml_data, f"{{{SWORDExporter.DC_NS}}}title")
-            dc_title.text = pub.title
-
-            if pub.doi:
-                dc_id = SubElement(xml_data, f"{{{SWORDExporter.DC_NS}}}identifier")
-                dc_id.text = f"doi:{pub.doi}"
-
-        return tostring(mets, encoding="utf-8", xml_declaration=True)
-
-    # ---------------------------------------------------------
-    # 5) ZIP FINAL
-    # ---------------------------------------------------------
-    @staticmethod
-    def export_zip(researcher, publications) -> bytes:
-        xml_bytes = SWORDExporter.export_feed_xml(researcher, publications)
-        manifest = SWORDExporter.generate_manifest(researcher, publications)
-        metadata_json = SWORDExporter.generate_metadata_json(researcher, publications)
-        mets_xml = SWORDExporter.generate_mets_xml(researcher, publications)
-
-        mem_file = BytesIO()
-        with zipfile.ZipFile(mem_file, mode="w", compression=zipfile.ZIP_DEFLATED) as zf:
-            zf.writestr("sword.xml", xml_bytes)
-            zf.writestr("manifest.txt", manifest)
-            zf.writestr("metadata.json", metadata_json)
-            zf.writestr("mets.xml", mets_xml)
-
-        mem_file.seek(0)
-        return mem_file.read()
@@ -0,0 +1,112 @@
+from datetime import datetime
+from xml.etree.ElementTree import Element, SubElement, tostring
+from app.db.models import Publication, Researcher
+
+ATOM_NS = "http://www.w3.org/2005/Atom"
+DC_NS = "http://purl.org/dc/elements/1.1/"
+EXTRA_NS = "http://example.org/orcid-extra"   # namespace para campos extendidos
+
+
+class SWORDGenerator:
+    """Builds the Atom feed that describes a researcher's publications."""
+
+    @staticmethod
+    def _format_date(year, month, day) -> str:
+        """Return ``YYYY``, ``YYYY-MM`` or ``YYYY-MM-DD`` for a partial date.
+
+        The day is only appended when the month is known; without the month
+        the result would read ``YYYY-DD`` and be mistaken for a month.
+        Month and day are coerced with ``int`` so numeric strings also work.
+        """
+        parts = [str(year)]
+        if month:
+            parts.append(f"{int(month):02d}")
+            if day:
+                parts.append(f"{int(day):02d}")
+        return "-".join(parts)
+
+    @staticmethod
+    def generate_feed_xml(researcher: Researcher, publications: list[Publication]) -> bytes:
+        """Serialize a researcher and their publications as an Atom feed.
+
+        Args:
+            researcher: Owner of the feed; ``orcid_id``, ``name`` and ``id``
+                are read.
+            publications: Publications emitted as one ``<entry>`` each.
+
+        Returns:
+            The UTF-8 encoded XML document, including the XML declaration.
+        """
+        # Namespaces are declared once, by hand, on the root element and the
+        # child tags carry literal prefixes. Combining a hand-written
+        # ``xmlns:dc`` with ``{uri}tag`` names makes ElementTree declare the
+        # ``dc`` prefix a second time, which duplicates the attribute and
+        # produces a document that XML parsers reject.
+        feed = Element("feed", {
+            "xmlns": ATOM_NS,
+            "xmlns:dc": DC_NS,
+            "xmlns:extra": EXTRA_NS,
+        })
+
+        # A single timestamp keeps every <updated> element of the feed equal.
+        generated_at = datetime.utcnow().isoformat() + "Z"
+
+        SubElement(feed, "title").text = f"Publications for {researcher.orcid_id}"
+
+        author = SubElement(feed, "author")
+        SubElement(author, "name").text = researcher.name or "Unknown"
+
+        SubElement(feed, "updated").text = generated_at
+        SubElement(feed, "id").text = f"urn:uuid:{researcher.id}"
+
+        for pub in publications:
+            entry = SubElement(feed, "entry")
+
+            SubElement(entry, "id").text = f"urn:uuid:{pub.id}"
+            SubElement(entry, "updated").text = generated_at
+
+            # Title
+            SubElement(entry, "dc:title").text = pub.title or "Untitled"
+
+            # Subtitle
+            if pub.subtitle:
+                SubElement(entry, "extra:subtitle").text = pub.subtitle
+
+            # DOI
+            if pub.doi:
+                SubElement(entry, "dc:identifier").text = f"doi:{pub.doi}"
+
+            # Journal
+            if pub.journal:
+                SubElement(entry, "dc:source").text = pub.journal
+
+            # URL
+            if pub.url:
+                SubElement(entry, "dc:relation").text = pub.url
+
+            # Short description
+            if pub.short_description:
+                SubElement(entry, "dc:description").text = pub.short_description
+
+            # Citation
+            if pub.citation_value:
+                cit = SubElement(entry, "extra:citation")
+                SubElement(cit, "type").text = pub.citation_type or "unknown"
+                SubElement(cit, "value").text = pub.citation_value
+
+            # Language
+            if pub.language_code:
+                SubElement(entry, "dc:language").text = pub.language_code
+
+            # Country
+            if pub.country:
+                SubElement(entry, "extra:country").text = pub.country
+
+            # External IDs
+            if pub.external_ids:
+                ext_ids_el = SubElement(entry, "extra:external_ids")
+                for ext in pub.external_ids:
+                    ext_el = SubElement(ext_ids_el, "external_id")
+                    for key, raw in ext.items():
+                        # Values wrapped as {"value": ...} are unwrapped first.
+                        if isinstance(raw, dict) and "value" in raw:
+                            raw = raw["value"]
+                        # Missing values are skipped instead of being written
+                        # as the literal text "None".
+                        if raw is None:
+                            continue
+                        # str() because the serializer only accepts text.
+                        SubElement(ext_el, key).text = str(raw)
+
+            # Contributors
+            if pub.contributors:
+                contribs_el = SubElement(entry, "extra:contributors")
+                for c in pub.contributors:
+                    c_el = SubElement(contribs_el, "contributor")
+                    SubElement(c_el, "name").text = c.get("name")
+                    SubElement(c_el, "orcid").text = c.get("orcid")
+                    SubElement(c_el, "role").text = c.get("role")
+
+            # Date
+            if pub.pub_year:
+                SubElement(entry, "dc:date").text = SWORDGenerator._format_date(
+                    pub.pub_year, pub.pub_month, pub.pub_day
+                )
+
+            # Type
+            if pub.type:
+                SubElement(entry, "dc:type").text = pub.type
+
+            # Status (new / updated / unchanged); the attribute is optional.
+            status = getattr(pub, "status", None)
+            if status:
+                SubElement(entry, "extra:status").text = status
+
+            # Last modified
+            if pub.last_modified:
+                SubElement(entry, "extra:last_modified").text = pub.last_modified.isoformat()
+
+        return tostring(feed, encoding="utf-8", xml_declaration=True)
@@ -1,10 +1,12 @@
 from sqlalchemy.orm import Session
+import httpx
+
 from app.services.orcid_client import ORCIDClient
 from app.services.normalizer import PublicationNormalizer
-from app.repositories.researcher_repository import ResearcherRepository
-from app.repositories.publication_repository import PublicationRepository
-from app.repositories.syncjob_repository import SyncJobRepository
-import httpx
+
+from app.db.repositories.researcher_repository import ResearcherRepository
+from app.db.repositories.publication_repository import PublicationRepository
+from app.db.repositories.syncjob_repository import SyncJobRepository


 class SyncService:
@@ -16,8 +18,6 @@ class SyncService:
        """
        Sincroniza las publicaciones de un investigador con manejo robusto de errores.
        """
-
-        # 1. Obtener o crear investigador
        try:
            researcher = ResearcherRepository.get_by_orcid(db, orcid_id)

@@ -35,14 +35,23 @@ class SyncService:
            if e.response.status_code == 404:
                return {
                    "status": "error",
-                    "message": f"El ORCID {orcid_id} no existe en Sandbox."
+                    "code": 404,
+                    "message": f"El ORCID {orcid_id} no existe en ORCID."
                }
-            return {"status": "error", "message": str(e)}
+            return {
+                "status": "error",
+                "code": e.response.status_code,
+                "message": f"Error al consultar ORCID: {str(e)}"
+            }
+        except Exception as e:
+            return {
+                "status": "error",
+                "code": 500,
+                "message": f"Error interno durante la sincronización: {str(e)}"
+            }

-        # 2. Crear SyncJob
        job = SyncJobRepository.start_job(db, researcher.id)

-        # 3. Obtener works
        try:
            works_raw = self.orcid_client.fetch_works(orcid_id)
        except httpx.HTTPStatusError as e:
@@ -56,19 +65,27 @@ class SyncService:
                    "updated_records": 0,
                    "total": 0
                }
-            return {"status": "error", "message": str(e)}
+            return {
+                "status": "error",
+                "code": e.response.status_code,
+                "message": f"Error al obtener works de ORCID: {str(e)}"
+            }
+        except Exception as e:
+            return {
+                "status": "error",
+                "code": 500,
+                "message": f"Error interno al obtener works: {str(e)}"
+            }

        groups = works_raw.get("group", [])

        new_records = 0
        updated_records = 0

-        # 4. Procesar works
        for group in groups:
            summary = group["work-summary"][0]
            normalized = PublicationNormalizer.normalize_work(summary)

-            # 🔥 AHORA SE DETECTAN DUPLICADOS POR put_code
            existing = PublicationRepository.get_by_put_code(
                db, researcher.id, normalized["put_code"]
            )
@@ -80,17 +97,40 @@ class SyncService:
                PublicationRepository.create(db, researcher.id, normalized)
                new_records += 1

-        # 5. Finalizar SyncJob
        SyncJobRepository.finish_job(db, job, new_records, updated_records)
-
-        # 6. Actualizar last_sync_at
        ResearcherRepository.update_last_sync(db, researcher)

        return {
            "status": "ok",
            "message": "Sincronización completada correctamente.",
-            "researcher": researcher.orcid_id,
+            "researcher_id": researcher.id,
            "new_records": new_records,
            "updated_records": updated_records,
            "total": new_records + updated_records
        }
+
+    def sync_and_get_full(self, db: Session, orcid_id: str):
+        """
+        Run a synchronization and return the researcher with their publications.
+
+        Meant for the search feature, which needs everything in one request.
+        An error result from ``sync_researcher`` is handed back unchanged.
+        """
+        outcome = self.sync_researcher(db, orcid_id)
+        if outcome.get("status") == "error":
+            return outcome
+
+        found = ResearcherRepository.get_by_orcid(db, orcid_id)
+        if found:
+            return {
+                "status": "ok",
+                "researcher": found,
+                "publications": PublicationRepository.list_by_researcher(db, found.id),
+            }
+
+        # The sync reported success but the researcher cannot be loaded back.
+        return {
+            "status": "error",
+            "code": 500,
+            "message": "Error interno: investigador no encontrado tras sincronización.",
+        }
@@ -0,0 +1,165 @@
+import io
+import zipfile
+import json
+from datetime import datetime
+from xml.etree.ElementTree import Element, SubElement, tostring
+
+from app.db.models import Publication, Researcher
+from app.services.sword_generator import SWORDGenerator
+
+
+class ZIPGenerator:
+    """Assembles the in-memory ZIP deposit package for a researcher."""
+
+    # Clark-notation prefix shared by every Dublin Core tag in mets.xml.
+    _DC = "{http://purl.org/dc/elements/1.1/}"
+
+    @staticmethod
+    def _format_date(year, month, day) -> str:
+        """Return ``YYYY``, ``YYYY-MM`` or ``YYYY-MM-DD`` for a partial date.
+
+        The day is only appended when the month is known; without the month
+        the result would read ``YYYY-DD`` and be mistaken for a month.
+        Month and day are coerced with ``int`` so numeric strings also work.
+        """
+        parts = [str(year)]
+        if month:
+            parts.append(f"{int(month):02d}")
+            if day:
+                parts.append(f"{int(day):02d}")
+        return "-".join(parts)
+
+    # ---------------------------------------------------------
+    # MANIFEST.TXT
+    # ---------------------------------------------------------
+    @staticmethod
+    def generate_manifest(researcher, publications):
+        """Return the human-readable manifest text for the package.
+
+        A missing researcher name is shown as "Unknown" and a missing title
+        as "Untitled", matching the fallbacks used in the SWORD feed.
+        """
+        lines = [
+            "SWORD Deposit Package",
+            "----------------------",
+            f"Researcher ORCID: {researcher.orcid_id}",
+            f"Researcher Name: {researcher.name or 'Unknown'}",
+            f"Researcher UUID: {researcher.id}",
+            f"Total Publications: {len(publications)}",
+            f"Generated At: {datetime.utcnow().isoformat()}Z",
+            "",
+            "Publications:",
+        ]
+        lines.extend(
+            f"- {pub.title or 'Untitled'} ({pub.pub_year or 'Unknown'})"
+            f" | DOI={pub.doi} | TYPE={pub.type}"
+            for pub in publications
+        )
+        return "\n".join(lines)
+
+    # ---------------------------------------------------------
+    # METADATA.JSON
+    # ---------------------------------------------------------
+    @staticmethod
+    def generate_metadata_json(researcher, publications):
+        """Return a JSON document with the researcher and every publication field."""
+        data = {
+            "researcher": {
+                "orcid_id": researcher.orcid_id,
+                "name": researcher.name,
+                "id": str(researcher.id),
+                "last_sync_at": researcher.last_sync_at.isoformat() if researcher.last_sync_at else None,
+            },
+            "generated_at": datetime.utcnow().isoformat() + "Z",
+            "publications": [
+                {
+                    "id": str(pub.id),
+                    "put_code": pub.put_code,
+                    "title": pub.title,
+                    "subtitle": pub.subtitle,
+                    "doi": pub.doi,
+                    "journal": pub.journal,
+                    "type": pub.type,
+                    "url": pub.url,
+                    "short_description": pub.short_description,
+                    "citation_type": pub.citation_type,
+                    "citation_value": pub.citation_value,
+                    "language_code": pub.language_code,
+                    "country": pub.country,
+                    "pub_year": pub.pub_year,
+                    "pub_month": pub.pub_month,
+                    "pub_day": pub.pub_day,
+                    "external_ids": pub.external_ids,
+                    "contributors": pub.contributors,
+                    "hash_fingerprint": pub.hash_fingerprint,
+                    "last_modified": pub.last_modified.isoformat() if pub.last_modified else None,
+                    # "status" is optional on the publication object.
+                    "status": getattr(pub, "status", None),
+                }
+                for pub in publications
+            ],
+        }
+
+        return json.dumps(data, indent=4)
+
+    # ---------------------------------------------------------
+    # METS.XML
+    # ---------------------------------------------------------
+    @staticmethod
+    def generate_mets_xml(researcher, publications):
+        """Return mets.xml as UTF-8 bytes, with Dublin Core data per publication.
+
+        ``researcher`` is accepted for signature symmetry with the other
+        generators; it is not read here.
+        """
+        mets = Element("mets", xmlns="http://www.loc.gov/METS/")
+
+        header = SubElement(mets, "metsHdr")
+        agent = SubElement(header, "agent", ROLE="CREATOR", TYPE="OTHER")
+        SubElement(agent, "name").text = "ORCID Exporter System"
+
+        dmd_sec = SubElement(mets, "dmdSec", ID="dmd1")
+        md_wrap = SubElement(dmd_sec, "mdWrap", MDTYPE="DC")
+        xml_data = SubElement(md_wrap, "xmlData")
+
+        dc = ZIPGenerator._DC
+        for pub in publications:
+            SubElement(xml_data, f"{dc}title").text = pub.title or "Untitled"
+
+            # Optional elements, in output order; empty values are skipped.
+            # NOTE(review): bibliographicCitation is a DCMI Terms property,
+            # not part of the elements/1.1 namespace -- confirm with consumers
+            # before changing the tag.
+            optional = (
+                ("description", pub.subtitle),
+                ("identifier", f"doi:{pub.doi}" if pub.doi else None),
+                ("source", pub.journal),
+                ("relation", pub.url),
+                ("description", pub.short_description),
+                ("bibliographicCitation", pub.citation_value),
+                ("language", pub.language_code),
+                ("coverage", pub.country),
+                (
+                    "date",
+                    ZIPGenerator._format_date(pub.pub_year, pub.pub_month, pub.pub_day)
+                    if pub.pub_year
+                    else None,
+                ),
+                ("type", pub.type),
+            )
+            for tag, value in optional:
+                if value:
+                    SubElement(xml_data, f"{dc}{tag}").text = value
+
+        return tostring(mets, encoding="utf-8", xml_declaration=True)
+
+    # ---------------------------------------------------------
+    # FINAL ZIP
+    # ---------------------------------------------------------
+    @staticmethod
+    def generate_zip(researcher, publications):
+        """Return the bytes of a ZIP holding the four package files.
+
+        The archive contains sword.xml, manifest.txt, metadata.json and
+        mets.xml, compressed with DEFLATE.
+        """
+        xml_bytes = SWORDGenerator.generate_feed_xml(researcher, publications)
+        manifest = ZIPGenerator.generate_manifest(researcher, publications)
+        metadata_json = ZIPGenerator.generate_metadata_json(researcher, publications)
+        mets_xml = ZIPGenerator.generate_mets_xml(researcher, publications)
+
+        mem_file = io.BytesIO()
+        with zipfile.ZipFile(mem_file, "w", zipfile.ZIP_DEFLATED) as zf:
+            zf.writestr("sword.xml", xml_bytes)
+            zf.writestr("manifest.txt", manifest)
+            zf.writestr("metadata.json", metadata_json)
+            zf.writestr("mets.xml", mets_xml)
+
+        # getvalue() returns the whole buffer regardless of the stream position.
+        return mem_file.getvalue()
@@ -8,4 +8,6 @@ python-dotenv
 lxml
 apscheduler
 authlib
-redis
+redis
+APScheduler==3.10.4
+requests