feat(export): perfiles DSpace/EPrints/Dublin Core y selector SWORD en UI

Backend: generadores por repositorio, ZIP multi-formato y query profile en /export/sword. Frontend: selector Destino que envía profile al descargar SWORD XML.
This commit is contained in:
Mireya Cueto Garrido
2026-05-20 13:25:35 +02:00
parent 9b596af494
commit aa2e7280dc
9 changed files with 585 additions and 64 deletions
+32 -6
View File
@@ -1,7 +1,7 @@
from typing import Iterable, List from typing import Iterable, List
from uuid import UUID from uuid import UUID
from fastapi import APIRouter, Body, Depends, HTTPException, Path, Request from fastapi import APIRouter, Body, Depends, HTTPException, Path, Query, Request
from fastapi.responses import Response from fastapi.responses import Response
from sqlalchemy.orm import Session from sqlalchemy.orm import Session
@@ -12,7 +12,7 @@ from app.db.session import get_db
from app.security.export_auth import require_export_access from app.security.export_auth import require_export_access
from app.services.orcid_client import get_display_name from app.services.orcid_client import get_display_name
from app.services.publication_enrichment import enrich_publications_from_orcid from app.services.publication_enrichment import enrich_publications_from_orcid
from app.services.sword_generator import SWORDGenerator from app.services.repository_export import EXPORT_PROFILES, generate_repository_xml
from app.services.zip_generator import ZIPGenerator from app.services.zip_generator import ZIPGenerator
from app.utils.orcid_validator import ORCID_PATTERN, is_valid_orcid from app.utils.orcid_validator import ORCID_PATTERN, is_valid_orcid
@@ -96,6 +96,18 @@ def _raise_clear_error_if_researcher_id_was_used(db: Session, pub_ids: List[UUID
) )
def _export_xml_response(
    researcher: Researcher,
    pubs: List[Publication],
    profile: str,
) -> Response:
    """Build the XML export for ``profile`` and wrap it in an HTTP response.

    A ``ValueError`` raised while generating the document is reported to the
    client as HTTP 400, with the exception message as the error detail.
    """
    try:
        payload = generate_repository_xml(researcher, pubs, profile)
    except ValueError as exc:
        raise HTTPException(status_code=400, detail=str(exc)) from exc
    else:
        return Response(content=payload, media_type="application/xml")
# --------------------------------------------------------- # ---------------------------------------------------------
# ENDPOINT 1: SWORD múltiples publicaciones # ENDPOINT 1: SWORD múltiples publicaciones
# --------------------------------------------------------- # ---------------------------------------------------------
@@ -105,6 +117,13 @@ def _raise_clear_error_if_researcher_id_was_used(db: Session, pub_ids: List[UUID
async def export_multiple_sword( async def export_multiple_sword(
request: Request, request: Request,
pub_ids: List[UUID] = Body(..., min_length=1, max_length=settings.MAX_PUB_IDS_BATCH), pub_ids: List[UUID] = Body(..., min_length=1, max_length=settings.MAX_PUB_IDS_BATCH),
profile: str = Query(
"generic",
description=(
"Perfil de repositorio: generic (Atom ORCID), dublin_core, dspace, eprints. "
f"Valores: {', '.join(EXPORT_PROFILES)}"
),
),
db: Session = Depends(get_db), db: Session = Depends(get_db),
current: Researcher | None = Depends(require_export_access), current: Researcher | None = Depends(require_export_access),
): ):
@@ -121,11 +140,11 @@ async def export_multiple_sword(
_prepare_researcher_and_publications_for_export(db, researcher, pubs) _prepare_researcher_and_publications_for_export(db, researcher, pubs)
xml_bytes = SWORDGenerator.generate_feed_xml(researcher, pubs) response = _export_xml_response(researcher, pubs, profile)
if current: if current:
_record_downloads(db, current, pubs) _record_downloads(db, current, pubs)
return Response(content=xml_bytes, media_type="application/xml") return response
# --------------------------------------------------------- # ---------------------------------------------------------
@@ -137,6 +156,13 @@ async def export_multiple_sword(
async def export_researcher_sword( async def export_researcher_sword(
request: Request, request: Request,
orcid_id: str = Path(min_length=19, max_length=19, pattern=ORCID_PATTERN), orcid_id: str = Path(min_length=19, max_length=19, pattern=ORCID_PATTERN),
profile: str = Query(
"generic",
description=(
"Perfil de repositorio: generic (Atom ORCID), dublin_core, dspace, eprints. "
f"Valores: {', '.join(EXPORT_PROFILES)}"
),
),
db: Session = Depends(get_db), db: Session = Depends(get_db),
current: Researcher | None = Depends(require_export_access), current: Researcher | None = Depends(require_export_access),
): ):
@@ -153,11 +179,11 @@ async def export_researcher_sword(
_prepare_researcher_and_publications_for_export(db, researcher, pubs) _prepare_researcher_and_publications_for_export(db, researcher, pubs)
xml_bytes = SWORDGenerator.generate_feed_xml(researcher, pubs) response = _export_xml_response(researcher, pubs, profile)
if current: if current:
_record_downloads(db, current, pubs) _record_downloads(db, current, pubs)
return Response(content=xml_bytes, media_type="application/xml") return response
# --------------------------------------------------------- # ---------------------------------------------------------
+365
View File
@@ -0,0 +1,365 @@
"""
Exportadores orientados a repositorios: Dublin Core, DSpace y EPrints.
Perfiles soportados (query `profile` en /export/sword/...):
- generic → feed Atom ORCID (compatibilidad hacia atrás)
- dublin_core → XML con un registro DC por publicación
- dspace → feed Atom con metadatos DSpace / Dublin Core ampliado
- eprints → XML de importación EPrints (EP3)
El ZIP incluye todos los perfiles bajo `formats/` más SAF DSpace en `dspace-saf/`.
"""
from __future__ import annotations
import csv
import io
from datetime import datetime
from typing import Iterable, List
from xml.etree.ElementTree import Element, SubElement, tostring
from app.db.models import Publication, Researcher
# XML namespace URIs used by the generators in this module.
ATOM_NS = "http://www.w3.org/2005/Atom"
DC_NS = "http://purl.org/dc/elements/1.1/"
DCTERMS_NS = "http://purl.org/dc/terms/"
SWORD_NS = "http://purl.org/net/sword/"
EPRINTS_NS = "http://eprints.org/ep3/data"
# Profile names accepted by normalize_profile(); "generic" is its fallback
# when no profile is given.
EXPORT_PROFILES = ("generic", "dublin_core", "dspace", "eprints")
# Publication type -> label returned by dspace_type().
# NOTE(review): the keys look like ORCID work-type identifiers; confirm
# against the values actually stored in Publication.type.
_DSPACE_TYPE_MAP = {
"journal-article": "Article",
"book-chapter": "Book chapter",
"book": "Book",
"conference-paper": "Conference paper",
"conference-abstract": "Conference paper",
"dissertation-thesis": "Thesis",
"report": "Report",
"preprint": "Preprint",
"other": "Other",
}
# Publication type -> value returned by eprints_type(); types missing from
# this map resolve to "other" there.
_EPRINTS_TYPE_MAP = {
"journal-article": "article",
"book-chapter": "book_section",
"book": "book",
"conference-paper": "conference_item",
"conference-abstract": "conference_item",
"dissertation-thesis": "thesis",
"report": "report",
"preprint": "preprint",
"other": "other",
}
def normalize_profile(profile: str | None) -> str:
    """Return the canonical (trimmed, lower-case) export profile name.

    ``None`` and the empty string fall back to ``"generic"``.

    Raises:
        ValueError: if the normalised name is not listed in
            ``EXPORT_PROFILES``.
    """
    candidate = (profile or "generic").strip().lower()
    if candidate in EXPORT_PROFILES:
        return candidate
    allowed = ", ".join(EXPORT_PROFILES)
    raise ValueError(
        f"Invalid export profile {profile!r}. "
        f"Use one of: {allowed}"
    )
def publication_date_iso(pub: Publication) -> str | None:
    """Return the publication date as ``YYYY``, ``YYYY-MM`` or ``YYYY-MM-DD``.

    The result is as precise as the stored parts allow. ``None`` is returned
    when the publication has no year.
    """
    if not pub.pub_year:
        return None
    parts = [str(pub.pub_year)]
    if pub.pub_month:
        # int() keeps the zero padding working when the part is stored as a
        # numeric string rather than an integer.
        parts.append(f"{int(pub.pub_month):02d}")
        # A day is only meaningful after a month; appending it on its own
        # would produce an invalid "YYYY-DD" value.
        if pub.pub_day:
            parts.append(f"{int(pub.pub_day):02d}")
    return "-".join(parts)
def contributor_names(pub: Publication) -> List[str]:
    """Return the non-empty contributor names of ``pub``, in stored order.

    ``pub.contributors`` is read as a sequence of dicts carrying a ``"name"``
    key. A missing sequence, ``None`` entries, entries that are not dicts and
    blank names are skipped, so the result never contains empty strings.
    """
    names: List[str] = []
    for item in pub.contributors or []:
        # Only dict entries can carry a name; anything else is ignored
        # instead of failing on the missing ``.get``.
        if not isinstance(item, dict):
            continue
        raw_name = item.get("name")
        if not raw_name:
            continue
        cleaned = str(raw_name).strip()
        if cleaned:
            names.append(cleaned)
    return names
def split_person_name(full_name: str) -> tuple[str, str]:
    """Split a personal name into ``(family, given)``.

    Two layouts are recognised:

    * ``"Family, Given"``: everything before the first comma is the family
      name, the rest is the given name.
    * ``"Given ... Family"``: the last whitespace-separated token is the
      family name, the preceding tokens are the given name.

    A single-token name is returned as the family name with an empty given
    name. Surrounding and repeated whitespace is discarded.
    """
    cleaned = " ".join(full_name.split())
    if "," in cleaned:
        family, _, given = cleaned.partition(",")
        family, given = family.strip(), given.strip()
        if family:
            return family, given
        # Nothing before the comma: treat the remainder as a plain name.
        cleaned = given
    parts = cleaned.split()
    if len(parts) <= 1:
        return cleaned, ""
    return parts[-1], " ".join(parts[:-1])
def dspace_type(pub: Publication) -> str:
    """Return the DSpace type label for ``pub``.

    Types present in ``_DSPACE_TYPE_MAP`` use the mapped label. Any other
    type is title-cased with hyphens turned into spaces. A publication
    without a type yields ``"Other"``.
    """
    raw_type = pub.type
    if raw_type:
        fallback = raw_type.replace("-", " ").title()
        return _DSPACE_TYPE_MAP.get(raw_type, fallback)
    return "Other"
def eprints_type(pub: Publication) -> str:
    """Return the EPrints type for ``pub``.

    A missing type, or one absent from ``_EPRINTS_TYPE_MAP``, yields
    ``"other"``.
    """
    return _EPRINTS_TYPE_MAP.get(pub.type, "other") if pub.type else "other"
def _safe_text(value) -> str | None:
    """Return ``value`` as stripped text, or ``None`` when there is none.

    ``None``, blank strings and the literal text ``"none"`` (in any letter
    case) all collapse to ``None``.
    """
    if value is None:
        return None
    cleaned = str(value).strip()
    return cleaned if cleaned and cleaned.lower() != "none" else None
def _append_dc(parent: Element, tag: str, text: str | None) -> None:
    """Append a ``tag`` child holding ``text`` to ``parent``.

    Nothing is appended when ``text`` is ``None`` or empty.
    """
    if not text:
        return
    child = SubElement(parent, tag)
    child.text = text
def generate_dublin_core_records_xml(
    researcher: Researcher,
    publications: Iterable[Publication],
) -> bytes:
    """Serialise ``publications`` as one Dublin Core record per publication.

    The root ``dublinCoreRecords`` element carries the researcher's ORCID and
    name as attributes. Each ``record`` holds the publication metadata; empty
    fields are omitted.

    Returns:
        UTF-8 encoded XML, including the XML declaration.
    """
    dc = f"{{{DC_NS}}}"
    # bibliographicCitation and provenance are defined in the DCMI terms
    # namespace, not in the 15-element DC 1.1 set. This matches
    # generate_dspace_sword_feed_xml in this module.
    dcterms = f"{{{DCTERMS_NS}}}"

    root = Element("dublinCoreRecords", {
        # Attribute values must be strings: a None value makes tostring()
        # fail, so a missing ORCID or name becomes an empty string.
        "researcherOrcid": researcher.orcid_id or "",
        "researcherName": researcher.name or "",
    })
    provenance = f"orcid:{researcher.orcid_id}" if researcher.orcid_id else None

    for pub in publications:
        attrs = {"id": str(pub.id)}
        # Avoid emitting the literal text "None" for a missing put-code.
        if pub.put_code is not None:
            attrs["putCode"] = str(pub.put_code)
        record = SubElement(root, "record", attrs)

        _append_dc(record, f"{dc}title", pub.title)
        _append_dc(record, f"{dc}type", pub.type)
        _append_dc(record, f"{dc}source", pub.journal)
        _append_dc(record, f"{dc}language", pub.language_code)
        _append_dc(record, f"{dc}coverage", pub.country)
        _append_dc(record, f"{dc}description", pub.short_description or pub.subtitle)
        _append_dc(record, f"{dcterms}bibliographicCitation", pub.citation_value)
        if pub.doi:
            _append_dc(record, f"{dc}identifier", f"doi:{pub.doi}")
        _append_dc(record, f"{dc}relation", pub.url)
        _append_dc(record, f"{dc}date", publication_date_iso(pub))
        for name in contributor_names(pub):
            _append_dc(record, f"{dc}creator", name)
        _append_dc(record, f"{dcterms}provenance", provenance)

    return tostring(root, encoding="utf-8", xml_declaration=True)
def generate_dspace_item_dublin_core(pub: Publication) -> bytes:
    """Build the ``dublin_core.xml`` document for a single publication.

    Every non-empty field becomes a ``dcvalue`` element with ``element`` and
    ``qualifier`` attributes. When a DOI exists it supplies both the ``doi``
    and the ``uri`` identifier; otherwise the URL, if any, is the ``uri``
    identifier. The URL is additionally emitted as ``relation.uri``.

    Returns:
        UTF-8 encoded XML, including the XML declaration.
    """
    # (element, qualifier, value) triples in output order; empty values are
    # filtered out when the tree is built.
    fields = [
        ("title", "none", pub.title),
        ("type", "none", dspace_type(pub)),
        ("source", "none", pub.journal),
        ("language", "iso", pub.language_code),
        ("coverage", "spatial", pub.country),
        ("description", "abstract", pub.short_description or pub.subtitle),
        ("description", "none", pub.citation_value),
        ("date", "issued", publication_date_iso(pub)),
    ]
    if pub.doi:
        fields.append(("identifier", "doi", pub.doi))
        fields.append(("identifier", "uri", f"https://doi.org/{pub.doi}"))
    elif pub.url:
        fields.append(("identifier", "uri", pub.url))
    fields.append(("relation", "uri", pub.url))
    fields.extend(
        ("contributor", "author", name) for name in contributor_names(pub)
    )

    root = Element("dublin_core")
    for element, qualifier, value in fields:
        if value:
            node = SubElement(root, "dcvalue", element=element, qualifier=qualifier)
            node.text = value
    return tostring(root, encoding="utf-8", xml_declaration=True)
def generate_dspace_import_csv(
    researcher: Researcher,
    publications: Iterable[Publication],
) -> str:
    """Render ``publications`` as a fully quoted CSV with ``dc.*`` columns.

    One row is written per publication, numbered from 1 in ``row_id``. The
    ``collection`` and ``dc.publisher`` columns are always left empty. When a
    publication has no contributors, the researcher's name is used as author.

    Returns:
        The CSV text, with ``\\n`` line endings.
    """
    header = [
        "row_id",
        "collection",
        "dc.title",
        "dc.contributor.author",
        "dc.date.issued",
        "dc.description",
        "dc.identifier.doi",
        "dc.identifier.uri",
        "dc.language.iso",
        "dc.publisher",
        "dc.relation.ispartof",
        "dc.source",
        "dc.type",
        "dc.provenance",
    ]
    # Leave provenance empty rather than writing the literal "orcid:None"
    # when the researcher has no ORCID (the Dublin Core generator in this
    # module guards the same way).
    provenance = f"orcid:{researcher.orcid_id}" if researcher.orcid_id else ""

    output = io.StringIO()
    writer = csv.writer(output, quoting=csv.QUOTE_ALL, lineterminator="\n")
    writer.writerow(header)
    for index, pub in enumerate(publications, start=1):
        # NOTE(review): DSpace's batch metadata CSV conventionally separates
        # repeated values with "||" - confirm that "; " is what the target
        # importer expects.
        authors = "; ".join(contributor_names(pub)) or (researcher.name or "")
        doi_url = f"https://doi.org/{pub.doi}" if pub.doi else ""
        writer.writerow([
            index,
            "",  # collection
            pub.title or "",
            authors,
            publication_date_iso(pub) or "",
            pub.short_description or pub.citation_value or "",
            pub.doi or "",
            pub.url or doi_url,
            pub.language_code or "",
            "",  # dc.publisher
            pub.journal or "",  # dc.relation.ispartof
            pub.journal or "",  # dc.source
            dspace_type(pub),
            provenance,
        ])
    return output.getvalue()
def generate_dspace_sword_feed_xml(
    researcher: Researcher,
    publications: Iterable[Publication],
) -> bytes:
    """Build an Atom feed aimed at DSpace ingest, with DC/dcterms per entry.

    This does not replace a SWORD 2.0 deposit with bitstreams; it only aligns
    the entry metadata with Dublin Core fields.

    Returns:
        UTF-8 encoded XML, including the XML declaration.
    """
    # Namespaces are declared once, literally, on the root, and child tags
    # use the matching literal prefixes ("dc:title", ...). Mixing these
    # literal xmlns attributes with "{uri}tag" names makes ElementTree add
    # its own declarations on top: "xmlns:dc" would appear twice on <feed>
    # (not well-formed XML) and sword/dcterms elements would be written with
    # generated "nsN" prefixes.
    feed = Element("feed", {
        "xmlns": ATOM_NS,
        "xmlns:dc": DC_NS,
        "xmlns:dcterms": DCTERMS_NS,
        "xmlns:sword": SWORD_NS,
    })
    # One timestamp shared by the feed and all of its entries.
    updated = datetime.utcnow().isoformat() + "Z"

    SubElement(feed, "title").text = f"DSpace export for {researcher.orcid_id}"
    SubElement(feed, "id").text = f"urn:uuid:{researcher.id}"
    SubElement(feed, "updated").text = updated
    author = SubElement(feed, "author")
    SubElement(author, "name").text = researcher.name or researcher.orcid_id

    for pub in publications:
        item_type = dspace_type(pub)
        entry = SubElement(feed, "entry")
        SubElement(entry, "title").text = pub.title or "Untitled"
        SubElement(entry, "id").text = f"urn:uuid:{pub.id}"
        SubElement(entry, "updated").text = updated
        SubElement(entry, "sword:deposit").text = "true"
        SubElement(entry, "sword:noOp").text = "false"
        category = SubElement(entry, "category")
        category.set("term", item_type)
        category.set("scheme", "http://dspace.org/itemtypes")

        # Optional metadata, in output order; empty values are skipped.
        optional_fields = (
            ("dc:title", pub.title),
            ("dc:source", pub.journal),
            ("dc:identifier", f"doi:{pub.doi}" if pub.doi else None),
            ("dcterms:relation", pub.url),
            ("dcterms:abstract", pub.short_description),
            ("dcterms:bibliographicCitation", pub.citation_value),
            ("dc:language", pub.language_code),
            ("dcterms:issued", publication_date_iso(pub)),
        )
        for tag, value in optional_fields:
            if value:
                SubElement(entry, tag).text = value
        SubElement(entry, "dc:type").text = item_type

        for name in contributor_names(pub):
            author_el = SubElement(entry, "author")
            SubElement(author_el, "name").text = name

    return tostring(feed, encoding="utf-8", xml_declaration=True)
def generate_eprints_import_xml(
    researcher: Researcher,
    publications: Iterable[Publication],
) -> bytes:
    """Build an EPrints import document with one ``eprint`` per publication.

    Eprints are numbered from 1 in iteration order, and all date stamps use
    the current UTC date. When a publication has no contributors, the
    researcher's name (if any) is used as the only creator.

    Returns:
        UTF-8 encoded XML, including the XML declaration.
    """
    root = Element("eprints", xmlns=EPRINTS_NS)
    today = datetime.utcnow().strftime("%Y-%m-%d")

    for index, pub in enumerate(publications, start=1):
        eprint = SubElement(root, "eprint")
        SubElement(eprint, "eprintid").text = str(index)
        SubElement(eprint, "rev_number").text = "1"
        SubElement(eprint, "documents")
        SubElement(eprint, "eprint_status").text = "archive"
        SubElement(eprint, "userid").text = "1"
        SubElement(eprint, "dir").text = f"disk00000/00/00/{index:02d}"
        for stamp_tag in ("datestamp", "lastmod", "status_changed"):
            SubElement(eprint, stamp_tag).text = today
        SubElement(eprint, "type").text = eprints_type(pub)

        titles = SubElement(eprint, "titles")
        title_item = SubElement(titles, "item")
        SubElement(title_item, "lang").text = pub.language_code or "en"
        SubElement(title_item, "title").text = pub.title or "Untitled"

        creators = SubElement(eprint, "creators")
        names = contributor_names(pub) or ([researcher.name] if researcher.name else [])
        for name in names:
            family, given = split_person_name(name)
            item = SubElement(creators, "item")
            name_el = SubElement(item, "name")
            SubElement(name_el, "family").text = family
            if given:
                SubElement(name_el, "given").text = given

        if pub.pub_year:
            SubElement(eprint, "date").text = str(pub.pub_year)
        if pub.journal:
            SubElement(eprint, "publication").text = pub.journal
        if pub.doi:
            SubElement(eprint, "doi").text = pub.doi
        if pub.url:
            SubElement(eprint, "official_url").text = pub.url
        if pub.short_description:
            SubElement(eprint, "abstract").text = pub.short_description

        # Emit a single <note>: the citation and the ORCID source line used
        # to go into two sibling <note> elements, which an importer that
        # treats "note" as a single-valued field cannot both keep.
        # NOTE(review): confirm the field's cardinality for the target
        # EPrints configuration.
        notes = []
        if pub.citation_value:
            notes.append(pub.citation_value)
        if researcher.orcid_id:
            notes.append(f"Source ORCID: {researcher.orcid_id}")
        if notes:
            SubElement(eprint, "note").text = "\n".join(notes)

    return tostring(root, encoding="utf-8", xml_declaration=True)
def generate_repository_xml(
    researcher: Researcher,
    publications: List[Publication],
    profile: str,
) -> bytes:
    """Generate the export document for ``profile``.

    The profile name is normalised first. ``dublin_core``, ``dspace`` and
    ``eprints`` use the generators in this module; every other accepted
    profile (``generic``) uses ``SWORDGenerator.generate_feed_xml``.

    Raises:
        ValueError: if ``profile`` is rejected by ``normalize_profile``.
    """
    profile = normalize_profile(profile)
    builders = {
        "dublin_core": generate_dublin_core_records_xml,
        "dspace": generate_dspace_sword_feed_xml,
        "eprints": generate_eprints_import_xml,
    }
    builder = builders.get(profile)
    if builder is None:
        # Kept as a function-level import, presumably to avoid an import
        # cycle with the SWORD generator module - TODO confirm.
        from app.services.sword_generator import SWORDGenerator

        builder = SWORDGenerator.generate_feed_xml
    return builder(researcher, publications)
def export_filename_for_profile(profile: str) -> str:
    """Return the file name used for ``profile``'s export document.

    Raises:
        ValueError: if ``profile`` is rejected by ``normalize_profile``.
    """
    filenames = {
        "generic": "generic-atom.xml",
        "dublin_core": "dublin_core.xml",
        "dspace": "dspace-atom.xml",
        "eprints": "eprints-import.xml",
    }
    return filenames[normalize_profile(profile)]
+65 -44
View File
@@ -1,33 +1,52 @@
import io import io
import zipfile
import json import json
import zipfile
from datetime import datetime from datetime import datetime
from xml.etree.ElementTree import Element, SubElement, tostring
from app.db.models import Publication, Researcher from app.db.models import Publication, Researcher
from app.services.repository_export import (
export_filename_for_profile,
generate_dspace_import_csv,
generate_dspace_item_dublin_core,
generate_dublin_core_records_xml,
generate_dspace_sword_feed_xml,
generate_eprints_import_xml,
)
from app.services.sword_generator import SWORDGenerator from app.services.sword_generator import SWORDGenerator
# ---------------------------------------------------------
# Clase de generador de ZIP
# ---------------------------------------------------------
class ZIPGenerator: class ZIPGenerator:
# ---------------------------------------------------------
# Función auxiliar: generar manifest.txt
# ---------------------------------------------------------
@staticmethod @staticmethod
def generate_manifest(researcher, publications): def generate_manifest(researcher, publications):
lines = [ lines = [
"SWORD Deposit Package", "ORCID Export Package",
"----------------------", "--------------------",
f"Researcher ORCID: {researcher.orcid_id}", f"Researcher ORCID: {researcher.orcid_id}",
f"Researcher Name: {researcher.name}", f"Researcher Name: {researcher.name}",
f"Researcher UUID: {researcher.id}", f"Researcher UUID: {researcher.id}",
f"Total Publications: {len(publications)}", f"Total Publications: {len(publications)}",
f"Generated At: {datetime.utcnow().isoformat()}Z", f"Generated At: {datetime.utcnow().isoformat()}Z",
"", "",
"Files:",
"- sword.xml → Atom genérico ORCID (compatibilidad)",
"- formats/generic-atom.xml",
"- formats/dublin_core.xml → Dublin Core (un registro por obra)",
"- formats/dspace-atom.xml → Atom con metadatos DSpace",
"- formats/dspace-import.csv → Importación batch CSV DSpace",
"- formats/eprints-import.xml → Importación XML EPrints",
"- dspace-saf/item_NNNNN/dublin_core.xml → Simple Archive Format (DSpace)",
"- metadata.json → Metadatos completos (JSON)",
"- mets.xml → METS simplificado (legacy)",
"",
"Repository hints:",
"- DSpace: use dspace-saf/ (SAF) or formats/dspace-import.csv",
"- EPrints: import formats/eprints-import.xml via admin tools",
"- Dublin Core: use formats/dublin_core.xml",
"",
"SWORD endpoint profile query:",
" ?profile=generic|dublin_core|dspace|eprints",
"",
"Publications:", "Publications:",
] ]
@@ -39,9 +58,6 @@ class ZIPGenerator:
return "\n".join(lines) return "\n".join(lines)
# ---------------------------------------------------------
# METADATA.JSON — ahora con TODOS los campos
# ---------------------------------------------------------
@staticmethod @staticmethod
def generate_metadata_json(researcher, publications): def generate_metadata_json(researcher, publications):
data = { data = {
@@ -82,11 +98,10 @@ class ZIPGenerator:
return json.dumps(data, indent=4) return json.dumps(data, indent=4)
# ---------------------------------------------------------
# METS.XML — ampliado con más metadatos
# ---------------------------------------------------------
@staticmethod @staticmethod
def generate_mets_xml(researcher, publications): def generate_mets_xml(researcher, publications):
from xml.etree.ElementTree import Element, SubElement, tostring
mets = Element("mets", xmlns="http://www.loc.gov/METS/") mets = Element("mets", xmlns="http://www.loc.gov/METS/")
header = SubElement(mets, "metsHdr") header = SubElement(mets, "metsHdr")
@@ -98,42 +113,26 @@ class ZIPGenerator:
xml_data = SubElement(md_wrap, "xmlData") xml_data = SubElement(md_wrap, "xmlData")
for pub in publications: for pub in publications:
# Title
SubElement(xml_data, "{http://purl.org/dc/elements/1.1/}title").text = pub.title SubElement(xml_data, "{http://purl.org/dc/elements/1.1/}title").text = pub.title
# Subtitle
if pub.subtitle: if pub.subtitle:
SubElement(xml_data, "{http://purl.org/dc/elements/1.1/}description").text = pub.subtitle SubElement(xml_data, "{http://purl.org/dc/elements/1.1/}description").text = pub.subtitle
# DOI
if pub.doi: if pub.doi:
SubElement(xml_data, "{http://purl.org/dc/elements/1.1/}identifier").text = f"doi:{pub.doi}" SubElement(xml_data, "{http://purl.org/dc/elements/1.1/}identifier").text = f"doi:{pub.doi}"
# Journal
if pub.journal: if pub.journal:
SubElement(xml_data, "{http://purl.org/dc/elements/1.1/}source").text = pub.journal SubElement(xml_data, "{http://purl.org/dc/elements/1.1/}source").text = pub.journal
# URL
if pub.url: if pub.url:
SubElement(xml_data, "{http://purl.org/dc/elements/1.1/}relation").text = pub.url SubElement(xml_data, "{http://purl.org/dc/elements/1.1/}relation").text = pub.url
# Description
if pub.short_description: if pub.short_description:
SubElement(xml_data, "{http://purl.org/dc/elements/1.1/}description").text = pub.short_description SubElement(xml_data, "{http://purl.org/dc/elements/1.1/}description").text = pub.short_description
# Citation
if pub.citation_value: if pub.citation_value:
SubElement(xml_data, "{http://purl.org/dc/elements/1.1/}bibliographicCitation").text = pub.citation_value SubElement(
xml_data,
# Language "{http://purl.org/dc/elements/1.1/}bibliographicCitation",
).text = pub.citation_value
if pub.language_code: if pub.language_code:
SubElement(xml_data, "{http://purl.org/dc/elements/1.1/}language").text = pub.language_code SubElement(xml_data, "{http://purl.org/dc/elements/1.1/}language").text = pub.language_code
# Country
if pub.country: if pub.country:
SubElement(xml_data, "{http://purl.org/dc/elements/1.1/}coverage").text = pub.country SubElement(xml_data, "{http://purl.org/dc/elements/1.1/}coverage").text = pub.country
# Date
if pub.pub_year: if pub.pub_year:
date_str = str(pub.pub_year) date_str = str(pub.pub_year)
if pub.pub_month: if pub.pub_month:
@@ -141,29 +140,51 @@ class ZIPGenerator:
if pub.pub_day: if pub.pub_day:
date_str += f"-{pub.pub_day:02d}" date_str += f"-{pub.pub_day:02d}"
SubElement(xml_data, "{http://purl.org/dc/elements/1.1/}date").text = date_str SubElement(xml_data, "{http://purl.org/dc/elements/1.1/}date").text = date_str
# Type
if pub.type: if pub.type:
SubElement(xml_data, "{http://purl.org/dc/elements/1.1/}type").text = pub.type SubElement(xml_data, "{http://purl.org/dc/elements/1.1/}type").text = pub.type
return tostring(mets, encoding="utf-8", xml_declaration=True) return tostring(mets, encoding="utf-8", xml_declaration=True)
# ---------------------------------------------------------
# ZIP FINAL
# ---------------------------------------------------------
@staticmethod @staticmethod
def generate_zip(researcher, publications): def generate_zip(researcher, publications):
xml_bytes = SWORDGenerator.generate_feed_xml(researcher, publications) generic_xml = SWORDGenerator.generate_feed_xml(researcher, publications)
manifest = ZIPGenerator.generate_manifest(researcher, publications) manifest = ZIPGenerator.generate_manifest(researcher, publications)
metadata_json = ZIPGenerator.generate_metadata_json(researcher, publications) metadata_json = ZIPGenerator.generate_metadata_json(researcher, publications)
mets_xml = ZIPGenerator.generate_mets_xml(researcher, publications) mets_xml = ZIPGenerator.generate_mets_xml(researcher, publications)
format_files = {
f"formats/{export_filename_for_profile('generic')}": generic_xml,
f"formats/{export_filename_for_profile('dublin_core')}": generate_dublin_core_records_xml(
researcher, publications
),
f"formats/{export_filename_for_profile('dspace')}": generate_dspace_sword_feed_xml(
researcher, publications
),
f"formats/{export_filename_for_profile('eprints')}": generate_eprints_import_xml(
researcher, publications
),
}
mem_file = io.BytesIO() mem_file = io.BytesIO()
with zipfile.ZipFile(mem_file, "w", zipfile.ZIP_DEFLATED) as zf: with zipfile.ZipFile(mem_file, "w", zipfile.ZIP_DEFLATED) as zf:
zf.writestr("sword.xml", xml_bytes) zf.writestr("sword.xml", generic_xml)
zf.writestr("manifest.txt", manifest) zf.writestr("manifest.txt", manifest)
zf.writestr("metadata.json", metadata_json) zf.writestr("metadata.json", metadata_json)
zf.writestr("mets.xml", mets_xml) zf.writestr("mets.xml", mets_xml)
zf.writestr(
"formats/dspace-import.csv",
generate_dspace_import_csv(researcher, publications),
)
for path, content in format_files.items():
zf.writestr(path, content)
for index, pub in enumerate(publications, start=1):
item_dir = f"dspace-saf/item_{index:05d}"
zf.writestr(
f"{item_dir}/dublin_core.xml",
generate_dspace_item_dublin_core(pub),
)
mem_file.seek(0) mem_file.seek(0)
return mem_file.read() return mem_file.read()
@@ -7,13 +7,15 @@ import {
SparkleIcon, SparkleIcon,
} from "../ui/Icons"; } from "../ui/Icons";
import { Spinner } from "../ui/Spinner"; import { Spinner } from "../ui/Spinner";
import { SwordProfileSelect } from "./SwordProfileSelect";
import { DEFAULT_EXPORT_PROFILE } from "../../utils/exportProfiles";
const FORMATS = [ const FORMATS = [
{ {
format: "xml", format: "xml",
icon: <DocumentIcon size={20} className="shrink-0 text-ink-secondary" />, icon: <DocumentIcon size={20} className="shrink-0 text-ink-secondary" />,
label: "SWORD XML", label: "SWORD XML",
desc: "Metadatos en formato Atom", desc: "Según destino seleccionado",
}, },
{ {
format: "zip", format: "zip",
@@ -31,6 +33,8 @@ const FORMATS = [
* - `newPublicationsCount` → cuántas publicaciones tiene downloaded_by_me=false. * - `newPublicationsCount` → cuántas publicaciones tiene downloaded_by_me=false.
* - `selectedCount` → publicaciones seleccionadas manualmente. * - `selectedCount` → publicaciones seleccionadas manualmente.
* - `exportingFormat` → formato en curso (pone el botón en loading). * - `exportingFormat` → formato en curso (pone el botón en loading).
* - `swordProfile` → perfil SWORD (dublin_core, dspace, eprints…).
* - `onSwordProfileChange` → callback al cambiar destino.
*/ */
export function ExportDropdown({ export function ExportDropdown({
onExport, onExport,
@@ -38,6 +42,8 @@ export function ExportDropdown({
selectedCount = 0, selectedCount = 0,
isAuthenticated = false, isAuthenticated = false,
newPublicationsCount = 0, newPublicationsCount = 0,
swordProfile = DEFAULT_EXPORT_PROFILE,
onSwordProfileChange,
}) { }) {
const [open, setOpen] = useState(false); const [open, setOpen] = useState(false);
const rootRef = useRef(null); const rootRef = useRef(null);
@@ -57,7 +63,7 @@ export function ExportDropdown({
function handlePick(format) { function handlePick(format) {
setOpen(false); setOpen(false);
onExport(format); onExport(format, format === "xml" ? swordProfile : undefined);
} }
// Label logic: // Label logic:
@@ -80,7 +86,14 @@ export function ExportDropdown({
} }
return ( return (
<div className="relative" ref={rootRef}> <div className="flex flex-wrap items-center justify-end gap-2">
<SwordProfileSelect
id="dashboard-sword-profile"
value={swordProfile}
onChange={onSwordProfileChange}
/>
<div className="relative" ref={rootRef}>
<button <button
type="button" type="button"
onClick={() => setOpen((o) => !o)} onClick={() => setOpen((o) => !o)}
@@ -124,6 +137,7 @@ export function ExportDropdown({
))} ))}
</div> </div>
)} )}
</div>
</div> </div>
); );
} }
@@ -0,0 +1,35 @@
import {
DEFAULT_EXPORT_PROFILE,
EXPORT_PROFILE_OPTIONS,
} from "../../utils/exportProfiles";
/**
* Selector de destino para exportación SWORD XML (DSpace, EPrints, Dublin Core…).
*/
export function SwordProfileSelect({
value = DEFAULT_EXPORT_PROFILE,
onChange,
id = "sword-export-profile",
className = "",
}) {
return (
<label
htmlFor={id}
className={`flex items-center gap-2 text-sm ${className}`.trim()}
>
<span className="whitespace-nowrap text-ink-tertiary">Destino:</span>
<select
id={id}
value={value}
onChange={(event) => onChange(event.target.value)}
className="min-w-[9.5rem] rounded-lg border border-surface-border-strong bg-surface-primary px-2.5 py-2 text-sm font-medium text-ink-primary transition-colors hover:bg-surface-secondary focus:border-brand-primary focus:outline-none focus:ring-2 focus:ring-brand-primary/20"
>
{EXPORT_PROFILE_OPTIONS.map(({ value: optionValue, label }) => (
<option key={optionValue} value={optionValue}>
{label}
</option>
))}
</select>
</label>
);
}
+10 -2
View File
@@ -16,6 +16,7 @@ import {
syncResearcher, syncResearcher,
} from "../services/api"; } from "../services/api";
import { isValidOrcid } from "../utils/orcid"; import { isValidOrcid } from "../utils/orcid";
import { DEFAULT_EXPORT_PROFILE, swordXmlFilename } from "../utils/exportProfiles";
import { useAuth } from "../contexts/AuthContext"; import { useAuth } from "../contexts/AuthContext";
const SUCCESS_FLASH_MS = 3000; const SUCCESS_FLASH_MS = 3000;
@@ -49,6 +50,7 @@ export function DashboardPage() {
const [syncStatus, setSyncStatus] = useState("idle"); // idle | loading | success const [syncStatus, setSyncStatus] = useState("idle"); // idle | loading | success
const [exportingFormat, setExportingFormat] = useState(null); const [exportingFormat, setExportingFormat] = useState(null);
const [swordProfile, setSwordProfile] = useState(DEFAULT_EXPORT_PROFILE);
const [selectedIds, setSelectedIds] = useState(() => new Set()); const [selectedIds, setSelectedIds] = useState(() => new Set());
@@ -138,7 +140,7 @@ export function DashboardPage() {
} }
} }
async function handleExport(format) { async function handleExport(format, profile = DEFAULT_EXPORT_PROFILE) {
setExportingFormat(format); setExportingFormat(format);
try { try {
let ids; let ids;
@@ -162,13 +164,17 @@ export function DashboardPage() {
const { blob } = await downloadExport(orcid, format, { const { blob } = await downloadExport(orcid, format, {
publicationIds: ids, publicationIds: ids,
profile: format === "xml" ? profile : undefined,
}); });
if (blob) { if (blob) {
const objectUrl = URL.createObjectURL(blob); const objectUrl = URL.createObjectURL(blob);
const anchor = document.createElement("a"); const anchor = document.createElement("a");
anchor.href = objectUrl; anchor.href = objectUrl;
const extension = format === "xml" ? "xml" : format; const extension = format === "xml" ? "xml" : format;
anchor.download = `sword-${orcid}.${extension}`; anchor.download =
format === "xml"
? swordXmlFilename(orcid, profile)
: `sword-${orcid}.${extension}`;
document.body.appendChild(anchor); document.body.appendChild(anchor);
anchor.click(); anchor.click();
anchor.remove(); anchor.remove();
@@ -220,6 +226,8 @@ export function DashboardPage() {
selectedCount={selectedIds.size} selectedCount={selectedIds.size}
isAuthenticated={isAuthenticated} isAuthenticated={isAuthenticated}
newPublicationsCount={newPublicationIds.length} newPublicationsCount={newPublicationIds.length}
swordProfile={swordProfile}
onSwordProfileChange={setSwordProfile}
/> />
</> </>
} }
+37 -7
View File
@@ -14,6 +14,8 @@ import {
UsersIcon, UsersIcon,
} from "../components/ui/Icons"; } from "../components/ui/Icons";
import { downloadExport, searchResearchersBulk } from "../services/api"; import { downloadExport, searchResearchersBulk } from "../services/api";
import { DEFAULT_EXPORT_PROFILE, swordXmlFilename } from "../utils/exportProfiles";
import { SwordProfileSelect } from "../components/dashboard/SwordProfileSelect";
import { useAuth } from "../contexts/AuthContext"; import { useAuth } from "../contexts/AuthContext";
/** /**
@@ -34,6 +36,7 @@ export function GroupResultsPage() {
const [errors, setErrors] = useState([]); const [errors, setErrors] = useState([]);
const [loading, setLoading] = useState(true); const [loading, setLoading] = useState(true);
const [globalExporting, setGlobalExporting] = useState(null); // format | null const [globalExporting, setGlobalExporting] = useState(null); // format | null
const [swordProfile, setSwordProfile] = useState(DEFAULT_EXPORT_PROFILE);
// Track per-researcher export state (format | null) // Track per-researcher export state (format | null)
const [cardExporting, setCardExporting] = useState({}); const [cardExporting, setCardExporting] = useState({});
@@ -99,7 +102,7 @@ export function GroupResultsPage() {
[results], [results],
); );
async function handleGlobalExport(format) { async function handleGlobalExport(format, profile = DEFAULT_EXPORT_PROFILE) {
const ids = isAuthenticated ? allNewIds : allIds; const ids = isAuthenticated ? allNewIds : allIds;
if (ids.length === 0) { if (ids.length === 0) {
toast.info( toast.info(
@@ -116,12 +119,16 @@ export function GroupResultsPage() {
// since the endpoint is POST /export/{format}/publications (no orcid needed) // since the endpoint is POST /export/{format}/publications (no orcid needed)
const { blob } = await downloadExport(null, format, { const { blob } = await downloadExport(null, format, {
publicationIds: ids, publicationIds: ids,
profile: format === "xml" ? profile : undefined,
}); });
if (blob) { if (blob) {
const objectUrl = URL.createObjectURL(blob); const objectUrl = URL.createObjectURL(blob);
const anchor = document.createElement("a"); const anchor = document.createElement("a");
anchor.href = objectUrl; anchor.href = objectUrl;
anchor.download = `sword-group.${format === "xml" ? "xml" : format}`; anchor.download =
format === "xml"
? swordXmlFilename("group", profile)
: `sword-group.${format}`;
document.body.appendChild(anchor); document.body.appendChild(anchor);
anchor.click(); anchor.click();
anchor.remove(); anchor.remove();
@@ -139,7 +146,13 @@ export function GroupResultsPage() {
} }
} }
async function handleCardExport(orcidId, format, newIds, totalIds) { async function handleCardExport(
orcidId,
format,
newIds,
totalIds,
profile = DEFAULT_EXPORT_PROFILE,
) {
const ids = isAuthenticated ? newIds : totalIds; const ids = isAuthenticated ? newIds : totalIds;
if (ids.length === 0) { if (ids.length === 0) {
toast.info("No hay publicaciones para exportar"); toast.info("No hay publicaciones para exportar");
@@ -149,12 +162,16 @@ export function GroupResultsPage() {
try { try {
const { blob } = await downloadExport(orcidId, format, { const { blob } = await downloadExport(orcidId, format, {
publicationIds: ids, publicationIds: ids,
profile: format === "xml" ? profile : undefined,
}); });
if (blob) { if (blob) {
const objectUrl = URL.createObjectURL(blob); const objectUrl = URL.createObjectURL(blob);
const anchor = document.createElement("a"); const anchor = document.createElement("a");
anchor.href = objectUrl; anchor.href = objectUrl;
anchor.download = `sword-${orcidId}.${format === "xml" ? "xml" : format}`; anchor.download =
format === "xml"
? swordXmlFilename(orcidId, profile)
: `sword-${orcidId}.${format}`;
document.body.appendChild(anchor); document.body.appendChild(anchor);
anchor.click(); anchor.click();
anchor.remove(); anchor.remove();
@@ -216,12 +233,17 @@ export function GroupResultsPage() {
{/* Global export buttons */} {/* Global export buttons */}
{!loading && results.length > 0 && ( {!loading && results.length > 0 && (
<div className="flex gap-2"> <div className="flex flex-wrap items-center justify-end gap-2">
<SwordProfileSelect
id="group-sword-profile"
value={swordProfile}
onChange={setSwordProfile}
/>
{["xml", "zip"].map((fmt) => ( {["xml", "zip"].map((fmt) => (
<button <button
key={fmt} key={fmt}
type="button" type="button"
onClick={() => handleGlobalExport(fmt)} onClick={() => handleGlobalExport(fmt, swordProfile)}
disabled={globalDisabled} disabled={globalDisabled}
className="inline-flex items-center gap-2 rounded-lg border border-surface-border-strong bg-surface-primary px-4 py-2 text-sm font-medium text-ink-primary transition-colors enabled:hover:bg-surface-secondary disabled:cursor-not-allowed disabled:opacity-60" className="inline-flex items-center gap-2 rounded-lg border border-surface-border-strong bg-surface-primary px-4 py-2 text-sm font-medium text-ink-primary transition-colors enabled:hover:bg-surface-secondary disabled:cursor-not-allowed disabled:opacity-60"
> >
@@ -269,8 +291,10 @@ export function GroupResultsPage() {
fmt, fmt,
newIds, newIds,
totalIds, totalIds,
swordProfile,
) )
} }
swordProfile={swordProfile}
/> />
))} ))}
</div> </div>
@@ -326,7 +350,13 @@ export function GroupResultsPage() {
/* ─────────────────────────── Researcher card ─────────────────────────── */ /* ─────────────────────────── Researcher card ─────────────────────────── */
function ResearcherResultCard({ bundle, isAuthenticated, exporting, onExport }) { function ResearcherResultCard({
bundle,
isAuthenticated,
exporting,
onExport,
swordProfile,
}) {
const researcher = bundle.researcher ?? {}; const researcher = bundle.researcher ?? {};
const publications = bundle.publications ?? []; const publications = bundle.publications ?? [];
const totalRecords = bundle.totalRecords ?? publications.length; const totalRecords = bundle.totalRecords ?? publications.length;
+9 -2
View File
@@ -402,13 +402,15 @@ export function getExportUrl(orcidId, format) {
* `["id1", "id2", ...]` (array crudo, tal como espera el backend). * `["id1", "id2", ...]` (array crudo, tal como espera el backend).
* - Si viene vacío/undefined usamos el endpoint masivo * - Si viene vacío/undefined usamos el endpoint masivo
* `GET /export/{sword|zip}/researcher/{orcid_id}` y descargamos todo. * `GET /export/{sword|zip}/researcher/{orcid_id}` y descargamos todo.
* - Para SWORD XML, `profile` añade `?profile=dublin_core|dspace|eprints`
* (genérico = sin query).
* *
* Lanza `ApiError` en fallo. * Lanza `ApiError` en fallo.
*/ */
export async function downloadExport( export async function downloadExport(
orcidId, orcidId,
format, format,
{ signal, publicationIds } = {}, { signal, publicationIds, profile } = {},
) { ) {
if (USE_MOCKS) { if (USE_MOCKS) {
await mockExport(format); await mockExport(format);
@@ -421,10 +423,15 @@ export async function downloadExport(
? publicationIds ? publicationIds
: null; : null;
const url = ids let url = ids
? `${BASE_URL}/export/${segment}/publications` ? `${BASE_URL}/export/${segment}/publications`
: `${BASE_URL}/export/${segment}/researcher/${encodeURIComponent(orcidId)}`; : `${BASE_URL}/export/${segment}/researcher/${encodeURIComponent(orcidId)}`;
if (format === "xml" && profile && profile !== "generic") {
const separator = url.includes("?") ? "&" : "?";
url += `${separator}profile=${encodeURIComponent(profile)}`;
}
const init = { const init = {
method: ids ? "POST" : "GET", method: ids ? "POST" : "GET",
signal, signal,
+15
View File
@@ -0,0 +1,15 @@
/** Perfiles de exportación SWORD XML (query `profile` en el backend). */
export const EXPORT_PROFILE_OPTIONS = [
{ value: "generic", label: "Genérico (ORCID)" },
{ value: "dublin_core", label: "Dublin Core" },
{ value: "dspace", label: "DSpace" },
{ value: "eprints", label: "EPrints" },
];
/** Profile identifier used when the caller does not pick one. */
export const DEFAULT_EXPORT_PROFILE = "generic";

/**
 * Build the download filename for a SWORD XML export.
 *
 * The default profile (or any falsy profile) yields `sword-<baseName>.xml`;
 * any other profile is inserted between the prefix and the base name,
 * giving `sword-<profile>-<baseName>.xml`.
 *
 * @param {string} baseName - Identifying part of the filename.
 * @param {string} [profile=DEFAULT_EXPORT_PROFILE] - Export profile identifier.
 * @returns {string} Filename ending in `.xml`.
 */
export function swordXmlFilename(baseName, profile = DEFAULT_EXPORT_PROFILE) {
  // No profile segment for the default or for a missing/empty profile.
  if (!profile || profile === DEFAULT_EXPORT_PROFILE) {
    return `sword-${baseName}.xml`;
  }
  return `sword-${profile}-${baseName}.xml`;
}