feat(export): perfiles DSpace/EPrints/Dublin Core y selector SWORD en UI
Backend: generadores por repositorio, ZIP multi-formato y query profile en /export/sword. Frontend: selector Destino que envía profile al descargar SWORD XML.
This commit is contained in:
@@ -1,7 +1,7 @@
|
||||
from typing import Iterable, List
|
||||
from uuid import UUID
|
||||
|
||||
from fastapi import APIRouter, Body, Depends, HTTPException, Path, Request
|
||||
from fastapi import APIRouter, Body, Depends, HTTPException, Path, Query, Request
|
||||
from fastapi.responses import Response
|
||||
from sqlalchemy.orm import Session
|
||||
|
||||
@@ -12,7 +12,7 @@ from app.db.session import get_db
|
||||
from app.security.export_auth import require_export_access
|
||||
from app.services.orcid_client import get_display_name
|
||||
from app.services.publication_enrichment import enrich_publications_from_orcid
|
||||
from app.services.sword_generator import SWORDGenerator
|
||||
from app.services.repository_export import EXPORT_PROFILES, generate_repository_xml
|
||||
from app.services.zip_generator import ZIPGenerator
|
||||
from app.utils.orcid_validator import ORCID_PATTERN, is_valid_orcid
|
||||
|
||||
@@ -96,6 +96,18 @@ def _raise_clear_error_if_researcher_id_was_used(db: Session, pub_ids: List[UUID
|
||||
)
|
||||
|
||||
|
||||
def _export_xml_response(
    researcher: Researcher,
    pubs: List[Publication],
    profile: str,
) -> Response:
    """Serialize ``pubs`` with the requested export profile into an XML response.

    Args:
        researcher: Owner of the publications being exported.
        pubs: Publications to serialize.
        profile: Export profile name, passed through to ``generate_repository_xml``.

    Returns:
        A ``Response`` carrying the generated bytes as ``application/xml``.

    Raises:
        HTTPException: With status 400 when the generator raises ``ValueError``;
            the exception message becomes the response detail.
    """
    try:
        payload = generate_repository_xml(researcher, pubs, profile)
    except ValueError as exc:
        # A generator-side ValueError is reported as a client error, not a 500.
        raise HTTPException(status_code=400, detail=str(exc)) from exc
    else:
        return Response(content=payload, media_type="application/xml")
|
||||
|
||||
|
||||
# ---------------------------------------------------------
|
||||
# ENDPOINT 1: SWORD múltiples publicaciones
|
||||
# ---------------------------------------------------------
|
||||
@@ -105,6 +117,13 @@ def _raise_clear_error_if_researcher_id_was_used(db: Session, pub_ids: List[UUID
|
||||
async def export_multiple_sword(
|
||||
request: Request,
|
||||
pub_ids: List[UUID] = Body(..., min_length=1, max_length=settings.MAX_PUB_IDS_BATCH),
|
||||
profile: str = Query(
|
||||
"generic",
|
||||
description=(
|
||||
"Perfil de repositorio: generic (Atom ORCID), dublin_core, dspace, eprints. "
|
||||
f"Valores: {', '.join(EXPORT_PROFILES)}"
|
||||
),
|
||||
),
|
||||
db: Session = Depends(get_db),
|
||||
current: Researcher | None = Depends(require_export_access),
|
||||
):
|
||||
@@ -121,11 +140,11 @@ async def export_multiple_sword(
|
||||
|
||||
_prepare_researcher_and_publications_for_export(db, researcher, pubs)
|
||||
|
||||
xml_bytes = SWORDGenerator.generate_feed_xml(researcher, pubs)
|
||||
response = _export_xml_response(researcher, pubs, profile)
|
||||
if current:
|
||||
_record_downloads(db, current, pubs)
|
||||
|
||||
return Response(content=xml_bytes, media_type="application/xml")
|
||||
return response
|
||||
|
||||
|
||||
# ---------------------------------------------------------
|
||||
@@ -137,6 +156,13 @@ async def export_multiple_sword(
|
||||
async def export_researcher_sword(
|
||||
request: Request,
|
||||
orcid_id: str = Path(min_length=19, max_length=19, pattern=ORCID_PATTERN),
|
||||
profile: str = Query(
|
||||
"generic",
|
||||
description=(
|
||||
"Perfil de repositorio: generic (Atom ORCID), dublin_core, dspace, eprints. "
|
||||
f"Valores: {', '.join(EXPORT_PROFILES)}"
|
||||
),
|
||||
),
|
||||
db: Session = Depends(get_db),
|
||||
current: Researcher | None = Depends(require_export_access),
|
||||
):
|
||||
@@ -153,11 +179,11 @@ async def export_researcher_sword(
|
||||
|
||||
_prepare_researcher_and_publications_for_export(db, researcher, pubs)
|
||||
|
||||
xml_bytes = SWORDGenerator.generate_feed_xml(researcher, pubs)
|
||||
response = _export_xml_response(researcher, pubs, profile)
|
||||
if current:
|
||||
_record_downloads(db, current, pubs)
|
||||
|
||||
return Response(content=xml_bytes, media_type="application/xml")
|
||||
return response
|
||||
|
||||
|
||||
# ---------------------------------------------------------
|
||||
|
||||
@@ -0,0 +1,365 @@
|
||||
"""
|
||||
Exportadores orientados a repositorios: Dublin Core, DSpace y EPrints.
|
||||
|
||||
Perfiles soportados (query `profile` en /export/sword/...):
|
||||
- generic → feed Atom ORCID (compatibilidad hacia atrás)
|
||||
- dublin_core → XML con un registro DC por publicación
|
||||
- dspace → feed Atom con metadatos DSpace / Dublin Core ampliado
|
||||
- eprints → XML de importación EPrints (EP3)
|
||||
|
||||
El ZIP incluye todos los perfiles bajo `formats/` más SAF DSpace en `dspace-saf/`.
|
||||
"""
|
||||
|
||||
from __future__ import annotations
|
||||
|
||||
import csv
|
||||
import io
|
||||
from datetime import datetime
|
||||
from typing import Iterable, List
|
||||
from xml.etree.ElementTree import Element, SubElement, tostring
|
||||
|
||||
from app.db.models import Publication, Researcher
|
||||
|
||||
# XML namespace URIs shared by the generators in this module.
ATOM_NS = "http://www.w3.org/2005/Atom"
DC_NS = "http://purl.org/dc/elements/1.1/"
DCTERMS_NS = "http://purl.org/dc/terms/"
SWORD_NS = "http://purl.org/net/sword/"
EPRINTS_NS = "http://eprints.org/ep3/data"

# Accepted export profile names; "generic" is the default used when none is given.
EXPORT_PROFILES = ("generic", "dublin_core", "dspace", "eprints")

# Publication type -> DSpace item type label (used by ``dspace_type``).
_DSPACE_TYPE_MAP = {
    "journal-article": "Article",
    "book-chapter": "Book chapter",
    "book": "Book",
    "conference-paper": "Conference paper",
    "conference-abstract": "Conference paper",
    "dissertation-thesis": "Thesis",
    "report": "Report",
    "preprint": "Preprint",
    "other": "Other",
}

# Publication type -> EPrints item type (used by ``eprints_type``).
_EPRINTS_TYPE_MAP = {
    "journal-article": "article",
    "book-chapter": "book_section",
    "book": "book",
    "conference-paper": "conference_item",
    "conference-abstract": "conference_item",
    "dissertation-thesis": "thesis",
    "report": "report",
    "preprint": "preprint",
    "other": "other",
}
|
||||
|
||||
|
||||
def normalize_profile(profile: str | None) -> str:
    """Return the canonical (trimmed, lower-case) name of an export profile.

    A missing or empty ``profile`` falls back to ``"generic"``.

    Raises:
        ValueError: If the normalized value is not one of ``EXPORT_PROFILES``.
    """
    candidate = (profile or "generic").strip().lower()
    if candidate in EXPORT_PROFILES:
        return candidate
    raise ValueError(
        f"Invalid export profile {profile!r}. "
        f"Use one of: {', '.join(EXPORT_PROFILES)}"
    )
|
||||
|
||||
|
||||
def publication_date_iso(pub: Publication) -> str | None:
    """Build a ``YYYY``, ``YYYY-MM`` or ``YYYY-MM-DD`` string from a publication.

    Args:
        pub: Object exposing ``pub_year``, ``pub_month`` and ``pub_day``.

    Returns:
        The most precise date the available parts allow, or ``None`` when the
        year is missing.
    """
    if not pub.pub_year:
        return None
    parts = [str(pub.pub_year)]
    if pub.pub_month:
        # int() lets numeric strings such as "05" be zero-padded as well.
        parts.append(f"{int(pub.pub_month):02d}")
        # The day is only meaningful after a month; otherwise "2020-15"
        # would be read as year-month.
        if pub.pub_day:
            parts.append(f"{int(pub.pub_day):02d}")
    return "-".join(parts)
|
||||
|
||||
|
||||
def contributor_names(pub: Publication) -> List[str]:
    """Collect the non-empty ``name`` values from ``pub.contributors``.

    Args:
        pub: Object whose ``contributors`` attribute is a list of dicts
            (or ``None``).

    Returns:
        Contributor names as stripped strings, in their original order.
        Entries that are not dicts, or that carry no usable name, are skipped.
    """
    names: List[str] = []
    for entry in pub.contributors or []:
        # Malformed entries (None, bare strings, ...) are skipped instead of
        # raising AttributeError on ``.get``.
        if not isinstance(entry, dict):
            continue
        raw_name = entry.get("name")
        if not raw_name:
            continue
        cleaned = str(raw_name).strip()
        if cleaned:
            names.append(cleaned)
    return names
|
||||
|
||||
|
||||
def split_person_name(full_name: str) -> tuple[str, str]:
    """Split a personal name into ``(family, given)``.

    Two input shapes are understood:

    * ``"Family, Given"`` - split at the first comma.
    * ``"Given [Middle ...] Family"`` - the last whitespace-separated token
      is taken as the family name.

    A single-token name is returned as the family name with an empty given
    name. Surrounding whitespace is always removed.
    """
    cleaned = full_name.strip()

    if "," in cleaned:
        family, _, given = cleaned.partition(",")
        family = family.strip()
        given = " ".join(given.split())
        if family:
            return family, given
        # Nothing before the comma: fall back to splitting what follows it.
        cleaned = given

    tokens = cleaned.split()
    if len(tokens) <= 1:
        return cleaned, ""
    return tokens[-1], " ".join(tokens[:-1])
|
||||
|
||||
|
||||
def dspace_type(pub: Publication) -> str:
    """Return the DSpace item type label for ``pub.type``.

    Known types come from ``_DSPACE_TYPE_MAP``. Unknown ones are title-cased
    with hyphens turned into spaces. A missing type yields ``"Other"``.
    """
    raw_type = pub.type
    if raw_type:
        fallback = raw_type.replace("-", " ").title()
        return _DSPACE_TYPE_MAP.get(raw_type, fallback)
    return "Other"
|
||||
|
||||
|
||||
def eprints_type(pub: Publication) -> str:
    """Return the EPrints item type for ``pub.type``.

    Types absent from ``_EPRINTS_TYPE_MAP``, and a missing type, both map to
    ``"other"``.
    """
    raw_type = pub.type
    if raw_type:
        return _EPRINTS_TYPE_MAP.get(raw_type, "other")
    return "other"
|
||||
|
||||
|
||||
def _safe_text(value: object) -> str | None:
    """Coerce ``value`` to a stripped string, or ``None`` if nothing usable remains.

    ``None``, blank strings and the literal text ``"none"`` (any letter case)
    all yield ``None``.
    """
    if value is None:
        return None
    text = str(value).strip()
    # A stringified None ("None") is treated the same as a missing value.
    if text.lower() == "none":
        return None
    return text or None
|
||||
|
||||
|
||||
def _append_dc(parent: Element, tag: str, text: str | None) -> None:
    """Append a ``tag`` child holding ``text`` to ``parent`` when there is text.

    The value is normalized with ``_safe_text`` first, so blank values and a
    stringified ``None`` produce no element, and non-string values are
    converted to ``str`` (ElementTree cannot serialize non-string text).
    """
    cleaned = _safe_text(text)
    if cleaned:
        SubElement(parent, tag).text = cleaned
|
||||
|
||||
|
||||
def generate_dublin_core_records_xml(
    researcher: Researcher,
    publications: Iterable[Publication],
) -> bytes:
    """Serialize publications as a ``dublinCoreRecords`` XML document.

    The root element carries the researcher's ORCID iD and name as attributes.
    Each publication becomes a ``record`` element (attributes ``id`` and
    ``putCode``) whose children are Dublin Core elements in ``DC_NS``.

    Args:
        researcher: Owner of the publications.
        publications: Publications to serialize, one ``record`` each.

    Returns:
        UTF-8 encoded XML including the XML declaration.
    """
    root = Element("dublinCoreRecords", {
        # ElementTree raises on a None attribute value, and the ORCID iD is
        # treated as optional further down, so fall back to "" like the name.
        "researcherOrcid": researcher.orcid_id or "",
        "researcherName": researcher.name or "",
    })

    def dc(term: str) -> str:
        """Return ``term`` qualified with the Dublin Core namespace."""
        return f"{{{DC_NS}}}{term}"

    for pub in publications:
        record = SubElement(root, "record", {
            "id": str(pub.id),
            "putCode": str(pub.put_code),
        })
        _append_dc(record, dc("title"), pub.title)
        _append_dc(record, dc("type"), pub.type)
        _append_dc(record, dc("source"), pub.journal)
        _append_dc(record, dc("language"), pub.language_code)
        _append_dc(record, dc("coverage"), pub.country)
        _append_dc(record, dc("description"), pub.short_description or pub.subtitle)
        # NOTE(review): bibliographicCitation and provenance are DCMI *terms*
        # (DCTERMS_NS), not elements/1.1 - confirm which namespace consumers
        # of this export expect before changing it.
        _append_dc(record, dc("bibliographicCitation"), pub.citation_value)
        if pub.doi:
            _append_dc(record, dc("identifier"), f"doi:{pub.doi}")
        # _append_dc skips empty values itself, so no guard is needed here.
        _append_dc(record, dc("relation"), pub.url)
        _append_dc(record, dc("date"), publication_date_iso(pub))
        for name in contributor_names(pub):
            _append_dc(record, dc("creator"), name)
        if researcher.orcid_id:
            _append_dc(record, dc("provenance"), f"orcid:{researcher.orcid_id}")

    return tostring(root, encoding="utf-8", xml_declaration=True)
|
||||
|
||||
|
||||
def generate_dspace_item_dublin_core(pub: Publication) -> bytes:
    """Build the ``dublin_core`` XML document for one publication.

    Every metadata value is emitted as a ``dcvalue`` element with ``element``
    and ``qualifier`` attributes; empty values are left out.

    Returns:
        UTF-8 encoded XML including the XML declaration.
    """
    root = Element("dublin_core")

    def add(element: str, qualifier: str, value: str | None) -> None:
        """Append one ``dcvalue`` node unless ``value`` is empty."""
        if not value:
            return
        node = SubElement(root, "dcvalue", element=element, qualifier=qualifier)
        node.text = value

    # Descriptive fields, in output order.
    descriptive_fields = (
        ("title", "none", pub.title),
        ("type", "none", dspace_type(pub)),
        ("source", "none", pub.journal),
        ("language", "iso", pub.language_code),
        ("coverage", "spatial", pub.country),
        ("description", "abstract", pub.short_description or pub.subtitle),
        ("description", "none", pub.citation_value),
        ("date", "issued", publication_date_iso(pub)),
    )
    for element, qualifier, value in descriptive_fields:
        add(element, qualifier, value)

    # Identifier: the DOI (plus its resolver URL) wins; otherwise the URL.
    if pub.doi:
        add("identifier", "doi", pub.doi)
        add("identifier", "uri", f"https://doi.org/{pub.doi}")
    elif pub.url:
        add("identifier", "uri", pub.url)

    add("relation", "uri", pub.url)

    for author in contributor_names(pub):
        add("contributor", "author", author)

    return tostring(root, encoding="utf-8", xml_declaration=True)
|
||||
|
||||
|
||||
def generate_dspace_import_csv(
    researcher: Researcher,
    publications: Iterable[Publication],
) -> str:
    """Render publications as a CSV with DSpace-style ``dc.*`` columns.

    One row is written per publication, all fields quoted and rows terminated
    with ``\\n``. ``collection`` and ``dc.publisher`` are left empty, and the
    journal fills both ``dc.relation.ispartof`` and ``dc.source``.

    Args:
        researcher: Owner of the publications; the name is the author
            fallback and the ORCID iD is recorded as provenance.
        publications: Publications to export.

    Returns:
        The CSV document as text.
    """
    # NOTE(review): DSpace's batch metadata importer keys rows on an ``id``
    # column ("+" for new items) - confirm whether ``row_id`` is accepted by
    # the target installation.
    header = [
        "row_id",
        "collection",
        "dc.title",
        "dc.contributor.author",
        "dc.date.issued",
        "dc.description",
        "dc.identifier.doi",
        "dc.identifier.uri",
        "dc.language.iso",
        "dc.publisher",
        "dc.relation.ispartof",
        "dc.source",
        "dc.type",
        "dc.provenance",
    ]

    # Loop invariants. An absent ORCID iD must not be written as "orcid:None".
    fallback_author = researcher.name or ""
    provenance = f"orcid:{researcher.orcid_id}" if researcher.orcid_id else ""

    output = io.StringIO()
    writer = csv.writer(output, quoting=csv.QUOTE_ALL, lineterminator="\n")
    writer.writerow(header)

    for index, pub in enumerate(publications, start=1):
        # DSpace CSV separates multiple values of one field with "||";
        # "; " would import all authors as a single value.
        authors = "||".join(contributor_names(pub)) or fallback_author
        writer.writerow([
            index,
            "",
            pub.title or "",
            authors,
            publication_date_iso(pub) or "",
            pub.short_description or pub.citation_value or "",
            pub.doi or "",
            pub.url or (f"https://doi.org/{pub.doi}" if pub.doi else ""),
            pub.language_code or "",
            "",
            pub.journal or "",
            pub.journal or "",
            dspace_type(pub),
            provenance,
        ])

    return output.getvalue()
|
||||
|
||||
|
||||
def generate_dspace_sword_feed_xml(
    researcher: Researcher,
    publications: Iterable[Publication],
) -> bytes:
    """Build an Atom feed whose entries carry DC / dcterms metadata for DSpace.

    This is not a SWORD 2.0 deposit with bitstreams; it only aligns the
    metadata fields with Dublin Core.

    Args:
        researcher: Feed author; ``orcid_id`` and ``id`` identify the feed.
        publications: Publications to emit, one ``entry`` each.

    Returns:
        UTF-8 encoded XML including the XML declaration.
    """
    # Local import: the module header only imports ``datetime`` itself.
    from datetime import timezone

    # Namespaces are declared once, by hand, on the root element. Child tags
    # therefore use literal prefixes ("dc:title"), not Clark notation
    # ("{uri}title"). With Clark notation ElementTree adds its own xmlns:dc
    # declaration next to the manual one, and the duplicate attribute makes
    # the document malformed.
    feed = Element("feed", {
        "xmlns": ATOM_NS,
        "xmlns:dc": DC_NS,
        "xmlns:dcterms": DCTERMS_NS,
        "xmlns:sword": SWORD_NS,
    })

    # One timestamp for the whole feed. datetime.utcnow() is deprecated; the
    # aware equivalent is rendered with the same trailing "Z".
    generated_at = datetime.now(timezone.utc).isoformat().replace("+00:00", "Z")

    SubElement(feed, "title").text = f"DSpace export for {researcher.orcid_id}"
    SubElement(feed, "id").text = f"urn:uuid:{researcher.id}"
    SubElement(feed, "updated").text = generated_at
    feed_author = SubElement(feed, "author")
    SubElement(feed_author, "name").text = researcher.name or researcher.orcid_id

    for pub in publications:
        item_type = dspace_type(pub)

        entry = SubElement(feed, "entry")
        SubElement(entry, "title").text = pub.title or "Untitled"
        SubElement(entry, "id").text = f"urn:uuid:{pub.id}"
        SubElement(entry, "updated").text = generated_at
        SubElement(entry, "sword:deposit").text = "true"
        SubElement(entry, "sword:noOp").text = "false"

        category = SubElement(entry, "category")
        category.set("term", item_type)
        category.set("scheme", "http://dspace.org/itemtypes")

        if pub.title:
            SubElement(entry, "dc:title").text = pub.title
        if pub.journal:
            SubElement(entry, "dc:source").text = pub.journal
        if pub.doi:
            SubElement(entry, "dc:identifier").text = f"doi:{pub.doi}"
        if pub.url:
            SubElement(entry, "dcterms:relation").text = pub.url
        if pub.short_description:
            SubElement(entry, "dcterms:abstract").text = pub.short_description
        if pub.citation_value:
            SubElement(entry, "dcterms:bibliographicCitation").text = pub.citation_value
        if pub.language_code:
            SubElement(entry, "dc:language").text = pub.language_code
        date_iso = publication_date_iso(pub)
        if date_iso:
            SubElement(entry, "dcterms:issued").text = date_iso
        SubElement(entry, "dc:type").text = item_type
        for name in contributor_names(pub):
            entry_author = SubElement(entry, "author")
            SubElement(entry_author, "name").text = name

    return tostring(feed, encoding="utf-8", xml_declaration=True)
|
||||
|
||||
|
||||
def generate_eprints_import_xml(
    researcher: Researcher,
    publications: Iterable[Publication],
) -> bytes:
    """Build an EPrints (EP3) import document with one ``eprint`` per publication.

    ``eprintid`` and ``dir`` are derived from the 1-based position in
    ``publications``. ``datestamp``, ``lastmod`` and ``status_changed`` are
    set to today's UTC date. When a publication lists no contributors, the
    researcher's name (if any) is used as the only creator.

    Args:
        researcher: Owner of the publications.
        publications: Publications to export.

    Returns:
        UTF-8 encoded XML including the XML declaration.
    """
    # Local import: the module header only imports ``datetime`` itself.
    from datetime import timezone

    root = Element("eprints", xmlns=EPRINTS_NS)
    # datetime.utcnow() is deprecated; use an aware UTC "now" instead.
    today = datetime.now(timezone.utc).strftime("%Y-%m-%d")

    for index, pub in enumerate(publications, start=1):
        eprint = SubElement(root, "eprint")
        SubElement(eprint, "eprintid").text = str(index)
        SubElement(eprint, "rev_number").text = "1"
        SubElement(eprint, "documents")
        SubElement(eprint, "eprint_status").text = "archive"
        SubElement(eprint, "userid").text = "1"
        SubElement(eprint, "dir").text = f"disk00000/00/00/{index:02d}"
        SubElement(eprint, "datestamp").text = today
        SubElement(eprint, "lastmod").text = today
        SubElement(eprint, "status_changed").text = today
        SubElement(eprint, "type").text = eprints_type(pub)

        titles = SubElement(eprint, "titles")
        title_item = SubElement(titles, "item")
        SubElement(title_item, "lang").text = pub.language_code or "en"
        SubElement(title_item, "title").text = pub.title or "Untitled"

        creators = SubElement(eprint, "creators")
        names = contributor_names(pub) or ([researcher.name] if researcher.name else [])
        for name in names:
            family, given = split_person_name(name)
            item = SubElement(creators, "item")
            name_el = SubElement(item, "name")
            SubElement(name_el, "family").text = family
            if given:
                SubElement(name_el, "given").text = given

        if pub.pub_year:
            SubElement(eprint, "date").text = str(pub.pub_year)
        if pub.journal:
            SubElement(eprint, "publication").text = pub.journal
        if pub.doi:
            SubElement(eprint, "doi").text = pub.doi
        if pub.url:
            SubElement(eprint, "official_url").text = pub.url
        if pub.short_description:
            SubElement(eprint, "abstract").text = pub.short_description

        # Emit a single <note>: the citation and the ORCID provenance used to
        # be two sibling <note> elements.
        # NOTE(review): this assumes the target repository's ``note`` field is
        # single-valued, as in stock EPrints - confirm against its config.
        notes = []
        if pub.citation_value:
            notes.append(pub.citation_value)
        if researcher.orcid_id:
            notes.append(f"Source ORCID: {researcher.orcid_id}")
        if notes:
            SubElement(eprint, "note").text = "\n".join(notes)

    return tostring(root, encoding="utf-8", xml_declaration=True)
|
||||
|
||||
|
||||
def generate_repository_xml(
    researcher: Researcher,
    publications: List[Publication],
    profile: str,
) -> bytes:
    """Generate the export XML for ``profile``.

    ``dublin_core``, ``dspace`` and ``eprints`` are handled by this module's
    generators; any other valid profile (i.e. ``generic``) is delegated to
    ``SWORDGenerator.generate_feed_xml``.

    Raises:
        ValueError: If ``profile`` is not a supported profile name.
    """
    selected = normalize_profile(profile)

    builders = {
        "dublin_core": generate_dublin_core_records_xml,
        "dspace": generate_dspace_sword_feed_xml,
        "eprints": generate_eprints_import_xml,
    }
    builder = builders.get(selected)
    if builder is not None:
        return builder(researcher, publications)

    # Imported only when the generic profile is requested.
    from app.services.sword_generator import SWORDGenerator

    return SWORDGenerator.generate_feed_xml(researcher, publications)
|
||||
|
||||
|
||||
def export_filename_for_profile(profile: str) -> str:
    """Return the file name used for the XML export of ``profile``.

    Raises:
        ValueError: If ``profile`` is not a supported profile name.
    """
    filenames = {
        "generic": "generic-atom.xml",
        "dublin_core": "dublin_core.xml",
        "dspace": "dspace-atom.xml",
        "eprints": "eprints-import.xml",
    }
    canonical = normalize_profile(profile)
    return filenames[canonical]
|
||||
@@ -1,33 +1,52 @@
|
||||
import io
|
||||
import zipfile
|
||||
import json
|
||||
import zipfile
|
||||
from datetime import datetime
|
||||
from xml.etree.ElementTree import Element, SubElement, tostring
|
||||
|
||||
from app.db.models import Publication, Researcher
|
||||
from app.services.repository_export import (
|
||||
export_filename_for_profile,
|
||||
generate_dspace_import_csv,
|
||||
generate_dspace_item_dublin_core,
|
||||
generate_dublin_core_records_xml,
|
||||
generate_dspace_sword_feed_xml,
|
||||
generate_eprints_import_xml,
|
||||
)
|
||||
from app.services.sword_generator import SWORDGenerator
|
||||
|
||||
# ---------------------------------------------------------
|
||||
# Clase de generador de ZIP
|
||||
# ---------------------------------------------------------
|
||||
|
||||
class ZIPGenerator:
|
||||
|
||||
# ---------------------------------------------------------
|
||||
# Función auxiliar: generar manifest.txt
|
||||
# ---------------------------------------------------------
|
||||
|
||||
@staticmethod
|
||||
def generate_manifest(researcher, publications):
|
||||
lines = [
|
||||
"SWORD Deposit Package",
|
||||
"----------------------",
|
||||
"ORCID Export Package",
|
||||
"--------------------",
|
||||
f"Researcher ORCID: {researcher.orcid_id}",
|
||||
f"Researcher Name: {researcher.name}",
|
||||
f"Researcher UUID: {researcher.id}",
|
||||
f"Total Publications: {len(publications)}",
|
||||
f"Generated At: {datetime.utcnow().isoformat()}Z",
|
||||
"",
|
||||
"Files:",
|
||||
"- sword.xml → Atom genérico ORCID (compatibilidad)",
|
||||
"- formats/generic-atom.xml",
|
||||
"- formats/dublin_core.xml → Dublin Core (un registro por obra)",
|
||||
"- formats/dspace-atom.xml → Atom con metadatos DSpace",
|
||||
"- formats/dspace-import.csv → Importación batch CSV DSpace",
|
||||
"- formats/eprints-import.xml → Importación XML EPrints",
|
||||
"- dspace-saf/item_NNNNN/dublin_core.xml → Simple Archive Format (DSpace)",
|
||||
"- metadata.json → Metadatos completos (JSON)",
|
||||
"- mets.xml → METS simplificado (legacy)",
|
||||
"",
|
||||
"Repository hints:",
|
||||
"- DSpace: use dspace-saf/ (SAF) or formats/dspace-import.csv",
|
||||
"- EPrints: import formats/eprints-import.xml via admin tools",
|
||||
"- Dublin Core: use formats/dublin_core.xml",
|
||||
"",
|
||||
"SWORD endpoint profile query:",
|
||||
" ?profile=generic|dublin_core|dspace|eprints",
|
||||
"",
|
||||
"Publications:",
|
||||
]
|
||||
|
||||
@@ -39,9 +58,6 @@ class ZIPGenerator:
|
||||
|
||||
return "\n".join(lines)
|
||||
|
||||
# ---------------------------------------------------------
|
||||
# METADATA.JSON — ahora con TODOS los campos
|
||||
# ---------------------------------------------------------
|
||||
@staticmethod
|
||||
def generate_metadata_json(researcher, publications):
|
||||
data = {
|
||||
@@ -82,11 +98,10 @@ class ZIPGenerator:
|
||||
|
||||
return json.dumps(data, indent=4)
|
||||
|
||||
# ---------------------------------------------------------
|
||||
# METS.XML — ampliado con más metadatos
|
||||
# ---------------------------------------------------------
|
||||
@staticmethod
|
||||
def generate_mets_xml(researcher, publications):
|
||||
from xml.etree.ElementTree import Element, SubElement, tostring
|
||||
|
||||
mets = Element("mets", xmlns="http://www.loc.gov/METS/")
|
||||
|
||||
header = SubElement(mets, "metsHdr")
|
||||
@@ -98,42 +113,26 @@ class ZIPGenerator:
|
||||
xml_data = SubElement(md_wrap, "xmlData")
|
||||
|
||||
for pub in publications:
|
||||
# Title
|
||||
SubElement(xml_data, "{http://purl.org/dc/elements/1.1/}title").text = pub.title
|
||||
|
||||
# Subtitle
|
||||
if pub.subtitle:
|
||||
SubElement(xml_data, "{http://purl.org/dc/elements/1.1/}description").text = pub.subtitle
|
||||
|
||||
# DOI
|
||||
if pub.doi:
|
||||
SubElement(xml_data, "{http://purl.org/dc/elements/1.1/}identifier").text = f"doi:{pub.doi}"
|
||||
|
||||
# Journal
|
||||
if pub.journal:
|
||||
SubElement(xml_data, "{http://purl.org/dc/elements/1.1/}source").text = pub.journal
|
||||
|
||||
# URL
|
||||
if pub.url:
|
||||
SubElement(xml_data, "{http://purl.org/dc/elements/1.1/}relation").text = pub.url
|
||||
|
||||
# Description
|
||||
if pub.short_description:
|
||||
SubElement(xml_data, "{http://purl.org/dc/elements/1.1/}description").text = pub.short_description
|
||||
|
||||
# Citation
|
||||
if pub.citation_value:
|
||||
SubElement(xml_data, "{http://purl.org/dc/elements/1.1/}bibliographicCitation").text = pub.citation_value
|
||||
|
||||
# Language
|
||||
SubElement(
|
||||
xml_data,
|
||||
"{http://purl.org/dc/elements/1.1/}bibliographicCitation",
|
||||
).text = pub.citation_value
|
||||
if pub.language_code:
|
||||
SubElement(xml_data, "{http://purl.org/dc/elements/1.1/}language").text = pub.language_code
|
||||
|
||||
# Country
|
||||
if pub.country:
|
||||
SubElement(xml_data, "{http://purl.org/dc/elements/1.1/}coverage").text = pub.country
|
||||
|
||||
# Date
|
||||
if pub.pub_year:
|
||||
date_str = str(pub.pub_year)
|
||||
if pub.pub_month:
|
||||
@@ -141,29 +140,51 @@ class ZIPGenerator:
|
||||
if pub.pub_day:
|
||||
date_str += f"-{pub.pub_day:02d}"
|
||||
SubElement(xml_data, "{http://purl.org/dc/elements/1.1/}date").text = date_str
|
||||
|
||||
# Type
|
||||
if pub.type:
|
||||
SubElement(xml_data, "{http://purl.org/dc/elements/1.1/}type").text = pub.type
|
||||
|
||||
return tostring(mets, encoding="utf-8", xml_declaration=True)
|
||||
|
||||
# ---------------------------------------------------------
|
||||
# ZIP FINAL
|
||||
# ---------------------------------------------------------
|
||||
@staticmethod
def generate_zip(researcher, publications):
    """Build the multi-format export archive and return it as bytes.

    Archive layout:

    * ``sword.xml`` - generic Atom feed (kept for compatibility).
    * ``manifest.txt``, ``metadata.json``, ``mets.xml``.
    * ``formats/`` - one file per export profile plus ``dspace-import.csv``.
    * ``dspace-saf/item_NNNNN/dublin_core.xml`` - one directory per
      publication, numbered from 1.

    Args:
        researcher: Owner of the publications.
        publications: Publications to package.

    Returns:
        The deflate-compressed ZIP file content.
    """
    # The collection is walked once per generator below; materialize it so a
    # one-shot iterable is not exhausted after the first pass.
    publications = list(publications)

    generic_xml = SWORDGenerator.generate_feed_xml(researcher, publications)

    # Insertion order is the order in which members are written.
    members = {
        "sword.xml": generic_xml,
        "manifest.txt": ZIPGenerator.generate_manifest(researcher, publications),
        "metadata.json": ZIPGenerator.generate_metadata_json(researcher, publications),
        "mets.xml": ZIPGenerator.generate_mets_xml(researcher, publications),
        "formats/dspace-import.csv": generate_dspace_import_csv(researcher, publications),
        f"formats/{export_filename_for_profile('generic')}": generic_xml,
        f"formats/{export_filename_for_profile('dublin_core')}": generate_dublin_core_records_xml(
            researcher, publications
        ),
        f"formats/{export_filename_for_profile('dspace')}": generate_dspace_sword_feed_xml(
            researcher, publications
        ),
        f"formats/{export_filename_for_profile('eprints')}": generate_eprints_import_xml(
            researcher, publications
        ),
    }

    buffer = io.BytesIO()
    with zipfile.ZipFile(buffer, "w", zipfile.ZIP_DEFLATED) as zf:
        for path, content in members.items():
            zf.writestr(path, content)

        for index, pub in enumerate(publications, start=1):
            zf.writestr(
                f"dspace-saf/item_{index:05d}/dublin_core.xml",
                generate_dspace_item_dublin_core(pub),
            )

    return buffer.getvalue()
|
||||
|
||||
@@ -7,13 +7,15 @@ import {
|
||||
SparkleIcon,
|
||||
} from "../ui/Icons";
|
||||
import { Spinner } from "../ui/Spinner";
|
||||
import { SwordProfileSelect } from "./SwordProfileSelect";
|
||||
import { DEFAULT_EXPORT_PROFILE } from "../../utils/exportProfiles";
|
||||
|
||||
const FORMATS = [
|
||||
{
|
||||
format: "xml",
|
||||
icon: <DocumentIcon size={20} className="shrink-0 text-ink-secondary" />,
|
||||
label: "SWORD XML",
|
||||
desc: "Metadatos en formato Atom",
|
||||
desc: "Según destino seleccionado",
|
||||
},
|
||||
{
|
||||
format: "zip",
|
||||
@@ -31,6 +33,8 @@ const FORMATS = [
|
||||
* - `newPublicationsCount` → cuántas publicaciones tiene downloaded_by_me=false.
|
||||
* - `selectedCount` → publicaciones seleccionadas manualmente.
|
||||
* - `exportingFormat` → formato en curso (pone el botón en loading).
|
||||
* - `swordProfile` → perfil SWORD (dublin_core, dspace, eprints…).
|
||||
* - `onSwordProfileChange` → callback al cambiar destino.
|
||||
*/
|
||||
export function ExportDropdown({
|
||||
onExport,
|
||||
@@ -38,6 +42,8 @@ export function ExportDropdown({
|
||||
selectedCount = 0,
|
||||
isAuthenticated = false,
|
||||
newPublicationsCount = 0,
|
||||
swordProfile = DEFAULT_EXPORT_PROFILE,
|
||||
onSwordProfileChange,
|
||||
}) {
|
||||
const [open, setOpen] = useState(false);
|
||||
const rootRef = useRef(null);
|
||||
@@ -57,7 +63,7 @@ export function ExportDropdown({
|
||||
|
||||
function handlePick(format) {
|
||||
setOpen(false);
|
||||
onExport(format);
|
||||
onExport(format, format === "xml" ? swordProfile : undefined);
|
||||
}
|
||||
|
||||
// Label logic:
|
||||
@@ -80,6 +86,13 @@ export function ExportDropdown({
|
||||
}
|
||||
|
||||
return (
|
||||
<div className="flex flex-wrap items-center justify-end gap-2">
|
||||
<SwordProfileSelect
|
||||
id="dashboard-sword-profile"
|
||||
value={swordProfile}
|
||||
onChange={onSwordProfileChange}
|
||||
/>
|
||||
|
||||
<div className="relative" ref={rootRef}>
|
||||
<button
|
||||
type="button"
|
||||
@@ -125,5 +138,6 @@ export function ExportDropdown({
|
||||
</div>
|
||||
)}
|
||||
</div>
|
||||
</div>
|
||||
);
|
||||
}
|
||||
|
||||
@@ -0,0 +1,35 @@
|
||||
import {
|
||||
DEFAULT_EXPORT_PROFILE,
|
||||
EXPORT_PROFILE_OPTIONS,
|
||||
} from "../../utils/exportProfiles";
|
||||
|
||||
/**
|
||||
* Selector de destino para exportación SWORD XML (DSpace, EPrints, Dublin Core…).
|
||||
*/
|
||||
export function SwordProfileSelect({
|
||||
value = DEFAULT_EXPORT_PROFILE,
|
||||
onChange,
|
||||
id = "sword-export-profile",
|
||||
className = "",
|
||||
}) {
|
||||
return (
|
||||
<label
|
||||
htmlFor={id}
|
||||
className={`flex items-center gap-2 text-sm ${className}`.trim()}
|
||||
>
|
||||
<span className="whitespace-nowrap text-ink-tertiary">Destino:</span>
|
||||
<select
|
||||
id={id}
|
||||
value={value}
|
||||
onChange={(event) => onChange(event.target.value)}
|
||||
className="min-w-[9.5rem] rounded-lg border border-surface-border-strong bg-surface-primary px-2.5 py-2 text-sm font-medium text-ink-primary transition-colors hover:bg-surface-secondary focus:border-brand-primary focus:outline-none focus:ring-2 focus:ring-brand-primary/20"
|
||||
>
|
||||
{EXPORT_PROFILE_OPTIONS.map(({ value: optionValue, label }) => (
|
||||
<option key={optionValue} value={optionValue}>
|
||||
{label}
|
||||
</option>
|
||||
))}
|
||||
</select>
|
||||
</label>
|
||||
);
|
||||
}
|
||||
@@ -16,6 +16,7 @@ import {
|
||||
syncResearcher,
|
||||
} from "../services/api";
|
||||
import { isValidOrcid } from "../utils/orcid";
|
||||
import { DEFAULT_EXPORT_PROFILE, swordXmlFilename } from "../utils/exportProfiles";
|
||||
import { useAuth } from "../contexts/AuthContext";
|
||||
|
||||
const SUCCESS_FLASH_MS = 3000;
|
||||
@@ -49,6 +50,7 @@ export function DashboardPage() {
|
||||
|
||||
const [syncStatus, setSyncStatus] = useState("idle"); // idle | loading | success
|
||||
const [exportingFormat, setExportingFormat] = useState(null);
|
||||
const [swordProfile, setSwordProfile] = useState(DEFAULT_EXPORT_PROFILE);
|
||||
|
||||
const [selectedIds, setSelectedIds] = useState(() => new Set());
|
||||
|
||||
@@ -138,7 +140,7 @@ export function DashboardPage() {
|
||||
}
|
||||
}
|
||||
|
||||
async function handleExport(format) {
|
||||
async function handleExport(format, profile = DEFAULT_EXPORT_PROFILE) {
|
||||
setExportingFormat(format);
|
||||
try {
|
||||
let ids;
|
||||
@@ -162,13 +164,17 @@ export function DashboardPage() {
|
||||
|
||||
const { blob } = await downloadExport(orcid, format, {
|
||||
publicationIds: ids,
|
||||
profile: format === "xml" ? profile : undefined,
|
||||
});
|
||||
if (blob) {
|
||||
const objectUrl = URL.createObjectURL(blob);
|
||||
const anchor = document.createElement("a");
|
||||
anchor.href = objectUrl;
|
||||
const extension = format === "xml" ? "xml" : format;
|
||||
anchor.download = `sword-${orcid}.${extension}`;
|
||||
anchor.download =
|
||||
format === "xml"
|
||||
? swordXmlFilename(orcid, profile)
|
||||
: `sword-${orcid}.${extension}`;
|
||||
document.body.appendChild(anchor);
|
||||
anchor.click();
|
||||
anchor.remove();
|
||||
@@ -220,6 +226,8 @@ export function DashboardPage() {
|
||||
selectedCount={selectedIds.size}
|
||||
isAuthenticated={isAuthenticated}
|
||||
newPublicationsCount={newPublicationIds.length}
|
||||
swordProfile={swordProfile}
|
||||
onSwordProfileChange={setSwordProfile}
|
||||
/>
|
||||
</>
|
||||
}
|
||||
|
||||
@@ -14,6 +14,8 @@ import {
|
||||
UsersIcon,
|
||||
} from "../components/ui/Icons";
|
||||
import { downloadExport, searchResearchersBulk } from "../services/api";
|
||||
import { DEFAULT_EXPORT_PROFILE, swordXmlFilename } from "../utils/exportProfiles";
|
||||
import { SwordProfileSelect } from "../components/dashboard/SwordProfileSelect";
|
||||
import { useAuth } from "../contexts/AuthContext";
|
||||
|
||||
/**
|
||||
@@ -34,6 +36,7 @@ export function GroupResultsPage() {
|
||||
const [errors, setErrors] = useState([]);
|
||||
const [loading, setLoading] = useState(true);
|
||||
const [globalExporting, setGlobalExporting] = useState(null); // format | null
|
||||
const [swordProfile, setSwordProfile] = useState(DEFAULT_EXPORT_PROFILE);
|
||||
|
||||
// Track per-researcher export state (format | null)
|
||||
const [cardExporting, setCardExporting] = useState({});
|
||||
@@ -99,7 +102,7 @@ export function GroupResultsPage() {
|
||||
[results],
|
||||
);
|
||||
|
||||
async function handleGlobalExport(format) {
|
||||
async function handleGlobalExport(format, profile = DEFAULT_EXPORT_PROFILE) {
|
||||
const ids = isAuthenticated ? allNewIds : allIds;
|
||||
if (ids.length === 0) {
|
||||
toast.info(
|
||||
@@ -116,12 +119,16 @@ export function GroupResultsPage() {
|
||||
// since the endpoint is POST /export/{format}/publications (no orcid needed)
|
||||
const { blob } = await downloadExport(null, format, {
|
||||
publicationIds: ids,
|
||||
profile: format === "xml" ? profile : undefined,
|
||||
});
|
||||
if (blob) {
|
||||
const objectUrl = URL.createObjectURL(blob);
|
||||
const anchor = document.createElement("a");
|
||||
anchor.href = objectUrl;
|
||||
anchor.download = `sword-group.${format === "xml" ? "xml" : format}`;
|
||||
anchor.download =
|
||||
format === "xml"
|
||||
? swordXmlFilename("group", profile)
|
||||
: `sword-group.${format}`;
|
||||
document.body.appendChild(anchor);
|
||||
anchor.click();
|
||||
anchor.remove();
|
||||
@@ -139,7 +146,13 @@ export function GroupResultsPage() {
|
||||
}
|
||||
}
|
||||
|
||||
async function handleCardExport(orcidId, format, newIds, totalIds) {
|
||||
async function handleCardExport(
|
||||
orcidId,
|
||||
format,
|
||||
newIds,
|
||||
totalIds,
|
||||
profile = DEFAULT_EXPORT_PROFILE,
|
||||
) {
|
||||
const ids = isAuthenticated ? newIds : totalIds;
|
||||
if (ids.length === 0) {
|
||||
toast.info("No hay publicaciones para exportar");
|
||||
@@ -149,12 +162,16 @@ export function GroupResultsPage() {
|
||||
try {
|
||||
const { blob } = await downloadExport(orcidId, format, {
|
||||
publicationIds: ids,
|
||||
profile: format === "xml" ? profile : undefined,
|
||||
});
|
||||
if (blob) {
|
||||
const objectUrl = URL.createObjectURL(blob);
|
||||
const anchor = document.createElement("a");
|
||||
anchor.href = objectUrl;
|
||||
anchor.download = `sword-${orcidId}.${format === "xml" ? "xml" : format}`;
|
||||
anchor.download =
|
||||
format === "xml"
|
||||
? swordXmlFilename(orcidId, profile)
|
||||
: `sword-${orcidId}.${format}`;
|
||||
document.body.appendChild(anchor);
|
||||
anchor.click();
|
||||
anchor.remove();
|
||||
@@ -216,12 +233,17 @@ export function GroupResultsPage() {
|
||||
|
||||
{/* Global export buttons */}
|
||||
{!loading && results.length > 0 && (
|
||||
<div className="flex gap-2">
|
||||
<div className="flex flex-wrap items-center justify-end gap-2">
|
||||
<SwordProfileSelect
|
||||
id="group-sword-profile"
|
||||
value={swordProfile}
|
||||
onChange={setSwordProfile}
|
||||
/>
|
||||
{["xml", "zip"].map((fmt) => (
|
||||
<button
|
||||
key={fmt}
|
||||
type="button"
|
||||
onClick={() => handleGlobalExport(fmt)}
|
||||
onClick={() => handleGlobalExport(fmt, swordProfile)}
|
||||
disabled={globalDisabled}
|
||||
className="inline-flex items-center gap-2 rounded-lg border border-surface-border-strong bg-surface-primary px-4 py-2 text-sm font-medium text-ink-primary transition-colors enabled:hover:bg-surface-secondary disabled:cursor-not-allowed disabled:opacity-60"
|
||||
>
|
||||
@@ -269,8 +291,10 @@ export function GroupResultsPage() {
|
||||
fmt,
|
||||
newIds,
|
||||
totalIds,
|
||||
swordProfile,
|
||||
)
|
||||
}
|
||||
swordProfile={swordProfile}
|
||||
/>
|
||||
))}
|
||||
</div>
|
||||
@@ -326,7 +350,13 @@ export function GroupResultsPage() {
|
||||
|
||||
/* ─────────────────────────── Researcher card ─────────────────────────── */
|
||||
|
||||
function ResearcherResultCard({ bundle, isAuthenticated, exporting, onExport }) {
|
||||
function ResearcherResultCard({
|
||||
bundle,
|
||||
isAuthenticated,
|
||||
exporting,
|
||||
onExport,
|
||||
swordProfile,
|
||||
}) {
|
||||
const researcher = bundle.researcher ?? {};
|
||||
const publications = bundle.publications ?? [];
|
||||
const totalRecords = bundle.totalRecords ?? publications.length;
|
||||
|
||||
@@ -402,13 +402,15 @@ export function getExportUrl(orcidId, format) {
|
||||
* `["id1", "id2", ...]` (array crudo, tal como espera el backend).
|
||||
* - Si viene vacío/undefined usamos el endpoint masivo
|
||||
* `GET /export/{sword|zip}/researcher/{orcid_id}` y descargamos todo.
|
||||
* - Para SWORD XML, `profile` añade `?profile=dublin_core|dspace|eprints`
|
||||
* (genérico = sin query).
|
||||
*
|
||||
* Lanza `ApiError` en fallo.
|
||||
*/
|
||||
export async function downloadExport(
|
||||
orcidId,
|
||||
format,
|
||||
{ signal, publicationIds } = {},
|
||||
{ signal, publicationIds, profile } = {},
|
||||
) {
|
||||
if (USE_MOCKS) {
|
||||
await mockExport(format);
|
||||
@@ -421,10 +423,15 @@ export async function downloadExport(
|
||||
? publicationIds
|
||||
: null;
|
||||
|
||||
const url = ids
|
||||
let url = ids
|
||||
? `${BASE_URL}/export/${segment}/publications`
|
||||
: `${BASE_URL}/export/${segment}/researcher/${encodeURIComponent(orcidId)}`;
|
||||
|
||||
if (format === "xml" && profile && profile !== "generic") {
|
||||
const separator = url.includes("?") ? "&" : "?";
|
||||
url += `${separator}profile=${encodeURIComponent(profile)}`;
|
||||
}
|
||||
|
||||
const init = {
|
||||
method: ids ? "POST" : "GET",
|
||||
signal,
|
||||
|
||||
@@ -0,0 +1,15 @@
|
||||
/**
 * SWORD XML export profiles (the `value` is what gets sent as the `profile`
 * query parameter to the backend).
 *
 * Each entry is a `{ value, label }` pair: `value` is the profile identifier,
 * `label` is the human-readable text for that profile.
 */
|
||||
export const EXPORT_PROFILE_OPTIONS = [
|
||||
{ value: "generic", label: "Genérico (ORCID)" },
|
||||
{ value: "dublin_core", label: "Dublin Core" },
|
||||
|
||||
{ value: "dspace", label: "DSpace" },
|
||||
{ value: "eprints", label: "EPrints" },
|
||||
];
|
||||
|
||||
// Profile used when the caller does not choose one. It matches the "generic"
// entry above; swordXmlFilename adds no profile suffix to the filename for it.
export const DEFAULT_EXPORT_PROFILE = "generic";
|
||||
|
||||
/**
 * Build the download filename for a SWORD XML export.
 *
 * @param {string} baseName - Identifier placed at the end of the name
 *   (callers in this changeset pass an ORCID iD or the literal "group").
 * @param {string} [profile=DEFAULT_EXPORT_PROFILE] - Export profile value.
 * @returns {string} `sword-<baseName>.xml` when the profile is empty or the
 *   default one, otherwise `sword-<profile>-<baseName>.xml`.
 */
export function swordXmlFilename(baseName, profile = DEFAULT_EXPORT_PROFILE) {
  // A falsy profile (e.g. null or "") is treated the same as the default:
  // the filename carries no profile segment.
  const usesDefaultProfile = !profile || profile === DEFAULT_EXPORT_PROFILE;
  if (usesDefaultProfile) {
    return `sword-${baseName}.xml`;
  }
  return `sword-${profile}-${baseName}.xml`;
}
|
||||
Reference in New Issue
Block a user