feat(export): perfiles DSpace/EPrints/Dublin Core y selector SWORD en UI

Backend: generadores por repositorio, ZIP multi-formato y query profile en /export/sword. Frontend: selector Destino que envía profile al descargar SWORD XML.
This commit is contained in:
Mireya Cueto Garrido
2026-05-20 13:25:35 +02:00
parent 9b596af494
commit aa2e7280dc
9 changed files with 585 additions and 64 deletions
+65 -44
View File
@@ -1,33 +1,52 @@
import io
import zipfile
import json
import zipfile
from datetime import datetime
from xml.etree.ElementTree import Element, SubElement, tostring
from app.db.models import Publication, Researcher
from app.services.repository_export import (
export_filename_for_profile,
generate_dspace_import_csv,
generate_dspace_item_dublin_core,
generate_dublin_core_records_xml,
generate_dspace_sword_feed_xml,
generate_eprints_import_xml,
)
from app.services.sword_generator import SWORDGenerator
# ---------------------------------------------------------
# Clase de generador de ZIP
# ---------------------------------------------------------
class ZIPGenerator:
# ---------------------------------------------------------
# Función auxiliar: generar manifest.txt
# ---------------------------------------------------------
@staticmethod
def generate_manifest(researcher, publications):
lines = [
"SWORD Deposit Package",
"----------------------",
"ORCID Export Package",
"--------------------",
f"Researcher ORCID: {researcher.orcid_id}",
f"Researcher Name: {researcher.name}",
f"Researcher UUID: {researcher.id}",
f"Total Publications: {len(publications)}",
f"Generated At: {datetime.utcnow().isoformat()}Z",
"",
"Files:",
"- sword.xml → Atom genérico ORCID (compatibilidad)",
"- formats/generic-atom.xml",
"- formats/dublin_core.xml → Dublin Core (un registro por obra)",
"- formats/dspace-atom.xml → Atom con metadatos DSpace",
"- formats/dspace-import.csv → Importación batch CSV DSpace",
"- formats/eprints-import.xml → Importación XML EPrints",
"- dspace-saf/item_NNNNN/dublin_core.xml → Simple Archive Format (DSpace)",
"- metadata.json → Metadatos completos (JSON)",
"- mets.xml → METS simplificado (legacy)",
"",
"Repository hints:",
"- DSpace: use dspace-saf/ (SAF) or formats/dspace-import.csv",
"- EPrints: import formats/eprints-import.xml via admin tools",
"- Dublin Core: use formats/dublin_core.xml",
"",
"SWORD endpoint profile query:",
" ?profile=generic|dublin_core|dspace|eprints",
"",
"Publications:",
]
@@ -39,9 +58,6 @@ class ZIPGenerator:
return "\n".join(lines)
# ---------------------------------------------------------
# METADATA.JSON — ahora con TODOS los campos
# ---------------------------------------------------------
@staticmethod
def generate_metadata_json(researcher, publications):
data = {
@@ -82,11 +98,10 @@ class ZIPGenerator:
return json.dumps(data, indent=4)
# ---------------------------------------------------------
# METS.XML — ampliado con más metadatos
# ---------------------------------------------------------
@staticmethod
def generate_mets_xml(researcher, publications):
from xml.etree.ElementTree import Element, SubElement, tostring
mets = Element("mets", xmlns="http://www.loc.gov/METS/")
header = SubElement(mets, "metsHdr")
@@ -98,42 +113,26 @@ class ZIPGenerator:
xml_data = SubElement(md_wrap, "xmlData")
for pub in publications:
# Title
SubElement(xml_data, "{http://purl.org/dc/elements/1.1/}title").text = pub.title
# Subtitle
if pub.subtitle:
SubElement(xml_data, "{http://purl.org/dc/elements/1.1/}description").text = pub.subtitle
# DOI
if pub.doi:
SubElement(xml_data, "{http://purl.org/dc/elements/1.1/}identifier").text = f"doi:{pub.doi}"
# Journal
if pub.journal:
SubElement(xml_data, "{http://purl.org/dc/elements/1.1/}source").text = pub.journal
# URL
if pub.url:
SubElement(xml_data, "{http://purl.org/dc/elements/1.1/}relation").text = pub.url
# Description
if pub.short_description:
SubElement(xml_data, "{http://purl.org/dc/elements/1.1/}description").text = pub.short_description
# Citation
if pub.citation_value:
SubElement(xml_data, "{http://purl.org/dc/elements/1.1/}bibliographicCitation").text = pub.citation_value
# Language
SubElement(
xml_data,
"{http://purl.org/dc/elements/1.1/}bibliographicCitation",
).text = pub.citation_value
if pub.language_code:
SubElement(xml_data, "{http://purl.org/dc/elements/1.1/}language").text = pub.language_code
# Country
if pub.country:
SubElement(xml_data, "{http://purl.org/dc/elements/1.1/}coverage").text = pub.country
# Date
if pub.pub_year:
date_str = str(pub.pub_year)
if pub.pub_month:
@@ -141,29 +140,51 @@ class ZIPGenerator:
if pub.pub_day:
date_str += f"-{pub.pub_day:02d}"
SubElement(xml_data, "{http://purl.org/dc/elements/1.1/}date").text = date_str
# Type
if pub.type:
SubElement(xml_data, "{http://purl.org/dc/elements/1.1/}type").text = pub.type
return tostring(mets, encoding="utf-8", xml_declaration=True)
# ---------------------------------------------------------
# ZIP FINAL
# ---------------------------------------------------------
@staticmethod
def generate_zip(researcher, publications):
    """Build the multi-format export package and return it as ZIP bytes.

    The archive contains, in this order: the generic Atom feed as
    ``sword.xml``, ``manifest.txt``, ``metadata.json``, ``mets.xml``,
    ``formats/dspace-import.csv``, one file per export profile under
    ``formats/`` and one ``dspace-saf/item_NNNNN/dublin_core.xml`` per
    publication.

    Args:
        researcher: Researcher whose works are exported; passed through
            unchanged to every generator.
        publications: Iterable of publications to export. It is walked
            several times, so it is presumably a list -- TODO confirm
            against callers.

    Returns:
        bytes: The complete ZIP archive held in memory.
    """
    # Render the generic Atom feed once; the same payload is stored both as
    # the top-level sword.xml and under formats/. Generating and writing it
    # twice would add a duplicate "sword.xml" member to the archive.
    generic_xml = SWORDGenerator.generate_feed_xml(researcher, publications)
    manifest = ZIPGenerator.generate_manifest(researcher, publications)
    metadata_json = ZIPGenerator.generate_metadata_json(researcher, publications)
    mets_xml = ZIPGenerator.generate_mets_xml(researcher, publications)

    # Archive path -> payload for every profile-specific export; file names
    # come from export_filename_for_profile.
    format_files = {
        f"formats/{export_filename_for_profile('generic')}": generic_xml,
        f"formats/{export_filename_for_profile('dublin_core')}": generate_dublin_core_records_xml(
            researcher, publications
        ),
        f"formats/{export_filename_for_profile('dspace')}": generate_dspace_sword_feed_xml(
            researcher, publications
        ),
        f"formats/{export_filename_for_profile('eprints')}": generate_eprints_import_xml(
            researcher, publications
        ),
    }

    mem_file = io.BytesIO()
    with zipfile.ZipFile(mem_file, "w", zipfile.ZIP_DEFLATED) as zf:
        zf.writestr("sword.xml", generic_xml)
        zf.writestr("manifest.txt", manifest)
        zf.writestr("metadata.json", metadata_json)
        zf.writestr("mets.xml", mets_xml)
        zf.writestr(
            "formats/dspace-import.csv",
            generate_dspace_import_csv(researcher, publications),
        )
        for path, content in format_files.items():
            zf.writestr(path, content)

        # One numbered item directory per publication, starting at
        # item_00001.
        for index, pub in enumerate(publications, start=1):
            item_dir = f"dspace-saf/item_{index:05d}"
            zf.writestr(
                f"{item_dir}/dublin_core.xml",
                generate_dspace_item_dublin_core(pub),
            )

    # The archive is finalized when the with-block exits; getvalue() returns
    # the whole buffer regardless of the current stream position.
    return mem_file.getvalue()