feat(export): perfiles DSpace/EPrints/Dublin Core y selector SWORD en UI

Backend: generadores por repositorio, ZIP multiformato y parámetro de consulta `profile` en /export/sword. Frontend: selector Destino que envía `profile` al descargar SWORD XML.
2026-05-20 13:25:35 +02:00
parent 9b596af494
commit aa2e7280dc
9 changed files with 585 additions and 64 deletions
@@ -1,33 +1,52 @@
 import io
-import zipfile
 import json
+import zipfile
 from datetime import datetime
-from xml.etree.ElementTree import Element, SubElement, tostring

 from app.db.models import Publication, Researcher
+from app.services.repository_export import (
+    export_filename_for_profile,
+    generate_dspace_import_csv,
+    generate_dspace_item_dublin_core,
+    generate_dublin_core_records_xml,
+    generate_dspace_sword_feed_xml,
+    generate_eprints_import_xml,
+)
 from app.services.sword_generator import SWORDGenerator

-# ---------------------------------------------------------
-# Clase de generador de ZIP
-# ---------------------------------------------------------

 class ZIPGenerator:

-    # ---------------------------------------------------------
-    # Función auxiliar: generar manifest.txt
-    # ---------------------------------------------------------
-
    @staticmethod
    def generate_manifest(researcher, publications):
        lines = [
-            "SWORD Deposit Package",
-            "----------------------",
+            "ORCID Export Package",
+            "--------------------",
            f"Researcher ORCID: {researcher.orcid_id}",
            f"Researcher Name: {researcher.name}",
            f"Researcher UUID: {researcher.id}",
            f"Total Publications: {len(publications)}",
            f"Generated At: {datetime.utcnow().isoformat()}Z",
            "",
+            "Files:",
+            "- sword.xml              → Atom genérico ORCID (compatibilidad)",
+            "- formats/generic-atom.xml",
+            "- formats/dublin_core.xml → Dublin Core (un registro por obra)",
+            "- formats/dspace-atom.xml → Atom con metadatos DSpace",
+            "- formats/dspace-import.csv → Importación batch CSV DSpace",
+            "- formats/eprints-import.xml → Importación XML EPrints",
+            "- dspace-saf/item_NNNNN/dublin_core.xml → Simple Archive Format (DSpace)",
+            "- metadata.json          → Metadatos completos (JSON)",
+            "- mets.xml               → METS simplificado (legacy)",
+            "",
+            "Repository hints:",
+            "- DSpace: use dspace-saf/ (SAF) or formats/dspace-import.csv",
+            "- EPrints: import formats/eprints-import.xml via admin tools",
+            "- Dublin Core: use formats/dublin_core.xml",
+            "",
+            "SWORD endpoint profile query:",
+            "  ?profile=generic|dublin_core|dspace|eprints",
+            "",
            "Publications:",
        ]

@@ -39,9 +58,6 @@ class ZIPGenerator:

        return "\n".join(lines)

-    # ---------------------------------------------------------
-    # METADATA.JSON — ahora con TODOS los campos
-    # ---------------------------------------------------------
    @staticmethod
    def generate_metadata_json(researcher, publications):
        data = {
@@ -82,11 +98,10 @@ class ZIPGenerator:

        return json.dumps(data, indent=4)

-    # ---------------------------------------------------------
-    # METS.XML — ampliado con más metadatos
-    # ---------------------------------------------------------
    @staticmethod
    def generate_mets_xml(researcher, publications):
+        from xml.etree.ElementTree import Element, SubElement, tostring
+
        mets = Element("mets", xmlns="http://www.loc.gov/METS/")

        header = SubElement(mets, "metsHdr")
@@ -98,42 +113,26 @@ class ZIPGenerator:
        xml_data = SubElement(md_wrap, "xmlData")

        for pub in publications:
-            # Title
            SubElement(xml_data, "{http://purl.org/dc/elements/1.1/}title").text = pub.title
-
-            # Subtitle
            if pub.subtitle:
                SubElement(xml_data, "{http://purl.org/dc/elements/1.1/}description").text = pub.subtitle
-
-            # DOI
            if pub.doi:
                SubElement(xml_data, "{http://purl.org/dc/elements/1.1/}identifier").text = f"doi:{pub.doi}"
-
-            # Journal
            if pub.journal:
                SubElement(xml_data, "{http://purl.org/dc/elements/1.1/}source").text = pub.journal
-
-            # URL
            if pub.url:
                SubElement(xml_data, "{http://purl.org/dc/elements/1.1/}relation").text = pub.url
-
-            # Description
            if pub.short_description:
                SubElement(xml_data, "{http://purl.org/dc/elements/1.1/}description").text = pub.short_description
-
-            # Citation
            if pub.citation_value:
-                SubElement(xml_data, "{http://purl.org/dc/elements/1.1/}bibliographicCitation").text = pub.citation_value
-
-            # Language
+                SubElement(
+                    xml_data,
+                    "{http://purl.org/dc/elements/1.1/}bibliographicCitation",
+                ).text = pub.citation_value
            if pub.language_code:
                SubElement(xml_data, "{http://purl.org/dc/elements/1.1/}language").text = pub.language_code
-
-            # Country
            if pub.country:
                SubElement(xml_data, "{http://purl.org/dc/elements/1.1/}coverage").text = pub.country
-
-            # Date
            if pub.pub_year:
                date_str = str(pub.pub_year)
                if pub.pub_month:
@@ -141,29 +140,51 @@ class ZIPGenerator:
                if pub.pub_day:
                    date_str += f"-{pub.pub_day:02d}"
                SubElement(xml_data, "{http://purl.org/dc/elements/1.1/}date").text = date_str
-
-            # Type
            if pub.type:
                SubElement(xml_data, "{http://purl.org/dc/elements/1.1/}type").text = pub.type

        return tostring(mets, encoding="utf-8", xml_declaration=True)

-    # ---------------------------------------------------------
-    # ZIP FINAL
-    # ---------------------------------------------------------
    @staticmethod
    def generate_zip(researcher, publications):
-        xml_bytes = SWORDGenerator.generate_feed_xml(researcher, publications)
+        generic_xml = SWORDGenerator.generate_feed_xml(researcher, publications)
        manifest = ZIPGenerator.generate_manifest(researcher, publications)
        metadata_json = ZIPGenerator.generate_metadata_json(researcher, publications)
        mets_xml = ZIPGenerator.generate_mets_xml(researcher, publications)

+        format_files = {
+            f"formats/{export_filename_for_profile('generic')}": generic_xml,
+            f"formats/{export_filename_for_profile('dublin_core')}": generate_dublin_core_records_xml(
+                researcher, publications
+            ),
+            f"formats/{export_filename_for_profile('dspace')}": generate_dspace_sword_feed_xml(
+                researcher, publications
+            ),
+            f"formats/{export_filename_for_profile('eprints')}": generate_eprints_import_xml(
+                researcher, publications
+            ),
+        }
+
        mem_file = io.BytesIO()
        with zipfile.ZipFile(mem_file, "w", zipfile.ZIP_DEFLATED) as zf:
-            zf.writestr("sword.xml", xml_bytes)
+            zf.writestr("sword.xml", generic_xml)
            zf.writestr("manifest.txt", manifest)
            zf.writestr("metadata.json", metadata_json)
            zf.writestr("mets.xml", mets_xml)
+            zf.writestr(
+                "formats/dspace-import.csv",
+                generate_dspace_import_csv(researcher, publications),
+            )
+
+            for path, content in format_files.items():
+                zf.writestr(path, content)
+
+            for index, pub in enumerate(publications, start=1):
+                item_dir = f"dspace-saf/item_{index:05d}"
+                zf.writestr(
+                    f"{item_dir}/dublin_core.xml",
+                    generate_dspace_item_dublin_core(pub),
+                )

        mem_file.seek(0)
        return mem_file.read()