feat(export): perfiles DSpace/EPrints/Dublin Core y selector SWORD en UI
Backend: generadores por repositorio, ZIP multi-formato y query profile en /export/sword. Frontend: selector Destino que envía profile al descargar SWORD XML.
This commit is contained in:
@@ -1,33 +1,52 @@
|
||||
import io
|
||||
import zipfile
|
||||
import json
|
||||
import zipfile
|
||||
from datetime import datetime
|
||||
from xml.etree.ElementTree import Element, SubElement, tostring
|
||||
|
||||
from app.db.models import Publication, Researcher
|
||||
from app.services.repository_export import (
|
||||
export_filename_for_profile,
|
||||
generate_dspace_import_csv,
|
||||
generate_dspace_item_dublin_core,
|
||||
generate_dublin_core_records_xml,
|
||||
generate_dspace_sword_feed_xml,
|
||||
generate_eprints_import_xml,
|
||||
)
|
||||
from app.services.sword_generator import SWORDGenerator
|
||||
|
||||
# ---------------------------------------------------------
|
||||
# Clase de generador de ZIP
|
||||
# ---------------------------------------------------------
|
||||
|
||||
class ZIPGenerator:
|
||||
|
||||
# ---------------------------------------------------------
|
||||
# Función auxiliar: generar manifest.txt
|
||||
# ---------------------------------------------------------
|
||||
|
||||
@staticmethod
|
||||
def generate_manifest(researcher, publications):
|
||||
lines = [
|
||||
"SWORD Deposit Package",
|
||||
"----------------------",
|
||||
"ORCID Export Package",
|
||||
"--------------------",
|
||||
f"Researcher ORCID: {researcher.orcid_id}",
|
||||
f"Researcher Name: {researcher.name}",
|
||||
f"Researcher UUID: {researcher.id}",
|
||||
f"Total Publications: {len(publications)}",
|
||||
f"Generated At: {datetime.utcnow().isoformat()}Z",
|
||||
"",
|
||||
"Files:",
|
||||
"- sword.xml → Atom genérico ORCID (compatibilidad)",
|
||||
"- formats/generic-atom.xml",
|
||||
"- formats/dublin_core.xml → Dublin Core (un registro por obra)",
|
||||
"- formats/dspace-atom.xml → Atom con metadatos DSpace",
|
||||
"- formats/dspace-import.csv → Importación batch CSV DSpace",
|
||||
"- formats/eprints-import.xml → Importación XML EPrints",
|
||||
"- dspace-saf/item_NNNNN/dublin_core.xml → Simple Archive Format (DSpace)",
|
||||
"- metadata.json → Metadatos completos (JSON)",
|
||||
"- mets.xml → METS simplificado (legacy)",
|
||||
"",
|
||||
"Repository hints:",
|
||||
"- DSpace: use dspace-saf/ (SAF) or formats/dspace-import.csv",
|
||||
"- EPrints: import formats/eprints-import.xml via admin tools",
|
||||
"- Dublin Core: use formats/dublin_core.xml",
|
||||
"",
|
||||
"SWORD endpoint profile query:",
|
||||
" ?profile=generic|dublin_core|dspace|eprints",
|
||||
"",
|
||||
"Publications:",
|
||||
]
|
||||
|
||||
@@ -39,9 +58,6 @@ class ZIPGenerator:
|
||||
|
||||
return "\n".join(lines)
|
||||
|
||||
# ---------------------------------------------------------
|
||||
# METADATA.JSON — ahora con TODOS los campos
|
||||
# ---------------------------------------------------------
|
||||
@staticmethod
|
||||
def generate_metadata_json(researcher, publications):
|
||||
data = {
|
||||
@@ -82,11 +98,10 @@ class ZIPGenerator:
|
||||
|
||||
return json.dumps(data, indent=4)
|
||||
|
||||
# ---------------------------------------------------------
|
||||
# METS.XML — ampliado con más metadatos
|
||||
# ---------------------------------------------------------
|
||||
@staticmethod
|
||||
def generate_mets_xml(researcher, publications):
|
||||
from xml.etree.ElementTree import Element, SubElement, tostring
|
||||
|
||||
mets = Element("mets", xmlns="http://www.loc.gov/METS/")
|
||||
|
||||
header = SubElement(mets, "metsHdr")
|
||||
@@ -98,42 +113,26 @@ class ZIPGenerator:
|
||||
xml_data = SubElement(md_wrap, "xmlData")
|
||||
|
||||
for pub in publications:
|
||||
# Title
|
||||
SubElement(xml_data, "{http://purl.org/dc/elements/1.1/}title").text = pub.title
|
||||
|
||||
# Subtitle
|
||||
if pub.subtitle:
|
||||
SubElement(xml_data, "{http://purl.org/dc/elements/1.1/}description").text = pub.subtitle
|
||||
|
||||
# DOI
|
||||
if pub.doi:
|
||||
SubElement(xml_data, "{http://purl.org/dc/elements/1.1/}identifier").text = f"doi:{pub.doi}"
|
||||
|
||||
# Journal
|
||||
if pub.journal:
|
||||
SubElement(xml_data, "{http://purl.org/dc/elements/1.1/}source").text = pub.journal
|
||||
|
||||
# URL
|
||||
if pub.url:
|
||||
SubElement(xml_data, "{http://purl.org/dc/elements/1.1/}relation").text = pub.url
|
||||
|
||||
# Description
|
||||
if pub.short_description:
|
||||
SubElement(xml_data, "{http://purl.org/dc/elements/1.1/}description").text = pub.short_description
|
||||
|
||||
# Citation
|
||||
if pub.citation_value:
|
||||
SubElement(xml_data, "{http://purl.org/dc/elements/1.1/}bibliographicCitation").text = pub.citation_value
|
||||
|
||||
# Language
|
||||
SubElement(
|
||||
xml_data,
|
||||
"{http://purl.org/dc/elements/1.1/}bibliographicCitation",
|
||||
).text = pub.citation_value
|
||||
if pub.language_code:
|
||||
SubElement(xml_data, "{http://purl.org/dc/elements/1.1/}language").text = pub.language_code
|
||||
|
||||
# Country
|
||||
if pub.country:
|
||||
SubElement(xml_data, "{http://purl.org/dc/elements/1.1/}coverage").text = pub.country
|
||||
|
||||
# Date
|
||||
if pub.pub_year:
|
||||
date_str = str(pub.pub_year)
|
||||
if pub.pub_month:
|
||||
@@ -141,29 +140,51 @@ class ZIPGenerator:
|
||||
if pub.pub_day:
|
||||
date_str += f"-{pub.pub_day:02d}"
|
||||
SubElement(xml_data, "{http://purl.org/dc/elements/1.1/}date").text = date_str
|
||||
|
||||
# Type
|
||||
if pub.type:
|
||||
SubElement(xml_data, "{http://purl.org/dc/elements/1.1/}type").text = pub.type
|
||||
|
||||
return tostring(mets, encoding="utf-8", xml_declaration=True)
|
||||
|
||||
# ---------------------------------------------------------
|
||||
# ZIP FINAL
|
||||
# ---------------------------------------------------------
|
||||
@staticmethod
def generate_zip(researcher, publications):
    """Build the multi-format export package in memory and return it as bytes.

    Archive layout, in write order:
      - sword.xml                 generic Atom feed (same bytes as the
                                  ``generic`` entry under formats/)
      - manifest.txt              output of ``generate_manifest``
      - metadata.json             output of ``generate_metadata_json``
      - mets.xml                  output of ``generate_mets_xml``
      - formats/dspace-import.csv output of ``generate_dspace_import_csv``
      - formats/<profile file>    one file per profile (generic, dublin_core,
                                  dspace, eprints), named by
                                  ``export_filename_for_profile``
      - dspace-saf/item_NNNNN/dublin_core.xml
                                  one entry per publication, numbered from 1

    Args:
        researcher: researcher record forwarded unchanged to every generator.
        publications: iterable of publication records; also iterated here to
            emit the per-item ``dspace-saf`` entries.

    Returns:
        bytes: the complete, deflate-compressed ZIP archive.
    """
    # The generic feed is produced once and reused for both the top-level
    # sword.xml and the formats/ copy, so the two can never diverge.
    generic_xml = SWORDGenerator.generate_feed_xml(researcher, publications)
    manifest = ZIPGenerator.generate_manifest(researcher, publications)
    metadata_json = ZIPGenerator.generate_metadata_json(researcher, publications)
    mets_xml = ZIPGenerator.generate_mets_xml(researcher, publications)

    # Archive path -> payload for every repository profile.
    format_files = {
        f"formats/{export_filename_for_profile('generic')}": generic_xml,
        f"formats/{export_filename_for_profile('dublin_core')}": generate_dublin_core_records_xml(
            researcher, publications
        ),
        f"formats/{export_filename_for_profile('dspace')}": generate_dspace_sword_feed_xml(
            researcher, publications
        ),
        f"formats/{export_filename_for_profile('eprints')}": generate_eprints_import_xml(
            researcher, publications
        ),
    }

    mem_file = io.BytesIO()
    with zipfile.ZipFile(mem_file, "w", zipfile.ZIP_DEFLATED) as zf:
        # Each member name is written exactly once; writing the same name
        # twice would leave two entries with that name in the archive.
        zf.writestr("sword.xml", generic_xml)
        zf.writestr("manifest.txt", manifest)
        zf.writestr("metadata.json", metadata_json)
        zf.writestr("mets.xml", mets_xml)
        zf.writestr(
            "formats/dspace-import.csv",
            generate_dspace_import_csv(researcher, publications),
        )

        for path, content in format_files.items():
            zf.writestr(path, content)

        # One numbered item directory per publication, 1-based and
        # zero-padded to five digits.
        for index, pub in enumerate(publications, start=1):
            item_dir = f"dspace-saf/item_{index:05d}"
            zf.writestr(
                f"{item_dir}/dublin_core.xml",
                generate_dspace_item_dublin_core(pub),
            )

    # The ZipFile context has closed, so the central directory is flushed
    # and the buffer holds the finished archive.
    return mem_file.getvalue()
|
||||
|
||||
Reference in New Issue
Block a user