aa2e7280dc
Backend: generadores por repositorio, ZIP multi-formato y query profile en /export/sword. Frontend: selector Destino que envía profile al descargar SWORD XML.
191 lines
7.9 KiB
Python
191 lines
7.9 KiB
Python
import io
|
|
import json
|
|
import zipfile
|
|
from datetime import datetime
|
|
|
|
from app.db.models import Publication, Researcher
|
|
from app.services.repository_export import (
|
|
export_filename_for_profile,
|
|
generate_dspace_import_csv,
|
|
generate_dspace_item_dublin_core,
|
|
generate_dublin_core_records_xml,
|
|
generate_dspace_sword_feed_xml,
|
|
generate_eprints_import_xml,
|
|
)
|
|
from app.services.sword_generator import SWORDGenerator
|
|
|
|
|
|
class ZIPGenerator:
    """Namespace of static helpers that assemble the ORCID export ZIP package.

    The package bundles the same set of publications in several formats
    (generic Atom, Dublin Core, DSpace Atom/CSV/SAF, EPrints XML, JSON and a
    simplified METS document) together with a plain-text manifest.
    """

    # Namespace under which the Dublin Core elements of mets.xml are emitted.
    _DC_NS = "http://purl.org/dc/elements/1.1/"

    @staticmethod
    def _utc_timestamp():
        """Return the current UTC time as ``YYYY-MM-DDTHH:MM:SS[.ffffff]Z``.

        ``datetime.utcnow()`` is deprecated since Python 3.12, so the time is
        taken as an aware UTC datetime and the tzinfo is dropped before
        formatting; the resulting text has the same shape ``utcnow()`` gave.
        """
        # Function-scope import: the module only imports `datetime` itself.
        from datetime import timezone

        now = datetime.now(timezone.utc).replace(tzinfo=None)
        return now.isoformat() + "Z"

    @staticmethod
    def _format_pub_date(pub):
        """Return the publication date as ``YYYY[-MM[-DD]]`` or ``None``.

        ``None`` is returned when the publication has no year. The day is only
        appended when a month is present, so a year+day record can never be
        rendered as ``YYYY-DD`` (which would read as a month).
        """
        if not pub.pub_year:
            return None

        date_str = str(pub.pub_year)
        if pub.pub_month:
            # int() lets numeric strings such as "3" be zero-padded as well.
            date_str += f"-{int(pub.pub_month):02d}"
            if pub.pub_day:
                date_str += f"-{int(pub.pub_day):02d}"
        return date_str

    @staticmethod
    def generate_manifest(researcher, publications):
        """Build the text of ``manifest.txt``.

        Args:
            researcher: object exposing ``orcid_id``, ``name`` and ``id``.
            publications: sized iterable of objects exposing ``title``,
                ``pub_year``, ``doi`` and ``type``.

        Returns:
            A newline-joined string: package header, list of bundled files,
            repository hints and one line per publication.
        """
        lines = [
            "ORCID Export Package",
            "--------------------",
            f"Researcher ORCID: {researcher.orcid_id}",
            f"Researcher Name: {researcher.name}",
            f"Researcher UUID: {researcher.id}",
            f"Total Publications: {len(publications)}",
            f"Generated At: {ZIPGenerator._utc_timestamp()}",
            "",
            "Files:",
            "- sword.xml → Atom genérico ORCID (compatibilidad)",
            "- formats/generic-atom.xml",
            "- formats/dublin_core.xml → Dublin Core (un registro por obra)",
            "- formats/dspace-atom.xml → Atom con metadatos DSpace",
            "- formats/dspace-import.csv → Importación batch CSV DSpace",
            "- formats/eprints-import.xml → Importación XML EPrints",
            "- dspace-saf/item_NNNNN/dublin_core.xml → Simple Archive Format (DSpace)",
            "- metadata.json → Metadatos completos (JSON)",
            "- mets.xml → METS simplificado (legacy)",
            "",
            "Repository hints:",
            "- DSpace: use dspace-saf/ (SAF) or formats/dspace-import.csv",
            "- EPrints: import formats/eprints-import.xml via admin tools",
            "- Dublin Core: use formats/dublin_core.xml",
            "",
            "SWORD endpoint profile query:",
            " ?profile=generic|dublin_core|dspace|eprints",
            "",
            "Publications:",
        ]

        for pub in publications:
            # A missing/zero year is shown as the literal word "Unknown".
            year = pub.pub_year or "Unknown"
            lines.append(
                f"- {pub.title} ({year}) | DOI={pub.doi} | TYPE={pub.type}"
            )

        return "\n".join(lines)

    @staticmethod
    def generate_metadata_json(researcher, publications):
        """Serialize researcher and publication metadata as ``metadata.json``.

        Args:
            researcher: object exposing ``orcid_id``, ``name``, ``id`` and
                ``last_sync_at`` (a datetime or ``None``).
            publications: iterable of publication objects; every attribute
                listed in the mapping below is read from each of them.
                ``status`` is optional and defaults to ``None``.

        Returns:
            A JSON document (4-space indented string) with the keys
            ``researcher``, ``generated_at`` and ``publications``.
        """
        last_sync = researcher.last_sync_at
        data = {
            "researcher": {
                "orcid_id": researcher.orcid_id,
                "name": researcher.name,
                "id": str(researcher.id),
                "last_sync_at": last_sync.isoformat() if last_sync else None,
            },
            "generated_at": ZIPGenerator._utc_timestamp(),
            "publications": [],
        }

        for pub in publications:
            modified = pub.last_modified
            data["publications"].append({
                "id": str(pub.id),
                "put_code": pub.put_code,
                "title": pub.title,
                "subtitle": pub.subtitle,
                "doi": pub.doi,
                "journal": pub.journal,
                "type": pub.type,
                "url": pub.url,
                "short_description": pub.short_description,
                "citation_type": pub.citation_type,
                "citation_value": pub.citation_value,
                "language_code": pub.language_code,
                "country": pub.country,
                "pub_year": pub.pub_year,
                "pub_month": pub.pub_month,
                "pub_day": pub.pub_day,
                "external_ids": pub.external_ids,
                "contributors": pub.contributors,
                "hash_fingerprint": pub.hash_fingerprint,
                "last_modified": modified.isoformat() if modified else None,
                # Not every publication object carries a status attribute.
                "status": getattr(pub, "status", None),
            })

        return json.dumps(data, indent=4)

    @staticmethod
    def generate_mets_xml(researcher, publications):
        """Build the simplified (legacy) ``mets.xml`` document.

        Args:
            researcher: accepted for signature parity with the other
                generators; it is not read here.
            publications: iterable of publication objects.

        Returns:
            UTF-8 encoded bytes, including the XML declaration.
        """
        from xml.etree.ElementTree import Element, SubElement, tostring

        dc_ns = ZIPGenerator._DC_NS

        mets = Element("mets", xmlns="http://www.loc.gov/METS/")

        header = SubElement(mets, "metsHdr")
        agent = SubElement(header, "agent", ROLE="CREATOR", TYPE="OTHER")
        SubElement(agent, "name").text = "ORCID Exporter System"

        # NOTE(review): every publication is written into this single dmdSec,
        # so records are not delimited from one another. Kept as-is for
        # compatibility with existing consumers of the legacy file.
        dmd_sec = SubElement(mets, "dmdSec", ID="dmd1")
        md_wrap = SubElement(dmd_sec, "mdWrap", MDTYPE="DC")
        xml_data = SubElement(md_wrap, "xmlData")

        for pub in publications:
            # The title element is always emitted, even when it is empty.
            SubElement(xml_data, f"{{{dc_ns}}}title").text = pub.title

            # Optional elements, in output order; falsy values are skipped.
            # NOTE(review): DCMI defines bibliographicCitation in the
            # http://purl.org/dc/terms/ namespace; the element namespace used
            # here is preserved so the output stays unchanged.
            optional_fields = (
                ("description", pub.subtitle),
                ("identifier", f"doi:{pub.doi}" if pub.doi else None),
                ("source", pub.journal),
                ("relation", pub.url),
                ("description", pub.short_description),
                ("bibliographicCitation", pub.citation_value),
                ("language", pub.language_code),
                ("coverage", pub.country),
                ("date", ZIPGenerator._format_pub_date(pub)),
                ("type", pub.type),
            )
            for tag, value in optional_fields:
                if value:
                    SubElement(xml_data, f"{{{dc_ns}}}{tag}").text = value

        return tostring(mets, encoding="utf-8", xml_declaration=True)

    @staticmethod
    def generate_zip(researcher, publications):
        """Assemble the complete export package in memory.

        Args:
            researcher: researcher object passed through to every generator.
            publications: sized iterable of publication objects.

        Returns:
            The bytes of a DEFLATE-compressed ZIP archive containing the
            top-level files, the ``formats/`` directory and one
            ``dspace-saf/item_NNNNN/dublin_core.xml`` per publication.
        """
        generic_xml = SWORDGenerator.generate_feed_xml(researcher, publications)
        manifest = ZIPGenerator.generate_manifest(researcher, publications)
        metadata_json = ZIPGenerator.generate_metadata_json(researcher, publications)
        mets_xml = ZIPGenerator.generate_mets_xml(researcher, publications)

        # File names inside formats/ are resolved by the export profile helper.
        format_files = {
            f"formats/{export_filename_for_profile('generic')}": generic_xml,
            f"formats/{export_filename_for_profile('dublin_core')}": generate_dublin_core_records_xml(
                researcher, publications
            ),
            f"formats/{export_filename_for_profile('dspace')}": generate_dspace_sword_feed_xml(
                researcher, publications
            ),
            f"formats/{export_filename_for_profile('eprints')}": generate_eprints_import_xml(
                researcher, publications
            ),
        }

        mem_file = io.BytesIO()
        with zipfile.ZipFile(mem_file, "w", zipfile.ZIP_DEFLATED) as zf:
            # sword.xml duplicates the generic Atom feed at the archive root.
            zf.writestr("sword.xml", generic_xml)
            zf.writestr("manifest.txt", manifest)
            zf.writestr("metadata.json", metadata_json)
            zf.writestr("mets.xml", mets_xml)
            zf.writestr(
                "formats/dspace-import.csv",
                generate_dspace_import_csv(researcher, publications),
            )

            for path, content in format_files.items():
                zf.writestr(path, content)

            # One item directory per publication, numbered from 00001.
            for index, pub in enumerate(publications, start=1):
                item_dir = f"dspace-saf/item_{index:05d}"
                zf.writestr(
                    f"{item_dir}/dublin_core.xml",
                    generate_dspace_item_dublin_core(pub),
                )

        # The archive is finalized once the `with` block exits.
        return mem_file.getvalue()
|