7bc27da33a
Upload documents for AI context, exam images for Moodle questions, per-template storage limits, embedded images in XML export, and GUIA_API_Y_FLUJO.md with full endpoint documentation.
189 lines
6.9 KiB
Python
189 lines
6.9 KiB
Python
import uuid
|
|
from pathlib import Path
|
|
|
|
from fastapi import UploadFile
|
|
from sqlalchemy import func, select
|
|
from sqlalchemy.orm import Session
|
|
|
|
from app.core.config import Settings
|
|
from app.core.errors import AppError, NotFoundError
|
|
from app.core.security import clean_text
|
|
from app.models.exam import ExamMaterial, ExamTemplate, MaterialStatus
|
|
from app.schemas.material import ExamMaterialRead
|
|
from app.services.document_extractor import SUPPORTED_EXTENSIONS, DocumentExtractor
|
|
from app.services.storage_quota import StorageQuotaService
|
|
|
|
|
|
class MaterialService:
    """Manage the files ("materials") attached to an exam template.

    Responsibilities visible in this class:

    * validate and store an uploaded file under
      ``<upload_dir>/<user_id>/<template_id>/<uuid><suffix>``;
    * run text extraction on the stored file and persist the outcome;
    * list and delete the materials of a template;
    * assemble the extracted text of processed materials into one string.
    """

    def __init__(
        self,
        db: Session,
        settings: Settings,
        storage_quota: StorageQuotaService | None = None,
    ) -> None:
        """Store collaborators and make sure the upload root directory exists.

        Args:
            db: Session used for every query and commit issued by the service.
            settings: Application settings. This class reads ``upload_dir``,
                ``max_upload_bytes``, ``max_materials_per_template`` and
                ``max_reference_chars``.
            storage_quota: Quota service to use. When omitted (or falsy) a
                ``StorageQuotaService(db, settings)`` is created.
        """
        self.db = db
        self.settings = settings
        self.storage_quota = storage_quota or StorageQuotaService(db, settings)
        self.extractor = DocumentExtractor()
        self.upload_root = Path(settings.upload_dir)
        self.upload_root.mkdir(parents=True, exist_ok=True)

    def upload(
        self,
        template: ExamTemplate,
        upload_file: UploadFile,
    ) -> ExamMaterialRead:
        """Validate, store and register an uploaded file for ``template``.

        The file is written to disk under a UUID-based name (the client's
        filename is only kept as metadata), text extraction is attempted, and
        a material row is committed. An extraction failure does not abort the
        upload: the row is saved with ``MaterialStatus.FAILED`` and an error
        message instead.

        Args:
            template: Template that will own the material; its ``id`` and
                ``user_id`` determine the storage directory.
            upload_file: The incoming upload.

        Returns:
            The read schema of the newly created material.

        Raises:
            AppError: 400 when the filename is missing or the file is empty,
                409 when the template already holds the maximum number of
                files, 413 when the file exceeds ``max_upload_bytes``,
                415 when the extension is not supported.
            Exception: Anything raised while writing the file or committing
                is re-raised after the session is rolled back and the
                partially stored file is removed.
        """
        self._validate_upload(template.id, upload_file)

        suffix = Path(upload_file.filename or "file").suffix.lower()
        if suffix not in SUPPORTED_EXTENSIONS:
            raise AppError(
                f"Unsupported extension. Allowed: {', '.join(sorted(SUPPORTED_EXTENSIONS))}",
                status_code=415,
                code="unsupported_media",
            )

        content = self._read_content(upload_file)

        # NOTE(review): presumably raises when the template's storage quota
        # would be exceeded - confirm against StorageQuotaService.
        self.storage_quota.ensure_template_has_space(template.id, len(content))

        material_id = uuid.uuid4()
        safe_name = f"{material_id}{suffix}"
        target_dir = self.upload_root / str(template.user_id) / str(template.id)
        target_dir.mkdir(parents=True, exist_ok=True)
        storage_path = target_dir / safe_name

        try:
            storage_path.write_bytes(content)

            # Fall back to the MIME type registered for the extension when
            # the client did not send a content type.
            mime_type = upload_file.content_type or SUPPORTED_EXTENSIONS[suffix]
            material = ExamMaterial(
                id=material_id,
                template_id=template.id,
                original_filename=clean_text(
                    upload_file.filename or safe_name,
                    max_length=255,
                ),
                mime_type=mime_type,
                size_bytes=len(content),
                storage_path=str(storage_path),
                status=MaterialStatus.PROCESSED,
            )
            self._extract_text(material, storage_path, mime_type)

            self.db.add(material)
            self.db.commit()
        except Exception:
            # Without this, a failed write/commit would leave a file on disk
            # that no database row references, and a session that cannot be
            # reused until it is rolled back.
            self.db.rollback()
            self._discard_file(storage_path)
            raise

        self.db.refresh(material)
        return self._to_read(material)

    def list_materials(self, template_id: uuid.UUID) -> list[ExamMaterialRead]:
        """Return the materials of a template, newest first.

        Args:
            template_id: Identifier of the owning template.

        Returns:
            Read schemas ordered by ``created_at`` descending.
        """
        materials = self.db.scalars(
            select(ExamMaterial)
            .where(ExamMaterial.template_id == template_id)
            .order_by(ExamMaterial.created_at.desc())
        ).all()
        return [self._to_read(material) for material in materials]

    def delete_material(self, template: ExamTemplate, material_id: uuid.UUID) -> None:
        """Delete a material's stored file and its database row.

        Args:
            template: Template the material must belong to.
            material_id: Identifier of the material to delete.

        Raises:
            NotFoundError: If the material does not exist or belongs to a
                different template.
        """
        material = self.db.get(ExamMaterial, material_id)
        if material is None or material.template_id != template.id:
            raise NotFoundError("Material not found")

        # missing_ok avoids the check-then-delete race: a file that is
        # already gone must not prevent the row from being removed.
        Path(material.storage_path).unlink(missing_ok=True)

        self.db.delete(material)
        self.db.commit()

    def build_reference_context(
        self,
        template_id: uuid.UUID,
        material_ids: list[uuid.UUID] | None = None,
    ) -> str:
        """Concatenate the extracted text of a template's processed materials.

        Only materials with status ``PROCESSED`` and non-null extracted text
        are considered, oldest first. Each non-blank text becomes a section
        headed by ``--- Archivo: <original filename> ---``; sections are
        joined by blank lines. When the result is longer than
        ``settings.max_reference_chars`` it is cut at that length, trimmed
        back to the last line break inside the cut, and a truncation notice
        is appended.

        Args:
            template_id: Identifier of the owning template.
            material_ids: Restrict the context to these materials. ``None``
                or an empty list selects every eligible material.

        Returns:
            The combined text, or ``""`` when nothing usable is available.

        Raises:
            NotFoundError: If any requested id is not among the eligible
                materials of the template.
        """
        query = select(ExamMaterial).where(
            ExamMaterial.template_id == template_id,
            ExamMaterial.status == MaterialStatus.PROCESSED,
            ExamMaterial.extracted_text.isnot(None),
        )
        if material_ids:
            query = query.where(ExamMaterial.id.in_(material_ids))

        materials = self.db.scalars(query.order_by(ExamMaterial.created_at.asc())).all()
        if material_ids:
            found_ids = {material.id for material in materials}
            if any(material_id not in found_ids for material_id in material_ids):
                raise NotFoundError("One or more material IDs were not found or are not processed")

        sections: list[str] = []
        for material in materials:
            text = (material.extracted_text or "").strip()
            if not text:
                continue
            sections.append(f"--- Archivo: {material.original_filename} ---\n{text}")

        if not sections:
            return ""

        combined = "\n\n".join(sections)
        max_chars = self.settings.max_reference_chars
        if len(combined) <= max_chars:
            return combined

        # Drop the (probably partial) last line of the cut so the context
        # does not end mid-line.
        truncated = combined[:max_chars].rsplit("\n", 1)[0]
        return f"{truncated}\n\n[Material truncado por límite de contexto]"

    def _validate_upload(self, template_id: uuid.UUID, upload_file: UploadFile) -> None:
        """Reject uploads without a filename or beyond the per-template limit.

        Raises:
            AppError: 400 (``invalid_file``) when the filename is missing,
                409 (``too_many_files``) when the template already has
                ``max_materials_per_template`` materials.
        """
        if not upload_file.filename:
            raise AppError("Filename is required", status_code=400, code="invalid_file")

        count = self.db.scalar(
            select(func.count())
            .select_from(ExamMaterial)
            .where(ExamMaterial.template_id == template_id)
        )
        if count is not None and count >= self.settings.max_materials_per_template:
            raise AppError(
                f"Maximum of {self.settings.max_materials_per_template} files per template reached",
                status_code=409,
                code="too_many_files",
            )

    def _read_content(self, upload_file: UploadFile) -> bytes:
        """Read the upload into memory, enforcing the size limit.

        Raises:
            AppError: 413 (``file_too_large``) when the upload is larger than
                ``max_upload_bytes``, 400 (``empty_file``) when it is empty.
        """
        limit = self.settings.max_upload_bytes
        # One byte past the limit is enough to detect an oversized upload
        # without buffering the whole thing in memory first.
        content = upload_file.file.read(limit + 1)
        if len(content) > limit:
            raise AppError(
                f"File exceeds maximum size of {limit} bytes",
                status_code=413,
                code="file_too_large",
            )
        if not content:
            raise AppError("Uploaded file is empty", status_code=400, code="empty_file")
        return content

    def _extract_text(
        self,
        material: ExamMaterial,
        storage_path: Path,
        mime_type: str,
    ) -> None:
        """Fill ``material.extracted_text``, or mark the material as failed.

        The broad ``except`` is deliberate: whatever the extractor raises is
        recorded on the material (status ``FAILED`` plus a message capped at
        500 characters) so the upload itself still succeeds.
        """
        try:
            material.extracted_text = clean_text(
                self.extractor.extract(storage_path, mime_type),
                max_length=500_000,
            )
        except AppError as exc:
            material.status = MaterialStatus.FAILED
            material.error_message = clean_text(exc.message, max_length=500)
        except Exception as exc:
            material.status = MaterialStatus.FAILED
            material.error_message = clean_text(str(exc), max_length=500)

    @staticmethod
    def _discard_file(path: Path) -> None:
        """Remove ``path`` if present, ignoring filesystem errors."""
        try:
            path.unlink(missing_ok=True)
        except OSError:
            # Best effort only: this runs while another exception is being
            # propagated, and that original error is the one worth surfacing.
            pass

    def _to_read(self, material: ExamMaterial) -> ExamMaterialRead:
        """Convert a material row into its read schema.

        ``text_preview`` holds the first 300 characters of the extracted
        text, followed by ``...`` when the text is longer; it is ``None``
        when there is no extracted text.
        """
        preview = None
        if material.extracted_text:
            preview = material.extracted_text[:300]
            if len(material.extracted_text) > 300:
                preview += "..."
        return ExamMaterialRead(
            id=material.id,
            template_id=material.template_id,
            original_filename=material.original_filename,
            mime_type=material.mime_type,
            size_bytes=material.size_bytes,
            status=material.status,
            error_message=material.error_message,
            text_preview=preview,
            created_at=material.created_at,
        )