From 7bc27da33a3642f530e7e292c88b2c2fe6282d57 Mon Sep 17 00:00:00 2001 From: Mireya Cueto Garrido Date: Mon, 1 Jun 2026 10:30:40 +0200 Subject: [PATCH] Add materials, exam images, storage quota, and API guide Upload documents for AI context, exam images for Moodle questions, per-template storage limits, embedded images in XML export, and GUIA_API_Y_FLUJO.md with full endpoint documentation. --- FlujoDeUsuario.txt | 24 +- GUIA_API_Y_FLUJO.md | 694 +++++++++++++++++++++ README.md | 50 +- backend/.env.example | 15 +- backend/Dockerfile | 9 + backend/app/api/dependencies.py | 34 +- backend/app/api/routes/generation.py | 8 +- backend/app/api/routes/images.py | 73 +++ backend/app/api/routes/materials.py | 55 ++ backend/app/api/routes/questions.py | 27 + backend/app/api/routes/templates.py | 15 +- backend/app/core/config.py | 11 + backend/app/core/security.py | 4 +- backend/app/main.py | 7 +- backend/app/models/exam.py | 66 ++ backend/app/schemas/exam.py | 13 + backend/app/schemas/image.py | 30 + backend/app/schemas/material.py | 32 + backend/app/schemas/storage.py | 14 + backend/app/services/document_extractor.py | 74 +++ backend/app/services/exam_service.py | 82 ++- backend/app/services/image_service.py | 206 ++++++ backend/app/services/material_service.py | 188 ++++++ backend/app/services/moodle_exporter.py | 104 ++- backend/app/services/parser.py | 2 + backend/app/services/prompt_builder.py | 25 +- backend/app/services/storage_quota.py | 81 +++ backend/requirements.txt | 5 + docker-compose.yml | 3 + 29 files changed, 1892 insertions(+), 59 deletions(-) create mode 100644 GUIA_API_Y_FLUJO.md create mode 100644 backend/app/api/routes/images.py create mode 100644 backend/app/api/routes/materials.py create mode 100644 backend/app/api/routes/questions.py create mode 100644 backend/app/schemas/image.py create mode 100644 backend/app/schemas/material.py create mode 100644 backend/app/schemas/storage.py create mode 100644 backend/app/services/document_extractor.py create mode 
100644 backend/app/services/image_service.py create mode 100644 backend/app/services/material_service.py create mode 100644 backend/app/services/storage_quota.py diff --git a/FlujoDeUsuario.txt b/FlujoDeUsuario.txt index da16758..82394ae 100644 --- a/FlujoDeUsuario.txt +++ b/FlujoDeUsuario.txt @@ -6,21 +6,27 @@ Ahora mismo el flujo es backend/API, sin frontend: Define título, materia, nivel educativo, tipos de preguntas, puntuación, penalización y dificultad. La plantilla queda guardada en base de datos asociada a su usuario. -2.- Genera un prompt con POST /exam/prompts/{template_id}. - La API devuelve un prompt estructurado para pedirle al LLM preguntas en JSON válido. +2.- Sube materiales de referencia con POST /exam/templates/{id}/materials (campo file). + Acepta PDF, DOCX, TXT, MD, PNG, JPG, WEBP. La API extrae el texto para contexto de la IA. -3.- Hay dos caminos posibles: +2b.- Sube imágenes de examen con POST /exam/templates/{id}/images (campo file, opcional caption). + PNG, JPG, WEBP, GIF. Se usan para mostrarlas en las preguntas (no para extraer texto). - 3.1.- Generación automática: POST /exam/generate. - La API llama al LLM configurado, parsea la respuesta y guarda las preguntas. +3.- Genera un prompt con POST /exam/prompts/{template_id}. + El prompt incluye el material subido + el tema indicado por el profesor. - 3.2.- Carga manual: POST /exam/parse. +4.- Hay dos caminos posibles: + + 4.1.- Generación automática: POST /exam/generate. + La API llama al LLM con el contexto de los ficheros y guarda las preguntas. + + 4.2.- Carga manual: POST /exam/parse. El profesor pega una salida de IA en json o txt, y la API la valida y guarda. -4.- Consulta su historial con GET /exam/history. +5.- Consulta su historial con GET /exam/history. Ve todos los exámenes que ha creado, cuántas preguntas tienen y cuándo exportó por última vez. -5.- Exporta el examen: +6.- Exporta el examen: GET /exam/export/xml/{template_id} para Moodle XML. 
GET /exam/export/txt/{template_id} para texto plano. @@ -28,4 +34,4 @@ Ahora mismo el flujo es backend/API, sin frontend: (El XML generado se importa manualmente en Moodle.) -En resumen: registrarse → configurar plantilla → generar prompt o llamar al LLM → guardar preguntas → ver historial → exportar Moodle XML. +En resumen: registrarse → plantilla → subir materiales → prompt/generar con IA → historial → exportar Moodle XML. diff --git a/GUIA_API_Y_FLUJO.md b/GUIA_API_Y_FLUJO.md new file mode 100644 index 0000000..ccbae35 --- /dev/null +++ b/GUIA_API_Y_FLUJO.md @@ -0,0 +1,694 @@ +# Guía de uso de la API y flujo de la aplicación + +Documento resumen para entender **qué hace el usuario en cada paso**, **qué endpoint usar**, **cabeceras**, **cuerpos**, **ejemplos de respuesta** y **errores típicos**. + +**Base URL de ejemplo:** `http://localhost:8000` + +--- + +## 1. Conceptos rápidos + +| Concepto | Significado | +|----------|-------------| +| **Usuario** | Cada persona tiene su cuenta; los exámenes son suyos. | +| **Plantilla (template)** | Configuración de un examen: título, materia, tipos de pregunta, dificultad, etc. | +| **Materiales** | Ficheros para **extraer texto** y dar **contexto a la IA** (PDF, DOCX, TXT; imágenes aquí se procesan con OCR para texto). | +| **Imágenes de examen** | Imágenes para **mostrar en la pregunta** en Moodle; no se usan como texto de contexto para la IA. | +| **Preguntas** | Se generan con la IA, se pegan manualmente (parse) o se ajustan después. | +| **Exportación** | Salida Moodle XML, TXT o JSON. | + +**Autenticación:** casi todo va con JWT: + +```http +Authorization: Bearer {access_token} +``` + +Las rutas bajo `/exam/...` **requieren** ese header (salvo que indiquemos lo contrario). + +**Formato de error habitual** (API propia): + +```json +{ + "error": { + "code": "codigo_corto", + "message": "Texto legible para humanos", + "details": null + } +} +``` + +(`details` solo aparece en algunos errores de validación.) 
+ +**Otros códigos:** `401` token inválido o ausente, `403` recurso de otro usuario, `404` no existe, `409` conflicto (email duplicado, cupo, etc.), `413` fichero o cupo demasiado grande, `422` validación o parseo, `429` demasiadas peticiones, `503` servicio externo no configurado (p. ej. Google o LLM). + +--- + +## 2. Flujo de uso (orden recomendado) + +Hasta el **examen exportable** (normalmente Moodle XML): autenticación → plantilla → (materiales + imágenes) → generar preguntas → exportar. + +### Tres piezas que debes distinguir + +| Pieza | Para qué sirve | Endpoints | +|-------|----------------|-----------| +| **Materiales** | Extraen **texto** (PDF, DOCX, TXT; imagen aquí = OCR) y alimentan el **prompt** de la IA. | `POST/GET/DELETE …/templates/{id}/materials` | +| **Imágenes de examen** | Solo para **mostrar** la figura en la pregunta / Moodle (`image_id`). **No** aportan texto al prompt. | `POST/GET/DELETE …/templates/{id}/images`, `GET …/images/{id}/content`, `PATCH …/questions/{id}/image` | +| **IA** | Crea y **guarda** las preguntas en BD. | `POST …/prompts/{id}`, `POST …/generate`, `POST …/parse` | + +```text +Materiales → texto en prompt ─┐ +Imágenes → catálogo ids ─┼→ generate/parse → preguntas → export/xml +``` + +**Importante:** “Leer” un escaneado como texto → **material**. “Que el alumno vea la foto” → **imagen de examen** (pueden ser el mismo fichero subido dos veces si necesitas ambas cosas). + +--- + +### Pasos (qué hacer y endpoint) + +Todas las rutas `/exam/*` llevan `Authorization: Bearer `. 
+ +| # | Qué haces | Endpoint(s) | +|---|-----------|-------------| +| 1 | Registro o login; guardas el JWT | `POST /auth/register`, `POST /auth/login` o `POST /auth/google` | +| 2 | Creas el examen (tipos, nº preguntas, dificultad); guardas **`template_id`** | `POST /exam/templates` | +| 3 | *(Opc.)* Subes apuntes; compruebas estado **`processed`** | `POST …/materials` (`file`), `GET …/materials` | +| 4 | *(Opc.)* Subes figuras para preguntas; anotas cada **`image_id`** | `POST …/images` (`file`, `caption` opcional), `GET …/images` | +| 5 | *(Opc.)* Ves cuota de espacio (materiales + imágenes) | `GET …/templates/{id}/storage` | +| 6 | Generas preguntas (ver tabla abajo) | `prompts` / `generate` / `parse` | +| 7 | *(Opc.)* Corriges imagen de una pregunta | `PATCH …/questions/{id}/image` | +| 8 | *(Opc.)* Listado de tus exámenes | `GET /exam/history` | +| 9 | Descargas el examen (hay que tener preguntas) | `GET …/export/xml/{id}` (Moodle), `…/txt`, `…/json` | + +--- + +### Generación con IA (paso 6) + +Body habitual en **prompts** y **generate**: + +```json +{ "topic_prompt": "…instrucciones…", "material_ids": null } +``` + +`material_ids`: `null` = todos los materiales OK; o lista de UUIDs concretos. + +| Opción | Endpoint | Resultado | +|--------|----------|-----------| +| Ver/copiar prompt (sin LLM en servidor) | `POST /exam/prompts/{template_id}` | Texto del prompt | +| Generar y guardar en servidor | `POST /exam/generate` (+ `template_id`) | Preguntas en BD; requiere `LLM_API_KEY` | +| Pegar JSON/TXT de otra IA | `POST /exam/parse` | Preguntas en BD | + +El prompt incluye texto de **materiales** y catálogo de **imágenes**. La IA puede poner **`image_id`** en cada pregunta; el backend **no** obliga “una imagen = una pregunta” (solo lo que pidas en `topic_prompt` + revisión o `PATCH`). + +Detalle de cuerpos, respuestas y errores: **sección 4** de esta guía. + +--- + +## 3. 
Cabeceras comunes + +| Cabecera | Cuándo | +|----------|--------| +| `Content-Type: application/json` | Peticiones con body JSON. | +| `Authorization: Bearer {access_token}` | Rutas protegidas (`/exam/*`, `/auth/me`). | +| `Content-Type: multipart/form-data` | Subida de ficheros (el cliente lo pone automáticamente con `curl -F`). | + +**No** hace falta `X-API-Key` para el flujo normal de usuario (sigue existiendo en configuración por compatibilidad, pero el acceso a exámenes es por JWT). + +--- + +## 4. Endpoints por bloques + +### 4.1 Salud del servicio + +#### `GET /health` + +**Qué hace:** Comprueba que el servidor responde. **No** requiere autenticación. + +**Headers:** ninguno obligatorio. + +**Body:** no. + +**Ejemplo:** + +```bash +curl -s http://localhost:8000/health +``` + +**Respuesta OK (200):** + +```json +{ "status": "ok" } +``` + +**Error típico:** si el servidor está caído, no hay respuesta HTTP (no es JSON de la API). + +--- + +### 4.2 Autenticación (`/auth`) + +#### `POST /auth/register` + +**Qué hace:** Crea usuario con email y contraseña. + +**Headers:** `Content-Type: application/json` + +**Body:** + +```json +{ + "email": "profesor@ejemplo.com", + "password": "Minimo8caracteres", + "full_name": "María García" +} +``` + +**Ejemplo:** + +```bash +curl -s -X POST http://localhost:8000/auth/register \ + -H "Content-Type: application/json" \ + -d '{"email":"profesor@ejemplo.com","password":"ClaveSegura1","full_name":"María"}' +``` + +**Respuesta OK (201):** + +```json +{ + "id": "a1b2c3d4-e5f6-7890-abcd-ef1234567890", + "email": "profesor@ejemplo.com", + "full_name": "María", + "created_at": "2026-05-19T10:00:00+00:00" +} +``` + +**Error típico (409):** email ya registrado. + +```json +{ + "error": { + "code": "conflict", + "message": "Email is already registered" + } +} +``` + +--- + +#### `POST /auth/login` + +**Qué hace:** Devuelve el **JWT** para el resto de llamadas. 
+ +**Body:** + +```json +{ + "email": "profesor@ejemplo.com", + "password": "ClaveSegura1" +} +``` + +**Respuesta OK (200):** + +```json +{ + "access_token": "eyJhbGciOiJIUzI1NiIsInR5cCI6IkpXVCJ9...", + "token_type": "bearer" +} +``` + +**Error típico (401):** credenciales incorrectas. + +```json +{ + "error": { + "code": "unauthorized", + "message": "Invalid email or password" + } +} +``` + +--- + +#### `POST /auth/google` + +**Qué hace:** Inicia sesión o registra con el **id_token** de Google (desde el frontend con Sign in with Google). + +**Headers:** `Content-Type: application/json` + +**Body:** + +```json +{ + "id_token": "eyJhbGciOiJSUzI1NiIs..." +} +``` + +**Requisitos:** `GOOGLE_CLIENT_ID` en `backend/.env`. + +**Respuesta OK (200):** igual que login (`access_token`). + +**Error típico (503):** Google no configurado. + +```json +{ + "error": { + "code": "google_not_configured", + "message": "Google login is not configured" + } +} +``` + +**Error típico (401):** token de Google inválido o email no verificado. + +--- + +#### `GET /auth/me` + +**Qué hace:** Devuelve los datos del usuario logueado. + +**Headers:** `Authorization: Bearer ` + +**Body:** no. + +**Respuesta OK (200):** mismo esquema que register (sin password). + +**Error típico (401):** falta token o token caducado. + +```json +{ + "error": { + "code": "unauthorized", + "message": "Invalid or expired token" + } +} +``` + +--- + +### 4.3 Plantillas de examen (`/exam/templates`) + +Todas requieren: `Authorization: Bearer ` + +#### `POST /exam/templates` + +**Qué hace:** Crea una plantilla nueva asociada al usuario. 
+ +**Body (JSON):** ver `ExamTemplateCreate` en el código; resumen: + +- `title`, `subject`, `educational_level`, `language` +- `settings.question_types`: lista de `{ "type": "multichoice"|"truefalse"|"shortanswer"|"matching", "count", "options_count", "multiple_correct", "score", "penalty" }` +- `settings.shuffle_questions`, `shuffle_answers`, `include_feedback` +- `difficulty_profile`: `easy`, `medium`, `hard`, `very_hard` (al menos uno > 0) + +**Ejemplo mínimo:** + +```bash +curl -s -X POST http://localhost:8000/exam/templates \ + -H "Authorization: Bearer $TOKEN" \ + -H "Content-Type: application/json" \ + -d '{ + "title": "Examen UD3", + "subject": "Bases de datos", + "educational_level": "CFGS DAW", + "language": "es", + "settings": { + "question_types": [ + {"type": "multichoice", "count": 5, "options_count": 4, "multiple_correct": false, "score": 1, "penalty": 0.25} + ], + "shuffle_questions": true, + "shuffle_answers": true, + "include_feedback": true + }, + "difficulty_profile": {"easy": 2, "medium": 2, "hard": 1, "very_hard": 0} + }' +``` + +**Respuesta OK (201):** plantilla con `id`, fechas, `question_count`, etc. + +**Error típico (422):** JSON mal formado o reglas de validación (p. ej. suma de dificultades vacía). + +```json +{ + "error": { + "code": "validation_error", + "message": "Invalid request payload", + "details": [ { "loc": ["body", "difficulty_profile"], "msg": "...", "type": "value_error" } ] + } +} +``` + +--- + +#### `GET /exam/templates` + +**Qué hace:** Lista las plantillas del usuario. + +**Respuesta OK (200):** array de plantillas. + +**Error típico (401):** sin token. + +--- + +#### `GET /exam/templates/{template_id}` + +**Qué hace:** Obtiene una plantilla concreta. + +**Parámetros URL:** `template_id` (UUID). + +**Error típico (404):** no existe o no es tuya. + +```json +{ + "error": { + "code": "not_found", + "message": "Exam template not found" + } +} +``` + +**Error típico (403):** plantilla de otro usuario. 
+ +```json +{ + "error": { + "code": "forbidden", + "message": "You do not have access to this exam template" + } +} +``` + +--- + +#### `GET /exam/templates/{template_id}/storage` + +**Qué hace:** Muestra cuánto espacio ocupan **materiales + imágenes** de esa plantilla frente al cupo (`MAX_STORAGE_BYTES_PER_TEMPLATE`). + +**Respuesta OK (200) ejemplo:** + +```json +{ + "template_id": "...", + "used_bytes": 1048576, + "limit_bytes": 52428800, + "remaining_bytes": 51380224, + "materials_bytes": 524288, + "images_bytes": 524288, + "used_mb": 1.0, + "limit_mb": 50.0 +} +``` + +**Error típico:** mismo 404/403 que la plantilla. + +--- + +### 4.4 Materiales de contexto (`/exam/templates/.../materials`) + +Sirven para **texto** que la IA puede usar al generar (PDF, DOCX, TXT, MD; imágenes aquí → OCR para texto). + +#### `POST /exam/templates/{template_id}/materials` + +**Headers:** `Authorization` + `multipart/form-data` + +**Body:** campo formulario `file` = fichero. + +**Ejemplo:** + +```bash +curl -s -X POST "http://localhost:8000/exam/templates/TEMPLATE_UUID/materials" \ + -H "Authorization: Bearer $TOKEN" \ + -F "file=@./apuntes.pdf" +``` + +**Respuesta OK (201):** objeto con `material` (id, estado `processed` o `failed`, vista previa de texto si hay) y `message`. + +**Errores típicos:** + +| Código | Situación | +|--------|-----------| +| 413 | Fichero mayor que `MAX_UPLOAD_BYTES` o cupo total de plantilla superado (`template_storage_quota_exceeded`). | +| 415 | Extensión no permitida. | +| 409 | Demasiados ficheros (`too_many_files`). | + +Ejemplo cupo: + +```json +{ + "error": { + "code": "template_storage_quota_exceeded", + "message": "Template storage quota exceeded. Limit: 50.00 MB, used: 48.00 MB, file: 5.00 MB" + } +} +``` + +--- + +#### `GET /exam/templates/{template_id}/materials` + +Lista materiales de la plantilla. **200:** array. + +--- + +#### `DELETE /exam/templates/{template_id}/materials/{material_id}` + +Borra un material. **204:** sin cuerpo. 
+ +**Error típico (404):** material o plantilla no encontrados. + +--- + +### 4.5 Imágenes de examen (`/exam/templates/.../images` y `/exam/images/...`) + +Solo para **mostrar en la pregunta** (Moodle); no rellenan el contexto de texto de la IA. + +#### `POST /exam/templates/{template_id}/images` + +**Body:** `multipart/form-data` con `file` obligatorio y `caption` opcional. + +**Ejemplo:** + +```bash +curl -s -X POST "http://localhost:8000/exam/templates/TEMPLATE_UUID/images" \ + -H "Authorization: Bearer $TOKEN" \ + -F "file=@./diagrama.png" \ + -F "caption=Diagrama del modelo ER" +``` + +**Respuesta OK (201):** incluye `image.id` y `content_url` tipo `/exam/images/{id}/content`. + +**Errores típicos:** 413 tamaño / cupo, 415 tipo no imagen, 422 imagen corrupta, 409 demasiadas imágenes. + +--- + +#### `GET /exam/templates/{template_id}/images` + +Lista imágenes. **200:** array. + +--- + +#### `GET /exam/images/{image_id}/content` + +Devuelve el **binario** de la imagen (previsualización o descarga). **200** con `Content-Type` de imagen. + +**Headers:** `Authorization: Bearer ` + +**Error típico (404):** id inexistente o imagen de otro usuario. + +--- + +#### `DELETE /exam/templates/{template_id}/images/{image_id}` + +Borra imagen y desvincula de preguntas. **204** sin cuerpo. + +--- + +### 4.6 Vincular imagen a pregunta (`/exam/questions`) + +#### `PATCH /exam/questions/{question_id}/image` + +**Qué hace:** Asigna o quita la imagen de una pregunta ya guardada. + +**Headers:** `Authorization`, `Content-Type: application/json` + +**Body:** + +```json +{ "image_id": "UUID-de-imagen-de-la-misma-plantilla" } +``` + +o para quitar: + +```json +{ "image_id": null } +``` + +**Respuesta OK (200):** pregunta con campos incl. `image_url` si hay imagen. + +**Errores típicos:** 404 pregunta no tuya; 404 `image_id` no pertenece a la plantilla de esa pregunta. + +--- + +### 4.7 Generación con IA (`/exam`) + +Todas con `Authorization: Bearer `. 
+ +#### `POST /exam/prompts/{template_id}` + +**Qué hace:** Construye el **texto del prompt** (incluye materiales procesados y catálogo de imágenes de examen) sin llamar al LLM. + +**Body:** + +```json +{ + "topic_prompt": "Genera preguntas sobre normalización y formas normales.", + "material_ids": null +} +``` + +`material_ids`: lista de UUIDs de materiales concretos, o `null` para usar **todos** los materiales con estado `processed`. + +**Respuesta OK (200):** + +```json +{ + "template_id": "...", + "prompt": "Eres un generador...", + "expected_format": "json" +} +``` + +**Errores típicos:** 404 plantilla; 404 si en `material_ids` pides un material que no existe o no está procesado. + +--- + +#### `POST /exam/generate` + +**Qué hace:** Llama al **LLM**, parsea JSON y **guarda** preguntas. + +**Body:** + +```json +{ + "template_id": "UUID-plantilla", + "topic_prompt": "Enfócate en claves foráneas e integridad referencial.", + "material_ids": null +} +``` + +**Respuesta OK (200):** `{ "questions": [ { ...pregunta..., "image_id": null, "image_url": null } ] }` + +**Errores típicos:** + +| Código | Ejemplo | +|--------|---------| +| 503 | `LLM_API_KEY` no configurada (`llm_unavailable`). | +| 422 | JSON del modelo inválido (`parse_error`). | + +```json +{ + "error": { + "code": "llm_unavailable", + "message": "LLM_API_KEY is not configured" + } +} +``` + +--- + +#### `POST /exam/parse` + +**Qué hace:** Pegas la salida de una IA externa (JSON o TXT) y se validan y guardan preguntas. + +**Body:** + +```json +{ + "template_id": "UUID-plantilla", + "input_format": "json", + "raw_output": "{\"questions\":[...]}" +} +``` + +**Respuesta OK (200):** igual que generate (`questions`). + +**Error típico (422):** `parse_error` si el formato no cuadra con el esquema de preguntas. + +--- + +### 4.8 Historial (`/exam/history`) + +#### `GET /exam/history` + +**Qué hace:** Lista exámenes del usuario (plantillas) con resumen (preguntas, exportaciones, fechas). 
+ +**Respuesta OK (200):** array de `ExamHistoryItem`. + +**Error típico (401):** sin token. + +--- + +### 4.9 Exportación (`/exam/export`) + +Requiere que la plantilla **tenga preguntas** guardadas. + +#### `GET /exam/export/xml/{template_id}` + +**Respuesta OK (200):** cuerpo **XML** (`Content-Type: application/xml`). Incluye imágenes embebidas si las preguntas las tienen. + +**Error típico (404):** sin preguntas aún. + +```json +{ + "error": { + "code": "not_found", + "message": "Template does not contain questions to export" + } +} +``` + +--- + +#### `GET /exam/export/txt/{template_id}` + +**200:** texto plano. + +--- + +#### `GET /exam/export/json/{template_id}` + +**200:** JSON con lista de preguntas. + +--- + +## 5. Cómo elegir imagen por pregunta (recordatorio) + +Resumen de lo ya descrito en los **pasos 4** (subida y catálogo) y **7** (PATCH) de la sección 2; el detalle de los endpoints está en las **secciones 4.5 y 4.6**. En corto: + +1. `POST /exam/templates/{template_id}/images` → anota cada **`id`**. +2. `POST /exam/generate` (o prompt + IA externa + `parse`) → el JSON puede incluir **`image_id`** por pregunta. +3. `PATCH /exam/questions/{question_id}/image` → corrección manual. + +--- + +## 6. Límites y buenas prácticas (recordatorio) + +- **Cupo total por plantilla:** `MAX_STORAGE_BYTES_PER_TEMPLATE` (materiales + imágenes). Consulta `GET .../storage` antes de subir mucho. +- **Tamaño por fichero:** materiales `MAX_UPLOAD_BYTES`, imágenes `MAX_IMAGE_BYTES`. +- **Contexto en el prompt:** el texto de materiales se trunca (`MAX_REFERENCE_CHARS`); no metas PDFs enormes sin trocear en el futuro. +- **Misma imagen para contexto OCR y para mostrar en examen:** hoy son dos rutas (`/materials` vs `/images`); si solo quieres **mostrar**, usa solo `/images`. + +--- + +## 7. 
Orden de lectura del código + +| Área | Carpeta / archivos | +|------|---------------------| +| Rutas | `backend/app/api/routes/` | +| Esquemas | `backend/app/schemas/` | +| Lógica de negocio | `backend/app/services/` | +| Modelos BD | `backend/app/models/exam.py`, `user.py` | +| Configuración | `backend/app/core/config.py`, `backend/.env.example` | + +--- + +*Documento generado para el proyecto GenExamenes / moodle-exam-generator. Ajusta la base URL y los UUID de ejemplo a tu entorno real.* diff --git a/README.md b/README.md index 498acec..1971961 100644 --- a/README.md +++ b/README.md @@ -2,6 +2,8 @@ Backend para generar exámenes con IA, procesar la salida de un LLM y exportar preguntas a Moodle XML. +**Guía detallada de flujo, endpoints, ejemplos y errores:** [GUIA_API_Y_FLUJO.md](GUIA_API_Y_FLUJO.md) + El proyecto está centrado en backend. La carpeta `frontend` se mantiene vacía a nivel de aplicación, aunque existe un servicio en Docker Compose para reservar el despliegue futuro. ## Stack @@ -46,6 +48,7 @@ Variables principales: - `LLM_BASE_URL`: endpoint compatible con OpenAI. - `LLM_MODEL`: modelo usado para generar preguntas. - `ALLOWED_ORIGINS`: orígenes permitidos por CORS. +- `MAX_STORAGE_BYTES_PER_TEMPLATE`: cupo total de almacenamiento por examen (materiales + imágenes). Todas las rutas bajo `/exam` requieren autenticación de usuario con: @@ -64,11 +67,12 @@ docker compose up --build 1. Registrarse o iniciar sesión. 2. Crear una plantilla de examen (queda asociada al usuario). -3. Generar un prompt guiado para el LLM. -4. Generar preguntas automáticamente con el LLM o parsear una salida externa en JSON/TXT. -5. Guardar las preguntas validadas en PostgreSQL. -6. Consultar el historial de exámenes creados. -7. Exportar el examen a Moodle XML, TXT o JSON. +3. Subir materiales de referencia (PDF, DOCX, TXT, PNG, JPG…) a la plantilla. +4. Generar un prompt guiado para el LLM (incluye el texto extraído de los ficheros). +5. 
Generar preguntas automáticamente con el LLM o parsear una salida externa en JSON/TXT. +6. Guardar las preguntas validadas en PostgreSQL. +7. Consultar el historial de exámenes creados. +8. Exportar el examen a Moodle XML, TXT o JSON. ## Endpoints @@ -96,6 +100,38 @@ Devuelve los datos del usuario autenticado. Lista el historial de exámenes del usuario (plantillas, preguntas y exportaciones). +`POST /exam/templates/{template_id}/materials` + +Sube un fichero (`multipart/form-data`, campo `file`). Formatos: PDF, DOCX, TXT, MD, PNG, JPG, WEBP. Extrae texto y lo guarda como contexto. + +`GET /exam/templates/{template_id}/materials` + +Lista los materiales subidos a una plantilla. + +`DELETE /exam/templates/{template_id}/materials/{material_id}` + +Elimina un material. + +`POST /exam/templates/{template_id}/images` + +Sube una imagen para preguntas visuales (`file`, opcional `caption`). No se usa OCR: la imagen se muestra en el examen y se embebe en el XML de Moodle. + +`GET /exam/templates/{template_id}/images` + +Lista las imágenes de la plantilla. + +`GET /exam/images/{image_id}/content` + +Devuelve la imagen (requiere JWT). Para previsualizar en el frontend o en Moodle tras importar. + +`DELETE /exam/templates/{template_id}/images/{image_id}` + +Elimina una imagen. + +`PATCH /exam/questions/{question_id}/image` + +Vincula o desvincula una imagen a una pregunta existente (`{"image_id": "uuid"}` o `null`). + `POST /exam/templates` Crea una plantilla con materia, nivel educativo, tipos de pregunta, puntuación, penalización y dificultad. @@ -108,6 +144,10 @@ Lista las plantillas del usuario autenticado. Obtiene una plantilla concreta. +`GET /exam/templates/{template_id}/storage` + +Muestra cuánto espacio usa el examen (materiales + imágenes) y el límite configurado. + `POST /exam/prompts/{template_id}` Genera un prompt estructurado para IA. 
diff --git a/backend/.env.example b/backend/.env.example index 8df96cc..b098eb6 100644 --- a/backend/.env.example +++ b/backend/.env.example @@ -14,7 +14,20 @@ ALLOWED_ORIGINS=http://localhost:3000 # --- Rate limiting y tamaño de petición --- RATE_LIMIT_REQUESTS=60 RATE_LIMIT_WINDOW_SECONDS=60 -MAX_REQUEST_BYTES=1048576 +MAX_REQUEST_BYTES=25165824 + +# --- Materiales de contexto (PDF, DOCX, imágenes, etc.) --- +UPLOAD_DIR=/app/uploads +MAX_UPLOAD_BYTES=20971520 +MAX_MATERIALS_PER_TEMPLATE=10 +MAX_REFERENCE_CHARS=12000 + +# --- Imágenes de examen (preguntas visuales, sin extracción OCR) --- +MAX_IMAGE_BYTES=5242880 +MAX_IMAGES_PER_TEMPLATE=20 + +# Cupo total por examen (materiales + imágenes). 50 MB por defecto. +MAX_STORAGE_BYTES_PER_TEMPLATE=52428800 # --- JWT (login email/contraseña y sesión tras Google) --- JWT_SECRET_KEY=change-me-use-a-long-random-secret-key-at-least-32-chars diff --git a/backend/Dockerfile b/backend/Dockerfile index 4ee7778..7d5524f 100644 --- a/backend/Dockerfile +++ b/backend/Dockerfile @@ -5,6 +5,13 @@ ENV PYTHONDONTWRITEBYTECODE=1 \ WORKDIR /app +RUN apt-get update \ + && apt-get install -y --no-install-recommends \ + tesseract-ocr \ + tesseract-ocr-spa \ + tesseract-ocr-eng \ + && rm -rf /var/lib/apt/lists/* + RUN addgroup --system app && adduser --system --ingroup app app COPY requirements.txt . 
@@ -12,6 +19,8 @@ RUN pip install --no-cache-dir -r requirements.txt COPY app ./app +RUN mkdir -p /app/uploads && chown -R app:app /app/uploads + USER app EXPOSE 8000 diff --git a/backend/app/api/dependencies.py b/backend/app/api/dependencies.py index 061bfc9..c84e035 100644 --- a/backend/app/api/dependencies.py +++ b/backend/app/api/dependencies.py @@ -6,11 +6,41 @@ from sqlalchemy.orm import Session from app.core.config import Settings, get_settings from app.db.session import get_db from app.services.exam_service import ExamService +from app.services.image_service import ImageService from app.services.llm import LLMClient +from app.services.material_service import MaterialService +from app.services.storage_quota import StorageQuotaService -def get_exam_service(db: Annotated[Session, Depends(get_db)]) -> ExamService: - return ExamService(db) +def get_storage_quota_service( + db: Annotated[Session, Depends(get_db)], + settings: Annotated[Settings, Depends(get_settings)], +) -> StorageQuotaService: + return StorageQuotaService(db, settings) + + +def get_material_service( + db: Annotated[Session, Depends(get_db)], + settings: Annotated[Settings, Depends(get_settings)], + storage_quota: Annotated[StorageQuotaService, Depends(get_storage_quota_service)], +) -> MaterialService: + return MaterialService(db, settings, storage_quota) + + +def get_image_service( + db: Annotated[Session, Depends(get_db)], + settings: Annotated[Settings, Depends(get_settings)], + storage_quota: Annotated[StorageQuotaService, Depends(get_storage_quota_service)], +) -> ImageService: + return ImageService(db, settings, storage_quota) + + +def get_exam_service( + db: Annotated[Session, Depends(get_db)], + material_service: Annotated[MaterialService, Depends(get_material_service)], + image_service: Annotated[ImageService, Depends(get_image_service)], +) -> ExamService: + return ExamService(db, material_service=material_service, image_service=image_service) def get_llm_client(settings: 
Annotated[Settings, Depends(get_settings)]) -> LLMClient: diff --git a/backend/app/api/routes/generation.py b/backend/app/api/routes/generation.py index 81dab5a..e7a63c2 100644 --- a/backend/app/api/routes/generation.py +++ b/backend/app/api/routes/generation.py @@ -26,7 +26,12 @@ def build_prompt( current_user: Annotated[User, Depends(get_current_user)], service: Annotated[ExamService, Depends(get_exam_service)], ) -> PromptResponse: - return service.build_prompt(current_user.id, template_id, payload.topic_prompt) + return service.build_prompt( + current_user.id, + template_id, + payload.topic_prompt, + payload.material_ids, + ) @router.post("/generate", response_model=ParsedQuestionsResponse) @@ -41,6 +46,7 @@ async def generate_exam( payload.template_id, payload.topic_prompt, llm_client, + payload.material_ids, ) diff --git a/backend/app/api/routes/images.py b/backend/app/api/routes/images.py new file mode 100644 index 0000000..5d14b45 --- /dev/null +++ b/backend/app/api/routes/images.py @@ -0,0 +1,73 @@ +import uuid +from typing import Annotated + +from fastapi import APIRouter, Depends, File, Form, UploadFile, status +from fastapi.responses import FileResponse + +from app.api.dependencies import get_exam_service, get_image_service +from app.core.auth import get_current_user +from app.models.user import User +from app.schemas.image import ExamImageRead, ExamImageUploadResponse +from app.services.exam_service import ExamService +from app.services.image_service import ImageService + +router = APIRouter(tags=["images"]) + + +@router.post( + "/templates/{template_id}/images", + response_model=ExamImageUploadResponse, + status_code=status.HTTP_201_CREATED, +) +def upload_exam_image( + template_id: uuid.UUID, + current_user: Annotated[User, Depends(get_current_user)], + exam_service: Annotated[ExamService, Depends(get_exam_service)], + image_service: Annotated[ImageService, Depends(get_image_service)], + file: UploadFile = File(...), + caption: Annotated[str | None, 
Form()] = None, +) -> ExamImageUploadResponse: + template = exam_service.get_owned_template(current_user.id, template_id) + image = image_service.upload(template, file, caption=caption) + return ExamImageUploadResponse( + image=ExamImageRead.model_validate(image_service.to_read(image)), + message="Image uploaded successfully", + ) + + +@router.get("/templates/{template_id}/images", response_model=list[ExamImageRead]) +def list_exam_images( + template_id: uuid.UUID, + current_user: Annotated[User, Depends(get_current_user)], + exam_service: Annotated[ExamService, Depends(get_exam_service)], + image_service: Annotated[ImageService, Depends(get_image_service)], +) -> list[ExamImageRead]: + exam_service.get_owned_template(current_user.id, template_id) + images = image_service.list_images(template_id) + return [ExamImageRead.model_validate(image_service.to_read(image)) for image in images] + + +@router.get("/images/{image_id}/content") +def get_exam_image_content( + image_id: uuid.UUID, + current_user: Annotated[User, Depends(get_current_user)], + image_service: Annotated[ImageService, Depends(get_image_service)], +) -> FileResponse: + image = image_service.get_image_for_user(current_user.id, image_id) + return FileResponse( + path=image.storage_path, + media_type=image.mime_type, + filename=image.original_filename, + ) + + +@router.delete("/templates/{template_id}/images/{image_id}", status_code=status.HTTP_204_NO_CONTENT) +def delete_exam_image( + template_id: uuid.UUID, + image_id: uuid.UUID, + current_user: Annotated[User, Depends(get_current_user)], + exam_service: Annotated[ExamService, Depends(get_exam_service)], + image_service: Annotated[ImageService, Depends(get_image_service)], +) -> None: + template = exam_service.get_owned_template(current_user.id, template_id) + image_service.delete_image(template, image_id) diff --git a/backend/app/api/routes/materials.py b/backend/app/api/routes/materials.py new file mode 100644 index 0000000..efd2fc9 --- /dev/null +++ 
b/backend/app/api/routes/materials.py @@ -0,0 +1,55 @@ +import uuid +from typing import Annotated + +from fastapi import APIRouter, Depends, File, UploadFile, status + +from app.api.dependencies import get_exam_service, get_material_service +from app.core.auth import get_current_user +from app.models.exam import MaterialStatus +from app.models.user import User +from app.schemas.material import ExamMaterialRead, ExamMaterialUploadResponse +from app.services.exam_service import ExamService +from app.services.material_service import MaterialService + +router = APIRouter(prefix="/templates/{template_id}/materials", tags=["materials"]) + + +@router.post("", response_model=ExamMaterialUploadResponse, status_code=status.HTTP_201_CREATED) +def upload_material( + template_id: uuid.UUID, + current_user: Annotated[User, Depends(get_current_user)], + exam_service: Annotated[ExamService, Depends(get_exam_service)], + material_service: Annotated[MaterialService, Depends(get_material_service)], + file: UploadFile = File(...), +) -> ExamMaterialUploadResponse: + template = exam_service.get_owned_template(current_user.id, template_id) + material = material_service.upload(template, file) + message = ( + "File uploaded and processed successfully" + if material.status == MaterialStatus.PROCESSED + else "File uploaded but text extraction failed" + ) + return ExamMaterialUploadResponse(material=material, message=message) + + +@router.get("", response_model=list[ExamMaterialRead]) +def list_materials( + template_id: uuid.UUID, + current_user: Annotated[User, Depends(get_current_user)], + exam_service: Annotated[ExamService, Depends(get_exam_service)], + material_service: Annotated[MaterialService, Depends(get_material_service)], +) -> list[ExamMaterialRead]: + exam_service.get_owned_template(current_user.id, template_id) + return material_service.list_materials(template_id) + + +@router.delete("/{material_id}", status_code=status.HTTP_204_NO_CONTENT) +def delete_material( + template_id: 
uuid.UUID, + material_id: uuid.UUID, + current_user: Annotated[User, Depends(get_current_user)], + exam_service: Annotated[ExamService, Depends(get_exam_service)], + material_service: Annotated[MaterialService, Depends(get_material_service)], +) -> None: + template = exam_service.get_owned_template(current_user.id, template_id) + material_service.delete_material(template, material_id) diff --git a/backend/app/api/routes/questions.py b/backend/app/api/routes/questions.py new file mode 100644 index 0000000..4eb1d77 --- /dev/null +++ b/backend/app/api/routes/questions.py @@ -0,0 +1,27 @@ +import uuid +from typing import Annotated + +from fastapi import APIRouter, Depends + +from app.api.dependencies import get_exam_service, get_image_service +from app.core.auth import get_current_user +from app.models.user import User +from app.schemas.exam import QuestionRead +from app.schemas.image import QuestionImageAttach +from app.services.exam_service import ExamService +from app.services.image_service import ImageService + +router = APIRouter(prefix="/questions", tags=["questions"]) + + +@router.patch("/{question_id}/image", response_model=QuestionRead) +def attach_image_to_question( + question_id: uuid.UUID, + payload: QuestionImageAttach, + current_user: Annotated[User, Depends(get_current_user)], + exam_service: Annotated[ExamService, Depends(get_exam_service)], + image_service: Annotated[ImageService, Depends(get_image_service)], +) -> QuestionRead: + question, template = exam_service.get_owned_question(current_user.id, question_id) + updated = image_service.attach_image_to_question(template, question, payload.image_id) + return exam_service.to_question_read(updated) diff --git a/backend/app/api/routes/templates.py b/backend/app/api/routes/templates.py index af152d2..5b9a1ff 100644 --- a/backend/app/api/routes/templates.py +++ b/backend/app/api/routes/templates.py @@ -3,11 +3,13 @@ from typing import Annotated from fastapi import APIRouter, Depends, status -from 
app.api.dependencies import get_exam_service +from app.api.dependencies import get_exam_service, get_storage_quota_service from app.core.auth import get_current_user from app.models.user import User from app.schemas.exam import ExamTemplateCreate, ExamTemplateRead +from app.schemas.storage import TemplateStorageUsage from app.services.exam_service import ExamService +from app.services.storage_quota import StorageQuotaService router = APIRouter(prefix="/templates", tags=["templates"]) @@ -36,3 +38,14 @@ def get_template( service: Annotated[ExamService, Depends(get_exam_service)], ) -> ExamTemplateRead: return service.get_template(current_user.id, template_id) + + +@router.get("/{template_id}/storage", response_model=TemplateStorageUsage) +def get_template_storage_usage( + template_id: uuid.UUID, + current_user: Annotated[User, Depends(get_current_user)], + exam_service: Annotated[ExamService, Depends(get_exam_service)], + storage_quota: Annotated[StorageQuotaService, Depends(get_storage_quota_service)], +) -> TemplateStorageUsage: + exam_service.get_owned_template(current_user.id, template_id) + return TemplateStorageUsage.model_validate(storage_quota.get_usage_summary(template_id)) diff --git a/backend/app/core/config.py b/backend/app/core/config.py index a6693d4..a2e47e0 100644 --- a/backend/app/core/config.py +++ b/backend/app/core/config.py @@ -22,6 +22,17 @@ class Settings(BaseSettings): jwt_algorithm: str = "HS256" jwt_expire_minutes: int = Field(default=60 * 24, ge=5) google_client_id: str | None = None + upload_dir: str = "/app/uploads" + max_upload_bytes: int = Field(default=20_971_520, ge=1_024) + max_materials_per_template: int = Field(default=10, ge=1, le=50) + max_reference_chars: int = Field(default=12_000, ge=1_000, le=100_000) + max_image_bytes: int = Field(default=5_242_880, ge=1_024) + max_images_per_template: int = Field(default=20, ge=1, le=100) + max_storage_bytes_per_template: int = Field( + default=52_428_800, + ge=1_024, + description="Cupo 
total por examen (materiales + imágenes). Por defecto 50 MB.", + ) model_config = SettingsConfigDict( env_file=".env", diff --git a/backend/app/core/security.py b/backend/app/core/security.py index facd271..83a06c1 100644 --- a/backend/app/core/security.py +++ b/backend/app/core/security.py @@ -32,8 +32,8 @@ def clean_text(value: str, *, max_length: int = 8_000) -> str: return cleaned -def sanitize_prompt_input(value: str) -> str: - cleaned = clean_text(value, max_length=4_000) +def sanitize_prompt_input(value: str, *, max_length: int = 4_000) -> str: + cleaned = clean_text(value, max_length=max_length) return ROLE_INJECTION_HINTS.sub("[filtered instruction]", cleaned) diff --git a/backend/app/main.py b/backend/app/main.py index bc6a8dd..7157853 100644 --- a/backend/app/main.py +++ b/backend/app/main.py @@ -4,7 +4,7 @@ from collections.abc import AsyncIterator from fastapi import Depends, FastAPI from fastapi.middleware.cors import CORSMiddleware -from app.api.routes import auth, exports, generation, health, history, templates +from app.api.routes import auth, exports, generation, health, history, images, materials, questions, templates from app.core.config import get_settings from app.core.errors import register_exception_handlers from app.core.middleware import RateLimitMiddleware, RequestSizeLimitMiddleware @@ -25,7 +25,7 @@ def create_app() -> FastAPI: CORSMiddleware, allow_origins=settings.cors_origins, allow_credentials=True, - allow_methods=["GET", "POST", "OPTIONS"], + allow_methods=["GET", "POST", "PATCH", "DELETE", "OPTIONS"], allow_headers=["Authorization", "Content-Type", "X-API-Key"], ) app.add_middleware(RequestSizeLimitMiddleware, settings=settings) @@ -39,6 +39,9 @@ def create_app() -> FastAPI: app.include_router(generation.router, prefix="/exam") app.include_router(exports.router, prefix="/exam") app.include_router(history.router, prefix="/exam") + app.include_router(materials.router, prefix="/exam") + app.include_router(images.router, 
prefix="/exam") + app.include_router(questions.router, prefix="/exam") return app diff --git a/backend/app/models/exam.py b/backend/app/models/exam.py index a6717c9..98870aa 100644 --- a/backend/app/models/exam.py +++ b/backend/app/models/exam.py @@ -35,6 +35,11 @@ class ExportFormat(str, enum.Enum): JSON = "json" +class MaterialStatus(str, enum.Enum): + PROCESSED = "processed" + FAILED = "failed" + + class ExamTemplate(Base): __tablename__ = "exam_templates" @@ -66,6 +71,16 @@ class ExamTemplate(Base): cascade="all, delete-orphan", passive_deletes=True, ) + materials: Mapped[list["ExamMaterial"]] = relationship( + back_populates="template", + cascade="all, delete-orphan", + passive_deletes=True, + ) + images: Mapped[list["ExamImage"]] = relationship( + back_populates="template", + cascade="all, delete-orphan", + passive_deletes=True, + ) class Question(Base): @@ -87,9 +102,16 @@ class Question(Base): score: Mapped[float] = mapped_column(Float, nullable=False, default=1.0) penalty: Mapped[float] = mapped_column(Float, nullable=False, default=0.0) options: Mapped[dict[str, Any]] = mapped_column(JSONB, nullable=False, default=dict) + image_id: Mapped[uuid.UUID | None] = mapped_column( + UUID(as_uuid=True), + ForeignKey("exam_images.id", ondelete="SET NULL"), + nullable=True, + index=True, + ) created_at: Mapped[datetime] = mapped_column(DateTime(timezone=True), server_default=func.now()) template: Mapped[ExamTemplate] = relationship(back_populates="questions") + image: Mapped["ExamImage | None"] = relationship(back_populates="questions") class ExportJob(Base): @@ -108,3 +130,47 @@ class ExportJob(Base): created_at: Mapped[datetime] = mapped_column(DateTime(timezone=True), server_default=func.now()) template: Mapped[ExamTemplate] = relationship(back_populates="export_jobs") + + +class ExamMaterial(Base): + __tablename__ = "exam_materials" + + id: Mapped[uuid.UUID] = mapped_column(UUID(as_uuid=True), primary_key=True, default=uuid.uuid4) + template_id: 
Mapped[uuid.UUID] = mapped_column( + UUID(as_uuid=True), + ForeignKey("exam_templates.id", ondelete="CASCADE"), + nullable=False, + index=True, + ) + original_filename: Mapped[str] = mapped_column(String(255), nullable=False) + mime_type: Mapped[str] = mapped_column(String(120), nullable=False) + size_bytes: Mapped[int] = mapped_column(nullable=False) + storage_path: Mapped[str] = mapped_column(String(500), nullable=False) + extracted_text: Mapped[str | None] = mapped_column(Text, nullable=True) + status: Mapped[MaterialStatus] = mapped_column(Enum(MaterialStatus), nullable=False) + error_message: Mapped[str | None] = mapped_column(String(500), nullable=True) + created_at: Mapped[datetime] = mapped_column(DateTime(timezone=True), server_default=func.now()) + + template: Mapped[ExamTemplate] = relationship(back_populates="materials") + + +class ExamImage(Base): + __tablename__ = "exam_images" + + id: Mapped[uuid.UUID] = mapped_column(UUID(as_uuid=True), primary_key=True, default=uuid.uuid4) + template_id: Mapped[uuid.UUID] = mapped_column( + UUID(as_uuid=True), + ForeignKey("exam_templates.id", ondelete="CASCADE"), + nullable=False, + index=True, + ) + original_filename: Mapped[str] = mapped_column(String(255), nullable=False) + stored_filename: Mapped[str] = mapped_column(String(255), nullable=False) + mime_type: Mapped[str] = mapped_column(String(120), nullable=False) + size_bytes: Mapped[int] = mapped_column(nullable=False) + storage_path: Mapped[str] = mapped_column(String(500), nullable=False) + caption: Mapped[str | None] = mapped_column(String(500), nullable=True) + created_at: Mapped[datetime] = mapped_column(DateTime(timezone=True), server_default=func.now()) + + template: Mapped[ExamTemplate] = relationship(back_populates="images") + questions: Mapped[list["Question"]] = relationship(back_populates="image") diff --git a/backend/app/schemas/exam.py b/backend/app/schemas/exam.py index ef2ad15..8ac7e45 100644 --- a/backend/app/schemas/exam.py +++ 
b/backend/app/schemas/exam.py @@ -65,6 +65,10 @@ class QuestionCreate(BaseModel): correct_answers: list[str] = Field(min_length=1, max_length=20) wrong_answers: list[str] = Field(default_factory=list, max_length=20) matching_pairs: list[MatchingPair] = Field(default_factory=list, max_length=50) + image_id: uuid.UUID | None = Field( + default=None, + description="ID de imagen de la plantilla que debe mostrarse con la pregunta.", + ) difficulty: Difficulty = Difficulty.MEDIUM score: float = Field(default=1.0, ge=0.0, le=100.0) penalty: float = Field(default=0.0, ge=0.0, le=100.0) @@ -91,6 +95,7 @@ class QuestionCreate(BaseModel): class QuestionRead(QuestionCreate): id: uuid.UUID template_id: uuid.UUID + image_url: str | None = None created_at: datetime model_config = ConfigDict(from_attributes=True) @@ -104,11 +109,19 @@ class PromptResponse(BaseModel): class BuildPromptRequest(BaseModel): topic_prompt: str = Field(min_length=5, max_length=4_000) + material_ids: list[uuid.UUID] | None = Field( + default=None, + description="IDs de materiales a incluir. Si no se indica, se usan todos los procesados.", + ) class GenerateExamRequest(BaseModel): template_id: uuid.UUID topic_prompt: str = Field(min_length=5, max_length=4_000) + material_ids: list[uuid.UUID] | None = Field( + default=None, + description="IDs de materiales a incluir. 
Si no se indica, se usan todos los procesados.", + ) class ParseRequest(BaseModel): diff --git a/backend/app/schemas/image.py b/backend/app/schemas/image.py new file mode 100644 index 0000000..7a0811c --- /dev/null +++ b/backend/app/schemas/image.py @@ -0,0 +1,30 @@ +import uuid +from datetime import datetime + +from pydantic import BaseModel, ConfigDict, Field + + +class ExamImageRead(BaseModel): + id: uuid.UUID + template_id: uuid.UUID + original_filename: str + stored_filename: str + mime_type: str + size_bytes: int + caption: str | None + content_url: str + created_at: datetime + + model_config = ConfigDict(from_attributes=True) + + +class ExamImageUploadResponse(BaseModel): + image: ExamImageRead + message: str = "Image uploaded successfully" + + +class QuestionImageAttach(BaseModel): + image_id: uuid.UUID | None = Field( + default=None, + description="ID de imagen de la plantilla. null para desvincular.", + ) diff --git a/backend/app/schemas/material.py b/backend/app/schemas/material.py new file mode 100644 index 0000000..93bbf20 --- /dev/null +++ b/backend/app/schemas/material.py @@ -0,0 +1,32 @@ +import uuid +from datetime import datetime + +from pydantic import BaseModel, ConfigDict, Field + +from app.models.exam import MaterialStatus + + +class ExamMaterialRead(BaseModel): + id: uuid.UUID + template_id: uuid.UUID + original_filename: str + mime_type: str + size_bytes: int + status: MaterialStatus + error_message: str | None + text_preview: str | None = None + created_at: datetime + + model_config = ConfigDict(from_attributes=True) + + +class ExamMaterialUploadResponse(BaseModel): + material: ExamMaterialRead + message: str = "File uploaded and processed successfully" + + +class MaterialIdsFilter(BaseModel): + material_ids: list[uuid.UUID] | None = Field( + default=None, + description="Si se indica, solo se usan estos materiales como contexto.", + ) diff --git a/backend/app/schemas/storage.py b/backend/app/schemas/storage.py new file mode 100644 index 
0000000..036656e --- /dev/null +++ b/backend/app/schemas/storage.py @@ -0,0 +1,14 @@ +import uuid + +from pydantic import BaseModel + + +class TemplateStorageUsage(BaseModel): + template_id: uuid.UUID + used_bytes: int + limit_bytes: int + remaining_bytes: int + materials_bytes: int + images_bytes: int + used_mb: float + limit_mb: float diff --git a/backend/app/services/document_extractor.py b/backend/app/services/document_extractor.py new file mode 100644 index 0000000..1841ca4 --- /dev/null +++ b/backend/app/services/document_extractor.py @@ -0,0 +1,74 @@ +from pathlib import Path + +from app.core.errors import AppError + +SUPPORTED_EXTENSIONS = { + ".pdf": "application/pdf", + ".docx": "application/vnd.openxmlformats-officedocument.wordprocessingml.document", + ".txt": "text/plain", + ".md": "text/markdown", + ".png": "image/png", + ".jpg": "image/jpeg", + ".jpeg": "image/jpeg", + ".webp": "image/webp", +} + + +class DocumentExtractor: + def extract(self, file_path: Path, mime_type: str) -> str: + suffix = file_path.suffix.lower() + if mime_type == "application/pdf" or suffix == ".pdf": + return self._extract_pdf(file_path) + if ( + mime_type == "application/vnd.openxmlformats-officedocument.wordprocessingml.document" + or suffix == ".docx" + ): + return self._extract_docx(file_path) + if mime_type.startswith("text/") or suffix in {".txt", ".md"}: + return self._extract_text(file_path) + if mime_type.startswith("image/") or suffix in {".png", ".jpg", ".jpeg", ".webp"}: + return self._extract_image(file_path) + raise AppError(f"Unsupported file type: {mime_type}", status_code=415, code="unsupported_media") + + def _extract_pdf(self, file_path: Path) -> str: + from pypdf import PdfReader + + reader = PdfReader(str(file_path)) + parts = [page.extract_text() or "" for page in reader.pages] + text = "\n".join(parts).strip() + if not text: + raise AppError("PDF does not contain extractable text", status_code=422, code="empty_extraction") + return text + + def 
_extract_docx(self, file_path: Path) -> str: + from docx import Document + + document = Document(str(file_path)) + parts = [paragraph.text.strip() for paragraph in document.paragraphs if paragraph.text.strip()] + text = "\n".join(parts).strip() + if not text: + raise AppError("DOCX does not contain extractable text", status_code=422, code="empty_extraction") + return text + + def _extract_text(self, file_path: Path) -> str: + text = file_path.read_text(encoding="utf-8", errors="ignore").strip() + if not text: + raise AppError("Text file is empty", status_code=422, code="empty_extraction") + return text + + def _extract_image(self, file_path: Path) -> str: + try: + import pytesseract + from PIL import Image + except ImportError as exc: + raise AppError( + "Image OCR is not available on this server", + status_code=503, + code="ocr_unavailable", + ) from exc + + image = Image.open(file_path) + text = pytesseract.image_to_string(image, lang="spa+eng").strip() + if not text: + raise AppError("Image does not contain recognizable text", status_code=422, code="empty_extraction") + return text diff --git a/backend/app/services/exam_service.py b/backend/app/services/exam_service.py index 42d6114..a4371b2 100644 --- a/backend/app/services/exam_service.py +++ b/backend/app/services/exam_service.py @@ -17,7 +17,9 @@ from app.schemas.exam import ( QuestionCreate, QuestionRead, ) +from app.services.image_service import ImageService from app.services.llm import LLMClient +from app.services.material_service import MaterialService from app.services.moodle_exporter import MoodleXMLExporter from app.services.parser import AIQuestionParser from app.services.prompt_builder import PromptBuilder @@ -30,11 +32,15 @@ class ExamService: prompt_builder: PromptBuilder | None = None, parser: AIQuestionParser | None = None, exporter: MoodleXMLExporter | None = None, + material_service: MaterialService | None = None, + image_service: ImageService | None = None, ) -> None: self.db = db 
self.prompt_builder = prompt_builder or PromptBuilder() self.parser = parser or AIQuestionParser() self.exporter = exporter or MoodleXMLExporter() + self.material_service = material_service + self.image_service = image_service def create_template(self, user_id: uuid.UUID, payload: ExamTemplateCreate) -> ExamTemplateRead: template = ExamTemplate( @@ -87,9 +93,25 @@ class ExamService: def get_template(self, user_id: uuid.UUID, template_id: uuid.UUID) -> ExamTemplateRead: return self._template_read(self._get_user_template_or_404(user_id, template_id)) - def build_prompt(self, user_id: uuid.UUID, template_id: uuid.UUID, topic_prompt: str) -> PromptResponse: + def get_owned_template(self, user_id: uuid.UUID, template_id: uuid.UUID) -> ExamTemplate: + return self._get_user_template_or_404(user_id, template_id) + + def build_prompt( + self, + user_id: uuid.UUID, + template_id: uuid.UUID, + topic_prompt: str, + material_ids: list[uuid.UUID] | None = None, + ) -> PromptResponse: template = self._get_user_template_or_404(user_id, template_id) - prompt = self.prompt_builder.build_prompt(template, topic_prompt) + reference_context = self._reference_context(template_id, material_ids) + images_catalog = self._images_catalog(template_id) + prompt = self.prompt_builder.build_prompt( + template, + topic_prompt, + reference_context, + images_catalog, + ) return PromptResponse(template_id=template.id, prompt=prompt) async def generate_with_llm( @@ -98,9 +120,17 @@ class ExamService: template_id: uuid.UUID, topic_prompt: str, llm_client: LLMClient, + material_ids: list[uuid.UUID] | None = None, ) -> ParsedQuestionsResponse: template = self._get_user_template_or_404(user_id, template_id) - prompt = self.prompt_builder.build_prompt(template, topic_prompt) + reference_context = self._reference_context(template_id, material_ids) + images_catalog = self._images_catalog(template_id) + prompt = self.prompt_builder.build_prompt( + template, + topic_prompt, + reference_context, + 
images_catalog, + ) raw_output = await llm_client.generate(prompt) questions = self.parser.parse_json(raw_output) return self._persist_questions(template.id, questions) @@ -116,8 +146,9 @@ class ExamService: if not questions: raise NotFoundError("Template does not contain questions to export") + image_map = self._image_map(template.id) if export_format == ExportFormat.XML: - content = self.exporter.export_xml(questions) + content = self.exporter.export_xml(questions, image_map) elif export_format == ExportFormat.TXT: content = self.exporter.export_txt(questions) else: @@ -134,9 +165,30 @@ class ExamService: self.db.commit() return ExportResponse(template_id=template.id, format=export_format, content=content) + def get_owned_question(self, user_id: uuid.UUID, question_id: uuid.UUID) -> tuple[Question, ExamTemplate]: + question = self.db.get(Question, question_id) + if question is None: + raise NotFoundError("Question not found") + template = self._get_user_template_or_404(user_id, question.template_id) + if question.template_id != template.id: + raise NotFoundError("Question not found") + return question, template + + def to_question_read(self, question: Question) -> QuestionRead: + read = QuestionRead.model_validate(question) + if question.image_id: + return read.model_copy(update={"image_url": f"/exam/images/{question.image_id}/content"}) + return read + def _persist_questions(self, template_id: uuid.UUID, questions: list[QuestionCreate]) -> ParsedQuestionsResponse: persisted: list[Question] = [] for payload in questions: + image_id = payload.image_id + if image_id is not None: + if self.image_service is None: + raise NotFoundError("Image service is not available") + self.image_service.get_image_for_template(template_id, image_id) + question = Question( template_id=template_id, question_type=payload.question_type, @@ -144,6 +196,7 @@ class ExamService: correct_answers=[clean_text(answer, max_length=1_000) for answer in payload.correct_answers], 
wrong_answers=[clean_text(answer, max_length=1_000) for answer in payload.wrong_answers], matching_pairs=[pair.model_dump() for pair in payload.matching_pairs], + image_id=image_id, difficulty=payload.difficulty, score=payload.score, penalty=payload.penalty, @@ -156,7 +209,26 @@ class ExamService: for question in persisted: self.db.refresh(question) - return ParsedQuestionsResponse(questions=[QuestionRead.model_validate(question) for question in persisted]) + return ParsedQuestionsResponse(questions=[self.to_question_read(question) for question in persisted]) + + def _reference_context( + self, + template_id: uuid.UUID, + material_ids: list[uuid.UUID] | None, + ) -> str: + if self.material_service is None: + return "" + return self.material_service.build_reference_context(template_id, material_ids) + + def _images_catalog(self, template_id: uuid.UUID) -> str: + if self.image_service is None: + return "" + return self.image_service.images_catalog(template_id) + + def _image_map(self, template_id: uuid.UUID) -> dict[uuid.UUID, object]: + if self.image_service is None: + return {} + return self.image_service.build_image_map(template_id) def _get_user_template_or_404(self, user_id: uuid.UUID, template_id: uuid.UUID) -> ExamTemplate: template = self.db.get(ExamTemplate, template_id) diff --git a/backend/app/services/image_service.py b/backend/app/services/image_service.py new file mode 100644 index 0000000..3f5bf22 --- /dev/null +++ b/backend/app/services/image_service.py @@ -0,0 +1,206 @@ +import uuid +from pathlib import Path + +from fastapi import UploadFile +from PIL import Image, UnidentifiedImageError +from sqlalchemy import func, select +from sqlalchemy.orm import Session + +from app.core.config import Settings +from app.core.errors import AppError, NotFoundError +from app.core.security import clean_text +from app.models.exam import ExamImage, ExamTemplate, Question +from app.services.storage_quota import StorageQuotaService + +ALLOWED_IMAGE_EXTENSIONS = {".png", 
".jpg", ".jpeg", ".webp", ".gif"} +ALLOWED_IMAGE_MIMES = { + "image/png", + "image/jpeg", + "image/webp", + "image/gif", +} + + +class ImageService: + def __init__( + self, + db: Session, + settings: Settings, + storage_quota: StorageQuotaService | None = None, + ) -> None: + self.db = db + self.settings = settings + self.storage_quota = storage_quota or StorageQuotaService(db, settings) + self.image_root = Path(settings.upload_dir) / "exam_images" + self.image_root.mkdir(parents=True, exist_ok=True) + + def upload( + self, + template: ExamTemplate, + upload_file: UploadFile, + caption: str | None = None, + ) -> ExamImage: + self._validate_upload_count(template.id) + suffix, mime_type = self._validate_image_file(upload_file) + + content = upload_file.file.read() + if len(content) > self.settings.max_image_bytes: + raise AppError( + f"Image exceeds maximum size of {self.settings.max_image_bytes} bytes", + status_code=413, + code="file_too_large", + ) + + self.storage_quota.ensure_template_has_space(template.id, len(content)) + + image_id = uuid.uuid4() + stored_filename = f"{image_id}{suffix}" + target_dir = self.image_root / str(template.user_id) / str(template.id) + target_dir.mkdir(parents=True, exist_ok=True) + storage_path = target_dir / stored_filename + storage_path.write_bytes(content) + self._verify_image_integrity(storage_path) + + image = ExamImage( + id=image_id, + template_id=template.id, + original_filename=clean_text(upload_file.filename or stored_filename, max_length=255), + stored_filename=stored_filename, + mime_type=mime_type, + size_bytes=len(content), + storage_path=str(storage_path), + caption=clean_text(caption, max_length=500) if caption else None, + ) + self.db.add(image) + self.db.commit() + self.db.refresh(image) + return image + + def list_images(self, template_id: uuid.UUID) -> list[ExamImage]: + return list( + self.db.scalars( + select(ExamImage) + .where(ExamImage.template_id == template_id) + .order_by(ExamImage.created_at.desc()) + 
).all() + ) + + def get_image_for_template(self, template_id: uuid.UUID, image_id: uuid.UUID) -> ExamImage: + image = self.db.get(ExamImage, image_id) + if image is None or image.template_id != template_id: + raise NotFoundError("Image not found for this template") + return image + + def get_image_for_user(self, user_id: uuid.UUID, image_id: uuid.UUID) -> ExamImage: + image = self.db.get(ExamImage, image_id) + if image is None: + raise NotFoundError("Image not found") + template = image.template + if template.user_id != user_id: + raise NotFoundError("Image not found") + return image + + def delete_image(self, template: ExamTemplate, image_id: uuid.UUID) -> None: + image = self.get_image_for_template(template.id, image_id) + for question in list(image.questions): + question.image_id = None + + path = Path(image.storage_path) + if path.exists(): + path.unlink() + + self.db.delete(image) + self.db.commit() + + def attach_image_to_question( + self, + template: ExamTemplate, + question: Question, + image_id: uuid.UUID | None, + ) -> Question: + if question.template_id != template.id: + raise NotFoundError("Question not found for this template") + if image_id is not None: + self.get_image_for_template(template.id, image_id) + question.image_id = image_id + self.db.commit() + self.db.refresh(question) + return question + + def images_catalog(self, template_id: uuid.UUID) -> str: + images = self.list_images(template_id) + if not images: + return "" + + lines = [ + "Imágenes disponibles para preguntas visuales (el enunciado debe referirse a la imagen; " + "asigna el campo image_id en cada pregunta que deba mostrarla):" + ] + for image in images: + caption = image.caption or "sin descripción" + lines.append( + f"- image_id: {image.id} | archivo: {image.original_filename} | descripción: {caption}" + ) + return "\n".join(lines) + + def build_image_map(self, template_id: uuid.UUID) -> dict[uuid.UUID, ExamImage]: + images = self.list_images(template_id) + return {image.id: 
image for image in images} + + def to_read(self, image: ExamImage) -> dict[str, object]: + return { + "id": image.id, + "template_id": image.template_id, + "original_filename": image.original_filename, + "stored_filename": image.stored_filename, + "mime_type": image.mime_type, + "size_bytes": image.size_bytes, + "caption": image.caption, + "content_url": f"/exam/images/{image.id}/content", + "created_at": image.created_at, + } + + def _validate_upload_count(self, template_id: uuid.UUID) -> None: + count = self.db.scalar( + select(func.count()).select_from(ExamImage).where(ExamImage.template_id == template_id) + ) + if count is not None and count >= self.settings.max_images_per_template: + raise AppError( + f"Maximum of {self.settings.max_images_per_template} images per template reached", + status_code=409, + code="too_many_images", + ) + + def _validate_image_file(self, upload_file: UploadFile) -> tuple[str, str]: + if not upload_file.filename: + raise AppError("Filename is required", status_code=400, code="invalid_file") + + suffix = Path(upload_file.filename).suffix.lower() + if suffix not in ALLOWED_IMAGE_EXTENSIONS: + raise AppError( + f"Unsupported image type. 
Allowed: {', '.join(sorted(ALLOWED_IMAGE_EXTENSIONS))}", + status_code=415, + code="unsupported_media", + ) + + mime_type = upload_file.content_type or "" + if mime_type and mime_type not in ALLOWED_IMAGE_MIMES: + raise AppError("Unsupported image MIME type", status_code=415, code="unsupported_media") + + mime_by_suffix = { + ".png": "image/png", + ".jpg": "image/jpeg", + ".jpeg": "image/jpeg", + ".webp": "image/webp", + ".gif": "image/gif", + } + resolved_mime = mime_type if mime_type in ALLOWED_IMAGE_MIMES else mime_by_suffix[suffix] + return suffix, resolved_mime + + def _verify_image_integrity(self, storage_path: Path) -> None: + try: + with Image.open(storage_path) as img: + img.verify() + except (UnidentifiedImageError, OSError) as exc: + storage_path.unlink(missing_ok=True) + raise AppError("Invalid or corrupted image file", status_code=422, code="invalid_image") from exc diff --git a/backend/app/services/material_service.py b/backend/app/services/material_service.py new file mode 100644 index 0000000..7d309a5 --- /dev/null +++ b/backend/app/services/material_service.py @@ -0,0 +1,188 @@ +import uuid +from pathlib import Path + +from fastapi import UploadFile +from sqlalchemy import func, select +from sqlalchemy.orm import Session + +from app.core.config import Settings +from app.core.errors import AppError, NotFoundError +from app.core.security import clean_text +from app.models.exam import ExamMaterial, ExamTemplate, MaterialStatus +from app.schemas.material import ExamMaterialRead +from app.services.document_extractor import SUPPORTED_EXTENSIONS, DocumentExtractor +from app.services.storage_quota import StorageQuotaService + + +class MaterialService: + def __init__( + self, + db: Session, + settings: Settings, + storage_quota: StorageQuotaService | None = None, + ) -> None: + self.db = db + self.settings = settings + self.storage_quota = storage_quota or StorageQuotaService(db, settings) + self.extractor = DocumentExtractor() + self.upload_root = 
Path(settings.upload_dir) + self.upload_root.mkdir(parents=True, exist_ok=True) + + def upload( + self, + template: ExamTemplate, + upload_file: UploadFile, + ) -> ExamMaterialRead: + self._validate_upload(template.id, upload_file) + + suffix = Path(upload_file.filename or "file").suffix.lower() + if suffix not in SUPPORTED_EXTENSIONS: + raise AppError( + f"Unsupported extension. Allowed: {', '.join(sorted(SUPPORTED_EXTENSIONS))}", + status_code=415, + code="unsupported_media", + ) + + content = upload_file.file.read() + if len(content) > self.settings.max_upload_bytes: + raise AppError( + f"File exceeds maximum size of {self.settings.max_upload_bytes} bytes", + status_code=413, + code="file_too_large", + ) + if not content: + raise AppError("Uploaded file is empty", status_code=400, code="empty_file") + + self.storage_quota.ensure_template_has_space(template.id, len(content)) + + material_id = uuid.uuid4() + safe_name = f"{material_id}{suffix}" + target_dir = self.upload_root / str(template.user_id) / str(template.id) + target_dir.mkdir(parents=True, exist_ok=True) + storage_path = target_dir / safe_name + storage_path.write_bytes(content) + + mime_type = upload_file.content_type or SUPPORTED_EXTENSIONS[suffix] + material = ExamMaterial( + id=material_id, + template_id=template.id, + original_filename=clean_text(upload_file.filename or safe_name, max_length=255), + mime_type=mime_type, + size_bytes=len(content), + storage_path=str(storage_path), + status=MaterialStatus.PROCESSED, + ) + + try: + material.extracted_text = clean_text( + self.extractor.extract(storage_path, mime_type), + max_length=500_000, + ) + except AppError as exc: + material.status = MaterialStatus.FAILED + material.error_message = clean_text(exc.message, max_length=500) + except Exception as exc: + material.status = MaterialStatus.FAILED + material.error_message = clean_text(str(exc), max_length=500) + + self.db.add(material) + self.db.commit() + self.db.refresh(material) + return 
self._to_read(material) + + def list_materials(self, template_id: uuid.UUID) -> list[ExamMaterialRead]: + materials = self.db.scalars( + select(ExamMaterial) + .where(ExamMaterial.template_id == template_id) + .order_by(ExamMaterial.created_at.desc()) + ).all() + return [self._to_read(material) for material in materials] + + def delete_material(self, template: ExamTemplate, material_id: uuid.UUID) -> None: + material = self.db.get(ExamMaterial, material_id) + if material is None or material.template_id != template.id: + raise NotFoundError("Material not found") + + path = Path(material.storage_path) + if path.exists(): + path.unlink() + + self.db.delete(material) + self.db.commit() + + def build_reference_context( + self, + template_id: uuid.UUID, + material_ids: list[uuid.UUID] | None = None, + ) -> str: + query = select(ExamMaterial).where( + ExamMaterial.template_id == template_id, + ExamMaterial.status == MaterialStatus.PROCESSED, + ExamMaterial.extracted_text.isnot(None), + ) + if material_ids: + query = query.where(ExamMaterial.id.in_(material_ids)) + + materials = self.db.scalars(query.order_by(ExamMaterial.created_at.asc())).all() + if material_ids: + found_ids = {material.id for material in materials} + missing = [material_id for material_id in material_ids if material_id not in found_ids] + if missing: + raise NotFoundError("One or more material IDs were not found or are not processed") + + if not materials: + return "" + + sections: list[str] = [] + for material in materials: + text = material.extracted_text or "" + if not text.strip(): + continue + sections.append( + f"--- Archivo: {material.original_filename} ---\n{text.strip()}" + ) + + if not sections: + return "" + + combined = "\n\n".join(sections) + max_chars = self.settings.max_reference_chars + if len(combined) <= max_chars: + return combined + + truncated = combined[:max_chars].rsplit("\n", 1)[0] + return f"{truncated}\n\n[Material truncado por límite de contexto]" + + def 
_validate_upload(self, template_id: uuid.UUID, upload_file: UploadFile) -> None: + if not upload_file.filename: + raise AppError("Filename is required", status_code=400, code="invalid_file") + + count = self.db.scalar( + select(func.count()) + .select_from(ExamMaterial) + .where(ExamMaterial.template_id == template_id) + ) + if count is not None and count >= self.settings.max_materials_per_template: + raise AppError( + f"Maximum of {self.settings.max_materials_per_template} files per template reached", + status_code=409, + code="too_many_files", + ) + + def _to_read(self, material: ExamMaterial) -> ExamMaterialRead: + preview = None + if material.extracted_text: + preview = material.extracted_text[:300] + if len(material.extracted_text) > 300: + preview += "..." + return ExamMaterialRead( + id=material.id, + template_id=material.template_id, + original_filename=material.original_filename, + mime_type=material.mime_type, + size_bytes=material.size_bytes, + status=material.status, + error_message=material.error_message, + text_preview=preview, + created_at=material.created_at, + ) diff --git a/backend/app/services/moodle_exporter.py b/backend/app/services/moodle_exporter.py index 2048cd9..d15358f 100644 --- a/backend/app/services/moodle_exporter.py +++ b/backend/app/services/moodle_exporter.py @@ -1,15 +1,20 @@ +import base64 import json +from html import escape as html_escape +from pathlib import Path from typing import Any +from uuid import UUID from xml.sax.saxutils import escape as xml_escape from app.core.security import clean_text class MoodleXMLExporter: - def export_xml(self, questions: list[Any]) -> str: + def export_xml(self, questions: list[Any], image_map: dict[UUID, Any] | None = None) -> str: + images = image_map or {} parts = ['', ""] for index, question in enumerate(questions, start=1): - parts.append(self._export_question(question, index)) + parts.append(self._export_question(question, index, images)) parts.append("") return "\n".join(parts) @@ -17,6 
+22,8 @@ class MoodleXMLExporter: blocks: list[str] = [] for question in questions: lines = [self._attr(question, "statement")] + if self._attr(question, "image_id"): + lines.append(f"[Imagen adjunta: {self._attr(question, 'image_id')}]") lines.extend(self._attr(question, "correct_answers") or []) lines.extend(self._attr(question, "wrong_answers") or []) blocks.append("\n".join(clean_text(str(line)) for line in lines)) @@ -26,19 +33,19 @@ class MoodleXMLExporter: payload = {"questions": [self._question_dict(question) for question in questions]} return json.dumps(payload, ensure_ascii=False, indent=2, default=str) - def _export_question(self, question: Any, index: int) -> str: + def _export_question(self, question: Any, index: int, image_map: dict[UUID, Any]) -> str: question_type = self._enum_value(self._attr(question, "question_type")) if question_type == "multichoice": - return self._multichoice(question, index) + return self._multichoice(question, index, image_map) if question_type == "truefalse": - return self._truefalse(question, index) + return self._truefalse(question, index, image_map) if question_type == "shortanswer": - return self._shortanswer(question, index) + return self._shortanswer(question, index, image_map) if question_type == "matching": - return self._matching(question, index) + return self._matching(question, index, image_map) raise ValueError(f"Unsupported Moodle question type: {question_type}") - def _multichoice(self, question: Any, index: int) -> str: + def _multichoice(self, question: Any, index: int, image_map: dict[UUID, Any]) -> str: correct_answers = self._attr(question, "correct_answers") or [] wrong_answers = self._attr(question, "wrong_answers") or [] options = self._attr(question, "options") or {} @@ -53,7 +60,7 @@ class MoodleXMLExporter: return "\n".join( [ ' ', - self._common_header(question, index), + *self._common_header(question, index, image_map), f" {str(not multiple_correct).lower()}", " 1", *answers, @@ -61,32 +68,32 @@ 
class MoodleXMLExporter: ] ) - def _truefalse(self, question: Any, index: int) -> str: + def _truefalse(self, question: Any, index: int, image_map: dict[UUID, Any]) -> str: correct = (self._attr(question, "correct_answers") or ["true"])[0].lower() is_true = correct in {"true", "verdadero"} return "\n".join( [ ' ', - self._common_header(question, index), + *self._common_header(question, index, image_map), self._answer_xml("true", 100 if is_true else 0), self._answer_xml("false", 0 if is_true else 100), " ", ] ) - def _shortanswer(self, question: Any, index: int) -> str: + def _shortanswer(self, question: Any, index: int, image_map: dict[UUID, Any]) -> str: answers = [self._answer_xml(answer, 100) for answer in self._attr(question, "correct_answers")] return "\n".join( [ ' ', - self._common_header(question, index), + *self._common_header(question, index, image_map), " 0", *answers, " ", ] ) - def _matching(self, question: Any, index: int) -> str: + def _matching(self, question: Any, index: int, image_map: dict[UUID, Any]) -> str: subquestions = [] for pair in self._attr(question, "matching_pairs") or []: prompt = pair.get("prompt") if isinstance(pair, dict) else pair.prompt @@ -106,27 +113,63 @@ class MoodleXMLExporter: return "\n".join( [ ' ', - self._common_header(question, index), + *self._common_header(question, index, image_map), *subquestions, " ", ] ) - def _common_header(self, question: Any, index: int) -> str: + def _common_header(self, question: Any, index: int, image_map: dict[UUID, Any]) -> list[str]: statement = self._attr(question, "statement") name = clean_text(statement, max_length=80) or f"Pregunta {index}" - return "\n".join( - [ - " ", - f" {self._xml(name)}", - " ", - ' ', - f" {self._cdata(statement)}", - " ", - f" {float(self._attr(question, 'score') or 1.0):.2f}", - " ", - ] - ) + return [ + " ", + f" {self._xml(name)}", + " ", + ' ', + f" {self._question_html(question, image_map)}", + " ", + *self._embedded_files(question, image_map), + f" 
{float(self._attr(question, 'score') or 1.0):.2f}", + ' ', + ] + + def _question_html(self, question: Any, image_map: dict[UUID, Any]) -> str: + statement = html_escape(clean_text(str(self._attr(question, "statement")))) + html_parts = [f"

{statement}

"] + + image = self._resolve_image(question, image_map) + if image is not None: + alt = html_escape(clean_text(image.caption or image.original_filename, max_length=200)) + html_parts.append( + f'

{alt}

' + ) + + return self._cdata("".join(html_parts)) + + def _embedded_files(self, question: Any, image_map: dict[UUID, Any]) -> list[str]: + image = self._resolve_image(question, image_map) + if image is None: + return [] + + path = Path(image.storage_path) + if not path.exists(): + return [] + + encoded = base64.b64encode(path.read_bytes()).decode("ascii") + return [ + f' ', + encoded, + " ", + ] + + def _resolve_image(self, question: Any, image_map: dict[UUID, Any]) -> Any | None: + image_id = self._attr(question, "image_id") + if image_id is None: + return None + if hasattr(question, "image") and question.image is not None: + return question.image + return image_map.get(image_id) def _answer_xml(self, text: str, fraction: float) -> str: fraction_text = f"{fraction:.6g}" @@ -134,7 +177,7 @@ class MoodleXMLExporter: [ f' ', f" {self._xml(text)}", - " ", + ' ', " ", ] ) @@ -144,6 +187,7 @@ class MoodleXMLExporter: "id": str(self._attr(question, "id")) if self._attr(question, "id") else None, "question_type": self._enum_value(self._attr(question, "question_type")), "statement": self._attr(question, "statement"), + "image_id": str(self._attr(question, "image_id")) if self._attr(question, "image_id") else None, "correct_answers": self._attr(question, "correct_answers") or [], "wrong_answers": self._attr(question, "wrong_answers") or [], "matching_pairs": self._attr(question, "matching_pairs") or [], @@ -162,5 +206,5 @@ class MoodleXMLExporter: return xml_escape(clean_text(str(value)), {'"': """, "'": "'"}) def _cdata(self, value: Any) -> str: - text = clean_text(str(value)).replace("]]>", "]]]]>") + text = str(value).replace("]]>", "]]]]>") return f"" diff --git a/backend/app/services/parser.py b/backend/app/services/parser.py index f53dc22..74cb3c8 100644 --- a/backend/app/services/parser.py +++ b/backend/app/services/parser.py @@ -72,12 +72,14 @@ class AIQuestionParser: if isinstance(wrong, str): wrong = [wrong] + image_id = item.get("image_id") return { 
"question_type": question_type, "statement": item.get("statement", item.get("question", item.get("prompt", ""))), "correct_answers": correct, "wrong_answers": wrong, "matching_pairs": item.get("matching_pairs", []), + "image_id": image_id, "difficulty": item.get("difficulty", Difficulty.MEDIUM.value), "score": item.get("score", 1.0), "penalty": item.get("penalty", 0.0), diff --git a/backend/app/services/prompt_builder.py b/backend/app/services/prompt_builder.py index 78c7f6d..7f92647 100644 --- a/backend/app/services/prompt_builder.py +++ b/backend/app/services/prompt_builder.py @@ -5,7 +5,13 @@ from app.models.exam import ExamTemplate class PromptBuilder: - def build_prompt(self, template: ExamTemplate, topic_prompt: str) -> str: + def build_prompt( + self, + template: ExamTemplate, + topic_prompt: str, + reference_context: str = "", + images_catalog: str = "", + ) -> str: settings = template.settings difficulty_profile = template.difficulty_profile safe_topic = sanitize_prompt_input(topic_prompt) @@ -18,6 +24,7 @@ class PromptBuilder: "correct_answers": ["respuesta correcta"], "wrong_answers": ["distractor 1", "distractor 2"], "matching_pairs": [{"prompt": "concepto", "answer": "definicion"}], + "image_id": "uuid-opcional-de-imagen-de-la-plantilla", "difficulty": "easy | medium | hard | very_hard", "score": 1.0, "penalty": 0.0, @@ -41,6 +48,20 @@ class PromptBuilder: "Tema, conceptos y restricciones indicadas por el profesor:", safe_topic, "", + *( + [ + "Material de referencia (usa SOLO esta información junto con el tema para crear preguntas):", + sanitize_prompt_input(reference_context, max_length=12_000) if reference_context else "", + "", + ] + if reference_context.strip() + else [] + ), + *( + [images_catalog, ""] + if images_catalog.strip() + else [] + ), "Contrato de salida obligatorio:", json.dumps(contract, ensure_ascii=False, indent=2), "", @@ -51,5 +72,7 @@ class PromptBuilder: "- En truefalse, usa una única respuesta correcta: true o false.", "- En 
shortanswer, incluye respuestas exactas aceptadas.", "- En matching, rellena matching_pairs y deja wrong_answers vacío.", + "- Si la pregunta debe mostrar una imagen al alumno, incluye image_id del catálogo de imágenes.", + "- El enunciado debe describir qué observar en la imagen vinculada (sin inventar image_id inexistentes).", ] ) diff --git a/backend/app/services/storage_quota.py b/backend/app/services/storage_quota.py new file mode 100644 index 0000000..f064d5b --- /dev/null +++ b/backend/app/services/storage_quota.py @@ -0,0 +1,81 @@ +import uuid + +from sqlalchemy import func, select +from sqlalchemy.orm import Session + +from app.core.config import Settings +from app.core.errors import AppError +from app.models.exam import ExamImage, ExamMaterial + + +class StorageQuotaService: + def __init__(self, db: Session, settings: Settings) -> None: + self.db = db + self.settings = settings + + def get_template_usage_bytes(self, template_id: uuid.UUID) -> int: + materials_bytes = self.db.scalar( + select(func.coalesce(func.sum(ExamMaterial.size_bytes), 0)).where( + ExamMaterial.template_id == template_id + ) + ) + images_bytes = self.db.scalar( + select(func.coalesce(func.sum(ExamImage.size_bytes), 0)).where( + ExamImage.template_id == template_id + ) + ) + return int(materials_bytes or 0) + int(images_bytes or 0) + + def ensure_template_has_space(self, template_id: uuid.UUID, incoming_bytes: int) -> None: + if incoming_bytes <= 0: + return + + limit = self.settings.max_storage_bytes_per_template + used = self.get_template_usage_bytes(template_id) + projected = used + incoming_bytes + + if projected > limit: + raise AppError( + message=( + f"Template storage quota exceeded. 
" + f"Limit: {self._format_mb(limit)}, " + f"used: {self._format_mb(used)}, " + f"file: {self._format_mb(incoming_bytes)}" + ), + status_code=413, + code="template_storage_quota_exceeded", + ) + + def get_usage_summary(self, template_id: uuid.UUID) -> dict[str, int | float]: + materials_bytes = int( + self.db.scalar( + select(func.coalesce(func.sum(ExamMaterial.size_bytes), 0)).where( + ExamMaterial.template_id == template_id + ) + ) + or 0 + ) + images_bytes = int( + self.db.scalar( + select(func.coalesce(func.sum(ExamImage.size_bytes), 0)).where( + ExamImage.template_id == template_id + ) + ) + or 0 + ) + used = materials_bytes + images_bytes + limit = self.settings.max_storage_bytes_per_template + return { + "template_id": template_id, + "used_bytes": used, + "limit_bytes": limit, + "remaining_bytes": max(limit - used, 0), + "materials_bytes": materials_bytes, + "images_bytes": images_bytes, + "used_mb": round(used / (1024 * 1024), 2), + "limit_mb": round(limit / (1024 * 1024), 2), + } + + @staticmethod + def _format_mb(value_bytes: int) -> str: + return f"{value_bytes / (1024 * 1024):.2f} MB" diff --git a/backend/requirements.txt b/backend/requirements.txt index 3076625..f391448 100644 --- a/backend/requirements.txt +++ b/backend/requirements.txt @@ -11,4 +11,9 @@ passlib[bcrypt] python-jose[cryptography] google-auth requests +python-multipart +pypdf +python-docx +Pillow +pytesseract pytest diff --git a/docker-compose.yml b/docker-compose.yml index 1d61571..11f8de1 100644 --- a/docker-compose.yml +++ b/docker-compose.yml @@ -11,6 +11,8 @@ services: depends_on: db: condition: service_healthy + volumes: + - uploads_data:/app/uploads restart: unless-stopped frontend: @@ -40,3 +42,4 @@ services: volumes: postgres_data: + uploads_data: