From ebc3631cfddc243ef1a2e27d2bb7e0bfc7e52f62 Mon Sep 17 00:00:00 2001 From: Mireya Cueto Garrido Date: Wed, 13 May 2026 13:43:32 +0200 Subject: [PATCH 1/6] =?UTF-8?q?Primera=20versi=C3=B3n=20del=20backend?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- .env | 14 ++ .env.example | 14 ++ FlujoDeUsuario.txt | 22 ++++ backend/Dockerfile | 19 +++ backend/app/__init__.py | 1 + backend/app/api/__init__.py | 1 + backend/app/api/dependencies.py | 17 +++ backend/app/api/routes/__init__.py | 1 + backend/app/api/routes/exports.py | 37 ++++++ backend/app/api/routes/generation.py | 43 ++++++ backend/app/api/routes/health.py | 8 ++ backend/app/api/routes/templates.py | 31 +++++ backend/app/core/config.py | 36 +++++ backend/app/core/errors.py | 56 ++++++++ backend/app/core/middleware.py | 50 +++++++ backend/app/core/security.py | 41 ++++++ backend/app/db/base.py | 5 + backend/app/db/init_db.py | 7 + backend/app/db/session.py | 18 +++ backend/app/main.py | 46 +++++++ backend/app/models/__init__.py | 1 + backend/app/models/exam.py | 102 +++++++++++++++ backend/app/schemas/__init__.py | 1 + backend/app/schemas/exam.py | 127 ++++++++++++++++++ backend/app/services/__init__.py | 1 + backend/app/services/exam_service.py | 147 +++++++++++++++++++++ backend/app/services/llm.py | 48 +++++++ backend/app/services/moodle_exporter.py | 166 ++++++++++++++++++++++++ backend/app/services/parser.py | 98 ++++++++++++++ backend/app/services/prompt_builder.py | 55 ++++++++ backend/requirements.txt | 9 ++ docker-compose.yml | 42 ++++++ 32 files changed, 1264 insertions(+) create mode 100644 .env create mode 100644 .env.example create mode 100644 FlujoDeUsuario.txt create mode 100644 backend/Dockerfile create mode 100644 backend/app/__init__.py create mode 100644 backend/app/api/__init__.py create mode 100644 backend/app/api/dependencies.py create mode 100644 backend/app/api/routes/__init__.py create mode 100644 backend/app/api/routes/exports.py create 
mode 100644 backend/app/api/routes/generation.py create mode 100644 backend/app/api/routes/health.py create mode 100644 backend/app/api/routes/templates.py create mode 100644 backend/app/core/config.py create mode 100644 backend/app/core/errors.py create mode 100644 backend/app/core/middleware.py create mode 100644 backend/app/core/security.py create mode 100644 backend/app/db/base.py create mode 100644 backend/app/db/init_db.py create mode 100644 backend/app/db/session.py create mode 100644 backend/app/main.py create mode 100644 backend/app/models/__init__.py create mode 100644 backend/app/models/exam.py create mode 100644 backend/app/schemas/__init__.py create mode 100644 backend/app/schemas/exam.py create mode 100644 backend/app/services/__init__.py create mode 100644 backend/app/services/exam_service.py create mode 100644 backend/app/services/llm.py create mode 100644 backend/app/services/moodle_exporter.py create mode 100644 backend/app/services/parser.py create mode 100644 backend/app/services/prompt_builder.py create mode 100644 backend/requirements.txt create mode 100644 docker-compose.yml diff --git a/.env b/.env new file mode 100644 index 0000000..4b5f810 --- /dev/null +++ b/.env @@ -0,0 +1,14 @@ +APP_NAME=GenExamenes IA +ENVIRONMENT=local +API_KEY=change-me-in-production +DATABASE_URL=postgresql+psycopg://genexamenes:genexamenes@db:5432/genexamenes +ALLOWED_ORIGINS=http://localhost:3000 +RATE_LIMIT_REQUESTS=300 +RATE_LIMIT_WINDOW_SECONDS=300 +MAX_REQUEST_BYTES=1048576 + +# OpenAI-compatible chat completions endpoint. 
+LLM_API_KEY= +LLM_BASE_URL=/api/chat +LLM_MODEL=qwen3.5:35b +LLM_TIMEOUT_SECONDS=250 diff --git a/.env.example b/.env.example new file mode 100644 index 0000000..173588e --- /dev/null +++ b/.env.example @@ -0,0 +1,14 @@ +APP_NAME=GenExamenes IA +ENVIRONMENT=local +API_KEY=change-me-in-production +DATABASE_URL=postgresql+psycopg://genexamenes:genexamenes@db:5432/genexamenes +ALLOWED_ORIGINS=http://localhost:3000 +RATE_LIMIT_REQUESTS=60 +RATE_LIMIT_WINDOW_SECONDS=60 +MAX_REQUEST_BYTES=1048576 + +# OpenAI-compatible chat completions endpoint. +LLM_API_KEY= +LLM_BASE_URL=https://api.openai.com/v1 +LLM_MODEL=gpt-4o-mini +LLM_TIMEOUT_SECONDS=60 diff --git a/FlujoDeUsuario.txt b/FlujoDeUsuario.txt new file mode 100644 index 0000000..8f34a92 --- /dev/null +++ b/FlujoDeUsuario.txt @@ -0,0 +1,22 @@ +Ahora mismo el flujo es backend/API, sin frontend: + +1.- El profesor crea una plantilla con POST /exam/templates Define título, materia, nivel educativo, +tipos de preguntas, número de preguntas, puntuación, penalización y dificultad. + +2.- Genera un prompt con POST /exam/prompts/{template_id} La API devuelve un prompt estructurado para +pedirle al LLM preguntas en JSON válido. + +3.- Hay dos caminos posibles: + + 3.1.- Generación automática: POST /exam/generate La API llama al LLM configurado, parsea la respuesta y guarda las preguntas. + 3.2.- Carga manual: POST /exam/parse El profesor pega una salida de IA en json o txt, y la API la valida y guarda. + +4.- El profesor exporta el examen: + +GET /exam/export/xml/{template_id} para Moodle XML. +GET /exam/export/txt/{template_id} para texto plano. +GET /exam/export/json/{template_id} para JSON. + +(El XML generado se importa manualmente en Moodle.) + +En resumen: configurar plantilla → generar prompt o llamar al LLM → guardar preguntas → exportar Moodle XML. 
\ No newline at end of file diff --git a/backend/Dockerfile b/backend/Dockerfile new file mode 100644 index 0000000..4ee7778 --- /dev/null +++ b/backend/Dockerfile @@ -0,0 +1,19 @@ +FROM python:3.12-slim + +ENV PYTHONDONTWRITEBYTECODE=1 \ + PYTHONUNBUFFERED=1 + +WORKDIR /app + +RUN addgroup --system app && adduser --system --ingroup app app + +COPY requirements.txt . +RUN pip install --no-cache-dir -r requirements.txt + +COPY app ./app + +USER app + +EXPOSE 8000 + +CMD ["uvicorn", "app.main:app", "--host", "0.0.0.0", "--port", "8000"] diff --git a/backend/app/__init__.py b/backend/app/__init__.py new file mode 100644 index 0000000..526ac34 --- /dev/null +++ b/backend/app/__init__.py @@ -0,0 +1 @@ +"""GenExamenes IA backend package.""" diff --git a/backend/app/api/__init__.py b/backend/app/api/__init__.py new file mode 100644 index 0000000..dff53e5 --- /dev/null +++ b/backend/app/api/__init__.py @@ -0,0 +1 @@ +"""API package.""" diff --git a/backend/app/api/dependencies.py b/backend/app/api/dependencies.py new file mode 100644 index 0000000..061bfc9 --- /dev/null +++ b/backend/app/api/dependencies.py @@ -0,0 +1,17 @@ +from typing import Annotated + +from fastapi import Depends +from sqlalchemy.orm import Session + +from app.core.config import Settings, get_settings +from app.db.session import get_db +from app.services.exam_service import ExamService +from app.services.llm import LLMClient + + +def get_exam_service(db: Annotated[Session, Depends(get_db)]) -> ExamService: + return ExamService(db) + + +def get_llm_client(settings: Annotated[Settings, Depends(get_settings)]) -> LLMClient: + return LLMClient(settings) diff --git a/backend/app/api/routes/__init__.py b/backend/app/api/routes/__init__.py new file mode 100644 index 0000000..509980f --- /dev/null +++ b/backend/app/api/routes/__init__.py @@ -0,0 +1 @@ +"""API route package.""" diff --git a/backend/app/api/routes/exports.py b/backend/app/api/routes/exports.py new file mode 100644 index 0000000..7b6ca68 --- 
/dev/null +++ b/backend/app/api/routes/exports.py @@ -0,0 +1,37 @@ +import uuid +from typing import Annotated + +from fastapi import APIRouter, Depends, Response + +from app.api.dependencies import get_exam_service +from app.models.exam import ExportFormat +from app.services.exam_service import ExamService + +router = APIRouter(prefix="/export", tags=["exports"]) + + +@router.get("/xml/{template_id}") +def export_xml( + template_id: uuid.UUID, + service: Annotated[ExamService, Depends(get_exam_service)], +) -> Response: + export = service.export(template_id, ExportFormat.XML) + return Response(content=export.content, media_type="application/xml") + + +@router.get("/txt/{template_id}") +def export_txt( + template_id: uuid.UUID, + service: Annotated[ExamService, Depends(get_exam_service)], +) -> Response: + export = service.export(template_id, ExportFormat.TXT) + return Response(content=export.content, media_type="text/plain; charset=utf-8") + + +@router.get("/json/{template_id}") +def export_json( + template_id: uuid.UUID, + service: Annotated[ExamService, Depends(get_exam_service)], +) -> Response: + export = service.export(template_id, ExportFormat.JSON) + return Response(content=export.content, media_type="application/json") diff --git a/backend/app/api/routes/generation.py b/backend/app/api/routes/generation.py new file mode 100644 index 0000000..64cf87d --- /dev/null +++ b/backend/app/api/routes/generation.py @@ -0,0 +1,43 @@ +import uuid +from typing import Annotated + +from fastapi import APIRouter, Depends + +from app.api.dependencies import get_exam_service, get_llm_client +from app.schemas.exam import ( + BuildPromptRequest, + GenerateExamRequest, + ParsedQuestionsResponse, + ParseRequest, + PromptResponse, +) +from app.services.exam_service import ExamService +from app.services.llm import LLMClient + +router = APIRouter(tags=["generation"]) + + +@router.post("/prompts/{template_id}", response_model=PromptResponse) +def build_prompt( + template_id: 
uuid.UUID, + payload: BuildPromptRequest, + service: Annotated[ExamService, Depends(get_exam_service)], +) -> PromptResponse: + return service.build_prompt(template_id, payload.topic_prompt) + + +@router.post("/generate", response_model=ParsedQuestionsResponse) +async def generate_exam( + payload: GenerateExamRequest, + service: Annotated[ExamService, Depends(get_exam_service)], + llm_client: Annotated[LLMClient, Depends(get_llm_client)], +) -> ParsedQuestionsResponse: + return await service.generate_with_llm(payload.template_id, payload.topic_prompt, llm_client) + + +@router.post("/parse", response_model=ParsedQuestionsResponse) +def parse_ai_output( + payload: ParseRequest, + service: Annotated[ExamService, Depends(get_exam_service)], +) -> ParsedQuestionsResponse: + return service.parse_and_persist(payload) diff --git a/backend/app/api/routes/health.py b/backend/app/api/routes/health.py new file mode 100644 index 0000000..dd3a219 --- /dev/null +++ b/backend/app/api/routes/health.py @@ -0,0 +1,8 @@ +from fastapi import APIRouter + +router = APIRouter(tags=["health"]) + + +@router.get("/health") +def health_check() -> dict[str, str]: + return {"status": "ok"} diff --git a/backend/app/api/routes/templates.py b/backend/app/api/routes/templates.py new file mode 100644 index 0000000..2d1dd72 --- /dev/null +++ b/backend/app/api/routes/templates.py @@ -0,0 +1,31 @@ +import uuid +from typing import Annotated + +from fastapi import APIRouter, Depends, status + +from app.api.dependencies import get_exam_service +from app.schemas.exam import ExamTemplateCreate, ExamTemplateRead +from app.services.exam_service import ExamService + +router = APIRouter(prefix="/templates", tags=["templates"]) + + +@router.post("", response_model=ExamTemplateRead, status_code=status.HTTP_201_CREATED) +def create_template( + payload: ExamTemplateCreate, + service: Annotated[ExamService, Depends(get_exam_service)], +) -> ExamTemplateRead: + return service.create_template(payload) + + 
+@router.get("", response_model=list[ExamTemplateRead])
+def list_templates(service: Annotated[ExamService, Depends(get_exam_service)]) -> list[ExamTemplateRead]:
+    return service.list_templates()
+
+
+@router.get("/{template_id}", response_model=ExamTemplateRead)
+def get_template(
+    template_id: uuid.UUID,
+    service: Annotated[ExamService, Depends(get_exam_service)],
+) -> ExamTemplateRead:
+    return service.get_template(template_id)
diff --git a/backend/app/core/config.py b/backend/app/core/config.py
new file mode 100644
index 0000000..348b46e
--- /dev/null
+++ b/backend/app/core/config.py
@@ -0,0 +1,36 @@
+from functools import lru_cache
+
+from pydantic import Field
+from pydantic_settings import BaseSettings, SettingsConfigDict
+
+
+class Settings(BaseSettings):
+    app_name: str = "GenExamenes IA"
+    environment: str = "local"
+    api_prefix: str = ""
+    api_key: str = Field(min_length=16)
+    database_url: str = "postgresql+psycopg://genexamenes:genexamenes@localhost:5432/genexamenes"
+    allowed_origins: str = "http://localhost:3000"
+    rate_limit_requests: int = Field(default=60, ge=1)
+    rate_limit_window_seconds: int = Field(default=60, ge=1)
+    max_request_bytes: int = Field(default=1_048_576, ge=1_024)
+    llm_api_key: str | None = None
+    llm_base_url: str = "https://api.openai.com/v1"
+    llm_model: str = "gpt-4o-mini"
+    llm_timeout_seconds: int = Field(default=60, ge=5)
+
+    model_config = SettingsConfigDict(
+        env_file=".env",
+        env_file_encoding="utf-8",
+        case_sensitive=False,
+        extra="ignore",
+    )
+
+    @property
+    def cors_origins(self) -> list[str]:
+        return [origin.strip() for origin in self.allowed_origins.split(",") if origin.strip()]
+
+
+@lru_cache
+def get_settings() -> Settings:
+    return Settings()
diff --git a/backend/app/core/errors.py b/backend/app/core/errors.py
new file mode 100644
index 0000000..a4f902b
--- /dev/null
+++ b/backend/app/core/errors.py
@@ -0,0 +1,82 @@
+from fastapi import FastAPI, Request
+from fastapi.encoders import jsonable_encoder
+from fastapi.exceptions import RequestValidationError
+from fastapi.responses import ORJSONResponse
+from starlette.exceptions import HTTPException as StarletteHTTPException
+
+
+class AppError(Exception):
+    """Base class for domain errors that map onto an HTTP error response."""
+
+    def __init__(self, message: str, status_code: int = 400, code: str = "app_error") -> None:
+        # Forward the message so str(exc), tracebacks and logs are not empty.
+        super().__init__(message)
+        self.message = message
+        self.status_code = status_code
+        self.code = code
+
+
+class NotFoundError(AppError):
+    """Raised when a requested resource does not exist (HTTP 404)."""
+
+    def __init__(self, message: str = "Resource not found") -> None:
+        super().__init__(message=message, status_code=404, code="not_found")
+
+
+class LLMUnavailableError(AppError):
+    """Raised when the LLM backend cannot be used (HTTP 503)."""
+
+    def __init__(self, message: str = "LLM service is unavailable") -> None:
+        super().__init__(message=message, status_code=503, code="llm_unavailable")
+
+
+class ParseError(AppError):
+    """Raised when AI output cannot be parsed (HTTP 422)."""
+
+    def __init__(self, message: str = "Unable to parse AI output") -> None:
+        super().__init__(message=message, status_code=422, code="parse_error")
+
+
+def error_payload(code: str, message: str, details: object | None = None) -> dict[str, object]:
+    """Build the JSON error envelope shared by every error response.
+
+    Returns ``{"error": {"code": ..., "message": ...}}``; ``details`` is added
+    under ``error`` only when it is not ``None``.
+    """
+    error: dict[str, object] = {"code": code, "message": message}
+    if details is not None:
+        error["details"] = details
+    return {"error": error}
+
+
+def register_exception_handlers(app: FastAPI) -> None:
+    """Install handlers that render errors using the ``error_payload`` envelope."""
+
+    @app.exception_handler(AppError)
+    async def app_error_handler(_: Request, exc: AppError) -> ORJSONResponse:
+        return ORJSONResponse(
+            status_code=exc.status_code,
+            content=error_payload(exc.code, exc.message),
+        )
+
+    @app.exception_handler(StarletteHTTPException)
+    async def http_error_handler(_: Request, exc: StarletteHTTPException) -> ORJSONResponse:
+        return ORJSONResponse(
+            status_code=exc.status_code,
+            content=error_payload("http_error", str(exc.detail)),
+            # Keep headers attached to the exception (e.g. WWW-Authenticate).
+            headers=getattr(exc, "headers", None),
+        )
+
+    @app.exception_handler(RequestValidationError)
+    async def validation_error_handler(_: Request, exc: RequestValidationError) -> ORJSONResponse:
+        # Validator failures can carry raw exception objects (e.g. under "ctx");
+        # jsonable_encoder converts them so serialization cannot fail with a 500.
+        return ORJSONResponse(
+            status_code=422,
+            content=error_payload(
+                "validation_error",
+                "Invalid request payload",
+                jsonable_encoder(exc.errors()),
+            ),
+        )
diff --git
a/backend/app/core/middleware.py b/backend/app/core/middleware.py
new file mode 100644
index 0000000..8630ac3
--- /dev/null
+++ b/backend/app/core/middleware.py
@@ -0,0 +1,91 @@
+import time
+from collections import defaultdict, deque
+
+from fastapi import Request, Response
+from fastapi.responses import ORJSONResponse
+from starlette.middleware.base import BaseHTTPMiddleware, RequestResponseEndpoint
+
+from app.core.config import Settings
+from app.core.errors import error_payload
+
+
+class RateLimitMiddleware(BaseHTTPMiddleware):
+    """In-memory sliding-window rate limiter keyed by client host.
+
+    State is held in this instance's memory only.
+    """
+
+    def __init__(self, app: object, settings: Settings) -> None:
+        super().__init__(app)
+        self.limit = settings.rate_limit_requests
+        self.window_seconds = settings.rate_limit_window_seconds
+        self.requests: defaultdict[str, deque[float]] = defaultdict(deque)
+        self._last_sweep = time.monotonic()
+
+    def _evict_idle_clients(self, now: float) -> None:
+        """Drop buckets with no hit inside the window so memory stays bounded."""
+        if now - self._last_sweep <= self.window_seconds:
+            return
+        self._last_sweep = now
+        idle = [
+            client
+            for client, hits in self.requests.items()
+            if not hits or now - hits[-1] > self.window_seconds
+        ]
+        for client in idle:
+            del self.requests[client]
+
+    async def dispatch(self, request: Request, call_next: RequestResponseEndpoint) -> Response:
+        client = request.client.host if request.client else "unknown"
+        now = time.monotonic()
+        # Sweep at most once per window; without it every distinct client
+        # address would keep a deque alive for the lifetime of the process.
+        self._evict_idle_clients(now)
+        bucket = self.requests[client]
+
+        # Discard timestamps that have fallen out of the sliding window.
+        while bucket and now - bucket[0] > self.window_seconds:
+            bucket.popleft()
+
+        if len(bucket) >= self.limit:
+            return ORJSONResponse(
+                status_code=429,
+                content=error_payload("rate_limited", "Too many requests"),
+                headers={"Retry-After": str(self.window_seconds)},
+            )
+
+        bucket.append(now)
+        return await call_next(request)
+
+
+class RequestSizeLimitMiddleware(BaseHTTPMiddleware):
+    """Reject requests whose declared Content-Length exceeds the configured cap.
+
+    NOTE: only the declared length is checked; bodies sent without a
+    Content-Length header (e.g. chunked transfer) are not measured here.
+    """
+
+    def __init__(self, app: object, settings: Settings) -> None:
+        super().__init__(app)
+        self.max_request_bytes = settings.max_request_bytes
+
+    async def dispatch(self, request: Request, call_next: RequestResponseEndpoint) -> Response:
+        content_length = request.headers.get("content-length")
+        if content_length:
+            try:
+                declared_bytes = int(content_length)
+            except ValueError:
+                declared_bytes = -1
+            # A non-numeric or negative length is a malformed request: answer
+            # 400 instead of letting int() raise and surface as a 500.
+            if declared_bytes < 0:
+                return ORJSONResponse(
+                    status_code=400,
+                    content=error_payload("invalid_content_length", "Content-Length header is invalid"),
+                )
+            if declared_bytes > self.max_request_bytes:
+                return ORJSONResponse(
+                    status_code=413,
+                    content=error_payload("payload_too_large", "Request body is too large"),
+                )
+        return await call_next(request)
diff --git a/backend/app/core/security.py
b/backend/app/core/security.py
new file mode 100644
index 0000000..facd271
--- /dev/null
+++ b/backend/app/core/security.py
@@ -0,0 +1,54 @@
+import re
+import secrets
+from html import escape
+from typing import Annotated
+
+from fastapi import Depends, Header, HTTPException, status
+
+from app.core.config import Settings, get_settings
+
+
+# C0 control characters except tab, line feed and carriage return.
+CONTROL_CHARS = re.compile(r"[\x00-\x08\x0b\x0c\x0e-\x1f]")
+# Phrases commonly used to smuggle role/instruction overrides into a prompt.
+ROLE_INJECTION_HINTS = re.compile(
+    r"(ignore\s+(all\s+)?previous|system\s*:|developer\s*:|act\s+as\s+system)",
+    flags=re.IGNORECASE,
+)
+
+
+def require_api_key(
+    settings: Annotated[Settings, Depends(get_settings)],
+    x_api_key: Annotated[str | None, Header(alias="X-API-Key")] = None,
+) -> None:
+    """Reject the request with 401 unless ``X-API-Key`` matches the configured key."""
+    # Compare as bytes in constant time: ``!=`` short-circuits on the first
+    # differing character (timing side channel), and compare_digest only
+    # accepts ``str`` arguments when they are pure ASCII.
+    supplied = (x_api_key or "").encode("utf-8")
+    expected = settings.api_key.encode("utf-8")
+    if not supplied or not secrets.compare_digest(supplied, expected):
+        raise HTTPException(
+            status_code=status.HTTP_401_UNAUTHORIZED,
+            detail="Invalid or missing API key",
+        )
+
+
+def clean_text(value: str, *, max_length: int = 8_000) -> str:
+    """Strip control characters and surrounding whitespace, capped at ``max_length``."""
+    cleaned = CONTROL_CHARS.sub("", value).strip()
+    if len(cleaned) > max_length:
+        # Re-strip: the cut may leave trailing whitespace behind.
+        cleaned = cleaned[:max_length].strip()
+    return cleaned
+
+
+def sanitize_prompt_input(value: str) -> str:
+    """Clean user text for prompt use and mask known role-injection phrases."""
+    cleaned = clean_text(value, max_length=4_000)
+    return ROLE_INJECTION_HINTS.sub("[filtered instruction]", cleaned)
+
+
+def html_text(value: str) -> str:
+    """Return ``value`` cleaned and HTML-escaped, including quote characters."""
+    return escape(clean_text(value), quote=True)
diff --git a/backend/app/db/base.py b/backend/app/db/base.py
new file mode 100644
index 0000000..fa2b68a
--- /dev/null
+++ b/backend/app/db/base.py
@@ -0,0 +1,5 @@
+from sqlalchemy.orm import DeclarativeBase
+
+
+class Base(DeclarativeBase):
+    pass
diff --git a/backend/app/db/init_db.py b/backend/app/db/init_db.py
new file mode 100644
index 0000000..94d6ead
--- /dev/null
+++ b/backend/app/db/init_db.py
@@ -0,0 +1,7 @@
+from app.db.base import Base
+from app.db.session import engine
+from app.models import exam  # noqa: F401
+
+
+def init_db() -> None:
+    Base.metadata.create_all(bind=engine)
diff --git a/backend/app/db/session.py b/backend/app/db/session.py
new file mode
100644 index 0000000..29744f4 --- /dev/null +++ b/backend/app/db/session.py @@ -0,0 +1,18 @@ +from collections.abc import Generator + +from sqlalchemy import create_engine +from sqlalchemy.orm import Session, sessionmaker + +from app.core.config import get_settings + + +engine = create_engine(get_settings().database_url, pool_pre_ping=True) +SessionLocal = sessionmaker(bind=engine, autoflush=False, autocommit=False, expire_on_commit=False) + + +def get_db() -> Generator[Session, None, None]: + db = SessionLocal() + try: + yield db + finally: + db.close() diff --git a/backend/app/main.py b/backend/app/main.py new file mode 100644 index 0000000..1b2bfd7 --- /dev/null +++ b/backend/app/main.py @@ -0,0 +1,46 @@ +from contextlib import asynccontextmanager +from collections.abc import AsyncIterator + +from fastapi import Depends, FastAPI +from fastapi.middleware.cors import CORSMiddleware + +from app.api.routes import exports, generation, health, templates +from app.core.config import get_settings +from app.core.errors import register_exception_handlers +from app.core.middleware import RateLimitMiddleware, RequestSizeLimitMiddleware +from app.core.security import require_api_key +from app.db.init_db import init_db + + +@asynccontextmanager +async def lifespan(_: FastAPI) -> AsyncIterator[None]: + init_db() + yield + + +def create_app() -> FastAPI: + settings = get_settings() + app = FastAPI(title=settings.app_name, lifespan=lifespan) + + app.add_middleware( + CORSMiddleware, + allow_origins=settings.cors_origins, + allow_credentials=True, + allow_methods=["GET", "POST", "OPTIONS"], + allow_headers=["Authorization", "Content-Type", "X-API-Key"], + ) + app.add_middleware(RequestSizeLimitMiddleware, settings=settings) + app.add_middleware(RateLimitMiddleware, settings=settings) + + register_exception_handlers(app) + + app.include_router(health.router) + protected = [Depends(require_api_key)] + app.include_router(templates.router, prefix="/exam", dependencies=protected) + 
app.include_router(generation.router, prefix="/exam", dependencies=protected) + app.include_router(exports.router, prefix="/exam", dependencies=protected) + + return app + + +app = create_app() diff --git a/backend/app/models/__init__.py b/backend/app/models/__init__.py new file mode 100644 index 0000000..663618f --- /dev/null +++ b/backend/app/models/__init__.py @@ -0,0 +1 @@ +"""SQLAlchemy model package.""" diff --git a/backend/app/models/exam.py b/backend/app/models/exam.py new file mode 100644 index 0000000..ad987fc --- /dev/null +++ b/backend/app/models/exam.py @@ -0,0 +1,102 @@ +import enum +import uuid +from datetime import datetime +from typing import Any + +from sqlalchemy import DateTime, Enum, Float, ForeignKey, String, Text, func +from sqlalchemy.dialects.postgresql import JSONB, UUID +from sqlalchemy.orm import Mapped, mapped_column, relationship + +from app.db.base import Base + + +class QuestionType(str, enum.Enum): + MULTICHOICE = "multichoice" + TRUE_FALSE = "truefalse" + SHORT_ANSWER = "shortanswer" + MATCHING = "matching" + + +class Difficulty(str, enum.Enum): + EASY = "easy" + MEDIUM = "medium" + HARD = "hard" + VERY_HARD = "very_hard" + + +class ExportStatus(str, enum.Enum): + COMPLETED = "completed" + FAILED = "failed" + + +class ExportFormat(str, enum.Enum): + XML = "xml" + TXT = "txt" + JSON = "json" + + +class ExamTemplate(Base): + __tablename__ = "exam_templates" + + id: Mapped[uuid.UUID] = mapped_column(UUID(as_uuid=True), primary_key=True, default=uuid.uuid4) + title: Mapped[str] = mapped_column(String(200), nullable=False) + subject: Mapped[str] = mapped_column(String(200), nullable=False) + educational_level: Mapped[str] = mapped_column(String(120), nullable=False) + language: Mapped[str] = mapped_column(String(20), nullable=False, default="es") + settings: Mapped[dict[str, Any]] = mapped_column(JSONB, nullable=False, default=dict) + difficulty_profile: Mapped[dict[str, Any]] = mapped_column(JSONB, nullable=False, default=dict) + 
created_at: Mapped[datetime] = mapped_column(DateTime(timezone=True), server_default=func.now()) + updated_at: Mapped[datetime] = mapped_column(DateTime(timezone=True), server_default=func.now(), onupdate=func.now()) + + questions: Mapped[list["Question"]] = relationship( + back_populates="template", + cascade="all, delete-orphan", + passive_deletes=True, + ) + export_jobs: Mapped[list["ExportJob"]] = relationship( + back_populates="template", + cascade="all, delete-orphan", + passive_deletes=True, + ) + + +class Question(Base): + __tablename__ = "questions" + + id: Mapped[uuid.UUID] = mapped_column(UUID(as_uuid=True), primary_key=True, default=uuid.uuid4) + template_id: Mapped[uuid.UUID] = mapped_column( + UUID(as_uuid=True), + ForeignKey("exam_templates.id", ondelete="CASCADE"), + nullable=False, + index=True, + ) + question_type: Mapped[QuestionType] = mapped_column(Enum(QuestionType), nullable=False) + statement: Mapped[str] = mapped_column(Text, nullable=False) + correct_answers: Mapped[list[str]] = mapped_column(JSONB, nullable=False, default=list) + wrong_answers: Mapped[list[str]] = mapped_column(JSONB, nullable=False, default=list) + matching_pairs: Mapped[list[dict[str, str]]] = mapped_column(JSONB, nullable=False, default=list) + difficulty: Mapped[Difficulty] = mapped_column(Enum(Difficulty), nullable=False, default=Difficulty.MEDIUM) + score: Mapped[float] = mapped_column(Float, nullable=False, default=1.0) + penalty: Mapped[float] = mapped_column(Float, nullable=False, default=0.0) + options: Mapped[dict[str, Any]] = mapped_column(JSONB, nullable=False, default=dict) + created_at: Mapped[datetime] = mapped_column(DateTime(timezone=True), server_default=func.now()) + + template: Mapped[ExamTemplate] = relationship(back_populates="questions") + + +class ExportJob(Base): + __tablename__ = "export_jobs" + + id: Mapped[uuid.UUID] = mapped_column(UUID(as_uuid=True), primary_key=True, default=uuid.uuid4) + template_id: Mapped[uuid.UUID] = mapped_column( + 
UUID(as_uuid=True), + ForeignKey("exam_templates.id", ondelete="CASCADE"), + nullable=False, + index=True, + ) + status: Mapped[ExportStatus] = mapped_column(Enum(ExportStatus), nullable=False) + format: Mapped[ExportFormat] = mapped_column(Enum(ExportFormat), nullable=False) + content: Mapped[str] = mapped_column(Text, nullable=False) + created_at: Mapped[datetime] = mapped_column(DateTime(timezone=True), server_default=func.now()) + + template: Mapped[ExamTemplate] = relationship(back_populates="export_jobs") diff --git a/backend/app/schemas/__init__.py b/backend/app/schemas/__init__.py new file mode 100644 index 0000000..13c4df7 --- /dev/null +++ b/backend/app/schemas/__init__.py @@ -0,0 +1 @@ +"""API schema package.""" diff --git a/backend/app/schemas/exam.py b/backend/app/schemas/exam.py new file mode 100644 index 0000000..d476cf6 --- /dev/null +++ b/backend/app/schemas/exam.py @@ -0,0 +1,127 @@ +import uuid +from datetime import datetime +from typing import Literal + +from pydantic import BaseModel, ConfigDict, Field, field_validator, model_validator + +from app.models.exam import Difficulty, ExportFormat, QuestionType + + +class QuestionTypeSettings(BaseModel): + type: QuestionType + count: int = Field(ge=1, le=200) + options_count: int | None = Field(default=None, ge=2, le=8) + multiple_correct: bool = False + score: float = Field(default=1.0, ge=0.0, le=100.0) + penalty: float = Field(default=0.0, ge=0.0, le=100.0) + + +class ExamSettings(BaseModel): + question_types: list[QuestionTypeSettings] = Field(min_length=1, max_length=20) + shuffle_questions: bool = True + shuffle_answers: bool = True + include_feedback: bool = True + + +class DifficultyProfile(BaseModel): + easy: int = Field(default=0, ge=0, le=500) + medium: int = Field(default=0, ge=0, le=500) + hard: int = Field(default=0, ge=0, le=500) + very_hard: int = Field(default=0, ge=0, le=500) + + @model_validator(mode="after") + def require_at_least_one_question(self) -> "DifficultyProfile": + if 
self.easy + self.medium + self.hard + self.very_hard <= 0: + raise ValueError("At least one difficulty bucket must contain questions") + return self + + +class ExamTemplateCreate(BaseModel): + title: str = Field(min_length=3, max_length=200) + subject: str = Field(min_length=2, max_length=200) + educational_level: str = Field(min_length=2, max_length=120) + language: str = Field(default="es", min_length=2, max_length=20) + settings: ExamSettings + difficulty_profile: DifficultyProfile + + +class ExamTemplateRead(ExamTemplateCreate): + id: uuid.UUID + created_at: datetime + updated_at: datetime + question_count: int = 0 + + model_config = ConfigDict(from_attributes=True) + + +class MatchingPair(BaseModel): + prompt: str = Field(min_length=1, max_length=1_000) + answer: str = Field(min_length=1, max_length=1_000) + + +class QuestionCreate(BaseModel): + question_type: QuestionType + statement: str = Field(min_length=3, max_length=8_000) + correct_answers: list[str] = Field(min_length=1, max_length=20) + wrong_answers: list[str] = Field(default_factory=list, max_length=20) + matching_pairs: list[MatchingPair] = Field(default_factory=list, max_length=50) + difficulty: Difficulty = Difficulty.MEDIUM + score: float = Field(default=1.0, ge=0.0, le=100.0) + penalty: float = Field(default=0.0, ge=0.0, le=100.0) + options: dict[str, object] = Field(default_factory=dict) + + @field_validator("correct_answers", "wrong_answers") + @classmethod + def strip_answers(cls, value: list[str]) -> list[str]: + return [answer.strip() for answer in value if answer.strip()] + + @model_validator(mode="after") + def validate_question_payload(self) -> "QuestionCreate": + if self.question_type == QuestionType.MULTICHOICE and not self.wrong_answers: + raise ValueError("Multichoice questions require wrong_answers") + if self.question_type == QuestionType.TRUE_FALSE: + accepted = {"true", "false", "verdadero", "falso"} + if self.correct_answers[0].lower() not in accepted: + raise 
ValueError("True/false questions require a true or false correct answer") + if self.question_type == QuestionType.MATCHING and not self.matching_pairs: + raise ValueError("Matching questions require matching_pairs") + return self + + +class QuestionRead(QuestionCreate): + id: uuid.UUID + template_id: uuid.UUID + created_at: datetime + + model_config = ConfigDict(from_attributes=True) + + +class PromptResponse(BaseModel): + template_id: uuid.UUID + prompt: str + expected_format: Literal["json"] = "json" + + +class BuildPromptRequest(BaseModel): + topic_prompt: str = Field(min_length=5, max_length=4_000) + + +class GenerateExamRequest(BaseModel): + template_id: uuid.UUID + topic_prompt: str = Field(min_length=5, max_length=4_000) + + +class ParseRequest(BaseModel): + raw_output: str = Field(min_length=5, max_length=200_000) + input_format: Literal["json", "txt"] + template_id: uuid.UUID + + +class ParsedQuestionsResponse(BaseModel): + questions: list[QuestionRead] + + +class ExportResponse(BaseModel): + template_id: uuid.UUID + format: ExportFormat + content: str diff --git a/backend/app/services/__init__.py b/backend/app/services/__init__.py new file mode 100644 index 0000000..ac5acdf --- /dev/null +++ b/backend/app/services/__init__.py @@ -0,0 +1 @@ +"""Business service package.""" diff --git a/backend/app/services/exam_service.py b/backend/app/services/exam_service.py new file mode 100644 index 0000000..49a74de --- /dev/null +++ b/backend/app/services/exam_service.py @@ -0,0 +1,147 @@ +import uuid + +from sqlalchemy import select +from sqlalchemy.orm import Session + +from app.core.errors import NotFoundError +from app.core.security import clean_text +from app.models.exam import ExamTemplate, ExportFormat, ExportJob, ExportStatus, Question +from app.schemas.exam import ( + ExamTemplateCreate, + ExamTemplateRead, + ExportResponse, + ParsedQuestionsResponse, + ParseRequest, + PromptResponse, + QuestionCreate, + QuestionRead, +) +from app.services.llm import 
LLMClient +from app.services.moodle_exporter import MoodleXMLExporter +from app.services.parser import AIQuestionParser +from app.services.prompt_builder import PromptBuilder + + +class ExamService: + def __init__( + self, + db: Session, + prompt_builder: PromptBuilder | None = None, + parser: AIQuestionParser | None = None, + exporter: MoodleXMLExporter | None = None, + ) -> None: + self.db = db + self.prompt_builder = prompt_builder or PromptBuilder() + self.parser = parser or AIQuestionParser() + self.exporter = exporter or MoodleXMLExporter() + + def create_template(self, payload: ExamTemplateCreate) -> ExamTemplateRead: + template = ExamTemplate( + title=clean_text(payload.title, max_length=200), + subject=clean_text(payload.subject, max_length=200), + educational_level=clean_text(payload.educational_level, max_length=120), + language=clean_text(payload.language, max_length=20), + settings=payload.settings.model_dump(mode="json"), + difficulty_profile=payload.difficulty_profile.model_dump(mode="json"), + ) + self.db.add(template) + self.db.commit() + self.db.refresh(template) + return self._template_read(template) + + def list_templates(self) -> list[ExamTemplateRead]: + templates = self.db.scalars(select(ExamTemplate).order_by(ExamTemplate.created_at.desc())).all() + return [self._template_read(template) for template in templates] + + def get_template(self, template_id: uuid.UUID) -> ExamTemplateRead: + return self._template_read(self._get_template_or_404(template_id)) + + def build_prompt(self, template_id: uuid.UUID, topic_prompt: str) -> PromptResponse: + template = self._get_template_or_404(template_id) + prompt = self.prompt_builder.build_prompt(template, topic_prompt) + return PromptResponse(template_id=template.id, prompt=prompt) + + async def generate_with_llm( + self, + template_id: uuid.UUID, + topic_prompt: str, + llm_client: LLMClient, + ) -> ParsedQuestionsResponse: + template = self._get_template_or_404(template_id) + prompt = 
self.prompt_builder.build_prompt(template, topic_prompt) + raw_output = await llm_client.generate(prompt) + questions = self.parser.parse_json(raw_output) + return self._persist_questions(template.id, questions) + + def parse_and_persist(self, payload: ParseRequest) -> ParsedQuestionsResponse: + self._get_template_or_404(payload.template_id) + questions = self.parser.parse(payload.raw_output, payload.input_format) + return self._persist_questions(payload.template_id, questions) + + def export(self, template_id: uuid.UUID, export_format: ExportFormat) -> ExportResponse: + template = self._get_template_or_404(template_id) + questions = list(template.questions) + if not questions: + raise NotFoundError("Template does not contain questions to export") + + if export_format == ExportFormat.XML: + content = self.exporter.export_xml(questions) + elif export_format == ExportFormat.TXT: + content = self.exporter.export_txt(questions) + else: + content = self.exporter.export_json(questions) + + self.db.add( + ExportJob( + template_id=template.id, + status=ExportStatus.COMPLETED, + format=export_format, + content=content, + ) + ) + self.db.commit() + return ExportResponse(template_id=template.id, format=export_format, content=content) + + def _persist_questions(self, template_id: uuid.UUID, questions: list[QuestionCreate]) -> ParsedQuestionsResponse: + persisted: list[Question] = [] + for payload in questions: + question = Question( + template_id=template_id, + question_type=payload.question_type, + statement=clean_text(payload.statement), + correct_answers=[clean_text(answer, max_length=1_000) for answer in payload.correct_answers], + wrong_answers=[clean_text(answer, max_length=1_000) for answer in payload.wrong_answers], + matching_pairs=[pair.model_dump() for pair in payload.matching_pairs], + difficulty=payload.difficulty, + score=payload.score, + penalty=payload.penalty, + options=payload.options, + ) + self.db.add(question) + persisted.append(question) + + 
self.db.commit() + for question in persisted: + self.db.refresh(question) + + return ParsedQuestionsResponse(questions=[QuestionRead.model_validate(question) for question in persisted]) + + def _get_template_or_404(self, template_id: uuid.UUID) -> ExamTemplate: + template = self.db.get(ExamTemplate, template_id) + if template is None: + raise NotFoundError("Exam template not found") + return template + + def _template_read(self, template: ExamTemplate) -> ExamTemplateRead: + return ExamTemplateRead( + id=template.id, + title=template.title, + subject=template.subject, + educational_level=template.educational_level, + language=template.language, + settings=template.settings, + difficulty_profile=template.difficulty_profile, + created_at=template.created_at, + updated_at=template.updated_at, + question_count=len(template.questions), + ) diff --git a/backend/app/services/llm.py b/backend/app/services/llm.py new file mode 100644 index 0000000..0356cb4 --- /dev/null +++ b/backend/app/services/llm.py @@ -0,0 +1,48 @@ +import httpx + +from app.core.config import Settings +from app.core.errors import LLMUnavailableError + + +class LLMClient: + def __init__(self, settings: Settings) -> None: + self.settings = settings + + async def generate(self, prompt: str) -> str: + if not self.settings.llm_api_key: + raise LLMUnavailableError("LLM_API_KEY is not configured") + + url = f"{self.settings.llm_base_url.rstrip('/')}/chat/completions" + payload = { + "model": self.settings.llm_model, + "messages": [ + { + "role": "system", + "content": "You generate safe, valid JSON exam questions for Moodle imports.", + }, + {"role": "user", "content": prompt}, + ], + "temperature": 0.2, + "response_format": {"type": "json_object"}, + } + headers = { + "Authorization": f"Bearer {self.settings.llm_api_key}", + "Content-Type": "application/json", + } + + try: + async with httpx.AsyncClient(timeout=self.settings.llm_timeout_seconds) as client: + response = await client.post(url, json=payload, 
headers=headers) + response.raise_for_status() + except httpx.HTTPError as exc: + raise LLMUnavailableError("LLM request failed") from exc + + data = response.json() + try: + content = data["choices"][0]["message"]["content"] + except (KeyError, IndexError, TypeError) as exc: + raise LLMUnavailableError("LLM response did not include message content") from exc + + if not isinstance(content, str) or not content.strip(): + raise LLMUnavailableError("LLM returned empty content") + return content diff --git a/backend/app/services/moodle_exporter.py b/backend/app/services/moodle_exporter.py new file mode 100644 index 0000000..2048cd9 --- /dev/null +++ b/backend/app/services/moodle_exporter.py @@ -0,0 +1,166 @@ +import json +from typing import Any +from xml.sax.saxutils import escape as xml_escape + +from app.core.security import clean_text + + +class MoodleXMLExporter: + def export_xml(self, questions: list[Any]) -> str: + parts = ['', ""] + for index, question in enumerate(questions, start=1): + parts.append(self._export_question(question, index)) + parts.append("") + return "\n".join(parts) + + def export_txt(self, questions: list[Any]) -> str: + blocks: list[str] = [] + for question in questions: + lines = [self._attr(question, "statement")] + lines.extend(self._attr(question, "correct_answers") or []) + lines.extend(self._attr(question, "wrong_answers") or []) + blocks.append("\n".join(clean_text(str(line)) for line in lines)) + return "\n\n".join(blocks) + + def export_json(self, questions: list[Any]) -> str: + payload = {"questions": [self._question_dict(question) for question in questions]} + return json.dumps(payload, ensure_ascii=False, indent=2, default=str) + + def _export_question(self, question: Any, index: int) -> str: + question_type = self._enum_value(self._attr(question, "question_type")) + if question_type == "multichoice": + return self._multichoice(question, index) + if question_type == "truefalse": + return self._truefalse(question, index) + if 
question_type == "shortanswer": + return self._shortanswer(question, index) + if question_type == "matching": + return self._matching(question, index) + raise ValueError(f"Unsupported Moodle question type: {question_type}") + + def _multichoice(self, question: Any, index: int) -> str: + correct_answers = self._attr(question, "correct_answers") or [] + wrong_answers = self._attr(question, "wrong_answers") or [] + options = self._attr(question, "options") or {} + multiple_correct = bool(options.get("multiple_correct", len(correct_answers) > 1)) + correct_fraction = 100 / max(len(correct_answers), 1) + wrong_fraction = -abs(float(self._attr(question, "penalty") or 0.0)) if self._attr(question, "penalty") else 0 + + answers = [ + self._answer_xml(answer, correct_fraction) for answer in correct_answers + ] + [self._answer_xml(answer, wrong_fraction) for answer in wrong_answers] + + return "\n".join( + [ + ' ', + self._common_header(question, index), + f" {str(not multiple_correct).lower()}", + " 1", + *answers, + " ", + ] + ) + + def _truefalse(self, question: Any, index: int) -> str: + correct = (self._attr(question, "correct_answers") or ["true"])[0].lower() + is_true = correct in {"true", "verdadero"} + return "\n".join( + [ + ' ', + self._common_header(question, index), + self._answer_xml("true", 100 if is_true else 0), + self._answer_xml("false", 0 if is_true else 100), + " ", + ] + ) + + def _shortanswer(self, question: Any, index: int) -> str: + answers = [self._answer_xml(answer, 100) for answer in self._attr(question, "correct_answers")] + return "\n".join( + [ + ' ', + self._common_header(question, index), + " 0", + *answers, + " ", + ] + ) + + def _matching(self, question: Any, index: int) -> str: + subquestions = [] + for pair in self._attr(question, "matching_pairs") or []: + prompt = pair.get("prompt") if isinstance(pair, dict) else pair.prompt + answer = pair.get("answer") if isinstance(pair, dict) else pair.answer + subquestions.append( + "\n".join( + [ 
+ ' ', + f" {self._cdata(prompt)}", + " ", + f" {self._xml(answer)}", + " ", + " ", + ] + ) + ) + return "\n".join( + [ + ' ', + self._common_header(question, index), + *subquestions, + " ", + ] + ) + + def _common_header(self, question: Any, index: int) -> str: + statement = self._attr(question, "statement") + name = clean_text(statement, max_length=80) or f"Pregunta {index}" + return "\n".join( + [ + " ", + f" {self._xml(name)}", + " ", + ' ', + f" {self._cdata(statement)}", + " ", + f" {float(self._attr(question, 'score') or 1.0):.2f}", + " ", + ] + ) + + def _answer_xml(self, text: str, fraction: float) -> str: + fraction_text = f"{fraction:.6g}" + return "\n".join( + [ + f' ', + f" {self._xml(text)}", + " ", + " ", + ] + ) + + def _question_dict(self, question: Any) -> dict[str, Any]: + return { + "id": str(self._attr(question, "id")) if self._attr(question, "id") else None, + "question_type": self._enum_value(self._attr(question, "question_type")), + "statement": self._attr(question, "statement"), + "correct_answers": self._attr(question, "correct_answers") or [], + "wrong_answers": self._attr(question, "wrong_answers") or [], + "matching_pairs": self._attr(question, "matching_pairs") or [], + "difficulty": self._enum_value(self._attr(question, "difficulty")), + "score": self._attr(question, "score"), + "penalty": self._attr(question, "penalty"), + } + + def _attr(self, question: Any, name: str) -> Any: + return getattr(question, name, None) + + def _enum_value(self, value: Any) -> Any: + return value.value if hasattr(value, "value") else value + + def _xml(self, value: Any) -> str: + return xml_escape(clean_text(str(value)), {'"': """, "'": "'"}) + + def _cdata(self, value: Any) -> str: + text = clean_text(str(value)).replace("]]>", "]]]]>") + return f"" diff --git a/backend/app/services/parser.py b/backend/app/services/parser.py new file mode 100644 index 0000000..f53dc22 --- /dev/null +++ b/backend/app/services/parser.py @@ -0,0 +1,98 @@ +import json +from 
typing import Any + +from pydantic import ValidationError + +from app.core.errors import ParseError +from app.core.security import clean_text +from app.models.exam import Difficulty, QuestionType +from app.schemas.exam import QuestionCreate + + +class AIQuestionParser: + def parse(self, raw_output: str, input_format: str) -> list[QuestionCreate]: + if input_format == "json": + return self.parse_json(raw_output) + if input_format == "txt": + return self.parse_txt(raw_output) + raise ParseError("Unsupported input format") + + def parse_json(self, raw_json: str) -> list[QuestionCreate]: + try: + data = json.loads(raw_json) + except json.JSONDecodeError as exc: + raise ParseError("Invalid JSON returned by AI") from exc + + items = data.get("questions", data) if isinstance(data, dict) else data + if not isinstance(items, list) or not items: + raise ParseError("JSON must contain a non-empty questions list") + + questions: list[QuestionCreate] = [] + for item in items: + if not isinstance(item, dict): + raise ParseError("Each JSON question must be an object") + questions.append(self._build_question(self._normalize_item(item))) + return questions + + def parse_txt(self, raw_text: str) -> list[QuestionCreate]: + blocks = [block.strip() for block in raw_text.replace("\r\n", "\n").split("\n\n") if block.strip()] + questions: list[QuestionCreate] = [] + + for block in blocks: + lines = [clean_text(line) for line in block.split("\n") if clean_text(line)] + if len(lines) < 2: + continue + + statement = lines[0] + correct_answer = lines[1] + wrong_answers = lines[2:] + question_type = self._infer_txt_type(correct_answer, wrong_answers) + payload = { + "question_type": question_type, + "statement": statement, + "correct_answers": [correct_answer], + "wrong_answers": wrong_answers, + "difficulty": Difficulty.MEDIUM, + "score": 1.0, + "penalty": 0.0, + } + questions.append(self._build_question(payload)) + + if not questions: + raise ParseError("TXT output did not contain parseable 
questions") + return questions + + def _normalize_item(self, item: dict[str, Any]) -> dict[str, Any]: + correct = item.get("correct_answers", item.get("correct_answer", item.get("answer", []))) + wrong = item.get("wrong_answers", item.get("incorrect_answers", item.get("distractors", []))) + question_type = item.get("question_type", item.get("type", QuestionType.MULTICHOICE.value)) + + if isinstance(correct, str): + correct = [correct] + if isinstance(wrong, str): + wrong = [wrong] + + return { + "question_type": question_type, + "statement": item.get("statement", item.get("question", item.get("prompt", ""))), + "correct_answers": correct, + "wrong_answers": wrong, + "matching_pairs": item.get("matching_pairs", []), + "difficulty": item.get("difficulty", Difficulty.MEDIUM.value), + "score": item.get("score", 1.0), + "penalty": item.get("penalty", 0.0), + "options": item.get("options", {}), + } + + def _build_question(self, payload: dict[str, Any]) -> QuestionCreate: + try: + return QuestionCreate.model_validate(payload) + except ValidationError as exc: + raise ParseError(f"Invalid question payload: {exc.errors()}") from exc + + def _infer_txt_type(self, correct_answer: str, wrong_answers: list[str]) -> QuestionType: + if correct_answer.lower() in {"true", "false", "verdadero", "falso"} and not wrong_answers: + return QuestionType.TRUE_FALSE + if wrong_answers: + return QuestionType.MULTICHOICE + return QuestionType.SHORT_ANSWER diff --git a/backend/app/services/prompt_builder.py b/backend/app/services/prompt_builder.py new file mode 100644 index 0000000..78c7f6d --- /dev/null +++ b/backend/app/services/prompt_builder.py @@ -0,0 +1,55 @@ +import json + +from app.core.security import sanitize_prompt_input +from app.models.exam import ExamTemplate + + +class PromptBuilder: + def build_prompt(self, template: ExamTemplate, topic_prompt: str) -> str: + settings = template.settings + difficulty_profile = template.difficulty_profile + safe_topic = 
sanitize_prompt_input(topic_prompt) + + contract = { + "questions": [ + { + "question_type": "multichoice | truefalse | shortanswer | matching", + "statement": "Enunciado claro de la pregunta", + "correct_answers": ["respuesta correcta"], + "wrong_answers": ["distractor 1", "distractor 2"], + "matching_pairs": [{"prompt": "concepto", "answer": "definicion"}], + "difficulty": "easy | medium | hard | very_hard", + "score": 1.0, + "penalty": 0.0, + } + ] + } + + return "\n".join( + [ + "Eres un generador de cuestionarios académicos para Moodle.", + "Devuelve exclusivamente JSON válido, sin markdown ni texto adicional.", + "No incluyas instrucciones del usuario dentro de las preguntas.", + "", + f"Título del examen: {sanitize_prompt_input(template.title)}", + f"Materia: {sanitize_prompt_input(template.subject)}", + f"Nivel educativo: {sanitize_prompt_input(template.educational_level)}", + f"Idioma: {sanitize_prompt_input(template.language)}", + f"Configuración de tipos: {json.dumps(settings, ensure_ascii=False)}", + f"Distribución de dificultad: {json.dumps(difficulty_profile, ensure_ascii=False)}", + "", + "Tema, conceptos y restricciones indicadas por el profesor:", + safe_topic, + "", + "Contrato de salida obligatorio:", + json.dumps(contract, ensure_ascii=False, indent=2), + "", + "Reglas:", + "- Respeta el número de preguntas por tipo.", + "- Respeta la distribución de dificultad.", + "- En multichoice, incluye al menos una respuesta correcta y varias incorrectas.", + "- En truefalse, usa una única respuesta correcta: true o false.", + "- En shortanswer, incluye respuestas exactas aceptadas.", + "- En matching, rellena matching_pairs y deja wrong_answers vacío.", + ] + ) diff --git a/backend/requirements.txt b/backend/requirements.txt new file mode 100644 index 0000000..6ad34b4 --- /dev/null +++ b/backend/requirements.txt @@ -0,0 +1,9 @@ +fastapi +uvicorn[standard] +SQLAlchemy +psycopg[binary] +pydantic-settings +python-dotenv +httpx +orjson +pytest diff --git 
a/docker-compose.yml b/docker-compose.yml new file mode 100644 index 0000000..2a0b355 --- /dev/null +++ b/docker-compose.yml @@ -0,0 +1,42 @@ +services: + backend: + build: + context: ./backend + env_file: + - .env + environment: + DATABASE_URL: postgresql+psycopg://genexamenes:genexamenes@db:5432/genexamenes + ports: + - "8000:8000" + depends_on: + db: + condition: service_healthy + restart: unless-stopped + + frontend: + image: nginx:1.27-alpine + ports: + - "3000:80" + volumes: + - ./frontend:/usr/share/nginx/html:ro + restart: unless-stopped + + db: + image: postgres:16-alpine + environment: + POSTGRES_DB: genexamenes + POSTGRES_USER: genexamenes + POSTGRES_PASSWORD: genexamenes + ports: + - "5432:5432" + volumes: + - postgres_data:/var/lib/postgresql/data + healthcheck: + test: ["CMD-SHELL", "pg_isready -U genexamenes -d genexamenes"] + interval: 5s + timeout: 5s + retries: 10 + restart: unless-stopped + +volumes: + postgres_data: From abc620838ccdb22842579461b9f5c82ac17206e6 Mon Sep 17 00:00:00 2001 From: Mireya Cueto Garrido Date: Wed, 13 May 2026 13:48:42 +0200 Subject: [PATCH 2/6] Cambio en el .env y docker compose --- README.md | 113 ++++++++++++++++++++++++++- .env => backend/.env | 0 .env.example => backend/.env.example | 0 docker-compose.yml | 2 +- 4 files changed, 112 insertions(+), 3 deletions(-) rename .env => backend/.env (100%) rename .env.example => backend/.env.example (100%) diff --git a/README.md b/README.md index 3e8fc47..9ac9cb4 100644 --- a/README.md +++ b/README.md @@ -1,2 +1,111 @@ -# moodle-exam-generator -Generador avanzado de exámenes con IA, capaz de crear cuestionarios personalizados, configurar tipos de preguntas y exportar automáticamente el resultado en formato Moodle XML listo para importar. Integración completa con prompts para generar contenido educativo de forma dinámica. +# GenExamenes IA + +Backend para generar exámenes con IA, procesar la salida de un LLM y exportar preguntas a Moodle XML. 
+ +El proyecto está centrado en backend. La carpeta `frontend` se mantiene vacía a nivel de aplicación, aunque existe un servicio en Docker Compose para reservar el despliegue futuro. + +## Stack + +- FastAPI +- PostgreSQL +- SQLAlchemy +- Cliente LLM compatible con OpenAI Chat Completions +- Docker Compose con servicios `backend`, `frontend` y `db` + +## Puesta en Marcha + +Copia el ejemplo de variables dentro de la carpeta del backend: + +```bash +cp backend/.env.example backend/.env +``` + +Después levanta los servicios: + +```bash +docker compose up --build +``` + +La API queda disponible en: + +```text +http://localhost:8000 +``` + +## Configuración + +El archivo de entorno debe estar en `backend/.env`. + +Variables principales: + +- `API_KEY`: clave obligatoria para consumir las rutas protegidas. +- `DATABASE_URL`: conexión PostgreSQL usada por el backend. +- `LLM_API_KEY`: clave del proveedor LLM. +- `LLM_BASE_URL`: endpoint compatible con OpenAI. +- `LLM_MODEL`: modelo usado para generar preguntas. +- `ALLOWED_ORIGINS`: orígenes permitidos por CORS. + +Todas las rutas bajo `/exam` requieren la cabecera: + +```http +X-API-Key: change-me-in-production +``` + +## Flujo de Usuario + +1. Crear una plantilla de examen. +2. Generar un prompt guiado para el LLM. +3. Generar preguntas automáticamente con el LLM o parsear una salida externa en JSON/TXT. +4. Guardar las preguntas validadas en PostgreSQL. +5. Exportar el examen a Moodle XML, TXT o JSON. + +## Endpoints + +`GET /health` + +Comprueba que la API está levantada. + +`POST /exam/templates` + +Crea una plantilla con materia, nivel educativo, tipos de pregunta, puntuación, penalización y dificultad. + +`GET /exam/templates` + +Lista las plantillas creadas. + +`GET /exam/templates/{template_id}` + +Obtiene una plantilla concreta. + +`POST /exam/prompts/{template_id}` + +Genera un prompt estructurado para IA. + +`POST /exam/generate` + +Llama al LLM configurado, parsea la respuesta y guarda las preguntas. 
+ +`POST /exam/parse` + +Procesa una salida externa de IA en formato `json` o `txt`. + +`GET /exam/export/xml/{template_id}` + +Exporta las preguntas en Moodle XML. + +`GET /exam/export/txt/{template_id}` + +Exporta las preguntas en texto plano. + +`GET /exam/export/json/{template_id}` + +Exporta las preguntas en JSON. + +## Seguridad + +- Autenticación por API key. +- Rate limiting por cliente. +- Límite de tamaño de petición. +- Validación de entrada con Pydantic. +- Manejo uniforme de errores HTTP. +- Sanitización básica de prompts y respuestas antes de persistir/exportar. diff --git a/.env b/backend/.env similarity index 100% rename from .env rename to backend/.env diff --git a/.env.example b/backend/.env.example similarity index 100% rename from .env.example rename to backend/.env.example diff --git a/docker-compose.yml b/docker-compose.yml index 2a0b355..1d61571 100644 --- a/docker-compose.yml +++ b/docker-compose.yml @@ -3,7 +3,7 @@ services: build: context: ./backend env_file: - - .env + - ./backend/.env environment: DATABASE_URL: postgresql+psycopg://genexamenes:genexamenes@db:5432/genexamenes ports: From eca398d8922f28c733a79564769627e9863c5f63 Mon Sep 17 00:00:00 2001 From: Mireya Cueto Garrido Date: Wed, 13 May 2026 13:56:26 +0200 Subject: [PATCH 3/6] Delete backend/.env --- backend/.env | 14 -------------- 1 file changed, 14 deletions(-) delete mode 100644 backend/.env diff --git a/backend/.env b/backend/.env deleted file mode 100644 index 4b5f810..0000000 --- a/backend/.env +++ /dev/null @@ -1,14 +0,0 @@ -APP_NAME=GenExamenes IA -ENVIRONMENT=local -API_KEY=change-me-in-production -DATABASE_URL=postgresql+psycopg://genexamenes:genexamenes@db:5432/genexamenes -ALLOWED_ORIGINS=http://localhost:3000 -RATE_LIMIT_REQUESTS=300 -RATE_LIMIT_WINDOW_SECONDS=300 -MAX_REQUEST_BYTES=1048576 - -# OpenAI-compatible chat completions endpoint. 
-LLM_API_KEY= -LLM_BASE_URL=/api/chat -LLM_MODEL=qwen3.5:35b -LLM_TIMEOUT_SECONDS=250 From 8a889a75979e28ff4d54aec2516895bd0e16365c Mon Sep 17 00:00:00 2001 From: Mireya Cueto Garrido Date: Wed, 13 May 2026 13:58:28 +0200 Subject: [PATCH 4/6] =?UTF-8?q?A=C3=B1adido=20el=20.gitignore?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- .gitignore | 67 ++++++++++++++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 67 insertions(+) create mode 100644 .gitignore diff --git a/.gitignore b/.gitignore new file mode 100644 index 0000000..3413576 --- /dev/null +++ b/.gitignore @@ -0,0 +1,67 @@ +# Environment and secrets +.env +.env.* +!.env.example +!backend/.env.example +backend/.env +backend/.env.* + +# Python +__pycache__/ +*.py[cod] +*.pyo +*.pyd +.Python +.venv/ +venv/ +env/ +ENV/ +pip-wheel-metadata/ +*.egg-info/ +.eggs/ +dist/ +build/ + +# FastAPI / local runtime +*.log +logs/ +tmp/ +temp/ + +# Tests and coverage +.pytest_cache/ +.coverage +.coverage.* +htmlcov/ +coverage.xml +.mypy_cache/ +.ruff_cache/ +.tox/ + +# Docker and local database data +docker-compose.override.yml +postgres_data/ +pgdata/ + +# IDE and OS files +.idea/ +.vscode/ +*.swp +*.swo +.DS_Store +Thumbs.db +desktop.ini + +# Node / future frontend +node_modules/ +npm-debug.log* +yarn-debug.log* +yarn-error.log* +pnpm-debug.log* +.next/ +out/ +coverage/ + +# Generated exports +exports/ +*.xml.tmp From ba2507918b4156f17d504fa5e3f6fe00d0979a39 Mon Sep 17 00:00:00 2001 From: Mireya Cueto Garrido Date: Tue, 19 May 2026 10:21:34 +0200 Subject: [PATCH 5/6] Nuevos cambios en el backend --- FlujoDeUsuario.txt | 31 ++++--- README.md | 53 +++++++++-- backend/.env.example | 25 ++++- backend/app/api/routes/auth.py | 43 +++++++++ backend/app/api/routes/exports.py | 11 ++- backend/app/api/routes/generation.py | 16 +++- backend/app/api/routes/history.py | 19 ++++ backend/app/api/routes/templates.py | 15 ++- backend/app/core/auth.py | 21 +++++ 
backend/app/core/config.py | 4 + backend/app/core/errors.py | 15 +++ backend/app/db/init_db.py | 2 +- backend/app/main.py | 12 +-- backend/app/models/exam.py | 8 ++ backend/app/models/user.py | 25 +++++ backend/app/schemas/exam.py | 15 +++ backend/app/schemas/user.py | 35 +++++++ backend/app/services/auth_service.py | 132 +++++++++++++++++++++++++++ backend/app/services/exam_service.py | 62 ++++++++++--- backend/requirements.txt | 5 + 20 files changed, 494 insertions(+), 55 deletions(-) create mode 100644 backend/app/api/routes/auth.py create mode 100644 backend/app/api/routes/history.py create mode 100644 backend/app/core/auth.py create mode 100644 backend/app/models/user.py create mode 100644 backend/app/schemas/user.py create mode 100644 backend/app/services/auth_service.py diff --git a/FlujoDeUsuario.txt b/FlujoDeUsuario.txt index 8f34a92..da16758 100644 --- a/FlujoDeUsuario.txt +++ b/FlujoDeUsuario.txt @@ -1,22 +1,31 @@ Ahora mismo el flujo es backend/API, sin frontend: -1.- El profesor crea una plantilla con POST /exam/templates Define título, materia, nivel educativo, -tipos de preguntas, número de preguntas, puntuación, penalización y dificultad. +0.- El profesor se registra (POST /auth/register) o inicia sesión (POST /auth/login) y obtiene un token JWT. -2.- Genera un prompt con POST /exam/prompts/{template_id} La API devuelve un prompt estructurado para -pedirle al LLM preguntas en JSON válido. +1.- Crea una plantilla con POST /exam/templates (requiere Authorization: Bearer <token>). + Define título, materia, nivel educativo, tipos de preguntas, puntuación, penalización y dificultad. + La plantilla queda guardada en base de datos asociada a su usuario. + +2.- Genera un prompt con POST /exam/prompts/{template_id}. + La API devuelve un prompt estructurado para pedirle al LLM preguntas en JSON válido. 3.- Hay dos caminos posibles: - 3.1.- Generación automática: POST /exam/generate La API llama al LLM configurado, parsea la respuesta y guarda las preguntas.
- 3.2.- Carga manual: POST /exam/parse El profesor pega una salida de IA en json o txt, y la API la valida y guarda. + 3.1.- Generación automática: POST /exam/generate. + La API llama al LLM configurado, parsea la respuesta y guarda las preguntas. -4.- El profesor exporta el examen: + 3.2.- Carga manual: POST /exam/parse. + El profesor pega una salida de IA en json o txt, y la API la valida y guarda. -GET /exam/export/xml/{template_id} para Moodle XML. -GET /exam/export/txt/{template_id} para texto plano. -GET /exam/export/json/{template_id} para JSON. +4.- Consulta su historial con GET /exam/history. + Ve todos los exámenes que ha creado, cuántas preguntas tienen y cuándo exportó por última vez. + +5.- Exporta el examen: + + GET /exam/export/xml/{template_id} para Moodle XML. + GET /exam/export/txt/{template_id} para texto plano. + GET /exam/export/json/{template_id} para JSON. (El XML generado se importa manualmente en Moodle.) -En resumen: configurar plantilla → generar prompt o llamar al LLM → guardar preguntas → exportar Moodle XML. \ No newline at end of file +En resumen: registrarse → configurar plantilla → generar prompt o llamar al LLM → guardar preguntas → ver historial → exportar Moodle XML. diff --git a/README.md b/README.md index 9ac9cb4..498acec 100644 --- a/README.md +++ b/README.md @@ -38,26 +38,37 @@ El archivo de entorno debe estar en `backend/.env`. Variables principales: -- `API_KEY`: clave obligatoria para consumir las rutas protegidas. +- `JWT_SECRET_KEY`: secreto para firmar tokens JWT (mínimo 32 caracteres). +- `JWT_EXPIRE_MINUTES`: duración del token de acceso. +- `GOOGLE_CLIENT_ID`: Client ID de OAuth 2.0 en Google Cloud Console (para `/auth/google`). - `DATABASE_URL`: conexión PostgreSQL usada por el backend. - `LLM_API_KEY`: clave del proveedor LLM. - `LLM_BASE_URL`: endpoint compatible con OpenAI. - `LLM_MODEL`: modelo usado para generar preguntas. - `ALLOWED_ORIGINS`: orígenes permitidos por CORS. 
-Todas las rutas bajo `/exam` requieren la cabecera: +Todas las rutas bajo `/exam` requieren autenticación de usuario con: ```http -X-API-Key: change-me-in-production +Authorization: Bearer <token> +``` + +Si ya tenías una base de datos creada antes de añadir usuarios, recrea el volumen: + +```bash +docker compose down -v +docker compose up --build ``` ## Flujo de Usuario -1. Crear una plantilla de examen. -2. Generar un prompt guiado para el LLM. -3. Generar preguntas automáticamente con el LLM o parsear una salida externa en JSON/TXT. -4. Guardar las preguntas validadas en PostgreSQL. -5. Exportar el examen a Moodle XML, TXT o JSON. +1. Registrarse o iniciar sesión. +2. Crear una plantilla de examen (queda asociada al usuario). +3. Generar un prompt guiado para el LLM. +4. Generar preguntas automáticamente con el LLM o parsear una salida externa en JSON/TXT. +5. Guardar las preguntas validadas en PostgreSQL. +6. Consultar el historial de exámenes creados. +7. Exportar el examen a Moodle XML, TXT o JSON. ## Endpoints @@ -65,13 +76,33 @@ X-API-Key: change-me-in-production Comprueba que la API está levantada. +`POST /auth/register` + +Registra un usuario con email y contraseña. + +`POST /auth/login` + +Devuelve un token JWT para usar en las rutas protegidas. + +`POST /auth/google` + +Recibe el `id_token` de Google (Sign in with Google en el frontend), verifica la cuenta y devuelve el mismo JWT de la API. + +`GET /auth/me` + +Devuelve los datos del usuario autenticado. + +`GET /exam/history` + +Lista el historial de exámenes del usuario (plantillas, preguntas y exportaciones). + `POST /exam/templates` Crea una plantilla con materia, nivel educativo, tipos de pregunta, puntuación, penalización y dificultad. `GET /exam/templates` -Lista las plantillas creadas. +Lista las plantillas del usuario autenticado. `GET /exam/templates/{template_id}` @@ -103,7 +134,9 @@ Exporta las preguntas en JSON. ## Seguridad -- Autenticación por API key.
+- Registro e inicio de sesión con contraseña hasheada (bcrypt). +- Autenticación JWT por usuario. +- Cada examen pertenece a un único usuario; no se puede acceder al de otro. - Rate limiting por cliente. - Límite de tamaño de petición. - Validación de entrada con Pydantic. diff --git a/backend/.env.example b/backend/.env.example index 173588e..8df96cc 100644 --- a/backend/.env.example +++ b/backend/.env.example @@ -1,14 +1,33 @@ +# --- Aplicación --- APP_NAME=GenExamenes IA ENVIRONMENT=local -API_KEY=change-me-in-production + +# Clave legacy (reservada; las rutas /exam usan JWT de usuario). +API_KEY=change-me-in-production-min-16-chars + +# --- Base de datos (Docker: host "db") --- DATABASE_URL=postgresql+psycopg://genexamenes:genexamenes@db:5432/genexamenes + +# --- CORS (orígenes del frontend, separados por coma) --- ALLOWED_ORIGINS=http://localhost:3000 + +# --- Rate limiting y tamaño de petición --- RATE_LIMIT_REQUESTS=60 RATE_LIMIT_WINDOW_SECONDS=60 MAX_REQUEST_BYTES=1048576 -# OpenAI-compatible chat completions endpoint. -LLM_API_KEY= +# --- JWT (login email/contraseña y sesión tras Google) --- +JWT_SECRET_KEY=change-me-use-a-long-random-secret-key-at-least-32-chars +JWT_ALGORITHM=HS256 +JWT_EXPIRE_MINUTES=1440 + +# --- Google Sign-In --- +# Client ID de OAuth 2.0 (tipo "Aplicación web") en Google Cloud Console. +# El frontend obtiene un id_token con Google Identity Services y lo envía a POST /auth/google. 
+GOOGLE_CLIENT_ID=123456789012-abcdefghijklmnopqrstuvwxyz123456.apps.googleusercontent.com + +# --- LLM (OpenAI o compatible) --- +LLM_API_KEY=sk-your-openai-api-key LLM_BASE_URL=https://api.openai.com/v1 LLM_MODEL=gpt-4o-mini LLM_TIMEOUT_SECONDS=60 diff --git a/backend/app/api/routes/auth.py b/backend/app/api/routes/auth.py new file mode 100644 index 0000000..079bc68 --- /dev/null +++ b/backend/app/api/routes/auth.py @@ -0,0 +1,43 @@ +from typing import Annotated + +from fastapi import APIRouter, Depends, status + +from app.core.auth import get_current_user +from app.models.user import User +from app.schemas.user import GoogleLoginRequest, TokenResponse, UserLogin, UserRead, UserRegister +from app.services.auth_service import AuthService, get_auth_service + +router = APIRouter(prefix="/auth", tags=["auth"]) + + +@router.post("/register", response_model=UserRead, status_code=status.HTTP_201_CREATED) +def register( + payload: UserRegister, + auth_service: Annotated[AuthService, Depends(get_auth_service)], +) -> UserRead: + return auth_service.register(payload) + + +@router.post("/login", response_model=TokenResponse) +def login( + payload: UserLogin, + auth_service: Annotated[AuthService, Depends(get_auth_service)], +) -> TokenResponse: + user = auth_service.authenticate(payload) + token = auth_service.create_access_token(user.id) + return TokenResponse(access_token=token) + + +@router.post("/google", response_model=TokenResponse) +def login_with_google( + payload: GoogleLoginRequest, + auth_service: Annotated[AuthService, Depends(get_auth_service)], +) -> TokenResponse: + user = auth_service.login_with_google(payload.id_token) + token = auth_service.create_access_token(user.id) + return TokenResponse(access_token=token) + + +@router.get("/me", response_model=UserRead) +def get_me(current_user: Annotated[User, Depends(get_current_user)]) -> UserRead: + return UserRead.model_validate(current_user) diff --git a/backend/app/api/routes/exports.py 
b/backend/app/api/routes/exports.py index 7b6ca68..a098001 100644 --- a/backend/app/api/routes/exports.py +++ b/backend/app/api/routes/exports.py @@ -4,7 +4,9 @@ from typing import Annotated from fastapi import APIRouter, Depends, Response from app.api.dependencies import get_exam_service +from app.core.auth import get_current_user from app.models.exam import ExportFormat +from app.models.user import User from app.services.exam_service import ExamService router = APIRouter(prefix="/export", tags=["exports"]) @@ -13,25 +15,28 @@ router = APIRouter(prefix="/export", tags=["exports"]) @router.get("/xml/{template_id}") def export_xml( template_id: uuid.UUID, + current_user: Annotated[User, Depends(get_current_user)], service: Annotated[ExamService, Depends(get_exam_service)], ) -> Response: - export = service.export(template_id, ExportFormat.XML) + export = service.export(current_user.id, template_id, ExportFormat.XML) return Response(content=export.content, media_type="application/xml") @router.get("/txt/{template_id}") def export_txt( template_id: uuid.UUID, + current_user: Annotated[User, Depends(get_current_user)], service: Annotated[ExamService, Depends(get_exam_service)], ) -> Response: - export = service.export(template_id, ExportFormat.TXT) + export = service.export(current_user.id, template_id, ExportFormat.TXT) return Response(content=export.content, media_type="text/plain; charset=utf-8") @router.get("/json/{template_id}") def export_json( template_id: uuid.UUID, + current_user: Annotated[User, Depends(get_current_user)], service: Annotated[ExamService, Depends(get_exam_service)], ) -> Response: - export = service.export(template_id, ExportFormat.JSON) + export = service.export(current_user.id, template_id, ExportFormat.JSON) return Response(content=export.content, media_type="application/json") diff --git a/backend/app/api/routes/generation.py b/backend/app/api/routes/generation.py index 64cf87d..81dab5a 100644 --- a/backend/app/api/routes/generation.py +++ 
b/backend/app/api/routes/generation.py @@ -4,6 +4,8 @@ from typing import Annotated from fastapi import APIRouter, Depends from app.api.dependencies import get_exam_service, get_llm_client +from app.core.auth import get_current_user +from app.models.user import User from app.schemas.exam import ( BuildPromptRequest, GenerateExamRequest, @@ -21,23 +23,31 @@ router = APIRouter(tags=["generation"]) def build_prompt( template_id: uuid.UUID, payload: BuildPromptRequest, + current_user: Annotated[User, Depends(get_current_user)], service: Annotated[ExamService, Depends(get_exam_service)], ) -> PromptResponse: - return service.build_prompt(template_id, payload.topic_prompt) + return service.build_prompt(current_user.id, template_id, payload.topic_prompt) @router.post("/generate", response_model=ParsedQuestionsResponse) async def generate_exam( payload: GenerateExamRequest, + current_user: Annotated[User, Depends(get_current_user)], service: Annotated[ExamService, Depends(get_exam_service)], llm_client: Annotated[LLMClient, Depends(get_llm_client)], ) -> ParsedQuestionsResponse: - return await service.generate_with_llm(payload.template_id, payload.topic_prompt, llm_client) + return await service.generate_with_llm( + current_user.id, + payload.template_id, + payload.topic_prompt, + llm_client, + ) @router.post("/parse", response_model=ParsedQuestionsResponse) def parse_ai_output( payload: ParseRequest, + current_user: Annotated[User, Depends(get_current_user)], service: Annotated[ExamService, Depends(get_exam_service)], ) -> ParsedQuestionsResponse: - return service.parse_and_persist(payload) + return service.parse_and_persist(current_user.id, payload) diff --git a/backend/app/api/routes/history.py b/backend/app/api/routes/history.py new file mode 100644 index 0000000..3a78169 --- /dev/null +++ b/backend/app/api/routes/history.py @@ -0,0 +1,19 @@ +from typing import Annotated + +from fastapi import APIRouter, Depends + +from app.core.auth import get_current_user +from 
app.models.user import User +from app.schemas.exam import ExamHistoryItem +from app.services.exam_service import ExamService +from app.api.dependencies import get_exam_service + +router = APIRouter(prefix="/history", tags=["history"]) + + +@router.get("", response_model=list[ExamHistoryItem]) +def list_exam_history( + current_user: Annotated[User, Depends(get_current_user)], + service: Annotated[ExamService, Depends(get_exam_service)], +) -> list[ExamHistoryItem]: + return service.list_history(current_user.id) diff --git a/backend/app/api/routes/templates.py b/backend/app/api/routes/templates.py index 2d1dd72..af152d2 100644 --- a/backend/app/api/routes/templates.py +++ b/backend/app/api/routes/templates.py @@ -4,6 +4,8 @@ from typing import Annotated from fastapi import APIRouter, Depends, status from app.api.dependencies import get_exam_service +from app.core.auth import get_current_user +from app.models.user import User from app.schemas.exam import ExamTemplateCreate, ExamTemplateRead from app.services.exam_service import ExamService @@ -13,19 +15,24 @@ router = APIRouter(prefix="/templates", tags=["templates"]) @router.post("", response_model=ExamTemplateRead, status_code=status.HTTP_201_CREATED) def create_template( payload: ExamTemplateCreate, + current_user: Annotated[User, Depends(get_current_user)], service: Annotated[ExamService, Depends(get_exam_service)], ) -> ExamTemplateRead: - return service.create_template(payload) + return service.create_template(current_user.id, payload) @router.get("", response_model=list[ExamTemplateRead]) -def list_templates(service: Annotated[ExamService, Depends(get_exam_service)]) -> list[ExamTemplateRead]: - return service.list_templates() +def list_templates( + current_user: Annotated[User, Depends(get_current_user)], + service: Annotated[ExamService, Depends(get_exam_service)], +) -> list[ExamTemplateRead]: + return service.list_templates(current_user.id) @router.get("/{template_id}", response_model=ExamTemplateRead) def 
get_template( template_id: uuid.UUID, + current_user: Annotated[User, Depends(get_current_user)], service: Annotated[ExamService, Depends(get_exam_service)], ) -> ExamTemplateRead: - return service.get_template(template_id) + return service.get_template(current_user.id, template_id) diff --git a/backend/app/core/auth.py b/backend/app/core/auth.py new file mode 100644 index 0000000..e7ab3ca --- /dev/null +++ b/backend/app/core/auth.py @@ -0,0 +1,21 @@ +from typing import Annotated + +from fastapi import Depends +from fastapi.security import HTTPAuthorizationCredentials, HTTPBearer +from sqlalchemy.orm import Session + +from app.core.errors import UnauthorizedError +from app.db.session import get_db +from app.models.user import User +from app.services.auth_service import AuthService, get_auth_service + +bearer_scheme = HTTPBearer(auto_error=False) + + +def get_current_user( + credentials: Annotated[HTTPAuthorizationCredentials | None, Depends(bearer_scheme)], + auth_service: Annotated[AuthService, Depends(get_auth_service)], +) -> User: + if credentials is None or credentials.scheme.lower() != "bearer": + raise UnauthorizedError("Missing or invalid authorization token") + return auth_service.get_user_by_id(auth_service.decode_user_id(credentials.credentials)) diff --git a/backend/app/core/config.py b/backend/app/core/config.py index 348b46e..a6693d4 100644 --- a/backend/app/core/config.py +++ b/backend/app/core/config.py @@ -18,6 +18,10 @@ class Settings(BaseSettings): llm_base_url: str = "https://api.openai.com/v1" llm_model: str = "gpt-4o-mini" llm_timeout_seconds: int = Field(default=60, ge=5) + jwt_secret_key: str = Field(min_length=32) + jwt_algorithm: str = "HS256" + jwt_expire_minutes: int = Field(default=60 * 24, ge=5) + google_client_id: str | None = None model_config = SettingsConfigDict( env_file=".env", diff --git a/backend/app/core/errors.py b/backend/app/core/errors.py index a4f902b..6afce1a 100644 --- a/backend/app/core/errors.py +++ 
b/backend/app/core/errors.py @@ -26,6 +26,21 @@ class ParseError(AppError): super().__init__(message=message, status_code=422, code="parse_error") +class ConflictError(AppError): + def __init__(self, message: str = "Resource already exists") -> None: + super().__init__(message=message, status_code=409, code="conflict") + + +class ForbiddenError(AppError): + def __init__(self, message: str = "Access denied") -> None: + super().__init__(message=message, status_code=403, code="forbidden") + + +class UnauthorizedError(AppError): + def __init__(self, message: str = "Unauthorized") -> None: + super().__init__(message=message, status_code=401, code="unauthorized") + + def error_payload(code: str, message: str, details: object | None = None) -> dict[str, object]: payload: dict[str, object] = {"error": {"code": code, "message": message}} if details is not None: diff --git a/backend/app/db/init_db.py b/backend/app/db/init_db.py index 94d6ead..da37a46 100644 --- a/backend/app/db/init_db.py +++ b/backend/app/db/init_db.py @@ -1,6 +1,6 @@ from app.db.base import Base from app.db.session import engine -from app.models import exam # noqa: F401 +from app.models import exam, user # noqa: F401 def init_db() -> None: diff --git a/backend/app/main.py b/backend/app/main.py index 1b2bfd7..bc6a8dd 100644 --- a/backend/app/main.py +++ b/backend/app/main.py @@ -4,11 +4,10 @@ from collections.abc import AsyncIterator from fastapi import Depends, FastAPI from fastapi.middleware.cors import CORSMiddleware -from app.api.routes import exports, generation, health, templates +from app.api.routes import auth, exports, generation, health, history, templates from app.core.config import get_settings from app.core.errors import register_exception_handlers from app.core.middleware import RateLimitMiddleware, RequestSizeLimitMiddleware -from app.core.security import require_api_key from app.db.init_db import init_db @@ -35,10 +34,11 @@ def create_app() -> FastAPI: register_exception_handlers(app) 
app.include_router(health.router) - protected = [Depends(require_api_key)] - app.include_router(templates.router, prefix="/exam", dependencies=protected) - app.include_router(generation.router, prefix="/exam", dependencies=protected) - app.include_router(exports.router, prefix="/exam", dependencies=protected) + app.include_router(auth.router) + app.include_router(templates.router, prefix="/exam") + app.include_router(generation.router, prefix="/exam") + app.include_router(exports.router, prefix="/exam") + app.include_router(history.router, prefix="/exam") return app diff --git a/backend/app/models/exam.py b/backend/app/models/exam.py index ad987fc..a6717c9 100644 --- a/backend/app/models/exam.py +++ b/backend/app/models/exam.py @@ -39,6 +39,12 @@ class ExamTemplate(Base): __tablename__ = "exam_templates" id: Mapped[uuid.UUID] = mapped_column(UUID(as_uuid=True), primary_key=True, default=uuid.uuid4) + user_id: Mapped[uuid.UUID] = mapped_column( + UUID(as_uuid=True), + ForeignKey("users.id", ondelete="CASCADE"), + nullable=False, + index=True, + ) title: Mapped[str] = mapped_column(String(200), nullable=False) subject: Mapped[str] = mapped_column(String(200), nullable=False) educational_level: Mapped[str] = mapped_column(String(120), nullable=False) @@ -48,6 +54,8 @@ class ExamTemplate(Base): created_at: Mapped[datetime] = mapped_column(DateTime(timezone=True), server_default=func.now()) updated_at: Mapped[datetime] = mapped_column(DateTime(timezone=True), server_default=func.now(), onupdate=func.now()) + owner: Mapped["User"] = relationship(back_populates="exam_templates") + questions: Mapped[list["Question"]] = relationship( back_populates="template", cascade="all, delete-orphan", diff --git a/backend/app/models/user.py b/backend/app/models/user.py new file mode 100644 index 0000000..36240c0 --- /dev/null +++ b/backend/app/models/user.py @@ -0,0 +1,25 @@ +import uuid +from datetime import datetime + +from sqlalchemy import DateTime, String, func +from 
sqlalchemy.dialects.postgresql import UUID +from sqlalchemy.orm import Mapped, mapped_column, relationship + +from app.db.base import Base + + +class User(Base): + __tablename__ = "users" + + id: Mapped[uuid.UUID] = mapped_column(UUID(as_uuid=True), primary_key=True, default=uuid.uuid4) + email: Mapped[str] = mapped_column(String(255), unique=True, nullable=False, index=True) + password_hash: Mapped[str | None] = mapped_column(String(255), nullable=True) + google_sub: Mapped[str | None] = mapped_column(String(255), unique=True, nullable=True, index=True) + full_name: Mapped[str | None] = mapped_column(String(200), nullable=True) + created_at: Mapped[datetime] = mapped_column(DateTime(timezone=True), server_default=func.now()) + + exam_templates: Mapped[list["ExamTemplate"]] = relationship( + back_populates="owner", + cascade="all, delete-orphan", + passive_deletes=True, + ) diff --git a/backend/app/schemas/exam.py b/backend/app/schemas/exam.py index d476cf6..ef2ad15 100644 --- a/backend/app/schemas/exam.py +++ b/backend/app/schemas/exam.py @@ -125,3 +125,18 @@ class ExportResponse(BaseModel): template_id: uuid.UUID format: ExportFormat content: str + + +class ExamHistoryItem(BaseModel): + id: uuid.UUID + title: str + subject: str + educational_level: str + language: str + question_count: int + export_count: int + last_export_at: datetime | None + created_at: datetime + updated_at: datetime + + model_config = ConfigDict(from_attributes=True) diff --git a/backend/app/schemas/user.py b/backend/app/schemas/user.py new file mode 100644 index 0000000..7d35545 --- /dev/null +++ b/backend/app/schemas/user.py @@ -0,0 +1,35 @@ +import uuid +from datetime import datetime + +from pydantic import BaseModel, ConfigDict, EmailStr, Field + + +class UserRegister(BaseModel): + email: EmailStr + password: str = Field(min_length=8, max_length=128) + full_name: str | None = Field(default=None, max_length=200) + + +class UserLogin(BaseModel): + email: EmailStr + password: str = 
Field(min_length=1, max_length=128) + + +class UserRead(BaseModel): + id: uuid.UUID + email: EmailStr + full_name: str | None + created_at: datetime + + model_config = ConfigDict(from_attributes=True) + + +class GoogleLoginRequest(BaseModel): + """ID token obtenido en el frontend con Google Identity Services (Sign in with Google).""" + + id_token: str = Field(min_length=10, max_length=8_000) + + +class TokenResponse(BaseModel): + access_token: str + token_type: str = "bearer" diff --git a/backend/app/services/auth_service.py b/backend/app/services/auth_service.py new file mode 100644 index 0000000..0f509b5 --- /dev/null +++ b/backend/app/services/auth_service.py @@ -0,0 +1,132 @@ +import uuid +from datetime import UTC, datetime, timedelta +from typing import Annotated + +from fastapi import Depends +from jose import JWTError, jwt +from passlib.context import CryptContext +from sqlalchemy import select +from sqlalchemy.orm import Session + +from app.core.config import Settings, get_settings +from google.auth.transport import requests as google_requests +from google.oauth2 import id_token as google_id_token + +from app.core.errors import AppError, ConflictError, NotFoundError, UnauthorizedError +from app.core.security import clean_text +from app.db.session import get_db +from app.models.user import User +from app.schemas.user import UserLogin, UserRead, UserRegister + +pwd_context = CryptContext(schemes=["bcrypt"], deprecated="auto") + + +class AuthService: + def __init__(self, db: Session, settings: Settings) -> None: + self.db = db + self.settings = settings + + def register(self, payload: UserRegister) -> UserRead: + email = payload.email.lower().strip() + existing = self.db.scalar(select(User).where(User.email == email)) + if existing is not None: + raise ConflictError("Email is already registered") + + user = User( + email=email, + password_hash=pwd_context.hash(payload.password), + full_name=clean_text(payload.full_name, max_length=200) if payload.full_name 
else None, + ) + self.db.add(user) + self.db.commit() + self.db.refresh(user) + return UserRead.model_validate(user) + + def authenticate(self, payload: UserLogin) -> User: + email = payload.email.lower().strip() + user = self.db.scalar(select(User).where(User.email == email)) + if user is None or user.password_hash is None: + raise UnauthorizedError("Invalid email or password") + if not pwd_context.verify(payload.password, user.password_hash): + raise UnauthorizedError("Invalid email or password") + return user + + def login_with_google(self, id_token_value: str) -> User: + if not self.settings.google_client_id: + raise AppError( + message="Google login is not configured", + status_code=503, + code="google_not_configured", + ) + + try: + idinfo = google_id_token.verify_oauth2_token( + id_token_value, + google_requests.Request(), + self.settings.google_client_id, + ) + except ValueError as exc: + raise UnauthorizedError("Invalid Google ID token") from exc + + google_sub = idinfo.get("sub") + email = (idinfo.get("email") or "").lower().strip() + if not google_sub or not email: + raise UnauthorizedError("Google token does not include required user information") + if not idinfo.get("email_verified", False): + raise UnauthorizedError("Google email is not verified") + + user = self.db.scalar(select(User).where(User.google_sub == google_sub)) + if user is not None: + return user + + user = self.db.scalar(select(User).where(User.email == email)) + if user is not None: + if user.google_sub is not None and user.google_sub != google_sub: + raise ConflictError("Email is linked to another Google account") + user.google_sub = google_sub + if not user.full_name and idinfo.get("name"): + user.full_name = clean_text(idinfo["name"], max_length=200) + self.db.commit() + self.db.refresh(user) + return user + + user = User( + email=email, + password_hash=None, + google_sub=google_sub, + full_name=clean_text(idinfo["name"], max_length=200) if idinfo.get("name") else None, + ) + 
self.db.add(user) + self.db.commit() + self.db.refresh(user) + return user + + def get_user_by_id(self, user_id: uuid.UUID) -> User: + user = self.db.get(User, user_id) + if user is None: + raise NotFoundError("User not found") + return user + + def create_access_token(self, user_id: uuid.UUID) -> str: + expire = datetime.now(UTC) + timedelta(minutes=self.settings.jwt_expire_minutes) + payload = {"sub": str(user_id), "exp": expire} + return jwt.encode(payload, self.settings.jwt_secret_key, algorithm=self.settings.jwt_algorithm) + + def decode_user_id(self, token: str) -> uuid.UUID: + try: + payload = jwt.decode( + token, + self.settings.jwt_secret_key, + algorithms=[self.settings.jwt_algorithm], + ) + user_id = uuid.UUID(payload["sub"]) + except (JWTError, KeyError, ValueError) as exc: + raise UnauthorizedError("Invalid or expired token") from exc + return user_id + + +def get_auth_service( + db: Annotated[Session, Depends(get_db)], + settings: Annotated[Settings, Depends(get_settings)], +) -> AuthService: + return AuthService(db, settings) diff --git a/backend/app/services/exam_service.py b/backend/app/services/exam_service.py index 49a74de..42d6114 100644 --- a/backend/app/services/exam_service.py +++ b/backend/app/services/exam_service.py @@ -3,10 +3,11 @@ import uuid from sqlalchemy import select from sqlalchemy.orm import Session -from app.core.errors import NotFoundError +from app.core.errors import ForbiddenError, NotFoundError from app.core.security import clean_text from app.models.exam import ExamTemplate, ExportFormat, ExportJob, ExportStatus, Question from app.schemas.exam import ( + ExamHistoryItem, ExamTemplateCreate, ExamTemplateRead, ExportResponse, @@ -35,8 +36,9 @@ class ExamService: self.parser = parser or AIQuestionParser() self.exporter = exporter or MoodleXMLExporter() - def create_template(self, payload: ExamTemplateCreate) -> ExamTemplateRead: + def create_template(self, user_id: uuid.UUID, payload: ExamTemplateCreate) -> ExamTemplateRead: 
template = ExamTemplate( + user_id=user_id, title=clean_text(payload.title, max_length=200), subject=clean_text(payload.subject, max_length=200), educational_level=clean_text(payload.educational_level, max_length=120), @@ -49,37 +51,67 @@ class ExamService: self.db.refresh(template) return self._template_read(template) - def list_templates(self) -> list[ExamTemplateRead]: - templates = self.db.scalars(select(ExamTemplate).order_by(ExamTemplate.created_at.desc())).all() + def list_templates(self, user_id: uuid.UUID) -> list[ExamTemplateRead]: + templates = self.db.scalars( + select(ExamTemplate) + .where(ExamTemplate.user_id == user_id) + .order_by(ExamTemplate.created_at.desc()) + ).all() return [self._template_read(template) for template in templates] - def get_template(self, template_id: uuid.UUID) -> ExamTemplateRead: - return self._template_read(self._get_template_or_404(template_id)) + def list_history(self, user_id: uuid.UUID) -> list[ExamHistoryItem]: + templates = self.db.scalars( + select(ExamTemplate) + .where(ExamTemplate.user_id == user_id) + .order_by(ExamTemplate.updated_at.desc()) + ).all() + history: list[ExamHistoryItem] = [] + for template in templates: + export_jobs = sorted(template.export_jobs, key=lambda job: job.created_at, reverse=True) + history.append( + ExamHistoryItem( + id=template.id, + title=template.title, + subject=template.subject, + educational_level=template.educational_level, + language=template.language, + question_count=len(template.questions), + export_count=len(export_jobs), + last_export_at=export_jobs[0].created_at if export_jobs else None, + created_at=template.created_at, + updated_at=template.updated_at, + ) + ) + return history - def build_prompt(self, template_id: uuid.UUID, topic_prompt: str) -> PromptResponse: - template = self._get_template_or_404(template_id) + def get_template(self, user_id: uuid.UUID, template_id: uuid.UUID) -> ExamTemplateRead: + return 
self._template_read(self._get_user_template_or_404(user_id, template_id)) + + def build_prompt(self, user_id: uuid.UUID, template_id: uuid.UUID, topic_prompt: str) -> PromptResponse: + template = self._get_user_template_or_404(user_id, template_id) prompt = self.prompt_builder.build_prompt(template, topic_prompt) return PromptResponse(template_id=template.id, prompt=prompt) async def generate_with_llm( self, + user_id: uuid.UUID, template_id: uuid.UUID, topic_prompt: str, llm_client: LLMClient, ) -> ParsedQuestionsResponse: - template = self._get_template_or_404(template_id) + template = self._get_user_template_or_404(user_id, template_id) prompt = self.prompt_builder.build_prompt(template, topic_prompt) raw_output = await llm_client.generate(prompt) questions = self.parser.parse_json(raw_output) return self._persist_questions(template.id, questions) - def parse_and_persist(self, payload: ParseRequest) -> ParsedQuestionsResponse: - self._get_template_or_404(payload.template_id) + def parse_and_persist(self, user_id: uuid.UUID, payload: ParseRequest) -> ParsedQuestionsResponse: + self._get_user_template_or_404(user_id, payload.template_id) questions = self.parser.parse(payload.raw_output, payload.input_format) return self._persist_questions(payload.template_id, questions) - def export(self, template_id: uuid.UUID, export_format: ExportFormat) -> ExportResponse: - template = self._get_template_or_404(template_id) + def export(self, user_id: uuid.UUID, template_id: uuid.UUID, export_format: ExportFormat) -> ExportResponse: + template = self._get_user_template_or_404(user_id, template_id) questions = list(template.questions) if not questions: raise NotFoundError("Template does not contain questions to export") @@ -126,10 +158,12 @@ class ExamService: return ParsedQuestionsResponse(questions=[QuestionRead.model_validate(question) for question in persisted]) - def _get_template_or_404(self, template_id: uuid.UUID) -> ExamTemplate: + def _get_user_template_or_404(self, 
user_id: uuid.UUID, template_id: uuid.UUID) -> ExamTemplate: template = self.db.get(ExamTemplate, template_id) if template is None: raise NotFoundError("Exam template not found") + if template.user_id != user_id: + raise ForbiddenError("You do not have access to this exam template") return template def _template_read(self, template: ExamTemplate) -> ExamTemplateRead: diff --git a/backend/requirements.txt b/backend/requirements.txt index 6ad34b4..3076625 100644 --- a/backend/requirements.txt +++ b/backend/requirements.txt @@ -3,7 +3,12 @@ uvicorn[standard] SQLAlchemy psycopg[binary] pydantic-settings +pydantic[email] python-dotenv httpx orjson +passlib[bcrypt] +python-jose[cryptography] +google-auth +requests pytest From 7bc27da33a3642f530e7e292c88b2c2fe6282d57 Mon Sep 17 00:00:00 2001 From: Mireya Cueto Garrido Date: Mon, 1 Jun 2026 10:30:40 +0200 Subject: [PATCH 6/6] Add materials, exam images, storage quota, and API guide Upload documents for AI context, exam images for Moodle questions, per-template storage limits, embedded images in XML export, and GUIA_API_Y_FLUJO.md with full endpoint documentation. 
--- FlujoDeUsuario.txt | 24 +- GUIA_API_Y_FLUJO.md | 694 +++++++++++++++++++++ README.md | 50 +- backend/.env.example | 15 +- backend/Dockerfile | 9 + backend/app/api/dependencies.py | 34 +- backend/app/api/routes/generation.py | 8 +- backend/app/api/routes/images.py | 73 +++ backend/app/api/routes/materials.py | 55 ++ backend/app/api/routes/questions.py | 27 + backend/app/api/routes/templates.py | 15 +- backend/app/core/config.py | 11 + backend/app/core/security.py | 4 +- backend/app/main.py | 7 +- backend/app/models/exam.py | 66 ++ backend/app/schemas/exam.py | 13 + backend/app/schemas/image.py | 30 + backend/app/schemas/material.py | 32 + backend/app/schemas/storage.py | 14 + backend/app/services/document_extractor.py | 74 +++ backend/app/services/exam_service.py | 82 ++- backend/app/services/image_service.py | 206 ++++++ backend/app/services/material_service.py | 188 ++++++ backend/app/services/moodle_exporter.py | 104 ++- backend/app/services/parser.py | 2 + backend/app/services/prompt_builder.py | 25 +- backend/app/services/storage_quota.py | 81 +++ backend/requirements.txt | 5 + docker-compose.yml | 3 + 29 files changed, 1892 insertions(+), 59 deletions(-) create mode 100644 GUIA_API_Y_FLUJO.md create mode 100644 backend/app/api/routes/images.py create mode 100644 backend/app/api/routes/materials.py create mode 100644 backend/app/api/routes/questions.py create mode 100644 backend/app/schemas/image.py create mode 100644 backend/app/schemas/material.py create mode 100644 backend/app/schemas/storage.py create mode 100644 backend/app/services/document_extractor.py create mode 100644 backend/app/services/image_service.py create mode 100644 backend/app/services/material_service.py create mode 100644 backend/app/services/storage_quota.py diff --git a/FlujoDeUsuario.txt b/FlujoDeUsuario.txt index da16758..82394ae 100644 --- a/FlujoDeUsuario.txt +++ b/FlujoDeUsuario.txt @@ -6,21 +6,27 @@ Ahora mismo el flujo es backend/API, sin frontend: Define título, materia, nivel 
educativo, tipos de preguntas, puntuación, penalización y dificultad. La plantilla queda guardada en base de datos asociada a su usuario. -2.- Genera un prompt con POST /exam/prompts/{template_id}. - La API devuelve un prompt estructurado para pedirle al LLM preguntas en JSON válido. +2.- Sube materiales de referencia con POST /exam/templates/{id}/materials (campo file). + Acepta PDF, DOCX, TXT, MD, PNG, JPG, WEBP. La API extrae el texto para contexto de la IA. -3.- Hay dos caminos posibles: +2b.- Sube imágenes de examen con POST /exam/templates/{id}/images (campo file, opcional caption). + PNG, JPG, WEBP, GIF. Se usan para mostrarlas en las preguntas (no para extraer texto). - 3.1.- Generación automática: POST /exam/generate. - La API llama al LLM configurado, parsea la respuesta y guarda las preguntas. +3.- Genera un prompt con POST /exam/prompts/{template_id}. + El prompt incluye el material subido + el tema indicado por el profesor. - 3.2.- Carga manual: POST /exam/parse. +4.- Hay dos caminos posibles: + + 4.1.- Generación automática: POST /exam/generate. + La API llama al LLM con el contexto de los ficheros y guarda las preguntas. + + 4.2.- Carga manual: POST /exam/parse. El profesor pega una salida de IA en json o txt, y la API la valida y guarda. -4.- Consulta su historial con GET /exam/history. +5.- Consulta su historial con GET /exam/history. Ve todos los exámenes que ha creado, cuántas preguntas tienen y cuándo exportó por última vez. -5.- Exporta el examen: +6.- Exporta el examen: GET /exam/export/xml/{template_id} para Moodle XML. GET /exam/export/txt/{template_id} para texto plano. @@ -28,4 +34,4 @@ Ahora mismo el flujo es backend/API, sin frontend: (El XML generado se importa manualmente en Moodle.) -En resumen: registrarse → configurar plantilla → generar prompt o llamar al LLM → guardar preguntas → ver historial → exportar Moodle XML. +En resumen: registrarse → plantilla → subir materiales → prompt/generar con IA → historial → exportar Moodle XML. 
diff --git a/GUIA_API_Y_FLUJO.md b/GUIA_API_Y_FLUJO.md new file mode 100644 index 0000000..ccbae35 --- /dev/null +++ b/GUIA_API_Y_FLUJO.md @@ -0,0 +1,694 @@ +# Guía de uso de la API y flujo de la aplicación + +Documento resumen para entender **qué hace el usuario en cada paso**, **qué endpoint usar**, **cabeceras**, **cuerpos**, **ejemplos de respuesta** y **errores típicos**. + +**Base URL de ejemplo:** `http://localhost:8000` + +--- + +## 1. Conceptos rápidos + +| Concepto | Significado | +|----------|-------------| +| **Usuario** | Cada persona tiene su cuenta; los exámenes son suyos. | +| **Plantilla (template)** | Configuración de un examen: título, materia, tipos de pregunta, dificultad, etc. | +| **Materiales** | Ficheros para **extraer texto** y dar **contexto a la IA** (PDF, DOCX, TXT; imágenes aquí se procesan con OCR para texto). | +| **Imágenes de examen** | Imágenes para **mostrar en la pregunta** en Moodle; no se usan como texto de contexto para la IA. | +| **Preguntas** | Se generan con la IA, se pegan manualmente (parse) o se ajustan después. | +| **Exportación** | Salida Moodle XML, TXT o JSON. | + +**Autenticación:** casi todo va con JWT: + +```http +Authorization: Bearer <token> +``` + +Las rutas bajo `/exam/...` **requieren** ese header (salvo que indiquemos lo contrario). + +**Formato de error habitual** (API propia): + +```json +{ + "error": { + "code": "codigo_corto", + "message": "Texto legible para humanos", + "details": null + } +} +``` + +(`details` solo aparece en algunos errores de validación.) + +**Otros códigos:** `401` token inválido o ausente, `403` recurso de otro usuario, `404` no existe, `409` conflicto (email duplicado, cupo, etc.), `413` fichero o cupo demasiado grande, `422` validación o parseo, `429` demasiadas peticiones, `503` servicio externo no configurado (p. ej. Google o LLM). + +--- + +## 2. 
Flujo de uso (orden recomendado) + +Hasta el **examen exportable** (normalmente Moodle XML): autenticación → plantilla → (materiales + imágenes) → generar preguntas → exportar. + +### Tres piezas que debes distinguir + +| Pieza | Para qué sirve | Endpoints | +|-------|----------------|-----------| +| **Materiales** | Extraen **texto** (PDF, DOCX, TXT; imagen aquí = OCR) y alimentan el **prompt** de la IA. | `POST/GET/DELETE …/templates/{id}/materials` | +| **Imágenes de examen** | Solo para **mostrar** la figura en la pregunta / Moodle (`image_id`). **No** aportan texto al prompt. | `POST/GET/DELETE …/templates/{id}/images`, `GET …/images/{id}/content`, `PATCH …/questions/{id}/image` | +| **IA** | Crea y **guarda** las preguntas en BD. | `POST …/prompts/{id}`, `POST …/generate`, `POST …/parse` | + +```text +Materiales → texto en prompt ─┐ +Imágenes → catálogo ids ─┼→ generate/parse → preguntas → export/xml +``` + +**Importante:** “Leer” un escaneado como texto → **material**. “Que el alumno vea la foto” → **imagen de examen** (pueden ser el mismo fichero subido dos veces si necesitas ambas cosas). + +--- + +### Pasos (qué hacer y endpoint) + +Todas las rutas `/exam/*` llevan `Authorization: Bearer <token>`. 
+ +| # | Qué haces | Endpoint(s) | +|---|-----------|-------------| +| 1 | Registro o login; guardas el JWT | `POST /auth/register`, `POST /auth/login` o `POST /auth/google` | +| 2 | Creas el examen (tipos, nº preguntas, dificultad); guardas **`template_id`** | `POST /exam/templates` | +| 3 | *(Opc.)* Subes apuntes; compruebas estado **`processed`** | `POST …/materials` (`file`), `GET …/materials` | +| 4 | *(Opc.)* Subes figuras para preguntas; anotas cada **`image_id`** | `POST …/images` (`file`, `caption` opcional), `GET …/images` | +| 5 | *(Opc.)* Ves cuota de espacio (materiales + imágenes) | `GET …/templates/{id}/storage` | +| 6 | Generas preguntas (ver tabla abajo) | `prompts` / `generate` / `parse` | +| 7 | *(Opc.)* Corriges imagen de una pregunta | `PATCH …/questions/{id}/image` | +| 8 | *(Opc.)* Listado de tus exámenes | `GET /exam/history` | +| 9 | Descargas el examen (hay que tener preguntas) | `GET …/export/xml/{id}` (Moodle), `…/txt`, `…/json` | + +--- + +### Generación con IA (paso 6) + +Body habitual en **prompts** y **generate**: + +```json +{ "topic_prompt": "…instrucciones…", "material_ids": null } +``` + +`material_ids`: `null` = todos los materiales OK; o lista de UUIDs concretos. + +| Opción | Endpoint | Resultado | +|--------|----------|-----------| +| Ver/copiar prompt (sin LLM en servidor) | `POST /exam/prompts/{template_id}` | Texto del prompt | +| Generar y guardar en servidor | `POST /exam/generate` (+ `template_id`) | Preguntas en BD; requiere `LLM_API_KEY` | +| Pegar JSON/TXT de otra IA | `POST /exam/parse` | Preguntas en BD | + +El prompt incluye texto de **materiales** y catálogo de **imágenes**. La IA puede poner **`image_id`** en cada pregunta; el backend **no** obliga “una imagen = una pregunta” (solo lo que pidas en `topic_prompt` + revisión o `PATCH`). + +Detalle de cuerpos, respuestas y errores: **sección 4** de esta guía. + +--- + +## 3. 
Cabeceras comunes + +| Cabecera | Cuándo | +|----------|--------| +| `Content-Type: application/json` | Peticiones con body JSON. | +| `Authorization: Bearer <access_token>` | Rutas protegidas (`/exam/*`, `/auth/me`). | +| `Content-Type: multipart/form-data` | Subida de ficheros (el cliente lo pone automáticamente con `curl -F`). | + +**No** hace falta `X-API-Key` para el flujo normal de usuario (sigue existiendo en configuración por compatibilidad, pero el acceso a exámenes es por JWT). + +--- + +## 4. Endpoints por bloques + +### 4.1 Salud del servicio + +#### `GET /health` + +**Qué hace:** Comprueba que el servidor responde. **No** requiere autenticación. + +**Headers:** ninguno obligatorio. + +**Body:** no. + +**Ejemplo:** + +```bash +curl -s http://localhost:8000/health +``` + +**Respuesta OK (200):** + +```json +{ "status": "ok" } +``` + +**Error típico:** si el servidor está caído, no hay respuesta HTTP (no es JSON de la API). + +--- + +### 4.2 Autenticación (`/auth`) + +#### `POST /auth/register` + +**Qué hace:** Crea usuario con email y contraseña. + +**Headers:** `Content-Type: application/json` + +**Body:** + +```json +{ + "email": "profesor@ejemplo.com", + "password": "Minimo8caracteres", + "full_name": "María García" +} +``` + +**Ejemplo:** + +```bash +curl -s -X POST http://localhost:8000/auth/register \ + -H "Content-Type: application/json" \ + -d '{"email":"profesor@ejemplo.com","password":"ClaveSegura1","full_name":"María"}' +``` + +**Respuesta OK (201):** + +```json +{ + "id": "a1b2c3d4-e5f6-7890-abcd-ef1234567890", + "email": "profesor@ejemplo.com", + "full_name": "María", + "created_at": "2026-05-19T10:00:00+00:00" +} +``` + +**Error típico (409):** email ya registrado. + +```json +{ + "error": { + "code": "conflict", + "message": "Email is already registered" + } +} +``` + +--- + +#### `POST /auth/login` + +**Qué hace:** Devuelve el **JWT** para el resto de llamadas. 
+ +**Body:** + +```json +{ + "email": "profesor@ejemplo.com", + "password": "ClaveSegura1" +} +``` + +**Respuesta OK (200):** + +```json +{ + "access_token": "eyJhbGciOiJIUzI1NiIsInR5cCI6IkpXVCJ9...", + "token_type": "bearer" +} +``` + +**Error típico (401):** credenciales incorrectas. + +```json +{ + "error": { + "code": "unauthorized", + "message": "Invalid email or password" + } +} +``` + +--- + +#### `POST /auth/google` + +**Qué hace:** Inicia sesión o registra con el **id_token** de Google (desde el frontend con Sign in with Google). + +**Headers:** `Content-Type: application/json` + +**Body:** + +```json +{ + "id_token": "eyJhbGciOiJSUzI1NiIs..." +} +``` + +**Requisitos:** `GOOGLE_CLIENT_ID` en `backend/.env`. + +**Respuesta OK (200):** igual que login (`access_token`). + +**Error típico (503):** Google no configurado. + +```json +{ + "error": { + "code": "google_not_configured", + "message": "Google login is not configured" + } +} +``` + +**Error típico (401):** token de Google inválido o email no verificado. + +--- + +#### `GET /auth/me` + +**Qué hace:** Devuelve los datos del usuario logueado. + +**Headers:** `Authorization: Bearer <access_token>` + +**Body:** no. + +**Respuesta OK (200):** mismo esquema que register (sin password). + +**Error típico (401):** falta token o token caducado. + +```json +{ + "error": { + "code": "unauthorized", + "message": "Invalid or expired token" + } +} +``` + +--- + +### 4.3 Plantillas de examen (`/exam/templates`) + +Todas requieren: `Authorization: Bearer <access_token>` + +#### `POST /exam/templates` + +**Qué hace:** Crea una plantilla nueva asociada al usuario. 
+ +**Body (JSON):** ver `ExamTemplateCreate` en el código; resumen: + +- `title`, `subject`, `educational_level`, `language` +- `settings.question_types`: lista de `{ "type": "multichoice"|"truefalse"|"shortanswer"|"matching", "count", "options_count", "multiple_correct", "score", "penalty" }` +- `settings.shuffle_questions`, `shuffle_answers`, `include_feedback` +- `difficulty_profile`: `easy`, `medium`, `hard`, `very_hard` (al menos uno > 0) + +**Ejemplo mínimo:** + +```bash +curl -s -X POST http://localhost:8000/exam/templates \ + -H "Authorization: Bearer $TOKEN" \ + -H "Content-Type: application/json" \ + -d '{ + "title": "Examen UD3", + "subject": "Bases de datos", + "educational_level": "CFGS DAW", + "language": "es", + "settings": { + "question_types": [ + {"type": "multichoice", "count": 5, "options_count": 4, "multiple_correct": false, "score": 1, "penalty": 0.25} + ], + "shuffle_questions": true, + "shuffle_answers": true, + "include_feedback": true + }, + "difficulty_profile": {"easy": 2, "medium": 2, "hard": 1, "very_hard": 0} + }' +``` + +**Respuesta OK (201):** plantilla con `id`, fechas, `question_count`, etc. + +**Error típico (422):** JSON mal formado o reglas de validación (p. ej. suma de dificultades vacía). + +```json +{ + "error": { + "code": "validation_error", + "message": "Invalid request payload", + "details": [ { "loc": ["body", "difficulty_profile"], "msg": "...", "type": "value_error" } ] + } +} +``` + +--- + +#### `GET /exam/templates` + +**Qué hace:** Lista las plantillas del usuario. + +**Respuesta OK (200):** array de plantillas. + +**Error típico (401):** sin token. + +--- + +#### `GET /exam/templates/{template_id}` + +**Qué hace:** Obtiene una plantilla concreta. + +**Parámetros URL:** `template_id` (UUID). + +**Error típico (404):** no existe o no es tuya. + +```json +{ + "error": { + "code": "not_found", + "message": "Exam template not found" + } +} +``` + +**Error típico (403):** plantilla de otro usuario. 
+ +```json +{ + "error": { + "code": "forbidden", + "message": "You do not have access to this exam template" + } +} +``` + +--- + +#### `GET /exam/templates/{template_id}/storage` + +**Qué hace:** Muestra cuánto espacio ocupan **materiales + imágenes** de esa plantilla frente al cupo (`MAX_STORAGE_BYTES_PER_TEMPLATE`). + +**Respuesta OK (200) ejemplo:** + +```json +{ + "template_id": "...", + "used_bytes": 1048576, + "limit_bytes": 52428800, + "remaining_bytes": 51380224, + "materials_bytes": 524288, + "images_bytes": 524288, + "used_mb": 1.0, + "limit_mb": 50.0 +} +``` + +**Error típico:** mismo 404/403 que la plantilla. + +--- + +### 4.4 Materiales de contexto (`/exam/templates/.../materials`) + +Sirven para **texto** que la IA puede usar al generar (PDF, DOCX, TXT, MD; imágenes aquí → OCR para texto). + +#### `POST /exam/templates/{template_id}/materials` + +**Headers:** `Authorization` + `multipart/form-data` + +**Body:** campo formulario `file` = fichero. + +**Ejemplo:** + +```bash +curl -s -X POST "http://localhost:8000/exam/templates/TEMPLATE_UUID/materials" \ + -H "Authorization: Bearer $TOKEN" \ + -F "file=@./apuntes.pdf" +``` + +**Respuesta OK (201):** objeto con `material` (id, estado `processed` o `failed`, vista previa de texto si hay) y `message`. + +**Errores típicos:** + +| Código | Situación | +|--------|-----------| +| 413 | Fichero mayor que `MAX_UPLOAD_BYTES` o cupo total de plantilla superado (`template_storage_quota_exceeded`). | +| 415 | Extensión no permitida. | +| 409 | Demasiados ficheros (`too_many_files`). | + +Ejemplo cupo: + +```json +{ + "error": { + "code": "template_storage_quota_exceeded", + "message": "Template storage quota exceeded. Limit: 50.00 MB, used: 48.00 MB, file: 5.00 MB" + } +} +``` + +--- + +#### `GET /exam/templates/{template_id}/materials` + +Lista materiales de la plantilla. **200:** array. + +--- + +#### `DELETE /exam/templates/{template_id}/materials/{material_id}` + +Borra un material. **204:** sin cuerpo. 
+ +**Error típico (404):** material o plantilla no encontrados. + +--- + +### 4.5 Imágenes de examen (`/exam/templates/.../images` y `/exam/images/...`) + +Solo para **mostrar en la pregunta** (Moodle); no rellenan el contexto de texto de la IA. + +#### `POST /exam/templates/{template_id}/images` + +**Body:** `multipart/form-data` con `file` obligatorio y `caption` opcional. + +**Ejemplo:** + +```bash +curl -s -X POST "http://localhost:8000/exam/templates/TEMPLATE_UUID/images" \ + -H "Authorization: Bearer $TOKEN" \ + -F "file=@./diagrama.png" \ + -F "caption=Diagrama del modelo ER" +``` + +**Respuesta OK (201):** incluye `image.id` y `content_url` tipo `/exam/images/{id}/content`. + +**Errores típicos:** 413 tamaño / cupo, 415 tipo no imagen, 422 imagen corrupta, 409 demasiadas imágenes. + +--- + +#### `GET /exam/templates/{template_id}/images` + +Lista imágenes. **200:** array. + +--- + +#### `GET /exam/images/{image_id}/content` + +Devuelve el **binario** de la imagen (previsualización o descarga). **200** con `Content-Type` de imagen. + +**Headers:** `Authorization: Bearer <access_token>` + +**Error típico (404):** id inexistente o imagen de otro usuario. + +--- + +#### `DELETE /exam/templates/{template_id}/images/{image_id}` + +Borra imagen y desvincula de preguntas. **204** sin cuerpo. + +--- + +### 4.6 Vincular imagen a pregunta (`/exam/questions`) + +#### `PATCH /exam/questions/{question_id}/image` + +**Qué hace:** Asigna o quita la imagen de una pregunta ya guardada. + +**Headers:** `Authorization`, `Content-Type: application/json` + +**Body:** + +```json +{ "image_id": "UUID-de-imagen-de-la-misma-plantilla" } +``` + +o para quitar: + +```json +{ "image_id": null } +``` + +**Respuesta OK (200):** pregunta con campos incl. `image_url` si hay imagen. + +**Errores típicos:** 404 pregunta no tuya; 404 `image_id` no pertenece a la plantilla de esa pregunta. + +--- + +### 4.7 Generación con IA (`/exam`) + +Todas con `Authorization: Bearer <access_token>`. 
+ +#### `POST /exam/prompts/{template_id}` + +**Qué hace:** Construye el **texto del prompt** (incluye materiales procesados y catálogo de imágenes de examen) sin llamar al LLM. + +**Body:** + +```json +{ + "topic_prompt": "Genera preguntas sobre normalización y formas normales.", + "material_ids": null +} +``` + +`material_ids`: lista de UUIDs de materiales concretos, o `null` para usar **todos** los materiales con estado `processed`. + +**Respuesta OK (200):** + +```json +{ + "template_id": "...", + "prompt": "Eres un generador...", + "expected_format": "json" +} +``` + +**Errores típicos:** 404 plantilla; 404 si en `material_ids` pides un material que no existe o no está procesado. + +--- + +#### `POST /exam/generate` + +**Qué hace:** Llama al **LLM**, parsea JSON y **guarda** preguntas. + +**Body:** + +```json +{ + "template_id": "UUID-plantilla", + "topic_prompt": "Enfócate en claves foráneas e integridad referencial.", + "material_ids": null +} +``` + +**Respuesta OK (200):** `{ "questions": [ { ...pregunta..., "image_id": null, "image_url": null } ] }` + +**Errores típicos:** + +| Código | Ejemplo | +|--------|---------| +| 503 | `LLM_API_KEY` no configurada (`llm_unavailable`). | +| 422 | JSON del modelo inválido (`parse_error`). | + +```json +{ + "error": { + "code": "llm_unavailable", + "message": "LLM_API_KEY is not configured" + } +} +``` + +--- + +#### `POST /exam/parse` + +**Qué hace:** Pegas la salida de una IA externa (JSON o TXT) y se validan y guardan preguntas. + +**Body:** + +```json +{ + "template_id": "UUID-plantilla", + "input_format": "json", + "raw_output": "{\"questions\":[...]}" +} +``` + +**Respuesta OK (200):** igual que generate (`questions`). + +**Error típico (422):** `parse_error` si el formato no cuadra con el esquema de preguntas. + +--- + +### 4.8 Historial (`/exam/history`) + +#### `GET /exam/history` + +**Qué hace:** Lista exámenes del usuario (plantillas) con resumen (preguntas, exportaciones, fechas). 
+ +**Respuesta OK (200):** array de `ExamHistoryItem`. + +**Error típico (401):** sin token. + +--- + +### 4.9 Exportación (`/exam/export`) + +Requiere que la plantilla **tenga preguntas** guardadas. + +#### `GET /exam/export/xml/{template_id}` + +**Respuesta OK (200):** cuerpo **XML** (`Content-Type: application/xml`). Incluye imágenes embebidas si las preguntas las tienen. + +**Error típico (404):** sin preguntas aún. + +```json +{ + "error": { + "code": "not_found", + "message": "Template does not contain questions to export" + } +} +``` + +--- + +#### `GET /exam/export/txt/{template_id}` + +**200:** texto plano. + +--- + +#### `GET /exam/export/json/{template_id}` + +**200:** JSON con lista de preguntas. + +--- + +## 5. Cómo elegir imagen por pregunta (recordatorio) + +Resumen de lo ya explicado en los **pasos 4** (subida y catálogo) y **7** (PATCH) de la sección 2, con el detalle en las **secciones 4.5 y 4.6**. En corto: + +1. `POST /exam/templates/{template_id}/images` → anota cada **`id`**. +2. `POST /exam/generate` (o prompt + IA externa + `parse`) → el JSON puede incluir **`image_id`** por pregunta. +3. `PATCH /exam/questions/{question_id}/image` → corrección manual. + +--- + +## 6. Límites y buenas prácticas (recordatorio) + +- **Cupo total por plantilla:** `MAX_STORAGE_BYTES_PER_TEMPLATE` (materiales + imágenes). Consulta `GET .../storage` antes de subir mucho. +- **Tamaño por fichero:** materiales `MAX_UPLOAD_BYTES`, imágenes `MAX_IMAGE_BYTES`. +- **Contexto en el prompt:** el texto de materiales se trunca (`MAX_REFERENCE_CHARS`); no metas PDFs enormes sin trocear en el futuro. +- **Misma imagen para contexto OCR y para mostrar en examen:** hoy son dos rutas (`/materials` vs `/images`); si solo quieres **mostrar**, usa solo `/images`. + +--- + +## 7. 
Orden de lectura del código + +| Área | Carpeta / archivos | +|------|---------------------| +| Rutas | `backend/app/api/routes/` | +| Esquemas | `backend/app/schemas/` | +| Lógica de negocio | `backend/app/services/` | +| Modelos BD | `backend/app/models/exam.py`, `user.py` | +| Configuración | `backend/app/core/config.py`, `backend/.env.example` | + +--- + +*Documento generado para el proyecto GenExamenes / moodle-exam-generator. Ajusta la base URL y los UUID de ejemplo a tu entorno real.* diff --git a/README.md b/README.md index 498acec..1971961 100644 --- a/README.md +++ b/README.md @@ -2,6 +2,8 @@ Backend para generar exámenes con IA, procesar la salida de un LLM y exportar preguntas a Moodle XML. +**Guía detallada de flujo, endpoints, ejemplos y errores:** [GUIA_API_Y_FLUJO.md](GUIA_API_Y_FLUJO.md) + El proyecto está centrado en backend. La carpeta `frontend` se mantiene vacía a nivel de aplicación, aunque existe un servicio en Docker Compose para reservar el despliegue futuro. ## Stack @@ -46,6 +48,7 @@ Variables principales: - `LLM_BASE_URL`: endpoint compatible con OpenAI. - `LLM_MODEL`: modelo usado para generar preguntas. - `ALLOWED_ORIGINS`: orígenes permitidos por CORS. +- `MAX_STORAGE_BYTES_PER_TEMPLATE`: cupo total de almacenamiento por examen (materiales + imágenes). Todas las rutas bajo `/exam` requieren autenticación de usuario con: @@ -64,11 +67,12 @@ docker compose up --build 1. Registrarse o iniciar sesión. 2. Crear una plantilla de examen (queda asociada al usuario). -3. Generar un prompt guiado para el LLM. -4. Generar preguntas automáticamente con el LLM o parsear una salida externa en JSON/TXT. -5. Guardar las preguntas validadas en PostgreSQL. -6. Consultar el historial de exámenes creados. -7. Exportar el examen a Moodle XML, TXT o JSON. +3. Subir materiales de referencia (PDF, DOCX, TXT, PNG, JPG…) a la plantilla. +4. Generar un prompt guiado para el LLM (incluye el texto extraído de los ficheros). +5. 
Generar preguntas automáticamente con el LLM o parsear una salida externa en JSON/TXT. +6. Guardar las preguntas validadas en PostgreSQL. +7. Consultar el historial de exámenes creados. +8. Exportar el examen a Moodle XML, TXT o JSON. ## Endpoints @@ -96,6 +100,38 @@ Devuelve los datos del usuario autenticado. Lista el historial de exámenes del usuario (plantillas, preguntas y exportaciones). +`POST /exam/templates/{template_id}/materials` + +Sube un fichero (`multipart/form-data`, campo `file`). Formatos: PDF, DOCX, TXT, MD, PNG, JPG, WEBP. Extrae texto y lo guarda como contexto. + +`GET /exam/templates/{template_id}/materials` + +Lista los materiales subidos a una plantilla. + +`DELETE /exam/templates/{template_id}/materials/{material_id}` + +Elimina un material. + +`POST /exam/templates/{template_id}/images` + +Sube una imagen para preguntas visuales (`file`, opcional `caption`). No se usa OCR: la imagen se muestra en el examen y se embebe en el XML de Moodle. + +`GET /exam/templates/{template_id}/images` + +Lista las imágenes de la plantilla. + +`GET /exam/images/{image_id}/content` + +Devuelve la imagen (requiere JWT). Para previsualizar en el frontend o en Moodle tras importar. + +`DELETE /exam/templates/{template_id}/images/{image_id}` + +Elimina una imagen. + +`PATCH /exam/questions/{question_id}/image` + +Vincula o desvincula una imagen a una pregunta existente (`{"image_id": "uuid"}` o `null`). + `POST /exam/templates` Crea una plantilla con materia, nivel educativo, tipos de pregunta, puntuación, penalización y dificultad. @@ -108,6 +144,10 @@ Lista las plantillas del usuario autenticado. Obtiene una plantilla concreta. +`GET /exam/templates/{template_id}/storage` + +Muestra cuánto espacio usa el examen (materiales + imágenes) y el límite configurado. + `POST /exam/prompts/{template_id}` Genera un prompt estructurado para IA. 
diff --git a/backend/.env.example b/backend/.env.example index 8df96cc..b098eb6 100644 --- a/backend/.env.example +++ b/backend/.env.example @@ -14,7 +14,20 @@ ALLOWED_ORIGINS=http://localhost:3000 # --- Rate limiting y tamaño de petición --- RATE_LIMIT_REQUESTS=60 RATE_LIMIT_WINDOW_SECONDS=60 -MAX_REQUEST_BYTES=1048576 +MAX_REQUEST_BYTES=25165824 + +# --- Materiales de contexto (PDF, DOCX, imágenes, etc.) --- +UPLOAD_DIR=/app/uploads +MAX_UPLOAD_BYTES=20971520 +MAX_MATERIALS_PER_TEMPLATE=10 +MAX_REFERENCE_CHARS=12000 + +# --- Imágenes de examen (preguntas visuales, sin extracción OCR) --- +MAX_IMAGE_BYTES=5242880 +MAX_IMAGES_PER_TEMPLATE=20 + +# Cupo total por examen (materiales + imágenes). 50 MB por defecto. +MAX_STORAGE_BYTES_PER_TEMPLATE=52428800 # --- JWT (login email/contraseña y sesión tras Google) --- JWT_SECRET_KEY=change-me-use-a-long-random-secret-key-at-least-32-chars diff --git a/backend/Dockerfile b/backend/Dockerfile index 4ee7778..7d5524f 100644 --- a/backend/Dockerfile +++ b/backend/Dockerfile @@ -5,6 +5,13 @@ ENV PYTHONDONTWRITEBYTECODE=1 \ WORKDIR /app +RUN apt-get update \ + && apt-get install -y --no-install-recommends \ + tesseract-ocr \ + tesseract-ocr-spa \ + tesseract-ocr-eng \ + && rm -rf /var/lib/apt/lists/* + RUN addgroup --system app && adduser --system --ingroup app app COPY requirements.txt . 
@@ -12,6 +19,8 @@ RUN pip install --no-cache-dir -r requirements.txt COPY app ./app +RUN mkdir -p /app/uploads && chown -R app:app /app/uploads + USER app EXPOSE 8000 diff --git a/backend/app/api/dependencies.py b/backend/app/api/dependencies.py index 061bfc9..c84e035 100644 --- a/backend/app/api/dependencies.py +++ b/backend/app/api/dependencies.py @@ -6,11 +6,41 @@ from sqlalchemy.orm import Session from app.core.config import Settings, get_settings from app.db.session import get_db from app.services.exam_service import ExamService +from app.services.image_service import ImageService from app.services.llm import LLMClient +from app.services.material_service import MaterialService +from app.services.storage_quota import StorageQuotaService -def get_exam_service(db: Annotated[Session, Depends(get_db)]) -> ExamService: - return ExamService(db) +def get_storage_quota_service( + db: Annotated[Session, Depends(get_db)], + settings: Annotated[Settings, Depends(get_settings)], +) -> StorageQuotaService: + return StorageQuotaService(db, settings) + + +def get_material_service( + db: Annotated[Session, Depends(get_db)], + settings: Annotated[Settings, Depends(get_settings)], + storage_quota: Annotated[StorageQuotaService, Depends(get_storage_quota_service)], +) -> MaterialService: + return MaterialService(db, settings, storage_quota) + + +def get_image_service( + db: Annotated[Session, Depends(get_db)], + settings: Annotated[Settings, Depends(get_settings)], + storage_quota: Annotated[StorageQuotaService, Depends(get_storage_quota_service)], +) -> ImageService: + return ImageService(db, settings, storage_quota) + + +def get_exam_service( + db: Annotated[Session, Depends(get_db)], + material_service: Annotated[MaterialService, Depends(get_material_service)], + image_service: Annotated[ImageService, Depends(get_image_service)], +) -> ExamService: + return ExamService(db, material_service=material_service, image_service=image_service) def get_llm_client(settings: 
Annotated[Settings, Depends(get_settings)]) -> LLMClient: diff --git a/backend/app/api/routes/generation.py b/backend/app/api/routes/generation.py index 81dab5a..e7a63c2 100644 --- a/backend/app/api/routes/generation.py +++ b/backend/app/api/routes/generation.py @@ -26,7 +26,12 @@ def build_prompt( current_user: Annotated[User, Depends(get_current_user)], service: Annotated[ExamService, Depends(get_exam_service)], ) -> PromptResponse: - return service.build_prompt(current_user.id, template_id, payload.topic_prompt) + return service.build_prompt( + current_user.id, + template_id, + payload.topic_prompt, + payload.material_ids, + ) @router.post("/generate", response_model=ParsedQuestionsResponse) @@ -41,6 +46,7 @@ async def generate_exam( payload.template_id, payload.topic_prompt, llm_client, + payload.material_ids, ) diff --git a/backend/app/api/routes/images.py b/backend/app/api/routes/images.py new file mode 100644 index 0000000..5d14b45 --- /dev/null +++ b/backend/app/api/routes/images.py @@ -0,0 +1,73 @@ +import uuid +from typing import Annotated + +from fastapi import APIRouter, Depends, File, Form, UploadFile, status +from fastapi.responses import FileResponse + +from app.api.dependencies import get_exam_service, get_image_service +from app.core.auth import get_current_user +from app.models.user import User +from app.schemas.image import ExamImageRead, ExamImageUploadResponse +from app.services.exam_service import ExamService +from app.services.image_service import ImageService + +router = APIRouter(tags=["images"]) + + +@router.post( + "/templates/{template_id}/images", + response_model=ExamImageUploadResponse, + status_code=status.HTTP_201_CREATED, +) +def upload_exam_image( + template_id: uuid.UUID, + current_user: Annotated[User, Depends(get_current_user)], + exam_service: Annotated[ExamService, Depends(get_exam_service)], + image_service: Annotated[ImageService, Depends(get_image_service)], + file: UploadFile = File(...), + caption: Annotated[str | None, 
Form()] = None, +) -> ExamImageUploadResponse: + template = exam_service.get_owned_template(current_user.id, template_id) + image = image_service.upload(template, file, caption=caption) + return ExamImageUploadResponse( + image=ExamImageRead.model_validate(image_service.to_read(image)), + message="Image uploaded successfully", + ) + + +@router.get("/templates/{template_id}/images", response_model=list[ExamImageRead]) +def list_exam_images( + template_id: uuid.UUID, + current_user: Annotated[User, Depends(get_current_user)], + exam_service: Annotated[ExamService, Depends(get_exam_service)], + image_service: Annotated[ImageService, Depends(get_image_service)], +) -> list[ExamImageRead]: + exam_service.get_owned_template(current_user.id, template_id) + images = image_service.list_images(template_id) + return [ExamImageRead.model_validate(image_service.to_read(image)) for image in images] + + +@router.get("/images/{image_id}/content") +def get_exam_image_content( + image_id: uuid.UUID, + current_user: Annotated[User, Depends(get_current_user)], + image_service: Annotated[ImageService, Depends(get_image_service)], +) -> FileResponse: + image = image_service.get_image_for_user(current_user.id, image_id) + return FileResponse( + path=image.storage_path, + media_type=image.mime_type, + filename=image.original_filename, + ) + + +@router.delete("/templates/{template_id}/images/{image_id}", status_code=status.HTTP_204_NO_CONTENT) +def delete_exam_image( + template_id: uuid.UUID, + image_id: uuid.UUID, + current_user: Annotated[User, Depends(get_current_user)], + exam_service: Annotated[ExamService, Depends(get_exam_service)], + image_service: Annotated[ImageService, Depends(get_image_service)], +) -> None: + template = exam_service.get_owned_template(current_user.id, template_id) + image_service.delete_image(template, image_id) diff --git a/backend/app/api/routes/materials.py b/backend/app/api/routes/materials.py new file mode 100644 index 0000000..efd2fc9 --- /dev/null +++ 
b/backend/app/api/routes/materials.py @@ -0,0 +1,55 @@ +import uuid +from typing import Annotated + +from fastapi import APIRouter, Depends, File, UploadFile, status + +from app.api.dependencies import get_exam_service, get_material_service +from app.core.auth import get_current_user +from app.models.exam import MaterialStatus +from app.models.user import User +from app.schemas.material import ExamMaterialRead, ExamMaterialUploadResponse +from app.services.exam_service import ExamService +from app.services.material_service import MaterialService + +router = APIRouter(prefix="/templates/{template_id}/materials", tags=["materials"]) + + +@router.post("", response_model=ExamMaterialUploadResponse, status_code=status.HTTP_201_CREATED) +def upload_material( + template_id: uuid.UUID, + current_user: Annotated[User, Depends(get_current_user)], + exam_service: Annotated[ExamService, Depends(get_exam_service)], + material_service: Annotated[MaterialService, Depends(get_material_service)], + file: UploadFile = File(...), +) -> ExamMaterialUploadResponse: + template = exam_service.get_owned_template(current_user.id, template_id) + material = material_service.upload(template, file) + message = ( + "File uploaded and processed successfully" + if material.status == MaterialStatus.PROCESSED + else "File uploaded but text extraction failed" + ) + return ExamMaterialUploadResponse(material=material, message=message) + + +@router.get("", response_model=list[ExamMaterialRead]) +def list_materials( + template_id: uuid.UUID, + current_user: Annotated[User, Depends(get_current_user)], + exam_service: Annotated[ExamService, Depends(get_exam_service)], + material_service: Annotated[MaterialService, Depends(get_material_service)], +) -> list[ExamMaterialRead]: + exam_service.get_owned_template(current_user.id, template_id) + return material_service.list_materials(template_id) + + +@router.delete("/{material_id}", status_code=status.HTTP_204_NO_CONTENT) +def delete_material( + template_id: 
uuid.UUID, + material_id: uuid.UUID, + current_user: Annotated[User, Depends(get_current_user)], + exam_service: Annotated[ExamService, Depends(get_exam_service)], + material_service: Annotated[MaterialService, Depends(get_material_service)], +) -> None: + template = exam_service.get_owned_template(current_user.id, template_id) + material_service.delete_material(template, material_id) diff --git a/backend/app/api/routes/questions.py b/backend/app/api/routes/questions.py new file mode 100644 index 0000000..4eb1d77 --- /dev/null +++ b/backend/app/api/routes/questions.py @@ -0,0 +1,27 @@ +import uuid +from typing import Annotated + +from fastapi import APIRouter, Depends + +from app.api.dependencies import get_exam_service, get_image_service +from app.core.auth import get_current_user +from app.models.user import User +from app.schemas.exam import QuestionRead +from app.schemas.image import QuestionImageAttach +from app.services.exam_service import ExamService +from app.services.image_service import ImageService + +router = APIRouter(prefix="/questions", tags=["questions"]) + + +@router.patch("/{question_id}/image", response_model=QuestionRead) +def attach_image_to_question( + question_id: uuid.UUID, + payload: QuestionImageAttach, + current_user: Annotated[User, Depends(get_current_user)], + exam_service: Annotated[ExamService, Depends(get_exam_service)], + image_service: Annotated[ImageService, Depends(get_image_service)], +) -> QuestionRead: + question, template = exam_service.get_owned_question(current_user.id, question_id) + updated = image_service.attach_image_to_question(template, question, payload.image_id) + return exam_service.to_question_read(updated) diff --git a/backend/app/api/routes/templates.py b/backend/app/api/routes/templates.py index af152d2..5b9a1ff 100644 --- a/backend/app/api/routes/templates.py +++ b/backend/app/api/routes/templates.py @@ -3,11 +3,13 @@ from typing import Annotated from fastapi import APIRouter, Depends, status -from 
app.api.dependencies import get_exam_service +from app.api.dependencies import get_exam_service, get_storage_quota_service from app.core.auth import get_current_user from app.models.user import User from app.schemas.exam import ExamTemplateCreate, ExamTemplateRead +from app.schemas.storage import TemplateStorageUsage from app.services.exam_service import ExamService +from app.services.storage_quota import StorageQuotaService router = APIRouter(prefix="/templates", tags=["templates"]) @@ -36,3 +38,14 @@ def get_template( service: Annotated[ExamService, Depends(get_exam_service)], ) -> ExamTemplateRead: return service.get_template(current_user.id, template_id) + + +@router.get("/{template_id}/storage", response_model=TemplateStorageUsage) +def get_template_storage_usage( + template_id: uuid.UUID, + current_user: Annotated[User, Depends(get_current_user)], + exam_service: Annotated[ExamService, Depends(get_exam_service)], + storage_quota: Annotated[StorageQuotaService, Depends(get_storage_quota_service)], +) -> TemplateStorageUsage: + exam_service.get_owned_template(current_user.id, template_id) + return TemplateStorageUsage.model_validate(storage_quota.get_usage_summary(template_id)) diff --git a/backend/app/core/config.py b/backend/app/core/config.py index a6693d4..a2e47e0 100644 --- a/backend/app/core/config.py +++ b/backend/app/core/config.py @@ -22,6 +22,17 @@ class Settings(BaseSettings): jwt_algorithm: str = "HS256" jwt_expire_minutes: int = Field(default=60 * 24, ge=5) google_client_id: str | None = None + upload_dir: str = "/app/uploads" + max_upload_bytes: int = Field(default=20_971_520, ge=1_024) + max_materials_per_template: int = Field(default=10, ge=1, le=50) + max_reference_chars: int = Field(default=12_000, ge=1_000, le=100_000) + max_image_bytes: int = Field(default=5_242_880, ge=1_024) + max_images_per_template: int = Field(default=20, ge=1, le=100) + max_storage_bytes_per_template: int = Field( + default=52_428_800, + ge=1_024, + description="Cupo 
total por examen (materiales + imágenes). Por defecto 50 MB.", + ) model_config = SettingsConfigDict( env_file=".env", diff --git a/backend/app/core/security.py b/backend/app/core/security.py index facd271..83a06c1 100644 --- a/backend/app/core/security.py +++ b/backend/app/core/security.py @@ -32,8 +32,8 @@ def clean_text(value: str, *, max_length: int = 8_000) -> str: return cleaned -def sanitize_prompt_input(value: str) -> str: - cleaned = clean_text(value, max_length=4_000) +def sanitize_prompt_input(value: str, *, max_length: int = 4_000) -> str: + cleaned = clean_text(value, max_length=max_length) return ROLE_INJECTION_HINTS.sub("[filtered instruction]", cleaned) diff --git a/backend/app/main.py b/backend/app/main.py index bc6a8dd..7157853 100644 --- a/backend/app/main.py +++ b/backend/app/main.py @@ -4,7 +4,7 @@ from collections.abc import AsyncIterator from fastapi import Depends, FastAPI from fastapi.middleware.cors import CORSMiddleware -from app.api.routes import auth, exports, generation, health, history, templates +from app.api.routes import auth, exports, generation, health, history, images, materials, questions, templates from app.core.config import get_settings from app.core.errors import register_exception_handlers from app.core.middleware import RateLimitMiddleware, RequestSizeLimitMiddleware @@ -25,7 +25,7 @@ def create_app() -> FastAPI: CORSMiddleware, allow_origins=settings.cors_origins, allow_credentials=True, - allow_methods=["GET", "POST", "OPTIONS"], + allow_methods=["GET", "POST", "PATCH", "DELETE", "OPTIONS"], allow_headers=["Authorization", "Content-Type", "X-API-Key"], ) app.add_middleware(RequestSizeLimitMiddleware, settings=settings) @@ -39,6 +39,9 @@ def create_app() -> FastAPI: app.include_router(generation.router, prefix="/exam") app.include_router(exports.router, prefix="/exam") app.include_router(history.router, prefix="/exam") + app.include_router(materials.router, prefix="/exam") + app.include_router(images.router, 
prefix="/exam") + app.include_router(questions.router, prefix="/exam") return app diff --git a/backend/app/models/exam.py b/backend/app/models/exam.py index a6717c9..98870aa 100644 --- a/backend/app/models/exam.py +++ b/backend/app/models/exam.py @@ -35,6 +35,11 @@ class ExportFormat(str, enum.Enum): JSON = "json" +class MaterialStatus(str, enum.Enum): + PROCESSED = "processed" + FAILED = "failed" + + class ExamTemplate(Base): __tablename__ = "exam_templates" @@ -66,6 +71,16 @@ class ExamTemplate(Base): cascade="all, delete-orphan", passive_deletes=True, ) + materials: Mapped[list["ExamMaterial"]] = relationship( + back_populates="template", + cascade="all, delete-orphan", + passive_deletes=True, + ) + images: Mapped[list["ExamImage"]] = relationship( + back_populates="template", + cascade="all, delete-orphan", + passive_deletes=True, + ) class Question(Base): @@ -87,9 +102,16 @@ class Question(Base): score: Mapped[float] = mapped_column(Float, nullable=False, default=1.0) penalty: Mapped[float] = mapped_column(Float, nullable=False, default=0.0) options: Mapped[dict[str, Any]] = mapped_column(JSONB, nullable=False, default=dict) + image_id: Mapped[uuid.UUID | None] = mapped_column( + UUID(as_uuid=True), + ForeignKey("exam_images.id", ondelete="SET NULL"), + nullable=True, + index=True, + ) created_at: Mapped[datetime] = mapped_column(DateTime(timezone=True), server_default=func.now()) template: Mapped[ExamTemplate] = relationship(back_populates="questions") + image: Mapped["ExamImage | None"] = relationship(back_populates="questions") class ExportJob(Base): @@ -108,3 +130,47 @@ class ExportJob(Base): created_at: Mapped[datetime] = mapped_column(DateTime(timezone=True), server_default=func.now()) template: Mapped[ExamTemplate] = relationship(back_populates="export_jobs") + + +class ExamMaterial(Base): + __tablename__ = "exam_materials" + + id: Mapped[uuid.UUID] = mapped_column(UUID(as_uuid=True), primary_key=True, default=uuid.uuid4) + template_id: 
Mapped[uuid.UUID] = mapped_column( + UUID(as_uuid=True), + ForeignKey("exam_templates.id", ondelete="CASCADE"), + nullable=False, + index=True, + ) + original_filename: Mapped[str] = mapped_column(String(255), nullable=False) + mime_type: Mapped[str] = mapped_column(String(120), nullable=False) + size_bytes: Mapped[int] = mapped_column(nullable=False) + storage_path: Mapped[str] = mapped_column(String(500), nullable=False) + extracted_text: Mapped[str | None] = mapped_column(Text, nullable=True) + status: Mapped[MaterialStatus] = mapped_column(Enum(MaterialStatus), nullable=False) + error_message: Mapped[str | None] = mapped_column(String(500), nullable=True) + created_at: Mapped[datetime] = mapped_column(DateTime(timezone=True), server_default=func.now()) + + template: Mapped[ExamTemplate] = relationship(back_populates="materials") + + +class ExamImage(Base): + __tablename__ = "exam_images" + + id: Mapped[uuid.UUID] = mapped_column(UUID(as_uuid=True), primary_key=True, default=uuid.uuid4) + template_id: Mapped[uuid.UUID] = mapped_column( + UUID(as_uuid=True), + ForeignKey("exam_templates.id", ondelete="CASCADE"), + nullable=False, + index=True, + ) + original_filename: Mapped[str] = mapped_column(String(255), nullable=False) + stored_filename: Mapped[str] = mapped_column(String(255), nullable=False) + mime_type: Mapped[str] = mapped_column(String(120), nullable=False) + size_bytes: Mapped[int] = mapped_column(nullable=False) + storage_path: Mapped[str] = mapped_column(String(500), nullable=False) + caption: Mapped[str | None] = mapped_column(String(500), nullable=True) + created_at: Mapped[datetime] = mapped_column(DateTime(timezone=True), server_default=func.now()) + + template: Mapped[ExamTemplate] = relationship(back_populates="images") + questions: Mapped[list["Question"]] = relationship(back_populates="image") diff --git a/backend/app/schemas/exam.py b/backend/app/schemas/exam.py index ef2ad15..8ac7e45 100644 --- a/backend/app/schemas/exam.py +++ 
b/backend/app/schemas/exam.py @@ -65,6 +65,10 @@ class QuestionCreate(BaseModel): correct_answers: list[str] = Field(min_length=1, max_length=20) wrong_answers: list[str] = Field(default_factory=list, max_length=20) matching_pairs: list[MatchingPair] = Field(default_factory=list, max_length=50) + image_id: uuid.UUID | None = Field( + default=None, + description="ID de imagen de la plantilla que debe mostrarse con la pregunta.", + ) difficulty: Difficulty = Difficulty.MEDIUM score: float = Field(default=1.0, ge=0.0, le=100.0) penalty: float = Field(default=0.0, ge=0.0, le=100.0) @@ -91,6 +95,7 @@ class QuestionCreate(BaseModel): class QuestionRead(QuestionCreate): id: uuid.UUID template_id: uuid.UUID + image_url: str | None = None created_at: datetime model_config = ConfigDict(from_attributes=True) @@ -104,11 +109,19 @@ class PromptResponse(BaseModel): class BuildPromptRequest(BaseModel): topic_prompt: str = Field(min_length=5, max_length=4_000) + material_ids: list[uuid.UUID] | None = Field( + default=None, + description="IDs de materiales a incluir. Si no se indica, se usan todos los procesados.", + ) class GenerateExamRequest(BaseModel): template_id: uuid.UUID topic_prompt: str = Field(min_length=5, max_length=4_000) + material_ids: list[uuid.UUID] | None = Field( + default=None, + description="IDs de materiales a incluir. 
Si no se indica, se usan todos los procesados.", + ) class ParseRequest(BaseModel): diff --git a/backend/app/schemas/image.py b/backend/app/schemas/image.py new file mode 100644 index 0000000..7a0811c --- /dev/null +++ b/backend/app/schemas/image.py @@ -0,0 +1,30 @@ +import uuid +from datetime import datetime + +from pydantic import BaseModel, ConfigDict, Field + + +class ExamImageRead(BaseModel): + id: uuid.UUID + template_id: uuid.UUID + original_filename: str + stored_filename: str + mime_type: str + size_bytes: int + caption: str | None + content_url: str + created_at: datetime + + model_config = ConfigDict(from_attributes=True) + + +class ExamImageUploadResponse(BaseModel): + image: ExamImageRead + message: str = "Image uploaded successfully" + + +class QuestionImageAttach(BaseModel): + image_id: uuid.UUID | None = Field( + default=None, + description="ID de imagen de la plantilla. null para desvincular.", + ) diff --git a/backend/app/schemas/material.py b/backend/app/schemas/material.py new file mode 100644 index 0000000..93bbf20 --- /dev/null +++ b/backend/app/schemas/material.py @@ -0,0 +1,32 @@ +import uuid +from datetime import datetime + +from pydantic import BaseModel, ConfigDict, Field + +from app.models.exam import MaterialStatus + + +class ExamMaterialRead(BaseModel): + id: uuid.UUID + template_id: uuid.UUID + original_filename: str + mime_type: str + size_bytes: int + status: MaterialStatus + error_message: str | None + text_preview: str | None = None + created_at: datetime + + model_config = ConfigDict(from_attributes=True) + + +class ExamMaterialUploadResponse(BaseModel): + material: ExamMaterialRead + message: str = "File uploaded and processed successfully" + + +class MaterialIdsFilter(BaseModel): + material_ids: list[uuid.UUID] | None = Field( + default=None, + description="Si se indica, solo se usan estos materiales como contexto.", + ) diff --git a/backend/app/schemas/storage.py b/backend/app/schemas/storage.py new file mode 100644 index 
0000000..036656e --- /dev/null +++ b/backend/app/schemas/storage.py @@ -0,0 +1,14 @@ +import uuid + +from pydantic import BaseModel + + +class TemplateStorageUsage(BaseModel): + template_id: uuid.UUID + used_bytes: int + limit_bytes: int + remaining_bytes: int + materials_bytes: int + images_bytes: int + used_mb: float + limit_mb: float diff --git a/backend/app/services/document_extractor.py b/backend/app/services/document_extractor.py new file mode 100644 index 0000000..1841ca4 --- /dev/null +++ b/backend/app/services/document_extractor.py @@ -0,0 +1,74 @@ +from pathlib import Path + +from app.core.errors import AppError + +SUPPORTED_EXTENSIONS = { + ".pdf": "application/pdf", + ".docx": "application/vnd.openxmlformats-officedocument.wordprocessingml.document", + ".txt": "text/plain", + ".md": "text/markdown", + ".png": "image/png", + ".jpg": "image/jpeg", + ".jpeg": "image/jpeg", + ".webp": "image/webp", +} + + +class DocumentExtractor: + def extract(self, file_path: Path, mime_type: str) -> str: + suffix = file_path.suffix.lower() + if mime_type == "application/pdf" or suffix == ".pdf": + return self._extract_pdf(file_path) + if ( + mime_type == "application/vnd.openxmlformats-officedocument.wordprocessingml.document" + or suffix == ".docx" + ): + return self._extract_docx(file_path) + if mime_type.startswith("text/") or suffix in {".txt", ".md"}: + return self._extract_text(file_path) + if mime_type.startswith("image/") or suffix in {".png", ".jpg", ".jpeg", ".webp"}: + return self._extract_image(file_path) + raise AppError(f"Unsupported file type: {mime_type}", status_code=415, code="unsupported_media") + + def _extract_pdf(self, file_path: Path) -> str: + from pypdf import PdfReader + + reader = PdfReader(str(file_path)) + parts = [page.extract_text() or "" for page in reader.pages] + text = "\n".join(parts).strip() + if not text: + raise AppError("PDF does not contain extractable text", status_code=422, code="empty_extraction") + return text + + def 
_extract_docx(self, file_path: Path) -> str: + from docx import Document + + document = Document(str(file_path)) + parts = [paragraph.text.strip() for paragraph in document.paragraphs if paragraph.text.strip()] + text = "\n".join(parts).strip() + if not text: + raise AppError("DOCX does not contain extractable text", status_code=422, code="empty_extraction") + return text + + def _extract_text(self, file_path: Path) -> str: + text = file_path.read_text(encoding="utf-8", errors="ignore").strip() + if not text: + raise AppError("Text file is empty", status_code=422, code="empty_extraction") + return text + + def _extract_image(self, file_path: Path) -> str: + try: + import pytesseract + from PIL import Image + except ImportError as exc: + raise AppError( + "Image OCR is not available on this server", + status_code=503, + code="ocr_unavailable", + ) from exc + + image = Image.open(file_path) + text = pytesseract.image_to_string(image, lang="spa+eng").strip() + if not text: + raise AppError("Image does not contain recognizable text", status_code=422, code="empty_extraction") + return text diff --git a/backend/app/services/exam_service.py b/backend/app/services/exam_service.py index 42d6114..a4371b2 100644 --- a/backend/app/services/exam_service.py +++ b/backend/app/services/exam_service.py @@ -17,7 +17,9 @@ from app.schemas.exam import ( QuestionCreate, QuestionRead, ) +from app.services.image_service import ImageService from app.services.llm import LLMClient +from app.services.material_service import MaterialService from app.services.moodle_exporter import MoodleXMLExporter from app.services.parser import AIQuestionParser from app.services.prompt_builder import PromptBuilder @@ -30,11 +32,15 @@ class ExamService: prompt_builder: PromptBuilder | None = None, parser: AIQuestionParser | None = None, exporter: MoodleXMLExporter | None = None, + material_service: MaterialService | None = None, + image_service: ImageService | None = None, ) -> None: self.db = db 
self.prompt_builder = prompt_builder or PromptBuilder() self.parser = parser or AIQuestionParser() self.exporter = exporter or MoodleXMLExporter() + self.material_service = material_service + self.image_service = image_service def create_template(self, user_id: uuid.UUID, payload: ExamTemplateCreate) -> ExamTemplateRead: template = ExamTemplate( @@ -87,9 +93,25 @@ class ExamService: def get_template(self, user_id: uuid.UUID, template_id: uuid.UUID) -> ExamTemplateRead: return self._template_read(self._get_user_template_or_404(user_id, template_id)) - def build_prompt(self, user_id: uuid.UUID, template_id: uuid.UUID, topic_prompt: str) -> PromptResponse: + def get_owned_template(self, user_id: uuid.UUID, template_id: uuid.UUID) -> ExamTemplate: + return self._get_user_template_or_404(user_id, template_id) + + def build_prompt( + self, + user_id: uuid.UUID, + template_id: uuid.UUID, + topic_prompt: str, + material_ids: list[uuid.UUID] | None = None, + ) -> PromptResponse: template = self._get_user_template_or_404(user_id, template_id) - prompt = self.prompt_builder.build_prompt(template, topic_prompt) + reference_context = self._reference_context(template_id, material_ids) + images_catalog = self._images_catalog(template_id) + prompt = self.prompt_builder.build_prompt( + template, + topic_prompt, + reference_context, + images_catalog, + ) return PromptResponse(template_id=template.id, prompt=prompt) async def generate_with_llm( @@ -98,9 +120,17 @@ class ExamService: template_id: uuid.UUID, topic_prompt: str, llm_client: LLMClient, + material_ids: list[uuid.UUID] | None = None, ) -> ParsedQuestionsResponse: template = self._get_user_template_or_404(user_id, template_id) - prompt = self.prompt_builder.build_prompt(template, topic_prompt) + reference_context = self._reference_context(template_id, material_ids) + images_catalog = self._images_catalog(template_id) + prompt = self.prompt_builder.build_prompt( + template, + topic_prompt, + reference_context, + 
images_catalog, + ) raw_output = await llm_client.generate(prompt) questions = self.parser.parse_json(raw_output) return self._persist_questions(template.id, questions) @@ -116,8 +146,9 @@ class ExamService: if not questions: raise NotFoundError("Template does not contain questions to export") + image_map = self._image_map(template.id) if export_format == ExportFormat.XML: - content = self.exporter.export_xml(questions) + content = self.exporter.export_xml(questions, image_map) elif export_format == ExportFormat.TXT: content = self.exporter.export_txt(questions) else: @@ -134,9 +165,30 @@ class ExamService: self.db.commit() return ExportResponse(template_id=template.id, format=export_format, content=content) + def get_owned_question(self, user_id: uuid.UUID, question_id: uuid.UUID) -> tuple[Question, ExamTemplate]: + question = self.db.get(Question, question_id) + if question is None: + raise NotFoundError("Question not found") + template = self._get_user_template_or_404(user_id, question.template_id) + if question.template_id != template.id: + raise NotFoundError("Question not found") + return question, template + + def to_question_read(self, question: Question) -> QuestionRead: + read = QuestionRead.model_validate(question) + if question.image_id: + return read.model_copy(update={"image_url": f"/exam/images/{question.image_id}/content"}) + return read + def _persist_questions(self, template_id: uuid.UUID, questions: list[QuestionCreate]) -> ParsedQuestionsResponse: persisted: list[Question] = [] for payload in questions: + image_id = payload.image_id + if image_id is not None: + if self.image_service is None: + raise NotFoundError("Image service is not available") + self.image_service.get_image_for_template(template_id, image_id) + question = Question( template_id=template_id, question_type=payload.question_type, @@ -144,6 +196,7 @@ class ExamService: correct_answers=[clean_text(answer, max_length=1_000) for answer in payload.correct_answers], 
wrong_answers=[clean_text(answer, max_length=1_000) for answer in payload.wrong_answers], matching_pairs=[pair.model_dump() for pair in payload.matching_pairs], + image_id=image_id, difficulty=payload.difficulty, score=payload.score, penalty=payload.penalty, @@ -156,7 +209,26 @@ class ExamService: for question in persisted: self.db.refresh(question) - return ParsedQuestionsResponse(questions=[QuestionRead.model_validate(question) for question in persisted]) + return ParsedQuestionsResponse(questions=[self.to_question_read(question) for question in persisted]) + + def _reference_context( + self, + template_id: uuid.UUID, + material_ids: list[uuid.UUID] | None, + ) -> str: + if self.material_service is None: + return "" + return self.material_service.build_reference_context(template_id, material_ids) + + def _images_catalog(self, template_id: uuid.UUID) -> str: + if self.image_service is None: + return "" + return self.image_service.images_catalog(template_id) + + def _image_map(self, template_id: uuid.UUID) -> dict[uuid.UUID, object]: + if self.image_service is None: + return {} + return self.image_service.build_image_map(template_id) def _get_user_template_or_404(self, user_id: uuid.UUID, template_id: uuid.UUID) -> ExamTemplate: template = self.db.get(ExamTemplate, template_id) diff --git a/backend/app/services/image_service.py b/backend/app/services/image_service.py new file mode 100644 index 0000000..3f5bf22 --- /dev/null +++ b/backend/app/services/image_service.py @@ -0,0 +1,206 @@ +import uuid +from pathlib import Path + +from fastapi import UploadFile +from PIL import Image, UnidentifiedImageError +from sqlalchemy import func, select +from sqlalchemy.orm import Session + +from app.core.config import Settings +from app.core.errors import AppError, NotFoundError +from app.core.security import clean_text +from app.models.exam import ExamImage, ExamTemplate, Question +from app.services.storage_quota import StorageQuotaService + +ALLOWED_IMAGE_EXTENSIONS = {".png", 
".jpg", ".jpeg", ".webp", ".gif"} +ALLOWED_IMAGE_MIMES = { + "image/png", + "image/jpeg", + "image/webp", + "image/gif", +} + + +class ImageService: + def __init__( + self, + db: Session, + settings: Settings, + storage_quota: StorageQuotaService | None = None, + ) -> None: + self.db = db + self.settings = settings + self.storage_quota = storage_quota or StorageQuotaService(db, settings) + self.image_root = Path(settings.upload_dir) / "exam_images" + self.image_root.mkdir(parents=True, exist_ok=True) + + def upload( + self, + template: ExamTemplate, + upload_file: UploadFile, + caption: str | None = None, + ) -> ExamImage: + self._validate_upload_count(template.id) + suffix, mime_type = self._validate_image_file(upload_file) + + content = upload_file.file.read() + if len(content) > self.settings.max_image_bytes: + raise AppError( + f"Image exceeds maximum size of {self.settings.max_image_bytes} bytes", + status_code=413, + code="file_too_large", + ) + + self.storage_quota.ensure_template_has_space(template.id, len(content)) + + image_id = uuid.uuid4() + stored_filename = f"{image_id}{suffix}" + target_dir = self.image_root / str(template.user_id) / str(template.id) + target_dir.mkdir(parents=True, exist_ok=True) + storage_path = target_dir / stored_filename + storage_path.write_bytes(content) + self._verify_image_integrity(storage_path) + + image = ExamImage( + id=image_id, + template_id=template.id, + original_filename=clean_text(upload_file.filename or stored_filename, max_length=255), + stored_filename=stored_filename, + mime_type=mime_type, + size_bytes=len(content), + storage_path=str(storage_path), + caption=clean_text(caption, max_length=500) if caption else None, + ) + self.db.add(image) + self.db.commit() + self.db.refresh(image) + return image + + def list_images(self, template_id: uuid.UUID) -> list[ExamImage]: + return list( + self.db.scalars( + select(ExamImage) + .where(ExamImage.template_id == template_id) + .order_by(ExamImage.created_at.desc()) + 
).all() + ) + + def get_image_for_template(self, template_id: uuid.UUID, image_id: uuid.UUID) -> ExamImage: + image = self.db.get(ExamImage, image_id) + if image is None or image.template_id != template_id: + raise NotFoundError("Image not found for this template") + return image + + def get_image_for_user(self, user_id: uuid.UUID, image_id: uuid.UUID) -> ExamImage: + image = self.db.get(ExamImage, image_id) + if image is None: + raise NotFoundError("Image not found") + template = image.template + if template.user_id != user_id: + raise NotFoundError("Image not found") + return image + + def delete_image(self, template: ExamTemplate, image_id: uuid.UUID) -> None: + image = self.get_image_for_template(template.id, image_id) + for question in list(image.questions): + question.image_id = None + + path = Path(image.storage_path) + if path.exists(): + path.unlink() + + self.db.delete(image) + self.db.commit() + + def attach_image_to_question( + self, + template: ExamTemplate, + question: Question, + image_id: uuid.UUID | None, + ) -> Question: + if question.template_id != template.id: + raise NotFoundError("Question not found for this template") + if image_id is not None: + self.get_image_for_template(template.id, image_id) + question.image_id = image_id + self.db.commit() + self.db.refresh(question) + return question + + def images_catalog(self, template_id: uuid.UUID) -> str: + images = self.list_images(template_id) + if not images: + return "" + + lines = [ + "Imágenes disponibles para preguntas visuales (el enunciado debe referirse a la imagen; " + "asigna el campo image_id en cada pregunta que deba mostrarla):" + ] + for image in images: + caption = image.caption or "sin descripción" + lines.append( + f"- image_id: {image.id} | archivo: {image.original_filename} | descripción: {caption}" + ) + return "\n".join(lines) + + def build_image_map(self, template_id: uuid.UUID) -> dict[uuid.UUID, ExamImage]: + images = self.list_images(template_id) + return {image.id: 
image for image in images} + + def to_read(self, image: ExamImage) -> dict[str, object]: + return { + "id": image.id, + "template_id": image.template_id, + "original_filename": image.original_filename, + "stored_filename": image.stored_filename, + "mime_type": image.mime_type, + "size_bytes": image.size_bytes, + "caption": image.caption, + "content_url": f"/exam/images/{image.id}/content", + "created_at": image.created_at, + } + + def _validate_upload_count(self, template_id: uuid.UUID) -> None: + count = self.db.scalar( + select(func.count()).select_from(ExamImage).where(ExamImage.template_id == template_id) + ) + if count is not None and count >= self.settings.max_images_per_template: + raise AppError( + f"Maximum of {self.settings.max_images_per_template} images per template reached", + status_code=409, + code="too_many_images", + ) + + def _validate_image_file(self, upload_file: UploadFile) -> tuple[str, str]: + if not upload_file.filename: + raise AppError("Filename is required", status_code=400, code="invalid_file") + + suffix = Path(upload_file.filename).suffix.lower() + if suffix not in ALLOWED_IMAGE_EXTENSIONS: + raise AppError( + f"Unsupported image type. 
Allowed: {', '.join(sorted(ALLOWED_IMAGE_EXTENSIONS))}", + status_code=415, + code="unsupported_media", + ) + + mime_type = upload_file.content_type or "" + if mime_type and mime_type not in ALLOWED_IMAGE_MIMES: + raise AppError("Unsupported image MIME type", status_code=415, code="unsupported_media") + + mime_by_suffix = { + ".png": "image/png", + ".jpg": "image/jpeg", + ".jpeg": "image/jpeg", + ".webp": "image/webp", + ".gif": "image/gif", + } + resolved_mime = mime_type if mime_type in ALLOWED_IMAGE_MIMES else mime_by_suffix[suffix] + return suffix, resolved_mime + + def _verify_image_integrity(self, storage_path: Path) -> None: + try: + with Image.open(storage_path) as img: + img.verify() + except (UnidentifiedImageError, OSError) as exc: + storage_path.unlink(missing_ok=True) + raise AppError("Invalid or corrupted image file", status_code=422, code="invalid_image") from exc diff --git a/backend/app/services/material_service.py b/backend/app/services/material_service.py new file mode 100644 index 0000000..7d309a5 --- /dev/null +++ b/backend/app/services/material_service.py @@ -0,0 +1,188 @@ +import uuid +from pathlib import Path + +from fastapi import UploadFile +from sqlalchemy import func, select +from sqlalchemy.orm import Session + +from app.core.config import Settings +from app.core.errors import AppError, NotFoundError +from app.core.security import clean_text +from app.models.exam import ExamMaterial, ExamTemplate, MaterialStatus +from app.schemas.material import ExamMaterialRead +from app.services.document_extractor import SUPPORTED_EXTENSIONS, DocumentExtractor +from app.services.storage_quota import StorageQuotaService + + +class MaterialService: + def __init__( + self, + db: Session, + settings: Settings, + storage_quota: StorageQuotaService | None = None, + ) -> None: + self.db = db + self.settings = settings + self.storage_quota = storage_quota or StorageQuotaService(db, settings) + self.extractor = DocumentExtractor() + self.upload_root = 
Path(settings.upload_dir) + self.upload_root.mkdir(parents=True, exist_ok=True) + + def upload( + self, + template: ExamTemplate, + upload_file: UploadFile, + ) -> ExamMaterialRead: + self._validate_upload(template.id, upload_file) + + suffix = Path(upload_file.filename or "file").suffix.lower() + if suffix not in SUPPORTED_EXTENSIONS: + raise AppError( + f"Unsupported extension. Allowed: {', '.join(sorted(SUPPORTED_EXTENSIONS))}", + status_code=415, + code="unsupported_media", + ) + + content = upload_file.file.read() + if len(content) > self.settings.max_upload_bytes: + raise AppError( + f"File exceeds maximum size of {self.settings.max_upload_bytes} bytes", + status_code=413, + code="file_too_large", + ) + if not content: + raise AppError("Uploaded file is empty", status_code=400, code="empty_file") + + self.storage_quota.ensure_template_has_space(template.id, len(content)) + + material_id = uuid.uuid4() + safe_name = f"{material_id}{suffix}" + target_dir = self.upload_root / str(template.user_id) / str(template.id) + target_dir.mkdir(parents=True, exist_ok=True) + storage_path = target_dir / safe_name + storage_path.write_bytes(content) + + mime_type = upload_file.content_type or SUPPORTED_EXTENSIONS[suffix] + material = ExamMaterial( + id=material_id, + template_id=template.id, + original_filename=clean_text(upload_file.filename or safe_name, max_length=255), + mime_type=mime_type, + size_bytes=len(content), + storage_path=str(storage_path), + status=MaterialStatus.PROCESSED, + ) + + try: + material.extracted_text = clean_text( + self.extractor.extract(storage_path, mime_type), + max_length=500_000, + ) + except AppError as exc: + material.status = MaterialStatus.FAILED + material.error_message = clean_text(exc.message, max_length=500) + except Exception as exc: + material.status = MaterialStatus.FAILED + material.error_message = clean_text(str(exc), max_length=500) + + self.db.add(material) + self.db.commit() + self.db.refresh(material) + return 
self._to_read(material) + + def list_materials(self, template_id: uuid.UUID) -> list[ExamMaterialRead]: + materials = self.db.scalars( + select(ExamMaterial) + .where(ExamMaterial.template_id == template_id) + .order_by(ExamMaterial.created_at.desc()) + ).all() + return [self._to_read(material) for material in materials] + + def delete_material(self, template: ExamTemplate, material_id: uuid.UUID) -> None: + material = self.db.get(ExamMaterial, material_id) + if material is None or material.template_id != template.id: + raise NotFoundError("Material not found") + + path = Path(material.storage_path) + if path.exists(): + path.unlink() + + self.db.delete(material) + self.db.commit() + + def build_reference_context( + self, + template_id: uuid.UUID, + material_ids: list[uuid.UUID] | None = None, + ) -> str: + query = select(ExamMaterial).where( + ExamMaterial.template_id == template_id, + ExamMaterial.status == MaterialStatus.PROCESSED, + ExamMaterial.extracted_text.isnot(None), + ) + if material_ids: + query = query.where(ExamMaterial.id.in_(material_ids)) + + materials = self.db.scalars(query.order_by(ExamMaterial.created_at.asc())).all() + if material_ids: + found_ids = {material.id for material in materials} + missing = [material_id for material_id in material_ids if material_id not in found_ids] + if missing: + raise NotFoundError("One or more material IDs were not found or are not processed") + + if not materials: + return "" + + sections: list[str] = [] + for material in materials: + text = material.extracted_text or "" + if not text.strip(): + continue + sections.append( + f"--- Archivo: {material.original_filename} ---\n{text.strip()}" + ) + + if not sections: + return "" + + combined = "\n\n".join(sections) + max_chars = self.settings.max_reference_chars + if len(combined) <= max_chars: + return combined + + truncated = combined[:max_chars].rsplit("\n", 1)[0] + return f"{truncated}\n\n[Material truncado por límite de contexto]" + + def 
_validate_upload(self, template_id: uuid.UUID, upload_file: UploadFile) -> None: + if not upload_file.filename: + raise AppError("Filename is required", status_code=400, code="invalid_file") + + count = self.db.scalar( + select(func.count()) + .select_from(ExamMaterial) + .where(ExamMaterial.template_id == template_id) + ) + if count is not None and count >= self.settings.max_materials_per_template: + raise AppError( + f"Maximum of {self.settings.max_materials_per_template} files per template reached", + status_code=409, + code="too_many_files", + ) + + def _to_read(self, material: ExamMaterial) -> ExamMaterialRead: + preview = None + if material.extracted_text: + preview = material.extracted_text[:300] + if len(material.extracted_text) > 300: + preview += "..." + return ExamMaterialRead( + id=material.id, + template_id=material.template_id, + original_filename=material.original_filename, + mime_type=material.mime_type, + size_bytes=material.size_bytes, + status=material.status, + error_message=material.error_message, + text_preview=preview, + created_at=material.created_at, + ) diff --git a/backend/app/services/moodle_exporter.py b/backend/app/services/moodle_exporter.py index 2048cd9..d15358f 100644 --- a/backend/app/services/moodle_exporter.py +++ b/backend/app/services/moodle_exporter.py @@ -1,15 +1,20 @@ +import base64 import json +from html import escape as html_escape +from pathlib import Path from typing import Any +from uuid import UUID from xml.sax.saxutils import escape as xml_escape from app.core.security import clean_text class MoodleXMLExporter: - def export_xml(self, questions: list[Any]) -> str: + def export_xml(self, questions: list[Any], image_map: dict[UUID, Any] | None = None) -> str: + images = image_map or {} parts = ['', ""] for index, question in enumerate(questions, start=1): - parts.append(self._export_question(question, index)) + parts.append(self._export_question(question, index, images)) parts.append("") return "\n".join(parts) @@ -17,6 
+22,8 @@ class MoodleXMLExporter: blocks: list[str] = [] for question in questions: lines = [self._attr(question, "statement")] + if self._attr(question, "image_id"): + lines.append(f"[Imagen adjunta: {self._attr(question, 'image_id')}]") lines.extend(self._attr(question, "correct_answers") or []) lines.extend(self._attr(question, "wrong_answers") or []) blocks.append("\n".join(clean_text(str(line)) for line in lines)) @@ -26,19 +33,19 @@ class MoodleXMLExporter: payload = {"questions": [self._question_dict(question) for question in questions]} return json.dumps(payload, ensure_ascii=False, indent=2, default=str) - def _export_question(self, question: Any, index: int) -> str: + def _export_question(self, question: Any, index: int, image_map: dict[UUID, Any]) -> str: question_type = self._enum_value(self._attr(question, "question_type")) if question_type == "multichoice": - return self._multichoice(question, index) + return self._multichoice(question, index, image_map) if question_type == "truefalse": - return self._truefalse(question, index) + return self._truefalse(question, index, image_map) if question_type == "shortanswer": - return self._shortanswer(question, index) + return self._shortanswer(question, index, image_map) if question_type == "matching": - return self._matching(question, index) + return self._matching(question, index, image_map) raise ValueError(f"Unsupported Moodle question type: {question_type}") - def _multichoice(self, question: Any, index: int) -> str: + def _multichoice(self, question: Any, index: int, image_map: dict[UUID, Any]) -> str: correct_answers = self._attr(question, "correct_answers") or [] wrong_answers = self._attr(question, "wrong_answers") or [] options = self._attr(question, "options") or {} @@ -53,7 +60,7 @@ class MoodleXMLExporter: return "\n".join( [ ' ', - self._common_header(question, index), + *self._common_header(question, index, image_map), f" {str(not multiple_correct).lower()}", " 1", *answers, @@ -61,32 +68,32 @@ 
class MoodleXMLExporter: ] ) - def _truefalse(self, question: Any, index: int) -> str: + def _truefalse(self, question: Any, index: int, image_map: dict[UUID, Any]) -> str: correct = (self._attr(question, "correct_answers") or ["true"])[0].lower() is_true = correct in {"true", "verdadero"} return "\n".join( [ ' ', - self._common_header(question, index), + *self._common_header(question, index, image_map), self._answer_xml("true", 100 if is_true else 0), self._answer_xml("false", 0 if is_true else 100), " ", ] ) - def _shortanswer(self, question: Any, index: int) -> str: + def _shortanswer(self, question: Any, index: int, image_map: dict[UUID, Any]) -> str: answers = [self._answer_xml(answer, 100) for answer in self._attr(question, "correct_answers")] return "\n".join( [ ' ', - self._common_header(question, index), + *self._common_header(question, index, image_map), " 0", *answers, " ", ] ) - def _matching(self, question: Any, index: int) -> str: + def _matching(self, question: Any, index: int, image_map: dict[UUID, Any]) -> str: subquestions = [] for pair in self._attr(question, "matching_pairs") or []: prompt = pair.get("prompt") if isinstance(pair, dict) else pair.prompt @@ -106,27 +113,63 @@ class MoodleXMLExporter: return "\n".join( [ ' ', - self._common_header(question, index), + *self._common_header(question, index, image_map), *subquestions, " ", ] ) - def _common_header(self, question: Any, index: int) -> str: + def _common_header(self, question: Any, index: int, image_map: dict[UUID, Any]) -> list[str]: statement = self._attr(question, "statement") name = clean_text(statement, max_length=80) or f"Pregunta {index}" - return "\n".join( - [ - " ", - f" {self._xml(name)}", - " ", - ' ', - f" {self._cdata(statement)}", - " ", - f" {float(self._attr(question, 'score') or 1.0):.2f}", - " ", - ] - ) + return [ + " ", + f" {self._xml(name)}", + " ", + ' ', + f" {self._question_html(question, image_map)}", + " ", + *self._embedded_files(question, image_map), + f" 
{float(self._attr(question, 'score') or 1.0):.2f}", + ' ', + ] + + def _question_html(self, question: Any, image_map: dict[UUID, Any]) -> str: + statement = html_escape(clean_text(str(self._attr(question, "statement")))) + html_parts = [f"

{statement}

"] + + image = self._resolve_image(question, image_map) + if image is not None: + alt = html_escape(clean_text(image.caption or image.original_filename, max_length=200)) + html_parts.append( + f'

{alt}

' + ) + + return self._cdata("".join(html_parts)) + + def _embedded_files(self, question: Any, image_map: dict[UUID, Any]) -> list[str]: + image = self._resolve_image(question, image_map) + if image is None: + return [] + + path = Path(image.storage_path) + if not path.exists(): + return [] + + encoded = base64.b64encode(path.read_bytes()).decode("ascii") + return [ + f' ', + encoded, + " ", + ] + + def _resolve_image(self, question: Any, image_map: dict[UUID, Any]) -> Any | None: + image_id = self._attr(question, "image_id") + if image_id is None: + return None + if hasattr(question, "image") and question.image is not None: + return question.image + return image_map.get(image_id) def _answer_xml(self, text: str, fraction: float) -> str: fraction_text = f"{fraction:.6g}" @@ -134,7 +177,7 @@ class MoodleXMLExporter: [ f' ', f" {self._xml(text)}", - " ", + ' ', " ", ] ) @@ -144,6 +187,7 @@ class MoodleXMLExporter: "id": str(self._attr(question, "id")) if self._attr(question, "id") else None, "question_type": self._enum_value(self._attr(question, "question_type")), "statement": self._attr(question, "statement"), + "image_id": str(self._attr(question, "image_id")) if self._attr(question, "image_id") else None, "correct_answers": self._attr(question, "correct_answers") or [], "wrong_answers": self._attr(question, "wrong_answers") or [], "matching_pairs": self._attr(question, "matching_pairs") or [], @@ -162,5 +206,5 @@ class MoodleXMLExporter: return xml_escape(clean_text(str(value)), {'"': """, "'": "'"}) def _cdata(self, value: Any) -> str: - text = clean_text(str(value)).replace("]]>", "]]]]>") + text = str(value).replace("]]>", "]]]]>") return f"" diff --git a/backend/app/services/parser.py b/backend/app/services/parser.py index f53dc22..74cb3c8 100644 --- a/backend/app/services/parser.py +++ b/backend/app/services/parser.py @@ -72,12 +72,14 @@ class AIQuestionParser: if isinstance(wrong, str): wrong = [wrong] + image_id = item.get("image_id") return { 
"question_type": question_type, "statement": item.get("statement", item.get("question", item.get("prompt", ""))), "correct_answers": correct, "wrong_answers": wrong, "matching_pairs": item.get("matching_pairs", []), + "image_id": image_id, "difficulty": item.get("difficulty", Difficulty.MEDIUM.value), "score": item.get("score", 1.0), "penalty": item.get("penalty", 0.0), diff --git a/backend/app/services/prompt_builder.py b/backend/app/services/prompt_builder.py index 78c7f6d..7f92647 100644 --- a/backend/app/services/prompt_builder.py +++ b/backend/app/services/prompt_builder.py @@ -5,7 +5,13 @@ from app.models.exam import ExamTemplate class PromptBuilder: - def build_prompt(self, template: ExamTemplate, topic_prompt: str) -> str: + def build_prompt( + self, + template: ExamTemplate, + topic_prompt: str, + reference_context: str = "", + images_catalog: str = "", + ) -> str: settings = template.settings difficulty_profile = template.difficulty_profile safe_topic = sanitize_prompt_input(topic_prompt) @@ -18,6 +24,7 @@ class PromptBuilder: "correct_answers": ["respuesta correcta"], "wrong_answers": ["distractor 1", "distractor 2"], "matching_pairs": [{"prompt": "concepto", "answer": "definicion"}], + "image_id": "uuid-opcional-de-imagen-de-la-plantilla", "difficulty": "easy | medium | hard | very_hard", "score": 1.0, "penalty": 0.0, @@ -41,6 +48,20 @@ class PromptBuilder: "Tema, conceptos y restricciones indicadas por el profesor:", safe_topic, "", + *( + [ + "Material de referencia (usa SOLO esta información junto con el tema para crear preguntas):", + sanitize_prompt_input(reference_context, max_length=12_000) if reference_context else "", + "", + ] + if reference_context.strip() + else [] + ), + *( + [images_catalog, ""] + if images_catalog.strip() + else [] + ), "Contrato de salida obligatorio:", json.dumps(contract, ensure_ascii=False, indent=2), "", @@ -51,5 +72,7 @@ class PromptBuilder: "- En truefalse, usa una única respuesta correcta: true o false.", "- En 
shortanswer, incluye respuestas exactas aceptadas.", "- En matching, rellena matching_pairs y deja wrong_answers vacío.", + "- Si la pregunta debe mostrar una imagen al alumno, incluye image_id del catálogo de imágenes.", + "- El enunciado debe describir qué observar en la imagen vinculada (sin inventar image_id inexistentes).", ] ) diff --git a/backend/app/services/storage_quota.py b/backend/app/services/storage_quota.py new file mode 100644 index 0000000..f064d5b --- /dev/null +++ b/backend/app/services/storage_quota.py @@ -0,0 +1,81 @@ +import uuid + +from sqlalchemy import func, select +from sqlalchemy.orm import Session + +from app.core.config import Settings +from app.core.errors import AppError +from app.models.exam import ExamImage, ExamMaterial + + +class StorageQuotaService: + def __init__(self, db: Session, settings: Settings) -> None: + self.db = db + self.settings = settings + + def get_template_usage_bytes(self, template_id: uuid.UUID) -> int: + materials_bytes = self.db.scalar( + select(func.coalesce(func.sum(ExamMaterial.size_bytes), 0)).where( + ExamMaterial.template_id == template_id + ) + ) + images_bytes = self.db.scalar( + select(func.coalesce(func.sum(ExamImage.size_bytes), 0)).where( + ExamImage.template_id == template_id + ) + ) + return int(materials_bytes or 0) + int(images_bytes or 0) + + def ensure_template_has_space(self, template_id: uuid.UUID, incoming_bytes: int) -> None: + if incoming_bytes <= 0: + return + + limit = self.settings.max_storage_bytes_per_template + used = self.get_template_usage_bytes(template_id) + projected = used + incoming_bytes + + if projected > limit: + raise AppError( + message=( + f"Template storage quota exceeded. 
" + f"Limit: {self._format_mb(limit)}, " + f"used: {self._format_mb(used)}, " + f"file: {self._format_mb(incoming_bytes)}" + ), + status_code=413, + code="template_storage_quota_exceeded", + ) + + def get_usage_summary(self, template_id: uuid.UUID) -> dict[str, int | float]: + materials_bytes = int( + self.db.scalar( + select(func.coalesce(func.sum(ExamMaterial.size_bytes), 0)).where( + ExamMaterial.template_id == template_id + ) + ) + or 0 + ) + images_bytes = int( + self.db.scalar( + select(func.coalesce(func.sum(ExamImage.size_bytes), 0)).where( + ExamImage.template_id == template_id + ) + ) + or 0 + ) + used = materials_bytes + images_bytes + limit = self.settings.max_storage_bytes_per_template + return { + "template_id": template_id, + "used_bytes": used, + "limit_bytes": limit, + "remaining_bytes": max(limit - used, 0), + "materials_bytes": materials_bytes, + "images_bytes": images_bytes, + "used_mb": round(used / (1024 * 1024), 2), + "limit_mb": round(limit / (1024 * 1024), 2), + } + + @staticmethod + def _format_mb(value_bytes: int) -> str: + return f"{value_bytes / (1024 * 1024):.2f} MB" diff --git a/backend/requirements.txt b/backend/requirements.txt index 3076625..f391448 100644 --- a/backend/requirements.txt +++ b/backend/requirements.txt @@ -11,4 +11,9 @@ passlib[bcrypt] python-jose[cryptography] google-auth requests +python-multipart +pypdf +python-docx +Pillow +pytesseract pytest diff --git a/docker-compose.yml b/docker-compose.yml index 1d61571..11f8de1 100644 --- a/docker-compose.yml +++ b/docker-compose.yml @@ -11,6 +11,8 @@ services: depends_on: db: condition: service_healthy + volumes: + - uploads_data:/app/uploads restart: unless-stopped frontend: @@ -40,3 +42,4 @@ services: volumes: postgres_data: + uploads_data: