diff --git a/.env b/.env new file mode 100644 index 0000000..4b5f810 --- /dev/null +++ b/.env @@ -0,0 +1,14 @@ +APP_NAME=GenExamenes IA +ENVIRONMENT=local +API_KEY=change-me-in-production +DATABASE_URL=postgresql+psycopg://genexamenes:genexamenes@db:5432/genexamenes +ALLOWED_ORIGINS=http://localhost:3000 +RATE_LIMIT_REQUESTS=300 +RATE_LIMIT_WINDOW_SECONDS=300 +MAX_REQUEST_BYTES=1048576 + +# OpenAI-compatible chat completions endpoint. +LLM_API_KEY= +LLM_BASE_URL=/api/chat +LLM_MODEL=qwen3.5:35b +LLM_TIMEOUT_SECONDS=250 diff --git a/.env.example b/.env.example new file mode 100644 index 0000000..173588e --- /dev/null +++ b/.env.example @@ -0,0 +1,14 @@ +APP_NAME=GenExamenes IA +ENVIRONMENT=local +API_KEY=change-me-in-production +DATABASE_URL=postgresql+psycopg://genexamenes:genexamenes@db:5432/genexamenes +ALLOWED_ORIGINS=http://localhost:3000 +RATE_LIMIT_REQUESTS=60 +RATE_LIMIT_WINDOW_SECONDS=60 +MAX_REQUEST_BYTES=1048576 + +# OpenAI-compatible chat completions endpoint. +LLM_API_KEY= +LLM_BASE_URL=https://api.openai.com/v1 +LLM_MODEL=gpt-4o-mini +LLM_TIMEOUT_SECONDS=60 diff --git a/FlujoDeUsuario.txt b/FlujoDeUsuario.txt new file mode 100644 index 0000000..8f34a92 --- /dev/null +++ b/FlujoDeUsuario.txt @@ -0,0 +1,22 @@ +Ahora mismo el flujo es backend/API, sin frontend: + +1.- El profesor crea una plantilla con POST /exam/templates Define título, materia, nivel educativo, +tipos de preguntas, número de preguntas, puntuación, penalización y dificultad. + +2.- Genera un prompt con POST /exam/prompts/{template_id} La API devuelve un prompt estructurado para +pedirle al LLM preguntas en JSON válido. + +3.- Hay dos caminos posibles: + + 3.1.- Generación automática: POST /exam/generate La API llama al LLM configurado, parsea la respuesta y guarda las preguntas. + 3.2.- Carga manual: POST /exam/parse El profesor pega una salida de IA en json o txt, y la API la valida y guarda. + +4.- El profesor exporta el examen: + +GET /exam/export/xml/{template_id} para Moodle XML. 
+GET /exam/export/txt/{template_id} para texto plano. +GET /exam/export/json/{template_id} para JSON. + +(El XML generado se importa manualmente en Moodle.) + +En resumen: configurar plantilla → generar prompt o llamar al LLM → guardar preguntas → exportar Moodle XML. \ No newline at end of file diff --git a/backend/Dockerfile b/backend/Dockerfile new file mode 100644 index 0000000..4ee7778 --- /dev/null +++ b/backend/Dockerfile @@ -0,0 +1,19 @@ +FROM python:3.12-slim + +ENV PYTHONDONTWRITEBYTECODE=1 \ + PYTHONUNBUFFERED=1 + +WORKDIR /app + +RUN addgroup --system app && adduser --system --ingroup app app + +COPY requirements.txt . +RUN pip install --no-cache-dir -r requirements.txt + +COPY app ./app + +USER app + +EXPOSE 8000 + +CMD ["uvicorn", "app.main:app", "--host", "0.0.0.0", "--port", "8000"] diff --git a/backend/app/__init__.py b/backend/app/__init__.py new file mode 100644 index 0000000..526ac34 --- /dev/null +++ b/backend/app/__init__.py @@ -0,0 +1 @@ +"""GenExamenes IA backend package.""" diff --git a/backend/app/api/__init__.py b/backend/app/api/__init__.py new file mode 100644 index 0000000..dff53e5 --- /dev/null +++ b/backend/app/api/__init__.py @@ -0,0 +1 @@ +"""API package.""" diff --git a/backend/app/api/dependencies.py b/backend/app/api/dependencies.py new file mode 100644 index 0000000..061bfc9 --- /dev/null +++ b/backend/app/api/dependencies.py @@ -0,0 +1,17 @@ +from typing import Annotated + +from fastapi import Depends +from sqlalchemy.orm import Session + +from app.core.config import Settings, get_settings +from app.db.session import get_db +from app.services.exam_service import ExamService +from app.services.llm import LLMClient + + +def get_exam_service(db: Annotated[Session, Depends(get_db)]) -> ExamService: + return ExamService(db) + + +def get_llm_client(settings: Annotated[Settings, Depends(get_settings)]) -> LLMClient: + return LLMClient(settings) diff --git a/backend/app/api/routes/__init__.py b/backend/app/api/routes/__init__.py new 
file mode 100644 index 0000000..509980f --- /dev/null +++ b/backend/app/api/routes/__init__.py @@ -0,0 +1 @@ +"""API route package.""" diff --git a/backend/app/api/routes/exports.py b/backend/app/api/routes/exports.py new file mode 100644 index 0000000..7b6ca68 --- /dev/null +++ b/backend/app/api/routes/exports.py @@ -0,0 +1,37 @@ +import uuid +from typing import Annotated + +from fastapi import APIRouter, Depends, Response + +from app.api.dependencies import get_exam_service +from app.models.exam import ExportFormat +from app.services.exam_service import ExamService + +router = APIRouter(prefix="/export", tags=["exports"]) + + +@router.get("/xml/{template_id}") +def export_xml( + template_id: uuid.UUID, + service: Annotated[ExamService, Depends(get_exam_service)], +) -> Response: + export = service.export(template_id, ExportFormat.XML) + return Response(content=export.content, media_type="application/xml") + + +@router.get("/txt/{template_id}") +def export_txt( + template_id: uuid.UUID, + service: Annotated[ExamService, Depends(get_exam_service)], +) -> Response: + export = service.export(template_id, ExportFormat.TXT) + return Response(content=export.content, media_type="text/plain; charset=utf-8") + + +@router.get("/json/{template_id}") +def export_json( + template_id: uuid.UUID, + service: Annotated[ExamService, Depends(get_exam_service)], +) -> Response: + export = service.export(template_id, ExportFormat.JSON) + return Response(content=export.content, media_type="application/json") diff --git a/backend/app/api/routes/generation.py b/backend/app/api/routes/generation.py new file mode 100644 index 0000000..64cf87d --- /dev/null +++ b/backend/app/api/routes/generation.py @@ -0,0 +1,43 @@ +import uuid +from typing import Annotated + +from fastapi import APIRouter, Depends + +from app.api.dependencies import get_exam_service, get_llm_client +from app.schemas.exam import ( + BuildPromptRequest, + GenerateExamRequest, + ParsedQuestionsResponse, + ParseRequest, + 
PromptResponse, +) +from app.services.exam_service import ExamService +from app.services.llm import LLMClient + +router = APIRouter(tags=["generation"]) + + +@router.post("/prompts/{template_id}", response_model=PromptResponse) +def build_prompt( + template_id: uuid.UUID, + payload: BuildPromptRequest, + service: Annotated[ExamService, Depends(get_exam_service)], +) -> PromptResponse: + return service.build_prompt(template_id, payload.topic_prompt) + + +@router.post("/generate", response_model=ParsedQuestionsResponse) +async def generate_exam( + payload: GenerateExamRequest, + service: Annotated[ExamService, Depends(get_exam_service)], + llm_client: Annotated[LLMClient, Depends(get_llm_client)], +) -> ParsedQuestionsResponse: + return await service.generate_with_llm(payload.template_id, payload.topic_prompt, llm_client) + + +@router.post("/parse", response_model=ParsedQuestionsResponse) +def parse_ai_output( + payload: ParseRequest, + service: Annotated[ExamService, Depends(get_exam_service)], +) -> ParsedQuestionsResponse: + return service.parse_and_persist(payload) diff --git a/backend/app/api/routes/health.py b/backend/app/api/routes/health.py new file mode 100644 index 0000000..dd3a219 --- /dev/null +++ b/backend/app/api/routes/health.py @@ -0,0 +1,8 @@ +from fastapi import APIRouter + +router = APIRouter(tags=["health"]) + + +@router.get("/health") +def health_check() -> dict[str, str]: + return {"status": "ok"} diff --git a/backend/app/api/routes/templates.py b/backend/app/api/routes/templates.py new file mode 100644 index 0000000..2d1dd72 --- /dev/null +++ b/backend/app/api/routes/templates.py @@ -0,0 +1,31 @@ +import uuid +from typing import Annotated + +from fastapi import APIRouter, Depends, status + +from app.api.dependencies import get_exam_service +from app.schemas.exam import ExamTemplateCreate, ExamTemplateRead +from app.services.exam_service import ExamService + +router = APIRouter(prefix="/templates", tags=["templates"]) + + +@router.post("", 
response_model=ExamTemplateRead, status_code=status.HTTP_201_CREATED) +def create_template( + payload: ExamTemplateCreate, + service: Annotated[ExamService, Depends(get_exam_service)], +) -> ExamTemplateRead: + return service.create_template(payload) + + +@router.get("", response_model=list[ExamTemplateRead]) +def list_templates(service: Annotated[ExamService, Depends(get_exam_service)]) -> list[ExamTemplateRead]: + return service.list_templates() + + +@router.get("/{template_id}", response_model=ExamTemplateRead) +def get_template( + template_id: uuid.UUID, + service: Annotated[ExamService, Depends(get_exam_service)], +) -> ExamTemplateRead: + return service.get_template(template_id) diff --git a/backend/app/core/config.py b/backend/app/core/config.py new file mode 100644 index 0000000..348b46e --- /dev/null +++ b/backend/app/core/config.py @@ -0,0 +1,36 @@ +from functools import lru_cache + +from pydantic import Field +from pydantic_settings import BaseSettings, SettingsConfigDict + + +class Settings(BaseSettings): + app_name: str = "GenExamenes IA" + environment: str = "local" + api_prefix: str = "" + api_key: str = Field(min_length=16) + database_url: str = "postgresql+psycopg://genexamenes:genexamenes@localhost:5432/genexamenes" + allowed_origins: str = "http://localhost:3000" + rate_limit_requests: int = Field(default=60, ge=1) + rate_limit_window_seconds: int = Field(default=60, ge=1) + max_request_bytes: int = Field(default=1_048_576, ge=1_024) + llm_api_key: str | None = None + llm_base_url: str = "https://api.openai.com/v1" + llm_model: str = "gpt-4o-mini" + llm_timeout_seconds: int = Field(default=60, ge=5) + + model_config = SettingsConfigDict( + env_file=".env", + env_file_encoding="utf-8", + case_sensitive=False, + extra="ignore", + ) + + @property + def cors_origins(self) -> list[str]: + return [origin.strip() for origin in self.allowed_origins.split(",") if origin.strip()] + + +@lru_cache +def get_settings() -> Settings: + return Settings() diff 
--git a/backend/app/core/errors.py b/backend/app/core/errors.py new file mode 100644 index 0000000..a4f902b --- /dev/null +++ b/backend/app/core/errors.py @@ -0,0 +1,56 @@ +from fastapi import FastAPI, Request +from fastapi.exceptions import RequestValidationError +from fastapi.responses import ORJSONResponse +from starlette.exceptions import HTTPException as StarletteHTTPException + + +class AppError(Exception): + def __init__(self, message: str, status_code: int = 400, code: str = "app_error") -> None: + self.message = message + self.status_code = status_code + self.code = code + + +class NotFoundError(AppError): + def __init__(self, message: str = "Resource not found") -> None: + super().__init__(message=message, status_code=404, code="not_found") + + +class LLMUnavailableError(AppError): + def __init__(self, message: str = "LLM service is unavailable") -> None: + super().__init__(message=message, status_code=503, code="llm_unavailable") + + +class ParseError(AppError): + def __init__(self, message: str = "Unable to parse AI output") -> None: + super().__init__(message=message, status_code=422, code="parse_error") + + +def error_payload(code: str, message: str, details: object | None = None) -> dict[str, object]: + payload: dict[str, object] = {"error": {"code": code, "message": message}} + if details is not None: + payload["error"]["details"] = details + return payload + + +def register_exception_handlers(app: FastAPI) -> None: + @app.exception_handler(AppError) + async def app_error_handler(_: Request, exc: AppError) -> ORJSONResponse: + return ORJSONResponse( + status_code=exc.status_code, + content=error_payload(exc.code, exc.message), + ) + + @app.exception_handler(StarletteHTTPException) + async def http_error_handler(_: Request, exc: StarletteHTTPException) -> ORJSONResponse: + return ORJSONResponse( + status_code=exc.status_code, + content=error_payload("http_error", str(exc.detail)), + ) + + @app.exception_handler(RequestValidationError) + async def 
class RateLimitMiddleware(BaseHTTPMiddleware):
    """Sliding-window request limiter keyed by client host.

    Timestamps of accepted requests are kept in memory, one deque per client,
    so the limit applies per process. A client that already has ``limit``
    requests inside the last ``window_seconds`` receives a 429 response.
    """

    def __init__(self, app: object, settings: Settings) -> None:
        """Read the limit and window length from *settings*."""
        super().__init__(app)
        self.limit = settings.rate_limit_requests
        self.window_seconds = settings.rate_limit_window_seconds
        self.requests: defaultdict[str, deque[float]] = defaultdict(deque)
        # Time of the last idle-bucket sweep (monotonic clock).
        self._last_sweep = time.monotonic()

    def _drop_idle_clients(self, now: float) -> None:
        """Forget clients whose newest request has left the window.

        Without this, every distinct client host would keep a bucket forever
        and the mapping would grow without bound. The sweep runs at most once
        per window so its cost is amortised across requests.
        """
        if now - self._last_sweep < self.window_seconds:
            return
        self._last_sweep = now
        idle = [
            client
            for client, bucket in self.requests.items()
            if not bucket or now - bucket[-1] > self.window_seconds
        ]
        for client in idle:
            del self.requests[client]

    async def dispatch(self, request: Request, call_next: RequestResponseEndpoint) -> Response:
        """Reject the request with 429 when the client exceeded its quota."""
        client = request.client.host if request.client else "unknown"
        now = time.monotonic()
        self._drop_idle_clients(now)
        bucket = self.requests[client]

        # Discard timestamps that are older than the window.
        while bucket and now - bucket[0] > self.window_seconds:
            bucket.popleft()

        if len(bucket) >= self.limit:
            return ORJSONResponse(
                status_code=429,
                content=error_payload("rate_limited", "Too many requests"),
                headers={"Retry-After": str(self.window_seconds)},
            )

        bucket.append(now)
        return await call_next(request)
def require_api_key(
    settings: Annotated[Settings, Depends(get_settings)],
    x_api_key: Annotated[str | None, Header(alias="X-API-Key")] = None,
) -> None:
    """Reject the request unless ``X-API-Key`` matches the configured key.

    Args:
        settings: Application settings providing ``api_key``.
        x_api_key: Value of the ``X-API-Key`` request header, if any.

    Raises:
        HTTPException: 401 when the header is missing, empty or wrong.
    """
    import secrets  # function-scope import keeps this change self-contained

    # Compare as bytes in constant time: a plain ``!=`` leaks how many leading
    # characters matched through timing, and compare_digest on ``str`` raises
    # TypeError for non-ASCII input.
    provided = (x_api_key or "").encode("utf-8")
    expected = settings.api_key.encode("utf-8")
    if not x_api_key or not secrets.compare_digest(provided, expected):
        raise HTTPException(
            status_code=status.HTTP_401_UNAUTHORIZED,
            detail="Invalid or missing API key",
        )
def create_app() -> FastAPI:
    """Build and configure the FastAPI application.

    Registers the middleware stack, the exception handlers and the routers.
    The health router is mounted without dependencies; the template,
    generation and export routers are mounted under ``/exam`` and require a
    valid API key.

    Returns:
        The configured ``FastAPI`` instance.
    """
    settings = get_settings()
    app = FastAPI(title=settings.app_name, lifespan=lifespan)

    # The middleware added last wraps all the others. CORS must be outermost
    # so that 413/429 responses produced by the limiters still carry CORS
    # headers and preflight requests are answered before rate limiting.
    app.add_middleware(RequestSizeLimitMiddleware, settings=settings)
    app.add_middleware(RateLimitMiddleware, settings=settings)
    app.add_middleware(
        CORSMiddleware,
        allow_origins=settings.cors_origins,
        allow_credentials=True,
        allow_methods=["GET", "POST", "OPTIONS"],
        allow_headers=["Authorization", "Content-Type", "X-API-Key"],
    )

    register_exception_handlers(app)

    app.include_router(health.router)
    protected = [Depends(require_api_key)]
    app.include_router(templates.router, prefix="/exam", dependencies=protected)
    app.include_router(generation.router, prefix="/exam", dependencies=protected)
    app.include_router(exports.router, prefix="/exam", dependencies=protected)

    return app
Mapped[str] = mapped_column(String(20), nullable=False, default="es") + settings: Mapped[dict[str, Any]] = mapped_column(JSONB, nullable=False, default=dict) + difficulty_profile: Mapped[dict[str, Any]] = mapped_column(JSONB, nullable=False, default=dict) + created_at: Mapped[datetime] = mapped_column(DateTime(timezone=True), server_default=func.now()) + updated_at: Mapped[datetime] = mapped_column(DateTime(timezone=True), server_default=func.now(), onupdate=func.now()) + + questions: Mapped[list["Question"]] = relationship( + back_populates="template", + cascade="all, delete-orphan", + passive_deletes=True, + ) + export_jobs: Mapped[list["ExportJob"]] = relationship( + back_populates="template", + cascade="all, delete-orphan", + passive_deletes=True, + ) + + +class Question(Base): + __tablename__ = "questions" + + id: Mapped[uuid.UUID] = mapped_column(UUID(as_uuid=True), primary_key=True, default=uuid.uuid4) + template_id: Mapped[uuid.UUID] = mapped_column( + UUID(as_uuid=True), + ForeignKey("exam_templates.id", ondelete="CASCADE"), + nullable=False, + index=True, + ) + question_type: Mapped[QuestionType] = mapped_column(Enum(QuestionType), nullable=False) + statement: Mapped[str] = mapped_column(Text, nullable=False) + correct_answers: Mapped[list[str]] = mapped_column(JSONB, nullable=False, default=list) + wrong_answers: Mapped[list[str]] = mapped_column(JSONB, nullable=False, default=list) + matching_pairs: Mapped[list[dict[str, str]]] = mapped_column(JSONB, nullable=False, default=list) + difficulty: Mapped[Difficulty] = mapped_column(Enum(Difficulty), nullable=False, default=Difficulty.MEDIUM) + score: Mapped[float] = mapped_column(Float, nullable=False, default=1.0) + penalty: Mapped[float] = mapped_column(Float, nullable=False, default=0.0) + options: Mapped[dict[str, Any]] = mapped_column(JSONB, nullable=False, default=dict) + created_at: Mapped[datetime] = mapped_column(DateTime(timezone=True), server_default=func.now()) + + template: Mapped[ExamTemplate] 
= relationship(back_populates="questions") + + +class ExportJob(Base): + __tablename__ = "export_jobs" + + id: Mapped[uuid.UUID] = mapped_column(UUID(as_uuid=True), primary_key=True, default=uuid.uuid4) + template_id: Mapped[uuid.UUID] = mapped_column( + UUID(as_uuid=True), + ForeignKey("exam_templates.id", ondelete="CASCADE"), + nullable=False, + index=True, + ) + status: Mapped[ExportStatus] = mapped_column(Enum(ExportStatus), nullable=False) + format: Mapped[ExportFormat] = mapped_column(Enum(ExportFormat), nullable=False) + content: Mapped[str] = mapped_column(Text, nullable=False) + created_at: Mapped[datetime] = mapped_column(DateTime(timezone=True), server_default=func.now()) + + template: Mapped[ExamTemplate] = relationship(back_populates="export_jobs") diff --git a/backend/app/schemas/__init__.py b/backend/app/schemas/__init__.py new file mode 100644 index 0000000..13c4df7 --- /dev/null +++ b/backend/app/schemas/__init__.py @@ -0,0 +1 @@ +"""API schema package.""" diff --git a/backend/app/schemas/exam.py b/backend/app/schemas/exam.py new file mode 100644 index 0000000..d476cf6 --- /dev/null +++ b/backend/app/schemas/exam.py @@ -0,0 +1,127 @@ +import uuid +from datetime import datetime +from typing import Literal + +from pydantic import BaseModel, ConfigDict, Field, field_validator, model_validator + +from app.models.exam import Difficulty, ExportFormat, QuestionType + + +class QuestionTypeSettings(BaseModel): + type: QuestionType + count: int = Field(ge=1, le=200) + options_count: int | None = Field(default=None, ge=2, le=8) + multiple_correct: bool = False + score: float = Field(default=1.0, ge=0.0, le=100.0) + penalty: float = Field(default=0.0, ge=0.0, le=100.0) + + +class ExamSettings(BaseModel): + question_types: list[QuestionTypeSettings] = Field(min_length=1, max_length=20) + shuffle_questions: bool = True + shuffle_answers: bool = True + include_feedback: bool = True + + +class DifficultyProfile(BaseModel): + easy: int = Field(default=0, ge=0, 
class QuestionCreate(BaseModel):
    """Validated payload for a single exam question before it is persisted."""

    question_type: QuestionType
    statement: str = Field(min_length=3, max_length=8_000)
    correct_answers: list[str] = Field(min_length=1, max_length=20)
    wrong_answers: list[str] = Field(default_factory=list, max_length=20)
    matching_pairs: list[MatchingPair] = Field(default_factory=list, max_length=50)
    difficulty: Difficulty = Difficulty.MEDIUM
    score: float = Field(default=1.0, ge=0.0, le=100.0)
    penalty: float = Field(default=0.0, ge=0.0, le=100.0)
    options: dict[str, object] = Field(default_factory=dict)

    @field_validator("correct_answers", "wrong_answers")
    @classmethod
    def strip_answers(cls, value: list[str]) -> list[str]:
        """Trim each answer and drop the ones that are blank after trimming."""
        return [answer.strip() for answer in value if answer.strip()]

    @model_validator(mode="after")
    def validate_question_payload(self) -> "QuestionCreate":
        """Enforce the per-type requirements of a question.

        Raises:
            ValueError: When a non-matching question has no usable correct
                answer, a multichoice question has no wrong answers, a
                true/false answer is not a recognised boolean word, or a
                matching question has no pairs.
        """
        # ``strip_answers`` may have emptied the list after the ``min_length``
        # check already passed (e.g. ``[" "]``); reject that explicitly instead
        # of failing later with an IndexError. Matching questions are exempt
        # because their answers live in ``matching_pairs``.
        if self.question_type != QuestionType.MATCHING and not self.correct_answers:
            raise ValueError("Questions require at least one non-empty correct answer")
        if self.question_type == QuestionType.MULTICHOICE and not self.wrong_answers:
            raise ValueError("Multichoice questions require wrong_answers")
        if self.question_type == QuestionType.TRUE_FALSE:
            accepted = {"true", "false", "verdadero", "falso"}
            if self.correct_answers[0].lower() not in accepted:
                raise ValueError("True/false questions require a true or false correct answer")
        if self.question_type == QuestionType.MATCHING and not self.matching_pairs:
            raise ValueError("Matching questions require matching_pairs")
        return self
ExportJob, ExportStatus, Question +from app.schemas.exam import ( + ExamTemplateCreate, + ExamTemplateRead, + ExportResponse, + ParsedQuestionsResponse, + ParseRequest, + PromptResponse, + QuestionCreate, + QuestionRead, +) +from app.services.llm import LLMClient +from app.services.moodle_exporter import MoodleXMLExporter +from app.services.parser import AIQuestionParser +from app.services.prompt_builder import PromptBuilder + + +class ExamService: + def __init__( + self, + db: Session, + prompt_builder: PromptBuilder | None = None, + parser: AIQuestionParser | None = None, + exporter: MoodleXMLExporter | None = None, + ) -> None: + self.db = db + self.prompt_builder = prompt_builder or PromptBuilder() + self.parser = parser or AIQuestionParser() + self.exporter = exporter or MoodleXMLExporter() + + def create_template(self, payload: ExamTemplateCreate) -> ExamTemplateRead: + template = ExamTemplate( + title=clean_text(payload.title, max_length=200), + subject=clean_text(payload.subject, max_length=200), + educational_level=clean_text(payload.educational_level, max_length=120), + language=clean_text(payload.language, max_length=20), + settings=payload.settings.model_dump(mode="json"), + difficulty_profile=payload.difficulty_profile.model_dump(mode="json"), + ) + self.db.add(template) + self.db.commit() + self.db.refresh(template) + return self._template_read(template) + + def list_templates(self) -> list[ExamTemplateRead]: + templates = self.db.scalars(select(ExamTemplate).order_by(ExamTemplate.created_at.desc())).all() + return [self._template_read(template) for template in templates] + + def get_template(self, template_id: uuid.UUID) -> ExamTemplateRead: + return self._template_read(self._get_template_or_404(template_id)) + + def build_prompt(self, template_id: uuid.UUID, topic_prompt: str) -> PromptResponse: + template = self._get_template_or_404(template_id) + prompt = self.prompt_builder.build_prompt(template, topic_prompt) + return 
PromptResponse(template_id=template.id, prompt=prompt) + + async def generate_with_llm( + self, + template_id: uuid.UUID, + topic_prompt: str, + llm_client: LLMClient, + ) -> ParsedQuestionsResponse: + template = self._get_template_or_404(template_id) + prompt = self.prompt_builder.build_prompt(template, topic_prompt) + raw_output = await llm_client.generate(prompt) + questions = self.parser.parse_json(raw_output) + return self._persist_questions(template.id, questions) + + def parse_and_persist(self, payload: ParseRequest) -> ParsedQuestionsResponse: + self._get_template_or_404(payload.template_id) + questions = self.parser.parse(payload.raw_output, payload.input_format) + return self._persist_questions(payload.template_id, questions) + + def export(self, template_id: uuid.UUID, export_format: ExportFormat) -> ExportResponse: + template = self._get_template_or_404(template_id) + questions = list(template.questions) + if not questions: + raise NotFoundError("Template does not contain questions to export") + + if export_format == ExportFormat.XML: + content = self.exporter.export_xml(questions) + elif export_format == ExportFormat.TXT: + content = self.exporter.export_txt(questions) + else: + content = self.exporter.export_json(questions) + + self.db.add( + ExportJob( + template_id=template.id, + status=ExportStatus.COMPLETED, + format=export_format, + content=content, + ) + ) + self.db.commit() + return ExportResponse(template_id=template.id, format=export_format, content=content) + + def _persist_questions(self, template_id: uuid.UUID, questions: list[QuestionCreate]) -> ParsedQuestionsResponse: + persisted: list[Question] = [] + for payload in questions: + question = Question( + template_id=template_id, + question_type=payload.question_type, + statement=clean_text(payload.statement), + correct_answers=[clean_text(answer, max_length=1_000) for answer in payload.correct_answers], + wrong_answers=[clean_text(answer, max_length=1_000) for answer in 
class LLMClient:
    """Async client for an OpenAI-compatible ``/chat/completions`` endpoint.

    Every failure (missing key, transport or HTTP error, malformed response
    body) is reported as ``LLMUnavailableError`` so callers deal with a single
    exception type.
    """

    def __init__(self, settings: Settings) -> None:
        """Keep the settings object that supplies URL, model, key and timeout."""
        self.settings = settings

    async def generate(self, prompt: str) -> str:
        """Send *prompt* as the user message and return the model's reply text.

        Args:
            prompt: The complete user prompt to submit.

        Returns:
            The ``choices[0].message.content`` string from the response.

        Raises:
            LLMUnavailableError: If no API key is configured, the request
                fails or returns an error status, the body is not valid JSON,
                or the body lacks non-empty string content.
        """
        if not self.settings.llm_api_key:
            raise LLMUnavailableError("LLM_API_KEY is not configured")

        url = f"{self.settings.llm_base_url.rstrip('/')}/chat/completions"
        payload = {
            "model": self.settings.llm_model,
            "messages": [
                {
                    "role": "system",
                    "content": "You generate safe, valid JSON exam questions for Moodle imports.",
                },
                {"role": "user", "content": prompt},
            ],
            "temperature": 0.2,
            "response_format": {"type": "json_object"},
        }
        headers = {
            "Authorization": f"Bearer {self.settings.llm_api_key}",
            "Content-Type": "application/json",
        }

        try:
            async with httpx.AsyncClient(timeout=self.settings.llm_timeout_seconds) as client:
                response = await client.post(url, json=payload, headers=headers)
                response.raise_for_status()
        # InvalidURL is not part of the HTTPError hierarchy, so a malformed
        # LLM_BASE_URL has to be caught explicitly.
        except (httpx.HTTPError, httpx.InvalidURL) as exc:
            raise LLMUnavailableError("LLM request failed") from exc

        # A 2xx reply can still carry a non-JSON body (proxy error page,
        # truncated stream); decoding errors are ValueError subclasses.
        try:
            data = response.json()
        except ValueError as exc:
            raise LLMUnavailableError("LLM response was not valid JSON") from exc

        try:
            content = data["choices"][0]["message"]["content"]
        except (KeyError, IndexError, TypeError) as exc:
            raise LLMUnavailableError("LLM response did not include message content") from exc

        if not isinstance(content, str) or not content.strip():
            raise LLMUnavailableError("LLM returned empty content")
        return content
def _shortanswer(self, question: Any, index: int) -> str:
    """Build the export fragment for a short-answer question.

    Each accepted answer is emitted through ``_answer_xml`` with a fraction
    of 100. A question without a ``correct_answers`` attribute yields no
    answer entries instead of raising ``TypeError``, matching how
    ``_multichoice`` and ``_truefalse`` treat a missing list.

    Args:
        question: Object exposing ``correct_answers`` plus the attributes
            read by ``_common_header``.
        index: 1-based position, forwarded to ``_common_header``.

    Returns:
        The fragment lines joined with newlines.
    """
    # NOTE(review): the string literals below are copied exactly as they
    # appear in the reviewed text; confirm them against the repository.
    accepted = self._attr(question, "correct_answers") or []
    answers = [self._answer_xml(answer, 100) for answer in accepted]
    return "\n".join(
        [
            ' ',
            self._common_header(question, index),
            " 0",
            *answers,
            " ",
        ]
    )
str: + subquestions = [] + for pair in self._attr(question, "matching_pairs") or []: + prompt = pair.get("prompt") if isinstance(pair, dict) else pair.prompt + answer = pair.get("answer") if isinstance(pair, dict) else pair.answer + subquestions.append( + "\n".join( + [ + ' ', + f" {self._cdata(prompt)}", + " ", + f" {self._xml(answer)}", + " ", + " ", + ] + ) + ) + return "\n".join( + [ + ' ', + self._common_header(question, index), + *subquestions, + " ", + ] + ) + + def _common_header(self, question: Any, index: int) -> str: + statement = self._attr(question, "statement") + name = clean_text(statement, max_length=80) or f"Pregunta {index}" + return "\n".join( + [ + " ", + f" {self._xml(name)}", + " ", + ' ', + f" {self._cdata(statement)}", + " ", + f" {float(self._attr(question, 'score') or 1.0):.2f}", + " ", + ] + ) + + def _answer_xml(self, text: str, fraction: float) -> str: + fraction_text = f"{fraction:.6g}" + return "\n".join( + [ + f' ', + f" {self._xml(text)}", + " ", + " ", + ] + ) + + def _question_dict(self, question: Any) -> dict[str, Any]: + return { + "id": str(self._attr(question, "id")) if self._attr(question, "id") else None, + "question_type": self._enum_value(self._attr(question, "question_type")), + "statement": self._attr(question, "statement"), + "correct_answers": self._attr(question, "correct_answers") or [], + "wrong_answers": self._attr(question, "wrong_answers") or [], + "matching_pairs": self._attr(question, "matching_pairs") or [], + "difficulty": self._enum_value(self._attr(question, "difficulty")), + "score": self._attr(question, "score"), + "penalty": self._attr(question, "penalty"), + } + + def _attr(self, question: Any, name: str) -> Any: + return getattr(question, name, None) + + def _enum_value(self, value: Any) -> Any: + return value.value if hasattr(value, "value") else value + + def _xml(self, value: Any) -> str: + return xml_escape(clean_text(str(value)), {'"': """, "'": "'"}) + + def _cdata(self, value: Any) -> str: + 
class AIQuestionParser:
    """Convert raw AI output (JSON or plain text) into ``QuestionCreate`` objects.

    Every problem with the input is reported as ``ParseError``.
    """

    def parse(self, raw_output: str, input_format: str) -> list[QuestionCreate]:
        """Dispatch to the JSON or TXT parser according to *input_format*.

        Raises:
            ParseError: If the format is neither ``"json"`` nor ``"txt"``, or
                the selected parser rejects the content.
        """
        if input_format == "json":
            return self.parse_json(raw_output)
        if input_format == "txt":
            return self.parse_txt(raw_output)
        raise ParseError("Unsupported input format")

    def parse_json(self, raw_json: str) -> list[QuestionCreate]:
        """Parse a JSON document holding a list of question objects.

        Accepts either a bare list or an object with a ``questions`` list. A
        surrounding Markdown code fence is removed before decoding.

        Raises:
            ParseError: If the text is not valid JSON, contains no question
                list, or an entry is not an object or fails validation.
        """
        try:
            data = json.loads(self._strip_code_fence(raw_json))
        except json.JSONDecodeError as exc:
            raise ParseError("Invalid JSON returned by AI") from exc

        items = data.get("questions", data) if isinstance(data, dict) else data
        if not isinstance(items, list) or not items:
            raise ParseError("JSON must contain a non-empty questions list")

        questions: list[QuestionCreate] = []
        for item in items:
            if not isinstance(item, dict):
                raise ParseError("Each JSON question must be an object")
            questions.append(self._build_question(self._normalize_item(item)))
        return questions

    def parse_txt(self, raw_text: str) -> list[QuestionCreate]:
        """Parse blank-line-separated blocks of plain text into questions.

        In each block the first non-empty line is the statement, the second
        the correct answer and the rest are wrong answers. Blocks with fewer
        than two usable lines are skipped.

        Raises:
            ParseError: If no block yields a question or one fails validation.
        """
        blocks = [block.strip() for block in raw_text.replace("\r\n", "\n").split("\n\n") if block.strip()]
        questions: list[QuestionCreate] = []

        for block in blocks:
            # Clean each line once and keep only the non-empty results.
            lines = [cleaned for line in block.split("\n") if (cleaned := clean_text(line))]
            if len(lines) < 2:
                continue

            statement = lines[0]
            correct_answer = lines[1]
            wrong_answers = lines[2:]
            question_type = self._infer_txt_type(correct_answer, wrong_answers)
            payload = {
                "question_type": question_type,
                "statement": statement,
                "correct_answers": [correct_answer],
                "wrong_answers": wrong_answers,
                "difficulty": Difficulty.MEDIUM,
                "score": 1.0,
                "penalty": 0.0,
            }
            questions.append(self._build_question(payload))

        if not questions:
            raise ParseError("TXT output did not contain parseable questions")
        return questions

    def _normalize_item(self, item: dict[str, Any]) -> dict[str, Any]:
        """Map the key aliases an AI may use onto the ``QuestionCreate`` field names."""
        correct = item.get("correct_answers", item.get("correct_answer", item.get("answer", [])))
        wrong = item.get("wrong_answers", item.get("incorrect_answers", item.get("distractors", [])))
        question_type = item.get("question_type", item.get("type", QuestionType.MULTICHOICE.value))

        return {
            "question_type": question_type,
            "statement": item.get("statement", item.get("question", item.get("prompt", ""))),
            "correct_answers": self._as_answer_list(correct),
            "wrong_answers": self._as_answer_list(wrong),
            "matching_pairs": item.get("matching_pairs", []),
            "difficulty": item.get("difficulty", Difficulty.MEDIUM.value),
            "score": item.get("score", 1.0),
            "penalty": item.get("penalty", 0.0),
            "options": item.get("options", {}),
        }

    def _build_question(self, payload: dict[str, Any]) -> QuestionCreate:
        """Validate *payload* against ``QuestionCreate``, raising ``ParseError`` on failure."""
        try:
            return QuestionCreate.model_validate(payload)
        except ValidationError as exc:
            raise ParseError(f"Invalid question payload: {exc.errors()}") from exc

    def _infer_txt_type(self, correct_answer: str, wrong_answers: list[str]) -> QuestionType:
        """Guess the question type of a TXT block from its answers."""
        if correct_answer.lower() in {"true", "false", "verdadero", "falso"} and not wrong_answers:
            return QuestionType.TRUE_FALSE
        if wrong_answers:
            return QuestionType.MULTICHOICE
        return QuestionType.SHORT_ANSWER

    @classmethod
    def _as_answer_list(cls, value: Any) -> Any:
        """Wrap a scalar answer in a list and render scalar entries as text.

        Values that are neither a scalar nor a list are returned untouched so
        schema validation still reports them.
        """
        # NOTE(review): assumes QuestionCreate expects lists of strings for
        # answers; confirm against app.schemas.exam.
        if isinstance(value, (str, bool, int, float)):
            value = [value]
        if isinstance(value, list):
            return [cls._answer_text(entry) for entry in value]
        return value

    @staticmethod
    def _answer_text(value: Any) -> Any:
        """Render JSON booleans and numbers as text; leave other values as they are."""
        # bool must be tested first because it is a subclass of int.
        if isinstance(value, bool):
            return "true" if value else "false"
        if isinstance(value, (int, float)):
            return str(value)
        return value

    @staticmethod
    def _strip_code_fence(text: str) -> str:
        """Remove a Markdown code fence wrapping *text*; return other text unchanged."""
        stripped = text.strip()
        if len(stripped) < 6 or not (stripped.startswith("```") and stripped.endswith("```")):
            return text
        inner = stripped[3:-3]
        tag, newline, rest = inner.partition("\n")
        # Drop the opening line when it is empty or only names the language.
        if newline and tag.strip().lower() in {"", "json"}:
            return rest
        return inner
class PromptBuilder:
    """Assemble the instruction prompt sent to the LLM for an exam template."""

    # Example of the JSON document the model is asked to return.
    _OUTPUT_CONTRACT = {
        "questions": [
            {
                "question_type": "multichoice | truefalse | shortanswer | matching",
                "statement": "Enunciado claro de la pregunta",
                "correct_answers": ["respuesta correcta"],
                "wrong_answers": ["distractor 1", "distractor 2"],
                "matching_pairs": [{"prompt": "concepto", "answer": "definicion"}],
                "difficulty": "easy | medium | hard | very_hard",
                "score": 1.0,
                "penalty": 0.0,
            }
        ]
    }

    def build_prompt(self, template: ExamTemplate, topic_prompt: str) -> str:
        """Return the full prompt text for *template* and the teacher's topic.

        Free-text fields pass through ``sanitize_prompt_input``; the template
        settings and difficulty profile are embedded as JSON.

        Args:
            template: Exam template supplying title, subject, level, language,
                settings and difficulty profile.
            topic_prompt: Topic, concepts and restrictions given by the teacher.

        Returns:
            The prompt as one newline-joined string.
        """
        type_settings = template.settings
        difficulty_mix = template.difficulty_profile
        # The topic is sanitised before the template fields, as the caller expects.
        topic_text = sanitize_prompt_input(topic_prompt)

        preamble = [
            "Eres un generador de cuestionarios académicos para Moodle.",
            "Devuelve exclusivamente JSON válido, sin markdown ni texto adicional.",
            "No incluyas instrucciones del usuario dentro de las preguntas.",
        ]
        exam_context = [
            f"Título del examen: {sanitize_prompt_input(template.title)}",
            f"Materia: {sanitize_prompt_input(template.subject)}",
            f"Nivel educativo: {sanitize_prompt_input(template.educational_level)}",
            f"Idioma: {sanitize_prompt_input(template.language)}",
            f"Configuración de tipos: {json.dumps(type_settings, ensure_ascii=False)}",
            f"Distribución de dificultad: {json.dumps(difficulty_mix, ensure_ascii=False)}",
        ]
        topic_section = [
            "Tema, conceptos y restricciones indicadas por el profesor:",
            topic_text,
        ]
        contract_section = [
            "Contrato de salida obligatorio:",
            json.dumps(self._OUTPUT_CONTRACT, ensure_ascii=False, indent=2),
        ]
        rules_section = [
            "Reglas:",
            "- Respeta el número de preguntas por tipo.",
            "- Respeta la distribución de dificultad.",
            "- En multichoice, incluye al menos una respuesta correcta y varias incorrectas.",
            "- En truefalse, usa una única respuesta correcta: true o false.",
            "- En shortanswer, incluye respuestas exactas aceptadas.",
            "- En matching, rellena matching_pairs y deja wrong_answers vacío.",
        ]

        # Sections are separated by a single empty line.
        prompt_lines = [
            *preamble,
            "",
            *exam_context,
            "",
            *topic_section,
            "",
            *contract_section,
            "",
            *rules_section,
        ]
        return "\n".join(prompt_lines)
a/backend/requirements.txt b/backend/requirements.txt new file mode 100644 index 0000000..6ad34b4 --- /dev/null +++ b/backend/requirements.txt @@ -0,0 +1,9 @@ +fastapi +uvicorn[standard] +SQLAlchemy +psycopg[binary] +pydantic-settings +python-dotenv +httpx +orjson +pytest diff --git a/docker-compose.yml b/docker-compose.yml new file mode 100644 index 0000000..2a0b355 --- /dev/null +++ b/docker-compose.yml @@ -0,0 +1,42 @@ +services: + backend: + build: + context: ./backend + env_file: + - .env + environment: + DATABASE_URL: postgresql+psycopg://genexamenes:genexamenes@db:5432/genexamenes + ports: + - "8000:8000" + depends_on: + db: + condition: service_healthy + restart: unless-stopped + + frontend: + image: nginx:1.27-alpine + ports: + - "3000:80" + volumes: + - ./frontend:/usr/share/nginx/html:ro + restart: unless-stopped + + db: + image: postgres:16-alpine + environment: + POSTGRES_DB: genexamenes + POSTGRES_USER: genexamenes + POSTGRES_PASSWORD: genexamenes + ports: + - "5432:5432" + volumes: + - postgres_data:/var/lib/postgresql/data + healthcheck: + test: ["CMD-SHELL", "pg_isready -U genexamenes -d genexamenes"] + interval: 5s + timeout: 5s + retries: 10 + restart: unless-stopped + +volumes: + postgres_data: