# ORCID2SWORD/backend/app/schema/researcher.py

from datetime import datetime
from typing import Dict, List, Optional
from uuid import UUID

from pydantic import BaseModel, Field, field_validator

from app.core.config import settings
from app.schema.publication import PublicationSchema
from app.utils.orcid_validator import ORCID_PATTERN, is_valid_orcid


class ResearcherSchema(BaseModel):
    # Schema describing a single researcher record.
    id: UUID
    # Must be exactly 19 characters long and match ORCID_PATTERN
    # (imported from app.utils.orcid_validator).
    orcid_id: str = Field(min_length=19, max_length=19, pattern=ORCID_PATTERN)
    # Optional name, at most 255 characters; defaults to None when omitted.
    name: Optional[str] = Field(default=None, max_length=255)
    # NOTE(review): presumably whether the researcher has authenticated
    # with ORCID -- confirm against the code that populates it.
    authenticated: bool
    # Nullable, but declared without a default, so a value (possibly None)
    # must always be supplied when the model is built.
    last_sync_at: Optional[datetime]

    # Allow the model to be populated from object attributes, not only
    # from mappings.
    model_config = {"from_attributes": True}


class ResearcherStatsSchema(BaseModel):
    # Aggregate publication statistics; embedded as the ``stats`` field of
    # ResearcherWithPublicationsSchema.
    total_publications: int
    # String-keyed integer counts.
    # NOTE(review): presumably publication type -> number of publications
    # of that type; verify against the code that fills this in.
    publication_types: Dict[str, int]


class ResearcherWithPublicationsSchema(BaseModel):
    # Bundles a researcher with its publications, aggregate stats and
    # per-record counters.
    researcher: ResearcherSchema
    publications: List[PublicationSchema]
    stats: ResearcherStatsSchema

    # NOTE(review): these look like counters describing the outcome of a
    # sync/import run (new / updated / unchanged / total records) --
    # confirm the exact semantics with the producer of this schema.
    new_records: int
    updated_records: int
    unchanged_records: int
    total_records: int

    # Allow the model to be populated from object attributes, not only
    # from mappings.
    model_config = {"from_attributes": True}


class ResearcherBatchSearchRequestSchema(BaseModel):
    # Request payload for a batch search: between 1 and
    # settings.MAX_ORCID_BATCH ORCID iDs, plus an opt-in enrichment flag.
    orcid_ids: List[str] = Field(min_length=1, max_length=settings.MAX_ORCID_BATCH)
    # When true, /work/{put_code} is queried up to
    # ORCID_WORK_DETAIL_ENRICH_MAX times per researcher (contributors,
    # citation, etc.); the remaining works only use the summary.
    enrich_work_details: bool = False

    @field_validator("orcid_ids")
    @classmethod
    def _validate_each(cls, value: List[str]) -> List[str]:
        """Validate every ORCID iD and return the list without duplicates.

        Items are checked in order; the first one that is not a string or
        that ``is_valid_orcid`` rejects raises ``ValueError``. Duplicates
        are dropped afterwards, keeping the first occurrence of each iD in
        its original position.
        """
        for candidate in value:
            if not isinstance(candidate, str):
                raise ValueError("ORCID iD debe ser string")
            if not is_valid_orcid(candidate):
                raise ValueError(f"ORCID iD inválido: {candidate}")
        # dict keys keep insertion order, so this is an order-preserving
        # de-duplication.
        return list(dict.fromkeys(value))


class ResearcherSearchErrorSchema(BaseModel):
    # One error entry; used as the element type of the ``errors`` list in
    # ResearcherBatchSearchResponseSchema.
    # NOTE(review): presumably the ORCID iD that failed and a
    # human-readable reason -- confirm with the code that builds it.
    orcid_id: str
    detail: str


class ResearcherBatchSearchResponseSchema(BaseModel):
    # Response for a batch search: per-researcher results, per-iD errors
    # and two summary counters.
    results: List[ResearcherWithPublicationsSchema]
    errors: List[ResearcherSearchErrorSchema]
    # NOTE(review): presumably the number of iDs requested and the number
    # actually processed -- verify how the endpoint computes these.
    total_requested: int
    total_processed: int