Merge pull request #6 from uja-dev-practices/backend-v3

refactor: update researcher API endpoints to include batch search and…
This commit is contained in:
Mireya Cueto Garrido
2026-04-28 09:42:57 +02:00
committed by GitHub
3 changed files with 116 additions and 26 deletions
+92 -21
View File
@@ -1,12 +1,19 @@
from datetime import datetime from datetime import datetime
from typing import List from typing import List
import httpx
from fastapi import APIRouter, Depends, HTTPException from fastapi import APIRouter, Depends, HTTPException
from sqlalchemy.orm import Session from sqlalchemy.orm import Session
from app.db.models import Publication, Researcher from app.db.models import Publication, Researcher
from app.db.session import get_db from app.db.session import get_db
from app.schema.researcher import ResearcherWithPublicationsSchema from app.schema.researcher import (
ResearcherBatchSearchRequestSchema,
ResearcherBatchSearchResponseSchema,
ResearcherSearchErrorSchema,
ResearcherStatsSchema,
ResearcherWithPublicationsSchema,
)
from app.services.normalizer import PublicationNormalizer from app.services.normalizer import PublicationNormalizer
from app.services.orcid_client import get_works_summary, get_work_detail from app.services.orcid_client import get_works_summary, get_work_detail
@@ -32,24 +39,24 @@ def publication_changed(existing: Publication, data: dict) -> bool:
return False return False
# --------------------------------------------------------- def build_researcher_stats(publications: List[Publication]) -> ResearcherStatsSchema:
# ENDPOINT 1: SEARCH + SYNC (sin contadores) publication_types: dict[str, int] = {}
# ---------------------------------------------------------
@router.get("/search/{orcid_id}", response_model=ResearcherWithPublicationsSchema)
def search_and_sync_researcher(orcid_id: str, db: Session = Depends(get_db)):
# Buscar o crear Researcher
researcher = db.query(Researcher).filter(Researcher.orcid_id == orcid_id).first()
if not researcher:
researcher = Researcher(
orcid_id=orcid_id,
name=None,
authenticated=False,
last_sync_at=None,
)
db.add(researcher)
db.flush()
# Obtener works summary desde ORCID for publication in publications:
pub_type = publication.type or "unknown"
publication_types[pub_type] = publication_types.get(pub_type, 0) + 1
return ResearcherStatsSchema(
total_publications=len(publications),
publication_types=publication_types,
)
def _upsert_researcher_publications(
researcher: Researcher,
orcid_id: str,
db: Session,
) -> List[Publication]:
works = get_works_summary(orcid_id) works = get_works_summary(orcid_id)
groups = works.get("group", []) groups = works.get("group", [])
@@ -65,16 +72,13 @@ def search_and_sync_researcher(orcid_id: str, db: Session = Depends(get_db)):
if put_code is None: if put_code is None:
continue continue
# Obtener detalle del work
try: try:
detail = get_work_detail(orcid_id, put_code) detail = get_work_detail(orcid_id, put_code)
except Exception: except Exception:
detail = None detail = None
# Normalizar datos
data = PublicationNormalizer.normalize(summary, detail) data = PublicationNormalizer.normalize(summary, detail)
# Ver si ya existe la publicación
existing = ( existing = (
db.query(Publication) db.query(Publication)
.filter( .filter(
@@ -111,9 +115,28 @@ def search_and_sync_researcher(orcid_id: str, db: Session = Depends(get_db)):
db.commit() db.commit()
db.refresh(researcher) db.refresh(researcher)
return publications
def build_search_response(orcid_id: str, db: Session) -> ResearcherWithPublicationsSchema:
researcher = db.query(Researcher).filter(Researcher.orcid_id == orcid_id).first()
if not researcher:
researcher = Researcher(
orcid_id=orcid_id,
name=None,
authenticated=False,
last_sync_at=None,
)
db.add(researcher)
db.flush()
publications = _upsert_researcher_publications(researcher, orcid_id, db)
stats = build_researcher_stats(publications)
return ResearcherWithPublicationsSchema( return ResearcherWithPublicationsSchema(
researcher=researcher, researcher=researcher,
publications=publications, publications=publications,
stats=stats,
new_records=0, new_records=0,
updated_records=0, updated_records=0,
unchanged_records=0, unchanged_records=0,
@@ -121,6 +144,53 @@ def search_and_sync_researcher(orcid_id: str, db: Session = Depends(get_db)):
) )
# ---------------------------------------------------------
# ENDPOINT 1: SEARCH + SYNC (no counters)
# ---------------------------------------------------------
@router.get("/search/{orcid_id}", response_model=ResearcherWithPublicationsSchema)
def search_and_sync_researcher(orcid_id: str, db: Session = Depends(get_db)):
    # Thin route handler: the lookup/creation of the researcher and the
    # publication upsert are all delegated to build_search_response.
    response = build_search_response(orcid_id=orcid_id, db=db)
    return response
# Batch variant of the search + sync endpoint. Every distinct ORCID iD in the
# payload goes through build_search_response; a failure for one iD rolls the
# session back and is reported in `errors` while the remaining iDs continue.
@router.post("/search", response_model=ResearcherBatchSearchResponseSchema)
def search_and_sync_researchers(
    payload: ResearcherBatchSearchRequestSchema,
    db: Session = Depends(get_db),
):
    # Avoid duplicate calls to ORCID while keeping the input order:
    # dict.fromkeys keeps only the first occurrence of each iD.
    pending_ids = list(dict.fromkeys(payload.orcid_ids))

    succeeded: List[ResearcherWithPublicationsSchema] = []
    failed: List[ResearcherSearchErrorSchema] = []

    for orcid_id in pending_ids:
        try:
            response = build_search_response(orcid_id, db)
        except Exception as exc:
            # Discard whatever the failed iD left pending in the session
            # before moving on to the next one.
            db.rollback()
            if isinstance(exc, httpx.HTTPStatusError):
                # HTTP status errors get a fixed message carrying the status code.
                message = f"ORCID devolvió {exc.response.status_code} para {orcid_id}."
            else:
                message = str(exc)
            failed.append(
                ResearcherSearchErrorSchema(
                    orcid_id=orcid_id,
                    detail=message,
                )
            )
        else:
            succeeded.append(response)

    return ResearcherBatchSearchResponseSchema(
        results=succeeded,
        errors=failed,
        total_requested=len(pending_ids),
        total_processed=len(succeeded),
    )
# --------------------------------------------------------- # ---------------------------------------------------------
# ENDPOINT 2: SYNC COMPLETO (con contadores + status) # ENDPOINT 2: SYNC COMPLETO (con contadores + status)
# --------------------------------------------------------- # ---------------------------------------------------------
@@ -201,6 +271,7 @@ def sync_researcher(orcid_id: str, db: Session = Depends(get_db)):
return ResearcherWithPublicationsSchema( return ResearcherWithPublicationsSchema(
researcher=researcher, researcher=researcher,
publications=publications_output, publications=publications_output,
stats=build_researcher_stats(publications_output),
new_records=new_count, new_records=new_count,
updated_records=updated_count, updated_records=updated_count,
unchanged_records=unchanged_count, unchanged_records=unchanged_count,
+24 -3
View File
@@ -1,6 +1,6 @@
from pydantic import BaseModel from pydantic import BaseModel, Field
from uuid import UUID from uuid import UUID
from typing import Optional, List from typing import Optional, List, Dict
from datetime import datetime from datetime import datetime
from app.schema.publication import PublicationSchema from app.schema.publication import PublicationSchema
@@ -14,14 +14,35 @@ class ResearcherSchema(BaseModel):
model_config = {"from_attributes": True} model_config = {"from_attributes": True}
# Aggregate publication statistics attached to a researcher response.
class ResearcherStatsSchema(BaseModel):
    # Number of publications the statistics were computed over.
    total_publications: int
    # Publication count per publication type (type name -> count).
    publication_types: Dict[str, int]
class ResearcherWithPublicationsSchema(BaseModel): class ResearcherWithPublicationsSchema(BaseModel):
researcher: ResearcherSchema researcher: ResearcherSchema
publications: List[PublicationSchema] publications: List[PublicationSchema]
stats: ResearcherStatsSchema
# NUEVOS CAMPOS
new_records: int new_records: int
updated_records: int updated_records: int
unchanged_records: int unchanged_records: int
total_records: int total_records: int
model_config = {"from_attributes": True} model_config = {"from_attributes": True}
# Request body for the batch search endpoint.
class ResearcherBatchSearchRequestSchema(BaseModel):
    # ORCID iDs to search; the list must contain at least one entry.
    orcid_ids: List[str] = Field(min_length=1)
# Describes one ORCID iD that could not be processed in a batch search.
class ResearcherSearchErrorSchema(BaseModel):
    # The ORCID iD whose processing failed.
    orcid_id: str
    # Human-readable description of the failure.
    detail: str
# Response body for the batch search endpoint.
class ResearcherBatchSearchResponseSchema(BaseModel):
    # One entry per ORCID iD that was processed successfully.
    results: List[ResearcherWithPublicationsSchema]
    # One entry per ORCID iD whose processing failed.
    errors: List[ResearcherSearchErrorSchema]
    # Number of distinct ORCID iDs in the request (after de-duplication).
    total_requested: int
    # Number of entries in `results`.
    total_processed: int
-2
View File
@@ -1,5 +1,3 @@
version: "3.9"
services: services:
backend: backend: