refactor: update researcher API endpoints to include batch search and statistics functionality
- Removed the deprecated search logic and replaced it with a new structure for handling researcher statistics.
- Introduced new schemas for batch search requests and responses.
- Enhanced the search endpoint to return publication statistics alongside researcher data.
- Updated the docker-compose file to remove the obsolete top-level `version` key.
This commit is contained in:
@@ -1,12 +1,19 @@
|
|||||||
from datetime import datetime
|
from datetime import datetime
|
||||||
from typing import List
|
from typing import List
|
||||||
|
|
||||||
|
import httpx
|
||||||
from fastapi import APIRouter, Depends, HTTPException
|
from fastapi import APIRouter, Depends, HTTPException
|
||||||
from sqlalchemy.orm import Session
|
from sqlalchemy.orm import Session
|
||||||
|
|
||||||
from app.db.models import Publication, Researcher
|
from app.db.models import Publication, Researcher
|
||||||
from app.db.session import get_db
|
from app.db.session import get_db
|
||||||
from app.schema.researcher import ResearcherWithPublicationsSchema
|
from app.schema.researcher import (
|
||||||
|
ResearcherBatchSearchRequestSchema,
|
||||||
|
ResearcherBatchSearchResponseSchema,
|
||||||
|
ResearcherSearchErrorSchema,
|
||||||
|
ResearcherStatsSchema,
|
||||||
|
ResearcherWithPublicationsSchema,
|
||||||
|
)
|
||||||
from app.services.normalizer import PublicationNormalizer
|
from app.services.normalizer import PublicationNormalizer
|
||||||
from app.services.orcid_client import get_works_summary, get_work_detail
|
from app.services.orcid_client import get_works_summary, get_work_detail
|
||||||
|
|
||||||
@@ -32,24 +39,24 @@ def publication_changed(existing: Publication, data: dict) -> bool:
|
|||||||
return False
|
return False
|
||||||
|
|
||||||
|
|
||||||
# ---------------------------------------------------------
|
def build_researcher_stats(publications: List[Publication]) -> ResearcherStatsSchema:
    """Summarize a list of publications.

    Args:
        publications: Publications to count.

    Returns:
        A ``ResearcherStatsSchema`` holding the total number of publications
        and a mapping from publication type to how many times it occurs.
        Publications whose ``type`` is falsy (``None`` or empty) are counted
        under the ``"unknown"`` key.
    """
    counts_by_type: dict[str, int] = {}

    for item in publications:
        raw_type = item.type
        label = raw_type if raw_type else "unknown"
        # Create the bucket on first sight, then bump it.
        counts_by_type.setdefault(label, 0)
        counts_by_type[label] += 1

    return ResearcherStatsSchema(
        total_publications=len(publications),
        publication_types=counts_by_type,
    )
|
||||||
|
|
||||||
|
|
||||||
|
def _upsert_researcher_publications(
|
||||||
|
researcher: Researcher,
|
||||||
|
orcid_id: str,
|
||||||
|
db: Session,
|
||||||
|
) -> List[Publication]:
|
||||||
works = get_works_summary(orcid_id)
|
works = get_works_summary(orcid_id)
|
||||||
groups = works.get("group", [])
|
groups = works.get("group", [])
|
||||||
|
|
||||||
@@ -65,16 +72,13 @@ def search_and_sync_researcher(orcid_id: str, db: Session = Depends(get_db)):
|
|||||||
if put_code is None:
|
if put_code is None:
|
||||||
continue
|
continue
|
||||||
|
|
||||||
# Obtener detalle del work
|
|
||||||
try:
|
try:
|
||||||
detail = get_work_detail(orcid_id, put_code)
|
detail = get_work_detail(orcid_id, put_code)
|
||||||
except Exception:
|
except Exception:
|
||||||
detail = None
|
detail = None
|
||||||
|
|
||||||
# Normalizar datos
|
|
||||||
data = PublicationNormalizer.normalize(summary, detail)
|
data = PublicationNormalizer.normalize(summary, detail)
|
||||||
|
|
||||||
# Ver si ya existe la publicación
|
|
||||||
existing = (
|
existing = (
|
||||||
db.query(Publication)
|
db.query(Publication)
|
||||||
.filter(
|
.filter(
|
||||||
@@ -111,9 +115,28 @@ def search_and_sync_researcher(orcid_id: str, db: Session = Depends(get_db)):
|
|||||||
db.commit()
|
db.commit()
|
||||||
db.refresh(researcher)
|
db.refresh(researcher)
|
||||||
|
|
||||||
|
return publications
|
||||||
|
|
||||||
|
|
||||||
|
def build_search_response(orcid_id: str, db: Session) -> ResearcherWithPublicationsSchema:
|
||||||
|
researcher = db.query(Researcher).filter(Researcher.orcid_id == orcid_id).first()
|
||||||
|
if not researcher:
|
||||||
|
researcher = Researcher(
|
||||||
|
orcid_id=orcid_id,
|
||||||
|
name=None,
|
||||||
|
authenticated=False,
|
||||||
|
last_sync_at=None,
|
||||||
|
)
|
||||||
|
db.add(researcher)
|
||||||
|
db.flush()
|
||||||
|
|
||||||
|
publications = _upsert_researcher_publications(researcher, orcid_id, db)
|
||||||
|
stats = build_researcher_stats(publications)
|
||||||
|
|
||||||
return ResearcherWithPublicationsSchema(
|
return ResearcherWithPublicationsSchema(
|
||||||
researcher=researcher,
|
researcher=researcher,
|
||||||
publications=publications,
|
publications=publications,
|
||||||
|
stats=stats,
|
||||||
new_records=0,
|
new_records=0,
|
||||||
updated_records=0,
|
updated_records=0,
|
||||||
unchanged_records=0,
|
unchanged_records=0,
|
||||||
@@ -121,6 +144,53 @@ def search_and_sync_researcher(orcid_id: str, db: Session = Depends(get_db)):
|
|||||||
)
|
)
|
||||||
|
|
||||||
|
|
||||||
|
# ---------------------------------------------------------
# ENDPOINT 1: SEARCH + SYNC (no counters)
# ---------------------------------------------------------
@router.get("/search/{orcid_id}", response_model=ResearcherWithPublicationsSchema)
def search_and_sync_researcher(orcid_id: str, db: Session = Depends(get_db)):
    """Search and sync a single researcher identified by ``orcid_id``.

    All work is delegated to ``build_search_response``, the same helper the
    batch endpoint uses, so both endpoints return the same payload shape.

    Args:
        orcid_id: ORCID iD taken from the URL path.
        db: Database session injected by FastAPI via ``get_db``.

    Returns:
        Whatever ``build_search_response`` returns for this iD.
    """
    response = build_search_response(orcid_id, db)
    return response
|
||||||
|
|
||||||
|
|
||||||
|
@router.post("/search", response_model=ResearcherBatchSearchResponseSchema)
def search_and_sync_researchers(
    payload: ResearcherBatchSearchRequestSchema,
    db: Session = Depends(get_db),
):
    """Search and sync several researchers in a single request.

    Every distinct ORCID iD in ``payload.orcid_ids`` is processed with
    ``build_search_response``. If processing one iD raises, the session is
    rolled back, the failure is recorded in ``errors`` and the loop moves on
    to the next iD, so one bad iD does not abort the whole batch.

    Args:
        payload: Request body carrying the list of ORCID iDs.
        db: Database session injected by FastAPI via ``get_db``.

    Returns:
        A ``ResearcherBatchSearchResponseSchema`` with the successful
        results, the per-iD errors, the number of distinct iDs requested and
        the number that were processed successfully.
    """
    # Drop repeated iDs so ORCID is queried once per researcher; dict keys
    # keep first-seen order, so the input order is preserved.
    distinct_ids = list(dict.fromkeys(payload.orcid_ids))

    succeeded: List[ResearcherWithPublicationsSchema] = []
    failed: List[ResearcherSearchErrorSchema] = []

    for orcid_id in distinct_ids:
        try:
            response = build_search_response(orcid_id, db)
        except Exception as exc:
            # Discard any partial work for this iD before continuing.
            db.rollback()
            if isinstance(exc, httpx.HTTPStatusError):
                # Upstream HTTP failure: report only the status code.
                message = f"ORCID devolvió {exc.response.status_code} para {orcid_id}."
            else:
                message = str(exc)
            failed.append(
                ResearcherSearchErrorSchema(orcid_id=orcid_id, detail=message)
            )
        else:
            succeeded.append(response)

    return ResearcherBatchSearchResponseSchema(
        results=succeeded,
        errors=failed,
        total_requested=len(distinct_ids),
        total_processed=len(succeeded),
    )
|
||||||
|
|
||||||
|
|
||||||
# ---------------------------------------------------------
|
# ---------------------------------------------------------
|
||||||
# ENDPOINT 2: SYNC COMPLETO (con contadores + status)
|
# ENDPOINT 2: SYNC COMPLETO (con contadores + status)
|
||||||
# ---------------------------------------------------------
|
# ---------------------------------------------------------
|
||||||
@@ -201,6 +271,7 @@ def sync_researcher(orcid_id: str, db: Session = Depends(get_db)):
|
|||||||
return ResearcherWithPublicationsSchema(
|
return ResearcherWithPublicationsSchema(
|
||||||
researcher=researcher,
|
researcher=researcher,
|
||||||
publications=publications_output,
|
publications=publications_output,
|
||||||
|
stats=build_researcher_stats(publications_output),
|
||||||
new_records=new_count,
|
new_records=new_count,
|
||||||
updated_records=updated_count,
|
updated_records=updated_count,
|
||||||
unchanged_records=unchanged_count,
|
unchanged_records=unchanged_count,
|
||||||
|
|||||||
@@ -1,6 +1,6 @@
|
|||||||
from pydantic import BaseModel
|
from pydantic import BaseModel, Field
|
||||||
from uuid import UUID
|
from uuid import UUID
|
||||||
from typing import Optional, List
|
from typing import Optional, List, Dict
|
||||||
from datetime import datetime
|
from datetime import datetime
|
||||||
from app.schema.publication import PublicationSchema
|
from app.schema.publication import PublicationSchema
|
||||||
|
|
||||||
@@ -14,14 +14,35 @@ class ResearcherSchema(BaseModel):
|
|||||||
model_config = {"from_attributes": True}
|
model_config = {"from_attributes": True}
|
||||||
|
|
||||||
|
|
||||||
|
class ResearcherStatsSchema(BaseModel):
    """Publication statistics attached to a researcher response."""

    # Number of publications included in the statistics.
    total_publications: int
    # Count of publications keyed by publication type.
    publication_types: Dict[str, int]
|
||||||
|
|
||||||
|
|
||||||
class ResearcherWithPublicationsSchema(BaseModel):
    """Researcher together with their publications, statistics and sync counters."""

    researcher: ResearcherSchema
    publications: List[PublicationSchema]
    # Aggregated publication counts for this researcher.
    stats: ResearcherStatsSchema

    # Counters describing the outcome of a sync.
    new_records: int
    updated_records: int
    unchanged_records: int
    total_records: int

    # Allow validation from attribute-bearing objects (e.g. ORM instances).
    model_config = {"from_attributes": True}
|
||||||
|
|
||||||
|
|
||||||
|
class ResearcherBatchSearchRequestSchema(BaseModel):
    """Request body for the batch researcher search endpoint."""

    # ORCID iDs to search; at least one entry is required.
    orcid_ids: List[str] = Field(min_length=1)
|
||||||
|
|
||||||
|
|
||||||
|
class ResearcherSearchErrorSchema(BaseModel):
    """Failure report for a single ORCID iD within a batch search."""

    # The ORCID iD whose processing failed.
    orcid_id: str
    # Human-readable description of the failure.
    detail: str
|
||||||
|
|
||||||
|
|
||||||
|
class ResearcherBatchSearchResponseSchema(BaseModel):
    """Response body for the batch researcher search endpoint."""

    # One entry per ORCID iD that was processed successfully.
    results: List[ResearcherWithPublicationsSchema]
    # One entry per ORCID iD whose processing failed.
    errors: List[ResearcherSearchErrorSchema]
    # Number of ORCID iDs the batch attempted.
    total_requested: int
    # Number of ORCID iDs that produced a result.
    total_processed: int
|
||||||
|
|||||||
@@ -1,5 +1,3 @@
|
|||||||
version: "3.9"
|
|
||||||
|
|
||||||
services:
|
services:
|
||||||
|
|
||||||
backend:
|
backend:
|
||||||
|
|||||||
Reference in New Issue
Block a user