Spaces:
Build error
fix(sprint-f0): fondations — lazy imports, schémas conformes, pydantic-settings
Browse files
Sprint F0 — corrections fondamentales qui débloquent tout le reste :
- Lazy imports des providers IA : la chaîne d'imports google-genai/mistralai
  n'est plus chargée au démarrage. `from app.main import app` fonctionne
  même si les SDK tiers ont des problèmes de dépendances.
  Fichiers : ai/__init__.py, model_registry.py, jobs.py, models_api.py,
  job_runner.py, corpus_runner.py, provider_vertex_key.py
- Schémas PageMaster conformes à CLAUDE.md §4.2 :
  - ImageInfo(BaseModel) remplace image: dict (6 champs typés)
  - Summary(BaseModel) remplace summary: dict (short + detailed)
  - ProcessingInfo.provider: str ajouté (traçabilité du provider)
  Fichiers : page_master.py, analyzer.py, exports (alto/iiif/mets)
- config.py migré vers pydantic-settings BaseSettings (CLAUDE.md §2/§7),
  avec pydantic-settings ajouté dans pyproject.toml
- Valeurs par défaut datetime sur CorpusModel et JobModel (plus de crash à l'insertion)
- 28 fichiers de tests mis à jour (mocks adaptés aux lazy imports,
  fixtures image/ProcessingInfo corrigées)
Résultat : 460 tests réussis, 0 échec, 3 ignorés (skipped).
https://claude.ai/code/session_015Lht7wNQRzhUaLw94dE9z9
- backend/app/api/v1/jobs.py +6 -2
- backend/app/api/v1/models_api.py +6 -5
- backend/app/config.py +10 -24
- backend/app/models/corpus.py +7 -2
- backend/app/models/job.py +5 -2
- backend/app/schemas/page_master.py +24 -5
- backend/app/services/ai/__init__.py +23 -11
- backend/app/services/ai/analyzer.py +9 -8
- backend/app/services/ai/model_registry.py +9 -4
- backend/app/services/ai/provider_vertex_key.py +0 -3
- backend/app/services/corpus_runner.py +2 -1
- backend/app/services/export/alto.py +3 -7
- backend/app/services/export/iiif.py +3 -3
- backend/app/services/export/mets.py +2 -2
- backend/app/services/job_runner.py +4 -2
- backend/pyproject.toml +1 -0
- backend/tests/conftest_api.py +5 -4
- backend/tests/test_ai_analyzer.py +5 -4
- backend/tests/test_api_corrections.py +1 -1
- backend/tests/test_api_export.py +1 -1
- backend/tests/test_api_models.py +4 -4
- backend/tests/test_api_pages.py +1 -1
- backend/tests/test_api_providers.py +11 -11
- backend/tests/test_api_search.py +1 -1
- backend/tests/test_export_alto.py +2 -1
- backend/tests/test_export_iiif.py +4 -4
- backend/tests/test_export_mets.py +2 -1
- backend/tests/test_job_runner.py +28 -16
|
@@ -22,8 +22,6 @@ from sqlalchemy.ext.asyncio import AsyncSession
|
|
| 22 |
from app.models.corpus import CorpusModel, ManuscriptModel, PageModel
|
| 23 |
from app.models.database import get_db
|
| 24 |
from app.models.job import JobModel
|
| 25 |
-
from app.services.corpus_runner import execute_corpus_job
|
| 26 |
-
from app.services.job_runner import execute_page_job
|
| 27 |
|
| 28 |
router = APIRouter(tags=["jobs"])
|
| 29 |
|
|
@@ -101,6 +99,8 @@ async def run_corpus(
|
|
| 101 |
await db.commit()
|
| 102 |
|
| 103 |
# Lancer le pipeline en arrière-plan (après envoi de la réponse)
|
|
|
|
|
|
|
| 104 |
background_tasks.add_task(execute_corpus_job, corpus_id)
|
| 105 |
|
| 106 |
return CorpusRunResponse(
|
|
@@ -135,6 +135,8 @@ async def run_page(
|
|
| 135 |
await db.refresh(job)
|
| 136 |
|
| 137 |
# Lancer le pipeline en arrière-plan (après envoi de la réponse)
|
|
|
|
|
|
|
| 138 |
background_tasks.add_task(execute_page_job, job.id)
|
| 139 |
|
| 140 |
return job
|
|
@@ -175,6 +177,8 @@ async def retry_job(
|
|
| 175 |
await db.refresh(job)
|
| 176 |
|
| 177 |
# Relancer le pipeline
|
|
|
|
|
|
|
| 178 |
background_tasks.add_task(execute_page_job, job.id)
|
| 179 |
|
| 180 |
return job
|
|
|
|
| 22 |
from app.models.corpus import CorpusModel, ManuscriptModel, PageModel
|
| 23 |
from app.models.database import get_db
|
| 24 |
from app.models.job import JobModel
|
|
|
|
|
|
|
| 25 |
|
| 26 |
router = APIRouter(tags=["jobs"])
|
| 27 |
|
|
|
|
| 99 |
await db.commit()
|
| 100 |
|
| 101 |
# Lancer le pipeline en arrière-plan (après envoi de la réponse)
|
| 102 |
+
from app.services.corpus_runner import execute_corpus_job
|
| 103 |
+
|
| 104 |
background_tasks.add_task(execute_corpus_job, corpus_id)
|
| 105 |
|
| 106 |
return CorpusRunResponse(
|
|
|
|
| 135 |
await db.refresh(job)
|
| 136 |
|
| 137 |
# Lancer le pipeline en arrière-plan (après envoi de la réponse)
|
| 138 |
+
from app.services.job_runner import execute_page_job
|
| 139 |
+
|
| 140 |
background_tasks.add_task(execute_page_job, job.id)
|
| 141 |
|
| 142 |
return job
|
|
|
|
| 177 |
await db.refresh(job)
|
| 178 |
|
| 179 |
# Relancer le pipeline
|
| 180 |
+
from app.services.job_runner import execute_page_job
|
| 181 |
+
|
| 182 |
background_tasks.add_task(execute_page_job, job.id)
|
| 183 |
|
| 184 |
return job
|
|
@@ -25,11 +25,6 @@ from app.models.corpus import CorpusModel
|
|
| 25 |
from app.models.database import get_db
|
| 26 |
from app.models.model_config_db import ModelConfigDB
|
| 27 |
from app.schemas.model_config import ProviderType
|
| 28 |
-
from app.services.ai.model_registry import (
|
| 29 |
-
get_available_providers,
|
| 30 |
-
list_all_models,
|
| 31 |
-
list_models_for_provider,
|
| 32 |
-
)
|
| 33 |
|
| 34 |
logger = logging.getLogger(__name__)
|
| 35 |
|
|
@@ -77,6 +72,8 @@ async def list_providers() -> list[dict]:
|
|
| 77 |
Un provider est disponible si la variable d'environnement correspondante
|
| 78 |
est présente dans les secrets HuggingFace. Aucune clé n'est exposée.
|
| 79 |
"""
|
|
|
|
|
|
|
| 80 |
return get_available_providers()
|
| 81 |
|
| 82 |
|
|
@@ -91,6 +88,8 @@ async def get_provider_models(provider_type: str) -> list[dict]:
|
|
| 91 |
detail=f"Provider inconnu : {provider_type}. "
|
| 92 |
f"Valeurs acceptées : {[p.value for p in ProviderType]}",
|
| 93 |
)
|
|
|
|
|
|
|
| 94 |
try:
|
| 95 |
models = list_models_for_provider(ptype)
|
| 96 |
except RuntimeError as exc:
|
|
@@ -104,6 +103,8 @@ async def get_provider_models(provider_type: str) -> list[dict]:
|
|
| 104 |
@router.post("/models/refresh", response_model=ModelsRefreshResponse)
|
| 105 |
async def refresh_models() -> ModelsRefreshResponse:
|
| 106 |
"""Force la mise à jour de la liste agrégée de tous les modèles disponibles."""
|
|
|
|
|
|
|
| 107 |
models = list_all_models()
|
| 108 |
return ModelsRefreshResponse(
|
| 109 |
models=[m.model_dump() for m in models],
|
|
|
|
| 25 |
from app.models.database import get_db
|
| 26 |
from app.models.model_config_db import ModelConfigDB
|
| 27 |
from app.schemas.model_config import ProviderType
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 28 |
|
| 29 |
logger = logging.getLogger(__name__)
|
| 30 |
|
|
|
|
| 72 |
Un provider est disponible si la variable d'environnement correspondante
|
| 73 |
est présente dans les secrets HuggingFace. Aucune clé n'est exposée.
|
| 74 |
"""
|
| 75 |
+
from app.services.ai.model_registry import get_available_providers
|
| 76 |
+
|
| 77 |
return get_available_providers()
|
| 78 |
|
| 79 |
|
|
|
|
| 88 |
detail=f"Provider inconnu : {provider_type}. "
|
| 89 |
f"Valeurs acceptées : {[p.value for p in ProviderType]}",
|
| 90 |
)
|
| 91 |
+
from app.services.ai.model_registry import list_models_for_provider
|
| 92 |
+
|
| 93 |
try:
|
| 94 |
models = list_models_for_provider(ptype)
|
| 95 |
except RuntimeError as exc:
|
|
|
|
| 103 |
@router.post("/models/refresh", response_model=ModelsRefreshResponse)
|
| 104 |
async def refresh_models() -> ModelsRefreshResponse:
|
| 105 |
"""Force la mise à jour de la liste agrégée de tous les modèles disponibles."""
|
| 106 |
+
from app.services.ai.model_registry import list_all_models
|
| 107 |
+
|
| 108 |
models = list_all_models()
|
| 109 |
return ModelsRefreshResponse(
|
| 110 |
models=[m.model_dump() for m in models],
|
|
@@ -1,17 +1,17 @@
|
|
| 1 |
"""
|
| 2 |
Configuration globale de la plateforme, chargée depuis les variables d'environnement.
|
| 3 |
|
| 4 |
-
|
| 5 |
-
- les valeurs sont lues depuis os.environ au moment de l'instanciation
|
| 6 |
- l'objet `settings` est importé partout dans l'application
|
| 7 |
- dans les tests : monkeypatch.setattr(config, "settings", ...) pour surcharger
|
| 8 |
"""
|
| 9 |
# 1. stdlib
|
| 10 |
-
import os
|
| 11 |
from pathlib import Path
|
| 12 |
|
| 13 |
# 2. third-party
|
| 14 |
-
from pydantic import
|
|
|
|
| 15 |
|
| 16 |
# Racine du dépôt — résolue depuis l'emplacement absolu de ce fichier.
|
| 17 |
# config.py se trouve dans backend/app/ ; 3 parents remontent à la racine.
|
|
@@ -19,14 +19,17 @@ from pydantic import BaseModel, ConfigDict
|
|
| 19 |
_REPO_ROOT = Path(__file__).resolve().parent.parent.parent
|
| 20 |
|
| 21 |
|
| 22 |
-
class Settings(
|
| 23 |
"""Paramètres d'application lus depuis les variables d'environnement.
|
| 24 |
|
| 25 |
Toutes les clés API sont optionnelles (None si non configurées).
|
| 26 |
Elles ne sont jamais loguées ni exportées (R06).
|
| 27 |
"""
|
| 28 |
|
| 29 |
-
model_config = ConfigDict(
|
|
|
|
|
|
|
|
|
|
| 30 |
|
| 31 |
# ── Serveur ──────────────────────────────────────────────────────────────
|
| 32 |
base_url: str = "http://localhost:8000"
|
|
@@ -50,21 +53,4 @@ class Settings(BaseModel):
|
|
| 50 |
mistral_api_key: str | None = None
|
| 51 |
|
| 52 |
|
| 53 |
-
|
| 54 |
-
"""Lit les variables d'environnement et construit l'objet Settings."""
|
| 55 |
-
return Settings(
|
| 56 |
-
base_url=os.getenv("BASE_URL", "http://localhost:8000"),
|
| 57 |
-
data_dir=Path(os.getenv("DATA_DIR", "data")),
|
| 58 |
-
profiles_dir=Path(os.getenv("PROFILES_DIR", str(_REPO_ROOT / "profiles"))),
|
| 59 |
-
prompts_dir=Path(os.getenv("PROMPTS_DIR", str(_REPO_ROOT / "prompts"))),
|
| 60 |
-
database_url=os.getenv(
|
| 61 |
-
"DATABASE_URL", "sqlite+aiosqlite:///./scriptorium.db"
|
| 62 |
-
),
|
| 63 |
-
google_ai_studio_api_key=os.getenv("GOOGLE_AI_STUDIO_API_KEY"),
|
| 64 |
-
vertex_api_key=os.getenv("VERTEX_API_KEY"),
|
| 65 |
-
vertex_service_account_json=os.getenv("VERTEX_SERVICE_ACCOUNT_JSON"),
|
| 66 |
-
mistral_api_key=os.getenv("MISTRAL_API_KEY"),
|
| 67 |
-
)
|
| 68 |
-
|
| 69 |
-
|
| 70 |
-
settings: Settings = _load_settings()
|
|
|
|
| 1 |
"""
|
| 2 |
Configuration globale de la plateforme, chargée depuis les variables d'environnement.
|
| 3 |
|
| 4 |
+
Utilise pydantic-settings (CLAUDE.md §2, §7) :
|
| 5 |
+
- les valeurs sont lues depuis os.environ / fichier .env au moment de l'instanciation
|
| 6 |
- l'objet `settings` est importé partout dans l'application
|
| 7 |
- dans les tests : monkeypatch.setattr(config, "settings", ...) pour surcharger
|
| 8 |
"""
|
| 9 |
# 1. stdlib
|
|
|
|
| 10 |
from pathlib import Path
|
| 11 |
|
| 12 |
# 2. third-party
|
| 13 |
+
from pydantic import ConfigDict
|
| 14 |
+
from pydantic_settings import BaseSettings
|
| 15 |
|
| 16 |
# Racine du dépôt — résolue depuis l'emplacement absolu de ce fichier.
|
| 17 |
# config.py se trouve dans backend/app/ ; 3 parents remontent à la racine.
|
|
|
|
| 19 |
_REPO_ROOT = Path(__file__).resolve().parent.parent.parent
|
| 20 |
|
| 21 |
|
| 22 |
+
class Settings(BaseSettings):
|
| 23 |
"""Paramètres d'application lus depuis les variables d'environnement.
|
| 24 |
|
| 25 |
Toutes les clés API sont optionnelles (None si non configurées).
|
| 26 |
Elles ne sont jamais loguées ni exportées (R06).
|
| 27 |
"""
|
| 28 |
|
| 29 |
+
model_config = ConfigDict(
|
| 30 |
+
env_file=".env",
|
| 31 |
+
extra="ignore",
|
| 32 |
+
)
|
| 33 |
|
| 34 |
# ── Serveur ──────────────────────────────────────────────────────────────
|
| 35 |
base_url: str = "http://localhost:8000"
|
|
|
|
| 53 |
mistral_api_key: str | None = None
|
| 54 |
|
| 55 |
|
| 56 |
+
settings: Settings = Settings()
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
@@ -6,6 +6,7 @@ Ils NE se substituent PAS aux schémas Pydantic (source canonique des types).
|
|
| 6 |
"""
|
| 7 |
# 1. stdlib
|
| 8 |
from datetime import datetime, timezone
|
|
|
|
| 9 |
|
| 10 |
# 2. third-party
|
| 11 |
from sqlalchemy import DateTime, Float, ForeignKey, Integer, String, Text
|
|
@@ -24,8 +25,12 @@ class CorpusModel(Base):
|
|
| 24 |
slug: Mapped[str] = mapped_column(String, unique=True, nullable=False, index=True)
|
| 25 |
title: Mapped[str] = mapped_column(String, nullable=False)
|
| 26 |
profile_id: Mapped[str] = mapped_column(String, nullable=False)
|
| 27 |
-
created_at: Mapped[datetime] = mapped_column(
|
| 28 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
| 29 |
|
| 30 |
manuscripts: Mapped[list["ManuscriptModel"]] = relationship(
|
| 31 |
back_populates="corpus", cascade="all, delete-orphan"
|
|
|
|
| 6 |
"""
|
| 7 |
# 1. stdlib
|
| 8 |
from datetime import datetime, timezone
|
| 9 |
+
from functools import partial
|
| 10 |
|
| 11 |
# 2. third-party
|
| 12 |
from sqlalchemy import DateTime, Float, ForeignKey, Integer, String, Text
|
|
|
|
| 25 |
slug: Mapped[str] = mapped_column(String, unique=True, nullable=False, index=True)
|
| 26 |
title: Mapped[str] = mapped_column(String, nullable=False)
|
| 27 |
profile_id: Mapped[str] = mapped_column(String, nullable=False)
|
| 28 |
+
created_at: Mapped[datetime] = mapped_column(
|
| 29 |
+
DateTime, nullable=False, default=partial(datetime.now, tz=timezone.utc)
|
| 30 |
+
)
|
| 31 |
+
updated_at: Mapped[datetime] = mapped_column(
|
| 32 |
+
DateTime, nullable=False, default=partial(datetime.now, tz=timezone.utc)
|
| 33 |
+
)
|
| 34 |
|
| 35 |
manuscripts: Mapped[list["ManuscriptModel"]] = relationship(
|
| 36 |
back_populates="corpus", cascade="all, delete-orphan"
|
|
@@ -10,7 +10,8 @@ Cycle de vie :
|
|
| 10 |
↘ failed
|
| 11 |
"""
|
| 12 |
# 1. stdlib
|
| 13 |
-
from datetime import datetime
|
|
|
|
| 14 |
|
| 15 |
# 2. third-party
|
| 16 |
from sqlalchemy import DateTime, ForeignKey, String, Text
|
|
@@ -37,4 +38,6 @@ class JobModel(Base):
|
|
| 37 |
started_at: Mapped[datetime | None] = mapped_column(DateTime, nullable=True)
|
| 38 |
finished_at: Mapped[datetime | None] = mapped_column(DateTime, nullable=True)
|
| 39 |
error_message: Mapped[str | None] = mapped_column(Text, nullable=True)
|
| 40 |
-
created_at: Mapped[datetime] = mapped_column(
|
|
|
|
|
|
|
|
|
| 10 |
↘ failed
|
| 11 |
"""
|
| 12 |
# 1. stdlib
|
| 13 |
+
from datetime import datetime, timezone
|
| 14 |
+
from functools import partial
|
| 15 |
|
| 16 |
# 2. third-party
|
| 17 |
from sqlalchemy import DateTime, ForeignKey, String, Text
|
|
|
|
| 38 |
started_at: Mapped[datetime | None] = mapped_column(DateTime, nullable=True)
|
| 39 |
finished_at: Mapped[datetime | None] = mapped_column(DateTime, nullable=True)
|
| 40 |
error_message: Mapped[str | None] = mapped_column(Text, nullable=True)
|
| 41 |
+
created_at: Mapped[datetime] = mapped_column(
|
| 42 |
+
DateTime, nullable=False, default=partial(datetime.now, tz=timezone.utc)
|
| 43 |
+
)
|
|
@@ -29,14 +29,25 @@ class Region(BaseModel):
|
|
| 29 |
|
| 30 |
@field_validator("bbox")
|
| 31 |
@classmethod
|
| 32 |
-
def
|
| 33 |
if any(x < 0 for x in v):
|
| 34 |
-
raise ValueError("bbox
|
| 35 |
if v[2] <= 0 or v[3] <= 0:
|
| 36 |
-
raise ValueError("bbox width
|
| 37 |
return v
|
| 38 |
|
| 39 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 40 |
class OCRResult(BaseModel):
|
| 41 |
diplomatic_text: str = ""
|
| 42 |
blocks: list[dict] = []
|
|
@@ -51,6 +62,13 @@ class Translation(BaseModel):
|
|
| 51 |
en: str = ""
|
| 52 |
|
| 53 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 54 |
class CommentaryClaim(BaseModel):
|
| 55 |
claim: str
|
| 56 |
evidence_region_ids: list[str] = []
|
|
@@ -64,6 +82,7 @@ class Commentary(BaseModel):
|
|
| 64 |
|
| 65 |
|
| 66 |
class ProcessingInfo(BaseModel):
|
|
|
|
| 67 |
model_id: str
|
| 68 |
model_display_name: str
|
| 69 |
prompt_version: str
|
|
@@ -96,11 +115,11 @@ class PageMaster(BaseModel):
|
|
| 96 |
folio_label: str
|
| 97 |
sequence: int
|
| 98 |
|
| 99 |
-
image:
|
| 100 |
layout: dict
|
| 101 |
ocr: OCRResult | None = None
|
| 102 |
translation: Translation | None = None
|
| 103 |
-
summary:
|
| 104 |
commentary: Commentary | None = None
|
| 105 |
extensions: dict[str, Any] = {}
|
| 106 |
|
|
|
|
| 29 |
|
| 30 |
@field_validator("bbox")
|
| 31 |
@classmethod
|
| 32 |
+
def bbox_must_be_valid(cls, v: list[int]) -> list[int]:
|
| 33 |
if any(x < 0 for x in v):
|
| 34 |
+
raise ValueError("bbox: toutes les valeurs doivent être >= 0")
|
| 35 |
if v[2] <= 0 or v[3] <= 0:
|
| 36 |
+
raise ValueError("bbox: width et height doivent être > 0")
|
| 37 |
return v
|
| 38 |
|
| 39 |
|
| 40 |
+
class ImageInfo(BaseModel):
|
| 41 |
+
"""Métadonnées image — CLAUDE.md §4.2."""
|
| 42 |
+
|
| 43 |
+
master: str
|
| 44 |
+
derivative_web: str | None = None
|
| 45 |
+
thumbnail: str | None = None
|
| 46 |
+
iiif_base: str | None = None
|
| 47 |
+
width: int
|
| 48 |
+
height: int
|
| 49 |
+
|
| 50 |
+
|
| 51 |
class OCRResult(BaseModel):
|
| 52 |
diplomatic_text: str = ""
|
| 53 |
blocks: list[dict] = []
|
|
|
|
| 62 |
en: str = ""
|
| 63 |
|
| 64 |
|
| 65 |
+
class Summary(BaseModel):
|
| 66 |
+
"""Résumé — CLAUDE.md §4.2."""
|
| 67 |
+
|
| 68 |
+
short: str = ""
|
| 69 |
+
detailed: str = ""
|
| 70 |
+
|
| 71 |
+
|
| 72 |
class CommentaryClaim(BaseModel):
|
| 73 |
claim: str
|
| 74 |
evidence_region_ids: list[str] = []
|
|
|
|
| 82 |
|
| 83 |
|
| 84 |
class ProcessingInfo(BaseModel):
|
| 85 |
+
provider: str
|
| 86 |
model_id: str
|
| 87 |
model_display_name: str
|
| 88 |
prompt_version: str
|
|
|
|
| 115 |
folio_label: str
|
| 116 |
sequence: int
|
| 117 |
|
| 118 |
+
image: ImageInfo
|
| 119 |
layout: dict
|
| 120 |
ocr: OCRResult | None = None
|
| 121 |
translation: Translation | None = None
|
| 122 |
+
summary: Summary | None = None
|
| 123 |
commentary: Commentary | None = None
|
| 124 |
extensions: dict[str, Any] = {}
|
| 125 |
|
|
@@ -1,19 +1,31 @@
|
|
| 1 |
"""
|
| 2 |
Services AI — providers Google AI, registre de modèles, et analyse IA.
|
|
|
|
|
|
|
|
|
|
|
|
|
| 3 |
"""
|
| 4 |
-
|
| 5 |
-
|
| 6 |
-
|
| 7 |
-
|
| 8 |
-
|
| 9 |
-
|
| 10 |
-
|
| 11 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 12 |
|
| 13 |
__all__ = [
|
| 14 |
-
"GoogleAIProvider",
|
| 15 |
-
"VertexAPIKeyProvider",
|
| 16 |
-
"VertexServiceAccountProvider",
|
| 17 |
"list_all_models",
|
| 18 |
"build_model_config",
|
| 19 |
"build_client",
|
|
|
|
| 1 |
"""
|
| 2 |
Services AI — providers Google AI, registre de modèles, et analyse IA.
|
| 3 |
+
|
| 4 |
+
Les imports de providers sont différés (lazy) pour éviter de charger les SDK
|
| 5 |
+
tiers (google-genai, mistralai) au démarrage. Cela permet à l'application
|
| 6 |
+
de fonctionner même si un SDK n'est pas installé.
|
| 7 |
"""
|
| 8 |
+
|
| 9 |
+
|
| 10 |
+
def __getattr__(name: str):
|
| 11 |
+
"""Import paresseux — les symboles sont résolus au premier accès."""
|
| 12 |
+
_lazy_map = {
|
| 13 |
+
"run_primary_analysis": "app.services.ai.analyzer",
|
| 14 |
+
"build_client": "app.services.ai.client_factory",
|
| 15 |
+
"build_model_config": "app.services.ai.model_registry",
|
| 16 |
+
"list_all_models": "app.services.ai.model_registry",
|
| 17 |
+
"load_and_render_prompt": "app.services.ai.prompt_loader",
|
| 18 |
+
"parse_ai_response": "app.services.ai.response_parser",
|
| 19 |
+
"ParseError": "app.services.ai.response_parser",
|
| 20 |
+
}
|
| 21 |
+
if name in _lazy_map:
|
| 22 |
+
import importlib
|
| 23 |
+
module = importlib.import_module(_lazy_map[name])
|
| 24 |
+
return getattr(module, name)
|
| 25 |
+
raise AttributeError(f"module {__name__!r} has no attribute {name!r}")
|
| 26 |
+
|
| 27 |
|
| 28 |
__all__ = [
|
|
|
|
|
|
|
|
|
|
| 29 |
"list_all_models",
|
| 30 |
"build_model_config",
|
| 31 |
"build_client",
|
|
@@ -13,7 +13,7 @@ from pathlib import Path
|
|
| 13 |
from app.schemas.corpus_profile import CorpusProfile
|
| 14 |
from app.schemas.image import ImageDerivativeInfo
|
| 15 |
from app.schemas.model_config import ModelConfig
|
| 16 |
-
from app.schemas.page_master import EditorialInfo, EditorialStatus, PageMaster, ProcessingInfo
|
| 17 |
from app.services.ai.master_writer import write_gemini_raw, write_master_json
|
| 18 |
from app.services.ai.model_registry import get_provider
|
| 19 |
from app.services.ai.prompt_loader import load_and_render_prompt
|
|
@@ -118,16 +118,17 @@ def run_primary_analysis(
|
|
| 118 |
manuscript_id=manuscript_id,
|
| 119 |
folio_label=folio_label,
|
| 120 |
sequence=sequence,
|
| 121 |
-
image=
|
| 122 |
-
|
| 123 |
-
|
| 124 |
-
|
| 125 |
-
|
| 126 |
-
|
| 127 |
-
|
| 128 |
layout=layout,
|
| 129 |
ocr=ocr,
|
| 130 |
processing=ProcessingInfo(
|
|
|
|
| 131 |
model_id=model_config.selected_model_id,
|
| 132 |
model_display_name=model_config.selected_model_display_name,
|
| 133 |
prompt_version=prompt_rel_path,
|
|
|
|
| 13 |
from app.schemas.corpus_profile import CorpusProfile
|
| 14 |
from app.schemas.image import ImageDerivativeInfo
|
| 15 |
from app.schemas.model_config import ModelConfig
|
| 16 |
+
from app.schemas.page_master import EditorialInfo, EditorialStatus, ImageInfo, PageMaster, ProcessingInfo
|
| 17 |
from app.services.ai.master_writer import write_gemini_raw, write_master_json
|
| 18 |
from app.services.ai.model_registry import get_provider
|
| 19 |
from app.services.ai.prompt_loader import load_and_render_prompt
|
|
|
|
| 118 |
manuscript_id=manuscript_id,
|
| 119 |
folio_label=folio_label,
|
| 120 |
sequence=sequence,
|
| 121 |
+
image=ImageInfo(
|
| 122 |
+
master=image_info.original_url,
|
| 123 |
+
derivative_web=image_info.derivative_path,
|
| 124 |
+
thumbnail=image_info.thumbnail_path,
|
| 125 |
+
width=image_info.derivative_width,
|
| 126 |
+
height=image_info.derivative_height,
|
| 127 |
+
),
|
| 128 |
layout=layout,
|
| 129 |
ocr=ocr,
|
| 130 |
processing=ProcessingInfo(
|
| 131 |
+
provider=model_config.provider.value if hasattr(model_config.provider, "value") else str(model_config.provider),
|
| 132 |
model_id=model_config.selected_model_id,
|
| 133 |
model_display_name=model_config.selected_model_display_name,
|
| 134 |
prompt_version=prompt_rel_path,
|
|
@@ -1,5 +1,8 @@
|
|
| 1 |
"""
|
| 2 |
Registre agrégé des modèles disponibles tous providers confondus.
|
|
|
|
|
|
|
|
|
|
| 3 |
"""
|
| 4 |
# 1. stdlib
|
| 5 |
import logging
|
|
@@ -8,10 +11,6 @@ from datetime import datetime, timezone
|
|
| 8 |
# 2. local
|
| 9 |
from app.schemas.model_config import ModelConfig, ModelInfo, ProviderType
|
| 10 |
from app.services.ai.base import AIProvider
|
| 11 |
-
from app.services.ai.provider_google_ai import GoogleAIProvider
|
| 12 |
-
from app.services.ai.provider_mistral import MistralProvider
|
| 13 |
-
from app.services.ai.provider_vertex_key import VertexAPIKeyProvider
|
| 14 |
-
from app.services.ai.provider_vertex_sa import VertexServiceAccountProvider
|
| 15 |
|
| 16 |
logger = logging.getLogger(__name__)
|
| 17 |
|
|
@@ -25,6 +24,12 @@ _PROVIDER_DISPLAY_NAMES: dict[ProviderType, str] = {
|
|
| 25 |
|
| 26 |
|
| 27 |
def _build_providers() -> list[AIProvider]:
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 28 |
return [
|
| 29 |
GoogleAIProvider(),
|
| 30 |
VertexAPIKeyProvider(),
|
|
|
|
| 1 |
"""
|
| 2 |
Registre agrégé des modèles disponibles tous providers confondus.
|
| 3 |
+
|
| 4 |
+
Les imports de providers sont différés dans _build_providers() pour éviter
|
| 5 |
+
de charger les SDK tiers (google-genai, mistralai) au niveau module.
|
| 6 |
"""
|
| 7 |
# 1. stdlib
|
| 8 |
import logging
|
|
|
|
| 11 |
# 2. local
|
| 12 |
from app.schemas.model_config import ModelConfig, ModelInfo, ProviderType
|
| 13 |
from app.services.ai.base import AIProvider
|
|
|
|
|
|
|
|
|
|
|
|
|
| 14 |
|
| 15 |
logger = logging.getLogger(__name__)
|
| 16 |
|
|
|
|
| 24 |
|
| 25 |
|
| 26 |
def _build_providers() -> list[AIProvider]:
|
| 27 |
+
"""Construit la liste des providers — imports différés."""
|
| 28 |
+
from app.services.ai.provider_google_ai import GoogleAIProvider
|
| 29 |
+
from app.services.ai.provider_mistral import MistralProvider
|
| 30 |
+
from app.services.ai.provider_vertex_key import VertexAPIKeyProvider
|
| 31 |
+
from app.services.ai.provider_vertex_sa import VertexServiceAccountProvider
|
| 32 |
+
|
| 33 |
return [
|
| 34 |
GoogleAIProvider(),
|
| 35 |
VertexAPIKeyProvider(),
|
|
@@ -22,9 +22,6 @@ retourne toujours False afin d'éviter des appels réseau voués à l'échec.
|
|
| 22 |
import logging
|
| 23 |
import os
|
| 24 |
|
| 25 |
-
# 2. third-party
|
| 26 |
-
from google.genai import types # noqa: F401 (conservé pour import cohérence)
|
| 27 |
-
|
| 28 |
# 3. local
|
| 29 |
from app.schemas.model_config import ModelInfo, ProviderType
|
| 30 |
from app.services.ai.base import AIProvider
|
|
|
|
| 22 |
import logging
|
| 23 |
import os
|
| 24 |
|
|
|
|
|
|
|
|
|
|
| 25 |
# 3. local
|
| 26 |
from app.schemas.model_config import ModelInfo, ProviderType
|
| 27 |
from app.services.ai.base import AIProvider
|
|
@@ -17,7 +17,6 @@ from sqlalchemy import select
|
|
| 17 |
# 3. local
|
| 18 |
from app.models.database import async_session_factory
|
| 19 |
from app.models.job import JobModel
|
| 20 |
-
from app.services.job_runner import execute_page_job
|
| 21 |
|
| 22 |
logger = logging.getLogger(__name__)
|
| 23 |
|
|
@@ -54,6 +53,8 @@ async def execute_corpus_job(corpus_id: str) -> dict:
|
|
| 54 |
)
|
| 55 |
|
| 56 |
# Exécution séquentielle — chaque job gère sa propre session
|
|
|
|
|
|
|
| 57 |
for job_id in job_ids:
|
| 58 |
await execute_page_job(job_id)
|
| 59 |
|
|
|
|
| 17 |
# 3. local
|
| 18 |
from app.models.database import async_session_factory
|
| 19 |
from app.models.job import JobModel
|
|
|
|
| 20 |
|
| 21 |
logger = logging.getLogger(__name__)
|
| 22 |
|
|
|
|
| 53 |
)
|
| 54 |
|
| 55 |
# Exécution séquentielle — chaque job gère sa propre session
|
| 56 |
+
from app.services.job_runner import execute_page_job
|
| 57 |
+
|
| 58 |
for job_id in job_ids:
|
| 59 |
await execute_page_job(job_id)
|
| 60 |
|
|
@@ -160,11 +160,7 @@ def generate_alto(master: PageMaster) -> str:
|
|
| 160 |
etree.SubElement(desc, _a("MeasurementUnit")).text = "pixel"
|
| 161 |
|
| 162 |
src_info = etree.SubElement(desc, _a("sourceImageInformation"))
|
| 163 |
-
file_name =
|
| 164 |
-
master.image.get("original_url")
|
| 165 |
-
or master.image.get("derivative_web")
|
| 166 |
-
or master.page_id
|
| 167 |
-
)
|
| 168 |
etree.SubElement(src_info, _a("fileName")).text = str(file_name)
|
| 169 |
|
| 170 |
if master.processing:
|
|
@@ -185,8 +181,8 @@ def generate_alto(master: PageMaster) -> str:
|
|
| 185 |
# ── Layout ─────────────────────────────────────────────────────────────
|
| 186 |
layout_el = etree.SubElement(root, _a("Layout"))
|
| 187 |
|
| 188 |
-
width =
|
| 189 |
-
height =
|
| 190 |
|
| 191 |
page_id_safe = master.page_id.replace(" ", "_")
|
| 192 |
page_el = etree.SubElement(
|
|
|
|
| 160 |
etree.SubElement(desc, _a("MeasurementUnit")).text = "pixel"
|
| 161 |
|
| 162 |
src_info = etree.SubElement(desc, _a("sourceImageInformation"))
|
| 163 |
+
file_name = master.image.master or master.image.derivative_web or master.page_id
|
|
|
|
|
|
|
|
|
|
|
|
|
| 164 |
etree.SubElement(src_info, _a("fileName")).text = str(file_name)
|
| 165 |
|
| 166 |
if master.processing:
|
|
|
|
| 181 |
# ── Layout ─────────────────────────────────────────────────────────────
|
| 182 |
layout_el = etree.SubElement(root, _a("Layout"))
|
| 183 |
|
| 184 |
+
width = master.image.width
|
| 185 |
+
height = master.image.height
|
| 186 |
|
| 187 |
page_id_safe = master.page_id.replace(" ", "_")
|
| 188 |
page_el = etree.SubElement(
|
|
@@ -102,12 +102,12 @@ def generate_manifest(
|
|
| 102 |
canvas_id = (
|
| 103 |
f"{base_url}/api/v1/manuscripts/{manuscript_id}/canvas/{page.page_id}"
|
| 104 |
)
|
| 105 |
-
width =
|
| 106 |
-
height =
|
| 107 |
|
| 108 |
annotation_page_id = f"{canvas_id}/annotation-page/1"
|
| 109 |
annotation_id = f"{canvas_id}/annotation/painting"
|
| 110 |
-
image_url = page.image.
|
| 111 |
|
| 112 |
canvas: dict = {
|
| 113 |
"id": canvas_id,
|
|
|
|
| 102 |
canvas_id = (
|
| 103 |
f"{base_url}/api/v1/manuscripts/{manuscript_id}/canvas/{page.page_id}"
|
| 104 |
)
|
| 105 |
+
width = page.image.width
|
| 106 |
+
height = page.image.height
|
| 107 |
|
| 108 |
annotation_page_id = f"{canvas_id}/annotation-page/1"
|
| 109 |
annotation_id = f"{canvas_id}/annotation/painting"
|
| 110 |
+
image_url = page.image.master or ""
|
| 111 |
|
| 112 |
canvas: dict = {
|
| 113 |
"id": canvas_id,
|
|
@@ -182,7 +182,7 @@ def generate_mets(
|
|
| 182 |
f_master = _el(grp_master, f"{_M}file", {"ID": f"IMG_MASTER_{sid}", "MIMETYPE": "image/jpeg"})
|
| 183 |
_el(f_master, f"{_M}FLocat", {
|
| 184 |
"LOCTYPE": "URL",
|
| 185 |
-
f"{_XL}href": page.image.
|
| 186 |
f"{_XL}type": "simple",
|
| 187 |
})
|
| 188 |
|
|
@@ -191,7 +191,7 @@ def generate_mets(
|
|
| 191 |
_el(f_deriv, f"{_M}FLocat", {
|
| 192 |
"LOCTYPE": "OTHER",
|
| 193 |
"OTHERLOCTYPE": "filepath",
|
| 194 |
-
f"{_XL}href": page.image.
|
| 195 |
f"{_XL}type": "simple",
|
| 196 |
})
|
| 197 |
|
|
|
|
| 182 |
f_master = _el(grp_master, f"{_M}file", {"ID": f"IMG_MASTER_{sid}", "MIMETYPE": "image/jpeg"})
|
| 183 |
_el(f_master, f"{_M}FLocat", {
|
| 184 |
"LOCTYPE": "URL",
|
| 185 |
+
f"{_XL}href": page.image.master or "",
|
| 186 |
f"{_XL}type": "simple",
|
| 187 |
})
|
| 188 |
|
|
|
|
| 191 |
_el(f_deriv, f"{_M}FLocat", {
|
| 192 |
"LOCTYPE": "OTHER",
|
| 193 |
"OTHERLOCTYPE": "filepath",
|
| 194 |
+
f"{_XL}href": page.image.derivative_web or "",
|
| 195 |
f"{_XL}type": "simple",
|
| 196 |
})
|
| 197 |
|
|
@@ -33,8 +33,6 @@ from app.models.job import JobModel
|
|
| 33 |
from app.models.model_config_db import ModelConfigDB
|
| 34 |
from app.schemas.corpus_profile import CorpusProfile
|
| 35 |
from app.schemas.model_config import ModelConfig, ProviderType
|
| 36 |
-
from app.services.ai.analyzer import run_primary_analysis
|
| 37 |
-
from app.services.export.alto import generate_alto, write_alto
|
| 38 |
from app.services.image.normalizer import create_derivatives, fetch_and_normalize
|
| 39 |
|
| 40 |
logger = logging.getLogger(__name__)
|
|
@@ -148,6 +146,8 @@ async def _run_job_impl(job_id: str, db: AsyncSession) -> None:
|
|
| 148 |
)
|
| 149 |
|
| 150 |
# ── 6. Analyse primaire IA (R05 : double stockage) ───────────────────
|
|
|
|
|
|
|
| 151 |
page_master = run_primary_analysis(
|
| 152 |
derivative_image_path=Path(image_info.derivative_path),
|
| 153 |
corpus_profile=corpus_profile,
|
|
@@ -163,6 +163,8 @@ async def _run_job_impl(job_id: str, db: AsyncSession) -> None:
|
|
| 163 |
)
|
| 164 |
|
| 165 |
# ── 7. Générer et écrire l'ALTO XML ──────────────────────────────────
|
|
|
|
|
|
|
| 166 |
alto_xml = generate_alto(page_master)
|
| 167 |
alto_path = (
|
| 168 |
data_dir
|
|
|
|
| 33 |
from app.models.model_config_db import ModelConfigDB
|
| 34 |
from app.schemas.corpus_profile import CorpusProfile
|
| 35 |
from app.schemas.model_config import ModelConfig, ProviderType
|
|
|
|
|
|
|
| 36 |
from app.services.image.normalizer import create_derivatives, fetch_and_normalize
|
| 37 |
|
| 38 |
logger = logging.getLogger(__name__)
|
|
|
|
| 146 |
)
|
| 147 |
|
| 148 |
# ── 6. Analyse primaire IA (R05 : double stockage) ───────────────────
|
| 149 |
+
from app.services.ai.analyzer import run_primary_analysis
|
| 150 |
+
|
| 151 |
page_master = run_primary_analysis(
|
| 152 |
derivative_image_path=Path(image_info.derivative_path),
|
| 153 |
corpus_profile=corpus_profile,
|
|
|
|
| 163 |
)
|
| 164 |
|
| 165 |
# ── 7. Générer et écrire l'ALTO XML ──────────────────────────────────
|
| 166 |
+
from app.services.export.alto import generate_alto, write_alto
|
| 167 |
+
|
| 168 |
alto_xml = generate_alto(page_master)
|
| 169 |
alto_path = (
|
| 170 |
data_dir
|
|
@@ -11,6 +11,7 @@ dependencies = [
|
|
| 11 |
"fastapi>=0.111",
|
| 12 |
"uvicorn[standard]>=0.29",
|
| 13 |
"pydantic>=2.7",
|
|
|
|
| 14 |
"sqlalchemy>=2.0",
|
| 15 |
"aiosqlite>=0.20",
|
| 16 |
"google-genai>=1.0",
|
|
|
|
| 11 |
"fastapi>=0.111",
|
| 12 |
"uvicorn[standard]>=0.29",
|
| 13 |
"pydantic>=2.7",
|
| 14 |
+
"pydantic-settings>=2.0",
|
| 15 |
"sqlalchemy>=2.0",
|
| 16 |
"aiosqlite>=0.20",
|
| 17 |
"google-genai>=1.0",
|
|
@@ -51,10 +51,11 @@ async def async_client(db_session: AsyncSession):
|
|
| 51 |
|
| 52 |
app.dependency_overrides[get_db] = _override_get_db
|
| 53 |
# Les background tasks (execute_corpus_job, execute_page_job) créent leur
|
| 54 |
-
# propre session via async_session_factory. On les neutralise
|
| 55 |
-
# qu'elles tentent de se connecter à la
|
| 56 |
-
|
| 57 |
-
|
|
|
|
| 58 |
async with AsyncClient(
|
| 59 |
transport=ASGITransport(app=app), base_url="http://test"
|
| 60 |
) as client:
|
|
|
|
| 51 |
|
| 52 |
app.dependency_overrides[get_db] = _override_get_db
|
| 53 |
# Les background tasks (execute_corpus_job, execute_page_job) créent leur
|
| 54 |
+
# propre session via async_session_factory. On les neutralise en mockant
|
| 55 |
+
# les modules sources pour éviter qu'elles tentent de se connecter à la
|
| 56 |
+
# BDD réelle pendant les tests d'API.
|
| 57 |
+
with patch("app.services.corpus_runner.execute_corpus_job", AsyncMock(return_value={"total": 0, "done": 0, "failed": 0})), \
|
| 58 |
+
patch("app.services.job_runner.execute_page_job", AsyncMock(return_value=None)):
|
| 59 |
async with AsyncClient(
|
| 60 |
transport=ASGITransport(app=app), base_url="http://test"
|
| 61 |
) as client:
|
|
@@ -432,7 +432,7 @@ def _make_page_master() -> PageMaster:
|
|
| 432 |
folio_label="0001r",
|
| 433 |
sequence=1,
|
| 434 |
image={
|
| 435 |
-
"
|
| 436 |
"derivative_web": "/data/deriv.jpg",
|
| 437 |
"thumbnail": "/data/thumb.jpg",
|
| 438 |
"width": 1500,
|
|
@@ -440,6 +440,7 @@ def _make_page_master() -> PageMaster:
|
|
| 440 |
},
|
| 441 |
layout={"regions": []},
|
| 442 |
processing={
|
|
|
|
| 443 |
"model_id": "gemini-2.0-flash",
|
| 444 |
"model_display_name": "Gemini 2.0 Flash",
|
| 445 |
"prompt_version": "prompts/medieval-illuminated/primary_v1.txt",
|
|
@@ -663,9 +664,9 @@ def test_run_primary_analysis_image_dict(tmp_path):
|
|
| 663 |
project_root=tmp_path,
|
| 664 |
)
|
| 665 |
|
| 666 |
-
assert result.image
|
| 667 |
-
assert result.image
|
| 668 |
-
assert result.image
|
| 669 |
|
| 670 |
|
| 671 |
def test_run_primary_analysis_regions_in_layout(tmp_path):
|
|
|
|
| 432 |
folio_label="0001r",
|
| 433 |
sequence=1,
|
| 434 |
image={
|
| 435 |
+
"master": "https://example.com/img.jpg",
|
| 436 |
"derivative_web": "/data/deriv.jpg",
|
| 437 |
"thumbnail": "/data/thumb.jpg",
|
| 438 |
"width": 1500,
|
|
|
|
| 440 |
},
|
| 441 |
layout={"regions": []},
|
| 442 |
processing={
|
| 443 |
+
"provider": "google_ai_studio",
|
| 444 |
"model_id": "gemini-2.0-flash",
|
| 445 |
"model_display_name": "Gemini 2.0 Flash",
|
| 446 |
"prompt_version": "prompts/medieval-illuminated/primary_v1.txt",
|
|
|
|
| 664 |
project_root=tmp_path,
|
| 665 |
)
|
| 666 |
|
| 667 |
+
assert result.image.master == image_info.original_url
|
| 668 |
+
assert result.image.width == image_info.derivative_width
|
| 669 |
+
assert result.image.height == image_info.derivative_height
|
| 670 |
|
| 671 |
|
| 672 |
def test_run_primary_analysis_regions_in_layout(tmp_path):
|
|
@@ -75,7 +75,7 @@ def _make_master(
|
|
| 75 |
"manuscript_id": "ms-test",
|
| 76 |
"folio_label": "f001r",
|
| 77 |
"sequence": 1,
|
| 78 |
-
"image": {"
|
| 79 |
"layout": {"regions": []},
|
| 80 |
"ocr": {
|
| 81 |
"diplomatic_text": "Incipit liber primus",
|
|
|
|
| 75 |
"manuscript_id": "ms-test",
|
| 76 |
"folio_label": "f001r",
|
| 77 |
"sequence": 1,
|
| 78 |
+
"image": {"master": "https://example.com/f.jpg", "width": 1500, "height": 2000},
|
| 79 |
"layout": {"regions": []},
|
| 80 |
"ocr": {
|
| 81 |
"diplomatic_text": "Incipit liber primus",
|
|
@@ -83,7 +83,7 @@ def _make_master_json(page_id: str, folio_label: str, sequence: int) -> str:
|
|
| 83 |
"folio_label": folio_label,
|
| 84 |
"sequence": sequence,
|
| 85 |
"image": {
|
| 86 |
-
"
|
| 87 |
"derivative_web": f"/data/deriv/{page_id}.jpg",
|
| 88 |
"thumbnail": f"/data/thumb/{page_id}.jpg",
|
| 89 |
"width": 1500,
|
|
|
|
| 83 |
"folio_label": folio_label,
|
| 84 |
"sequence": sequence,
|
| 85 |
"image": {
|
| 86 |
+
"master": f"https://example.com/{page_id}.jpg",
|
| 87 |
"derivative_web": f"/data/deriv/{page_id}.jpg",
|
| 88 |
"thumbnail": f"/data/thumb/{page_id}.jpg",
|
| 89 |
"width": 1500,
|
|
@@ -94,7 +94,7 @@ async def test_get_models_endpoint_removed(async_client):
|
|
| 94 |
@pytest.mark.asyncio
|
| 95 |
async def test_refresh_models_ok(async_client, monkeypatch):
|
| 96 |
monkeypatch.setattr(
|
| 97 |
-
|
| 98 |
)
|
| 99 |
response = await async_client.post("/api/v1/models/refresh")
|
| 100 |
assert response.status_code == 200
|
|
@@ -103,7 +103,7 @@ async def test_refresh_models_ok(async_client, monkeypatch):
|
|
| 103 |
@pytest.mark.asyncio
|
| 104 |
async def test_refresh_models_has_timestamp(async_client, monkeypatch):
|
| 105 |
monkeypatch.setattr(
|
| 106 |
-
|
| 107 |
)
|
| 108 |
data = (await async_client.post("/api/v1/models/refresh")).json()
|
| 109 |
assert "refreshed_at" in data
|
|
@@ -113,7 +113,7 @@ async def test_refresh_models_has_timestamp(async_client, monkeypatch):
|
|
| 113 |
@pytest.mark.asyncio
|
| 114 |
async def test_refresh_models_count(async_client, monkeypatch):
|
| 115 |
monkeypatch.setattr(
|
| 116 |
-
|
| 117 |
)
|
| 118 |
data = (await async_client.post("/api/v1/models/refresh")).json()
|
| 119 |
assert data["count"] == 2
|
|
@@ -123,7 +123,7 @@ async def test_refresh_models_count(async_client, monkeypatch):
|
|
| 123 |
@pytest.mark.asyncio
|
| 124 |
async def test_refresh_models_structure(async_client, monkeypatch):
|
| 125 |
monkeypatch.setattr(
|
| 126 |
-
|
| 127 |
)
|
| 128 |
data = (await async_client.post("/api/v1/models/refresh")).json()
|
| 129 |
assert "models" in data
|
|
|
|
| 94 |
@pytest.mark.asyncio
|
| 95 |
async def test_refresh_models_ok(async_client, monkeypatch):
|
| 96 |
monkeypatch.setattr(
|
| 97 |
+
"app.services.ai.model_registry.list_all_models", lambda: _MOCK_MODELS
|
| 98 |
)
|
| 99 |
response = await async_client.post("/api/v1/models/refresh")
|
| 100 |
assert response.status_code == 200
|
|
|
|
| 103 |
@pytest.mark.asyncio
|
| 104 |
async def test_refresh_models_has_timestamp(async_client, monkeypatch):
|
| 105 |
monkeypatch.setattr(
|
| 106 |
+
"app.services.ai.model_registry.list_all_models", lambda: _MOCK_MODELS
|
| 107 |
)
|
| 108 |
data = (await async_client.post("/api/v1/models/refresh")).json()
|
| 109 |
assert "refreshed_at" in data
|
|
|
|
| 113 |
@pytest.mark.asyncio
|
| 114 |
async def test_refresh_models_count(async_client, monkeypatch):
|
| 115 |
monkeypatch.setattr(
|
| 116 |
+
"app.services.ai.model_registry.list_all_models", lambda: _MOCK_MODELS
|
| 117 |
)
|
| 118 |
data = (await async_client.post("/api/v1/models/refresh")).json()
|
| 119 |
assert data["count"] == 2
|
|
|
|
| 123 |
@pytest.mark.asyncio
|
| 124 |
async def test_refresh_models_structure(async_client, monkeypatch):
|
| 125 |
monkeypatch.setattr(
|
| 126 |
+
"app.services.ai.model_registry.list_all_models", lambda: _MOCK_MODELS
|
| 127 |
)
|
| 128 |
data = (await async_client.post("/api/v1/models/refresh")).json()
|
| 129 |
assert "models" in data
|
|
@@ -87,7 +87,7 @@ def _make_master_json(page_id: str, corpus_profile: str = "medieval-illuminated"
|
|
| 87 |
"folio_label": "f001r",
|
| 88 |
"sequence": 1,
|
| 89 |
"image": {
|
| 90 |
-
"
|
| 91 |
"derivative_web": "/data/deriv/f001r.jpg",
|
| 92 |
"thumbnail": "/data/thumb/f001r.jpg",
|
| 93 |
"width": 1500,
|
|
|
|
| 87 |
"folio_label": "f001r",
|
| 88 |
"sequence": 1,
|
| 89 |
"image": {
|
| 90 |
+
"master": "https://example.com/f001r.jpg",
|
| 91 |
"derivative_web": "/data/deriv/f001r.jpg",
|
| 92 |
"thumbnail": "/data/thumb/f001r.jpg",
|
| 93 |
"width": 1500,
|
|
@@ -90,7 +90,7 @@ _MOCK_MISTRAL_MODELS = [
|
|
| 90 |
|
| 91 |
@pytest.mark.asyncio
|
| 92 |
async def test_list_providers_returns_list(async_client, monkeypatch):
|
| 93 |
-
monkeypatch.setattr(
|
| 94 |
resp = await async_client.get("/api/v1/providers")
|
| 95 |
assert resp.status_code == 200
|
| 96 |
assert isinstance(resp.json(), list)
|
|
@@ -98,14 +98,14 @@ async def test_list_providers_returns_list(async_client, monkeypatch):
|
|
| 98 |
|
| 99 |
@pytest.mark.asyncio
|
| 100 |
async def test_list_providers_count(async_client, monkeypatch):
|
| 101 |
-
monkeypatch.setattr(
|
| 102 |
data = (await async_client.get("/api/v1/providers")).json()
|
| 103 |
assert len(data) == 4 # 4 providers connus
|
| 104 |
|
| 105 |
|
| 106 |
@pytest.mark.asyncio
|
| 107 |
async def test_list_providers_fields(async_client, monkeypatch):
|
| 108 |
-
monkeypatch.setattr(
|
| 109 |
data = (await async_client.get("/api/v1/providers")).json()
|
| 110 |
p = data[0]
|
| 111 |
assert "provider_type" in p
|
|
@@ -116,7 +116,7 @@ async def test_list_providers_fields(async_client, monkeypatch):
|
|
| 116 |
|
| 117 |
@pytest.mark.asyncio
|
| 118 |
async def test_list_providers_all_unavailable(async_client, monkeypatch):
|
| 119 |
-
monkeypatch.setattr(
|
| 120 |
data = (await async_client.get("/api/v1/providers")).json()
|
| 121 |
assert all(not p["available"] for p in data)
|
| 122 |
assert all(p["model_count"] == 0 for p in data)
|
|
@@ -124,7 +124,7 @@ async def test_list_providers_all_unavailable(async_client, monkeypatch):
|
|
| 124 |
|
| 125 |
@pytest.mark.asyncio
|
| 126 |
async def test_list_providers_google_available(async_client, monkeypatch):
|
| 127 |
-
monkeypatch.setattr(
|
| 128 |
data = (await async_client.get("/api/v1/providers")).json()
|
| 129 |
google = next(p for p in data if p["provider_type"] == "google_ai_studio")
|
| 130 |
assert google["available"] is True
|
|
@@ -133,7 +133,7 @@ async def test_list_providers_google_available(async_client, monkeypatch):
|
|
| 133 |
|
| 134 |
@pytest.mark.asyncio
|
| 135 |
async def test_list_providers_mistral_available(async_client, monkeypatch):
|
| 136 |
-
monkeypatch.setattr(
|
| 137 |
data = (await async_client.get("/api/v1/providers")).json()
|
| 138 |
mistral = next(p for p in data if p["provider_type"] == "mistral")
|
| 139 |
assert mistral["available"] is True
|
|
@@ -143,7 +143,7 @@ async def test_list_providers_mistral_available(async_client, monkeypatch):
|
|
| 143 |
@pytest.mark.asyncio
|
| 144 |
async def test_list_providers_includes_mistral_type(async_client, monkeypatch):
|
| 145 |
"""Mistral est toujours dans la liste même si indisponible."""
|
| 146 |
-
monkeypatch.setattr(
|
| 147 |
data = (await async_client.get("/api/v1/providers")).json()
|
| 148 |
types_ = [p["provider_type"] for p in data]
|
| 149 |
assert "mistral" in types_
|
|
@@ -156,7 +156,7 @@ async def test_list_providers_includes_mistral_type(async_client, monkeypatch):
|
|
| 156 |
@pytest.mark.asyncio
|
| 157 |
async def test_get_provider_models_google(async_client, monkeypatch):
|
| 158 |
monkeypatch.setattr(
|
| 159 |
-
|
| 160 |
)
|
| 161 |
resp = await async_client.get("/api/v1/providers/google_ai_studio/models")
|
| 162 |
assert resp.status_code == 200
|
|
@@ -166,7 +166,7 @@ async def test_get_provider_models_google(async_client, monkeypatch):
|
|
| 166 |
@pytest.mark.asyncio
|
| 167 |
async def test_get_provider_models_mistral(async_client, monkeypatch):
|
| 168 |
monkeypatch.setattr(
|
| 169 |
-
|
| 170 |
)
|
| 171 |
resp = await async_client.get("/api/v1/providers/mistral/models")
|
| 172 |
assert resp.status_code == 200
|
|
@@ -189,7 +189,7 @@ async def test_get_provider_models_not_configured(async_client, monkeypatch):
|
|
| 189 |
def _raise(ptype):
|
| 190 |
raise RuntimeError("Variable d'environnement manquante : MISTRAL_API_KEY")
|
| 191 |
|
| 192 |
-
monkeypatch.setattr(
|
| 193 |
resp = await async_client.get("/api/v1/providers/mistral/models")
|
| 194 |
assert resp.status_code == 503
|
| 195 |
|
|
@@ -197,7 +197,7 @@ async def test_get_provider_models_not_configured(async_client, monkeypatch):
|
|
| 197 |
@pytest.mark.asyncio
|
| 198 |
async def test_get_provider_models_fields(async_client, monkeypatch):
|
| 199 |
monkeypatch.setattr(
|
| 200 |
-
|
| 201 |
)
|
| 202 |
data = (await async_client.get("/api/v1/providers/mistral/models")).json()
|
| 203 |
m = data[0]
|
|
|
|
| 90 |
|
| 91 |
@pytest.mark.asyncio
|
| 92 |
async def test_list_providers_returns_list(async_client, monkeypatch):
|
| 93 |
+
monkeypatch.setattr("app.services.ai.model_registry.get_available_providers", lambda: _PROVIDERS_ALL_UNAVAILABLE)
|
| 94 |
resp = await async_client.get("/api/v1/providers")
|
| 95 |
assert resp.status_code == 200
|
| 96 |
assert isinstance(resp.json(), list)
|
|
|
|
| 98 |
|
| 99 |
@pytest.mark.asyncio
|
| 100 |
async def test_list_providers_count(async_client, monkeypatch):
|
| 101 |
+
monkeypatch.setattr("app.services.ai.model_registry.get_available_providers", lambda: _PROVIDERS_ALL_UNAVAILABLE)
|
| 102 |
data = (await async_client.get("/api/v1/providers")).json()
|
| 103 |
assert len(data) == 4 # 4 providers connus
|
| 104 |
|
| 105 |
|
| 106 |
@pytest.mark.asyncio
|
| 107 |
async def test_list_providers_fields(async_client, monkeypatch):
|
| 108 |
+
monkeypatch.setattr("app.services.ai.model_registry.get_available_providers", lambda: _PROVIDERS_ALL_UNAVAILABLE)
|
| 109 |
data = (await async_client.get("/api/v1/providers")).json()
|
| 110 |
p = data[0]
|
| 111 |
assert "provider_type" in p
|
|
|
|
| 116 |
|
| 117 |
@pytest.mark.asyncio
|
| 118 |
async def test_list_providers_all_unavailable(async_client, monkeypatch):
|
| 119 |
+
monkeypatch.setattr("app.services.ai.model_registry.get_available_providers", lambda: _PROVIDERS_ALL_UNAVAILABLE)
|
| 120 |
data = (await async_client.get("/api/v1/providers")).json()
|
| 121 |
assert all(not p["available"] for p in data)
|
| 122 |
assert all(p["model_count"] == 0 for p in data)
|
|
|
|
| 124 |
|
| 125 |
@pytest.mark.asyncio
|
| 126 |
async def test_list_providers_google_available(async_client, monkeypatch):
|
| 127 |
+
monkeypatch.setattr("app.services.ai.model_registry.get_available_providers", lambda: _PROVIDERS_GOOGLE_ONLY)
|
| 128 |
data = (await async_client.get("/api/v1/providers")).json()
|
| 129 |
google = next(p for p in data if p["provider_type"] == "google_ai_studio")
|
| 130 |
assert google["available"] is True
|
|
|
|
| 133 |
|
| 134 |
@pytest.mark.asyncio
|
| 135 |
async def test_list_providers_mistral_available(async_client, monkeypatch):
|
| 136 |
+
monkeypatch.setattr("app.services.ai.model_registry.get_available_providers", lambda: _PROVIDERS_GOOGLE_AND_MISTRAL)
|
| 137 |
data = (await async_client.get("/api/v1/providers")).json()
|
| 138 |
mistral = next(p for p in data if p["provider_type"] == "mistral")
|
| 139 |
assert mistral["available"] is True
|
|
|
|
| 143 |
@pytest.mark.asyncio
|
| 144 |
async def test_list_providers_includes_mistral_type(async_client, monkeypatch):
|
| 145 |
"""Mistral est toujours dans la liste même si indisponible."""
|
| 146 |
+
monkeypatch.setattr("app.services.ai.model_registry.get_available_providers", lambda: _PROVIDERS_ALL_UNAVAILABLE)
|
| 147 |
data = (await async_client.get("/api/v1/providers")).json()
|
| 148 |
types_ = [p["provider_type"] for p in data]
|
| 149 |
assert "mistral" in types_
|
|
|
|
| 156 |
@pytest.mark.asyncio
|
| 157 |
async def test_get_provider_models_google(async_client, monkeypatch):
|
| 158 |
monkeypatch.setattr(
|
| 159 |
+
"app.services.ai.model_registry.list_models_for_provider", lambda ptype: _MOCK_GOOGLE_MODELS
|
| 160 |
)
|
| 161 |
resp = await async_client.get("/api/v1/providers/google_ai_studio/models")
|
| 162 |
assert resp.status_code == 200
|
|
|
|
| 166 |
@pytest.mark.asyncio
|
| 167 |
async def test_get_provider_models_mistral(async_client, monkeypatch):
|
| 168 |
monkeypatch.setattr(
|
| 169 |
+
"app.services.ai.model_registry.list_models_for_provider", lambda ptype: _MOCK_MISTRAL_MODELS
|
| 170 |
)
|
| 171 |
resp = await async_client.get("/api/v1/providers/mistral/models")
|
| 172 |
assert resp.status_code == 200
|
|
|
|
| 189 |
def _raise(ptype):
|
| 190 |
raise RuntimeError("Variable d'environnement manquante : MISTRAL_API_KEY")
|
| 191 |
|
| 192 |
+
monkeypatch.setattr("app.services.ai.model_registry.list_models_for_provider", _raise)
|
| 193 |
resp = await async_client.get("/api/v1/providers/mistral/models")
|
| 194 |
assert resp.status_code == 503
|
| 195 |
|
|
|
|
| 197 |
@pytest.mark.asyncio
|
| 198 |
async def test_get_provider_models_fields(async_client, monkeypatch):
|
| 199 |
monkeypatch.setattr(
|
| 200 |
+
"app.services.ai.model_registry.list_models_for_provider", lambda ptype: _MOCK_MISTRAL_MODELS
|
| 201 |
)
|
| 202 |
data = (await async_client.get("/api/v1/providers/mistral/models")).json()
|
| 203 |
m = data[0]
|
|
@@ -33,7 +33,7 @@ def _make_master(page_id: str, diplomatic_text: str = "", translation_fr: str =
|
|
| 33 |
"manuscript_id": "ms-test",
|
| 34 |
"folio_label": "f001r",
|
| 35 |
"sequence": 1,
|
| 36 |
-
"image": {"
|
| 37 |
"layout": {"regions": []},
|
| 38 |
"ocr": {
|
| 39 |
"diplomatic_text": diplomatic_text,
|
|
|
|
| 33 |
"manuscript_id": "ms-test",
|
| 34 |
"folio_label": "f001r",
|
| 35 |
"sequence": 1,
|
| 36 |
+
"image": {"master": "https://example.com/f.jpg", "width": 1500, "height": 2000},
|
| 37 |
"layout": {"regions": []},
|
| 38 |
"ocr": {
|
| 39 |
"diplomatic_text": diplomatic_text,
|
|
@@ -52,6 +52,7 @@ def _make_master(
|
|
| 52 |
processing = None
|
| 53 |
if with_processing:
|
| 54 |
processing = ProcessingInfo(
|
|
|
|
| 55 |
model_id="gemini-2.0-flash",
|
| 56 |
model_display_name="Gemini 2.0 Flash",
|
| 57 |
prompt_version="prompts/medieval-illuminated/primary_v1.txt",
|
|
@@ -65,7 +66,7 @@ def _make_master(
|
|
| 65 |
folio_label="0001r",
|
| 66 |
sequence=sequence,
|
| 67 |
image={
|
| 68 |
-
"
|
| 69 |
"derivative_web": "/data/deriv.jpg",
|
| 70 |
"thumbnail": "/data/thumb.jpg",
|
| 71 |
"width": width,
|
|
|
|
| 52 |
processing = None
|
| 53 |
if with_processing:
|
| 54 |
processing = ProcessingInfo(
|
| 55 |
+
provider="google_ai_studio",
|
| 56 |
model_id="gemini-2.0-flash",
|
| 57 |
model_display_name="Gemini 2.0 Flash",
|
| 58 |
prompt_version="prompts/medieval-illuminated/primary_v1.txt",
|
|
|
|
| 66 |
folio_label="0001r",
|
| 67 |
sequence=sequence,
|
| 68 |
image={
|
| 69 |
+
"master": "https://example.com/img.jpg",
|
| 70 |
"derivative_web": "/data/deriv.jpg",
|
| 71 |
"thumbnail": "/data/thumb.jpg",
|
| 72 |
"width": width,
|
|
@@ -53,7 +53,7 @@ def _make_page(
|
|
| 53 |
folio_label=folio_label,
|
| 54 |
sequence=sequence,
|
| 55 |
image={
|
| 56 |
-
"
|
| 57 |
"derivative_web": f"/data/deriv/{folio_label}.jpg",
|
| 58 |
"thumbnail": f"/data/thumb/{folio_label}.jpg",
|
| 59 |
"width": width,
|
|
@@ -344,7 +344,7 @@ def test_canvas_width_matches_image(beatus_pages, beatus_meta):
|
|
| 344 |
# Trouve la page correspondante
|
| 345 |
page_id = canvas["id"].split("/canvas/")[-1]
|
| 346 |
page = next(p for p in beatus_pages if p.page_id == page_id)
|
| 347 |
-
assert canvas["width"] == page.image
|
| 348 |
|
| 349 |
|
| 350 |
def test_canvas_height_matches_image(beatus_pages, beatus_meta):
|
|
@@ -352,7 +352,7 @@ def test_canvas_height_matches_image(beatus_pages, beatus_meta):
|
|
| 352 |
for canvas in manifest["items"]:
|
| 353 |
page_id = canvas["id"].split("/canvas/")[-1]
|
| 354 |
page = next(p for p in beatus_pages if p.page_id == page_id)
|
| 355 |
-
assert canvas["height"] == page.image
|
| 356 |
|
| 357 |
|
| 358 |
def test_canvas_dimensions_beatus_hr():
|
|
@@ -447,7 +447,7 @@ def test_annotation_body_id_is_original_url(beatus_pages, beatus_meta):
|
|
| 447 |
page_id = canvas["id"].split("/canvas/")[-1]
|
| 448 |
page = next(p for p in beatus_pages if p.page_id == page_id)
|
| 449 |
body = canvas["items"][0]["items"][0]["body"]
|
| 450 |
-
assert body["id"] == page.image
|
| 451 |
|
| 452 |
|
| 453 |
def test_annotation_body_contains_gallica_url(beatus_pages, beatus_meta):
|
|
|
|
| 53 |
folio_label=folio_label,
|
| 54 |
sequence=sequence,
|
| 55 |
image={
|
| 56 |
+
"master": original_url or f"https://example.com/{folio_label}.jpg",
|
| 57 |
"derivative_web": f"/data/deriv/{folio_label}.jpg",
|
| 58 |
"thumbnail": f"/data/thumb/{folio_label}.jpg",
|
| 59 |
"width": width,
|
|
|
|
| 344 |
# Trouve la page correspondante
|
| 345 |
page_id = canvas["id"].split("/canvas/")[-1]
|
| 346 |
page = next(p for p in beatus_pages if p.page_id == page_id)
|
| 347 |
+
assert canvas["width"] == page.image.width
|
| 348 |
|
| 349 |
|
| 350 |
def test_canvas_height_matches_image(beatus_pages, beatus_meta):
|
|
|
|
| 352 |
for canvas in manifest["items"]:
|
| 353 |
page_id = canvas["id"].split("/canvas/")[-1]
|
| 354 |
page = next(p for p in beatus_pages if p.page_id == page_id)
|
| 355 |
+
assert canvas["height"] == page.image.height
|
| 356 |
|
| 357 |
|
| 358 |
def test_canvas_dimensions_beatus_hr():
|
|
|
|
| 447 |
page_id = canvas["id"].split("/canvas/")[-1]
|
| 448 |
page = next(p for p in beatus_pages if p.page_id == page_id)
|
| 449 |
body = canvas["items"][0]["items"][0]["body"]
|
| 450 |
+
assert body["id"] == page.image.master
|
| 451 |
|
| 452 |
|
| 453 |
def test_annotation_body_contains_gallica_url(beatus_pages, beatus_meta):
|
|
@@ -66,6 +66,7 @@ def _make_page(
|
|
| 66 |
processing = None
|
| 67 |
if with_processing:
|
| 68 |
processing = ProcessingInfo(
|
|
|
|
| 69 |
model_id="gemini-2.0-flash",
|
| 70 |
model_display_name="Gemini 2.0 Flash",
|
| 71 |
prompt_version="prompts/medieval-illuminated/primary_v1.txt",
|
|
@@ -80,7 +81,7 @@ def _make_page(
|
|
| 80 |
folio_label=folio_label,
|
| 81 |
sequence=sequence,
|
| 82 |
image={
|
| 83 |
-
"
|
| 84 |
"derivative_web": derivative_web or f"/data/deriv/{folio_label}.jpg",
|
| 85 |
"thumbnail": f"/data/thumb/{folio_label}.jpg",
|
| 86 |
"width": 1500,
|
|
|
|
| 66 |
processing = None
|
| 67 |
if with_processing:
|
| 68 |
processing = ProcessingInfo(
|
| 69 |
+
provider="google_ai_studio",
|
| 70 |
model_id="gemini-2.0-flash",
|
| 71 |
model_display_name="Gemini 2.0 Flash",
|
| 72 |
prompt_version="prompts/medieval-illuminated/primary_v1.txt",
|
|
|
|
| 81 |
folio_label=folio_label,
|
| 82 |
sequence=sequence,
|
| 83 |
image={
|
| 84 |
+
"master": original_url or f"https://example.com/{folio_label}.jpg",
|
| 85 |
"derivative_web": derivative_web or f"/data/deriv/{folio_label}.jpg",
|
| 86 |
"thumbnail": f"/data/thumb/{folio_label}.jpg",
|
| 87 |
"width": 1500,
|
|
@@ -142,16 +142,24 @@ def _page_master(page_id: str, ms_id: str) -> PageMaster:
|
|
| 142 |
|
| 143 |
|
| 144 |
def _apply_success_mocks(monkeypatch, page_id: str, ms_id: str) -> None:
|
| 145 |
-
"""Applique les mocks IO pour un pipeline réussi.
|
|
|
|
|
|
|
|
|
|
|
|
|
| 146 |
monkeypatch.setattr(
|
| 147 |
job_runner_module, "fetch_and_normalize", lambda *a: _image_info()
|
| 148 |
)
|
| 149 |
monkeypatch.setattr(
|
| 150 |
-
|
| 151 |
lambda **kw: _page_master(page_id, ms_id),
|
| 152 |
)
|
| 153 |
-
monkeypatch.setattr(
|
| 154 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
| 155 |
|
| 156 |
|
| 157 |
# ---------------------------------------------------------------------------
|
|
@@ -274,7 +282,7 @@ async def test_no_image_path_job_failed(db, setup_with_model, monkeypatch):
|
|
| 274 |
s["page"].image_master_path = None
|
| 275 |
await db.commit()
|
| 276 |
monkeypatch.setattr(
|
| 277 |
-
|
| 278 |
lambda **kw: _page_master(s["page"].id, s["ms"].id),
|
| 279 |
)
|
| 280 |
|
|
@@ -291,7 +299,7 @@ async def test_no_image_path_page_error(db, setup_with_model, monkeypatch):
|
|
| 291 |
s["page"].image_master_path = None
|
| 292 |
await db.commit()
|
| 293 |
monkeypatch.setattr(
|
| 294 |
-
|
| 295 |
lambda **kw: _page_master(s["page"].id, s["ms"].id),
|
| 296 |
)
|
| 297 |
|
|
@@ -343,7 +351,7 @@ async def test_primary_analysis_fails_job_failed(db, setup_with_model, monkeypat
|
|
| 343 |
job_runner_module, "fetch_and_normalize", lambda *a: _image_info()
|
| 344 |
)
|
| 345 |
monkeypatch.setattr(
|
| 346 |
-
|
| 347 |
lambda **kw: (_ for _ in ()).throw(ValueError("ParseError: invalid JSON")),
|
| 348 |
)
|
| 349 |
|
|
@@ -361,7 +369,7 @@ async def test_primary_analysis_fails_page_error(db, setup_with_model, monkeypat
|
|
| 361 |
job_runner_module, "fetch_and_normalize", lambda *a: _image_info()
|
| 362 |
)
|
| 363 |
monkeypatch.setattr(
|
| 364 |
-
|
| 365 |
lambda **kw: (_ for _ in ()).throw(ValueError("ParseError: invalid JSON")),
|
| 366 |
)
|
| 367 |
|
|
@@ -379,7 +387,7 @@ async def test_primary_analysis_error_message_stored(db, setup_with_model, monke
|
|
| 379 |
job_runner_module, "fetch_and_normalize", lambda *a: _image_info()
|
| 380 |
)
|
| 381 |
monkeypatch.setattr(
|
| 382 |
-
|
| 383 |
lambda **kw: (_ for _ in ()).throw(ValueError("ParseError: invalid JSON")),
|
| 384 |
)
|
| 385 |
|
|
@@ -401,12 +409,14 @@ async def test_write_alto_fails_job_failed(db, setup_with_model, monkeypatch):
|
|
| 401 |
job_runner_module, "fetch_and_normalize", lambda *a: _image_info()
|
| 402 |
)
|
| 403 |
monkeypatch.setattr(
|
| 404 |
-
|
| 405 |
lambda **kw: _page_master(s["page"].id, s["ms"].id),
|
| 406 |
)
|
| 407 |
-
monkeypatch.setattr(job_runner_module, "generate_alto", lambda pm: "<alto/>")
|
| 408 |
monkeypatch.setattr(
|
| 409 |
-
|
|
|
|
|
|
|
|
|
|
| 410 |
lambda xml, path: (_ for _ in ()).throw(OSError("disk full")),
|
| 411 |
)
|
| 412 |
|
|
@@ -424,12 +434,14 @@ async def test_write_alto_fails_page_error(db, setup_with_model, monkeypatch):
|
|
| 424 |
job_runner_module, "fetch_and_normalize", lambda *a: _image_info()
|
| 425 |
)
|
| 426 |
monkeypatch.setattr(
|
| 427 |
-
|
| 428 |
lambda **kw: _page_master(s["page"].id, s["ms"].id),
|
| 429 |
)
|
| 430 |
-
monkeypatch.setattr(job_runner_module, "generate_alto", lambda pm: "<alto/>")
|
| 431 |
monkeypatch.setattr(
|
| 432 |
-
|
|
|
|
|
|
|
|
|
|
| 433 |
lambda xml, path: (_ for _ in ()).throw(OSError("disk full")),
|
| 434 |
)
|
| 435 |
|
|
@@ -534,7 +546,7 @@ async def test_corpus_runner_calls_execute_per_job(monkeypatch):
|
|
| 534 |
return _FakeSession()
|
| 535 |
|
| 536 |
monkeypatch.setattr(corpus_runner_module, "async_session_factory", _mock_factory)
|
| 537 |
-
monkeypatch.setattr(
|
| 538 |
|
| 539 |
await execute_corpus_job("corpus-xyz")
|
| 540 |
|
|
|
|
| 142 |
|
| 143 |
|
| 144 |
def _apply_success_mocks(monkeypatch, page_id: str, ms_id: str) -> None:
|
| 145 |
+
"""Applique les mocks IO pour un pipeline réussi.
|
| 146 |
+
|
| 147 |
+
Les imports sont différés dans job_runner (lazy imports). On patche donc
|
| 148 |
+
les modules sources pour que l'import dans la fonction cible récupère le mock.
|
| 149 |
+
"""
|
| 150 |
monkeypatch.setattr(
|
| 151 |
job_runner_module, "fetch_and_normalize", lambda *a: _image_info()
|
| 152 |
)
|
| 153 |
monkeypatch.setattr(
|
| 154 |
+
"app.services.ai.analyzer.run_primary_analysis",
|
| 155 |
lambda **kw: _page_master(page_id, ms_id),
|
| 156 |
)
|
| 157 |
+
monkeypatch.setattr(
|
| 158 |
+
"app.services.export.alto.generate_alto", lambda pm: "<alto/>"
|
| 159 |
+
)
|
| 160 |
+
monkeypatch.setattr(
|
| 161 |
+
"app.services.export.alto.write_alto", lambda xml, path: None
|
| 162 |
+
)
|
| 163 |
|
| 164 |
|
| 165 |
# ---------------------------------------------------------------------------
|
|
|
|
| 282 |
s["page"].image_master_path = None
|
| 283 |
await db.commit()
|
| 284 |
monkeypatch.setattr(
|
| 285 |
+
"app.services.ai.analyzer.run_primary_analysis",
|
| 286 |
lambda **kw: _page_master(s["page"].id, s["ms"].id),
|
| 287 |
)
|
| 288 |
|
|
|
|
| 299 |
s["page"].image_master_path = None
|
| 300 |
await db.commit()
|
| 301 |
monkeypatch.setattr(
|
| 302 |
+
"app.services.ai.analyzer.run_primary_analysis",
|
| 303 |
lambda **kw: _page_master(s["page"].id, s["ms"].id),
|
| 304 |
)
|
| 305 |
|
|
|
|
| 351 |
job_runner_module, "fetch_and_normalize", lambda *a: _image_info()
|
| 352 |
)
|
| 353 |
monkeypatch.setattr(
|
| 354 |
+
"app.services.ai.analyzer.run_primary_analysis",
|
| 355 |
lambda **kw: (_ for _ in ()).throw(ValueError("ParseError: invalid JSON")),
|
| 356 |
)
|
| 357 |
|
|
|
|
| 369 |
job_runner_module, "fetch_and_normalize", lambda *a: _image_info()
|
| 370 |
)
|
| 371 |
monkeypatch.setattr(
|
| 372 |
+
"app.services.ai.analyzer.run_primary_analysis",
|
| 373 |
lambda **kw: (_ for _ in ()).throw(ValueError("ParseError: invalid JSON")),
|
| 374 |
)
|
| 375 |
|
|
|
|
| 387 |
job_runner_module, "fetch_and_normalize", lambda *a: _image_info()
|
| 388 |
)
|
| 389 |
monkeypatch.setattr(
|
| 390 |
+
"app.services.ai.analyzer.run_primary_analysis",
|
| 391 |
lambda **kw: (_ for _ in ()).throw(ValueError("ParseError: invalid JSON")),
|
| 392 |
)
|
| 393 |
|
|
|
|
| 409 |
job_runner_module, "fetch_and_normalize", lambda *a: _image_info()
|
| 410 |
)
|
| 411 |
monkeypatch.setattr(
|
| 412 |
+
"app.services.ai.analyzer.run_primary_analysis",
|
| 413 |
lambda **kw: _page_master(s["page"].id, s["ms"].id),
|
| 414 |
)
|
|
|
|
| 415 |
monkeypatch.setattr(
|
| 416 |
+
"app.services.export.alto.generate_alto", lambda pm: "<alto/>"
|
| 417 |
+
)
|
| 418 |
+
monkeypatch.setattr(
|
| 419 |
+
"app.services.export.alto.write_alto",
|
| 420 |
lambda xml, path: (_ for _ in ()).throw(OSError("disk full")),
|
| 421 |
)
|
| 422 |
|
|
|
|
| 434 |
job_runner_module, "fetch_and_normalize", lambda *a: _image_info()
|
| 435 |
)
|
| 436 |
monkeypatch.setattr(
|
| 437 |
+
"app.services.ai.analyzer.run_primary_analysis",
|
| 438 |
lambda **kw: _page_master(s["page"].id, s["ms"].id),
|
| 439 |
)
|
|
|
|
| 440 |
monkeypatch.setattr(
|
| 441 |
+
"app.services.export.alto.generate_alto", lambda pm: "<alto/>"
|
| 442 |
+
)
|
| 443 |
+
monkeypatch.setattr(
|
| 444 |
+
"app.services.export.alto.write_alto",
|
| 445 |
lambda xml, path: (_ for _ in ()).throw(OSError("disk full")),
|
| 446 |
)
|
| 447 |
|
|
|
|
| 546 |
return _FakeSession()
|
| 547 |
|
| 548 |
monkeypatch.setattr(corpus_runner_module, "async_session_factory", _mock_factory)
|
| 549 |
+
monkeypatch.setattr("app.services.job_runner.execute_page_job", _mock_execute)
|
| 550 |
|
| 551 |
await execute_corpus_job("corpus-xyz")
|
| 552 |
|