Claude committed on
Commit
193eb98
·
unverified ·
1 Parent(s): 7a648f5

fix(sprint-f0): fondations — lazy imports, schémas conformes, pydantic-settings

Browse files

Sprint F0 — corrections fondamentales qui débloquent tout le reste :

- Lazy imports des providers IA : la chaîne d'import google-genai/mistralai
n'est plus chargée au démarrage. `from app.main import app` fonctionne
même si les SDK tiers ont des problèmes de dépendances.
Fichiers : ai/__init__.py, model_registry.py, jobs.py, models_api.py,
job_runner.py, corpus_runner.py, provider_vertex_key.py

- Schémas PageMaster conformes à CLAUDE.md §4.2 :
- ImageInfo(BaseModel) remplace image: dict (6 champs typés)
- Summary(BaseModel) remplace summary: dict (short + detailed)
- ProcessingInfo.provider: str ajouté (traçabilité du provider)
Fichiers : page_master.py, analyzer.py, exports (alto/iiif/mets)

- config.py migré vers pydantic-settings BaseSettings (CLAUDE.md §2/§7)
avec pydantic-settings ajouté dans pyproject.toml

- Defaults datetime sur CorpusModel et JobModel (plus de crash insert)

- 28 fichiers de tests mis à jour (mocks adaptés aux lazy imports,
fixtures image/ProcessingInfo corrigées)

Résultat : 460 tests passants, 0 échecs, 3 skipped.

https://claude.ai/code/session_015Lht7wNQRzhUaLw94dE9z9

backend/app/api/v1/jobs.py CHANGED
@@ -22,8 +22,6 @@ from sqlalchemy.ext.asyncio import AsyncSession
22
  from app.models.corpus import CorpusModel, ManuscriptModel, PageModel
23
  from app.models.database import get_db
24
  from app.models.job import JobModel
25
- from app.services.corpus_runner import execute_corpus_job
26
- from app.services.job_runner import execute_page_job
27
 
28
  router = APIRouter(tags=["jobs"])
29
 
@@ -101,6 +99,8 @@ async def run_corpus(
101
  await db.commit()
102
 
103
  # Lancer le pipeline en arrière-plan (après envoi de la réponse)
 
 
104
  background_tasks.add_task(execute_corpus_job, corpus_id)
105
 
106
  return CorpusRunResponse(
@@ -135,6 +135,8 @@ async def run_page(
135
  await db.refresh(job)
136
 
137
  # Lancer le pipeline en arrière-plan (après envoi de la réponse)
 
 
138
  background_tasks.add_task(execute_page_job, job.id)
139
 
140
  return job
@@ -175,6 +177,8 @@ async def retry_job(
175
  await db.refresh(job)
176
 
177
  # Relancer le pipeline
 
 
178
  background_tasks.add_task(execute_page_job, job.id)
179
 
180
  return job
 
22
  from app.models.corpus import CorpusModel, ManuscriptModel, PageModel
23
  from app.models.database import get_db
24
  from app.models.job import JobModel
 
 
25
 
26
  router = APIRouter(tags=["jobs"])
27
 
 
99
  await db.commit()
100
 
101
  # Lancer le pipeline en arrière-plan (après envoi de la réponse)
102
+ from app.services.corpus_runner import execute_corpus_job
103
+
104
  background_tasks.add_task(execute_corpus_job, corpus_id)
105
 
106
  return CorpusRunResponse(
 
135
  await db.refresh(job)
136
 
137
  # Lancer le pipeline en arrière-plan (après envoi de la réponse)
138
+ from app.services.job_runner import execute_page_job
139
+
140
  background_tasks.add_task(execute_page_job, job.id)
141
 
142
  return job
 
177
  await db.refresh(job)
178
 
179
  # Relancer le pipeline
180
+ from app.services.job_runner import execute_page_job
181
+
182
  background_tasks.add_task(execute_page_job, job.id)
183
 
184
  return job
backend/app/api/v1/models_api.py CHANGED
@@ -25,11 +25,6 @@ from app.models.corpus import CorpusModel
25
  from app.models.database import get_db
26
  from app.models.model_config_db import ModelConfigDB
27
  from app.schemas.model_config import ProviderType
28
- from app.services.ai.model_registry import (
29
- get_available_providers,
30
- list_all_models,
31
- list_models_for_provider,
32
- )
33
 
34
  logger = logging.getLogger(__name__)
35
 
@@ -77,6 +72,8 @@ async def list_providers() -> list[dict]:
77
  Un provider est disponible si la variable d'environnement correspondante
78
  est présente dans les secrets HuggingFace. Aucune clé n'est exposée.
79
  """
 
 
80
  return get_available_providers()
81
 
82
 
@@ -91,6 +88,8 @@ async def get_provider_models(provider_type: str) -> list[dict]:
91
  detail=f"Provider inconnu : {provider_type}. "
92
  f"Valeurs acceptées : {[p.value for p in ProviderType]}",
93
  )
 
 
94
  try:
95
  models = list_models_for_provider(ptype)
96
  except RuntimeError as exc:
@@ -104,6 +103,8 @@ async def get_provider_models(provider_type: str) -> list[dict]:
104
  @router.post("/models/refresh", response_model=ModelsRefreshResponse)
105
  async def refresh_models() -> ModelsRefreshResponse:
106
  """Force la mise à jour de la liste agrégée de tous les modèles disponibles."""
 
 
107
  models = list_all_models()
108
  return ModelsRefreshResponse(
109
  models=[m.model_dump() for m in models],
 
25
  from app.models.database import get_db
26
  from app.models.model_config_db import ModelConfigDB
27
  from app.schemas.model_config import ProviderType
 
 
 
 
 
28
 
29
  logger = logging.getLogger(__name__)
30
 
 
72
  Un provider est disponible si la variable d'environnement correspondante
73
  est présente dans les secrets HuggingFace. Aucune clé n'est exposée.
74
  """
75
+ from app.services.ai.model_registry import get_available_providers
76
+
77
  return get_available_providers()
78
 
79
 
 
88
  detail=f"Provider inconnu : {provider_type}. "
89
  f"Valeurs acceptées : {[p.value for p in ProviderType]}",
90
  )
91
+ from app.services.ai.model_registry import list_models_for_provider
92
+
93
  try:
94
  models = list_models_for_provider(ptype)
95
  except RuntimeError as exc:
 
103
  @router.post("/models/refresh", response_model=ModelsRefreshResponse)
104
  async def refresh_models() -> ModelsRefreshResponse:
105
  """Force la mise à jour de la liste agrégée de tous les modèles disponibles."""
106
+ from app.services.ai.model_registry import list_all_models
107
+
108
  models = list_all_models()
109
  return ModelsRefreshResponse(
110
  models=[m.model_dump() for m in models],
backend/app/config.py CHANGED
@@ -1,17 +1,17 @@
1
  """
2
  Configuration globale de la plateforme, chargée depuis les variables d'environnement.
3
 
4
- Équivalent fonctionnel de pydantic-settings sans dépendance externe :
5
- - les valeurs sont lues depuis os.environ au moment de l'instanciation
6
  - l'objet `settings` est importé partout dans l'application
7
  - dans les tests : monkeypatch.setattr(config, "settings", ...) pour surcharger
8
  """
9
  # 1. stdlib
10
- import os
11
  from pathlib import Path
12
 
13
  # 2. third-party
14
- from pydantic import BaseModel, ConfigDict
 
15
 
16
  # Racine du dépôt — résolue depuis l'emplacement absolu de ce fichier.
17
  # config.py se trouve dans backend/app/ ; 3 parents remontent à la racine.
@@ -19,14 +19,17 @@ from pydantic import BaseModel, ConfigDict
19
  _REPO_ROOT = Path(__file__).resolve().parent.parent.parent
20
 
21
 
22
- class Settings(BaseModel):
23
  """Paramètres d'application lus depuis les variables d'environnement.
24
 
25
  Toutes les clés API sont optionnelles (None si non configurées).
26
  Elles ne sont jamais loguées ni exportées (R06).
27
  """
28
 
29
- model_config = ConfigDict(frozen=False)
 
 
 
30
 
31
  # ── Serveur ──────────────────────────────────────────────────────────────
32
  base_url: str = "http://localhost:8000"
@@ -50,21 +53,4 @@ class Settings(BaseModel):
50
  mistral_api_key: str | None = None
51
 
52
 
53
- def _load_settings() -> Settings:
54
- """Lit les variables d'environnement et construit l'objet Settings."""
55
- return Settings(
56
- base_url=os.getenv("BASE_URL", "http://localhost:8000"),
57
- data_dir=Path(os.getenv("DATA_DIR", "data")),
58
- profiles_dir=Path(os.getenv("PROFILES_DIR", str(_REPO_ROOT / "profiles"))),
59
- prompts_dir=Path(os.getenv("PROMPTS_DIR", str(_REPO_ROOT / "prompts"))),
60
- database_url=os.getenv(
61
- "DATABASE_URL", "sqlite+aiosqlite:///./scriptorium.db"
62
- ),
63
- google_ai_studio_api_key=os.getenv("GOOGLE_AI_STUDIO_API_KEY"),
64
- vertex_api_key=os.getenv("VERTEX_API_KEY"),
65
- vertex_service_account_json=os.getenv("VERTEX_SERVICE_ACCOUNT_JSON"),
66
- mistral_api_key=os.getenv("MISTRAL_API_KEY"),
67
- )
68
-
69
-
70
- settings: Settings = _load_settings()
 
1
  """
2
  Configuration globale de la plateforme, chargée depuis les variables d'environnement.
3
 
4
+ Utilise pydantic-settings (CLAUDE.md §2, §7) :
5
+ - les valeurs sont lues depuis os.environ / fichier .env au moment de l'instanciation
6
  - l'objet `settings` est importé partout dans l'application
7
  - dans les tests : monkeypatch.setattr(config, "settings", ...) pour surcharger
8
  """
9
  # 1. stdlib
 
10
  from pathlib import Path
11
 
12
  # 2. third-party
13
+ from pydantic import ConfigDict
14
+ from pydantic_settings import BaseSettings
15
 
16
  # Racine du dépôt — résolue depuis l'emplacement absolu de ce fichier.
17
  # config.py se trouve dans backend/app/ ; 3 parents remontent à la racine.
 
19
  _REPO_ROOT = Path(__file__).resolve().parent.parent.parent
20
 
21
 
22
+ class Settings(BaseSettings):
23
  """Paramètres d'application lus depuis les variables d'environnement.
24
 
25
  Toutes les clés API sont optionnelles (None si non configurées).
26
  Elles ne sont jamais loguées ni exportées (R06).
27
  """
28
 
29
+ model_config = ConfigDict(
30
+ env_file=".env",
31
+ extra="ignore",
32
+ )
33
 
34
  # ── Serveur ──────────────────────────────────────────────────────────────
35
  base_url: str = "http://localhost:8000"
 
53
  mistral_api_key: str | None = None
54
 
55
 
56
+ settings: Settings = Settings()
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
backend/app/models/corpus.py CHANGED
@@ -6,6 +6,7 @@ Ils NE se substituent PAS aux schémas Pydantic (source canonique des types).
6
  """
7
  # 1. stdlib
8
  from datetime import datetime, timezone
 
9
 
10
  # 2. third-party
11
  from sqlalchemy import DateTime, Float, ForeignKey, Integer, String, Text
@@ -24,8 +25,12 @@ class CorpusModel(Base):
24
  slug: Mapped[str] = mapped_column(String, unique=True, nullable=False, index=True)
25
  title: Mapped[str] = mapped_column(String, nullable=False)
26
  profile_id: Mapped[str] = mapped_column(String, nullable=False)
27
- created_at: Mapped[datetime] = mapped_column(DateTime, nullable=False)
28
- updated_at: Mapped[datetime] = mapped_column(DateTime, nullable=False)
 
 
 
 
29
 
30
  manuscripts: Mapped[list["ManuscriptModel"]] = relationship(
31
  back_populates="corpus", cascade="all, delete-orphan"
 
6
  """
7
  # 1. stdlib
8
  from datetime import datetime, timezone
9
+ from functools import partial
10
 
11
  # 2. third-party
12
  from sqlalchemy import DateTime, Float, ForeignKey, Integer, String, Text
 
25
  slug: Mapped[str] = mapped_column(String, unique=True, nullable=False, index=True)
26
  title: Mapped[str] = mapped_column(String, nullable=False)
27
  profile_id: Mapped[str] = mapped_column(String, nullable=False)
28
+ created_at: Mapped[datetime] = mapped_column(
29
+ DateTime, nullable=False, default=partial(datetime.now, tz=timezone.utc)
30
+ )
31
+ updated_at: Mapped[datetime] = mapped_column(
32
+ DateTime, nullable=False, default=partial(datetime.now, tz=timezone.utc)
33
+ )
34
 
35
  manuscripts: Mapped[list["ManuscriptModel"]] = relationship(
36
  back_populates="corpus", cascade="all, delete-orphan"
backend/app/models/job.py CHANGED
@@ -10,7 +10,8 @@ Cycle de vie :
10
  ↘ failed
11
  """
12
  # 1. stdlib
13
- from datetime import datetime
 
14
 
15
  # 2. third-party
16
  from sqlalchemy import DateTime, ForeignKey, String, Text
@@ -37,4 +38,6 @@ class JobModel(Base):
37
  started_at: Mapped[datetime | None] = mapped_column(DateTime, nullable=True)
38
  finished_at: Mapped[datetime | None] = mapped_column(DateTime, nullable=True)
39
  error_message: Mapped[str | None] = mapped_column(Text, nullable=True)
40
- created_at: Mapped[datetime] = mapped_column(DateTime, nullable=False)
 
 
 
10
  ↘ failed
11
  """
12
  # 1. stdlib
13
+ from datetime import datetime, timezone
14
+ from functools import partial
15
 
16
  # 2. third-party
17
  from sqlalchemy import DateTime, ForeignKey, String, Text
 
38
  started_at: Mapped[datetime | None] = mapped_column(DateTime, nullable=True)
39
  finished_at: Mapped[datetime | None] = mapped_column(DateTime, nullable=True)
40
  error_message: Mapped[str | None] = mapped_column(Text, nullable=True)
41
+ created_at: Mapped[datetime] = mapped_column(
42
+ DateTime, nullable=False, default=partial(datetime.now, tz=timezone.utc)
43
+ )
backend/app/schemas/page_master.py CHANGED
@@ -29,14 +29,25 @@ class Region(BaseModel):
29
 
30
  @field_validator("bbox")
31
  @classmethod
32
- def bbox_must_be_positive(cls, v: list[int]) -> list[int]:
33
  if any(x < 0 for x in v):
34
- raise ValueError("bbox values must be >= 0")
35
  if v[2] <= 0 or v[3] <= 0:
36
- raise ValueError("bbox width and height must be > 0")
37
  return v
38
 
39
 
 
 
 
 
 
 
 
 
 
 
 
40
  class OCRResult(BaseModel):
41
  diplomatic_text: str = ""
42
  blocks: list[dict] = []
@@ -51,6 +62,13 @@ class Translation(BaseModel):
51
  en: str = ""
52
 
53
 
 
 
 
 
 
 
 
54
  class CommentaryClaim(BaseModel):
55
  claim: str
56
  evidence_region_ids: list[str] = []
@@ -64,6 +82,7 @@ class Commentary(BaseModel):
64
 
65
 
66
  class ProcessingInfo(BaseModel):
 
67
  model_id: str
68
  model_display_name: str
69
  prompt_version: str
@@ -96,11 +115,11 @@ class PageMaster(BaseModel):
96
  folio_label: str
97
  sequence: int
98
 
99
- image: dict
100
  layout: dict
101
  ocr: OCRResult | None = None
102
  translation: Translation | None = None
103
- summary: dict | None = None
104
  commentary: Commentary | None = None
105
  extensions: dict[str, Any] = {}
106
 
 
29
 
30
  @field_validator("bbox")
31
  @classmethod
32
+ def bbox_must_be_valid(cls, v: list[int]) -> list[int]:
33
  if any(x < 0 for x in v):
34
+ raise ValueError("bbox: toutes les valeurs doivent être >= 0")
35
  if v[2] <= 0 or v[3] <= 0:
36
+ raise ValueError("bbox: width et height doivent être > 0")
37
  return v
38
 
39
 
40
+ class ImageInfo(BaseModel):
41
+ """Métadonnées image — CLAUDE.md §4.2."""
42
+
43
+ master: str
44
+ derivative_web: str | None = None
45
+ thumbnail: str | None = None
46
+ iiif_base: str | None = None
47
+ width: int
48
+ height: int
49
+
50
+
51
  class OCRResult(BaseModel):
52
  diplomatic_text: str = ""
53
  blocks: list[dict] = []
 
62
  en: str = ""
63
 
64
 
65
+ class Summary(BaseModel):
66
+ """Résumé — CLAUDE.md §4.2."""
67
+
68
+ short: str = ""
69
+ detailed: str = ""
70
+
71
+
72
  class CommentaryClaim(BaseModel):
73
  claim: str
74
  evidence_region_ids: list[str] = []
 
82
 
83
 
84
  class ProcessingInfo(BaseModel):
85
+ provider: str
86
  model_id: str
87
  model_display_name: str
88
  prompt_version: str
 
115
  folio_label: str
116
  sequence: int
117
 
118
+ image: ImageInfo
119
  layout: dict
120
  ocr: OCRResult | None = None
121
  translation: Translation | None = None
122
+ summary: Summary | None = None
123
  commentary: Commentary | None = None
124
  extensions: dict[str, Any] = {}
125
 
backend/app/services/ai/__init__.py CHANGED
@@ -1,19 +1,31 @@
1
  """
2
  Services AI — providers Google AI, registre de modèles, et analyse IA.
 
 
 
 
3
  """
4
- from app.services.ai.analyzer import run_primary_analysis
5
- from app.services.ai.client_factory import build_client
6
- from app.services.ai.model_registry import build_model_config, list_all_models
7
- from app.services.ai.prompt_loader import load_and_render_prompt
8
- from app.services.ai.provider_google_ai import GoogleAIProvider
9
- from app.services.ai.provider_vertex_key import VertexAPIKeyProvider
10
- from app.services.ai.provider_vertex_sa import VertexServiceAccountProvider
11
- from app.services.ai.response_parser import ParseError, parse_ai_response
 
 
 
 
 
 
 
 
 
 
 
12
 
13
  __all__ = [
14
- "GoogleAIProvider",
15
- "VertexAPIKeyProvider",
16
- "VertexServiceAccountProvider",
17
  "list_all_models",
18
  "build_model_config",
19
  "build_client",
 
1
  """
2
  Services AI — providers Google AI, registre de modèles, et analyse IA.
3
+
4
+ Les imports de providers sont différés (lazy) pour éviter de charger les SDK
5
+ tiers (google-genai, mistralai) au démarrage. Cela permet à l'application
6
+ de fonctionner même si un SDK n'est pas installé.
7
  """
8
+
9
+
10
+ def __getattr__(name: str):
11
+ """Import paresseux — les symboles sont résolus au premier accès."""
12
+ _lazy_map = {
13
+ "run_primary_analysis": "app.services.ai.analyzer",
14
+ "build_client": "app.services.ai.client_factory",
15
+ "build_model_config": "app.services.ai.model_registry",
16
+ "list_all_models": "app.services.ai.model_registry",
17
+ "load_and_render_prompt": "app.services.ai.prompt_loader",
18
+ "parse_ai_response": "app.services.ai.response_parser",
19
+ "ParseError": "app.services.ai.response_parser",
20
+ }
21
+ if name in _lazy_map:
22
+ import importlib
23
+ module = importlib.import_module(_lazy_map[name])
24
+ return getattr(module, name)
25
+ raise AttributeError(f"module {__name__!r} has no attribute {name!r}")
26
+
27
 
28
  __all__ = [
 
 
 
29
  "list_all_models",
30
  "build_model_config",
31
  "build_client",
backend/app/services/ai/analyzer.py CHANGED
@@ -13,7 +13,7 @@ from pathlib import Path
13
  from app.schemas.corpus_profile import CorpusProfile
14
  from app.schemas.image import ImageDerivativeInfo
15
  from app.schemas.model_config import ModelConfig
16
- from app.schemas.page_master import EditorialInfo, EditorialStatus, PageMaster, ProcessingInfo
17
  from app.services.ai.master_writer import write_gemini_raw, write_master_json
18
  from app.services.ai.model_registry import get_provider
19
  from app.services.ai.prompt_loader import load_and_render_prompt
@@ -118,16 +118,17 @@ def run_primary_analysis(
118
  manuscript_id=manuscript_id,
119
  folio_label=folio_label,
120
  sequence=sequence,
121
- image={
122
- "original_url": image_info.original_url,
123
- "derivative_web": image_info.derivative_path,
124
- "thumbnail": image_info.thumbnail_path,
125
- "width": image_info.derivative_width,
126
- "height": image_info.derivative_height,
127
- },
128
  layout=layout,
129
  ocr=ocr,
130
  processing=ProcessingInfo(
 
131
  model_id=model_config.selected_model_id,
132
  model_display_name=model_config.selected_model_display_name,
133
  prompt_version=prompt_rel_path,
 
13
  from app.schemas.corpus_profile import CorpusProfile
14
  from app.schemas.image import ImageDerivativeInfo
15
  from app.schemas.model_config import ModelConfig
16
+ from app.schemas.page_master import EditorialInfo, EditorialStatus, ImageInfo, PageMaster, ProcessingInfo
17
  from app.services.ai.master_writer import write_gemini_raw, write_master_json
18
  from app.services.ai.model_registry import get_provider
19
  from app.services.ai.prompt_loader import load_and_render_prompt
 
118
  manuscript_id=manuscript_id,
119
  folio_label=folio_label,
120
  sequence=sequence,
121
+ image=ImageInfo(
122
+ master=image_info.original_url,
123
+ derivative_web=image_info.derivative_path,
124
+ thumbnail=image_info.thumbnail_path,
125
+ width=image_info.derivative_width,
126
+ height=image_info.derivative_height,
127
+ ),
128
  layout=layout,
129
  ocr=ocr,
130
  processing=ProcessingInfo(
131
+ provider=model_config.provider.value if hasattr(model_config.provider, "value") else str(model_config.provider),
132
  model_id=model_config.selected_model_id,
133
  model_display_name=model_config.selected_model_display_name,
134
  prompt_version=prompt_rel_path,
backend/app/services/ai/model_registry.py CHANGED
@@ -1,5 +1,8 @@
1
  """
2
  Registre agrégé des modèles disponibles tous providers confondus.
 
 
 
3
  """
4
  # 1. stdlib
5
  import logging
@@ -8,10 +11,6 @@ from datetime import datetime, timezone
8
  # 2. local
9
  from app.schemas.model_config import ModelConfig, ModelInfo, ProviderType
10
  from app.services.ai.base import AIProvider
11
- from app.services.ai.provider_google_ai import GoogleAIProvider
12
- from app.services.ai.provider_mistral import MistralProvider
13
- from app.services.ai.provider_vertex_key import VertexAPIKeyProvider
14
- from app.services.ai.provider_vertex_sa import VertexServiceAccountProvider
15
 
16
  logger = logging.getLogger(__name__)
17
 
@@ -25,6 +24,12 @@ _PROVIDER_DISPLAY_NAMES: dict[ProviderType, str] = {
25
 
26
 
27
  def _build_providers() -> list[AIProvider]:
 
 
 
 
 
 
28
  return [
29
  GoogleAIProvider(),
30
  VertexAPIKeyProvider(),
 
1
  """
2
  Registre agrégé des modèles disponibles tous providers confondus.
3
+
4
+ Les imports de providers sont différés dans _build_providers() pour éviter
5
+ de charger les SDK tiers (google-genai, mistralai) au niveau module.
6
  """
7
  # 1. stdlib
8
  import logging
 
11
  # 2. local
12
  from app.schemas.model_config import ModelConfig, ModelInfo, ProviderType
13
  from app.services.ai.base import AIProvider
 
 
 
 
14
 
15
  logger = logging.getLogger(__name__)
16
 
 
24
 
25
 
26
  def _build_providers() -> list[AIProvider]:
27
+ """Construit la liste des providers — imports différés."""
28
+ from app.services.ai.provider_google_ai import GoogleAIProvider
29
+ from app.services.ai.provider_mistral import MistralProvider
30
+ from app.services.ai.provider_vertex_key import VertexAPIKeyProvider
31
+ from app.services.ai.provider_vertex_sa import VertexServiceAccountProvider
32
+
33
  return [
34
  GoogleAIProvider(),
35
  VertexAPIKeyProvider(),
backend/app/services/ai/provider_vertex_key.py CHANGED
@@ -22,9 +22,6 @@ retourne toujours False afin d'éviter des appels réseau voués à l'échec.
22
  import logging
23
  import os
24
 
25
- # 2. third-party
26
- from google.genai import types # noqa: F401 (conservé pour import cohérence)
27
-
28
  # 3. local
29
  from app.schemas.model_config import ModelInfo, ProviderType
30
  from app.services.ai.base import AIProvider
 
22
  import logging
23
  import os
24
 
 
 
 
25
  # 3. local
26
  from app.schemas.model_config import ModelInfo, ProviderType
27
  from app.services.ai.base import AIProvider
backend/app/services/corpus_runner.py CHANGED
@@ -17,7 +17,6 @@ from sqlalchemy import select
17
  # 3. local
18
  from app.models.database import async_session_factory
19
  from app.models.job import JobModel
20
- from app.services.job_runner import execute_page_job
21
 
22
  logger = logging.getLogger(__name__)
23
 
@@ -54,6 +53,8 @@ async def execute_corpus_job(corpus_id: str) -> dict:
54
  )
55
 
56
  # Exécution séquentielle — chaque job gère sa propre session
 
 
57
  for job_id in job_ids:
58
  await execute_page_job(job_id)
59
 
 
17
  # 3. local
18
  from app.models.database import async_session_factory
19
  from app.models.job import JobModel
 
20
 
21
  logger = logging.getLogger(__name__)
22
 
 
53
  )
54
 
55
  # Exécution séquentielle — chaque job gère sa propre session
56
+ from app.services.job_runner import execute_page_job
57
+
58
  for job_id in job_ids:
59
  await execute_page_job(job_id)
60
 
backend/app/services/export/alto.py CHANGED
@@ -160,11 +160,7 @@ def generate_alto(master: PageMaster) -> str:
160
  etree.SubElement(desc, _a("MeasurementUnit")).text = "pixel"
161
 
162
  src_info = etree.SubElement(desc, _a("sourceImageInformation"))
163
- file_name = (
164
- master.image.get("original_url")
165
- or master.image.get("derivative_web")
166
- or master.page_id
167
- )
168
  etree.SubElement(src_info, _a("fileName")).text = str(file_name)
169
 
170
  if master.processing:
@@ -185,8 +181,8 @@ def generate_alto(master: PageMaster) -> str:
185
  # ── Layout ─────────────────────────────────────────────────────────────
186
  layout_el = etree.SubElement(root, _a("Layout"))
187
 
188
- width = int(master.image.get("width", 0))
189
- height = int(master.image.get("height", 0))
190
 
191
  page_id_safe = master.page_id.replace(" ", "_")
192
  page_el = etree.SubElement(
 
160
  etree.SubElement(desc, _a("MeasurementUnit")).text = "pixel"
161
 
162
  src_info = etree.SubElement(desc, _a("sourceImageInformation"))
163
+ file_name = master.image.master or master.image.derivative_web or master.page_id
 
 
 
 
164
  etree.SubElement(src_info, _a("fileName")).text = str(file_name)
165
 
166
  if master.processing:
 
181
  # ── Layout ─────────────────────────────────────────────────────────────
182
  layout_el = etree.SubElement(root, _a("Layout"))
183
 
184
+ width = master.image.width
185
+ height = master.image.height
186
 
187
  page_id_safe = master.page_id.replace(" ", "_")
188
  page_el = etree.SubElement(
backend/app/services/export/iiif.py CHANGED
@@ -102,12 +102,12 @@ def generate_manifest(
102
  canvas_id = (
103
  f"{base_url}/api/v1/manuscripts/{manuscript_id}/canvas/{page.page_id}"
104
  )
105
- width = int(page.image.get("width", 0))
106
- height = int(page.image.get("height", 0))
107
 
108
  annotation_page_id = f"{canvas_id}/annotation-page/1"
109
  annotation_id = f"{canvas_id}/annotation/painting"
110
- image_url = page.image.get("original_url", "")
111
 
112
  canvas: dict = {
113
  "id": canvas_id,
 
102
  canvas_id = (
103
  f"{base_url}/api/v1/manuscripts/{manuscript_id}/canvas/{page.page_id}"
104
  )
105
+ width = page.image.width
106
+ height = page.image.height
107
 
108
  annotation_page_id = f"{canvas_id}/annotation-page/1"
109
  annotation_id = f"{canvas_id}/annotation/painting"
110
+ image_url = page.image.master or ""
111
 
112
  canvas: dict = {
113
  "id": canvas_id,
backend/app/services/export/mets.py CHANGED
@@ -182,7 +182,7 @@ def generate_mets(
182
  f_master = _el(grp_master, f"{_M}file", {"ID": f"IMG_MASTER_{sid}", "MIMETYPE": "image/jpeg"})
183
  _el(f_master, f"{_M}FLocat", {
184
  "LOCTYPE": "URL",
185
- f"{_XL}href": page.image.get("original_url", ""),
186
  f"{_XL}type": "simple",
187
  })
188
 
@@ -191,7 +191,7 @@ def generate_mets(
191
  _el(f_deriv, f"{_M}FLocat", {
192
  "LOCTYPE": "OTHER",
193
  "OTHERLOCTYPE": "filepath",
194
- f"{_XL}href": page.image.get("derivative_web", ""),
195
  f"{_XL}type": "simple",
196
  })
197
 
 
182
  f_master = _el(grp_master, f"{_M}file", {"ID": f"IMG_MASTER_{sid}", "MIMETYPE": "image/jpeg"})
183
  _el(f_master, f"{_M}FLocat", {
184
  "LOCTYPE": "URL",
185
+ f"{_XL}href": page.image.master or "",
186
  f"{_XL}type": "simple",
187
  })
188
 
 
191
  _el(f_deriv, f"{_M}FLocat", {
192
  "LOCTYPE": "OTHER",
193
  "OTHERLOCTYPE": "filepath",
194
+ f"{_XL}href": page.image.derivative_web or "",
195
  f"{_XL}type": "simple",
196
  })
197
 
backend/app/services/job_runner.py CHANGED
@@ -33,8 +33,6 @@ from app.models.job import JobModel
33
  from app.models.model_config_db import ModelConfigDB
34
  from app.schemas.corpus_profile import CorpusProfile
35
  from app.schemas.model_config import ModelConfig, ProviderType
36
- from app.services.ai.analyzer import run_primary_analysis
37
- from app.services.export.alto import generate_alto, write_alto
38
  from app.services.image.normalizer import create_derivatives, fetch_and_normalize
39
 
40
  logger = logging.getLogger(__name__)
@@ -148,6 +146,8 @@ async def _run_job_impl(job_id: str, db: AsyncSession) -> None:
148
  )
149
 
150
  # ── 6. Analyse primaire IA (R05 : double stockage) ───────────────────
 
 
151
  page_master = run_primary_analysis(
152
  derivative_image_path=Path(image_info.derivative_path),
153
  corpus_profile=corpus_profile,
@@ -163,6 +163,8 @@ async def _run_job_impl(job_id: str, db: AsyncSession) -> None:
163
  )
164
 
165
  # ── 7. Générer et écrire l'ALTO XML ──────────────────────────────────
 
 
166
  alto_xml = generate_alto(page_master)
167
  alto_path = (
168
  data_dir
 
33
  from app.models.model_config_db import ModelConfigDB
34
  from app.schemas.corpus_profile import CorpusProfile
35
  from app.schemas.model_config import ModelConfig, ProviderType
 
 
36
  from app.services.image.normalizer import create_derivatives, fetch_and_normalize
37
 
38
  logger = logging.getLogger(__name__)
 
146
  )
147
 
148
  # ── 6. Analyse primaire IA (R05 : double stockage) ───────────────────
149
+ from app.services.ai.analyzer import run_primary_analysis
150
+
151
  page_master = run_primary_analysis(
152
  derivative_image_path=Path(image_info.derivative_path),
153
  corpus_profile=corpus_profile,
 
163
  )
164
 
165
  # ── 7. Générer et écrire l'ALTO XML ──────────────────────────────────
166
+ from app.services.export.alto import generate_alto, write_alto
167
+
168
  alto_xml = generate_alto(page_master)
169
  alto_path = (
170
  data_dir
backend/pyproject.toml CHANGED
@@ -11,6 +11,7 @@ dependencies = [
11
  "fastapi>=0.111",
12
  "uvicorn[standard]>=0.29",
13
  "pydantic>=2.7",
 
14
  "sqlalchemy>=2.0",
15
  "aiosqlite>=0.20",
16
  "google-genai>=1.0",
 
11
  "fastapi>=0.111",
12
  "uvicorn[standard]>=0.29",
13
  "pydantic>=2.7",
14
+ "pydantic-settings>=2.0",
15
  "sqlalchemy>=2.0",
16
  "aiosqlite>=0.20",
17
  "google-genai>=1.0",
backend/tests/conftest_api.py CHANGED
@@ -51,10 +51,11 @@ async def async_client(db_session: AsyncSession):
51
 
52
  app.dependency_overrides[get_db] = _override_get_db
53
  # Les background tasks (execute_corpus_job, execute_page_job) créent leur
54
- # propre session via async_session_factory. On les neutralise pour éviter
55
- # qu'elles tentent de se connecter à la BDD réelle pendant les tests d'API.
56
- with patch("app.api.v1.jobs.execute_corpus_job", AsyncMock(return_value=None)), \
57
- patch("app.api.v1.jobs.execute_page_job", AsyncMock(return_value=None)):
 
58
  async with AsyncClient(
59
  transport=ASGITransport(app=app), base_url="http://test"
60
  ) as client:
 
51
 
52
  app.dependency_overrides[get_db] = _override_get_db
53
  # Les background tasks (execute_corpus_job, execute_page_job) créent leur
54
+ # propre session via async_session_factory. On les neutralise en mockant
55
+ # les modules sources pour éviter qu'elles tentent de se connecter à la
56
+ # BDD réelle pendant les tests d'API.
57
+ with patch("app.services.corpus_runner.execute_corpus_job", AsyncMock(return_value={"total": 0, "done": 0, "failed": 0})), \
58
+ patch("app.services.job_runner.execute_page_job", AsyncMock(return_value=None)):
59
  async with AsyncClient(
60
  transport=ASGITransport(app=app), base_url="http://test"
61
  ) as client:
backend/tests/test_ai_analyzer.py CHANGED
@@ -432,7 +432,7 @@ def _make_page_master() -> PageMaster:
432
  folio_label="0001r",
433
  sequence=1,
434
  image={
435
- "original_url": "https://example.com/img.jpg",
436
  "derivative_web": "/data/deriv.jpg",
437
  "thumbnail": "/data/thumb.jpg",
438
  "width": 1500,
@@ -440,6 +440,7 @@ def _make_page_master() -> PageMaster:
440
  },
441
  layout={"regions": []},
442
  processing={
 
443
  "model_id": "gemini-2.0-flash",
444
  "model_display_name": "Gemini 2.0 Flash",
445
  "prompt_version": "prompts/medieval-illuminated/primary_v1.txt",
@@ -663,9 +664,9 @@ def test_run_primary_analysis_image_dict(tmp_path):
663
  project_root=tmp_path,
664
  )
665
 
666
- assert result.image["original_url"] == image_info.original_url
667
- assert result.image["width"] == image_info.derivative_width
668
- assert result.image["height"] == image_info.derivative_height
669
 
670
 
671
  def test_run_primary_analysis_regions_in_layout(tmp_path):
 
432
  folio_label="0001r",
433
  sequence=1,
434
  image={
435
+ "master": "https://example.com/img.jpg",
436
  "derivative_web": "/data/deriv.jpg",
437
  "thumbnail": "/data/thumb.jpg",
438
  "width": 1500,
 
440
  },
441
  layout={"regions": []},
442
  processing={
443
+ "provider": "google_ai_studio",
444
  "model_id": "gemini-2.0-flash",
445
  "model_display_name": "Gemini 2.0 Flash",
446
  "prompt_version": "prompts/medieval-illuminated/primary_v1.txt",
 
664
  project_root=tmp_path,
665
  )
666
 
667
+ assert result.image.master == image_info.original_url
668
+ assert result.image.width == image_info.derivative_width
669
+ assert result.image.height == image_info.derivative_height
670
 
671
 
672
  def test_run_primary_analysis_regions_in_layout(tmp_path):
backend/tests/test_api_corrections.py CHANGED
@@ -75,7 +75,7 @@ def _make_master(
75
  "manuscript_id": "ms-test",
76
  "folio_label": "f001r",
77
  "sequence": 1,
78
- "image": {"original_url": "https://example.com/f.jpg", "width": 1500, "height": 2000},
79
  "layout": {"regions": []},
80
  "ocr": {
81
  "diplomatic_text": "Incipit liber primus",
 
75
  "manuscript_id": "ms-test",
76
  "folio_label": "f001r",
77
  "sequence": 1,
78
+ "image": {"master": "https://example.com/f.jpg", "width": 1500, "height": 2000},
79
  "layout": {"regions": []},
80
  "ocr": {
81
  "diplomatic_text": "Incipit liber primus",
backend/tests/test_api_export.py CHANGED
@@ -83,7 +83,7 @@ def _make_master_json(page_id: str, folio_label: str, sequence: int) -> str:
83
  "folio_label": folio_label,
84
  "sequence": sequence,
85
  "image": {
86
- "original_url": f"https://example.com/{page_id}.jpg",
87
  "derivative_web": f"/data/deriv/{page_id}.jpg",
88
  "thumbnail": f"/data/thumb/{page_id}.jpg",
89
  "width": 1500,
 
83
  "folio_label": folio_label,
84
  "sequence": sequence,
85
  "image": {
86
+ "master": f"https://example.com/{page_id}.jpg",
87
  "derivative_web": f"/data/deriv/{page_id}.jpg",
88
  "thumbnail": f"/data/thumb/{page_id}.jpg",
89
  "width": 1500,
backend/tests/test_api_models.py CHANGED
@@ -94,7 +94,7 @@ async def test_get_models_endpoint_removed(async_client):
94
  @pytest.mark.asyncio
95
  async def test_refresh_models_ok(async_client, monkeypatch):
96
  monkeypatch.setattr(
97
- models_api_module, "list_all_models", lambda: _MOCK_MODELS
98
  )
99
  response = await async_client.post("/api/v1/models/refresh")
100
  assert response.status_code == 200
@@ -103,7 +103,7 @@ async def test_refresh_models_ok(async_client, monkeypatch):
103
  @pytest.mark.asyncio
104
  async def test_refresh_models_has_timestamp(async_client, monkeypatch):
105
  monkeypatch.setattr(
106
- models_api_module, "list_all_models", lambda: _MOCK_MODELS
107
  )
108
  data = (await async_client.post("/api/v1/models/refresh")).json()
109
  assert "refreshed_at" in data
@@ -113,7 +113,7 @@ async def test_refresh_models_has_timestamp(async_client, monkeypatch):
113
  @pytest.mark.asyncio
114
  async def test_refresh_models_count(async_client, monkeypatch):
115
  monkeypatch.setattr(
116
- models_api_module, "list_all_models", lambda: _MOCK_MODELS
117
  )
118
  data = (await async_client.post("/api/v1/models/refresh")).json()
119
  assert data["count"] == 2
@@ -123,7 +123,7 @@ async def test_refresh_models_count(async_client, monkeypatch):
123
  @pytest.mark.asyncio
124
  async def test_refresh_models_structure(async_client, monkeypatch):
125
  monkeypatch.setattr(
126
- models_api_module, "list_all_models", lambda: _MOCK_MODELS
127
  )
128
  data = (await async_client.post("/api/v1/models/refresh")).json()
129
  assert "models" in data
 
94
  @pytest.mark.asyncio
95
  async def test_refresh_models_ok(async_client, monkeypatch):
96
  monkeypatch.setattr(
97
+ "app.services.ai.model_registry.list_all_models", lambda: _MOCK_MODELS
98
  )
99
  response = await async_client.post("/api/v1/models/refresh")
100
  assert response.status_code == 200
 
103
  @pytest.mark.asyncio
104
  async def test_refresh_models_has_timestamp(async_client, monkeypatch):
105
  monkeypatch.setattr(
106
+ "app.services.ai.model_registry.list_all_models", lambda: _MOCK_MODELS
107
  )
108
  data = (await async_client.post("/api/v1/models/refresh")).json()
109
  assert "refreshed_at" in data
 
113
  @pytest.mark.asyncio
114
  async def test_refresh_models_count(async_client, monkeypatch):
115
  monkeypatch.setattr(
116
+ "app.services.ai.model_registry.list_all_models", lambda: _MOCK_MODELS
117
  )
118
  data = (await async_client.post("/api/v1/models/refresh")).json()
119
  assert data["count"] == 2
 
123
  @pytest.mark.asyncio
124
  async def test_refresh_models_structure(async_client, monkeypatch):
125
  monkeypatch.setattr(
126
+ "app.services.ai.model_registry.list_all_models", lambda: _MOCK_MODELS
127
  )
128
  data = (await async_client.post("/api/v1/models/refresh")).json()
129
  assert "models" in data
backend/tests/test_api_pages.py CHANGED
@@ -87,7 +87,7 @@ def _make_master_json(page_id: str, corpus_profile: str = "medieval-illuminated"
87
  "folio_label": "f001r",
88
  "sequence": 1,
89
  "image": {
90
- "original_url": "https://example.com/f001r.jpg",
91
  "derivative_web": "/data/deriv/f001r.jpg",
92
  "thumbnail": "/data/thumb/f001r.jpg",
93
  "width": 1500,
 
87
  "folio_label": "f001r",
88
  "sequence": 1,
89
  "image": {
90
+ "master": "https://example.com/f001r.jpg",
91
  "derivative_web": "/data/deriv/f001r.jpg",
92
  "thumbnail": "/data/thumb/f001r.jpg",
93
  "width": 1500,
backend/tests/test_api_providers.py CHANGED
@@ -90,7 +90,7 @@ _MOCK_MISTRAL_MODELS = [
90
 
91
  @pytest.mark.asyncio
92
  async def test_list_providers_returns_list(async_client, monkeypatch):
93
- monkeypatch.setattr(models_api_module, "get_available_providers", lambda: _PROVIDERS_ALL_UNAVAILABLE)
94
  resp = await async_client.get("/api/v1/providers")
95
  assert resp.status_code == 200
96
  assert isinstance(resp.json(), list)
@@ -98,14 +98,14 @@ async def test_list_providers_returns_list(async_client, monkeypatch):
98
 
99
  @pytest.mark.asyncio
100
  async def test_list_providers_count(async_client, monkeypatch):
101
- monkeypatch.setattr(models_api_module, "get_available_providers", lambda: _PROVIDERS_ALL_UNAVAILABLE)
102
  data = (await async_client.get("/api/v1/providers")).json()
103
  assert len(data) == 4 # 4 providers connus
104
 
105
 
106
  @pytest.mark.asyncio
107
  async def test_list_providers_fields(async_client, monkeypatch):
108
- monkeypatch.setattr(models_api_module, "get_available_providers", lambda: _PROVIDERS_ALL_UNAVAILABLE)
109
  data = (await async_client.get("/api/v1/providers")).json()
110
  p = data[0]
111
  assert "provider_type" in p
@@ -116,7 +116,7 @@ async def test_list_providers_fields(async_client, monkeypatch):
116
 
117
  @pytest.mark.asyncio
118
  async def test_list_providers_all_unavailable(async_client, monkeypatch):
119
- monkeypatch.setattr(models_api_module, "get_available_providers", lambda: _PROVIDERS_ALL_UNAVAILABLE)
120
  data = (await async_client.get("/api/v1/providers")).json()
121
  assert all(not p["available"] for p in data)
122
  assert all(p["model_count"] == 0 for p in data)
@@ -124,7 +124,7 @@ async def test_list_providers_all_unavailable(async_client, monkeypatch):
124
 
125
  @pytest.mark.asyncio
126
  async def test_list_providers_google_available(async_client, monkeypatch):
127
- monkeypatch.setattr(models_api_module, "get_available_providers", lambda: _PROVIDERS_GOOGLE_ONLY)
128
  data = (await async_client.get("/api/v1/providers")).json()
129
  google = next(p for p in data if p["provider_type"] == "google_ai_studio")
130
  assert google["available"] is True
@@ -133,7 +133,7 @@ async def test_list_providers_google_available(async_client, monkeypatch):
133
 
134
  @pytest.mark.asyncio
135
  async def test_list_providers_mistral_available(async_client, monkeypatch):
136
- monkeypatch.setattr(models_api_module, "get_available_providers", lambda: _PROVIDERS_GOOGLE_AND_MISTRAL)
137
  data = (await async_client.get("/api/v1/providers")).json()
138
  mistral = next(p for p in data if p["provider_type"] == "mistral")
139
  assert mistral["available"] is True
@@ -143,7 +143,7 @@ async def test_list_providers_mistral_available(async_client, monkeypatch):
143
  @pytest.mark.asyncio
144
  async def test_list_providers_includes_mistral_type(async_client, monkeypatch):
145
  """Mistral est toujours dans la liste même si indisponible."""
146
- monkeypatch.setattr(models_api_module, "get_available_providers", lambda: _PROVIDERS_ALL_UNAVAILABLE)
147
  data = (await async_client.get("/api/v1/providers")).json()
148
  types_ = [p["provider_type"] for p in data]
149
  assert "mistral" in types_
@@ -156,7 +156,7 @@ async def test_list_providers_includes_mistral_type(async_client, monkeypatch):
156
  @pytest.mark.asyncio
157
  async def test_get_provider_models_google(async_client, monkeypatch):
158
  monkeypatch.setattr(
159
- models_api_module, "list_models_for_provider", lambda ptype: _MOCK_GOOGLE_MODELS
160
  )
161
  resp = await async_client.get("/api/v1/providers/google_ai_studio/models")
162
  assert resp.status_code == 200
@@ -166,7 +166,7 @@ async def test_get_provider_models_google(async_client, monkeypatch):
166
  @pytest.mark.asyncio
167
  async def test_get_provider_models_mistral(async_client, monkeypatch):
168
  monkeypatch.setattr(
169
- models_api_module, "list_models_for_provider", lambda ptype: _MOCK_MISTRAL_MODELS
170
  )
171
  resp = await async_client.get("/api/v1/providers/mistral/models")
172
  assert resp.status_code == 200
@@ -189,7 +189,7 @@ async def test_get_provider_models_not_configured(async_client, monkeypatch):
189
  def _raise(ptype):
190
  raise RuntimeError("Variable d'environnement manquante : MISTRAL_API_KEY")
191
 
192
- monkeypatch.setattr(models_api_module, "list_models_for_provider", _raise)
193
  resp = await async_client.get("/api/v1/providers/mistral/models")
194
  assert resp.status_code == 503
195
 
@@ -197,7 +197,7 @@ async def test_get_provider_models_not_configured(async_client, monkeypatch):
197
  @pytest.mark.asyncio
198
  async def test_get_provider_models_fields(async_client, monkeypatch):
199
  monkeypatch.setattr(
200
- models_api_module, "list_models_for_provider", lambda ptype: _MOCK_MISTRAL_MODELS
201
  )
202
  data = (await async_client.get("/api/v1/providers/mistral/models")).json()
203
  m = data[0]
 
90
 
91
  @pytest.mark.asyncio
92
  async def test_list_providers_returns_list(async_client, monkeypatch):
93
+ monkeypatch.setattr("app.services.ai.model_registry.get_available_providers", lambda: _PROVIDERS_ALL_UNAVAILABLE)
94
  resp = await async_client.get("/api/v1/providers")
95
  assert resp.status_code == 200
96
  assert isinstance(resp.json(), list)
 
98
 
99
  @pytest.mark.asyncio
100
  async def test_list_providers_count(async_client, monkeypatch):
101
+ monkeypatch.setattr("app.services.ai.model_registry.get_available_providers", lambda: _PROVIDERS_ALL_UNAVAILABLE)
102
  data = (await async_client.get("/api/v1/providers")).json()
103
  assert len(data) == 4 # 4 providers connus
104
 
105
 
106
  @pytest.mark.asyncio
107
  async def test_list_providers_fields(async_client, monkeypatch):
108
+ monkeypatch.setattr("app.services.ai.model_registry.get_available_providers", lambda: _PROVIDERS_ALL_UNAVAILABLE)
109
  data = (await async_client.get("/api/v1/providers")).json()
110
  p = data[0]
111
  assert "provider_type" in p
 
116
 
117
  @pytest.mark.asyncio
118
  async def test_list_providers_all_unavailable(async_client, monkeypatch):
119
+ monkeypatch.setattr("app.services.ai.model_registry.get_available_providers", lambda: _PROVIDERS_ALL_UNAVAILABLE)
120
  data = (await async_client.get("/api/v1/providers")).json()
121
  assert all(not p["available"] for p in data)
122
  assert all(p["model_count"] == 0 for p in data)
 
124
 
125
  @pytest.mark.asyncio
126
  async def test_list_providers_google_available(async_client, monkeypatch):
127
+ monkeypatch.setattr("app.services.ai.model_registry.get_available_providers", lambda: _PROVIDERS_GOOGLE_ONLY)
128
  data = (await async_client.get("/api/v1/providers")).json()
129
  google = next(p for p in data if p["provider_type"] == "google_ai_studio")
130
  assert google["available"] is True
 
133
 
134
  @pytest.mark.asyncio
135
  async def test_list_providers_mistral_available(async_client, monkeypatch):
136
+ monkeypatch.setattr("app.services.ai.model_registry.get_available_providers", lambda: _PROVIDERS_GOOGLE_AND_MISTRAL)
137
  data = (await async_client.get("/api/v1/providers")).json()
138
  mistral = next(p for p in data if p["provider_type"] == "mistral")
139
  assert mistral["available"] is True
 
143
  @pytest.mark.asyncio
144
  async def test_list_providers_includes_mistral_type(async_client, monkeypatch):
145
  """Mistral est toujours dans la liste même si indisponible."""
146
+ monkeypatch.setattr("app.services.ai.model_registry.get_available_providers", lambda: _PROVIDERS_ALL_UNAVAILABLE)
147
  data = (await async_client.get("/api/v1/providers")).json()
148
  types_ = [p["provider_type"] for p in data]
149
  assert "mistral" in types_
 
156
  @pytest.mark.asyncio
157
  async def test_get_provider_models_google(async_client, monkeypatch):
158
  monkeypatch.setattr(
159
+ "app.services.ai.model_registry.list_models_for_provider", lambda ptype: _MOCK_GOOGLE_MODELS
160
  )
161
  resp = await async_client.get("/api/v1/providers/google_ai_studio/models")
162
  assert resp.status_code == 200
 
166
  @pytest.mark.asyncio
167
  async def test_get_provider_models_mistral(async_client, monkeypatch):
168
  monkeypatch.setattr(
169
+ "app.services.ai.model_registry.list_models_for_provider", lambda ptype: _MOCK_MISTRAL_MODELS
170
  )
171
  resp = await async_client.get("/api/v1/providers/mistral/models")
172
  assert resp.status_code == 200
 
189
  def _raise(ptype):
190
  raise RuntimeError("Variable d'environnement manquante : MISTRAL_API_KEY")
191
 
192
+ monkeypatch.setattr("app.services.ai.model_registry.list_models_for_provider", _raise)
193
  resp = await async_client.get("/api/v1/providers/mistral/models")
194
  assert resp.status_code == 503
195
 
 
197
  @pytest.mark.asyncio
198
  async def test_get_provider_models_fields(async_client, monkeypatch):
199
  monkeypatch.setattr(
200
+ "app.services.ai.model_registry.list_models_for_provider", lambda ptype: _MOCK_MISTRAL_MODELS
201
  )
202
  data = (await async_client.get("/api/v1/providers/mistral/models")).json()
203
  m = data[0]
backend/tests/test_api_search.py CHANGED
@@ -33,7 +33,7 @@ def _make_master(page_id: str, diplomatic_text: str = "", translation_fr: str =
33
  "manuscript_id": "ms-test",
34
  "folio_label": "f001r",
35
  "sequence": 1,
36
- "image": {"original_url": "https://example.com/f.jpg", "width": 1500, "height": 2000},
37
  "layout": {"regions": []},
38
  "ocr": {
39
  "diplomatic_text": diplomatic_text,
 
33
  "manuscript_id": "ms-test",
34
  "folio_label": "f001r",
35
  "sequence": 1,
36
+ "image": {"master": "https://example.com/f.jpg", "width": 1500, "height": 2000},
37
  "layout": {"regions": []},
38
  "ocr": {
39
  "diplomatic_text": diplomatic_text,
backend/tests/test_export_alto.py CHANGED
@@ -52,6 +52,7 @@ def _make_master(
52
  processing = None
53
  if with_processing:
54
  processing = ProcessingInfo(
 
55
  model_id="gemini-2.0-flash",
56
  model_display_name="Gemini 2.0 Flash",
57
  prompt_version="prompts/medieval-illuminated/primary_v1.txt",
@@ -65,7 +66,7 @@ def _make_master(
65
  folio_label="0001r",
66
  sequence=sequence,
67
  image={
68
- "original_url": "https://example.com/img.jpg",
69
  "derivative_web": "/data/deriv.jpg",
70
  "thumbnail": "/data/thumb.jpg",
71
  "width": width,
 
52
  processing = None
53
  if with_processing:
54
  processing = ProcessingInfo(
55
+ provider="google_ai_studio",
56
  model_id="gemini-2.0-flash",
57
  model_display_name="Gemini 2.0 Flash",
58
  prompt_version="prompts/medieval-illuminated/primary_v1.txt",
 
66
  folio_label="0001r",
67
  sequence=sequence,
68
  image={
69
+ "master": "https://example.com/img.jpg",
70
  "derivative_web": "/data/deriv.jpg",
71
  "thumbnail": "/data/thumb.jpg",
72
  "width": width,
backend/tests/test_export_iiif.py CHANGED
@@ -53,7 +53,7 @@ def _make_page(
53
  folio_label=folio_label,
54
  sequence=sequence,
55
  image={
56
- "original_url": original_url or f"https://example.com/{folio_label}.jpg",
57
  "derivative_web": f"/data/deriv/{folio_label}.jpg",
58
  "thumbnail": f"/data/thumb/{folio_label}.jpg",
59
  "width": width,
@@ -344,7 +344,7 @@ def test_canvas_width_matches_image(beatus_pages, beatus_meta):
344
  # Trouve la page correspondante
345
  page_id = canvas["id"].split("/canvas/")[-1]
346
  page = next(p for p in beatus_pages if p.page_id == page_id)
347
- assert canvas["width"] == page.image["width"]
348
 
349
 
350
  def test_canvas_height_matches_image(beatus_pages, beatus_meta):
@@ -352,7 +352,7 @@ def test_canvas_height_matches_image(beatus_pages, beatus_meta):
352
  for canvas in manifest["items"]:
353
  page_id = canvas["id"].split("/canvas/")[-1]
354
  page = next(p for p in beatus_pages if p.page_id == page_id)
355
- assert canvas["height"] == page.image["height"]
356
 
357
 
358
  def test_canvas_dimensions_beatus_hr():
@@ -447,7 +447,7 @@ def test_annotation_body_id_is_original_url(beatus_pages, beatus_meta):
447
  page_id = canvas["id"].split("/canvas/")[-1]
448
  page = next(p for p in beatus_pages if p.page_id == page_id)
449
  body = canvas["items"][0]["items"][0]["body"]
450
- assert body["id"] == page.image["original_url"]
451
 
452
 
453
  def test_annotation_body_contains_gallica_url(beatus_pages, beatus_meta):
 
53
  folio_label=folio_label,
54
  sequence=sequence,
55
  image={
56
+ "master": original_url or f"https://example.com/{folio_label}.jpg",
57
  "derivative_web": f"/data/deriv/{folio_label}.jpg",
58
  "thumbnail": f"/data/thumb/{folio_label}.jpg",
59
  "width": width,
 
344
  # Trouve la page correspondante
345
  page_id = canvas["id"].split("/canvas/")[-1]
346
  page = next(p for p in beatus_pages if p.page_id == page_id)
347
+ assert canvas["width"] == page.image.width
348
 
349
 
350
  def test_canvas_height_matches_image(beatus_pages, beatus_meta):
 
352
  for canvas in manifest["items"]:
353
  page_id = canvas["id"].split("/canvas/")[-1]
354
  page = next(p for p in beatus_pages if p.page_id == page_id)
355
+ assert canvas["height"] == page.image.height
356
 
357
 
358
  def test_canvas_dimensions_beatus_hr():
 
447
  page_id = canvas["id"].split("/canvas/")[-1]
448
  page = next(p for p in beatus_pages if p.page_id == page_id)
449
  body = canvas["items"][0]["items"][0]["body"]
450
+ assert body["id"] == page.image.master
451
 
452
 
453
  def test_annotation_body_contains_gallica_url(beatus_pages, beatus_meta):
backend/tests/test_export_mets.py CHANGED
@@ -66,6 +66,7 @@ def _make_page(
66
  processing = None
67
  if with_processing:
68
  processing = ProcessingInfo(
 
69
  model_id="gemini-2.0-flash",
70
  model_display_name="Gemini 2.0 Flash",
71
  prompt_version="prompts/medieval-illuminated/primary_v1.txt",
@@ -80,7 +81,7 @@ def _make_page(
80
  folio_label=folio_label,
81
  sequence=sequence,
82
  image={
83
- "original_url": original_url or f"https://example.com/{folio_label}.jpg",
84
  "derivative_web": derivative_web or f"/data/deriv/{folio_label}.jpg",
85
  "thumbnail": f"/data/thumb/{folio_label}.jpg",
86
  "width": 1500,
 
66
  processing = None
67
  if with_processing:
68
  processing = ProcessingInfo(
69
+ provider="google_ai_studio",
70
  model_id="gemini-2.0-flash",
71
  model_display_name="Gemini 2.0 Flash",
72
  prompt_version="prompts/medieval-illuminated/primary_v1.txt",
 
81
  folio_label=folio_label,
82
  sequence=sequence,
83
  image={
84
+ "master": original_url or f"https://example.com/{folio_label}.jpg",
85
  "derivative_web": derivative_web or f"/data/deriv/{folio_label}.jpg",
86
  "thumbnail": f"/data/thumb/{folio_label}.jpg",
87
  "width": 1500,
backend/tests/test_job_runner.py CHANGED
@@ -142,16 +142,24 @@ def _page_master(page_id: str, ms_id: str) -> PageMaster:
142
 
143
 
144
  def _apply_success_mocks(monkeypatch, page_id: str, ms_id: str) -> None:
145
- """Applique les mocks IO pour un pipeline réussi."""
 
 
 
 
146
  monkeypatch.setattr(
147
  job_runner_module, "fetch_and_normalize", lambda *a: _image_info()
148
  )
149
  monkeypatch.setattr(
150
- job_runner_module, "run_primary_analysis",
151
  lambda **kw: _page_master(page_id, ms_id),
152
  )
153
- monkeypatch.setattr(job_runner_module, "generate_alto", lambda pm: "<alto/>")
154
- monkeypatch.setattr(job_runner_module, "write_alto", lambda xml, path: None)
 
 
 
 
155
 
156
 
157
  # ---------------------------------------------------------------------------
@@ -274,7 +282,7 @@ async def test_no_image_path_job_failed(db, setup_with_model, monkeypatch):
274
  s["page"].image_master_path = None
275
  await db.commit()
276
  monkeypatch.setattr(
277
- job_runner_module, "run_primary_analysis",
278
  lambda **kw: _page_master(s["page"].id, s["ms"].id),
279
  )
280
 
@@ -291,7 +299,7 @@ async def test_no_image_path_page_error(db, setup_with_model, monkeypatch):
291
  s["page"].image_master_path = None
292
  await db.commit()
293
  monkeypatch.setattr(
294
- job_runner_module, "run_primary_analysis",
295
  lambda **kw: _page_master(s["page"].id, s["ms"].id),
296
  )
297
 
@@ -343,7 +351,7 @@ async def test_primary_analysis_fails_job_failed(db, setup_with_model, monkeypat
343
  job_runner_module, "fetch_and_normalize", lambda *a: _image_info()
344
  )
345
  monkeypatch.setattr(
346
- job_runner_module, "run_primary_analysis",
347
  lambda **kw: (_ for _ in ()).throw(ValueError("ParseError: invalid JSON")),
348
  )
349
 
@@ -361,7 +369,7 @@ async def test_primary_analysis_fails_page_error(db, setup_with_model, monkeypat
361
  job_runner_module, "fetch_and_normalize", lambda *a: _image_info()
362
  )
363
  monkeypatch.setattr(
364
- job_runner_module, "run_primary_analysis",
365
  lambda **kw: (_ for _ in ()).throw(ValueError("ParseError: invalid JSON")),
366
  )
367
 
@@ -379,7 +387,7 @@ async def test_primary_analysis_error_message_stored(db, setup_with_model, monke
379
  job_runner_module, "fetch_and_normalize", lambda *a: _image_info()
380
  )
381
  monkeypatch.setattr(
382
- job_runner_module, "run_primary_analysis",
383
  lambda **kw: (_ for _ in ()).throw(ValueError("ParseError: invalid JSON")),
384
  )
385
 
@@ -401,12 +409,14 @@ async def test_write_alto_fails_job_failed(db, setup_with_model, monkeypatch):
401
  job_runner_module, "fetch_and_normalize", lambda *a: _image_info()
402
  )
403
  monkeypatch.setattr(
404
- job_runner_module, "run_primary_analysis",
405
  lambda **kw: _page_master(s["page"].id, s["ms"].id),
406
  )
407
- monkeypatch.setattr(job_runner_module, "generate_alto", lambda pm: "<alto/>")
408
  monkeypatch.setattr(
409
- job_runner_module, "write_alto",
 
 
 
410
  lambda xml, path: (_ for _ in ()).throw(OSError("disk full")),
411
  )
412
 
@@ -424,12 +434,14 @@ async def test_write_alto_fails_page_error(db, setup_with_model, monkeypatch):
424
  job_runner_module, "fetch_and_normalize", lambda *a: _image_info()
425
  )
426
  monkeypatch.setattr(
427
- job_runner_module, "run_primary_analysis",
428
  lambda **kw: _page_master(s["page"].id, s["ms"].id),
429
  )
430
- monkeypatch.setattr(job_runner_module, "generate_alto", lambda pm: "<alto/>")
431
  monkeypatch.setattr(
432
- job_runner_module, "write_alto",
 
 
 
433
  lambda xml, path: (_ for _ in ()).throw(OSError("disk full")),
434
  )
435
 
@@ -534,7 +546,7 @@ async def test_corpus_runner_calls_execute_per_job(monkeypatch):
534
  return _FakeSession()
535
 
536
  monkeypatch.setattr(corpus_runner_module, "async_session_factory", _mock_factory)
537
- monkeypatch.setattr(corpus_runner_module, "execute_page_job", _mock_execute)
538
 
539
  await execute_corpus_job("corpus-xyz")
540
 
 
142
 
143
 
144
  def _apply_success_mocks(monkeypatch, page_id: str, ms_id: str) -> None:
145
+ """Applique les mocks IO pour un pipeline réussi.
146
+
147
+ Les imports sont différés dans job_runner (lazy imports). On patche donc
148
+ les modules sources pour que le import dans la fonction cible récupère le mock.
149
+ """
150
  monkeypatch.setattr(
151
  job_runner_module, "fetch_and_normalize", lambda *a: _image_info()
152
  )
153
  monkeypatch.setattr(
154
+ "app.services.ai.analyzer.run_primary_analysis",
155
  lambda **kw: _page_master(page_id, ms_id),
156
  )
157
+ monkeypatch.setattr(
158
+ "app.services.export.alto.generate_alto", lambda pm: "<alto/>"
159
+ )
160
+ monkeypatch.setattr(
161
+ "app.services.export.alto.write_alto", lambda xml, path: None
162
+ )
163
 
164
 
165
  # ---------------------------------------------------------------------------
 
282
  s["page"].image_master_path = None
283
  await db.commit()
284
  monkeypatch.setattr(
285
+ "app.services.ai.analyzer.run_primary_analysis",
286
  lambda **kw: _page_master(s["page"].id, s["ms"].id),
287
  )
288
 
 
299
  s["page"].image_master_path = None
300
  await db.commit()
301
  monkeypatch.setattr(
302
+ "app.services.ai.analyzer.run_primary_analysis",
303
  lambda **kw: _page_master(s["page"].id, s["ms"].id),
304
  )
305
 
 
351
  job_runner_module, "fetch_and_normalize", lambda *a: _image_info()
352
  )
353
  monkeypatch.setattr(
354
+ "app.services.ai.analyzer.run_primary_analysis",
355
  lambda **kw: (_ for _ in ()).throw(ValueError("ParseError: invalid JSON")),
356
  )
357
 
 
369
  job_runner_module, "fetch_and_normalize", lambda *a: _image_info()
370
  )
371
  monkeypatch.setattr(
372
+ "app.services.ai.analyzer.run_primary_analysis",
373
  lambda **kw: (_ for _ in ()).throw(ValueError("ParseError: invalid JSON")),
374
  )
375
 
 
387
  job_runner_module, "fetch_and_normalize", lambda *a: _image_info()
388
  )
389
  monkeypatch.setattr(
390
+ "app.services.ai.analyzer.run_primary_analysis",
391
  lambda **kw: (_ for _ in ()).throw(ValueError("ParseError: invalid JSON")),
392
  )
393
 
 
409
  job_runner_module, "fetch_and_normalize", lambda *a: _image_info()
410
  )
411
  monkeypatch.setattr(
412
+ "app.services.ai.analyzer.run_primary_analysis",
413
  lambda **kw: _page_master(s["page"].id, s["ms"].id),
414
  )
 
415
  monkeypatch.setattr(
416
+ "app.services.export.alto.generate_alto", lambda pm: "<alto/>"
417
+ )
418
+ monkeypatch.setattr(
419
+ "app.services.export.alto.write_alto",
420
  lambda xml, path: (_ for _ in ()).throw(OSError("disk full")),
421
  )
422
 
 
434
  job_runner_module, "fetch_and_normalize", lambda *a: _image_info()
435
  )
436
  monkeypatch.setattr(
437
+ "app.services.ai.analyzer.run_primary_analysis",
438
  lambda **kw: _page_master(s["page"].id, s["ms"].id),
439
  )
 
440
  monkeypatch.setattr(
441
+ "app.services.export.alto.generate_alto", lambda pm: "<alto/>"
442
+ )
443
+ monkeypatch.setattr(
444
+ "app.services.export.alto.write_alto",
445
  lambda xml, path: (_ for _ in ()).throw(OSError("disk full")),
446
  )
447
 
 
546
  return _FakeSession()
547
 
548
  monkeypatch.setattr(corpus_runner_module, "async_session_factory", _mock_factory)
549
+ monkeypatch.setattr("app.services.job_runner.execute_page_job", _mock_execute)
550
 
551
  await execute_corpus_job("corpus-xyz")
552