Claude committed on
Commit
9097545
·
unverified ·
1 Parent(s): 4c0517f

fix: comprehensive repo audit — 15 issues fixed

Browse files

Critical:
- Fix path mismatch between master.json write (folio_label) and read (page.id)
in pages.py, export.py — API returned 404 after pipeline analysis
- Fix path traversal using is_relative_to() instead of string comparison
in job_runner.py and main.py

Search & performance:
- Search now uses SQL LIKE on pre-normalized column instead of loading all
rows into Python (O(n) → SQL-filtered)
- Add normalized_text column + migration in main.py

Frontend:
- get() helper now extracts FastAPI error detail like post/put/del
- getCorpusModel() uses get() wrapper instead of raw fetch
- Extract duplicate STATUS_LABELS/VARIANTS into shared editorial.ts
- Add 'claimed' to JobStatus type

Backend cleanup:
- Remove dead iiif_base field from ImageInfo schema
- Remove dead ParseError import from analyzer.py
- Narrow catch-all Exception to specific types in response_parser.py
- Fix misleading return type hints in models_api.py endpoints
- Make AI concurrency configurable via AI_MAX_CONCURRENT env var
- Add limit/offset to history endpoint

CI/CD:
- Add frontend vitest to GitHub Actions workflow
- Replace hardcoded HuggingFace username with configurable vars

https://claude.ai/code/session_01NuG9pRMcEHDi4SsKtEHoCj

.github/workflows/deploy-hf.yml CHANGED
@@ -32,8 +32,10 @@ jobs:
32
  - name: Push to HuggingFace Space
33
  env:
34
  HF_TOKEN: ${{ secrets.HF_TOKEN }}
 
 
35
  run: |
36
  git config user.email "github-actions[bot]@users.noreply.github.com"
37
  git config user.name "github-actions[bot]"
38
- git remote add hf https://Ma-Ri-Ba-Ku:${HF_TOKEN}@huggingface.co/spaces/Ma-Ri-Ba-Ku/IIIF-Studio
39
  git push hf main --force
 
32
  - name: Push to HuggingFace Space
33
  env:
34
  HF_TOKEN: ${{ secrets.HF_TOKEN }}
35
+ HF_USERNAME: ${{ vars.HF_USERNAME || 'Ma-Ri-Ba-Ku' }}
36
+ HF_SPACE: ${{ vars.HF_SPACE || 'IIIF-Studio' }}
37
  run: |
38
  git config user.email "github-actions[bot]@users.noreply.github.com"
39
  git config user.name "github-actions[bot]"
40
+ git remote add hf "https://${HF_USERNAME}:${HF_TOKEN}@huggingface.co/spaces/${HF_USERNAME}/${HF_SPACE}"
41
  git push hf main --force
.github/workflows/tests.yml CHANGED
@@ -31,5 +31,18 @@ jobs:
31
  - name: Install backend with dev dependencies
32
  run: pip install -e "backend/[dev]"
33
 
34
- - name: Run tests
35
  run: pytest backend/tests/ --tb=short -q
 
 
 
 
 
 
 
 
 
 
 
 
 
 
31
  - name: Install backend with dev dependencies
32
  run: pip install -e "backend/[dev]"
33
 
34
+ - name: Run backend tests
35
  run: pytest backend/tests/ --tb=short -q
36
+
37
+ - name: Setup Node.js
38
+ uses: actions/setup-node@v4
39
+ with:
40
+ node-version: "20"
41
+ cache: "npm"
42
+ cache-dependency-path: "frontend/package-lock.json"
43
+
44
+ - name: Install frontend dependencies
45
+ run: cd frontend && npm ci
46
+
47
+ - name: Run frontend tests
48
+ run: cd frontend && npm test
backend/app/api/v1/export.py CHANGED
@@ -67,7 +67,7 @@ async def _load_manuscript_with_masters(
67
 
68
  masters: list[PageMaster] = []
69
  for page in pages:
70
- master = await _read_master_json(corpus.slug, page.id)
71
  if master is not None:
72
  masters.append(master)
73
 
@@ -80,14 +80,14 @@ async def _load_manuscript_with_masters(
80
  return manuscript, corpus, masters
81
 
82
 
83
- def _read_master_json_sync(corpus_slug: str, page_id: str) -> PageMaster | None:
84
  """Lit le master.json d'une page depuis data/. Retourne None si absent (bloquant)."""
85
  path = (
86
  _config_module.settings.data_dir
87
  / "corpora"
88
  / corpus_slug
89
  / "pages"
90
- / page_id
91
  / "master.json"
92
  )
93
  if not path.exists():
@@ -96,9 +96,9 @@ def _read_master_json_sync(corpus_slug: str, page_id: str) -> PageMaster | None:
96
  return PageMaster.model_validate(raw)
97
 
98
 
99
- async def _read_master_json(corpus_slug: str, page_id: str) -> PageMaster | None:
100
  """Version async — délègue la lecture au threadpool."""
101
- return await asyncio.to_thread(_read_master_json_sync, corpus_slug, page_id)
102
 
103
 
104
  def _build_manuscript_meta(
@@ -165,7 +165,7 @@ async def get_alto(page_id: str, db: AsyncSession = Depends(get_db)) -> Response
165
  if corpus is None:
166
  raise HTTPException(status_code=404, detail="Corpus introuvable")
167
 
168
- master = await _read_master_json(corpus.slug, page_id)
169
  if master is None:
170
  raise HTTPException(
171
  status_code=404,
 
67
 
68
  masters: list[PageMaster] = []
69
  for page in pages:
70
+ master = await _read_master_json(corpus.slug, page.folio_label)
71
  if master is not None:
72
  masters.append(master)
73
 
 
80
  return manuscript, corpus, masters
81
 
82
 
83
+ def _read_master_json_sync(corpus_slug: str, folio_label: str) -> PageMaster | None:
84
  """Lit le master.json d'une page depuis data/. Retourne None si absent (bloquant)."""
85
  path = (
86
  _config_module.settings.data_dir
87
  / "corpora"
88
  / corpus_slug
89
  / "pages"
90
+ / folio_label
91
  / "master.json"
92
  )
93
  if not path.exists():
 
96
  return PageMaster.model_validate(raw)
97
 
98
 
99
+ async def _read_master_json(corpus_slug: str, folio_label: str) -> PageMaster | None:
100
  """Version async — délègue la lecture au threadpool."""
101
+ return await asyncio.to_thread(_read_master_json_sync, corpus_slug, folio_label)
102
 
103
 
104
  def _build_manuscript_meta(
 
165
  if corpus is None:
166
  raise HTTPException(status_code=404, detail="Corpus introuvable")
167
 
168
+ master = await _read_master_json(corpus.slug, page.folio_label)
169
  if master is None:
170
  raise HTTPException(
171
  status_code=404,
backend/app/api/v1/models_api.py CHANGED
@@ -120,7 +120,7 @@ async def set_corpus_model(
120
  corpus_id: str,
121
  body: ModelSelectRequest,
122
  db: AsyncSession = Depends(get_db),
123
- ) -> ModelConfigDB:
124
  """Associe un modèle IA à un corpus. Crée ou met à jour la configuration."""
125
  corpus = await db.get(CorpusModel, corpus_id)
126
  if corpus is None:
@@ -154,7 +154,7 @@ async def set_corpus_model(
154
  @router.get("/corpora/{corpus_id}/model", response_model=ModelConfigResponse)
155
  async def get_corpus_model(
156
  corpus_id: str, db: AsyncSession = Depends(get_db)
157
- ) -> ModelConfigDB:
158
  """Retourne la configuration du modèle IA actif pour un corpus."""
159
  corpus = await db.get(CorpusModel, corpus_id)
160
  if corpus is None:
 
120
  corpus_id: str,
121
  body: ModelSelectRequest,
122
  db: AsyncSession = Depends(get_db),
123
+ ) -> ModelConfigResponse:
124
  """Associe un modèle IA à un corpus. Crée ou met à jour la configuration."""
125
  corpus = await db.get(CorpusModel, corpus_id)
126
  if corpus is None:
 
154
  @router.get("/corpora/{corpus_id}/model", response_model=ModelConfigResponse)
155
  async def get_corpus_model(
156
  corpus_id: str, db: AsyncSession = Depends(get_db)
157
+ ) -> ModelConfigResponse:
158
  """Retourne la configuration du modèle IA actif pour un corpus."""
159
  corpus = await db.get(CorpusModel, corpus_id)
160
  if corpus is None:
backend/app/api/v1/pages.py CHANGED
@@ -100,7 +100,7 @@ async def _load_master(
100
  / "corpora"
101
  / corpus.slug
102
  / "pages"
103
- / page.id
104
  / "master.json"
105
  )
106
  if not master_path.exists():
@@ -125,7 +125,7 @@ async def _get_page_dir(page: PageModel, db: AsyncSession) -> Path | None:
125
  / "corpora"
126
  / corpus.slug
127
  / "pages"
128
- / page.id
129
  )
130
 
131
 
@@ -388,6 +388,8 @@ async def apply_corrections(
388
  async def get_page_history(
389
  page_id: str,
390
  db: AsyncSession = Depends(get_db),
 
 
391
  ) -> list[VersionInfo]:
392
  """Liste les versions archivées du master.json (master_v*.json).
393
 
@@ -423,4 +425,5 @@ async def get_page_history(
423
  )
424
  continue
425
 
426
- return sorted(versions, key=lambda v: v.version)
 
 
100
  / "corpora"
101
  / corpus.slug
102
  / "pages"
103
+ / page.folio_label
104
  / "master.json"
105
  )
106
  if not master_path.exists():
 
125
  / "corpora"
126
  / corpus.slug
127
  / "pages"
128
+ / page.folio_label
129
  )
130
 
131
 
 
388
  async def get_page_history(
389
  page_id: str,
390
  db: AsyncSession = Depends(get_db),
391
+ limit: int = 100,
392
+ offset: int = 0,
393
  ) -> list[VersionInfo]:
394
  """Liste les versions archivées du master.json (master_v*.json).
395
 
 
425
  )
426
  continue
427
 
428
+ versions.sort(key=lambda v: v.version)
429
+ return versions[offset:offset + limit]
backend/app/config.py CHANGED
@@ -44,6 +44,9 @@ class Settings(BaseSettings):
44
  # ── Base de données ───────────────────────────────────────────────────────
45
  database_url: str = "sqlite+aiosqlite:///./iiif_studio.db"
46
 
 
 
 
47
  # ── Fournisseurs IA (R06 — clés depuis l'environnement uniquement) ────────
48
  # Chaque clé est optionnelle. Le backend détecte automatiquement quels
49
  # providers sont disponibles selon les clés présentes. Pas de AI_PROVIDER
 
44
  # ── Base de données ───────────────────────────────────────────────────────
45
  database_url: str = "sqlite+aiosqlite:///./iiif_studio.db"
46
 
47
+ # ── Pipeline IA ────────────────────────────────────────────────────────────
48
+ ai_max_concurrent: int = 3 # jobs IA simultanés par corpus run
49
+
50
  # ── Fournisseurs IA (R06 — clés depuis l'environnement uniquement) ────────
51
  # Chaque clé est optionnelle. Le backend détecte automatiquement quels
52
  # providers sont disponibles selon les clés présentes. Pas de AI_PROVIDER
backend/app/main.py CHANGED
@@ -36,6 +36,21 @@ def _migrate_model_configs(connection) -> None:
36
  logger.info("Migration : colonne supports_vision ajoutée à model_configs")
37
 
38
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
39
  @asynccontextmanager
40
  async def lifespan(application: FastAPI):
41
  """Crée les tables SQLite au démarrage, libère l'engine à l'arrêt."""
@@ -65,9 +80,9 @@ async def lifespan(application: FastAPI):
65
 
66
  async with engine.begin() as conn:
67
  await conn.run_sync(Base.metadata.create_all)
68
- # Migration : ajouter supports_vision aux model_configs existantes
69
- # (create_all ne fait pas d'ALTER TABLE sur les tables existantes)
70
  await conn.run_sync(_migrate_model_configs)
 
71
  logger.info("Tables SQLite initialisées")
72
  yield
73
  await engine.dispose()
@@ -117,7 +132,7 @@ async def serve_frontend(full_path: str) -> FileResponse | RedirectResponse:
117
  if _STATIC_DIR.is_dir():
118
  candidate = (_STATIC_DIR / full_path).resolve()
119
  # Empêcher le path traversal : le fichier résolu doit être sous _STATIC_DIR
120
- if candidate.is_file() and str(candidate).startswith(str(_STATIC_DIR.resolve()) + "/"):
121
  return FileResponse(candidate)
122
  index = _STATIC_DIR / "index.html"
123
  if index.exists():
 
36
  logger.info("Migration : colonne supports_vision ajoutée à model_configs")
37
 
38
 
39
+ def _migrate_page_search(connection) -> None:
40
+ """Ajoute la colonne normalized_text si absente (recherche SQL LIKE)."""
41
+ from sqlalchemy import inspect, text
42
+
43
+ inspector = inspect(connection)
44
+ if "page_search" not in inspector.get_table_names():
45
+ return
46
+ columns = {c["name"] for c in inspector.get_columns("page_search")}
47
+ if "normalized_text" not in columns:
48
+ connection.execute(
49
+ text("ALTER TABLE page_search ADD COLUMN normalized_text TEXT NOT NULL DEFAULT ''")
50
+ )
51
+ logger.info("Migration : colonne normalized_text ajoutée à page_search")
52
+
53
+
54
  @asynccontextmanager
55
  async def lifespan(application: FastAPI):
56
  """Crée les tables SQLite au démarrage, libère l'engine à l'arrêt."""
 
80
 
81
  async with engine.begin() as conn:
82
  await conn.run_sync(Base.metadata.create_all)
83
+ # Migrations : create_all ne fait pas d'ALTER TABLE sur les tables existantes
 
84
  await conn.run_sync(_migrate_model_configs)
85
+ await conn.run_sync(_migrate_page_search)
86
  logger.info("Tables SQLite initialisées")
87
  yield
88
  await engine.dispose()
 
132
  if _STATIC_DIR.is_dir():
133
  candidate = (_STATIC_DIR / full_path).resolve()
134
  # Empêcher le path traversal : le fichier résolu doit être sous _STATIC_DIR
135
+ if candidate.is_file() and candidate.is_relative_to(_STATIC_DIR.resolve()):
136
  return FileResponse(candidate)
137
  index = _STATIC_DIR / "index.html"
138
  if index.exists():
backend/app/models/page_search.py CHANGED
@@ -22,3 +22,5 @@ class PageSearchIndex(Base):
22
  diplomatic_text: Mapped[str] = mapped_column(Text, nullable=False, default="")
23
  translation_fr: Mapped[str] = mapped_column(Text, nullable=False, default="")
24
  tags: Mapped[str] = mapped_column(Text, nullable=False, default="")
 
 
 
22
  diplomatic_text: Mapped[str] = mapped_column(Text, nullable=False, default="")
23
  translation_fr: Mapped[str] = mapped_column(Text, nullable=False, default="")
24
  tags: Mapped[str] = mapped_column(Text, nullable=False, default="")
25
+ # Pre-normalized concatenation of all text fields for SQL LIKE search
26
+ normalized_text: Mapped[str] = mapped_column(Text, nullable=False, default="")
backend/app/schemas/page_master.py CHANGED
@@ -50,7 +50,6 @@ class ImageInfo(BaseModel):
50
  master: str # URL source (service IIIF ou statique) ou chemin local
51
  derivative_web: str | None = None # chemin dérivé 1500px (legacy/upload)
52
  thumbnail: str | None = None # chemin thumbnail 256px (legacy/upload)
53
- iiif_base: str | None = None # compat arrière
54
  iiif_service_url: str | None = None # URL du IIIF Image Service (zoom tuilé)
55
  manifest_url: str | None = None # URL du manifest source (provenance)
56
  width: int # largeur du canvas original
 
50
  master: str # URL source (service IIIF ou statique) ou chemin local
51
  derivative_web: str | None = None # chemin dérivé 1500px (legacy/upload)
52
  thumbnail: str | None = None # chemin thumbnail 256px (legacy/upload)
 
53
  iiif_service_url: str | None = None # URL du IIIF Image Service (zoom tuilé)
54
  manifest_url: str | None = None # URL du manifest source (provenance)
55
  width: int # largeur du canvas original
backend/app/services/ai/analyzer.py CHANGED
@@ -17,7 +17,7 @@ from app.schemas.page_master import EditorialInfo, EditorialStatus, ImageInfo, P
17
  from app.services.ai.master_writer import write_ai_raw, write_master_json
18
  from app.services.ai.model_registry import get_provider
19
  from app.services.ai.prompt_loader import load_and_render_prompt
20
- from app.services.ai.response_parser import ParseError, parse_ai_response # noqa: F401
21
 
22
  logger = logging.getLogger(__name__)
23
 
 
17
  from app.services.ai.master_writer import write_ai_raw, write_master_json
18
  from app.services.ai.model_registry import get_provider
19
  from app.services.ai.prompt_loader import load_and_render_prompt
20
+ from app.services.ai.response_parser import parse_ai_response
21
 
22
  logger = logging.getLogger(__name__)
23
 
backend/app/services/ai/response_parser.py CHANGED
@@ -152,7 +152,7 @@ def parse_ai_response(raw_text: str) -> tuple[dict, OCRResult]:
152
  try:
153
  region = Region.model_validate(raw_region)
154
  valid_regions.append(region.model_dump())
155
- except (ValidationError, Exception) as exc:
156
  logger.warning(
157
  "Région ignorée — bbox ou champ invalide",
158
  extra={"index": i, "region": raw_region, "error": str(exc)},
 
152
  try:
153
  region = Region.model_validate(raw_region)
154
  valid_regions.append(region.model_dump())
155
+ except (ValidationError, ValueError, KeyError, TypeError) as exc:
156
  logger.warning(
157
  "Région ignorée — bbox ou champ invalide",
158
  extra={"index": i, "region": raw_region, "error": str(exc)},
backend/app/services/corpus_runner.py CHANGED
@@ -61,10 +61,9 @@ async def execute_corpus_job(corpus_id: str) -> dict:
61
 
62
  # Exécution concurrente avec semaphore — chaque job gère sa propre session
63
  from app.services.job_runner import execute_page_job
 
64
 
65
- _MAX_CONCURRENT = 3 # limiter la pression sur les APIs IA
66
-
67
- sem = asyncio.Semaphore(_MAX_CONCURRENT)
68
 
69
  async def _run_one(jid: str) -> None:
70
  async with sem:
 
61
 
62
  # Exécution concurrente avec semaphore — chaque job gère sa propre session
63
  from app.services.job_runner import execute_page_job
64
+ from app.config import settings
65
 
66
+ sem = asyncio.Semaphore(settings.ai_max_concurrent)
 
 
67
 
68
  async def _run_one(jid: str) -> None:
69
  async with sem:
backend/app/services/job_runner.py CHANGED
@@ -196,7 +196,7 @@ async def _run_job_impl(job_id: str, db: AsyncSession) -> None:
196
  # ── Mode fichier local (upload) ──────────────────────────────────
197
  source_path = Path(image_source).resolve()
198
  data_dir_resolved = data_dir.resolve()
199
- if not str(source_path).startswith(str(data_dir_resolved) + "/") and source_path != data_dir_resolved:
200
  raise ValueError(
201
  f"Chemin image hors du répertoire de données interdit : "
202
  f"{image_source!r} (résolu : {source_path})"
 
196
  # ── Mode fichier local (upload) ──────────────────────────────────
197
  source_path = Path(image_source).resolve()
198
  data_dir_resolved = data_dir.resolve()
199
+ if not source_path.is_relative_to(data_dir_resolved):
200
  raise ValueError(
201
  f"Chemin image hors du répertoire de données interdit : "
202
  f"{image_source!r} (résolu : {source_path})"
backend/app/services/search/indexer.py CHANGED
@@ -1,5 +1,9 @@
1
  """
2
- Service d'indexation et de recherche FTS5 pour les pages analysées.
 
 
 
 
3
  """
4
  import logging
5
  import unicodedata
@@ -32,6 +36,11 @@ def _extract_tags(master: PageMaster) -> str:
32
  return " ".join(tags)
33
 
34
 
 
 
 
 
 
35
  async def index_page(db: AsyncSession, master: PageMaster) -> None:
36
  """Indexe ou met a jour une page dans la table de recherche."""
37
  existing = await db.get(PageSearchIndex, master.page_id)
@@ -39,6 +48,7 @@ async def index_page(db: AsyncSession, master: PageMaster) -> None:
39
  diplomatic = (master.ocr.diplomatic_text if master.ocr else "") or ""
40
  translation = (master.translation.fr if master.translation else "") or ""
41
  tags = _extract_tags(master)
 
42
 
43
  if existing:
44
  existing.corpus_profile = master.corpus_profile
@@ -47,6 +57,7 @@ async def index_page(db: AsyncSession, master: PageMaster) -> None:
47
  existing.diplomatic_text = diplomatic
48
  existing.translation_fr = translation
49
  existing.tags = tags
 
50
  else:
51
  entry = PageSearchIndex(
52
  page_id=master.page_id,
@@ -56,6 +67,7 @@ async def index_page(db: AsyncSession, master: PageMaster) -> None:
56
  diplomatic_text=diplomatic,
57
  translation_fr=translation,
58
  tags=tags,
 
59
  )
60
  db.add(entry)
61
 
@@ -64,33 +76,35 @@ async def index_page(db: AsyncSession, master: PageMaster) -> None:
64
 
65
 
66
  async def search_pages(db: AsyncSession, query: str, limit: int = 200) -> list[dict]:
67
- """Recherche plein texte dans l'index.
68
 
69
- Utilise LIKE avec normalisation (pas FTS5 natif) car SQLite FTS5
70
- necessite une table virtuelle separee qui complique les migrations.
71
- Cette approche est O(n) sur la table mais bien plus rapide que le
72
- scan filesystem car les donnees sont deja en memoire SQLite.
73
  """
74
  query_norm = _normalize(query.strip())
75
  if not query_norm:
76
  return []
77
 
78
- # Search using normalized LIKE across all text columns
79
- # We concatenate and normalize in Python for accent-insensitive search
 
 
80
  result = await db.execute(
81
  text("""
82
  SELECT page_id, corpus_profile, manuscript_id, folio_label,
83
  diplomatic_text, translation_fr, tags
84
  FROM page_search
85
- """)
 
 
86
  )
87
  rows = result.fetchall()
88
 
 
89
  hits: list[dict] = []
90
  for row in rows:
91
  page_id, corpus_profile, manuscript_id, folio_label, diplo, trans, tags = row
92
 
93
- # Score: count occurrences across all fields
94
  score = 0
95
  excerpt = ""
96
  for field_text in [diplo, trans, tags]:
@@ -111,15 +125,14 @@ async def search_pages(db: AsyncSession, query: str, limit: int = 200) -> list[d
111
  ex = ex + "\u2026"
112
  excerpt = ex
113
 
114
- if score > 0:
115
- hits.append({
116
- "page_id": page_id,
117
- "folio_label": folio_label,
118
- "manuscript_id": manuscript_id,
119
- "excerpt": excerpt,
120
- "score": score,
121
- "corpus_profile": corpus_profile,
122
- })
123
 
124
  hits.sort(key=lambda h: h["score"], reverse=True)
125
  return hits[:limit]
 
1
  """
2
+ Service d'indexation et de recherche pour les pages analysées.
3
+
4
+ Utilise une colonne normalized_text pré-calculée pour permettre des
5
+ recherches SQL LIKE insensibles aux accents, sans charger toutes les
6
+ lignes en Python.
7
  """
8
  import logging
9
  import unicodedata
 
36
  return " ".join(tags)
37
 
38
 
39
+ def _build_normalized_text(diplomatic: str, translation: str, tags: str) -> str:
40
+ """Construit le texte normalise concatene pour l'indexation SQL."""
41
+ return _normalize(f"{diplomatic} {translation} {tags}")
42
+
43
+
44
  async def index_page(db: AsyncSession, master: PageMaster) -> None:
45
  """Indexe ou met a jour une page dans la table de recherche."""
46
  existing = await db.get(PageSearchIndex, master.page_id)
 
48
  diplomatic = (master.ocr.diplomatic_text if master.ocr else "") or ""
49
  translation = (master.translation.fr if master.translation else "") or ""
50
  tags = _extract_tags(master)
51
+ normalized = _build_normalized_text(diplomatic, translation, tags)
52
 
53
  if existing:
54
  existing.corpus_profile = master.corpus_profile
 
57
  existing.diplomatic_text = diplomatic
58
  existing.translation_fr = translation
59
  existing.tags = tags
60
+ existing.normalized_text = normalized
61
  else:
62
  entry = PageSearchIndex(
63
  page_id=master.page_id,
 
67
  diplomatic_text=diplomatic,
68
  translation_fr=translation,
69
  tags=tags,
70
+ normalized_text=normalized,
71
  )
72
  db.add(entry)
73
 
 
76
 
77
 
78
  async def search_pages(db: AsyncSession, query: str, limit: int = 200) -> list[dict]:
79
+ """Recherche plein texte dans l'index via SQL LIKE sur normalized_text.
80
 
81
+ La colonne normalized_text est pre-calculee a l'indexation (minuscules,
82
+ sans accents). Le filtrage est fait cote SQL, pas en Python.
 
 
83
  """
84
  query_norm = _normalize(query.strip())
85
  if not query_norm:
86
  return []
87
 
88
+ # Escape SQL LIKE special characters
89
+ query_escaped = query_norm.replace("\\", "\\\\").replace("%", "\\%").replace("_", "\\_")
90
+ like_pattern = f"%{query_escaped}%"
91
+
92
  result = await db.execute(
93
  text("""
94
  SELECT page_id, corpus_profile, manuscript_id, folio_label,
95
  diplomatic_text, translation_fr, tags
96
  FROM page_search
97
+ WHERE normalized_text LIKE :pattern ESCAPE '\\'
98
+ """),
99
+ {"pattern": like_pattern},
100
  )
101
  rows = result.fetchall()
102
 
103
+ # Score matching rows in Python (only the filtered subset, not all rows)
104
  hits: list[dict] = []
105
  for row in rows:
106
  page_id, corpus_profile, manuscript_id, folio_label, diplo, trans, tags = row
107
 
 
108
  score = 0
109
  excerpt = ""
110
  for field_text in [diplo, trans, tags]:
 
125
  ex = ex + "\u2026"
126
  excerpt = ex
127
 
128
+ hits.append({
129
+ "page_id": page_id,
130
+ "folio_label": folio_label,
131
+ "manuscript_id": manuscript_id,
132
+ "excerpt": excerpt,
133
+ "score": score,
134
+ "corpus_profile": corpus_profile,
135
+ })
 
136
 
137
  hits.sort(key=lambda h: h["score"], reverse=True)
138
  return hits[:limit]
backend/tests/test_api_corrections.py CHANGED
@@ -337,7 +337,7 @@ async def test_history_with_archived_files(async_client, db_session, tmp_path, m
337
  page = await _create_page(db_session, ms.id)
338
 
339
  # Crée le répertoire avec des fichiers de version
340
- page_dir = tmp_path / "corpora" / corpus.slug / "pages" / page.id
341
  page_dir.mkdir(parents=True)
342
  (page_dir / "master_v1.json").write_text(_make_master(page.id, version=1, status="machine_draft"))
343
  (page_dir / "master_v2.json").write_text(_make_master(page.id, version=2, status="reviewed"))
 
337
  page = await _create_page(db_session, ms.id)
338
 
339
  # Crée le répertoire avec des fichiers de version
340
+ page_dir = tmp_path / "corpora" / corpus.slug / "pages" / page.folio_label
341
  page_dir.mkdir(parents=True)
342
  (page_dir / "master_v1.json").write_text(_make_master(page.id, version=1, status="machine_draft"))
343
  (page_dir / "master_v2.json").write_text(_make_master(page.id, version=2, status="reviewed"))
backend/tests/test_api_export.py CHANGED
@@ -111,16 +111,16 @@ def _make_master_json(page_id: str, folio_label: str, sequence: int) -> str:
111
  def _mock_master_files(monkeypatch, pages):
112
  """Patche Path.exists / Path.read_text pour simuler les master.json."""
113
  master_data = {
114
- p.id: _make_master_json(p.id, p.folio_label, p.sequence)
115
  for p in pages
116
  }
117
 
118
  def fake_exists(self: Path) -> bool:
119
- return any(p_id in str(self) for p_id in master_data)
120
 
121
  def fake_read_text(self: Path, **kwargs) -> str:
122
- for p_id, data in master_data.items():
123
- if p_id in str(self):
124
  return data
125
  raise FileNotFoundError(str(self))
126
 
 
111
  def _mock_master_files(monkeypatch, pages):
112
  """Patche Path.exists / Path.read_text pour simuler les master.json."""
113
  master_data = {
114
+ p.folio_label: _make_master_json(p.id, p.folio_label, p.sequence)
115
  for p in pages
116
  }
117
 
118
  def fake_exists(self: Path) -> bool:
119
+ return any(label in str(self) for label in master_data)
120
 
121
  def fake_read_text(self: Path, **kwargs) -> str:
122
+ for label, data in master_data.items():
123
+ if label in str(self):
124
  return data
125
  raise FileNotFoundError(str(self))
126
 
backend/tests/test_api_search.py CHANGED
@@ -15,6 +15,7 @@ import pytest
15
 
16
  # 3. local
17
  from app.models.page_search import PageSearchIndex
 
18
  from tests.conftest_api import async_client, db_session # noqa: F401
19
 
20
 
@@ -40,6 +41,7 @@ async def _index_page(
40
  diplomatic_text=diplomatic_text,
41
  translation_fr=translation_fr,
42
  tags=tags,
 
43
  )
44
  db.add(entry)
45
  await db.commit()
 
15
 
16
  # 3. local
17
  from app.models.page_search import PageSearchIndex
18
+ from app.services.search.indexer import _build_normalized_text
19
  from tests.conftest_api import async_client, db_session # noqa: F401
20
 
21
 
 
41
  diplomatic_text=diplomatic_text,
42
  translation_fr=translation_fr,
43
  tags=tags,
44
+ normalized_text=_build_normalized_text(diplomatic_text, translation_fr, tags),
45
  )
46
  db.add(entry)
47
  await db.commit()
backend/tests/test_job_runner.py CHANGED
@@ -132,7 +132,6 @@ def _page_master(page_id: str, ms_id: str) -> PageMaster:
132
  image={
133
  "master": "https://example.com/image.jpg",
134
  "derivative_web": "/tmp/deriv.jpg",
135
- "iiif_base": "",
136
  "width": 2000,
137
  "height": 3000,
138
  },
 
132
  image={
133
  "master": "https://example.com/image.jpg",
134
  "derivative_web": "/tmp/deriv.jpg",
 
135
  "width": 2000,
136
  "height": 3000,
137
  },
backend/tests/test_schemas.py CHANGED
@@ -60,7 +60,6 @@ def minimal_page_master() -> dict:
60
  "image": {
61
  "master": "data/corpora/test/masters/0001r.tif",
62
  "derivative_web": "data/corpora/test/derivatives/0001r.jpg",
63
- "iiif_base": "",
64
  "width": 2000,
65
  "height": 3000,
66
  },
 
60
  "image": {
61
  "master": "data/corpora/test/masters/0001r.tif",
62
  "derivative_web": "data/corpora/test/derivatives/0001r.jpg",
 
63
  "width": 2000,
64
  "height": 3000,
65
  },
frontend/src/components/CommentaryPanel.tsx CHANGED
@@ -1,23 +1,8 @@
1
  import { useState, type FC } from 'react'
2
- import type { Commentary, EditorialInfo, EditorialStatus } from '../lib/api.ts'
 
3
  import { RetroBadge, RetroButton } from './retro'
4
 
5
- const STATUS_LABELS: Record<EditorialStatus, string> = {
6
- machine_draft: 'Brouillon IA',
7
- needs_review: 'A reviser',
8
- reviewed: 'Revise',
9
- validated: 'Valide',
10
- published: 'Publie',
11
- }
12
-
13
- const STATUS_VARIANTS: Record<EditorialStatus, 'default' | 'success' | 'warning' | 'error' | 'info'> = {
14
- machine_draft: 'info',
15
- needs_review: 'warning',
16
- reviewed: 'default',
17
- validated: 'success',
18
- published: 'success',
19
- }
20
-
21
  interface Props {
22
  commentary: Commentary | null
23
  editorial: EditorialInfo
 
1
  import { useState, type FC } from 'react'
2
+ import type { Commentary, EditorialInfo } from '../lib/api.ts'
3
+ import { STATUS_LABELS, STATUS_VARIANTS } from '../lib/editorial.ts'
4
  import { RetroBadge, RetroButton } from './retro'
5
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
6
  interface Props {
7
  commentary: Commentary | null
8
  editorial: EditorialInfo
frontend/src/components/TranscriptionPanel.tsx CHANGED
@@ -1,23 +1,8 @@
1
  import type { FC } from 'react'
2
- import type { OCRResult, EditorialInfo, EditorialStatus } from '../lib/api.ts'
 
3
  import { RetroBadge } from './retro'
4
 
5
- const STATUS_LABELS: Record<EditorialStatus, string> = {
6
- machine_draft: 'Brouillon IA',
7
- needs_review: 'A reviser',
8
- reviewed: 'Revise',
9
- validated: 'Valide',
10
- published: 'Publie',
11
- }
12
-
13
- const STATUS_VARIANTS: Record<EditorialStatus, 'default' | 'success' | 'warning' | 'error' | 'info'> = {
14
- machine_draft: 'info',
15
- needs_review: 'warning',
16
- reviewed: 'default',
17
- validated: 'success',
18
- published: 'success',
19
- }
20
-
21
  interface Props {
22
  ocr: OCRResult | null
23
  editorial: EditorialInfo
 
1
  import type { FC } from 'react'
2
+ import type { OCRResult, EditorialInfo } from '../lib/api.ts'
3
+ import { STATUS_LABELS, STATUS_VARIANTS } from '../lib/editorial.ts'
4
  import { RetroBadge } from './retro'
5
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
6
  interface Props {
7
  ocr: OCRResult | null
8
  editorial: EditorialInfo
frontend/src/components/TranslationPanel.tsx CHANGED
@@ -1,23 +1,8 @@
1
  import type { FC } from 'react'
2
- import type { Translation, EditorialInfo, EditorialStatus } from '../lib/api.ts'
 
3
  import { RetroBadge } from './retro'
4
 
5
- const STATUS_LABELS: Record<EditorialStatus, string> = {
6
- machine_draft: 'Brouillon IA',
7
- needs_review: 'A reviser',
8
- reviewed: 'Revise',
9
- validated: 'Valide',
10
- published: 'Publie',
11
- }
12
-
13
- const STATUS_VARIANTS: Record<EditorialStatus, 'default' | 'success' | 'warning' | 'error' | 'info'> = {
14
- machine_draft: 'info',
15
- needs_review: 'warning',
16
- reviewed: 'default',
17
- validated: 'success',
18
- published: 'success',
19
- }
20
-
21
  interface Props {
22
  translation: Translation | null
23
  editorial: EditorialInfo
 
1
  import type { FC } from 'react'
2
+ import type { Translation, EditorialInfo } from '../lib/api.ts'
3
+ import { STATUS_LABELS, STATUS_VARIANTS } from '../lib/editorial.ts'
4
  import { RetroBadge } from './retro'
5
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
6
  interface Props {
7
  translation: Translation | null
8
  editorial: EditorialInfo
frontend/src/lib/api.ts CHANGED
@@ -40,7 +40,7 @@ export interface CorpusRunResponse {
40
  job_ids: string[]
41
  }
42
 
43
- export type JobStatus = 'pending' | 'running' | 'done' | 'failed'
44
 
45
  export interface Job {
46
  id: string
@@ -153,7 +153,6 @@ export interface ImageInfo {
153
  master: string
154
  derivative_web?: string | null
155
  thumbnail?: string | null
156
- iiif_base?: string | null
157
  iiif_service_url?: string | null
158
  manifest_url?: string | null
159
  width: number
@@ -225,7 +224,10 @@ function extractDetail(payload: unknown, fallback: string): string {
225
 
226
  async function get<T>(path: string): Promise<T> {
227
  const resp = await fetch(`${BASE_URL}${path}`)
228
- if (!resp.ok) throw new ApiError(resp.status, `HTTP ${resp.status} — ${path}`)
 
 
 
229
  return resp.json() as Promise<T>
230
  }
231
 
@@ -330,10 +332,13 @@ export interface CorpusModelConfig {
330
  updated_at: string
331
  }
332
 
333
- export const getCorpusModel = (corpusId: string): Promise<CorpusModelConfig | null> =>
334
- fetch(`${BASE_URL}/api/v1/corpora/${corpusId}/model`)
335
- .then((r) => (r.ok ? (r.json() as Promise<CorpusModelConfig>) : null))
336
- .catch(() => null)
 
 
 
337
 
338
  export const ingestImages = (
339
  corpusId: string,
 
40
  job_ids: string[]
41
  }
42
 
43
+ export type JobStatus = 'pending' | 'claimed' | 'running' | 'done' | 'failed'
44
 
45
  export interface Job {
46
  id: string
 
153
  master: string
154
  derivative_web?: string | null
155
  thumbnail?: string | null
 
156
  iiif_service_url?: string | null
157
  manifest_url?: string | null
158
  width: number
 
224
 
225
  async function get<T>(path: string): Promise<T> {
226
  const resp = await fetch(`${BASE_URL}${path}`)
227
+ if (!resp.ok) {
228
+ const payload = await resp.json().catch(() => null)
229
+ throw new ApiError(resp.status, extractDetail(payload, `HTTP ${resp.status} — ${path}`))
230
+ }
231
  return resp.json() as Promise<T>
232
  }
233
 
 
332
  updated_at: string
333
  }
334
 
335
+ export const getCorpusModel = async (corpusId: string): Promise<CorpusModelConfig | null> => {
336
+ try {
337
+ return await get<CorpusModelConfig>(`/api/v1/corpora/${corpusId}/model`)
338
+ } catch {
339
+ return null
340
+ }
341
+ }
342
 
343
  export const ingestImages = (
344
  corpusId: string,
frontend/src/lib/editorial.ts ADDED
@@ -0,0 +1,17 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import type { EditorialStatus } from './api.ts'
2
+
3
+ export const STATUS_LABELS: Record<EditorialStatus, string> = {
4
+ machine_draft: 'Brouillon IA',
5
+ needs_review: 'A reviser',
6
+ reviewed: 'Revise',
7
+ validated: 'Valide',
8
+ published: 'Publie',
9
+ }
10
+
11
+ export const STATUS_VARIANTS: Record<EditorialStatus, 'default' | 'success' | 'warning' | 'error' | 'info'> = {
12
+ machine_draft: 'info',
13
+ needs_review: 'warning',
14
+ reviewed: 'default',
15
+ validated: 'success',
16
+ published: 'success',
17
+ }