Spaces:
Build error
Build error
Merge pull request #36 from maribakulj/claude/analyze-app-issues-MYBpt
Browse files- CLAUDE.md +6 -3
- backend/app/api/v1/ingest.py +95 -1
- backend/app/api/v1/manuscripts.py +4 -0
- backend/app/api/v1/pages.py +4 -0
- backend/app/models/corpus.py +4 -0
- backend/app/schemas/image.py +25 -2
- backend/app/schemas/page_master.py +17 -8
- backend/app/services/ai/analyzer.py +81 -43
- backend/app/services/export/alto.py +1 -1
- backend/app/services/export/iiif.py +19 -8
- backend/app/services/export/mets.py +14 -6
- backend/app/services/image/normalizer.py +52 -0
- backend/app/services/ingest/iiif_fetcher.py +35 -0
- backend/app/services/job_runner.py +73 -25
- backend/tests/test_ai_analyzer.py +98 -2
- backend/tests/test_iiif_service_detection.py +247 -0
- backend/tests/test_schemas.py +82 -0
- frontend/src/components/Viewer.tsx +19 -6
- frontend/src/lib/api.ts +6 -0
- frontend/src/pages/Editor.tsx +4 -3
- frontend/src/pages/Reader.tsx +6 -5
CLAUDE.md
CHANGED
|
@@ -159,9 +159,12 @@ iiif-studio/
|
|
| 159 |
βββ data/ β JAMAIS versionnΓ© (.gitignore)
|
| 160 |
β βββ corpora/
|
| 161 |
β βββ {corpus_slug}/
|
| 162 |
-
β βββ masters/ β images
|
| 163 |
-
β βββ derivatives/ β JPEG 1500px
|
| 164 |
-
β βββ thumbnails/ β aperΓ§us 300px
|
|
|
|
|
|
|
|
|
|
| 165 |
β βββ iiif/
|
| 166 |
β β βββ manifest.json
|
| 167 |
β β βββ annotations/
|
|
|
|
| 159 |
βββ data/ β JAMAIS versionnΓ© (.gitignore)
|
| 160 |
β βββ corpora/
|
| 161 |
β βββ {corpus_slug}/
|
| 162 |
+
│ ├── masters/ ← images uploadées (mode fichier uniquement)
|
| 163 |
+
│ ├── derivatives/ ← JPEG 1500px (mode fichier uniquement)
|
| 164 |
+
│ ├── thumbnails/ ← aperçus 300px (mode fichier uniquement)
|
| 165 |
+
β β NOTE : en mode IIIF natif, masters/, derivatives/ et
|
| 166 |
+
β β thumbnails/ sont VIDES β les images sont streamΓ©es
|
| 167 |
+
β β depuis le serveur IIIF d'origine.
|
| 168 |
β βββ iiif/
|
| 169 |
β β βββ manifest.json
|
| 170 |
β β βββ annotations/
|
backend/app/api/v1/ingest.py
CHANGED
|
@@ -144,6 +144,10 @@ async def _create_page(
|
|
| 144 |
folio_label: str,
|
| 145 |
sequence: int,
|
| 146 |
image_master_path: str | None = None,
|
|
|
|
|
|
|
|
|
|
|
|
|
| 147 |
) -> PageModel | None:
|
| 148 |
"""CrΓ©e une page si elle n'existe pas dΓ©jΓ . Retourne None si l'ID est dΓ©jΓ pris."""
|
| 149 |
existing = await db.get(PageModel, page_id)
|
|
@@ -157,6 +161,10 @@ async def _create_page(
|
|
| 157 |
folio_label=folio_label,
|
| 158 |
sequence=sequence,
|
| 159 |
image_master_path=image_master_path,
|
|
|
|
|
|
|
|
|
|
|
|
|
| 160 |
processing_status="INGESTED",
|
| 161 |
)
|
| 162 |
db.add(page)
|
|
@@ -214,6 +222,78 @@ def _extract_canvas_label(canvas: dict, index: int) -> str:
|
|
| 214 |
return f"f{index + 1:03d}r"
|
| 215 |
|
| 216 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 217 |
def _extract_canvas_image_url(canvas: dict) -> str | None:
|
| 218 |
"""Extrait l'URL de l'image principale d'un canvas IIIF (3.0 ou 2.x)."""
|
| 219 |
# IIIF 3.0
|
|
@@ -385,9 +465,14 @@ async def ingest_iiif_manifest(
|
|
| 385 |
folio_label = labels[i]
|
| 386 |
page_id = _make_page_id(corpus.slug, folio_label, seq + i, dupes)
|
| 387 |
image_url = _extract_canvas_image_url(canvas)
|
|
|
|
| 388 |
page = await _create_page(
|
| 389 |
db, ms.id, page_id, folio_label, seq + i,
|
| 390 |
image_master_path=image_url,
|
|
|
|
|
|
|
|
|
|
|
|
|
| 391 |
)
|
| 392 |
if page is None:
|
| 393 |
skipped += 1
|
|
@@ -406,7 +491,13 @@ async def ingest_iiif_manifest(
|
|
| 406 |
|
| 407 |
logger.info(
|
| 408 |
"Manifest IIIF ingΓ©rΓ©",
|
| 409 |
-
extra={
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 410 |
)
|
| 411 |
return IngestResponse(
|
| 412 |
corpus_id=corpus_id,
|
|
@@ -446,9 +537,12 @@ async def ingest_iiif_images(
|
|
| 446 |
skipped = 0
|
| 447 |
for i, (url, folio_label) in enumerate(zip(body.urls, sanitized_labels)):
|
| 448 |
page_id = _make_page_id(corpus.slug, folio_label, seq + i, dupes)
|
|
|
|
|
|
|
| 449 |
page = await _create_page(
|
| 450 |
db, ms.id, page_id, folio_label, seq + i,
|
| 451 |
image_master_path=url,
|
|
|
|
| 452 |
)
|
| 453 |
if page is None:
|
| 454 |
skipped += 1
|
|
|
|
| 144 |
folio_label: str,
|
| 145 |
sequence: int,
|
| 146 |
image_master_path: str | None = None,
|
| 147 |
+
iiif_service_url: str | None = None,
|
| 148 |
+
canvas_width: int | None = None,
|
| 149 |
+
canvas_height: int | None = None,
|
| 150 |
+
manifest_url: str | None = None,
|
| 151 |
) -> PageModel | None:
|
| 152 |
"""CrΓ©e une page si elle n'existe pas dΓ©jΓ . Retourne None si l'ID est dΓ©jΓ pris."""
|
| 153 |
existing = await db.get(PageModel, page_id)
|
|
|
|
| 161 |
folio_label=folio_label,
|
| 162 |
sequence=sequence,
|
| 163 |
image_master_path=image_master_path,
|
| 164 |
+
iiif_service_url=iiif_service_url,
|
| 165 |
+
canvas_width=canvas_width,
|
| 166 |
+
canvas_height=canvas_height,
|
| 167 |
+
manifest_url=manifest_url,
|
| 168 |
processing_status="INGESTED",
|
| 169 |
)
|
| 170 |
db.add(page)
|
|
|
|
| 222 |
return f"f{index + 1:03d}r"
|
| 223 |
|
| 224 |
|
| 225 |
+
# Pattern IIIF Image API : {service}/full/{size}/{rotation}/{quality}.{format}
|
| 226 |
+
_IIIF_IMAGE_API_RE = re.compile(
|
| 227 |
+
r"^(https?://.+)/full/[^/]+/\d+/default\.\w+$"
|
| 228 |
+
)
|
| 229 |
+
|
| 230 |
+
|
| 231 |
+
def _extract_iiif_service(canvas: dict) -> tuple[str | None, int | None, int | None]:
|
| 232 |
+
"""DΓ©tecte le IIIF Image Service d'un canvas et ses dimensions.
|
| 233 |
+
|
| 234 |
+
Retourne (service_url, canvas_width, canvas_height).
|
| 235 |
+
service_url est None si aucun service IIIF trouvé (image statique).
|
| 236 |
+
"""
|
| 237 |
+
canvas_w = canvas.get("width")
|
| 238 |
+
canvas_h = canvas.get("height")
|
| 239 |
+
|
| 240 |
+
# ββ IIIF 3.0 : body β service[] βββββββββββββββββββββββββββββββββββββ
|
| 241 |
+
items = canvas.get("items") or []
|
| 242 |
+
if items:
|
| 243 |
+
ann_items = (items[0].get("items") or []) if items else []
|
| 244 |
+
if ann_items:
|
| 245 |
+
body = ann_items[0].get("body") or {}
|
| 246 |
+
if isinstance(body, dict):
|
| 247 |
+
# Chercher un service IIIF sur le body
|
| 248 |
+
services = body.get("service") or []
|
| 249 |
+
if isinstance(services, dict):
|
| 250 |
+
services = [services]
|
| 251 |
+
for svc in services:
|
| 252 |
+
svc_type = svc.get("type") or svc.get("@type") or ""
|
| 253 |
+
if "ImageService" in svc_type:
|
| 254 |
+
svc_url = (svc.get("id") or svc.get("@id") or "").rstrip("/")
|
| 255 |
+
if svc_url:
|
| 256 |
+
return svc_url, canvas_w, canvas_h
|
| 257 |
+
|
| 258 |
+
# Fallback : détecter le pattern Image API dans body.id
|
| 259 |
+
body_id = body.get("id") or body.get("@id") or ""
|
| 260 |
+
m = _IIIF_IMAGE_API_RE.match(body_id)
|
| 261 |
+
if m:
|
| 262 |
+
return m.group(1), canvas_w, canvas_h
|
| 263 |
+
|
| 264 |
+
# ββ IIIF 2.x : resource β service βββββββββββββββββββββββββββββββββββ
|
| 265 |
+
images = canvas.get("images") or []
|
| 266 |
+
if images:
|
| 267 |
+
resource = images[0].get("resource") or {}
|
| 268 |
+
services = resource.get("service") or []
|
| 269 |
+
if isinstance(services, dict):
|
| 270 |
+
services = [services]
|
| 271 |
+
for svc in services:
|
| 272 |
+
svc_type = svc.get("@type") or svc.get("type") or ""
|
| 273 |
+
if "ImageService" in svc_type:
|
| 274 |
+
svc_url = (svc.get("@id") or svc.get("id") or "").rstrip("/")
|
| 275 |
+
if svc_url:
|
| 276 |
+
return svc_url, canvas_w, canvas_h
|
| 277 |
+
|
| 278 |
+
# Fallback : pattern Image API dans resource @id
|
| 279 |
+
res_id = resource.get("@id") or resource.get("id") or ""
|
| 280 |
+
m = _IIIF_IMAGE_API_RE.match(res_id)
|
| 281 |
+
if m:
|
| 282 |
+
return m.group(1), canvas_w, canvas_h
|
| 283 |
+
|
| 284 |
+
return None, canvas_w, canvas_h
|
| 285 |
+
|
| 286 |
+
|
| 287 |
+
def _detect_iiif_service_from_url(url: str) -> str | None:
|
| 288 |
+
"""Tente de dΓ©tecter une URL de service IIIF Γ partir d'une URL d'image directe.
|
| 289 |
+
|
| 290 |
+
Si l'URL suit le pattern IIIF Image API ({base}/full/{size}/{rot}/{qual}.{fmt}),
|
| 291 |
+
retourne la base. Sinon retourne None.
|
| 292 |
+
"""
|
| 293 |
+
m = _IIIF_IMAGE_API_RE.match(url)
|
| 294 |
+
return m.group(1) if m else None
|
| 295 |
+
|
| 296 |
+
|
| 297 |
def _extract_canvas_image_url(canvas: dict) -> str | None:
|
| 298 |
"""Extrait l'URL de l'image principale d'un canvas IIIF (3.0 ou 2.x)."""
|
| 299 |
# IIIF 3.0
|
|
|
|
| 465 |
folio_label = labels[i]
|
| 466 |
page_id = _make_page_id(corpus.slug, folio_label, seq + i, dupes)
|
| 467 |
image_url = _extract_canvas_image_url(canvas)
|
| 468 |
+
service_url, c_width, c_height = _extract_iiif_service(canvas)
|
| 469 |
page = await _create_page(
|
| 470 |
db, ms.id, page_id, folio_label, seq + i,
|
| 471 |
image_master_path=image_url,
|
| 472 |
+
iiif_service_url=service_url,
|
| 473 |
+
canvas_width=c_width,
|
| 474 |
+
canvas_height=c_height,
|
| 475 |
+
manifest_url=body.manifest_url,
|
| 476 |
)
|
| 477 |
if page is None:
|
| 478 |
skipped += 1
|
|
|
|
| 491 |
|
| 492 |
logger.info(
|
| 493 |
"Manifest IIIF ingΓ©rΓ©",
|
| 494 |
+
extra={
|
| 495 |
+
"corpus_id": corpus_id,
|
| 496 |
+
"url": body.manifest_url,
|
| 497 |
+
"created": len(created),
|
| 498 |
+
"skipped": skipped,
|
| 499 |
+
"iiif_service_detected": sum(1 for p in created if p.iiif_service_url),
|
| 500 |
+
},
|
| 501 |
)
|
| 502 |
return IngestResponse(
|
| 503 |
corpus_id=corpus_id,
|
|
|
|
| 537 |
skipped = 0
|
| 538 |
for i, (url, folio_label) in enumerate(zip(body.urls, sanitized_labels)):
|
| 539 |
page_id = _make_page_id(corpus.slug, folio_label, seq + i, dupes)
|
| 540 |
+
# Tenter de détecter un service IIIF à partir du pattern URL
|
| 541 |
+
service_url = _detect_iiif_service_from_url(url)
|
| 542 |
page = await _create_page(
|
| 543 |
db, ms.id, page_id, folio_label, seq + i,
|
| 544 |
image_master_path=url,
|
| 545 |
+
iiif_service_url=service_url,
|
| 546 |
)
|
| 547 |
if page is None:
|
| 548 |
skipped += 1
|
backend/app/api/v1/manuscripts.py
CHANGED
|
@@ -24,6 +24,10 @@ class PageResponse(BaseModel):
|
|
| 24 |
folio_label: str
|
| 25 |
sequence: int
|
| 26 |
image_master_path: str | None
|
|
|
|
|
|
|
|
|
|
|
|
|
| 27 |
processing_status: str
|
| 28 |
confidence_summary: float | None
|
| 29 |
|
|
|
|
| 24 |
folio_label: str
|
| 25 |
sequence: int
|
| 26 |
image_master_path: str | None
|
| 27 |
+
iiif_service_url: str | None = None
|
| 28 |
+
canvas_width: int | None = None
|
| 29 |
+
canvas_height: int | None = None
|
| 30 |
+
manifest_url: str | None = None
|
| 31 |
processing_status: str
|
| 32 |
confidence_summary: float | None
|
| 33 |
|
backend/app/api/v1/pages.py
CHANGED
|
@@ -64,6 +64,10 @@ class PageResponse(BaseModel):
|
|
| 64 |
folio_label: str
|
| 65 |
sequence: int
|
| 66 |
image_master_path: str | None
|
|
|
|
|
|
|
|
|
|
|
|
|
| 67 |
processing_status: str
|
| 68 |
confidence_summary: float | None
|
| 69 |
|
|
|
|
| 64 |
folio_label: str
|
| 65 |
sequence: int
|
| 66 |
image_master_path: str | None
|
| 67 |
+
iiif_service_url: str | None = None
|
| 68 |
+
canvas_width: int | None = None
|
| 69 |
+
canvas_height: int | None = None
|
| 70 |
+
manifest_url: str | None = None
|
| 71 |
processing_status: str
|
| 72 |
confidence_summary: float | None
|
| 73 |
|
backend/app/models/corpus.py
CHANGED
|
@@ -74,6 +74,10 @@ class PageModel(Base):
|
|
| 74 |
folio_label: Mapped[str] = mapped_column(String, nullable=False)
|
| 75 |
sequence: Mapped[int] = mapped_column(Integer, nullable=False)
|
| 76 |
image_master_path: Mapped[str | None] = mapped_column(Text, nullable=True)
|
|
|
|
|
|
|
|
|
|
|
|
|
| 77 |
processing_status: Mapped[str] = mapped_column(
|
| 78 |
String, nullable=False, default="CREATED"
|
| 79 |
)
|
|
|
|
| 74 |
folio_label: Mapped[str] = mapped_column(String, nullable=False)
|
| 75 |
sequence: Mapped[int] = mapped_column(Integer, nullable=False)
|
| 76 |
image_master_path: Mapped[str | None] = mapped_column(Text, nullable=True)
|
| 77 |
+
iiif_service_url: Mapped[str | None] = mapped_column(Text, nullable=True)
|
| 78 |
+
canvas_width: Mapped[int | None] = mapped_column(Integer, nullable=True)
|
| 79 |
+
canvas_height: Mapped[int | None] = mapped_column(Integer, nullable=True)
|
| 80 |
+
manifest_url: Mapped[str | None] = mapped_column(Text, nullable=True)
|
| 81 |
processing_status: Mapped[str] = mapped_column(
|
| 82 |
String, nullable=False, default="CREATED"
|
| 83 |
)
|
backend/app/schemas/image.py
CHANGED
|
@@ -1,12 +1,19 @@
|
|
| 1 |
"""
|
| 2 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
| 3 |
"""
|
| 4 |
# 2. third-party
|
| 5 |
from pydantic import BaseModel
|
| 6 |
|
| 7 |
|
| 8 |
class ImageDerivativeInfo(BaseModel):
|
| 9 |
-
"""RΓ©sultat de la normalisation d'une image : dimensions originales et chemins des dΓ©rivΓ©s.
|
|
|
|
|
|
|
|
|
|
| 10 |
|
| 11 |
original_url: str
|
| 12 |
original_width: int
|
|
@@ -17,3 +24,19 @@ class ImageDerivativeInfo(BaseModel):
|
|
| 17 |
thumbnail_path: str
|
| 18 |
thumbnail_width: int
|
| 19 |
thumbnail_height: int
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
"""
|
| 2 |
+
Schémas Pydantic pour les métadonnées image du pipeline.
|
| 3 |
+
|
| 4 |
+
Deux schémas coexistent :
|
| 5 |
+
- ImageDerivativeInfo : dérivés stockés sur disque (upload de fichiers)
|
| 6 |
+
- ImageSourceInfo : source IIIF sans stockage local (mode natif)
|
| 7 |
"""
|
| 8 |
# 2. third-party
|
| 9 |
from pydantic import BaseModel
|
| 10 |
|
| 11 |
|
| 12 |
class ImageDerivativeInfo(BaseModel):
|
| 13 |
+
"""RΓ©sultat de la normalisation d'une image : dimensions originales et chemins des dΓ©rivΓ©s.
|
| 14 |
+
|
| 15 |
+
Utilisé pour les images uploadées via /ingest/files (stockage local).
|
| 16 |
+
"""
|
| 17 |
|
| 18 |
original_url: str
|
| 19 |
original_width: int
|
|
|
|
| 24 |
thumbnail_path: str
|
| 25 |
thumbnail_width: int
|
| 26 |
thumbnail_height: int
|
| 27 |
+
|
| 28 |
+
|
| 29 |
+
class ImageSourceInfo(BaseModel):
|
| 30 |
+
"""Source d'image IIIF β pas de stockage local.
|
| 31 |
+
|
| 32 |
+
Utilisé pour les images ingérées via manifest ou URLs IIIF.
|
| 33 |
+
Les bytes ne sont jamais écrits sur disque : téléchargés en RAM
|
| 34 |
+
pour l'IA, puis jetés.
|
| 35 |
+
"""
|
| 36 |
+
|
| 37 |
+
original_url: str # URL statique de l'image (fallback)
|
| 38 |
+
iiif_service_url: str | None = None # URL du service IIIF Image API
|
| 39 |
+
manifest_url: str | None = None # URL du manifest source
|
| 40 |
+
is_iiif: bool = False # a un IIIF Image Service dΓ©tectΓ© ?
|
| 41 |
+
original_width: int
|
| 42 |
+
original_height: int
|
backend/app/schemas/page_master.py
CHANGED
|
@@ -38,14 +38,23 @@ class Region(BaseModel):
|
|
| 38 |
|
| 39 |
|
| 40 |
class ImageInfo(BaseModel):
|
| 41 |
-
"""MΓ©tadonnΓ©es image β CLAUDE.md Β§4.2.
|
| 42 |
-
|
| 43 |
-
|
| 44 |
-
|
| 45 |
-
|
| 46 |
-
|
| 47 |
-
|
| 48 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 49 |
|
| 50 |
|
| 51 |
class OCRResult(BaseModel):
|
|
|
|
| 38 |
|
| 39 |
|
| 40 |
class ImageInfo(BaseModel):
|
| 41 |
+
"""MΓ©tadonnΓ©es image β CLAUDE.md Β§4.2.
|
| 42 |
+
|
| 43 |
+
Supporte deux modes :
|
| 44 |
+
- IIIF natif : iiif_service_url renseigné, images streamées depuis le serveur
|
| 45 |
+
d'origine (pas de stockage local). derivative_web / thumbnail = None.
|
| 46 |
+
- Upload local : master = chemin local, derivative_web / thumbnail = chemins
|
| 47 |
+
des dérivés sur disque (mode legacy ou upload de fichiers).
|
| 48 |
+
"""
|
| 49 |
+
|
| 50 |
+
master: str # URL source (service IIIF ou statique) ou chemin local
|
| 51 |
+
derivative_web: str | None = None # chemin dΓ©rivΓ© 1500px (legacy/upload)
|
| 52 |
+
thumbnail: str | None = None # chemin thumbnail 256px (legacy/upload)
|
| 53 |
+
iiif_base: str | None = None # compat arrière
|
| 54 |
+
iiif_service_url: str | None = None # URL du IIIF Image Service (zoom tuilΓ©)
|
| 55 |
+
manifest_url: str | None = None # URL du manifest source (provenance)
|
| 56 |
+
width: int # largeur du canvas original
|
| 57 |
+
height: int # hauteur du canvas original
|
| 58 |
|
| 59 |
|
| 60 |
class OCRResult(BaseModel):
|
backend/app/services/ai/analyzer.py
CHANGED
|
@@ -11,7 +11,7 @@ from pathlib import Path
|
|
| 11 |
|
| 12 |
# 3. local
|
| 13 |
from app.schemas.corpus_profile import CorpusProfile
|
| 14 |
-
from app.schemas.image import ImageDerivativeInfo
|
| 15 |
from app.schemas.model_config import ModelConfig
|
| 16 |
from app.schemas.page_master import EditorialInfo, EditorialStatus, ImageInfo, PageMaster, ProcessingInfo
|
| 17 |
from app.services.ai.master_writer import write_ai_raw, write_master_json
|
|
@@ -22,8 +22,32 @@ from app.services.ai.response_parser import ParseError, parse_ai_response # noq
|
|
| 22 |
logger = logging.getLogger(__name__)
|
| 23 |
|
| 24 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 25 |
def run_primary_analysis(
|
| 26 |
-
|
|
|
|
|
|
|
| 27 |
corpus_profile: CorpusProfile,
|
| 28 |
model_config: ModelConfig,
|
| 29 |
page_id: str,
|
|
@@ -31,38 +55,22 @@ def run_primary_analysis(
|
|
| 31 |
corpus_slug: str,
|
| 32 |
folio_label: str,
|
| 33 |
sequence: int,
|
| 34 |
-
image_info: ImageDerivativeInfo,
|
|
|
|
|
|
|
| 35 |
base_data_dir: Path = Path("data"),
|
| 36 |
project_root: Path = Path("."),
|
| 37 |
) -> PageMaster:
|
| 38 |
"""Analyse primaire d'un folio : charge le prompt, appelle l'IA, Γ©crit les fichiers.
|
| 39 |
|
| 40 |
-
|
| 41 |
-
|
| 42 |
-
|
| 43 |
-
|
| 44 |
-
|
| 45 |
-
|
| 46 |
-
|
| 47 |
-
|
| 48 |
-
corpus_profile: profil du corpus (pilote le prompt et les layers).
|
| 49 |
-
model_config: configuration du modèle sélectionné (provider + model_id).
|
| 50 |
-
page_id: identifiant unique de la page (ex. "beatus-lat8878-0013r").
|
| 51 |
-
manuscript_id: identifiant du manuscrit.
|
| 52 |
-
corpus_slug: identifiant du corpus (ex. "beatus-lat8878").
|
| 53 |
-
folio_label: label du folio (ex. "0013r").
|
| 54 |
-
sequence: numΓ©ro de sΓ©quence dans le manuscrit.
|
| 55 |
-
image_info: mΓ©tadonnΓ©es de l'image normalisΓ©e (dimensions, chemins).
|
| 56 |
-
base_data_dir: racine du dossier data.
|
| 57 |
-
project_root: racine du projet (pour rΓ©soudre les chemins des prompts).
|
| 58 |
-
|
| 59 |
-
Returns:
|
| 60 |
-
PageMaster validΓ© (ai_raw.json et master.json Γ©crits sur disque).
|
| 61 |
-
|
| 62 |
-
Raises:
|
| 63 |
-
ParseError: si la rΓ©ponse IA n'est pas un JSON valide.
|
| 64 |
-
FileNotFoundError: si le template de prompt est introuvable.
|
| 65 |
-
RuntimeError: si le provider n'est pas configurΓ© (variable d'env absente).
|
| 66 |
"""
|
| 67 |
# ββ Chemins de sortie βββββββββββββββββββββββββββββββββββββββββββββββββββ
|
| 68 |
page_dir = base_data_dir / "corpora" / corpus_slug / "pages" / folio_label
|
|
@@ -85,13 +93,18 @@ def run_primary_analysis(
|
|
| 85 |
extra={"template": prompt_rel_path, "corpus": corpus_slug, "folio": folio_label},
|
| 86 |
)
|
| 87 |
|
| 88 |
-
# ββ 2.
|
| 89 |
-
if not
|
| 90 |
-
|
| 91 |
-
|
| 92 |
-
|
| 93 |
-
|
| 94 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 95 |
|
| 96 |
# ββ 3. Appel IA via le provider sΓ©lectionnΓ© βββββββββββββββββββββββββββββ
|
| 97 |
provider = get_provider(model_config.provider)
|
|
@@ -116,21 +129,45 @@ def run_primary_analysis(
|
|
| 116 |
# ββ 5. Parsing + validation (ParseError si JSON invalide) βββββββββββββββ
|
| 117 |
layout, ocr = parse_ai_response(raw_text)
|
| 118 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 119 |
# ββ 6. Construction du PageMaster βββββββββββββββββββββββββββββββββββββββ
|
| 120 |
processed_at = datetime.now(tz=timezone.utc)
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 121 |
page_master = PageMaster(
|
| 122 |
page_id=page_id,
|
| 123 |
corpus_profile=corpus_profile.profile_id,
|
| 124 |
manuscript_id=manuscript_id,
|
| 125 |
folio_label=folio_label,
|
| 126 |
sequence=sequence,
|
| 127 |
-
image=
|
| 128 |
-
master=image_info.original_url,
|
| 129 |
-
derivative_web=image_info.derivative_path,
|
| 130 |
-
thumbnail=image_info.thumbnail_path,
|
| 131 |
-
width=image_info.derivative_width,
|
| 132 |
-
height=image_info.derivative_height,
|
| 133 |
-
),
|
| 134 |
layout=layout,
|
| 135 |
ocr=ocr,
|
| 136 |
processing=ProcessingInfo(
|
|
@@ -154,6 +191,7 @@ def run_primary_analysis(
|
|
| 154 |
"corpus": corpus_slug,
|
| 155 |
"folio": folio_label,
|
| 156 |
"regions": len(layout.get("regions", [])),
|
|
|
|
| 157 |
},
|
| 158 |
)
|
| 159 |
return page_master
|
|
|
|
| 11 |
|
| 12 |
# 3. local
|
| 13 |
from app.schemas.corpus_profile import CorpusProfile
|
| 14 |
+
from app.schemas.image import ImageDerivativeInfo, ImageSourceInfo
|
| 15 |
from app.schemas.model_config import ModelConfig
|
| 16 |
from app.schemas.page_master import EditorialInfo, EditorialStatus, ImageInfo, PageMaster, ProcessingInfo
|
| 17 |
from app.services.ai.master_writer import write_ai_raw, write_master_json
|
|
|
|
| 22 |
logger = logging.getLogger(__name__)
|
| 23 |
|
| 24 |
|
| 25 |
+
def _scale_bbox_coordinates(layout: dict, scale_x: float, scale_y: float) -> dict:
|
| 26 |
+
"""Met Γ l'Γ©chelle les bbox de l'espace dΓ©rivΓ© vers l'espace canvas original.
|
| 27 |
+
|
| 28 |
+
L'IA analyse un dérivé 1500px mais les coordonnées dans master.json
|
| 29 |
+
doivent être en pixels absolus du canvas original (convention IIIF).
|
| 30 |
+
"""
|
| 31 |
+
if abs(scale_x - 1.0) < 0.01 and abs(scale_y - 1.0) < 0.01:
|
| 32 |
+
return layout # pas de scaling nΓ©cessaire
|
| 33 |
+
|
| 34 |
+
regions = layout.get("regions", [])
|
| 35 |
+
for region in regions:
|
| 36 |
+
bbox = region.get("bbox")
|
| 37 |
+
if bbox and len(bbox) == 4:
|
| 38 |
+
region["bbox"] = [
|
| 39 |
+
round(bbox[0] * scale_x),
|
| 40 |
+
round(bbox[1] * scale_y),
|
| 41 |
+
round(bbox[2] * scale_x),
|
| 42 |
+
round(bbox[3] * scale_y),
|
| 43 |
+
]
|
| 44 |
+
return layout
|
| 45 |
+
|
| 46 |
+
|
| 47 |
def run_primary_analysis(
|
| 48 |
+
*,
|
| 49 |
+
derivative_image_bytes: bytes | None = None,
|
| 50 |
+
derivative_image_path: Path | None = None,
|
| 51 |
corpus_profile: CorpusProfile,
|
| 52 |
model_config: ModelConfig,
|
| 53 |
page_id: str,
|
|
|
|
| 55 |
corpus_slug: str,
|
| 56 |
folio_label: str,
|
| 57 |
sequence: int,
|
| 58 |
+
image_info: ImageDerivativeInfo | ImageSourceInfo,
|
| 59 |
+
derivative_width: int | None = None,
|
| 60 |
+
derivative_height: int | None = None,
|
| 61 |
base_data_dir: Path = Path("data"),
|
| 62 |
project_root: Path = Path("."),
|
| 63 |
) -> PageMaster:
|
| 64 |
"""Analyse primaire d'un folio : charge le prompt, appelle l'IA, Γ©crit les fichiers.
|
| 65 |
|
| 66 |
+
Supporte deux modes :
|
| 67 |
+
- IIIF natif : derivative_image_bytes fourni (bytes en RAM, jamais sur disque)
|
| 68 |
+
- Legacy : derivative_image_path fourni (chemin fichier sur disque)
|
| 69 |
+
|
| 70 |
+
Respecte R05 : ai_raw.json toujours écrit en premier.
|
| 71 |
+
|
| 72 |
+
Si les dimensions originales (canvas) diffèrent du dérivé, les bbox sont
|
| 73 |
+
mises à l'échelle de l'espace dérivé vers l'espace canvas original.
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 74 |
"""
|
| 75 |
# ββ Chemins de sortie βββββββββββββββββββββββββββββββββββββββββββββββββββ
|
| 76 |
page_dir = base_data_dir / "corpora" / corpus_slug / "pages" / folio_label
|
|
|
|
| 93 |
extra={"template": prompt_rel_path, "corpus": corpus_slug, "folio": folio_label},
|
| 94 |
)
|
| 95 |
|
| 96 |
+
# ββ 2. Obtention des bytes image ββββββββββββββββββββββββββββββββββββββββ
|
| 97 |
+
if derivative_image_bytes is not None:
|
| 98 |
+
jpeg_bytes = derivative_image_bytes
|
| 99 |
+
elif derivative_image_path is not None:
|
| 100 |
+
if not derivative_image_path.exists():
|
| 101 |
+
raise FileNotFoundError(f"Image dΓ©rivΓ©e introuvable : {derivative_image_path}")
|
| 102 |
+
try:
|
| 103 |
+
jpeg_bytes = derivative_image_path.read_bytes()
|
| 104 |
+
except OSError as exc:
|
| 105 |
+
raise RuntimeError(f"Erreur lecture image {derivative_image_path} : {exc}") from exc
|
| 106 |
+
else:
|
| 107 |
+
raise ValueError("Il faut fournir derivative_image_bytes ou derivative_image_path")
|
| 108 |
|
| 109 |
# ββ 3. Appel IA via le provider sΓ©lectionnΓ© βββββββββββββββββββββββββββββ
|
| 110 |
provider = get_provider(model_config.provider)
|
|
|
|
| 129 |
# ββ 5. Parsing + validation (ParseError si JSON invalide) βββββββββββββββ
|
| 130 |
layout, ocr = parse_ai_response(raw_text)
|
| 131 |
|
| 132 |
+
# ββ 5b. Scaling bbox si les dimensions originales diffΓ¨rent du dΓ©rivΓ© ββ
|
| 133 |
+
is_iiif_source = isinstance(image_info, ImageSourceInfo)
|
| 134 |
+
original_w = image_info.original_width
|
| 135 |
+
original_h = image_info.original_height
|
| 136 |
+
deriv_w = derivative_width or (getattr(image_info, "derivative_width", None)) or original_w
|
| 137 |
+
deriv_h = derivative_height or (getattr(image_info, "derivative_height", None)) or original_h
|
| 138 |
+
|
| 139 |
+
if original_w > 0 and deriv_w > 0 and (original_w != deriv_w or original_h != deriv_h):
|
| 140 |
+
scale_x = original_w / deriv_w
|
| 141 |
+
scale_y = original_h / deriv_h
|
| 142 |
+
layout = _scale_bbox_coordinates(layout, scale_x, scale_y)
|
| 143 |
+
|
| 144 |
# ββ 6. Construction du PageMaster βββββββββββββββββββββββββββββββββββββββ
|
| 145 |
processed_at = datetime.now(tz=timezone.utc)
|
| 146 |
+
|
| 147 |
+
if is_iiif_source:
|
| 148 |
+
image_block = ImageInfo(
|
| 149 |
+
master=image_info.original_url,
|
| 150 |
+
iiif_service_url=image_info.iiif_service_url,
|
| 151 |
+
manifest_url=image_info.manifest_url,
|
| 152 |
+
width=original_w,
|
| 153 |
+
height=original_h,
|
| 154 |
+
)
|
| 155 |
+
else:
|
| 156 |
+
image_block = ImageInfo(
|
| 157 |
+
master=image_info.original_url,
|
| 158 |
+
derivative_web=getattr(image_info, "derivative_path", None),
|
| 159 |
+
thumbnail=getattr(image_info, "thumbnail_path", None),
|
| 160 |
+
width=original_w,
|
| 161 |
+
height=original_h,
|
| 162 |
+
)
|
| 163 |
+
|
| 164 |
page_master = PageMaster(
|
| 165 |
page_id=page_id,
|
| 166 |
corpus_profile=corpus_profile.profile_id,
|
| 167 |
manuscript_id=manuscript_id,
|
| 168 |
folio_label=folio_label,
|
| 169 |
sequence=sequence,
|
| 170 |
+
image=image_block,
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 171 |
layout=layout,
|
| 172 |
ocr=ocr,
|
| 173 |
processing=ProcessingInfo(
|
|
|
|
| 191 |
"corpus": corpus_slug,
|
| 192 |
"folio": folio_label,
|
| 193 |
"regions": len(layout.get("regions", [])),
|
| 194 |
+
"iiif_native": is_iiif_source,
|
| 195 |
},
|
| 196 |
)
|
| 197 |
return page_master
|
backend/app/services/export/alto.py
CHANGED
|
@@ -160,7 +160,7 @@ def generate_alto(master: PageMaster) -> str:
|
|
| 160 |
etree.SubElement(desc, _a("MeasurementUnit")).text = "pixel"
|
| 161 |
|
| 162 |
src_info = etree.SubElement(desc, _a("sourceImageInformation"))
|
| 163 |
-
file_name = master.image.master or master.image.derivative_web or master.page_id
|
| 164 |
etree.SubElement(src_info, _a("fileName")).text = str(file_name)
|
| 165 |
|
| 166 |
if master.processing:
|
|
|
|
| 160 |
etree.SubElement(desc, _a("MeasurementUnit")).text = "pixel"
|
| 161 |
|
| 162 |
src_info = etree.SubElement(desc, _a("sourceImageInformation"))
|
| 163 |
+
file_name = master.image.iiif_service_url or master.image.master or master.image.derivative_web or master.page_id
|
| 164 |
etree.SubElement(src_info, _a("fileName")).text = str(file_name)
|
| 165 |
|
| 166 |
if master.processing:
|
backend/app/services/export/iiif.py
CHANGED
|
@@ -108,6 +108,23 @@ def generate_manifest(
|
|
| 108 |
annotation_page_id = f"{canvas_id}/annotation-page/1"
|
| 109 |
annotation_id = f"{canvas_id}/annotation/painting"
|
| 110 |
image_url = page.image.master or ""
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 111 |
|
| 112 |
canvas: dict = {
|
| 113 |
"id": canvas_id,
|
|
@@ -124,14 +141,8 @@ def generate_manifest(
|
|
| 124 |
"id": annotation_id,
|
| 125 |
"type": "Annotation",
|
| 126 |
"motivation": "painting",
|
| 127 |
-
"body":
|
| 128 |
-
|
| 129 |
-
"type": "Image",
|
| 130 |
-
"format": "image/jpeg",
|
| 131 |
-
"width": width,
|
| 132 |
-
"height": height,
|
| 133 |
-
},
|
| 134 |
-
"target": canvas_id,
|
| 135 |
}
|
| 136 |
],
|
| 137 |
}
|
|
|
|
| 108 |
annotation_page_id = f"{canvas_id}/annotation-page/1"
|
| 109 |
annotation_id = f"{canvas_id}/annotation/painting"
|
| 110 |
image_url = page.image.master or ""
|
| 111 |
+
iiif_svc = page.image.iiif_service_url
|
| 112 |
+
|
| 113 |
+
# Corps de l'annotation painting
|
| 114 |
+
body: dict = {
|
| 115 |
+
"id": image_url,
|
| 116 |
+
"type": "Image",
|
| 117 |
+
"format": "image/jpeg",
|
| 118 |
+
"width": width,
|
| 119 |
+
"height": height,
|
| 120 |
+
}
|
| 121 |
+
# Si un IIIF Image Service est connu, le dΓ©clarer (zoom tuilΓ© natif)
|
| 122 |
+
if iiif_svc:
|
| 123 |
+
body["service"] = [{
|
| 124 |
+
"id": iiif_svc,
|
| 125 |
+
"type": "ImageService3",
|
| 126 |
+
"profile": "level2",
|
| 127 |
+
}]
|
| 128 |
|
| 129 |
canvas: dict = {
|
| 130 |
"id": canvas_id,
|
|
|
|
| 141 |
"id": annotation_id,
|
| 142 |
"type": "Annotation",
|
| 143 |
"motivation": "painting",
|
| 144 |
+
"body": body,
|
| 145 |
+
"target": canvas_id,
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 146 |
}
|
| 147 |
],
|
| 148 |
}
|
backend/app/services/export/mets.py
CHANGED
|
@@ -178,20 +178,28 @@ def generate_mets(
|
|
| 178 |
for page in pages:
|
| 179 |
sid = _safe_id(page.page_id)
|
| 180 |
|
| 181 |
-
# master image
|
|
|
|
|
|
|
|
|
|
| 182 |
f_master = _el(grp_master, f"{_M}file", {"ID": f"IMG_MASTER_{sid}", "MIMETYPE": "image/jpeg"})
|
| 183 |
_el(f_master, f"{_M}FLocat", {
|
| 184 |
"LOCTYPE": "URL",
|
| 185 |
-
f"{_XL}href":
|
| 186 |
f"{_XL}type": "simple",
|
| 187 |
})
|
| 188 |
|
| 189 |
-
# dΓ©rivΓ© web
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 190 |
f_deriv = _el(grp_deriv, f"{_M}file", {"ID": f"IMG_DERIV_{sid}", "MIMETYPE": "image/jpeg"})
|
| 191 |
_el(f_deriv, f"{_M}FLocat", {
|
| 192 |
-
|
| 193 |
-
"
|
| 194 |
-
f"{_XL}href": page.image.derivative_web or "",
|
| 195 |
f"{_XL}type": "simple",
|
| 196 |
})
|
| 197 |
|
|
|
|
| 178 |
for page in pages:
|
| 179 |
sid = _safe_id(page.page_id)
|
| 180 |
|
| 181 |
+
# master image (IIIF service URL ou URL statique)
|
| 182 |
+
master_url = page.image.iiif_service_url or page.image.master or ""
|
| 183 |
+
if page.image.iiif_service_url:
|
| 184 |
+
master_url = f"{page.image.iiif_service_url}/full/max/0/default.jpg"
|
| 185 |
f_master = _el(grp_master, f"{_M}file", {"ID": f"IMG_MASTER_{sid}", "MIMETYPE": "image/jpeg"})
|
| 186 |
_el(f_master, f"{_M}FLocat", {
|
| 187 |
"LOCTYPE": "URL",
|
| 188 |
+
f"{_XL}href": master_url,
|
| 189 |
f"{_XL}type": "simple",
|
| 190 |
})
|
| 191 |
|
| 192 |
+
# dΓ©rivΓ© web (URL IIIF 1500px ou chemin local legacy)
|
| 193 |
+
if page.image.iiif_service_url:
|
| 194 |
+
deriv_href = f"{page.image.iiif_service_url}/full/!1500,1500/0/default.jpg"
|
| 195 |
+
deriv_loctype_attrs = {"LOCTYPE": "URL"}
|
| 196 |
+
else:
|
| 197 |
+
deriv_href = page.image.derivative_web or ""
|
| 198 |
+
deriv_loctype_attrs = {"LOCTYPE": "OTHER", "OTHERLOCTYPE": "filepath"}
|
| 199 |
f_deriv = _el(grp_deriv, f"{_M}file", {"ID": f"IMG_DERIV_{sid}", "MIMETYPE": "image/jpeg"})
|
| 200 |
_el(f_deriv, f"{_M}FLocat", {
|
| 201 |
+
**deriv_loctype_attrs,
|
| 202 |
+
f"{_XL}href": deriv_href,
|
|
|
|
| 203 |
f"{_XL}type": "simple",
|
| 204 |
})
|
| 205 |
|
backend/app/services/image/normalizer.py
CHANGED
|
@@ -143,3 +143,55 @@ def fetch_and_normalize(
|
|
| 143 |
"""
|
| 144 |
source_bytes = fetch_iiif_image(url)
|
| 145 |
return create_derivatives(source_bytes, url, corpus_slug, folio_label, base_data_dir)
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 143 |
"""
|
| 144 |
source_bytes = fetch_iiif_image(url)
|
| 145 |
return create_derivatives(source_bytes, url, corpus_slug, folio_label, base_data_dir)
|
| 146 |
+
|
| 147 |
+
|
| 148 |
+
# ── Mode IIIF natif : images en mémoire, jamais sur disque ───────────────────
|
| 149 |
+
|
| 150 |
+
def fetch_ai_derivative_bytes(
|
| 151 |
+
iiif_service_url: str | None,
|
| 152 |
+
fallback_url: str | None,
|
| 153 |
+
) -> tuple[bytes, int, int]:
|
| 154 |
+
"""Retourne (jpeg_bytes, width, height) pour l'IA — jamais sauvé sur disque.
|
| 155 |
+
|
| 156 |
+
- Si iiif_service_url est fourni : utilise l'IIIF Image API pour demander
|
| 157 |
+
au serveur un dérivé 1500px directement redimensionné côté serveur.
|
| 158 |
+
- Sinon (fallback_url) : télécharge l'image complète et redimensionne
|
| 159 |
+
en mémoire.
|
| 160 |
+
|
| 161 |
+
Returns:
|
| 162 |
+
Tuple (jpeg_bytes, derivative_width, derivative_height).
|
| 163 |
+
|
| 164 |
+
Raises:
|
| 165 |
+
ValueError: si aucune source n'est fournie.
|
| 166 |
+
httpx.HTTPStatusError: si le serveur retourne une erreur.
|
| 167 |
+
"""
|
| 168 |
+
from app.services.ingest.iiif_fetcher import fetch_iiif_derivative, fetch_iiif_image
|
| 169 |
+
|
| 170 |
+
if iiif_service_url:
|
| 171 |
+
raw_bytes = fetch_iiif_derivative(iiif_service_url, max_px=_MAX_DERIVATIVE_PX)
|
| 172 |
+
elif fallback_url:
|
| 173 |
+
raw_bytes = fetch_iiif_image(fallback_url)
|
| 174 |
+
else:
|
| 175 |
+
raise ValueError("Aucune source image fournie (ni iiif_service_url ni fallback_url)")
|
| 176 |
+
|
| 177 |
+
# Ouvrir en mémoire pour obtenir les dimensions (et redimensionner si fallback)
|
| 178 |
+
image = Image.open(io.BytesIO(raw_bytes))
|
| 179 |
+
if image.mode != "RGB":
|
| 180 |
+
image = image.convert("RGB")
|
| 181 |
+
|
| 182 |
+
if not iiif_service_url:
|
| 183 |
+
# Fallback : le serveur n'a pas redimensionné, on le fait en mémoire
|
| 184 |
+
image = _resize_to_max(image, _MAX_DERIVATIVE_PX)
|
| 185 |
+
|
| 186 |
+
w, h = image.size
|
| 187 |
+
|
| 188 |
+
# Encoder en JPEG en mémoire
|
| 189 |
+
buf = io.BytesIO()
|
| 190 |
+
image.save(buf, format="JPEG", quality=_DERIVATIVE_QUALITY)
|
| 191 |
+
jpeg_bytes = buf.getvalue()
|
| 192 |
+
|
| 193 |
+
logger.info(
|
| 194 |
+
"Dérivé IA en mémoire",
|
| 195 |
+
extra={"iiif": bool(iiif_service_url), "size": f"{w}x{h}", "bytes": len(jpeg_bytes)},
|
| 196 |
+
)
|
| 197 |
+
return jpeg_bytes, w, h
|
backend/app/services/ingest/iiif_fetcher.py
CHANGED
|
@@ -48,3 +48,38 @@ def fetch_iiif_image(url: str, timeout: float = _DEFAULT_TIMEOUT) -> bytes:
|
|
| 48 |
extra={"url": url, "size_bytes": len(response.content)},
|
| 49 |
)
|
| 50 |
return response.content
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 48 |
extra={"url": url, "size_bytes": len(response.content)},
|
| 49 |
)
|
| 50 |
return response.content
|
| 51 |
+
|
| 52 |
+
|
| 53 |
+
def fetch_iiif_derivative(
|
| 54 |
+
service_url: str,
|
| 55 |
+
max_px: int = 1500,
|
| 56 |
+
timeout: float = _DEFAULT_TIMEOUT,
|
| 57 |
+
) -> bytes:
|
| 58 |
+
"""Télécharge un dérivé via l'IIIF Image API — jamais stocké sur disque.
|
| 59 |
+
|
| 60 |
+
Construit l'URL : {service_url}/full/!{max_px},{max_px}/0/default.jpg
|
| 61 |
+
Le serveur IIIF retourne une image redimensionnée côté serveur.
|
| 62 |
+
|
| 63 |
+
Args:
|
| 64 |
+
service_url: URL du IIIF Image Service (sans le suffix /full/.../default.jpg).
|
| 65 |
+
max_px: taille max du grand côté (défaut : 1500).
|
| 66 |
+
timeout: délai maximal en secondes.
|
| 67 |
+
|
| 68 |
+
Returns:
|
| 69 |
+
Contenu brut de l'image JPEG en bytes.
|
| 70 |
+
"""
|
| 71 |
+
# Pattern IIIF Image API : !w,h = "best fit" (le serveur choisit)
|
| 72 |
+
derivative_url = f"{service_url.rstrip('/')}/full/!{max_px},{max_px}/0/default.jpg"
|
| 73 |
+
logger.info("Fetching IIIF derivative", extra={"url": derivative_url, "max_px": max_px})
|
| 74 |
+
response = httpx.get(
|
| 75 |
+
derivative_url,
|
| 76 |
+
headers=_HEADERS,
|
| 77 |
+
follow_redirects=True,
|
| 78 |
+
timeout=httpx.Timeout(timeout, connect=10.0),
|
| 79 |
+
)
|
| 80 |
+
response.raise_for_status()
|
| 81 |
+
logger.info(
|
| 82 |
+
"IIIF derivative fetched",
|
| 83 |
+
extra={"url": derivative_url, "size_bytes": len(response.content)},
|
| 84 |
+
)
|
| 85 |
+
return response.content
|
backend/app/services/job_runner.py
CHANGED
|
@@ -32,8 +32,13 @@ from app.models.database import async_session_factory
|
|
| 32 |
from app.models.job import JobModel
|
| 33 |
from app.models.model_config_db import ModelConfigDB
|
| 34 |
from app.schemas.corpus_profile import CorpusProfile
|
|
|
|
| 35 |
from app.schemas.model_config import ModelConfig, ProviderType
|
| 36 |
-
from app.services.image.normalizer import
|
|
|
|
|
|
|
|
|
|
|
|
|
| 37 |
|
| 38 |
logger = logging.getLogger(__name__)
|
| 39 |
|
|
@@ -126,19 +131,65 @@ async def _run_job_impl(job_id: str, db: AsyncSession) -> None:
|
|
| 126 |
available_models=[],
|
| 127 |
)
|
| 128 |
|
| 129 |
-
# ββ 5.
|
| 130 |
data_dir = _config_module.settings.data_dir
|
| 131 |
image_source = page.image_master_path or ""
|
| 132 |
|
| 133 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 134 |
image_info = fetch_and_normalize(
|
| 135 |
image_source, corpus.slug, page.folio_label, data_dir
|
| 136 |
)
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 137 |
elif image_source:
|
| 138 |
-
#
|
| 139 |
-
# sous data_dir. Empêche la lecture de fichiers arbitraires
|
| 140 |
-
# si image_master_path contient des séquences ../ ou un
|
| 141 |
-
# chemin absolu hors du répertoire de données.
|
| 142 |
source_path = Path(image_source).resolve()
|
| 143 |
data_dir_resolved = data_dir.resolve()
|
| 144 |
if not str(source_path).startswith(str(data_dir_resolved) + "/") and source_path != data_dir_resolved:
|
|
@@ -150,29 +201,26 @@ async def _run_job_impl(job_id: str, db: AsyncSession) -> None:
|
|
| 150 |
image_info = create_derivatives(
|
| 151 |
source_bytes, image_source, corpus.slug, page.folio_label, data_dir
|
| 152 |
)
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 153 |
else:
|
| 154 |
raise ValueError(
|
| 155 |
f"La page {page.id} n'a pas d'image source "
|
| 156 |
-
"(
|
| 157 |
)
|
| 158 |
|
| 159 |
-
# ── 6. Analyse primaire IA (R05 : double stockage) ───────────────────
|
| 160 |
-
from app.services.ai.analyzer import run_primary_analysis
|
| 161 |
-
|
| 162 |
-
page_master = run_primary_analysis(
|
| 163 |
-
derivative_image_path=Path(image_info.derivative_path),
|
| 164 |
-
corpus_profile=corpus_profile,
|
| 165 |
-
model_config=model_config,
|
| 166 |
-
page_id=page.id,
|
| 167 |
-
manuscript_id=manuscript.id,
|
| 168 |
-
corpus_slug=corpus.slug,
|
| 169 |
-
folio_label=page.folio_label,
|
| 170 |
-
sequence=page.sequence,
|
| 171 |
-
image_info=image_info,
|
| 172 |
-
base_data_dir=data_dir,
|
| 173 |
-
project_root=_PROJECT_ROOT,
|
| 174 |
-
)
|
| 175 |
-
|
| 176 |
# ── 7. Générer et écrire l'ALTO XML ──────────────────────────────────
|
| 177 |
from app.services.export.alto import generate_alto, write_alto
|
| 178 |
|
|
|
|
| 32 |
from app.models.job import JobModel
|
| 33 |
from app.models.model_config_db import ModelConfigDB
|
| 34 |
from app.schemas.corpus_profile import CorpusProfile
|
| 35 |
+
from app.schemas.image import ImageSourceInfo
|
| 36 |
from app.schemas.model_config import ModelConfig, ProviderType
|
| 37 |
+
from app.services.image.normalizer import (
|
| 38 |
+
create_derivatives,
|
| 39 |
+
fetch_ai_derivative_bytes,
|
| 40 |
+
fetch_and_normalize,
|
| 41 |
+
)
|
| 42 |
|
| 43 |
logger = logging.getLogger(__name__)
|
| 44 |
|
|
|
|
| 131 |
available_models=[],
|
| 132 |
)
|
| 133 |
|
| 134 |
+
# ── 5. Obtenir l'image pour l'IA ─────────────────────────────────────
|
| 135 |
data_dir = _config_module.settings.data_dir
|
| 136 |
image_source = page.image_master_path or ""
|
| 137 |
|
| 138 |
+
from app.services.ai.analyzer import run_primary_analysis
|
| 139 |
+
|
| 140 |
+
if page.iiif_service_url:
|
| 141 |
+
# ── Mode IIIF natif : fetch en mémoire, zéro stockage ────────────
|
| 142 |
+
deriv_bytes, deriv_w, deriv_h = fetch_ai_derivative_bytes(
|
| 143 |
+
iiif_service_url=page.iiif_service_url,
|
| 144 |
+
fallback_url=None,
|
| 145 |
+
)
|
| 146 |
+
image_source_info = ImageSourceInfo(
|
| 147 |
+
original_url=image_source or page.iiif_service_url,
|
| 148 |
+
iiif_service_url=page.iiif_service_url,
|
| 149 |
+
manifest_url=page.manifest_url,
|
| 150 |
+
is_iiif=True,
|
| 151 |
+
original_width=page.canvas_width or deriv_w,
|
| 152 |
+
original_height=page.canvas_height or deriv_h,
|
| 153 |
+
)
|
| 154 |
+
|
| 155 |
+
# ── 6. Analyse primaire IA (R05 : double stockage) ───────────────
|
| 156 |
+
page_master = run_primary_analysis(
|
| 157 |
+
derivative_image_bytes=deriv_bytes,
|
| 158 |
+
derivative_width=deriv_w,
|
| 159 |
+
derivative_height=deriv_h,
|
| 160 |
+
corpus_profile=corpus_profile,
|
| 161 |
+
model_config=model_config,
|
| 162 |
+
page_id=page.id,
|
| 163 |
+
manuscript_id=manuscript.id,
|
| 164 |
+
corpus_slug=corpus.slug,
|
| 165 |
+
folio_label=page.folio_label,
|
| 166 |
+
sequence=page.sequence,
|
| 167 |
+
image_info=image_source_info,
|
| 168 |
+
base_data_dir=data_dir,
|
| 169 |
+
project_root=_PROJECT_ROOT,
|
| 170 |
+
)
|
| 171 |
+
|
| 172 |
+
elif image_source.startswith(("http://", "https://")):
|
| 173 |
+
# ── Mode fallback URL : télécharge + stocke sur disque (legacy) ──
|
| 174 |
image_info = fetch_and_normalize(
|
| 175 |
image_source, corpus.slug, page.folio_label, data_dir
|
| 176 |
)
|
| 177 |
+
page_master = run_primary_analysis(
|
| 178 |
+
derivative_image_path=Path(image_info.derivative_path),
|
| 179 |
+
corpus_profile=corpus_profile,
|
| 180 |
+
model_config=model_config,
|
| 181 |
+
page_id=page.id,
|
| 182 |
+
manuscript_id=manuscript.id,
|
| 183 |
+
corpus_slug=corpus.slug,
|
| 184 |
+
folio_label=page.folio_label,
|
| 185 |
+
sequence=page.sequence,
|
| 186 |
+
image_info=image_info,
|
| 187 |
+
base_data_dir=data_dir,
|
| 188 |
+
project_root=_PROJECT_ROOT,
|
| 189 |
+
)
|
| 190 |
+
|
| 191 |
elif image_source:
|
| 192 |
+
# ── Mode fichier local (upload) ──────────────────────────────────
|
|
|
|
|
|
|
|
|
|
| 193 |
source_path = Path(image_source).resolve()
|
| 194 |
data_dir_resolved = data_dir.resolve()
|
| 195 |
if not str(source_path).startswith(str(data_dir_resolved) + "/") and source_path != data_dir_resolved:
|
|
|
|
| 201 |
image_info = create_derivatives(
|
| 202 |
source_bytes, image_source, corpus.slug, page.folio_label, data_dir
|
| 203 |
)
|
| 204 |
+
page_master = run_primary_analysis(
|
| 205 |
+
derivative_image_path=Path(image_info.derivative_path),
|
| 206 |
+
corpus_profile=corpus_profile,
|
| 207 |
+
model_config=model_config,
|
| 208 |
+
page_id=page.id,
|
| 209 |
+
manuscript_id=manuscript.id,
|
| 210 |
+
corpus_slug=corpus.slug,
|
| 211 |
+
folio_label=page.folio_label,
|
| 212 |
+
sequence=page.sequence,
|
| 213 |
+
image_info=image_info,
|
| 214 |
+
base_data_dir=data_dir,
|
| 215 |
+
project_root=_PROJECT_ROOT,
|
| 216 |
+
)
|
| 217 |
+
|
| 218 |
else:
|
| 219 |
raise ValueError(
|
| 220 |
f"La page {page.id} n'a pas d'image source "
|
| 221 |
+
"(ni iiif_service_url, ni image_master_path)"
|
| 222 |
)
|
| 223 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 224 |
# ── 7. Générer et écrire l'ALTO XML ──────────────────────────────────
|
| 225 |
from app.services.export.alto import generate_alto, write_alto
|
| 226 |
|
backend/tests/test_ai_analyzer.py
CHANGED
|
@@ -663,8 +663,9 @@ def test_run_primary_analysis_image_dict(tmp_path):
|
|
| 663 |
)
|
| 664 |
|
| 665 |
assert result.image.master == image_info.original_url
|
| 666 |
-
|
| 667 |
-
assert result.image.
|
|
|
|
| 668 |
|
| 669 |
|
| 670 |
def test_run_primary_analysis_regions_in_layout(tmp_path):
|
|
@@ -866,3 +867,98 @@ def test_run_primary_analysis_invalid_region_skipped(tmp_path):
|
|
| 866 |
|
| 867 |
assert len(result.layout["regions"]) == 1
|
| 868 |
assert result.layout["regions"][0]["id"] == "r_good"
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 663 |
)
|
| 664 |
|
| 665 |
assert result.image.master == image_info.original_url
|
| 666 |
+
# L'analyzer stocke désormais les dimensions originales (pas celles du dérivé)
|
| 667 |
+
assert result.image.width == image_info.original_width
|
| 668 |
+
assert result.image.height == image_info.original_height
|
| 669 |
|
| 670 |
|
| 671 |
def test_run_primary_analysis_regions_in_layout(tmp_path):
|
|
|
|
| 867 |
|
| 868 |
assert len(result.layout["regions"]) == 1
|
| 869 |
assert result.layout["regions"][0]["id"] == "r_good"
|
| 870 |
+
|
| 871 |
+
|
| 872 |
+
# ---------------------------------------------------------------------------
|
| 873 |
+
# Mode IIIF natif — bytes en mémoire
|
| 874 |
+
# ---------------------------------------------------------------------------
|
| 875 |
+
|
| 876 |
+
from app.schemas.image import ImageSourceInfo
|
| 877 |
+
|
| 878 |
+
|
| 879 |
+
def _make_image_source_info() -> ImageSourceInfo:
|
| 880 |
+
return ImageSourceInfo(
|
| 881 |
+
original_url="https://gallica.bnf.fr/iiif/ark:/12148/btv1b8432314s/f29/full/max/0/default.jpg",
|
| 882 |
+
iiif_service_url="https://gallica.bnf.fr/iiif/ark:/12148/btv1b8432314s/f29",
|
| 883 |
+
manifest_url="https://gallica.bnf.fr/iiif/ark:/12148/btv1b8432314s/manifest.json",
|
| 884 |
+
is_iiif=True,
|
| 885 |
+
original_width=3543,
|
| 886 |
+
original_height=4724,
|
| 887 |
+
)
|
| 888 |
+
|
| 889 |
+
|
| 890 |
+
def test_run_primary_analysis_iiif_bytes_mode(tmp_path):
|
| 891 |
+
"""Mode IIIF natif : passe des bytes directement, pas de chemin fichier."""
|
| 892 |
+
prompt_rel = "prompts/medieval-illuminated/primary_v1.txt"
|
| 893 |
+
_setup_prompt_file(tmp_path, prompt_rel)
|
| 894 |
+
|
| 895 |
+
jpeg_bytes = _make_jpeg_bytes(200, 300)
|
| 896 |
+
mock_provider = _make_mock_provider(_valid_ai_json())
|
| 897 |
+
|
| 898 |
+
with patch("app.services.ai.analyzer.get_provider", return_value=mock_provider):
|
| 899 |
+
result = run_primary_analysis(
|
| 900 |
+
derivative_image_bytes=jpeg_bytes,
|
| 901 |
+
derivative_width=200,
|
| 902 |
+
derivative_height=300,
|
| 903 |
+
corpus_profile=_make_corpus_profile(prompt_rel_path=prompt_rel),
|
| 904 |
+
model_config=_make_model_config(),
|
| 905 |
+
page_id="test-iiif-0001r",
|
| 906 |
+
manuscript_id="ms-test",
|
| 907 |
+
corpus_slug="test-corpus",
|
| 908 |
+
folio_label="0001r",
|
| 909 |
+
sequence=1,
|
| 910 |
+
image_info=_make_image_source_info(),
|
| 911 |
+
base_data_dir=tmp_path / "data",
|
| 912 |
+
project_root=tmp_path,
|
| 913 |
+
)
|
| 914 |
+
|
| 915 |
+
assert result.image.iiif_service_url == "https://gallica.bnf.fr/iiif/ark:/12148/btv1b8432314s/f29"
|
| 916 |
+
assert result.image.manifest_url is not None
|
| 917 |
+
assert result.image.derivative_web is None
|
| 918 |
+
assert result.image.width == 3543 # dimensions originales, pas dérivé
|
| 919 |
+
assert result.image.height == 4724
|
| 920 |
+
|
| 921 |
+
|
| 922 |
+
def test_run_primary_analysis_iiif_bbox_scaling(tmp_path):
|
| 923 |
+
"""Les bbox sont mises à l'échelle du dérivé vers le canvas original."""
|
| 924 |
+
prompt_rel = "prompts/medieval-illuminated/primary_v1.txt"
|
| 925 |
+
_setup_prompt_file(tmp_path, prompt_rel)
|
| 926 |
+
|
| 927 |
+
# Image source : 4000x6000 original, dérivé 1000x1500
|
| 928 |
+
source_info = ImageSourceInfo(
|
| 929 |
+
original_url="https://example.com/img",
|
| 930 |
+
iiif_service_url="https://example.com/img",
|
| 931 |
+
is_iiif=True,
|
| 932 |
+
original_width=4000,
|
| 933 |
+
original_height=6000,
|
| 934 |
+
)
|
| 935 |
+
|
| 936 |
+
# Réponse IA avec bbox dans l'espace du dérivé (1000x1500)
|
| 937 |
+
ai_response = json.dumps({
|
| 938 |
+
"layout": {"regions": [
|
| 939 |
+
{"id": "r1", "type": "text_block", "bbox": [100, 200, 500, 300], "confidence": 0.9},
|
| 940 |
+
]},
|
| 941 |
+
"ocr": {"diplomatic_text": "test", "language": "la", "confidence": 0.8},
|
| 942 |
+
})
|
| 943 |
+
mock_provider = _make_mock_provider(ai_response)
|
| 944 |
+
|
| 945 |
+
with patch("app.services.ai.analyzer.get_provider", return_value=mock_provider):
|
| 946 |
+
result = run_primary_analysis(
|
| 947 |
+
derivative_image_bytes=_make_jpeg_bytes(100, 150),
|
| 948 |
+
derivative_width=1000,
|
| 949 |
+
derivative_height=1500,
|
| 950 |
+
corpus_profile=_make_corpus_profile(prompt_rel_path=prompt_rel),
|
| 951 |
+
model_config=_make_model_config(),
|
| 952 |
+
page_id="test-scale-0001r",
|
| 953 |
+
manuscript_id="ms-test",
|
| 954 |
+
corpus_slug="test-corpus",
|
| 955 |
+
folio_label="0001r",
|
| 956 |
+
sequence=1,
|
| 957 |
+
image_info=source_info,
|
| 958 |
+
base_data_dir=tmp_path / "data",
|
| 959 |
+
project_root=tmp_path,
|
| 960 |
+
)
|
| 961 |
+
|
| 962 |
+
# Scale factor : 4000/1000 = 4.0, 6000/1500 = 4.0
|
| 963 |
+
bbox = result.layout["regions"][0]["bbox"]
|
| 964 |
+
assert bbox == [400, 800, 2000, 1200] # 100*4, 200*4, 500*4, 300*4
|
backend/tests/test_iiif_service_detection.py
ADDED
|
@@ -0,0 +1,247 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
"""
|
| 2 |
+
Tests de détection du IIIF Image Service à l'ingestion.
|
| 3 |
+
|
| 4 |
+
Vérifie :
|
| 5 |
+
- Extraction depuis un canvas IIIF 3.0 avec ImageService3
|
| 6 |
+
- Extraction depuis un canvas IIIF 2.x avec service @id
|
| 7 |
+
- Détection par pattern URL (Gallica, etc.)
|
| 8 |
+
- Fallback quand aucun service n'est trouvé
|
| 9 |
+
- Détection depuis URL directe (ingest/iiif-images)
|
| 10 |
+
"""
|
| 11 |
+
import pytest
|
| 12 |
+
|
| 13 |
+
from app.api.v1.ingest import (
|
| 14 |
+
_detect_iiif_service_from_url,
|
| 15 |
+
_extract_iiif_service,
|
| 16 |
+
)
|
| 17 |
+
|
| 18 |
+
|
| 19 |
+
# ---------------------------------------------------------------------------
|
| 20 |
+
# _extract_iiif_service — IIIF 3.0
|
| 21 |
+
# ---------------------------------------------------------------------------
|
| 22 |
+
|
| 23 |
+
def test_extract_iiif3_with_image_service3():
|
| 24 |
+
"""Canvas IIIF 3.0 avec service ImageService3 explicite."""
|
| 25 |
+
canvas = {
|
| 26 |
+
"width": 3543,
|
| 27 |
+
"height": 4724,
|
| 28 |
+
"items": [{
|
| 29 |
+
"items": [{
|
| 30 |
+
"body": {
|
| 31 |
+
"id": "https://gallica.bnf.fr/iiif/ark:/12148/btv1b8432314s/f29/full/max/0/default.jpg",
|
| 32 |
+
"type": "Image",
|
| 33 |
+
"service": [{
|
| 34 |
+
"id": "https://gallica.bnf.fr/iiif/ark:/12148/btv1b8432314s/f29",
|
| 35 |
+
"type": "ImageService3",
|
| 36 |
+
"profile": "level2",
|
| 37 |
+
}],
|
| 38 |
+
},
|
| 39 |
+
}],
|
| 40 |
+
}],
|
| 41 |
+
}
|
| 42 |
+
svc_url, w, h = _extract_iiif_service(canvas)
|
| 43 |
+
assert svc_url == "https://gallica.bnf.fr/iiif/ark:/12148/btv1b8432314s/f29"
|
| 44 |
+
assert w == 3543
|
| 45 |
+
assert h == 4724
|
| 46 |
+
|
| 47 |
+
|
| 48 |
+
def test_extract_iiif3_with_image_service2():
|
| 49 |
+
"""Canvas IIIF 3.0 avec un service de type ImageService2."""
|
| 50 |
+
canvas = {
|
| 51 |
+
"width": 2000,
|
| 52 |
+
"height": 3000,
|
| 53 |
+
"items": [{
|
| 54 |
+
"items": [{
|
| 55 |
+
"body": {
|
| 56 |
+
"id": "https://example.com/image/1/full/max/0/default.jpg",
|
| 57 |
+
"type": "Image",
|
| 58 |
+
"service": [{
|
| 59 |
+
"id": "https://example.com/image/1",
|
| 60 |
+
"type": "ImageService2",
|
| 61 |
+
"profile": "level1",
|
| 62 |
+
}],
|
| 63 |
+
},
|
| 64 |
+
}],
|
| 65 |
+
}],
|
| 66 |
+
}
|
| 67 |
+
svc_url, w, h = _extract_iiif_service(canvas)
|
| 68 |
+
assert svc_url == "https://example.com/image/1"
|
| 69 |
+
assert w == 2000
|
| 70 |
+
|
| 71 |
+
|
| 72 |
+
def test_extract_iiif3_service_as_dict():
|
| 73 |
+
"""Le champ service peut être un dict au lieu d'une liste."""
|
| 74 |
+
canvas = {
|
| 75 |
+
"width": 1000,
|
| 76 |
+
"height": 1500,
|
| 77 |
+
"items": [{
|
| 78 |
+
"items": [{
|
| 79 |
+
"body": {
|
| 80 |
+
"id": "https://example.com/img/full/max/0/default.jpg",
|
| 81 |
+
"service": {
|
| 82 |
+
"id": "https://example.com/img",
|
| 83 |
+
"type": "ImageService3",
|
| 84 |
+
},
|
| 85 |
+
},
|
| 86 |
+
}],
|
| 87 |
+
}],
|
| 88 |
+
}
|
| 89 |
+
svc_url, _, _ = _extract_iiif_service(canvas)
|
| 90 |
+
assert svc_url == "https://example.com/img"
|
| 91 |
+
|
| 92 |
+
|
| 93 |
+
def test_extract_iiif3_fallback_url_pattern():
|
| 94 |
+
"""Sans service explicite, détecte le pattern Image API dans body.id."""
|
| 95 |
+
canvas = {
|
| 96 |
+
"width": 3000,
|
| 97 |
+
"height": 4000,
|
| 98 |
+
"items": [{
|
| 99 |
+
"items": [{
|
| 100 |
+
"body": {
|
| 101 |
+
"id": "https://gallica.bnf.fr/iiif/ark:/12148/btv1b8432314s/f29/full/max/0/default.jpg",
|
| 102 |
+
"type": "Image",
|
| 103 |
+
# Pas de "service" !
|
| 104 |
+
},
|
| 105 |
+
}],
|
| 106 |
+
}],
|
| 107 |
+
}
|
| 108 |
+
svc_url, w, h = _extract_iiif_service(canvas)
|
| 109 |
+
assert svc_url == "https://gallica.bnf.fr/iiif/ark:/12148/btv1b8432314s/f29"
|
| 110 |
+
assert w == 3000
|
| 111 |
+
|
| 112 |
+
|
| 113 |
+
def test_extract_iiif3_no_service_no_pattern():
|
| 114 |
+
"""Canvas sans service et sans pattern Image API → None."""
|
| 115 |
+
canvas = {
|
| 116 |
+
"width": 800,
|
| 117 |
+
"height": 600,
|
| 118 |
+
"items": [{
|
| 119 |
+
"items": [{
|
| 120 |
+
"body": {
|
| 121 |
+
"id": "https://example.com/static/page1.jpg",
|
| 122 |
+
"type": "Image",
|
| 123 |
+
},
|
| 124 |
+
}],
|
| 125 |
+
}],
|
| 126 |
+
}
|
| 127 |
+
svc_url, w, h = _extract_iiif_service(canvas)
|
| 128 |
+
assert svc_url is None
|
| 129 |
+
assert w == 800
|
| 130 |
+
assert h == 600
|
| 131 |
+
|
| 132 |
+
|
| 133 |
+
# ---------------------------------------------------------------------------
|
| 134 |
+
# _extract_iiif_service — IIIF 2.x
|
| 135 |
+
# ---------------------------------------------------------------------------
|
| 136 |
+
|
| 137 |
+
def test_extract_iiif2_with_service():
|
| 138 |
+
"""Canvas IIIF 2.x avec service dans resource."""
|
| 139 |
+
canvas = {
|
| 140 |
+
"width": 4000,
|
| 141 |
+
"height": 5000,
|
| 142 |
+
"images": [{
|
| 143 |
+
"resource": {
|
| 144 |
+
"@id": "https://example.com/image/2/full/full/0/default.jpg",
|
| 145 |
+
"service": {
|
| 146 |
+
"@id": "https://example.com/image/2",
|
| 147 |
+
"@type": "ImageService2",
|
| 148 |
+
},
|
| 149 |
+
},
|
| 150 |
+
}],
|
| 151 |
+
}
|
| 152 |
+
svc_url, w, h = _extract_iiif_service(canvas)
|
| 153 |
+
assert svc_url == "https://example.com/image/2"
|
| 154 |
+
assert w == 4000
|
| 155 |
+
|
| 156 |
+
|
| 157 |
+
def test_extract_iiif2_fallback_url_pattern():
|
| 158 |
+
"""IIIF 2.x : détection par pattern dans resource @id."""
|
| 159 |
+
canvas = {
|
| 160 |
+
"width": 2500,
|
| 161 |
+
"height": 3500,
|
| 162 |
+
"images": [{
|
| 163 |
+
"resource": {
|
| 164 |
+
"@id": "https://iiif.bodleian.ox.ac.uk/image/abc123/full/full/0/default.jpg",
|
| 165 |
+
},
|
| 166 |
+
}],
|
| 167 |
+
}
|
| 168 |
+
svc_url, _, _ = _extract_iiif_service(canvas)
|
| 169 |
+
assert svc_url == "https://iiif.bodleian.ox.ac.uk/image/abc123"
|
| 170 |
+
|
| 171 |
+
|
| 172 |
+
def test_extract_iiif2_no_service():
|
| 173 |
+
"""IIIF 2.x sans service et URL statique → None."""
|
| 174 |
+
canvas = {
|
| 175 |
+
"width": 1200,
|
| 176 |
+
"height": 1600,
|
| 177 |
+
"images": [{
|
| 178 |
+
"resource": {
|
| 179 |
+
"@id": "https://example.com/images/scan.png",
|
| 180 |
+
},
|
| 181 |
+
}],
|
| 182 |
+
}
|
| 183 |
+
svc_url, w, h = _extract_iiif_service(canvas)
|
| 184 |
+
assert svc_url is None
|
| 185 |
+
assert w == 1200
|
| 186 |
+
|
| 187 |
+
|
| 188 |
+
# ---------------------------------------------------------------------------
|
| 189 |
+
# _extract_iiif_service — cas limites
|
| 190 |
+
# ---------------------------------------------------------------------------
|
| 191 |
+
|
| 192 |
+
def test_extract_empty_canvas():
|
| 193 |
+
"""Canvas vide → None sans crash."""
|
| 194 |
+
svc_url, w, h = _extract_iiif_service({})
|
| 195 |
+
assert svc_url is None
|
| 196 |
+
assert w is None
|
| 197 |
+
assert h is None
|
| 198 |
+
|
| 199 |
+
|
| 200 |
+
def test_extract_service_url_trailing_slash_stripped():
|
| 201 |
+
"""L'URL du service ne doit pas se terminer par /."""
|
| 202 |
+
canvas = {
|
| 203 |
+
"width": 1000,
|
| 204 |
+
"height": 1000,
|
| 205 |
+
"items": [{
|
| 206 |
+
"items": [{
|
| 207 |
+
"body": {
|
| 208 |
+
"id": "https://example.com/img/full/max/0/default.jpg",
|
| 209 |
+
"service": [{
|
| 210 |
+
"id": "https://example.com/img/",
|
| 211 |
+
"type": "ImageService3",
|
| 212 |
+
}],
|
| 213 |
+
},
|
| 214 |
+
}],
|
| 215 |
+
}],
|
| 216 |
+
}
|
| 217 |
+
svc_url, _, _ = _extract_iiif_service(canvas)
|
| 218 |
+
assert svc_url == "https://example.com/img"
|
| 219 |
+
assert not svc_url.endswith("/")
|
| 220 |
+
|
| 221 |
+
|
| 222 |
+
# ---------------------------------------------------------------------------
|
| 223 |
+
# _detect_iiif_service_from_url — détection depuis URL directe
|
| 224 |
+
# ---------------------------------------------------------------------------
|
| 225 |
+
|
| 226 |
+
def test_detect_from_gallica_url():
|
| 227 |
+
"""URL Gallica complète → service URL déduit."""
|
| 228 |
+
url = "https://gallica.bnf.fr/iiif/ark:/12148/btv1b8432314s/f29/full/max/0/default.jpg"
|
| 229 |
+
assert _detect_iiif_service_from_url(url) == "https://gallica.bnf.fr/iiif/ark:/12148/btv1b8432314s/f29"
|
| 230 |
+
|
| 231 |
+
|
| 232 |
+
def test_detect_from_iiif_url_with_size():
|
| 233 |
+
"""URL avec taille spécifique → service URL déduit."""
|
| 234 |
+
url = "https://example.com/iiif/img1/full/!1500,1500/0/default.jpg"
|
| 235 |
+
assert _detect_iiif_service_from_url(url) == "https://example.com/iiif/img1"
|
| 236 |
+
|
| 237 |
+
|
| 238 |
+
def test_detect_from_static_url_returns_none():
|
| 239 |
+
"""URL statique (pas de pattern IIIF) → None."""
|
| 240 |
+
url = "https://example.com/images/page1.jpg"
|
| 241 |
+
assert _detect_iiif_service_from_url(url) is None
|
| 242 |
+
|
| 243 |
+
|
| 244 |
+
def test_detect_from_iiif_url_different_format():
|
| 245 |
+
"""URL avec format PNG au lieu de JPEG."""
|
| 246 |
+
url = "https://example.com/iiif/img2/full/max/0/default.png"
|
| 247 |
+
assert _detect_iiif_service_from_url(url) == "https://example.com/iiif/img2"
|
backend/tests/test_schemas.py
CHANGED
|
@@ -300,3 +300,85 @@ def test_annotation_layer_all_layer_types():
|
|
| 300 |
created_at=datetime(2026, 3, 16, tzinfo=timezone.utc),
|
| 301 |
)
|
| 302 |
assert layer.layer_type == layer_type
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 300 |
created_at=datetime(2026, 3, 16, tzinfo=timezone.utc),
|
| 301 |
)
|
| 302 |
assert layer.layer_type == layer_type
|
| 303 |
+
|
| 304 |
+
|
| 305 |
+
# ---------------------------------------------------------------------------
|
| 306 |
+
# ImageInfo — compatibilité arrière et champs IIIF natifs
|
| 307 |
+
# ---------------------------------------------------------------------------
|
| 308 |
+
|
| 309 |
+
from app.schemas.page_master import ImageInfo
|
| 310 |
+
from app.schemas.image import ImageSourceInfo
|
| 311 |
+
|
| 312 |
+
|
| 313 |
+
def test_image_info_backward_compat_without_iiif_fields():
|
| 314 |
+
"""Un ImageInfo sans les nouveaux champs IIIF doit toujours valider."""
|
| 315 |
+
info = ImageInfo.model_validate({
|
| 316 |
+
"master": "data/corpora/test/masters/0001r.tif",
|
| 317 |
+
"derivative_web": "data/corpora/test/derivatives/0001r.jpg",
|
| 318 |
+
"width": 2000,
|
| 319 |
+
"height": 3000,
|
| 320 |
+
})
|
| 321 |
+
assert info.iiif_service_url is None
|
| 322 |
+
assert info.manifest_url is None
|
| 323 |
+
|
| 324 |
+
|
| 325 |
+
def test_image_info_with_iiif_service_url():
|
| 326 |
+
"""Un ImageInfo avec iiif_service_url doit valider."""
|
| 327 |
+
info = ImageInfo.model_validate({
|
| 328 |
+
"master": "https://gallica.bnf.fr/iiif/ark:/12148/btv1b8432314s/f29/full/max/0/default.jpg",
|
| 329 |
+
"iiif_service_url": "https://gallica.bnf.fr/iiif/ark:/12148/btv1b8432314s/f29",
|
| 330 |
+
"manifest_url": "https://gallica.bnf.fr/iiif/ark:/12148/btv1b8432314s/manifest.json",
|
| 331 |
+
"width": 3543,
|
| 332 |
+
"height": 4724,
|
| 333 |
+
})
|
| 334 |
+
assert info.iiif_service_url == "https://gallica.bnf.fr/iiif/ark:/12148/btv1b8432314s/f29"
|
| 335 |
+
assert info.manifest_url is not None
|
| 336 |
+
assert info.derivative_web is None
|
| 337 |
+
assert info.thumbnail is None
|
| 338 |
+
|
| 339 |
+
|
| 340 |
+
def test_image_info_iiif_native_no_local_paths():
|
| 341 |
+
"""En mode IIIF natif, derivative_web et thumbnail sont None."""
|
| 342 |
+
info = ImageInfo(
|
| 343 |
+
master="https://example.com/image.jpg",
|
| 344 |
+
iiif_service_url="https://example.com/iiif/img1",
|
| 345 |
+
width=5000,
|
| 346 |
+
height=7000,
|
| 347 |
+
)
|
| 348 |
+
assert info.derivative_web is None
|
| 349 |
+
assert info.thumbnail is None
|
| 350 |
+
assert info.width == 5000
|
| 351 |
+
assert info.height == 7000
|
| 352 |
+
|
| 353 |
+
|
| 354 |
+
def test_page_master_backward_compat_v10(minimal_page_master):
|
| 355 |
+
"""Un PageMaster v1.0 (sans champs IIIF) doit toujours valider."""
|
| 356 |
+
pm = PageMaster.model_validate(minimal_page_master)
|
| 357 |
+
assert pm.schema_version == "1.0"
|
| 358 |
+
assert pm.image.iiif_service_url is None
|
| 359 |
+
|
| 360 |
+
|
| 361 |
+
def test_image_source_info_iiif():
|
| 362 |
+
"""ImageSourceInfo avec service IIIF détecté."""
|
| 363 |
+
info = ImageSourceInfo(
|
| 364 |
+
original_url="https://gallica.bnf.fr/iiif/ark:/12148/btv1b8432314s/f29/full/max/0/default.jpg",
|
| 365 |
+
iiif_service_url="https://gallica.bnf.fr/iiif/ark:/12148/btv1b8432314s/f29",
|
| 366 |
+
manifest_url="https://gallica.bnf.fr/iiif/ark:/12148/btv1b8432314s/manifest.json",
|
| 367 |
+
is_iiif=True,
|
| 368 |
+
original_width=3543,
|
| 369 |
+
original_height=4724,
|
| 370 |
+
)
|
| 371 |
+
assert info.is_iiif is True
|
| 372 |
+
assert "gallica" in info.iiif_service_url
|
| 373 |
+
|
| 374 |
+
|
| 375 |
+
def test_image_source_info_static_fallback():
|
| 376 |
+
"""ImageSourceInfo sans service IIIF (image statique)."""
|
| 377 |
+
info = ImageSourceInfo(
|
| 378 |
+
original_url="https://example.com/static/page1.jpg",
|
| 379 |
+
is_iiif=False,
|
| 380 |
+
original_width=2000,
|
| 381 |
+
original_height=3000,
|
| 382 |
+
)
|
| 383 |
+
assert info.is_iiif is False
|
| 384 |
+
assert info.iiif_service_url is None
|
frontend/src/components/Viewer.tsx
CHANGED
|
@@ -3,14 +3,16 @@ import OpenSeadragon from 'openseadragon'
|
|
| 3 |
import { RetroButton } from './retro'
|
| 4 |
|
| 5 |
interface Props {
|
| 6 |
-
|
|
|
|
|
|
|
|
|
|
| 7 |
onViewerReady?: (viewer: OpenSeadragon.Viewer) => void
|
| 8 |
}
|
| 9 |
|
| 10 |
-
const Viewer: FC<Props> = ({
|
| 11 |
const containerRef = useRef<HTMLDivElement>(null)
|
| 12 |
const viewerRef = useRef<OpenSeadragon.Viewer | null>(null)
|
| 13 |
-
// Ref pour toujours accéder au callback le plus récent (évite stale closure)
|
| 14 |
const onViewerReadyRef = useRef(onViewerReady)
|
| 15 |
onViewerReadyRef.current = onViewerReady
|
| 16 |
|
|
@@ -25,6 +27,7 @@ const Viewer: FC<Props> = ({ imageUrl, onViewerReady }) => {
|
|
| 25 |
animationTime: 0.3,
|
| 26 |
minZoomLevel: 0.1,
|
| 27 |
maxZoomLevel: 20,
|
|
|
|
| 28 |
})
|
| 29 |
|
| 30 |
viewerRef.current = viewer
|
|
@@ -35,15 +38,25 @@ const Viewer: FC<Props> = ({ imageUrl, onViewerReady }) => {
|
|
| 35 |
}
|
| 36 |
}, [])
|
| 37 |
|
|
|
|
|
|
|
|
|
|
| 38 |
useEffect(() => {
|
| 39 |
const viewer = viewerRef.current
|
| 40 |
-
if (!viewer || !
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 41 |
|
| 42 |
-
viewer.open({ type: 'image', url: imageUrl })
|
| 43 |
viewer.addOnceHandler('open', () => {
|
| 44 |
onViewerReadyRef.current?.(viewer)
|
| 45 |
})
|
| 46 |
-
}, [
|
| 47 |
|
| 48 |
return (
|
| 49 |
<div className="relative w-full h-full bg-retro-black">
|
|
|
|
| 3 |
import { RetroButton } from './retro'
|
| 4 |
|
| 5 |
interface Props {
|
| 6 |
+
/** URL du IIIF Image Service (zoom tuilé natif) */
|
| 7 |
+
iiifServiceUrl?: string | null
|
| 8 |
+
/** URL image statique (fallback si pas de service IIIF) */
|
| 9 |
+
fallbackImageUrl?: string | null
|
| 10 |
onViewerReady?: (viewer: OpenSeadragon.Viewer) => void
|
| 11 |
}
|
| 12 |
|
| 13 |
+
const Viewer: FC<Props> = ({ iiifServiceUrl, fallbackImageUrl, onViewerReady }) => {
|
| 14 |
const containerRef = useRef<HTMLDivElement>(null)
|
| 15 |
const viewerRef = useRef<OpenSeadragon.Viewer | null>(null)
|
|
|
|
| 16 |
const onViewerReadyRef = useRef(onViewerReady)
|
| 17 |
onViewerReadyRef.current = onViewerReady
|
| 18 |
|
|
|
|
| 27 |
animationTime: 0.3,
|
| 28 |
minZoomLevel: 0.1,
|
| 29 |
maxZoomLevel: 20,
|
| 30 |
+
crossOriginPolicy: 'Anonymous',
|
| 31 |
})
|
| 32 |
|
| 33 |
viewerRef.current = viewer
|
|
|
|
| 38 |
}
|
| 39 |
}, [])
|
| 40 |
|
| 41 |
+
// Source à ouvrir : préférer le service IIIF (zoom tuilé), sinon image statique
|
| 42 |
+
const source = iiifServiceUrl || fallbackImageUrl || ''
|
| 43 |
+
|
| 44 |
useEffect(() => {
|
| 45 |
const viewer = viewerRef.current
|
| 46 |
+
if (!viewer || !source) return
|
| 47 |
+
|
| 48 |
+
if (iiifServiceUrl) {
|
| 49 |
+
// Zoom tuilé natif — OpenSeadragon fetch info.json et configure les tuiles
|
| 50 |
+
viewer.open(iiifServiceUrl + '/info.json')
|
| 51 |
+
} else {
|
| 52 |
+
// Image statique simple (pas de zoom tuilé)
|
| 53 |
+
viewer.open({ type: 'image', url: source })
|
| 54 |
+
}
|
| 55 |
|
|
|
|
| 56 |
viewer.addOnceHandler('open', () => {
|
| 57 |
onViewerReadyRef.current?.(viewer)
|
| 58 |
})
|
| 59 |
+
}, [source, iiifServiceUrl])
|
| 60 |
|
| 61 |
return (
|
| 62 |
<div className="relative w-full h-full bg-retro-black">
|
frontend/src/lib/api.ts
CHANGED
|
@@ -83,6 +83,10 @@ export interface Page {
|
|
| 83 |
folio_label: string
|
| 84 |
sequence: number
|
| 85 |
image_master_path: string | null
|
|
|
|
|
|
|
|
|
|
|
|
|
| 86 |
processing_status: string
|
| 87 |
confidence_summary: number | null
|
| 88 |
}
|
|
@@ -150,6 +154,8 @@ export interface ImageInfo {
|
|
| 150 |
derivative_web?: string | null
|
| 151 |
thumbnail?: string | null
|
| 152 |
iiif_base?: string | null
|
|
|
|
|
|
|
| 153 |
width: number
|
| 154 |
height: number
|
| 155 |
}
|
|
|
|
| 83 |
folio_label: string
|
| 84 |
sequence: number
|
| 85 |
image_master_path: string | null
|
| 86 |
+
iiif_service_url: string | null
|
| 87 |
+
canvas_width: number | null
|
| 88 |
+
canvas_height: number | null
|
| 89 |
+
manifest_url: string | null
|
| 90 |
processing_status: string
|
| 91 |
confidence_summary: number | null
|
| 92 |
}
|
|
|
|
| 154 |
derivative_web?: string | null
|
| 155 |
thumbnail?: string | null
|
| 156 |
iiif_base?: string | null
|
| 157 |
+
iiif_service_url?: string | null
|
| 158 |
+
manifest_url?: string | null
|
| 159 |
width: number
|
| 160 |
height: number
|
| 161 |
}
|
frontend/src/pages/Editor.tsx
CHANGED
|
@@ -154,7 +154,8 @@ export default function Editor() {
|
|
| 154 |
)
|
| 155 |
}
|
| 156 |
|
| 157 |
-
const
|
|
|
|
| 158 |
const regions = master?.layout?.regions ?? []
|
| 159 |
|
| 160 |
return (
|
|
@@ -194,8 +195,8 @@ export default function Editor() {
|
|
| 194 |
className="flex-1 min-w-0"
|
| 195 |
>
|
| 196 |
<div className="relative w-full h-full">
|
| 197 |
-
<Viewer
|
| 198 |
-
{!
|
| 199 |
<div className="absolute inset-0 flex items-center justify-center bg-retro-gray text-retro-darkgray text-retro-sm">
|
| 200 |
Apercu non disponible
|
| 201 |
</div>
|
|
|
|
| 154 |
)
|
| 155 |
}
|
| 156 |
|
| 157 |
+
const iiifServiceUrl = master?.image?.iiif_service_url ?? null
|
| 158 |
+
const fallbackImageUrl = master?.image?.derivative_web ?? master?.image?.master ?? ''
|
| 159 |
const regions = master?.layout?.regions ?? []
|
| 160 |
|
| 161 |
return (
|
|
|
|
| 195 |
className="flex-1 min-w-0"
|
| 196 |
>
|
| 197 |
<div className="relative w-full h-full">
|
| 198 |
+
<Viewer iiifServiceUrl={iiifServiceUrl} fallbackImageUrl={fallbackImageUrl} onViewerReady={() => {}} />
|
| 199 |
+
{!iiifServiceUrl && !fallbackImageUrl && (
|
| 200 |
<div className="absolute inset-0 flex items-center justify-center bg-retro-gray text-retro-darkgray text-retro-sm">
|
| 201 |
Apercu non disponible
|
| 202 |
</div>
|
frontend/src/pages/Reader.tsx
CHANGED
|
@@ -122,7 +122,8 @@ export default function Reader() {
|
|
| 122 |
}
|
| 123 |
|
| 124 |
const currentPage = pages[currentIndex]
|
| 125 |
-
const
|
|
|
|
| 126 |
const regions: Region[] = master?.layout?.regions ?? []
|
| 127 |
|
| 128 |
return (
|
|
@@ -168,12 +169,12 @@ export default function Reader() {
|
|
| 168 |
statusBar={
|
| 169 |
master
|
| 170 |
? `${master.editorial.status} β v${master.editorial.version}`
|
| 171 |
-
:
|
| 172 |
}
|
| 173 |
className="flex-[7] min-w-0"
|
| 174 |
>
|
| 175 |
<div className="relative w-full h-full">
|
| 176 |
-
<Viewer
|
| 177 |
<RegionOverlay
|
| 178 |
viewer={osdViewer}
|
| 179 |
regions={regions}
|
|
@@ -211,7 +212,7 @@ export default function Reader() {
|
|
| 211 |
)}
|
| 212 |
|
| 213 |
{/* Not analyzed / error badge */}
|
| 214 |
-
{!master && !loading &&
|
| 215 |
<div className="absolute top-2 left-2">
|
| 216 |
{masterError
|
| 217 |
? <RetroBadge variant="error">Erreur: {masterError}</RetroBadge>
|
|
@@ -261,7 +262,7 @@ export default function Reader() {
|
|
| 261 |
</div>
|
| 262 |
) : (
|
| 263 |
<div className="p-3 text-retro-sm text-retro-darkgray">
|
| 264 |
-
{
|
| 265 |
? 'Page non encore analysee par l\'IA.'
|
| 266 |
: 'Aucune image associee a cette page.'
|
| 267 |
}
|
|
|
|
| 122 |
}
|
| 123 |
|
| 124 |
const currentPage = pages[currentIndex]
|
| 125 |
+
const iiifServiceUrl = currentPage.iiif_service_url ?? null
|
| 126 |
+
const fallbackImageUrl = currentPage.image_master_path ?? ''
|
| 127 |
const regions: Region[] = master?.layout?.regions ?? []
|
| 128 |
|
| 129 |
return (
|
|
|
|
| 169 |
statusBar={
|
| 170 |
master
|
| 171 |
? `${master.editorial.status} β v${master.editorial.version}`
|
| 172 |
+
: (iiifServiceUrl || fallbackImageUrl) ? 'Page non analysee' : 'Aucune image'
|
| 173 |
}
|
| 174 |
className="flex-[7] min-w-0"
|
| 175 |
>
|
| 176 |
<div className="relative w-full h-full">
|
| 177 |
+
<Viewer iiifServiceUrl={iiifServiceUrl} fallbackImageUrl={fallbackImageUrl} onViewerReady={handleViewerReady} />
|
| 178 |
<RegionOverlay
|
| 179 |
viewer={osdViewer}
|
| 180 |
regions={regions}
|
|
|
|
| 212 |
)}
|
| 213 |
|
| 214 |
{/* Not analyzed / error badge */}
|
| 215 |
+
{!master && !loading && (iiifServiceUrl || fallbackImageUrl) && (
|
| 216 |
<div className="absolute top-2 left-2">
|
| 217 |
{masterError
|
| 218 |
? <RetroBadge variant="error">Erreur: {masterError}</RetroBadge>
|
|
|
|
| 262 |
</div>
|
| 263 |
) : (
|
| 264 |
<div className="p-3 text-retro-sm text-retro-darkgray">
|
| 265 |
+
{(iiifServiceUrl || fallbackImageUrl)
|
| 266 |
? 'Page non encore analysee par l\'IA.'
|
| 267 |
: 'Aucune image associee a cette page.'
|
| 268 |
}
|