Spaces:
Build error
feat(schemas): IIIF-native Sprint 1 — schema evolution for streaming architecture
Browse files
Foundation for IIIF-native image streaming (no local image storage):
- ImageInfo: add iiif_service_url (IIIF Image Service URL for tiled zoom)
and manifest_url (provenance). All new fields have None defaults for
100% backward compatibility with existing master.json files.
- ImageSourceInfo: new companion schema for the pipeline — describes image
source without local file paths (iiif_service_url, is_iiif flag).
- PageModel: add iiif_service_url, canvas_width, canvas_height, manifest_url
columns (all nullable) for storing IIIF metadata at ingestion time.
- PageResponse: expose new fields to frontend in both pages.py and manuscripts.py.
- Frontend api.ts: add IIIF fields to Page and ImageInfo interfaces.
- 6 new tests: backward compat (v1.0 without IIIF fields validates),
IIIF-native ImageInfo, ImageSourceInfo with/without service.
569 tests pass (+6 new), 0 regressions. TypeScript clean.
https://claude.ai/code/session_01UB4he7RdRPHLvNjky4X8Sw
- backend/app/api/v1/manuscripts.py +4 -0
- backend/app/api/v1/pages.py +4 -0
- backend/app/models/corpus.py +4 -0
- backend/app/schemas/image.py +25 -2
- backend/app/schemas/page_master.py +17 -8
- backend/tests/test_schemas.py +82 -0
- frontend/src/lib/api.ts +6 -0
|
@@ -24,6 +24,10 @@ class PageResponse(BaseModel):
|
|
| 24 |
folio_label: str
|
| 25 |
sequence: int
|
| 26 |
image_master_path: str | None
|
|
|
|
|
|
|
|
|
|
|
|
|
| 27 |
processing_status: str
|
| 28 |
confidence_summary: float | None
|
| 29 |
|
|
|
|
| 24 |
folio_label: str
|
| 25 |
sequence: int
|
| 26 |
image_master_path: str | None
|
| 27 |
+
iiif_service_url: str | None = None
|
| 28 |
+
canvas_width: int | None = None
|
| 29 |
+
canvas_height: int | None = None
|
| 30 |
+
manifest_url: str | None = None
|
| 31 |
processing_status: str
|
| 32 |
confidence_summary: float | None
|
| 33 |
|
|
@@ -64,6 +64,10 @@ class PageResponse(BaseModel):
|
|
| 64 |
folio_label: str
|
| 65 |
sequence: int
|
| 66 |
image_master_path: str | None
|
|
|
|
|
|
|
|
|
|
|
|
|
| 67 |
processing_status: str
|
| 68 |
confidence_summary: float | None
|
| 69 |
|
|
|
|
| 64 |
folio_label: str
|
| 65 |
sequence: int
|
| 66 |
image_master_path: str | None
|
| 67 |
+
iiif_service_url: str | None = None
|
| 68 |
+
canvas_width: int | None = None
|
| 69 |
+
canvas_height: int | None = None
|
| 70 |
+
manifest_url: str | None = None
|
| 71 |
processing_status: str
|
| 72 |
confidence_summary: float | None
|
| 73 |
|
|
@@ -74,6 +74,10 @@ class PageModel(Base):
|
|
| 74 |
folio_label: Mapped[str] = mapped_column(String, nullable=False)
|
| 75 |
sequence: Mapped[int] = mapped_column(Integer, nullable=False)
|
| 76 |
image_master_path: Mapped[str | None] = mapped_column(Text, nullable=True)
|
|
|
|
|
|
|
|
|
|
|
|
|
| 77 |
processing_status: Mapped[str] = mapped_column(
|
| 78 |
String, nullable=False, default="CREATED"
|
| 79 |
)
|
|
|
|
| 74 |
folio_label: Mapped[str] = mapped_column(String, nullable=False)
|
| 75 |
sequence: Mapped[int] = mapped_column(Integer, nullable=False)
|
| 76 |
image_master_path: Mapped[str | None] = mapped_column(Text, nullable=True)
|
| 77 |
+
iiif_service_url: Mapped[str | None] = mapped_column(Text, nullable=True)
|
| 78 |
+
canvas_width: Mapped[int | None] = mapped_column(Integer, nullable=True)
|
| 79 |
+
canvas_height: Mapped[int | None] = mapped_column(Integer, nullable=True)
|
| 80 |
+
manifest_url: Mapped[str | None] = mapped_column(Text, nullable=True)
|
| 81 |
processing_status: Mapped[str] = mapped_column(
|
| 82 |
String, nullable=False, default="CREATED"
|
| 83 |
)
|
|
@@ -1,12 +1,19 @@
|
|
| 1 |
"""
|
| 2 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
| 3 |
"""
|
| 4 |
# 2. third-party
|
| 5 |
from pydantic import BaseModel
|
| 6 |
|
| 7 |
|
| 8 |
class ImageDerivativeInfo(BaseModel):
|
| 9 |
-
"""Résultat de la normalisation d'une image : dimensions originales et chemins des dérivés.
|
|
|
|
|
|
|
|
|
|
| 10 |
|
| 11 |
original_url: str
|
| 12 |
original_width: int
|
|
@@ -17,3 +24,19 @@ class ImageDerivativeInfo(BaseModel):
|
|
| 17 |
thumbnail_path: str
|
| 18 |
thumbnail_width: int
|
| 19 |
thumbnail_height: int
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
"""
|
| 2 |
+
Schémas Pydantic pour les métadonnées image du pipeline.
|
| 3 |
+
|
| 4 |
+
Deux schémas coexistent :
|
| 5 |
+
- ImageDerivativeInfo : dérivés stockés sur disque (upload de fichiers)
|
| 6 |
+
- ImageSourceInfo : source IIIF sans stockage local (mode natif)
|
| 7 |
"""
|
| 8 |
# 2. third-party
|
| 9 |
from pydantic import BaseModel
|
| 10 |
|
| 11 |
|
| 12 |
class ImageDerivativeInfo(BaseModel):
|
| 13 |
+
"""Résultat de la normalisation d'une image : dimensions originales et chemins des dérivés.
|
| 14 |
+
|
| 15 |
+
Utilisé pour les images uploadées via /ingest/files (stockage local).
|
| 16 |
+
"""
|
| 17 |
|
| 18 |
original_url: str
|
| 19 |
original_width: int
|
|
|
|
| 24 |
thumbnail_path: str
|
| 25 |
thumbnail_width: int
|
| 26 |
thumbnail_height: int
|
| 27 |
+
|
| 28 |
+
|
| 29 |
+
class ImageSourceInfo(BaseModel):
|
| 30 |
+
"""Source d'image IIIF — pas de stockage local.
|
| 31 |
+
|
| 32 |
+
Utilisé pour les images ingérées via manifest ou URLs IIIF.
|
| 33 |
+
Les bytes ne sont jamais écrits sur disque : téléchargés en RAM
|
| 34 |
+
pour l'IA, puis jetés.
|
| 35 |
+
"""
|
| 36 |
+
|
| 37 |
+
original_url: str # URL statique de l'image (fallback)
|
| 38 |
+
iiif_service_url: str | None = None # URL du service IIIF Image API
|
| 39 |
+
manifest_url: str | None = None # URL du manifest source
|
| 40 |
+
is_iiif: bool = False # a un IIIF Image Service détecté ?
|
| 41 |
+
original_width: int
|
| 42 |
+
original_height: int
|
|
@@ -38,14 +38,23 @@ class Region(BaseModel):
|
|
| 38 |
|
| 39 |
|
| 40 |
class ImageInfo(BaseModel):
|
| 41 |
-
"""Métadonnées image — CLAUDE.md §4.2.
|
| 42 |
-
|
| 43 |
-
|
| 44 |
-
|
| 45 |
-
|
| 46 |
-
|
| 47 |
-
|
| 48 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 49 |
|
| 50 |
|
| 51 |
class OCRResult(BaseModel):
|
|
|
|
| 38 |
|
| 39 |
|
| 40 |
class ImageInfo(BaseModel):
|
| 41 |
+
"""Métadonnées image — CLAUDE.md §4.2.
|
| 42 |
+
|
| 43 |
+
Supporte deux modes :
|
| 44 |
+
- IIIF natif : iiif_service_url renseigné, images streamées depuis le serveur
|
| 45 |
+
d'origine (pas de stockage local). derivative_web / thumbnail = None.
|
| 46 |
+
- Upload local : master = chemin local, derivative_web / thumbnail = chemins
|
| 47 |
+
des dérivés sur disque (mode legacy ou upload de fichiers).
|
| 48 |
+
"""
|
| 49 |
+
|
| 50 |
+
master: str # URL source (service IIIF ou statique) ou chemin local
|
| 51 |
+
derivative_web: str | None = None # chemin dérivé 1500px (legacy/upload)
|
| 52 |
+
thumbnail: str | None = None # chemin thumbnail 256px (legacy/upload)
|
| 53 |
+
iiif_base: str | None = None # compat arrière
|
| 54 |
+
iiif_service_url: str | None = None # URL du IIIF Image Service (zoom tuilé)
|
| 55 |
+
manifest_url: str | None = None # URL du manifest source (provenance)
|
| 56 |
+
width: int # largeur du canvas original
|
| 57 |
+
height: int # hauteur du canvas original
|
| 58 |
|
| 59 |
|
| 60 |
class OCRResult(BaseModel):
|
|
@@ -300,3 +300,85 @@ def test_annotation_layer_all_layer_types():
|
|
| 300 |
created_at=datetime(2026, 3, 16, tzinfo=timezone.utc),
|
| 301 |
)
|
| 302 |
assert layer.layer_type == layer_type
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 300 |
created_at=datetime(2026, 3, 16, tzinfo=timezone.utc),
|
| 301 |
)
|
| 302 |
assert layer.layer_type == layer_type
|
| 303 |
+
|
| 304 |
+
|
| 305 |
+
# ---------------------------------------------------------------------------
|
| 306 |
+
# ImageInfo — compatibilité arrière et champs IIIF natifs
|
| 307 |
+
# ---------------------------------------------------------------------------
|
| 308 |
+
|
| 309 |
+
from app.schemas.page_master import ImageInfo
|
| 310 |
+
from app.schemas.image import ImageSourceInfo
|
| 311 |
+
|
| 312 |
+
|
| 313 |
+
def test_image_info_backward_compat_without_iiif_fields():
|
| 314 |
+
"""Un ImageInfo sans les nouveaux champs IIIF doit toujours valider."""
|
| 315 |
+
info = ImageInfo.model_validate({
|
| 316 |
+
"master": "data/corpora/test/masters/0001r.tif",
|
| 317 |
+
"derivative_web": "data/corpora/test/derivatives/0001r.jpg",
|
| 318 |
+
"width": 2000,
|
| 319 |
+
"height": 3000,
|
| 320 |
+
})
|
| 321 |
+
assert info.iiif_service_url is None
|
| 322 |
+
assert info.manifest_url is None
|
| 323 |
+
|
| 324 |
+
|
| 325 |
+
def test_image_info_with_iiif_service_url():
|
| 326 |
+
"""Un ImageInfo avec iiif_service_url doit valider."""
|
| 327 |
+
info = ImageInfo.model_validate({
|
| 328 |
+
"master": "https://gallica.bnf.fr/iiif/ark:/12148/btv1b8432314s/f29/full/max/0/default.jpg",
|
| 329 |
+
"iiif_service_url": "https://gallica.bnf.fr/iiif/ark:/12148/btv1b8432314s/f29",
|
| 330 |
+
"manifest_url": "https://gallica.bnf.fr/iiif/ark:/12148/btv1b8432314s/manifest.json",
|
| 331 |
+
"width": 3543,
|
| 332 |
+
"height": 4724,
|
| 333 |
+
})
|
| 334 |
+
assert info.iiif_service_url == "https://gallica.bnf.fr/iiif/ark:/12148/btv1b8432314s/f29"
|
| 335 |
+
assert info.manifest_url is not None
|
| 336 |
+
assert info.derivative_web is None
|
| 337 |
+
assert info.thumbnail is None
|
| 338 |
+
|
| 339 |
+
|
| 340 |
+
def test_image_info_iiif_native_no_local_paths():
|
| 341 |
+
"""En mode IIIF natif, derivative_web et thumbnail sont None."""
|
| 342 |
+
info = ImageInfo(
|
| 343 |
+
master="https://example.com/image.jpg",
|
| 344 |
+
iiif_service_url="https://example.com/iiif/img1",
|
| 345 |
+
width=5000,
|
| 346 |
+
height=7000,
|
| 347 |
+
)
|
| 348 |
+
assert info.derivative_web is None
|
| 349 |
+
assert info.thumbnail is None
|
| 350 |
+
assert info.width == 5000
|
| 351 |
+
assert info.height == 7000
|
| 352 |
+
|
| 353 |
+
|
| 354 |
+
def test_page_master_backward_compat_v10(minimal_page_master):
|
| 355 |
+
"""Un PageMaster v1.0 (sans champs IIIF) doit toujours valider."""
|
| 356 |
+
pm = PageMaster.model_validate(minimal_page_master)
|
| 357 |
+
assert pm.schema_version == "1.0"
|
| 358 |
+
assert pm.image.iiif_service_url is None
|
| 359 |
+
|
| 360 |
+
|
| 361 |
+
def test_image_source_info_iiif():
|
| 362 |
+
"""ImageSourceInfo avec service IIIF détecté."""
|
| 363 |
+
info = ImageSourceInfo(
|
| 364 |
+
original_url="https://gallica.bnf.fr/iiif/ark:/12148/btv1b8432314s/f29/full/max/0/default.jpg",
|
| 365 |
+
iiif_service_url="https://gallica.bnf.fr/iiif/ark:/12148/btv1b8432314s/f29",
|
| 366 |
+
manifest_url="https://gallica.bnf.fr/iiif/ark:/12148/btv1b8432314s/manifest.json",
|
| 367 |
+
is_iiif=True,
|
| 368 |
+
original_width=3543,
|
| 369 |
+
original_height=4724,
|
| 370 |
+
)
|
| 371 |
+
assert info.is_iiif is True
|
| 372 |
+
assert "gallica" in info.iiif_service_url
|
| 373 |
+
|
| 374 |
+
|
| 375 |
+
def test_image_source_info_static_fallback():
|
| 376 |
+
"""ImageSourceInfo sans service IIIF (image statique)."""
|
| 377 |
+
info = ImageSourceInfo(
|
| 378 |
+
original_url="https://example.com/static/page1.jpg",
|
| 379 |
+
is_iiif=False,
|
| 380 |
+
original_width=2000,
|
| 381 |
+
original_height=3000,
|
| 382 |
+
)
|
| 383 |
+
assert info.is_iiif is False
|
| 384 |
+
assert info.iiif_service_url is None
|
|
@@ -83,6 +83,10 @@ export interface Page {
|
|
| 83 |
folio_label: string
|
| 84 |
sequence: number
|
| 85 |
image_master_path: string | null
|
|
|
|
|
|
|
|
|
|
|
|
|
| 86 |
processing_status: string
|
| 87 |
confidence_summary: number | null
|
| 88 |
}
|
|
@@ -150,6 +154,8 @@ export interface ImageInfo {
|
|
| 150 |
derivative_web?: string | null
|
| 151 |
thumbnail?: string | null
|
| 152 |
iiif_base?: string | null
|
|
|
|
|
|
|
| 153 |
width: number
|
| 154 |
height: number
|
| 155 |
}
|
|
|
|
| 83 |
folio_label: string
|
| 84 |
sequence: number
|
| 85 |
image_master_path: string | null
|
| 86 |
+
iiif_service_url: string | null
|
| 87 |
+
canvas_width: number | null
|
| 88 |
+
canvas_height: number | null
|
| 89 |
+
manifest_url: string | null
|
| 90 |
processing_status: string
|
| 91 |
confidence_summary: number | null
|
| 92 |
}
|
|
|
|
| 154 |
derivative_web?: string | null
|
| 155 |
thumbnail?: string | null
|
| 156 |
iiif_base?: string | null
|
| 157 |
+
iiif_service_url?: string | null
|
| 158 |
+
manifest_url?: string | null
|
| 159 |
width: number
|
| 160 |
height: number
|
| 161 |
}
|