Claude committed on
Commit
1865b8a
·
unverified ·
1 Parent(s): ed62931

feat(schemas): IIIF-native Sprint 1 — schema evolution for streaming architecture

Browse files

Foundation for IIIF-native image streaming (no local image storage):

- ImageInfo: add iiif_service_url (IIIF Image Service URL for tiled zoom)
and manifest_url (provenance). All new fields have None defaults for
100% backward compatibility with existing master.json files.
- ImageSourceInfo: new companion schema for the pipeline — describes an image
source without local file paths (iiif_service_url, is_iiif flag).
- PageModel: add iiif_service_url, canvas_width, canvas_height, manifest_url
columns (all nullable) for storing IIIF metadata at ingestion time.
- PageResponse: expose new fields to frontend in both pages.py and manuscripts.py.
- Frontend api.ts: add IIIF fields to Page and ImageInfo interfaces.
- 6 new tests: backward compat (v1.0 without IIIF fields validates),
IIIF-native ImageInfo, ImageSourceInfo with/without service.

569 tests pass (+6 new), 0 regressions. TypeScript clean.

https://claude.ai/code/session_01UB4he7RdRPHLvNjky4X8Sw

backend/app/api/v1/manuscripts.py CHANGED
@@ -24,6 +24,10 @@ class PageResponse(BaseModel):
24
  folio_label: str
25
  sequence: int
26
  image_master_path: str | None
 
 
 
 
27
  processing_status: str
28
  confidence_summary: float | None
29
 
 
24
  folio_label: str
25
  sequence: int
26
  image_master_path: str | None
27
+ iiif_service_url: str | None = None
28
+ canvas_width: int | None = None
29
+ canvas_height: int | None = None
30
+ manifest_url: str | None = None
31
  processing_status: str
32
  confidence_summary: float | None
33
 
backend/app/api/v1/pages.py CHANGED
@@ -64,6 +64,10 @@ class PageResponse(BaseModel):
64
  folio_label: str
65
  sequence: int
66
  image_master_path: str | None
 
 
 
 
67
  processing_status: str
68
  confidence_summary: float | None
69
 
 
64
  folio_label: str
65
  sequence: int
66
  image_master_path: str | None
67
+ iiif_service_url: str | None = None
68
+ canvas_width: int | None = None
69
+ canvas_height: int | None = None
70
+ manifest_url: str | None = None
71
  processing_status: str
72
  confidence_summary: float | None
73
 
backend/app/models/corpus.py CHANGED
@@ -74,6 +74,10 @@ class PageModel(Base):
74
  folio_label: Mapped[str] = mapped_column(String, nullable=False)
75
  sequence: Mapped[int] = mapped_column(Integer, nullable=False)
76
  image_master_path: Mapped[str | None] = mapped_column(Text, nullable=True)
 
 
 
 
77
  processing_status: Mapped[str] = mapped_column(
78
  String, nullable=False, default="CREATED"
79
  )
 
74
  folio_label: Mapped[str] = mapped_column(String, nullable=False)
75
  sequence: Mapped[int] = mapped_column(Integer, nullable=False)
76
  image_master_path: Mapped[str | None] = mapped_column(Text, nullable=True)
77
+ iiif_service_url: Mapped[str | None] = mapped_column(Text, nullable=True)
78
+ canvas_width: Mapped[int | None] = mapped_column(Integer, nullable=True)
79
+ canvas_height: Mapped[int | None] = mapped_column(Integer, nullable=True)
80
+ manifest_url: Mapped[str | None] = mapped_column(Text, nullable=True)
81
  processing_status: Mapped[str] = mapped_column(
82
  String, nullable=False, default="CREATED"
83
  )
backend/app/schemas/image.py CHANGED
@@ -1,12 +1,19 @@
1
  """
2
- Schéma Pydantic pour les métadonnées du dérivé image produit par le pipeline.
 
 
 
 
3
  """
4
  # 2. third-party
5
  from pydantic import BaseModel
6
 
7
 
8
  class ImageDerivativeInfo(BaseModel):
9
- """Résultat de la normalisation d'une image : dimensions originales et chemins des dérivés."""
 
 
 
10
 
11
  original_url: str
12
  original_width: int
@@ -17,3 +24,19 @@ class ImageDerivativeInfo(BaseModel):
17
  thumbnail_path: str
18
  thumbnail_width: int
19
  thumbnail_height: int
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
  """
2
+ Schémas Pydantic pour les métadonnées image du pipeline.
3
+
4
+ Deux schémas coexistent :
5
+ - ImageDerivativeInfo : dérivés stockés sur disque (upload de fichiers)
6
+ - ImageSourceInfo : source IIIF sans stockage local (mode natif)
7
  """
8
  # 2. third-party
9
  from pydantic import BaseModel
10
 
11
 
12
  class ImageDerivativeInfo(BaseModel):
13
+ """Résultat de la normalisation d'une image : dimensions originales et chemins des dérivés.
14
+
15
+ Utilisé pour les images uploadées via /ingest/files (stockage local).
16
+ """
17
 
18
  original_url: str
19
  original_width: int
 
24
  thumbnail_path: str
25
  thumbnail_width: int
26
  thumbnail_height: int
27
+
28
+
29
+ class ImageSourceInfo(BaseModel):
30
+ """Source d'image IIIF — pas de stockage local.
31
+
32
+ Utilisé pour les images ingérées via manifest ou URLs IIIF.
33
+ Les bytes ne sont jamais écrits sur disque : téléchargés en RAM
34
+ pour l'IA, puis jetés.
35
+ """
36
+
37
+ original_url: str # URL statique de l'image (fallback)
38
+ iiif_service_url: str | None = None # URL du service IIIF Image API
39
+ manifest_url: str | None = None # URL du manifest source
40
+ is_iiif: bool = False # a un IIIF Image Service détecté ?
41
+ original_width: int
42
+ original_height: int
backend/app/schemas/page_master.py CHANGED
@@ -38,14 +38,23 @@ class Region(BaseModel):
38
 
39
 
40
  class ImageInfo(BaseModel):
41
- """Métadonnées image — CLAUDE.md §4.2."""
42
-
43
- master: str
44
- derivative_web: str | None = None
45
- thumbnail: str | None = None
46
- iiif_base: str | None = None
47
- width: int
48
- height: int
 
 
 
 
 
 
 
 
 
49
 
50
 
51
  class OCRResult(BaseModel):
 
38
 
39
 
40
  class ImageInfo(BaseModel):
41
+ """Métadonnées image — CLAUDE.md §4.2.
42
+
43
+ Supporte deux modes :
44
+ - IIIF natif : iiif_service_url renseigné, images streamées depuis le serveur
45
+ d'origine (pas de stockage local). derivative_web / thumbnail = None.
46
+ - Upload local : master = chemin local, derivative_web / thumbnail = chemins
47
+ des dérivés sur disque (mode legacy ou upload de fichiers).
48
+ """
49
+
50
+ master: str # URL source (service IIIF ou statique) ou chemin local
51
+ derivative_web: str | None = None # chemin dérivé 1500px (legacy/upload)
52
+ thumbnail: str | None = None # chemin thumbnail 256px (legacy/upload)
53
+ iiif_base: str | None = None # compat arrière
54
+ iiif_service_url: str | None = None # URL du IIIF Image Service (zoom tuilé)
55
+ manifest_url: str | None = None # URL du manifest source (provenance)
56
+ width: int # largeur du canvas original
57
+ height: int # hauteur du canvas original
58
 
59
 
60
  class OCRResult(BaseModel):
backend/tests/test_schemas.py CHANGED
@@ -300,3 +300,85 @@ def test_annotation_layer_all_layer_types():
300
  created_at=datetime(2026, 3, 16, tzinfo=timezone.utc),
301
  )
302
  assert layer.layer_type == layer_type
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
300
  created_at=datetime(2026, 3, 16, tzinfo=timezone.utc),
301
  )
302
  assert layer.layer_type == layer_type
303
+
304
+
305
+ # ---------------------------------------------------------------------------
306
+ # ImageInfo — compatibilité arrière et champs IIIF natifs
307
+ # ---------------------------------------------------------------------------
308
+
309
+ from app.schemas.page_master import ImageInfo
310
+ from app.schemas.image import ImageSourceInfo
311
+
312
+
313
+ def test_image_info_backward_compat_without_iiif_fields():
314
+ """Un ImageInfo sans les nouveaux champs IIIF doit toujours valider."""
315
+ info = ImageInfo.model_validate({
316
+ "master": "data/corpora/test/masters/0001r.tif",
317
+ "derivative_web": "data/corpora/test/derivatives/0001r.jpg",
318
+ "width": 2000,
319
+ "height": 3000,
320
+ })
321
+ assert info.iiif_service_url is None
322
+ assert info.manifest_url is None
323
+
324
+
325
+ def test_image_info_with_iiif_service_url():
326
+ """Un ImageInfo avec iiif_service_url doit valider."""
327
+ info = ImageInfo.model_validate({
328
+ "master": "https://gallica.bnf.fr/iiif/ark:/12148/btv1b8432314s/f29/full/max/0/default.jpg",
329
+ "iiif_service_url": "https://gallica.bnf.fr/iiif/ark:/12148/btv1b8432314s/f29",
330
+ "manifest_url": "https://gallica.bnf.fr/iiif/ark:/12148/btv1b8432314s/manifest.json",
331
+ "width": 3543,
332
+ "height": 4724,
333
+ })
334
+ assert info.iiif_service_url == "https://gallica.bnf.fr/iiif/ark:/12148/btv1b8432314s/f29"
335
+ assert info.manifest_url is not None
336
+ assert info.derivative_web is None
337
+ assert info.thumbnail is None
338
+
339
+
340
+ def test_image_info_iiif_native_no_local_paths():
341
+ """En mode IIIF natif, derivative_web et thumbnail sont None."""
342
+ info = ImageInfo(
343
+ master="https://example.com/image.jpg",
344
+ iiif_service_url="https://example.com/iiif/img1",
345
+ width=5000,
346
+ height=7000,
347
+ )
348
+ assert info.derivative_web is None
349
+ assert info.thumbnail is None
350
+ assert info.width == 5000
351
+ assert info.height == 7000
352
+
353
+
354
+ def test_page_master_backward_compat_v10(minimal_page_master):
355
+ """Un PageMaster v1.0 (sans champs IIIF) doit toujours valider."""
356
+ pm = PageMaster.model_validate(minimal_page_master)
357
+ assert pm.schema_version == "1.0"
358
+ assert pm.image.iiif_service_url is None
359
+
360
+
361
+ def test_image_source_info_iiif():
362
+ """ImageSourceInfo avec service IIIF détecté."""
363
+ info = ImageSourceInfo(
364
+ original_url="https://gallica.bnf.fr/iiif/ark:/12148/btv1b8432314s/f29/full/max/0/default.jpg",
365
+ iiif_service_url="https://gallica.bnf.fr/iiif/ark:/12148/btv1b8432314s/f29",
366
+ manifest_url="https://gallica.bnf.fr/iiif/ark:/12148/btv1b8432314s/manifest.json",
367
+ is_iiif=True,
368
+ original_width=3543,
369
+ original_height=4724,
370
+ )
371
+ assert info.is_iiif is True
372
+ assert "gallica" in info.iiif_service_url
373
+
374
+
375
+ def test_image_source_info_static_fallback():
376
+ """ImageSourceInfo sans service IIIF (image statique)."""
377
+ info = ImageSourceInfo(
378
+ original_url="https://example.com/static/page1.jpg",
379
+ is_iiif=False,
380
+ original_width=2000,
381
+ original_height=3000,
382
+ )
383
+ assert info.is_iiif is False
384
+ assert info.iiif_service_url is None
frontend/src/lib/api.ts CHANGED
@@ -83,6 +83,10 @@ export interface Page {
83
  folio_label: string
84
  sequence: number
85
  image_master_path: string | null
 
 
 
 
86
  processing_status: string
87
  confidence_summary: number | null
88
  }
@@ -150,6 +154,8 @@ export interface ImageInfo {
150
  derivative_web?: string | null
151
  thumbnail?: string | null
152
  iiif_base?: string | null
 
 
153
  width: number
154
  height: number
155
  }
 
83
  folio_label: string
84
  sequence: number
85
  image_master_path: string | null
86
+ iiif_service_url: string | null
87
+ canvas_width: number | null
88
+ canvas_height: number | null
89
+ manifest_url: string | null
90
  processing_status: string
91
  confidence_summary: number | null
92
  }
 
154
  derivative_web?: string | null
155
  thumbnail?: string | null
156
  iiif_base?: string | null
157
+ iiif_service_url?: string | null
158
+ manifest_url?: string | null
159
  width: number
160
  height: number
161
  }