| from __future__ import annotations
|
|
|
| import base64
|
| import io
|
| import os
|
| import shutil
|
| import uuid
|
| from pathlib import Path
|
|
|
| from PIL import Image
|
|
|
| from state import ImageRef
|
|
|
|
|
class ImageStore:
    """Disk-based image manager.

    LangGraph state only carries lightweight ``ImageRef`` dicts; all heavy
    image bytes live on disk. Every image is written as PNG into one of three
    sub-directories of ``base_dir``: ``pages``, ``crops`` or ``annotated``.

    Each ``save_*`` method returns an ``ImageRef`` built with the fields
    ``id``, ``path``, ``label``, ``page_num``, ``crop_type``, ``width`` and
    ``height``. The loaders only read ``ref["path"]``.
    """

    def __init__(self, base_dir: str):
        """Create ``base_dir`` (with parents) and its three sub-directories.

        Args:
            base_dir: Root directory of the store; created if it is missing.
        """
        self.base_dir = Path(base_dir)
        self.base_dir.mkdir(parents=True, exist_ok=True)
        self._pages_dir = self.base_dir / "pages"
        self._crops_dir = self.base_dir / "crops"
        self._annotated_dir = self.base_dir / "annotated"
        for sub_dir in (self._pages_dir, self._crops_dir, self._annotated_dir):
            sub_dir.mkdir(exist_ok=True)

    # ------------------------------------------------------------------ save

    def save_page_image(self, page_num: int, image_bytes: bytes) -> ImageRef:
        """Decode ``image_bytes`` and store it as ``pages/page_<page_num>.png``.

        An existing file for the same page number is overwritten.

        Args:
            page_num: Page number used in the file name and the ref id.
            image_bytes: Encoded image data in any format Pillow can open.

        Returns:
            An ``ImageRef`` with ``crop_type="full_page"`` and the decoded
            image's dimensions.
        """
        img = Image.open(io.BytesIO(image_bytes))
        path = self._pages_dir / f"page_{page_num}.png"
        img.save(str(path), format="PNG")
        return ImageRef(
            id=f"page_{page_num}",
            path=str(path),
            label=f"Page {page_num} (full page)",
            page_num=page_num,
            crop_type="full_page",
            width=img.width,
            height=img.height,
        )

    def save_crop(
        self,
        page_num: int,
        crop_id: str,
        image: Image.Image,
        label: str,
    ) -> ImageRef:
        """Store ``image`` as ``crops/page_<page_num>_<crop_id>.png``.

        Args:
            page_num: Page the crop was taken from.
            crop_id: Identifier of the crop; becomes part of the file name
                and of the ref id.
            image: The cropped image to write.
            label: Human-readable label copied into the returned ref.

        Returns:
            An ``ImageRef`` with ``crop_type="crop"``.
        """
        crop_key = f"page_{page_num}_{crop_id}"
        path = self._crops_dir / f"{crop_key}.png"
        image.save(str(path), format="PNG")
        return ImageRef(
            id=crop_key,
            path=str(path),
            label=label,
            page_num=page_num,
            crop_type="crop",
            width=image.width,
            height=image.height,
        )

    def save_annotated(
        self,
        source_ref: ImageRef,
        annotated_image: Image.Image,
    ) -> ImageRef:
        """Store an annotated version of ``source_ref`` as ``annotated/<id>_ann.png``.

        The id is derived from the source id alone, so annotating the same
        source again overwrites the previous annotated file.

        Args:
            source_ref: Ref of the image that was annotated; its ``id``,
                ``label`` and ``page_num`` are read.
            annotated_image: The annotated image to write.

        Returns:
            An ``ImageRef`` with ``crop_type="annotated"`` and the source
            label suffixed with ``" [annotated]"``.
        """
        ann_id = f"{source_ref['id']}_ann"
        path = self._annotated_dir / f"{ann_id}.png"
        annotated_image.save(str(path), format="PNG")
        return ImageRef(
            id=ann_id,
            path=str(path),
            label=f"{source_ref['label']} [annotated]",
            page_num=source_ref["page_num"],
            crop_type="annotated",
            width=annotated_image.width,
            height=annotated_image.height,
        )

    # ------------------------------------------------------------------ load

    def load_image(self, ref: ImageRef) -> Image.Image:
        """Open the image behind ``ref`` and return it fully decoded.

        ``Image.open`` is lazy: it keeps the file open and defers reading the
        pixel data. Files in this store can be overwritten (same id saved
        again) or removed by ``cleanup()``, so the data is read eagerly here;
        the returned image no longer depends on the file on disk.

        Args:
            ref: Ref whose ``path`` points at the image file.

        Returns:
            The decoded image.
        """
        img = Image.open(ref["path"])
        try:
            img.load()
        except Exception:
            # Do not leave the file handle dangling when decoding fails.
            img.close()
            raise
        return img

    def load_bytes(self, ref: ImageRef) -> bytes:
        """Return the raw file contents behind ``ref["path"]``."""
        with open(ref["path"], "rb") as f:
            return f.read()

    def get_page_image_path(self, page_num: int) -> str:
        """Return the path where the full-page image of ``page_num`` is stored.

        The path is only computed; the file is not checked for existence.
        """
        return str(self._pages_dir / f"page_{page_num}.png")

    def load_page_bytes(self, page_num: int) -> bytes:
        """Return the raw PNG bytes of the stored full-page image."""
        with open(self.get_page_image_path(page_num), "rb") as f:
            return f.read()

    # --------------------------------------------------------------- convert

    def to_gemini_part(self, ref: ImageRef):
        """Return a ``google.genai.types.Part`` for Gemini multimodal prompts."""
        # Imported lazily so the store is usable without google-genai installed.
        from google.genai import types

        return types.Part.from_bytes(
            data=self.load_bytes(ref),
            mime_type="image/png",
        )

    def to_openai_base64(self, ref: ImageRef) -> dict:
        """Return an OpenAI-compatible image content block (base64 data URI)."""
        b64 = base64.b64encode(self.load_bytes(ref)).decode("utf-8")
        return {
            "type": "image_url",
            "image_url": {"url": f"data:image/png;base64,{b64}"},
        }

    def create_thumbnail(self, ref: ImageRef, max_size: int = 400) -> bytes:
        """Return PNG bytes of the image scaled to fit ``max_size`` x ``max_size``.

        Args:
            ref: Ref of the image to shrink; the file on disk is not modified.
            max_size: Bounding-box edge length passed to ``Image.thumbnail``.

        Returns:
            The PNG-encoded thumbnail.
        """
        buf = io.BytesIO()
        # The context manager releases the image as soon as it is encoded.
        with self.load_image(ref) as img:
            img.thumbnail((max_size, max_size))
            img.save(buf, format="PNG")
        return buf.getvalue()

    # --------------------------------------------------------------- cleanup

    def cleanup(self) -> None:
        """Delete ``base_dir`` and everything in it; deletion errors are ignored."""
        if self.base_dir.exists():
            shutil.rmtree(self.base_dir, ignore_errors=True)
|
|
|