MHamdan's picture
Initial commit: SPARKNET framework
d520909
"""
Image Document Loading
Handles single images and multi-page TIFF documents.
"""
import logging
from pathlib import Path
from typing import Iterator, List, Optional, Tuple, Union
import numpy as np
from PIL import Image
from .base import (
DocumentFormat,
DocumentInfo,
DocumentLoader,
PageInfo,
PageRenderer,
RenderOptions,
)
# Module-level logger, named after this module via ``__name__``.
logger = logging.getLogger(__name__)
class ImageLoader(DocumentLoader):
    """
    Image document loader.

    Handles common image formats (JPEG, PNG, etc.) and multi-page TIFF.
    Every page is decoded into an in-memory RGB ``PIL.Image`` during
    :meth:`load`; the source file handle is closed before ``load`` returns.
    """

    SUPPORTED_EXTENSIONS = {
        ".jpg", ".jpeg", ".png", ".bmp", ".gif",
        ".tif", ".tiff", ".webp"
    }

    # DPI reported when an image carries no usable resolution metadata.
    _DEFAULT_DPI = 72

    def __init__(self):
        self._images: List[Image.Image] = []
        self._info: Optional[DocumentInfo] = None
        self._path: Optional[Path] = None

    def load(self, path: Union[str, Path]) -> DocumentInfo:
        """
        Load image(s) and extract metadata.

        Args:
            path: Location of the image file. The extension (case-insensitive)
                must be one of ``SUPPORTED_EXTENSIONS``.

        Returns:
            A ``DocumentInfo`` describing the file, with one ``PageInfo`` per
            decoded page.

        Raises:
            FileNotFoundError: If ``path`` does not exist.
            ValueError: If the extension is unsupported, or a TIFF yields no
                pages.
        """
        source = Path(path)
        if not source.exists():
            raise FileNotFoundError(f"Image file not found: {source}")
        suffix = source.suffix.lower()
        if suffix not in self.SUPPORTED_EXTENSIONS:
            raise ValueError(f"Unsupported image format: {suffix}")

        # Only touch loader state once validation has passed, so a rejected
        # path leaves the previously loaded document fully intact.
        self.close()
        self._info = None
        self._path = source

        is_tiff = suffix in {".tif", ".tiff"}

        # The context manager closes the underlying file once decoding is
        # done; the pages we keep are independent in-memory copies.
        with Image.open(source) as img:
            if is_tiff:
                self._load_multipage_tiff(img)
            else:
                self._images = [img.convert("RGB")]

        pages = [
            PageInfo(
                page_number=number,
                width_pixels=page_img.width,
                height_pixels=page_img.height,
                dpi=self._page_dpi(page_img),
                has_images=True,
            )
            for number, page_img in enumerate(self._images, start=1)
        ]

        # Only a TIFF that actually contains several frames is "multipage".
        if is_tiff and len(self._images) > 1:
            doc_format = DocumentFormat.TIFF_MULTIPAGE
        else:
            doc_format = DocumentFormat.IMAGE

        self._info = DocumentInfo(
            path=self._path,
            format=doc_format,
            num_pages=len(self._images),
            pages=pages,
            file_size_bytes=self._path.stat().st_size,
            is_scanned=True,  # Images are always "scanned"
            has_text_layer=False,
        )
        return self._info

    @classmethod
    def _page_dpi(cls, image: Image.Image) -> int:
        """
        Return the horizontal DPI of ``image`` as an integer.

        Reads ``image.info["dpi"]`` (a tuple or a scalar). The value is
        rounded to the nearest integer rather than truncated, because the
        metadata may be fractional (e.g. 299.9994 for a nominal 300 DPI).
        Missing, empty or non-numeric metadata falls back to the default DPI.
        """
        raw = image.info.get("dpi", (cls._DEFAULT_DPI, cls._DEFAULT_DPI))
        if isinstance(raw, tuple):
            raw = raw[0] if raw else cls._DEFAULT_DPI
        try:
            return int(round(float(raw)))
        except (TypeError, ValueError, OverflowError):
            # NaN / infinity / non-numeric resolution metadata.
            return cls._DEFAULT_DPI

    def _load_multipage_tiff(self, img: Image.Image) -> None:
        """
        Load all pages from a multi-page TIFF.

        Seeks through the frames of ``img`` until ``EOFError`` signals the end
        and stores an RGB copy of each frame in ``self._images``.

        Raises:
            ValueError: If no frame could be read.
        """
        self._images = []
        frames: List[Image.Image] = []
        page_num = 0
        while True:
            try:
                img.seek(page_num)
            except EOFError:
                # Reached end of TIFF
                break
            # convert() decodes the current frame and returns a new image,
            # detached from the lazily-loaded source file.
            frames.append(img.convert("RGB"))
            page_num += 1

        if not frames:
            raise ValueError("No pages found in TIFF file")
        # Assign only after every frame decoded, so a failure part-way
        # through never leaves a partial document behind.
        self._images = frames

    def close(self) -> None:
        """Close all loaded images (best effort) and forget them."""
        for img in self._images:
            try:
                img.close()
            except Exception as exc:  # best-effort cleanup must not raise
                logger.debug("Ignoring error while closing image: %s", exc)
        self._images = []

    def is_loaded(self) -> bool:
        """Check if images are loaded."""
        return bool(self._images)

    @property
    def info(self) -> Optional[DocumentInfo]:
        """Get document info (``None`` until a document has been loaded)."""
        return self._info

    def get_image(self, page_number: int) -> Image.Image:
        """
        Get PIL Image for a specific page (1-indexed).

        Raises:
            RuntimeError: If no images are loaded.
            ValueError: If ``page_number`` is outside ``1..number of pages``.
        """
        if not self._images:
            raise RuntimeError("No images loaded")
        if page_number < 1 or page_number > len(self._images):
            raise ValueError(f"Invalid page number: {page_number}")
        return self._images[page_number - 1]
class ImageRenderer(PageRenderer):
    """
    Image page renderer.

    Renders pages held by an ``ImageLoader`` as NumPy arrays, with optional
    DPI-based resizing and color-mode conversion.
    """

    def __init__(self, loader: ImageLoader):
        self._loader = loader

    @staticmethod
    def _source_dpi(img: Image.Image) -> int:
        """
        Return the horizontal DPI stored in ``img.info`` as an integer.

        Defaults to 72 when the image has no ``"dpi"`` entry. The value is
        rounded to the nearest integer because the metadata may be fractional
        (e.g. 72.009); comparing that raw value against the requested DPI
        would trigger a needless resample. Returns 0 for empty or non-numeric
        metadata, which makes the caller skip resizing.
        """
        raw = img.info.get("dpi", (72, 72))
        if isinstance(raw, tuple):
            raw = raw[0] if raw else 0
        try:
            return int(round(float(raw)))
        except (TypeError, ValueError, OverflowError):
            return 0

    def render_page(
        self,
        page_number: int,
        options: Optional[RenderOptions] = None
    ) -> np.ndarray:
        """
        Render an image page.

        Args:
            page_number: 1-indexed page to render.
            options: Render settings; a default ``RenderOptions()`` is used
                when omitted. This method reads ``dpi``, ``antialias`` and
                ``color_mode`` from it.

        Returns:
            The page as a NumPy array in the requested color mode
            (``"L"``, ``"RGBA"``, otherwise RGB).

        Raises:
            RuntimeError: If the loader has no document loaded.
            ValueError: If ``page_number`` is rejected by the loader.
        """
        if not self._loader.is_loaded():
            raise RuntimeError("No document loaded")

        options = options or RenderOptions()
        img = self._loader.get_image(page_number)

        # Rescale only when the source resolution is known (> 0) and differs
        # from the requested one.
        original_dpi = self._source_dpi(img)
        if original_dpi > 0 and options.dpi != original_dpi:
            scale = options.dpi / original_dpi
            # Clamp to one pixel: a zero-sized target makes resize() fail
            # when a thin image is scaled down.
            new_size = (
                max(1, int(img.width * scale)),
                max(1, int(img.height * scale)),
            )
            resample = Image.LANCZOS if options.antialias else Image.NEAREST
            img = img.resize(new_size, resample=resample)

        # Any color mode other than "L" / "RGBA" is rendered as RGB.
        target_mode = options.color_mode if options.color_mode in ("L", "RGBA") else "RGB"
        img = img.convert(target_mode)

        return np.array(img)

    def render_pages(
        self,
        page_numbers: Optional[List[int]] = None,
        options: Optional[RenderOptions] = None
    ) -> Iterator[Tuple[int, np.ndarray]]:
        """
        Render multiple pages.

        Yields ``(page_number, array)`` pairs, rendering each page only when
        it is requested. When ``page_numbers`` is omitted, every page of the
        loaded document is rendered in order.

        This is a generator, so the "No document loaded" ``RuntimeError`` is
        raised when iteration starts, not when the method is called.
        """
        if not self._loader.is_loaded():
            raise RuntimeError("No document loaded")

        info = self._loader.info
        if page_numbers is None:
            page_numbers = list(range(1, info.num_pages + 1))

        for page_num in page_numbers:
            yield page_num, self.render_page(page_num, options)
def load_image(path: Union[str, Path]) -> Tuple[ImageLoader, ImageRenderer]:
    """
    Open an image document and pair it with a renderer.

    Builds a fresh ``ImageLoader``, loads ``path`` into it, and wraps the
    loader in an ``ImageRenderer``.

    Returns:
        Tuple of (loader, renderer)
    """
    image_loader = ImageLoader()
    image_loader.load(path)
    return image_loader, ImageRenderer(image_loader)