| """ |
| PDF I/O utilities - Load and render PDFs |
| Placeholder for Task 2 |
| """ |
|
|
| from typing import List, Tuple |
| from PIL import Image |
| import io |
|
|
|
|
def load_pdf_pages(pdf_bytes: bytes, dpi: int = 150, max_pages: int = 10) -> List[Image.Image]:
    """
    Load a PDF from memory and render its leading pages to PIL Images.

    Args:
        pdf_bytes: PDF file as bytes
        dpi: Resolution for rendering (default 150); each page is scaled
            by dpi / 72 on both axes
        max_pages: Maximum pages to render (default 10)

    Returns:
        List of PIL Images, one per rendered page. If PyMuPDF (``fitz``)
        cannot be imported, a warning is printed and up to three
        independent grey 800x1000 placeholder images are returned instead
        (never more than ``max_pages``).
    """
    # Only the import itself should trigger the placeholder fallback; an
    # ImportError raised anywhere else must not be mistaken for "PyMuPDF missing".
    try:
        import fitz
    except ImportError:
        print("⚠️ PyMuPDF not available, returning placeholder")
        placeholder_count = max(0, min(3, max_pages))
        # Build a distinct image per slot so that mutating one placeholder
        # does not silently change the others.
        return [
            Image.new("RGB", (800, 1000), color=(200, 200, 200))
            for _ in range(placeholder_count)
        ]

    doc = fitz.open(stream=pdf_bytes, filetype="pdf")
    try:
        zoom = dpi / 72
        matrix = fitz.Matrix(zoom, zoom)  # loop-invariant, build once

        pages = []
        for index in range(min(len(doc), max_pages)):
            # alpha=False keeps the samples at 3 bytes per pixel, which is
            # what the "RGB" decode below expects.
            pix = doc[index].get_pixmap(matrix=matrix, alpha=False)
            pages.append(Image.frombytes("RGB", (pix.width, pix.height), pix.samples))
        return pages
    finally:
        # Release the document even when rendering a page raises.
        doc.close()
|
|
|
|
def get_page_count(pdf_bytes: bytes) -> int:
    """
    Get total page count of PDF.

    This is a best-effort helper: any failure (PyMuPDF not importable,
    data that cannot be opened as a PDF, ...) is reported as 0 rather
    than raised.

    Args:
        pdf_bytes: PDF file as bytes

    Returns:
        Number of pages in the document, or 0 if it could not be determined.
    """
    # Nothing to parse; skip the import and the parser entirely.
    if not pdf_bytes:
        return 0

    try:
        import fitz

        doc = fitz.open(stream=pdf_bytes, filetype="pdf")
        try:
            return len(doc)
        finally:
            # Close the document even if reading the page count fails.
            doc.close()
    except Exception:
        # Deliberate catch-all: callers rely on 0 meaning "unknown / unreadable".
        return 0
|
|