import io
import os
from typing import Literal

import fitz
from PIL import Image


def determine_file_type(file_path: str) -> Literal['pdf', 'image']:
    """Classify a path as ``'pdf'`` or ``'image'`` from its file extension.

    Only the extension is inspected (case-insensitively); the file is not
    opened and does not need to exist.

    Args:
        file_path: Path whose extension decides the file type.

    Returns:
        ``'pdf'`` for ``.pdf``; ``'image'`` for ``.jpg``, ``.jpeg``, ``.png``,
        ``.bmp``, ``.gif`` or ``.tiff``.

    Raises:
        ValueError: If the extension is missing or not one of the above.
    """
    _, file_extension = os.path.splitext(file_path)
    normalized = file_extension.lower()

    if normalized == '.pdf':
        return 'pdf'
    if normalized in ('.jpg', '.jpeg', '.png', '.bmp', '.gif', '.tiff'):
        return 'image'

    supported_types = ', '.join(
        ['PDF', 'JPG', 'JPEG', 'PNG', 'BMP', 'GIF', 'TIFF']
    )
    # Report the extension exactly as given (not lower-cased) so the message
    # matches what the caller passed in.
    raise ValueError(
        f"Unsupported file type: {file_extension}. "
        f"Only the following file types are supported: {supported_types}"
    )


def get_bytes_from_path(file_path: str) -> bytes:
    """Return the entire content of the file at *file_path* as bytes.

    Args:
        file_path: Path of the file to read in binary mode.

    Returns:
        The raw bytes of the file.
    """
    with open(file_path, 'rb') as file:
        return file.read()


def get_PIL_image_from_path(file_path: str) -> Image.Image:
    """Open the file at *file_path* with Pillow and return the image.

    Args:
        file_path: Path of the image file to open.

    Returns:
        The object returned by ``Image.open``.
    """
    # NOTE: per the Pillow documentation ``Image.open`` is lazy; pixel data
    # is read on first use, so the caller owns the returned image and is
    # responsible for closing it.
    return Image.open(file_path)


def get_images_from_path(file_path: str) -> list[Image.Image]:
    """Load a supported file as a list of PIL images.

    An image file yields a single-element list. A PDF yields one image per
    page, in page order, each rendered with the default ``get_pixmap()``
    settings.

    Args:
        file_path: Path of a PDF or image file.

    Returns:
        The list of images; empty for a PDF that has no pages.

    Raises:
        ValueError: If the file extension is not supported (raised by
            ``determine_file_type``).
    """
    if determine_file_type(file_path) == 'image':
        return [get_PIL_image_from_path(file_path)]

    # Anything that is not an image is a PDF: determine_file_type raises for
    # every other extension.
    doc = fitz.open(file_path)
    try:
        images = []
        for page in doc:
            pix = page.get_pixmap()
            # Round-trip through an in-memory PPM so each image owns a copy
            # of its data and no longer depends on the document.
            images.append(Image.open(io.BytesIO(pix.tobytes("ppm"))))
        return images
    finally:
        # Release the document even when rendering a page fails part-way.
        doc.close()