pdf_to_single_image

Sleeping

App Files Files Community

pdf_to_single_image / pdf_processor.py

tsphan

breaks away from single file

576a588 6 months ago

raw

history blame contribute delete

7.7 kB

	# pdf_processor.py
	"""
	Handles the core logic of converting a PDF document into a single image.
	"""

	import fitz # PyMuPDF
	from PIL import Image
	import io
	import streamlit as st # Imported for progress bar updates
	from typing import Tuple, List, Union

	# Constants
	DEFAULT_PDF_DPI = 72 # Baseline DPI: a target DPI is divided by this value to obtain the page zoom factor
	JPEG_QUALITY = 95 # Passed as the `quality` argument when the stitched image is saved as JPEG

	def calculate_image_dimensions(pdf_document: fitz.Document, dpi: int) -> Tuple[int, int, List[float]]:
	    """
	    Calculate the canvas size needed to stack every page vertically.

	    Each page is scaled by ``dpi / DEFAULT_PDF_DPI`` and its pixel size is
	    accumulated: the canvas must be as wide as the widest page and as tall
	    as all pages together.

	    Parameters
	    ----------
	    pdf_document : fitz.Document
	        The opened PyMuPDF document object.
	    dpi : int
	        The target resolution in dots per inch.

	    Returns
	    -------
	    Tuple[int, int, List[float]]
	        A tuple containing:
	        - max_width (int): The largest page width in pixels.
	        - total_height (int): The sum of all page heights in pixels.
	        - zooms (List[float]): One zoom factor per page (all identical,
	          because the zoom depends only on ``dpi``).

	        For a non-positive ``dpi`` both dimensions are reported as 0 so
	        that callers can treat the result as "nothing to render".
	    """
	    num_pages = len(pdf_document)

	    # The zoom depends only on the requested DPI, so compute it once
	    # instead of once per page.
	    zoom = dpi / DEFAULT_PDF_DPI
	    zooms = [zoom] * num_pages

	    # A zero or negative scale cannot produce a meaningful raster; report
	    # an empty canvas rather than negative or mirrored dimensions.
	    if zoom <= 0:
	        return 0, 0, zooms

	    scale_matrix = fitz.Matrix(zoom, zoom)
	    total_height = 0
	    max_width = 0

	    for page_num in range(num_pages):
	        page = pdf_document[page_num]
	        # Truncating with int() under-counts fractional sizes (an A4 page
	        # at 300 DPI is ~3507.9 px tall), which can leave the canvas a few
	        # pixels short and crop the last page when the pages are pasted.
	        # Rounding the scaled rectangle to its containing integer rectangle
	        # avoids that.
	        # NOTE(review): this is expected to match the pixmap size produced
	        # by page.get_pixmap() for the same matrix - confirm against PyMuPDF.
	        pixel_rect = (page.rect * scale_matrix).irect
	        max_width = max(max_width, pixel_rect.width)
	        total_height += pixel_rect.height

	    return max_width, total_height, zooms

	def render_pages_to_image(
	    pdf_document: fitz.Document,
	    zooms: List[float],
	    canvas_width: int,
	    canvas_height: int
	) -> Image.Image:
	    """
	    Rasterise every page and stack the results top-to-bottom on one canvas.

	    Progress is reported through a Streamlit progress bar and a status
	    placeholder while the pages are processed.

	    Parameters
	    ----------
	    pdf_document : fitz.Document
	        The opened PyMuPDF document object.
	    zooms : List[float]
	        Zoom factor to apply to each page, indexed by page number.
	    canvas_width : int
	        Width in pixels of the output canvas.
	    canvas_height : int
	        Height in pixels of the output canvas.

	    Returns
	    -------
	    Image.Image
	        An RGB PIL image with a white background onto which every page has
	        been pasted at the left edge, one below the other.

	    Raises
	    ------
	    ValueError
	        If a page's pixel data cannot be read as RGB and the pixmap has no
	        alpha channel to fall back on.
	    """
	    page_count = len(pdf_document)

	    # White RGB canvas that receives all pages.
	    canvas = Image.new("RGB", (canvas_width, canvas_height), (255, 255, 255))

	    # Streamlit widgets used to report progress.
	    bar = st.progress(0)
	    status = st.empty()

	    y_offset = 0
	    for index in range(page_count):
	        display_number = index + 1
	        status.text(f"Processing page {display_number}/{page_count}...")

	        current_page = pdf_document[index]
	        scale = zooms[index]

	        # Rasterise the page at the requested scale.
	        pixmap = current_page.get_pixmap(matrix=fitz.Matrix(scale, scale))

	        # Wrap the raw samples in a PIL image; RGB is tried first.
	        try:
	            tile = Image.frombytes("RGB", [pixmap.width, pixmap.height], pixmap.samples)
	        except ValueError as exc:
	            st.error(f"Error converting page {display_number} to Image: {exc}")
	            st.warning(f"Pixmap details: width={pixmap.width}, height={pixmap.height}, alpha={pixmap.alpha}, samples length={len(pixmap.samples)}")
	            # Without an alpha channel there is nothing else to try.
	            if not pixmap.alpha:
	                raise
	            # Alpha present: decode as RGBA and flatten back to RGB.
	            tile = Image.frombytes("RGBA", [pixmap.width, pixmap.height], pixmap.samples).convert("RGB")
	            st.info("Retrying page conversion with RGBA mode.")

	        # Place the page directly below the previous one.
	        canvas.paste(tile, (0, y_offset))
	        y_offset += pixmap.height

	        bar.progress(display_number / page_count)

	    status.text("Rendering complete!")
	    return canvas

	def pdf_to_single_image(pdf_path: str, output_format: str = "PNG", dpi: int = 300) -> io.BytesIO:
	    """
	    Converts all pages of a PDF file into a single vertical image.

	    Opens the PDF, calculates the required dimensions, renders each page
	    at the specified DPI, stitches them together vertically, and returns
	    the result as an image in a BytesIO buffer.

	    Parameters
	    ----------
	    pdf_path : str
	        The file path to the input PDF document.
	    output_format : str, optional
	        The desired output image format ("PNG", "JPG" or "JPEG",
	        case-insensitive), by default "PNG". Any other value produces a
	        Streamlit warning and falls back to PNG.
	    dpi : int, optional
	        The resolution (dots per inch) for rendering the PDF pages, by default 300.

	    Returns
	    -------
	    io.BytesIO
	        A buffer positioned at offset 0 containing the encoded image. The
	        buffer is empty when the computed canvas width or height is 0.

	    Raises
	    ------
	    Exception
	        Any error raised while opening, rendering or saving is reported
	        through ``st.error`` and then re-raised unchanged.
	    """
	    # NOTE(review): it is unclear whether every PyMuPDF release exposes
	    # `fitz.FitzError`. An `except` clause evaluates its expression only
	    # while an exception is propagating, so a missing attribute would
	    # replace the real error with an AttributeError. Resolve it up front;
	    # an empty tuple matches nothing, leaving the generic handler in charge.
	    pdf_error_types = getattr(fitz, "FitzError", ())

	    pdf_document = None # Initialize to ensure it's defined in finally block
	    try:
	        # Open the PDF document
	        pdf_document = fitz.open(pdf_path)

	        # Calculate the necessary dimensions for the final image
	        canvas_width, canvas_height, zooms = calculate_image_dimensions(pdf_document, dpi)

	        if canvas_width == 0 or canvas_height == 0:
	            st.warning("Could not determine valid dimensions for the PDF. It might be empty or corrupted.")
	            return io.BytesIO() # Return empty buffer

	        # Render pages onto the canvas
	        result_image = render_pages_to_image(pdf_document, zooms, canvas_width, canvas_height)

	        # Create an in-memory buffer to save the image
	        img_buffer = io.BytesIO()

	        # Normalise once so the comparisons below are case-insensitive
	        requested_format = output_format.upper()
	        if requested_format == "PNG":
	            result_image.save(img_buffer, format="PNG")
	        elif requested_format in ("JPG", "JPEG"):
	            # JPEG has no alpha channel, so flatten RGBA input before saving
	            if result_image.mode == 'RGBA':
	                result_image = result_image.convert('RGB')
	            result_image.save(img_buffer, format="JPEG", quality=JPEG_QUALITY)
	        else:
	            # Default to PNG if format is unknown
	            st.warning(f"Unsupported format '{output_format}'. Defaulting to PNG.")
	            result_image.save(img_buffer, format="PNG")

	        # Reset buffer position to the beginning for reading
	        img_buffer.seek(0)

	        return img_buffer

	    except pdf_error_types as e:
	        st.error(f"Error processing PDF: {e}")
	        raise # Re-raise the specific exception
	    except Exception as e:
	        st.error(f"An unexpected error occurred during conversion: {e}")
	        raise # Re-raise general exceptions
	    finally:
	        # Compare against None rather than relying on truthiness: the
	        # document object supports len(), so a document with zero pages
	        # would be falsy and would otherwise never be closed.
	        if pdf_document is not None:
	            pdf_document.close()