Spaces:

broadfield-dev
/

pdf2png

Sleeping

App Files Files Community

pdf2png / app.py

broadfield-dev

Update app.py

f2decc0 verified 2 months ago

raw

history blame contribute delete

5.25 kB

	import gradio as gr
	from PIL import Image
	import os
	import requests
	import time

	# Runtime dependency fix-up, executed every time this module is loaded:
	# uninstall any package named `fitz`, then (only if that succeeds, because of
	# the shell `&&`) force-reinstall `pymupdf` and `requests`.
	# The exit status of os.system() is not checked, so a failed install only
	# surfaces as an error at the `import fitz` line below.
	# NOTE(review): presumably the standalone PyPI `fitz` package clashes with the
	# `fitz` module that PyMuPDF provides — confirm, and consider declaring these
	# dependencies in requirements.txt instead of shelling out at startup.
	os.system("pip uninstall -y fitz && pip install --force-reinstall pymupdf requests")
	# Kept below the os.system() call on purpose: the import is meant to pick up
	# whatever the command above just installed.
	import fitz

	# --- Tunable settings shared by the converter ---

	# Headers attached to every PDF download; the User-Agent presents the
	# client as a desktop Chrome browser.
	REQUESTS_HEADERS = {
	    "User-Agent": (
	        "Mozilla/5.0 (Windows NT 10.0; Win64; x64) "
	        "AppleWebKit/537.36 (KHTML, like Gecko) "
	        "Chrome/91.0.4472.124 Safari/537.36"
	    ),
	}

	# Scale applied to both axes when a page is rasterised (2 = 144 DPI).
	IMAGE_ZOOM_FACTOR = 2

	# Blank vertical gap left between consecutive pages on the canvas.
	SPACER_HEIGHT = 30

	# RGB fill colour of the canvas: white.
	BACKGROUND_COLOR = (255, 255, 255)

	def _open_pdf_document(pdf_file, pdf_url, progress):
	    """
	    Open the PDF named by the upload or, when no file was given, by the URL.

	    Args:
	        pdf_file: Value of the Gradio file input, or None. Either an object
	            exposing the file path as ``.name`` or the path string itself.
	        pdf_url: URL string; may be None, empty, or padded with whitespace.
	        progress: Callable used to report status to the UI.

	    Returns:
	        An open ``fitz`` document. The caller is responsible for closing it.

	    Raises:
	        gr.Error: If neither input is usable, the URL does not serve a PDF,
	            or downloading/opening the document fails.
	    """
	    if pdf_file is not None:
	        # Accept both a file wrapper with `.name` and a plain path string.
	        pdf_path = getattr(pdf_file, "name", pdf_file)
	        source_desc = f"uploaded file '{os.path.basename(pdf_path)}'"
	        progress(0, desc="Opening uploaded file...")
	        try:
	            return fitz.open(pdf_path)
	        except Exception as e:
	            raise gr.Error(f"Failed to load PDF from {source_desc}. Error: {e}") from e

	    # Validate and request the same, whitespace-stripped URL.
	    url = (pdf_url or "").strip()
	    if not url.startswith(('http://', 'https://')):
	        # Raised outside any try block so the message reaches the user as-is.
	        raise gr.Error("Please upload a PDF or provide a valid URL.")

	    source_desc = f"URL '{url}'"
	    progress(0, desc="Fetching PDF from URL...")
	    try:
	        response = requests.get(url, headers=REQUESTS_HEADERS, timeout=30)
	        response.raise_for_status()
	        content_type = response.headers.get('Content-Type', '').lower()
	        if 'application/pdf' not in content_type:
	            raise gr.Error(f"URL content is not a PDF. Type: '{content_type}'.")
	        return fitz.open(stream=response.content, filetype="pdf")
	    except gr.Error:
	        # Already a user-facing message; do not wrap it a second time.
	        raise
	    except Exception as e:
	        raise gr.Error(f"Failed to load PDF from {source_desc}. Error: {e}") from e


	def pdf_to_png_streaming(pdf_file, pdf_url, progress=gr.Progress(track_tqdm=True)):
	    """
	    Converts a PDF (from file or URL) to a single PNG, streaming the output
	    image as each page is processed.

	    This function is a generator: after every page it yields the canvas
	    with all pages rendered so far stacked vertically and centred.

	    Args:
	        pdf_file: Gradio file object (or path), or None. Takes precedence
	            over ``pdf_url`` when both are given.
	        pdf_url: String URL to a PDF.
	        progress: Gradio progress object, automatically provided by the UI.

	    Yields:
	        The same PIL image object each time, progressively filled in.

	    Raises:
	        gr.Error: If no usable input is given, the PDF cannot be loaded,
	            or the document has no pages.
	    """
	    import math  # local import: only needed for the canvas size computation

	    # --- 1. Get PDF Document Object ---
	    doc = _open_pdf_document(pdf_file, pdf_url, progress)

	    # try/finally guarantees the document is closed even when rendering
	    # fails or the consumer stops iterating before the last page.
	    try:
	        # --- 2. Pre-scan for Dimensions (for efficient canvas creation) ---
	        progress(0, desc="Analyzing PDF layout...")
	        page_dims = [page.rect for page in doc]
	        if not page_dims:
	            raise gr.Error("PDF is valid but contains no pages.")

	        matrix = fitz.Matrix(IMAGE_ZOOM_FACTOR, IMAGE_ZOOM_FACTOR)
	        page_count = doc.page_count

	        # Round every page up on its own: pages are rasterised one at a
	        # time, so truncating the *summed* height could leave the canvas a
	        # few pixels short and clip the bottom of the last page.
	        # NOTE(review): assumes a pixmap is never larger than
	        # ceil(page size * zoom) — confirm against PyMuPDF's rounding.
	        max_width = max(math.ceil(rect.width * matrix.a) for rect in page_dims)
	        total_height = (
	            sum(math.ceil(rect.height * matrix.d) for rect in page_dims)
	            + SPACER_HEIGHT * (page_count - 1)
	        )

	        # Create the final canvas ONCE
	        combined_img = Image.new("RGB", (max_width, total_height), BACKGROUND_COLOR)

	        # --- 3. Render, Paste, and Stream (Yield) ---
	        current_y = 0
	        for page_num, page in enumerate(doc, start=1):
	            progress(page_num / page_count, desc=f"Processing Page {page_num} of {page_count}")

	            # Render page to a pixmap
	            pix = page.get_pixmap(matrix=matrix, alpha=False)
	            page_img = Image.frombytes("RGB", [pix.width, pix.height], pix.samples)

	            # Centre narrower pages horizontally on the canvas
	            x_offset = (max_width - pix.width) // 2
	            combined_img.paste(page_img, (x_offset, current_y))

	            # Advance by the real rendered height plus the spacer
	            current_y += pix.height + SPACER_HEIGHT

	            # YIELD the progressively built image to update the Gradio UI
	            yield combined_img
	    finally:
	        doc.close()

	    print("Streaming conversion complete.")
	    # The final yield has already sent the completed image. No return needed.


	# --- Gradio Interface Definition using Blocks ---
	# Declares the page: a heading, the two PDF inputs with a submit button,
	# the image output, a clear button, clickable examples, and the click wiring.
	# The resulting app object is bound to `demo`.
	with gr.Blocks(theme=gr.themes.Soft()) as demo:
	gr.Markdown(
	"""
	# Live PDF to PNG Converter
	Upload a PDF file OR enter a URL. The output image will update in real-time as each page is processed.
	"""
	)
	with gr.Row():
	with gr.Column(scale=1):
	# Two alternative sources: pdf_to_png_streaming checks the upload first and
	# only looks at the URL when no file was provided.
	pdf_input = gr.File(label="Upload PDF File")
	url_input = gr.Textbox(
	label="Or Enter PDF URL",
	placeholder="e.g., https://arxiv.org/pdf/1706.03762.pdf"
	)
	submit_btn = gr.Button("Convert to PNG", variant="primary")
	with gr.Column(scale=2):
	# Display-only image (interactive=False); it is the sole output of the
	# click handler registered below.
	png_output = gr.Image(label="Live PNG Output", interactive=False)

	# Clear button is given both inputs and the output image as its components.
	clear_btn = gr.ClearButton(components=[pdf_input, url_input, png_output], value="Clear All")

	# Each example row maps positionally onto [pdf_input, url_input]; None leaves
	# the upload empty so only the URL is supplied.
	gr.Examples(
	examples=[
	[None, "https://arxiv.org/pdf/1706.03762.pdf"], # "Attention Is All You Need" paper
	[None, "https://www.w3.org/WAI/ER/tests/xhtml/testfiles/resources/pdf/dummy.pdf"],
	],
	inputs=[pdf_input, url_input]
	)

	# Wire the button to the streaming function. pdf_to_png_streaming is a
	# generator, and every image it yields is sent to png_output.
	submit_btn.click(
	fn=pdf_to_png_streaming,
	inputs=[pdf_input, url_input],
	outputs=png_output
	)

	# Launch the app only when this file is executed as a script, not when it
	# is imported as a module.
	if __name__ == "__main__":
	print("Starting Gradio app with streaming...")
	# NOTE(review): launch() presumably blocks until the server stops, so the
	# message below would only print on shutdown — confirm.
	demo.launch()
	print("Gradio app finished.")