pdf2png / app.py
broadfield-dev's picture
Update app.py
f2decc0 verified
import math
import os
import time

import gradio as gr
import requests
from PIL import Image
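# Ensure the correct libraries are installed: uninstall any package named
# `fitz`, then force-reinstall `pymupdf` and `requests`, before `fitz` is
# imported on the next line.
# NOTE: this shells out to pip every time the module is loaded.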
os.system("pip uninstall -y fitz && pip install --force-reinstall pymupdf requests")
import fitz
# --- Constants for Configuration ---

# HTTP headers sent with every PDF download request.
REQUESTS_HEADERS = {
    'User-Agent': (
        'Mozilla/5.0 (Windows NT 10.0; Win64; x64) '
        'AppleWebKit/537.36 (KHTML, like Gecko) '
        'Chrome/91.0.4472.124 Safari/537.36'
    ),
}

# Scale applied on both axes when rendering a page (2 = 144 DPI).
IMAGE_ZOOM_FACTOR = 2

# Vertical gap, in pixels, left between consecutive pages.
SPACER_HEIGHT = 30

# RGB fill colour of the combined canvas (white).
BACKGROUND_COLOR = (255, 255, 255)
def _open_pdf_document(pdf_file, pdf_url, progress):
    """Open the requested PDF and return the resulting ``fitz`` document.

    An uploaded file takes precedence over the URL.

    Args:
        pdf_file: Gradio file object (anything exposing the local path as
            ``.name``) or a plain path string, or None.
        pdf_url: String URL to a PDF, or None/empty.
        progress: Callable used to report progress to the UI.

    Raises:
        gr.Error: If neither input is usable, the URL does not serve a PDF,
            or the document cannot be downloaded or opened.
    """
    url = pdf_url.strip() if pdf_url else ""

    # Validate up front, outside the try block, so this message reaches the
    # user as-is instead of being swallowed by the generic handler below.
    if pdf_file is None and not url.startswith(('http://', 'https://')):
        raise gr.Error("Please upload a PDF or provide a valid URL.")

    # source_desc is assigned before anything can fail, so the error handler
    # below can always reference it.
    if pdf_file is not None:
        pdf_path = getattr(pdf_file, "name", pdf_file)
        source_desc = f"uploaded file '{os.path.basename(pdf_path)}'"
    else:
        source_desc = f"URL '{url}'"

    try:
        if pdf_file is not None:
            progress(0, desc="Opening uploaded file...")
            return fitz.open(pdf_path)

        progress(0, desc="Fetching PDF from URL...")
        # Fetch the stripped URL: the same string that was validated above.
        response = requests.get(url, headers=REQUESTS_HEADERS, timeout=30)
        response.raise_for_status()
        content_type = response.headers.get('Content-Type', '').lower()
        if 'application/pdf' not in content_type:
            raise gr.Error(f"URL content is not a PDF. Type: '{content_type}'.")
        return fitz.open(stream=response.content, filetype="pdf")
    except gr.Error:
        # Already a user-facing message; do not wrap it a second time.
        raise
    except Exception as e:
        raise gr.Error(f"Failed to load PDF from {source_desc}. Error: {e}") from e


def pdf_to_png_streaming(pdf_file, pdf_url, progress=gr.Progress(track_tqdm=True)):
    """
    Converts a PDF (from file or URL) to a single PNG, streaming the output
    image as each page is processed.

    This function is a generator, yielding the progressively built image.
    All pages are stacked vertically on one canvas, centered horizontally and
    separated by SPACER_HEIGHT pixels. The same image object is yielded after
    every page, with one more page pasted onto it each time.

    Args:
        pdf_file: Gradio file object (or path string); used if not None.
        pdf_url: String URL to a PDF; used only when no file is given.
        progress: Gradio progress object, automatically provided by the UI.

    Yields:
        The combined PIL image after each page has been pasted.

    Raises:
        gr.Error: If no usable input is given, the PDF cannot be loaded, or
            the PDF contains no pages.
    """
    # --- 1. Get PDF Document Object ---
    doc = _open_pdf_document(pdf_file, pdf_url, progress)

    # The finally clause closes the document on every exit path: normal
    # completion, an error while rendering, or the generator being closed
    # before it has produced every page.
    try:
        # --- 2. Pre-scan for Dimensions (for efficient canvas creation) ---
        progress(0, desc="Analyzing PDF layout...")
        page_dims = [page.rect for page in doc]
        if not page_dims:
            raise gr.Error("PDF is valid but contains no pages.")
        page_count = len(page_dims)

        # Calculate final canvas size based on scanned dimensions and zoom.
        # Each page is rounded up on its own: the rendered pixmap size is
        # presumably rounded per page, so truncating the summed height could
        # leave the canvas a few pixels short and clip the last page.
        matrix = fitz.Matrix(IMAGE_ZOOM_FACTOR, IMAGE_ZOOM_FACTOR)
        max_width = max(math.ceil(rect.width * matrix.a) for rect in page_dims)
        total_height = (
            sum(math.ceil(rect.height * matrix.d) for rect in page_dims)
            + SPACER_HEIGHT * (page_count - 1)
        )

        # Create the final canvas ONCE
        combined_img = Image.new("RGB", (max_width, total_height), BACKGROUND_COLOR)

        # --- 3. Render, Paste, and Stream (Yield) ---
        current_y = 0
        for page_num, page in enumerate(doc, start=1):
            progress(page_num / page_count, desc=f"Processing Page {page_num} of {page_count}")

            # Render page to a pixmap
            pix = page.get_pixmap(matrix=matrix, alpha=False)
            page_img = Image.frombytes("RGB", (pix.width, pix.height), pix.samples)

            # Center the page horizontally, then paste it below the previous one
            x_offset = (max_width - pix.width) // 2
            combined_img.paste(page_img, (x_offset, current_y))

            # Update the y-position for the next page
            current_y += pix.height + SPACER_HEIGHT

            # YIELD the progressively built image to update the Gradio UI
            yield combined_img
    finally:
        doc.close()

    print("Streaming conversion complete.")
    # The final yield has already sent the completed image. No return needed.
# --- Gradio Interface Definition using Blocks ---
# Builds the page as `demo`: a Markdown header, a row with an input column
# (file upload, URL box, submit button) and an output column (image), a set
# of example URLs, and the click handler that runs the conversion.
with gr.Blocks(theme=gr.themes.Soft()) as demo:
    gr.Markdown(
        """
# Live PDF to PNG Converter
**Upload a PDF file OR enter a URL.** The output image will update in real-time as each page is processed.
"""
    )
    with gr.Row():
        # Input column: the two alternative PDF sources and the trigger button.
        with gr.Column(scale=1):
            pdf_input = gr.File(label="Upload PDF File")
            url_input = gr.Textbox(
                label="Or Enter PDF URL",
                placeholder="e.g., https://arxiv.org/pdf/1706.03762.pdf"
            )
            submit_btn = gr.Button("Convert to PNG", variant="primary")
        # Output column: declared with twice the scale of the input column.
        with gr.Column(scale=2):
            png_output = gr.Image(label="Live PNG Output", interactive=False)
            # Clears both inputs and the output image.
            # NOTE(review): the original nesting of this button is not
            # recoverable from the source as received; it is placed under the
            # output image here - confirm the intended position.
            clear_btn = gr.ClearButton(components=[pdf_input, url_input, png_output], value="Clear All")
    # Each example row supplies [pdf_input, url_input]; both leave the file
    # empty and fill in only the URL.
    gr.Examples(
        examples=[
            [None, "https://arxiv.org/pdf/1706.03762.pdf"], # "Attention Is All You Need" paper
            [None, "https://www.w3.org/WAI/ER/tests/xhtml/testfiles/resources/pdf/dummy.pdf"],
        ],
        inputs=[pdf_input, url_input]
    )
    # Wire the button to the streaming function
    # pdf_to_png_streaming is a generator; its yielded images are sent to
    # png_output.
    submit_btn.click(
        fn=pdf_to_png_streaming,
        inputs=[pdf_input, url_input],
        outputs=png_output
    )
# Launch the app
# Runs only when this file is executed as a script, not when it is imported.
if __name__ == "__main__":
    print("Starting Gradio app with streaming...")
    demo.launch()
    # Reached once demo.launch() has returned.
    print("Gradio app finished.")