# Captured from a Hugging Face Space page (status shown at capture time: Sleeping).
import os
import subprocess
import sys
import time

import gradio as gr
import requests
from PIL import Image

# Ensure the correct libraries are installed.
# The package named "fitz" is removed and PyMuPDF is (re)installed before
# `import fitz` below -- presumably because a separate PyPI package called
# "fitz" would otherwise shadow the module that PyMuPDF provides.
# pip is invoked through the running interpreter (`sys.executable -m pip`)
# so the packages land in the environment this script actually uses, and the
# arguments are passed as a list so no shell is involved.
# As with the former `a && b` shell command, the install step only runs when
# the uninstall step exits successfully.
if subprocess.run(
    [sys.executable, "-m", "pip", "uninstall", "-y", "fitz"]
).returncode == 0:
    subprocess.run(
        [
            sys.executable, "-m", "pip", "install",
            "--force-reinstall", "pymupdf", "requests",
        ]
    )

import fitz  # noqa: E402  (must come after the install step above)
# --- Constants for Configuration ---

# HTTP headers sent when downloading a PDF from a URL; the User-Agent mimics
# a desktop Chrome browser.
REQUESTS_HEADERS = {
    'User-Agent': (
        'Mozilla/5.0 (Windows NT 10.0; Win64; x64) '
        'AppleWebKit/537.36 (KHTML, like Gecko) '
        'Chrome/91.0.4472.124 Safari/537.36'
    ),
}

# Scale factor applied to both axes when rendering a page.
IMAGE_ZOOM_FACTOR = 2  # 2 = 144 DPI

# Vertical space (in pixels) inserted between consecutive pages.
SPACER_HEIGHT = 30

# RGB fill colour of the combined canvas.
BACKGROUND_COLOR = (255, 255, 255)  # White
def _open_pdf_document(pdf_file, pdf_url, progress):
    """Open the PDF given by an upload or a URL and return the fitz document.

    The uploaded file takes precedence over the URL when both are supplied.

    Args:
        pdf_file: Gradio upload; either a path string or an object whose
            ``.name`` attribute is the path. May be None.
        pdf_url: String URL to a PDF. May be None or empty.
        progress: Callable used to report status to the UI.

    Raises:
        gr.Error: If neither input is usable, the download fails, the
            response is not declared as a PDF, or the document cannot be
            opened.
    """
    if pdf_file is not None:
        progress(0, desc="Opening uploaded file...")
        # Depending on the Gradio version/configuration the upload arrives
        # either as a plain path string or as a wrapper exposing `.name`.
        pdf_path = pdf_file if isinstance(pdf_file, str) else pdf_file.name
        source_desc = f"uploaded file '{os.path.basename(pdf_path)}'"
        try:
            return fitz.open(pdf_path)
        except Exception as e:
            raise gr.Error(
                f"Failed to load PDF from {source_desc}. Error: {e}"
            ) from e

    # Validate and fetch the *same* (stripped) URL string.
    url = (pdf_url or "").strip()
    if not url.startswith(('http://', 'https://')):
        raise gr.Error("Please upload a PDF or provide a valid URL.")

    progress(0, desc="Fetching PDF from URL...")
    source_desc = f"URL '{url}'"
    try:
        response = requests.get(url, headers=REQUESTS_HEADERS, timeout=30)
        response.raise_for_status()
        content_type = response.headers.get('Content-Type', '').lower()
        if 'application/pdf' not in content_type:
            raise gr.Error(
                f"URL content is not a PDF. Type: '{content_type}'."
            )
        return fitz.open(stream=response.content, filetype="pdf")
    except gr.Error:
        # Already a user-facing message; do not wrap it a second time.
        raise
    except Exception as e:
        raise gr.Error(
            f"Failed to load PDF from {source_desc}. Error: {e}"
        ) from e


def pdf_to_png_streaming(pdf_file, pdf_url, progress=gr.Progress(track_tqdm=True)):
    """Convert a PDF (from file or URL) to a single stacked PNG, streaming it.

    This function is a generator: after each page is rendered and pasted onto
    the shared canvas, the canvas is yielded so the UI can show the image as
    it is being built. Pages are stacked vertically, centred horizontally and
    separated by SPACER_HEIGHT pixels.

    Args:
        pdf_file: Gradio file object (or path string) of an uploaded PDF.
        pdf_url: String URL to a PDF; used only when no file is uploaded.
        progress: Gradio progress object, automatically provided by the UI.

    Yields:
        The PIL image of the combined canvas after each processed page. The
        same image object is yielded every time.

    Raises:
        gr.Error: If no valid input is given, the PDF cannot be loaded, or it
            contains no pages.
    """
    # --- 1. Get PDF Document Object ---
    doc = _open_pdf_document(pdf_file, pdf_url, progress)

    # The try/finally guarantees the document is closed even if rendering
    # fails or the consumer stops iterating the generator early.
    try:
        # --- 2. Pre-scan for Dimensions (for efficient canvas creation) ---
        progress(0, desc="Analyzing PDF layout...")
        page_dims = [page.rect for page in doc]
        if not page_dims:
            raise gr.Error("PDF is valid but contains no pages.")
        page_count = len(page_dims)

        # Calculate final canvas size from scanned dimensions and zoom factor.
        # NOTE(review): the height is truncated once for the whole sum while
        # each pixmap is sized individually by the renderer, so the canvas may
        # come out a few pixels shorter than the pasted pages -- confirm.
        matrix = fitz.Matrix(IMAGE_ZOOM_FACTOR, IMAGE_ZOOM_FACTOR)
        max_width = int(max(p.width for p in page_dims) * matrix.a)
        total_height = (
            int(sum(p.height for p in page_dims) * matrix.d)
            + SPACER_HEIGHT * (page_count - 1)
        )

        # Create the final canvas ONCE
        combined_img = Image.new("RGB", (max_width, total_height), BACKGROUND_COLOR)

        # --- 3. Render, Paste, and Stream (Yield) ---
        current_y = 0
        for page_num, page in enumerate(doc, start=1):
            progress(
                page_num / page_count,
                desc=f"Processing Page {page_num} of {page_count}",
            )

            # Render page to a pixmap (no alpha channel, so samples are RGB)
            pix = page.get_pixmap(matrix=matrix, alpha=False)
            page_img = Image.frombytes("RGB", [pix.width, pix.height], pix.samples)

            # Centre the page horizontally and paste it below the previous one
            x_offset = (max_width - pix.width) // 2
            combined_img.paste(page_img, (x_offset, current_y))

            # Update the y-position for the next page
            current_y += pix.height + SPACER_HEIGHT

            # YIELD the progressively built image to update the Gradio UI
            yield combined_img
    finally:
        doc.close()

    print("Streaming conversion complete.")
    # The final yield has already sent the completed image. No return needed.
# --- Gradio Interface Definition using Blocks ---
# Layout: inputs (upload, URL, submit) in a narrow left column, the live image
# in a wider right column, then a clear button and clickable examples.
with gr.Blocks(theme=gr.themes.Soft()) as demo:
    gr.Markdown(
        """
# Live PDF to PNG Converter
**Upload a PDF file OR enter a URL.** The output image will update in real-time as each page is processed.
"""
    )
    with gr.Row():
        with gr.Column(scale=1):
            # Only offer PDF files in the upload picker.
            pdf_input = gr.File(label="Upload PDF File", file_types=[".pdf"])
            url_input = gr.Textbox(
                label="Or Enter PDF URL",
                placeholder="e.g., https://arxiv.org/pdf/1706.03762.pdf"
            )
            submit_btn = gr.Button("Convert to PNG", variant="primary")
        with gr.Column(scale=2):
            png_output = gr.Image(label="Live PNG Output", interactive=False)

    # NOTE(review): the original nesting level of this button was lost;
    # it is placed below the row here -- confirm the intended position.
    clear_btn = gr.ClearButton(components=[pdf_input, url_input, png_output], value="Clear All")

    # Each example leaves the upload empty and fills in only the URL field.
    gr.Examples(
        examples=[
            [None, "https://arxiv.org/pdf/1706.03762.pdf"],  # "Attention Is All You Need" paper
            [None, "https://www.w3.org/WAI/ER/tests/xhtml/testfiles/resources/pdf/dummy.pdf"],
        ],
        inputs=[pdf_input, url_input]
    )

    # Wire the button to the streaming function; every image the generator
    # yields replaces the content of the output component.
    submit_btn.click(
        fn=pdf_to_png_streaming,
        inputs=[pdf_input, url_input],
        outputs=png_output
    )
# Launch the app
if __name__ == "__main__":
    print("Starting Gradio app with streaming...")
    # The click handler is a generator that reports progress; on Gradio
    # versions where the queue is not enabled by default, both features
    # require it, so enable it explicitly before launching.
    demo.queue().launch()
    print("Gradio app finished.")