Spaces:
Build error
Build error
File size: 823 Bytes
07e0b4e b756001 07e0b4e |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 |
import gradio as gr
import fitz # PyMuPDF
def pdf_to_markdown(pdf_file):
    """Extract the text of every page of a PDF into a single string.

    Args:
        pdf_file: The uploaded PDF. Accepted forms are ``None`` (nothing
            uploaded), raw ``bytes``/``bytearray``, a file-like object
            exposing ``read()``, or a filesystem path (or an object whose
            ``name`` attribute holds one).

    Returns:
        ``"No file uploaded."`` when ``pdf_file`` is ``None``; otherwise the
        stripped text of each page, each prefixed by a ``=== Page N`` line
        (1-based) and joined with blank lines.
    """
    if pdf_file is None:
        return "No file uploaded."

    # Normalise the upload into keyword arguments for fitz.open so the
    # function works whichever representation the UI layer hands over.
    if isinstance(pdf_file, (bytes, bytearray)):
        open_kwargs = {"stream": bytes(pdf_file)}
    elif hasattr(pdf_file, "read"):
        open_kwargs = {"stream": pdf_file.read()}
    else:
        # A path string, or a wrapper object carrying the path in `.name`.
        open_kwargs = {"filename": str(getattr(pdf_file, "name", pdf_file))}

    # The context manager closes the document even if text extraction fails.
    with fitz.open(filetype="pdf", **open_kwargs) as doc:
        pages = [
            f"=== Page {number}\n\n{page.get_text('text').strip()}"
            for number, page in enumerate(doc, start=1)
        ]
    return "\n\n".join(pages)
# Gradio UI: a single file-upload input wired to pdf_to_markdown, with the
# extracted text displayed in a multi-line textbox.
# NOTE(review): type="file" may not be accepted by newer Gradio releases
# (which appear to expect "filepath" or "binary") -- confirm against the
# installed Gradio version.
iface = gr.Interface(
    title="PDF to Markdown Extractor",
    description="Upload a PDF and get a copyable markdown output.",
    fn=pdf_to_markdown,
    inputs=gr.File(type="file"),
    outputs=gr.Textbox(lines=15, label="Markdown Output"),
)

# Launch the app only when this file is executed as a script, not on import.
if __name__ == "__main__":
    iface.launch()
|