Spaces:
Running
Running
import gradio as gr | |
from docling.document_converter import DocumentConverter | |
import tempfile | |
import os | |
import shutil | |
import requests | |
# Module-level converter: created once at import time and reused by every
# convert_input() call.
converter = DocumentConverter()
# def convert_input(pdf_file, pdf_url): | |
# temp_pdf_path = None | |
# try: | |
# if pdf_file: | |
# with tempfile.NamedTemporaryFile(delete=False, suffix=".pdf") as tmp: | |
# tmp.write(pdf_file.read()) | |
# temp_pdf_path = tmp.name | |
# source = temp_pdf_path | |
# elif pdf_url: | |
# source = pdf_url | |
# else: | |
# raise gr.Error("Please upload a PDF or provide a URL.") | |
# doc = converter.convert(source).document | |
# markdown = doc.export_to_markdown() | |
# # Save markdown to a file | |
# with tempfile.NamedTemporaryFile(delete=False, suffix=".md", mode="w", encoding="utf-8") as tmp_md: | |
# tmp_md.write(markdown) | |
# markdown_path = tmp_md.name | |
# return pdf_file.name if pdf_file else None, markdown, markdown_path | |
# except Exception as e: | |
# return None, f"Error: {str(e)}", None | |
# finally: | |
# if temp_pdf_path and os.path.exists(temp_pdf_path): | |
# os.remove(temp_pdf_path) | |
def convert_input(pdf_file, pdf_url):
    """Convert an uploaded PDF, or a PDF fetched by URL, to Markdown.

    An uploaded file takes precedence over the URL when both are supplied.

    Args:
        pdf_file: Value delivered by the upload component: a plain path
            string, an object whose ``.name`` attribute is the local path,
            or ``None`` when nothing was uploaded.
        pdf_url: URL of a PDF. Surrounding whitespace is ignored; an empty
            or blank value counts as "not provided".

    Returns:
        A 3-tuple ``(pdf_preview_path, markdown, markdown_path)``.
        ``pdf_preview_path`` is the uploaded file's local path, or ``None``
        for URL input. If conversion or writing the Markdown file fails,
        the tuple is ``(None, "Error: <message>", None)``.

    Raises:
        gr.Error: If neither a file nor a non-blank URL was provided.
    """
    url = (pdf_url or "").strip()

    # Validate before entering the try block: raised inside it, the gr.Error
    # would be caught by the broad handler below and never reach Gradio.
    if pdf_file is not None:
        # Accept both a plain path string and a file wrapper exposing .name.
        source = getattr(pdf_file, "name", pdf_file)
        pdf_preview_path = source
    elif url:
        source = url
        pdf_preview_path = None  # No local file to preview
    else:
        raise gr.Error("Please upload a PDF or provide a URL.")

    try:
        markdown = converter.convert(source).document.export_to_markdown()

        # delete=False: the file must outlive this call so its path can be
        # handed to the download component.
        with tempfile.NamedTemporaryFile(
            delete=False, suffix=".md", mode="w", encoding="utf-8"
        ) as tmp_md:
            tmp_md.write(markdown)
            markdown_path = tmp_md.name
    except Exception as e:
        # UI boundary: report the failure in the Markdown pane rather than
        # letting an arbitrary conversion error propagate.
        return None, f"Error: {e}", None

    return pdf_preview_path, markdown, markdown_path
# UI definition: inputs (file upload or URL) on the first row, a convert
# button, then the PDF preview and Markdown download on the second row, with
# the rendered Markdown underneath.
with gr.Blocks() as demo:
    gr.Markdown("# 📄 PDF to Markdown with `docling`")
    gr.Markdown("Upload a PDF or enter a URL (e.g., from arXiv). View Markdown and download it.")

    with gr.Row():
        pdf_input = gr.File(label="📤 Upload PDF", file_types=[".pdf"])
        url_input = gr.Textbox(label="🔗 Or enter PDF URL (e.g., https://arxiv.org/pdf/...)")

    convert_btn = gr.Button("Convert to Markdown")

    with gr.Row():
        pdf_preview = gr.File(label="📄 PDF Preview", interactive=False)
        md_file = gr.File(label="📥 Download Markdown", interactive=False)

    md_output = gr.Markdown(label="📝 Markdown Output")

    # The output order must match the tuple returned by convert_input:
    # (pdf_preview_path, markdown, markdown_path).
    convert_btn.click(
        fn=convert_input,
        inputs=[pdf_input, url_input],
        outputs=[pdf_preview, md_output, md_file],
    )

if __name__ == "__main__":
    demo.launch()