uartimcs's picture
Update app.py
c7f3bef verified
raw
history blame
999 Bytes
import gradio as gr
import fitz
import os
import zipfile
from donut import DonutModel
def process(input_pdf):
    """Render the first page of a PDF to a JPEG and run the Donut model on it.

    Args:
        input_pdf: The uploaded PDF, given either as a filesystem path or as
            a file-like object that exposes its path on ``.name`` (which of
            the two Gradio passes depends on the installed version).

    Returns:
        The first entry of the ``"predictions"`` list returned by
        ``model.inference``.

    Raises:
        gr.Error: If no file was supplied or the PDF contains no pages.
    """
    if input_pdf is None:
        raise gr.Error("Please upload a PDF file.")

    # Normalise the upload to a plain path string.
    if isinstance(input_pdf, (str, os.PathLike)):
        pdf_path = os.fspath(input_pdf)
    else:
        pdf_path = input_pdf.name

    # Only the first page is converted; the context manager closes the
    # document even if rendering fails.
    with fitz.open(pdf_path) as pdf:
        if pdf.page_count == 0:
            raise gr.Error("The uploaded PDF has no pages.")
        image_bytes = pdf[0].get_pixmap().tobytes("jpg")

    temp_dir = "images"
    os.makedirs(temp_dir, exist_ok=True)

    # splitext keeps dots inside the stem ("a.b.pdf" -> "a.b"), and the
    # extension separator is written explicitly so the file ends in ".jpg".
    basename = os.path.splitext(os.path.basename(pdf_path))[0]
    image_path = os.path.join(temp_dir, f"{basename}.jpg")
    with open(image_path, "wb") as f:
        f.write(image_bytes)

    # NOTE(review): upstream Donut documents `image` as a PIL image, while a
    # file path is passed here — confirm the installed donut accepts a path.
    output = model.inference(image=image_path, prompt=task_prompt)["predictions"][0]
    return output
# Load the pretrained checkpoint once at import time so every request
# handled by `process` reuses the same model instance.
model = DonutModel.from_pretrained("uartimcs/donut-invoice-extract")
model.eval()  # presumably switches the model to inference mode — confirm

# Task identifier and the prompt token derived from it; `process` passes
# `task_prompt` to `model.inference`.
task_name = "SGSInvoice"
task_prompt = f"<s_{task_name}>"

# Web UI: a single PDF upload in, the model's prediction rendered as JSON out.
demo = gr.Interface(
    fn=process,
    inputs=gr.File(file_types=[".pdf"]),
    outputs="json",
    title=f"Donut 🍩 demonstration for `{task_name}` task",
)
demo.launch()