Spaces:
Sleeping
Sleeping
import gradio as gr | |
from extract_images.services import ( | |
extract_images_pymupdf, | |
extract_images_pdfplumber, | |
extract_images_gemini, | |
extract_images_gpt, | |
) | |
from extract_tables.services import ( | |
extract_tables_pymupdf, | |
extract_tables_tab_transformer, | |
extract_tables_img2table, | |
extract_tables_gemini, | |
extract_tables_gpt, | |
) | |
from utils import clear_directory | |
def handle_model_selection(pdf_file, model_option):
    """Run the image/table extractors that belong to the selected model.

    Args:
        pdf_file: The uploaded PDF exactly as Gradio hands it over; it is
            forwarded unchanged to the extractor functions. (The UI in this
            file declares the input as ``gr.File(type="binary")``, so this
            is presumably raw bytes -- confirm against the extractors.)
        model_option: One of the labels offered by the "Select Model"
            dropdown.

    Returns:
        A ``(images, tables)`` tuple. For options that extract only one
        kind of content the other element is ``None``.

    Raises:
        ValueError: If ``model_option`` is not one of the supported labels.
    """
    # Dropdown label -> model identifier passed to the hosted-model extractors.
    gemini_models = {
        "Gemini Pro": "gemini-pro-vision",
        "Gemini Flash": "gemini-1.5-flash-latest",
    }
    gpt_models = {
        "GPT 4 Turbo": "gpt-4-turbo",
        "GPT 4o": "gpt-4o",
    }

    if model_option == "PyMuPDF":
        images = extract_images_pymupdf(pdf_file)
        tables = extract_tables_pymupdf(pdf_file)
    elif model_option == "PdfPlumber (Extracts Images only)":
        images = extract_images_pdfplumber(pdf_file)
        tables = None
    elif model_option == "Table Transformer (Extracts Tables only)":
        images = None
        tables = extract_tables_tab_transformer(pdf_file)
    elif model_option == "img2table (Extracts Tables only)":
        images = None
        tables = extract_tables_img2table(pdf_file)
    elif model_option in gemini_models:
        model_name = gemini_models[model_option]
        images = extract_images_gemini(model_name, pdf_file)
        tables = extract_tables_gemini(model_name, pdf_file)
    elif model_option in gpt_models:
        model_name = gpt_models[model_option]
        images = extract_images_gpt(model_name, pdf_file)
        tables = extract_tables_gpt(model_name, pdf_file)
    else:
        # Without this branch an unknown label would fall through and fail
        # at the return statement with an unhelpful UnboundLocalError.
        raise ValueError(f"Unsupported model option: {model_option!r}")

    # The output folders are cleared only after extraction has finished
    # (tables first, then images), right before the results are returned.
    clear_directory("extract_tables/table_outputs")
    clear_directory("extract_images/image_outputs")
    return images, tables
# Labels offered in the "Select Model" dropdown. handle_model_selection
# branches on these exact strings, so the two must stay in sync.
_MODEL_CHOICES = [
    "PdfPlumber (Extracts Images only)",
    "Table Transformer (Extracts Tables only)",
    "img2table (Extracts Tables only)",
    "PyMuPDF",
    "Gemini Pro",
    "Gemini Flash",
    "GPT 4 Turbo",
    "GPT 4o",
]

# Input widgets: the PDF upload and the model picker (PyMuPDF preselected).
_pdf_upload = gr.File(type="binary", label="Upload PDF")
_model_picker = gr.Dropdown(
    label="Select Model",
    choices=_MODEL_CHOICES,
    value="PyMuPDF",
)

# Output widgets, in the same order as the (images, tables) tuple returned
# by handle_model_selection.
_image_gallery = gr.Gallery(label="Extracted Images")
_table_gallery = gr.Gallery(label="Extracted Tables")

interface = gr.Interface(
    fn=handle_model_selection,
    inputs=[_pdf_upload, _model_picker],
    outputs=[_image_gallery, _table_gallery],
    title="PDF Image and Table Extractor",
    description="Upload a PDF to extract images and tables. Choose the model for extraction.",
)

# Start the app; share=True additionally requests a public share link.
interface.launch(share=True)