Spaces:
Sleeping
Sleeping
File size: 2,675 Bytes
d2cb17f |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 |
import gradio as gr
from extract_images.services import (
extract_images_pymupdf,
extract_images_pdfplumber,
extract_images_gemini,
extract_images_gpt,
)
from extract_tables.services import (
extract_tables_pymupdf,
extract_tables_tab_transformer,
extract_tables_img2table,
extract_tables_gemini,
extract_tables_gpt,
)
from utils import clear_directory
def handle_model_selection(pdf_file, model_option):
    """Run the extraction backend selected in the UI on an uploaded PDF.

    Args:
        pdf_file: The uploaded PDF, forwarded unchanged to the extractor
            functions.
        model_option: Label of the selected backend; must be one of the
            keys of the ``backends`` table below.

    Returns:
        A ``(images, tables)`` tuple holding whatever the selected
        extractors return. ``images`` is ``None`` for table-only backends
        and ``tables`` is ``None`` for image-only backends.

    Raises:
        ValueError: If ``model_option`` is not a known backend label.
    """
    # Label -> (image step, table step). ``None`` marks a capability the
    # backend does not offer. Lambdas keep the calls lazy so that only the
    # selected backend is ever invoked.
    backends = {
        "PyMuPDF": (
            lambda: extract_images_pymupdf(pdf_file),
            lambda: extract_tables_pymupdf(pdf_file),
        ),
        "PdfPlumber (Extracts Images only)": (
            lambda: extract_images_pdfplumber(pdf_file),
            None,
        ),
        "Table Transformer (Extracts Tables only)": (
            None,
            lambda: extract_tables_tab_transformer(pdf_file),
        ),
        "img2table (Extracts Tables only)": (
            None,
            lambda: extract_tables_img2table(pdf_file),
        ),
        "Gemini Pro": (
            lambda: extract_images_gemini("gemini-pro-vision", pdf_file),
            lambda: extract_tables_gemini("gemini-pro-vision", pdf_file),
        ),
        "Gemini Flash": (
            lambda: extract_images_gemini("gemini-1.5-flash-latest", pdf_file),
            lambda: extract_tables_gemini("gemini-1.5-flash-latest", pdf_file),
        ),
        "GPT 4 Turbo": (
            lambda: extract_images_gpt("gpt-4-turbo", pdf_file),
            lambda: extract_tables_gpt("gpt-4-turbo", pdf_file),
        ),
        "GPT 4o": (
            lambda: extract_images_gpt("gpt-4o", pdf_file),
            lambda: extract_tables_gpt("gpt-4o", pdf_file),
        ),
    }

    # Fail with a clear message instead of an UnboundLocalError at the
    # return statement when the label is missing or unrecognised.
    if model_option not in backends:
        raise ValueError(
            f"Unknown model option: {model_option!r}. "
            f"Expected one of: {', '.join(backends)}"
        )

    image_step, table_step = backends[model_option]
    try:
        # Images are extracted before tables, as in the original branches.
        images = image_step() if image_step is not None else None
        tables = table_step() if table_step is not None else None
    finally:
        # Always clear the working directories, even if extraction failed,
        # so a failed run cannot leave stale files for the next request.
        # NOTE(review): if the extractors return paths inside these
        # directories, clearing here would invalidate them — confirm
        # against the services' return types.
        clear_directory("extract_tables/table_outputs")
        clear_directory("extract_images/image_outputs")
    return images, tables
# Dropdown labels offered to the user; each one corresponds to a branch
# handled by handle_model_selection.
_model_choices = [
    "PdfPlumber (Extracts Images only)",
    "Table Transformer (Extracts Tables only)",
    "img2table (Extracts Tables only)",
    "PyMuPDF",
    "Gemini Pro",
    "Gemini Flash",
    "GPT 4 Turbo",
    "GPT 4o",
]

# Input widgets: the PDF upload and the backend selector.
_pdf_upload = gr.File(type="binary", label="Upload PDF")
_model_dropdown = gr.Dropdown(
    label="Select Model",
    choices=_model_choices,
    value="PyMuPDF",
)

# Output widgets: one gallery per element of the (images, tables) result.
_image_gallery = gr.Gallery(label="Extracted Images")
_table_gallery = gr.Gallery(label="Extracted Tables")

interface = gr.Interface(
    fn=handle_model_selection,
    inputs=[_pdf_upload, _model_dropdown],
    outputs=[_image_gallery, _table_gallery],
    title="PDF Image and Table Extractor",
    description="Upload a PDF to extract images and tables. Choose the model for extraction.",
)

# Start the web app as soon as this module runs; share=True asks Gradio
# for a public share link.
interface.launch(share=True)
|