File size: 773 Bytes
4f8402b
3469319
8b139bf
d97cfeb
b87add3
8f0b178
4f8402b
 
f6ae938
3469319
8f0b178
 
 
 
 
 
 
359e981
b87add3
3469319
ea64bb3
b87add3
3469319
b87add3
8b139bf
3469319
 
b87add3
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
import subprocess
import camelot
import pandas as pd
import gradio as gr

def install_ghostscript() -> bool:
    """Install Ghostscript through ``apt-get`` and report the outcome.

    Runs ``apt-get update`` followed by ``apt-get install -y ghostscript``.
    Neither command raises on a non-zero exit status; the install command's
    status is surfaced through the return value instead of being discarded.

    Returns:
        True when ``apt-get install`` exited with status 0, False otherwise.

    Raises:
        OSError: If the ``apt-get`` executable cannot be started at all
            (propagated from ``subprocess.run``).
    """
    # The index refresh result is deliberately not treated as fatal: the
    # install step is still attempted and its own exit status decides.
    subprocess.run(["apt-get", "update"], check=False)
    install = subprocess.run(
        ["apt-get", "install", "-y", "ghostscript"],
        check=False,
    )
    return install.returncode == 0

def extract_tables(pdf_file):
    """Extract every table from an uploaded PDF into a single DataFrame.

    Args:
        pdf_file: The upload handed over by the ``gr.File`` input. Either a
            path string or an object exposing the path as ``.name``
            (which one arrives presumably depends on the Gradio version and
            component configuration), or ``None`` when nothing was uploaded.

    Returns:
        pandas.DataFrame: All tables found on all pages, concatenated row-wise
        with a fresh index. Empty when no file was supplied or the PDF
        contains no detectable tables.

    Raises:
        gr.Error: When reading the PDF fails with ``OSError``. A Ghostscript
            install is attempted first and the user is asked to retry.
    """
    if pdf_file is None:
        # Nothing uploaded: there is no path to read, so return an empty table
        # instead of failing on ``None.name``.
        return pd.DataFrame()

    pdf_path = pdf_file if isinstance(pdf_file, str) else pdf_file.name

    try:
        tables = camelot.read_pdf(pdf_path, pages="all")
    except OSError as err:
        install_ghostscript()
        # Raise instead of returning the text: the output component is a
        # DataFrame, so the message has to reach the user as an error.
        raise gr.Error("Ghostscript installed. Please retry.") from err

    frames = [table.df for table in tables]
    if not frames:
        # pd.concat raises ValueError on an empty list.
        return pd.DataFrame()
    return pd.concat(frames, ignore_index=True)

# UI components: a single PDF upload in, a single table view out.
_pdf_input = gr.File(label="Upload PDF")
_tables_output = gr.DataFrame(label="Extracted Tables")

# Wire the components to extract_tables; `demo` is the module-level app object.
demo = gr.Interface(
    fn=extract_tables,
    inputs=_pdf_input,
    outputs=_tables_output,
    title="PDF Table Extractor",
    description="Extract tables from PDF files.",
)

# Start the web server only when this file is executed as a script.
if __name__ == "__main__":
    demo.launch()