# table-extractor / app.py
# alimghori's picture
# Upload app.py
# 92e2de0
import streamlit as st
import camelot as cam
import subprocess
from subprocess import STDOUT, check_call
import os
import base64
# Enable at the time of hosting on any Linux-based server.
# NOTE(review): `st.cache` is deprecated in recent Streamlit releases in favour
# of `st.cache_resource` / `st.cache_data` — confirm against the pinned version
# before switching decorators.
@st.cache
def gh():
    """Install Ghostscript with ``apt-get`` and wait for the installer to exit.

    The installer's stdout and stderr are discarded. The installation is
    best-effort: a non-zero exit status is ignored, and a host without
    ``apt-get`` is tolerated, so the app can still start.
    Presumably Ghostscript is needed by the PDF parsing backend — confirm
    against the camelot installation notes.

    Returns:
        None.
    """
    try:
        # An argv list avoids spawning a shell for a fixed command, and
        # DEVNULL avoids opening (and leaking) a handle on os.devnull.
        subprocess.run(
            ["apt-get", "install", "-y", "ghostscript"],
            stdin=None,
            stdout=subprocess.DEVNULL,
            stderr=STDOUT,
            check=False,
        )
    except OSError:
        # apt-get is missing or not executable (e.g. a non-Debian host).
        # Installation is deliberately best-effort, so carry on without it.
        pass
# Best-effort Ghostscript installation before any PDF is parsed.
gh()

# --- Page layout -----------------------------------------------------------
st.title("Table Extractor form PDF")
st.subheader("Upload any non-scanned PDF for better result")

input_pdf = st.file_uploader(label="Upload your PDF here...", type='pdf')

st.markdown("### Page Number")
page_number = st.text_input(
    "Enter thr Page from where you want to extract in the PDF eg:2 ", value=1)

# --- Extraction ------------------------------------------------------------
if input_pdf is not None:
    # Save the upload to disk as raw PDF bytes so it can be read back by
    # path. The bytes must not be base64-encoded: the file has to be a
    # valid PDF.
    with open("input.pdf", "wb") as f:
        f.write(input_pdf.getvalue())

    # The page selection is passed as a string; fall back to the first page
    # when the field was left blank.
    pages = str(page_number).strip() or "1"
    table = cam.read_pdf("input.pdf", pages=pages, flavor='stream')

    st.markdown("### Number of Tables")
    st.write(table)

    if len(table) > 0:
        # Offer 1-based table numbers (1..N); the index below converts the
        # chosen number back to a 0-based position.
        option = st.selectbox(
            label="select the Table to be displayed",
            options=range(1, len(table) + 1))
        st.markdown('### Output Table')
        st.dataframe(table[int(option) - 1].df)