File size: 1,371 Bytes
92e2de0
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
import streamlit as st
import camelot as cam
import subprocess
from subprocess import STDOUT, check_call
import os
import base64


# Enable when hosting on a Linux-based server.


@st.cache
def gh() -> None:
    """Install Ghostscript with ``apt-get`` on a best-effort basis.

    Runs ``apt-get install -y ghostscript`` and blocks until the command
    finishes. All output of the command is discarded and its exit status
    is ignored, so a failed installation does not stop the app.

    Returns:
        None
    """
    try:
        # A fixed command needs no shell, so pass an argument list.
        # subprocess.DEVNULL replaces the manually opened os.devnull handle,
        # which was never closed and leaked a file descriptor.
        subprocess.run(
            ["apt-get", "install", "-y", "ghostscript"],
            stdin=None,
            stdout=subprocess.DEVNULL,
            stderr=STDOUT,
            check=False,
        )
    except OSError:
        # apt-get could not be started (e.g. the host is not Debian-based).
        # The installation is best-effort, so carry on without it.
        return


# Try to install Ghostscript before the page is rendered.
gh()


st.title("Table Extractor from PDF")
st.subheader("Upload any non-scanned PDF for better result")


# The uploaded file object, or None until the user has uploaded a PDF.
input_pdf = st.file_uploader(label="Upload your PDF here...", type='pdf')

st.markdown("### Page Number")

# The widget returns its content as text, so the default is given as the
# string "1" rather than the integer 1.
page_number = st.text_input(
    "Enter the page number to extract tables from, e.g. 2", value="1")

if input_pdf is not None:
    # Save the upload to disk unchanged so Camelot can open it by path.
    # The bytes must be written as-is: base64-encoding them would produce a
    # file that is no longer a valid PDF.
    with open("input.pdf", "wb") as f:
        f.write(input_pdf.read())

    # Camelot takes the file path and the page selection as separate
    # arguments, and expects `pages` as a string. Fall back to the first
    # page when the input box has been left empty.
    pages = str(page_number).strip() or "1"
    table = cam.read_pdf("input.pdf", pages=pages, flavor='stream')

    st.markdown("### Number of Tables")

    # Show the count announced by the heading, not the list object itself.
    st.write(len(table))

    if len(table) > 0:
        # Offer 1-based table numbers so that `option - 1` below is always
        # a valid index into `table`.
        option = st.selectbox(
            label="select the Table to be displayed",
            options=range(1, len(table) + 1))

        st.markdown('### Output Table')
        st.dataframe(table[int(option) - 1].df)