alimghori committed on
Commit
92e2de0
1 Parent(s): d0eaa5a

Upload app.py

Browse files
Files changed (1) hide show
  1. app.py +52 -0
app.py ADDED
@@ -0,0 +1,52 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import streamlit as st
2
+ import camelot as cam
3
+ import subprocess
4
+ from subprocess import STDOUT, check_call
5
+ import os
6
+ import base64
7
+
8
+
9
+ # Enable at the time of Hosting in any Linux based server
10
+
11
+
12
@st.cache
def gh():
    """Install Ghostscript via ``apt-get`` and block until the install exits.

    Runs ``apt-get install -y ghostscript`` with all output discarded. The
    exit status is deliberately ignored: this is a best-effort setup step
    intended for Linux hosts, and the app continues whether or not the
    install succeeds. The ``st.cache`` decorator is kept from the original
    so Streamlit can reuse the result across reruns instead of re-invoking
    the installer.

    NOTE(review): presumably Ghostscript is needed by camelot on the hosting
    server -- confirm it is still required for the ``stream`` flavor.

    Returns:
        None
    """
    try:
        subprocess.run(
            # Argument list instead of a shell string: no shell is needed
            # for a fixed command, and it avoids depending on /bin/bash.
            ["apt-get", "install", "-y", "ghostscript"],
            stdin=None,
            # DEVNULL replaces open(os.devnull, "wb"), which leaked an
            # unclosed file handle on every call.
            stdout=subprocess.DEVNULL,
            stderr=STDOUT,
            check=False,
        )
    except OSError:
        # apt-get is not available (e.g. non-Debian host or local dev
        # machine). Keep the step best-effort rather than crashing the app.
        return
17
+
18
+
19
# Best-effort install of Ghostscript before any PDF is processed.
gh()


st.title("Table Extractor form PDF")
st.subheader("Upload any non-scanned PDF for better result")


input_pdf = st.file_uploader(label="Upload your PDF here...", type='pdf')

st.markdown("### Page Number")

page_number = st.text_input(
    "Enter thr Page from where you want to extract in the PDF eg:2 ", value=1)

if input_pdf is not None:
    # Persist the upload as raw bytes so camelot can read it from disk.
    # (Previously the data was base64-encoded, and b64encode was then called
    # on a str, which raises TypeError and would not produce a valid PDF.)
    with open("input.pdf", "wb") as f:
        f.write(input_pdf.read())

    # The file name and the page selection are separate arguments; camelot
    # expects ``pages`` as a string such as "1" or "1,3-5".
    table = cam.read_pdf(
        "input.pdf", pages=str(page_number).strip(), flavor='stream')

    st.markdown("### Number of Tables")

    st.write(table)

    table_count = len(table)
    if table_count > 0:
        # Offer 1-based choices 1..N; the previous range started at 0, and
        # choice 0 silently mapped to table[-1] (the last table).
        option = st.selectbox(
            label="select the Table to be displayed",
            options=range(1, table_count + 1))

        st.markdown('### Output Table')
        # Convert the 1-based user choice back to a 0-based index.
        st.dataframe(table[int(option) - 1].df)