Files changed (1) hide show
  1. app.py +4 -70
app.py CHANGED
@@ -1,71 +1,5 @@
1
  import streamlit as st
2
- import numpy as np
3
- import pandas as pd
4
- import subprocess
5
- from subprocess import STDOUT, check_call
6
- import os
7
- import base64
8
- import camelot
9
-
10
- # to run this only once and it's cached
11
- @st.cache
12
- def ghostscript():
13
- """install ghostscript on the linux machine"""
14
- proc = subprocess.Popen('apt-get install -y ghostscript', shell=True, stdin=None, stdout=open(os.devnull,"wb"), stderr=STDOUT, executable="/bin/bash")
15
- proc.wait()
16
-
17
- ghostscript()
18
-
19
- #heading
20
- html_temp = """
21
- <div style="background-color:tomato;padding:10px">
22
- <h2 style="color:white;text-align:center;">PDF Table Extractor WebApp </h2>
23
- </div>
24
- """
25
- st.markdown(html_temp,unsafe_allow_html=True)
26
-
27
-
28
- # file uploader on streamlit
29
- #st.sidebar.markdown('Upload PDF files')
30
- input_pdf = st.sidebar.file_uploader(label = "Upload PDF files here", type = 'pdf')
31
-
32
- # run this only when a PDF is uploaded
33
- if input_pdf is not None:
34
- # byte object into a PDF file
35
- with open("input.pdf", "wb") as f:
36
- base64_pdf = base64.b64encode(input_pdf.read()).decode('utf-8')
37
- f.write(base64.b64decode(base64_pdf))
38
- f.close()
39
-
40
- #To print uploaded pdf
41
- def show_pdf(file_path):
42
- with open(file_path,"rb") as f:
43
- base64_pdf = base64.b64encode(f.read()).decode('utf-8')
44
- pdf_display = f'<iframe src="data:application/pdf;base64,{base64_pdf}" width="800" height="800" type="application/pdf"></iframe>'
45
- st.markdown('## Uploaded PDF')
46
- st.markdown(pdf_display, unsafe_allow_html=True)
47
-
48
- #st.sidebar.markdown('Display Uploaded PDF')
49
- #if st.sidebar.button('Show'):
50
- #show_pdf("input.pdf")
51
-
52
- # read the pdf and parse it using stream
53
- if input_pdf is not None:
54
- table = camelot.read_pdf('input.pdf', flavor='stream',split_text = True,layout_kwargs={'detect_vertical':True},backend='poppler')
55
- df = table[0].df
56
- #df = df.dropna(axis=1, thresh=int(len(df)*0.7),inplace=True)
57
- df = pd.DataFrame(df)
58
- for i in df.index:
59
- if df['Alerted'][i]=='o':
60
- df['Alerted'][i]='NO'
61
- else:
62
- df['Alerted'][i]='YES'
63
-
64
- st.sidebar.markdown('Extract tables from PDF')
65
- if st.sidebar.button('Extract Table'):
66
- st.markdown('## Extracted table from PDF')
67
- st.dataframe(df)
68
-
69
- if input_pdf is not None:
70
- st.sidebar.markdown('Download Extracted Table as CSV file')
71
- st.sidebar.download_button("Download",df.to_csv(),file_name = 'extracted_table.csv', mime = 'text/csv')
 
1
  import streamlit as st
2
+ import pandas as pd
3
+ import matplotlib.pyplot as plt
4
+ st.title('Earthquake Data Explorer')
5
+ st.text('This is a web app to allow exploration of Earthquake Data')