Spaces:

ashcodes
/

pdf-table-extractor-tabula

Build error

App Files Community

Upload app.py

by Mitali295 - opened Oct 25, 2022

base: refs/heads/main

←

from: refs/pr/1

Discussion Files changed

+4 -70

Files changed (1) hide show

app.py +4 -70

app.py CHANGED Viewed

@@ -1,71 +1,5 @@
 import streamlit as st
-import numpy as np
-import pandas as pd
-import subprocess
-from subprocess import STDOUT, check_call
-import os
-import base64
-import camelot
-# to run this only once and it's cached
-@st.cache
-def ghostscript():
-    """install ghostscript on the linux machine"""
-    proc = subprocess.Popen('apt-get install -y ghostscript', shell=True, stdin=None, stdout=open(os.devnull,"wb"), stderr=STDOUT, executable="/bin/bash")
-    proc.wait()
-ghostscript()
-#heading
-html_temp = """
-    <div style="background-color:tomato;padding:10px">
-    <h2 style="color:white;text-align:center;">PDF Table Extractor WebApp </h2>
-    </div>
-    """
-st.markdown(html_temp,unsafe_allow_html=True)
-# file uploader on streamlit
-#st.sidebar.markdown('Upload PDF files')
-input_pdf = st.sidebar.file_uploader(label = "Upload PDF files here", type = 'pdf')
-# run this only when a PDF is uploaded
-if input_pdf is not None:
-    # byte object into a PDF file
-    with open("input.pdf", "wb") as f:
-        base64_pdf = base64.b64encode(input_pdf.read()).decode('utf-8')
-        f.write(base64.b64decode(base64_pdf))
-    f.close()
-#To print uploaded pdf
-def show_pdf(file_path):
-    with open(file_path,"rb") as f:
-        base64_pdf = base64.b64encode(f.read()).decode('utf-8')
-        pdf_display = f'<iframe src="data:application/pdf;base64,{base64_pdf}" width="800" height="800" type="application/pdf"></iframe>'
-    st.markdown('## Uploaded PDF')
-    st.markdown(pdf_display, unsafe_allow_html=True)
-#st.sidebar.markdown('Display Uploaded PDF')
-#if st.sidebar.button('Show'):
-    #show_pdf("input.pdf")
-# read the pdf and parse it using stream
-if input_pdf is not None:
-    table = camelot.read_pdf('input.pdf', flavor='stream',split_text = True,layout_kwargs={'detect_vertical':True},backend='poppler')
-    df = table[0].df
-    #df = df.dropna(axis=1, thresh=int(len(df)*0.7),inplace=True)
-    df = pd.DataFrame(df)
-    for i in df.index:
-        if df['Alerted'][i]=='o':
-            df['Alerted'][i]='NO'
-        else:
-            df['Alerted'][i]='YES'
-st.sidebar.markdown('Extract tables from PDF')
-if st.sidebar.button('Extract Table'):
-    st.markdown('## Extracted table from PDF')
-    st.dataframe(df)
-if input_pdf is not None:
-    st.sidebar.markdown('Download Extracted Table as CSV file')
-    st.sidebar.download_button("Download",df.to_csv(),file_name = 'extracted_table.csv', mime = 'text/csv')

 import streamlit as st
+import pandas as pd
+import matplotlib.pyplot as plt
+st.title('Earthquake Data Explorer')
+st.text('This is a web app to allow exploration of Earthquake Data')