Spaces:

ashcodes
/

test

Runtime error

App Files Files Community

test / app.py

ashcodes

Upload 2 files

d317fef over 1 year ago

raw history blame contribute delete

No virus

2.2 kB

	import streamlit as st
	import numpy as np
	import pandas as pd
	import subprocess
	from subprocess import STDOUT, check_call
	import os
	import base64
	import camelot

	# to run this only once and it's cached
	@st.cache
	def ghostscript():
	    """Install Ghostscript on the host Linux machine via ``apt-get``.

	    Runs ``apt-get install -y ghostscript`` through ``/bin/bash``, discards
	    all of the command's output and blocks until it exits. The exit status
	    is not inspected, so a failed installation is ignored (best effort).

	    Returns:
	        None.
	    """
	    # subprocess.DEVNULL replaces open(os.devnull, "wb"), whose file handle
	    # was never closed. run() also waits for the child, so no separate
	    # wait() call is needed.
	    subprocess.run(
	        'apt-get install -y ghostscript',
	        shell=True,
	        stdin=None,
	        stdout=subprocess.DEVNULL,
	        stderr=STDOUT,
	        executable="/bin/bash",
	        check=False,  # keep the original best-effort behaviour
	    )

	# Run the Ghostscript installer defined above before building the page.
	ghostscript()

	# Page heading: white, centered title on a tomato-colored banner.
	html_temp = """
	<div style="background-color:tomato;padding:10px">
	<h2 style="color:white;text-align:center;">PDF Table Extractor WebApp </h2>
	</div>
	"""
	# unsafe_allow_html=True lets the raw HTML above through to the page.
	st.markdown(html_temp,unsafe_allow_html=True)


	# file uploader on streamlit
	#st.sidebar.markdown('Upload PDF files')
	# Sidebar upload widget restricted to PDF files. input_pdf is compared
	# against None later in the script to detect whether a file was uploaded.
	input_pdf = st.sidebar.file_uploader(label = "Upload PDF files here", type = 'pdf')

	# run this only when a PDF is uploaded
	if input_pdf is not None:
	    # Persist the upload as "input.pdf" so it can be opened by path.
	    # getvalue() returns the whole buffer regardless of the current read
	    # position (Streamlit documents UploadedFile as a BytesIO subclass), so
	    # an already-consumed stream cannot produce a truncated file. The old
	    # base64 encode/decode round trip was a no-op and has been dropped;
	    # the `with` block closes the file, so no explicit close() is needed.
	    with open("input.pdf", "wb") as f:
	        f.write(input_pdf.getvalue())

	#To print uploaded pdf
	def show_pdf(file_path):
	    """Render the PDF stored at *file_path* inline on the Streamlit page.

	    The file's bytes are base64-encoded and embedded as a ``data:`` URI in
	    an 800x800 ``<iframe>``, which is written with ``st.markdown`` beneath
	    an "Uploaded PDF" heading.
	    """
	    with open(file_path, "rb") as pdf_file:
	        raw_bytes = pdf_file.read()
	    encoded = base64.b64encode(raw_bytes).decode('utf-8')

	    st.markdown('## Uploaded PDF')
	    iframe_html = f'<iframe src="data:application/pdf;base64,{encoded}" width="800" height="800" type="application/pdf"></iframe>'
	    # Raw HTML must be explicitly allowed for the iframe to be emitted.
	    st.markdown(iframe_html, unsafe_allow_html=True)

	#st.sidebar.markdown('Display Uploaded PDF')
	#if st.sidebar.button('Show'):
	#show_pdf("input.pdf")

	# read the pdf and parse it using stream
	if input_pdf is not None:
	    # Parse the saved upload with Camelot's "stream" flavor.
	    table = camelot.read_pdf('input.pdf', flavor='stream', layout_kwargs={'detect_vertical': True}, backend='poppler')

	    if len(table) == 0:
	        # Nothing was detected; indexing table[0] would raise IndexError
	        # and crash the app, so tell the user instead.
	        st.warning('No tables were found in the uploaded PDF.')
	    else:
	        # Only the first detected table is displayed and offered for download.
	        csv_table = table[0].df

	        st.sidebar.markdown('Extract tables from PDF')
	        if st.sidebar.button('Extract Table'):
	            st.markdown('## Extracted table from PDF')
	            st.dataframe(csv_table)

	        # Kept under the same guard so csv_table is always defined here.
	        st.sidebar.markdown('Download Extracted Table as CSV file')
	        st.sidebar.download_button("Download", csv_table.to_csv(), file_name='extracted_table.csv', mime='text/csv')