Broadridge_AiContract

Sleeping

App Files Files Community

Broadridge_AiContract / pdfDocumentProcessor.py

karthikeyan-r

Create pdfDocumentProcessor.py

450bd79 verified 4 months ago

raw

history blame

2.63 kB

	import streamlit as st
	import os
	import pandas as pd
	import base64
	from findUpdate import FindUpdate # Import the FindUpdate class
	from tempfile import NamedTemporaryFile

	class PDFDocumentProcessor:
	    """Streamlit helper that compares an uploaded agreement PDF with a template PDF.

	    The two uploads are written to temporary files, passed to
	    ``FindUpdate.main_processing_function`` and the reported changes are
	    shown as a table together with a CSV download link.
	    """

	    # Columns shown in the changes table and exported to CSV, in display order.
	    _REPORT_COLUMNS = (
	        'section_number', 'page_number', 'actual',
	        'changed', 'analysis', 'type_of_change',
	    )

	    def __init__(self):
	        # Current values of the two upload widgets (None until a file is chosen).
	        self.uploaded_agreement = None
	        self.uploaded_template = None
	        self.find_update = FindUpdate()  # Create an instance of FindUpdate class

	    def file_uploaders(self) -> None:
	        """Render both PDF upload widgets and store what they return."""
	        self.uploaded_agreement = st.file_uploader("Upload the PDF Agreement", type=['pdf'])
	        self.uploaded_template = st.file_uploader("Upload the PDF Template", type=['pdf'])

	    def process_files(self, agreement_path: str, template_path: str) -> None:
	        """Compare the two PDFs and render the result in the Streamlit page.

	        Args:
	            agreement_path: Path of the temporary agreement PDF.
	            template_path: Path of the temporary template PDF.

	        Any exception raised while processing or rendering is reported with
	        ``st.error`` instead of propagating. Both paths are deleted afterwards
	        whether or not processing succeeded.
	        """
	        try:
	            # Use the find_update instance to call the processing function.
	            # NOTE(review): the result is expected to expose a 'changes' entry
	            # that pandas can turn into rows -- confirm against FindUpdate.
	            result = self.find_update.main_processing_function(agreement_path, template_path)
	            st.success("Files successfully processed!")

	            df_changes = self._changes_to_frame(result['changes'])

	            # Display the DataFrame in the UI
	            st.dataframe(df_changes, height=600)  # You can adjust height based on your needs

	            # Offer the same table as a CSV download. The link assignment used
	            # to be commented out, which made this step fail with NameError.
	            csv = df_changes.to_csv(index=False)
	            st.markdown(self._csv_download_link(csv), unsafe_allow_html=True)

	        except Exception as e:
	            # UI boundary: show the failure to the user rather than crash the app.
	            st.error(f"Error processing files: {e}")
	        finally:
	            # Clean up temporary files after processing; each removal is
	            # attempted independently so one failure cannot skip the other.
	            self._remove_temp_file(agreement_path)
	            self._remove_temp_file(template_path)

	    def save_uploaded_files(self) -> None:
	        """Save the uploaded files temporarily and process them.

	        Does nothing unless both the agreement and the template were uploaded.
	        """
	        if not (self.uploaded_agreement and self.uploaded_template):
	            return

	        agreement_path = self._write_temp_pdf(self.uploaded_agreement)
	        try:
	            template_path = self._write_temp_pdf(self.uploaded_template)
	        except Exception:
	            # The first file was created with delete=False; do not leak it.
	            self._remove_temp_file(agreement_path)
	            raise

	        self.process_files(agreement_path, template_path)

	    @classmethod
	    def _changes_to_frame(cls, changes):
	        """Build the report DataFrame restricted to the report columns."""
	        frame = pd.DataFrame(changes)
	        if frame.empty:
	            # No changes reported: return an empty table with the expected
	            # headers instead of failing on the column selection below.
	            return pd.DataFrame(columns=list(cls._REPORT_COLUMNS))
	        return frame[list(cls._REPORT_COLUMNS)]

	    @staticmethod
	    def _csv_download_link(csv_text: str) -> str:
	        """Return an HTML anchor embedding ``csv_text`` as a base64 data URI."""
	        b64 = base64.b64encode(csv_text.encode()).decode()  # some browsers need base64 encoding
	        return f'<a href="data:file/csv;base64,{b64}" download="document_changes.csv">Download CSV File</a>'

	    @staticmethod
	    def _write_temp_pdf(uploaded_file) -> str:
	        """Copy an uploaded file into a persistent temporary ``.pdf`` and return its path."""
	        # getvalue() returns the whole buffer regardless of the current stream
	        # position, so an upload that was already read is not saved empty.
	        # Assumes a BytesIO-like object, as returned by st.file_uploader.
	        data = uploaded_file.getvalue()
	        with NamedTemporaryFile(delete=False, suffix=".pdf", mode='wb') as temp_file:
	            temp_file.write(data)
	            return temp_file.name

	    @staticmethod
	    def _remove_temp_file(path: str) -> None:
	        """Delete ``path``; report a failure as a warning instead of raising."""
	        try:
	            os.remove(path)
	        except OSError as e:
	            st.warning(f"Could not remove temporary file {path}: {e}")