Broadridge_AiContract / pdfDocumentProcessor.py
karthikeyan-r's picture
Create pdfDocumentProcessor.py
450bd79 verified
raw
history blame
2.63 kB
import streamlit as st
import os
import pandas as pd
import base64
from findUpdate import FindUpdate # Import the FindUpdate class
from tempfile import NamedTemporaryFile
class PDFDocumentProcessor:
    """Streamlit UI helper that compares an uploaded PDF agreement with a PDF template.

    Typical flow: call ``file_uploaders()`` to render the two upload widgets,
    then ``save_uploaded_files()`` to spool both uploads to temporary files and
    run the comparison through ``FindUpdate.main_processing_function``.
    """

    # Columns displayed in the UI and exported to CSV, in display order.
    CHANGE_COLUMNS = (
        'section_number',
        'page_number',
        'actual',
        'changed',
        'analysis',
        'type_of_change',
    )

    def __init__(self):
        """Start with no uploads and create the ``FindUpdate`` comparison engine."""
        self.uploaded_agreement = None
        self.uploaded_template = None
        self.find_update = FindUpdate()  # performs the actual comparison

    def file_uploaders(self):
        """Render the two PDF upload widgets and remember what the user selected."""
        self.uploaded_agreement = st.file_uploader("Upload the PDF Agreement", type=['pdf'])
        self.uploaded_template = st.file_uploader("Upload the PDF Template", type=['pdf'])

    @staticmethod
    def _csv_download_link(df_changes, filename="document_changes.csv"):
        """Return an HTML anchor that downloads *df_changes* as a CSV file.

        The CSV is embedded in a base64 ``data:`` URI so that no server-side
        file is required for the download.
        """
        csv = df_changes.to_csv(index=False)
        b64 = base64.b64encode(csv.encode()).decode()  # some browsers need base64 encoding
        return (
            f'<a href="data:file/csv;base64,{b64}" download="{filename}">'
            'Download CSV File</a>'
        )

    @staticmethod
    def _write_temp_pdf(data):
        """Write *data* (bytes) to a new ``.pdf`` temporary file and return its path.

        The file is created with ``delete=False``; the caller must remove it.
        """
        with NamedTemporaryFile(delete=False, suffix=".pdf", mode='wb') as temp_file:
            temp_file.write(data)
            return temp_file.name

    @staticmethod
    def _remove_temp_file(path):
        """Best-effort removal of *path*.

        Returns:
            True if the file no longer exists afterwards (removed now or
            already absent), False if removal failed for another OS reason.
        """
        try:
            os.remove(path)
        except FileNotFoundError:
            return True
        except OSError:
            return False
        return True

    def process_files(self, agreement_path, template_path):
        """Compare the two PDFs, show the detected changes and offer a CSV download.

        Args:
            agreement_path: Filesystem path of the agreement PDF.
            template_path: Filesystem path of the template PDF.

        Any exception raised during processing is reported in the UI through
        ``st.error`` rather than propagated. Both input files are removed
        afterwards regardless of the outcome.
        """
        try:
            # Use the find_update instance to call the processing function
            result = self.find_update.main_processing_function(agreement_path, template_path)
            st.success("Files successfully processed!")

            # Convert the result to a DataFrame restricted to the display columns.
            changes = pd.DataFrame(result['changes'])
            columns = list(self.CHANGE_COLUMNS)
            if changes.empty:
                # No changes detected: an empty frame has no columns to select
                # from, so show an empty table with the expected headers.
                df_changes = pd.DataFrame(columns=columns)
            else:
                df_changes = changes[columns]

            # Display the DataFrame in the UI
            st.dataframe(df_changes, height=600)

            # Offer the same data as a CSV download
            st.markdown(self._csv_download_link(df_changes), unsafe_allow_html=True)
        except Exception as e:
            st.error(f"Error processing files: {e}")
        finally:
            # Clean up temporary files after processing. Each file is handled
            # independently so one failure cannot skip the other or mask the
            # outcome already shown to the user.
            for path in (agreement_path, template_path):
                if not self._remove_temp_file(path):
                    st.warning(f"Could not remove temporary file: {path}")

    def save_uploaded_files(self):
        """Spool both uploads to temporary PDF files and process them.

        Does nothing until both an agreement and a template have been uploaded.
        """
        if not (self.uploaded_agreement and self.uploaded_template):
            return

        # getvalue() returns the whole buffer regardless of the current read
        # position, so an already-consumed upload does not produce an empty file.
        agreement_path = self._write_temp_pdf(self.uploaded_agreement.getvalue())
        try:
            template_path = self._write_temp_pdf(self.uploaded_template.getvalue())
        except Exception:
            # Do not leak the first temp file if the second cannot be written.
            self._remove_temp_file(agreement_path)
            raise

        self.process_files(agreement_path, template_path)