Spaces:
Sleeping
Sleeping
import streamlit as st | |
import pandas as pd | |
from io import BytesIO | |
from helper import get_res_df | |
import nltk | |
nltk.download('wordnet') | |
def to_excel(df): | |
""" | |
Convert a Pandas DataFrame to an Excel file in memory. | |
Parameters: | |
df (DataFrame): The DataFrame to be converted to Excel format. | |
Returns: | |
bytes: The in-memory Excel file data. | |
""" | |
output = BytesIO() | |
with pd.ExcelWriter(output, engine='xlsxwriter') as writer: | |
df.to_excel(writer, index=False) | |
processed_data = output.getvalue() | |
return processed_data | |
def validate_columns(df): | |
required_columns = { | |
"Employer Number", "Employer Name", "Bank Statement Reference", "Employer Province", | |
"Region", "Industry", "Contributing Status", "Date Joined Plan", "Termination Date", "Email Addresses" | |
} | |
missing_columns = required_columns - set(df.columns) | |
if missing_columns: | |
st.error(f"Upload a valid Master Sheet. These columns are missing: {', '.join(missing_columns)}") | |
return None | |
else: | |
return df | |
def process_files(excel_file, text_file): | |
""" | |
Process the uploaded Excel/CSV and text files and return cleaned dataframes. | |
Parameters: | |
excel_file (UploadedFile): The uploaded Excel or CSV file. | |
text_file (UploadedFile): The uploaded text file. | |
Returns: | |
Tuple[DataFrame, DataFrame]: A tuple containing the cleaned DataFrame from the Excel/CSV file | |
and a DataFrame created from the text file data. | |
""" | |
print(excel_file, text_file) # Debugging information | |
# Read the Excel/CSV file into a DataFrame | |
if excel_file.name.endswith('.csv'): | |
df_excel = pd.read_csv(excel_file) | |
else: | |
df_excel = pd.read_excel(excel_file) | |
# Validate Master Sheet | |
df_excel = validate_columns(df_excel) | |
if df_excel is None: | |
return None, None | |
# Ensure the 'Employer Number' column values are formatted as zero-padded 6-digit strings | |
df_excel['Employer Number'] = [str(number).zfill(6) for number in df_excel['Employer Number']] | |
df_excel = df_excel.dropna(subset=['Employer Name']) | |
# Read and process the text file content into a list of lines | |
lines = text_file.read().decode('utf-8').splitlines() | |
data = [line.strip().split(',') for line in lines] # Split each line by commas | |
# Create a DataFrame from the parsed text file data | |
df = pd.DataFrame(data) | |
df = df[df[1].isin(['1001010071', '1001233102'])] | |
return df_excel, df | |
# Streamlit UI section | |
st.title("Generate E-Batch") # Application title | |
# Sidebar file uploader widgets to allow users to upload an Excel/CSV file and a text file | |
with st.sidebar: | |
st.header("Upload Files") | |
uploaded_excel = st.file_uploader("Upload the Master file (.xls or .csv)", type=["csv", "xls", "xlsx"]) | |
uploaded_text = st.file_uploader("Upload your Text file (.txt)", type=["txt"]) | |
threshold = st.slider("Adjust Threshold", min_value=0, max_value=30, value=12) | |
# Check if both files are uploaded | |
if uploaded_excel and uploaded_text: | |
master_data, df = process_files(uploaded_excel, uploaded_text) # Process the files | |
if master_data is None and df is None: | |
st.stop() | |
res, fbdm, fben, fbbm25, nf = get_res_df(master_data, df, threshold) # Generate the result DataFrame using the helper function | |
# Create a layout with four columns to display the metrics | |
col1, col2, col3, col4 = st.columns(4) | |
with col1: | |
st.metric("Direct Match", fbdm) | |
with col2: | |
st.metric("Employer Match", fben) | |
with col3: | |
st.metric("AI Based Match", fbbm25) | |
with col4: | |
st.metric("No Clear Match", nf) | |
st.write("Final Output") # Display the result of file processing | |
st.dataframe(res) # Show the result in a table format on the web app | |
# Convert the result DataFrame to an Excel file for download | |
excel_data = to_excel(res) | |
# Provide a button for the user to download the result as an Excel file | |
st.download_button(label="Download Excel", | |
data=excel_data, | |
file_name='_Consolidated E-Batch for Banking_.xlsx', | |
mime='application/vnd.openxmlformats-officedocument.spreadsheetml.sheet') | |