Spaces:
Sleeping
Sleeping
File size: 4,290 Bytes
2d3bc6e 09f0d23 2d3bc6e b7a3211 2d3bc6e b7a3211 2d3bc6e 891d816 4efd35b 2d3bc6e 4efd35b 2d3bc6e fc3792c 2d3bc6e fd2307e 2d3bc6e 7381cc5 2d3bc6e b7a3211 2d3bc6e 7381cc5 fd2307e fc3792c fd2307e fc3792c fd2307e fc3792c fd2307e fc3792c fd2307e 2d3bc6e 80b184f 2d3bc6e |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 |
import streamlit as st
import pandas as pd
from io import BytesIO
from helper import get_res_df
import nltk
nltk.download('wordnet')
def to_excel(df):
"""
Convert a Pandas DataFrame to an Excel file in memory.
Parameters:
df (DataFrame): The DataFrame to be converted to Excel format.
Returns:
bytes: The in-memory Excel file data.
"""
output = BytesIO()
with pd.ExcelWriter(output, engine='xlsxwriter') as writer:
df.to_excel(writer, index=False)
processed_data = output.getvalue()
return processed_data
def validate_columns(df):
required_columns = {
"Employer Number", "Employer Name", "Bank Statement Reference", "Employer Province",
"Region", "Industry", "Contributing Status", "Date Joined Plan", "Termination Date", "Email Addresses"
}
missing_columns = required_columns - set(df.columns)
if missing_columns:
st.error(f"Upload a valid Master Sheet. These columns are missing: {', '.join(missing_columns)}")
return None
else:
return df
def process_files(excel_file, text_file):
"""
Process the uploaded Excel/CSV and text files and return cleaned dataframes.
Parameters:
excel_file (UploadedFile): The uploaded Excel or CSV file.
text_file (UploadedFile): The uploaded text file.
Returns:
Tuple[DataFrame, DataFrame]: A tuple containing the cleaned DataFrame from the Excel/CSV file
and a DataFrame created from the text file data.
"""
print(excel_file, text_file) # Debugging information
# Read the Excel/CSV file into a DataFrame
if excel_file.name.endswith('.csv'):
df_excel = pd.read_csv(excel_file)
else:
df_excel = pd.read_excel(excel_file)
# Validate Master Sheet
df_excel = validate_columns(df_excel)
if df_excel is None:
return None, None
# Ensure the 'Employer Number' column values are formatted as zero-padded 6-digit strings
df_excel['Employer Number'] = [str(number).zfill(6) for number in df_excel['Employer Number']]
df_excel = df_excel.dropna(subset=['Employer Name'])
# Read and process the text file content into a list of lines
lines = text_file.read().decode('utf-8').splitlines()
data = [line.strip().split(',') for line in lines] # Split each line by commas
# Create a DataFrame from the parsed text file data
df = pd.DataFrame(data)
df = df[df[1].isin(['1001010071', '1001233102'])]
return df_excel, df
# Streamlit UI section
st.title("Generate E-Batch") # Application title
# Sidebar file uploader widgets to allow users to upload an Excel/CSV file and a text file
with st.sidebar:
st.header("Upload Files")
uploaded_excel = st.file_uploader("Upload the Master file (.xls or .csv)", type=["csv", "xls", "xlsx"])
uploaded_text = st.file_uploader("Upload your Text file (.txt)", type=["txt"])
threshold = st.slider("Adjust Threshold", min_value=0, max_value=30, value=12)
# Check if both files are uploaded
if uploaded_excel and uploaded_text:
master_data, df = process_files(uploaded_excel, uploaded_text) # Process the files
if master_data is None and df is None:
st.stop()
res, fbdm, fben, fbbm25, nf = get_res_df(master_data, df, threshold) # Generate the result DataFrame using the helper function
# Create a layout with four columns to display the metrics
col1, col2, col3, col4 = st.columns(4)
with col1:
st.metric("Direct Match", fbdm)
with col2:
st.metric("Employer Match", fben)
with col3:
st.metric("AI Based Match", fbbm25)
with col4:
st.metric("No Clear Match", nf)
st.write("Final Output") # Display the result of file processing
st.dataframe(res) # Show the result in a table format on the web app
# Convert the result DataFrame to an Excel file for download
excel_data = to_excel(res)
# Provide a button for the user to download the result as an Excel file
st.download_button(label="Download Excel",
data=excel_data,
file_name='_Consolidated E-Batch for Banking_.xlsx',
mime='application/vnd.openxmlformats-officedocument.spreadsheetml.sheet')
|