Spaces:
Build error
Build error
| import streamlit as st | |
| import pandas as pd | |
| from io import BytesIO | |
| from helper import get_res_df | |
| import nltk | |
| nltk.download('wordnet') | |
| def to_excel(df): | |
| """ | |
| Convert a Pandas DataFrame to an Excel file in memory. | |
| Parameters: | |
| df (DataFrame): The DataFrame to be converted to Excel format. | |
| Returns: | |
| bytes: The in-memory Excel file data. | |
| """ | |
| output = BytesIO() | |
| with pd.ExcelWriter(output, engine='xlsxwriter') as writer: | |
| df.to_excel(writer, index=False) | |
| processed_data = output.getvalue() | |
| return processed_data | |
| def validate_columns(df): | |
| required_columns = { | |
| "Employer Number", "Employer Name", "Bank Statement Reference", "Employer Province", | |
| "Region", "Industry", "Contributing Status", "Date Joined Plan", "Termination Date", "Email Addresses" | |
| } | |
| missing_columns = required_columns - set(df.columns) | |
| if missing_columns: | |
| st.error(f"Upload a valid Master Sheet. These columns are missing: {', '.join(missing_columns)}") | |
| return None | |
| else: | |
| return df | |
| def process_files(excel_file, text_file): | |
| """ | |
| Process the uploaded Excel/CSV and text files and return cleaned dataframes. | |
| Parameters: | |
| excel_file (UploadedFile): The uploaded Excel or CSV file. | |
| text_file (UploadedFile): The uploaded text file. | |
| Returns: | |
| Tuple[DataFrame, DataFrame]: A tuple containing the cleaned DataFrame from the Excel/CSV file | |
| and a DataFrame created from the text file data. | |
| """ | |
| print(excel_file, text_file) # Debugging information | |
| # Read the Excel/CSV file into a DataFrame | |
| if excel_file.name.endswith('.csv'): | |
| df_excel = pd.read_csv(excel_file) | |
| else: | |
| df_excel = pd.read_excel(excel_file) | |
| # Validate Master Sheet | |
| df_excel = validate_columns(df_excel) | |
| if df_excel is None: | |
| return None, None | |
| # Ensure the 'Employer Number' column values are formatted as zero-padded 6-digit strings | |
| df_excel['Employer Number'] = [str(number).zfill(6) for number in df_excel['Employer Number']] | |
| df_excel = df_excel.dropna(subset=['Employer Name']) | |
| # Read and process the text file content into a list of lines | |
| lines = text_file.read().decode('utf-8').splitlines() | |
| data = [line.strip().split(',') for line in lines] # Split each line by commas | |
| # Create a DataFrame from the parsed text file data | |
| df = pd.DataFrame(data) | |
| df = df[df[1].isin(['1001010071', '1001233102'])] | |
| return df_excel, df | |
| # Streamlit UI section | |
| st.title("Generate E-Batch") # Application title | |
| # Sidebar file uploader widgets to allow users to upload an Excel/CSV file and a text file | |
| with st.sidebar: | |
| st.header("Upload Files") | |
| uploaded_excel = st.file_uploader("Upload the Master file (.xls or .csv)", type=["csv", "xls", "xlsx"]) | |
| uploaded_text = st.file_uploader("Upload your Text file (.txt)", type=["txt"]) | |
| threshold = st.slider("Adjust Threshold", min_value=0, max_value=30, value=12) | |
| # Check if both files are uploaded | |
| if uploaded_excel and uploaded_text: | |
| master_data, df = process_files(uploaded_excel, uploaded_text) # Process the files | |
| if master_data is None and df is None: | |
| st.stop() | |
| res, fbdm, fben, fbbm25, nf = get_res_df(master_data, df, threshold) # Generate the result DataFrame using the helper function | |
| # Create a layout with four columns to display the metrics | |
| col1, col2, col3, col4 = st.columns(4) | |
| with col1: | |
| st.metric("Direct Match", fbdm) | |
| with col2: | |
| st.metric("Employer Match", fben) | |
| with col3: | |
| st.metric("AI Based Match", fbbm25) | |
| with col4: | |
| st.metric("No Clear Match", nf) | |
| st.write("Final Output") # Display the result of file processing | |
| st.dataframe(res) # Show the result in a table format on the web app | |
| # Convert the result DataFrame to an Excel file for download | |
| excel_data = to_excel(res) | |
| # Provide a button for the user to download the result as an Excel file | |
| st.download_button(label="Download Excel", | |
| data=excel_data, | |
| file_name='_Consolidated E-Batch for Banking_.xlsx', | |
| mime='application/vnd.openxmlformats-officedocument.spreadsheetml.sheet') | |