Spaces:

chandanzeon
/

Fetch_Employer_Name

Sleeping

App Files Files Community

Fetch_Employer_Name / app.py

chandanzeon

Validation For Master file

b7a3211 4 months ago

raw

history blame contribute delete

4.29 kB

	import streamlit as st
	import pandas as pd
	from io import BytesIO
	from helper import get_res_df
	import nltk

	nltk.download('wordnet')

	def to_excel(df):
	"""
	Convert a Pandas DataFrame to an Excel file in memory.

	Parameters:
	df (DataFrame): The DataFrame to be converted to Excel format.

	Returns:
	bytes: The in-memory Excel file data.
	"""
	output = BytesIO()
	with pd.ExcelWriter(output, engine='xlsxwriter') as writer:
	df.to_excel(writer, index=False)
	processed_data = output.getvalue()
	return processed_data

	def validate_columns(df):
	required_columns = {
	"Employer Number", "Employer Name", "Bank Statement Reference", "Employer Province",
	"Region", "Industry", "Contributing Status", "Date Joined Plan", "Termination Date", "Email Addresses"
	}
	missing_columns = required_columns - set(df.columns)

	if missing_columns:
	st.error(f"Upload a valid Master Sheet. These columns are missing: {', '.join(missing_columns)}")
	return None
	else:
	return df

	def process_files(excel_file, text_file):
	"""
	Process the uploaded Excel/CSV and text files and return cleaned dataframes.

	Parameters:
	excel_file (UploadedFile): The uploaded Excel or CSV file.
	text_file (UploadedFile): The uploaded text file.

	Returns:
	Tuple[DataFrame, DataFrame]: A tuple containing the cleaned DataFrame from the Excel/CSV file
	and a DataFrame created from the text file data.
	"""
	print(excel_file, text_file) # Debugging information

	# Read the Excel/CSV file into a DataFrame
	if excel_file.name.endswith('.csv'):
	df_excel = pd.read_csv(excel_file)
	else:
	df_excel = pd.read_excel(excel_file)

	# Validate Master Sheet
	df_excel = validate_columns(df_excel)
	if df_excel is None:
	return None, None

	# Ensure the 'Employer Number' column values are formatted as zero-padded 6-digit strings
	df_excel['Employer Number'] = [str(number).zfill(6) for number in df_excel['Employer Number']]
	df_excel = df_excel.dropna(subset=['Employer Name'])

	# Read and process the text file content into a list of lines
	lines = text_file.read().decode('utf-8').splitlines()
	data = [line.strip().split(',') for line in lines] # Split each line by commas

	# Create a DataFrame from the parsed text file data
	df = pd.DataFrame(data)
	df = df[df[1].isin(['1001010071', '1001233102'])]

	return df_excel, df


	# Streamlit UI section
	st.title("Generate E-Batch") # Application title

	# Sidebar file uploader widgets to allow users to upload an Excel/CSV file and a text file
	with st.sidebar:
	st.header("Upload Files")
	uploaded_excel = st.file_uploader("Upload the Master file (.xls or .csv)", type=["csv", "xls", "xlsx"])
	uploaded_text = st.file_uploader("Upload your Text file (.txt)", type=["txt"])

	threshold = st.slider("Adjust Threshold", min_value=0, max_value=30, value=12)

	# Check if both files are uploaded
	if uploaded_excel and uploaded_text:
	master_data, df = process_files(uploaded_excel, uploaded_text) # Process the files
	if master_data is None and df is None:
	st.stop()

	res, fbdm, fben, fbbm25, nf = get_res_df(master_data, df, threshold) # Generate the result DataFrame using the helper function

	# Create a layout with four columns to display the metrics
	col1, col2, col3, col4 = st.columns(4)
	with col1:
	st.metric("Direct Match", fbdm)
	with col2:
	st.metric("Employer Match", fben)
	with col3:
	st.metric("AI Based Match", fbbm25)
	with col4:
	st.metric("No Clear Match", nf)

	st.write("Final Output") # Display the result of file processing
	st.dataframe(res) # Show the result in a table format on the web app

	# Convert the result DataFrame to an Excel file for download
	excel_data = to_excel(res)

	# Provide a button for the user to download the result as an Excel file
	st.download_button(label="Download Excel",
	data=excel_data,
	file_name='_Consolidated E-Batch for Banking_.xlsx',
	mime='application/vnd.openxmlformats-officedocument.spreadsheetml.sheet')