Spaces:

siddop
/

Attrition_Predictor

Sleeping

App Files Files Community

Attrition_Predictor / app.py

siddop

Update app.py

272185d verified 7 months ago

raw

history blame

4.91 kB

	import pandas as pd
	import numpy as np
	from sklearn.preprocessing import LabelEncoder, MinMaxScaler
	from tensorflow import keras
	from sklearn.model_selection import train_test_split
	from sklearn.metrics import r2_score
	from keras.models import Sequential
	from keras.layers import Dense, Dropout, BatchNormalization
	from keras import regularizers
	import tensorflow as tf
	import joblib
	from nltk.tokenize import word_tokenize
	import re
	from lime.lime_tabular import LimeTabularExplainer
	from keras.utils import to_categorical
	from sklearn.preprocessing import OneHotEncoder
	import nltk
	import gradio as gr
	nltk.download('punkt')
	from nltk.tokenize import word_tokenize

	# ---------------------------------------------------------------------------
	# Data preparation: runs once at import time and leaves behind the fitted
	# encoders, scaler, model and explainer that predict_label() relies on.
	# ---------------------------------------------------------------------------
	df = pd.read_csv("Data.csv")
	df2 = df.copy()

	# Categorical feature columns (the 'Attrition' target is excluded) and the
	# remaining non-object columns. int_cols must be captured *before* the
	# encoding loop below, because encoding turns the object columns numeric.
	object_cols = df2.select_dtypes(include=['object']).columns.drop('Attrition')
	int_cols = df2.select_dtypes(exclude=['object']).columns

	# One LabelEncoder per categorical column; the fitted encoders are kept so
	# user input can be encoded the same way at prediction time.
	le_dict = {name: LabelEncoder() for name in object_cols}
	for name, fitted in le_dict.items():
	    df2[name] = fitted.fit_transform(df[name])
	classes_dict = {name: fitted.classes_ for name, fitted in le_dict.items()}

	# Features are every column except the last; the last column is the target.
	X = df2.iloc[:, :-1]
	y = df2.iloc[:, -1]

	# One-hot encode the target into two indicator columns.
	encoder = OneHotEncoder()
	y2 = encoder.fit_transform(y.to_numpy().reshape(-1, 1))
	y3 = pd.DataFrame(y2.toarray(), columns=['No', 'Yes'])

	# Order in which the UI hands values to predict_label():
	# categorical columns first, then the numeric ones.
	colList = [*object_cols, *int_cols]

	# Scale every feature to the [0, 1] range
	scaler = MinMaxScaler()
	X_scaled = scaler.fit_transform(X)

	# Hold out 20% of the rows as a test set (fixed seed for reproducibility)
	X_train, X_test, y_train, y_test = train_test_split(
	    X_scaled,
	    y3,
	    test_size=0.2,
	    random_state=0,
	)

	# Pre-trained network used for all predictions
	loaded_model = tf.keras.models.load_model('Final_NN_model.keras')

	# LIME explainer built on the scaled feature matrix
	explainer = LimeTabularExplainer(
	    training_data=X_scaled,
	    class_names=[0, 1],
	    mode="classification",
	    feature_names=X.columns.tolist(),
	)

	# Your machine learning model function
	def _input_error(message):
	    """Return the (message, placeholder table) pair shown when inputs are unusable."""
	    placeholder = pd.DataFrame(
	        [['awaiting inputs', 'awaiting inputs']],
	        columns=["Feature", "Impact"],
	    )
	    return message, placeholder


	def predict_label(*args):
	    """Predict attrition for one employee and explain the prediction.

	    Args:
	        *args: One value per feature, given positionally in ``colList``
	            order (categorical columns first, then numeric columns).
	            Categorical values must be labels known to the fitted
	            encoders in ``le_dict``; numeric values may be numbers or
	            numeric text.

	    Returns:
	        A ``(message, table)`` pair. ``message`` is the prediction
	        sentence, or a request to fix the inputs. ``table`` is a list of
	        ``(feature, impact)`` tuples, or a placeholder DataFrame when the
	        inputs could not be used.
	    """
	    # Treat None and blank/whitespace-only text as "not filled in",
	    # in addition to the empty string.
	    if any(
	        value is None or (isinstance(value, str) and not value.strip())
	        for value in args
	    ):
	        return _input_error("Please fill in all inputs")

	    # Map the positional inputs to their column names
	    input_dict = dict(zip(colList, args))

	    # Rearrange the values into the column order of X. Columns without a
	    # label encoder are numeric, so their text is converted here and bad
	    # input is reported instead of failing inside the scaler.
	    row = {}
	    for col in X.columns:
	        value = input_dict[col]
	        if col not in le_dict:
	            try:
	                value = float(value)
	            except (TypeError, ValueError):
	                return _input_error(f"Please enter a valid number for {col}")
	        row[col] = value
	    input_df = pd.DataFrame([row], columns=list(row))

	    # Encode labels of object columns with the encoders fitted at start-up
	    for col, label_encoder in le_dict.items():
	        input_df[col] = label_encoder.transform(input_df[col])

	    # Scale columns; the result is a single-row 2-D array
	    scaled_row = scaler.transform(input_df)

	    # Use the module-level model (loaded once at start-up) and run a
	    # single forward pass instead of reloading and predicting twice.
	    probabilities = loaded_model.predict(scaled_row)[0]
	    predof0 = round(probabilities[0], 4)*100
	    predof1 = round(probabilities[1], 4)*100

	    # Explain the prediction over every feature
	    exp = explainer.explain_instance(
	        data_row=scaled_row[0],
	        predict_fn=loaded_model.predict,
	        num_features=scaled_row.shape[1],
	    )

	    # as_map() yields (feature index, weight) pairs for the explained
	    # label (1, LIME's default), so the column name is looked up directly
	    # rather than parsed out of LIME's human-readable condition strings.
	    feature_names = list(X.columns)
	    featimp_table = []
	    for feature_index, raw_weight in exp.as_map()[1]:
	        # Weights are rounded to 2 decimals before the sign test, so
	        # negligible weights count as non-positive.
	        weight = round(raw_weight, 2)
	        if weight <= 0:
	            impact = 'positive impact on retention'
	        else:
	            impact = 'negative impact on retention'
	        featimp_table.append((feature_names[feature_index], impact))

	    # The band is chosen from the first model output (predof0); the
	    # percentage shown is the second output (predof1).
	    if predof0 >= 60:
	        return f"Low probability ({predof1:.2f}%) of attrition", featimp_table
	    if predof0 >= 30:
	        return f"Some probability ({predof1:.2f}%) of attrition", featimp_table
	    return f"High probability ({predof1:.2f}%) of attrition", featimp_table

	# One input widget per feature: a dropdown of the known labels for each
	# categorical column, a free-text box for each numeric column.
	obj_config = [
	    gr.Dropdown(label=name, choices=sorted(classes_dict[name].tolist()))
	    for name in object_cols
	]
	int_config = [
	    gr.Textbox(label=name, placeholder='enter a number')
	    for name in int_cols
	]

	# Dropdowns first, then text boxes
	input_config = [*obj_config, *int_config]

	# Output widgets: the prediction sentence and the per-feature impact table
	prediction_output = gr.Textbox(label="Prediction")
	impact_output = gr.DataFrame(
	    headers=["Feature", "Impact"],
	    label="All features and their impact on retention",
	)

	# Gradio Interface (live=False: set live to True to see the interface
	# while running the code)
	iface = gr.Interface(
	    fn=predict_label,
	    inputs=input_config,
	    outputs=[prediction_output, impact_output],
	    title="Attrition Prediction",
	    description="Based on your inputs this model predicts if an employee in an organisation would resign or not.",
	    allow_flagging='never',
	    live=False,
	)

	# Start the app and request a public share link
	iface.launch(share=True)