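# Spaces: Sleeping
# NOTE(review): the line above appears to be page-status text captured along with the file; it is not program code.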
import re

import gradio as gr
import joblib
import nltk
import numpy as np
import pandas as pd
import tensorflow as tf
from keras import regularizers
from keras.layers import Dense, Dropout, BatchNormalization
from keras.models import Sequential
from keras.utils import to_categorical
from lime.lime_tabular import LimeTabularExplainer
from nltk.tokenize import word_tokenize
from sklearn.metrics import r2_score
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import LabelEncoder, MinMaxScaler
from sklearn.preprocessing import OneHotEncoder
from tensorflow import keras

nltk.download('punkt')
# Read the raw data; `df` stays untouched and the label-encoded values are
# written into the copy `df2`.
df = pd.read_csv("Data.csv")
df2 = df.copy()

# Column groups are taken BEFORE encoding, while the categorical columns are
# still of object dtype. The 'Attrition' column is excluded from the
# categorical features.
object_cols = df2.select_dtypes(include=['object']).columns.drop('Attrition')
int_cols = df2.select_dtypes(exclude=['object']).columns

# One fitted LabelEncoder per categorical column, plus the classes each one
# learned (used later to populate the dropdown choices).
le_dict = {col: LabelEncoder().fit(df[col]) for col in object_cols}
classes_dict = {col: le.classes_ for col, le in le_dict.items()}
for col, le in le_dict.items():
    df2[col] = le.transform(df[col])

# Features are every column except the last; the last column is the target.
# NOTE(review): this assumes 'Attrition' is the final column of Data.csv - confirm.
X, y = df2.iloc[:, :-1], df2.iloc[:, -1]

# One-hot encode the target into two columns labelled 'No' and 'Yes'.
encoder = OneHotEncoder()
y2 = encoder.fit_transform(y.to_numpy().reshape(-1, 1))
y3 = pd.DataFrame(y2.toarray(), columns=['No', 'Yes'])

# Order in which the UI inputs are laid out: categorical columns first,
# then the numeric ones.
colList = [*object_cols, *int_cols]

# Scale every feature to the [0, 1] range.
scaler = MinMaxScaler().fit(X)
X_scaled = scaler.transform(X)

# Split the data into training and test sets
X_train, X_test, y_train, y_test = train_test_split(
    X_scaled,
    y3,
    test_size=0.2,
    random_state=0,
)

# Load the saved network used for prediction
loaded_model = tf.keras.models.load_model('Final_NN_model.keras')

# LIME explainer built on the scaled feature matrix, with the original
# column names as feature names.
explainer = LimeTabularExplainer(
    training_data=X_scaled,
    feature_names=list(X.columns),
    class_names=[0, 1],
    mode="classification",
)
# Your machine learning model function | |
def predict_label(*args):
    """Predict attrition for one employee and explain the prediction.

    Args:
        *args: One value per input, in ``colList`` order. Values belonging
            to the columns in ``int_cols`` must be parseable as finite
            numbers.

    Returns:
        tuple: ``(message, table)``. ``message`` is the text shown as the
        prediction. ``table`` is a list of ``(feature, impact)`` tuples, one
        per explained feature. When an input is missing or a numeric field
        cannot be parsed, ``message`` asks for a correction and ``table`` is
        a one-row placeholder DataFrame.
    """
    placeholder = pd.DataFrame(
        [['awaiting inputs', 'awaiting inputs']],
        columns=["Feature", "Impact"],
    )

    # An input that was never filled in may arrive as None (e.g. a dropdown
    # with no selection) rather than '', so both count as missing.
    if any(value is None or str(value).strip() == '' for value in args):
        return "Please fill in all inputs", placeholder

    # Map inputs to column names
    input_dict = dict(zip(colList, args))

    # Numeric fields come from free-text boxes: convert them here so a typo
    # produces a readable message instead of an exception inside the scaler.
    invalid = []
    for col in int_cols:
        try:
            number = float(input_dict[col])
        except (TypeError, ValueError):
            invalid.append(str(col))
            continue
        if not np.isfinite(number):
            invalid.append(str(col))
            continue
        input_dict[col] = number
    if invalid:
        return f"Please enter a valid number for: {', '.join(invalid)}", placeholder

    # Single-row frame with the columns in the same order as X
    row = pd.DataFrame([{col: input_dict[col] for col in X.columns}])

    # Encode labels of object columns with the encoders fitted at start-up
    for col, label_encoder in le_dict.items():
        row[col] = label_encoder.transform(row[col])

    # Scale columns; the result is a (1, n_features) array
    scaled = scaler.transform(row)

    # Reuse the model loaded at module level and run it once for both
    # class probabilities (no per-request reload from disk).
    probabilities = loaded_model.predict(scaled.reshape(1, -1))[0]
    predof0 = round(float(probabilities[0]), 4) * 100
    predof1 = round(float(probabilities[1]), 4) * 100

    # Explain the prediction (at most 19 features are reported)
    max_features = 19
    exp = explainer.explain_instance(
        data_row=scaled[0],
        predict_fn=loaded_model.predict,
        num_features=max_features,
    )

    # as_map() yields (feature_index, weight) pairs for label 1, so the
    # feature name is looked up directly instead of being parsed back out of
    # LIME's textual description. Iterating over the pairs also copes with
    # fewer than `max_features` entries.
    feature_names = list(X.columns)
    featimp_table = []
    for feature_index, weight in exp.as_map()[1]:
        # A positive weight pushes towards label 1, which the original
        # mapping reports as a negative impact on retention.
        if round(weight, 2) > 0:
            impact = 'negative impact on retention'
        else:
            impact = 'positive impact on retention'
        featimp_table.append((feature_names[feature_index], impact))

    # Return prediction, bucketed by the probability of the first class
    if predof0 >= 60:
        return f"Low probability ({predof1:.2f}%) of attrition", featimp_table
    if predof0 >= 30:
        return f"Some probability ({predof1:.2f}%) of attrition", featimp_table
    return f"High probability ({predof1:.2f}%) of attrition", featimp_table
# One input widget per feature: a dropdown listing the known classes for
# each categorical column, and a free-text box for each numeric column.
obj_config = [
    gr.Dropdown(choices=sorted(classes_dict[column].tolist()), label=column)
    for column in object_cols
]
int_config = [
    gr.Textbox(placeholder='enter a number', label=column)
    for column in int_cols
]

# Categorical widgets first, then numeric ones (same order as colList)
input_config = [*obj_config, *int_config]

# Gradio Interface wiring the inputs to predict_label and its two outputs
iface = gr.Interface(
    fn=predict_label,
    inputs=input_config,
    outputs=[
        gr.Textbox(label="Prediction"),
        gr.DataFrame(
            label="All features and their impact on retention",
            headers=["Feature", "Impact"],
        ),
    ],
    title="Attrition Prediction",
    description="Based on your inputs this model predicts if an employee in an organisation would resign or not.",
    allow_flagging='never',
    live=False,  # live mode is left off
)

# Launch the Gradio interface
iface.launch(share=True)