# app.py — Attrition Prediction Gradio app
# (Hugging Face Space file; viewer metadata removed so the module parses.)
import pandas as pd
import numpy as np
from sklearn.preprocessing import LabelEncoder, MinMaxScaler
from tensorflow import keras
from sklearn.model_selection import train_test_split
from sklearn.metrics import r2_score
from keras.models import Sequential
from keras.layers import Dense, Dropout, BatchNormalization
from keras import regularizers
import tensorflow as tf
import joblib
from nltk.tokenize import word_tokenize
import re
from lime.lime_tabular import LimeTabularExplainer
from keras.utils import to_categorical
from sklearn.preprocessing import OneHotEncoder
import nltk
import gradio as gr
nltk.download('punkt')
from nltk.tokenize import word_tokenize
# --- Data preparation --------------------------------------------------------
# Label-encode every object (string) column except the target 'Attrition',
# keeping each fitted encoder so user input can be transformed the same way.
df = pd.read_csv("Data.csv")
df2 = df.copy()

object_cols = df2.select_dtypes(include=['object']).columns
object_cols = object_cols.delete(object_cols.get_loc('Attrition'))
int_cols = df2.select_dtypes(exclude=['object']).columns

le_dict = {}
classes_dict = {}
for col in object_cols:
    col_encoder = LabelEncoder()
    df2[col] = col_encoder.fit_transform(df[col])
    le_dict[col] = col_encoder
    classes_dict[col] = col_encoder.classes_

# Features are every column but the last; the last column is the target.
X = df2.iloc[:, :-1]
y = df2.iloc[:, -1]

# One-hot encode the binary target into two columns ('No', 'Yes').
encoder = OneHotEncoder()
y2 = encoder.fit_transform(np.array(y).reshape(-1, 1))
y3 = pd.DataFrame(y2.toarray(), columns=['No', 'Yes'])

# Gradio widgets are created object-columns first, then numeric columns;
# colList records that ordering so positional args map back to column names.
colList = list(object_cols) + list(int_cols)

# Scale features to [0, 1]; keep the scaler to transform user input later.
scaler = MinMaxScaler()
X_scaled = scaler.fit_transform(X)

# Split the data into training and test sets
X_train, X_test, y_train, y_test = train_test_split(
    X_scaled, y3, test_size=0.2, random_state=0
)

# Load the pre-trained model once at module start-up.
loaded_model = tf.keras.models.load_model('Final_NN_model.keras')

# LIME explainer fitted on the scaled feature matrix.
explainer = LimeTabularExplainer(
    training_data=X_scaled,
    class_names=[0, 1],
    mode="classification",
    feature_names=list(X.columns),
)
# Your machine learning model function
def predict_label(*args):
    """Predict attrition probability for one employee and explain it.

    Parameters
    ----------
    *args : raw Gradio widget values, positionally ordered as ``colList``
        (object columns first, then numeric columns).

    Returns
    -------
    tuple
        (message string for the Prediction textbox,
         list of (feature, impact) rows for the DataFrame output) —
        or a "please fill in" placeholder pair when any input is empty.
    """
    if '' in args:
        return "Please fill in all inputs", pd.DataFrame(
            [['awaiting inputs', 'awaiting inputs']],
            columns=["Feature", "Impact"],
        )

    # Map positional inputs back to their column names.
    input_dict = {col: args[i] for i, col in enumerate(colList)}
    # Rearrange columns to match the training DataFrame X.
    row = {col: input_dict[col] for col in X.columns}
    input_df = pd.DataFrame([row], columns=row.keys())

    # Encode labels of object columns with the encoders fitted at start-up.
    for col in le_dict:
        input_df[col] = le_dict[col].transform(input_df[col])

    # Scale columns with the MinMaxScaler fitted on the training data.
    scaled = scaler.transform(input_df)

    # Use the module-level model and predict ONCE per request
    # (the original reloaded the model from disk and ran predict twice).
    probs = loaded_model.predict(scaled.reshape(1, -1))[0]
    predof0 = round(probs[0], 4) * 100  # probability of class 'No'
    predof1 = round(probs[1], 4) * 100  # probability of class 'Yes'

    # Explain the prediction with LIME.
    exp = explainer.explain_instance(
        data_row=scaled[0],
        predict_fn=loaded_model.predict,
        num_features=19,
    )

    # For each explanation row, the feature name is the (last) alphabetic
    # token in the condition string (e.g. "0.2 < MonthlyIncome <= 0.5").
    featimp = {}
    for condition, raw_weight in exp.as_list():  # as_list() hoisted out of loop
        feature = None
        for word in word_tokenize(condition):
            if re.findall(r'[a-zA-Z]+', word):
                feature = word
        if feature is None:
            # No alphabetic token at all: skip rather than reuse the name
            # leaked from the previous iteration (original bug).
            continue
        weight = round(raw_weight, 2)
        # Negative LIME weight pushes toward class 0 ('No' attrition).
        if weight <= 0:
            featimp[feature] = 'positive impact on retention'
        else:
            featimp[feature] = 'negative impact on retention'

    # Convert dictionary to list of tuples for Gradio Table
    featimp_table = [(key, value) for key, value in featimp.items()]

    # Bucket the 'No'-class probability into a human-readable verdict.
    if predof0 >= 60:
        return f"Low probability ({predof1:.2f}%) of attrition", featimp_table
    elif predof0 >= 30:
        return f"Some probability ({predof1:.2f}%) of attrition", featimp_table
    else:
        return f"High probability ({predof1:.2f}%) of attrition", featimp_table
# Build one input widget per feature: a dropdown for each categorical column
# (choices taken from the fitted label encoders) and a free-text box for each
# numeric column, then concatenate them in colList order.
obj_config = []
for name in object_cols:
    choices = sorted(classes_dict[name].tolist())
    obj_config.append(gr.Dropdown(label=name, choices=choices))

int_config = []
for name in int_cols:
    int_config.append(gr.Textbox(label=name, placeholder='enter a number'))

input_config = obj_config + int_config
# Assemble the Gradio UI: one textbox for the verdict and one table listing
# every feature with its LIME-derived impact on retention.
prediction_output = gr.Textbox(label="Prediction")
impact_output = gr.DataFrame(
    headers=["Feature", "Impact"],
    label="All features and their impact on retention",
)

iface = gr.Interface(
    fn=predict_label,
    inputs=input_config,
    outputs=[prediction_output, impact_output],
    title="Attrition Prediction",
    description="Based on your inputs this model predicts if an employee in an organisation would resign or not.",
    allow_flagging='never',
    live=False,  # Set live to True to see the interface while running the code
)

# Launch the Gradio interface
iface.launch(share=True)