# app.py — Attrition-prediction Gradio demo (last update: commit 272185d).
import pandas as pd
import numpy as np
from sklearn.preprocessing import LabelEncoder, MinMaxScaler
from tensorflow import keras
from sklearn.model_selection import train_test_split
from sklearn.metrics import r2_score
from keras.models import Sequential
from keras.layers import Dense, Dropout, BatchNormalization
from keras import regularizers
import tensorflow as tf
import joblib
from nltk.tokenize import word_tokenize
import re
from lime.lime_tabular import LimeTabularExplainer
from keras.utils import to_categorical
from sklearn.preprocessing import OneHotEncoder
import nltk
import gradio as gr
nltk.download('punkt')
from nltk.tokenize import word_tokenize
# --- Startup: fit encoders and scaler on the training data so that live user
# --- inputs can be transformed identically at prediction time.
# label encode object columns
df = pd.read_csv("Data.csv")
df2 = df.copy()
# Categorical (object-dtype) columns, excluding the target column 'Attrition'.
object_cols = df2.select_dtypes(include=['object']).columns
object_cols = object_cols.delete(object_cols.get_loc('Attrition'))
# All remaining (numeric) columns.
int_cols = df2.select_dtypes(exclude=['object']).columns
le_dict = {}       # column name -> fitted LabelEncoder (reused in predict_label)
classes_dict = {}  # column name -> original class labels (used for UI dropdown choices)
for col in object_cols:
    le = LabelEncoder()
    df2[col] = le.fit_transform(df[col])
    le_dict[col] = le
    classes_dict[col] = le.classes_
# Feature/target split; assumes 'Attrition' is the LAST column of Data.csv — TODO confirm.
X = df2.iloc[:, :-1]
y = df2.iloc[:, -1]
# One-hot encode the binary target into two columns ('No', 'Yes').
encoder = OneHotEncoder()
y2 = encoder.fit_transform(np.array(y).reshape(-1, 1))
y3 = pd.DataFrame(y2.toarray(), columns=['No', 'Yes'])
# colList records the order of the Gradio inputs: categorical first, then numeric
# (must match the order of the input widgets built below).
colList = []
for col in object_cols:
    colList.append(col)
for col in int_cols:
    colList.append(col)
# Scale all features to [0, 1]; the fitted scaler is reused in predict_label.
scaler = MinMaxScaler()
X_scaled = scaler.fit_transform(X)
# Split the data into training and test sets
# NOTE(review): the split results are never used below (the model is pre-trained);
# kept as-is for parity with the training notebook.
X_train, X_test, y_train, y_test = train_test_split(X_scaled, y3, test_size=0.2, random_state=0)
# Load the model
loaded_model = tf.keras.models.load_model('Final_NN_model.keras')
# Create a LIME explainer
explainer = LimeTabularExplainer(training_data=X_scaled, class_names=[0, 1], mode="classification", feature_names=list(X.columns))
# Your machine learning model function
def predict_label(*args):
    """Predict attrition risk for one employee from the Gradio inputs.

    Args:
        *args: one value per entry of ``colList`` (categorical dropdown choices
            first, then numeric textbox values as strings — the scaler coerces
            numeric strings to float).

    Returns:
        tuple: (prediction message, list of (feature, impact) tuples for the
        Gradio DataFrame output).
    """
    if '' in args:
        return "Please fill in all inputs", pd.DataFrame([['awaiting inputs', 'awaiting inputs']], columns=["Feature", "Impact"])
    # Map inputs to their column names in the UI's input order.
    input_dict = {col: args[i] for i, col in enumerate(colList)}
    # Rearrange columns to match the training DataFrame X, then build a 1-row frame.
    input_df = pd.DataFrame([{col: input_dict[col] for col in X.columns}])
    # Encode labels of object columns with the encoders fitted at startup.
    for col in le_dict:
        input_df[col] = le_dict[col].transform(input_df[col])
    # Scale features with the MinMaxScaler fitted at startup.
    input_row = scaler.transform(input_df)
    # Predict ONCE and reuse the globally loaded model — the original reloaded the
    # model from disk and ran predict twice per request.
    probs = loaded_model.predict(input_row.reshape(1, -1))[0]
    predof0 = round(float(probs[0]), 4) * 100  # % probability of class 'No' (stays)
    predof1 = round(float(probs[1]), 4) * 100  # % probability of class 'Yes' (leaves)
    # Explain the prediction with LIME.
    exp = explainer.explain_instance(data_row=input_row[0], predict_fn=loaded_model.predict, num_features=19)
    # Collect each feature's direction of influence. Iterate as_list() directly
    # (computed once) instead of indexing range(19), which would IndexError if
    # LIME returns fewer entries.
    featimp = {}
    for feature_desc, weight in exp.as_list():
        rounded = round(weight, 2)
        for word in word_tokenize(feature_desc):
            if re.findall(r'[a-zA-Z]+', word):
                # Negative LIME weight pushes toward class 0 ('No' attrition).
                if rounded <= 0:
                    featimp[word] = 'positive impact on retention'
                else:
                    featimp[word] = 'negative impact on retention'
    # Convert dictionary to list of tuples for the Gradio table.
    featimp_table = [(key, value) for key, value in featimp.items()]
    # Return prediction, bucketed by the probability of staying.
    if predof0 >= 60:
        return f"Low probability ({predof1:.2f}%) of attrition", featimp_table
    elif predof0 >= 30:
        return f"Some probability ({predof1:.2f}%) of attrition", featimp_table
    else:
        return f"High probability ({predof1:.2f}%) of attrition", featimp_table
# Define the inputs with names and descriptions
obj_config = [gr.Dropdown(label=name, choices=sorted(classes_dict[name].tolist())) for name in object_cols]
int_config = [gr.Textbox(label=name, placeholder='enter a number') for name in int_cols]
# Concatenate the two sets of input configurations
input_config = obj_config + int_config
# Gradio Interface
iface = gr.Interface(
title="Attrition Prediction",
description = "Based on your inputs this model predicts if an employee in an organisation would resign or not.",
allow_flagging='never',
fn=predict_label,
inputs=input_config,
outputs=[
gr.Textbox(label="Prediction"),
gr.DataFrame(headers=["Feature", "Impact"], label="All features and their impact on retention")
],
live=False # Set live to True to see the interface while running the code
)
# Launch the Gradio interface
iface.launch(share=True)