"""Gradio app that predicts employee attrition and explains each prediction with LIME.

At import time the script reads ``Data.csv``, label-encodes the categorical
columns, fits a ``MinMaxScaler``, loads the trained Keras model from
``Final_NN_model.keras`` and builds a LIME tabular explainer.  It then
constructs a Gradio interface around :func:`predict_label` and launches it.
"""

import re

import pandas as pd
import numpy as np
from sklearn.preprocessing import LabelEncoder, MinMaxScaler
from tensorflow import keras
from sklearn.model_selection import train_test_split
from sklearn.metrics import r2_score
from keras.models import Sequential
from keras.layers import Dense, Dropout, BatchNormalization
from keras import regularizers
import tensorflow as tf
import joblib
from nltk.tokenize import word_tokenize
from lime.lime_tabular import LimeTabularExplainer
from keras.utils import to_categorical
from sklearn.preprocessing import OneHotEncoder
import nltk
import gradio as gr

# word_tokenize needs the 'punkt' tokenizer data at call time.
nltk.download('punkt')

# Number of features requested from LIME for every explanation.
_NUM_EXPLAINED_FEATURES = 19

# A token counts as (part of) a feature name when it contains an ASCII letter.
_ALPHA_TOKEN = re.compile(r'[a-zA-Z]+')

# ---------------------------------------------------------------------------
# Data preparation
# ---------------------------------------------------------------------------

# label encode object columns
df = pd.read_csv("Data.csv")
df2 = df.copy()

# Categorical feature columns: every object column except the target.
object_cols = df2.select_dtypes(include=['object']).columns
object_cols = object_cols.delete(object_cols.get_loc('Attrition'))
int_cols = df2.select_dtypes(exclude=['object']).columns

le_dict = {}       # column name -> fitted LabelEncoder
classes_dict = {}  # column name -> array of the encoder's known classes
for col in object_cols:
    le = LabelEncoder()
    df2[col] = le.fit_transform(df[col])
    le_dict[col] = le
    classes_dict[col] = le.classes_

# NOTE(review): the target is taken positionally, so this assumes 'Attrition'
# is the last column of Data.csv -- confirm against the data file.
X = df2.iloc[:, :-1]
y = df2.iloc[:, -1]

encoder = OneHotEncoder()
y2 = encoder.fit_transform(np.array(y).reshape(-1, 1))
y3 = pd.DataFrame(y2.toarray(), columns=['No', 'Yes'])

# Order in which predict_label receives its positional arguments: it must
# match `input_config` below (dropdowns first, then textboxes).
colList = list(object_cols) + list(int_cols)

scaler = MinMaxScaler()
X_scaled = scaler.fit_transform(X)

# Split the data into training and test sets
X_train, X_test, y_train, y_test = train_test_split(
    X_scaled, y3, test_size=0.2, random_state=0
)

# Load the model once; predict_label reuses this instance on every request.
loaded_model = tf.keras.models.load_model('Final_NN_model.keras')

# Create a LIME explainer
explainer = LimeTabularExplainer(
    training_data=X_scaled,
    class_names=[0, 1],
    mode="classification",
    feature_names=list(X.columns),
)


# ---------------------------------------------------------------------------
# Prediction
# ---------------------------------------------------------------------------

def _is_blank(value):
    """Return True when a UI input was left empty (None or blank string)."""
    return value is None or (isinstance(value, str) and not value.strip())


def _to_number(value):
    """Convert a textbox value to a finite float.

    Raises:
        ValueError: if the value is not a number or is NaN / infinite.
        TypeError: if the value cannot be passed to ``float`` at all.
    """
    number = float(value)
    if not np.isfinite(number):
        raise ValueError(f"not a finite number: {value!r}")
    return number


def _feature_from_description(description):
    """Extract the feature name from one LIME explanation description.

    The description is tokenized and the last token containing a letter is
    returned, so comparison operators and numeric bounds around the name are
    ignored.  Returns None when no token contains a letter.

    NOTE(review): a column name that tokenizes into several words yields only
    its last word.
    """
    feature = None
    for word in word_tokenize(description):
        if _ALPHA_TOKEN.search(word):
            feature = word
    return feature


def _awaiting_inputs_table():
    """Placeholder table shown while the inputs are incomplete or invalid."""
    return pd.DataFrame(
        [['awaiting inputs', 'awaiting inputs']], columns=["Feature", "Impact"]
    )


# Your machine learning model function
def predict_label(*args):
    """Predict attrition for one employee and explain the prediction.

    Args:
        *args: one value per column, in ``colList`` order (categorical
            columns first, then numeric columns).  Categorical values must be
            classes known to the fitted label encoders; numeric values may be
            numbers or numeric strings.

    Returns:
        A ``(message, table)`` pair.  ``message`` is the probability banner
        (or a prompt to complete/correct the inputs) and ``table`` holds
        ``(feature, impact)`` rows derived from the LIME explanation (or a
        placeholder DataFrame when no prediction was made).
    """
    # Unselected dropdowns may arrive as None and untouched textboxes as '',
    # so both are treated as missing.
    if any(_is_blank(value) for value in args):
        return "Please fill in all inputs", _awaiting_inputs_table()

    # Map inputs and col names
    input_dict = dict(zip(colList, args))

    # Rearrange columns as X df, converting textbox strings to numbers so the
    # scaler never has to coerce raw text.
    row = {}
    try:
        for col in X.columns:
            value = input_dict[col]
            row[col] = value if col in le_dict else _to_number(value)
    except (TypeError, ValueError):
        return "Please enter valid numbers in all numeric inputs", _awaiting_inputs_table()

    input_df = pd.DataFrame([row], columns=list(X.columns))

    # Encode labels of object columns
    for col, label_encoder in le_dict.items():
        input_df[col] = label_encoder.transform(input_df[col])

    # Scale columns
    scaled = scaler.transform(input_df)

    # Make predictions (a single forward pass gives both class probabilities)
    probabilities = loaded_model.predict(scaled.reshape(1, -1))[0]
    predof0 = round(float(probabilities[0]), 4) * 100
    predof1 = round(float(probabilities[1]), 4) * 100

    # Explain the prediction
    exp = explainer.explain_instance(
        data_row=scaled[0],
        predict_fn=loaded_model.predict,
        num_features=_NUM_EXPLAINED_FEATURES,
    )

    # Map each explained feature to its direction of influence.  Iterating the
    # list directly avoids an IndexError when LIME returns fewer entries than
    # requested.
    featimp = {}
    for description, raw_weight in exp.as_list():
        feature = _feature_from_description(description)
        if feature is None:
            continue
        weight = round(raw_weight, 2)
        if weight <= 0:
            featimp[feature] = 'positive impact on retention'
        elif weight > 0:
            featimp[feature] = 'negative impact on retention'

    # Convert dictionary to list of tuples for Gradio Table
    featimp_table = list(featimp.items())

    # Return prediction
    if predof0 >= 60:
        return f"Low probability ({predof1:.2f}%) of attrition", featimp_table
    elif predof0 >= 30:
        return f"Some probability ({predof1:.2f}%) of attrition", featimp_table
    else:
        return f"High probability ({predof1:.2f}%) of attrition", featimp_table


# ---------------------------------------------------------------------------
# Gradio UI
# ---------------------------------------------------------------------------

# Define the inputs with names and descriptions
obj_config = [
    gr.Dropdown(label=name, choices=sorted(classes_dict[name].tolist()))
    for name in object_cols
]
int_config = [
    gr.Textbox(label=name, placeholder='enter a number')
    for name in int_cols
]

# Concatenate the two sets of input configurations (same order as colList)
input_config = obj_config + int_config

# Gradio Interface
iface = gr.Interface(
    title="Attrition Prediction",
    description = "Based on your inputs this model predicts if an employee in an organisation would resign or not.",
    allow_flagging='never',
    fn=predict_label,
    inputs=input_config,
    outputs=[
        gr.Textbox(label="Prediction"),
        gr.DataFrame(headers=["Feature", "Impact"], label="All features and their impact on retention")
    ],
    live=False  # run the prediction only on submit, not on every input change
)

# Launch the Gradio interface
iface.launch(share=True)