import pandas as pd
import numpy as np
from sklearn.preprocessing import LabelEncoder, MinMaxScaler
from tensorflow import keras
from sklearn.model_selection import train_test_split
from sklearn.metrics import r2_score
from keras.models import Sequential
from keras.layers import Dense, Dropout, BatchNormalization
from keras import regularizers
import tensorflow as tf
import joblib
import re
from lime.lime_tabular import LimeTabularExplainer
import gradio as gr
import nltk
nltk.download('punkt')
from nltk.tokenize import word_tokenize

# ---------------------------------------------------------------------------
# Data preparation
# ---------------------------------------------------------------------------

# Label-encode the object columns (every object column except the target).
le_dict = {}
df = pd.read_csv("Data.csv")
df2 = df.copy()
object_cols = df2.select_dtypes(include=['object']).columns
object_cols = object_cols.delete(object_cols.get_loc('Attrition'))
int_cols = df2.select_dtypes(exclude=['object']).columns

for col in object_cols:
    le = LabelEncoder()
    df2[col] = le.fit_transform(df[col])
    le_dict[col] = le

# NOTE(review): the positional split assumes the target ('Attrition') is the
# last column of Data.csv -- confirm against the dataset.
X = df2.iloc[:, :-1]
y = df2.iloc[:, -1]

# Order in which the UI widgets (and therefore the positional arguments of
# predict_label) are laid out: categorical columns first, then numeric ones.
colList = [*object_cols, *int_cols]

# Known categories per object column, used to populate the dropdowns. The
# encoders fitted above already hold them, so no second fit is needed.
classes_dict = {col: le_dict[col].classes_ for col in object_cols}

scaler = MinMaxScaler()
X_scaled = scaler.fit_transform(X)

# Split the data into training and test sets
# (the resulting arrays are not referenced anywhere else in this script)
X_train, X_test, y_train, y_test = train_test_split(X_scaled, y, test_size=0.2, random_state=0)

# Load the model once at start-up; predict_label reuses this instance
loaded_model = tf.keras.models.load_model('Final_NN_model.keras')

# Create a LIME explainer
explainer = LimeTabularExplainer(X_scaled, mode="classification", feature_names=X.columns)

# Matches any token that contains at least one ASCII letter.
_HAS_LETTERS = re.compile(r'[a-zA-Z]+')


def _is_blank(value):
    """Return True when a widget value is missing (None or empty/whitespace text)."""
    return value is None or (isinstance(value, str) and not value.strip())


def _status_table(text):
    """Build the one-row placeholder table shown when no explanation is available."""
    return pd.DataFrame([[text, text]], columns=["Feature", "Importance"])


def _model_predict(data):
    """Run the loaded model without printing a progress bar for every batch."""
    return loaded_model.predict(data, verbose=0)


def _feature_from_rule(rule):
    """Extract the feature name from a LIME rule string such as '0.25 < Age <= 0.50'.

    The last token containing a letter is taken as the name. If the rule has
    no such token, the rule text itself is returned so the row is still
    labelled.
    """
    named_tokens = [token for token in word_tokenize(rule) if _HAS_LETTERS.search(token)]
    return named_tokens[-1] if named_tokens else rule


# Your machine learning model function
def predict_label(*args):
    """Predict the attrition probability for one employee and explain it.

    Args:
        *args: One value per entry of ``colList``, in that order: the
            dropdown selections for the categorical columns followed by the
            textbox contents for the numeric columns.

    Returns:
        A ``(message, table)`` pair. ``message`` is the text shown in the
        prediction box. ``table`` is a DataFrame with "Feature" and
        "Importance" columns holding up to five LIME weights, or a
        placeholder row when the inputs are incomplete or invalid.
    """
    # Unselected dropdowns arrive as None and empty textboxes as '';
    # both mean the form is incomplete.
    if any(_is_blank(arg) for arg in args):
        return "Please fill in all inputs", _status_table('awaiting inputs')

    # Map inputs and col names
    input_dict = dict(zip(colList, args))

    # Textboxes deliver strings: convert the numeric columns explicitly so a
    # typo yields a readable message instead of an exception in the scaler.
    for col in int_cols:
        try:
            number = float(input_dict[col])
        except (TypeError, ValueError):
            number = float('nan')
        if not np.isfinite(number):
            return f"Please enter a valid number for '{col}'", _status_table('awaiting inputs')
        input_dict[col] = number

    # Rearrange columns as X df
    input_df = pd.DataFrame([[input_dict[col] for col in X.columns]], columns=X.columns)

    # Encode labels of object columns
    for col, encoder in le_dict.items():
        input_df[col] = encoder.transform(input_df[col])

    # Scale columns (returns a NumPy array with a single row)
    scaled_row = scaler.transform(input_df)

    # Make predictions with the model loaded at start-up
    probability = float(_model_predict(scaled_row.reshape(1, -1))[0][0])
    pred = round(probability, 4) * 100

    # Explain the prediction
    exp = explainer.explain_instance(
        scaled_row[0],
        _model_predict,
        labels=(0, ),
        num_features=len(X.columns),
    )

    # Keep the first five explained features; slicing copes with fewer than five.
    top_rows = [
        (_feature_from_rule(rule), round(float(weight), 2))
        for rule, weight in exp.as_list(0)[:5]
    ]
    top5_table = pd.DataFrame(top_rows, columns=["Feature", "Importance"])

    # Return prediction
    if pred <= 30:
        return f"Low probability ({pred:.2f}%) of attrition", top5_table
    elif pred <= 70:
        return f"Some probability ({pred:.2f}%) of attrition", top5_table
    else:
        return f"High probability ({pred:.2f}%) of attrition", top5_table


# Define the inputs with names and descriptions
obj_config = [gr.Dropdown(label=name, choices=sorted(classes_dict[name].tolist())) for name in object_cols]
int_config = [gr.Textbox(label=name, placeholder='enter a number') for name in int_cols]

# Concatenate the two sets of input configurations
input_config = obj_config + int_config

# Gradio Interface
iface = gr.Interface(
    title="Attrition Prediction",
    description=(
        "This app predicts if an employee in your organisation would resign or not. "
        "The values shown under top features shows influence of each feature on the prediction. "
        "A higher number indicates that the feature is more influential in determining the prediction, "
        "while a lower number indicates less influence."
    ),
    allow_flagging='never',
    fn=predict_label,
    inputs=input_config,
    outputs=[
        gr.Textbox(label="Prediction"),
        gr.DataFrame(headers=["Feature", "Importance"], label="Top 5 featured influencing prediction")
    ],
    live=False  # Set live to True to see the interface while running the code
)

# Launch the Gradio interface
iface.launch(share=True)