Spaces:
Sleeping
Sleeping
| import pandas as pd | |
| import numpy as np | |
| from sklearn.preprocessing import LabelEncoder, MinMaxScaler | |
| from tensorflow import keras | |
| from sklearn.model_selection import train_test_split | |
| from sklearn.metrics import r2_score | |
| from keras.models import Sequential | |
| from keras.layers import Dense, Dropout, BatchNormalization | |
| from keras import regularizers | |
| import tensorflow as tf | |
| import joblib | |
| from nltk.tokenize import word_tokenize | |
| import re | |
| from lime.lime_tabular import LimeTabularExplainer | |
| from keras.utils import to_categorical | |
| from sklearn.preprocessing import OneHotEncoder | |
| import nltk | |
| import gradio as gr | |
| nltk.download('punkt') | |
| from nltk.tokenize import word_tokenize | |
# ---------------------------------------------------------------------------
# Data preparation: rebuild the encoders/scaler from Data.csv so that user
# inputs can be transformed the same way before they reach the model.
# ---------------------------------------------------------------------------
df = pd.read_csv("Data.csv")
df2 = df.copy()  # df keeps the raw values; df2 receives the encoded ones

# Object-typed columns (minus the 'Attrition' column) are label-encoded;
# every other column is treated as numeric.
object_cols = df2.select_dtypes(include=['object']).columns
object_cols = object_cols.delete(object_cols.get_loc('Attrition'))
int_cols = df2.select_dtypes(exclude=['object']).columns

# One fitted LabelEncoder per object column, plus the class labels each one
# learned (used later to populate the dropdown choices).
le_dict = {col: LabelEncoder() for col in object_cols}
for col, le in le_dict.items():
    df2[col] = le.fit_transform(df[col])
classes_dict = {col: le.classes_ for col, le in le_dict.items()}

# Features are every column but the last; the last column is the target.
# NOTE(review): this assumes 'Attrition' is the final column of Data.csv - confirm.
X, y = df2.iloc[:, :-1], df2.iloc[:, -1]

# One-hot encode the target into two columns named 'No' and 'Yes'.
encoder = OneHotEncoder()
y2 = encoder.fit_transform(np.asarray(y).reshape(-1, 1))
y3 = pd.DataFrame(y2.toarray(), columns=['No', 'Yes'])

# Order in which the UI supplies inputs: object columns first, then numeric ones.
colList = [*object_cols, *int_cols]

# Scale every feature to [0, 1]; the fitted scaler is reused at prediction time.
scaler = MinMaxScaler()
X_scaled = scaler.fit_transform(X)

# Split the data into training and test sets
X_train, X_test, y_train, y_test = train_test_split(
    X_scaled,
    y3,
    test_size=0.2,
    random_state=0,
)

# Load the trained network from disk
loaded_model = tf.keras.models.load_model('Final_NN_model.keras')

# LIME explainer built over the scaled feature matrix, using the feature
# column names of X and the class names 0 and 1.
explainer = LimeTabularExplainer(
    training_data=X_scaled,
    class_names=[0, 1],
    mode="classification",
    feature_names=list(X.columns),
)
# Prediction callback used by the Gradio interface
def predict_label(*args):
    """Predict attrition for one employee and explain the prediction with LIME.

    Args:
        *args: One value per input widget, in the order of ``colList``
            (label-encoded columns first, then numeric columns). Numeric
            values may be given as strings, as produced by a text box.

    Returns:
        A ``(message, table)`` pair. ``message`` is the attrition-risk summary,
        or a prompt when inputs are missing or not numeric. ``table`` is a list
        of ``(feature, impact)`` tuples; while inputs are missing or invalid it
        is a one-row placeholder DataFrame instead.
    """
    placeholder = pd.DataFrame(
        [['awaiting inputs', 'awaiting inputs']], columns=["Feature", "Impact"]
    )

    # Treat None (e.g. a dropdown left unselected) and blank/whitespace-only
    # text as "not filled in", not just the empty string.
    if not args or any(
        value is None or (isinstance(value, str) and not value.strip())
        for value in args
    ):
        return "Please fill in all inputs", placeholder

    # Map inputs to column names, then rearrange into the column order of X,
    # which is the order the scaler was fitted on.
    input_dict = dict(zip(colList, args))
    row = {col: input_dict[col] for col in X.columns}

    # Columns without a label encoder are numeric: convert them up front so a
    # typo yields a readable message instead of an exception inside the scaler.
    bad_cols = []
    for col in X.columns:
        if col in le_dict:
            continue
        try:
            number = float(row[col])
        except (TypeError, ValueError):
            bad_cols.append(str(col))
            continue
        if not np.isfinite(number):
            bad_cols.append(str(col))
            continue
        row[col] = number
    if bad_cols:
        return f"Please enter a number for: {', '.join(bad_cols)}", placeholder

    # Encode labels of object columns, then scale exactly like the training data.
    input_df = pd.DataFrame([row], columns=list(row))
    for col, label_encoder in le_dict.items():
        input_df[col] = label_encoder.transform(input_df[col])
    scaled = scaler.transform(input_df)

    # Use the model loaded at module level (no per-request reload) and run the
    # forward pass once for both class probabilities.
    probabilities = loaded_model.predict(scaled.reshape(1, -1))[0]
    predof0 = round(float(probabilities[0]), 4) * 100
    predof1 = round(float(probabilities[1]), 4) * 100

    # Explain the prediction; at most this many features are reported.
    max_features = 19
    exp = explainer.explain_instance(
        data_row=scaled[0],
        predict_fn=loaded_model.predict,
        num_features=max_features,
    )

    # as_map() yields (feature index, weight) pairs for label 1, the same label
    # as_list() reports by default. Looking names up by index keeps multi-word
    # column names intact and copes with fewer entries than requested.
    feature_names = list(X.columns)
    featimp_table = []
    for feature_idx, weight in exp.as_map()[1]:
        # The weight is rounded to 2 decimals before its sign is tested, so
        # weights that round to zero are reported as a positive impact.
        if round(weight, 2) > 0:
            impact = 'negative impact on retention'
        else:
            impact = 'positive impact on retention'
        featimp_table.append((feature_names[feature_idx], impact))

    # Bucket by the class-0 percentage, but report the class-1 percentage.
    if predof0 >= 60:
        level = "Low"
    elif predof0 >= 30:
        level = "Some"
    else:
        level = "High"
    return f"{level} probability ({predof1:.2f}%) of attrition", featimp_table
# Input widgets: a dropdown per label-encoded column (choices are that
# column's known classes, sorted) and a text box per numeric column.
obj_config = [
    gr.Dropdown(label=name, choices=sorted(classes_dict[name].tolist()))
    for name in object_cols
]
int_config = [
    gr.Textbox(label=name, placeholder='enter a number')
    for name in int_cols
]

# Dropdowns first, then text boxes - the same order as colList, which is how
# predict_label maps positional inputs back to columns.
input_config = [*obj_config, *int_config]

# Assemble the Gradio interface around predict_label
iface = gr.Interface(
    fn=predict_label,
    inputs=input_config,
    outputs=[
        gr.Textbox(label="Prediction"),
        gr.DataFrame(
            headers=["Feature", "Impact"],
            label="All features and their impact on retention",
        ),
    ],
    title="Attrition Prediction",
    description = "Based on your inputs this model predicts if an employee in an organisation would resign or not.",
    allow_flagging='never',
    # NOTE(review): per the Gradio docs, live=True would re-run the function
    # whenever an input changes; it is left off here.
    live=False,
)

# Start the app and request a public share link
iface.launch(share=True)