siddop committed
Commit 2c90708
1 Parent(s): 2878842

Update app.py

Files changed (1)
  1. app.py +42 -30
app.py CHANGED
@@ -9,31 +9,37 @@ from keras.layers import Dense, Dropout, BatchNormalization
  from keras import regularizers
  import tensorflow as tf
  import joblib
+ from nltk.tokenize import word_tokenize
  import re
  from lime.lime_tabular import LimeTabularExplainer
- import gradio as gr
- import nltk
+ from keras.utils import to_categorical
+ from sklearn.preprocessing import OneHotEncoder
  nltk.download('punkt')
  from nltk.tokenize import word_tokenize

  # label encode object columns
- le_dict = {}
- df = pd.read_csv("Data.csv")
+ df = pd.read_csv(r"C:\Users\bhati\Documents\MachineLearning\FreelanceProject\SimpleAttritionPredictionsWithSuggestions\Data.csv")
  df2 = df.copy()

  object_cols = df2.select_dtypes(include=['object']).columns
  object_cols = object_cols.delete(object_cols.get_loc('Attrition'))
  int_cols = df2.select_dtypes(exclude=['object']).columns

-
+ le_dict = {}
+ classes_dict = {}
  for col in object_cols:
      le = LabelEncoder()
      df2[col] = le.fit_transform(df[col])
      le_dict[col] = le
+     classes_dict[col] = le.classes_

  X = df2.iloc[:, :-1]
  y = df2.iloc[:, -1]

+ encoder = OneHotEncoder()
+ y2 = encoder.fit_transform(np.array(y).reshape(-1, 1))
+ y3 = pd.DataFrame(y2.toarray(), columns=['No', 'Yes'])
+

  colList = []
  for col in object_cols:
@@ -41,29 +47,32 @@ for col in object_cols:
  for col in int_cols:
      colList.append(col)

- classes_dict = {}
- for col in object_cols:
-     le_col = LabelEncoder()
-     df2[col] = le_col.fit_transform(df[col])
-     classes_dict[col] = le_col.classes_
+ # Get the original class labels
+ original_labels = le.inverse_transform(y)
+
+ # Get the classes and their corresponding labels
+ classes = le.classes_
+
+ class_dict = {i: label for i, label in enumerate(classes)}
+


  scaler = MinMaxScaler()
  X_scaled = scaler.fit_transform(X)

  # Split the data into training and test sets
- X_train, X_test, y_train, y_test = train_test_split(X_scaled, y, test_size=0.2, random_state=0)
+ X_train, X_test, y_train, y_test = train_test_split(X_scaled, y3, test_size=0.2, random_state=0)

  # Load the model
  loaded_model = tf.keras.models.load_model('Final_NN_model.keras')

  # Create a LIME explainer
- explainer = LimeTabularExplainer(X_scaled, mode="classification", feature_names=X.columns)
+ explainer = LimeTabularExplainer(training_data=X_scaled, class_names=[0, 1], mode="classification", feature_names=list(X.columns))

  # Your machine learning model function
  def predict_label(*args):
      if '' in args:
-         return "Please fill in all inputs", pd.DataFrame([['awaiting inputs', 'awaiting inputs']], columns=["Feature", "Importance"])
+         return "Please fill in all inputs", pd.DataFrame([['awaiting inputs', 'awaiting inputs']], columns=["Feature", "Impact"])

      # Create empty dictionaries to hold the input data
      input_dict = {}
@@ -91,31 +100,34 @@ def predict_label(*args):
      loaded_model = tf.keras.models.load_model('Final_NN_model.keras')

      # Make predictions
-     pred = round(loaded_model.predict(input_df.reshape(1, -1))[0][0], 4)*100
-
+     predof0 = round(loaded_model.predict(input_df.reshape(1, -1))[0][0], 4)*100
+     predof1 = round(loaded_model.predict(input_df.reshape(1, -1))[0][1], 4)*100
+
      # Explain the prediction
-     exp = explainer.explain_instance(input_df[0], loaded_model.predict, labels=(0, ), num_features=len(X.columns))
+     exp = explainer.explain_instance(data_row=input_df[0], predict_fn=loaded_model.predict, num_features=19)

      # Create dictionary to store top 5 influencing features
-     top5 = {}
-     for i in range(5):
-         for word in word_tokenize(exp.as_list(0)[i][0]):
+     featimp = {}
+     for i in range(19):
+         for word in word_tokenize(exp.as_list()[i][0]):
              if re.findall(r'[a-zA-Z]+', word):
                  feature = word
-         weight = round(exp.as_list(0)[i][1], 2)
-         top5[feature] = weight
+         weight = round(exp.as_list()[i][1], 2)
+         if weight<=0:
+             featimp[feature] = 'positive impact on retention'
+         elif weight>0:
+             featimp[feature] = 'negative impact on retention'

      # Convert dictionary to list of tuples for Gradio Table
-     top5_table = [(key, value) for key, value in top5.items()]
-     # top5_table = pd.DataFrame(top5_table, columns=["Feature", "Importance"])
+     featimp_table = [(key, value) for key, value in featimp.items()]

      # Return prediction
-     if pred<=30:
-         return f"Low probability ({pred:.2f}%) of attrition", top5_table
-     elif pred<=70:
-         return f"Some probability ({pred:.2f}%) of attrition", top5_table
+     if predof0>=60:
+         return f"Low probability ({predof1:.2f}%) of attrition", featimp_table
+     elif predof0>=30:
+         return f"Some probability ({predof1:.2f}%) of attrition", featimp_table
      else:
-         return f"High probability ({pred:.2f}%) of attrition", top5_table
+         return f"High probability ({predof1:.2f}%) of attrition", featimp_table

  # Define the inputs with names and descriptions
  obj_config = [gr.Dropdown(label=name, choices=sorted(classes_dict[name].tolist())) for name in object_cols]
@@ -127,13 +139,13 @@ input_config = obj_config + int_config
  # Gradio Interface
  iface = gr.Interface(
      title="Attrition Prediction",
-     description = "This app predicts if an employee in your organisation would resign or not. The values shown under top features shows influence of each feature on the prediction. A higher number indicates that the feature is more influential in determining the prediction, while a lower number indicates less influence.",
+     description = "Based on your inputs this model predicts if an employee in an organisation would resign or not.",
      allow_flagging='never',
      fn=predict_label,
      inputs=input_config,
      outputs=[
          gr.Textbox(label="Prediction"),
-         gr.DataFrame(headers=["Feature", "Importance"], label="Top 5 featured influencing prediction")
+         gr.DataFrame(headers=["Feature", "Impact"], label="Top 10 features and their impact on retention")
      ],
      live=False # Set live to True to see the interface while running the code
  )