siddop committed
Commit 2c90708
1 Parent(s): 2878842

Update app.py

Files changed (1)
  1. app.py +42 -30
app.py CHANGED
@@ -9,31 +9,37 @@ from keras.layers import Dense, Dropout, BatchNormalization
  from keras import regularizers
  import tensorflow as tf
  import joblib
+ from nltk.tokenize import word_tokenize
  import re
  from lime.lime_tabular import LimeTabularExplainer
- import gradio as gr
- import nltk
+ from keras.utils import to_categorical
+ from sklearn.preprocessing import OneHotEncoder
  nltk.download('punkt')
  from nltk.tokenize import word_tokenize

  # label encode object columns
- le_dict = {}
- df = pd.read_csv("Data.csv")
+ df = pd.read_csv(r"C:\Users\bhati\Documents\MachineLearning\FreelanceProject\SimpleAttritionPredictionsWithSuggestions\Data.csv")
  df2 = df.copy()

  object_cols = df2.select_dtypes(include=['object']).columns
  object_cols = object_cols.delete(object_cols.get_loc('Attrition'))
  int_cols = df2.select_dtypes(exclude=['object']).columns

-
+ le_dict = {}
+ classes_dict = {}
  for col in object_cols:
      le = LabelEncoder()
      df2[col] = le.fit_transform(df[col])
      le_dict[col] = le
+     classes_dict[col] = le.classes_

  X = df2.iloc[:, :-1]
  y = df2.iloc[:, -1]

+ encoder = OneHotEncoder()
+ y2 = encoder.fit_transform(np.array(y).reshape(-1, 1))
+ y3 = pd.DataFrame(y2.toarray(), columns=['No', 'Yes'])
+

  colList = []
  for col in object_cols:
@@ -41,29 +47,32 @@ for col in object_cols:
  for col in int_cols:
      colList.append(col)

- classes_dict = {}
- for col in object_cols:
-     le_col = LabelEncoder()
-     df2[col] = le_col.fit_transform(df[col])
-     classes_dict[col] = le_col.classes_
+ # Get the original class labels
+ original_labels = le.inverse_transform(y)
+
+ # Get the classes and their corresponding labels
+ classes = le.classes_
+
+ class_dict = {i: label for i, label in enumerate(classes)}
+


  scaler = MinMaxScaler()
  X_scaled = scaler.fit_transform(X)

  # Split the data into training and test sets
- X_train, X_test, y_train, y_test = train_test_split(X_scaled, y, test_size=0.2, random_state=0)
+ X_train, X_test, y_train, y_test = train_test_split(X_scaled, y3, test_size=0.2, random_state=0)

  # Load the model
  loaded_model = tf.keras.models.load_model('Final_NN_model.keras')

  # Create a LIME explainer
- explainer = LimeTabularExplainer(X_scaled, mode="classification", feature_names=X.columns)
+ explainer = LimeTabularExplainer(training_data=X_scaled, class_names=[0, 1], mode="classification", feature_names=list(X.columns))

  # Your machine learning model function
  def predict_label(*args):
      if '' in args:
-         return "Please fill in all inputs", pd.DataFrame([['awaiting inputs', 'awaiting inputs']], columns=["Feature", "Importance"])
+         return "Please fill in all inputs", pd.DataFrame([['awaiting inputs', 'awaiting inputs']], columns=["Feature", "Impact"])

      # Create empty dictionaries to hold the input data
      input_dict = {}
@@ -91,31 +100,34 @@ def predict_label(*args):
      loaded_model = tf.keras.models.load_model('Final_NN_model.keras')

      # Make predictions
-     pred = round(loaded_model.predict(input_df.reshape(1, -1))[0][0], 4)*100
-
+     predof0 = round(loaded_model.predict(input_df.reshape(1, -1))[0][0], 4)*100
+     predof1 = round(loaded_model.predict(input_df.reshape(1, -1))[0][1], 4)*100
+
      # Explain the prediction
-     exp = explainer.explain_instance(input_df[0], loaded_model.predict, labels=(0, ), num_features=len(X.columns))
+     exp = explainer.explain_instance(data_row=input_df[0], predict_fn=loaded_model.predict, num_features=19)

      # Create dictionary to store top 5 influencing features
-     top5 = {}
-     for i in range(5):
-         for word in word_tokenize(exp.as_list(0)[i][0]):
+     featimp = {}
+     for i in range(19):
+         for word in word_tokenize(exp.as_list()[i][0]):
              if re.findall(r'[a-zA-Z]+', word):
                  feature = word
-         weight = round(exp.as_list(0)[i][1], 2)
-         top5[feature] = weight
+         weight = round(exp.as_list()[i][1], 2)
+         if weight<=0:
+             featimp[feature] = 'positive impact on retention'
+         elif weight>0:
+             featimp[feature] = 'negative impact on retention'

      # Convert dictionary to list of tuples for Gradio Table
-     top5_table = [(key, value) for key, value in top5.items()]
-     # top5_table = pd.DataFrame(top5_table, columns=["Feature", "Importance"])
+     featimp_table = [(key, value) for key, value in featimp.items()]

      # Return prediction
-     if pred<=30:
-         return f"Low probability ({pred:.2f}%) of attrition", top5_table
-     elif pred<=70:
-         return f"Some probability ({pred:.2f}%) of attrition", top5_table
+     if predof0>=60:
+         return f"Low probability ({predof1:.2f}%) of attrition", featimp_table
+     elif predof0>=30:
+         return f"Some probability ({predof1:.2f}%) of attrition", featimp_table
      else:
-         return f"High probability ({pred:.2f}%) of attrition", top5_table
+         return f"High probability ({predof1:.2f}%) of attrition", featimp_table

  # Define the inputs with names and descriptions
  obj_config = [gr.Dropdown(label=name, choices=sorted(classes_dict[name].tolist())) for name in object_cols]
@@ -127,13 +139,13 @@ input_config = obj_config + int_config
  # Gradio Interface
  iface = gr.Interface(
      title="Attrition Prediction",
-     description = "This app predicts if an employee in your organisation would resign or not. The values shown under top features shows influence of each feature on the prediction. A higher number indicates that the feature is more influential in determining the prediction, while a lower number indicates less influence.",
+     description = "Based on your inputs this model predicts if an employee in an organisation would resign or not.",
      allow_flagging='never',
      fn=predict_label,
      inputs=input_config,
      outputs=[
          gr.Textbox(label="Prediction"),
-         gr.DataFrame(headers=["Feature", "Importance"], label="Top 5 featured influencing prediction")
+         gr.DataFrame(headers=["Feature", "Impact"], label="Top 10 features and their impact on retention")
      ],
      live=False # Set live to True to see the interface while running the code
  )