siddop committed on
Commit
847b911
1 Parent(s): 3274b50

Create app.py

Files changed (1)
  1. app.py +138 -0
app.py ADDED
@@ -0,0 +1,138 @@
import pandas as pd
from sklearn.preprocessing import LabelEncoder, MinMaxScaler
from sklearn.model_selection import train_test_split
import tensorflow as tf
from nltk.tokenize import word_tokenize
import re
from lime.lime_tabular import LimeTabularExplainer
import gradio as gr

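# word_tokenize below needs NLTK's punkt tokenizer data; download it once at startup
import nltk
nltk.download('punkt', quiet=True)
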
# Load the data and label encode object columns
le_dict = {}
df = pd.read_csv(r"C:\Users\bhati\Documents\MachineLearning\FreelanceProject\SimpleAttritionPredictionsWithSuggestions\Data.csv")
df2 = df.copy()

# Separate categorical (object) columns from numeric ones; 'Attrition' is the target
object_cols = df2.select_dtypes(include=['object']).columns
object_cols = object_cols.delete(object_cols.get_loc('Attrition'))
int_cols = df2.select_dtypes(exclude=['object']).columns

# Fit a LabelEncoder per categorical column; keep each encoder and its classes for later
classes_dict = {}
for col in object_cols:
    le = LabelEncoder()
    df2[col] = le.fit_transform(df[col])
    le_dict[col] = le
    classes_dict[col] = le.classes_

# Features and target (the target, 'Attrition', is the last column)
X = df2.iloc[:, :-1]
y = df2.iloc[:, -1]

# Input order for the Gradio interface: categorical inputs first, then numeric ones
colList = list(object_cols) + list(int_cols)

scaler = MinMaxScaler()
X_scaled = scaler.fit_transform(X)

# Split the data into training and test sets
X_train, X_test, y_train, y_test = train_test_split(X_scaled, y, test_size=0.2, random_state=0)

# Load the model
loaded_model = tf.keras.models.load_model('Final_NN_model.keras')

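# NOTE (assumed setup): predict_label below uses an `explainer` that this file
# never defines. This is a minimal sketch of one, a LIME tabular explainer built
# on the scaled training data, matching how it is called inside the function.
explainer = LimeTabularExplainer(
    X_train,
    feature_names=list(X.columns),
    mode='classification',
)
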
# Prediction function used by the Gradio interface
def predict_label(*args):
    if '' in args:
        return "Please fill in all inputs", pd.DataFrame([['awaiting inputs', 'awaiting inputs']], columns=["Feature", "Importance"])

    # Create empty dictionaries to hold the input data
    input_dict = {}
    input_df = {}

    # Map inputs to column names (same order as the input widgets)
    for i, col in enumerate(colList):
        input_dict[col] = args[i]

    # Rearrange columns to match the order of X
    for col in X.columns:
        input_df[col] = input_dict[col]

    # Add the input data to a single-row DataFrame
    input_df = pd.DataFrame([input_df], columns=input_df.keys())

    # Numeric inputs arrive from the Textbox widgets as strings; cast them to numbers
    for col in int_cols:
        input_df[col] = pd.to_numeric(input_df[col])

    # Encode labels of object columns
    for col in le_dict:
        input_df[col] = le_dict[col].transform(input_df[col])

    # Scale columns (the model and explainer both expect scaled features)
    input_df = scaler.transform(input_df)

    # Make a prediction with the pre-trained model loaded above (as a percentage)
    pred = round(loaded_model.predict(input_df.reshape(1, -1))[0][0], 4) * 100

    # Explain the prediction with LIME
    exp = explainer.explain_instance(input_df[0], loaded_model.predict, labels=(0,), num_features=len(X.columns))

    # Store the top 5 influencing features: keep the feature name (the last
    # alphabetic token of each LIME condition string) and its weight
    top5 = {}
    exp_list = exp.as_list(0)
    for i in range(5):
        for word in word_tokenize(exp_list[i][0]):
            if re.findall(r'[a-zA-Z]+', word):
                feature = word
        weight = round(exp_list[i][1], 2)
        top5[feature] = weight

    # Convert the dictionary to a list of tuples for the Gradio DataFrame output
    top5_table = [(key, value) for key, value in top5.items()]

    # Return the prediction; pred is a percentage, so compare against 30 and 70
    if pred <= 30:
        return f"Low probability ({pred:.2f}%) of attrition", top5_table
    elif pred <= 70:
        return f"Some probability ({pred:.2f}%) of attrition", top5_table
    else:
        return f"High probability ({pred:.2f}%) of attrition", top5_table

# Define the input widgets: dropdowns for categorical columns, textboxes for numeric ones
obj_config = [gr.Dropdown(label=name, choices=sorted(classes_dict[name].tolist())) for name in object_cols]
int_config = [gr.Textbox(label=name, placeholder='enter a number') for name in int_cols]

# Concatenate the two sets of input configurations (same order as colList)
input_config = obj_config + int_config

# Gradio interface
iface = gr.Interface(
    title="Attrition Prediction",
    description="The values shown under the top features show the influence of each feature on the prediction. A higher number indicates that the feature was more influential in determining the prediction, while a lower number indicates less influence.",
    allow_flagging='never',
    fn=predict_label,
    inputs=input_config,
    outputs=[
        gr.Textbox(label="Prediction"),
        gr.DataFrame(headers=["Feature", "Importance"], label="Top 5 features influencing the prediction")
    ],
    live=False  # Set live to True to update the prediction as inputs change
)

# Launch the Gradio interface
iface.launch(share=True)