ellyothim commited on
Commit
7ebf710
1 Parent(s): 5493919

Upload capstone_gradio_app_embedding.py

Browse files
Files changed (1) hide show
  1. capstone_gradio_app_embedding.py +263 -0
capstone_gradio_app_embedding.py ADDED
@@ -0,0 +1,263 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
# -*- coding: utf-8 -*-
"""Capstone Gradio App Embedding.ipynb

Automatically generated by Colaboratory.

Original file is located at
https://colab.research.google.com/drive/1zsT_lHGVHzG29XSb4tMF3UdA6glyWnRx
"""

import subprocess
import sys

# Colab-only: mount Google Drive so the dataset under MyDrive is reachable.
from google.colab import drive

drive.mount('/content/drive')

# `!pip install gradio` is IPython shell magic and a SyntaxError in a plain
# .py file; invoke pip through the current interpreter instead.
subprocess.run([sys.executable, "-m", "pip", "install", "gradio"], check=True)

"""### **DATA PREP**"""
import pandas as pd

# Earlier dataset locations kept from the notebook for reference:
# test = pd.read_csv('/content/drive/MyDrive/datasets capstone/test.csv')
# y_train = pd.read_csv("/content/drive/MyDrive/datasets capstone/y_train.csv")
# X_train = pd.read_csv('/content/drive/MyDrive/datasets capstone/X_train.csv')
# X_test = pd.read_csv("/content/test.csv")

path = "/content/drive/MyDrive/Capstone Project /"

train = pd.read_csv(path + 'Train.csv')

# Normalize string cells so categorical values compare equal later:
# remove stray tabs, then collapse doubled spaces to single spaces.
# NOTE(review): the notebook's second replace rendered as a single-space
# target (a no-op); its adjacent comment suggests doubled spaces were
# intended — confirm against the raw notebook.
train = train.applymap(lambda x: x.replace("\t", '') if isinstance(x, str) else x)
train = train.applymap(lambda x: x.replace("  ", ' ') if isinstance(x, str) else x)

# Drop the identifier and the columns this model does not use.
train.drop(columns=['MRG', 'user_id', 'ZONE1', 'ZONE2', 'TOP_PACK'], inplace=True)

train.head(1)
# Impute missing values column by column:
#  - forward-fill the categoricals (REGION, TENURE),
#  - median for the skewed monetary columns (MONTANT, REVENUE),
#  - 0 where absence plausibly means "no activity",
#  - mean for the counter-style columns (REGULARITY, FREQ_TOP_PACK).
# `fillna(method='ffill')` and per-column `inplace=True` are deprecated in
# modern pandas, so assign each result back explicitly.
train["REGION"] = train["REGION"].ffill()
train["TENURE"] = train["TENURE"].ffill()
train["MONTANT"] = train["MONTANT"].fillna(train["MONTANT"].median())
train["FREQUENCE_RECH"] = train["FREQUENCE_RECH"].fillna(0)
train["REVENUE"] = train["REVENUE"].fillna(train["REVENUE"].median())
train["ARPU_SEGMENT"] = train["ARPU_SEGMENT"].fillna(0)
train["FREQUENCE"] = train["FREQUENCE"].fillna(0)
train["DATA_VOLUME"] = train["DATA_VOLUME"].fillna(0)
train["ON_NET"] = train["ON_NET"].fillna(0)
train["ORANGE"] = train["ORANGE"].fillna(0)
train["TIGO"] = train["TIGO"].fillna(0)
# ZONE1 / ZONE2 / MRG / TOP_PACK were dropped above, so the notebook's
# commented imputation lines for them are gone.
train["REGULARITY"] = train["REGULARITY"].fillna(train["REGULARITY"].mean())
train["FREQ_TOP_PACK"] = train["FREQ_TOP_PACK"].fillna(train["FREQ_TOP_PACK"].mean())
# Ordinally encode the TENURE bands as string digits (order preserved from
# the notebook, including J->4 before G->5). The notebook used chained
# str.replace(..., regex=True); an exact-value mapping is safer for fixed
# category labels and avoids accidental substring/regex matches.
tenure_mapping = {
    'D 3-6 month': '1',
    'E 6-9 month': '2',
    'F 9-12 month': '3',
    'J 21-24 month': '4',
    'G 12-15 month': '5',
    'H 15-18 month': '6',
    'I 18-21 month': '7',
    'K > 24 month': '8',
}
train['TENURE'] = train['TENURE'].replace(tenure_mapping)

# train['TENURE'].value_counts()
# Numeric (string) code for each region, 1..14 in the same order the
# notebook listed them; kept as strings to match the TENURE encoding.
_REGION_NAMES = [
    'DAKAR', 'THIES', 'SAINT-LOUIS', 'LOUGA', 'KAOLACK', 'DIOURBEL',
    'TAMBACOUNDA', 'KAFFRINE', 'KOLDA', 'FATICK', 'ZIGUINCHOR',
    'SEDHIOU', 'KEDOUGOU', 'MATAM',
]
region_mapping = {name: str(code) for code, name in enumerate(_REGION_NAMES, start=1)}
# Swap each region name for its numeric string code.
train['REGION'] = train['REGION'].replace(region_mapping)

# Inspect the re-coded distribution if needed:
# print(train['REGION'].value_counts())
"""## **FITTING AND TRAINING**"""

# train.head(1)

"""Select target and features"""

# CHURN is the binary target; every remaining column is a feature.
y = train['CHURN']
x = train.drop(columns='CHURN', axis=1)

# y.head(3)
# x.head(2)

from sklearn.model_selection import train_test_split

# 50/50 train/test split, then carve a 30% validation slice out of the
# training half. Stratification was deliberately left disabled in the
# notebook (commented out), so it stays off here.
X_train, X_test, y_train, y_test = train_test_split(
    x, y, test_size=0.5, random_state=45)  # , stratify=y

X_train, X_val, y_train, y_val = train_test_split(
    X_train, y_train, test_size=0.3, random_state=1)  # , stratify=y
"""### SCALE NUMERICAL COLUMNS"""

# StandardScaler was only imported further down in the notebook, *after*
# this cell already used it; import it here so the script runs top to
# bottom without a NameError.
from sklearn.preprocessing import StandardScaler

# Numeric feature columns to standardize (zero mean, unit variance).
num_cols = ['MONTANT', 'FREQUENCE_RECH', 'REVENUE', 'ARPU_SEGMENT', 'FREQUENCE',
            'DATA_VOLUME', 'ON_NET', 'ORANGE', 'TIGO',
            'REGULARITY', 'FREQ_TOP_PACK']

scaler = StandardScaler()

# Fit the scaler on the training split only, then reuse those statistics
# for validation: the notebook re-fit on X_val, which leaks validation
# statistics and makes the two splits incomparable.
X_train[num_cols] = scaler.fit_transform(X_train[num_cols])
X_val[num_cols] = scaler.transform(X_val[num_cols])

# X_train.head(3)
"""### ENCODE CATEGORICAL COLS WITH NUMERICAL VALUES WITH MANY N_UNIQUE( ) VALS

"""

# `!pip install category_encoders` is IPython magic (a SyntaxError in a
# plain .py file), and the SumEncoder experiment below is fully commented
# out, so the install is only needed if that experiment is re-enabled:
# import subprocess, sys
# subprocess.run([sys.executable, "-m", "pip", "install", "category_encoders"], check=True)

# import category_encoders as ce

# encoder_ = ce.SumEncoder(cols=['TOP_PACK'])

# encoder.fit(x, y)
# X_train = encoder.transform(X_train)

# X_val = encoder.transform(X_val)

# X_test = encoder.transform(X_test)
from sklearn.model_selection import train_test_split, cross_val_score
from sklearn.metrics import accuracy_score, confusion_matrix, recall_score, precision_recall_curve, f1_score
from sklearn.preprocessing import StandardScaler
from sklearn.ensemble import ExtraTreesRegressor
from sklearn.preprocessing import LabelEncoder

# NOTE(review): CHURN is a binary target, yet an ExtraTrees *Regressor* is
# trained here; its continuous scores are thresholded later in
# classifier_1(). A classifier would be more conventional — confirm before
# changing, since the downstream 0.9 cut-off depends on regression output.
model = ExtraTreesRegressor(
    n_estimators=100,  # number of trees in the ensemble
    max_depth=10,      # cap tree depth to limit overfitting
    random_state=42,   # reproducibility
)

# Fit on the scaled training split; MODEL is the name used downstream.
MODEL = model.fit(X_train, y_train)

"""## **Check if our model is working**"""

# Smoke test: score the held-out half and eyeball the raw predictions.
y_pred = MODEL.predict(X_test)

y_pred

"""Since our model is working correctly , Inspect the X_test features , to be used as the user input to interact with the model"""

X_test.head()
def classifier_1(result, threshold=0.9):
    """Turn a raw regression score into a churn verdict.

    Parameters
    ----------
    result : float
        Predicted churn score from the ExtraTrees regressor.
    threshold : float, optional
        Scores strictly above this value count as churn. Defaults to 0.9,
        the cut-off the notebook hard-coded, so existing callers behave
        identically.

    Returns
    -------
    str
        "Customer will churn" or "Customer will not churn".
    """
    if result > threshold:
        return "Customer will churn"
    return "Customer will not churn"
def predict(REGION, TENURE, MONTANT, FREQUENCE_RECH, REVENUE, ARPU_SEGMENT,
            FREQUENCE, DATA_VOLUME, ON_NET, ORANGE, TIGO, REGULARITY,
            FREQ_TOP_PACK):
    """Predict churn for one client described by the 13 model features.

    The argument order matches the training frame's column order. Returns a
    one-element list of (int, label) pairs shaped for the Gradio
    HighlightedText output: (0, ...) when churn is predicted, (1, ...)
    otherwise.
    """
    # numpy is never imported at module level in this script; import it
    # here so the Gradio callback does not raise NameError at request time.
    import numpy as np

    input_array = np.array([[REGION, TENURE, MONTANT, FREQUENCE_RECH, REVENUE,
                             ARPU_SEGMENT, FREQUENCE, DATA_VOLUME, ON_NET,
                             ORANGE, TIGO, REGULARITY, FREQ_TOP_PACK]])

    pred = MODEL.predict(input_array)
    output = classifier_1(pred[0])

    if output == "Customer will churn":
        return [(0, output)]
    return [(1, output)]
"""Check if the function will work"""

predict(1, 2, 3, 4, 5, 6, 7, 8, 9, 1, 1, 1, 1)

# `variable_definitions` was referenced here bare but is never defined
# anywhere in the notebook — it raised NameError, so it is commented out.
# variable_definitions

# Inspect the observed range used when choosing slider bounds below.
X_test['FREQ_TOP_PACK'].min(), X_test['FREQ_TOP_PACK'].max()
import gradio as gr

#tenure = tenure_dropdown

# NOTE(review): the notebook's slider labels from REVENUE onward were
# shifted by one feature (REVENUE carried "ARPU_SEGMENT", TIGO carried
# "ZONE1", ...); each label now names its own feature. REGION's maximum is
# raised from 13 to 14 so the 14th region code (MATAM) is selectable.
# `gr.inputs.*` / `gr.outputs.*` is the legacy Gradio 2.x namespace —
# confirm the installed version before migrating to `gr.Slider`.
REGION = gr.inputs.Slider(minimum=1, maximum=14, label='Location of each client')
TENURE = gr.inputs.Slider(minimum=1, maximum=8, label="Duration in network")
MONTANT = gr.inputs.Slider(minimum=22, maximum=470000, label="Top up amount")
FREQUENCE_RECH = gr.inputs.Slider(minimum=1, maximum=131, label="income frequency")
REVENUE = gr.inputs.Slider(minimum=1, maximum=532177, label="REVENUE")
ARPU_SEGMENT = gr.inputs.Slider(minimum=1, maximum=177392, label="ARPU_SEGMENT")
FREQUENCE = gr.inputs.Slider(minimum=1, maximum=91, label="FREQUENCE")
DATA_VOLUME = gr.inputs.Slider(minimum=0, maximum=1702309, label="DATA_VOLUME")
ON_NET = gr.inputs.Slider(minimum=0, maximum=36687, label="ON_NET")
ORANGE = gr.inputs.Slider(minimum=0, maximum=6721, label="ORANGE")
TIGO = gr.inputs.Slider(minimum=0, maximum=4174, label="TIGO")
REGULARITY = gr.inputs.Slider(minimum=1, maximum=62, label="REGULARITY")
FREQ_TOP_PACK = gr.inputs.Slider(minimum=1, maximum=592, label="FREQ_TOP_PACK")

op = gr.outputs.HighlightedText(color_map={"Customer will churn": "pink",
                                           "Customer will not churn": "yellow"})

gr.Interface(predict,
             inputs=[REGION, TENURE, MONTANT, FREQUENCE_RECH, REVENUE,
                     ARPU_SEGMENT, FREQUENCE, DATA_VOLUME, ON_NET, ORANGE,
                     TIGO, REGULARITY, FREQ_TOP_PACK],
             outputs=op,
             live=True).launch(debug=True)
import gradio as gr

# Input sliders. Labels fixed: from REVENUE onward the notebook labelled
# each slider with the *next* feature's name (REVENUE as "ARPU_SEGMENT",
# TIGO as "ZONE1", ...). REGION's maximum is raised from 13 to 14 so the
# 14th region code (MATAM) is selectable.
REGION = gr.inputs.Slider(minimum=1, maximum=14, label='Location of each client')
TENURE = gr.inputs.Slider(minimum=1, maximum=8, label="Duration in network")
MONTANT = gr.inputs.Slider(minimum=22, maximum=470000, label="Top-up amount")
FREQUENCE_RECH = gr.inputs.Slider(minimum=1, maximum=131, label="Income frequency")
REVENUE = gr.inputs.Slider(minimum=1, maximum=532177, label="REVENUE")
ARPU_SEGMENT = gr.inputs.Slider(minimum=1, maximum=177392, label="ARPU_SEGMENT")
FREQUENCE = gr.inputs.Slider(minimum=1, maximum=91, label="FREQUENCE")
DATA_VOLUME = gr.inputs.Slider(minimum=0, maximum=1702309, label="DATA_VOLUME")
ON_NET = gr.inputs.Slider(minimum=0, maximum=36687, label="ON_NET")
ORANGE = gr.inputs.Slider(minimum=0, maximum=6721, label="ORANGE")
TIGO = gr.inputs.Slider(minimum=0, maximum=4174, label="TIGO")
REGULARITY = gr.inputs.Slider(minimum=1, maximum=62, label="REGULARITY")
FREQ_TOP_PACK = gr.inputs.Slider(minimum=1, maximum=592, label="FREQ_TOP_PACK")

# Output configuration
op = gr.outputs.HighlightedText(color_map={"Customer will churn": "pink",
                                           "Customer will not churn": "yellow"})

# Create and launch the interface
gr.Interface(predict,
             inputs=[REGION, TENURE, MONTANT, FREQUENCE_RECH, REVENUE,
                     ARPU_SEGMENT, FREQUENCE, DATA_VOLUME, ON_NET, ORANGE,
                     TIGO, REGULARITY, FREQ_TOP_PACK],
             outputs=op,
             live=False).launch(debug=False)

# # Map numerical values to labels
# tenure_labels = {
#     0: "3-6 months",
#     1: "6-9 months",
#     2: "9-12 months",
#     3: "12-15 months",
#     4: "15-18 months",
#     5: "18-21 months",
#     6: "21-24 months",
#     7: "> 24 months"
# }

# # Reverse the mapping for predictions
# tenure_values = {v: k for k, v in tenure_labels.items()}

# # Create a dropdown menu with labels
# tenure_dropdown = gr.inputs.Dropdown(list(tenure_labels.values()), label="TENURE")
+