scorpion237 committed on
Commit
6a19a00
1 Parent(s): 74fa9ef

Create app.py

Files changed (1)
  1. app.py +441 -0
app.py ADDED
import streamlit as st, base64
import pandas as pd, seaborn as sns
import os, matplotlib.pyplot as plt
import pickle, numpy as np
from sklearn.preprocessing import StandardScaler
from sklearn.metrics import classification_report, accuracy_score, confusion_matrix


# Page background image
def add_bg_from_local(image_file):
    with open(image_file, "rb") as f:
        encoded_string = base64.b64encode(f.read())
    st.markdown(
        f"""
        <style>
        .stApp {{
            background-image: url(data:image/png;base64,{encoded_string.decode()});
            background-size: cover
        }}
        </style>
        """,
        unsafe_allow_html=True
    )
add_bg_from_local('./images/route.png')


fig = plt.figure(figsize=(10, 10))
_, middle, _ = st.columns((2, 3, 2))
with middle:
    st.title(":orange[_Scoring App_]")
# Path to the data folder
path = "./data"

# Load the dataset (cached by Streamlit)
@st.cache_data
def load_data(file_path):
    return pd.read_csv(os.path.join(path, file_path))

# Convert a dataframe to CSV bytes for download
def convert_df_to_csv(frame):
    return frame.to_csv(index=False).encode("utf-8")

# Main application
st.sidebar.image(r"./images/picture1.png")
def main():
    st.markdown("<h2 style='text-align:center; color:green;'> Classification for "
                "credit approval </h2>", unsafe_allow_html=True)

    # Upload a CSV file
    uploaded_file = st.sidebar.file_uploader("Upload your input CSV file", type=["csv"])

    # Build the menu
    menu = ["Home", "Data Exploration", "Data Visualisation", "Make prediction"]
    choice = st.sidebar.selectbox("Select menu", menu)

    # Load the dataset
    data = load_data("loan.csv")

    # Drop the Loan_ID column
    data.drop("Loan_ID", axis=1, inplace=True)
    if choice == "Home":
        st.write("For this project we built a classification model that decides, "
                 "from a handful of applicant variables, whether or not a bank loan "
                 "can be granted to a given person.")

        st.subheader(":orange[__Dataset overview__] :memo:")

        st.markdown("The dataset has 614 rows and 13 columns. **Loan_Status** is the "
                    "target variable (categorical with two classes: **Y** when the loan "
                    "was granted, **N** otherwise). To get the best possible results we "
                    "first run an **exploratory analysis** of the data, then prepare the "
                    "data, and finally build and tune the models. "
                    "`If you upload a CSV file, you can compare the predictions of each "
                    "model and download the corresponding CSV file.`")
        #st.image("./images/processor.jpg")

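    # Expected columns in data/loan.csv, reconstructed from the widgets and dummy
    # variables used later in this file (13 columns in total): Loan_ID, Gender, Married,
    # Dependents, Education, Self_Employed, ApplicantIncome, CoapplicantIncome,
    # LoanAmount, Loan_Amount_Term, Credit_History, Property_Area and the target
    # Loan_Status (Y/N).
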
    if choice == "Data Exploration":
        st.subheader(":orange[_Data Exploration_] :bar_chart:")
        # Show the first rows
        st.write(data.head())

        # Missing values
        if st.sidebar.checkbox("Missing values"):
            st.subheader(":orange[Missing values]")
            na_count = data.isnull().sum().to_frame(name='count')
            na_per = (data.isnull().sum().to_frame(name='percentage %') / data.shape[0] * 100).round(2)
            st.write(pd.concat([na_count, na_per], axis=1).sort_values(by='count', ascending=False).T)

        # Unique values per column
        if st.sidebar.checkbox("Unique values per column"):
            st.subheader(":orange[Unique values per column]")
            only = data.nunique().sort_values(ascending=False).to_frame(name='count')
            perc = (data.nunique().sort_values(ascending=False).to_frame(name='percentage %') / data.shape[0] * 100).round(2)
            dtype = data.dtypes.to_frame(name='dtypes')
            st.write(pd.concat([only, perc, dtype], axis=1).T)

        # Summary statistics
        if st.sidebar.checkbox("Summary statistics"):
            st.subheader(":orange[Summary statistics]")
            st.write(data.describe())

        # Correlation matrix
        if st.sidebar.checkbox("Correlation matrix"):
            fig = plt.figure(figsize=(7, 5))
            st.subheader(":orange[Correlation matrix]")
            sns.heatmap(data.corr(numeric_only=True), annot=True, vmin=-1, vmax=1, cmap='ocean')
            st.pyplot(fig)
            plt.show()

    if choice == "Data Visualisation":
        st.subheader(":orange[_Data Visualisation_] :chart_with_upwards_trend:")
        if st.sidebar.checkbox("Univariate analysis"):
            # Categorical variables
            categorical_columns = data.select_dtypes(include='object').columns.tolist()
            st.write("Categorical variables")
            st.write(categorical_columns)
            fig = plt.figure(figsize=(14, 8))
            sns.set_theme(context='notebook', style='darkgrid', palette='deep', font='sans-serif', font_scale=1, color_codes=True, rc=None)
            for idx, col in enumerate(categorical_columns[:-1]):
                plt.subplot(2, 3, idx + 1)
                sns.countplot(data=data, x=col, hue="Loan_Status")
            sns.countplot(data=data, x='Loan_Status')
            st.pyplot(fig)
            plt.show()

            # Numerical variables
            numerical_columns = data.select_dtypes(include='number').columns.tolist()
            st.write("Numerical variables")
            st.write(numerical_columns)
            fig = plt.figure(figsize=(15, 7))
            for idx, col in enumerate(numerical_columns):
                plt.subplot(2, 3, idx + 1)
                plt.hist(data[col], density=True)
                sns.kdeplot(data=data, x=col)
                plt.title(col)
            #plt.subplots_adjust(hspace=0.5)
            plt.tight_layout(h_pad=2, w_pad=3.)
            st.pyplot(fig)
            plt.show()

        if st.sidebar.checkbox("Bivariate analysis"):
            st.subheader(":orange[Bivariate analysis]")
            numerical_columns = data.select_dtypes(include='number').columns.tolist()
            fig = plt.figure(figsize=(14, 8))
            for idx, num_col in enumerate(numerical_columns[:-2]):
                plt.subplot(2, 2, idx + 1)
                sns.boxplot(y=num_col, data=data, x='Loan_Status')
            plt.tight_layout(h_pad=2, w_pad=3.)
            st.pyplot(fig)
            plt.show()

    if choice == "Make prediction":
        st.subheader(":orange[Make prediction] :fleur_de_lis:")
        if uploaded_file is not None:
            data = pd.read_csv(uploaded_file)

            # Data preprocessing
            from sklearn.impute import SimpleImputer
            try:
                data.drop(["Loan_ID"], axis=1, inplace=True)
            except KeyError:
                pass
            # One-hot encoding
            data_encoded = pd.get_dummies(data, drop_first=True)
            st.subheader(":orange[Encoded data]")
            st.write(data_encoded)

            # Split features and target
            X, y = data_encoded.drop(["Loan_Status_Y"], axis=1), data_encoded["Loan_Status_Y"]

            # Impute missing values
            sp = SimpleImputer(strategy="most_frequent")
            X = sp.fit_transform(X)

            # Scale the features
            std = StandardScaler()
            X = std.fit_transform(X)

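            # Note: the imputer and scaler above are fit on the uploaded file itself,
            # so the resulting scaling may differ from what the pickled models saw at
            # training time. If the fitted preprocessing objects were saved during
            # training, they could be reloaded instead (hypothetical file names):
            #   sp = pickle.load(open("imputer.pkl", "rb"))
            #   std = pickle.load(open("scaler.pkl", "rb"))
            #   X = std.transform(sp.transform(data_encoded.drop(["Loan_Status_Y"], axis=1)))
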
            # Predictions
            # Random Forest
            if st.sidebar.checkbox("Random Forest"):
                st.subheader(":orange[Random Forest] :sunglasses:")
                rf = pickle.load(open("scoring_rf.pkl", "rb"))
                pred = rf.predict(X)
                pred_proba = rf.predict_proba(X)
                st.subheader(':green[Prediction]')
                loan_status = np.array(['N', 'Y'])
                prediction = pd.DataFrame(loan_status[pred], columns=['prediction'])
                df = pd.concat([data, prediction], axis=1)
                st.write(df)
                # Download the predictions
                csv = convert_df_to_csv(df)
                st.download_button("Press to Download",
                                   csv,
                                   "random_forest.csv",
                                   "text/csv",
                                   key='download_csv_rf')

                st.text("Model report : \n " + classification_report(y, pred))

                # Accuracy score
                rf_score = accuracy_score(pred, y)
                st.write(":green[Accuracy score]")
                st.write(f"{round(rf_score*100, 2)}% accuracy")
                st.subheader(':green[Prediction Probability]')
                st.write(pred_proba)

            # Linear Discriminant Analysis
            if st.sidebar.checkbox("Discriminant Analysis"):
                st.subheader(":orange[Discriminant Analysis] :sunglasses:")
                lda = pickle.load(open("scoring_lda.pkl", "rb"))
                pred = lda.predict(X)
                pred_proba = lda.predict_proba(X)
                st.subheader(':green[Prediction]')
                loan_status = np.array(['N', 'Y'])
                prediction = pd.DataFrame(loan_status[pred], columns=['prediction'])
                df = pd.concat([data, prediction], axis=1)
                st.write(df)
                # Download the predictions
                csv = convert_df_to_csv(df)
                st.download_button("Press to Download",
                                   csv,
                                   "discriminant.csv",
                                   "text/csv",
                                   key='download_csv_lda')
                st.text("Model report : \n " + classification_report(y, pred))

                # Accuracy score
                lda_score = accuracy_score(pred, y)
                st.subheader(":green[Accuracy score]")
                st.write(f"{round(lda_score*100, 2)}% accuracy")
                st.subheader(':green[Prediction Probability]')
                st.write(pred_proba)

                # Confusion matrix
                fig = plt.figure(figsize=(2, 1))
                cm = confusion_matrix(y, pred)
                st.subheader(":green[Confusion matrix]")
                sns.heatmap(cm, annot=True, cmap='Dark2')
                st.pyplot(fig)
                plt.plot()

            # XGBoost
            if st.sidebar.checkbox("XGBoost"):
                st.subheader(":orange[XGBoost] :sunglasses:")
                xg = pickle.load(open("scoring_xg.pkl", "rb"))
                pred = xg.predict(X)
                pred_proba = xg.predict_proba(X)
                st.subheader(':green[Prediction]')
                loan_status = np.array(['N', 'Y'])
                prediction = pd.DataFrame(loan_status[pred], columns=['prediction'])
                df = pd.concat([data, prediction], axis=1)
                st.write(df)
                # Download the predictions
                csv = convert_df_to_csv(df)
                st.download_button("Press to Download",
                                   csv,
                                   "xgboost.csv",
                                   "text/csv",
                                   key='download_csv_xgb')
                st.text("Model report : \n " + classification_report(y, pred))

                # Accuracy score
                xg_score = accuracy_score(pred, y)
                st.subheader(":green[Accuracy score]")
                st.write(f"{round(xg_score*100, 2)}% accuracy")
                st.subheader(':green[Prediction Probability]')
                st.write(pred_proba)

            # Neural network
            if st.sidebar.checkbox("Neural Network"):
                st.subheader(":orange[Neural Network] :sunglasses:")
                ann = pickle.load(open("scoring_ann.pkl", "rb"))
                pred = ann.predict(X)
                pred_proba = ann.predict_proba(X)
                st.subheader(':green[Prediction]')
                loan_status = np.array(['N', 'Y'])
                prediction = pd.DataFrame(loan_status[pred], columns=['prediction'])
                df = pd.concat([data, prediction], axis=1)
                st.write(df)
                # Download the predictions
                csv = convert_df_to_csv(df)
                st.download_button("Press to Download",
                                   csv,
                                   "neural_network.csv",
                                   "text/csv",
                                   key='download_csv_ann')
                st.text("Model report : \n " + classification_report(y, pred))

                # Accuracy score
                ann_score = accuracy_score(pred, y)
                st.subheader(":green[Accuracy score]")
                st.write(f"{round(ann_score*100, 2)}% accuracy")
                st.subheader(':green[Prediction Probability]')
                st.write(pred_proba)

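            # The four model branches above differ only in the pickle file and the
            # section title; the same load -> predict -> report -> download sequence is
            # repeated for each. A factored-out helper for the analogous single-input
            # branches is sketched just before the __main__ guard at the end of this file.
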
        else:
            def user_input_features():
                gender = st.sidebar.selectbox('Gender', ('Male', 'Female'))
                married = st.sidebar.selectbox('Married', ('Yes', 'No'))
                dependents = st.sidebar.selectbox('Dependents', (0, 1, 2, "3+"))
                education = st.sidebar.selectbox('Education', ('Graduate', 'Not Graduate'))
                self_employed = st.sidebar.selectbox('Self_Employed', ('Yes', 'No'))
                applicant_income = st.sidebar.slider('ApplicantIncome', 150, 81000)
                coapplicant_income = st.sidebar.slider('CoapplicantIncome', 0, 42000)
                loan_amount = st.sidebar.slider('LoanAmount', 0, 800)
                loan_amount_term = st.sidebar.slider('Loan_Amount_Term', 10, 500)
                credit_history = st.sidebar.selectbox('Credit_History', (0, 1))
                property_area = st.sidebar.selectbox('Property_Area', ("Urban", "Rural", "Semiurban"))

                if gender == "Male":
                    gender = 1
                else:
                    gender = 0

                if married == 'Yes':
                    married = 1
                else:
                    married = 0

                dependents_1, dependents_2, dependents_3 = 0, 0, 0
                if dependents == 1:
                    dependents_1 = 1
                elif dependents == 2:
                    dependents_2 = 1
                elif dependents == "3+":
                    dependents_3 = 1

                if education == "Not Graduate":
                    education = 1
                else:
                    education = 0

                if self_employed == "Yes":
                    self_employed = 1
                else:
                    self_employed = 0

                property_urban, property_semiurban = 0, 0
                if property_area == "Semiurban":
                    property_semiurban = 1
                elif property_area == "Urban":
                    property_urban = 1

                # Numeric inputs are standardised with hard-coded means and standard
                # deviations that appear to come from the training data.
                data = {'ApplicantIncome': (applicant_income - 5403) / 6109,
                        'CoapplicantIncome': (coapplicant_income - 1621) / 2926,
                        'LoanAmount': (loan_amount - 146) / 85,
                        'Loan_Amount_Term': (loan_amount_term - 342) / 65,
                        'Credit_History': (credit_history - 0.84) / 0.35,
                        'Gender_Male': gender,
                        'Married_Yes': married,
                        'Dependents_1': dependents_1,
                        'Dependents_2': dependents_2,
                        'Dependents_3+': dependents_3,
                        'Education_Not_Graduate': education,
                        'Self_Employed_Yes': self_employed,
                        'Property_Area_Semiurban': property_semiurban,
                        'Property_Area_Urban': property_urban
                        }
                features = pd.DataFrame(data, index=[0])
                return features
            data_input = user_input_features()

            # Random Forest
            if st.sidebar.checkbox("Random Forest"):
                st.subheader(":orange[Random Forest]")
                rf = pickle.load(open("scoring_rf.pkl", "rb"))
                pred = rf.predict(data_input)
                if pred == 1:
                    st.write(":orange[__The loan can be granted__] :white_check_mark:")
                else:
                    st.write(":red[__Sorry, ...__] :disappointed:")
                pred_proba = rf.predict_proba(data_input)
                loan_status = np.array(['N', 'Y'])
                prediction = pd.DataFrame(loan_status[pred], columns=['prediction'])
                df = pd.concat([data_input, prediction], axis=1)
                st.write(df)
                st.subheader(":green[probability] :question:")
                st.write(pred_proba)

            # Discriminant Analysis
            if st.sidebar.checkbox("Discriminant Analysis"):
                st.subheader(":orange[Discriminant Analysis]")
                lda = pickle.load(open("scoring_lda.pkl", "rb"))
                pred = lda.predict(data_input)
                if pred == 1:
                    st.write(":orange[__The loan can be granted__] :white_check_mark:")
                else:
                    st.write(":red[__Sorry, ...__] :disappointed:")
                pred_proba = lda.predict_proba(data_input)
                loan_status = np.array(['N', 'Y'])
                prediction = pd.DataFrame(loan_status[pred], columns=['prediction'])
                df = pd.concat([data_input, prediction], axis=1)
                st.write(df)
                st.subheader(":green[probability] :question:")
                st.write(pred_proba)

            # XGBoost
            if st.sidebar.checkbox("XGBoost"):
                st.subheader(":orange[XGBoost]")
                xg = pickle.load(open("scoring_xg.pkl", "rb"))
                pred = xg.predict(data_input)
                if pred == 1:
                    st.write(":orange[__The loan can be granted__] :white_check_mark:")
                else:
                    st.write(":red[__Sorry, ...__] :disappointed:")
                pred_proba = xg.predict_proba(data_input)
                loan_status = np.array(['N', 'Y'])
                prediction = pd.DataFrame(loan_status[pred], columns=['prediction'])
                df = pd.concat([data_input, prediction], axis=1)
                st.write(df)
                st.subheader(":green[probability] :question:")
                st.write(pred_proba)

            # Neural network
            if st.sidebar.checkbox("Neural Network"):
                st.subheader(":orange[Neural Network]")
                ann = pickle.load(open("scoring_ann.pkl", "rb"))
                pred = ann.predict(data_input)
                if pred == 1:
                    st.write(":orange[__The loan can be granted__] :white_check_mark:")
                else:
                    st.write(":red[__Sorry, ...__] :disappointed:")
                pred_proba = ann.predict_proba(data_input)
                loan_status = np.array(['N', 'Y'])
                prediction = pd.DataFrame(loan_status[pred], columns=['prediction'])
                df = pd.concat([data_input, prediction], axis=1)
                st.write(df)
                st.subheader(":green[probability] :question:")
                st.write(pred_proba)


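# A minimal sketch (not part of the original commit) of how the four single-input
# model branches above could be factored into one helper. It only reuses names that
# already exist in this file (the pickled model paths, data_input and the display calls).
def predict_single(model_path, model_name, data_input):
    """Load a pickled model, predict for one applicant and display the outcome."""
    st.subheader(f":orange[{model_name}]")
    with open(model_path, "rb") as f:
        model = pickle.load(f)
    pred = model.predict(data_input)
    pred_proba = model.predict_proba(data_input)
    if pred == 1:
        st.write(":orange[__The loan can be granted__] :white_check_mark:")
    else:
        st.write(":red[__Sorry, ...__] :disappointed:")
    loan_status = np.array(['N', 'Y'])
    df = pd.concat([data_input, pd.DataFrame(loan_status[pred], columns=['prediction'])], axis=1)
    st.write(df)
    st.subheader(":green[probability] :question:")
    st.write(pred_proba)
# Example use, equivalent to the "Random Forest" branch above:
#   if st.sidebar.checkbox("Random Forest"):
#       predict_single("scoring_rf.pkl", "Random Forest", data_input)
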
# Launch the application
if __name__ == "__main__":
    main()