kennethv1706 commited on
Commit
b752079
1 Parent(s): 1e47b5d

Upload 11 files

Browse files
P1G1_Kenneth Vincentius.csv ADDED
The diff for this file is too large to render. See raw diff
 
P1G1_Kenneth Vincentius.ipynb ADDED
The diff for this file is too large to render. See raw diff
 
app.py ADDED
@@ -0,0 +1,13 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import streamlit as st
2
+ import eda
3
+ import model
4
+
5
# Sidebar navigation: the visitor chooses which page of the app to display.
_PAGE_OPTIONS = ['Home Page', 'Exploration Data Analysis', 'Model Prediksi']
page = st.sidebar.selectbox(label='Select Page:', options=_PAGE_OPTIONS)

# Dispatch on the selected page. Any choice that is neither the EDA page nor
# the home page is handed to model.run().
if page == 'Exploration Data Analysis':
    eda.run()
elif page == 'Home Page':
    st.header('Welcome Page - Silahkan pilih menu lain di Select Box untuk memulai!')
else:
    model.run()
13
+
apps.py ADDED
@@ -0,0 +1,26 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import streamlit as st
2
+ import streamlit as st
3
+ import subprocess
4
st.title("Select Your Data Scientist")

# Let the visitor pick exactly one profile.
selected_option = st.radio("Pilih satu opsi:", ["Rafif Aditio", "Fahmi"])

# Show the picture that belongs to the selected profile.
if selected_option == 'Rafif Aditio':
    st.image("rafif.png", width=341, caption="Rafif Aditio")
else:
    # NOTE(review): this caption is spelled differently from the radio option
    # "Fahmi" -- confirm the intended name before changing it.
    st.image("Fahmi.png", width=341, caption="Fhami iman")

# Button that starts the main application (app.py) as a separate Streamlit server.
if st.button("Character Selected"):
    # subprocess.run() would wait for the child server to exit, which blocks
    # this script for as long as the other app is running. Popen returns
    # immediately, so this page stays responsive.
    running = st.session_state.get("app_process")
    # Only launch when no child was started in this session or the previous
    # one has already exited, so repeated clicks do not pile up servers.
    if running is None or running.poll() is not None:
        st.session_state["app_process"] = subprocess.Popen(["streamlit", "run", "app.py"])
datainf_P1G2_Kenneth Vincentius.ipynb ADDED
@@ -0,0 +1,1107 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "cells": [
3
+ {
4
+ "cell_type": "markdown",
5
+ "metadata": {},
6
+ "source": [
7
+ "# xi. Model Inference"
8
+ ]
9
+ },
10
+ {
11
+ "cell_type": "code",
12
+ "execution_count": 1,
13
+ "metadata": {},
14
+ "outputs": [],
15
+ "source": [
16
+ "import pickle\n",
17
+ "import pandas as pd\n",
18
+ "import numpy as np\n",
19
+ "import random"
20
+ ]
21
+ },
22
+ {
23
+ "cell_type": "code",
24
+ "execution_count": 2,
25
+ "metadata": {},
26
+ "outputs": [],
27
+ "source": [
28
+ "#load model scaler dan model knn yang sudah disimpen dalam bentuk pkl\n",
29
+ "with open('scaler.pkl', 'rb') as file_1:\n",
30
+ " scaler = pickle.load(file_1)\n",
31
+ "\n",
32
+ "with open('model_knn.pkl', 'rb') as file_2:\n",
33
+ " model_knn = pickle.load(file_2)"
34
+ ]
35
+ },
36
+ {
37
+ "cell_type": "markdown",
38
+ "metadata": {},
39
+ "source": [
40
+ "## Membuat data dummy"
41
+ ]
42
+ },
43
+ {
44
+ "cell_type": "code",
45
+ "execution_count": 3,
46
+ "metadata": {},
47
+ "outputs": [],
48
+ "source": [
49
+ "# Atur seed di sini\n",
50
+ "seed_value = 777\n",
51
+ "random.seed(seed_value)\n",
52
+ "np.random.seed(seed_value)\n",
53
+ "\n",
54
+ "# Jumlah data point yang ingin dibuat\n",
55
+ "n = 100\n",
56
+ "\n",
57
+ "# Membuat data untuk kolom 'limit_balance'\n",
58
+ "limit_balance = np.round(np.random.uniform(10000, 800000, n), 2)\n",
59
+ "\n",
60
+ "# Membuat data untuk kolom 'pay_1' sampai 'pay_6'\n",
61
+ "pay_1 = [-2.0,-1.0,0.0,1.0,2.0,3.0,4.0,5.0,6.0,7.0,8.0]\n",
62
+ "pay_1 = random.choices(pay_1, k=n)\n",
63
+ "\n",
64
+ "pay_2 = [-2.0,-1.0,0.0,1.0,2.0,3.0,4.0,5.0,6.0,7.0]\n",
65
+ "pay_2 = random.choices(pay_2, k=n)\n",
66
+ "\n",
67
+ "pay_3 = [-2.0,-1.0,0.0,2.0,3.0,4.0,5.0,6.0,7.0]\n",
68
+ "pay_3 = random.choices(pay_3, k=n)\n",
69
+ "\n",
70
+ "pay_4 = [-2.0,-1.0,0.0,2.0,3.0,4.0,5.0,6.0,7.0,8.0]\n",
71
+ "pay_4 = random.choices(pay_4, k=n)\n",
72
+ "\n",
73
+ "pay_5 = [-2.0,-1.0,0.0,2.0,3.0,4.0,5.0,6.0,7.0]\n",
74
+ "pay_5 = random.choices(pay_5, k=n)\n",
75
+ "\n",
76
+ "pay_6 = [-2.0,-1.0,0.0,2.0,3.0,4.0,6.0,7.0]\n",
77
+ "pay_6 = random.choices(pay_6, k=n)\n",
78
+ "\n",
79
+ "# Membuat DataFrame\n",
80
+ "df_inf = pd.DataFrame({\n",
81
+ " 'limit_balance': limit_balance,\n",
82
+ " 'pay_1': pay_1,\n",
83
+ " 'pay_2': pay_2,\n",
84
+ " 'pay_3': pay_3,\n",
85
+ " 'pay_4': pay_4,\n",
86
+ " 'pay_5': pay_5,\n",
87
+ " 'pay_6': pay_6,\n",
88
+ " \n",
89
+ "})"
90
+ ]
91
+ },
92
+ {
93
+ "cell_type": "code",
94
+ "execution_count": 4,
95
+ "metadata": {},
96
+ "outputs": [
97
+ {
98
+ "data": {
99
+ "text/html": [
100
+ "<div>\n",
101
+ "<style scoped>\n",
102
+ " .dataframe tbody tr th:only-of-type {\n",
103
+ " vertical-align: middle;\n",
104
+ " }\n",
105
+ "\n",
106
+ " .dataframe tbody tr th {\n",
107
+ " vertical-align: top;\n",
108
+ " }\n",
109
+ "\n",
110
+ " .dataframe thead th {\n",
111
+ " text-align: right;\n",
112
+ " }\n",
113
+ "</style>\n",
114
+ "<table border=\"1\" class=\"dataframe\">\n",
115
+ " <thead>\n",
116
+ " <tr style=\"text-align: right;\">\n",
117
+ " <th></th>\n",
118
+ " <th>limit_balance</th>\n",
119
+ " <th>pay_1</th>\n",
120
+ " <th>pay_2</th>\n",
121
+ " <th>pay_3</th>\n",
122
+ " <th>pay_4</th>\n",
123
+ " <th>pay_5</th>\n",
124
+ " <th>pay_6</th>\n",
125
+ " </tr>\n",
126
+ " </thead>\n",
127
+ " <tbody>\n",
128
+ " <tr>\n",
129
+ " <th>0</th>\n",
130
+ " <td>130604.35</td>\n",
131
+ " <td>0.0</td>\n",
132
+ " <td>7.0</td>\n",
133
+ " <td>5.0</td>\n",
134
+ " <td>5.0</td>\n",
135
+ " <td>2.0</td>\n",
136
+ " <td>6.0</td>\n",
137
+ " </tr>\n",
138
+ " <tr>\n",
139
+ " <th>1</th>\n",
140
+ " <td>248861.72</td>\n",
141
+ " <td>2.0</td>\n",
142
+ " <td>-2.0</td>\n",
143
+ " <td>3.0</td>\n",
144
+ " <td>0.0</td>\n",
145
+ " <td>-2.0</td>\n",
146
+ " <td>6.0</td>\n",
147
+ " </tr>\n",
148
+ " <tr>\n",
149
+ " <th>2</th>\n",
150
+ " <td>59008.77</td>\n",
151
+ " <td>2.0</td>\n",
152
+ " <td>-2.0</td>\n",
153
+ " <td>3.0</td>\n",
154
+ " <td>5.0</td>\n",
155
+ " <td>2.0</td>\n",
156
+ " <td>7.0</td>\n",
157
+ " </tr>\n",
158
+ " <tr>\n",
159
+ " <th>3</th>\n",
160
+ " <td>373289.67</td>\n",
161
+ " <td>0.0</td>\n",
162
+ " <td>7.0</td>\n",
163
+ " <td>2.0</td>\n",
164
+ " <td>2.0</td>\n",
165
+ " <td>-2.0</td>\n",
166
+ " <td>0.0</td>\n",
167
+ " </tr>\n",
168
+ " <tr>\n",
169
+ " <th>4</th>\n",
170
+ " <td>669850.17</td>\n",
171
+ " <td>1.0</td>\n",
172
+ " <td>4.0</td>\n",
173
+ " <td>-1.0</td>\n",
174
+ " <td>0.0</td>\n",
175
+ " <td>7.0</td>\n",
176
+ " <td>7.0</td>\n",
177
+ " </tr>\n",
178
+ " <tr>\n",
179
+ " <th>...</th>\n",
180
+ " <td>...</td>\n",
181
+ " <td>...</td>\n",
182
+ " <td>...</td>\n",
183
+ " <td>...</td>\n",
184
+ " <td>...</td>\n",
185
+ " <td>...</td>\n",
186
+ " <td>...</td>\n",
187
+ " </tr>\n",
188
+ " <tr>\n",
189
+ " <th>95</th>\n",
190
+ " <td>772548.37</td>\n",
191
+ " <td>2.0</td>\n",
192
+ " <td>-2.0</td>\n",
193
+ " <td>0.0</td>\n",
194
+ " <td>0.0</td>\n",
195
+ " <td>6.0</td>\n",
196
+ " <td>3.0</td>\n",
197
+ " </tr>\n",
198
+ " <tr>\n",
199
+ " <th>96</th>\n",
200
+ " <td>780417.84</td>\n",
201
+ " <td>7.0</td>\n",
202
+ " <td>6.0</td>\n",
203
+ " <td>7.0</td>\n",
204
+ " <td>5.0</td>\n",
205
+ " <td>3.0</td>\n",
206
+ " <td>-2.0</td>\n",
207
+ " </tr>\n",
208
+ " <tr>\n",
209
+ " <th>97</th>\n",
210
+ " <td>169988.27</td>\n",
211
+ " <td>4.0</td>\n",
212
+ " <td>2.0</td>\n",
213
+ " <td>3.0</td>\n",
214
+ " <td>-2.0</td>\n",
215
+ " <td>-1.0</td>\n",
216
+ " <td>4.0</td>\n",
217
+ " </tr>\n",
218
+ " <tr>\n",
219
+ " <th>98</th>\n",
220
+ " <td>547083.91</td>\n",
221
+ " <td>4.0</td>\n",
222
+ " <td>1.0</td>\n",
223
+ " <td>4.0</td>\n",
224
+ " <td>-1.0</td>\n",
225
+ " <td>5.0</td>\n",
226
+ " <td>2.0</td>\n",
227
+ " </tr>\n",
228
+ " <tr>\n",
229
+ " <th>99</th>\n",
230
+ " <td>377622.60</td>\n",
231
+ " <td>-2.0</td>\n",
232
+ " <td>5.0</td>\n",
233
+ " <td>3.0</td>\n",
234
+ " <td>7.0</td>\n",
235
+ " <td>3.0</td>\n",
236
+ " <td>7.0</td>\n",
237
+ " </tr>\n",
238
+ " </tbody>\n",
239
+ "</table>\n",
240
+ "<p>100 rows × 7 columns</p>\n",
241
+ "</div>"
242
+ ],
243
+ "text/plain": [
244
+ " limit_balance pay_1 pay_2 pay_3 pay_4 pay_5 pay_6\n",
245
+ "0 130604.35 0.0 7.0 5.0 5.0 2.0 6.0\n",
246
+ "1 248861.72 2.0 -2.0 3.0 0.0 -2.0 6.0\n",
247
+ "2 59008.77 2.0 -2.0 3.0 5.0 2.0 7.0\n",
248
+ "3 373289.67 0.0 7.0 2.0 2.0 -2.0 0.0\n",
249
+ "4 669850.17 1.0 4.0 -1.0 0.0 7.0 7.0\n",
250
+ ".. ... ... ... ... ... ... ...\n",
251
+ "95 772548.37 2.0 -2.0 0.0 0.0 6.0 3.0\n",
252
+ "96 780417.84 7.0 6.0 7.0 5.0 3.0 -2.0\n",
253
+ "97 169988.27 4.0 2.0 3.0 -2.0 -1.0 4.0\n",
254
+ "98 547083.91 4.0 1.0 4.0 -1.0 5.0 2.0\n",
255
+ "99 377622.60 -2.0 5.0 3.0 7.0 3.0 7.0\n",
256
+ "\n",
257
+ "[100 rows x 7 columns]"
258
+ ]
259
+ },
260
+ "execution_count": 4,
261
+ "metadata": {},
262
+ "output_type": "execute_result"
263
+ }
264
+ ],
265
+ "source": [
266
+ "df_inf"
267
+ ]
268
+ },
269
+ {
270
+ "cell_type": "code",
271
+ "execution_count": 5,
272
+ "metadata": {},
273
+ "outputs": [
274
+ {
275
+ "name": "stdout",
276
+ "output_type": "stream",
277
+ "text": [
278
+ " pay_1 pay_2 pay_3 pay_4 pay_5 pay_6\n",
279
+ "0 0.0 7.0 5.0 5.0 2.0 6.0\n",
280
+ "1 2.0 -2.0 3.0 0.0 -2.0 6.0\n",
281
+ "2 2.0 -2.0 3.0 5.0 2.0 7.0\n",
282
+ "3 0.0 7.0 2.0 2.0 -2.0 0.0\n",
283
+ "4 1.0 4.0 -1.0 0.0 7.0 7.0\n",
284
+ ".. ... ... ... ... ... ...\n",
285
+ "95 2.0 -2.0 0.0 0.0 6.0 3.0\n",
286
+ "96 7.0 6.0 7.0 5.0 3.0 -2.0\n",
287
+ "97 4.0 2.0 3.0 -2.0 -1.0 4.0\n",
288
+ "98 4.0 1.0 4.0 -1.0 5.0 2.0\n",
289
+ "99 -2.0 5.0 3.0 7.0 3.0 7.0\n",
290
+ "\n",
291
+ "[100 rows x 6 columns]\n",
292
+ " limit_balance\n",
293
+ "0 130604.35\n",
294
+ "1 248861.72\n",
295
+ "2 59008.77\n",
296
+ "3 373289.67\n",
297
+ "4 669850.17\n",
298
+ ".. ...\n",
299
+ "95 772548.37\n",
300
+ "96 780417.84\n",
301
+ "97 169988.27\n",
302
+ "98 547083.91\n",
303
+ "99 377622.60\n",
304
+ "\n",
305
+ "[100 rows x 1 columns]\n"
306
+ ]
307
+ }
308
+ ],
309
+ "source": [
310
+ "#membagi 2 kolom numerical dan kategorical\n",
311
+ "df_inf_num = df_inf[['limit_balance']]\n",
312
+ "df_inf_cat= df_inf[['pay_1', 'pay_2', 'pay_3', 'pay_4','pay_5','pay_6']]\n",
313
+ "\n",
314
+ "print(df_inf_cat)\n",
315
+ "print(df_inf_num)"
316
+ ]
317
+ },
318
+ {
319
+ "cell_type": "markdown",
320
+ "metadata": {},
321
+ "source": [
322
+ "## Scaling"
323
+ ]
324
+ },
325
+ {
326
+ "cell_type": "code",
327
+ "execution_count": 6,
328
+ "metadata": {},
329
+ "outputs": [
330
+ {
331
+ "data": {
332
+ "text/html": [
333
+ "<div>\n",
334
+ "<style scoped>\n",
335
+ " .dataframe tbody tr th:only-of-type {\n",
336
+ " vertical-align: middle;\n",
337
+ " }\n",
338
+ "\n",
339
+ " .dataframe tbody tr th {\n",
340
+ " vertical-align: top;\n",
341
+ " }\n",
342
+ "\n",
343
+ " .dataframe thead th {\n",
344
+ " text-align: right;\n",
345
+ " }\n",
346
+ "</style>\n",
347
+ "<table border=\"1\" class=\"dataframe\">\n",
348
+ " <thead>\n",
349
+ " <tr style=\"text-align: right;\">\n",
350
+ " <th></th>\n",
351
+ " <th>0</th>\n",
352
+ " </tr>\n",
353
+ " </thead>\n",
354
+ " <tbody>\n",
355
+ " <tr>\n",
356
+ " <th>0</th>\n",
357
+ " <td>0.241209</td>\n",
358
+ " </tr>\n",
359
+ " <tr>\n",
360
+ " <th>1</th>\n",
361
+ " <td>0.477723</td>\n",
362
+ " </tr>\n",
363
+ " <tr>\n",
364
+ " <th>2</th>\n",
365
+ " <td>0.098018</td>\n",
366
+ " </tr>\n",
367
+ " <tr>\n",
368
+ " <th>3</th>\n",
369
+ " <td>0.726579</td>\n",
370
+ " </tr>\n",
371
+ " <tr>\n",
372
+ " <th>4</th>\n",
373
+ " <td>1.319700</td>\n",
374
+ " </tr>\n",
375
+ " <tr>\n",
376
+ " <th>...</th>\n",
377
+ " <td>...</td>\n",
378
+ " </tr>\n",
379
+ " <tr>\n",
380
+ " <th>95</th>\n",
381
+ " <td>1.525097</td>\n",
382
+ " </tr>\n",
383
+ " <tr>\n",
384
+ " <th>96</th>\n",
385
+ " <td>1.540836</td>\n",
386
+ " </tr>\n",
387
+ " <tr>\n",
388
+ " <th>97</th>\n",
389
+ " <td>0.319977</td>\n",
390
+ " </tr>\n",
391
+ " <tr>\n",
392
+ " <th>98</th>\n",
393
+ " <td>1.074168</td>\n",
394
+ " </tr>\n",
395
+ " <tr>\n",
396
+ " <th>99</th>\n",
397
+ " <td>0.735245</td>\n",
398
+ " </tr>\n",
399
+ " </tbody>\n",
400
+ "</table>\n",
401
+ "<p>100 rows × 1 columns</p>\n",
402
+ "</div>"
403
+ ],
404
+ "text/plain": [
405
+ " 0\n",
406
+ "0 0.241209\n",
407
+ "1 0.477723\n",
408
+ "2 0.098018\n",
409
+ "3 0.726579\n",
410
+ "4 1.319700\n",
411
+ ".. ...\n",
412
+ "95 1.525097\n",
413
+ "96 1.540836\n",
414
+ "97 0.319977\n",
415
+ "98 1.074168\n",
416
+ "99 0.735245\n",
417
+ "\n",
418
+ "[100 rows x 1 columns]"
419
+ ]
420
+ },
421
+ "execution_count": 6,
422
+ "metadata": {},
423
+ "output_type": "execute_result"
424
+ }
425
+ ],
426
+ "source": [
427
+ "df_inf_num_scaled = scaler.transform(df_inf_num)\n",
428
+ "df_inf_num_scaled=pd.DataFrame(df_inf_num_scaled)\n",
429
+ "df_inf_num_scaled\n"
430
+ ]
431
+ },
432
+ {
433
+ "cell_type": "markdown",
434
+ "metadata": {},
435
+ "source": [
436
+ "## Menggabungkan 2 kolom numerikal yang sudah di scaling dan kolom kategorical"
437
+ ]
438
+ },
439
+ {
440
+ "cell_type": "code",
441
+ "execution_count": 7,
442
+ "metadata": {},
443
+ "outputs": [
444
+ {
445
+ "data": {
446
+ "text/plain": [
447
+ "array([[ 0.2412087 , 0. , 7. , 5. , 5. ,\n",
448
+ " 2. , 6. ],\n",
449
+ " [ 0.47772344, 2. , -2. , 3. , 0. ,\n",
450
+ " -2. , 6. ],\n",
451
+ " [ 0.09801754, 2. , -2. , 3. , 5. ,\n",
452
+ " 2. , 7. ],\n",
453
+ " [ 0.72657934, 0. , 7. , 2. , 2. ,\n",
454
+ " -2. , 0. ],\n",
455
+ " [ 1.31970034, 1. , 4. , -1. , 0. ,\n",
456
+ " 7. , 7. ],\n",
457
+ " [ 1.46465534, 6. , 0. , 7. , 6. ,\n",
458
+ " 4. , 3. ],\n",
459
+ " [ 1.1486426 , 8. , -1. , -2. , 5. ,\n",
460
+ " 5. , 3. ],\n",
461
+ " [ 1.21422404, 8. , 6. , 5. , 4. ,\n",
462
+ " 4. , 3. ],\n",
463
+ " [ 0.425344 , 4. , 4. , 5. , 2. ,\n",
464
+ " 0. , 3. ],\n",
465
+ " [ 1.01756628, 2. , 2. , 3. , 2. ,\n",
466
+ " 2. , 7. ],\n",
467
+ " [ 0.14752974, 1. , 3. , 7. , 5. ,\n",
468
+ " 2. , 3. ],\n",
469
+ " [ 0.1259037 , 8. , 3. , 0. , 3. ,\n",
470
+ " 3. , 0. ],\n",
471
+ " [ 0.93158974, 8. , 5. , 5. , 8. ,\n",
472
+ " 4. , 2. ],\n",
473
+ " [ 0.54247806, 5. , 1. , 4. , 3. ,\n",
474
+ " 7. , 6. ],\n",
475
+ " [ 1.56242432, 7. , -2. , 3. , 6. ,\n",
476
+ " 0. , -2. ],\n",
477
+ " [ 0.98982766, -1. , 7. , 4. , 0. ,\n",
478
+ " 5. , -1. ],\n",
479
+ " [ 1.07721126, 6. , -1. , 5. , 4. ,\n",
480
+ " 4. , 6. ],\n",
481
+ " [ 0.87256576, 8. , -1. , 4. , 5. ,\n",
482
+ " -2. , 2. ],\n",
483
+ " [ 0.4247989 , -2. , 2. , 3. , 8. ,\n",
484
+ " 7. , -1. ],\n",
485
+ " [ 0.58974984, 5. , 1. , 0. , 5. ,\n",
486
+ " 6. , 3. ],\n",
487
+ " [ 0.3522264 , 1. , 0. , 4. , 5. ,\n",
488
+ " 3. , -1. ],\n",
489
+ " [ 0.2945793 , 4. , 5. , 5. , -2. ,\n",
490
+ " 7. , 3. ],\n",
491
+ " [ 0.61722398, -1. , 6. , 5. , -1. ,\n",
492
+ " -1. , 3. ],\n",
493
+ " [ 0.3051966 , 5. , 4. , 5. , 0. ,\n",
494
+ " 3. , 2. ],\n",
495
+ " [ 0.96523928, 0. , 2. , -2. , 6. ,\n",
496
+ " 3. , 4. ],\n",
497
+ " [ 1.39483734, 3. , -2. , 4. , 2. ,\n",
498
+ " 4. , 4. ],\n",
499
+ " [ 0.98329534, 6. , -2. , 7. , 5. ,\n",
500
+ " 4. , 7. ],\n",
501
+ " [ 0.39992794, 7. , 0. , -2. , 7. ,\n",
502
+ " 5. , -1. ],\n",
503
+ " [ 0.28428988, 4. , 6. , 3. , -2. ,\n",
504
+ " 6. , 7. ],\n",
505
+ " [ 1.28991906, -2. , -2. , 2. , 8. ,\n",
506
+ " 6. , 2. ],\n",
507
+ " [ 0.35608716, 4. , 4. , 4. , 6. ,\n",
508
+ " -1. , 6. ],\n",
509
+ " [ 0.81663428, 4. , -2. , 6. , -1. ,\n",
510
+ " 0. , -1. ],\n",
511
+ " [ 0.8192234 , -2. , -2. , 6. , 5. ,\n",
512
+ " 2. , 0. ],\n",
513
+ " [ 0.9485924 , 4. , 3. , 7. , 3. ,\n",
514
+ " -2. , 7. ],\n",
515
+ " [ 0.84154036, 1. , 4. , -1. , -1. ,\n",
516
+ " 0. , 4. ],\n",
517
+ " [ 0.02102988, 6. , 7. , 4. , -1. ,\n",
518
+ " 0. , 7. ],\n",
519
+ " [ 0.82807368, 3. , 2. , -1. , 6. ,\n",
520
+ " 2. , 3. ],\n",
521
+ " [ 1.41549784, 4. , -1. , 3. , 8. ,\n",
522
+ " 0. , -1. ],\n",
523
+ " [ 1.21644404, -1. , 2. , -1. , 6. ,\n",
524
+ " 6. , -2. ],\n",
525
+ " [ 0.19410568, 2. , 6. , 5. , 4. ,\n",
526
+ " 3. , 3. ],\n",
527
+ " [ 0.46747886, 0. , 3. , -2. , 5. ,\n",
528
+ " 4. , 0. ],\n",
529
+ " [ 0.96699726, 4. , 1. , 7. , 5. ,\n",
530
+ " 0. , -2. ],\n",
531
+ " [ 1.14729824, -2. , 0. , 4. , 6. ,\n",
532
+ " 2. , 2. ],\n",
533
+ " [ 0.732326 , -1. , 6. , -1. , 3. ,\n",
534
+ " 5. , 0. ],\n",
535
+ " [ 1.21519438, 8. , 7. , 6. , 0. ,\n",
536
+ " 6. , 4. ],\n",
537
+ " [ 0.30277702, 6. , 6. , 5. , 2. ,\n",
538
+ " 0. , 3. ],\n",
539
+ " [ 0.88142942, 3. , 0. , -1. , -1. ,\n",
540
+ " 6. , 6. ],\n",
541
+ " [ 0.87022948, 7. , 6. , -1. , 8. ,\n",
542
+ " 2. , 6. ],\n",
543
+ " [ 0.74611628, 8. , 1. , 4. , 5. ,\n",
544
+ " 7. , 7. ],\n",
545
+ " [ 1.25117824, 1. , 4. , 2. , 6. ,\n",
546
+ " 0. , 3. ],\n",
547
+ " [ 0.1820945 , 8. , 0. , 3. , 6. ,\n",
548
+ " 5. , 4. ],\n",
549
+ " [ 1.07646016, 8. , 6. , 3. , 2. ,\n",
550
+ " 5. , 0. ],\n",
551
+ " [ 0.5724871 , -1. , 1. , 5. , 7. ,\n",
552
+ " 2. , 6. ],\n",
553
+ " [ 0.54385006, 7. , 7. , 4. , 7. ,\n",
554
+ " 2. , 2. ],\n",
555
+ " [ 0.71023962, -2. , 2. , 4. , -1. ,\n",
556
+ " 2. , 6. ],\n",
557
+ " [ 0.04256878, 8. , 0. , 7. , 7. ,\n",
558
+ " 0. , 2. ],\n",
559
+ " [ 0.65609134, 4. , 0. , 7. , -2. ,\n",
560
+ " 4. , 4. ],\n",
561
+ " [ 1.4571261 , 0. , -2. , 4. , 2. ,\n",
562
+ " -2. , 2. ],\n",
563
+ " [ 0.1441048 , 0. , 0. , 2. , 4. ,\n",
564
+ " 4. , -2. ],\n",
565
+ " [ 0.49789242, -2. , 3. , 6. , 6. ,\n",
566
+ " 7. , 0. ],\n",
567
+ " [ 0.83427514, 5. , -1. , -2. , 0. ,\n",
568
+ " 7. , 0. ],\n",
569
+ " [ 0.518338 , 6. , 2. , 3. , 8. ,\n",
570
+ " 3. , 0. ],\n",
571
+ " [ 0.70928656, 5. , 1. , 3. , -1. ,\n",
572
+ " 6. , -2. ],\n",
573
+ " [ 0.02580838, 2. , 2. , 0. , -1. ,\n",
574
+ " 7. , 3. ],\n",
575
+ " [ 0.1533025 , 5. , 1. , 7. , 5. ,\n",
576
+ " 7. , 6. ],\n",
577
+ " [ 1.09428994, 3. , 5. , 4. , -2. ,\n",
578
+ " -1. , 4. ],\n",
579
+ " [ 1.32079058, 2. , 0. , 3. , 2. ,\n",
580
+ " 5. , 2. ],\n",
581
+ " [ 0.67042874, 2. , 2. , -1. , 8. ,\n",
582
+ " -1. , -1. ],\n",
583
+ " [ 1.3410634 , 1. , 6. , 7. , 8. ,\n",
584
+ " 6. , -1. ],\n",
585
+ " [ 0.86393012, 5. , 5. , -2. , 7. ,\n",
586
+ " 0. , 4. ],\n",
587
+ " [ 0.55948346, 4. , 5. , 6. , 6. ,\n",
588
+ " -2. , 6. ],\n",
589
+ " [ 1.1490545 , 1. , 2. , 5. , 4. ,\n",
590
+ " 6. , -2. ],\n",
591
+ " [ 0.14828566, 3. , -2. , 5. , 0. ,\n",
592
+ " 0. , 4. ],\n",
593
+ " [ 1.4107169 , 3. , 4. , -2. , 8. ,\n",
594
+ " 0. , 0. ],\n",
595
+ " [ 0.53128808, 4. , 4. , 4. , -2. ,\n",
596
+ " 7. , 4. ],\n",
597
+ " [ 1.40909564, -2. , 0. , 7. , -2. ,\n",
598
+ " 7. , 3. ],\n",
599
+ " [ 0.46902142, 7. , 5. , 6. , 6. ,\n",
600
+ " 5. , 2. ],\n",
601
+ " [ 0.4766043 , 1. , 7. , 5. , 5. ,\n",
602
+ " 4. , 3. ],\n",
603
+ " [ 1.27386016, 4. , 1. , 6. , -2. ,\n",
604
+ " 7. , -1. ],\n",
605
+ " [ 1.32342374, 8. , 5. , 7. , -2. ,\n",
606
+ " 6. , 2. ],\n",
607
+ " [ 1.0021645 , 5. , 1. , -1. , 4. ,\n",
608
+ " 5. , 4. ],\n",
609
+ " [ 0.49189714, -1. , -1. , 3. , 3. ,\n",
610
+ " 2. , 7. ],\n",
611
+ " [ 0.04652876, 1. , -2. , 7. , 7. ,\n",
612
+ " 5. , 0. ],\n",
613
+ " [ 0.63164816, 5. , 1. , -2. , 3. ,\n",
614
+ " -1. , -2. ],\n",
615
+ " [ 0.81871406, 5. , 6. , 7. , 2. ,\n",
616
+ " -1. , 7. ],\n",
617
+ " [ 0.01167376, 7. , 1. , -2. , 4. ,\n",
618
+ " 0. , 4. ],\n",
619
+ " [ 1.22441748, 8. , -1. , -2. , 8. ,\n",
620
+ " 0. , 6. ],\n",
621
+ " [ 1.35006448, -2. , 2. , 5. , 3. ,\n",
622
+ " 7. , -2. ],\n",
623
+ " [ 0.20782186, 1. , 6. , 2. , 8. ,\n",
624
+ " 6. , 4. ],\n",
625
+ " [ 0.45452436, 1. , 5. , 3. , 8. ,\n",
626
+ " 3. , -1. ],\n",
627
+ " [ 0.51601032, 5. , 7. , -1. , 7. ,\n",
628
+ " 7. , -1. ],\n",
629
+ " [ 1.4323641 , -1. , 6. , -2. , -2. ,\n",
630
+ " 2. , 7. ],\n",
631
+ " [ 1.57930408, 1. , -1. , 5. , 6. ,\n",
632
+ " 2. , -1. ],\n",
633
+ " [ 1.21799718, 8. , 6. , 6. , 2. ,\n",
634
+ " 4. , 3. ],\n",
635
+ " [ 0.06769908, 0. , 2. , 7. , 7. ,\n",
636
+ " 3. , 3. ],\n",
637
+ " [ 1.52509674, 2. , -2. , 0. , 0. ,\n",
638
+ " 6. , 3. ],\n",
639
+ " [ 1.54083568, 7. , 6. , 7. , 5. ,\n",
640
+ " 3. , -2. ],\n",
641
+ " [ 0.31997654, 4. , 2. , 3. , -2. ,\n",
642
+ " -1. , 4. ],\n",
643
+ " [ 1.07416782, 4. , 1. , 4. , -1. ,\n",
644
+ " 5. , 2. ],\n",
645
+ " [ 0.7352452 , -2. , 5. , 3. , 7. ,\n",
646
+ " 3. , 7. ]])"
647
+ ]
648
+ },
649
+ "execution_count": 7,
650
+ "metadata": {},
651
+ "output_type": "execute_result"
652
+ }
653
+ ],
654
+ "source": [
655
+ "df_inf_final = np.concatenate([df_inf_num_scaled,df_inf_cat],axis = 1)\n",
656
+ "df_inf_final"
657
+ ]
658
+ },
659
+ {
660
+ "cell_type": "code",
661
+ "execution_count": 8,
662
+ "metadata": {},
663
+ "outputs": [
664
+ {
665
+ "data": {
666
+ "text/plain": [
667
+ "array([0, 1, 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 1, 1, 1, 1, 0, 1,\n",
668
+ " 0, 1, 0, 1, 1, 1, 1, 0, 1, 0, 0, 1, 0, 1, 1, 1, 1, 1, 1, 1, 0, 1,\n",
669
+ " 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 1, 0, 0, 0, 1, 1, 1, 1, 1, 1,\n",
670
+ " 1, 1, 0, 1, 1, 1, 1, 1, 1, 0, 1, 0, 1, 1, 1, 0, 0, 1, 1, 1, 1, 0,\n",
671
+ " 1, 0, 0, 0, 0, 1, 1, 1, 1, 1, 1, 1], dtype=int64)"
672
+ ]
673
+ },
674
+ "execution_count": 8,
675
+ "metadata": {},
676
+ "output_type": "execute_result"
677
+ }
678
+ ],
679
+ "source": [
680
+ "#membuat kolom predict \n",
681
+ "y_pred_inf = model_knn.predict(df_inf_final)\n",
682
+ "y_pred_inf"
683
+ ]
684
+ },
685
+ {
686
+ "cell_type": "code",
687
+ "execution_count": 9,
688
+ "metadata": {},
689
+ "outputs": [
690
+ {
691
+ "data": {
692
+ "text/html": [
693
+ "<div>\n",
694
+ "<style scoped>\n",
695
+ " .dataframe tbody tr th:only-of-type {\n",
696
+ " vertical-align: middle;\n",
697
+ " }\n",
698
+ "\n",
699
+ " .dataframe tbody tr th {\n",
700
+ " vertical-align: top;\n",
701
+ " }\n",
702
+ "\n",
703
+ " .dataframe thead th {\n",
704
+ " text-align: right;\n",
705
+ " }\n",
706
+ "</style>\n",
707
+ "<table border=\"1\" class=\"dataframe\">\n",
708
+ " <thead>\n",
709
+ " <tr style=\"text-align: right;\">\n",
710
+ " <th></th>\n",
711
+ " <th>Def_payment - Prediction</th>\n",
712
+ " </tr>\n",
713
+ " </thead>\n",
714
+ " <tbody>\n",
715
+ " <tr>\n",
716
+ " <th>0</th>\n",
717
+ " <td>0</td>\n",
718
+ " </tr>\n",
719
+ " <tr>\n",
720
+ " <th>1</th>\n",
721
+ " <td>1</td>\n",
722
+ " </tr>\n",
723
+ " <tr>\n",
724
+ " <th>2</th>\n",
725
+ " <td>0</td>\n",
726
+ " </tr>\n",
727
+ " <tr>\n",
728
+ " <th>3</th>\n",
729
+ " <td>1</td>\n",
730
+ " </tr>\n",
731
+ " <tr>\n",
732
+ " <th>4</th>\n",
733
+ " <td>1</td>\n",
734
+ " </tr>\n",
735
+ " <tr>\n",
736
+ " <th>...</th>\n",
737
+ " <td>...</td>\n",
738
+ " </tr>\n",
739
+ " <tr>\n",
740
+ " <th>95</th>\n",
741
+ " <td>1</td>\n",
742
+ " </tr>\n",
743
+ " <tr>\n",
744
+ " <th>96</th>\n",
745
+ " <td>1</td>\n",
746
+ " </tr>\n",
747
+ " <tr>\n",
748
+ " <th>97</th>\n",
749
+ " <td>1</td>\n",
750
+ " </tr>\n",
751
+ " <tr>\n",
752
+ " <th>98</th>\n",
753
+ " <td>1</td>\n",
754
+ " </tr>\n",
755
+ " <tr>\n",
756
+ " <th>99</th>\n",
757
+ " <td>1</td>\n",
758
+ " </tr>\n",
759
+ " </tbody>\n",
760
+ "</table>\n",
761
+ "<p>100 rows × 1 columns</p>\n",
762
+ "</div>"
763
+ ],
764
+ "text/plain": [
765
+ " Def_payment - Prediction\n",
766
+ "0 0\n",
767
+ "1 1\n",
768
+ "2 0\n",
769
+ "3 1\n",
770
+ "4 1\n",
771
+ ".. ...\n",
772
+ "95 1\n",
773
+ "96 1\n",
774
+ "97 1\n",
775
+ "98 1\n",
776
+ "99 1\n",
777
+ "\n",
778
+ "[100 rows x 1 columns]"
779
+ ]
780
+ },
781
+ "execution_count": 9,
782
+ "metadata": {},
783
+ "output_type": "execute_result"
784
+ }
785
+ ],
786
+ "source": [
787
+ "#membuat kolom predict kedalam data frame dan memberi nama kolomnya dengan 'Def_payment - Prediction'\n",
788
+ "y_pred_inf_df = pd.DataFrame(y_pred_inf, columns=['Def_payment - Prediction'])\n",
789
+ "y_pred_inf_df"
790
+ ]
791
+ },
792
+ {
793
+ "cell_type": "code",
794
+ "execution_count": 10,
795
+ "metadata": {},
796
+ "outputs": [
797
+ {
798
+ "data": {
799
+ "text/html": [
800
+ "<div>\n",
801
+ "<style scoped>\n",
802
+ " .dataframe tbody tr th:only-of-type {\n",
803
+ " vertical-align: middle;\n",
804
+ " }\n",
805
+ "\n",
806
+ " .dataframe tbody tr th {\n",
807
+ " vertical-align: top;\n",
808
+ " }\n",
809
+ "\n",
810
+ " .dataframe thead th {\n",
811
+ " text-align: right;\n",
812
+ " }\n",
813
+ "</style>\n",
814
+ "<table border=\"1\" class=\"dataframe\">\n",
815
+ " <thead>\n",
816
+ " <tr style=\"text-align: right;\">\n",
817
+ " <th></th>\n",
818
+ " <th>limit_balance</th>\n",
819
+ " <th>pay_1</th>\n",
820
+ " <th>pay_2</th>\n",
821
+ " <th>pay_3</th>\n",
822
+ " <th>pay_4</th>\n",
823
+ " <th>pay_5</th>\n",
824
+ " <th>pay_6</th>\n",
825
+ " <th>Def_payment - Prediction</th>\n",
826
+ " </tr>\n",
827
+ " </thead>\n",
828
+ " <tbody>\n",
829
+ " <tr>\n",
830
+ " <th>0</th>\n",
831
+ " <td>130604.35</td>\n",
832
+ " <td>0.0</td>\n",
833
+ " <td>7.0</td>\n",
834
+ " <td>5.0</td>\n",
835
+ " <td>5.0</td>\n",
836
+ " <td>2.0</td>\n",
837
+ " <td>6.0</td>\n",
838
+ " <td>0</td>\n",
839
+ " </tr>\n",
840
+ " <tr>\n",
841
+ " <th>1</th>\n",
842
+ " <td>248861.72</td>\n",
843
+ " <td>2.0</td>\n",
844
+ " <td>-2.0</td>\n",
845
+ " <td>3.0</td>\n",
846
+ " <td>0.0</td>\n",
847
+ " <td>-2.0</td>\n",
848
+ " <td>6.0</td>\n",
849
+ " <td>1</td>\n",
850
+ " </tr>\n",
851
+ " <tr>\n",
852
+ " <th>2</th>\n",
853
+ " <td>59008.77</td>\n",
854
+ " <td>2.0</td>\n",
855
+ " <td>-2.0</td>\n",
856
+ " <td>3.0</td>\n",
857
+ " <td>5.0</td>\n",
858
+ " <td>2.0</td>\n",
859
+ " <td>7.0</td>\n",
860
+ " <td>0</td>\n",
861
+ " </tr>\n",
862
+ " <tr>\n",
863
+ " <th>3</th>\n",
864
+ " <td>373289.67</td>\n",
865
+ " <td>0.0</td>\n",
866
+ " <td>7.0</td>\n",
867
+ " <td>2.0</td>\n",
868
+ " <td>2.0</td>\n",
869
+ " <td>-2.0</td>\n",
870
+ " <td>0.0</td>\n",
871
+ " <td>1</td>\n",
872
+ " </tr>\n",
873
+ " <tr>\n",
874
+ " <th>4</th>\n",
875
+ " <td>669850.17</td>\n",
876
+ " <td>1.0</td>\n",
877
+ " <td>4.0</td>\n",
878
+ " <td>-1.0</td>\n",
879
+ " <td>0.0</td>\n",
880
+ " <td>7.0</td>\n",
881
+ " <td>7.0</td>\n",
882
+ " <td>1</td>\n",
883
+ " </tr>\n",
884
+ " <tr>\n",
885
+ " <th>...</th>\n",
886
+ " <td>...</td>\n",
887
+ " <td>...</td>\n",
888
+ " <td>...</td>\n",
889
+ " <td>...</td>\n",
890
+ " <td>...</td>\n",
891
+ " <td>...</td>\n",
892
+ " <td>...</td>\n",
893
+ " <td>...</td>\n",
894
+ " </tr>\n",
895
+ " <tr>\n",
896
+ " <th>95</th>\n",
897
+ " <td>772548.37</td>\n",
898
+ " <td>2.0</td>\n",
899
+ " <td>-2.0</td>\n",
900
+ " <td>0.0</td>\n",
901
+ " <td>0.0</td>\n",
902
+ " <td>6.0</td>\n",
903
+ " <td>3.0</td>\n",
904
+ " <td>1</td>\n",
905
+ " </tr>\n",
906
+ " <tr>\n",
907
+ " <th>96</th>\n",
908
+ " <td>780417.84</td>\n",
909
+ " <td>7.0</td>\n",
910
+ " <td>6.0</td>\n",
911
+ " <td>7.0</td>\n",
912
+ " <td>5.0</td>\n",
913
+ " <td>3.0</td>\n",
914
+ " <td>-2.0</td>\n",
915
+ " <td>1</td>\n",
916
+ " </tr>\n",
917
+ " <tr>\n",
918
+ " <th>97</th>\n",
919
+ " <td>169988.27</td>\n",
920
+ " <td>4.0</td>\n",
921
+ " <td>2.0</td>\n",
922
+ " <td>3.0</td>\n",
923
+ " <td>-2.0</td>\n",
924
+ " <td>-1.0</td>\n",
925
+ " <td>4.0</td>\n",
926
+ " <td>1</td>\n",
927
+ " </tr>\n",
928
+ " <tr>\n",
929
+ " <th>98</th>\n",
930
+ " <td>547083.91</td>\n",
931
+ " <td>4.0</td>\n",
932
+ " <td>1.0</td>\n",
933
+ " <td>4.0</td>\n",
934
+ " <td>-1.0</td>\n",
935
+ " <td>5.0</td>\n",
936
+ " <td>2.0</td>\n",
937
+ " <td>1</td>\n",
938
+ " </tr>\n",
939
+ " <tr>\n",
940
+ " <th>99</th>\n",
941
+ " <td>377622.60</td>\n",
942
+ " <td>-2.0</td>\n",
943
+ " <td>5.0</td>\n",
944
+ " <td>3.0</td>\n",
945
+ " <td>7.0</td>\n",
946
+ " <td>3.0</td>\n",
947
+ " <td>7.0</td>\n",
948
+ " <td>1</td>\n",
949
+ " </tr>\n",
950
+ " </tbody>\n",
951
+ "</table>\n",
952
+ "<p>100 rows × 8 columns</p>\n",
953
+ "</div>"
954
+ ],
955
+ "text/plain": [
956
+ " limit_balance pay_1 pay_2 pay_3 pay_4 pay_5 pay_6 \\\n",
957
+ "0 130604.35 0.0 7.0 5.0 5.0 2.0 6.0 \n",
958
+ "1 248861.72 2.0 -2.0 3.0 0.0 -2.0 6.0 \n",
959
+ "2 59008.77 2.0 -2.0 3.0 5.0 2.0 7.0 \n",
960
+ "3 373289.67 0.0 7.0 2.0 2.0 -2.0 0.0 \n",
961
+ "4 669850.17 1.0 4.0 -1.0 0.0 7.0 7.0 \n",
962
+ ".. ... ... ... ... ... ... ... \n",
963
+ "95 772548.37 2.0 -2.0 0.0 0.0 6.0 3.0 \n",
964
+ "96 780417.84 7.0 6.0 7.0 5.0 3.0 -2.0 \n",
965
+ "97 169988.27 4.0 2.0 3.0 -2.0 -1.0 4.0 \n",
966
+ "98 547083.91 4.0 1.0 4.0 -1.0 5.0 2.0 \n",
967
+ "99 377622.60 -2.0 5.0 3.0 7.0 3.0 7.0 \n",
968
+ "\n",
969
+ " Def_payment - Prediction \n",
970
+ "0 0 \n",
971
+ "1 1 \n",
972
+ "2 0 \n",
973
+ "3 1 \n",
974
+ "4 1 \n",
975
+ ".. ... \n",
976
+ "95 1 \n",
977
+ "96 1 \n",
978
+ "97 1 \n",
979
+ "98 1 \n",
980
+ "99 1 \n",
981
+ "\n",
982
+ "[100 rows x 8 columns]"
983
+ ]
984
+ },
985
+ "execution_count": 10,
986
+ "metadata": {},
987
+ "output_type": "execute_result"
988
+ }
989
+ ],
990
+ "source": [
991
+ "#membuat variabel baru dengan menggabungkan 2 tabel yaitu df_inf dan y_pred_inf_df\n",
992
+ "tabel_inference = pd.concat([df_inf, y_pred_inf_df], axis=1)\n",
993
+ "tabel_inference"
994
+ ]
995
+ },
996
+ {
997
+ "cell_type": "markdown",
998
+ "metadata": {},
999
+ "source": [
1000
+ "# xii. Kesimpulan "
1001
+ ]
1002
+ },
1003
+ {
1004
+ "cell_type": "markdown",
1005
+ "metadata": {},
1006
+ "source": [
1007
+ "- Saya memilih f1 score sebagai parameter score karena menurut saya meminimalisir false positive dan false negative itu penting.\n",
1008
+ "\n",
1009
+ "- Dari hasil std yang mendekati 0 dan berada di bawah mean berarti model KNN yang default nilai STD 0.03 sehingga modelnya best fit . Jadi bisa dikatakan bahwa model knn default memiliki konsistensi yang bagus akan tetapi validasi hasil predictnya kurang bagus dikarenakan hanya sekitar 48%. Dari sisi bisnis model ini belum bisa digunakan karena hasil predictnya yang masih kurang bagus kurang dari 50% sehingga bisa menimbulkan hasil predict yang salah dimana bisa menyebabkan perusahaan rugi.\n",
1010
+ "\n",
1011
+ "- Untuk Meningkatkan jumlah user yang bisa bayar ada baiknya untuk penagihan bisa dilakukan ditanggal gajian sekitar 25-31 pada saat gajian karena pada saat tanggal tersebut uang mereka jumlahnya banyak jadi bisa membayar \n"
1012
+ ]
1013
+ },
1014
+ {
1015
+ "cell_type": "markdown",
1016
+ "metadata": {},
1017
+ "source": [
1018
+ "## Conceptual Problems :"
1019
+ ]
1020
+ },
1021
+ {
1022
+ "cell_type": "markdown",
1023
+ "metadata": {},
1024
+ "source": [
1025
+ "1. Apakah yang dimaksud dengan coefficient pada logistic regression?"
1026
+ ]
1027
+ },
1028
+ {
1029
+ "cell_type": "markdown",
1030
+ "metadata": {},
1031
+ "source": [
1032
+ "Yang dimaksud dengan coefficient pada logistic regression adalah nilai bobot atau parameter yang digunakan untuk mengukur hubungan antara variabel independen (fitur) dengan variabel dependen (kelas atau target)"
1033
+ ]
1034
+ },
1035
+ {
1036
+ "cell_type": "markdown",
1037
+ "metadata": {},
1038
+ "source": [
1039
+ "2. Apakah fungsi parameter kernel pada SVM? Jelaskan salah satu kernel yang kalian pahami!"
1040
+ ]
1041
+ },
1042
+ {
1043
+ "cell_type": "markdown",
1044
+ "metadata": {},
1045
+ "source": [
1046
+ "Kernel adalah fungsi matematis yang digunakan untuk mengukur kesamaan antara pasangan data dalam ruang fitur\n",
1047
+ "Salah satu kernelnya adalah linear yang digunakan untuk pemisahan data yang dapat dipisahkan dengan garis lurus."
1048
+ ]
1049
+ },
1050
+ {
1051
+ "cell_type": "markdown",
1052
+ "metadata": {},
1053
+ "source": [
1054
+ "3. Bagaimana cara memilih K yang optimal pada KNN ?"
1055
+ ]
1056
+ },
1057
+ {
1058
+ "cell_type": "markdown",
1059
+ "metadata": {},
1060
+ "source": [
1061
+ "Cara memilih K yang optimal pada KNN adalah dengan membagi data set menjadi 2 yaitu train dan test lalu mendefine model knn. Lalu diolah dengan menggunakan cross validation untuk menentukan parameter mana yang terbaik dan memvariasikan nilai K dan mengamati performa validasi silang, kita bisa menemukan K yang optimal"
1062
+ ]
1063
+ },
1064
+ {
1065
+ "cell_type": "markdown",
1066
+ "metadata": {},
1067
+ "source": [
1068
+ "4. Apa yang dimaksud dengan metrics-metrics berikut : Accuracy, Precision, Recall, F1 Score, dan kapan waktu yang tepat untuk menggunakannya ?\n"
1069
+ ]
1070
+ },
1071
+ {
1072
+ "cell_type": "markdown",
1073
+ "metadata": {},
1074
+ "source": [
1075
+ "Accuracy: Merupakan rasio dari jumlah prediksi yang benar (positif dan negatif) dibandingkan dengan jumlah total sampel. Berguna ketika distribusi kelas seimbang. Namun, bisa menjadi bias jika kelas tidak seimbang.\n",
1076
+ "\n",
1077
+ "Precision: Merupakan rasio dari jumlah prediksi positif yang benar dibandingkan dengan total prediksi positif. Berguna ketika penting untuk menghindari false positive.\n",
1078
+ "\n",
1079
+ "Recall: Merupakan rasio dari jumlah prediksi positif yang benar dibandingkan dengan total jumlah sampel positif yang sebenarnya. Berguna ketika penting untuk menghindari false negative.\n",
1080
+ "\n",
1081
+ "F1 Score: Merupakan ukuran yang mengkombinasikan precision dan recall. Berguna ketika Anda ingin mencari keseimbangan antara precision dan recall."
1082
+ ]
1083
+ }
1084
+ ],
1085
+ "metadata": {
1086
+ "kernelspec": {
1087
+ "display_name": "base",
1088
+ "language": "python",
1089
+ "name": "python3"
1090
+ },
1091
+ "language_info": {
1092
+ "codemirror_mode": {
1093
+ "name": "ipython",
1094
+ "version": 3
1095
+ },
1096
+ "file_extension": ".py",
1097
+ "mimetype": "text/x-python",
1098
+ "name": "python",
1099
+ "nbconvert_exporter": "python",
1100
+ "pygments_lexer": "ipython3",
1101
+ "version": "3.9.17"
1102
+ },
1103
+ "orig_nbformat": 4
1104
+ },
1105
+ "nbformat": 4,
1106
+ "nbformat_minor": 2
1107
+ }
eda.py ADDED
@@ -0,0 +1,74 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import streamlit as st
2
+ import pandas as pd
3
+ import numpy as np
4
+ import matplotlib.pyplot as plt
5
+ import seaborn as sns
6
+ from PIL import Image
7
+
8
def run(csv_path='P1G1_Kenneth Vincentius.csv'):
    """Render the exploratory-data-analysis page of the Streamlit app.

    Reads the CSV at ``csv_path`` and shows, in order: the first five rows,
    a bar chart of the percentage of each ``default_payment_next_month``
    value, a bar chart of the percentage of each ``sex`` value, and a
    histogram (with KDE) of ``age``. Each chart is followed by an
    'Explanation' expander with a fixed caption.

    Args:
        csv_path: Path of the CSV file to analyse. Defaults to the file the
            app originally hard-coded, so ``run()`` keeps working unchanged.
    """
    st.title('Welcome to Explaration Data Analysis')

    df = pd.read_csv(csv_path)
    st.table(df.head(5))

    # --- Percentage of customers per default-payment class -----------------
    st.title('Presentasi Persentase Default Payment')

    count_data = df['default_payment_next_month'].value_counts()
    # Sort by class label and pass the same order to seaborn below, so that
    # bar position i always corresponds to percentage_data.iloc[i]. The
    # previous code looked labels up with percentage_data[i], which is only
    # right while the class labels happen to be exactly 0..n-1.
    percentage_data = ((count_data / len(df)) * 100).sort_index()

    fig_1, ax_1 = plt.subplots()
    sns.barplot(
        x=percentage_data.index,
        y=percentage_data.values,
        order=list(percentage_data.index),
        ax=ax_1,
    )
    ax_1.set_title('Presentasi Persentase Def Payment')
    ax_1.set_xlabel('def_payment')
    ax_1.set_ylabel('Persentase (%)')
    _annotate_bars(ax_1, percentage_data.values)
    _show_figure(fig_1)

    with st.expander('Explanation'):
        st.caption('Hasil : Kita bisa melihat bahwa 78.58 bisa bisa membayar dan 21.42% tidak bisa membayar')

    # --- Percentage of customers per gender --------------------------------
    st.title("Persentase dari Gender")

    def_count = df['sex'].value_counts(normalize=True) * 100
    fig_2, ax_2 = plt.subplots(figsize=(6, 6))
    # rot/fontsize replace the former plt.xticks/plt.yticks calls.
    def_count.plot.bar(ax=ax_2, rot=0, fontsize=12)
    ax_2.set_title("Persentase dari Gender", fontsize=15)
    # Label every bar, however many categories the column contains
    # (previously hard-coded to positions 0 and 1).
    _annotate_bars(ax_2, def_count.values, fontsize=12)
    _show_figure(fig_2)

    with st.expander('Explanation'):
        st.caption('Bisa dilihat bahwa terdapat 39.24% di laki-laki=1 dan 60.76% didominasi oleh perempuan=2')

    # --- Age distribution --------------------------------------------------
    st.title('Distribusi Usia')

    # A single axes: the former 1x2 grid only ever drew on the right-hand
    # panel and rendered an empty plot on the left.
    fig_3, ax_3 = plt.subplots(figsize=(12, 6))
    sns.histplot(df['age'], kde=True, ax=ax_3)
    ax_3.set_title('Distribution of Age')
    ax_3.set_xlabel('Age')
    ax_3.set_ylabel('Frequency')
    _show_figure(fig_3)

    with st.expander('Explanation'):
        st.caption('Bisa dilihat bahwa persebaran umur dari umur 20 sampai 50')


def _annotate_bars(ax, values, **text_kwargs):
    """Write each value as 'xx.xx%' centred just above the bar at its position."""
    for position, value in enumerate(values):
        ax.text(position, value, f'{value:.2f}%',
                ha='center', va='bottom', **text_kwargs)


def _show_figure(fig):
    """Render ``fig`` in Streamlit, then close it.

    Streamlit re-executes the script on every interaction; without closing,
    each rerun would leave three more figures registered in pyplot.
    """
    st.pyplot(fig)
    plt.close(fig)
fahmi.png ADDED
model.py ADDED
@@ -0,0 +1,56 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import streamlit as st
2
+ import pandas as pd
3
+ import numpy as np
4
+ import pickle
5
+ import ast
6
+
7
def run():
    """Render the prediction page of the Streamlit app.

    Loads the pickled ``scaler.pkl`` and ``model_knn.pkl`` from the working
    directory, collects one customer's credit limit and six payment-delay
    values through Streamlit widgets, echoes them as a one-row table and,
    when the 'predict' button is pressed, writes the predicted class and a
    message saying whether the customer is predicted to be able to pay.
    """
    st.header("Model Prediction")

    # NOTE(review): pickle.load can execute arbitrary code from the file.
    # These artifacts are read from fixed local paths; confirm they are only
    # ever produced by the project's own training notebook.
    with open('scaler.pkl', 'rb') as file_1:
        scaler = pickle.load(file_1)

    with open('model_knn.pkl', 'rb') as file_2:
        model_knn = pickle.load(file_2)

    limit_balance = st.number_input(label='Limit balance nasabah')

    # Column name -> (widget label, selectable values), in model feature order.
    pay_widgets = {
        'pay_1': ('Delay Payment on September 2015',
                  [-2.0, -1.0, 0.0, 1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0]),
        'pay_2': ('Delay Payment on Agustus 2015',
                  [-2.0, -1.0, 0.0, 1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0]),
        'pay_3': ('Delay Payment on Juli 2015',
                  [-2.0, -1.0, 0.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0]),
        'pay_4': ('Delay Payment on Juni 2015',
                  [-2.0, -1.0, 0.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0]),
        'pay_5': ('Delay Payment on May 2015',
                  [-2.0, -1.0, 0.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0]),
        'pay_6': ('Delay Payment on April 2015',
                  [-2.0, -1.0, 0.0, 2.0, 3.0, 4.0, 6.0, 7.0]),
    }
    payments = {}
    for column, (label, options) in pay_widgets.items():
        payments[column] = st.selectbox(label=label, options=options)

    df_inf = pd.DataFrame({'limit_balance': limit_balance, **payments}, index=[0])
    st.table(df_inf)

    if not st.button(label='predict'):
        return

    # Only the numeric column goes through the scaler; the payment-delay
    # columns are appended unscaled, after it.
    df_inf_num = df_inf[['limit_balance']]
    df_inf_cat = df_inf[list(pay_widgets)]

    scaled_num = np.asarray(scaler.transform(df_inf_num))
    df_inf_final = np.concatenate([scaled_num, df_inf_cat.to_numpy()], axis=1)

    # predict() returns one entry per input row; there is exactly one row, so
    # compare the scalar instead of testing the whole array for truth.
    prediction = model_knn.predict(df_inf_final)[0]

    st.write(prediction)
    if prediction == 0:
        st.write('Nasabah Terprediksi bisa membayar')
    else:
        st.write('Nasabah Terprediksi tidak bisa membayar')
model_knn.pkl ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:c509e7825b43bb20c105f2ba26282216afba5270925d7eea4f67792854e71e2f
3
+ size 286539
rafif.png ADDED
scaler.pkl ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:f99ac1868828d6f52d15e1d982dadb7ca930e5c5c1ef1c0f4ae97b55af9c383a
3
+ size 623