{ "cells": [ { "cell_type": "markdown", "metadata": {}, "source": [ "# xi. Model Inference" ] }, { "cell_type": "code", "execution_count": 1, "metadata": {}, "outputs": [], "source": [ "import pickle\n", "import random\n", "\n", "import numpy as np\n", "import pandas as pd" ] }, { "cell_type": "code", "execution_count": 2, "metadata": {}, "outputs": [], "source": [ "# Load the fitted scaler and the KNN model that were saved as pickle files.\n", "# NOTE: pickle.load can execute arbitrary code -- only load files from a trusted source.\n", "with open('scaler.pkl', 'rb') as scaler_file:\n", "    scaler = pickle.load(scaler_file)\n", "\n", "with open('model_knn.pkl', 'rb') as model_file:\n", "    model_knn = pickle.load(model_file)" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "## Membuat data dummy" ] }, { "cell_type": "code", "execution_count": 3, "metadata": {}, "outputs": [], "source": [ "# Seed both RNGs so the dummy data is reproducible\n", "seed_value = 777\n", "random.seed(seed_value)\n", "np.random.seed(seed_value)\n", "\n", "# Number of dummy rows to generate\n", "n = 100\n", "\n", "# 'limit_balance': uniform draws in [10000, 800000), rounded to 2 decimals\n", "limit_balance = np.round(np.random.uniform(10000, 800000, n), 2)\n", "\n", "# Candidate values for each pay_* column (the allowed set differs per column).\n", "# Keep the pay_1..pay_6 order: random.choices is called in dict order, so the\n", "# seeded draws stay identical to sampling the columns one by one.\n", "pay_options = {\n", "    'pay_1': [-2.0, -1.0, 0.0, 1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0],\n", "    'pay_2': [-2.0, -1.0, 0.0, 1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0],\n", "    'pay_3': [-2.0, -1.0, 0.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0],\n", "    'pay_4': [-2.0, -1.0, 0.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0],\n", "    'pay_5': [-2.0, -1.0, 0.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0],\n", "    'pay_6': [-2.0, -1.0, 0.0, 2.0, 3.0, 4.0, 6.0, 7.0],\n", "}\n", "\n", "# Draw n values with replacement for every pay_* column\n", "pay_samples = {col: random.choices(options, k=n) for col, options in pay_options.items()}\n", "\n", "# Assemble the inference DataFrame: limit_balance first, then pay_1..pay_6\n", "df_inf = pd.DataFrame({'limit_balance': limit_balance, **pay_samples})" ] }, { "cell_type": 
"code", "execution_count": 4, "metadata": {}, "outputs": [ { "data": { "text/html": [ "
\n", "\n", "\n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", "
limit_balancepay_1pay_2pay_3pay_4pay_5pay_6
0130604.350.07.05.05.02.06.0
1248861.722.0-2.03.00.0-2.06.0
259008.772.0-2.03.05.02.07.0
3373289.670.07.02.02.0-2.00.0
4669850.171.04.0-1.00.07.07.0
........................
95772548.372.0-2.00.00.06.03.0
96780417.847.06.07.05.03.0-2.0
97169988.274.02.03.0-2.0-1.04.0
98547083.914.01.04.0-1.05.02.0
99377622.60-2.05.03.07.03.07.0
\n", "

100 rows × 7 columns

\n", "
" ], "text/plain": [ " limit_balance pay_1 pay_2 pay_3 pay_4 pay_5 pay_6\n", "0 130604.35 0.0 7.0 5.0 5.0 2.0 6.0\n", "1 248861.72 2.0 -2.0 3.0 0.0 -2.0 6.0\n", "2 59008.77 2.0 -2.0 3.0 5.0 2.0 7.0\n", "3 373289.67 0.0 7.0 2.0 2.0 -2.0 0.0\n", "4 669850.17 1.0 4.0 -1.0 0.0 7.0 7.0\n", ".. ... ... ... ... ... ... ...\n", "95 772548.37 2.0 -2.0 0.0 0.0 6.0 3.0\n", "96 780417.84 7.0 6.0 7.0 5.0 3.0 -2.0\n", "97 169988.27 4.0 2.0 3.0 -2.0 -1.0 4.0\n", "98 547083.91 4.0 1.0 4.0 -1.0 5.0 2.0\n", "99 377622.60 -2.0 5.0 3.0 7.0 3.0 7.0\n", "\n", "[100 rows x 7 columns]" ] }, "execution_count": 4, "metadata": {}, "output_type": "execute_result" } ], "source": [ "df_inf" ] }, { "cell_type": "code", "execution_count": 5, "metadata": {}, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ " pay_1 pay_2 pay_3 pay_4 pay_5 pay_6\n", "0 0.0 7.0 5.0 5.0 2.0 6.0\n", "1 2.0 -2.0 3.0 0.0 -2.0 6.0\n", "2 2.0 -2.0 3.0 5.0 2.0 7.0\n", "3 0.0 7.0 2.0 2.0 -2.0 0.0\n", "4 1.0 4.0 -1.0 0.0 7.0 7.0\n", ".. ... ... ... ... ... ...\n", "95 2.0 -2.0 0.0 0.0 6.0 3.0\n", "96 7.0 6.0 7.0 5.0 3.0 -2.0\n", "97 4.0 2.0 3.0 -2.0 -1.0 4.0\n", "98 4.0 1.0 4.0 -1.0 5.0 2.0\n", "99 -2.0 5.0 3.0 7.0 3.0 7.0\n", "\n", "[100 rows x 6 columns]\n", " limit_balance\n", "0 130604.35\n", "1 248861.72\n", "2 59008.77\n", "3 373289.67\n", "4 669850.17\n", ".. ...\n", "95 772548.37\n", "96 780417.84\n", "97 169988.27\n", "98 547083.91\n", "99 377622.60\n", "\n", "[100 rows x 1 columns]\n" ] } ], "source": [ "# Split the features into the numerical column and the categorical pay_* columns\n", "num_cols = ['limit_balance']\n", "cat_cols = ['pay_1', 'pay_2', 'pay_3', 'pay_4', 'pay_5', 'pay_6']\n", "df_inf_num = df_inf[num_cols]\n", "df_inf_cat = df_inf[cat_cols]\n", "\n", "print(df_inf_cat)\n", "print(df_inf_num)" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "## Scaling" ] }, { "cell_type": "code", "execution_count": 6, "metadata": {}, "outputs": [ { "data": { "text/html": [ "
\n", "\n", "\n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", "
0
00.241209
10.477723
20.098018
30.726579
41.319700
......
951.525097
961.540836
970.319977
981.074168
990.735245
\n", "

100 rows × 1 columns

\n", "
" ], "text/plain": [ " 0\n", "0 0.241209\n", "1 0.477723\n", "2 0.098018\n", "3 0.726579\n", "4 1.319700\n", ".. ...\n", "95 1.525097\n", "96 1.540836\n", "97 0.319977\n", "98 1.074168\n", "99 0.735245\n", "\n", "[100 rows x 1 columns]" ] }, "execution_count": 6, "metadata": {}, "output_type": "execute_result" } ], "source": [ "# Scale the numerical column with the loaded scaler and wrap the result in a DataFrame\n", "df_inf_num_scaled = pd.DataFrame(scaler.transform(df_inf_num))\n", "df_inf_num_scaled" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "## Menggabungkan 2 kolom numerikal yang sudah di scaling dan kolom kategorical" ] }, { "cell_type": "code", "execution_count": 7, "metadata": {}, "outputs": [ { "data": { "text/plain": [ "array([[ 0.2412087 , 0. , 7. , 5. , 5. ,\n", " 2. , 6. ],\n", " [ 0.47772344, 2. , -2. , 3. , 0. ,\n", " -2. , 6. ],\n", " [ 0.09801754, 2. , -2. , 3. , 5. ,\n", " 2. , 7. ],\n", " [ 0.72657934, 0. , 7. , 2. , 2. ,\n", " -2. , 0. ],\n", " [ 1.31970034, 1. , 4. , -1. , 0. ,\n", " 7. , 7. ],\n", " [ 1.46465534, 6. , 0. , 7. , 6. ,\n", " 4. , 3. ],\n", " [ 1.1486426 , 8. , -1. , -2. , 5. ,\n", " 5. , 3. ],\n", " [ 1.21422404, 8. , 6. , 5. , 4. ,\n", " 4. , 3. ],\n", " [ 0.425344 , 4. , 4. , 5. , 2. ,\n", " 0. , 3. ],\n", " [ 1.01756628, 2. , 2. , 3. , 2. ,\n", " 2. , 7. ],\n", " [ 0.14752974, 1. , 3. , 7. , 5. ,\n", " 2. , 3. ],\n", " [ 0.1259037 , 8. , 3. , 0. , 3. ,\n", " 3. , 0. ],\n", " [ 0.93158974, 8. , 5. , 5. , 8. ,\n", " 4. , 2. ],\n", " [ 0.54247806, 5. , 1. , 4. , 3. ,\n", " 7. , 6. ],\n", " [ 1.56242432, 7. , -2. , 3. , 6. ,\n", " 0. , -2. ],\n", " [ 0.98982766, -1. , 7. , 4. , 0. ,\n", " 5. , -1. ],\n", " [ 1.07721126, 6. , -1. , 5. , 4. ,\n", " 4. , 6. ],\n", " [ 0.87256576, 8. , -1. , 4. , 5. ,\n", " -2. , 2. ],\n", " [ 0.4247989 , -2. , 2. , 3. , 8. ,\n", " 7. , -1. ],\n", " [ 0.58974984, 5. , 1. , 0. , 5. ,\n", " 6. , 3. ],\n", " [ 0.3522264 , 1. , 0. , 4. , 5. ,\n", " 3. , -1. ],\n", " [ 0.2945793 , 4. , 5. , 5. , -2. ,\n", " 7. , 3. 
],\n", " [ 0.61722398, -1. , 6. , 5. , -1. ,\n", " -1. , 3. ],\n", " [ 0.3051966 , 5. , 4. , 5. , 0. ,\n", " 3. , 2. ],\n", " [ 0.96523928, 0. , 2. , -2. , 6. ,\n", " 3. , 4. ],\n", " [ 1.39483734, 3. , -2. , 4. , 2. ,\n", " 4. , 4. ],\n", " [ 0.98329534, 6. , -2. , 7. , 5. ,\n", " 4. , 7. ],\n", " [ 0.39992794, 7. , 0. , -2. , 7. ,\n", " 5. , -1. ],\n", " [ 0.28428988, 4. , 6. , 3. , -2. ,\n", " 6. , 7. ],\n", " [ 1.28991906, -2. , -2. , 2. , 8. ,\n", " 6. , 2. ],\n", " [ 0.35608716, 4. , 4. , 4. , 6. ,\n", " -1. , 6. ],\n", " [ 0.81663428, 4. , -2. , 6. , -1. ,\n", " 0. , -1. ],\n", " [ 0.8192234 , -2. , -2. , 6. , 5. ,\n", " 2. , 0. ],\n", " [ 0.9485924 , 4. , 3. , 7. , 3. ,\n", " -2. , 7. ],\n", " [ 0.84154036, 1. , 4. , -1. , -1. ,\n", " 0. , 4. ],\n", " [ 0.02102988, 6. , 7. , 4. , -1. ,\n", " 0. , 7. ],\n", " [ 0.82807368, 3. , 2. , -1. , 6. ,\n", " 2. , 3. ],\n", " [ 1.41549784, 4. , -1. , 3. , 8. ,\n", " 0. , -1. ],\n", " [ 1.21644404, -1. , 2. , -1. , 6. ,\n", " 6. , -2. ],\n", " [ 0.19410568, 2. , 6. , 5. , 4. ,\n", " 3. , 3. ],\n", " [ 0.46747886, 0. , 3. , -2. , 5. ,\n", " 4. , 0. ],\n", " [ 0.96699726, 4. , 1. , 7. , 5. ,\n", " 0. , -2. ],\n", " [ 1.14729824, -2. , 0. , 4. , 6. ,\n", " 2. , 2. ],\n", " [ 0.732326 , -1. , 6. , -1. , 3. ,\n", " 5. , 0. ],\n", " [ 1.21519438, 8. , 7. , 6. , 0. ,\n", " 6. , 4. ],\n", " [ 0.30277702, 6. , 6. , 5. , 2. ,\n", " 0. , 3. ],\n", " [ 0.88142942, 3. , 0. , -1. , -1. ,\n", " 6. , 6. ],\n", " [ 0.87022948, 7. , 6. , -1. , 8. ,\n", " 2. , 6. ],\n", " [ 0.74611628, 8. , 1. , 4. , 5. ,\n", " 7. , 7. ],\n", " [ 1.25117824, 1. , 4. , 2. , 6. ,\n", " 0. , 3. ],\n", " [ 0.1820945 , 8. , 0. , 3. , 6. ,\n", " 5. , 4. ],\n", " [ 1.07646016, 8. , 6. , 3. , 2. ,\n", " 5. , 0. ],\n", " [ 0.5724871 , -1. , 1. , 5. , 7. ,\n", " 2. , 6. ],\n", " [ 0.54385006, 7. , 7. , 4. , 7. ,\n", " 2. , 2. ],\n", " [ 0.71023962, -2. , 2. , 4. , -1. ,\n", " 2. , 6. ],\n", " [ 0.04256878, 8. , 0. , 7. , 7. ,\n", " 0. , 2. 
],\n", " [ 0.65609134, 4. , 0. , 7. , -2. ,\n", " 4. , 4. ],\n", " [ 1.4571261 , 0. , -2. , 4. , 2. ,\n", " -2. , 2. ],\n", " [ 0.1441048 , 0. , 0. , 2. , 4. ,\n", " 4. , -2. ],\n", " [ 0.49789242, -2. , 3. , 6. , 6. ,\n", " 7. , 0. ],\n", " [ 0.83427514, 5. , -1. , -2. , 0. ,\n", " 7. , 0. ],\n", " [ 0.518338 , 6. , 2. , 3. , 8. ,\n", " 3. , 0. ],\n", " [ 0.70928656, 5. , 1. , 3. , -1. ,\n", " 6. , -2. ],\n", " [ 0.02580838, 2. , 2. , 0. , -1. ,\n", " 7. , 3. ],\n", " [ 0.1533025 , 5. , 1. , 7. , 5. ,\n", " 7. , 6. ],\n", " [ 1.09428994, 3. , 5. , 4. , -2. ,\n", " -1. , 4. ],\n", " [ 1.32079058, 2. , 0. , 3. , 2. ,\n", " 5. , 2. ],\n", " [ 0.67042874, 2. , 2. , -1. , 8. ,\n", " -1. , -1. ],\n", " [ 1.3410634 , 1. , 6. , 7. , 8. ,\n", " 6. , -1. ],\n", " [ 0.86393012, 5. , 5. , -2. , 7. ,\n", " 0. , 4. ],\n", " [ 0.55948346, 4. , 5. , 6. , 6. ,\n", " -2. , 6. ],\n", " [ 1.1490545 , 1. , 2. , 5. , 4. ,\n", " 6. , -2. ],\n", " [ 0.14828566, 3. , -2. , 5. , 0. ,\n", " 0. , 4. ],\n", " [ 1.4107169 , 3. , 4. , -2. , 8. ,\n", " 0. , 0. ],\n", " [ 0.53128808, 4. , 4. , 4. , -2. ,\n", " 7. , 4. ],\n", " [ 1.40909564, -2. , 0. , 7. , -2. ,\n", " 7. , 3. ],\n", " [ 0.46902142, 7. , 5. , 6. , 6. ,\n", " 5. , 2. ],\n", " [ 0.4766043 , 1. , 7. , 5. , 5. ,\n", " 4. , 3. ],\n", " [ 1.27386016, 4. , 1. , 6. , -2. ,\n", " 7. , -1. ],\n", " [ 1.32342374, 8. , 5. , 7. , -2. ,\n", " 6. , 2. ],\n", " [ 1.0021645 , 5. , 1. , -1. , 4. ,\n", " 5. , 4. ],\n", " [ 0.49189714, -1. , -1. , 3. , 3. ,\n", " 2. , 7. ],\n", " [ 0.04652876, 1. , -2. , 7. , 7. ,\n", " 5. , 0. ],\n", " [ 0.63164816, 5. , 1. , -2. , 3. ,\n", " -1. , -2. ],\n", " [ 0.81871406, 5. , 6. , 7. , 2. ,\n", " -1. , 7. ],\n", " [ 0.01167376, 7. , 1. , -2. , 4. ,\n", " 0. , 4. ],\n", " [ 1.22441748, 8. , -1. , -2. , 8. ,\n", " 0. , 6. ],\n", " [ 1.35006448, -2. , 2. , 5. , 3. ,\n", " 7. , -2. ],\n", " [ 0.20782186, 1. , 6. , 2. , 8. ,\n", " 6. , 4. ],\n", " [ 0.45452436, 1. , 5. , 3. , 8. ,\n", " 3. , -1. 
],\n", " [ 0.51601032, 5. , 7. , -1. , 7. ,\n", " 7. , -1. ],\n", " [ 1.4323641 , -1. , 6. , -2. , -2. ,\n", " 2. , 7. ],\n", " [ 1.57930408, 1. , -1. , 5. , 6. ,\n", " 2. , -1. ],\n", " [ 1.21799718, 8. , 6. , 6. , 2. ,\n", " 4. , 3. ],\n", " [ 0.06769908, 0. , 2. , 7. , 7. ,\n", " 3. , 3. ],\n", " [ 1.52509674, 2. , -2. , 0. , 0. ,\n", " 6. , 3. ],\n", " [ 1.54083568, 7. , 6. , 7. , 5. ,\n", " 3. , -2. ],\n", " [ 0.31997654, 4. , 2. , 3. , -2. ,\n", " -1. , 4. ],\n", " [ 1.07416782, 4. , 1. , 4. , -1. ,\n", " 5. , 2. ],\n", " [ 0.7352452 , -2. , 5. , 3. , 7. ,\n", " 3. , 7. ]])" ] }, "execution_count": 7, "metadata": {}, "output_type": "execute_result" } ], "source": [ "# Stack the scaled numeric column and the pay_* columns side by side (numeric first)\n", "df_inf_final = np.hstack([df_inf_num_scaled, df_inf_cat])\n", "df_inf_final" ] }, { "cell_type": "code", "execution_count": 8, "metadata": {}, "outputs": [ { "data": { "text/plain": [ "array([0, 1, 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 1, 1, 1, 1, 0, 1,\n", " 0, 1, 0, 1, 1, 1, 1, 0, 1, 0, 0, 1, 0, 1, 1, 1, 1, 1, 1, 1, 0, 1,\n", " 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 1, 0, 0, 0, 1, 1, 1, 1, 1, 1,\n", " 1, 1, 0, 1, 1, 1, 1, 1, 1, 0, 1, 0, 1, 1, 1, 0, 0, 1, 1, 1, 1, 0,\n", " 1, 0, 0, 0, 0, 1, 1, 1, 1, 1, 1, 1], dtype=int64)" ] }, "execution_count": 8, "metadata": {}, "output_type": "execute_result" } ], "source": [ "# Predict a class label for every dummy row with the loaded KNN model\n", "y_pred_inf = model_knn.predict(df_inf_final)\n", "y_pred_inf" ] }, { "cell_type": "code", "execution_count": 9, "metadata": {}, "outputs": [ { "data": { "text/html": [ "
\n", "\n", "\n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", "
Def_payment - Prediction
00
11
20
31
41
......
951
961
971
981
991
\n", "

100 rows × 1 columns

\n", "
" ], "text/plain": [ " Def_payment - Prediction\n", "0 0\n", "1 1\n", "2 0\n", "3 1\n", "4 1\n", ".. ...\n", "95 1\n", "96 1\n", "97 1\n", "98 1\n", "99 1\n", "\n", "[100 rows x 1 columns]" ] }, "execution_count": 9, "metadata": {}, "output_type": "execute_result" } ], "source": [ "# Wrap the predictions in a one-column DataFrame named 'Def_payment - Prediction'\n", "y_pred_inf_df = pd.DataFrame({'Def_payment - Prediction': y_pred_inf})\n", "y_pred_inf_df" ] }, { "cell_type": "code", "execution_count": 10, "metadata": {}, "outputs": [ { "data": { "text/html": [ "
\n", "\n", "\n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", "
limit_balancepay_1pay_2pay_3pay_4pay_5pay_6Def_payment - Prediction
0130604.350.07.05.05.02.06.00
1248861.722.0-2.03.00.0-2.06.01
259008.772.0-2.03.05.02.07.00
3373289.670.07.02.02.0-2.00.01
4669850.171.04.0-1.00.07.07.01
...........................
95772548.372.0-2.00.00.06.03.01
96780417.847.06.07.05.03.0-2.01
97169988.274.02.03.0-2.0-1.04.01
98547083.914.01.04.0-1.05.02.01
99377622.60-2.05.03.07.03.07.01
\n", "

100 rows × 8 columns

\n", "
" ], "text/plain": [ " limit_balance pay_1 pay_2 pay_3 pay_4 pay_5 pay_6 \\\n", "0 130604.35 0.0 7.0 5.0 5.0 2.0 6.0 \n", "1 248861.72 2.0 -2.0 3.0 0.0 -2.0 6.0 \n", "2 59008.77 2.0 -2.0 3.0 5.0 2.0 7.0 \n", "3 373289.67 0.0 7.0 2.0 2.0 -2.0 0.0 \n", "4 669850.17 1.0 4.0 -1.0 0.0 7.0 7.0 \n", ".. ... ... ... ... ... ... ... \n", "95 772548.37 2.0 -2.0 0.0 0.0 6.0 3.0 \n", "96 780417.84 7.0 6.0 7.0 5.0 3.0 -2.0 \n", "97 169988.27 4.0 2.0 3.0 -2.0 -1.0 4.0 \n", "98 547083.91 4.0 1.0 4.0 -1.0 5.0 2.0 \n", "99 377622.60 -2.0 5.0 3.0 7.0 3.0 7.0 \n", "\n", " Def_payment - Prediction \n", "0 0 \n", "1 1 \n", "2 0 \n", "3 1 \n", "4 1 \n", ".. ... \n", "95 1 \n", "96 1 \n", "97 1 \n", "98 1 \n", "99 1 \n", "\n", "[100 rows x 8 columns]" ] }, "execution_count": 10, "metadata": {}, "output_type": "execute_result" } ], "source": [ "# Attach the prediction column to the dummy features; both frames share the same default row index\n", "tabel_inference = df_inf.join(y_pred_inf_df)\n", "tabel_inference" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "# xii. Kesimpulan " ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "- Saya memilih f1 score sebagai parameter score karena menurut saya untuk meminimalisir false postive dan false negative penting .\n", "\n", "- Dari hasil std yang mendekati 0 dan berada di bawah mean berarti model KNN yang default nilai STD 0.03 sehingga modelnya best fit . Jadi bisa dikatakan bahwa model knn default memiliki konsistensi yang bagus akan tetapi validasi hasil predictnya kurang bagus dikarenakan hanya sekitar 48%. 
Dari sisi bisnis, model ini belum bisa digunakan karena hasil prediksinya masih kurang bagus (kurang dari 50%), sehingga bisa menimbulkan prediksi yang salah yang dapat menyebabkan perusahaan rugi.\n", "\n", "- Untuk meningkatkan jumlah user yang bisa membayar, ada baiknya penagihan dilakukan pada tanggal gajian (sekitar tanggal 25-31), karena pada tanggal tersebut uang mereka lebih banyak sehingga mereka bisa membayar.\n" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "## Conceptual Problems:" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "1. Apakah yang dimaksud dengan coefficient pada logistic regression?" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "Yang dimaksud dengan coefficient pada logistic regression adalah nilai bobot atau parameter yang digunakan untuk mengukur hubungan antara variabel independen (fitur) dengan variabel dependen (kelas atau target)." ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "2. Apakah fungsi parameter kernel pada SVM? Jelaskan salah satu kernel yang kalian pahami!" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "Kernel adalah fungsi matematis yang digunakan untuk mengukur kesamaan antara pasangan data dalam ruang fitur.\n", "Salah satu kernelnya adalah linear, yang digunakan untuk pemisahan data yang dapat dipisahkan dengan garis lurus." ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "3. Bagaimana cara memilih K yang optimal pada KNN?" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "Cara memilih K yang optimal pada KNN adalah dengan membagi dataset menjadi 2, yaitu train dan test, lalu mendefinisikan model KNN. Selanjutnya digunakan cross validation untuk menentukan parameter mana yang terbaik: dengan memvariasikan nilai K dan mengamati performa validasi silang, kita bisa menemukan K yang optimal." ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "4. 
Apa yang dimaksud dengan metrics-metrics berikut : Accuracy, Precision, Recall, F1 Score, dan kapan waktu yang tepat untuk menggunakannya ?\n" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "Accuracy: Merupakan rasio dari jumlah prediksi yang benar (positif dan negatif) dibandingkan dengan jumlah total sampel. Berguna ketika distribusi kelas seimbang. Namun, bisa menjadi bias jika kelas tidak seimbang.\n", "\n", "Precision: Merupakan rasio dari jumlah prediksi positif yang benar dibandingkan dengan total prediksi positif. Berguna ketika penting untuk menghindari false positive.\n", "\n", "Recall: Merupakan rasio dari jumlah prediksi positif yang benar dibandingkan dengan total jumlah sampel positif yang sebenarnya. Berguna ketika penting untuk menghindari false negative.\n", "\n", "F1 Score: Merupakan ukuran yang mengkombinasikan precision dan recall. Berguna ketika Anda ingin mencari keseimbangan antara precision dan recall." ] } ], "metadata": { "kernelspec": { "display_name": "base", "language": "python", "name": "python3" }, "language_info": { "codemirror_mode": { "name": "ipython", "version": 3 }, "file_extension": ".py", "mimetype": "text/x-python", "name": "python", "nbconvert_exporter": "python", "pygments_lexer": "ipython3", "version": "3.9.17" }, "orig_nbformat": 4 }, "nbformat": 4, "nbformat_minor": 2 }