diff --git "a/h8dsft_P1G3_AlselloDM.ipynb" "b/h8dsft_P1G3_AlselloDM.ipynb" new file mode 100644--- /dev/null +++ "b/h8dsft_P1G3_AlselloDM.ipynb" @@ -0,0 +1,6195 @@ +{ + "cells": [ + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "# Graded Challenge 3" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "# Perkenalan\n", + "\n", + ">- Nama : Alsello Diveni Manuputty\n", + ">- Batch : HCK 6 Pondok Indah\n", + ">- Phase : 1\n", + "------\n", + "**Objective**\n", + ">Pada notebook ini akan dilakukan pengerjaan Graded Challenge 3 fase 1 di mana akan dibuat model Random Forest dan satu algoritma boosting untuk memprediksi apakah seorang pasien akan meninggal atau tidak menggunakan dataset yang sudah diberikan." + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "```\n", + "SELECT *\n", + "FROM `ftds-hacktiv8-project.phase1_ftds_006_hck.heart-failure`\n", + "```" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "# Import Library" + ] + }, + { + "cell_type": "code", + "execution_count": 1, + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Requirement already satisfied: xgboost in c:\\users\\alsel\\anaconda3\\envs\\hack\\lib\\site-packages (1.7.6)\n", + "Requirement already satisfied: numpy in c:\\users\\alsel\\anaconda3\\envs\\hack\\lib\\site-packages (from xgboost) (1.24.3)\n", + "Requirement already satisfied: scipy in c:\\users\\alsel\\anaconda3\\envs\\hack\\lib\\site-packages (from xgboost) (1.10.1)\n" + ] + } + ], + "source": [ + "import pandas as pd\n", + "import seaborn as sns\n", + "import matplotlib.pyplot as plt\n", + "import numpy as np\n", + "\n", + "\n", + "from feature_engine.outliers import Winsorizer\n", + "from sklearn.preprocessing import MinMaxScaler,StandardScaler\n", + "from sklearn.impute import SimpleImputer\n", + "\n", + "from sklearn.ensemble import RandomForestClassifier,GradientBoostingClassifier\n", + "!pip install 
xgboost\n", + "import xgboost as xgb\n", + "\n", + "from sklearn.feature_selection import SelectKBest, chi2\n", + "from sklearn.model_selection import train_test_split\n", + "from sklearn.model_selection import GridSearchCV\n", + "from sklearn.model_selection import cross_val_score,KFold\n", + "from sklearn.metrics import recall_score\n", + "from sklearn.metrics import f1_score\n", + "from sklearn.pipeline import Pipeline\n", + "from sklearn.compose import ColumnTransformer\n", + "\n", + "\n", + "from sklearn.metrics import classification_report\n", + "from sklearn.metrics import ConfusionMatrixDisplay,confusion_matrix\n", + "\n", + "import warnings\n", + "warnings.filterwarnings(action='ignore')" + ] + }, + { + "cell_type": "code", + "execution_count": 2, + "metadata": {}, + "outputs": [], + "source": [ + "mentah = pd.read_csv('h8dsft_P1G3_AlselloDM.csv')" + ] + }, + { + "cell_type": "code", + "execution_count": 3, + "metadata": {}, + "outputs": [ + { + "data": { + "text/html": [ + "
\n", + " | age | \n", + "anaemia | \n", + "creatinine_phosphokinase | \n", + "diabetes | \n", + "ejection_fraction | \n", + "high_blood_pressure | \n", + "platelets | \n", + "serum_creatinine | \n", + "serum_sodium | \n", + "sex | \n", + "smoking | \n", + "time | \n", + "DEATH_EVENT | \n", + "
---|---|---|---|---|---|---|---|---|---|---|---|---|---|
0 | \n", + "42.0 | \n", + "1 | \n", + "250 | \n", + "1 | \n", + "15 | \n", + "0 | \n", + "213000.0 | \n", + "1.3 | \n", + "136 | \n", + "0 | \n", + "0 | \n", + "65 | \n", + "1 | \n", + "
1 | \n", + "46.0 | \n", + "0 | \n", + "168 | \n", + "1 | \n", + "17 | \n", + "1 | \n", + "271000.0 | \n", + "2.1 | \n", + "124 | \n", + "0 | \n", + "0 | \n", + "100 | \n", + "1 | \n", + "
2 | \n", + "65.0 | \n", + "1 | \n", + "160 | \n", + "1 | \n", + "20 | \n", + "0 | \n", + "327000.0 | \n", + "2.7 | \n", + "116 | \n", + "0 | \n", + "0 | \n", + "8 | \n", + "1 | \n", + "
3 | \n", + "53.0 | \n", + "1 | \n", + "91 | \n", + "0 | \n", + "20 | \n", + "1 | \n", + "418000.0 | \n", + "1.4 | \n", + "139 | \n", + "0 | \n", + "0 | \n", + "43 | \n", + "1 | \n", + "
4 | \n", + "50.0 | \n", + "1 | \n", + "582 | \n", + "1 | \n", + "20 | \n", + "1 | \n", + "279000.0 | \n", + "1.0 | \n", + "134 | \n", + "0 | \n", + "0 | \n", + "186 | \n", + "0 | \n", + "
\n", + " | age | \n", + "anaemia | \n", + "creatinine_phosphokinase | \n", + "diabetes | \n", + "ejection_fraction | \n", + "high_blood_pressure | \n", + "platelets | \n", + "serum_creatinine | \n", + "serum_sodium | \n", + "sex | \n", + "smoking | \n", + "time | \n", + "DEATH_EVENT | \n", + "
---|---|---|---|---|---|---|---|---|---|---|---|---|---|
294 | \n", + "63.0 | \n", + "1 | \n", + "122 | \n", + "1 | \n", + "60 | \n", + "0 | \n", + "267000.00 | \n", + "1.2 | \n", + "145 | \n", + "1 | \n", + "0 | \n", + "147 | \n", + "0 | \n", + "
295 | \n", + "45.0 | \n", + "0 | \n", + "308 | \n", + "1 | \n", + "60 | \n", + "1 | \n", + "377000.00 | \n", + "1.0 | \n", + "136 | \n", + "1 | \n", + "0 | \n", + "186 | \n", + "0 | \n", + "
296 | \n", + "70.0 | \n", + "0 | \n", + "97 | \n", + "0 | \n", + "60 | \n", + "1 | \n", + "220000.00 | \n", + "0.9 | \n", + "138 | \n", + "1 | \n", + "0 | \n", + "186 | \n", + "0 | \n", + "
297 | \n", + "53.0 | \n", + "1 | \n", + "446 | \n", + "0 | \n", + "60 | \n", + "1 | \n", + "263358.03 | \n", + "1.0 | \n", + "139 | \n", + "1 | \n", + "0 | \n", + "215 | \n", + "0 | \n", + "
298 | \n", + "50.0 | \n", + "0 | \n", + "582 | \n", + "0 | \n", + "62 | \n", + "1 | \n", + "147000.00 | \n", + "0.8 | \n", + "140 | \n", + "1 | \n", + "1 | \n", + "192 | \n", + "0 | \n", + "
\n", + " | age | \n", + "anaemia | \n", + "creatinine_phosphokinase | \n", + "diabetes | \n", + "ejection_fraction | \n", + "high_blood_pressure | \n", + "platelets | \n", + "serum_creatinine | \n", + "serum_sodium | \n", + "sex | \n", + "smoking | \n", + "time | \n", + "death_event | \n", + "
---|---|---|---|---|---|---|---|---|---|---|---|---|---|
count | \n", + "299.000000 | \n", + "299.000000 | \n", + "299.000000 | \n", + "299.000000 | \n", + "299.000000 | \n", + "299.000000 | \n", + "299.000000 | \n", + "299.00000 | \n", + "299.000000 | \n", + "299.000000 | \n", + "299.00000 | \n", + "299.000000 | \n", + "299.00000 | \n", + "
mean | \n", + "60.833893 | \n", + "0.431438 | \n", + "581.839465 | \n", + "0.418060 | \n", + "38.083612 | \n", + "0.351171 | \n", + "263358.029264 | \n", + "1.39388 | \n", + "136.625418 | \n", + "0.648829 | \n", + "0.32107 | \n", + "130.260870 | \n", + "0.32107 | \n", + "
std | \n", + "11.894809 | \n", + "0.496107 | \n", + "970.287881 | \n", + "0.494067 | \n", + "11.834841 | \n", + "0.478136 | \n", + "97804.236869 | \n", + "1.03451 | \n", + "4.412477 | \n", + "0.478136 | \n", + "0.46767 | \n", + "77.614208 | \n", + "0.46767 | \n", + "
min | \n", + "40.000000 | \n", + "0.000000 | \n", + "23.000000 | \n", + "0.000000 | \n", + "14.000000 | \n", + "0.000000 | \n", + "25100.000000 | \n", + "0.50000 | \n", + "113.000000 | \n", + "0.000000 | \n", + "0.00000 | \n", + "4.000000 | \n", + "0.00000 | \n", + "
25% | \n", + "51.000000 | \n", + "0.000000 | \n", + "116.500000 | \n", + "0.000000 | \n", + "30.000000 | \n", + "0.000000 | \n", + "212500.000000 | \n", + "0.90000 | \n", + "134.000000 | \n", + "0.000000 | \n", + "0.00000 | \n", + "73.000000 | \n", + "0.00000 | \n", + "
50% | \n", + "60.000000 | \n", + "0.000000 | \n", + "250.000000 | \n", + "0.000000 | \n", + "38.000000 | \n", + "0.000000 | \n", + "262000.000000 | \n", + "1.10000 | \n", + "137.000000 | \n", + "1.000000 | \n", + "0.00000 | \n", + "115.000000 | \n", + "0.00000 | \n", + "
75% | \n", + "70.000000 | \n", + "1.000000 | \n", + "582.000000 | \n", + "1.000000 | \n", + "45.000000 | \n", + "1.000000 | \n", + "303500.000000 | \n", + "1.40000 | \n", + "140.000000 | \n", + "1.000000 | \n", + "1.00000 | \n", + "203.000000 | \n", + "1.00000 | \n", + "
max | \n", + "95.000000 | \n", + "1.000000 | \n", + "7861.000000 | \n", + "1.000000 | \n", + "80.000000 | \n", + "1.000000 | \n", + "850000.000000 | \n", + "9.40000 | \n", + "148.000000 | \n", + "1.000000 | \n", + "1.00000 | \n", + "285.000000 | \n", + "1.00000 | \n", + "
\n", + " | age | \n", + "anaemia | \n", + "creatinine_phosphokinase | \n", + "diabetes | \n", + "ejection_fraction | \n", + "high_blood_pressure | \n", + "platelets | \n", + "serum_creatinine | \n", + "serum_sodium | \n", + "sex | \n", + "smoking | \n", + "time | \n", + "death_event | \n", + "
---|---|---|---|---|---|---|---|---|---|---|---|---|---|
13 | \n", + "72.0 | \n", + "0 | \n", + "211 | \n", + "0 | \n", + "25 | \n", + "0 | \n", + "274000.0 | \n", + "1.2 | \n", + "134 | \n", + "0 | \n", + "0 | \n", + "207 | \n", + "0 | \n", + "
246 | \n", + "42.0 | \n", + "0 | \n", + "64 | \n", + "0 | \n", + "40 | \n", + "0 | \n", + "189000.0 | \n", + "0.7 | \n", + "140 | \n", + "1 | \n", + "0 | \n", + "245 | \n", + "0 | \n", + "
35 | \n", + "49.0 | \n", + "0 | \n", + "972 | \n", + "1 | \n", + "35 | \n", + "1 | \n", + "268000.0 | \n", + "0.8 | \n", + "130 | \n", + "0 | \n", + "0 | \n", + "187 | \n", + "0 | \n", + "
139 | \n", + "59.0 | \n", + "1 | \n", + "176 | \n", + "1 | \n", + "25 | \n", + "0 | \n", + "221000.0 | \n", + "1.0 | \n", + "136 | \n", + "1 | \n", + "1 | \n", + "150 | \n", + "1 | \n", + "
222 | \n", + "58.0 | \n", + "0 | \n", + "132 | \n", + "1 | \n", + "38 | \n", + "1 | \n", + "253000.0 | \n", + "1.0 | \n", + "139 | \n", + "1 | \n", + "0 | \n", + "230 | \n", + "0 | \n", + "
200 | \n", + "50.0 | \n", + "1 | \n", + "298 | \n", + "0 | \n", + "35 | \n", + "0 | \n", + "362000.0 | \n", + "0.9 | \n", + "140 | \n", + "1 | \n", + "1 | \n", + "240 | \n", + "0 | \n", + "
233 | \n", + "42.0 | \n", + "0 | \n", + "102 | \n", + "1 | \n", + "40 | \n", + "0 | \n", + "237000.0 | \n", + "1.2 | \n", + "140 | \n", + "1 | \n", + "0 | \n", + "74 | \n", + "0 | \n", + "
250 | \n", + "55.0 | \n", + "1 | \n", + "170 | \n", + "1 | \n", + "40 | \n", + "0 | \n", + "336000.0 | \n", + "1.2 | \n", + "135 | \n", + "1 | \n", + "0 | \n", + "250 | \n", + "0 | \n", + "
273 | \n", + "79.0 | \n", + "1 | \n", + "55 | \n", + "0 | \n", + "50 | \n", + "1 | \n", + "172000.0 | \n", + "1.8 | \n", + "133 | \n", + "1 | \n", + "0 | \n", + "78 | \n", + "0 | \n", + "
158 | \n", + "52.0 | \n", + "0 | \n", + "132 | \n", + "0 | \n", + "30 | \n", + "0 | \n", + "218000.0 | \n", + "0.7 | \n", + "136 | \n", + "1 | \n", + "1 | \n", + "112 | \n", + "0 | \n", + "
\n", + " | age | \n", + "anaemia | \n", + "creatinine_phosphokinase | \n", + "diabetes | \n", + "ejection_fraction | \n", + "high_blood_pressure | \n", + "platelets | \n", + "serum_creatinine | \n", + "serum_sodium | \n", + "sex | \n", + "smoking | \n", + "time | \n", + "death_event | \n", + "
---|---|---|---|---|---|---|---|---|---|---|---|---|---|
0 | \n", + "42.0 | \n", + "1 | \n", + "250 | \n", + "1 | \n", + "15 | \n", + "0 | \n", + "213000.00 | \n", + "1.3 | \n", + "136 | \n", + "0 | \n", + "0 | \n", + "65 | \n", + "1 | \n", + "
1 | \n", + "46.0 | \n", + "0 | \n", + "168 | \n", + "1 | \n", + "17 | \n", + "1 | \n", + "271000.00 | \n", + "2.1 | \n", + "124 | \n", + "0 | \n", + "0 | \n", + "100 | \n", + "1 | \n", + "
2 | \n", + "65.0 | \n", + "1 | \n", + "160 | \n", + "1 | \n", + "20 | \n", + "0 | \n", + "327000.00 | \n", + "2.7 | \n", + "116 | \n", + "0 | \n", + "0 | \n", + "8 | \n", + "1 | \n", + "
3 | \n", + "53.0 | \n", + "1 | \n", + "91 | \n", + "0 | \n", + "20 | \n", + "1 | \n", + "418000.00 | \n", + "1.4 | \n", + "139 | \n", + "0 | \n", + "0 | \n", + "43 | \n", + "1 | \n", + "
4 | \n", + "50.0 | \n", + "1 | \n", + "582 | \n", + "1 | \n", + "20 | \n", + "1 | \n", + "279000.00 | \n", + "1.0 | \n", + "134 | \n", + "0 | \n", + "0 | \n", + "186 | \n", + "0 | \n", + "
... | \n", + "... | \n", + "... | \n", + "... | \n", + "... | \n", + "... | \n", + "... | \n", + "... | \n", + "... | \n", + "... | \n", + "... | \n", + "... | \n", + "... | \n", + "... | \n", + "
294 | \n", + "63.0 | \n", + "1 | \n", + "122 | \n", + "1 | \n", + "60 | \n", + "0 | \n", + "267000.00 | \n", + "1.2 | \n", + "145 | \n", + "1 | \n", + "0 | \n", + "147 | \n", + "0 | \n", + "
295 | \n", + "45.0 | \n", + "0 | \n", + "308 | \n", + "1 | \n", + "60 | \n", + "1 | \n", + "377000.00 | \n", + "1.0 | \n", + "136 | \n", + "1 | \n", + "0 | \n", + "186 | \n", + "0 | \n", + "
296 | \n", + "70.0 | \n", + "0 | \n", + "97 | \n", + "0 | \n", + "60 | \n", + "1 | \n", + "220000.00 | \n", + "0.9 | \n", + "138 | \n", + "1 | \n", + "0 | \n", + "186 | \n", + "0 | \n", + "
297 | \n", + "53.0 | \n", + "1 | \n", + "446 | \n", + "0 | \n", + "60 | \n", + "1 | \n", + "263358.03 | \n", + "1.0 | \n", + "139 | \n", + "1 | \n", + "0 | \n", + "215 | \n", + "0 | \n", + "
298 | \n", + "50.0 | \n", + "0 | \n", + "582 | \n", + "0 | \n", + "62 | \n", + "1 | \n", + "147000.00 | \n", + "0.8 | \n", + "140 | \n", + "1 | \n", + "1 | \n", + "192 | \n", + "0 | \n", + "
289 rows × 13 columns
\n", + "\n", + " | age | \n", + "anaemia | \n", + "creatinine_phosphokinase | \n", + "diabetes | \n", + "ejection_fraction | \n", + "high_blood_pressure | \n", + "platelets | \n", + "serum_creatinine | \n", + "serum_sodium | \n", + "sex | \n", + "smoking | \n", + "time | \n", + "death_event | \n", + "
---|---|---|---|---|---|---|---|---|---|---|---|---|---|
0 | \n", + "72.0 | \n", + "0 | \n", + "211 | \n", + "0 | \n", + "25 | \n", + "0 | \n", + "274000.0 | \n", + "1.2 | \n", + "134 | \n", + "0 | \n", + "0 | \n", + "207 | \n", + "0 | \n", + "
1 | \n", + "42.0 | \n", + "0 | \n", + "64 | \n", + "0 | \n", + "40 | \n", + "0 | \n", + "189000.0 | \n", + "0.7 | \n", + "140 | \n", + "1 | \n", + "0 | \n", + "245 | \n", + "0 | \n", + "
2 | \n", + "49.0 | \n", + "0 | \n", + "972 | \n", + "1 | \n", + "35 | \n", + "1 | \n", + "268000.0 | \n", + "0.8 | \n", + "130 | \n", + "0 | \n", + "0 | \n", + "187 | \n", + "0 | \n", + "
3 | \n", + "59.0 | \n", + "1 | \n", + "176 | \n", + "1 | \n", + "25 | \n", + "0 | \n", + "221000.0 | \n", + "1.0 | \n", + "136 | \n", + "1 | \n", + "1 | \n", + "150 | \n", + "1 | \n", + "
4 | \n", + "58.0 | \n", + "0 | \n", + "132 | \n", + "1 | \n", + "38 | \n", + "1 | \n", + "253000.0 | \n", + "1.0 | \n", + "139 | \n", + "1 | \n", + "0 | \n", + "230 | \n", + "0 | \n", + "
5 | \n", + "50.0 | \n", + "1 | \n", + "298 | \n", + "0 | \n", + "35 | \n", + "0 | \n", + "362000.0 | \n", + "0.9 | \n", + "140 | \n", + "1 | \n", + "1 | \n", + "240 | \n", + "0 | \n", + "
6 | \n", + "42.0 | \n", + "0 | \n", + "102 | \n", + "1 | \n", + "40 | \n", + "0 | \n", + "237000.0 | \n", + "1.2 | \n", + "140 | \n", + "1 | \n", + "0 | \n", + "74 | \n", + "0 | \n", + "
7 | \n", + "55.0 | \n", + "1 | \n", + "170 | \n", + "1 | \n", + "40 | \n", + "0 | \n", + "336000.0 | \n", + "1.2 | \n", + "135 | \n", + "1 | \n", + "0 | \n", + "250 | \n", + "0 | \n", + "
8 | \n", + "79.0 | \n", + "1 | \n", + "55 | \n", + "0 | \n", + "50 | \n", + "1 | \n", + "172000.0 | \n", + "1.8 | \n", + "133 | \n", + "1 | \n", + "0 | \n", + "78 | \n", + "0 | \n", + "
9 | \n", + "52.0 | \n", + "0 | \n", + "132 | \n", + "0 | \n", + "30 | \n", + "0 | \n", + "218000.0 | \n", + "0.7 | \n", + "136 | \n", + "1 | \n", + "1 | \n", + "112 | \n", + "0 | \n", + "
\n", + " | variables | \n", + "VIF | \n", + "
---|---|---|
0 | \n", + "age | \n", + "28.891087 | \n", + "
1 | \n", + "creatinine_phosphokinase | \n", + "1.385313 | \n", + "
2 | \n", + "ejection_fraction | \n", + "11.663433 | \n", + "
3 | \n", + "platelets | \n", + "8.218574 | \n", + "
4 | \n", + "serum_creatinine | \n", + "2.877972 | \n", + "
5 | \n", + "time | \n", + "4.029306 | \n", + "
6 | \n", + "serum_sodium | \n", + "52.366216 | \n", + "
Pipeline(steps=[('preprocess',\n", + " ColumnTransformer(transformers=[('numerikout',\n", + " Pipeline(steps=[('outlier',\n", + " Winsorizer(capping_method='iqr',\n", + " tail='both')),\n", + " ('imputer',\n", + " SimpleImputer(strategy='most_frequent')),\n", + " ('scaler',\n", + " MinMaxScaler())]),\n", + " ['serum_creatinine',\n", + " 'creatinine_phosphokinase']),\n", + " ('numerik',\n", + " Pipeline(steps=[('imputer',\n", + " SimpleImputer(strategy='most_frequent')),\n", + " ('scaler',\n", + " StandardScaler())]),\n", + " ['time']),\n", + " ('kategorik',\n", + " Pipeline(steps=[('imputer',\n", + " SimpleImputer(strategy='most_frequent'))]),\n", + " ['anaemia',\n", + " 'high_blood_pressure',\n", + " 'diabetes'])])),\n", + " ('rf', RandomForestClassifier(random_state=69))])In a Jupyter environment, please rerun this cell to show the HTML representation or trust the notebook.
Pipeline(steps=[('preprocess',\n", + " ColumnTransformer(transformers=[('numerikout',\n", + " Pipeline(steps=[('outlier',\n", + " Winsorizer(capping_method='iqr',\n", + " tail='both')),\n", + " ('imputer',\n", + " SimpleImputer(strategy='most_frequent')),\n", + " ('scaler',\n", + " MinMaxScaler())]),\n", + " ['serum_creatinine',\n", + " 'creatinine_phosphokinase']),\n", + " ('numerik',\n", + " Pipeline(steps=[('imputer',\n", + " SimpleImputer(strategy='most_frequent')),\n", + " ('scaler',\n", + " StandardScaler())]),\n", + " ['time']),\n", + " ('kategorik',\n", + " Pipeline(steps=[('imputer',\n", + " SimpleImputer(strategy='most_frequent'))]),\n", + " ['anaemia',\n", + " 'high_blood_pressure',\n", + " 'diabetes'])])),\n", + " ('rf', RandomForestClassifier(random_state=69))])
ColumnTransformer(transformers=[('numerikout',\n", + " Pipeline(steps=[('outlier',\n", + " Winsorizer(capping_method='iqr',\n", + " tail='both')),\n", + " ('imputer',\n", + " SimpleImputer(strategy='most_frequent')),\n", + " ('scaler', MinMaxScaler())]),\n", + " ['serum_creatinine',\n", + " 'creatinine_phosphokinase']),\n", + " ('numerik',\n", + " Pipeline(steps=[('imputer',\n", + " SimpleImputer(strategy='most_frequent')),\n", + " ('scaler', StandardScaler())]),\n", + " ['time']),\n", + " ('kategorik',\n", + " Pipeline(steps=[('imputer',\n", + " SimpleImputer(strategy='most_frequent'))]),\n", + " ['anaemia', 'high_blood_pressure',\n", + " 'diabetes'])])
['serum_creatinine', 'creatinine_phosphokinase']
Winsorizer(capping_method='iqr', tail='both')
SimpleImputer(strategy='most_frequent')
MinMaxScaler()
['time']
SimpleImputer(strategy='most_frequent')
StandardScaler()
['anaemia', 'high_blood_pressure', 'diabetes']
SimpleImputer(strategy='most_frequent')
RandomForestClassifier(random_state=69)
GridSearchCV(cv=10,\n", + " estimator=Pipeline(steps=[('preprocess',\n", + " ColumnTransformer(transformers=[('numerikout',\n", + " Pipeline(steps=[('outlier',\n", + " Winsorizer(capping_method='iqr',\n", + " tail='both')),\n", + " ('imputer',\n", + " SimpleImputer(strategy='most_frequent')),\n", + " ('scaler',\n", + " MinMaxScaler())]),\n", + " ['serum_creatinine',\n", + " 'creatinine_phosphokinase']),\n", + " ('numerik',\n", + " Pipeline(steps=[('imputer',\n", + " Simpl...\n", + " StandardScaler())]),\n", + " ['time']),\n", + " ('kategorik',\n", + " Pipeline(steps=[('imputer',\n", + " SimpleImputer(strategy='most_frequent'))]),\n", + " ['anaemia',\n", + " 'high_blood_pressure',\n", + " 'diabetes'])])),\n", + " ('rf',\n", + " RandomForestClassifier(random_state=69))]),\n", + " param_grid={'rf__max_depth': [None, 5, 10],\n", + " 'rf__min_samples_leaf': [1, 2, 4],\n", + " 'rf__min_samples_split': [2, 5, 10],\n", + " 'rf__n_estimators': [100, 200, 300]},\n", + " scoring='f1')In a Jupyter environment, please rerun this cell to show the HTML representation or trust the notebook.
GridSearchCV(cv=10,\n", + " estimator=Pipeline(steps=[('preprocess',\n", + " ColumnTransformer(transformers=[('numerikout',\n", + " Pipeline(steps=[('outlier',\n", + " Winsorizer(capping_method='iqr',\n", + " tail='both')),\n", + " ('imputer',\n", + " SimpleImputer(strategy='most_frequent')),\n", + " ('scaler',\n", + " MinMaxScaler())]),\n", + " ['serum_creatinine',\n", + " 'creatinine_phosphokinase']),\n", + " ('numerik',\n", + " Pipeline(steps=[('imputer',\n", + " Simpl...\n", + " StandardScaler())]),\n", + " ['time']),\n", + " ('kategorik',\n", + " Pipeline(steps=[('imputer',\n", + " SimpleImputer(strategy='most_frequent'))]),\n", + " ['anaemia',\n", + " 'high_blood_pressure',\n", + " 'diabetes'])])),\n", + " ('rf',\n", + " RandomForestClassifier(random_state=69))]),\n", + " param_grid={'rf__max_depth': [None, 5, 10],\n", + " 'rf__min_samples_leaf': [1, 2, 4],\n", + " 'rf__min_samples_split': [2, 5, 10],\n", + " 'rf__n_estimators': [100, 200, 300]},\n", + " scoring='f1')
Pipeline(steps=[('preprocess',\n", + " ColumnTransformer(transformers=[('numerikout',\n", + " Pipeline(steps=[('outlier',\n", + " Winsorizer(capping_method='iqr',\n", + " tail='both')),\n", + " ('imputer',\n", + " SimpleImputer(strategy='most_frequent')),\n", + " ('scaler',\n", + " MinMaxScaler())]),\n", + " ['serum_creatinine',\n", + " 'creatinine_phosphokinase']),\n", + " ('numerik',\n", + " Pipeline(steps=[('imputer',\n", + " SimpleImputer(strategy='most_frequent')),\n", + " ('scaler',\n", + " StandardScaler())]),\n", + " ['time']),\n", + " ('kategorik',\n", + " Pipeline(steps=[('imputer',\n", + " SimpleImputer(strategy='most_frequent'))]),\n", + " ['anaemia',\n", + " 'high_blood_pressure',\n", + " 'diabetes'])])),\n", + " ('rf', RandomForestClassifier(random_state=69))])
ColumnTransformer(transformers=[('numerikout',\n", + " Pipeline(steps=[('outlier',\n", + " Winsorizer(capping_method='iqr',\n", + " tail='both')),\n", + " ('imputer',\n", + " SimpleImputer(strategy='most_frequent')),\n", + " ('scaler', MinMaxScaler())]),\n", + " ['serum_creatinine',\n", + " 'creatinine_phosphokinase']),\n", + " ('numerik',\n", + " Pipeline(steps=[('imputer',\n", + " SimpleImputer(strategy='most_frequent')),\n", + " ('scaler', StandardScaler())]),\n", + " ['time']),\n", + " ('kategorik',\n", + " Pipeline(steps=[('imputer',\n", + " SimpleImputer(strategy='most_frequent'))]),\n", + " ['anaemia', 'high_blood_pressure',\n", + " 'diabetes'])])
['serum_creatinine', 'creatinine_phosphokinase']
Winsorizer(capping_method='iqr', tail='both')
SimpleImputer(strategy='most_frequent')
MinMaxScaler()
['time']
SimpleImputer(strategy='most_frequent')
StandardScaler()
['anaemia', 'high_blood_pressure', 'diabetes']
SimpleImputer(strategy='most_frequent')
RandomForestClassifier(random_state=69)
Pipeline(steps=[('preprocess',\n", + " ColumnTransformer(transformers=[('numerikout',\n", + " Pipeline(steps=[('outlier',\n", + " Winsorizer(capping_method='iqr',\n", + " tail='both')),\n", + " ('imputer',\n", + " SimpleImputer(strategy='most_frequent')),\n", + " ('scaler',\n", + " MinMaxScaler())]),\n", + " ['serum_creatinine',\n", + " 'creatinine_phosphokinase']),\n", + " ('numerik',\n", + " Pipeline(steps=[('imputer',\n", + " SimpleImputer(strategy='most_frequent')),\n", + " ('scaler',\n", + " StandardScaler())]),\n", + " ['time']),\n", + " ('kategorik',\n", + " Pipeline(steps=[('imputer',\n", + " SimpleImputer(strategy='most_frequent'))]),\n", + " ['anaemia',\n", + " 'high_blood_pressure',\n", + " 'diabetes'])])),\n", + " ('rf',\n", + " RandomForestClassifier(max_depth=10, min_samples_leaf=2,\n", + " random_state=69))])In a Jupyter environment, please rerun this cell to show the HTML representation or trust the notebook.
Pipeline(steps=[('preprocess',\n", + " ColumnTransformer(transformers=[('numerikout',\n", + " Pipeline(steps=[('outlier',\n", + " Winsorizer(capping_method='iqr',\n", + " tail='both')),\n", + " ('imputer',\n", + " SimpleImputer(strategy='most_frequent')),\n", + " ('scaler',\n", + " MinMaxScaler())]),\n", + " ['serum_creatinine',\n", + " 'creatinine_phosphokinase']),\n", + " ('numerik',\n", + " Pipeline(steps=[('imputer',\n", + " SimpleImputer(strategy='most_frequent')),\n", + " ('scaler',\n", + " StandardScaler())]),\n", + " ['time']),\n", + " ('kategorik',\n", + " Pipeline(steps=[('imputer',\n", + " SimpleImputer(strategy='most_frequent'))]),\n", + " ['anaemia',\n", + " 'high_blood_pressure',\n", + " 'diabetes'])])),\n", + " ('rf',\n", + " RandomForestClassifier(max_depth=10, min_samples_leaf=2,\n", + " random_state=69))])
ColumnTransformer(transformers=[('numerikout',\n", + " Pipeline(steps=[('outlier',\n", + " Winsorizer(capping_method='iqr',\n", + " tail='both')),\n", + " ('imputer',\n", + " SimpleImputer(strategy='most_frequent')),\n", + " ('scaler', MinMaxScaler())]),\n", + " ['serum_creatinine',\n", + " 'creatinine_phosphokinase']),\n", + " ('numerik',\n", + " Pipeline(steps=[('imputer',\n", + " SimpleImputer(strategy='most_frequent')),\n", + " ('scaler', StandardScaler())]),\n", + " ['time']),\n", + " ('kategorik',\n", + " Pipeline(steps=[('imputer',\n", + " SimpleImputer(strategy='most_frequent'))]),\n", + " ['anaemia', 'high_blood_pressure',\n", + " 'diabetes'])])
['serum_creatinine', 'creatinine_phosphokinase']
Winsorizer(capping_method='iqr', tail='both')
SimpleImputer(strategy='most_frequent')
MinMaxScaler()
['time']
SimpleImputer(strategy='most_frequent')
StandardScaler()
['anaemia', 'high_blood_pressure', 'diabetes']
SimpleImputer(strategy='most_frequent')
RandomForestClassifier(max_depth=10, min_samples_leaf=2, random_state=69)
Pipeline(steps=[('preprocess',\n", + " ColumnTransformer(transformers=[('numerikout',\n", + " Pipeline(steps=[('outlier',\n", + " Winsorizer(capping_method='iqr',\n", + " tail='both')),\n", + " ('imputer',\n", + " SimpleImputer(strategy='most_frequent')),\n", + " ('scaler',\n", + " MinMaxScaler())]),\n", + " ['serum_creatinine',\n", + " 'creatinine_phosphokinase']),\n", + " ('numerik',\n", + " Pipeline(steps=[('imputer',\n", + " SimpleImputer(strategy='most_frequ...\n", + " grow_policy=None, importance_type=None,\n", + " interaction_constraints=None, max_bin=None,\n", + " max_cat_threshold=None, max_cat_to_onehot=None,\n", + " max_delta_step=None, max_depth=None,\n", + " max_leaves=None, min_child_weight=None,\n", + " missing=nan, monotone_constraints=None,\n", + " n_estimators=100, n_jobs=None,\n", + " num_parallel_tree=None,\n", + " objective='binary:logistic', predictor=None,\n", + " random_state=69, reg_alpha=None, ...))])In a Jupyter environment, please rerun this cell to show the HTML representation or trust the notebook.
Pipeline(steps=[('preprocess',\n", + " ColumnTransformer(transformers=[('numerikout',\n", + " Pipeline(steps=[('outlier',\n", + " Winsorizer(capping_method='iqr',\n", + " tail='both')),\n", + " ('imputer',\n", + " SimpleImputer(strategy='most_frequent')),\n", + " ('scaler',\n", + " MinMaxScaler())]),\n", + " ['serum_creatinine',\n", + " 'creatinine_phosphokinase']),\n", + " ('numerik',\n", + " Pipeline(steps=[('imputer',\n", + " SimpleImputer(strategy='most_frequ...\n", + " grow_policy=None, importance_type=None,\n", + " interaction_constraints=None, max_bin=None,\n", + " max_cat_threshold=None, max_cat_to_onehot=None,\n", + " max_delta_step=None, max_depth=None,\n", + " max_leaves=None, min_child_weight=None,\n", + " missing=nan, monotone_constraints=None,\n", + " n_estimators=100, n_jobs=None,\n", + " num_parallel_tree=None,\n", + " objective='binary:logistic', predictor=None,\n", + " random_state=69, reg_alpha=None, ...))])
ColumnTransformer(transformers=[('numerikout',\n", + " Pipeline(steps=[('outlier',\n", + " Winsorizer(capping_method='iqr',\n", + " tail='both')),\n", + " ('imputer',\n", + " SimpleImputer(strategy='most_frequent')),\n", + " ('scaler', MinMaxScaler())]),\n", + " ['serum_creatinine',\n", + " 'creatinine_phosphokinase']),\n", + " ('numerik',\n", + " Pipeline(steps=[('imputer',\n", + " SimpleImputer(strategy='most_frequent')),\n", + " ('scaler', StandardScaler())]),\n", + " ['time']),\n", + " ('kategorik',\n", + " Pipeline(steps=[('imputer',\n", + " SimpleImputer(strategy='most_frequent'))]),\n", + " ['anaemia', 'high_blood_pressure',\n", + " 'diabetes'])])
['serum_creatinine', 'creatinine_phosphokinase']
Winsorizer(capping_method='iqr', tail='both')
SimpleImputer(strategy='most_frequent')
MinMaxScaler()
['time']
SimpleImputer(strategy='most_frequent')
StandardScaler()
['anaemia', 'high_blood_pressure', 'diabetes']
SimpleImputer(strategy='most_frequent')
XGBRFClassifier(base_score=None, booster=None, callbacks=None,\n", + " colsample_bylevel=None, colsample_bytree=None,\n", + " early_stopping_rounds=None, enable_categorical=False,\n", + " eval_metric=None, feature_types=None, gamma=None, gpu_id=None,\n", + " grow_policy=None, importance_type=None,\n", + " interaction_constraints=None, max_bin=None,\n", + " max_cat_threshold=None, max_cat_to_onehot=None,\n", + " max_delta_step=None, max_depth=None, max_leaves=None,\n", + " min_child_weight=None, missing=nan, monotone_constraints=None,\n", + " n_estimators=100, n_jobs=None, num_parallel_tree=None,\n", + " objective='binary:logistic', predictor=None, random_state=69,\n", + " reg_alpha=None, ...)
GridSearchCV(cv=10,\n", + " estimator=Pipeline(steps=[('preprocess',\n", + " ColumnTransformer(transformers=[('numerikout',\n", + " Pipeline(steps=[('outlier',\n", + " Winsorizer(capping_method='iqr',\n", + " tail='both')),\n", + " ('imputer',\n", + " SimpleImputer(strategy='most_frequent')),\n", + " ('scaler',\n", + " MinMaxScaler())]),\n", + " ['serum_creatinine',\n", + " 'creatinine_phosphokinase']),\n", + " ('numerik',\n", + " Pipeline(steps=[('imputer',\n", + " Simpl...\n", + " missing=nan,\n", + " monotone_constraints=None,\n", + " n_estimators=100,\n", + " n_jobs=None,\n", + " num_parallel_tree=None,\n", + " objective='binary:logistic',\n", + " predictor=None,\n", + " random_state=69,\n", + " reg_alpha=None, ...))]),\n", + " param_grid={'xgb__colsample_bytree': [0.8, 1.0],\n", + " 'xgb__learning_rate': [0.1, 0.01, 0.001],\n", + " 'xgb__max_depth': [3, 5, 7],\n", + " 'xgb__n_estimators': [100, 200, 300],\n", + " 'xgb__subsample': [0.8, 1.0]},\n", + " scoring='f1')In a Jupyter environment, please rerun this cell to show the HTML representation or trust the notebook.
GridSearchCV(cv=10,\n", + " estimator=Pipeline(steps=[('preprocess',\n", + " ColumnTransformer(transformers=[('numerikout',\n", + " Pipeline(steps=[('outlier',\n", + " Winsorizer(capping_method='iqr',\n", + " tail='both')),\n", + " ('imputer',\n", + " SimpleImputer(strategy='most_frequent')),\n", + " ('scaler',\n", + " MinMaxScaler())]),\n", + " ['serum_creatinine',\n", + " 'creatinine_phosphokinase']),\n", + " ('numerik',\n", + " Pipeline(steps=[('imputer',\n", + " Simpl...\n", + " missing=nan,\n", + " monotone_constraints=None,\n", + " n_estimators=100,\n", + " n_jobs=None,\n", + " num_parallel_tree=None,\n", + " objective='binary:logistic',\n", + " predictor=None,\n", + " random_state=69,\n", + " reg_alpha=None, ...))]),\n", + " param_grid={'xgb__colsample_bytree': [0.8, 1.0],\n", + " 'xgb__learning_rate': [0.1, 0.01, 0.001],\n", + " 'xgb__max_depth': [3, 5, 7],\n", + " 'xgb__n_estimators': [100, 200, 300],\n", + " 'xgb__subsample': [0.8, 1.0]},\n", + " scoring='f1')
Pipeline(steps=[('preprocess',\n", + " ColumnTransformer(transformers=[('numerikout',\n", + " Pipeline(steps=[('outlier',\n", + " Winsorizer(capping_method='iqr',\n", + " tail='both')),\n", + " ('imputer',\n", + " SimpleImputer(strategy='most_frequent')),\n", + " ('scaler',\n", + " MinMaxScaler())]),\n", + " ['serum_creatinine',\n", + " 'creatinine_phosphokinase']),\n", + " ('numerik',\n", + " Pipeline(steps=[('imputer',\n", + " SimpleImputer(strategy='most_frequ...\n", + " grow_policy=None, importance_type=None,\n", + " interaction_constraints=None, max_bin=None,\n", + " max_cat_threshold=None, max_cat_to_onehot=None,\n", + " max_delta_step=None, max_depth=None,\n", + " max_leaves=None, min_child_weight=None,\n", + " missing=nan, monotone_constraints=None,\n", + " n_estimators=100, n_jobs=None,\n", + " num_parallel_tree=None,\n", + " objective='binary:logistic', predictor=None,\n", + " random_state=69, reg_alpha=None, ...))])
ColumnTransformer(transformers=[('numerikout',\n", + " Pipeline(steps=[('outlier',\n", + " Winsorizer(capping_method='iqr',\n", + " tail='both')),\n", + " ('imputer',\n", + " SimpleImputer(strategy='most_frequent')),\n", + " ('scaler', MinMaxScaler())]),\n", + " ['serum_creatinine',\n", + " 'creatinine_phosphokinase']),\n", + " ('numerik',\n", + " Pipeline(steps=[('imputer',\n", + " SimpleImputer(strategy='most_frequent')),\n", + " ('scaler', StandardScaler())]),\n", + " ['time']),\n", + " ('kategorik',\n", + " Pipeline(steps=[('imputer',\n", + " SimpleImputer(strategy='most_frequent'))]),\n", + " ['anaemia', 'high_blood_pressure',\n", + " 'diabetes'])])
['serum_creatinine', 'creatinine_phosphokinase']
Winsorizer(capping_method='iqr', tail='both')
SimpleImputer(strategy='most_frequent')
MinMaxScaler()
['time']
SimpleImputer(strategy='most_frequent')
StandardScaler()
['anaemia', 'high_blood_pressure', 'diabetes']
SimpleImputer(strategy='most_frequent')
XGBRFClassifier(base_score=None, booster=None, callbacks=None,\n", + " colsample_bylevel=None, colsample_bytree=None,\n", + " early_stopping_rounds=None, enable_categorical=False,\n", + " eval_metric=None, feature_types=None, gamma=None, gpu_id=None,\n", + " grow_policy=None, importance_type=None,\n", + " interaction_constraints=None, max_bin=None,\n", + " max_cat_threshold=None, max_cat_to_onehot=None,\n", + " max_delta_step=None, max_depth=None, max_leaves=None,\n", + " min_child_weight=None, missing=nan, monotone_constraints=None,\n", + " n_estimators=100, n_jobs=None, num_parallel_tree=None,\n", + " objective='binary:logistic', predictor=None, random_state=69,\n", + " reg_alpha=None, ...)
Pipeline(steps=[('preprocess',\n", + " ColumnTransformer(transformers=[('numerikout',\n", + " Pipeline(steps=[('outlier',\n", + " Winsorizer(capping_method='iqr',\n", + " tail='both')),\n", + " ('imputer',\n", + " SimpleImputer(strategy='most_frequent')),\n", + " ('scaler',\n", + " MinMaxScaler())]),\n", + " ['serum_creatinine',\n", + " 'creatinine_phosphokinase']),\n", + " ('numerik',\n", + " Pipeline(steps=[('imputer',\n", + " SimpleImputer(strategy='most_frequ...\n", + " grow_policy=None, importance_type=None,\n", + " interaction_constraints=None,\n", + " learning_rate=0.1, max_bin=None,\n", + " max_cat_threshold=None, max_cat_to_onehot=None,\n", + " max_delta_step=None, max_depth=7,\n", + " max_leaves=None, min_child_weight=None,\n", + " missing=nan, monotone_constraints=None,\n", + " n_estimators=200, n_jobs=None,\n", + " num_parallel_tree=None,\n", + " objective='binary:logistic', predictor=None,\n", + " random_state=69, ...))])In a Jupyter environment, please rerun this cell to show the HTML representation or trust the notebook.
Pipeline(steps=[('preprocess',\n", + " ColumnTransformer(transformers=[('numerikout',\n", + " Pipeline(steps=[('outlier',\n", + " Winsorizer(capping_method='iqr',\n", + " tail='both')),\n", + " ('imputer',\n", + " SimpleImputer(strategy='most_frequent')),\n", + " ('scaler',\n", + " MinMaxScaler())]),\n", + " ['serum_creatinine',\n", + " 'creatinine_phosphokinase']),\n", + " ('numerik',\n", + " Pipeline(steps=[('imputer',\n", + " SimpleImputer(strategy='most_frequ...\n", + " grow_policy=None, importance_type=None,\n", + " interaction_constraints=None,\n", + " learning_rate=0.1, max_bin=None,\n", + " max_cat_threshold=None, max_cat_to_onehot=None,\n", + " max_delta_step=None, max_depth=7,\n", + " max_leaves=None, min_child_weight=None,\n", + " missing=nan, monotone_constraints=None,\n", + " n_estimators=200, n_jobs=None,\n", + " num_parallel_tree=None,\n", + " objective='binary:logistic', predictor=None,\n", + " random_state=69, ...))])
ColumnTransformer(transformers=[('numerikout',\n", + " Pipeline(steps=[('outlier',\n", + " Winsorizer(capping_method='iqr',\n", + " tail='both')),\n", + " ('imputer',\n", + " SimpleImputer(strategy='most_frequent')),\n", + " ('scaler', MinMaxScaler())]),\n", + " ['serum_creatinine',\n", + " 'creatinine_phosphokinase']),\n", + " ('numerik',\n", + " Pipeline(steps=[('imputer',\n", + " SimpleImputer(strategy='most_frequent')),\n", + " ('scaler', StandardScaler())]),\n", + " ['time']),\n", + " ('kategorik',\n", + " Pipeline(steps=[('imputer',\n", + " SimpleImputer(strategy='most_frequent'))]),\n", + " ['anaemia', 'high_blood_pressure',\n", + " 'diabetes'])])
['serum_creatinine', 'creatinine_phosphokinase']
Winsorizer(capping_method='iqr', tail='both')
SimpleImputer(strategy='most_frequent')
MinMaxScaler()
['time']
SimpleImputer(strategy='most_frequent')
StandardScaler()
['anaemia', 'high_blood_pressure', 'diabetes']
SimpleImputer(strategy='most_frequent')
XGBRFClassifier(base_score=None, booster=None, callbacks=None,\n", + " colsample_bylevel=None, colsample_bytree=1.0,\n", + " early_stopping_rounds=None, enable_categorical=False,\n", + " eval_metric=None, feature_types=None, gamma=None, gpu_id=None,\n", + " grow_policy=None, importance_type=None,\n", + " interaction_constraints=None, learning_rate=0.1, max_bin=None,\n", + " max_cat_threshold=None, max_cat_to_onehot=None,\n", + " max_delta_step=None, max_depth=7, max_leaves=None,\n", + " min_child_weight=None, missing=nan, monotone_constraints=None,\n", + " n_estimators=200, n_jobs=None, num_parallel_tree=None,\n", + " objective='binary:logistic', predictor=None, random_state=69, ...)
\n", + " | age | \n", + "anaemia | \n", + "creatinine_phosphokinase | \n", + "diabetes | \n", + "ejection_fraction | \n", + "high_blood_pressure | \n", + "platelets | \n", + "serum_creatinine | \n", + "serum_sodium | \n", + "sex | \n", + "smoking | \n", + "time | \n", + "death_event | \n", + "
---|---|---|---|---|---|---|---|---|---|---|---|---|---|
0 | \n", + "72.0 | \n", + "0 | \n", + "211 | \n", + "0 | \n", + "25 | \n", + "0 | \n", + "274000.0 | \n", + "1.2 | \n", + "134 | \n", + "0 | \n", + "0 | \n", + "207 | \n", + "0 | \n", + "
1 | \n", + "42.0 | \n", + "0 | \n", + "64 | \n", + "0 | \n", + "40 | \n", + "0 | \n", + "189000.0 | \n", + "0.7 | \n", + "140 | \n", + "1 | \n", + "0 | \n", + "245 | \n", + "0 | \n", + "
2 | \n", + "49.0 | \n", + "0 | \n", + "972 | \n", + "1 | \n", + "35 | \n", + "1 | \n", + "268000.0 | \n", + "0.8 | \n", + "130 | \n", + "0 | \n", + "0 | \n", + "187 | \n", + "0 | \n", + "
3 | \n", + "59.0 | \n", + "1 | \n", + "176 | \n", + "1 | \n", + "25 | \n", + "0 | \n", + "221000.0 | \n", + "1.0 | \n", + "136 | \n", + "1 | \n", + "1 | \n", + "150 | \n", + "1 | \n", + "
4 | \n", + "58.0 | \n", + "0 | \n", + "132 | \n", + "1 | \n", + "38 | \n", + "1 | \n", + "253000.0 | \n", + "1.0 | \n", + "139 | \n", + "1 | \n", + "0 | \n", + "230 | \n", + "0 | \n", + "
5 | \n", + "50.0 | \n", + "1 | \n", + "298 | \n", + "0 | \n", + "35 | \n", + "0 | \n", + "362000.0 | \n", + "0.9 | \n", + "140 | \n", + "1 | \n", + "1 | \n", + "240 | \n", + "0 | \n", + "
6 | \n", + "42.0 | \n", + "0 | \n", + "102 | \n", + "1 | \n", + "40 | \n", + "0 | \n", + "237000.0 | \n", + "1.2 | \n", + "140 | \n", + "1 | \n", + "0 | \n", + "74 | \n", + "0 | \n", + "
7 | \n", + "55.0 | \n", + "1 | \n", + "170 | \n", + "1 | \n", + "40 | \n", + "0 | \n", + "336000.0 | \n", + "1.2 | \n", + "135 | \n", + "1 | \n", + "0 | \n", + "250 | \n", + "0 | \n", + "
8 | \n", + "79.0 | \n", + "1 | \n", + "55 | \n", + "0 | \n", + "50 | \n", + "1 | \n", + "172000.0 | \n", + "1.8 | \n", + "133 | \n", + "1 | \n", + "0 | \n", + "78 | \n", + "0 | \n", + "
9 | \n", + "52.0 | \n", + "0 | \n", + "132 | \n", + "0 | \n", + "30 | \n", + "0 | \n", + "218000.0 | \n", + "0.7 | \n", + "136 | \n", + "1 | \n", + "1 | \n", + "112 | \n", + "0 | \n", + "
\n", + " | anaemia | \n", + "creatinine_phosphokinase | \n", + "diabetes | \n", + "high_blood_pressure | \n", + "serum_creatinine | \n", + "time | \n", + "death_event | \n", + "
---|---|---|---|---|---|---|---|
0 | \n", + "0 | \n", + "211 | \n", + "0 | \n", + "0 | \n", + "1.2 | \n", + "207 | \n", + "0 | \n", + "
1 | \n", + "0 | \n", + "64 | \n", + "0 | \n", + "0 | \n", + "0.7 | \n", + "245 | \n", + "0 | \n", + "
2 | \n", + "0 | \n", + "972 | \n", + "1 | \n", + "1 | \n", + "0.8 | \n", + "187 | \n", + "0 | \n", + "
3 | \n", + "1 | \n", + "176 | \n", + "1 | \n", + "0 | \n", + "1.0 | \n", + "150 | \n", + "1 | \n", + "
4 | \n", + "0 | \n", + "132 | \n", + "1 | \n", + "1 | \n", + "1.0 | \n", + "230 | \n", + "0 | \n", + "
5 | \n", + "1 | \n", + "298 | \n", + "0 | \n", + "0 | \n", + "0.9 | \n", + "240 | \n", + "0 | \n", + "
6 | \n", + "0 | \n", + "102 | \n", + "1 | \n", + "0 | \n", + "1.2 | \n", + "74 | \n", + "0 | \n", + "
7 | \n", + "1 | \n", + "170 | \n", + "1 | \n", + "0 | \n", + "1.2 | \n", + "250 | \n", + "0 | \n", + "
8 | \n", + "1 | \n", + "55 | \n", + "0 | \n", + "1 | \n", + "1.8 | \n", + "78 | \n", + "0 | \n", + "
9 | \n", + "0 | \n", + "132 | \n", + "0 | \n", + "0 | \n", + "0.7 | \n", + "112 | \n", + "0 | \n", + "
\n", + " | RF | \n", + "
---|---|
0 | \n", + "0 | \n", + "
1 | \n", + "0 | \n", + "
2 | \n", + "0 | \n", + "
3 | \n", + "0 | \n", + "
4 | \n", + "0 | \n", + "
5 | \n", + "0 | \n", + "
6 | \n", + "0 | \n", + "
7 | \n", + "0 | \n", + "
8 | \n", + "0 | \n", + "
9 | \n", + "0 | \n", + "
\n", + " | RFGS | \n", + "
---|---|
0 | \n", + "0 | \n", + "
1 | \n", + "0 | \n", + "
2 | \n", + "0 | \n", + "
3 | \n", + "0 | \n", + "
4 | \n", + "0 | \n", + "
5 | \n", + "0 | \n", + "
6 | \n", + "0 | \n", + "
7 | \n", + "0 | \n", + "
8 | \n", + "0 | \n", + "
9 | \n", + "0 | \n", + "
\n", + " | XG | \n", + "
---|---|
0 | \n", + "0 | \n", + "
1 | \n", + "0 | \n", + "
2 | \n", + "0 | \n", + "
3 | \n", + "0 | \n", + "
4 | \n", + "0 | \n", + "
5 | \n", + "0 | \n", + "
6 | \n", + "0 | \n", + "
7 | \n", + "0 | \n", + "
8 | \n", + "0 | \n", + "
9 | \n", + "0 | \n", + "
\n", + " | XGGS | \n", + "
---|---|
0 | \n", + "0 | \n", + "
1 | \n", + "0 | \n", + "
2 | \n", + "0 | \n", + "
3 | \n", + "0 | \n", + "
4 | \n", + "0 | \n", + "
5 | \n", + "0 | \n", + "
6 | \n", + "0 | \n", + "
7 | \n", + "0 | \n", + "
8 | \n", + "0 | \n", + "
9 | \n", + "0 | \n", + "
\n", + " | anaemia | \n", + "creatinine_phosphokinase | \n", + "diabetes | \n", + "high_blood_pressure | \n", + "serum_creatinine | \n", + "time | \n", + "death_event | \n", + "RF | \n", + "RFGS | \n", + "XG | \n", + "XGGS | \n", + "
---|---|---|---|---|---|---|---|---|---|---|---|
0 | \n", + "0 | \n", + "211 | \n", + "0 | \n", + "0 | \n", + "1.2 | \n", + "207 | \n", + "0 | \n", + "0 | \n", + "0 | \n", + "0 | \n", + "0 | \n", + "
1 | \n", + "0 | \n", + "64 | \n", + "0 | \n", + "0 | \n", + "0.7 | \n", + "245 | \n", + "0 | \n", + "0 | \n", + "0 | \n", + "0 | \n", + "0 | \n", + "
2 | \n", + "0 | \n", + "972 | \n", + "1 | \n", + "1 | \n", + "0.8 | \n", + "187 | \n", + "0 | \n", + "0 | \n", + "0 | \n", + "0 | \n", + "0 | \n", + "
3 | \n", + "1 | \n", + "176 | \n", + "1 | \n", + "0 | \n", + "1.0 | \n", + "150 | \n", + "1 | \n", + "0 | \n", + "0 | \n", + "0 | \n", + "0 | \n", + "
4 | \n", + "0 | \n", + "132 | \n", + "1 | \n", + "1 | \n", + "1.0 | \n", + "230 | \n", + "0 | \n", + "0 | \n", + "0 | \n", + "0 | \n", + "0 | \n", + "
5 | \n", + "1 | \n", + "298 | \n", + "0 | \n", + "0 | \n", + "0.9 | \n", + "240 | \n", + "0 | \n", + "0 | \n", + "0 | \n", + "0 | \n", + "0 | \n", + "
6 | \n", + "0 | \n", + "102 | \n", + "1 | \n", + "0 | \n", + "1.2 | \n", + "74 | \n", + "0 | \n", + "0 | \n", + "0 | \n", + "0 | \n", + "0 | \n", + "
7 | \n", + "1 | \n", + "170 | \n", + "1 | \n", + "0 | \n", + "1.2 | \n", + "250 | \n", + "0 | \n", + "0 | \n", + "0 | \n", + "0 | \n", + "0 | \n", + "
8 | \n", + "1 | \n", + "55 | \n", + "0 | \n", + "1 | \n", + "1.8 | \n", + "78 | \n", + "0 | \n", + "0 | \n", + "0 | \n", + "0 | \n", + "0 | \n", + "
9 | \n", + "0 | \n", + "132 | \n", + "0 | \n", + "0 | \n", + "0.7 | \n", + "112 | \n", + "0 | \n", + "0 | \n", + "0 | \n", + "0 | \n", + "0 | \n", + "