{ "cells": [ { "cell_type": "markdown", "metadata": {}, "source": [ "# Graded Challenge 3" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "# Perkenalan\n", "\n", ">- Nama : Alsello Diveni Manuputty\n", ">- Batch : HCK 6 Pondok Indah\n", ">- Phase : 1\n", "------\n", "**Objective**\n", ">Pada notebook ini akan dilakukan pengerjaan Graded Challenge 3 fase 1, di mana akan dibuat model Random Forest dan satu algoritma boosting untuk memprediksi apakah seorang pasien akan meninggal atau tidak menggunakan dataset yang sudah diberikan." ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "```\n", "SELECT *\n", "FROM `ftds-hacktiv8-project.phase1_ftds_006_hck.heart-failure`\n", "```" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "# Import Library" ] }, { "cell_type": "code", "execution_count": 1, "metadata": {}, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ "Requirement already satisfied: xgboost in c:\\users\\alsel\\anaconda3\\envs\\hack\\lib\\site-packages (1.7.6)\n", "Requirement already satisfied: numpy in c:\\users\\alsel\\anaconda3\\envs\\hack\\lib\\site-packages (from xgboost) (1.24.3)\n", "Requirement already satisfied: scipy in c:\\users\\alsel\\anaconda3\\envs\\hack\\lib\\site-packages (from xgboost) (1.10.1)\n" ] } ], "source": [ "import pandas as pd\n", "import seaborn as sns\n", "import matplotlib.pyplot as plt\n", "import numpy as np\n", "\n", "\n", "from feature_engine.outliers import Winsorizer\n", "from sklearn.preprocessing import MinMaxScaler,StandardScaler\n", "from sklearn.impute import SimpleImputer\n", "\n", "from sklearn.ensemble import RandomForestClassifier,GradientBoostingClassifier\n", "!pip install xgboost\n", "import xgboost as xgb\n", "\n", "from sklearn.feature_selection import SelectKBest, chi2\n", "from sklearn.model_selection import train_test_split\n", "from sklearn.model_selection import GridSearchCV\n", "from sklearn.model_selection import cross_val_score,KFold\n", "from 
sklearn.metrics import recall_score\n", "from sklearn.metrics import f1_score\n", "from sklearn.pipeline import Pipeline\n", "from sklearn.compose import ColumnTransformer\n", "\n", "\n", "from sklearn.metrics import classification_report\n", "from sklearn.metrics import ConfusionMatrixDisplay,confusion_matrix\n", "\n", "import warnings\n", "warnings.filterwarnings(action='ignore')" ] }, { "cell_type": "code", "execution_count": 2, "metadata": {}, "outputs": [], "source": [ "mentah = pd.read_csv('h8dsft_P1G3_AlselloDM.csv')" ] }, { "cell_type": "code", "execution_count": 3, "metadata": {}, "outputs": [ { "data": { "text/html": [ "
\n", " | age | \n", "anaemia | \n", "creatinine_phosphokinase | \n", "diabetes | \n", "ejection_fraction | \n", "high_blood_pressure | \n", "platelets | \n", "serum_creatinine | \n", "serum_sodium | \n", "sex | \n", "smoking | \n", "time | \n", "DEATH_EVENT | \n", "
---|---|---|---|---|---|---|---|---|---|---|---|---|---|
0 | \n", "42.0 | \n", "1 | \n", "250 | \n", "1 | \n", "15 | \n", "0 | \n", "213000.0 | \n", "1.3 | \n", "136 | \n", "0 | \n", "0 | \n", "65 | \n", "1 | \n", "
1 | \n", "46.0 | \n", "0 | \n", "168 | \n", "1 | \n", "17 | \n", "1 | \n", "271000.0 | \n", "2.1 | \n", "124 | \n", "0 | \n", "0 | \n", "100 | \n", "1 | \n", "
2 | \n", "65.0 | \n", "1 | \n", "160 | \n", "1 | \n", "20 | \n", "0 | \n", "327000.0 | \n", "2.7 | \n", "116 | \n", "0 | \n", "0 | \n", "8 | \n", "1 | \n", "
3 | \n", "53.0 | \n", "1 | \n", "91 | \n", "0 | \n", "20 | \n", "1 | \n", "418000.0 | \n", "1.4 | \n", "139 | \n", "0 | \n", "0 | \n", "43 | \n", "1 | \n", "
4 | \n", "50.0 | \n", "1 | \n", "582 | \n", "1 | \n", "20 | \n", "1 | \n", "279000.0 | \n", "1.0 | \n", "134 | \n", "0 | \n", "0 | \n", "186 | \n", "0 | \n", "
\n", " | age | \n", "anaemia | \n", "creatinine_phosphokinase | \n", "diabetes | \n", "ejection_fraction | \n", "high_blood_pressure | \n", "platelets | \n", "serum_creatinine | \n", "serum_sodium | \n", "sex | \n", "smoking | \n", "time | \n", "DEATH_EVENT | \n", "
---|---|---|---|---|---|---|---|---|---|---|---|---|---|
294 | \n", "63.0 | \n", "1 | \n", "122 | \n", "1 | \n", "60 | \n", "0 | \n", "267000.00 | \n", "1.2 | \n", "145 | \n", "1 | \n", "0 | \n", "147 | \n", "0 | \n", "
295 | \n", "45.0 | \n", "0 | \n", "308 | \n", "1 | \n", "60 | \n", "1 | \n", "377000.00 | \n", "1.0 | \n", "136 | \n", "1 | \n", "0 | \n", "186 | \n", "0 | \n", "
296 | \n", "70.0 | \n", "0 | \n", "97 | \n", "0 | \n", "60 | \n", "1 | \n", "220000.00 | \n", "0.9 | \n", "138 | \n", "1 | \n", "0 | \n", "186 | \n", "0 | \n", "
297 | \n", "53.0 | \n", "1 | \n", "446 | \n", "0 | \n", "60 | \n", "1 | \n", "263358.03 | \n", "1.0 | \n", "139 | \n", "1 | \n", "0 | \n", "215 | \n", "0 | \n", "
298 | \n", "50.0 | \n", "0 | \n", "582 | \n", "0 | \n", "62 | \n", "1 | \n", "147000.00 | \n", "0.8 | \n", "140 | \n", "1 | \n", "1 | \n", "192 | \n", "0 | \n", "
\n", " | age | \n", "anaemia | \n", "creatinine_phosphokinase | \n", "diabetes | \n", "ejection_fraction | \n", "high_blood_pressure | \n", "platelets | \n", "serum_creatinine | \n", "serum_sodium | \n", "sex | \n", "smoking | \n", "time | \n", "death_event | \n", "
---|---|---|---|---|---|---|---|---|---|---|---|---|---|
count | \n", "299.000000 | \n", "299.000000 | \n", "299.000000 | \n", "299.000000 | \n", "299.000000 | \n", "299.000000 | \n", "299.000000 | \n", "299.00000 | \n", "299.000000 | \n", "299.000000 | \n", "299.00000 | \n", "299.000000 | \n", "299.00000 | \n", "
mean | \n", "60.833893 | \n", "0.431438 | \n", "581.839465 | \n", "0.418060 | \n", "38.083612 | \n", "0.351171 | \n", "263358.029264 | \n", "1.39388 | \n", "136.625418 | \n", "0.648829 | \n", "0.32107 | \n", "130.260870 | \n", "0.32107 | \n", "
std | \n", "11.894809 | \n", "0.496107 | \n", "970.287881 | \n", "0.494067 | \n", "11.834841 | \n", "0.478136 | \n", "97804.236869 | \n", "1.03451 | \n", "4.412477 | \n", "0.478136 | \n", "0.46767 | \n", "77.614208 | \n", "0.46767 | \n", "
min | \n", "40.000000 | \n", "0.000000 | \n", "23.000000 | \n", "0.000000 | \n", "14.000000 | \n", "0.000000 | \n", "25100.000000 | \n", "0.50000 | \n", "113.000000 | \n", "0.000000 | \n", "0.00000 | \n", "4.000000 | \n", "0.00000 | \n", "
25% | \n", "51.000000 | \n", "0.000000 | \n", "116.500000 | \n", "0.000000 | \n", "30.000000 | \n", "0.000000 | \n", "212500.000000 | \n", "0.90000 | \n", "134.000000 | \n", "0.000000 | \n", "0.00000 | \n", "73.000000 | \n", "0.00000 | \n", "
50% | \n", "60.000000 | \n", "0.000000 | \n", "250.000000 | \n", "0.000000 | \n", "38.000000 | \n", "0.000000 | \n", "262000.000000 | \n", "1.10000 | \n", "137.000000 | \n", "1.000000 | \n", "0.00000 | \n", "115.000000 | \n", "0.00000 | \n", "
75% | \n", "70.000000 | \n", "1.000000 | \n", "582.000000 | \n", "1.000000 | \n", "45.000000 | \n", "1.000000 | \n", "303500.000000 | \n", "1.40000 | \n", "140.000000 | \n", "1.000000 | \n", "1.00000 | \n", "203.000000 | \n", "1.00000 | \n", "
max | \n", "95.000000 | \n", "1.000000 | \n", "7861.000000 | \n", "1.000000 | \n", "80.000000 | \n", "1.000000 | \n", "850000.000000 | \n", "9.40000 | \n", "148.000000 | \n", "1.000000 | \n", "1.00000 | \n", "285.000000 | \n", "1.00000 | \n", "
\n", " | age | \n", "anaemia | \n", "creatinine_phosphokinase | \n", "diabetes | \n", "ejection_fraction | \n", "high_blood_pressure | \n", "platelets | \n", "serum_creatinine | \n", "serum_sodium | \n", "sex | \n", "smoking | \n", "time | \n", "death_event | \n", "
---|---|---|---|---|---|---|---|---|---|---|---|---|---|
13 | \n", "72.0 | \n", "0 | \n", "211 | \n", "0 | \n", "25 | \n", "0 | \n", "274000.0 | \n", "1.2 | \n", "134 | \n", "0 | \n", "0 | \n", "207 | \n", "0 | \n", "
246 | \n", "42.0 | \n", "0 | \n", "64 | \n", "0 | \n", "40 | \n", "0 | \n", "189000.0 | \n", "0.7 | \n", "140 | \n", "1 | \n", "0 | \n", "245 | \n", "0 | \n", "
35 | \n", "49.0 | \n", "0 | \n", "972 | \n", "1 | \n", "35 | \n", "1 | \n", "268000.0 | \n", "0.8 | \n", "130 | \n", "0 | \n", "0 | \n", "187 | \n", "0 | \n", "
139 | \n", "59.0 | \n", "1 | \n", "176 | \n", "1 | \n", "25 | \n", "0 | \n", "221000.0 | \n", "1.0 | \n", "136 | \n", "1 | \n", "1 | \n", "150 | \n", "1 | \n", "
222 | \n", "58.0 | \n", "0 | \n", "132 | \n", "1 | \n", "38 | \n", "1 | \n", "253000.0 | \n", "1.0 | \n", "139 | \n", "1 | \n", "0 | \n", "230 | \n", "0 | \n", "
200 | \n", "50.0 | \n", "1 | \n", "298 | \n", "0 | \n", "35 | \n", "0 | \n", "362000.0 | \n", "0.9 | \n", "140 | \n", "1 | \n", "1 | \n", "240 | \n", "0 | \n", "
233 | \n", "42.0 | \n", "0 | \n", "102 | \n", "1 | \n", "40 | \n", "0 | \n", "237000.0 | \n", "1.2 | \n", "140 | \n", "1 | \n", "0 | \n", "74 | \n", "0 | \n", "
250 | \n", "55.0 | \n", "1 | \n", "170 | \n", "1 | \n", "40 | \n", "0 | \n", "336000.0 | \n", "1.2 | \n", "135 | \n", "1 | \n", "0 | \n", "250 | \n", "0 | \n", "
273 | \n", "79.0 | \n", "1 | \n", "55 | \n", "0 | \n", "50 | \n", "1 | \n", "172000.0 | \n", "1.8 | \n", "133 | \n", "1 | \n", "0 | \n", "78 | \n", "0 | \n", "
158 | \n", "52.0 | \n", "0 | \n", "132 | \n", "0 | \n", "30 | \n", "0 | \n", "218000.0 | \n", "0.7 | \n", "136 | \n", "1 | \n", "1 | \n", "112 | \n", "0 | \n", "
\n", " | age | \n", "anaemia | \n", "creatinine_phosphokinase | \n", "diabetes | \n", "ejection_fraction | \n", "high_blood_pressure | \n", "platelets | \n", "serum_creatinine | \n", "serum_sodium | \n", "sex | \n", "smoking | \n", "time | \n", "death_event | \n", "
---|---|---|---|---|---|---|---|---|---|---|---|---|---|
0 | \n", "42.0 | \n", "1 | \n", "250 | \n", "1 | \n", "15 | \n", "0 | \n", "213000.00 | \n", "1.3 | \n", "136 | \n", "0 | \n", "0 | \n", "65 | \n", "1 | \n", "
1 | \n", "46.0 | \n", "0 | \n", "168 | \n", "1 | \n", "17 | \n", "1 | \n", "271000.00 | \n", "2.1 | \n", "124 | \n", "0 | \n", "0 | \n", "100 | \n", "1 | \n", "
2 | \n", "65.0 | \n", "1 | \n", "160 | \n", "1 | \n", "20 | \n", "0 | \n", "327000.00 | \n", "2.7 | \n", "116 | \n", "0 | \n", "0 | \n", "8 | \n", "1 | \n", "
3 | \n", "53.0 | \n", "1 | \n", "91 | \n", "0 | \n", "20 | \n", "1 | \n", "418000.00 | \n", "1.4 | \n", "139 | \n", "0 | \n", "0 | \n", "43 | \n", "1 | \n", "
4 | \n", "50.0 | \n", "1 | \n", "582 | \n", "1 | \n", "20 | \n", "1 | \n", "279000.00 | \n", "1.0 | \n", "134 | \n", "0 | \n", "0 | \n", "186 | \n", "0 | \n", "
... | \n", "... | \n", "... | \n", "... | \n", "... | \n", "... | \n", "... | \n", "... | \n", "... | \n", "... | \n", "... | \n", "... | \n", "... | \n", "... | \n", "
294 | \n", "63.0 | \n", "1 | \n", "122 | \n", "1 | \n", "60 | \n", "0 | \n", "267000.00 | \n", "1.2 | \n", "145 | \n", "1 | \n", "0 | \n", "147 | \n", "0 | \n", "
295 | \n", "45.0 | \n", "0 | \n", "308 | \n", "1 | \n", "60 | \n", "1 | \n", "377000.00 | \n", "1.0 | \n", "136 | \n", "1 | \n", "0 | \n", "186 | \n", "0 | \n", "
296 | \n", "70.0 | \n", "0 | \n", "97 | \n", "0 | \n", "60 | \n", "1 | \n", "220000.00 | \n", "0.9 | \n", "138 | \n", "1 | \n", "0 | \n", "186 | \n", "0 | \n", "
297 | \n", "53.0 | \n", "1 | \n", "446 | \n", "0 | \n", "60 | \n", "1 | \n", "263358.03 | \n", "1.0 | \n", "139 | \n", "1 | \n", "0 | \n", "215 | \n", "0 | \n", "
298 | \n", "50.0 | \n", "0 | \n", "582 | \n", "0 | \n", "62 | \n", "1 | \n", "147000.00 | \n", "0.8 | \n", "140 | \n", "1 | \n", "1 | \n", "192 | \n", "0 | \n", "
289 rows × 13 columns
\n", "\n", " | age | \n", "anaemia | \n", "creatinine_phosphokinase | \n", "diabetes | \n", "ejection_fraction | \n", "high_blood_pressure | \n", "platelets | \n", "serum_creatinine | \n", "serum_sodium | \n", "sex | \n", "smoking | \n", "time | \n", "death_event | \n", "
---|---|---|---|---|---|---|---|---|---|---|---|---|---|
0 | \n", "72.0 | \n", "0 | \n", "211 | \n", "0 | \n", "25 | \n", "0 | \n", "274000.0 | \n", "1.2 | \n", "134 | \n", "0 | \n", "0 | \n", "207 | \n", "0 | \n", "
1 | \n", "42.0 | \n", "0 | \n", "64 | \n", "0 | \n", "40 | \n", "0 | \n", "189000.0 | \n", "0.7 | \n", "140 | \n", "1 | \n", "0 | \n", "245 | \n", "0 | \n", "
2 | \n", "49.0 | \n", "0 | \n", "972 | \n", "1 | \n", "35 | \n", "1 | \n", "268000.0 | \n", "0.8 | \n", "130 | \n", "0 | \n", "0 | \n", "187 | \n", "0 | \n", "
3 | \n", "59.0 | \n", "1 | \n", "176 | \n", "1 | \n", "25 | \n", "0 | \n", "221000.0 | \n", "1.0 | \n", "136 | \n", "1 | \n", "1 | \n", "150 | \n", "1 | \n", "
4 | \n", "58.0 | \n", "0 | \n", "132 | \n", "1 | \n", "38 | \n", "1 | \n", "253000.0 | \n", "1.0 | \n", "139 | \n", "1 | \n", "0 | \n", "230 | \n", "0 | \n", "
5 | \n", "50.0 | \n", "1 | \n", "298 | \n", "0 | \n", "35 | \n", "0 | \n", "362000.0 | \n", "0.9 | \n", "140 | \n", "1 | \n", "1 | \n", "240 | \n", "0 | \n", "
6 | \n", "42.0 | \n", "0 | \n", "102 | \n", "1 | \n", "40 | \n", "0 | \n", "237000.0 | \n", "1.2 | \n", "140 | \n", "1 | \n", "0 | \n", "74 | \n", "0 | \n", "
7 | \n", "55.0 | \n", "1 | \n", "170 | \n", "1 | \n", "40 | \n", "0 | \n", "336000.0 | \n", "1.2 | \n", "135 | \n", "1 | \n", "0 | \n", "250 | \n", "0 | \n", "
8 | \n", "79.0 | \n", "1 | \n", "55 | \n", "0 | \n", "50 | \n", "1 | \n", "172000.0 | \n", "1.8 | \n", "133 | \n", "1 | \n", "0 | \n", "78 | \n", "0 | \n", "
9 | \n", "52.0 | \n", "0 | \n", "132 | \n", "0 | \n", "30 | \n", "0 | \n", "218000.0 | \n", "0.7 | \n", "136 | \n", "1 | \n", "1 | \n", "112 | \n", "0 | \n", "
\n", " | variables | \n", "VIF | \n", "
---|---|---|
0 | \n", "age | \n", "28.891087 | \n", "
1 | \n", "creatinine_phosphokinase | \n", "1.385313 | \n", "
2 | \n", "ejection_fraction | \n", "11.663433 | \n", "
3 | \n", "platelets | \n", "8.218574 | \n", "
4 | \n", "serum_creatinine | \n", "2.877972 | \n", "
5 | \n", "time | \n", "4.029306 | \n", "
6 | \n", "serum_sodium | \n", "52.366216 | \n", "
Pipeline(steps=[('preprocess',\n", " ColumnTransformer(transformers=[('numerikout',\n", " Pipeline(steps=[('outlier',\n", " Winsorizer(capping_method='iqr',\n", " tail='both')),\n", " ('imputer',\n", " SimpleImputer(strategy='most_frequent')),\n", " ('scaler',\n", " MinMaxScaler())]),\n", " ['serum_creatinine',\n", " 'creatinine_phosphokinase']),\n", " ('numerik',\n", " Pipeline(steps=[('imputer',\n", " SimpleImputer(strategy='most_frequent')),\n", " ('scaler',\n", " StandardScaler())]),\n", " ['time']),\n", " ('kategorik',\n", " Pipeline(steps=[('imputer',\n", " SimpleImputer(strategy='most_frequent'))]),\n", " ['anaemia',\n", " 'high_blood_pressure',\n", " 'diabetes'])])),\n", " ('rf', RandomForestClassifier(random_state=69))])In a Jupyter environment, please rerun this cell to show the HTML representation or trust the notebook.
Pipeline(steps=[('preprocess',\n", " ColumnTransformer(transformers=[('numerikout',\n", " Pipeline(steps=[('outlier',\n", " Winsorizer(capping_method='iqr',\n", " tail='both')),\n", " ('imputer',\n", " SimpleImputer(strategy='most_frequent')),\n", " ('scaler',\n", " MinMaxScaler())]),\n", " ['serum_creatinine',\n", " 'creatinine_phosphokinase']),\n", " ('numerik',\n", " Pipeline(steps=[('imputer',\n", " SimpleImputer(strategy='most_frequent')),\n", " ('scaler',\n", " StandardScaler())]),\n", " ['time']),\n", " ('kategorik',\n", " Pipeline(steps=[('imputer',\n", " SimpleImputer(strategy='most_frequent'))]),\n", " ['anaemia',\n", " 'high_blood_pressure',\n", " 'diabetes'])])),\n", " ('rf', RandomForestClassifier(random_state=69))])
ColumnTransformer(transformers=[('numerikout',\n", " Pipeline(steps=[('outlier',\n", " Winsorizer(capping_method='iqr',\n", " tail='both')),\n", " ('imputer',\n", " SimpleImputer(strategy='most_frequent')),\n", " ('scaler', MinMaxScaler())]),\n", " ['serum_creatinine',\n", " 'creatinine_phosphokinase']),\n", " ('numerik',\n", " Pipeline(steps=[('imputer',\n", " SimpleImputer(strategy='most_frequent')),\n", " ('scaler', StandardScaler())]),\n", " ['time']),\n", " ('kategorik',\n", " Pipeline(steps=[('imputer',\n", " SimpleImputer(strategy='most_frequent'))]),\n", " ['anaemia', 'high_blood_pressure',\n", " 'diabetes'])])
['serum_creatinine', 'creatinine_phosphokinase']
Winsorizer(capping_method='iqr', tail='both')
SimpleImputer(strategy='most_frequent')
MinMaxScaler()
['time']
SimpleImputer(strategy='most_frequent')
StandardScaler()
['anaemia', 'high_blood_pressure', 'diabetes']
SimpleImputer(strategy='most_frequent')
RandomForestClassifier(random_state=69)
GridSearchCV(cv=10,\n", " estimator=Pipeline(steps=[('preprocess',\n", " ColumnTransformer(transformers=[('numerikout',\n", " Pipeline(steps=[('outlier',\n", " Winsorizer(capping_method='iqr',\n", " tail='both')),\n", " ('imputer',\n", " SimpleImputer(strategy='most_frequent')),\n", " ('scaler',\n", " MinMaxScaler())]),\n", " ['serum_creatinine',\n", " 'creatinine_phosphokinase']),\n", " ('numerik',\n", " Pipeline(steps=[('imputer',\n", " Simpl...\n", " StandardScaler())]),\n", " ['time']),\n", " ('kategorik',\n", " Pipeline(steps=[('imputer',\n", " SimpleImputer(strategy='most_frequent'))]),\n", " ['anaemia',\n", " 'high_blood_pressure',\n", " 'diabetes'])])),\n", " ('rf',\n", " RandomForestClassifier(random_state=69))]),\n", " param_grid={'rf__max_depth': [None, 5, 10],\n", " 'rf__min_samples_leaf': [1, 2, 4],\n", " 'rf__min_samples_split': [2, 5, 10],\n", " 'rf__n_estimators': [100, 200, 300]},\n", " scoring='f1')In a Jupyter environment, please rerun this cell to show the HTML representation or trust the notebook.
GridSearchCV(cv=10,\n", " estimator=Pipeline(steps=[('preprocess',\n", " ColumnTransformer(transformers=[('numerikout',\n", " Pipeline(steps=[('outlier',\n", " Winsorizer(capping_method='iqr',\n", " tail='both')),\n", " ('imputer',\n", " SimpleImputer(strategy='most_frequent')),\n", " ('scaler',\n", " MinMaxScaler())]),\n", " ['serum_creatinine',\n", " 'creatinine_phosphokinase']),\n", " ('numerik',\n", " Pipeline(steps=[('imputer',\n", " Simpl...\n", " StandardScaler())]),\n", " ['time']),\n", " ('kategorik',\n", " Pipeline(steps=[('imputer',\n", " SimpleImputer(strategy='most_frequent'))]),\n", " ['anaemia',\n", " 'high_blood_pressure',\n", " 'diabetes'])])),\n", " ('rf',\n", " RandomForestClassifier(random_state=69))]),\n", " param_grid={'rf__max_depth': [None, 5, 10],\n", " 'rf__min_samples_leaf': [1, 2, 4],\n", " 'rf__min_samples_split': [2, 5, 10],\n", " 'rf__n_estimators': [100, 200, 300]},\n", " scoring='f1')
Pipeline(steps=[('preprocess',\n", " ColumnTransformer(transformers=[('numerikout',\n", " Pipeline(steps=[('outlier',\n", " Winsorizer(capping_method='iqr',\n", " tail='both')),\n", " ('imputer',\n", " SimpleImputer(strategy='most_frequent')),\n", " ('scaler',\n", " MinMaxScaler())]),\n", " ['serum_creatinine',\n", " 'creatinine_phosphokinase']),\n", " ('numerik',\n", " Pipeline(steps=[('imputer',\n", " SimpleImputer(strategy='most_frequent')),\n", " ('scaler',\n", " StandardScaler())]),\n", " ['time']),\n", " ('kategorik',\n", " Pipeline(steps=[('imputer',\n", " SimpleImputer(strategy='most_frequent'))]),\n", " ['anaemia',\n", " 'high_blood_pressure',\n", " 'diabetes'])])),\n", " ('rf', RandomForestClassifier(random_state=69))])
ColumnTransformer(transformers=[('numerikout',\n", " Pipeline(steps=[('outlier',\n", " Winsorizer(capping_method='iqr',\n", " tail='both')),\n", " ('imputer',\n", " SimpleImputer(strategy='most_frequent')),\n", " ('scaler', MinMaxScaler())]),\n", " ['serum_creatinine',\n", " 'creatinine_phosphokinase']),\n", " ('numerik',\n", " Pipeline(steps=[('imputer',\n", " SimpleImputer(strategy='most_frequent')),\n", " ('scaler', StandardScaler())]),\n", " ['time']),\n", " ('kategorik',\n", " Pipeline(steps=[('imputer',\n", " SimpleImputer(strategy='most_frequent'))]),\n", " ['anaemia', 'high_blood_pressure',\n", " 'diabetes'])])
['serum_creatinine', 'creatinine_phosphokinase']
Winsorizer(capping_method='iqr', tail='both')
SimpleImputer(strategy='most_frequent')
MinMaxScaler()
['time']
SimpleImputer(strategy='most_frequent')
StandardScaler()
['anaemia', 'high_blood_pressure', 'diabetes']
SimpleImputer(strategy='most_frequent')
RandomForestClassifier(random_state=69)
Pipeline(steps=[('preprocess',\n", " ColumnTransformer(transformers=[('numerikout',\n", " Pipeline(steps=[('outlier',\n", " Winsorizer(capping_method='iqr',\n", " tail='both')),\n", " ('imputer',\n", " SimpleImputer(strategy='most_frequent')),\n", " ('scaler',\n", " MinMaxScaler())]),\n", " ['serum_creatinine',\n", " 'creatinine_phosphokinase']),\n", " ('numerik',\n", " Pipeline(steps=[('imputer',\n", " SimpleImputer(strategy='most_frequent')),\n", " ('scaler',\n", " StandardScaler())]),\n", " ['time']),\n", " ('kategorik',\n", " Pipeline(steps=[('imputer',\n", " SimpleImputer(strategy='most_frequent'))]),\n", " ['anaemia',\n", " 'high_blood_pressure',\n", " 'diabetes'])])),\n", " ('rf',\n", " RandomForestClassifier(max_depth=10, min_samples_leaf=2,\n", " random_state=69))])In a Jupyter environment, please rerun this cell to show the HTML representation or trust the notebook.
Pipeline(steps=[('preprocess',\n", " ColumnTransformer(transformers=[('numerikout',\n", " Pipeline(steps=[('outlier',\n", " Winsorizer(capping_method='iqr',\n", " tail='both')),\n", " ('imputer',\n", " SimpleImputer(strategy='most_frequent')),\n", " ('scaler',\n", " MinMaxScaler())]),\n", " ['serum_creatinine',\n", " 'creatinine_phosphokinase']),\n", " ('numerik',\n", " Pipeline(steps=[('imputer',\n", " SimpleImputer(strategy='most_frequent')),\n", " ('scaler',\n", " StandardScaler())]),\n", " ['time']),\n", " ('kategorik',\n", " Pipeline(steps=[('imputer',\n", " SimpleImputer(strategy='most_frequent'))]),\n", " ['anaemia',\n", " 'high_blood_pressure',\n", " 'diabetes'])])),\n", " ('rf',\n", " RandomForestClassifier(max_depth=10, min_samples_leaf=2,\n", " random_state=69))])
ColumnTransformer(transformers=[('numerikout',\n", " Pipeline(steps=[('outlier',\n", " Winsorizer(capping_method='iqr',\n", " tail='both')),\n", " ('imputer',\n", " SimpleImputer(strategy='most_frequent')),\n", " ('scaler', MinMaxScaler())]),\n", " ['serum_creatinine',\n", " 'creatinine_phosphokinase']),\n", " ('numerik',\n", " Pipeline(steps=[('imputer',\n", " SimpleImputer(strategy='most_frequent')),\n", " ('scaler', StandardScaler())]),\n", " ['time']),\n", " ('kategorik',\n", " Pipeline(steps=[('imputer',\n", " SimpleImputer(strategy='most_frequent'))]),\n", " ['anaemia', 'high_blood_pressure',\n", " 'diabetes'])])
['serum_creatinine', 'creatinine_phosphokinase']
Winsorizer(capping_method='iqr', tail='both')
SimpleImputer(strategy='most_frequent')
MinMaxScaler()
['time']
SimpleImputer(strategy='most_frequent')
StandardScaler()
['anaemia', 'high_blood_pressure', 'diabetes']
SimpleImputer(strategy='most_frequent')
RandomForestClassifier(max_depth=10, min_samples_leaf=2, random_state=69)
Pipeline(steps=[('preprocess',\n", " ColumnTransformer(transformers=[('numerikout',\n", " Pipeline(steps=[('outlier',\n", " Winsorizer(capping_method='iqr',\n", " tail='both')),\n", " ('imputer',\n", " SimpleImputer(strategy='most_frequent')),\n", " ('scaler',\n", " MinMaxScaler())]),\n", " ['serum_creatinine',\n", " 'creatinine_phosphokinase']),\n", " ('numerik',\n", " Pipeline(steps=[('imputer',\n", " SimpleImputer(strategy='most_frequ...\n", " grow_policy=None, importance_type=None,\n", " interaction_constraints=None, max_bin=None,\n", " max_cat_threshold=None, max_cat_to_onehot=None,\n", " max_delta_step=None, max_depth=None,\n", " max_leaves=None, min_child_weight=None,\n", " missing=nan, monotone_constraints=None,\n", " n_estimators=100, n_jobs=None,\n", " num_parallel_tree=None,\n", " objective='binary:logistic', predictor=None,\n", " random_state=69, reg_alpha=None, ...))])In a Jupyter environment, please rerun this cell to show the HTML representation or trust the notebook.
Pipeline(steps=[('preprocess',\n", " ColumnTransformer(transformers=[('numerikout',\n", " Pipeline(steps=[('outlier',\n", " Winsorizer(capping_method='iqr',\n", " tail='both')),\n", " ('imputer',\n", " SimpleImputer(strategy='most_frequent')),\n", " ('scaler',\n", " MinMaxScaler())]),\n", " ['serum_creatinine',\n", " 'creatinine_phosphokinase']),\n", " ('numerik',\n", " Pipeline(steps=[('imputer',\n", " SimpleImputer(strategy='most_frequ...\n", " grow_policy=None, importance_type=None,\n", " interaction_constraints=None, max_bin=None,\n", " max_cat_threshold=None, max_cat_to_onehot=None,\n", " max_delta_step=None, max_depth=None,\n", " max_leaves=None, min_child_weight=None,\n", " missing=nan, monotone_constraints=None,\n", " n_estimators=100, n_jobs=None,\n", " num_parallel_tree=None,\n", " objective='binary:logistic', predictor=None,\n", " random_state=69, reg_alpha=None, ...))])
ColumnTransformer(transformers=[('numerikout',\n", " Pipeline(steps=[('outlier',\n", " Winsorizer(capping_method='iqr',\n", " tail='both')),\n", " ('imputer',\n", " SimpleImputer(strategy='most_frequent')),\n", " ('scaler', MinMaxScaler())]),\n", " ['serum_creatinine',\n", " 'creatinine_phosphokinase']),\n", " ('numerik',\n", " Pipeline(steps=[('imputer',\n", " SimpleImputer(strategy='most_frequent')),\n", " ('scaler', StandardScaler())]),\n", " ['time']),\n", " ('kategorik',\n", " Pipeline(steps=[('imputer',\n", " SimpleImputer(strategy='most_frequent'))]),\n", " ['anaemia', 'high_blood_pressure',\n", " 'diabetes'])])
['serum_creatinine', 'creatinine_phosphokinase']
Winsorizer(capping_method='iqr', tail='both')
SimpleImputer(strategy='most_frequent')
MinMaxScaler()
['time']
SimpleImputer(strategy='most_frequent')
StandardScaler()
['anaemia', 'high_blood_pressure', 'diabetes']
SimpleImputer(strategy='most_frequent')
XGBRFClassifier(base_score=None, booster=None, callbacks=None,\n", " colsample_bylevel=None, colsample_bytree=None,\n", " early_stopping_rounds=None, enable_categorical=False,\n", " eval_metric=None, feature_types=None, gamma=None, gpu_id=None,\n", " grow_policy=None, importance_type=None,\n", " interaction_constraints=None, max_bin=None,\n", " max_cat_threshold=None, max_cat_to_onehot=None,\n", " max_delta_step=None, max_depth=None, max_leaves=None,\n", " min_child_weight=None, missing=nan, monotone_constraints=None,\n", " n_estimators=100, n_jobs=None, num_parallel_tree=None,\n", " objective='binary:logistic', predictor=None, random_state=69,\n", " reg_alpha=None, ...)
GridSearchCV(cv=10,\n", " estimator=Pipeline(steps=[('preprocess',\n", " ColumnTransformer(transformers=[('numerikout',\n", " Pipeline(steps=[('outlier',\n", " Winsorizer(capping_method='iqr',\n", " tail='both')),\n", " ('imputer',\n", " SimpleImputer(strategy='most_frequent')),\n", " ('scaler',\n", " MinMaxScaler())]),\n", " ['serum_creatinine',\n", " 'creatinine_phosphokinase']),\n", " ('numerik',\n", " Pipeline(steps=[('imputer',\n", " Simpl...\n", " missing=nan,\n", " monotone_constraints=None,\n", " n_estimators=100,\n", " n_jobs=None,\n", " num_parallel_tree=None,\n", " objective='binary:logistic',\n", " predictor=None,\n", " random_state=69,\n", " reg_alpha=None, ...))]),\n", " param_grid={'xgb__colsample_bytree': [0.8, 1.0],\n", " 'xgb__learning_rate': [0.1, 0.01, 0.001],\n", " 'xgb__max_depth': [3, 5, 7],\n", " 'xgb__n_estimators': [100, 200, 300],\n", " 'xgb__subsample': [0.8, 1.0]},\n", " scoring='f1')In a Jupyter environment, please rerun this cell to show the HTML representation or trust the notebook.
GridSearchCV(cv=10,\n", " estimator=Pipeline(steps=[('preprocess',\n", " ColumnTransformer(transformers=[('numerikout',\n", " Pipeline(steps=[('outlier',\n", " Winsorizer(capping_method='iqr',\n", " tail='both')),\n", " ('imputer',\n", " SimpleImputer(strategy='most_frequent')),\n", " ('scaler',\n", " MinMaxScaler())]),\n", " ['serum_creatinine',\n", " 'creatinine_phosphokinase']),\n", " ('numerik',\n", " Pipeline(steps=[('imputer',\n", " Simpl...\n", " missing=nan,\n", " monotone_constraints=None,\n", " n_estimators=100,\n", " n_jobs=None,\n", " num_parallel_tree=None,\n", " objective='binary:logistic',\n", " predictor=None,\n", " random_state=69,\n", " reg_alpha=None, ...))]),\n", " param_grid={'xgb__colsample_bytree': [0.8, 1.0],\n", " 'xgb__learning_rate': [0.1, 0.01, 0.001],\n", " 'xgb__max_depth': [3, 5, 7],\n", " 'xgb__n_estimators': [100, 200, 300],\n", " 'xgb__subsample': [0.8, 1.0]},\n", " scoring='f1')
Pipeline(steps=[('preprocess',\n", " ColumnTransformer(transformers=[('numerikout',\n", " Pipeline(steps=[('outlier',\n", " Winsorizer(capping_method='iqr',\n", " tail='both')),\n", " ('imputer',\n", " SimpleImputer(strategy='most_frequent')),\n", " ('scaler',\n", " MinMaxScaler())]),\n", " ['serum_creatinine',\n", " 'creatinine_phosphokinase']),\n", " ('numerik',\n", " Pipeline(steps=[('imputer',\n", " SimpleImputer(strategy='most_frequ...\n", " grow_policy=None, importance_type=None,\n", " interaction_constraints=None, max_bin=None,\n", " max_cat_threshold=None, max_cat_to_onehot=None,\n", " max_delta_step=None, max_depth=None,\n", " max_leaves=None, min_child_weight=None,\n", " missing=nan, monotone_constraints=None,\n", " n_estimators=100, n_jobs=None,\n", " num_parallel_tree=None,\n", " objective='binary:logistic', predictor=None,\n", " random_state=69, reg_alpha=None, ...))])
ColumnTransformer(transformers=[('numerikout',\n", " Pipeline(steps=[('outlier',\n", " Winsorizer(capping_method='iqr',\n", " tail='both')),\n", " ('imputer',\n", " SimpleImputer(strategy='most_frequent')),\n", " ('scaler', MinMaxScaler())]),\n", " ['serum_creatinine',\n", " 'creatinine_phosphokinase']),\n", " ('numerik',\n", " Pipeline(steps=[('imputer',\n", " SimpleImputer(strategy='most_frequent')),\n", " ('scaler', StandardScaler())]),\n", " ['time']),\n", " ('kategorik',\n", " Pipeline(steps=[('imputer',\n", " SimpleImputer(strategy='most_frequent'))]),\n", " ['anaemia', 'high_blood_pressure',\n", " 'diabetes'])])
['serum_creatinine', 'creatinine_phosphokinase']
Winsorizer(capping_method='iqr', tail='both')
SimpleImputer(strategy='most_frequent')
MinMaxScaler()
['time']
SimpleImputer(strategy='most_frequent')
StandardScaler()
['anaemia', 'high_blood_pressure', 'diabetes']
SimpleImputer(strategy='most_frequent')
XGBRFClassifier(base_score=None, booster=None, callbacks=None,\n", " colsample_bylevel=None, colsample_bytree=None,\n", " early_stopping_rounds=None, enable_categorical=False,\n", " eval_metric=None, feature_types=None, gamma=None, gpu_id=None,\n", " grow_policy=None, importance_type=None,\n", " interaction_constraints=None, max_bin=None,\n", " max_cat_threshold=None, max_cat_to_onehot=None,\n", " max_delta_step=None, max_depth=None, max_leaves=None,\n", " min_child_weight=None, missing=nan, monotone_constraints=None,\n", " n_estimators=100, n_jobs=None, num_parallel_tree=None,\n", " objective='binary:logistic', predictor=None, random_state=69,\n", " reg_alpha=None, ...)
Pipeline(steps=[('preprocess',\n", " ColumnTransformer(transformers=[('numerikout',\n", " Pipeline(steps=[('outlier',\n", " Winsorizer(capping_method='iqr',\n", " tail='both')),\n", " ('imputer',\n", " SimpleImputer(strategy='most_frequent')),\n", " ('scaler',\n", " MinMaxScaler())]),\n", " ['serum_creatinine',\n", " 'creatinine_phosphokinase']),\n", " ('numerik',\n", " Pipeline(steps=[('imputer',\n", " SimpleImputer(strategy='most_frequ...\n", " grow_policy=None, importance_type=None,\n", " interaction_constraints=None,\n", " learning_rate=0.1, max_bin=None,\n", " max_cat_threshold=None, max_cat_to_onehot=None,\n", " max_delta_step=None, max_depth=7,\n", " max_leaves=None, min_child_weight=None,\n", " missing=nan, monotone_constraints=None,\n", " n_estimators=200, n_jobs=None,\n", " num_parallel_tree=None,\n", " objective='binary:logistic', predictor=None,\n", " random_state=69, ...))])In a Jupyter environment, please rerun this cell to show the HTML representation or trust the notebook.
Pipeline(steps=[('preprocess',\n", " ColumnTransformer(transformers=[('numerikout',\n", " Pipeline(steps=[('outlier',\n", " Winsorizer(capping_method='iqr',\n", " tail='both')),\n", " ('imputer',\n", " SimpleImputer(strategy='most_frequent')),\n", " ('scaler',\n", " MinMaxScaler())]),\n", " ['serum_creatinine',\n", " 'creatinine_phosphokinase']),\n", " ('numerik',\n", " Pipeline(steps=[('imputer',\n", " SimpleImputer(strategy='most_frequ...\n", " grow_policy=None, importance_type=None,\n", " interaction_constraints=None,\n", " learning_rate=0.1, max_bin=None,\n", " max_cat_threshold=None, max_cat_to_onehot=None,\n", " max_delta_step=None, max_depth=7,\n", " max_leaves=None, min_child_weight=None,\n", " missing=nan, monotone_constraints=None,\n", " n_estimators=200, n_jobs=None,\n", " num_parallel_tree=None,\n", " objective='binary:logistic', predictor=None,\n", " random_state=69, ...))])
ColumnTransformer(transformers=[('numerikout',\n", " Pipeline(steps=[('outlier',\n", " Winsorizer(capping_method='iqr',\n", " tail='both')),\n", " ('imputer',\n", " SimpleImputer(strategy='most_frequent')),\n", " ('scaler', MinMaxScaler())]),\n", " ['serum_creatinine',\n", " 'creatinine_phosphokinase']),\n", " ('numerik',\n", " Pipeline(steps=[('imputer',\n", " SimpleImputer(strategy='most_frequent')),\n", " ('scaler', StandardScaler())]),\n", " ['time']),\n", " ('kategorik',\n", " Pipeline(steps=[('imputer',\n", " SimpleImputer(strategy='most_frequent'))]),\n", " ['anaemia', 'high_blood_pressure',\n", " 'diabetes'])])
['serum_creatinine', 'creatinine_phosphokinase']
Winsorizer(capping_method='iqr', tail='both')
SimpleImputer(strategy='most_frequent')
MinMaxScaler()
['time']
SimpleImputer(strategy='most_frequent')
StandardScaler()
['anaemia', 'high_blood_pressure', 'diabetes']
SimpleImputer(strategy='most_frequent')
XGBRFClassifier(base_score=None, booster=None, callbacks=None,\n", " colsample_bylevel=None, colsample_bytree=1.0,\n", " early_stopping_rounds=None, enable_categorical=False,\n", " eval_metric=None, feature_types=None, gamma=None, gpu_id=None,\n", " grow_policy=None, importance_type=None,\n", " interaction_constraints=None, learning_rate=0.1, max_bin=None,\n", " max_cat_threshold=None, max_cat_to_onehot=None,\n", " max_delta_step=None, max_depth=7, max_leaves=None,\n", " min_child_weight=None, missing=nan, monotone_constraints=None,\n", " n_estimators=200, n_jobs=None, num_parallel_tree=None,\n", " objective='binary:logistic', predictor=None, random_state=69, ...)
\n", " | age | \n", "anaemia | \n", "creatinine_phosphokinase | \n", "diabetes | \n", "ejection_fraction | \n", "high_blood_pressure | \n", "platelets | \n", "serum_creatinine | \n", "serum_sodium | \n", "sex | \n", "smoking | \n", "time | \n", "death_event | \n", "
---|---|---|---|---|---|---|---|---|---|---|---|---|---|
0 | \n", "72.0 | \n", "0 | \n", "211 | \n", "0 | \n", "25 | \n", "0 | \n", "274000.0 | \n", "1.2 | \n", "134 | \n", "0 | \n", "0 | \n", "207 | \n", "0 | \n", "
1 | \n", "42.0 | \n", "0 | \n", "64 | \n", "0 | \n", "40 | \n", "0 | \n", "189000.0 | \n", "0.7 | \n", "140 | \n", "1 | \n", "0 | \n", "245 | \n", "0 | \n", "
2 | \n", "49.0 | \n", "0 | \n", "972 | \n", "1 | \n", "35 | \n", "1 | \n", "268000.0 | \n", "0.8 | \n", "130 | \n", "0 | \n", "0 | \n", "187 | \n", "0 | \n", "
3 | \n", "59.0 | \n", "1 | \n", "176 | \n", "1 | \n", "25 | \n", "0 | \n", "221000.0 | \n", "1.0 | \n", "136 | \n", "1 | \n", "1 | \n", "150 | \n", "1 | \n", "
4 | \n", "58.0 | \n", "0 | \n", "132 | \n", "1 | \n", "38 | \n", "1 | \n", "253000.0 | \n", "1.0 | \n", "139 | \n", "1 | \n", "0 | \n", "230 | \n", "0 | \n", "
5 | \n", "50.0 | \n", "1 | \n", "298 | \n", "0 | \n", "35 | \n", "0 | \n", "362000.0 | \n", "0.9 | \n", "140 | \n", "1 | \n", "1 | \n", "240 | \n", "0 | \n", "
6 | \n", "42.0 | \n", "0 | \n", "102 | \n", "1 | \n", "40 | \n", "0 | \n", "237000.0 | \n", "1.2 | \n", "140 | \n", "1 | \n", "0 | \n", "74 | \n", "0 | \n", "
7 | \n", "55.0 | \n", "1 | \n", "170 | \n", "1 | \n", "40 | \n", "0 | \n", "336000.0 | \n", "1.2 | \n", "135 | \n", "1 | \n", "0 | \n", "250 | \n", "0 | \n", "
8 | \n", "79.0 | \n", "1 | \n", "55 | \n", "0 | \n", "50 | \n", "1 | \n", "172000.0 | \n", "1.8 | \n", "133 | \n", "1 | \n", "0 | \n", "78 | \n", "0 | \n", "
9 | \n", "52.0 | \n", "0 | \n", "132 | \n", "0 | \n", "30 | \n", "0 | \n", "218000.0 | \n", "0.7 | \n", "136 | \n", "1 | \n", "1 | \n", "112 | \n", "0 | \n", "
\n", " | anaemia | \n", "creatinine_phosphokinase | \n", "diabetes | \n", "high_blood_pressure | \n", "serum_creatinine | \n", "time | \n", "death_event | \n", "
---|---|---|---|---|---|---|---|
0 | \n", "0 | \n", "211 | \n", "0 | \n", "0 | \n", "1.2 | \n", "207 | \n", "0 | \n", "
1 | \n", "0 | \n", "64 | \n", "0 | \n", "0 | \n", "0.7 | \n", "245 | \n", "0 | \n", "
2 | \n", "0 | \n", "972 | \n", "1 | \n", "1 | \n", "0.8 | \n", "187 | \n", "0 | \n", "
3 | \n", "1 | \n", "176 | \n", "1 | \n", "0 | \n", "1.0 | \n", "150 | \n", "1 | \n", "
4 | \n", "0 | \n", "132 | \n", "1 | \n", "1 | \n", "1.0 | \n", "230 | \n", "0 | \n", "
5 | \n", "1 | \n", "298 | \n", "0 | \n", "0 | \n", "0.9 | \n", "240 | \n", "0 | \n", "
6 | \n", "0 | \n", "102 | \n", "1 | \n", "0 | \n", "1.2 | \n", "74 | \n", "0 | \n", "
7 | \n", "1 | \n", "170 | \n", "1 | \n", "0 | \n", "1.2 | \n", "250 | \n", "0 | \n", "
8 | \n", "1 | \n", "55 | \n", "0 | \n", "1 | \n", "1.8 | \n", "78 | \n", "0 | \n", "
9 | \n", "0 | \n", "132 | \n", "0 | \n", "0 | \n", "0.7 | \n", "112 | \n", "0 | \n", "
\n", " | RF | \n", "
---|---|
0 | \n", "0 | \n", "
1 | \n", "0 | \n", "
2 | \n", "0 | \n", "
3 | \n", "0 | \n", "
4 | \n", "0 | \n", "
5 | \n", "0 | \n", "
6 | \n", "0 | \n", "
7 | \n", "0 | \n", "
8 | \n", "0 | \n", "
9 | \n", "0 | \n", "
\n", " | RFGS | \n", "
---|---|
0 | \n", "0 | \n", "
1 | \n", "0 | \n", "
2 | \n", "0 | \n", "
3 | \n", "0 | \n", "
4 | \n", "0 | \n", "
5 | \n", "0 | \n", "
6 | \n", "0 | \n", "
7 | \n", "0 | \n", "
8 | \n", "0 | \n", "
9 | \n", "0 | \n", "
\n", " | XG | \n", "
---|---|
0 | \n", "0 | \n", "
1 | \n", "0 | \n", "
2 | \n", "0 | \n", "
3 | \n", "0 | \n", "
4 | \n", "0 | \n", "
5 | \n", "0 | \n", "
6 | \n", "0 | \n", "
7 | \n", "0 | \n", "
8 | \n", "0 | \n", "
9 | \n", "0 | \n", "
\n", " | XGGS | \n", "
---|---|
0 | \n", "0 | \n", "
1 | \n", "0 | \n", "
2 | \n", "0 | \n", "
3 | \n", "0 | \n", "
4 | \n", "0 | \n", "
5 | \n", "0 | \n", "
6 | \n", "0 | \n", "
7 | \n", "0 | \n", "
8 | \n", "0 | \n", "
9 | \n", "0 | \n", "
\n", " | anaemia | \n", "creatinine_phosphokinase | \n", "diabetes | \n", "high_blood_pressure | \n", "serum_creatinine | \n", "time | \n", "death_event | \n", "RF | \n", "RFGS | \n", "XG | \n", "XGGS | \n", "
---|---|---|---|---|---|---|---|---|---|---|---|
0 | \n", "0 | \n", "211 | \n", "0 | \n", "0 | \n", "1.2 | \n", "207 | \n", "0 | \n", "0 | \n", "0 | \n", "0 | \n", "0 | \n", "
1 | \n", "0 | \n", "64 | \n", "0 | \n", "0 | \n", "0.7 | \n", "245 | \n", "0 | \n", "0 | \n", "0 | \n", "0 | \n", "0 | \n", "
2 | \n", "0 | \n", "972 | \n", "1 | \n", "1 | \n", "0.8 | \n", "187 | \n", "0 | \n", "0 | \n", "0 | \n", "0 | \n", "0 | \n", "
3 | \n", "1 | \n", "176 | \n", "1 | \n", "0 | \n", "1.0 | \n", "150 | \n", "1 | \n", "0 | \n", "0 | \n", "0 | \n", "0 | \n", "
4 | \n", "0 | \n", "132 | \n", "1 | \n", "1 | \n", "1.0 | \n", "230 | \n", "0 | \n", "0 | \n", "0 | \n", "0 | \n", "0 | \n", "
5 | \n", "1 | \n", "298 | \n", "0 | \n", "0 | \n", "0.9 | \n", "240 | \n", "0 | \n", "0 | \n", "0 | \n", "0 | \n", "0 | \n", "
6 | \n", "0 | \n", "102 | \n", "1 | \n", "0 | \n", "1.2 | \n", "74 | \n", "0 | \n", "0 | \n", "0 | \n", "0 | \n", "0 | \n", "
7 | \n", "1 | \n", "170 | \n", "1 | \n", "0 | \n", "1.2 | \n", "250 | \n", "0 | \n", "0 | \n", "0 | \n", "0 | \n", "0 | \n", "
8 | \n", "1 | \n", "55 | \n", "0 | \n", "1 | \n", "1.8 | \n", "78 | \n", "0 | \n", "0 | \n", "0 | \n", "0 | \n", "0 | \n", "
9 | \n", "0 | \n", "132 | \n", "0 | \n", "0 | \n", "0.7 | \n", "112 | \n", "0 | \n", "0 | \n", "0 | \n", "0 | \n", "0 | \n", "