{
 "cells": [
  {
   "cell_type": "markdown",
   "id": "a0c8ad0b",
   "metadata": {},
   "source": [
    "# Dataset\n",
    "https://www.kaggle.com/datasets/abhia1999/chronic-kidney-disease"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 1,
   "id": "61347359",
   "metadata": {},
   "outputs": [],
   "source": [
    "import pandas as pd \n",
    "from sklearn.ensemble import RandomForestClassifier\n",
    "import pickle"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 2,
   "id": "b470f34b",
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "     Bp     Sg   Al   Su  Rbc    Bu   Sc     Sod   Pot  Hemo    Wbcc  Rbcc  \\\n",
      "0  80.0  1.020  1.0  0.0  1.0  36.0  1.2  137.53  4.63  15.4  7800.0  5.20   \n",
      "1  50.0  1.020  4.0  0.0  1.0  18.0  0.8  137.53  4.63  11.3  6000.0  4.71   \n",
      "2  80.0  1.010  2.0  3.0  1.0  53.0  1.8  137.53  4.63   9.6  7500.0  4.71   \n",
      "3  70.0  1.005  4.0  0.0  1.0  56.0  3.8  111.00  2.50  11.2  6700.0  3.90   \n",
      "4  80.0  1.010  2.0  0.0  1.0  26.0  1.4  137.53  4.63  11.6  7300.0  4.60   \n",
      "\n",
      "   Htn  Class  \n",
      "0  1.0      1  \n",
      "1  0.0      1  \n",
      "2  0.0      1  \n",
      "3  1.0      1  \n",
      "4  0.0      1  \n"
     ]
    }
   ],
   "source": [
    "# Lift pandas' display truncation for both columns and rows.\n",
    "for display_option in ('display.max_columns', 'display.max_rows'):\n",
    "    pd.set_option(display_option, None)\n",
    "\n",
    "# Read the CSV into a frame and preview its leading rows.\n",
    "df_kidney = pd.read_csv('new_model.csv')\n",
    "print(df_kidney.head())"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 3,
   "id": "412d6119",
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "Index(['Bp', 'Sg', 'Al', 'Su', 'Rbc', 'Bu', 'Sc', 'Sod', 'Pot', 'Hemo', 'Wbcc',\n",
       "       'Rbcc', 'Htn', 'Class'],\n",
       "      dtype='object')"
      ]
     },
     "execution_count": 3,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "# List the column labels of the loaded frame.\n",
    "df_kidney.columns"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 4,
   "id": "46cacac6",
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "Bp         50.000\n",
       "Sg          1.005\n",
       "Al          0.000\n",
       "Su          0.000\n",
       "Rbc         0.000\n",
       "Bu          1.500\n",
       "Sc          0.400\n",
       "Sod         4.500\n",
       "Pot         2.500\n",
       "Hemo        3.100\n",
       "Wbcc     2200.000\n",
       "Rbcc        2.100\n",
       "Htn         0.000\n",
       "Class       0.000\n",
       "dtype: float64"
      ]
     },
     "execution_count": 4,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "# Per-column minimum, shown as a quick lower-bound check on each field.\n",
    "df_kidney.min()"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 5,
   "id": "a4ac47e3",
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "Bp         180.000\n",
       "Sg           1.025\n",
       "Al           5.000\n",
       "Su           5.000\n",
       "Rbc          1.000\n",
       "Bu         391.000\n",
       "Sc          76.000\n",
       "Sod        163.000\n",
       "Pot         47.000\n",
       "Hemo        17.800\n",
       "Wbcc     26400.000\n",
       "Rbcc         8.000\n",
       "Htn          1.000\n",
       "Class        1.000\n",
       "dtype: float64"
      ]
     },
     "execution_count": 5,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "# Per-column maximum, shown as a quick upper-bound check on each field.\n",
    "df_kidney.max()"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 6,
   "id": "7c835179",
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "<class 'pandas.core.frame.DataFrame'>\n",
      "RangeIndex: 400 entries, 0 to 399\n",
      "Data columns (total 14 columns):\n",
      " #   Column  Non-Null Count  Dtype  \n",
      "---  ------  --------------  -----  \n",
      " 0   Bp      400 non-null    float64\n",
      " 1   Sg      400 non-null    float64\n",
      " 2   Al      400 non-null    float64\n",
      " 3   Su      400 non-null    float64\n",
      " 4   Rbc     400 non-null    float64\n",
      " 5   Bu      400 non-null    float64\n",
      " 6   Sc      400 non-null    float64\n",
      " 7   Sod     400 non-null    float64\n",
      " 8   Pot     400 non-null    float64\n",
      " 9   Hemo    400 non-null    float64\n",
      " 10  Wbcc    400 non-null    float64\n",
      " 11  Rbcc    400 non-null    float64\n",
      " 12  Htn     400 non-null    float64\n",
      " 13  Class   400 non-null    int64  \n",
      "dtypes: float64(13), int64(1)\n",
      "memory usage: 43.9 KB\n"
     ]
    }
   ],
   "source": [
    "# Summarize dtypes, non-null counts and memory usage of the frame.\n",
    "df_kidney.info()"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 7,
   "id": "c5784db2",
   "metadata": {},
   "outputs": [],
   "source": [
    "# Target vector: the 'Class' label column.\n",
    "Y = df_kidney['Class']\n",
    "# Feature matrix: every column except the label.\n",
    "X = df_kidney.drop(columns=['Class'])"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 8,
   "id": "b0c5426f",
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "RandomForestClassifier()"
      ]
     },
     "execution_count": 8,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "# Fix the seed so the forest (bootstrap samples and feature subsets) is\n",
    "# identical on every Restart and Run All; without it each run would fit\n",
    "# and later pickle a different model.\n",
    "# NOTE(review): the model is fit on every row with no held-out split, so\n",
    "# this notebook produces no estimate of generalization performance.\n",
    "clf = RandomForestClassifier(random_state=42)\n",
    "clf.fit(X, Y)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 10,
   "id": "db98ab11",
   "metadata": {},
   "outputs": [],
   "source": [
    "# Persist the fitted classifier. The context manager guarantees the file\n",
    "# is flushed and closed even if pickling raises, instead of leaving the\n",
    "# handle open until garbage collection.\n",
    "with open('kidney_clf.pkl', 'wb') as model_file:\n",
    "    pickle.dump(clf, model_file)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "3bbd5901",
   "metadata": {},
   "outputs": [],
   "source": []
  }
 ],
 "metadata": {
  "kernelspec": {
   "display_name": "Python 3 (ipykernel)",
   "language": "python",
   "name": "python3"
  },
  "language_info": {
   "codemirror_mode": {
    "name": "ipython",
    "version": 3
   },
   "file_extension": ".py",
   "mimetype": "text/x-python",
   "name": "python",
   "nbconvert_exporter": "python",
   "pygments_lexer": "ipython3",
   "version": "3.9.12"
  }
 },
 "nbformat": 4,
 "nbformat_minor": 5
}