{ "nbformat": 4, "nbformat_minor": 0, "metadata": { "colab": { "provenance": [] }, "kernelspec": { "name": "python3", "display_name": "Python 3" }, "language_info": { "name": "python" } }, "cells": [ { "cell_type": "markdown", "source": [ "# Kidney Condition Detection" ], "metadata": { "id": "mWqGedZ2IHlm" } }, { "cell_type": "code", "execution_count": 1, "metadata": { "id": "L96SNQ8HVI7m" }, "outputs": [], "source": [ "# imports\n", "import tensorflow as tf\n", "import pandas as pd\n", "import numpy as np\n", "import matplotlib.pyplot as plt\n", "from sklearn.preprocessing import StandardScaler\n", "from imblearn.over_sampling import RandomOverSampler\n", "import seaborn as sns\n", "from sklearn.model_selection import train_test_split" ] }, { "cell_type": "code", "source": [ "# using drive to load our dataset\n", "from google.colab import drive\n", "drive.mount('/content/drive')" ], "metadata": { "colab": { "base_uri": "https://localhost:8080/" }, "id": "Ea3adROCVORJ", "outputId": "9fcec8fc-24df-4307-8efe-ad0bf7967226" }, "execution_count": 2, "outputs": [ { "output_type": "stream", "name": "stdout", "text": [ "Mounted at /content/drive\n" ] } ] },
{ "cell_type": "code", "source": [ "# Read the kidney-disease CSV from the mounted Drive and discard, in a single\n", "# call, the columns that are not used further down in this notebook.\n", "df = pd.read_csv(\"/content/drive/MyDrive/dataset/kidney_disease.csv\").drop(\n", "    columns=['id', 'sg', 'al', 'su', 'rbc', 'pc', 'pcc', 'pcv', 'ba', 'sc', 'dm', 'cad', 'pe']\n", ")\n", "df" ], "metadata": { "id": "puQFhXRM_inf", "colab": { "base_uri": "https://localhost:8080/", "height": 423 }, "outputId": "8f6b8886-f61d-4b10-8eea-e1a38ecf8cc4" }, "execution_count": 34, "outputs": [ { "output_type": "execute_result", "data": { "text/plain": [ " age bp bgr bu sod pot hemo wc rc htn appet ane \\\n", "0 48.0 80.0 121.0 36.0 NaN NaN 15.4 7800 5.2 yes good no \n", "1 7.0 50.0 NaN 18.0 NaN NaN 11.3 6000 NaN no good no \n", "2 62.0 80.0 423.0 53.0 NaN NaN 9.6 7500 NaN no poor yes \n", "3 48.0 70.0 117.0 56.0 111.0 2.5 11.2 6700 3.9 yes poor yes \n", "4 51.0 80.0 106.0 
26.0 NaN NaN 11.6 7300 4.6 no good no \n", ".. ... ... ... ... ... ... ... ... ... ... ... ... \n", "395 55.0 80.0 140.0 49.0 150.0 4.9 15.7 6700 4.9 no good no \n", "396 42.0 70.0 75.0 31.0 141.0 3.5 16.5 7800 6.2 no good no \n", "397 12.0 80.0 100.0 26.0 137.0 4.4 15.8 6600 5.4 no good no \n", "398 17.0 60.0 114.0 50.0 135.0 4.9 14.2 7200 5.9 no good no \n", "399 58.0 80.0 131.0 18.0 141.0 3.5 15.8 6800 6.1 no good no \n", "\n", " classification \n", "0 ckd \n", "1 ckd \n", "2 ckd \n", "3 ckd \n", "4 ckd \n", ".. ... \n", "395 notckd \n", "396 notckd \n", "397 notckd \n", "398 notckd \n", "399 notckd \n", "\n", "[400 rows x 13 columns]" ], "text/html": [ "\n", "
\n", "
\n", "\n", "\n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", "
agebpbgrbusodpothemowcrchtnappetaneclassification
048.080.0121.036.0NaNNaN15.478005.2yesgoodnockd
17.050.0NaN18.0NaNNaN11.36000NaNnogoodnockd
262.080.0423.053.0NaNNaN9.67500NaNnopooryesckd
348.070.0117.056.0111.02.511.267003.9yespooryesckd
451.080.0106.026.0NaNNaN11.673004.6nogoodnockd
..........................................
39555.080.0140.049.0150.04.915.767004.9nogoodnonotckd
39642.070.075.031.0141.03.516.578006.2nogoodnonotckd
39712.080.0100.026.0137.04.415.866005.4nogoodnonotckd
39817.060.0114.050.0135.04.914.272005.9nogoodnonotckd
39958.080.0131.018.0141.03.515.868006.1nogoodnonotckd
\n", "

400 rows × 13 columns

\n", "
\n", "
\n", "\n", "
\n", " \n", "\n", " \n", "\n", " \n", "
\n", "\n", "\n", "
\n", " \n", "\n", "\n", "\n", " \n", "
\n", "\n", "
\n", " \n", " \n", " \n", "
\n", "\n", "
\n", "
\n" ], "application/vnd.google.colaboratory.intrinsic+json": { "type": "dataframe", "variable_name": "df", "summary": "{\n \"name\": \"df\",\n \"rows\": 400,\n \"fields\": [\n {\n \"column\": \"age\",\n \"properties\": {\n \"dtype\": \"number\",\n \"std\": 17.16971408926224,\n \"min\": 2.0,\n \"max\": 90.0,\n \"num_unique_values\": 76,\n \"samples\": [\n 60.0,\n 64.0,\n 63.0\n ],\n \"semantic_type\": \"\",\n \"description\": \"\"\n }\n },\n {\n \"column\": \"bp\",\n \"properties\": {\n \"dtype\": \"number\",\n \"std\": 13.68363749352526,\n \"min\": 50.0,\n \"max\": 180.0,\n \"num_unique_values\": 10,\n \"samples\": [\n 180.0,\n 50.0,\n 60.0\n ],\n \"semantic_type\": \"\",\n \"description\": \"\"\n }\n },\n {\n \"column\": \"bgr\",\n \"properties\": {\n \"dtype\": \"number\",\n \"std\": 79.28171423511773,\n \"min\": 22.0,\n \"max\": 490.0,\n \"num_unique_values\": 146,\n \"samples\": [\n 146.0,\n 273.0,\n 140.0\n ],\n \"semantic_type\": \"\",\n \"description\": \"\"\n }\n },\n {\n \"column\": \"bu\",\n \"properties\": {\n \"dtype\": \"number\",\n \"std\": 50.50300584922251,\n \"min\": 1.5,\n \"max\": 391.0,\n \"num_unique_values\": 118,\n \"samples\": [\n 48.0,\n 76.0,\n 26.0\n ],\n \"semantic_type\": \"\",\n \"description\": \"\"\n }\n },\n {\n \"column\": \"sod\",\n \"properties\": {\n \"dtype\": \"number\",\n \"std\": 10.40875205179878,\n \"min\": 4.5,\n \"max\": 163.0,\n \"num_unique_values\": 34,\n \"samples\": [\n 133.0,\n 137.0,\n 124.0\n ],\n \"semantic_type\": \"\",\n \"description\": \"\"\n }\n },\n {\n \"column\": \"pot\",\n \"properties\": {\n \"dtype\": \"number\",\n \"std\": 3.193904176556695,\n \"min\": 2.5,\n \"max\": 47.0,\n \"num_unique_values\": 40,\n \"samples\": [\n 6.6,\n 5.9,\n 4.7\n ],\n \"semantic_type\": \"\",\n \"description\": \"\"\n }\n },\n {\n \"column\": \"hemo\",\n \"properties\": {\n \"dtype\": \"number\",\n \"std\": 2.9125866088267642,\n \"min\": 3.1,\n \"max\": 17.8,\n \"num_unique_values\": 115,\n \"samples\": [\n 6.3,\n 11.6,\n 
4.8\n ],\n \"semantic_type\": \"\",\n \"description\": \"\"\n }\n },\n {\n \"column\": \"wc\",\n \"properties\": {\n \"dtype\": \"category\",\n \"num_unique_values\": 92,\n \"samples\": [\n \"12500\",\n \"6400\",\n \"5000\"\n ],\n \"semantic_type\": \"\",\n \"description\": \"\"\n }\n },\n {\n \"column\": \"rc\",\n \"properties\": {\n \"dtype\": \"category\",\n \"num_unique_values\": 49,\n \"samples\": [\n \"3.6\",\n \"6.4\",\n \"5.9\"\n ],\n \"semantic_type\": \"\",\n \"description\": \"\"\n }\n },\n {\n \"column\": \"htn\",\n \"properties\": {\n \"dtype\": \"category\",\n \"num_unique_values\": 2,\n \"samples\": [\n \"no\",\n \"yes\"\n ],\n \"semantic_type\": \"\",\n \"description\": \"\"\n }\n },\n {\n \"column\": \"appet\",\n \"properties\": {\n \"dtype\": \"category\",\n \"num_unique_values\": 2,\n \"samples\": [\n \"poor\",\n \"good\"\n ],\n \"semantic_type\": \"\",\n \"description\": \"\"\n }\n },\n {\n \"column\": \"ane\",\n \"properties\": {\n \"dtype\": \"category\",\n \"num_unique_values\": 2,\n \"samples\": [\n \"yes\",\n \"no\"\n ],\n \"semantic_type\": \"\",\n \"description\": \"\"\n }\n },\n {\n \"column\": \"classification\",\n \"properties\": {\n \"dtype\": \"category\",\n \"num_unique_values\": 3,\n \"samples\": [\n \"ckd\",\n \"ckd\\t\"\n ],\n \"semantic_type\": \"\",\n \"description\": \"\"\n }\n }\n ]\n}" } }, "metadata": {}, "execution_count": 34 } ] },
{ "cell_type": "code", "source": [ "# Encode the two-valued text columns as 0/1 integers.\n", "# A missing entry compares unequal to every string, so it is encoded as 0 here.\n", "df['htn'] = (df['htn'] == 'yes').astype(int)\n", "df['appet'] = (df['appet'] == 'good').astype(int)\n", "df['ane'] = (df['ane'] == 'yes').astype(int)\n", "# The raw label column also contains a tab-suffixed variant of 'ckd'. Strip\n", "# surrounding whitespace before comparing so those rows are not silently\n", "# encoded as 0 (not CKD).\n", "df['classification'] = (df['classification'].str.strip() == 'ckd').astype(int)\n" ], "metadata": { "id": "Rf9xpgMNEG3y" }, "execution_count": 35, "outputs": [] },
{ "cell_type": "code", "source": [ "# Replace every remaining missing value with 0.\n", "# NOTE(review): 0 is used as the placeholder for every column; a median/mode\n", "# imputation may be more appropriate for the lab measurements - confirm.\n", "df = df.fillna(0)" ], "metadata": { "id": "V5Eelp9zFbUV" }, "execution_count": 39, "outputs": [] },
{ "cell_type": "code", "source": [ "# Remove four rows by index label. On the freshly loaded frame the labels equal\n", "# the positions, so the first run drops the same rows as before; unlike the\n", "# positional df.index[i] form, re-running this cell does not remove four more.\n", "# NOTE(review): presumably these are the rows with malformed wc/rc entries - confirm.\n", "df = df.drop(index=[76, 133, 162, 185], errors='ignore')" ], "metadata": { "id": 
"u96jEKrBHH0G" }, "execution_count": 40, "outputs": [] }, { "cell_type": "code", "source": [ "df" ], "metadata": { "colab": { "base_uri": "https://localhost:8080/", "height": 423 }, "id": "B4SayP5qHgwp", "outputId": "8b5af7c5-3834-4bdc-b693-1e7e8f2cb8d9" }, "execution_count": 41, "outputs": [ { "output_type": "execute_result", "data": { "text/plain": [ " age bp bgr bu sod pot hemo wc rc htn appet ane \\\n", "0 48.0 80.0 121.0 36.0 0.0 0.0 15.4 7800 5.2 1 1 0 \n", "1 7.0 50.0 0.0 18.0 0.0 0.0 11.3 6000 0 0 1 0 \n", "2 62.0 80.0 423.0 53.0 0.0 0.0 9.6 7500 0 0 0 1 \n", "3 48.0 70.0 117.0 56.0 111.0 2.5 11.2 6700 3.9 1 0 1 \n", "4 51.0 80.0 106.0 26.0 0.0 0.0 11.6 7300 4.6 0 1 0 \n", ".. ... ... ... ... ... ... ... ... ... ... ... ... \n", "395 55.0 80.0 140.0 49.0 150.0 4.9 15.7 6700 4.9 0 1 0 \n", "396 42.0 70.0 75.0 31.0 141.0 3.5 16.5 7800 6.2 0 1 0 \n", "397 12.0 80.0 100.0 26.0 137.0 4.4 15.8 6600 5.4 0 1 0 \n", "398 17.0 60.0 114.0 50.0 135.0 4.9 14.2 7200 5.9 0 1 0 \n", "399 58.0 80.0 131.0 18.0 141.0 3.5 15.8 6800 6.1 0 1 0 \n", "\n", " classification \n", "0 1 \n", "1 1 \n", "2 1 \n", "3 1 \n", "4 1 \n", ".. ... \n", "395 0 \n", "396 0 \n", "397 0 \n", "398 0 \n", "399 0 \n", "\n", "[392 rows x 13 columns]" ], "text/html": [ "\n", "
\n", "
\n", "\n", "\n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", "
agebpbgrbusodpothemowcrchtnappetaneclassification
048.080.0121.036.00.00.015.478005.21101
17.050.00.018.00.00.011.3600000101
262.080.0423.053.00.00.09.6750000011
348.070.0117.056.0111.02.511.267003.91011
451.080.0106.026.00.00.011.673004.60101
..........................................
39555.080.0140.049.0150.04.915.767004.90100
39642.070.075.031.0141.03.516.578006.20100
39712.080.0100.026.0137.04.415.866005.40100
39817.060.0114.050.0135.04.914.272005.90100
39958.080.0131.018.0141.03.515.868006.10100
\n", "

392 rows × 13 columns

\n", "
\n", "
\n", "\n", "
\n", " \n", "\n", " \n", "\n", " \n", "
\n", "\n", "\n", "
\n", " \n", "\n", "\n", "\n", " \n", "
\n", "\n", "
\n", " \n", " \n", " \n", "
\n", "\n", "
\n", "
\n" ], "application/vnd.google.colaboratory.intrinsic+json": { "type": "dataframe", "variable_name": "df", "summary": "{\n \"name\": \"df\",\n \"rows\": 392,\n \"fields\": [\n {\n \"column\": \"age\",\n \"properties\": {\n \"dtype\": \"number\",\n \"std\": 18.589049704965028,\n \"min\": 0.0,\n \"max\": 90.0,\n \"num_unique_values\": 76,\n \"samples\": [\n 60.0,\n 26.0,\n 63.0\n ],\n \"semantic_type\": \"\",\n \"description\": \"\"\n }\n },\n {\n \"column\": \"bp\",\n \"properties\": {\n \"dtype\": \"number\",\n \"std\": 18.513424355349287,\n \"min\": 0.0,\n \"max\": 180.0,\n \"num_unique_values\": 11,\n \"samples\": [\n 100.0,\n 80.0,\n 180.0\n ],\n \"semantic_type\": \"\",\n \"description\": \"\"\n }\n },\n {\n \"column\": \"bgr\",\n \"properties\": {\n \"dtype\": \"number\",\n \"std\": 88.27078573562652,\n \"min\": 0.0,\n \"max\": 490.0,\n \"num_unique_values\": 146,\n \"samples\": [\n 150.0,\n 424.0,\n 159.0\n ],\n \"semantic_type\": \"\",\n \"description\": \"\"\n }\n },\n {\n \"column\": \"bu\",\n \"properties\": {\n \"dtype\": \"number\",\n \"std\": 50.86006843947563,\n \"min\": 0.0,\n \"max\": 391.0,\n \"num_unique_values\": 118,\n \"samples\": [\n 85.0,\n 42.0,\n 26.0\n ],\n \"semantic_type\": \"\",\n \"description\": \"\"\n }\n },\n {\n \"column\": \"sod\",\n \"properties\": {\n \"dtype\": \"number\",\n \"std\": 57.76009035147553,\n \"min\": 0.0,\n \"max\": 163.0,\n \"num_unique_values\": 35,\n \"samples\": [\n 122.0,\n 129.0,\n 146.0\n ],\n \"semantic_type\": \"\",\n \"description\": \"\"\n }\n },\n {\n \"column\": \"pot\",\n \"properties\": {\n \"dtype\": \"number\",\n \"std\": 3.4365711690381584,\n \"min\": 0.0,\n \"max\": 47.0,\n \"num_unique_values\": 41,\n \"samples\": [\n 3.6,\n 3.8,\n 6.4\n ],\n \"semantic_type\": \"\",\n \"description\": \"\"\n }\n },\n {\n \"column\": \"hemo\",\n \"properties\": {\n \"dtype\": \"number\",\n \"std\": 5.034697701579665,\n \"min\": 0.0,\n \"max\": 17.8,\n \"num_unique_values\": 116,\n \"samples\": [\n 3.1,\n 11.6,\n 
9.1\n ],\n \"semantic_type\": \"\",\n \"description\": \"\"\n }\n },\n {\n \"column\": \"wc\",\n \"properties\": {\n \"dtype\": \"category\",\n \"num_unique_values\": 90,\n \"samples\": [\n \"5600\",\n \"7900\",\n \"16300\"\n ],\n \"semantic_type\": \"\",\n \"description\": \"\"\n }\n },\n {\n \"column\": \"rc\",\n \"properties\": {\n \"dtype\": \"category\",\n \"num_unique_values\": 48,\n \"samples\": [\n \"5.6\",\n \"5.1\",\n \"2.1\"\n ],\n \"semantic_type\": \"\",\n \"description\": \"\"\n }\n },\n {\n \"column\": \"htn\",\n \"properties\": {\n \"dtype\": \"number\",\n \"std\": 0,\n \"min\": 0,\n \"max\": 1,\n \"num_unique_values\": 2,\n \"samples\": [\n 0,\n 1\n ],\n \"semantic_type\": \"\",\n \"description\": \"\"\n }\n },\n {\n \"column\": \"appet\",\n \"properties\": {\n \"dtype\": \"number\",\n \"std\": 0,\n \"min\": 0,\n \"max\": 1,\n \"num_unique_values\": 2,\n \"samples\": [\n 0,\n 1\n ],\n \"semantic_type\": \"\",\n \"description\": \"\"\n }\n },\n {\n \"column\": \"ane\",\n \"properties\": {\n \"dtype\": \"number\",\n \"std\": 0,\n \"min\": 0,\n \"max\": 1,\n \"num_unique_values\": 2,\n \"samples\": [\n 1,\n 0\n ],\n \"semantic_type\": \"\",\n \"description\": \"\"\n }\n },\n {\n \"column\": \"classification\",\n \"properties\": {\n \"dtype\": \"number\",\n \"std\": 0,\n \"min\": 0,\n \"max\": 1,\n \"num_unique_values\": 2,\n \"samples\": [\n 0,\n 1\n ],\n \"semantic_type\": \"\",\n \"description\": \"\"\n }\n }\n ]\n}" } }, "metadata": {}, "execution_count": 41 } ] },
{ "cell_type": "code", "source": [ "# Separate the feature columns from the 0/1 target column.\n", "x_data = df.drop(columns=['classification'])\n", "y = df['classification'].values" ], "metadata": { "id": "jvdxSOtN35up" }, "execution_count": 42, "outputs": [] },
{ "cell_type": "code", "source": [ "# Hold out 20% of the rows for evaluation; the fixed seed keeps the split repeatable.\n", "x_train, x_test, y_train, y_test = train_test_split(\n", "    x_data,\n", "    y,\n", "    test_size=0.2,\n", "    random_state=0,\n", ")" ], "metadata": { "id": "dHaFMd8A94Ks" }, "execution_count": 43, "outputs": [] },
{ "cell_type": "code", "source": [ "from sklearn.ensemble import RandomForestClassifier\n", "\n", "# Build a 1000-tree random forest with a fixed seed and fit it on the training\n", "# split; fit() returns the estimator itself, which is then shown as the cell result.\n", "rf = RandomForestClassifier(n_estimators=1000, random_state=1).fit(x_train, y_train)\n", "rf" ], "metadata": { "colab": { "base_uri": "https://localhost:8080/", "height": 74 }, "id": "JEFcVUBLW9Pi", "outputId": "dbf5c9f4-d229-4184-abe6-ffa193fb6f85" }, "execution_count": 44, "outputs": [ { "output_type": "execute_result", "data": { "text/plain": [ "RandomForestClassifier(n_estimators=1000, random_state=1)" ], "text/html": [ "
RandomForestClassifier(n_estimators=1000, random_state=1)
In a Jupyter environment, please rerun this cell to show the HTML representation or trust the notebook.
On GitHub, the HTML representation is unable to render, please try loading this page with nbviewer.org.
" ] }, "metadata": {}, "execution_count": 44 } ] },
{ "cell_type": "code", "source": [ "# Predict the labels of the held-out rows with the fitted forest.\n", "y_pred = rf.predict(x_test)" ], "metadata": { "id": "M66dC8FOXNEt" }, "execution_count": 45, "outputs": [] },
{ "cell_type": "code", "source": [ "from sklearn.metrics import classification_report\n", "\n", "# classification_report takes (y_true, y_pred) in that order. With the arguments\n", "# swapped, per-class precision and recall trade places and the support column\n", "# counts predicted labels instead of true ones.\n", "print(classification_report(y_test, y_pred))" ], "metadata": { "colab": { "base_uri": "https://localhost:8080/" }, "id": "L06DnXKhXPzS", "outputId": "b454914f-414f-407b-caa7-5599ab136d5a" }, "execution_count": null, "outputs": [] },
{ "cell_type": "code", "source": [ "import pickle\n", "\n", "# Persist the fitted model to disk.\n", "with open('kcd.pkl', 'wb') as f:\n", "    pickle.dump(rf, f)\n", "\n", "# Reload it to confirm the file round-trips. pickle.load can execute arbitrary\n", "# code, so only load files that this notebook wrote itself.\n", "with open('kcd.pkl', 'rb') as f:\n", "    rf = pickle.load(f)\n", "\n", "# The reloaded model must reproduce the predictions of the in-memory one.\n", "assert (rf.predict(x_test) == y_pred).all()" ], "metadata": { "id": "4IrkPQCLXhYw" }, "execution_count": 47, "outputs": [] } ] }