{
 "cells": [
  {
   "cell_type": "markdown",
   "id": "a0c8ad0b",
   "metadata": {},
   "source": [
    "# Dataset\n",
    "https://www.kaggle.com/datasets/abhia1999/chronic-kidney-disease"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 1,
   "id": "61347359",
   "metadata": {},
   "outputs": [],
   "source": [
    "import pickle\n",
    "\n",
    "import pandas as pd\n",
    "from sklearn.ensemble import RandomForestClassifier"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 2,
   "id": "b470f34b",
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      " Bp Sg Al Su Rbc Bu Sc Sod Pot Hemo Wbcc Rbcc \\\n",
      "0 80.0 1.020 1.0 0.0 1.0 36.0 1.2 137.53 4.63 15.4 7800.0 5.20 \n",
      "1 50.0 1.020 4.0 0.0 1.0 18.0 0.8 137.53 4.63 11.3 6000.0 4.71 \n",
      "2 80.0 1.010 2.0 3.0 1.0 53.0 1.8 137.53 4.63 9.6 7500.0 4.71 \n",
      "3 70.0 1.005 4.0 0.0 1.0 56.0 3.8 111.00 2.50 11.2 6700.0 3.90 \n",
      "4 80.0 1.010 2.0 0.0 1.0 26.0 1.4 137.53 4.63 11.6 7300.0 4.60 \n",
      "\n",
      " Htn Class \n",
      "0 1.0 1 \n",
      "1 0.0 1 \n",
      "2 0.0 1 \n",
      "3 1.0 1 \n",
      "4 0.0 1 \n"
     ]
    }
   ],
   "source": [
    "# Lift the pandas display limits so printed frames are not truncated.\n",
    "pd.set_option('display.max_columns', None)\n",
    "pd.set_option('display.max_rows', None)\n",
    "df_kidney = pd.read_csv('new_model.csv')\n",
    "print(df_kidney.head())"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 3,
   "id": "412d6119",
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "Index(['Bp', 'Sg', 'Al', 'Su', 'Rbc', 'Bu', 'Sc', 'Sod', 'Pot', 'Hemo', 'Wbcc',\n",
       " 'Rbcc', 'Htn', 'Class'],\n",
       " dtype='object')"
      ]
     },
     "execution_count": 3,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "df_kidney.columns"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 4,
   "id": "46cacac6",
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "Bp 50.000\n",
       "Sg 1.005\n",
       "Al 0.000\n",
       "Su 0.000\n",
       "Rbc 0.000\n",
       "Bu 1.500\n",
       "Sc 0.400\n",
       "Sod 4.500\n",
       "Pot 2.500\n",
       "Hemo 3.100\n",
       "Wbcc 2200.000\n",
       "Rbcc 2.100\n",
       "Htn 0.000\n",
       "Class 0.000\n",
       "dtype: float64"
      ]
     },
     "execution_count": 4,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "df_kidney.min()"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 5,
   "id": "a4ac47e3",
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "Bp 180.000\n",
       "Sg 1.025\n",
       "Al 5.000\n",
       "Su 5.000\n",
       "Rbc 1.000\n",
       "Bu 391.000\n",
       "Sc 76.000\n",
       "Sod 163.000\n",
       "Pot 47.000\n",
       "Hemo 17.800\n",
       "Wbcc 26400.000\n",
       "Rbcc 8.000\n",
       "Htn 1.000\n",
       "Class 1.000\n",
       "dtype: float64"
      ]
     },
     "execution_count": 5,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "df_kidney.max()"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 6,
   "id": "7c835179",
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "\n",
      "RangeIndex: 400 entries, 0 to 399\n",
      "Data columns (total 14 columns):\n",
      " # Column Non-Null Count Dtype \n",
      "--- ------ -------------- ----- \n",
      " 0 Bp 400 non-null float64\n",
      " 1 Sg 400 non-null float64\n",
      " 2 Al 400 non-null float64\n",
      " 3 Su 400 non-null float64\n",
      " 4 Rbc 400 non-null float64\n",
      " 5 Bu 400 non-null float64\n",
      " 6 Sc 400 non-null float64\n",
      " 7 Sod 400 non-null float64\n",
      " 8 Pot 400 non-null float64\n",
      " 9 Hemo 400 non-null float64\n",
      " 10 Wbcc 400 non-null float64\n",
      " 11 Rbcc 400 non-null float64\n",
      " 12 Htn 400 non-null float64\n",
      " 13 Class 400 non-null int64 \n",
      "dtypes: float64(13), int64(1)\n",
      "memory usage: 43.9 KB\n"
     ]
    }
   ],
   "source": [
    "df_kidney.info()"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 7,
   "id": "c5784db2",
   "metadata": {},
   "outputs": [],
   "source": [
    "# Every column except the 'Class' label is used as a model feature.\n",
    "X = df_kidney.drop(columns='Class')\n",
    "Y = df_kidney['Class']"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 8,
   "id": "b0c5426f",
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "RandomForestClassifier(random_state=42)"
      ]
     },
     "execution_count": 8,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "# Pin the seed: an unseeded forest yields a different model on every run,\n",
    "# so the pickled artifact could not be reproduced from this notebook.\n",
    "clf = RandomForestClassifier(random_state=42)\n",
    "clf.fit(X, Y)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 10,
   "id": "db98ab11",
   "metadata": {},
   "outputs": [],
   "source": [
    "# Write through a context manager so the file is flushed and closed even\n",
    "# if serialization raises; a bare open() leaves closing to the garbage collector.\n",
    "with open('kidney_clf.pkl', 'wb') as model_file:\n",
    "    pickle.dump(clf, model_file)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "3bbd5901",
   "metadata": {},
   "outputs": [],
   "source": []
  }
 ],
 "metadata": {
  "kernelspec": {
   "display_name": "Python 3 (ipykernel)",
   "language": "python",
   "name": "python3"
  },
  "language_info": {
   "codemirror_mode": {
    "name": "ipython",
    "version": 3
   },
   "file_extension": ".py",
   "mimetype": "text/x-python",
   "name": "python",
   "nbconvert_exporter": "python",
   "pygments_lexer": "ipython3",
   "version": "3.9.12"
  }
 },
 "nbformat": 4,
 "nbformat_minor": 5
}