{ "nbformat": 4, "nbformat_minor": 0, "metadata": { "colab": { "provenance": [] }, "kernelspec": { "name": "python3", "display_name": "Python 3" }, "language_info": { "name": "python" } }, "cells": [ { "cell_type": "markdown", "source": [ "# Outlier-Sensitive Predictor" ], "metadata": { "id": "pUdgDToFZPsM" } }, { "cell_type": "code", "execution_count": 7, "metadata": { "id": "L96SNQ8HVI7m" }, "outputs": [], "source": [ "# imports\n", "import tensorflow as tf\n", "import pandas as pd\n", "import numpy as np\n", "import matplotlib.pyplot as plt\n", "from sklearn.preprocessing import StandardScaler\n", "from imblearn.over_sampling import RandomOverSampler\n", "import seaborn as sns\n", "from sklearn.model_selection import train_test_split" ] }, { "cell_type": "code", "source": [ "# using drive to load our dataset\n", "from google.colab import drive\n", "drive.mount('/content/drive')" ], "metadata": { "colab": { "base_uri": "https://localhost:8080/" }, "id": "Ea3adROCVORJ", "outputId": "337c92a7-9d72-4e6c-c4de-94c07507d1a1" }, "execution_count": 2, "outputs": [ { "output_type": "stream", "name": "stdout", "text": [ "Mounted at /content/drive\n" ] } ] }, { "cell_type": "code", "source": [ "# Load heart.csv from the mounted drive, then drop the columns that are not used as features\n", "df = pd.read_csv(\"/content/drive/MyDrive/dataset/heart.csv\")\n", "df = df.drop(columns=['trestbps', 'fbs', 'restecg', 'thalach', 'exang', 'slope', 'oldpeak'])\n", "df" ], "metadata": { "colab": { "base_uri": "https://localhost:8080/", "height": 423 }, "id": "5XYS8syqVREm", "outputId": "d0c6e728-4ea8-420f-dfd1-7a823bb7de9b" }, "execution_count": 26, "outputs": [ { "output_type": "execute_result", "data": { "text/plain": [ " age sex cp chol ca thal target\n", "0 63 1 3 233 0 1 1\n", "1 37 1 2 250 0 2 1\n", "2 41 0 1 204 0 2 1\n", "3 56 1 1 236 0 2 1\n", "4 57 0 0 354 0 2 1\n", ".. ... ... .. ... .. ... 
...\n", "298 57 0 0 241 0 3 0\n", "299 45 1 3 264 0 3 0\n", "300 68 1 0 193 2 3 0\n", "301 57 1 0 131 1 3 0\n", "302 57 0 1 236 1 2 0\n", "\n", "[303 rows x 7 columns]" ], "text/html": [ "\n", "
\n", "
\n", "\n", "\n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", "
agesexcpcholcathaltarget
06313233011
13712250021
24101204021
35611236021
45700354021
........................
2985700241030
2994513264030
3006810193230
3015710131130
3025701236120
\n", "

303 rows × 7 columns

\n", "
\n", "
\n", "\n", "
\n", " \n", "\n", " \n", "\n", " \n", "
\n", "\n", "\n", "
\n", " \n", "\n", "\n", "\n", " \n", "
\n", "\n", "
\n", " \n", " \n", " \n", "
\n", "\n", "
\n", "
\n" ], "application/vnd.google.colaboratory.intrinsic+json": { "type": "dataframe", "variable_name": "df", "summary": "{\n \"name\": \"df\",\n \"rows\": 303,\n \"fields\": [\n {\n \"column\": \"age\",\n \"properties\": {\n \"dtype\": \"number\",\n \"std\": 9,\n \"min\": 29,\n \"max\": 77,\n \"num_unique_values\": 41,\n \"samples\": [\n 46,\n 66,\n 48\n ],\n \"semantic_type\": \"\",\n \"description\": \"\"\n }\n },\n {\n \"column\": \"sex\",\n \"properties\": {\n \"dtype\": \"number\",\n \"std\": 0,\n \"min\": 0,\n \"max\": 1,\n \"num_unique_values\": 2,\n \"samples\": [\n 0,\n 1\n ],\n \"semantic_type\": \"\",\n \"description\": \"\"\n }\n },\n {\n \"column\": \"cp\",\n \"properties\": {\n \"dtype\": \"number\",\n \"std\": 1,\n \"min\": 0,\n \"max\": 3,\n \"num_unique_values\": 4,\n \"samples\": [\n 2,\n 0\n ],\n \"semantic_type\": \"\",\n \"description\": \"\"\n }\n },\n {\n \"column\": \"chol\",\n \"properties\": {\n \"dtype\": \"number\",\n \"std\": 51,\n \"min\": 126,\n \"max\": 564,\n \"num_unique_values\": 152,\n \"samples\": [\n 277,\n 169\n ],\n \"semantic_type\": \"\",\n \"description\": \"\"\n }\n },\n {\n \"column\": \"ca\",\n \"properties\": {\n \"dtype\": \"number\",\n \"std\": 1,\n \"min\": 0,\n \"max\": 4,\n \"num_unique_values\": 5,\n \"samples\": [\n 2,\n 4\n ],\n \"semantic_type\": \"\",\n \"description\": \"\"\n }\n },\n {\n \"column\": \"thal\",\n \"properties\": {\n \"dtype\": \"number\",\n \"std\": 0,\n \"min\": 0,\n \"max\": 3,\n \"num_unique_values\": 4,\n \"samples\": [\n 2,\n 0\n ],\n \"semantic_type\": \"\",\n \"description\": \"\"\n }\n },\n {\n \"column\": \"target\",\n \"properties\": {\n \"dtype\": \"number\",\n \"std\": 0,\n \"min\": 0,\n \"max\": 1,\n \"num_unique_values\": 2,\n \"samples\": [\n 0,\n 1\n ],\n \"semantic_type\": \"\",\n \"description\": \"\"\n }\n }\n ]\n}" } }, "metadata": {}, "execution_count": 26 } ] }, { "cell_type": "code", "source": [ "x_data = df.drop(['target'], axis = 1)\n", "y = df.target.values" ], 
"metadata": { "id": "vA58b9OtWIDv" }, "execution_count": 27, "outputs": [] }, { "cell_type": "code", "source": [ "x_train, x_test, y_train, y_test = train_test_split(x_data, y, test_size = 0.2, random_state= 0)" ], "metadata": { "id": "vK1Fycc-WqRj" }, "execution_count": 28, "outputs": [] }, { "cell_type": "code", "source": [ "from sklearn.ensemble import RandomForestClassifier\n", "rf = RandomForestClassifier(n_estimators = 1000, random_state= 1)\n", "rf.fit(x_train, y_train)" ], "metadata": { "colab": { "base_uri": "https://localhost:8080/", "height": 74 }, "id": "JEFcVUBLW9Pi", "outputId": "325b00b5-3a44-4396-8f58-d6b4ff5447b1" }, "execution_count": 29, "outputs": [ { "output_type": "execute_result", "data": { "text/plain": [ "RandomForestClassifier(n_estimators=1000, random_state=1)" ], "text/html": [ "
RandomForestClassifier(n_estimators=1000, random_state=1)
In a Jupyter environment, please rerun this cell to show the HTML representation or trust the notebook.
On GitHub, the HTML representation is unable to render, please try loading this page with nbviewer.org.
" ] }, "metadata": {}, "execution_count": 29 } ] }, { "cell_type": "code", "source": [ "y_pred=rf.predict(x_test)" ], "metadata": { "id": "M66dC8FOXNEt" }, "execution_count": 30, "outputs": [] }, { "cell_type": "code", "source": [ "from sklearn.metrics import classification_report\n", "print(classification_report(y_pred,y_test))" ], "metadata": { "colab": { "base_uri": "https://localhost:8080/" }, "id": "L06DnXKhXPzS", "outputId": "fd3a39c7-f435-4363-9d68-725708e39fe5" }, "execution_count": 31, "outputs": [ { "output_type": "stream", "name": "stdout", "text": [ " precision recall f1-score support\n", "\n", " 0 0.74 0.80 0.77 25\n", " 1 0.85 0.81 0.83 36\n", "\n", " accuracy 0.80 61\n", " macro avg 0.80 0.80 0.80 61\n", "weighted avg 0.81 0.80 0.80 61\n", "\n" ] } ] }, { "cell_type": "code", "source": [ "import pickle\n", "\n", "with open('osp.pkl','wb') as f:\n", " pickle.dump(rf,f)\n", "\n", "# load\n", "with open('osp.pkl', 'rb') as f:\n", " rf = pickle.load(f)\n", "#rf.predict()" ], "metadata": { "id": "4IrkPQCLXhYw" }, "execution_count": 32, "outputs": [] }, { "cell_type": "code", "source": [], "metadata": { "id": "sTwBUL3vZhdQ" }, "execution_count": null, "outputs": [] } ] }