{
 "cells": [
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "ec70045d",
   "metadata": {
    "_cell_guid": "b1076dfc-b9ad-4769-8c92-a6c4dae69d19",
    "_uuid": "8f2839f25d086af736a60e9eeb907d3b93b6e0e5",
    "execution": {
     "iopub.execute_input": "2024-02-28T21:15:40.650918Z",
     "iopub.status.busy": "2024-02-28T21:15:40.650589Z",
     "iopub.status.idle": "2024-02-28T21:15:41.502437Z",
     "shell.execute_reply": "2024-02-28T21:15:41.501426Z"
    },
    "papermill": {
     "duration": 0.87031,
     "end_time": "2024-02-28T21:15:41.504554",
     "exception": false,
     "start_time": "2024-02-28T21:15:40.634244",
     "status": "completed"
    },
    "tags": []
   },
   "outputs": [],
   "source": [
    "# This Python 3 environment comes with many helpful analytics libraries installed\n",
    "# It is defined by the kaggle/python Docker image: https://github.com/kaggle/docker-python\n",
    "# For example, here's several helpful packages to load\n",
    "\n",
    "import numpy as np  # linear algebra\n",
    "import pandas as pd  # data processing, CSV file I/O (e.g. pd.read_csv)\n",
    "\n",
    "# Input data files are available in the read-only \"../input/\" directory\n",
    "# For example, running this (by clicking run or pressing Shift+Enter) will list all files under the input directory\n",
    "\n",
    "import os\n",
    "for dirname, _, filenames in os.walk('/kaggle/input'):\n",
    "    for filename in filenames:\n",
    "        print(os.path.join(dirname, filename))\n",
    "\n",
    "# You can write up to 20GB to the current directory (/kaggle/working/) that gets preserved as output when you create a version using \"Save & Run All\"\n",
    "# You can also write temporary files to /kaggle/temp/, but they won't be saved outside of the current session"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "31b2bdbf",
   "metadata": {
    "execution": {
     "iopub.execute_input": "2024-02-28T21:15:41.536451Z",
     "iopub.status.busy": "2024-02-28T21:15:41.536047Z",
     "iopub.status.idle": "2024-02-28T21:15:42.592902Z",
     "shell.execute_reply": "2024-02-28T21:15:42.592121Z"
    },
    "papermill": {
     "duration": 1.07523,
     "end_time": "2024-02-28T21:15:42.595268",
     "exception": false,
     "start_time": "2024-02-28T21:15:41.520038",
     "status": "completed"
    },
    "tags": []
   },
   "outputs": [],
   "source": [
    "df = pd.read_csv(\n",
    "    \"/kaggle/input/personal-key-indicators-of-heart-disease/2020/heart_2020_cleaned.csv\")"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "a12bd286",
   "metadata": {
    "execution": {
     "iopub.execute_input": "2024-02-28T21:15:42.629222Z",
     "iopub.status.busy": "2024-02-28T21:15:42.628916Z",
     "iopub.status.idle": "2024-02-28T21:15:43.061965Z",
     "shell.execute_reply": "2024-02-28T21:15:43.061012Z"
    },
    "papermill": {
     "duration": 0.453304,
     "end_time": "2024-02-28T21:15:43.064364",
     "exception": false,
     "start_time": "2024-02-28T21:15:42.611060",
     "status": "completed"
    },
    "tags": []
   },
   "outputs": [],
   "source": [
    "df.isnull().sum()"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "98b4a85f",
   "metadata": {
    "execution": {
     "iopub.execute_input": "2024-02-28T21:15:43.098384Z",
     "iopub.status.busy": "2024-02-28T21:15:43.098077Z",
     "iopub.status.idle": "2024-02-28T21:15:43.549973Z",
     "shell.execute_reply": "2024-02-28T21:15:43.548934Z"
    },
    "papermill": {
     "duration": 0.470772,
     "end_time": "2024-02-28T21:15:43.552722",
     "exception": false,
     "start_time": "2024-02-28T21:15:43.081950",
     "status": "completed"
    },
    "tags": []
   },
   "outputs": [],
   "source": [
    "df = pd.get_dummies(df, columns=['Smoking', 'AlcoholDrinking', 'Sex', 'AgeCategory', 'Race',\n",
    "                    'Diabetic', 'PhysicalActivity', 'GenHealth', 'Asthma', 'KidneyDisease', 'SkinCancer'])"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "4a49bbd7",
   "metadata": {
    "execution": {
     "iopub.execute_input": "2024-02-28T21:15:43.590333Z",
     "iopub.status.busy": "2024-02-28T21:15:43.589967Z",
     "iopub.status.idle": "2024-02-28T21:15:43.596311Z",
     "shell.execute_reply": "2024-02-28T21:15:43.595602Z"
    },
    "papermill": {
     "duration": 0.026491,
     "end_time": "2024-02-28T21:15:43.598298",
     "exception": false,
     "start_time": "2024-02-28T21:15:43.571807",
     "status": "completed"
    },
    "tags": []
   },
   "outputs": [],
   "source": [
    "df['BMI'] = df['BMI'] / (df['BMI'] ** 2)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "8a1121c3",
   "metadata": {
    "execution": {
     "iopub.execute_input": "2024-02-28T21:15:43.629024Z",
     "iopub.status.busy": "2024-02-28T21:15:43.628731Z",
     "iopub.status.idle": "2024-02-28T21:15:45.174952Z",
     "shell.execute_reply": "2024-02-28T21:15:45.173525Z"
    },
    "papermill": {
     "duration": 1.563564,
     "end_time": "2024-02-28T21:15:45.176760",
     "exception": true,
     "start_time": "2024-02-28T21:15:43.613196",
     "status": "failed"
    },
    "tags": []
   },
   "outputs": [],
   "source": [
    "from sklearn.preprocessing import MinMaxScaler\n",
    "numerical_columns = ['BMI', 'Stroke', 'PhysicalHealth',\n",
    "                     'MentalHealth', 'DiffWalking', 'SleepTime']\n",
    "scaler = MinMaxScaler()\n",
    "df[numerical_columns] = scaler.fit_transform(df[numerical_columns])"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "c34257e5",
   "metadata": {
    "execution": {
     "iopub.execute_input": "2024-02-28T20:31:24.347783Z",
     "iopub.status.busy": "2024-02-28T20:31:24.347070Z",
     "iopub.status.idle": "2024-02-28T20:31:24.504857Z",
     "shell.execute_reply": "2024-02-28T20:31:24.503875Z",
     "shell.execute_reply.started": "2024-02-28T20:31:24.347750Z"
    },
    "papermill": {
     "duration": null,
     "end_time": null,
     "exception": null,
     "start_time": null,
     "status": "pending"
    },
    "tags": []
   },
   "outputs": [],
   "source": [
    "for column in df.columns:\n",
    "    print(column, df[column].unique())"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "305666d0",
   "metadata": {
    "execution": {
     "iopub.execute_input": "2024-02-28T20:31:26.899524Z",
     "iopub.status.busy": "2024-02-28T20:31:26.899205Z",
     "iopub.status.idle": "2024-02-28T20:31:26.961477Z",
     "shell.execute_reply": "2024-02-28T20:31:26.960639Z",
     "shell.execute_reply.started": "2024-02-28T20:31:26.899502Z"
    },
    "papermill": {
     "duration": null,
     "end_time": null,
     "exception": null,
     "start_time": null,
     "status": "pending"
    },
    "tags": []
   },
   "outputs": [],
   "source": [
    "df['Stroke'] = df['Stroke'].map({'No': 0, 'Yes': 1})\n",
    "df['DiffWalking'] = df['DiffWalking'].map({'No': 0, 'Yes': 1})"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "b2cc4716",
   "metadata": {
    "execution": {
     "iopub.execute_input": "2024-02-28T20:31:28.914217Z",
     "iopub.status.busy": "2024-02-28T20:31:28.913857Z",
     "iopub.status.idle": "2024-02-28T20:31:28.945954Z",
     "shell.execute_reply": "2024-02-28T20:31:28.944829Z",
     "shell.execute_reply.started": "2024-02-28T20:31:28.914181Z"
    },
    "papermill": {
     "duration": null,
     "end_time": null,
     "exception": null,
     "start_time": null,
     "status": "pending"
    },
    "tags": []
   },
   "outputs": [],
   "source": [
    "scaler = MinMaxScaler()\n",
    "numerical_columns = ['BMI', 'PhysicalHealth',\n",
    "                     'MentalHealth', 'DiffWalking', 'SleepTime']\n",
    "df[numerical_columns] = scaler.fit_transform(df[numerical_columns])"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "15944d03",
   "metadata": {
    "execution": {
     "iopub.execute_input": "2024-02-28T20:31:30.518053Z",
     "iopub.status.busy": "2024-02-28T20:31:30.517356Z",
     "iopub.status.idle": "2024-02-28T20:31:30.592331Z",
     "shell.execute_reply": "2024-02-28T20:31:30.591365Z",
     "shell.execute_reply.started": "2024-02-28T20:31:30.518018Z"
    },
    "papermill": {
     "duration": null,
     "end_time": null,
     "exception": null,
     "start_time": null,
     "status": "pending"
    },
    "tags": []
   },
   "outputs": [],
   "source": [
    "z_scores = df[numerical_columns].apply(lambda x: (x - x.mean()) / x.std())\n",
    "outliers = (z_scores > 3) | (z_scores < -3)\n",
    "df = df[~outliers.any(axis=1)]"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "b3c04332",
   "metadata": {
    "execution": {
     "iopub.execute_input": "2024-02-28T20:31:32.877312Z",
     "iopub.status.busy": "2024-02-28T20:31:32.876991Z",
     "iopub.status.idle": "2024-02-28T20:31:32.923278Z",
     "shell.execute_reply": "2024-02-28T20:31:32.922285Z",
     "shell.execute_reply.started": "2024-02-28T20:31:32.877287Z"
    },
    "papermill": {
     "duration": null,
     "end_time": null,
     "exception": null,
     "start_time": null,
     "status": "pending"
    },
    "tags": []
   },
   "outputs": [],
   "source": [
    "print(df.isnull().sum())"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "f883f424",
   "metadata": {
    "execution": {
     "iopub.execute_input": "2024-02-28T20:31:35.118412Z",
     "iopub.status.busy": "2024-02-28T20:31:35.118046Z",
     "iopub.status.idle": "2024-02-28T20:31:35.138194Z",
     "shell.execute_reply": "2024-02-28T20:31:35.137356Z",
     "shell.execute_reply.started": "2024-02-28T20:31:35.118385Z"
    },
    "papermill": {
     "duration": null,
     "end_time": null,
     "exception": null,
     "start_time": null,
     "status": "pending"
    },
    "tags": []
   },
   "outputs": [],
   "source": [
    "X = df.drop(columns=['HeartDisease'])\n",
    "y = df['HeartDisease']"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "937f456d",
   "metadata": {
    "execution": {
     "iopub.execute_input": "2024-02-28T20:31:36.921083Z",
     "iopub.status.busy": "2024-02-28T20:31:36.920460Z",
     "iopub.status.idle": "2024-02-28T20:31:37.092675Z",
     "shell.execute_reply": "2024-02-28T20:31:37.091807Z",
     "shell.execute_reply.started": "2024-02-28T20:31:36.921053Z"
    },
    "papermill": {
     "duration": null,
     "end_time": null,
     "exception": null,
     "start_time": null,
     "status": "pending"
    },
    "tags": []
   },
   "outputs": [],
   "source": [
    "from sklearn.model_selection import train_test_split  # Add this import statement\n",
    "X_train, X_test, y_train, y_test = train_test_split(\n",
    "    X, y, test_size=0.2, random_state=42)"
   ]
  },
  {
   "cell_type": "markdown",
   "id": "d908667b",
   "metadata": {
    "papermill": {
     "duration": null,
     "end_time": null,
     "exception": null,
     "start_time": null,
     "status": "pending"
    },
    "tags": []
   },
   "source": [
    "# Logistic regression\n"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "d2c46021",
   "metadata": {
    "execution": {
     "iopub.execute_input": "2024-02-28T20:31:40.429477Z",
     "iopub.status.busy": "2024-02-28T20:31:40.428714Z",
     "iopub.status.idle": "2024-02-28T20:31:40.563938Z",
     "shell.execute_reply": "2024-02-28T20:31:40.563215Z",
     "shell.execute_reply.started": "2024-02-28T20:31:40.429444Z"
    },
    "papermill": {
     "duration": null,
     "end_time": null,
     "exception": null,
     "start_time": null,
     "status": "pending"
    },
    "tags": []
   },
   "outputs": [],
   "source": [
    "from sklearn.linear_model import LogisticRegression\n",
    "from sklearn.metrics import accuracy_score, classification_report, confusion_matrix\n",
    "model = LogisticRegression()"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "57788a5b",
   "metadata": {
    "execution": {
     "iopub.execute_input": "2024-02-28T20:31:43.108928Z",
     "iopub.status.busy": "2024-02-28T20:31:43.108194Z",
     "iopub.status.idle": "2024-02-28T20:31:46.611293Z",
     "shell.execute_reply": "2024-02-28T20:31:46.609836Z",
     "shell.execute_reply.started": "2024-02-28T20:31:43.108893Z"
    },
    "papermill": {
     "duration": null,
     "end_time": null,
     "exception": null,
     "start_time": null,
     "status": "pending"
    },
    "tags": []
   },
   "outputs": [],
   "source": [
    "model.fit(X_train, y_train)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "5a09075d",
   "metadata": {
    "execution": {
     "iopub.execute_input": "2024-02-28T20:31:53.227765Z",
     "iopub.status.busy": "2024-02-28T20:31:53.227012Z",
     "iopub.status.idle": "2024-02-28T20:31:53.251316Z",
     "shell.execute_reply": "2024-02-28T20:31:53.250025Z",
     "shell.execute_reply.started": "2024-02-28T20:31:53.227730Z"
    },
    "papermill": {
     "duration": null,
     "end_time": null,
     "exception": null,
     "start_time": null,
     "status": "pending"
    },
    "tags": []
   },
   "outputs": [],
   "source": [
    "y_pred = model.predict(X_test)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "025c02d6",
   "metadata": {
    "execution": {
     "iopub.execute_input": "2024-02-28T20:31:55.958835Z",
     "iopub.status.busy": "2024-02-28T20:31:55.957996Z",
     "iopub.status.idle": "2024-02-28T20:31:56.206159Z",
     "shell.execute_reply": "2024-02-28T20:31:56.205249Z",
     "shell.execute_reply.started": "2024-02-28T20:31:55.958798Z"
    },
    "papermill": {
     "duration": null,
     "end_time": null,
     "exception": null,
     "start_time": null,
     "status": "pending"
    },
    "tags": []
   },
   "outputs": [],
   "source": [
    "accuracy = accuracy_score(y_test, y_pred)\n",
    "print(\"Accuracy:\", accuracy)"
   ]
  },
  {
   "cell_type": "markdown",
   "id": "30f6e656",
   "metadata": {
    "papermill": {
     "duration": null,
     "end_time": null,
     "exception": null,
     "start_time": null,
     "status": "pending"
    },
    "tags": []
   },
   "source": [
    "# KNN\n"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "53935959",
   "metadata": {
    "execution": {
     "iopub.execute_input": "2024-02-28T20:31:59.594538Z",
     "iopub.status.busy": "2024-02-28T20:31:59.593874Z",
     "iopub.status.idle": "2024-02-28T20:31:59.644704Z",
     "shell.execute_reply": "2024-02-28T20:31:59.643728Z",
     "shell.execute_reply.started": "2024-02-28T20:31:59.594507Z"
    },
    "papermill": {
     "duration": null,
     "end_time": null,
     "exception": null,
     "start_time": null,
     "status": "pending"
    },
    "tags": []
   },
   "outputs": [],
   "source": [
    "from sklearn.neighbors import KNeighborsClassifier\n",
    "knn_model = KNeighborsClassifier(n_neighbors=5)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "db4deede",
   "metadata": {
    "execution": {
     "iopub.execute_input": "2024-02-28T20:32:05.418662Z",
     "iopub.status.busy": "2024-02-28T20:32:05.417912Z",
     "iopub.status.idle": "2024-02-28T20:32:06.188877Z",
     "shell.execute_reply": "2024-02-28T20:32:06.187632Z",
     "shell.execute_reply.started": "2024-02-28T20:32:05.418629Z"
    },
    "papermill": {
     "duration": null,
     "end_time": null,
     "exception": null,
     "start_time": null,
     "status": "pending"
    },
    "tags": []
   },
   "outputs": [],
   "source": [
    "knn_model.fit(X_train, y_train)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "ab01ea0d",
   "metadata": {
    "execution": {
     "iopub.execute_input": "2024-02-28T20:32:08.060681Z",
     "iopub.status.busy": "2024-02-28T20:32:08.059727Z",
     "iopub.status.idle": "2024-02-28T20:32:48.065781Z",
     "shell.execute_reply": "2024-02-28T20:32:48.064651Z",
     "shell.execute_reply.started": "2024-02-28T20:32:08.060638Z"
    },
    "papermill": {
     "duration": null,
     "end_time": null,
     "exception": null,
     "start_time": null,
     "status": "pending"
    },
    "tags": []
   },
   "outputs": [],
   "source": [
    "knn_y_pred = knn_model.predict(X_test)\n",
    "knn_accuracy = accuracy_score(y_test, knn_y_pred)\n",
    "print(\"KNN Accuracy:\", knn_accuracy)"
   ]
  },
  {
   "cell_type": "markdown",
   "id": "fbfb3f58",
   "metadata": {
    "papermill": {
     "duration": null,
     "end_time": null,
     "exception": null,
     "start_time": null,
     "status": "pending"
    },
    "tags": []
   },
   "source": [
    "# Naive Bayes\n"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "59c6dc70",
   "metadata": {
    "execution": {
     "iopub.execute_input": "2024-02-28T20:33:05.648469Z",
     "iopub.status.busy": "2024-02-28T20:33:05.647771Z",
     "iopub.status.idle": "2024-02-28T20:33:05.655089Z",
     "shell.execute_reply": "2024-02-28T20:33:05.653963Z",
     "shell.execute_reply.started": "2024-02-28T20:33:05.648437Z"
    },
    "papermill": {
     "duration": null,
     "end_time": null,
     "exception": null,
     "start_time": null,
     "status": "pending"
    },
    "tags": []
   },
   "outputs": [],
   "source": [
    "from sklearn.naive_bayes import GaussianNB\n",
    "nb_model = GaussianNB()"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "bde5534f",
   "metadata": {
    "execution": {
     "iopub.execute_input": "2024-02-28T20:33:07.367575Z",
     "iopub.status.busy": "2024-02-28T20:33:07.366646Z",
     "iopub.status.idle": "2024-02-28T20:33:08.279224Z",
     "shell.execute_reply": "2024-02-28T20:33:08.278331Z",
     "shell.execute_reply.started": "2024-02-28T20:33:07.367527Z"
    },
    "papermill": {
     "duration": null,
     "end_time": null,
     "exception": null,
     "start_time": null,
     "status": "pending"
    },
    "tags": []
   },
   "outputs": [],
   "source": [
    "nb_model.fit(X_train, y_train)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "13d88c07",
   "metadata": {
    "execution": {
     "iopub.execute_input": "2024-02-28T20:33:11.507456Z",
     "iopub.status.busy": "2024-02-28T20:33:11.506783Z",
     "iopub.status.idle": "2024-02-28T20:33:11.557327Z",
     "shell.execute_reply": "2024-02-28T20:33:11.556531Z",
     "shell.execute_reply.started": "2024-02-28T20:33:11.507420Z"
    },
    "papermill": {
     "duration": null,
     "end_time": null,
     "exception": null,
     "start_time": null,
     "status": "pending"
    },
    "tags": []
   },
   "outputs": [],
   "source": [
    "nb_y_pred = nb_model.predict(X_test)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "92e9d434",
   "metadata": {
    "execution": {
     "iopub.execute_input": "2024-02-28T20:33:17.627887Z",
     "iopub.status.busy": "2024-02-28T20:33:17.627102Z",
     "iopub.status.idle": "2024-02-28T20:33:17.872462Z",
     "shell.execute_reply": "2024-02-28T20:33:17.871605Z",
     "shell.execute_reply.started": "2024-02-28T20:33:17.627855Z"
    },
    "papermill": {
     "duration": null,
     "end_time": null,
     "exception": null,
     "start_time": null,
     "status": "pending"
    },
    "tags": []
   },
   "outputs": [],
   "source": [
    "nb_accuracy = accuracy_score(y_test, nb_y_pred)\n",
    "print(\"Naive Bayes Accuracy:\", nb_accuracy)"
   ]
  },
  {
   "cell_type": "markdown",
   "id": "32075ad4",
   "metadata": {
    "papermill": {
     "duration": null,
     "end_time": null,
     "exception": null,
     "start_time": null,
     "status": "pending"
    },
    "tags": []
   },
   "source": [
    "# Decision Tree\n"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "65c78c41",
   "metadata": {
    "execution": {
     "iopub.execute_input": "2024-02-28T20:33:20.370792Z",
     "iopub.status.busy": "2024-02-28T20:33:20.370439Z",
     "iopub.status.idle": "2024-02-28T20:33:20.399395Z",
     "shell.execute_reply": "2024-02-28T20:33:20.398573Z",
     "shell.execute_reply.started": "2024-02-28T20:33:20.370766Z"
    },
    "papermill": {
     "duration": null,
     "end_time": null,
     "exception": null,
     "start_time": null,
     "status": "pending"
    },
    "tags": []
   },
   "outputs": [],
   "source": [
    "from sklearn.tree import DecisionTreeClassifier\n",
    "dt_model = DecisionTreeClassifier(random_state=42)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "a818077a",
   "metadata": {
    "execution": {
     "iopub.execute_input": "2024-02-28T20:33:24.678143Z",
     "iopub.status.busy": "2024-02-28T20:33:24.677822Z",
     "iopub.status.idle": "2024-02-28T20:33:28.015444Z",
     "shell.execute_reply": "2024-02-28T20:33:28.014553Z",
     "shell.execute_reply.started": "2024-02-28T20:33:24.678119Z"
    },
    "papermill": {
     "duration": null,
     "end_time": null,
     "exception": null,
     "start_time": null,
     "status": "pending"
    },
    "tags": []
   },
   "outputs": [],
   "source": [
    "dt_model.fit(X_train, y_train)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "c8ca2ae9",
   "metadata": {
    "execution": {
     "iopub.execute_input": "2024-02-28T20:33:30.414733Z",
     "iopub.status.busy": "2024-02-28T20:33:30.413806Z",
     "iopub.status.idle": "2024-02-28T20:33:30.445350Z",
     "shell.execute_reply": "2024-02-28T20:33:30.444502Z",
     "shell.execute_reply.started": "2024-02-28T20:33:30.414688Z"
    },
    "papermill": {
     "duration": null,
     "end_time": null,
     "exception": null,
     "start_time": null,
     "status": "pending"
    },
    "tags": []
   },
   "outputs": [],
   "source": [
    "dt_y_pred = dt_model.predict(X_test)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "8e6dc11c",
   "metadata": {
    "execution": {
     "iopub.execute_input": "2024-02-28T20:33:32.917637Z",
     "iopub.status.busy": "2024-02-28T20:33:32.916912Z",
     "iopub.status.idle": "2024-02-28T20:33:33.162356Z",
     "shell.execute_reply": "2024-02-28T20:33:33.161428Z",
     "shell.execute_reply.started": "2024-02-28T20:33:32.917605Z"
    },
    "papermill": {
     "duration": null,
     "end_time": null,
     "exception": null,
     "start_time": null,
     "status": "pending"
    },
    "tags": []
   },
   "outputs": [],
   "source": [
    "dt_accuracy = accuracy_score(y_test, dt_y_pred)\n",
    "print(\"accuracy:\", dt_accuracy)"
   ]
  },
  {
   "cell_type": "markdown",
   "id": "0dfe26a4",
   "metadata": {
    "papermill": {
     "duration": null,
     "end_time": null,
     "exception": null,
     "start_time": null,
     "status": "pending"
    },
    "tags": []
   },
   "source": [
    "# Random forests\n"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "580c6e88",
   "metadata": {
    "execution": {
     "iopub.execute_input": "2024-02-28T20:33:37.146957Z",
     "iopub.status.busy": "2024-02-28T20:33:37.145942Z",
     "iopub.status.idle": "2024-02-28T20:33:40.375233Z",
     "shell.execute_reply": "2024-02-28T20:33:40.374273Z",
     "shell.execute_reply.started": "2024-02-28T20:33:37.146922Z"
    },
    "papermill": {
     "duration": null,
     "end_time": null,
     "exception": null,
     "start_time": null,
     "status": "pending"
    },
    "tags": []
   },
   "outputs": [],
   "source": [
    "from sklearn.tree import DecisionTreeClassifier\n",
    "dt_model = DecisionTreeClassifier(random_state=42)\n",
    "dt_model.fit(X_train, y_train)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "fdc4234d",
   "metadata": {
    "execution": {
     "iopub.execute_input": "2024-02-28T20:33:42.697604Z",
     "iopub.status.busy": "2024-02-28T20:33:42.697221Z",
     "iopub.status.idle": "2024-02-28T20:33:42.965045Z",
     "shell.execute_reply": "2024-02-28T20:33:42.964106Z",
     "shell.execute_reply.started": "2024-02-28T20:33:42.697574Z"
    },
    "papermill": {
     "duration": null,
     "end_time": null,
     "exception": null,
     "start_time": null,
     "status": "pending"
    },
    "tags": []
   },
   "outputs": [],
   "source": [
    "dt_y_pred = dt_model.predict(X_test)\n",
    "\n",
    "# Evaluate the Decision Tree model\n",
    "dt_accuracy = accuracy_score(y_test, dt_y_pred)\n",
    "print(\"Decision Tree Accuracy:\", dt_accuracy)"
   ]
  },
  {
   "cell_type": "markdown",
   "id": "1eef14a8",
   "metadata": {
    "papermill": {
     "duration": null,
     "end_time": null,
     "exception": null,
     "start_time": null,
     "status": "pending"
    },
    "tags": []
   },
   "source": [
    "# LSTM\n"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "3d95e691",
   "metadata": {
    "execution": {
     "iopub.execute_input": "2024-02-28T20:34:03.811369Z",
     "iopub.status.busy": "2024-02-28T20:34:03.811034Z",
     "iopub.status.idle": "2024-02-28T20:34:16.297599Z",
     "shell.execute_reply": "2024-02-28T20:34:16.296591Z",
     "shell.execute_reply.started": "2024-02-28T20:34:03.811342Z"
    },
    "papermill": {
     "duration": null,
     "end_time": null,
     "exception": null,
     "start_time": null,
     "status": "pending"
    },
    "tags": []
   },
   "outputs": [],
   "source": [
    "import numpy as np\n",
    "from tensorflow.keras.models import Sequential\n",
    "from tensorflow.keras.layers import LSTM, Dense, Dropout\n",
    "from sklearn.preprocessing import LabelEncoder\n",
    "from sklearn.metrics import accuracy_score\n",
    "from sklearn.model_selection import train_test_split"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "45a3ea7a",
   "metadata": {
    "execution": {
     "iopub.execute_input": "2024-02-28T20:34:21.694938Z",
     "iopub.status.busy": "2024-02-28T20:34:21.693749Z",
     "iopub.status.idle": "2024-02-28T20:34:23.351375Z",
     "shell.execute_reply": "2024-02-28T20:34:23.350292Z",
     "shell.execute_reply.started": "2024-02-28T20:34:21.694901Z"
    },
    "papermill": {
     "duration": null,
     "end_time": null,
     "exception": null,
     "start_time": null,
     "status": "pending"
    },
    "tags": []
   },
   "outputs": [],
   "source": [
    "X_train_array = X_train.values.astype(np.float32)\n",
    "X_test_array = X_test.values.astype(np.float32)\n",
    "label_encoder = LabelEncoder()\n",
    "y_train_encoded = label_encoder.fit_transform(y_train)\n",
    "y_test_encoded = label_encoder.transform(y_test)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "3b2b4168",
   "metadata": {
    "execution": {
     "iopub.execute_input": "2024-02-28T20:34:29.567410Z",
     "iopub.status.busy": "2024-02-28T20:34:29.567010Z",
     "iopub.status.idle": "2024-02-28T20:34:29.573526Z",
     "shell.execute_reply": "2024-02-28T20:34:29.572343Z",
     "shell.execute_reply.started": "2024-02-28T20:34:29.567374Z"
    },
    "papermill": {
     "duration": null,
     "end_time": null,
     "exception": null,
     "start_time": null,
     "status": "pending"
    },
    "tags": []
   },
   "outputs": [],
   "source": [
    "X_train_reshaped = np.reshape(\n",
    "    X_train_array, (X_train_array.shape[0], 1, X_train_array.shape[1]))\n",
    "X_test_reshaped = np.reshape(\n",
    "    X_test_array, (X_test_array.shape[0], 1, X_test_array.shape[1]))"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "5ba22307",
   "metadata": {
    "execution": {
     "iopub.execute_input": "2024-02-28T20:34:32.593258Z",
     "iopub.status.busy": "2024-02-28T20:34:32.592631Z",
     "iopub.status.idle": "2024-02-28T20:34:32.597849Z",
     "shell.execute_reply": "2024-02-28T20:34:32.596788Z",
     "shell.execute_reply.started": "2024-02-28T20:34:32.593225Z"
    },
    "papermill": {
     "duration": null,
     "end_time": null,
     "exception": null,
     "start_time": null,
     "status": "pending"
    },
    "tags": []
   },
   "outputs": [],
   "source": [
    "from tensorflow.keras.layers import LSTM, Dense, Dropout\n",
    "from tensorflow.keras.models import Sequential"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "05d6c2a2",
   "metadata": {
    "execution": {
     "iopub.execute_input": "2024-02-28T20:34:35.967986Z",
     "iopub.status.busy": "2024-02-28T20:34:35.967129Z",
     "iopub.status.idle": "2024-02-28T20:34:37.983732Z",
     "shell.execute_reply": "2024-02-28T20:34:37.982934Z",
     "shell.execute_reply.started": "2024-02-28T20:34:35.967950Z"
    },
    "papermill": {
     "duration": null,
     "end_time": null,
     "exception": null,
     "start_time": null,
     "status": "pending"
    },
    "tags": []
   },
   "outputs": [],
   "source": [
    "model = Sequential()\n",
    "model.add(LSTM(units=128, input_shape=(\n",
    "    1, X_train_array.shape[1]), return_sequences=True))\n",
    "model.add(Dropout(0.2))\n",
    "model.add(LSTM(units=64, return_sequences=True))\n",
    "model.add(Dropout(0.2))\n",
    "model.add(LSTM(units=32, return_sequences=False))\n",
    "model.add(Dropout(0.2))\n",
    "model.add(Dense(units=64, activation='relu'))\n",
    "model.add(Dropout(0.2))\n",
    "model.add(Dense(units=32, activation='relu'))\n",
    "model.add(Dense(units=1, activation='sigmoid'))"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "70506f9c",
   "metadata": {
    "execution": {
     "iopub.execute_input": "2024-02-28T20:34:47.317746Z",
     "iopub.status.busy": "2024-02-28T20:34:47.317029Z",
     "iopub.status.idle": "2024-02-28T20:34:47.340010Z",
     "shell.execute_reply": "2024-02-28T20:34:47.338881Z",
     "shell.execute_reply.started": "2024-02-28T20:34:47.317713Z"
    },
    "papermill": {
     "duration": null,
     "end_time": null,
     "exception": null,
     "start_time": null,
     "status": "pending"
    },
    "tags": []
   },
   "outputs": [],
   "source": [
    "model.compile(optimizer='adam', loss='binary_crossentropy',\n",
    "              metrics=['accuracy'])"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "a6cafe58",
   "metadata": {
    "execution": {
     "iopub.execute_input": "2024-02-28T20:34:50.132581Z",
     "iopub.status.busy": "2024-02-28T20:34:50.131713Z",
     "iopub.status.idle": "2024-02-28T20:34:50.168897Z",
     "shell.execute_reply": "2024-02-28T20:34:50.167980Z",
     "shell.execute_reply.started": "2024-02-28T20:34:50.132534Z"
    },
    "papermill": {
     "duration": null,
     "end_time": null,
     "exception": null,
     "start_time": null,
     "status": "pending"
    },
    "tags": []
   },
   "outputs": [],
   "source": [
    "model.summary()"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "fb4e2eb7",
   "metadata": {
    "execution": {
     "iopub.execute_input": "2024-02-28T20:34:57.849042Z",
     "iopub.status.busy": "2024-02-28T20:34:57.848387Z",
     "iopub.status.idle": "2024-02-28T20:58:09.859733Z",
     "shell.execute_reply": "2024-02-28T20:58:09.858723Z",
     "shell.execute_reply.started": "2024-02-28T20:34:57.849008Z"
    },
    "papermill": {
     "duration": null,
     "end_time": null,
     "exception": null,
     "start_time": null,
     "status": "pending"
    },
    "tags": []
   },
   "outputs": [],
   "source": [
    "model.fit(X_train_reshaped, y_train_encoded, epochs=30,\n",
    "          batch_size=32, validation_split=0.1)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "8df9b4ba",
   "metadata": {
    "execution": {
     "iopub.execute_input": "2024-02-28T21:02:40.398517Z",
     "iopub.status.busy": "2024-02-28T21:02:40.397537Z",
     "iopub.status.idle": "2024-02-28T21:02:48.047278Z",
     "shell.execute_reply": "2024-02-28T21:02:48.046275Z",
     "shell.execute_reply.started": "2024-02-28T21:02:40.398480Z"
    },
    "papermill": {
     "duration": null,
     "end_time": null,
     "exception": null,
     "start_time": null,
     "status": "pending"
    },
    "tags": []
   },
   "outputs": [],
   "source": [
    "# Run the model on the test inputs, then threshold its outputs at 0.5\n",
    "# to obtain integer 0/1 predictions.\n",
    "y_pred_proba = model.predict(X_test_reshaped)\n",
    "y_pred = np.greater(y_pred_proba, 0.5).astype(int)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "c9c1b0ae",
   "metadata": {
    "execution": {
     "iopub.execute_input": "2024-02-27T02:51:31.677208Z",
     "iopub.status.busy": "2024-02-27T02:51:31.676880Z",
     "iopub.status.idle": "2024-02-27T02:51:31.686765Z",
     "shell.execute_reply": "2024-02-27T02:51:31.685738Z",
     "shell.execute_reply.started": "2024-02-27T02:51:31.677180Z"
    },
    "papermill": {
     "duration": null,
     "end_time": null,
     "exception": null,
     "start_time": null,
     "status": "pending"
    },
    "tags": []
   },
   "outputs": [],
   "source": [
    "# Compare the thresholded predictions against the encoded test labels\n",
    "# and report the resulting score.\n",
    "accuracy = accuracy_score(y_true=y_test_encoded, y_pred=y_pred)\n",
    "print(\"Accuracy:\", accuracy)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "963f04ba",
   "metadata": {
    "execution": {
     "iopub.execute_input": "2024-02-27T02:51:35.877861Z",
     "iopub.status.busy": "2024-02-27T02:51:35.877122Z",
     "iopub.status.idle": "2024-02-27T02:51:35.881902Z",
     "shell.execute_reply": "2024-02-27T02:51:35.880765Z",
     "shell.execute_reply.started": "2024-02-27T02:51:35.877829Z"
    },
    "papermill": {
     "duration": null,
     "end_time": null,
     "exception": null,
     "start_time": null,
     "status": "pending"
    },
    "tags": []
   },
   "outputs": [],
   "source": [
    "import matplotlib.pyplot as plt"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "cb765c7d",
   "metadata": {
    "execution": {
     "iopub.execute_input": "2024-02-28T21:14:46.444310Z",
     "iopub.status.busy": "2024-02-28T21:14:46.443465Z",
     "iopub.status.idle": "2024-02-28T21:14:46.448272Z",
     "shell.execute_reply": "2024-02-28T21:14:46.447257Z",
     "shell.execute_reply.started": "2024-02-28T21:14:46.444277Z"
    },
    "papermill": {
     "duration": null,
     "end_time": null,
     "exception": null,
     "start_time": null,
     "status": "pending"
    },
    "tags": []
   },
   "outputs": [],
   "source": [
    "import pickle"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "dd47a5eb",
   "metadata": {
    "execution": {
     "iopub.execute_input": "2024-02-28T21:14:49.219822Z",
     "iopub.status.busy": "2024-02-28T21:14:49.219019Z",
     "iopub.status.idle": "2024-02-28T21:14:49.316868Z",
     "shell.execute_reply": "2024-02-28T21:14:49.315889Z",
     "shell.execute_reply.started": "2024-02-28T21:14:49.219786Z"
    },
    "papermill": {
     "duration": null,
     "end_time": null,
     "exception": null,
     "start_time": null,
     "status": "pending"
    },
    "tags": []
   },
   "outputs": [],
   "source": [
    "# Serialize the current `model` object to model.pkl in the working directory.\n",
    "# NOTE(review): loading a pickle file can execute arbitrary code, so this\n",
    "# artifact should only ever be unpickled from a trusted source. Keras also\n",
    "# offers its own model.save() format -- consider whether it fits better here.\n",
    "with open('model.pkl', 'wb') as f:\n",
    "    pickle.dump(model, f)"
   ]
  },
  {
   "cell_type": "markdown",
   "id": "93655540",
   "metadata": {
    "papermill": {
     "duration": null,
     "end_time": null,
     "exception": null,
     "start_time": null,
     "status": "pending"
    },
    "tags": []
   },
   "source": [
    "# CNN\n",
    "\n",
    "#### `TODO: there is an unresolved problem somewhere in this section; its cause has not been identified yet`\n"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "8144cd90",
   "metadata": {
    "execution": {
     "iopub.execute_input": "2024-02-27T02:25:33.293261Z",
     "iopub.status.busy": "2024-02-27T02:25:33.292866Z",
     "iopub.status.idle": "2024-02-27T02:25:33.298337Z",
     "shell.execute_reply": "2024-02-27T02:25:33.297216Z",
     "shell.execute_reply.started": "2024-02-27T02:25:33.293228Z"
    },
    "papermill": {
     "duration": null,
     "end_time": null,
     "exception": null,
     "start_time": null,
     "status": "pending"
    },
    "tags": []
   },
   "outputs": [],
   "source": [
    "from sklearn.model_selection import train_test_split\n",
    "from tensorflow.keras.models import Sequential\n",
    "from tensorflow.keras.layers import Conv1D, MaxPooling1D, Flatten, Dense\n",
    "from tensorflow.keras.optimizers import Adam"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "1507a936",
   "metadata": {
    "execution": {
     "iopub.execute_input": "2024-02-27T02:25:41.828280Z",
     "iopub.status.busy": "2024-02-27T02:25:41.827891Z",
     "iopub.status.idle": "2024-02-27T02:25:41.914213Z",
     "shell.execute_reply": "2024-02-27T02:25:41.913392Z",
     "shell.execute_reply.started": "2024-02-27T02:25:41.828241Z"
    },
    "papermill": {
     "duration": null,
     "end_time": null,
     "exception": null,
     "start_time": null,
     "status": "pending"
    },
    "tags": []
   },
   "outputs": [],
   "source": [
    "# 80/10/10 split: hold out 20% of the data, then cut that hold-out in half\n",
    "# to obtain the validation and test sets. Both calls use the same fixed seed.\n",
    "X_train, X_temp, y_train, y_temp = train_test_split(\n",
    "    X,\n",
    "    y,\n",
    "    test_size=0.2,\n",
    "    random_state=42,\n",
    ")\n",
    "X_val, X_test, y_val, y_test = train_test_split(\n",
    "    X_temp,\n",
    "    y_temp,\n",
    "    test_size=0.5,\n",
    "    random_state=42,\n",
    ")"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "febb5829",
   "metadata": {
    "execution": {
     "iopub.execute_input": "2024-02-27T02:25:45.647678Z",
     "iopub.status.busy": "2024-02-27T02:25:45.646969Z",
     "iopub.status.idle": "2024-02-27T02:25:45.651746Z",
     "shell.execute_reply": "2024-02-27T02:25:45.650798Z",
     "shell.execute_reply.started": "2024-02-27T02:25:45.647647Z"
    },
    "papermill": {
     "duration": null,
     "end_time": null,
     "exception": null,
     "start_time": null,
     "status": "pending"
    },
    "tags": []
   },
   "outputs": [],
   "source": [
    "# Size of the second axis of X_train; the model definition that follows\n",
    "# uses it as the first entry of the Conv1D input_shape.\n",
    "num_features = X_train.shape[1]"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "131be457",
   "metadata": {
    "execution": {
     "iopub.execute_input": "2024-02-27T02:25:55.207330Z",
     "iopub.status.busy": "2024-02-27T02:25:55.206931Z",
     "iopub.status.idle": "2024-02-27T02:25:55.281487Z",
     "shell.execute_reply": "2024-02-27T02:25:55.280767Z",
     "shell.execute_reply.started": "2024-02-27T02:25:55.207300Z"
    },
    "papermill": {
     "duration": null,
     "end_time": null,
     "exception": null,
     "start_time": null,
     "status": "pending"
    },
    "tags": []
   },
   "outputs": [],
   "source": [
    "# 1D CNN: two Conv1D + max-pooling stages, then a flattened dense hidden\n",
    "# layer and a single sigmoid output unit.\n",
    "model = Sequential()\n",
    "model.add(Conv1D(filters=32, kernel_size=3, activation='relu',\n",
    "                 input_shape=(num_features, 1)))\n",
    "model.add(MaxPooling1D(pool_size=2))\n",
    "model.add(Conv1D(filters=64, kernel_size=3, activation='relu'))\n",
    "model.add(MaxPooling1D(pool_size=2))\n",
    "model.add(Flatten())\n",
    "model.add(Dense(64, activation='relu'))\n",
    "model.add(Dense(1, activation='sigmoid'))"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "1228e48c",
   "metadata": {
    "execution": {
     "iopub.execute_input": "2024-02-27T02:26:02.388725Z",
     "iopub.status.busy": "2024-02-27T02:26:02.388349Z",
     "iopub.status.idle": "2024-02-27T02:26:02.402654Z",
     "shell.execute_reply": "2024-02-27T02:26:02.401613Z",
     "shell.execute_reply.started": "2024-02-27T02:26:02.388685Z"
    },
    "papermill": {
     "duration": null,
     "end_time": null,
     "exception": null,
     "start_time": null,
     "status": "pending"
    },
    "tags": []
   },
   "outputs": [],
   "source": [
    "# Adam with an explicit learning rate of 0.001, binary cross-entropy loss,\n",
    "# and accuracy as the tracked metric.\n",
    "model.compile(\n",
    "    optimizer=Adam(learning_rate=0.001),\n",
    "    loss='binary_crossentropy',\n",
    "    metrics=['accuracy'],\n",
    ")"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "61cf26b0",
   "metadata": {
    "execution": {
     "iopub.execute_input": "2024-02-27T02:26:49.628751Z",
     "iopub.status.busy": "2024-02-27T02:26:49.628140Z",
     "iopub.status.idle": "2024-02-27T02:26:49.633203Z",
     "shell.execute_reply": "2024-02-27T02:26:49.632167Z",
     "shell.execute_reply.started": "2024-02-27T02:26:49.628710Z"
    },
    "papermill": {
     "duration": null,
     "end_time": null,
     "exception": null,
     "start_time": null,
     "status": "pending"
    },
    "tags": []
   },
   "outputs": [],
   "source": [
    "import tensorflow as tf\n",
    "from sklearn.model_selection import train_test_split\n",
    "from sklearn.preprocessing import StandardScaler"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "6bf5881a",
   "metadata": {
    "execution": {
     "iopub.execute_input": "2024-02-27T02:26:56.957652Z",
     "iopub.status.busy": "2024-02-27T02:26:56.957291Z",
     "iopub.status.idle": "2024-02-27T02:26:58.040698Z",
     "shell.execute_reply": "2024-02-27T02:26:58.039438Z",
     "shell.execute_reply.started": "2024-02-27T02:26:56.957622Z"
    },
    "papermill": {
     "duration": null,
     "end_time": null,
     "exception": null,
     "start_time": null,
     "status": "pending"
    },
    "tags": []
   },
   "outputs": [],
   "source": [
    "# Standardize the features with StandardScaler and convert the labels\n",
    "# with to_categorical.\n",
    "# NOTE(review): the scaler is fit on the full X rather than on the training\n",
    "# split only, and the model.fit cell of this section is passed X_train /\n",
    "# y_train, not X_scaled / y_encoded -- confirm whether these were meant to\n",
    "# be used for training.\n",
    "# NOTE(review): to_categorical produces one column per class, while the CNN\n",
    "# in this section ends in Dense(1, activation='sigmoid') -- verify which\n",
    "# label format the model is supposed to receive.\n",
    "scaler = StandardScaler()\n",
    "X_scaled = scaler.fit_transform(X)\n",
    "y_encoded = tf.keras.utils.to_categorical(y)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "46ecaaa1",
   "metadata": {
    "execution": {
     "iopub.execute_input": "2024-02-27T02:26:08.518224Z",
     "iopub.status.busy": "2024-02-27T02:26:08.517758Z",
     "iopub.status.idle": "2024-02-27T02:26:09.398867Z",
     "shell.execute_reply": "2024-02-27T02:26:09.397682Z",
     "shell.execute_reply.started": "2024-02-27T02:26:08.518190Z"
    },
    "papermill": {
     "duration": null,
     "end_time": null,
     "exception": null,
     "start_time": null,
     "status": "pending"
    },
    "tags": []
   },
   "outputs": [],
   "source": [
    "# Train for 10 epochs in mini-batches of 32, evaluating on the explicit\n",
    "# validation split; the object returned by fit() is kept in `history`.\n",
    "history = model.fit(\n",
    "    x=X_train,\n",
    "    y=y_train,\n",
    "    epochs=10,\n",
    "    batch_size=32,\n",
    "    validation_data=(X_val, y_val),\n",
    ")"
   ]
  }
 ],
 "metadata": {
  "kaggle": {
   "accelerator": "nvidiaTeslaT4",
   "dataSources": [
    {
     "datasetId": 1936563,
     "sourceId": 6674905,
     "sourceType": "datasetVersion"
    }
   ],
   "dockerImageVersionId": 30648,
   "isGpuEnabled": true,
   "isInternetEnabled": true,
   "language": "python",
   "sourceType": "notebook"
  },
  "kernelspec": {
   "display_name": "Python 3",
   "language": "python",
   "name": "python3"
  },
  "language_info": {
   "codemirror_mode": {
    "name": "ipython",
    "version": 3
   },
   "file_extension": ".py",
   "mimetype": "text/x-python",
   "name": "python",
   "nbconvert_exporter": "python",
   "pygments_lexer": "ipython3",
   "version": "3.10.13"
  },
  "papermill": {
   "default_parameters": {},
   "duration": 8.029196,
   "end_time": "2024-02-28T21:15:45.609983",
   "environment_variables": {},
   "exception": true,
   "input_path": "__notebook__.ipynb",
   "output_path": "__notebook__.ipynb",
   "parameters": {},
   "start_time": "2024-02-28T21:15:37.580787",
   "version": "2.5.0"
  }
 },
 "nbformat": 4,
 "nbformat_minor": 5
}