{ "cells": [ { "cell_type": "code", "execution_count": null, "id": "ec70045d", "metadata": { "_cell_guid": "b1076dfc-b9ad-4769-8c92-a6c4dae69d19", "_uuid": "8f2839f25d086af736a60e9eeb907d3b93b6e0e5", "execution": { "iopub.execute_input": "2024-02-28T21:15:40.650918Z", "iopub.status.busy": "2024-02-28T21:15:40.650589Z", "iopub.status.idle": "2024-02-28T21:15:41.502437Z", "shell.execute_reply": "2024-02-28T21:15:41.501426Z" }, "papermill": { "duration": 0.87031, "end_time": "2024-02-28T21:15:41.504554", "exception": false, "start_time": "2024-02-28T21:15:40.634244", "status": "completed" }, "tags": [] }, "outputs": [], "source": [ "# This Python 3 environment comes with many helpful analytics libraries installed\n", "# It is defined by the kaggle/python Docker image: https://github.com/kaggle/docker-python\n", "# For example, here's several helpful packages to load\n", "\n", "import numpy as np # linear algebra\n", "import pandas as pd # data processing, CSV file I/O (e.g. pd.read_csv)\n", "\n", "# Input data files are available in the read-only \"../input/\" directory\n", "# For example, running this (by clicking run or pressing Shift+Enter) will list all files under the input directory\n", "\n", "import os\n", "for dirname, _, filenames in os.walk('/kaggle/input'):\n", " for filename in filenames:\n", " print(os.path.join(dirname, filename))\n", "\n", "# You can write up to 20GB to the current directory (/kaggle/working/) that gets preserved as output when you create a version using \"Save & Run All\"\n", "# You can also write temporary files to /kaggle/temp/, but they won't be saved outside of the current session" ] }, { "cell_type": "code", "execution_count": null, "id": "31b2bdbf", "metadata": { "execution": { "iopub.execute_input": "2024-02-28T21:15:41.536451Z", "iopub.status.busy": "2024-02-28T21:15:41.536047Z", "iopub.status.idle": "2024-02-28T21:15:42.592902Z", "shell.execute_reply": "2024-02-28T21:15:42.592121Z" }, "papermill": { "duration": 1.07523, 
"end_time": "2024-02-28T21:15:42.595268", "exception": false, "start_time": "2024-02-28T21:15:41.520038", "status": "completed" }, "tags": [] }, "outputs": [], "source": [ "df = pd.read_csv(\n", " \"/kaggle/input/personal-key-indicators-of-heart-disease/2020/heart_2020_cleaned.csv\")" ] }, { "cell_type": "code", "execution_count": null, "id": "a12bd286", "metadata": { "execution": { "iopub.execute_input": "2024-02-28T21:15:42.629222Z", "iopub.status.busy": "2024-02-28T21:15:42.628916Z", "iopub.status.idle": "2024-02-28T21:15:43.061965Z", "shell.execute_reply": "2024-02-28T21:15:43.061012Z" }, "papermill": { "duration": 0.453304, "end_time": "2024-02-28T21:15:43.064364", "exception": false, "start_time": "2024-02-28T21:15:42.611060", "status": "completed" }, "tags": [] }, "outputs": [], "source": [ "df.isnull().sum()" ] }, { "cell_type": "code", "execution_count": null, "id": "98b4a85f", "metadata": { "execution": { "iopub.execute_input": "2024-02-28T21:15:43.098384Z", "iopub.status.busy": "2024-02-28T21:15:43.098077Z", "iopub.status.idle": "2024-02-28T21:15:43.549973Z", "shell.execute_reply": "2024-02-28T21:15:43.548934Z" }, "papermill": { "duration": 0.470772, "end_time": "2024-02-28T21:15:43.552722", "exception": false, "start_time": "2024-02-28T21:15:43.081950", "status": "completed" }, "tags": [] }, "outputs": [], "source": [ "df = pd.get_dummies(df, columns=['Smoking', 'AlcoholDrinking', 'Sex', 'AgeCategory', 'Race',\n", " 'Diabetic', 'PhysicalActivity', 'GenHealth', 'Asthma', 'KidneyDisease', 'SkinCancer'])" ] }, { "cell_type": "code", "execution_count": null, "id": "4a49bbd7", "metadata": { "execution": { "iopub.execute_input": "2024-02-28T21:15:43.590333Z", "iopub.status.busy": "2024-02-28T21:15:43.589967Z", "iopub.status.idle": "2024-02-28T21:15:43.596311Z", "shell.execute_reply": "2024-02-28T21:15:43.595602Z" }, "papermill": { "duration": 0.026491, "end_time": "2024-02-28T21:15:43.598298", "exception": false, "start_time": "2024-02-28T21:15:43.571807", 
"status": "completed" }, "tags": [] }, "outputs": [], "source": [ "df['BMI'] = df['BMI'] / (df['BMI'] ** 2)" ] }, { "cell_type": "code", "execution_count": null, "id": "8a1121c3", "metadata": { "execution": { "iopub.execute_input": "2024-02-28T21:15:43.629024Z", "iopub.status.busy": "2024-02-28T21:15:43.628731Z", "iopub.status.idle": "2024-02-28T21:15:45.174952Z", "shell.execute_reply": "2024-02-28T21:15:45.173525Z" }, "papermill": { "duration": 1.563564, "end_time": "2024-02-28T21:15:45.176760", "exception": true, "start_time": "2024-02-28T21:15:43.613196", "status": "failed" }, "tags": [] }, "outputs": [], "source": [ "from sklearn.preprocessing import MinMaxScaler\n", "numerical_columns = ['BMI', 'Stroke', 'PhysicalHealth',\n", " 'MentalHealth', 'DiffWalking', 'SleepTime']\n", "scaler = MinMaxScaler()\n", "df[numerical_columns] = scaler.fit_transform(df[numerical_columns])" ] }, { "cell_type": "code", "execution_count": null, "id": "c34257e5", "metadata": { "execution": { "iopub.execute_input": "2024-02-28T20:31:24.347783Z", "iopub.status.busy": "2024-02-28T20:31:24.347070Z", "iopub.status.idle": "2024-02-28T20:31:24.504857Z", "shell.execute_reply": "2024-02-28T20:31:24.503875Z", "shell.execute_reply.started": "2024-02-28T20:31:24.347750Z" }, "papermill": { "duration": null, "end_time": null, "exception": null, "start_time": null, "status": "pending" }, "tags": [] }, "outputs": [], "source": [ "for column in df.columns:\n", " print(column, df[column].unique())" ] }, { "cell_type": "code", "execution_count": null, "id": "305666d0", "metadata": { "execution": { "iopub.execute_input": "2024-02-28T20:31:26.899524Z", "iopub.status.busy": "2024-02-28T20:31:26.899205Z", "iopub.status.idle": "2024-02-28T20:31:26.961477Z", "shell.execute_reply": "2024-02-28T20:31:26.960639Z", "shell.execute_reply.started": "2024-02-28T20:31:26.899502Z" }, "papermill": { "duration": null, "end_time": null, "exception": null, "start_time": null, "status": "pending" }, "tags": [] }, 
"outputs": [], "source": [ "df['Stroke'] = df['Stroke'].map({'No': 0, 'Yes': 1})\n", "df['DiffWalking'] = df['DiffWalking'].map({'No': 0, 'Yes': 1})" ] }, { "cell_type": "code", "execution_count": null, "id": "b2cc4716", "metadata": { "execution": { "iopub.execute_input": "2024-02-28T20:31:28.914217Z", "iopub.status.busy": "2024-02-28T20:31:28.913857Z", "iopub.status.idle": "2024-02-28T20:31:28.945954Z", "shell.execute_reply": "2024-02-28T20:31:28.944829Z", "shell.execute_reply.started": "2024-02-28T20:31:28.914181Z" }, "papermill": { "duration": null, "end_time": null, "exception": null, "start_time": null, "status": "pending" }, "tags": [] }, "outputs": [], "source": [ "scaler = MinMaxScaler()\n", "numerical_columns = ['BMI', 'PhysicalHealth',\n", " 'MentalHealth', 'DiffWalking', 'SleepTime']\n", "df[numerical_columns] = scaler.fit_transform(df[numerical_columns])" ] }, { "cell_type": "code", "execution_count": null, "id": "15944d03", "metadata": { "execution": { "iopub.execute_input": "2024-02-28T20:31:30.518053Z", "iopub.status.busy": "2024-02-28T20:31:30.517356Z", "iopub.status.idle": "2024-02-28T20:31:30.592331Z", "shell.execute_reply": "2024-02-28T20:31:30.591365Z", "shell.execute_reply.started": "2024-02-28T20:31:30.518018Z" }, "papermill": { "duration": null, "end_time": null, "exception": null, "start_time": null, "status": "pending" }, "tags": [] }, "outputs": [], "source": [ "z_scores = df[numerical_columns].apply(lambda x: (x - x.mean()) / x.std())\n", "outliers = (z_scores > 3) | (z_scores < -3)\n", "df = df[~outliers.any(axis=1)]" ] }, { "cell_type": "code", "execution_count": null, "id": "b3c04332", "metadata": { "execution": { "iopub.execute_input": "2024-02-28T20:31:32.877312Z", "iopub.status.busy": "2024-02-28T20:31:32.876991Z", "iopub.status.idle": "2024-02-28T20:31:32.923278Z", "shell.execute_reply": "2024-02-28T20:31:32.922285Z", "shell.execute_reply.started": "2024-02-28T20:31:32.877287Z" }, "papermill": { "duration": null, "end_time": null, 
"exception": null, "start_time": null, "status": "pending" }, "tags": [] }, "outputs": [], "source": [ "print(df.isnull().sum())" ] }, { "cell_type": "code", "execution_count": null, "id": "f883f424", "metadata": { "execution": { "iopub.execute_input": "2024-02-28T20:31:35.118412Z", "iopub.status.busy": "2024-02-28T20:31:35.118046Z", "iopub.status.idle": "2024-02-28T20:31:35.138194Z", "shell.execute_reply": "2024-02-28T20:31:35.137356Z", "shell.execute_reply.started": "2024-02-28T20:31:35.118385Z" }, "papermill": { "duration": null, "end_time": null, "exception": null, "start_time": null, "status": "pending" }, "tags": [] }, "outputs": [], "source": [ "X = df.drop(columns=['HeartDisease'])\n", "y = df['HeartDisease']" ] }, { "cell_type": "code", "execution_count": null, "id": "937f456d", "metadata": { "execution": { "iopub.execute_input": "2024-02-28T20:31:36.921083Z", "iopub.status.busy": "2024-02-28T20:31:36.920460Z", "iopub.status.idle": "2024-02-28T20:31:37.092675Z", "shell.execute_reply": "2024-02-28T20:31:37.091807Z", "shell.execute_reply.started": "2024-02-28T20:31:36.921053Z" }, "papermill": { "duration": null, "end_time": null, "exception": null, "start_time": null, "status": "pending" }, "tags": [] }, "outputs": [], "source": [ "from sklearn.model_selection import train_test_split # Add this import statement\n", "X_train, X_test, y_train, y_test = train_test_split(\n", " X, y, test_size=0.2, random_state=42)" ] }, { "cell_type": "markdown", "id": "d908667b", "metadata": { "papermill": { "duration": null, "end_time": null, "exception": null, "start_time": null, "status": "pending" }, "tags": [] }, "source": [ "# Logistic regression\n" ] }, { "cell_type": "code", "execution_count": null, "id": "d2c46021", "metadata": { "execution": { "iopub.execute_input": "2024-02-28T20:31:40.429477Z", "iopub.status.busy": "2024-02-28T20:31:40.428714Z", "iopub.status.idle": "2024-02-28T20:31:40.563938Z", "shell.execute_reply": "2024-02-28T20:31:40.563215Z", 
"shell.execute_reply.started": "2024-02-28T20:31:40.429444Z" }, "papermill": { "duration": null, "end_time": null, "exception": null, "start_time": null, "status": "pending" }, "tags": [] }, "outputs": [], "source": [ "from sklearn.linear_model import LogisticRegression\n", "from sklearn.metrics import accuracy_score, classification_report, confusion_matrix\n", "model = LogisticRegression()" ] }, { "cell_type": "code", "execution_count": null, "id": "57788a5b", "metadata": { "execution": { "iopub.execute_input": "2024-02-28T20:31:43.108928Z", "iopub.status.busy": "2024-02-28T20:31:43.108194Z", "iopub.status.idle": "2024-02-28T20:31:46.611293Z", "shell.execute_reply": "2024-02-28T20:31:46.609836Z", "shell.execute_reply.started": "2024-02-28T20:31:43.108893Z" }, "papermill": { "duration": null, "end_time": null, "exception": null, "start_time": null, "status": "pending" }, "tags": [] }, "outputs": [], "source": [ "model.fit(X_train, y_train)" ] }, { "cell_type": "code", "execution_count": null, "id": "5a09075d", "metadata": { "execution": { "iopub.execute_input": "2024-02-28T20:31:53.227765Z", "iopub.status.busy": "2024-02-28T20:31:53.227012Z", "iopub.status.idle": "2024-02-28T20:31:53.251316Z", "shell.execute_reply": "2024-02-28T20:31:53.250025Z", "shell.execute_reply.started": "2024-02-28T20:31:53.227730Z" }, "papermill": { "duration": null, "end_time": null, "exception": null, "start_time": null, "status": "pending" }, "tags": [] }, "outputs": [], "source": [ "y_pred = model.predict(X_test)" ] }, { "cell_type": "code", "execution_count": null, "id": "025c02d6", "metadata": { "execution": { "iopub.execute_input": "2024-02-28T20:31:55.958835Z", "iopub.status.busy": "2024-02-28T20:31:55.957996Z", "iopub.status.idle": "2024-02-28T20:31:56.206159Z", "shell.execute_reply": "2024-02-28T20:31:56.205249Z", "shell.execute_reply.started": "2024-02-28T20:31:55.958798Z" }, "papermill": { "duration": null, "end_time": null, "exception": null, "start_time": null, "status": 
"pending" }, "tags": [] }, "outputs": [], "source": [ "accuracy = accuracy_score(y_test, y_pred)\n", "print(\"Accuracy:\", accuracy)" ] }, { "cell_type": "markdown", "id": "30f6e656", "metadata": { "papermill": { "duration": null, "end_time": null, "exception": null, "start_time": null, "status": "pending" }, "tags": [] }, "source": [ "# KNN\n" ] }, { "cell_type": "code", "execution_count": null, "id": "53935959", "metadata": { "execution": { "iopub.execute_input": "2024-02-28T20:31:59.594538Z", "iopub.status.busy": "2024-02-28T20:31:59.593874Z", "iopub.status.idle": "2024-02-28T20:31:59.644704Z", "shell.execute_reply": "2024-02-28T20:31:59.643728Z", "shell.execute_reply.started": "2024-02-28T20:31:59.594507Z" }, "papermill": { "duration": null, "end_time": null, "exception": null, "start_time": null, "status": "pending" }, "tags": [] }, "outputs": [], "source": [ "from sklearn.neighbors import KNeighborsClassifier\n", "knn_model = KNeighborsClassifier(n_neighbors=5)" ] }, { "cell_type": "code", "execution_count": null, "id": "db4deede", "metadata": { "execution": { "iopub.execute_input": "2024-02-28T20:32:05.418662Z", "iopub.status.busy": "2024-02-28T20:32:05.417912Z", "iopub.status.idle": "2024-02-28T20:32:06.188877Z", "shell.execute_reply": "2024-02-28T20:32:06.187632Z", "shell.execute_reply.started": "2024-02-28T20:32:05.418629Z" }, "papermill": { "duration": null, "end_time": null, "exception": null, "start_time": null, "status": "pending" }, "tags": [] }, "outputs": [], "source": [ "knn_model.fit(X_train, y_train)" ] }, { "cell_type": "code", "execution_count": null, "id": "ab01ea0d", "metadata": { "execution": { "iopub.execute_input": "2024-02-28T20:32:08.060681Z", "iopub.status.busy": "2024-02-28T20:32:08.059727Z", "iopub.status.idle": "2024-02-28T20:32:48.065781Z", "shell.execute_reply": "2024-02-28T20:32:48.064651Z", "shell.execute_reply.started": "2024-02-28T20:32:08.060638Z" }, "papermill": { "duration": null, "end_time": null, "exception": null, 
"start_time": null, "status": "pending" }, "tags": [] }, "outputs": [], "source": [ "knn_y_pred = knn_model.predict(X_test)\n", "knn_accuracy = accuracy_score(y_test, knn_y_pred)\n", "print(\"KNN Accuracy:\", knn_accuracy)" ] }, { "cell_type": "markdown", "id": "fbfb3f58", "metadata": { "papermill": { "duration": null, "end_time": null, "exception": null, "start_time": null, "status": "pending" }, "tags": [] }, "source": [ "# Naive Bayes\n" ] }, { "cell_type": "code", "execution_count": null, "id": "59c6dc70", "metadata": { "execution": { "iopub.execute_input": "2024-02-28T20:33:05.648469Z", "iopub.status.busy": "2024-02-28T20:33:05.647771Z", "iopub.status.idle": "2024-02-28T20:33:05.655089Z", "shell.execute_reply": "2024-02-28T20:33:05.653963Z", "shell.execute_reply.started": "2024-02-28T20:33:05.648437Z" }, "papermill": { "duration": null, "end_time": null, "exception": null, "start_time": null, "status": "pending" }, "tags": [] }, "outputs": [], "source": [ "from sklearn.naive_bayes import GaussianNB\n", "nb_model = GaussianNB()" ] }, { "cell_type": "code", "execution_count": null, "id": "bde5534f", "metadata": { "execution": { "iopub.execute_input": "2024-02-28T20:33:07.367575Z", "iopub.status.busy": "2024-02-28T20:33:07.366646Z", "iopub.status.idle": "2024-02-28T20:33:08.279224Z", "shell.execute_reply": "2024-02-28T20:33:08.278331Z", "shell.execute_reply.started": "2024-02-28T20:33:07.367527Z" }, "papermill": { "duration": null, "end_time": null, "exception": null, "start_time": null, "status": "pending" }, "tags": [] }, "outputs": [], "source": [ "nb_model.fit(X_train, y_train)" ] }, { "cell_type": "code", "execution_count": null, "id": "13d88c07", "metadata": { "execution": { "iopub.execute_input": "2024-02-28T20:33:11.507456Z", "iopub.status.busy": "2024-02-28T20:33:11.506783Z", "iopub.status.idle": "2024-02-28T20:33:11.557327Z", "shell.execute_reply": "2024-02-28T20:33:11.556531Z", "shell.execute_reply.started": "2024-02-28T20:33:11.507420Z" }, 
"papermill": { "duration": null, "end_time": null, "exception": null, "start_time": null, "status": "pending" }, "tags": [] }, "outputs": [], "source": [ "nb_y_pred = nb_model.predict(X_test)" ] }, { "cell_type": "code", "execution_count": null, "id": "92e9d434", "metadata": { "execution": { "iopub.execute_input": "2024-02-28T20:33:17.627887Z", "iopub.status.busy": "2024-02-28T20:33:17.627102Z", "iopub.status.idle": "2024-02-28T20:33:17.872462Z", "shell.execute_reply": "2024-02-28T20:33:17.871605Z", "shell.execute_reply.started": "2024-02-28T20:33:17.627855Z" }, "papermill": { "duration": null, "end_time": null, "exception": null, "start_time": null, "status": "pending" }, "tags": [] }, "outputs": [], "source": [ "nb_accuracy = accuracy_score(y_test, nb_y_pred)\n", "print(\"Naive Bayes Accuracy:\", nb_accuracy)" ] }, { "cell_type": "markdown", "id": "32075ad4", "metadata": { "papermill": { "duration": null, "end_time": null, "exception": null, "start_time": null, "status": "pending" }, "tags": [] }, "source": [ "# Decision Tree\n" ] }, { "cell_type": "code", "execution_count": null, "id": "65c78c41", "metadata": { "execution": { "iopub.execute_input": "2024-02-28T20:33:20.370792Z", "iopub.status.busy": "2024-02-28T20:33:20.370439Z", "iopub.status.idle": "2024-02-28T20:33:20.399395Z", "shell.execute_reply": "2024-02-28T20:33:20.398573Z", "shell.execute_reply.started": "2024-02-28T20:33:20.370766Z" }, "papermill": { "duration": null, "end_time": null, "exception": null, "start_time": null, "status": "pending" }, "tags": [] }, "outputs": [], "source": [ "from sklearn.tree import DecisionTreeClassifier\n", "dt_model = DecisionTreeClassifier(random_state=42)" ] }, { "cell_type": "code", "execution_count": null, "id": "a818077a", "metadata": { "execution": { "iopub.execute_input": "2024-02-28T20:33:24.678143Z", "iopub.status.busy": "2024-02-28T20:33:24.677822Z", "iopub.status.idle": "2024-02-28T20:33:28.015444Z", "shell.execute_reply": "2024-02-28T20:33:28.014553Z", 
"shell.execute_reply.started": "2024-02-28T20:33:24.678119Z" }, "papermill": { "duration": null, "end_time": null, "exception": null, "start_time": null, "status": "pending" }, "tags": [] }, "outputs": [], "source": [ "dt_model.fit(X_train, y_train)" ] }, { "cell_type": "code", "execution_count": null, "id": "c8ca2ae9", "metadata": { "execution": { "iopub.execute_input": "2024-02-28T20:33:30.414733Z", "iopub.status.busy": "2024-02-28T20:33:30.413806Z", "iopub.status.idle": "2024-02-28T20:33:30.445350Z", "shell.execute_reply": "2024-02-28T20:33:30.444502Z", "shell.execute_reply.started": "2024-02-28T20:33:30.414688Z" }, "papermill": { "duration": null, "end_time": null, "exception": null, "start_time": null, "status": "pending" }, "tags": [] }, "outputs": [], "source": [ "dt_y_pred = dt_model.predict(X_test)" ] }, { "cell_type": "code", "execution_count": null, "id": "8e6dc11c", "metadata": { "execution": { "iopub.execute_input": "2024-02-28T20:33:32.917637Z", "iopub.status.busy": "2024-02-28T20:33:32.916912Z", "iopub.status.idle": "2024-02-28T20:33:33.162356Z", "shell.execute_reply": "2024-02-28T20:33:33.161428Z", "shell.execute_reply.started": "2024-02-28T20:33:32.917605Z" }, "papermill": { "duration": null, "end_time": null, "exception": null, "start_time": null, "status": "pending" }, "tags": [] }, "outputs": [], "source": [ "dt_accuracy = accuracy_score(y_test, dt_y_pred)\n", "print(\"accuracy:\", dt_accuracy)" ] }, { "cell_type": "markdown", "id": "0dfe26a4", "metadata": { "papermill": { "duration": null, "end_time": null, "exception": null, "start_time": null, "status": "pending" }, "tags": [] }, "source": [ "# Random forests\n" ] }, { "cell_type": "code", "execution_count": null, "id": "580c6e88", "metadata": { "execution": { "iopub.execute_input": "2024-02-28T20:33:37.146957Z", "iopub.status.busy": "2024-02-28T20:33:37.145942Z", "iopub.status.idle": "2024-02-28T20:33:40.375233Z", "shell.execute_reply": "2024-02-28T20:33:40.374273Z", 
"shell.execute_reply.started": "2024-02-28T20:33:37.146922Z" }, "papermill": { "duration": null, "end_time": null, "exception": null, "start_time": null, "status": "pending" }, "tags": [] }, "outputs": [], "source": [ "from sklearn.tree import DecisionTreeClassifier\n", "dt_model = DecisionTreeClassifier(random_state=42)\n", "dt_model.fit(X_train, y_train)" ] }, { "cell_type": "code", "execution_count": null, "id": "fdc4234d", "metadata": { "execution": { "iopub.execute_input": "2024-02-28T20:33:42.697604Z", "iopub.status.busy": "2024-02-28T20:33:42.697221Z", "iopub.status.idle": "2024-02-28T20:33:42.965045Z", "shell.execute_reply": "2024-02-28T20:33:42.964106Z", "shell.execute_reply.started": "2024-02-28T20:33:42.697574Z" }, "papermill": { "duration": null, "end_time": null, "exception": null, "start_time": null, "status": "pending" }, "tags": [] }, "outputs": [], "source": [ "dt_y_pred = dt_model.predict(X_test)\n", "\n", "# Evaluate the Decision Tree model\n", "dt_accuracy = accuracy_score(y_test, dt_y_pred)\n", "print(\"Decision Tree Accuracy:\", dt_accuracy)" ] }, { "cell_type": "markdown", "id": "1eef14a8", "metadata": { "papermill": { "duration": null, "end_time": null, "exception": null, "start_time": null, "status": "pending" }, "tags": [] }, "source": [ "# LSTM\n" ] }, { "cell_type": "code", "execution_count": null, "id": "3d95e691", "metadata": { "execution": { "iopub.execute_input": "2024-02-28T20:34:03.811369Z", "iopub.status.busy": "2024-02-28T20:34:03.811034Z", "iopub.status.idle": "2024-02-28T20:34:16.297599Z", "shell.execute_reply": "2024-02-28T20:34:16.296591Z", "shell.execute_reply.started": "2024-02-28T20:34:03.811342Z" }, "papermill": { "duration": null, "end_time": null, "exception": null, "start_time": null, "status": "pending" }, "tags": [] }, "outputs": [], "source": [ "import numpy as np\n", "from tensorflow.keras.models import Sequential\n", "from tensorflow.keras.layers import LSTM, Dense, Dropout\n", "from sklearn.preprocessing import 
LabelEncoder\n", "from sklearn.metrics import accuracy_score\n", "from sklearn.model_selection import train_test_split" ] }, { "cell_type": "code", "execution_count": null, "id": "45a3ea7a", "metadata": { "execution": { "iopub.execute_input": "2024-02-28T20:34:21.694938Z", "iopub.status.busy": "2024-02-28T20:34:21.693749Z", "iopub.status.idle": "2024-02-28T20:34:23.351375Z", "shell.execute_reply": "2024-02-28T20:34:23.350292Z", "shell.execute_reply.started": "2024-02-28T20:34:21.694901Z" }, "papermill": { "duration": null, "end_time": null, "exception": null, "start_time": null, "status": "pending" }, "tags": [] }, "outputs": [], "source": [ "X_train_array = X_train.values.astype(np.float32)\n", "X_test_array = X_test.values.astype(np.float32)\n", "label_encoder = LabelEncoder()\n", "y_train_encoded = label_encoder.fit_transform(y_train)\n", "y_test_encoded = label_encoder.transform(y_test)" ] }, { "cell_type": "code", "execution_count": null, "id": "3b2b4168", "metadata": { "execution": { "iopub.execute_input": "2024-02-28T20:34:29.567410Z", "iopub.status.busy": "2024-02-28T20:34:29.567010Z", "iopub.status.idle": "2024-02-28T20:34:29.573526Z", "shell.execute_reply": "2024-02-28T20:34:29.572343Z", "shell.execute_reply.started": "2024-02-28T20:34:29.567374Z" }, "papermill": { "duration": null, "end_time": null, "exception": null, "start_time": null, "status": "pending" }, "tags": [] }, "outputs": [], "source": [ "X_train_reshaped = np.reshape(\n", " X_train_array, (X_train_array.shape[0], 1, X_train_array.shape[1]))\n", "X_test_reshaped = np.reshape(\n", " X_test_array, (X_test_array.shape[0], 1, X_test_array.shape[1]))" ] }, { "cell_type": "code", "execution_count": null, "id": "5ba22307", "metadata": { "execution": { "iopub.execute_input": "2024-02-28T20:34:32.593258Z", "iopub.status.busy": "2024-02-28T20:34:32.592631Z", "iopub.status.idle": "2024-02-28T20:34:32.597849Z", "shell.execute_reply": "2024-02-28T20:34:32.596788Z", "shell.execute_reply.started": 
"2024-02-28T20:34:32.593225Z" }, "papermill": { "duration": null, "end_time": null, "exception": null, "start_time": null, "status": "pending" }, "tags": [] }, "outputs": [], "source": [ "from tensorflow.keras.layers import LSTM, Dense, Dropout\n", "from tensorflow.keras.models import Sequential" ] }, { "cell_type": "code", "execution_count": null, "id": "05d6c2a2", "metadata": { "execution": { "iopub.execute_input": "2024-02-28T20:34:35.967986Z", "iopub.status.busy": "2024-02-28T20:34:35.967129Z", "iopub.status.idle": "2024-02-28T20:34:37.983732Z", "shell.execute_reply": "2024-02-28T20:34:37.982934Z", "shell.execute_reply.started": "2024-02-28T20:34:35.967950Z" }, "papermill": { "duration": null, "end_time": null, "exception": null, "start_time": null, "status": "pending" }, "tags": [] }, "outputs": [], "source": [ "model = Sequential()\n", "model.add(LSTM(units=128, input_shape=(\n", " 1, X_train_array.shape[1]), return_sequences=True))\n", "model.add(Dropout(0.2))\n", "model.add(LSTM(units=64, return_sequences=True))\n", "model.add(Dropout(0.2))\n", "model.add(LSTM(units=32, return_sequences=False))\n", "model.add(Dropout(0.2))\n", "model.add(Dense(units=64, activation='relu'))\n", "model.add(Dropout(0.2))\n", "model.add(Dense(units=32, activation='relu'))\n", "model.add(Dense(units=1, activation='sigmoid'))" ] }, { "cell_type": "code", "execution_count": null, "id": "70506f9c", "metadata": { "execution": { "iopub.execute_input": "2024-02-28T20:34:47.317746Z", "iopub.status.busy": "2024-02-28T20:34:47.317029Z", "iopub.status.idle": "2024-02-28T20:34:47.340010Z", "shell.execute_reply": "2024-02-28T20:34:47.338881Z", "shell.execute_reply.started": "2024-02-28T20:34:47.317713Z" }, "papermill": { "duration": null, "end_time": null, "exception": null, "start_time": null, "status": "pending" }, "tags": [] }, "outputs": [], "source": [ "model.compile(optimizer='adam', loss='binary_crossentropy',\n", " metrics=['accuracy'])" ] }, { "cell_type": "code", 
"execution_count": null, "id": "a6cafe58", "metadata": { "execution": { "iopub.execute_input": "2024-02-28T20:34:50.132581Z", "iopub.status.busy": "2024-02-28T20:34:50.131713Z", "iopub.status.idle": "2024-02-28T20:34:50.168897Z", "shell.execute_reply": "2024-02-28T20:34:50.167980Z", "shell.execute_reply.started": "2024-02-28T20:34:50.132534Z" }, "papermill": { "duration": null, "end_time": null, "exception": null, "start_time": null, "status": "pending" }, "tags": [] }, "outputs": [], "source": [ "model.summary()" ] }, { "cell_type": "code", "execution_count": null, "id": "fb4e2eb7", "metadata": { "execution": { "iopub.execute_input": "2024-02-28T20:34:57.849042Z", "iopub.status.busy": "2024-02-28T20:34:57.848387Z", "iopub.status.idle": "2024-02-28T20:58:09.859733Z", "shell.execute_reply": "2024-02-28T20:58:09.858723Z", "shell.execute_reply.started": "2024-02-28T20:34:57.849008Z" }, "papermill": { "duration": null, "end_time": null, "exception": null, "start_time": null, "status": "pending" }, "tags": [] }, "outputs": [], "source": [ "model.fit(X_train_reshaped, y_train_encoded, epochs=30,\n", " batch_size=32, validation_split=0.1)" ] }, { "cell_type": "code", "execution_count": null, "id": "8df9b4ba", "metadata": { "execution": { "iopub.execute_input": "2024-02-28T21:02:40.398517Z", "iopub.status.busy": "2024-02-28T21:02:40.397537Z", "iopub.status.idle": "2024-02-28T21:02:48.047278Z", "shell.execute_reply": "2024-02-28T21:02:48.046275Z", "shell.execute_reply.started": "2024-02-28T21:02:40.398480Z" }, "papermill": { "duration": null, "end_time": null, "exception": null, "start_time": null, "status": "pending" }, "tags": [] }, "outputs": [], "source": [ "y_pred_proba = model.predict(X_test_reshaped)\n", "y_pred = (y_pred_proba > 0.5).astype(int)" ] }, { "cell_type": "code", "execution_count": null, "id": "c9c1b0ae", "metadata": { "execution": { "iopub.execute_input": "2024-02-27T02:51:31.677208Z", "iopub.status.busy": "2024-02-27T02:51:31.676880Z", 
"iopub.status.idle": "2024-02-27T02:51:31.686765Z", "shell.execute_reply": "2024-02-27T02:51:31.685738Z", "shell.execute_reply.started": "2024-02-27T02:51:31.677180Z" }, "papermill": { "duration": null, "end_time": null, "exception": null, "start_time": null, "status": "pending" }, "tags": [] }, "outputs": [], "source": [ "accuracy = accuracy_score(y_test_encoded, y_pred)\n", "print(\"Accuracy:\", accuracy)" ] }, { "cell_type": "code", "execution_count": null, "id": "963f04ba", "metadata": { "execution": { "iopub.execute_input": "2024-02-27T02:51:35.877861Z", "iopub.status.busy": "2024-02-27T02:51:35.877122Z", "iopub.status.idle": "2024-02-27T02:51:35.881902Z", "shell.execute_reply": "2024-02-27T02:51:35.880765Z", "shell.execute_reply.started": "2024-02-27T02:51:35.877829Z" }, "papermill": { "duration": null, "end_time": null, "exception": null, "start_time": null, "status": "pending" }, "tags": [] }, "outputs": [], "source": [ "import matplotlib.pyplot as plt" ] }, { "cell_type": "code", "execution_count": null, "id": "cb765c7d", "metadata": { "execution": { "iopub.execute_input": "2024-02-28T21:14:46.444310Z", "iopub.status.busy": "2024-02-28T21:14:46.443465Z", "iopub.status.idle": "2024-02-28T21:14:46.448272Z", "shell.execute_reply": "2024-02-28T21:14:46.447257Z", "shell.execute_reply.started": "2024-02-28T21:14:46.444277Z" }, "papermill": { "duration": null, "end_time": null, "exception": null, "start_time": null, "status": "pending" }, "tags": [] }, "outputs": [], "source": [ "import pickle" ] }, { "cell_type": "code", "execution_count": null, "id": "dd47a5eb", "metadata": { "execution": { "iopub.execute_input": "2024-02-28T21:14:49.219822Z", "iopub.status.busy": "2024-02-28T21:14:49.219019Z", "iopub.status.idle": "2024-02-28T21:14:49.316868Z", "shell.execute_reply": "2024-02-28T21:14:49.315889Z", "shell.execute_reply.started": "2024-02-28T21:14:49.219786Z" }, "papermill": { "duration": null, "end_time": null, "exception": null, "start_time": null, "status": 
"pending" }, "tags": [] }, "outputs": [], "source": [ "with open('model.pkl', 'wb') as f:\n", " pickle.dump(model, f)" ] }, { "cell_type": "markdown", "id": "93655540", "metadata": { "papermill": { "duration": null, "end_time": null, "exception": null, "start_time": null, "status": "pending" }, "tags": [] }, "source": [ "# CNN\n", "\n", "#### `probleme somewhere idk `\n" ] }, { "cell_type": "code", "execution_count": null, "id": "8144cd90", "metadata": { "execution": { "iopub.execute_input": "2024-02-27T02:25:33.293261Z", "iopub.status.busy": "2024-02-27T02:25:33.292866Z", "iopub.status.idle": "2024-02-27T02:25:33.298337Z", "shell.execute_reply": "2024-02-27T02:25:33.297216Z", "shell.execute_reply.started": "2024-02-27T02:25:33.293228Z" }, "papermill": { "duration": null, "end_time": null, "exception": null, "start_time": null, "status": "pending" }, "tags": [] }, "outputs": [], "source": [ "from sklearn.model_selection import train_test_split\n", "from tensorflow.keras.models import Sequential\n", "from tensorflow.keras.layers import Conv1D, MaxPooling1D, Flatten, Dense\n", "from tensorflow.keras.optimizers import Adam" ] }, { "cell_type": "code", "execution_count": null, "id": "1507a936", "metadata": { "execution": { "iopub.execute_input": "2024-02-27T02:25:41.828280Z", "iopub.status.busy": "2024-02-27T02:25:41.827891Z", "iopub.status.idle": "2024-02-27T02:25:41.914213Z", "shell.execute_reply": "2024-02-27T02:25:41.913392Z", "shell.execute_reply.started": "2024-02-27T02:25:41.828241Z" }, "papermill": { "duration": null, "end_time": null, "exception": null, "start_time": null, "status": "pending" }, "tags": [] }, "outputs": [], "source": [ "X_train, X_temp, y_train, y_temp = train_test_split(\n", " X, y, test_size=0.2, random_state=42)\n", "X_val, X_test, y_val, y_test = train_test_split(\n", " X_temp, y_temp, test_size=0.5, random_state=42)" ] }, { "cell_type": "code", "execution_count": null, "id": "febb5829", "metadata": { "execution": { "iopub.execute_input": 
"2024-02-27T02:25:45.647678Z", "iopub.status.busy": "2024-02-27T02:25:45.646969Z", "iopub.status.idle": "2024-02-27T02:25:45.651746Z", "shell.execute_reply": "2024-02-27T02:25:45.650798Z", "shell.execute_reply.started": "2024-02-27T02:25:45.647647Z" }, "papermill": { "duration": null, "end_time": null, "exception": null, "start_time": null, "status": "pending" }, "tags": [] }, "outputs": [], "source": [ "num_features = X_train.shape[1]" ] }, { "cell_type": "code", "execution_count": null, "id": "131be457", "metadata": { "execution": { "iopub.execute_input": "2024-02-27T02:25:55.207330Z", "iopub.status.busy": "2024-02-27T02:25:55.206931Z", "iopub.status.idle": "2024-02-27T02:25:55.281487Z", "shell.execute_reply": "2024-02-27T02:25:55.280767Z", "shell.execute_reply.started": "2024-02-27T02:25:55.207300Z" }, "papermill": { "duration": null, "end_time": null, "exception": null, "start_time": null, "status": "pending" }, "tags": [] }, "outputs": [], "source": [ "model = Sequential([\n", " Conv1D(filters=32, kernel_size=3, activation='relu',\n", " input_shape=(num_features, 1)),\n", " MaxPooling1D(pool_size=2),\n", " Conv1D(filters=64, kernel_size=3, activation='relu'),\n", " MaxPooling1D(pool_size=2),\n", " Flatten(),\n", " Dense(64, activation='relu'),\n", " Dense(1, activation='sigmoid')\n", "])" ] }, { "cell_type": "code", "execution_count": null, "id": "1228e48c", "metadata": { "execution": { "iopub.execute_input": "2024-02-27T02:26:02.388725Z", "iopub.status.busy": "2024-02-27T02:26:02.388349Z", "iopub.status.idle": "2024-02-27T02:26:02.402654Z", "shell.execute_reply": "2024-02-27T02:26:02.401613Z", "shell.execute_reply.started": "2024-02-27T02:26:02.388685Z" }, "papermill": { "duration": null, "end_time": null, "exception": null, "start_time": null, "status": "pending" }, "tags": [] }, "outputs": [], "source": [ "model.compile(optimizer=Adam(learning_rate=0.001),\n", " loss='binary_crossentropy', metrics=['accuracy'])" ] }, { "cell_type": "code", 
"execution_count": null, "id": "61cf26b0", "metadata": { "execution": { "iopub.execute_input": "2024-02-27T02:26:49.628751Z", "iopub.status.busy": "2024-02-27T02:26:49.628140Z", "iopub.status.idle": "2024-02-27T02:26:49.633203Z", "shell.execute_reply": "2024-02-27T02:26:49.632167Z", "shell.execute_reply.started": "2024-02-27T02:26:49.628710Z" }, "papermill": { "duration": null, "end_time": null, "exception": null, "start_time": null, "status": "pending" }, "tags": [] }, "outputs": [], "source": [ "import tensorflow as tf\n", "from sklearn.model_selection import train_test_split\n", "from sklearn.preprocessing import StandardScaler" ] }, { "cell_type": "code", "execution_count": null, "id": "6bf5881a", "metadata": { "execution": { "iopub.execute_input": "2024-02-27T02:26:56.957652Z", "iopub.status.busy": "2024-02-27T02:26:56.957291Z", "iopub.status.idle": "2024-02-27T02:26:58.040698Z", "shell.execute_reply": "2024-02-27T02:26:58.039438Z", "shell.execute_reply.started": "2024-02-27T02:26:56.957622Z" }, "papermill": { "duration": null, "end_time": null, "exception": null, "start_time": null, "status": "pending" }, "tags": [] }, "outputs": [], "source": [ "# Fit the scaler on the training split only, so validation/test statistics never\n", "# influence training, then apply the same transform to the other splits.\n", "scaler = StandardScaler()\n", "X_train_scaled = scaler.fit_transform(X_train)\n", "X_val_scaled = scaler.transform(X_val)\n", "X_test_scaled = scaler.transform(X_test)\n", "\n", "# Conv1D consumes (samples, steps, channels): append the trailing channel axis and\n", "# cast to float32 so Keras receives plain numeric arrays rather than mixed-dtype frames.\n", "X_train_cnn = np.asarray(X_train_scaled, dtype='float32')[..., np.newaxis]\n", "X_val_cnn = np.asarray(X_val_scaled, dtype='float32')[..., np.newaxis]\n", "X_test_cnn = np.asarray(X_test_scaled, dtype='float32')[..., np.newaxis]\n", "\n", "# Labels stay a single column (presumably 0/1 - verify where y is built): the network ends in\n", "# one sigmoid unit trained with binary_crossentropy, so one-hot targets would not match its output.\n", "y_train_cnn = np.asarray(y_train, dtype='float32')\n", "y_val_cnn = np.asarray(y_val, dtype='float32')\n", "\n", "# Fail early with a clear message if the prepared arrays do not match the model's declared input.\n", "assert X_train_cnn.shape[1:] == model.input_shape[1:], (X_train_cnn.shape, model.input_shape)" ] }, { "cell_type": "code", "execution_count": null, "id": "46ecaaa1", "metadata": { "execution": { "iopub.execute_input": "2024-02-27T02:26:08.518224Z", "iopub.status.busy": "2024-02-27T02:26:08.517758Z", "iopub.status.idle": "2024-02-27T02:26:09.398867Z", "shell.execute_reply": "2024-02-27T02:26:09.397682Z", "shell.execute_reply.started": "2024-02-27T02:26:08.518190Z" }, "papermill": { "duration": null, "end_time": null, "exception": null, "start_time": null, "status": "pending" }, "tags": [] }, "outputs": [], "source": [ "# Train on the scaled, channel-expanded arrays prepared in the previous cell.\n", "history = model.fit(X_train_cnn, y_train_cnn, epochs=10,\n", "                    batch_size=32, validation_data=(X_val_cnn, y_val_cnn))" ] } ], "metadata": { "kaggle": { "accelerator": "nvidiaTeslaT4", 
"dataSources": [ { "datasetId": 1936563, "sourceId": 6674905, "sourceType": "datasetVersion" } ], "dockerImageVersionId": 30648, "isGpuEnabled": true, "isInternetEnabled": true, "language": "python", "sourceType": "notebook" }, "kernelspec": { "display_name": "Python 3", "language": "python", "name": "python3" }, "language_info": { "codemirror_mode": { "name": "ipython", "version": 3 }, "file_extension": ".py", "mimetype": "text/x-python", "name": "python", "nbconvert_exporter": "python", "pygments_lexer": "ipython3", "version": "3.10.13" }, "papermill": { "default_parameters": {}, "duration": 8.029196, "end_time": "2024-02-28T21:15:45.609983", "environment_variables": {}, "exception": true, "input_path": "__notebook__.ipynb", "output_path": "__notebook__.ipynb", "parameters": {}, "start_time": "2024-02-28T21:15:37.580787", "version": "2.5.0" } }, "nbformat": 4, "nbformat_minor": 5 }