TarekBouras committed on
Commit
3206628
1 Parent(s): cffc818

Upload model.ipynb

Browse files
Files changed (1) hide show
  1. model.ipynb +1593 -0
model.ipynb ADDED
@@ -0,0 +1,1593 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "cells": [
3
+ {
4
+ "cell_type": "code",
5
+ "execution_count": null,
6
+ "id": "ec70045d",
7
+ "metadata": {
8
+ "_cell_guid": "b1076dfc-b9ad-4769-8c92-a6c4dae69d19",
9
+ "_uuid": "8f2839f25d086af736a60e9eeb907d3b93b6e0e5",
10
+ "execution": {
11
+ "iopub.execute_input": "2024-02-28T21:15:40.650918Z",
12
+ "iopub.status.busy": "2024-02-28T21:15:40.650589Z",
13
+ "iopub.status.idle": "2024-02-28T21:15:41.502437Z",
14
+ "shell.execute_reply": "2024-02-28T21:15:41.501426Z"
15
+ },
16
+ "papermill": {
17
+ "duration": 0.87031,
18
+ "end_time": "2024-02-28T21:15:41.504554",
19
+ "exception": false,
20
+ "start_time": "2024-02-28T21:15:40.634244",
21
+ "status": "completed"
22
+ },
23
+ "tags": []
24
+ },
25
+ "outputs": [],
26
+ "source": [
27
+ "# This Python 3 environment comes with many helpful analytics libraries installed\n",
28
+ "# It is defined by the kaggle/python Docker image: https://github.com/kaggle/docker-python\n",
29
+ "# For example, here are several helpful packages to load\n",
30
+ "\n",
31
+ "import numpy as np # linear algebra\n",
32
+ "import pandas as pd # data processing, CSV file I/O (e.g. pd.read_csv)\n",
33
+ "\n",
34
+ "# Input data files are available in the read-only \"../input/\" directory\n",
35
+ "# For example, running this (by clicking run or pressing Shift+Enter) will list all files under the input directory\n",
36
+ "\n",
37
+ "import os\n",
38
+ "for dirname, _, filenames in os.walk('/kaggle/input'):\n",
39
+ " for filename in filenames:\n",
40
+ " print(os.path.join(dirname, filename))\n",
41
+ "\n",
42
+ "# You can write up to 20GB to the current directory (/kaggle/working/) that gets preserved as output when you create a version using \"Save & Run All\"\n",
43
+ "# You can also write temporary files to /kaggle/temp/, but they won't be saved outside of the current session"
44
+ ]
45
+ },
46
+ {
47
+ "cell_type": "code",
48
+ "execution_count": null,
49
+ "id": "31b2bdbf",
50
+ "metadata": {
51
+ "execution": {
52
+ "iopub.execute_input": "2024-02-28T21:15:41.536451Z",
53
+ "iopub.status.busy": "2024-02-28T21:15:41.536047Z",
54
+ "iopub.status.idle": "2024-02-28T21:15:42.592902Z",
55
+ "shell.execute_reply": "2024-02-28T21:15:42.592121Z"
56
+ },
57
+ "papermill": {
58
+ "duration": 1.07523,
59
+ "end_time": "2024-02-28T21:15:42.595268",
60
+ "exception": false,
61
+ "start_time": "2024-02-28T21:15:41.520038",
62
+ "status": "completed"
63
+ },
64
+ "tags": []
65
+ },
66
+ "outputs": [],
67
+ "source": [
68
+ "df = pd.read_csv(\n",
69
+ " \"/kaggle/input/personal-key-indicators-of-heart-disease/2020/heart_2020_cleaned.csv\")"
70
+ ]
71
+ },
72
+ {
73
+ "cell_type": "code",
74
+ "execution_count": null,
75
+ "id": "a12bd286",
76
+ "metadata": {
77
+ "execution": {
78
+ "iopub.execute_input": "2024-02-28T21:15:42.629222Z",
79
+ "iopub.status.busy": "2024-02-28T21:15:42.628916Z",
80
+ "iopub.status.idle": "2024-02-28T21:15:43.061965Z",
81
+ "shell.execute_reply": "2024-02-28T21:15:43.061012Z"
82
+ },
83
+ "papermill": {
84
+ "duration": 0.453304,
85
+ "end_time": "2024-02-28T21:15:43.064364",
86
+ "exception": false,
87
+ "start_time": "2024-02-28T21:15:42.611060",
88
+ "status": "completed"
89
+ },
90
+ "tags": []
91
+ },
92
+ "outputs": [],
93
+ "source": [
94
+ "df.isnull().sum()"
95
+ ]
96
+ },
97
+ {
98
+ "cell_type": "code",
99
+ "execution_count": null,
100
+ "id": "98b4a85f",
101
+ "metadata": {
102
+ "execution": {
103
+ "iopub.execute_input": "2024-02-28T21:15:43.098384Z",
104
+ "iopub.status.busy": "2024-02-28T21:15:43.098077Z",
105
+ "iopub.status.idle": "2024-02-28T21:15:43.549973Z",
106
+ "shell.execute_reply": "2024-02-28T21:15:43.548934Z"
107
+ },
108
+ "papermill": {
109
+ "duration": 0.470772,
110
+ "end_time": "2024-02-28T21:15:43.552722",
111
+ "exception": false,
112
+ "start_time": "2024-02-28T21:15:43.081950",
113
+ "status": "completed"
114
+ },
115
+ "tags": []
116
+ },
117
+ "outputs": [],
118
+ "source": [
119
+ "df = pd.get_dummies(df, columns=['Smoking', 'AlcoholDrinking', 'Sex', 'AgeCategory', 'Race',\n",
120
+ " 'Diabetic', 'PhysicalActivity', 'GenHealth', 'Asthma', 'KidneyDisease', 'SkinCancer'])"
121
+ ]
122
+ },
123
+ {
124
+ "cell_type": "code",
125
+ "execution_count": null,
126
+ "id": "4a49bbd7",
127
+ "metadata": {
128
+ "execution": {
129
+ "iopub.execute_input": "2024-02-28T21:15:43.590333Z",
130
+ "iopub.status.busy": "2024-02-28T21:15:43.589967Z",
131
+ "iopub.status.idle": "2024-02-28T21:15:43.596311Z",
132
+ "shell.execute_reply": "2024-02-28T21:15:43.595602Z"
133
+ },
134
+ "papermill": {
135
+ "duration": 0.026491,
136
+ "end_time": "2024-02-28T21:15:43.598298",
137
+ "exception": false,
138
+ "start_time": "2024-02-28T21:15:43.571807",
139
+ "status": "completed"
140
+ },
141
+ "tags": []
142
+ },
143
+ "outputs": [],
144
+ "source": [
145
+ "df['BMI'] = df['BMI'] / (df['BMI'] ** 2)"
146
+ ]
147
+ },
148
+ {
149
+ "cell_type": "code",
150
+ "execution_count": null,
151
+ "id": "8a1121c3",
152
+ "metadata": {
153
+ "execution": {
154
+ "iopub.execute_input": "2024-02-28T21:15:43.629024Z",
155
+ "iopub.status.busy": "2024-02-28T21:15:43.628731Z",
156
+ "iopub.status.idle": "2024-02-28T21:15:45.174952Z",
157
+ "shell.execute_reply": "2024-02-28T21:15:45.173525Z"
158
+ },
159
+ "papermill": {
160
+ "duration": 1.563564,
161
+ "end_time": "2024-02-28T21:15:45.176760",
162
+ "exception": true,
163
+ "start_time": "2024-02-28T21:15:43.613196",
164
+ "status": "failed"
165
+ },
166
+ "tags": []
167
+ },
168
+ "outputs": [],
169
+ "source": [
170
+ "from sklearn.preprocessing import MinMaxScaler\n",
171
+ "numerical_columns = ['BMI', 'Stroke', 'PhysicalHealth',\n",
172
+ " 'MentalHealth', 'DiffWalking', 'SleepTime']\n",
173
+ "scaler = MinMaxScaler()\n",
174
+ "df[numerical_columns] = scaler.fit_transform(df[numerical_columns])"
175
+ ]
176
+ },
177
+ {
178
+ "cell_type": "code",
179
+ "execution_count": null,
180
+ "id": "c34257e5",
181
+ "metadata": {
182
+ "execution": {
183
+ "iopub.execute_input": "2024-02-28T20:31:24.347783Z",
184
+ "iopub.status.busy": "2024-02-28T20:31:24.347070Z",
185
+ "iopub.status.idle": "2024-02-28T20:31:24.504857Z",
186
+ "shell.execute_reply": "2024-02-28T20:31:24.503875Z",
187
+ "shell.execute_reply.started": "2024-02-28T20:31:24.347750Z"
188
+ },
189
+ "papermill": {
190
+ "duration": null,
191
+ "end_time": null,
192
+ "exception": null,
193
+ "start_time": null,
194
+ "status": "pending"
195
+ },
196
+ "tags": []
197
+ },
198
+ "outputs": [],
199
+ "source": [
200
+ "for column in df.columns:\n",
201
+ " print(column, df[column].unique())"
202
+ ]
203
+ },
204
+ {
205
+ "cell_type": "code",
206
+ "execution_count": null,
207
+ "id": "305666d0",
208
+ "metadata": {
209
+ "execution": {
210
+ "iopub.execute_input": "2024-02-28T20:31:26.899524Z",
211
+ "iopub.status.busy": "2024-02-28T20:31:26.899205Z",
212
+ "iopub.status.idle": "2024-02-28T20:31:26.961477Z",
213
+ "shell.execute_reply": "2024-02-28T20:31:26.960639Z",
214
+ "shell.execute_reply.started": "2024-02-28T20:31:26.899502Z"
215
+ },
216
+ "papermill": {
217
+ "duration": null,
218
+ "end_time": null,
219
+ "exception": null,
220
+ "start_time": null,
221
+ "status": "pending"
222
+ },
223
+ "tags": []
224
+ },
225
+ "outputs": [],
226
+ "source": [
227
+ "df['Stroke'] = df['Stroke'].map({'No': 0, 'Yes': 1})\n",
228
+ "df['DiffWalking'] = df['DiffWalking'].map({'No': 0, 'Yes': 1})"
229
+ ]
230
+ },
231
+ {
232
+ "cell_type": "code",
233
+ "execution_count": null,
234
+ "id": "b2cc4716",
235
+ "metadata": {
236
+ "execution": {
237
+ "iopub.execute_input": "2024-02-28T20:31:28.914217Z",
238
+ "iopub.status.busy": "2024-02-28T20:31:28.913857Z",
239
+ "iopub.status.idle": "2024-02-28T20:31:28.945954Z",
240
+ "shell.execute_reply": "2024-02-28T20:31:28.944829Z",
241
+ "shell.execute_reply.started": "2024-02-28T20:31:28.914181Z"
242
+ },
243
+ "papermill": {
244
+ "duration": null,
245
+ "end_time": null,
246
+ "exception": null,
247
+ "start_time": null,
248
+ "status": "pending"
249
+ },
250
+ "tags": []
251
+ },
252
+ "outputs": [],
253
+ "source": [
254
+ "scaler = MinMaxScaler()\n",
255
+ "numerical_columns = ['BMI', 'PhysicalHealth',\n",
256
+ " 'MentalHealth', 'DiffWalking', 'SleepTime']\n",
257
+ "df[numerical_columns] = scaler.fit_transform(df[numerical_columns])"
258
+ ]
259
+ },
260
+ {
261
+ "cell_type": "code",
262
+ "execution_count": null,
263
+ "id": "15944d03",
264
+ "metadata": {
265
+ "execution": {
266
+ "iopub.execute_input": "2024-02-28T20:31:30.518053Z",
267
+ "iopub.status.busy": "2024-02-28T20:31:30.517356Z",
268
+ "iopub.status.idle": "2024-02-28T20:31:30.592331Z",
269
+ "shell.execute_reply": "2024-02-28T20:31:30.591365Z",
270
+ "shell.execute_reply.started": "2024-02-28T20:31:30.518018Z"
271
+ },
272
+ "papermill": {
273
+ "duration": null,
274
+ "end_time": null,
275
+ "exception": null,
276
+ "start_time": null,
277
+ "status": "pending"
278
+ },
279
+ "tags": []
280
+ },
281
+ "outputs": [],
282
+ "source": [
283
+ "z_scores = df[numerical_columns].apply(lambda x: (x - x.mean()) / x.std())\n",
284
+ "outliers = (z_scores > 3) | (z_scores < -3)\n",
285
+ "df = df[~outliers.any(axis=1)]"
286
+ ]
287
+ },
288
+ {
289
+ "cell_type": "code",
290
+ "execution_count": null,
291
+ "id": "b3c04332",
292
+ "metadata": {
293
+ "execution": {
294
+ "iopub.execute_input": "2024-02-28T20:31:32.877312Z",
295
+ "iopub.status.busy": "2024-02-28T20:31:32.876991Z",
296
+ "iopub.status.idle": "2024-02-28T20:31:32.923278Z",
297
+ "shell.execute_reply": "2024-02-28T20:31:32.922285Z",
298
+ "shell.execute_reply.started": "2024-02-28T20:31:32.877287Z"
299
+ },
300
+ "papermill": {
301
+ "duration": null,
302
+ "end_time": null,
303
+ "exception": null,
304
+ "start_time": null,
305
+ "status": "pending"
306
+ },
307
+ "tags": []
308
+ },
309
+ "outputs": [],
310
+ "source": [
311
+ "print(df.isnull().sum())"
312
+ ]
313
+ },
314
+ {
315
+ "cell_type": "code",
316
+ "execution_count": null,
317
+ "id": "f883f424",
318
+ "metadata": {
319
+ "execution": {
320
+ "iopub.execute_input": "2024-02-28T20:31:35.118412Z",
321
+ "iopub.status.busy": "2024-02-28T20:31:35.118046Z",
322
+ "iopub.status.idle": "2024-02-28T20:31:35.138194Z",
323
+ "shell.execute_reply": "2024-02-28T20:31:35.137356Z",
324
+ "shell.execute_reply.started": "2024-02-28T20:31:35.118385Z"
325
+ },
326
+ "papermill": {
327
+ "duration": null,
328
+ "end_time": null,
329
+ "exception": null,
330
+ "start_time": null,
331
+ "status": "pending"
332
+ },
333
+ "tags": []
334
+ },
335
+ "outputs": [],
336
+ "source": [
337
+ "X = df.drop(columns=['HeartDisease'])\n",
338
+ "y = df['HeartDisease']"
339
+ ]
340
+ },
341
+ {
342
+ "cell_type": "code",
343
+ "execution_count": null,
344
+ "id": "937f456d",
345
+ "metadata": {
346
+ "execution": {
347
+ "iopub.execute_input": "2024-02-28T20:31:36.921083Z",
348
+ "iopub.status.busy": "2024-02-28T20:31:36.920460Z",
349
+ "iopub.status.idle": "2024-02-28T20:31:37.092675Z",
350
+ "shell.execute_reply": "2024-02-28T20:31:37.091807Z",
351
+ "shell.execute_reply.started": "2024-02-28T20:31:36.921053Z"
352
+ },
353
+ "papermill": {
354
+ "duration": null,
355
+ "end_time": null,
356
+ "exception": null,
357
+ "start_time": null,
358
+ "status": "pending"
359
+ },
360
+ "tags": []
361
+ },
362
+ "outputs": [],
363
+ "source": [
364
+ "from sklearn.model_selection import train_test_split # used for the train/test split below\n",
365
+ "X_train, X_test, y_train, y_test = train_test_split(\n",
366
+ " X, y, test_size=0.2, random_state=42)"
367
+ ]
368
+ },
369
+ {
370
+ "cell_type": "markdown",
371
+ "id": "d908667b",
372
+ "metadata": {
373
+ "papermill": {
374
+ "duration": null,
375
+ "end_time": null,
376
+ "exception": null,
377
+ "start_time": null,
378
+ "status": "pending"
379
+ },
380
+ "tags": []
381
+ },
382
+ "source": [
383
+ "# Logistic regression\n"
384
+ ]
385
+ },
386
+ {
387
+ "cell_type": "code",
388
+ "execution_count": null,
389
+ "id": "d2c46021",
390
+ "metadata": {
391
+ "execution": {
392
+ "iopub.execute_input": "2024-02-28T20:31:40.429477Z",
393
+ "iopub.status.busy": "2024-02-28T20:31:40.428714Z",
394
+ "iopub.status.idle": "2024-02-28T20:31:40.563938Z",
395
+ "shell.execute_reply": "2024-02-28T20:31:40.563215Z",
396
+ "shell.execute_reply.started": "2024-02-28T20:31:40.429444Z"
397
+ },
398
+ "papermill": {
399
+ "duration": null,
400
+ "end_time": null,
401
+ "exception": null,
402
+ "start_time": null,
403
+ "status": "pending"
404
+ },
405
+ "tags": []
406
+ },
407
+ "outputs": [],
408
+ "source": [
409
+ "from sklearn.linear_model import LogisticRegression\n",
410
+ "from sklearn.metrics import accuracy_score, classification_report, confusion_matrix\n",
411
+ "model = LogisticRegression()"
412
+ ]
413
+ },
414
+ {
415
+ "cell_type": "code",
416
+ "execution_count": null,
417
+ "id": "57788a5b",
418
+ "metadata": {
419
+ "execution": {
420
+ "iopub.execute_input": "2024-02-28T20:31:43.108928Z",
421
+ "iopub.status.busy": "2024-02-28T20:31:43.108194Z",
422
+ "iopub.status.idle": "2024-02-28T20:31:46.611293Z",
423
+ "shell.execute_reply": "2024-02-28T20:31:46.609836Z",
424
+ "shell.execute_reply.started": "2024-02-28T20:31:43.108893Z"
425
+ },
426
+ "papermill": {
427
+ "duration": null,
428
+ "end_time": null,
429
+ "exception": null,
430
+ "start_time": null,
431
+ "status": "pending"
432
+ },
433
+ "tags": []
434
+ },
435
+ "outputs": [],
436
+ "source": [
437
+ "model.fit(X_train, y_train)"
438
+ ]
439
+ },
440
+ {
441
+ "cell_type": "code",
442
+ "execution_count": null,
443
+ "id": "5a09075d",
444
+ "metadata": {
445
+ "execution": {
446
+ "iopub.execute_input": "2024-02-28T20:31:53.227765Z",
447
+ "iopub.status.busy": "2024-02-28T20:31:53.227012Z",
448
+ "iopub.status.idle": "2024-02-28T20:31:53.251316Z",
449
+ "shell.execute_reply": "2024-02-28T20:31:53.250025Z",
450
+ "shell.execute_reply.started": "2024-02-28T20:31:53.227730Z"
451
+ },
452
+ "papermill": {
453
+ "duration": null,
454
+ "end_time": null,
455
+ "exception": null,
456
+ "start_time": null,
457
+ "status": "pending"
458
+ },
459
+ "tags": []
460
+ },
461
+ "outputs": [],
462
+ "source": [
463
+ "y_pred = model.predict(X_test)"
464
+ ]
465
+ },
466
+ {
467
+ "cell_type": "code",
468
+ "execution_count": null,
469
+ "id": "025c02d6",
470
+ "metadata": {
471
+ "execution": {
472
+ "iopub.execute_input": "2024-02-28T20:31:55.958835Z",
473
+ "iopub.status.busy": "2024-02-28T20:31:55.957996Z",
474
+ "iopub.status.idle": "2024-02-28T20:31:56.206159Z",
475
+ "shell.execute_reply": "2024-02-28T20:31:56.205249Z",
476
+ "shell.execute_reply.started": "2024-02-28T20:31:55.958798Z"
477
+ },
478
+ "papermill": {
479
+ "duration": null,
480
+ "end_time": null,
481
+ "exception": null,
482
+ "start_time": null,
483
+ "status": "pending"
484
+ },
485
+ "tags": []
486
+ },
487
+ "outputs": [],
488
+ "source": [
489
+ "accuracy = accuracy_score(y_test, y_pred)\n",
490
+ "print(\"Accuracy:\", accuracy)"
491
+ ]
492
+ },
493
+ {
494
+ "cell_type": "markdown",
495
+ "id": "30f6e656",
496
+ "metadata": {
497
+ "papermill": {
498
+ "duration": null,
499
+ "end_time": null,
500
+ "exception": null,
501
+ "start_time": null,
502
+ "status": "pending"
503
+ },
504
+ "tags": []
505
+ },
506
+ "source": [
507
+ "# KNN\n"
508
+ ]
509
+ },
510
+ {
511
+ "cell_type": "code",
512
+ "execution_count": null,
513
+ "id": "53935959",
514
+ "metadata": {
515
+ "execution": {
516
+ "iopub.execute_input": "2024-02-28T20:31:59.594538Z",
517
+ "iopub.status.busy": "2024-02-28T20:31:59.593874Z",
518
+ "iopub.status.idle": "2024-02-28T20:31:59.644704Z",
519
+ "shell.execute_reply": "2024-02-28T20:31:59.643728Z",
520
+ "shell.execute_reply.started": "2024-02-28T20:31:59.594507Z"
521
+ },
522
+ "papermill": {
523
+ "duration": null,
524
+ "end_time": null,
525
+ "exception": null,
526
+ "start_time": null,
527
+ "status": "pending"
528
+ },
529
+ "tags": []
530
+ },
531
+ "outputs": [],
532
+ "source": [
533
+ "from sklearn.neighbors import KNeighborsClassifier\n",
534
+ "knn_model = KNeighborsClassifier(n_neighbors=5)"
535
+ ]
536
+ },
537
+ {
538
+ "cell_type": "code",
539
+ "execution_count": null,
540
+ "id": "db4deede",
541
+ "metadata": {
542
+ "execution": {
543
+ "iopub.execute_input": "2024-02-28T20:32:05.418662Z",
544
+ "iopub.status.busy": "2024-02-28T20:32:05.417912Z",
545
+ "iopub.status.idle": "2024-02-28T20:32:06.188877Z",
546
+ "shell.execute_reply": "2024-02-28T20:32:06.187632Z",
547
+ "shell.execute_reply.started": "2024-02-28T20:32:05.418629Z"
548
+ },
549
+ "papermill": {
550
+ "duration": null,
551
+ "end_time": null,
552
+ "exception": null,
553
+ "start_time": null,
554
+ "status": "pending"
555
+ },
556
+ "tags": []
557
+ },
558
+ "outputs": [],
559
+ "source": [
560
+ "knn_model.fit(X_train, y_train)"
561
+ ]
562
+ },
563
+ {
564
+ "cell_type": "code",
565
+ "execution_count": null,
566
+ "id": "ab01ea0d",
567
+ "metadata": {
568
+ "execution": {
569
+ "iopub.execute_input": "2024-02-28T20:32:08.060681Z",
570
+ "iopub.status.busy": "2024-02-28T20:32:08.059727Z",
571
+ "iopub.status.idle": "2024-02-28T20:32:48.065781Z",
572
+ "shell.execute_reply": "2024-02-28T20:32:48.064651Z",
573
+ "shell.execute_reply.started": "2024-02-28T20:32:08.060638Z"
574
+ },
575
+ "papermill": {
576
+ "duration": null,
577
+ "end_time": null,
578
+ "exception": null,
579
+ "start_time": null,
580
+ "status": "pending"
581
+ },
582
+ "tags": []
583
+ },
584
+ "outputs": [],
585
+ "source": [
586
+ "knn_y_pred = knn_model.predict(X_test)\n",
587
+ "knn_accuracy = accuracy_score(y_test, knn_y_pred)\n",
588
+ "print(\"KNN Accuracy:\", knn_accuracy)"
589
+ ]
590
+ },
591
+ {
592
+ "cell_type": "markdown",
593
+ "id": "fbfb3f58",
594
+ "metadata": {
595
+ "papermill": {
596
+ "duration": null,
597
+ "end_time": null,
598
+ "exception": null,
599
+ "start_time": null,
600
+ "status": "pending"
601
+ },
602
+ "tags": []
603
+ },
604
+ "source": [
605
+ "# Naive Bayes\n"
606
+ ]
607
+ },
608
+ {
609
+ "cell_type": "code",
610
+ "execution_count": null,
611
+ "id": "59c6dc70",
612
+ "metadata": {
613
+ "execution": {
614
+ "iopub.execute_input": "2024-02-28T20:33:05.648469Z",
615
+ "iopub.status.busy": "2024-02-28T20:33:05.647771Z",
616
+ "iopub.status.idle": "2024-02-28T20:33:05.655089Z",
617
+ "shell.execute_reply": "2024-02-28T20:33:05.653963Z",
618
+ "shell.execute_reply.started": "2024-02-28T20:33:05.648437Z"
619
+ },
620
+ "papermill": {
621
+ "duration": null,
622
+ "end_time": null,
623
+ "exception": null,
624
+ "start_time": null,
625
+ "status": "pending"
626
+ },
627
+ "tags": []
628
+ },
629
+ "outputs": [],
630
+ "source": [
631
+ "from sklearn.naive_bayes import GaussianNB\n",
632
+ "nb_model = GaussianNB()"
633
+ ]
634
+ },
635
+ {
636
+ "cell_type": "code",
637
+ "execution_count": null,
638
+ "id": "bde5534f",
639
+ "metadata": {
640
+ "execution": {
641
+ "iopub.execute_input": "2024-02-28T20:33:07.367575Z",
642
+ "iopub.status.busy": "2024-02-28T20:33:07.366646Z",
643
+ "iopub.status.idle": "2024-02-28T20:33:08.279224Z",
644
+ "shell.execute_reply": "2024-02-28T20:33:08.278331Z",
645
+ "shell.execute_reply.started": "2024-02-28T20:33:07.367527Z"
646
+ },
647
+ "papermill": {
648
+ "duration": null,
649
+ "end_time": null,
650
+ "exception": null,
651
+ "start_time": null,
652
+ "status": "pending"
653
+ },
654
+ "tags": []
655
+ },
656
+ "outputs": [],
657
+ "source": [
658
+ "nb_model.fit(X_train, y_train)"
659
+ ]
660
+ },
661
+ {
662
+ "cell_type": "code",
663
+ "execution_count": null,
664
+ "id": "13d88c07",
665
+ "metadata": {
666
+ "execution": {
667
+ "iopub.execute_input": "2024-02-28T20:33:11.507456Z",
668
+ "iopub.status.busy": "2024-02-28T20:33:11.506783Z",
669
+ "iopub.status.idle": "2024-02-28T20:33:11.557327Z",
670
+ "shell.execute_reply": "2024-02-28T20:33:11.556531Z",
671
+ "shell.execute_reply.started": "2024-02-28T20:33:11.507420Z"
672
+ },
673
+ "papermill": {
674
+ "duration": null,
675
+ "end_time": null,
676
+ "exception": null,
677
+ "start_time": null,
678
+ "status": "pending"
679
+ },
680
+ "tags": []
681
+ },
682
+ "outputs": [],
683
+ "source": [
684
+ "nb_y_pred = nb_model.predict(X_test)"
685
+ ]
686
+ },
687
+ {
688
+ "cell_type": "code",
689
+ "execution_count": null,
690
+ "id": "92e9d434",
691
+ "metadata": {
692
+ "execution": {
693
+ "iopub.execute_input": "2024-02-28T20:33:17.627887Z",
694
+ "iopub.status.busy": "2024-02-28T20:33:17.627102Z",
695
+ "iopub.status.idle": "2024-02-28T20:33:17.872462Z",
696
+ "shell.execute_reply": "2024-02-28T20:33:17.871605Z",
697
+ "shell.execute_reply.started": "2024-02-28T20:33:17.627855Z"
698
+ },
699
+ "papermill": {
700
+ "duration": null,
701
+ "end_time": null,
702
+ "exception": null,
703
+ "start_time": null,
704
+ "status": "pending"
705
+ },
706
+ "tags": []
707
+ },
708
+ "outputs": [],
709
+ "source": [
710
+ "nb_accuracy = accuracy_score(y_test, nb_y_pred)\n",
711
+ "print(\"Naive Bayes Accuracy:\", nb_accuracy)"
712
+ ]
713
+ },
714
+ {
715
+ "cell_type": "markdown",
716
+ "id": "32075ad4",
717
+ "metadata": {
718
+ "papermill": {
719
+ "duration": null,
720
+ "end_time": null,
721
+ "exception": null,
722
+ "start_time": null,
723
+ "status": "pending"
724
+ },
725
+ "tags": []
726
+ },
727
+ "source": [
728
+ "# Decision Tree\n"
729
+ ]
730
+ },
731
+ {
732
+ "cell_type": "code",
733
+ "execution_count": null,
734
+ "id": "65c78c41",
735
+ "metadata": {
736
+ "execution": {
737
+ "iopub.execute_input": "2024-02-28T20:33:20.370792Z",
738
+ "iopub.status.busy": "2024-02-28T20:33:20.370439Z",
739
+ "iopub.status.idle": "2024-02-28T20:33:20.399395Z",
740
+ "shell.execute_reply": "2024-02-28T20:33:20.398573Z",
741
+ "shell.execute_reply.started": "2024-02-28T20:33:20.370766Z"
742
+ },
743
+ "papermill": {
744
+ "duration": null,
745
+ "end_time": null,
746
+ "exception": null,
747
+ "start_time": null,
748
+ "status": "pending"
749
+ },
750
+ "tags": []
751
+ },
752
+ "outputs": [],
753
+ "source": [
754
+ "from sklearn.tree import DecisionTreeClassifier\n",
755
+ "dt_model = DecisionTreeClassifier(random_state=42)"
756
+ ]
757
+ },
758
+ {
759
+ "cell_type": "code",
760
+ "execution_count": null,
761
+ "id": "a818077a",
762
+ "metadata": {
763
+ "execution": {
764
+ "iopub.execute_input": "2024-02-28T20:33:24.678143Z",
765
+ "iopub.status.busy": "2024-02-28T20:33:24.677822Z",
766
+ "iopub.status.idle": "2024-02-28T20:33:28.015444Z",
767
+ "shell.execute_reply": "2024-02-28T20:33:28.014553Z",
768
+ "shell.execute_reply.started": "2024-02-28T20:33:24.678119Z"
769
+ },
770
+ "papermill": {
771
+ "duration": null,
772
+ "end_time": null,
773
+ "exception": null,
774
+ "start_time": null,
775
+ "status": "pending"
776
+ },
777
+ "tags": []
778
+ },
779
+ "outputs": [],
780
+ "source": [
781
+ "dt_model.fit(X_train, y_train)"
782
+ ]
783
+ },
784
+ {
785
+ "cell_type": "code",
786
+ "execution_count": null,
787
+ "id": "c8ca2ae9",
788
+ "metadata": {
789
+ "execution": {
790
+ "iopub.execute_input": "2024-02-28T20:33:30.414733Z",
791
+ "iopub.status.busy": "2024-02-28T20:33:30.413806Z",
792
+ "iopub.status.idle": "2024-02-28T20:33:30.445350Z",
793
+ "shell.execute_reply": "2024-02-28T20:33:30.444502Z",
794
+ "shell.execute_reply.started": "2024-02-28T20:33:30.414688Z"
795
+ },
796
+ "papermill": {
797
+ "duration": null,
798
+ "end_time": null,
799
+ "exception": null,
800
+ "start_time": null,
801
+ "status": "pending"
802
+ },
803
+ "tags": []
804
+ },
805
+ "outputs": [],
806
+ "source": [
807
+ "dt_y_pred = dt_model.predict(X_test)"
808
+ ]
809
+ },
810
+ {
811
+ "cell_type": "code",
812
+ "execution_count": null,
813
+ "id": "8e6dc11c",
814
+ "metadata": {
815
+ "execution": {
816
+ "iopub.execute_input": "2024-02-28T20:33:32.917637Z",
817
+ "iopub.status.busy": "2024-02-28T20:33:32.916912Z",
818
+ "iopub.status.idle": "2024-02-28T20:33:33.162356Z",
819
+ "shell.execute_reply": "2024-02-28T20:33:33.161428Z",
820
+ "shell.execute_reply.started": "2024-02-28T20:33:32.917605Z"
821
+ },
822
+ "papermill": {
823
+ "duration": null,
824
+ "end_time": null,
825
+ "exception": null,
826
+ "start_time": null,
827
+ "status": "pending"
828
+ },
829
+ "tags": []
830
+ },
831
+ "outputs": [],
832
+ "source": [
833
+ "dt_accuracy = accuracy_score(y_test, dt_y_pred)\n",
834
+ "print(\"accuracy:\", dt_accuracy)"
835
+ ]
836
+ },
837
+ {
838
+ "cell_type": "markdown",
839
+ "id": "0dfe26a4",
840
+ "metadata": {
841
+ "papermill": {
842
+ "duration": null,
843
+ "end_time": null,
844
+ "exception": null,
845
+ "start_time": null,
846
+ "status": "pending"
847
+ },
848
+ "tags": []
849
+ },
850
+ "source": [
851
+ "# Random forests (note: the cells below fit a DecisionTreeClassifier, not a random forest)\n"
852
+ ]
853
+ },
854
+ {
855
+ "cell_type": "code",
856
+ "execution_count": null,
857
+ "id": "580c6e88",
858
+ "metadata": {
859
+ "execution": {
860
+ "iopub.execute_input": "2024-02-28T20:33:37.146957Z",
861
+ "iopub.status.busy": "2024-02-28T20:33:37.145942Z",
862
+ "iopub.status.idle": "2024-02-28T20:33:40.375233Z",
863
+ "shell.execute_reply": "2024-02-28T20:33:40.374273Z",
864
+ "shell.execute_reply.started": "2024-02-28T20:33:37.146922Z"
865
+ },
866
+ "papermill": {
867
+ "duration": null,
868
+ "end_time": null,
869
+ "exception": null,
870
+ "start_time": null,
871
+ "status": "pending"
872
+ },
873
+ "tags": []
874
+ },
875
+ "outputs": [],
876
+ "source": [
877
+ "from sklearn.tree import DecisionTreeClassifier\n",
878
+ "dt_model = DecisionTreeClassifier(random_state=42)\n",
879
+ "dt_model.fit(X_train, y_train)"
880
+ ]
881
+ },
882
+ {
883
+ "cell_type": "code",
884
+ "execution_count": null,
885
+ "id": "fdc4234d",
886
+ "metadata": {
887
+ "execution": {
888
+ "iopub.execute_input": "2024-02-28T20:33:42.697604Z",
889
+ "iopub.status.busy": "2024-02-28T20:33:42.697221Z",
890
+ "iopub.status.idle": "2024-02-28T20:33:42.965045Z",
891
+ "shell.execute_reply": "2024-02-28T20:33:42.964106Z",
892
+ "shell.execute_reply.started": "2024-02-28T20:33:42.697574Z"
893
+ },
894
+ "papermill": {
895
+ "duration": null,
896
+ "end_time": null,
897
+ "exception": null,
898
+ "start_time": null,
899
+ "status": "pending"
900
+ },
901
+ "tags": []
902
+ },
903
+ "outputs": [],
904
+ "source": [
905
+ "dt_y_pred = dt_model.predict(X_test)\n",
906
+ "\n",
907
+ "# Evaluate the Decision Tree model\n",
908
+ "dt_accuracy = accuracy_score(y_test, dt_y_pred)\n",
909
+ "print(\"Decision Tree Accuracy:\", dt_accuracy)"
910
+ ]
911
+ },
912
+ {
913
+ "cell_type": "markdown",
914
+ "id": "1eef14a8",
915
+ "metadata": {
916
+ "papermill": {
917
+ "duration": null,
918
+ "end_time": null,
919
+ "exception": null,
920
+ "start_time": null,
921
+ "status": "pending"
922
+ },
923
+ "tags": []
924
+ },
925
+ "source": [
926
+ "# LSTM\n"
927
+ ]
928
+ },
929
+ {
930
+ "cell_type": "code",
931
+ "execution_count": null,
932
+ "id": "3d95e691",
933
+ "metadata": {
934
+ "execution": {
935
+ "iopub.execute_input": "2024-02-28T20:34:03.811369Z",
936
+ "iopub.status.busy": "2024-02-28T20:34:03.811034Z",
937
+ "iopub.status.idle": "2024-02-28T20:34:16.297599Z",
938
+ "shell.execute_reply": "2024-02-28T20:34:16.296591Z",
939
+ "shell.execute_reply.started": "2024-02-28T20:34:03.811342Z"
940
+ },
941
+ "papermill": {
942
+ "duration": null,
943
+ "end_time": null,
944
+ "exception": null,
945
+ "start_time": null,
946
+ "status": "pending"
947
+ },
948
+ "tags": []
949
+ },
950
+ "outputs": [],
951
+ "source": [
952
+ "import numpy as np\n",
953
+ "from tensorflow.keras.models import Sequential\n",
954
+ "from tensorflow.keras.layers import LSTM, Dense, Dropout\n",
955
+ "from sklearn.preprocessing import LabelEncoder\n",
956
+ "from sklearn.metrics import accuracy_score\n",
957
+ "from sklearn.model_selection import train_test_split"
958
+ ]
959
+ },
960
+ {
961
+ "cell_type": "code",
962
+ "execution_count": null,
963
+ "id": "45a3ea7a",
964
+ "metadata": {
965
+ "execution": {
966
+ "iopub.execute_input": "2024-02-28T20:34:21.694938Z",
967
+ "iopub.status.busy": "2024-02-28T20:34:21.693749Z",
968
+ "iopub.status.idle": "2024-02-28T20:34:23.351375Z",
969
+ "shell.execute_reply": "2024-02-28T20:34:23.350292Z",
970
+ "shell.execute_reply.started": "2024-02-28T20:34:21.694901Z"
971
+ },
972
+ "papermill": {
973
+ "duration": null,
974
+ "end_time": null,
975
+ "exception": null,
976
+ "start_time": null,
977
+ "status": "pending"
978
+ },
979
+ "tags": []
980
+ },
981
+ "outputs": [],
982
+ "source": [
983
+ "X_train_array = X_train.values.astype(np.float32)\n",
984
+ "X_test_array = X_test.values.astype(np.float32)\n",
985
+ "label_encoder = LabelEncoder()\n",
986
+ "y_train_encoded = label_encoder.fit_transform(y_train)\n",
987
+ "y_test_encoded = label_encoder.transform(y_test)"
988
+ ]
989
+ },
990
+ {
991
+ "cell_type": "code",
992
+ "execution_count": null,
993
+ "id": "3b2b4168",
994
+ "metadata": {
995
+ "execution": {
996
+ "iopub.execute_input": "2024-02-28T20:34:29.567410Z",
997
+ "iopub.status.busy": "2024-02-28T20:34:29.567010Z",
998
+ "iopub.status.idle": "2024-02-28T20:34:29.573526Z",
999
+ "shell.execute_reply": "2024-02-28T20:34:29.572343Z",
1000
+ "shell.execute_reply.started": "2024-02-28T20:34:29.567374Z"
1001
+ },
1002
+ "papermill": {
1003
+ "duration": null,
1004
+ "end_time": null,
1005
+ "exception": null,
1006
+ "start_time": null,
1007
+ "status": "pending"
1008
+ },
1009
+ "tags": []
1010
+ },
1011
+ "outputs": [],
1012
+ "source": [
1013
+ "X_train_reshaped = np.reshape(\n",
1014
+ " X_train_array, (X_train_array.shape[0], 1, X_train_array.shape[1]))\n",
1015
+ "X_test_reshaped = np.reshape(\n",
1016
+ " X_test_array, (X_test_array.shape[0], 1, X_test_array.shape[1]))"
1017
+ ]
1018
+ },
1019
+ {
1020
+ "cell_type": "code",
1021
+ "execution_count": null,
1022
+ "id": "5ba22307",
1023
+ "metadata": {
1024
+ "execution": {
1025
+ "iopub.execute_input": "2024-02-28T20:34:32.593258Z",
1026
+ "iopub.status.busy": "2024-02-28T20:34:32.592631Z",
1027
+ "iopub.status.idle": "2024-02-28T20:34:32.597849Z",
1028
+ "shell.execute_reply": "2024-02-28T20:34:32.596788Z",
1029
+ "shell.execute_reply.started": "2024-02-28T20:34:32.593225Z"
1030
+ },
1031
+ "papermill": {
1032
+ "duration": null,
1033
+ "end_time": null,
1034
+ "exception": null,
1035
+ "start_time": null,
1036
+ "status": "pending"
1037
+ },
1038
+ "tags": []
1039
+ },
1040
+ "outputs": [],
1041
+ "source": [
1042
+ "from tensorflow.keras.layers import LSTM, Dense, Dropout\n",
1043
+ "from tensorflow.keras.models import Sequential"
1044
+ ]
1045
+ },
1046
+ {
1047
+ "cell_type": "code",
1048
+ "execution_count": null,
1049
+ "id": "05d6c2a2",
1050
+ "metadata": {
1051
+ "execution": {
1052
+ "iopub.execute_input": "2024-02-28T20:34:35.967986Z",
1053
+ "iopub.status.busy": "2024-02-28T20:34:35.967129Z",
1054
+ "iopub.status.idle": "2024-02-28T20:34:37.983732Z",
1055
+ "shell.execute_reply": "2024-02-28T20:34:37.982934Z",
1056
+ "shell.execute_reply.started": "2024-02-28T20:34:35.967950Z"
1057
+ },
1058
+ "papermill": {
1059
+ "duration": null,
1060
+ "end_time": null,
1061
+ "exception": null,
1062
+ "start_time": null,
1063
+ "status": "pending"
1064
+ },
1065
+ "tags": []
1066
+ },
1067
+ "outputs": [],
1068
+ "source": [
1069
+ "model = Sequential()\n",
1070
+ "model.add(LSTM(units=128, input_shape=(\n",
1071
+ " 1, X_train_array.shape[1]), return_sequences=True))\n",
1072
+ "model.add(Dropout(0.2))\n",
1073
+ "model.add(LSTM(units=64, return_sequences=True))\n",
1074
+ "model.add(Dropout(0.2))\n",
1075
+ "model.add(LSTM(units=32, return_sequences=False))\n",
1076
+ "model.add(Dropout(0.2))\n",
1077
+ "model.add(Dense(units=64, activation='relu'))\n",
1078
+ "model.add(Dropout(0.2))\n",
1079
+ "model.add(Dense(units=32, activation='relu'))\n",
1080
+ "model.add(Dense(units=1, activation='sigmoid'))"
1081
+ ]
1082
+ },
1083
+ {
1084
+ "cell_type": "code",
1085
+ "execution_count": null,
1086
+ "id": "70506f9c",
1087
+ "metadata": {
1088
+ "execution": {
1089
+ "iopub.execute_input": "2024-02-28T20:34:47.317746Z",
1090
+ "iopub.status.busy": "2024-02-28T20:34:47.317029Z",
1091
+ "iopub.status.idle": "2024-02-28T20:34:47.340010Z",
1092
+ "shell.execute_reply": "2024-02-28T20:34:47.338881Z",
1093
+ "shell.execute_reply.started": "2024-02-28T20:34:47.317713Z"
1094
+ },
1095
+ "papermill": {
1096
+ "duration": null,
1097
+ "end_time": null,
1098
+ "exception": null,
1099
+ "start_time": null,
1100
+ "status": "pending"
1101
+ },
1102
+ "tags": []
1103
+ },
1104
+ "outputs": [],
1105
+ "source": [
1106
+ "model.compile(optimizer='adam', loss='binary_crossentropy',\n",
1107
+ " metrics=['accuracy'])"
1108
+ ]
1109
+ },
1110
+ {
1111
+ "cell_type": "code",
1112
+ "execution_count": null,
1113
+ "id": "a6cafe58",
1114
+ "metadata": {
1115
+ "execution": {
1116
+ "iopub.execute_input": "2024-02-28T20:34:50.132581Z",
1117
+ "iopub.status.busy": "2024-02-28T20:34:50.131713Z",
1118
+ "iopub.status.idle": "2024-02-28T20:34:50.168897Z",
1119
+ "shell.execute_reply": "2024-02-28T20:34:50.167980Z",
1120
+ "shell.execute_reply.started": "2024-02-28T20:34:50.132534Z"
1121
+ },
1122
+ "papermill": {
1123
+ "duration": null,
1124
+ "end_time": null,
1125
+ "exception": null,
1126
+ "start_time": null,
1127
+ "status": "pending"
1128
+ },
1129
+ "tags": []
1130
+ },
1131
+ "outputs": [],
1132
+ "source": [
1133
+ "model.summary()"
1134
+ ]
1135
+ },
1136
+ {
1137
+ "cell_type": "code",
1138
+ "execution_count": null,
1139
+ "id": "fb4e2eb7",
1140
+ "metadata": {
1141
+ "execution": {
1142
+ "iopub.execute_input": "2024-02-28T20:34:57.849042Z",
1143
+ "iopub.status.busy": "2024-02-28T20:34:57.848387Z",
1144
+ "iopub.status.idle": "2024-02-28T20:58:09.859733Z",
1145
+ "shell.execute_reply": "2024-02-28T20:58:09.858723Z",
1146
+ "shell.execute_reply.started": "2024-02-28T20:34:57.849008Z"
1147
+ },
1148
+ "papermill": {
1149
+ "duration": null,
1150
+ "end_time": null,
1151
+ "exception": null,
1152
+ "start_time": null,
1153
+ "status": "pending"
1154
+ },
1155
+ "tags": []
1156
+ },
1157
+ "outputs": [],
1158
+ "source": [
1159
+ "model.fit(X_train_reshaped, y_train_encoded, epochs=30,\n",
1160
+ " batch_size=32, validation_split=0.1)"
1161
+ ]
1162
+ },
1163
+ {
1164
+ "cell_type": "code",
1165
+ "execution_count": null,
1166
+ "id": "8df9b4ba",
1167
+ "metadata": {
1168
+ "execution": {
1169
+ "iopub.execute_input": "2024-02-28T21:02:40.398517Z",
1170
+ "iopub.status.busy": "2024-02-28T21:02:40.397537Z",
1171
+ "iopub.status.idle": "2024-02-28T21:02:48.047278Z",
1172
+ "shell.execute_reply": "2024-02-28T21:02:48.046275Z",
1173
+ "shell.execute_reply.started": "2024-02-28T21:02:40.398480Z"
1174
+ },
1175
+ "papermill": {
1176
+ "duration": null,
1177
+ "end_time": null,
1178
+ "exception": null,
1179
+ "start_time": null,
1180
+ "status": "pending"
1181
+ },
1182
+ "tags": []
1183
+ },
1184
+ "outputs": [],
1185
+ "source": [
1186
+ "y_pred_proba = model.predict(X_test_reshaped)\n",
1187
+ "y_pred = (y_pred_proba > 0.5).astype(int)"
1188
+ ]
1189
+ },
1190
+ {
1191
+ "cell_type": "code",
1192
+ "execution_count": null,
1193
+ "id": "c9c1b0ae",
1194
+ "metadata": {
1195
+ "execution": {
1196
+ "iopub.execute_input": "2024-02-27T02:51:31.677208Z",
1197
+ "iopub.status.busy": "2024-02-27T02:51:31.676880Z",
1198
+ "iopub.status.idle": "2024-02-27T02:51:31.686765Z",
1199
+ "shell.execute_reply": "2024-02-27T02:51:31.685738Z",
1200
+ "shell.execute_reply.started": "2024-02-27T02:51:31.677180Z"
1201
+ },
1202
+ "papermill": {
1203
+ "duration": null,
1204
+ "end_time": null,
1205
+ "exception": null,
1206
+ "start_time": null,
1207
+ "status": "pending"
1208
+ },
1209
+ "tags": []
1210
+ },
1211
+ "outputs": [],
1212
+ "source": [
1213
+ "accuracy = accuracy_score(y_test_encoded, y_pred)\n",
1214
+ "print(\"Accuracy:\", accuracy)"
1215
+ ]
1216
+ },
1217
+ {
1218
+ "cell_type": "code",
1219
+ "execution_count": null,
1220
+ "id": "963f04ba",
1221
+ "metadata": {
1222
+ "execution": {
1223
+ "iopub.execute_input": "2024-02-27T02:51:35.877861Z",
1224
+ "iopub.status.busy": "2024-02-27T02:51:35.877122Z",
1225
+ "iopub.status.idle": "2024-02-27T02:51:35.881902Z",
1226
+ "shell.execute_reply": "2024-02-27T02:51:35.880765Z",
1227
+ "shell.execute_reply.started": "2024-02-27T02:51:35.877829Z"
1228
+ },
1229
+ "papermill": {
1230
+ "duration": null,
1231
+ "end_time": null,
1232
+ "exception": null,
1233
+ "start_time": null,
1234
+ "status": "pending"
1235
+ },
1236
+ "tags": []
1237
+ },
1238
+ "outputs": [],
1239
+ "source": [
1240
+ "import matplotlib.pyplot as plt"
1241
+ ]
1242
+ },
1243
+ {
1244
+ "cell_type": "code",
1245
+ "execution_count": null,
1246
+ "id": "cb765c7d",
1247
+ "metadata": {
1248
+ "execution": {
1249
+ "iopub.execute_input": "2024-02-28T21:14:46.444310Z",
1250
+ "iopub.status.busy": "2024-02-28T21:14:46.443465Z",
1251
+ "iopub.status.idle": "2024-02-28T21:14:46.448272Z",
1252
+ "shell.execute_reply": "2024-02-28T21:14:46.447257Z",
1253
+ "shell.execute_reply.started": "2024-02-28T21:14:46.444277Z"
1254
+ },
1255
+ "papermill": {
1256
+ "duration": null,
1257
+ "end_time": null,
1258
+ "exception": null,
1259
+ "start_time": null,
1260
+ "status": "pending"
1261
+ },
1262
+ "tags": []
1263
+ },
1264
+ "outputs": [],
1265
+ "source": [
1266
+ "import pickle"
1267
+ ]
1268
+ },
1269
+ {
1270
+ "cell_type": "code",
1271
+ "execution_count": null,
1272
+ "id": "dd47a5eb",
1273
+ "metadata": {
1274
+ "execution": {
1275
+ "iopub.execute_input": "2024-02-28T21:14:49.219822Z",
1276
+ "iopub.status.busy": "2024-02-28T21:14:49.219019Z",
1277
+ "iopub.status.idle": "2024-02-28T21:14:49.316868Z",
1278
+ "shell.execute_reply": "2024-02-28T21:14:49.315889Z",
1279
+ "shell.execute_reply.started": "2024-02-28T21:14:49.219786Z"
1280
+ },
1281
+ "papermill": {
1282
+ "duration": null,
1283
+ "end_time": null,
1284
+ "exception": null,
1285
+ "start_time": null,
1286
+ "status": "pending"
1287
+ },
1288
+ "tags": []
1289
+ },
1290
+ "outputs": [],
1291
+ "source": [
1292
+ "with open('model.pkl', 'wb') as f:\n",
1293
+ " pickle.dump(model, f)"
1294
+ ]
1295
+ },
1296
+ {
1297
+ "cell_type": "markdown",
1298
+ "id": "93655540",
1299
+ "metadata": {
1300
+ "papermill": {
1301
+ "duration": null,
1302
+ "end_time": null,
1303
+ "exception": null,
1304
+ "start_time": null,
1305
+ "status": "pending"
1306
+ },
1307
+ "tags": []
1308
+ },
1309
+ "source": [
1310
+ "# CNN\n",
1311
+ "\n",
1312
+ "#### `Problem somewhere in this section; cause not yet identified`\n"
1313
+ ]
1314
+ },
1315
+ {
1316
+ "cell_type": "code",
1317
+ "execution_count": null,
1318
+ "id": "8144cd90",
1319
+ "metadata": {
1320
+ "execution": {
1321
+ "iopub.execute_input": "2024-02-27T02:25:33.293261Z",
1322
+ "iopub.status.busy": "2024-02-27T02:25:33.292866Z",
1323
+ "iopub.status.idle": "2024-02-27T02:25:33.298337Z",
1324
+ "shell.execute_reply": "2024-02-27T02:25:33.297216Z",
1325
+ "shell.execute_reply.started": "2024-02-27T02:25:33.293228Z"
1326
+ },
1327
+ "papermill": {
1328
+ "duration": null,
1329
+ "end_time": null,
1330
+ "exception": null,
1331
+ "start_time": null,
1332
+ "status": "pending"
1333
+ },
1334
+ "tags": []
1335
+ },
1336
+ "outputs": [],
1337
+ "source": [
1338
+ "from sklearn.model_selection import train_test_split\n",
1339
+ "from tensorflow.keras.models import Sequential\n",
1340
+ "from tensorflow.keras.layers import Conv1D, MaxPooling1D, Flatten, Dense\n",
1341
+ "from tensorflow.keras.optimizers import Adam"
1342
+ ]
1343
+ },
1344
+ {
1345
+ "cell_type": "code",
1346
+ "execution_count": null,
1347
+ "id": "1507a936",
1348
+ "metadata": {
1349
+ "execution": {
1350
+ "iopub.execute_input": "2024-02-27T02:25:41.828280Z",
1351
+ "iopub.status.busy": "2024-02-27T02:25:41.827891Z",
1352
+ "iopub.status.idle": "2024-02-27T02:25:41.914213Z",
1353
+ "shell.execute_reply": "2024-02-27T02:25:41.913392Z",
1354
+ "shell.execute_reply.started": "2024-02-27T02:25:41.828241Z"
1355
+ },
1356
+ "papermill": {
1357
+ "duration": null,
1358
+ "end_time": null,
1359
+ "exception": null,
1360
+ "start_time": null,
1361
+ "status": "pending"
1362
+ },
1363
+ "tags": []
1364
+ },
1365
+ "outputs": [],
1366
+ "source": [
1367
+ "X_train, X_temp, y_train, y_temp = train_test_split(\n",
1368
+ " X, y, test_size=0.2, random_state=42)\n",
1369
+ "X_val, X_test, y_val, y_test = train_test_split(\n",
1370
+ " X_temp, y_temp, test_size=0.5, random_state=42)"
1371
+ ]
1372
+ },
1373
+ {
1374
+ "cell_type": "code",
1375
+ "execution_count": null,
1376
+ "id": "febb5829",
1377
+ "metadata": {
1378
+ "execution": {
1379
+ "iopub.execute_input": "2024-02-27T02:25:45.647678Z",
1380
+ "iopub.status.busy": "2024-02-27T02:25:45.646969Z",
1381
+ "iopub.status.idle": "2024-02-27T02:25:45.651746Z",
1382
+ "shell.execute_reply": "2024-02-27T02:25:45.650798Z",
1383
+ "shell.execute_reply.started": "2024-02-27T02:25:45.647647Z"
1384
+ },
1385
+ "papermill": {
1386
+ "duration": null,
1387
+ "end_time": null,
1388
+ "exception": null,
1389
+ "start_time": null,
1390
+ "status": "pending"
1391
+ },
1392
+ "tags": []
1393
+ },
1394
+ "outputs": [],
1395
+ "source": [
1396
+ "num_features = X_train.shape[1]"
1397
+ ]
1398
+ },
1399
+ {
1400
+ "cell_type": "code",
1401
+ "execution_count": null,
1402
+ "id": "131be457",
1403
+ "metadata": {
1404
+ "execution": {
1405
+ "iopub.execute_input": "2024-02-27T02:25:55.207330Z",
1406
+ "iopub.status.busy": "2024-02-27T02:25:55.206931Z",
1407
+ "iopub.status.idle": "2024-02-27T02:25:55.281487Z",
1408
+ "shell.execute_reply": "2024-02-27T02:25:55.280767Z",
1409
+ "shell.execute_reply.started": "2024-02-27T02:25:55.207300Z"
1410
+ },
1411
+ "papermill": {
1412
+ "duration": null,
1413
+ "end_time": null,
1414
+ "exception": null,
1415
+ "start_time": null,
1416
+ "status": "pending"
1417
+ },
1418
+ "tags": []
1419
+ },
1420
+ "outputs": [],
1421
+ "source": [
1422
+ "model = Sequential([\n",
1423
+ " Conv1D(filters=32, kernel_size=3, activation='relu',\n",
1424
+ " input_shape=(num_features, 1)),\n",
1425
+ " MaxPooling1D(pool_size=2),\n",
1426
+ " Conv1D(filters=64, kernel_size=3, activation='relu'),\n",
1427
+ " MaxPooling1D(pool_size=2),\n",
1428
+ " Flatten(),\n",
1429
+ " Dense(64, activation='relu'),\n",
1430
+ " Dense(1, activation='sigmoid')\n",
1431
+ "])"
1432
+ ]
1433
+ },
1434
+ {
1435
+ "cell_type": "code",
1436
+ "execution_count": null,
1437
+ "id": "1228e48c",
1438
+ "metadata": {
1439
+ "execution": {
1440
+ "iopub.execute_input": "2024-02-27T02:26:02.388725Z",
1441
+ "iopub.status.busy": "2024-02-27T02:26:02.388349Z",
1442
+ "iopub.status.idle": "2024-02-27T02:26:02.402654Z",
1443
+ "shell.execute_reply": "2024-02-27T02:26:02.401613Z",
1444
+ "shell.execute_reply.started": "2024-02-27T02:26:02.388685Z"
1445
+ },
1446
+ "papermill": {
1447
+ "duration": null,
1448
+ "end_time": null,
1449
+ "exception": null,
1450
+ "start_time": null,
1451
+ "status": "pending"
1452
+ },
1453
+ "tags": []
1454
+ },
1455
+ "outputs": [],
1456
+ "source": [
1457
+ "model.compile(optimizer=Adam(learning_rate=0.001),\n",
1458
+ " loss='binary_crossentropy', metrics=['accuracy'])"
1459
+ ]
1460
+ },
1461
+ {
1462
+ "cell_type": "code",
1463
+ "execution_count": null,
1464
+ "id": "61cf26b0",
1465
+ "metadata": {
1466
+ "execution": {
1467
+ "iopub.execute_input": "2024-02-27T02:26:49.628751Z",
1468
+ "iopub.status.busy": "2024-02-27T02:26:49.628140Z",
1469
+ "iopub.status.idle": "2024-02-27T02:26:49.633203Z",
1470
+ "shell.execute_reply": "2024-02-27T02:26:49.632167Z",
1471
+ "shell.execute_reply.started": "2024-02-27T02:26:49.628710Z"
1472
+ },
1473
+ "papermill": {
1474
+ "duration": null,
1475
+ "end_time": null,
1476
+ "exception": null,
1477
+ "start_time": null,
1478
+ "status": "pending"
1479
+ },
1480
+ "tags": []
1481
+ },
1482
+ "outputs": [],
1483
+ "source": [
1484
+ "import tensorflow as tf\n",
1485
+ "from sklearn.model_selection import train_test_split\n",
1486
+ "from sklearn.preprocessing import StandardScaler"
1487
+ ]
1488
+ },
1489
+ {
1490
+ "cell_type": "code",
1491
+ "execution_count": null,
1492
+ "id": "6bf5881a",
1493
+ "metadata": {
1494
+ "execution": {
1495
+ "iopub.execute_input": "2024-02-27T02:26:56.957652Z",
1496
+ "iopub.status.busy": "2024-02-27T02:26:56.957291Z",
1497
+ "iopub.status.idle": "2024-02-27T02:26:58.040698Z",
1498
+ "shell.execute_reply": "2024-02-27T02:26:58.039438Z",
1499
+ "shell.execute_reply.started": "2024-02-27T02:26:56.957622Z"
1500
+ },
1501
+ "papermill": {
1502
+ "duration": null,
1503
+ "end_time": null,
1504
+ "exception": null,
1505
+ "start_time": null,
1506
+ "status": "pending"
1507
+ },
1508
+ "tags": []
1509
+ },
1510
+ "outputs": [],
1511
+ "source": [
1512
+ "scaler = StandardScaler()\n",
1513
+ "X_scaled = scaler.fit_transform(X)\n",
1514
+ "y_encoded = tf.keras.utils.to_categorical(y)"
1515
+ ]
1516
+ },
1517
+ {
1518
+ "cell_type": "code",
1519
+ "execution_count": null,
1520
+ "id": "46ecaaa1",
1521
+ "metadata": {
1522
+ "execution": {
1523
+ "iopub.execute_input": "2024-02-27T02:26:08.518224Z",
1524
+ "iopub.status.busy": "2024-02-27T02:26:08.517758Z",
1525
+ "iopub.status.idle": "2024-02-27T02:26:09.398867Z",
1526
+ "shell.execute_reply": "2024-02-27T02:26:09.397682Z",
1527
+ "shell.execute_reply.started": "2024-02-27T02:26:08.518190Z"
1528
+ },
1529
+ "papermill": {
1530
+ "duration": null,
1531
+ "end_time": null,
1532
+ "exception": null,
1533
+ "start_time": null,
1534
+ "status": "pending"
1535
+ },
1536
+ "tags": []
1537
+ },
1538
+ "outputs": [],
1539
+ "source": [
1540
+ "history = model.fit(X_train, y_train, epochs=10,\n",
1541
+ " batch_size=32, validation_data=(X_val, y_val))"
1542
+ ]
1543
+ }
1544
+ ],
1545
+ "metadata": {
1546
+ "kaggle": {
1547
+ "accelerator": "nvidiaTeslaT4",
1548
+ "dataSources": [
1549
+ {
1550
+ "datasetId": 1936563,
1551
+ "sourceId": 6674905,
1552
+ "sourceType": "datasetVersion"
1553
+ }
1554
+ ],
1555
+ "dockerImageVersionId": 30648,
1556
+ "isGpuEnabled": true,
1557
+ "isInternetEnabled": true,
1558
+ "language": "python",
1559
+ "sourceType": "notebook"
1560
+ },
1561
+ "kernelspec": {
1562
+ "display_name": "Python 3",
1563
+ "language": "python",
1564
+ "name": "python3"
1565
+ },
1566
+ "language_info": {
1567
+ "codemirror_mode": {
1568
+ "name": "ipython",
1569
+ "version": 3
1570
+ },
1571
+ "file_extension": ".py",
1572
+ "mimetype": "text/x-python",
1573
+ "name": "python",
1574
+ "nbconvert_exporter": "python",
1575
+ "pygments_lexer": "ipython3",
1576
+ "version": "3.10.13"
1577
+ },
1578
+ "papermill": {
1579
+ "default_parameters": {},
1580
+ "duration": 8.029196,
1581
+ "end_time": "2024-02-28T21:15:45.609983",
1582
+ "environment_variables": {},
1583
+ "exception": true,
1584
+ "input_path": "__notebook__.ipynb",
1585
+ "output_path": "__notebook__.ipynb",
1586
+ "parameters": {},
1587
+ "start_time": "2024-02-28T21:15:37.580787",
1588
+ "version": "2.5.0"
1589
+ }
1590
+ },
1591
+ "nbformat": 4,
1592
+ "nbformat_minor": 5
1593
+ }