Pankaj001 committed on
Commit
d5c146c
1 Parent(s): effd366

Upload wine-quality.ipynb

Browse files
Files changed (1) hide show
  1. wine-quality.ipynb +634 -0
wine-quality.ipynb ADDED
@@ -0,0 +1,634 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "cells": [
3
+ {
4
+ "cell_type": "markdown",
5
+ "id": "d6ffc7b7",
6
+ "metadata": {},
7
+ "source": [
8
+ "# 1.0 Importing libraries"
9
+ ]
10
+ },
11
+ {
12
+ "cell_type": "code",
13
+ "execution_count": 1,
14
+ "id": "4ca597ab",
15
+ "metadata": {},
16
+ "outputs": [],
17
+ "source": [
18
+ "\"\"\"\n",
19
+ "Description: Import libraries\n",
20
+ "\"\"\"\n",
21
+ "import numpy as np\n",
22
+ "from sklearn.model_selection import train_test_split\n",
23
+ "from sklearn import metrics\n",
24
+ "import pandas as pd\n",
25
+ "import os\n",
26
+ "import random\n",
27
+ "from humanfriendly import format_timespan\n",
28
+ "from sklearn.preprocessing import MinMaxScaler\n",
29
+ "from sklearn.ensemble import RandomForestClassifier\n",
30
+ "import pickle\n",
31
+ "# from sklearn.svm import SVC\n",
32
+ "# from sklearn.linear_model import LogisticRegression"
33
+ ]
34
+ },
35
+ {
36
+ "cell_type": "code",
37
+ "execution_count": 2,
38
+ "id": "fffc59ee",
39
+ "metadata": {},
40
+ "outputs": [],
41
+ "source": [
42
+ "\"\"\"\n",
43
+ "Description: Specify data path\n",
44
+ "\"\"\"\n",
45
+ "data_path = r'data\\winequality_red_label_remapped.csv'"
46
+ ]
47
+ },
48
+ {
49
+ "cell_type": "code",
50
+ "execution_count": 3,
51
+ "id": "5a2e912f",
52
+ "metadata": {},
53
+ "outputs": [
54
+ {
55
+ "data": {
56
+ "text/html": [
57
+ "<div>\n",
58
+ "<style scoped>\n",
59
+ " .dataframe tbody tr th:only-of-type {\n",
60
+ " vertical-align: middle;\n",
61
+ " }\n",
62
+ "\n",
63
+ " .dataframe tbody tr th {\n",
64
+ " vertical-align: top;\n",
65
+ " }\n",
66
+ "\n",
67
+ " .dataframe thead th {\n",
68
+ " text-align: right;\n",
69
+ " }\n",
70
+ "</style>\n",
71
+ "<table border=\"1\" class=\"dataframe\">\n",
72
+ " <thead>\n",
73
+ " <tr style=\"text-align: right;\">\n",
74
+ " <th></th>\n",
75
+ " <th>fixed acidity</th>\n",
76
+ " <th>volatile acidity</th>\n",
77
+ " <th>citric acid</th>\n",
78
+ " <th>residual sugar</th>\n",
79
+ " <th>chlorides</th>\n",
80
+ " <th>free sulfur dioxide</th>\n",
81
+ " <th>total sulfur dioxide</th>\n",
82
+ " <th>density</th>\n",
83
+ " <th>pH</th>\n",
84
+ " <th>sulphates</th>\n",
85
+ " <th>alcohol</th>\n",
86
+ " <th>quality</th>\n",
87
+ " </tr>\n",
88
+ " </thead>\n",
89
+ " <tbody>\n",
90
+ " <tr>\n",
91
+ " <th>0</th>\n",
92
+ " <td>7.4</td>\n",
93
+ " <td>0.70</td>\n",
94
+ " <td>0.00</td>\n",
95
+ " <td>1.9</td>\n",
96
+ " <td>0.076</td>\n",
97
+ " <td>11.0</td>\n",
98
+ " <td>34.0</td>\n",
99
+ " <td>0.9978</td>\n",
100
+ " <td>3.51</td>\n",
101
+ " <td>0.56</td>\n",
102
+ " <td>9.4</td>\n",
103
+ " <td>2</td>\n",
104
+ " </tr>\n",
105
+ " <tr>\n",
106
+ " <th>1</th>\n",
107
+ " <td>7.8</td>\n",
108
+ " <td>0.88</td>\n",
109
+ " <td>0.00</td>\n",
110
+ " <td>2.6</td>\n",
111
+ " <td>0.098</td>\n",
112
+ " <td>25.0</td>\n",
113
+ " <td>67.0</td>\n",
114
+ " <td>0.9968</td>\n",
115
+ " <td>3.20</td>\n",
116
+ " <td>0.68</td>\n",
117
+ " <td>9.8</td>\n",
118
+ " <td>2</td>\n",
119
+ " </tr>\n",
120
+ " <tr>\n",
121
+ " <th>2</th>\n",
122
+ " <td>7.8</td>\n",
123
+ " <td>0.76</td>\n",
124
+ " <td>0.04</td>\n",
125
+ " <td>2.3</td>\n",
126
+ " <td>0.092</td>\n",
127
+ " <td>15.0</td>\n",
128
+ " <td>54.0</td>\n",
129
+ " <td>0.9970</td>\n",
130
+ " <td>3.26</td>\n",
131
+ " <td>0.65</td>\n",
132
+ " <td>9.8</td>\n",
133
+ " <td>2</td>\n",
134
+ " </tr>\n",
135
+ " <tr>\n",
136
+ " <th>3</th>\n",
137
+ " <td>11.2</td>\n",
138
+ " <td>0.28</td>\n",
139
+ " <td>0.56</td>\n",
140
+ " <td>1.9</td>\n",
141
+ " <td>0.075</td>\n",
142
+ " <td>17.0</td>\n",
143
+ " <td>60.0</td>\n",
144
+ " <td>0.9980</td>\n",
145
+ " <td>3.16</td>\n",
146
+ " <td>0.58</td>\n",
147
+ " <td>9.8</td>\n",
148
+ " <td>3</td>\n",
149
+ " </tr>\n",
150
+ " <tr>\n",
151
+ " <th>4</th>\n",
152
+ " <td>7.4</td>\n",
153
+ " <td>0.70</td>\n",
154
+ " <td>0.00</td>\n",
155
+ " <td>1.9</td>\n",
156
+ " <td>0.076</td>\n",
157
+ " <td>11.0</td>\n",
158
+ " <td>34.0</td>\n",
159
+ " <td>0.9978</td>\n",
160
+ " <td>3.51</td>\n",
161
+ " <td>0.56</td>\n",
162
+ " <td>9.4</td>\n",
163
+ " <td>2</td>\n",
164
+ " </tr>\n",
165
+ " </tbody>\n",
166
+ "</table>\n",
167
+ "</div>"
168
+ ],
169
+ "text/plain": [
170
+ " fixed acidity volatile acidity citric acid residual sugar chlorides \\\n",
171
+ "0 7.4 0.70 0.00 1.9 0.076 \n",
172
+ "1 7.8 0.88 0.00 2.6 0.098 \n",
173
+ "2 7.8 0.76 0.04 2.3 0.092 \n",
174
+ "3 11.2 0.28 0.56 1.9 0.075 \n",
175
+ "4 7.4 0.70 0.00 1.9 0.076 \n",
176
+ "\n",
177
+ " free sulfur dioxide total sulfur dioxide density pH sulphates \\\n",
178
+ "0 11.0 34.0 0.9978 3.51 0.56 \n",
179
+ "1 25.0 67.0 0.9968 3.20 0.68 \n",
180
+ "2 15.0 54.0 0.9970 3.26 0.65 \n",
181
+ "3 17.0 60.0 0.9980 3.16 0.58 \n",
182
+ "4 11.0 34.0 0.9978 3.51 0.56 \n",
183
+ "\n",
184
+ " alcohol quality \n",
185
+ "0 9.4 2 \n",
186
+ "1 9.8 2 \n",
187
+ "2 9.8 2 \n",
188
+ "3 9.8 3 \n",
189
+ "4 9.4 2 "
190
+ ]
191
+ },
192
+ "execution_count": 3,
193
+ "metadata": {},
194
+ "output_type": "execute_result"
195
+ }
196
+ ],
197
+ "source": [
198
+ "\"\"\"\n",
199
+ "Description: Load data\n",
200
+ "\"\"\"\n",
201
+ "df = pd.read_csv(data_path)\n",
202
+ "df.head()"
203
+ ]
204
+ },
205
+ {
206
+ "cell_type": "code",
207
+ "execution_count": 4,
208
+ "id": "2815d511",
209
+ "metadata": {},
210
+ "outputs": [
211
+ {
212
+ "data": {
213
+ "text/plain": [
214
+ "array([0, 1, 2, 3, 4, 5], dtype=int64)"
215
+ ]
216
+ },
217
+ "execution_count": 4,
218
+ "metadata": {},
219
+ "output_type": "execute_result"
220
+ }
221
+ ],
222
+ "source": [
223
+ "\"\"\"\n",
224
+ "Description: Get classes\n",
225
+ "\"\"\"\n",
226
+ "np.unique(df['quality'])"
227
+ ]
228
+ },
229
+ {
230
+ "cell_type": "code",
231
+ "execution_count": 5,
232
+ "id": "d11d9540",
233
+ "metadata": {},
234
+ "outputs": [
235
+ {
236
+ "data": {
237
+ "text/plain": [
238
+ "'\\nDescription: Remap \\n'"
239
+ ]
240
+ },
241
+ "execution_count": 5,
242
+ "metadata": {},
243
+ "output_type": "execute_result"
244
+ }
245
+ ],
246
+ "source": [
247
+ "\"\"\"\n",
248
+ "Description: Remap \n",
249
+ "\"\"\"\n",
250
+ "# df['quality'] = df['quality'].apply(lambda x: x-3)"
251
+ ]
252
+ },
253
+ {
254
+ "cell_type": "code",
255
+ "execution_count": 6,
256
+ "id": "4d694106",
257
+ "metadata": {},
258
+ "outputs": [
259
+ {
260
+ "data": {
261
+ "text/plain": [
262
+ "array([0, 1, 2, 3, 4, 5], dtype=int64)"
263
+ ]
264
+ },
265
+ "execution_count": 6,
266
+ "metadata": {},
267
+ "output_type": "execute_result"
268
+ }
269
+ ],
270
+ "source": [
271
+ "\"\"\"\n",
272
+ "Description: Get classes\n",
273
+ "\"\"\"\n",
274
+ "np.unique(df['quality'])"
275
+ ]
276
+ },
277
+ {
278
+ "cell_type": "code",
279
+ "execution_count": 7,
280
+ "id": "43458438",
281
+ "metadata": {},
282
+ "outputs": [],
283
+ "source": [
284
+ "df.to_csv(\"winequality_red_label_remapped.csv\",index=False)"
285
+ ]
286
+ },
287
+ {
288
+ "cell_type": "code",
289
+ "execution_count": 8,
290
+ "id": "ade5900f",
291
+ "metadata": {},
292
+ "outputs": [
293
+ {
294
+ "data": {
295
+ "text/plain": [
296
+ "fixed acidity 0\n",
297
+ "volatile acidity 0\n",
298
+ "citric acid 0\n",
299
+ "residual sugar 0\n",
300
+ "chlorides 0\n",
301
+ "free sulfur dioxide 0\n",
302
+ "total sulfur dioxide 0\n",
303
+ "density 0\n",
304
+ "pH 0\n",
305
+ "sulphates 0\n",
306
+ "alcohol 0\n",
307
+ "quality 0\n",
308
+ "dtype: int64"
309
+ ]
310
+ },
311
+ "execution_count": 8,
312
+ "metadata": {},
313
+ "output_type": "execute_result"
314
+ }
315
+ ],
316
+ "source": [
317
+ "\"\"\"\n",
318
+ "Description: Check null value\n",
319
+ "\"\"\"\n",
320
+ "df.isnull().sum()"
321
+ ]
322
+ },
323
+ {
324
+ "cell_type": "code",
325
+ "execution_count": 9,
326
+ "id": "1b34f13e",
327
+ "metadata": {},
328
+ "outputs": [
329
+ {
330
+ "data": {
331
+ "text/plain": [
332
+ "(1599, 11)"
333
+ ]
334
+ },
335
+ "execution_count": 9,
336
+ "metadata": {},
337
+ "output_type": "execute_result"
338
+ }
339
+ ],
340
+ "source": [
341
+ "\"\"\"\n",
342
+ "Description: Prepare data\n",
343
+ "\"\"\"\n",
344
+ "x=df.drop(['quality'], axis=1)\n",
345
+ "x.shape"
346
+ ]
347
+ },
348
+ {
349
+ "cell_type": "code",
350
+ "execution_count": 10,
351
+ "id": "238dc707",
352
+ "metadata": {},
353
+ "outputs": [
354
+ {
355
+ "data": {
356
+ "text/plain": [
357
+ "(1599,)"
358
+ ]
359
+ },
360
+ "execution_count": 10,
361
+ "metadata": {},
362
+ "output_type": "execute_result"
363
+ }
364
+ ],
365
+ "source": [
366
+ "\"\"\"\n",
367
+ "Description: Get target label\n",
368
+ "\"\"\"\n",
369
+ "y = df['quality']\n",
370
+ "y.shape"
371
+ ]
372
+ },
373
+ {
374
+ "cell_type": "code",
375
+ "execution_count": 11,
376
+ "id": "5617aeb1",
377
+ "metadata": {},
378
+ "outputs": [],
379
+ "source": [
380
+ "\"\"\"\n",
381
+ "Description: Split data\n",
382
+ "\"\"\"\n",
383
+ "x_train, x_test, y_train, y_test = train_test_split(x, y, test_size=0.2, random_state=40,stratify=y)"
384
+ ]
385
+ },
386
+ {
387
+ "cell_type": "code",
388
+ "execution_count": 12,
389
+ "id": "f5d3b86f",
390
+ "metadata": {},
391
+ "outputs": [
392
+ {
393
+ "name": "stdout",
394
+ "output_type": "stream",
395
+ "text": [
396
+ "shape of x_train: (1279, 11)\n",
397
+ "shape of y_train: (1279,)\n",
398
+ "shape of x_test: (320, 11)\n",
399
+ "shape of y_test: (320,)\n"
400
+ ]
401
+ }
402
+ ],
403
+ "source": [
404
+ "'''\n",
405
+ "Description : Check size of dataset\n",
406
+ "'''\n",
407
+ "print(\"shape of x_train: \",x_train.shape)\n",
408
+ "print(\"shape of y_train: {}\".format(y_train.shape))\n",
409
+ "print(f'shape of x_test: {x_test.shape}')\n",
410
+ "print(f'shape of y_test: {y_test.shape}')"
411
+ ]
412
+ },
413
+ {
414
+ "cell_type": "code",
415
+ "execution_count": 13,
416
+ "id": "67168e49",
417
+ "metadata": {},
418
+ "outputs": [
419
+ {
420
+ "data": {
421
+ "text/html": [
422
+ "<style>#sk-container-id-1 {color: black;background-color: white;}#sk-container-id-1 pre{padding: 0;}#sk-container-id-1 div.sk-toggleable {background-color: white;}#sk-container-id-1 label.sk-toggleable__label {cursor: pointer;display: block;width: 100%;margin-bottom: 0;padding: 0.3em;box-sizing: border-box;text-align: center;}#sk-container-id-1 label.sk-toggleable__label-arrow:before {content: \"▸\";float: left;margin-right: 0.25em;color: #696969;}#sk-container-id-1 label.sk-toggleable__label-arrow:hover:before {color: black;}#sk-container-id-1 div.sk-estimator:hover label.sk-toggleable__label-arrow:before {color: black;}#sk-container-id-1 div.sk-toggleable__content {max-height: 0;max-width: 0;overflow: hidden;text-align: left;background-color: #f0f8ff;}#sk-container-id-1 div.sk-toggleable__content pre {margin: 0.2em;color: black;border-radius: 0.25em;background-color: #f0f8ff;}#sk-container-id-1 input.sk-toggleable__control:checked~div.sk-toggleable__content {max-height: 200px;max-width: 100%;overflow: auto;}#sk-container-id-1 input.sk-toggleable__control:checked~label.sk-toggleable__label-arrow:before {content: \"▾\";}#sk-container-id-1 div.sk-estimator input.sk-toggleable__control:checked~label.sk-toggleable__label {background-color: #d4ebff;}#sk-container-id-1 div.sk-label input.sk-toggleable__control:checked~label.sk-toggleable__label {background-color: #d4ebff;}#sk-container-id-1 input.sk-hidden--visually {border: 0;clip: rect(1px 1px 1px 1px);clip: rect(1px, 1px, 1px, 1px);height: 1px;margin: -1px;overflow: hidden;padding: 0;position: absolute;width: 1px;}#sk-container-id-1 div.sk-estimator {font-family: monospace;background-color: #f0f8ff;border: 1px dotted black;border-radius: 0.25em;box-sizing: border-box;margin-bottom: 0.5em;}#sk-container-id-1 div.sk-estimator:hover {background-color: #d4ebff;}#sk-container-id-1 div.sk-parallel-item::after {content: \"\";width: 100%;border-bottom: 1px solid gray;flex-grow: 1;}#sk-container-id-1 div.sk-label:hover 
label.sk-toggleable__label {background-color: #d4ebff;}#sk-container-id-1 div.sk-serial::before {content: \"\";position: absolute;border-left: 1px solid gray;box-sizing: border-box;top: 0;bottom: 0;left: 50%;z-index: 0;}#sk-container-id-1 div.sk-serial {display: flex;flex-direction: column;align-items: center;background-color: white;padding-right: 0.2em;padding-left: 0.2em;position: relative;}#sk-container-id-1 div.sk-item {position: relative;z-index: 1;}#sk-container-id-1 div.sk-parallel {display: flex;align-items: stretch;justify-content: center;background-color: white;position: relative;}#sk-container-id-1 div.sk-item::before, #sk-container-id-1 div.sk-parallel-item::before {content: \"\";position: absolute;border-left: 1px solid gray;box-sizing: border-box;top: 0;bottom: 0;left: 50%;z-index: -1;}#sk-container-id-1 div.sk-parallel-item {display: flex;flex-direction: column;z-index: 1;position: relative;background-color: white;}#sk-container-id-1 div.sk-parallel-item:first-child::after {align-self: flex-end;width: 50%;}#sk-container-id-1 div.sk-parallel-item:last-child::after {align-self: flex-start;width: 50%;}#sk-container-id-1 div.sk-parallel-item:only-child::after {width: 0;}#sk-container-id-1 div.sk-dashed-wrapped {border: 1px dashed gray;margin: 0 0.4em 0.5em 0.4em;box-sizing: border-box;padding-bottom: 0.4em;background-color: white;}#sk-container-id-1 div.sk-label label {font-family: monospace;font-weight: bold;display: inline-block;line-height: 1.2em;}#sk-container-id-1 div.sk-label-container {text-align: center;}#sk-container-id-1 div.sk-container {/* jupyter's `normalize.less` sets `[hidden] { display: none; }` but bootstrap.min.css set `[hidden] { display: none !important; }` so we also need the `!important` here to be able to override the default hidden behavior on the sphinx rendered scikit-learn.org. 
See: https://github.com/scikit-learn/scikit-learn/issues/21755 */display: inline-block !important;position: relative;}#sk-container-id-1 div.sk-text-repr-fallback {display: none;}</style><div id=\"sk-container-id-1\" class=\"sk-top-container\"><div class=\"sk-text-repr-fallback\"><pre>RandomForestClassifier(n_estimators=1000)</pre><b>In a Jupyter environment, please rerun this cell to show the HTML representation or trust the notebook. <br />On GitHub, the HTML representation is unable to render, please try loading this page with nbviewer.org.</b></div><div class=\"sk-container\" hidden><div class=\"sk-item\"><div class=\"sk-estimator sk-toggleable\"><input class=\"sk-toggleable__control sk-hidden--visually\" id=\"sk-estimator-id-1\" type=\"checkbox\" checked><label for=\"sk-estimator-id-1\" class=\"sk-toggleable__label sk-toggleable__label-arrow\">RandomForestClassifier</label><div class=\"sk-toggleable__content\"><pre>RandomForestClassifier(n_estimators=1000)</pre></div></div></div></div></div>"
423
+ ],
424
+ "text/plain": [
425
+ "RandomForestClassifier(n_estimators=1000)"
426
+ ]
427
+ },
428
+ "execution_count": 13,
429
+ "metadata": {},
430
+ "output_type": "execute_result"
431
+ }
432
+ ],
433
+ "source": [
434
+ "\"\"\"\n",
435
+ "Description: Create model architecture\n",
436
+ "\"\"\"\n",
437
+ "model = RandomForestClassifier(n_estimators=1000)\n",
438
+ "model"
439
+ ]
440
+ },
441
+ {
442
+ "cell_type": "code",
443
+ "execution_count": 14,
444
+ "id": "fcad50e5",
445
+ "metadata": {},
446
+ "outputs": [
447
+ {
448
+ "data": {
449
+ "text/html": [
450
+ "<style>#sk-container-id-2 {color: black;background-color: white;}#sk-container-id-2 pre{padding: 0;}#sk-container-id-2 div.sk-toggleable {background-color: white;}#sk-container-id-2 label.sk-toggleable__label {cursor: pointer;display: block;width: 100%;margin-bottom: 0;padding: 0.3em;box-sizing: border-box;text-align: center;}#sk-container-id-2 label.sk-toggleable__label-arrow:before {content: \"▸\";float: left;margin-right: 0.25em;color: #696969;}#sk-container-id-2 label.sk-toggleable__label-arrow:hover:before {color: black;}#sk-container-id-2 div.sk-estimator:hover label.sk-toggleable__label-arrow:before {color: black;}#sk-container-id-2 div.sk-toggleable__content {max-height: 0;max-width: 0;overflow: hidden;text-align: left;background-color: #f0f8ff;}#sk-container-id-2 div.sk-toggleable__content pre {margin: 0.2em;color: black;border-radius: 0.25em;background-color: #f0f8ff;}#sk-container-id-2 input.sk-toggleable__control:checked~div.sk-toggleable__content {max-height: 200px;max-width: 100%;overflow: auto;}#sk-container-id-2 input.sk-toggleable__control:checked~label.sk-toggleable__label-arrow:before {content: \"▾\";}#sk-container-id-2 div.sk-estimator input.sk-toggleable__control:checked~label.sk-toggleable__label {background-color: #d4ebff;}#sk-container-id-2 div.sk-label input.sk-toggleable__control:checked~label.sk-toggleable__label {background-color: #d4ebff;}#sk-container-id-2 input.sk-hidden--visually {border: 0;clip: rect(1px 1px 1px 1px);clip: rect(1px, 1px, 1px, 1px);height: 1px;margin: -1px;overflow: hidden;padding: 0;position: absolute;width: 1px;}#sk-container-id-2 div.sk-estimator {font-family: monospace;background-color: #f0f8ff;border: 1px dotted black;border-radius: 0.25em;box-sizing: border-box;margin-bottom: 0.5em;}#sk-container-id-2 div.sk-estimator:hover {background-color: #d4ebff;}#sk-container-id-2 div.sk-parallel-item::after {content: \"\";width: 100%;border-bottom: 1px solid gray;flex-grow: 1;}#sk-container-id-2 div.sk-label:hover 
label.sk-toggleable__label {background-color: #d4ebff;}#sk-container-id-2 div.sk-serial::before {content: \"\";position: absolute;border-left: 1px solid gray;box-sizing: border-box;top: 0;bottom: 0;left: 50%;z-index: 0;}#sk-container-id-2 div.sk-serial {display: flex;flex-direction: column;align-items: center;background-color: white;padding-right: 0.2em;padding-left: 0.2em;position: relative;}#sk-container-id-2 div.sk-item {position: relative;z-index: 1;}#sk-container-id-2 div.sk-parallel {display: flex;align-items: stretch;justify-content: center;background-color: white;position: relative;}#sk-container-id-2 div.sk-item::before, #sk-container-id-2 div.sk-parallel-item::before {content: \"\";position: absolute;border-left: 1px solid gray;box-sizing: border-box;top: 0;bottom: 0;left: 50%;z-index: -1;}#sk-container-id-2 div.sk-parallel-item {display: flex;flex-direction: column;z-index: 1;position: relative;background-color: white;}#sk-container-id-2 div.sk-parallel-item:first-child::after {align-self: flex-end;width: 50%;}#sk-container-id-2 div.sk-parallel-item:last-child::after {align-self: flex-start;width: 50%;}#sk-container-id-2 div.sk-parallel-item:only-child::after {width: 0;}#sk-container-id-2 div.sk-dashed-wrapped {border: 1px dashed gray;margin: 0 0.4em 0.5em 0.4em;box-sizing: border-box;padding-bottom: 0.4em;background-color: white;}#sk-container-id-2 div.sk-label label {font-family: monospace;font-weight: bold;display: inline-block;line-height: 1.2em;}#sk-container-id-2 div.sk-label-container {text-align: center;}#sk-container-id-2 div.sk-container {/* jupyter's `normalize.less` sets `[hidden] { display: none; }` but bootstrap.min.css set `[hidden] { display: none !important; }` so we also need the `!important` here to be able to override the default hidden behavior on the sphinx rendered scikit-learn.org. 
See: https://github.com/scikit-learn/scikit-learn/issues/21755 */display: inline-block !important;position: relative;}#sk-container-id-2 div.sk-text-repr-fallback {display: none;}</style><div id=\"sk-container-id-2\" class=\"sk-top-container\"><div class=\"sk-text-repr-fallback\"><pre>RandomForestClassifier(n_estimators=1000)</pre><b>In a Jupyter environment, please rerun this cell to show the HTML representation or trust the notebook. <br />On GitHub, the HTML representation is unable to render, please try loading this page with nbviewer.org.</b></div><div class=\"sk-container\" hidden><div class=\"sk-item\"><div class=\"sk-estimator sk-toggleable\"><input class=\"sk-toggleable__control sk-hidden--visually\" id=\"sk-estimator-id-2\" type=\"checkbox\" checked><label for=\"sk-estimator-id-2\" class=\"sk-toggleable__label sk-toggleable__label-arrow\">RandomForestClassifier</label><div class=\"sk-toggleable__content\"><pre>RandomForestClassifier(n_estimators=1000)</pre></div></div></div></div></div>"
451
+ ],
452
+ "text/plain": [
453
+ "RandomForestClassifier(n_estimators=1000)"
454
+ ]
455
+ },
456
+ "execution_count": 14,
457
+ "metadata": {},
458
+ "output_type": "execute_result"
459
+ }
460
+ ],
461
+ "source": [
462
+ "\"\"\"\n",
463
+ "Description: Train model\n",
464
+ "\"\"\"\n",
465
+ "model.fit(x_train, y_train)"
466
+ ]
467
+ },
468
+ {
469
+ "cell_type": "code",
470
+ "execution_count": 15,
471
+ "id": "a20a2ec3",
472
+ "metadata": {
473
+ "scrolled": true
474
+ },
475
+ "outputs": [
476
+ {
477
+ "name": "stdout",
478
+ "output_type": "stream",
479
+ "text": [
480
+ "RandomForestClassifier(n_estimators=1000) : \n",
481
+ "Training Accuracy : 1.0\n",
482
+ "Validation Accuracy : 0.66875\n"
483
+ ]
484
+ }
485
+ ],
486
+ "source": [
487
+ "\"\"\"\n",
488
+ "Description: Get training and test accuracy\n",
489
+ "\"\"\"\n",
490
+ "print(f'{model} : ')\n",
491
+ "print('Training Accuracy : ', metrics.accuracy_score(y_train, model.predict(x_train)))\n",
492
+ "print('Validation Accuracy : ', metrics.accuracy_score(y_test, model.predict(x_test)))"
493
+ ]
494
+ },
495
+ {
496
+ "cell_type": "code",
497
+ "execution_count": 16,
498
+ "id": "5c20bc9e",
499
+ "metadata": {},
500
+ "outputs": [],
501
+ "source": [
502
+ "pickle.dump(model, open(\"random_forest_model.pkl\", 'wb'))"
503
+ ]
504
+ },
505
+ {
506
+ "cell_type": "code",
507
+ "execution_count": 17,
508
+ "id": "f55a0ec8",
509
+ "metadata": {},
510
+ "outputs": [
511
+ {
512
+ "data": {
513
+ "text/plain": [
514
+ "fixed acidity 15.90000\n",
515
+ "volatile acidity 1.58000\n",
516
+ "citric acid 1.00000\n",
517
+ "residual sugar 15.50000\n",
518
+ "chlorides 0.61100\n",
519
+ "free sulfur dioxide 72.00000\n",
520
+ "total sulfur dioxide 289.00000\n",
521
+ "density 1.00369\n",
522
+ "pH 4.01000\n",
523
+ "sulphates 2.00000\n",
524
+ "alcohol 14.90000\n",
525
+ "quality 5.00000\n",
526
+ "dtype: float64"
527
+ ]
528
+ },
529
+ "execution_count": 17,
530
+ "metadata": {},
531
+ "output_type": "execute_result"
532
+ }
533
+ ],
534
+ "source": [
535
+ "\"\"\"\n",
536
+ "Description: Column-wise maximum of the dataset\n",
537
+ "\"\"\"\n",
538
+ "df.max()"
539
+ ]
540
+ },
541
+ {
542
+ "cell_type": "code",
543
+ "execution_count": 18,
544
+ "id": "234d7a65",
545
+ "metadata": {},
546
+ "outputs": [
547
+ {
548
+ "data": {
549
+ "text/plain": [
550
+ "fixed acidity 4.60000\n",
551
+ "volatile acidity 0.12000\n",
552
+ "citric acid 0.00000\n",
553
+ "residual sugar 0.90000\n",
554
+ "chlorides 0.01200\n",
555
+ "free sulfur dioxide 1.00000\n",
556
+ "total sulfur dioxide 6.00000\n",
557
+ "density 0.99007\n",
558
+ "pH 2.74000\n",
559
+ "sulphates 0.33000\n",
560
+ "alcohol 8.40000\n",
561
+ "quality 0.00000\n",
562
+ "dtype: float64"
563
+ ]
564
+ },
565
+ "execution_count": 18,
566
+ "metadata": {},
567
+ "output_type": "execute_result"
568
+ }
569
+ ],
570
+ "source": [
571
+ "\"\"\"\n",
572
+ "Description: Column-wise minimum of the dataset\n",
573
+ "\"\"\"\n",
574
+ "df.min()"
575
+ ]
576
+ },
577
+ {
578
+ "cell_type": "code",
579
+ "execution_count": 19,
580
+ "id": "3fcb0d81",
581
+ "metadata": {},
582
+ "outputs": [
583
+ {
584
+ "data": {
585
+ "text/plain": [
586
+ "Index(['fixed acidity', 'volatile acidity', 'citric acid', 'residual sugar',\n",
587
+ " 'chlorides', 'free sulfur dioxide', 'total sulfur dioxide', 'density',\n",
588
+ " 'pH', 'sulphates', 'alcohol', 'quality'],\n",
589
+ " dtype='object')"
590
+ ]
591
+ },
592
+ "execution_count": 19,
593
+ "metadata": {},
594
+ "output_type": "execute_result"
595
+ }
596
+ ],
597
+ "source": [
598
+ "\"\"\"\n",
599
+ "Description: Check columns\n",
600
+ "\"\"\"\n",
601
+ "df.columns"
602
+ ]
603
+ },
604
+ {
605
+ "cell_type": "code",
606
+ "execution_count": null,
607
+ "id": "29e30ec2",
608
+ "metadata": {},
609
+ "outputs": [],
610
+ "source": []
611
+ }
612
+ ],
613
+ "metadata": {
614
+ "kernelspec": {
615
+ "display_name": "Python 3 (ipykernel)",
616
+ "language": "python",
617
+ "name": "python3"
618
+ },
619
+ "language_info": {
620
+ "codemirror_mode": {
621
+ "name": "ipython",
622
+ "version": 3
623
+ },
624
+ "file_extension": ".py",
625
+ "mimetype": "text/x-python",
626
+ "name": "python",
627
+ "nbconvert_exporter": "python",
628
+ "pygments_lexer": "ipython3",
629
+ "version": "3.9.0"
630
+ }
631
+ },
632
+ "nbformat": 4,
633
+ "nbformat_minor": 5
634
+ }