Manupriya12's picture
app file
8c50098 verified
{
"cells": [
{
"cell_type": "markdown",
"id": "8de49094",
"metadata": {},
"source": [
"# Machine Learning November Minor Project\n",
"## Create a classification model to predict the price range of a mobile phone based on its specifications"
]
},
{
"cell_type": "code",
"execution_count": 132,
"id": "9229995e",
"metadata": {},
"outputs": [],
"source": [
"import numpy as np\n",
"import pandas as pd \n",
"import matplotlib.pyplot as plt"
]
},
{
"cell_type": "code",
"execution_count": 133,
"id": "14ea8a12",
"metadata": {},
"outputs": [
{
"data": {
"text/html": [
"<div>\n",
"<style scoped>\n",
" .dataframe tbody tr th:only-of-type {\n",
" vertical-align: middle;\n",
" }\n",
"\n",
" .dataframe tbody tr th {\n",
" vertical-align: top;\n",
" }\n",
"\n",
" .dataframe thead th {\n",
" text-align: right;\n",
" }\n",
"</style>\n",
"<table border=\"1\" class=\"dataframe\">\n",
" <thead>\n",
" <tr style=\"text-align: right;\">\n",
" <th></th>\n",
" <th>battery_power</th>\n",
" <th>blue</th>\n",
" <th>clock_speed</th>\n",
" <th>dual_sim</th>\n",
" <th>fc</th>\n",
" <th>four_g</th>\n",
" <th>int_memory</th>\n",
" <th>m_dep</th>\n",
" <th>mobile_wt</th>\n",
" <th>n_cores</th>\n",
" <th>...</th>\n",
" <th>px_height</th>\n",
" <th>px_width</th>\n",
" <th>ram</th>\n",
" <th>sc_h</th>\n",
" <th>sc_w</th>\n",
" <th>talk_time</th>\n",
" <th>three_g</th>\n",
" <th>touch_screen</th>\n",
" <th>wifi</th>\n",
" <th>price_range</th>\n",
" </tr>\n",
" </thead>\n",
" <tbody>\n",
" <tr>\n",
" <th>0</th>\n",
" <td>842</td>\n",
" <td>0</td>\n",
" <td>2.2</td>\n",
" <td>0</td>\n",
" <td>1</td>\n",
" <td>0</td>\n",
" <td>7</td>\n",
" <td>0.6</td>\n",
" <td>188</td>\n",
" <td>2</td>\n",
" <td>...</td>\n",
" <td>20</td>\n",
" <td>756</td>\n",
" <td>2549</td>\n",
" <td>9</td>\n",
" <td>7</td>\n",
" <td>19</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>1</td>\n",
" <td>1</td>\n",
" </tr>\n",
" <tr>\n",
" <th>1</th>\n",
" <td>1021</td>\n",
" <td>1</td>\n",
" <td>0.5</td>\n",
" <td>1</td>\n",
" <td>0</td>\n",
" <td>1</td>\n",
" <td>53</td>\n",
" <td>0.7</td>\n",
" <td>136</td>\n",
" <td>3</td>\n",
" <td>...</td>\n",
" <td>905</td>\n",
" <td>1988</td>\n",
" <td>2631</td>\n",
" <td>17</td>\n",
" <td>3</td>\n",
" <td>7</td>\n",
" <td>1</td>\n",
" <td>1</td>\n",
" <td>0</td>\n",
" <td>2</td>\n",
" </tr>\n",
" <tr>\n",
" <th>2</th>\n",
" <td>563</td>\n",
" <td>1</td>\n",
" <td>0.5</td>\n",
" <td>1</td>\n",
" <td>2</td>\n",
" <td>1</td>\n",
" <td>41</td>\n",
" <td>0.9</td>\n",
" <td>145</td>\n",
" <td>5</td>\n",
" <td>...</td>\n",
" <td>1263</td>\n",
" <td>1716</td>\n",
" <td>2603</td>\n",
" <td>11</td>\n",
" <td>2</td>\n",
" <td>9</td>\n",
" <td>1</td>\n",
" <td>1</td>\n",
" <td>0</td>\n",
" <td>2</td>\n",
" </tr>\n",
" <tr>\n",
" <th>3</th>\n",
" <td>615</td>\n",
" <td>1</td>\n",
" <td>2.5</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>10</td>\n",
" <td>0.8</td>\n",
" <td>131</td>\n",
" <td>6</td>\n",
" <td>...</td>\n",
" <td>1216</td>\n",
" <td>1786</td>\n",
" <td>2769</td>\n",
" <td>16</td>\n",
" <td>8</td>\n",
" <td>11</td>\n",
" <td>1</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>2</td>\n",
" </tr>\n",
" <tr>\n",
" <th>4</th>\n",
" <td>1821</td>\n",
" <td>1</td>\n",
" <td>1.2</td>\n",
" <td>0</td>\n",
" <td>13</td>\n",
" <td>1</td>\n",
" <td>44</td>\n",
" <td>0.6</td>\n",
" <td>141</td>\n",
" <td>2</td>\n",
" <td>...</td>\n",
" <td>1208</td>\n",
" <td>1212</td>\n",
" <td>1411</td>\n",
" <td>8</td>\n",
" <td>2</td>\n",
" <td>15</td>\n",
" <td>1</td>\n",
" <td>1</td>\n",
" <td>0</td>\n",
" <td>1</td>\n",
" </tr>\n",
" <tr>\n",
" <th>...</th>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" </tr>\n",
" <tr>\n",
" <th>1995</th>\n",
" <td>794</td>\n",
" <td>1</td>\n",
" <td>0.5</td>\n",
" <td>1</td>\n",
" <td>0</td>\n",
" <td>1</td>\n",
" <td>2</td>\n",
" <td>0.8</td>\n",
" <td>106</td>\n",
" <td>6</td>\n",
" <td>...</td>\n",
" <td>1222</td>\n",
" <td>1890</td>\n",
" <td>668</td>\n",
" <td>13</td>\n",
" <td>4</td>\n",
" <td>19</td>\n",
" <td>1</td>\n",
" <td>1</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" </tr>\n",
" <tr>\n",
" <th>1996</th>\n",
" <td>1965</td>\n",
" <td>1</td>\n",
" <td>2.6</td>\n",
" <td>1</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>39</td>\n",
" <td>0.2</td>\n",
" <td>187</td>\n",
" <td>4</td>\n",
" <td>...</td>\n",
" <td>915</td>\n",
" <td>1965</td>\n",
" <td>2032</td>\n",
" <td>11</td>\n",
" <td>10</td>\n",
" <td>16</td>\n",
" <td>1</td>\n",
" <td>1</td>\n",
" <td>1</td>\n",
" <td>2</td>\n",
" </tr>\n",
" <tr>\n",
" <th>1997</th>\n",
" <td>1911</td>\n",
" <td>0</td>\n",
" <td>0.9</td>\n",
" <td>1</td>\n",
" <td>1</td>\n",
" <td>1</td>\n",
" <td>36</td>\n",
" <td>0.7</td>\n",
" <td>108</td>\n",
" <td>8</td>\n",
" <td>...</td>\n",
" <td>868</td>\n",
" <td>1632</td>\n",
" <td>3057</td>\n",
" <td>9</td>\n",
" <td>1</td>\n",
" <td>5</td>\n",
" <td>1</td>\n",
" <td>1</td>\n",
" <td>0</td>\n",
" <td>3</td>\n",
" </tr>\n",
" <tr>\n",
" <th>1998</th>\n",
" <td>1512</td>\n",
" <td>0</td>\n",
" <td>0.9</td>\n",
" <td>0</td>\n",
" <td>4</td>\n",
" <td>1</td>\n",
" <td>46</td>\n",
" <td>0.1</td>\n",
" <td>145</td>\n",
" <td>5</td>\n",
" <td>...</td>\n",
" <td>336</td>\n",
" <td>670</td>\n",
" <td>869</td>\n",
" <td>18</td>\n",
" <td>10</td>\n",
" <td>19</td>\n",
" <td>1</td>\n",
" <td>1</td>\n",
" <td>1</td>\n",
" <td>0</td>\n",
" </tr>\n",
" <tr>\n",
" <th>1999</th>\n",
" <td>510</td>\n",
" <td>1</td>\n",
" <td>2.0</td>\n",
" <td>1</td>\n",
" <td>5</td>\n",
" <td>1</td>\n",
" <td>45</td>\n",
" <td>0.9</td>\n",
" <td>168</td>\n",
" <td>6</td>\n",
" <td>...</td>\n",
" <td>483</td>\n",
" <td>754</td>\n",
" <td>3919</td>\n",
" <td>19</td>\n",
" <td>4</td>\n",
" <td>2</td>\n",
" <td>1</td>\n",
" <td>1</td>\n",
" <td>1</td>\n",
" <td>3</td>\n",
" </tr>\n",
" </tbody>\n",
"</table>\n",
"<p>2000 rows × 21 columns</p>\n",
"</div>"
],
"text/plain": [
" battery_power blue clock_speed dual_sim fc four_g int_memory \\\n",
"0 842 0 2.2 0 1 0 7 \n",
"1 1021 1 0.5 1 0 1 53 \n",
"2 563 1 0.5 1 2 1 41 \n",
"3 615 1 2.5 0 0 0 10 \n",
"4 1821 1 1.2 0 13 1 44 \n",
"... ... ... ... ... .. ... ... \n",
"1995 794 1 0.5 1 0 1 2 \n",
"1996 1965 1 2.6 1 0 0 39 \n",
"1997 1911 0 0.9 1 1 1 36 \n",
"1998 1512 0 0.9 0 4 1 46 \n",
"1999 510 1 2.0 1 5 1 45 \n",
"\n",
" m_dep mobile_wt n_cores ... px_height px_width ram sc_h sc_w \\\n",
"0 0.6 188 2 ... 20 756 2549 9 7 \n",
"1 0.7 136 3 ... 905 1988 2631 17 3 \n",
"2 0.9 145 5 ... 1263 1716 2603 11 2 \n",
"3 0.8 131 6 ... 1216 1786 2769 16 8 \n",
"4 0.6 141 2 ... 1208 1212 1411 8 2 \n",
"... ... ... ... ... ... ... ... ... ... \n",
"1995 0.8 106 6 ... 1222 1890 668 13 4 \n",
"1996 0.2 187 4 ... 915 1965 2032 11 10 \n",
"1997 0.7 108 8 ... 868 1632 3057 9 1 \n",
"1998 0.1 145 5 ... 336 670 869 18 10 \n",
"1999 0.9 168 6 ... 483 754 3919 19 4 \n",
"\n",
" talk_time three_g touch_screen wifi price_range \n",
"0 19 0 0 1 1 \n",
"1 7 1 1 0 2 \n",
"2 9 1 1 0 2 \n",
"3 11 1 0 0 2 \n",
"4 15 1 1 0 1 \n",
"... ... ... ... ... ... \n",
"1995 19 1 1 0 0 \n",
"1996 16 1 1 1 2 \n",
"1997 5 1 1 0 3 \n",
"1998 19 1 1 1 0 \n",
"1999 2 1 1 1 3 \n",
"\n",
"[2000 rows x 21 columns]"
]
},
"execution_count": 133,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"df=pd.read_csv(\"MOBILE.csv\")\n",
"df"
]
},
{
"cell_type": "code",
"execution_count": 134,
"id": "3502e202",
"metadata": {},
"outputs": [
{
"data": {
"text/html": [
"<div>\n",
"<style scoped>\n",
" .dataframe tbody tr th:only-of-type {\n",
" vertical-align: middle;\n",
" }\n",
"\n",
" .dataframe tbody tr th {\n",
" vertical-align: top;\n",
" }\n",
"\n",
" .dataframe thead th {\n",
" text-align: right;\n",
" }\n",
"</style>\n",
"<table border=\"1\" class=\"dataframe\">\n",
" <thead>\n",
" <tr style=\"text-align: right;\">\n",
" <th></th>\n",
" <th>battery_power</th>\n",
" <th>blue</th>\n",
" <th>clock_speed</th>\n",
" <th>dual_sim</th>\n",
" <th>fc</th>\n",
" <th>four_g</th>\n",
" <th>int_memory</th>\n",
" <th>m_dep</th>\n",
" <th>mobile_wt</th>\n",
" <th>n_cores</th>\n",
" <th>...</th>\n",
" <th>px_height</th>\n",
" <th>px_width</th>\n",
" <th>ram</th>\n",
" <th>sc_h</th>\n",
" <th>sc_w</th>\n",
" <th>talk_time</th>\n",
" <th>three_g</th>\n",
" <th>touch_screen</th>\n",
" <th>wifi</th>\n",
" <th>price_range</th>\n",
" </tr>\n",
" </thead>\n",
" <tbody>\n",
" <tr>\n",
" <th>0</th>\n",
" <td>842</td>\n",
" <td>0</td>\n",
" <td>2.2</td>\n",
" <td>0</td>\n",
" <td>1</td>\n",
" <td>0</td>\n",
" <td>7</td>\n",
" <td>0.6</td>\n",
" <td>188</td>\n",
" <td>2</td>\n",
" <td>...</td>\n",
" <td>20</td>\n",
" <td>756</td>\n",
" <td>2549</td>\n",
" <td>9</td>\n",
" <td>7</td>\n",
" <td>19</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>1</td>\n",
" <td>1</td>\n",
" </tr>\n",
" <tr>\n",
" <th>1</th>\n",
" <td>1021</td>\n",
" <td>1</td>\n",
" <td>0.5</td>\n",
" <td>1</td>\n",
" <td>0</td>\n",
" <td>1</td>\n",
" <td>53</td>\n",
" <td>0.7</td>\n",
" <td>136</td>\n",
" <td>3</td>\n",
" <td>...</td>\n",
" <td>905</td>\n",
" <td>1988</td>\n",
" <td>2631</td>\n",
" <td>17</td>\n",
" <td>3</td>\n",
" <td>7</td>\n",
" <td>1</td>\n",
" <td>1</td>\n",
" <td>0</td>\n",
" <td>2</td>\n",
" </tr>\n",
" <tr>\n",
" <th>2</th>\n",
" <td>563</td>\n",
" <td>1</td>\n",
" <td>0.5</td>\n",
" <td>1</td>\n",
" <td>2</td>\n",
" <td>1</td>\n",
" <td>41</td>\n",
" <td>0.9</td>\n",
" <td>145</td>\n",
" <td>5</td>\n",
" <td>...</td>\n",
" <td>1263</td>\n",
" <td>1716</td>\n",
" <td>2603</td>\n",
" <td>11</td>\n",
" <td>2</td>\n",
" <td>9</td>\n",
" <td>1</td>\n",
" <td>1</td>\n",
" <td>0</td>\n",
" <td>2</td>\n",
" </tr>\n",
" <tr>\n",
" <th>3</th>\n",
" <td>615</td>\n",
" <td>1</td>\n",
" <td>2.5</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>10</td>\n",
" <td>0.8</td>\n",
" <td>131</td>\n",
" <td>6</td>\n",
" <td>...</td>\n",
" <td>1216</td>\n",
" <td>1786</td>\n",
" <td>2769</td>\n",
" <td>16</td>\n",
" <td>8</td>\n",
" <td>11</td>\n",
" <td>1</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>2</td>\n",
" </tr>\n",
" <tr>\n",
" <th>4</th>\n",
" <td>1821</td>\n",
" <td>1</td>\n",
" <td>1.2</td>\n",
" <td>0</td>\n",
" <td>13</td>\n",
" <td>1</td>\n",
" <td>44</td>\n",
" <td>0.6</td>\n",
" <td>141</td>\n",
" <td>2</td>\n",
" <td>...</td>\n",
" <td>1208</td>\n",
" <td>1212</td>\n",
" <td>1411</td>\n",
" <td>8</td>\n",
" <td>2</td>\n",
" <td>15</td>\n",
" <td>1</td>\n",
" <td>1</td>\n",
" <td>0</td>\n",
" <td>1</td>\n",
" </tr>\n",
" </tbody>\n",
"</table>\n",
"<p>5 rows × 21 columns</p>\n",
"</div>"
],
"text/plain": [
" battery_power blue clock_speed dual_sim fc four_g int_memory m_dep \\\n",
"0 842 0 2.2 0 1 0 7 0.6 \n",
"1 1021 1 0.5 1 0 1 53 0.7 \n",
"2 563 1 0.5 1 2 1 41 0.9 \n",
"3 615 1 2.5 0 0 0 10 0.8 \n",
"4 1821 1 1.2 0 13 1 44 0.6 \n",
"\n",
" mobile_wt n_cores ... px_height px_width ram sc_h sc_w talk_time \\\n",
"0 188 2 ... 20 756 2549 9 7 19 \n",
"1 136 3 ... 905 1988 2631 17 3 7 \n",
"2 145 5 ... 1263 1716 2603 11 2 9 \n",
"3 131 6 ... 1216 1786 2769 16 8 11 \n",
"4 141 2 ... 1208 1212 1411 8 2 15 \n",
"\n",
" three_g touch_screen wifi price_range \n",
"0 0 0 1 1 \n",
"1 1 1 0 2 \n",
"2 1 1 0 2 \n",
"3 1 0 0 2 \n",
"4 1 1 0 1 \n",
"\n",
"[5 rows x 21 columns]"
]
},
"execution_count": 134,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"df.head()"
]
},
{
"cell_type": "code",
"execution_count": 135,
"id": "3494f177",
"metadata": {},
"outputs": [
{
"data": {
"text/plain": [
"(2000, 21)"
]
},
"execution_count": 135,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"df.shape"
]
},
{
"cell_type": "markdown",
"id": "583f8b35",
"metadata": {},
"source": [
"TARGET VARIABLE"
]
},
{
"cell_type": "code",
"execution_count": 136,
"id": "81466eab",
"metadata": {},
"outputs": [
{
"data": {
"text/plain": [
"1 500\n",
"2 500\n",
"3 500\n",
"0 500\n",
"Name: price_range, dtype: int64"
]
},
"execution_count": 136,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"df['price_range'].value_counts()"
]
},
{
"cell_type": "markdown",
"id": "f89ae5f7",
"metadata": {},
"source": [
"# 1) REMOVE / HANDLE NULL VALUES (IF ANY)\n"
]
},
{
"cell_type": "code",
"execution_count": 137,
"id": "67af79d5",
"metadata": {},
"outputs": [
{
"data": {
"text/plain": [
"battery_power 0\n",
"blue 0\n",
"clock_speed 0\n",
"dual_sim 0\n",
"fc 0\n",
"four_g 0\n",
"int_memory 0\n",
"m_dep 0\n",
"mobile_wt 0\n",
"n_cores 0\n",
"pc 0\n",
"px_height 0\n",
"px_width 0\n",
"ram 0\n",
"sc_h 0\n",
"sc_w 0\n",
"talk_time 0\n",
"three_g 0\n",
"touch_screen 0\n",
"wifi 0\n",
"price_range 0\n",
"dtype: int64"
]
},
"execution_count": 137,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"df.isnull().sum()"
]
},
{
"cell_type": "markdown",
"id": "2a9229f6",
"metadata": {},
"source": [
"HANDLING DUPLICATES"
]
},
{
"cell_type": "code",
"execution_count": 138,
"id": "68e9c1a0",
"metadata": {},
"outputs": [
{
"data": {
"text/plain": [
"0"
]
},
"execution_count": 138,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"df.duplicated().sum()"
]
},
{
"cell_type": "code",
"execution_count": 139,
"id": "45b814b1",
"metadata": {},
"outputs": [],
"source": [
"df.drop_duplicates(inplace=True)"
]
},
{
"cell_type": "code",
"execution_count": 140,
"id": "86e353c5",
"metadata": {},
"outputs": [
{
"data": {
"text/plain": [
"0"
]
},
"execution_count": 140,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"df.duplicated().sum()"
]
},
{
"cell_type": "markdown",
"id": "ae3fb359",
"metadata": {},
"source": [
"CHECKING DATATYPES"
]
},
{
"cell_type": "code",
"execution_count": 141,
"id": "4206a483",
"metadata": {},
"outputs": [
{
"data": {
"text/plain": [
"battery_power int64\n",
"blue int64\n",
"clock_speed float64\n",
"dual_sim int64\n",
"fc int64\n",
"four_g int64\n",
"int_memory int64\n",
"m_dep float64\n",
"mobile_wt int64\n",
"n_cores int64\n",
"pc int64\n",
"px_height int64\n",
"px_width int64\n",
"ram int64\n",
"sc_h int64\n",
"sc_w int64\n",
"talk_time int64\n",
"three_g int64\n",
"touch_screen int64\n",
"wifi int64\n",
"price_range int64\n",
"dtype: object"
]
},
"execution_count": 141,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"df.dtypes"
]
},
{
"cell_type": "markdown",
"id": "27c38f40",
"metadata": {},
"source": [
"### Selecting independent (x) and dependent (y) variables"
]
},
{
"cell_type": "code",
"execution_count": 142,
"id": "df81170a",
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"<class 'pandas.core.frame.DataFrame'>\n",
"<class 'pandas.core.series.Series'>\n",
"(2000, 20)\n",
"(2000,)\n"
]
}
],
"source": [
"x=df.drop('price_range',axis=1)\n",
"y=df['price_range']\n",
"print(type(x))\n",
"print(type(y))\n",
"print(x.shape)\n",
"print(y.shape)"
]
},
{
"cell_type": "markdown",
"id": "caddcb4b",
"metadata": {},
"source": [
"```python\n",
"x=df.drop('price_range',axis=1)\n",
"y=df['price_range']\n",
"print(type(x))\n",
"print(type(y))\n",
"print(x.shape)\n",
"print(y.shape)\n",
"#x_train , y_train\n",
"```"
]
},
{
"cell_type": "markdown",
"id": "50e080a5",
"metadata": {},
"source": [
"# 2) Splitting data into training and test data"
]
},
{
"cell_type": "code",
"execution_count": 143,
"id": "0a2e0e9c",
"metadata": {},
"outputs": [],
"source": [
"from sklearn.model_selection import train_test_split"
]
},
{
"cell_type": "code",
"execution_count": 144,
"id": "e72f938f",
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"500.0\n"
]
}
],
"source": [
"print(2000*0.25)"
]
},
{
"cell_type": "code",
"execution_count": 145,
"id": "e79d1a83",
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"(1500, 20)\n",
"(500, 20)\n",
"(1500,)\n",
"(500,)\n"
]
}
],
"source": [
"x_train,x_test,y_train,y_test=train_test_split(x,y,test_size=0.25,random_state=42)\n",
"print(x_train.shape)\n",
"print(x_test.shape)\n",
"print(y_train.shape)\n",
"print(y_test.shape) "
]
},
{
"cell_type": "markdown",
"id": "5b530940",
"metadata": {},
"source": [
"### CONFUSION MATRIX"
]
},
{
"cell_type": "code",
"execution_count": 146,
"id": "aa92457c",
"metadata": {},
"outputs": [],
"source": [
"from sklearn.metrics import confusion_matrix,classification_report\n"
]
},
{
"cell_type": "code",
"execution_count": 147,
"id": "82b8c07b",
"metadata": {},
"outputs": [],
"source": [
"def eval_model(ytest, ypred):\n",
"    \"\"\"Print the confusion matrix and the classification report.\n",
"\n",
"    Parameters\n",
"    ----------\n",
"    ytest : true labels.\n",
"    ypred : labels predicted for the same samples, in the same order.\n",
"    \"\"\"\n",
"    print(confusion_matrix(ytest, ypred))\n",
"    print(classification_report(ytest, ypred))\n",
"\n",
"\n",
"def nscore(model, xtrain=None, ytrain=None, xtest=None, ytest=None):\n",
"    \"\"\"Print model.score(...) on the training split and on the test split.\n",
"\n",
"    Parameters\n",
"    ----------\n",
"    model : fitted estimator exposing score(X, y).\n",
"    xtrain, ytrain, xtest, ytest : optional data to score on. Any argument\n",
"        left as None falls back to the notebook-level x_train / y_train /\n",
"        x_test / y_test, so the existing nscore(model) calls keep working.\n",
"    \"\"\"\n",
"    xtrain = x_train if xtrain is None else xtrain\n",
"    ytrain = y_train if ytrain is None else ytrain\n",
"    xtest = x_test if xtest is None else xtest\n",
"    ytest = y_test if ytest is None else ytest\n",
"    print('training score', model.score(xtrain, ytrain))\n",
"    print('testing score', model.score(xtest, ytest))"
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "51d24954",
"metadata": {},
"outputs": [],
"source": []
},
{
"cell_type": "markdown",
"id": "95fea4e2",
"metadata": {},
"source": [
"# 3) Apply the following models on the training dataset and generate the predicted values for the test dataset\n",
"\n",
"# (a) LOGISTIC REGRESSION\n",
"\n"
]
},
{
"cell_type": "code",
"execution_count": 148,
"id": "21ec5184",
"metadata": {},
"outputs": [],
"source": [
"import pandas as pd\n",
"import numpy as np\n",
"import matplotlib.pyplot as plt"
]
},
{
"cell_type": "code",
"execution_count": 149,
"id": "bd982d05",
"metadata": {},
"outputs": [],
"source": [
"from sklearn.linear_model import LogisticRegression "
]
},
{
"cell_type": "code",
"execution_count": 150,
"id": "e76e207e",
"metadata": {},
"outputs": [
{
"data": {
"text/plain": [
"LogisticRegression(max_iter=10000, solver='liblinear')"
]
},
"execution_count": 150,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
" #applying logistic regression\n",
"m1=LogisticRegression(max_iter=10000,solver=\"liblinear\")\n",
"m1.fit(x_train,y_train)"
]
},
{
"cell_type": "code",
"execution_count": 151,
"id": "b9aef55f",
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"train score 0.7946666666666666\n",
"test score 0.782\n"
]
}
],
"source": [
"# Accuracy of the logistic-regression model on the training split and on the held-out test split\n",
"print('train score',m1.score(x_train,y_train))\n",
"print('test score',m1.score(x_test,y_test))"
]
},
{
"cell_type": "code",
"execution_count": 152,
"id": "dddc5c20",
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"training score 0.7946666666666666\n",
"testing score 0.782\n"
]
}
],
"source": [
"nscore(m1)"
]
},
{
"cell_type": "code",
"execution_count": 153,
"id": "986ee64d",
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"[0 2 0 3 1 2 2 0 3 1 0 1 1 3 3 2 3 3 1 0 0 1 2 2 0 2 3 3 2 0 0 0 3 0 2 1 2\n",
" 0 3 0 2 3 3 0 2 3 1 1 3 1 3 1 0 0 1 2 2 3 0 0 1 3 3 2 1 0 3 3 2 2 2 1 0 1\n",
" 3 0 1 3 1 1 3 1 2 0 1 3 2 3 3 0 3 3 2 1 3 2 2 3 2 1 0 0 1 0 0 3 2 0 2 1 0\n",
" 0 3 1 3 2 3 3 0 2 1 3 3 2 3 3 0 3 0 2 3 0 1 3 0 3 1 0 0 2 3 1 3 3 0 0 0 2\n",
" 2 2 3 1 1 0 2 3 0 1 0 1 2 3 3 1 1 0 0 2 2 3 3 0 0 0 3 1 2 2 1 0 0 0 0 0 3\n",
" 2 0 3 0 0 0 0 1 3 3 1 0 1 2 0 1 2 1 3 3 3 1 2 0 0 0 1 1 1 3 1 1 2 1 1 3 1\n",
" 3 0 0 2 0 3 0 0 1 0 1 3 2 1 1 2 3 0 2 3 2 3 0 3 1 3 3 3 2 1 0 3 3 1 3 3 3\n",
" 3 3 0 1 2 3 2 3 0 2 3 2 3 2 0 0 2 0 3 3 1 3 2 0 3 1 2 0 0 3 0 1 2 3 3 3 0\n",
" 1 0 0 3 3 0 1 2 2 0 3 3 2 3 1 3 3 0 2 1 2 2 0 0 0 3 3 3 1 0 1 0 2 3 2 0 2\n",
" 3 2 1 3 0 0 3 1 3 1 0 1 1 2 1 2 3 1 0 1 2 3 0 3 0 0 1 0 2 2 2 2 3 0 3 2 3\n",
" 3 3 3 3 1 2 0 3 2 3 3 0 2 3 1 3 3 3 1 0 2 3 0 0 2 3 2 1 2 2 1 3 0 3 1 3 0\n",
" 0 1 0 1 0 2 0 2 3 3 1 2 1 3 1 1 3 1 0 0 3 0 2 0 0 2 3 3 0 2 0 1 2 3 3 0 3\n",
" 0 2 0 0 3 3 0 2 1 2 3 2 1 0 1 3 1 0 3 1 0 0 3 2 3 2 0 3 2 0 1 2 3 2 1 0 0\n",
" 0 2 3 1 0 2 3 1 3 1 2 2 3 0 0 1 2 3 1]\n"
]
}
],
"source": [
"ypred_m1=m1.predict(x_test)\n",
"print(ypred_m1)"
]
},
{
"cell_type": "code",
"execution_count": 154,
"id": "8ed82313",
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"[[124 7 1 0]\n",
" [ 13 72 33 0]\n",
" [ 0 25 69 26]\n",
" [ 0 0 4 126]]\n",
" precision recall f1-score support\n",
"\n",
" 0 0.91 0.94 0.92 132\n",
" 1 0.69 0.61 0.65 118\n",
" 2 0.64 0.57 0.61 120\n",
" 3 0.83 0.97 0.89 130\n",
"\n",
" accuracy 0.78 500\n",
" macro avg 0.77 0.77 0.77 500\n",
"weighted avg 0.77 0.78 0.77 500\n",
"\n"
]
}
],
"source": [
"ypred_m1=m1.predict(x_test)\n",
"eval_model(y_test,ypred_m1)"
]
},
{
"cell_type": "code",
"execution_count": 155,
"id": "a309d30d",
"metadata": {},
"outputs": [],
"source": [
"from sklearn.metrics import confusion_matrix,classification_report"
]
},
{
"cell_type": "code",
"execution_count": 156,
"id": "8c470b57",
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"[[124 7 1 0]\n",
" [ 13 72 33 0]\n",
" [ 0 25 69 26]\n",
" [ 0 0 4 126]]\n",
" precision recall f1-score support\n",
"\n",
" 0 0.91 0.94 0.92 132\n",
" 1 0.69 0.61 0.65 118\n",
" 2 0.64 0.57 0.61 120\n",
" 3 0.83 0.97 0.89 130\n",
"\n",
" accuracy 0.78 500\n",
" macro avg 0.77 0.77 0.77 500\n",
"weighted avg 0.77 0.78 0.77 500\n",
"\n"
]
}
],
"source": [
"print(confusion_matrix(y_test,ypred_m1))\n",
"print(classification_report(y_test,ypred_m1))"
]
},
{
"cell_type": "code",
"execution_count": 157,
"id": "2899dae3",
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"test score 0.782\n"
]
}
],
"source": [
"# Scored on x_test / y_test, so this is the test score\n",
"print('test score',m1.score(x_test,y_test))"
]
},
{
"cell_type": "markdown",
"id": "7c414d0b",
"metadata": {},
"source": [
"# (b) KNN classification"
]
},
{
"cell_type": "code",
"execution_count": 158,
"id": "15b82df3",
"metadata": {},
"outputs": [],
"source": [
"from sklearn.neighbors import KNeighborsClassifier\n"
]
},
{
"cell_type": "code",
"execution_count": 159,
"id": "43b4899b",
"metadata": {},
"outputs": [
{
"data": {
"text/plain": [
"KNeighborsClassifier(n_neighbors=11)"
]
},
"execution_count": 159,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"m2=KNeighborsClassifier(n_neighbors=11)\n",
"m2.fit(x_train,y_train)"
]
},
{
"cell_type": "code",
"execution_count": 160,
"id": "1def1105",
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"train score 0.952\n",
"test score 0.938\n"
]
}
],
"source": [
"# Accuracy of the KNN model on the training split and on the held-out test split\n",
"print('train score',m2.score(x_train,y_train))\n",
"print('test score',m2.score(x_test,y_test))"
]
},
{
"cell_type": "code",
"execution_count": 161,
"id": "8ed4a6e8",
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"[0 2 1 3 1 1 2 0 3 1 0 1 2 3 2 2 3 3 1 0 0 1 1 2 0 1 3 2 2 0 0 0 3 0 1 1 2\n",
" 0 3 0 2 2 2 0 3 2 2 1 3 1 3 1 0 0 0 1 1 3 0 0 1 3 3 1 0 0 3 3 1 2 2 2 0 1\n",
" 2 0 0 3 2 1 3 2 1 0 1 3 1 3 3 0 3 3 2 1 3 2 2 3 1 1 0 0 1 0 0 3 2 0 1 1 0\n",
" 0 3 1 3 2 3 2 0 2 1 3 2 1 3 3 0 2 0 2 3 0 2 2 0 3 1 0 0 2 2 1 2 2 0 0 0 1\n",
" 1 2 3 1 1 0 2 2 0 1 0 2 2 3 3 2 1 0 1 2 2 3 3 0 1 0 3 1 1 2 1 0 0 0 0 0 3\n",
" 2 0 3 0 0 0 0 1 3 3 1 0 1 1 1 1 1 2 3 3 3 1 2 0 0 0 2 1 1 3 1 0 2 1 1 3 2\n",
" 3 0 0 2 1 3 0 1 2 0 2 3 2 0 1 3 3 0 1 3 2 3 0 3 1 2 3 3 2 1 1 3 3 1 3 3 3\n",
" 3 3 0 2 2 2 1 3 0 1 3 2 2 2 1 0 1 0 3 3 1 3 1 0 3 1 2 0 0 3 0 1 2 3 3 3 1\n",
" 1 0 1 3 3 0 1 2 2 0 3 3 2 3 2 3 2 0 2 1 1 1 0 0 0 3 2 3 1 0 1 0 1 2 3 0 3\n",
" 3 2 1 2 0 0 2 1 3 2 0 1 1 1 0 1 3 2 0 0 3 3 0 3 0 0 2 0 1 2 2 2 3 0 3 2 2\n",
" 3 3 3 2 1 1 0 3 1 3 3 0 2 3 2 3 3 3 0 0 2 3 0 0 2 3 2 1 1 2 1 2 1 3 1 2 0\n",
" 0 1 0 1 0 1 0 2 2 3 2 1 1 3 1 0 3 1 0 0 3 0 1 0 0 1 3 3 0 2 0 1 1 3 3 1 2\n",
" 0 2 0 0 3 3 0 2 2 1 3 1 2 0 1 3 1 0 3 1 0 0 3 2 3 2 0 2 1 0 1 2 3 2 1 1 0\n",
" 1 2 2 1 1 1 3 1 2 0 2 2 3 1 0 1 2 3 1]\n"
]
}
],
"source": [
"ypred_m2=m2.predict(x_test)\n",
"print(ypred_m2)"
]
},
{
"cell_type": "code",
"execution_count": 162,
"id": "30329e38",
"metadata": {},
"outputs": [],
"source": [
"from sklearn.metrics import confusion_matrix,classification_report"
]
},
{
"cell_type": "code",
"execution_count": 163,
"id": "676b2ec2",
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"[[127 5 0 0]\n",
" [ 4 113 1 0]\n",
" [ 0 11 106 3]\n",
" [ 0 0 7 123]]\n",
" precision recall f1-score support\n",
"\n",
" 0 0.97 0.96 0.97 132\n",
" 1 0.88 0.96 0.91 118\n",
" 2 0.93 0.88 0.91 120\n",
" 3 0.98 0.95 0.96 130\n",
"\n",
" accuracy 0.94 500\n",
" macro avg 0.94 0.94 0.94 500\n",
"weighted avg 0.94 0.94 0.94 500\n",
"\n"
]
}
],
"source": [
"print(confusion_matrix(y_test,ypred_m2))\n",
"print(classification_report(y_test,ypred_m2))"
]
},
{
"cell_type": "code",
"execution_count": 164,
"id": "fc497909",
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"test score 0.938\n"
]
}
],
"source": [
"# Scored on x_test / y_test, so this is the test score\n",
"print('test score',m2.score(x_test,y_test))"
]
},
{
"cell_type": "markdown",
"id": "c99e4956",
"metadata": {},
"source": [
"# (c) SVM classifier with linear and rbf kernel\n"
]
},
{
"cell_type": "code",
"execution_count": 165,
"id": "9bf791f9",
"metadata": {},
"outputs": [],
"source": [
"from sklearn.svm import SVC"
]
},
{
"cell_type": "code",
"execution_count": 166,
"id": "b65a6b55",
"metadata": {},
"outputs": [],
"source": [
"m3=SVC(kernel='linear')\n"
]
},
{
"cell_type": "code",
"execution_count": 167,
"id": "d459ed8e",
"metadata": {},
"outputs": [
{
"data": {
"text/plain": [
"SVC(kernel='linear')"
]
},
"execution_count": 167,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"m3.fit(x_train,y_train)"
]
},
{
"cell_type": "code",
"execution_count": 168,
"id": "f77d04cd",
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"training score 0.992\n",
"testing score 0.97\n"
]
}
],
"source": [
"nscore(m3)"
]
},
{
"cell_type": "code",
"execution_count": 169,
"id": "03c26eff",
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"train score 0.992\n",
"test score 0.97\n"
]
}
],
"source": [
"# Accuracy of the linear-kernel SVM on the training split and on the held-out test split\n",
"print('train score',m3.score(x_train,y_train))\n",
"print('test score',m3.score(x_test,y_test))"
]
},
{
"cell_type": "code",
"execution_count": 170,
"id": "c9f4fdd1",
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"[0 2 1 3 1 1 2 0 3 1 0 1 2 3 3 2 3 3 1 0 0 2 1 2 0 1 3 2 2 0 0 0 3 0 1 1 2\n",
" 0 3 0 2 3 2 0 3 3 2 1 3 1 3 1 0 0 1 1 1 3 0 0 1 3 3 1 0 0 3 3 1 2 2 2 0 1\n",
" 2 0 1 3 2 2 3 2 1 0 1 3 1 3 3 0 3 3 2 1 3 2 2 3 1 1 0 0 1 0 1 3 2 0 1 1 0\n",
" 0 3 1 3 2 3 2 0 2 1 3 2 1 3 3 0 2 0 2 3 0 2 2 0 3 1 0 0 2 2 1 2 2 0 0 0 1\n",
" 1 2 3 1 1 0 2 2 0 1 0 2 2 3 3 3 1 0 1 2 2 3 3 0 1 0 3 1 1 2 1 0 0 0 0 0 3\n",
" 2 0 3 0 0 0 0 1 3 3 1 0 1 1 1 1 2 2 3 3 3 1 2 0 0 0 2 1 1 3 1 1 2 1 1 3 2\n",
" 3 0 0 2 1 3 0 1 2 0 2 3 2 0 1 3 3 0 1 3 3 3 0 3 1 2 3 3 2 1 0 3 3 1 3 3 3\n",
" 3 3 0 1 2 2 2 3 0 2 3 2 2 2 1 0 2 0 3 3 1 3 1 1 3 1 2 0 0 3 0 1 2 3 3 3 1\n",
" 1 0 1 3 3 0 1 2 2 0 3 3 2 3 2 3 2 0 2 1 1 1 0 0 0 3 3 3 1 0 1 0 1 2 3 0 3\n",
" 3 2 1 3 0 0 2 1 3 2 0 1 1 1 1 1 3 2 0 0 3 3 0 3 0 0 2 0 1 2 2 2 3 0 3 2 3\n",
" 3 3 3 2 1 1 0 3 1 3 3 0 2 3 2 3 3 3 0 0 2 3 0 0 2 3 2 1 1 2 1 3 0 3 1 2 0\n",
" 0 1 0 1 0 1 0 2 2 3 2 1 1 2 1 1 3 1 0 0 3 0 1 0 0 2 3 3 0 2 0 1 1 3 3 1 2\n",
" 0 2 0 0 3 3 0 2 2 2 3 1 2 0 1 3 1 0 3 1 0 0 3 2 3 2 0 2 1 0 1 2 3 2 1 1 0\n",
" 1 2 2 1 0 1 3 1 2 0 2 2 3 0 0 1 2 3 1]\n"
]
}
],
"source": [
"ypred_m3=m3.predict(x_test)\n",
"print(ypred_m3)"
]
},
{
"cell_type": "code",
"execution_count": 171,
"id": "05b042e0",
"metadata": {},
"outputs": [],
"source": [
"from sklearn.metrics import confusion_matrix,classification_report"
]
},
{
"cell_type": "code",
"execution_count": 172,
"id": "d8b9303d",
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"[[127 5 0 0]\n",
" [ 1 117 0 0]\n",
" [ 0 3 112 5]\n",
" [ 0 0 1 129]]\n",
" precision recall f1-score support\n",
"\n",
" 0 0.99 0.96 0.98 132\n",
" 1 0.94 0.99 0.96 118\n",
" 2 0.99 0.93 0.96 120\n",
" 3 0.96 0.99 0.98 130\n",
"\n",
" accuracy 0.97 500\n",
" macro avg 0.97 0.97 0.97 500\n",
"weighted avg 0.97 0.97 0.97 500\n",
"\n"
]
}
],
"source": [
"print(confusion_matrix(y_test,ypred_m3))\n",
"print(classification_report(y_test,ypred_m3))"
]
},
{
"cell_type": "code",
"execution_count": 173,
"id": "1ea71973",
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"test score 0.97\n"
]
}
],
"source": [
"# Scored on x_test / y_test, so this is the test score\n",
"print('test score',m3.score(x_test,y_test))"
]
},
{
"cell_type": "markdown",
"id": "278a84c7",
"metadata": {},
"source": [
"# (d) Decision Tree Classifier\n",
"\n"
]
},
{
"cell_type": "code",
"execution_count": 174,
"id": "961480bf",
"metadata": {},
"outputs": [],
"source": [
"#(d)Decision Tree Classifier\n",
"from sklearn.tree import DecisionTreeClassifier\n"
]
},
{
"cell_type": "code",
"execution_count": 175,
"id": "ddb57ea4",
"metadata": {},
"outputs": [
{
"data": {
"text/plain": [
"DecisionTreeClassifier(criterion='entropy', max_depth=5, random_state=42)"
]
},
"execution_count": 175,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"# Fixed seed (same value as the train/test split) so the fitted tree is reproducible across runs\n",
"m4 = DecisionTreeClassifier(criterion='entropy',max_depth=5,random_state=42)\n",
"m4.fit(x_train,y_train)"
]
},
{
"cell_type": "code",
"execution_count": 176,
"id": "f09f8a74",
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"training score 0.8793333333333333\n",
"testing score 0.826\n"
]
}
],
"source": [
"nscore(m4)"
]
},
{
"cell_type": "code",
"execution_count": 177,
"id": "5f52e8f9",
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"[[121 11 0 0]\n",
" [ 11 97 10 0]\n",
" [ 0 19 82 19]\n",
" [ 0 0 17 113]]\n",
" precision recall f1-score support\n",
"\n",
" 0 0.92 0.92 0.92 132\n",
" 1 0.76 0.82 0.79 118\n",
" 2 0.75 0.68 0.72 120\n",
" 3 0.86 0.87 0.86 130\n",
"\n",
" accuracy 0.83 500\n",
" macro avg 0.82 0.82 0.82 500\n",
"weighted avg 0.83 0.83 0.83 500\n",
"\n"
]
}
],
"source": [
"ypred_m4=m4.predict(x_test)\n",
"eval_model(y_test,ypred_m4)"
]
},
{
"cell_type": "code",
"execution_count": 178,
"id": "3b2cea88",
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"train score 0.8793333333333333\n",
"test score 0.826\n"
]
}
],
"source": [
"# Accuracy of the decision tree on the training split and on the held-out test split\n",
"print('train score',m4.score(x_train,y_train))\n",
"print('test score',m4.score(x_test,y_test))"
]
},
{
"cell_type": "code",
"execution_count": 179,
"id": "e6ba891e",
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"[0 1 1 3 1 1 2 0 2 1 0 1 2 3 2 2 3 3 1 0 0 2 1 2 0 1 2 2 2 0 0 0 3 0 1 1 3\n",
" 0 3 0 1 3 2 0 2 2 2 1 3 1 3 1 0 0 1 1 1 2 0 0 0 3 3 1 0 0 3 3 1 2 1 2 0 1\n",
" 3 0 1 3 2 1 3 2 1 0 2 3 2 3 3 0 2 3 1 1 3 2 2 3 1 1 0 0 0 0 0 3 2 0 1 1 0\n",
" 0 2 1 2 2 2 3 0 2 1 3 1 1 3 3 0 3 0 2 3 0 2 2 0 2 1 0 0 2 3 1 3 3 0 0 0 1\n",
" 2 3 3 2 0 0 2 2 0 2 0 1 2 3 2 3 1 0 0 2 2 3 3 1 1 0 3 1 1 2 1 0 0 0 0 0 3\n",
" 2 0 3 0 0 0 0 1 3 2 2 0 0 1 1 1 2 2 2 3 3 1 2 0 0 0 2 1 1 3 1 1 2 1 1 3 2\n",
" 3 0 0 1 1 3 0 0 1 0 2 3 2 1 1 3 3 0 1 3 3 3 0 3 1 2 3 3 2 1 1 3 3 1 3 3 3\n",
" 3 3 0 1 2 3 1 3 0 1 3 2 2 2 1 0 1 0 2 3 1 3 1 0 3 1 2 0 0 3 0 1 2 3 3 3 1\n",
" 1 0 1 3 3 0 2 2 2 0 3 3 2 3 2 3 2 0 2 1 1 1 0 0 0 3 2 3 2 0 1 0 2 3 3 1 2\n",
" 3 2 1 3 0 0 3 1 3 2 0 1 1 1 0 1 3 1 0 0 3 3 0 3 0 0 2 0 1 2 2 2 3 0 3 2 2\n",
" 3 3 3 2 1 1 0 3 1 3 3 0 2 3 1 3 3 3 0 0 2 3 0 0 2 3 1 1 1 2 2 3 1 3 2 2 0\n",
" 1 1 0 1 0 1 0 2 2 3 2 1 1 3 1 1 3 1 0 0 3 0 1 0 0 1 3 3 0 2 0 1 1 3 3 0 3\n",
" 1 2 0 0 3 3 0 1 2 2 3 1 2 0 1 3 1 0 3 2 0 0 3 2 3 2 0 3 1 0 1 2 3 2 1 0 0\n",
" 1 2 1 1 1 1 3 1 2 0 3 3 3 0 0 0 2 3 1]\n"
]
}
],
"source": [
"ypred_m4=m4.predict(x_test)\n",
"print(ypred_m4)"
]
},
{
"cell_type": "code",
"execution_count": 180,
"id": "ff999dc0",
"metadata": {},
"outputs": [],
"source": [
"from sklearn.metrics import confusion_matrix,classification_report"
]
},
{
"cell_type": "code",
"execution_count": 181,
"id": "d37a464e",
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"[[121 11 0 0]\n",
" [ 11 97 10 0]\n",
" [ 0 19 82 19]\n",
" [ 0 0 17 113]]\n",
" precision recall f1-score support\n",
"\n",
" 0 0.92 0.92 0.92 132\n",
" 1 0.76 0.82 0.79 118\n",
" 2 0.75 0.68 0.72 120\n",
" 3 0.86 0.87 0.86 130\n",
"\n",
" accuracy 0.83 500\n",
" macro avg 0.82 0.82 0.82 500\n",
"weighted avg 0.83 0.83 0.83 500\n",
"\n"
]
}
],
"source": [
"print(confusion_matrix(y_test,ypred_m4))\n",
"print(classification_report(y_test,ypred_m4))"
]
},
{
"cell_type": "code",
"execution_count": 182,
"id": "0a1eb1ab",
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"test score 0.826\n"
]
}
],
"source": [
"print('test score',m4.score(x_test,y_test))"
]
},
{
"cell_type": "code",
"execution_count": 183,
"id": "d392f771",
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"Index(['battery_power', 'blue', 'clock_speed', 'dual_sim', 'fc', 'four_g',\n",
" 'int_memory', 'm_dep', 'mobile_wt', 'n_cores', 'pc', 'px_height',\n",
" 'px_width', 'ram', 'sc_h', 'sc_w', 'talk_time', 'three_g',\n",
" 'touch_screen', 'wifi'],\n",
" dtype='object')\n",
"['0', '1']\n"
]
}
],
"source": [
"fn = x_train.columns\n",
"cn = ['0','1']\n",
"print(fn)\n",
"print(cn)"
]
},
{
"cell_type": "markdown",
"id": "3c95769b",
"metadata": {},
"source": [
"# (e) Random Forest Classifier\n",
"\n"
]
},
{
"cell_type": "code",
"execution_count": 184,
"id": "5118604b",
"metadata": {},
"outputs": [],
"source": [
"from sklearn.ensemble import RandomForestClassifier"
]
},
{
"cell_type": "code",
"execution_count": 185,
"id": "ba453d34",
"metadata": {},
"outputs": [
{
"data": {
"text/plain": [
"RandomForestClassifier(criterion='entropy', max_depth=7, n_estimators=80)"
]
},
"execution_count": 185,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"m5=RandomForestClassifier(n_estimators=80,criterion='entropy',max_depth=7)\n",
"m5.fit(x_train,y_train)"
]
},
{
"cell_type": "code",
"execution_count": 186,
"id": "fe2aee8d",
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"training score 0.9713333333333334\n",
"testing score 0.874\n"
]
}
],
"source": [
"nscore(m5)"
]
},
{
"cell_type": "code",
"execution_count": 187,
"id": "82f28d7b",
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"[[124 8 0 0]\n",
" [ 10 97 11 0]\n",
" [ 0 14 89 17]\n",
" [ 0 0 3 127]]\n",
" precision recall f1-score support\n",
"\n",
" 0 0.93 0.94 0.93 132\n",
" 1 0.82 0.82 0.82 118\n",
" 2 0.86 0.74 0.80 120\n",
" 3 0.88 0.98 0.93 130\n",
"\n",
" accuracy 0.87 500\n",
" macro avg 0.87 0.87 0.87 500\n",
"weighted avg 0.87 0.87 0.87 500\n",
"\n"
]
}
],
"source": [
"ypred_m5=m5.predict(x_test)\n",
"eval_model(y_test,ypred_m5)"
]
},
{
"cell_type": "code",
"execution_count": 188,
"id": "a299dc61",
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"train score 0.9713333333333334\n",
"test score 0.874\n"
]
}
],
"source": [
"#Accuracy\n",
"print('train score',m5.score(x_train,y_train))\n",
"print('test score',m5.score(x_test,y_test))"
]
},
{
"cell_type": "code",
"execution_count": 189,
"id": "a021aa67",
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"[0 2 1 3 1 2 2 0 3 1 0 1 2 3 3 2 3 3 1 0 0 1 1 2 0 1 3 2 2 0 0 0 3 0 1 1 3\n",
" 0 3 0 2 3 2 0 3 3 2 1 3 1 3 1 0 0 1 0 1 3 0 0 0 3 3 1 0 0 3 3 1 2 2 2 0 1\n",
" 3 0 0 3 2 2 3 2 1 0 1 3 2 3 3 0 3 3 2 1 3 2 2 3 2 1 0 0 1 0 0 3 2 0 1 1 0\n",
" 0 3 1 3 2 3 3 0 2 1 3 3 1 3 3 0 3 1 2 3 0 2 2 0 3 1 0 0 2 3 0 2 3 0 0 0 1\n",
" 2 2 3 1 1 0 2 2 0 1 0 2 2 3 3 3 1 0 0 2 2 3 3 1 1 0 3 1 1 2 1 0 0 0 0 0 3\n",
" 2 0 3 0 0 0 0 1 3 3 1 0 1 2 1 1 2 2 3 3 3 1 2 0 0 0 2 1 1 3 1 0 2 2 1 3 1\n",
" 3 0 0 2 1 3 0 0 1 0 1 3 2 0 1 3 3 0 1 3 3 3 0 3 1 2 3 3 3 1 1 3 3 1 3 3 3\n",
" 3 3 0 1 2 2 2 3 0 2 3 2 3 2 1 0 2 0 3 3 1 3 1 0 3 1 2 0 0 3 0 1 2 3 3 3 1\n",
" 1 0 1 3 3 0 1 1 2 0 3 3 2 3 1 3 2 0 2 1 2 1 0 0 0 3 3 3 1 0 1 1 2 2 2 0 3\n",
" 3 2 1 3 0 0 3 1 3 2 0 1 1 2 1 1 3 1 0 0 3 3 0 3 0 0 1 0 0 2 2 2 3 0 3 2 2\n",
" 3 3 3 2 1 2 0 3 2 3 3 0 2 3 2 3 3 3 0 0 2 3 0 0 2 3 1 1 1 2 1 2 0 3 1 2 0\n",
" 0 1 0 1 0 2 1 2 2 3 2 1 1 3 1 0 3 1 0 0 3 0 1 0 0 1 3 3 0 2 1 1 1 3 3 0 2\n",
" 0 2 0 0 3 3 0 2 2 1 3 1 1 0 1 3 1 0 3 1 0 0 3 2 3 2 0 2 0 0 1 2 3 2 1 1 0\n",
" 1 2 2 1 1 1 3 1 2 0 3 3 3 0 0 1 2 3 1]\n"
]
}
],
"source": [
"ypred_m5=m5.predict(x_test)\n",
"print(ypred_m5)"
]
},
{
"cell_type": "code",
"execution_count": 190,
"id": "c277f0cc",
"metadata": {},
"outputs": [],
"source": [
"from sklearn.metrics import confusion_matrix,classification_report"
]
},
{
"cell_type": "code",
"execution_count": 191,
"id": "5b18949e",
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"[[124 8 0 0]\n",
" [ 10 97 11 0]\n",
" [ 0 14 89 17]\n",
" [ 0 0 3 127]]\n",
" precision recall f1-score support\n",
"\n",
" 0 0.93 0.94 0.93 132\n",
" 1 0.82 0.82 0.82 118\n",
" 2 0.86 0.74 0.80 120\n",
" 3 0.88 0.98 0.93 130\n",
"\n",
" accuracy 0.87 500\n",
" macro avg 0.87 0.87 0.87 500\n",
"weighted avg 0.87 0.87 0.87 500\n",
"\n"
]
}
],
"source": [
"print(confusion_matrix(y_test,ypred_m5))\n",
"print(classification_report(y_test,ypred_m5))"
]
},
{
"cell_type": "code",
"execution_count": 192,
"id": "7825a333",
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"test score 0.874\n"
]
}
],
"source": [
"print('test score',m5.score(x_test,y_test))"
]
}
],
"metadata": {
"kernelspec": {
"display_name": "Python 3",
"language": "python",
"name": "python3"
},
"language_info": {
"codemirror_mode": {
"name": "ipython",
"version": 3
},
"file_extension": ".py",
"mimetype": "text/x-python",
"name": "python",
"nbconvert_exporter": "python",
"pygments_lexer": "ipython3",
"version": "3.10.7 (tags/v3.10.7:6cc6b13, Sep 5 2022, 14:08:36) [MSC v.1933 64 bit (AMD64)]"
},
"vscode": {
"interpreter": {
"hash": "22a7a93b3c39d93f69fcd4b86203bbf6ef8378a8960b182d8683d26141f52fbf"
}
}
},
"nbformat": 4,
"nbformat_minor": 5
}