diff --git "a/Prediction_MODEL_FInal.ipynb" "b/Prediction_MODEL_FInal.ipynb" new file mode 100644--- /dev/null +++ "b/Prediction_MODEL_FInal.ipynb" @@ -0,0 +1,6656 @@ +{ + "cells": [ + { + "cell_type": "code", + "execution_count": 1, + "id": "ff58dba0", + "metadata": {}, + "outputs": [], + "source": [ + "import numpy as np \n", + "import pandas as pd \n", + "import matplotlib.pyplot as plt\n", + "import seaborn as sns" + ] + }, + { + "cell_type": "code", + "execution_count": 67, + "id": "26da1e5b", + "metadata": {}, + "outputs": [], + "source": [ + "df= pd.read_csv(\"laptop_data.csv\") #importing dataset " + ] + }, + { + "cell_type": "code", + "execution_count": 68, + "id": "99d24ad7", + "metadata": {}, + "outputs": [ + { + "data": { + "text/html": [ + "
\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
Unnamed: 0CompanyTypeNameInchesScreenResolutionCpuRamMemoryGpuOpSysWeightPrice
00AppleUltrabook13.3IPS Panel Retina Display 2560x1600Intel Core i5 2.3GHz8GB128GB SSDIntel Iris Plus Graphics 640macOS1.37kg71378.6832
11AppleUltrabook13.31440x900Intel Core i5 1.8GHz8GB128GB Flash StorageIntel HD Graphics 6000macOS1.34kg47895.5232
\n", + "
" + ], + "text/plain": [ + " Unnamed: 0 Company TypeName Inches ScreenResolution \\\n", + "0 0 Apple Ultrabook 13.3 IPS Panel Retina Display 2560x1600 \n", + "1 1 Apple Ultrabook 13.3 1440x900 \n", + "\n", + " Cpu Ram Memory \\\n", + "0 Intel Core i5 2.3GHz 8GB 128GB SSD \n", + "1 Intel Core i5 1.8GHz 8GB 128GB Flash Storage \n", + "\n", + " Gpu OpSys Weight Price \n", + "0 Intel Iris Plus Graphics 640 macOS 1.37kg 71378.6832 \n", + "1 Intel HD Graphics 6000 macOS 1.34kg 47895.5232 " + ] + }, + "execution_count": 68, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "df.head(2)" + ] + }, + { + "cell_type": "markdown", + "id": "42938375", + "metadata": {}, + "source": [ + "# Here are some columns which we do not need but at the time of data cleaning we will be removing those columns too.\n", + "## STAGE:-\n", + "### 1.Data Cleaning\n", + "### 2. Feature Engineering\n", + "### 3. EDA (Exploratory Data Analysis\n", + "### 4. Selecting The Best Model\n", + "### 5. Making Website For The Model\n", + "### 6. Finally Deployment Of It. 
" + ] + }, + { + "cell_type": "code", + "execution_count": 69, + "id": "5613e1f3", + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "(1303, 12)" + ] + }, + "execution_count": 69, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "df.shape #rows and columns in dara " + ] + }, + { + "cell_type": "code", + "execution_count": 5, + "id": "22d3ae32", + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "\n", + "RangeIndex: 1303 entries, 0 to 1302\n", + "Data columns (total 12 columns):\n", + " # Column Non-Null Count Dtype \n", + "--- ------ -------------- ----- \n", + " 0 Unnamed: 0 1303 non-null int64 \n", + " 1 Company 1303 non-null object \n", + " 2 TypeName 1303 non-null object \n", + " 3 Inches 1303 non-null float64\n", + " 4 ScreenResolution 1303 non-null object \n", + " 5 Cpu 1303 non-null object \n", + " 6 Ram 1303 non-null object \n", + " 7 Memory 1303 non-null object \n", + " 8 Gpu 1303 non-null object \n", + " 9 OpSys 1303 non-null object \n", + " 10 Weight 1303 non-null object \n", + " 11 Price 1303 non-null float64\n", + "dtypes: float64(2), int64(1), object(9)\n", + "memory usage: 122.3+ KB\n" + ] + } + ], + "source": [ + "df.info() #information of the each column in the database " + ] + }, + { + "cell_type": "code", + "execution_count": 70, + "id": "75d135ca", + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "0" + ] + }, + "execution_count": 70, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "df.duplicated().sum() #to check the duplicate values in the dataset " + ] + }, + { + "cell_type": "code", + "execution_count": 71, + "id": "534db510", + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "Unnamed: 0 0\n", + "Company 0\n", + "TypeName 0\n", + "Inches 0\n", + "ScreenResolution 0\n", + "Cpu 0\n", + "Ram 0\n", + "Memory 0\n", + "Gpu 0\n", + "OpSys 0\n", + "Weight 0\n", + "Price 0\n", + "dtype: 
int64" + ] + }, + "execution_count": 71, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "df.isnull().sum() #to check the missing values of the dataset " + ] + }, + { + "cell_type": "markdown", + "id": "53efc5c7", + "metadata": {}, + "source": [ + "# DATA CLEANING SECTION" + ] + }, + { + "cell_type": "code", + "execution_count": 72, + "id": "b1291b3d", + "metadata": {}, + "outputs": [], + "source": [ + "df.drop(columns=[\"Unnamed: 0\"],inplace= True) " + ] + }, + { + "cell_type": "code", + "execution_count": 73, + "id": "0419494e", + "metadata": {}, + "outputs": [ + { + "data": { + "text/html": [ + "
\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
CompanyTypeNameInchesScreenResolutionCpuRamMemoryGpuOpSysWeightPrice
0AppleUltrabook13.3IPS Panel Retina Display 2560x1600Intel Core i5 2.3GHz8GB128GB SSDIntel Iris Plus Graphics 640macOS1.37kg71378.6832
\n", + "
" + ], + "text/plain": [ + " Company TypeName Inches ScreenResolution \\\n", + "0 Apple Ultrabook 13.3 IPS Panel Retina Display 2560x1600 \n", + "\n", + " Cpu Ram Memory Gpu OpSys \\\n", + "0 Intel Core i5 2.3GHz 8GB 128GB SSD Intel Iris Plus Graphics 640 macOS \n", + "\n", + " Weight Price \n", + "0 1.37kg 71378.6832 " + ] + }, + "execution_count": 73, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "df.head(1) #Unnamed column has been dropped" + ] + }, + { + "cell_type": "code", + "execution_count": 74, + "id": "2edf5488", + "metadata": {}, + "outputs": [], + "source": [ + "df[\"Ram\"]= df[\"Ram\"].str.replace(\"GB\",\"\") #Removed the GB suffix from the Ram column " + ] + }, + { + "cell_type": "code", + "execution_count": 75, + "id": "dad4e31c", + "metadata": {}, + "outputs": [], + "source": [ + "df[\"Weight\"]= df[\"Weight\"].str.replace(\"kg\",\"\") #Removed the kg suffix from the Weight column" + ] + }, + { + "cell_type": "code", + "execution_count": 76, + "id": "6c283cae", + "metadata": {}, + "outputs": [ + { + "data": { + "text/html": [ + "
\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
CompanyTypeNameInchesScreenResolutionCpuRamMemoryGpuOpSysWeightPrice
0AppleUltrabook13.3IPS Panel Retina Display 2560x1600Intel Core i5 2.3GHz8128GB SSDIntel Iris Plus Graphics 640macOS1.3771378.6832
\n", + "
" + ], + "text/plain": [ + " Company TypeName Inches ScreenResolution \\\n", + "0 Apple Ultrabook 13.3 IPS Panel Retina Display 2560x1600 \n", + "\n", + " Cpu Ram Memory Gpu OpSys \\\n", + "0 Intel Core i5 2.3GHz 8 128GB SSD Intel Iris Plus Graphics 640 macOS \n", + "\n", + " Weight Price \n", + "0 1.37 71378.6832 " + ] + }, + "execution_count": 76, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "df.head(1)" + ] + }, + { + "cell_type": "code", + "execution_count": 77, + "id": "10bd9666", + "metadata": {}, + "outputs": [], + "source": [ + "df[\"Ram\"]= df[\"Ram\"].astype(\"int32\") #Converted Ram dtype from object to int 32" + ] + }, + { + "cell_type": "code", + "execution_count": 78, + "id": "e2c59a02", + "metadata": {}, + "outputs": [], + "source": [ + "df[\"Weight\"]= df[\"Weight\"].astype(\"float32\") #Converted Weight dtype from object to int float" + ] + }, + { + "cell_type": "code", + "execution_count": 79, + "id": "759b1a63", + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "\n", + "RangeIndex: 1303 entries, 0 to 1302\n", + "Data columns (total 11 columns):\n", + " # Column Non-Null Count Dtype \n", + "--- ------ -------------- ----- \n", + " 0 Company 1303 non-null object \n", + " 1 TypeName 1303 non-null object \n", + " 2 Inches 1303 non-null float64\n", + " 3 ScreenResolution 1303 non-null object \n", + " 4 Cpu 1303 non-null object \n", + " 5 Ram 1303 non-null int32 \n", + " 6 Memory 1303 non-null object \n", + " 7 Gpu 1303 non-null object \n", + " 8 OpSys 1303 non-null object \n", + " 9 Weight 1303 non-null float32\n", + " 10 Price 1303 non-null float64\n", + "dtypes: float32(1), float64(2), int32(1), object(7)\n", + "memory usage: 101.9+ KB\n" + ] + } + ], + "source": [ + "df.info()" + ] + }, + { + "cell_type": "markdown", + "id": "d4b85288", + "metadata": {}, + "source": [ + "# EDA OR ANALYSIS ON THE DATA\n", + "## UNIVARIATE" + ] + }, + { + "cell_type": "code", + 
"execution_count": 80, + "id": "cdab8319", + "metadata": {}, + "outputs": [ + { + "name": "stderr", + "output_type": "stream", + "text": [ + "C:\\Users\\USER\\AppData\\Local\\Temp\\ipykernel_9696\\941010651.py:1: UserWarning: \n", + "\n", + "`distplot` is a deprecated function and will be removed in seaborn v0.14.0.\n", + "\n", + "Please adapt your code to use either `displot` (a figure-level function with\n", + "similar flexibility) or `histplot` (an axes-level function for histograms).\n", + "\n", + "For a guide to updating your code to use the new functions, please see\n", + "https://gist.github.com/mwaskom/de44147ed2974457ad6372750bbe5751\n", + "\n", + " sns.distplot(df[\"Price\"])\n" + ] + }, + { + "data": { + "text/plain": [ + "" + ] + }, + "execution_count": 80, + "metadata": {}, + "output_type": "execute_result" + }, + { + "data": { + "image/png": "\n", + "text/plain": [ + "
" + ] + }, + "metadata": {}, + "output_type": "display_data" + } + ], + "source": [ + "sns.distplot(df[\"Price\"])" + ] + }, + { + "cell_type": "code", + "execution_count": 17, + "id": "dc3683af", + "metadata": {}, + "outputs": [], + "source": [ + "#There are few laptops which have higher prices but so many with lower prices ##OR YOU CAN SAY DATA IS SKEWED " + ] + }, + { + "cell_type": "markdown", + "id": "5836c4c2", + "metadata": {}, + "source": [ + "# Column Company\n" + ] + }, + { + "cell_type": "code", + "execution_count": 81, + "id": "474bf90e", + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "" + ] + }, + "execution_count": 81, + "metadata": {}, + "output_type": "execute_result" + }, + { + "data": { + "image/png": "\n", + "text/plain": [ + "
" + ] + }, + "metadata": {}, + "output_type": "display_data" + } + ], + "source": [ + "df[\"Company\"].value_counts().plot(kind=\"bar\") #To see how many laptops are there of each company " + ] + }, + { + "cell_type": "code", + "execution_count": 82, + "id": "cf2138b3", + "metadata": {}, + "outputs": [ + { + "data": { + "image/png": "\n", + "text/plain": [ + "
" + ] + }, + "metadata": {}, + "output_type": "display_data" + } + ], + "source": [ + "sns.barplot(x=df[\"Company\"], y=df[\"Price\"])\n", + "plt.xticks(rotation='vertical')\n", + "plt.show() # to see the average price per company; this indicates that the company also affects the price " + ] + }, + { + "cell_type": "markdown", + "id": "be837b41", + "metadata": {}, + "source": [ + "# Column TypeName" + ] + }, + { + "cell_type": "code", + "execution_count": 83, + "id": "db90287e", + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "" + ] + }, + "execution_count": 83, + "metadata": {}, + "output_type": "execute_result" + }, + { + "data": { + "image/png": "\n", + "text/plain": [ + "
" + ] + }, + "metadata": {}, + "output_type": "display_data" + } + ], + "source": [ + "df[\"TypeName\"].value_counts().plot(kind=\"bar\") #To see the how many types of laptop we have " + ] + }, + { + "cell_type": "code", + "execution_count": 84, + "id": "e365cdbe", + "metadata": {}, + "outputs": [ + { + "data": { + "image/png": "\n", + "text/plain": [ + "
" + ] + }, + "metadata": {}, + "output_type": "display_data" + } + ], + "source": [ + "sns.barplot(x=df[\"TypeName\"], y=df[\"Price\"])\n", + "plt.xticks(rotation='vertical') \n", + "plt.show() # To see the average price for each TypeName" + ] + }, + { + "cell_type": "code", + "execution_count": 85, + "id": "1dfe2363", + "metadata": {}, + "outputs": [], + "source": [ + "#the type of laptop you want to buy makes a big difference in price " + ] + }, + { + "cell_type": "markdown", + "id": "83161cb3", + "metadata": {}, + "source": [ + "# Column Inches" + ] + }, + { + "cell_type": "code", + "execution_count": 86, + "id": "fa51b837", + "metadata": {}, + "outputs": [ + { + "name": "stderr", + "output_type": "stream", + "text": [ + "C:\\Users\\USER\\AppData\\Local\\Temp\\ipykernel_9696\\4187967466.py:1: UserWarning: \n", + "\n", + "`distplot` is a deprecated function and will be removed in seaborn v0.14.0.\n", + "\n", + "Please adapt your code to use either `displot` (a figure-level function with\n", + "similar flexibility) or `histplot` (an axes-level function for histograms).\n", + "\n", + "For a guide to updating your code to use the new functions, please see\n", + "https://gist.github.com/mwaskom/de44147ed2974457ad6372750bbe5751\n", + "\n", + " sns.distplot(df[\"Inches\"])\n" + ] + }, + { + "data": { + "text/plain": [ + "" + ] + }, + "execution_count": 86, + "metadata": {}, + "output_type": "execute_result" + }, + { + "data": { + "image/png": "\n", + "text/plain": [ + "
" + ] + }, + "metadata": {}, + "output_type": "display_data" + } + ], + "source": [ + "sns.distplot(df[\"Inches\"])\n" + ] + }, + { + "cell_type": "code", + "execution_count": 87, + "id": "c8dbba0f", + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "" + ] + }, + "execution_count": 87, + "metadata": {}, + "output_type": "execute_result" + }, + { + "data": { + "image/png": "\n", + "text/plain": [ + "
" + ] + }, + "metadata": {}, + "output_type": "display_data" + } + ], + "source": [ + "sns.scatterplot(x=df[\"Inches\"],y=df[\"Price\"])\n" + ] + }, + { + "cell_type": "code", + "execution_count": 88, + "id": "f77a4a40", + "metadata": {}, + "outputs": [], + "source": [ + "# The scatter plot shows that the price tends to increase as the laptop size increases;\n", + "# the correlation is a minor one, not a strong one" + ] + }, + { + "cell_type": "markdown", + "id": "e862e1c9", + "metadata": {}, + "source": [ + "# Column ScreenResolution" + ] + }, + { + "cell_type": "code", + "execution_count": 89, + "id": "e4b216d5", + "metadata": {}, + "outputs": [ + { + "data": { + "text/html": [ + "
\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
CompanyTypeNameInchesScreenResolutionCpuRamMemoryGpuOpSysWeightPrice
0AppleUltrabook13.3IPS Panel Retina Display 2560x1600Intel Core i5 2.3GHz8128GB SSDIntel Iris Plus Graphics 640macOS1.3771378.6832
\n", + "
" + ], + "text/plain": [ + " Company TypeName Inches ScreenResolution \\\n", + "0 Apple Ultrabook 13.3 IPS Panel Retina Display 2560x1600 \n", + "\n", + " Cpu Ram Memory Gpu OpSys \\\n", + "0 Intel Core i5 2.3GHz 8 128GB SSD Intel Iris Plus Graphics 640 macOS \n", + "\n", + " Weight Price \n", + "0 1.37 71378.6832 " + ] + }, + "execution_count": 89, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "df.head(1)" + ] + }, + { + "cell_type": "code", + "execution_count": 90, + "id": "5a2324e5", + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "Full HD 1920x1080 507\n", + "1366x768 281\n", + "IPS Panel Full HD 1920x1080 230\n", + "IPS Panel Full HD / Touchscreen 1920x1080 53\n", + "Full HD / Touchscreen 1920x1080 47\n", + "1600x900 23\n", + "Touchscreen 1366x768 16\n", + "Quad HD+ / Touchscreen 3200x1800 15\n", + "IPS Panel 4K Ultra HD 3840x2160 12\n", + "IPS Panel 4K Ultra HD / Touchscreen 3840x2160 11\n", + "4K Ultra HD / Touchscreen 3840x2160 10\n", + "4K Ultra HD 3840x2160 7\n", + "Touchscreen 2560x1440 7\n", + "IPS Panel 1366x768 7\n", + "IPS Panel Quad HD+ / Touchscreen 3200x1800 6\n", + "IPS Panel Retina Display 2560x1600 6\n", + "IPS Panel Retina Display 2304x1440 6\n", + "Touchscreen 2256x1504 6\n", + "IPS Panel Touchscreen 2560x1440 5\n", + "IPS Panel Retina Display 2880x1800 4\n", + "IPS Panel Touchscreen 1920x1200 4\n", + "1440x900 4\n", + "IPS Panel 2560x1440 4\n", + "IPS Panel Quad HD+ 2560x1440 3\n", + "Quad HD+ 3200x1800 3\n", + "1920x1080 3\n", + "Touchscreen 2400x1600 3\n", + "2560x1440 3\n", + "IPS Panel Touchscreen 1366x768 3\n", + "IPS Panel Touchscreen / 4K Ultra HD 3840x2160 2\n", + "IPS Panel Full HD 2160x1440 2\n", + "IPS Panel Quad HD+ 3200x1800 2\n", + "IPS Panel Retina Display 2736x1824 1\n", + "IPS Panel Full HD 1920x1200 1\n", + "IPS Panel Full HD 2560x1440 1\n", + "IPS Panel Full HD 1366x768 1\n", + "Touchscreen / Full HD 1920x1080 1\n", + "Touchscreen / Quad HD+ 3200x1800 1\n", + "Touchscreen 
/ 4K Ultra HD 3840x2160 1\n", + "IPS Panel Touchscreen 2400x1600 1\n", + "Name: ScreenResolution, dtype: int64" + ] + }, + "execution_count": 90, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "df[\"ScreenResolution\"].value_counts() #distinct ScreenResolution values and how often each one occurs" + ] + }, + { + "cell_type": "code", + "execution_count": 91, + "id": "a31dce70", + "metadata": {}, + "outputs": [], + "source": [ + "df[\"Touchscreen\"]=df[\"ScreenResolution\"].apply(lambda x: 1 if \"Touchscreen\" in x else 0) #Creating a new flag column: 1 if ScreenResolution mentions Touchscreen, else 0" + ] + }, + { + "cell_type": "code", + "execution_count": 92, + "id": "7b2f5f50", + "metadata": {}, + "outputs": [ + { + "data": { + "text/html": [ + "
\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
CompanyTypeNameInchesScreenResolutionCpuRamMemoryGpuOpSysWeightPriceTouchscreen
1256AsusGaming17.3IPS Panel Full HD 1920x1080Intel Core i7 6700HQ 2.6GHz16128GB SSD + 1TB HDDNvidia GeForce GTX 970MWindows 104.00101232.000
37DellNotebook17.3IPS Panel Full HD 1920x1080Intel Core i5 8250U 1.6GHz8128GB SSD + 1TB HDDAMD Radeon 530Windows 102.8052161.120
420Lenovo2 in 1 Convertible15.6IPS Panel 4K Ultra HD / Touchscreen 3840x2160Intel Core i7 7700HQ 2.8GHz16512GB SSDNvidia GeForce GTX 1050Windows 102.00101178.721
945LenovoGaming15.6IPS Panel Full HD 1920x1080AMD FX 8800P 2.1GHz16512GB SSD + 1TB HDDAMD Radeon R9 M385Windows 102.5053226.720
1191Samsung2 in 1 Convertible12.3IPS Panel Touchscreen 2400x1600Samsung Cortex A72&A53 2.0GHz432GB Flash StorageARM Mali T860 MP4Chrome OS1.1535111.521
\n", + "
" + ], + "text/plain": [ + " Company TypeName Inches \\\n", + "1256 Asus Gaming 17.3 \n", + "37 Dell Notebook 17.3 \n", + "420 Lenovo 2 in 1 Convertible 15.6 \n", + "945 Lenovo Gaming 15.6 \n", + "1191 Samsung 2 in 1 Convertible 12.3 \n", + "\n", + " ScreenResolution \\\n", + "1256 IPS Panel Full HD 1920x1080 \n", + "37 IPS Panel Full HD 1920x1080 \n", + "420 IPS Panel 4K Ultra HD / Touchscreen 3840x2160 \n", + "945 IPS Panel Full HD 1920x1080 \n", + "1191 IPS Panel Touchscreen 2400x1600 \n", + "\n", + " Cpu Ram Memory \\\n", + "1256 Intel Core i7 6700HQ 2.6GHz 16 128GB SSD + 1TB HDD \n", + "37 Intel Core i5 8250U 1.6GHz 8 128GB SSD + 1TB HDD \n", + "420 Intel Core i7 7700HQ 2.8GHz 16 512GB SSD \n", + "945 AMD FX 8800P 2.1GHz 16 512GB SSD + 1TB HDD \n", + "1191 Samsung Cortex A72&A53 2.0GHz 4 32GB Flash Storage \n", + "\n", + " Gpu OpSys Weight Price Touchscreen \n", + "1256 Nvidia GeForce GTX 970M Windows 10 4.00 101232.00 0 \n", + "37 AMD Radeon 530 Windows 10 2.80 52161.12 0 \n", + "420 Nvidia GeForce GTX 1050 Windows 10 2.00 101178.72 1 \n", + "945 AMD Radeon R9 M385 Windows 10 2.50 53226.72 0 \n", + "1191 ARM Mali T860 MP4 Chrome OS 1.15 35111.52 1 " + ] + }, + "execution_count": 92, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "df.head(2)\n", + "df.sample(2)\n", + "df.sample(5)\n", + "\n" + ] + }, + { + "cell_type": "code", + "execution_count": 93, + "id": "190fb140", + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "" + ] + }, + "execution_count": 93, + "metadata": {}, + "output_type": "execute_result" + }, + { + "data": { + "image/png": "\n", + "text/plain": [ + "
" + ] + }, + "metadata": {}, + "output_type": "display_data" + } + ], + "source": [ + "df[\"Touchscreen\"].value_counts().plot(kind=\"bar\") # TO see how many laptops are touchscreen and how many are not " + ] + }, + { + "cell_type": "code", + "execution_count": 94, + "id": "59a1c9a9", + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "" + ] + }, + "execution_count": 94, + "metadata": {}, + "output_type": "execute_result" + }, + { + "data": { + "image/png": "\n", + "text/plain": [ + "
" + ] + }, + "metadata": {}, + "output_type": "display_data" + } + ], + "source": [ + "sns.barplot(x=df[\"Touchscreen\"],y=df[\"Price\"])\n" + ] + }, + { + "cell_type": "code", + "execution_count": 62, + "id": "72f0f94e", + "metadata": {}, + "outputs": [], + "source": [ + "#It shows that touchscreen laptops have a higher price than non-touchscreen laptops" + ] + }, + { + "cell_type": "code", + "execution_count": 95, + "id": "44d0bcbf", + "metadata": {}, + "outputs": [], + "source": [ + "df[\"IPS\"]=df[\"ScreenResolution\"].apply(lambda x: 1 if \"IPS\" in x else 0) #Creating a new flag column: 1 if ScreenResolution mentions IPS, else 0" + ] + }, + { + "cell_type": "code", + "execution_count": 96, + "id": "8acf76ff", + "metadata": {}, + "outputs": [ + { + "data": { + "text/html": [ + "
\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
CompanyTypeNameInchesScreenResolutionCpuRamMemoryGpuOpSysWeightPriceTouchscreenIPS
0AppleUltrabook13.3IPS Panel Retina Display 2560x1600Intel Core i5 2.3GHz8128GB SSDIntel Iris Plus Graphics 640macOS1.3771378.683201
1AppleUltrabook13.31440x900Intel Core i5 1.8GHz8128GB Flash StorageIntel HD Graphics 6000macOS1.3447895.523200
2HPNotebook15.6Full HD 1920x1080Intel Core i5 7200U 2.5GHz8256GB SSDIntel HD Graphics 620No OS1.8630636.000000
3AppleUltrabook15.4IPS Panel Retina Display 2880x1800Intel Core i7 2.7GHz16512GB SSDAMD Radeon Pro 455macOS1.83135195.336001
4AppleUltrabook13.3IPS Panel Retina Display 2560x1600Intel Core i5 3.1GHz8256GB SSDIntel Iris Plus Graphics 650macOS1.3796095.808001
\n", + "
" + ], + "text/plain": [ + " Company TypeName Inches ScreenResolution \\\n", + "0 Apple Ultrabook 13.3 IPS Panel Retina Display 2560x1600 \n", + "1 Apple Ultrabook 13.3 1440x900 \n", + "2 HP Notebook 15.6 Full HD 1920x1080 \n", + "3 Apple Ultrabook 15.4 IPS Panel Retina Display 2880x1800 \n", + "4 Apple Ultrabook 13.3 IPS Panel Retina Display 2560x1600 \n", + "\n", + " Cpu Ram Memory \\\n", + "0 Intel Core i5 2.3GHz 8 128GB SSD \n", + "1 Intel Core i5 1.8GHz 8 128GB Flash Storage \n", + "2 Intel Core i5 7200U 2.5GHz 8 256GB SSD \n", + "3 Intel Core i7 2.7GHz 16 512GB SSD \n", + "4 Intel Core i5 3.1GHz 8 256GB SSD \n", + "\n", + " Gpu OpSys Weight Price Touchscreen IPS \n", + "0 Intel Iris Plus Graphics 640 macOS 1.37 71378.6832 0 1 \n", + "1 Intel HD Graphics 6000 macOS 1.34 47895.5232 0 0 \n", + "2 Intel HD Graphics 620 No OS 1.86 30636.0000 0 0 \n", + "3 AMD Radeon Pro 455 macOS 1.83 135195.3360 0 1 \n", + "4 Intel Iris Plus Graphics 650 macOS 1.37 96095.8080 0 1 " + ] + }, + "execution_count": 96, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "df.head()" + ] + }, + { + "cell_type": "code", + "execution_count": 97, + "id": "b623a7be", + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "" + ] + }, + "execution_count": 97, + "metadata": {}, + "output_type": "execute_result" + }, + { + "data": { + "image/png": "\n", + "text/plain": [ + "
" + ] + }, + "metadata": {}, + "output_type": "display_data" + } + ], + "source": [ + "df[\"IPS\"].value_counts().plot(kind=\"bar\") #To see how many laptops have iPS or not " + ] + }, + { + "cell_type": "code", + "execution_count": 98, + "id": "284afd9d", + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "" + ] + }, + "execution_count": 98, + "metadata": {}, + "output_type": "execute_result" + }, + { + "data": { + "image/png": "\n", + "text/plain": [ + "
" + ] + }, + "metadata": {}, + "output_type": "display_data" + } + ], + "source": [ + "sns.barplot(x=df[\"IPS\"],y=df[\"Price\"]) #price distribution of IPS with price" + ] + }, + { + "cell_type": "code", + "execution_count": 99, + "id": "1735fc69", + "metadata": {}, + "outputs": [], + "source": [ + "#IPS displays have higher price than non IPS devices " + ] + }, + { + "cell_type": "code", + "execution_count": 100, + "id": "e502a693", + "metadata": {}, + "outputs": [], + "source": [ + "# To extract the resolution into column X and Column Y \n", + "#for example 1280x460 ,X= 1280 and Y= 460\n" + ] + }, + { + "cell_type": "code", + "execution_count": 101, + "id": "9cb9ee1c", + "metadata": {}, + "outputs": [ + { + "data": { + "text/html": [ + "
\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
01
0IPS Panel Retina Display 25601600
11440900
2Full HD 19201080
3IPS Panel Retina Display 28801800
4IPS Panel Retina Display 25601600
.........
1298IPS Panel Full HD / Touchscreen 19201080
1299IPS Panel Quad HD+ / Touchscreen 32001800
13001366768
13011366768
13021366768
\n", + "

1303 rows × 2 columns

\n", + "
" + ], + "text/plain": [ + " 0 1\n", + "0 IPS Panel Retina Display 2560 1600\n", + "1 1440 900\n", + "2 Full HD 1920 1080\n", + "3 IPS Panel Retina Display 2880 1800\n", + "4 IPS Panel Retina Display 2560 1600\n", + "... ... ...\n", + "1298 IPS Panel Full HD / Touchscreen 1920 1080\n", + "1299 IPS Panel Quad HD+ / Touchscreen 3200 1800\n", + "1300 1366 768\n", + "1301 1366 768\n", + "1302 1366 768\n", + "\n", + "[1303 rows x 2 columns]" + ] + }, + "execution_count": 101, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "df[\"ScreenResolution\"].str.split('x',n=1,expand=True)" + ] + }, + { + "cell_type": "code", + "execution_count": 102, + "id": "a2e83dc7", + "metadata": {}, + "outputs": [], + "source": [ + "new = df[\"ScreenResolution\"].str.split('x',n=1,expand=True) #made a new DataFrame from the column ScreenResolution" + ] + }, + { + "cell_type": "code", + "execution_count": 103, + "id": "f4e35269", + "metadata": {}, + "outputs": [], + "source": [ + "df[\"X_res\"] = new[0] #Storing Index oth value to the X_res and making new column in dbase\n", + "df[\"y_res\"]= new[1] #storing index 1st value to the Y_res and making new column in dbase" + ] + }, + { + "cell_type": "code", + "execution_count": 104, + "id": "fd8477cf", + "metadata": {}, + "outputs": [ + { + "data": { + "text/html": [ + "
\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
CompanyTypeNameInchesScreenResolutionCpuRamMemoryGpuOpSysWeightPriceTouchscreenIPSX_resy_res
0AppleUltrabook13.3IPS Panel Retina Display 2560x1600Intel Core i5 2.3GHz8128GB SSDIntel Iris Plus Graphics 640macOS1.3771378.683201IPS Panel Retina Display 25601600
1AppleUltrabook13.31440x900Intel Core i5 1.8GHz8128GB Flash StorageIntel HD Graphics 6000macOS1.3447895.5232001440900
2HPNotebook15.6Full HD 1920x1080Intel Core i5 7200U 2.5GHz8256GB SSDIntel HD Graphics 620No OS1.8630636.000000Full HD 19201080
3AppleUltrabook15.4IPS Panel Retina Display 2880x1800Intel Core i7 2.7GHz16512GB SSDAMD Radeon Pro 455macOS1.83135195.336001IPS Panel Retina Display 28801800
4AppleUltrabook13.3IPS Panel Retina Display 2560x1600Intel Core i5 3.1GHz8256GB SSDIntel Iris Plus Graphics 650macOS1.3796095.808001IPS Panel Retina Display 25601600
\n", + "
" + ], + "text/plain": [ + " Company TypeName Inches ScreenResolution \\\n", + "0 Apple Ultrabook 13.3 IPS Panel Retina Display 2560x1600 \n", + "1 Apple Ultrabook 13.3 1440x900 \n", + "2 HP Notebook 15.6 Full HD 1920x1080 \n", + "3 Apple Ultrabook 15.4 IPS Panel Retina Display 2880x1800 \n", + "4 Apple Ultrabook 13.3 IPS Panel Retina Display 2560x1600 \n", + "\n", + " Cpu Ram Memory \\\n", + "0 Intel Core i5 2.3GHz 8 128GB SSD \n", + "1 Intel Core i5 1.8GHz 8 128GB Flash Storage \n", + "2 Intel Core i5 7200U 2.5GHz 8 256GB SSD \n", + "3 Intel Core i7 2.7GHz 16 512GB SSD \n", + "4 Intel Core i5 3.1GHz 8 256GB SSD \n", + "\n", + " Gpu OpSys Weight Price Touchscreen IPS \\\n", + "0 Intel Iris Plus Graphics 640 macOS 1.37 71378.6832 0 1 \n", + "1 Intel HD Graphics 6000 macOS 1.34 47895.5232 0 0 \n", + "2 Intel HD Graphics 620 No OS 1.86 30636.0000 0 0 \n", + "3 AMD Radeon Pro 455 macOS 1.83 135195.3360 0 1 \n", + "4 Intel Iris Plus Graphics 650 macOS 1.37 96095.8080 0 1 \n", + "\n", + " X_res y_res \n", + "0 IPS Panel Retina Display 2560 1600 \n", + "1 1440 900 \n", + "2 Full HD 1920 1080 \n", + "3 IPS Panel Retina Display 2880 1800 \n", + "4 IPS Panel Retina Display 2560 1600 " + ] + }, + "execution_count": 104, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "df.head()" + ] + }, + { + "cell_type": "code", + "execution_count": 105, + "id": "88b43c50", + "metadata": {}, + "outputs": [], + "source": [ + "#As you can seee our Y_res is perfect but there is a slight problem in X_res" + ] + }, + { + "cell_type": "code", + "execution_count": 106, + "id": "6ed5b44e", + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "0 [2560]\n", + "1 [1440]\n", + "2 [1920]\n", + "3 [2880]\n", + "4 [2560]\n", + " ... 
\n", + "1298 [1920]\n", + "1299 [3200]\n", + "1300 [1366]\n", + "1301 [1366]\n", + "1302 [1366]\n", + "Name: X_res, Length: 1303, dtype: object" + ] + }, + "execution_count": 106, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "df[\"X_res\"].str.replace(\",\",\"\").str.findall(r'(\\d+\\.?\\d+)') \n", + "#Here we use a regular expression to find every run of two or more digits (optionally containing one decimal point), which picks the resolution number out of the surrounding text" + ] + }, + { + "cell_type": "code", + "execution_count": 107, + "id": "ab707b53", + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "0 2560\n", + "1 1440\n", + "2 1920\n", + "3 2880\n", + "4 2560\n", + " ... \n", + "1298 1920\n", + "1299 3200\n", + "1300 1366\n", + "1301 1366\n", + "1302 1366\n", + "Name: X_res, Length: 1303, dtype: object" + ] + }, + "execution_count": 107, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "df[\"X_res\"].str.replace(\",\",\"\").str.findall(r'(\\d+\\.?\\d+)').apply(lambda x:x[0])\n", + "#took the first match out of each list" + ] + }, + { + "cell_type": "code", + "execution_count": 108, + "id": "06432c2e", + "metadata": {}, + "outputs": [], + "source": [ + "df[\"X_res\"] =df[\"X_res\"].str.replace(\",\",\"\").str.findall(r'(\\d+\\.?\\d+)').apply(lambda x:x[0]) #took the first match out of each list\n", + "#stored the result of all these operations back into X_res" + ] + }, + { + "cell_type": "code", + "execution_count": 109, + "id": "2c799a2f", + "metadata": {}, + "outputs": [ + { + "data": { + "text/html": [ + "
\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
CompanyTypeNameInchesScreenResolutionCpuRamMemoryGpuOpSysWeightPriceTouchscreenIPSX_resy_res
0AppleUltrabook13.3IPS Panel Retina Display 2560x1600Intel Core i5 2.3GHz8128GB SSDIntel Iris Plus Graphics 640macOS1.3771378.68320125601600
1AppleUltrabook13.31440x900Intel Core i5 1.8GHz8128GB Flash StorageIntel HD Graphics 6000macOS1.3447895.5232001440900
2HPNotebook15.6Full HD 1920x1080Intel Core i5 7200U 2.5GHz8256GB SSDIntel HD Graphics 620No OS1.8630636.00000019201080
3AppleUltrabook15.4IPS Panel Retina Display 2880x1800Intel Core i7 2.7GHz16512GB SSDAMD Radeon Pro 455macOS1.83135195.33600128801800
4AppleUltrabook13.3IPS Panel Retina Display 2560x1600Intel Core i5 3.1GHz8256GB SSDIntel Iris Plus Graphics 650macOS1.3796095.80800125601600
\n", + "
" + ], + "text/plain": [ + " Company TypeName Inches ScreenResolution \\\n", + "0 Apple Ultrabook 13.3 IPS Panel Retina Display 2560x1600 \n", + "1 Apple Ultrabook 13.3 1440x900 \n", + "2 HP Notebook 15.6 Full HD 1920x1080 \n", + "3 Apple Ultrabook 15.4 IPS Panel Retina Display 2880x1800 \n", + "4 Apple Ultrabook 13.3 IPS Panel Retina Display 2560x1600 \n", + "\n", + " Cpu Ram Memory \\\n", + "0 Intel Core i5 2.3GHz 8 128GB SSD \n", + "1 Intel Core i5 1.8GHz 8 128GB Flash Storage \n", + "2 Intel Core i5 7200U 2.5GHz 8 256GB SSD \n", + "3 Intel Core i7 2.7GHz 16 512GB SSD \n", + "4 Intel Core i5 3.1GHz 8 256GB SSD \n", + "\n", + " Gpu OpSys Weight Price Touchscreen IPS \\\n", + "0 Intel Iris Plus Graphics 640 macOS 1.37 71378.6832 0 1 \n", + "1 Intel HD Graphics 6000 macOS 1.34 47895.5232 0 0 \n", + "2 Intel HD Graphics 620 No OS 1.86 30636.0000 0 0 \n", + "3 AMD Radeon Pro 455 macOS 1.83 135195.3360 0 1 \n", + "4 Intel Iris Plus Graphics 650 macOS 1.37 96095.8080 0 1 \n", + "\n", + " X_res y_res \n", + "0 2560 1600 \n", + "1 1440 900 \n", + "2 1920 1080 \n", + "3 2880 1800 \n", + "4 2560 1600 " + ] + }, + "execution_count": 109, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "df.head()" + ] + }, + { + "cell_type": "code", + "execution_count": 110, + "id": "e6c1658a", + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "\n", + "RangeIndex: 1303 entries, 0 to 1302\n", + "Data columns (total 15 columns):\n", + " # Column Non-Null Count Dtype \n", + "--- ------ -------------- ----- \n", + " 0 Company 1303 non-null object \n", + " 1 TypeName 1303 non-null object \n", + " 2 Inches 1303 non-null float64\n", + " 3 ScreenResolution 1303 non-null object \n", + " 4 Cpu 1303 non-null object \n", + " 5 Ram 1303 non-null int32 \n", + " 6 Memory 1303 non-null object \n", + " 7 Gpu 1303 non-null object \n", + " 8 OpSys 1303 non-null object \n", + " 9 Weight 1303 non-null float32\n", + " 10 Price 1303 
non-null float64\n", + " 11 Touchscreen 1303 non-null int64 \n", + " 12 IPS 1303 non-null int64 \n", + " 13 X_res 1303 non-null object \n", + " 14 y_res 1303 non-null object \n", + "dtypes: float32(1), float64(2), int32(1), int64(2), object(9)\n", + "memory usage: 142.6+ KB\n" + ] + } + ], + "source": [ + "df.info() #dtype of X_res and y_res is object, so we have to convert them to int" + ] + }, + { + "cell_type": "code", + "execution_count": 111, + "id": "f2111a9e", + "metadata": {}, + "outputs": [], + "source": [ + "df[\"X_res\"]= df[\"X_res\"].astype('int')\n", + "df[\"y_res\"]= df[\"y_res\"].astype('int')" + ] + }, + { + "cell_type": "code", + "execution_count": 52, + "id": "687a56ed", + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "\n", + "RangeIndex: 1303 entries, 0 to 1302\n", + "Data columns (total 15 columns):\n", + " # Column Non-Null Count Dtype \n", + "--- ------ -------------- ----- \n", + " 0 Company 1303 non-null object \n", + " 1 TypeName 1303 non-null object \n", + " 2 Inches 1303 non-null float64\n", + " 3 ScreenResolution 1303 non-null object \n", + " 4 Cpu 1303 non-null object \n", + " 5 Ram 1303 non-null int32 \n", + " 6 Memory 1303 non-null object \n", + " 7 Gpu 1303 non-null object \n", + " 8 OpSys 1303 non-null object \n", + " 9 Weight 1303 non-null float32\n", + " 10 Price 1303 non-null float64\n", + " 11 Touchscreen 1303 non-null int64 \n", + " 12 IPS 1303 non-null int64 \n", + " 13 X_res 1303 non-null int32 \n", + " 14 y_res 1303 non-null int32 \n", + "dtypes: float32(1), float64(2), int32(3), int64(2), object(7)\n", + "memory usage: 132.5+ KB\n" + ] + } + ], + "source": [ + "df.info() #converted X_res and y_res from object to int" + ] + }, + { + "cell_type": "markdown", + "id": "53aeb03b", + "metadata": {}, + "source": [ + "# Q: Why have we split the resolution into X_res and y_res?\n", + "# Ans: Because each of them has a fairly strong correlation with Price (about 0.55, as the next cell shows)" + ] + }, + { + "cell_type": "code", +
"execution_count": 112, + "id": "50171724", + "metadata": {}, + "outputs": [ + { + "name": "stderr", + "output_type": "stream", + "text": [ + "C:\\Users\\USER\\AppData\\Local\\Temp\\ipykernel_9696\\4121284421.py:1: FutureWarning: The default value of numeric_only in DataFrame.corr is deprecated. In a future version, it will default to False. Select only valid columns or specify the value of numeric_only to silence this warning.\n", + " df.corr()[\"Price\"] #You can see reslution has the very high correlation with price\n" + ] + }, + { + "data": { + "text/plain": [ + "Inches 0.068197\n", + "Ram 0.743007\n", + "Weight 0.210370\n", + "Price 1.000000\n", + "Touchscreen 0.191226\n", + "IPS 0.252208\n", + "X_res 0.556529\n", + "y_res 0.552809\n", + "Name: Price, dtype: float64" + ] + }, + "execution_count": 112, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "df.corr()[\"Price\"] #You can see reslution has the very high correlation with price " + ] + }, + { + "cell_type": "code", + "execution_count": 113, + "id": "0a9a7d5c", + "metadata": {}, + "outputs": [], + "source": [ + "df[\"ppi\"]= (((df[\"X_res\"]**2) + (df[\"y_res\"]**2))**0.5/df[\"Inches\"]).astype(\"float\")\n", + "#created a new columns with the help of the values x_res, y_res and Inches" + ] + }, + { + "cell_type": "code", + "execution_count": 114, + "id": "49462faf", + "metadata": {}, + "outputs": [ + { + "name": "stderr", + "output_type": "stream", + "text": [ + "C:\\Users\\USER\\AppData\\Local\\Temp\\ipykernel_9696\\815546952.py:1: FutureWarning: The default value of numeric_only in DataFrame.corr is deprecated. In a future version, it will default to False. 
Select only valid columns or specify the value of numeric_only to silence this warning.\n", + " df.corr()['Price']\n" + ] + }, + { + "data": { + "text/plain": [ + "Inches 0.068197\n", + "Ram 0.743007\n", + "Weight 0.210370\n", + "Price 1.000000\n", + "Touchscreen 0.191226\n", + "IPS 0.252208\n", + "X_res 0.556529\n", + "y_res 0.552809\n", + "ppi 0.473487\n", + "Name: Price, dtype: float64" + ] + }, + "execution_count": 114, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "df.corr()['Price']" + ] + }, + { + "cell_type": "code", + "execution_count": 115, + "id": "f869032b", + "metadata": {}, + "outputs": [], + "source": [ + "#now we have done and extracted all the values from the ScreenResolution column and now we are dropping it \n", + "df.drop(columns=[\"ScreenResolution\"],inplace = True)" + ] + }, + { + "cell_type": "code", + "execution_count": 116, + "id": "6f11b626", + "metadata": {}, + "outputs": [ + { + "data": { + "text/html": [ + "
\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
CompanyTypeNameInchesCpuRamMemoryGpuOpSysWeightPriceTouchscreenIPSX_resy_resppi
0AppleUltrabook13.3Intel Core i5 2.3GHz8128GB SSDIntel Iris Plus Graphics 640macOS1.3771378.68320125601600226.983005
1AppleUltrabook13.3Intel Core i5 1.8GHz8128GB Flash StorageIntel HD Graphics 6000macOS1.3447895.5232001440900127.677940
\n", + "
" + ], + "text/plain": [ + " Company TypeName Inches Cpu Ram Memory \\\n", + "0 Apple Ultrabook 13.3 Intel Core i5 2.3GHz 8 128GB SSD \n", + "1 Apple Ultrabook 13.3 Intel Core i5 1.8GHz 8 128GB Flash Storage \n", + "\n", + " Gpu OpSys Weight Price Touchscreen IPS \\\n", + "0 Intel Iris Plus Graphics 640 macOS 1.37 71378.6832 0 1 \n", + "1 Intel HD Graphics 6000 macOS 1.34 47895.5232 0 0 \n", + "\n", + " X_res y_res ppi \n", + "0 2560 1600 226.983005 \n", + "1 1440 900 127.677940 " + ] + }, + "execution_count": 116, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "df.head(2)\n", + "#also now we do not need Inches, X_res and Y_res because we have ppi \n", + "#so lets drop those columns too " + ] + }, + { + "cell_type": "code", + "execution_count": 117, + "id": "373c159d", + "metadata": {}, + "outputs": [], + "source": [ + "df.drop(columns=[\"Inches\"],inplace = True) #Dropped inches\n", + "df.drop(columns=[\"X_res\"],inplace = True) #Dropped X_res\n", + "df.drop(columns=[\"y_res\"],inplace = True) #Dropped y_res\n" + ] + }, + { + "cell_type": "code", + "execution_count": 118, + "id": "aff1dca7", + "metadata": {}, + "outputs": [ + { + "data": { + "text/html": [ + "
\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
CompanyTypeNameCpuRamMemoryGpuOpSysWeightPriceTouchscreenIPSppi
0AppleUltrabookIntel Core i5 2.3GHz8128GB SSDIntel Iris Plus Graphics 640macOS1.3771378.683201226.983005
1AppleUltrabookIntel Core i5 1.8GHz8128GB Flash StorageIntel HD Graphics 6000macOS1.3447895.523200127.677940
\n", + "
" + ], + "text/plain": [ + " Company TypeName Cpu Ram Memory \\\n", + "0 Apple Ultrabook Intel Core i5 2.3GHz 8 128GB SSD \n", + "1 Apple Ultrabook Intel Core i5 1.8GHz 8 128GB Flash Storage \n", + "\n", + " Gpu OpSys Weight Price Touchscreen IPS \\\n", + "0 Intel Iris Plus Graphics 640 macOS 1.37 71378.6832 0 1 \n", + "1 Intel HD Graphics 6000 macOS 1.34 47895.5232 0 0 \n", + "\n", + " ppi \n", + "0 226.983005 \n", + "1 127.677940 " + ] + }, + "execution_count": 118, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "df.head(2)" + ] + }, + { + "cell_type": "markdown", + "id": "4609ebcd", + "metadata": {}, + "source": [ + "# Column Cpu" + ] + }, + { + "cell_type": "code", + "execution_count": 119, + "id": "61a7a715", + "metadata": {}, + "outputs": [ + { + "data": { + "text/html": [ + "
\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
CompanyTypeNameCpuRamMemoryGpuOpSysWeightPriceTouchscreenIPSppi
0AppleUltrabookIntel Core i5 2.3GHz8128GB SSDIntel Iris Plus Graphics 640macOS1.3771378.683201226.983005
1AppleUltrabookIntel Core i5 1.8GHz8128GB Flash StorageIntel HD Graphics 6000macOS1.3447895.523200127.677940
\n", + "
" + ], + "text/plain": [ + " Company TypeName Cpu Ram Memory \\\n", + "0 Apple Ultrabook Intel Core i5 2.3GHz 8 128GB SSD \n", + "1 Apple Ultrabook Intel Core i5 1.8GHz 8 128GB Flash Storage \n", + "\n", + " Gpu OpSys Weight Price Touchscreen IPS \\\n", + "0 Intel Iris Plus Graphics 640 macOS 1.37 71378.6832 0 1 \n", + "1 Intel HD Graphics 6000 macOS 1.34 47895.5232 0 0 \n", + "\n", + " ppi \n", + "0 226.983005 \n", + "1 127.677940 " + ] + }, + "execution_count": 119, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "df.head(2)" + ] + }, + { + "cell_type": "code", + "execution_count": 120, + "id": "f3ce68a0", + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "Intel Core i5 7200U 2.5GHz 190\n", + "Intel Core i7 7700HQ 2.8GHz 146\n", + "Intel Core i7 7500U 2.7GHz 134\n", + "Intel Core i7 8550U 1.8GHz 73\n", + "Intel Core i5 8250U 1.6GHz 72\n", + " ... \n", + "Intel Core M M3-6Y30 0.9GHz 1\n", + "AMD A9-Series 9420 2.9GHz 1\n", + "Intel Core i3 6006U 2.2GHz 1\n", + "AMD A6-Series 7310 2GHz 1\n", + "Intel Xeon E3-1535M v6 3.1GHz 1\n", + "Name: Cpu, Length: 118, dtype: int64" + ] + }, + "execution_count": 120, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "df[\"Cpu\"].value_counts() #to see the values in the Cpu column " + ] + }, + { + "cell_type": "code", + "execution_count": 121, + "id": "f54606a6", + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "0 [Intel, Core, i5, 2.3GHz]\n", + "1 [Intel, Core, i5, 1.8GHz]\n", + "2 [Intel, Core, i5, 7200U, 2.5GHz]\n", + "3 [Intel, Core, i7, 2.7GHz]\n", + "4 [Intel, Core, i5, 3.1GHz]\n", + " ... 
\n", + "1298 [Intel, Core, i7, 6500U, 2.5GHz]\n", + "1299 [Intel, Core, i7, 6500U, 2.5GHz]\n", + "1300 [Intel, Celeron, Dual, Core, N3050, 1.6GHz]\n", + "1301 [Intel, Core, i7, 6500U, 2.5GHz]\n", + "1302 [Intel, Celeron, Dual, Core, N3050, 1.6GHz]\n", + "Name: Cpu, Length: 1303, dtype: object" + ] + }, + "execution_count": 121, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "df[\"Cpu\"].apply(lambda x: x.split()) #Split each value of the Cpu column into a list of words" + ] + }, + { + "cell_type": "code", + "execution_count": 122, + "id": "c1843165", + "metadata": {}, + "outputs": [], + "source": [ + "df[\"Cpu_Name\"] =df[\"Cpu\"].apply(lambda x:\" \".join( x.split()[0:3])) \n", + "#Made a new column, Cpu_Name, from the first three words of Cpu" + ] + }, + { + "cell_type": "code", + "execution_count": 123, + "id": "d9fd412c", + "metadata": {}, + "outputs": [ + { + "data": { + "text/html": [ + "
\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
CompanyTypeNameCpuRamMemoryGpuOpSysWeightPriceTouchscreenIPSppiCpu_Name
0AppleUltrabookIntel Core i5 2.3GHz8128GB SSDIntel Iris Plus Graphics 640macOS1.3771378.683201226.983005Intel Core i5
1AppleUltrabookIntel Core i5 1.8GHz8128GB Flash StorageIntel HD Graphics 6000macOS1.3447895.523200127.677940Intel Core i5
\n", + "
" + ], + "text/plain": [ + " Company TypeName Cpu Ram Memory \\\n", + "0 Apple Ultrabook Intel Core i5 2.3GHz 8 128GB SSD \n", + "1 Apple Ultrabook Intel Core i5 1.8GHz 8 128GB Flash Storage \n", + "\n", + " Gpu OpSys Weight Price Touchscreen IPS \\\n", + "0 Intel Iris Plus Graphics 640 macOS 1.37 71378.6832 0 1 \n", + "1 Intel HD Graphics 6000 macOS 1.34 47895.5232 0 0 \n", + "\n", + " ppi Cpu_Name \n", + "0 226.983005 Intel Core i5 \n", + "1 127.677940 Intel Core i5 " + ] + }, + "execution_count": 123, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "df.head(2)" + ] + }, + { + "cell_type": "code", + "execution_count": 124, + "id": "59d70abb", + "metadata": {}, + "outputs": [], + "source": [ + "def fetch_processor(text):\n", + " if text == \"Intel Core i7\" or text == \"Intel Core i5\" or text == \"Intel Core i3\":\n", + " return text\n", + " else: \n", + " if text.split()[0] == \"Intel\":\n", + " return \"Some Other Processor\"\n", + " else:\n", + " return \"AMD Processor\"\n", + " " + ] + }, + { + "cell_type": "code", + "execution_count": 125, + "id": "1582f1ee", + "metadata": {}, + "outputs": [], + "source": [ + "df[\"Cpu brand\"] = df[\"Cpu_Name\"].apply(fetch_processor)" + ] + }, + { + "cell_type": "code", + "execution_count": 126, + "id": "d9f7a84c", + "metadata": {}, + "outputs": [ + { + "data": { + "text/html": [ + "
\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
CompanyTypeNameCpuRamMemoryGpuOpSysWeightPriceTouchscreenIPSppiCpu_NameCpu brand
0AppleUltrabookIntel Core i5 2.3GHz8128GB SSDIntel Iris Plus Graphics 640macOS1.3771378.683201226.983005Intel Core i5Intel Core i5
1AppleUltrabookIntel Core i5 1.8GHz8128GB Flash StorageIntel HD Graphics 6000macOS1.3447895.523200127.677940Intel Core i5Intel Core i5
2HPNotebookIntel Core i5 7200U 2.5GHz8256GB SSDIntel HD Graphics 620No OS1.8630636.000000141.211998Intel Core i5Intel Core i5
3AppleUltrabookIntel Core i7 2.7GHz16512GB SSDAMD Radeon Pro 455macOS1.83135195.336001220.534624Intel Core i7Intel Core i7
4AppleUltrabookIntel Core i5 3.1GHz8256GB SSDIntel Iris Plus Graphics 650macOS1.3796095.808001226.983005Intel Core i5Intel Core i5
\n", + "
" + ], + "text/plain": [ + " Company TypeName Cpu Ram Memory \\\n", + "0 Apple Ultrabook Intel Core i5 2.3GHz 8 128GB SSD \n", + "1 Apple Ultrabook Intel Core i5 1.8GHz 8 128GB Flash Storage \n", + "2 HP Notebook Intel Core i5 7200U 2.5GHz 8 256GB SSD \n", + "3 Apple Ultrabook Intel Core i7 2.7GHz 16 512GB SSD \n", + "4 Apple Ultrabook Intel Core i5 3.1GHz 8 256GB SSD \n", + "\n", + " Gpu OpSys Weight Price Touchscreen IPS \\\n", + "0 Intel Iris Plus Graphics 640 macOS 1.37 71378.6832 0 1 \n", + "1 Intel HD Graphics 6000 macOS 1.34 47895.5232 0 0 \n", + "2 Intel HD Graphics 620 No OS 1.86 30636.0000 0 0 \n", + "3 AMD Radeon Pro 455 macOS 1.83 135195.3360 0 1 \n", + "4 Intel Iris Plus Graphics 650 macOS 1.37 96095.8080 0 1 \n", + "\n", + " ppi Cpu_Name Cpu brand \n", + "0 226.983005 Intel Core i5 Intel Core i5 \n", + "1 127.677940 Intel Core i5 Intel Core i5 \n", + "2 141.211998 Intel Core i5 Intel Core i5 \n", + "3 220.534624 Intel Core i7 Intel Core i7 \n", + "4 226.983005 Intel Core i5 Intel Core i5 " + ] + }, + "execution_count": 126, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "df.head()\n" + ] + }, + { + "cell_type": "code", + "execution_count": 127, + "id": "d2995032", + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "" + ] + }, + "execution_count": 127, + "metadata": {}, + "output_type": "execute_result" + }, + { + "data": { + "image/png": "\n", + "text/plain": [ + "
" + ] + }, + "metadata": {}, + "output_type": "display_data" + } + ], + "source": [ + "df['Cpu brand'].value_counts().plot(kind=\"bar\")" + ] + }, + { + "cell_type": "code", + "execution_count": 128, + "id": "2a9a3e34", + "metadata": {}, + "outputs": [ + { + "data": { + "image/png": "\n", + "text/plain": [ + "
" + ] + }, + "metadata": {}, + "output_type": "display_data" + } + ], + "source": [ + "sns.barplot(x=df[\"Cpu brand\"],y=df[\"Price\"])\n", + "plt.xticks(rotation=\"vertical\")\n", + "plt.show()\n", + "\n", + "# shows which processor category has the highest price and which has the lowest \n", + "#in short, it shows that the CPU affects the price of the laptop " + ] + }, + { + "cell_type": "code", + "execution_count": 129, + "id": "1ff7c3e9", + "metadata": {}, + "outputs": [], + "source": [ + "#Since we have extracted everything we need from Cpu and Cpu_Name, we can drop them now \n", + "df.drop(columns=[\"Cpu\"],inplace = True) # Dropped Cpu column" + ] + }, + { + "cell_type": "code", + "execution_count": 130, + "id": "8607e220", + "metadata": {}, + "outputs": [], + "source": [ + "df.drop(columns=[\"Cpu_Name\"],inplace = True) #dropped Cpu_Name column " + ] + }, + { + "cell_type": "code", + "execution_count": 131, + "id": "d345c344", + "metadata": {}, + "outputs": [ + { + "data": { + "text/html": [ + "
\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
CompanyTypeNameRamMemoryGpuOpSysWeightPriceTouchscreenIPSppiCpu brand
0AppleUltrabook8128GB SSDIntel Iris Plus Graphics 640macOS1.3771378.683201226.983005Intel Core i5
1AppleUltrabook8128GB Flash StorageIntel HD Graphics 6000macOS1.3447895.523200127.677940Intel Core i5
2HPNotebook8256GB SSDIntel HD Graphics 620No OS1.8630636.000000141.211998Intel Core i5
3AppleUltrabook16512GB SSDAMD Radeon Pro 455macOS1.83135195.336001220.534624Intel Core i7
4AppleUltrabook8256GB SSDIntel Iris Plus Graphics 650macOS1.3796095.808001226.983005Intel Core i5
\n", + "
" + ], + "text/plain": [ + " Company TypeName Ram Memory Gpu \\\n", + "0 Apple Ultrabook 8 128GB SSD Intel Iris Plus Graphics 640 \n", + "1 Apple Ultrabook 8 128GB Flash Storage Intel HD Graphics 6000 \n", + "2 HP Notebook 8 256GB SSD Intel HD Graphics 620 \n", + "3 Apple Ultrabook 16 512GB SSD AMD Radeon Pro 455 \n", + "4 Apple Ultrabook 8 256GB SSD Intel Iris Plus Graphics 650 \n", + "\n", + " OpSys Weight Price Touchscreen IPS ppi Cpu brand \n", + "0 macOS 1.37 71378.6832 0 1 226.983005 Intel Core i5 \n", + "1 macOS 1.34 47895.5232 0 0 127.677940 Intel Core i5 \n", + "2 No OS 1.86 30636.0000 0 0 141.211998 Intel Core i5 \n", + "3 macOS 1.83 135195.3360 0 1 220.534624 Intel Core i7 \n", + "4 macOS 1.37 96095.8080 0 1 226.983005 Intel Core i5 " + ] + }, + "execution_count": 131, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "df.head()" + ] + }, + { + "cell_type": "markdown", + "id": "16d30ff8", + "metadata": {}, + "source": [ + "# Column Ram" + ] + }, + { + "cell_type": "code", + "execution_count": 132, + "id": "20ffbeb0", + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "" + ] + }, + "execution_count": 132, + "metadata": {}, + "output_type": "execute_result" + }, + { + "data": { + "image/png": "\n", + "text/plain": [ + "
" + ] + }, + "metadata": {}, + "output_type": "display_data" + } + ], + "source": [ + "df[\"Ram\"].value_counts().plot(kind='bar')" + ] + }, + { + "cell_type": "code", + "execution_count": 133, + "id": "19936ffc", + "metadata": {}, + "outputs": [ + { + "data": { + "image/png": "\n", + "text/plain": [ + "
" + ] + }, + "metadata": {}, + "output_type": "display_data" + } + ], + "source": [ + "sns.barplot(x=df[\"Ram\"],y=df[\"Price\"])\n", + "plt.xticks(rotation=\"vertical\")\n", + "plt.show()\n", + "\n", + "#This shows that as the Ram size is increasing the price is also increasing " + ] + }, + { + "cell_type": "markdown", + "id": "373713f1", + "metadata": {}, + "source": [ + "# Column Memory" + ] + }, + { + "cell_type": "code", + "execution_count": 134, + "id": "35f4e020", + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "256GB SSD 412\n", + "1TB HDD 223\n", + "500GB HDD 132\n", + "512GB SSD 118\n", + "128GB SSD + 1TB HDD 94\n", + "128GB SSD 76\n", + "256GB SSD + 1TB HDD 73\n", + "32GB Flash Storage 38\n", + "2TB HDD 16\n", + "64GB Flash Storage 15\n", + "512GB SSD + 1TB HDD 14\n", + "1TB SSD 14\n", + "256GB SSD + 2TB HDD 10\n", + "1.0TB Hybrid 9\n", + "256GB Flash Storage 8\n", + "16GB Flash Storage 7\n", + "32GB SSD 6\n", + "180GB SSD 5\n", + "128GB Flash Storage 4\n", + "512GB SSD + 2TB HDD 3\n", + "16GB SSD 3\n", + "512GB Flash Storage 2\n", + "1TB SSD + 1TB HDD 2\n", + "256GB SSD + 500GB HDD 2\n", + "128GB SSD + 2TB HDD 2\n", + "256GB SSD + 256GB SSD 2\n", + "512GB SSD + 256GB SSD 1\n", + "512GB SSD + 512GB SSD 1\n", + "64GB Flash Storage + 1TB HDD 1\n", + "1TB HDD + 1TB HDD 1\n", + "32GB HDD 1\n", + "64GB SSD 1\n", + "128GB HDD 1\n", + "240GB SSD 1\n", + "8GB SSD 1\n", + "508GB Hybrid 1\n", + "1.0TB HDD 1\n", + "512GB SSD + 1.0TB Hybrid 1\n", + "256GB SSD + 1.0TB Hybrid 1\n", + "Name: Memory, dtype: int64" + ] + }, + "execution_count": 134, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "df[\"Memory\"].value_counts()" + ] + }, + { + "cell_type": "code", + "execution_count": 135, + "id": "1a208037", + "metadata": {}, + "outputs": [ + { + "name": "stderr", + "output_type": "stream", + "text": [ + "C:\\Users\\USER\\AppData\\Local\\Temp\\ipykernel_9696\\4023190604.py:16: FutureWarning: The default value of regex 
will change from True to False in a future version.\n", + " df['first'] = df['first'].str.replace(r'\\D', '')\n", + "C:\\Users\\USER\\AppData\\Local\\Temp\\ipykernel_9696\\4023190604.py:25: FutureWarning: The default value of regex will change from True to False in a future version.\n", + " df['second'] = df['second'].str.replace(r'\\D', '')\n" + ] + } + ], + "source": [ + "df['Memory'] = df['Memory'].astype(str).replace('\\.0', '', regex=True)\n", + "df[\"Memory\"] = df[\"Memory\"].str.replace('GB', '')\n", + "df[\"Memory\"] = df[\"Memory\"].str.replace('TB', '000')\n", + "new = df[\"Memory\"].str.split(\"+\", n = 1, expand = True)\n", + "\n", + "df[\"first\"]= new[0]\n", + "df[\"first\"]=df[\"first\"].str.strip()\n", + "\n", + "df[\"second\"]= new[1]\n", + "\n", + "df[\"Layer1HDD\"] = df[\"first\"].apply(lambda x: 1 if \"HDD\" in x else 0)\n", + "df[\"Layer1SSD\"] = df[\"first\"].apply(lambda x: 1 if \"SSD\" in x else 0)\n", + "df[\"Layer1Hybrid\"] = df[\"first\"].apply(lambda x: 1 if \"Hybrid\" in x else 0)\n", + "df[\"Layer1Flash_Storage\"] = df[\"first\"].apply(lambda x: 1 if \"Flash Storage\" in x else 0)\n", + "\n", + "df['first'] = df['first'].str.replace(r'\\D', '')\n", + "\n", + "df[\"second\"].fillna(\"0\", inplace = True)\n", + "\n", + "df[\"Layer2HDD\"] = df[\"second\"].apply(lambda x: 1 if \"HDD\" in x else 0)\n", + "df[\"Layer2SSD\"] = df[\"second\"].apply(lambda x: 1 if \"SSD\" in x else 0)\n", + "df[\"Layer2Hybrid\"] = df[\"second\"].apply(lambda x: 1 if \"Hybrid\" in x else 0)\n", + "df[\"Layer2Flash_Storage\"] = df[\"second\"].apply(lambda x: 1 if \"Flash Storage\" in x else 0)\n", + "\n", + "df['second'] = df['second'].str.replace(r'\\D', '')\n", + "\n", + "df[\"first\"] = df[\"first\"].astype(int)\n", + "df[\"second\"] = df[\"second\"].astype(int)\n", + "\n", + "df[\"HDD\"]=(df[\"first\"]*df[\"Layer1HDD\"]+df[\"second\"]*df[\"Layer2HDD\"])\n", + "df[\"SSD\"]=(df[\"first\"]*df[\"Layer1SSD\"]+df[\"second\"]*df[\"Layer2SSD\"])\n", + 
"df[\"Hybrid\"]=(df[\"first\"]*df[\"Layer1Hybrid\"]+df[\"second\"]*df[\"Layer2Hybrid\"])\n", + "df[\"Flash_Storage\"]=(df[\"first\"]*df[\"Layer1Flash_Storage\"]+df[\"second\"]*df[\"Layer2Flash_Storage\"])\n", + "\n", + "df.drop(columns=['first', 'second', 'Layer1HDD', 'Layer1SSD', 'Layer1Hybrid',\n", + " 'Layer1Flash_Storage', 'Layer2HDD', 'Layer2SSD', 'Layer2Hybrid',\n", + " 'Layer2Flash_Storage'],inplace=True)" + ] + }, + { + "cell_type": "code", + "execution_count": 136, + "id": "998133f2", + "metadata": {}, + "outputs": [ + { + "data": { + "text/html": [ + "
\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
CompanyTypeNameRamMemoryGpuOpSysWeightPriceTouchscreenIPSppiCpu brandHDDSSDHybridFlash_Storage
746SamsungUltrabook16256 SSDIntel HD Graphics 620Windows 100.8187858.72000165.632118Intel Core i7025600
684Lenovo2 in 1 Convertible8512 SSDIntel UHD Graphics 620Windows 101.1979866.72011165.632118Intel Core i7051200
1130HPNotebook82000 HDDIntel HD Graphics 620Windows 102.0433513.12000100.454670Intel Core i72000000
1096DellNotebook81000 HDDAMD Radeon R7 M445Windows 102.3634035.26400141.211998Intel Core i51000000
556MediacomNotebook432 Flash StorageIntel HD GraphicsWindows 101.3513586.40001165.632118Some Other Processor00032
\n", + "
" + ], + "text/plain": [ + " Company TypeName Ram Memory \\\n", + "746 Samsung Ultrabook 16 256 SSD \n", + "684 Lenovo 2 in 1 Convertible 8 512 SSD \n", + "1130 HP Notebook 8 2000 HDD \n", + "1096 Dell Notebook 8 1000 HDD \n", + "556 Mediacom Notebook 4 32 Flash Storage \n", + "\n", + " Gpu OpSys Weight Price Touchscreen IPS \\\n", + "746 Intel HD Graphics 620 Windows 10 0.81 87858.720 0 0 \n", + "684 Intel UHD Graphics 620 Windows 10 1.19 79866.720 1 1 \n", + "1130 Intel HD Graphics 620 Windows 10 2.04 33513.120 0 0 \n", + "1096 AMD Radeon R7 M445 Windows 10 2.36 34035.264 0 0 \n", + "556 Intel HD Graphics Windows 10 1.35 13586.400 0 1 \n", + "\n", + " ppi Cpu brand HDD SSD Hybrid Flash_Storage \n", + "746 165.632118 Intel Core i7 0 256 0 0 \n", + "684 165.632118 Intel Core i7 0 512 0 0 \n", + "1130 100.454670 Intel Core i7 2000 0 0 0 \n", + "1096 141.211998 Intel Core i5 1000 0 0 0 \n", + "556 165.632118 Some Other Processor 0 0 0 32 " + ] + }, + "execution_count": 136, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "df.sample(5)" + ] + }, + { + "cell_type": "code", + "execution_count": 137, + "id": "a0d78722", + "metadata": {}, + "outputs": [], + "source": [ + "#Since all the values have been extracted from the Memory column, it can now be dropped\n", + "df.drop(columns=[\"Memory\"],inplace= True) \n" + ] + }, + { + "cell_type": "code", + "execution_count": 138, + "id": "58f9fe2e", + "metadata": {}, + "outputs": [ + { + "data": { + "text/html": [ + "
\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
CompanyTypeNameRamGpuOpSysWeightPriceTouchscreenIPSppiCpu brandHDDSSDHybridFlash_Storage
0AppleUltrabook8Intel Iris Plus Graphics 640macOS1.3771378.683201226.983005Intel Core i5012800
1AppleUltrabook8Intel HD Graphics 6000macOS1.3447895.523200127.677940Intel Core i5000128
\n", + "
" + ], + "text/plain": [ + " Company TypeName Ram Gpu OpSys Weight \\\n", + "0 Apple Ultrabook 8 Intel Iris Plus Graphics 640 macOS 1.37 \n", + "1 Apple Ultrabook 8 Intel HD Graphics 6000 macOS 1.34 \n", + "\n", + " Price Touchscreen IPS ppi Cpu brand HDD SSD Hybrid \\\n", + "0 71378.6832 0 1 226.983005 Intel Core i5 0 128 0 \n", + "1 47895.5232 0 0 127.677940 Intel Core i5 0 0 0 \n", + "\n", + " Flash_Storage \n", + "0 0 \n", + "1 128 " + ] + }, + "execution_count": 138, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "df.head(2)" + ] + }, + { + "cell_type": "code", + "execution_count": 139, + "id": "10c49486", + "metadata": {}, + "outputs": [ + { + "name": "stderr", + "output_type": "stream", + "text": [ + "C:\\Users\\USER\\AppData\\Local\\Temp\\ipykernel_9696\\1734695575.py:1: FutureWarning: The default value of numeric_only in DataFrame.corr is deprecated. In a future version, it will default to False. Select only valid columns or specify the value of numeric_only to silence this warning.\n", + " df.corr()[\"Price\"]\n" + ] + }, + { + "data": { + "text/plain": [ + "Ram 0.743007\n", + "Weight 0.210370\n", + "Price 1.000000\n", + "Touchscreen 0.191226\n", + "IPS 0.252208\n", + "ppi 0.473487\n", + "HDD -0.096441\n", + "SSD 0.670799\n", + "Hybrid 0.007989\n", + "Flash_Storage -0.040511\n", + "Name: Price, dtype: float64" + ] + }, + "execution_count": 139, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "df.corr()[\"Price\"]" + ] + }, + { + "cell_type": "code", + "execution_count": 140, + "id": "e6cfc1d0", + "metadata": {}, + "outputs": [], + "source": [ + "df.drop(columns=[\"Hybrid\"],inplace= True) #dropped column hybrid \n", + "df.drop(columns=[\"Flash_Storage\"],inplace= True) #dropped column Flash_Storage\n" + ] + }, + { + "cell_type": "code", + "execution_count": 141, + "id": "ab67cfe0", + "metadata": {}, + "outputs": [ + { + "data": { + "text/html": [ + "
\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
CompanyTypeNameRamGpuOpSysWeightPriceTouchscreenIPSppiCpu brandHDDSSD
0AppleUltrabook8Intel Iris Plus Graphics 640macOS1.3771378.683201226.983005Intel Core i50128
1AppleUltrabook8Intel HD Graphics 6000macOS1.3447895.523200127.677940Intel Core i500
2HPNotebook8Intel HD Graphics 620No OS1.8630636.000000141.211998Intel Core i50256
3AppleUltrabook16AMD Radeon Pro 455macOS1.83135195.336001220.534624Intel Core i70512
4AppleUltrabook8Intel Iris Plus Graphics 650macOS1.3796095.808001226.983005Intel Core i50256
\n", + "
" + ], + "text/plain": [ + " Company TypeName Ram Gpu OpSys Weight \\\n", + "0 Apple Ultrabook 8 Intel Iris Plus Graphics 640 macOS 1.37 \n", + "1 Apple Ultrabook 8 Intel HD Graphics 6000 macOS 1.34 \n", + "2 HP Notebook 8 Intel HD Graphics 620 No OS 1.86 \n", + "3 Apple Ultrabook 16 AMD Radeon Pro 455 macOS 1.83 \n", + "4 Apple Ultrabook 8 Intel Iris Plus Graphics 650 macOS 1.37 \n", + "\n", + " Price Touchscreen IPS ppi Cpu brand HDD SSD \n", + "0 71378.6832 0 1 226.983005 Intel Core i5 0 128 \n", + "1 47895.5232 0 0 127.677940 Intel Core i5 0 0 \n", + "2 30636.0000 0 0 141.211998 Intel Core i5 0 256 \n", + "3 135195.3360 0 1 220.534624 Intel Core i7 0 512 \n", + "4 96095.8080 0 1 226.983005 Intel Core i5 0 256 " + ] + }, + "execution_count": 141, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "df.head()" + ] + }, + { + "cell_type": "markdown", + "id": "5451c5a4", + "metadata": {}, + "source": [ + "# Column GPU" + ] + }, + { + "cell_type": "code", + "execution_count": 142, + "id": "3d00793b", + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "Intel HD Graphics 620 281\n", + "Intel HD Graphics 520 185\n", + "Intel UHD Graphics 620 68\n", + "Nvidia GeForce GTX 1050 66\n", + "Nvidia GeForce GTX 1060 48\n", + " ... \n", + "AMD Radeon R5 520 1\n", + "AMD Radeon R7 1\n", + "Intel HD Graphics 540 1\n", + "AMD Radeon 540 1\n", + "ARM Mali T860 MP4 1\n", + "Name: Gpu, Length: 110, dtype: int64" + ] + }, + "execution_count": 142, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "df[\"Gpu\"].value_counts()" + ] + }, + { + "cell_type": "code", + "execution_count": 143, + "id": "834d01dc", + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "0 Intel\n", + "1 Intel\n", + "2 Intel\n", + "3 AMD\n", + "4 Intel\n", + " ... 
\n", + "1298 Intel\n", + "1299 Intel\n", + "1300 Intel\n", + "1301 AMD\n", + "1302 Intel\n", + "Name: Gpu, Length: 1303, dtype: object" + ] + }, + "execution_count": 143, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "df[\"Gpu\"].apply(lambda x:x.split()[0]) #take the first word of each Gpu string, which is the manufacturer name" + ] + }, + { + "cell_type": "code", + "execution_count": 144, + "id": "51a6d4eb", + "metadata": {}, + "outputs": [], + "source": [ + "df[\"Gpu brand\"] = df[\"Gpu\"].apply(lambda x:x.split()[0]) #store the manufacturer name in a new column named Gpu brand" + ] + }, + { + "cell_type": "code", + "execution_count": 145, + "id": "8adfa3b0", + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "Intel 722\n", + "Nvidia 400\n", + "AMD 180\n", + "ARM 1\n", + "Name: Gpu brand, dtype: int64" + ] + }, + "execution_count": 145, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "df[\"Gpu brand\"].value_counts()" + ] + }, + { + "cell_type": "code", + "execution_count": 146, + "id": "99512b35", + "metadata": {}, + "outputs": [ + { + "data": { + "text/html": [ + "
\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
CompanyTypeNameRamGpuOpSysWeightPriceTouchscreenIPSppiCpu brandHDDSSDGpu brand
1191Samsung2 in 1 Convertible4ARM Mali T860 MP4Chrome OS1.1535111.5211234.5074AMD Processor00ARM
\n", + "
" + ], + "text/plain": [ + " Company TypeName Ram Gpu OpSys Weight \\\n", + "1191 Samsung 2 in 1 Convertible 4 ARM Mali T860 MP4 Chrome OS 1.15 \n", + "\n", + " Price Touchscreen IPS ppi Cpu brand HDD SSD Gpu brand \n", + "1191 35111.52 1 1 234.5074 AMD Processor 0 0 ARM " + ] + }, + "execution_count": 146, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "df[df[\"Gpu brand\"]== \"ARM\"]" + ] + }, + { + "cell_type": "code", + "execution_count": 147, + "id": "5b083a8e", + "metadata": {}, + "outputs": [], + "source": [ + "df=df[df[\"Gpu brand\"]!= \"ARM\"]" + ] + }, + { + "cell_type": "code", + "execution_count": 148, + "id": "56e031b0", + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "Intel 722\n", + "Nvidia 400\n", + "AMD 180\n", + "Name: Gpu brand, dtype: int64" + ] + }, + "execution_count": 148, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "df[\"Gpu brand\"].value_counts()" + ] + }, + { + "cell_type": "code", + "execution_count": 149, + "id": "0dde0c7f", + "metadata": {}, + "outputs": [ + { + "data": { + "image/png": 
"iVBORw0KGgoAAAANSUhEUgAAAk0AAAHOCAYAAACFNj2zAAAAOXRFWHRTb2Z0d2FyZQBNYXRwbG90bGliIHZlcnNpb24zLjcuMCwgaHR0cHM6Ly9tYXRwbG90bGliLm9yZy88F64QAAAACXBIWXMAAA9hAAAPYQGoP6dpAABBHklEQVR4nO3df3RU9Z3/8deQkDFEMg3EZJyvgcaaRtIEV0MbAt1CCwSUkPrFLZbgCCsGNJaYNSmUdbWoJVR+u2SryKKgoHH7RVxbNAasskYIYGrU8Eu7RRI0IVSGCWCchHC/f7jcZQjiBYE7Ic/HOXNO5vN5z73vOyeHvPjcO3cchmEYAgAAwBl1s7sBAACAzoDQBAAAYAGhCQAAwAJCEwAAgAWEJgAAAAsITQAAABYQmgAAACwIt7uBS8nx48f16aefqmfPnnI4HHa3AwAALDAMQ4cPH5bH41G3bl+9nkRoOo8+/fRTJSQk2N0GAAA4B/X19brqqqu+cp7QdB717NlT0pdvenR0tM3dAAAAK5qbm5WQkGD+Hf8qhKbz6MQpuejoaEITAACdzNddWsOF4AAAABYQmgAAACwgNAEAAFhAaAIAALCA0AQAAGABoQkAAMACQhMAAIAFhCYAAAALCE0AAAAWEJoAAAAsIDQBAABYQGgCAACwgNAEAABgQbjdDQAAgLNz77336sCBA5KkK664Qo899pjNHXUNhCYAADqZAwcOaP/+/Xa30eVweg4AAMACQhMAAIAFhCYAAAALCE0AAAAWEJoAAAAsIDQBAABYQGgCAACwgNAEAABgAaEJAADAAkITAACABYQmAAAACwhNAAAAFtgamo4dO6Z/+Zd/UWJioiIjI3X11Vfr4Ycf1vHjx80awzA0a9YseTweRUZGaujQodq+fXvQdgKBgKZNm6bY2FhFRUUpJydH+/btC6rx+Xzyer1yuVxyuVzyer06dOhQUE1dXZ3GjBmjqKgoxcbGqqCgQK2trRfs+AEAQOdha2h69NFH9cQTT6i0tFQ7d+7U3LlzNW/ePC1ZssSsmTt3rhYuXKjS0lJt27ZNbrdbI0aM0OHDh82awsJCrV27VmVlZaqsrNSRI0eUnZ2t9vZ2syY3N1c1NTUqLy9XeXm5ampq5PV6zfn29naNHj1aR48eVWVlpcrKyrRmzRoVFRVdnDcDAACENsNGo0ePNu64446gsbFjxxq33XabYRiGcfz4ccPtdhu//e1vzfkvvvjCcLlcxhNPPGEYhmEcOnTI6N69u1FWVmbWfPLJJ0a3bt2M8vJywzAMY8eOHYYko6qqyqzZvHmzIcnYtWuXYRiG8corrxjdunUzPvnkE7Pm+eefN5xOp+H3+y0dj9/vNyRZrgcA4FyMHz/e+MlPfmL85Cc/McaPH293O52e1b/ftq40/fCHP9Trr7+uDz/8UJL03nvvqbKyUjfddJMkac+ePWpsbFRWVpb5GqfTqSFDhmjTpk2SpOrqarW1tQXVeDwepaammjWbN2+Wy+VSRkaGWTNw4EC5XK6gmtTUVHk8HrNm5MiRCgQCqq6uPm3/gUBAzc3NQQ8AAHBpCrdz5zNmzJDf79e1116rsLAwtbe3a/bs2Ro/frwkqbGxUZIUHx8f9Lr4+Hjt3bvXrImIiFBMTEyHmhOvb2xsVFxcXIf9x8XFBdWcup+YmBhFRESYNaeaM2eOHnroobM9bAAA0AnZutL0wgsvaNWqVXruuef05z//WStXrtT8+fO1cuXKoDqHwxH03DCMDmOnOrXmdPXnUnOymTNnyu/3m4/6+voz9gQAADovW1eafvnLX+pXv/qVfv7zn0uS0tLStHfvXs2ZM0cTJ06U2+2W9OUq0JVXXmm+rqmpyVwVcrvdam1tlc/nC1ptampq0qBBg8ya/fv3d9j/gQMHgrazZcuWoHmfz6e2trYOK1AnOJ1OOZ3Ocz18AMA5GrxksN0t2MrZ7JRDX/6HvrG5scu/H29Pe/ui7MfWlabPP/9c3bo
FtxAWFmbeciAxMVFut1vr168351tbW7Vx40YzEKWnp6t79+5BNQ0NDaqtrTVrMjMz5ff7tXXrVrNmy5Yt8vv9QTW1tbVqaGgwayoqKuR0OpWenn6ejxwAAHQ2tq40jRkzRrNnz1afPn30ve99T++++64WLlyoO+64Q9KXp8sKCwtVUlKipKQkJSUlqaSkRD169FBubq4kyeVyafLkySoqKlLv3r3Vq1cvFRcXKy0tTcOHD5ck9evXT6NGjVJeXp6WLl0qSZoyZYqys7OVnJwsScrKylJKSoq8Xq/mzZungwcPqri4WHl5eYqOjrbh3QEAAKHE1tC0ZMkSPfDAA8rPz1dTU5M8Ho+mTp2qBx980KyZPn26WlpalJ+fL5/Pp4yMDFVUVKhnz55mzaJFixQeHq5x48appaVFw4YN04oVKxQWFmbWrF69WgUFBean7HJyclRaWmrOh4WFad26dcrPz9fgwYMVGRmp3NxczZ8//yK8EwAAINQ5DMMw7G7iUtHc3CyXyyW/38/qFABcQF39Gh7na045Wr68psmINBQYGbC5I3t902uarP795rvnAAAALCA0AQAAWEBoAgAAsIDQBAAAYAGhCQAAwAJCEwAAgAWEJgAAAAsITQAAABYQmgAAACwgNAEAAFhAaAIAALCA0AQAAGABoQkAAMCCcLsbAAAAZ8eINE77My4sQhMAAJ1M649a7W6hS+L0HAAAgAWEJgAAAAsITQAAABYQmgAAACwgNAEAAFhAaAIAALCA0AQAAGABoQkAAMACQhMAAIAFhCYAAAALCE0AAAAWEJoAAAAsIDQBAABYQGgCAACwgNAEAABgAaEJAADAAltD07e//W05HI4Oj3vuuUeSZBiGZs2aJY/Ho8jISA0dOlTbt28P2kYgENC0adMUGxurqKgo5eTkaN++fUE1Pp9PXq9XLpdLLpdLXq9Xhw4dCqqpq6vTmDFjFBUVpdjYWBUUFKi1tfWCHj8AAOg8bA1N27ZtU0NDg/lYv369JOlnP/uZJGnu3LlauHChSktLtW3bNrndbo0YMUKHDx82t1FYWKi1a9eqrKxMlZWVOnLkiLKzs9Xe3m7W5ObmqqamRuXl5SovL1dNTY28Xq85397ertGjR+vo0aOqrKxUWVmZ1qxZo6Kioov0TgAAgFDnMAzDsLuJEwoLC/XHP/5RH330kSTJ4/GosLBQM2bMkPTlqlJ8fLweffRRTZ06VX6/X1dccYWeffZZ3XrrrZKkTz/9VAkJCXrllVc0cuRI7dy5UykpKaqqqlJGRoYkqaqqSpmZmdq1a5eSk5P16quvKjs7W/X19fJ4PJKksrIyTZo0SU1NTYqOjrbUf3Nzs1wul/x+v+XXAADO3uAlg+1uASHk7Wlvf6PXW/37HTLXNLW2tmrVqlW644475HA4tGfPHjU2NiorK8uscTqdGjJkiDZt2iRJqq6uVltbW1CNx+NRamqqWbN582a5XC4zMEnSwIED5XK5gmpSU1PNwCRJI0eOVCAQUHV19Vf2HAgE1NzcHPQAAACXppAJTS+99JIOHTqkSZMmSZIaGxslSfHx8UF18fHx5lxjY6MiIiIUExNzxpq4uLgO+4uLiwuqOXU/MTExioiIMGtOZ86cOeZ1Ui6XSwkJCWdxxAAAoDMJmdC0fPly3XjjjUGrPZLkcDiCnhuG0WHsVKfWnK7+XGpONXPmTPn9fvNRX19/xr4AAEDnFRKhae/evdqwYYPuvPNOc8ztdktSh5WepqYmc1XI7XartbVVPp/vjDX79+/vsM8DBw4E1Zy6H5/Pp7a2tg4rUCdzOp2Kjo4OegAAgEtTSISmp59+WnFxcRo9erQ5lpiYKLfbbX6iTvryuqeNGzdq0KBBkqT09HR17949qKahoUG1tbVmTWZmpvx+v7Zu3WrWbNmyRX6/P6imtrZWDQ0NZk1FRYWcTqfS09MvzEEDAIBOJdzuBo4fP66nn35aEydOVHj4/7bjcDhUWFiokpISJSU
lKSkpSSUlJerRo4dyc3MlSS6XS5MnT1ZRUZF69+6tXr16qbi4WGlpaRo+fLgkqV+/fho1apTy8vK0dOlSSdKUKVOUnZ2t5ORkSVJWVpZSUlLk9Xo1b948HTx4UMXFxcrLy2P1CAAASAqB0LRhwwbV1dXpjjvu6DA3ffp0tbS0KD8/Xz6fTxkZGaqoqFDPnj3NmkWLFik8PFzjxo1TS0uLhg0bphUrVigsLMysWb16tQoKCsxP2eXk5Ki0tNScDwsL07p165Sfn6/BgwcrMjJSubm5mj9//gU8cgAA0JmE1H2aOjvu0wQAFwf3acLJutx9mgAAAEIZoQkAAMACQhMAAIAFhCYAAAALCE0AAAAWEJoAAAAsIDQBAABYQGgCAACwgNAEAABgAaEJAADAAkITAACABYQmAAAACwhNAAAAFhCaAAAALCA0AQAAWEBoAgAAsIDQBAAAYAGhCQAAwAJCEwAAgAWEJgAAAAsITQAAABYQmgAAACwgNAEAAFhAaAIAALCA0AQAAGABoQkAAMACQhMAAIAFhCYAAAALCE0AAAAWEJoAAAAsIDQBAABYYHto+uSTT3Tbbbepd+/e6tGjh/7u7/5O1dXV5rxhGJo1a5Y8Ho8iIyM1dOhQbd++PWgbgUBA06ZNU2xsrKKiopSTk6N9+/YF1fh8Pnm9XrlcLrlcLnm9Xh06dCiopq6uTmPGjFFUVJRiY2NVUFCg1tbWC3bsAACg87A1NPl8Pg0ePFjdu3fXq6++qh07dmjBggX61re+ZdbMnTtXCxcuVGlpqbZt2ya3260RI0bo8OHDZk1hYaHWrl2rsrIyVVZW6siRI8rOzlZ7e7tZk5ubq5qaGpWXl6u8vFw1NTXyer3mfHt7u0aPHq2jR4+qsrJSZWVlWrNmjYqKii7KewEAAEKbwzAMw66d/+pXv9Lbb7+tt95667TzhmHI4/GosLBQM2bMkPTlqlJ8fLweffRRTZ06VX6/X1dccYWeffZZ3XrrrZKkTz/9VAkJCXrllVc0cuRI7dy5UykpKaqqqlJGRoYkqaqqSpmZmdq1a5eSk5P16quvKjs7W/X19fJ4PJKksrIyTZo0SU1NTYqOjv7a42lubpbL5ZLf77dUDwA4N4OXDLa7BYSQt6e9/Y1eb/Xvt60rTS+//LIGDBign/3sZ4qLi9P111+vZcuWmfN79uxRY2OjsrKyzDGn06khQ4Zo06ZNkqTq6mq1tbUF1Xg8HqWmppo1mzdvlsvlMgOTJA0cOFAulyuoJjU11QxMkjRy5EgFAoGg04UnCwQCam5uDnoAAIBLk62h6a9//asef/xxJSUl6bXXXtNdd92lgoICPfPMM5KkxsZGSVJ8fHzQ6+Lj4825xsZGRUREKCYm5ow1cXFxHfYfFxcXVHPqfmJiYhQREWHWnGrOnDnmNVIul0sJCQln+xYAAIBOwtbQdPz4cd1www0qKSnR9ddfr6lTpyovL0+PP/54UJ3D4Qh6bhhGh7FTnVpzuvpzqTnZzJkz5ff7zUd9ff0ZewIAAJ2XraHpyiuvVEpKStBYv379VFdXJ0lyu92S1GGlp6mpyVwVcrvdam1tlc/nO2PN/v37O+z/wIEDQTWn7sfn86mtra3DCtQJTqdT0dHRQQ8AAHBpsjU0DR48WLt37w4a+/DDD9W3b19JUmJiotxut9avX2/Ot7a2auPGjRo0aJAkKT09Xd27dw+qaWhoUG1trVmTmZkpv9+vrVu3mjVbtmyR3+8PqqmtrVVDQ4NZU1FRIafTqfT09PN85AAAoLMJt3Pn//RP/6RBgwappKRE48aN09atW/Xkk0/qySeflPTl6bLCwkKVlJQoKSlJSUlJKikpUY8ePZSbmytJcrlcmjx5soqKitS7d2/16tVLxcXFSktL0/DhwyV9uXo1atQo5eXlaenSpZKkKVOmKDs7W8nJyZKkrKwspaSkyOv1at68eTp48KCKi4uVl5fHChIAALA3NH3/+9/X2rVrNXPmTD3
88MNKTEzU4sWLNWHCBLNm+vTpamlpUX5+vnw+nzIyMlRRUaGePXuaNYsWLVJ4eLjGjRunlpYWDRs2TCtWrFBYWJhZs3r1ahUUFJifssvJyVFpaak5HxYWpnXr1ik/P1+DBw9WZGSkcnNzNX/+/IvwTgAAgFBn632aLjXcpwkALg7u04STdYn7NAEAAHQWhCYAAAALCE0AAAAWEJoAAAAsIDQBAABYQGgCAACwgNAEAABgga03twROde+99+rAgQOSpCuuuEKPPfaYzR0BAPAlQhNCyoEDB0775coAANiN03MAAAAWEJoAAAAsIDQBAABYQGgCAACwgNAEAABgAaEJAADAAkITAACABYQmAAAACwhNAAAAFhCaAAAALOBrVEJM+i+fsbsFW0X7jphJvsF3pMu/H9Xzbre7BQDA/2ClCQAAwAJCEwAAgAWEJgAAAAsITQAAABYQmgAAACwgNAEAAFhAaAIAALCA0AQAAGABoQkAAMACQhMAAIAFfI0KQsrx7lGn/RkAALvZutI0a9YsORyOoIfb7TbnDcPQrFmz5PF4FBkZqaFDh2r79u1B2wgEApo2bZpiY2MVFRWlnJwc7du3L6jG5/PJ6/XK5XLJ5XLJ6/Xq0KFDQTV1dXUaM2aMoqKiFBsbq4KCArW2tl6wY8fpHUm+Uc2p/6Dm1H/QkeQb7W4HAACT7afnvve976mhocF8fPDBB+bc3LlztXDhQpWWlmrbtm1yu90aMWKEDh8+bNYUFhZq7dq1KisrU2VlpY4cOaLs7Gy1t7ebNbm5uaqpqVF5ebnKy8tVU1Mjr9drzre3t2v06NE6evSoKisrVVZWpjVr1qioqOjivAkAACDk2X56Ljw8PGh16QTDMLR48WLdf//9Gjt2rCRp5cqVio+P13PPPaepU6fK7/dr+fLlevbZZzV8+HBJ0qpVq5SQkKANGzZo5MiR2rlzp8rLy1VVVaWMjAxJ0rJly5SZmandu3crOTlZFRUV2rFjh+rr6+XxeCRJCxYs0KRJkzR79mxFR0dfpHcDAACEKttXmj766CN5PB4lJibq5z//uf76179Kkvbs2aPGxkZlZWWZtU6nU0OGDNGmTZskSdXV1Wprawuq8Xg8Sk1NNWs2b94sl8tlBiZJGjhwoFwuV1BNamqqGZgkaeTIkQoEAqqurv7K3gOBgJqbm4MeAADg0mRraMrIyNAzzzyj1157TcuWLVNjY6MGDRqkzz77TI2NjZKk+Pj4oNfEx8ebc42NjYqIiFBMTMwZa+Li4jrsOy4uLqjm1P3ExMQoIiLCrDmdOXPmmNdJuVwuJSQknOU7AAAAOgtbQ9ONN96oW265RWlpaRo+fLjWrVsn6cvTcCc4HI6g1xiG0WHsVKfWnK7+XGpONXPmTPn9fvNRX19/xr4AAEDnZfvpuZNFRUUpLS1NH330kXmd06krPU1NTeaqkNvtVmtrq3w+3xlr9u/f32FfBw4cCKo5dT8+n09tbW0dVqBO5nQ6FR0dHfQAAACXppAKTYFAQDt37tSVV16pxMREud1urV+/3pxvbW3Vxo0bNWjQIElSenq6unfvHlTT0NCg2tpasyYzM1N+v19bt241a7Zs2SK/3x9UU1tbq4aGBrOmoqJCTqdT6enpF/SYAQBA52Drp+eKi4s1ZswY9enTR01NTfrNb36j5uZmTZw4UQ6HQ4WFhSopKVFSUpKSkpJUUlKiHj16KDc3V5Lkcrk0efJkFRUVqXfv3urVq5eKi4vN032S1K9fP40aNUp5eXlaunSpJGnKlCnKzs5WcnKyJCkrK0spKSnyer2aN2+eDh48qOLiYuXl5bF6BAAAJNkcmvbt26fx48frb3/7m6644goNHDhQVVVV6tu3ryRp+vTpamlpUX5+vnw+nzIyMlRRUaGePXua21i0aJHCw8M1btw4tbS0aNiwYVqxYoXCwsLMmtWrV6ugoMD8lF1OTo5KS0vN+bCwMK1bt075+fkaPHiwIiMjlZubq/n
z51+kdwIAAIQ6h2EYht1NXCqam5vlcrnk9/vPeYUq/ZfPnOeu0JlVz7vd7haAkDR4yWC7W0AIeXva29/o9Vb/fofUNU0AAAChitAEAABgAaEJAADAAkITAACABYQmAAAAC75RaPrLX/6i1157TS0tLZK+/NoRAACAS9E5habPPvtMw4cP13e/+13ddNNN5p2077zzThUVFZ3XBgEAAELBOYWmf/qnf1J4eLjq6urUo0cPc/zWW29VeXn5eWsOAAAgVJzTHcErKir02muv6aqrrgoaT0pK0t69e89LYwAAAKHknFaajh49GrTCdMLf/vY3OZ3Ob9wUAABAqDmn0PSjH/1Izzzzv1/34XA4dPz4cc2bN08//vGPz1tzAAAAoeKcTs/NmzdPQ4cO1TvvvKPW1lZNnz5d27dv18GDB/X229/s+18AAABC0TmtNKWkpOj999/XD37wA40YMUJHjx7V2LFj9e677+o73/nO+e4RAADAdue00iRJbrdbDz300PnsBQBC0r333qsDBw5Ikq644go99thjNncEwA7nFJqefvppXX755frZz34WNP773/9en3/+uSZOnHhemgOAUHDgwAHt37/f7jYA2OycTs/99re/VWxsbIfxuLg4lZSUfOOmAAAAQs05haa9e/cqMTGxw3jfvn1VV1f3jZsCAAAINecUmuLi4vT+++93GH/vvffUu3fvb9wUAABAqDmn0PTzn/9cBQUFeuONN9Te3q729nb96U9/0r333quf//zn57tHAAAA253TheC/+c1vtHfvXg0bNkzh4V9u4vjx47r99tu5pgkAAFySzik0RURE6IUXXtAjjzyi9957T5GRkUpLS1Pfvn3Pd38AAAAh4Zzv0yRJ3/3ud/Xd7373fPUCAAAQsiyHpvvuu0+PPPKIoqKidN99952xduHChd+4MQAAgFBiOTS9++67amtrkyT9+c9/lsPhOG3dV40DAAB0ZpZD0xtvvGH+/Oabb16IXgAAAELWWd9y4NixYwoPD1dtbe2F6AcAACAknXVoCg8PV9++fdXe3n4h+gEAAAhJ53Rzy3/5l3/RzJkzdfDgwfPdDwAAQEg6p1sO/Ou//qv+8pe/yOPxqG/fvoqKigqa//Of/3xemgMAAAgV5xSabr75ZjkcDhmGcb77ARCC6h5Os7sFWx071FtS2P/8/GmXfz/6PPiB3S0Atjir0PT555/rl7/8pV566SW1tbVp2LBhWrJkiWJjYy9UfwAAACHhrK5p+vWvf60VK1Zo9OjRGj9+vDZs2KC77777vDQyZ84cORwOFRYWmmOGYWjWrFnyeDyKjIzU0KFDtX379qDXBQIBTZs2TbGxsYqKilJOTo727dsXVOPz+eT1euVyueRyueT1enXo0KGgmrq6Oo0ZM0ZRUVGKjY1VQUGBWltbz8uxAQCAzu+sQtOLL76o5cuX68knn9Rjjz2mdevW6aWXXvrGn6Tbtm2bnnzySfXv3z9ofO7cuVq4cKFKS0u1bds2ud1ujRgxQocPHzZrCgsLtXbtWpWVlamyslJHjhxRdnZ2UE+5ubmqqalReXm5ysvLVVNTI6/Xa863t7dr9OjROnr0qCorK1VWVqY1a9aoqKjoGx0XAAC4dJxVaKqvr9ff//3fm89/8IMfKDw8XJ9++uk5N3DkyBFNmDBBy5YtU0xMjDluGIYWL16s+++/X2PHjlVqaqpWrlypzz//XM8995wkye/3a/ny5VqwYIGGDx+u66+/XqtWrdIHH3ygDRs2SJJ27typ8vJy/fu//7syMzOVmZmpZcuW6Y9//KN2794tSaqoqNCOHTu0atUqXX/99Ro+fLgWLFigZcuWqbm5+ZyPDQAAXDrOKjS1t7crIiIiaCw8PFzHjh075wbuuecejR49WsOHDw8a37NnjxobG5WVlWWOOZ1ODRkyRJs2bZIkVVdXq62tLajG4/EoNTXVrNm8ebNcLpcyMjLMmoEDB8rlcgXVpKamyuPxmDUjR45
UIBBQdXX1V/YeCATU3Nwc9AAAAJems7oQ3DAMTZo0SU6n0xz74osvdNdddwXdduDFF1+0tL2ysjL9+c9/1rZt2zrMNTY2SpLi4+ODxuPj47V3716zJiIiImiF6kTNidc3NjYqLi6uw/bj4uKCak7dT0xMjCIiIsya05kzZ44eeuihrztMAABwCTir0DRx4sQOY7fddts57bi+vl733nuvKioqdNlll31l3alfAGwYxtd+KfCpNaerP5eaU82cOVP33Xef+by5uVkJCQln7A0AAHROZxWann766fO24+rqajU1NSk9Pd0ca29v13/913+ptLTUvN6osbFRV155pVnT1NRkrgq53W61trbK5/MFrTY1NTVp0KBBZs3+/fs77P/AgQNB29myZUvQvM/nU1tbW4cVqJM5nc6gVTcAAHDpOqevUTkfhg0bpg8++EA1NTXmY8CAAZowYYJqamp09dVXy+12a/369eZrWltbtXHjRjMQpaenq3v37kE1DQ0Nqq2tNWsyMzPl9/u1detWs2bLli3y+/1BNbW1tWpoaDBrKioq5HQ6g0IdAADous7pjuDnQ8+ePZWamho0FhUVpd69e5vjhYWFKikpUVJSkpKSklRSUqIePXooNzdXkuRyuTR58mQVFRWpd+/e6tWrl4qLi5WWlmZeWN6vXz+NGjVKeXl5Wrp0qSRpypQpys7OVnJysiQpKytLKSkp8nq9mjdvng4ePKji4mLl5eUpOjr6Yr0lAAAghNkWmqyYPn26WlpalJ+fL5/Pp4yMDFVUVKhnz55mzaJFixQeHq5x48appaVFw4YN04oVKxQWFmbWrF69WgUFBean7HJyclRaWmrOh4WFad26dcrPz9fgwYMVGRmp3NxczZ8//+IdLAAACGkOgy+QO2+am5vlcrnk9/vPeYUq/ZfPnOeu0JlVz7vd7hYk8d1zxVW99Vngy/+I9Xa2a/7Az2zuyF6h8N1zg5cMtrsFhJC3p739jV5v9e+3bdc0AQAAdCYhfXoOAEJBL2f7aX8G0LUQmgDga/zz9YfsbgFACOD0HAAAgAWEJgAAAAsITQAAABYQmgAAACwgNAEAAFhAaAIAALCA0AQAAGABoQkAAMACQhMAAIAFhCYAAAALCE0AAAAWEJoAAAAsIDQBAABYQGgCAACwgNAEAABgAaEJAADAAkITAACABYQmAAAACwhNAAAAFhCaAAAALCA0AQAAWEBoAgAAsIDQBAAAYAGhCQAAwAJCEwAAgAWEJgAAAAsITQAAABYQmgAAACywNTQ9/vjj6t+/v6KjoxUdHa3MzEy9+uqr5rxhGJo1a5Y8Ho8iIyM1dOhQbd++PWgbgUBA06ZNU2xsrKKiopSTk6N9+/YF1fh8Pnm9XrlcLrlcLnm9Xh06dCiopq6uTmPGjFFUVJRiY2NVUFCg1tbWC3bsAACgc7E1NF111VX67W9/q3feeUfvvPOOfvKTn+inP/2pGYzmzp2rhQsXqrS0VNu2bZPb7daIESN0+PBhcxuFhYVau3atysrKVFlZqSNHjig7O1vt7e1mTW5urmpqalReXq7y8nLV1NTI6/Wa8+3t7Ro9erSOHj2qyspKlZWVac2aNSoqKrp4bwYAAAhp4XbufMyYMUHPZ8+erccff1xVVVVKSUnR4sWLdf/992vs2LGSpJUrVyo+Pl7PPfecpk6dKr/fr+XLl+vZZ5/V8OHDJUmrVq1SQkKCNmzYoJEjR2rnzp0qLy9XVVWVMjIyJEnLli1TZmamdu/ereTkZFVUVGjHjh2qr6+Xx+ORJC1YsECTJk3S7NmzFR0dfRHfFQAAEIpC5pqm9vZ2lZWV6ejRo8rMzNSePXvU2NiorKwss8bpdGrIkCHatGmTJKm6ulptbW1BNR6PR6mpqWbN5s2b5XK5zMAkSQMHDpTL5QqqSU1NNQOTJI0cOVKBQEDV1dVf2XMgEFBzc3PQAwAAXJpsD00ffPCBLr/8cjmdTt11111au3atUlJS1Nj
YKEmKj48Pqo+PjzfnGhsbFRERoZiYmDPWxMXFddhvXFxcUM2p+4mJiVFERIRZczpz5swxr5NyuVxKSEg4y6MHAACdhe2hKTk5WTU1NaqqqtLdd9+tiRMnaseOHea8w+EIqjcMo8PYqU6tOV39udScaubMmfL7/eajvr7+jH0BAIDOy/bQFBERoWuuuUYDBgzQnDlzdN111+mxxx6T2+2WpA4rPU1NTeaqkNvtVmtrq3w+3xlr9u/f32G/Bw4cCKo5dT8+n09tbW0dVqBO5nQ6zU/+nXgAAIBLk+2h6VSGYSgQCCgxMVFut1vr168351pbW7Vx40YNGjRIkpSenq7u3bsH1TQ0NKi2ttasyczMlN/v19atW82aLVu2yO/3B9XU1taqoaHBrKmoqJDT6VR6evoFPV4AANA52PrpuX/+53/WjTfeqISEBB0+fFhlZWV68803VV5eLofDocLCQpWUlCgpKUlJSUkqKSlRjx49lJubK0lyuVyaPHmyioqK1Lt3b/Xq1UvFxcVKS0szP03Xr18/jRo1Snl5eVq6dKkkacqUKcrOzlZycrIkKSsrSykpKfJ6vZo3b54OHjyo4uJi5eXlsXoEAAAk2Rya9u/fL6/Xq4aGBrlcLvXv31/l5eUaMWKEJGn69OlqaWlRfn6+fD6fMjIyVFFRoZ49e5rbWLRokcLDwzVu3Di1tLRo2LBhWrFihcLCwsya1atXq6CgwPyUXU5OjkpLS835sLAwrVu3Tvn5+Ro8eLAiIyOVm5ur+fPnX6R3AgAAhDqHYRiG3U1cKpqbm+VyueT3+895hSr9l8+c567QmVXPu93uFiRJdQ+n2d0CQkifBz+wuwUNXjLY7hYQQt6e9vY3er3Vv98hd00TAABAKCI0AQAAWEBoAgAAsIDQBAAAYAGhCQAAwAJCEwAAgAWEJgAAAAsITQAAABYQmgAAACwgNAEAAFhAaAIAALCA0AQAAGABoQkAAMACQhMAAIAFhCYAAAALCE0AAAAWEJoAAAAsIDQBAABYQGgCAACwgNAEAABgAaEJAADAAkITAACABYQmAAAACwhNAAAAFhCaAAAALCA0AQAAWEBoAgAAsIDQBAAAYAGhCQAAwAJCEwAAgAWEJgAAAAtsDU1z5szR97//ffXs2VNxcXG6+eabtXv37qAawzA0a9YseTweRUZGaujQodq+fXtQTSAQ0LRp0xQbG6uoqCjl5ORo3759QTU+n09er1cul0sul0ter1eHDh0Kqqmrq9OYMWMUFRWl2NhYFRQUqLW19YIcOwAA6FxsDU0bN27UPffco6qqKq1fv17Hjh1TVlaWjh49atbMnTtXCxcuVGlpqbZt2ya3260RI0bo8OHDZk1hYaHWrl2rsrIyVVZW6siRI8rOzlZ7e7tZk5ubq5qaGpWXl6u8vFw1NTXyer3mfHt7u0aPHq2jR4+qsrJSZWVlWrNmjYqKii7OmwEAAEJauJ07Ly8vD3r+9NNPKy4uTtXV1frRj34kwzC0ePFi3X///Ro7dqwkaeXKlYqPj9dzzz2nqVOnyu/3a/ny5Xr22Wc1fPhwSdKqVauUkJCgDRs2aOTIkdq5c6fKy8tVVVWljIwMSdKyZcuUmZmp3bt3Kzk5WRUVFdqxY4fq6+vl8XgkSQsWLNCkSZM0e/ZsRUdHX8R3BgAAhJqQuqbJ7/dLknr16iVJ2rNnjxobG5WVlWXWOJ1ODRkyRJs2bZIkVVdXq62tLajG4/EoNTXVrNm8ebNcLpcZmCRp4MCBcrlcQTWpqalmYJKkkSNHKhAIqLq6+rT9BgIBNTc3Bz0AAMClKWRCk2EYuu+++/TDH/5QqampkqTGxkZJUnx8fFBtfHy8OdfY2KiIiAjFxMScsSYuLq7DPuPi4oJqTt1PTEyMIiIizJpTzZkzx7xGyuVyKSEh4WwPGwAAdBIhE5p+8Ytf6P3339fzzz/fYc7hcAQ9Nwyjw9ipTq05Xf251Jxs5syZ8vv95qO+vv6MPQE
AgM4rJELTtGnT9PLLL+uNN97QVVddZY673W5J6rDS09TUZK4Kud1utba2yufznbFm//79HfZ74MCBoJpT9+Pz+dTW1tZhBeoEp9Op6OjooAcAALg02RqaDMPQL37xC7344ov605/+pMTExKD5xMREud1urV+/3hxrbW3Vxo0bNWjQIElSenq6unfvHlTT0NCg2tpasyYzM1N+v19bt241a7Zs2SK/3x9UU1tbq4aGBrOmoqJCTqdT6enp5//gAQBAp2Lrp+fuuecePffcc/rP//xP9ezZ01zpcblcioyMlMPhUGFhoUpKSpSUlKSkpCSVlJSoR48eys3NNWsnT56soqIi9e7dW7169VJxcbHS0tLMT9P169dPo0aNUl5enpYuXSpJmjJlirKzs5WcnCxJysrKUkpKirxer+bNm6eDBw+quLhYeXl5rCABAAB7Q9Pjjz8uSRo6dGjQ+NNPP61JkyZJkqZPn66Wlhbl5+fL5/MpIyNDFRUV6tmzp1m/aNEihYeHa9y4cWppadGwYcO0YsUKhYWFmTWrV69WQUGB+Sm7nJwclZaWmvNhYWFat26d8vPzNXjwYEVGRio3N1fz58+/QEcPAAA6E4dhGIbdTVwqmpub5XK55Pf7z3l1Kv2Xz5znrtCZVc+73e4WJEl1D6fZ3QJCSJ8HP7C7BQ1eMtjuFhBC3p729jd6vdW/3yFxITgAAECoIzQBAABYQGgCAACwgNAEAABgAaEJAADAAkITAACABYQmAAAACwhNAAAAFhCaAAAALCA0AQAAWEBoAgAAsIDQBAAAYAGhCQAAwAJCEwAAgAWEJgAAAAsITQAAABYQmgAAACwgNAEAAFhAaAIAALCA0AQAAGABoQkAAMACQhMAAIAFhCYAAAALCE0AAAAWEJoAAAAsIDQBAABYQGgCAACwgNAEAABgAaEJAADAAkITAACABYQmAAAAC2wNTf/1X/+lMWPGyOPxyOFw6KWXXgqaNwxDs2bNksfjUWRkpIYOHart27cH1QQCAU2bNk2xsbGKiopSTk6O9u3bF1Tj8/nk9Xrlcrnkcrnk9Xp16NChoJq6ujqNGTNGUVFRio2NVUFBgVpbWy/EYQMAgE7I1tB09OhRXXfddSotLT3t/Ny5c7Vw4UKVlpZq27ZtcrvdGjFihA4fPmzWFBYWau3atSorK1NlZaWOHDmi7Oxstbe3mzW5ubmqqalReXm5ysvLVVNTI6/Xa863t7dr9OjROnr0qCorK1VWVqY1a9aoqKjowh08AADoVMLt3PmNN96oG2+88bRzhmFo8eLFuv/++zV27FhJ0sqVKxUfH6/nnntOU6dOld/v1/Lly/Xss89q+PDhkqRVq1YpISFBGzZs0MiRI7Vz506Vl5erqqpKGRkZkqRly5YpMzNTu3fvVnJysioqKrRjxw7V19fL4/FIkhYsWKBJkyZp9uzZio6OvgjvBgAACGUhe03Tnj171NjYqKysLHPM6XRqyJAh2rRpkySpurpabW1tQTUej0epqalmzebNm+VyuczAJEkDBw6Uy+UKqklNTTUDkySNHDlSgUBA1dXVX9ljIBBQc3Nz0AMAAFyaQjY0NTY2SpLi4+ODxuPj4825xsZGRUREKCYm5ow1cXFxHbYfFxcXVHPqfmJiYhQREWHWnM6cOXPM66RcLpcSEhLO8igBAEBnEbKh6QSHwxH03DCMDmOnOrXmdPXnUnOqmTNnyu/3m4/6+voz9gUAADqvkA1Nbrdbkjqs9DQ1NZmrQm63W62trfL5fGes2b9/f4ftHzhwIKjm1P34fD61tbV1WIE6mdPpVHR0dNADAABcmkI2NCUmJsrtdmv9+vXmWGtrqzZu3KhBgwZJktLT09W9e/egmoaGBtXW1po1mZmZ8vv92rp1q1mzZcsW+f3+oJra2lo1NDSYNRUVFXI6nUpPT7+gxwkAADoHWz89d+TIEf3lL38xn+/Zs0c1NTXq1auX+vTpo8LCQpWUlCgpKUlJSUk
qKSlRjx49lJubK0lyuVyaPHmyioqK1Lt3b/Xq1UvFxcVKS0szP03Xr18/jRo1Snl5eVq6dKkkacqUKcrOzlZycrIkKSsrSykpKfJ6vZo3b54OHjyo4uJi5eXlsXoEAAAk2Rya3nnnHf34xz82n993332SpIkTJ2rFihWaPn26WlpalJ+fL5/Pp4yMDFVUVKhnz57maxYtWqTw8HCNGzdOLS0tGjZsmFasWKGwsDCzZvXq1SooKDA/ZZeTkxN0b6iwsDCtW7dO+fn5Gjx4sCIjI5Wbm6v58+df6LcAAAB0Eg7DMAy7m7hUNDc3y+Vyye/3n/MKVfovnznPXaEzq553u90tSJLqHk6zuwWEkD4PfmB3Cxq8ZLDdLSCEvD3t7W/0eqt/v0P2miYAAIBQQmgCAACwgNAEAABgAaEJAADAAkITAACABYQmAAAACwhNAAAAFhCaAAAALCA0AQAAWEBoAgAAsIDQBAAAYAGhCQAAwAJCEwAAgAWEJgAAAAsITQAAABYQmgAAACwgNAEAAFhAaAIAALCA0AQAAGABoQkAAMACQhMAAIAFhCYAAAALCE0AAAAWEJoAAAAsIDQBAABYQGgCAACwgNAEAABgAaEJAADAAkITAACABYQmAAAACwhNAAAAFhCaTvG73/1OiYmJuuyyy5Senq633nrL7pYAAEAIIDSd5IUXXlBhYaHuv/9+vfvuu/r7v/973Xjjjaqrq7O7NQAAYDNC00kWLlyoyZMn684771S/fv20ePFiJSQk6PHHH7e7NQAAYLNwuxsIFa2traqurtavfvWroPGsrCxt2rTptK8JBAIKBALmc7/fL0lqbm4+5z7aAy3n/Fpcer7J79L5dPiLdrtbQAgJhd/LYy3H7G4BIeSb/k6eeL1hGGesIzT9j7/97W9qb29XfHx80Hh8fLwaGxtP+5o5c+booYce6jCekJBwQXpE1+NacpfdLQAdzXHZ3QEQxDXj/PxOHj58WC7XV2+L0HQKh8MR9NwwjA5jJ8ycOVP33Xef+fz48eM6ePCgevfu/ZWvwddrbm5WQkKC6uvrFR0dbXc7gCR+LxF6+J08fwzD0OHDh+XxeM5YR2j6H7GxsQoLC+uwqtTU1NRh9ekEp9Mpp9MZNPatb33rQrXY5URHR/MPAUIOv5cINfxOnh9nWmE6gQvB/0dERITS09O1fv36oPH169dr0KBBNnUFAABCBStNJ7nvvvvk9Xo1YMAAZWZm6sknn1RdXZ3uuovrSgAA6OoITSe59dZb9dlnn+nhhx9WQ0ODUlNT9corr6hv3752t9alOJ1O/frXv+5w6hOwE7+XCDX8Tl58DuPrPl8HAAAArmkCAACwgtAEAABgAaEJAADAAkITAACABYQmAAAACwhNAAAAFnCfJgA4DcMwVF1drY8//lgOh0OJiYm6/vrr+V5JoAsjNMFWL7/8suXanJycC9gJ8L/eeOMNTZ48WXv37tWJW9mdCE5PPfWUfvSjH9ncIbqyffv26eWXX1ZdXZ1aW1uD5hYuXGhTV10DN7eErbp1s3aG2OFwqL29/QJ3A0h/+ctfdN111ykjI0P33nuvrr32WhmGoR07duhf//Vf9c477+j999/X1VdfbXer6IJef/115eTkKDExUbt371Zqaqo+/vhjGYahG264QX/605/sbvGSRmgCgJP84he/0M6dO/X66693mDMMQ8OHD1dKSoqWLFliQ3fo6n7wgx9o1KhRevjhh9WzZ0+99957iouL04QJEzRq1Cjdfffddrd4SeNCcISkL774wu4W0EW9+eabKiwsPO2cw+FQYWGh3njjjYvbFPA/du7cqYkTJ0qSwsPD1dLSossvv1wPP/ywHn30UZu7u/QRmhAy2tvb9cgjj+j//J//o8svv1x//etfJUkPPPCAli9fbnN36Crq6uqUlpb2lfOpqanau3fvRewI+F9RUVEKBAKSJI/Ho//+7/825/72t7/Z1VaXQWhCyJg9e7ZWrFihuXP
nKiIiwhxPS0vTv//7v9vYGbqSI0eOqEePHl8536NHD33++ecXsSPgfw0cOFBvv/22JGn06NEqKirS7Nmzdccdd2jgwIE2d3fp49NzCBnPPPOMnnzySQ0bNkx33XWXOd6/f3/t2rXLxs7Q1ezYsUONjY2nneN/87DTwoULdeTIEUnSrFmzdOTIEb3wwgu65pprtGjRIpu7u/QRmhAyPvnkE11zzTUdxo8fP662tjYbOkJXNWzYMJ3uMzIOh0OGYXCvJtjm5E9t9ujRQ7/73e9s7KbrITQhZHzve9/TW2+9pb59+waN//73v9f1119vU1foavbs2WN3CwBCFKEJIePXv/61vF6vPvnkEx0/flwvvviidu/erWeeeUZ//OMf7W4PXcSpoR2wW69evfThhx8qNjZWMTExZ1zpPHjw4EXsrOshNCFkjBkzRi+88IJKSkrkcDj04IMP6oYbbtAf/vAHjRgxwu720EXU1dVZquvTp88F7gT40qJFi9SzZ09J0uLFi+1tpovj5pYAcJKwsDDz55O/QuXkMe5QD3RNrDQhZFx99dXatm2bevfuHTR+6NAh3XDDDeZ9m4ALyeFw6KqrrtKkSZM0ZswYhYfzzyTs1dzcbLk2Ojr6AnYCVpoQMrp166bGxkbFxcUFje/fv199+vQxb+gGXEiNjY1auXKlVqxYIZ/Pp9tuu02TJ09Wv3797G4NXVS3bt0sf2KTFdALi/9CwXYvv/yy+fNrr70ml8tlPm9vb9frr7+ub3/72zZ0hq7I7XZrxowZmjFjhiorK/X0008rIyNDKSkpmjx5siZPnmz5i6aB8+Hkr+35+OOP9atf/UqTJk1SZmamJGnz5s1auXKl5syZY1eLXQYrTbDdiT9AJ+6Bc7Lu3bvr29/+thYsWKDs7Gw72gO0f/9+jR8/Xhs3btSBAwfUq1cvu1tCFzVs2DDdeeedGj9+fND4c889pyeffFJvvvmmPY11Efx3CbY7fvy4jh8/rj59+qipqcl8fvz4cQUCAe3evZvABFts2rRJd955p7773e/qyJEj+rd/+zd961vfsrstdGGbN2/WgAEDOowPGDBAW7dutaGjroXQhJCxZ88excbG2t0GuriGhgY9+uijuvbaa/V//+//VXR0tDZt2qStW7fqrrvu4tQcbJWQkKAnnniiw/jSpUuVkJBgQ0ddC6fnEFJef/11vf766+aK08meeuopm7pCVxIRESGPx6OJEycqJydH3bt3P21d//79L3JngPTKK6/olltu0Xe+8x3zC3qrqqr03//931qzZo1uuukmmzu8tBGaEDIeeughPfzwwxowYICuvPLKDp8WWbt2rU2doSs5eSXpxO/gqf9Mcp8m2Km+vl6PP/64du3aJcMwlJKSorvuuouVpouA0ISQceWVV2ru3Lnyer12t4IubO/evV9b4/P59Hd/93cXvhkAIYVbDiBktLa2atCgQXa3gS7uq757zu/3a/Xq1Vq+fLlqampYacJF8/777ys1NVXdunXT+++/f8ZaThtfWKw0IWTMmDFDl19+uR544AG7WwFMf/rTn/TUU0/pxRdfVN++fXXLLbfolltu0fXXX293a+giTr7x74kbXZ7uTzenjS88VpoQMr744gs9+eST2rBhg/r379/hAtyFCxfa1Bm6mn379mnFihV66qmndPToUY0bN05tbW1as2aNUlJS7G4PXcyePXt0xRVXmD/DPqw0IWT8+Mc/PuP8yXfFBS6Um266SZWVlcrOztaECRM0atQohYWFqXv37nrvvfcITbDV559/rh49etjdRpdFaAKAk4SHh6ugoEB33323kpKSzHFCE0LB5Zdfrptvvller1cjRozgvmEXGafnYLuxY8d+bY3D4dCaNWsuQjfo6t566y099dRTGjBggK699lp5vV7deuutdrcFSJKeeeYZPf/88+aNV2+99Vbddttt+v73v293a10CK02w3T/+4z9aqnv66acvcCfA//r8889VVla
mp556Slu3blV7e7sWLlyoO+64Qz179rS7PXRxhw8f1v/7f/9Pzz//vN544w0lJibqtttu04MPPmh3a5c0QhMAfI3du3dr+fLlevbZZ3Xo0CGNGDFCL7/8st1tAZKkHTt2aMKECXr//ff59NwFxslQAPgaycnJmjt3rvbt26fnn3/e7nYAffHFF/qP//gP3Xzzzbrhhhv02Wefqbi42O62LnmsNAEA0ElUVFRo9erVeumllxQWFqZ/+Id/0IQJEzRkyBC7W+sSCE0AAHQSPXr00OjRozVhwgSNHj36K79QGhcGoQkAgE6iublZ0dHRdrfRZXHLAQAAQtyJr085E4fDoWPHjl2kjromQhMAACFu7dq1Xzm3adMmLVmy5LTfR4fzi9NzAAB0Qrt27dLMmTP1hz/8QRMmTNAjjzyiPn362N3WJY1bDgAA0Il8+umnysvLU//+/XXs2DHV1NRo5cqVBKaLgNAEAEAn4Pf7NWPGDF1zzTXavn27Xn/9df3hD39Qamqq3a11GVzTBABAiJs7d64effRRud1uPf/88/rpT39qd0tdEtc0AQAQ4rp166bIyEgNHz5cYWFhX1n34osvXsSuuh5WmgAACHG33377195yABceK00AAAAWcCE4AACABYQmAAAACwhNAAAAFhCaAAAALCA0AcAZOBwOvfTSS3a3ESQUewK6AkITgJDW2Nioe++9V9dcc40uu+wyxcfH64c//KGeeOIJff7553a3B6AL4T5NAELWX//6Vw0ePFjf+ta3VFJSorS0NB07dkwffvihnnrqKXk8HuXk5NjdZgdtbW3q3r273W0AOM9YaQIQsvLz8xUeHq533nlH48aNU79+/ZSWlqZbbrlF69at05gxY8xah8Ohxx9/XDfeeKMiIyOVmJio3//+9+b8m2++KYfDoUOHDpljNTU1cjgc+vjjj8/YR0NDw1du9+OPP5bD4dB//Md/aOjQobrsssu0atUqffbZZxo/fryuuuoq9ejRQ2lpaXr++eeDtjt06FAVFBRo+vTp6tWrl9xut2bNmhVU89FHH+lHP/qRLrvsMqWkpGj9+vVn/0YCOC8ITQBC0meffaaKigrdc889ioqKOm3NqXdIfuCBB3TLLbfovffe02233abx48dr586d37gXK9udMWOGCgoKtHPnTo0cOVJffPGF0tPT9cc//lG1tbWaMmWKvF6vtmzZEvS6lStXKioqSlu2bNHcuXP18MMPm8Ho+PHjGjt2rMLCwlRVVaUnnnhCM2bM+MbHA+AcGQAQgqqqqgxJxosvvhg03rt3byMqKsqIiooypk+fbo5LMu66666g2oyMDOPuu+82DMMw3njjDUOS4fP5zPl3333XkGTs2bPnK/v4uu3u2bPHkGQsXrz4a4/ppptuMoqKisznQ4YMMX74wx8G1Xz/+983ZsyYYRiGYbz22mtGWFiYUV9fb86/+uqrhiRj7dq1X7s/AOcX1zQBCGmnriZt3bpVx48f14QJExQIBILmMjMzOzyvqan5xj1Y2e6AAQOCnre3t+u3v/2tXnjhBX3yyScKBAIKBAIdVs369+8f9PzKK69UU1OTJGnnzp3q06ePrrrqqq/sBcDFQ2gCEJKuueYaORwO7dq1K2j86quvliRFRkZa2s6J0NWt25dXIxgnfd1mW1vbOfd3apg7NQwtWLBAixYt0uLFi5WWlqaoqCgVFhaqtbU1qO7UC8YdDoeOHz/eodev2i+Ai4drmgCEpN69e2vEiBEqLS3V0aNHLb2mqqqqw/Nrr71WknTFFVdI+vKi7hOsrkKdabtf5a233tJPf/pT3Xbbbbruuut09dVX66OPPrK0vxNSUlJUV1enTz/91BzbvHnzWW0DwPlDaAIQsn73u9/p2LFjGjBggF544QXt3LlTu3fv1qpVq7Rr1y6FhYUF1f/+97/XU089pQ8//FC//vWvtXXrVv3iF7+Q9OXKVUJCgmbNmqUPP/xQ69at04IFCyz1cabtfpV
rrrlG69ev16ZNm7Rz505NnTpVjY2NZ3X8w4cPV3Jysm6//Xa99957euutt3T//fef1TYAnD+EJgAh6zvf+Y7effddDR8+XDNnztR1112nAQMGaMmSJSouLtYjjzwSVP/QQw+prKxM/fv318qVK7V69WqlpKRI+vI02PPPP69du3bpuuuu06OPPqrf/OY3lvo403a/ygMPPKAbbrhBI0eO1NChQ+V2u3XzzTef1fF369ZNa9euVSAQ0A9+8APdeeedmj179lltA8D54zBOd9IcADoZh8OhtWvXnnUwAQCrWGkCAACwgNAEAABgAbccAHBJ4EoDABcaK00AAAAWEJoAAAAsIDQBAABYQGgCAACwgNAEAABgAaEJAADAAkITAACABYQmAAAAC/4/Iv24hHeH6VEAAAAASUVORK5CYII=\n", + "text/plain": [ + "
" + ] + }, + "metadata": {}, + "output_type": "display_data" + } + ], + "source": [ + "#Analysis of GPU with the price\n", + "sns.barplot(x=df[\"Gpu brand\"],y=df[\"Price\"])\n", + "plt.xticks(rotation=\"vertical\")\n", + "plt.show()\n", + "\n", + "#it shows that the GPU brand affects the price " + ] + }, + { + "cell_type": "code", + "execution_count": 150, + "id": "19c2176e", + "metadata": {}, + "outputs": [], + "source": [ + "#Since we have extracted the useful values from GPU, let's drop the Gpu column from the dataframe \n", + "df.drop(columns=[\"Gpu\"],inplace= True) " + ] + }, + { + "cell_type": "code", + "execution_count": 151, + "id": "0a65a640", + "metadata": {}, + "outputs": [ + { + "data": { + "text/html": [ + "
\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
CompanyTypeNameRamOpSysWeightPriceTouchscreenIPSppiCpu brandHDDSSDGpu brand
0AppleUltrabook8macOS1.3771378.683201226.983005Intel Core i50128Intel
1AppleUltrabook8macOS1.3447895.523200127.677940Intel Core i500Intel
2HPNotebook8No OS1.8630636.000000141.211998Intel Core i50256Intel
3AppleUltrabook16macOS1.83135195.336001220.534624Intel Core i70512AMD
4AppleUltrabook8macOS1.3796095.808001226.983005Intel Core i50256Intel
\n", + "
" + ], + "text/plain": [ + " Company TypeName Ram OpSys Weight Price Touchscreen IPS \\\n", + "0 Apple Ultrabook 8 macOS 1.37 71378.6832 0 1 \n", + "1 Apple Ultrabook 8 macOS 1.34 47895.5232 0 0 \n", + "2 HP Notebook 8 No OS 1.86 30636.0000 0 0 \n", + "3 Apple Ultrabook 16 macOS 1.83 135195.3360 0 1 \n", + "4 Apple Ultrabook 8 macOS 1.37 96095.8080 0 1 \n", + "\n", + " ppi Cpu brand HDD SSD Gpu brand \n", + "0 226.983005 Intel Core i5 0 128 Intel \n", + "1 127.677940 Intel Core i5 0 0 Intel \n", + "2 141.211998 Intel Core i5 0 256 Intel \n", + "3 220.534624 Intel Core i7 0 512 AMD \n", + "4 226.983005 Intel Core i5 0 256 Intel " + ] + }, + "execution_count": 151, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "df.head()" + ] + }, + { + "cell_type": "markdown", + "id": "d3785c0f", + "metadata": {}, + "source": [ + "# OpSys (Operating System)" + ] + }, + { + "cell_type": "code", + "execution_count": 152, + "id": "1c3aa67b", + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "Windows 10 1072\n", + "No OS 66\n", + "Linux 62\n", + "Windows 7 45\n", + "Chrome OS 26\n", + "macOS 13\n", + "Mac OS X 8\n", + "Windows 10 S 8\n", + "Android 2\n", + "Name: OpSys, dtype: int64" + ] + }, + "execution_count": 152, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "df[\"OpSys\"].value_counts()" + ] + }, + { + "cell_type": "code", + "execution_count": 153, + "id": "6545af86", + "metadata": {}, + "outputs": [], + "source": [ + "#We can reduce the number of categories, \n", + "# for example we can store Windows 10, Windows 7 in a single Windows category " + ] + }, + { + "cell_type": "code", + "execution_count": 154, + "id": "5e602e61", + "metadata": {}, + "outputs": [ + { + "data": { + "image/png": "\n", + "text/plain": [ + "
" + ] + }, + "metadata": {}, + "output_type": "display_data" + } + ], + "source": [ + "sns.barplot(x=df[\"OpSys\"], y = df[\"Price\"])\n", + "plt.xticks(rotation=\"vertical\")\n", + "plt.show()" + ] + }, + { + "cell_type": "code", + "execution_count": 155, + "id": "fe6c0c4b", + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "Windows 10 1072\n", + "No OS 66\n", + "Linux 62\n", + "Windows 7 45\n", + "Chrome OS 26\n", + "macOS 13\n", + "Mac OS X 8\n", + "Windows 10 S 8\n", + "Android 2\n", + "Name: OpSys, dtype: int64" + ] + }, + "execution_count": 155, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "df[\"OpSys\"].value_counts()" + ] + }, + { + "cell_type": "code", + "execution_count": 156, + "id": "56eb8be8", + "metadata": {}, + "outputs": [], + "source": [ + "def cat_os(inp):\n", + " if inp== \"Windows 10\" or inp == \"Windows 7\" or inp == \"Windows 10 S\":\n", + " return \"Windows\"\n", + " elif inp == \"macOS\" or inp == \"Mac OS X\":\n", + " return \"Mac\"\n", + " else:\n", + " return \"Other/No OS/ Linux\"" + ] + }, + { + "cell_type": "code", + "execution_count": 157, + "id": "b15eaf3a", + "metadata": {}, + "outputs": [], + "source": [ + "df[\"Os\"] = df[\"OpSys\"].apply(cat_os) " + ] + }, + { + "cell_type": "code", + "execution_count": 158, + "id": "dff3d73d", + "metadata": {}, + "outputs": [ + { + "data": { + "text/html": [ + "
\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
CompanyTypeNameRamOpSysWeightPriceTouchscreenIPSppiCpu brandHDDSSDGpu brandOs
0AppleUltrabook8macOS1.3771378.683201226.983005Intel Core i50128IntelMac
1AppleUltrabook8macOS1.3447895.523200127.677940Intel Core i500IntelMac
2HPNotebook8No OS1.8630636.000000141.211998Intel Core i50256IntelOther/No OS/ Linux
3AppleUltrabook16macOS1.83135195.336001220.534624Intel Core i70512AMDMac
4AppleUltrabook8macOS1.3796095.808001226.983005Intel Core i50256IntelMac
\n", + "
" + ], + "text/plain": [ + " Company TypeName Ram OpSys Weight Price Touchscreen IPS \\\n", + "0 Apple Ultrabook 8 macOS 1.37 71378.6832 0 1 \n", + "1 Apple Ultrabook 8 macOS 1.34 47895.5232 0 0 \n", + "2 HP Notebook 8 No OS 1.86 30636.0000 0 0 \n", + "3 Apple Ultrabook 16 macOS 1.83 135195.3360 0 1 \n", + "4 Apple Ultrabook 8 macOS 1.37 96095.8080 0 1 \n", + "\n", + " ppi Cpu brand HDD SSD Gpu brand Os \n", + "0 226.983005 Intel Core i5 0 128 Intel Mac \n", + "1 127.677940 Intel Core i5 0 0 Intel Mac \n", + "2 141.211998 Intel Core i5 0 256 Intel Other/No OS/ Linux \n", + "3 220.534624 Intel Core i7 0 512 AMD Mac \n", + "4 226.983005 Intel Core i5 0 256 Intel Mac " + ] + }, + "execution_count": 158, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "df.head()" + ] + }, + { + "cell_type": "code", + "execution_count": 159, + "id": "169c8747", + "metadata": {}, + "outputs": [], + "source": [ + "#Since we have extracted the useful values from the operating system, let's drop the OpSys column from the dataframe \n", + "df.drop(columns=[\"OpSys\"],inplace= True) " + ] + }, + { + "cell_type": "code", + "execution_count": 160, + "id": "8354b22b", + "metadata": {}, + "outputs": [ + { + "data": { + "image/png": "\n", + "text/plain": [ + "
" + ] + }, + "metadata": {}, + "output_type": "display_data" + } + ], + "source": [ + "sns.barplot(x=df[\"Os\"], y = df[\"Price\"])\n", + "plt.xticks(rotation=\"vertical\")\n", + "plt.show()" + ] + }, + { + "cell_type": "markdown", + "id": "9e5dc91e", + "metadata": {}, + "source": [ + "# Column Weight" + ] + }, + { + "cell_type": "code", + "execution_count": 102, + "id": "148c9c50", + "metadata": {}, + "outputs": [ + { + "name": "stderr", + "output_type": "stream", + "text": [ + "C:\\Users\\USER\\anaconda3\\lib\\site-packages\\seaborn\\distributions.py:2619: FutureWarning: `distplot` is a deprecated function and will be removed in a future version. Please adapt your code to use either `displot` (a figure-level function with similar flexibility) or `histplot` (an axes-level function for histograms).\n", + " warnings.warn(msg, FutureWarning)\n" + ] + }, + { + "data": { + "text/plain": [ + "" + ] + }, + "execution_count": 102, + "metadata": {}, + "output_type": "execute_result" + }, + { + "data": { + "image/png": "\n", + "text/plain": [ + "
" + ] + }, + "metadata": {}, + "output_type": "display_data" + } + ], + "source": [ + "sns.distplot(df[\"Weight\"])" + ] + }, + { + "cell_type": "code", + "execution_count": 161, + "id": "185cddb5", + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "" + ] + }, + "execution_count": 161, + "metadata": {}, + "output_type": "execute_result" + }, + { + "data": { + "image/png": "\n", + "text/plain": [ + "
" + ] + }, + "metadata": {}, + "output_type": "display_data" + } + ], + "source": [ + "sns.scatterplot(x=df[\"Weight\"], y=df[\"Price\"])" + ] + }, + { + "cell_type": "code", + "execution_count": 104, + "id": "3ae9db94", + "metadata": {}, + "outputs": [], + "source": [ + "#minor correlation: the scatter plot shows that as the weight increases the price also increases " + ] + }, + { + "cell_type": "code", + "execution_count": 162, + "id": "dd54d508", + "metadata": {}, + "outputs": [ + { + "name": "stderr", + "output_type": "stream", + "text": [ + "C:\\Users\\USER\\AppData\\Local\\Temp\\ipykernel_9696\\1734695575.py:1: FutureWarning: The default value of numeric_only in DataFrame.corr is deprecated. In a future version, it will default to False. Select only valid columns or specify the value of numeric_only to silence this warning.\n", + " df.corr()[\"Price\"]\n" + ] + }, + { + "data": { + "text/plain": [ + "Ram 0.742905\n", + "Weight 0.209867\n", + "Price 1.000000\n", + "Touchscreen 0.192917\n", + "IPS 0.253320\n", + "ppi 0.475368\n", + "HDD -0.096891\n", + "SSD 0.670660\n", + "Name: Price, dtype: float64" + ] + }, + "execution_count": 162, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "df.corr()[\"Price\"]" + ] + }, + { + "cell_type": "code", + "execution_count": 163, + "id": "0aa9df91", + "metadata": {}, + "outputs": [ + { + "name": "stderr", + "output_type": "stream", + "text": [ + "C:\\Users\\USER\\AppData\\Local\\Temp\\ipykernel_9696\\1134722465.py:1: FutureWarning: The default value of numeric_only in DataFrame.corr is deprecated. In a future version, it will default to False. Select only valid columns or specify the value of numeric_only to silence this warning.\n", + " df.corr()\n" + ] + }, + { + "data": { + "text/html": [ + "
\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
RamWeightPriceTouchscreenIPSppiHDDSSD
Ram1.0000000.3833620.7429050.1188750.2079490.3056880.0958080.603379
Weight0.3833621.0000000.209867-0.2930040.018643-0.3218830.514147-0.063818
Price0.7429050.2098671.0000000.1929170.2533200.475368-0.0968910.670660
Touchscreen0.118875-0.2930040.1929171.0000000.1480260.458571-0.2087660.257577
IPS0.2079490.0186430.2533200.1480261.0000000.299142-0.0935880.225311
ppi0.305688-0.3218830.4753680.4585710.2991421.000000-0.2946980.509437
HDD0.0958080.514147-0.096891-0.208766-0.093588-0.2946981.000000-0.400750
SSD0.603379-0.0638180.6706600.2575770.2253110.509437-0.4007501.000000
\n", + "
" + ], + "text/plain": [ + " Ram Weight Price Touchscreen IPS ppi \\\n", + "Ram 1.000000 0.383362 0.742905 0.118875 0.207949 0.305688 \n", + "Weight 0.383362 1.000000 0.209867 -0.293004 0.018643 -0.321883 \n", + "Price 0.742905 0.209867 1.000000 0.192917 0.253320 0.475368 \n", + "Touchscreen 0.118875 -0.293004 0.192917 1.000000 0.148026 0.458571 \n", + "IPS 0.207949 0.018643 0.253320 0.148026 1.000000 0.299142 \n", + "ppi 0.305688 -0.321883 0.475368 0.458571 0.299142 1.000000 \n", + "HDD 0.095808 0.514147 -0.096891 -0.208766 -0.093588 -0.294698 \n", + "SSD 0.603379 -0.063818 0.670660 0.257577 0.225311 0.509437 \n", + "\n", + " HDD SSD \n", + "Ram 0.095808 0.603379 \n", + "Weight 0.514147 -0.063818 \n", + "Price -0.096891 0.670660 \n", + "Touchscreen -0.208766 0.257577 \n", + "IPS -0.093588 0.225311 \n", + "ppi -0.294698 0.509437 \n", + "HDD 1.000000 -0.400750 \n", + "SSD -0.400750 1.000000 " + ] + }, + "execution_count": 163, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "df.corr()" + ] + }, + { + "cell_type": "code", + "execution_count": 164, + "id": "8459bd0a", + "metadata": {}, + "outputs": [ + { + "name": "stderr", + "output_type": "stream", + "text": [ + "C:\\Users\\USER\\AppData\\Local\\Temp\\ipykernel_9696\\58359773.py:1: FutureWarning: The default value of numeric_only in DataFrame.corr is deprecated. In a future version, it will default to False. Select only valid columns or specify the value of numeric_only to silence this warning.\n", + " sns.heatmap(df.corr())\n" + ] + }, + { + "data": { + "text/plain": [ + "" + ] + }, + "execution_count": 164, + "metadata": {}, + "output_type": "execute_result" + }, + { + "data": { + "image/png": "\n", + "text/plain": [ + "
" + ] + }, + "metadata": {}, + "output_type": "display_data" + } + ], + "source": [ + "sns.heatmap(df.corr())" + ] + }, + { + "cell_type": "code", + "execution_count": 165, + "id": "94087042", + "metadata": {}, + "outputs": [ + { + "name": "stderr", + "output_type": "stream", + "text": [ + "C:\\Users\\USER\\AppData\\Local\\Temp\\ipykernel_9696\\3798988075.py:2: UserWarning: \n", + "\n", + "`distplot` is a deprecated function and will be removed in seaborn v0.14.0.\n", + "\n", + "Please adapt your code to use either `displot` (a figure-level function with\n", + "similar flexibility) or `histplot` (an axes-level function for histograms).\n", + "\n", + "For a guide to updating your code to use the new functions, please see\n", + "https://gist.github.com/mwaskom/de44147ed2974457ad6372750bbe5751\n", + "\n", + " sns.distplot(df[\"Price\"])\n" + ] + }, + { + "data": { + "text/plain": [ + "" + ] + }, + "execution_count": 165, + "metadata": {}, + "output_type": "execute_result" + }, + { + "data": { + "image/png": "\n", + "text/plain": [ + "
" + ] + }, + "metadata": {}, + "output_type": "display_data" + } + ], + "source": [ + "#Our target column that is price is skewed\n", + "sns.distplot(df[\"Price\"])" + ] + }, + { + "cell_type": "code", + "execution_count": 166, + "id": "ca7bc042", + "metadata": {}, + "outputs": [ + { + "name": "stderr", + "output_type": "stream", + "text": [ + "C:\\Users\\USER\\AppData\\Local\\Temp\\ipykernel_9696\\1611883942.py:1: UserWarning: \n", + "\n", + "`distplot` is a deprecated function and will be removed in seaborn v0.14.0.\n", + "\n", + "Please adapt your code to use either `displot` (a figure-level function with\n", + "similar flexibility) or `histplot` (an axes-level function for histograms).\n", + "\n", + "For a guide to updating your code to use the new functions, please see\n", + "https://gist.github.com/mwaskom/de44147ed2974457ad6372750bbe5751\n", + "\n", + " sns.distplot(np.log(df[\"Price\"]))\n" + ] + }, + { + "data": { + "text/plain": [ + "" + ] + }, + "execution_count": 166, + "metadata": {}, + "output_type": "execute_result" + }, + { + "data": { + "image/png": "\n", + "text/plain": [ + "
" + ] + }, + "metadata": {}, + "output_type": "display_data" + } + ], + "source": [ + "sns.distplot(np.log(df[\"Price\"]))\n", + "#converted the skewed Price distribution to an approximately normal one with the help of a log transform" + ] + }, + { + "cell_type": "code", + "execution_count": 167, + "id": "d406d197", + "metadata": {}, + "outputs": [], + "source": [ + "x = df.drop(columns=[\"Price\"])\n", + "y = np.log(df[\"Price\"])" + ] + }, + { + "cell_type": "code", + "execution_count": 168, + "id": "f7682e34", + "metadata": {}, + "outputs": [ + { + "data": { + "text/html": [ + "
\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
CompanyTypeNameRamWeightTouchscreenIPSppiCpu brandHDDSSDGpu brandOs
0AppleUltrabook81.3701226.983005Intel Core i50128IntelMac
1AppleUltrabook81.3400127.677940Intel Core i500IntelMac
2HPNotebook81.8600141.211998Intel Core i50256IntelOther/No OS/ Linux
3AppleUltrabook161.8301220.534624Intel Core i70512AMDMac
4AppleUltrabook81.3701226.983005Intel Core i50256IntelMac
.......................................
1298Lenovo2 in 1 Convertible41.8011157.350512Intel Core i70128IntelWindows
1299Lenovo2 in 1 Convertible161.3011276.053530Intel Core i70512IntelWindows
1300LenovoNotebook21.5000111.935204Some Other Processor00IntelWindows
1301HPNotebook62.1900100.454670Intel Core i710000AMDWindows
1302AsusNotebook42.2000100.454670Some Other Processor5000IntelWindows
\n", + "

1302 rows × 12 columns

\n", + "
" + ], + "text/plain": [ + " Company TypeName Ram Weight Touchscreen IPS ppi \\\n", + "0 Apple Ultrabook 8 1.37 0 1 226.983005 \n", + "1 Apple Ultrabook 8 1.34 0 0 127.677940 \n", + "2 HP Notebook 8 1.86 0 0 141.211998 \n", + "3 Apple Ultrabook 16 1.83 0 1 220.534624 \n", + "4 Apple Ultrabook 8 1.37 0 1 226.983005 \n", + "... ... ... ... ... ... ... ... \n", + "1298 Lenovo 2 in 1 Convertible 4 1.80 1 1 157.350512 \n", + "1299 Lenovo 2 in 1 Convertible 16 1.30 1 1 276.053530 \n", + "1300 Lenovo Notebook 2 1.50 0 0 111.935204 \n", + "1301 HP Notebook 6 2.19 0 0 100.454670 \n", + "1302 Asus Notebook 4 2.20 0 0 100.454670 \n", + "\n", + " Cpu brand HDD SSD Gpu brand Os \n", + "0 Intel Core i5 0 128 Intel Mac \n", + "1 Intel Core i5 0 0 Intel Mac \n", + "2 Intel Core i5 0 256 Intel Other/No OS/ Linux \n", + "3 Intel Core i7 0 512 AMD Mac \n", + "4 Intel Core i5 0 256 Intel Mac \n", + "... ... ... ... ... ... \n", + "1298 Intel Core i7 0 128 Intel Windows \n", + "1299 Intel Core i7 0 512 Intel Windows \n", + "1300 Some Other Processor 0 0 Intel Windows \n", + "1301 Intel Core i7 1000 0 AMD Windows \n", + "1302 Some Other Processor 500 0 Intel Windows \n", + "\n", + "[1302 rows x 12 columns]" + ] + }, + "execution_count": 168, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "x" + ] + }, + { + "cell_type": "code", + "execution_count": 169, + "id": "9fb85ec0", + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "0 11.175755\n", + "1 10.776777\n", + "2 10.329931\n", + "3 11.814476\n", + "4 11.473101\n", + " ... 
\n", + "1298 10.433899\n", + "1299 11.288115\n", + "1300 9.409283\n", + "1301 10.614129\n", + "1302 9.886358\n", + "Name: Price, Length: 1302, dtype: float64" + ] + }, + "execution_count": 169, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "y" + ] + }, + { + "cell_type": "code", + "execution_count": 170, + "id": "d892dfba", + "metadata": {}, + "outputs": [], + "source": [ + "from sklearn.model_selection import train_test_split" + ] + }, + { + "cell_type": "code", + "execution_count": 171, + "id": "0506e95e", + "metadata": {}, + "outputs": [], + "source": [ + "x_train,x_test,y_train,y_test = train_test_split(x,y,test_size=0.15, random_state=2)" + ] + }, + { + "cell_type": "code", + "execution_count": 172, + "id": "6f1644a2", + "metadata": {}, + "outputs": [ + { + "data": { + "text/html": [ + "
\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
CompanyTypeNameRamWeightTouchscreenIPSppiCpu brandHDDSSDGpu brandOs
183ToshibaNotebook82.0000100.454670Intel Core i50128IntelWindows
1141MSIGaming82.4000141.211998Intel Core i71000128NvidiaWindows
1049AsusNetbook41.2000135.094211Some Other Processor00IntelOther/No OS/ Linux
1020Dell2 in 1 Convertible42.0811141.211998Intel Core i310000IntelWindows
878DellNotebook42.1800141.211998Intel Core i51000128NvidiaWindows
.......................................
466AcerNotebook42.2000100.454670Intel Core i35000NvidiaWindows
299AsusUltrabook161.6300141.211998Intel Core i70512NvidiaWindows
493AcerNotebook82.2000100.454670AMD Processor10000AMDWindows
527LenovoNotebook82.2000100.454670Intel Core i320000NvidiaOther/No OS/ Linux
1193AppleUltrabook80.9201226.415547Some Other Processor00IntelMac
\n", + "

1106 rows × 12 columns

\n", + "
" + ], + "text/plain": [ + " Company TypeName Ram Weight Touchscreen IPS ppi \\\n", + "183 Toshiba Notebook 8 2.00 0 0 100.454670 \n", + "1141 MSI Gaming 8 2.40 0 0 141.211998 \n", + "1049 Asus Netbook 4 1.20 0 0 135.094211 \n", + "1020 Dell 2 in 1 Convertible 4 2.08 1 1 141.211998 \n", + "878 Dell Notebook 4 2.18 0 0 141.211998 \n", + "... ... ... ... ... ... ... ... \n", + "466 Acer Notebook 4 2.20 0 0 100.454670 \n", + "299 Asus Ultrabook 16 1.63 0 0 141.211998 \n", + "493 Acer Notebook 8 2.20 0 0 100.454670 \n", + "527 Lenovo Notebook 8 2.20 0 0 100.454670 \n", + "1193 Apple Ultrabook 8 0.92 0 1 226.415547 \n", + "\n", + " Cpu brand HDD SSD Gpu brand Os \n", + "183 Intel Core i5 0 128 Intel Windows \n", + "1141 Intel Core i7 1000 128 Nvidia Windows \n", + "1049 Some Other Processor 0 0 Intel Other/No OS/ Linux \n", + "1020 Intel Core i3 1000 0 Intel Windows \n", + "878 Intel Core i5 1000 128 Nvidia Windows \n", + "... ... ... ... ... ... \n", + "466 Intel Core i3 500 0 Nvidia Windows \n", + "299 Intel Core i7 0 512 Nvidia Windows \n", + "493 AMD Processor 1000 0 AMD Windows \n", + "527 Intel Core i3 2000 0 Nvidia Other/No OS/ Linux \n", + "1193 Some Other Processor 0 0 Intel Mac \n", + "\n", + "[1106 rows x 12 columns]" + ] + }, + "execution_count": 172, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "x_train\n" + ] + }, + { + "cell_type": "markdown", + "id": "38eebad9", + "metadata": {}, + "source": [ + "# Importing all the best algorithms" + ] + }, + { + "cell_type": "code", + "execution_count": 235, + "id": "5014a7a6", + "metadata": {}, + "outputs": [], + "source": [ + "from sklearn.compose import ColumnTransformer\n", + "from sklearn.pipeline import Pipeline\n", + "from sklearn.preprocessing import OneHotEncoder\n", + "from sklearn.metrics import r2_score,mean_absolute_error" + ] + }, + { + "cell_type": "code", + "execution_count": 254, + "id": "f2151e11", + "metadata": {}, + "outputs": [], + "source": [ + "from 
sklearn.linear_model import LinearRegression,Ridge,Lasso\n", + "from sklearn.neighbors import KNeighborsRegressor\n", + "from sklearn.tree import DecisionTreeRegressor\n", + "from sklearn.ensemble import RandomForestRegressor,GradientBoostingRegressor,AdaBoostRegressor,ExtraTreesRegressor\n", + "from sklearn.svm import SVR\n", + "# from xgboost import XGBRegressor" + ] + }, + { + "cell_type": "markdown", + "id": "af5b75a6", + "metadata": {}, + "source": [ + "# Linear Regression" + ] + }, + { + "cell_type": "code", + "execution_count": 255, + "id": "fee51532", + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "R2 score 0.8073277448418734\n", + "MAE 0.21017827976428724\n" + ] + }, + { + "name": "stderr", + "output_type": "stream", + "text": [ + "C:\\Users\\USER\\anaconda3\\lib\\site-packages\\sklearn\\preprocessing\\_encoders.py:828: FutureWarning: `sparse` was renamed to `sparse_output` in version 1.2 and will be removed in 1.4. `sparse_output` is ignored unless you leave `sparse` to its default value.\n", + " warnings.warn(\n" + ] + } + ], + "source": [ + "step1 = ColumnTransformer(transformers=[\n", + " ('col_tnf',OneHotEncoder(sparse=False,drop='first'),[0,1,7,10,11])\n", + "],remainder='passthrough')\n", + "\n", + "step2 = LinearRegression()\n", + "\n", + "pipe = Pipeline([\n", + " ('step1',step1),\n", + " ('step2',step2)\n", + "])\n", + "\n", + "pipe.fit(x_train,y_train)\n", + "\n", + "y_pred = pipe.predict(x_test)\n", + "\n", + "print('R2 score',r2_score(y_test,y_pred))\n", + "print('MAE',mean_absolute_error(y_test,y_pred))" + ] + }, + { + "cell_type": "code", + "execution_count": 214, + "id": "ce4ec864", + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "1.2336780599567432" + ] + }, + "execution_count": 214, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "np.exp(0.21)" + ] + }, + { + "cell_type": "markdown", + "id": "20b60e96", + "metadata": {}, + "source": [ + "# 
RidgeRegression" + ] + }, + { + "cell_type": "code", + "execution_count": 256, + "id": "0005b554", + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "R2 score is: 0.8127331031311809\n", + "Mean Absolute Error is: 0.20926802242582965\n" + ] + }, + { + "name": "stderr", + "output_type": "stream", + "text": [ + "C:\\Users\\USER\\anaconda3\\lib\\site-packages\\sklearn\\preprocessing\\_encoders.py:828: FutureWarning: `sparse` was renamed to `sparse_output` in version 1.2 and will be removed in 1.4. `sparse_output` is ignored unless you leave `sparse` to its default value.\n", + " warnings.warn(\n" + ] + } + ], + "source": [ + "step1 = ColumnTransformer(transformers=[\n", + " ('col_tnf',OneHotEncoder(sparse=False,drop='first'),[0,1,7,10,11])\n", + "],remainder='passthrough')\n", + "\n", + "step2 = Ridge(alpha=10)\n", + "\n", + "pipe = Pipeline([\n", + " ('step1',step1),\n", + " ('step2',step2)\n", + "])\n", + "\n", + "pipe.fit(x_train,y_train)\n", + "\n", + "y_pred = pipe.predict(x_test)\n", + "\n", + "print('R2 score is: ',r2_score(y_test,y_pred))\n", + "print('Mean Absolute Error is: ',mean_absolute_error(y_test,y_pred))" + ] + }, + { + "cell_type": "markdown", + "id": "4d15b3c6", + "metadata": {}, + "source": [ + "# LassoRegression" + ] + }, + { + "cell_type": "code", + "execution_count": 257, + "id": "9ec47334", + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "R2 score is: 0.8071853945317105\n", + "Mean Absolute Error is: 0.21114361613472565\n" + ] + }, + { + "name": "stderr", + "output_type": "stream", + "text": [ + "C:\\Users\\USER\\anaconda3\\lib\\site-packages\\sklearn\\preprocessing\\_encoders.py:828: FutureWarning: `sparse` was renamed to `sparse_output` in version 1.2 and will be removed in 1.4. 
`sparse_output` is ignored unless you leave `sparse` to its default value.\n", + " warnings.warn(\n" + ] + } + ], + "source": [ + "step1 = ColumnTransformer(transformers=[\n", + " ('col_tnf',OneHotEncoder(sparse=False,drop='first'),[0,1,7,10,11])\n", + "],remainder='passthrough')\n", + "\n", + "step2 = Lasso(alpha=0.001)\n", + "\n", + "pipe = Pipeline([\n", + " ('step1',step1),\n", + " ('step2',step2)\n", + "])\n", + "\n", + "pipe.fit(x_train,y_train)\n", + "\n", + "y_pred = pipe.predict(x_test)\n", + "\n", + "print('R2 score is: ',r2_score(y_test,y_pred))\n", + "print('Mean Absolute Error is: ',mean_absolute_error(y_test,y_pred))" + ] + }, + { + "cell_type": "markdown", + "id": "c11fc091", + "metadata": {}, + "source": [ + "# KNN (k nearest neighbor)" + ] + }, + { + "cell_type": "code", + "execution_count": 258, + "id": "046facef", + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "R2 score is: 0.8027675291937718\n", + "Mean Absolute Error is: 0.193456293739003\n" + ] + }, + { + "name": "stderr", + "output_type": "stream", + "text": [ + "C:\\Users\\USER\\anaconda3\\lib\\site-packages\\sklearn\\preprocessing\\_encoders.py:828: FutureWarning: `sparse` was renamed to `sparse_output` in version 1.2 and will be removed in 1.4. 
`sparse_output` is ignored unless you leave `sparse` to its default value.\n", + " warnings.warn(\n" + ] + } + ], + "source": [ + "step1 = ColumnTransformer(transformers=[\n", + " ('col_tnf',OneHotEncoder(sparse=False,drop='first'),[0,1,7,10,11])\n", + "],remainder='passthrough')\n", + "\n", + "step2 = KNeighborsRegressor(n_neighbors=3)\n", + "\n", + "pipe = Pipeline([\n", + " ('step1',step1),\n", + " ('step2',step2)\n", + "])\n", + "\n", + "pipe.fit(x_train,y_train)\n", + "\n", + "y_pred = pipe.predict(x_test)\n", + "\n", + "print('R2 score is: ',r2_score(y_test,y_pred))\n", + "print('Mean Absolute Error is: ',mean_absolute_error(y_test,y_pred))" + ] + }, + { + "cell_type": "markdown", + "id": "a2e61be1", + "metadata": {}, + "source": [ + "# Decision Tree" + ] + }, + { + "cell_type": "code", + "execution_count": 259, + "id": "412c8454", + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "R2 score is: 0.8366862340483114\n", + "Mean Absolute Error is: 0.18450460098777566\n" + ] + }, + { + "name": "stderr", + "output_type": "stream", + "text": [ + "C:\\Users\\USER\\anaconda3\\lib\\site-packages\\sklearn\\preprocessing\\_encoders.py:828: FutureWarning: `sparse` was renamed to `sparse_output` in version 1.2 and will be removed in 1.4. 
`sparse_output` is ignored unless you leave `sparse` to its default value.\n", + " warnings.warn(\n" + ] + } + ], + "source": [ + "step1 = ColumnTransformer(transformers=[\n", + " ('col_tnf',OneHotEncoder(sparse=False,drop='first'),[0,1,7,10,11])\n", + "],remainder='passthrough')\n", + "\n", + "step2 = DecisionTreeRegressor(max_depth=8)\n", + "\n", + "pipe = Pipeline([\n", + " ('step1',step1),\n", + " ('step2',step2)\n", + "])\n", + "\n", + "pipe.fit(x_train,y_train)\n", + "\n", + "y_pred = pipe.predict(x_test)\n", + "\n", + "print('R2 score is: ',r2_score(y_test,y_pred))\n", + "print('Mean Absolute Error is: ',mean_absolute_error(y_test,y_pred))" + ] + }, + { + "cell_type": "markdown", + "id": "b2519358", + "metadata": {}, + "source": [ + "# SVM" + ] + }, + { + "cell_type": "code", + "execution_count": 260, + "id": "30f3ce7a", + "metadata": {}, + "outputs": [ + { + "name": "stderr", + "output_type": "stream", + "text": [ + "C:\\Users\\USER\\anaconda3\\lib\\site-packages\\sklearn\\preprocessing\\_encoders.py:828: FutureWarning: `sparse` was renamed to `sparse_output` in version 1.2 and will be removed in 1.4. 
`sparse_output` is ignored unless you leave `sparse` to its default value.\n", + " warnings.warn(\n" + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "R2 score is: 0.8083180902257614\n", + "Mean Absolute Error is: 0.20239059427481307\n" + ] + } + ], + "source": [ + "step1 = ColumnTransformer(transformers=[\n", + " ('col_tnf',OneHotEncoder(sparse=False,drop='first'),[0,1,7,10,11])\n", + "],remainder='passthrough')\n", + "\n", + "step2 = SVR(kernel='rbf',C=10000,epsilon=0.1)\n", + "\n", + "pipe = Pipeline([\n", + " ('step1',step1),\n", + " ('step2',step2)\n", + "])\n", + "\n", + "pipe.fit(x_train,y_train)\n", + "\n", + "y_pred = pipe.predict(x_test)\n", + "\n", + "print('R2 score is: ',r2_score(y_test,y_pred))\n", + "print('Mean Absolute Error is: ',mean_absolute_error(y_test,y_pred))" + ] + }, + { + "cell_type": "markdown", + "id": "34c30330", + "metadata": {}, + "source": [ + "# Random Forest" + ] + }, + { + "cell_type": "code", + "execution_count": 261, + "id": "cc57a7ff", + "metadata": {}, + "outputs": [ + { + "name": "stderr", + "output_type": "stream", + "text": [ + "C:\\Users\\USER\\anaconda3\\lib\\site-packages\\sklearn\\preprocessing\\_encoders.py:828: FutureWarning: `sparse` was renamed to `sparse_output` in version 1.2 and will be removed in 1.4. 
`sparse_output` is ignored unless you leave `sparse` to its default value.\n", + " warnings.warn(\n" + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "R2 score is: 0.8873402378382488\n", + "Mean Absolute Error is: 0.15860130110457718\n" + ] + } + ], + "source": [ + "step1 = ColumnTransformer(transformers=[\n", + " ('col_tnf',OneHotEncoder(sparse=False,drop='first'),[0,1,7,10,11])\n", + "],remainder='passthrough')\n", + "\n", + "step2 = RandomForestRegressor(n_estimators=100,\n", + " random_state=3,\n", + " max_samples=0.5,\n", + " max_features=0.75,\n", + " max_depth=15)\n", + "\n", + "pipe = Pipeline([\n", + " ('step1',step1),\n", + " ('step2',step2)\n", + "])\n", + "\n", + "pipe.fit(x_train,y_train)\n", + "\n", + "y_pred = pipe.predict(x_test)\n", + "\n", + "print('R2 score is: ',r2_score(y_test,y_pred))\n", + "print('Mean Absolute Error is: ',mean_absolute_error(y_test,y_pred))" + ] + }, + { + "cell_type": "markdown", + "id": "4f01ad3f", + "metadata": {}, + "source": [ + "# Extra Trees" + ] + }, + { + "cell_type": "code", + "execution_count": 262, + "id": "c59ae054", + "metadata": {}, + "outputs": [ + { + "name": "stderr", + "output_type": "stream", + "text": [ + "C:\\Users\\USER\\anaconda3\\lib\\site-packages\\sklearn\\preprocessing\\_encoders.py:828: FutureWarning: `sparse` was renamed to `sparse_output` in version 1.2 and will be removed in 1.4. 
`sparse_output` is ignored unless you leave `sparse` to its default value.\n", + " warnings.warn(\n" + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "R2 score 0.8753793123440623\n", + "MAE 0.15979519126758127\n" + ] + } + ], + "source": [ + "step1 = ColumnTransformer(transformers=[\n", + " ('col_tnf',OneHotEncoder(sparse=False,drop='first'),[0,1,7,10,11])\n", + "],remainder='passthrough')\n", + "\n", + "step2 = ExtraTreesRegressor(n_estimators=100,\n", + " random_state=3,\n", + " max_samples=None,\n", + " max_features=0.75,\n", + " max_depth=15)\n", + "\n", + "pipe = Pipeline([\n", + " ('step1',step1),\n", + " ('step2',step2)\n", + "])\n", + "\n", + "pipe.fit(x_train,y_train)\n", + "\n", + "y_pred = pipe.predict(x_test)\n", + "\n", + "print('R2 score',r2_score(y_test,y_pred))\n", + "print('MAE',mean_absolute_error(y_test,y_pred))" + ] + }, + { + "cell_type": "markdown", + "id": "90abb77f", + "metadata": {}, + "source": [ + "# AdaBoost" + ] + }, + { + "cell_type": "code", + "execution_count": 263, + "id": "2080eabb", + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "R2 score 0.793331570243383\n", + "MAE 0.22772195087368882\n" + ] + }, + { + "name": "stderr", + "output_type": "stream", + "text": [ + "C:\\Users\\USER\\anaconda3\\lib\\site-packages\\sklearn\\preprocessing\\_encoders.py:828: FutureWarning: `sparse` was renamed to `sparse_output` in version 1.2 and will be removed in 1.4. 
`sparse_output` is ignored unless you leave `sparse` to its default value.\n", + " warnings.warn(\n" + ] + } + ], + "source": [ + "step1 = ColumnTransformer(transformers=[\n", + " ('col_tnf',OneHotEncoder(sparse=False,drop='first'),[0,1,7,10,11])\n", + "],remainder='passthrough')\n", + "\n", + "step2 = AdaBoostRegressor(n_estimators=15,learning_rate=1.0)\n", + "\n", + "pipe = Pipeline([\n", + " ('step1',step1),\n", + " ('step2',step2)\n", + "])\n", + "\n", + "pipe.fit(x_train,y_train)\n", + "\n", + "y_pred = pipe.predict(x_test)\n", + "\n", + "print('R2 score',r2_score(y_test,y_pred))\n", + "print('MAE',mean_absolute_error(y_test,y_pred))" + ] + }, + { + "cell_type": "markdown", + "id": "bc54d464", + "metadata": {}, + "source": [ + "# GradientBoost" + ] + }, + { + "cell_type": "code", + "execution_count": 264, + "id": "687b431b", + "metadata": {}, + "outputs": [ + { + "name": "stderr", + "output_type": "stream", + "text": [ + "C:\\Users\\USER\\anaconda3\\lib\\site-packages\\sklearn\\preprocessing\\_encoders.py:828: FutureWarning: `sparse` was renamed to `sparse_output` in version 1.2 and will be removed in 1.4. 
`sparse_output` is ignored unless you leave `sparse` to its default value.\n", + " warnings.warn(\n" + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "R2 score 0.8832259431228083\n", + "MAE 0.15899186930352127\n" + ] + } + ], + "source": [ + "step1 = ColumnTransformer(transformers=[\n", + " ('col_tnf',OneHotEncoder(sparse=False,drop='first'),[0,1,7,10,11])\n", + "],remainder='passthrough')\n", + "\n", + "step2 = GradientBoostingRegressor(n_estimators=500)\n", + "\n", + "pipe = Pipeline([\n", + " ('step1',step1),\n", + " ('step2',step2)\n", + "])\n", + "pipe.fit(x_train,y_train)\n", + "\n", + "y_pred = pipe.predict(x_test)\n", + "\n", + "print('R2 score',r2_score(y_test,y_pred))\n", + "print('MAE',mean_absolute_error(y_test,y_pred))" + ] + }, + { + "cell_type": "markdown", + "id": "70ff336e", + "metadata": {}, + "source": [ + "# XGBoost" + ] + }, + { + "cell_type": "code", + "execution_count": 246, + "id": "37851136", + "metadata": {}, + "outputs": [ + { + "name": "stderr", + "output_type": "stream", + "text": [ + "C:\\Users\\USER\\anaconda3\\lib\\site-packages\\sklearn\\preprocessing\\_encoders.py:828: FutureWarning: `sparse` was renamed to `sparse_output` in version 1.2 and will be removed in 1.4. 
`sparse_output` is ignored unless you leave `sparse` to its default value.\n", + " warnings.warn(\n" + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "R2 score 0.8811773435850243\n", + "MAE 0.16496203512600974\n" + ] + } + ], + "source": [ + "# step1 = ColumnTransformer(transformers=[\n", + "# ('col_tnf',OneHotEncoder(sparse=False,drop='first'),[0,1,7,10,11])\n", + "# ],remainder='passthrough')\n", + "\n", + "# step2 = XGBRegressor(n_estimators=45,max_depth=5,learning_rate=0.5)\n", + "\n", + "# pipe = Pipeline([\n", + "# ('step1',step1),\n", + "# ('step2',step2)\n", + "# ])\n", + "\n", + "# pipe.fit(x_train,y_train)\n", + "\n", + "# y_pred = pipe.predict(x_test)\n", + "\n", + "# print('R2 score',r2_score(y_test,y_pred))\n", + "# print('MAE',mean_absolute_error(y_test,y_pred))" + ] + }, + { + "cell_type": "markdown", + "id": "b26eb63e", + "metadata": {}, + "source": [ + "# Voting Regressor\n", + "### combining the best performing models/algorithms" + ] + }, + { + "cell_type": "code", + "execution_count": 247, + "id": "51ded058", + "metadata": {}, + "outputs": [ + { + "name": "stderr", + "output_type": "stream", + "text": [ + "C:\\Users\\USER\\anaconda3\\lib\\site-packages\\sklearn\\preprocessing\\_encoders.py:828: FutureWarning: `sparse` was renamed to `sparse_output` in version 1.2 and will be removed in 1.4. 
`sparse_output` is ignored unless you leave `sparse` to its default value.\n", + " warnings.warn(\n" + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "R2 score 0.8904828063373843\n", + "MAE 0.15805080886686704\n" + ] + } + ], + "source": [ + "# from sklearn.ensemble import VotingRegressor,StackingRegressor\n", + "\n", + "# step1 = ColumnTransformer(transformers=[\n", + "# ('col_tnf',OneHotEncoder(sparse=False,drop='first'),[0,1,7,10,11])\n", + "# ],remainder='passthrough')\n", + "\n", + "\n", + "# rf = RandomForestRegressor(n_estimators=350,random_state=3,max_samples=0.5,max_features=0.75,max_depth=15)\n", + "# gbdt = GradientBoostingRegressor(n_estimators=100,max_features=0.5)\n", + "# xgb = XGBRegressor(n_estimators=25,learning_rate=0.3,max_depth=5)\n", + "# et = ExtraTreesRegressor(n_estimators=100,random_state=3,max_samples=None,max_features=0.75,max_depth=10)\n", + "\n", + "# step2 = VotingRegressor([('rf', rf), ('gbdt', gbdt), ('xgb',xgb), ('et',et)],weights=[5,1,1,1])\n", + "\n", + "# pipe = Pipeline([\n", + "# ('step1',step1),\n", + "# ('step2',step2)\n", + "# ])\n", + "\n", + "# pipe.fit(x_train,y_train)\n", + "\n", + "# y_pred = pipe.predict(x_test)\n", + "\n", + "# print('R2 score',r2_score(y_test,y_pred))\n", + "# print('MAE',mean_absolute_error(y_test,y_pred))" + ] + }, + { + "cell_type": "markdown", + "id": "380e5e94", + "metadata": {}, + "source": [ + "# Stacking" + ] + }, + { + "cell_type": "code", + "execution_count": 248, + "id": "4f7a1692", + "metadata": {}, + "outputs": [ + { + "name": "stderr", + "output_type": "stream", + "text": [ + "C:\\Users\\USER\\anaconda3\\lib\\site-packages\\sklearn\\preprocessing\\_encoders.py:828: FutureWarning: `sparse` was renamed to `sparse_output` in version 1.2 and will be removed in 1.4. 
`sparse_output` is ignored unless you leave `sparse` to its default value.\n", + " warnings.warn(\n" + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "R2 score 0.879816517413725\n", + "MAE 0.16716128617209366\n" + ] + } + ], + "source": [ + "# from sklearn.ensemble import VotingRegressor,StackingRegressor\n", + "\n", + "# step1 = ColumnTransformer(transformers=[\n", + "# ('col_tnf',OneHotEncoder(sparse=False,drop='first'),[0,1,7,10,11])\n", + "# ],remainder='passthrough')\n", + "\n", + "\n", + "# estimators = [\n", + "# ('rf', RandomForestRegressor(n_estimators=350,random_state=3,max_samples=0.5,max_features=0.75,max_depth=15)),\n", + "# ('gbdt',GradientBoostingRegressor(n_estimators=100,max_features=0.5)),\n", + "# ('xgb', XGBRegressor(n_estimators=25,learning_rate=0.3,max_depth=5))\n", + "# ]\n", + "\n", + "# step2 = StackingRegressor(estimators=estimators, final_estimator=Ridge(alpha=100))\n", + "\n", + "# pipe = Pipeline([\n", + "# ('step1',step1),\n", + "# ('step2',step2)\n", + "# ])\n", + "\n", + "# pipe.fit(x_train,y_train)\n", + "\n", + "# y_pred = pipe.predict(x_test)\n", + "\n", + "# print('R2 score',r2_score(y_test,y_pred))\n", + "# print('MAE',mean_absolute_error(y_test,y_pred))" + ] + }, + { + "cell_type": "markdown", + "id": "2e3a1281", + "metadata": {}, + "source": [ + "# Exporting The Model " + ] + }, + { + "cell_type": "code", + "execution_count": 265, + "id": "181ed00f", + "metadata": {}, + "outputs": [ + { + "data": { + "text/html": [ + "
\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
CompanyTypeNameRamWeightPriceTouchscreenIPSppiCpu brandHDDSSDGpu brandOs
0AppleUltrabook81.3771378.683201226.983005Intel Core i50128IntelMac
1AppleUltrabook81.3447895.523200127.677940Intel Core i500IntelMac
2HPNotebook81.8630636.000000141.211998Intel Core i50256IntelOther/No OS/ Linux
3AppleUltrabook161.83135195.336001220.534624Intel Core i70512AMDMac
4AppleUltrabook81.3796095.808001226.983005Intel Core i50256IntelMac
\n", + "
" + ], + "text/plain": [ + " Company TypeName Ram Weight Price Touchscreen IPS ppi \\\n", + "0 Apple Ultrabook 8 1.37 71378.6832 0 1 226.983005 \n", + "1 Apple Ultrabook 8 1.34 47895.5232 0 0 127.677940 \n", + "2 HP Notebook 8 1.86 30636.0000 0 0 141.211998 \n", + "3 Apple Ultrabook 16 1.83 135195.3360 0 1 220.534624 \n", + "4 Apple Ultrabook 8 1.37 96095.8080 0 1 226.983005 \n", + "\n", + " Cpu brand HDD SSD Gpu brand Os \n", + "0 Intel Core i5 0 128 Intel Mac \n", + "1 Intel Core i5 0 0 Intel Mac \n", + "2 Intel Core i5 0 256 Intel Other/No OS/ Linux \n", + "3 Intel Core i7 0 512 AMD Mac \n", + "4 Intel Core i5 0 256 Intel Mac " + ] + }, + "execution_count": 265, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "df.head()" + ] + }, + { + "cell_type": "code", + "execution_count": 266, + "id": "b4e513ed", + "metadata": {}, + "outputs": [], + "source": [ + "import pickle " + ] + }, + { + "cell_type": "code", + "execution_count": 268, + "id": "12b81e6e", + "metadata": {}, + "outputs": [], + "source": [ + "pickle.dump(df,open('df.pkl','wb')) #exported the cleaned DataFrame to df.pkl \n" + ] + }, + { + "cell_type": "code", + "execution_count": 269, + "id": "33721406", + "metadata": {}, + "outputs": [], + "source": [ + "pickle.dump(pipe,open('pipe.pkl','wb')) #exported the most recently fitted pipeline (model) to pipe.pkl" + ] + }, + { + "cell_type": "code", + "execution_count": 270, + "id": "4106f7b4", + "metadata": {}, + "outputs": [ + { + "data": { + "text/html": [ + "
\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
CompanyTypeNameRamWeightPriceTouchscreenIPSppiCpu brandHDDSSDGpu brandOs
0AppleUltrabook81.3771378.683201226.983005Intel Core i50128IntelMac
\n", + "
" + ], + "text/plain": [ + " Company TypeName Ram Weight Price Touchscreen IPS ppi \\\n", + "0 Apple Ultrabook 8 1.37 71378.6832 0 1 226.983005 \n", + "\n", + " Cpu brand HDD SSD Gpu brand Os \n", + "0 Intel Core i5 0 128 Intel Mac " + ] + }, + "execution_count": 270, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "df.head(1)" + ] + }, + { + "cell_type": "code", + "execution_count": 271, + "id": "1ce41b01", + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Note: you may need to restart the kernel to use updated packages.\n" + ] + }, + { + "name": "stderr", + "output_type": "stream", + "text": [ + "The syntax of the command is incorrect.\n" + ] + } + ], + "source": [] + }, + { + "cell_type": "code", + "execution_count": 272, + "id": "8ba05bbe", + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Name: pandas\n", + "Version: 1.5.3\n", + "Summary: Powerful data structures for data analysis, time series, and statistics\n", + "Home-page: https://pandas.pydata.org\n", + "Author: The Pandas Development Team\n", + "Author-email: pandas-dev@python.org\n", + "License: BSD-3-Clause\n", + "Location: c:\\users\\user\\anaconda3\\lib\\site-packages\n", + "Requires: numpy, python-dateutil, pytz\n", + "Required-by: datashader, holoviews, hvplot, seaborn, statsmodels, xarray\n", + "Note: you may need to restart the kernel to use updated packages.\n" + ] + } + ], + "source": [ + "pip show pandas\n" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "0ac57a18", + "metadata": {}, + "outputs": [], + "source": [] + } + ], + "metadata": { + "kernelspec": { + "display_name": "Python 3 (ipykernel)", + "language": "python", + "name": "python3" + }, + "language_info": { + "codemirror_mode": { + "name": "ipython", + "version": 3 + }, + "file_extension": ".py", + "mimetype": "text/x-python", + "name": "python", + "nbconvert_exporter": "python", + 
"pygments_lexer": "ipython3", + "version": "3.10.9" + } + }, + "nbformat": 4, + "nbformat_minor": 5 +}