diff --git "a/notebook/MODEL_TRAINING.ipynb" "b/notebook/MODEL_TRAINING.ipynb" --- "a/notebook/MODEL_TRAINING.ipynb" +++ "b/notebook/MODEL_TRAINING.ipynb" @@ -0,0 +1,1098 @@ +{ + "cells": [ + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "## Model Training \n", + "#### 1.1 Import Data and Required Packages\n" + ] + }, + { + "cell_type": "code", + "execution_count": 25, + "metadata": {}, + "outputs": [ + { + "name": "stderr", + "output_type": "stream", + "text": [ + ":241: RuntimeWarning: numpy.ufunc size changed, may indicate binary incompatibility. Expected 216 from C header, got 232 from PyObject\n" + ] + } + ], + "source": [ + "import numpy as np\n", + "import pandas as pd\n", + "import seaborn as sns\n", + "import matplotlib.pyplot as plt\n", + "%matplotlib inline\n", + "\n", + "## modelling\n", + "\n", + "from sklearn.metrics import mean_absolute_error,mean_squared_error,r2_score\n", + "from sklearn.linear_model import LinearRegression, Ridge, Lasso\n", + "from sklearn.neighbors import KNeighborsRegressor\n", + "from sklearn.ensemble import RandomForestRegressor, AdaBoostRegressor\n", + "from xgboost import XGBRegressor\n", + "from catboost import CatBoostRegressor\n", + "from sklearn.tree import DecisionTreeRegressor\n", + "import warnings" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "### Import data as pandas dataframe" + ] + }, + { + "cell_type": "code", + "execution_count": 3, + "metadata": {}, + "outputs": [], + "source": [ + "df = pd.read_csv('data/stud.csv')" + ] + }, + { + "cell_type": "code", + "execution_count": 4, + "metadata": {}, + "outputs": [ + { + "data": { + "text/html": [ + "
\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
genderrace_ethnicityparental_level_of_educationlunchtest_preparation_coursemath_scorereading_scorewriting_score
0femalegroup Bbachelor's degreestandardnone727274
1femalegroup Csome collegestandardcompleted699088
2femalegroup Bmaster's degreestandardnone909593
3malegroup Aassociate's degreefree/reducednone475744
4malegroup Csome collegestandardnone767875
\n", + "
" + ], + "text/plain": [ + " gender race_ethnicity parental_level_of_education lunch \\\n", + "0 female group B bachelor's degree standard \n", + "1 female group C some college standard \n", + "2 female group B master's degree standard \n", + "3 male group A associate's degree free/reduced \n", + "4 male group C some college standard \n", + "\n", + " test_preparation_course math_score reading_score writing_score \n", + "0 none 72 72 74 \n", + "1 completed 69 90 88 \n", + "2 none 90 95 93 \n", + "3 none 47 57 44 \n", + "4 none 76 78 75 " + ] + }, + "execution_count": 4, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "df.head()" + ] + }, + { + "cell_type": "code", + "execution_count": 5, + "metadata": {}, + "outputs": [ + { + "data": { + "text/html": [ + "
\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
genderrace_ethnicityparental_level_of_educationlunchtest_preparation_coursemath_scorereading_scorewriting_score
995femalegroup Emaster's degreestandardcompleted889995
996malegroup Chigh schoolfree/reducednone625555
997femalegroup Chigh schoolfree/reducedcompleted597165
998femalegroup Dsome collegestandardcompleted687877
999femalegroup Dsome collegefree/reducednone778686
\n", + "
" + ], + "text/plain": [ + " gender race_ethnicity parental_level_of_education lunch \\\n", + "995 female group E master's degree standard \n", + "996 male group C high school free/reduced \n", + "997 female group C high school free/reduced \n", + "998 female group D some college standard \n", + "999 female group D some college free/reduced \n", + "\n", + " test_preparation_course math_score reading_score writing_score \n", + "995 completed 88 99 95 \n", + "996 none 62 55 55 \n", + "997 completed 59 71 65 \n", + "998 completed 68 78 77 \n", + "999 none 77 86 86 " + ] + }, + "execution_count": 5, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "df.tail()" + ] + }, + { + "cell_type": "code", + "execution_count": 8, + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "shape of X: (1000, 7)\n", + "shape of y: (1000,)\n" + ] + }, + { + "data": { + "text/html": [ + "
\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
genderrace_ethnicityparental_level_of_educationlunchtest_preparation_coursereading_scorewriting_score
0femalegroup Bbachelor's degreestandardnone7274
1femalegroup Csome collegestandardcompleted9088
2femalegroup Bmaster's degreestandardnone9593
3malegroup Aassociate's degreefree/reducednone5744
4malegroup Csome collegestandardnone7875
\n", + "
" + ], + "text/plain": [ + " gender race_ethnicity parental_level_of_education lunch \\\n", + "0 female group B bachelor's degree standard \n", + "1 female group C some college standard \n", + "2 female group B master's degree standard \n", + "3 male group A associate's degree free/reduced \n", + "4 male group C some college standard \n", + "\n", + " test_preparation_course reading_score writing_score \n", + "0 none 72 74 \n", + "1 completed 90 88 \n", + "2 none 95 93 \n", + "3 none 57 44 \n", + "4 none 78 75 " + ] + }, + "execution_count": 8, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "## dependent and independent variables\n", + "\n", + "X = df.drop('math_score',axis=1)\n", + "y = df.math_score\n", + "print(f\"shape of X: {X.shape}\")\n", + "print(f\"shape of y: {y.shape}\")\n", + "X.head()" + ] + }, + { + "cell_type": "code", + "execution_count": 12, + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "length of numerical cols: 2\n", + "length of categorical cols: 5\n" + ] + } + ], + "source": [ + "## Create columns tranformers\n", + "\n", + "num_cols = X.select_dtypes(exclude='object').columns\n", + "cat_cols = X.select_dtypes(include='object').columns\n", + "\n", + "print(f\"length of numerical cols: {len(num_cols)}\")\n", + "print(f\"length of categorical cols: {len(cat_cols)}\")\n", + "\n", + "from sklearn.preprocessing import OneHotEncoder, StandardScaler\n", + "from sklearn.compose import ColumnTransformer\n", + "\n", + "\n", + "numeric_tranformer = StandardScaler()\n", + "oh_transformer = OneHotEncoder()\n", + "\n", + "preprocessor = ColumnTransformer([\n", + " ('ohe',oh_transformer,cat_cols),\n", + " ('ss',numeric_tranformer,num_cols)\n", + "])\n", + "\n" + ] + }, + { + "cell_type": "code", + "execution_count": 13, + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "shape of X_train: (700, 7)\n", + "shape of X_test: (300, 7)\n", + "shape of y_train: (700,)\n", + "shape of y_test: (300, 7)\n" + ] + } + ], + "source": [ + "from sklearn.model_selection import train_test_split\n", + "\n", + "X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.3, random_state=0)\n", + "\n", + "print(f\"shape of X_train: {X_train.shape}\")\n", + "print(f\"shape of X_test: {X_test.shape}\")\n", + "print(f\"shape of y_train: {y_train.shape}\")\n", + "print(f\"shape of y_test: {X_test.shape}\")\n" + ] + }, + { + "cell_type": "code", + "execution_count": 14, + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "shape of X_train after transform: (700, 19)\n", + "shape of X_test after transform: (300, 19)\n" + ] + } + ], + "source": [ + "## fit the pipeline\n", + "\n", + "X_train = preprocessor.fit_transform(X_train)\n", + "X_test = preprocessor.transform(X_test)\n", + "\n", + "\n", + "print(f\"shape of X_train after transform: {X_train.shape}\")\n", + "print(f\"shape of X_test after transform: {X_test.shape}\")\n" + ] + }, + { + "cell_type": "code", + "execution_count": 15, + "metadata": {}, + "outputs": [], + "source": [ + "## Create an evaluation function\n", + "\n", + "def evaluate_model(actual, predicted):\n", + " mse = mean_squared_error(actual,predicted)\n", + " mae = mean_squared_error(actual,predicted)\n", + " rmse = np.sqrt(mean_squared_error(actual,predicted))\n", + " r2 = r2_score(actual,predicted)\n", + " return mae, mse, rmse, r2" + ] + }, + { + "cell_type": "code", + "execution_count": 28, + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "LinearRegression\n", + "Model performance on train data\n", + "\n", + "-- Mean Absolute Error: 27.834805385044643\n", + "-- Mean Squared Error: 27.834805385044643\n", + "-- Root Mean Squared Error: 5.275870106915507\n", + "-- R2 Score 0.8848768448103892\n", + "==============================================================\n", + "\n", + "Model Perfromance on test data\n", + "\n", + "-- Mean Absolute Error: 31.053682454427083\n", + "-- Mean Squared Error: 31.053682454427083\n", + "-- Root Mean Squared Error: 5.572583104308727\n", + "-- R2 Score 0.845791335372628\n", + "\n", + "\n", + "Ridge\n", + "Model performance on train data\n", + "\n", + "-- Mean Absolute Error: 27.739417770649194\n", + "-- Mean Squared Error: 27.739417770649194\n", + "-- Root Mean Squared Error: 5.266822359891132\n", + "-- R2 Score 0.88527136250086\n", + "==============================================================\n", + "\n", + "Model Perfromance on test data\n", + "\n", + "-- Mean Absolute Error: 30.76468904919587\n", + "-- Mean Squared Error: 30.76468904919587\n", + "-- Root Mean Squared Error: 5.546592562032646\n", + "-- R2 Score 0.8472264401197775\n", + "\n", + "\n", + "Lasso\n", + "Model performance on train data\n", + "\n", + "-- Mean Absolute Error: 43.58337232191542\n", + "-- Mean Squared Error: 43.58337232191542\n", + "-- Root Mean Squared Error: 6.6017703930018214\n", + "-- R2 Score 0.8197416771522217\n", + "==============================================================\n", + "\n", + "Model Perfromance on test data\n", + "\n", + "-- Mean Absolute Error: 43.51995000348161\n", + "-- Mean Squared Error: 43.51995000348161\n", + "-- Root Mean Squared Error: 6.596965211631907\n", + "-- R2 Score 0.7838854253586233\n", + "\n", + "\n", + "KNeighborsRegressor\n", + "Model performance on train data\n", + "\n", + "-- Mean Absolute Error: 34.57731428571429\n", + "-- Mean Squared Error: 34.57731428571429\n", + "-- Root Mean Squared Error: 5.8802478081892335\n", + "-- R2 Score 0.8569902155416903\n", + "==============================================================\n", + "\n", + "Model Perfromance on test data\n", + "\n", + "-- Mean Absolute Error: 48.27386666666667\n", + "-- Mean Squared Error: 48.27386666666667\n", + "-- Root Mean Squared Error: 6.947939742590365\n", + "-- R2 Score 0.7602780756842187\n", + "\n", + "\n", + "DecisionTreeRegressor\n", + "Model performance on train data\n", + "\n", + "-- Mean Absolute Error: 0.002857142857142857\n", + "-- Mean Squared Error: 0.002857142857142857\n", + "-- Root Mean Squared Error: 0.05345224838248488\n", + "-- R2 Score 0.999988183021365\n", + "==============================================================\n", + "\n", + "Model Perfromance on test data\n", + "\n", + "-- Mean Absolute Error: 54.67333333333333\n", + "-- Mean Squared Error: 54.67333333333333\n", + "-- Root Mean Squared Error: 7.394141825346153\n", + "-- R2 Score 0.7284991325446313\n", + "\n", + "\n", + "AdaBoostRegressor\n", + "Model performance on train data\n", + "\n", + "-- Mean Absolute Error: 35.601696238383866\n", + "-- Mean Squared Error: 35.601696238383866\n", + "-- Root Mean Squared Error: 5.96671569947688\n", + "-- R2 Score 0.8527534306646536\n", + "==============================================================\n", + "\n", + "Model Perfromance on test data\n", + "\n", + "-- Mean Absolute Error: 41.20842698518752\n", + "-- Mean Squared Error: 41.20842698518752\n", + "-- Root Mean Squared Error: 6.419379018658076\n", + "-- R2 Score 0.7953641566952268\n", + "\n", + "\n", + "RandomForestRegressor\n", + "Model performance on train data\n", + "\n", + "-- Mean Absolute Error: 5.512448115267249\n", + "-- Mean Squared Error: 5.512448115267249\n", + "-- Root Mean Squared Error: 2.3478603270355007\n", + "-- R2 Score 0.9772008314385572\n", + "==============================================================\n", + "\n", + "Model Perfromance on test data\n", + "\n", + "-- Mean Absolute Error: 36.26516815006047\n", + "-- Mean Squared Error: 36.26516815006047\n", + "-- Root Mean Squared Error: 6.02205680395498\n", + "-- R2 Score 0.8199117556793764\n", + "\n", + "\n", + "CatBoostRegressor\n", + "Model performance on train data\n", + "\n", + "-- Mean Absolute Error: 7.617468052163866\n", + "-- Mean Squared Error: 7.617468052163866\n", + "-- Root Mean Squared Error: 2.759976096302985\n", + "-- R2 Score 0.9684945899714342\n", + "==============================================================\n", + "\n", + "Model Perfromance on test data\n", + "\n", + "-- Mean Absolute Error: 33.10743895683533\n", + "-- Mean Squared Error: 33.10743895683533\n", + "-- Root Mean Squared Error: 5.753906408418139\n", + "-- R2 Score 0.8355926399950043\n", + "\n", + "\n" + ] + } + ], + "source": [ + "models = {\n", + " \"LinearRegression\":LinearRegression(),\n", + " \"Ridge\":Ridge(),\n", + " \"Lasso\":Lasso(),\n", + " \"KNeighborsRegressor\":KNeighborsRegressor(),\n", + " \"DecisionTreeRegressor\":DecisionTreeRegressor(),\n", + " \"AdaBoostRegressor\":AdaBoostRegressor(),\n", + " \"RandomForestRegressor\":RandomForestRegressor(),\n", + " \"CatBoostRegressor\":CatBoostRegressor(verbose=False),\n", + "}\n", + "\n", + "model_list = []\n", + "r2_list = []\n", + "\n", + "for i in range(len(list(models))):\n", + " model = list(models.values())[i]\n", + " model.fit(X_train,y_train)\n", + "\n", + " ## make predictions\n", + "\n", + " y_train_pred = model.predict(X_train)\n", + " y_test_pred = model.predict(X_test)\n", + "\n", + " ## evaluate models\n", + "\n", + " train_mae, train_mse, train_rmse, train_r2 = evaluate_model(y_train, y_train_pred)\n", + "\n", + " test_mae, test_mse, test_rmse, test_r2 = evaluate_model(y_test,y_test_pred)\n", + "\n", + "\n", + " print(list(models.keys())[i])\n", + " model_list.append(list(models.keys())[i])\n", + "\n", + " print(\"Model performance on train data\\n\")\n", + "\n", + " print(f\"-- Mean Absolute Error: {train_mae}\")\n", + " print(f\"-- Mean Squared Error: {train_mse}\")\n", + " print(f\"-- Root Mean Squared Error: {train_rmse}\")\n", + " print(f\"-- R2 Score {train_r2}\")\n", + "\n", + "\n", + " print(\"==============================================================\\n\")\n", + "\n", + " print(f\"Model Perfromance on test data\\n\")\n", + " print(f\"-- Mean Absolute Error: {test_mae}\")\n", + " print(f\"-- Mean Squared Error: {test_mse}\")\n", + " print(f\"-- Root Mean Squared Error: {test_rmse}\")\n", + " print(f\"-- R2 Score {test_r2}\\n\\n\")\n", + "\n", + " r2_list.append(test_r2)\n", + "\n", + "\n" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "## RESULTS" + ] + }, + { + "cell_type": "code", + "execution_count": 36, + "metadata": {}, + "outputs": [ + { + "data": { + "text/html": [ + "
\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
Model NameR2_Score
1Ridge0.847226
0LinearRegression0.845791
7CatBoostRegressor0.835593
6RandomForestRegressor0.819912
5AdaBoostRegressor0.795364
2Lasso0.783885
3KNeighborsRegressor0.760278
4DecisionTreeRegressor0.728499
\n", + "
" + ], + "text/plain": [ + " Model Name R2_Score\n", + "1 Ridge 0.847226\n", + "0 LinearRegression 0.845791\n", + "7 CatBoostRegressor 0.835593\n", + "6 RandomForestRegressor 0.819912\n", + "5 AdaBoostRegressor 0.795364\n", + "2 Lasso 0.783885\n", + "3 KNeighborsRegressor 0.760278\n", + "4 DecisionTreeRegressor 0.728499" + ] + }, + "execution_count": 36, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "pd.DataFrame(list(zip(model_list,r2_list)),columns=['Model Name','R2_Score']).sort_values(by='R2_Score',ascending=False)" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "## Linear Regression" + ] + }, + { + "cell_type": "code", + "execution_count": 42, + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "R2 score (fit of the model): 84.58\n" + ] + } + ], + "source": [ + "lin_model = LinearRegression(fit_intercept=True)\n", + "lin_model.fit(X_train,y_train)\n", + "y_pred = lin_model.predict(X_test)\n", + "score = r2_score(y_test,y_pred)*100\n", + "print(f\"R2 score (fit of the model): {'%.2f'%score}\")" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "## plot y_pred and y_test" + ] + }, + { + "cell_type": "code", + "execution_count": 44, + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "(Text(0.5, 0, 'Actual'), Text(0, 0.5, 'predicted'))" + ] + }, + "execution_count": 44, + "metadata": {}, + "output_type": "execute_result" + }, + { + "data": { + "image/png": "", + "text/plain": [ + "
" + ] + }, + "metadata": {}, + "output_type": "display_data" + } + ], + "source": [ + "plt.scatter(y_test,y_pred)\n", + "plt.xlabel(\"Actual\"),plt.ylabel(\"predicted\")" + ] + }, + { + "cell_type": "code", + "execution_count": 45, + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "" + ] + }, + "execution_count": 45, + "metadata": {}, + "output_type": "execute_result" + }, + { + "data": { + "image/png": "", + "text/plain": [ + "
" + ] + }, + "metadata": {}, + "output_type": "display_data" + } + ], + "source": [ + "sns.regplot(x=y_test,y=y_pred,color='g')" + ] + }, + { + "cell_type": "code", + "execution_count": 48, + "metadata": {}, + "outputs": [ + { + "data": { + "text/html": [ + "
\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
Actual ValuePredicted ValueDifferenec
16978.187500-9.187500
29084.3437505.656250
57172.765625-1.765625
86466.015625-2.015625
105858.406250-0.406250
............
9925566.125000-11.125000
9936262.968750-0.968750
9966258.7968753.203125
9986866.8281251.171875
9997776.3437500.656250
\n", + "

300 rows × 3 columns

\n", + "
" + ], + "text/plain": [ + " Actual Value Predicted Value Differenec\n", + "1 69 78.187500 -9.187500\n", + "2 90 84.343750 5.656250\n", + "5 71 72.765625 -1.765625\n", + "8 64 66.015625 -2.015625\n", + "10 58 58.406250 -0.406250\n", + ".. ... ... ...\n", + "992 55 66.125000 -11.125000\n", + "993 62 62.968750 -0.968750\n", + "996 62 58.796875 3.203125\n", + "998 68 66.828125 1.171875\n", + "999 77 76.343750 0.656250\n", + "\n", + "[300 rows x 3 columns]" + ] + }, + "execution_count": 48, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "pd.DataFrame({'Actual Value':y_test,\n", + " \"Predicted Value\":y_pred,\n", + " \"Differenec\":y_test-y_pred}).sort_index()" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [] + } + ], + "metadata": { + "kernelspec": { + "display_name": "venv", + "language": "python", + "name": "python3" + }, + "language_info": { + "codemirror_mode": { + "name": "ipython", + "version": 3 + }, + "file_extension": ".py", + "mimetype": "text/x-python", + "name": "python", + "nbconvert_exporter": "python", + "pygments_lexer": "ipython3", + "version": "3.10.9" + }, + "orig_nbformat": 4 + }, + "nbformat": 4, + "nbformat_minor": 2 +}