diff --git "a/P1M2_Allen.ipynb" "b/P1M2_Allen.ipynb" new file mode 100644--- /dev/null +++ "b/P1M2_Allen.ipynb" @@ -0,0 +1,4551 @@ +{ + "cells": [ + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "# Bab 1: Introduction" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "Name: Allen\n", + "\n", + "Batch: FTDS_BSD_003\n", + "\n", + "Dataset: `Hotel Reservation Datasets`\n", + "\n", + "Problem Statement:\n", + "The Hotel Reservation Project explores customer activity related to booking and reservation status. The main goal of this project is to predict whether a customer will cancel their booking or not. Online hotel reservation channels have improved the booking process, but they have also brought some challenges, such as cancellations; typical reasons for cancellation include a change of plans, scheduling conflicts, etc. Cancelling is often made easier by the option to do so free of charge or at a low cost, which is beneficial to hotel guests but is a less desirable and possibly revenue-diminishing factor for hotels to deal with. This model helps hotels manage this problem and minimize the resulting business loss. The dataset includes various features such as the number of adults and children, lead time, room type, and more. 
The target feature in this dataset is `booking_status`, which has two categories: \"Not_Canceled\" and \"Canceled\".\n" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "# Bab 2: Import Library" + ] + }, + { + "cell_type": "code", + "execution_count": 91, + "metadata": {}, + "outputs": [], + "source": [ + "# Import Library\n", + "import numpy as np\n", + "import pandas as pd\n", + "import pickle\n", + "import phik\n", + "from phik import resources, report\n", + "pd.set_option('display.max_columns', None)\n", + "from sklearn.model_selection import train_test_split\n", + "from sklearn import preprocessing\n", + "from sklearn.preprocessing import StandardScaler\n", + "from sklearn.preprocessing import OneHotEncoder\n", + "from sklearn.compose import ColumnTransformer\n", + "from scipy import stats\n", + "import seaborn as sns\n", + "import matplotlib\n", + "import matplotlib.pyplot as plt\n", + "import warnings\n", + "warnings.filterwarnings('ignore')\n", + "from sklearn.svm import SVC\n", + "from sklearn.neighbors import KNeighborsClassifier\n", + "from sklearn.tree import DecisionTreeClassifier\n", + "from sklearn.ensemble import RandomForestClassifier\n", + "from sklearn.ensemble import AdaBoostClassifier\n", + "from sklearn.model_selection import StratifiedKFold\n", + "from sklearn.model_selection import RandomizedSearchCV\n", + "from sklearn.pipeline import Pipeline\n", + "from sklearn.model_selection import cross_val_score\n", + "from sklearn.metrics import classification_report,ConfusionMatrixDisplay, precision_score,recall_score,accuracy_score,f1_score" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "# Bab 3: Data Loading" + ] + }, + { + "cell_type": "code", + "execution_count": 2, + "metadata": {}, + "outputs": [ + { + "data": { + "text/html": [ + "
\n", + " | Booking_ID | \n", + "no_of_adults | \n", + "no_of_children | \n", + "no_of_weekend_nights | \n", + "no_of_week_nights | \n", + "type_of_meal_plan | \n", + "required_car_parking_space | \n", + "room_type_reserved | \n", + "lead_time | \n", + "arrival_year | \n", + "arrival_month | \n", + "arrival_date | \n", + "market_segment_type | \n", + "repeated_guest | \n", + "no_of_previous_cancellations | \n", + "no_of_previous_bookings_not_canceled | \n", + "avg_price_per_room | \n", + "no_of_special_requests | \n", + "booking_status | \n", + "
---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
0 | \n", + "INN00001 | \n", + "2 | \n", + "0 | \n", + "1 | \n", + "2 | \n", + "Meal Plan 1 | \n", + "0 | \n", + "Room_Type 1 | \n", + "224 | \n", + "2017 | \n", + "10 | \n", + "2 | \n", + "Offline | \n", + "0 | \n", + "0 | \n", + "0 | \n", + "65.00 | \n", + "0 | \n", + "Not_Canceled | \n", + "
1 | \n", + "INN00002 | \n", + "2 | \n", + "0 | \n", + "2 | \n", + "3 | \n", + "Not Selected | \n", + "0 | \n", + "Room_Type 1 | \n", + "5 | \n", + "2018 | \n", + "11 | \n", + "6 | \n", + "Online | \n", + "0 | \n", + "0 | \n", + "0 | \n", + "106.68 | \n", + "1 | \n", + "Not_Canceled | \n", + "
2 | \n", + "INN00003 | \n", + "1 | \n", + "0 | \n", + "2 | \n", + "1 | \n", + "Meal Plan 1 | \n", + "0 | \n", + "Room_Type 1 | \n", + "1 | \n", + "2018 | \n", + "2 | \n", + "28 | \n", + "Online | \n", + "0 | \n", + "0 | \n", + "0 | \n", + "60.00 | \n", + "0 | \n", + "Canceled | \n", + "
3 | \n", + "INN00004 | \n", + "2 | \n", + "0 | \n", + "0 | \n", + "2 | \n", + "Meal Plan 1 | \n", + "0 | \n", + "Room_Type 1 | \n", + "211 | \n", + "2018 | \n", + "5 | \n", + "20 | \n", + "Online | \n", + "0 | \n", + "0 | \n", + "0 | \n", + "100.00 | \n", + "0 | \n", + "Canceled | \n", + "
4 | \n", + "INN00005 | \n", + "2 | \n", + "0 | \n", + "1 | \n", + "1 | \n", + "Not Selected | \n", + "0 | \n", + "Room_Type 1 | \n", + "48 | \n", + "2018 | \n", + "4 | \n", + "11 | \n", + "Online | \n", + "0 | \n", + "0 | \n", + "0 | \n", + "94.50 | \n", + "0 | \n", + "Canceled | \n", + "
\n", + " | no_of_adults | \n", + "no_of_children | \n", + "no_of_weekend_nights | \n", + "no_of_week_nights | \n", + "required_car_parking_space | \n", + "lead_time | \n", + "arrival_year | \n", + "arrival_month | \n", + "arrival_date | \n", + "repeated_guest | \n", + "no_of_previous_cancellations | \n", + "no_of_previous_bookings_not_canceled | \n", + "avg_price_per_room | \n", + "no_of_special_requests | \n", + "
---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
count | \n", + "36275.000000 | \n", + "36275.000000 | \n", + "36275.000000 | \n", + "36275.000000 | \n", + "36275.000000 | \n", + "36275.000000 | \n", + "36275.000000 | \n", + "36275.000000 | \n", + "36275.000000 | \n", + "36275.000000 | \n", + "36275.000000 | \n", + "36275.000000 | \n", + "36275.000000 | \n", + "36275.000000 | \n", + "
mean | \n", + "1.844962 | \n", + "0.105279 | \n", + "0.810724 | \n", + "2.204300 | \n", + "0.030986 | \n", + "85.232557 | \n", + "2017.820427 | \n", + "7.423653 | \n", + "15.596995 | \n", + "0.025637 | \n", + "0.023349 | \n", + "0.153411 | \n", + "103.423539 | \n", + "0.619655 | \n", + "
std | \n", + "0.518715 | \n", + "0.402648 | \n", + "0.870644 | \n", + "1.410905 | \n", + "0.173281 | \n", + "85.930817 | \n", + "0.383836 | \n", + "3.069894 | \n", + "8.740447 | \n", + "0.158053 | \n", + "0.368331 | \n", + "1.754171 | \n", + "35.089424 | \n", + "0.786236 | \n", + "
min | \n", + "0.000000 | \n", + "0.000000 | \n", + "0.000000 | \n", + "0.000000 | \n", + "0.000000 | \n", + "0.000000 | \n", + "2017.000000 | \n", + "1.000000 | \n", + "1.000000 | \n", + "0.000000 | \n", + "0.000000 | \n", + "0.000000 | \n", + "0.000000 | \n", + "0.000000 | \n", + "
25% | \n", + "2.000000 | \n", + "0.000000 | \n", + "0.000000 | \n", + "1.000000 | \n", + "0.000000 | \n", + "17.000000 | \n", + "2018.000000 | \n", + "5.000000 | \n", + "8.000000 | \n", + "0.000000 | \n", + "0.000000 | \n", + "0.000000 | \n", + "80.300000 | \n", + "0.000000 | \n", + "
50% | \n", + "2.000000 | \n", + "0.000000 | \n", + "1.000000 | \n", + "2.000000 | \n", + "0.000000 | \n", + "57.000000 | \n", + "2018.000000 | \n", + "8.000000 | \n", + "16.000000 | \n", + "0.000000 | \n", + "0.000000 | \n", + "0.000000 | \n", + "99.450000 | \n", + "0.000000 | \n", + "
75% | \n", + "2.000000 | \n", + "0.000000 | \n", + "2.000000 | \n", + "3.000000 | \n", + "0.000000 | \n", + "126.000000 | \n", + "2018.000000 | \n", + "10.000000 | \n", + "23.000000 | \n", + "0.000000 | \n", + "0.000000 | \n", + "0.000000 | \n", + "120.000000 | \n", + "1.000000 | \n", + "
max | \n", + "4.000000 | \n", + "10.000000 | \n", + "7.000000 | \n", + "17.000000 | \n", + "1.000000 | \n", + "443.000000 | \n", + "2018.000000 | \n", + "12.000000 | \n", + "31.000000 | \n", + "1.000000 | \n", + "13.000000 | \n", + "58.000000 | \n", + "540.000000 | \n", + "5.000000 | \n", + "
\n", + " | count | \n", + "unique | \n", + "top | \n", + "freq | \n", + "
---|---|---|---|---|
Booking_ID | \n", + "36275 | \n", + "36275 | \n", + "INN00001 | \n", + "1 | \n", + "
type_of_meal_plan | \n", + "36275 | \n", + "4 | \n", + "Meal Plan 1 | \n", + "27835 | \n", + "
room_type_reserved | \n", + "36275 | \n", + "7 | \n", + "Room_Type 1 | \n", + "28130 | \n", + "
market_segment_type | \n", + "36275 | \n", + "5 | \n", + "Online | \n", + "23214 | \n", + "
booking_status | \n", + "36275 | \n", + "2 | \n", + "Not_Canceled | \n", + "24390 | \n", + "
\n", + " | no_of_weekend_nights | \n", + "no_of_week_nights | \n", + "type_of_meal_plan | \n", + "required_car_parking_space | \n", + "lead_time | \n", + "arrival_month | \n", + "market_segment_type | \n", + "repeated_guest | \n", + "avg_price_per_room | \n", + "no_of_special_requests | \n", + "
---|---|---|---|---|---|---|---|---|---|---|
0 | \n", + "1 | \n", + "2 | \n", + "Meal Plan 1 | \n", + "0 | \n", + "224 | \n", + "10 | \n", + "Offline | \n", + "0 | \n", + "65.00 | \n", + "0 | \n", + "
1 | \n", + "2 | \n", + "3 | \n", + "Not Selected | \n", + "0 | \n", + "5 | \n", + "11 | \n", + "Online | \n", + "0 | \n", + "106.68 | \n", + "1 | \n", + "
2 | \n", + "2 | \n", + "1 | \n", + "Meal Plan 1 | \n", + "0 | \n", + "1 | \n", + "2 | \n", + "Online | \n", + "0 | \n", + "60.00 | \n", + "0 | \n", + "
3 | \n", + "0 | \n", + "2 | \n", + "Meal Plan 1 | \n", + "0 | \n", + "211 | \n", + "5 | \n", + "Online | \n", + "0 | \n", + "100.00 | \n", + "0 | \n", + "
4 | \n", + "1 | \n", + "1 | \n", + "Not Selected | \n", + "0 | \n", + "48 | \n", + "4 | \n", + "Online | \n", + "0 | \n", + "94.50 | \n", + "0 | \n", + "
... | \n", + "... | \n", + "... | \n", + "... | \n", + "... | \n", + "... | \n", + "... | \n", + "... | \n", + "... | \n", + "... | \n", + "... | \n", + "
36270 | \n", + "2 | \n", + "6 | \n", + "Meal Plan 1 | \n", + "0 | \n", + "85 | \n", + "8 | \n", + "Online | \n", + "0 | \n", + "167.80 | \n", + "1 | \n", + "
36271 | \n", + "1 | \n", + "3 | \n", + "Meal Plan 1 | \n", + "0 | \n", + "228 | \n", + "10 | \n", + "Online | \n", + "0 | \n", + "90.95 | \n", + "2 | \n", + "
36272 | \n", + "2 | \n", + "6 | \n", + "Meal Plan 1 | \n", + "0 | \n", + "148 | \n", + "7 | \n", + "Online | \n", + "0 | \n", + "98.39 | \n", + "2 | \n", + "
36273 | \n", + "0 | \n", + "3 | \n", + "Not Selected | \n", + "0 | \n", + "63 | \n", + "4 | \n", + "Online | \n", + "0 | \n", + "94.50 | \n", + "0 | \n", + "
36274 | \n", + "1 | \n", + "2 | \n", + "Meal Plan 1 | \n", + "0 | \n", + "207 | \n", + "12 | \n", + "Offline | \n", + "0 | \n", + "161.67 | \n", + "0 | \n", + "
36275 rows × 10 columns
\n", + "\n", + " | no_of_weekend_nights | \n", + "no_of_week_nights | \n", + "type_of_meal_plan | \n", + "required_car_parking_space | \n", + "lead_time | \n", + "arrival_month | \n", + "market_segment_type | \n", + "repeated_guest | \n", + "avg_price_per_room | \n", + "no_of_special_requests | \n", + "
---|---|---|---|---|---|---|---|---|---|---|
18478 | \n", + "0 | \n", + "3 | \n", + "Meal Plan 1 | \n", + "0 | \n", + "20 | \n", + "9 | \n", + "Online | \n", + "0 | \n", + "136.67 | \n", + "2 | \n", + "
11575 | \n", + "1 | \n", + "4 | \n", + "Meal Plan 1 | \n", + "0 | \n", + "11 | \n", + "7 | \n", + "Offline | \n", + "0 | \n", + "85.00 | \n", + "0 | \n", + "
36108 | \n", + "2 | \n", + "2 | \n", + "Meal Plan 1 | \n", + "0 | \n", + "24 | \n", + "12 | \n", + "Online | \n", + "0 | \n", + "95.20 | \n", + "1 | \n", + "
23151 | \n", + "0 | \n", + "2 | \n", + "Meal Plan 1 | \n", + "0 | \n", + "188 | \n", + "6 | \n", + "Offline | \n", + "0 | \n", + "130.00 | \n", + "0 | \n", + "
19377 | \n", + "1 | \n", + "0 | \n", + "Meal Plan 1 | \n", + "0 | \n", + "286 | \n", + "10 | \n", + "Offline | \n", + "0 | \n", + "90.00 | \n", + "0 | \n", + "
\n", + " | no_of_weekend_nights | \n", + "no_of_week_nights | \n", + "type_of_meal_plan | \n", + "required_car_parking_space | \n", + "lead_time | \n", + "arrival_month | \n", + "market_segment_type | \n", + "repeated_guest | \n", + "avg_price_per_room | \n", + "no_of_special_requests | \n", + "
---|---|---|---|---|---|---|---|---|---|---|
3052 | \n", + "2 | \n", + "3 | \n", + "Meal Plan 1 | \n", + "0 | \n", + "241 | \n", + "10 | \n", + "Online | \n", + "0 | \n", + "150.45 | \n", + "1 | \n", + "
1124 | \n", + "0 | \n", + "3 | \n", + "Not Selected | \n", + "0 | \n", + "15 | \n", + "4 | \n", + "Online | \n", + "0 | \n", + "117.67 | \n", + "0 | \n", + "
4052 | \n", + "1 | \n", + "2 | \n", + "Not Selected | \n", + "0 | \n", + "13 | \n", + "10 | \n", + "Online | \n", + "0 | \n", + "140.00 | \n", + "2 | \n", + "
29035 | \n", + "0 | \n", + "2 | \n", + "Meal Plan 1 | \n", + "0 | \n", + "4 | \n", + "12 | \n", + "Online | \n", + "0 | \n", + "62.37 | \n", + "0 | \n", + "
22362 | \n", + "2 | \n", + "9 | \n", + "Meal Plan 2 | \n", + "0 | \n", + "5 | \n", + "2 | \n", + "Online | \n", + "0 | \n", + "146.00 | \n", + "0 | \n", + "
\n", + " | no_of_weekend_nights | \n", + "no_of_week_nights | \n", + "type_of_meal_plan | \n", + "required_car_parking_space | \n", + "lead_time | \n", + "arrival_month | \n", + "market_segment_type | \n", + "repeated_guest | \n", + "avg_price_per_room | \n", + "no_of_special_requests | \n", + "
---|---|---|---|---|---|---|---|---|---|---|
18478 | \n", + "0.0 | \n", + "3.0 | \n", + "Meal Plan 1 | \n", + "0 | \n", + "20.0 | \n", + "9.0 | \n", + "Online | \n", + "0.0 | \n", + "136.67 | \n", + "2.0 | \n", + "
11575 | \n", + "1.0 | \n", + "4.0 | \n", + "Meal Plan 1 | \n", + "0 | \n", + "11.0 | \n", + "7.0 | \n", + "Offline | \n", + "0.0 | \n", + "85.00 | \n", + "0.0 | \n", + "
36108 | \n", + "2.0 | \n", + "2.0 | \n", + "Meal Plan 1 | \n", + "0 | \n", + "24.0 | \n", + "12.0 | \n", + "Online | \n", + "0.0 | \n", + "95.20 | \n", + "1.0 | \n", + "
23151 | \n", + "0.0 | \n", + "2.0 | \n", + "Meal Plan 1 | \n", + "0 | \n", + "188.0 | \n", + "6.0 | \n", + "Offline | \n", + "0.0 | \n", + "130.00 | \n", + "0.0 | \n", + "
19377 | \n", + "1.0 | \n", + "0.0 | \n", + "Meal Plan 1 | \n", + "0 | \n", + "286.0 | \n", + "10.0 | \n", + "Offline | \n", + "0.0 | \n", + "90.00 | \n", + "0.0 | \n", + "
\n", + " | no_of_weekend_nights | \n", + "no_of_week_nights | \n", + "type_of_meal_plan | \n", + "required_car_parking_space | \n", + "lead_time | \n", + "arrival_month | \n", + "market_segment_type | \n", + "repeated_guest | \n", + "avg_price_per_room | \n", + "no_of_special_requests | \n", + "
---|---|---|---|---|---|---|---|---|---|---|
3052 | \n", + "2.0 | \n", + "3.0 | \n", + "Meal Plan 1 | \n", + "0 | \n", + "241.0 | \n", + "10.0 | \n", + "Online | \n", + "0.0 | \n", + "150.45 | \n", + "1.0 | \n", + "
1124 | \n", + "0.0 | \n", + "3.0 | \n", + "Not Selected | \n", + "0 | \n", + "15.0 | \n", + "4.0 | \n", + "Online | \n", + "0.0 | \n", + "117.67 | \n", + "0.0 | \n", + "
4052 | \n", + "1.0 | \n", + "2.0 | \n", + "Not Selected | \n", + "0 | \n", + "13.0 | \n", + "10.0 | \n", + "Online | \n", + "0.0 | \n", + "140.00 | \n", + "2.0 | \n", + "
29035 | \n", + "0.0 | \n", + "2.0 | \n", + "Meal Plan 1 | \n", + "0 | \n", + "4.0 | \n", + "12.0 | \n", + "Online | \n", + "0.0 | \n", + "62.37 | \n", + "0.0 | \n", + "
22362 | \n", + "2.0 | \n", + "6.0 | \n", + "Meal Plan 2 | \n", + "0 | \n", + "5.0 | \n", + "2.0 | \n", + "Online | \n", + "0.0 | \n", + "146.00 | \n", + "0.0 | \n", + "
ColumnTransformer(remainder='passthrough',\n", + " transformers=[('pipe_num',\n", + " Pipeline(steps=[('scaler', StandardScaler())]),\n", + " ['no_of_week_nights', 'lead_time',\n", + " 'avg_price_per_room', 'arrival_month']),\n", + " ('pipe_cat',\n", + " Pipeline(steps=[('encoder', OneHotEncoder())]),\n", + " ['market_segment_type', 'type_of_meal_plan'])])In a Jupyter environment, please rerun this cell to show the HTML representation or trust the notebook.
ColumnTransformer(remainder='passthrough',\n", + " transformers=[('pipe_num',\n", + " Pipeline(steps=[('scaler', StandardScaler())]),\n", + " ['no_of_week_nights', 'lead_time',\n", + " 'avg_price_per_room', 'arrival_month']),\n", + " ('pipe_cat',\n", + " Pipeline(steps=[('encoder', OneHotEncoder())]),\n", + " ['market_segment_type', 'type_of_meal_plan'])])
['no_of_week_nights', 'lead_time', 'avg_price_per_room', 'arrival_month']
StandardScaler()
['market_segment_type', 'type_of_meal_plan']
OneHotEncoder()
['no_of_weekend_nights', 'required_car_parking_space', 'repeated_guest', 'no_of_special_requests']
passthrough
Pipeline(steps=[('preprocessing_pipeline',\n", + " ColumnTransformer(remainder='passthrough',\n", + " transformers=[('pipe_num',\n", + " Pipeline(steps=[('scaler',\n", + " StandardScaler())]),\n", + " ['no_of_week_nights',\n", + " 'lead_time',\n", + " 'avg_price_per_room',\n", + " 'arrival_month']),\n", + " ('pipe_cat',\n", + " Pipeline(steps=[('encoder',\n", + " OneHotEncoder())]),\n", + " ['market_segment_type',\n", + " 'type_of_meal_plan'])])),\n", + " ('svm', SVC())])In a Jupyter environment, please rerun this cell to show the HTML representation or trust the notebook.
Pipeline(steps=[('preprocessing_pipeline',\n", + " ColumnTransformer(remainder='passthrough',\n", + " transformers=[('pipe_num',\n", + " Pipeline(steps=[('scaler',\n", + " StandardScaler())]),\n", + " ['no_of_week_nights',\n", + " 'lead_time',\n", + " 'avg_price_per_room',\n", + " 'arrival_month']),\n", + " ('pipe_cat',\n", + " Pipeline(steps=[('encoder',\n", + " OneHotEncoder())]),\n", + " ['market_segment_type',\n", + " 'type_of_meal_plan'])])),\n", + " ('svm', SVC())])
ColumnTransformer(remainder='passthrough',\n", + " transformers=[('pipe_num',\n", + " Pipeline(steps=[('scaler', StandardScaler())]),\n", + " ['no_of_week_nights', 'lead_time',\n", + " 'avg_price_per_room', 'arrival_month']),\n", + " ('pipe_cat',\n", + " Pipeline(steps=[('encoder', OneHotEncoder())]),\n", + " ['market_segment_type', 'type_of_meal_plan'])])
['no_of_week_nights', 'lead_time', 'avg_price_per_room', 'arrival_month']
StandardScaler()
['market_segment_type', 'type_of_meal_plan']
OneHotEncoder()
['no_of_weekend_nights', 'required_car_parking_space', 'repeated_guest', 'no_of_special_requests']
passthrough
SVC()
Pipeline(steps=[('preprocessing_pipeline',\n", + " ColumnTransformer(remainder='passthrough',\n", + " transformers=[('pipe_num',\n", + " Pipeline(steps=[('scaler',\n", + " StandardScaler())]),\n", + " ['no_of_week_nights',\n", + " 'lead_time',\n", + " 'avg_price_per_room',\n", + " 'arrival_month']),\n", + " ('pipe_cat',\n", + " Pipeline(steps=[('encoder',\n", + " OneHotEncoder())]),\n", + " ['market_segment_type',\n", + " 'type_of_meal_plan'])])),\n", + " ('knn', KNeighborsClassifier())])In a Jupyter environment, please rerun this cell to show the HTML representation or trust the notebook.
Pipeline(steps=[('preprocessing_pipeline',\n", + " ColumnTransformer(remainder='passthrough',\n", + " transformers=[('pipe_num',\n", + " Pipeline(steps=[('scaler',\n", + " StandardScaler())]),\n", + " ['no_of_week_nights',\n", + " 'lead_time',\n", + " 'avg_price_per_room',\n", + " 'arrival_month']),\n", + " ('pipe_cat',\n", + " Pipeline(steps=[('encoder',\n", + " OneHotEncoder())]),\n", + " ['market_segment_type',\n", + " 'type_of_meal_plan'])])),\n", + " ('knn', KNeighborsClassifier())])
ColumnTransformer(remainder='passthrough',\n", + " transformers=[('pipe_num',\n", + " Pipeline(steps=[('scaler', StandardScaler())]),\n", + " ['no_of_week_nights', 'lead_time',\n", + " 'avg_price_per_room', 'arrival_month']),\n", + " ('pipe_cat',\n", + " Pipeline(steps=[('encoder', OneHotEncoder())]),\n", + " ['market_segment_type', 'type_of_meal_plan'])])
['no_of_week_nights', 'lead_time', 'avg_price_per_room', 'arrival_month']
StandardScaler()
['market_segment_type', 'type_of_meal_plan']
OneHotEncoder()
['no_of_weekend_nights', 'required_car_parking_space', 'repeated_guest', 'no_of_special_requests']
passthrough
KNeighborsClassifier()
Pipeline(steps=[('preprocessing_pipeline',\n", + " ColumnTransformer(remainder='passthrough',\n", + " transformers=[('pipe_num',\n", + " Pipeline(steps=[('scaler',\n", + " StandardScaler())]),\n", + " ['no_of_week_nights',\n", + " 'lead_time',\n", + " 'avg_price_per_room',\n", + " 'arrival_month']),\n", + " ('pipe_cat',\n", + " Pipeline(steps=[('encoder',\n", + " OneHotEncoder())]),\n", + " ['market_segment_type',\n", + " 'type_of_meal_plan'])])),\n", + " ('dt', DecisionTreeClassifier())])In a Jupyter environment, please rerun this cell to show the HTML representation or trust the notebook.
Pipeline(steps=[('preprocessing_pipeline',\n", + " ColumnTransformer(remainder='passthrough',\n", + " transformers=[('pipe_num',\n", + " Pipeline(steps=[('scaler',\n", + " StandardScaler())]),\n", + " ['no_of_week_nights',\n", + " 'lead_time',\n", + " 'avg_price_per_room',\n", + " 'arrival_month']),\n", + " ('pipe_cat',\n", + " Pipeline(steps=[('encoder',\n", + " OneHotEncoder())]),\n", + " ['market_segment_type',\n", + " 'type_of_meal_plan'])])),\n", + " ('dt', DecisionTreeClassifier())])
ColumnTransformer(remainder='passthrough',\n", + " transformers=[('pipe_num',\n", + " Pipeline(steps=[('scaler', StandardScaler())]),\n", + " ['no_of_week_nights', 'lead_time',\n", + " 'avg_price_per_room', 'arrival_month']),\n", + " ('pipe_cat',\n", + " Pipeline(steps=[('encoder', OneHotEncoder())]),\n", + " ['market_segment_type', 'type_of_meal_plan'])])
['no_of_week_nights', 'lead_time', 'avg_price_per_room', 'arrival_month']
StandardScaler()
['market_segment_type', 'type_of_meal_plan']
OneHotEncoder()
['no_of_weekend_nights', 'required_car_parking_space', 'repeated_guest', 'no_of_special_requests']
passthrough
DecisionTreeClassifier()
Pipeline(steps=[('preprocessing_pipeline',\n", + " ColumnTransformer(remainder='passthrough',\n", + " transformers=[('pipe_num',\n", + " Pipeline(steps=[('scaler',\n", + " StandardScaler())]),\n", + " ['no_of_week_nights',\n", + " 'lead_time',\n", + " 'avg_price_per_room',\n", + " 'arrival_month']),\n", + " ('pipe_cat',\n", + " Pipeline(steps=[('encoder',\n", + " OneHotEncoder())]),\n", + " ['market_segment_type',\n", + " 'type_of_meal_plan'])])),\n", + " ('rf', RandomForestClassifier())])In a Jupyter environment, please rerun this cell to show the HTML representation or trust the notebook.
Pipeline(steps=[('preprocessing_pipeline',\n", + " ColumnTransformer(remainder='passthrough',\n", + " transformers=[('pipe_num',\n", + " Pipeline(steps=[('scaler',\n", + " StandardScaler())]),\n", + " ['no_of_week_nights',\n", + " 'lead_time',\n", + " 'avg_price_per_room',\n", + " 'arrival_month']),\n", + " ('pipe_cat',\n", + " Pipeline(steps=[('encoder',\n", + " OneHotEncoder())]),\n", + " ['market_segment_type',\n", + " 'type_of_meal_plan'])])),\n", + " ('rf', RandomForestClassifier())])
ColumnTransformer(remainder='passthrough',\n", + " transformers=[('pipe_num',\n", + " Pipeline(steps=[('scaler', StandardScaler())]),\n", + " ['no_of_week_nights', 'lead_time',\n", + " 'avg_price_per_room', 'arrival_month']),\n", + " ('pipe_cat',\n", + " Pipeline(steps=[('encoder', OneHotEncoder())]),\n", + " ['market_segment_type', 'type_of_meal_plan'])])
['no_of_week_nights', 'lead_time', 'avg_price_per_room', 'arrival_month']
StandardScaler()
['market_segment_type', 'type_of_meal_plan']
OneHotEncoder()
['no_of_weekend_nights', 'required_car_parking_space', 'repeated_guest', 'no_of_special_requests']
passthrough
RandomForestClassifier()
Pipeline(steps=[('preprocessing_pipeline',\n", + " ColumnTransformer(remainder='passthrough',\n", + " transformers=[('pipe_num',\n", + " Pipeline(steps=[('scaler',\n", + " StandardScaler())]),\n", + " ['no_of_week_nights',\n", + " 'lead_time',\n", + " 'avg_price_per_room',\n", + " 'arrival_month']),\n", + " ('pipe_cat',\n", + " Pipeline(steps=[('encoder',\n", + " OneHotEncoder())]),\n", + " ['market_segment_type',\n", + " 'type_of_meal_plan'])])),\n", + " ('Ada', AdaBoostClassifier())])In a Jupyter environment, please rerun this cell to show the HTML representation or trust the notebook.
Pipeline(steps=[('preprocessing_pipeline',\n", + " ColumnTransformer(remainder='passthrough',\n", + " transformers=[('pipe_num',\n", + " Pipeline(steps=[('scaler',\n", + " StandardScaler())]),\n", + " ['no_of_week_nights',\n", + " 'lead_time',\n", + " 'avg_price_per_room',\n", + " 'arrival_month']),\n", + " ('pipe_cat',\n", + " Pipeline(steps=[('encoder',\n", + " OneHotEncoder())]),\n", + " ['market_segment_type',\n", + " 'type_of_meal_plan'])])),\n", + " ('Ada', AdaBoostClassifier())])
ColumnTransformer(remainder='passthrough',\n", + " transformers=[('pipe_num',\n", + " Pipeline(steps=[('scaler', StandardScaler())]),\n", + " ['no_of_week_nights', 'lead_time',\n", + " 'avg_price_per_room', 'arrival_month']),\n", + " ('pipe_cat',\n", + " Pipeline(steps=[('encoder', OneHotEncoder())]),\n", + " ['market_segment_type', 'type_of_meal_plan'])])
['no_of_week_nights', 'lead_time', 'avg_price_per_room', 'arrival_month']
StandardScaler()
['market_segment_type', 'type_of_meal_plan']
OneHotEncoder()
['no_of_weekend_nights', 'required_car_parking_space', 'repeated_guest', 'no_of_special_requests']
passthrough
AdaBoostClassifier()
\n", + " | Baseline (Default Hyperparameter) | \n", + "
---|---|
test - accuracy_score | \n", + "0.896808 | \n", + "
test - f1_score | \n", + "0.924257 | \n", + "
test - precision | \n", + "0.907023 | \n", + "
test - recall | \n", + "0.942157 | \n", + "
train - accuracy | \n", + "0.992796 | \n", + "
train - f1_score | \n", + "0.994659 | \n", + "
train - precision | \n", + "0.992603 | \n", + "
train - recall | \n", + "0.996723 | \n", + "
RandomizedSearchCV(cv=5,\n", + " estimator=Pipeline(steps=[('preprocessing_pipeline',\n", + " ColumnTransformer(remainder='passthrough',\n", + " transformers=[('pipe_num',\n", + " Pipeline(steps=[('scaler',\n", + " StandardScaler())]),\n", + " ['no_of_week_nights',\n", + " 'lead_time',\n", + " 'avg_price_per_room',\n", + " 'arrival_month']),\n", + " ('pipe_cat',\n", + " Pipeline(steps=[('encoder',\n", + " OneHotEncoder())]),\n", + " ['market_segment_type',\n", + " 'type_of_meal_plan'])])),\n", + " ('rf', RandomForestClassifier())]),\n", + " param_distributions={'rf__max_depth': [1, 2, 3, 4, 5],\n", + " 'rf__n_estimators': [1, 10, 100],\n", + " 'rf__random_state': [42]},\n", + " random_state=42, scoring='f1')In a Jupyter environment, please rerun this cell to show the HTML representation or trust the notebook.
RandomizedSearchCV(cv=5,\n", + " estimator=Pipeline(steps=[('preprocessing_pipeline',\n", + " ColumnTransformer(remainder='passthrough',\n", + " transformers=[('pipe_num',\n", + " Pipeline(steps=[('scaler',\n", + " StandardScaler())]),\n", + " ['no_of_week_nights',\n", + " 'lead_time',\n", + " 'avg_price_per_room',\n", + " 'arrival_month']),\n", + " ('pipe_cat',\n", + " Pipeline(steps=[('encoder',\n", + " OneHotEncoder())]),\n", + " ['market_segment_type',\n", + " 'type_of_meal_plan'])])),\n", + " ('rf', RandomForestClassifier())]),\n", + " param_distributions={'rf__max_depth': [1, 2, 3, 4, 5],\n", + " 'rf__n_estimators': [1, 10, 100],\n", + " 'rf__random_state': [42]},\n", + " random_state=42, scoring='f1')
Pipeline(steps=[('preprocessing_pipeline',\n", + " ColumnTransformer(remainder='passthrough',\n", + " transformers=[('pipe_num',\n", + " Pipeline(steps=[('scaler',\n", + " StandardScaler())]),\n", + " ['no_of_week_nights',\n", + " 'lead_time',\n", + " 'avg_price_per_room',\n", + " 'arrival_month']),\n", + " ('pipe_cat',\n", + " Pipeline(steps=[('encoder',\n", + " OneHotEncoder())]),\n", + " ['market_segment_type',\n", + " 'type_of_meal_plan'])])),\n", + " ('rf', RandomForestClassifier())])
ColumnTransformer(remainder='passthrough',\n", + " transformers=[('pipe_num',\n", + " Pipeline(steps=[('scaler', StandardScaler())]),\n", + " ['no_of_week_nights', 'lead_time',\n", + " 'avg_price_per_room', 'arrival_month']),\n", + " ('pipe_cat',\n", + " Pipeline(steps=[('encoder', OneHotEncoder())]),\n", + " ['market_segment_type', 'type_of_meal_plan'])])
['no_of_week_nights', 'lead_time', 'avg_price_per_room', 'arrival_month']
StandardScaler()
['market_segment_type', 'type_of_meal_plan']
OneHotEncoder()
['no_of_weekend_nights', 'required_car_parking_space', 'repeated_guest', 'no_of_special_requests']
passthrough
RandomForestClassifier()
Pipeline(steps=[('preprocessing_pipeline',\n", + " ColumnTransformer(remainder='passthrough',\n", + " transformers=[('pipe_num',\n", + " Pipeline(steps=[('scaler',\n", + " StandardScaler())]),\n", + " ['no_of_week_nights',\n", + " 'lead_time',\n", + " 'avg_price_per_room',\n", + " 'arrival_month']),\n", + " ('pipe_cat',\n", + " Pipeline(steps=[('encoder',\n", + " OneHotEncoder())]),\n", + " ['market_segment_type',\n", + " 'type_of_meal_plan'])])),\n", + " ('rf', RandomForestClassifier(max_depth=5, random_state=42))])In a Jupyter environment, please rerun this cell to show the HTML representation or trust the notebook.
Pipeline(steps=[('preprocessing_pipeline',\n", + " ColumnTransformer(remainder='passthrough',\n", + " transformers=[('pipe_num',\n", + " Pipeline(steps=[('scaler',\n", + " StandardScaler())]),\n", + " ['no_of_week_nights',\n", + " 'lead_time',\n", + " 'avg_price_per_room',\n", + " 'arrival_month']),\n", + " ('pipe_cat',\n", + " Pipeline(steps=[('encoder',\n", + " OneHotEncoder())]),\n", + " ['market_segment_type',\n", + " 'type_of_meal_plan'])])),\n", + " ('rf', RandomForestClassifier(max_depth=5, random_state=42))])
ColumnTransformer(remainder='passthrough',\n", + " transformers=[('pipe_num',\n", + " Pipeline(steps=[('scaler', StandardScaler())]),\n", + " ['no_of_week_nights', 'lead_time',\n", + " 'avg_price_per_room', 'arrival_month']),\n", + " ('pipe_cat',\n", + " Pipeline(steps=[('encoder', OneHotEncoder())]),\n", + " ['market_segment_type', 'type_of_meal_plan'])])
['no_of_week_nights', 'lead_time', 'avg_price_per_room', 'arrival_month']
StandardScaler()
['market_segment_type', 'type_of_meal_plan']
OneHotEncoder()
['no_of_weekend_nights', 'required_car_parking_space', 'repeated_guest', 'no_of_special_requests']
passthrough
RandomForestClassifier(max_depth=5, random_state=42)
\n", + " | Baseline (Default Hyperparameter) | \n", + "Random Search | \n", + "
---|---|---|
train - precision | \n", + "0.992603 | \n", + "0.803865 | \n", + "
train - recall | \n", + "0.996723 | \n", + "0.958763 | \n", + "
train - accuracy | \n", + "0.992796 | \n", + "0.814820 | \n", + "
train - f1_score | \n", + "0.994659 | \n", + "0.874508 | \n", + "
test - precision | \n", + "0.907023 | \n", + "0.797417 | \n", + "
test - recall | \n", + "0.942157 | \n", + "0.954490 | \n", + "
test - accuracy_score | \n", + "0.896808 | \n", + "0.807545 | \n", + "
test - f1_score | \n", + "0.924257 | \n", + "0.868912 | \n", + "