diff --git "a/air_pollution_xgb (1)-Copy1 (1).ipynb" "b/air_pollution_xgb (1)-Copy1 (1).ipynb"
new file mode 100644--- /dev/null
+++ "b/air_pollution_xgb (1)-Copy1 (1).ipynb"
@@ -0,0 +1,4897 @@
+{
+ "cells": [
+ {
+ "cell_type": "code",
+ "execution_count": 1,
+ "metadata": {},
+ "outputs": [
+ {
+ "name": "stderr",
+ "output_type": "stream",
+ "text": [
+ "C:\\Users\\ahmed.ewis\\Anaconda3_ts_3.8\\lib\\site-packages\\dask\\dataframe\\utils.py:367: FutureWarning: pandas.Int64Index is deprecated and will be removed from pandas in a future version. Use pandas.Index with the appropriate dtype instead.\n",
+ " _numeric_index_types = (pd.Int64Index, pd.Float64Index, pd.UInt64Index)\n",
+ "C:\\Users\\ahmed.ewis\\Anaconda3_ts_3.8\\lib\\site-packages\\dask\\dataframe\\utils.py:367: FutureWarning: pandas.Float64Index is deprecated and will be removed from pandas in a future version. Use pandas.Index with the appropriate dtype instead.\n",
+ " _numeric_index_types = (pd.Int64Index, pd.Float64Index, pd.UInt64Index)\n",
+ "C:\\Users\\ahmed.ewis\\Anaconda3_ts_3.8\\lib\\site-packages\\dask\\dataframe\\utils.py:367: FutureWarning: pandas.UInt64Index is deprecated and will be removed from pandas in a future version. Use pandas.Index with the appropriate dtype instead.\n",
+ " _numeric_index_types = (pd.Int64Index, pd.Float64Index, pd.UInt64Index)\n"
+ ]
+ }
+ ],
+ "source": [
+ "# learning curve, fitting,feature importance. rmse,mse, etc..\n",
+ "#import pmdarima as pm\n",
+ "import matplotlib.pyplot as plt\n",
+ "import pandas as pd\n",
+ "# https://www.kaggle.com/code/rtatman/lightgbm-hyperparameter-optimisation-lb-0-761/notebook\n",
+ "import xgboost as xgb\n",
+ "import lightgbm as lgb\n",
+ "import numpy as np\n",
+ "pd.set_option('display.max_rows', 500)\n",
+ "pd.set_option('display.max_columns', 500)\n",
+ "pd.set_option('display.width', 1000) \n",
+ "from sklearn.metrics import mean_squared_error\n",
+ "import warnings\n",
+ "warnings.filterwarnings('ignore')"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 2,
+ "metadata": {
+ "scrolled": true
+ },
+ "outputs": [
+ {
+ "data": {
+ "text/html": [
+ "
\n",
+ "\n",
+ "
\n",
+ " \n",
+ " \n",
+ " | \n",
+ " MeasurementDateTime | \n",
+ " WD-Hour | \n",
+ " WS-Hour | \n",
+ " Temp-Hour | \n",
+ " SR-Hour | \n",
+ " RH-Hour | \n",
+ " CO2 | \n",
+ " PM10 | \n",
+ " SO2 | \n",
+ " H2S | \n",
+ " NO | \n",
+ " NOX | \n",
+ " NO2 | \n",
+ " O3 | \n",
+ " CO | \n",
+ " THC | \n",
+ "
\n",
+ " \n",
+ " \n",
+ " \n",
+ " 0 | \n",
+ " 2013-01-27 00:00:00 | \n",
+ " 114 | \n",
+ " 1.7 | \n",
+ " 18.1 | \n",
+ " 5 | \n",
+ " 100 | \n",
+ " 380 | \n",
+ " 101.000000 | \n",
+ " 0.001000 | \n",
+ " 0.003 | \n",
+ " 0.005 | \n",
+ " 0.023 | \n",
+ " 0.017000 | \n",
+ " 2.600000e-02 | \n",
+ " 0.58 | \n",
+ " 2.170000 | \n",
+ "
\n",
+ " \n",
+ " 1 | \n",
+ " 2013-01-27 01:00:00 | \n",
+ " 151 | \n",
+ " 0.4 | \n",
+ " 17.8 | \n",
+ " 3 | \n",
+ " 89 | \n",
+ " 410 | \n",
+ " 101.000000 | \n",
+ " 0.003000 | \n",
+ " 0.017 | \n",
+ " 0.065 | \n",
+ " 0.151 | \n",
+ " 0.086000 | \n",
+ " 1.000000e-03 | \n",
+ " 1.74 | \n",
+ " 2.830000 | \n",
+ "
\n",
+ " \n",
+ " 2 | \n",
+ " 2013-01-27 02:00:00 | \n",
+ " 175 | \n",
+ " 0.3 | \n",
+ " 17.8 | \n",
+ " 2 | \n",
+ " 88 | \n",
+ " 392 | \n",
+ " 100.000000 | \n",
+ " 0.002000 | \n",
+ " 0.004 | \n",
+ " 0.019 | \n",
+ " 0.072 | \n",
+ " 0.053000 | \n",
+ " 2.000000e-03 | \n",
+ " 0.95 | \n",
+ " 3.770000 | \n",
+ "
\n",
+ " \n",
+ " 3 | \n",
+ " 2013-01-27 03:00:00 | \n",
+ " 264 | \n",
+ " 0.8 | \n",
+ " 16.8 | \n",
+ " 2 | \n",
+ " 92 | \n",
+ " 403 | \n",
+ " 92.000000 | \n",
+ " 0.002000 | \n",
+ " 0.014 | \n",
+ " 0.051 | \n",
+ " 0.123 | \n",
+ " 0.072000 | \n",
+ " 1.000000e-03 | \n",
+ " 1.43 | \n",
+ " 2.720000 | \n",
+ "
\n",
+ " \n",
+ " 4 | \n",
+ " 2013-01-27 04:00:00 | \n",
+ " 187 | \n",
+ " 0.4 | \n",
+ " 16.2 | \n",
+ " 2 | \n",
+ " 94 | \n",
+ " 400 | \n",
+ " 69.000000 | \n",
+ " 0.002000 | \n",
+ " 0.018 | \n",
+ " 0.041 | \n",
+ " 0.105 | \n",
+ " 0.063000 | \n",
+ " 1.000000e-03 | \n",
+ " 1.32 | \n",
+ " 2.520000 | \n",
+ "
\n",
+ " \n",
+ " ... | \n",
+ " ... | \n",
+ " ... | \n",
+ " ... | \n",
+ " ... | \n",
+ " ... | \n",
+ " ... | \n",
+ " ... | \n",
+ " ... | \n",
+ " ... | \n",
+ " ... | \n",
+ " ... | \n",
+ " ... | \n",
+ " ... | \n",
+ " ... | \n",
+ " ... | \n",
+ " ... | \n",
+ "
\n",
+ " \n",
+ " 22774 | \n",
+ " 2015-12-31 18:00:00 | \n",
+ " 85 | \n",
+ " 1.4 | \n",
+ " 14.6 | \n",
+ " 10 | \n",
+ " 76 | \n",
+ " 404 | \n",
+ " 344.238675 | \n",
+ " 0.006386 | \n",
+ " 0.008 | \n",
+ " 0.014 | \n",
+ " 0.081 | \n",
+ " 0.076199 | \n",
+ " 1.747050e-03 | \n",
+ " 2.73 | \n",
+ " 1.464756 | \n",
+ "
\n",
+ " \n",
+ " 22775 | \n",
+ " 2015-12-31 19:00:00 | \n",
+ " 99 | \n",
+ " 1.3 | \n",
+ " 14.6 | \n",
+ " 9 | \n",
+ " 75 | \n",
+ " 410 | \n",
+ " 281.815817 | \n",
+ " 0.006301 | \n",
+ " 0.009 | \n",
+ " 0.023 | \n",
+ " 0.100 | \n",
+ " 0.098153 | \n",
+ " 1.108100e-04 | \n",
+ " 2.81 | \n",
+ " 1.903803 | \n",
+ "
\n",
+ " \n",
+ " 22776 | \n",
+ " 2015-12-31 20:00:00 | \n",
+ " 210 | \n",
+ " 0.7 | \n",
+ " 14.5 | \n",
+ " 10 | \n",
+ " 79 | \n",
+ " 424 | \n",
+ " 51.079508 | \n",
+ " 0.006520 | \n",
+ " 0.009 | \n",
+ " 0.064 | \n",
+ " 0.164 | \n",
+ " 0.106758 | \n",
+ " 8.930000e-06 | \n",
+ " 2.96 | \n",
+ " 2.039618 | \n",
+ "
\n",
+ " \n",
+ " 22777 | \n",
+ " 2015-12-31 21:00:00 | \n",
+ " 185 | \n",
+ " 0.8 | \n",
+ " 14.6 | \n",
+ " 10 | \n",
+ " 81 | \n",
+ " 428 | \n",
+ " 120.974071 | \n",
+ " 0.006437 | \n",
+ " 0.009 | \n",
+ " 0.077 | \n",
+ " 0.182 | \n",
+ " 0.087641 | \n",
+ " 1.000000e-08 | \n",
+ " 3.25 | \n",
+ " 1.754740 | \n",
+ "
\n",
+ " \n",
+ " 22778 | \n",
+ " 2015-12-31 22:00:00 | \n",
+ " 147 | \n",
+ " 0.9 | \n",
+ " 14.1 | \n",
+ " 10 | \n",
+ " 85 | \n",
+ " 425 | \n",
+ " 50.016188 | \n",
+ " 0.006457 | \n",
+ " 0.009 | \n",
+ " 0.050 | \n",
+ " 0.137 | \n",
+ " 0.089011 | \n",
+ " 0.000000e+00 | \n",
+ " 3.45 | \n",
+ " 1.818111 | \n",
+ "
\n",
+ " \n",
+ "
\n",
+ "
22779 rows × 16 columns
\n",
+ "
"
+ ],
+ "text/plain": [
+ " MeasurementDateTime WD-Hour WS-Hour Temp-Hour SR-Hour RH-Hour CO2 PM10 SO2 H2S NO NOX NO2 O3 CO THC\n",
+ "0 2013-01-27 00:00:00 114 1.7 18.1 5 100 380 101.000000 0.001000 0.003 0.005 0.023 0.017000 2.600000e-02 0.58 2.170000\n",
+ "1 2013-01-27 01:00:00 151 0.4 17.8 3 89 410 101.000000 0.003000 0.017 0.065 0.151 0.086000 1.000000e-03 1.74 2.830000\n",
+ "2 2013-01-27 02:00:00 175 0.3 17.8 2 88 392 100.000000 0.002000 0.004 0.019 0.072 0.053000 2.000000e-03 0.95 3.770000\n",
+ "3 2013-01-27 03:00:00 264 0.8 16.8 2 92 403 92.000000 0.002000 0.014 0.051 0.123 0.072000 1.000000e-03 1.43 2.720000\n",
+ "4 2013-01-27 04:00:00 187 0.4 16.2 2 94 400 69.000000 0.002000 0.018 0.041 0.105 0.063000 1.000000e-03 1.32 2.520000\n",
+ "... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ...\n",
+ "22774 2015-12-31 18:00:00 85 1.4 14.6 10 76 404 344.238675 0.006386 0.008 0.014 0.081 0.076199 1.747050e-03 2.73 1.464756\n",
+ "22775 2015-12-31 19:00:00 99 1.3 14.6 9 75 410 281.815817 0.006301 0.009 0.023 0.100 0.098153 1.108100e-04 2.81 1.903803\n",
+ "22776 2015-12-31 20:00:00 210 0.7 14.5 10 79 424 51.079508 0.006520 0.009 0.064 0.164 0.106758 8.930000e-06 2.96 2.039618\n",
+ "22777 2015-12-31 21:00:00 185 0.8 14.6 10 81 428 120.974071 0.006437 0.009 0.077 0.182 0.087641 1.000000e-08 3.25 1.754740\n",
+ "22778 2015-12-31 22:00:00 147 0.9 14.1 10 85 425 50.016188 0.006457 0.009 0.050 0.137 0.089011 0.000000e+00 3.45 1.818111\n",
+ "\n",
+ "[22779 rows x 16 columns]"
+ ]
+ },
+ "execution_count": 2,
+ "metadata": {},
+ "output_type": "execute_result"
+ }
+ ],
+ "source": [
+ "df = pd.read_excel('Interpolation_Average_2013_2015.xlsx',sheet_name='Sheet2')\n",
+ "df"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 3,
+ "metadata": {},
+ "outputs": [
+ {
+ "data": {
+ "text/plain": [
+ "Index(['MeasurementDateTime', 'WD-Hour', 'WS-Hour', 'Temp-Hour', 'SR-Hour', 'RH-Hour', 'CO2', 'PM10', 'SO2', 'H2S', 'NO', 'NOX', 'NO2', 'O3', 'CO', 'THC'], dtype='object')"
+ ]
+ },
+ "execution_count": 3,
+ "metadata": {},
+ "output_type": "execute_result"
+ }
+ ],
+ "source": [
+ "df.columns"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 4,
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "cols = ['MeasurementDateTime','WD-Hour', 'WS-Hour', 'Temp-Hour', 'SR-Hour', 'RH-Hour','NO2', 'O3'\n",
+ "]"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 5,
+ "metadata": {},
+ "outputs": [
+ {
+ "data": {
+ "text/html": [
+ "\n",
+ "\n",
+ "
\n",
+ " \n",
+ " \n",
+ " | \n",
+ " MeasurementDateTime | \n",
+ " WD-Hour | \n",
+ " WS-Hour | \n",
+ " Temp-Hour | \n",
+ " SR-Hour | \n",
+ " RH-Hour | \n",
+ " NO2 | \n",
+ " O3 | \n",
+ "
\n",
+ " \n",
+ " \n",
+ " \n",
+ " 0 | \n",
+ " 2013-01-27 00:00:00 | \n",
+ " 114 | \n",
+ " 1.7 | \n",
+ " 18.1 | \n",
+ " 5 | \n",
+ " 100 | \n",
+ " 0.017000 | \n",
+ " 2.600000e-02 | \n",
+ "
\n",
+ " \n",
+ " 1 | \n",
+ " 2013-01-27 01:00:00 | \n",
+ " 151 | \n",
+ " 0.4 | \n",
+ " 17.8 | \n",
+ " 3 | \n",
+ " 89 | \n",
+ " 0.086000 | \n",
+ " 1.000000e-03 | \n",
+ "
\n",
+ " \n",
+ " 2 | \n",
+ " 2013-01-27 02:00:00 | \n",
+ " 175 | \n",
+ " 0.3 | \n",
+ " 17.8 | \n",
+ " 2 | \n",
+ " 88 | \n",
+ " 0.053000 | \n",
+ " 2.000000e-03 | \n",
+ "
\n",
+ " \n",
+ " 3 | \n",
+ " 2013-01-27 03:00:00 | \n",
+ " 264 | \n",
+ " 0.8 | \n",
+ " 16.8 | \n",
+ " 2 | \n",
+ " 92 | \n",
+ " 0.072000 | \n",
+ " 1.000000e-03 | \n",
+ "
\n",
+ " \n",
+ " 4 | \n",
+ " 2013-01-27 04:00:00 | \n",
+ " 187 | \n",
+ " 0.4 | \n",
+ " 16.2 | \n",
+ " 2 | \n",
+ " 94 | \n",
+ " 0.063000 | \n",
+ " 1.000000e-03 | \n",
+ "
\n",
+ " \n",
+ " ... | \n",
+ " ... | \n",
+ " ... | \n",
+ " ... | \n",
+ " ... | \n",
+ " ... | \n",
+ " ... | \n",
+ " ... | \n",
+ " ... | \n",
+ "
\n",
+ " \n",
+ " 22774 | \n",
+ " 2015-12-31 18:00:00 | \n",
+ " 85 | \n",
+ " 1.4 | \n",
+ " 14.6 | \n",
+ " 10 | \n",
+ " 76 | \n",
+ " 0.076199 | \n",
+ " 1.747050e-03 | \n",
+ "
\n",
+ " \n",
+ " 22775 | \n",
+ " 2015-12-31 19:00:00 | \n",
+ " 99 | \n",
+ " 1.3 | \n",
+ " 14.6 | \n",
+ " 9 | \n",
+ " 75 | \n",
+ " 0.098153 | \n",
+ " 1.108100e-04 | \n",
+ "
\n",
+ " \n",
+ " 22776 | \n",
+ " 2015-12-31 20:00:00 | \n",
+ " 210 | \n",
+ " 0.7 | \n",
+ " 14.5 | \n",
+ " 10 | \n",
+ " 79 | \n",
+ " 0.106758 | \n",
+ " 8.930000e-06 | \n",
+ "
\n",
+ " \n",
+ " 22777 | \n",
+ " 2015-12-31 21:00:00 | \n",
+ " 185 | \n",
+ " 0.8 | \n",
+ " 14.6 | \n",
+ " 10 | \n",
+ " 81 | \n",
+ " 0.087641 | \n",
+ " 1.000000e-08 | \n",
+ "
\n",
+ " \n",
+ " 22778 | \n",
+ " 2015-12-31 22:00:00 | \n",
+ " 147 | \n",
+ " 0.9 | \n",
+ " 14.1 | \n",
+ " 10 | \n",
+ " 85 | \n",
+ " 0.089011 | \n",
+ " 0.000000e+00 | \n",
+ "
\n",
+ " \n",
+ "
\n",
+ "
22779 rows × 8 columns
\n",
+ "
"
+ ],
+ "text/plain": [
+ " MeasurementDateTime WD-Hour WS-Hour Temp-Hour SR-Hour RH-Hour NO2 O3\n",
+ "0 2013-01-27 00:00:00 114 1.7 18.1 5 100 0.017000 2.600000e-02\n",
+ "1 2013-01-27 01:00:00 151 0.4 17.8 3 89 0.086000 1.000000e-03\n",
+ "2 2013-01-27 02:00:00 175 0.3 17.8 2 88 0.053000 2.000000e-03\n",
+ "3 2013-01-27 03:00:00 264 0.8 16.8 2 92 0.072000 1.000000e-03\n",
+ "4 2013-01-27 04:00:00 187 0.4 16.2 2 94 0.063000 1.000000e-03\n",
+ "... ... ... ... ... ... ... ... ...\n",
+ "22774 2015-12-31 18:00:00 85 1.4 14.6 10 76 0.076199 1.747050e-03\n",
+ "22775 2015-12-31 19:00:00 99 1.3 14.6 9 75 0.098153 1.108100e-04\n",
+ "22776 2015-12-31 20:00:00 210 0.7 14.5 10 79 0.106758 8.930000e-06\n",
+ "22777 2015-12-31 21:00:00 185 0.8 14.6 10 81 0.087641 1.000000e-08\n",
+ "22778 2015-12-31 22:00:00 147 0.9 14.1 10 85 0.089011 0.000000e+00\n",
+ "\n",
+ "[22779 rows x 8 columns]"
+ ]
+ },
+ "execution_count": 5,
+ "metadata": {},
+ "output_type": "execute_result"
+ }
+ ],
+ "source": [
+ "# Keep only the columns listed in cols.\n",
+ "# .copy() makes df an independent frame, so the column assignments in later\n",
+ "# cells modify df itself rather than a selection taken from the loaded frame\n",
+ "# (which is what triggers pandas' SettingWithCopyWarning).\n",
+ "df = df[cols].copy()\n",
+ "df"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 6,
+ "metadata": {},
+ "outputs": [
+ {
+ "name": "stdout",
+ "output_type": "stream",
+ "text": [
+ "\n",
+ "RangeIndex: 22779 entries, 0 to 22778\n",
+ "Data columns (total 8 columns):\n",
+ " # Column Non-Null Count Dtype \n",
+ "--- ------ -------------- ----- \n",
+ " 0 MeasurementDateTime 22779 non-null datetime64[ns]\n",
+ " 1 WD-Hour 22779 non-null int64 \n",
+ " 2 WS-Hour 22779 non-null float64 \n",
+ " 3 Temp-Hour 22779 non-null float64 \n",
+ " 4 SR-Hour 22779 non-null int64 \n",
+ " 5 RH-Hour 22779 non-null int64 \n",
+ " 6 NO2 22779 non-null float64 \n",
+ " 7 O3 22779 non-null float64 \n",
+ "dtypes: datetime64[ns](1), float64(4), int64(3)\n",
+ "memory usage: 1.4 MB\n"
+ ]
+ }
+ ],
+ "source": [
+ "df.info()"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 7,
+ "metadata": {},
+ "outputs": [
+ {
+ "data": {
+ "text/html": [
+ "\n",
+ "\n",
+ "
\n",
+ " \n",
+ " \n",
+ " | \n",
+ " MeasurementDateTime | \n",
+ " WD-Hour | \n",
+ " WS-Hour | \n",
+ " Temp-Hour | \n",
+ " SR-Hour | \n",
+ " RH-Hour | \n",
+ " NO2 | \n",
+ " O3 | \n",
+ "
\n",
+ " \n",
+ " \n",
+ " \n",
+ " 0 | \n",
+ " 2013-01-27 00:00:00 | \n",
+ " 114 | \n",
+ " 1.7 | \n",
+ " 18.1 | \n",
+ " 5 | \n",
+ " 100 | \n",
+ " 0.017000 | \n",
+ " 2.600000e-02 | \n",
+ "
\n",
+ " \n",
+ " 1 | \n",
+ " 2013-01-27 01:00:00 | \n",
+ " 151 | \n",
+ " 0.4 | \n",
+ " 17.8 | \n",
+ " 3 | \n",
+ " 89 | \n",
+ " 0.086000 | \n",
+ " 1.000000e-03 | \n",
+ "
\n",
+ " \n",
+ " 2 | \n",
+ " 2013-01-27 02:00:00 | \n",
+ " 175 | \n",
+ " 0.3 | \n",
+ " 17.8 | \n",
+ " 2 | \n",
+ " 88 | \n",
+ " 0.053000 | \n",
+ " 2.000000e-03 | \n",
+ "
\n",
+ " \n",
+ " 3 | \n",
+ " 2013-01-27 03:00:00 | \n",
+ " 264 | \n",
+ " 0.8 | \n",
+ " 16.8 | \n",
+ " 2 | \n",
+ " 92 | \n",
+ " 0.072000 | \n",
+ " 1.000000e-03 | \n",
+ "
\n",
+ " \n",
+ " 4 | \n",
+ " 2013-01-27 04:00:00 | \n",
+ " 187 | \n",
+ " 0.4 | \n",
+ " 16.2 | \n",
+ " 2 | \n",
+ " 94 | \n",
+ " 0.063000 | \n",
+ " 1.000000e-03 | \n",
+ "
\n",
+ " \n",
+ " ... | \n",
+ " ... | \n",
+ " ... | \n",
+ " ... | \n",
+ " ... | \n",
+ " ... | \n",
+ " ... | \n",
+ " ... | \n",
+ " ... | \n",
+ "
\n",
+ " \n",
+ " 22774 | \n",
+ " 2015-12-31 18:00:00 | \n",
+ " 85 | \n",
+ " 1.4 | \n",
+ " 14.6 | \n",
+ " 10 | \n",
+ " 76 | \n",
+ " 0.076199 | \n",
+ " 1.747050e-03 | \n",
+ "
\n",
+ " \n",
+ " 22775 | \n",
+ " 2015-12-31 19:00:00 | \n",
+ " 99 | \n",
+ " 1.3 | \n",
+ " 14.6 | \n",
+ " 9 | \n",
+ " 75 | \n",
+ " 0.098153 | \n",
+ " 1.108100e-04 | \n",
+ "
\n",
+ " \n",
+ " 22776 | \n",
+ " 2015-12-31 20:00:00 | \n",
+ " 210 | \n",
+ " 0.7 | \n",
+ " 14.5 | \n",
+ " 10 | \n",
+ " 79 | \n",
+ " 0.106758 | \n",
+ " 8.930000e-06 | \n",
+ "
\n",
+ " \n",
+ " 22777 | \n",
+ " 2015-12-31 21:00:00 | \n",
+ " 185 | \n",
+ " 0.8 | \n",
+ " 14.6 | \n",
+ " 10 | \n",
+ " 81 | \n",
+ " 0.087641 | \n",
+ " 1.000000e-08 | \n",
+ "
\n",
+ " \n",
+ " 22778 | \n",
+ " 2015-12-31 22:00:00 | \n",
+ " 147 | \n",
+ " 0.9 | \n",
+ " 14.1 | \n",
+ " 10 | \n",
+ " 85 | \n",
+ " 0.089011 | \n",
+ " 0.000000e+00 | \n",
+ "
\n",
+ " \n",
+ "
\n",
+ "
22779 rows × 8 columns
\n",
+ "
"
+ ],
+ "text/plain": [
+ " MeasurementDateTime WD-Hour WS-Hour Temp-Hour SR-Hour RH-Hour NO2 O3\n",
+ "0 2013-01-27 00:00:00 114 1.7 18.1 5 100 0.017000 2.600000e-02\n",
+ "1 2013-01-27 01:00:00 151 0.4 17.8 3 89 0.086000 1.000000e-03\n",
+ "2 2013-01-27 02:00:00 175 0.3 17.8 2 88 0.053000 2.000000e-03\n",
+ "3 2013-01-27 03:00:00 264 0.8 16.8 2 92 0.072000 1.000000e-03\n",
+ "4 2013-01-27 04:00:00 187 0.4 16.2 2 94 0.063000 1.000000e-03\n",
+ "... ... ... ... ... ... ... ... ...\n",
+ "22774 2015-12-31 18:00:00 85 1.4 14.6 10 76 0.076199 1.747050e-03\n",
+ "22775 2015-12-31 19:00:00 99 1.3 14.6 9 75 0.098153 1.108100e-04\n",
+ "22776 2015-12-31 20:00:00 210 0.7 14.5 10 79 0.106758 8.930000e-06\n",
+ "22777 2015-12-31 21:00:00 185 0.8 14.6 10 81 0.087641 1.000000e-08\n",
+ "22778 2015-12-31 22:00:00 147 0.9 14.1 10 85 0.089011 0.000000e+00\n",
+ "\n",
+ "[22779 rows x 8 columns]"
+ ]
+ },
+ "execution_count": 7,
+ "metadata": {},
+ "output_type": "execute_result"
+ }
+ ],
+ "source": [
+ "df['MeasurementDateTime'] = pd.to_datetime(df['MeasurementDateTime'], errors='coerce')\n",
+ "df"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 8,
+ "metadata": {},
+ "outputs": [
+ {
+ "data": {
+ "text/html": [
+ "\n",
+ "\n",
+ "
\n",
+ " \n",
+ " \n",
+ " | \n",
+ " MeasurementDateTime | \n",
+ " WD-Hour | \n",
+ " WS-Hour | \n",
+ " Temp-Hour | \n",
+ " SR-Hour | \n",
+ " RH-Hour | \n",
+ " NO2 | \n",
+ " O3 | \n",
+ " date | \n",
+ "
\n",
+ " \n",
+ " \n",
+ " \n",
+ " 0 | \n",
+ " 2013-01-27 00:00:00 | \n",
+ " 114 | \n",
+ " 1.7 | \n",
+ " 18.1 | \n",
+ " 5 | \n",
+ " 100 | \n",
+ " 0.017000 | \n",
+ " 2.600000e-02 | \n",
+ " 2013-01-27 00:00:00 | \n",
+ "
\n",
+ " \n",
+ " 1 | \n",
+ " 2013-01-27 01:00:00 | \n",
+ " 151 | \n",
+ " 0.4 | \n",
+ " 17.8 | \n",
+ " 3 | \n",
+ " 89 | \n",
+ " 0.086000 | \n",
+ " 1.000000e-03 | \n",
+ " 2013-01-27 01:00:00 | \n",
+ "
\n",
+ " \n",
+ " 2 | \n",
+ " 2013-01-27 02:00:00 | \n",
+ " 175 | \n",
+ " 0.3 | \n",
+ " 17.8 | \n",
+ " 2 | \n",
+ " 88 | \n",
+ " 0.053000 | \n",
+ " 2.000000e-03 | \n",
+ " 2013-01-27 02:00:00 | \n",
+ "
\n",
+ " \n",
+ " 3 | \n",
+ " 2013-01-27 03:00:00 | \n",
+ " 264 | \n",
+ " 0.8 | \n",
+ " 16.8 | \n",
+ " 2 | \n",
+ " 92 | \n",
+ " 0.072000 | \n",
+ " 1.000000e-03 | \n",
+ " 2013-01-27 03:00:00 | \n",
+ "
\n",
+ " \n",
+ " 4 | \n",
+ " 2013-01-27 04:00:00 | \n",
+ " 187 | \n",
+ " 0.4 | \n",
+ " 16.2 | \n",
+ " 2 | \n",
+ " 94 | \n",
+ " 0.063000 | \n",
+ " 1.000000e-03 | \n",
+ " 2013-01-27 04:00:00 | \n",
+ "
\n",
+ " \n",
+ " ... | \n",
+ " ... | \n",
+ " ... | \n",
+ " ... | \n",
+ " ... | \n",
+ " ... | \n",
+ " ... | \n",
+ " ... | \n",
+ " ... | \n",
+ " ... | \n",
+ "
\n",
+ " \n",
+ " 22774 | \n",
+ " 2015-12-31 18:00:00 | \n",
+ " 85 | \n",
+ " 1.4 | \n",
+ " 14.6 | \n",
+ " 10 | \n",
+ " 76 | \n",
+ " 0.076199 | \n",
+ " 1.747050e-03 | \n",
+ " 2015-12-31 18:00:00 | \n",
+ "
\n",
+ " \n",
+ " 22775 | \n",
+ " 2015-12-31 19:00:00 | \n",
+ " 99 | \n",
+ " 1.3 | \n",
+ " 14.6 | \n",
+ " 9 | \n",
+ " 75 | \n",
+ " 0.098153 | \n",
+ " 1.108100e-04 | \n",
+ " 2015-12-31 19:00:00 | \n",
+ "
\n",
+ " \n",
+ " 22776 | \n",
+ " 2015-12-31 20:00:00 | \n",
+ " 210 | \n",
+ " 0.7 | \n",
+ " 14.5 | \n",
+ " 10 | \n",
+ " 79 | \n",
+ " 0.106758 | \n",
+ " 8.930000e-06 | \n",
+ " 2015-12-31 20:00:00 | \n",
+ "
\n",
+ " \n",
+ " 22777 | \n",
+ " 2015-12-31 21:00:00 | \n",
+ " 185 | \n",
+ " 0.8 | \n",
+ " 14.6 | \n",
+ " 10 | \n",
+ " 81 | \n",
+ " 0.087641 | \n",
+ " 1.000000e-08 | \n",
+ " 2015-12-31 21:00:00 | \n",
+ "
\n",
+ " \n",
+ " 22778 | \n",
+ " 2015-12-31 22:00:00 | \n",
+ " 147 | \n",
+ " 0.9 | \n",
+ " 14.1 | \n",
+ " 10 | \n",
+ " 85 | \n",
+ " 0.089011 | \n",
+ " 0.000000e+00 | \n",
+ " 2015-12-31 22:00:00 | \n",
+ "
\n",
+ " \n",
+ "
\n",
+ "
22779 rows × 9 columns
\n",
+ "
"
+ ],
+ "text/plain": [
+ " MeasurementDateTime WD-Hour WS-Hour Temp-Hour SR-Hour RH-Hour NO2 O3 date\n",
+ "0 2013-01-27 00:00:00 114 1.7 18.1 5 100 0.017000 2.600000e-02 2013-01-27 00:00:00\n",
+ "1 2013-01-27 01:00:00 151 0.4 17.8 3 89 0.086000 1.000000e-03 2013-01-27 01:00:00\n",
+ "2 2013-01-27 02:00:00 175 0.3 17.8 2 88 0.053000 2.000000e-03 2013-01-27 02:00:00\n",
+ "3 2013-01-27 03:00:00 264 0.8 16.8 2 92 0.072000 1.000000e-03 2013-01-27 03:00:00\n",
+ "4 2013-01-27 04:00:00 187 0.4 16.2 2 94 0.063000 1.000000e-03 2013-01-27 04:00:00\n",
+ "... ... ... ... ... ... ... ... ... ...\n",
+ "22774 2015-12-31 18:00:00 85 1.4 14.6 10 76 0.076199 1.747050e-03 2015-12-31 18:00:00\n",
+ "22775 2015-12-31 19:00:00 99 1.3 14.6 9 75 0.098153 1.108100e-04 2015-12-31 19:00:00\n",
+ "22776 2015-12-31 20:00:00 210 0.7 14.5 10 79 0.106758 8.930000e-06 2015-12-31 20:00:00\n",
+ "22777 2015-12-31 21:00:00 185 0.8 14.6 10 81 0.087641 1.000000e-08 2015-12-31 21:00:00\n",
+ "22778 2015-12-31 22:00:00 147 0.9 14.1 10 85 0.089011 0.000000e+00 2015-12-31 22:00:00\n",
+ "\n",
+ "[22779 rows x 9 columns]"
+ ]
+ },
+ "execution_count": 8,
+ "metadata": {},
+ "output_type": "execute_result"
+ }
+ ],
+ "source": [
+ "df['date'] = df['MeasurementDateTime']\n",
+ "df"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 9,
+ "metadata": {},
+ "outputs": [
+ {
+ "data": {
+ "text/html": [
+ "\n",
+ "\n",
+ "
\n",
+ " \n",
+ " \n",
+ " | \n",
+ " MeasurementDateTime | \n",
+ " WD-Hour | \n",
+ " WS-Hour | \n",
+ " Temp-Hour | \n",
+ " SR-Hour | \n",
+ " RH-Hour | \n",
+ " NO2 | \n",
+ " O3 | \n",
+ " date | \n",
+ " dayofweek | \n",
+ " quarter | \n",
+ " month | \n",
+ " year | \n",
+ " dayofyear | \n",
+ " dayofmonth | \n",
+ " datehour | \n",
+ " weekofyear | \n",
+ "
\n",
+ " \n",
+ " \n",
+ " \n",
+ " 0 | \n",
+ " 2013-01-27 00:00:00 | \n",
+ " 114 | \n",
+ " 1.7 | \n",
+ " 18.1 | \n",
+ " 5 | \n",
+ " 100 | \n",
+ " 0.017000 | \n",
+ " 2.600000e-02 | \n",
+ " 2013-01-27 00:00:00 | \n",
+ " 6 | \n",
+ " 1 | \n",
+ " 1 | \n",
+ " 2013 | \n",
+ " 27 | \n",
+ " 27 | \n",
+ " 0 | \n",
+ " 4 | \n",
+ "
\n",
+ " \n",
+ " 1 | \n",
+ " 2013-01-27 01:00:00 | \n",
+ " 151 | \n",
+ " 0.4 | \n",
+ " 17.8 | \n",
+ " 3 | \n",
+ " 89 | \n",
+ " 0.086000 | \n",
+ " 1.000000e-03 | \n",
+ " 2013-01-27 01:00:00 | \n",
+ " 6 | \n",
+ " 1 | \n",
+ " 1 | \n",
+ " 2013 | \n",
+ " 27 | \n",
+ " 27 | \n",
+ " 1 | \n",
+ " 4 | \n",
+ "
\n",
+ " \n",
+ " 2 | \n",
+ " 2013-01-27 02:00:00 | \n",
+ " 175 | \n",
+ " 0.3 | \n",
+ " 17.8 | \n",
+ " 2 | \n",
+ " 88 | \n",
+ " 0.053000 | \n",
+ " 2.000000e-03 | \n",
+ " 2013-01-27 02:00:00 | \n",
+ " 6 | \n",
+ " 1 | \n",
+ " 1 | \n",
+ " 2013 | \n",
+ " 27 | \n",
+ " 27 | \n",
+ " 2 | \n",
+ " 4 | \n",
+ "
\n",
+ " \n",
+ " 3 | \n",
+ " 2013-01-27 03:00:00 | \n",
+ " 264 | \n",
+ " 0.8 | \n",
+ " 16.8 | \n",
+ " 2 | \n",
+ " 92 | \n",
+ " 0.072000 | \n",
+ " 1.000000e-03 | \n",
+ " 2013-01-27 03:00:00 | \n",
+ " 6 | \n",
+ " 1 | \n",
+ " 1 | \n",
+ " 2013 | \n",
+ " 27 | \n",
+ " 27 | \n",
+ " 3 | \n",
+ " 4 | \n",
+ "
\n",
+ " \n",
+ " 4 | \n",
+ " 2013-01-27 04:00:00 | \n",
+ " 187 | \n",
+ " 0.4 | \n",
+ " 16.2 | \n",
+ " 2 | \n",
+ " 94 | \n",
+ " 0.063000 | \n",
+ " 1.000000e-03 | \n",
+ " 2013-01-27 04:00:00 | \n",
+ " 6 | \n",
+ " 1 | \n",
+ " 1 | \n",
+ " 2013 | \n",
+ " 27 | \n",
+ " 27 | \n",
+ " 4 | \n",
+ " 4 | \n",
+ "
\n",
+ " \n",
+ " ... | \n",
+ " ... | \n",
+ " ... | \n",
+ " ... | \n",
+ " ... | \n",
+ " ... | \n",
+ " ... | \n",
+ " ... | \n",
+ " ... | \n",
+ " ... | \n",
+ " ... | \n",
+ " ... | \n",
+ " ... | \n",
+ " ... | \n",
+ " ... | \n",
+ " ... | \n",
+ " ... | \n",
+ " ... | \n",
+ "
\n",
+ " \n",
+ " 22774 | \n",
+ " 2015-12-31 18:00:00 | \n",
+ " 85 | \n",
+ " 1.4 | \n",
+ " 14.6 | \n",
+ " 10 | \n",
+ " 76 | \n",
+ " 0.076199 | \n",
+ " 1.747050e-03 | \n",
+ " 2015-12-31 18:00:00 | \n",
+ " 3 | \n",
+ " 4 | \n",
+ " 12 | \n",
+ " 2015 | \n",
+ " 365 | \n",
+ " 31 | \n",
+ " 18 | \n",
+ " 53 | \n",
+ "
\n",
+ " \n",
+ " 22775 | \n",
+ " 2015-12-31 19:00:00 | \n",
+ " 99 | \n",
+ " 1.3 | \n",
+ " 14.6 | \n",
+ " 9 | \n",
+ " 75 | \n",
+ " 0.098153 | \n",
+ " 1.108100e-04 | \n",
+ " 2015-12-31 19:00:00 | \n",
+ " 3 | \n",
+ " 4 | \n",
+ " 12 | \n",
+ " 2015 | \n",
+ " 365 | \n",
+ " 31 | \n",
+ " 19 | \n",
+ " 53 | \n",
+ "
\n",
+ " \n",
+ " 22776 | \n",
+ " 2015-12-31 20:00:00 | \n",
+ " 210 | \n",
+ " 0.7 | \n",
+ " 14.5 | \n",
+ " 10 | \n",
+ " 79 | \n",
+ " 0.106758 | \n",
+ " 8.930000e-06 | \n",
+ " 2015-12-31 20:00:00 | \n",
+ " 3 | \n",
+ " 4 | \n",
+ " 12 | \n",
+ " 2015 | \n",
+ " 365 | \n",
+ " 31 | \n",
+ " 20 | \n",
+ " 53 | \n",
+ "
\n",
+ " \n",
+ " 22777 | \n",
+ " 2015-12-31 21:00:00 | \n",
+ " 185 | \n",
+ " 0.8 | \n",
+ " 14.6 | \n",
+ " 10 | \n",
+ " 81 | \n",
+ " 0.087641 | \n",
+ " 1.000000e-08 | \n",
+ " 2015-12-31 21:00:00 | \n",
+ " 3 | \n",
+ " 4 | \n",
+ " 12 | \n",
+ " 2015 | \n",
+ " 365 | \n",
+ " 31 | \n",
+ " 21 | \n",
+ " 53 | \n",
+ "
\n",
+ " \n",
+ " 22778 | \n",
+ " 2015-12-31 22:00:00 | \n",
+ " 147 | \n",
+ " 0.9 | \n",
+ " 14.1 | \n",
+ " 10 | \n",
+ " 85 | \n",
+ " 0.089011 | \n",
+ " 0.000000e+00 | \n",
+ " 2015-12-31 22:00:00 | \n",
+ " 3 | \n",
+ " 4 | \n",
+ " 12 | \n",
+ " 2015 | \n",
+ " 365 | \n",
+ " 31 | \n",
+ " 22 | \n",
+ " 53 | \n",
+ "
\n",
+ " \n",
+ "
\n",
+ "
22779 rows × 17 columns
\n",
+ "
"
+ ],
+ "text/plain": [
+ " MeasurementDateTime WD-Hour WS-Hour Temp-Hour SR-Hour RH-Hour NO2 O3 date dayofweek quarter month year dayofyear dayofmonth datehour weekofyear\n",
+ "0 2013-01-27 00:00:00 114 1.7 18.1 5 100 0.017000 2.600000e-02 2013-01-27 00:00:00 6 1 1 2013 27 27 0 4\n",
+ "1 2013-01-27 01:00:00 151 0.4 17.8 3 89 0.086000 1.000000e-03 2013-01-27 01:00:00 6 1 1 2013 27 27 1 4\n",
+ "2 2013-01-27 02:00:00 175 0.3 17.8 2 88 0.053000 2.000000e-03 2013-01-27 02:00:00 6 1 1 2013 27 27 2 4\n",
+ "3 2013-01-27 03:00:00 264 0.8 16.8 2 92 0.072000 1.000000e-03 2013-01-27 03:00:00 6 1 1 2013 27 27 3 4\n",
+ "4 2013-01-27 04:00:00 187 0.4 16.2 2 94 0.063000 1.000000e-03 2013-01-27 04:00:00 6 1 1 2013 27 27 4 4\n",
+ "... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ...\n",
+ "22774 2015-12-31 18:00:00 85 1.4 14.6 10 76 0.076199 1.747050e-03 2015-12-31 18:00:00 3 4 12 2015 365 31 18 53\n",
+ "22775 2015-12-31 19:00:00 99 1.3 14.6 9 75 0.098153 1.108100e-04 2015-12-31 19:00:00 3 4 12 2015 365 31 19 53\n",
+ "22776 2015-12-31 20:00:00 210 0.7 14.5 10 79 0.106758 8.930000e-06 2015-12-31 20:00:00 3 4 12 2015 365 31 20 53\n",
+ "22777 2015-12-31 21:00:00 185 0.8 14.6 10 81 0.087641 1.000000e-08 2015-12-31 21:00:00 3 4 12 2015 365 31 21 53\n",
+ "22778 2015-12-31 22:00:00 147 0.9 14.1 10 85 0.089011 0.000000e+00 2015-12-31 22:00:00 3 4 12 2015 365 31 22 53\n",
+ "\n",
+ "[22779 rows x 17 columns]"
+ ]
+ },
+ "execution_count": 9,
+ "metadata": {},
+ "output_type": "execute_result"
+ }
+ ],
+ "source": [
+ "from datetime import datetime\n",
+ "#df['MeasurementDateTime'] = df.index\n",
+ "# Derive calendar features from the 'date' column through the .dt accessor.\n",
+ "# dayofweek is 0-based starting on Monday (Sunday == 6).\n",
+ "df['dayofweek'] = df['date'].dt.dayofweek\n",
+ "df['quarter'] = df['date'].dt.quarter\n",
+ "df['month'] = df['date'].dt.month\n",
+ "df['year'] = df['date'].dt.year\n",
+ "df['dayofyear'] = df['date'].dt.dayofyear\n",
+ "df['dayofmonth'] = df['date'].dt.day\n",
+ "df['datehour'] = df['date'].dt.hour\n",
+ "# Series.dt.weekofyear is deprecated and was removed in pandas 2.0;\n",
+ "# dt.isocalendar().week returns the same ISO week number. It comes back as\n",
+ "# UInt32, so cast to int64 to keep the dtype the old accessor produced.\n",
+ "# The cast assumes 'date' contains no NaT (df.info() reports no nulls).\n",
+ "df['weekofyear'] = df['date'].dt.isocalendar().week.astype('int64')\n",
+ "df"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 10,
+ "metadata": {},
+ "outputs": [
+ {
+ "data": {
+ "text/plain": [
+ "Index(['MeasurementDateTime', 'WD-Hour', 'WS-Hour', 'Temp-Hour', 'SR-Hour', 'RH-Hour', 'NO2', 'O3', 'date', 'dayofweek', 'quarter', 'month', 'year', 'dayofyear', 'dayofmonth', 'datehour', 'weekofyear'], dtype='object')"
+ ]
+ },
+ "execution_count": 10,
+ "metadata": {},
+ "output_type": "execute_result"
+ }
+ ],
+ "source": [
+ "df.columns"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 11,
+ "metadata": {},
+ "outputs": [
+ {
+ "data": {
+ "text/html": [
+ "\n",
+ "\n",
+ "
\n",
+ " \n",
+ " \n",
+ " | \n",
+ " MeasurementDateTime | \n",
+ " WD-Hour | \n",
+ " WS-Hour | \n",
+ " Temp-Hour | \n",
+ " SR-Hour | \n",
+ " RH-Hour | \n",
+ " NO2 | \n",
+ " O3 | \n",
+ " dayofweek | \n",
+ " quarter | \n",
+ " month | \n",
+ " year | \n",
+ " dayofyear | \n",
+ " dayofmonth | \n",
+ " datehour | \n",
+ " weekofyear | \n",
+ "
\n",
+ " \n",
+ " date | \n",
+ " | \n",
+ " | \n",
+ " | \n",
+ " | \n",
+ " | \n",
+ " | \n",
+ " | \n",
+ " | \n",
+ " | \n",
+ " | \n",
+ " | \n",
+ " | \n",
+ " | \n",
+ " | \n",
+ " | \n",
+ " | \n",
+ "
\n",
+ " \n",
+ " \n",
+ " \n",
+ " 2013-01-27 00:00:00 | \n",
+ " 2013-01-27 00:00:00 | \n",
+ " 114.0 | \n",
+ " 1.7 | \n",
+ " 18.1 | \n",
+ " 5.0 | \n",
+ " 100.0 | \n",
+ " 0.017000 | \n",
+ " 2.600000e-02 | \n",
+ " 6.0 | \n",
+ " 1.0 | \n",
+ " 1.0 | \n",
+ " 2013.0 | \n",
+ " 27.0 | \n",
+ " 27.0 | \n",
+ " 0.0 | \n",
+ " 4.0 | \n",
+ "
\n",
+ " \n",
+ " 2013-01-27 01:00:00 | \n",
+ " 2013-01-27 01:00:00 | \n",
+ " 151.0 | \n",
+ " 0.4 | \n",
+ " 17.8 | \n",
+ " 3.0 | \n",
+ " 89.0 | \n",
+ " 0.086000 | \n",
+ " 1.000000e-03 | \n",
+ " 6.0 | \n",
+ " 1.0 | \n",
+ " 1.0 | \n",
+ " 2013.0 | \n",
+ " 27.0 | \n",
+ " 27.0 | \n",
+ " 1.0 | \n",
+ " 4.0 | \n",
+ "
\n",
+ " \n",
+ " 2013-01-27 02:00:00 | \n",
+ " 2013-01-27 02:00:00 | \n",
+ " 175.0 | \n",
+ " 0.3 | \n",
+ " 17.8 | \n",
+ " 2.0 | \n",
+ " 88.0 | \n",
+ " 0.053000 | \n",
+ " 2.000000e-03 | \n",
+ " 6.0 | \n",
+ " 1.0 | \n",
+ " 1.0 | \n",
+ " 2013.0 | \n",
+ " 27.0 | \n",
+ " 27.0 | \n",
+ " 2.0 | \n",
+ " 4.0 | \n",
+ "
\n",
+ " \n",
+ " 2013-01-27 03:00:00 | \n",
+ " 2013-01-27 03:00:00 | \n",
+ " 264.0 | \n",
+ " 0.8 | \n",
+ " 16.8 | \n",
+ " 2.0 | \n",
+ " 92.0 | \n",
+ " 0.072000 | \n",
+ " 1.000000e-03 | \n",
+ " 6.0 | \n",
+ " 1.0 | \n",
+ " 1.0 | \n",
+ " 2013.0 | \n",
+ " 27.0 | \n",
+ " 27.0 | \n",
+ " 3.0 | \n",
+ " 4.0 | \n",
+ "
\n",
+ " \n",
+ " 2013-01-27 04:00:00 | \n",
+ " 2013-01-27 04:00:00 | \n",
+ " 187.0 | \n",
+ " 0.4 | \n",
+ " 16.2 | \n",
+ " 2.0 | \n",
+ " 94.0 | \n",
+ " 0.063000 | \n",
+ " 1.000000e-03 | \n",
+ " 6.0 | \n",
+ " 1.0 | \n",
+ " 1.0 | \n",
+ " 2013.0 | \n",
+ " 27.0 | \n",
+ " 27.0 | \n",
+ " 4.0 | \n",
+ " 4.0 | \n",
+ "
\n",
+ " \n",
+ " ... | \n",
+ " ... | \n",
+ " ... | \n",
+ " ... | \n",
+ " ... | \n",
+ " ... | \n",
+ " ... | \n",
+ " ... | \n",
+ " ... | \n",
+ " ... | \n",
+ " ... | \n",
+ " ... | \n",
+ " ... | \n",
+ " ... | \n",
+ " ... | \n",
+ " ... | \n",
+ " ... | \n",
+ "
\n",
+ " \n",
+ " 2015-12-31 18:00:00 | \n",
+ " 2015-12-31 18:00:00 | \n",
+ " 85.0 | \n",
+ " 1.4 | \n",
+ " 14.6 | \n",
+ " 10.0 | \n",
+ " 76.0 | \n",
+ " 0.076199 | \n",
+ " 1.747050e-03 | \n",
+ " 3.0 | \n",
+ " 4.0 | \n",
+ " 12.0 | \n",
+ " 2015.0 | \n",
+ " 365.0 | \n",
+ " 31.0 | \n",
+ " 18.0 | \n",
+ " 53.0 | \n",
+ "
\n",
+ " \n",
+ " 2015-12-31 19:00:00 | \n",
+ " 2015-12-31 19:00:00 | \n",
+ " 99.0 | \n",
+ " 1.3 | \n",
+ " 14.6 | \n",
+ " 9.0 | \n",
+ " 75.0 | \n",
+ " 0.098153 | \n",
+ " 1.108100e-04 | \n",
+ " 3.0 | \n",
+ " 4.0 | \n",
+ " 12.0 | \n",
+ " 2015.0 | \n",
+ " 365.0 | \n",
+ " 31.0 | \n",
+ " 19.0 | \n",
+ " 53.0 | \n",
+ "
\n",
+ " \n",
+ " 2015-12-31 20:00:00 | \n",
+ " 2015-12-31 20:00:00 | \n",
+ " 210.0 | \n",
+ " 0.7 | \n",
+ " 14.5 | \n",
+ " 10.0 | \n",
+ " 79.0 | \n",
+ " 0.106758 | \n",
+ " 8.930000e-06 | \n",
+ " 3.0 | \n",
+ " 4.0 | \n",
+ " 12.0 | \n",
+ " 2015.0 | \n",
+ " 365.0 | \n",
+ " 31.0 | \n",
+ " 20.0 | \n",
+ " 53.0 | \n",
+ "
\n",
+ " \n",
+ " 2015-12-31 21:00:00 | \n",
+ " 2015-12-31 21:00:00 | \n",
+ " 185.0 | \n",
+ " 0.8 | \n",
+ " 14.6 | \n",
+ " 10.0 | \n",
+ " 81.0 | \n",
+ " 0.087641 | \n",
+ " 1.000000e-08 | \n",
+ " 3.0 | \n",
+ " 4.0 | \n",
+ " 12.0 | \n",
+ " 2015.0 | \n",
+ " 365.0 | \n",
+ " 31.0 | \n",
+ " 21.0 | \n",
+ " 53.0 | \n",
+ "
\n",
+ " \n",
+ " 2015-12-31 22:00:00 | \n",
+ " 2015-12-31 22:00:00 | \n",
+ " 147.0 | \n",
+ " 0.9 | \n",
+ " 14.1 | \n",
+ " 10.0 | \n",
+ " 85.0 | \n",
+ " 0.089011 | \n",
+ " 0.000000e+00 | \n",
+ " 3.0 | \n",
+ " 4.0 | \n",
+ " 12.0 | \n",
+ " 2015.0 | \n",
+ " 365.0 | \n",
+ " 31.0 | \n",
+ " 22.0 | \n",
+ " 53.0 | \n",
+ "
\n",
+ " \n",
+ "
\n",
+ "
25655 rows × 16 columns
\n",
+ "
"
+ ],
+ "text/plain": [
+ " MeasurementDateTime WD-Hour WS-Hour Temp-Hour SR-Hour RH-Hour NO2 O3 dayofweek quarter month year dayofyear dayofmonth datehour weekofyear\n",
+ "date \n",
+ "2013-01-27 00:00:00 2013-01-27 00:00:00 114.0 1.7 18.1 5.0 100.0 0.017000 2.600000e-02 6.0 1.0 1.0 2013.0 27.0 27.0 0.0 4.0\n",
+ "2013-01-27 01:00:00 2013-01-27 01:00:00 151.0 0.4 17.8 3.0 89.0 0.086000 1.000000e-03 6.0 1.0 1.0 2013.0 27.0 27.0 1.0 4.0\n",
+ "2013-01-27 02:00:00 2013-01-27 02:00:00 175.0 0.3 17.8 2.0 88.0 0.053000 2.000000e-03 6.0 1.0 1.0 2013.0 27.0 27.0 2.0 4.0\n",
+ "2013-01-27 03:00:00 2013-01-27 03:00:00 264.0 0.8 16.8 2.0 92.0 0.072000 1.000000e-03 6.0 1.0 1.0 2013.0 27.0 27.0 3.0 4.0\n",
+ "2013-01-27 04:00:00 2013-01-27 04:00:00 187.0 0.4 16.2 2.0 94.0 0.063000 1.000000e-03 6.0 1.0 1.0 2013.0 27.0 27.0 4.0 4.0\n",
+ "... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ...\n",
+ "2015-12-31 18:00:00 2015-12-31 18:00:00 85.0 1.4 14.6 10.0 76.0 0.076199 1.747050e-03 3.0 4.0 12.0 2015.0 365.0 31.0 18.0 53.0\n",
+ "2015-12-31 19:00:00 2015-12-31 19:00:00 99.0 1.3 14.6 9.0 75.0 0.098153 1.108100e-04 3.0 4.0 12.0 2015.0 365.0 31.0 19.0 53.0\n",
+ "2015-12-31 20:00:00 2015-12-31 20:00:00 210.0 0.7 14.5 10.0 79.0 0.106758 8.930000e-06 3.0 4.0 12.0 2015.0 365.0 31.0 20.0 53.0\n",
+ "2015-12-31 21:00:00 2015-12-31 21:00:00 185.0 0.8 14.6 10.0 81.0 0.087641 1.000000e-08 3.0 4.0 12.0 2015.0 365.0 31.0 21.0 53.0\n",
+ "2015-12-31 22:00:00 2015-12-31 22:00:00 147.0 0.9 14.1 10.0 85.0 0.089011 0.000000e+00 3.0 4.0 12.0 2015.0 365.0 31.0 22.0 53.0\n",
+ "\n",
+ "[25655 rows x 16 columns]"
+ ]
+ },
+ "execution_count": 11,
+ "metadata": {},
+ "output_type": "execute_result"
+ }
+ ],
+ "source": [
+ "# Index by timestamp, then reindex onto a regular hourly grid;\n",
+ "# hours absent from the raw data show up as all-NaN rows.\n",
+ "df = df.set_index('date')\n",
+ "df = df.asfreq('h')\n",
+ "df\n"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 14,
+ "metadata": {},
+ "outputs": [
+ {
+ "data": {
+ "text/plain": [
+ "MeasurementDateTime 2876\n",
+ "WD-Hour 2876\n",
+ "WS-Hour 2876\n",
+ "Temp-Hour 2876\n",
+ "SR-Hour 2876\n",
+ "RH-Hour 2876\n",
+ "NO2 2876\n",
+ "O3 2876\n",
+ "dayofweek 2876\n",
+ "quarter 2876\n",
+ "month 2876\n",
+ "year 2876\n",
+ "dayofyear 2876\n",
+ "dayofmonth 2876\n",
+ "datehour 2876\n",
+ "weekofyear 2876\n",
+ "dtype: int64"
+ ]
+ },
+ "execution_count": 14,
+ "metadata": {},
+ "output_type": "execute_result"
+ }
+ ],
+ "source": [
+ "df.isna().sum()"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 15,
+ "metadata": {},
+ "outputs": [
+ {
+ "data": {
+ "text/html": [
+ "\n",
+ "\n",
+ "
\n",
+ " \n",
+ " \n",
+ " | \n",
+ " MeasurementDateTime | \n",
+ " WD-Hour | \n",
+ " WS-Hour | \n",
+ " Temp-Hour | \n",
+ " SR-Hour | \n",
+ " RH-Hour | \n",
+ " NO2 | \n",
+ " O3 | \n",
+ " dayofweek | \n",
+ " quarter | \n",
+ " month | \n",
+ " year | \n",
+ " dayofyear | \n",
+ " dayofmonth | \n",
+ " datehour | \n",
+ " weekofyear | \n",
+ "
\n",
+ " \n",
+ " date | \n",
+ " | \n",
+ " | \n",
+ " | \n",
+ " | \n",
+ " | \n",
+ " | \n",
+ " | \n",
+ " | \n",
+ " | \n",
+ " | \n",
+ " | \n",
+ " | \n",
+ " | \n",
+ " | \n",
+ " | \n",
+ " | \n",
+ "
\n",
+ " \n",
+ " \n",
+ " \n",
+ " 2013-01-27 00:00:00 | \n",
+ " 2013-01-27 00:00:00 | \n",
+ " 114.0 | \n",
+ " 1.7 | \n",
+ " 18.1 | \n",
+ " 5.0 | \n",
+ " 100.0 | \n",
+ " 0.017000 | \n",
+ " 2.600000e-02 | \n",
+ " 6.0 | \n",
+ " 1.0 | \n",
+ " 1.0 | \n",
+ " 2013.0 | \n",
+ " 27.0 | \n",
+ " 27.0 | \n",
+ " 0.0 | \n",
+ " 4.0 | \n",
+ "
\n",
+ " \n",
+ " 2013-01-27 01:00:00 | \n",
+ " 2013-01-27 01:00:00 | \n",
+ " 151.0 | \n",
+ " 0.4 | \n",
+ " 17.8 | \n",
+ " 3.0 | \n",
+ " 89.0 | \n",
+ " 0.086000 | \n",
+ " 1.000000e-03 | \n",
+ " 6.0 | \n",
+ " 1.0 | \n",
+ " 1.0 | \n",
+ " 2013.0 | \n",
+ " 27.0 | \n",
+ " 27.0 | \n",
+ " 1.0 | \n",
+ " 4.0 | \n",
+ "
\n",
+ " \n",
+ " 2013-01-27 02:00:00 | \n",
+ " 2013-01-27 02:00:00 | \n",
+ " 175.0 | \n",
+ " 0.3 | \n",
+ " 17.8 | \n",
+ " 2.0 | \n",
+ " 88.0 | \n",
+ " 0.053000 | \n",
+ " 2.000000e-03 | \n",
+ " 6.0 | \n",
+ " 1.0 | \n",
+ " 1.0 | \n",
+ " 2013.0 | \n",
+ " 27.0 | \n",
+ " 27.0 | \n",
+ " 2.0 | \n",
+ " 4.0 | \n",
+ "
\n",
+ " \n",
+ " 2013-01-27 03:00:00 | \n",
+ " 2013-01-27 03:00:00 | \n",
+ " 264.0 | \n",
+ " 0.8 | \n",
+ " 16.8 | \n",
+ " 2.0 | \n",
+ " 92.0 | \n",
+ " 0.072000 | \n",
+ " 1.000000e-03 | \n",
+ " 6.0 | \n",
+ " 1.0 | \n",
+ " 1.0 | \n",
+ " 2013.0 | \n",
+ " 27.0 | \n",
+ " 27.0 | \n",
+ " 3.0 | \n",
+ " 4.0 | \n",
+ "
\n",
+ " \n",
+ " 2013-01-27 04:00:00 | \n",
+ " 2013-01-27 04:00:00 | \n",
+ " 187.0 | \n",
+ " 0.4 | \n",
+ " 16.2 | \n",
+ " 2.0 | \n",
+ " 94.0 | \n",
+ " 0.063000 | \n",
+ " 1.000000e-03 | \n",
+ " 6.0 | \n",
+ " 1.0 | \n",
+ " 1.0 | \n",
+ " 2013.0 | \n",
+ " 27.0 | \n",
+ " 27.0 | \n",
+ " 4.0 | \n",
+ " 4.0 | \n",
+ "
\n",
+ " \n",
+ " ... | \n",
+ " ... | \n",
+ " ... | \n",
+ " ... | \n",
+ " ... | \n",
+ " ... | \n",
+ " ... | \n",
+ " ... | \n",
+ " ... | \n",
+ " ... | \n",
+ " ... | \n",
+ " ... | \n",
+ " ... | \n",
+ " ... | \n",
+ " ... | \n",
+ " ... | \n",
+ " ... | \n",
+ "
\n",
+ " \n",
+ " 2015-12-31 18:00:00 | \n",
+ " 2015-12-31 18:00:00 | \n",
+ " 85.0 | \n",
+ " 1.4 | \n",
+ " 14.6 | \n",
+ " 10.0 | \n",
+ " 76.0 | \n",
+ " 0.076199 | \n",
+ " 1.747050e-03 | \n",
+ " 3.0 | \n",
+ " 4.0 | \n",
+ " 12.0 | \n",
+ " 2015.0 | \n",
+ " 365.0 | \n",
+ " 31.0 | \n",
+ " 18.0 | \n",
+ " 53.0 | \n",
+ "
\n",
+ " \n",
+ " 2015-12-31 19:00:00 | \n",
+ " 2015-12-31 19:00:00 | \n",
+ " 99.0 | \n",
+ " 1.3 | \n",
+ " 14.6 | \n",
+ " 9.0 | \n",
+ " 75.0 | \n",
+ " 0.098153 | \n",
+ " 1.108100e-04 | \n",
+ " 3.0 | \n",
+ " 4.0 | \n",
+ " 12.0 | \n",
+ " 2015.0 | \n",
+ " 365.0 | \n",
+ " 31.0 | \n",
+ " 19.0 | \n",
+ " 53.0 | \n",
+ "
\n",
+ " \n",
+ " 2015-12-31 20:00:00 | \n",
+ " 2015-12-31 20:00:00 | \n",
+ " 210.0 | \n",
+ " 0.7 | \n",
+ " 14.5 | \n",
+ " 10.0 | \n",
+ " 79.0 | \n",
+ " 0.106758 | \n",
+ " 8.930000e-06 | \n",
+ " 3.0 | \n",
+ " 4.0 | \n",
+ " 12.0 | \n",
+ " 2015.0 | \n",
+ " 365.0 | \n",
+ " 31.0 | \n",
+ " 20.0 | \n",
+ " 53.0 | \n",
+ "
\n",
+ " \n",
+ " 2015-12-31 21:00:00 | \n",
+ " 2015-12-31 21:00:00 | \n",
+ " 185.0 | \n",
+ " 0.8 | \n",
+ " 14.6 | \n",
+ " 10.0 | \n",
+ " 81.0 | \n",
+ " 0.087641 | \n",
+ " 1.000000e-08 | \n",
+ " 3.0 | \n",
+ " 4.0 | \n",
+ " 12.0 | \n",
+ " 2015.0 | \n",
+ " 365.0 | \n",
+ " 31.0 | \n",
+ " 21.0 | \n",
+ " 53.0 | \n",
+ "
\n",
+ " \n",
+ " 2015-12-31 22:00:00 | \n",
+ " 2015-12-31 22:00:00 | \n",
+ " 147.0 | \n",
+ " 0.9 | \n",
+ " 14.1 | \n",
+ " 10.0 | \n",
+ " 85.0 | \n",
+ " 0.089011 | \n",
+ " 0.000000e+00 | \n",
+ " 3.0 | \n",
+ " 4.0 | \n",
+ " 12.0 | \n",
+ " 2015.0 | \n",
+ " 365.0 | \n",
+ " 31.0 | \n",
+ " 22.0 | \n",
+ " 53.0 | \n",
+ "
\n",
+ " \n",
+ "
\n",
+ "
25655 rows × 16 columns
\n",
+ "
"
+ ],
+ "text/plain": [
+ " MeasurementDateTime WD-Hour WS-Hour Temp-Hour SR-Hour RH-Hour NO2 O3 dayofweek quarter month year dayofyear dayofmonth datehour weekofyear\n",
+ "date \n",
+ "2013-01-27 00:00:00 2013-01-27 00:00:00 114.0 1.7 18.1 5.0 100.0 0.017000 2.600000e-02 6.0 1.0 1.0 2013.0 27.0 27.0 0.0 4.0\n",
+ "2013-01-27 01:00:00 2013-01-27 01:00:00 151.0 0.4 17.8 3.0 89.0 0.086000 1.000000e-03 6.0 1.0 1.0 2013.0 27.0 27.0 1.0 4.0\n",
+ "2013-01-27 02:00:00 2013-01-27 02:00:00 175.0 0.3 17.8 2.0 88.0 0.053000 2.000000e-03 6.0 1.0 1.0 2013.0 27.0 27.0 2.0 4.0\n",
+ "2013-01-27 03:00:00 2013-01-27 03:00:00 264.0 0.8 16.8 2.0 92.0 0.072000 1.000000e-03 6.0 1.0 1.0 2013.0 27.0 27.0 3.0 4.0\n",
+ "2013-01-27 04:00:00 2013-01-27 04:00:00 187.0 0.4 16.2 2.0 94.0 0.063000 1.000000e-03 6.0 1.0 1.0 2013.0 27.0 27.0 4.0 4.0\n",
+ "... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ...\n",
+ "2015-12-31 18:00:00 2015-12-31 18:00:00 85.0 1.4 14.6 10.0 76.0 0.076199 1.747050e-03 3.0 4.0 12.0 2015.0 365.0 31.0 18.0 53.0\n",
+ "2015-12-31 19:00:00 2015-12-31 19:00:00 99.0 1.3 14.6 9.0 75.0 0.098153 1.108100e-04 3.0 4.0 12.0 2015.0 365.0 31.0 19.0 53.0\n",
+ "2015-12-31 20:00:00 2015-12-31 20:00:00 210.0 0.7 14.5 10.0 79.0 0.106758 8.930000e-06 3.0 4.0 12.0 2015.0 365.0 31.0 20.0 53.0\n",
+ "2015-12-31 21:00:00 2015-12-31 21:00:00 185.0 0.8 14.6 10.0 81.0 0.087641 1.000000e-08 3.0 4.0 12.0 2015.0 365.0 31.0 21.0 53.0\n",
+ "2015-12-31 22:00:00 2015-12-31 22:00:00 147.0 0.9 14.1 10.0 85.0 0.089011 0.000000e+00 3.0 4.0 12.0 2015.0 365.0 31.0 22.0 53.0\n",
+ "\n",
+ "[25655 rows x 16 columns]"
+ ]
+ },
+ "execution_count": 15,
+ "metadata": {},
+ "output_type": "execute_result"
+ }
+ ],
+ "source": [
+ "# Forward-fill the rows that asfreq('h') inserted for missing hours.\n",
+ "# DataFrame.ffill() replaces interpolate(method='ffill'), which is deprecated in\n",
+ "# recent pandas and never performed real interpolation anyway.\n",
+ "# NOTE(review): the calendar columns (datehour, dayofweek, ...) are forward-filled as\n",
+ "# well, so inserted rows carry the previous row's hour/day - consider recomputing\n",
+ "# them from the index instead.\n",
+ "df1 = df.ffill()\n",
+ "df1"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 16,
+ "metadata": {},
+ "outputs": [
+ {
+ "data": {
+ "text/html": [
+ "\n",
+ "\n",
+ "
\n",
+ " \n",
+ " \n",
+ " | \n",
+ " dayofweek | \n",
+ " quarter | \n",
+ " month | \n",
+ " year | \n",
+ " dayofyear | \n",
+ " dayofmonth | \n",
+ " datehour | \n",
+ " weekofyear | \n",
+ " WD-Hour | \n",
+ " WS-Hour | \n",
+ " Temp-Hour | \n",
+ " SR-Hour | \n",
+ " RH-Hour | \n",
+ " NO2 | \n",
+ " O3 | \n",
+ "
\n",
+ " \n",
+ " date | \n",
+ " | \n",
+ " | \n",
+ " | \n",
+ " | \n",
+ " | \n",
+ " | \n",
+ " | \n",
+ " | \n",
+ " | \n",
+ " | \n",
+ " | \n",
+ " | \n",
+ " | \n",
+ " | \n",
+ " | \n",
+ "
\n",
+ " \n",
+ " \n",
+ " \n",
+ " 2013-01-27 00:00:00 | \n",
+ " 6.0 | \n",
+ " 1.0 | \n",
+ " 1.0 | \n",
+ " 2013.0 | \n",
+ " 27.0 | \n",
+ " 27.0 | \n",
+ " 0.0 | \n",
+ " 4.0 | \n",
+ " 114.0 | \n",
+ " 1.7 | \n",
+ " 18.1 | \n",
+ " 5.0 | \n",
+ " 100.0 | \n",
+ " 0.017000 | \n",
+ " 2.600000e-02 | \n",
+ "
\n",
+ " \n",
+ " 2013-01-27 01:00:00 | \n",
+ " 6.0 | \n",
+ " 1.0 | \n",
+ " 1.0 | \n",
+ " 2013.0 | \n",
+ " 27.0 | \n",
+ " 27.0 | \n",
+ " 1.0 | \n",
+ " 4.0 | \n",
+ " 151.0 | \n",
+ " 0.4 | \n",
+ " 17.8 | \n",
+ " 3.0 | \n",
+ " 89.0 | \n",
+ " 0.086000 | \n",
+ " 1.000000e-03 | \n",
+ "
\n",
+ " \n",
+ " 2013-01-27 02:00:00 | \n",
+ " 6.0 | \n",
+ " 1.0 | \n",
+ " 1.0 | \n",
+ " 2013.0 | \n",
+ " 27.0 | \n",
+ " 27.0 | \n",
+ " 2.0 | \n",
+ " 4.0 | \n",
+ " 175.0 | \n",
+ " 0.3 | \n",
+ " 17.8 | \n",
+ " 2.0 | \n",
+ " 88.0 | \n",
+ " 0.053000 | \n",
+ " 2.000000e-03 | \n",
+ "
\n",
+ " \n",
+ " 2013-01-27 03:00:00 | \n",
+ " 6.0 | \n",
+ " 1.0 | \n",
+ " 1.0 | \n",
+ " 2013.0 | \n",
+ " 27.0 | \n",
+ " 27.0 | \n",
+ " 3.0 | \n",
+ " 4.0 | \n",
+ " 264.0 | \n",
+ " 0.8 | \n",
+ " 16.8 | \n",
+ " 2.0 | \n",
+ " 92.0 | \n",
+ " 0.072000 | \n",
+ " 1.000000e-03 | \n",
+ "
\n",
+ " \n",
+ " 2013-01-27 04:00:00 | \n",
+ " 6.0 | \n",
+ " 1.0 | \n",
+ " 1.0 | \n",
+ " 2013.0 | \n",
+ " 27.0 | \n",
+ " 27.0 | \n",
+ " 4.0 | \n",
+ " 4.0 | \n",
+ " 187.0 | \n",
+ " 0.4 | \n",
+ " 16.2 | \n",
+ " 2.0 | \n",
+ " 94.0 | \n",
+ " 0.063000 | \n",
+ " 1.000000e-03 | \n",
+ "
\n",
+ " \n",
+ " ... | \n",
+ " ... | \n",
+ " ... | \n",
+ " ... | \n",
+ " ... | \n",
+ " ... | \n",
+ " ... | \n",
+ " ... | \n",
+ " ... | \n",
+ " ... | \n",
+ " ... | \n",
+ " ... | \n",
+ " ... | \n",
+ " ... | \n",
+ " ... | \n",
+ " ... | \n",
+ "
\n",
+ " \n",
+ " 2015-12-31 18:00:00 | \n",
+ " 3.0 | \n",
+ " 4.0 | \n",
+ " 12.0 | \n",
+ " 2015.0 | \n",
+ " 365.0 | \n",
+ " 31.0 | \n",
+ " 18.0 | \n",
+ " 53.0 | \n",
+ " 85.0 | \n",
+ " 1.4 | \n",
+ " 14.6 | \n",
+ " 10.0 | \n",
+ " 76.0 | \n",
+ " 0.076199 | \n",
+ " 1.747050e-03 | \n",
+ "
\n",
+ " \n",
+ " 2015-12-31 19:00:00 | \n",
+ " 3.0 | \n",
+ " 4.0 | \n",
+ " 12.0 | \n",
+ " 2015.0 | \n",
+ " 365.0 | \n",
+ " 31.0 | \n",
+ " 19.0 | \n",
+ " 53.0 | \n",
+ " 99.0 | \n",
+ " 1.3 | \n",
+ " 14.6 | \n",
+ " 9.0 | \n",
+ " 75.0 | \n",
+ " 0.098153 | \n",
+ " 1.108100e-04 | \n",
+ "
\n",
+ " \n",
+ " 2015-12-31 20:00:00 | \n",
+ " 3.0 | \n",
+ " 4.0 | \n",
+ " 12.0 | \n",
+ " 2015.0 | \n",
+ " 365.0 | \n",
+ " 31.0 | \n",
+ " 20.0 | \n",
+ " 53.0 | \n",
+ " 210.0 | \n",
+ " 0.7 | \n",
+ " 14.5 | \n",
+ " 10.0 | \n",
+ " 79.0 | \n",
+ " 0.106758 | \n",
+ " 8.930000e-06 | \n",
+ "
\n",
+ " \n",
+ " 2015-12-31 21:00:00 | \n",
+ " 3.0 | \n",
+ " 4.0 | \n",
+ " 12.0 | \n",
+ " 2015.0 | \n",
+ " 365.0 | \n",
+ " 31.0 | \n",
+ " 21.0 | \n",
+ " 53.0 | \n",
+ " 185.0 | \n",
+ " 0.8 | \n",
+ " 14.6 | \n",
+ " 10.0 | \n",
+ " 81.0 | \n",
+ " 0.087641 | \n",
+ " 1.000000e-08 | \n",
+ "
\n",
+ " \n",
+ " 2015-12-31 22:00:00 | \n",
+ " 3.0 | \n",
+ " 4.0 | \n",
+ " 12.0 | \n",
+ " 2015.0 | \n",
+ " 365.0 | \n",
+ " 31.0 | \n",
+ " 22.0 | \n",
+ " 53.0 | \n",
+ " 147.0 | \n",
+ " 0.9 | \n",
+ " 14.1 | \n",
+ " 10.0 | \n",
+ " 85.0 | \n",
+ " 0.089011 | \n",
+ " 0.000000e+00 | \n",
+ "
\n",
+ " \n",
+ "
\n",
+ "
25655 rows × 15 columns
\n",
+ "
"
+ ],
+ "text/plain": [
+ " dayofweek quarter month year dayofyear dayofmonth datehour weekofyear WD-Hour WS-Hour Temp-Hour SR-Hour RH-Hour NO2 O3\n",
+ "date \n",
+ "2013-01-27 00:00:00 6.0 1.0 1.0 2013.0 27.0 27.0 0.0 4.0 114.0 1.7 18.1 5.0 100.0 0.017000 2.600000e-02\n",
+ "2013-01-27 01:00:00 6.0 1.0 1.0 2013.0 27.0 27.0 1.0 4.0 151.0 0.4 17.8 3.0 89.0 0.086000 1.000000e-03\n",
+ "2013-01-27 02:00:00 6.0 1.0 1.0 2013.0 27.0 27.0 2.0 4.0 175.0 0.3 17.8 2.0 88.0 0.053000 2.000000e-03\n",
+ "2013-01-27 03:00:00 6.0 1.0 1.0 2013.0 27.0 27.0 3.0 4.0 264.0 0.8 16.8 2.0 92.0 0.072000 1.000000e-03\n",
+ "2013-01-27 04:00:00 6.0 1.0 1.0 2013.0 27.0 27.0 4.0 4.0 187.0 0.4 16.2 2.0 94.0 0.063000 1.000000e-03\n",
+ "... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ...\n",
+ "2015-12-31 18:00:00 3.0 4.0 12.0 2015.0 365.0 31.0 18.0 53.0 85.0 1.4 14.6 10.0 76.0 0.076199 1.747050e-03\n",
+ "2015-12-31 19:00:00 3.0 4.0 12.0 2015.0 365.0 31.0 19.0 53.0 99.0 1.3 14.6 9.0 75.0 0.098153 1.108100e-04\n",
+ "2015-12-31 20:00:00 3.0 4.0 12.0 2015.0 365.0 31.0 20.0 53.0 210.0 0.7 14.5 10.0 79.0 0.106758 8.930000e-06\n",
+ "2015-12-31 21:00:00 3.0 4.0 12.0 2015.0 365.0 31.0 21.0 53.0 185.0 0.8 14.6 10.0 81.0 0.087641 1.000000e-08\n",
+ "2015-12-31 22:00:00 3.0 4.0 12.0 2015.0 365.0 31.0 22.0 53.0 147.0 0.9 14.1 10.0 85.0 0.089011 0.000000e+00\n",
+ "\n",
+ "[25655 rows x 15 columns]"
+ ]
+ },
+ "execution_count": 16,
+ "metadata": {},
+ "output_type": "execute_result"
+ }
+ ],
+ "source": [
+ "# Keep only the model columns: calendar features first, then the sensor readings,\n",
+ "# with the target (O3) last. MeasurementDateTime is dropped (it duplicates the index).\n",
+ "calendar_features = ['dayofweek', 'quarter', 'month', 'year', 'dayofyear', 'dayofmonth', 'datehour', 'weekofyear']\n",
+ "measurements = ['WD-Hour', 'WS-Hour', 'Temp-Hour', 'SR-Hour', 'RH-Hour', 'NO2', 'O3']\n",
+ "cols = calendar_features + measurements\n",
+ "df1 = df1[cols]\n",
+ "df1"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 17,
+ "metadata": {},
+ "outputs": [
+ {
+ "data": {
+ "text/plain": [
+ "dayofweek 0\n",
+ "quarter 0\n",
+ "month 0\n",
+ "year 0\n",
+ "dayofyear 0\n",
+ "dayofmonth 0\n",
+ "datehour 0\n",
+ "weekofyear 0\n",
+ "WD-Hour 0\n",
+ "WS-Hour 0\n",
+ "Temp-Hour 0\n",
+ "SR-Hour 0\n",
+ "RH-Hour 0\n",
+ "NO2 0\n",
+ "O3 0\n",
+ "dtype: int64"
+ ]
+ },
+ "execution_count": 17,
+ "metadata": {},
+ "output_type": "execute_result"
+ }
+ ],
+ "source": [
+ "df1.isna().sum()"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 16,
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "#cols =['dayofweek', 'quarter', 'month', 'year', 'dayofyear', 'dayofmonth', 'datehour', 'weekofyear','MeasurementDateTime', 'WD-Hour', 'WS-Hour', 'Temp-Hour', 'SR-Hour', 'RH-Hour', 'NO2', 'O3']\n",
+ "#df = df[cols]\n",
+ "#df = df.set_index('MeasurementDateTime')\n",
+ "#df"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 17,
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "#df1 = df1.drop(['year'],axis=1)\n",
+ "#df1"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 18,
+ "metadata": {},
+ "outputs": [
+ {
+ "data": {
+ "text/html": [
+ "\n",
+ "\n",
+ " \n",
+ " \n",
+ " | \n",
+ " Description | \n",
+ " Value | \n",
+ "
\n",
+ " \n",
+ " \n",
+ " \n",
+ " 0 | \n",
+ " Session id | \n",
+ " 123 | \n",
+ "
\n",
+ " \n",
+ " 1 | \n",
+ " Target | \n",
+ " O3 | \n",
+ "
\n",
+ " \n",
+ " 2 | \n",
+ " Target type | \n",
+ " regression | \n",
+ "
\n",
+ " \n",
+ " 3 | \n",
+ " Data shape | \n",
+ " (25655, 15) | \n",
+ "
\n",
+ " \n",
+ " 4 | \n",
+ " Train data shape | \n",
+ " (17958, 15) | \n",
+ "
\n",
+ " \n",
+ " 5 | \n",
+ " Test data shape | \n",
+ " (7697, 15) | \n",
+ "
\n",
+ " \n",
+ " 6 | \n",
+ " Numeric features | \n",
+ " 14 | \n",
+ "
\n",
+ " \n",
+ " 7 | \n",
+ " Preprocess | \n",
+ " 1 | \n",
+ "
\n",
+ " \n",
+ " 8 | \n",
+ " Imputation type | \n",
+ " simple | \n",
+ "
\n",
+ " \n",
+ " 9 | \n",
+ " Numeric imputation | \n",
+ " mean | \n",
+ "
\n",
+ " \n",
+ " 10 | \n",
+ " Categorical imputation | \n",
+ " constant | \n",
+ "
\n",
+ " \n",
+ " 11 | \n",
+ " Fold Generator | \n",
+ " KFold | \n",
+ "
\n",
+ " \n",
+ " 12 | \n",
+ " Fold Number | \n",
+ " 10 | \n",
+ "
\n",
+ " \n",
+ " 13 | \n",
+ " CPU Jobs | \n",
+ " -1 | \n",
+ "
\n",
+ " \n",
+ " 14 | \n",
+ " Log Experiment | \n",
+ " 0 | \n",
+ "
\n",
+ " \n",
+ " 15 | \n",
+ " Experiment Name | \n",
+ " reg-default-name | \n",
+ "
\n",
+ " \n",
+ " 16 | \n",
+ " USI | \n",
+ " 9345 | \n",
+ "
\n",
+ " \n",
+ "
\n"
+ ],
+ "text/plain": [
+ ""
+ ]
+ },
+ "metadata": {},
+ "output_type": "display_data"
+ }
+ ],
+ "source": [
+ "from pycaret.regression import *\n",
+ "\n",
+ "# Hourly readings are strongly autocorrelated (and gaps were forward-filled), so a\n",
+ "# shuffled train/test split or shuffled KFold puts near-duplicate neighbouring hours on\n",
+ "# both sides of every split and inflates the scores. Keep rows in chronological order:\n",
+ "# the hold-out set is the most recent slice and CV uses time-series folds.\n",
+ "exp_reg101 = setup(\n",
+ "    data=df1,\n",
+ "    target='O3',\n",
+ "    session_id=123,\n",
+ "    data_split_shuffle=False,\n",
+ "    fold_strategy='timeseries',\n",
+ ")"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 19,
+ "metadata": {},
+ "outputs": [
+ {
+ "data": {
+ "text/html": [
+ "\n",
+ "\n",
+ " \n",
+ " \n",
+ " | \n",
+ " Model | \n",
+ " MAE | \n",
+ " MSE | \n",
+ " RMSE | \n",
+ " R2 | \n",
+ " RMSLE | \n",
+ " MAPE | \n",
+ " TT (Sec) | \n",
+ "
\n",
+ " \n",
+ " \n",
+ " \n",
+ " catboost | \n",
+ " CatBoost Regressor | \n",
+ " 0.0043 | \n",
+ " 0.0000 | \n",
+ " 0.0063 | \n",
+ " 0.8809 | \n",
+ " 0.0061 | \n",
+ " 4.3849 | \n",
+ " 2.2550 | \n",
+ "
\n",
+ " \n",
+ " xgboost | \n",
+ " Extreme Gradient Boosting | \n",
+ " 0.0043 | \n",
+ " 0.0000 | \n",
+ " 0.0064 | \n",
+ " 0.8787 | \n",
+ " 0.0061 | \n",
+ " 6.0682 | \n",
+ " 0.4340 | \n",
+ "
\n",
+ " \n",
+ " et | \n",
+ " Extra Trees Regressor | \n",
+ " 0.0041 | \n",
+ " 0.0000 | \n",
+ " 0.0064 | \n",
+ " 0.8777 | \n",
+ " 0.0061 | \n",
+ " 3.4533 | \n",
+ " 0.9890 | \n",
+ "
\n",
+ " \n",
+ " lightgbm | \n",
+ " Light Gradient Boosting Machine | \n",
+ " 0.0048 | \n",
+ " 0.0000 | \n",
+ " 0.0070 | \n",
+ " 0.8536 | \n",
+ " 0.0067 | \n",
+ " 9.2379 | \n",
+ " 0.2320 | \n",
+ "
\n",
+ " \n",
+ " rf | \n",
+ " Random Forest Regressor | \n",
+ " 0.0045 | \n",
+ " 0.0001 | \n",
+ " 0.0071 | \n",
+ " 0.8495 | \n",
+ " 0.0068 | \n",
+ " 3.9257 | \n",
+ " 1.6330 | \n",
+ "
\n",
+ " \n",
+ " gbr | \n",
+ " Gradient Boosting Regressor | \n",
+ " 0.0062 | \n",
+ " 0.0001 | \n",
+ " 0.0089 | \n",
+ " 0.7629 | \n",
+ " 0.0086 | \n",
+ " 15.3717 | \n",
+ " 0.5790 | \n",
+ "
\n",
+ " \n",
+ " dt | \n",
+ " Decision Tree Regressor | \n",
+ " 0.0062 | \n",
+ " 0.0001 | \n",
+ " 0.0104 | \n",
+ " 0.6787 | \n",
+ " 0.0100 | \n",
+ " 1.7337 | \n",
+ " 0.0430 | \n",
+ "
\n",
+ " \n",
+ " lr | \n",
+ " Linear Regression | \n",
+ " 0.0084 | \n",
+ " 0.0001 | \n",
+ " 0.0115 | \n",
+ " 0.6051 | \n",
+ " 0.0110 | \n",
+ " 37.2053 | \n",
+ " 0.6290 | \n",
+ "
\n",
+ " \n",
+ " lar | \n",
+ " Least Angle Regression | \n",
+ " 0.0084 | \n",
+ " 0.0001 | \n",
+ " 0.0115 | \n",
+ " 0.6051 | \n",
+ " 0.0110 | \n",
+ " 37.5542 | \n",
+ " 0.0130 | \n",
+ "
\n",
+ " \n",
+ " br | \n",
+ " Bayesian Ridge | \n",
+ " 0.0084 | \n",
+ " 0.0001 | \n",
+ " 0.0115 | \n",
+ " 0.6051 | \n",
+ " 0.0110 | \n",
+ " 37.2033 | \n",
+ " 0.0150 | \n",
+ "
\n",
+ " \n",
+ " ridge | \n",
+ " Ridge Regression | \n",
+ " 0.0084 | \n",
+ " 0.0001 | \n",
+ " 0.0115 | \n",
+ " 0.6035 | \n",
+ " 0.0110 | \n",
+ " 37.8741 | \n",
+ " 0.0140 | \n",
+ "
\n",
+ " \n",
+ " knn | \n",
+ " K Neighbors Regressor | \n",
+ " 0.0084 | \n",
+ " 0.0001 | \n",
+ " 0.0118 | \n",
+ " 0.5852 | \n",
+ " 0.0114 | \n",
+ " 48.5364 | \n",
+ " 0.0410 | \n",
+ "
\n",
+ " \n",
+ " ada | \n",
+ " AdaBoost Regressor | \n",
+ " 0.0110 | \n",
+ " 0.0002 | \n",
+ " 0.0131 | \n",
+ " 0.4864 | \n",
+ " 0.0127 | \n",
+ " 143.3527 | \n",
+ " 0.3020 | \n",
+ "
\n",
+ " \n",
+ " omp | \n",
+ " Orthogonal Matching Pursuit | \n",
+ " 0.0118 | \n",
+ " 0.0002 | \n",
+ " 0.0151 | \n",
+ " 0.3208 | \n",
+ " 0.0146 | \n",
+ " 159.7325 | \n",
+ " 0.0110 | \n",
+ "
\n",
+ " \n",
+ " en | \n",
+ " Elastic Net | \n",
+ " 0.0119 | \n",
+ " 0.0002 | \n",
+ " 0.0151 | \n",
+ " 0.3201 | \n",
+ " 0.0146 | \n",
+ " 177.8048 | \n",
+ " 0.0100 | \n",
+ "
\n",
+ " \n",
+ " lasso | \n",
+ " Lasso Regression | \n",
+ " 0.0121 | \n",
+ " 0.0002 | \n",
+ " 0.0154 | \n",
+ " 0.2932 | \n",
+ " 0.0149 | \n",
+ " 188.8234 | \n",
+ " 0.0140 | \n",
+ "
\n",
+ " \n",
+ " huber | \n",
+ " Huber Regressor | \n",
+ " 0.0116 | \n",
+ " 0.0003 | \n",
+ " 0.0169 | \n",
+ " 0.0927 | \n",
+ " 0.0159 | \n",
+ " 31.1456 | \n",
+ " 0.2690 | \n",
+ "
\n",
+ " \n",
+ " llar | \n",
+ " Lasso Least Angle Regression | \n",
+ " 0.0147 | \n",
+ " 0.0003 | \n",
+ " 0.0183 | \n",
+ " -0.0009 | \n",
+ " 0.0177 | \n",
+ " 233.4750 | \n",
+ " 0.0150 | \n",
+ "
\n",
+ " \n",
+ " dummy | \n",
+ " Dummy Regressor | \n",
+ " 0.0147 | \n",
+ " 0.0003 | \n",
+ " 0.0183 | \n",
+ " -0.0009 | \n",
+ " 0.0177 | \n",
+ " 233.4750 | \n",
+ " 0.0120 | \n",
+ "
\n",
+ " \n",
+ " par | \n",
+ " Passive Aggressive Regressor | \n",
+ " 0.0430 | \n",
+ " 0.0021 | \n",
+ " 0.0458 | \n",
+ " -5.2891 | \n",
+ " 0.0439 | \n",
+ " 586.9311 | \n",
+ " 0.0130 | \n",
+ "
\n",
+ " \n",
+ "
\n"
+ ],
+ "text/plain": [
+ ""
+ ]
+ },
+ "metadata": {},
+ "output_type": "display_data"
+ },
+ {
+ "data": {
+ "text/plain": [
+ ""
+ ]
+ },
+ "execution_count": 19,
+ "metadata": {},
+ "output_type": "execute_result"
+ }
+ ],
+ "source": [
+ "compare_models()\n"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 20,
+ "metadata": {},
+ "outputs": [
+ {
+ "data": {
+ "text/html": [
+ "\n",
+ "\n",
+ " \n",
+ " \n",
+ " | \n",
+ " MAE | \n",
+ " MSE | \n",
+ " RMSE | \n",
+ " R2 | \n",
+ " RMSLE | \n",
+ " MAPE | \n",
+ "
\n",
+ " \n",
+ " \n",
+ " \n",
+ " 0 | \n",
+ " 0.0043 | \n",
+ " 0.0000 | \n",
+ " 0.0061 | \n",
+ " 0.8947 | \n",
+ " 0.0059 | \n",
+ " 0.4868 | \n",
+ "
\n",
+ " \n",
+ " 1 | \n",
+ " 0.0042 | \n",
+ " 0.0000 | \n",
+ " 0.0064 | \n",
+ " 0.8761 | \n",
+ " 0.0061 | \n",
+ " 0.4432 | \n",
+ "
\n",
+ " \n",
+ " 2 | \n",
+ " 0.0043 | \n",
+ " 0.0000 | \n",
+ " 0.0066 | \n",
+ " 0.8744 | \n",
+ " 0.0063 | \n",
+ " 24.7232 | \n",
+ "
\n",
+ " \n",
+ " 3 | \n",
+ " 0.0042 | \n",
+ " 0.0000 | \n",
+ " 0.0062 | \n",
+ " 0.8876 | \n",
+ " 0.0059 | \n",
+ " 0.5085 | \n",
+ "
\n",
+ " \n",
+ " 4 | \n",
+ " 0.0042 | \n",
+ " 0.0000 | \n",
+ " 0.0061 | \n",
+ " 0.8934 | \n",
+ " 0.0059 | \n",
+ " 0.4957 | \n",
+ "
\n",
+ " \n",
+ " 5 | \n",
+ " 0.0043 | \n",
+ " 0.0000 | \n",
+ " 0.0062 | \n",
+ " 0.8721 | \n",
+ " 0.0060 | \n",
+ " 0.3841 | \n",
+ "
\n",
+ " \n",
+ " 6 | \n",
+ " 0.0046 | \n",
+ " 0.0000 | \n",
+ " 0.0069 | \n",
+ " 0.8635 | \n",
+ " 0.0066 | \n",
+ " 7.5846 | \n",
+ "
\n",
+ " \n",
+ " 7 | \n",
+ " 0.0044 | \n",
+ " 0.0000 | \n",
+ " 0.0064 | \n",
+ " 0.8815 | \n",
+ " 0.0061 | \n",
+ " 1.6161 | \n",
+ "
\n",
+ " \n",
+ " 8 | \n",
+ " 0.0043 | \n",
+ " 0.0000 | \n",
+ " 0.0063 | \n",
+ " 0.8763 | \n",
+ " 0.0060 | \n",
+ " 1.4757 | \n",
+ "
\n",
+ " \n",
+ " 9 | \n",
+ " 0.0042 | \n",
+ " 0.0000 | \n",
+ " 0.0059 | \n",
+ " 0.8894 | \n",
+ " 0.0057 | \n",
+ " 6.1310 | \n",
+ "
\n",
+ " \n",
+ " Mean | \n",
+ " 0.0043 | \n",
+ " 0.0000 | \n",
+ " 0.0063 | \n",
+ " 0.8809 | \n",
+ " 0.0061 | \n",
+ " 4.3849 | \n",
+ "
\n",
+ " \n",
+ " SD | \n",
+ " 0.0001 | \n",
+ " 0.0000 | \n",
+ " 0.0003 | \n",
+ " 0.0096 | \n",
+ " 0.0002 | \n",
+ " 7.2108 | \n",
+ "
\n",
+ " \n",
+ "
\n"
+ ],
+ "text/plain": [
+ ""
+ ]
+ },
+ "metadata": {},
+ "output_type": "display_data"
+ }
+ ],
+ "source": [
+ "catboost = create_model('catboost')"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 21,
+ "metadata": {},
+ "outputs": [
+ {
+ "data": {
+ "text/html": [
+ "\n",
+ "\n",
+ " \n",
+ " \n",
+ " | \n",
+ " MAE | \n",
+ " MSE | \n",
+ " RMSE | \n",
+ " R2 | \n",
+ " RMSLE | \n",
+ " MAPE | \n",
+ "
\n",
+ " \n",
+ " \n",
+ " \n",
+ " 0 | \n",
+ " 0.0043 | \n",
+ " 0.0000 | \n",
+ " 0.0063 | \n",
+ " 0.8883 | \n",
+ " 0.0061 | \n",
+ " 0.4205 | \n",
+ "
\n",
+ " \n",
+ " 1 | \n",
+ " 0.0042 | \n",
+ " 0.0000 | \n",
+ " 0.0066 | \n",
+ " 0.8697 | \n",
+ " 0.0063 | \n",
+ " 0.4188 | \n",
+ "
\n",
+ " \n",
+ " 2 | \n",
+ " 0.0043 | \n",
+ " 0.0000 | \n",
+ " 0.0067 | \n",
+ " 0.8706 | \n",
+ " 0.0064 | \n",
+ " 33.1689 | \n",
+ "
\n",
+ " \n",
+ " 3 | \n",
+ " 0.0042 | \n",
+ " 0.0000 | \n",
+ " 0.0062 | \n",
+ " 0.8869 | \n",
+ " 0.0060 | \n",
+ " 1.3693 | \n",
+ "
\n",
+ " \n",
+ " 4 | \n",
+ " 0.0040 | \n",
+ " 0.0000 | \n",
+ " 0.0060 | \n",
+ " 0.8987 | \n",
+ " 0.0057 | \n",
+ " 0.4582 | \n",
+ "
\n",
+ " \n",
+ " 5 | \n",
+ " 0.0042 | \n",
+ " 0.0000 | \n",
+ " 0.0061 | \n",
+ " 0.8761 | \n",
+ " 0.0059 | \n",
+ " 0.3537 | \n",
+ "
\n",
+ " \n",
+ " 6 | \n",
+ " 0.0045 | \n",
+ " 0.0000 | \n",
+ " 0.0070 | \n",
+ " 0.8611 | \n",
+ " 0.0067 | \n",
+ " 6.0101 | \n",
+ "
\n",
+ " \n",
+ " 7 | \n",
+ " 0.0045 | \n",
+ " 0.0000 | \n",
+ " 0.0065 | \n",
+ " 0.8756 | \n",
+ " 0.0062 | \n",
+ " 0.8085 | \n",
+ "
\n",
+ " \n",
+ " 8 | \n",
+ " 0.0043 | \n",
+ " 0.0000 | \n",
+ " 0.0063 | \n",
+ " 0.8742 | \n",
+ " 0.0061 | \n",
+ " 1.3488 | \n",
+ "
\n",
+ " \n",
+ " 9 | \n",
+ " 0.0042 | \n",
+ " 0.0000 | \n",
+ " 0.0060 | \n",
+ " 0.8858 | \n",
+ " 0.0058 | \n",
+ " 16.3256 | \n",
+ "
\n",
+ " \n",
+ " Mean | \n",
+ " 0.0043 | \n",
+ " 0.0000 | \n",
+ " 0.0064 | \n",
+ " 0.8787 | \n",
+ " 0.0061 | \n",
+ " 6.0682 | \n",
+ "
\n",
+ " \n",
+ " SD | \n",
+ " 0.0001 | \n",
+ " 0.0000 | \n",
+ " 0.0003 | \n",
+ " 0.0105 | \n",
+ " 0.0003 | \n",
+ " 10.1962 | \n",
+ "
\n",
+ " \n",
+ "
\n"
+ ],
+ "text/plain": [
+ ""
+ ]
+ },
+ "metadata": {},
+ "output_type": "display_data"
+ }
+ ],
+ "source": [
+ "xgboost = create_model('xgboost')"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 22,
+ "metadata": {},
+ "outputs": [
+ {
+ "data": {
+ "application/vnd.jupyter.widget-view+json": {
+ "model_id": "2937c8c598234ec88f05eaabfd90b449",
+ "version_major": 2,
+ "version_minor": 0
+ },
+ "text/plain": [
+ "interactive(children=(ToggleButtons(description='Plot Type:', icons=('',), options=(('Hyperparameters', 'param…"
+ ]
+ },
+ "metadata": {},
+ "output_type": "display_data"
+ }
+ ],
+ "source": [
+ "evaluate_model(catboost)"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 23,
+ "metadata": {},
+ "outputs": [
+ {
+ "data": {
+ "application/vnd.jupyter.widget-view+json": {
+ "model_id": "21257fbf25164af38d6669499e8ed2a2",
+ "version_major": 2,
+ "version_minor": 0
+ },
+ "text/plain": [
+ "interactive(children=(ToggleButtons(description='Plot Type:', icons=('',), options=(('Hyperparameters', 'param…"
+ ]
+ },
+ "metadata": {},
+ "output_type": "display_data"
+ }
+ ],
+ "source": [
+ "evaluate_model(catboost)"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 24,
+ "metadata": {},
+ "outputs": [
+ {
+ "data": {
+ "application/vnd.jupyter.widget-view+json": {
+ "model_id": "7a05f3d53a314fb3b797cf0869ac11b2",
+ "version_major": 2,
+ "version_minor": 0
+ },
+ "text/plain": [
+ "interactive(children=(ToggleButtons(description='Plot Type:', icons=('',), options=(('Hyperparameters', 'param…"
+ ]
+ },
+ "metadata": {},
+ "output_type": "display_data"
+ }
+ ],
+ "source": [
+ "evaluate_model(catboost)"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 25,
+ "metadata": {},
+ "outputs": [
+ {
+ "data": {
+ "application/vnd.jupyter.widget-view+json": {
+ "model_id": "1dc376a7450645b7ba8002498790e066",
+ "version_major": 2,
+ "version_minor": 0
+ },
+ "text/plain": [
+ "interactive(children=(ToggleButtons(description='Plot Type:', icons=('',), options=(('Hyperparameters', 'param…"
+ ]
+ },
+ "metadata": {},
+ "output_type": "display_data"
+ }
+ ],
+ "source": [
+ "evaluate_model(catboost)"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 26,
+ "metadata": {},
+ "outputs": [
+ {
+ "data": {
+ "application/vnd.jupyter.widget-view+json": {
+ "model_id": "2c31c6d3b05148de9ff22f3bb9deb8fb",
+ "version_major": 2,
+ "version_minor": 0
+ },
+ "text/plain": [
+ "interactive(children=(ToggleButtons(description='Plot Type:', icons=('',), options=(('Hyperparameters', 'param…"
+ ]
+ },
+ "metadata": {},
+ "output_type": "display_data"
+ }
+ ],
+ "source": [
+ "evaluate_model(catboost)"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 27,
+ "metadata": {},
+ "outputs": [
+ {
+ "data": {
+ "application/vnd.jupyter.widget-view+json": {
+ "model_id": "3a4d314bb5d14394a82fca54b524a473",
+ "version_major": 2,
+ "version_minor": 0
+ },
+ "text/plain": [
+ "interactive(children=(ToggleButtons(description='Plot Type:', icons=('',), options=(('Hyperparameters', 'param…"
+ ]
+ },
+ "metadata": {},
+ "output_type": "display_data"
+ }
+ ],
+ "source": [
+ "evaluate_model(catboost)"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 18,
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "from xgboost import XGBRegressor\n",
+ "\n",
+ "# Hyper-parameters for the standalone XGBoost regressor, one per line for easy diffing.\n",
+ "# NOTE(review): binding the estimator to the name xgb shadows the xgboost module alias\n",
+ "# imported as xgb in the first cell.\n",
+ "xgb_params = {\n",
+ "    'base_score': 0.5,\n",
+ "    'booster': 'gbtree',\n",
+ "    'callbacks': None,\n",
+ "    'colsample_bylevel': 1,\n",
+ "    'colsample_bynode': 1,\n",
+ "    'colsample_bytree': 0.7,\n",
+ "    'early_stopping_rounds': None,\n",
+ "    'enable_categorical': False,\n",
+ "    'eval_metric': None,\n",
+ "    'gamma': 0,\n",
+ "    'gpu_id': -1,\n",
+ "    'grow_policy': 'depthwise',\n",
+ "    'importance_type': None,\n",
+ "    'interaction_constraints': '',\n",
+ "    'learning_rate': 0.1,\n",
+ "    'max_bin': 256,\n",
+ "    'max_cat_to_onehot': 4,\n",
+ "    'max_delta_step': 0,\n",
+ "    'max_depth': 9,\n",
+ "    'max_leaves': 0,\n",
+ "    'min_child_weight': 1,\n",
+ "    'monotone_constraints': '()',\n",
+ "    'n_estimators': 290,\n",
+ "    'n_jobs': -1,\n",
+ "    'num_parallel_tree': 1,\n",
+ "    'predictor': 'auto',\n",
+ "    'random_state': 123,\n",
+ "    'reg_alpha': 0.05,\n",
+ "    'reg_lambda': 0.1,\n",
+ "}\n",
+ "xgb = XGBRegressor(**xgb_params)"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 19,
+ "metadata": {},
+ "outputs": [
+ {
+ "data": {
+ "text/html": [
+ "\n",
+ "\n",
+ "
\n",
+ " \n",
+ " \n",
+ " | \n",
+ " dayofweek | \n",
+ " quarter | \n",
+ " month | \n",
+ " year | \n",
+ " dayofyear | \n",
+ " dayofmonth | \n",
+ " datehour | \n",
+ " weekofyear | \n",
+ " WD-Hour | \n",
+ " WS-Hour | \n",
+ " Temp-Hour | \n",
+ " SR-Hour | \n",
+ " RH-Hour | \n",
+ " NO2 | \n",
+ " O3 | \n",
+ "
\n",
+ " \n",
+ " date | \n",
+ " | \n",
+ " | \n",
+ " | \n",
+ " | \n",
+ " | \n",
+ " | \n",
+ " | \n",
+ " | \n",
+ " | \n",
+ " | \n",
+ " | \n",
+ " | \n",
+ " | \n",
+ " | \n",
+ " | \n",
+ "
\n",
+ " \n",
+ " \n",
+ " \n",
+ " 2013-01-27 00:00:00 | \n",
+ " 6.0 | \n",
+ " 1.0 | \n",
+ " 1.0 | \n",
+ " 2013.0 | \n",
+ " 27.0 | \n",
+ " 27.0 | \n",
+ " 0.0 | \n",
+ " 4.0 | \n",
+ " 114.0 | \n",
+ " 1.7 | \n",
+ " 18.1 | \n",
+ " 5.0 | \n",
+ " 100.0 | \n",
+ " 0.017000 | \n",
+ " 2.600000e-02 | \n",
+ "
\n",
+ " \n",
+ " 2013-01-27 01:00:00 | \n",
+ " 6.0 | \n",
+ " 1.0 | \n",
+ " 1.0 | \n",
+ " 2013.0 | \n",
+ " 27.0 | \n",
+ " 27.0 | \n",
+ " 1.0 | \n",
+ " 4.0 | \n",
+ " 151.0 | \n",
+ " 0.4 | \n",
+ " 17.8 | \n",
+ " 3.0 | \n",
+ " 89.0 | \n",
+ " 0.086000 | \n",
+ " 1.000000e-03 | \n",
+ "
\n",
+ " \n",
+ " 2013-01-27 02:00:00 | \n",
+ " 6.0 | \n",
+ " 1.0 | \n",
+ " 1.0 | \n",
+ " 2013.0 | \n",
+ " 27.0 | \n",
+ " 27.0 | \n",
+ " 2.0 | \n",
+ " 4.0 | \n",
+ " 175.0 | \n",
+ " 0.3 | \n",
+ " 17.8 | \n",
+ " 2.0 | \n",
+ " 88.0 | \n",
+ " 0.053000 | \n",
+ " 2.000000e-03 | \n",
+ "
\n",
+ " \n",
+ " 2013-01-27 03:00:00 | \n",
+ " 6.0 | \n",
+ " 1.0 | \n",
+ " 1.0 | \n",
+ " 2013.0 | \n",
+ " 27.0 | \n",
+ " 27.0 | \n",
+ " 3.0 | \n",
+ " 4.0 | \n",
+ " 264.0 | \n",
+ " 0.8 | \n",
+ " 16.8 | \n",
+ " 2.0 | \n",
+ " 92.0 | \n",
+ " 0.072000 | \n",
+ " 1.000000e-03 | \n",
+ "
\n",
+ " \n",
+ " 2013-01-27 04:00:00 | \n",
+ " 6.0 | \n",
+ " 1.0 | \n",
+ " 1.0 | \n",
+ " 2013.0 | \n",
+ " 27.0 | \n",
+ " 27.0 | \n",
+ " 4.0 | \n",
+ " 4.0 | \n",
+ " 187.0 | \n",
+ " 0.4 | \n",
+ " 16.2 | \n",
+ " 2.0 | \n",
+ " 94.0 | \n",
+ " 0.063000 | \n",
+ " 1.000000e-03 | \n",
+ "
\n",
+ " \n",
+ " ... | \n",
+ " ... | \n",
+ " ... | \n",
+ " ... | \n",
+ " ... | \n",
+ " ... | \n",
+ " ... | \n",
+ " ... | \n",
+ " ... | \n",
+ " ... | \n",
+ " ... | \n",
+ " ... | \n",
+ " ... | \n",
+ " ... | \n",
+ " ... | \n",
+ " ... | \n",
+ "
\n",
+ " \n",
+ " 2015-12-31 18:00:00 | \n",
+ " 3.0 | \n",
+ " 4.0 | \n",
+ " 12.0 | \n",
+ " 2015.0 | \n",
+ " 365.0 | \n",
+ " 31.0 | \n",
+ " 18.0 | \n",
+ " 53.0 | \n",
+ " 85.0 | \n",
+ " 1.4 | \n",
+ " 14.6 | \n",
+ " 10.0 | \n",
+ " 76.0 | \n",
+ " 0.076199 | \n",
+ " 1.747050e-03 | \n",
+ "
\n",
+ " \n",
+ " 2015-12-31 19:00:00 | \n",
+ " 3.0 | \n",
+ " 4.0 | \n",
+ " 12.0 | \n",
+ " 2015.0 | \n",
+ " 365.0 | \n",
+ " 31.0 | \n",
+ " 19.0 | \n",
+ " 53.0 | \n",
+ " 99.0 | \n",
+ " 1.3 | \n",
+ " 14.6 | \n",
+ " 9.0 | \n",
+ " 75.0 | \n",
+ " 0.098153 | \n",
+ " 1.108100e-04 | \n",
+ "
\n",
+ " \n",
+ " 2015-12-31 20:00:00 | \n",
+ " 3.0 | \n",
+ " 4.0 | \n",
+ " 12.0 | \n",
+ " 2015.0 | \n",
+ " 365.0 | \n",
+ " 31.0 | \n",
+ " 20.0 | \n",
+ " 53.0 | \n",
+ " 210.0 | \n",
+ " 0.7 | \n",
+ " 14.5 | \n",
+ " 10.0 | \n",
+ " 79.0 | \n",
+ " 0.106758 | \n",
+ " 8.930000e-06 | \n",
+ "
\n",
+ " \n",
+ " 2015-12-31 21:00:00 | \n",
+ " 3.0 | \n",
+ " 4.0 | \n",
+ " 12.0 | \n",
+ " 2015.0 | \n",
+ " 365.0 | \n",
+ " 31.0 | \n",
+ " 21.0 | \n",
+ " 53.0 | \n",
+ " 185.0 | \n",
+ " 0.8 | \n",
+ " 14.6 | \n",
+ " 10.0 | \n",
+ " 81.0 | \n",
+ " 0.087641 | \n",
+ " 1.000000e-08 | \n",
+ "
\n",
+ " \n",
+ " 2015-12-31 22:00:00 | \n",
+ " 3.0 | \n",
+ " 4.0 | \n",
+ " 12.0 | \n",
+ " 2015.0 | \n",
+ " 365.0 | \n",
+ " 31.0 | \n",
+ " 22.0 | \n",
+ " 53.0 | \n",
+ " 147.0 | \n",
+ " 0.9 | \n",
+ " 14.1 | \n",
+ " 10.0 | \n",
+ " 85.0 | \n",
+ " 0.089011 | \n",
+ " 0.000000e+00 | \n",
+ "
\n",
+ " \n",
+ "
\n",
+ "
25655 rows × 15 columns
\n",
+ "
"
+ ],
+ "text/plain": [
+ " dayofweek quarter month year dayofyear dayofmonth datehour weekofyear WD-Hour WS-Hour Temp-Hour SR-Hour RH-Hour NO2 O3\n",
+ "date \n",
+ "2013-01-27 00:00:00 6.0 1.0 1.0 2013.0 27.0 27.0 0.0 4.0 114.0 1.7 18.1 5.0 100.0 0.017000 2.600000e-02\n",
+ "2013-01-27 01:00:00 6.0 1.0 1.0 2013.0 27.0 27.0 1.0 4.0 151.0 0.4 17.8 3.0 89.0 0.086000 1.000000e-03\n",
+ "2013-01-27 02:00:00 6.0 1.0 1.0 2013.0 27.0 27.0 2.0 4.0 175.0 0.3 17.8 2.0 88.0 0.053000 2.000000e-03\n",
+ "2013-01-27 03:00:00 6.0 1.0 1.0 2013.0 27.0 27.0 3.0 4.0 264.0 0.8 16.8 2.0 92.0 0.072000 1.000000e-03\n",
+ "2013-01-27 04:00:00 6.0 1.0 1.0 2013.0 27.0 27.0 4.0 4.0 187.0 0.4 16.2 2.0 94.0 0.063000 1.000000e-03\n",
+ "... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ...\n",
+ "2015-12-31 18:00:00 3.0 4.0 12.0 2015.0 365.0 31.0 18.0 53.0 85.0 1.4 14.6 10.0 76.0 0.076199 1.747050e-03\n",
+ "2015-12-31 19:00:00 3.0 4.0 12.0 2015.0 365.0 31.0 19.0 53.0 99.0 1.3 14.6 9.0 75.0 0.098153 1.108100e-04\n",
+ "2015-12-31 20:00:00 3.0 4.0 12.0 2015.0 365.0 31.0 20.0 53.0 210.0 0.7 14.5 10.0 79.0 0.106758 8.930000e-06\n",
+ "2015-12-31 21:00:00 3.0 4.0 12.0 2015.0 365.0 31.0 21.0 53.0 185.0 0.8 14.6 10.0 81.0 0.087641 1.000000e-08\n",
+ "2015-12-31 22:00:00 3.0 4.0 12.0 2015.0 365.0 31.0 22.0 53.0 147.0 0.9 14.1 10.0 85.0 0.089011 0.000000e+00\n",
+ "\n",
+ "[25655 rows x 15 columns]"
+ ]
+ },
+ "execution_count": 19,
+ "metadata": {},
+ "output_type": "execute_result"
+ }
+ ],
+ "source": [
+ "df1"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 20,
+ "metadata": {},
+ "outputs": [
+ {
+ "data": {
+ "text/plain": [
+ "Index(['dayofweek', 'quarter', 'month', 'year', 'dayofyear', 'dayofmonth', 'datehour', 'weekofyear', 'WD-Hour', 'WS-Hour', 'Temp-Hour', 'SR-Hour', 'RH-Hour', 'NO2', 'O3'], dtype='object')"
+ ]
+ },
+ "execution_count": 20,
+ "metadata": {},
+ "output_type": "execute_result"
+ }
+ ],
+ "source": [
+ "from sklearn.model_selection import train_test_split\n",
+ "df1.columns"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 21,
+ "metadata": {},
+ "outputs": [
+ {
+ "data": {
+ "text/html": [
+ "\n",
+ "\n",
+ "
\n",
+ " \n",
+ " \n",
+ " | \n",
+ " dayofweek | \n",
+ " quarter | \n",
+ " month | \n",
+ " year | \n",
+ " dayofyear | \n",
+ " dayofmonth | \n",
+ " datehour | \n",
+ " weekofyear | \n",
+ " WD_Hour | \n",
+ " WS_Hour | \n",
+ " Temp_Hour | \n",
+ " SR_Hour | \n",
+ " RH_Hour | \n",
+ " NO2 | \n",
+ " O3 | \n",
+ "
\n",
+ " \n",
+ " date | \n",
+ " | \n",
+ " | \n",
+ " | \n",
+ " | \n",
+ " | \n",
+ " | \n",
+ " | \n",
+ " | \n",
+ " | \n",
+ " | \n",
+ " | \n",
+ " | \n",
+ " | \n",
+ " | \n",
+ " | \n",
+ "
\n",
+ " \n",
+ " \n",
+ " \n",
+ " 2013-01-27 00:00:00 | \n",
+ " 6.0 | \n",
+ " 1.0 | \n",
+ " 1.0 | \n",
+ " 2013.0 | \n",
+ " 27.0 | \n",
+ " 27.0 | \n",
+ " 0.0 | \n",
+ " 4.0 | \n",
+ " 114.0 | \n",
+ " 1.7 | \n",
+ " 18.1 | \n",
+ " 5.0 | \n",
+ " 100.0 | \n",
+ " 0.017000 | \n",
+ " 2.600000e-02 | \n",
+ "
\n",
+ " \n",
+ " 2013-01-27 01:00:00 | \n",
+ " 6.0 | \n",
+ " 1.0 | \n",
+ " 1.0 | \n",
+ " 2013.0 | \n",
+ " 27.0 | \n",
+ " 27.0 | \n",
+ " 1.0 | \n",
+ " 4.0 | \n",
+ " 151.0 | \n",
+ " 0.4 | \n",
+ " 17.8 | \n",
+ " 3.0 | \n",
+ " 89.0 | \n",
+ " 0.086000 | \n",
+ " 1.000000e-03 | \n",
+ "
\n",
+ " \n",
+ " 2013-01-27 02:00:00 | \n",
+ " 6.0 | \n",
+ " 1.0 | \n",
+ " 1.0 | \n",
+ " 2013.0 | \n",
+ " 27.0 | \n",
+ " 27.0 | \n",
+ " 2.0 | \n",
+ " 4.0 | \n",
+ " 175.0 | \n",
+ " 0.3 | \n",
+ " 17.8 | \n",
+ " 2.0 | \n",
+ " 88.0 | \n",
+ " 0.053000 | \n",
+ " 2.000000e-03 | \n",
+ "
\n",
+ " \n",
+ " 2013-01-27 03:00:00 | \n",
+ " 6.0 | \n",
+ " 1.0 | \n",
+ " 1.0 | \n",
+ " 2013.0 | \n",
+ " 27.0 | \n",
+ " 27.0 | \n",
+ " 3.0 | \n",
+ " 4.0 | \n",
+ " 264.0 | \n",
+ " 0.8 | \n",
+ " 16.8 | \n",
+ " 2.0 | \n",
+ " 92.0 | \n",
+ " 0.072000 | \n",
+ " 1.000000e-03 | \n",
+ "
\n",
+ " \n",
+ " 2013-01-27 04:00:00 | \n",
+ " 6.0 | \n",
+ " 1.0 | \n",
+ " 1.0 | \n",
+ " 2013.0 | \n",
+ " 27.0 | \n",
+ " 27.0 | \n",
+ " 4.0 | \n",
+ " 4.0 | \n",
+ " 187.0 | \n",
+ " 0.4 | \n",
+ " 16.2 | \n",
+ " 2.0 | \n",
+ " 94.0 | \n",
+ " 0.063000 | \n",
+ " 1.000000e-03 | \n",
+ "
\n",
+ " \n",
+ " ... | \n",
+ " ... | \n",
+ " ... | \n",
+ " ... | \n",
+ " ... | \n",
+ " ... | \n",
+ " ... | \n",
+ " ... | \n",
+ " ... | \n",
+ " ... | \n",
+ " ... | \n",
+ " ... | \n",
+ " ... | \n",
+ " ... | \n",
+ " ... | \n",
+ " ... | \n",
+ "
\n",
+ " \n",
+ " 2015-12-31 18:00:00 | \n",
+ " 3.0 | \n",
+ " 4.0 | \n",
+ " 12.0 | \n",
+ " 2015.0 | \n",
+ " 365.0 | \n",
+ " 31.0 | \n",
+ " 18.0 | \n",
+ " 53.0 | \n",
+ " 85.0 | \n",
+ " 1.4 | \n",
+ " 14.6 | \n",
+ " 10.0 | \n",
+ " 76.0 | \n",
+ " 0.076199 | \n",
+ " 1.747050e-03 | \n",
+ "
\n",
+ " \n",
+ " 2015-12-31 19:00:00 | \n",
+ " 3.0 | \n",
+ " 4.0 | \n",
+ " 12.0 | \n",
+ " 2015.0 | \n",
+ " 365.0 | \n",
+ " 31.0 | \n",
+ " 19.0 | \n",
+ " 53.0 | \n",
+ " 99.0 | \n",
+ " 1.3 | \n",
+ " 14.6 | \n",
+ " 9.0 | \n",
+ " 75.0 | \n",
+ " 0.098153 | \n",
+ " 1.108100e-04 | \n",
+ "
\n",
+ " \n",
+ " 2015-12-31 20:00:00 | \n",
+ " 3.0 | \n",
+ " 4.0 | \n",
+ " 12.0 | \n",
+ " 2015.0 | \n",
+ " 365.0 | \n",
+ " 31.0 | \n",
+ " 20.0 | \n",
+ " 53.0 | \n",
+ " 210.0 | \n",
+ " 0.7 | \n",
+ " 14.5 | \n",
+ " 10.0 | \n",
+ " 79.0 | \n",
+ " 0.106758 | \n",
+ " 8.930000e-06 | \n",
+ "
\n",
+ " \n",
+ " 2015-12-31 21:00:00 | \n",
+ " 3.0 | \n",
+ " 4.0 | \n",
+ " 12.0 | \n",
+ " 2015.0 | \n",
+ " 365.0 | \n",
+ " 31.0 | \n",
+ " 21.0 | \n",
+ " 53.0 | \n",
+ " 185.0 | \n",
+ " 0.8 | \n",
+ " 14.6 | \n",
+ " 10.0 | \n",
+ " 81.0 | \n",
+ " 0.087641 | \n",
+ " 1.000000e-08 | \n",
+ "
\n",
+ " \n",
+ " 2015-12-31 22:00:00 | \n",
+ " 3.0 | \n",
+ " 4.0 | \n",
+ " 12.0 | \n",
+ " 2015.0 | \n",
+ " 365.0 | \n",
+ " 31.0 | \n",
+ " 22.0 | \n",
+ " 53.0 | \n",
+ " 147.0 | \n",
+ " 0.9 | \n",
+ " 14.1 | \n",
+ " 10.0 | \n",
+ " 85.0 | \n",
+ " 0.089011 | \n",
+ " 0.000000e+00 | \n",
+ "
\n",
+ " \n",
+ "
\n",
+ "
25655 rows × 15 columns
\n",
+ "
"
+ ],
+ "text/plain": [
+ " dayofweek quarter month year dayofyear dayofmonth datehour weekofyear WD_Hour WS_Hour Temp_Hour SR_Hour RH_Hour NO2 O3\n",
+ "date \n",
+ "2013-01-27 00:00:00 6.0 1.0 1.0 2013.0 27.0 27.0 0.0 4.0 114.0 1.7 18.1 5.0 100.0 0.017000 2.600000e-02\n",
+ "2013-01-27 01:00:00 6.0 1.0 1.0 2013.0 27.0 27.0 1.0 4.0 151.0 0.4 17.8 3.0 89.0 0.086000 1.000000e-03\n",
+ "2013-01-27 02:00:00 6.0 1.0 1.0 2013.0 27.0 27.0 2.0 4.0 175.0 0.3 17.8 2.0 88.0 0.053000 2.000000e-03\n",
+ "2013-01-27 03:00:00 6.0 1.0 1.0 2013.0 27.0 27.0 3.0 4.0 264.0 0.8 16.8 2.0 92.0 0.072000 1.000000e-03\n",
+ "2013-01-27 04:00:00 6.0 1.0 1.0 2013.0 27.0 27.0 4.0 4.0 187.0 0.4 16.2 2.0 94.0 0.063000 1.000000e-03\n",
+ "... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ...\n",
+ "2015-12-31 18:00:00 3.0 4.0 12.0 2015.0 365.0 31.0 18.0 53.0 85.0 1.4 14.6 10.0 76.0 0.076199 1.747050e-03\n",
+ "2015-12-31 19:00:00 3.0 4.0 12.0 2015.0 365.0 31.0 19.0 53.0 99.0 1.3 14.6 9.0 75.0 0.098153 1.108100e-04\n",
+ "2015-12-31 20:00:00 3.0 4.0 12.0 2015.0 365.0 31.0 20.0 53.0 210.0 0.7 14.5 10.0 79.0 0.106758 8.930000e-06\n",
+ "2015-12-31 21:00:00 3.0 4.0 12.0 2015.0 365.0 31.0 21.0 53.0 185.0 0.8 14.6 10.0 81.0 0.087641 1.000000e-08\n",
+ "2015-12-31 22:00:00 3.0 4.0 12.0 2015.0 365.0 31.0 22.0 53.0 147.0 0.9 14.1 10.0 85.0 0.089011 0.000000e+00\n",
+ "\n",
+ "[25655 rows x 15 columns]"
+ ]
+ },
+ "execution_count": 21,
+ "metadata": {},
+ "output_type": "execute_result"
+ }
+ ],
+ "source": [
+ "# Normalise the sensor column names by swapping every hyphen for an underscore\n",
+ "# (WD-Hour -> WD_Hour, WS-Hour -> WS_Hour, Temp-Hour -> Temp_Hour, SR-Hour -> SR_Hour,\n",
+ "# RH-Hour -> RH_Hour). regex=False makes this a plain-text match; assigning to\n",
+ "# df1.columns relabels the existing frame rather than creating a new one.\n",
+ "df1.columns = df1.columns.str.replace('-', '_', regex=False)\n",
+ "df1"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 22,
+ "metadata": {},
+ "outputs": [
+ {
+ "data": {
+ "text/html": [
+ "\n",
+ "\n",
+ "
\n",
+ " \n",
+ " \n",
+ " | \n",
+ " year | \n",
+ " month | \n",
+ " quarter | \n",
+ " dayofyear | \n",
+ " dayofmonth | \n",
+ " weekofyear | \n",
+ " dayofweek | \n",
+ " datehour | \n",
+ " WD_Hour | \n",
+ " WS_Hour | \n",
+ " Temp_Hour | \n",
+ " SR_Hour | \n",
+ " RH_Hour | \n",
+ " NO2 | \n",
+ " O3 | \n",
+ "
\n",
+ " \n",
+ " date | \n",
+ " | \n",
+ " | \n",
+ " | \n",
+ " | \n",
+ " | \n",
+ " | \n",
+ " | \n",
+ " | \n",
+ " | \n",
+ " | \n",
+ " | \n",
+ " | \n",
+ " | \n",
+ " | \n",
+ " | \n",
+ "
\n",
+ " \n",
+ " \n",
+ " \n",
+ " 2013-01-27 00:00:00 | \n",
+ " 2013.0 | \n",
+ " 1.0 | \n",
+ " 1.0 | \n",
+ " 27.0 | \n",
+ " 27.0 | \n",
+ " 4.0 | \n",
+ " 6.0 | \n",
+ " 0.0 | \n",
+ " 114.0 | \n",
+ " 1.7 | \n",
+ " 18.1 | \n",
+ " 5.0 | \n",
+ " 100.0 | \n",
+ " 0.017000 | \n",
+ " 2.600000e-02 | \n",
+ "
\n",
+ " \n",
+ " 2013-01-27 01:00:00 | \n",
+ " 2013.0 | \n",
+ " 1.0 | \n",
+ " 1.0 | \n",
+ " 27.0 | \n",
+ " 27.0 | \n",
+ " 4.0 | \n",
+ " 6.0 | \n",
+ " 1.0 | \n",
+ " 151.0 | \n",
+ " 0.4 | \n",
+ " 17.8 | \n",
+ " 3.0 | \n",
+ " 89.0 | \n",
+ " 0.086000 | \n",
+ " 1.000000e-03 | \n",
+ "
\n",
+ " \n",
+ " 2013-01-27 02:00:00 | \n",
+ " 2013.0 | \n",
+ " 1.0 | \n",
+ " 1.0 | \n",
+ " 27.0 | \n",
+ " 27.0 | \n",
+ " 4.0 | \n",
+ " 6.0 | \n",
+ " 2.0 | \n",
+ " 175.0 | \n",
+ " 0.3 | \n",
+ " 17.8 | \n",
+ " 2.0 | \n",
+ " 88.0 | \n",
+ " 0.053000 | \n",
+ " 2.000000e-03 | \n",
+ "
\n",
+ " \n",
+ " 2013-01-27 03:00:00 | \n",
+ " 2013.0 | \n",
+ " 1.0 | \n",
+ " 1.0 | \n",
+ " 27.0 | \n",
+ " 27.0 | \n",
+ " 4.0 | \n",
+ " 6.0 | \n",
+ " 3.0 | \n",
+ " 264.0 | \n",
+ " 0.8 | \n",
+ " 16.8 | \n",
+ " 2.0 | \n",
+ " 92.0 | \n",
+ " 0.072000 | \n",
+ " 1.000000e-03 | \n",
+ "
\n",
+ " \n",
+ " 2013-01-27 04:00:00 | \n",
+ " 2013.0 | \n",
+ " 1.0 | \n",
+ " 1.0 | \n",
+ " 27.0 | \n",
+ " 27.0 | \n",
+ " 4.0 | \n",
+ " 6.0 | \n",
+ " 4.0 | \n",
+ " 187.0 | \n",
+ " 0.4 | \n",
+ " 16.2 | \n",
+ " 2.0 | \n",
+ " 94.0 | \n",
+ " 0.063000 | \n",
+ " 1.000000e-03 | \n",
+ "
\n",
+ " \n",
+ " ... | \n",
+ " ... | \n",
+ " ... | \n",
+ " ... | \n",
+ " ... | \n",
+ " ... | \n",
+ " ... | \n",
+ " ... | \n",
+ " ... | \n",
+ " ... | \n",
+ " ... | \n",
+ " ... | \n",
+ " ... | \n",
+ " ... | \n",
+ " ... | \n",
+ " ... | \n",
+ "
\n",
+ " \n",
+ " 2015-12-31 18:00:00 | \n",
+ " 2015.0 | \n",
+ " 12.0 | \n",
+ " 4.0 | \n",
+ " 365.0 | \n",
+ " 31.0 | \n",
+ " 53.0 | \n",
+ " 3.0 | \n",
+ " 18.0 | \n",
+ " 85.0 | \n",
+ " 1.4 | \n",
+ " 14.6 | \n",
+ " 10.0 | \n",
+ " 76.0 | \n",
+ " 0.076199 | \n",
+ " 1.747050e-03 | \n",
+ "
\n",
+ " \n",
+ " 2015-12-31 19:00:00 | \n",
+ " 2015.0 | \n",
+ " 12.0 | \n",
+ " 4.0 | \n",
+ " 365.0 | \n",
+ " 31.0 | \n",
+ " 53.0 | \n",
+ " 3.0 | \n",
+ " 19.0 | \n",
+ " 99.0 | \n",
+ " 1.3 | \n",
+ " 14.6 | \n",
+ " 9.0 | \n",
+ " 75.0 | \n",
+ " 0.098153 | \n",
+ " 1.108100e-04 | \n",
+ "
\n",
+ " \n",
+ " 2015-12-31 20:00:00 | \n",
+ " 2015.0 | \n",
+ " 12.0 | \n",
+ " 4.0 | \n",
+ " 365.0 | \n",
+ " 31.0 | \n",
+ " 53.0 | \n",
+ " 3.0 | \n",
+ " 20.0 | \n",
+ " 210.0 | \n",
+ " 0.7 | \n",
+ " 14.5 | \n",
+ " 10.0 | \n",
+ " 79.0 | \n",
+ " 0.106758 | \n",
+ " 8.930000e-06 | \n",
+ "
\n",
+ " \n",
+ " 2015-12-31 21:00:00 | \n",
+ " 2015.0 | \n",
+ " 12.0 | \n",
+ " 4.0 | \n",
+ " 365.0 | \n",
+ " 31.0 | \n",
+ " 53.0 | \n",
+ " 3.0 | \n",
+ " 21.0 | \n",
+ " 185.0 | \n",
+ " 0.8 | \n",
+ " 14.6 | \n",
+ " 10.0 | \n",
+ " 81.0 | \n",
+ " 0.087641 | \n",
+ " 1.000000e-08 | \n",
+ "
\n",
+ " \n",
+ " 2015-12-31 22:00:00 | \n",
+ " 2015.0 | \n",
+ " 12.0 | \n",
+ " 4.0 | \n",
+ " 365.0 | \n",
+ " 31.0 | \n",
+ " 53.0 | \n",
+ " 3.0 | \n",
+ " 22.0 | \n",
+ " 147.0 | \n",
+ " 0.9 | \n",
+ " 14.1 | \n",
+ " 10.0 | \n",
+ " 85.0 | \n",
+ " 0.089011 | \n",
+ " 0.000000e+00 | \n",
+ "
\n",
+ " \n",
+ "
\n",
+ "
25655 rows × 15 columns
\n",
+ "
"
+ ],
+ "text/plain": [
+ " year month quarter dayofyear dayofmonth weekofyear dayofweek datehour WD_Hour WS_Hour Temp_Hour SR_Hour RH_Hour NO2 O3\n",
+ "date \n",
+ "2013-01-27 00:00:00 2013.0 1.0 1.0 27.0 27.0 4.0 6.0 0.0 114.0 1.7 18.1 5.0 100.0 0.017000 2.600000e-02\n",
+ "2013-01-27 01:00:00 2013.0 1.0 1.0 27.0 27.0 4.0 6.0 1.0 151.0 0.4 17.8 3.0 89.0 0.086000 1.000000e-03\n",
+ "2013-01-27 02:00:00 2013.0 1.0 1.0 27.0 27.0 4.0 6.0 2.0 175.0 0.3 17.8 2.0 88.0 0.053000 2.000000e-03\n",
+ "2013-01-27 03:00:00 2013.0 1.0 1.0 27.0 27.0 4.0 6.0 3.0 264.0 0.8 16.8 2.0 92.0 0.072000 1.000000e-03\n",
+ "2013-01-27 04:00:00 2013.0 1.0 1.0 27.0 27.0 4.0 6.0 4.0 187.0 0.4 16.2 2.0 94.0 0.063000 1.000000e-03\n",
+ "... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ...\n",
+ "2015-12-31 18:00:00 2015.0 12.0 4.0 365.0 31.0 53.0 3.0 18.0 85.0 1.4 14.6 10.0 76.0 0.076199 1.747050e-03\n",
+ "2015-12-31 19:00:00 2015.0 12.0 4.0 365.0 31.0 53.0 3.0 19.0 99.0 1.3 14.6 9.0 75.0 0.098153 1.108100e-04\n",
+ "2015-12-31 20:00:00 2015.0 12.0 4.0 365.0 31.0 53.0 3.0 20.0 210.0 0.7 14.5 10.0 79.0 0.106758 8.930000e-06\n",
+ "2015-12-31 21:00:00 2015.0 12.0 4.0 365.0 31.0 53.0 3.0 21.0 185.0 0.8 14.6 10.0 81.0 0.087641 1.000000e-08\n",
+ "2015-12-31 22:00:00 2015.0 12.0 4.0 365.0 31.0 53.0 3.0 22.0 147.0 0.9 14.1 10.0 85.0 0.089011 0.000000e+00\n",
+ "\n",
+ "[25655 rows x 15 columns]"
+ ]
+ },
+ "execution_count": 22,
+ "metadata": {},
+ "output_type": "execute_result"
+ }
+ ],
+ "source": [
+ "# Column order used from here on: calendar features first, then the hourly\n",
+ "# measurements, with O3 kept as the last column.\n",
+ "cols = [\n",
+ "    'year', 'month', 'quarter', 'dayofyear', 'dayofmonth', 'weekofyear', 'dayofweek', 'datehour',\n",
+ "    'WD_Hour', 'WS_Hour', 'Temp_Hour', 'SR_Hour', 'RH_Hour',\n",
+ "    'NO2', 'O3',\n",
+ "]\n",
+ "df1 = df1.loc[:, cols]\n",
+ "df1"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 23,
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "# O3 is the regression target; every other column of df1 is a predictor.\n",
+ "# Selecting the target by name (instead of 'last column') keeps this correct\n",
+ "# even if the column order above is changed.\n",
+ "X, y = df1.drop(columns='O3'), df1['O3']"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 24,
+ "metadata": {},
+ "outputs": [
+ {
+ "data": {
+ "text/html": [
+ "\n",
+ "\n",
+ "
\n",
+ " \n",
+ " \n",
+ " | \n",
+ " year | \n",
+ " month | \n",
+ " quarter | \n",
+ " dayofyear | \n",
+ " dayofmonth | \n",
+ " weekofyear | \n",
+ " dayofweek | \n",
+ " datehour | \n",
+ " WD_Hour | \n",
+ " WS_Hour | \n",
+ " Temp_Hour | \n",
+ " SR_Hour | \n",
+ " RH_Hour | \n",
+ " NO2 | \n",
+ "
\n",
+ " \n",
+ " date | \n",
+ " | \n",
+ " | \n",
+ " | \n",
+ " | \n",
+ " | \n",
+ " | \n",
+ " | \n",
+ " | \n",
+ " | \n",
+ " | \n",
+ " | \n",
+ " | \n",
+ " | \n",
+ " | \n",
+ "
\n",
+ " \n",
+ " \n",
+ " \n",
+ " 2013-01-27 00:00:00 | \n",
+ " 2013.0 | \n",
+ " 1.0 | \n",
+ " 1.0 | \n",
+ " 27.0 | \n",
+ " 27.0 | \n",
+ " 4.0 | \n",
+ " 6.0 | \n",
+ " 0.0 | \n",
+ " 114.0 | \n",
+ " 1.7 | \n",
+ " 18.1 | \n",
+ " 5.0 | \n",
+ " 100.0 | \n",
+ " 0.017000 | \n",
+ "
\n",
+ " \n",
+ " 2013-01-27 01:00:00 | \n",
+ " 2013.0 | \n",
+ " 1.0 | \n",
+ " 1.0 | \n",
+ " 27.0 | \n",
+ " 27.0 | \n",
+ " 4.0 | \n",
+ " 6.0 | \n",
+ " 1.0 | \n",
+ " 151.0 | \n",
+ " 0.4 | \n",
+ " 17.8 | \n",
+ " 3.0 | \n",
+ " 89.0 | \n",
+ " 0.086000 | \n",
+ "
\n",
+ " \n",
+ " 2013-01-27 02:00:00 | \n",
+ " 2013.0 | \n",
+ " 1.0 | \n",
+ " 1.0 | \n",
+ " 27.0 | \n",
+ " 27.0 | \n",
+ " 4.0 | \n",
+ " 6.0 | \n",
+ " 2.0 | \n",
+ " 175.0 | \n",
+ " 0.3 | \n",
+ " 17.8 | \n",
+ " 2.0 | \n",
+ " 88.0 | \n",
+ " 0.053000 | \n",
+ "
\n",
+ " \n",
+ " 2013-01-27 03:00:00 | \n",
+ " 2013.0 | \n",
+ " 1.0 | \n",
+ " 1.0 | \n",
+ " 27.0 | \n",
+ " 27.0 | \n",
+ " 4.0 | \n",
+ " 6.0 | \n",
+ " 3.0 | \n",
+ " 264.0 | \n",
+ " 0.8 | \n",
+ " 16.8 | \n",
+ " 2.0 | \n",
+ " 92.0 | \n",
+ " 0.072000 | \n",
+ "
\n",
+ " \n",
+ " 2013-01-27 04:00:00 | \n",
+ " 2013.0 | \n",
+ " 1.0 | \n",
+ " 1.0 | \n",
+ " 27.0 | \n",
+ " 27.0 | \n",
+ " 4.0 | \n",
+ " 6.0 | \n",
+ " 4.0 | \n",
+ " 187.0 | \n",
+ " 0.4 | \n",
+ " 16.2 | \n",
+ " 2.0 | \n",
+ " 94.0 | \n",
+ " 0.063000 | \n",
+ "
\n",
+ " \n",
+ " ... | \n",
+ " ... | \n",
+ " ... | \n",
+ " ... | \n",
+ " ... | \n",
+ " ... | \n",
+ " ... | \n",
+ " ... | \n",
+ " ... | \n",
+ " ... | \n",
+ " ... | \n",
+ " ... | \n",
+ " ... | \n",
+ " ... | \n",
+ " ... | \n",
+ "
\n",
+ " \n",
+ " 2015-12-31 18:00:00 | \n",
+ " 2015.0 | \n",
+ " 12.0 | \n",
+ " 4.0 | \n",
+ " 365.0 | \n",
+ " 31.0 | \n",
+ " 53.0 | \n",
+ " 3.0 | \n",
+ " 18.0 | \n",
+ " 85.0 | \n",
+ " 1.4 | \n",
+ " 14.6 | \n",
+ " 10.0 | \n",
+ " 76.0 | \n",
+ " 0.076199 | \n",
+ "
\n",
+ " \n",
+ " 2015-12-31 19:00:00 | \n",
+ " 2015.0 | \n",
+ " 12.0 | \n",
+ " 4.0 | \n",
+ " 365.0 | \n",
+ " 31.0 | \n",
+ " 53.0 | \n",
+ " 3.0 | \n",
+ " 19.0 | \n",
+ " 99.0 | \n",
+ " 1.3 | \n",
+ " 14.6 | \n",
+ " 9.0 | \n",
+ " 75.0 | \n",
+ " 0.098153 | \n",
+ "
\n",
+ " \n",
+ " 2015-12-31 20:00:00 | \n",
+ " 2015.0 | \n",
+ " 12.0 | \n",
+ " 4.0 | \n",
+ " 365.0 | \n",
+ " 31.0 | \n",
+ " 53.0 | \n",
+ " 3.0 | \n",
+ " 20.0 | \n",
+ " 210.0 | \n",
+ " 0.7 | \n",
+ " 14.5 | \n",
+ " 10.0 | \n",
+ " 79.0 | \n",
+ " 0.106758 | \n",
+ "
\n",
+ " \n",
+ " 2015-12-31 21:00:00 | \n",
+ " 2015.0 | \n",
+ " 12.0 | \n",
+ " 4.0 | \n",
+ " 365.0 | \n",
+ " 31.0 | \n",
+ " 53.0 | \n",
+ " 3.0 | \n",
+ " 21.0 | \n",
+ " 185.0 | \n",
+ " 0.8 | \n",
+ " 14.6 | \n",
+ " 10.0 | \n",
+ " 81.0 | \n",
+ " 0.087641 | \n",
+ "
\n",
+ " \n",
+ " 2015-12-31 22:00:00 | \n",
+ " 2015.0 | \n",
+ " 12.0 | \n",
+ " 4.0 | \n",
+ " 365.0 | \n",
+ " 31.0 | \n",
+ " 53.0 | \n",
+ " 3.0 | \n",
+ " 22.0 | \n",
+ " 147.0 | \n",
+ " 0.9 | \n",
+ " 14.1 | \n",
+ " 10.0 | \n",
+ " 85.0 | \n",
+ " 0.089011 | \n",
+ "
\n",
+ " \n",
+ "
\n",
+ "
25655 rows × 14 columns
\n",
+ "
"
+ ],
+ "text/plain": [
+ " year month quarter dayofyear dayofmonth weekofyear dayofweek datehour WD_Hour WS_Hour Temp_Hour SR_Hour RH_Hour NO2\n",
+ "date \n",
+ "2013-01-27 00:00:00 2013.0 1.0 1.0 27.0 27.0 4.0 6.0 0.0 114.0 1.7 18.1 5.0 100.0 0.017000\n",
+ "2013-01-27 01:00:00 2013.0 1.0 1.0 27.0 27.0 4.0 6.0 1.0 151.0 0.4 17.8 3.0 89.0 0.086000\n",
+ "2013-01-27 02:00:00 2013.0 1.0 1.0 27.0 27.0 4.0 6.0 2.0 175.0 0.3 17.8 2.0 88.0 0.053000\n",
+ "2013-01-27 03:00:00 2013.0 1.0 1.0 27.0 27.0 4.0 6.0 3.0 264.0 0.8 16.8 2.0 92.0 0.072000\n",
+ "2013-01-27 04:00:00 2013.0 1.0 1.0 27.0 27.0 4.0 6.0 4.0 187.0 0.4 16.2 2.0 94.0 0.063000\n",
+ "... ... ... ... ... ... ... ... ... ... ... ... ... ... ...\n",
+ "2015-12-31 18:00:00 2015.0 12.0 4.0 365.0 31.0 53.0 3.0 18.0 85.0 1.4 14.6 10.0 76.0 0.076199\n",
+ "2015-12-31 19:00:00 2015.0 12.0 4.0 365.0 31.0 53.0 3.0 19.0 99.0 1.3 14.6 9.0 75.0 0.098153\n",
+ "2015-12-31 20:00:00 2015.0 12.0 4.0 365.0 31.0 53.0 3.0 20.0 210.0 0.7 14.5 10.0 79.0 0.106758\n",
+ "2015-12-31 21:00:00 2015.0 12.0 4.0 365.0 31.0 53.0 3.0 21.0 185.0 0.8 14.6 10.0 81.0 0.087641\n",
+ "2015-12-31 22:00:00 2015.0 12.0 4.0 365.0 31.0 53.0 3.0 22.0 147.0 0.9 14.1 10.0 85.0 0.089011\n",
+ "\n",
+ "[25655 rows x 14 columns]"
+ ]
+ },
+ "execution_count": 24,
+ "metadata": {},
+ "output_type": "execute_result"
+ }
+ ],
+ "source": [
+ "X"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 25,
+ "metadata": {},
+ "outputs": [
+ {
+ "data": {
+ "text/plain": [
+ "date\n",
+ "2013-01-27 00:00:00 2.600000e-02\n",
+ "2013-01-27 01:00:00 1.000000e-03\n",
+ "2013-01-27 02:00:00 2.000000e-03\n",
+ "2013-01-27 03:00:00 1.000000e-03\n",
+ "2013-01-27 04:00:00 1.000000e-03\n",
+ " ... \n",
+ "2015-12-31 18:00:00 1.747050e-03\n",
+ "2015-12-31 19:00:00 1.108100e-04\n",
+ "2015-12-31 20:00:00 8.930000e-06\n",
+ "2015-12-31 21:00:00 1.000000e-08\n",
+ "2015-12-31 22:00:00 0.000000e+00\n",
+ "Freq: H, Name: O3, Length: 25655, dtype: float64"
+ ]
+ },
+ "execution_count": 25,
+ "metadata": {},
+ "output_type": "execute_result"
+ }
+ ],
+ "source": [
+ "y"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 26,
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "# Hold out 20% of the rows for evaluation; the fixed seed makes the split repeatable.\n",
+ "# NOTE(review): train_test_split shuffles rows by default, so neighbouring hours of this\n",
+ "# hourly series can land on both sides of the split - confirm that is intended, or\n",
+ "# consider a chronological hold-out (shuffle=False) for a stricter estimate.\n",
+ "train_X, test_X, train_y, test_y = train_test_split(X, y, test_size=0.2, random_state=123)"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 27,
+ "metadata": {},
+ "outputs": [
+ {
+ "data": {
+ "text/plain": [
+ "XGBRegressor(base_score=0.5, booster='gbtree', callbacks=None,\n",
+ " colsample_bylevel=1, colsample_bynode=1, colsample_bytree=0.7,\n",
+ " early_stopping_rounds=None, enable_categorical=False,\n",
+ " eval_metric=None, gamma=0, gpu_id=-1, grow_policy='depthwise',\n",
+ " importance_type=None, interaction_constraints='',\n",
+ " learning_rate=0.1, max_bin=256, max_cat_to_onehot=4,\n",
+ " max_delta_step=0, max_depth=9, max_leaves=0, min_child_weight=1,\n",
+ " missing=nan, monotone_constraints='()', n_estimators=290,\n",
+ " n_jobs=-1, num_parallel_tree=1, predictor='auto', random_state=123,\n",
+ " reg_alpha=0.05, reg_lambda=0.1, ...)"
+ ]
+ },
+ "execution_count": 27,
+ "metadata": {},
+ "output_type": "execute_result"
+ }
+ ],
+ "source": [
+ "xgb.fit(train_X, train_y)"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 28,
+ "metadata": {},
+ "outputs": [
+ {
+ "data": {
+ "text/plain": [
+ "array([0.00045016, 0.02260762, 0.0020332 , ..., 0.03950962, 0.06227764,\n",
+ " 0.01274938], dtype=float32)"
+ ]
+ },
+ "execution_count": 28,
+ "metadata": {},
+ "output_type": "execute_result"
+ }
+ ],
+ "source": [
+ "pred = xgb.predict(test_X)\n",
+ "pred"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 29,
+ "metadata": {},
+ "outputs": [
+ {
+ "name": "stdout",
+ "output_type": "stream",
+ "text": [
+ "RMSE : 0.005350\n"
+ ]
+ }
+ ],
+ "source": [
+ "# Root-mean-squared error of the hold-out predictions against the true O3 values.\n",
+ "# mean_squared_error is already imported in the first cell, so it is used directly\n",
+ "# instead of being re-imported here under a second alias.\n",
+ "rmse = np.sqrt(mean_squared_error(test_y, pred))\n",
+ "# The ' f' format spec matches the previous '% f' output (sign slot + 6 decimals).\n",
+ "print(f'RMSE : {rmse: f}')"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 30,
+ "metadata": {},
+ "outputs": [
+ {
+ "data": {
+ "text/plain": [
+ "0.9756177779346994"
+ ]
+ },
+ "execution_count": 30,
+ "metadata": {},
+ "output_type": "execute_result"
+ }
+ ],
+ "source": [
+ "xgb.score(train_X, train_y)"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 31,
+ "metadata": {},
+ "outputs": [
+ {
+ "data": {
+ "text/plain": [
+ "0.918161859103899"
+ ]
+ },
+ "execution_count": 31,
+ "metadata": {},
+ "output_type": "execute_result"
+ }
+ ],
+ "source": [
+ "xgb.score(test_X, test_y)\n"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 32,
+ "metadata": {},
+ "outputs": [
+ {
+ "data": {
+ "text/plain": [
+ "Index(['year', 'month', 'quarter', 'dayofyear', 'dayofmonth', 'weekofyear', 'dayofweek', 'datehour', 'WD_Hour', 'WS_Hour', 'Temp_Hour', 'SR_Hour', 'RH_Hour', 'NO2', 'O3'], dtype='object')"
+ ]
+ },
+ "execution_count": 32,
+ "metadata": {},
+ "output_type": "execute_result"
+ }
+ ],
+ "source": [
+ "df1.columns"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 33,
+ "metadata": {},
+ "outputs": [
+ {
+ "data": {
+ "text/plain": [
+ "array([0.00045016, 0.02260762, 0.0020332 , ..., 0.03950962, 0.06227764,\n",
+ " 0.01274938], dtype=float32)"
+ ]
+ },
+ "execution_count": 33,
+ "metadata": {},
+ "output_type": "execute_result"
+ }
+ ],
+ "source": [
+ "xgb.predict(test_X)"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 34,
+ "metadata": {},
+ "outputs": [
+ {
+ "data": {
+ "text/html": [
+ "\n",
+ "\n",
+ "
\n",
+ " \n",
+ " \n",
+ " | \n",
+ " 0 | \n",
+ " 1 | \n",
+ " 2 | \n",
+ " 3 | \n",
+ " 4 | \n",
+ " 5 | \n",
+ " 6 | \n",
+ " 7 | \n",
+ " 8 | \n",
+ " 9 | \n",
+ " 10 | \n",
+ " 11 | \n",
+ " 12 | \n",
+ " 13 | \n",
+ "
\n",
+ " \n",
+ " \n",
+ " \n",
+ " 0 | \n",
+ " 2015 | \n",
+ " 8 | \n",
+ " 3 | \n",
+ " 214 | \n",
+ " 2 | \n",
+ " 31 | \n",
+ " 6 | \n",
+ " 15 | \n",
+ " 118 | \n",
+ " 2.8 | \n",
+ " 48.4 | \n",
+ " 718 | \n",
+ " 18 | \n",
+ " 0.031 | \n",
+ "
\n",
+ " \n",
+ "
\n",
+ "
"
+ ],
+ "text/plain": [
+ " 0 1 2 3 4 5 6 7 8 9 10 11 12 13\n",
+ "0 2015 8 3 214 2 31 6 15 118 2.8 48.4 718 18 0.031"
+ ]
+ },
+ "execution_count": 34,
+ "metadata": {},
+ "output_type": "execute_result"
+ }
+ ],
+ "source": [
+ "# One hand-written observation; the values follow the column order of X\n",
+ "# (year, month, quarter, dayofyear, dayofmonth, weekofyear, dayofweek, datehour,\n",
+ "#  WD_Hour, WS_Hour, Temp_Hour, SR_Hour, RH_Hour, NO2).\n",
+ "test = [2015,8,3,214,2,31,6,15,118,2.8,48.4,718,18,0.031]\n",
+ "# Label the row with the training feature names so it lines up with X by name\n",
+ "# rather than through positional 0..13 column labels.\n",
+ "p = pd.DataFrame([test], columns=X.columns)\n",
+ "p\n"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 35,
+ "metadata": {},
+ "outputs": [
+ {
+ "data": {
+ "text/plain": [
+ "array([0.14080931], dtype=float32)"
+ ]
+ },
+ "execution_count": 35,
+ "metadata": {},
+ "output_type": "execute_result"
+ }
+ ],
+ "source": [
+ "xgb.predict(p)"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 36,
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "#import pickle\n",
+ "#file_name = \"xgb_reg_sklearn_updated.pkl\"\n",
+ "\n",
+ "# save\n",
+ "#pickle.dump(xgb, open(file_name, \"wb\"))"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 37,
+ "metadata": {},
+ "outputs": [
+ {
+ "name": "stdout",
+ "output_type": "stream",
+ "text": [
+ "\n",
+ "DatetimeIndex: 25655 entries, 2013-01-27 00:00:00 to 2015-12-31 22:00:00\n",
+ "Freq: H\n",
+ "Data columns (total 15 columns):\n",
+ " # Column Non-Null Count Dtype \n",
+ "--- ------ -------------- ----- \n",
+ " 0 year 25655 non-null float64\n",
+ " 1 month 25655 non-null float64\n",
+ " 2 quarter 25655 non-null float64\n",
+ " 3 dayofyear 25655 non-null float64\n",
+ " 4 dayofmonth 25655 non-null float64\n",
+ " 5 weekofyear 25655 non-null float64\n",
+ " 6 dayofweek 25655 non-null float64\n",
+ " 7 datehour 25655 non-null float64\n",
+ " 8 WD_Hour 25655 non-null float64\n",
+ " 9 WS_Hour 25655 non-null float64\n",
+ " 10 Temp_Hour 25655 non-null float64\n",
+ " 11 SR_Hour 25655 non-null float64\n",
+ " 12 RH_Hour 25655 non-null float64\n",
+ " 13 NO2 25655 non-null float64\n",
+ " 14 O3 25655 non-null float64\n",
+ "dtypes: float64(15)\n",
+ "memory usage: 3.1 MB\n"
+ ]
+ }
+ ],
+ "source": [
+ "df1.info()"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 38,
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "#evaluate_model(tuned_blender)"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 39,
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "#evaluate_model(tuned_lightgbm)"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 40,
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "#df"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 41,
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "#df.index.names = ['date']\n",
+ "#df"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 42,
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "#df.isna().sum()"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 43,
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "#df =df.reset_index()\n",
+ "#df.isna().sum()"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 44,
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "#df = df.dropna(how='any')\n",
+ "#df"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 45,
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "#df = df.set_index('date')\n",
+ "#df"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 46,
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "#df"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 47,
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "#df = df.reset_index()\n",
+ "#df"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 48,
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "#df = df.set_index('date').asfreq('h')\n",
+ "#df"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 49,
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "#df.isna().sum()"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 50,
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "#df1= df.interpolate()\n",
+ "#df1"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 51,
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "#df1.isna().sum()"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 52,
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "#df1"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 53,
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "#df5=df1[df1.index.year == 2015]\n",
+ "#df5"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 54,
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "#save_model(final_lgbm,'Final_Lgbm_Model_CC_no_num_prod')"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "metadata": {},
+ "outputs": [],
+ "source": []
+ }
+ ],
+ "metadata": {
+ "kernelspec": {
+ "display_name": "Python 3",
+ "language": "python",
+ "name": "python3"
+ },
+ "language_info": {
+ "codemirror_mode": {
+ "name": "ipython",
+ "version": 3
+ },
+ "file_extension": ".py",
+ "mimetype": "text/x-python",
+ "name": "python",
+ "nbconvert_exporter": "python",
+ "pygments_lexer": "ipython3",
+ "version": "3.8.5"
+ }
+ },
+ "nbformat": 4,
+ "nbformat_minor": 4
+}