{
"cells": [
{
"cell_type": "code",
"execution_count": 1,
"metadata": {},
"outputs": [
{
"name": "stderr",
"output_type": "stream",
"text": [
"C:\\Users\\ahmed.ewis\\Anaconda3_ts_3.8\\lib\\site-packages\\dask\\dataframe\\utils.py:367: FutureWarning: pandas.Int64Index is deprecated and will be removed from pandas in a future version. Use pandas.Index with the appropriate dtype instead.\n",
" _numeric_index_types = (pd.Int64Index, pd.Float64Index, pd.UInt64Index)\n",
"C:\\Users\\ahmed.ewis\\Anaconda3_ts_3.8\\lib\\site-packages\\dask\\dataframe\\utils.py:367: FutureWarning: pandas.Float64Index is deprecated and will be removed from pandas in a future version. Use pandas.Index with the appropriate dtype instead.\n",
" _numeric_index_types = (pd.Int64Index, pd.Float64Index, pd.UInt64Index)\n",
"C:\\Users\\ahmed.ewis\\Anaconda3_ts_3.8\\lib\\site-packages\\dask\\dataframe\\utils.py:367: FutureWarning: pandas.UInt64Index is deprecated and will be removed from pandas in a future version. Use pandas.Index with the appropriate dtype instead.\n",
" _numeric_index_types = (pd.Int64Index, pd.Float64Index, pd.UInt64Index)\n"
]
}
],
"source": [
"# learning curve, fitting,feature importance. rmse,mse, etc..\n",
"#import pmdarima as pm\n",
"import matplotlib.pyplot as plt\n",
"import pandas as pd\n",
"# https://www.kaggle.com/code/rtatman/lightgbm-hyperparameter-optimisation-lb-0-761/notebook\n",
"import xgboost as xgb\n",
"import lightgbm as lgb\n",
"import numpy as np\n",
"pd.set_option('display.max_rows', 500)\n",
"pd.set_option('display.max_columns', 500)\n",
"pd.set_option('display.width', 1000) \n",
"from sklearn.metrics import mean_squared_error\n",
"import warnings\n",
"warnings.filterwarnings('ignore')"
]
},
{
"cell_type": "code",
"execution_count": 2,
"metadata": {
"scrolled": true
},
"outputs": [
{
"data": {
"text/html": [
"
\n",
"\n",
"
\n",
" \n",
" \n",
" | \n",
" MeasurementDateTime | \n",
" WD-Hour | \n",
" WS-Hour | \n",
" Temp-Hour | \n",
" SR-Hour | \n",
" RH-Hour | \n",
" CO2 | \n",
" PM10 | \n",
" SO2 | \n",
" H2S | \n",
" NO | \n",
" NOX | \n",
" NO2 | \n",
" O3 | \n",
" CO | \n",
" THC | \n",
"
\n",
" \n",
" \n",
" \n",
" 0 | \n",
" 2013-01-27 00:00:00 | \n",
" 114 | \n",
" 1.7 | \n",
" 18.1 | \n",
" 5 | \n",
" 100 | \n",
" 380 | \n",
" 101.000000 | \n",
" 0.001000 | \n",
" 0.003 | \n",
" 0.005 | \n",
" 0.023 | \n",
" 0.017000 | \n",
" 2.600000e-02 | \n",
" 0.58 | \n",
" 2.170000 | \n",
"
\n",
" \n",
" 1 | \n",
" 2013-01-27 01:00:00 | \n",
" 151 | \n",
" 0.4 | \n",
" 17.8 | \n",
" 3 | \n",
" 89 | \n",
" 410 | \n",
" 101.000000 | \n",
" 0.003000 | \n",
" 0.017 | \n",
" 0.065 | \n",
" 0.151 | \n",
" 0.086000 | \n",
" 1.000000e-03 | \n",
" 1.74 | \n",
" 2.830000 | \n",
"
\n",
" \n",
" 2 | \n",
" 2013-01-27 02:00:00 | \n",
" 175 | \n",
" 0.3 | \n",
" 17.8 | \n",
" 2 | \n",
" 88 | \n",
" 392 | \n",
" 100.000000 | \n",
" 0.002000 | \n",
" 0.004 | \n",
" 0.019 | \n",
" 0.072 | \n",
" 0.053000 | \n",
" 2.000000e-03 | \n",
" 0.95 | \n",
" 3.770000 | \n",
"
\n",
" \n",
" 3 | \n",
" 2013-01-27 03:00:00 | \n",
" 264 | \n",
" 0.8 | \n",
" 16.8 | \n",
" 2 | \n",
" 92 | \n",
" 403 | \n",
" 92.000000 | \n",
" 0.002000 | \n",
" 0.014 | \n",
" 0.051 | \n",
" 0.123 | \n",
" 0.072000 | \n",
" 1.000000e-03 | \n",
" 1.43 | \n",
" 2.720000 | \n",
"
\n",
" \n",
" 4 | \n",
" 2013-01-27 04:00:00 | \n",
" 187 | \n",
" 0.4 | \n",
" 16.2 | \n",
" 2 | \n",
" 94 | \n",
" 400 | \n",
" 69.000000 | \n",
" 0.002000 | \n",
" 0.018 | \n",
" 0.041 | \n",
" 0.105 | \n",
" 0.063000 | \n",
" 1.000000e-03 | \n",
" 1.32 | \n",
" 2.520000 | \n",
"
\n",
" \n",
" ... | \n",
" ... | \n",
" ... | \n",
" ... | \n",
" ... | \n",
" ... | \n",
" ... | \n",
" ... | \n",
" ... | \n",
" ... | \n",
" ... | \n",
" ... | \n",
" ... | \n",
" ... | \n",
" ... | \n",
" ... | \n",
" ... | \n",
"
\n",
" \n",
" 22774 | \n",
" 2015-12-31 18:00:00 | \n",
" 85 | \n",
" 1.4 | \n",
" 14.6 | \n",
" 10 | \n",
" 76 | \n",
" 404 | \n",
" 344.238675 | \n",
" 0.006386 | \n",
" 0.008 | \n",
" 0.014 | \n",
" 0.081 | \n",
" 0.076199 | \n",
" 1.747050e-03 | \n",
" 2.73 | \n",
" 1.464756 | \n",
"
\n",
" \n",
" 22775 | \n",
" 2015-12-31 19:00:00 | \n",
" 99 | \n",
" 1.3 | \n",
" 14.6 | \n",
" 9 | \n",
" 75 | \n",
" 410 | \n",
" 281.815817 | \n",
" 0.006301 | \n",
" 0.009 | \n",
" 0.023 | \n",
" 0.100 | \n",
" 0.098153 | \n",
" 1.108100e-04 | \n",
" 2.81 | \n",
" 1.903803 | \n",
"
\n",
" \n",
" 22776 | \n",
" 2015-12-31 20:00:00 | \n",
" 210 | \n",
" 0.7 | \n",
" 14.5 | \n",
" 10 | \n",
" 79 | \n",
" 424 | \n",
" 51.079508 | \n",
" 0.006520 | \n",
" 0.009 | \n",
" 0.064 | \n",
" 0.164 | \n",
" 0.106758 | \n",
" 8.930000e-06 | \n",
" 2.96 | \n",
" 2.039618 | \n",
"
\n",
" \n",
" 22777 | \n",
" 2015-12-31 21:00:00 | \n",
" 185 | \n",
" 0.8 | \n",
" 14.6 | \n",
" 10 | \n",
" 81 | \n",
" 428 | \n",
" 120.974071 | \n",
" 0.006437 | \n",
" 0.009 | \n",
" 0.077 | \n",
" 0.182 | \n",
" 0.087641 | \n",
" 1.000000e-08 | \n",
" 3.25 | \n",
" 1.754740 | \n",
"
\n",
" \n",
" 22778 | \n",
" 2015-12-31 22:00:00 | \n",
" 147 | \n",
" 0.9 | \n",
" 14.1 | \n",
" 10 | \n",
" 85 | \n",
" 425 | \n",
" 50.016188 | \n",
" 0.006457 | \n",
" 0.009 | \n",
" 0.050 | \n",
" 0.137 | \n",
" 0.089011 | \n",
" 0.000000e+00 | \n",
" 3.45 | \n",
" 1.818111 | \n",
"
\n",
" \n",
"
\n",
"
22779 rows × 16 columns
\n",
"
"
],
"text/plain": [
" MeasurementDateTime WD-Hour WS-Hour Temp-Hour SR-Hour RH-Hour CO2 PM10 SO2 H2S NO NOX NO2 O3 CO THC\n",
"0 2013-01-27 00:00:00 114 1.7 18.1 5 100 380 101.000000 0.001000 0.003 0.005 0.023 0.017000 2.600000e-02 0.58 2.170000\n",
"1 2013-01-27 01:00:00 151 0.4 17.8 3 89 410 101.000000 0.003000 0.017 0.065 0.151 0.086000 1.000000e-03 1.74 2.830000\n",
"2 2013-01-27 02:00:00 175 0.3 17.8 2 88 392 100.000000 0.002000 0.004 0.019 0.072 0.053000 2.000000e-03 0.95 3.770000\n",
"3 2013-01-27 03:00:00 264 0.8 16.8 2 92 403 92.000000 0.002000 0.014 0.051 0.123 0.072000 1.000000e-03 1.43 2.720000\n",
"4 2013-01-27 04:00:00 187 0.4 16.2 2 94 400 69.000000 0.002000 0.018 0.041 0.105 0.063000 1.000000e-03 1.32 2.520000\n",
"... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ...\n",
"22774 2015-12-31 18:00:00 85 1.4 14.6 10 76 404 344.238675 0.006386 0.008 0.014 0.081 0.076199 1.747050e-03 2.73 1.464756\n",
"22775 2015-12-31 19:00:00 99 1.3 14.6 9 75 410 281.815817 0.006301 0.009 0.023 0.100 0.098153 1.108100e-04 2.81 1.903803\n",
"22776 2015-12-31 20:00:00 210 0.7 14.5 10 79 424 51.079508 0.006520 0.009 0.064 0.164 0.106758 8.930000e-06 2.96 2.039618\n",
"22777 2015-12-31 21:00:00 185 0.8 14.6 10 81 428 120.974071 0.006437 0.009 0.077 0.182 0.087641 1.000000e-08 3.25 1.754740\n",
"22778 2015-12-31 22:00:00 147 0.9 14.1 10 85 425 50.016188 0.006457 0.009 0.050 0.137 0.089011 0.000000e+00 3.45 1.818111\n",
"\n",
"[22779 rows x 16 columns]"
]
},
"execution_count": 2,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"df = pd.read_excel('Interpolation_Average_2013_2015.xlsx',sheet_name='Sheet2')\n",
"df"
]
},
{
"cell_type": "code",
"execution_count": 3,
"metadata": {},
"outputs": [
{
"data": {
"text/plain": [
"Index(['MeasurementDateTime', 'WD-Hour', 'WS-Hour', 'Temp-Hour', 'SR-Hour', 'RH-Hour', 'CO2', 'PM10', 'SO2', 'H2S', 'NO', 'NOX', 'NO2', 'O3', 'CO', 'THC'], dtype='object')"
]
},
"execution_count": 3,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"df.columns"
]
},
{
"cell_type": "code",
"execution_count": 4,
"metadata": {},
"outputs": [],
"source": [
"cols = ['MeasurementDateTime','WD-Hour', 'WS-Hour', 'Temp-Hour', 'SR-Hour', 'RH-Hour','NO2', 'O3'\n",
"]"
]
},
{
"cell_type": "code",
"execution_count": 5,
"metadata": {},
"outputs": [
{
"data": {
"text/html": [
"\n",
"\n",
"
\n",
" \n",
" \n",
" | \n",
" MeasurementDateTime | \n",
" WD-Hour | \n",
" WS-Hour | \n",
" Temp-Hour | \n",
" SR-Hour | \n",
" RH-Hour | \n",
" NO2 | \n",
" O3 | \n",
"
\n",
" \n",
" \n",
" \n",
" 0 | \n",
" 2013-01-27 00:00:00 | \n",
" 114 | \n",
" 1.7 | \n",
" 18.1 | \n",
" 5 | \n",
" 100 | \n",
" 0.017000 | \n",
" 2.600000e-02 | \n",
"
\n",
" \n",
" 1 | \n",
" 2013-01-27 01:00:00 | \n",
" 151 | \n",
" 0.4 | \n",
" 17.8 | \n",
" 3 | \n",
" 89 | \n",
" 0.086000 | \n",
" 1.000000e-03 | \n",
"
\n",
" \n",
" 2 | \n",
" 2013-01-27 02:00:00 | \n",
" 175 | \n",
" 0.3 | \n",
" 17.8 | \n",
" 2 | \n",
" 88 | \n",
" 0.053000 | \n",
" 2.000000e-03 | \n",
"
\n",
" \n",
" 3 | \n",
" 2013-01-27 03:00:00 | \n",
" 264 | \n",
" 0.8 | \n",
" 16.8 | \n",
" 2 | \n",
" 92 | \n",
" 0.072000 | \n",
" 1.000000e-03 | \n",
"
\n",
" \n",
" 4 | \n",
" 2013-01-27 04:00:00 | \n",
" 187 | \n",
" 0.4 | \n",
" 16.2 | \n",
" 2 | \n",
" 94 | \n",
" 0.063000 | \n",
" 1.000000e-03 | \n",
"
\n",
" \n",
" ... | \n",
" ... | \n",
" ... | \n",
" ... | \n",
" ... | \n",
" ... | \n",
" ... | \n",
" ... | \n",
" ... | \n",
"
\n",
" \n",
" 22774 | \n",
" 2015-12-31 18:00:00 | \n",
" 85 | \n",
" 1.4 | \n",
" 14.6 | \n",
" 10 | \n",
" 76 | \n",
" 0.076199 | \n",
" 1.747050e-03 | \n",
"
\n",
" \n",
" 22775 | \n",
" 2015-12-31 19:00:00 | \n",
" 99 | \n",
" 1.3 | \n",
" 14.6 | \n",
" 9 | \n",
" 75 | \n",
" 0.098153 | \n",
" 1.108100e-04 | \n",
"
\n",
" \n",
" 22776 | \n",
" 2015-12-31 20:00:00 | \n",
" 210 | \n",
" 0.7 | \n",
" 14.5 | \n",
" 10 | \n",
" 79 | \n",
" 0.106758 | \n",
" 8.930000e-06 | \n",
"
\n",
" \n",
" 22777 | \n",
" 2015-12-31 21:00:00 | \n",
" 185 | \n",
" 0.8 | \n",
" 14.6 | \n",
" 10 | \n",
" 81 | \n",
" 0.087641 | \n",
" 1.000000e-08 | \n",
"
\n",
" \n",
" 22778 | \n",
" 2015-12-31 22:00:00 | \n",
" 147 | \n",
" 0.9 | \n",
" 14.1 | \n",
" 10 | \n",
" 85 | \n",
" 0.089011 | \n",
" 0.000000e+00 | \n",
"
\n",
" \n",
"
\n",
"
22779 rows × 8 columns
\n",
"
"
],
"text/plain": [
" MeasurementDateTime WD-Hour WS-Hour Temp-Hour SR-Hour RH-Hour NO2 O3\n",
"0 2013-01-27 00:00:00 114 1.7 18.1 5 100 0.017000 2.600000e-02\n",
"1 2013-01-27 01:00:00 151 0.4 17.8 3 89 0.086000 1.000000e-03\n",
"2 2013-01-27 02:00:00 175 0.3 17.8 2 88 0.053000 2.000000e-03\n",
"3 2013-01-27 03:00:00 264 0.8 16.8 2 92 0.072000 1.000000e-03\n",
"4 2013-01-27 04:00:00 187 0.4 16.2 2 94 0.063000 1.000000e-03\n",
"... ... ... ... ... ... ... ... ...\n",
"22774 2015-12-31 18:00:00 85 1.4 14.6 10 76 0.076199 1.747050e-03\n",
"22775 2015-12-31 19:00:00 99 1.3 14.6 9 75 0.098153 1.108100e-04\n",
"22776 2015-12-31 20:00:00 210 0.7 14.5 10 79 0.106758 8.930000e-06\n",
"22777 2015-12-31 21:00:00 185 0.8 14.6 10 81 0.087641 1.000000e-08\n",
"22778 2015-12-31 22:00:00 147 0.9 14.1 10 85 0.089011 0.000000e+00\n",
"\n",
"[22779 rows x 8 columns]"
]
},
"execution_count": 5,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"df = df[cols]\n",
"df"
]
},
{
"cell_type": "code",
"execution_count": 6,
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"\n",
"RangeIndex: 22779 entries, 0 to 22778\n",
"Data columns (total 8 columns):\n",
" # Column Non-Null Count Dtype \n",
"--- ------ -------------- ----- \n",
" 0 MeasurementDateTime 22779 non-null datetime64[ns]\n",
" 1 WD-Hour 22779 non-null int64 \n",
" 2 WS-Hour 22779 non-null float64 \n",
" 3 Temp-Hour 22779 non-null float64 \n",
" 4 SR-Hour 22779 non-null int64 \n",
" 5 RH-Hour 22779 non-null int64 \n",
" 6 NO2 22779 non-null float64 \n",
" 7 O3 22779 non-null float64 \n",
"dtypes: datetime64[ns](1), float64(4), int64(3)\n",
"memory usage: 1.4 MB\n"
]
}
],
"source": [
"df.info()"
]
},
{
"cell_type": "code",
"execution_count": 7,
"metadata": {},
"outputs": [
{
"data": {
"text/html": [
"\n",
"\n",
"
\n",
" \n",
" \n",
" | \n",
" MeasurementDateTime | \n",
" WD-Hour | \n",
" WS-Hour | \n",
" Temp-Hour | \n",
" SR-Hour | \n",
" RH-Hour | \n",
" NO2 | \n",
" O3 | \n",
"
\n",
" \n",
" \n",
" \n",
" 0 | \n",
" 2013-01-27 00:00:00 | \n",
" 114 | \n",
" 1.7 | \n",
" 18.1 | \n",
" 5 | \n",
" 100 | \n",
" 0.017000 | \n",
" 2.600000e-02 | \n",
"
\n",
" \n",
" 1 | \n",
" 2013-01-27 01:00:00 | \n",
" 151 | \n",
" 0.4 | \n",
" 17.8 | \n",
" 3 | \n",
" 89 | \n",
" 0.086000 | \n",
" 1.000000e-03 | \n",
"
\n",
" \n",
" 2 | \n",
" 2013-01-27 02:00:00 | \n",
" 175 | \n",
" 0.3 | \n",
" 17.8 | \n",
" 2 | \n",
" 88 | \n",
" 0.053000 | \n",
" 2.000000e-03 | \n",
"
\n",
" \n",
" 3 | \n",
" 2013-01-27 03:00:00 | \n",
" 264 | \n",
" 0.8 | \n",
" 16.8 | \n",
" 2 | \n",
" 92 | \n",
" 0.072000 | \n",
" 1.000000e-03 | \n",
"
\n",
" \n",
" 4 | \n",
" 2013-01-27 04:00:00 | \n",
" 187 | \n",
" 0.4 | \n",
" 16.2 | \n",
" 2 | \n",
" 94 | \n",
" 0.063000 | \n",
" 1.000000e-03 | \n",
"
\n",
" \n",
" ... | \n",
" ... | \n",
" ... | \n",
" ... | \n",
" ... | \n",
" ... | \n",
" ... | \n",
" ... | \n",
" ... | \n",
"
\n",
" \n",
" 22774 | \n",
" 2015-12-31 18:00:00 | \n",
" 85 | \n",
" 1.4 | \n",
" 14.6 | \n",
" 10 | \n",
" 76 | \n",
" 0.076199 | \n",
" 1.747050e-03 | \n",
"
\n",
" \n",
" 22775 | \n",
" 2015-12-31 19:00:00 | \n",
" 99 | \n",
" 1.3 | \n",
" 14.6 | \n",
" 9 | \n",
" 75 | \n",
" 0.098153 | \n",
" 1.108100e-04 | \n",
"
\n",
" \n",
" 22776 | \n",
" 2015-12-31 20:00:00 | \n",
" 210 | \n",
" 0.7 | \n",
" 14.5 | \n",
" 10 | \n",
" 79 | \n",
" 0.106758 | \n",
" 8.930000e-06 | \n",
"
\n",
" \n",
" 22777 | \n",
" 2015-12-31 21:00:00 | \n",
" 185 | \n",
" 0.8 | \n",
" 14.6 | \n",
" 10 | \n",
" 81 | \n",
" 0.087641 | \n",
" 1.000000e-08 | \n",
"
\n",
" \n",
" 22778 | \n",
" 2015-12-31 22:00:00 | \n",
" 147 | \n",
" 0.9 | \n",
" 14.1 | \n",
" 10 | \n",
" 85 | \n",
" 0.089011 | \n",
" 0.000000e+00 | \n",
"
\n",
" \n",
"
\n",
"
22779 rows × 8 columns
\n",
"
"
],
"text/plain": [
" MeasurementDateTime WD-Hour WS-Hour Temp-Hour SR-Hour RH-Hour NO2 O3\n",
"0 2013-01-27 00:00:00 114 1.7 18.1 5 100 0.017000 2.600000e-02\n",
"1 2013-01-27 01:00:00 151 0.4 17.8 3 89 0.086000 1.000000e-03\n",
"2 2013-01-27 02:00:00 175 0.3 17.8 2 88 0.053000 2.000000e-03\n",
"3 2013-01-27 03:00:00 264 0.8 16.8 2 92 0.072000 1.000000e-03\n",
"4 2013-01-27 04:00:00 187 0.4 16.2 2 94 0.063000 1.000000e-03\n",
"... ... ... ... ... ... ... ... ...\n",
"22774 2015-12-31 18:00:00 85 1.4 14.6 10 76 0.076199 1.747050e-03\n",
"22775 2015-12-31 19:00:00 99 1.3 14.6 9 75 0.098153 1.108100e-04\n",
"22776 2015-12-31 20:00:00 210 0.7 14.5 10 79 0.106758 8.930000e-06\n",
"22777 2015-12-31 21:00:00 185 0.8 14.6 10 81 0.087641 1.000000e-08\n",
"22778 2015-12-31 22:00:00 147 0.9 14.1 10 85 0.089011 0.000000e+00\n",
"\n",
"[22779 rows x 8 columns]"
]
},
"execution_count": 7,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"df['MeasurementDateTime'] = pd.to_datetime(df['MeasurementDateTime'], errors='coerce')\n",
"df"
]
},
{
"cell_type": "code",
"execution_count": 8,
"metadata": {},
"outputs": [
{
"data": {
"text/html": [
"\n",
"\n",
"
\n",
" \n",
" \n",
" | \n",
" MeasurementDateTime | \n",
" WD-Hour | \n",
" WS-Hour | \n",
" Temp-Hour | \n",
" SR-Hour | \n",
" RH-Hour | \n",
" NO2 | \n",
" O3 | \n",
" date | \n",
"
\n",
" \n",
" \n",
" \n",
" 0 | \n",
" 2013-01-27 00:00:00 | \n",
" 114 | \n",
" 1.7 | \n",
" 18.1 | \n",
" 5 | \n",
" 100 | \n",
" 0.017000 | \n",
" 2.600000e-02 | \n",
" 2013-01-27 00:00:00 | \n",
"
\n",
" \n",
" 1 | \n",
" 2013-01-27 01:00:00 | \n",
" 151 | \n",
" 0.4 | \n",
" 17.8 | \n",
" 3 | \n",
" 89 | \n",
" 0.086000 | \n",
" 1.000000e-03 | \n",
" 2013-01-27 01:00:00 | \n",
"
\n",
" \n",
" 2 | \n",
" 2013-01-27 02:00:00 | \n",
" 175 | \n",
" 0.3 | \n",
" 17.8 | \n",
" 2 | \n",
" 88 | \n",
" 0.053000 | \n",
" 2.000000e-03 | \n",
" 2013-01-27 02:00:00 | \n",
"
\n",
" \n",
" 3 | \n",
" 2013-01-27 03:00:00 | \n",
" 264 | \n",
" 0.8 | \n",
" 16.8 | \n",
" 2 | \n",
" 92 | \n",
" 0.072000 | \n",
" 1.000000e-03 | \n",
" 2013-01-27 03:00:00 | \n",
"
\n",
" \n",
" 4 | \n",
" 2013-01-27 04:00:00 | \n",
" 187 | \n",
" 0.4 | \n",
" 16.2 | \n",
" 2 | \n",
" 94 | \n",
" 0.063000 | \n",
" 1.000000e-03 | \n",
" 2013-01-27 04:00:00 | \n",
"
\n",
" \n",
" ... | \n",
" ... | \n",
" ... | \n",
" ... | \n",
" ... | \n",
" ... | \n",
" ... | \n",
" ... | \n",
" ... | \n",
" ... | \n",
"
\n",
" \n",
" 22774 | \n",
" 2015-12-31 18:00:00 | \n",
" 85 | \n",
" 1.4 | \n",
" 14.6 | \n",
" 10 | \n",
" 76 | \n",
" 0.076199 | \n",
" 1.747050e-03 | \n",
" 2015-12-31 18:00:00 | \n",
"
\n",
" \n",
" 22775 | \n",
" 2015-12-31 19:00:00 | \n",
" 99 | \n",
" 1.3 | \n",
" 14.6 | \n",
" 9 | \n",
" 75 | \n",
" 0.098153 | \n",
" 1.108100e-04 | \n",
" 2015-12-31 19:00:00 | \n",
"
\n",
" \n",
" 22776 | \n",
" 2015-12-31 20:00:00 | \n",
" 210 | \n",
" 0.7 | \n",
" 14.5 | \n",
" 10 | \n",
" 79 | \n",
" 0.106758 | \n",
" 8.930000e-06 | \n",
" 2015-12-31 20:00:00 | \n",
"
\n",
" \n",
" 22777 | \n",
" 2015-12-31 21:00:00 | \n",
" 185 | \n",
" 0.8 | \n",
" 14.6 | \n",
" 10 | \n",
" 81 | \n",
" 0.087641 | \n",
" 1.000000e-08 | \n",
" 2015-12-31 21:00:00 | \n",
"
\n",
" \n",
" 22778 | \n",
" 2015-12-31 22:00:00 | \n",
" 147 | \n",
" 0.9 | \n",
" 14.1 | \n",
" 10 | \n",
" 85 | \n",
" 0.089011 | \n",
" 0.000000e+00 | \n",
" 2015-12-31 22:00:00 | \n",
"
\n",
" \n",
"
\n",
"
22779 rows × 9 columns
\n",
"
"
],
"text/plain": [
" MeasurementDateTime WD-Hour WS-Hour Temp-Hour SR-Hour RH-Hour NO2 O3 date\n",
"0 2013-01-27 00:00:00 114 1.7 18.1 5 100 0.017000 2.600000e-02 2013-01-27 00:00:00\n",
"1 2013-01-27 01:00:00 151 0.4 17.8 3 89 0.086000 1.000000e-03 2013-01-27 01:00:00\n",
"2 2013-01-27 02:00:00 175 0.3 17.8 2 88 0.053000 2.000000e-03 2013-01-27 02:00:00\n",
"3 2013-01-27 03:00:00 264 0.8 16.8 2 92 0.072000 1.000000e-03 2013-01-27 03:00:00\n",
"4 2013-01-27 04:00:00 187 0.4 16.2 2 94 0.063000 1.000000e-03 2013-01-27 04:00:00\n",
"... ... ... ... ... ... ... ... ... ...\n",
"22774 2015-12-31 18:00:00 85 1.4 14.6 10 76 0.076199 1.747050e-03 2015-12-31 18:00:00\n",
"22775 2015-12-31 19:00:00 99 1.3 14.6 9 75 0.098153 1.108100e-04 2015-12-31 19:00:00\n",
"22776 2015-12-31 20:00:00 210 0.7 14.5 10 79 0.106758 8.930000e-06 2015-12-31 20:00:00\n",
"22777 2015-12-31 21:00:00 185 0.8 14.6 10 81 0.087641 1.000000e-08 2015-12-31 21:00:00\n",
"22778 2015-12-31 22:00:00 147 0.9 14.1 10 85 0.089011 0.000000e+00 2015-12-31 22:00:00\n",
"\n",
"[22779 rows x 9 columns]"
]
},
"execution_count": 8,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"df['date'] = df['MeasurementDateTime']\n",
"df"
]
},
{
"cell_type": "code",
"execution_count": 9,
"metadata": {},
"outputs": [
{
"data": {
"text/html": [
"\n",
"\n",
"
\n",
" \n",
" \n",
" | \n",
" MeasurementDateTime | \n",
" WD-Hour | \n",
" WS-Hour | \n",
" Temp-Hour | \n",
" SR-Hour | \n",
" RH-Hour | \n",
" NO2 | \n",
" O3 | \n",
" date | \n",
" dayofweek | \n",
" quarter | \n",
" month | \n",
" year | \n",
" dayofyear | \n",
" dayofmonth | \n",
" datehour | \n",
" weekofyear | \n",
"
\n",
" \n",
" \n",
" \n",
" 0 | \n",
" 2013-01-27 00:00:00 | \n",
" 114 | \n",
" 1.7 | \n",
" 18.1 | \n",
" 5 | \n",
" 100 | \n",
" 0.017000 | \n",
" 2.600000e-02 | \n",
" 2013-01-27 00:00:00 | \n",
" 6 | \n",
" 1 | \n",
" 1 | \n",
" 2013 | \n",
" 27 | \n",
" 27 | \n",
" 0 | \n",
" 4 | \n",
"
\n",
" \n",
" 1 | \n",
" 2013-01-27 01:00:00 | \n",
" 151 | \n",
" 0.4 | \n",
" 17.8 | \n",
" 3 | \n",
" 89 | \n",
" 0.086000 | \n",
" 1.000000e-03 | \n",
" 2013-01-27 01:00:00 | \n",
" 6 | \n",
" 1 | \n",
" 1 | \n",
" 2013 | \n",
" 27 | \n",
" 27 | \n",
" 1 | \n",
" 4 | \n",
"
\n",
" \n",
" 2 | \n",
" 2013-01-27 02:00:00 | \n",
" 175 | \n",
" 0.3 | \n",
" 17.8 | \n",
" 2 | \n",
" 88 | \n",
" 0.053000 | \n",
" 2.000000e-03 | \n",
" 2013-01-27 02:00:00 | \n",
" 6 | \n",
" 1 | \n",
" 1 | \n",
" 2013 | \n",
" 27 | \n",
" 27 | \n",
" 2 | \n",
" 4 | \n",
"
\n",
" \n",
" 3 | \n",
" 2013-01-27 03:00:00 | \n",
" 264 | \n",
" 0.8 | \n",
" 16.8 | \n",
" 2 | \n",
" 92 | \n",
" 0.072000 | \n",
" 1.000000e-03 | \n",
" 2013-01-27 03:00:00 | \n",
" 6 | \n",
" 1 | \n",
" 1 | \n",
" 2013 | \n",
" 27 | \n",
" 27 | \n",
" 3 | \n",
" 4 | \n",
"
\n",
" \n",
" 4 | \n",
" 2013-01-27 04:00:00 | \n",
" 187 | \n",
" 0.4 | \n",
" 16.2 | \n",
" 2 | \n",
" 94 | \n",
" 0.063000 | \n",
" 1.000000e-03 | \n",
" 2013-01-27 04:00:00 | \n",
" 6 | \n",
" 1 | \n",
" 1 | \n",
" 2013 | \n",
" 27 | \n",
" 27 | \n",
" 4 | \n",
" 4 | \n",
"
\n",
" \n",
" ... | \n",
" ... | \n",
" ... | \n",
" ... | \n",
" ... | \n",
" ... | \n",
" ... | \n",
" ... | \n",
" ... | \n",
" ... | \n",
" ... | \n",
" ... | \n",
" ... | \n",
" ... | \n",
" ... | \n",
" ... | \n",
" ... | \n",
" ... | \n",
"
\n",
" \n",
" 22774 | \n",
" 2015-12-31 18:00:00 | \n",
" 85 | \n",
" 1.4 | \n",
" 14.6 | \n",
" 10 | \n",
" 76 | \n",
" 0.076199 | \n",
" 1.747050e-03 | \n",
" 2015-12-31 18:00:00 | \n",
" 3 | \n",
" 4 | \n",
" 12 | \n",
" 2015 | \n",
" 365 | \n",
" 31 | \n",
" 18 | \n",
" 53 | \n",
"
\n",
" \n",
" 22775 | \n",
" 2015-12-31 19:00:00 | \n",
" 99 | \n",
" 1.3 | \n",
" 14.6 | \n",
" 9 | \n",
" 75 | \n",
" 0.098153 | \n",
" 1.108100e-04 | \n",
" 2015-12-31 19:00:00 | \n",
" 3 | \n",
" 4 | \n",
" 12 | \n",
" 2015 | \n",
" 365 | \n",
" 31 | \n",
" 19 | \n",
" 53 | \n",
"
\n",
" \n",
" 22776 | \n",
" 2015-12-31 20:00:00 | \n",
" 210 | \n",
" 0.7 | \n",
" 14.5 | \n",
" 10 | \n",
" 79 | \n",
" 0.106758 | \n",
" 8.930000e-06 | \n",
" 2015-12-31 20:00:00 | \n",
" 3 | \n",
" 4 | \n",
" 12 | \n",
" 2015 | \n",
" 365 | \n",
" 31 | \n",
" 20 | \n",
" 53 | \n",
"
\n",
" \n",
" 22777 | \n",
" 2015-12-31 21:00:00 | \n",
" 185 | \n",
" 0.8 | \n",
" 14.6 | \n",
" 10 | \n",
" 81 | \n",
" 0.087641 | \n",
" 1.000000e-08 | \n",
" 2015-12-31 21:00:00 | \n",
" 3 | \n",
" 4 | \n",
" 12 | \n",
" 2015 | \n",
" 365 | \n",
" 31 | \n",
" 21 | \n",
" 53 | \n",
"
\n",
" \n",
" 22778 | \n",
" 2015-12-31 22:00:00 | \n",
" 147 | \n",
" 0.9 | \n",
" 14.1 | \n",
" 10 | \n",
" 85 | \n",
" 0.089011 | \n",
" 0.000000e+00 | \n",
" 2015-12-31 22:00:00 | \n",
" 3 | \n",
" 4 | \n",
" 12 | \n",
" 2015 | \n",
" 365 | \n",
" 31 | \n",
" 22 | \n",
" 53 | \n",
"
\n",
" \n",
"
\n",
"
22779 rows × 17 columns
\n",
"
"
],
"text/plain": [
" MeasurementDateTime WD-Hour WS-Hour Temp-Hour SR-Hour RH-Hour NO2 O3 date dayofweek quarter month year dayofyear dayofmonth datehour weekofyear\n",
"0 2013-01-27 00:00:00 114 1.7 18.1 5 100 0.017000 2.600000e-02 2013-01-27 00:00:00 6 1 1 2013 27 27 0 4\n",
"1 2013-01-27 01:00:00 151 0.4 17.8 3 89 0.086000 1.000000e-03 2013-01-27 01:00:00 6 1 1 2013 27 27 1 4\n",
"2 2013-01-27 02:00:00 175 0.3 17.8 2 88 0.053000 2.000000e-03 2013-01-27 02:00:00 6 1 1 2013 27 27 2 4\n",
"3 2013-01-27 03:00:00 264 0.8 16.8 2 92 0.072000 1.000000e-03 2013-01-27 03:00:00 6 1 1 2013 27 27 3 4\n",
"4 2013-01-27 04:00:00 187 0.4 16.2 2 94 0.063000 1.000000e-03 2013-01-27 04:00:00 6 1 1 2013 27 27 4 4\n",
"... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ...\n",
"22774 2015-12-31 18:00:00 85 1.4 14.6 10 76 0.076199 1.747050e-03 2015-12-31 18:00:00 3 4 12 2015 365 31 18 53\n",
"22775 2015-12-31 19:00:00 99 1.3 14.6 9 75 0.098153 1.108100e-04 2015-12-31 19:00:00 3 4 12 2015 365 31 19 53\n",
"22776 2015-12-31 20:00:00 210 0.7 14.5 10 79 0.106758 8.930000e-06 2015-12-31 20:00:00 3 4 12 2015 365 31 20 53\n",
"22777 2015-12-31 21:00:00 185 0.8 14.6 10 81 0.087641 1.000000e-08 2015-12-31 21:00:00 3 4 12 2015 365 31 21 53\n",
"22778 2015-12-31 22:00:00 147 0.9 14.1 10 85 0.089011 0.000000e+00 2015-12-31 22:00:00 3 4 12 2015 365 31 22 53\n",
"\n",
"[22779 rows x 17 columns]"
]
},
"execution_count": 9,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"from datetime import datetime\n",
"#df['MeasurementDateTime'] = df.index\n",
"df['dayofweek'] = df['date'].dt.dayofweek\n",
"df['quarter'] = df['date'].dt.quarter\n",
"df['month'] = df['date'].dt.month\n",
"df['year'] = df['date'].dt.year\n",
"df['dayofyear'] = df['date'].dt.dayofyear\n",
"df['dayofmonth'] = df['date'].dt.day\n",
"df['datehour'] = df['date'].dt.hour\n",
"df['weekofyear'] = df['date'].dt.weekofyear\n",
"df"
]
},
{
"cell_type": "code",
"execution_count": 10,
"metadata": {},
"outputs": [
{
"data": {
"text/plain": [
"Index(['MeasurementDateTime', 'WD-Hour', 'WS-Hour', 'Temp-Hour', 'SR-Hour', 'RH-Hour', 'NO2', 'O3', 'date', 'dayofweek', 'quarter', 'month', 'year', 'dayofyear', 'dayofmonth', 'datehour', 'weekofyear'], dtype='object')"
]
},
"execution_count": 10,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"df.columns"
]
},
{
"cell_type": "code",
"execution_count": 11,
"metadata": {},
"outputs": [
{
"data": {
"text/html": [
"\n",
"\n",
"
\n",
" \n",
" \n",
" | \n",
" MeasurementDateTime | \n",
" WD-Hour | \n",
" WS-Hour | \n",
" Temp-Hour | \n",
" SR-Hour | \n",
" RH-Hour | \n",
" NO2 | \n",
" O3 | \n",
" dayofweek | \n",
" quarter | \n",
" month | \n",
" year | \n",
" dayofyear | \n",
" dayofmonth | \n",
" datehour | \n",
" weekofyear | \n",
"
\n",
" \n",
" date | \n",
" | \n",
" | \n",
" | \n",
" | \n",
" | \n",
" | \n",
" | \n",
" | \n",
" | \n",
" | \n",
" | \n",
" | \n",
" | \n",
" | \n",
" | \n",
" | \n",
"
\n",
" \n",
" \n",
" \n",
" 2013-01-27 00:00:00 | \n",
" 2013-01-27 00:00:00 | \n",
" 114.0 | \n",
" 1.7 | \n",
" 18.1 | \n",
" 5.0 | \n",
" 100.0 | \n",
" 0.017000 | \n",
" 2.600000e-02 | \n",
" 6.0 | \n",
" 1.0 | \n",
" 1.0 | \n",
" 2013.0 | \n",
" 27.0 | \n",
" 27.0 | \n",
" 0.0 | \n",
" 4.0 | \n",
"
\n",
" \n",
" 2013-01-27 01:00:00 | \n",
" 2013-01-27 01:00:00 | \n",
" 151.0 | \n",
" 0.4 | \n",
" 17.8 | \n",
" 3.0 | \n",
" 89.0 | \n",
" 0.086000 | \n",
" 1.000000e-03 | \n",
" 6.0 | \n",
" 1.0 | \n",
" 1.0 | \n",
" 2013.0 | \n",
" 27.0 | \n",
" 27.0 | \n",
" 1.0 | \n",
" 4.0 | \n",
"
\n",
" \n",
" 2013-01-27 02:00:00 | \n",
" 2013-01-27 02:00:00 | \n",
" 175.0 | \n",
" 0.3 | \n",
" 17.8 | \n",
" 2.0 | \n",
" 88.0 | \n",
" 0.053000 | \n",
" 2.000000e-03 | \n",
" 6.0 | \n",
" 1.0 | \n",
" 1.0 | \n",
" 2013.0 | \n",
" 27.0 | \n",
" 27.0 | \n",
" 2.0 | \n",
" 4.0 | \n",
"
\n",
" \n",
" 2013-01-27 03:00:00 | \n",
" 2013-01-27 03:00:00 | \n",
" 264.0 | \n",
" 0.8 | \n",
" 16.8 | \n",
" 2.0 | \n",
" 92.0 | \n",
" 0.072000 | \n",
" 1.000000e-03 | \n",
" 6.0 | \n",
" 1.0 | \n",
" 1.0 | \n",
" 2013.0 | \n",
" 27.0 | \n",
" 27.0 | \n",
" 3.0 | \n",
" 4.0 | \n",
"
\n",
" \n",
" 2013-01-27 04:00:00 | \n",
" 2013-01-27 04:00:00 | \n",
" 187.0 | \n",
" 0.4 | \n",
" 16.2 | \n",
" 2.0 | \n",
" 94.0 | \n",
" 0.063000 | \n",
" 1.000000e-03 | \n",
" 6.0 | \n",
" 1.0 | \n",
" 1.0 | \n",
" 2013.0 | \n",
" 27.0 | \n",
" 27.0 | \n",
" 4.0 | \n",
" 4.0 | \n",
"
\n",
" \n",
" ... | \n",
" ... | \n",
" ... | \n",
" ... | \n",
" ... | \n",
" ... | \n",
" ... | \n",
" ... | \n",
" ... | \n",
" ... | \n",
" ... | \n",
" ... | \n",
" ... | \n",
" ... | \n",
" ... | \n",
" ... | \n",
" ... | \n",
"
\n",
" \n",
" 2015-12-31 18:00:00 | \n",
" 2015-12-31 18:00:00 | \n",
" 85.0 | \n",
" 1.4 | \n",
" 14.6 | \n",
" 10.0 | \n",
" 76.0 | \n",
" 0.076199 | \n",
" 1.747050e-03 | \n",
" 3.0 | \n",
" 4.0 | \n",
" 12.0 | \n",
" 2015.0 | \n",
" 365.0 | \n",
" 31.0 | \n",
" 18.0 | \n",
" 53.0 | \n",
"
\n",
" \n",
" 2015-12-31 19:00:00 | \n",
" 2015-12-31 19:00:00 | \n",
" 99.0 | \n",
" 1.3 | \n",
" 14.6 | \n",
" 9.0 | \n",
" 75.0 | \n",
" 0.098153 | \n",
" 1.108100e-04 | \n",
" 3.0 | \n",
" 4.0 | \n",
" 12.0 | \n",
" 2015.0 | \n",
" 365.0 | \n",
" 31.0 | \n",
" 19.0 | \n",
" 53.0 | \n",
"
\n",
" \n",
" 2015-12-31 20:00:00 | \n",
" 2015-12-31 20:00:00 | \n",
" 210.0 | \n",
" 0.7 | \n",
" 14.5 | \n",
" 10.0 | \n",
" 79.0 | \n",
" 0.106758 | \n",
" 8.930000e-06 | \n",
" 3.0 | \n",
" 4.0 | \n",
" 12.0 | \n",
" 2015.0 | \n",
" 365.0 | \n",
" 31.0 | \n",
" 20.0 | \n",
" 53.0 | \n",
"
\n",
" \n",
" 2015-12-31 21:00:00 | \n",
" 2015-12-31 21:00:00 | \n",
" 185.0 | \n",
" 0.8 | \n",
" 14.6 | \n",
" 10.0 | \n",
" 81.0 | \n",
" 0.087641 | \n",
" 1.000000e-08 | \n",
" 3.0 | \n",
" 4.0 | \n",
" 12.0 | \n",
" 2015.0 | \n",
" 365.0 | \n",
" 31.0 | \n",
" 21.0 | \n",
" 53.0 | \n",
"
\n",
" \n",
" 2015-12-31 22:00:00 | \n",
" 2015-12-31 22:00:00 | \n",
" 147.0 | \n",
" 0.9 | \n",
" 14.1 | \n",
" 10.0 | \n",
" 85.0 | \n",
" 0.089011 | \n",
" 0.000000e+00 | \n",
" 3.0 | \n",
" 4.0 | \n",
" 12.0 | \n",
" 2015.0 | \n",
" 365.0 | \n",
" 31.0 | \n",
" 22.0 | \n",
" 53.0 | \n",
"
\n",
" \n",
"
\n",
"
25655 rows × 16 columns
\n",
"
"
],
"text/plain": [
" MeasurementDateTime WD-Hour WS-Hour Temp-Hour SR-Hour RH-Hour NO2 O3 dayofweek quarter month year dayofyear dayofmonth datehour weekofyear\n",
"date \n",
"2013-01-27 00:00:00 2013-01-27 00:00:00 114.0 1.7 18.1 5.0 100.0 0.017000 2.600000e-02 6.0 1.0 1.0 2013.0 27.0 27.0 0.0 4.0\n",
"2013-01-27 01:00:00 2013-01-27 01:00:00 151.0 0.4 17.8 3.0 89.0 0.086000 1.000000e-03 6.0 1.0 1.0 2013.0 27.0 27.0 1.0 4.0\n",
"2013-01-27 02:00:00 2013-01-27 02:00:00 175.0 0.3 17.8 2.0 88.0 0.053000 2.000000e-03 6.0 1.0 1.0 2013.0 27.0 27.0 2.0 4.0\n",
"2013-01-27 03:00:00 2013-01-27 03:00:00 264.0 0.8 16.8 2.0 92.0 0.072000 1.000000e-03 6.0 1.0 1.0 2013.0 27.0 27.0 3.0 4.0\n",
"2013-01-27 04:00:00 2013-01-27 04:00:00 187.0 0.4 16.2 2.0 94.0 0.063000 1.000000e-03 6.0 1.0 1.0 2013.0 27.0 27.0 4.0 4.0\n",
"... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ...\n",
"2015-12-31 18:00:00 2015-12-31 18:00:00 85.0 1.4 14.6 10.0 76.0 0.076199 1.747050e-03 3.0 4.0 12.0 2015.0 365.0 31.0 18.0 53.0\n",
"2015-12-31 19:00:00 2015-12-31 19:00:00 99.0 1.3 14.6 9.0 75.0 0.098153 1.108100e-04 3.0 4.0 12.0 2015.0 365.0 31.0 19.0 53.0\n",
"2015-12-31 20:00:00 2015-12-31 20:00:00 210.0 0.7 14.5 10.0 79.0 0.106758 8.930000e-06 3.0 4.0 12.0 2015.0 365.0 31.0 20.0 53.0\n",
"2015-12-31 21:00:00 2015-12-31 21:00:00 185.0 0.8 14.6 10.0 81.0 0.087641 1.000000e-08 3.0 4.0 12.0 2015.0 365.0 31.0 21.0 53.0\n",
"2015-12-31 22:00:00 2015-12-31 22:00:00 147.0 0.9 14.1 10.0 85.0 0.089011 0.000000e+00 3.0 4.0 12.0 2015.0 365.0 31.0 22.0 53.0\n",
"\n",
"[25655 rows x 16 columns]"
]
},
"execution_count": 11,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"df = df.set_index('date').asfreq('h')\n",
"df\n"
]
},
{
"cell_type": "code",
"execution_count": 14,
"metadata": {},
"outputs": [
{
"data": {
"text/plain": [
"MeasurementDateTime 2876\n",
"WD-Hour 2876\n",
"WS-Hour 2876\n",
"Temp-Hour 2876\n",
"SR-Hour 2876\n",
"RH-Hour 2876\n",
"NO2 2876\n",
"O3 2876\n",
"dayofweek 2876\n",
"quarter 2876\n",
"month 2876\n",
"year 2876\n",
"dayofyear 2876\n",
"dayofmonth 2876\n",
"datehour 2876\n",
"weekofyear 2876\n",
"dtype: int64"
]
},
"execution_count": 14,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"df.isna().sum()"
]
},
{
"cell_type": "code",
"execution_count": 15,
"metadata": {},
"outputs": [
{
"data": {
"text/html": [
"\n",
"\n",
"
\n",
" \n",
" \n",
" | \n",
" MeasurementDateTime | \n",
" WD-Hour | \n",
" WS-Hour | \n",
" Temp-Hour | \n",
" SR-Hour | \n",
" RH-Hour | \n",
" NO2 | \n",
" O3 | \n",
" dayofweek | \n",
" quarter | \n",
" month | \n",
" year | \n",
" dayofyear | \n",
" dayofmonth | \n",
" datehour | \n",
" weekofyear | \n",
"
\n",
" \n",
" date | \n",
" | \n",
" | \n",
" | \n",
" | \n",
" | \n",
" | \n",
" | \n",
" | \n",
" | \n",
" | \n",
" | \n",
" | \n",
" | \n",
" | \n",
" | \n",
" | \n",
"
\n",
" \n",
" \n",
" \n",
" 2013-01-27 00:00:00 | \n",
" 2013-01-27 00:00:00 | \n",
" 114.0 | \n",
" 1.7 | \n",
" 18.1 | \n",
" 5.0 | \n",
" 100.0 | \n",
" 0.017000 | \n",
" 2.600000e-02 | \n",
" 6.0 | \n",
" 1.0 | \n",
" 1.0 | \n",
" 2013.0 | \n",
" 27.0 | \n",
" 27.0 | \n",
" 0.0 | \n",
" 4.0 | \n",
"
\n",
" \n",
" 2013-01-27 01:00:00 | \n",
" 2013-01-27 01:00:00 | \n",
" 151.0 | \n",
" 0.4 | \n",
" 17.8 | \n",
" 3.0 | \n",
" 89.0 | \n",
" 0.086000 | \n",
" 1.000000e-03 | \n",
" 6.0 | \n",
" 1.0 | \n",
" 1.0 | \n",
" 2013.0 | \n",
" 27.0 | \n",
" 27.0 | \n",
" 1.0 | \n",
" 4.0 | \n",
"
\n",
" \n",
" 2013-01-27 02:00:00 | \n",
" 2013-01-27 02:00:00 | \n",
" 175.0 | \n",
" 0.3 | \n",
" 17.8 | \n",
" 2.0 | \n",
" 88.0 | \n",
" 0.053000 | \n",
" 2.000000e-03 | \n",
" 6.0 | \n",
" 1.0 | \n",
" 1.0 | \n",
" 2013.0 | \n",
" 27.0 | \n",
" 27.0 | \n",
" 2.0 | \n",
" 4.0 | \n",
"
\n",
" \n",
" 2013-01-27 03:00:00 | \n",
" 2013-01-27 03:00:00 | \n",
" 264.0 | \n",
" 0.8 | \n",
" 16.8 | \n",
" 2.0 | \n",
" 92.0 | \n",
" 0.072000 | \n",
" 1.000000e-03 | \n",
" 6.0 | \n",
" 1.0 | \n",
" 1.0 | \n",
" 2013.0 | \n",
" 27.0 | \n",
" 27.0 | \n",
" 3.0 | \n",
" 4.0 | \n",
"
\n",
" \n",
" 2013-01-27 04:00:00 | \n",
" 2013-01-27 04:00:00 | \n",
" 187.0 | \n",
" 0.4 | \n",
" 16.2 | \n",
" 2.0 | \n",
" 94.0 | \n",
" 0.063000 | \n",
" 1.000000e-03 | \n",
" 6.0 | \n",
" 1.0 | \n",
" 1.0 | \n",
" 2013.0 | \n",
" 27.0 | \n",
" 27.0 | \n",
" 4.0 | \n",
" 4.0 | \n",
"
\n",
" \n",
" ... | \n",
" ... | \n",
" ... | \n",
" ... | \n",
" ... | \n",
" ... | \n",
" ... | \n",
" ... | \n",
" ... | \n",
" ... | \n",
" ... | \n",
" ... | \n",
" ... | \n",
" ... | \n",
" ... | \n",
" ... | \n",
" ... | \n",
"
\n",
" \n",
" 2015-12-31 18:00:00 | \n",
" 2015-12-31 18:00:00 | \n",
" 85.0 | \n",
" 1.4 | \n",
" 14.6 | \n",
" 10.0 | \n",
" 76.0 | \n",
" 0.076199 | \n",
" 1.747050e-03 | \n",
" 3.0 | \n",
" 4.0 | \n",
" 12.0 | \n",
" 2015.0 | \n",
" 365.0 | \n",
" 31.0 | \n",
" 18.0 | \n",
" 53.0 | \n",
"
\n",
" \n",
" 2015-12-31 19:00:00 | \n",
" 2015-12-31 19:00:00 | \n",
" 99.0 | \n",
" 1.3 | \n",
" 14.6 | \n",
" 9.0 | \n",
" 75.0 | \n",
" 0.098153 | \n",
" 1.108100e-04 | \n",
" 3.0 | \n",
" 4.0 | \n",
" 12.0 | \n",
" 2015.0 | \n",
" 365.0 | \n",
" 31.0 | \n",
" 19.0 | \n",
" 53.0 | \n",
"
\n",
" \n",
" 2015-12-31 20:00:00 | \n",
" 2015-12-31 20:00:00 | \n",
" 210.0 | \n",
" 0.7 | \n",
" 14.5 | \n",
" 10.0 | \n",
" 79.0 | \n",
" 0.106758 | \n",
" 8.930000e-06 | \n",
" 3.0 | \n",
" 4.0 | \n",
" 12.0 | \n",
" 2015.0 | \n",
" 365.0 | \n",
" 31.0 | \n",
" 20.0 | \n",
" 53.0 | \n",
"
\n",
" \n",
" 2015-12-31 21:00:00 | \n",
" 2015-12-31 21:00:00 | \n",
" 185.0 | \n",
" 0.8 | \n",
" 14.6 | \n",
" 10.0 | \n",
" 81.0 | \n",
" 0.087641 | \n",
" 1.000000e-08 | \n",
" 3.0 | \n",
" 4.0 | \n",
" 12.0 | \n",
" 2015.0 | \n",
" 365.0 | \n",
" 31.0 | \n",
" 21.0 | \n",
" 53.0 | \n",
"
\n",
" \n",
" 2015-12-31 22:00:00 | \n",
" 2015-12-31 22:00:00 | \n",
" 147.0 | \n",
" 0.9 | \n",
" 14.1 | \n",
" 10.0 | \n",
" 85.0 | \n",
" 0.089011 | \n",
" 0.000000e+00 | \n",
" 3.0 | \n",
" 4.0 | \n",
" 12.0 | \n",
" 2015.0 | \n",
" 365.0 | \n",
" 31.0 | \n",
" 22.0 | \n",
" 53.0 | \n",
"
\n",
" \n",
"
\n",
"
25655 rows × 16 columns
\n",
"
"
],
"text/plain": [
" MeasurementDateTime WD-Hour WS-Hour Temp-Hour SR-Hour RH-Hour NO2 O3 dayofweek quarter month year dayofyear dayofmonth datehour weekofyear\n",
"date \n",
"2013-01-27 00:00:00 2013-01-27 00:00:00 114.0 1.7 18.1 5.0 100.0 0.017000 2.600000e-02 6.0 1.0 1.0 2013.0 27.0 27.0 0.0 4.0\n",
"2013-01-27 01:00:00 2013-01-27 01:00:00 151.0 0.4 17.8 3.0 89.0 0.086000 1.000000e-03 6.0 1.0 1.0 2013.0 27.0 27.0 1.0 4.0\n",
"2013-01-27 02:00:00 2013-01-27 02:00:00 175.0 0.3 17.8 2.0 88.0 0.053000 2.000000e-03 6.0 1.0 1.0 2013.0 27.0 27.0 2.0 4.0\n",
"2013-01-27 03:00:00 2013-01-27 03:00:00 264.0 0.8 16.8 2.0 92.0 0.072000 1.000000e-03 6.0 1.0 1.0 2013.0 27.0 27.0 3.0 4.0\n",
"2013-01-27 04:00:00 2013-01-27 04:00:00 187.0 0.4 16.2 2.0 94.0 0.063000 1.000000e-03 6.0 1.0 1.0 2013.0 27.0 27.0 4.0 4.0\n",
"... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ...\n",
"2015-12-31 18:00:00 2015-12-31 18:00:00 85.0 1.4 14.6 10.0 76.0 0.076199 1.747050e-03 3.0 4.0 12.0 2015.0 365.0 31.0 18.0 53.0\n",
"2015-12-31 19:00:00 2015-12-31 19:00:00 99.0 1.3 14.6 9.0 75.0 0.098153 1.108100e-04 3.0 4.0 12.0 2015.0 365.0 31.0 19.0 53.0\n",
"2015-12-31 20:00:00 2015-12-31 20:00:00 210.0 0.7 14.5 10.0 79.0 0.106758 8.930000e-06 3.0 4.0 12.0 2015.0 365.0 31.0 20.0 53.0\n",
"2015-12-31 21:00:00 2015-12-31 21:00:00 185.0 0.8 14.6 10.0 81.0 0.087641 1.000000e-08 3.0 4.0 12.0 2015.0 365.0 31.0 21.0 53.0\n",
"2015-12-31 22:00:00 2015-12-31 22:00:00 147.0 0.9 14.1 10.0 85.0 0.089011 0.000000e+00 3.0 4.0 12.0 2015.0 365.0 31.0 22.0 53.0\n",
"\n",
"[25655 rows x 16 columns]"
]
},
"execution_count": 15,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"# Fill every gap with the last observed value in that column.\n",
"# DataFrame.ffill() is used because interpolate(method='ffill') is deprecated in newer pandas.\n",
"# NOTE(review): this forward-fills the calendar columns as well (weekofyear reports 2876\n",
"# missing values in the previous cell), so filled rows inherit the preceding row's\n",
"# calendar values - confirm that is intended rather than recomputing them from the index.\n",
"df1 = df.ffill()\n",
"df1"
]
},
{
"cell_type": "code",
"execution_count": 16,
"metadata": {},
"outputs": [
{
"data": {
"text/html": [
"\n",
"\n",
"
\n",
" \n",
" \n",
" | \n",
" dayofweek | \n",
" quarter | \n",
" month | \n",
" year | \n",
" dayofyear | \n",
" dayofmonth | \n",
" datehour | \n",
" weekofyear | \n",
" WD-Hour | \n",
" WS-Hour | \n",
" Temp-Hour | \n",
" SR-Hour | \n",
" RH-Hour | \n",
" NO2 | \n",
" O3 | \n",
"
\n",
" \n",
" date | \n",
" | \n",
" | \n",
" | \n",
" | \n",
" | \n",
" | \n",
" | \n",
" | \n",
" | \n",
" | \n",
" | \n",
" | \n",
" | \n",
" | \n",
" | \n",
"
\n",
" \n",
" \n",
" \n",
" 2013-01-27 00:00:00 | \n",
" 6.0 | \n",
" 1.0 | \n",
" 1.0 | \n",
" 2013.0 | \n",
" 27.0 | \n",
" 27.0 | \n",
" 0.0 | \n",
" 4.0 | \n",
" 114.0 | \n",
" 1.7 | \n",
" 18.1 | \n",
" 5.0 | \n",
" 100.0 | \n",
" 0.017000 | \n",
" 2.600000e-02 | \n",
"
\n",
" \n",
" 2013-01-27 01:00:00 | \n",
" 6.0 | \n",
" 1.0 | \n",
" 1.0 | \n",
" 2013.0 | \n",
" 27.0 | \n",
" 27.0 | \n",
" 1.0 | \n",
" 4.0 | \n",
" 151.0 | \n",
" 0.4 | \n",
" 17.8 | \n",
" 3.0 | \n",
" 89.0 | \n",
" 0.086000 | \n",
" 1.000000e-03 | \n",
"
\n",
" \n",
" 2013-01-27 02:00:00 | \n",
" 6.0 | \n",
" 1.0 | \n",
" 1.0 | \n",
" 2013.0 | \n",
" 27.0 | \n",
" 27.0 | \n",
" 2.0 | \n",
" 4.0 | \n",
" 175.0 | \n",
" 0.3 | \n",
" 17.8 | \n",
" 2.0 | \n",
" 88.0 | \n",
" 0.053000 | \n",
" 2.000000e-03 | \n",
"
\n",
" \n",
" 2013-01-27 03:00:00 | \n",
" 6.0 | \n",
" 1.0 | \n",
" 1.0 | \n",
" 2013.0 | \n",
" 27.0 | \n",
" 27.0 | \n",
" 3.0 | \n",
" 4.0 | \n",
" 264.0 | \n",
" 0.8 | \n",
" 16.8 | \n",
" 2.0 | \n",
" 92.0 | \n",
" 0.072000 | \n",
" 1.000000e-03 | \n",
"
\n",
" \n",
" 2013-01-27 04:00:00 | \n",
" 6.0 | \n",
" 1.0 | \n",
" 1.0 | \n",
" 2013.0 | \n",
" 27.0 | \n",
" 27.0 | \n",
" 4.0 | \n",
" 4.0 | \n",
" 187.0 | \n",
" 0.4 | \n",
" 16.2 | \n",
" 2.0 | \n",
" 94.0 | \n",
" 0.063000 | \n",
" 1.000000e-03 | \n",
"
\n",
" \n",
" ... | \n",
" ... | \n",
" ... | \n",
" ... | \n",
" ... | \n",
" ... | \n",
" ... | \n",
" ... | \n",
" ... | \n",
" ... | \n",
" ... | \n",
" ... | \n",
" ... | \n",
" ... | \n",
" ... | \n",
" ... | \n",
"
\n",
" \n",
" 2015-12-31 18:00:00 | \n",
" 3.0 | \n",
" 4.0 | \n",
" 12.0 | \n",
" 2015.0 | \n",
" 365.0 | \n",
" 31.0 | \n",
" 18.0 | \n",
" 53.0 | \n",
" 85.0 | \n",
" 1.4 | \n",
" 14.6 | \n",
" 10.0 | \n",
" 76.0 | \n",
" 0.076199 | \n",
" 1.747050e-03 | \n",
"
\n",
" \n",
" 2015-12-31 19:00:00 | \n",
" 3.0 | \n",
" 4.0 | \n",
" 12.0 | \n",
" 2015.0 | \n",
" 365.0 | \n",
" 31.0 | \n",
" 19.0 | \n",
" 53.0 | \n",
" 99.0 | \n",
" 1.3 | \n",
" 14.6 | \n",
" 9.0 | \n",
" 75.0 | \n",
" 0.098153 | \n",
" 1.108100e-04 | \n",
"
\n",
" \n",
" 2015-12-31 20:00:00 | \n",
" 3.0 | \n",
" 4.0 | \n",
" 12.0 | \n",
" 2015.0 | \n",
" 365.0 | \n",
" 31.0 | \n",
" 20.0 | \n",
" 53.0 | \n",
" 210.0 | \n",
" 0.7 | \n",
" 14.5 | \n",
" 10.0 | \n",
" 79.0 | \n",
" 0.106758 | \n",
" 8.930000e-06 | \n",
"
\n",
" \n",
" 2015-12-31 21:00:00 | \n",
" 3.0 | \n",
" 4.0 | \n",
" 12.0 | \n",
" 2015.0 | \n",
" 365.0 | \n",
" 31.0 | \n",
" 21.0 | \n",
" 53.0 | \n",
" 185.0 | \n",
" 0.8 | \n",
" 14.6 | \n",
" 10.0 | \n",
" 81.0 | \n",
" 0.087641 | \n",
" 1.000000e-08 | \n",
"
\n",
" \n",
" 2015-12-31 22:00:00 | \n",
" 3.0 | \n",
" 4.0 | \n",
" 12.0 | \n",
" 2015.0 | \n",
" 365.0 | \n",
" 31.0 | \n",
" 22.0 | \n",
" 53.0 | \n",
" 147.0 | \n",
" 0.9 | \n",
" 14.1 | \n",
" 10.0 | \n",
" 85.0 | \n",
" 0.089011 | \n",
" 0.000000e+00 | \n",
"
\n",
" \n",
"
\n",
"
25655 rows × 15 columns
\n",
"
"
],
"text/plain": [
" dayofweek quarter month year dayofyear dayofmonth datehour weekofyear WD-Hour WS-Hour Temp-Hour SR-Hour RH-Hour NO2 O3\n",
"date \n",
"2013-01-27 00:00:00 6.0 1.0 1.0 2013.0 27.0 27.0 0.0 4.0 114.0 1.7 18.1 5.0 100.0 0.017000 2.600000e-02\n",
"2013-01-27 01:00:00 6.0 1.0 1.0 2013.0 27.0 27.0 1.0 4.0 151.0 0.4 17.8 3.0 89.0 0.086000 1.000000e-03\n",
"2013-01-27 02:00:00 6.0 1.0 1.0 2013.0 27.0 27.0 2.0 4.0 175.0 0.3 17.8 2.0 88.0 0.053000 2.000000e-03\n",
"2013-01-27 03:00:00 6.0 1.0 1.0 2013.0 27.0 27.0 3.0 4.0 264.0 0.8 16.8 2.0 92.0 0.072000 1.000000e-03\n",
"2013-01-27 04:00:00 6.0 1.0 1.0 2013.0 27.0 27.0 4.0 4.0 187.0 0.4 16.2 2.0 94.0 0.063000 1.000000e-03\n",
"... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ...\n",
"2015-12-31 18:00:00 3.0 4.0 12.0 2015.0 365.0 31.0 18.0 53.0 85.0 1.4 14.6 10.0 76.0 0.076199 1.747050e-03\n",
"2015-12-31 19:00:00 3.0 4.0 12.0 2015.0 365.0 31.0 19.0 53.0 99.0 1.3 14.6 9.0 75.0 0.098153 1.108100e-04\n",
"2015-12-31 20:00:00 3.0 4.0 12.0 2015.0 365.0 31.0 20.0 53.0 210.0 0.7 14.5 10.0 79.0 0.106758 8.930000e-06\n",
"2015-12-31 21:00:00 3.0 4.0 12.0 2015.0 365.0 31.0 21.0 53.0 185.0 0.8 14.6 10.0 81.0 0.087641 1.000000e-08\n",
"2015-12-31 22:00:00 3.0 4.0 12.0 2015.0 365.0 31.0 22.0 53.0 147.0 0.9 14.1 10.0 85.0 0.089011 0.000000e+00\n",
"\n",
"[25655 rows x 15 columns]"
]
},
"execution_count": 16,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"# Keep only the modelling columns: calendar features first, then the hourly\n",
"# measurements, with the O3 target last (MeasurementDateTime is dropped).\n",
"cols = [\n",
"    'dayofweek', 'quarter', 'month', 'year',\n",
"    'dayofyear', 'dayofmonth', 'datehour', 'weekofyear',\n",
"    'WD-Hour', 'WS-Hour', 'Temp-Hour', 'SR-Hour', 'RH-Hour',\n",
"    'NO2', 'O3',\n",
"]\n",
"df1 = df1[cols]\n",
"df1"
]
},
{
"cell_type": "code",
"execution_count": 17,
"metadata": {},
"outputs": [
{
"data": {
"text/plain": [
"dayofweek 0\n",
"quarter 0\n",
"month 0\n",
"year 0\n",
"dayofyear 0\n",
"dayofmonth 0\n",
"datehour 0\n",
"weekofyear 0\n",
"WD-Hour 0\n",
"WS-Hour 0\n",
"Temp-Hour 0\n",
"SR-Hour 0\n",
"RH-Hour 0\n",
"NO2 0\n",
"O3 0\n",
"dtype: int64"
]
},
"execution_count": 17,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"# Confirm that no missing values remain after the forward fill.\n",
"df1.isna().sum(axis=0)"
]
},
{
"cell_type": "code",
"execution_count": 16,
"metadata": {},
"outputs": [],
"source": [
"#cols =['dayofweek', 'quarter', 'month', 'year', 'dayofyear', 'dayofmonth', 'datehour', 'weekofyear','MeasurementDateTime', 'WD-Hour', 'WS-Hour', 'Temp-Hour', 'SR-Hour', 'RH-Hour', 'NO2', 'O3']\n",
"#df = df[cols]\n",
"#df = df.set_index('MeasurementDateTime')\n",
"#df"
]
},
{
"cell_type": "code",
"execution_count": 17,
"metadata": {},
"outputs": [],
"source": [
"#df1 = df1.drop(['year'],axis=1)\n",
"#df1"
]
},
{
"cell_type": "code",
"execution_count": 18,
"metadata": {},
"outputs": [
{
"data": {
"text/html": [
"\n",
"\n",
" \n",
" \n",
" | \n",
" Description | \n",
" Value | \n",
"
\n",
" \n",
" \n",
" \n",
" 0 | \n",
" Session id | \n",
" 123 | \n",
"
\n",
" \n",
" 1 | \n",
" Target | \n",
" O3 | \n",
"
\n",
" \n",
" 2 | \n",
" Target type | \n",
" regression | \n",
"
\n",
" \n",
" 3 | \n",
" Data shape | \n",
" (25655, 15) | \n",
"
\n",
" \n",
" 4 | \n",
" Train data shape | \n",
" (17958, 15) | \n",
"
\n",
" \n",
" 5 | \n",
" Test data shape | \n",
" (7697, 15) | \n",
"
\n",
" \n",
" 6 | \n",
" Numeric features | \n",
" 14 | \n",
"
\n",
" \n",
" 7 | \n",
" Preprocess | \n",
" 1 | \n",
"
\n",
" \n",
" 8 | \n",
" Imputation type | \n",
" simple | \n",
"
\n",
" \n",
" 9 | \n",
" Numeric imputation | \n",
" mean | \n",
"
\n",
" \n",
" 10 | \n",
" Categorical imputation | \n",
" constant | \n",
"
\n",
" \n",
" 11 | \n",
" Fold Generator | \n",
" KFold | \n",
"
\n",
" \n",
" 12 | \n",
" Fold Number | \n",
" 10 | \n",
"
\n",
" \n",
" 13 | \n",
" CPU Jobs | \n",
" -1 | \n",
"
\n",
" \n",
" 14 | \n",
" Log Experiment | \n",
" 0 | \n",
"
\n",
" \n",
" 15 | \n",
" Experiment Name | \n",
" reg-default-name | \n",
"
\n",
" \n",
" 16 | \n",
" USI | \n",
" 9345 | \n",
"
\n",
" \n",
"
\n"
],
"text/plain": [
""
]
},
"metadata": {},
"output_type": "display_data"
}
],
"source": [
"# Wildcard import: later cells call compare_models / create_model / evaluate_model unqualified.\n",
"from pycaret.regression import *\n",
"\n",
"# Set up the PyCaret regression experiment on df1 with O3 as the target.\n",
"# NOTE(review): the summary printed by this cell reports a KFold fold generator, while the\n",
"# rows are hourly and time-ordered - confirm that a time-aware split is not required.\n",
"exp_reg101 = setup(\n",
"    data=df1,\n",
"    target='O3',\n",
"    session_id=123,\n",
")"
]
},
{
"cell_type": "code",
"execution_count": 19,
"metadata": {},
"outputs": [
{
"data": {
"text/html": [
"\n",
"\n",
" \n",
" \n",
" | \n",
" Model | \n",
" MAE | \n",
" MSE | \n",
" RMSE | \n",
" R2 | \n",
" RMSLE | \n",
" MAPE | \n",
" TT (Sec) | \n",
"
\n",
" \n",
" \n",
" \n",
" catboost | \n",
" CatBoost Regressor | \n",
" 0.0043 | \n",
" 0.0000 | \n",
" 0.0063 | \n",
" 0.8809 | \n",
" 0.0061 | \n",
" 4.3849 | \n",
" 2.2550 | \n",
"
\n",
" \n",
" xgboost | \n",
" Extreme Gradient Boosting | \n",
" 0.0043 | \n",
" 0.0000 | \n",
" 0.0064 | \n",
" 0.8787 | \n",
" 0.0061 | \n",
" 6.0682 | \n",
" 0.4340 | \n",
"
\n",
" \n",
" et | \n",
" Extra Trees Regressor | \n",
" 0.0041 | \n",
" 0.0000 | \n",
" 0.0064 | \n",
" 0.8777 | \n",
" 0.0061 | \n",
" 3.4533 | \n",
" 0.9890 | \n",
"
\n",
" \n",
" lightgbm | \n",
" Light Gradient Boosting Machine | \n",
" 0.0048 | \n",
" 0.0000 | \n",
" 0.0070 | \n",
" 0.8536 | \n",
" 0.0067 | \n",
" 9.2379 | \n",
" 0.2320 | \n",
"
\n",
" \n",
" rf | \n",
" Random Forest Regressor | \n",
" 0.0045 | \n",
" 0.0001 | \n",
" 0.0071 | \n",
" 0.8495 | \n",
" 0.0068 | \n",
" 3.9257 | \n",
" 1.6330 | \n",
"
\n",
" \n",
" gbr | \n",
" Gradient Boosting Regressor | \n",
" 0.0062 | \n",
" 0.0001 | \n",
" 0.0089 | \n",
" 0.7629 | \n",
" 0.0086 | \n",
" 15.3717 | \n",
" 0.5790 | \n",
"
\n",
" \n",
" dt | \n",
" Decision Tree Regressor | \n",
" 0.0062 | \n",
" 0.0001 | \n",
" 0.0104 | \n",
" 0.6787 | \n",
" 0.0100 | \n",
" 1.7337 | \n",
" 0.0430 | \n",
"
\n",
" \n",
" lr | \n",
" Linear Regression | \n",
" 0.0084 | \n",
" 0.0001 | \n",
" 0.0115 | \n",
" 0.6051 | \n",
" 0.0110 | \n",
" 37.2053 | \n",
" 0.6290 | \n",
"
\n",
" \n",
" lar | \n",
" Least Angle Regression | \n",
" 0.0084 | \n",
" 0.0001 | \n",
" 0.0115 | \n",
" 0.6051 | \n",
" 0.0110 | \n",
" 37.5542 | \n",
" 0.0130 | \n",
"
\n",
" \n",
" br | \n",
" Bayesian Ridge | \n",
" 0.0084 | \n",
" 0.0001 | \n",
" 0.0115 | \n",
" 0.6051 | \n",
" 0.0110 | \n",
" 37.2033 | \n",
" 0.0150 | \n",
"
\n",
" \n",
" ridge | \n",
" Ridge Regression | \n",
" 0.0084 | \n",
" 0.0001 | \n",
" 0.0115 | \n",
" 0.6035 | \n",
" 0.0110 | \n",
" 37.8741 | \n",
" 0.0140 | \n",
"
\n",
" \n",
" knn | \n",
" K Neighbors Regressor | \n",
" 0.0084 | \n",
" 0.0001 | \n",
" 0.0118 | \n",
" 0.5852 | \n",
" 0.0114 | \n",
" 48.5364 | \n",
" 0.0410 | \n",
"
\n",
" \n",
" ada | \n",
" AdaBoost Regressor | \n",
" 0.0110 | \n",
" 0.0002 | \n",
" 0.0131 | \n",
" 0.4864 | \n",
" 0.0127 | \n",
" 143.3527 | \n",
" 0.3020 | \n",
"
\n",
" \n",
" omp | \n",
" Orthogonal Matching Pursuit | \n",
" 0.0118 | \n",
" 0.0002 | \n",
" 0.0151 | \n",
" 0.3208 | \n",
" 0.0146 | \n",
" 159.7325 | \n",
" 0.0110 | \n",
"
\n",
" \n",
" en | \n",
" Elastic Net | \n",
" 0.0119 | \n",
" 0.0002 | \n",
" 0.0151 | \n",
" 0.3201 | \n",
" 0.0146 | \n",
" 177.8048 | \n",
" 0.0100 | \n",
"
\n",
" \n",
" lasso | \n",
" Lasso Regression | \n",
" 0.0121 | \n",
" 0.0002 | \n",
" 0.0154 | \n",
" 0.2932 | \n",
" 0.0149 | \n",
" 188.8234 | \n",
" 0.0140 | \n",
"
\n",
" \n",
" huber | \n",
" Huber Regressor | \n",
" 0.0116 | \n",
" 0.0003 | \n",
" 0.0169 | \n",
" 0.0927 | \n",
" 0.0159 | \n",
" 31.1456 | \n",
" 0.2690 | \n",
"
\n",
" \n",
" llar | \n",
" Lasso Least Angle Regression | \n",
" 0.0147 | \n",
" 0.0003 | \n",
" 0.0183 | \n",
" -0.0009 | \n",
" 0.0177 | \n",
" 233.4750 | \n",
" 0.0150 | \n",
"
\n",
" \n",
" dummy | \n",
" Dummy Regressor | \n",
" 0.0147 | \n",
" 0.0003 | \n",
" 0.0183 | \n",
" -0.0009 | \n",
" 0.0177 | \n",
" 233.4750 | \n",
" 0.0120 | \n",
"
\n",
" \n",
" par | \n",
" Passive Aggressive Regressor | \n",
" 0.0430 | \n",
" 0.0021 | \n",
" 0.0458 | \n",
" -5.2891 | \n",
" 0.0439 | \n",
" 586.9311 | \n",
" 0.0130 | \n",
"
\n",
" \n",
"
\n"
],
"text/plain": [
""
]
},
"metadata": {},
"output_type": "display_data"
},
{
"data": {
"text/plain": [
""
]
},
"execution_count": 19,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"compare_models()\n"
]
},
{
"cell_type": "code",
"execution_count": 20,
"metadata": {},
"outputs": [
{
"data": {
"text/html": [
"\n",
"\n",
" \n",
" \n",
" | \n",
" MAE | \n",
" MSE | \n",
" RMSE | \n",
" R2 | \n",
" RMSLE | \n",
" MAPE | \n",
"
\n",
" \n",
" \n",
" \n",
" 0 | \n",
" 0.0043 | \n",
" 0.0000 | \n",
" 0.0061 | \n",
" 0.8947 | \n",
" 0.0059 | \n",
" 0.4868 | \n",
"
\n",
" \n",
" 1 | \n",
" 0.0042 | \n",
" 0.0000 | \n",
" 0.0064 | \n",
" 0.8761 | \n",
" 0.0061 | \n",
" 0.4432 | \n",
"
\n",
" \n",
" 2 | \n",
" 0.0043 | \n",
" 0.0000 | \n",
" 0.0066 | \n",
" 0.8744 | \n",
" 0.0063 | \n",
" 24.7232 | \n",
"
\n",
" \n",
" 3 | \n",
" 0.0042 | \n",
" 0.0000 | \n",
" 0.0062 | \n",
" 0.8876 | \n",
" 0.0059 | \n",
" 0.5085 | \n",
"
\n",
" \n",
" 4 | \n",
" 0.0042 | \n",
" 0.0000 | \n",
" 0.0061 | \n",
" 0.8934 | \n",
" 0.0059 | \n",
" 0.4957 | \n",
"
\n",
" \n",
" 5 | \n",
" 0.0043 | \n",
" 0.0000 | \n",
" 0.0062 | \n",
" 0.8721 | \n",
" 0.0060 | \n",
" 0.3841 | \n",
"
\n",
" \n",
" 6 | \n",
" 0.0046 | \n",
" 0.0000 | \n",
" 0.0069 | \n",
" 0.8635 | \n",
" 0.0066 | \n",
" 7.5846 | \n",
"
\n",
" \n",
" 7 | \n",
" 0.0044 | \n",
" 0.0000 | \n",
" 0.0064 | \n",
" 0.8815 | \n",
" 0.0061 | \n",
" 1.6161 | \n",
"
\n",
" \n",
" 8 | \n",
" 0.0043 | \n",
" 0.0000 | \n",
" 0.0063 | \n",
" 0.8763 | \n",
" 0.0060 | \n",
" 1.4757 | \n",
"
\n",
" \n",
" 9 | \n",
" 0.0042 | \n",
" 0.0000 | \n",
" 0.0059 | \n",
" 0.8894 | \n",
" 0.0057 | \n",
" 6.1310 | \n",
"
\n",
" \n",
" Mean | \n",
" 0.0043 | \n",
" 0.0000 | \n",
" 0.0063 | \n",
" 0.8809 | \n",
" 0.0061 | \n",
" 4.3849 | \n",
"
\n",
" \n",
" SD | \n",
" 0.0001 | \n",
" 0.0000 | \n",
" 0.0003 | \n",
" 0.0096 | \n",
" 0.0002 | \n",
" 7.2108 | \n",
"
\n",
" \n",
"
\n"
],
"text/plain": [
""
]
},
"metadata": {},
"output_type": "display_data"
}
],
"source": [
"# Build the CatBoost regressor through PyCaret; per-fold metrics are shown below.\n",
"catboost = create_model(estimator='catboost')"
]
},
{
"cell_type": "code",
"execution_count": 21,
"metadata": {},
"outputs": [
{
"data": {
"text/html": [
"\n",
"\n",
" \n",
" \n",
" | \n",
" MAE | \n",
" MSE | \n",
" RMSE | \n",
" R2 | \n",
" RMSLE | \n",
" MAPE | \n",
"
\n",
" \n",
" \n",
" \n",
" 0 | \n",
" 0.0043 | \n",
" 0.0000 | \n",
" 0.0063 | \n",
" 0.8883 | \n",
" 0.0061 | \n",
" 0.4205 | \n",
"
\n",
" \n",
" 1 | \n",
" 0.0042 | \n",
" 0.0000 | \n",
" 0.0066 | \n",
" 0.8697 | \n",
" 0.0063 | \n",
" 0.4188 | \n",
"
\n",
" \n",
" 2 | \n",
" 0.0043 | \n",
" 0.0000 | \n",
" 0.0067 | \n",
" 0.8706 | \n",
" 0.0064 | \n",
" 33.1689 | \n",
"
\n",
" \n",
" 3 | \n",
" 0.0042 | \n",
" 0.0000 | \n",
" 0.0062 | \n",
" 0.8869 | \n",
" 0.0060 | \n",
" 1.3693 | \n",
"
\n",
" \n",
" 4 | \n",
" 0.0040 | \n",
" 0.0000 | \n",
" 0.0060 | \n",
" 0.8987 | \n",
" 0.0057 | \n",
" 0.4582 | \n",
"
\n",
" \n",
" 5 | \n",
" 0.0042 | \n",
" 0.0000 | \n",
" 0.0061 | \n",
" 0.8761 | \n",
" 0.0059 | \n",
" 0.3537 | \n",
"
\n",
" \n",
" 6 | \n",
" 0.0045 | \n",
" 0.0000 | \n",
" 0.0070 | \n",
" 0.8611 | \n",
" 0.0067 | \n",
" 6.0101 | \n",
"
\n",
" \n",
" 7 | \n",
" 0.0045 | \n",
" 0.0000 | \n",
" 0.0065 | \n",
" 0.8756 | \n",
" 0.0062 | \n",
" 0.8085 | \n",
"
\n",
" \n",
" 8 | \n",
" 0.0043 | \n",
" 0.0000 | \n",
" 0.0063 | \n",
" 0.8742 | \n",
" 0.0061 | \n",
" 1.3488 | \n",
"
\n",
" \n",
" 9 | \n",
" 0.0042 | \n",
" 0.0000 | \n",
" 0.0060 | \n",
" 0.8858 | \n",
" 0.0058 | \n",
" 16.3256 | \n",
"
\n",
" \n",
" Mean | \n",
" 0.0043 | \n",
" 0.0000 | \n",
" 0.0064 | \n",
" 0.8787 | \n",
" 0.0061 | \n",
" 6.0682 | \n",
"
\n",
" \n",
" SD | \n",
" 0.0001 | \n",
" 0.0000 | \n",
" 0.0003 | \n",
" 0.0105 | \n",
" 0.0003 | \n",
" 10.1962 | \n",
"
\n",
" \n",
"
\n"
],
"text/plain": [
""
]
},
"metadata": {},
"output_type": "display_data"
}
],
"source": [
"# Build the XGBoost regressor through PyCaret; per-fold metrics are shown below.\n",
"xgboost = create_model(estimator='xgboost')"
]
},
{
"cell_type": "code",
"execution_count": 22,
"metadata": {},
"outputs": [
{
"data": {
"application/vnd.jupyter.widget-view+json": {
"model_id": "2937c8c598234ec88f05eaabfd90b449",
"version_major": 2,
"version_minor": 0
},
"text/plain": [
"interactive(children=(ToggleButtons(description='Plot Type:', icons=('',), options=(('Hyperparameters', 'param…"
]
},
"metadata": {},
"output_type": "display_data"
}
],
"source": [
"# Open PyCaret's interactive plot selector for the CatBoost model.\n",
"evaluate_model(estimator=catboost)"
]
},
{
"cell_type": "code",
"execution_count": 23,
"metadata": {},
"outputs": [
{
"data": {
"application/vnd.jupyter.widget-view+json": {
"model_id": "21257fbf25164af38d6669499e8ed2a2",
"version_major": 2,
"version_minor": 0
},
"text/plain": [
"interactive(children=(ToggleButtons(description='Plot Type:', icons=('',), options=(('Hyperparameters', 'param…"
]
},
"metadata": {},
"output_type": "display_data"
}
],
"source": [
"# Open PyCaret's interactive plot selector for the CatBoost model.\n",
"# NOTE(review): same call as the preceding cell - consider keeping a single copy.\n",
"evaluate_model(estimator=catboost)"
]
},
{
"cell_type": "code",
"execution_count": 24,
"metadata": {},
"outputs": [
{
"data": {
"application/vnd.jupyter.widget-view+json": {
"model_id": "7a05f3d53a314fb3b797cf0869ac11b2",
"version_major": 2,
"version_minor": 0
},
"text/plain": [
"interactive(children=(ToggleButtons(description='Plot Type:', icons=('',), options=(('Hyperparameters', 'param…"
]
},
"metadata": {},
"output_type": "display_data"
}
],
"source": [
"# Open PyCaret's interactive plot selector for the CatBoost model.\n",
"# NOTE(review): same call as the preceding cell - consider keeping a single copy.\n",
"evaluate_model(estimator=catboost)"
]
},
{
"cell_type": "code",
"execution_count": 25,
"metadata": {},
"outputs": [
{
"data": {
"application/vnd.jupyter.widget-view+json": {
"model_id": "1dc376a7450645b7ba8002498790e066",
"version_major": 2,
"version_minor": 0
},
"text/plain": [
"interactive(children=(ToggleButtons(description='Plot Type:', icons=('',), options=(('Hyperparameters', 'param…"
]
},
"metadata": {},
"output_type": "display_data"
}
],
"source": [
"# Open PyCaret's interactive plot selector for the CatBoost model.\n",
"# NOTE(review): same call as the preceding cell - consider keeping a single copy.\n",
"evaluate_model(estimator=catboost)"
]
},
{
"cell_type": "code",
"execution_count": 26,
"metadata": {},
"outputs": [
{
"data": {
"application/vnd.jupyter.widget-view+json": {
"model_id": "2c31c6d3b05148de9ff22f3bb9deb8fb",
"version_major": 2,
"version_minor": 0
},
"text/plain": [
"interactive(children=(ToggleButtons(description='Plot Type:', icons=('',), options=(('Hyperparameters', 'param…"
]
},
"metadata": {},
"output_type": "display_data"
}
],
"source": [
"# Open PyCaret's interactive plot selector for the CatBoost model.\n",
"# NOTE(review): same call as the preceding cell - consider keeping a single copy.\n",
"evaluate_model(estimator=catboost)"
]
},
{
"cell_type": "code",
"execution_count": 27,
"metadata": {},
"outputs": [
{
"data": {
"application/vnd.jupyter.widget-view+json": {
"model_id": "3a4d314bb5d14394a82fca54b524a473",
"version_major": 2,
"version_minor": 0
},
"text/plain": [
"interactive(children=(ToggleButtons(description='Plot Type:', icons=('',), options=(('Hyperparameters', 'param…"
]
},
"metadata": {},
"output_type": "display_data"
}
],
"source": [
"# Open PyCaret's interactive plot selector for the CatBoost model.\n",
"# NOTE(review): same call as the preceding cell - consider keeping a single copy.\n",
"evaluate_model(estimator=catboost)"
]
},
{
"cell_type": "code",
"execution_count": 18,
"metadata": {},
"outputs": [],
"source": [
"from xgboost import XGBRegressor\n",
"\n",
"# Stand-alone XGBoost regressor with explicit hyperparameters.\n",
"# gpu_id=-1 and predictor='auto' are no longer passed: both were the library defaults,\n",
"# and XGBoost 2.x deprecates them in favour of the `device` parameter.\n",
"# NOTE(review): this rebinds `xgb`, which the import cell defines as the alias of the\n",
"# xgboost module; the name is kept because later cells may rely on it.\n",
"xgb = XGBRegressor(\n",
"    base_score=0.5, booster='gbtree', callbacks=None,\n",
"    colsample_bylevel=1, colsample_bynode=1, colsample_bytree=0.7,\n",
"    early_stopping_rounds=None, enable_categorical=False,\n",
"    eval_metric=None, gamma=0, grow_policy='depthwise',\n",
"    importance_type=None, interaction_constraints='',\n",
"    learning_rate=0.1, max_bin=256, max_cat_to_onehot=4,\n",
"    max_delta_step=0, max_depth=9, max_leaves=0, min_child_weight=1,\n",
"    monotone_constraints='()', n_estimators=290,\n",
"    n_jobs=-1, num_parallel_tree=1, random_state=123,\n",
"    reg_alpha=0.05, reg_lambda=0.1,\n",
")"
]
},
{
"cell_type": "code",
"execution_count": 19,
"metadata": {},
"outputs": [
{
"data": {
"text/html": [
"\n",
"\n",
"
\n",
" \n",
" \n",
" | \n",
" dayofweek | \n",
" quarter | \n",
" month | \n",
" year | \n",
" dayofyear | \n",
" dayofmonth | \n",
" datehour | \n",
" weekofyear | \n",
" WD-Hour | \n",
" WS-Hour | \n",
" Temp-Hour | \n",
" SR-Hour | \n",
" RH-Hour | \n",
" NO2 | \n",
" O3 | \n",
"
\n",
" \n",
" date | \n",
" | \n",
" | \n",
" | \n",
" | \n",
" | \n",
" | \n",
" | \n",
" | \n",
" | \n",
" | \n",
" | \n",
" | \n",
" | \n",
" | \n",
" | \n",
"
\n",
" \n",
" \n",
" \n",
" 2013-01-27 00:00:00 | \n",
" 6.0 | \n",
" 1.0 | \n",
" 1.0 | \n",
" 2013.0 | \n",
" 27.0 | \n",
" 27.0 | \n",
" 0.0 | \n",
" 4.0 | \n",
" 114.0 | \n",
" 1.7 | \n",
" 18.1 | \n",
" 5.0 | \n",
" 100.0 | \n",
" 0.017000 | \n",
" 2.600000e-02 | \n",
"
\n",
" \n",
" 2013-01-27 01:00:00 | \n",
" 6.0 | \n",
" 1.0 | \n",
" 1.0 | \n",
" 2013.0 | \n",
" 27.0 | \n",
" 27.0 | \n",
" 1.0 | \n",
" 4.0 | \n",
" 151.0 | \n",
" 0.4 | \n",
" 17.8 | \n",
" 3.0 | \n",
" 89.0 | \n",
" 0.086000 | \n",
" 1.000000e-03 | \n",
"
\n",
" \n",
" 2013-01-27 02:00:00 | \n",
" 6.0 | \n",
" 1.0 | \n",
" 1.0 | \n",
" 2013.0 | \n",
" 27.0 | \n",
" 27.0 | \n",
" 2.0 | \n",
" 4.0 | \n",
" 175.0 | \n",
" 0.3 | \n",
" 17.8 | \n",
" 2.0 | \n",
" 88.0 | \n",
" 0.053000 | \n",
" 2.000000e-03 | \n",
"
\n",
" \n",
" 2013-01-27 03:00:00 | \n",
" 6.0 | \n",
" 1.0 | \n",
" 1.0 | \n",
" 2013.0 | \n",
" 27.0 | \n",
" 27.0 | \n",
" 3.0 | \n",
" 4.0 | \n",
" 264.0 | \n",
" 0.8 | \n",
" 16.8 | \n",
" 2.0 | \n",
" 92.0 | \n",
" 0.072000 | \n",
" 1.000000e-03 | \n",
"
\n",
" \n",
" 2013-01-27 04:00:00 | \n",
" 6.0 | \n",
" 1.0 | \n",
" 1.0 | \n",
" 2013.0 | \n",
" 27.0 | \n",
" 27.0 | \n",
" 4.0 | \n",
" 4.0 | \n",
" 187.0 | \n",
" 0.4 | \n",
" 16.2 | \n",
" 2.0 | \n",
" 94.0 | \n",
" 0.063000 | \n",
" 1.000000e-03 | \n",
"
\n",
" \n",
" ... | \n",
" ... | \n",
" ... | \n",
" ... | \n",
" ... | \n",
" ... | \n",
" ... | \n",
" ... | \n",
" ... | \n",
" ... | \n",
" ... | \n",
" ... | \n",
" ... | \n",
" ... | \n",
" ... | \n",
" ... | \n",
"
\n",
" \n",
" 2015-12-31 18:00:00 | \n",
" 3.0 | \n",
" 4.0 | \n",
" 12.0 | \n",
" 2015.0 | \n",
" 365.0 | \n",
" 31.0 | \n",
" 18.0 | \n",
" 53.0 | \n",
" 85.0 | \n",
" 1.4 | \n",
" 14.6 | \n",
" 10.0 | \n",
" 76.0 | \n",
" 0.076199 | \n",
" 1.747050e-03 | \n",
"
\n",
" \n",
" 2015-12-31 19:00:00 | \n",
" 3.0 | \n",
" 4.0 | \n",
" 12.0 | \n",
" 2015.0 | \n",
" 365.0 | \n",
" 31.0 | \n",
" 19.0 | \n",
" 53.0 | \n",
" 99.0 | \n",
" 1.3 | \n",
" 14.6 | \n",
" 9.0 | \n",
" 75.0 | \n",
" 0.098153 | \n",
" 1.108100e-04 | \n",
"
\n",
" \n",
" 2015-12-31 20:00:00 | \n",
" 3.0 | \n",
" 4.0 | \n",
" 12.0 | \n",
" 2015.0 | \n",
" 365.0 | \n",
" 31.0 | \n",
" 20.0 | \n",
" 53.0 | \n",
" 210.0 | \n",
" 0.7 | \n",
" 14.5 | \n",
" 10.0 | \n",
" 79.0 | \n",
" 0.106758 | \n",
" 8.930000e-06 | \n",
"
\n",
" \n",
" 2015-12-31 21:00:00 | \n",
" 3.0 | \n",
" 4.0 | \n",
" 12.0 | \n",
" 2015.0 | \n",
" 365.0 | \n",
" 31.0 | \n",
" 21.0 | \n",
" 53.0 | \n",
" 185.0 | \n",
" 0.8 | \n",
" 14.6 | \n",
" 10.0 | \n",
" 81.0 | \n",
" 0.087641 | \n",
" 1.000000e-08 | \n",
"
\n",
" \n",
" 2015-12-31 22:00:00 | \n",
" 3.0 | \n",
" 4.0 | \n",
" 12.0 | \n",
" 2015.0 | \n",
" 365.0 | \n",
" 31.0 | \n",
" 22.0 | \n",
" 53.0 | \n",
" 147.0 | \n",
" 0.9 | \n",
" 14.1 | \n",
" 10.0 | \n",
" 85.0 | \n",
" 0.089011 | \n",
" 0.000000e+00 | \n",
"
\n",
" \n",
"
\n",
"
25655 rows × 15 columns
\n",
"
"
],
"text/plain": [
" dayofweek quarter month year dayofyear dayofmonth datehour weekofyear WD-Hour WS-Hour Temp-Hour SR-Hour RH-Hour NO2 O3\n",
"date \n",
"2013-01-27 00:00:00 6.0 1.0 1.0 2013.0 27.0 27.0 0.0 4.0 114.0 1.7 18.1 5.0 100.0 0.017000 2.600000e-02\n",
"2013-01-27 01:00:00 6.0 1.0 1.0 2013.0 27.0 27.0 1.0 4.0 151.0 0.4 17.8 3.0 89.0 0.086000 1.000000e-03\n",
"2013-01-27 02:00:00 6.0 1.0 1.0 2013.0 27.0 27.0 2.0 4.0 175.0 0.3 17.8 2.0 88.0 0.053000 2.000000e-03\n",
"2013-01-27 03:00:00 6.0 1.0 1.0 2013.0 27.0 27.0 3.0 4.0 264.0 0.8 16.8 2.0 92.0 0.072000 1.000000e-03\n",
"2013-01-27 04:00:00 6.0 1.0 1.0 2013.0 27.0 27.0 4.0 4.0 187.0 0.4 16.2 2.0 94.0 0.063000 1.000000e-03\n",
"... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ...\n",
"2015-12-31 18:00:00 3.0 4.0 12.0 2015.0 365.0 31.0 18.0 53.0 85.0 1.4 14.6 10.0 76.0 0.076199 1.747050e-03\n",
"2015-12-31 19:00:00 3.0 4.0 12.0 2015.0 365.0 31.0 19.0 53.0 99.0 1.3 14.6 9.0 75.0 0.098153 1.108100e-04\n",
"2015-12-31 20:00:00 3.0 4.0 12.0 2015.0 365.0 31.0 20.0 53.0 210.0 0.7 14.5 10.0 79.0 0.106758 8.930000e-06\n",
"2015-12-31 21:00:00 3.0 4.0 12.0 2015.0 365.0 31.0 21.0 53.0 185.0 0.8 14.6 10.0 81.0 0.087641 1.000000e-08\n",
"2015-12-31 22:00:00 3.0 4.0 12.0 2015.0 365.0 31.0 22.0 53.0 147.0 0.9 14.1 10.0 85.0 0.089011 0.000000e+00\n",
"\n",
"[25655 rows x 15 columns]"
]
},
"execution_count": 19,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"df1"
]
},
{
"cell_type": "code",
"execution_count": 20,
"metadata": {},
"outputs": [
{
"data": {
"text/plain": [
"Index(['dayofweek', 'quarter', 'month', 'year', 'dayofyear', 'dayofmonth', 'datehour', 'weekofyear', 'WD-Hour', 'WS-Hour', 'Temp-Hour', 'SR-Hour', 'RH-Hour', 'NO2', 'O3'], dtype='object')"
]
},
"execution_count": 20,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"from sklearn.model_selection import train_test_split\n",
"df1.columns"
]
},
{
"cell_type": "code",
"execution_count": 21,
"metadata": {},
"outputs": [
{
"data": {
"text/html": [
"\n",
"\n",
"
\n",
" \n",
" \n",
" | \n",
" dayofweek | \n",
" quarter | \n",
" month | \n",
" year | \n",
" dayofyear | \n",
" dayofmonth | \n",
" datehour | \n",
" weekofyear | \n",
" WD_Hour | \n",
" WS_Hour | \n",
" Temp_Hour | \n",
" SR_Hour | \n",
" RH_Hour | \n",
" NO2 | \n",
" O3 | \n",
"
\n",
" \n",
" date | \n",
" | \n",
" | \n",
" | \n",
" | \n",
" | \n",
" | \n",
" | \n",
" | \n",
" | \n",
" | \n",
" | \n",
" | \n",
" | \n",
" | \n",
" | \n",
"
\n",
" \n",
" \n",
" \n",
" 2013-01-27 00:00:00 | \n",
" 6.0 | \n",
" 1.0 | \n",
" 1.0 | \n",
" 2013.0 | \n",
" 27.0 | \n",
" 27.0 | \n",
" 0.0 | \n",
" 4.0 | \n",
" 114.0 | \n",
" 1.7 | \n",
" 18.1 | \n",
" 5.0 | \n",
" 100.0 | \n",
" 0.017000 | \n",
" 2.600000e-02 | \n",
"
\n",
" \n",
" 2013-01-27 01:00:00 | \n",
" 6.0 | \n",
" 1.0 | \n",
" 1.0 | \n",
" 2013.0 | \n",
" 27.0 | \n",
" 27.0 | \n",
" 1.0 | \n",
" 4.0 | \n",
" 151.0 | \n",
" 0.4 | \n",
" 17.8 | \n",
" 3.0 | \n",
" 89.0 | \n",
" 0.086000 | \n",
" 1.000000e-03 | \n",
"
\n",
" \n",
" 2013-01-27 02:00:00 | \n",
" 6.0 | \n",
" 1.0 | \n",
" 1.0 | \n",
" 2013.0 | \n",
" 27.0 | \n",
" 27.0 | \n",
" 2.0 | \n",
" 4.0 | \n",
" 175.0 | \n",
" 0.3 | \n",
" 17.8 | \n",
" 2.0 | \n",
" 88.0 | \n",
" 0.053000 | \n",
" 2.000000e-03 | \n",
"
\n",
" \n",
" 2013-01-27 03:00:00 | \n",
" 6.0 | \n",
" 1.0 | \n",
" 1.0 | \n",
" 2013.0 | \n",
" 27.0 | \n",
" 27.0 | \n",
" 3.0 | \n",
" 4.0 | \n",
" 264.0 | \n",
" 0.8 | \n",
" 16.8 | \n",
" 2.0 | \n",
" 92.0 | \n",
" 0.072000 | \n",
" 1.000000e-03 | \n",
"
\n",
" \n",
" 2013-01-27 04:00:00 | \n",
" 6.0 | \n",
" 1.0 | \n",
" 1.0 | \n",
" 2013.0 | \n",
" 27.0 | \n",
" 27.0 | \n",
" 4.0 | \n",
" 4.0 | \n",
" 187.0 | \n",
" 0.4 | \n",
" 16.2 | \n",
" 2.0 | \n",
" 94.0 | \n",
" 0.063000 | \n",
" 1.000000e-03 | \n",
"
\n",
" \n",
" ... | \n",
" ... | \n",
" ... | \n",
" ... | \n",
" ... | \n",
" ... | \n",
" ... | \n",
" ... | \n",
" ... | \n",
" ... | \n",
" ... | \n",
" ... | \n",
" ... | \n",
" ... | \n",
" ... | \n",
" ... | \n",
"
\n",
" \n",
" 2015-12-31 18:00:00 | \n",
" 3.0 | \n",
" 4.0 | \n",
" 12.0 | \n",
" 2015.0 | \n",
" 365.0 | \n",
" 31.0 | \n",
" 18.0 | \n",
" 53.0 | \n",
" 85.0 | \n",
" 1.4 | \n",
" 14.6 | \n",
" 10.0 | \n",
" 76.0 | \n",
" 0.076199 | \n",
" 1.747050e-03 | \n",
"
\n",
" \n",
" 2015-12-31 19:00:00 | \n",
" 3.0 | \n",
" 4.0 | \n",
" 12.0 | \n",
" 2015.0 | \n",
" 365.0 | \n",
" 31.0 | \n",
" 19.0 | \n",
" 53.0 | \n",
" 99.0 | \n",
" 1.3 | \n",
" 14.6 | \n",
" 9.0 | \n",
" 75.0 | \n",
" 0.098153 | \n",
" 1.108100e-04 | \n",
"
\n",
" \n",
" 2015-12-31 20:00:00 | \n",
" 3.0 | \n",
" 4.0 | \n",
" 12.0 | \n",
" 2015.0 | \n",
" 365.0 | \n",
" 31.0 | \n",
" 20.0 | \n",
" 53.0 | \n",
" 210.0 | \n",
" 0.7 | \n",
" 14.5 | \n",
" 10.0 | \n",
" 79.0 | \n",
" 0.106758 | \n",
" 8.930000e-06 | \n",
"
\n",
" \n",
" 2015-12-31 21:00:00 | \n",
" 3.0 | \n",
" 4.0 | \n",
" 12.0 | \n",
" 2015.0 | \n",
" 365.0 | \n",
" 31.0 | \n",
" 21.0 | \n",
" 53.0 | \n",
" 185.0 | \n",
" 0.8 | \n",
" 14.6 | \n",
" 10.0 | \n",
" 81.0 | \n",
" 0.087641 | \n",
" 1.000000e-08 | \n",
"
\n",
" \n",
" 2015-12-31 22:00:00 | \n",
" 3.0 | \n",
" 4.0 | \n",
" 12.0 | \n",
" 2015.0 | \n",
" 365.0 | \n",
" 31.0 | \n",
" 22.0 | \n",
" 53.0 | \n",
" 147.0 | \n",
" 0.9 | \n",
" 14.1 | \n",
" 10.0 | \n",
" 85.0 | \n",
" 0.089011 | \n",
" 0.000000e+00 | \n",
"
\n",
" \n",
"
\n",
"
25655 rows × 15 columns
\n",
"
"
],
"text/plain": [
" dayofweek quarter month year dayofyear dayofmonth datehour weekofyear WD_Hour WS_Hour Temp_Hour SR_Hour RH_Hour NO2 O3\n",
"date \n",
"2013-01-27 00:00:00 6.0 1.0 1.0 2013.0 27.0 27.0 0.0 4.0 114.0 1.7 18.1 5.0 100.0 0.017000 2.600000e-02\n",
"2013-01-27 01:00:00 6.0 1.0 1.0 2013.0 27.0 27.0 1.0 4.0 151.0 0.4 17.8 3.0 89.0 0.086000 1.000000e-03\n",
"2013-01-27 02:00:00 6.0 1.0 1.0 2013.0 27.0 27.0 2.0 4.0 175.0 0.3 17.8 2.0 88.0 0.053000 2.000000e-03\n",
"2013-01-27 03:00:00 6.0 1.0 1.0 2013.0 27.0 27.0 3.0 4.0 264.0 0.8 16.8 2.0 92.0 0.072000 1.000000e-03\n",
"2013-01-27 04:00:00 6.0 1.0 1.0 2013.0 27.0 27.0 4.0 4.0 187.0 0.4 16.2 2.0 94.0 0.063000 1.000000e-03\n",
"... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ...\n",
"2015-12-31 18:00:00 3.0 4.0 12.0 2015.0 365.0 31.0 18.0 53.0 85.0 1.4 14.6 10.0 76.0 0.076199 1.747050e-03\n",
"2015-12-31 19:00:00 3.0 4.0 12.0 2015.0 365.0 31.0 19.0 53.0 99.0 1.3 14.6 9.0 75.0 0.098153 1.108100e-04\n",
"2015-12-31 20:00:00 3.0 4.0 12.0 2015.0 365.0 31.0 20.0 53.0 210.0 0.7 14.5 10.0 79.0 0.106758 8.930000e-06\n",
"2015-12-31 21:00:00 3.0 4.0 12.0 2015.0 365.0 31.0 21.0 53.0 185.0 0.8 14.6 10.0 81.0 0.087641 1.000000e-08\n",
"2015-12-31 22:00:00 3.0 4.0 12.0 2015.0 365.0 31.0 22.0 53.0 147.0 0.9 14.1 10.0 85.0 0.089011 0.000000e+00\n",
"\n",
"[25655 rows x 15 columns]"
]
},
"execution_count": 21,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"# Replace the hyphen in each '*-Hour' column name with an underscore.\n",
"df1.columns = (\n",
"    df1.columns\n",
"    .str.replace('WD-Hour', 'WD_Hour')\n",
"    .str.replace('WS-Hour', 'WS_Hour')\n",
"    .str.replace('Temp-Hour', 'Temp_Hour')\n",
"    .str.replace('SR-Hour', 'SR_Hour')\n",
"    .str.replace('RH-Hour', 'RH_Hour')\n",
")\n",
"df1"
]
},
{
"cell_type": "code",
"execution_count": 22,
"metadata": {},
"outputs": [
{
"data": {
"text/html": [
"\n",
"\n",
"
\n",
" \n",
" \n",
" | \n",
" year | \n",
" month | \n",
" quarter | \n",
" dayofyear | \n",
" dayofmonth | \n",
" weekofyear | \n",
" dayofweek | \n",
" datehour | \n",
" WD_Hour | \n",
" WS_Hour | \n",
" Temp_Hour | \n",
" SR_Hour | \n",
" RH_Hour | \n",
" NO2 | \n",
" O3 | \n",
"
\n",
" \n",
" date | \n",
" | \n",
" | \n",
" | \n",
" | \n",
" | \n",
" | \n",
" | \n",
" | \n",
" | \n",
" | \n",
" | \n",
" | \n",
" | \n",
" | \n",
" | \n",
"
\n",
" \n",
" \n",
" \n",
" 2013-01-27 00:00:00 | \n",
" 2013.0 | \n",
" 1.0 | \n",
" 1.0 | \n",
" 27.0 | \n",
" 27.0 | \n",
" 4.0 | \n",
" 6.0 | \n",
" 0.0 | \n",
" 114.0 | \n",
" 1.7 | \n",
" 18.1 | \n",
" 5.0 | \n",
" 100.0 | \n",
" 0.017000 | \n",
" 2.600000e-02 | \n",
"
\n",
" \n",
" 2013-01-27 01:00:00 | \n",
" 2013.0 | \n",
" 1.0 | \n",
" 1.0 | \n",
" 27.0 | \n",
" 27.0 | \n",
" 4.0 | \n",
" 6.0 | \n",
" 1.0 | \n",
" 151.0 | \n",
" 0.4 | \n",
" 17.8 | \n",
" 3.0 | \n",
" 89.0 | \n",
" 0.086000 | \n",
" 1.000000e-03 | \n",
"
\n",
" \n",
" 2013-01-27 02:00:00 | \n",
" 2013.0 | \n",
" 1.0 | \n",
" 1.0 | \n",
" 27.0 | \n",
" 27.0 | \n",
" 4.0 | \n",
" 6.0 | \n",
" 2.0 | \n",
" 175.0 | \n",
" 0.3 | \n",
" 17.8 | \n",
" 2.0 | \n",
" 88.0 | \n",
" 0.053000 | \n",
" 2.000000e-03 | \n",
"
\n",
" \n",
" 2013-01-27 03:00:00 | \n",
" 2013.0 | \n",
" 1.0 | \n",
" 1.0 | \n",
" 27.0 | \n",
" 27.0 | \n",
" 4.0 | \n",
" 6.0 | \n",
" 3.0 | \n",
" 264.0 | \n",
" 0.8 | \n",
" 16.8 | \n",
" 2.0 | \n",
" 92.0 | \n",
" 0.072000 | \n",
" 1.000000e-03 | \n",
"
\n",
" \n",
" 2013-01-27 04:00:00 | \n",
" 2013.0 | \n",
" 1.0 | \n",
" 1.0 | \n",
" 27.0 | \n",
" 27.0 | \n",
" 4.0 | \n",
" 6.0 | \n",
" 4.0 | \n",
" 187.0 | \n",
" 0.4 | \n",
" 16.2 | \n",
" 2.0 | \n",
" 94.0 | \n",
" 0.063000 | \n",
" 1.000000e-03 | \n",
"
\n",
" \n",
" ... | \n",
" ... | \n",
" ... | \n",
" ... | \n",
" ... | \n",
" ... | \n",
" ... | \n",
" ... | \n",
" ... | \n",
" ... | \n",
" ... | \n",
" ... | \n",
" ... | \n",
" ... | \n",
" ... | \n",
" ... | \n",
"
\n",
" \n",
" 2015-12-31 18:00:00 | \n",
" 2015.0 | \n",
" 12.0 | \n",
" 4.0 | \n",
" 365.0 | \n",
" 31.0 | \n",
" 53.0 | \n",
" 3.0 | \n",
" 18.0 | \n",
" 85.0 | \n",
" 1.4 | \n",
" 14.6 | \n",
" 10.0 | \n",
" 76.0 | \n",
" 0.076199 | \n",
" 1.747050e-03 | \n",
"
\n",
" \n",
" 2015-12-31 19:00:00 | \n",
" 2015.0 | \n",
" 12.0 | \n",
" 4.0 | \n",
" 365.0 | \n",
" 31.0 | \n",
" 53.0 | \n",
" 3.0 | \n",
" 19.0 | \n",
" 99.0 | \n",
" 1.3 | \n",
" 14.6 | \n",
" 9.0 | \n",
" 75.0 | \n",
" 0.098153 | \n",
" 1.108100e-04 | \n",
"
\n",
" \n",
" 2015-12-31 20:00:00 | \n",
" 2015.0 | \n",
" 12.0 | \n",
" 4.0 | \n",
" 365.0 | \n",
" 31.0 | \n",
" 53.0 | \n",
" 3.0 | \n",
" 20.0 | \n",
" 210.0 | \n",
" 0.7 | \n",
" 14.5 | \n",
" 10.0 | \n",
" 79.0 | \n",
" 0.106758 | \n",
" 8.930000e-06 | \n",
"
\n",
" \n",
" 2015-12-31 21:00:00 | \n",
" 2015.0 | \n",
" 12.0 | \n",
" 4.0 | \n",
" 365.0 | \n",
" 31.0 | \n",
" 53.0 | \n",
" 3.0 | \n",
" 21.0 | \n",
" 185.0 | \n",
" 0.8 | \n",
" 14.6 | \n",
" 10.0 | \n",
" 81.0 | \n",
" 0.087641 | \n",
" 1.000000e-08 | \n",
"
\n",
" \n",
" 2015-12-31 22:00:00 | \n",
" 2015.0 | \n",
" 12.0 | \n",
" 4.0 | \n",
" 365.0 | \n",
" 31.0 | \n",
" 53.0 | \n",
" 3.0 | \n",
" 22.0 | \n",
" 147.0 | \n",
" 0.9 | \n",
" 14.1 | \n",
" 10.0 | \n",
" 85.0 | \n",
" 0.089011 | \n",
" 0.000000e+00 | \n",
"
\n",
" \n",
"
\n",
"
25655 rows × 15 columns
\n",
"
"
],
"text/plain": [
" year month quarter dayofyear dayofmonth weekofyear dayofweek datehour WD_Hour WS_Hour Temp_Hour SR_Hour RH_Hour NO2 O3\n",
"date \n",
"2013-01-27 00:00:00 2013.0 1.0 1.0 27.0 27.0 4.0 6.0 0.0 114.0 1.7 18.1 5.0 100.0 0.017000 2.600000e-02\n",
"2013-01-27 01:00:00 2013.0 1.0 1.0 27.0 27.0 4.0 6.0 1.0 151.0 0.4 17.8 3.0 89.0 0.086000 1.000000e-03\n",
"2013-01-27 02:00:00 2013.0 1.0 1.0 27.0 27.0 4.0 6.0 2.0 175.0 0.3 17.8 2.0 88.0 0.053000 2.000000e-03\n",
"2013-01-27 03:00:00 2013.0 1.0 1.0 27.0 27.0 4.0 6.0 3.0 264.0 0.8 16.8 2.0 92.0 0.072000 1.000000e-03\n",
"2013-01-27 04:00:00 2013.0 1.0 1.0 27.0 27.0 4.0 6.0 4.0 187.0 0.4 16.2 2.0 94.0 0.063000 1.000000e-03\n",
"... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ...\n",
"2015-12-31 18:00:00 2015.0 12.0 4.0 365.0 31.0 53.0 3.0 18.0 85.0 1.4 14.6 10.0 76.0 0.076199 1.747050e-03\n",
"2015-12-31 19:00:00 2015.0 12.0 4.0 365.0 31.0 53.0 3.0 19.0 99.0 1.3 14.6 9.0 75.0 0.098153 1.108100e-04\n",
"2015-12-31 20:00:00 2015.0 12.0 4.0 365.0 31.0 53.0 3.0 20.0 210.0 0.7 14.5 10.0 79.0 0.106758 8.930000e-06\n",
"2015-12-31 21:00:00 2015.0 12.0 4.0 365.0 31.0 53.0 3.0 21.0 185.0 0.8 14.6 10.0 81.0 0.087641 1.000000e-08\n",
"2015-12-31 22:00:00 2015.0 12.0 4.0 365.0 31.0 53.0 3.0 22.0 147.0 0.9 14.1 10.0 85.0 0.089011 0.000000e+00\n",
"\n",
"[25655 rows x 15 columns]"
]
},
"execution_count": 22,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"# Desired column order: calendar fields first, then the hourly measurements,\n",
"# then NO2, with O3 as the final column.\n",
"cols = [\n",
"    'year', 'month', 'quarter', 'dayofyear', 'dayofmonth', 'weekofyear',\n",
"    'dayofweek', 'datehour',\n",
"    'WD_Hour', 'WS_Hour', 'Temp_Hour', 'SR_Hour', 'RH_Hour',\n",
"    'NO2', 'O3',\n",
"]\n",
"df1 = df1.loc[:, cols]\n",
"df1"
]
},
{
"cell_type": "code",
"execution_count": 23,
"metadata": {},
"outputs": [],
"source": [
"# Features: every column except the last. Target: the last column of df1.\n",
"X = df1.iloc[:, :-1]\n",
"y = df1.iloc[:, -1]"
]
},
{
"cell_type": "code",
"execution_count": 24,
"metadata": {},
"outputs": [
{
"data": {
"text/html": [
"\n",
"\n",
"
\n",
" \n",
" \n",
" | \n",
" year | \n",
" month | \n",
" quarter | \n",
" dayofyear | \n",
" dayofmonth | \n",
" weekofyear | \n",
" dayofweek | \n",
" datehour | \n",
" WD_Hour | \n",
" WS_Hour | \n",
" Temp_Hour | \n",
" SR_Hour | \n",
" RH_Hour | \n",
" NO2 | \n",
"
\n",
" \n",
" date | \n",
" | \n",
" | \n",
" | \n",
" | \n",
" | \n",
" | \n",
" | \n",
" | \n",
" | \n",
" | \n",
" | \n",
" | \n",
" | \n",
" | \n",
"
\n",
" \n",
" \n",
" \n",
" 2013-01-27 00:00:00 | \n",
" 2013.0 | \n",
" 1.0 | \n",
" 1.0 | \n",
" 27.0 | \n",
" 27.0 | \n",
" 4.0 | \n",
" 6.0 | \n",
" 0.0 | \n",
" 114.0 | \n",
" 1.7 | \n",
" 18.1 | \n",
" 5.0 | \n",
" 100.0 | \n",
" 0.017000 | \n",
"
\n",
" \n",
" 2013-01-27 01:00:00 | \n",
" 2013.0 | \n",
" 1.0 | \n",
" 1.0 | \n",
" 27.0 | \n",
" 27.0 | \n",
" 4.0 | \n",
" 6.0 | \n",
" 1.0 | \n",
" 151.0 | \n",
" 0.4 | \n",
" 17.8 | \n",
" 3.0 | \n",
" 89.0 | \n",
" 0.086000 | \n",
"
\n",
" \n",
" 2013-01-27 02:00:00 | \n",
" 2013.0 | \n",
" 1.0 | \n",
" 1.0 | \n",
" 27.0 | \n",
" 27.0 | \n",
" 4.0 | \n",
" 6.0 | \n",
" 2.0 | \n",
" 175.0 | \n",
" 0.3 | \n",
" 17.8 | \n",
" 2.0 | \n",
" 88.0 | \n",
" 0.053000 | \n",
"
\n",
" \n",
" 2013-01-27 03:00:00 | \n",
" 2013.0 | \n",
" 1.0 | \n",
" 1.0 | \n",
" 27.0 | \n",
" 27.0 | \n",
" 4.0 | \n",
" 6.0 | \n",
" 3.0 | \n",
" 264.0 | \n",
" 0.8 | \n",
" 16.8 | \n",
" 2.0 | \n",
" 92.0 | \n",
" 0.072000 | \n",
"
\n",
" \n",
" 2013-01-27 04:00:00 | \n",
" 2013.0 | \n",
" 1.0 | \n",
" 1.0 | \n",
" 27.0 | \n",
" 27.0 | \n",
" 4.0 | \n",
" 6.0 | \n",
" 4.0 | \n",
" 187.0 | \n",
" 0.4 | \n",
" 16.2 | \n",
" 2.0 | \n",
" 94.0 | \n",
" 0.063000 | \n",
"
\n",
" \n",
" ... | \n",
" ... | \n",
" ... | \n",
" ... | \n",
" ... | \n",
" ... | \n",
" ... | \n",
" ... | \n",
" ... | \n",
" ... | \n",
" ... | \n",
" ... | \n",
" ... | \n",
" ... | \n",
" ... | \n",
"
\n",
" \n",
" 2015-12-31 18:00:00 | \n",
" 2015.0 | \n",
" 12.0 | \n",
" 4.0 | \n",
" 365.0 | \n",
" 31.0 | \n",
" 53.0 | \n",
" 3.0 | \n",
" 18.0 | \n",
" 85.0 | \n",
" 1.4 | \n",
" 14.6 | \n",
" 10.0 | \n",
" 76.0 | \n",
" 0.076199 | \n",
"
\n",
" \n",
" 2015-12-31 19:00:00 | \n",
" 2015.0 | \n",
" 12.0 | \n",
" 4.0 | \n",
" 365.0 | \n",
" 31.0 | \n",
" 53.0 | \n",
" 3.0 | \n",
" 19.0 | \n",
" 99.0 | \n",
" 1.3 | \n",
" 14.6 | \n",
" 9.0 | \n",
" 75.0 | \n",
" 0.098153 | \n",
"
\n",
" \n",
" 2015-12-31 20:00:00 | \n",
" 2015.0 | \n",
" 12.0 | \n",
" 4.0 | \n",
" 365.0 | \n",
" 31.0 | \n",
" 53.0 | \n",
" 3.0 | \n",
" 20.0 | \n",
" 210.0 | \n",
" 0.7 | \n",
" 14.5 | \n",
" 10.0 | \n",
" 79.0 | \n",
" 0.106758 | \n",
"
\n",
" \n",
" 2015-12-31 21:00:00 | \n",
" 2015.0 | \n",
" 12.0 | \n",
" 4.0 | \n",
" 365.0 | \n",
" 31.0 | \n",
" 53.0 | \n",
" 3.0 | \n",
" 21.0 | \n",
" 185.0 | \n",
" 0.8 | \n",
" 14.6 | \n",
" 10.0 | \n",
" 81.0 | \n",
" 0.087641 | \n",
"
\n",
" \n",
" 2015-12-31 22:00:00 | \n",
" 2015.0 | \n",
" 12.0 | \n",
" 4.0 | \n",
" 365.0 | \n",
" 31.0 | \n",
" 53.0 | \n",
" 3.0 | \n",
" 22.0 | \n",
" 147.0 | \n",
" 0.9 | \n",
" 14.1 | \n",
" 10.0 | \n",
" 85.0 | \n",
" 0.089011 | \n",
"
\n",
" \n",
"
\n",
"
25655 rows × 14 columns
\n",
"
"
],
"text/plain": [
" year month quarter dayofyear dayofmonth weekofyear dayofweek datehour WD_Hour WS_Hour Temp_Hour SR_Hour RH_Hour NO2\n",
"date \n",
"2013-01-27 00:00:00 2013.0 1.0 1.0 27.0 27.0 4.0 6.0 0.0 114.0 1.7 18.1 5.0 100.0 0.017000\n",
"2013-01-27 01:00:00 2013.0 1.0 1.0 27.0 27.0 4.0 6.0 1.0 151.0 0.4 17.8 3.0 89.0 0.086000\n",
"2013-01-27 02:00:00 2013.0 1.0 1.0 27.0 27.0 4.0 6.0 2.0 175.0 0.3 17.8 2.0 88.0 0.053000\n",
"2013-01-27 03:00:00 2013.0 1.0 1.0 27.0 27.0 4.0 6.0 3.0 264.0 0.8 16.8 2.0 92.0 0.072000\n",
"2013-01-27 04:00:00 2013.0 1.0 1.0 27.0 27.0 4.0 6.0 4.0 187.0 0.4 16.2 2.0 94.0 0.063000\n",
"... ... ... ... ... ... ... ... ... ... ... ... ... ... ...\n",
"2015-12-31 18:00:00 2015.0 12.0 4.0 365.0 31.0 53.0 3.0 18.0 85.0 1.4 14.6 10.0 76.0 0.076199\n",
"2015-12-31 19:00:00 2015.0 12.0 4.0 365.0 31.0 53.0 3.0 19.0 99.0 1.3 14.6 9.0 75.0 0.098153\n",
"2015-12-31 20:00:00 2015.0 12.0 4.0 365.0 31.0 53.0 3.0 20.0 210.0 0.7 14.5 10.0 79.0 0.106758\n",
"2015-12-31 21:00:00 2015.0 12.0 4.0 365.0 31.0 53.0 3.0 21.0 185.0 0.8 14.6 10.0 81.0 0.087641\n",
"2015-12-31 22:00:00 2015.0 12.0 4.0 365.0 31.0 53.0 3.0 22.0 147.0 0.9 14.1 10.0 85.0 0.089011\n",
"\n",
"[25655 rows x 14 columns]"
]
},
"execution_count": 24,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"# Inspect the feature matrix (every column of df1 except the last).\n",
"X"
]
},
{
"cell_type": "code",
"execution_count": 25,
"metadata": {},
"outputs": [
{
"data": {
"text/plain": [
"date\n",
"2013-01-27 00:00:00 2.600000e-02\n",
"2013-01-27 01:00:00 1.000000e-03\n",
"2013-01-27 02:00:00 2.000000e-03\n",
"2013-01-27 03:00:00 1.000000e-03\n",
"2013-01-27 04:00:00 1.000000e-03\n",
" ... \n",
"2015-12-31 18:00:00 1.747050e-03\n",
"2015-12-31 19:00:00 1.108100e-04\n",
"2015-12-31 20:00:00 8.930000e-06\n",
"2015-12-31 21:00:00 1.000000e-08\n",
"2015-12-31 22:00:00 0.000000e+00\n",
"Freq: H, Name: O3, Length: 25655, dtype: float64"
]
},
"execution_count": 25,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"# Inspect the target series (the last column of df1).\n",
"y"
]
},
{
"cell_type": "code",
"execution_count": 26,
"metadata": {},
"outputs": [],
"source": [
"# Hold out 20% of the rows for testing; random_state pins the split.\n",
"# NOTE(review): train_test_split shuffles by default, so hourly rows that are\n",
"# adjacent in time end up on both sides of the split - confirm this is intended.\n",
"train_X, test_X, train_y, test_y = train_test_split(\n",
"    X, y, test_size=0.2, random_state=123\n",
")"
]
},
{
"cell_type": "code",
"execution_count": 27,
"metadata": {},
"outputs": [
{
"data": {
"text/plain": [
"XGBRegressor(base_score=0.5, booster='gbtree', callbacks=None,\n",
" colsample_bylevel=1, colsample_bynode=1, colsample_bytree=0.7,\n",
" early_stopping_rounds=None, enable_categorical=False,\n",
" eval_metric=None, gamma=0, gpu_id=-1, grow_policy='depthwise',\n",
" importance_type=None, interaction_constraints='',\n",
" learning_rate=0.1, max_bin=256, max_cat_to_onehot=4,\n",
" max_delta_step=0, max_depth=9, max_leaves=0, min_child_weight=1,\n",
" missing=nan, monotone_constraints='()', n_estimators=290,\n",
" n_jobs=-1, num_parallel_tree=1, predictor='auto', random_state=123,\n",
" reg_alpha=0.05, reg_lambda=0.1, ...)"
]
},
"execution_count": 27,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"# Fit the regressor on the training split.\n",
"# NOTE(review): the first cell imports the xgboost module as `xgb`, but here `xgb`\n",
"# is an estimator (the saved output shows an XGBRegressor), so an earlier cell must\n",
"# rebind the name - confirm, and consider a distinct name for the model.\n",
"xgb.fit(train_X, train_y)"
]
},
{
"cell_type": "code",
"execution_count": 28,
"metadata": {},
"outputs": [
{
"data": {
"text/plain": [
"array([0.00045016, 0.02260762, 0.0020332 , ..., 0.03950962, 0.06227764,\n",
" 0.01274938], dtype=float32)"
]
},
"execution_count": 28,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"# Predict the target for the held-out rows and show the resulting array.\n",
"pred = xgb.predict(test_X)\n",
"pred"
]
},
{
"cell_type": "code",
"execution_count": 29,
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"RMSE : 0.005350\n"
]
}
],
"source": [
"# RMSE on the held-out split: square root of the mean squared error between\n",
"# the true test targets and the model predictions.\n",
"# mean_squared_error is already imported in the notebook's first cell, so it is\n",
"# used directly instead of being re-imported here under an alias.\n",
"rmse = np.sqrt(mean_squared_error(test_y, pred))\n",
"print(\"RMSE : % f\" %(rmse))"
]
},
{
"cell_type": "code",
"execution_count": 30,
"metadata": {},
"outputs": [
{
"data": {
"text/plain": [
"0.9756177779346994"
]
},
"execution_count": 30,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"# Model score on the training split (for an sklearn-style regressor, score() is R^2).\n",
"xgb.score(train_X, train_y)"
]
},
{
"cell_type": "code",
"execution_count": 31,
"metadata": {},
"outputs": [
{
"data": {
"text/plain": [
"0.918161859103899"
]
},
"execution_count": 31,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"# Model score on the held-out split, for comparison with the training score.\n",
"xgb.score(test_X, test_y)\n"
]
},
{
"cell_type": "code",
"execution_count": 32,
"metadata": {},
"outputs": [
{
"data": {
"text/plain": [
"Index(['year', 'month', 'quarter', 'dayofyear', 'dayofmonth', 'weekofyear', 'dayofweek', 'datehour', 'WD_Hour', 'WS_Hour', 'Temp_Hour', 'SR_Hour', 'RH_Hour', 'NO2', 'O3'], dtype='object')"
]
},
"execution_count": 32,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"# Show the column names of df1 in their current order.\n",
"df1.columns"
]
},
{
"cell_type": "code",
"execution_count": 33,
"metadata": {},
"outputs": [
{
"data": {
"text/plain": [
"array([0.00045016, 0.02260762, 0.0020332 , ..., 0.03950962, 0.06227764,\n",
" 0.01274938], dtype=float32)"
]
},
"execution_count": 33,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"# Predictions for the held-out rows (the same call that produced `pred` earlier).\n",
"xgb.predict(test_X)"
]
},
{
"cell_type": "code",
"execution_count": 34,
"metadata": {},
"outputs": [
{
"data": {
"text/html": [
"\n",
"\n",
"
\n",
" \n",
" \n",
" | \n",
" 0 | \n",
" 1 | \n",
" 2 | \n",
" 3 | \n",
" 4 | \n",
" 5 | \n",
" 6 | \n",
" 7 | \n",
" 8 | \n",
" 9 | \n",
" 10 | \n",
" 11 | \n",
" 12 | \n",
" 13 | \n",
"
\n",
" \n",
" \n",
" \n",
" 0 | \n",
" 2015 | \n",
" 8 | \n",
" 3 | \n",
" 214 | \n",
" 2 | \n",
" 31 | \n",
" 6 | \n",
" 15 | \n",
" 118 | \n",
" 2.8 | \n",
" 48.4 | \n",
" 718 | \n",
" 18 | \n",
" 0.031 | \n",
"
\n",
" \n",
"
\n",
"
"
],
"text/plain": [
" 0 1 2 3 4 5 6 7 8 9 10 11 12 13\n",
"0 2015 8 3 214 2 31 6 15 118 2.8 48.4 718 18 0.031"
]
},
"execution_count": 34,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"# Hand-built single observation; values are listed in the training column order\n",
"# (year, month, quarter, dayofyear, dayofmonth, weekofyear, dayofweek, datehour,\n",
"#  WD_Hour, WS_Hour, Temp_Hour, SR_Hour, RH_Hour, NO2).\n",
"test = [2015,8,3,214,2,31,6,15,118,2.8,48.4,718,18,0.031]\n",
"# Label the row with the training feature names so the model receives the same\n",
"# named columns it was fitted on, instead of positional labels 0..13.\n",
"p = pd.DataFrame([test], columns=X.columns)\n",
"p"
]
},
{
"cell_type": "code",
"execution_count": 35,
"metadata": {},
"outputs": [
{
"data": {
"text/plain": [
"array([0.14080931], dtype=float32)"
]
},
"execution_count": 35,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"# Predict the target for the single hand-built row `p`.\n",
"xgb.predict(p)"
]
},
{
"cell_type": "code",
"execution_count": 36,
"metadata": {},
"outputs": [],
"source": [
"#import pickle\n",
"#file_name = \"xgb_reg_sklearn_updated.pkl\"\n",
"\n",
"# save\n",
"#pickle.dump(xgb, open(file_name, \"wb\"))"
]
},
{
"cell_type": "code",
"execution_count": 37,
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"\n",
"DatetimeIndex: 25655 entries, 2013-01-27 00:00:00 to 2015-12-31 22:00:00\n",
"Freq: H\n",
"Data columns (total 15 columns):\n",
" # Column Non-Null Count Dtype \n",
"--- ------ -------------- ----- \n",
" 0 year 25655 non-null float64\n",
" 1 month 25655 non-null float64\n",
" 2 quarter 25655 non-null float64\n",
" 3 dayofyear 25655 non-null float64\n",
" 4 dayofmonth 25655 non-null float64\n",
" 5 weekofyear 25655 non-null float64\n",
" 6 dayofweek 25655 non-null float64\n",
" 7 datehour 25655 non-null float64\n",
" 8 WD_Hour 25655 non-null float64\n",
" 9 WS_Hour 25655 non-null float64\n",
" 10 Temp_Hour 25655 non-null float64\n",
" 11 SR_Hour 25655 non-null float64\n",
" 12 RH_Hour 25655 non-null float64\n",
" 13 NO2 25655 non-null float64\n",
" 14 O3 25655 non-null float64\n",
"dtypes: float64(15)\n",
"memory usage: 3.1 MB\n"
]
}
],
"source": [
"# Summarize df1: index range, per-column non-null counts and dtypes.\n",
"df1.info()"
]
},
{
"cell_type": "code",
"execution_count": 38,
"metadata": {},
"outputs": [],
"source": [
"#evaluate_model(tuned_blender)"
]
},
{
"cell_type": "code",
"execution_count": 39,
"metadata": {},
"outputs": [],
"source": [
"#evaluate_model(tuned_lightgbm)"
]
},
{
"cell_type": "code",
"execution_count": 40,
"metadata": {},
"outputs": [],
"source": [
"#df"
]
},
{
"cell_type": "code",
"execution_count": 41,
"metadata": {},
"outputs": [],
"source": [
"#df.index.names = ['date']\n",
"#df"
]
},
{
"cell_type": "code",
"execution_count": 42,
"metadata": {},
"outputs": [],
"source": [
"#df.isna().sum()"
]
},
{
"cell_type": "code",
"execution_count": 43,
"metadata": {},
"outputs": [],
"source": [
"#df =df.reset_index()\n",
"#df.isna().sum()"
]
},
{
"cell_type": "code",
"execution_count": 44,
"metadata": {},
"outputs": [],
"source": [
"#df = df.dropna(how='any')\n",
"#df"
]
},
{
"cell_type": "code",
"execution_count": 45,
"metadata": {},
"outputs": [],
"source": [
"#df = df.set_index('date')\n",
"#df"
]
},
{
"cell_type": "code",
"execution_count": 46,
"metadata": {},
"outputs": [],
"source": [
"#df"
]
},
{
"cell_type": "code",
"execution_count": 47,
"metadata": {},
"outputs": [],
"source": [
"#df = df.reset_index()\n",
"#df"
]
},
{
"cell_type": "code",
"execution_count": 48,
"metadata": {},
"outputs": [],
"source": [
"#df = df.set_index('date').asfreq('h')\n",
"#df"
]
},
{
"cell_type": "code",
"execution_count": 49,
"metadata": {},
"outputs": [],
"source": [
"#df.isna().sum()"
]
},
{
"cell_type": "code",
"execution_count": 50,
"metadata": {},
"outputs": [],
"source": [
"#df1= df.interpolate()\n",
"#df1"
]
},
{
"cell_type": "code",
"execution_count": 51,
"metadata": {},
"outputs": [],
"source": [
"#df1.isna().sum()"
]
},
{
"cell_type": "code",
"execution_count": 52,
"metadata": {},
"outputs": [],
"source": [
"#df1"
]
},
{
"cell_type": "code",
"execution_count": 53,
"metadata": {},
"outputs": [],
"source": [
"#df5=df1[df1.index.year == 2015]\n",
"#df5"
]
},
{
"cell_type": "code",
"execution_count": 54,
"metadata": {},
"outputs": [],
"source": [
"#save_model(final_lgbm,'Final_Lgbm_Model_CC_no_num_prod')"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": []
}
],
"metadata": {
"kernelspec": {
"display_name": "Python 3",
"language": "python",
"name": "python3"
},
"language_info": {
"codemirror_mode": {
"name": "ipython",
"version": 3
},
"file_extension": ".py",
"mimetype": "text/x-python",
"name": "python",
"nbconvert_exporter": "python",
"pygments_lexer": "ipython3",
"version": "3.8.5"
}
},
"nbformat": 4,
"nbformat_minor": 4
}