{ "cells": [ { "cell_type": "code", "execution_count": 1, "metadata": {}, "outputs": [ { "name": "stderr", "output_type": "stream", "text": [ "C:\\Users\\ahmed.ewis\\Anaconda3_ts_3.8\\lib\\site-packages\\dask\\dataframe\\utils.py:367: FutureWarning: pandas.Int64Index is deprecated and will be removed from pandas in a future version. Use pandas.Index with the appropriate dtype instead.\n", " _numeric_index_types = (pd.Int64Index, pd.Float64Index, pd.UInt64Index)\n", "C:\\Users\\ahmed.ewis\\Anaconda3_ts_3.8\\lib\\site-packages\\dask\\dataframe\\utils.py:367: FutureWarning: pandas.Float64Index is deprecated and will be removed from pandas in a future version. Use pandas.Index with the appropriate dtype instead.\n", " _numeric_index_types = (pd.Int64Index, pd.Float64Index, pd.UInt64Index)\n", "C:\\Users\\ahmed.ewis\\Anaconda3_ts_3.8\\lib\\site-packages\\dask\\dataframe\\utils.py:367: FutureWarning: pandas.UInt64Index is deprecated and will be removed from pandas in a future version. Use pandas.Index with the appropriate dtype instead.\n", " _numeric_index_types = (pd.Int64Index, pd.Float64Index, pd.UInt64Index)\n" ] } ], "source": [ "# learning curve, fitting,feature importance. rmse,mse, etc..\n", "#import pmdarima as pm\n", "import matplotlib.pyplot as plt\n", "import pandas as pd\n", "# https://www.kaggle.com/code/rtatman/lightgbm-hyperparameter-optimisation-lb-0-761/notebook\n", "import xgboost as xgb\n", "import lightgbm as lgb\n", "import numpy as np\n", "pd.set_option('display.max_rows', 500)\n", "pd.set_option('display.max_columns', 500)\n", "pd.set_option('display.width', 1000) \n", "from sklearn.metrics import mean_squared_error\n", "import warnings\n", "warnings.filterwarnings('ignore')" ] }, { "cell_type": "code", "execution_count": 2, "metadata": { "scrolled": true }, "outputs": [ { "data": { "text/html": [ "
\n", "\n", "\n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", "
MeasurementDateTimeWD-HourWS-HourTemp-HourSR-HourRH-HourCO2PM10SO2H2SNONOXNO2O3COTHC
02013-01-27 00:00:001141.718.15100380101.0000000.0010000.0030.0050.0230.0170002.600000e-020.582.170000
12013-01-27 01:00:001510.417.8389410101.0000000.0030000.0170.0650.1510.0860001.000000e-031.742.830000
22013-01-27 02:00:001750.317.8288392100.0000000.0020000.0040.0190.0720.0530002.000000e-030.953.770000
32013-01-27 03:00:002640.816.829240392.0000000.0020000.0140.0510.1230.0720001.000000e-031.432.720000
42013-01-27 04:00:001870.416.229440069.0000000.0020000.0180.0410.1050.0630001.000000e-031.322.520000
...................................................
227742015-12-31 18:00:00851.414.61076404344.2386750.0063860.0080.0140.0810.0761991.747050e-032.731.464756
227752015-12-31 19:00:00991.314.6975410281.8158170.0063010.0090.0230.1000.0981531.108100e-042.811.903803
227762015-12-31 20:00:002100.714.5107942451.0795080.0065200.0090.0640.1640.1067588.930000e-062.962.039618
227772015-12-31 21:00:001850.814.61081428120.9740710.0064370.0090.0770.1820.0876411.000000e-083.251.754740
227782015-12-31 22:00:001470.914.1108542550.0161880.0064570.0090.0500.1370.0890110.000000e+003.451.818111
\n", "

22779 rows × 16 columns

\n", "
" ], "text/plain": [ " MeasurementDateTime WD-Hour WS-Hour Temp-Hour SR-Hour RH-Hour CO2 PM10 SO2 H2S NO NOX NO2 O3 CO THC\n", "0 2013-01-27 00:00:00 114 1.7 18.1 5 100 380 101.000000 0.001000 0.003 0.005 0.023 0.017000 2.600000e-02 0.58 2.170000\n", "1 2013-01-27 01:00:00 151 0.4 17.8 3 89 410 101.000000 0.003000 0.017 0.065 0.151 0.086000 1.000000e-03 1.74 2.830000\n", "2 2013-01-27 02:00:00 175 0.3 17.8 2 88 392 100.000000 0.002000 0.004 0.019 0.072 0.053000 2.000000e-03 0.95 3.770000\n", "3 2013-01-27 03:00:00 264 0.8 16.8 2 92 403 92.000000 0.002000 0.014 0.051 0.123 0.072000 1.000000e-03 1.43 2.720000\n", "4 2013-01-27 04:00:00 187 0.4 16.2 2 94 400 69.000000 0.002000 0.018 0.041 0.105 0.063000 1.000000e-03 1.32 2.520000\n", "... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ...\n", "22774 2015-12-31 18:00:00 85 1.4 14.6 10 76 404 344.238675 0.006386 0.008 0.014 0.081 0.076199 1.747050e-03 2.73 1.464756\n", "22775 2015-12-31 19:00:00 99 1.3 14.6 9 75 410 281.815817 0.006301 0.009 0.023 0.100 0.098153 1.108100e-04 2.81 1.903803\n", "22776 2015-12-31 20:00:00 210 0.7 14.5 10 79 424 51.079508 0.006520 0.009 0.064 0.164 0.106758 8.930000e-06 2.96 2.039618\n", "22777 2015-12-31 21:00:00 185 0.8 14.6 10 81 428 120.974071 0.006437 0.009 0.077 0.182 0.087641 1.000000e-08 3.25 1.754740\n", "22778 2015-12-31 22:00:00 147 0.9 14.1 10 85 425 50.016188 0.006457 0.009 0.050 0.137 0.089011 0.000000e+00 3.45 1.818111\n", "\n", "[22779 rows x 16 columns]" ] }, "execution_count": 2, "metadata": {}, "output_type": "execute_result" } ], "source": [ "df = pd.read_excel('Interpolation_Average_2013_2015.xlsx',sheet_name='Sheet2')\n", "df" ] }, { "cell_type": "code", "execution_count": 3, "metadata": {}, "outputs": [ { "data": { "text/plain": [ "Index(['MeasurementDateTime', 'WD-Hour', 'WS-Hour', 'Temp-Hour', 'SR-Hour', 'RH-Hour', 'CO2', 'PM10', 'SO2', 'H2S', 'NO', 'NOX', 'NO2', 'O3', 'CO', 'THC'], dtype='object')" ] }, "execution_count": 3, "metadata": {}, 
"output_type": "execute_result" } ], "source": [ "df.columns" ] }, { "cell_type": "code", "execution_count": 4, "metadata": {}, "outputs": [], "source": [ "cols = ['MeasurementDateTime','WD-Hour', 'WS-Hour', 'Temp-Hour', 'SR-Hour', 'RH-Hour','NO2', 'O3'\n", "]" ] }, { "cell_type": "code", "execution_count": 5, "metadata": {}, "outputs": [ { "data": { "text/html": [ "
\n", "\n", "\n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", "
MeasurementDateTimeWD-HourWS-HourTemp-HourSR-HourRH-HourNO2O3
02013-01-27 00:00:001141.718.151000.0170002.600000e-02
12013-01-27 01:00:001510.417.83890.0860001.000000e-03
22013-01-27 02:00:001750.317.82880.0530002.000000e-03
32013-01-27 03:00:002640.816.82920.0720001.000000e-03
42013-01-27 04:00:001870.416.22940.0630001.000000e-03
...........................
227742015-12-31 18:00:00851.414.610760.0761991.747050e-03
227752015-12-31 19:00:00991.314.69750.0981531.108100e-04
227762015-12-31 20:00:002100.714.510790.1067588.930000e-06
227772015-12-31 21:00:001850.814.610810.0876411.000000e-08
227782015-12-31 22:00:001470.914.110850.0890110.000000e+00
\n", "

22779 rows × 8 columns

\n", "
" ], "text/plain": [ " MeasurementDateTime WD-Hour WS-Hour Temp-Hour SR-Hour RH-Hour NO2 O3\n", "0 2013-01-27 00:00:00 114 1.7 18.1 5 100 0.017000 2.600000e-02\n", "1 2013-01-27 01:00:00 151 0.4 17.8 3 89 0.086000 1.000000e-03\n", "2 2013-01-27 02:00:00 175 0.3 17.8 2 88 0.053000 2.000000e-03\n", "3 2013-01-27 03:00:00 264 0.8 16.8 2 92 0.072000 1.000000e-03\n", "4 2013-01-27 04:00:00 187 0.4 16.2 2 94 0.063000 1.000000e-03\n", "... ... ... ... ... ... ... ... ...\n", "22774 2015-12-31 18:00:00 85 1.4 14.6 10 76 0.076199 1.747050e-03\n", "22775 2015-12-31 19:00:00 99 1.3 14.6 9 75 0.098153 1.108100e-04\n", "22776 2015-12-31 20:00:00 210 0.7 14.5 10 79 0.106758 8.930000e-06\n", "22777 2015-12-31 21:00:00 185 0.8 14.6 10 81 0.087641 1.000000e-08\n", "22778 2015-12-31 22:00:00 147 0.9 14.1 10 85 0.089011 0.000000e+00\n", "\n", "[22779 rows x 8 columns]" ] }, "execution_count": 5, "metadata": {}, "output_type": "execute_result" } ], "source": [ "# Keep only the modelling columns. Take an explicit copy: later cells add and\n", "# overwrite columns on this frame, and assigning into a bare column selection\n", "# is what triggers pandas' SettingWithCopyWarning.\n", "df = df[cols].copy()\n", "df" ] }, { "cell_type": "code", "execution_count": 6, "metadata": {}, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ "\n", "RangeIndex: 22779 entries, 0 to 22778\n", "Data columns (total 8 columns):\n", " # Column Non-Null Count Dtype \n", "--- ------ -------------- ----- \n", " 0 MeasurementDateTime 22779 non-null datetime64[ns]\n", " 1 WD-Hour 22779 non-null int64 \n", " 2 WS-Hour 22779 non-null float64 \n", " 3 Temp-Hour 22779 non-null float64 \n", " 4 SR-Hour 22779 non-null int64 \n", " 5 RH-Hour 22779 non-null int64 \n", " 6 NO2 22779 non-null float64 \n", " 7 O3 22779 non-null float64 \n", "dtypes: datetime64[ns](1), float64(4), int64(3)\n", "memory usage: 1.4 MB\n" ] } ], "source": [ "df.info()" ] }, { "cell_type": "code", "execution_count": 7, "metadata": {}, "outputs": [ { "data": { "text/html": [ "
\n", "\n", "\n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", "
MeasurementDateTimeWD-HourWS-HourTemp-HourSR-HourRH-HourNO2O3
02013-01-27 00:00:001141.718.151000.0170002.600000e-02
12013-01-27 01:00:001510.417.83890.0860001.000000e-03
22013-01-27 02:00:001750.317.82880.0530002.000000e-03
32013-01-27 03:00:002640.816.82920.0720001.000000e-03
42013-01-27 04:00:001870.416.22940.0630001.000000e-03
...........................
227742015-12-31 18:00:00851.414.610760.0761991.747050e-03
227752015-12-31 19:00:00991.314.69750.0981531.108100e-04
227762015-12-31 20:00:002100.714.510790.1067588.930000e-06
227772015-12-31 21:00:001850.814.610810.0876411.000000e-08
227782015-12-31 22:00:001470.914.110850.0890110.000000e+00
\n", "

22779 rows × 8 columns

\n", "
" ], "text/plain": [ " MeasurementDateTime WD-Hour WS-Hour Temp-Hour SR-Hour RH-Hour NO2 O3\n", "0 2013-01-27 00:00:00 114 1.7 18.1 5 100 0.017000 2.600000e-02\n", "1 2013-01-27 01:00:00 151 0.4 17.8 3 89 0.086000 1.000000e-03\n", "2 2013-01-27 02:00:00 175 0.3 17.8 2 88 0.053000 2.000000e-03\n", "3 2013-01-27 03:00:00 264 0.8 16.8 2 92 0.072000 1.000000e-03\n", "4 2013-01-27 04:00:00 187 0.4 16.2 2 94 0.063000 1.000000e-03\n", "... ... ... ... ... ... ... ... ...\n", "22774 2015-12-31 18:00:00 85 1.4 14.6 10 76 0.076199 1.747050e-03\n", "22775 2015-12-31 19:00:00 99 1.3 14.6 9 75 0.098153 1.108100e-04\n", "22776 2015-12-31 20:00:00 210 0.7 14.5 10 79 0.106758 8.930000e-06\n", "22777 2015-12-31 21:00:00 185 0.8 14.6 10 81 0.087641 1.000000e-08\n", "22778 2015-12-31 22:00:00 147 0.9 14.1 10 85 0.089011 0.000000e+00\n", "\n", "[22779 rows x 8 columns]" ] }, "execution_count": 7, "metadata": {}, "output_type": "execute_result" } ], "source": [ "df['MeasurementDateTime'] = pd.to_datetime(df['MeasurementDateTime'], errors='coerce')\n", "df" ] }, { "cell_type": "code", "execution_count": 8, "metadata": {}, "outputs": [ { "data": { "text/html": [ "
\n", "\n", "\n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", "
MeasurementDateTimeWD-HourWS-HourTemp-HourSR-HourRH-HourNO2O3date
02013-01-27 00:00:001141.718.151000.0170002.600000e-022013-01-27 00:00:00
12013-01-27 01:00:001510.417.83890.0860001.000000e-032013-01-27 01:00:00
22013-01-27 02:00:001750.317.82880.0530002.000000e-032013-01-27 02:00:00
32013-01-27 03:00:002640.816.82920.0720001.000000e-032013-01-27 03:00:00
42013-01-27 04:00:001870.416.22940.0630001.000000e-032013-01-27 04:00:00
..............................
227742015-12-31 18:00:00851.414.610760.0761991.747050e-032015-12-31 18:00:00
227752015-12-31 19:00:00991.314.69750.0981531.108100e-042015-12-31 19:00:00
227762015-12-31 20:00:002100.714.510790.1067588.930000e-062015-12-31 20:00:00
227772015-12-31 21:00:001850.814.610810.0876411.000000e-082015-12-31 21:00:00
227782015-12-31 22:00:001470.914.110850.0890110.000000e+002015-12-31 22:00:00
\n", "

22779 rows × 9 columns

\n", "
" ], "text/plain": [ " MeasurementDateTime WD-Hour WS-Hour Temp-Hour SR-Hour RH-Hour NO2 O3 date\n", "0 2013-01-27 00:00:00 114 1.7 18.1 5 100 0.017000 2.600000e-02 2013-01-27 00:00:00\n", "1 2013-01-27 01:00:00 151 0.4 17.8 3 89 0.086000 1.000000e-03 2013-01-27 01:00:00\n", "2 2013-01-27 02:00:00 175 0.3 17.8 2 88 0.053000 2.000000e-03 2013-01-27 02:00:00\n", "3 2013-01-27 03:00:00 264 0.8 16.8 2 92 0.072000 1.000000e-03 2013-01-27 03:00:00\n", "4 2013-01-27 04:00:00 187 0.4 16.2 2 94 0.063000 1.000000e-03 2013-01-27 04:00:00\n", "... ... ... ... ... ... ... ... ... ...\n", "22774 2015-12-31 18:00:00 85 1.4 14.6 10 76 0.076199 1.747050e-03 2015-12-31 18:00:00\n", "22775 2015-12-31 19:00:00 99 1.3 14.6 9 75 0.098153 1.108100e-04 2015-12-31 19:00:00\n", "22776 2015-12-31 20:00:00 210 0.7 14.5 10 79 0.106758 8.930000e-06 2015-12-31 20:00:00\n", "22777 2015-12-31 21:00:00 185 0.8 14.6 10 81 0.087641 1.000000e-08 2015-12-31 21:00:00\n", "22778 2015-12-31 22:00:00 147 0.9 14.1 10 85 0.089011 0.000000e+00 2015-12-31 22:00:00\n", "\n", "[22779 rows x 9 columns]" ] }, "execution_count": 8, "metadata": {}, "output_type": "execute_result" } ], "source": [ "df['date'] = df['MeasurementDateTime']\n", "df" ] }, { "cell_type": "code", "execution_count": 9, "metadata": {}, "outputs": [ { "data": { "text/html": [ "
\n", "\n", "\n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", "
MeasurementDateTimeWD-HourWS-HourTemp-HourSR-HourRH-HourNO2O3datedayofweekquartermonthyeardayofyeardayofmonthdatehourweekofyear
02013-01-27 00:00:001141.718.151000.0170002.600000e-022013-01-27 00:00:006112013272704
12013-01-27 01:00:001510.417.83890.0860001.000000e-032013-01-27 01:00:006112013272714
22013-01-27 02:00:001750.317.82880.0530002.000000e-032013-01-27 02:00:006112013272724
32013-01-27 03:00:002640.816.82920.0720001.000000e-032013-01-27 03:00:006112013272734
42013-01-27 04:00:001870.416.22940.0630001.000000e-032013-01-27 04:00:006112013272744
......................................................
227742015-12-31 18:00:00851.414.610760.0761991.747050e-032015-12-31 18:00:0034122015365311853
227752015-12-31 19:00:00991.314.69750.0981531.108100e-042015-12-31 19:00:0034122015365311953
227762015-12-31 20:00:002100.714.510790.1067588.930000e-062015-12-31 20:00:0034122015365312053
227772015-12-31 21:00:001850.814.610810.0876411.000000e-082015-12-31 21:00:0034122015365312153
227782015-12-31 22:00:001470.914.110850.0890110.000000e+002015-12-31 22:00:0034122015365312253
\n", "

22779 rows × 17 columns

\n", "
" ], "text/plain": [ " MeasurementDateTime WD-Hour WS-Hour Temp-Hour SR-Hour RH-Hour NO2 O3 date dayofweek quarter month year dayofyear dayofmonth datehour weekofyear\n", "0 2013-01-27 00:00:00 114 1.7 18.1 5 100 0.017000 2.600000e-02 2013-01-27 00:00:00 6 1 1 2013 27 27 0 4\n", "1 2013-01-27 01:00:00 151 0.4 17.8 3 89 0.086000 1.000000e-03 2013-01-27 01:00:00 6 1 1 2013 27 27 1 4\n", "2 2013-01-27 02:00:00 175 0.3 17.8 2 88 0.053000 2.000000e-03 2013-01-27 02:00:00 6 1 1 2013 27 27 2 4\n", "3 2013-01-27 03:00:00 264 0.8 16.8 2 92 0.072000 1.000000e-03 2013-01-27 03:00:00 6 1 1 2013 27 27 3 4\n", "4 2013-01-27 04:00:00 187 0.4 16.2 2 94 0.063000 1.000000e-03 2013-01-27 04:00:00 6 1 1 2013 27 27 4 4\n", "... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ...\n", "22774 2015-12-31 18:00:00 85 1.4 14.6 10 76 0.076199 1.747050e-03 2015-12-31 18:00:00 3 4 12 2015 365 31 18 53\n", "22775 2015-12-31 19:00:00 99 1.3 14.6 9 75 0.098153 1.108100e-04 2015-12-31 19:00:00 3 4 12 2015 365 31 19 53\n", "22776 2015-12-31 20:00:00 210 0.7 14.5 10 79 0.106758 8.930000e-06 2015-12-31 20:00:00 3 4 12 2015 365 31 20 53\n", "22777 2015-12-31 21:00:00 185 0.8 14.6 10 81 0.087641 1.000000e-08 2015-12-31 21:00:00 3 4 12 2015 365 31 21 53\n", "22778 2015-12-31 22:00:00 147 0.9 14.1 10 85 0.089011 0.000000e+00 2015-12-31 22:00:00 3 4 12 2015 365 31 22 53\n", "\n", "[22779 rows x 17 columns]" ] }, "execution_count": 9, "metadata": {}, "output_type": "execute_result" } ], "source": [ "from datetime import datetime\n", "# Derive calendar features from the 'date' timestamp column.\n", "df['dayofweek'] = df['date'].dt.dayofweek\n", "df['quarter'] = df['date'].dt.quarter\n", "df['month'] = df['date'].dt.month\n", "df['year'] = df['date'].dt.year\n", "df['dayofyear'] = df['date'].dt.dayofyear\n", "df['dayofmonth'] = df['date'].dt.day\n", "df['datehour'] = df['date'].dt.hour\n", "# Series.dt.weekofyear was deprecated in pandas 1.1 and removed in 2.0.\n", "# isocalendar().week yields the same ISO week number but as UInt32, so cast\n", "# back to int64 to keep the dtype the old accessor produced.\n", "df['weekofyear'] = df['date'].dt.isocalendar().week.astype('int64')\n", "df" ] }, { "cell_type": "code", "execution_count": 10, "metadata": {}, 
"outputs": [ { "data": { "text/plain": [ "Index(['MeasurementDateTime', 'WD-Hour', 'WS-Hour', 'Temp-Hour', 'SR-Hour', 'RH-Hour', 'NO2', 'O3', 'date', 'dayofweek', 'quarter', 'month', 'year', 'dayofyear', 'dayofmonth', 'datehour', 'weekofyear'], dtype='object')" ] }, "execution_count": 10, "metadata": {}, "output_type": "execute_result" } ], "source": [ "df.columns" ] }, { "cell_type": "code", "execution_count": 11, "metadata": {}, "outputs": [ { "data": { "text/html": [ "
\n", "\n", "\n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", "
MeasurementDateTimeWD-HourWS-HourTemp-HourSR-HourRH-HourNO2O3dayofweekquartermonthyeardayofyeardayofmonthdatehourweekofyear
date
2013-01-27 00:00:002013-01-27 00:00:00114.01.718.15.0100.00.0170002.600000e-026.01.01.02013.027.027.00.04.0
2013-01-27 01:00:002013-01-27 01:00:00151.00.417.83.089.00.0860001.000000e-036.01.01.02013.027.027.01.04.0
2013-01-27 02:00:002013-01-27 02:00:00175.00.317.82.088.00.0530002.000000e-036.01.01.02013.027.027.02.04.0
2013-01-27 03:00:002013-01-27 03:00:00264.00.816.82.092.00.0720001.000000e-036.01.01.02013.027.027.03.04.0
2013-01-27 04:00:002013-01-27 04:00:00187.00.416.22.094.00.0630001.000000e-036.01.01.02013.027.027.04.04.0
...................................................
2015-12-31 18:00:002015-12-31 18:00:0085.01.414.610.076.00.0761991.747050e-033.04.012.02015.0365.031.018.053.0
2015-12-31 19:00:002015-12-31 19:00:0099.01.314.69.075.00.0981531.108100e-043.04.012.02015.0365.031.019.053.0
2015-12-31 20:00:002015-12-31 20:00:00210.00.714.510.079.00.1067588.930000e-063.04.012.02015.0365.031.020.053.0
2015-12-31 21:00:002015-12-31 21:00:00185.00.814.610.081.00.0876411.000000e-083.04.012.02015.0365.031.021.053.0
2015-12-31 22:00:002015-12-31 22:00:00147.00.914.110.085.00.0890110.000000e+003.04.012.02015.0365.031.022.053.0
\n", "

25655 rows × 16 columns

\n", "
" ], "text/plain": [ " MeasurementDateTime WD-Hour WS-Hour Temp-Hour SR-Hour RH-Hour NO2 O3 dayofweek quarter month year dayofyear dayofmonth datehour weekofyear\n", "date \n", "2013-01-27 00:00:00 2013-01-27 00:00:00 114.0 1.7 18.1 5.0 100.0 0.017000 2.600000e-02 6.0 1.0 1.0 2013.0 27.0 27.0 0.0 4.0\n", "2013-01-27 01:00:00 2013-01-27 01:00:00 151.0 0.4 17.8 3.0 89.0 0.086000 1.000000e-03 6.0 1.0 1.0 2013.0 27.0 27.0 1.0 4.0\n", "2013-01-27 02:00:00 2013-01-27 02:00:00 175.0 0.3 17.8 2.0 88.0 0.053000 2.000000e-03 6.0 1.0 1.0 2013.0 27.0 27.0 2.0 4.0\n", "2013-01-27 03:00:00 2013-01-27 03:00:00 264.0 0.8 16.8 2.0 92.0 0.072000 1.000000e-03 6.0 1.0 1.0 2013.0 27.0 27.0 3.0 4.0\n", "2013-01-27 04:00:00 2013-01-27 04:00:00 187.0 0.4 16.2 2.0 94.0 0.063000 1.000000e-03 6.0 1.0 1.0 2013.0 27.0 27.0 4.0 4.0\n", "... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ...\n", "2015-12-31 18:00:00 2015-12-31 18:00:00 85.0 1.4 14.6 10.0 76.0 0.076199 1.747050e-03 3.0 4.0 12.0 2015.0 365.0 31.0 18.0 53.0\n", "2015-12-31 19:00:00 2015-12-31 19:00:00 99.0 1.3 14.6 9.0 75.0 0.098153 1.108100e-04 3.0 4.0 12.0 2015.0 365.0 31.0 19.0 53.0\n", "2015-12-31 20:00:00 2015-12-31 20:00:00 210.0 0.7 14.5 10.0 79.0 0.106758 8.930000e-06 3.0 4.0 12.0 2015.0 365.0 31.0 20.0 53.0\n", "2015-12-31 21:00:00 2015-12-31 21:00:00 185.0 0.8 14.6 10.0 81.0 0.087641 1.000000e-08 3.0 4.0 12.0 2015.0 365.0 31.0 21.0 53.0\n", "2015-12-31 22:00:00 2015-12-31 22:00:00 147.0 0.9 14.1 10.0 85.0 0.089011 0.000000e+00 3.0 4.0 12.0 2015.0 365.0 31.0 22.0 53.0\n", "\n", "[25655 rows x 16 columns]" ] }, "execution_count": 11, "metadata": {}, "output_type": "execute_result" } ], "source": [ "df = df.set_index('date').asfreq('h')\n", "df\n" ] }, { "cell_type": "code", "execution_count": 14, "metadata": {}, "outputs": [ { "data": { "text/plain": [ "MeasurementDateTime 2876\n", "WD-Hour 2876\n", "WS-Hour 2876\n", "Temp-Hour 2876\n", "SR-Hour 2876\n", "RH-Hour 2876\n", "NO2 2876\n", "O3 2876\n", "dayofweek 
2876\n", "quarter 2876\n", "month 2876\n", "year 2876\n", "dayofyear 2876\n", "dayofmonth 2876\n", "datehour 2876\n", "weekofyear 2876\n", "dtype: int64" ] }, "execution_count": 14, "metadata": {}, "output_type": "execute_result" } ], "source": [ "df.isna().sum()" ] }, { "cell_type": "code", "execution_count": 15, "metadata": {}, "outputs": [ { "data": { "text/html": [ "
\n", "\n", "\n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", "
MeasurementDateTimeWD-HourWS-HourTemp-HourSR-HourRH-HourNO2O3dayofweekquartermonthyeardayofyeardayofmonthdatehourweekofyear
date
2013-01-27 00:00:002013-01-27 00:00:00114.01.718.15.0100.00.0170002.600000e-026.01.01.02013.027.027.00.04.0
2013-01-27 01:00:002013-01-27 01:00:00151.00.417.83.089.00.0860001.000000e-036.01.01.02013.027.027.01.04.0
2013-01-27 02:00:002013-01-27 02:00:00175.00.317.82.088.00.0530002.000000e-036.01.01.02013.027.027.02.04.0
2013-01-27 03:00:002013-01-27 03:00:00264.00.816.82.092.00.0720001.000000e-036.01.01.02013.027.027.03.04.0
2013-01-27 04:00:002013-01-27 04:00:00187.00.416.22.094.00.0630001.000000e-036.01.01.02013.027.027.04.04.0
...................................................
2015-12-31 18:00:002015-12-31 18:00:0085.01.414.610.076.00.0761991.747050e-033.04.012.02015.0365.031.018.053.0
2015-12-31 19:00:002015-12-31 19:00:0099.01.314.69.075.00.0981531.108100e-043.04.012.02015.0365.031.019.053.0
2015-12-31 20:00:002015-12-31 20:00:00210.00.714.510.079.00.1067588.930000e-063.04.012.02015.0365.031.020.053.0
2015-12-31 21:00:002015-12-31 21:00:00185.00.814.610.081.00.0876411.000000e-083.04.012.02015.0365.031.021.053.0
2015-12-31 22:00:002015-12-31 22:00:00147.00.914.110.085.00.0890110.000000e+003.04.012.02015.0365.031.022.053.0
\n", "

25655 rows × 16 columns

\n", "
" ], "text/plain": [ " MeasurementDateTime WD-Hour WS-Hour Temp-Hour SR-Hour RH-Hour NO2 O3 dayofweek quarter month year dayofyear dayofmonth datehour weekofyear\n", "date \n", "2013-01-27 00:00:00 2013-01-27 00:00:00 114.0 1.7 18.1 5.0 100.0 0.017000 2.600000e-02 6.0 1.0 1.0 2013.0 27.0 27.0 0.0 4.0\n", "2013-01-27 01:00:00 2013-01-27 01:00:00 151.0 0.4 17.8 3.0 89.0 0.086000 1.000000e-03 6.0 1.0 1.0 2013.0 27.0 27.0 1.0 4.0\n", "2013-01-27 02:00:00 2013-01-27 02:00:00 175.0 0.3 17.8 2.0 88.0 0.053000 2.000000e-03 6.0 1.0 1.0 2013.0 27.0 27.0 2.0 4.0\n", "2013-01-27 03:00:00 2013-01-27 03:00:00 264.0 0.8 16.8 2.0 92.0 0.072000 1.000000e-03 6.0 1.0 1.0 2013.0 27.0 27.0 3.0 4.0\n", "2013-01-27 04:00:00 2013-01-27 04:00:00 187.0 0.4 16.2 2.0 94.0 0.063000 1.000000e-03 6.0 1.0 1.0 2013.0 27.0 27.0 4.0 4.0\n", "... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ...\n", "2015-12-31 18:00:00 2015-12-31 18:00:00 85.0 1.4 14.6 10.0 76.0 0.076199 1.747050e-03 3.0 4.0 12.0 2015.0 365.0 31.0 18.0 53.0\n", "2015-12-31 19:00:00 2015-12-31 19:00:00 99.0 1.3 14.6 9.0 75.0 0.098153 1.108100e-04 3.0 4.0 12.0 2015.0 365.0 31.0 19.0 53.0\n", "2015-12-31 20:00:00 2015-12-31 20:00:00 210.0 0.7 14.5 10.0 79.0 0.106758 8.930000e-06 3.0 4.0 12.0 2015.0 365.0 31.0 20.0 53.0\n", "2015-12-31 21:00:00 2015-12-31 21:00:00 185.0 0.8 14.6 10.0 81.0 0.087641 1.000000e-08 3.0 4.0 12.0 2015.0 365.0 31.0 21.0 53.0\n", "2015-12-31 22:00:00 2015-12-31 22:00:00 147.0 0.9 14.1 10.0 85.0 0.089011 0.000000e+00 3.0 4.0 12.0 2015.0 365.0 31.0 22.0 53.0\n", "\n", "[25655 rows x 16 columns]" ] }, "execution_count": 15, "metadata": {}, "output_type": "execute_result" } ], "source": [ "# Forward-fill the measurements over the hours that asfreq('h') inserted.\n", "# DataFrame.interpolate(method='ffill') is deprecated in recent pandas;\n", "# DataFrame.ffill() is the equivalent call.\n", "df1 = df.ffill()\n", "# Rebuild the timestamp and calendar columns from the hourly index instead of\n", "# keeping their forward-filled values: a filled value stamps every inserted\n", "# hour with the hour/day/week of the last observed row.\n", "df1['MeasurementDateTime'] = df1.index\n", "df1['dayofweek'] = df1.index.dayofweek\n", "df1['quarter'] = df1.index.quarter\n", "df1['month'] = df1.index.month\n", "df1['year'] = df1.index.year\n", "df1['dayofyear'] = df1.index.dayofyear\n", "df1['dayofmonth'] = df1.index.day\n", "df1['datehour'] = df1.index.hour\n", "# isocalendar().week is UInt32; cast to a plain int64 column.\n", "df1['weekofyear'] = df1.index.isocalendar().week.astype('int64')\n", "df1" ] }, { "cell_type": "code", "execution_count": 16, "metadata": {}, "outputs": [ { "data": { "text/html": [ "
\n", "\n", "\n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", "
dayofweekquartermonthyeardayofyeardayofmonthdatehourweekofyearWD-HourWS-HourTemp-HourSR-HourRH-HourNO2O3
date
2013-01-27 00:00:006.01.01.02013.027.027.00.04.0114.01.718.15.0100.00.0170002.600000e-02
2013-01-27 01:00:006.01.01.02013.027.027.01.04.0151.00.417.83.089.00.0860001.000000e-03
2013-01-27 02:00:006.01.01.02013.027.027.02.04.0175.00.317.82.088.00.0530002.000000e-03
2013-01-27 03:00:006.01.01.02013.027.027.03.04.0264.00.816.82.092.00.0720001.000000e-03
2013-01-27 04:00:006.01.01.02013.027.027.04.04.0187.00.416.22.094.00.0630001.000000e-03
................................................
2015-12-31 18:00:003.04.012.02015.0365.031.018.053.085.01.414.610.076.00.0761991.747050e-03
2015-12-31 19:00:003.04.012.02015.0365.031.019.053.099.01.314.69.075.00.0981531.108100e-04
2015-12-31 20:00:003.04.012.02015.0365.031.020.053.0210.00.714.510.079.00.1067588.930000e-06
2015-12-31 21:00:003.04.012.02015.0365.031.021.053.0185.00.814.610.081.00.0876411.000000e-08
2015-12-31 22:00:003.04.012.02015.0365.031.022.053.0147.00.914.110.085.00.0890110.000000e+00
\n", "

25655 rows × 15 columns

\n", "
" ], "text/plain": [ " dayofweek quarter month year dayofyear dayofmonth datehour weekofyear WD-Hour WS-Hour Temp-Hour SR-Hour RH-Hour NO2 O3\n", "date \n", "2013-01-27 00:00:00 6.0 1.0 1.0 2013.0 27.0 27.0 0.0 4.0 114.0 1.7 18.1 5.0 100.0 0.017000 2.600000e-02\n", "2013-01-27 01:00:00 6.0 1.0 1.0 2013.0 27.0 27.0 1.0 4.0 151.0 0.4 17.8 3.0 89.0 0.086000 1.000000e-03\n", "2013-01-27 02:00:00 6.0 1.0 1.0 2013.0 27.0 27.0 2.0 4.0 175.0 0.3 17.8 2.0 88.0 0.053000 2.000000e-03\n", "2013-01-27 03:00:00 6.0 1.0 1.0 2013.0 27.0 27.0 3.0 4.0 264.0 0.8 16.8 2.0 92.0 0.072000 1.000000e-03\n", "2013-01-27 04:00:00 6.0 1.0 1.0 2013.0 27.0 27.0 4.0 4.0 187.0 0.4 16.2 2.0 94.0 0.063000 1.000000e-03\n", "... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ...\n", "2015-12-31 18:00:00 3.0 4.0 12.0 2015.0 365.0 31.0 18.0 53.0 85.0 1.4 14.6 10.0 76.0 0.076199 1.747050e-03\n", "2015-12-31 19:00:00 3.0 4.0 12.0 2015.0 365.0 31.0 19.0 53.0 99.0 1.3 14.6 9.0 75.0 0.098153 1.108100e-04\n", "2015-12-31 20:00:00 3.0 4.0 12.0 2015.0 365.0 31.0 20.0 53.0 210.0 0.7 14.5 10.0 79.0 0.106758 8.930000e-06\n", "2015-12-31 21:00:00 3.0 4.0 12.0 2015.0 365.0 31.0 21.0 53.0 185.0 0.8 14.6 10.0 81.0 0.087641 1.000000e-08\n", "2015-12-31 22:00:00 3.0 4.0 12.0 2015.0 365.0 31.0 22.0 53.0 147.0 0.9 14.1 10.0 85.0 0.089011 0.000000e+00\n", "\n", "[25655 rows x 15 columns]" ] }, "execution_count": 16, "metadata": {}, "output_type": "execute_result" } ], "source": [ "cols =['dayofweek', 'quarter', 'month', 'year', 'dayofyear', 'dayofmonth', 'datehour', 'weekofyear', 'WD-Hour', 'WS-Hour', 'Temp-Hour', 'SR-Hour', 'RH-Hour', 'NO2', 'O3']\n", "df1 = df1[cols]\n", "df1" ] }, { "cell_type": "code", "execution_count": 17, "metadata": {}, "outputs": [ { "data": { "text/plain": [ "dayofweek 0\n", "quarter 0\n", "month 0\n", "year 0\n", "dayofyear 0\n", "dayofmonth 0\n", "datehour 0\n", "weekofyear 0\n", "WD-Hour 0\n", "WS-Hour 0\n", "Temp-Hour 0\n", "SR-Hour 0\n", "RH-Hour 0\n", "NO2 0\n", "O3 0\n", "dtype: 
int64" ] }, "execution_count": 17, "metadata": {}, "output_type": "execute_result" } ], "source": [ "df1.isna().sum()" ] }, { "cell_type": "code", "execution_count": 16, "metadata": {}, "outputs": [], "source": [ "#cols =['dayofweek', 'quarter', 'month', 'year', 'dayofyear', 'dayofmonth', 'datehour', 'weekofyear','MeasurementDateTime', 'WD-Hour', 'WS-Hour', 'Temp-Hour', 'SR-Hour', 'RH-Hour', 'NO2', 'O3']\n", "#df = df[cols]\n", "#df = df.set_index('MeasurementDateTime')\n", "#df" ] }, { "cell_type": "code", "execution_count": 17, "metadata": {}, "outputs": [], "source": [ "#df1 = df1.drop(['year'],axis=1)\n", "#df1" ] }, { "cell_type": "code", "execution_count": 18, "metadata": {}, "outputs": [ { "data": { "text/html": [ "\n", "\n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", "
 DescriptionValue
0Session id123
1TargetO3
2Target typeregression
3Data shape(25655, 15)
4Train data shape(17958, 15)
5Test data shape(7697, 15)
6Numeric features14
7Preprocess1
8Imputation typesimple
9Numeric imputationmean
10Categorical imputationconstant
11Fold GeneratorKFold
12Fold Number10
13CPU Jobs-1
14Log Experiment0
15Experiment Namereg-default-name
16USI9345
\n" ], "text/plain": [ "" ] }, "metadata": {}, "output_type": "display_data" } ], "source": [ "from pycaret.regression import *\n", "exp_reg101 = setup(data = df1, target = 'O3', session_id=123) " ] }, { "cell_type": "code", "execution_count": 19, "metadata": {}, "outputs": [ { "data": { "text/html": [ "\n", "\n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", "
 ModelMAEMSERMSER2RMSLEMAPETT (Sec)
catboostCatBoost Regressor0.00430.00000.00630.88090.00614.38492.2550
xgboostExtreme Gradient Boosting0.00430.00000.00640.87870.00616.06820.4340
etExtra Trees Regressor0.00410.00000.00640.87770.00613.45330.9890
lightgbmLight Gradient Boosting Machine0.00480.00000.00700.85360.00679.23790.2320
rfRandom Forest Regressor0.00450.00010.00710.84950.00683.92571.6330
gbrGradient Boosting Regressor0.00620.00010.00890.76290.008615.37170.5790
dtDecision Tree Regressor0.00620.00010.01040.67870.01001.73370.0430
lrLinear Regression0.00840.00010.01150.60510.011037.20530.6290
larLeast Angle Regression0.00840.00010.01150.60510.011037.55420.0130
brBayesian Ridge0.00840.00010.01150.60510.011037.20330.0150
ridgeRidge Regression0.00840.00010.01150.60350.011037.87410.0140
knnK Neighbors Regressor0.00840.00010.01180.58520.011448.53640.0410
adaAdaBoost Regressor0.01100.00020.01310.48640.0127143.35270.3020
ompOrthogonal Matching Pursuit0.01180.00020.01510.32080.0146159.73250.0110
enElastic Net0.01190.00020.01510.32010.0146177.80480.0100
lassoLasso Regression0.01210.00020.01540.29320.0149188.82340.0140
huberHuber Regressor0.01160.00030.01690.09270.015931.14560.2690
llarLasso Least Angle Regression0.01470.00030.0183-0.00090.0177233.47500.0150
dummyDummy Regressor0.01470.00030.0183-0.00090.0177233.47500.0120
parPassive Aggressive Regressor0.04300.00210.0458-5.28910.0439586.93110.0130
\n" ], "text/plain": [ "" ] }, "metadata": {}, "output_type": "display_data" }, { "data": { "text/plain": [ "" ] }, "execution_count": 19, "metadata": {}, "output_type": "execute_result" } ], "source": [ "compare_models()\n" ] }, { "cell_type": "code", "execution_count": 20, "metadata": {}, "outputs": [ { "data": { "text/html": [ "\n", "\n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", "
 MAEMSERMSER2RMSLEMAPE
00.00430.00000.00610.89470.00590.4868
10.00420.00000.00640.87610.00610.4432
20.00430.00000.00660.87440.006324.7232
30.00420.00000.00620.88760.00590.5085
40.00420.00000.00610.89340.00590.4957
50.00430.00000.00620.87210.00600.3841
60.00460.00000.00690.86350.00667.5846
70.00440.00000.00640.88150.00611.6161
80.00430.00000.00630.87630.00601.4757
90.00420.00000.00590.88940.00576.1310
Mean0.00430.00000.00630.88090.00614.3849
SD0.00010.00000.00030.00960.00027.2108
\n" ], "text/plain": [ "" ] }, "metadata": {}, "output_type": "display_data" } ], "source": [ "catboost = create_model('catboost')" ] }, { "cell_type": "code", "execution_count": 21, "metadata": {}, "outputs": [ { "data": { "text/html": [ "\n", "\n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", "
 MAEMSERMSER2RMSLEMAPE
00.00430.00000.00630.88830.00610.4205
10.00420.00000.00660.86970.00630.4188
20.00430.00000.00670.87060.006433.1689
30.00420.00000.00620.88690.00601.3693
40.00400.00000.00600.89870.00570.4582
50.00420.00000.00610.87610.00590.3537
60.00450.00000.00700.86110.00676.0101
70.00450.00000.00650.87560.00620.8085
80.00430.00000.00630.87420.00611.3488
90.00420.00000.00600.88580.005816.3256
Mean0.00430.00000.00640.87870.00616.0682
SD0.00010.00000.00030.01050.000310.1962
\n" ], "text/plain": [ "" ] }, "metadata": {}, "output_type": "display_data" } ], "source": [ "xgboost = create_model('xgboost')" ] }, { "cell_type": "code", "execution_count": 22, "metadata": {}, "outputs": [ { "data": { "application/vnd.jupyter.widget-view+json": { "model_id": "2937c8c598234ec88f05eaabfd90b449", "version_major": 2, "version_minor": 0 }, "text/plain": [ "interactive(children=(ToggleButtons(description='Plot Type:', icons=('',), options=(('Hyperparameters', 'param…" ] }, "metadata": {}, "output_type": "display_data" } ], "source": [ "evaluate_model(catboost)" ] }, { "cell_type": "code", "execution_count": 23, "metadata": {}, "outputs": [ { "data": { "application/vnd.jupyter.widget-view+json": { "model_id": "21257fbf25164af38d6669499e8ed2a2", "version_major": 2, "version_minor": 0 }, "text/plain": [ "interactive(children=(ToggleButtons(description='Plot Type:', icons=('',), options=(('Hyperparameters', 'param…" ] }, "metadata": {}, "output_type": "display_data" } ], "source": [ "evaluate_model(catboost)" ] }, { "cell_type": "code", "execution_count": 24, "metadata": {}, "outputs": [ { "data": { "application/vnd.jupyter.widget-view+json": { "model_id": "7a05f3d53a314fb3b797cf0869ac11b2", "version_major": 2, "version_minor": 0 }, "text/plain": [ "interactive(children=(ToggleButtons(description='Plot Type:', icons=('',), options=(('Hyperparameters', 'param…" ] }, "metadata": {}, "output_type": "display_data" } ], "source": [ "evaluate_model(catboost)" ] }, { "cell_type": "code", "execution_count": 25, "metadata": {}, "outputs": [ { "data": { "application/vnd.jupyter.widget-view+json": { "model_id": "1dc376a7450645b7ba8002498790e066", "version_major": 2, "version_minor": 0 }, "text/plain": [ "interactive(children=(ToggleButtons(description='Plot Type:', icons=('',), options=(('Hyperparameters', 'param…" ] }, "metadata": {}, "output_type": "display_data" } ], "source": [ "evaluate_model(catboost)" ] }, { "cell_type": "code", "execution_count": 26, 
"metadata": {}, "outputs": [ { "data": { "application/vnd.jupyter.widget-view+json": { "model_id": "2c31c6d3b05148de9ff22f3bb9deb8fb", "version_major": 2, "version_minor": 0 }, "text/plain": [ "interactive(children=(ToggleButtons(description='Plot Type:', icons=('',), options=(('Hyperparameters', 'param…" ] }, "metadata": {}, "output_type": "display_data" } ], "source": [ "evaluate_model(catboost)" ] }, { "cell_type": "code", "execution_count": 27, "metadata": {}, "outputs": [ { "data": { "application/vnd.jupyter.widget-view+json": { "model_id": "3a4d314bb5d14394a82fca54b524a473", "version_major": 2, "version_minor": 0 }, "text/plain": [ "interactive(children=(ToggleButtons(description='Plot Type:', icons=('',), options=(('Hyperparameters', 'param…" ] }, "metadata": {}, "output_type": "display_data" } ], "source": [ "evaluate_model(catboost)" ] }, { "cell_type": "code", "execution_count": 18, "metadata": {}, "outputs": [], "source": [ "from xgboost import XGBRegressor\n", "\n", "xgb = (XGBRegressor(base_score=0.5, booster='gbtree', callbacks=None,\n", " colsample_bylevel=1, colsample_bynode=1, colsample_bytree=0.7,\n", " early_stopping_rounds=None, enable_categorical=False,\n", " eval_metric=None, gamma=0, gpu_id=-1, grow_policy='depthwise',\n", " importance_type=None, interaction_constraints='',\n", " learning_rate=0.1, max_bin=256, max_cat_to_onehot=4,\n", " max_delta_step=0, max_depth=9, max_leaves=0, min_child_weight=1,\n", " monotone_constraints='()', n_estimators=290,\n", " n_jobs=-1, num_parallel_tree=1, predictor='auto', random_state=123,\n", " reg_alpha=0.05, reg_lambda=0.1))" ] }, { "cell_type": "code", "execution_count": 19, "metadata": {}, "outputs": [ { "data": { "text/html": [ "
\n", "\n", "\n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", "
dayofweekquartermonthyeardayofyeardayofmonthdatehourweekofyearWD-HourWS-HourTemp-HourSR-HourRH-HourNO2O3
date
2013-01-27 00:00:006.01.01.02013.027.027.00.04.0114.01.718.15.0100.00.0170002.600000e-02
2013-01-27 01:00:006.01.01.02013.027.027.01.04.0151.00.417.83.089.00.0860001.000000e-03
2013-01-27 02:00:006.01.01.02013.027.027.02.04.0175.00.317.82.088.00.0530002.000000e-03
2013-01-27 03:00:006.01.01.02013.027.027.03.04.0264.00.816.82.092.00.0720001.000000e-03
2013-01-27 04:00:006.01.01.02013.027.027.04.04.0187.00.416.22.094.00.0630001.000000e-03
................................................
2015-12-31 18:00:003.04.012.02015.0365.031.018.053.085.01.414.610.076.00.0761991.747050e-03
2015-12-31 19:00:003.04.012.02015.0365.031.019.053.099.01.314.69.075.00.0981531.108100e-04
2015-12-31 20:00:003.04.012.02015.0365.031.020.053.0210.00.714.510.079.00.1067588.930000e-06
2015-12-31 21:00:003.04.012.02015.0365.031.021.053.0185.00.814.610.081.00.0876411.000000e-08
2015-12-31 22:00:003.04.012.02015.0365.031.022.053.0147.00.914.110.085.00.0890110.000000e+00
\n", "

25655 rows × 15 columns

\n", "
" ], "text/plain": [ " dayofweek quarter month year dayofyear dayofmonth datehour weekofyear WD-Hour WS-Hour Temp-Hour SR-Hour RH-Hour NO2 O3\n", "date \n", "2013-01-27 00:00:00 6.0 1.0 1.0 2013.0 27.0 27.0 0.0 4.0 114.0 1.7 18.1 5.0 100.0 0.017000 2.600000e-02\n", "2013-01-27 01:00:00 6.0 1.0 1.0 2013.0 27.0 27.0 1.0 4.0 151.0 0.4 17.8 3.0 89.0 0.086000 1.000000e-03\n", "2013-01-27 02:00:00 6.0 1.0 1.0 2013.0 27.0 27.0 2.0 4.0 175.0 0.3 17.8 2.0 88.0 0.053000 2.000000e-03\n", "2013-01-27 03:00:00 6.0 1.0 1.0 2013.0 27.0 27.0 3.0 4.0 264.0 0.8 16.8 2.0 92.0 0.072000 1.000000e-03\n", "2013-01-27 04:00:00 6.0 1.0 1.0 2013.0 27.0 27.0 4.0 4.0 187.0 0.4 16.2 2.0 94.0 0.063000 1.000000e-03\n", "... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ...\n", "2015-12-31 18:00:00 3.0 4.0 12.0 2015.0 365.0 31.0 18.0 53.0 85.0 1.4 14.6 10.0 76.0 0.076199 1.747050e-03\n", "2015-12-31 19:00:00 3.0 4.0 12.0 2015.0 365.0 31.0 19.0 53.0 99.0 1.3 14.6 9.0 75.0 0.098153 1.108100e-04\n", "2015-12-31 20:00:00 3.0 4.0 12.0 2015.0 365.0 31.0 20.0 53.0 210.0 0.7 14.5 10.0 79.0 0.106758 8.930000e-06\n", "2015-12-31 21:00:00 3.0 4.0 12.0 2015.0 365.0 31.0 21.0 53.0 185.0 0.8 14.6 10.0 81.0 0.087641 1.000000e-08\n", "2015-12-31 22:00:00 3.0 4.0 12.0 2015.0 365.0 31.0 22.0 53.0 147.0 0.9 14.1 10.0 85.0 0.089011 0.000000e+00\n", "\n", "[25655 rows x 15 columns]" ] }, "execution_count": 19, "metadata": {}, "output_type": "execute_result" } ], "source": [ "df1" ] }, { "cell_type": "code", "execution_count": 20, "metadata": {}, "outputs": [ { "data": { "text/plain": [ "Index(['dayofweek', 'quarter', 'month', 'year', 'dayofyear', 'dayofmonth', 'datehour', 'weekofyear', 'WD-Hour', 'WS-Hour', 'Temp-Hour', 'SR-Hour', 'RH-Hour', 'NO2', 'O3'], dtype='object')" ] }, "execution_count": 20, "metadata": {}, "output_type": "execute_result" } ], "source": [ "from sklearn.model_selection import train_test_split\n", "df1.columns" ] }, { "cell_type": "code", "execution_count": 21, "metadata": {}, 
"outputs": [ { "data": { "text/html": [ "
\n", "\n", "\n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", "
dayofweekquartermonthyeardayofyeardayofmonthdatehourweekofyearWD_HourWS_HourTemp_HourSR_HourRH_HourNO2O3
date
2013-01-27 00:00:006.01.01.02013.027.027.00.04.0114.01.718.15.0100.00.0170002.600000e-02
2013-01-27 01:00:006.01.01.02013.027.027.01.04.0151.00.417.83.089.00.0860001.000000e-03
2013-01-27 02:00:006.01.01.02013.027.027.02.04.0175.00.317.82.088.00.0530002.000000e-03
2013-01-27 03:00:006.01.01.02013.027.027.03.04.0264.00.816.82.092.00.0720001.000000e-03
2013-01-27 04:00:006.01.01.02013.027.027.04.04.0187.00.416.22.094.00.0630001.000000e-03
................................................
2015-12-31 18:00:003.04.012.02015.0365.031.018.053.085.01.414.610.076.00.0761991.747050e-03
2015-12-31 19:00:003.04.012.02015.0365.031.019.053.099.01.314.69.075.00.0981531.108100e-04
2015-12-31 20:00:003.04.012.02015.0365.031.020.053.0210.00.714.510.079.00.1067588.930000e-06
2015-12-31 21:00:003.04.012.02015.0365.031.021.053.0185.00.814.610.081.00.0876411.000000e-08
2015-12-31 22:00:003.04.012.02015.0365.031.022.053.0147.00.914.110.085.00.0890110.000000e+00
\n", "

25655 rows × 15 columns

\n", "
" ], "text/plain": [ " dayofweek quarter month year dayofyear dayofmonth datehour weekofyear WD_Hour WS_Hour Temp_Hour SR_Hour RH_Hour NO2 O3\n", "date \n", "2013-01-27 00:00:00 6.0 1.0 1.0 2013.0 27.0 27.0 0.0 4.0 114.0 1.7 18.1 5.0 100.0 0.017000 2.600000e-02\n", "2013-01-27 01:00:00 6.0 1.0 1.0 2013.0 27.0 27.0 1.0 4.0 151.0 0.4 17.8 3.0 89.0 0.086000 1.000000e-03\n", "2013-01-27 02:00:00 6.0 1.0 1.0 2013.0 27.0 27.0 2.0 4.0 175.0 0.3 17.8 2.0 88.0 0.053000 2.000000e-03\n", "2013-01-27 03:00:00 6.0 1.0 1.0 2013.0 27.0 27.0 3.0 4.0 264.0 0.8 16.8 2.0 92.0 0.072000 1.000000e-03\n", "2013-01-27 04:00:00 6.0 1.0 1.0 2013.0 27.0 27.0 4.0 4.0 187.0 0.4 16.2 2.0 94.0 0.063000 1.000000e-03\n", "... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ...\n", "2015-12-31 18:00:00 3.0 4.0 12.0 2015.0 365.0 31.0 18.0 53.0 85.0 1.4 14.6 10.0 76.0 0.076199 1.747050e-03\n", "2015-12-31 19:00:00 3.0 4.0 12.0 2015.0 365.0 31.0 19.0 53.0 99.0 1.3 14.6 9.0 75.0 0.098153 1.108100e-04\n", "2015-12-31 20:00:00 3.0 4.0 12.0 2015.0 365.0 31.0 20.0 53.0 210.0 0.7 14.5 10.0 79.0 0.106758 8.930000e-06\n", "2015-12-31 21:00:00 3.0 4.0 12.0 2015.0 365.0 31.0 21.0 53.0 185.0 0.8 14.6 10.0 81.0 0.087641 1.000000e-08\n", "2015-12-31 22:00:00 3.0 4.0 12.0 2015.0 365.0 31.0 22.0 53.0 147.0 0.9 14.1 10.0 85.0 0.089011 0.000000e+00\n", "\n", "[25655 rows x 15 columns]" ] }, "execution_count": 21, "metadata": {}, "output_type": "execute_result" } ], "source": [ "# Swap the hyphen in the five '*-Hour' sensor columns for an underscore in a single\n", "# literal (non-regex) replacement; the remaining column names contain no '-Hour'.\n", "df1.columns = df1.columns.str.replace('-Hour', '_Hour', regex=False)\n", "df1" ] }, { "cell_type": "code", "execution_count": 22, "metadata": {}, "outputs": [ { "data": { "text/html": [ "
\n", "\n", "\n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", "
yearmonthquarterdayofyeardayofmonthweekofyeardayofweekdatehourWD_HourWS_HourTemp_HourSR_HourRH_HourNO2O3
date
2013-01-27 00:00:002013.01.01.027.027.04.06.00.0114.01.718.15.0100.00.0170002.600000e-02
2013-01-27 01:00:002013.01.01.027.027.04.06.01.0151.00.417.83.089.00.0860001.000000e-03
2013-01-27 02:00:002013.01.01.027.027.04.06.02.0175.00.317.82.088.00.0530002.000000e-03
2013-01-27 03:00:002013.01.01.027.027.04.06.03.0264.00.816.82.092.00.0720001.000000e-03
2013-01-27 04:00:002013.01.01.027.027.04.06.04.0187.00.416.22.094.00.0630001.000000e-03
................................................
2015-12-31 18:00:002015.012.04.0365.031.053.03.018.085.01.414.610.076.00.0761991.747050e-03
2015-12-31 19:00:002015.012.04.0365.031.053.03.019.099.01.314.69.075.00.0981531.108100e-04
2015-12-31 20:00:002015.012.04.0365.031.053.03.020.0210.00.714.510.079.00.1067588.930000e-06
2015-12-31 21:00:002015.012.04.0365.031.053.03.021.0185.00.814.610.081.00.0876411.000000e-08
2015-12-31 22:00:002015.012.04.0365.031.053.03.022.0147.00.914.110.085.00.0890110.000000e+00
\n", "

25655 rows × 15 columns

\n", "
" ], "text/plain": [ " year month quarter dayofyear dayofmonth weekofyear dayofweek datehour WD_Hour WS_Hour Temp_Hour SR_Hour RH_Hour NO2 O3\n", "date \n", "2013-01-27 00:00:00 2013.0 1.0 1.0 27.0 27.0 4.0 6.0 0.0 114.0 1.7 18.1 5.0 100.0 0.017000 2.600000e-02\n", "2013-01-27 01:00:00 2013.0 1.0 1.0 27.0 27.0 4.0 6.0 1.0 151.0 0.4 17.8 3.0 89.0 0.086000 1.000000e-03\n", "2013-01-27 02:00:00 2013.0 1.0 1.0 27.0 27.0 4.0 6.0 2.0 175.0 0.3 17.8 2.0 88.0 0.053000 2.000000e-03\n", "2013-01-27 03:00:00 2013.0 1.0 1.0 27.0 27.0 4.0 6.0 3.0 264.0 0.8 16.8 2.0 92.0 0.072000 1.000000e-03\n", "2013-01-27 04:00:00 2013.0 1.0 1.0 27.0 27.0 4.0 6.0 4.0 187.0 0.4 16.2 2.0 94.0 0.063000 1.000000e-03\n", "... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ...\n", "2015-12-31 18:00:00 2015.0 12.0 4.0 365.0 31.0 53.0 3.0 18.0 85.0 1.4 14.6 10.0 76.0 0.076199 1.747050e-03\n", "2015-12-31 19:00:00 2015.0 12.0 4.0 365.0 31.0 53.0 3.0 19.0 99.0 1.3 14.6 9.0 75.0 0.098153 1.108100e-04\n", "2015-12-31 20:00:00 2015.0 12.0 4.0 365.0 31.0 53.0 3.0 20.0 210.0 0.7 14.5 10.0 79.0 0.106758 8.930000e-06\n", "2015-12-31 21:00:00 2015.0 12.0 4.0 365.0 31.0 53.0 3.0 21.0 185.0 0.8 14.6 10.0 81.0 0.087641 1.000000e-08\n", "2015-12-31 22:00:00 2015.0 12.0 4.0 365.0 31.0 53.0 3.0 22.0 147.0 0.9 14.1 10.0 85.0 0.089011 0.000000e+00\n", "\n", "[25655 rows x 15 columns]" ] }, "execution_count": 22, "metadata": {}, "output_type": "execute_result" } ], "source": [ "cols = ['year','month','quarter','dayofyear','dayofmonth','weekofyear','dayofweek','datehour','WD_Hour', 'WS_Hour', 'Temp_Hour', 'SR_Hour', 'RH_Hour', 'NO2', 'O3']\n", "df1 = df1[cols]\n", "df1" ] }, { "cell_type": "code", "execution_count": 23, "metadata": {}, "outputs": [], "source": [ "# Pick the target by name so the split no longer relies on 'O3' being the last column.\n", "X, y = df1.drop(columns='O3'), df1['O3']" ] }, { "cell_type": "code", "execution_count": 24, "metadata": {}, "outputs": [ { "data": { "text/html": [ "
\n", "\n", "\n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", "
yearmonthquarterdayofyeardayofmonthweekofyeardayofweekdatehourWD_HourWS_HourTemp_HourSR_HourRH_HourNO2
date
2013-01-27 00:00:002013.01.01.027.027.04.06.00.0114.01.718.15.0100.00.017000
2013-01-27 01:00:002013.01.01.027.027.04.06.01.0151.00.417.83.089.00.086000
2013-01-27 02:00:002013.01.01.027.027.04.06.02.0175.00.317.82.088.00.053000
2013-01-27 03:00:002013.01.01.027.027.04.06.03.0264.00.816.82.092.00.072000
2013-01-27 04:00:002013.01.01.027.027.04.06.04.0187.00.416.22.094.00.063000
.............................................
2015-12-31 18:00:002015.012.04.0365.031.053.03.018.085.01.414.610.076.00.076199
2015-12-31 19:00:002015.012.04.0365.031.053.03.019.099.01.314.69.075.00.098153
2015-12-31 20:00:002015.012.04.0365.031.053.03.020.0210.00.714.510.079.00.106758
2015-12-31 21:00:002015.012.04.0365.031.053.03.021.0185.00.814.610.081.00.087641
2015-12-31 22:00:002015.012.04.0365.031.053.03.022.0147.00.914.110.085.00.089011
\n", "

25655 rows × 14 columns

\n", "
" ], "text/plain": [ " year month quarter dayofyear dayofmonth weekofyear dayofweek datehour WD_Hour WS_Hour Temp_Hour SR_Hour RH_Hour NO2\n", "date \n", "2013-01-27 00:00:00 2013.0 1.0 1.0 27.0 27.0 4.0 6.0 0.0 114.0 1.7 18.1 5.0 100.0 0.017000\n", "2013-01-27 01:00:00 2013.0 1.0 1.0 27.0 27.0 4.0 6.0 1.0 151.0 0.4 17.8 3.0 89.0 0.086000\n", "2013-01-27 02:00:00 2013.0 1.0 1.0 27.0 27.0 4.0 6.0 2.0 175.0 0.3 17.8 2.0 88.0 0.053000\n", "2013-01-27 03:00:00 2013.0 1.0 1.0 27.0 27.0 4.0 6.0 3.0 264.0 0.8 16.8 2.0 92.0 0.072000\n", "2013-01-27 04:00:00 2013.0 1.0 1.0 27.0 27.0 4.0 6.0 4.0 187.0 0.4 16.2 2.0 94.0 0.063000\n", "... ... ... ... ... ... ... ... ... ... ... ... ... ... ...\n", "2015-12-31 18:00:00 2015.0 12.0 4.0 365.0 31.0 53.0 3.0 18.0 85.0 1.4 14.6 10.0 76.0 0.076199\n", "2015-12-31 19:00:00 2015.0 12.0 4.0 365.0 31.0 53.0 3.0 19.0 99.0 1.3 14.6 9.0 75.0 0.098153\n", "2015-12-31 20:00:00 2015.0 12.0 4.0 365.0 31.0 53.0 3.0 20.0 210.0 0.7 14.5 10.0 79.0 0.106758\n", "2015-12-31 21:00:00 2015.0 12.0 4.0 365.0 31.0 53.0 3.0 21.0 185.0 0.8 14.6 10.0 81.0 0.087641\n", "2015-12-31 22:00:00 2015.0 12.0 4.0 365.0 31.0 53.0 3.0 22.0 147.0 0.9 14.1 10.0 85.0 0.089011\n", "\n", "[25655 rows x 14 columns]" ] }, "execution_count": 24, "metadata": {}, "output_type": "execute_result" } ], "source": [ "X" ] }, { "cell_type": "code", "execution_count": 25, "metadata": {}, "outputs": [ { "data": { "text/plain": [ "date\n", "2013-01-27 00:00:00 2.600000e-02\n", "2013-01-27 01:00:00 1.000000e-03\n", "2013-01-27 02:00:00 2.000000e-03\n", "2013-01-27 03:00:00 1.000000e-03\n", "2013-01-27 04:00:00 1.000000e-03\n", " ... 
\n", "2015-12-31 18:00:00 1.747050e-03\n", "2015-12-31 19:00:00 1.108100e-04\n", "2015-12-31 20:00:00 8.930000e-06\n", "2015-12-31 21:00:00 1.000000e-08\n", "2015-12-31 22:00:00 0.000000e+00\n", "Freq: H, Name: O3, Length: 25655, dtype: float64" ] }, "execution_count": 25, "metadata": {}, "output_type": "execute_result" } ], "source": [ "y" ] }, { "cell_type": "code", "execution_count": 26, "metadata": {}, "outputs": [], "source": [ "train_X, test_X, train_y, test_y = train_test_split(X, y,\n", " test_size = 0.2, random_state = 123)" ] }, { "cell_type": "code", "execution_count": 27, "metadata": {}, "outputs": [ { "data": { "text/plain": [ "XGBRegressor(base_score=0.5, booster='gbtree', callbacks=None,\n", " colsample_bylevel=1, colsample_bynode=1, colsample_bytree=0.7,\n", " early_stopping_rounds=None, enable_categorical=False,\n", " eval_metric=None, gamma=0, gpu_id=-1, grow_policy='depthwise',\n", " importance_type=None, interaction_constraints='',\n", " learning_rate=0.1, max_bin=256, max_cat_to_onehot=4,\n", " max_delta_step=0, max_depth=9, max_leaves=0, min_child_weight=1,\n", " missing=nan, monotone_constraints='()', n_estimators=290,\n", " n_jobs=-1, num_parallel_tree=1, predictor='auto', random_state=123,\n", " reg_alpha=0.05, reg_lambda=0.1, ...)" ] }, "execution_count": 27, "metadata": {}, "output_type": "execute_result" } ], "source": [ "xgb.fit(train_X, train_y)" ] }, { "cell_type": "code", "execution_count": 28, "metadata": {}, "outputs": [ { "data": { "text/plain": [ "array([0.00045016, 0.02260762, 0.0020332 , ..., 0.03950962, 0.06227764,\n", " 0.01274938], dtype=float32)" ] }, "execution_count": 28, "metadata": {}, "output_type": "execute_result" } ], "source": [ "pred = xgb.predict(test_X)\n", "pred" ] }, { "cell_type": "code", "execution_count": 29, "metadata": {}, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ "RMSE : 0.005350\n" ] } ], "source": [ "# RMSE Computation\n", "from sklearn.metrics import mean_squared_error as 
MSE\n", "rmse = np.sqrt(MSE(test_y, pred))\n", "print(\"RMSE : % f\" %(rmse))" ] }, { "cell_type": "code", "execution_count": 30, "metadata": {}, "outputs": [ { "data": { "text/plain": [ "0.9756177779346994" ] }, "execution_count": 30, "metadata": {}, "output_type": "execute_result" } ], "source": [ "xgb.score(train_X, train_y)" ] }, { "cell_type": "code", "execution_count": 31, "metadata": {}, "outputs": [ { "data": { "text/plain": [ "0.918161859103899" ] }, "execution_count": 31, "metadata": {}, "output_type": "execute_result" } ], "source": [ "xgb.score(test_X, test_y)\n" ] }, { "cell_type": "code", "execution_count": 32, "metadata": {}, "outputs": [ { "data": { "text/plain": [ "Index(['year', 'month', 'quarter', 'dayofyear', 'dayofmonth', 'weekofyear', 'dayofweek', 'datehour', 'WD_Hour', 'WS_Hour', 'Temp_Hour', 'SR_Hour', 'RH_Hour', 'NO2', 'O3'], dtype='object')" ] }, "execution_count": 32, "metadata": {}, "output_type": "execute_result" } ], "source": [ "df1.columns" ] }, { "cell_type": "code", "execution_count": 33, "metadata": {}, "outputs": [ { "data": { "text/plain": [ "array([0.00045016, 0.02260762, 0.0020332 , ..., 0.03950962, 0.06227764,\n", " 0.01274938], dtype=float32)" ] }, "execution_count": 33, "metadata": {}, "output_type": "execute_result" } ], "source": [ "xgb.predict(test_X)" ] }, { "cell_type": "code", "execution_count": 34, "metadata": {}, "outputs": [ { "data": { "text/html": [ "
\n", "\n", "\n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", "
012345678910111213
02015832142316151182.848.4718180.031
\n", "
" ], "text/plain": [ " 0 1 2 3 4 5 6 7 8 9 10 11 12 13\n", "0 2015 8 3 214 2 31 6 15 118 2.8 48.4 718 18 0.031" ] }, "execution_count": 34, "metadata": {}, "output_type": "execute_result" } ], "source": [ "test = [2015,8,3,214,2,31,6,15,118,2.8,48.4,718,18,0.031]\n", "p = pd.DataFrame([test])\n", "p\n" ] }, { "cell_type": "code", "execution_count": 35, "metadata": {}, "outputs": [ { "data": { "text/plain": [ "array([0.14080931], dtype=float32)" ] }, "execution_count": 35, "metadata": {}, "output_type": "execute_result" } ], "source": [ "xgb.predict(p)" ] }, { "cell_type": "code", "execution_count": 36, "metadata": {}, "outputs": [], "source": [ "#import pickle\n", "#file_name = \"xgb_reg_sklearn_updated.pkl\"\n", "\n", "# save\n", "#pickle.dump(xgb, open(file_name, \"wb\"))" ] }, { "cell_type": "code", "execution_count": 37, "metadata": {}, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ "\n", "DatetimeIndex: 25655 entries, 2013-01-27 00:00:00 to 2015-12-31 22:00:00\n", "Freq: H\n", "Data columns (total 15 columns):\n", " # Column Non-Null Count Dtype \n", "--- ------ -------------- ----- \n", " 0 year 25655 non-null float64\n", " 1 month 25655 non-null float64\n", " 2 quarter 25655 non-null float64\n", " 3 dayofyear 25655 non-null float64\n", " 4 dayofmonth 25655 non-null float64\n", " 5 weekofyear 25655 non-null float64\n", " 6 dayofweek 25655 non-null float64\n", " 7 datehour 25655 non-null float64\n", " 8 WD_Hour 25655 non-null float64\n", " 9 WS_Hour 25655 non-null float64\n", " 10 Temp_Hour 25655 non-null float64\n", " 11 SR_Hour 25655 non-null float64\n", " 12 RH_Hour 25655 non-null float64\n", " 13 NO2 25655 non-null float64\n", " 14 O3 25655 non-null float64\n", "dtypes: float64(15)\n", "memory usage: 3.1 MB\n" ] } ], "source": [ "df1.info()" ] }, { "cell_type": "code", "execution_count": 38, "metadata": {}, "outputs": [], "source": [ "#evaluate_model(tuned_blender)" ] }, { "cell_type": "code", "execution_count": 39, "metadata": {}, 
"outputs": [], "source": [ "#evaluate_model(tuned_lightgbm)" ] }, { "cell_type": "code", "execution_count": 40, "metadata": {}, "outputs": [], "source": [ "#df" ] }, { "cell_type": "code", "execution_count": 41, "metadata": {}, "outputs": [], "source": [ "#df.index.names = ['date']\n", "#df" ] }, { "cell_type": "code", "execution_count": 42, "metadata": {}, "outputs": [], "source": [ "#df.isna().sum()" ] }, { "cell_type": "code", "execution_count": 43, "metadata": {}, "outputs": [], "source": [ "#df =df.reset_index()\n", "#df.isna().sum()" ] }, { "cell_type": "code", "execution_count": 44, "metadata": {}, "outputs": [], "source": [ "#df = df.dropna(how='any')\n", "#df" ] }, { "cell_type": "code", "execution_count": 45, "metadata": {}, "outputs": [], "source": [ "#df = df.set_index('date')\n", "#df" ] }, { "cell_type": "code", "execution_count": 46, "metadata": {}, "outputs": [], "source": [ "#df" ] }, { "cell_type": "code", "execution_count": 47, "metadata": {}, "outputs": [], "source": [ "#df = df.reset_index()\n", "#df" ] }, { "cell_type": "code", "execution_count": 48, "metadata": {}, "outputs": [], "source": [ "#df = df.set_index('date').asfreq('h')\n", "#df" ] }, { "cell_type": "code", "execution_count": 49, "metadata": {}, "outputs": [], "source": [ "#df.isna().sum()" ] }, { "cell_type": "code", "execution_count": 50, "metadata": {}, "outputs": [], "source": [ "#df1= df.interpolate()\n", "#df1" ] }, { "cell_type": "code", "execution_count": 51, "metadata": {}, "outputs": [], "source": [ "#df1.isna().sum()" ] }, { "cell_type": "code", "execution_count": 52, "metadata": {}, "outputs": [], "source": [ "#df1" ] }, { "cell_type": "code", "execution_count": 53, "metadata": {}, "outputs": [], "source": [ "#df5=df1[df1.index.year == 2015]\n", "#df5" ] }, { "cell_type": "code", "execution_count": 54, "metadata": {}, "outputs": [], "source": [ "#save_model(final_lgbm,'Final_Lgbm_Model_CC_no_num_prod')" ] }, { "cell_type": "code", "execution_count": null, "metadata": 
{}, "outputs": [], "source": [] } ], "metadata": { "kernelspec": { "display_name": "Python 3", "language": "python", "name": "python3" }, "language_info": { "codemirror_mode": { "name": "ipython", "version": 3 }, "file_extension": ".py", "mimetype": "text/x-python", "name": "python", "nbconvert_exporter": "python", "pygments_lexer": "ipython3", "version": "3.8.5" } }, "nbformat": 4, "nbformat_minor": 4 }