Spaces:
Sleeping
Sleeping
File size: 5,171 Bytes
41bce18 |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 |
# https://github.com/Nixtla/statsforecast/tree/main/experiments/mstl
# https://github.com/Nixtla/statsforecast/blob/main/experiments/mstl/src/main.py
from time import time
import pandas as pd
import numpy as np
from datasetsforecast.losses import (
mae, mape, mase, rmse, smape
)
from fire import Fire
from neuralprophet import NeuralProphet
from prophet import Prophet
from prophet.diagnostics import cross_validation
from statsforecast import StatsForecast
from statsforecast.models import MSTL, AutoARIMA, SeasonalNaive
def evaluate_performace(y_hist, y_true, models, seasonality=24):
    """Score every model's cross-validation forecasts, one window at a time.

    Args:
        y_hist: DataFrame with the observed series; must expose ``ds`` and
            ``y``. Rows with ``ds <= cutoff`` are used as the in-sample
            history for MASE.
        y_true: DataFrame of cross-validation results; must expose
            ``cutoff``, ``y`` and one forecast column per entry of
            ``models``.
        models: Iterable of forecast column names in ``y_true`` to score.
        seasonality: Seasonal period forwarded to ``mase``. Defaults to 24,
            the value that was previously hard-coded.

    Returns:
        A DataFrame indexed by ``model`` with a leading ``cutoff`` column
        followed by one column per metric (mase, mae, mape, rmse, smape);
        the per-cutoff tables are stacked on top of each other.
    """
    per_cutoff_tables = []
    for cutoff in y_true['cutoff'].unique():
        # These slices depend only on the cutoff, so compute them once here
        # instead of once per (model, metric) pair.
        window = y_true.query('cutoff == @cutoff')
        actual = window['y'].values
        history = y_hist.query('ds <= @cutoff')['y'].values

        evaluation = {}
        for model in models:
            predicted = window[model].values
            evaluation[model] = {}
            for metric in [mase, mae, mape, rmse, smape]:
                metric_name = metric.__name__
                if metric_name == 'mase':
                    # MASE additionally needs the training history and the
                    # seasonal period for its scaling term.
                    evaluation[model][metric_name] = metric(
                        actual,
                        predicted,
                        history,
                        seasonality=seasonality,
                    )
                else:
                    evaluation[model][metric_name] = metric(actual, predicted)

        # Rows = models, columns = metrics, plus the cutoff they belong to.
        eval_cutoff = pd.DataFrame(evaluation).T
        eval_cutoff.insert(0, 'cutoff', cutoff)
        eval_cutoff.index = eval_cutoff.index.rename('model')
        per_cutoff_tables.append(eval_cutoff)
    return pd.concat(per_cutoff_tables)
def experiment():
    """Benchmark MSTL, SeasonalNaive, Prophet and NeuralProphet on PJM load.

    Downloads the PJME hourly CSV, produces forecasts for 7 cross-validation
    windows of 24 steps each with every model, and prints the time spent per
    model, the per-cutoff metrics and the per-model mean of those metrics.
    """
    filename = 'https://github.com/archd3sai/Hourly-Energy-Consumption-Prediction/raw/master/PJME_hourly.csv'
    # 404: filename = 'https://raw.githubusercontent.com/jnagura/Energy-consumption-prediction-analysis/master/PJM_Load_hourly.csv'
    df = pd.read_csv(filename)
    df.columns = ['ds', 'y']
    df.insert(0, 'unique_id', 'PJM_Load_hourly')
    df['ds'] = pd.to_datetime(df['ds'])
    df = df.sort_values(['unique_id', 'ds']).reset_index(drop=True)

    # MSTL model
    mstl = MSTL(
        season_length=[24, 24 * 7],  # seasonalities of the time series
        trend_forecaster=AutoARIMA()  # model used to forecast trend
    )
    sf = StatsForecast(
        df=df,
        models=[mstl],
        freq='H'
    )
    init = time()
    forecasts_cv = sf.cross_validation(h=24, n_windows=7, step_size=24)
    end = time()
    time_mstl = (end - init) / 60
    print(f'MSTL Time: {time_mstl:.2f} minutes')

    # SeasonalNaive model
    sf = StatsForecast(
        df=df,
        models=[SeasonalNaive(season_length=24)],
        freq='H'
    )
    init = time()
    forecasts_cv_seas = sf.cross_validation(h=24, n_windows=7, step_size=24)
    end = time()
    time_seas = (end - init) / 60
    print(f'SeasonalNaive Time: {time_seas:.2f} minutes')

    # Attach the SeasonalNaive forecasts to the MSTL cross-validation frame;
    # 'y' is dropped from the right side so it is not duplicated.
    forecasts_cv = forecasts_cv.merge(
        forecasts_cv_seas.drop(columns='y'),
        how='left',
        on=['unique_id', 'ds', 'cutoff'],
    )
    cutoffs = forecasts_cv['cutoff'].unique()

    # Prophet model
    # Start from NaN (not None) so the column keeps a float dtype once the
    # forecasts are written into it.
    forecasts_cv['Prophet'] = np.nan
    time_prophet = 0
    for cutoff in cutoffs:
        df_train = df.query('ds <= @cutoff')
        prophet = Prophet()
        # fit and produce forecasts (only this part is timed)
        init = time()
        prophet.fit(df_train)
        future = prophet.make_future_dataframe(periods=24, freq='H', include_history=False)
        forecast_prophet = prophet.predict(future)
        end = time()
        # The forecast timestamps must line up with this window's rows
        # before the values are written positionally.
        expected_ds = forecasts_cv.query('cutoff == @cutoff')['ds']
        assert (forecast_prophet['ds'].values == expected_ds).all()
        forecasts_cv.loc[forecasts_cv['cutoff'] == cutoff, 'Prophet'] = forecast_prophet['yhat'].values
        time_prophet += (end - init) / 60
    print(f'Prophet Time: {time_prophet:.2f} minutes')

    # NeuralProphet
    forecasts_cv['NeuralProphet'] = np.nan
    time_np = 0
    for cutoff in cutoffs:
        # NeuralProphet is given only the 'ds' and 'y' columns.
        df_train = df.query('ds <= @cutoff').drop(columns='unique_id')
        neuralprophet = NeuralProphet()
        init = time()
        neuralprophet.fit(df_train)
        future = neuralprophet.make_future_dataframe(df=df_train, periods=24)
        forecast_np = neuralprophet.predict(future)
        end = time()
        expected_ds = forecasts_cv.query('cutoff == @cutoff')['ds']
        assert (forecast_np['ds'].values == expected_ds).all()
        forecasts_cv.loc[forecasts_cv['cutoff'] == cutoff, 'NeuralProphet'] = forecast_np['yhat1'].values
        time_np += (end - init) / 60
    print(f'NeuralProphet Time: {time_np:.2f} minutes')

    # Build the timing table in one go: DataFrame.append was removed in
    # pandas 2.0, so rows can no longer be appended one by one that way.
    times = pd.DataFrame({
        'model': ['MSTL', 'SeasonalNaive', 'Prophet', 'NeuralProphet'],
        'time (mins)': [time_mstl, time_seas, time_prophet, time_np],
    })

    # Final evaluation
    # Pass the full history: evaluate_performace itself restricts it to
    # ds <= cutoff for every window, so no loop-local training frame is needed.
    evaluation = evaluate_performace(
        df,
        forecasts_cv,
        models=['MSTL', 'NeuralProphet', 'Prophet', 'SeasonalNaive'],
    )
    print(times)
    print(evaluation)
    print(evaluation.groupby('model').mean(numeric_only=True))
# Script entry point: hand `experiment` to Fire as the command-line command.
if __name__ == "__main__":
    Fire(experiment)
|