# (HF Spaces status residue from page extraction: "Spaces: Sleeping Sleeping")
# https://github.com/Nixtla/statsforecast/tree/main/experiments/mstl
# https://github.com/Nixtla/statsforecast/blob/main/experiments/mstl/src/main.py
from time import time

import numpy as np
import pandas as pd
from datasetsforecast.losses import mae, mape, mase, rmse, smape
from fire import Fire
from neuralprophet import NeuralProphet
from prophet import Prophet
from prophet.diagnostics import cross_validation
from statsforecast import StatsForecast
from statsforecast.models import MSTL, AutoARIMA, SeasonalNaive
def evaluate_performace(y_hist, y_true, models):
    """Compute per-cutoff accuracy metrics for each model.

    Parameters
    ----------
    y_hist : pd.DataFrame
        Historical series with at least 'ds' and 'y' columns; used as the
        training window for scale-dependent metrics (MASE).
    y_true : pd.DataFrame
        Cross-validation frame with 'ds', 'cutoff', 'y' and one forecast
        column per entry of ``models``.
    models : list of str
        Names of the forecast columns in ``y_true`` to evaluate.

    Returns
    -------
    pd.DataFrame
        One row per (model, cutoff) pair, indexed by 'model', with columns
        'cutoff', 'mase', 'mae', 'mape', 'rmse', 'smape'.
    """
    eval_ = []
    for cutoff in y_true['cutoff'].unique():
        # Hoist the per-cutoff slices once instead of re-querying per metric.
        window = y_true.query('cutoff == @cutoff')
        # MASE scales errors by the in-sample seasonal-naive error, so it
        # needs the history available up to this cutoff.
        y_train = y_hist.query('ds <= @cutoff')['y'].values
        evaluation = {}
        for model in models:
            y_true_vals = window['y'].values
            y_pred_vals = window[model].values
            evaluation[model] = {}
            for metric in [mase, mae, mape, rmse, smape]:
                metric_name = metric.__name__
                if metric_name == 'mase':
                    evaluation[model][metric_name] = metric(
                        y_true_vals, y_pred_vals, y_train, seasonality=24
                    )
                else:
                    evaluation[model][metric_name] = metric(y_true_vals, y_pred_vals)
        eval_cutoff = pd.DataFrame(evaluation).T
        eval_cutoff.insert(0, 'cutoff', cutoff)
        eval_cutoff.index = eval_cutoff.index.rename('model')
        eval_.append(eval_cutoff)
    return pd.concat(eval_)
def experiment():
    """Benchmark MSTL against SeasonalNaive, Prophet and NeuralProphet.

    Downloads the PJME hourly energy-consumption series, runs rolling-origin
    cross-validation (7 windows, 24-hour horizon, 24-hour step) for each
    model, then prints wall-clock fit/predict times and per-cutoff error
    metrics.

    Side effects: network download, model training, and stdout printing.
    """
    filename = 'https://github.com/archd3sai/Hourly-Energy-Consumption-Prediction/raw/master/PJME_hourly.csv'
    # 404: filename = 'https://raw.githubusercontent.com/jnagura/Energy-consumption-prediction-analysis/master/PJM_Load_hourly.csv'
    df = pd.read_csv(filename)
    df.columns = ['ds', 'y']
    df.insert(0, 'unique_id', 'PJM_Load_hourly')
    df['ds'] = pd.to_datetime(df['ds'])
    df = df.sort_values(['unique_id', 'ds']).reset_index(drop=True)

    # MSTL model: hourly + weekly seasonalities, ARIMA on the trend component.
    mstl = MSTL(
        season_length=[24, 24 * 7],   # seasonalities of the time series
        trend_forecaster=AutoARIMA()  # model used to forecast trend
    )
    sf = StatsForecast(df=df, models=[mstl], freq='H')
    init = time()
    forecasts_cv = sf.cross_validation(h=24, n_windows=7, step_size=24)
    time_mstl = (time() - init) / 60
    print(f'MSTL Time: {time_mstl:.2f} minutes')

    # SeasonalNaive baseline with a daily cycle.
    sf = StatsForecast(df=df, models=[SeasonalNaive(season_length=24)], freq='H')
    init = time()
    forecasts_cv_seas = sf.cross_validation(h=24, n_windows=7, step_size=24)
    time_seas = (time() - init) / 60
    print(f'SeasonalNaive Time: {time_seas:.2f} minutes')

    forecasts_cv = forecasts_cv.merge(
        forecasts_cv_seas.drop(columns='y'),
        how='left',
        on=['unique_id', 'ds', 'cutoff'],
    )
    cutoffs = forecasts_cv['cutoff'].unique()

    # Prophet has no multi-window CV matching the above, so refit per cutoff.
    forecasts_cv['Prophet'] = None
    time_prophet = 0
    for cutoff in cutoffs:
        df_train = df.query('ds <= @cutoff')
        prophet = Prophet()
        init = time()
        prophet.fit(df_train)
        future = prophet.make_future_dataframe(periods=24, freq='H', include_history=False)
        forecast_prophet = prophet.predict(future)
        time_prophet += (time() - init) / 60
        # Guard against misaligned timestamps before positional assignment.
        assert (forecast_prophet['ds'].values == forecasts_cv.query('cutoff == @cutoff')['ds']).all()
        forecasts_cv.loc[forecasts_cv['cutoff'] == cutoff, 'Prophet'] = forecast_prophet['yhat'].values
    print(f'Prophet Time: {time_prophet:.2f} minutes')

    # NeuralProphet: same per-cutoff refit strategy.
    forecasts_cv['NeuralProphet'] = None
    time_np = 0
    for cutoff in cutoffs:
        df_train = df.query('ds <= @cutoff')
        neuralprophet = NeuralProphet()
        init = time()
        neuralprophet.fit(df_train.drop(columns='unique_id'))
        future = neuralprophet.make_future_dataframe(df=df_train.drop(columns='unique_id'), periods=24)
        forecast_np = neuralprophet.predict(future)
        time_np += (time() - init) / 60
        assert (forecast_np['ds'].values == forecasts_cv.query('cutoff == @cutoff')['ds']).all()
        forecasts_cv.loc[forecasts_cv['cutoff'] == cutoff, 'NeuralProphet'] = forecast_np['yhat1'].values
    print(f'NeuralProphet Time: {time_np:.2f} minutes')

    # Build the timing table once; DataFrame.append was removed in pandas 2.0.
    times = pd.DataFrame({
        'model': ['MSTL', 'SeasonalNaive', 'Prophet', 'NeuralProphet'],
        'time (mins)': [time_mstl, time_seas, time_prophet, time_np],
    })

    # Final evaluation. Pass the full series (not the last loop iteration's
    # df_train): evaluate_performace filters `ds <= cutoff` per cutoff itself,
    # so the result is the same and no loop-leaked variable is relied on.
    evaluation = evaluate_performace(df, forecasts_cv, models=['MSTL', 'NeuralProphet', 'Prophet', 'SeasonalNaive'])
    print(times)
    print(evaluation)
    print(evaluation.groupby('model').mean(numeric_only=True))
if __name__=="__main__": | |
Fire(experiment) | |