Spaces:
Sleeping
Sleeping
import numpy as np | |
import pandas as pd | |
import matplotlib.pyplot as plt | |
from prophet import Prophet | |
import holidays | |
import logging | |
from sklearn.metrics import mean_absolute_error | |
# kW | |
PREDICTION_LOWER_BOUND = 0 # 15 | |
print("do not forget about hardwired prediction lower bound", PREDICTION_LOWER_BOUND, "kW") | |
hungarian_holidays = holidays.Hungary(years=range(2019, 2031)) | |
HOLIDAY_DF = pd.DataFrame(list(hungarian_holidays.items()), columns=['ds', 'holiday']) | |
def prophet_backend(train_data, forecast_horizon): | |
# Initialize and train the Prophet model using the training data | |
model = Prophet(seasonality_mode='multiplicative', growth='flat', | |
yearly_seasonality=False, weekly_seasonality=True, daily_seasonality=True, | |
holidays=HOLIDAY_DF) | |
# we can also play with setting daily_seasonality=False above, and then manually adding | |
# model.add_seasonality("daily", 1, fourier_order=10, prior_scale=100, mode="multiplicative") | |
# ...it didn't really work though. bumping the fourier_order helps, but makes the model slow. | |
# the rest didn't have much effect. | |
model.fit(train_data) | |
# Create a DataFrame with future timestamps for the evaluation period | |
future = model.make_future_dataframe(periods=forecast_horizon, freq='15T', include_history=False) | |
# Make predictions for the evaluation period | |
forecast = model.predict(future) | |
assert len(forecast) == forecast_horizon | |
for key in ('yhat', 'yhat_lower', 'yhat_upper'): | |
forecast[key] = np.maximum(forecast[key], PREDICTION_LOWER_BOUND) | |
return forecast, model | |
def prediction_task(backend, df, split_date, forecast_horizon): | |
# Split the data into training (past) and evaluation (future) sets | |
train_data = df[df['ds'] <= split_date] | |
eval_data = df[df['ds'] > split_date] | |
eval_data = eval_data.head(forecast_horizon) | |
forecast, model = backend(train_data, forecast_horizon) | |
mae = mean_absolute_error(eval_data['y'], forecast['yhat']) | |
do_vis = False | |
if do_vis: | |
future = model.make_future_dataframe(periods=forecast_horizon, freq='15T', include_history=True) | |
forecast = model.predict(future) | |
plt.figure(figsize=(12, 6)) | |
plt.plot(eval_data['ds'], eval_data['y'], label='Actual', color='blue') | |
plt.plot(forecast['ds'], forecast['yhat'], label='Predicted', color='red') | |
plt.fill_between(forecast['ds'], forecast['yhat_lower'], forecast['yhat_upper'], color='pink', alpha=0.5, label='Uncertainty') | |
plt.xlabel('Timestamp') | |
plt.ylabel('Value') | |
plt.title('Actual vs. Predicted Values') | |
plt.legend() | |
plt.grid(True) | |
plt.show() | |
fig1 = model.plot(forecast) | |
plt.plot(eval_data['ds'], eval_data['y'], c='r') | |
plt.show() | |
fig2 = model.plot_components(forecast) | |
plt.show() | |
exit() | |
return mae, eval_data['y'].mean() | |
logger = logging.getLogger('cmdstanpy') | |
logger.addHandler(logging.NullHandler()) | |
logger.propagate = False | |
logger.setLevel(logging.CRITICAL) | |
cons_filename = 'pq_terheles_2021_adatok.tsv' | |
df = pd.read_csv(cons_filename, sep='\t', skipinitialspace=True, na_values='n/a', decimal=',') | |
df['Time'] = pd.to_datetime(df['Korrigált időpont'], format='%m/%d/%y %H:%M') | |
df = df.set_index('Time') | |
df['Consumption'] = df['Hatásos teljesítmény [kW]'] | |
df['ds'] = df.index | |
df['y'] = df['Consumption'] | |
# TODO 15 minutes timestep hardwired! | |
forecast_horizon = 24 * 4 | |
print("forecast horizon", forecast_horizon // 4, "hours") | |
start_date = '2021-06-01' | |
end_date = '2021-10-24' | |
weekly_date_range = pd.date_range(start=start_date, end=end_date, freq='8d') | |
maes = [] | |
mean_values = [] | |
for split_date in weekly_date_range: | |
# prophet_backend is the only backend currently | |
mae, mean_value = prediction_task(prophet_backend, df, split_date, forecast_horizon) | |
maes.append(mae) | |
mean_values.append(mean_value) | |
print(split_date, "Mean Absolute Error", mae, "MAE/true mean", mae / mean_value) | |
maes = np.array(maes) | |
mean_values = np.array(mean_values) | |
aggregate_mae = maes.mean() | |
print("Mean Absolute Error over whole date range", weekly_date_range[0], "-", weekly_date_range[-1], ":", aggregate_mae) | |
print("Mean Absolute Error / true mean over whole date range", aggregate_mae / mean_values.mean()) | |