Spaces:
Sleeping
Sleeping
Daniel Varga
commited on
Commit
•
6f8bbb1
1
Parent(s):
5711d94
in progress: making predictor usable from architecture.py
Browse files- v2/predictor.py +84 -36
v2/predictor.py
CHANGED
@@ -7,20 +7,21 @@ import logging
|
|
7 |
from sklearn.metrics import mean_absolute_error
|
8 |
|
9 |
|
10 |
-
# kW
|
11 |
-
PREDICTION_LOWER_BOUND = 0 # 15
|
12 |
print("do not forget about hardwired prediction lower bound", PREDICTION_LOWER_BOUND, "kW")
|
13 |
|
14 |
-
hungarian_holidays = holidays.Hungary(years=range(2019, 2031))
|
15 |
-
HOLIDAY_DF = pd.DataFrame(list(hungarian_holidays.items()), columns=['ds', 'holiday'])
|
16 |
|
|
|
|
|
|
|
|
|
17 |
|
18 |
|
19 |
def prophet_backend(train_data, forecast_horizon):
|
20 |
# Initialize and train the Prophet model using the training data
|
21 |
model = Prophet(seasonality_mode='multiplicative', growth='flat',
|
22 |
yearly_seasonality=False, weekly_seasonality=True, daily_seasonality=True,
|
23 |
-
holidays=
|
24 |
|
25 |
# we can also play with setting daily_seasonality=False above, and then manually adding
|
26 |
# model.add_seasonality("daily", 1, fourier_order=10, prior_scale=100, mode="multiplicative")
|
@@ -36,8 +37,9 @@ def prophet_backend(train_data, forecast_horizon):
|
|
36 |
forecast = model.predict(future)
|
37 |
assert len(forecast) == forecast_horizon
|
38 |
|
|
|
39 |
for key in ('yhat', 'yhat_lower', 'yhat_upper'):
|
40 |
-
forecast[key] = np.maximum(forecast[key],
|
41 |
|
42 |
return forecast, model
|
43 |
|
@@ -79,46 +81,92 @@ def prediction_task(backend, df, split_date, forecast_horizon):
|
|
79 |
return mae, eval_data['y'].mean()
|
80 |
|
81 |
|
82 |
-
|
83 |
-
logger
|
84 |
-
logger.
|
85 |
-
logger.
|
|
|
86 |
|
87 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
88 |
|
89 |
-
|
|
|
90 |
|
91 |
-
|
92 |
-
|
93 |
-
|
94 |
-
|
|
|
|
|
95 |
|
96 |
-
|
97 |
-
|
|
|
98 |
|
99 |
|
100 |
-
|
101 |
-
|
102 |
-
print("forecast horizon", forecast_horizon // 4, "hours")
|
103 |
|
|
|
104 |
|
105 |
-
start_date = '2021-06-01'
|
106 |
-
end_date = '2021-10-24'
|
107 |
|
108 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
109 |
|
|
|
|
|
|
|
|
|
|
|
110 |
|
111 |
-
maes = []
|
112 |
-
mean_values = []
|
113 |
-
for split_date in weekly_date_range:
|
114 |
-
# prophet_backend is the only backend currently
|
115 |
-
mae, mean_value = prediction_task(prophet_backend, df, split_date, forecast_horizon)
|
116 |
-
maes.append(mae)
|
117 |
-
mean_values.append(mean_value)
|
118 |
-
print(split_date, "Mean Absolute Error", mae, "MAE/true mean", mae / mean_value)
|
119 |
|
120 |
-
|
121 |
-
|
122 |
-
aggregate_mae = maes.mean()
|
123 |
-
print("Mean Absolute Error over whole date range", weekly_date_range[0], "-", weekly_date_range[-1], ":", aggregate_mae)
|
124 |
-
print("Mean Absolute Error / true mean over whole date range", aggregate_mae / mean_values.mean())
|
|
|
7 |
from sklearn.metrics import mean_absolute_error
|
8 |
|
9 |
|
10 |
+
PREDICTION_LOWER_BOUND = 0 # 15 [kW]
|
|
|
11 |
print("do not forget about hardwired prediction lower bound", PREDICTION_LOWER_BOUND, "kW")
|
12 |
|
|
|
|
|
13 |
|
14 |
+
def get_holidays():
|
15 |
+
hungarian_holidays = holidays.Hungary(years=range(2019, 2031))
|
16 |
+
holiday_df = pd.DataFrame(list(hungarian_holidays.items()), columns=['ds', 'holiday'])
|
17 |
+
return holiday_df
|
18 |
|
19 |
|
20 |
def prophet_backend(train_data, forecast_horizon):
|
21 |
# Initialize and train the Prophet model using the training data
|
22 |
model = Prophet(seasonality_mode='multiplicative', growth='flat',
|
23 |
yearly_seasonality=False, weekly_seasonality=True, daily_seasonality=True,
|
24 |
+
holidays=get_holidays())
|
25 |
|
26 |
# we can also play with setting daily_seasonality=False above, and then manually adding
|
27 |
# model.add_seasonality("daily", 1, fourier_order=10, prior_scale=100, mode="multiplicative")
|
|
|
37 |
forecast = model.predict(future)
|
38 |
assert len(forecast) == forecast_horizon
|
39 |
|
40 |
+
# we never predict below zero, although prophet happily does.
|
41 |
for key in ('yhat', 'yhat_lower', 'yhat_upper'):
|
42 |
+
forecast[key] = np.maximum(forecast[key], 0)
|
43 |
|
44 |
return forecast, model
|
45 |
|
|
|
81 |
return mae, eval_data['y'].mean()
|
82 |
|
83 |
|
84 |
+
def quiet_logging():
|
85 |
+
logger = logging.getLogger('cmdstanpy')
|
86 |
+
logger.addHandler(logging.NullHandler())
|
87 |
+
logger.propagate = False
|
88 |
+
logger.setLevel(logging.CRITICAL)
|
89 |
|
90 |
|
91 |
+
def build_predictor(training_data: pd.Series):
|
92 |
+
quiet_logging()
|
93 |
+
training_data_frame = pd.DataFrame({'ds': training_data.index, 'y': training_data})
|
94 |
+
model = Prophet(seasonality_mode='multiplicative', growth='flat',
|
95 |
+
yearly_seasonality=False, weekly_seasonality=True, daily_seasonality=True,
|
96 |
+
holidays=get_holidays())
|
97 |
+
|
98 |
+
# we can also play with setting daily_seasonality=False above, and then manually adding
|
99 |
+
# model.add_seasonality("daily", 1, fourier_order=10, prior_scale=100, mode="multiplicative")
|
100 |
+
# ...it didn't really work though. bumping the fourier_order helps, but makes the model slow.
|
101 |
+
# the rest didn't have much effect.
|
102 |
+
|
103 |
+
model.fit(training_data_frame)
|
104 |
+
return model
|
105 |
+
|
106 |
+
|
107 |
+
def make_prediction(prophet_model: Prophet, test_data: pd.Series, batch_size_in_days: int):
|
108 |
+
date_range = pd.date_range(start=test_data.index[0], end=test_data.index[-1], freq=f'{batch_size_in_days}d')
|
109 |
+
for split_date in date_range:
|
110 |
+
future = prophet_model.make_future_dataframe(periods=forecast_horizon, freq='15T', include_history=False)
|
111 |
+
|
112 |
+
# Make predictions for the evaluation period
|
113 |
+
forecast = prophet_model.predict(future)
|
114 |
+
assert len(forecast) == forecast_horizon
|
115 |
+
|
116 |
+
# we never predict below zero, although prophet happily does.
|
117 |
+
for key in ('yhat', 'yhat_lower', 'yhat_upper'):
|
118 |
+
forecast[key] = np.maximum(forecast[key], 0)
|
119 |
+
|
120 |
+
return forecast
|
121 |
+
|
122 |
+
|
123 |
+
def main():
|
124 |
+
quiet_logging()
|
125 |
+
|
126 |
+
cons_filename = 'pq_terheles_2021_adatok.tsv'
|
127 |
+
|
128 |
+
df = pd.read_csv(cons_filename, sep='\t', skipinitialspace=True, na_values='n/a', decimal=',')
|
129 |
+
df['Time'] = pd.to_datetime(df['Korrigált időpont'], format='%m/%d/%y %H:%M')
|
130 |
+
df = df.set_index('Time')
|
131 |
+
df['Consumption'] = df['Hatásos teljesítmény [kW]']
|
132 |
|
133 |
+
df['ds'] = df.index
|
134 |
+
df['y'] = df['Consumption']
|
135 |
|
136 |
+
# we slightly alter both the train and the test
|
137 |
+
# because we have an almost constant shift, and the model is multiplicative.
|
138 |
+
# we add it back in the end.
|
139 |
+
print("values below PREDICTION_LOWER_BOUND", PREDICTION_LOWER_BOUND, ":",
|
140 |
+
(df['y'] <= PREDICTION_LOWER_BOUND).sum(), "/", len(df['y']))
|
141 |
+
df['y'] = (df['y'] - PREDICTION_LOWER_BOUND).clip(0.0)
|
142 |
|
143 |
+
# TODO 15 minutes timestep hardwired!
|
144 |
+
forecast_horizon = 7 * 24 * 4
|
145 |
+
print("forecast horizon", forecast_horizon // 4, "hours")
|
146 |
|
147 |
|
148 |
+
start_date = '2021-06-01'
|
149 |
+
end_date = '2021-10-24'
|
|
|
150 |
|
151 |
+
weekly_date_range = pd.date_range(start=start_date, end=end_date, freq='8d')
|
152 |
|
|
|
|
|
153 |
|
154 |
+
maes = []
|
155 |
+
mean_values = []
|
156 |
+
for split_date in weekly_date_range:
|
157 |
+
# prophet_backend is the only backend currently
|
158 |
+
mae, mean_value = prediction_task(prophet_backend, df, split_date, forecast_horizon)
|
159 |
+
mean_value += PREDICTION_LOWER_BOUND
|
160 |
+
maes.append(mae)
|
161 |
+
mean_values.append(mean_value)
|
162 |
+
print(split_date, "Mean Absolute Error", mae, "MAE/true mean", mae / mean_value)
|
163 |
|
164 |
+
maes = np.array(maes)
|
165 |
+
mean_values = np.array(mean_values)
|
166 |
+
aggregate_mae = maes.mean()
|
167 |
+
print("Mean Absolute Error over whole date range", weekly_date_range[0], "-", weekly_date_range[-1], ":", aggregate_mae)
|
168 |
+
print("Mean Absolute Error / true mean over whole date range", aggregate_mae / mean_values.mean())
|
169 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
170 |
|
171 |
+
if __name__ == '__main__':
|
172 |
+
main()
|
|
|
|
|
|