# This source code is provided for the purposes of scientific reproducibility
# under the following limited license from Element AI Inc. The code is an
# implementation of the N-BEATS model (Oreshkin et al., N-BEATS: Neural basis
# expansion analysis for interpretable time series forecasting,
# https://arxiv.org/abs/1905.10437). The copyright to the source code is
# licensed under the Creative Commons - Attribution-NonCommercial 4.0
# International license (CC BY-NC 4.0):
# https://creativecommons.org/licenses/by-nc/4.0/. Any commercial use (whether
# for the benefit of third parties or internally in production) requires an
# explicit license. The subject-matter of the N-BEATS model and associated
# materials are the property of Element AI Inc. and may be subject to patent
# protection. No license to patents is granted hereunder (whether express or
# implied). Copyright 2020 Element AI Inc. All rights reserved.
"""
M4 Summary
"""
from collections import OrderedDict

import numpy as np
import pandas as pd

from data_provider.m4 import M4Dataset
from data_provider.m4 import M4Meta
import os


def group_values(values, groups, group_name):
    """Select the series belonging to one seasonal pattern and drop NaN padding from each series."""
    return np.array([v[~np.isnan(v)] for v in values[groups == group_name]])


def mase(forecast, insample, outsample, frequency):
    """Mean Absolute Scaled Error: forecast error scaled by the in-sample seasonal-naive error."""
    return np.mean(np.abs(forecast - outsample)) / np.mean(np.abs(insample[:-frequency] - insample[frequency:]))


def smape_2(forecast, target):
    """Symmetric MAPE (the M4 variant, in percent)."""
    denom = np.abs(target) + np.abs(forecast)
    # divide by 1.0 instead of 0.0; when denom is zero the numerator is 0.0 anyway.
    denom[denom == 0.0] = 1.0
    return 200 * np.abs(forecast - target) / denom


def mape(forecast, target):
    """Mean Absolute Percentage Error (in percent)."""
    denom = np.abs(target)
    # divide by 1.0 instead of 0.0; when denom is zero the numerator is 0.0 anyway.
    denom[denom == 0.0] = 1.0
    return 100 * np.abs(forecast - target) / denom


class M4Summary:
    def __init__(self, file_path, root_path):
        self.file_path = file_path
        self.training_set = M4Dataset.load(training=True, dataset_file=root_path)
        self.test_set = M4Dataset.load(training=False, dataset_file=root_path)
        self.naive_path = os.path.join(root_path, 'submission-Naive2.csv')

    def evaluate(self):
        """
        Evaluate forecasts against the M4 test dataset. Forecasts are read from
        '<seasonal_pattern>_forecast.csv' files located under `self.file_path`.

        :return: sMAPE, OWA, MAPE and MASE, each grouped by seasonal pattern.
""" grouped_owa = OrderedDict() naive2_forecasts = pd.read_csv(self.naive_path).values[:, 1:].astype(np.float32) naive2_forecasts = np.array([v[~np.isnan(v)] for v in naive2_forecasts]) model_mases = {} naive2_smapes = {} naive2_mases = {} grouped_smapes = {} grouped_mapes = {} for group_name in M4Meta.seasonal_patterns: file_name = self.file_path + group_name + "_forecast.csv" if os.path.exists(file_name): model_forecast = pd.read_csv(file_name).values naive2_forecast = group_values(naive2_forecasts, self.test_set.groups, group_name) target = group_values(self.test_set.values, self.test_set.groups, group_name) # all timeseries within group have same frequency frequency = self.training_set.frequencies[self.test_set.groups == group_name][0] insample = group_values(self.training_set.values, self.test_set.groups, group_name) model_mases[group_name] = np.mean([mase(forecast=model_forecast[i], insample=insample[i], outsample=target[i], frequency=frequency) for i in range(len(model_forecast))]) naive2_mases[group_name] = np.mean([mase(forecast=naive2_forecast[i], insample=insample[i], outsample=target[i], frequency=frequency) for i in range(len(model_forecast))]) naive2_smapes[group_name] = np.mean(smape_2(naive2_forecast, target)) grouped_smapes[group_name] = np.mean(smape_2(forecast=model_forecast, target=target)) grouped_mapes[group_name] = np.mean(mape(forecast=model_forecast, target=target)) grouped_smapes = self.summarize_groups(grouped_smapes) grouped_mapes = self.summarize_groups(grouped_mapes) grouped_model_mases = self.summarize_groups(model_mases) grouped_naive2_smapes = self.summarize_groups(naive2_smapes) grouped_naive2_mases = self.summarize_groups(naive2_mases) for k in grouped_model_mases.keys(): grouped_owa[k] = (grouped_model_mases[k] / grouped_naive2_mases[k] + grouped_smapes[k] / grouped_naive2_smapes[k]) / 2 def round_all(d): return dict(map(lambda kv: (kv[0], np.round(kv[1], 3)), d.items())) return round_all(grouped_smapes), round_all(grouped_owa), round_all(grouped_mapes), round_all( grouped_model_mases) def summarize_groups(self, scores): """ Re-group scores respecting M4 rules. :param scores: Scores per group. :return: Grouped scores. """ scores_summary = OrderedDict() def group_count(group_name): return len(np.where(self.test_set.groups == group_name)[0]) weighted_score = {} for g in ['Yearly', 'Quarterly', 'Monthly']: weighted_score[g] = scores[g] * group_count(g) scores_summary[g] = scores[g] others_score = 0 others_count = 0 for g in ['Weekly', 'Daily', 'Hourly']: others_score += scores[g] * group_count(g) others_count += group_count(g) weighted_score['Others'] = others_score scores_summary['Others'] = others_score / others_count average = np.sum(list(weighted_score.values())) / len(self.test_set.groups) scores_summary['Average'] = average return scores_summary