"""Build an Informer model for the hourly Monash traffic dataset.

The script loads the ``monash_tsf`` / ``traffic_hourly`` dataset, groups its
univariate series into a single multivariate series with GluonTS, and
instantiates an (untrained) ``InformerForPrediction`` sized for that data.
"""
from functools import lru_cache, partial

import numpy as np
import pandas as pd
from datasets import load_dataset
from gluonts.dataset.multivariate_grouper import MultivariateGrouper
from gluonts.time_feature import time_features_from_frequency_str
from transformers import InformerConfig, InformerForPrediction

freq = "1H"
prediction_length = 48


def get_train_test_datasets():
    """Load the train/test splits and convert ``start`` to ``pd.Period`` lazily.

    Returns:
        A ``(train_dataset, test_dataset)`` tuple. Both splits have a
        transform installed that rewrites each batch's ``"start"`` values
        into ``pd.Period`` objects using the module-level ``freq``.
    """

    # Cached so repeated start values are only converted once.
    @lru_cache(10_000)
    def convert_to_pandas_period(date, freq):
        return pd.Period(date, freq)

    def transform_start_field(batch, freq):
        batch["start"] = [
            convert_to_pandas_period(date, freq) for date in batch["start"]
        ]
        return batch

    dataset = load_dataset("monash_tsf", "traffic_hourly")
    train_dataset = dataset["train"]
    test_dataset = dataset["test"]

    train_dataset.set_transform(partial(transform_start_field, freq=freq))
    test_dataset.set_transform(partial(transform_start_field, freq=freq))

    return train_dataset, test_dataset


def get_train_test_multivariate_grouper(train_dataset, test_dataset):
    """Create the groupers that stack univariate series into one multivariate one.

    Args:
        train_dataset: Training split; its length is used as the number of
            variates.
        test_dataset: Test split; its length divided by the number of
            variates gives the number of rolling test windows.

    Returns:
        A ``(train_grouper, test_grouper)`` tuple of ``MultivariateGrouper``.
    """
    num_of_variates = len(train_dataset)

    train_grouper = MultivariateGrouper(max_target_dim=num_of_variates)
    test_grouper = MultivariateGrouper(
        max_target_dim=num_of_variates,
        # number of rolling test windows
        num_test_dates=len(test_dataset) // num_of_variates,
    )

    return train_grouper, test_grouper


def get_informer_model(num_of_variates, time_features):
    """Instantiate an ``InformerForPrediction`` with a fresh configuration.

    Args:
        num_of_variates: Number of variates per time step; used as the
            model's ``input_size``.
        time_features: Sequence of time-feature callables; its length (plus
            one extra feature) sets ``num_time_features``.

    Returns:
        A newly constructed ``InformerForPrediction``.
    """
    config = InformerConfig(
        # in the multivariate setting, input_size is the number of variates
        # in the time series per time step
        input_size=num_of_variates,
        # prediction length:
        prediction_length=prediction_length,
        # context length:
        context_length=prediction_length * 2,
        # lags value copied from 1 week before:
        lags_sequence=[1, 24 * 7],
        # the frequency-derived time features plus one extra ("age"):
        num_time_features=len(time_features) + 1,
        # informer params:
        dropout=0.1,
        encoder_layers=6,
        decoder_layers=4,
        # project input from
        # num_of_variates*len(lags_sequence)+num_time_features to:
        d_model=64,
    )

    model = InformerForPrediction(config)
    return model


def main():
    """Load the data, group it into a multivariate series and build the model."""
    train_dataset, test_dataset = get_train_test_datasets()
    train_grouper, test_grouper = get_train_test_multivariate_grouper(
        train_dataset, test_dataset
    )

    multi_variate_train_dataset = train_grouper(train_dataset)
    multi_variate_test_dataset = test_grouper(test_dataset)

    multi_variate_train_example = multi_variate_train_dataset[0]
    train_example = train_dataset[0]

    print('train_example["target"].shape =', len(train_example["target"]))
    print(
        'multi_variate_train_example["target"].shape =',
        multi_variate_train_example["target"].shape,
    )

    time_features = time_features_from_frequency_str(freq)
    print(time_features)

    # Take the variate count from the grouped data instead of hard-coding
    # it, so the model's input_size always matches the first dimension of
    # the multivariate target the groupers actually produced.
    num_of_variates = multi_variate_train_example["target"].shape[0]
    informer = get_informer_model(
        num_of_variates=num_of_variates, time_features=time_features
    )


if __name__ == '__main__':
    main()