# TSEditor/utils/predictive_metric.py
"""Reimplement TimeGAN-pytorch Codebase.
Reference: Jinsung Yoon, Daniel Jarrett, Mihaela van der Schaar,
"Time-series Generative Adversarial Networks,"
Neural Information Processing Systems (NeurIPS), 2019.
Paper link: https://papers.nips.cc/paper/8789-time-series-generative-adversarial-networks
Last updated Date: October 18th 2021
Code author: Zhiwei Zhang (bitzzw@gmail.com)
-----------------------------
predictive_metrics.py
Note: Use Post-hoc RNN to predict one-step ahead (last feature)
"""
# Necessary Packages
import tensorflow as tf
import tensorflow._api.v2.compat.v1 as tf1
tf.compat.v1.disable_eager_execution()
import numpy as np
from sklearn.metrics import mean_absolute_error
from utils.metric_utils import extract_time
def predictive_score_metrics(ori_data, generated_data):
    """Report the performance of Post-hoc RNN one-step ahead prediction.

    A GRU predictor is trained on the *synthetic* data to predict the last
    feature one step ahead from the remaining (dim - 1) features, and is then
    evaluated on the *original* data (train-on-synthetic, test-on-real).

    Args:
      - ori_data: original data, array of shape (no, seq_len, dim)
      - generated_data: generated synthetic data (same layout as ori_data)

    Returns:
      - predictive_score: MAE of the predictions on the original data
    """
    # Initialization on the Graph
    tf1.reset_default_graph()

    # Basic Parameters
    no, seq_len, dim = ori_data.shape

    # Set maximum sequence length and each sequence length.
    ori_time, ori_max_seq_len = extract_time(ori_data)
    # BUG FIX: previously this called extract_time(ori_data), so the synthetic
    # sequence lengths were never computed and T_mb during training was wrong
    # whenever original and generated lengths differ.
    generated_time, generated_max_seq_len = extract_time(generated_data)
    max_seq_len = max([ori_max_seq_len, generated_max_seq_len])

    ## Build a post-hoc RNN predictive network
    # Network parameters
    hidden_dim = int(dim / 2)
    iterations = 5000
    batch_size = 128

    # Input placeholders: X carries the first (dim - 1) features over the first
    # (max_seq_len - 1) steps; Y is the last feature shifted one step ahead.
    X = tf1.placeholder(tf.float32, [None, max_seq_len - 1, dim - 1], name="myinput_x")
    T = tf1.placeholder(tf.int32, [None], name="myinput_t")
    Y = tf1.placeholder(tf.float32, [None, max_seq_len - 1, 1], name="myinput_y")

    def predictor(x, t):
        """Simple GRU-based one-step-ahead predictor.

        Args:
          - x: time-series data (last feature excluded)
          - t: per-sequence valid lengths

        Returns:
          - y_hat: prediction in [0, 1] (sigmoid output)
          - p_vars: predictor variables
        """
        with tf1.variable_scope("predictor", reuse=tf1.AUTO_REUSE) as vs:
            p_cell = tf1.nn.rnn_cell.GRUCell(
                num_units=hidden_dim, activation=tf.nn.tanh, name="p_cell"
            )
            p_outputs, p_last_states = tf1.nn.dynamic_rnn(
                p_cell, x, dtype=tf.float32, sequence_length=t
            )
            y_hat_logit = tf1.layers.dense(p_outputs, 1, activation=None)
            y_hat = tf.nn.sigmoid(y_hat_logit)
            p_vars = [v for v in tf1.all_variables() if v.name.startswith(vs.name)]
        return y_hat, p_vars

    y_pred, p_vars = predictor(X, T)

    # Loss for the predictor (mean absolute error on the predicted feature)
    p_loss = tf1.losses.absolute_difference(Y, y_pred)
    # Optimizer
    p_solver = tf1.train.AdamOptimizer().minimize(p_loss, var_list=p_vars)

    ## Training
    # Session start
    sess = tf1.Session()
    sess.run(tf1.global_variables_initializer())

    from tqdm.auto import tqdm

    # Training using the synthetic dataset only.
    for itt in tqdm(range(iterations), desc="training", total=iterations):
        # Sample a random mini-batch of synthetic sequences.
        idx = np.random.permutation(len(generated_data))
        train_idx = idx[:batch_size]

        X_mb = list(generated_data[i][:-1, : (dim - 1)] for i in train_idx)
        # One step is consumed by the one-step-ahead shift, hence length - 1.
        T_mb = list(generated_time[i] - 1 for i in train_idx)
        Y_mb = list(
            np.reshape(
                generated_data[i][1:, (dim - 1)],
                [len(generated_data[i][1:, (dim - 1)]), 1],
            )
            for i in train_idx
        )

        # Train predictor
        _, step_p_loss = sess.run(
            [p_solver, p_loss], feed_dict={X: X_mb, T: T_mb, Y: Y_mb}
        )

    ## Test the trained model on the original data.
    idx = np.random.permutation(len(ori_data))
    train_idx = idx[:no]

    X_mb = list(ori_data[i][:-1, : (dim - 1)] for i in train_idx)
    T_mb = list(ori_time[i] - 1 for i in train_idx)
    Y_mb = list(
        np.reshape(ori_data[i][1:, (dim - 1)], [len(ori_data[i][1:, (dim - 1)]), 1])
        for i in train_idx
    )

    # Prediction
    pred_Y_curr = sess.run(y_pred, feed_dict={X: X_mb, T: T_mb})

    # Compute the performance in terms of MAE, averaged over all sequences.
    MAE_temp = 0
    for i in range(no):
        MAE_temp = MAE_temp + mean_absolute_error(Y_mb[i], pred_Y_curr[i, :, :])
    predictive_score = MAE_temp / no

    return predictive_score