"""Reimplement TimeGAN-pytorch Codebase. | |
Reference: Jinsung Yoon, Daniel Jarrett, Mihaela van der Schaar, | |
"Time-series Generative Adversarial Networks," | |
Neural Information Processing Systems (NeurIPS), 2019. | |
Paper link: https://papers.nips.cc/paper/8789-time-series-generative-adversarial-networks | |
Last updated Date: October 18th 2021 | |
Code author: Zhiwei Zhang (bitzzw@gmail.com) | |
----------------------------- | |
predictive_metrics.py | |
Note: Use Post-hoc RNN to predict one-step ahead (last feature) | |
""" | |
# Necessary packages
import numpy as np
import tensorflow as tf
import tensorflow.compat.v1 as tf1
from sklearn.metrics import mean_absolute_error
from tqdm.auto import tqdm

from utils.metric_utils import extract_time

tf1.disable_eager_execution()

def predictive_score_metrics(ori_data, generated_data):
    """Report the performance of a post-hoc RNN on one-step-ahead prediction.

    Args:
      - ori_data: original data
      - generated_data: generated synthetic data

    Returns:
      - predictive_score: MAE of the predictions on the original data
    """
    # Initialization on the graph
    tf1.reset_default_graph()

    # Basic parameters
    no, seq_len, dim = ori_data.shape

    # Set the maximum sequence length and each sequence's length
    ori_time, ori_max_seq_len = extract_time(ori_data)
    generated_time, generated_max_seq_len = extract_time(generated_data)
    max_seq_len = max([ori_max_seq_len, generated_max_seq_len])
    ## Build a post-hoc RNN predictive network
    # Network parameters
    hidden_dim = int(dim / 2)
    iterations = 5000
    batch_size = 128

    # Input placeholders
    X = tf1.placeholder(tf.float32, [None, max_seq_len - 1, dim - 1], name="myinput_x")
    T = tf1.placeholder(tf.int32, [None], name="myinput_t")
    Y = tf1.placeholder(tf.float32, [None, max_seq_len - 1, 1], name="myinput_y")
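    # Note (added for clarity): X holds the first (seq_len - 1) steps of the
    # first (dim - 1) features, T the per-sequence lengths, and Y the last
    # feature shifted one step ahead, so the network learns a one-step-ahead
    # prediction of the final feature from the remaining ones.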
    # Predictor function
    def predictor(x, t):
        """Simple predictor function.

        Args:
          - x: time-series data
          - t: time information

        Returns:
          - y_hat: prediction
          - p_vars: predictor variables
        """
        with tf1.variable_scope("predictor", reuse=tf1.AUTO_REUSE) as vs:
            p_cell = tf1.nn.rnn_cell.GRUCell(
                num_units=hidden_dim, activation=tf.nn.tanh, name="p_cell"
            )
            p_outputs, p_last_states = tf1.nn.dynamic_rnn(
                p_cell, x, dtype=tf.float32, sequence_length=t
            )
            y_hat_logit = tf1.layers.dense(p_outputs, 1, activation=None)
            y_hat = tf.nn.sigmoid(y_hat_logit)
            p_vars = [v for v in tf1.global_variables() if v.name.startswith(vs.name)]
        return y_hat, p_vars
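    # Note (added for clarity): the sigmoid output assumes the series has been
    # scaled to [0, 1] beforehand (as in the usual TimeGAN min-max preprocessing).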
    y_pred, p_vars = predictor(X, T)

    # Loss for the predictor
    p_loss = tf1.losses.absolute_difference(Y, y_pred)

    # Optimizer
    p_solver = tf1.train.AdamOptimizer().minimize(p_loss, var_list=p_vars)
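    # Note (added for clarity): AdamOptimizer runs with its default learning
    # rate (1e-3), and only the predictor's variables are updated.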
    ## Training
    # Session start
    sess = tf1.Session()
    sess.run(tf1.global_variables_initializer())
    # Train using the synthetic dataset
    for itt in tqdm(range(iterations), desc="training"):
        # Set mini-batch
        idx = np.random.permutation(len(generated_data))
        train_idx = idx[:batch_size]

        X_mb = [generated_data[i][:-1, : (dim - 1)] for i in train_idx]
        T_mb = [generated_time[i] - 1 for i in train_idx]
        Y_mb = [
            np.reshape(
                generated_data[i][1:, (dim - 1)],
                [len(generated_data[i][1:, (dim - 1)]), 1],
            )
            for i in train_idx
        ]

        # Train the predictor
        _, step_p_loss = sess.run(
            [p_solver, p_loss], feed_dict={X: X_mb, T: T_mb, Y: Y_mb}
        )
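    # Note (added for clarity): this follows the "train on synthetic, test on
    # real" (TSTR) scheme -- the predictor above saw only generated data and is
    # evaluated next on the original data.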
    ## Test the trained model on the original data
    idx = np.random.permutation(len(ori_data))
    test_idx = idx[:no]

    X_mb = [ori_data[i][:-1, : (dim - 1)] for i in test_idx]
    T_mb = [ori_time[i] - 1 for i in test_idx]
    Y_mb = [
        np.reshape(ori_data[i][1:, (dim - 1)], [len(ori_data[i][1:, (dim - 1)]), 1])
        for i in test_idx
    ]
    # Prediction
    pred_Y_curr = sess.run(y_pred, feed_dict={X: X_mb, T: T_mb})

    # Compute the performance in terms of MAE
    MAE_temp = 0
    for i in range(no):
        MAE_temp += mean_absolute_error(Y_mb[i], pred_Y_curr[i, :, :])
    predictive_score = MAE_temp / no

    return predictive_score
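

# Minimal usage sketch (illustrative addition, not part of the original repo):
# evaluate the predictive score on random data in [0, 1]. Assumes
# `utils.metric_utils.extract_time` is importable and that both datasets share
# the shape [no, seq_len, dim]; with dense random data every sequence length
# equals seq_len.
if __name__ == "__main__":
    np.random.seed(0)
    ori = np.random.rand(32, 24, 6).astype(np.float32)  # [no, seq_len, dim]
    gen = np.random.rand(32, 24, 6).astype(np.float32)
    print("predictive score (MAE):", predictive_score_metrics(ori, gen))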