"""Reimplement TimeGAN-pytorch Codebase.
Reference: Jinsung Yoon, Daniel Jarrett, Mihaela van der Schaar,
"Time-series Generative Adversarial Networks,"
Neural Information Processing Systems (NeurIPS), 2019.
Paper link: https://papers.nips.cc/paper/8789-time-series-generative-adversarial-networks
Last updated Date: October 18th 2021
Code author: Zhiwei Zhang (bitzzw@gmail.com)
-----------------------------
predictive_metrics.py
Note: Use Post-hoc RNN to predict one-step ahead (last feature)
"""
# Necessary packages
import numpy as np
import tensorflow as tf
import tensorflow.compat.v1 as tf1  # public path, not the private tensorflow._api module
from sklearn.metrics import mean_absolute_error
from tqdm.auto import tqdm

from utils.metric_utils import extract_time

tf1.disable_eager_execution()

def predictive_score_metrics(ori_data, generated_data):
    """Report the performance of post-hoc RNN one-step-ahead prediction.

    The predictor is trained on the synthetic data and evaluated on the
    original data (train on synthetic, test on real).

    Args:
      - ori_data: original data
      - generated_data: generated synthetic data

    Returns:
      - predictive_score: MAE of the predictions on the original data
    """
    # Initialization on the graph
    tf1.reset_default_graph()

    # Basic parameters
    no, seq_len, dim = ori_data.shape

    # Set the maximum sequence length and each sequence length
    ori_time, ori_max_seq_len = extract_time(ori_data)
    generated_time, generated_max_seq_len = extract_time(generated_data)
    max_seq_len = max([ori_max_seq_len, generated_max_seq_len])

    ## Build a post-hoc RNN predictive network
    # Network parameters
    hidden_dim = int(dim / 2)
    iterations = 5000
    batch_size = 128

    # Input placeholders
    X = tf1.placeholder(tf.float32, [None, max_seq_len - 1, dim - 1], name="myinput_x")
    T = tf1.placeholder(tf.int32, [None], name="myinput_t")
    Y = tf1.placeholder(tf.float32, [None, max_seq_len - 1, 1], name="myinput_y")
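
    # Shape note (derived from the batch construction below): each input
    # sequence keeps the first (dim - 1) features over steps 1..T-1, and the
    # target is the last feature over steps 2..T, i.e. a one-step-ahead
    # prediction of the final feature from the remaining features.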
    # Predictor function
    def predictor(x, t):
        """Simple predictor function.

        Args:
          - x: time-series data
          - t: time information

        Returns:
          - y_hat: prediction
          - p_vars: predictor variables
        """
        with tf1.variable_scope("predictor", reuse=tf1.AUTO_REUSE) as vs:
            p_cell = tf1.nn.rnn_cell.GRUCell(
                num_units=hidden_dim, activation=tf.nn.tanh, name="p_cell"
            )
            p_outputs, p_last_states = tf1.nn.dynamic_rnn(
                p_cell, x, dtype=tf.float32, sequence_length=t
            )
            # Per-step linear head (replaces the removed tf.contrib fully_connected)
            y_hat_logit = tf1.layers.dense(p_outputs, 1, activation=None)
            y_hat = tf.nn.sigmoid(y_hat_logit)
            # tf1.global_variables() replaces the deprecated tf1.all_variables()
            p_vars = [v for v in tf1.global_variables() if v.name.startswith(vs.name)]
        return y_hat, p_vars
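
    # Note (added): the sigmoid output head assumes the series were min-max
    # scaled to [0, 1] upstream, as in the original TimeGAN preprocessing;
    # targets outside that range would be unreachable by the predictor.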
    y_pred, p_vars = predictor(X, T)

    # Loss for the predictor (mean absolute error over all steps)
    p_loss = tf1.losses.absolute_difference(Y, y_pred)

    # Optimizer
    p_solver = tf1.train.AdamOptimizer().minimize(p_loss, var_list=p_vars)

    ## Training
    # Session start
    sess = tf1.Session()
    sess.run(tf1.global_variables_initializer())
    # Train the predictor on the synthetic dataset
    for itt in tqdm(range(iterations), desc="training", total=iterations):
        # Set mini-batch: inputs are the first (dim - 1) features up to the
        # second-to-last step; targets are the last feature shifted one step
        idx = np.random.permutation(len(generated_data))
        train_idx = idx[:batch_size]

        X_mb = list(generated_data[i][:-1, : (dim - 1)] for i in train_idx)
        T_mb = list(generated_time[i] - 1 for i in train_idx)
        Y_mb = list(
            np.reshape(
                generated_data[i][1:, (dim - 1)],
                [len(generated_data[i][1:, (dim - 1)]), 1],
            )
            for i in train_idx
        )

        # Train predictor
        _, step_p_loss = sess.run(
            [p_solver, p_loss], feed_dict={X: X_mb, T: T_mb, Y: Y_mb}
        )
    ## Test the trained model on the original data
    idx = np.random.permutation(len(ori_data))
    test_idx = idx[:no]

    X_mb = list(ori_data[i][:-1, : (dim - 1)] for i in test_idx)
    T_mb = list(ori_time[i] - 1 for i in test_idx)
    Y_mb = list(
        np.reshape(ori_data[i][1:, (dim - 1)], [len(ori_data[i][1:, (dim - 1)]), 1])
        for i in test_idx
    )
    # Prediction
    pred_Y_curr = sess.run(y_pred, feed_dict={X: X_mb, T: T_mb})

    # Compute the performance in terms of MAE, averaged over sequences
    MAE_temp = 0
    for i in range(no):
        MAE_temp = MAE_temp + mean_absolute_error(Y_mb[i], pred_Y_curr[i, :, :])

    predictive_score = MAE_temp / no

    return predictive_score
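
# ---------------------------------------------------------------------------
# Minimal usage sketch (illustrative addition, not part of the original
# evaluation pipeline). The shapes [no, seq_len, dim] and the random inputs
# are assumptions purely for demonstration; in practice, pass min-max-scaled
# original data and TimeGAN-generated data of matching shape.
# ---------------------------------------------------------------------------
if __name__ == "__main__":
    ori = np.random.rand(32, 24, 6).astype(np.float32)  # hypothetical data
    gen = np.random.rand(32, 24, 6).astype(np.float32)  # hypothetical data
    score = predictive_score_metrics(ori, gen)
    print("predictive score (MAE): %.4f" % score)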