"""Reimplement TimeGAN-pytorch Codebase.

Reference: Jinsung Yoon, Daniel Jarrett, Mihaela van der Schaar,
"Time-series Generative Adversarial Networks,"
Neural Information Processing Systems (NeurIPS), 2019.

Paper link: https://papers.nips.cc/paper/8789-time-series-generative-adversarial-networks

Last updated: October 18th, 2021
Code author: Zhiwei Zhang (bitzzw@gmail.com)

-----------------------------

predictive_metrics.py

Note: Uses a post-hoc RNN to predict the last feature one step ahead.
"""

# Necessary Packages
import numpy as np
import tensorflow as tf
import tensorflow.compat.v1 as tf1
from sklearn.metrics import mean_absolute_error
from tqdm.auto import tqdm

from utils.metric_utils import extract_time

# The metric is built on the TF1 graph API, so eager execution must be disabled.
tf1.disable_eager_execution()


def predictive_score_metrics(ori_data, generated_data):
    """Report the performance of Post-hoc RNN one-step ahead prediction.

    Args:
      - ori_data: original data
      - generated_data: generated synthetic data

    Returns:
      - predictive_score: MAE of the predictions on the original data
    """
    # Initialization on the Graph
    tf1.reset_default_graph()

    # Basic Parameters
    no, seq_len, dim = ori_data.shape

    # Set maximum sequence length and each sequence length
    ori_time, ori_max_seq_len = extract_time(ori_data)
    generated_time, generated_max_seq_len = extract_time(generated_data)
    max_seq_len = max([ori_max_seq_len, generated_max_seq_len])

    ## Build a post-hoc RNN predictive network
    # Network parameters
    hidden_dim = int(dim / 2)
    iterations = 5000
    batch_size = 128

    # Input placeholders: the first (dim - 1) features over the first
    # (max_seq_len - 1) steps predict the last feature one step ahead
    X = tf1.placeholder(tf.float32, [None, max_seq_len - 1, dim - 1], name="myinput_x")
    T = tf1.placeholder(tf.int32, [None], name="myinput_t")
    Y = tf1.placeholder(tf.float32, [None, max_seq_len - 1, 1], name="myinput_y")

    # Predictor function
    def predictor(x, t):
        """Simple predictor function.

        Args:
          - x: time-series data
          - t: time information

        Returns:
          - y_hat: prediction
          - p_vars: predictor variables
        """
        with tf1.variable_scope("predictor", reuse=tf1.AUTO_REUSE) as vs:
            p_cell = tf1.nn.rnn_cell.GRUCell(
                num_units=hidden_dim, activation=tf.nn.tanh, name="p_cell"
            )
            p_outputs, p_last_states = tf1.nn.dynamic_rnn(
                p_cell, x, dtype=tf.float32, sequence_length=t
            )
            # tf.contrib was removed in TF2, so a plain dense layer replaces
            # tf.contrib.layers.fully_connected here.
            y_hat_logit = tf1.layers.dense(p_outputs, 1, activation=None)
            y_hat = tf.nn.sigmoid(y_hat_logit)
            p_vars = [v for v in tf1.global_variables() if v.name.startswith(vs.name)]

        return y_hat, p_vars

    y_pred, p_vars = predictor(X, T)
    # Loss for the predictor
    p_loss = tf1.losses.absolute_difference(Y, y_pred)
    # optimizer
    p_solver = tf1.train.AdamOptimizer().minimize(p_loss, var_list=p_vars)

    ## Training
    # Session start
    sess = tf1.Session()
    sess.run(tf1.global_variables_initializer())

    # Training using the synthetic dataset
    for itt in tqdm(range(iterations), desc="training"):
        # Set mini-batch: a random subset of synthetic sequences
        idx = np.random.permutation(len(generated_data))
        train_idx = idx[:batch_size]

        # Inputs are the first (dim - 1) features up to the penultimate step;
        # targets are the last feature shifted one step ahead.
        X_mb = [generated_data[i][:-1, : (dim - 1)] for i in train_idx]
        T_mb = [generated_time[i] - 1 for i in train_idx]
        Y_mb = [
            np.reshape(
                generated_data[i][1:, (dim - 1)],
                [len(generated_data[i][1:, (dim - 1)]), 1],
            )
            for i in train_idx
        ]

        # Train predictor
        _, step_p_loss = sess.run(
            [p_solver, p_loss], feed_dict={X: X_mb, T: T_mb, Y: Y_mb}
        )

    ## Test the trained model on the original data
    # (no == len(ori_data), so this is a shuffled pass over every sequence)
    idx = np.random.permutation(len(ori_data))
    test_idx = idx[:no]

    X_mb = [ori_data[i][:-1, : (dim - 1)] for i in test_idx]
    T_mb = [ori_time[i] - 1 for i in test_idx]
    Y_mb = [
        np.reshape(ori_data[i][1:, (dim - 1)], [len(ori_data[i][1:, (dim - 1)]), 1])
        for i in test_idx
    ]

    # Prediction
    pred_Y_curr = sess.run(y_pred, feed_dict={X: X_mb, T: T_mb})

    # Compute the performance in terms of MAE, averaged over sequences
    MAE_temp = 0
    for i in range(no):
        MAE_temp = MAE_temp + mean_absolute_error(Y_mb[i], pred_Y_curr[i, :, :])

    predictive_score = MAE_temp / no

    return predictive_score
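

# --- Usage sketch ---
# A minimal, illustrative call on random arrays of shape (no, seq_len, dim).
# The shapes, seed, and random data below are assumptions for demonstration
# only, and the fixed 5,000 training iterations make even this toy run slow.
if __name__ == "__main__":
    rng = np.random.default_rng(0)  # hypothetical toy data, not a real dataset
    ori = rng.random((64, 24, 6)).astype(np.float32)
    syn = rng.random((64, 24, 6)).astype(np.float32)
    print("predictive score (MAE):", predictive_score_metrics(ori, syn))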