"""Reimplement TimeGAN-pytorch Codebase. | |
Reference: Jinsung Yoon, Daniel Jarrett, Mihaela van der Schaar, | |
"Time-series Generative Adversarial Networks," | |
Neural Information Processing Systems (NeurIPS), 2019. | |
Paper link: https://papers.nips.cc/paper/8789-time-series-generative-adversarial-networks | |
Last updated Date: October 18th 2021 | |
Code author: Zhiwei Zhang (bitzzw@gmail.com) | |
----------------------------- | |
predictive_metrics.py | |
Note: Use Post-hoc RNN to predict one-step ahead (last feature) | |
""" | |
# Necessary packages
import numpy as np
import tensorflow as tf
import tensorflow.compat.v1 as tf1
from sklearn.metrics import mean_absolute_error
from tqdm.auto import tqdm

from utils.metric_utils import extract_time

tf1.disable_eager_execution()

def predictive_score_metrics(ori_data, generated_data):
    """Report the performance of a post-hoc RNN on one-step-ahead prediction.

    Args:
      - ori_data: original data
      - generated_data: generated synthetic data

    Returns:
      - predictive_score: MAE of the predictions on the original data
    """
    # Initialization on the graph
    tf1.reset_default_graph()

    # Basic parameters
    no, seq_len, dim = ori_data.shape

    # Set the maximum sequence length and each sequence's length
    ori_time, ori_max_seq_len = extract_time(ori_data)
    generated_time, generated_max_seq_len = extract_time(generated_data)
    max_seq_len = max([ori_max_seq_len, generated_max_seq_len])
    ## Build a post-hoc RNN predictive network
    # Network parameters
    hidden_dim = int(dim / 2)
    iterations = 5000
    batch_size = 128

    # Input placeholders
    X = tf1.placeholder(tf.float32, [None, max_seq_len - 1, dim - 1], name="myinput_x")
    T = tf1.placeholder(tf.int32, [None], name="myinput_t")
    Y = tf1.placeholder(tf.float32, [None, max_seq_len - 1, 1], name="myinput_y")
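    # Note (added for clarity): X holds the first (seq_len - 1) steps of the
    # first (dim - 1) features, T the per-sequence lengths, and Y the last
    # feature shifted one step ahead, so the network learns a one-step-ahead
    # prediction of the final feature from the remaining ones.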
    # Predictor function
    def predictor(x, t):
        """Simple predictor function.

        Args:
          - x: time-series data
          - t: time information

        Returns:
          - y_hat: prediction
          - p_vars: predictor variables
        """
        with tf1.variable_scope("predictor", reuse=tf1.AUTO_REUSE) as vs:
            p_cell = tf1.nn.rnn_cell.GRUCell(
                num_units=hidden_dim, activation=tf.nn.tanh, name="p_cell"
            )
            p_outputs, p_last_states = tf1.nn.dynamic_rnn(
                p_cell, x, dtype=tf.float32, sequence_length=t
            )
            y_hat_logit = tf1.layers.dense(p_outputs, 1, activation=None)
            y_hat = tf.nn.sigmoid(y_hat_logit)
            p_vars = [v for v in tf1.global_variables() if v.name.startswith(vs.name)]
        return y_hat, p_vars
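    # Note (added for clarity): the sigmoid output assumes the series has been
    # scaled to [0, 1] beforehand (as in the usual TimeGAN min-max preprocessing).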
    y_pred, p_vars = predictor(X, T)

    # Loss for the predictor
    p_loss = tf1.losses.absolute_difference(Y, y_pred)

    # Optimizer
    p_solver = tf1.train.AdamOptimizer().minimize(p_loss, var_list=p_vars)
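    # Note (added for clarity): AdamOptimizer runs with its default learning
    # rate (1e-3), and only the predictor's variables are updated.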
    ## Training
    # Session start
    sess = tf1.Session()
    sess.run(tf1.global_variables_initializer())
    # Train using the synthetic dataset
    for itt in tqdm(range(iterations), desc="training"):
        # Set mini-batch
        idx = np.random.permutation(len(generated_data))
        train_idx = idx[:batch_size]

        X_mb = [generated_data[i][:-1, : (dim - 1)] for i in train_idx]
        T_mb = [generated_time[i] - 1 for i in train_idx]
        Y_mb = [
            np.reshape(
                generated_data[i][1:, (dim - 1)],
                [len(generated_data[i][1:, (dim - 1)]), 1],
            )
            for i in train_idx
        ]

        # Train the predictor
        _, step_p_loss = sess.run(
            [p_solver, p_loss], feed_dict={X: X_mb, T: T_mb, Y: Y_mb}
        )
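    # Note (added for clarity): this follows the "train on synthetic, test on
    # real" (TSTR) scheme -- the predictor above saw only generated data and is
    # evaluated next on the original data.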
    ## Test the trained model on the original data
    idx = np.random.permutation(len(ori_data))
    test_idx = idx[:no]

    X_mb = [ori_data[i][:-1, : (dim - 1)] for i in test_idx]
    T_mb = [ori_time[i] - 1 for i in test_idx]
    Y_mb = [
        np.reshape(ori_data[i][1:, (dim - 1)], [len(ori_data[i][1:, (dim - 1)]), 1])
        for i in test_idx
    ]
    # Prediction
    pred_Y_curr = sess.run(y_pred, feed_dict={X: X_mb, T: T_mb})

    # Compute the performance in terms of MAE
    MAE_temp = 0
    for i in range(no):
        MAE_temp += mean_absolute_error(Y_mb[i], pred_Y_curr[i, :, :])
    predictive_score = MAE_temp / no

    return predictive_score
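

# Minimal usage sketch (illustrative addition, not part of the original repo):
# evaluate the predictive score on random data in [0, 1]. Assumes
# `utils.metric_utils.extract_time` is importable and that both datasets share
# the shape [no, seq_len, dim]; with dense random data every sequence length
# equals seq_len.
if __name__ == "__main__":
    np.random.seed(0)
    ori = np.random.rand(32, 24, 6).astype(np.float32)  # [no, seq_len, dim]
    gen = np.random.rand(32, 24, 6).astype(np.float32)
    print("predictive score (MAE):", predictive_score_metrics(ori, gen))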