from __future__ import absolute_import
from __future__ import division
from __future__ import print_function

r"""Train RL agent on coding tasks."""

import contextlib
import cPickle
import cProfile
import marshal
import os
import time

from absl import flags
from absl import logging
import tensorflow as tf

# internal session lib import

from single_task import data  # brain coder
from single_task import defaults  # brain coder
from single_task import pg_agent as agent_lib  # brain coder
from single_task import results_lib  # brain coder

FLAGS = flags.FLAGS
flags.DEFINE_string(
    'master', '',
    'URL of the TensorFlow master to use.')
flags.DEFINE_integer(
    'ps_tasks', 0,
    'Number of parameter server tasks. Only set to 0 for '
    'single worker training.')
flags.DEFINE_integer(
    'summary_interval', 10,
    'How often to write summaries.')
flags.DEFINE_integer(
    'summary_tasks', 16,
    'If greater than 0 only tasks 0 through summary_tasks - 1 '
    'will write summaries. If 0, all tasks will write '
    'summaries.')
flags.DEFINE_bool(
    'stop_on_success', True,
    'If True, training will stop as soon as a solution is found. '
    'If False, training will continue indefinitely until another '
    'stopping condition is reached.')
flags.DEFINE_bool(
    'do_profiling', False,
    'If True, cProfile profiler will run and results will be '
    'written to logdir. WARNING: Results will not be written if '
    'the code crashes. Make sure it exits successfully.')
flags.DEFINE_integer('model_v', 0, 'Model verbosity level.')
flags.DEFINE_bool(
    'delayed_graph_cleanup', True,
    'If true, container for n-th run will not be reset until the (n+1)-th run '
    'is complete. This greatly reduces the chance that a worker is still '
    'using the n-th container when it is cleared.')


def define_tuner_hparam_space(hparam_space_type):
  """Define tunable hparams for grid search."""
  if hparam_space_type not in ('pg', 'pg-topk', 'topk', 'is'):
    raise ValueError('Hparam space is not valid: "%s"' % hparam_space_type)

  # Discrete hparam space is stored as a dict from hparam name to discrete
  # values.
  hparam_space = {}

  if hparam_space_type in ('pg', 'pg-topk', 'is'):
    # Add a floating point parameter named learning rate.
    hparam_space['lr'] = [1e-5, 1e-4, 1e-3]
    hparam_space['entropy_beta'] = [0.005, 0.01, 0.05, 0.10]
  else:  # 'topk'
    # Add a floating point parameter named learning rate.
    hparam_space['lr'] = [1e-5, 1e-4, 1e-3]
    hparam_space['entropy_beta'] = [0.0, 0.005, 0.01, 0.05, 0.10]

  if hparam_space_type in ('topk', 'pg-topk'):
    # topk tuning will be enabled.
    hparam_space['topk'] = [10]
    hparam_space['topk_loss_hparam'] = [1.0, 10.0, 50.0, 200.0]
  elif hparam_space_type == 'is':
    # importance sampling tuning will be enabled.
    hparam_space['replay_temperature'] = [0.25, 0.5, 1.0, 2.0]
    hparam_space['alpha'] = [0.5, 0.75, 63/64.]

  return hparam_space
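
# Illustrative only: the hparam space above is just a dict mapping hparam name
# to a list of discrete values, so a full grid can be enumerated with
# itertools.product. This sketch assumes no particular tuner API and is not
# used anywhere in the training path.
#
#   import itertools
#   space = define_tuner_hparam_space('pg-topk')
#   names = sorted(space)
#   for values in itertools.product(*(space[n] for n in names)):
#     grid_point = dict(zip(names, values))  # e.g. {'lr': 1e-4, 'topk': 10, ...}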


def write_hparams_to_config(config, hparams, hparam_space_type):
  """Write hparams given by the tuner into the Config object."""
  if hparam_space_type not in ('pg', 'pg-topk', 'topk', 'is'):
    raise ValueError('Hparam space is not valid: "%s"' % hparam_space_type)

  config.agent.lr = hparams.lr
  config.agent.entropy_beta = hparams.entropy_beta

  if hparam_space_type in ('topk', 'pg-topk'):
    # topk tuning will be enabled.
    config.agent.topk = hparams.topk
    config.agent.topk_loss_hparam = hparams.topk_loss_hparam
  elif hparam_space_type == 'is':
    # importance sampling tuning will be enabled.
    config.agent.replay_temperature = hparams.replay_temperature
    config.agent.alpha = hparams.alpha
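
# Illustrative only: write_hparams_to_config only needs attribute access on
# `hparams`, so a namedtuple stands in for the tuner's hparams object in this
# sketch. The names below are an assumption for illustration, not tuner API.
#
#   import collections
#   HParams = collections.namedtuple('HParams', ['lr', 'entropy_beta'])
#   write_hparams_to_config(config, HParams(lr=1e-4, entropy_beta=0.01), 'pg')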


def make_initialized_variable(value, name, shape=None, dtype=tf.float32):
  """Create a tf.Variable with a constant initializer.

  Args:
    value: Constant value to initialize the variable with. This is the value
        that the variable starts with.
    name: Name of the variable in the TF graph.
    shape: Shape of the variable. If None, variable will be a scalar.
    dtype: Data type of the variable. Should be a TF dtype. Defaults to
        tf.float32.

  Returns:
    tf.Variable instance.
  """
  if shape is None:
    shape = []
  return tf.get_variable(
      name=name, shape=shape, initializer=tf.constant_initializer(value),
      dtype=dtype, trainable=False)
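
# Illustrative usage: this helper returns a non-trainable variable with a
# constant starting value, e.g. the scalar counters created in AsyncTrainer
# below. The variable name here is hypothetical.
#
#   example_count = make_initialized_variable(0, 'example_count',
#                                             dtype=tf.int64)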


class AsyncTrainer(object):
  """Manages graph creation and training.

  This async trainer creates a global model on the parameter server, and a
  local model (for this worker). Gradient updates are sent to the global model,
  and the updated weights are synced to the local copy.
  """

  def __init__(self, config, task_id, ps_tasks, num_workers, is_chief=True,
               summary_writer=None,
               dtype=tf.float32,
               summary_interval=1,
               run_number=0,
               logging_dir='/tmp', model_v=0):
    self.config = config
    self.data_manager = data.DataManager(
        config, run_number=run_number,
        do_code_simplification=not FLAGS.stop_on_success)
    self.task_id = task_id
    self.ps_tasks = ps_tasks
    self.is_chief = is_chief
    if ps_tasks == 0:
      assert task_id == 0, 'No parameter servers specified. Expecting 1 task.'
      assert num_workers == 1, (
          'No parameter servers specified. Expecting 1 task.')
      worker_device = '/job:localhost/replica:%d/task:0/cpu:0' % task_id
      # worker_device = '/cpu:0'
      # ps_device = '/cpu:0'
    else:
      assert num_workers > 0, 'There must be at least 1 training worker.'
      worker_device = '/job:worker/replica:%d/task:0/cpu:0' % task_id
      # ps_device = '/job:ps/replica:0/task:0/cpu:0'
    logging.info('worker_device: %s', worker_device)

    logging_file = os.path.join(
        logging_dir, 'solutions_%d.txt' % task_id)
    experience_replay_file = os.path.join(
        logging_dir, 'replay_buffer_%d.pickle' % task_id)
    self.topk_file = os.path.join(
        logging_dir, 'topk_buffer_%d.pickle' % task_id)

    tf.get_variable_scope().set_use_resource(True)

    # global model
    with tf.device(tf.train.replica_device_setter(ps_tasks,
                                                  ps_device='/job:ps/replica:0',
                                                  worker_device=worker_device)):
      with tf.variable_scope('global'):
        global_model = agent_lib.LMAgent(config, dtype=dtype, is_local=False)
        global_params_dict = {p.name: p
                              for p in global_model.sync_variables}
        self.global_model = global_model
        self.global_step = make_initialized_variable(
            0, 'global_step', dtype=tf.int64)

        self.global_best_reward = make_initialized_variable(
            -10.0, 'global_best_reward', dtype=tf.float64)
        self.is_best_model = make_initialized_variable(
            False, 'is_best_model', dtype=tf.bool)
        self.reset_is_best_model = self.is_best_model.assign(False)
        self.global_best_reward_placeholder = tf.placeholder(
            tf.float64, [], name='global_best_reward_placeholder')
        self.assign_global_best_reward_op = tf.group(
            self.global_best_reward.assign(
                self.global_best_reward_placeholder),
            self.is_best_model.assign(True))

        def assign_global_best_reward_fn(session, reward):
          reward = round(reward, 10)
          best_reward = round(session.run(self.global_best_reward), 10)
          is_best = reward > best_reward
          if is_best:
            session.run(self.assign_global_best_reward_op,
                        {self.global_best_reward_placeholder: reward})
          return is_best
        self.assign_global_best_reward_fn = assign_global_best_reward_fn

        # Any worker will set to true when it finds a solution.
        self.found_solution_flag = make_initialized_variable(
            False, 'found_solution_flag', dtype=tf.bool)
        self.found_solution_op = self.found_solution_flag.assign(True)

        self.run_number = make_initialized_variable(
            run_number, 'run_number', dtype=tf.int32)

        # Store a solution when found.
        self.code_solution_variable = tf.get_variable(
            'code_solution', [], tf.string,
            initializer=tf.constant_initializer(''))
        self.code_solution_ph = tf.placeholder(
            tf.string, [], name='code_solution_ph')
        self.code_solution_assign_op = self.code_solution_variable.assign(
            self.code_solution_ph)

        def assign_code_solution_fn(session, code_solution_string):
          session.run(self.code_solution_assign_op,
                      {self.code_solution_ph: code_solution_string})
        self.assign_code_solution_fn = assign_code_solution_fn

        # Count all programs sampled from policy. This does not include
        # programs sampled from replay buffer.
        # This equals NPE (number of programs executed). Only programs sampled
        # from the policy need to be executed.
        self.program_count = make_initialized_variable(
            0, 'program_count', dtype=tf.int64)

    # local model
    with tf.device(worker_device):
      with tf.variable_scope('local'):
        self.model = model = agent_lib.LMAgent(
            config,
            task_id=task_id,
            logging_file=logging_file,
            experience_replay_file=experience_replay_file,
            dtype=dtype,
            global_best_reward_fn=self.assign_global_best_reward_fn,
            found_solution_op=self.found_solution_op,
            assign_code_solution_fn=self.assign_code_solution_fn,
            program_count=self.program_count,
            stop_on_success=FLAGS.stop_on_success,
            verbose_level=model_v)
        local_params = model.trainable_variables
        local_params_dict = {p.name: p for p in local_params}

    # Pull global params to local model.
    def _global_to_local_scope(name):
      assert name.startswith('global/')
      return 'local' + name[6:]
    sync_dict = {
        local_params_dict[_global_to_local_scope(p_name)]: p
        for p_name, p in global_params_dict.items()}
    self.sync_op = tf.group(*[v_local.assign(v_global)
                              for v_local, v_global
                              in sync_dict.items()])

    # Pair local gradients with global params.
    grad_var_dict = {
        gradient: sync_dict[local_var]
        for local_var, gradient in model.gradients_dict.items()}

    # local model
    model.make_summary_ops()  # Don't put summaries under 'local' scope.
    with tf.variable_scope('local'):
      self.train_op = model.optimizer.apply_gradients(
          grad_var_dict.items(), global_step=self.global_step)
      self.local_init_op = tf.variables_initializer(
          tf.get_collection(tf.GraphKeys.GLOBAL_VARIABLES,
                            tf.get_variable_scope().name))

    self.local_step = 0
    self.last_summary_time = time.time()
    self.summary_interval = summary_interval
    self.summary_writer = summary_writer
    self.cached_global_step = -1
    self.cached_global_npe = -1

    logging.info('summary_interval: %d', self.summary_interval)

    # Load top-k buffer.
    if self.model.top_episodes is not None and tf.gfile.Exists(self.topk_file):
      try:
        with tf.gfile.FastGFile(self.topk_file, 'r') as f:
          self.model.top_episodes = cPickle.loads(f.read())
        logging.info(
            'Loaded top-k buffer from disk with %d items. Location: "%s"',
            len(self.model.top_episodes), self.topk_file)
      except (cPickle.UnpicklingError, EOFError) as e:
        logging.warn(
            'Failed to load existing top-k buffer from disk. Removing bad file.'
            '\nLocation: "%s"\nException: %s', self.topk_file, str(e))
        tf.gfile.Remove(self.topk_file)

  def initialize(self, session):
    """Run initialization ops."""
    session.run(self.local_init_op)
    session.run(self.sync_op)
    self.cached_global_step, self.cached_global_npe = session.run(
        [self.global_step, self.program_count])

  def update_global_model(self, session):
    """Run an update step.

    1) Asynchronously copy global weights to local model.
    2) Call into local model's update_step method, which does the following:
        a) Sample batch of programs from policy.
        b) Compute rewards.
        c) Compute gradients and update the global model asynchronously.
    3) Write tensorboard summaries to disk.

    Args:
      session: tf.Session instance.
    """
    session.run(self.sync_op)  # Copy weights from global to local.

    with session.as_default():
      result = self.model.update_step(
          session, self.data_manager.sample_rl_batch(), self.train_op,
          self.global_step)
      global_step = result.global_step
      global_npe = result.global_npe
      summaries = result.summaries_list
    self.cached_global_step = global_step
    self.cached_global_npe = global_npe
    self.local_step += 1

    if self.summary_writer and self.local_step % self.summary_interval == 0:
      if not isinstance(summaries, (tuple, list)):
        summaries = [summaries]
      summaries.append(self._local_step_summary())
      if self.is_chief:
        (global_best_reward,
         found_solution_flag,
         program_count) = session.run(
             [self.global_best_reward,
              self.found_solution_flag,
              self.program_count])
        summaries.append(
            tf.Summary(
                value=[tf.Summary.Value(
                    tag='model/best_reward',
                    simple_value=global_best_reward)]))
        summaries.append(
            tf.Summary(
                value=[tf.Summary.Value(
                    tag='model/solution_found',
                    simple_value=int(found_solution_flag))]))
        summaries.append(
            tf.Summary(
                value=[tf.Summary.Value(
                    tag='model/program_count',
                    simple_value=program_count)]))
      for s in summaries:
        self.summary_writer.add_summary(s, global_step)
      self.last_summary_time = time.time()

  def _local_step_summary(self):
    """Compute number of local steps per time increment."""
    dt = time.time() - self.last_summary_time
    steps_per_time = self.summary_interval / float(dt)
    return tf.Summary(value=[
        tf.Summary.Value(
            tag='local_step/per_sec',
            simple_value=steps_per_time),
        tf.Summary.Value(
            tag='local_step/step',
            simple_value=self.local_step)])

  def maybe_save_best_model(self, session, saver, checkpoint_file):
    """Check if this model got the highest reward and save to disk if so."""
    if self.is_chief and session.run(self.is_best_model):
      logging.info('Saving best model to "%s"', checkpoint_file)
      saver.save(session, checkpoint_file)
      session.run(self.reset_is_best_model)

  def save_replay_buffer(self):
    """Save replay buffer to disk.

    Call this periodically so that training can recover if jobs go down.
    """
    if self.model.experience_replay is not None:
      logging.info('Saving experience replay buffer to "%s".',
                   self.model.experience_replay.save_file)
      self.model.experience_replay.incremental_save(True)

  def delete_replay_buffer(self):
    """Delete replay buffer from disk.

    Call this at the end of training to clean up. Replay buffer can get very
    large.
    """
    if self.model.experience_replay is not None:
      logging.info('Deleting experience replay buffer at "%s".',
                   self.model.experience_replay.save_file)
      tf.gfile.Remove(self.model.experience_replay.save_file)

  def save_topk_buffer(self):
    """Save top-k buffer to disk.

    Call this periodically so that training can recover if jobs go down.
    """
    if self.model.top_episodes is not None:
      logging.info('Saving top-k buffer to "%s".', self.topk_file)
      # Overwrite previous data each time.
      with tf.gfile.FastGFile(self.topk_file, 'w') as f:
        f.write(cPickle.dumps(self.model.top_episodes))


@contextlib.contextmanager
def managed_session(sv, master='', config=None,
                    start_standard_services=True,
                    close_summary_writer=True,
                    max_wait_secs=7200):
  # Same as Supervisor.managed_session, but with configurable timeout.
  try:
    sess = sv.prepare_or_wait_for_session(
        master=master, config=config,
        start_standard_services=start_standard_services,
        max_wait_secs=max_wait_secs)
    yield sess
  except tf.errors.DeadlineExceededError:
    raise
  except Exception as e:  # pylint: disable=broad-except
    sv.request_stop(e)
  finally:
    try:
      # Request all the threads to stop and wait for them to do so. Any
      # exception raised by the threads is raised again from stop().
      # Passing stop_grace_period_secs is for blocked enqueue/dequeue
      # threads which are not checking for `should_stop()`. They
      # will be stopped when we close the session further down.
      sv.stop(close_summary_writer=close_summary_writer)
    finally:
      # Close the session to finish up all pending calls. We do not care
      # about exceptions raised when closing. This takes care of
      # blocked enqueue/dequeue calls.
      try:
        sess.close()
      except Exception:  # pylint: disable=broad-except
        # Silently ignore exceptions raised by close().
        pass
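
# Illustrative usage sketch: `train` below opens its session this way, given a
# tf.train.Supervisor `sv` built for the graph.
#
#   with managed_session(sv, FLAGS.master, max_wait_secs=60) as session:
#     session.run(...)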


def train(config, is_chief, tuner=None, run_dir=None, run_number=0,
          results_writer=None):
  """Run training loop.

  Args:
    config: config_lib.Config instance containing global config (agent and
        env).
    is_chief: True if this worker is chief. Chief worker manages writing some
        data to disk and initialization of the global model.
    tuner: A tuner instance. If not tuning, leave as None.
    run_dir: Directory where all data for this run will be written. If None,
        run_dir = FLAGS.logdir. Set this argument when doing multiple runs.
    run_number: Which run is this.
    results_writer: Manages writing training results to disk. Results are a
        dict of metric names and values.

  Returns:
    The trainer object used to run training updates.
  """
  logging.info('Will run asynchronous training.')

  if run_dir is None:
    run_dir = FLAGS.logdir
  train_dir = os.path.join(run_dir, 'train')
  best_model_checkpoint = os.path.join(train_dir, 'best.ckpt')
  events_dir = '%s/events_%d' % (run_dir, FLAGS.task_id)
  logging.info('Events directory: %s', events_dir)
  logging_dir = os.path.join(run_dir, 'logs')
  if not tf.gfile.Exists(logging_dir):
    tf.gfile.MakeDirs(logging_dir)
  status_file = os.path.join(logging_dir, 'status.txt')

  if FLAGS.summary_tasks and FLAGS.task_id < FLAGS.summary_tasks:
    summary_writer = tf.summary.FileWriter(events_dir)
  else:
    summary_writer = None

  # Only profile task 0.
  if FLAGS.do_profiling:
    logging.info('Profiling enabled')
    profiler = cProfile.Profile()
    profiler.enable()
  else:
    profiler = None

  trainer = AsyncTrainer(
      config, FLAGS.task_id, FLAGS.ps_tasks, FLAGS.num_workers,
      is_chief=is_chief,
      summary_interval=FLAGS.summary_interval,
      summary_writer=summary_writer,
      logging_dir=logging_dir,
      run_number=run_number,
      model_v=FLAGS.model_v)

  variables_to_save = [v for v in tf.global_variables()
                       if v.name.startswith('global')]
  global_init_op = tf.variables_initializer(variables_to_save)
  saver = tf.train.Saver(variables_to_save)

  var_list = tf.get_collection(tf.GraphKeys.TRAINABLE_VARIABLES,
                               tf.get_variable_scope().name)
  logging.info('Trainable vars:')
  for v in var_list:
    logging.info('  %s, %s, %s', v.name, v.device, v.get_shape())

  logging.info('All vars:')
  for v in tf.global_variables():
    logging.info('  %s, %s, %s', v.name, v.device, v.get_shape())

  def init_fn(unused_sess):
    logging.info('No checkpoint found. Initialized global params.')

  sv = tf.train.Supervisor(is_chief=is_chief,
                           logdir=train_dir,
                           saver=saver,
                           summary_op=None,
                           init_op=global_init_op,
                           init_fn=init_fn,
                           summary_writer=summary_writer,
                           ready_op=tf.report_uninitialized_variables(
                               variables_to_save),
                           ready_for_local_init_op=None,
                           global_step=trainer.global_step,
                           save_model_secs=30,
                           save_summaries_secs=30)

  # Add a thread that periodically checks if this Trial should stop
  # based on an early stopping policy.
  if tuner:
    sv.Loop(60, tuner.check_for_stop, (sv.coord,))

  last_replay_save_time = time.time()

  global_step = -1
  logging.info(
      'Starting session. '
      'If this hangs, we\'re most likely waiting to connect '
      'to the parameter server. One common cause is that the parameter '
      'server DNS name isn\'t resolving yet, or is misspecified.')

  should_retry = True
  supervisor_deadline_exceeded = False
  while should_retry:
    try:
      with managed_session(
          sv, FLAGS.master, max_wait_secs=60) as session, session.as_default():
        should_retry = False
        do_training = True

        try:
          trainer.initialize(session)
          if session.run(trainer.run_number) != run_number:
            # If we loaded existing model from disk, and the saved run number
            # is different, throw an exception.
            raise RuntimeError(
                'Expecting to be on run %d, but is actually on run %d. '
                'run_dir: "%s"'
                % (run_number, session.run(trainer.run_number), run_dir))
          global_step = trainer.cached_global_step
          logging.info('Starting training at step=%d', global_step)
          while do_training:
            trainer.update_global_model(session)
            if is_chief:
              trainer.maybe_save_best_model(
                  session, saver, best_model_checkpoint)
            global_step = trainer.cached_global_step
            global_npe = trainer.cached_global_npe

            if time.time() - last_replay_save_time >= 30:
              trainer.save_replay_buffer()
              trainer.save_topk_buffer()
              last_replay_save_time = time.time()

            # Stopping conditions.
            if tuner and tuner.should_trial_stop():
              logging.info('Tuner requested early stopping. Finishing.')
              do_training = False
            if is_chief and FLAGS.stop_on_success:
              found_solution = session.run(trainer.found_solution_flag)
              if found_solution:
                do_training = False
                logging.info('Solution found. Finishing.')
            if FLAGS.max_npe and global_npe >= FLAGS.max_npe:
              # Max NPE (number of programs executed) reached.
              logging.info('Max NPE reached. Finishing.')
              do_training = False
            if sv.should_stop():
              logging.info('Supervisor issued stop. Finishing.')
              do_training = False

        except tf.errors.NotFoundError:
          # Catch "Error while reading resource variable".
          # The chief worker likely destroyed the container, so do not retry.
          logging.info('Caught NotFoundError. Quitting.')
          do_training = False
          should_retry = False
          break
        except tf.errors.InternalError as e:
          # Catch "Invalid variable reference."
          if str(e).startswith('Invalid variable reference.'):
            # The chief worker likely destroyed the container, so do not
            # retry.
            logging.info(
                'Caught "InternalError: Invalid variable reference.". '
                'Quitting.')
            do_training = False
            should_retry = False
            break
          else:
            # Pass exception through.
            raise

        # Exited training loop. Write results to disk.
        if is_chief and results_writer:
          assert not should_retry
          with tf.gfile.FastGFile(status_file, 'w') as f:
            f.write('done')

          (program_count,
           found_solution,
           code_solution,
           best_reward,
           global_step) = session.run(
               [trainer.program_count,
                trainer.found_solution_flag,
                trainer.code_solution_variable,
                trainer.global_best_reward,
                trainer.global_step])
          results_dict = {
              'max_npe': FLAGS.max_npe,
              'batch_size': config.batch_size,
              'max_batches': FLAGS.max_npe // config.batch_size,
              'npe': program_count,
              'max_global_repetitions': FLAGS.num_repetitions,
              'max_local_repetitions': FLAGS.num_repetitions,
              'code_solution': code_solution,
              'best_reward': best_reward,
              'num_batches': global_step,
              'found_solution': found_solution,
              'task': trainer.data_manager.task_name,
              'global_rep': run_number}
          logging.info('results_dict: %s', results_dict)
          results_writer.append(results_dict)
    except tf.errors.AbortedError:
      # Catch "Graph handle is not found" error due to preempted jobs.
      logging.info('Caught AbortedError. Retrying.')
      should_retry = True
    except tf.errors.DeadlineExceededError:
      supervisor_deadline_exceeded = True
      should_retry = False

  if is_chief:
    logging.info('This is chief worker. Stopping all workers.')
    sv.stop()

  if supervisor_deadline_exceeded:
    logging.info('Supervisor timed out. Quitting.')
  else:
    logging.info('Reached %s steps. Worker stopped.', global_step)

  # Dump profiling.
  """
  How to use profiling data.

  Download the profiler dump to your local machine, say to PROF_FILE_PATH.
  In a separate script, run something like the following:

    import pstats
    p = pstats.Stats(PROF_FILE_PATH)
    p.strip_dirs().sort_stats('cumtime').print_stats()

  This will sort by 'cumtime', which "is the cumulative time spent in this and
  all subfunctions (from invocation till exit)."
  https://docs.python.org/2/library/profile.html#instant-user-s-manual
  """  # pylint: disable=pointless-string-statement
  if profiler:
    prof_file = os.path.join(run_dir, 'task_%d.prof' % FLAGS.task_id)
    logging.info('Done profiling.\nDumping to "%s".', prof_file)
    profiler.create_stats()
    with tf.gfile.Open(prof_file, 'w') as f:
      f.write(marshal.dumps(profiler.stats))

  return trainer


def run_training(config=None, tuner=None, logdir=None, trial_name=None,
                 is_chief=True):
  """Do all training runs.

  This is the top level training function for policy gradient based models.
  Run this from the main function.

  Args:
    config: config_lib.Config instance containing global config (agent and
        environment hparams). If None, config will be parsed from FLAGS.config.
    tuner: A tuner instance. Leave as None if not tuning.
    logdir: Parent directory where all data from all runs will be written. If
        None, FLAGS.logdir will be used.
    trial_name: If tuning, set this to a unique string that identifies this
        trial. If `tuner` is not None, this also must be set.
    is_chief: True if this worker is the chief.

  Returns:
    List of results dicts which were written to disk. Each training run gets a
    results dict. Results dict contains metrics, i.e. (name, value) pairs which
    give information about the training run.

  Raises:
    ValueError: If results dicts read from disk contain invalid data.
  """
  if not config:
    # If custom config is not given, get it from flags.
    config = defaults.default_config_with_updates(FLAGS.config)
  if not logdir:
    logdir = FLAGS.logdir

  if not tf.gfile.Exists(logdir):
    tf.gfile.MakeDirs(logdir)
  assert FLAGS.num_repetitions > 0
  results = results_lib.Results(logdir)
  results_list, _ = results.read_all()

  logging.info('Starting experiment. Directory: "%s"', logdir)

  if results_list:
    if results_list[0]['max_npe'] != FLAGS.max_npe:
      raise ValueError(
          'Cannot resume training. Max-NPE changed. Was %s, now %s'
          % (results_list[0]['max_npe'], FLAGS.max_npe))
    if results_list[0]['max_global_repetitions'] != FLAGS.num_repetitions:
      raise ValueError(
          'Cannot resume training. Number of repetitions changed. Was %s, '
          'now %s'
          % (results_list[0]['max_global_repetitions'],
             FLAGS.num_repetitions))

  while len(results_list) < FLAGS.num_repetitions:
    run_number = len(results_list)
    rep_container_name = trial_name if trial_name else 'container'
    if FLAGS.num_repetitions > 1:
      rep_dir = os.path.join(logdir, 'run_%d' % run_number)
      rep_container_name = rep_container_name + '_run_' + str(run_number)
    else:
      rep_dir = logdir

    logging.info(
        'Starting repetition %d (%d out of %d)', run_number, run_number + 1,
        FLAGS.num_repetitions)

    # Train will write result to disk.
    with tf.container(rep_container_name):
      trainer = train(config, is_chief, tuner, rep_dir, run_number, results)
    logging.info('Done training.')

    if is_chief:
      # Destroy current container immediately (clears current graph).
      logging.info('Clearing shared variables.')
      tf.Session.reset(FLAGS.master, containers=[rep_container_name])
      logging.info('Shared variables cleared.')

      # Delete replay buffer on disk.
      assert trainer
      trainer.delete_replay_buffer()
    else:
      # Give chief worker time to clean up.
      sleep_sec = 30.0
      logging.info('Sleeping for %s sec.', sleep_sec)
      time.sleep(sleep_sec)
    tf.reset_default_graph()
    logging.info('Default graph reset.')

    # Expecting that train wrote new result to disk before returning.
    results_list, _ = results.read_all()

  return results_list
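
# Illustrative only: a minimal entry-point sketch. The flags referenced above
# (e.g. config, logdir, max_npe, num_repetitions, task_id, num_workers) are
# defined by the launcher elsewhere in this package, so this is an assumption
# for illustration rather than the actual runner.
#
#   def main(argv):
#     del argv  # Unused.
#     run_training(is_chief=(FLAGS.task_id == 0))
#
#   if __name__ == '__main__':
#     tf.app.run(main)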