import tensorflow as tf
import numpy as np
from baselines.common.vec_env.dummy_vec_env import DummyVecEnv
|
|
N_TRIALS = 10000    # default number of environment steps for simple_test
N_EPISODES = 100    # default number of episodes for reward_per_episode_test
|
|
# Single-threaded session config so test runs stay reproducible.
_sess_config = tf.compat.v1.ConfigProto(
    allow_soft_placement=True,
    intra_op_parallelism_threads=1,
    inter_op_parallelism_threads=1
)
|
|
def simple_test(env_fn, learn_fn, min_reward_fraction, n_trials=N_TRIALS):
    """Train a model and assert that the summed reward over `n_trials`
    environment steps exceeds `min_reward_fraction * n_trials` (suitable
    for dense, per-step reward environments)."""
    def seeded_env_fn():
        env = env_fn()
        env.seed(0)
        return env

    np.random.seed(0)
    env = DummyVecEnv([seeded_env_fn])
    with tf.Graph().as_default(), tf.compat.v1.Session(config=_sess_config).as_default():
        tf.compat.v1.set_random_seed(0)
        model = learn_fn(env)
        sum_rew = 0
        done = True  # force a reset on the first iteration
        for i in range(n_trials):
            if done:
                obs = env.reset()
                state = model.initial_state
            if state is not None:
                # Recurrent policies need the state and a done-mask threaded through.
                a, v, state, _ = model.step(obs, S=state, M=[False])
            else:
                a, v, _, _ = model.step(obs)
            obs, rew, done, _ = env.step(a)
            sum_rew += float(rew)  # single-env DummyVecEnv returns a length-1 array
        print("Reward in {} trials is {}".format(n_trials, sum_rew))
        assert sum_rew > min_reward_fraction * n_trials, \
            'sum of rewards {} is less than {} of the total number of trials {}'.format(
                sum_rew, min_reward_fraction, n_trials)
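

# Example usage (a sketch, kept commented out so it does not run at import
# time): a hypothetical test that trains ppo2 on CartPole and requires the
# summed reward to clear 90% of the step count. `get_learn_function` is
# baselines' algorithm registry lookup; the env name, network, and timestep
# budget are illustrative assumptions, not part of this module.
#
#     import gym
#     from baselines.run import get_learn_function
#
#     def test_cartpole_ppo2():
#         learn_fn = lambda env: get_learn_function('ppo2')(
#             env=env, network='mlp', total_timesteps=100000)
#         simple_test(lambda: gym.make('CartPole-v0'), learn_fn, 0.9)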
|
|
def reward_per_episode_test(env_fn, learn_fn, min_avg_reward, n_trials=N_EPISODES):
    """Train a model, roll out `n_trials` complete episodes, and assert that
    the average per-episode return exceeds `min_avg_reward`."""
    env = DummyVecEnv([env_fn])
    with tf.Graph().as_default(), tf.compat.v1.Session(config=_sess_config).as_default():
        model = learn_fn(env)
        observations, actions, rewards = rollout(env, model, n_trials)
        rewards = [sum(map(float, r)) for r in rewards]  # per-episode returns
        avg_rew = sum(rewards) / n_trials
        print("Average reward in {} episodes is {}".format(n_trials, avg_rew))
        assert avg_rew > min_avg_reward, \
            'average reward in {} episodes ({}) is less than {}'.format(n_trials, avg_rew, min_avg_reward)
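

# Example usage (an illustrative sketch): an episodic check that the average
# return clears a threshold. The algorithm, environment, and threshold below
# are assumptions for illustration, not part of this module.
#
#     def test_pendulum_ddpg():
#         learn_fn = lambda env: get_learn_function('ddpg')(
#             env=env, network='mlp', total_timesteps=50000)
#         reward_per_episode_test(lambda: gym.make('Pendulum-v0'), learn_fn, -200)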
|
|
def rollout(env, model, n_trials):
    """Collect `n_trials` complete episodes; returns per-episode lists of
    observations, actions, and rewards."""
    rewards = []
    actions = []
    observations = []
    for i in range(n_trials):
        obs = env.reset()
        state = model.initial_state if hasattr(model, 'initial_state') else None
        episode_rew = []
        episode_actions = []
        episode_obs = []
        while True:
            if state is not None:
                a, v, state, _ = model.step(obs, S=state, M=[False])
            else:
                a, v, _, _ = model.step(obs)
            obs, rew, done, _ = env.step(a)
            episode_rew.append(rew)
            episode_actions.append(a)
            episode_obs.append(obs)
            if done:  # with a single-env DummyVecEnv this is a length-1 array
                break
        rewards.append(episode_rew)
        actions.append(episode_actions)
        observations.append(episode_obs)
    return observations, actions, rewards
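

# Example (a sketch): deriving episode lengths and returns from a rollout.
# `env` and `model` are assumed to come from the helpers above.
#
#     obs, acts, rews = rollout(env, model, n_trials=10)
#     episode_lengths = [len(r) for r in rews]
#     episode_returns = [sum(map(float, r)) for r in rews]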


def smoketest(argstr, **kwargs):
    """Run `python -m baselines.run` with the given argument string, appending
    any keyword arguments as extra --key=value flags. Logs are written to a
    fresh temporary directory, whose path is returned."""
    import tempfile
    import subprocess
    import os
    argstr = 'python -m baselines.run ' + argstr
    for key, value in kwargs.items():
        argstr += ' --{}={}'.format(key, value)
    tempdir = tempfile.mkdtemp()
    env = os.environ.copy()
    env['OPENAI_LOGDIR'] = tempdir
    subprocess.run(argstr.split(' '), env=env)
    return tempdir
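

# Example usage (a sketch): run a tiny end-to-end training job and inspect
# the logs written under the returned directory. The flags mirror the
# `python -m baselines.run` CLI; the values here are illustrative.
#
#     import os
#     logdir = smoketest('--alg=ppo2 --env=CartPole-v0', num_timesteps=2048)
#     print(os.listdir(logdir))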
|
|