|
1) Baseline run: PPO with an MLP policy, trained for 500k steps.

model = PPO(policy="MlpPolicy",
            env=env,
            n_steps=1024,
            batch_size=64,
            n_epochs=4,
            gamma=0.999,
            gae_lambda=0.98,
            ent_coef=0.01,
            verbose=1)
model.learn(total_timesteps=500000)

mean_reward = 193.60 +/- 21.33
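For context, the mean_reward figures in these notes look like the output of Stable-Baselines3's evaluate_policy helper, which returns the mean and standard deviation of the episode return over a number of evaluation episodes. A minimal sketch of that evaluation step, assuming a Gymnasium environment such as LunarLander-v2 (the actual env is never shown in these notes, so treat it as a placeholder):

import gymnasium as gym
from stable_baselines3 import PPO
from stable_baselines3.common.evaluation import evaluate_policy

# Hypothetical env -- the notes above never show how `env` was created.
env = gym.make("LunarLander-v2")

model = PPO(policy="MlpPolicy", env=env, n_steps=1024, batch_size=64,
            n_epochs=4, gamma=0.999, gae_lambda=0.98, ent_coef=0.01,
            verbose=1)
model.learn(total_timesteps=500000)

# evaluate_policy returns (mean_reward, std_reward) over n_eval_episodes.
mean_reward, std_reward = evaluate_policy(model, env, n_eval_episodes=10,
                                          deterministic=True)
print(f"mean_reward={mean_reward:.2f} +/- {std_reward:.2f}")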
|
|
|
2) Same configuration as run 1, but with n_epochs doubled from 4 to 8.

model = PPO(policy="MlpPolicy",
            env=env,
            n_steps=1024,
            batch_size=64,
            n_epochs=8,
            gamma=0.999,
            gae_lambda=0.98,
            ent_coef=0.01,
            verbose=1)
model.learn(total_timesteps=500000)

mean_reward = 235.09 +/- 21.88
|
|
|
3) Same configuration as run 2, but trained twice as long (1M steps).

model = PPO(policy="MlpPolicy",
            env=env,
            n_steps=1024,
            batch_size=64,
            n_epochs=8,
            gamma=0.999,
            gae_lambda=0.98,
            ent_coef=0.01,
            verbose=1)
model.learn(total_timesteps=1000000)
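A 1M-step run takes a while, so it is worth persisting the trained policy before evaluating it; a minimal sketch using Stable-Baselines3's save/load API (the filename is hypothetical):

# Save the trained policy to disk (filename is hypothetical).
model.save("ppo_run3")

# Reload later without retraining; evaluation then works as in run 1.
loaded_model = PPO.load("ppo_run3", env=env)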
|
|
|
|