1) model = PPO(policy="MlpPolicy", env=env, n_steps=1024, batch_size=64, n_epochs=4, gamma=0.999, gae_lambda=0.98, ent_coef=0.01, verbose=1)
   model.learn(total_timesteps=500000)
   mean_reward = 193.60 +/- 21.33

2) model = PPO(policy="MlpPolicy", env=env, n_steps=1024, batch_size=64, n_epochs=8, gamma=0.999, gae_lambda=0.98, ent_coef=0.01, verbose=1)
   model.learn(total_timesteps=500000)
   mean_reward = 235.09 +/- 21.88

3) model = PPO(policy="MlpPolicy", env=env, n_steps=1024, batch_size=64, n_epochs=8, gamma=0.999, gae_lambda=0.98, ent_coef=0.01, verbose=1)
   model.learn(total_timesteps=1000000)
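For reference, a minimal sketch of how a run like those above can be reproduced end to end, including the mean_reward +/- std evaluation. The notes do not name the environment or the evaluation setup, so the use of Gymnasium's LunarLander-v2, the Monitor wrapper, and n_eval_episodes=10 below are assumptions, not part of the original runs.

```python
import gymnasium as gym
from stable_baselines3 import PPO
from stable_baselines3.common.evaluation import evaluate_policy
from stable_baselines3.common.monitor import Monitor

# Assumption: the runs above target LunarLander-v2; the env is not named in the notes.
env = gym.make("LunarLander-v2")

# Hyperparameters from run 2; runs 1 and 3 differ only in n_epochs / total_timesteps.
model = PPO(
    policy="MlpPolicy",
    env=env,
    n_steps=1024,
    batch_size=64,
    n_epochs=8,
    gamma=0.999,
    gae_lambda=0.98,
    ent_coef=0.01,
    verbose=1,
)
model.learn(total_timesteps=500_000)

# Evaluate on a separate Monitor-wrapped env; n_eval_episodes=10 is an assumed setting.
eval_env = Monitor(gym.make("LunarLander-v2"))
mean_reward, std_reward = evaluate_policy(model, eval_env, n_eval_episodes=10, deterministic=True)
print(f"mean_reward={mean_reward:.2f} +/- {std_reward:.2f}")
```

With this setup, the only change between the three runs is n_epochs (4 vs 8) and the training budget (500k vs 1M timesteps), so the reported rewards can be compared directly.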