kingabzpro committed on
Commit
277e232
1 Parent(s): 6c65102

Code modified

Files changed (1)
  1. README.md +8 -10
README.md CHANGED
@@ -30,24 +30,24 @@ from stable_baselines3 import PPO
 from stable_baselines3.common.evaluation import evaluate_policy
 from stable_baselines3.common.env_util import make_vec_env
 
-# Create a vectorized environment of 16 parallel environments
-env = make_vec_env("LunarLander-v2", n_envs=16)
+# Create a vectorized environment of 64 parallel environments
+env = make_vec_env("LunarLander-v2", n_envs=64)
 
 # Optimized Hyperparameters
 model = PPO(
     "MlpPolicy",
     env=env,
-    n_steps=655,
+    n_steps=1024,
     batch_size=32,
-    n_epochs=8,
-    gamma=0.998,
+    n_epochs=10,
+    gamma=0.997,
     gae_lambda=0.98,
     ent_coef=0.01,
     verbose=1,
 )
 
-# Train it for 500,000 timesteps
-model.learn(total_timesteps=int(5e6))
+# Train it for 1,000,000 timesteps
+model.learn(total_timesteps=int(1e6))
 
 # Create a new environment for evaluation
 eval_env = gym.make("LunarLander-v2")
@@ -60,8 +60,6 @@ mean_reward, std_reward = evaluate_policy(
 # Print the results
 print(f"mean_reward={mean_reward:.2f} +/- {std_reward}")
 
-#>>> mean_reward=254.56 +/- 18.45056958672337
-
-
+# >>> mean_reward=261.42 +/- 18.69168514436243
 ```
 
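
For reference, here is a minimal sketch of the full training snippet as it reads after this commit. The diff only shows the changed region of README.md, so the `import gym` line and the `evaluate_policy` arguments (`n_eval_episodes`, `deterministic`) are assumptions filled in for completeness, not taken from the commit.

```python
# Sketch of the post-commit snippet; the gym import and evaluate_policy
# arguments are assumed, since they fall outside the diff hunks shown above.
import gym

from stable_baselines3 import PPO
from stable_baselines3.common.evaluation import evaluate_policy
from stable_baselines3.common.env_util import make_vec_env

# Create a vectorized environment of 64 parallel environments
env = make_vec_env("LunarLander-v2", n_envs=64)

# Optimized hyperparameters from the commit
model = PPO(
    "MlpPolicy",
    env=env,
    n_steps=1024,
    batch_size=32,
    n_epochs=10,
    gamma=0.997,
    gae_lambda=0.98,
    ent_coef=0.01,
    verbose=1,
)

# Train it for 1,000,000 timesteps
model.learn(total_timesteps=int(1e6))

# Create a new environment for evaluation
eval_env = gym.make("LunarLander-v2")

# Evaluate the trained policy (episode count and determinism are assumed values)
mean_reward, std_reward = evaluate_policy(
    model, eval_env, n_eval_episodes=10, deterministic=True
)

# Print the results
print(f"mean_reward={mean_reward:.2f} +/- {std_reward}")
```

With `n_envs=64` and `n_steps=1024`, each PPO update is computed over 64 × 1024 = 65,536 collected transitions, so the 1,000,000-timestep budget corresponds to roughly 15 policy updates.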