kingabzpro
committed on
Commit
•
277e232
1
Parent(s):
6c65102
Code modified
Browse files
README.md
CHANGED
@@ -30,24 +30,24 @@ from stable_baselines3 import PPO
|
|
30 |
from stable_baselines3.common.evaluation import evaluate_policy
|
31 |
from stable_baselines3.common.env_util import make_vec_env
|
32 |
|
33 |
-
# Create a vectorized environment of
|
34 |
-
env = make_vec_env("LunarLander-v2", n_envs=
|
35 |
|
36 |
# Optimized Hyperparameters
|
37 |
model = PPO(
|
38 |
"MlpPolicy",
|
39 |
env=env,
|
40 |
-
n_steps=
|
41 |
batch_size=32,
|
42 |
-
n_epochs=
|
43 |
-
gamma=0.
|
44 |
gae_lambda=0.98,
|
45 |
ent_coef=0.01,
|
46 |
verbose=1,
|
47 |
)
|
48 |
|
49 |
-
# Train it for
|
50 |
-
model.learn(total_timesteps=int(
|
51 |
|
52 |
# Create a new environment for evaluation
|
53 |
eval_env = gym.make("LunarLander-v2")
|
@@ -60,8 +60,6 @@ mean_reward, std_reward = evaluate_policy(
|
|
60 |
# Print the results
|
61 |
print(f"mean_reward={mean_reward:.2f} +/- {std_reward}")
|
62 |
|
63 |
-
|
64 |
-
|
65 |
-
|
66 |
```
|
67 |
|
|
|
30 |
from stable_baselines3.common.evaluation import evaluate_policy
|
31 |
from stable_baselines3.common.env_util import make_vec_env
|
32 |
|
33 |
+
# Create a vectorized environment of 64 parallel environments
|
34 |
+
env = make_vec_env("LunarLander-v2", n_envs=64)
|
35 |
|
36 |
# Optimized Hyperparameters
|
37 |
model = PPO(
|
38 |
"MlpPolicy",
|
39 |
env=env,
|
40 |
+
n_steps=1024,
|
41 |
batch_size=32,
|
42 |
+
n_epochs=10,
|
43 |
+
gamma=0.997,
|
44 |
gae_lambda=0.98,
|
45 |
ent_coef=0.01,
|
46 |
verbose=1,
|
47 |
)
|
48 |
|
49 |
+
# Train it for 1,000,000 timesteps
|
50 |
+
model.learn(total_timesteps=int(1e6))
|
51 |
|
52 |
# Create a new environment for evaluation
|
53 |
eval_env = gym.make("LunarLander-v2")
|
|
|
60 |
# Print the results
|
61 |
print(f"mean_reward={mean_reward:.2f} +/- {std_reward}")
|
62 |
|
63 |
+
# >>> mean_reward=261.42 +/- 18.69168514436243
|
|
|
|
|
64 |
```
|
65 |
|