Cleaning up
README.md CHANGED
@@ -30,9 +30,6 @@ using the [stable-baselines3 library](https://github.com/DLR-RM/stable-baselines
 ```python
-from stable_baselines3 import ...
-from huggingface_sb3 import load_from_hub
-

 # Defining model
 model = PPO('MlpPolicy', env, n_steps = 512, batch_size = 64, n_epochs = 4, gamma = 0.999, gae_lambda = 0.98, ent_coef = 0.01, verbose=1)
@@ -53,7 +50,11 @@ mean_reward, std_reward = evaluate_policy(model, eval_env, n_eval_episodes=10, d
 # Print the results
 print(f"mean_reward={mean_reward:.2f} +/- {std_reward}")

-# 284.84514090000005, "std_reward": 18.270698037778157
 # mean_reward=284.85 +/- 18.270698037778157
 ...
 ```
+
+## Diffs
+
+* Dropped `n_steps` down to 512
+* Bumped `total_timesteps` up to 2,000,000