sgoodfriend
committed on
Commit
•
4a3b696
1
Parent(s):
9d35e63
A2C playing LunarLander-v2 from https://github.com/sgoodfriend/rl-algo-impls/tree/0760ef7d52b17f30219a27c18ba52c8895025ae3
Browse files
- README.md +4 -4
- replay.meta.json +1 -1
- replay.mp4 +0 -0
- runner/env.py +3 -0
- shared/callbacks/eval_callback.py +2 -2
- wrappers/sync_vector_env_render_compat.py +31 -0
- wrappers/vec_episode_recorder.py +1 -26
README.md
CHANGED
@@ -10,7 +10,7 @@ model-index:
|
|
10 |
results:
|
11 |
- metrics:
|
12 |
- type: mean_reward
|
13 |
-
value: 203.36 +/- 113.
|
14 |
name: mean_reward
|
15 |
task:
|
16 |
type: reinforcement-learning
|
@@ -31,9 +31,9 @@ This model was trained from 3 trainings of **A2C** agents using different initia
|
|
31 |
|
32 |
| algo | env | seed | reward_mean | reward_std | eval_episodes | best | wandb_url |
|
33 |
|:-------|:---------------|-------:|--------------:|-------------:|----------------:|:-------|:-----------------------------------------------------------------------------|
|
34 |
-
| a2c | LunarLander-v2 | 1 |
|
35 |
-
| a2c | LunarLander-v2 | 2 |
|
36 |
-
| a2c | LunarLander-v2 | 3 | 203.
|
37 |
|
38 |
|
39 |
### Prerequisites: Weights & Biases (WandB)
|
|
|
10 |
results:
|
11 |
- metrics:
|
12 |
- type: mean_reward
|
13 |
+
value: 203.36 +/- 113.35
|
14 |
name: mean_reward
|
15 |
task:
|
16 |
type: reinforcement-learning
|
|
|
31 |
|
32 |
| algo | env | seed | reward_mean | reward_std | eval_episodes | best | wandb_url |
|
33 |
|:-------|:---------------|-------:|--------------:|-------------:|----------------:|:-------|:-----------------------------------------------------------------------------|
|
34 |
+
| a2c | LunarLander-v2 | 1 | 102.539 | 106.633 | 16 | | [wandb](https://wandb.ai/sgoodfriend/rl-algo-impls-benchmarks/runs/wzm9jhee) |
|
35 |
+
| a2c | LunarLander-v2 | 2 | 86.8229 | 132.518 | 16 | | [wandb](https://wandb.ai/sgoodfriend/rl-algo-impls-benchmarks/runs/f6jaeow9) |
|
36 |
+
| a2c | LunarLander-v2 | 3 | 203.362 | 113.355 | 16 | * | [wandb](https://wandb.ai/sgoodfriend/rl-algo-impls-benchmarks/runs/nyywc8ns) |
|
37 |
|
38 |
|
39 |
### Prerequisites: Weights & Biases (WandB)
|
replay.meta.json
CHANGED
@@ -1 +1 @@
|
|
1 |
-
{"content_type": "video/mp4", "encoder_version": {"backend": "ffmpeg", "version": "b'ffmpeg version
|
|
|
1 |
+
{"content_type": "video/mp4", "encoder_version": {"backend": "ffmpeg", "version": "b'ffmpeg version 5.1.2 Copyright (c) 2000-2022 the FFmpeg developers\\nbuilt with clang version 14.0.6\\nconfiguration: --prefix=/Users/runner/miniforge3/conda-bld/ffmpeg_1671040513231/_h_env_placehold_placehold_placehold_placehold_placehold_placehold_placehold_placehold_placehold_placehold_placehold_placehold_placehold_placehold_placehold_placehold_placehold_placehold_placehold_pl --cc=arm64-apple-darwin20.0.0-clang --cxx=arm64-apple-darwin20.0.0-clang++ --nm=arm64-apple-darwin20.0.0-nm --ar=arm64-apple-darwin20.0.0-ar --disable-doc --disable-openssl --enable-demuxer=dash --enable-hardcoded-tables --enable-libfreetype --enable-libfontconfig --enable-libopenh264 --enable-cross-compile --arch=arm64 --target-os=darwin --cross-prefix=arm64-apple-darwin20.0.0- --host-cc=/Users/runner/miniforge3/conda-bld/ffmpeg_1671040513231/_build_env/bin/x86_64-apple-darwin13.4.0-clang --enable-neon --enable-gnutls --enable-libmp3lame --enable-libvpx --enable-pthreads --enable-gpl --enable-libx264 --enable-libx265 --enable-libaom --enable-libsvtav1 --enable-libxml2 --enable-pic --enable-shared --disable-static --enable-version3 --enable-zlib --pkg-config=/Users/runner/miniforge3/conda-bld/ffmpeg_1671040513231/_build_env/bin/pkg-config\\nlibavutil 57. 28.100 / 57. 28.100\\nlibavcodec 59. 37.100 / 59. 37.100\\nlibavformat 59. 27.100 / 59. 27.100\\nlibavdevice 59. 7.100 / 59. 7.100\\nlibavfilter 8. 44.100 / 8. 44.100\\nlibswscale 6. 7.100 / 6. 7.100\\nlibswresample 4. 7.100 / 4. 7.100\\nlibpostproc 56. 6.100 / 56. 
6.100\\n'", "cmdline": ["ffmpeg", "-nostats", "-loglevel", "error", "-y", "-f", "rawvideo", "-s:v", "1200x800", "-pix_fmt", "rgb24", "-framerate", "50", "-i", "-", "-vf", "scale=trunc(iw/2)*2:trunc(ih/2)*2", "-vcodec", "libx264", "-pix_fmt", "yuv420p", "-r", "50", "/var/folders/9g/my5557_91xddp6lx00nkzly80000gn/T/tmps7cn24ni/a2c-LunarLander-v2/replay.mp4"]}, "episode": {"r": 31.878372192382812, "l": 302, "t": 3.747}}
|
replay.mp4
CHANGED
Binary files a/replay.mp4 and b/replay.mp4 differ
|
|
runner/env.py
CHANGED
@@ -26,6 +26,7 @@ from wrappers.initial_step_truncate_wrapper import InitialStepTruncateWrapper
|
|
26 |
from wrappers.is_vector_env import IsVectorEnv
|
27 |
from wrappers.noop_env_seed import NoopEnvSeed
|
28 |
from wrappers.normalize import NormalizeObservation, NormalizeReward
|
|
|
29 |
from wrappers.transpose_image_observation import TransposeImageObservation
|
30 |
from wrappers.vectorable_wrapper import VecEnv
|
31 |
from wrappers.video_compat_wrapper import VideoCompatWrapper
|
@@ -180,6 +181,8 @@ def _make_vec_env(
|
|
180 |
else:
|
181 |
raise ValueError(f"env_type {env_type} unsupported")
|
182 |
envs = VecEnvClass([make(i) for i in range(n_envs)])
|
|
|
|
|
183 |
if training:
|
184 |
assert tb_writer
|
185 |
envs = EpisodeStatsWriter(
|
|
|
26 |
from wrappers.is_vector_env import IsVectorEnv
|
27 |
from wrappers.noop_env_seed import NoopEnvSeed
|
28 |
from wrappers.normalize import NormalizeObservation, NormalizeReward
|
29 |
+
from wrappers.sync_vector_env_render_compat import SyncVectorEnvRenderCompat
|
30 |
from wrappers.transpose_image_observation import TransposeImageObservation
|
31 |
from wrappers.vectorable_wrapper import VecEnv
|
32 |
from wrappers.video_compat_wrapper import VideoCompatWrapper
|
|
|
181 |
else:
|
182 |
raise ValueError(f"env_type {env_type} unsupported")
|
183 |
envs = VecEnvClass([make(i) for i in range(n_envs)])
|
184 |
+
if env_type == "gymvec" and vec_env_class == "sync":
|
185 |
+
envs = SyncVectorEnvRenderCompat(envs)
|
186 |
if training:
|
187 |
assert tb_writer
|
188 |
envs = EpisodeStatsWriter(
|
shared/callbacks/eval_callback.py
CHANGED
@@ -75,7 +75,9 @@ def evaluate(
|
|
75 |
print_returns: bool = True,
|
76 |
ignore_first_episode: bool = False,
|
77 |
) -> EpisodesStats:
|
|
|
78 |
policy.eval()
|
|
|
79 |
episodes = EvaluateAccumulator(
|
80 |
env.num_envs, n_episodes, print_returns, ignore_first_episode
|
81 |
)
|
@@ -137,7 +139,6 @@ class EvalCallback(Callback):
|
|
137 |
def on_step(self, timesteps_elapsed: int = 1) -> bool:
|
138 |
super().on_step(timesteps_elapsed)
|
139 |
if self.timesteps_elapsed // self.step_freq >= len(self.stats):
|
140 |
-
self.policy.sync_normalization(self.env)
|
141 |
self.evaluate()
|
142 |
return True
|
143 |
|
@@ -176,7 +177,6 @@ class EvalCallback(Callback):
|
|
176 |
)
|
177 |
if strictly_better and self.record_best_videos:
|
178 |
assert self.video_env and self.best_video_dir
|
179 |
-
self.policy.sync_normalization(self.video_env)
|
180 |
self.best_video_base_path = os.path.join(
|
181 |
self.best_video_dir, str(self.timesteps_elapsed)
|
182 |
)
|
|
|
75 |
print_returns: bool = True,
|
76 |
ignore_first_episode: bool = False,
|
77 |
) -> EpisodesStats:
|
78 |
+
policy.sync_normalization(env)
|
79 |
policy.eval()
|
80 |
+
|
81 |
episodes = EvaluateAccumulator(
|
82 |
env.num_envs, n_episodes, print_returns, ignore_first_episode
|
83 |
)
|
|
|
139 |
def on_step(self, timesteps_elapsed: int = 1) -> bool:
|
140 |
super().on_step(timesteps_elapsed)
|
141 |
if self.timesteps_elapsed // self.step_freq >= len(self.stats):
|
|
|
142 |
self.evaluate()
|
143 |
return True
|
144 |
|
|
|
177 |
)
|
178 |
if strictly_better and self.record_best_videos:
|
179 |
assert self.video_env and self.best_video_dir
|
|
|
180 |
self.best_video_base_path = os.path.join(
|
181 |
self.best_video_dir, str(self.timesteps_elapsed)
|
182 |
)
|
wrappers/sync_vector_env_render_compat.py
ADDED
@@ -0,0 +1,31 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
import numpy as np
|
2 |
+
|
3 |
+
from gym.vector.sync_vector_env import SyncVectorEnv
|
4 |
+
from stable_baselines3.common.vec_env.base_vec_env import tile_images
|
5 |
+
from typing import Optional
|
6 |
+
|
7 |
+
from wrappers.vectorable_wrapper import (
|
8 |
+
VecotarableWrapper,
|
9 |
+
)
|
10 |
+
|
11 |
+
|
12 |
+
class SyncVectorEnvRenderCompat(VecotarableWrapper):
    """Wrapper that gives a gym ``SyncVectorEnv`` a working ``render``.

    When the unwrapped environment is a ``SyncVectorEnv``, every sub-environment
    is rendered as an RGB array and the frames are combined into one image with
    ``tile_images``. For any other environment the call is forwarded to the
    wrapped env unchanged.
    """

    def __init__(self, env) -> None:
        super().__init__(env)

    def render(self, mode: str = "human") -> Optional[np.ndarray]:
        """Render all sub-environments as a single tiled image.

        Args:
            mode: ``"human"`` shows the tiled image in an OpenCV window and
                returns ``None``; ``"rgb_array"`` returns the tiled image.

        Returns:
            The tiled image for ``"rgb_array"``, ``None`` for ``"human"``, or
            whatever the wrapped env's ``render`` returns when the base env is
            not a ``SyncVectorEnv``.

        Raises:
            NotImplementedError: If the base env is a ``SyncVectorEnv`` and
                ``mode`` is neither ``"human"`` nor ``"rgb_array"``.
        """
        base_env = self.env.unwrapped
        if not isinstance(base_env, SyncVectorEnv):
            # Nothing to adapt: defer to the wrapped env's own renderer.
            return self.env.render(mode=mode)

        # Sub-envs are always rendered as arrays, whatever mode was requested,
        # so the frames can be tiled into one image.
        imgs = [env.render(mode="rgb_array") for env in base_env.envs]
        bigimg = tile_images(imgs)
        if mode == "human":
            # Imported lazily so OpenCV is only needed for on-screen rendering.
            import cv2

            # Reverse the channel axis (RGB -> BGR), the order OpenCV displays.
            cv2.imshow("vecenv", bigimg[:, :, ::-1])
            cv2.waitKey(1)
            return None
        if mode == "rgb_array":
            return bigimg
        # NotImplemented is a non-callable singleton; NotImplementedError is
        # the exception type intended for unsupported operations.
        raise NotImplementedError(f"Render mode {mode} is not supported")
|
wrappers/vec_episode_recorder.py
CHANGED
@@ -1,9 +1,6 @@
|
|
1 |
import numpy as np
|
2 |
|
3 |
-
from gym.vector.sync_vector_env import SyncVectorEnv
|
4 |
from gym.wrappers.monitoring.video_recorder import VideoRecorder
|
5 |
-
from stable_baselines3.common.vec_env.base_vec_env import tile_images
|
6 |
-
from typing import Optional
|
7 |
|
8 |
from wrappers.vectorable_wrapper import (
|
9 |
VecotarableWrapper,
|
@@ -45,7 +42,7 @@ class VecEpisodeRecorder(VecotarableWrapper):
|
|
45 |
self._close_video_recorder()
|
46 |
|
47 |
self.video_recorder = VideoRecorder(
|
48 |
-
|
49 |
base_path=self.base_path,
|
50 |
)
|
51 |
|
@@ -56,25 +53,3 @@ class VecEpisodeRecorder(VecotarableWrapper):
|
|
56 |
if self.video_recorder:
|
57 |
self.video_recorder.close()
|
58 |
self.video_recorder = None
|
59 |
-
|
60 |
-
|
61 |
-
class SyncVectorEnvRenderCompat(VecotarableWrapper):
|
62 |
-
def __init__(self, env) -> None:
|
63 |
-
super().__init__(env)
|
64 |
-
|
65 |
-
def render(self, mode: str = "human") -> Optional[np.ndarray]:
|
66 |
-
base_env = self.env.unwrapped
|
67 |
-
if isinstance(base_env, SyncVectorEnv):
|
68 |
-
imgs = [env.render(mode="rgb_array") for env in base_env.envs]
|
69 |
-
bigimg = tile_images(imgs)
|
70 |
-
if mode == "humnan":
|
71 |
-
import cv2
|
72 |
-
|
73 |
-
cv2.imshow("vecenv", bigimg[:, :, ::-1])
|
74 |
-
cv2.waitKey(1)
|
75 |
-
elif mode == "rgb_array":
|
76 |
-
return bigimg
|
77 |
-
else:
|
78 |
-
raise NotImplemented(f"Render mode {mode} is not supported")
|
79 |
-
else:
|
80 |
-
return self.env.render(mode=mode)
|
|
|
1 |
import numpy as np
|
2 |
|
|
|
3 |
from gym.wrappers.monitoring.video_recorder import VideoRecorder
|
|
|
|
|
4 |
|
5 |
from wrappers.vectorable_wrapper import (
|
6 |
VecotarableWrapper,
|
|
|
42 |
self._close_video_recorder()
|
43 |
|
44 |
self.video_recorder = VideoRecorder(
|
45 |
+
self.env,
|
46 |
base_path=self.base_path,
|
47 |
)
|
48 |
|
|
|
53 |
if self.video_recorder:
|
54 |
self.video_recorder.close()
|
55 |
self.video_recorder = None
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|