sgoodfriend commited on
Commit
4a3b696
1 Parent(s): 9d35e63

A2C playing LunarLander-v2 from https://github.com/sgoodfriend/rl-algo-impls/tree/0760ef7d52b17f30219a27c18ba52c8895025ae3

Browse files
README.md CHANGED
@@ -10,7 +10,7 @@ model-index:
10
  results:
11
  - metrics:
12
  - type: mean_reward
13
- value: 203.36 +/- 113.36
14
  name: mean_reward
15
  task:
16
  type: reinforcement-learning
@@ -31,9 +31,9 @@ This model was trained from 3 trainings of **A2C** agents using different initia
31
 
32
  | algo | env | seed | reward_mean | reward_std | eval_episodes | best | wandb_url |
33
  |:-------|:---------------|-------:|--------------:|-------------:|----------------:|:-------|:-----------------------------------------------------------------------------|
34
- | a2c | LunarLander-v2 | 1 | 116.479 | 101.181 | 16 | | [wandb](https://wandb.ai/sgoodfriend/rl-algo-impls-benchmarks/runs/wzm9jhee) |
35
- | a2c | LunarLander-v2 | 2 | 74.1282 | 134.996 | 16 | | [wandb](https://wandb.ai/sgoodfriend/rl-algo-impls-benchmarks/runs/f6jaeow9) |
36
- | a2c | LunarLander-v2 | 3 | 203.36 | 113.362 | 16 | * | [wandb](https://wandb.ai/sgoodfriend/rl-algo-impls-benchmarks/runs/nyywc8ns) |
37
 
38
 
39
  ### Prerequisites: Weights & Biases (WandB)
 
10
  results:
11
  - metrics:
12
  - type: mean_reward
13
+ value: 203.36 +/- 113.35
14
  name: mean_reward
15
  task:
16
  type: reinforcement-learning
 
31
 
32
  | algo | env | seed | reward_mean | reward_std | eval_episodes | best | wandb_url |
33
  |:-------|:---------------|-------:|--------------:|-------------:|----------------:|:-------|:-----------------------------------------------------------------------------|
34
+ | a2c | LunarLander-v2 | 1 | 102.539 | 106.633 | 16 | | [wandb](https://wandb.ai/sgoodfriend/rl-algo-impls-benchmarks/runs/wzm9jhee) |
35
+ | a2c | LunarLander-v2 | 2 | 86.8229 | 132.518 | 16 | | [wandb](https://wandb.ai/sgoodfriend/rl-algo-impls-benchmarks/runs/f6jaeow9) |
36
+ | a2c | LunarLander-v2 | 3 | 203.362 | 113.355 | 16 | * | [wandb](https://wandb.ai/sgoodfriend/rl-algo-impls-benchmarks/runs/nyywc8ns) |
37
 
38
 
39
  ### Prerequisites: Weights & Biases (WandB)
replay.meta.json CHANGED
@@ -1 +1 @@
1
- {"content_type": "video/mp4", "encoder_version": {"backend": "ffmpeg", "version": "b'ffmpeg version 4.2.7-0ubuntu0.1 Copyright (c) 2000-2022 the FFmpeg developers\\nbuilt with gcc 9 (Ubuntu 9.4.0-1ubuntu1~20.04.1)\\nconfiguration: --prefix=/usr --extra-version=0ubuntu0.1 --toolchain=hardened --libdir=/usr/lib/x86_64-linux-gnu --incdir=/usr/include/x86_64-linux-gnu --arch=amd64 --enable-gpl --disable-stripping --enable-avresample --disable-filter=resample --enable-avisynth --enable-gnutls --enable-ladspa --enable-libaom --enable-libass --enable-libbluray --enable-libbs2b --enable-libcaca --enable-libcdio --enable-libcodec2 --enable-libflite --enable-libfontconfig --enable-libfreetype --enable-libfribidi --enable-libgme --enable-libgsm --enable-libjack --enable-libmp3lame --enable-libmysofa --enable-libopenjpeg --enable-libopenmpt --enable-libopus --enable-libpulse --enable-librsvg --enable-librubberband --enable-libshine --enable-libsnappy --enable-libsoxr --enable-libspeex --enable-libssh --enable-libtheora --enable-libtwolame --enable-libvidstab --enable-libvorbis --enable-libvpx --enable-libwavpack --enable-libwebp --enable-libx265 --enable-libxml2 --enable-libxvid --enable-libzmq --enable-libzvbi --enable-lv2 --enable-omx --enable-openal --enable-opencl --enable-opengl --enable-sdl2 --enable-libdc1394 --enable-libdrm --enable-libiec61883 --enable-nvenc --enable-chromaprint --enable-frei0r --enable-libx264 --enable-shared\\nlibavutil 56. 31.100 / 56. 31.100\\nlibavcodec 58. 54.100 / 58. 54.100\\nlibavformat 58. 29.100 / 58. 29.100\\nlibavdevice 58. 8.100 / 58. 8.100\\nlibavfilter 7. 57.100 / 7. 57.100\\nlibavresample 4. 0. 0 / 4. 0. 0\\nlibswscale 5. 5.100 / 5. 5.100\\nlibswresample 3. 5.100 / 3. 5.100\\nlibpostproc 55. 5.100 / 55. 
5.100\\n'", "cmdline": ["ffmpeg", "-nostats", "-loglevel", "error", "-y", "-f", "rawvideo", "-s:v", "600x400", "-pix_fmt", "rgb24", "-framerate", "50", "-i", "-", "-vf", "scale=trunc(iw/2)*2:trunc(ih/2)*2", "-vcodec", "libx264", "-pix_fmt", "yuv420p", "-r", "50", "/tmp/tmpmog2i1xe/a2c-LunarLander-v2/replay.mp4"]}, "episode": {"r": 3.8731765747070312, "l": 123, "t": 1.6477}}
 
1
+ {"content_type": "video/mp4", "encoder_version": {"backend": "ffmpeg", "version": "b'ffmpeg version 5.1.2 Copyright (c) 2000-2022 the FFmpeg developers\\nbuilt with clang version 14.0.6\\nconfiguration: --prefix=/Users/runner/miniforge3/conda-bld/ffmpeg_1671040513231/_h_env_placehold_placehold_placehold_placehold_placehold_placehold_placehold_placehold_placehold_placehold_placehold_placehold_placehold_placehold_placehold_placehold_placehold_placehold_placehold_pl --cc=arm64-apple-darwin20.0.0-clang --cxx=arm64-apple-darwin20.0.0-clang++ --nm=arm64-apple-darwin20.0.0-nm --ar=arm64-apple-darwin20.0.0-ar --disable-doc --disable-openssl --enable-demuxer=dash --enable-hardcoded-tables --enable-libfreetype --enable-libfontconfig --enable-libopenh264 --enable-cross-compile --arch=arm64 --target-os=darwin --cross-prefix=arm64-apple-darwin20.0.0- --host-cc=/Users/runner/miniforge3/conda-bld/ffmpeg_1671040513231/_build_env/bin/x86_64-apple-darwin13.4.0-clang --enable-neon --enable-gnutls --enable-libmp3lame --enable-libvpx --enable-pthreads --enable-gpl --enable-libx264 --enable-libx265 --enable-libaom --enable-libsvtav1 --enable-libxml2 --enable-pic --enable-shared --disable-static --enable-version3 --enable-zlib --pkg-config=/Users/runner/miniforge3/conda-bld/ffmpeg_1671040513231/_build_env/bin/pkg-config\\nlibavutil 57. 28.100 / 57. 28.100\\nlibavcodec 59. 37.100 / 59. 37.100\\nlibavformat 59. 27.100 / 59. 27.100\\nlibavdevice 59. 7.100 / 59. 7.100\\nlibavfilter 8. 44.100 / 8. 44.100\\nlibswscale 6. 7.100 / 6. 7.100\\nlibswresample 4. 7.100 / 4. 7.100\\nlibpostproc 56. 6.100 / 56. 
6.100\\n'", "cmdline": ["ffmpeg", "-nostats", "-loglevel", "error", "-y", "-f", "rawvideo", "-s:v", "1200x800", "-pix_fmt", "rgb24", "-framerate", "50", "-i", "-", "-vf", "scale=trunc(iw/2)*2:trunc(ih/2)*2", "-vcodec", "libx264", "-pix_fmt", "yuv420p", "-r", "50", "/var/folders/9g/my5557_91xddp6lx00nkzly80000gn/T/tmps7cn24ni/a2c-LunarLander-v2/replay.mp4"]}, "episode": {"r": 31.878372192382812, "l": 302, "t": 3.747}}
replay.mp4 CHANGED
Binary files a/replay.mp4 and b/replay.mp4 differ
 
runner/env.py CHANGED
@@ -26,6 +26,7 @@ from wrappers.initial_step_truncate_wrapper import InitialStepTruncateWrapper
26
  from wrappers.is_vector_env import IsVectorEnv
27
  from wrappers.noop_env_seed import NoopEnvSeed
28
  from wrappers.normalize import NormalizeObservation, NormalizeReward
 
29
  from wrappers.transpose_image_observation import TransposeImageObservation
30
  from wrappers.vectorable_wrapper import VecEnv
31
  from wrappers.video_compat_wrapper import VideoCompatWrapper
@@ -180,6 +181,8 @@ def _make_vec_env(
180
  else:
181
  raise ValueError(f"env_type {env_type} unsupported")
182
  envs = VecEnvClass([make(i) for i in range(n_envs)])
 
 
183
  if training:
184
  assert tb_writer
185
  envs = EpisodeStatsWriter(
 
26
  from wrappers.is_vector_env import IsVectorEnv
27
  from wrappers.noop_env_seed import NoopEnvSeed
28
  from wrappers.normalize import NormalizeObservation, NormalizeReward
29
+ from wrappers.sync_vector_env_render_compat import SyncVectorEnvRenderCompat
30
  from wrappers.transpose_image_observation import TransposeImageObservation
31
  from wrappers.vectorable_wrapper import VecEnv
32
  from wrappers.video_compat_wrapper import VideoCompatWrapper
 
181
  else:
182
  raise ValueError(f"env_type {env_type} unsupported")
183
  envs = VecEnvClass([make(i) for i in range(n_envs)])
184
+ if env_type == "gymvec" and vec_env_class == "sync":
185
+ envs = SyncVectorEnvRenderCompat(envs)
186
  if training:
187
  assert tb_writer
188
  envs = EpisodeStatsWriter(
shared/callbacks/eval_callback.py CHANGED
@@ -75,7 +75,9 @@ def evaluate(
75
  print_returns: bool = True,
76
  ignore_first_episode: bool = False,
77
  ) -> EpisodesStats:
 
78
  policy.eval()
 
79
  episodes = EvaluateAccumulator(
80
  env.num_envs, n_episodes, print_returns, ignore_first_episode
81
  )
@@ -137,7 +139,6 @@ class EvalCallback(Callback):
137
  def on_step(self, timesteps_elapsed: int = 1) -> bool:
138
  super().on_step(timesteps_elapsed)
139
  if self.timesteps_elapsed // self.step_freq >= len(self.stats):
140
- self.policy.sync_normalization(self.env)
141
  self.evaluate()
142
  return True
143
 
@@ -176,7 +177,6 @@ class EvalCallback(Callback):
176
  )
177
  if strictly_better and self.record_best_videos:
178
  assert self.video_env and self.best_video_dir
179
- self.policy.sync_normalization(self.video_env)
180
  self.best_video_base_path = os.path.join(
181
  self.best_video_dir, str(self.timesteps_elapsed)
182
  )
 
75
  print_returns: bool = True,
76
  ignore_first_episode: bool = False,
77
  ) -> EpisodesStats:
78
+ policy.sync_normalization(env)
79
  policy.eval()
80
+
81
  episodes = EvaluateAccumulator(
82
  env.num_envs, n_episodes, print_returns, ignore_first_episode
83
  )
 
139
  def on_step(self, timesteps_elapsed: int = 1) -> bool:
140
  super().on_step(timesteps_elapsed)
141
  if self.timesteps_elapsed // self.step_freq >= len(self.stats):
 
142
  self.evaluate()
143
  return True
144
 
 
177
  )
178
  if strictly_better and self.record_best_videos:
179
  assert self.video_env and self.best_video_dir
 
180
  self.best_video_base_path = os.path.join(
181
  self.best_video_dir, str(self.timesteps_elapsed)
182
  )
wrappers/sync_vector_env_render_compat.py ADDED
@@ -0,0 +1,31 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
import numpy as np

from gym.vector.sync_vector_env import SyncVectorEnv
from stable_baselines3.common.vec_env.base_vec_env import tile_images
from typing import Optional

from wrappers.vectorable_wrapper import (
    VecotarableWrapper,
)


class SyncVectorEnvRenderCompat(VecotarableWrapper):
    """Make a gym ``SyncVectorEnv`` renderable like an SB3 ``VecEnv``.

    When the wrapped env's unwrapped base is a ``SyncVectorEnv``, each
    sub-env is rendered to an rgb array and the frames are tiled into a
    single image; otherwise rendering is delegated to the wrapped env.
    """

    def __init__(self, env) -> None:
        super().__init__(env)

    def render(self, mode: str = "human") -> Optional[np.ndarray]:
        """Render the vectorized env.

        :param mode: "human" shows a cv2 window and returns None;
            "rgb_array" returns the tiled frame as an ndarray.
        :raises NotImplementedError: for any other mode when the base
            env is a SyncVectorEnv.
        """
        base_env = self.env.unwrapped
        if not isinstance(base_env, SyncVectorEnv):
            # Not a SyncVectorEnv: the wrapped env can render itself.
            return self.env.render(mode=mode)
        # Render every sub-env and tile the frames into one big image.
        imgs = [env.render(mode="rgb_array") for env in base_env.envs]
        bigimg = tile_images(imgs)
        if mode == "human":
            # Imported lazily so cv2 is only required for on-screen display.
            import cv2

            # cv2 expects BGR channel order, hence the ::-1 reversal.
            cv2.imshow("vecenv", bigimg[:, :, ::-1])
            cv2.waitKey(1)
            return None
        elif mode == "rgb_array":
            return bigimg
        else:
            # BUG FIX: the original raised NotImplemented, which is a
            # sentinel value (not an exception class) and would produce
            # "TypeError: exceptions must derive from BaseException".
            raise NotImplementedError(f"Render mode {mode} is not supported")
wrappers/vec_episode_recorder.py CHANGED
@@ -1,9 +1,6 @@
1
  import numpy as np
2
 
3
- from gym.vector.sync_vector_env import SyncVectorEnv
4
  from gym.wrappers.monitoring.video_recorder import VideoRecorder
5
- from stable_baselines3.common.vec_env.base_vec_env import tile_images
6
- from typing import Optional
7
 
8
  from wrappers.vectorable_wrapper import (
9
  VecotarableWrapper,
@@ -45,7 +42,7 @@ class VecEpisodeRecorder(VecotarableWrapper):
45
  self._close_video_recorder()
46
 
47
  self.video_recorder = VideoRecorder(
48
- SyncVectorEnvRenderCompat(self.env),
49
  base_path=self.base_path,
50
  )
51
 
@@ -56,25 +53,3 @@ class VecEpisodeRecorder(VecotarableWrapper):
56
  if self.video_recorder:
57
  self.video_recorder.close()
58
  self.video_recorder = None
59
-
60
-
61
- class SyncVectorEnvRenderCompat(VecotarableWrapper):
62
- def __init__(self, env) -> None:
63
- super().__init__(env)
64
-
65
- def render(self, mode: str = "human") -> Optional[np.ndarray]:
66
- base_env = self.env.unwrapped
67
- if isinstance(base_env, SyncVectorEnv):
68
- imgs = [env.render(mode="rgb_array") for env in base_env.envs]
69
- bigimg = tile_images(imgs)
70
- if mode == "humnan":
71
- import cv2
72
-
73
- cv2.imshow("vecenv", bigimg[:, :, ::-1])
74
- cv2.waitKey(1)
75
- elif mode == "rgb_array":
76
- return bigimg
77
- else:
78
- raise NotImplemented(f"Render mode {mode} is not supported")
79
- else:
80
- return self.env.render(mode=mode)
 
1
  import numpy as np
2
 
 
3
  from gym.wrappers.monitoring.video_recorder import VideoRecorder
 
 
4
 
5
  from wrappers.vectorable_wrapper import (
6
  VecotarableWrapper,
 
42
  self._close_video_recorder()
43
 
44
  self.video_recorder = VideoRecorder(
45
+ self.env,
46
  base_path=self.base_path,
47
  )
48
 
 
53
  if self.video_recorder:
54
  self.video_recorder.close()
55
  self.video_recorder = None