sgoodfriend committed on
Commit
9eed99e
1 Parent(s): 1791f57

A2C playing MountainCar-v0 from https://github.com/sgoodfriend/rl-algo-impls/tree/0760ef7d52b17f30219a27c18ba52c8895025ae3

Browse files
README.md CHANGED
@@ -23,7 +23,7 @@ model-index:
23
 
24
  This is a trained model of an **A2C** agent playing **MountainCar-v0** using the [/sgoodfriend/rl-algo-impls](https://github.com/sgoodfriend/rl-algo-impls) repo.
25
 
26
- All models trained at this commit can be found at None.
27
 
28
  ## Training Results
29
 
@@ -83,7 +83,7 @@ notebook.
83
 
84
 
85
  ## Benchmarking (with Lambda Labs instance)
86
- This and other models from None were generated by running a script on a Lambda
87
  Labs instance. In a Lambda Labs instance terminal:
88
  ```
89
  git clone git@github.com:sgoodfriend/rl-algo-impls.git
 
23
 
24
  This is a trained model of an **A2C** agent playing **MountainCar-v0** using the [/sgoodfriend/rl-algo-impls](https://github.com/sgoodfriend/rl-algo-impls) repo.
25
 
26
+ All models trained at this commit can be found at https://api.wandb.ai/links/sgoodfriend/eyvb72mv.
27
 
28
  ## Training Results
29
 
 
83
 
84
 
85
  ## Benchmarking (with Lambda Labs instance)
86
+ This and other models from https://api.wandb.ai/links/sgoodfriend/eyvb72mv were generated by running a script on a Lambda
87
  Labs instance. In a Lambda Labs instance terminal:
88
  ```
89
  git clone git@github.com:sgoodfriend/rl-algo-impls.git
replay.meta.json CHANGED
@@ -1 +1 @@
1
- {"content_type": "video/mp4", "encoder_version": {"backend": "ffmpeg", "version": "b'ffmpeg version 5.1.2 Copyright (c) 2000-2022 the FFmpeg developers\\nbuilt with clang version 14.0.6\\nconfiguration: --prefix=/Users/runner/miniforge3/conda-bld/ffmpeg_1671040513231/_h_env_placehold_placehold_placehold_placehold_placehold_placehold_placehold_placehold_placehold_placehold_placehold_placehold_placehold_placehold_placehold_placehold_placehold_placehold_placehold_pl --cc=arm64-apple-darwin20.0.0-clang --cxx=arm64-apple-darwin20.0.0-clang++ --nm=arm64-apple-darwin20.0.0-nm --ar=arm64-apple-darwin20.0.0-ar --disable-doc --disable-openssl --enable-demuxer=dash --enable-hardcoded-tables --enable-libfreetype --enable-libfontconfig --enable-libopenh264 --enable-cross-compile --arch=arm64 --target-os=darwin --cross-prefix=arm64-apple-darwin20.0.0- --host-cc=/Users/runner/miniforge3/conda-bld/ffmpeg_1671040513231/_build_env/bin/x86_64-apple-darwin13.4.0-clang --enable-neon --enable-gnutls --enable-libmp3lame --enable-libvpx --enable-pthreads --enable-gpl --enable-libx264 --enable-libx265 --enable-libaom --enable-libsvtav1 --enable-libxml2 --enable-pic --enable-shared --disable-static --enable-version3 --enable-zlib --pkg-config=/Users/runner/miniforge3/conda-bld/ffmpeg_1671040513231/_build_env/bin/pkg-config\\nlibavutil 57. 28.100 / 57. 28.100\\nlibavcodec 59. 37.100 / 59. 37.100\\nlibavformat 59. 27.100 / 59. 27.100\\nlibavdevice 59. 7.100 / 59. 7.100\\nlibavfilter 8. 44.100 / 8. 44.100\\nlibswscale 6. 7.100 / 6. 7.100\\nlibswresample 4. 7.100 / 4. 7.100\\nlibpostproc 56. 6.100 / 56. 
6.100\\n'", "cmdline": ["ffmpeg", "-nostats", "-loglevel", "error", "-y", "-f", "rawvideo", "-s:v", "1200x800", "-pix_fmt", "rgb24", "-framerate", "30", "-i", "-", "-vf", "scale=trunc(iw/2)*2:trunc(ih/2)*2", "-vcodec", "libx264", "-pix_fmt", "yuv420p", "-r", "30", "/var/folders/9g/my5557_91xddp6lx00nkzly80000gn/T/tmptrol4bye/a2c-MountainCar-v0/replay.mp4"]}, "episode": {"r": -118.0, "l": 118, "t": 42.409465}}
 
1
+ {"content_type": "video/mp4", "encoder_version": {"backend": "ffmpeg", "version": "b'ffmpeg version 5.1.2 Copyright (c) 2000-2022 the FFmpeg developers\\nbuilt with clang version 14.0.6\\nconfiguration: --prefix=/Users/runner/miniforge3/conda-bld/ffmpeg_1671040513231/_h_env_placehold_placehold_placehold_placehold_placehold_placehold_placehold_placehold_placehold_placehold_placehold_placehold_placehold_placehold_placehold_placehold_placehold_placehold_placehold_pl --cc=arm64-apple-darwin20.0.0-clang --cxx=arm64-apple-darwin20.0.0-clang++ --nm=arm64-apple-darwin20.0.0-nm --ar=arm64-apple-darwin20.0.0-ar --disable-doc --disable-openssl --enable-demuxer=dash --enable-hardcoded-tables --enable-libfreetype --enable-libfontconfig --enable-libopenh264 --enable-cross-compile --arch=arm64 --target-os=darwin --cross-prefix=arm64-apple-darwin20.0.0- --host-cc=/Users/runner/miniforge3/conda-bld/ffmpeg_1671040513231/_build_env/bin/x86_64-apple-darwin13.4.0-clang --enable-neon --enable-gnutls --enable-libmp3lame --enable-libvpx --enable-pthreads --enable-gpl --enable-libx264 --enable-libx265 --enable-libaom --enable-libsvtav1 --enable-libxml2 --enable-pic --enable-shared --disable-static --enable-version3 --enable-zlib --pkg-config=/Users/runner/miniforge3/conda-bld/ffmpeg_1671040513231/_build_env/bin/pkg-config\\nlibavutil 57. 28.100 / 57. 28.100\\nlibavcodec 59. 37.100 / 59. 37.100\\nlibavformat 59. 27.100 / 59. 27.100\\nlibavdevice 59. 7.100 / 59. 7.100\\nlibavfilter 8. 44.100 / 8. 44.100\\nlibswscale 6. 7.100 / 6. 7.100\\nlibswresample 4. 7.100 / 4. 7.100\\nlibpostproc 56. 6.100 / 56. 
6.100\\n'", "cmdline": ["ffmpeg", "-nostats", "-loglevel", "error", "-y", "-f", "rawvideo", "-s:v", "1200x800", "-pix_fmt", "rgb24", "-framerate", "30", "-i", "-", "-vf", "scale=trunc(iw/2)*2:trunc(ih/2)*2", "-vcodec", "libx264", "-pix_fmt", "yuv420p", "-r", "30", "/var/folders/9g/my5557_91xddp6lx00nkzly80000gn/T/tmprlq4z8zm/a2c-MountainCar-v0/replay.mp4"]}, "episode": {"r": -118.0, "l": 118, "t": 1.678512}}
runner/env.py CHANGED
@@ -26,6 +26,7 @@ from wrappers.initial_step_truncate_wrapper import InitialStepTruncateWrapper
26
  from wrappers.is_vector_env import IsVectorEnv
27
  from wrappers.noop_env_seed import NoopEnvSeed
28
  from wrappers.normalize import NormalizeObservation, NormalizeReward
 
29
  from wrappers.transpose_image_observation import TransposeImageObservation
30
  from wrappers.vectorable_wrapper import VecEnv
31
  from wrappers.video_compat_wrapper import VideoCompatWrapper
@@ -180,6 +181,8 @@ def _make_vec_env(
180
  else:
181
  raise ValueError(f"env_type {env_type} unsupported")
182
  envs = VecEnvClass([make(i) for i in range(n_envs)])
 
 
183
  if training:
184
  assert tb_writer
185
  envs = EpisodeStatsWriter(
 
26
  from wrappers.is_vector_env import IsVectorEnv
27
  from wrappers.noop_env_seed import NoopEnvSeed
28
  from wrappers.normalize import NormalizeObservation, NormalizeReward
29
+ from wrappers.sync_vector_env_render_compat import SyncVectorEnvRenderCompat
30
  from wrappers.transpose_image_observation import TransposeImageObservation
31
  from wrappers.vectorable_wrapper import VecEnv
32
  from wrappers.video_compat_wrapper import VideoCompatWrapper
 
181
  else:
182
  raise ValueError(f"env_type {env_type} unsupported")
183
  envs = VecEnvClass([make(i) for i in range(n_envs)])
184
+ if env_type == "gymvec" and vec_env_class == "sync":
185
+ envs = SyncVectorEnvRenderCompat(envs)
186
  if training:
187
  assert tb_writer
188
  envs = EpisodeStatsWriter(
shared/callbacks/eval_callback.py CHANGED
@@ -75,7 +75,9 @@ def evaluate(
75
  print_returns: bool = True,
76
  ignore_first_episode: bool = False,
77
  ) -> EpisodesStats:
 
78
  policy.eval()
 
79
  episodes = EvaluateAccumulator(
80
  env.num_envs, n_episodes, print_returns, ignore_first_episode
81
  )
@@ -137,7 +139,6 @@ class EvalCallback(Callback):
137
  def on_step(self, timesteps_elapsed: int = 1) -> bool:
138
  super().on_step(timesteps_elapsed)
139
  if self.timesteps_elapsed // self.step_freq >= len(self.stats):
140
- self.policy.sync_normalization(self.env)
141
  self.evaluate()
142
  return True
143
 
@@ -176,7 +177,6 @@ class EvalCallback(Callback):
176
  )
177
  if strictly_better and self.record_best_videos:
178
  assert self.video_env and self.best_video_dir
179
- self.policy.sync_normalization(self.video_env)
180
  self.best_video_base_path = os.path.join(
181
  self.best_video_dir, str(self.timesteps_elapsed)
182
  )
 
75
  print_returns: bool = True,
76
  ignore_first_episode: bool = False,
77
  ) -> EpisodesStats:
78
+ policy.sync_normalization(env)
79
  policy.eval()
80
+
81
  episodes = EvaluateAccumulator(
82
  env.num_envs, n_episodes, print_returns, ignore_first_episode
83
  )
 
139
  def on_step(self, timesteps_elapsed: int = 1) -> bool:
140
  super().on_step(timesteps_elapsed)
141
  if self.timesteps_elapsed // self.step_freq >= len(self.stats):
 
142
  self.evaluate()
143
  return True
144
 
 
177
  )
178
  if strictly_better and self.record_best_videos:
179
  assert self.video_env and self.best_video_dir
 
180
  self.best_video_base_path = os.path.join(
181
  self.best_video_dir, str(self.timesteps_elapsed)
182
  )
wrappers/sync_vector_env_render_compat.py ADDED
@@ -0,0 +1,31 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import numpy as np
2
+
3
+ from gym.vector.sync_vector_env import SyncVectorEnv
4
+ from stable_baselines3.common.vec_env.base_vec_env import tile_images
5
+ from typing import Optional
6
+
7
+ from wrappers.vectorable_wrapper import (
8
+ VecotarableWrapper,
9
+ )
10
+
11
+
12
class SyncVectorEnvRenderCompat(VecotarableWrapper):
    """Wrapper whose ``render`` tiles the frames of a ``SyncVectorEnv``.

    When the unwrapped environment is a ``SyncVectorEnv``, every sub-environment
    is rendered individually and the frames are combined into one image with
    ``tile_images``. For any other base environment, ``render`` is delegated to
    the wrapped env unchanged.
    """

    def __init__(self, env) -> None:
        super().__init__(env)

    def render(self, mode: str = "human") -> Optional[np.ndarray]:
        """Render the wrapped environment.

        Args:
            mode: ``"human"`` shows the tiled image in an OpenCV window named
                ``"vecenv"``; ``"rgb_array"`` returns the tiled image.

        Returns:
            The tiled image for ``"rgb_array"``; ``None`` for ``"human"``.
            When the base env is not a ``SyncVectorEnv``, whatever the wrapped
            env's ``render`` returns.

        Raises:
            NotImplementedError: If the base env is a ``SyncVectorEnv`` and
                ``mode`` is neither ``"human"`` nor ``"rgb_array"``.
        """
        base_env = self.env.unwrapped
        if not isinstance(base_env, SyncVectorEnv):
            return self.env.render(mode=mode)

        # Sub-envs are always rendered as arrays; `mode` only decides what is
        # done with the combined frame.
        imgs = [env.render(mode="rgb_array") for env in base_env.envs]
        bigimg = tile_images(imgs)
        if mode == "human":
            # Imported lazily so OpenCV is only needed for on-screen display.
            import cv2

            # Channel axis is reversed before display (presumably RGB -> BGR
            # for OpenCV -- confirm).
            cv2.imshow("vecenv", bigimg[:, :, ::-1])
            cv2.waitKey(1)
            return None
        if mode == "rgb_array":
            return bigimg
        # NotImplementedError is the exception class; the original
        # `NotImplemented(...)` call raised a TypeError instead because the
        # NotImplemented singleton is not callable.
        raise NotImplementedError(f"Render mode {mode} is not supported")
wrappers/vec_episode_recorder.py CHANGED
@@ -1,9 +1,6 @@
1
  import numpy as np
2
 
3
- from gym.vector.sync_vector_env import SyncVectorEnv
4
  from gym.wrappers.monitoring.video_recorder import VideoRecorder
5
- from stable_baselines3.common.vec_env.base_vec_env import tile_images
6
- from typing import Optional
7
 
8
  from wrappers.vectorable_wrapper import (
9
  VecotarableWrapper,
@@ -45,7 +42,7 @@ class VecEpisodeRecorder(VecotarableWrapper):
45
  self._close_video_recorder()
46
 
47
  self.video_recorder = VideoRecorder(
48
- SyncVectorEnvRenderCompat(self.env),
49
  base_path=self.base_path,
50
  )
51
 
@@ -56,25 +53,3 @@ class VecEpisodeRecorder(VecotarableWrapper):
56
  if self.video_recorder:
57
  self.video_recorder.close()
58
  self.video_recorder = None
59
-
60
-
61
- class SyncVectorEnvRenderCompat(VecotarableWrapper):
62
- def __init__(self, env) -> None:
63
- super().__init__(env)
64
-
65
- def render(self, mode: str = "human") -> Optional[np.ndarray]:
66
- base_env = self.env.unwrapped
67
- if isinstance(base_env, SyncVectorEnv):
68
- imgs = [env.render(mode="rgb_array") for env in base_env.envs]
69
- bigimg = tile_images(imgs)
70
- if mode == "humnan":
71
- import cv2
72
-
73
- cv2.imshow("vecenv", bigimg[:, :, ::-1])
74
- cv2.waitKey(1)
75
- elif mode == "rgb_array":
76
- return bigimg
77
- else:
78
- raise NotImplemented(f"Render mode {mode} is not supported")
79
- else:
80
- return self.env.render(mode=mode)
 
1
  import numpy as np
2
 
 
3
  from gym.wrappers.monitoring.video_recorder import VideoRecorder
 
 
4
 
5
  from wrappers.vectorable_wrapper import (
6
  VecotarableWrapper,
 
42
  self._close_video_recorder()
43
 
44
  self.video_recorder = VideoRecorder(
45
+ self.env,
46
  base_path=self.base_path,
47
  )
48
 
 
53
  if self.video_recorder:
54
  self.video_recorder.close()
55
  self.video_recorder = None