sgoodfriend committed
Commit: 9eed99e
Parent(s): 1791f57
A2C playing MountainCar-v0 from https://github.com/sgoodfriend/rl-algo-impls/tree/0760ef7d52b17f30219a27c18ba52c8895025ae3
Browse files
- README.md +2 -2
- replay.meta.json +1 -1
- runner/env.py +3 -0
- shared/callbacks/eval_callback.py +2 -2
- wrappers/sync_vector_env_render_compat.py +31 -0
- wrappers/vec_episode_recorder.py +1 -26
README.md CHANGED
@@ -23,7 +23,7 @@ model-index:
 
 This is a trained model of a **A2C** agent playing **MountainCar-v0** using the [/sgoodfriend/rl-algo-impls](https://github.com/sgoodfriend/rl-algo-impls) repo.
 
-All models trained at this commit can be found at
+All models trained at this commit can be found at https://api.wandb.ai/links/sgoodfriend/eyvb72mv.
 
 ## Training Results
 
@@ -83,7 +83,7 @@ notebook.
 
 
 ## Benchmarking (with Lambda Labs instance)
-This and other models from
+This and other models from https://api.wandb.ai/links/sgoodfriend/eyvb72mv were generated by running a script on a Lambda
 Labs instance. In a Lambda Labs instance terminal:
 ```
 git clone git@github.com:sgoodfriend/rl-algo-impls.git
replay.meta.json CHANGED
@@ -1 +1 @@
-{"content_type": "video/mp4", "encoder_version": {"backend": "ffmpeg", "version": "b'ffmpeg version 5.1.2 Copyright (c) 2000-2022 the FFmpeg developers\\nbuilt with clang version 14.0.6\\nconfiguration: --prefix=/Users/runner/miniforge3/conda-bld/ffmpeg_1671040513231/_h_env_placehold_placehold_placehold_placehold_placehold_placehold_placehold_placehold_placehold_placehold_placehold_placehold_placehold_placehold_placehold_placehold_placehold_placehold_placehold_pl --cc=arm64-apple-darwin20.0.0-clang --cxx=arm64-apple-darwin20.0.0-clang++ --nm=arm64-apple-darwin20.0.0-nm --ar=arm64-apple-darwin20.0.0-ar --disable-doc --disable-openssl --enable-demuxer=dash --enable-hardcoded-tables --enable-libfreetype --enable-libfontconfig --enable-libopenh264 --enable-cross-compile --arch=arm64 --target-os=darwin --cross-prefix=arm64-apple-darwin20.0.0- --host-cc=/Users/runner/miniforge3/conda-bld/ffmpeg_1671040513231/_build_env/bin/x86_64-apple-darwin13.4.0-clang --enable-neon --enable-gnutls --enable-libmp3lame --enable-libvpx --enable-pthreads --enable-gpl --enable-libx264 --enable-libx265 --enable-libaom --enable-libsvtav1 --enable-libxml2 --enable-pic --enable-shared --disable-static --enable-version3 --enable-zlib --pkg-config=/Users/runner/miniforge3/conda-bld/ffmpeg_1671040513231/_build_env/bin/pkg-config\\nlibavutil 57. 28.100 / 57. 28.100\\nlibavcodec 59. 37.100 / 59. 37.100\\nlibavformat 59. 27.100 / 59. 27.100\\nlibavdevice 59. 7.100 / 59. 7.100\\nlibavfilter 8. 44.100 / 8. 44.100\\nlibswscale 6. 7.100 / 6. 7.100\\nlibswresample 4. 7.100 / 4. 7.100\\nlibpostproc 56. 6.100 / 56. 6.100\\n'", "cmdline": ["ffmpeg", "-nostats", "-loglevel", "error", "-y", "-f", "rawvideo", "-s:v", "1200x800", "-pix_fmt", "rgb24", "-framerate", "30", "-i", "-", "-vf", "scale=trunc(iw/2)*2:trunc(ih/2)*2", "-vcodec", "libx264", "-pix_fmt", "yuv420p", "-r", "30", "/var/folders/9g/my5557_91xddp6lx00nkzly80000gn/T/
+{"content_type": "video/mp4", "encoder_version": {"backend": "ffmpeg", "version": "b'ffmpeg version 5.1.2 Copyright (c) 2000-2022 the FFmpeg developers\\nbuilt with clang version 14.0.6\\nconfiguration: --prefix=/Users/runner/miniforge3/conda-bld/ffmpeg_1671040513231/_h_env_placehold_placehold_placehold_placehold_placehold_placehold_placehold_placehold_placehold_placehold_placehold_placehold_placehold_placehold_placehold_placehold_placehold_placehold_placehold_pl --cc=arm64-apple-darwin20.0.0-clang --cxx=arm64-apple-darwin20.0.0-clang++ --nm=arm64-apple-darwin20.0.0-nm --ar=arm64-apple-darwin20.0.0-ar --disable-doc --disable-openssl --enable-demuxer=dash --enable-hardcoded-tables --enable-libfreetype --enable-libfontconfig --enable-libopenh264 --enable-cross-compile --arch=arm64 --target-os=darwin --cross-prefix=arm64-apple-darwin20.0.0- --host-cc=/Users/runner/miniforge3/conda-bld/ffmpeg_1671040513231/_build_env/bin/x86_64-apple-darwin13.4.0-clang --enable-neon --enable-gnutls --enable-libmp3lame --enable-libvpx --enable-pthreads --enable-gpl --enable-libx264 --enable-libx265 --enable-libaom --enable-libsvtav1 --enable-libxml2 --enable-pic --enable-shared --disable-static --enable-version3 --enable-zlib --pkg-config=/Users/runner/miniforge3/conda-bld/ffmpeg_1671040513231/_build_env/bin/pkg-config\\nlibavutil 57. 28.100 / 57. 28.100\\nlibavcodec 59. 37.100 / 59. 37.100\\nlibavformat 59. 27.100 / 59. 27.100\\nlibavdevice 59. 7.100 / 59. 7.100\\nlibavfilter 8. 44.100 / 8. 44.100\\nlibswscale 6. 7.100 / 6. 7.100\\nlibswresample 4. 7.100 / 4. 7.100\\nlibpostproc 56. 6.100 / 56. 6.100\\n'", "cmdline": ["ffmpeg", "-nostats", "-loglevel", "error", "-y", "-f", "rawvideo", "-s:v", "1200x800", "-pix_fmt", "rgb24", "-framerate", "30", "-i", "-", "-vf", "scale=trunc(iw/2)*2:trunc(ih/2)*2", "-vcodec", "libx264", "-pix_fmt", "yuv420p", "-r", "30", "/var/folders/9g/my5557_91xddp6lx00nkzly80000gn/T/tmprlq4z8zm/a2c-MountainCar-v0/replay.mp4"]}, "episode": {"r": -118.0, "l": 118, "t": 1.678512}}
runner/env.py CHANGED
@@ -26,6 +26,7 @@ from wrappers.initial_step_truncate_wrapper import InitialStepTruncateWrapper
 from wrappers.is_vector_env import IsVectorEnv
 from wrappers.noop_env_seed import NoopEnvSeed
 from wrappers.normalize import NormalizeObservation, NormalizeReward
+from wrappers.sync_vector_env_render_compat import SyncVectorEnvRenderCompat
 from wrappers.transpose_image_observation import TransposeImageObservation
 from wrappers.vectorable_wrapper import VecEnv
 from wrappers.video_compat_wrapper import VideoCompatWrapper
@@ -180,6 +181,8 @@ def _make_vec_env(
     else:
         raise ValueError(f"env_type {env_type} unsupported")
     envs = VecEnvClass([make(i) for i in range(n_envs)])
+    if env_type == "gymvec" and vec_env_class == "sync":
+        envs = SyncVectorEnvRenderCompat(envs)
     if training:
         assert tb_writer
         envs = EpisodeStatsWriter(
shared/callbacks/eval_callback.py CHANGED
@@ -75,7 +75,9 @@ def evaluate(
     print_returns: bool = True,
     ignore_first_episode: bool = False,
 ) -> EpisodesStats:
+    policy.sync_normalization(env)
     policy.eval()
+
     episodes = EvaluateAccumulator(
         env.num_envs, n_episodes, print_returns, ignore_first_episode
     )
@@ -137,7 +139,6 @@ class EvalCallback(Callback):
     def on_step(self, timesteps_elapsed: int = 1) -> bool:
         super().on_step(timesteps_elapsed)
         if self.timesteps_elapsed // self.step_freq >= len(self.stats):
-            self.policy.sync_normalization(self.env)
             self.evaluate()
         return True
 
@@ -176,7 +177,6 @@ class EvalCallback(Callback):
             )
            if strictly_better and self.record_best_videos:
                assert self.video_env and self.best_video_dir
-               self.policy.sync_normalization(self.video_env)
                self.best_video_base_path = os.path.join(
                    self.best_video_dir, str(self.timesteps_elapsed)
                )
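With this change, `evaluate()` syncs the normalization statistics into the evaluation env itself before switching the policy to eval mode, so the `EvalCallback` call sites no longer need their own `sync_normalization` calls. A minimal caller sketch, not part of this commit; the positional argument order `env, policy, n_episodes` is an assumption, since the full signature is not shown in these hunks:

```python
from shared.callbacks.eval_callback import evaluate

# Normalization stats are now synced inside evaluate(), so a bare call suffices.
stats = evaluate(env, policy, 10, print_returns=True)  # assumed argument order
print(stats)
```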
wrappers/sync_vector_env_render_compat.py ADDED
@@ -0,0 +1,31 @@
+import numpy as np
+
+from gym.vector.sync_vector_env import SyncVectorEnv
+from stable_baselines3.common.vec_env.base_vec_env import tile_images
+from typing import Optional
+
+from wrappers.vectorable_wrapper import (
+    VecotarableWrapper,
+)
+
+
+class SyncVectorEnvRenderCompat(VecotarableWrapper):
+    def __init__(self, env) -> None:
+        super().__init__(env)
+
+    def render(self, mode: str = "human") -> Optional[np.ndarray]:
+        base_env = self.env.unwrapped
+        if isinstance(base_env, SyncVectorEnv):
+            imgs = [env.render(mode="rgb_array") for env in base_env.envs]
+            bigimg = tile_images(imgs)
+            if mode == "human":
+                import cv2
+
+                cv2.imshow("vecenv", bigimg[:, :, ::-1])
+                cv2.waitKey(1)
+            elif mode == "rgb_array":
+                return bigimg
+            else:
+                raise NotImplemented(f"Render mode {mode} is not supported")
+        else:
+            return self.env.render(mode=mode)
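For context, a minimal usage sketch of the new wrapper (not part of this commit; it assumes the pre-0.26 gym render API that the wrapper itself relies on, where `render(mode="rgb_array")` returns a frame):

```python
import gym
from gym.vector.sync_vector_env import SyncVectorEnv

from wrappers.sync_vector_env_render_compat import SyncVectorEnvRenderCompat

# Wrap a synchronous vector env so that render() returns one tiled image
# containing the frames of all sub-environments.
envs = SyncVectorEnvRenderCompat(
    SyncVectorEnv([lambda: gym.make("MountainCar-v0") for _ in range(4)])
)
envs.reset()
frame = envs.render(mode="rgb_array")  # numpy array tiling the 4 sub-env frames
print(frame.shape)
```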
wrappers/vec_episode_recorder.py CHANGED
@@ -1,9 +1,6 @@
 import numpy as np
 
-from gym.vector.sync_vector_env import SyncVectorEnv
 from gym.wrappers.monitoring.video_recorder import VideoRecorder
-from stable_baselines3.common.vec_env.base_vec_env import tile_images
-from typing import Optional
 
 from wrappers.vectorable_wrapper import (
     VecotarableWrapper,
@@ -45,7 +42,7 @@ class VecEpisodeRecorder(VecotarableWrapper):
         self._close_video_recorder()
 
         self.video_recorder = VideoRecorder(
-
+            self.env,
             base_path=self.base_path,
         )
 
@@ -56,25 +53,3 @@
         if self.video_recorder:
             self.video_recorder.close()
             self.video_recorder = None
-
-
-class SyncVectorEnvRenderCompat(VecotarableWrapper):
-    def __init__(self, env) -> None:
-        super().__init__(env)
-
-    def render(self, mode: str = "human") -> Optional[np.ndarray]:
-        base_env = self.env.unwrapped
-        if isinstance(base_env, SyncVectorEnv):
-            imgs = [env.render(mode="rgb_array") for env in base_env.envs]
-            bigimg = tile_images(imgs)
-            if mode == "humnan":
-                import cv2
-
-                cv2.imshow("vecenv", bigimg[:, :, ::-1])
-                cv2.waitKey(1)
-            elif mode == "rgb_array":
-                return bigimg
-            else:
-                raise NotImplemented(f"Render mode {mode} is not supported")
-        else:
-            return self.env.render(mode=mode)
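Dropping the `SyncVectorEnvRenderCompat` definition from this module and passing `self.env` to `VideoRecorder` means the recorder now renders whatever env it wraps directly, which is why the vector env itself must expose a working `render()` (the job of the new wrapper above). A hypothetical usage sketch, with constructor arguments assumed rather than taken from this diff:

```python
from wrappers.vec_episode_recorder import VecEpisodeRecorder

# Record an episode to <base_path>.mp4; VideoRecorder calls env.render() internally.
env = VecEpisodeRecorder(env, base_path="replays/a2c-MountainCar-v0")  # assumed kwargs
```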