nsanghi committed on
Commit
fbdb7ad
1 Parent(s): 4b91afe

pushing model

Browse files
README.md ADDED
@@ -0,0 +1,77 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ ---
2
+ tags:
3
+ - MountainCarContinuous-v0
4
+ - deep-reinforcement-learning
5
+ - reinforcement-learning
6
+ - custom-implementation
7
+ library_name: cleanrl
8
+ model-index:
9
+ - name: DDPG
10
+ results:
11
+ - task:
12
+ type: reinforcement-learning
13
+ name: reinforcement-learning
14
+ dataset:
15
+ name: MountainCarContinuous-v0
16
+ type: MountainCarContinuous-v0
17
+ metrics:
18
+ - type: mean_reward
19
+ value: -1.00 +/- 0.04
20
+ name: mean_reward
21
+ verified: false
22
+ ---
23
+
24
+ # (CleanRL) **DDPG** Agent Playing **MountainCarContinuous-v0**
25
+
26
+ This is a trained model of a DDPG agent playing MountainCarContinuous-v0.
27
+ The model was trained by using [CleanRL](https://github.com/vwxyzjn/cleanrl) and the most up-to-date training code can be
28
+ found [here](https://github.com/vwxyzjn/cleanrl/blob/master/cleanrl/ddpg_continuous_action.py).
29
+
30
+ ## Get Started
31
+
32
+ To use this model, please install the `cleanrl` package with the following command:
33
+
34
+ ```
35
+ pip install "cleanrl[ddpg_continuous_action]"
36
+ python -m cleanrl_utils.enjoy --exp-name ddpg_continuous_action --env-id MountainCarContinuous-v0
37
+ ```
38
+
39
+ Please refer to the [documentation](https://docs.cleanrl.dev/get-started/zoo/) for more detail.
40
+
41
+
42
+ ## Command to reproduce the training
43
+
44
+ ```bash
45
+ curl -OL https://huggingface.co/nsanghi/MountainCarContinuous-v0-ddpg_continuous_action-seed1/raw/main/ddpg_continuous_action.py
46
+ curl -OL https://huggingface.co/nsanghi/MountainCarContinuous-v0-ddpg_continuous_action-seed1/raw/main/pyproject.toml
47
+ curl -OL https://huggingface.co/nsanghi/MountainCarContinuous-v0-ddpg_continuous_action-seed1/raw/main/poetry.lock
48
+ poetry install --all-extras
49
+ python ddpg_continuous_action.py --no-cuda --total-timesteps 25000 --learning-starts 5000 --env-id MountainCarContinuous-v0 --track --hf-entity nsanghi --capture-video --save-model --upload-model
50
+ ```
51
+
52
+ ## Hyperparameters
53
+ ```python
54
+ {'batch_size': 256,
55
+ 'buffer_size': 1000000,
56
+ 'capture_video': True,
57
+ 'cuda': False,
58
+ 'env_id': 'MountainCarContinuous-v0',
59
+ 'exp_name': 'ddpg_continuous_action',
60
+ 'exploration_noise': 0.1,
61
+ 'gamma': 0.99,
62
+ 'hf_entity': 'nsanghi',
63
+ 'learning_rate': 0.0003,
64
+ 'learning_starts': 5000,
65
+ 'noise_clip': 0.5,
66
+ 'policy_frequency': 2,
67
+ 'save_model': True,
68
+ 'seed': 1,
69
+ 'tau': 0.005,
70
+ 'torch_deterministic': True,
71
+ 'total_timesteps': 25000,
72
+ 'track': True,
73
+ 'upload_model': True,
74
+ 'wandb_entity': None,
75
+ 'wandb_project_name': 'cleanRL'}
76
+ ```
77
+
ddpg_continuous_action.cleanrl_model ADDED
Binary file (540 kB). View file
 
ddpg_continuous_action.py ADDED
@@ -0,0 +1,274 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ # docs and experiment results can be found at https://docs.cleanrl.dev/rl-algorithms/ddpg/#ddpg_continuous_actionpy
2
+ import os
3
+ import random
4
+ import time
5
+ from dataclasses import dataclass
6
+
7
+ import gymnasium as gym
8
+ import numpy as np
9
+ import torch
10
+ import torch.nn as nn
11
+ import torch.nn.functional as F
12
+ import torch.optim as optim
13
+ import tyro
14
+ from stable_baselines3.common.buffers import ReplayBuffer
15
+ from torch.utils.tensorboard import SummaryWriter
16
+
17
+
18
@dataclass
class Args:
    """Configuration of the DDPG training script.

    The script builds an instance of this class from the command line via
    ``tyro.cli(Args)``. Every field is followed by a string literal that
    describes it, which is the convention this class already uses.
    """

    exp_name: str = os.path.basename(__file__)[: -len(".py")]
    """the name of this experiment"""
    seed: int = 1
    """seed of the experiment"""
    torch_deterministic: bool = True
    """if toggled, `torch.backends.cudnn.deterministic=False`"""
    cuda: bool = True
    """if toggled, cuda will be enabled by default"""
    track: bool = False
    """if toggled, this experiment will be tracked with Weights and Biases"""
    wandb_project_name: str = "cleanRL"
    """the wandb's project name"""
    wandb_entity: str = None
    """the entity (team) of wandb's project"""
    capture_video: bool = False
    """whether to capture videos of the agent performances (check out `videos` folder)"""
    save_model: bool = False
    """whether to save model into the `runs/{run_name}` folder"""
    upload_model: bool = False
    """whether to upload the saved model to huggingface"""
    hf_entity: str = ""
    """the user or org name of the model repository from the Hugging Face Hub"""

    # Algorithm specific arguments
    env_id: str = "Hopper-v4"
    """the id of the environment to train on"""
    total_timesteps: int = 1000000
    """total timesteps of the experiments"""
    learning_rate: float = 3e-4
    """the learning rate of the optimizer"""
    buffer_size: int = int(1e6)
    """the replay memory buffer size"""
    gamma: float = 0.99
    """the discount factor gamma"""
    tau: float = 0.005
    """target smoothing coefficient (default: 0.005)"""
    batch_size: int = 256
    """the batch size of samples drawn from the replay memory"""
    exploration_noise: float = 0.1
    """the scale of exploration noise"""
    # `25e3` is a float literal; wrap it so the default matches the `int` annotation
    # (same numeric value, 25000).
    learning_starts: int = int(25e3)
    """timestep to start learning"""
    policy_frequency: int = 2
    """the frequency of training policy (delayed)"""
    # NOTE: `noise_clip` is declared here but not read anywhere else in this file.
    noise_clip: float = 0.5
    """noise clip parameter of the Target Policy Smoothing Regularization"""
66
+
67
+
68
def make_env(env_id, seed, idx, capture_video, run_name):
    """Return a zero-argument factory that builds one wrapped environment.

    Args:
        env_id: id passed to ``gym.make``.
        seed: seed applied to the environment's action space.
        idx: index of this environment; video is recorded only for index 0.
        capture_video: whether video recording is requested at all.
        run_name: name used for the ``videos/{run_name}`` output folder.

    Returns:
        A callable that creates the environment when invoked.
    """

    def _build():
        record = capture_video and idx == 0
        if record:
            # Recording needs frames, so the env is created in rgb_array render mode.
            env = gym.wrappers.RecordVideo(
                gym.make(env_id, render_mode="rgb_array"),
                f"videos/{run_name}",
            )
        else:
            env = gym.make(env_id)
        env = gym.wrappers.RecordEpisodeStatistics(env)
        env.action_space.seed(seed)
        return env

    return _build
80
+
81
+
82
+ # ALGO LOGIC: initialize agent here:
83
class QNetwork(nn.Module):
    """Critic: a three-layer MLP mapping an (observation, action) pair to one value."""

    def __init__(self, env):
        """Size the input layer from the env's single observation and action shapes."""
        super().__init__()
        obs_dim = np.array(env.single_observation_space.shape).prod()
        act_dim = np.prod(env.single_action_space.shape)
        self.fc1 = nn.Linear(obs_dim + act_dim, 256)
        self.fc2 = nn.Linear(256, 256)
        self.fc3 = nn.Linear(256, 1)

    def forward(self, x, a):
        """Concatenate `x` and `a` along dim 1 and run them through the MLP."""
        joint = torch.cat([x, a], 1)
        hidden = F.relu(self.fc1(joint))
        hidden = F.relu(self.fc2(hidden))
        return self.fc3(hidden)
96
+
97
+
98
class Actor(nn.Module):
    """Deterministic policy: an MLP whose tanh output is rescaled to the action bounds."""

    def __init__(self, env):
        """Build the layers and store the affine action-rescaling terms as buffers."""
        super().__init__()
        obs_dim = np.array(env.single_observation_space.shape).prod()
        act_dim = np.prod(env.single_action_space.shape)
        self.fc1 = nn.Linear(obs_dim, 256)
        self.fc2 = nn.Linear(256, 256)
        self.fc_mu = nn.Linear(256, act_dim)

        # action rescaling: half-range and midpoint of the action bounds.
        # NOTE(review): the bounds come from `env.action_space`, not
        # `env.single_action_space`; on a vector env they presumably carry a
        # leading num_envs axis -- confirm before changing, since these buffers
        # are registered on the module and their shapes matter when loading.
        upper = env.action_space.high
        lower = env.action_space.low
        self.register_buffer("action_scale", torch.tensor((upper - lower) / 2.0, dtype=torch.float32))
        self.register_buffer("action_bias", torch.tensor((upper + lower) / 2.0, dtype=torch.float32))

    def forward(self, x):
        """Map observations to actions scaled by `action_scale` and shifted by `action_bias`."""
        hidden = F.relu(self.fc1(x))
        hidden = F.relu(self.fc2(hidden))
        squashed = torch.tanh(self.fc_mu(hidden))
        return squashed * self.action_scale + self.action_bias
117
+
118
+
119
if __name__ == "__main__":
    # Script entry point: parse the CLI arguments, train a DDPG actor/critic pair on a
    # single vectorized environment, then optionally save, evaluate and upload the model.
    import stable_baselines3 as sb3

    # Compare the major version numerically. A plain string comparison is lexicographic,
    # so a release such as "10.0.0" would sort before "2.0" and be rejected by mistake.
    if int(sb3.__version__.split(".")[0]) < 2:
        raise ValueError(
            """Ongoing migration: run the following command to install the new dependencies:
poetry run pip install "stable_baselines3==2.0.0a1"
"""
        )
    args = tyro.cli(Args)
    run_name = f"{args.env_id}__{args.exp_name}__{args.seed}__{int(time.time())}"
    if args.track:
        import wandb

        wandb.init(
            project=args.wandb_project_name,
            entity=args.wandb_entity,
            sync_tensorboard=True,
            config=vars(args),
            name=run_name,
            monitor_gym=True,
            save_code=True,
        )
    writer = SummaryWriter(f"runs/{run_name}")
    # Record every hyperparameter as a markdown table in TensorBoard.
    writer.add_text(
        "hyperparameters",
        "|param|value|\n|-|-|\n%s" % ("\n".join([f"|{key}|{value}|" for key, value in vars(args).items()])),
    )

    # TRY NOT TO MODIFY: seeding
    random.seed(args.seed)
    np.random.seed(args.seed)
    torch.manual_seed(args.seed)
    torch.backends.cudnn.deterministic = args.torch_deterministic

    device = torch.device("cuda" if torch.cuda.is_available() and args.cuda else "cpu")

    # env setup: a vector env wrapping exactly one environment (index 0)
    envs = gym.vector.SyncVectorEnv([make_env(args.env_id, args.seed, 0, args.capture_video, run_name)])
    assert isinstance(envs.single_action_space, gym.spaces.Box), "only continuous action space is supported"

    # Online networks plus target copies that start from identical weights.
    actor = Actor(envs).to(device)
    qf1 = QNetwork(envs).to(device)
    qf1_target = QNetwork(envs).to(device)
    target_actor = Actor(envs).to(device)
    target_actor.load_state_dict(actor.state_dict())
    qf1_target.load_state_dict(qf1.state_dict())
    q_optimizer = optim.Adam(list(qf1.parameters()), lr=args.learning_rate)
    actor_optimizer = optim.Adam(list(actor.parameters()), lr=args.learning_rate)

    # NOTE(review): overrides the declared observation dtype before the buffer is built,
    # presumably so the replay buffer stores float32 observations -- confirm.
    envs.single_observation_space.dtype = np.float32
    rb = ReplayBuffer(
        args.buffer_size,
        envs.single_observation_space,
        envs.single_action_space,
        device,
        handle_timeout_termination=False,
    )
    start_time = time.time()

    # TRY NOT TO MODIFY: start the game
    obs, _ = envs.reset(seed=args.seed)
    for global_step in range(args.total_timesteps):
        # ALGO LOGIC: put action logic here
        if global_step < args.learning_starts:
            # Before learning starts, act by sampling uniformly from the action space.
            actions = np.array([envs.single_action_space.sample() for _ in range(envs.num_envs)])
        else:
            with torch.no_grad():
                actions = actor(torch.Tensor(obs).to(device))
                # Gaussian exploration noise, scaled by the action half-range.
                actions += torch.normal(0, actor.action_scale * args.exploration_noise)
                actions = actions.cpu().numpy().clip(envs.single_action_space.low, envs.single_action_space.high)

        # TRY NOT TO MODIFY: execute the game and log data.
        next_obs, rewards, terminations, truncations, infos = envs.step(actions)

        # TRY NOT TO MODIFY: record rewards for plotting purposes
        if "final_info" in infos:
            for info in infos["final_info"]:
                print(f"global_step={global_step}, episodic_return={info['episode']['r']}")
                writer.add_scalar("charts/episodic_return", info["episode"]["r"], global_step)
                writer.add_scalar("charts/episodic_length", info["episode"]["l"], global_step)
                # Only the first entry is logged.
                break

        # TRY NOT TO MODIFY: save data to replay buffer; handle `final_observation`
        # For truncated episodes, store the true last observation that the env reports
        # under `final_observation` in place of the entry in `next_obs`.
        real_next_obs = next_obs.copy()
        for idx, trunc in enumerate(truncations):
            if trunc:
                real_next_obs[idx] = infos["final_observation"][idx]
        # Only `terminations` is passed as the done signal, so truncated transitions
        # are still bootstrapped in the TD target below.
        rb.add(obs, real_next_obs, actions, rewards, terminations, infos)

        # TRY NOT TO MODIFY: CRUCIAL step easy to overlook
        obs = next_obs

        # ALGO LOGIC: training.
        if global_step > args.learning_starts:
            data = rb.sample(args.batch_size)
            with torch.no_grad():
                # One-step TD target computed with the target actor and target critic.
                next_state_actions = target_actor(data.next_observations)
                qf1_next_target = qf1_target(data.next_observations, next_state_actions)
                next_q_value = data.rewards.flatten() + (1 - data.dones.flatten()) * args.gamma * (qf1_next_target).view(-1)

            qf1_a_values = qf1(data.observations, data.actions).view(-1)
            qf1_loss = F.mse_loss(qf1_a_values, next_q_value)

            # optimize the model
            q_optimizer.zero_grad()
            qf1_loss.backward()
            q_optimizer.step()

            # The actor and both target networks are updated only every
            # `policy_frequency` steps.
            if global_step % args.policy_frequency == 0:
                actor_loss = -qf1(data.observations, actor(data.observations)).mean()
                actor_optimizer.zero_grad()
                actor_loss.backward()
                actor_optimizer.step()

                # update the target network: target <- tau * online + (1 - tau) * target
                for param, target_param in zip(actor.parameters(), target_actor.parameters()):
                    target_param.data.copy_(args.tau * param.data + (1 - args.tau) * target_param.data)
                for param, target_param in zip(qf1.parameters(), qf1_target.parameters()):
                    target_param.data.copy_(args.tau * param.data + (1 - args.tau) * target_param.data)

            if global_step % 100 == 0:
                writer.add_scalar("losses/qf1_values", qf1_a_values.mean().item(), global_step)
                writer.add_scalar("losses/qf1_loss", qf1_loss.item(), global_step)
                writer.add_scalar("losses/actor_loss", actor_loss.item(), global_step)
                print("SPS:", int(global_step / (time.time() - start_time)))
                writer.add_scalar("charts/SPS", int(global_step / (time.time() - start_time)), global_step)

    if args.save_model:
        model_path = f"runs/{run_name}/{args.exp_name}.cleanrl_model"
        # The checkpoint is a tuple: (actor state dict, critic state dict).
        torch.save((actor.state_dict(), qf1.state_dict()), model_path)
        print(f"model saved to {model_path}")
        from cleanrl_utils.evals.ddpg_eval import evaluate

        episodic_returns = evaluate(
            model_path,
            make_env,
            args.env_id,
            eval_episodes=10,
            run_name=f"{run_name}-eval",
            Model=(Actor, QNetwork),
            device=device,
            exploration_noise=args.exploration_noise,
        )
        for idx, episodic_return in enumerate(episodic_returns):
            writer.add_scalar("eval/episodic_return", episodic_return, idx)

        if args.upload_model:
            from cleanrl_utils.huggingface import push_to_hub

            repo_name = f"{args.env_id}-{args.exp_name}-seed{args.seed}"
            repo_id = f"{args.hf_entity}/{repo_name}" if args.hf_entity else repo_name
            push_to_hub(args, episodic_returns, repo_id, "DDPG", f"runs/{run_name}", f"videos/{run_name}-eval")

    envs.close()
    writer.close()
events.out.tfevents.1705171834.nimish-lenovo.11451.0 ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:da411b6caf94faed2960fadad1113699638beb2be5bc6fc244941100eb2ffc0c
3
+ size 47373
poetry.lock ADDED
The diff for this file is too large to render. See raw diff
 
pyproject.toml ADDED
@@ -0,0 +1,105 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ [tool.poetry]
2
+ name = "cleanrl"
3
+ version = "2.0.0b1"
4
+ description = "High-quality single file implementation of Deep Reinforcement Learning algorithms with research-friendly features"
5
+ authors = ["Costa Huang <costa.huang@outlook.com>"]
6
+ packages = [
7
+ { include = "cleanrl" },
8
+ { include = "cleanrl_utils" },
9
+ ]
10
+ keywords = ["reinforcement", "machine", "learning", "research"]
11
+ license="MIT"
12
+ readme = "README.md"
13
+
14
+ [tool.poetry.dependencies]
15
+ python = ">=3.8,<3.11"
16
+ tensorboard = "^2.10.0"
17
+ wandb = "^0.13.11"
18
+ gym = "0.23.1"
19
+ torch = ">=1.12.1"
20
+ stable-baselines3 = "2.0.0"
21
+ gymnasium = ">=0.28.1"
22
+ moviepy = "^1.0.3"
23
+ pygame = "2.1.0"
24
+ huggingface-hub = "^0.11.1"
25
+ rich = "<12.0"
26
+ tenacity = "^8.2.2"
27
+ tyro = "^0.5.10"
28
+ pyyaml = "^6.0.1"
29
+
30
+ ale-py = {version = "0.8.1", optional = true}
31
+ AutoROM = {extras = ["accept-rom-license"], version = "~0.4.2", optional = true}
32
+ opencv-python = {version = "^4.6.0.66", optional = true}
33
+ procgen = {version = "^0.10.7", optional = true}
34
+ pytest = {version = "^7.1.3", optional = true}
35
+ mujoco = {version = "<=2.3.3", optional = true}
36
+ imageio = {version = "^2.14.1", optional = true}
37
+ mkdocs-material = {version = "^8.4.3", optional = true}
38
+ markdown-include = {version = "^0.7.0", optional = true}
39
+ openrlbenchmark = {version = "^0.1.1b4", optional = true}
40
+ jax = {version = "0.4.8", optional = true}
41
+ jaxlib = {version = "0.4.7", optional = true}
42
+ flax = {version = "0.6.8", optional = true}
43
+ optuna = {version = "^3.0.1", optional = true}
44
+ optuna-dashboard = {version = "^0.7.2", optional = true}
45
+ envpool = {version = "^0.6.4", optional = true}
46
+ PettingZoo = {version = "1.18.1", optional = true}
47
+ SuperSuit = {version = "3.4.0", optional = true}
48
+ multi-agent-ale-py = {version = "0.1.11", optional = true}
49
+ boto3 = {version = "^1.24.70", optional = true}
50
+ awscli = {version = "^1.31.0", optional = true}
51
+ shimmy = {version = ">=1.1.0", optional = true}
52
+ dm-control = {version = ">=1.0.10", optional = true}
53
+ h5py = {version = ">=3.7.0", optional = true}
54
+ optax = {version = "0.1.4", optional = true}
55
+ chex = {version = "0.1.5", optional = true}
56
+ numpy = ">=1.21.6"
57
+
58
+ [tool.poetry.group.dev.dependencies]
59
+ pre-commit = "^2.20.0"
60
+
61
+ [build-system]
62
+ requires = ["poetry-core"]
63
+ build-backend = "poetry.core.masonry.api"
64
+
65
+ [tool.poetry.extras]
66
+ atari = ["ale-py", "AutoROM", "opencv-python", "shimmy"]
67
+ procgen = ["procgen"]
68
+ plot = ["pandas", "seaborn"]
69
+ pytest = ["pytest"]
70
+ mujoco = ["mujoco", "imageio"]
71
+ jax = ["jax", "jaxlib", "flax"]
72
+ docs = ["mkdocs-material", "markdown-include", "openrlbenchmark"]
73
+ envpool = ["envpool"]
74
+ optuna = ["optuna", "optuna-dashboard"]
75
+ pettingzoo = ["PettingZoo", "SuperSuit", "multi-agent-ale-py"]
76
+ cloud = ["boto3", "awscli"]
77
+ dm_control = ["shimmy", "mujoco", "dm-control", "h5py"]
78
+
79
+ # dependencies for algorithm variants (useful when you want to run a specific algorithm)
80
+ dqn = []
81
+ dqn_atari = ["ale-py", "AutoROM", "opencv-python"]
82
+ dqn_jax = ["jax", "jaxlib", "flax"]
83
+ dqn_atari_jax = [
84
+ "ale-py", "AutoROM", "opencv-python", # atari
85
+ "jax", "jaxlib", "flax" # jax
86
+ ]
87
+ c51 = []
88
+ c51_atari = ["ale-py", "AutoROM", "opencv-python"]
89
+ c51_jax = ["jax", "jaxlib", "flax"]
90
+ c51_atari_jax = [
91
+ "ale-py", "AutoROM", "opencv-python", # atari
92
+ "jax", "jaxlib", "flax" # jax
93
+ ]
94
+ ppo_atari_envpool_xla_jax_scan = [
95
+ "ale-py", "AutoROM", "opencv-python", # atari
96
+ "jax", "jaxlib", "flax", # jax
97
+ "envpool", # envpool
98
+ ]
99
+ qdagger_dqn_atari_impalacnn = [
100
+ "ale-py", "AutoROM", "opencv-python"
101
+ ]
102
+ qdagger_dqn_atari_jax_impalacnn = [
103
+ "ale-py", "AutoROM", "opencv-python", # atari
104
+ "jax", "jaxlib", "flax", # jax
105
+ ]
replay.mp4 ADDED
Binary file (149 kB). View file
 
videos/MountainCarContinuous-v0__ddpg_continuous_action__1__1705171829-eval/rl-video-episode-0.mp4 ADDED
Binary file (116 kB). View file
 
videos/MountainCarContinuous-v0__ddpg_continuous_action__1__1705171829-eval/rl-video-episode-1.mp4 ADDED
Binary file (161 kB). View file
 
videos/MountainCarContinuous-v0__ddpg_continuous_action__1__1705171829-eval/rl-video-episode-8.mp4 ADDED
Binary file (149 kB). View file