andrewzhang505 committed on
Commit
432866b
1 Parent(s): 48a62db

Delete git.diff

Files changed (1)
  1. git.diff +0 -328
git.diff DELETED
@@ -1,328 +0,0 @@
- diff --git a/sample_factory/algo/learning/learner.py b/sample_factory/algo/learning/learner.py
- index 178d2ab..20bb937 100644
- --- a/sample_factory/algo/learning/learner.py
- +++ b/sample_factory/algo/learning/learner.py
- @@ -110,6 +110,20 @@ class KlAdaptiveSchedulerPerEpoch(KlAdaptiveScheduler):
-     def invoke_after_each_epoch(self):
-         return True
- 
- +class LinearDecayScheduler(LearningRateScheduler):
- +    def __init__(self, cfg):
- +        num_updates = cfg.train_for_env_steps // cfg.batch_size * cfg.num_epochs
- +        self.linear_decay = LinearDecay([(0, cfg.learning_rate), (num_updates, 0)])
- +        self.step = 0
- +
- +    def invoke_after_each_minibatch(self):
- +        return True
- +
- +    def update(self, current_lr, recent_kls):
- +        self.step += 1
- +        lr = self.linear_decay.at(self.step)
- +        return lr
- +
- 
- def get_lr_scheduler(cfg) -> LearningRateScheduler:
-     if cfg.lr_schedule == "constant":
- @@ -118,6 +132,8 @@ def get_lr_scheduler(cfg) -> LearningRateScheduler:
-         return KlAdaptiveSchedulerPerMinibatch(cfg)
-     elif cfg.lr_schedule == "kl_adaptive_epoch":
-         return KlAdaptiveSchedulerPerEpoch(cfg)
- +    elif cfg.lr_schedule == "linear_decay":
- +        return LinearDecayScheduler(cfg)
-     else:
-         raise RuntimeError(f"Unknown scheduler {cfg.lr_schedule}")
- 
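For context, the behavior this hunk adds is plain linear annealing of the learning rate over minibatch updates. Below is a minimal, self-contained sketch of the idea; the real LinearDecay helper lives in sample-factory's utils, and this stand-in only assumes the two-point constructor and .at() interpolation visible in the hunk.

# Illustrative stand-in for the LinearDecay helper used above (assumed API:
# constructed from [(step, value), ...] breakpoints, queried with .at(step)).
class LinearDecay:
    def __init__(self, points):
        (self.x0, self.y0), (self.x1, self.y1) = points

    def at(self, x):
        # Clamp outside the breakpoints, interpolate linearly between them.
        if x <= self.x0:
            return self.y0
        if x >= self.x1:
            return self.y1
        frac = (x - self.x0) / (self.x1 - self.x0)
        return self.y0 + frac * (self.y1 - self.y0)

decay = LinearDecay([(0, 3e-3), (1000, 0)])
print(decay.at(0))     # 0.003
print(decay.at(500))   # 0.0015, halfway through the schedule
print(decay.at(1500))  # 0.0, clamped past the final breakpoint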
- diff --git a/sample_factory/envs/mujoco/mujoco_params.py b/sample_factory/envs/mujoco/mujoco_params.py
- index ef0b486..cb4b977 100644
- --- a/sample_factory/envs/mujoco/mujoco_params.py
- +++ b/sample_factory/envs/mujoco/mujoco_params.py
- @@ -1,117 +1,155 @@
- +# def mujoco_override_defaults(env, parser):
- +#     parser.set_defaults(
- +#         batched_sampling=False,
- +#         num_workers=8,
- +#         num_envs_per_worker=16,
- +#         worker_num_splits=2,
- +#         train_for_env_steps=1000000,
- +#         encoder_type="mlp",
- +#         encoder_subtype="mlp_mujoco",
- +#         hidden_size=64,
- +#         encoder_extra_fc_layers=0,
- +#         env_frameskip=1,
- +#         nonlinearity="tanh",
- +#         batch_size=64,
- +#         kl_loss_coeff=0.1,
- +#         use_rnn=False,
- +#         adaptive_stddev=False,
- +#         policy_initialization="torch_default",
- +#         reward_scale=0.01,
- +#         rollout=8,
- +#         max_grad_norm=0.0,
- +#         ppo_epochs=10,
- +#         num_batches_per_epoch=32,
- +#         ppo_clip_ratio=0.2,
- +#         value_loss_coeff=2.0,
- +#         exploration_loss_coeff=0.0,
- +#         learning_rate=3e-3,
- +#         lr_schedule="constant",
- +#         shuffle_minibatches=True,
- +#         gamma=0.99,
- +#         gae_lambda=0.95,
- +#         with_vtrace=False,
- +#         recurrence=1,
- +#         value_bootstrap=False,
- +#         normalize_input=True,
- +#         experiment_summaries_interval=3,
- +#         save_every_sec=15,
- +#         serial_mode=False,
- +#         async_rl=False,
- +#     )
- +
- +#     # environment specific overrides
- +#     env_name = "_".join(env.split("_")[1:]).lower()
- +
- +#     if env_name == "halfcheetah":
- +#         parser.set_defaults(
- +#             reward_scale=0.1,
- +#             learning_rate=3e-3,
- +#             lr_schedule="kl_adaptive_epoch",
- +#             lr_schedule_kl_threshold=3e-2,
- +#             normalize_input=False,
- +#             num_batches_per_epoch=1,
- +#         )
- +#     if env_name == "humanoid":
- +#         parser.set_defaults(
- +#             learning_rate=3e-4,
- +#         )
- +#     if env_name == "hopper":
- +#         parser.set_defaults(
- +#             reward_scale=0.1,
- +#             learning_rate=3e-3,
- +#             lr_schedule="kl_adaptive_epoch",
- +#             lr_schedule_kl_threshold=3e-2,
- +#             # normalize_input=False,
- +#             # num_batches_per_epoch=1,
- +#             # normalize_returns=True,
- +#             # hidden_size=128,
- +#         )
- +#     if env_name == "doublependulum":
- +#         parser.set_defaults(
- +#             reward_scale=0.01,
- +#             learning_rate=3e-3,
- +#             lr_schedule="kl_adaptive_epoch",
- +#             lr_schedule_kl_threshold=3e-2,
- +#         )
- +#     if env_name == "pendulum":
- +#         parser.set_defaults(
- +#             # reward_scale=0.01,
- +#             learning_rate=3e-4,
- +#             lr_schedule="kl_adaptive_epoch",
- +#             lr_schedule_kl_threshold=3e-3,
- +#         )
- +#     if env_name == "reacher":
- +#         parser.set_defaults(
- +#             reward_scale=0.1,
- +#             learning_rate=3e-3,
- +#             lr_schedule="kl_adaptive_epoch",
- +#             lr_schedule_kl_threshold=3e-2,
- +#             normalize_input=False,
- +#             num_batches_per_epoch=1,
- +#         )
- +#     if env_name == "swimmer":
- +#         parser.set_defaults(
- +#             reward_scale=1,
- +#             # learning_rate=3e-3,
- +#             # lr_schedule="kl_adaptive_epoch",
- +#             # lr_schedule_kl_threshold=3e-2,
- +#             # gamma=0.9995,
- +#             rollout=128,
- +#             batch_size=128,
- +#         )
- +#     if env_name == "walker":
- +#         parser.set_defaults(
- +#             reward_scale=0.1,
- +#             learning_rate=3e-3,
- +#             lr_schedule="kl_adaptive_epoch",
- +#             lr_schedule_kl_threshold=3e-2,
- +#         )
- +
- def mujoco_override_defaults(env, parser):
-     parser.set_defaults(
-         batched_sampling=False,
-         num_workers=8,
- -        num_envs_per_worker=16,
- +        num_envs_per_worker=8,
-         worker_num_splits=2,
- -        train_for_env_steps=1000000,
- +        train_for_env_steps=10000000,
-         encoder_type="mlp",
-         encoder_subtype="mlp_mujoco",
-         hidden_size=64,
-         encoder_extra_fc_layers=0,
-         env_frameskip=1,
-         nonlinearity="tanh",
- -        batch_size=64,
- +        batch_size=1024,
-         kl_loss_coeff=0.1,
- -
-         use_rnn=False,
-         adaptive_stddev=False,
-         policy_initialization="torch_default",
- -        reward_scale=0.01,
- -        rollout=8,
- -        max_grad_norm=0.0,
- -        ppo_epochs=10,
- -        num_batches_per_epoch=32,
- +        reward_scale=1,
- +        rollout=64,
- +        max_grad_norm=3.5,
- +        num_epochs=2,
- +        num_batches_per_epoch=4,
-         ppo_clip_ratio=0.2,
- -        value_loss_coeff=2.0,
- +        value_loss_coeff=1.3,
-         exploration_loss_coeff=0.0,
- -        learning_rate=3e-3,
- -        lr_schedule="constant",
- -        shuffle_minibatches=True,
- +        learning_rate=0.00295,
- +        lr_schedule="linear_decay",
- +        shuffle_minibatches=False,
-         gamma=0.99,
-         gae_lambda=0.95,
-         with_vtrace=False,
-         recurrence=1,
-         value_bootstrap=False,
-         normalize_input=True,
- +        normalize_returns=True,
-         experiment_summaries_interval=3,
-         save_every_sec=15,
- -
-         serial_mode=False,
-         async_rl=False,
-     )
- 
- -    # environment specific overrides
- -    env_name = "_".join(env.split("_")[1:]).lower()
- -
- -    if env_name == "halfcheetah":
- -        parser.set_defaults(
- -            reward_scale=0.1,
- -            learning_rate=3e-3,
- -            lr_schedule="kl_adaptive_epoch",
- -            lr_schedule_kl_threshold=3e-2,
- -            normalize_input=False,
- -            num_batches_per_epoch=1,
- -        )
- -    if env_name == "humanoid":
- -        parser.set_defaults(
- -            learning_rate=3e-4,
- -        )
- -    if env_name == "hopper":
- -        parser.set_defaults(
- -            reward_scale=0.1,
- -            learning_rate=3e-3,
- -            lr_schedule="kl_adaptive_epoch",
- -            lr_schedule_kl_threshold=3e-2,
- -            # normalize_input=False,
- -            # num_batches_per_epoch=1,
- -            # normalize_returns=True,
- -            # hidden_size=128,
- -        )
- -    if env_name == "doublependulum":
- -        parser.set_defaults(
- -            reward_scale=0.01,
- -            learning_rate=3e-3,
- -            lr_schedule="kl_adaptive_epoch",
- -            lr_schedule_kl_threshold=3e-2,
- -        )
- -    if env_name == "pendulum":
- -        parser.set_defaults(
- -            # reward_scale=0.01,
- -            learning_rate=3e-4,
- -            lr_schedule="kl_adaptive_epoch",
- -            lr_schedule_kl_threshold=3e-3,
- -        )
- -    if env_name == "reacher":
- -        parser.set_defaults(
- -            reward_scale=0.1,
- -            learning_rate=3e-3,
- -            lr_schedule="kl_adaptive_epoch",
- -            lr_schedule_kl_threshold=3e-2,
- -            normalize_input=False,
- -            num_batches_per_epoch=1,
- -        )
- -    if env_name == "swimmer":
- -        parser.set_defaults(
- -            reward_scale=1,
- -            learning_rate=3e-4,
- -            lr_schedule="kl_adaptive_epoch",
- -            lr_schedule_kl_threshold=3e-3,
- -            # normalize_input=False,
- -            # num_batches_per_epoch=1,
- -            normalize_returns=True,
- -            hidden_size=128,
- -        )
- -    if env_name == "walker":
- -        parser.set_defaults(
- -            reward_scale=0.1,
- -            learning_rate=3e-3,
- -            lr_schedule="kl_adaptive_epoch",
- -            lr_schedule_kl_threshold=3e-2,
- -            # normalize_returns=True,
- -            # normalize_input=False,
- -            # num_batches_per_epoch=1,
- -        )
- +
- 
- 
- # noinspection PyUnusedLocal
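Combined with the LinearDecayScheduler hunk in learner.py above, these new defaults fix the decay horizon. A quick sanity check of the numbers, using only values from this diff:

train_for_env_steps = 10_000_000  # new default
batch_size = 1024                 # new default
num_epochs = 2                    # new default

# Same formula as LinearDecayScheduler.__init__:
num_updates = train_for_env_steps // batch_size * num_epochs
print(num_updates)  # 19530: the lr anneals from 0.00295 to 0 over ~19.5k minibatch updates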
- diff --git a/sample_factory/model/model_utils.py b/sample_factory/model/model_utils.py
- index df6c82c..d8226d8 100644
- --- a/sample_factory/model/model_utils.py
- +++ b/sample_factory/model/model_utils.py
- @@ -276,7 +276,7 @@ class MlpEncoder(EncoderBase):
-         self.init_fc_blocks(fc_encoder_layer)
- 
-     def forward(self, obs_dict):
- -        x = self.mlp_head(obs_dict['obs'].float())
- +        x = self.mlp_head(obs_dict["obs"].float())
-         x = self.forward_fc_blocks(x)
-         return x
- 
- diff --git a/sample_factory/runner/runs/mujoco_all_envs.py b/sample_factory/runner/runs/mujoco_all_envs.py
- index 3ac67ce..5cbaa1a 100644
- --- a/sample_factory/runner/runs/mujoco_all_envs.py
- +++ b/sample_factory/runner/runs/mujoco_all_envs.py
- @@ -8,12 +8,12 @@ _params = ParamGrid(
-         [
-             "mujoco_ant",
-             "mujoco_halfcheetah",
- -            "mujoco_hopper",
- +            # "mujoco_hopper",
-             "mujoco_humanoid",
- -            "mujoco_doublependulum",
- -            "mujoco_pendulum",
- -            "mujoco_reacher",
- -            "mujoco_swimmer",
- +            # "mujoco_doublependulum",
- +            # "mujoco_pendulum",
- +            # "mujoco_reacher",
- +            # "mujoco_swimmer",
-             "mujoco_walker",
-         ],
-     ),
- @@ -23,11 +23,11 @@ _params = ParamGrid(
- _experiments = [
-     Experiment(
-         "mujoco_all_envs",
- -        "python -m sample_factory_examples.mujoco_examples.train_mujoco --algo=APPO --with_wandb=True --wandb_tags mujoco runner_4",
- +        "python -m sample_factory_examples.mujoco_examples.train_mujoco --algo=APPO --with_wandb=True --wandb_tags mujoco runner_crl_4",
-         _params.generate_params(randomize=False),
-     ),
- ]
- 
- 
- RUN_DESCRIPTION = RunDescription("mujoco_all_envs", experiments=_experiments)
- -# python -m sample_factory.runner.run --run=mujoco_all_envs --runner=processes --max_parallel=8 --pause_between=1 --experiments_per_gpu=10000 --num_gpus=1 --experiment_suffix=4
- +# python -m sample_factory.runner.run --run=mujoco_all_envs --runner=processes --max_parallel=4 --pause_between=1 --experiments_per_gpu=32 --num_gpus=1 --experiment_suffix=crl_3
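For reference, a rough sketch of what the trimmed grid expands to. The grid key "env" is not visible in this hunk and is assumed here, as is the --env=<value> form the runner appends to the base command; treat this as an illustration of the expansion, not the runner's actual implementation.

from itertools import product

# Assumed grid key; the line naming it falls outside the hunk context above.
grid = {"env": ["mujoco_ant", "mujoco_halfcheetah", "mujoco_humanoid", "mujoco_walker"]}
base_cmd = (
    "python -m sample_factory_examples.mujoco_examples.train_mujoco "
    "--algo=APPO --with_wandb=True --wandb_tags mujoco runner_crl_4"
)

# One launch command per grid point, mirroring generate_params(randomize=False).
for values in product(*grid.values()):
    args = " ".join(f"--{key}={val}" for key, val in zip(grid, values))
    print(f"{base_cmd} {args}")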