ALCIAN committed on
Commit
aa693a0
1 Parent(s): 8219146

Update configuration.yaml

Browse files
Files changed (1) hide show
  1. configuration.yaml +299 -85
configuration.yaml CHANGED
@@ -1,88 +1,302 @@
1
- default_settings: null
2
- behaviors:
3
- Pyramids:
4
- trainer_type: ppo
5
- hyperparameters:
6
- batch_size: 128
7
- buffer_size: 2048
8
- learning_rate: 0.0003
9
- beta: 0.01
10
- epsilon: 0.2
11
- lambd: 0.95
12
- num_epoch: 3
13
- learning_rate_schedule: linear
14
- beta_schedule: linear
15
- epsilon_schedule: linear
16
- network_settings:
17
- normalize: false
18
- hidden_units: 512
19
- num_layers: 2
20
- vis_encode_type: simple
21
- memory: null
22
- goal_conditioning_type: hyper
23
- deterministic: false
24
  reward_signals:
25
- extrinsic:
26
- gamma: 0.99
27
- strength: 1.0
28
- network_settings:
29
- normalize: false
30
- hidden_units: 128
31
- num_layers: 2
32
- vis_encode_type: simple
33
- memory: null
34
- goal_conditioning_type: hyper
35
- deterministic: false
36
- rnd:
37
- gamma: 0.99
38
- strength: 0.01
39
- network_settings:
40
- normalize: false
41
- hidden_units: 64
42
- num_layers: 3
43
- vis_encode_type: simple
44
- memory: null
45
- goal_conditioning_type: hyper
46
- deterministic: false
47
- learning_rate: 0.0001
48
- encoding_size: null
49
- init_path: null
50
- keep_checkpoints: 5
51
- checkpoint_interval: 500000
52
- max_steps: 3000000
 
 
 
 
 
 
53
  time_horizon: 128
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
54
  summary_freq: 30000
55
- threaded: false
56
- self_play: null
57
- behavioral_cloning: null
58
- env_settings:
59
- env_path: trained-envs-executables/Linux/Pyramids/Pyramids
60
- env_args: null
61
- base_port: 5005
62
- num_envs: 1
63
- num_areas: 1
64
- seed: -1
65
- max_lifetime_restarts: 10
66
- restarts_rate_limit_n: 1
67
- restarts_rate_limit_period_s: 60
68
- engine_settings:
69
- width: 84
70
- height: 84
71
- quality_level: 5
72
- time_scale: 20
73
- target_frame_rate: -1
74
- capture_frame_rate: 60
75
- no_graphics: true
76
- environment_parameters: null
77
- checkpoint_settings:
78
- run_id: First Training
79
- initialize_from: null
80
- load_model: false
81
- resume: true
82
- force: false
83
- train_model: false
84
- inference: false
85
- results_dir: results
86
- torch_settings:
87
- device: cuda:0
88
- debug: false
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ default:
2
+ trainer: ppo
3
+ batch_size: 1024
4
+ beta: 5.0e-3
5
+ buffer_size: 10240
6
+ epsilon: 0.2
7
+ hidden_units: 128
8
+ lambd: 0.95
9
+ learning_rate: 3.0e-4
10
+ learning_rate_schedule: linear
11
+ max_steps: 5.0e5
12
+ memory_size: 256
13
+ normalize: false
14
+ num_epoch: 3
15
+ num_layers: 2
16
+ time_horizon: 64
17
+ sequence_length: 64
18
+ summary_freq: 10000
19
+ use_recurrent: false
20
+ vis_encode_type: simple
 
 
 
21
  reward_signals:
22
+ extrinsic:
23
+ strength: 1.0
24
+ gamma: 0.99
25
+
26
+ FoodCollector:
27
+ normalize: false
28
+ beta: 5.0e-3
29
+ batch_size: 1024
30
+ buffer_size: 10240
31
+ max_steps: 2.0e6
32
+
33
+ Bouncer:
34
+ normalize: true
35
+ max_steps: 7.0e6
36
+ num_layers: 2
37
+ hidden_units: 64
38
+
39
+ PushBlock:
40
+ max_steps: 1.5e7
41
+ batch_size: 128
42
+ buffer_size: 2048
43
+ beta: 1.0e-2
44
+ hidden_units: 256
45
+ summary_freq: 60000
46
+ time_horizon: 64
47
+ num_layers: 2
48
+
49
+ SmallWallJump:
50
+ max_steps: 5e6
51
+ batch_size: 128
52
+ buffer_size: 2048
53
+ beta: 5.0e-3
54
+ hidden_units: 256
55
+ summary_freq: 20000
56
  time_horizon: 128
57
+ num_layers: 2
58
+ normalize: false
59
+
60
+ BigWallJump:
61
+ max_steps: 2e7
62
+ batch_size: 128
63
+ buffer_size: 2048
64
+ beta: 5.0e-3
65
+ hidden_units: 256
66
+ summary_freq: 20000
67
+ time_horizon: 128
68
+ num_layers: 2
69
+ normalize: false
70
+
71
+ Pyramids:
72
+ summary_freq: 30000
73
+ time_horizon: 128
74
+ batch_size: 128
75
+ buffer_size: 2048
76
+ hidden_units: 512
77
+ num_layers: 2
78
+ beta: 1.0e-2
79
+ max_steps: 1.0e7
80
+ num_epoch: 3
81
+ reward_signals:
82
+ extrinsic:
83
+ strength: 1.0
84
+ gamma: 0.99
85
+ curiosity:
86
+ strength: 0.02
87
+ gamma: 0.99
88
+ encoding_size: 256
89
+
90
+ VisualPyramids:
91
+ time_horizon: 128
92
+ batch_size: 64
93
+ buffer_size: 2024
94
+ hidden_units: 256
95
+ num_layers: 1
96
+ beta: 1.0e-2
97
+ max_steps: 1.0e7
98
+ num_epoch: 3
99
+ reward_signals:
100
+ extrinsic:
101
+ strength: 1.0
102
+ gamma: 0.99
103
+ curiosity:
104
+ strength: 0.01
105
+ gamma: 0.99
106
+ encoding_size: 256
107
+
108
+ 3DBall:
109
+ normalize: true
110
+ batch_size: 64
111
+ buffer_size: 12000
112
+ summary_freq: 12000
113
+ time_horizon: 1000
114
+ lambd: 0.99
115
+ beta: 0.001
116
+
117
+ 3DBallHard:
118
+ normalize: true
119
+ batch_size: 1200
120
+ buffer_size: 12000
121
+ summary_freq: 12000
122
+ time_horizon: 1000
123
+ max_steps: 5.0e5
124
+ beta: 0.001
125
+ reward_signals:
126
+ extrinsic:
127
+ strength: 1.0
128
+ gamma: 0.995
129
+
130
+ Tennis:
131
+ normalize: true
132
+ max_steps: 5.0e7
133
+ learning_rate_schedule: constant
134
+ batch_size: 1024
135
+ buffer_size: 10240
136
+ hidden_units: 256
137
+ time_horizon: 1000
138
+ self_play:
139
+ window: 10
140
+ play_against_current_self_ratio: 0.5
141
+ save_steps: 50000
142
+ swap_steps: 50000
143
+
144
+ Soccer:
145
+ normalize: false
146
+ max_steps: 5.0e7
147
+ learning_rate_schedule: constant
148
+ batch_size: 2048
149
+ buffer_size: 20480
150
+ hidden_units: 512
151
+ time_horizon: 1000
152
+ num_layers: 2
153
+ self_play:
154
+ window: 10
155
+ play_against_current_self_ratio: 0.5
156
+ save_steps: 50000
157
+ swap_steps: 50000
158
+
159
+ CrawlerStatic:
160
+ normalize: true
161
+ num_epoch: 3
162
+ time_horizon: 1000
163
+ batch_size: 2024
164
+ buffer_size: 20240
165
+ max_steps: 1e7
166
+ summary_freq: 30000
167
+ num_layers: 3
168
+ hidden_units: 512
169
+ reward_signals:
170
+ extrinsic:
171
+ strength: 1.0
172
+ gamma: 0.995
173
+
174
+ CrawlerDynamic:
175
+ normalize: true
176
+ num_epoch: 3
177
+ time_horizon: 1000
178
+ batch_size: 2024
179
+ buffer_size: 20240
180
+ max_steps: 1e7
181
  summary_freq: 30000
182
+ num_layers: 3
183
+ hidden_units: 512
184
+ reward_signals:
185
+ extrinsic:
186
+ strength: 1.0
187
+ gamma: 0.995
188
+
189
+ Walker:
190
+ normalize: true
191
+ num_epoch: 3
192
+ time_horizon: 1000
193
+ batch_size: 2048
194
+ buffer_size: 20480
195
+ max_steps: 2e7
196
+ summary_freq: 30000
197
+ num_layers: 3
198
+ hidden_units: 512
199
+ reward_signals:
200
+ extrinsic:
201
+ strength: 1.0
202
+ gamma: 0.995
203
+
204
+ Reacher:
205
+ normalize: true
206
+ num_epoch: 3
207
+ time_horizon: 1000
208
+ batch_size: 2024
209
+ buffer_size: 20240
210
+ max_steps: 2e7
211
+ summary_freq: 60000
212
+ reward_signals:
213
+ extrinsic:
214
+ strength: 1.0
215
+ gamma: 0.995
216
+
217
+ Hallway:
218
+ use_recurrent: true
219
+ sequence_length: 64
220
+ num_layers: 2
221
+ hidden_units: 128
222
+ memory_size: 256
223
+ beta: 1.0e-2
224
+ num_epoch: 3
225
+ buffer_size: 1024
226
+ batch_size: 128
227
+ max_steps: 1.0e7
228
+ summary_freq: 10000
229
+ time_horizon: 64
230
+
231
+ VisualHallway:
232
+ use_recurrent: true
233
+ sequence_length: 64
234
+ num_layers: 1
235
+ hidden_units: 128
236
+ memory_size: 256
237
+ beta: 1.0e-2
238
+ num_epoch: 3
239
+ buffer_size: 1024
240
+ batch_size: 64
241
+ max_steps: 1.0e7
242
+ summary_freq: 10000
243
+ time_horizon: 64
244
+
245
+ VisualPushBlock:
246
+ use_recurrent: true
247
+ sequence_length: 32
248
+ num_layers: 1
249
+ hidden_units: 128
250
+ memory_size: 256
251
+ beta: 1.0e-2
252
+ num_epoch: 3
253
+ buffer_size: 1024
254
+ batch_size: 64
255
+ max_steps: 3.0e6
256
+ summary_freq: 60000
257
+ time_horizon: 64
258
+
259
+ GridWorld:
260
+ batch_size: 32
261
+ normalize: false
262
+ num_layers: 1
263
+ hidden_units: 256
264
+ beta: 5.0e-3
265
+ buffer_size: 256
266
+ max_steps: 500000
267
+ summary_freq: 20000
268
+ time_horizon: 5
269
+ reward_signals:
270
+ extrinsic:
271
+ strength: 1.0
272
+ gamma: 0.9
273
+
274
+ Basic:
275
+ batch_size: 32
276
+ normalize: false
277
+ num_layers: 1
278
+ hidden_units: 20
279
+ beta: 5.0e-3
280
+ buffer_size: 256
281
+ max_steps: 5.0e5
282
+ summary_freq: 2000
283
+ time_horizon: 3
284
+ reward_signals:
285
+ extrinsic:
286
+ strength: 1.0
287
+ gamma: 0.9
288
+
289
+ AircraftLearning:
290
+ summary_freq: 32000
291
+ time_horizon: 128
292
+ batch_size: 2048 #512
293
+ buffer_size: 20480 #4096
294
+ hidden_units: 256 #128
295
+ num_layers: 2
296
+ beta: 1.0e-2
297
+ max_steps: 5.0e7
298
+ num_epoch: 3
299
+ reward_signals:
300
+ extrinsic:
301
+ strength: 1.0
302
+ gamma: 0.99