GUIAgent commited on
Commit
765fb0e
·
verified ·
1 Parent(s): 2cdd03d

Delete trainer_state.json

Browse files
Files changed (1) hide show
  1. trainer_state.json +0 -1409
trainer_state.json DELETED
@@ -1,1409 +0,0 @@
1
- {
2
- "best_metric": null,
3
- "best_model_checkpoint": null,
4
- "epoch": 0.9882571793977445,
5
- "eval_steps": 500,
6
- "global_step": 8500,
7
- "is_hyper_param_search": false,
8
- "is_local_process_zero": true,
9
- "is_world_process_zero": true,
10
- "log_history": [
11
- {
12
- "clip_ratio": 0.0,
13
- "completion_length": 9.5546875,
14
- "epoch": 0.0001162655505173817,
15
- "grad_norm": 4.281470286839314,
16
- "kl": 0.0,
17
- "learning_rate": 1.1494252873563218e-08,
18
- "loss": 0.024247150868177414,
19
- "memory(GiB)": 38.7,
20
- "response_clip_ratio": 0.0,
21
- "reward": 1.191904902458191,
22
- "reward_std": 0.2820184826850891,
23
- "rewards/MultiModalMultiActionNoThinkNewAction": 1.191904902458191,
24
- "step": 1,
25
- "train_speed(iter/s)": 0.026594
26
- },
27
- {
28
- "clip_ratio": 0.0,
29
- "completion_length": 9.550307765151516,
30
- "epoch": 0.01162655505173817,
31
- "grad_norm": 3.349521431788205,
32
- "kl": 0.042687281213625514,
33
- "learning_rate": 9.99994247477391e-07,
34
- "loss": 0.006692861065720067,
35
- "memory(GiB)": 49.8,
36
- "response_clip_ratio": 0.0,
37
- "reward": 1.103371890506359,
38
- "reward_std": 0.4002408231749679,
39
- "rewards/MultiModalMultiActionNoThinkNewAction": 1.103371890506359,
40
- "step": 100,
41
- "train_speed(iter/s)": 0.093985
42
- },
43
- {
44
- "clip_ratio": 0.0,
45
- "completion_length": 9.4905078125,
46
- "epoch": 0.02325311010347634,
47
- "grad_norm": 6.366169338883484,
48
- "kl": 0.09955322265625,
49
- "learning_rate": 9.995654233098478e-07,
50
- "loss": 0.0052225708961486815,
51
- "memory(GiB)": 49.84,
52
- "response_clip_ratio": 0.0,
53
- "reward": 1.2144100672006608,
54
- "reward_std": 0.26068811796605584,
55
- "rewards/MultiModalMultiActionNoThinkNewAction": 1.2144100672006608,
56
- "step": 200,
57
- "train_speed(iter/s)": 0.09568
58
- },
59
- {
60
- "clip_ratio": 0.0,
61
- "completion_length": 9.5164453125,
62
- "epoch": 0.03487966515521451,
63
- "grad_norm": 8.965565147378136,
64
- "kl": 0.123232421875,
65
- "learning_rate": 9.98456494798275e-07,
66
- "loss": 0.005987527966499329,
67
- "memory(GiB)": 49.84,
68
- "response_clip_ratio": 0.0,
69
- "reward": 1.268085294365883,
70
- "reward_std": 0.2155047995969653,
71
- "rewards/MultiModalMultiActionNoThinkNewAction": 1.268085294365883,
72
- "step": 300,
73
- "train_speed(iter/s)": 0.095899
74
- },
75
- {
76
- "clip_ratio": 0.0,
77
- "completion_length": 9.3789453125,
78
- "epoch": 0.04650622020695268,
79
- "grad_norm": 5.398711016783826,
80
- "kl": 0.1314990234375,
81
- "learning_rate": 9.966689716290176e-07,
82
- "loss": 0.006581841707229614,
83
- "memory(GiB)": 49.84,
84
- "response_clip_ratio": 0.0,
85
- "reward": 1.3075979512929916,
86
- "reward_std": 0.1733351560495794,
87
- "rewards/MultiModalMultiActionNoThinkNewAction": 1.3075979512929916,
88
- "step": 400,
89
- "train_speed(iter/s)": 0.095915
90
- },
91
- {
92
- "clip_ratio": 0.0,
93
- "completion_length": 9.464140625,
94
- "epoch": 0.05813277525869085,
95
- "grad_norm": 11.929118986441576,
96
- "kl": 0.13150390625,
97
- "learning_rate": 9.942052873217221e-07,
98
- "loss": 0.004319159388542175,
99
- "memory(GiB)": 49.84,
100
- "response_clip_ratio": 0.0,
101
- "reward": 1.3076169914007187,
102
- "reward_std": 0.16386293478310107,
103
- "rewards/MultiModalMultiActionNoThinkNewAction": 1.3076169914007187,
104
- "step": 500,
105
- "train_speed(iter/s)": 0.095747
106
- },
107
- {
108
- "clip_ratio": 0.0,
109
- "completion_length": 9.3632421875,
110
- "epoch": 0.06975933031042902,
111
- "grad_norm": 9.252344124235806,
112
- "kl": 0.1381982421875,
113
- "learning_rate": 9.910687959163634e-07,
114
- "loss": 0.0042449763417243954,
115
- "memory(GiB)": 49.84,
116
- "response_clip_ratio": 0.0,
117
- "reward": 1.304671415090561,
118
- "reward_std": 0.1764876712858677,
119
- "rewards/MultiModalMultiActionNoThinkNewAction": 1.304671415090561,
120
- "step": 600,
121
- "train_speed(iter/s)": 0.094624
122
- },
123
- {
124
- "clip_ratio": 0.0,
125
- "completion_length": 9.49859375,
126
- "epoch": 0.08138588536216719,
127
- "grad_norm": 10.5649786127299,
128
- "kl": 0.14923828125,
129
- "learning_rate": 9.872637674070805e-07,
130
- "loss": 0.005914233922958374,
131
- "memory(GiB)": 49.84,
132
- "response_clip_ratio": 0.0,
133
- "reward": 1.34539220392704,
134
- "reward_std": 0.14272778324782848,
135
- "rewards/MultiModalMultiActionNoThinkNewAction": 1.34539220392704,
136
- "step": 700,
137
- "train_speed(iter/s)": 0.094626
138
- },
139
- {
140
- "clip_ratio": 0.0,
141
- "completion_length": 9.431015625,
142
- "epoch": 0.09301244041390536,
143
- "grad_norm": 13.958988545352236,
144
- "kl": 0.1456884765625,
145
- "learning_rate": 9.827953819290425e-07,
146
- "loss": 0.00618122935295105,
147
- "memory(GiB)": 49.86,
148
- "response_clip_ratio": 0.0,
149
- "reward": 1.3057015722990035,
150
- "reward_std": 0.15137971622869373,
151
- "rewards/MultiModalMultiActionNoThinkNewAction": 1.3057015722990035,
152
- "step": 800,
153
- "train_speed(iter/s)": 0.09467
154
- },
155
- {
156
- "clip_ratio": 0.0,
157
- "completion_length": 9.5357421875,
158
- "epoch": 0.10463899546564354,
159
- "grad_norm": 8.423788144030903,
160
- "kl": 0.14693359375,
161
- "learning_rate": 9.776697227062527e-07,
162
- "loss": 0.004831492304801941,
163
- "memory(GiB)": 49.86,
164
- "response_clip_ratio": 0.0,
165
- "reward": 1.347001107931137,
166
- "reward_std": 0.14807809382677078,
167
- "rewards/MultiModalMultiActionNoThinkNewAction": 1.347001107931137,
168
- "step": 900,
169
- "train_speed(iter/s)": 0.094728
170
- },
171
- {
172
- "clip_ratio": 0.0,
173
- "completion_length": 9.3253515625,
174
- "epoch": 0.1162655505173817,
175
- "grad_norm": 6.695960097747582,
176
- "kl": 0.1556298828125,
177
- "learning_rate": 9.718937677698976e-07,
178
- "loss": 0.006615055799484253,
179
- "memory(GiB)": 49.86,
180
- "response_clip_ratio": 0.0,
181
- "reward": 1.3540274119377136,
182
- "reward_std": 0.12457055719569325,
183
- "rewards/MultiModalMultiActionNoThinkNewAction": 1.3540274119377136,
184
- "step": 1000,
185
- "train_speed(iter/s)": 0.094744
186
- },
187
- {
188
- "clip_ratio": 0.0,
189
- "completion_length": 9.465390625,
190
- "epoch": 0.12789210556911987,
191
- "grad_norm": 10.285617962993532,
192
- "kl": 0.16072265625,
193
- "learning_rate": 9.654753804585103e-07,
194
- "loss": 0.005601688623428345,
195
- "memory(GiB)": 49.86,
196
- "response_clip_ratio": 0.0,
197
- "reward": 1.2828907597064971,
198
- "reward_std": 0.14970409277826546,
199
- "rewards/MultiModalMultiActionNoThinkNewAction": 1.2828907597064971,
200
- "step": 1100,
201
- "train_speed(iter/s)": 0.094415
202
- },
203
- {
204
- "clip_ratio": 0.0,
205
- "completion_length": 9.42546875,
206
- "epoch": 0.13951866062085805,
207
- "grad_norm": 10.836784655966541,
208
- "kl": 0.158349609375,
209
- "learning_rate": 9.584232987128862e-07,
210
- "loss": 0.005478205680847168,
211
- "memory(GiB)": 49.86,
212
- "response_clip_ratio": 0.0,
213
- "reward": 1.3187702250480653,
214
- "reward_std": 0.1322044050693512,
215
- "rewards/MultiModalMultiActionNoThinkNewAction": 1.3187702250480653,
216
- "step": 1200,
217
- "train_speed(iter/s)": 0.094452
218
- },
219
- {
220
- "clip_ratio": 0.0,
221
- "completion_length": 9.5623828125,
222
- "epoch": 0.1511452156725962,
223
- "grad_norm": 11.103606735921945,
224
- "kl": 0.1616162109375,
225
- "learning_rate": 9.507471231803197e-07,
226
- "loss": 0.00721156120300293,
227
- "memory(GiB)": 49.86,
228
- "response_clip_ratio": 0.0,
229
- "reward": 1.3138669067621231,
230
- "reward_std": 0.14569927806034685,
231
- "rewards/MultiModalMultiActionNoThinkNewAction": 1.3138669067621231,
232
- "step": 1300,
233
- "train_speed(iter/s)": 0.094427
234
- },
235
- {
236
- "clip_ratio": 0.0,
237
- "completion_length": 9.3422265625,
238
- "epoch": 0.16277177072433438,
239
- "grad_norm": 13.981767782488324,
240
- "kl": 0.154521484375,
241
- "learning_rate": 9.424573041443602e-07,
242
- "loss": 0.0046518009901046755,
243
- "memory(GiB)": 49.88,
244
- "response_clip_ratio": 0.0,
245
- "reward": 1.355497771501541,
246
- "reward_std": 0.1318685195595026,
247
- "rewards/MultiModalMultiActionNoThinkNewAction": 1.355497771501541,
248
- "step": 1400,
249
- "train_speed(iter/s)": 0.094456
250
- },
251
- {
252
- "clip_ratio": 0.0,
253
- "completion_length": 9.40296875,
254
- "epoch": 0.17439832577607256,
255
- "grad_norm": 15.113419365506667,
256
- "kl": 0.1561474609375,
257
- "learning_rate": 9.335651272978812e-07,
258
- "loss": 0.007406370639801025,
259
- "memory(GiB)": 49.88,
260
- "response_clip_ratio": 0.0,
261
- "reward": 1.3447466862201691,
262
- "reward_std": 0.11326077262870968,
263
- "rewards/MultiModalMultiActionNoThinkNewAction": 1.3447466862201691,
264
- "step": 1500,
265
- "train_speed(iter/s)": 0.094463
266
- },
267
- {
268
- "clip_ratio": 0.0,
269
- "completion_length": 9.2856640625,
270
- "epoch": 0.1860248808278107,
271
- "grad_norm": 9.743718453316509,
272
- "kl": 0.1603076171875,
273
- "learning_rate": 9.240826983788282e-07,
274
- "loss": 0.005493613481521606,
275
- "memory(GiB)": 49.88,
276
- "response_clip_ratio": 0.0,
277
- "reward": 1.3119167065620423,
278
- "reward_std": 0.12479189267382025,
279
- "rewards/MultiModalMultiActionNoThinkNewAction": 1.3119167065620423,
280
- "step": 1600,
281
- "train_speed(iter/s)": 0.094167
282
- },
283
- {
284
- "clip_ratio": 0.0,
285
- "completion_length": 9.5353515625,
286
- "epoch": 0.1976514358795489,
287
- "grad_norm": 13.264394110416175,
288
- "kl": 0.1698193359375,
289
- "learning_rate": 9.140229266895642e-07,
290
- "loss": 0.006227902770042419,
291
- "memory(GiB)": 49.88,
292
- "response_clip_ratio": 0.0,
293
- "reward": 1.3038281148672104,
294
- "reward_std": 0.11284614092670381,
295
- "rewards/MultiModalMultiActionNoThinkNewAction": 1.3038281148672104,
296
- "step": 1700,
297
- "train_speed(iter/s)": 0.094188
298
- },
299
- {
300
- "clip_ratio": 0.0,
301
- "completion_length": 9.4226171875,
302
- "epoch": 0.20927799093128707,
303
- "grad_norm": 9.544011725041532,
304
- "kl": 0.16232421875,
305
- "learning_rate": 9.033995075222507e-07,
306
- "loss": 0.005826195478439331,
307
- "memory(GiB)": 49.88,
308
- "response_clip_ratio": 0.0,
309
- "reward": 1.3212931084632873,
310
- "reward_std": 0.11722485709935426,
311
- "rewards/MultiModalMultiActionNoThinkNewAction": 1.3212931084632873,
312
- "step": 1800,
313
- "train_speed(iter/s)": 0.094171
314
- },
315
- {
316
- "clip_ratio": 0.0,
317
- "completion_length": 9.4878125,
318
- "epoch": 0.22090454598302522,
319
- "grad_norm": 10.343651509670643,
320
- "kl": 0.17587890625,
321
- "learning_rate": 8.922269035141858e-07,
322
- "loss": 0.006521174311637878,
323
- "memory(GiB)": 49.88,
324
- "response_clip_ratio": 0.0,
325
- "reward": 1.3666004729270935,
326
- "reward_std": 0.11584680547006428,
327
- "rewards/MultiModalMultiActionNoThinkNewAction": 1.3666004729270935,
328
- "step": 1900,
329
- "train_speed(iter/s)": 0.094172
330
- },
331
- {
332
- "clip_ratio": 0.0,
333
- "completion_length": 9.4062109375,
334
- "epoch": 0.2325311010347634,
335
- "grad_norm": 14.015908728784089,
336
- "kl": 0.1614306640625,
337
- "learning_rate": 8.805203249584874e-07,
338
- "loss": 0.005633658170700074,
339
- "memory(GiB)": 49.88,
340
- "response_clip_ratio": 0.0,
341
- "reward": 1.3424965512752534,
342
- "reward_std": 0.11278555382974446,
343
- "rewards/MultiModalMultiActionNoThinkNewAction": 1.3424965512752534,
344
- "step": 2000,
345
- "train_speed(iter/s)": 0.094185
346
- },
347
- {
348
- "clip_ratio": 0.0,
349
- "completion_length": 9.5082421875,
350
- "epoch": 0.24415765608650156,
351
- "grad_norm": 18.652060056142428,
352
- "kl": 0.16919921875,
353
- "learning_rate": 8.682957090969219e-07,
354
- "loss": 0.005501749515533447,
355
- "memory(GiB)": 49.96,
356
- "response_clip_ratio": 0.0,
357
- "reward": 1.3379686850309371,
358
- "reward_std": 0.11816087782382965,
359
- "rewards/MultiModalMultiActionNoThinkNewAction": 1.3379686850309371,
360
- "step": 2100,
361
- "train_speed(iter/s)": 0.094031
362
- },
363
- {
364
- "clip_ratio": 0.0,
365
- "completion_length": 9.449453125,
366
- "epoch": 0.25578421113823974,
367
- "grad_norm": 13.30826804477268,
368
- "kl": 0.1685107421875,
369
- "learning_rate": 8.555696984230717e-07,
370
- "loss": 0.006635627746582031,
371
- "memory(GiB)": 49.96,
372
- "response_clip_ratio": 0.0,
373
- "reward": 1.3248258876800536,
374
- "reward_std": 0.12071031459607184,
375
- "rewards/MultiModalMultiActionNoThinkNewAction": 1.3248258876800536,
376
- "step": 2200,
377
- "train_speed(iter/s)": 0.094031
378
- },
379
- {
380
- "clip_ratio": 0.0,
381
- "completion_length": 9.582734375,
382
- "epoch": 0.2674107661899779,
383
- "grad_norm": 14.474933819131337,
384
- "kl": 0.1704052734375,
385
- "learning_rate": 8.423596180253791e-07,
386
- "loss": 0.005957164168357849,
387
- "memory(GiB)": 49.96,
388
- "response_clip_ratio": 0.0,
389
- "reward": 1.3814373064041137,
390
- "reward_std": 0.1127387316338718,
391
- "rewards/MultiModalMultiActionNoThinkNewAction": 1.3814373064041137,
392
- "step": 2300,
393
- "train_speed(iter/s)": 0.094051
394
- },
395
- {
396
- "clip_ratio": 0.0,
397
- "completion_length": 9.45703125,
398
- "epoch": 0.2790373212417161,
399
- "grad_norm": 9.422596375579234,
400
- "kl": 0.167822265625,
401
- "learning_rate": 8.286834520009099e-07,
402
- "loss": 0.0051918733119964595,
403
- "memory(GiB)": 49.96,
404
- "response_clip_ratio": 0.0,
405
- "reward": 1.3118427366018295,
406
- "reward_std": 0.12126734969206154,
407
- "rewards/MultiModalMultiActionNoThinkNewAction": 1.3118427366018295,
408
- "step": 2400,
409
- "train_speed(iter/s)": 0.09409
410
- },
411
- {
412
- "clip_ratio": 0.0,
413
- "completion_length": 9.4315234375,
414
- "epoch": 0.29066387629345425,
415
- "grad_norm": 9.38274122340621,
416
- "kl": 0.1697900390625,
417
- "learning_rate": 8.145598189719482e-07,
418
- "loss": 0.005171371102333069,
419
- "memory(GiB)": 49.96,
420
- "response_clip_ratio": 0.0,
421
- "reward": 1.3867295610904693,
422
- "reward_std": 0.11929248780943454,
423
- "rewards/MultiModalMultiActionNoThinkNewAction": 1.3867295610904693,
424
- "step": 2500,
425
- "train_speed(iter/s)": 0.094103
426
- },
427
- {
428
- "clip_ratio": 0.0,
429
- "completion_length": 9.479296875,
430
- "epoch": 0.3022904313451924,
431
- "grad_norm": 13.737979351496811,
432
- "kl": 0.176787109375,
433
- "learning_rate": 8.000079467387547e-07,
434
- "loss": 0.007171725034713745,
435
- "memory(GiB)": 49.97,
436
- "response_clip_ratio": 0.0,
437
- "reward": 1.3833684372901915,
438
- "reward_std": 0.1071634407620877,
439
- "rewards/MultiModalMultiActionNoThinkNewAction": 1.3833684372901915,
440
- "step": 2600,
441
- "train_speed(iter/s)": 0.093982
442
- },
443
- {
444
- "clip_ratio": 0.0,
445
- "completion_length": 9.4278515625,
446
- "epoch": 0.3139169863969306,
447
- "grad_norm": 15.580608222516146,
448
- "kl": 0.171650390625,
449
- "learning_rate": 7.850476461029935e-07,
450
- "loss": 0.004804742336273193,
451
- "memory(GiB)": 49.97,
452
- "response_clip_ratio": 0.0,
453
- "reward": 1.3642382991313935,
454
- "reward_std": 0.10643348384648561,
455
- "rewards/MultiModalMultiActionNoThinkNewAction": 1.3642382991313935,
456
- "step": 2700,
457
- "train_speed(iter/s)": 0.094
458
- },
459
- {
460
- "clip_ratio": 0.0,
461
- "completion_length": 9.5507421875,
462
- "epoch": 0.32554354144866876,
463
- "grad_norm": 11.114845313442226,
464
- "kl": 0.1594287109375,
465
- "learning_rate": 7.696992838974672e-07,
466
- "loss": 0.0048704689741134646,
467
- "memory(GiB)": 49.97,
468
- "response_clip_ratio": 0.0,
469
- "reward": 1.3298124277591705,
470
- "reward_std": 0.11607776273973286,
471
- "rewards/MultiModalMultiActionNoThinkNewAction": 1.3298124277591705,
472
- "step": 2800,
473
- "train_speed(iter/s)": 0.093983
474
- },
475
- {
476
- "clip_ratio": 0.0,
477
- "completion_length": 9.53078125,
478
- "epoch": 0.3371700965004069,
479
- "grad_norm": 12.360373764757135,
480
- "kl": 0.174365234375,
481
- "learning_rate": 7.53983755258874e-07,
482
- "loss": 0.005847660303115845,
483
- "memory(GiB)": 49.97,
484
- "response_clip_ratio": 0.0,
485
- "reward": 1.3589847725629807,
486
- "reward_std": 0.10737374008633196,
487
- "rewards/MultiModalMultiActionNoThinkNewAction": 1.3589847725629807,
488
- "step": 2900,
489
- "train_speed(iter/s)": 0.093989
490
- },
491
- {
492
- "clip_ratio": 0.0,
493
- "completion_length": 9.5336328125,
494
- "epoch": 0.3487966515521451,
495
- "grad_norm": 10.064735245824536,
496
- "kl": 0.1720654296875,
497
- "learning_rate": 7.379224551813364e-07,
498
- "loss": 0.005320903062820435,
499
- "memory(GiB)": 49.97,
500
- "response_clip_ratio": 0.0,
501
- "reward": 1.3229967230558395,
502
- "reward_std": 0.11362701586447656,
503
- "rewards/MultiModalMultiActionNoThinkNewAction": 1.3229967230558395,
504
- "step": 3000,
505
- "train_speed(iter/s)": 0.094004
506
- },
507
- {
508
- "clip_ratio": 0.0,
509
- "completion_length": 9.531796875,
510
- "epoch": 0.3604232066038833,
511
- "grad_norm": 9.746163845034996,
512
- "kl": 0.1750537109375,
513
- "learning_rate": 7.215372493894305e-07,
514
- "loss": 0.005580630302429199,
515
- "memory(GiB)": 49.97,
516
- "response_clip_ratio": 0.0,
517
- "reward": 1.394041805267334,
518
- "reward_std": 0.10719707342796028,
519
- "rewards/MultiModalMultiActionNoThinkNewAction": 1.394041805267334,
520
- "step": 3100,
521
- "train_speed(iter/s)": 0.093906
522
- },
523
- {
524
- "clip_ratio": 0.0,
525
- "completion_length": 9.569140625,
526
- "epoch": 0.3720497616556214,
527
- "grad_norm": 10.075852679167767,
528
- "kl": 0.164130859375,
529
- "learning_rate": 7.048504445703623e-07,
530
- "loss": 0.00647194504737854,
531
- "memory(GiB)": 49.97,
532
- "response_clip_ratio": 0.0,
533
- "reward": 1.3294458091259003,
534
- "reward_std": 0.10014143475331366,
535
- "rewards/MultiModalMultiActionNoThinkNewAction": 1.3294458091259003,
536
- "step": 3200,
537
- "train_speed(iter/s)": 0.093919
538
- },
539
- {
540
- "clip_ratio": 0.0,
541
- "completion_length": 9.46984375,
542
- "epoch": 0.38367631670735963,
543
- "grad_norm": 9.861222956771822,
544
- "kl": 0.16787109375,
545
- "learning_rate": 6.87884758005825e-07,
546
- "loss": 0.005488141775131226,
547
- "memory(GiB)": 49.97,
548
- "response_clip_ratio": 0.0,
549
- "reward": 1.3671020436286927,
550
- "reward_std": 0.10204395545646548,
551
- "rewards/MultiModalMultiActionNoThinkNewAction": 1.3671020436286927,
552
- "step": 3300,
553
- "train_speed(iter/s)": 0.093924
554
- },
555
- {
556
- "clip_ratio": 0.0,
557
- "completion_length": 9.4744921875,
558
- "epoch": 0.3953028717590978,
559
- "grad_norm": 10.703294199835678,
560
- "kl": 0.1692626953125,
561
- "learning_rate": 6.706632866448739e-07,
562
- "loss": 0.005845343470573425,
563
- "memory(GiB)": 49.97,
564
- "response_clip_ratio": 0.0,
565
- "reward": 1.3646071863174438,
566
- "reward_std": 0.10554393734782934,
567
- "rewards/MultiModalMultiActionNoThinkNewAction": 1.3646071863174438,
568
- "step": 3400,
569
- "train_speed(iter/s)": 0.093949
570
- },
571
- {
572
- "clip_ratio": 0.0,
573
- "completion_length": 9.5876953125,
574
- "epoch": 0.40692942681083594,
575
- "grad_norm": 12.156014006973646,
576
- "kl": 0.164482421875,
577
- "learning_rate": 6.53209475659926e-07,
578
- "loss": 0.005560991764068604,
579
- "memory(GiB)": 49.97,
580
- "response_clip_ratio": 0.0,
581
- "reward": 1.3740809667110443,
582
- "reward_std": 0.10303839593194425,
583
- "rewards/MultiModalMultiActionNoThinkNewAction": 1.3740809667110443,
584
- "step": 3500,
585
- "train_speed(iter/s)": 0.093941
586
- },
587
- {
588
- "clip_ratio": 0.0,
589
- "completion_length": 9.4395703125,
590
- "epoch": 0.41855598186257414,
591
- "grad_norm": 10.579136209736488,
592
- "kl": 0.167001953125,
593
- "learning_rate": 6.355470865286917e-07,
594
- "loss": 0.0047145700454711914,
595
- "memory(GiB)": 49.97,
596
- "response_clip_ratio": 0.0,
597
- "reward": 1.3503324526548386,
598
- "reward_std": 0.09954373368062079,
599
- "rewards/MultiModalMultiActionNoThinkNewAction": 1.3503324526548386,
600
- "step": 3600,
601
- "train_speed(iter/s)": 0.09383
602
- },
603
- {
604
- "clip_ratio": 0.0,
605
- "completion_length": 9.5458984375,
606
- "epoch": 0.4301825369143123,
607
- "grad_norm": 12.621743422766063,
608
- "kl": 0.1657080078125,
609
- "learning_rate": 6.177001646854896e-07,
610
- "loss": 0.006506719589233398,
611
- "memory(GiB)": 49.97,
612
- "response_clip_ratio": 3.90625e-05,
613
- "reward": 1.3525580525398255,
614
- "reward_std": 0.09826488124206662,
615
- "rewards/MultiModalMultiActionNoThinkNewAction": 1.3525580525398255,
616
- "step": 3700,
617
- "train_speed(iter/s)": 0.093795
618
- },
619
- {
620
- "clip_ratio": 0.0,
621
- "completion_length": 9.4311328125,
622
- "epoch": 0.44180909196605045,
623
- "grad_norm": 9.354617068193154,
624
- "kl": 0.173662109375,
625
- "learning_rate": 5.996930067859863e-07,
626
- "loss": 0.005135659575462342,
627
- "memory(GiB)": 49.97,
628
- "response_clip_ratio": 0.0,
629
- "reward": 1.3989291822910308,
630
- "reward_std": 0.09648296672850848,
631
- "rewards/MultiModalMultiActionNoThinkNewAction": 1.3989291822910308,
632
- "step": 3800,
633
- "train_speed(iter/s)": 0.093833
634
- },
635
- {
636
- "clip_ratio": 0.0,
637
- "completion_length": 9.49171875,
638
- "epoch": 0.45343564701778866,
639
- "grad_norm": 14.90852834265166,
640
- "kl": 0.16689453125,
641
- "learning_rate": 5.815501276299251e-07,
642
- "loss": 0.0054639244079589845,
643
- "memory(GiB)": 49.97,
644
- "response_clip_ratio": 0.0,
645
- "reward": 1.3303378784656525,
646
- "reward_std": 0.1025526038557291,
647
- "rewards/MultiModalMultiActionNoThinkNewAction": 1.3303378784656525,
648
- "step": 3900,
649
- "train_speed(iter/s)": 0.093865
650
- },
651
- {
652
- "clip_ratio": 0.0,
653
- "completion_length": 9.45296875,
654
- "epoch": 0.4650622020695268,
655
- "grad_norm": 9.790046480640859,
656
- "kl": 0.171201171875,
657
- "learning_rate": 5.632962267868746e-07,
658
- "loss": 0.005732476711273193,
659
- "memory(GiB)": 49.97,
660
- "response_clip_ratio": 0.0,
661
- "reward": 1.3618503904342651,
662
- "reward_std": 0.09345575381070376,
663
- "rewards/MultiModalMultiActionNoThinkNewAction": 1.3618503904342651,
664
- "step": 4000,
665
- "train_speed(iter/s)": 0.093884
666
- },
667
- {
668
- "clip_ratio": 0.0,
669
- "completion_length": 9.4981640625,
670
- "epoch": 0.47668875712126496,
671
- "grad_norm": 16.23171809625287,
672
- "kl": 0.1683154296875,
673
- "learning_rate": 5.449561549704347e-07,
674
- "loss": 0.006350870132446289,
675
- "memory(GiB)": 49.97,
676
- "response_clip_ratio": 0.0,
677
- "reward": 1.3815095353126525,
678
- "reward_std": 0.10499328017234802,
679
- "rewards/MultiModalMultiActionNoThinkNewAction": 1.3815095353126525,
680
- "step": 4100,
681
- "train_speed(iter/s)": 0.093808
682
- },
683
- {
684
- "clip_ratio": 0.0,
685
- "completion_length": 9.4899609375,
686
- "epoch": 0.4883153121730031,
687
- "grad_norm": 10.028625827275622,
688
- "kl": 0.172294921875,
689
- "learning_rate": 5.265548802066736e-07,
690
- "loss": 0.005829288959503174,
691
- "memory(GiB)": 50.01,
692
- "response_clip_ratio": 0.0,
693
- "reward": 1.3365002036094666,
694
- "reward_std": 0.09621842056512833,
695
- "rewards/MultiModalMultiActionNoThinkNewAction": 1.3365002036094666,
696
- "step": 4200,
697
- "train_speed(iter/s)": 0.093821
698
- },
699
- {
700
- "clip_ratio": 0.0,
701
- "completion_length": 9.44015625,
702
- "epoch": 0.4999418672247413,
703
- "grad_norm": 10.282442330807516,
704
- "kl": 0.1719873046875,
705
- "learning_rate": 5.081174538428596e-07,
706
- "loss": 0.006229421496391297,
707
- "memory(GiB)": 50.01,
708
- "response_clip_ratio": 0.0,
709
- "reward": 1.3299858927726746,
710
- "reward_std": 0.09780080110765993,
711
- "rewards/MultiModalMultiActionNoThinkNewAction": 1.3299858927726746,
712
- "step": 4300,
713
- "train_speed(iter/s)": 0.093842
714
- },
715
- {
716
- "clip_ratio": 0.0,
717
- "completion_length": 9.4265234375,
718
- "epoch": 0.5115684222764795,
719
- "grad_norm": 10.479527710442367,
720
- "kl": 0.171787109375,
721
- "learning_rate": 4.89668976442758e-07,
722
- "loss": 0.004759013652801514,
723
- "memory(GiB)": 50.01,
724
- "response_clip_ratio": 0.0,
725
- "reward": 1.4068960046768189,
726
- "reward_std": 0.09772759121842682,
727
- "rewards/MultiModalMultiActionNoThinkNewAction": 1.4068960046768189,
728
- "step": 4400,
729
- "train_speed(iter/s)": 0.093846
730
- },
731
- {
732
- "clip_ratio": 0.0,
733
- "completion_length": 9.43875,
734
- "epoch": 0.5231949773282176,
735
- "grad_norm": 17.529468974754778,
736
- "kl": 0.1623486328125,
737
- "learning_rate": 4.7123456361492607e-07,
738
- "loss": 0.004565647840499878,
739
- "memory(GiB)": 50.01,
740
- "response_clip_ratio": 0.0,
741
- "reward": 1.3555949538946153,
742
- "reward_std": 0.09924329521134495,
743
- "rewards/MultiModalMultiActionNoThinkNewAction": 1.3555949538946153,
744
- "step": 4500,
745
- "train_speed(iter/s)": 0.093871
746
- },
747
- {
748
- "clip_ratio": 0.0,
749
- "completion_length": 9.5710546875,
750
- "epoch": 0.5348215323799558,
751
- "grad_norm": 13.65773402603584,
752
- "kl": 0.16982421875,
753
- "learning_rate": 4.528393118205257e-07,
754
- "loss": 0.005337421298027039,
755
- "memory(GiB)": 50.01,
756
- "response_clip_ratio": 0.0,
757
- "reward": 1.3599874556064606,
758
- "reward_std": 0.10061670660972595,
759
- "rewards/MultiModalMultiActionNoThinkNewAction": 1.3599874556064606,
760
- "step": 4600,
761
- "train_speed(iter/s)": 0.093805
762
- },
763
- {
764
- "clip_ratio": 0.0,
765
- "completion_length": 9.53828125,
766
- "epoch": 0.546448087431694,
767
- "grad_norm": 8.826143181240491,
768
- "kl": 0.177373046875,
769
- "learning_rate": 4.3450826420720283e-07,
770
- "loss": 0.0059269857406616215,
771
- "memory(GiB)": 50.01,
772
- "response_clip_ratio": 3.90625e-05,
773
- "reward": 1.3456268101930617,
774
- "reward_std": 0.0843733003223315,
775
- "rewards/MultiModalMultiActionNoThinkNewAction": 1.3456268101930617,
776
- "step": 4700,
777
- "train_speed(iter/s)": 0.093771
778
- },
779
- {
780
- "clip_ratio": 0.0,
781
- "completion_length": 9.7103125,
782
- "epoch": 0.5580746424834322,
783
- "grad_norm": 10.962918399547704,
784
- "kl": 0.178984375,
785
- "learning_rate": 4.1626637651554855e-07,
786
- "loss": 0.005514188408851623,
787
- "memory(GiB)": 50.01,
788
- "response_clip_ratio": 0.0,
789
- "reward": 1.3955884772539138,
790
- "reward_std": 0.0897440404444933,
791
- "rewards/MultiModalMultiActionNoThinkNewAction": 1.3955884772539138,
792
- "step": 4800,
793
- "train_speed(iter/s)": 0.093778
794
- },
795
- {
796
- "clip_ratio": 0.0,
797
- "completion_length": 9.421328125,
798
- "epoch": 0.5697011975351703,
799
- "grad_norm": 13.288533082686738,
800
- "kl": 0.1740966796875,
801
- "learning_rate": 3.9813848310455493e-07,
802
- "loss": 0.004496717453002929,
803
- "memory(GiB)": 50.01,
804
- "response_clip_ratio": 0.0,
805
- "reward": 1.3486594069004059,
806
- "reward_std": 0.09252091265749186,
807
- "rewards/MultiModalMultiActionNoThinkNewAction": 1.3486594069004059,
808
- "step": 4900,
809
- "train_speed(iter/s)": 0.093792
810
- },
811
- {
812
- "clip_ratio": 0.0,
813
- "completion_length": 9.6050390625,
814
- "epoch": 0.5813277525869085,
815
- "grad_norm": 13.576182979053868,
816
- "kl": 0.16875,
817
- "learning_rate": 3.801492631423183e-07,
818
- "loss": 0.006311448216438293,
819
- "memory(GiB)": 50.01,
820
- "response_clip_ratio": 0.0,
821
- "reward": 1.3385675930976868,
822
- "reward_std": 0.08789137057960034,
823
- "rewards/MultiModalMultiActionNoThinkNewAction": 1.3385675930976868,
824
- "step": 5000,
825
- "train_speed(iter/s)": 0.093819
826
- },
827
- {
828
- "clip_ratio": 0.0,
829
- "completion_length": 9.4701953125,
830
- "epoch": 0.5929543076386466,
831
- "grad_norm": 10.883793005621234,
832
- "kl": 0.1636376953125,
833
- "learning_rate": 3.6232320700801824e-07,
834
- "loss": 0.005545246005058288,
835
- "memory(GiB)": 50.01,
836
- "response_clip_ratio": 0.0,
837
- "reward": 1.3905673331022264,
838
- "reward_std": 0.08755960265174507,
839
- "rewards/MultiModalMultiActionNoThinkNewAction": 1.3905673331022264,
840
- "step": 5100,
841
- "train_speed(iter/s)": 0.093749
842
- },
843
- {
844
- "clip_ratio": 0.0,
845
- "completion_length": 9.4654296875,
846
- "epoch": 0.6045808626903848,
847
- "grad_norm": 10.540876662145202,
848
- "kl": 0.1647607421875,
849
- "learning_rate": 3.446845829509133e-07,
850
- "loss": 0.004752608835697174,
851
- "memory(GiB)": 50.01,
852
- "response_clip_ratio": 0.0,
853
- "reward": 1.428147051334381,
854
- "reward_std": 0.08269149933941662,
855
- "rewards/MultiModalMultiActionNoThinkNewAction": 1.428147051334381,
856
- "step": 5200,
857
- "train_speed(iter/s)": 0.093767
858
- },
859
- {
860
- "clip_ratio": 0.0,
861
- "completion_length": 9.4448046875,
862
- "epoch": 0.616207417742123,
863
- "grad_norm": 13.475346015876513,
864
- "kl": 0.16580078125,
865
- "learning_rate": 3.2725740405174216e-07,
866
- "loss": 0.0050477349758148195,
867
- "memory(GiB)": 50.01,
868
- "response_clip_ratio": 0.0,
869
- "reward": 1.404239798784256,
870
- "reward_std": 0.0858971696998924,
871
- "rewards/MultiModalMultiActionNoThinkNewAction": 1.404239798784256,
872
- "step": 5300,
873
- "train_speed(iter/s)": 0.093778
874
- },
875
- {
876
- "clip_ratio": 0.0,
877
- "completion_length": 9.4316796875,
878
- "epoch": 0.6278339727938612,
879
- "grad_norm": 12.403978855236371,
880
- "kl": 0.1601806640625,
881
- "learning_rate": 3.1006539553150724e-07,
882
- "loss": 0.005034840106964112,
883
- "memory(GiB)": 50.01,
884
- "response_clip_ratio": 0.0,
885
- "reward": 1.3801133859157562,
886
- "reward_std": 0.0971566170360893,
887
- "rewards/MultiModalMultiActionNoThinkNewAction": 1.3801133859157562,
888
- "step": 5400,
889
- "train_speed(iter/s)": 0.093788
890
- },
891
- {
892
- "clip_ratio": 0.0,
893
- "completion_length": 9.47546875,
894
- "epoch": 0.6394605278455994,
895
- "grad_norm": 13.196174592922217,
896
- "kl": 0.1671484375,
897
- "learning_rate": 2.931319624521521e-07,
898
- "loss": 0.00633289098739624,
899
- "memory(GiB)": 50.01,
900
- "response_clip_ratio": 0.0,
901
- "reward": 1.3824928963184357,
902
- "reward_std": 0.09388638647273183,
903
- "rewards/MultiModalMultiActionNoThinkNewAction": 1.3824928963184357,
904
- "step": 5500,
905
- "train_speed(iter/s)": 0.093796
906
- },
907
- {
908
- "clip_ratio": 0.0,
909
- "completion_length": 9.5144921875,
910
- "epoch": 0.6510870828973375,
911
- "grad_norm": 10.532572334095438,
912
- "kl": 0.17203125,
913
- "learning_rate": 2.7648015785309695e-07,
914
- "loss": 0.004889653325080872,
915
- "memory(GiB)": 50.01,
916
- "response_clip_ratio": 0.0,
917
- "reward": 1.4016336119174957,
918
- "reward_std": 0.09526587199419737,
919
- "rewards/MultiModalMultiActionNoThinkNewAction": 1.4016336119174957,
920
- "step": 5600,
921
- "train_speed(iter/s)": 0.09374
922
- },
923
- {
924
- "clip_ratio": 0.0,
925
- "completion_length": 9.5614453125,
926
- "epoch": 0.6627136379490757,
927
- "grad_norm": 12.720205008880969,
928
- "kl": 0.16912109375,
929
- "learning_rate": 2.60132651367017e-07,
930
- "loss": 0.00633256196975708,
931
- "memory(GiB)": 50.01,
932
- "response_clip_ratio": 0.0,
933
- "reward": 1.3703772777318954,
934
- "reward_std": 0.09573625258170068,
935
- "rewards/MultiModalMultiActionNoThinkNewAction": 1.3703772777318954,
936
- "step": 5700,
937
- "train_speed(iter/s)": 0.093756
938
- },
939
- {
940
- "clip_ratio": 0.0,
941
- "completion_length": 9.571796875,
942
- "epoch": 0.6743401930008138,
943
- "grad_norm": 6.872908071660086,
944
- "kl": 0.177060546875,
945
- "learning_rate": 2.441116983575876e-07,
946
- "loss": 0.004491334557533264,
947
- "memory(GiB)": 50.01,
948
- "response_clip_ratio": 0.0,
949
- "reward": 1.3781077253818512,
950
- "reward_std": 0.0949486710689962,
951
- "rewards/MultiModalMultiActionNoThinkNewAction": 1.3781077253818512,
952
- "step": 5800,
953
- "train_speed(iter/s)": 0.093772
954
- },
955
- {
956
- "clip_ratio": 0.0,
957
- "completion_length": 9.511953125,
958
- "epoch": 0.685966748052552,
959
- "grad_norm": 10.828437072129354,
960
- "kl": 0.1688671875,
961
- "learning_rate": 2.2843910962121028e-07,
962
- "loss": 0.004997452199459076,
963
- "memory(GiB)": 50.01,
964
- "response_clip_ratio": 0.0,
965
- "reward": 1.3701044476032258,
966
- "reward_std": 0.08872982957400381,
967
- "rewards/MultiModalMultiActionNoThinkNewAction": 1.3701044476032258,
968
- "step": 5900,
969
- "train_speed(iter/s)": 0.0938
970
- },
971
- {
972
- "clip_ratio": 0.0,
973
- "completion_length": 9.5623046875,
974
- "epoch": 0.6975933031042902,
975
- "grad_norm": 8.65544047927268,
976
- "kl": 0.1734130859375,
977
- "learning_rate": 2.1313622169397133e-07,
978
- "loss": 0.005034286975860596,
979
- "memory(GiB)": 50.01,
980
- "response_clip_ratio": 0.0,
981
- "reward": 1.376991518139839,
982
- "reward_std": 0.08544575774110853,
983
- "rewards/MultiModalMultiActionNoThinkNewAction": 1.376991518139839,
984
- "step": 6000,
985
- "train_speed(iter/s)": 0.09382
986
- },
987
- {
988
- "clip_ratio": 0.0,
989
- "completion_length": 9.5916796875,
990
- "epoch": 0.7092198581560284,
991
- "grad_norm": 10.832177815406336,
992
- "kl": 0.1687158203125,
993
- "learning_rate": 1.9822386780425242e-07,
994
- "loss": 0.005690656900405884,
995
- "memory(GiB)": 50.01,
996
- "response_clip_ratio": 0.0,
997
- "reward": 1.3775933372974396,
998
- "reward_std": 0.09453948569484055,
999
- "rewards/MultiModalMultiActionNoThinkNewAction": 1.3775933372974396,
1000
- "step": 6100,
1001
- "train_speed(iter/s)": 0.093779
1002
- },
1003
- {
1004
- "clip_ratio": 0.0,
1005
- "completion_length": 9.452265625,
1006
- "epoch": 0.7208464132077665,
1007
- "grad_norm": 9.5769032777987,
1008
- "kl": 0.167353515625,
1009
- "learning_rate": 1.8372234951054205e-07,
1010
- "loss": 0.0064952802658081055,
1011
- "memory(GiB)": 50.01,
1012
- "response_clip_ratio": 0.0,
1013
- "reward": 1.3917820090055466,
1014
- "reward_std": 0.0956609710212797,
1015
- "rewards/MultiModalMultiActionNoThinkNewAction": 1.3917820090055466,
1016
- "step": 6200,
1017
- "train_speed(iter/s)": 0.093802
1018
- },
1019
- {
1020
- "clip_ratio": 0.0,
1021
- "completion_length": 9.595390625,
1022
- "epoch": 0.7324729682595047,
1023
- "grad_norm": 13.490536213430035,
1024
- "kl": 0.16693359375,
1025
- "learning_rate": 1.696514090630571e-07,
1026
- "loss": 0.006171835064888001,
1027
- "memory(GiB)": 50.01,
1028
- "response_clip_ratio": 0.0,
1029
- "reward": 1.3438419443368912,
1030
- "reward_std": 0.09881241522729396,
1031
- "rewards/MultiModalMultiActionNoThinkNewAction": 1.3438419443368912,
1032
- "step": 6300,
1033
- "train_speed(iter/s)": 0.093826
1034
- },
1035
- {
1036
- "clip_ratio": 0.0,
1037
- "completion_length": 9.58125,
1038
- "epoch": 0.7440995233112428,
1039
- "grad_norm": 13.80425097201388,
1040
- "kl": 0.1742724609375,
1041
- "learning_rate": 1.560302025268016e-07,
1042
- "loss": 0.004496442675590515,
1043
- "memory(GiB)": 50.01,
1044
- "response_clip_ratio": 0.0,
1045
- "reward": 1.3792090493440627,
1046
- "reward_std": 0.09534793998114764,
1047
- "rewards/MultiModalMultiActionNoThinkNewAction": 1.3792090493440627,
1048
- "step": 6400,
1049
- "train_speed(iter/s)": 0.093832
1050
- },
1051
- {
1052
- "clip_ratio": 0.0,
1053
- "completion_length": 9.4980078125,
1054
- "epoch": 0.755726078362981,
1055
- "grad_norm": 12.228002609541004,
1056
- "kl": 0.172158203125,
1057
- "learning_rate": 1.4287727370265557e-07,
1058
- "loss": 0.004531278908252716,
1059
- "memory(GiB)": 50.01,
1060
- "response_clip_ratio": 0.0,
1061
- "reward": 1.3554240018129349,
1062
- "reward_std": 0.0867443119455129,
1063
- "rewards/MultiModalMultiActionNoThinkNewAction": 1.3554240018129349,
1064
- "step": 6500,
1065
- "train_speed(iter/s)": 0.093837
1066
- },
1067
- {
1068
- "clip_ratio": 0.0,
1069
- "completion_length": 9.415234375,
1070
- "epoch": 0.7673526334147193,
1071
- "grad_norm": 11.022738680450304,
1072
- "kl": 0.164775390625,
1073
- "learning_rate": 1.3021052888199273e-07,
1074
- "loss": 0.006004486083984375,
1075
- "memory(GiB)": 50.01,
1076
- "response_clip_ratio": 0.0,
1077
- "reward": 1.3786427634954452,
1078
- "reward_std": 0.09436391741968692,
1079
- "rewards/MultiModalMultiActionNoThinkNewAction": 1.3786427634954452,
1080
- "step": 6600,
1081
- "train_speed(iter/s)": 0.093792
1082
- },
1083
- {
1084
- "clip_ratio": 0.0,
1085
- "completion_length": 9.5644140625,
1086
- "epoch": 0.7789791884664574,
1087
- "grad_norm": 7.2361000289177095,
1088
- "kl": 0.17333984375,
1089
- "learning_rate": 1.1804721246919974e-07,
1090
- "loss": 0.004695640504360199,
1091
- "memory(GiB)": 50.01,
1092
- "response_clip_ratio": 0.0,
1093
- "reward": 1.383240016102791,
1094
- "reward_std": 0.09347979573532939,
1095
- "rewards/MultiModalMultiActionNoThinkNewAction": 1.383240016102791,
1096
- "step": 6700,
1097
- "train_speed(iter/s)": 0.093815
1098
- },
1099
- {
1100
- "clip_ratio": 0.0,
1101
- "completion_length": 9.3888671875,
1102
- "epoch": 0.7906057435181956,
1103
- "grad_norm": 10.627678410762373,
1104
- "kl": 0.1702392578125,
1105
- "learning_rate": 1.0640388350528345e-07,
1106
- "loss": 0.006417045593261719,
1107
- "memory(GiB)": 50.01,
1108
- "response_clip_ratio": 0.0,
1109
- "reward": 1.392098326086998,
1110
- "reward_std": 0.08011089071165771,
1111
- "rewards/MultiModalMultiActionNoThinkNewAction": 1.392098326086998,
1112
- "step": 6800,
1113
- "train_speed(iter/s)": 0.093838
1114
- },
1115
- {
1116
- "clip_ratio": 0.0,
1117
- "completion_length": 9.5668359375,
1118
- "epoch": 0.8022322985699337,
1119
- "grad_norm": 15.465333954982329,
1120
- "kl": 0.170009765625,
1121
- "learning_rate": 9.529639312452559e-08,
1122
- "loss": 0.005341029167175293,
1123
- "memory(GiB)": 50.01,
1124
- "response_clip_ratio": 0.0,
1125
- "reward": 1.4103717935085296,
1126
- "reward_std": 0.09352740393951535,
1127
- "rewards/MultiModalMultiActionNoThinkNewAction": 1.4103717935085296,
1128
- "step": 6900,
1129
- "train_speed(iter/s)": 0.093849
1130
- },
1131
- {
1132
- "clip_ratio": 0.0,
1133
- "completion_length": 9.5197265625,
1134
- "epoch": 0.8138588536216719,
1135
- "grad_norm": 14.110343993379132,
1136
- "kl": 0.1599560546875,
1137
- "learning_rate": 8.473986297487578e-08,
1138
- "loss": 0.004906933009624481,
1139
- "memory(GiB)": 50.01,
1140
- "response_clip_ratio": 0.0,
1141
- "reward": 1.4088983750343322,
1142
- "reward_std": 0.08588065375573933,
1143
- "rewards/MultiModalMultiActionNoThinkNewAction": 1.4088983750343322,
1144
- "step": 7000,
1145
- "train_speed(iter/s)": 0.093861
1146
- },
1147
- {
1148
- "clip_ratio": 0.0,
1149
- "completion_length": 9.4692578125,
1150
- "epoch": 0.82548540867341,
1151
- "grad_norm": 8.408536596023637,
1152
- "kl": 0.1673828125,
1153
- "learning_rate": 7.474866463146251e-08,
1154
- "loss": 0.005006779432296753,
1155
- "memory(GiB)": 50.01,
1156
- "response_clip_ratio": 0.0,
1157
- "reward": 1.4262621313333512,
1158
- "reward_std": 0.08552982007153333,
1159
- "rewards/MultiModalMultiActionNoThinkNewAction": 1.4262621313333512,
1160
- "step": 7100,
1161
- "train_speed(iter/s)": 0.093824
1162
- },
1163
- {
1164
- "clip_ratio": 0.0,
1165
- "completion_length": 9.4380859375,
1166
- "epoch": 0.8371119637251483,
1167
- "grad_norm": 15.805070345572961,
1168
- "kl": 0.1696728515625,
1169
- "learning_rate": 6.53364000312463e-08,
1170
- "loss": 0.005111285448074341,
1171
- "memory(GiB)": 50.01,
1172
- "response_clip_ratio": 0.0,
1173
- "reward": 1.4090092825889586,
1174
- "reward_std": 0.08110610948875546,
1175
- "rewards/MultiModalMultiActionNoThinkNewAction": 1.4090092825889586,
1176
- "step": 7200,
1177
- "train_speed(iter/s)": 0.093851
1178
- },
1179
- {
1180
- "clip_ratio": 0.0,
1181
- "completion_length": 9.5043359375,
1182
- "epoch": 0.8487385187768864,
1183
- "grad_norm": 9.57328394904971,
1184
- "kl": 0.161328125,
1185
- "learning_rate": 5.651588295545168e-08,
1186
- "loss": 0.004713370501995087,
1187
- "memory(GiB)": 50.01,
1188
- "response_clip_ratio": 0.0,
1189
- "reward": 1.3688128244876863,
1190
- "reward_std": 0.08936454836279153,
1191
- "rewards/MultiModalMultiActionNoThinkNewAction": 1.3688128244876863,
1192
- "step": 7300,
1193
- "train_speed(iter/s)": 0.093859
1194
- },
1195
- {
1196
- "clip_ratio": 0.0,
1197
- "completion_length": 9.4918359375,
1198
- "epoch": 0.8603650738286246,
1199
- "grad_norm": 13.141769298737369,
1200
- "kl": 0.1631298828125,
1201
- "learning_rate": 4.8299121584989445e-08,
1202
- "loss": 0.005130698680877685,
1203
- "memory(GiB)": 50.01,
1204
- "response_clip_ratio": 0.0,
1205
- "reward": 1.400255571603775,
1206
- "reward_std": 0.09085271211341023,
1207
- "rewards/MultiModalMultiActionNoThinkNewAction": 1.400255571603775,
1208
- "step": 7400,
1209
- "train_speed(iter/s)": 0.093861
1210
- },
1211
- {
1212
- "clip_ratio": 0.0,
1213
- "completion_length": 9.4555859375,
1214
- "epoch": 0.8719916288803627,
1215
- "grad_norm": 9.746601959968274,
1216
- "kl": 0.1671728515625,
1217
- "learning_rate": 4.069730215261552e-08,
1218
- "loss": 0.005373966693878174,
1219
- "memory(GiB)": 50.01,
1220
- "response_clip_ratio": 0.0,
1221
- "reward": 1.4106507396697998,
1222
- "reward_std": 0.08433111377526074,
1223
- "rewards/MultiModalMultiActionNoThinkNewAction": 1.4106507396697998,
1224
- "step": 7500,
1225
- "train_speed(iter/s)": 0.093864
1226
- },
1227
- {
1228
- "clip_ratio": 0.0,
1229
- "completion_length": 9.479375,
1230
- "epoch": 0.8836181839321009,
1231
- "grad_norm": 12.304794577190247,
1232
- "kl": 0.166220703125,
1233
- "learning_rate": 3.372077371408361e-08,
1234
- "loss": 0.004605398774147034,
1235
- "memory(GiB)": 50.01,
1236
- "response_clip_ratio": 0.0,
1237
- "reward": 1.4062111765146255,
1238
- "reward_std": 0.08385045255534351,
1239
- "rewards/MultiModalMultiActionNoThinkNewAction": 1.4062111765146255,
1240
- "step": 7600,
1241
- "train_speed(iter/s)": 0.093825
1242
- },
1243
- {
1244
- "clip_ratio": 0.0,
1245
- "completion_length": 9.4255078125,
1246
- "epoch": 0.895244738983839,
1247
- "grad_norm": 10.788517373431308,
1248
- "kl": 0.1640380859375,
1249
- "learning_rate": 2.7379034059024085e-08,
1250
- "loss": 0.005341172218322754,
1251
- "memory(GiB)": 50.01,
1252
- "response_clip_ratio": 0.0,
1253
- "reward": 1.400910457968712,
1254
- "reward_std": 0.09175728805363179,
1255
- "rewards/MultiModalMultiActionNoThinkNewAction": 1.400910457968712,
1256
- "step": 7700,
1257
- "train_speed(iter/s)": 0.093832
1258
- },
1259
- {
1260
- "clip_ratio": 0.0,
1261
- "completion_length": 9.557265625,
1262
- "epoch": 0.9068712940355773,
1263
- "grad_norm": 15.69745045067331,
1264
- "kl": 0.1719189453125,
1265
- "learning_rate": 2.1680716780730136e-08,
1266
- "loss": 0.0057187509536743165,
1267
- "memory(GiB)": 50.01,
1268
- "response_clip_ratio": 0.0,
1269
- "reward": 1.4035167163610458,
1270
- "reward_std": 0.09192508255131543,
1271
- "rewards/MultiModalMultiActionNoThinkNewAction": 1.4035167163610458,
1272
- "step": 7800,
1273
- "train_speed(iter/s)": 0.093839
1274
- },
1275
- {
1276
- "clip_ratio": 0.0,
1277
- "completion_length": 9.5982421875,
1278
- "epoch": 0.9184978490873155,
1279
- "grad_norm": 9.828379408967727,
1280
- "kl": 0.167861328125,
1281
- "learning_rate": 1.6633579522452923e-08,
1282
- "loss": 0.004599595665931702,
1283
- "memory(GiB)": 50.01,
1284
- "response_clip_ratio": 0.0,
1285
- "reward": 1.4050581753253937,
1286
- "reward_std": 0.08899507346563042,
1287
- "rewards/MultiModalMultiActionNoThinkNewAction": 1.4050581753253937,
1288
- "step": 7900,
1289
- "train_speed(iter/s)": 0.093848
1290
- },
1291
- {
1292
- "clip_ratio": 0.0,
1293
- "completion_length": 9.4523828125,
1294
- "epoch": 0.9301244041390536,
1295
- "grad_norm": 7.564846175327,
1296
- "kl": 0.165205078125,
1297
- "learning_rate": 1.2244493416208424e-08,
1298
- "loss": 0.006511397361755371,
1299
- "memory(GiB)": 50.01,
1300
- "response_clip_ratio": 0.0,
1301
- "reward": 1.3674461019039155,
1302
- "reward_std": 0.09354917639866471,
1303
- "rewards/MultiModalMultiActionNoThinkNewAction": 1.3674461019039155,
1304
- "step": 8000,
1305
- "train_speed(iter/s)": 0.093866
1306
- },
1307
- {
1308
- "clip_ratio": 0.0,
1309
- "completion_length": 9.441796875,
1310
- "epoch": 0.9417509591907918,
1311
- "grad_norm": 8.473570999026597,
1312
- "kl": 0.166337890625,
1313
- "learning_rate": 8.51943372847419e-09,
1314
- "loss": 0.004957606792449951,
1315
- "memory(GiB)": 50.01,
1316
- "response_clip_ratio": 0.0,
1317
- "reward": 1.3664843720197677,
1318
- "reward_std": 0.09796678048558533,
1319
- "rewards/MultiModalMultiActionNoThinkNewAction": 1.3664843720197677,
1320
- "step": 8100,
1321
- "train_speed(iter/s)": 0.093827
1322
- },
1323
- {
1324
- "clip_ratio": 0.0,
1325
- "completion_length": 9.4091796875,
1326
- "epoch": 0.9533775142425299,
1327
- "grad_norm": 17.38564208248951,
1328
- "kl": 0.1664453125,
1329
- "learning_rate": 5.463471725509206e-09,
1330
- "loss": 0.005511881113052368,
1331
- "memory(GiB)": 50.01,
1332
- "response_clip_ratio": 0.0,
1333
- "reward": 1.3864123088121414,
1334
- "reward_std": 0.09022373898886143,
1335
- "rewards/MultiModalMultiActionNoThinkNewAction": 1.3864123088121414,
1336
- "step": 8200,
1337
- "train_speed(iter/s)": 0.093846
1338
- },
1339
- {
1340
- "clip_ratio": 0.0,
1341
- "completion_length": 9.52640625,
1342
- "epoch": 0.9650040692942681,
1343
- "grad_norm": 12.073204680109393,
1344
- "kl": 0.1724462890625,
1345
- "learning_rate": 3.0807677693729385e-09,
1346
- "loss": 0.006146684885025024,
1347
- "memory(GiB)": 50.01,
1348
- "response_clip_ratio": 0.0,
1349
- "reward": 1.3671981352567673,
1350
- "reward_std": 0.09982930341735483,
1351
- "rewards/MultiModalMultiActionNoThinkNewAction": 1.3671981352567673,
1352
- "step": 8300,
1353
- "train_speed(iter/s)": 0.093858
1354
- },
1355
- {
1356
- "clip_ratio": 0.0,
1357
- "completion_length": 9.4923828125,
1358
- "epoch": 0.9766306243460062,
1359
- "grad_norm": 14.136069122546115,
1360
- "kl": 0.164794921875,
1361
- "learning_rate": 1.3745656540422036e-09,
1362
- "loss": 0.0051899772882461545,
1363
- "memory(GiB)": 50.01,
1364
- "response_clip_ratio": 0.0,
1365
- "reward": 1.3862751299142837,
1366
- "reward_std": 0.09112708026543259,
1367
- "rewards/MultiModalMultiActionNoThinkNewAction": 1.3862751299142837,
1368
- "step": 8400,
1369
- "train_speed(iter/s)": 0.093871
1370
- },
1371
- {
1372
- "clip_ratio": 0.0,
1373
- "completion_length": 9.5616015625,
1374
- "epoch": 0.9882571793977445,
1375
- "grad_norm": 7.965510618669221,
1376
- "kl": 0.168076171875,
1377
- "learning_rate": 3.4718818933582305e-10,
1378
- "loss": 0.0061265045404434205,
1379
- "memory(GiB)": 50.01,
1380
- "response_clip_ratio": 0.0,
1381
- "reward": 1.3907999232411385,
1382
- "reward_std": 0.08998070719651878,
1383
- "rewards/MultiModalMultiActionNoThinkNewAction": 1.3907999232411385,
1384
- "step": 8500,
1385
- "train_speed(iter/s)": 0.093882
1386
- }
1387
- ],
1388
- "logging_steps": 100,
1389
- "max_steps": 8601,
1390
- "num_input_tokens_seen": 0,
1391
- "num_train_epochs": 1,
1392
- "save_steps": 500,
1393
- "stateful_callbacks": {
1394
- "TrainerControl": {
1395
- "args": {
1396
- "should_epoch_stop": false,
1397
- "should_evaluate": false,
1398
- "should_log": false,
1399
- "should_save": true,
1400
- "should_training_stop": false
1401
- },
1402
- "attributes": {}
1403
- }
1404
- },
1405
- "total_flos": 0.0,
1406
- "train_batch_size": 8,
1407
- "trial_name": null,
1408
- "trial_params": null
1409
- }