DavidGF commited on
Commit
7eb798e
1 Parent(s): adf49e3

Delete kraken_router/trainer_state.json

Browse files
Files changed (1) hide show
  1. kraken_router/trainer_state.json +0 -747
kraken_router/trainer_state.json DELETED
@@ -1,747 +0,0 @@
1
- {
2
- "best_metric": null,
3
- "best_model_checkpoint": null,
4
- "epoch": 5.283331606754377,
5
- "eval_steps": 500,
6
- "global_step": 51000,
7
- "is_hyper_param_search": false,
8
- "is_local_process_zero": true,
9
- "is_world_process_zero": true,
10
- "log_history": [
11
- {
12
- "epoch": 0.05179736869367036,
13
- "grad_norm": 3.8771300836515366e-08,
14
- "learning_rate": 1.9852007518018084e-05,
15
- "loss": 0.0996,
16
- "step": 500
17
- },
18
- {
19
- "epoch": 0.10359473738734072,
20
- "grad_norm": 9.604158321963041e-07,
21
- "learning_rate": 1.970401503603617e-05,
22
- "loss": 0.256,
23
- "step": 1000
24
- },
25
- {
26
- "epoch": 0.1553921060810111,
27
- "grad_norm": 1.8179751350544393e-05,
28
- "learning_rate": 1.9556022554054253e-05,
29
- "loss": 0.1367,
30
- "step": 1500
31
- },
32
- {
33
- "epoch": 0.20718947477468144,
34
- "grad_norm": 8.53914680192247e-05,
35
- "learning_rate": 1.9408030072072343e-05,
36
- "loss": 0.063,
37
- "step": 2000
38
- },
39
- {
40
- "epoch": 0.2589868434683518,
41
- "grad_norm": 4.6534645662177354e-05,
42
- "learning_rate": 1.9260037590090425e-05,
43
- "loss": 0.0201,
44
- "step": 2500
45
- },
46
- {
47
- "epoch": 0.3107842121620222,
48
- "grad_norm": 6.902104843220513e-08,
49
- "learning_rate": 1.911204510810851e-05,
50
- "loss": 0.106,
51
- "step": 3000
52
- },
53
- {
54
- "epoch": 0.36258158085569253,
55
- "grad_norm": 2.192794745781157e-08,
56
- "learning_rate": 1.8964052626126594e-05,
57
- "loss": 0.0797,
58
- "step": 3500
59
- },
60
- {
61
- "epoch": 0.4143789495493629,
62
- "grad_norm": 4.58152558859698e-13,
63
- "learning_rate": 1.881606014414468e-05,
64
- "loss": 0.0264,
65
- "step": 4000
66
- },
67
- {
68
- "epoch": 0.46617631824303324,
69
- "grad_norm": 4.8503436119062826e-05,
70
- "learning_rate": 1.8668067662162763e-05,
71
- "loss": 0.0329,
72
- "step": 4500
73
- },
74
- {
75
- "epoch": 0.5179736869367036,
76
- "grad_norm": 9.135746950050816e-05,
77
- "learning_rate": 1.852007518018085e-05,
78
- "loss": 0.023,
79
- "step": 5000
80
- },
81
- {
82
- "epoch": 0.569771055630374,
83
- "grad_norm": 2.3920888381212535e-08,
84
- "learning_rate": 1.8372082698198932e-05,
85
- "loss": 0.044,
86
- "step": 5500
87
- },
88
- {
89
- "epoch": 0.6215684243240444,
90
- "grad_norm": 1.1770172932301648e-05,
91
- "learning_rate": 1.8224090216217018e-05,
92
- "loss": 0.0097,
93
- "step": 6000
94
- },
95
- {
96
- "epoch": 0.6733657930177147,
97
- "grad_norm": 5.798747224616818e-05,
98
- "learning_rate": 1.8076097734235104e-05,
99
- "loss": 0.0628,
100
- "step": 6500
101
- },
102
- {
103
- "epoch": 0.7251631617113851,
104
- "grad_norm": 0.0055756960064172745,
105
- "learning_rate": 1.7928105252253187e-05,
106
- "loss": 0.0167,
107
- "step": 7000
108
- },
109
- {
110
- "epoch": 0.7769605304050554,
111
- "grad_norm": 6.234566535567865e-05,
112
- "learning_rate": 1.7780112770271273e-05,
113
- "loss": 0.0342,
114
- "step": 7500
115
- },
116
- {
117
- "epoch": 0.8287578990987258,
118
- "grad_norm": 2.065628723357804e-05,
119
- "learning_rate": 1.7632120288289356e-05,
120
- "loss": 0.0278,
121
- "step": 8000
122
- },
123
- {
124
- "epoch": 0.8805552677923961,
125
- "grad_norm": 0.00034213648177683353,
126
- "learning_rate": 1.7484127806307442e-05,
127
- "loss": 0.0777,
128
- "step": 8500
129
- },
130
- {
131
- "epoch": 0.9323526364860665,
132
- "grad_norm": 0.0024548424407839775,
133
- "learning_rate": 1.7336135324325525e-05,
134
- "loss": 0.0117,
135
- "step": 9000
136
- },
137
- {
138
- "epoch": 0.9841500051797368,
139
- "grad_norm": 2.010272328334395e-05,
140
- "learning_rate": 1.718814284234361e-05,
141
- "loss": 0.0149,
142
- "step": 9500
143
- },
144
- {
145
- "epoch": 1.0359473738734073,
146
- "grad_norm": 3.7440368032548577e-05,
147
- "learning_rate": 1.7040150360361697e-05,
148
- "loss": 0.0059,
149
- "step": 10000
150
- },
151
- {
152
- "epoch": 1.0877447425670776,
153
- "grad_norm": 9.011640031530987e-06,
154
- "learning_rate": 1.689215787837978e-05,
155
- "loss": 0.0245,
156
- "step": 10500
157
- },
158
- {
159
- "epoch": 1.139542111260748,
160
- "grad_norm": 3.9126422052504495e-05,
161
- "learning_rate": 1.6744165396397866e-05,
162
- "loss": 0.0001,
163
- "step": 11000
164
- },
165
- {
166
- "epoch": 1.1913394799544184,
167
- "grad_norm": 8.866464668244589e-06,
168
- "learning_rate": 1.659617291441595e-05,
169
- "loss": 0.0,
170
- "step": 11500
171
- },
172
- {
173
- "epoch": 1.2431368486480887,
174
- "grad_norm": 0.00013718219997826964,
175
- "learning_rate": 1.6448180432434035e-05,
176
- "loss": 0.0181,
177
- "step": 12000
178
- },
179
- {
180
- "epoch": 1.294934217341759,
181
- "grad_norm": 3.808485416811891e-06,
182
- "learning_rate": 1.6300187950452117e-05,
183
- "loss": 0.0,
184
- "step": 12500
185
- },
186
- {
187
- "epoch": 1.3467315860354294,
188
- "grad_norm": 7.217061011033366e-07,
189
- "learning_rate": 1.6152195468470203e-05,
190
- "loss": 0.0266,
191
- "step": 13000
192
- },
193
- {
194
- "epoch": 1.3985289547290998,
195
- "grad_norm": 7.131046731956303e-05,
196
- "learning_rate": 1.600420298648829e-05,
197
- "loss": 0.0266,
198
- "step": 13500
199
- },
200
- {
201
- "epoch": 1.4503263234227701,
202
- "grad_norm": 9.411406608705875e-06,
203
- "learning_rate": 1.5856210504506372e-05,
204
- "loss": 0.0079,
205
- "step": 14000
206
- },
207
- {
208
- "epoch": 1.5021236921164405,
209
- "grad_norm": 7.976142660481855e-05,
210
- "learning_rate": 1.570821802252446e-05,
211
- "loss": 0.022,
212
- "step": 14500
213
- },
214
- {
215
- "epoch": 1.5539210608101108,
216
- "grad_norm": 1.0580498610579525e-06,
217
- "learning_rate": 1.556022554054254e-05,
218
- "loss": 0.0093,
219
- "step": 15000
220
- },
221
- {
222
- "epoch": 1.6057184295037812,
223
- "grad_norm": 6.298066182353068e-06,
224
- "learning_rate": 1.5412233058560627e-05,
225
- "loss": 0.0,
226
- "step": 15500
227
- },
228
- {
229
- "epoch": 1.6575157981974515,
230
- "grad_norm": 2.102418066030065e-12,
231
- "learning_rate": 1.526424057657871e-05,
232
- "loss": 0.0024,
233
- "step": 16000
234
- },
235
- {
236
- "epoch": 1.709313166891122,
237
- "grad_norm": 3.009020701938425e-06,
238
- "learning_rate": 1.5116248094596794e-05,
239
- "loss": 0.009,
240
- "step": 16500
241
- },
242
- {
243
- "epoch": 1.7611105355847922,
244
- "grad_norm": 6.2723142946197186e-06,
245
- "learning_rate": 1.4968255612614882e-05,
246
- "loss": 0.023,
247
- "step": 17000
248
- },
249
- {
250
- "epoch": 1.8129079042784626,
251
- "grad_norm": 5.638932634610683e-06,
252
- "learning_rate": 1.4820263130632967e-05,
253
- "loss": 0.0099,
254
- "step": 17500
255
- },
256
- {
257
- "epoch": 1.8647052729721332,
258
- "grad_norm": 3.8804391806479543e-05,
259
- "learning_rate": 1.4672270648651051e-05,
260
- "loss": 0.0185,
261
- "step": 18000
262
- },
263
- {
264
- "epoch": 1.9165026416658035,
265
- "grad_norm": 3.445857828410226e-06,
266
- "learning_rate": 1.4524278166669134e-05,
267
- "loss": 0.0,
268
- "step": 18500
269
- },
270
- {
271
- "epoch": 1.9683000103594739,
272
- "grad_norm": 0.0029530602041631937,
273
- "learning_rate": 1.4376285684687218e-05,
274
- "loss": 0.0243,
275
- "step": 19000
276
- },
277
- {
278
- "epoch": 2.0200973790531442,
279
- "grad_norm": 7.133132271519571e-07,
280
- "learning_rate": 1.4228293202705303e-05,
281
- "loss": 0.0189,
282
- "step": 19500
283
- },
284
- {
285
- "epoch": 2.0718947477468146,
286
- "grad_norm": 0.00042451228364370763,
287
- "learning_rate": 1.4080300720723387e-05,
288
- "loss": 0.0062,
289
- "step": 20000
290
- },
291
- {
292
- "epoch": 2.123692116440485,
293
- "grad_norm": 1.8360736930844723e-06,
294
- "learning_rate": 1.3932308238741471e-05,
295
- "loss": 0.0067,
296
- "step": 20500
297
- },
298
- {
299
- "epoch": 2.1754894851341553,
300
- "grad_norm": 0.0001334488915745169,
301
- "learning_rate": 1.378431575675956e-05,
302
- "loss": 0.006,
303
- "step": 21000
304
- },
305
- {
306
- "epoch": 2.2272868538278257,
307
- "grad_norm": 4.610120413417462e-06,
308
- "learning_rate": 1.3636323274777644e-05,
309
- "loss": 0.0061,
310
- "step": 21500
311
- },
312
- {
313
- "epoch": 2.279084222521496,
314
- "grad_norm": 2.7200339900446124e-06,
315
- "learning_rate": 1.3488330792795728e-05,
316
- "loss": 0.0,
317
- "step": 22000
318
- },
319
- {
320
- "epoch": 2.3308815912151664,
321
- "grad_norm": 3.3594403703318676e-06,
322
- "learning_rate": 1.3340338310813813e-05,
323
- "loss": 0.009,
324
- "step": 22500
325
- },
326
- {
327
- "epoch": 2.3826789599088367,
328
- "grad_norm": 5.500828137883218e-06,
329
- "learning_rate": 1.3192345828831897e-05,
330
- "loss": 0.0083,
331
- "step": 23000
332
- },
333
- {
334
- "epoch": 2.434476328602507,
335
- "grad_norm": 1103.414306640625,
336
- "learning_rate": 1.304435334684998e-05,
337
- "loss": 0.0058,
338
- "step": 23500
339
- },
340
- {
341
- "epoch": 2.4862736972961774,
342
- "grad_norm": 1.7569537931194645e-06,
343
- "learning_rate": 1.2896360864868064e-05,
344
- "loss": 0.0028,
345
- "step": 24000
346
- },
347
- {
348
- "epoch": 2.5380710659898478,
349
- "grad_norm": 9.93580897556967e-07,
350
- "learning_rate": 1.2748368382886152e-05,
351
- "loss": 0.0052,
352
- "step": 24500
353
- },
354
- {
355
- "epoch": 2.589868434683518,
356
- "grad_norm": 2.71925017225616e-12,
357
- "learning_rate": 1.2600375900904236e-05,
358
- "loss": 0.0,
359
- "step": 25000
360
- },
361
- {
362
- "epoch": 2.6416658033771885,
363
- "grad_norm": 1.8420889318804257e-05,
364
- "learning_rate": 1.245238341892232e-05,
365
- "loss": 0.0195,
366
- "step": 25500
367
- },
368
- {
369
- "epoch": 2.693463172070859,
370
- "grad_norm": 8.20615071006614e-07,
371
- "learning_rate": 1.2304390936940405e-05,
372
- "loss": 0.008,
373
- "step": 26000
374
- },
375
- {
376
- "epoch": 2.745260540764529,
377
- "grad_norm": 6.169057451188564e-05,
378
- "learning_rate": 1.215639845495849e-05,
379
- "loss": 0.0005,
380
- "step": 26500
381
- },
382
- {
383
- "epoch": 2.7970579094581995,
384
- "grad_norm": 2.9846903544239467e-06,
385
- "learning_rate": 1.2008405972976574e-05,
386
- "loss": 0.0037,
387
- "step": 27000
388
- },
389
- {
390
- "epoch": 2.84885527815187,
391
- "grad_norm": 8.764583071751986e-06,
392
- "learning_rate": 1.1860413490994659e-05,
393
- "loss": 0.0156,
394
- "step": 27500
395
- },
396
- {
397
- "epoch": 2.9006526468455403,
398
- "grad_norm": 5.1639810408232734e-05,
399
- "learning_rate": 1.1712421009012743e-05,
400
- "loss": 0.0,
401
- "step": 28000
402
- },
403
- {
404
- "epoch": 2.9524500155392106,
405
- "grad_norm": 8.454779163002968e-06,
406
- "learning_rate": 1.1564428527030829e-05,
407
- "loss": 0.0,
408
- "step": 28500
409
- },
410
- {
411
- "epoch": 3.004247384232881,
412
- "grad_norm": 1.0601724653724887e-07,
413
- "learning_rate": 1.1416436045048913e-05,
414
- "loss": 0.0,
415
- "step": 29000
416
- },
417
- {
418
- "epoch": 3.0560447529265513,
419
- "grad_norm": 3.302725417597685e-06,
420
- "learning_rate": 1.1268443563066998e-05,
421
- "loss": 0.0,
422
- "step": 29500
423
- },
424
- {
425
- "epoch": 3.1078421216202217,
426
- "grad_norm": 8.728113243705593e-06,
427
- "learning_rate": 1.1120451081085082e-05,
428
- "loss": 0.0067,
429
- "step": 30000
430
- },
431
- {
432
- "epoch": 3.159639490313892,
433
- "grad_norm": 2.4715068320801947e-06,
434
- "learning_rate": 1.0972458599103167e-05,
435
- "loss": 0.0,
436
- "step": 30500
437
- },
438
- {
439
- "epoch": 3.2114368590075624,
440
- "grad_norm": 6.171033419377636e-06,
441
- "learning_rate": 1.0824466117121251e-05,
442
- "loss": 0.0,
443
- "step": 31000
444
- },
445
- {
446
- "epoch": 3.2632342277012327,
447
- "grad_norm": 2.5147855922114104e-06,
448
- "learning_rate": 1.0676473635139336e-05,
449
- "loss": 0.0,
450
- "step": 31500
451
- },
452
- {
453
- "epoch": 3.315031596394903,
454
- "grad_norm": 2.676899021025747e-05,
455
- "learning_rate": 1.052848115315742e-05,
456
- "loss": 0.006,
457
- "step": 32000
458
- },
459
- {
460
- "epoch": 3.3668289650885734,
461
- "grad_norm": 2.081859747704584e-05,
462
- "learning_rate": 1.0380488671175506e-05,
463
- "loss": 0.0028,
464
- "step": 32500
465
- },
466
- {
467
- "epoch": 3.418626333782244,
468
- "grad_norm": 2.3868110474722926e-06,
469
- "learning_rate": 1.023249618919359e-05,
470
- "loss": 0.0001,
471
- "step": 33000
472
- },
473
- {
474
- "epoch": 3.470423702475914,
475
- "grad_norm": 2.7923347261094023e-06,
476
- "learning_rate": 1.0084503707211675e-05,
477
- "loss": 0.0,
478
- "step": 33500
479
- },
480
- {
481
- "epoch": 3.5222210711695845,
482
- "grad_norm": 4.678757704823511e-06,
483
- "learning_rate": 9.93651122522976e-06,
484
- "loss": 0.0,
485
- "step": 34000
486
- },
487
- {
488
- "epoch": 3.574018439863255,
489
- "grad_norm": 3.305537575215567e-06,
490
- "learning_rate": 9.788518743247844e-06,
491
- "loss": 0.0,
492
- "step": 34500
493
- },
494
- {
495
- "epoch": 3.625815808556925,
496
- "grad_norm": 2.4619773739686934e-06,
497
- "learning_rate": 9.640526261265928e-06,
498
- "loss": 0.0,
499
- "step": 35000
500
- },
501
- {
502
- "epoch": 3.6776131772505956,
503
- "grad_norm": 2.973723951527063e-07,
504
- "learning_rate": 9.492533779284013e-06,
505
- "loss": 0.0,
506
- "step": 35500
507
- },
508
- {
509
- "epoch": 3.729410545944266,
510
- "grad_norm": 5.624352183986048e-07,
511
- "learning_rate": 9.344541297302097e-06,
512
- "loss": 0.012,
513
- "step": 36000
514
- },
515
- {
516
- "epoch": 3.7812079146379363,
517
- "grad_norm": 1.8933849332825048e-07,
518
- "learning_rate": 9.196548815320182e-06,
519
- "loss": 0.0,
520
- "step": 36500
521
- },
522
- {
523
- "epoch": 3.8330052833316066,
524
- "grad_norm": 3.9811013266444206e-05,
525
- "learning_rate": 9.048556333338268e-06,
526
- "loss": 0.0,
527
- "step": 37000
528
- },
529
- {
530
- "epoch": 3.884802652025277,
531
- "grad_norm": 2.1272378944559023e-05,
532
- "learning_rate": 8.900563851356352e-06,
533
- "loss": 0.0073,
534
- "step": 37500
535
- },
536
- {
537
- "epoch": 3.9366000207189473,
538
- "grad_norm": 8.419656296609901e-07,
539
- "learning_rate": 8.752571369374436e-06,
540
- "loss": 0.0045,
541
- "step": 38000
542
- },
543
- {
544
- "epoch": 3.9883973894126177,
545
- "grad_norm": 1.4807918660153518e-06,
546
- "learning_rate": 8.604578887392521e-06,
547
- "loss": 0.0,
548
- "step": 38500
549
- },
550
- {
551
- "epoch": 4.0401947581062885,
552
- "grad_norm": 2.789050199680787e-07,
553
- "learning_rate": 8.456586405410605e-06,
554
- "loss": 0.0,
555
- "step": 39000
556
- },
557
- {
558
- "epoch": 4.091992126799958,
559
- "grad_norm": 7.712332603659888e-07,
560
- "learning_rate": 8.30859392342869e-06,
561
- "loss": 0.0,
562
- "step": 39500
563
- },
564
- {
565
- "epoch": 4.143789495493629,
566
- "grad_norm": 1.126294500863878e-05,
567
- "learning_rate": 8.160601441446774e-06,
568
- "loss": 0.0,
569
- "step": 40000
570
- },
571
- {
572
- "epoch": 4.195586864187299,
573
- "grad_norm": 1.1078836905653588e-05,
574
- "learning_rate": 8.01260895946486e-06,
575
- "loss": 0.0,
576
- "step": 40500
577
- },
578
- {
579
- "epoch": 4.24738423288097,
580
- "grad_norm": 4.333252491051098e-06,
581
- "learning_rate": 7.864616477482945e-06,
582
- "loss": 0.0,
583
- "step": 41000
584
- },
585
- {
586
- "epoch": 4.29918160157464,
587
- "grad_norm": 7.190360065578716e-06,
588
- "learning_rate": 7.71662399550103e-06,
589
- "loss": 0.0,
590
- "step": 41500
591
- },
592
- {
593
- "epoch": 4.350978970268311,
594
- "grad_norm": 6.172657776915003e-06,
595
- "learning_rate": 7.568631513519114e-06,
596
- "loss": 0.0,
597
- "step": 42000
598
- },
599
- {
600
- "epoch": 4.4027763389619805,
601
- "grad_norm": 1.1028377144839396e-07,
602
- "learning_rate": 7.420639031537199e-06,
603
- "loss": 0.0,
604
- "step": 42500
605
- },
606
- {
607
- "epoch": 4.454573707655651,
608
- "grad_norm": 7.63295773253958e-08,
609
- "learning_rate": 7.272646549555283e-06,
610
- "loss": 0.0,
611
- "step": 43000
612
- },
613
- {
614
- "epoch": 4.506371076349321,
615
- "grad_norm": 8.308877568197204e-07,
616
- "learning_rate": 7.124654067573368e-06,
617
- "loss": 0.0,
618
- "step": 43500
619
- },
620
- {
621
- "epoch": 4.558168445042992,
622
- "grad_norm": 8.788915550894671e-08,
623
- "learning_rate": 6.976661585591452e-06,
624
- "loss": 0.0,
625
- "step": 44000
626
- },
627
- {
628
- "epoch": 4.609965813736662,
629
- "grad_norm": 5.980305104458239e-07,
630
- "learning_rate": 6.828669103609537e-06,
631
- "loss": 0.0,
632
- "step": 44500
633
- },
634
- {
635
- "epoch": 4.661763182430333,
636
- "grad_norm": 0.00010543836833676323,
637
- "learning_rate": 6.680676621627622e-06,
638
- "loss": 0.0034,
639
- "step": 45000
640
- },
641
- {
642
- "epoch": 4.713560551124003,
643
- "grad_norm": 2.6961990442941897e-05,
644
- "learning_rate": 6.532684139645706e-06,
645
- "loss": 0.0,
646
- "step": 45500
647
- },
648
- {
649
- "epoch": 4.765357919817673,
650
- "grad_norm": 2.6214322133455426e-05,
651
- "learning_rate": 6.384691657663791e-06,
652
- "loss": 0.0006,
653
- "step": 46000
654
- },
655
- {
656
- "epoch": 4.817155288511343,
657
- "grad_norm": 6.838554782007122e-06,
658
- "learning_rate": 6.236699175681876e-06,
659
- "loss": 0.0,
660
- "step": 46500
661
- },
662
- {
663
- "epoch": 4.868952657205014,
664
- "grad_norm": 1.3388408660830464e-05,
665
- "learning_rate": 6.08870669369996e-06,
666
- "loss": 0.0,
667
- "step": 47000
668
- },
669
- {
670
- "epoch": 4.920750025898684,
671
- "grad_norm": 1.1914085007447284e-06,
672
- "learning_rate": 5.940714211718045e-06,
673
- "loss": 0.0,
674
- "step": 47500
675
- },
676
- {
677
- "epoch": 4.972547394592355,
678
- "grad_norm": 1.9319197235745378e-05,
679
- "learning_rate": 5.792721729736129e-06,
680
- "loss": 0.0,
681
- "step": 48000
682
- },
683
- {
684
- "epoch": 5.024344763286025,
685
- "grad_norm": 7.528370815634844e-07,
686
- "learning_rate": 5.6447292477542145e-06,
687
- "loss": 0.0058,
688
- "step": 48500
689
- },
690
- {
691
- "epoch": 5.0761421319796955,
692
- "grad_norm": 1.075523073268414e-06,
693
- "learning_rate": 5.496736765772299e-06,
694
- "loss": 0.0,
695
- "step": 49000
696
- },
697
- {
698
- "epoch": 5.1279395006733655,
699
- "grad_norm": 4.6377437001865474e-07,
700
- "learning_rate": 5.348744283790383e-06,
701
- "loss": 0.0,
702
- "step": 49500
703
- },
704
- {
705
- "epoch": 5.179736869367036,
706
- "grad_norm": 6.992227667979023e-07,
707
- "learning_rate": 5.2007518018084694e-06,
708
- "loss": 0.0,
709
- "step": 50000
710
- },
711
- {
712
- "epoch": 5.231534238060706,
713
- "grad_norm": 2.332795929760323e-06,
714
- "learning_rate": 5.052759319826553e-06,
715
- "loss": 0.0,
716
- "step": 50500
717
- },
718
- {
719
- "epoch": 5.283331606754377,
720
- "grad_norm": 5.32125454810739e-07,
721
- "learning_rate": 4.9047668378446374e-06,
722
- "loss": 0.0,
723
- "step": 51000
724
- }
725
- ],
726
- "logging_steps": 500,
727
- "max_steps": 67571,
728
- "num_input_tokens_seen": 0,
729
- "num_train_epochs": 7,
730
- "save_steps": 500,
731
- "stateful_callbacks": {
732
- "TrainerControl": {
733
- "args": {
734
- "should_epoch_stop": false,
735
- "should_evaluate": false,
736
- "should_log": false,
737
- "should_save": true,
738
- "should_training_stop": false
739
- },
740
- "attributes": {}
741
- }
742
- },
743
- "total_flos": 1.4080058755834675e+17,
744
- "train_batch_size": 4,
745
- "trial_name": null,
746
- "trial_params": null
747
- }