dq158 commited on
Commit
8bf9c65
1 Parent(s): f2e1e60

Training in progress, epoch 1, checkpoint

Browse files
last-checkpoint/generation_config.json CHANGED
@@ -2,5 +2,5 @@
2
  "decoder_start_token_id": 0,
3
  "eos_token_id": 1,
4
  "pad_token_id": 0,
5
- "transformers_version": "4.34.1"
6
  }
 
2
  "decoder_start_token_id": 0,
3
  "eos_token_id": 1,
4
  "pad_token_id": 0,
5
+ "transformers_version": "4.35.2"
6
  }
last-checkpoint/optimizer.pt CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:54d9a221b97b4a103c3bbd0fd8abf8231538258d0486fa04d26296da0b8c69c4
3
- size 2622266
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:853fdc84d3fd43924101f3b600fbd9f7fc43eb8341b64b2118c78dce07ce9bc5
3
+ size 1256
last-checkpoint/rng_state.pth CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:9c9b188ef411c860fa162f3665ea4a77b4e06ff07ab727a5f17c10e78d9237ed
3
  size 14244
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:3bfea11a2e02465bb531791987f71426cd35b56e7ebc758216b34e89a76ca829
3
  size 14244
last-checkpoint/scheduler.pt CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:29fd4cd191139dd97c05600899e5a66a54004c5898470ac779c283abb76b7689
3
  size 1064
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:a2492f9b5d907d6544a13d4d0a642eb64eb32b5925504787877c14c24ec71c83
3
  size 1064
last-checkpoint/trainer_state.json CHANGED
@@ -1,1644 +1,248 @@
1
  {
2
- "best_metric": 2.8255138397216797,
3
- "best_model_checkpoint": "dq158/pingusPongus/checkpoint-127555",
4
- "epoch": 5.0,
5
  "eval_steps": 500,
6
- "global_step": 127555,
7
  "is_hyper_param_search": false,
8
  "is_local_process_zero": true,
9
  "is_world_process_zero": true,
10
  "log_history": [
11
  {
12
- "epoch": 0.02,
13
- "learning_rate": 8e-05,
14
- "loss": 4.3147,
15
  "step": 500
16
  },
17
  {
18
- "epoch": 0.04,
19
- "learning_rate": 7.9999915639379e-05,
20
- "loss": 3.7437,
21
  "step": 1000
22
  },
23
  {
24
- "epoch": 0.06,
25
- "learning_rate": 7.999966255787188e-05,
26
- "loss": 3.5193,
27
  "step": 1500
28
  },
29
  {
30
- "epoch": 0.08,
31
- "learning_rate": 7.99992407565461e-05,
32
- "loss": 3.4996,
33
  "step": 2000
34
  },
35
  {
36
- "epoch": 0.1,
37
- "learning_rate": 7.999865023718086e-05,
38
- "loss": 3.4639,
39
  "step": 2500
40
  },
41
  {
42
- "epoch": 0.12,
43
- "learning_rate": 7.999789100226699e-05,
44
- "loss": 3.4482,
45
  "step": 3000
46
  },
47
  {
48
- "epoch": 0.14,
49
- "learning_rate": 7.999696305500694e-05,
50
- "loss": 3.3228,
51
  "step": 3500
52
  },
53
  {
54
- "epoch": 0.16,
55
- "learning_rate": 7.999586639931485e-05,
56
- "loss": 3.3901,
57
  "step": 4000
58
  },
59
  {
60
- "epoch": 0.18,
61
- "learning_rate": 7.999460103981642e-05,
62
- "loss": 3.2202,
63
  "step": 4500
64
  },
65
  {
66
- "epoch": 0.2,
67
- "learning_rate": 7.999316698184901e-05,
68
- "loss": 3.2508,
69
  "step": 5000
70
  },
71
  {
72
- "epoch": 0.22,
73
- "learning_rate": 7.99915642314615e-05,
74
- "loss": 3.2316,
75
  "step": 5500
76
  },
77
  {
78
- "epoch": 0.24,
79
- "learning_rate": 7.998979279541433e-05,
80
- "loss": 3.2365,
81
  "step": 6000
82
  },
83
  {
84
- "epoch": 0.25,
85
- "learning_rate": 7.99878526811795e-05,
86
- "loss": 3.2453,
87
  "step": 6500
88
  },
89
  {
90
- "epoch": 0.27,
91
- "learning_rate": 7.998574389694044e-05,
92
- "loss": 3.2715,
93
  "step": 7000
94
  },
95
  {
96
- "epoch": 0.29,
97
- "learning_rate": 7.99834664515921e-05,
98
- "loss": 3.3127,
99
  "step": 7500
100
  },
101
  {
102
- "epoch": 0.31,
103
- "learning_rate": 7.99810203547408e-05,
104
- "loss": 3.2683,
105
  "step": 8000
106
  },
107
  {
108
- "epoch": 0.33,
109
- "learning_rate": 7.997840561670425e-05,
110
- "loss": 3.2322,
111
  "step": 8500
112
  },
113
  {
114
- "epoch": 0.35,
115
- "learning_rate": 7.997562224851148e-05,
116
- "loss": 3.146,
117
  "step": 9000
118
  },
119
  {
120
- "epoch": 0.37,
121
- "learning_rate": 7.997267026190286e-05,
122
- "loss": 3.1452,
123
  "step": 9500
124
  },
125
  {
126
- "epoch": 0.39,
127
- "learning_rate": 7.996954966932996e-05,
128
- "loss": 3.2489,
129
  "step": 10000
130
  },
131
  {
132
- "epoch": 0.41,
133
- "learning_rate": 7.996626048395549e-05,
134
- "loss": 3.2486,
135
  "step": 10500
136
  },
137
  {
138
- "epoch": 0.43,
139
- "learning_rate": 7.996280271965338e-05,
140
- "loss": 3.2388,
141
  "step": 11000
142
  },
143
  {
144
- "epoch": 0.45,
145
- "learning_rate": 7.995917639100858e-05,
146
- "loss": 3.1955,
147
  "step": 11500
148
  },
149
  {
150
- "epoch": 0.47,
151
- "learning_rate": 7.995538151331703e-05,
152
- "loss": 3.2209,
153
  "step": 12000
154
  },
155
  {
156
- "epoch": 0.49,
157
- "learning_rate": 7.995141810258568e-05,
158
- "loss": 3.1386,
159
  "step": 12500
160
  },
161
  {
162
- "epoch": 0.51,
163
- "learning_rate": 7.994728617553229e-05,
164
- "loss": 3.1551,
165
  "step": 13000
166
  },
167
  {
168
- "epoch": 0.53,
169
- "learning_rate": 7.994298574958548e-05,
170
- "loss": 3.1919,
171
  "step": 13500
172
  },
173
  {
174
- "epoch": 0.55,
175
- "learning_rate": 7.993851684288455e-05,
176
- "loss": 3.1474,
177
  "step": 14000
178
  },
179
  {
180
- "epoch": 0.57,
181
- "learning_rate": 7.993387947427952e-05,
182
- "loss": 3.1897,
183
  "step": 14500
184
  },
185
  {
186
- "epoch": 0.59,
187
- "learning_rate": 7.992907366333094e-05,
188
- "loss": 3.1343,
189
  "step": 15000
190
  },
191
  {
192
- "epoch": 0.61,
193
- "learning_rate": 7.992409943030987e-05,
194
- "loss": 3.1988,
195
  "step": 15500
196
  },
197
  {
198
- "epoch": 0.63,
199
- "learning_rate": 7.991895679619778e-05,
200
- "loss": 3.0978,
201
  "step": 16000
202
  },
203
  {
204
- "epoch": 0.65,
205
- "learning_rate": 7.991364578268645e-05,
206
- "loss": 3.1587,
207
  "step": 16500
208
  },
209
- {
210
- "epoch": 0.67,
211
- "learning_rate": 7.990816641217791e-05,
212
- "loss": 3.1617,
213
- "step": 17000
214
- },
215
- {
216
- "epoch": 0.69,
217
- "learning_rate": 7.990251870778432e-05,
218
- "loss": 3.0655,
219
- "step": 17500
220
- },
221
- {
222
- "epoch": 0.71,
223
- "learning_rate": 7.989670269332786e-05,
224
- "loss": 3.066,
225
- "step": 18000
226
- },
227
- {
228
- "epoch": 0.73,
229
- "learning_rate": 7.989071839334067e-05,
230
- "loss": 3.1541,
231
- "step": 18500
232
- },
233
- {
234
- "epoch": 0.74,
235
- "learning_rate": 7.988456583306473e-05,
236
- "loss": 3.1307,
237
- "step": 19000
238
- },
239
- {
240
- "epoch": 0.76,
241
- "learning_rate": 7.987824503845168e-05,
242
- "loss": 3.0477,
243
- "step": 19500
244
- },
245
- {
246
- "epoch": 0.78,
247
- "learning_rate": 7.987175603616287e-05,
248
- "loss": 3.0709,
249
- "step": 20000
250
- },
251
- {
252
- "epoch": 0.8,
253
- "learning_rate": 7.98650988535691e-05,
254
- "loss": 3.1225,
255
- "step": 20500
256
- },
257
- {
258
- "epoch": 0.82,
259
- "learning_rate": 7.985827351875058e-05,
260
- "loss": 3.1071,
261
- "step": 21000
262
- },
263
- {
264
- "epoch": 0.84,
265
- "learning_rate": 7.985128006049677e-05,
266
- "loss": 3.0508,
267
- "step": 21500
268
- },
269
- {
270
- "epoch": 0.86,
271
- "learning_rate": 7.984411850830631e-05,
272
- "loss": 3.133,
273
- "step": 22000
274
- },
275
- {
276
- "epoch": 0.88,
277
- "learning_rate": 7.983678889238684e-05,
278
- "loss": 3.0661,
279
- "step": 22500
280
- },
281
- {
282
- "epoch": 0.9,
283
- "learning_rate": 7.98292912436549e-05,
284
- "loss": 3.1163,
285
- "step": 23000
286
- },
287
- {
288
- "epoch": 0.92,
289
- "learning_rate": 7.982162559373581e-05,
290
- "loss": 3.1544,
291
- "step": 23500
292
- },
293
- {
294
- "epoch": 0.94,
295
- "learning_rate": 7.981379197496355e-05,
296
- "loss": 3.0375,
297
- "step": 24000
298
- },
299
  {
300
  "epoch": 0.96,
301
- "learning_rate": 7.980579042038053e-05,
302
- "loss": 3.0113,
303
- "step": 24500
304
  },
305
  {
306
  "epoch": 0.98,
307
- "learning_rate": 7.979762096373755e-05,
308
- "loss": 3.092,
309
- "step": 25000
310
- },
311
- {
312
- "epoch": 1.0,
313
- "learning_rate": 7.978928363949367e-05,
314
- "loss": 3.129,
315
- "step": 25500
316
  },
317
  {
318
  "epoch": 1.0,
319
  "eval_bleu": 1.0,
320
  "eval_brevity_penalty": 1.0,
321
  "eval_length_ratio": 1.0,
322
- "eval_loss": 2.9649720191955566,
323
- "eval_precisions": [
324
- 1.0,
325
- 1.0,
326
- 1.0,
327
- 1.0
328
- ],
329
- "eval_reference_length": 5805056,
330
- "eval_runtime": 9759.3615,
331
- "eval_samples_per_second": 1.162,
332
- "eval_steps_per_second": 0.29,
333
- "eval_translation_length": 5805056,
334
- "step": 25511
335
- },
336
- {
337
- "epoch": 1.02,
338
- "learning_rate": 7.978077848281594e-05,
339
- "loss": 3.066,
340
- "step": 26000
341
- },
342
- {
343
- "epoch": 1.04,
344
- "learning_rate": 7.977210552957941e-05,
345
- "loss": 3.0837,
346
- "step": 26500
347
- },
348
- {
349
- "epoch": 1.06,
350
- "learning_rate": 7.976326481636685e-05,
351
- "loss": 3.0351,
352
- "step": 27000
353
- },
354
- {
355
- "epoch": 1.08,
356
- "learning_rate": 7.975425638046865e-05,
357
- "loss": 3.0893,
358
- "step": 27500
359
- },
360
- {
361
- "epoch": 1.1,
362
- "learning_rate": 7.974508025988271e-05,
363
- "loss": 3.1108,
364
- "step": 28000
365
- },
366
- {
367
- "epoch": 1.12,
368
- "learning_rate": 7.973573649331417e-05,
369
- "loss": 3.092,
370
- "step": 28500
371
- },
372
- {
373
- "epoch": 1.14,
374
- "learning_rate": 7.972622512017531e-05,
375
- "loss": 3.0614,
376
- "step": 29000
377
- },
378
- {
379
- "epoch": 1.16,
380
- "learning_rate": 7.971654618058541e-05,
381
- "loss": 3.0665,
382
- "step": 29500
383
- },
384
- {
385
- "epoch": 1.18,
386
- "learning_rate": 7.970669971537055e-05,
387
- "loss": 3.0267,
388
- "step": 30000
389
- },
390
- {
391
- "epoch": 1.2,
392
- "learning_rate": 7.969668576606342e-05,
393
- "loss": 3.0357,
394
- "step": 30500
395
- },
396
- {
397
- "epoch": 1.22,
398
- "learning_rate": 7.968650437490315e-05,
399
- "loss": 3.0802,
400
- "step": 31000
401
- },
402
- {
403
- "epoch": 1.23,
404
- "learning_rate": 7.96761555848352e-05,
405
- "loss": 3.02,
406
- "step": 31500
407
- },
408
- {
409
- "epoch": 1.25,
410
- "learning_rate": 7.966563943951106e-05,
411
- "loss": 3.019,
412
- "step": 32000
413
- },
414
- {
415
- "epoch": 1.27,
416
- "learning_rate": 7.965495598328816e-05,
417
- "loss": 2.963,
418
- "step": 32500
419
- },
420
- {
421
- "epoch": 1.29,
422
- "learning_rate": 7.964410526122968e-05,
423
- "loss": 3.0504,
424
- "step": 33000
425
- },
426
- {
427
- "epoch": 1.31,
428
- "learning_rate": 7.963308731910425e-05,
429
- "loss": 3.0028,
430
- "step": 33500
431
- },
432
- {
433
- "epoch": 1.33,
434
- "learning_rate": 7.962190220338592e-05,
435
- "loss": 3.1032,
436
- "step": 34000
437
- },
438
- {
439
- "epoch": 1.35,
440
- "learning_rate": 7.961054996125386e-05,
441
- "loss": 3.1041,
442
- "step": 34500
443
- },
444
- {
445
- "epoch": 1.37,
446
- "learning_rate": 7.959903064059219e-05,
447
- "loss": 3.0636,
448
- "step": 35000
449
- },
450
- {
451
- "epoch": 1.39,
452
- "learning_rate": 7.958734428998974e-05,
453
- "loss": 3.0328,
454
- "step": 35500
455
- },
456
- {
457
- "epoch": 1.41,
458
- "learning_rate": 7.95754909587399e-05,
459
- "loss": 3.119,
460
- "step": 36000
461
- },
462
- {
463
- "epoch": 1.43,
464
- "learning_rate": 7.95634706968404e-05,
465
- "loss": 3.0262,
466
- "step": 36500
467
- },
468
- {
469
- "epoch": 1.45,
470
- "learning_rate": 7.955128355499307e-05,
471
- "loss": 2.9855,
472
- "step": 37000
473
- },
474
- {
475
- "epoch": 1.47,
476
- "learning_rate": 7.953892958460368e-05,
477
- "loss": 2.961,
478
- "step": 37500
479
- },
480
- {
481
- "epoch": 1.49,
482
- "learning_rate": 7.95264088377816e-05,
483
- "loss": 3.0219,
484
- "step": 38000
485
- },
486
- {
487
- "epoch": 1.51,
488
- "learning_rate": 7.95137213673398e-05,
489
- "loss": 3.0122,
490
- "step": 38500
491
- },
492
- {
493
- "epoch": 1.53,
494
- "learning_rate": 7.950086722679437e-05,
495
- "loss": 3.0509,
496
- "step": 39000
497
- },
498
- {
499
- "epoch": 1.55,
500
- "learning_rate": 7.948784647036451e-05,
501
- "loss": 3.0243,
502
- "step": 39500
503
- },
504
- {
505
- "epoch": 1.57,
506
- "learning_rate": 7.947465915297216e-05,
507
- "loss": 3.0097,
508
- "step": 40000
509
- },
510
- {
511
- "epoch": 1.59,
512
- "learning_rate": 7.946130533024182e-05,
513
- "loss": 3.0304,
514
- "step": 40500
515
- },
516
- {
517
- "epoch": 1.61,
518
- "learning_rate": 7.944778505850034e-05,
519
- "loss": 3.0242,
520
- "step": 41000
521
- },
522
- {
523
- "epoch": 1.63,
524
- "learning_rate": 7.943409839477667e-05,
525
- "loss": 3.0768,
526
- "step": 41500
527
- },
528
- {
529
- "epoch": 1.65,
530
- "learning_rate": 7.942024539680155e-05,
531
- "loss": 3.002,
532
- "step": 42000
533
- },
534
- {
535
- "epoch": 1.67,
536
- "learning_rate": 7.940622612300737e-05,
537
- "loss": 3.051,
538
- "step": 42500
539
- },
540
- {
541
- "epoch": 1.69,
542
- "learning_rate": 7.939204063252785e-05,
543
- "loss": 2.9671,
544
- "step": 43000
545
- },
546
- {
547
- "epoch": 1.71,
548
- "learning_rate": 7.937768898519784e-05,
549
- "loss": 3.0447,
550
- "step": 43500
551
- },
552
- {
553
- "epoch": 1.72,
554
- "learning_rate": 7.936317124155305e-05,
555
- "loss": 3.0225,
556
- "step": 44000
557
- },
558
- {
559
- "epoch": 1.74,
560
- "learning_rate": 7.934848746282973e-05,
561
- "loss": 3.0529,
562
- "step": 44500
563
- },
564
- {
565
- "epoch": 1.76,
566
- "learning_rate": 7.933363771096455e-05,
567
- "loss": 2.9976,
568
- "step": 45000
569
- },
570
- {
571
- "epoch": 1.78,
572
- "learning_rate": 7.931862204859424e-05,
573
- "loss": 3.0628,
574
- "step": 45500
575
- },
576
- {
577
- "epoch": 1.8,
578
- "learning_rate": 7.930344053905528e-05,
579
- "loss": 3.052,
580
- "step": 46000
581
- },
582
- {
583
- "epoch": 1.82,
584
- "learning_rate": 7.928809324638376e-05,
585
- "loss": 3.0504,
586
- "step": 46500
587
- },
588
- {
589
- "epoch": 1.84,
590
- "learning_rate": 7.927258023531509e-05,
591
- "loss": 2.9963,
592
- "step": 47000
593
- },
594
- {
595
- "epoch": 1.86,
596
- "learning_rate": 7.925690157128356e-05,
597
- "loss": 2.977,
598
- "step": 47500
599
- },
600
- {
601
- "epoch": 1.88,
602
- "learning_rate": 7.924105732042232e-05,
603
- "loss": 2.9631,
604
- "step": 48000
605
- },
606
- {
607
- "epoch": 1.9,
608
- "learning_rate": 7.922504754956285e-05,
609
- "loss": 3.0761,
610
- "step": 48500
611
- },
612
- {
613
- "epoch": 1.92,
614
- "learning_rate": 7.920887232623493e-05,
615
- "loss": 3.0048,
616
- "step": 49000
617
- },
618
- {
619
- "epoch": 1.94,
620
- "learning_rate": 7.919253171866611e-05,
621
- "loss": 3.0274,
622
- "step": 49500
623
- },
624
- {
625
- "epoch": 1.96,
626
- "learning_rate": 7.91760257957816e-05,
627
- "loss": 3.0597,
628
- "step": 50000
629
- },
630
- {
631
- "epoch": 1.98,
632
- "learning_rate": 7.915935462720387e-05,
633
- "loss": 3.036,
634
- "step": 50500
635
- },
636
- {
637
- "epoch": 2.0,
638
- "learning_rate": 7.914251828325245e-05,
639
- "loss": 3.0542,
640
- "step": 51000
641
- },
642
- {
643
- "epoch": 2.0,
644
- "eval_bleu": 1.0,
645
- "eval_brevity_penalty": 1.0,
646
- "eval_length_ratio": 1.0,
647
- "eval_loss": 2.9141533374786377,
648
- "eval_precisions": [
649
- 1.0,
650
- 1.0,
651
- 1.0,
652
- 1.0
653
- ],
654
- "eval_reference_length": 5805056,
655
- "eval_runtime": 9841.5587,
656
- "eval_samples_per_second": 1.152,
657
- "eval_steps_per_second": 0.288,
658
- "eval_translation_length": 5805056,
659
- "step": 51022
660
- },
661
- {
662
- "epoch": 2.02,
663
- "learning_rate": 7.912551683494356e-05,
664
- "loss": 2.9429,
665
- "step": 51500
666
- },
667
- {
668
- "epoch": 2.04,
669
- "learning_rate": 7.910835035398984e-05,
670
- "loss": 2.9148,
671
- "step": 52000
672
- },
673
- {
674
- "epoch": 2.06,
675
- "learning_rate": 7.909101891280002e-05,
676
- "loss": 3.0458,
677
- "step": 52500
678
- },
679
- {
680
- "epoch": 2.08,
681
- "learning_rate": 7.907352258447868e-05,
682
- "loss": 2.9764,
683
- "step": 53000
684
- },
685
- {
686
- "epoch": 2.1,
687
- "learning_rate": 7.905586144282585e-05,
688
- "loss": 2.9702,
689
- "step": 53500
690
- },
691
- {
692
- "epoch": 2.12,
693
- "learning_rate": 7.90380355623368e-05,
694
- "loss": 3.0079,
695
- "step": 54000
696
- },
697
- {
698
- "epoch": 2.14,
699
- "learning_rate": 7.902004501820162e-05,
700
- "loss": 3.0268,
701
- "step": 54500
702
- },
703
- {
704
- "epoch": 2.16,
705
- "learning_rate": 7.900188988630502e-05,
706
- "loss": 2.9168,
707
- "step": 55000
708
- },
709
- {
710
- "epoch": 2.18,
711
- "learning_rate": 7.898357024322588e-05,
712
- "loss": 2.9747,
713
- "step": 55500
714
- },
715
- {
716
- "epoch": 2.2,
717
- "learning_rate": 7.896508616623704e-05,
718
- "loss": 3.0031,
719
- "step": 56000
720
- },
721
- {
722
- "epoch": 2.21,
723
- "learning_rate": 7.89464377333049e-05,
724
- "loss": 3.0554,
725
- "step": 56500
726
- },
727
- {
728
- "epoch": 2.23,
729
- "learning_rate": 7.892762502308914e-05,
730
- "loss": 3.0112,
731
- "step": 57000
732
- },
733
- {
734
- "epoch": 2.25,
735
- "learning_rate": 7.890864811494233e-05,
736
- "loss": 2.9569,
737
- "step": 57500
738
- },
739
- {
740
- "epoch": 2.27,
741
- "learning_rate": 7.888950708890969e-05,
742
- "loss": 2.9121,
743
- "step": 58000
744
- },
745
- {
746
- "epoch": 2.29,
747
- "learning_rate": 7.887020202572866e-05,
748
- "loss": 3.0086,
749
- "step": 58500
750
- },
751
- {
752
- "epoch": 2.31,
753
- "learning_rate": 7.885073300682857e-05,
754
- "loss": 2.9117,
755
- "step": 59000
756
- },
757
- {
758
- "epoch": 2.33,
759
- "learning_rate": 7.883110011433036e-05,
760
- "loss": 2.9263,
761
- "step": 59500
762
- },
763
- {
764
- "epoch": 2.35,
765
- "learning_rate": 7.881130343104617e-05,
766
- "loss": 2.9659,
767
- "step": 60000
768
- },
769
- {
770
- "epoch": 2.37,
771
- "learning_rate": 7.879134304047906e-05,
772
- "loss": 3.055,
773
- "step": 60500
774
- },
775
- {
776
- "epoch": 2.39,
777
- "learning_rate": 7.877121902682255e-05,
778
- "loss": 2.9047,
779
- "step": 61000
780
- },
781
- {
782
- "epoch": 2.41,
783
- "learning_rate": 7.875093147496036e-05,
784
- "loss": 2.9133,
785
- "step": 61500
786
- },
787
- {
788
- "epoch": 2.43,
789
- "learning_rate": 7.873048047046599e-05,
790
- "loss": 2.9905,
791
- "step": 62000
792
- },
793
- {
794
- "epoch": 2.45,
795
- "learning_rate": 7.870986609960245e-05,
796
- "loss": 3.0002,
797
- "step": 62500
798
- },
799
- {
800
- "epoch": 2.47,
801
- "learning_rate": 7.868908844932178e-05,
802
- "loss": 2.9581,
803
- "step": 63000
804
- },
805
- {
806
- "epoch": 2.49,
807
- "learning_rate": 7.866814760726474e-05,
808
- "loss": 2.9722,
809
- "step": 63500
810
- },
811
- {
812
- "epoch": 2.51,
813
- "learning_rate": 7.864704366176047e-05,
814
- "loss": 2.9978,
815
- "step": 64000
816
- },
817
- {
818
- "epoch": 2.53,
819
- "learning_rate": 7.862577670182607e-05,
820
- "loss": 2.9933,
821
- "step": 64500
822
- },
823
- {
824
- "epoch": 2.55,
825
- "learning_rate": 7.860434681716623e-05,
826
- "loss": 2.9421,
827
- "step": 65000
828
- },
829
- {
830
- "epoch": 2.57,
831
- "learning_rate": 7.858275409817285e-05,
832
- "loss": 3.0076,
833
- "step": 65500
834
- },
835
- {
836
- "epoch": 2.59,
837
- "learning_rate": 7.856099863592473e-05,
838
- "loss": 2.9756,
839
- "step": 66000
840
- },
841
- {
842
- "epoch": 2.61,
843
- "learning_rate": 7.853908052218706e-05,
844
- "loss": 2.9747,
845
- "step": 66500
846
- },
847
- {
848
- "epoch": 2.63,
849
- "learning_rate": 7.851699984941113e-05,
850
- "loss": 2.9841,
851
- "step": 67000
852
- },
853
- {
854
- "epoch": 2.65,
855
- "learning_rate": 7.849475671073389e-05,
856
- "loss": 3.0131,
857
- "step": 67500
858
- },
859
- {
860
- "epoch": 2.67,
861
- "learning_rate": 7.847235119997762e-05,
862
- "loss": 2.9802,
863
- "step": 68000
864
- },
865
- {
866
- "epoch": 2.69,
867
- "learning_rate": 7.844978341164943e-05,
868
- "loss": 2.9494,
869
- "step": 68500
870
- },
871
- {
872
- "epoch": 2.7,
873
- "learning_rate": 7.842705344094096e-05,
874
- "loss": 2.9844,
875
- "step": 69000
876
- },
877
- {
878
- "epoch": 2.72,
879
- "learning_rate": 7.840416138372793e-05,
880
- "loss": 2.9849,
881
- "step": 69500
882
- },
883
- {
884
- "epoch": 2.74,
885
- "learning_rate": 7.838110733656976e-05,
886
- "loss": 2.9394,
887
- "step": 70000
888
- },
889
- {
890
- "epoch": 2.76,
891
- "learning_rate": 7.835789139670913e-05,
892
- "loss": 3.0505,
893
- "step": 70500
894
- },
895
- {
896
- "epoch": 2.78,
897
- "learning_rate": 7.833451366207159e-05,
898
- "loss": 2.9532,
899
- "step": 71000
900
- },
901
- {
902
- "epoch": 2.8,
903
- "learning_rate": 7.831097423126516e-05,
904
- "loss": 3.0089,
905
- "step": 71500
906
- },
907
- {
908
- "epoch": 2.82,
909
- "learning_rate": 7.828727320357988e-05,
910
- "loss": 2.938,
911
- "step": 72000
912
- },
913
- {
914
- "epoch": 2.84,
915
- "learning_rate": 7.826341067898745e-05,
916
- "loss": 3.0084,
917
- "step": 72500
918
- },
919
- {
920
- "epoch": 2.86,
921
- "learning_rate": 7.823938675814069e-05,
922
- "loss": 2.9872,
923
- "step": 73000
924
- },
925
- {
926
- "epoch": 2.88,
927
- "learning_rate": 7.821520154237328e-05,
928
- "loss": 3.0044,
929
- "step": 73500
930
- },
931
- {
932
- "epoch": 2.9,
933
- "learning_rate": 7.819085513369918e-05,
934
- "loss": 2.9529,
935
- "step": 74000
936
- },
937
- {
938
- "epoch": 2.92,
939
- "learning_rate": 7.816634763481234e-05,
940
- "loss": 2.9624,
941
- "step": 74500
942
- },
943
- {
944
- "epoch": 2.94,
945
- "learning_rate": 7.814167914908614e-05,
946
- "loss": 2.932,
947
- "step": 75000
948
- },
949
- {
950
- "epoch": 2.96,
951
- "learning_rate": 7.8116849780573e-05,
952
- "loss": 2.9968,
953
- "step": 75500
954
- },
955
- {
956
- "epoch": 2.98,
957
- "learning_rate": 7.809185963400396e-05,
958
- "loss": 2.9481,
959
- "step": 76000
960
- },
961
- {
962
- "epoch": 3.0,
963
- "learning_rate": 7.806670881478827e-05,
964
- "loss": 2.9894,
965
- "step": 76500
966
- },
967
- {
968
- "epoch": 3.0,
969
- "eval_bleu": 1.0,
970
- "eval_brevity_penalty": 1.0,
971
- "eval_length_ratio": 1.0,
972
- "eval_loss": 2.871685028076172,
973
- "eval_precisions": [
974
- 1.0,
975
- 1.0,
976
- 1.0,
977
- 1.0
978
- ],
979
- "eval_reference_length": 5805056,
980
- "eval_runtime": 9748.7325,
981
- "eval_samples_per_second": 1.163,
982
- "eval_steps_per_second": 0.291,
983
- "eval_translation_length": 5805056,
984
- "step": 76533
985
- },
986
- {
987
- "epoch": 3.02,
988
- "learning_rate": 7.804139742901283e-05,
989
- "loss": 2.9583,
990
- "step": 77000
991
- },
992
- {
993
- "epoch": 3.04,
994
- "learning_rate": 7.801592558344189e-05,
995
- "loss": 2.8663,
996
- "step": 77500
997
- },
998
- {
999
- "epoch": 3.06,
1000
- "learning_rate": 7.799029338551645e-05,
1001
- "loss": 2.9258,
1002
- "step": 78000
1003
- },
1004
- {
1005
- "epoch": 3.08,
1006
- "learning_rate": 7.796450094335392e-05,
1007
- "loss": 2.9957,
1008
- "step": 78500
1009
- },
1010
- {
1011
- "epoch": 3.1,
1012
- "learning_rate": 7.793854836574765e-05,
1013
- "loss": 2.8852,
1014
- "step": 79000
1015
- },
1016
- {
1017
- "epoch": 3.12,
1018
- "learning_rate": 7.791243576216639e-05,
1019
- "loss": 2.8922,
1020
- "step": 79500
1021
- },
1022
- {
1023
- "epoch": 3.14,
1024
- "learning_rate": 7.788616324275393e-05,
1025
- "loss": 2.9122,
1026
- "step": 80000
1027
- },
1028
- {
1029
- "epoch": 3.16,
1030
- "learning_rate": 7.785973091832855e-05,
1031
- "loss": 2.9864,
1032
- "step": 80500
1033
- },
1034
- {
1035
- "epoch": 3.18,
1036
- "learning_rate": 7.783313890038264e-05,
1037
- "loss": 2.9979,
1038
- "step": 81000
1039
- },
1040
- {
1041
- "epoch": 3.19,
1042
- "learning_rate": 7.780638730108215e-05,
1043
- "loss": 2.9145,
1044
- "step": 81500
1045
- },
1046
- {
1047
- "epoch": 3.21,
1048
- "learning_rate": 7.777947623326615e-05,
1049
- "loss": 2.9814,
1050
- "step": 82000
1051
- },
1052
- {
1053
- "epoch": 3.23,
1054
- "learning_rate": 7.775240581044637e-05,
1055
- "loss": 2.8943,
1056
- "step": 82500
1057
- },
1058
- {
1059
- "epoch": 3.25,
1060
- "learning_rate": 7.772517614680669e-05,
1061
- "loss": 2.8884,
1062
- "step": 83000
1063
- },
1064
- {
1065
- "epoch": 3.27,
1066
- "learning_rate": 7.769778735720267e-05,
1067
- "loss": 3.0106,
1068
- "step": 83500
1069
- },
1070
- {
1071
- "epoch": 3.29,
1072
- "learning_rate": 7.767023955716108e-05,
1073
- "loss": 2.8724,
1074
- "step": 84000
1075
- },
1076
- {
1077
- "epoch": 3.31,
1078
- "learning_rate": 7.764253286287938e-05,
1079
- "loss": 2.98,
1080
- "step": 84500
1081
- },
1082
- {
1083
- "epoch": 3.33,
1084
- "learning_rate": 7.76146673912253e-05,
1085
- "loss": 3.014,
1086
- "step": 85000
1087
- },
1088
- {
1089
- "epoch": 3.35,
1090
- "learning_rate": 7.758664325973623e-05,
1091
- "loss": 2.951,
1092
- "step": 85500
1093
- },
1094
- {
1095
- "epoch": 3.37,
1096
- "learning_rate": 7.755846058661887e-05,
1097
- "loss": 2.8924,
1098
- "step": 86000
1099
- },
1100
- {
1101
- "epoch": 3.39,
1102
- "learning_rate": 7.753011949074857e-05,
1103
- "loss": 2.911,
1104
- "step": 86500
1105
- },
1106
- {
1107
- "epoch": 3.41,
1108
- "learning_rate": 7.750162009166897e-05,
1109
- "loss": 2.9266,
1110
- "step": 87000
1111
- },
1112
- {
1113
- "epoch": 3.43,
1114
- "learning_rate": 7.74729625095914e-05,
1115
- "loss": 2.9427,
1116
- "step": 87500
1117
- },
1118
- {
1119
- "epoch": 3.45,
1120
- "learning_rate": 7.744414686539447e-05,
1121
- "loss": 2.9499,
1122
- "step": 88000
1123
- },
1124
- {
1125
- "epoch": 3.47,
1126
- "learning_rate": 7.741517328062343e-05,
1127
- "loss": 2.9378,
1128
- "step": 88500
1129
- },
1130
- {
1131
- "epoch": 3.49,
1132
- "learning_rate": 7.738604187748976e-05,
1133
- "loss": 2.9196,
1134
- "step": 89000
1135
- },
1136
- {
1137
- "epoch": 3.51,
1138
- "learning_rate": 7.735675277887063e-05,
1139
- "loss": 2.9497,
1140
- "step": 89500
1141
- },
1142
- {
1143
- "epoch": 3.53,
1144
- "learning_rate": 7.732730610830839e-05,
1145
- "loss": 2.9255,
1146
- "step": 90000
1147
- },
1148
- {
1149
- "epoch": 3.55,
1150
- "learning_rate": 7.729770199000999e-05,
1151
- "loss": 2.8952,
1152
- "step": 90500
1153
- },
1154
- {
1155
- "epoch": 3.57,
1156
- "learning_rate": 7.72679405488465e-05,
1157
- "loss": 2.8923,
1158
- "step": 91000
1159
- },
1160
- {
1161
- "epoch": 3.59,
1162
- "learning_rate": 7.723802191035264e-05,
1163
- "loss": 2.9121,
1164
- "step": 91500
1165
- },
1166
- {
1167
- "epoch": 3.61,
1168
- "learning_rate": 7.720794620072614e-05,
1169
- "loss": 2.9961,
1170
- "step": 92000
1171
- },
1172
- {
1173
- "epoch": 3.63,
1174
- "learning_rate": 7.717771354682728e-05,
1175
- "loss": 2.8947,
1176
- "step": 92500
1177
- },
1178
- {
1179
- "epoch": 3.65,
1180
- "learning_rate": 7.714732407617831e-05,
1181
- "loss": 2.9992,
1182
- "step": 93000
1183
- },
1184
- {
1185
- "epoch": 3.67,
1186
- "learning_rate": 7.7116777916963e-05,
1187
- "loss": 2.9619,
1188
- "step": 93500
1189
- },
1190
- {
1191
- "epoch": 3.68,
1192
- "learning_rate": 7.708607519802597e-05,
1193
- "loss": 2.9621,
1194
- "step": 94000
1195
- },
1196
- {
1197
- "epoch": 3.7,
1198
- "learning_rate": 7.705521604887226e-05,
1199
- "loss": 3.008,
1200
- "step": 94500
1201
- },
1202
- {
1203
- "epoch": 3.72,
1204
- "learning_rate": 7.702420059966669e-05,
1205
- "loss": 2.957,
1206
- "step": 95000
1207
- },
1208
- {
1209
- "epoch": 3.74,
1210
- "learning_rate": 7.699302898123343e-05,
1211
- "loss": 2.8856,
1212
- "step": 95500
1213
- },
1214
- {
1215
- "epoch": 3.76,
1216
- "learning_rate": 7.696170132505529e-05,
1217
- "loss": 2.9774,
1218
- "step": 96000
1219
- },
1220
- {
1221
- "epoch": 3.78,
1222
- "learning_rate": 7.693021776327334e-05,
1223
- "loss": 2.8744,
1224
- "step": 96500
1225
- },
1226
- {
1227
- "epoch": 3.8,
1228
- "learning_rate": 7.689857842868618e-05,
1229
- "loss": 2.9059,
1230
- "step": 97000
1231
- },
1232
- {
1233
- "epoch": 3.82,
1234
- "learning_rate": 7.686678345474953e-05,
1235
- "loss": 2.9568,
1236
- "step": 97500
1237
- },
1238
- {
1239
- "epoch": 3.84,
1240
- "learning_rate": 7.683483297557558e-05,
1241
- "loss": 2.9424,
1242
- "step": 98000
1243
- },
1244
- {
1245
- "epoch": 3.86,
1246
- "learning_rate": 7.680272712593242e-05,
1247
- "loss": 2.9074,
1248
- "step": 98500
1249
- },
1250
- {
1251
- "epoch": 3.88,
1252
- "learning_rate": 7.677046604124356e-05,
1253
- "loss": 2.9393,
1254
- "step": 99000
1255
- },
1256
- {
1257
- "epoch": 3.9,
1258
- "learning_rate": 7.673804985758723e-05,
1259
- "loss": 2.9479,
1260
- "step": 99500
1261
- },
1262
- {
1263
- "epoch": 3.92,
1264
- "learning_rate": 7.670547871169587e-05,
1265
- "loss": 2.8662,
1266
- "step": 100000
1267
- },
1268
- {
1269
- "epoch": 3.94,
1270
- "learning_rate": 7.667275274095564e-05,
1271
- "loss": 2.8926,
1272
- "step": 100500
1273
- },
1274
- {
1275
- "epoch": 3.96,
1276
- "learning_rate": 7.663987208340569e-05,
1277
- "loss": 2.8352,
1278
- "step": 101000
1279
- },
1280
- {
1281
- "epoch": 3.98,
1282
- "learning_rate": 7.660683687773763e-05,
1283
- "loss": 2.9091,
1284
- "step": 101500
1285
- },
1286
- {
1287
- "epoch": 4.0,
1288
- "learning_rate": 7.657364726329499e-05,
1289
- "loss": 2.9203,
1290
- "step": 102000
1291
- },
1292
- {
1293
- "epoch": 4.0,
1294
- "eval_bleu": 1.0,
1295
- "eval_brevity_penalty": 1.0,
1296
- "eval_length_ratio": 1.0,
1297
- "eval_loss": 2.8476955890655518,
1298
- "eval_precisions": [
1299
- 1.0,
1300
- 1.0,
1301
- 1.0,
1302
- 1.0
1303
- ],
1304
- "eval_reference_length": 5805056,
1305
- "eval_runtime": 9732.5788,
1306
- "eval_samples_per_second": 1.165,
1307
- "eval_steps_per_second": 0.291,
1308
- "eval_translation_length": 5805056,
1309
- "step": 102044
1310
- },
1311
- {
1312
- "epoch": 4.02,
1313
- "learning_rate": 7.65403033800726e-05,
1314
- "loss": 2.9547,
1315
- "step": 102500
1316
- },
1317
- {
1318
- "epoch": 4.04,
1319
- "learning_rate": 7.6506805368716e-05,
1320
- "loss": 2.8596,
1321
- "step": 103000
1322
- },
1323
- {
1324
- "epoch": 4.06,
1325
- "learning_rate": 7.647315337052083e-05,
1326
- "loss": 2.9155,
1327
- "step": 103500
1328
- },
1329
- {
1330
- "epoch": 4.08,
1331
- "learning_rate": 7.643934752743228e-05,
1332
- "loss": 2.8802,
1333
- "step": 104000
1334
- },
1335
- {
1336
- "epoch": 4.1,
1337
- "learning_rate": 7.640538798204443e-05,
1338
- "loss": 2.9248,
1339
- "step": 104500
1340
- },
1341
- {
1342
- "epoch": 4.12,
1343
- "learning_rate": 7.63712748775997e-05,
1344
- "loss": 2.8865,
1345
- "step": 105000
1346
- },
1347
- {
1348
- "epoch": 4.14,
1349
- "learning_rate": 7.633700835798824e-05,
1350
- "loss": 2.9383,
1351
- "step": 105500
1352
- },
1353
- {
1354
- "epoch": 4.16,
1355
- "learning_rate": 7.630258856774726e-05,
1356
- "loss": 2.8581,
1357
- "step": 106000
1358
- },
1359
- {
1360
- "epoch": 4.17,
1361
- "learning_rate": 7.626801565206054e-05,
1362
- "loss": 2.9359,
1363
- "step": 106500
1364
- },
1365
- {
1366
- "epoch": 4.19,
1367
- "learning_rate": 7.623328975675768e-05,
1368
- "loss": 2.9459,
1369
- "step": 107000
1370
- },
1371
- {
1372
- "epoch": 4.21,
1373
- "learning_rate": 7.619841102831362e-05,
1374
- "loss": 2.9888,
1375
- "step": 107500
1376
- },
1377
- {
1378
- "epoch": 4.23,
1379
- "learning_rate": 7.616337961384787e-05,
1380
- "loss": 2.8815,
1381
- "step": 108000
1382
- },
1383
- {
1384
- "epoch": 4.25,
1385
- "learning_rate": 7.612819566112408e-05,
1386
- "loss": 2.9105,
1387
- "step": 108500
1388
- },
1389
- {
1390
- "epoch": 4.27,
1391
- "learning_rate": 7.609285931854922e-05,
1392
- "loss": 2.9028,
1393
- "step": 109000
1394
- },
1395
- {
1396
- "epoch": 4.29,
1397
- "learning_rate": 7.605737073517308e-05,
1398
- "loss": 2.9141,
1399
- "step": 109500
1400
- },
1401
- {
1402
- "epoch": 4.31,
1403
- "learning_rate": 7.602173006068763e-05,
1404
- "loss": 2.9244,
1405
- "step": 110000
1406
- },
1407
- {
1408
- "epoch": 4.33,
1409
- "learning_rate": 7.598593744542632e-05,
1410
- "loss": 2.8534,
1411
- "step": 110500
1412
- },
1413
- {
1414
- "epoch": 4.35,
1415
- "learning_rate": 7.594999304036352e-05,
1416
- "loss": 2.8936,
1417
- "step": 111000
1418
- },
1419
- {
1420
- "epoch": 4.37,
1421
- "learning_rate": 7.591389699711384e-05,
1422
- "loss": 2.8866,
1423
- "step": 111500
1424
- },
1425
- {
1426
- "epoch": 4.39,
1427
- "learning_rate": 7.587764946793153e-05,
1428
- "loss": 2.9335,
1429
- "step": 112000
1430
- },
1431
- {
1432
- "epoch": 4.41,
1433
- "learning_rate": 7.584125060570976e-05,
1434
- "loss": 2.7982,
1435
- "step": 112500
1436
- },
1437
- {
1438
- "epoch": 4.43,
1439
- "learning_rate": 7.58047005639801e-05,
1440
- "loss": 2.8565,
1441
- "step": 113000
1442
- },
1443
- {
1444
- "epoch": 4.45,
1445
- "learning_rate": 7.576799949691174e-05,
1446
- "loss": 2.8682,
1447
- "step": 113500
1448
- },
1449
- {
1450
- "epoch": 4.47,
1451
- "learning_rate": 7.573114755931093e-05,
1452
- "loss": 2.887,
1453
- "step": 114000
1454
- },
1455
- {
1456
- "epoch": 4.49,
1457
- "learning_rate": 7.569414490662027e-05,
1458
- "loss": 2.9001,
1459
- "step": 114500
1460
- },
1461
- {
1462
- "epoch": 4.51,
1463
- "learning_rate": 7.565699169491811e-05,
1464
- "loss": 2.8804,
1465
- "step": 115000
1466
- },
1467
- {
1468
- "epoch": 4.53,
1469
- "learning_rate": 7.561968808091784e-05,
1470
- "loss": 2.9395,
1471
- "step": 115500
1472
- },
1473
- {
1474
- "epoch": 4.55,
1475
- "learning_rate": 7.558223422196728e-05,
1476
- "loss": 2.922,
1477
- "step": 116000
1478
- },
1479
- {
1480
- "epoch": 4.57,
1481
- "learning_rate": 7.554463027604798e-05,
1482
- "loss": 2.8777,
1483
- "step": 116500
1484
- },
1485
- {
1486
- "epoch": 4.59,
1487
- "learning_rate": 7.550687640177451e-05,
1488
- "loss": 2.8746,
1489
- "step": 117000
1490
- },
1491
- {
1492
- "epoch": 4.61,
1493
- "learning_rate": 7.54689727583939e-05,
1494
- "loss": 2.9362,
1495
- "step": 117500
1496
- },
1497
- {
1498
- "epoch": 4.63,
1499
- "learning_rate": 7.543091950578492e-05,
1500
- "loss": 2.8282,
1501
- "step": 118000
1502
- },
1503
- {
1504
- "epoch": 4.65,
1505
- "learning_rate": 7.539271680445734e-05,
1506
- "loss": 2.8902,
1507
- "step": 118500
1508
- },
1509
- {
1510
- "epoch": 4.66,
1511
- "learning_rate": 7.535436481555135e-05,
1512
- "loss": 2.9123,
1513
- "step": 119000
1514
- },
1515
- {
1516
- "epoch": 4.68,
1517
- "learning_rate": 7.531586370083684e-05,
1518
- "loss": 2.847,
1519
- "step": 119500
1520
- },
1521
- {
1522
- "epoch": 4.7,
1523
- "learning_rate": 7.527721362271269e-05,
1524
- "loss": 2.8664,
1525
- "step": 120000
1526
- },
1527
- {
1528
- "epoch": 4.72,
1529
- "learning_rate": 7.523841474420614e-05,
1530
- "loss": 2.9341,
1531
- "step": 120500
1532
- },
1533
- {
1534
- "epoch": 4.74,
1535
- "learning_rate": 7.519946722897209e-05,
1536
- "loss": 2.8942,
1537
- "step": 121000
1538
- },
1539
- {
1540
- "epoch": 4.76,
1541
- "learning_rate": 7.516037124129231e-05,
1542
- "loss": 2.9092,
1543
- "step": 121500
1544
- },
1545
- {
1546
- "epoch": 4.78,
1547
- "learning_rate": 7.512112694607494e-05,
1548
- "loss": 2.8891,
1549
- "step": 122000
1550
- },
1551
- {
1552
- "epoch": 4.8,
1553
- "learning_rate": 7.508173450885361e-05,
1554
- "loss": 2.9581,
1555
- "step": 122500
1556
- },
1557
- {
1558
- "epoch": 4.82,
1559
- "learning_rate": 7.504219409578685e-05,
1560
- "loss": 3.0015,
1561
- "step": 123000
1562
- },
1563
- {
1564
- "epoch": 4.84,
1565
- "learning_rate": 7.500250587365735e-05,
1566
- "loss": 2.9472,
1567
- "step": 123500
1568
- },
1569
- {
1570
- "epoch": 4.86,
1571
- "learning_rate": 7.496267000987127e-05,
1572
- "loss": 2.8975,
1573
- "step": 124000
1574
- },
1575
- {
1576
- "epoch": 4.88,
1577
- "learning_rate": 7.49226866724575e-05,
1578
- "loss": 2.8226,
1579
- "step": 124500
1580
- },
1581
- {
1582
- "epoch": 4.9,
1583
- "learning_rate": 7.488255603006703e-05,
1584
- "loss": 2.8738,
1585
- "step": 125000
1586
- },
1587
- {
1588
- "epoch": 4.92,
1589
- "learning_rate": 7.484227825197214e-05,
1590
- "loss": 2.9323,
1591
- "step": 125500
1592
- },
1593
- {
1594
- "epoch": 4.94,
1595
- "learning_rate": 7.480185350806574e-05,
1596
- "loss": 2.9436,
1597
- "step": 126000
1598
- },
1599
- {
1600
- "epoch": 4.96,
1601
- "learning_rate": 7.476128196886067e-05,
1602
- "loss": 2.8861,
1603
- "step": 126500
1604
- },
1605
- {
1606
- "epoch": 4.98,
1607
- "learning_rate": 7.472056380548893e-05,
1608
- "loss": 2.9255,
1609
- "step": 127000
1610
- },
1611
- {
1612
- "epoch": 5.0,
1613
- "learning_rate": 7.467969918970099e-05,
1614
- "loss": 2.8591,
1615
- "step": 127500
1616
- },
1617
- {
1618
- "epoch": 5.0,
1619
- "eval_bleu": 1.0,
1620
- "eval_brevity_penalty": 1.0,
1621
- "eval_length_ratio": 1.0,
1622
- "eval_loss": 2.8255138397216797,
1623
  "eval_precisions": [
1624
  1.0,
1625
  1.0,
1626
  1.0,
1627
  1.0
1628
  ],
1629
- "eval_reference_length": 5805056,
1630
- "eval_runtime": 10189.5043,
1631
- "eval_samples_per_second": 1.113,
1632
- "eval_steps_per_second": 0.278,
1633
- "eval_translation_length": 5805056,
1634
- "step": 127555
1635
  }
1636
  ],
1637
  "logging_steps": 500,
1638
- "max_steps": 765330,
1639
  "num_train_epochs": 30,
1640
  "save_steps": 1000,
1641
- "total_flos": 4.378268597873541e+18,
1642
  "trial_name": null,
1643
  "trial_params": null
1644
  }
 
1
  {
2
+ "best_metric": 3.0692174434661865,
3
+ "best_model_checkpoint": "dq158/pingusPongus/checkpoint-17782",
4
+ "epoch": 1.0,
5
  "eval_steps": 500,
6
+ "global_step": 17782,
7
  "is_hyper_param_search": false,
8
  "is_local_process_zero": true,
9
  "is_world_process_zero": true,
10
  "log_history": [
11
  {
12
+ "epoch": 0.03,
13
+ "learning_rate": 6e-05,
14
+ "loss": 3.2033,
15
  "step": 500
16
  },
17
  {
18
+ "epoch": 0.06,
19
+ "learning_rate": 5.9999869700884375e-05,
20
+ "loss": 3.1925,
21
  "step": 1000
22
  },
23
  {
24
+ "epoch": 0.08,
25
+ "learning_rate": 5.999947880466937e-05,
26
+ "loss": 3.1596,
27
  "step": 1500
28
  },
29
  {
30
+ "epoch": 0.11,
31
+ "learning_rate": 5.999882731475053e-05,
32
+ "loss": 3.2032,
33
  "step": 2000
34
  },
35
  {
36
+ "epoch": 0.14,
37
+ "learning_rate": 5.99979152367871e-05,
38
+ "loss": 3.1379,
39
  "step": 2500
40
  },
41
  {
42
+ "epoch": 0.17,
43
+ "learning_rate": 5.999674257870195e-05,
44
+ "loss": 3.1821,
45
  "step": 3000
46
  },
47
  {
48
+ "epoch": 0.2,
49
+ "learning_rate": 5.9995309350681496e-05,
50
+ "loss": 3.165,
51
  "step": 3500
52
  },
53
  {
54
+ "epoch": 0.22,
55
+ "learning_rate": 5.9993615565175614e-05,
56
+ "loss": 3.1682,
57
  "step": 4000
58
  },
59
  {
60
+ "epoch": 0.25,
61
+ "learning_rate": 5.999166123689758e-05,
62
+ "loss": 3.1234,
63
  "step": 4500
64
  },
65
  {
66
+ "epoch": 0.28,
67
+ "learning_rate": 5.9989446382823863e-05,
68
+ "loss": 3.1389,
69
  "step": 5000
70
  },
71
  {
72
+ "epoch": 0.31,
73
+ "learning_rate": 5.9986971022194026e-05,
74
+ "loss": 3.1197,
75
  "step": 5500
76
  },
77
  {
78
+ "epoch": 0.34,
79
+ "learning_rate": 5.998423517651056e-05,
80
+ "loss": 3.2251,
81
  "step": 6000
82
  },
83
  {
84
+ "epoch": 0.37,
85
+ "learning_rate": 5.998123886953869e-05,
86
+ "loss": 3.2011,
87
  "step": 6500
88
  },
89
  {
90
+ "epoch": 0.39,
91
+ "learning_rate": 5.9977982127306157e-05,
92
+ "loss": 3.1573,
93
  "step": 7000
94
  },
95
  {
96
+ "epoch": 0.42,
97
+ "learning_rate": 5.9974464978103e-05,
98
+ "loss": 3.1049,
99
  "step": 7500
100
  },
101
  {
102
+ "epoch": 0.45,
103
+ "learning_rate": 5.997068745248132e-05,
104
+ "loss": 3.1919,
105
  "step": 8000
106
  },
107
  {
108
+ "epoch": 0.48,
109
+ "learning_rate": 5.996664958325499e-05,
110
+ "loss": 3.1192,
111
  "step": 8500
112
  },
113
  {
114
+ "epoch": 0.51,
115
+ "learning_rate": 5.99623514054994e-05,
116
+ "loss": 3.1393,
117
  "step": 9000
118
  },
119
  {
120
+ "epoch": 0.53,
121
+ "learning_rate": 5.995779295655114e-05,
122
+ "loss": 3.1306,
123
  "step": 9500
124
  },
125
  {
126
+ "epoch": 0.56,
127
+ "learning_rate": 5.995297427600766e-05,
128
+ "loss": 3.1719,
129
  "step": 10000
130
  },
131
  {
132
+ "epoch": 0.59,
133
+ "learning_rate": 5.994789540572695e-05,
134
+ "loss": 3.1197,
135
  "step": 10500
136
  },
137
  {
138
+ "epoch": 0.62,
139
+ "learning_rate": 5.994255638982716e-05,
140
+ "loss": 3.1231,
141
  "step": 11000
142
  },
143
  {
144
+ "epoch": 0.65,
145
+ "learning_rate": 5.9936957274686233e-05,
146
+ "loss": 3.1585,
147
  "step": 11500
148
  },
149
  {
150
+ "epoch": 0.67,
151
+ "learning_rate": 5.9931098108941496e-05,
152
+ "loss": 3.2459,
153
  "step": 12000
154
  },
155
  {
156
+ "epoch": 0.7,
157
+ "learning_rate": 5.9924978943489196e-05,
158
+ "loss": 3.1119,
159
  "step": 12500
160
  },
161
  {
162
+ "epoch": 0.73,
163
+ "learning_rate": 5.991859983148415e-05,
164
+ "loss": 3.1499,
165
  "step": 13000
166
  },
167
  {
168
+ "epoch": 0.76,
169
+ "learning_rate": 5.9911960828339176e-05,
170
+ "loss": 3.2634,
171
  "step": 13500
172
  },
173
  {
174
+ "epoch": 0.79,
175
+ "learning_rate": 5.9905061991724704e-05,
176
+ "loss": 3.1048,
177
  "step": 14000
178
  },
179
  {
180
+ "epoch": 0.82,
181
+ "learning_rate": 5.9897903381568234e-05,
182
+ "loss": 3.1717,
183
  "step": 14500
184
  },
185
  {
186
+ "epoch": 0.84,
187
+ "learning_rate": 5.989048506005378e-05,
188
+ "loss": 3.317,
189
  "step": 15000
190
  },
191
  {
192
+ "epoch": 0.87,
193
+ "learning_rate": 5.98828070916214e-05,
194
+ "loss": 3.1535,
195
  "step": 15500
196
  },
197
  {
198
+ "epoch": 0.9,
199
+ "learning_rate": 5.9874869542966605e-05,
200
+ "loss": 3.0698,
201
  "step": 16000
202
  },
203
  {
204
+ "epoch": 0.93,
205
+ "learning_rate": 5.986667248303975e-05,
206
+ "loss": 3.1798,
207
  "step": 16500
208
  },
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
209
  {
210
  "epoch": 0.96,
211
+ "learning_rate": 5.985821598304549e-05,
212
+ "loss": 3.0956,
213
+ "step": 17000
214
  },
215
  {
216
  "epoch": 0.98,
217
+ "learning_rate": 5.984950011644212e-05,
218
+ "loss": 3.1267,
219
+ "step": 17500
 
 
 
 
 
 
220
  },
221
  {
222
  "epoch": 1.0,
223
  "eval_bleu": 1.0,
224
  "eval_brevity_penalty": 1.0,
225
  "eval_length_ratio": 1.0,
226
+ "eval_loss": 3.0692174434661865,
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
227
  "eval_precisions": [
228
  1.0,
229
  1.0,
230
  1.0,
231
  1.0
232
  ],
233
+ "eval_reference_length": 2023424,
234
+ "eval_runtime": 3477.4176,
235
+ "eval_samples_per_second": 1.136,
236
+ "eval_steps_per_second": 0.568,
237
+ "eval_translation_length": 2023424,
238
+ "step": 17782
239
  }
240
  ],
241
  "logging_steps": 500,
242
+ "max_steps": 533460,
243
  "num_train_epochs": 30,
244
  "save_steps": 1000,
245
+ "total_flos": 3.0517701759310234e+17,
246
  "trial_name": null,
247
  "trial_params": null
248
  }
last-checkpoint/training_args.bin CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:7b50f7546568912cb3d5e056bc8c92501ce9dcdde35db94c9f3bca630112c5c4
3
  size 4728
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:c3d95e82f3766efe05da841f4af2236c57541bc9acc301ad4339595e386ca971
3
  size 4728