HealthTeam commited on
Commit
3b118fe
1 Parent(s): b16bd8b

End of training

Browse files
last-checkpoint/generation_config.json → generation_config.json RENAMED
File without changes
last-checkpoint/added_tokens.json DELETED
@@ -1,6 +0,0 @@
1
- {
2
- "translate Arabic to English:": 250101,
3
- "translate Arabic to Thai:": 250100,
4
- "translate Thai to Arabic:": 250102,
5
- "translate Thai to English:": 250103
6
- }
 
 
 
 
 
 
 
last-checkpoint/config.json DELETED
@@ -1,32 +0,0 @@
1
- {
2
- "_name_or_path": "google/mt5-small",
3
- "architectures": [
4
- "MT5ForConditionalGeneration"
5
- ],
6
- "d_ff": 1024,
7
- "d_kv": 64,
8
- "d_model": 512,
9
- "decoder_start_token_id": 0,
10
- "dense_act_fn": "gelu_new",
11
- "dropout_rate": 0.1,
12
- "eos_token_id": 1,
13
- "feed_forward_proj": "gated-gelu",
14
- "initializer_factor": 1.0,
15
- "is_encoder_decoder": true,
16
- "is_gated_act": true,
17
- "layer_norm_epsilon": 1e-06,
18
- "max_length": 300,
19
- "model_type": "mt5",
20
- "num_decoder_layers": 8,
21
- "num_heads": 6,
22
- "num_layers": 8,
23
- "pad_token_id": 0,
24
- "relative_attention_max_distance": 128,
25
- "relative_attention_num_buckets": 32,
26
- "tie_word_embeddings": false,
27
- "tokenizer_class": "T5Tokenizer",
28
- "torch_dtype": "float32",
29
- "transformers_version": "4.26.0",
30
- "use_cache": true,
31
- "vocab_size": 250104
32
- }
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
last-checkpoint/optimizer.pt DELETED
@@ -1,3 +0,0 @@
1
- version https://git-lfs.github.com/spec/v1
2
- oid sha256:0cedde736261632f89acf33553d7f1b796fa4666797a76600bc7414afba87e83
3
- size 2401461637
 
 
 
 
last-checkpoint/pytorch_model.bin DELETED
@@ -1,3 +0,0 @@
1
- version https://git-lfs.github.com/spec/v1
2
- oid sha256:3c830ba03e87ef89c167a65183788768bf905dc7f4c689fe476c351a7fc9b93b
3
- size 1200739717
 
 
 
 
last-checkpoint/rng_state.pth DELETED
@@ -1,3 +0,0 @@
1
- version https://git-lfs.github.com/spec/v1
2
- oid sha256:dd594782425d26032039a6c2b147e5861095f1958bd2b0f4a2e42679340bff32
3
- size 14575
 
 
 
 
last-checkpoint/scheduler.pt DELETED
@@ -1,3 +0,0 @@
1
- version https://git-lfs.github.com/spec/v1
2
- oid sha256:e9febce275791009bb39a3242ecdd95ff1e50bc7a7886ff99da486505feed461
3
- size 627
 
 
 
 
last-checkpoint/special_tokens_map.json DELETED
@@ -1,11 +0,0 @@
1
- {
2
- "additional_special_tokens": [
3
- "translate Arabic to Thai:",
4
- "translate Arabic to English:",
5
- "translate Thai to Arabic:",
6
- "translate Thai to English:"
7
- ],
8
- "eos_token": "</s>",
9
- "pad_token": "<pad>",
10
- "unk_token": "<unk>"
11
- }
 
 
 
 
 
 
 
 
 
 
 
 
last-checkpoint/spiece.model DELETED
@@ -1,3 +0,0 @@
1
- version https://git-lfs.github.com/spec/v1
2
- oid sha256:ef78f86560d809067d12bac6c09f19a462cb3af3f54d2b8acbba26e1433125d6
3
- size 4309802
 
 
 
 
last-checkpoint/tokenizer_config.json DELETED
@@ -1,12 +0,0 @@
1
- {
2
- "additional_special_tokens": null,
3
- "eos_token": "</s>",
4
- "extra_ids": 0,
5
- "model_max_length": 1000000000000000019884624838656,
6
- "name_or_path": "google/mt5-small",
7
- "pad_token": "<pad>",
8
- "sp_model_kwargs": {},
9
- "special_tokens_map_file": "/root/.cache/huggingface/hub/models--google--mt5-small/snapshots/38f23af8ec210eb6c376d40e9c56bd25a80f195d/special_tokens_map.json",
10
- "tokenizer_class": "T5Tokenizer",
11
- "unk_token": "<unk>"
12
- }
 
 
 
 
 
 
 
 
 
 
 
 
 
last-checkpoint/trainer_state.json DELETED
@@ -1,2416 +0,0 @@
1
- {
2
- "best_metric": null,
3
- "best_model_checkpoint": null,
4
- "epoch": 2.9583172175775787,
5
- "global_step": 198864,
6
- "is_hyper_param_search": false,
7
- "is_local_process_zero": true,
8
- "is_world_process_zero": true,
9
- "log_history": [
10
- {
11
- "epoch": 0.01,
12
- "learning_rate": 1.9950413059216727e-05,
13
- "loss": 14.0627,
14
- "step": 500
15
- },
16
- {
17
- "epoch": 0.01,
18
- "learning_rate": 1.9900826118433453e-05,
19
- "loss": 6.3799,
20
- "step": 1000
21
- },
22
- {
23
- "epoch": 0.02,
24
- "learning_rate": 1.9851239177650176e-05,
25
- "loss": 5.31,
26
- "step": 1500
27
- },
28
- {
29
- "epoch": 0.03,
30
- "learning_rate": 1.9801652236866898e-05,
31
- "loss": 4.9781,
32
- "step": 2000
33
- },
34
- {
35
- "epoch": 0.04,
36
- "learning_rate": 1.9752065296083624e-05,
37
- "loss": 4.7392,
38
- "step": 2500
39
- },
40
- {
41
- "epoch": 0.04,
42
- "learning_rate": 1.970247835530035e-05,
43
- "loss": 4.5779,
44
- "step": 3000
45
- },
46
- {
47
- "epoch": 0.05,
48
- "learning_rate": 1.9652891414517075e-05,
49
- "loss": 4.4691,
50
- "step": 3500
51
- },
52
- {
53
- "epoch": 0.06,
54
- "learning_rate": 1.96033044737338e-05,
55
- "loss": 4.3745,
56
- "step": 4000
57
- },
58
- {
59
- "epoch": 0.07,
60
- "learning_rate": 1.9553717532950524e-05,
61
- "loss": 4.2883,
62
- "step": 4500
63
- },
64
- {
65
- "epoch": 0.07,
66
- "learning_rate": 1.9504130592167246e-05,
67
- "loss": 4.2342,
68
- "step": 5000
69
- },
70
- {
71
- "epoch": 0.08,
72
- "learning_rate": 1.9454543651383972e-05,
73
- "loss": 4.1614,
74
- "step": 5500
75
- },
76
- {
77
- "epoch": 0.09,
78
- "learning_rate": 1.9404956710600698e-05,
79
- "loss": 4.1279,
80
- "step": 6000
81
- },
82
- {
83
- "epoch": 0.1,
84
- "learning_rate": 1.9355369769817423e-05,
85
- "loss": 4.0802,
86
- "step": 6500
87
- },
88
- {
89
- "epoch": 0.1,
90
- "learning_rate": 1.930578282903415e-05,
91
- "loss": 4.0298,
92
- "step": 7000
93
- },
94
- {
95
- "epoch": 0.11,
96
- "learning_rate": 1.925619588825087e-05,
97
- "loss": 3.9697,
98
- "step": 7500
99
- },
100
- {
101
- "epoch": 0.12,
102
- "learning_rate": 1.9206608947467594e-05,
103
- "loss": 3.9584,
104
- "step": 8000
105
- },
106
- {
107
- "epoch": 0.13,
108
- "learning_rate": 1.915702200668432e-05,
109
- "loss": 3.9196,
110
- "step": 8500
111
- },
112
- {
113
- "epoch": 0.13,
114
- "learning_rate": 1.9107435065901046e-05,
115
- "loss": 3.9081,
116
- "step": 9000
117
- },
118
- {
119
- "epoch": 0.14,
120
- "learning_rate": 1.905784812511777e-05,
121
- "loss": 3.8419,
122
- "step": 9500
123
- },
124
- {
125
- "epoch": 0.15,
126
- "learning_rate": 1.9008261184334497e-05,
127
- "loss": 3.8363,
128
- "step": 10000
129
- },
130
- {
131
- "epoch": 0.16,
132
- "learning_rate": 1.895867424355122e-05,
133
- "loss": 3.8047,
134
- "step": 10500
135
- },
136
- {
137
- "epoch": 0.16,
138
- "learning_rate": 1.8909087302767945e-05,
139
- "loss": 3.7728,
140
- "step": 11000
141
- },
142
- {
143
- "epoch": 0.17,
144
- "learning_rate": 1.8859500361984668e-05,
145
- "loss": 3.7731,
146
- "step": 11500
147
- },
148
- {
149
- "epoch": 0.18,
150
- "learning_rate": 1.8809913421201393e-05,
151
- "loss": 3.7408,
152
- "step": 12000
153
- },
154
- {
155
- "epoch": 0.19,
156
- "learning_rate": 1.876032648041812e-05,
157
- "loss": 3.7027,
158
- "step": 12500
159
- },
160
- {
161
- "epoch": 0.19,
162
- "learning_rate": 1.8710739539634845e-05,
163
- "loss": 3.6865,
164
- "step": 13000
165
- },
166
- {
167
- "epoch": 0.2,
168
- "learning_rate": 1.8661152598851567e-05,
169
- "loss": 3.6456,
170
- "step": 13500
171
- },
172
- {
173
- "epoch": 0.21,
174
- "learning_rate": 1.8611565658068293e-05,
175
- "loss": 3.6539,
176
- "step": 14000
177
- },
178
- {
179
- "epoch": 0.22,
180
- "learning_rate": 1.8561978717285016e-05,
181
- "loss": 3.6222,
182
- "step": 14500
183
- },
184
- {
185
- "epoch": 0.22,
186
- "learning_rate": 1.851239177650174e-05,
187
- "loss": 3.6127,
188
- "step": 15000
189
- },
190
- {
191
- "epoch": 0.23,
192
- "learning_rate": 1.8462804835718467e-05,
193
- "loss": 3.6133,
194
- "step": 15500
195
- },
196
- {
197
- "epoch": 0.24,
198
- "learning_rate": 1.8413217894935193e-05,
199
- "loss": 3.5863,
200
- "step": 16000
201
- },
202
- {
203
- "epoch": 0.25,
204
- "learning_rate": 1.8363630954151915e-05,
205
- "loss": 3.5669,
206
- "step": 16500
207
- },
208
- {
209
- "epoch": 0.25,
210
- "learning_rate": 1.831404401336864e-05,
211
- "loss": 3.5518,
212
- "step": 17000
213
- },
214
- {
215
- "epoch": 0.26,
216
- "learning_rate": 1.8264457072585367e-05,
217
- "loss": 3.5368,
218
- "step": 17500
219
- },
220
- {
221
- "epoch": 0.27,
222
- "learning_rate": 1.821487013180209e-05,
223
- "loss": 3.5294,
224
- "step": 18000
225
- },
226
- {
227
- "epoch": 0.28,
228
- "learning_rate": 1.8165283191018815e-05,
229
- "loss": 3.5097,
230
- "step": 18500
231
- },
232
- {
233
- "epoch": 0.28,
234
- "learning_rate": 1.811569625023554e-05,
235
- "loss": 3.5198,
236
- "step": 19000
237
- },
238
- {
239
- "epoch": 0.29,
240
- "learning_rate": 1.8066109309452263e-05,
241
- "loss": 3.4702,
242
- "step": 19500
243
- },
244
- {
245
- "epoch": 0.3,
246
- "learning_rate": 1.801652236866899e-05,
247
- "loss": 3.485,
248
- "step": 20000
249
- },
250
- {
251
- "epoch": 0.3,
252
- "learning_rate": 1.7966935427885715e-05,
253
- "loss": 3.4853,
254
- "step": 20500
255
- },
256
- {
257
- "epoch": 0.31,
258
- "learning_rate": 1.7917348487102437e-05,
259
- "loss": 3.4395,
260
- "step": 21000
261
- },
262
- {
263
- "epoch": 0.32,
264
- "learning_rate": 1.7867761546319163e-05,
265
- "loss": 3.4515,
266
- "step": 21500
267
- },
268
- {
269
- "epoch": 0.33,
270
- "learning_rate": 1.781817460553589e-05,
271
- "loss": 3.4307,
272
- "step": 22000
273
- },
274
- {
275
- "epoch": 0.33,
276
- "learning_rate": 1.776858766475261e-05,
277
- "loss": 3.4343,
278
- "step": 22500
279
- },
280
- {
281
- "epoch": 0.34,
282
- "learning_rate": 1.7719000723969337e-05,
283
- "loss": 3.4053,
284
- "step": 23000
285
- },
286
- {
287
- "epoch": 0.35,
288
- "learning_rate": 1.7669413783186063e-05,
289
- "loss": 3.4008,
290
- "step": 23500
291
- },
292
- {
293
- "epoch": 0.36,
294
- "learning_rate": 1.7619826842402785e-05,
295
- "loss": 3.3951,
296
- "step": 24000
297
- },
298
- {
299
- "epoch": 0.36,
300
- "learning_rate": 1.757023990161951e-05,
301
- "loss": 3.3871,
302
- "step": 24500
303
- },
304
- {
305
- "epoch": 0.37,
306
- "learning_rate": 1.7520652960836234e-05,
307
- "loss": 3.3822,
308
- "step": 25000
309
- },
310
- {
311
- "epoch": 0.38,
312
- "learning_rate": 1.747106602005296e-05,
313
- "loss": 3.3816,
314
- "step": 25500
315
- },
316
- {
317
- "epoch": 0.39,
318
- "learning_rate": 1.7421479079269685e-05,
319
- "loss": 3.3759,
320
- "step": 26000
321
- },
322
- {
323
- "epoch": 0.39,
324
- "learning_rate": 1.737189213848641e-05,
325
- "loss": 3.3624,
326
- "step": 26500
327
- },
328
- {
329
- "epoch": 0.4,
330
- "learning_rate": 1.7322305197703137e-05,
331
- "loss": 3.3535,
332
- "step": 27000
333
- },
334
- {
335
- "epoch": 0.41,
336
- "learning_rate": 1.727271825691986e-05,
337
- "loss": 3.3366,
338
- "step": 27500
339
- },
340
- {
341
- "epoch": 0.42,
342
- "learning_rate": 1.722313131613658e-05,
343
- "loss": 3.3245,
344
- "step": 28000
345
- },
346
- {
347
- "epoch": 0.42,
348
- "learning_rate": 1.7173544375353307e-05,
349
- "loss": 3.3575,
350
- "step": 28500
351
- },
352
- {
353
- "epoch": 0.43,
354
- "learning_rate": 1.7123957434570033e-05,
355
- "loss": 3.3133,
356
- "step": 29000
357
- },
358
- {
359
- "epoch": 0.44,
360
- "learning_rate": 1.707437049378676e-05,
361
- "loss": 3.3124,
362
- "step": 29500
363
- },
364
- {
365
- "epoch": 0.45,
366
- "learning_rate": 1.7024783553003485e-05,
367
- "loss": 3.3295,
368
- "step": 30000
369
- },
370
- {
371
- "epoch": 0.45,
372
- "learning_rate": 1.6975196612220207e-05,
373
- "loss": 3.3192,
374
- "step": 30500
375
- },
376
- {
377
- "epoch": 0.46,
378
- "learning_rate": 1.692560967143693e-05,
379
- "loss": 3.3241,
380
- "step": 31000
381
- },
382
- {
383
- "epoch": 0.47,
384
- "learning_rate": 1.6876022730653655e-05,
385
- "loss": 3.2989,
386
- "step": 31500
387
- },
388
- {
389
- "epoch": 0.48,
390
- "learning_rate": 1.682643578987038e-05,
391
- "loss": 3.2956,
392
- "step": 32000
393
- },
394
- {
395
- "epoch": 0.48,
396
- "learning_rate": 1.6776848849087107e-05,
397
- "loss": 3.2889,
398
- "step": 32500
399
- },
400
- {
401
- "epoch": 0.49,
402
- "learning_rate": 1.6727261908303833e-05,
403
- "loss": 3.2934,
404
- "step": 33000
405
- },
406
- {
407
- "epoch": 0.5,
408
- "learning_rate": 1.6677674967520555e-05,
409
- "loss": 3.2642,
410
- "step": 33500
411
- },
412
- {
413
- "epoch": 0.51,
414
- "learning_rate": 1.6628088026737277e-05,
415
- "loss": 3.2513,
416
- "step": 34000
417
- },
418
- {
419
- "epoch": 0.51,
420
- "learning_rate": 1.6578501085954003e-05,
421
- "loss": 3.2584,
422
- "step": 34500
423
- },
424
- {
425
- "epoch": 0.52,
426
- "learning_rate": 1.652891414517073e-05,
427
- "loss": 3.2576,
428
- "step": 35000
429
- },
430
- {
431
- "epoch": 0.53,
432
- "learning_rate": 1.6479327204387455e-05,
433
- "loss": 3.2532,
434
- "step": 35500
435
- },
436
- {
437
- "epoch": 0.54,
438
- "learning_rate": 1.642974026360418e-05,
439
- "loss": 3.2349,
440
- "step": 36000
441
- },
442
- {
443
- "epoch": 0.54,
444
- "learning_rate": 1.6380153322820903e-05,
445
- "loss": 3.2349,
446
- "step": 36500
447
- },
448
- {
449
- "epoch": 0.55,
450
- "learning_rate": 1.6330566382037625e-05,
451
- "loss": 3.2158,
452
- "step": 37000
453
- },
454
- {
455
- "epoch": 0.56,
456
- "learning_rate": 1.628097944125435e-05,
457
- "loss": 3.2309,
458
- "step": 37500
459
- },
460
- {
461
- "epoch": 0.57,
462
- "learning_rate": 1.6231392500471077e-05,
463
- "loss": 3.2227,
464
- "step": 38000
465
- },
466
- {
467
- "epoch": 0.57,
468
- "learning_rate": 1.6181805559687803e-05,
469
- "loss": 3.2134,
470
- "step": 38500
471
- },
472
- {
473
- "epoch": 0.58,
474
- "learning_rate": 1.613221861890453e-05,
475
- "loss": 3.2206,
476
- "step": 39000
477
- },
478
- {
479
- "epoch": 0.59,
480
- "learning_rate": 1.608263167812125e-05,
481
- "loss": 3.2002,
482
- "step": 39500
483
- },
484
- {
485
- "epoch": 0.6,
486
- "learning_rate": 1.6033044737337973e-05,
487
- "loss": 3.1988,
488
- "step": 40000
489
- },
490
- {
491
- "epoch": 0.6,
492
- "learning_rate": 1.59834577965547e-05,
493
- "loss": 3.2081,
494
- "step": 40500
495
- },
496
- {
497
- "epoch": 0.61,
498
- "learning_rate": 1.5933870855771425e-05,
499
- "loss": 3.1891,
500
- "step": 41000
501
- },
502
- {
503
- "epoch": 0.62,
504
- "learning_rate": 1.588428391498815e-05,
505
- "loss": 3.2007,
506
- "step": 41500
507
- },
508
- {
509
- "epoch": 0.62,
510
- "learning_rate": 1.5834696974204877e-05,
511
- "loss": 3.1948,
512
- "step": 42000
513
- },
514
- {
515
- "epoch": 0.63,
516
- "learning_rate": 1.57851100334216e-05,
517
- "loss": 3.1673,
518
- "step": 42500
519
- },
520
- {
521
- "epoch": 0.64,
522
- "learning_rate": 1.5735523092638325e-05,
523
- "loss": 3.158,
524
- "step": 43000
525
- },
526
- {
527
- "epoch": 0.65,
528
- "learning_rate": 1.5685936151855047e-05,
529
- "loss": 3.1561,
530
- "step": 43500
531
- },
532
- {
533
- "epoch": 0.65,
534
- "learning_rate": 1.5636349211071773e-05,
535
- "loss": 3.1734,
536
- "step": 44000
537
- },
538
- {
539
- "epoch": 0.66,
540
- "learning_rate": 1.55867622702885e-05,
541
- "loss": 3.1401,
542
- "step": 44500
543
- },
544
- {
545
- "epoch": 0.67,
546
- "learning_rate": 1.5537175329505225e-05,
547
- "loss": 3.1463,
548
- "step": 45000
549
- },
550
- {
551
- "epoch": 0.68,
552
- "learning_rate": 1.5487588388721947e-05,
553
- "loss": 3.1431,
554
- "step": 45500
555
- },
556
- {
557
- "epoch": 0.68,
558
- "learning_rate": 1.5438001447938673e-05,
559
- "loss": 3.1316,
560
- "step": 46000
561
- },
562
- {
563
- "epoch": 0.69,
564
- "learning_rate": 1.5388414507155395e-05,
565
- "loss": 3.1606,
566
- "step": 46500
567
- },
568
- {
569
- "epoch": 0.7,
570
- "learning_rate": 1.533882756637212e-05,
571
- "loss": 3.1362,
572
- "step": 47000
573
- },
574
- {
575
- "epoch": 0.71,
576
- "learning_rate": 1.5289240625588847e-05,
577
- "loss": 3.1335,
578
- "step": 47500
579
- },
580
- {
581
- "epoch": 0.71,
582
- "learning_rate": 1.523965368480557e-05,
583
- "loss": 3.149,
584
- "step": 48000
585
- },
586
- {
587
- "epoch": 0.72,
588
- "learning_rate": 1.5190066744022297e-05,
589
- "loss": 3.1293,
590
- "step": 48500
591
- },
592
- {
593
- "epoch": 0.73,
594
- "learning_rate": 1.514047980323902e-05,
595
- "loss": 3.1286,
596
- "step": 49000
597
- },
598
- {
599
- "epoch": 0.74,
600
- "learning_rate": 1.5090892862455743e-05,
601
- "loss": 3.1196,
602
- "step": 49500
603
- },
604
- {
605
- "epoch": 0.74,
606
- "learning_rate": 1.5041305921672469e-05,
607
- "loss": 3.1238,
608
- "step": 50000
609
- },
610
- {
611
- "epoch": 0.75,
612
- "learning_rate": 1.4991718980889195e-05,
613
- "loss": 3.1033,
614
- "step": 50500
615
- },
616
- {
617
- "epoch": 0.76,
618
- "learning_rate": 1.4942132040105919e-05,
619
- "loss": 3.1112,
620
- "step": 51000
621
- },
622
- {
623
- "epoch": 0.77,
624
- "learning_rate": 1.4892545099322645e-05,
625
- "loss": 3.0936,
626
- "step": 51500
627
- },
628
- {
629
- "epoch": 0.77,
630
- "learning_rate": 1.4842958158539369e-05,
631
- "loss": 3.107,
632
- "step": 52000
633
- },
634
- {
635
- "epoch": 0.78,
636
- "learning_rate": 1.4793371217756094e-05,
637
- "loss": 3.1063,
638
- "step": 52500
639
- },
640
- {
641
- "epoch": 0.79,
642
- "learning_rate": 1.4743784276972817e-05,
643
- "loss": 3.0639,
644
- "step": 53000
645
- },
646
- {
647
- "epoch": 0.8,
648
- "learning_rate": 1.4694197336189543e-05,
649
- "loss": 3.1028,
650
- "step": 53500
651
- },
652
- {
653
- "epoch": 0.8,
654
- "learning_rate": 1.4644610395406267e-05,
655
- "loss": 3.0821,
656
- "step": 54000
657
- },
658
- {
659
- "epoch": 0.81,
660
- "learning_rate": 1.4595023454622992e-05,
661
- "loss": 3.0596,
662
- "step": 54500
663
- },
664
- {
665
- "epoch": 0.82,
666
- "learning_rate": 1.4545436513839717e-05,
667
- "loss": 3.0787,
668
- "step": 55000
669
- },
670
- {
671
- "epoch": 0.83,
672
- "learning_rate": 1.4495849573056442e-05,
673
- "loss": 3.0755,
674
- "step": 55500
675
- },
676
- {
677
- "epoch": 0.83,
678
- "learning_rate": 1.4446262632273165e-05,
679
- "loss": 3.066,
680
- "step": 56000
681
- },
682
- {
683
- "epoch": 0.84,
684
- "learning_rate": 1.439667569148989e-05,
685
- "loss": 3.0695,
686
- "step": 56500
687
- },
688
- {
689
- "epoch": 0.85,
690
- "learning_rate": 1.4347088750706615e-05,
691
- "loss": 3.059,
692
- "step": 57000
693
- },
694
- {
695
- "epoch": 0.86,
696
- "learning_rate": 1.429750180992334e-05,
697
- "loss": 3.0628,
698
- "step": 57500
699
- },
700
- {
701
- "epoch": 0.86,
702
- "learning_rate": 1.4247914869140065e-05,
703
- "loss": 3.0733,
704
- "step": 58000
705
- },
706
- {
707
- "epoch": 0.87,
708
- "learning_rate": 1.419832792835679e-05,
709
- "loss": 3.0591,
710
- "step": 58500
711
- },
712
- {
713
- "epoch": 0.88,
714
- "learning_rate": 1.4148740987573514e-05,
715
- "loss": 3.0468,
716
- "step": 59000
717
- },
718
- {
719
- "epoch": 0.89,
720
- "learning_rate": 1.4099154046790237e-05,
721
- "loss": 3.0265,
722
- "step": 59500
723
- },
724
- {
725
- "epoch": 0.89,
726
- "learning_rate": 1.4049567106006963e-05,
727
- "loss": 3.0282,
728
- "step": 60000
729
- },
730
- {
731
- "epoch": 0.9,
732
- "learning_rate": 1.3999980165223688e-05,
733
- "loss": 3.0222,
734
- "step": 60500
735
- },
736
- {
737
- "epoch": 0.91,
738
- "learning_rate": 1.3950393224440413e-05,
739
- "loss": 3.0275,
740
- "step": 61000
741
- },
742
- {
743
- "epoch": 0.91,
744
- "learning_rate": 1.3900806283657138e-05,
745
- "loss": 3.0277,
746
- "step": 61500
747
- },
748
- {
749
- "epoch": 0.92,
750
- "learning_rate": 1.3851219342873862e-05,
751
- "loss": 3.0551,
752
- "step": 62000
753
- },
754
- {
755
- "epoch": 0.93,
756
- "learning_rate": 1.3801632402090585e-05,
757
- "loss": 3.0205,
758
- "step": 62500
759
- },
760
- {
761
- "epoch": 0.94,
762
- "learning_rate": 1.375204546130731e-05,
763
- "loss": 3.023,
764
- "step": 63000
765
- },
766
- {
767
- "epoch": 0.94,
768
- "learning_rate": 1.3702458520524036e-05,
769
- "loss": 3.0244,
770
- "step": 63500
771
- },
772
- {
773
- "epoch": 0.95,
774
- "learning_rate": 1.365287157974076e-05,
775
- "loss": 3.0116,
776
- "step": 64000
777
- },
778
- {
779
- "epoch": 0.96,
780
- "learning_rate": 1.3603284638957486e-05,
781
- "loss": 3.0141,
782
- "step": 64500
783
- },
784
- {
785
- "epoch": 0.97,
786
- "learning_rate": 1.355369769817421e-05,
787
- "loss": 3.0284,
788
- "step": 65000
789
- },
790
- {
791
- "epoch": 0.97,
792
- "learning_rate": 1.3504110757390933e-05,
793
- "loss": 3.0236,
794
- "step": 65500
795
- },
796
- {
797
- "epoch": 0.98,
798
- "learning_rate": 1.3454523816607659e-05,
799
- "loss": 3.013,
800
- "step": 66000
801
- },
802
- {
803
- "epoch": 0.99,
804
- "learning_rate": 1.3404936875824384e-05,
805
- "loss": 3.0027,
806
- "step": 66500
807
- },
808
- {
809
- "epoch": 1.0,
810
- "learning_rate": 1.3355349935041108e-05,
811
- "loss": 3.0155,
812
- "step": 67000
813
- },
814
- {
815
- "epoch": 1.0,
816
- "eval_bleu": 11.298551127218651,
817
- "eval_loss": 2.3749005794525146,
818
- "eval_runtime": 4929.9601,
819
- "eval_samples_per_second": 8.201,
820
- "eval_steps_per_second": 0.513,
821
- "step": 67222
822
- },
823
- {
824
- "epoch": 1.0,
825
- "learning_rate": 1.3305762994257834e-05,
826
- "loss": 3.0195,
827
- "step": 67500
828
- },
829
- {
830
- "epoch": 1.01,
831
- "learning_rate": 1.3256176053474558e-05,
832
- "loss": 2.9924,
833
- "step": 68000
834
- },
835
- {
836
- "epoch": 1.02,
837
- "learning_rate": 1.3206589112691284e-05,
838
- "loss": 2.997,
839
- "step": 68500
840
- },
841
- {
842
- "epoch": 1.03,
843
- "learning_rate": 1.3157002171908007e-05,
844
- "loss": 2.9694,
845
- "step": 69000
846
- },
847
- {
848
- "epoch": 1.03,
849
- "learning_rate": 1.3107415231124732e-05,
850
- "loss": 2.9804,
851
- "step": 69500
852
- },
853
- {
854
- "epoch": 1.04,
855
- "learning_rate": 1.3057828290341456e-05,
856
- "loss": 2.9879,
857
- "step": 70000
858
- },
859
- {
860
- "epoch": 1.05,
861
- "learning_rate": 1.3008241349558182e-05,
862
- "loss": 2.9919,
863
- "step": 70500
864
- },
865
- {
866
- "epoch": 1.06,
867
- "learning_rate": 1.2958654408774906e-05,
868
- "loss": 2.9875,
869
- "step": 71000
870
- },
871
- {
872
- "epoch": 1.06,
873
- "learning_rate": 1.2909067467991632e-05,
874
- "loss": 2.9912,
875
- "step": 71500
876
- },
877
- {
878
- "epoch": 1.07,
879
- "learning_rate": 1.2859480527208354e-05,
880
- "loss": 2.974,
881
- "step": 72000
882
- },
883
- {
884
- "epoch": 1.08,
885
- "learning_rate": 1.280989358642508e-05,
886
- "loss": 2.9581,
887
- "step": 72500
888
- },
889
- {
890
- "epoch": 1.09,
891
- "learning_rate": 1.2760306645641804e-05,
892
- "loss": 2.975,
893
- "step": 73000
894
- },
895
- {
896
- "epoch": 1.09,
897
- "learning_rate": 1.271071970485853e-05,
898
- "loss": 2.9737,
899
- "step": 73500
900
- },
901
- {
902
- "epoch": 1.1,
903
- "learning_rate": 1.2661132764075254e-05,
904
- "loss": 2.9722,
905
- "step": 74000
906
- },
907
- {
908
- "epoch": 1.11,
909
- "learning_rate": 1.261154582329198e-05,
910
- "loss": 2.9727,
911
- "step": 74500
912
- },
913
- {
914
- "epoch": 1.12,
915
- "learning_rate": 1.2561958882508702e-05,
916
- "loss": 2.9618,
917
- "step": 75000
918
- },
919
- {
920
- "epoch": 1.12,
921
- "learning_rate": 1.2512371941725428e-05,
922
- "loss": 2.9554,
923
- "step": 75500
924
- },
925
- {
926
- "epoch": 1.13,
927
- "learning_rate": 1.2462785000942152e-05,
928
- "loss": 2.961,
929
- "step": 76000
930
- },
931
- {
932
- "epoch": 1.14,
933
- "learning_rate": 1.2413198060158878e-05,
934
- "loss": 2.9627,
935
- "step": 76500
936
- },
937
- {
938
- "epoch": 1.15,
939
- "learning_rate": 1.2363611119375602e-05,
940
- "loss": 2.9896,
941
- "step": 77000
942
- },
943
- {
944
- "epoch": 1.15,
945
- "learning_rate": 1.2314024178592328e-05,
946
- "loss": 2.9433,
947
- "step": 77500
948
- },
949
- {
950
- "epoch": 1.16,
951
- "learning_rate": 1.2264437237809052e-05,
952
- "loss": 2.9329,
953
- "step": 78000
954
- },
955
- {
956
- "epoch": 1.17,
957
- "learning_rate": 1.2214850297025776e-05,
958
- "loss": 2.9552,
959
- "step": 78500
960
- },
961
- {
962
- "epoch": 1.18,
963
- "learning_rate": 1.21652633562425e-05,
964
- "loss": 2.9382,
965
- "step": 79000
966
- },
967
- {
968
- "epoch": 1.18,
969
- "learning_rate": 1.2115676415459226e-05,
970
- "loss": 2.9629,
971
- "step": 79500
972
- },
973
- {
974
- "epoch": 1.19,
975
- "learning_rate": 1.206608947467595e-05,
976
- "loss": 2.9555,
977
- "step": 80000
978
- },
979
- {
980
- "epoch": 1.2,
981
- "learning_rate": 1.2016502533892676e-05,
982
- "loss": 2.9364,
983
- "step": 80500
984
- },
985
- {
986
- "epoch": 1.2,
987
- "learning_rate": 1.19669155931094e-05,
988
- "loss": 2.9296,
989
- "step": 81000
990
- },
991
- {
992
- "epoch": 1.21,
993
- "learning_rate": 1.1917328652326124e-05,
994
- "loss": 2.9483,
995
- "step": 81500
996
- },
997
- {
998
- "epoch": 1.22,
999
- "learning_rate": 1.1867741711542848e-05,
1000
- "loss": 2.9605,
1001
- "step": 82000
1002
- },
1003
- {
1004
- "epoch": 1.23,
1005
- "learning_rate": 1.1818154770759574e-05,
1006
- "loss": 2.928,
1007
- "step": 82500
1008
- },
1009
- {
1010
- "epoch": 1.23,
1011
- "learning_rate": 1.1768567829976298e-05,
1012
- "loss": 2.9216,
1013
- "step": 83000
1014
- },
1015
- {
1016
- "epoch": 1.24,
1017
- "learning_rate": 1.1718980889193024e-05,
1018
- "loss": 2.9402,
1019
- "step": 83500
1020
- },
1021
- {
1022
- "epoch": 1.25,
1023
- "learning_rate": 1.1669393948409748e-05,
1024
- "loss": 2.9311,
1025
- "step": 84000
1026
- },
1027
- {
1028
- "epoch": 1.26,
1029
- "learning_rate": 1.1619807007626474e-05,
1030
- "loss": 2.9537,
1031
- "step": 84500
1032
- },
1033
- {
1034
- "epoch": 1.26,
1035
- "learning_rate": 1.1570220066843196e-05,
1036
- "loss": 2.919,
1037
- "step": 85000
1038
- },
1039
- {
1040
- "epoch": 1.27,
1041
- "learning_rate": 1.1520633126059922e-05,
1042
- "loss": 2.918,
1043
- "step": 85500
1044
- },
1045
- {
1046
- "epoch": 1.28,
1047
- "learning_rate": 1.1471046185276646e-05,
1048
- "loss": 2.9339,
1049
- "step": 86000
1050
- },
1051
- {
1052
- "epoch": 1.29,
1053
- "learning_rate": 1.1421459244493372e-05,
1054
- "loss": 2.9071,
1055
- "step": 86500
1056
- },
1057
- {
1058
- "epoch": 1.29,
1059
- "learning_rate": 1.1371872303710096e-05,
1060
- "loss": 2.9397,
1061
- "step": 87000
1062
- },
1063
- {
1064
- "epoch": 1.3,
1065
- "learning_rate": 1.1322285362926822e-05,
1066
- "loss": 2.9225,
1067
- "step": 87500
1068
- },
1069
- {
1070
- "epoch": 1.31,
1071
- "learning_rate": 1.1272698422143544e-05,
1072
- "loss": 2.9248,
1073
- "step": 88000
1074
- },
1075
- {
1076
- "epoch": 1.32,
1077
- "learning_rate": 1.122311148136027e-05,
1078
- "loss": 2.9132,
1079
- "step": 88500
1080
- },
1081
- {
1082
- "epoch": 1.32,
1083
- "learning_rate": 1.1173524540576994e-05,
1084
- "loss": 2.8945,
1085
- "step": 89000
1086
- },
1087
- {
1088
- "epoch": 1.33,
1089
- "learning_rate": 1.112393759979372e-05,
1090
- "loss": 2.905,
1091
- "step": 89500
1092
- },
1093
- {
1094
- "epoch": 1.34,
1095
- "learning_rate": 1.1074350659010444e-05,
1096
- "loss": 2.9256,
1097
- "step": 90000
1098
- },
1099
- {
1100
- "epoch": 1.35,
1101
- "learning_rate": 1.102476371822717e-05,
1102
- "loss": 2.9089,
1103
- "step": 90500
1104
- },
1105
- {
1106
- "epoch": 1.35,
1107
- "learning_rate": 1.0975176777443892e-05,
1108
- "loss": 2.9104,
1109
- "step": 91000
1110
- },
1111
- {
1112
- "epoch": 1.36,
1113
- "learning_rate": 1.0925589836660618e-05,
1114
- "loss": 2.9226,
1115
- "step": 91500
1116
- },
1117
- {
1118
- "epoch": 1.37,
1119
- "learning_rate": 1.0876002895877342e-05,
1120
- "loss": 2.902,
1121
- "step": 92000
1122
- },
1123
- {
1124
- "epoch": 1.38,
1125
- "learning_rate": 1.0826415955094068e-05,
1126
- "loss": 2.8831,
1127
- "step": 92500
1128
- },
1129
- {
1130
- "epoch": 1.38,
1131
- "learning_rate": 1.0776829014310792e-05,
1132
- "loss": 2.906,
1133
- "step": 93000
1134
- },
1135
- {
1136
- "epoch": 1.39,
1137
- "learning_rate": 1.0727242073527518e-05,
1138
- "loss": 2.906,
1139
- "step": 93500
1140
- },
1141
- {
1142
- "epoch": 1.4,
1143
- "learning_rate": 1.0677655132744242e-05,
1144
- "loss": 2.8901,
1145
- "step": 94000
1146
- },
1147
- {
1148
- "epoch": 1.41,
1149
- "learning_rate": 1.0628068191960966e-05,
1150
- "loss": 2.9063,
1151
- "step": 94500
1152
- },
1153
- {
1154
- "epoch": 1.41,
1155
- "learning_rate": 1.057848125117769e-05,
1156
- "loss": 2.8765,
1157
- "step": 95000
1158
- },
1159
- {
1160
- "epoch": 1.42,
1161
- "learning_rate": 1.0528894310394416e-05,
1162
- "loss": 2.9022,
1163
- "step": 95500
1164
- },
1165
- {
1166
- "epoch": 1.43,
1167
- "learning_rate": 1.047930736961114e-05,
1168
- "loss": 2.8906,
1169
- "step": 96000
1170
- },
1171
- {
1172
- "epoch": 1.44,
1173
- "learning_rate": 1.0429720428827866e-05,
1174
- "loss": 2.8627,
1175
- "step": 96500
1176
- },
1177
- {
1178
- "epoch": 1.44,
1179
- "learning_rate": 1.038013348804459e-05,
1180
- "loss": 2.8789,
1181
- "step": 97000
1182
- },
1183
- {
1184
- "epoch": 1.45,
1185
- "learning_rate": 1.0330546547261314e-05,
1186
- "loss": 2.8782,
1187
- "step": 97500
1188
- },
1189
- {
1190
- "epoch": 1.46,
1191
- "learning_rate": 1.0280959606478038e-05,
1192
- "loss": 2.8706,
1193
- "step": 98000
1194
- },
1195
- {
1196
- "epoch": 1.47,
1197
- "learning_rate": 1.0231372665694764e-05,
1198
- "loss": 2.8434,
1199
- "step": 98500
1200
- },
1201
- {
1202
- "epoch": 1.47,
1203
- "learning_rate": 1.0181785724911488e-05,
1204
- "loss": 2.8851,
1205
- "step": 99000
1206
- },
1207
- {
1208
- "epoch": 1.48,
1209
- "learning_rate": 1.0132198784128214e-05,
1210
- "loss": 2.8806,
1211
- "step": 99500
1212
- },
1213
- {
1214
- "epoch": 1.49,
1215
- "learning_rate": 1.0082611843344938e-05,
1216
- "loss": 2.8695,
1217
- "step": 100000
1218
- },
1219
- {
1220
- "epoch": 1.5,
1221
- "learning_rate": 1.0033024902561664e-05,
1222
- "loss": 2.8775,
1223
- "step": 100500
1224
- },
1225
- {
1226
- "epoch": 1.5,
1227
- "learning_rate": 9.983437961778388e-06,
1228
- "loss": 2.8717,
1229
- "step": 101000
1230
- },
1231
- {
1232
- "epoch": 1.51,
1233
- "learning_rate": 9.933851020995112e-06,
1234
- "loss": 2.8616,
1235
- "step": 101500
1236
- },
1237
- {
1238
- "epoch": 1.52,
1239
- "learning_rate": 9.884264080211836e-06,
1240
- "loss": 2.8656,
1241
- "step": 102000
1242
- },
1243
- {
1244
- "epoch": 1.52,
1245
- "learning_rate": 9.834677139428562e-06,
1246
- "loss": 2.8867,
1247
- "step": 102500
1248
- },
1249
- {
1250
- "epoch": 1.53,
1251
- "learning_rate": 9.785090198645286e-06,
1252
- "loss": 2.8491,
1253
- "step": 103000
1254
- },
1255
- {
1256
- "epoch": 1.54,
1257
- "learning_rate": 9.73550325786201e-06,
1258
- "loss": 2.8716,
1259
- "step": 103500
1260
- },
1261
- {
1262
- "epoch": 1.55,
1263
- "learning_rate": 9.685916317078736e-06,
1264
- "loss": 2.8743,
1265
- "step": 104000
1266
- },
1267
- {
1268
- "epoch": 1.55,
1269
- "learning_rate": 9.63632937629546e-06,
1270
- "loss": 2.8503,
1271
- "step": 104500
1272
- },
1273
- {
1274
- "epoch": 1.56,
1275
- "learning_rate": 9.586742435512184e-06,
1276
- "loss": 2.8625,
1277
- "step": 105000
1278
- },
1279
- {
1280
- "epoch": 1.57,
1281
- "learning_rate": 9.53715549472891e-06,
1282
- "loss": 2.8237,
1283
- "step": 105500
1284
- },
1285
- {
1286
- "epoch": 1.58,
1287
- "learning_rate": 9.487568553945634e-06,
1288
- "loss": 2.8619,
1289
- "step": 106000
1290
- },
1291
- {
1292
- "epoch": 1.58,
1293
- "learning_rate": 9.437981613162358e-06,
1294
- "loss": 2.8629,
1295
- "step": 106500
1296
- },
1297
- {
1298
- "epoch": 1.59,
1299
- "learning_rate": 9.388394672379084e-06,
1300
- "loss": 2.8441,
1301
- "step": 107000
1302
- },
1303
- {
1304
- "epoch": 1.6,
1305
- "learning_rate": 9.338807731595808e-06,
1306
- "loss": 2.8569,
1307
- "step": 107500
1308
- },
1309
- {
1310
- "epoch": 1.61,
1311
- "learning_rate": 9.289220790812532e-06,
1312
- "loss": 2.8511,
1313
- "step": 108000
1314
- },
1315
- {
1316
- "epoch": 1.61,
1317
- "learning_rate": 9.239633850029258e-06,
1318
- "loss": 2.8701,
1319
- "step": 108500
1320
- },
1321
- {
1322
- "epoch": 1.62,
1323
- "learning_rate": 9.190046909245982e-06,
1324
- "loss": 2.8572,
1325
- "step": 109000
1326
- },
1327
- {
1328
- "epoch": 1.63,
1329
- "learning_rate": 9.140459968462706e-06,
1330
- "loss": 2.8673,
1331
- "step": 109500
1332
- },
1333
- {
1334
- "epoch": 1.64,
1335
- "learning_rate": 9.090873027679432e-06,
1336
- "loss": 2.8621,
1337
- "step": 110000
1338
- },
1339
- {
1340
- "epoch": 1.64,
1341
- "learning_rate": 9.041286086896156e-06,
1342
- "loss": 2.8592,
1343
- "step": 110500
1344
- },
1345
- {
1346
- "epoch": 1.65,
1347
- "learning_rate": 8.99169914611288e-06,
1348
- "loss": 2.8582,
1349
- "step": 111000
1350
- },
1351
- {
1352
- "epoch": 1.66,
1353
- "learning_rate": 8.942112205329606e-06,
1354
- "loss": 2.8666,
1355
- "step": 111500
1356
- },
1357
- {
1358
- "epoch": 1.67,
1359
- "learning_rate": 8.89252526454633e-06,
1360
- "loss": 2.8588,
1361
- "step": 112000
1362
- },
1363
- {
1364
- "epoch": 1.67,
1365
- "learning_rate": 8.842938323763054e-06,
1366
- "loss": 2.8475,
1367
- "step": 112500
1368
- },
1369
- {
1370
- "epoch": 1.68,
1371
- "learning_rate": 8.79335138297978e-06,
1372
- "loss": 2.8357,
1373
- "step": 113000
1374
- },
1375
- {
1376
- "epoch": 1.69,
1377
- "learning_rate": 8.743764442196504e-06,
1378
- "loss": 2.8608,
1379
- "step": 113500
1380
- },
1381
- {
1382
- "epoch": 1.7,
1383
- "learning_rate": 8.69417750141323e-06,
1384
- "loss": 2.8532,
1385
- "step": 114000
1386
- },
1387
- {
1388
- "epoch": 1.7,
1389
- "learning_rate": 8.644590560629953e-06,
1390
- "loss": 2.8545,
1391
- "step": 114500
1392
- },
1393
- {
1394
- "epoch": 1.71,
1395
- "learning_rate": 8.595003619846678e-06,
1396
- "loss": 2.8277,
1397
- "step": 115000
1398
- },
1399
- {
1400
- "epoch": 1.72,
1401
- "learning_rate": 8.545416679063403e-06,
1402
- "loss": 2.8509,
1403
- "step": 115500
1404
- },
1405
- {
1406
- "epoch": 1.73,
1407
- "learning_rate": 8.495829738280127e-06,
1408
- "loss": 2.8413,
1409
- "step": 116000
1410
- },
1411
- {
1412
- "epoch": 1.73,
1413
- "learning_rate": 8.446242797496852e-06,
1414
- "loss": 2.838,
1415
- "step": 116500
1416
- },
1417
- {
1418
- "epoch": 1.74,
1419
- "learning_rate": 8.396655856713577e-06,
1420
- "loss": 2.8543,
1421
- "step": 117000
1422
- },
1423
- {
1424
- "epoch": 1.75,
1425
- "learning_rate": 8.347068915930301e-06,
1426
- "loss": 2.8347,
1427
- "step": 117500
1428
- },
1429
- {
1430
- "epoch": 1.76,
1431
- "learning_rate": 8.297481975147026e-06,
1432
- "loss": 2.8669,
1433
- "step": 118000
1434
- },
1435
- {
1436
- "epoch": 1.76,
1437
- "learning_rate": 8.247895034363751e-06,
1438
- "loss": 2.8228,
1439
- "step": 118500
1440
- },
1441
- {
1442
- "epoch": 1.77,
1443
- "learning_rate": 8.198308093580475e-06,
1444
- "loss": 2.8385,
1445
- "step": 119000
1446
- },
1447
- {
1448
- "epoch": 1.78,
1449
- "learning_rate": 8.1487211527972e-06,
1450
- "loss": 2.8257,
1451
- "step": 119500
1452
- },
1453
- {
1454
- "epoch": 1.79,
1455
- "learning_rate": 8.099134212013925e-06,
1456
- "loss": 2.8362,
1457
- "step": 120000
1458
- },
1459
- {
1460
- "epoch": 1.79,
1461
- "learning_rate": 8.04954727123065e-06,
1462
- "loss": 2.8319,
1463
- "step": 120500
1464
- },
1465
- {
1466
- "epoch": 1.8,
1467
- "learning_rate": 7.999960330447374e-06,
1468
- "loss": 2.8356,
1469
- "step": 121000
1470
- },
1471
- {
1472
- "epoch": 1.81,
1473
- "learning_rate": 7.9503733896641e-06,
1474
- "loss": 2.8199,
1475
- "step": 121500
1476
- },
1477
- {
1478
- "epoch": 1.81,
1479
- "learning_rate": 7.900786448880823e-06,
1480
- "loss": 2.8039,
1481
- "step": 122000
1482
- },
1483
- {
1484
- "epoch": 1.82,
1485
- "learning_rate": 7.851199508097548e-06,
1486
- "loss": 2.832,
1487
- "step": 122500
1488
- },
1489
- {
1490
- "epoch": 1.83,
1491
- "learning_rate": 7.801612567314273e-06,
1492
- "loss": 2.8125,
1493
- "step": 123000
1494
- },
1495
- {
1496
- "epoch": 1.84,
1497
- "learning_rate": 7.752025626530997e-06,
1498
- "loss": 2.8005,
1499
- "step": 123500
1500
- },
1501
- {
1502
- "epoch": 1.84,
1503
- "learning_rate": 7.702438685747721e-06,
1504
- "loss": 2.8402,
1505
- "step": 124000
1506
- },
1507
- {
1508
- "epoch": 1.85,
1509
- "learning_rate": 7.652851744964447e-06,
1510
- "loss": 2.8186,
1511
- "step": 124500
1512
- },
1513
- {
1514
- "epoch": 1.86,
1515
- "learning_rate": 7.603264804181172e-06,
1516
- "loss": 2.8296,
1517
- "step": 125000
1518
- },
1519
- {
1520
- "epoch": 1.87,
1521
- "learning_rate": 7.5536778633978955e-06,
1522
- "loss": 2.8193,
1523
- "step": 125500
1524
- },
1525
- {
1526
- "epoch": 1.87,
1527
- "learning_rate": 7.50409092261462e-06,
1528
- "loss": 2.8093,
1529
- "step": 126000
1530
- },
1531
- {
1532
- "epoch": 1.88,
1533
- "learning_rate": 7.454503981831346e-06,
1534
- "loss": 2.8383,
1535
- "step": 126500
1536
- },
1537
- {
1538
- "epoch": 1.89,
1539
- "learning_rate": 7.4049170410480695e-06,
1540
- "loss": 2.821,
1541
- "step": 127000
1542
- },
1543
- {
1544
- "epoch": 1.9,
1545
- "learning_rate": 7.355330100264794e-06,
1546
- "loss": 2.7976,
1547
- "step": 127500
1548
- },
1549
- {
1550
- "epoch": 1.9,
1551
- "learning_rate": 7.30574315948152e-06,
1552
- "loss": 2.8183,
1553
- "step": 128000
1554
- },
1555
- {
1556
- "epoch": 1.91,
1557
- "learning_rate": 7.2561562186982434e-06,
1558
- "loss": 2.8089,
1559
- "step": 128500
1560
- },
1561
- {
1562
- "epoch": 1.92,
1563
- "learning_rate": 7.206569277914968e-06,
1564
- "loss": 2.818,
1565
- "step": 129000
1566
- },
1567
- {
1568
- "epoch": 1.93,
1569
- "learning_rate": 7.156982337131694e-06,
1570
- "loss": 2.8052,
1571
- "step": 129500
1572
- },
1573
- {
1574
- "epoch": 1.93,
1575
- "learning_rate": 7.107395396348419e-06,
1576
- "loss": 2.8183,
1577
- "step": 130000
1578
- },
1579
- {
1580
- "epoch": 1.94,
1581
- "learning_rate": 7.057808455565142e-06,
1582
- "loss": 2.8098,
1583
- "step": 130500
1584
- },
1585
- {
1586
- "epoch": 1.95,
1587
- "learning_rate": 7.008221514781868e-06,
1588
- "loss": 2.8155,
1589
- "step": 131000
1590
- },
1591
- {
1592
- "epoch": 1.96,
1593
- "learning_rate": 6.958634573998593e-06,
1594
- "loss": 2.8074,
1595
- "step": 131500
1596
- },
1597
- {
1598
- "epoch": 1.96,
1599
- "learning_rate": 6.909047633215316e-06,
1600
- "loss": 2.7913,
1601
- "step": 132000
1602
- },
1603
- {
1604
- "epoch": 1.97,
1605
- "learning_rate": 6.859460692432042e-06,
1606
- "loss": 2.8122,
1607
- "step": 132500
1608
- },
1609
- {
1610
- "epoch": 1.98,
1611
- "learning_rate": 6.809873751648767e-06,
1612
- "loss": 2.8327,
1613
- "step": 133000
1614
- },
1615
- {
1616
- "epoch": 1.99,
1617
- "learning_rate": 6.76028681086549e-06,
1618
- "loss": 2.7897,
1619
- "step": 133500
1620
- },
1621
- {
1622
- "epoch": 1.99,
1623
- "learning_rate": 6.710699870082215e-06,
1624
- "loss": 2.7777,
1625
- "step": 134000
1626
- },
1627
- {
1628
- "epoch": 2.0,
1629
- "eval_bleu": 13.585366050482984,
1630
- "eval_loss": 2.2518081665039062,
1631
- "eval_runtime": 4182.4693,
1632
- "eval_samples_per_second": 9.667,
1633
- "eval_steps_per_second": 0.604,
1634
- "step": 134444
1635
- },
1636
- {
1637
- "epoch": 2.0,
1638
- "learning_rate": 6.661112929298941e-06,
1639
- "loss": 2.7994,
1640
- "step": 134500
1641
- },
1642
- {
1643
- "epoch": 2.01,
1644
- "learning_rate": 6.611525988515664e-06,
1645
- "loss": 2.8167,
1646
- "step": 135000
1647
- },
1648
- {
1649
- "epoch": 2.02,
1650
- "learning_rate": 6.561939047732389e-06,
1651
- "loss": 2.8123,
1652
- "step": 135500
1653
- },
1654
- {
1655
- "epoch": 2.02,
1656
- "learning_rate": 6.512352106949115e-06,
1657
- "loss": 2.7844,
1658
- "step": 136000
1659
- },
1660
- {
1661
- "epoch": 2.03,
1662
- "learning_rate": 6.462765166165838e-06,
1663
- "loss": 2.7956,
1664
- "step": 136500
1665
- },
1666
- {
1667
- "epoch": 2.04,
1668
- "learning_rate": 6.413178225382563e-06,
1669
- "loss": 2.7968,
1670
- "step": 137000
1671
- },
1672
- {
1673
- "epoch": 2.05,
1674
- "learning_rate": 6.363591284599289e-06,
1675
- "loss": 2.7916,
1676
- "step": 137500
1677
- },
1678
- {
1679
- "epoch": 2.05,
1680
- "learning_rate": 6.314004343816014e-06,
1681
- "loss": 2.7958,
1682
- "step": 138000
1683
- },
1684
- {
1685
- "epoch": 2.06,
1686
- "learning_rate": 6.264417403032737e-06,
1687
- "loss": 2.7855,
1688
- "step": 138500
1689
- },
1690
- {
1691
- "epoch": 2.07,
1692
- "learning_rate": 6.214830462249463e-06,
1693
- "loss": 2.7876,
1694
- "step": 139000
1695
- },
1696
- {
1697
- "epoch": 2.08,
1698
- "learning_rate": 6.165243521466188e-06,
1699
- "loss": 2.7724,
1700
- "step": 139500
1701
- },
1702
- {
1703
- "epoch": 2.08,
1704
- "learning_rate": 6.115656580682911e-06,
1705
- "loss": 2.8021,
1706
- "step": 140000
1707
- },
1708
- {
1709
- "epoch": 2.09,
1710
- "learning_rate": 6.066069639899637e-06,
1711
- "loss": 2.8024,
1712
- "step": 140500
1713
- },
1714
- {
1715
- "epoch": 2.1,
1716
- "learning_rate": 6.016482699116362e-06,
1717
- "loss": 2.7891,
1718
- "step": 141000
1719
- },
1720
- {
1721
- "epoch": 2.1,
1722
- "learning_rate": 5.966895758333085e-06,
1723
- "loss": 2.7592,
1724
- "step": 141500
1725
- },
1726
- {
1727
- "epoch": 2.11,
1728
- "learning_rate": 5.917308817549811e-06,
1729
- "loss": 2.7895,
1730
- "step": 142000
1731
- },
1732
- {
1733
- "epoch": 2.12,
1734
- "learning_rate": 5.867721876766536e-06,
1735
- "loss": 2.8106,
1736
- "step": 142500
1737
- },
1738
- {
1739
- "epoch": 2.13,
1740
- "learning_rate": 5.818134935983259e-06,
1741
- "loss": 2.7985,
1742
- "step": 143000
1743
- },
1744
- {
1745
- "epoch": 2.13,
1746
- "learning_rate": 5.768547995199985e-06,
1747
- "loss": 2.8137,
1748
- "step": 143500
1749
- },
1750
- {
1751
- "epoch": 2.14,
1752
- "learning_rate": 5.71896105441671e-06,
1753
- "loss": 2.7824,
1754
- "step": 144000
1755
- },
1756
- {
1757
- "epoch": 2.15,
1758
- "learning_rate": 5.669374113633433e-06,
1759
- "loss": 2.7878,
1760
- "step": 144500
1761
- },
1762
- {
1763
- "epoch": 2.16,
1764
- "learning_rate": 5.619787172850158e-06,
1765
- "loss": 2.7861,
1766
- "step": 145000
1767
- },
1768
- {
1769
- "epoch": 2.16,
1770
- "learning_rate": 5.570200232066884e-06,
1771
- "loss": 2.7886,
1772
- "step": 145500
1773
- },
1774
- {
1775
- "epoch": 2.17,
1776
- "learning_rate": 5.520613291283607e-06,
1777
- "loss": 2.7798,
1778
- "step": 146000
1779
- },
1780
- {
1781
- "epoch": 2.18,
1782
- "learning_rate": 5.471026350500332e-06,
1783
- "loss": 2.8015,
1784
- "step": 146500
1785
- },
1786
- {
1787
- "epoch": 2.19,
1788
- "learning_rate": 5.421439409717058e-06,
1789
- "loss": 2.8013,
1790
- "step": 147000
1791
- },
1792
- {
1793
- "epoch": 2.19,
1794
- "learning_rate": 5.371852468933783e-06,
1795
- "loss": 2.7567,
1796
- "step": 147500
1797
- },
1798
- {
1799
- "epoch": 2.2,
1800
- "learning_rate": 5.322265528150506e-06,
1801
- "loss": 2.8004,
1802
- "step": 148000
1803
- },
1804
- {
1805
- "epoch": 2.21,
1806
- "learning_rate": 5.272678587367232e-06,
1807
- "loss": 2.7817,
1808
- "step": 148500
1809
- },
1810
- {
1811
- "epoch": 2.22,
1812
- "learning_rate": 5.223091646583957e-06,
1813
- "loss": 2.7674,
1814
- "step": 149000
1815
- },
1816
- {
1817
- "epoch": 2.22,
1818
- "learning_rate": 5.17350470580068e-06,
1819
- "loss": 2.7882,
1820
- "step": 149500
1821
- },
1822
- {
1823
- "epoch": 2.23,
1824
- "learning_rate": 5.123917765017406e-06,
1825
- "loss": 2.7695,
1826
- "step": 150000
1827
- },
1828
- {
1829
- "epoch": 2.24,
1830
- "learning_rate": 5.074330824234131e-06,
1831
- "loss": 2.7708,
1832
- "step": 150500
1833
- },
1834
- {
1835
- "epoch": 2.25,
1836
- "learning_rate": 5.024743883450854e-06,
1837
- "loss": 2.7791,
1838
- "step": 151000
1839
- },
1840
- {
1841
- "epoch": 2.25,
1842
- "learning_rate": 4.97515694266758e-06,
1843
- "loss": 2.7998,
1844
- "step": 151500
1845
- },
1846
- {
1847
- "epoch": 2.26,
1848
- "learning_rate": 4.925570001884304e-06,
1849
- "loss": 2.7546,
1850
- "step": 152000
1851
- },
1852
- {
1853
- "epoch": 2.27,
1854
- "learning_rate": 4.875983061101029e-06,
1855
- "loss": 2.7576,
1856
- "step": 152500
1857
- },
1858
- {
1859
- "epoch": 2.28,
1860
- "learning_rate": 4.826396120317754e-06,
1861
- "loss": 2.7917,
1862
- "step": 153000
1863
- },
1864
- {
1865
- "epoch": 2.28,
1866
- "learning_rate": 4.776809179534479e-06,
1867
- "loss": 2.7828,
1868
- "step": 153500
1869
- },
1870
- {
1871
- "epoch": 2.29,
1872
- "learning_rate": 4.727222238751203e-06,
1873
- "loss": 2.7884,
1874
- "step": 154000
1875
- },
1876
- {
1877
- "epoch": 2.3,
1878
- "learning_rate": 4.677635297967928e-06,
1879
- "loss": 2.7807,
1880
- "step": 154500
1881
- },
1882
- {
1883
- "epoch": 2.31,
1884
- "learning_rate": 4.628048357184653e-06,
1885
- "loss": 2.7652,
1886
- "step": 155000
1887
- },
1888
- {
1889
- "epoch": 2.31,
1890
- "learning_rate": 4.578461416401377e-06,
1891
- "loss": 2.7918,
1892
- "step": 155500
1893
- },
1894
- {
1895
- "epoch": 2.32,
1896
- "learning_rate": 4.528874475618102e-06,
1897
- "loss": 2.7771,
1898
- "step": 156000
1899
- },
1900
- {
1901
- "epoch": 2.33,
1902
- "learning_rate": 4.479287534834827e-06,
1903
- "loss": 2.7561,
1904
- "step": 156500
1905
- },
1906
- {
1907
- "epoch": 2.34,
1908
- "learning_rate": 4.429700594051551e-06,
1909
- "loss": 2.7789,
1910
- "step": 157000
1911
- },
1912
- {
1913
- "epoch": 2.34,
1914
- "learning_rate": 4.380113653268276e-06,
1915
- "loss": 2.7629,
1916
- "step": 157500
1917
- },
1918
- {
1919
- "epoch": 2.35,
1920
- "learning_rate": 4.330526712485001e-06,
1921
- "loss": 2.7672,
1922
- "step": 158000
1923
- },
1924
- {
1925
- "epoch": 2.36,
1926
- "learning_rate": 4.280939771701725e-06,
1927
- "loss": 2.7643,
1928
- "step": 158500
1929
- },
1930
- {
1931
- "epoch": 2.37,
1932
- "learning_rate": 4.23135283091845e-06,
1933
- "loss": 2.7836,
1934
- "step": 159000
1935
- },
1936
- {
1937
- "epoch": 2.37,
1938
- "learning_rate": 4.181765890135175e-06,
1939
- "loss": 2.7742,
1940
- "step": 159500
1941
- },
1942
- {
1943
- "epoch": 2.38,
1944
- "learning_rate": 4.132178949351899e-06,
1945
- "loss": 2.7504,
1946
- "step": 160000
1947
- },
1948
- {
1949
- "epoch": 2.39,
1950
- "learning_rate": 4.082592008568624e-06,
1951
- "loss": 2.7738,
1952
- "step": 160500
1953
- },
1954
- {
1955
- "epoch": 2.4,
1956
- "learning_rate": 4.033005067785349e-06,
1957
- "loss": 2.7741,
1958
- "step": 161000
1959
- },
1960
- {
1961
- "epoch": 2.4,
1962
- "learning_rate": 3.983418127002074e-06,
1963
- "loss": 2.768,
1964
- "step": 161500
1965
- },
1966
- {
1967
- "epoch": 2.41,
1968
- "learning_rate": 3.933831186218798e-06,
1969
- "loss": 2.7874,
1970
- "step": 162000
1971
- },
1972
- {
1973
- "epoch": 2.42,
1974
- "learning_rate": 3.884244245435523e-06,
1975
- "loss": 2.7733,
1976
- "step": 162500
1977
- },
1978
- {
1979
- "epoch": 2.42,
1980
- "learning_rate": 3.834657304652248e-06,
1981
- "loss": 2.7677,
1982
- "step": 163000
1983
- },
1984
- {
1985
- "epoch": 2.43,
1986
- "learning_rate": 3.7850703638689717e-06,
1987
- "loss": 2.7601,
1988
- "step": 163500
1989
- },
1990
- {
1991
- "epoch": 2.44,
1992
- "learning_rate": 3.735483423085696e-06,
1993
- "loss": 2.7832,
1994
- "step": 164000
1995
- },
1996
- {
1997
- "epoch": 2.45,
1998
- "learning_rate": 3.685896482302421e-06,
1999
- "loss": 2.7704,
2000
- "step": 164500
2001
- },
2002
- {
2003
- "epoch": 2.45,
2004
- "learning_rate": 3.6363095415191457e-06,
2005
- "loss": 2.7698,
2006
- "step": 165000
2007
- },
2008
- {
2009
- "epoch": 2.46,
2010
- "learning_rate": 3.5867226007358706e-06,
2011
- "loss": 2.7724,
2012
- "step": 165500
2013
- },
2014
- {
2015
- "epoch": 2.47,
2016
- "learning_rate": 3.537135659952595e-06,
2017
- "loss": 2.7647,
2018
- "step": 166000
2019
- },
2020
- {
2021
- "epoch": 2.48,
2022
- "learning_rate": 3.4875487191693196e-06,
2023
- "loss": 2.7719,
2024
- "step": 166500
2025
- },
2026
- {
2027
- "epoch": 2.48,
2028
- "learning_rate": 3.4379617783860446e-06,
2029
- "loss": 2.7578,
2030
- "step": 167000
2031
- },
2032
- {
2033
- "epoch": 2.49,
2034
- "learning_rate": 3.388374837602769e-06,
2035
- "loss": 2.7478,
2036
- "step": 167500
2037
- },
2038
- {
2039
- "epoch": 2.5,
2040
- "learning_rate": 3.3387878968194936e-06,
2041
- "loss": 2.7747,
2042
- "step": 168000
2043
- },
2044
- {
2045
- "epoch": 2.51,
2046
- "learning_rate": 3.2892009560362186e-06,
2047
- "loss": 2.7533,
2048
- "step": 168500
2049
- },
2050
- {
2051
- "epoch": 2.51,
2052
- "learning_rate": 3.239614015252943e-06,
2053
- "loss": 2.7488,
2054
- "step": 169000
2055
- },
2056
- {
2057
- "epoch": 2.52,
2058
- "learning_rate": 3.190027074469668e-06,
2059
- "loss": 2.7577,
2060
- "step": 169500
2061
- },
2062
- {
2063
- "epoch": 2.53,
2064
- "learning_rate": 3.1404401336863925e-06,
2065
- "loss": 2.7646,
2066
- "step": 170000
2067
- },
2068
- {
2069
- "epoch": 2.54,
2070
- "learning_rate": 3.090853192903117e-06,
2071
- "loss": 2.7556,
2072
- "step": 170500
2073
- },
2074
- {
2075
- "epoch": 2.54,
2076
- "learning_rate": 3.041266252119842e-06,
2077
- "loss": 2.7578,
2078
- "step": 171000
2079
- },
2080
- {
2081
- "epoch": 2.55,
2082
- "learning_rate": 2.9916793113365665e-06,
2083
- "loss": 2.7428,
2084
- "step": 171500
2085
- },
2086
- {
2087
- "epoch": 2.56,
2088
- "learning_rate": 2.942092370553291e-06,
2089
- "loss": 2.7487,
2090
- "step": 172000
2091
- },
2092
- {
2093
- "epoch": 2.57,
2094
- "learning_rate": 2.892505429770016e-06,
2095
- "loss": 2.7457,
2096
- "step": 172500
2097
- },
2098
- {
2099
- "epoch": 2.57,
2100
- "learning_rate": 2.8429184889867405e-06,
2101
- "loss": 2.7366,
2102
- "step": 173000
2103
- },
2104
- {
2105
- "epoch": 2.58,
2106
- "learning_rate": 2.7933315482034655e-06,
2107
- "loss": 2.7497,
2108
- "step": 173500
2109
- },
2110
- {
2111
- "epoch": 2.59,
2112
- "learning_rate": 2.74374460742019e-06,
2113
- "loss": 2.7467,
2114
- "step": 174000
2115
- },
2116
- {
2117
- "epoch": 2.6,
2118
- "learning_rate": 2.6941576666369145e-06,
2119
- "loss": 2.7633,
2120
- "step": 174500
2121
- },
2122
- {
2123
- "epoch": 2.6,
2124
- "learning_rate": 2.6445707258536394e-06,
2125
- "loss": 2.7681,
2126
- "step": 175000
2127
- },
2128
- {
2129
- "epoch": 2.61,
2130
- "learning_rate": 2.594983785070364e-06,
2131
- "loss": 2.7552,
2132
- "step": 175500
2133
- },
2134
- {
2135
- "epoch": 2.62,
2136
- "learning_rate": 2.5453968442870885e-06,
2137
- "loss": 2.7539,
2138
- "step": 176000
2139
- },
2140
- {
2141
- "epoch": 2.63,
2142
- "learning_rate": 2.4958099035038134e-06,
2143
- "loss": 2.7393,
2144
- "step": 176500
2145
- },
2146
- {
2147
- "epoch": 2.63,
2148
- "learning_rate": 2.446222962720538e-06,
2149
- "loss": 2.7727,
2150
- "step": 177000
2151
- },
2152
- {
2153
- "epoch": 2.64,
2154
- "learning_rate": 2.396636021937263e-06,
2155
- "loss": 2.736,
2156
- "step": 177500
2157
- },
2158
- {
2159
- "epoch": 2.65,
2160
- "learning_rate": 2.3470490811539874e-06,
2161
- "loss": 2.7546,
2162
- "step": 178000
2163
- },
2164
- {
2165
- "epoch": 2.66,
2166
- "learning_rate": 2.297462140370712e-06,
2167
- "loss": 2.7601,
2168
- "step": 178500
2169
- },
2170
- {
2171
- "epoch": 2.66,
2172
- "learning_rate": 2.247875199587437e-06,
2173
- "loss": 2.7456,
2174
- "step": 179000
2175
- },
2176
- {
2177
- "epoch": 2.67,
2178
- "learning_rate": 2.1982882588041614e-06,
2179
- "loss": 2.76,
2180
- "step": 179500
2181
- },
2182
- {
2183
- "epoch": 2.68,
2184
- "learning_rate": 2.1487013180208863e-06,
2185
- "loss": 2.7396,
2186
- "step": 180000
2187
- },
2188
- {
2189
- "epoch": 2.69,
2190
- "learning_rate": 2.099114377237611e-06,
2191
- "loss": 2.761,
2192
- "step": 180500
2193
- },
2194
- {
2195
- "epoch": 2.69,
2196
- "learning_rate": 2.0495274364543354e-06,
2197
- "loss": 2.7603,
2198
- "step": 181000
2199
- },
2200
- {
2201
- "epoch": 2.7,
2202
- "learning_rate": 1.9999404956710603e-06,
2203
- "loss": 2.7614,
2204
- "step": 181500
2205
- },
2206
- {
2207
- "epoch": 2.71,
2208
- "learning_rate": 1.950353554887785e-06,
2209
- "loss": 2.7638,
2210
- "step": 182000
2211
- },
2212
- {
2213
- "epoch": 2.71,
2214
- "learning_rate": 1.9007666141045096e-06,
2215
- "loss": 2.7806,
2216
- "step": 182500
2217
- },
2218
- {
2219
- "epoch": 2.72,
2220
- "learning_rate": 1.8511796733212343e-06,
2221
- "loss": 2.7561,
2222
- "step": 183000
2223
- },
2224
- {
2225
- "epoch": 2.73,
2226
- "learning_rate": 1.801592732537959e-06,
2227
- "loss": 2.7473,
2228
- "step": 183500
2229
- },
2230
- {
2231
- "epoch": 2.74,
2232
- "learning_rate": 1.7520057917546838e-06,
2233
- "loss": 2.7405,
2234
- "step": 184000
2235
- },
2236
- {
2237
- "epoch": 2.74,
2238
- "learning_rate": 1.7024188509714083e-06,
2239
- "loss": 2.742,
2240
- "step": 184500
2241
- },
2242
- {
2243
- "epoch": 2.75,
2244
- "learning_rate": 1.652831910188133e-06,
2245
- "loss": 2.7387,
2246
- "step": 185000
2247
- },
2248
- {
2249
- "epoch": 2.76,
2250
- "learning_rate": 1.6032449694048577e-06,
2251
- "loss": 2.7681,
2252
- "step": 185500
2253
- },
2254
- {
2255
- "epoch": 2.77,
2256
- "learning_rate": 1.5536580286215825e-06,
2257
- "loss": 2.7599,
2258
- "step": 186000
2259
- },
2260
- {
2261
- "epoch": 2.77,
2262
- "learning_rate": 1.504071087838307e-06,
2263
- "loss": 2.7497,
2264
- "step": 186500
2265
- },
2266
- {
2267
- "epoch": 2.78,
2268
- "learning_rate": 1.4544841470550317e-06,
2269
- "loss": 2.7417,
2270
- "step": 187000
2271
- },
2272
- {
2273
- "epoch": 2.79,
2274
- "learning_rate": 1.4048972062717565e-06,
2275
- "loss": 2.739,
2276
- "step": 187500
2277
- },
2278
- {
2279
- "epoch": 2.8,
2280
- "learning_rate": 1.3553102654884812e-06,
2281
- "loss": 2.7633,
2282
- "step": 188000
2283
- },
2284
- {
2285
- "epoch": 2.8,
2286
- "learning_rate": 1.3057233247052057e-06,
2287
- "loss": 2.7516,
2288
- "step": 188500
2289
- },
2290
- {
2291
- "epoch": 2.81,
2292
- "learning_rate": 1.2561363839219304e-06,
2293
- "loss": 2.7638,
2294
- "step": 189000
2295
- },
2296
- {
2297
- "epoch": 2.82,
2298
- "learning_rate": 1.2065494431386552e-06,
2299
- "loss": 2.7397,
2300
- "step": 189500
2301
- },
2302
- {
2303
- "epoch": 2.83,
2304
- "learning_rate": 1.1569625023553797e-06,
2305
- "loss": 2.7614,
2306
- "step": 190000
2307
- },
2308
- {
2309
- "epoch": 2.83,
2310
- "learning_rate": 1.1073755615721044e-06,
2311
- "loss": 2.7235,
2312
- "step": 190500
2313
- },
2314
- {
2315
- "epoch": 2.84,
2316
- "learning_rate": 1.0577886207888292e-06,
2317
- "loss": 2.7401,
2318
- "step": 191000
2319
- },
2320
- {
2321
- "epoch": 2.85,
2322
- "learning_rate": 1.0082016800055539e-06,
2323
- "loss": 2.7573,
2324
- "step": 191500
2325
- },
2326
- {
2327
- "epoch": 2.86,
2328
- "learning_rate": 9.586147392222784e-07,
2329
- "loss": 2.7452,
2330
- "step": 192000
2331
- },
2332
- {
2333
- "epoch": 2.86,
2334
- "learning_rate": 9.090277984390032e-07,
2335
- "loss": 2.7505,
2336
- "step": 192500
2337
- },
2338
- {
2339
- "epoch": 2.87,
2340
- "learning_rate": 8.594408576557279e-07,
2341
- "loss": 2.7233,
2342
- "step": 193000
2343
- },
2344
- {
2345
- "epoch": 2.88,
2346
- "learning_rate": 8.098539168724526e-07,
2347
- "loss": 2.7409,
2348
- "step": 193500
2349
- },
2350
- {
2351
- "epoch": 2.89,
2352
- "learning_rate": 7.602669760891772e-07,
2353
- "loss": 2.7424,
2354
- "step": 194000
2355
- },
2356
- {
2357
- "epoch": 2.89,
2358
- "learning_rate": 7.10680035305902e-07,
2359
- "loss": 2.7563,
2360
- "step": 194500
2361
- },
2362
- {
2363
- "epoch": 2.9,
2364
- "learning_rate": 6.610930945226266e-07,
2365
- "loss": 2.7478,
2366
- "step": 195000
2367
- },
2368
- {
2369
- "epoch": 2.91,
2370
- "learning_rate": 6.115061537393512e-07,
2371
- "loss": 2.7555,
2372
- "step": 195500
2373
- },
2374
- {
2375
- "epoch": 2.92,
2376
- "learning_rate": 5.619192129560759e-07,
2377
- "loss": 2.7679,
2378
- "step": 196000
2379
- },
2380
- {
2381
- "epoch": 2.92,
2382
- "learning_rate": 5.123322721728006e-07,
2383
- "loss": 2.7219,
2384
- "step": 196500
2385
- },
2386
- {
2387
- "epoch": 2.93,
2388
- "learning_rate": 4.6274533138952524e-07,
2389
- "loss": 2.7283,
2390
- "step": 197000
2391
- },
2392
- {
2393
- "epoch": 2.94,
2394
- "learning_rate": 4.131583906062499e-07,
2395
- "loss": 2.7703,
2396
- "step": 197500
2397
- },
2398
- {
2399
- "epoch": 2.95,
2400
- "learning_rate": 3.6357144982297465e-07,
2401
- "loss": 2.7606,
2402
- "step": 198000
2403
- },
2404
- {
2405
- "epoch": 2.95,
2406
- "learning_rate": 3.1398450903969933e-07,
2407
- "loss": 2.7247,
2408
- "step": 198500
2409
- }
2410
- ],
2411
- "max_steps": 201666,
2412
- "num_train_epochs": 3,
2413
- "total_flos": 2.3390721608830157e+17,
2414
- "trial_name": null,
2415
- "trial_params": null
2416
- }
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
last-checkpoint/training_args.bin DELETED
@@ -1,3 +0,0 @@
1
- version https://git-lfs.github.com/spec/v1
2
- oid sha256:b850b2abccaa9cb6b0c14aaa89493109b9ac553fd3b9208268f6ca150829be14
3
- size 3643
 
 
 
 
pytorch_model.bin CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:3c830ba03e87ef89c167a65183788768bf905dc7f4c689fe476c351a7fc9b93b
3
  size 1200739717
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:78c954b7531becf7ee955cd4be1cd0399bb0f33cb2a0ea78c8d99d44f686badc
3
  size 1200739717
runs/Feb07_05-04-07_5214b674e698/events.out.tfevents.1675746342.5214b674e698.342.0 CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:f3418c4d48d9655d66cec7f231aa9eca5c7d2436ae748697c02e220abc2d1378
3
- size 68191
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:13f9d0277d514d6e2ad74ecd63999ef4b383df03978d356722e96e08fd2bcc67
3
+ size 69836