Text2Text Generation
Transformers
PyTorch
English
Kinyarwanda
m2m_100
Inference Endpoints
Kleber commited on
Commit
594ec24
1 Parent(s): 67c32d4

Delete trainer_state.json

Browse files
Files changed (1) hide show
  1. trainer_state.json +0 -678
trainer_state.json DELETED
@@ -1,678 +0,0 @@
1
- {
2
- "best_metric": null,
3
- "best_model_checkpoint": null,
4
- "epoch": 2.0,
5
- "global_step": 53168,
6
- "is_hyper_param_search": false,
7
- "is_local_process_zero": true,
8
- "is_world_process_zero": true,
9
- "log_history": [
10
- {
11
- "epoch": 0.02,
12
- "learning_rate": 4.9529792356304544e-05,
13
- "loss": 0.5805,
14
- "step": 500
15
- },
16
- {
17
- "epoch": 0.04,
18
- "learning_rate": 4.905958471260909e-05,
19
- "loss": 0.566,
20
- "step": 1000
21
- },
22
- {
23
- "epoch": 0.06,
24
- "learning_rate": 4.8589377068913635e-05,
25
- "loss": 0.5748,
26
- "step": 1500
27
- },
28
- {
29
- "epoch": 0.08,
30
- "learning_rate": 4.811916942521818e-05,
31
- "loss": 0.577,
32
- "step": 2000
33
- },
34
- {
35
- "epoch": 0.09,
36
- "learning_rate": 4.7648961781522725e-05,
37
- "loss": 0.5681,
38
- "step": 2500
39
- },
40
- {
41
- "epoch": 0.11,
42
- "learning_rate": 4.717875413782727e-05,
43
- "loss": 0.5619,
44
- "step": 3000
45
- },
46
- {
47
- "epoch": 0.13,
48
- "learning_rate": 4.670854649413181e-05,
49
- "loss": 0.5643,
50
- "step": 3500
51
- },
52
- {
53
- "epoch": 0.15,
54
- "learning_rate": 4.623833885043635e-05,
55
- "loss": 0.5567,
56
- "step": 4000
57
- },
58
- {
59
- "epoch": 0.17,
60
- "learning_rate": 4.57681312067409e-05,
61
- "loss": 0.5544,
62
- "step": 4500
63
- },
64
- {
65
- "epoch": 0.19,
66
- "learning_rate": 4.529792356304544e-05,
67
- "loss": 0.5453,
68
- "step": 5000
69
- },
70
- {
71
- "epoch": 0.21,
72
- "learning_rate": 4.482771591934999e-05,
73
- "loss": 0.5963,
74
- "step": 5500
75
- },
76
- {
77
- "epoch": 0.23,
78
- "learning_rate": 4.435750827565453e-05,
79
- "loss": 0.561,
80
- "step": 6000
81
- },
82
- {
83
- "epoch": 0.24,
84
- "learning_rate": 4.388730063195908e-05,
85
- "loss": 0.584,
86
- "step": 6500
87
- },
88
- {
89
- "epoch": 0.26,
90
- "learning_rate": 4.341709298826362e-05,
91
- "loss": 0.5583,
92
- "step": 7000
93
- },
94
- {
95
- "epoch": 0.28,
96
- "learning_rate": 4.2946885344568164e-05,
97
- "loss": 0.578,
98
- "step": 7500
99
- },
100
- {
101
- "epoch": 0.3,
102
- "learning_rate": 4.2476677700872706e-05,
103
- "loss": 0.5764,
104
- "step": 8000
105
- },
106
- {
107
- "epoch": 0.32,
108
- "learning_rate": 4.200647005717725e-05,
109
- "loss": 0.5873,
110
- "step": 8500
111
- },
112
- {
113
- "epoch": 0.34,
114
- "learning_rate": 4.1536262413481796e-05,
115
- "loss": 0.6006,
116
- "step": 9000
117
- },
118
- {
119
- "epoch": 0.36,
120
- "learning_rate": 4.106605476978634e-05,
121
- "loss": 0.6151,
122
- "step": 9500
123
- },
124
- {
125
- "epoch": 0.38,
126
- "learning_rate": 4.059584712609089e-05,
127
- "loss": 0.6026,
128
- "step": 10000
129
- },
130
- {
131
- "epoch": 0.39,
132
- "learning_rate": 4.012563948239543e-05,
133
- "loss": 0.6029,
134
- "step": 10500
135
- },
136
- {
137
- "epoch": 0.41,
138
- "learning_rate": 3.965543183869998e-05,
139
- "loss": 0.6321,
140
- "step": 11000
141
- },
142
- {
143
- "epoch": 0.43,
144
- "learning_rate": 3.918522419500452e-05,
145
- "loss": 0.609,
146
- "step": 11500
147
- },
148
- {
149
- "epoch": 0.45,
150
- "learning_rate": 3.871501655130906e-05,
151
- "loss": 0.6285,
152
- "step": 12000
153
- },
154
- {
155
- "epoch": 0.47,
156
- "learning_rate": 3.82448089076136e-05,
157
- "loss": 0.6045,
158
- "step": 12500
159
- },
160
- {
161
- "epoch": 0.49,
162
- "learning_rate": 3.7774601263918145e-05,
163
- "loss": 0.6652,
164
- "step": 13000
165
- },
166
- {
167
- "epoch": 0.51,
168
- "learning_rate": 3.7304393620222693e-05,
169
- "loss": 0.645,
170
- "step": 13500
171
- },
172
- {
173
- "epoch": 0.53,
174
- "learning_rate": 3.6834185976527235e-05,
175
- "loss": 0.7,
176
- "step": 14000
177
- },
178
- {
179
- "epoch": 0.55,
180
- "learning_rate": 3.6363978332831784e-05,
181
- "loss": 0.6917,
182
- "step": 14500
183
- },
184
- {
185
- "epoch": 0.56,
186
- "learning_rate": 3.5893770689136326e-05,
187
- "loss": 0.6928,
188
- "step": 15000
189
- },
190
- {
191
- "epoch": 0.58,
192
- "learning_rate": 3.542356304544087e-05,
193
- "loss": 0.6743,
194
- "step": 15500
195
- },
196
- {
197
- "epoch": 0.6,
198
- "learning_rate": 3.4953355401745416e-05,
199
- "loss": 0.6652,
200
- "step": 16000
201
- },
202
- {
203
- "epoch": 0.62,
204
- "learning_rate": 3.448314775804996e-05,
205
- "loss": 0.706,
206
- "step": 16500
207
- },
208
- {
209
- "epoch": 0.64,
210
- "learning_rate": 3.40129401143545e-05,
211
- "loss": 0.688,
212
- "step": 17000
213
- },
214
- {
215
- "epoch": 0.66,
216
- "learning_rate": 3.354273247065904e-05,
217
- "loss": 0.7293,
218
- "step": 17500
219
- },
220
- {
221
- "epoch": 0.68,
222
- "learning_rate": 3.307252482696359e-05,
223
- "loss": 0.7225,
224
- "step": 18000
225
- },
226
- {
227
- "epoch": 0.7,
228
- "learning_rate": 3.260231718326813e-05,
229
- "loss": 0.756,
230
- "step": 18500
231
- },
232
- {
233
- "epoch": 0.71,
234
- "learning_rate": 3.2132109539572674e-05,
235
- "loss": 0.7508,
236
- "step": 19000
237
- },
238
- {
239
- "epoch": 0.73,
240
- "learning_rate": 3.166190189587722e-05,
241
- "loss": 0.7115,
242
- "step": 19500
243
- },
244
- {
245
- "epoch": 0.75,
246
- "learning_rate": 3.1191694252181765e-05,
247
- "loss": 0.7543,
248
- "step": 20000
249
- },
250
- {
251
- "epoch": 0.77,
252
- "learning_rate": 3.072148660848631e-05,
253
- "loss": 0.7587,
254
- "step": 20500
255
- },
256
- {
257
- "epoch": 0.79,
258
- "learning_rate": 3.025127896479085e-05,
259
- "loss": 0.7785,
260
- "step": 21000
261
- },
262
- {
263
- "epoch": 0.81,
264
- "learning_rate": 2.97810713210954e-05,
265
- "loss": 0.7991,
266
- "step": 21500
267
- },
268
- {
269
- "epoch": 0.83,
270
- "learning_rate": 2.9310863677399942e-05,
271
- "loss": 0.8171,
272
- "step": 22000
273
- },
274
- {
275
- "epoch": 0.85,
276
- "learning_rate": 2.8840656033704484e-05,
277
- "loss": 0.843,
278
- "step": 22500
279
- },
280
- {
281
- "epoch": 0.87,
282
- "learning_rate": 2.837044839000903e-05,
283
- "loss": 0.8072,
284
- "step": 23000
285
- },
286
- {
287
- "epoch": 0.88,
288
- "learning_rate": 2.790024074631357e-05,
289
- "loss": 0.8326,
290
- "step": 23500
291
- },
292
- {
293
- "epoch": 0.9,
294
- "learning_rate": 2.743003310261812e-05,
295
- "loss": 0.8578,
296
- "step": 24000
297
- },
298
- {
299
- "epoch": 0.92,
300
- "learning_rate": 2.695982545892266e-05,
301
- "loss": 0.8621,
302
- "step": 24500
303
- },
304
- {
305
- "epoch": 0.94,
306
- "learning_rate": 2.6489617815227207e-05,
307
- "loss": 0.9032,
308
- "step": 25000
309
- },
310
- {
311
- "epoch": 0.96,
312
- "learning_rate": 2.601941017153175e-05,
313
- "loss": 0.9124,
314
- "step": 25500
315
- },
316
- {
317
- "epoch": 0.98,
318
- "learning_rate": 2.554920252783629e-05,
319
- "loss": 0.9126,
320
- "step": 26000
321
- },
322
- {
323
- "epoch": 1.0,
324
- "learning_rate": 2.507899488414084e-05,
325
- "loss": 0.9252,
326
- "step": 26500
327
- },
328
- {
329
- "epoch": 1.0,
330
- "eval_bleu": 34.6634,
331
- "eval_chrf++": 61.3155,
332
- "eval_gen_len": 20.4231,
333
- "eval_loss": 0.9675707817077637,
334
- "eval_runtime": 2539.8863,
335
- "eval_samples_per_second": 4.979,
336
- "eval_spbleu": 48.2634,
337
- "eval_steps_per_second": 0.996,
338
- "eval_ter": 53.1762,
339
- "step": 26584
340
- },
341
- {
342
- "epoch": 1.02,
343
- "learning_rate": 2.4608787240445384e-05,
344
- "loss": 0.6562,
345
- "step": 27000
346
- },
347
- {
348
- "epoch": 1.03,
349
- "learning_rate": 2.4138579596749926e-05,
350
- "loss": 0.5603,
351
- "step": 27500
352
- },
353
- {
354
- "epoch": 1.05,
355
- "learning_rate": 2.3668371953054468e-05,
356
- "loss": 0.549,
357
- "step": 28000
358
- },
359
- {
360
- "epoch": 1.07,
361
- "learning_rate": 2.3198164309359013e-05,
362
- "loss": 0.5384,
363
- "step": 28500
364
- },
365
- {
366
- "epoch": 1.09,
367
- "learning_rate": 2.272795666566356e-05,
368
- "loss": 0.5653,
369
- "step": 29000
370
- },
371
- {
372
- "epoch": 1.11,
373
- "learning_rate": 2.2257749021968104e-05,
374
- "loss": 0.556,
375
- "step": 29500
376
- },
377
- {
378
- "epoch": 1.13,
379
- "learning_rate": 2.1787541378272646e-05,
380
- "loss": 0.5604,
381
- "step": 30000
382
- },
383
- {
384
- "epoch": 1.15,
385
- "learning_rate": 2.131733373457719e-05,
386
- "loss": 0.5572,
387
- "step": 30500
388
- },
389
- {
390
- "epoch": 1.17,
391
- "learning_rate": 2.0847126090881736e-05,
392
- "loss": 0.5569,
393
- "step": 31000
394
- },
395
- {
396
- "epoch": 1.18,
397
- "learning_rate": 2.0376918447186278e-05,
398
- "loss": 0.5539,
399
- "step": 31500
400
- },
401
- {
402
- "epoch": 1.2,
403
- "learning_rate": 1.9906710803490823e-05,
404
- "loss": 0.5361,
405
- "step": 32000
406
- },
407
- {
408
- "epoch": 1.22,
409
- "learning_rate": 1.9436503159795365e-05,
410
- "loss": 0.5396,
411
- "step": 32500
412
- },
413
- {
414
- "epoch": 1.24,
415
- "learning_rate": 1.896629551609991e-05,
416
- "loss": 0.5457,
417
- "step": 33000
418
- },
419
- {
420
- "epoch": 1.26,
421
- "learning_rate": 1.8496087872404456e-05,
422
- "loss": 0.5638,
423
- "step": 33500
424
- },
425
- {
426
- "epoch": 1.28,
427
- "learning_rate": 1.8025880228709e-05,
428
- "loss": 0.5548,
429
- "step": 34000
430
- },
431
- {
432
- "epoch": 1.3,
433
- "learning_rate": 1.7555672585013543e-05,
434
- "loss": 0.533,
435
- "step": 34500
436
- },
437
- {
438
- "epoch": 1.32,
439
- "learning_rate": 1.7085464941318085e-05,
440
- "loss": 0.5455,
441
- "step": 35000
442
- },
443
- {
444
- "epoch": 1.34,
445
- "learning_rate": 1.661525729762263e-05,
446
- "loss": 0.5675,
447
- "step": 35500
448
- },
449
- {
450
- "epoch": 1.35,
451
- "learning_rate": 1.6145049653927175e-05,
452
- "loss": 0.5444,
453
- "step": 36000
454
- },
455
- {
456
- "epoch": 1.37,
457
- "learning_rate": 1.567484201023172e-05,
458
- "loss": 0.5677,
459
- "step": 36500
460
- },
461
- {
462
- "epoch": 1.39,
463
- "learning_rate": 1.5204634366536264e-05,
464
- "loss": 0.551,
465
- "step": 37000
466
- },
467
- {
468
- "epoch": 1.41,
469
- "learning_rate": 1.4734426722840807e-05,
470
- "loss": 0.5588,
471
- "step": 37500
472
- },
473
- {
474
- "epoch": 1.43,
475
- "learning_rate": 1.4264219079145353e-05,
476
- "loss": 0.5602,
477
- "step": 38000
478
- },
479
- {
480
- "epoch": 1.45,
481
- "learning_rate": 1.3794011435449896e-05,
482
- "loss": 0.5434,
483
- "step": 38500
484
- },
485
- {
486
- "epoch": 1.47,
487
- "learning_rate": 1.3323803791754438e-05,
488
- "loss": 0.5642,
489
- "step": 39000
490
- },
491
- {
492
- "epoch": 1.49,
493
- "learning_rate": 1.2853596148058983e-05,
494
- "loss": 0.5516,
495
- "step": 39500
496
- },
497
- {
498
- "epoch": 1.5,
499
- "learning_rate": 1.2383388504363527e-05,
500
- "loss": 0.5639,
501
- "step": 40000
502
- },
503
- {
504
- "epoch": 1.52,
505
- "learning_rate": 1.1913180860668072e-05,
506
- "loss": 0.531,
507
- "step": 40500
508
- },
509
- {
510
- "epoch": 1.54,
511
- "learning_rate": 1.1442973216972616e-05,
512
- "loss": 0.5301,
513
- "step": 41000
514
- },
515
- {
516
- "epoch": 1.56,
517
- "learning_rate": 1.0972765573277159e-05,
518
- "loss": 0.5677,
519
- "step": 41500
520
- },
521
- {
522
- "epoch": 1.58,
523
- "learning_rate": 1.0502557929581704e-05,
524
- "loss": 0.5308,
525
- "step": 42000
526
- },
527
- {
528
- "epoch": 1.6,
529
- "learning_rate": 1.0032350285886248e-05,
530
- "loss": 0.5224,
531
- "step": 42500
532
- },
533
- {
534
- "epoch": 1.62,
535
- "learning_rate": 9.562142642190793e-06,
536
- "loss": 0.5864,
537
- "step": 43000
538
- },
539
- {
540
- "epoch": 1.64,
541
- "learning_rate": 9.091934998495335e-06,
542
- "loss": 0.5345,
543
- "step": 43500
544
- },
545
- {
546
- "epoch": 1.66,
547
- "learning_rate": 8.62172735479988e-06,
548
- "loss": 0.5531,
549
- "step": 44000
550
- },
551
- {
552
- "epoch": 1.67,
553
- "learning_rate": 8.151519711104424e-06,
554
- "loss": 0.5274,
555
- "step": 44500
556
- },
557
- {
558
- "epoch": 1.69,
559
- "learning_rate": 7.681312067408967e-06,
560
- "loss": 0.543,
561
- "step": 45000
562
- },
563
- {
564
- "epoch": 1.71,
565
- "learning_rate": 7.211104423713512e-06,
566
- "loss": 0.5486,
567
- "step": 45500
568
- },
569
- {
570
- "epoch": 1.73,
571
- "learning_rate": 6.740896780018056e-06,
572
- "loss": 0.5469,
573
- "step": 46000
574
- },
575
- {
576
- "epoch": 1.75,
577
- "learning_rate": 6.2706891363226005e-06,
578
- "loss": 0.5427,
579
- "step": 46500
580
- },
581
- {
582
- "epoch": 1.77,
583
- "learning_rate": 5.800481492627145e-06,
584
- "loss": 0.5424,
585
- "step": 47000
586
- },
587
- {
588
- "epoch": 1.79,
589
- "learning_rate": 5.330273848931688e-06,
590
- "loss": 0.556,
591
- "step": 47500
592
- },
593
- {
594
- "epoch": 1.81,
595
- "learning_rate": 4.860066205236233e-06,
596
- "loss": 0.5411,
597
- "step": 48000
598
- },
599
- {
600
- "epoch": 1.82,
601
- "learning_rate": 4.389858561540776e-06,
602
- "loss": 0.5639,
603
- "step": 48500
604
- },
605
- {
606
- "epoch": 1.84,
607
- "learning_rate": 3.919650917845321e-06,
608
- "loss": 0.5344,
609
- "step": 49000
610
- },
611
- {
612
- "epoch": 1.86,
613
- "learning_rate": 3.4494432741498647e-06,
614
- "loss": 0.5245,
615
- "step": 49500
616
- },
617
- {
618
- "epoch": 1.88,
619
- "learning_rate": 2.9792356304544087e-06,
620
- "loss": 0.5638,
621
- "step": 50000
622
- },
623
- {
624
- "epoch": 1.9,
625
- "learning_rate": 2.509027986758953e-06,
626
- "loss": 0.5466,
627
- "step": 50500
628
- },
629
- {
630
- "epoch": 1.92,
631
- "learning_rate": 2.038820343063497e-06,
632
- "loss": 0.5473,
633
- "step": 51000
634
- },
635
- {
636
- "epoch": 1.94,
637
- "learning_rate": 1.568612699368041e-06,
638
- "loss": 0.5281,
639
- "step": 51500
640
- },
641
- {
642
- "epoch": 1.96,
643
- "learning_rate": 1.098405055672585e-06,
644
- "loss": 0.5205,
645
- "step": 52000
646
- },
647
- {
648
- "epoch": 1.97,
649
- "learning_rate": 6.281974119771292e-07,
650
- "loss": 0.543,
651
- "step": 52500
652
- },
653
- {
654
- "epoch": 1.99,
655
- "learning_rate": 1.579897682816732e-07,
656
- "loss": 0.5409,
657
- "step": 53000
658
- },
659
- {
660
- "epoch": 2.0,
661
- "eval_bleu": 36.0011,
662
- "eval_chrf++": 62.004,
663
- "eval_gen_len": 20.4066,
664
- "eval_loss": 1.0103600025177002,
665
- "eval_runtime": 2543.0781,
666
- "eval_samples_per_second": 4.973,
667
- "eval_spbleu": 49.2767,
668
- "eval_steps_per_second": 0.995,
669
- "eval_ter": 51.8536,
670
- "step": 53168
671
- }
672
- ],
673
- "max_steps": 53168,
674
- "num_train_epochs": 2,
675
- "total_flos": 2.262694057958441e+17,
676
- "trial_name": null,
677
- "trial_params": null
678
- }