Abdulwahab Sahyoun committed on
Commit
06d286f
1 Parent(s): 6bf0d45

End of training

Browse files
Files changed (4) hide show
  1. all_results.json +10 -10
  2. eval_results.json +6 -6
  3. train_results.json +5 -5
  4. trainer_state.json +379 -22
all_results.json CHANGED
@@ -1,14 +1,14 @@
1
  {
2
- "epoch": 15.0,
3
- "eval_loss": 0.6945873498916626,
4
- "eval_runtime": 205.608,
5
  "eval_samples": 4899,
6
- "eval_samples_per_second": 23.827,
7
- "eval_steps_per_second": 0.749,
8
- "eval_wer": 0.3939828080229226,
9
- "train_loss": 1.2329752604166666,
10
- "train_runtime": 16157.973,
11
  "train_samples": 14730,
12
- "train_samples_per_second": 13.674,
13
- "train_steps_per_second": 0.214
14
  }
1
  {
2
+ "epoch": 30.0,
3
+ "eval_loss": 0.8535701632499695,
4
+ "eval_runtime": 201.4101,
5
  "eval_samples": 4899,
6
+ "eval_samples_per_second": 24.324,
7
+ "eval_steps_per_second": 0.765,
8
+ "eval_wer": 0.3736551873276748,
9
+ "train_loss": 0.15137778351272363,
10
+ "train_runtime": 18309.2993,
11
  "train_samples": 14730,
12
+ "train_samples_per_second": 24.135,
13
+ "train_steps_per_second": 0.377
14
  }
eval_results.json CHANGED
@@ -1,9 +1,9 @@
1
  {
2
- "epoch": 15.0,
3
- "eval_loss": 0.6945873498916626,
4
- "eval_runtime": 205.608,
5
  "eval_samples": 4899,
6
- "eval_samples_per_second": 23.827,
7
- "eval_steps_per_second": 0.749,
8
- "eval_wer": 0.3939828080229226
9
  }
1
  {
2
+ "epoch": 30.0,
3
+ "eval_loss": 0.8535701632499695,
4
+ "eval_runtime": 201.4101,
5
  "eval_samples": 4899,
6
+ "eval_samples_per_second": 24.324,
7
+ "eval_steps_per_second": 0.765,
8
+ "eval_wer": 0.3736551873276748
9
  }
train_results.json CHANGED
@@ -1,8 +1,8 @@
1
  {
2
- "epoch": 15.0,
3
- "train_loss": 1.2329752604166666,
4
- "train_runtime": 16157.973,
5
  "train_samples": 14730,
6
- "train_samples_per_second": 13.674,
7
- "train_steps_per_second": 0.214
8
  }
1
  {
2
+ "epoch": 30.0,
3
+ "train_loss": 0.15137778351272363,
4
+ "train_runtime": 18309.2993,
5
  "train_samples": 14730,
6
+ "train_samples_per_second": 24.135,
7
+ "train_steps_per_second": 0.377
8
  }
trainer_state.json CHANGED
@@ -1,8 +1,8 @@
1
  {
2
  "best_metric": null,
3
  "best_model_checkpoint": null,
4
- "epoch": 14.997830802603037,
5
- "global_step": 3450,
6
  "is_hyper_param_search": false,
7
  "is_local_process_zero": true,
8
  "is_world_process_zero": true,
@@ -333,35 +333,392 @@
333
  },
334
  {
335
  "epoch": 14.35,
336
- "eval_loss": 0.6937959790229797,
337
- "eval_runtime": 202.6788,
338
- "eval_samples_per_second": 24.171,
339
- "eval_steps_per_second": 0.76,
340
- "eval_wer": 0.39365843109693466,
341
  "step": 3300
342
  },
343
  {
344
  "epoch": 14.78,
345
- "eval_loss": 0.6927648782730103,
346
- "eval_runtime": 201.0141,
347
- "eval_samples_per_second": 24.371,
348
- "eval_steps_per_second": 0.766,
349
- "eval_wer": 0.3945504676434016,
350
  "step": 3400
351
  },
352
  {
353
- "epoch": 15.0,
354
- "step": 3450,
355
- "total_flos": 2.9875620378207076e+19,
356
- "train_loss": 1.2329752604166666,
357
- "train_runtime": 16157.973,
358
- "train_samples_per_second": 13.674,
359
- "train_steps_per_second": 0.214
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
360
  }
361
  ],
362
- "max_steps": 3450,
363
- "num_train_epochs": 15,
364
- "total_flos": 2.9875620378207076e+19,
365
  "trial_name": null,
366
  "trial_params": null
367
  }
1
  {
2
  "best_metric": null,
3
  "best_model_checkpoint": null,
4
+ "epoch": 29.997830802603037,
5
+ "global_step": 6900,
6
  "is_hyper_param_search": false,
7
  "is_local_process_zero": true,
8
  "is_world_process_zero": true,
333
  },
334
  {
335
  "epoch": 14.35,
336
+ "eval_loss": 0.7177483439445496,
337
+ "eval_runtime": 201.1511,
338
+ "eval_samples_per_second": 24.355,
339
+ "eval_steps_per_second": 0.766,
340
+ "eval_wer": 0.4009839433421636,
341
  "step": 3300
342
  },
343
  {
344
  "epoch": 14.78,
345
+ "eval_loss": 0.6887663006782532,
346
+ "eval_runtime": 201.3869,
347
+ "eval_samples_per_second": 24.326,
348
+ "eval_steps_per_second": 0.765,
349
+ "eval_wer": 0.4042547440125426,
350
  "step": 3400
351
  },
352
  {
353
+ "epoch": 15.22,
354
+ "learning_rate": 0.000159515625,
355
+ "loss": 0.3767,
356
+ "step": 3500
357
+ },
358
+ {
359
+ "epoch": 15.22,
360
+ "eval_loss": 0.7124109268188477,
361
+ "eval_runtime": 201.401,
362
+ "eval_samples_per_second": 24.325,
363
+ "eval_steps_per_second": 0.765,
364
+ "eval_wer": 0.4202303076174515,
365
+ "step": 3500
366
+ },
367
+ {
368
+ "epoch": 15.65,
369
+ "eval_loss": 0.7275710701942444,
370
+ "eval_runtime": 201.0133,
371
+ "eval_samples_per_second": 24.372,
372
+ "eval_steps_per_second": 0.766,
373
+ "eval_wer": 0.41201275882575555,
374
+ "step": 3600
375
+ },
376
+ {
377
+ "epoch": 16.09,
378
+ "eval_loss": 0.7265337109565735,
379
+ "eval_runtime": 201.5776,
380
+ "eval_samples_per_second": 24.303,
381
+ "eval_steps_per_second": 0.764,
382
+ "eval_wer": 0.40336270746607555,
383
+ "step": 3700
384
+ },
385
+ {
386
+ "epoch": 16.52,
387
+ "eval_loss": 0.7392393946647644,
388
+ "eval_runtime": 200.4049,
389
+ "eval_samples_per_second": 24.446,
390
+ "eval_steps_per_second": 0.768,
391
+ "eval_wer": 0.40771476455641453,
392
+ "step": 3800
393
+ },
394
+ {
395
+ "epoch": 16.95,
396
+ "eval_loss": 0.7403403520584106,
397
+ "eval_runtime": 200.8623,
398
+ "eval_samples_per_second": 24.39,
399
+ "eval_steps_per_second": 0.767,
400
+ "eval_wer": 0.39652376060982863,
401
+ "step": 3900
402
+ },
403
+ {
404
+ "epoch": 17.39,
405
+ "learning_rate": 0.000136078125,
406
+ "loss": 0.3603,
407
+ "step": 4000
408
+ },
409
+ {
410
+ "epoch": 17.39,
411
+ "eval_loss": 0.7444584369659424,
412
+ "eval_runtime": 203.8048,
413
+ "eval_samples_per_second": 24.038,
414
+ "eval_steps_per_second": 0.756,
415
+ "eval_wer": 0.4016326971941396,
416
+ "step": 4000
417
+ },
418
+ {
419
+ "epoch": 17.82,
420
+ "eval_loss": 0.757925271987915,
421
+ "eval_runtime": 201.3248,
422
+ "eval_samples_per_second": 24.334,
423
+ "eval_steps_per_second": 0.765,
424
+ "eval_wer": 0.4012272260366546,
425
+ "step": 4100
426
+ },
427
+ {
428
+ "epoch": 18.26,
429
+ "eval_loss": 0.7225211262702942,
430
+ "eval_runtime": 200.7191,
431
+ "eval_samples_per_second": 24.407,
432
+ "eval_steps_per_second": 0.767,
433
+ "eval_wer": 0.3962804779153376,
434
+ "step": 4200
435
+ },
436
+ {
437
+ "epoch": 18.69,
438
+ "eval_loss": 0.7354857325553894,
439
+ "eval_runtime": 200.5207,
440
+ "eval_samples_per_second": 24.431,
441
+ "eval_steps_per_second": 0.768,
442
+ "eval_wer": 0.3951181272638806,
443
+ "step": 4300
444
+ },
445
+ {
446
+ "epoch": 19.13,
447
+ "eval_loss": 0.7482349276542664,
448
+ "eval_runtime": 200.7847,
449
+ "eval_samples_per_second": 24.399,
450
+ "eval_steps_per_second": 0.767,
451
+ "eval_wer": 0.39252311185597666,
452
+ "step": 4400
453
+ },
454
+ {
455
+ "epoch": 19.56,
456
+ "learning_rate": 0.000112640625,
457
+ "loss": 0.3153,
458
+ "step": 4500
459
+ },
460
+ {
461
+ "epoch": 19.56,
462
+ "eval_loss": 0.772259533405304,
463
+ "eval_runtime": 200.8729,
464
+ "eval_samples_per_second": 24.389,
465
+ "eval_steps_per_second": 0.767,
466
+ "eval_wer": 0.3971725144618046,
467
+ "step": 4500
468
+ },
469
+ {
470
+ "epoch": 20.0,
471
+ "eval_loss": 0.7469175457954407,
472
+ "eval_runtime": 200.5892,
473
+ "eval_samples_per_second": 24.423,
474
+ "eval_steps_per_second": 0.768,
475
+ "eval_wer": 0.3897929393955777,
476
+ "step": 4600
477
+ },
478
+ {
479
+ "epoch": 20.43,
480
+ "eval_loss": 0.7799847722053528,
481
+ "eval_runtime": 200.7406,
482
+ "eval_samples_per_second": 24.405,
483
+ "eval_steps_per_second": 0.767,
484
+ "eval_wer": 0.39444234200140565,
485
+ "step": 4700
486
+ },
487
+ {
488
+ "epoch": 20.87,
489
+ "eval_loss": 0.7827086448669434,
490
+ "eval_runtime": 200.8886,
491
+ "eval_samples_per_second": 24.387,
492
+ "eval_steps_per_second": 0.767,
493
+ "eval_wer": 0.3897388765745797,
494
+ "step": 4800
495
+ },
496
+ {
497
+ "epoch": 21.3,
498
+ "eval_loss": 0.7935096621513367,
499
+ "eval_runtime": 204.4398,
500
+ "eval_samples_per_second": 23.963,
501
+ "eval_steps_per_second": 0.753,
502
+ "eval_wer": 0.3913607612045196,
503
+ "step": 4900
504
+ },
505
+ {
506
+ "epoch": 21.74,
507
+ "learning_rate": 8.9203125e-05,
508
+ "loss": 0.286,
509
+ "step": 5000
510
+ },
511
+ {
512
+ "epoch": 21.74,
513
+ "eval_loss": 0.7984351515769958,
514
+ "eval_runtime": 200.4244,
515
+ "eval_samples_per_second": 24.443,
516
+ "eval_steps_per_second": 0.768,
517
+ "eval_wer": 0.37497972644212574,
518
+ "step": 5000
519
+ },
520
+ {
521
+ "epoch": 22.17,
522
+ "eval_loss": 0.7945317625999451,
523
+ "eval_runtime": 200.6266,
524
+ "eval_samples_per_second": 24.418,
525
+ "eval_steps_per_second": 0.768,
526
+ "eval_wer": 0.3830080553603287,
527
+ "step": 5100
528
+ },
529
+ {
530
+ "epoch": 22.61,
531
+ "eval_loss": 0.8010774254798889,
532
+ "eval_runtime": 200.4942,
533
+ "eval_samples_per_second": 24.435,
534
+ "eval_steps_per_second": 0.768,
535
+ "eval_wer": 0.37746661620803373,
536
+ "step": 5200
537
+ },
538
+ {
539
+ "epoch": 23.04,
540
+ "eval_loss": 0.7977942228317261,
541
+ "eval_runtime": 201.0949,
542
+ "eval_samples_per_second": 24.362,
543
+ "eval_steps_per_second": 0.766,
544
+ "eval_wer": 0.3823863329188517,
545
+ "step": 5300
546
+ },
547
+ {
548
+ "epoch": 23.48,
549
+ "eval_loss": 0.8160513043403625,
550
+ "eval_runtime": 201.474,
551
+ "eval_samples_per_second": 24.316,
552
+ "eval_steps_per_second": 0.764,
553
+ "eval_wer": 0.3833324322863167,
554
+ "step": 5400
555
+ },
556
+ {
557
+ "epoch": 23.91,
558
+ "learning_rate": 6.576562499999999e-05,
559
+ "loss": 0.2615,
560
+ "step": 5500
561
+ },
562
+ {
563
+ "epoch": 23.91,
564
+ "eval_loss": 0.7822675704956055,
565
+ "eval_runtime": 200.6337,
566
+ "eval_samples_per_second": 24.418,
567
+ "eval_steps_per_second": 0.768,
568
+ "eval_wer": 0.3857922906417257,
569
+ "step": 5500
570
+ },
571
+ {
572
+ "epoch": 24.35,
573
+ "eval_loss": 0.8311657309532166,
574
+ "eval_runtime": 201.9809,
575
+ "eval_samples_per_second": 24.255,
576
+ "eval_steps_per_second": 0.762,
577
+ "eval_wer": 0.3863329188517057,
578
+ "step": 5600
579
+ },
580
+ {
581
+ "epoch": 24.78,
582
+ "eval_loss": 0.8427298069000244,
583
+ "eval_runtime": 201.3389,
584
+ "eval_samples_per_second": 24.332,
585
+ "eval_steps_per_second": 0.765,
586
+ "eval_wer": 0.3818997675298697,
587
+ "step": 5700
588
+ },
589
+ {
590
+ "epoch": 25.22,
591
+ "eval_loss": 0.8432016968727112,
592
+ "eval_runtime": 205.0165,
593
+ "eval_samples_per_second": 23.896,
594
+ "eval_steps_per_second": 0.751,
595
+ "eval_wer": 0.3801697572579337,
596
+ "step": 5800
597
+ },
598
+ {
599
+ "epoch": 25.65,
600
+ "eval_loss": 0.8285790085792542,
601
+ "eval_runtime": 200.9997,
602
+ "eval_samples_per_second": 24.373,
603
+ "eval_steps_per_second": 0.766,
604
+ "eval_wer": 0.37943990917446074,
605
+ "step": 5900
606
+ },
607
+ {
608
+ "epoch": 26.09,
609
+ "learning_rate": 4.2328124999999994e-05,
610
+ "loss": 0.2408,
611
+ "step": 6000
612
+ },
613
+ {
614
+ "epoch": 26.09,
615
+ "eval_loss": 0.8224411606788635,
616
+ "eval_runtime": 200.5192,
617
+ "eval_samples_per_second": 24.432,
618
+ "eval_steps_per_second": 0.768,
619
+ "eval_wer": 0.3824403957398497,
620
+ "step": 6000
621
+ },
622
+ {
623
+ "epoch": 26.52,
624
+ "eval_loss": 0.8228213787078857,
625
+ "eval_runtime": 201.4468,
626
+ "eval_samples_per_second": 24.319,
627
+ "eval_steps_per_second": 0.764,
628
+ "eval_wer": 0.3823052386873547,
629
+ "step": 6100
630
+ },
631
+ {
632
+ "epoch": 26.95,
633
+ "eval_loss": 0.832374095916748,
634
+ "eval_runtime": 200.428,
635
+ "eval_samples_per_second": 24.443,
636
+ "eval_steps_per_second": 0.768,
637
+ "eval_wer": 0.3794669405849597,
638
+ "step": 6200
639
+ },
640
+ {
641
+ "epoch": 27.39,
642
+ "eval_loss": 0.8564098477363586,
643
+ "eval_runtime": 199.9963,
644
+ "eval_samples_per_second": 24.495,
645
+ "eval_steps_per_second": 0.77,
646
+ "eval_wer": 0.37441206682164674,
647
+ "step": 6300
648
+ },
649
+ {
650
+ "epoch": 27.82,
651
+ "eval_loss": 0.8629336953163147,
652
+ "eval_runtime": 200.1836,
653
+ "eval_samples_per_second": 24.473,
654
+ "eval_steps_per_second": 0.769,
655
+ "eval_wer": 0.37738552197653674,
656
+ "step": 6400
657
+ },
658
+ {
659
+ "epoch": 28.26,
660
+ "learning_rate": 1.8890625e-05,
661
+ "loss": 0.2254,
662
+ "step": 6500
663
+ },
664
+ {
665
+ "epoch": 28.26,
666
+ "eval_loss": 0.8544773459434509,
667
+ "eval_runtime": 200.1232,
668
+ "eval_samples_per_second": 24.48,
669
+ "eval_steps_per_second": 0.77,
670
+ "eval_wer": 0.3778450559550197,
671
+ "step": 6500
672
+ },
673
+ {
674
+ "epoch": 28.69,
675
+ "eval_loss": 0.8491827845573425,
676
+ "eval_runtime": 201.2475,
677
+ "eval_samples_per_second": 24.343,
678
+ "eval_steps_per_second": 0.765,
679
+ "eval_wer": 0.3767097367140617,
680
+ "step": 6600
681
+ },
682
+ {
683
+ "epoch": 29.13,
684
+ "eval_loss": 0.8510707020759583,
685
+ "eval_runtime": 203.9748,
686
+ "eval_samples_per_second": 24.018,
687
+ "eval_steps_per_second": 0.755,
688
+ "eval_wer": 0.37508785208412176,
689
+ "step": 6700
690
+ },
691
+ {
692
+ "epoch": 29.56,
693
+ "eval_loss": 0.8490829467773438,
694
+ "eval_runtime": 200.922,
695
+ "eval_samples_per_second": 24.383,
696
+ "eval_steps_per_second": 0.766,
697
+ "eval_wer": 0.3752770719576147,
698
+ "step": 6800
699
+ },
700
+ {
701
+ "epoch": 30.0,
702
+ "eval_loss": 0.8535701632499695,
703
+ "eval_runtime": 201.4279,
704
+ "eval_samples_per_second": 24.321,
705
+ "eval_steps_per_second": 0.765,
706
+ "eval_wer": 0.3736551873276748,
707
+ "step": 6900
708
+ },
709
+ {
710
+ "epoch": 30.0,
711
+ "step": 6900,
712
+ "total_flos": 5.975239123849969e+19,
713
+ "train_loss": 0.15137778351272363,
714
+ "train_runtime": 18309.2993,
715
+ "train_samples_per_second": 24.135,
716
+ "train_steps_per_second": 0.377
717
  }
718
  ],
719
+ "max_steps": 6900,
720
+ "num_train_epochs": 30,
721
+ "total_flos": 5.975239123849969e+19,
722
  "trial_name": null,
723
  "trial_params": null
724
  }