TenzinGayche committed on
Commit
b09b7d8
•
1 Parent(s): 3ff632d

Training in progress, step 3300

Browse files
{checkpoint-2100 → checkpoint-3200}/config.json RENAMED
File without changes
{checkpoint-2100 → checkpoint-3200}/optimizer.pt RENAMED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:87e2080de9b12e77e557f50a48e30b7ba4194eca0fd9c1a98f3b79a795706b37
3
  size 2490946501
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:3b096ae7b366c7c999b92a9c0d6ce1c8f70bba3cd30c87556eab078c149c9e3d
3
  size 2490946501
{checkpoint-2100 → checkpoint-3200}/preprocessor_config.json RENAMED
File without changes
{checkpoint-2200 → checkpoint-3200}/pytorch_model.bin RENAMED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:6cce06d15cec4e6aaad2f52b7c165936df822e00a89dfb534de60b39c6180cc4
3
  size 1262344621
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:f1aab560189c775afd4d52f128e4bd0cfd72f760ad25aab0de2f45c34b0b45e2
3
  size 1262344621
{checkpoint-2200 → checkpoint-3200}/rng_state.pth RENAMED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:96e9dc9cd32a89c54dbb93c6ae252f82b827a1a9d04f14d2ce52e858a5d26e24
3
  size 14575
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:86fe28654777ce9becd5efc4b0839519aac223aa554449121cb9e9b1d6391db8
3
  size 14575
{checkpoint-2200 → checkpoint-3200}/scaler.pt RENAMED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:daf71784c669ab5c19402209fd2ce2ba2839e04cc29da73480a4896e9283ae06
3
  size 557
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:bd7c34e3c6e4a21fa45cdb54a89b626e4c29dfd67a2bac43d63b8bce896a050f
3
  size 557
{checkpoint-2100 → checkpoint-3200}/scheduler.pt RENAMED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:6c20e8e4f4623e1333635d4d6f55117df2428ab91ba217c9207d1a49a91c2552
3
  size 627
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:d9eae38967de4c32ee3782c111dc2a1911655d360e132f06fbb454136bb7010c
3
  size 627
{checkpoint-2200 → checkpoint-3200}/trainer_state.json RENAMED
@@ -1,8 +1,8 @@
1
  {
2
  "best_metric": null,
3
  "best_model_checkpoint": null,
4
- "epoch": 14.965986394557824,
5
- "global_step": 2200,
6
  "is_hyper_param_search": false,
7
  "is_local_process_zero": true,
8
  "is_world_process_zero": true,
@@ -468,11 +468,221 @@
468
  "eval_samples_per_second": 23.695,
469
  "eval_steps_per_second": 2.982,
470
  "step": 2200
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
471
  }
472
  ],
473
  "max_steps": 3675,
474
  "num_train_epochs": 25,
475
- "total_flos": 8.535511077527554e+18,
476
  "trial_name": null,
477
  "trial_params": null
478
  }
1
  {
2
  "best_metric": null,
3
  "best_model_checkpoint": null,
4
+ "epoch": 21.768707482993197,
5
+ "global_step": 3200,
6
  "is_hyper_param_search": false,
7
  "is_local_process_zero": true,
8
  "is_world_process_zero": true,
468
  "eval_samples_per_second": 23.695,
469
  "eval_steps_per_second": 2.982,
470
  "step": 2200
471
+ },
472
+ {
473
+ "epoch": 15.31,
474
+ "learning_rate": 0.0001349291338582677,
475
+ "loss": 0.2462,
476
+ "step": 2250
477
+ },
478
+ {
479
+ "epoch": 15.65,
480
+ "learning_rate": 0.0001302047244094488,
481
+ "loss": 0.256,
482
+ "step": 2300
483
+ },
484
+ {
485
+ "epoch": 15.65,
486
+ "eval_cer": 0.15819338007500408,
487
+ "eval_loss": 0.3580550253391266,
488
+ "eval_runtime": 24.5695,
489
+ "eval_samples_per_second": 23.932,
490
+ "eval_steps_per_second": 3.012,
491
+ "step": 2300
492
+ },
493
+ {
494
+ "epoch": 15.99,
495
+ "learning_rate": 0.00012548031496062992,
496
+ "loss": 0.2524,
497
+ "step": 2350
498
+ },
499
+ {
500
+ "epoch": 16.33,
501
+ "learning_rate": 0.00012075590551181102,
502
+ "loss": 0.2219,
503
+ "step": 2400
504
+ },
505
+ {
506
+ "epoch": 16.33,
507
+ "eval_cer": 0.14801891407141693,
508
+ "eval_loss": 0.35925593972206116,
509
+ "eval_runtime": 24.982,
510
+ "eval_samples_per_second": 23.537,
511
+ "eval_steps_per_second": 2.962,
512
+ "step": 2400
513
+ },
514
+ {
515
+ "epoch": 16.67,
516
+ "learning_rate": 0.0001160314960629921,
517
+ "loss": 0.2364,
518
+ "step": 2450
519
+ },
520
+ {
521
+ "epoch": 17.01,
522
+ "learning_rate": 0.00011130708661417321,
523
+ "loss": 0.2291,
524
+ "step": 2500
525
+ },
526
+ {
527
+ "epoch": 17.01,
528
+ "eval_cer": 0.1471058209685309,
529
+ "eval_loss": 0.35567909479141235,
530
+ "eval_runtime": 24.4749,
531
+ "eval_samples_per_second": 24.025,
532
+ "eval_steps_per_second": 3.024,
533
+ "step": 2500
534
+ },
535
+ {
536
+ "epoch": 17.35,
537
+ "learning_rate": 0.00010658267716535431,
538
+ "loss": 0.2045,
539
+ "step": 2550
540
+ },
541
+ {
542
+ "epoch": 17.69,
543
+ "learning_rate": 0.00010185826771653542,
544
+ "loss": 0.2172,
545
+ "step": 2600
546
+ },
547
+ {
548
+ "epoch": 17.69,
549
+ "eval_cer": 0.14792108266753629,
550
+ "eval_loss": 0.3606509566307068,
551
+ "eval_runtime": 25.1105,
552
+ "eval_samples_per_second": 23.416,
553
+ "eval_steps_per_second": 2.947,
554
+ "step": 2600
555
+ },
556
+ {
557
+ "epoch": 18.03,
558
+ "learning_rate": 9.713385826771652e-05,
559
+ "loss": 0.2271,
560
+ "step": 2650
561
+ },
562
+ {
563
+ "epoch": 18.37,
564
+ "learning_rate": 9.240944881889763e-05,
565
+ "loss": 0.1858,
566
+ "step": 2700
567
+ },
568
+ {
569
+ "epoch": 18.37,
570
+ "eval_cer": 0.15144301320723952,
571
+ "eval_loss": 0.3589307963848114,
572
+ "eval_runtime": 24.5005,
573
+ "eval_samples_per_second": 24.0,
574
+ "eval_steps_per_second": 3.02,
575
+ "step": 2700
576
+ },
577
+ {
578
+ "epoch": 18.71,
579
+ "learning_rate": 8.768503937007873e-05,
580
+ "loss": 0.1995,
581
+ "step": 2750
582
+ },
583
+ {
584
+ "epoch": 19.05,
585
+ "learning_rate": 8.296062992125984e-05,
586
+ "loss": 0.1872,
587
+ "step": 2800
588
+ },
589
+ {
590
+ "epoch": 19.05,
591
+ "eval_cer": 0.14766019892385457,
592
+ "eval_loss": 0.36663514375686646,
593
+ "eval_runtime": 24.4383,
594
+ "eval_samples_per_second": 24.061,
595
+ "eval_steps_per_second": 3.028,
596
+ "step": 2800
597
+ },
598
+ {
599
+ "epoch": 19.39,
600
+ "learning_rate": 7.823622047244094e-05,
601
+ "loss": 0.177,
602
+ "step": 2850
603
+ },
604
+ {
605
+ "epoch": 19.73,
606
+ "learning_rate": 7.351181102362205e-05,
607
+ "loss": 0.1855,
608
+ "step": 2900
609
+ },
610
+ {
611
+ "epoch": 19.73,
612
+ "eval_cer": 0.1432577857492255,
613
+ "eval_loss": 0.3651330769062042,
614
+ "eval_runtime": 24.7826,
615
+ "eval_samples_per_second": 23.726,
616
+ "eval_steps_per_second": 2.986,
617
+ "step": 2900
618
+ },
619
+ {
620
+ "epoch": 20.07,
621
+ "learning_rate": 6.878740157480315e-05,
622
+ "loss": 0.1773,
623
+ "step": 2950
624
+ },
625
+ {
626
+ "epoch": 20.41,
627
+ "learning_rate": 6.406299212598424e-05,
628
+ "loss": 0.185,
629
+ "step": 3000
630
+ },
631
+ {
632
+ "epoch": 20.41,
633
+ "eval_cer": 0.1447578672753954,
634
+ "eval_loss": 0.36554473638534546,
635
+ "eval_runtime": 24.7061,
636
+ "eval_samples_per_second": 23.8,
637
+ "eval_steps_per_second": 2.995,
638
+ "step": 3000
639
+ },
640
+ {
641
+ "epoch": 20.75,
642
+ "learning_rate": 5.933858267716535e-05,
643
+ "loss": 0.1735,
644
+ "step": 3050
645
+ },
646
+ {
647
+ "epoch": 21.09,
648
+ "learning_rate": 5.461417322834645e-05,
649
+ "loss": 0.1599,
650
+ "step": 3100
651
+ },
652
+ {
653
+ "epoch": 21.09,
654
+ "eval_cer": 0.14867112343062122,
655
+ "eval_loss": 0.37344449758529663,
656
+ "eval_runtime": 24.8752,
657
+ "eval_samples_per_second": 23.638,
658
+ "eval_steps_per_second": 2.975,
659
+ "step": 3100
660
+ },
661
+ {
662
+ "epoch": 21.43,
663
+ "learning_rate": 4.9889763779527555e-05,
664
+ "loss": 0.1578,
665
+ "step": 3150
666
+ },
667
+ {
668
+ "epoch": 21.77,
669
+ "learning_rate": 4.516535433070866e-05,
670
+ "loss": 0.1742,
671
+ "step": 3200
672
+ },
673
+ {
674
+ "epoch": 21.77,
675
+ "eval_cer": 0.148638512962661,
676
+ "eval_loss": 0.3685232102870941,
677
+ "eval_runtime": 25.1361,
678
+ "eval_samples_per_second": 23.393,
679
+ "eval_steps_per_second": 2.944,
680
+ "step": 3200
681
  }
682
  ],
683
  "max_steps": 3675,
684
  "num_train_epochs": 25,
685
+ "total_flos": 1.242459434699676e+19,
686
  "trial_name": null,
687
  "trial_params": null
688
  }
{checkpoint-2100 → checkpoint-3200}/training_args.bin RENAMED
File without changes
{checkpoint-2200 → checkpoint-3300}/config.json RENAMED
File without changes
{checkpoint-2200 → checkpoint-3300}/optimizer.pt RENAMED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:6767d1346acea9f1cfc55aa58dcfbce8b49d761c43b31cd3ece778ea28537d18
3
  size 2490946501
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:8b656fc491d0ed8f6ebfccb7267d33dde8cfdd3fc42b0f12ed1ea145c2467b56
3
  size 2490946501
{checkpoint-2200 → checkpoint-3300}/preprocessor_config.json RENAMED
File without changes
{checkpoint-2100 → checkpoint-3300}/pytorch_model.bin RENAMED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:ec5ea49be8b746ea48d869dab13f1c8fb1f407557d206b2a7a9ee54fc7002b55
3
  size 1262344621
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:71b3268161afb2bcdf8b28a1a1d911f835d6d1a33dd0c929b5d6cf7248b065dc
3
  size 1262344621
{checkpoint-2100 → checkpoint-3300}/rng_state.pth RENAMED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:d26c90a74252f9ad5cdc4c91db335ffbbece95883ba82e495b95ac3d28e6ea52
3
  size 14639
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:e8d02f162a600f1e16123bc1f3de13d710b69a81bc405e4671daab4d2f276389
3
  size 14639
{checkpoint-2100 → checkpoint-3300}/scaler.pt RENAMED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:1f255336918f2820e02ae648c756202ba55ed3193e9fd1950955ca2f6fb0457f
3
  size 557
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:f5d87cd8a2522fd3677eac24590995de677069aa54288fb9a97a9c0b1ce60c09
3
  size 557
{checkpoint-2200 → checkpoint-3300}/scheduler.pt RENAMED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:96e91f67ea7900c342bef5999be148a76880328bc0cbe140bae8517f86187cdb
3
  size 627
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:96a17614a18bd66b442195d1c5c4d8d5494ab5df6a2f462116d009a1efdf394c
3
  size 627
{checkpoint-2100 → checkpoint-3300}/trainer_state.json RENAMED
@@ -1,8 +1,8 @@
1
  {
2
  "best_metric": null,
3
  "best_model_checkpoint": null,
4
- "epoch": 14.285714285714286,
5
- "global_step": 2100,
6
  "is_hyper_param_search": false,
7
  "is_local_process_zero": true,
8
  "is_world_process_zero": true,
@@ -447,11 +447,263 @@
447
  "eval_samples_per_second": 23.673,
448
  "eval_steps_per_second": 2.979,
449
  "step": 2100
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
450
  }
451
  ],
452
  "max_steps": 3675,
453
  "num_train_epochs": 25,
454
- "total_flos": 8.157984479823167e+18,
455
  "trial_name": null,
456
  "trial_params": null
457
  }
1
  {
2
  "best_metric": null,
3
  "best_model_checkpoint": null,
4
+ "epoch": 22.448979591836736,
5
+ "global_step": 3300,
6
  "is_hyper_param_search": false,
7
  "is_local_process_zero": true,
8
  "is_world_process_zero": true,
447
  "eval_samples_per_second": 23.673,
448
  "eval_steps_per_second": 2.979,
449
  "step": 2100
450
+ },
451
+ {
452
+ "epoch": 14.63,
453
+ "learning_rate": 0.0001443779527559055,
454
+ "loss": 0.2584,
455
+ "step": 2150
456
+ },
457
+ {
458
+ "epoch": 14.97,
459
+ "learning_rate": 0.0001396535433070866,
460
+ "loss": 0.2796,
461
+ "step": 2200
462
+ },
463
+ {
464
+ "epoch": 14.97,
465
+ "eval_cer": 0.14808413500733736,
466
+ "eval_loss": 0.3606802523136139,
467
+ "eval_runtime": 24.8151,
468
+ "eval_samples_per_second": 23.695,
469
+ "eval_steps_per_second": 2.982,
470
+ "step": 2200
471
+ },
472
+ {
473
+ "epoch": 15.31,
474
+ "learning_rate": 0.0001349291338582677,
475
+ "loss": 0.2462,
476
+ "step": 2250
477
+ },
478
+ {
479
+ "epoch": 15.65,
480
+ "learning_rate": 0.0001302047244094488,
481
+ "loss": 0.256,
482
+ "step": 2300
483
+ },
484
+ {
485
+ "epoch": 15.65,
486
+ "eval_cer": 0.15819338007500408,
487
+ "eval_loss": 0.3580550253391266,
488
+ "eval_runtime": 24.5695,
489
+ "eval_samples_per_second": 23.932,
490
+ "eval_steps_per_second": 3.012,
491
+ "step": 2300
492
+ },
493
+ {
494
+ "epoch": 15.99,
495
+ "learning_rate": 0.00012548031496062992,
496
+ "loss": 0.2524,
497
+ "step": 2350
498
+ },
499
+ {
500
+ "epoch": 16.33,
501
+ "learning_rate": 0.00012075590551181102,
502
+ "loss": 0.2219,
503
+ "step": 2400
504
+ },
505
+ {
506
+ "epoch": 16.33,
507
+ "eval_cer": 0.14801891407141693,
508
+ "eval_loss": 0.35925593972206116,
509
+ "eval_runtime": 24.982,
510
+ "eval_samples_per_second": 23.537,
511
+ "eval_steps_per_second": 2.962,
512
+ "step": 2400
513
+ },
514
+ {
515
+ "epoch": 16.67,
516
+ "learning_rate": 0.0001160314960629921,
517
+ "loss": 0.2364,
518
+ "step": 2450
519
+ },
520
+ {
521
+ "epoch": 17.01,
522
+ "learning_rate": 0.00011130708661417321,
523
+ "loss": 0.2291,
524
+ "step": 2500
525
+ },
526
+ {
527
+ "epoch": 17.01,
528
+ "eval_cer": 0.1471058209685309,
529
+ "eval_loss": 0.35567909479141235,
530
+ "eval_runtime": 24.4749,
531
+ "eval_samples_per_second": 24.025,
532
+ "eval_steps_per_second": 3.024,
533
+ "step": 2500
534
+ },
535
+ {
536
+ "epoch": 17.35,
537
+ "learning_rate": 0.00010658267716535431,
538
+ "loss": 0.2045,
539
+ "step": 2550
540
+ },
541
+ {
542
+ "epoch": 17.69,
543
+ "learning_rate": 0.00010185826771653542,
544
+ "loss": 0.2172,
545
+ "step": 2600
546
+ },
547
+ {
548
+ "epoch": 17.69,
549
+ "eval_cer": 0.14792108266753629,
550
+ "eval_loss": 0.3606509566307068,
551
+ "eval_runtime": 25.1105,
552
+ "eval_samples_per_second": 23.416,
553
+ "eval_steps_per_second": 2.947,
554
+ "step": 2600
555
+ },
556
+ {
557
+ "epoch": 18.03,
558
+ "learning_rate": 9.713385826771652e-05,
559
+ "loss": 0.2271,
560
+ "step": 2650
561
+ },
562
+ {
563
+ "epoch": 18.37,
564
+ "learning_rate": 9.240944881889763e-05,
565
+ "loss": 0.1858,
566
+ "step": 2700
567
+ },
568
+ {
569
+ "epoch": 18.37,
570
+ "eval_cer": 0.15144301320723952,
571
+ "eval_loss": 0.3589307963848114,
572
+ "eval_runtime": 24.5005,
573
+ "eval_samples_per_second": 24.0,
574
+ "eval_steps_per_second": 3.02,
575
+ "step": 2700
576
+ },
577
+ {
578
+ "epoch": 18.71,
579
+ "learning_rate": 8.768503937007873e-05,
580
+ "loss": 0.1995,
581
+ "step": 2750
582
+ },
583
+ {
584
+ "epoch": 19.05,
585
+ "learning_rate": 8.296062992125984e-05,
586
+ "loss": 0.1872,
587
+ "step": 2800
588
+ },
589
+ {
590
+ "epoch": 19.05,
591
+ "eval_cer": 0.14766019892385457,
592
+ "eval_loss": 0.36663514375686646,
593
+ "eval_runtime": 24.4383,
594
+ "eval_samples_per_second": 24.061,
595
+ "eval_steps_per_second": 3.028,
596
+ "step": 2800
597
+ },
598
+ {
599
+ "epoch": 19.39,
600
+ "learning_rate": 7.823622047244094e-05,
601
+ "loss": 0.177,
602
+ "step": 2850
603
+ },
604
+ {
605
+ "epoch": 19.73,
606
+ "learning_rate": 7.351181102362205e-05,
607
+ "loss": 0.1855,
608
+ "step": 2900
609
+ },
610
+ {
611
+ "epoch": 19.73,
612
+ "eval_cer": 0.1432577857492255,
613
+ "eval_loss": 0.3651330769062042,
614
+ "eval_runtime": 24.7826,
615
+ "eval_samples_per_second": 23.726,
616
+ "eval_steps_per_second": 2.986,
617
+ "step": 2900
618
+ },
619
+ {
620
+ "epoch": 20.07,
621
+ "learning_rate": 6.878740157480315e-05,
622
+ "loss": 0.1773,
623
+ "step": 2950
624
+ },
625
+ {
626
+ "epoch": 20.41,
627
+ "learning_rate": 6.406299212598424e-05,
628
+ "loss": 0.185,
629
+ "step": 3000
630
+ },
631
+ {
632
+ "epoch": 20.41,
633
+ "eval_cer": 0.1447578672753954,
634
+ "eval_loss": 0.36554473638534546,
635
+ "eval_runtime": 24.7061,
636
+ "eval_samples_per_second": 23.8,
637
+ "eval_steps_per_second": 2.995,
638
+ "step": 3000
639
+ },
640
+ {
641
+ "epoch": 20.75,
642
+ "learning_rate": 5.933858267716535e-05,
643
+ "loss": 0.1735,
644
+ "step": 3050
645
+ },
646
+ {
647
+ "epoch": 21.09,
648
+ "learning_rate": 5.461417322834645e-05,
649
+ "loss": 0.1599,
650
+ "step": 3100
651
+ },
652
+ {
653
+ "epoch": 21.09,
654
+ "eval_cer": 0.14867112343062122,
655
+ "eval_loss": 0.37344449758529663,
656
+ "eval_runtime": 24.8752,
657
+ "eval_samples_per_second": 23.638,
658
+ "eval_steps_per_second": 2.975,
659
+ "step": 3100
660
+ },
661
+ {
662
+ "epoch": 21.43,
663
+ "learning_rate": 4.9889763779527555e-05,
664
+ "loss": 0.1578,
665
+ "step": 3150
666
+ },
667
+ {
668
+ "epoch": 21.77,
669
+ "learning_rate": 4.516535433070866e-05,
670
+ "loss": 0.1742,
671
+ "step": 3200
672
+ },
673
+ {
674
+ "epoch": 21.77,
675
+ "eval_cer": 0.148638512962661,
676
+ "eval_loss": 0.3685232102870941,
677
+ "eval_runtime": 25.1361,
678
+ "eval_samples_per_second": 23.393,
679
+ "eval_steps_per_second": 2.944,
680
+ "step": 3200
681
+ },
682
+ {
683
+ "epoch": 22.11,
684
+ "learning_rate": 4.0440944881889764e-05,
685
+ "loss": 0.1666,
686
+ "step": 3250
687
+ },
688
+ {
689
+ "epoch": 22.45,
690
+ "learning_rate": 3.571653543307086e-05,
691
+ "loss": 0.1605,
692
+ "step": 3300
693
+ },
694
+ {
695
+ "epoch": 22.45,
696
+ "eval_cer": 0.14746453611609325,
697
+ "eval_loss": 0.3691498637199402,
698
+ "eval_runtime": 24.7916,
699
+ "eval_samples_per_second": 23.718,
700
+ "eval_steps_per_second": 2.985,
701
+ "step": 3300
702
  }
703
  ],
704
  "max_steps": 3675,
705
  "num_train_epochs": 25,
706
+ "total_flos": 1.2812925033719341e+19,
707
  "trial_name": null,
708
  "trial_params": null
709
  }
{checkpoint-2200 → checkpoint-3300}/training_args.bin RENAMED
File without changes
pytorch_model.bin CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:aa48ab3bfa27ac8b74f16b85b0b0e9423a7006947f26bc79b3a134b3a28201d9
3
  size 1262344621
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:71b3268161afb2bcdf8b28a1a1d911f835d6d1a33dd0c929b5d6cf7248b065dc
3
  size 1262344621