TenzinGayche committed on
Commit
3ff632d
‒
1 Parent(s): d7d6ee2

Training in progress, step 3100

Browse files
{checkpoint-1900 → checkpoint-3000}/config.json RENAMED
File without changes
{checkpoint-1900 → checkpoint-3000}/optimizer.pt RENAMED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:edfc1df136634c5b701601b6ff6af9b1881f7cd09119136764e47cb758be8c01
3
  size 2490946501
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:add5027d5fd3913c88e3ce4c98bba2cacc80d01308c6d7e2ef915041867428fd
3
  size 2490946501
{checkpoint-1900 → checkpoint-3000}/preprocessor_config.json RENAMED
File without changes
{checkpoint-2000 → checkpoint-3000}/pytorch_model.bin RENAMED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:6cd38ec9830107698c5d0ace7830fd9cf63e7332cde8b2dc9f42453c1fe96228
3
  size 1262344621
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:17b0809c17d9dbb31d634626a048a7d434c078f79477bfe8f7a8cf3593908c7c
3
  size 1262344621
{checkpoint-2000 → checkpoint-3000}/rng_state.pth RENAMED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:f5c1cece2539dcc21e5f05a104f8a7b0d17369adf61ee02eabacdc5688c8ce77
3
  size 14639
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:54e994db3699b40bee8c6677e4890450c771f5c163635dc7865e66b42965e184
3
  size 14639
{checkpoint-2000 → checkpoint-3000}/scaler.pt RENAMED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:fa1f6377b029d7c93ef2025a47117a476c1020f2bd22a0097fe33acd14b35ae7
3
  size 557
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:e09035270122b5f507afb3c91029317a19c9951c55151aa7d9fc1f5691d8ea29
3
  size 557
{checkpoint-1900 → checkpoint-3000}/scheduler.pt RENAMED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:a0ea1eff427e80a898e7d7eedbf89dfef56c38dc29788f8cb29b845574ba7ec4
3
  size 627
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:9e3cf2d9b187c53d4ab17e26aaa543c99e77f6fa20e6dcea24354d1208058809
3
  size 627
{checkpoint-2000 → checkpoint-3000}/trainer_state.json RENAMED
@@ -1,8 +1,8 @@
1
  {
2
  "best_metric": null,
3
  "best_model_checkpoint": null,
4
- "epoch": 13.60544217687075,
5
- "global_step": 2000,
6
  "is_hyper_param_search": false,
7
  "is_local_process_zero": true,
8
  "is_world_process_zero": true,
@@ -426,11 +426,221 @@
426
  "eval_samples_per_second": 23.518,
427
  "eval_steps_per_second": 2.96,
428
  "step": 2000
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
429
  }
430
  ],
431
  "max_steps": 3675,
432
  "num_train_epochs": 25,
433
- "total_flos": 7.778933854152041e+18,
434
  "trial_name": null,
435
  "trial_params": null
436
  }
1
  {
2
  "best_metric": null,
3
  "best_model_checkpoint": null,
4
+ "epoch": 20.408163265306122,
5
+ "global_step": 3000,
6
  "is_hyper_param_search": false,
7
  "is_local_process_zero": true,
8
  "is_world_process_zero": true,
426
  "eval_samples_per_second": 23.518,
427
  "eval_steps_per_second": 2.96,
428
  "step": 2000
429
+ },
430
+ {
431
+ "epoch": 13.95,
432
+ "learning_rate": 0.0001538267716535433,
433
+ "loss": 0.279,
434
+ "step": 2050
435
+ },
436
+ {
437
+ "epoch": 14.29,
438
+ "learning_rate": 0.0001491023622047244,
439
+ "loss": 0.2674,
440
+ "step": 2100
441
+ },
442
+ {
443
+ "epoch": 14.29,
444
+ "eval_cer": 0.15731289744007826,
445
+ "eval_loss": 0.3531067371368408,
446
+ "eval_runtime": 24.8381,
447
+ "eval_samples_per_second": 23.673,
448
+ "eval_steps_per_second": 2.979,
449
+ "step": 2100
450
+ },
451
+ {
452
+ "epoch": 14.63,
453
+ "learning_rate": 0.0001443779527559055,
454
+ "loss": 0.2584,
455
+ "step": 2150
456
+ },
457
+ {
458
+ "epoch": 14.97,
459
+ "learning_rate": 0.0001396535433070866,
460
+ "loss": 0.2796,
461
+ "step": 2200
462
+ },
463
+ {
464
+ "epoch": 14.97,
465
+ "eval_cer": 0.14808413500733736,
466
+ "eval_loss": 0.3606802523136139,
467
+ "eval_runtime": 24.8151,
468
+ "eval_samples_per_second": 23.695,
469
+ "eval_steps_per_second": 2.982,
470
+ "step": 2200
471
+ },
472
+ {
473
+ "epoch": 15.31,
474
+ "learning_rate": 0.0001349291338582677,
475
+ "loss": 0.2462,
476
+ "step": 2250
477
+ },
478
+ {
479
+ "epoch": 15.65,
480
+ "learning_rate": 0.0001302047244094488,
481
+ "loss": 0.256,
482
+ "step": 2300
483
+ },
484
+ {
485
+ "epoch": 15.65,
486
+ "eval_cer": 0.15819338007500408,
487
+ "eval_loss": 0.3580550253391266,
488
+ "eval_runtime": 24.5695,
489
+ "eval_samples_per_second": 23.932,
490
+ "eval_steps_per_second": 3.012,
491
+ "step": 2300
492
+ },
493
+ {
494
+ "epoch": 15.99,
495
+ "learning_rate": 0.00012548031496062992,
496
+ "loss": 0.2524,
497
+ "step": 2350
498
+ },
499
+ {
500
+ "epoch": 16.33,
501
+ "learning_rate": 0.00012075590551181102,
502
+ "loss": 0.2219,
503
+ "step": 2400
504
+ },
505
+ {
506
+ "epoch": 16.33,
507
+ "eval_cer": 0.14801891407141693,
508
+ "eval_loss": 0.35925593972206116,
509
+ "eval_runtime": 24.982,
510
+ "eval_samples_per_second": 23.537,
511
+ "eval_steps_per_second": 2.962,
512
+ "step": 2400
513
+ },
514
+ {
515
+ "epoch": 16.67,
516
+ "learning_rate": 0.0001160314960629921,
517
+ "loss": 0.2364,
518
+ "step": 2450
519
+ },
520
+ {
521
+ "epoch": 17.01,
522
+ "learning_rate": 0.00011130708661417321,
523
+ "loss": 0.2291,
524
+ "step": 2500
525
+ },
526
+ {
527
+ "epoch": 17.01,
528
+ "eval_cer": 0.1471058209685309,
529
+ "eval_loss": 0.35567909479141235,
530
+ "eval_runtime": 24.4749,
531
+ "eval_samples_per_second": 24.025,
532
+ "eval_steps_per_second": 3.024,
533
+ "step": 2500
534
+ },
535
+ {
536
+ "epoch": 17.35,
537
+ "learning_rate": 0.00010658267716535431,
538
+ "loss": 0.2045,
539
+ "step": 2550
540
+ },
541
+ {
542
+ "epoch": 17.69,
543
+ "learning_rate": 0.00010185826771653542,
544
+ "loss": 0.2172,
545
+ "step": 2600
546
+ },
547
+ {
548
+ "epoch": 17.69,
549
+ "eval_cer": 0.14792108266753629,
550
+ "eval_loss": 0.3606509566307068,
551
+ "eval_runtime": 25.1105,
552
+ "eval_samples_per_second": 23.416,
553
+ "eval_steps_per_second": 2.947,
554
+ "step": 2600
555
+ },
556
+ {
557
+ "epoch": 18.03,
558
+ "learning_rate": 9.713385826771652e-05,
559
+ "loss": 0.2271,
560
+ "step": 2650
561
+ },
562
+ {
563
+ "epoch": 18.37,
564
+ "learning_rate": 9.240944881889763e-05,
565
+ "loss": 0.1858,
566
+ "step": 2700
567
+ },
568
+ {
569
+ "epoch": 18.37,
570
+ "eval_cer": 0.15144301320723952,
571
+ "eval_loss": 0.3589307963848114,
572
+ "eval_runtime": 24.5005,
573
+ "eval_samples_per_second": 24.0,
574
+ "eval_steps_per_second": 3.02,
575
+ "step": 2700
576
+ },
577
+ {
578
+ "epoch": 18.71,
579
+ "learning_rate": 8.768503937007873e-05,
580
+ "loss": 0.1995,
581
+ "step": 2750
582
+ },
583
+ {
584
+ "epoch": 19.05,
585
+ "learning_rate": 8.296062992125984e-05,
586
+ "loss": 0.1872,
587
+ "step": 2800
588
+ },
589
+ {
590
+ "epoch": 19.05,
591
+ "eval_cer": 0.14766019892385457,
592
+ "eval_loss": 0.36663514375686646,
593
+ "eval_runtime": 24.4383,
594
+ "eval_samples_per_second": 24.061,
595
+ "eval_steps_per_second": 3.028,
596
+ "step": 2800
597
+ },
598
+ {
599
+ "epoch": 19.39,
600
+ "learning_rate": 7.823622047244094e-05,
601
+ "loss": 0.177,
602
+ "step": 2850
603
+ },
604
+ {
605
+ "epoch": 19.73,
606
+ "learning_rate": 7.351181102362205e-05,
607
+ "loss": 0.1855,
608
+ "step": 2900
609
+ },
610
+ {
611
+ "epoch": 19.73,
612
+ "eval_cer": 0.1432577857492255,
613
+ "eval_loss": 0.3651330769062042,
614
+ "eval_runtime": 24.7826,
615
+ "eval_samples_per_second": 23.726,
616
+ "eval_steps_per_second": 2.986,
617
+ "step": 2900
618
+ },
619
+ {
620
+ "epoch": 20.07,
621
+ "learning_rate": 6.878740157480315e-05,
622
+ "loss": 0.1773,
623
+ "step": 2950
624
+ },
625
+ {
626
+ "epoch": 20.41,
627
+ "learning_rate": 6.406299212598424e-05,
628
+ "loss": 0.185,
629
+ "step": 3000
630
+ },
631
+ {
632
+ "epoch": 20.41,
633
+ "eval_cer": 0.1447578672753954,
634
+ "eval_loss": 0.36554473638534546,
635
+ "eval_runtime": 24.7061,
636
+ "eval_samples_per_second": 23.8,
637
+ "eval_steps_per_second": 2.995,
638
+ "step": 3000
639
  }
640
  ],
641
  "max_steps": 3675,
642
  "num_train_epochs": 25,
643
+ "total_flos": 1.1654661840261913e+19,
644
  "trial_name": null,
645
  "trial_params": null
646
  }
{checkpoint-1900 → checkpoint-3000}/training_args.bin RENAMED
File without changes
{checkpoint-2000 → checkpoint-3100}/config.json RENAMED
File without changes
{checkpoint-2000 → checkpoint-3100}/optimizer.pt RENAMED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:3400ad82ded439b14c2bac67c692eb6cb3776ec6471beac1430b11b22c8fb5b5
3
  size 2490946501
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:e0547651e9fa5e5a88a9166ef0d5f076cee95faf27589392ad547f526db7c4fe
3
  size 2490946501
{checkpoint-2000 → checkpoint-3100}/preprocessor_config.json RENAMED
File without changes
{checkpoint-1900 → checkpoint-3100}/pytorch_model.bin RENAMED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:944bbfe53ed8e3678f0df597947df90c1f1fd1a592c5e2f1c1893cd644db8b25
3
  size 1262344621
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:aa48ab3bfa27ac8b74f16b85b0b0e9423a7006947f26bc79b3a134b3a28201d9
3
  size 1262344621
{checkpoint-1900 → checkpoint-3100}/rng_state.pth RENAMED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:19558c49bacfb403056aca7f61bd635fce6a3b4d6a2d60ba71c39efd76e04aa8
3
- size 14639
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:6e6882a736f444e030f1e63185cd593b64dcbe537747a536c06d91a9ff822878
3
+ size 14575
{checkpoint-1900 → checkpoint-3100}/scaler.pt RENAMED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:c12154f7b08b94398fffa27beb8a5986462659e12ab1223bd629316877afb771
3
  size 557
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:2a5a1c934e0cb72b172df77c7f977d3f9bcedc7c9991dd7cf5ab0d4a3a9e556e
3
  size 557
{checkpoint-2000 → checkpoint-3100}/scheduler.pt RENAMED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:bb073859e728ca55c339a2f6d3ed17539f043fe9dec39e46f41d5b139c046481
3
  size 627
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:4f224e654f13b383690d5d87143e399102b541cd0bfe5fa57c4e0ad3f1abceb1
3
  size 627
{checkpoint-1900 → checkpoint-3100}/trainer_state.json RENAMED
@@ -1,8 +1,8 @@
1
  {
2
  "best_metric": null,
3
  "best_model_checkpoint": null,
4
- "epoch": 12.92517006802721,
5
- "global_step": 1900,
6
  "is_hyper_param_search": false,
7
  "is_local_process_zero": true,
8
  "is_world_process_zero": true,
@@ -405,11 +405,263 @@
405
  "eval_samples_per_second": 24.027,
406
  "eval_steps_per_second": 3.024,
407
  "step": 1900
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
408
  }
409
  ],
410
  "max_steps": 3675,
411
  "num_train_epochs": 25,
412
- "total_flos": 7.38349957947191e+18,
413
  "trial_name": null,
414
  "trial_params": null
415
  }
1
  {
2
  "best_metric": null,
3
  "best_model_checkpoint": null,
4
+ "epoch": 21.08843537414966,
5
+ "global_step": 3100,
6
  "is_hyper_param_search": false,
7
  "is_local_process_zero": true,
8
  "is_world_process_zero": true,
405
  "eval_samples_per_second": 24.027,
406
  "eval_steps_per_second": 3.024,
407
  "step": 1900
408
+ },
409
+ {
410
+ "epoch": 13.27,
411
+ "learning_rate": 0.0001632755905511811,
412
+ "loss": 0.3019,
413
+ "step": 1950
414
+ },
415
+ {
416
+ "epoch": 13.61,
417
+ "learning_rate": 0.00015855118110236219,
418
+ "loss": 0.2945,
419
+ "step": 2000
420
+ },
421
+ {
422
+ "epoch": 13.61,
423
+ "eval_cer": 0.16634599706505787,
424
+ "eval_loss": 0.3725011348724365,
425
+ "eval_runtime": 25.0023,
426
+ "eval_samples_per_second": 23.518,
427
+ "eval_steps_per_second": 2.96,
428
+ "step": 2000
429
+ },
430
+ {
431
+ "epoch": 13.95,
432
+ "learning_rate": 0.0001538267716535433,
433
+ "loss": 0.279,
434
+ "step": 2050
435
+ },
436
+ {
437
+ "epoch": 14.29,
438
+ "learning_rate": 0.0001491023622047244,
439
+ "loss": 0.2674,
440
+ "step": 2100
441
+ },
442
+ {
443
+ "epoch": 14.29,
444
+ "eval_cer": 0.15731289744007826,
445
+ "eval_loss": 0.3531067371368408,
446
+ "eval_runtime": 24.8381,
447
+ "eval_samples_per_second": 23.673,
448
+ "eval_steps_per_second": 2.979,
449
+ "step": 2100
450
+ },
451
+ {
452
+ "epoch": 14.63,
453
+ "learning_rate": 0.0001443779527559055,
454
+ "loss": 0.2584,
455
+ "step": 2150
456
+ },
457
+ {
458
+ "epoch": 14.97,
459
+ "learning_rate": 0.0001396535433070866,
460
+ "loss": 0.2796,
461
+ "step": 2200
462
+ },
463
+ {
464
+ "epoch": 14.97,
465
+ "eval_cer": 0.14808413500733736,
466
+ "eval_loss": 0.3606802523136139,
467
+ "eval_runtime": 24.8151,
468
+ "eval_samples_per_second": 23.695,
469
+ "eval_steps_per_second": 2.982,
470
+ "step": 2200
471
+ },
472
+ {
473
+ "epoch": 15.31,
474
+ "learning_rate": 0.0001349291338582677,
475
+ "loss": 0.2462,
476
+ "step": 2250
477
+ },
478
+ {
479
+ "epoch": 15.65,
480
+ "learning_rate": 0.0001302047244094488,
481
+ "loss": 0.256,
482
+ "step": 2300
483
+ },
484
+ {
485
+ "epoch": 15.65,
486
+ "eval_cer": 0.15819338007500408,
487
+ "eval_loss": 0.3580550253391266,
488
+ "eval_runtime": 24.5695,
489
+ "eval_samples_per_second": 23.932,
490
+ "eval_steps_per_second": 3.012,
491
+ "step": 2300
492
+ },
493
+ {
494
+ "epoch": 15.99,
495
+ "learning_rate": 0.00012548031496062992,
496
+ "loss": 0.2524,
497
+ "step": 2350
498
+ },
499
+ {
500
+ "epoch": 16.33,
501
+ "learning_rate": 0.00012075590551181102,
502
+ "loss": 0.2219,
503
+ "step": 2400
504
+ },
505
+ {
506
+ "epoch": 16.33,
507
+ "eval_cer": 0.14801891407141693,
508
+ "eval_loss": 0.35925593972206116,
509
+ "eval_runtime": 24.982,
510
+ "eval_samples_per_second": 23.537,
511
+ "eval_steps_per_second": 2.962,
512
+ "step": 2400
513
+ },
514
+ {
515
+ "epoch": 16.67,
516
+ "learning_rate": 0.0001160314960629921,
517
+ "loss": 0.2364,
518
+ "step": 2450
519
+ },
520
+ {
521
+ "epoch": 17.01,
522
+ "learning_rate": 0.00011130708661417321,
523
+ "loss": 0.2291,
524
+ "step": 2500
525
+ },
526
+ {
527
+ "epoch": 17.01,
528
+ "eval_cer": 0.1471058209685309,
529
+ "eval_loss": 0.35567909479141235,
530
+ "eval_runtime": 24.4749,
531
+ "eval_samples_per_second": 24.025,
532
+ "eval_steps_per_second": 3.024,
533
+ "step": 2500
534
+ },
535
+ {
536
+ "epoch": 17.35,
537
+ "learning_rate": 0.00010658267716535431,
538
+ "loss": 0.2045,
539
+ "step": 2550
540
+ },
541
+ {
542
+ "epoch": 17.69,
543
+ "learning_rate": 0.00010185826771653542,
544
+ "loss": 0.2172,
545
+ "step": 2600
546
+ },
547
+ {
548
+ "epoch": 17.69,
549
+ "eval_cer": 0.14792108266753629,
550
+ "eval_loss": 0.3606509566307068,
551
+ "eval_runtime": 25.1105,
552
+ "eval_samples_per_second": 23.416,
553
+ "eval_steps_per_second": 2.947,
554
+ "step": 2600
555
+ },
556
+ {
557
+ "epoch": 18.03,
558
+ "learning_rate": 9.713385826771652e-05,
559
+ "loss": 0.2271,
560
+ "step": 2650
561
+ },
562
+ {
563
+ "epoch": 18.37,
564
+ "learning_rate": 9.240944881889763e-05,
565
+ "loss": 0.1858,
566
+ "step": 2700
567
+ },
568
+ {
569
+ "epoch": 18.37,
570
+ "eval_cer": 0.15144301320723952,
571
+ "eval_loss": 0.3589307963848114,
572
+ "eval_runtime": 24.5005,
573
+ "eval_samples_per_second": 24.0,
574
+ "eval_steps_per_second": 3.02,
575
+ "step": 2700
576
+ },
577
+ {
578
+ "epoch": 18.71,
579
+ "learning_rate": 8.768503937007873e-05,
580
+ "loss": 0.1995,
581
+ "step": 2750
582
+ },
583
+ {
584
+ "epoch": 19.05,
585
+ "learning_rate": 8.296062992125984e-05,
586
+ "loss": 0.1872,
587
+ "step": 2800
588
+ },
589
+ {
590
+ "epoch": 19.05,
591
+ "eval_cer": 0.14766019892385457,
592
+ "eval_loss": 0.36663514375686646,
593
+ "eval_runtime": 24.4383,
594
+ "eval_samples_per_second": 24.061,
595
+ "eval_steps_per_second": 3.028,
596
+ "step": 2800
597
+ },
598
+ {
599
+ "epoch": 19.39,
600
+ "learning_rate": 7.823622047244094e-05,
601
+ "loss": 0.177,
602
+ "step": 2850
603
+ },
604
+ {
605
+ "epoch": 19.73,
606
+ "learning_rate": 7.351181102362205e-05,
607
+ "loss": 0.1855,
608
+ "step": 2900
609
+ },
610
+ {
611
+ "epoch": 19.73,
612
+ "eval_cer": 0.1432577857492255,
613
+ "eval_loss": 0.3651330769062042,
614
+ "eval_runtime": 24.7826,
615
+ "eval_samples_per_second": 23.726,
616
+ "eval_steps_per_second": 2.986,
617
+ "step": 2900
618
+ },
619
+ {
620
+ "epoch": 20.07,
621
+ "learning_rate": 6.878740157480315e-05,
622
+ "loss": 0.1773,
623
+ "step": 2950
624
+ },
625
+ {
626
+ "epoch": 20.41,
627
+ "learning_rate": 6.406299212598424e-05,
628
+ "loss": 0.185,
629
+ "step": 3000
630
+ },
631
+ {
632
+ "epoch": 20.41,
633
+ "eval_cer": 0.1447578672753954,
634
+ "eval_loss": 0.36554473638534546,
635
+ "eval_runtime": 24.7061,
636
+ "eval_samples_per_second": 23.8,
637
+ "eval_steps_per_second": 2.995,
638
+ "step": 3000
639
+ },
640
+ {
641
+ "epoch": 20.75,
642
+ "learning_rate": 5.933858267716535e-05,
643
+ "loss": 0.1735,
644
+ "step": 3050
645
+ },
646
+ {
647
+ "epoch": 21.09,
648
+ "learning_rate": 5.461417322834645e-05,
649
+ "loss": 0.1599,
650
+ "step": 3100
651
+ },
652
+ {
653
+ "epoch": 21.09,
654
+ "eval_cer": 0.14867112343062122,
655
+ "eval_loss": 0.37344449758529663,
656
+ "eval_runtime": 24.8752,
657
+ "eval_samples_per_second": 23.638,
658
+ "eval_steps_per_second": 2.975,
659
+ "step": 3100
660
  }
661
  ],
662
  "max_steps": 3675,
663
  "num_train_epochs": 25,
664
+ "total_flos": 1.2047450649580579e+19,
665
  "trial_name": null,
666
  "trial_params": null
667
  }
{checkpoint-2000 → checkpoint-3100}/training_args.bin RENAMED
File without changes
pytorch_model.bin CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:a6ffc772554604c775a56582be10709d4c3e456a09beff5590f70c6d6164bae7
3
  size 1262344621
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:aa48ab3bfa27ac8b74f16b85b0b0e9423a7006947f26bc79b3a134b3a28201d9
3
  size 1262344621