TenzinGayche committed on
Commit
d7d6ee2
•
1 Parent(s): 2f23d51

Training in progress, step 2900

Browse files
{checkpoint-1700 → checkpoint-2800}/config.json RENAMED
File without changes
{checkpoint-1700 → checkpoint-2800}/optimizer.pt RENAMED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:338ba02b721f5c923b03a6437e0bf0d6913d2f5150fc93110856b5274907a102
3
  size 2490946501
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:284fe9fd3efe2fa6bfac3dde889d7df4a894fadafdf72a079a9aa70dae570bb6
3
  size 2490946501
{checkpoint-1700 → checkpoint-2800}/preprocessor_config.json RENAMED
File without changes
{checkpoint-1800 → checkpoint-2800}/pytorch_model.bin RENAMED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:52a571823a5349c817772817b3ca92d24c7154a82c1ebe1934fc9b5c45efd5ae
3
  size 1262344621
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:d79086abe43be08504757ae268247e875b4ca0ee40d8541768b0d1afc44dcff9
3
  size 1262344621
{checkpoint-1800 → checkpoint-2800}/rng_state.pth RENAMED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:47d39bea761335d6d65b0b6d7eff2cfb4efe3fe039ea66b0c5cffcd47767de2b
3
  size 14575
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:27687fa73261753e3f3ea1442dcfbb7ebac6e911817034879446db31b67ab26f
3
  size 14575
{checkpoint-1800 → checkpoint-2800}/scaler.pt RENAMED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:e961ca75ff911c1f4b25d656bc8175958d7392f62f4f35cdbbe7bb3902c5e7a6
3
  size 557
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:05b468846eb222417f6778288d55e188312cf2d6a8edd4f708c9744ca1711621
3
  size 557
{checkpoint-1700 → checkpoint-2800}/scheduler.pt RENAMED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:7a0f385f9f50d27e3534f7102c4921897834c11c7a2ceb3c3a568765164e2f10
3
  size 627
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:2b188442f4189d4023ae493e876a79dd05e958bbee2b254952060c03892fff41
3
  size 627
{checkpoint-1800 → checkpoint-2800}/trainer_state.json RENAMED
@@ -1,8 +1,8 @@
1
  {
2
  "best_metric": null,
3
  "best_model_checkpoint": null,
4
- "epoch": 12.244897959183673,
5
- "global_step": 1800,
6
  "is_hyper_param_search": false,
7
  "is_local_process_zero": true,
8
  "is_world_process_zero": true,
@@ -384,11 +384,221 @@
384
  "eval_samples_per_second": 23.79,
385
  "eval_steps_per_second": 2.994,
386
  "step": 1800
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
387
  }
388
  ],
389
  "max_steps": 3675,
390
  "num_train_epochs": 25,
391
- "total_flos": 6.982329131312831e+18,
392
  "trial_name": null,
393
  "trial_params": null
394
  }
1
  {
2
  "best_metric": null,
3
  "best_model_checkpoint": null,
4
+ "epoch": 19.047619047619047,
5
+ "global_step": 2800,
6
  "is_hyper_param_search": false,
7
  "is_local_process_zero": true,
8
  "is_world_process_zero": true,
384
  "eval_samples_per_second": 23.79,
385
  "eval_steps_per_second": 2.994,
386
  "step": 1800
387
+ },
388
+ {
389
+ "epoch": 12.59,
390
+ "learning_rate": 0.0001727244094488189,
391
+ "loss": 0.3208,
392
+ "step": 1850
393
+ },
394
+ {
395
+ "epoch": 12.93,
396
+ "learning_rate": 0.000168,
397
+ "loss": 0.3063,
398
+ "step": 1900
399
+ },
400
+ {
401
+ "epoch": 12.93,
402
+ "eval_cer": 0.15904125224196966,
403
+ "eval_loss": 0.3622555434703827,
404
+ "eval_runtime": 24.4729,
405
+ "eval_samples_per_second": 24.027,
406
+ "eval_steps_per_second": 3.024,
407
+ "step": 1900
408
+ },
409
+ {
410
+ "epoch": 13.27,
411
+ "learning_rate": 0.0001632755905511811,
412
+ "loss": 0.3019,
413
+ "step": 1950
414
+ },
415
+ {
416
+ "epoch": 13.61,
417
+ "learning_rate": 0.00015855118110236219,
418
+ "loss": 0.2945,
419
+ "step": 2000
420
+ },
421
+ {
422
+ "epoch": 13.61,
423
+ "eval_cer": 0.16634599706505787,
424
+ "eval_loss": 0.3725011348724365,
425
+ "eval_runtime": 25.0023,
426
+ "eval_samples_per_second": 23.518,
427
+ "eval_steps_per_second": 2.96,
428
+ "step": 2000
429
+ },
430
+ {
431
+ "epoch": 13.95,
432
+ "learning_rate": 0.0001538267716535433,
433
+ "loss": 0.279,
434
+ "step": 2050
435
+ },
436
+ {
437
+ "epoch": 14.29,
438
+ "learning_rate": 0.0001491023622047244,
439
+ "loss": 0.2674,
440
+ "step": 2100
441
+ },
442
+ {
443
+ "epoch": 14.29,
444
+ "eval_cer": 0.15731289744007826,
445
+ "eval_loss": 0.3531067371368408,
446
+ "eval_runtime": 24.8381,
447
+ "eval_samples_per_second": 23.673,
448
+ "eval_steps_per_second": 2.979,
449
+ "step": 2100
450
+ },
451
+ {
452
+ "epoch": 14.63,
453
+ "learning_rate": 0.0001443779527559055,
454
+ "loss": 0.2584,
455
+ "step": 2150
456
+ },
457
+ {
458
+ "epoch": 14.97,
459
+ "learning_rate": 0.0001396535433070866,
460
+ "loss": 0.2796,
461
+ "step": 2200
462
+ },
463
+ {
464
+ "epoch": 14.97,
465
+ "eval_cer": 0.14808413500733736,
466
+ "eval_loss": 0.3606802523136139,
467
+ "eval_runtime": 24.8151,
468
+ "eval_samples_per_second": 23.695,
469
+ "eval_steps_per_second": 2.982,
470
+ "step": 2200
471
+ },
472
+ {
473
+ "epoch": 15.31,
474
+ "learning_rate": 0.0001349291338582677,
475
+ "loss": 0.2462,
476
+ "step": 2250
477
+ },
478
+ {
479
+ "epoch": 15.65,
480
+ "learning_rate": 0.0001302047244094488,
481
+ "loss": 0.256,
482
+ "step": 2300
483
+ },
484
+ {
485
+ "epoch": 15.65,
486
+ "eval_cer": 0.15819338007500408,
487
+ "eval_loss": 0.3580550253391266,
488
+ "eval_runtime": 24.5695,
489
+ "eval_samples_per_second": 23.932,
490
+ "eval_steps_per_second": 3.012,
491
+ "step": 2300
492
+ },
493
+ {
494
+ "epoch": 15.99,
495
+ "learning_rate": 0.00012548031496062992,
496
+ "loss": 0.2524,
497
+ "step": 2350
498
+ },
499
+ {
500
+ "epoch": 16.33,
501
+ "learning_rate": 0.00012075590551181102,
502
+ "loss": 0.2219,
503
+ "step": 2400
504
+ },
505
+ {
506
+ "epoch": 16.33,
507
+ "eval_cer": 0.14801891407141693,
508
+ "eval_loss": 0.35925593972206116,
509
+ "eval_runtime": 24.982,
510
+ "eval_samples_per_second": 23.537,
511
+ "eval_steps_per_second": 2.962,
512
+ "step": 2400
513
+ },
514
+ {
515
+ "epoch": 16.67,
516
+ "learning_rate": 0.0001160314960629921,
517
+ "loss": 0.2364,
518
+ "step": 2450
519
+ },
520
+ {
521
+ "epoch": 17.01,
522
+ "learning_rate": 0.00011130708661417321,
523
+ "loss": 0.2291,
524
+ "step": 2500
525
+ },
526
+ {
527
+ "epoch": 17.01,
528
+ "eval_cer": 0.1471058209685309,
529
+ "eval_loss": 0.35567909479141235,
530
+ "eval_runtime": 24.4749,
531
+ "eval_samples_per_second": 24.025,
532
+ "eval_steps_per_second": 3.024,
533
+ "step": 2500
534
+ },
535
+ {
536
+ "epoch": 17.35,
537
+ "learning_rate": 0.00010658267716535431,
538
+ "loss": 0.2045,
539
+ "step": 2550
540
+ },
541
+ {
542
+ "epoch": 17.69,
543
+ "learning_rate": 0.00010185826771653542,
544
+ "loss": 0.2172,
545
+ "step": 2600
546
+ },
547
+ {
548
+ "epoch": 17.69,
549
+ "eval_cer": 0.14792108266753629,
550
+ "eval_loss": 0.3606509566307068,
551
+ "eval_runtime": 25.1105,
552
+ "eval_samples_per_second": 23.416,
553
+ "eval_steps_per_second": 2.947,
554
+ "step": 2600
555
+ },
556
+ {
557
+ "epoch": 18.03,
558
+ "learning_rate": 9.713385826771652e-05,
559
+ "loss": 0.2271,
560
+ "step": 2650
561
+ },
562
+ {
563
+ "epoch": 18.37,
564
+ "learning_rate": 9.240944881889763e-05,
565
+ "loss": 0.1858,
566
+ "step": 2700
567
+ },
568
+ {
569
+ "epoch": 18.37,
570
+ "eval_cer": 0.15144301320723952,
571
+ "eval_loss": 0.3589307963848114,
572
+ "eval_runtime": 24.5005,
573
+ "eval_samples_per_second": 24.0,
574
+ "eval_steps_per_second": 3.02,
575
+ "step": 2700
576
+ },
577
+ {
578
+ "epoch": 18.71,
579
+ "learning_rate": 8.768503937007873e-05,
580
+ "loss": 0.1995,
581
+ "step": 2750
582
+ },
583
+ {
584
+ "epoch": 19.05,
585
+ "learning_rate": 8.296062992125984e-05,
586
+ "loss": 0.1872,
587
+ "step": 2800
588
+ },
589
+ {
590
+ "epoch": 19.05,
591
+ "eval_cer": 0.14766019892385457,
592
+ "eval_loss": 0.36663514375686646,
593
+ "eval_runtime": 24.4383,
594
+ "eval_samples_per_second": 24.061,
595
+ "eval_steps_per_second": 3.028,
596
+ "step": 2800
597
  }
598
  ],
599
  "max_steps": 3675,
600
  "num_train_epochs": 25,
601
+ "total_flos": 1.0878260136221063e+19,
602
  "trial_name": null,
603
  "trial_params": null
604
  }
{checkpoint-1700 → checkpoint-2800}/training_args.bin RENAMED
File without changes
{checkpoint-1800 → checkpoint-2900}/config.json RENAMED
File without changes
{checkpoint-1800 → checkpoint-2900}/optimizer.pt RENAMED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:42f99a886bbf378c0dcaebf101e4c5328ee0274d00a62c2e1be3614a61c5e312
3
  size 2490946501
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:da92116ac7249a9c2f78c6d915c53ed280d77d3eee1ec0daa3648db76ed65088
3
  size 2490946501
{checkpoint-1800 → checkpoint-2900}/preprocessor_config.json RENAMED
File without changes
{checkpoint-1700 → checkpoint-2900}/pytorch_model.bin RENAMED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:019acb608476676b22ab74cd09e4aca50c24dd533fcbd74898a0026c97c98361
3
  size 1262344621
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:a6ffc772554604c775a56582be10709d4c3e456a09beff5590f70c6d6164bae7
3
  size 1262344621
{checkpoint-1700 → checkpoint-2900}/rng_state.pth RENAMED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:71a1db9e934ba49590cfdbe67c7ee3acaf29bd87a47fabf42f5d018afb48d43b
3
  size 14639
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:2f89b2092406b7051521efc60918e39ddbc8577addacf224a167b0840e13a8b1
3
  size 14639
{checkpoint-1700 → checkpoint-2900}/scaler.pt RENAMED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:fb5240745a606135a0f5215801453e4a46e10de71a210812a2ab7133882ce395
3
  size 557
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:e9d0e0ea2050687876737e0313597d903728681aadc2ef7b13381f1a340209c4
3
  size 557
{checkpoint-1800 → checkpoint-2900}/scheduler.pt RENAMED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:ae735c83da45a65907ba8c02193907649b6ed04e0ca77c0400a50dbb61401aaa
3
  size 627
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:2a694febd2309e1fb1cb50db9210a98ee65b7677fbb4514ceb39bb98bbdadc9d
3
  size 627
{checkpoint-1700 → checkpoint-2900}/trainer_state.json RENAMED
@@ -1,8 +1,8 @@
1
  {
2
  "best_metric": null,
3
  "best_model_checkpoint": null,
4
- "epoch": 11.564625850340136,
5
- "global_step": 1700,
6
  "is_hyper_param_search": false,
7
  "is_local_process_zero": true,
8
  "is_world_process_zero": true,
@@ -363,11 +363,263 @@
363
  "eval_samples_per_second": 24.123,
364
  "eval_steps_per_second": 3.036,
365
  "step": 1700
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
366
  }
367
  ],
368
  "max_steps": 3675,
369
  "num_train_epochs": 25,
370
- "total_flos": 6.617213289199883e+18,
371
  "trial_name": null,
372
  "trial_params": null
373
  }
1
  {
2
  "best_metric": null,
3
  "best_model_checkpoint": null,
4
+ "epoch": 19.727891156462587,
5
+ "global_step": 2900,
6
  "is_hyper_param_search": false,
7
  "is_local_process_zero": true,
8
  "is_world_process_zero": true,
363
  "eval_samples_per_second": 24.123,
364
  "eval_steps_per_second": 3.036,
365
  "step": 1700
366
+ },
367
+ {
368
+ "epoch": 11.9,
369
+ "learning_rate": 0.00018217322834645667,
370
+ "loss": 0.3455,
371
+ "step": 1750
372
+ },
373
+ {
374
+ "epoch": 12.24,
375
+ "learning_rate": 0.00017744881889763777,
376
+ "loss": 0.3245,
377
+ "step": 1800
378
+ },
379
+ {
380
+ "epoch": 12.24,
381
+ "eval_cer": 0.15816076960704387,
382
+ "eval_loss": 0.3740461468696594,
383
+ "eval_runtime": 24.7162,
384
+ "eval_samples_per_second": 23.79,
385
+ "eval_steps_per_second": 2.994,
386
+ "step": 1800
387
+ },
388
+ {
389
+ "epoch": 12.59,
390
+ "learning_rate": 0.0001727244094488189,
391
+ "loss": 0.3208,
392
+ "step": 1850
393
+ },
394
+ {
395
+ "epoch": 12.93,
396
+ "learning_rate": 0.000168,
397
+ "loss": 0.3063,
398
+ "step": 1900
399
+ },
400
+ {
401
+ "epoch": 12.93,
402
+ "eval_cer": 0.15904125224196966,
403
+ "eval_loss": 0.3622555434703827,
404
+ "eval_runtime": 24.4729,
405
+ "eval_samples_per_second": 24.027,
406
+ "eval_steps_per_second": 3.024,
407
+ "step": 1900
408
+ },
409
+ {
410
+ "epoch": 13.27,
411
+ "learning_rate": 0.0001632755905511811,
412
+ "loss": 0.3019,
413
+ "step": 1950
414
+ },
415
+ {
416
+ "epoch": 13.61,
417
+ "learning_rate": 0.00015855118110236219,
418
+ "loss": 0.2945,
419
+ "step": 2000
420
+ },
421
+ {
422
+ "epoch": 13.61,
423
+ "eval_cer": 0.16634599706505787,
424
+ "eval_loss": 0.3725011348724365,
425
+ "eval_runtime": 25.0023,
426
+ "eval_samples_per_second": 23.518,
427
+ "eval_steps_per_second": 2.96,
428
+ "step": 2000
429
+ },
430
+ {
431
+ "epoch": 13.95,
432
+ "learning_rate": 0.0001538267716535433,
433
+ "loss": 0.279,
434
+ "step": 2050
435
+ },
436
+ {
437
+ "epoch": 14.29,
438
+ "learning_rate": 0.0001491023622047244,
439
+ "loss": 0.2674,
440
+ "step": 2100
441
+ },
442
+ {
443
+ "epoch": 14.29,
444
+ "eval_cer": 0.15731289744007826,
445
+ "eval_loss": 0.3531067371368408,
446
+ "eval_runtime": 24.8381,
447
+ "eval_samples_per_second": 23.673,
448
+ "eval_steps_per_second": 2.979,
449
+ "step": 2100
450
+ },
451
+ {
452
+ "epoch": 14.63,
453
+ "learning_rate": 0.0001443779527559055,
454
+ "loss": 0.2584,
455
+ "step": 2150
456
+ },
457
+ {
458
+ "epoch": 14.97,
459
+ "learning_rate": 0.0001396535433070866,
460
+ "loss": 0.2796,
461
+ "step": 2200
462
+ },
463
+ {
464
+ "epoch": 14.97,
465
+ "eval_cer": 0.14808413500733736,
466
+ "eval_loss": 0.3606802523136139,
467
+ "eval_runtime": 24.8151,
468
+ "eval_samples_per_second": 23.695,
469
+ "eval_steps_per_second": 2.982,
470
+ "step": 2200
471
+ },
472
+ {
473
+ "epoch": 15.31,
474
+ "learning_rate": 0.0001349291338582677,
475
+ "loss": 0.2462,
476
+ "step": 2250
477
+ },
478
+ {
479
+ "epoch": 15.65,
480
+ "learning_rate": 0.0001302047244094488,
481
+ "loss": 0.256,
482
+ "step": 2300
483
+ },
484
+ {
485
+ "epoch": 15.65,
486
+ "eval_cer": 0.15819338007500408,
487
+ "eval_loss": 0.3580550253391266,
488
+ "eval_runtime": 24.5695,
489
+ "eval_samples_per_second": 23.932,
490
+ "eval_steps_per_second": 3.012,
491
+ "step": 2300
492
+ },
493
+ {
494
+ "epoch": 15.99,
495
+ "learning_rate": 0.00012548031496062992,
496
+ "loss": 0.2524,
497
+ "step": 2350
498
+ },
499
+ {
500
+ "epoch": 16.33,
501
+ "learning_rate": 0.00012075590551181102,
502
+ "loss": 0.2219,
503
+ "step": 2400
504
+ },
505
+ {
506
+ "epoch": 16.33,
507
+ "eval_cer": 0.14801891407141693,
508
+ "eval_loss": 0.35925593972206116,
509
+ "eval_runtime": 24.982,
510
+ "eval_samples_per_second": 23.537,
511
+ "eval_steps_per_second": 2.962,
512
+ "step": 2400
513
+ },
514
+ {
515
+ "epoch": 16.67,
516
+ "learning_rate": 0.0001160314960629921,
517
+ "loss": 0.2364,
518
+ "step": 2450
519
+ },
520
+ {
521
+ "epoch": 17.01,
522
+ "learning_rate": 0.00011130708661417321,
523
+ "loss": 0.2291,
524
+ "step": 2500
525
+ },
526
+ {
527
+ "epoch": 17.01,
528
+ "eval_cer": 0.1471058209685309,
529
+ "eval_loss": 0.35567909479141235,
530
+ "eval_runtime": 24.4749,
531
+ "eval_samples_per_second": 24.025,
532
+ "eval_steps_per_second": 3.024,
533
+ "step": 2500
534
+ },
535
+ {
536
+ "epoch": 17.35,
537
+ "learning_rate": 0.00010658267716535431,
538
+ "loss": 0.2045,
539
+ "step": 2550
540
+ },
541
+ {
542
+ "epoch": 17.69,
543
+ "learning_rate": 0.00010185826771653542,
544
+ "loss": 0.2172,
545
+ "step": 2600
546
+ },
547
+ {
548
+ "epoch": 17.69,
549
+ "eval_cer": 0.14792108266753629,
550
+ "eval_loss": 0.3606509566307068,
551
+ "eval_runtime": 25.1105,
552
+ "eval_samples_per_second": 23.416,
553
+ "eval_steps_per_second": 2.947,
554
+ "step": 2600
555
+ },
556
+ {
557
+ "epoch": 18.03,
558
+ "learning_rate": 9.713385826771652e-05,
559
+ "loss": 0.2271,
560
+ "step": 2650
561
+ },
562
+ {
563
+ "epoch": 18.37,
564
+ "learning_rate": 9.240944881889763e-05,
565
+ "loss": 0.1858,
566
+ "step": 2700
567
+ },
568
+ {
569
+ "epoch": 18.37,
570
+ "eval_cer": 0.15144301320723952,
571
+ "eval_loss": 0.3589307963848114,
572
+ "eval_runtime": 24.5005,
573
+ "eval_samples_per_second": 24.0,
574
+ "eval_steps_per_second": 3.02,
575
+ "step": 2700
576
+ },
577
+ {
578
+ "epoch": 18.71,
579
+ "learning_rate": 8.768503937007873e-05,
580
+ "loss": 0.1995,
581
+ "step": 2750
582
+ },
583
+ {
584
+ "epoch": 19.05,
585
+ "learning_rate": 8.296062992125984e-05,
586
+ "loss": 0.1872,
587
+ "step": 2800
588
+ },
589
+ {
590
+ "epoch": 19.05,
591
+ "eval_cer": 0.14766019892385457,
592
+ "eval_loss": 0.36663514375686646,
593
+ "eval_runtime": 24.4383,
594
+ "eval_samples_per_second": 24.061,
595
+ "eval_steps_per_second": 3.028,
596
+ "step": 2800
597
+ },
598
+ {
599
+ "epoch": 19.39,
600
+ "learning_rate": 7.823622047244094e-05,
601
+ "loss": 0.177,
602
+ "step": 2850
603
+ },
604
+ {
605
+ "epoch": 19.73,
606
+ "learning_rate": 7.351181102362205e-05,
607
+ "loss": 0.1855,
608
+ "step": 2900
609
+ },
610
+ {
611
+ "epoch": 19.73,
612
+ "eval_cer": 0.1432577857492255,
613
+ "eval_loss": 0.3651330769062042,
614
+ "eval_runtime": 24.7826,
615
+ "eval_samples_per_second": 23.726,
616
+ "eval_steps_per_second": 2.986,
617
+ "step": 2900
618
  }
619
  ],
620
  "max_steps": 3675,
621
  "num_train_epochs": 25,
622
+ "total_flos": 1.1249154319313052e+19,
623
  "trial_name": null,
624
  "trial_params": null
625
  }
{checkpoint-1800 → checkpoint-2900}/training_args.bin RENAMED
File without changes
pytorch_model.bin CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:4bba14f51afadd438f5716f846b339431901a15de8cc5ac24dc52e61e72244c4
3
  size 1262344621
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:a6ffc772554604c775a56582be10709d4c3e456a09beff5590f70c6d6164bae7
3
  size 1262344621