gagan3012 commited on
Commit
8adf75f
1 Parent(s): 78d8e08

Training in progress, step 10000

Browse files
last-checkpoint/optimizer.pt CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:e83cc6e1e437757fcbe6ad5c4c95b8e28741bf8b91e4f0dea84b7de0f1c10c79
3
  size 2217183175
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:69df7b41162843d3066d511ff7e454b174a4296f035488ece7a03ebd7da15aef
3
  size 2217183175
last-checkpoint/pytorch_model.bin CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:e17634cbbb2ed9ccef9ebc23443e12f89ed528074d2eaa42eab76de45d9f4fd9
3
  size 1110986025
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:77912d0e17b40469aa7bdb635e77b5ed25c78dd7b2e1af6263fb95572303bb76
3
  size 1110986025
last-checkpoint/rng_state.pth CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:70bfc92399e2b163a64fcf0120b63f6d59c2e8d8b38d265154202cbc421b1dff
3
  size 14503
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:d1138e5e81505c6ff611f6e95829a5854ad3acd60d1e5975aa99a4b42e0f7266
3
  size 14503
last-checkpoint/scaler.pt CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:af56da8a85d705265aaa6e3584e91a17430bdca56bf37e86dc75a88381347c34
3
  size 559
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:339fe610f593263fbda15028f0adf454993faebcdfef05cd5d15467b2d37e371
3
  size 559
last-checkpoint/scheduler.pt CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:89c11b0af83d6b6fed454f11aa8e321d662bdd0cdb429ad9305f7f01eb78bffc
3
  size 623
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:765a68bdbb7550b6850d344347078b30cb0b00208e2e89de6624323f4ca4cb0a
3
  size 623
last-checkpoint/trainer_state.json CHANGED
@@ -1,8 +1,8 @@
1
  {
2
  "best_metric": null,
3
  "best_model_checkpoint": null,
4
- "epoch": 1.4784151389710232,
5
- "global_step": 5000,
6
  "is_hyper_param_search": false,
7
  "is_local_process_zero": true,
8
  "is_world_process_zero": true,
@@ -351,11 +351,356 @@
351
  "eval_samples_per_second": 15.641,
352
  "eval_steps_per_second": 3.913,
353
  "step": 5000
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
354
  }
355
  ],
356
  "max_steps": 10146,
357
  "num_train_epochs": 3,
358
- "total_flos": 3.621264453794857e+18,
359
  "trial_name": null,
360
  "trial_params": null
361
  }
 
1
  {
2
  "best_metric": null,
3
  "best_model_checkpoint": null,
4
+ "epoch": 2.9568302779420463,
5
+ "global_step": 10000,
6
  "is_hyper_param_search": false,
7
  "is_local_process_zero": true,
8
  "is_world_process_zero": true,
 
351
  "eval_samples_per_second": 15.641,
352
  "eval_steps_per_second": 3.913,
353
  "step": 5000
354
+ },
355
+ {
356
+ "epoch": 1.51,
357
+ "learning_rate": 2.4906367041198502e-05,
358
+ "loss": 0.4557,
359
+ "step": 5100
360
+ },
361
+ {
362
+ "epoch": 1.54,
363
+ "learning_rate": 2.441356199487483e-05,
364
+ "loss": 0.445,
365
+ "step": 5200
366
+ },
367
+ {
368
+ "epoch": 1.57,
369
+ "learning_rate": 2.3920756948551153e-05,
370
+ "loss": 0.4523,
371
+ "step": 5300
372
+ },
373
+ {
374
+ "epoch": 1.6,
375
+ "learning_rate": 2.342795190222748e-05,
376
+ "loss": 0.3584,
377
+ "step": 5400
378
+ },
379
+ {
380
+ "epoch": 1.63,
381
+ "learning_rate": 2.2935146855903804e-05,
382
+ "loss": 0.4356,
383
+ "step": 5500
384
+ },
385
+ {
386
+ "epoch": 1.66,
387
+ "learning_rate": 2.244234180958013e-05,
388
+ "loss": 0.3989,
389
+ "step": 5600
390
+ },
391
+ {
392
+ "epoch": 1.69,
393
+ "learning_rate": 2.1949536763256458e-05,
394
+ "loss": 0.333,
395
+ "step": 5700
396
+ },
397
+ {
398
+ "epoch": 1.71,
399
+ "learning_rate": 2.145673171693278e-05,
400
+ "loss": 0.3701,
401
+ "step": 5800
402
+ },
403
+ {
404
+ "epoch": 1.74,
405
+ "learning_rate": 2.096392667060911e-05,
406
+ "loss": 0.4125,
407
+ "step": 5900
408
+ },
409
+ {
410
+ "epoch": 1.77,
411
+ "learning_rate": 2.0471121624285432e-05,
412
+ "loss": 0.3393,
413
+ "step": 6000
414
+ },
415
+ {
416
+ "epoch": 1.77,
417
+ "eval_cer": 0.12192614849371393,
418
+ "eval_loss": 0.2698688209056854,
419
+ "eval_runtime": 95.6778,
420
+ "eval_samples_per_second": 15.709,
421
+ "eval_steps_per_second": 3.93,
422
+ "step": 6000
423
+ },
424
+ {
425
+ "epoch": 1.8,
426
+ "learning_rate": 1.997831657796176e-05,
427
+ "loss": 0.2519,
428
+ "step": 6100
429
+ },
430
+ {
431
+ "epoch": 1.83,
432
+ "learning_rate": 1.9485511531638083e-05,
433
+ "loss": 0.2175,
434
+ "step": 6200
435
+ },
436
+ {
437
+ "epoch": 1.86,
438
+ "learning_rate": 1.899270648531441e-05,
439
+ "loss": 0.2418,
440
+ "step": 6300
441
+ },
442
+ {
443
+ "epoch": 1.89,
444
+ "learning_rate": 1.8499901438990734e-05,
445
+ "loss": 0.2157,
446
+ "step": 6400
447
+ },
448
+ {
449
+ "epoch": 1.92,
450
+ "learning_rate": 1.800709639266706e-05,
451
+ "loss": 0.2653,
452
+ "step": 6500
453
+ },
454
+ {
455
+ "epoch": 1.95,
456
+ "learning_rate": 1.7514291346343388e-05,
457
+ "loss": 0.2321,
458
+ "step": 6600
459
+ },
460
+ {
461
+ "epoch": 1.98,
462
+ "learning_rate": 1.702148630001971e-05,
463
+ "loss": 0.1881,
464
+ "step": 6700
465
+ },
466
+ {
467
+ "epoch": 2.01,
468
+ "learning_rate": 1.652868125369604e-05,
469
+ "loss": 0.1818,
470
+ "step": 6800
471
+ },
472
+ {
473
+ "epoch": 2.04,
474
+ "learning_rate": 1.6035876207372362e-05,
475
+ "loss": 0.1204,
476
+ "step": 6900
477
+ },
478
+ {
479
+ "epoch": 2.07,
480
+ "learning_rate": 1.554307116104869e-05,
481
+ "loss": 0.1077,
482
+ "step": 7000
483
+ },
484
+ {
485
+ "epoch": 2.07,
486
+ "eval_cer": 0.09330275954771883,
487
+ "eval_loss": 0.17944632470607758,
488
+ "eval_runtime": 96.1807,
489
+ "eval_samples_per_second": 15.627,
490
+ "eval_steps_per_second": 3.909,
491
+ "step": 7000
492
+ },
493
+ {
494
+ "epoch": 2.1,
495
+ "learning_rate": 1.5050266114725015e-05,
496
+ "loss": 0.174,
497
+ "step": 7100
498
+ },
499
+ {
500
+ "epoch": 2.13,
501
+ "learning_rate": 1.455746106840134e-05,
502
+ "loss": 0.1222,
503
+ "step": 7200
504
+ },
505
+ {
506
+ "epoch": 2.16,
507
+ "learning_rate": 1.4064656022077665e-05,
508
+ "loss": 0.0985,
509
+ "step": 7300
510
+ },
511
+ {
512
+ "epoch": 2.19,
513
+ "learning_rate": 1.357185097575399e-05,
514
+ "loss": 0.0946,
515
+ "step": 7400
516
+ },
517
+ {
518
+ "epoch": 2.22,
519
+ "learning_rate": 1.3079045929430316e-05,
520
+ "loss": 0.0832,
521
+ "step": 7500
522
+ },
523
+ {
524
+ "epoch": 2.25,
525
+ "learning_rate": 1.2586240883106643e-05,
526
+ "loss": 0.074,
527
+ "step": 7600
528
+ },
529
+ {
530
+ "epoch": 2.28,
531
+ "learning_rate": 1.209343583678297e-05,
532
+ "loss": 0.0768,
533
+ "step": 7700
534
+ },
535
+ {
536
+ "epoch": 2.31,
537
+ "learning_rate": 1.1600630790459296e-05,
538
+ "loss": 0.1018,
539
+ "step": 7800
540
+ },
541
+ {
542
+ "epoch": 2.34,
543
+ "learning_rate": 1.1107825744135621e-05,
544
+ "loss": 0.0668,
545
+ "step": 7900
546
+ },
547
+ {
548
+ "epoch": 2.37,
549
+ "learning_rate": 1.0615020697811946e-05,
550
+ "loss": 0.063,
551
+ "step": 8000
552
+ },
553
+ {
554
+ "epoch": 2.37,
555
+ "eval_cer": 0.06167470546374634,
556
+ "eval_loss": 0.13426831364631653,
557
+ "eval_runtime": 95.5453,
558
+ "eval_samples_per_second": 15.731,
559
+ "eval_steps_per_second": 3.935,
560
+ "step": 8000
561
+ },
562
+ {
563
+ "epoch": 2.4,
564
+ "learning_rate": 1.0122215651488272e-05,
565
+ "loss": 0.0742,
566
+ "step": 8100
567
+ },
568
+ {
569
+ "epoch": 2.42,
570
+ "learning_rate": 9.629410605164597e-06,
571
+ "loss": 0.0679,
572
+ "step": 8200
573
+ },
574
+ {
575
+ "epoch": 2.45,
576
+ "learning_rate": 9.136605558840922e-06,
577
+ "loss": 0.0818,
578
+ "step": 8300
579
+ },
580
+ {
581
+ "epoch": 2.48,
582
+ "learning_rate": 8.643800512517248e-06,
583
+ "loss": 0.0704,
584
+ "step": 8400
585
+ },
586
+ {
587
+ "epoch": 2.51,
588
+ "learning_rate": 8.150995466193575e-06,
589
+ "loss": 0.0537,
590
+ "step": 8500
591
+ },
592
+ {
593
+ "epoch": 2.54,
594
+ "learning_rate": 7.6581904198699e-06,
595
+ "loss": 0.0831,
596
+ "step": 8600
597
+ },
598
+ {
599
+ "epoch": 2.57,
600
+ "learning_rate": 7.1653853735462255e-06,
601
+ "loss": 0.0547,
602
+ "step": 8700
603
+ },
604
+ {
605
+ "epoch": 2.6,
606
+ "learning_rate": 6.672580327222551e-06,
607
+ "loss": 0.0765,
608
+ "step": 8800
609
+ },
610
+ {
611
+ "epoch": 2.63,
612
+ "learning_rate": 6.179775280898876e-06,
613
+ "loss": 0.0408,
614
+ "step": 8900
615
+ },
616
+ {
617
+ "epoch": 2.66,
618
+ "learning_rate": 5.6869702345752024e-06,
619
+ "loss": 0.0356,
620
+ "step": 9000
621
+ },
622
+ {
623
+ "epoch": 2.66,
624
+ "eval_cer": 0.06918636830868981,
625
+ "eval_loss": 0.0790172666311264,
626
+ "eval_runtime": 97.3434,
627
+ "eval_samples_per_second": 15.44,
628
+ "eval_steps_per_second": 3.863,
629
+ "step": 9000
630
+ },
631
+ {
632
+ "epoch": 2.69,
633
+ "learning_rate": 5.194165188251528e-06,
634
+ "loss": 0.059,
635
+ "step": 9100
636
+ },
637
+ {
638
+ "epoch": 2.72,
639
+ "learning_rate": 4.70628819239109e-06,
640
+ "loss": 0.0775,
641
+ "step": 9200
642
+ },
643
+ {
644
+ "epoch": 2.75,
645
+ "learning_rate": 4.213483146067416e-06,
646
+ "loss": 0.058,
647
+ "step": 9300
648
+ },
649
+ {
650
+ "epoch": 2.78,
651
+ "learning_rate": 3.7206780997437416e-06,
652
+ "loss": 0.0327,
653
+ "step": 9400
654
+ },
655
+ {
656
+ "epoch": 2.81,
657
+ "learning_rate": 3.227873053420067e-06,
658
+ "loss": 0.0378,
659
+ "step": 9500
660
+ },
661
+ {
662
+ "epoch": 2.84,
663
+ "learning_rate": 2.7350680070963927e-06,
664
+ "loss": 0.0752,
665
+ "step": 9600
666
+ },
667
+ {
668
+ "epoch": 2.87,
669
+ "learning_rate": 2.2422629607727185e-06,
670
+ "loss": 0.0269,
671
+ "step": 9700
672
+ },
673
+ {
674
+ "epoch": 2.9,
675
+ "learning_rate": 1.749457914449044e-06,
676
+ "loss": 0.0359,
677
+ "step": 9800
678
+ },
679
+ {
680
+ "epoch": 2.93,
681
+ "learning_rate": 1.2566528681253697e-06,
682
+ "loss": 0.0345,
683
+ "step": 9900
684
+ },
685
+ {
686
+ "epoch": 2.96,
687
+ "learning_rate": 7.638478218016952e-07,
688
+ "loss": 0.0292,
689
+ "step": 10000
690
+ },
691
+ {
692
+ "epoch": 2.96,
693
+ "eval_cer": 0.044595556258401205,
694
+ "eval_loss": 0.06197139248251915,
695
+ "eval_runtime": 96.6477,
696
+ "eval_samples_per_second": 15.551,
697
+ "eval_steps_per_second": 3.89,
698
+ "step": 10000
699
  }
700
  ],
701
  "max_steps": 10146,
702
  "num_train_epochs": 3,
703
+ "total_flos": 7.242528907589714e+18,
704
  "trial_name": null,
705
  "trial_params": null
706
  }
pytorch_model.bin CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:e17634cbbb2ed9ccef9ebc23443e12f89ed528074d2eaa42eab76de45d9f4fd9
3
  size 1110986025
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:77912d0e17b40469aa7bdb635e77b5ed25c78dd7b2e1af6263fb95572303bb76
3
  size 1110986025
runs/Apr19_22-38-31_9bb1d09d1b50/events.out.tfevents.1650407940.9bb1d09d1b50.39.0 CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:9089acdf93d4be09869ff7c0fd1826236af036368f448e8bb1bc7597c05c4493
3
- size 90115
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:efb0e7f076be8579051c89bd5a3dd4cf7dd657db8f2e9ed56c58a2f10987cb26
3
+ size 99555