danurahul commited on
Commit
9177a94
1 Parent(s): 662b535

Initial commit

Browse files
Files changed (4) hide show
  1. optimizer.pt +1 -1
  2. pytorch_model.bin +1 -1
  3. scheduler.pt +1 -1
  4. trainer_state.json +843 -3
optimizer.pt CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:2d3f00789a4413d3632cc8e173d96260869b1bcafd18749a7dcdc72c2223fbc7
3
  size 655348487
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:e44596804f6bbe6223dc387b01a078b883d9793f67e4205685fa7e6d1bac155b
3
  size 655348487
pytorch_model.bin CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:ce41989eee8b6ee2818864d12f97fe7d711f6affac741934d0f5e61435ef9c9a
3
  size 333975623
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:c8e235124e44f7eac920bcb6bf072b75b2cc2272733920b4b17d82b8d859b967
3
  size 333975623
scheduler.pt CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:09127d0733119c8e145dcce3b87877b5f41b009c0bf9061616a5a589615ed750
3
  size 623
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:765811e5c0de8679a19ba5175edb8739e017bd63da1b3261fbc6a9c624ff81ea
3
  size 623
trainer_state.json CHANGED
@@ -1,8 +1,8 @@
1
  {
2
  "best_metric": null,
3
  "best_model_checkpoint": null,
4
- "epoch": 79.27829414980864,
5
- "global_step": 290000,
6
  "is_hyper_param_search": false,
7
  "is_local_process_zero": true,
8
  "is_world_process_zero": true,
@@ -3486,11 +3486,851 @@
3486
  "learning_rate": 4.6042378673957626e-05,
3487
  "loss": 2.1761,
3488
  "step": 290000
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
3489
  }
3490
  ],
3491
  "max_steps": 3658000,
3492
  "num_train_epochs": 1000,
3493
- "total_flos": 583764386856173568,
3494
  "trial_name": null,
3495
  "trial_params": null
3496
  }
 
1
  {
2
  "best_metric": null,
3
  "best_model_checkpoint": null,
4
+ "epoch": 98.41443411700382,
5
+ "global_step": 360000,
6
  "is_hyper_param_search": false,
7
  "is_local_process_zero": true,
8
  "is_world_process_zero": true,
 
3486
  "learning_rate": 4.6042378673957626e-05,
3487
  "loss": 2.1761,
3488
  "step": 290000
3489
+ },
3490
+ {
3491
+ "epoch": 79.41,
3492
+ "learning_rate": 4.603554340396446e-05,
3493
+ "loss": 2.1889,
3494
+ "step": 290500
3495
+ },
3496
+ {
3497
+ "epoch": 79.55,
3498
+ "learning_rate": 4.6028708133971295e-05,
3499
+ "loss": 2.1878,
3500
+ "step": 291000
3501
+ },
3502
+ {
3503
+ "epoch": 79.69,
3504
+ "learning_rate": 4.602187286397813e-05,
3505
+ "loss": 2.1939,
3506
+ "step": 291500
3507
+ },
3508
+ {
3509
+ "epoch": 79.83,
3510
+ "learning_rate": 4.6015037593984965e-05,
3511
+ "loss": 2.2074,
3512
+ "step": 292000
3513
+ },
3514
+ {
3515
+ "epoch": 79.96,
3516
+ "learning_rate": 4.60082023239918e-05,
3517
+ "loss": 2.2052,
3518
+ "step": 292500
3519
+ },
3520
+ {
3521
+ "epoch": 80.1,
3522
+ "learning_rate": 4.6001367053998634e-05,
3523
+ "loss": 2.1769,
3524
+ "step": 293000
3525
+ },
3526
+ {
3527
+ "epoch": 80.24,
3528
+ "learning_rate": 4.599453178400547e-05,
3529
+ "loss": 2.1675,
3530
+ "step": 293500
3531
+ },
3532
+ {
3533
+ "epoch": 80.37,
3534
+ "learning_rate": 4.5987696514012304e-05,
3535
+ "loss": 2.1722,
3536
+ "step": 294000
3537
+ },
3538
+ {
3539
+ "epoch": 80.51,
3540
+ "learning_rate": 4.598086124401914e-05,
3541
+ "loss": 2.1814,
3542
+ "step": 294500
3543
+ },
3544
+ {
3545
+ "epoch": 80.65,
3546
+ "learning_rate": 4.5974025974025974e-05,
3547
+ "loss": 2.183,
3548
+ "step": 295000
3549
+ },
3550
+ {
3551
+ "epoch": 80.78,
3552
+ "learning_rate": 4.5967190704032815e-05,
3553
+ "loss": 2.1974,
3554
+ "step": 295500
3555
+ },
3556
+ {
3557
+ "epoch": 80.92,
3558
+ "learning_rate": 4.596035543403965e-05,
3559
+ "loss": 2.1974,
3560
+ "step": 296000
3561
+ },
3562
+ {
3563
+ "epoch": 81.06,
3564
+ "learning_rate": 4.5953520164046485e-05,
3565
+ "loss": 2.1853,
3566
+ "step": 296500
3567
+ },
3568
+ {
3569
+ "epoch": 81.19,
3570
+ "learning_rate": 4.594668489405331e-05,
3571
+ "loss": 2.1593,
3572
+ "step": 297000
3573
+ },
3574
+ {
3575
+ "epoch": 81.33,
3576
+ "learning_rate": 4.5939849624060154e-05,
3577
+ "loss": 2.1672,
3578
+ "step": 297500
3579
+ },
3580
+ {
3581
+ "epoch": 81.47,
3582
+ "learning_rate": 4.593301435406699e-05,
3583
+ "loss": 2.1748,
3584
+ "step": 298000
3585
+ },
3586
+ {
3587
+ "epoch": 81.6,
3588
+ "learning_rate": 4.5926179084073824e-05,
3589
+ "loss": 2.1794,
3590
+ "step": 298500
3591
+ },
3592
+ {
3593
+ "epoch": 81.74,
3594
+ "learning_rate": 4.591934381408066e-05,
3595
+ "loss": 2.1888,
3596
+ "step": 299000
3597
+ },
3598
+ {
3599
+ "epoch": 81.88,
3600
+ "learning_rate": 4.5912508544087494e-05,
3601
+ "loss": 2.1932,
3602
+ "step": 299500
3603
+ },
3604
+ {
3605
+ "epoch": 82.01,
3606
+ "learning_rate": 4.590567327409433e-05,
3607
+ "loss": 2.1958,
3608
+ "step": 300000
3609
+ },
3610
+ {
3611
+ "epoch": 82.15,
3612
+ "learning_rate": 4.589883800410116e-05,
3613
+ "loss": 2.1528,
3614
+ "step": 300500
3615
+ },
3616
+ {
3617
+ "epoch": 82.29,
3618
+ "learning_rate": 4.5892002734108e-05,
3619
+ "loss": 2.1538,
3620
+ "step": 301000
3621
+ },
3622
+ {
3623
+ "epoch": 82.42,
3624
+ "learning_rate": 4.588516746411484e-05,
3625
+ "loss": 2.1697,
3626
+ "step": 301500
3627
+ },
3628
+ {
3629
+ "epoch": 82.56,
3630
+ "learning_rate": 4.587833219412167e-05,
3631
+ "loss": 2.1753,
3632
+ "step": 302000
3633
+ },
3634
+ {
3635
+ "epoch": 82.7,
3636
+ "learning_rate": 4.58714969241285e-05,
3637
+ "loss": 2.1771,
3638
+ "step": 302500
3639
+ },
3640
+ {
3641
+ "epoch": 82.83,
3642
+ "learning_rate": 4.586466165413534e-05,
3643
+ "loss": 2.1842,
3644
+ "step": 303000
3645
+ },
3646
+ {
3647
+ "epoch": 82.97,
3648
+ "learning_rate": 4.585782638414218e-05,
3649
+ "loss": 2.1916,
3650
+ "step": 303500
3651
+ },
3652
+ {
3653
+ "epoch": 83.11,
3654
+ "learning_rate": 4.5850991114149013e-05,
3655
+ "loss": 2.1436,
3656
+ "step": 304000
3657
+ },
3658
+ {
3659
+ "epoch": 83.24,
3660
+ "learning_rate": 4.584415584415585e-05,
3661
+ "loss": 2.148,
3662
+ "step": 304500
3663
+ },
3664
+ {
3665
+ "epoch": 83.38,
3666
+ "learning_rate": 4.583732057416268e-05,
3667
+ "loss": 2.1559,
3668
+ "step": 305000
3669
+ },
3670
+ {
3671
+ "epoch": 83.52,
3672
+ "learning_rate": 4.583048530416952e-05,
3673
+ "loss": 2.1686,
3674
+ "step": 305500
3675
+ },
3676
+ {
3677
+ "epoch": 83.65,
3678
+ "learning_rate": 4.582365003417635e-05,
3679
+ "loss": 2.1744,
3680
+ "step": 306000
3681
+ },
3682
+ {
3683
+ "epoch": 83.79,
3684
+ "learning_rate": 4.581681476418319e-05,
3685
+ "loss": 2.1742,
3686
+ "step": 306500
3687
+ },
3688
+ {
3689
+ "epoch": 83.93,
3690
+ "learning_rate": 4.580997949419002e-05,
3691
+ "loss": 2.1881,
3692
+ "step": 307000
3693
+ },
3694
+ {
3695
+ "epoch": 84.06,
3696
+ "learning_rate": 4.580314422419686e-05,
3697
+ "loss": 2.1643,
3698
+ "step": 307500
3699
+ },
3700
+ {
3701
+ "epoch": 84.2,
3702
+ "learning_rate": 4.579630895420369e-05,
3703
+ "loss": 2.1451,
3704
+ "step": 308000
3705
+ },
3706
+ {
3707
+ "epoch": 84.34,
3708
+ "learning_rate": 4.5789473684210527e-05,
3709
+ "loss": 2.1479,
3710
+ "step": 308500
3711
+ },
3712
+ {
3713
+ "epoch": 84.47,
3714
+ "learning_rate": 4.578263841421737e-05,
3715
+ "loss": 2.1579,
3716
+ "step": 309000
3717
+ },
3718
+ {
3719
+ "epoch": 84.61,
3720
+ "learning_rate": 4.57758031442242e-05,
3721
+ "loss": 2.1608,
3722
+ "step": 309500
3723
+ },
3724
+ {
3725
+ "epoch": 84.75,
3726
+ "learning_rate": 4.576896787423103e-05,
3727
+ "loss": 2.1697,
3728
+ "step": 310000
3729
+ },
3730
+ {
3731
+ "epoch": 84.88,
3732
+ "learning_rate": 4.5762132604237866e-05,
3733
+ "loss": 2.1809,
3734
+ "step": 310500
3735
+ },
3736
+ {
3737
+ "epoch": 85.02,
3738
+ "learning_rate": 4.575529733424471e-05,
3739
+ "loss": 2.1723,
3740
+ "step": 311000
3741
+ },
3742
+ {
3743
+ "epoch": 85.16,
3744
+ "learning_rate": 4.574846206425154e-05,
3745
+ "loss": 2.1286,
3746
+ "step": 311500
3747
+ },
3748
+ {
3749
+ "epoch": 85.29,
3750
+ "learning_rate": 4.574162679425838e-05,
3751
+ "loss": 2.1458,
3752
+ "step": 312000
3753
+ },
3754
+ {
3755
+ "epoch": 85.43,
3756
+ "learning_rate": 4.5734791524265205e-05,
3757
+ "loss": 2.1496,
3758
+ "step": 312500
3759
+ },
3760
+ {
3761
+ "epoch": 85.57,
3762
+ "learning_rate": 4.5727956254272047e-05,
3763
+ "loss": 2.1572,
3764
+ "step": 313000
3765
+ },
3766
+ {
3767
+ "epoch": 85.7,
3768
+ "learning_rate": 4.572112098427888e-05,
3769
+ "loss": 2.1648,
3770
+ "step": 313500
3771
+ },
3772
+ {
3773
+ "epoch": 85.84,
3774
+ "learning_rate": 4.5714285714285716e-05,
3775
+ "loss": 2.1736,
3776
+ "step": 314000
3777
+ },
3778
+ {
3779
+ "epoch": 85.98,
3780
+ "learning_rate": 4.570745044429256e-05,
3781
+ "loss": 2.1776,
3782
+ "step": 314500
3783
+ },
3784
+ {
3785
+ "epoch": 86.11,
3786
+ "learning_rate": 4.5700615174299386e-05,
3787
+ "loss": 2.1304,
3788
+ "step": 315000
3789
+ },
3790
+ {
3791
+ "epoch": 86.25,
3792
+ "learning_rate": 4.569377990430622e-05,
3793
+ "loss": 2.1302,
3794
+ "step": 315500
3795
+ },
3796
+ {
3797
+ "epoch": 86.39,
3798
+ "learning_rate": 4.5686944634313055e-05,
3799
+ "loss": 2.1441,
3800
+ "step": 316000
3801
+ },
3802
+ {
3803
+ "epoch": 86.52,
3804
+ "learning_rate": 4.56801093643199e-05,
3805
+ "loss": 2.1551,
3806
+ "step": 316500
3807
+ },
3808
+ {
3809
+ "epoch": 86.66,
3810
+ "learning_rate": 4.567327409432673e-05,
3811
+ "loss": 2.1582,
3812
+ "step": 317000
3813
+ },
3814
+ {
3815
+ "epoch": 86.8,
3816
+ "learning_rate": 4.566643882433356e-05,
3817
+ "loss": 2.1666,
3818
+ "step": 317500
3819
+ },
3820
+ {
3821
+ "epoch": 86.93,
3822
+ "learning_rate": 4.5659603554340394e-05,
3823
+ "loss": 2.1599,
3824
+ "step": 318000
3825
+ },
3826
+ {
3827
+ "epoch": 87.07,
3828
+ "learning_rate": 4.5652768284347236e-05,
3829
+ "loss": 2.1394,
3830
+ "step": 318500
3831
+ },
3832
+ {
3833
+ "epoch": 87.21,
3834
+ "learning_rate": 4.564593301435407e-05,
3835
+ "loss": 2.1233,
3836
+ "step": 319000
3837
+ },
3838
+ {
3839
+ "epoch": 87.34,
3840
+ "learning_rate": 4.5639097744360906e-05,
3841
+ "loss": 2.1365,
3842
+ "step": 319500
3843
+ },
3844
+ {
3845
+ "epoch": 87.48,
3846
+ "learning_rate": 4.563226247436774e-05,
3847
+ "loss": 2.1441,
3848
+ "step": 320000
3849
+ },
3850
+ {
3851
+ "epoch": 87.62,
3852
+ "learning_rate": 4.5625427204374575e-05,
3853
+ "loss": 2.1482,
3854
+ "step": 320500
3855
+ },
3856
+ {
3857
+ "epoch": 87.75,
3858
+ "learning_rate": 4.561859193438141e-05,
3859
+ "loss": 2.1599,
3860
+ "step": 321000
3861
+ },
3862
+ {
3863
+ "epoch": 87.89,
3864
+ "learning_rate": 4.5611756664388245e-05,
3865
+ "loss": 2.1559,
3866
+ "step": 321500
3867
+ },
3868
+ {
3869
+ "epoch": 88.03,
3870
+ "learning_rate": 4.560492139439508e-05,
3871
+ "loss": 2.1513,
3872
+ "step": 322000
3873
+ },
3874
+ {
3875
+ "epoch": 88.16,
3876
+ "learning_rate": 4.5598086124401914e-05,
3877
+ "loss": 2.115,
3878
+ "step": 322500
3879
+ },
3880
+ {
3881
+ "epoch": 88.3,
3882
+ "learning_rate": 4.559125085440875e-05,
3883
+ "loss": 2.1336,
3884
+ "step": 323000
3885
+ },
3886
+ {
3887
+ "epoch": 88.44,
3888
+ "learning_rate": 4.5584415584415584e-05,
3889
+ "loss": 2.1394,
3890
+ "step": 323500
3891
+ },
3892
+ {
3893
+ "epoch": 88.57,
3894
+ "learning_rate": 4.5577580314422426e-05,
3895
+ "loss": 2.1366,
3896
+ "step": 324000
3897
+ },
3898
+ {
3899
+ "epoch": 88.71,
3900
+ "learning_rate": 4.557074504442926e-05,
3901
+ "loss": 2.1424,
3902
+ "step": 324500
3903
+ },
3904
+ {
3905
+ "epoch": 88.85,
3906
+ "learning_rate": 4.5563909774436095e-05,
3907
+ "loss": 2.151,
3908
+ "step": 325000
3909
+ },
3910
+ {
3911
+ "epoch": 88.98,
3912
+ "learning_rate": 4.555707450444292e-05,
3913
+ "loss": 2.1611,
3914
+ "step": 325500
3915
+ },
3916
+ {
3917
+ "epoch": 89.12,
3918
+ "learning_rate": 4.5550239234449765e-05,
3919
+ "loss": 2.1216,
3920
+ "step": 326000
3921
+ },
3922
+ {
3923
+ "epoch": 89.26,
3924
+ "learning_rate": 4.55434039644566e-05,
3925
+ "loss": 2.1112,
3926
+ "step": 326500
3927
+ },
3928
+ {
3929
+ "epoch": 89.39,
3930
+ "learning_rate": 4.5536568694463434e-05,
3931
+ "loss": 2.1246,
3932
+ "step": 327000
3933
+ },
3934
+ {
3935
+ "epoch": 89.53,
3936
+ "learning_rate": 4.552973342447027e-05,
3937
+ "loss": 2.1361,
3938
+ "step": 327500
3939
+ },
3940
+ {
3941
+ "epoch": 89.67,
3942
+ "learning_rate": 4.5522898154477104e-05,
3943
+ "loss": 2.1375,
3944
+ "step": 328000
3945
+ },
3946
+ {
3947
+ "epoch": 89.8,
3948
+ "learning_rate": 4.551606288448394e-05,
3949
+ "loss": 2.1438,
3950
+ "step": 328500
3951
+ },
3952
+ {
3953
+ "epoch": 89.94,
3954
+ "learning_rate": 4.5509227614490773e-05,
3955
+ "loss": 2.1525,
3956
+ "step": 329000
3957
+ },
3958
+ {
3959
+ "epoch": 90.08,
3960
+ "learning_rate": 4.550239234449761e-05,
3961
+ "loss": 2.127,
3962
+ "step": 329500
3963
+ },
3964
+ {
3965
+ "epoch": 90.21,
3966
+ "learning_rate": 4.549555707450445e-05,
3967
+ "loss": 2.1043,
3968
+ "step": 330000
3969
+ },
3970
+ {
3971
+ "epoch": 90.35,
3972
+ "learning_rate": 4.548872180451128e-05,
3973
+ "loss": 2.1148,
3974
+ "step": 330500
3975
+ },
3976
+ {
3977
+ "epoch": 90.49,
3978
+ "learning_rate": 4.548188653451811e-05,
3979
+ "loss": 2.1282,
3980
+ "step": 331000
3981
+ },
3982
+ {
3983
+ "epoch": 90.62,
3984
+ "learning_rate": 4.547505126452495e-05,
3985
+ "loss": 2.1347,
3986
+ "step": 331500
3987
+ },
3988
+ {
3989
+ "epoch": 90.76,
3990
+ "learning_rate": 4.546821599453179e-05,
3991
+ "loss": 2.1465,
3992
+ "step": 332000
3993
+ },
3994
+ {
3995
+ "epoch": 90.9,
3996
+ "learning_rate": 4.5461380724538624e-05,
3997
+ "loss": 2.1457,
3998
+ "step": 332500
3999
+ },
4000
+ {
4001
+ "epoch": 91.03,
4002
+ "learning_rate": 4.545454545454546e-05,
4003
+ "loss": 2.1369,
4004
+ "step": 333000
4005
+ },
4006
+ {
4007
+ "epoch": 91.17,
4008
+ "learning_rate": 4.544771018455229e-05,
4009
+ "loss": 2.0965,
4010
+ "step": 333500
4011
+ },
4012
+ {
4013
+ "epoch": 91.31,
4014
+ "learning_rate": 4.544087491455913e-05,
4015
+ "loss": 2.1165,
4016
+ "step": 334000
4017
+ },
4018
+ {
4019
+ "epoch": 91.44,
4020
+ "learning_rate": 4.543403964456596e-05,
4021
+ "loss": 2.1213,
4022
+ "step": 334500
4023
+ },
4024
+ {
4025
+ "epoch": 91.58,
4026
+ "learning_rate": 4.54272043745728e-05,
4027
+ "loss": 2.1286,
4028
+ "step": 335000
4029
+ },
4030
+ {
4031
+ "epoch": 91.72,
4032
+ "learning_rate": 4.542036910457963e-05,
4033
+ "loss": 2.1284,
4034
+ "step": 335500
4035
+ },
4036
+ {
4037
+ "epoch": 91.85,
4038
+ "learning_rate": 4.541353383458647e-05,
4039
+ "loss": 2.1407,
4040
+ "step": 336000
4041
+ },
4042
+ {
4043
+ "epoch": 91.99,
4044
+ "learning_rate": 4.54066985645933e-05,
4045
+ "loss": 2.1417,
4046
+ "step": 336500
4047
+ },
4048
+ {
4049
+ "epoch": 92.13,
4050
+ "learning_rate": 4.539986329460014e-05,
4051
+ "loss": 2.0955,
4052
+ "step": 337000
4053
+ },
4054
+ {
4055
+ "epoch": 92.26,
4056
+ "learning_rate": 4.539302802460698e-05,
4057
+ "loss": 2.0994,
4058
+ "step": 337500
4059
+ },
4060
+ {
4061
+ "epoch": 92.4,
4062
+ "learning_rate": 4.538619275461381e-05,
4063
+ "loss": 2.1162,
4064
+ "step": 338000
4065
+ },
4066
+ {
4067
+ "epoch": 92.54,
4068
+ "learning_rate": 4.537935748462064e-05,
4069
+ "loss": 2.1158,
4070
+ "step": 338500
4071
+ },
4072
+ {
4073
+ "epoch": 92.67,
4074
+ "learning_rate": 4.5372522214627476e-05,
4075
+ "loss": 2.1289,
4076
+ "step": 339000
4077
+ },
4078
+ {
4079
+ "epoch": 92.81,
4080
+ "learning_rate": 4.536568694463432e-05,
4081
+ "loss": 2.1288,
4082
+ "step": 339500
4083
+ },
4084
+ {
4085
+ "epoch": 92.95,
4086
+ "learning_rate": 4.535885167464115e-05,
4087
+ "loss": 2.1417,
4088
+ "step": 340000
4089
+ },
4090
+ {
4091
+ "epoch": 93.08,
4092
+ "learning_rate": 4.535201640464799e-05,
4093
+ "loss": 2.1089,
4094
+ "step": 340500
4095
+ },
4096
+ {
4097
+ "epoch": 93.22,
4098
+ "learning_rate": 4.5345181134654815e-05,
4099
+ "loss": 2.0924,
4100
+ "step": 341000
4101
+ },
4102
+ {
4103
+ "epoch": 93.36,
4104
+ "learning_rate": 4.533834586466166e-05,
4105
+ "loss": 2.1037,
4106
+ "step": 341500
4107
+ },
4108
+ {
4109
+ "epoch": 93.49,
4110
+ "learning_rate": 4.533151059466849e-05,
4111
+ "loss": 2.1097,
4112
+ "step": 342000
4113
+ },
4114
+ {
4115
+ "epoch": 93.63,
4116
+ "learning_rate": 4.5324675324675326e-05,
4117
+ "loss": 2.1164,
4118
+ "step": 342500
4119
+ },
4120
+ {
4121
+ "epoch": 93.77,
4122
+ "learning_rate": 4.531784005468217e-05,
4123
+ "loss": 2.1277,
4124
+ "step": 343000
4125
+ },
4126
+ {
4127
+ "epoch": 93.9,
4128
+ "learning_rate": 4.5311004784688996e-05,
4129
+ "loss": 2.1281,
4130
+ "step": 343500
4131
+ },
4132
+ {
4133
+ "epoch": 94.04,
4134
+ "learning_rate": 4.530416951469583e-05,
4135
+ "loss": 2.1238,
4136
+ "step": 344000
4137
+ },
4138
+ {
4139
+ "epoch": 94.18,
4140
+ "learning_rate": 4.5297334244702666e-05,
4141
+ "loss": 2.0879,
4142
+ "step": 344500
4143
+ },
4144
+ {
4145
+ "epoch": 94.31,
4146
+ "learning_rate": 4.529049897470951e-05,
4147
+ "loss": 2.0903,
4148
+ "step": 345000
4149
+ },
4150
+ {
4151
+ "epoch": 94.45,
4152
+ "learning_rate": 4.528366370471634e-05,
4153
+ "loss": 2.1058,
4154
+ "step": 345500
4155
+ },
4156
+ {
4157
+ "epoch": 94.59,
4158
+ "learning_rate": 4.527682843472317e-05,
4159
+ "loss": 2.1162,
4160
+ "step": 346000
4161
+ },
4162
+ {
4163
+ "epoch": 94.72,
4164
+ "learning_rate": 4.5269993164730005e-05,
4165
+ "loss": 2.1162,
4166
+ "step": 346500
4167
+ },
4168
+ {
4169
+ "epoch": 94.86,
4170
+ "learning_rate": 4.5263157894736846e-05,
4171
+ "loss": 2.1276,
4172
+ "step": 347000
4173
+ },
4174
+ {
4175
+ "epoch": 95.0,
4176
+ "learning_rate": 4.525632262474368e-05,
4177
+ "loss": 2.1259,
4178
+ "step": 347500
4179
+ },
4180
+ {
4181
+ "epoch": 95.13,
4182
+ "learning_rate": 4.5249487354750516e-05,
4183
+ "loss": 2.0818,
4184
+ "step": 348000
4185
+ },
4186
+ {
4187
+ "epoch": 95.27,
4188
+ "learning_rate": 4.524265208475735e-05,
4189
+ "loss": 2.0907,
4190
+ "step": 348500
4191
+ },
4192
+ {
4193
+ "epoch": 95.41,
4194
+ "learning_rate": 4.5235816814764186e-05,
4195
+ "loss": 2.0926,
4196
+ "step": 349000
4197
+ },
4198
+ {
4199
+ "epoch": 95.54,
4200
+ "learning_rate": 4.522898154477102e-05,
4201
+ "loss": 2.1014,
4202
+ "step": 349500
4203
+ },
4204
+ {
4205
+ "epoch": 95.68,
4206
+ "learning_rate": 4.5222146274777855e-05,
4207
+ "loss": 2.1133,
4208
+ "step": 350000
4209
+ },
4210
+ {
4211
+ "epoch": 95.82,
4212
+ "learning_rate": 4.521531100478469e-05,
4213
+ "loss": 2.1189,
4214
+ "step": 350500
4215
+ },
4216
+ {
4217
+ "epoch": 95.95,
4218
+ "learning_rate": 4.5208475734791525e-05,
4219
+ "loss": 2.1202,
4220
+ "step": 351000
4221
+ },
4222
+ {
4223
+ "epoch": 96.09,
4224
+ "learning_rate": 4.520164046479836e-05,
4225
+ "loss": 2.0911,
4226
+ "step": 351500
4227
+ },
4228
+ {
4229
+ "epoch": 96.23,
4230
+ "learning_rate": 4.5194805194805194e-05,
4231
+ "loss": 2.0824,
4232
+ "step": 352000
4233
+ },
4234
+ {
4235
+ "epoch": 96.36,
4236
+ "learning_rate": 4.5187969924812036e-05,
4237
+ "loss": 2.0898,
4238
+ "step": 352500
4239
+ },
4240
+ {
4241
+ "epoch": 96.5,
4242
+ "learning_rate": 4.518113465481887e-05,
4243
+ "loss": 2.0946,
4244
+ "step": 353000
4245
+ },
4246
+ {
4247
+ "epoch": 96.64,
4248
+ "learning_rate": 4.5174299384825705e-05,
4249
+ "loss": 2.1039,
4250
+ "step": 353500
4251
+ },
4252
+ {
4253
+ "epoch": 96.77,
4254
+ "learning_rate": 4.5167464114832533e-05,
4255
+ "loss": 2.1076,
4256
+ "step": 354000
4257
+ },
4258
+ {
4259
+ "epoch": 96.91,
4260
+ "learning_rate": 4.5160628844839375e-05,
4261
+ "loss": 2.1183,
4262
+ "step": 354500
4263
+ },
4264
+ {
4265
+ "epoch": 97.05,
4266
+ "learning_rate": 4.515379357484621e-05,
4267
+ "loss": 2.1028,
4268
+ "step": 355000
4269
+ },
4270
+ {
4271
+ "epoch": 97.18,
4272
+ "learning_rate": 4.5146958304853045e-05,
4273
+ "loss": 2.0748,
4274
+ "step": 355500
4275
+ },
4276
+ {
4277
+ "epoch": 97.32,
4278
+ "learning_rate": 4.514012303485988e-05,
4279
+ "loss": 2.082,
4280
+ "step": 356000
4281
+ },
4282
+ {
4283
+ "epoch": 97.46,
4284
+ "learning_rate": 4.5133287764866714e-05,
4285
+ "loss": 2.0929,
4286
+ "step": 356500
4287
+ },
4288
+ {
4289
+ "epoch": 97.59,
4290
+ "learning_rate": 4.512645249487355e-05,
4291
+ "loss": 2.0978,
4292
+ "step": 357000
4293
+ },
4294
+ {
4295
+ "epoch": 97.73,
4296
+ "learning_rate": 4.5119617224880384e-05,
4297
+ "loss": 2.1015,
4298
+ "step": 357500
4299
+ },
4300
+ {
4301
+ "epoch": 97.87,
4302
+ "learning_rate": 4.511278195488722e-05,
4303
+ "loss": 2.1031,
4304
+ "step": 358000
4305
+ },
4306
+ {
4307
+ "epoch": 98.0,
4308
+ "learning_rate": 4.510594668489406e-05,
4309
+ "loss": 2.1172,
4310
+ "step": 358500
4311
+ },
4312
+ {
4313
+ "epoch": 98.14,
4314
+ "learning_rate": 4.509911141490089e-05,
4315
+ "loss": 2.0695,
4316
+ "step": 359000
4317
+ },
4318
+ {
4319
+ "epoch": 98.28,
4320
+ "learning_rate": 4.509227614490772e-05,
4321
+ "loss": 2.0733,
4322
+ "step": 359500
4323
+ },
4324
+ {
4325
+ "epoch": 98.41,
4326
+ "learning_rate": 4.5085440874914565e-05,
4327
+ "loss": 2.0832,
4328
+ "step": 360000
4329
  }
4330
  ],
4331
  "max_steps": 3658000,
4332
  "num_train_epochs": 1000,
4333
+ "total_flos": 724673057990639616,
4334
  "trial_name": null,
4335
  "trial_params": null
4336
  }