Romain-XV commited on
Commit
d8ccbae
·
verified ·
1 Parent(s): ea0de1e

Training in progress, step 588, checkpoint

Browse files
last-checkpoint/adapter_model.safetensors CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:d5a509f63f00d367a5c11498f8184cf3ff2fd851ce54ead76faf84db6de30c28
3
  size 639691872
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:8fe0fed13538d60e29cd4c40617a904be0d495d20dce3d817763c4605a48f2e2
3
  size 639691872
last-checkpoint/optimizer.pt CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:849cd5af29d3daf1b69aa1da3756e315d7a480e7c9be302e8635b39c2c5d237f
3
  size 325340244
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:4a67808c9d84c5e1501e5ef8f2a4e8792e9097f61b1ccda1274e89044232073a
3
  size 325340244
last-checkpoint/rng_state.pth CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:fa104de62d8b1546c2d94b0bb3c9abcc345b572fe22aa664ded8f16fe5fca19e
3
  size 14244
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:d7732b0a21b71e9a4fdd4bf7a6f9d6298b39c6392652af04122d065c214b3065
3
  size 14244
last-checkpoint/scheduler.pt CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:fc7f999f24da151eab3e6fc0d25d33386c155b659c6380a3dfdb71eaecbe77dd
3
  size 1064
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:7f9668780ea7cd0efd62652380c15b597f4f52be588b53462349e9a7707101b3
3
  size 1064
last-checkpoint/trainer_state.json CHANGED
@@ -1,9 +1,9 @@
1
  {
2
  "best_metric": 0.0009602023055776954,
3
  "best_model_checkpoint": "miner_id_24/checkpoint-500",
4
- "epoch": 0.28837835239834664,
5
  "eval_steps": 100,
6
- "global_step": 500,
7
  "is_hyper_param_search": false,
8
  "is_local_process_zero": true,
9
  "is_world_process_zero": true,
@@ -3555,6 +3555,622 @@
3555
  "eval_samples_per_second": 7.949,
3556
  "eval_steps_per_second": 1.99,
3557
  "step": 500
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
3558
  }
3559
  ],
3560
  "logging_steps": 1,
@@ -3578,12 +4194,12 @@
3578
  "should_evaluate": false,
3579
  "should_log": false,
3580
  "should_save": true,
3581
- "should_training_stop": false
3582
  },
3583
  "attributes": {}
3584
  }
3585
  },
3586
- "total_flos": 1.054417901102039e+18,
3587
  "train_batch_size": 4,
3588
  "trial_name": null,
3589
  "trial_params": null
 
1
  {
2
  "best_metric": 0.0009602023055776954,
3
  "best_model_checkpoint": "miner_id_24/checkpoint-500",
4
+ "epoch": 0.33913294242045566,
5
  "eval_steps": 100,
6
+ "global_step": 588,
7
  "is_hyper_param_search": false,
8
  "is_local_process_zero": true,
9
  "is_world_process_zero": true,
 
3555
  "eval_samples_per_second": 7.949,
3556
  "eval_steps_per_second": 1.99,
3557
  "step": 500
3558
+ },
3559
+ {
3560
+ "epoch": 0.2889551091031433,
3561
+ "grad_norm": 0.21408432722091675,
3562
+ "learning_rate": 1.0973492982661793e-05,
3563
+ "loss": 0.0032,
3564
+ "step": 501
3565
+ },
3566
+ {
3567
+ "epoch": 0.28953186580794,
3568
+ "grad_norm": 0.07948876172304153,
3569
+ "learning_rate": 1.0727263266554011e-05,
3570
+ "loss": 0.0017,
3571
+ "step": 502
3572
+ },
3573
+ {
3574
+ "epoch": 0.2901086225127367,
3575
+ "grad_norm": 0.16862964630126953,
3576
+ "learning_rate": 1.0483670864493778e-05,
3577
+ "loss": 0.0019,
3578
+ "step": 503
3579
+ },
3580
+ {
3581
+ "epoch": 0.2906853792175334,
3582
+ "grad_norm": 0.01340674702078104,
3583
+ "learning_rate": 1.024272297273925e-05,
3584
+ "loss": 0.0002,
3585
+ "step": 504
3586
+ },
3587
+ {
3588
+ "epoch": 0.2912621359223301,
3589
+ "grad_norm": 0.0023153014481067657,
3590
+ "learning_rate": 1.0004426709423974e-05,
3591
+ "loss": 0.0001,
3592
+ "step": 505
3593
+ },
3594
+ {
3595
+ "epoch": 0.2918388926271268,
3596
+ "grad_norm": 0.005769818089902401,
3597
+ "learning_rate": 9.768789114346499e-06,
3598
+ "loss": 0.0002,
3599
+ "step": 506
3600
+ },
3601
+ {
3602
+ "epoch": 0.2924156493319235,
3603
+ "grad_norm": 0.019190780818462372,
3604
+ "learning_rate": 9.535817148762461e-06,
3605
+ "loss": 0.0002,
3606
+ "step": 507
3607
+ },
3608
+ {
3609
+ "epoch": 0.29299240603672017,
3610
+ "grad_norm": 1.366793155670166,
3611
+ "learning_rate": 9.305517695178833e-06,
3612
+ "loss": 0.005,
3613
+ "step": 508
3614
+ },
3615
+ {
3616
+ "epoch": 0.29356916274151684,
3617
+ "grad_norm": 0.017835253849625587,
3618
+ "learning_rate": 9.07789755715075e-06,
3619
+ "loss": 0.0003,
3620
+ "step": 509
3621
+ },
3622
+ {
3623
+ "epoch": 0.2941459194463136,
3624
+ "grad_norm": 0.05861745774745941,
3625
+ "learning_rate": 8.85296345908041e-06,
3626
+ "loss": 0.0005,
3627
+ "step": 510
3628
+ },
3629
+ {
3630
+ "epoch": 0.29472267615111025,
3631
+ "grad_norm": 0.4936632812023163,
3632
+ "learning_rate": 8.630722046018458e-06,
3633
+ "loss": 0.0008,
3634
+ "step": 511
3635
+ },
3636
+ {
3637
+ "epoch": 0.29529943285590693,
3638
+ "grad_norm": 0.008170605637133121,
3639
+ "learning_rate": 8.411179883467667e-06,
3640
+ "loss": 0.0003,
3641
+ "step": 512
3642
+ },
3643
+ {
3644
+ "epoch": 0.29587618956070366,
3645
+ "grad_norm": 0.002018228406086564,
3646
+ "learning_rate": 8.194343457188991e-06,
3647
+ "loss": 0.0001,
3648
+ "step": 513
3649
+ },
3650
+ {
3651
+ "epoch": 0.29645294626550034,
3652
+ "grad_norm": 0.1278875768184662,
3653
+ "learning_rate": 7.98021917300993e-06,
3654
+ "loss": 0.001,
3655
+ "step": 514
3656
+ },
3657
+ {
3658
+ "epoch": 0.297029702970297,
3659
+ "grad_norm": 0.18191634118556976,
3660
+ "learning_rate": 7.76881335663534e-06,
3661
+ "loss": 0.0046,
3662
+ "step": 515
3663
+ },
3664
+ {
3665
+ "epoch": 0.29760645967509375,
3666
+ "grad_norm": 0.00487172557041049,
3667
+ "learning_rate": 7.560132253460483e-06,
3668
+ "loss": 0.0002,
3669
+ "step": 516
3670
+ },
3671
+ {
3672
+ "epoch": 0.2981832163798904,
3673
+ "grad_norm": 0.003059436334297061,
3674
+ "learning_rate": 7.354182028386591e-06,
3675
+ "loss": 0.0002,
3676
+ "step": 517
3677
+ },
3678
+ {
3679
+ "epoch": 0.2987599730846871,
3680
+ "grad_norm": 0.4325723946094513,
3681
+ "learning_rate": 7.150968765638743e-06,
3682
+ "loss": 0.0108,
3683
+ "step": 518
3684
+ },
3685
+ {
3686
+ "epoch": 0.2993367297894838,
3687
+ "grad_norm": 0.6904163360595703,
3688
+ "learning_rate": 6.950498468586075e-06,
3689
+ "loss": 0.004,
3690
+ "step": 519
3691
+ },
3692
+ {
3693
+ "epoch": 0.2999134864942805,
3694
+ "grad_norm": 0.02728419378399849,
3695
+ "learning_rate": 6.75277705956443e-06,
3696
+ "loss": 0.0004,
3697
+ "step": 520
3698
+ },
3699
+ {
3700
+ "epoch": 0.3004902431990772,
3701
+ "grad_norm": 0.0645672082901001,
3702
+ "learning_rate": 6.5578103797014455e-06,
3703
+ "loss": 0.0012,
3704
+ "step": 521
3705
+ },
3706
+ {
3707
+ "epoch": 0.30106699990387387,
3708
+ "grad_norm": 0.13279329240322113,
3709
+ "learning_rate": 6.365604188743979e-06,
3710
+ "loss": 0.0008,
3711
+ "step": 522
3712
+ },
3713
+ {
3714
+ "epoch": 0.3016437566086706,
3715
+ "grad_norm": 0.020064158365130424,
3716
+ "learning_rate": 6.176164164887932e-06,
3717
+ "loss": 0.0005,
3718
+ "step": 523
3719
+ },
3720
+ {
3721
+ "epoch": 0.3022205133134673,
3722
+ "grad_norm": 0.02414029650390148,
3723
+ "learning_rate": 5.9894959046105095e-06,
3724
+ "loss": 0.0005,
3725
+ "step": 524
3726
+ },
3727
+ {
3728
+ "epoch": 0.30279727001826395,
3729
+ "grad_norm": 0.11476976424455643,
3730
+ "learning_rate": 5.805604922504859e-06,
3731
+ "loss": 0.0035,
3732
+ "step": 525
3733
+ },
3734
+ {
3735
+ "epoch": 0.30337402672306063,
3736
+ "grad_norm": 0.15488499402999878,
3737
+ "learning_rate": 5.6244966511172505e-06,
3738
+ "loss": 0.006,
3739
+ "step": 526
3740
+ },
3741
+ {
3742
+ "epoch": 0.30395078342785736,
3743
+ "grad_norm": 1.3745477199554443,
3744
+ "learning_rate": 5.446176440786488e-06,
3745
+ "loss": 0.0229,
3746
+ "step": 527
3747
+ },
3748
+ {
3749
+ "epoch": 0.30452754013265404,
3750
+ "grad_norm": 0.003079983638599515,
3751
+ "learning_rate": 5.270649559485907e-06,
3752
+ "loss": 0.0001,
3753
+ "step": 528
3754
+ },
3755
+ {
3756
+ "epoch": 0.3051042968374507,
3757
+ "grad_norm": 0.018386349081993103,
3758
+ "learning_rate": 5.097921192667687e-06,
3759
+ "loss": 0.0004,
3760
+ "step": 529
3761
+ },
3762
+ {
3763
+ "epoch": 0.30568105354224745,
3764
+ "grad_norm": 0.9645707011222839,
3765
+ "learning_rate": 4.92799644310975e-06,
3766
+ "loss": 0.0022,
3767
+ "step": 530
3768
+ },
3769
+ {
3770
+ "epoch": 0.3062578102470441,
3771
+ "grad_norm": 0.054794877767562866,
3772
+ "learning_rate": 4.7608803307649385e-06,
3773
+ "loss": 0.0052,
3774
+ "step": 531
3775
+ },
3776
+ {
3777
+ "epoch": 0.3068345669518408,
3778
+ "grad_norm": 0.18055804073810577,
3779
+ "learning_rate": 4.596577792612755e-06,
3780
+ "loss": 0.0008,
3781
+ "step": 532
3782
+ },
3783
+ {
3784
+ "epoch": 0.3074113236566375,
3785
+ "grad_norm": 0.009832300245761871,
3786
+ "learning_rate": 4.4350936825134805e-06,
3787
+ "loss": 0.0002,
3788
+ "step": 533
3789
+ },
3790
+ {
3791
+ "epoch": 0.3079880803614342,
3792
+ "grad_norm": 0.0028213104233145714,
3793
+ "learning_rate": 4.27643277106482e-06,
3794
+ "loss": 0.0002,
3795
+ "step": 534
3796
+ },
3797
+ {
3798
+ "epoch": 0.3085648370662309,
3799
+ "grad_norm": 0.004487840458750725,
3800
+ "learning_rate": 4.120599745460918e-06,
3801
+ "loss": 0.0002,
3802
+ "step": 535
3803
+ },
3804
+ {
3805
+ "epoch": 0.30914159377102757,
3806
+ "grad_norm": 0.0015976703725755215,
3807
+ "learning_rate": 3.967599209353967e-06,
3808
+ "loss": 0.0001,
3809
+ "step": 536
3810
+ },
3811
+ {
3812
+ "epoch": 0.3097183504758243,
3813
+ "grad_norm": 0.013129237107932568,
3814
+ "learning_rate": 3.817435682718096e-06,
3815
+ "loss": 0.0002,
3816
+ "step": 537
3817
+ },
3818
+ {
3819
+ "epoch": 0.310295107180621,
3820
+ "grad_norm": 0.3533538281917572,
3821
+ "learning_rate": 3.670113601715941e-06,
3822
+ "loss": 0.0187,
3823
+ "step": 538
3824
+ },
3825
+ {
3826
+ "epoch": 0.31087186388541765,
3827
+ "grad_norm": 0.1365794688463211,
3828
+ "learning_rate": 3.525637318567554e-06,
3829
+ "loss": 0.0024,
3830
+ "step": 539
3831
+ },
3832
+ {
3833
+ "epoch": 0.3114486205902144,
3834
+ "grad_norm": 0.0068528540432453156,
3835
+ "learning_rate": 3.384011101421802e-06,
3836
+ "loss": 0.0003,
3837
+ "step": 540
3838
+ },
3839
+ {
3840
+ "epoch": 0.31202537729501106,
3841
+ "grad_norm": 0.006609945558011532,
3842
+ "learning_rate": 3.2452391342303046e-06,
3843
+ "loss": 0.0002,
3844
+ "step": 541
3845
+ },
3846
+ {
3847
+ "epoch": 0.31260213399980774,
3848
+ "grad_norm": 4.070964813232422,
3849
+ "learning_rate": 3.1093255166238176e-06,
3850
+ "loss": 0.0221,
3851
+ "step": 542
3852
+ },
3853
+ {
3854
+ "epoch": 0.3131788907046044,
3855
+ "grad_norm": 0.1819545030593872,
3856
+ "learning_rate": 2.976274263791179e-06,
3857
+ "loss": 0.0043,
3858
+ "step": 543
3859
+ },
3860
+ {
3861
+ "epoch": 0.31375564740940115,
3862
+ "grad_norm": 0.004702473059296608,
3863
+ "learning_rate": 2.8460893063606e-06,
3864
+ "loss": 0.0001,
3865
+ "step": 544
3866
+ },
3867
+ {
3868
+ "epoch": 0.3143324041141978,
3869
+ "grad_norm": 0.1847194880247116,
3870
+ "learning_rate": 2.718774490283593e-06,
3871
+ "loss": 0.0014,
3872
+ "step": 545
3873
+ },
3874
+ {
3875
+ "epoch": 0.3149091608189945,
3876
+ "grad_norm": 0.06856244802474976,
3877
+ "learning_rate": 2.5943335767213304e-06,
3878
+ "loss": 0.0003,
3879
+ "step": 546
3880
+ },
3881
+ {
3882
+ "epoch": 0.31548591752379124,
3883
+ "grad_norm": 0.021140173077583313,
3884
+ "learning_rate": 2.4727702419335864e-06,
3885
+ "loss": 0.0004,
3886
+ "step": 547
3887
+ },
3888
+ {
3889
+ "epoch": 0.3160626742285879,
3890
+ "grad_norm": 0.30309152603149414,
3891
+ "learning_rate": 2.3540880771700803e-06,
3892
+ "loss": 0.0025,
3893
+ "step": 548
3894
+ },
3895
+ {
3896
+ "epoch": 0.3166394309333846,
3897
+ "grad_norm": 0.07376673817634583,
3898
+ "learning_rate": 2.2382905885643844e-06,
3899
+ "loss": 0.0007,
3900
+ "step": 549
3901
+ },
3902
+ {
3903
+ "epoch": 0.31721618763818127,
3904
+ "grad_norm": 0.012836321257054806,
3905
+ "learning_rate": 2.125381197030374e-06,
3906
+ "loss": 0.0001,
3907
+ "step": 550
3908
+ },
3909
+ {
3910
+ "epoch": 0.317792944342978,
3911
+ "grad_norm": 0.012486038729548454,
3912
+ "learning_rate": 2.0153632381611498e-06,
3913
+ "loss": 0.0004,
3914
+ "step": 551
3915
+ },
3916
+ {
3917
+ "epoch": 0.3183697010477747,
3918
+ "grad_norm": 2.240330696105957,
3919
+ "learning_rate": 1.908239962130476e-06,
3920
+ "loss": 0.0023,
3921
+ "step": 552
3922
+ },
3923
+ {
3924
+ "epoch": 0.31894645775257136,
3925
+ "grad_norm": 1.6802948713302612,
3926
+ "learning_rate": 1.8040145335968318e-06,
3927
+ "loss": 0.0176,
3928
+ "step": 553
3929
+ },
3930
+ {
3931
+ "epoch": 0.3195232144573681,
3932
+ "grad_norm": 0.055240605026483536,
3933
+ "learning_rate": 1.7026900316098215e-06,
3934
+ "loss": 0.0005,
3935
+ "step": 554
3936
+ },
3937
+ {
3938
+ "epoch": 0.32009997116216476,
3939
+ "grad_norm": 0.0030760413501411676,
3940
+ "learning_rate": 1.6042694495193022e-06,
3941
+ "loss": 0.0002,
3942
+ "step": 555
3943
+ },
3944
+ {
3945
+ "epoch": 0.32067672786696144,
3946
+ "grad_norm": 0.010371128097176552,
3947
+ "learning_rate": 1.5087556948868876e-06,
3948
+ "loss": 0.0001,
3949
+ "step": 556
3950
+ },
3951
+ {
3952
+ "epoch": 0.3212534845717582,
3953
+ "grad_norm": 0.21550880372524261,
3954
+ "learning_rate": 1.4161515894001165e-06,
3955
+ "loss": 0.0005,
3956
+ "step": 557
3957
+ },
3958
+ {
3959
+ "epoch": 0.32183024127655485,
3960
+ "grad_norm": 0.030834214761853218,
3961
+ "learning_rate": 1.3264598687890205e-06,
3962
+ "loss": 0.0004,
3963
+ "step": 558
3964
+ },
3965
+ {
3966
+ "epoch": 0.32240699798135153,
3967
+ "grad_norm": 0.04491027444601059,
3968
+ "learning_rate": 1.2396831827453436e-06,
3969
+ "loss": 0.0013,
3970
+ "step": 559
3971
+ },
3972
+ {
3973
+ "epoch": 0.3229837546861482,
3974
+ "grad_norm": 0.0054609524086117744,
3975
+ "learning_rate": 1.1558240948443044e-06,
3976
+ "loss": 0.0002,
3977
+ "step": 560
3978
+ },
3979
+ {
3980
+ "epoch": 0.32356051139094494,
3981
+ "grad_norm": 0.06006991118192673,
3982
+ "learning_rate": 1.0748850824687795e-06,
3983
+ "loss": 0.0011,
3984
+ "step": 561
3985
+ },
3986
+ {
3987
+ "epoch": 0.3241372680957416,
3988
+ "grad_norm": 0.09065622091293335,
3989
+ "learning_rate": 9.968685367361618e-07,
3990
+ "loss": 0.0012,
3991
+ "step": 562
3992
+ },
3993
+ {
3994
+ "epoch": 0.3247140248005383,
3995
+ "grad_norm": 0.11779052764177322,
3996
+ "learning_rate": 9.217767624277396e-07,
3997
+ "loss": 0.0009,
3998
+ "step": 563
3999
+ },
4000
+ {
4001
+ "epoch": 0.325290781505335,
4002
+ "grad_norm": 0.004247438628226519,
4003
+ "learning_rate": 8.496119779205725e-07,
4004
+ "loss": 0.0002,
4005
+ "step": 564
4006
+ },
4007
+ {
4008
+ "epoch": 0.3258675382101317,
4009
+ "grad_norm": 0.26266857981681824,
4010
+ "learning_rate": 7.803763151219779e-07,
4011
+ "loss": 0.0012,
4012
+ "step": 565
4013
+ },
4014
+ {
4015
+ "epoch": 0.3264442949149284,
4016
+ "grad_norm": 0.17352361977100372,
4017
+ "learning_rate": 7.140718194065032e-07,
4018
+ "loss": 0.0046,
4019
+ "step": 566
4020
+ },
4021
+ {
4022
+ "epoch": 0.32702105161972506,
4023
+ "grad_norm": 0.003937472123652697,
4024
+ "learning_rate": 6.507004495555969e-07,
4025
+ "loss": 0.0002,
4026
+ "step": 567
4027
+ },
4028
+ {
4029
+ "epoch": 0.3275978083245218,
4030
+ "grad_norm": 0.0046991510316729546,
4031
+ "learning_rate": 5.902640776996315e-07,
4032
+ "loss": 0.0002,
4033
+ "step": 568
4034
+ },
4035
+ {
4036
+ "epoch": 0.32817456502931847,
4037
+ "grad_norm": 0.007674611173570156,
4038
+ "learning_rate": 5.327644892626938e-07,
4039
+ "loss": 0.0001,
4040
+ "step": 569
4041
+ },
4042
+ {
4043
+ "epoch": 0.32875132173411514,
4044
+ "grad_norm": 0.011769871227443218,
4045
+ "learning_rate": 4.782033829097587e-07,
4046
+ "loss": 0.0002,
4047
+ "step": 570
4048
+ },
4049
+ {
4050
+ "epoch": 0.3293280784389119,
4051
+ "grad_norm": 0.3156275749206543,
4052
+ "learning_rate": 4.2658237049655323e-07,
4053
+ "loss": 0.0011,
4054
+ "step": 571
4055
+ },
4056
+ {
4057
+ "epoch": 0.32990483514370855,
4058
+ "grad_norm": 0.14030705392360687,
4059
+ "learning_rate": 3.779029770219378e-07,
4060
+ "loss": 0.0006,
4061
+ "step": 572
4062
+ },
4063
+ {
4064
+ "epoch": 0.33048159184850523,
4065
+ "grad_norm": 1.7659718990325928,
4066
+ "learning_rate": 3.3216664058283165e-07,
4067
+ "loss": 0.0134,
4068
+ "step": 573
4069
+ },
4070
+ {
4071
+ "epoch": 0.3310583485533019,
4072
+ "grad_norm": 0.0017080976394936442,
4073
+ "learning_rate": 2.893747123317581e-07,
4074
+ "loss": 0.0001,
4075
+ "step": 574
4076
+ },
4077
+ {
4078
+ "epoch": 0.33163510525809864,
4079
+ "grad_norm": 0.003221493447199464,
4080
+ "learning_rate": 2.4952845643689827e-07,
4081
+ "loss": 0.0001,
4082
+ "step": 575
4083
+ },
4084
+ {
4085
+ "epoch": 0.3322118619628953,
4086
+ "grad_norm": 0.006105966400355101,
4087
+ "learning_rate": 2.1262905004475475e-07,
4088
+ "loss": 0.0001,
4089
+ "step": 576
4090
+ },
4091
+ {
4092
+ "epoch": 0.332788618667692,
4093
+ "grad_norm": 0.9341827034950256,
4094
+ "learning_rate": 1.786775832454013e-07,
4095
+ "loss": 0.0124,
4096
+ "step": 577
4097
+ },
4098
+ {
4099
+ "epoch": 0.3333653753724887,
4100
+ "grad_norm": 0.0012107370421290398,
4101
+ "learning_rate": 1.4767505904021983e-07,
4102
+ "loss": 0.0001,
4103
+ "step": 578
4104
+ },
4105
+ {
4106
+ "epoch": 0.3339421320772854,
4107
+ "grad_norm": 0.004891383461654186,
4108
+ "learning_rate": 1.1962239331234637e-07,
4109
+ "loss": 0.0002,
4110
+ "step": 579
4111
+ },
4112
+ {
4113
+ "epoch": 0.3345188887820821,
4114
+ "grad_norm": 0.006321438588202,
4115
+ "learning_rate": 9.452041479954821e-08,
4116
+ "loss": 0.0003,
4117
+ "step": 580
4118
+ },
4119
+ {
4120
+ "epoch": 0.3350956454868788,
4121
+ "grad_norm": 0.16756032407283783,
4122
+ "learning_rate": 7.236986506978794e-08,
4123
+ "loss": 0.001,
4124
+ "step": 581
4125
+ },
4126
+ {
4127
+ "epoch": 0.3356724021916755,
4128
+ "grad_norm": 0.14708253741264343,
4129
+ "learning_rate": 5.317139849928543e-08,
4130
+ "loss": 0.0062,
4131
+ "step": 582
4132
+ },
4133
+ {
4134
+ "epoch": 0.33624915889647217,
4135
+ "grad_norm": 0.4846436679363251,
4136
+ "learning_rate": 3.692558225317777e-08,
4137
+ "loss": 0.0169,
4138
+ "step": 583
4139
+ },
4140
+ {
4141
+ "epoch": 0.33682591560126884,
4142
+ "grad_norm": 0.0019764131866395473,
4143
+ "learning_rate": 2.363289626882148e-08,
4144
+ "loss": 0.0001,
4145
+ "step": 584
4146
+ },
4147
+ {
4148
+ "epoch": 0.3374026723060656,
4149
+ "grad_norm": 0.016219504177570343,
4150
+ "learning_rate": 1.3293733241537266e-08,
4151
+ "loss": 0.0004,
4152
+ "step": 585
4153
+ },
4154
+ {
4155
+ "epoch": 0.33797942901086225,
4156
+ "grad_norm": 0.052470527589321136,
4157
+ "learning_rate": 5.908398613074795e-09,
4158
+ "loss": 0.0007,
4159
+ "step": 586
4160
+ },
4161
+ {
4162
+ "epoch": 0.33855618571565893,
4163
+ "grad_norm": 0.0014326622476801276,
4164
+ "learning_rate": 1.4771105625421834e-09,
4165
+ "loss": 0.0001,
4166
+ "step": 587
4167
+ },
4168
+ {
4169
+ "epoch": 0.33913294242045566,
4170
+ "grad_norm": 0.028719905763864517,
4171
+ "learning_rate": 0.0,
4172
+ "loss": 0.0004,
4173
+ "step": 588
4174
  }
4175
  ],
4176
  "logging_steps": 1,
 
4194
  "should_evaluate": false,
4195
  "should_log": false,
4196
  "should_save": true,
4197
+ "should_training_stop": true
4198
  },
4199
  "attributes": {}
4200
  }
4201
  },
4202
+ "total_flos": 1.243015677103571e+18,
4203
  "train_batch_size": 4,
4204
  "trial_name": null,
4205
  "trial_params": null