ZeroUniqueness committed on
Commit
a831524
•
1 Parent(s): b0e748a

Training in progress, step 59000

Browse files
adapter_model.bin CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:cf9efdf73d7ecc9f45ca166bec5b70555182c38338e6de139c6203b8a009fc59
3
  size 500897101
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:f56b8a333605f03b496496aac3531e5eb50e390d67be06083619275a78de77da
3
  size 500897101
{checkpoint-55000 → checkpoint-58000/adapter_model}/README.md RENAMED
File without changes
{checkpoint-55000 → checkpoint-58000/adapter_model}/adapter_config.json RENAMED
File without changes
{checkpoint-55000 → checkpoint-58000/adapter_model}/adapter_model.bin RENAMED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:2f8b0f0db95ea8131359795995cdc710c82f988052688d4cb6fe2ddae5cacce5
3
  size 500897101
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:cf9efdf73d7ecc9f45ca166bec5b70555182c38338e6de139c6203b8a009fc59
3
  size 500897101
{checkpoint-55000/adapter_model → checkpoint-59000}/README.md RENAMED
File without changes
{checkpoint-55000/adapter_model → checkpoint-59000}/adapter_config.json RENAMED
File without changes
{checkpoint-55000/adapter_model → checkpoint-59000}/adapter_model.bin RENAMED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:2f8b0f0db95ea8131359795995cdc710c82f988052688d4cb6fe2ddae5cacce5
3
  size 500897101
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:f56b8a333605f03b496496aac3531e5eb50e390d67be06083619275a78de77da
3
  size 500897101
{checkpoint-55000 → checkpoint-59000}/optimizer.pt RENAMED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:c88ae3cc26774425e6ffff0abb81ec12ed8fbe2579554a9bd2a67b2403b938e4
3
  size 1001723453
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:b26d7da827461914ac19ca3bc7f168368f4015c2a5364188dfe94a4e3cfde0cb
3
  size 1001723453
{checkpoint-55000 → checkpoint-59000}/rng_state.pth RENAMED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:39e3e89f36f4ba7d699b28f5be6babfa23dddbfd4b08c956d86503ec92a30841
3
  size 14575
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:8bd334de4d3525ea70c0977c8fe7956563ce9e7d3af12dc2b9fcbbc68894cb2d
3
  size 14575
{checkpoint-55000 → checkpoint-59000}/scheduler.pt RENAMED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:f2af9a4ff5b725263882a083e2bae995d8efcb515dd0909ae5f27db346a1c684
3
  size 627
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:67aad82a87c2a78c7bf3dfc2188cc16487d1a53a6ab0632026c89faf1cd6731c
3
  size 627
{checkpoint-55000 → checkpoint-59000}/trainer_state.json RENAMED
@@ -1,8 +1,8 @@
1
  {
2
- "best_metric": 0.49695634841918945,
3
- "best_model_checkpoint": "./qlora-out/checkpoint-55000",
4
- "epoch": 2.050631967488162,
5
- "global_step": 55000,
6
  "is_hyper_param_search": false,
7
  "is_local_process_zero": true,
8
  "is_world_process_zero": true,
@@ -3746,11 +3746,283 @@
3746
  "eval_samples_per_second": 0.444,
3747
  "eval_steps_per_second": 0.444,
3748
  "step": 55000
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
3749
  }
3750
  ],
3751
  "max_steps": 80463,
3752
  "num_train_epochs": 3,
3753
- "total_flos": 1.5420961039711519e+19,
3754
  "trial_name": null,
3755
  "trial_params": null
3756
  }
 
1
  {
2
+ "best_metric": 0.4893116354942322,
3
+ "best_model_checkpoint": "./qlora-out/checkpoint-59000",
4
+ "epoch": 2.1997688378509377,
5
+ "global_step": 59000,
6
  "is_hyper_param_search": false,
7
  "is_local_process_zero": true,
8
  "is_world_process_zero": true,
 
3746
  "eval_samples_per_second": 0.444,
3747
  "eval_steps_per_second": 0.444,
3748
  "step": 55000
3749
+ },
3750
+ {
3751
+ "epoch": 2.05,
3752
+ "learning_rate": 4.516400542719733e-05,
3753
+ "loss": 0.3941,
3754
+ "step": 55100
3755
+ },
3756
+ {
3757
+ "epoch": 2.06,
3758
+ "learning_rate": 4.4837881748920594e-05,
3759
+ "loss": 0.366,
3760
+ "step": 55200
3761
+ },
3762
+ {
3763
+ "epoch": 2.06,
3764
+ "learning_rate": 4.451259918615569e-05,
3765
+ "loss": 0.4203,
3766
+ "step": 55300
3767
+ },
3768
+ {
3769
+ "epoch": 2.07,
3770
+ "learning_rate": 4.418816269883204e-05,
3771
+ "loss": 0.3613,
3772
+ "step": 55400
3773
+ },
3774
+ {
3775
+ "epoch": 2.07,
3776
+ "learning_rate": 4.386457723397794e-05,
3777
+ "loss": 0.3825,
3778
+ "step": 55500
3779
+ },
3780
+ {
3781
+ "epoch": 2.07,
3782
+ "learning_rate": 4.354184772564526e-05,
3783
+ "loss": 0.4147,
3784
+ "step": 55600
3785
+ },
3786
+ {
3787
+ "epoch": 2.08,
3788
+ "learning_rate": 4.3219979094834275e-05,
3789
+ "loss": 0.3812,
3790
+ "step": 55700
3791
+ },
3792
+ {
3793
+ "epoch": 2.08,
3794
+ "learning_rate": 4.289897624941841e-05,
3795
+ "loss": 0.3926,
3796
+ "step": 55800
3797
+ },
3798
+ {
3799
+ "epoch": 2.08,
3800
+ "learning_rate": 4.257884408406968e-05,
3801
+ "loss": 0.4103,
3802
+ "step": 55900
3803
+ },
3804
+ {
3805
+ "epoch": 2.09,
3806
+ "learning_rate": 4.225958748018381e-05,
3807
+ "loss": 0.4009,
3808
+ "step": 56000
3809
+ },
3810
+ {
3811
+ "epoch": 2.09,
3812
+ "eval_loss": 0.49594032764434814,
3813
+ "eval_runtime": 1218.3341,
3814
+ "eval_samples_per_second": 0.445,
3815
+ "eval_steps_per_second": 0.445,
3816
+ "step": 56000
3817
+ },
3818
+ {
3819
+ "epoch": 2.09,
3820
+ "learning_rate": 4.194121130580594e-05,
3821
+ "loss": 0.3779,
3822
+ "step": 56100
3823
+ },
3824
+ {
3825
+ "epoch": 2.1,
3826
+ "learning_rate": 4.1623720415556336e-05,
3827
+ "loss": 0.3651,
3828
+ "step": 56200
3829
+ },
3830
+ {
3831
+ "epoch": 2.1,
3832
+ "learning_rate": 4.1307119650556494e-05,
3833
+ "loss": 0.3754,
3834
+ "step": 56300
3835
+ },
3836
+ {
3837
+ "epoch": 2.1,
3838
+ "learning_rate": 4.099141383835512e-05,
3839
+ "loss": 0.3887,
3840
+ "step": 56400
3841
+ },
3842
+ {
3843
+ "epoch": 2.11,
3844
+ "learning_rate": 4.067660779285465e-05,
3845
+ "loss": 0.3739,
3846
+ "step": 56500
3847
+ },
3848
+ {
3849
+ "epoch": 2.11,
3850
+ "learning_rate": 4.036270631423781e-05,
3851
+ "loss": 0.3842,
3852
+ "step": 56600
3853
+ },
3854
+ {
3855
+ "epoch": 2.11,
3856
+ "learning_rate": 4.004971418889447e-05,
3857
+ "loss": 0.3723,
3858
+ "step": 56700
3859
+ },
3860
+ {
3861
+ "epoch": 2.12,
3862
+ "learning_rate": 3.9737636189348634e-05,
3863
+ "loss": 0.3889,
3864
+ "step": 56800
3865
+ },
3866
+ {
3867
+ "epoch": 2.12,
3868
+ "learning_rate": 3.942647707418561e-05,
3869
+ "loss": 0.3897,
3870
+ "step": 56900
3871
+ },
3872
+ {
3873
+ "epoch": 2.13,
3874
+ "learning_rate": 3.9116241587979496e-05,
3875
+ "loss": 0.3592,
3876
+ "step": 57000
3877
+ },
3878
+ {
3879
+ "epoch": 2.13,
3880
+ "eval_loss": 0.49361398816108704,
3881
+ "eval_runtime": 1208.1063,
3882
+ "eval_samples_per_second": 0.449,
3883
+ "eval_steps_per_second": 0.449,
3884
+ "step": 57000
3885
+ },
3886
+ {
3887
+ "epoch": 2.13,
3888
+ "learning_rate": 3.8806934461220826e-05,
3889
+ "loss": 0.3512,
3890
+ "step": 57100
3891
+ },
3892
+ {
3893
+ "epoch": 2.13,
3894
+ "learning_rate": 3.8498560410244546e-05,
3895
+ "loss": 0.3715,
3896
+ "step": 57200
3897
+ },
3898
+ {
3899
+ "epoch": 2.14,
3900
+ "learning_rate": 3.819112413715791e-05,
3901
+ "loss": 0.3803,
3902
+ "step": 57300
3903
+ },
3904
+ {
3905
+ "epoch": 2.14,
3906
+ "learning_rate": 3.7884630329768875e-05,
3907
+ "loss": 0.3785,
3908
+ "step": 57400
3909
+ },
3910
+ {
3911
+ "epoch": 2.14,
3912
+ "learning_rate": 3.757908366151463e-05,
3913
+ "loss": 0.3626,
3914
+ "step": 57500
3915
+ },
3916
+ {
3917
+ "epoch": 2.15,
3918
+ "learning_rate": 3.72744887913904e-05,
3919
+ "loss": 0.3981,
3920
+ "step": 57600
3921
+ },
3922
+ {
3923
+ "epoch": 2.15,
3924
+ "learning_rate": 3.697085036387822e-05,
3925
+ "loss": 0.3918,
3926
+ "step": 57700
3927
+ },
3928
+ {
3929
+ "epoch": 2.16,
3930
+ "learning_rate": 3.6668173008876324e-05,
3931
+ "loss": 0.3876,
3932
+ "step": 57800
3933
+ },
3934
+ {
3935
+ "epoch": 2.16,
3936
+ "learning_rate": 3.6366461341628396e-05,
3937
+ "loss": 0.3878,
3938
+ "step": 57900
3939
+ },
3940
+ {
3941
+ "epoch": 2.16,
3942
+ "learning_rate": 3.606571996265321e-05,
3943
+ "loss": 0.3674,
3944
+ "step": 58000
3945
+ },
3946
+ {
3947
+ "epoch": 2.16,
3948
+ "eval_loss": 0.4916069805622101,
3949
+ "eval_runtime": 1244.109,
3950
+ "eval_samples_per_second": 0.436,
3951
+ "eval_steps_per_second": 0.436,
3952
+ "step": 58000
3953
+ },
3954
+ {
3955
+ "epoch": 2.17,
3956
+ "learning_rate": 3.576595345767464e-05,
3957
+ "loss": 0.3759,
3958
+ "step": 58100
3959
+ },
3960
+ {
3961
+ "epoch": 2.17,
3962
+ "learning_rate": 3.5467166397551524e-05,
3963
+ "loss": 0.3987,
3964
+ "step": 58200
3965
+ },
3966
+ {
3967
+ "epoch": 2.17,
3968
+ "learning_rate": 3.5169363338208094e-05,
3969
+ "loss": 0.3809,
3970
+ "step": 58300
3971
+ },
3972
+ {
3973
+ "epoch": 2.18,
3974
+ "learning_rate": 3.4872548820564455e-05,
3975
+ "loss": 0.3851,
3976
+ "step": 58400
3977
+ },
3978
+ {
3979
+ "epoch": 2.18,
3980
+ "learning_rate": 3.457672737046737e-05,
3981
+ "loss": 0.3832,
3982
+ "step": 58500
3983
+ },
3984
+ {
3985
+ "epoch": 2.18,
3986
+ "learning_rate": 3.42819034986213e-05,
3987
+ "loss": 0.3923,
3988
+ "step": 58600
3989
+ },
3990
+ {
3991
+ "epoch": 2.19,
3992
+ "learning_rate": 3.398808170051951e-05,
3993
+ "loss": 0.3609,
3994
+ "step": 58700
3995
+ },
3996
+ {
3997
+ "epoch": 2.19,
3998
+ "learning_rate": 3.369526645637556e-05,
3999
+ "loss": 0.3538,
4000
+ "step": 58800
4001
+ },
4002
+ {
4003
+ "epoch": 2.2,
4004
+ "learning_rate": 3.3403462231055107e-05,
4005
+ "loss": 0.3941,
4006
+ "step": 58900
4007
+ },
4008
+ {
4009
+ "epoch": 2.2,
4010
+ "learning_rate": 3.3112673474007584e-05,
4011
+ "loss": 0.3984,
4012
+ "step": 59000
4013
+ },
4014
+ {
4015
+ "epoch": 2.2,
4016
+ "eval_loss": 0.4893116354942322,
4017
+ "eval_runtime": 1243.7748,
4018
+ "eval_samples_per_second": 0.436,
4019
+ "eval_steps_per_second": 0.436,
4020
+ "step": 59000
4021
  }
4022
  ],
4023
  "max_steps": 80463,
4024
  "num_train_epochs": 3,
4025
+ "total_flos": 1.6542001385066742e+19,
4026
  "trial_name": null,
4027
  "trial_params": null
4028
  }
{checkpoint-55000 → checkpoint-59000}/training_args.bin RENAMED
File without changes