cat-searcher commited on
Commit
4afe893
1 Parent(s): 28df2c3

Training in progress, epoch 14, checkpoint

Browse files
Files changed (29) hide show
  1. last-checkpoint/global_step2765/bf16_zero_pp_rank_0_mp_rank_00_optim_states.pt +3 -0
  2. last-checkpoint/global_step2765/bf16_zero_pp_rank_1_mp_rank_00_optim_states.pt +3 -0
  3. last-checkpoint/global_step2765/bf16_zero_pp_rank_2_mp_rank_00_optim_states.pt +3 -0
  4. last-checkpoint/global_step2765/bf16_zero_pp_rank_3_mp_rank_00_optim_states.pt +3 -0
  5. last-checkpoint/global_step2765/bf16_zero_pp_rank_4_mp_rank_00_optim_states.pt +3 -0
  6. last-checkpoint/global_step2765/bf16_zero_pp_rank_5_mp_rank_00_optim_states.pt +3 -0
  7. last-checkpoint/global_step2765/bf16_zero_pp_rank_6_mp_rank_00_optim_states.pt +3 -0
  8. last-checkpoint/global_step2765/bf16_zero_pp_rank_7_mp_rank_00_optim_states.pt +3 -0
  9. last-checkpoint/global_step2765/zero_pp_rank_0_mp_rank_00_model_states.pt +3 -0
  10. last-checkpoint/global_step2765/zero_pp_rank_1_mp_rank_00_model_states.pt +3 -0
  11. last-checkpoint/global_step2765/zero_pp_rank_2_mp_rank_00_model_states.pt +3 -0
  12. last-checkpoint/global_step2765/zero_pp_rank_3_mp_rank_00_model_states.pt +3 -0
  13. last-checkpoint/global_step2765/zero_pp_rank_4_mp_rank_00_model_states.pt +3 -0
  14. last-checkpoint/global_step2765/zero_pp_rank_5_mp_rank_00_model_states.pt +3 -0
  15. last-checkpoint/global_step2765/zero_pp_rank_6_mp_rank_00_model_states.pt +3 -0
  16. last-checkpoint/global_step2765/zero_pp_rank_7_mp_rank_00_model_states.pt +3 -0
  17. last-checkpoint/latest +1 -1
  18. last-checkpoint/model-00001-of-00002.safetensors +1 -1
  19. last-checkpoint/model-00002-of-00002.safetensors +1 -1
  20. last-checkpoint/rng_state_0.pth +1 -1
  21. last-checkpoint/rng_state_1.pth +1 -1
  22. last-checkpoint/rng_state_2.pth +1 -1
  23. last-checkpoint/rng_state_3.pth +1 -1
  24. last-checkpoint/rng_state_4.pth +1 -1
  25. last-checkpoint/rng_state_5.pth +1 -1
  26. last-checkpoint/rng_state_6.pth +1 -1
  27. last-checkpoint/rng_state_7.pth +1 -1
  28. last-checkpoint/scheduler.pt +1 -1
  29. last-checkpoint/trainer_state.json +302 -2
last-checkpoint/global_step2765/bf16_zero_pp_rank_0_mp_rank_00_optim_states.pt ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:5cc6bf9b97a94b90699f45a7fc38d202fb177244c38dbfa6f36c6271cda5ac98
3
+ size 2506176112
last-checkpoint/global_step2765/bf16_zero_pp_rank_1_mp_rank_00_optim_states.pt ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:2eeef3e3616d377f90d12ea86346472117cc77ecfbe03a66d411aa1ddd074309
3
+ size 2506176112
last-checkpoint/global_step2765/bf16_zero_pp_rank_2_mp_rank_00_optim_states.pt ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:28412a24f982a84a28a802cdb61b19b296e9b7c9f9c89785ac5f1eee4a5f6d68
3
+ size 2506176112
last-checkpoint/global_step2765/bf16_zero_pp_rank_3_mp_rank_00_optim_states.pt ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:040ce20af60dd7da7e3b33695723be16e615aabf38e4ccffc60ef74d46370115
3
+ size 2506176112
last-checkpoint/global_step2765/bf16_zero_pp_rank_4_mp_rank_00_optim_states.pt ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:c1c3657eea69211a07303c22ef8ed8263791c3ca1ed2c1bd76f7759df03df7b0
3
+ size 2506176112
last-checkpoint/global_step2765/bf16_zero_pp_rank_5_mp_rank_00_optim_states.pt ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:825697483c0d8dbaacff033646860487bed6d0841aea89b0ebb37c5e044e33f3
3
+ size 2506176112
last-checkpoint/global_step2765/bf16_zero_pp_rank_6_mp_rank_00_optim_states.pt ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:07e3a889dd2c577204f326a307c88e12d2fd0036af5c448e2668e5c2344e60b9
3
+ size 2506176112
last-checkpoint/global_step2765/bf16_zero_pp_rank_7_mp_rank_00_optim_states.pt ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:665538142db7bdae0da78f519f68de74bf856090f5be14a5db2ac232dac90866
3
+ size 2506176112
last-checkpoint/global_step2765/zero_pp_rank_0_mp_rank_00_model_states.pt ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:991c4af0233056577c2d759b974541f3192f0cd0dc222e13ee0454f7746d456a
3
+ size 85570
last-checkpoint/global_step2765/zero_pp_rank_1_mp_rank_00_model_states.pt ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:560aa971ff7c1f9452683a8ea181cecc727c47c0c053ad3c99c5dc4a3009323c
3
+ size 85506
last-checkpoint/global_step2765/zero_pp_rank_2_mp_rank_00_model_states.pt ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:d4b35f8a1cbb50722c530745fc7e51d9d5bd879c6ae1bf2914faf9e7575c5ad3
3
+ size 85506
last-checkpoint/global_step2765/zero_pp_rank_3_mp_rank_00_model_states.pt ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:9f0cd13d71b6e139ffda808e5162d78171b468da3bd5f30c830aa64f68d9a908
3
+ size 85506
last-checkpoint/global_step2765/zero_pp_rank_4_mp_rank_00_model_states.pt ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:6a722a8dbc5fc64619032a8f6e56ed05ef49c8507a36d3d4c5add2d009be8d1d
3
+ size 85506
last-checkpoint/global_step2765/zero_pp_rank_5_mp_rank_00_model_states.pt ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:188f0299e77dc75c4f4dc4a7610a68931ae3788b068f59ce3cd1b428d784e9d8
3
+ size 85506
last-checkpoint/global_step2765/zero_pp_rank_6_mp_rank_00_model_states.pt ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:5f7e7d8e3c13d10a4e9c3c7444405fec9aedb115d49a24baf47b10f0bd9a9275
3
+ size 85506
last-checkpoint/global_step2765/zero_pp_rank_7_mp_rank_00_model_states.pt ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:d49eeefae15968f8ea4dcaca071892d22ccb38c4cb923ca8c9bb007d471075be
3
+ size 85506
last-checkpoint/latest CHANGED
@@ -1 +1 @@
1
- global_step2567
 
1
+ global_step2765
last-checkpoint/model-00001-of-00002.safetensors CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:31a59d02512e22c8fde96ae53ade5fd3efb11b708a7c65545ed6a6a202751a37
3
  size 4945242264
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:51554b1b141cb737fe919c7a7d90b054dc54778551653b359782ad641e140b5d
3
  size 4945242264
last-checkpoint/model-00002-of-00002.safetensors CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:43b5816e1bbc869b1c3d647caa31746b3c9674dc2e4dd47857a690cd4ee4639b
3
  size 67121608
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:39ccf095d82d77e6ad3b265d9fdc49a86ab3fad18daa92e729265d0f5f3bbb71
3
  size 67121608
last-checkpoint/rng_state_0.pth CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:a1bec598899f9d59e70c1b4705ce420a1e0a670957b6c8153a589880068ae5a4
3
  size 15984
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:7891ffa7c7dae99113aa986d67278b52b8c57db55001dc3547a61f24569a34ee
3
  size 15984
last-checkpoint/rng_state_1.pth CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:c60d2348aae518f4c44693db9c9b4b3a3299c556e7f0a86c188b2e4c3e364a7c
3
  size 15984
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:a8b92875cb04deec367605433847d1bda444b178b643d2da7ed9aaf738d232b4
3
  size 15984
last-checkpoint/rng_state_2.pth CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:ffe5a79d3bcb4ce033de360bc765e616316e3562aba25887cd85c4adbb935abf
3
  size 15984
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:e9f5f3338a05e325b5408a1cd0b6f5e5b10fad05fe479d63f44bec4cf18107d6
3
  size 15984
last-checkpoint/rng_state_3.pth CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:a9a9d1f6e22677721841890e6a27855857e6840137650d609eb8e4ac13b71d29
3
  size 15984
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:1be749fea477a3867d44010631937e0d8f071ca5f9614f9795c92c7fa68833a6
3
  size 15984
last-checkpoint/rng_state_4.pth CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:bcac4ff84388a6a4fe3bcae6207c68b2ee5528fb3b6de8cc3588fe1975462aa5
3
  size 15984
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:cbc4a5ea4532c621f4c8e9891117b2e597a7f005001e8b4f2a1b4da8c82bf964
3
  size 15984
last-checkpoint/rng_state_5.pth CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:33fce3cdf5c1b8a8a291e0c73b384e3ad5252640e21e942b44b26b8b0928ffa9
3
  size 15984
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:480f9fe7dd71b54d915b46162e34b780ba2467d5542115cc809dbca60b394c0e
3
  size 15984
last-checkpoint/rng_state_6.pth CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:919e675f3bcaf4f3c8ba35cd8debf85aec3bbc3c8e5019b74431e0a314e4d37a
3
  size 15984
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:c11d982dcd813e82c2d97a5491ce9624cff2dd22e8655ea617ccef1fc1474470
3
  size 15984
last-checkpoint/rng_state_7.pth CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:8bf6479ce82b88efc6a72a8ee512162b3d0ecab972817296d38ab9c448bb8d96
3
  size 15984
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:73494fac3a001cba7cedd097b97f028d4c1d136ee6709214b0a7fe305e5b9089
3
  size 15984
last-checkpoint/scheduler.pt CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:b9eb2db72f0e418efa4f13d7448db05b4ce751b00def470d4d8f87d4965bb17c
3
  size 1064
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:136693ea21ecf0b59fde813d184b14a037ef0bca92ae910a6f73169e6198ccb5
3
  size 1064
last-checkpoint/trainer_state.json CHANGED
@@ -1,9 +1,9 @@
1
  {
2
  "best_metric": null,
3
  "best_model_checkpoint": null,
4
- "epoch": 12.99746835443038,
5
  "eval_steps": 100,
6
- "global_step": 2567,
7
  "is_hyper_param_search": false,
8
  "is_local_process_zero": true,
9
  "is_world_process_zero": true,
@@ -3862,6 +3862,306 @@
3862
  "rewards/margins": 0.5234028100967407,
3863
  "rewards/rejected": -0.3366047739982605,
3864
  "step": 2560
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
3865
  }
3866
  ],
3867
  "logging_steps": 10,
 
1
  {
2
  "best_metric": null,
3
  "best_model_checkpoint": null,
4
+ "epoch": 14.0,
5
  "eval_steps": 100,
6
+ "global_step": 2765,
7
  "is_hyper_param_search": false,
8
  "is_local_process_zero": true,
9
  "is_world_process_zero": true,
 
3862
  "rewards/margins": 0.5234028100967407,
3863
  "rewards/rejected": -0.3366047739982605,
3864
  "step": 2560
3865
+ },
3866
+ {
3867
+ "epoch": 13.012658227848101,
3868
+ "grad_norm": 335161.21326055715,
3869
+ "learning_rate": 1.529301159511125e-07,
3870
+ "logits/chosen": 0.09210095554590225,
3871
+ "logits/rejected": 0.2885093688964844,
3872
+ "logps/chosen": -52.608367919921875,
3873
+ "logps/rejected": -558.9227294921875,
3874
+ "loss": 15959.725,
3875
+ "rewards/accuracies": 0.9750000238418579,
3876
+ "rewards/chosen": 0.1778368204832077,
3877
+ "rewards/margins": 0.5030940175056458,
3878
+ "rewards/rejected": -0.32525718212127686,
3879
+ "step": 2570
3880
+ },
3881
+ {
3882
+ "epoch": 13.063291139240507,
3883
+ "grad_norm": 771775.1017807113,
3884
+ "learning_rate": 1.5136320902538388e-07,
3885
+ "logits/chosen": -1.3265520334243774,
3886
+ "logits/rejected": -0.9296306371688843,
3887
+ "logps/chosen": -62.875038146972656,
3888
+ "logps/rejected": -560.3228759765625,
3889
+ "loss": 15567.6344,
3890
+ "rewards/accuracies": 0.987500011920929,
3891
+ "rewards/chosen": 0.18662917613983154,
3892
+ "rewards/margins": 0.49883994460105896,
3893
+ "rewards/rejected": -0.31221073865890503,
3894
+ "step": 2580
3895
+ },
3896
+ {
3897
+ "epoch": 13.113924050632912,
3898
+ "grad_norm": 446168.3148918395,
3899
+ "learning_rate": 1.4979630209965526e-07,
3900
+ "logits/chosen": -0.11115183681249619,
3901
+ "logits/rejected": 0.8431870341300964,
3902
+ "logps/chosen": -46.82927703857422,
3903
+ "logps/rejected": -552.5628051757812,
3904
+ "loss": 16255.3438,
3905
+ "rewards/accuracies": 1.0,
3906
+ "rewards/chosen": 0.17291709780693054,
3907
+ "rewards/margins": 0.5057471990585327,
3908
+ "rewards/rejected": -0.3328301012516022,
3909
+ "step": 2590
3910
+ },
3911
+ {
3912
+ "epoch": 13.164556962025316,
3913
+ "grad_norm": 586122.4453174556,
3914
+ "learning_rate": 1.4822939517392665e-07,
3915
+ "logits/chosen": -0.757349967956543,
3916
+ "logits/rejected": 0.037270687520504,
3917
+ "logps/chosen": -55.21142578125,
3918
+ "logps/rejected": -557.4276123046875,
3919
+ "loss": 16720.8172,
3920
+ "rewards/accuracies": 0.9750000238418579,
3921
+ "rewards/chosen": 0.1850253939628601,
3922
+ "rewards/margins": 0.5111584663391113,
3923
+ "rewards/rejected": -0.32613304257392883,
3924
+ "step": 2600
3925
+ },
3926
+ {
3927
+ "epoch": 13.215189873417721,
3928
+ "grad_norm": 420628.2693101698,
3929
+ "learning_rate": 1.4666248824819803e-07,
3930
+ "logits/chosen": -0.11379202455282211,
3931
+ "logits/rejected": -0.11788152158260345,
3932
+ "logps/chosen": -49.00257110595703,
3933
+ "logps/rejected": -576.3326416015625,
3934
+ "loss": 16306.0688,
3935
+ "rewards/accuracies": 1.0,
3936
+ "rewards/chosen": 0.18590961396694183,
3937
+ "rewards/margins": 0.5257736444473267,
3938
+ "rewards/rejected": -0.33986401557922363,
3939
+ "step": 2610
3940
+ },
3941
+ {
3942
+ "epoch": 13.265822784810126,
3943
+ "grad_norm": 436219.2086299041,
3944
+ "learning_rate": 1.4509558132246945e-07,
3945
+ "logits/chosen": -0.7918820977210999,
3946
+ "logits/rejected": -0.14419230818748474,
3947
+ "logps/chosen": -56.56486892700195,
3948
+ "logps/rejected": -584.7669677734375,
3949
+ "loss": 16369.2719,
3950
+ "rewards/accuracies": 1.0,
3951
+ "rewards/chosen": 0.18918678164482117,
3952
+ "rewards/margins": 0.5305701494216919,
3953
+ "rewards/rejected": -0.3413834273815155,
3954
+ "step": 2620
3955
+ },
3956
+ {
3957
+ "epoch": 13.316455696202532,
3958
+ "grad_norm": 596793.3073449759,
3959
+ "learning_rate": 1.4352867439674083e-07,
3960
+ "logits/chosen": 1.9564087390899658,
3961
+ "logits/rejected": 2.246692180633545,
3962
+ "logps/chosen": -51.851722717285156,
3963
+ "logps/rejected": -548.3530883789062,
3964
+ "loss": 16796.1063,
3965
+ "rewards/accuracies": 0.987500011920929,
3966
+ "rewards/chosen": 0.18290123343467712,
3967
+ "rewards/margins": 0.4980129599571228,
3968
+ "rewards/rejected": -0.3151116371154785,
3969
+ "step": 2630
3970
+ },
3971
+ {
3972
+ "epoch": 13.367088607594937,
3973
+ "grad_norm": 474733.1664905385,
3974
+ "learning_rate": 1.4196176747101222e-07,
3975
+ "logits/chosen": 0.530455470085144,
3976
+ "logits/rejected": 0.14751790463924408,
3977
+ "logps/chosen": -48.55830001831055,
3978
+ "logps/rejected": -558.3150024414062,
3979
+ "loss": 16144.2906,
3980
+ "rewards/accuracies": 0.987500011920929,
3981
+ "rewards/chosen": 0.17938682436943054,
3982
+ "rewards/margins": 0.5066471695899963,
3983
+ "rewards/rejected": -0.3272603154182434,
3984
+ "step": 2640
3985
+ },
3986
+ {
3987
+ "epoch": 13.417721518987342,
3988
+ "grad_norm": 1649837.8712191964,
3989
+ "learning_rate": 1.403948605452836e-07,
3990
+ "logits/chosen": -0.03671743720769882,
3991
+ "logits/rejected": 0.7579118013381958,
3992
+ "logps/chosen": -42.065242767333984,
3993
+ "logps/rejected": -554.230224609375,
3994
+ "loss": 16118.8047,
3995
+ "rewards/accuracies": 1.0,
3996
+ "rewards/chosen": 0.18058671057224274,
3997
+ "rewards/margins": 0.5129930377006531,
3998
+ "rewards/rejected": -0.3324064016342163,
3999
+ "step": 2650
4000
+ },
4001
+ {
4002
+ "epoch": 13.468354430379748,
4003
+ "grad_norm": 594890.10809389,
4004
+ "learning_rate": 1.38827953619555e-07,
4005
+ "logits/chosen": 0.288557231426239,
4006
+ "logits/rejected": 0.2958771288394928,
4007
+ "logps/chosen": -52.33495330810547,
4008
+ "logps/rejected": -561.2686157226562,
4009
+ "loss": 15733.7453,
4010
+ "rewards/accuracies": 0.987500011920929,
4011
+ "rewards/chosen": 0.1808079034090042,
4012
+ "rewards/margins": 0.5136345028877258,
4013
+ "rewards/rejected": -0.3328266143798828,
4014
+ "step": 2660
4015
+ },
4016
+ {
4017
+ "epoch": 13.518987341772151,
4018
+ "grad_norm": 467820.0894028926,
4019
+ "learning_rate": 1.3726104669382637e-07,
4020
+ "logits/chosen": -0.39889806509017944,
4021
+ "logits/rejected": 0.02098376676440239,
4022
+ "logps/chosen": -53.63391876220703,
4023
+ "logps/rejected": -556.4556884765625,
4024
+ "loss": 15584.0406,
4025
+ "rewards/accuracies": 0.9750000238418579,
4026
+ "rewards/chosen": 0.18383657932281494,
4027
+ "rewards/margins": 0.5030336976051331,
4028
+ "rewards/rejected": -0.3191971182823181,
4029
+ "step": 2670
4030
+ },
4031
+ {
4032
+ "epoch": 13.569620253164556,
4033
+ "grad_norm": 349641.6736805019,
4034
+ "learning_rate": 1.3569413976809776e-07,
4035
+ "logits/chosen": -1.0416258573532104,
4036
+ "logits/rejected": -0.687407374382019,
4037
+ "logps/chosen": -40.50030517578125,
4038
+ "logps/rejected": -560.5548706054688,
4039
+ "loss": 15275.5312,
4040
+ "rewards/accuracies": 1.0,
4041
+ "rewards/chosen": 0.18312379717826843,
4042
+ "rewards/margins": 0.5221952199935913,
4043
+ "rewards/rejected": -0.33907145261764526,
4044
+ "step": 2680
4045
+ },
4046
+ {
4047
+ "epoch": 13.620253164556962,
4048
+ "grad_norm": 769040.8085386351,
4049
+ "learning_rate": 1.3412723284236915e-07,
4050
+ "logits/chosen": 1.7483727931976318,
4051
+ "logits/rejected": 2.3238413333892822,
4052
+ "logps/chosen": -49.73235321044922,
4053
+ "logps/rejected": -559.8514404296875,
4054
+ "loss": 16850.175,
4055
+ "rewards/accuracies": 1.0,
4056
+ "rewards/chosen": 0.18260039389133453,
4057
+ "rewards/margins": 0.5106431245803833,
4058
+ "rewards/rejected": -0.3280427157878876,
4059
+ "step": 2690
4060
+ },
4061
+ {
4062
+ "epoch": 13.670886075949367,
4063
+ "grad_norm": 459226.17158416886,
4064
+ "learning_rate": 1.3256032591664053e-07,
4065
+ "logits/chosen": -0.2809019684791565,
4066
+ "logits/rejected": 0.43121522665023804,
4067
+ "logps/chosen": -58.69781494140625,
4068
+ "logps/rejected": -588.9169921875,
4069
+ "loss": 15404.6109,
4070
+ "rewards/accuracies": 0.987500011920929,
4071
+ "rewards/chosen": 0.19193768501281738,
4072
+ "rewards/margins": 0.5343278646469116,
4073
+ "rewards/rejected": -0.34239014983177185,
4074
+ "step": 2700
4075
+ },
4076
+ {
4077
+ "epoch": 13.721518987341772,
4078
+ "grad_norm": 339517.3364374988,
4079
+ "learning_rate": 1.3099341899091192e-07,
4080
+ "logits/chosen": 0.3717317283153534,
4081
+ "logits/rejected": 0.5634896159172058,
4082
+ "logps/chosen": -60.52980422973633,
4083
+ "logps/rejected": -555.2349243164062,
4084
+ "loss": 15341.8219,
4085
+ "rewards/accuracies": 0.9375,
4086
+ "rewards/chosen": 0.17079493403434753,
4087
+ "rewards/margins": 0.489946186542511,
4088
+ "rewards/rejected": -0.31915122270584106,
4089
+ "step": 2710
4090
+ },
4091
+ {
4092
+ "epoch": 13.772151898734178,
4093
+ "grad_norm": 1157921.1375110236,
4094
+ "learning_rate": 1.2942651206518333e-07,
4095
+ "logits/chosen": -1.758825659751892,
4096
+ "logits/rejected": -1.0223956108093262,
4097
+ "logps/chosen": -48.61360549926758,
4098
+ "logps/rejected": -562.5768432617188,
4099
+ "loss": 16196.7625,
4100
+ "rewards/accuracies": 0.987500011920929,
4101
+ "rewards/chosen": 0.180302232503891,
4102
+ "rewards/margins": 0.5197224020957947,
4103
+ "rewards/rejected": -0.3394201397895813,
4104
+ "step": 2720
4105
+ },
4106
+ {
4107
+ "epoch": 13.822784810126583,
4108
+ "grad_norm": 434777.104877517,
4109
+ "learning_rate": 1.2785960513945471e-07,
4110
+ "logits/chosen": -0.3282082676887512,
4111
+ "logits/rejected": 0.4013535976409912,
4112
+ "logps/chosen": -50.629215240478516,
4113
+ "logps/rejected": -582.4617309570312,
4114
+ "loss": 15710.8641,
4115
+ "rewards/accuracies": 1.0,
4116
+ "rewards/chosen": 0.18200094997882843,
4117
+ "rewards/margins": 0.5299168825149536,
4118
+ "rewards/rejected": -0.3479159474372864,
4119
+ "step": 2730
4120
+ },
4121
+ {
4122
+ "epoch": 13.873417721518987,
4123
+ "grad_norm": 677123.1021845904,
4124
+ "learning_rate": 1.262926982137261e-07,
4125
+ "logits/chosen": -0.9533359408378601,
4126
+ "logits/rejected": -0.11374642699956894,
4127
+ "logps/chosen": -50.710845947265625,
4128
+ "logps/rejected": -568.776611328125,
4129
+ "loss": 16490.0469,
4130
+ "rewards/accuracies": 1.0,
4131
+ "rewards/chosen": 0.18456825613975525,
4132
+ "rewards/margins": 0.5208636522293091,
4133
+ "rewards/rejected": -0.3362954258918762,
4134
+ "step": 2740
4135
+ },
4136
+ {
4137
+ "epoch": 13.924050632911392,
4138
+ "grad_norm": 608241.5399016802,
4139
+ "learning_rate": 1.2472579128799749e-07,
4140
+ "logits/chosen": -0.009487760253250599,
4141
+ "logits/rejected": 0.5674014091491699,
4142
+ "logps/chosen": -47.34721755981445,
4143
+ "logps/rejected": -558.3707275390625,
4144
+ "loss": 16114.125,
4145
+ "rewards/accuracies": 0.9750000238418579,
4146
+ "rewards/chosen": 0.18486423790454865,
4147
+ "rewards/margins": 0.5096093416213989,
4148
+ "rewards/rejected": -0.3247450888156891,
4149
+ "step": 2750
4150
+ },
4151
+ {
4152
+ "epoch": 13.974683544303797,
4153
+ "grad_norm": 510265.43069577636,
4154
+ "learning_rate": 1.2315888436226887e-07,
4155
+ "logits/chosen": -1.1760886907577515,
4156
+ "logits/rejected": -0.8848980665206909,
4157
+ "logps/chosen": -50.471961975097656,
4158
+ "logps/rejected": -569.0016479492188,
4159
+ "loss": 15240.5234,
4160
+ "rewards/accuracies": 1.0,
4161
+ "rewards/chosen": 0.1889052391052246,
4162
+ "rewards/margins": 0.5153056383132935,
4163
+ "rewards/rejected": -0.3264002799987793,
4164
+ "step": 2760
4165
  }
4166
  ],
4167
  "logging_steps": 10,