cat-searcher committed on
Commit
39281e8
1 Parent(s): d679692

Training in progress, epoch 30, checkpoint

Browse files
Files changed (29) hide show
  1. last-checkpoint/global_step6113/bf16_zero_pp_rank_0_mp_rank_00_optim_states.pt +3 -0
  2. last-checkpoint/global_step6113/bf16_zero_pp_rank_1_mp_rank_00_optim_states.pt +3 -0
  3. last-checkpoint/global_step6113/bf16_zero_pp_rank_2_mp_rank_00_optim_states.pt +3 -0
  4. last-checkpoint/global_step6113/bf16_zero_pp_rank_3_mp_rank_00_optim_states.pt +3 -0
  5. last-checkpoint/global_step6113/bf16_zero_pp_rank_4_mp_rank_00_optim_states.pt +3 -0
  6. last-checkpoint/global_step6113/bf16_zero_pp_rank_5_mp_rank_00_optim_states.pt +3 -0
  7. last-checkpoint/global_step6113/bf16_zero_pp_rank_6_mp_rank_00_optim_states.pt +3 -0
  8. last-checkpoint/global_step6113/bf16_zero_pp_rank_7_mp_rank_00_optim_states.pt +3 -0
  9. last-checkpoint/global_step6113/zero_pp_rank_0_mp_rank_00_model_states.pt +3 -0
  10. last-checkpoint/global_step6113/zero_pp_rank_1_mp_rank_00_model_states.pt +3 -0
  11. last-checkpoint/global_step6113/zero_pp_rank_2_mp_rank_00_model_states.pt +3 -0
  12. last-checkpoint/global_step6113/zero_pp_rank_3_mp_rank_00_model_states.pt +3 -0
  13. last-checkpoint/global_step6113/zero_pp_rank_4_mp_rank_00_model_states.pt +3 -0
  14. last-checkpoint/global_step6113/zero_pp_rank_5_mp_rank_00_model_states.pt +3 -0
  15. last-checkpoint/global_step6113/zero_pp_rank_6_mp_rank_00_model_states.pt +3 -0
  16. last-checkpoint/global_step6113/zero_pp_rank_7_mp_rank_00_model_states.pt +3 -0
  17. last-checkpoint/latest +1 -1
  18. last-checkpoint/model-00001-of-00002.safetensors +1 -1
  19. last-checkpoint/model-00002-of-00002.safetensors +1 -1
  20. last-checkpoint/rng_state_0.pth +1 -1
  21. last-checkpoint/rng_state_1.pth +1 -1
  22. last-checkpoint/rng_state_2.pth +1 -1
  23. last-checkpoint/rng_state_3.pth +1 -1
  24. last-checkpoint/rng_state_4.pth +1 -1
  25. last-checkpoint/rng_state_5.pth +1 -1
  26. last-checkpoint/rng_state_6.pth +1 -1
  27. last-checkpoint/rng_state_7.pth +1 -1
  28. last-checkpoint/scheduler.pt +1 -1
  29. last-checkpoint/trainer_state.json +302 -2
last-checkpoint/global_step6113/bf16_zero_pp_rank_0_mp_rank_00_optim_states.pt ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:0499858123b31f7a4204d087c039e8726dc6ad4603df074753febcc04419561a
3
+ size 2506176112
last-checkpoint/global_step6113/bf16_zero_pp_rank_1_mp_rank_00_optim_states.pt ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:e72e84707d58bb67c0db60989f9c153473ac467cba53022b78d75a841fd9ab53
3
+ size 2506176112
last-checkpoint/global_step6113/bf16_zero_pp_rank_2_mp_rank_00_optim_states.pt ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:fdf672e0ab53201eff10260483f20107ede551a0770ffa18e03d8cf6f9fa4bb8
3
+ size 2506176112
last-checkpoint/global_step6113/bf16_zero_pp_rank_3_mp_rank_00_optim_states.pt ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:7e65dea90b6ebad258ebeb2480617854af6fb764c69f32a09290d8098b409ffa
3
+ size 2506176112
last-checkpoint/global_step6113/bf16_zero_pp_rank_4_mp_rank_00_optim_states.pt ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:ac88c49031eafd7094a57d19a07adcfab7e8035bf8f4fce77718cd7ad6881af9
3
+ size 2506176112
last-checkpoint/global_step6113/bf16_zero_pp_rank_5_mp_rank_00_optim_states.pt ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:7c04cd0d89bc49c2d22736060318f6bd9a096c0bb6aa02df4860ea10b269624c
3
+ size 2506176112
last-checkpoint/global_step6113/bf16_zero_pp_rank_6_mp_rank_00_optim_states.pt ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:4d6d7ca8d8feae576757708b7a34c4018a12ee540735d04c0c8c1b3de1489bf4
3
+ size 2506176112
last-checkpoint/global_step6113/bf16_zero_pp_rank_7_mp_rank_00_optim_states.pt ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:b84296edebd96febb1ac999c83a53f11cf291da4cbe9d06d3d2c319e1eabd737
3
+ size 2506176112
last-checkpoint/global_step6113/zero_pp_rank_0_mp_rank_00_model_states.pt ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:561724fd38716552727a57affc0c5d116f64c8e53da7c31f5b707480dfd32bb9
3
+ size 85570
last-checkpoint/global_step6113/zero_pp_rank_1_mp_rank_00_model_states.pt ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:a8064a387c0abdcce514d00b06b69a41e661ce183e377f111f476ca71082d97f
3
+ size 85506
last-checkpoint/global_step6113/zero_pp_rank_2_mp_rank_00_model_states.pt ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:0191b2ffba6a0b1a7fca25147384ea4b659723c25754ef7916d3cd7d62aa692b
3
+ size 85506
last-checkpoint/global_step6113/zero_pp_rank_3_mp_rank_00_model_states.pt ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:66690d8256e090f00ce5ead20a2bde13e0e3ed458334da20baf9b28dcc486813
3
+ size 85506
last-checkpoint/global_step6113/zero_pp_rank_4_mp_rank_00_model_states.pt ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:df4ee2a67dc3236ef4d1dd231d6108de62d9c6c7105ddf2d1dc172d972f4a138
3
+ size 85506
last-checkpoint/global_step6113/zero_pp_rank_5_mp_rank_00_model_states.pt ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:38355ae4214f9d9d2b1a486a99713a7ab23cdb8f2d880e01fbe59be80673fff1
3
+ size 85506
last-checkpoint/global_step6113/zero_pp_rank_6_mp_rank_00_model_states.pt ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:96454f7b3634ce62258c7197cf4b4befbf6c5f3323c69fd38b68cab2b2a39bd1
3
+ size 85506
last-checkpoint/global_step6113/zero_pp_rank_7_mp_rank_00_model_states.pt ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:e95f4cf5fdb61c35b72378382a3517094d3e980be021c62c73e593c994774f6f
3
+ size 85506
last-checkpoint/latest CHANGED
@@ -1 +1 @@
1
- global_step5916
 
1
+ global_step6113
last-checkpoint/model-00001-of-00002.safetensors CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:1d3b3ba3d73fcfe06684964aa6a0da80302158d0b45234f7473a31d0a8f86d45
3
  size 4945242264
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:8a98e86a0252e0053a50f39ee5da1bc91fc6cc770c00bdf4879a07beb1bfb774
3
  size 4945242264
last-checkpoint/model-00002-of-00002.safetensors CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:d86e8e9eec87ad0431303407e43b154a156dec7fa7e56ad78330489a8ac89828
3
  size 67121608
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:90d998420cc3a03c1798986731cd33f00d6a7cd27ade984be3af7b6e9ba76532
3
  size 67121608
last-checkpoint/rng_state_0.pth CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:5c6c5e7528cb919bf0e448095ef87b2e21b836859cd72ece77237cb822e78f88
3
  size 15984
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:4ac3887e0b6e47ee941f0099109d55fe8c4958125034b41513ac29b0a09c9e86
3
  size 15984
last-checkpoint/rng_state_1.pth CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:c3be13484e2a524a3f5e79f3e726fd41b0876252a9d0898131fc1ccf0d86f6a8
3
  size 15984
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:de0272654fc4c603ac86d1e7e8bda566a95de507f7e48193f810ed9d664b308b
3
  size 15984
last-checkpoint/rng_state_2.pth CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:f070bbb32dc96a08f76f5f85c2e01ae98d5e4d16dd18623bfab6b4b54e1d03c4
3
  size 15984
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:09a612c5aff75737f917d89b930ca876fddca4236d73b7c4e2122a374a8c7279
3
  size 15984
last-checkpoint/rng_state_3.pth CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:5f43d659c0909b37d66f4502f36b99850e3f553b6e2fb3ac13de4a060aa1cdc1
3
  size 15984
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:15d130c7a2df3aa45929d007a20cc7aee23015b41b9d24357c85880be9954c25
3
  size 15984
last-checkpoint/rng_state_4.pth CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:c80a51e24107d3bf93a1e4d903f42d6626efe1c5b8bd714f393fb93f840c5ce2
3
  size 15984
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:6c0852f2ccb050a5008bb4e71f4b3a6ea8b845e136589936cf10d03133438507
3
  size 15984
last-checkpoint/rng_state_5.pth CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:e624a5e2e5c07e8e80b5d066ec132b4a872761dcba6d77f7386705eb95f67228
3
  size 15984
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:3cad3ab70f668806214a1537af9f7d2fc6de4401f7eec2f544f9059052862cb5
3
  size 15984
last-checkpoint/rng_state_6.pth CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:dc47fa553a7e6a70b45be521b98449ef920fe0d39f6cf85ae50b9a45d6c9da85
3
  size 15984
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:57dc573e9fb6a6cc583ada6a4737e432c792b6361f3eb47de697d69b75f09d21
3
  size 15984
last-checkpoint/rng_state_7.pth CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:f8a0e7009351aac3c555f7946c515a64baa2d14aa4706e4a371e0c2df02b5a90
3
  size 15984
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:c46a2fd5508f6b24b7fd294db0c5ae6e928c7174f4e3caeeeeca276dd88690c5
3
  size 15984
last-checkpoint/scheduler.pt CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:db6ce75ecc7db03a363686e05ba8e98d2588fcd56e7f730c69ff2320b79e2de2
3
  size 1064
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:12fdc5d527314ffb69c899c4aaadd4582bc4cb2067e14fecb7c5f8dd01262d7f
3
  size 1064
last-checkpoint/trainer_state.json CHANGED
@@ -1,9 +1,9 @@
1
  {
2
  "best_metric": null,
3
  "best_model_checkpoint": null,
4
- "epoch": 30.0,
5
  "eval_steps": 100,
6
- "global_step": 5916,
7
  "is_hyper_param_search": false,
8
  "is_local_process_zero": true,
9
  "is_world_process_zero": true,
@@ -8887,6 +8887,306 @@
8887
  "rewards/margins": 0.5534237027168274,
8888
  "rewards/rejected": -0.34499144554138184,
8889
  "step": 5910
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
8890
  }
8891
  ],
8892
  "logging_steps": 10,
 
1
  {
2
  "best_metric": null,
3
  "best_model_checkpoint": null,
4
+ "epoch": 30.99746835443038,
5
  "eval_steps": 100,
6
+ "global_step": 6113,
7
  "is_hyper_param_search": false,
8
  "is_local_process_zero": true,
9
  "is_world_process_zero": true,
 
8887
  "rewards/margins": 0.5534237027168274,
8888
  "rewards/rejected": -0.34499144554138184,
8889
  "step": 5910
8890
+ },
8891
+ {
8892
+ "epoch": 30.020253164556962,
8893
+ "grad_norm": 218071.18905642498,
8894
+ "learning_rate": 9.182074584769664e-08,
8895
+ "logits/chosen": -0.20139971375465393,
8896
+ "logits/rejected": 0.6374796628952026,
8897
+ "logps/chosen": -36.04420852661133,
8898
+ "logps/rejected": -585.3655395507812,
8899
+ "loss": 12139.8164,
8900
+ "rewards/accuracies": 0.987500011920929,
8901
+ "rewards/chosen": 0.20962996780872345,
8902
+ "rewards/margins": 0.5547569394111633,
8903
+ "rewards/rejected": -0.3451269865036011,
8904
+ "step": 5920
8905
+ },
8906
+ {
8907
+ "epoch": 30.070886075949367,
8908
+ "grad_norm": 199502.22634833233,
8909
+ "learning_rate": 9.103729238483233e-08,
8910
+ "logits/chosen": -0.5093935132026672,
8911
+ "logits/rejected": -0.9036226272583008,
8912
+ "logps/chosen": -32.292423248291016,
8913
+ "logps/rejected": -584.6748046875,
8914
+ "loss": 11463.5555,
8915
+ "rewards/accuracies": 1.0,
8916
+ "rewards/chosen": 0.20751234889030457,
8917
+ "rewards/margins": 0.5532687902450562,
8918
+ "rewards/rejected": -0.345756471157074,
8919
+ "step": 5930
8920
+ },
8921
+ {
8922
+ "epoch": 30.121518987341773,
8923
+ "grad_norm": 164683.94241544002,
8924
+ "learning_rate": 9.025383892196802e-08,
8925
+ "logits/chosen": -1.2027417421340942,
8926
+ "logits/rejected": -0.21418258547782898,
8927
+ "logps/chosen": -38.00572967529297,
8928
+ "logps/rejected": -542.5321044921875,
8929
+ "loss": 12248.6938,
8930
+ "rewards/accuracies": 0.9624999761581421,
8931
+ "rewards/chosen": 0.1922551691532135,
8932
+ "rewards/margins": 0.5097079277038574,
8933
+ "rewards/rejected": -0.3174527585506439,
8934
+ "step": 5940
8935
+ },
8936
+ {
8937
+ "epoch": 30.172151898734178,
8938
+ "grad_norm": 209885.7696817789,
8939
+ "learning_rate": 8.947038545910373e-08,
8940
+ "logits/chosen": -0.5836046934127808,
8941
+ "logits/rejected": -0.049278389662504196,
8942
+ "logps/chosen": -26.44875144958496,
8943
+ "logps/rejected": -577.2633056640625,
8944
+ "loss": 11882.8156,
8945
+ "rewards/accuracies": 1.0,
8946
+ "rewards/chosen": 0.2028985321521759,
8947
+ "rewards/margins": 0.548152506351471,
8948
+ "rewards/rejected": -0.34525397419929504,
8949
+ "step": 5950
8950
+ },
8951
+ {
8952
+ "epoch": 30.222784810126583,
8953
+ "grad_norm": 116064.20956709805,
8954
+ "learning_rate": 8.868693199623942e-08,
8955
+ "logits/chosen": -0.3441212773323059,
8956
+ "logits/rejected": 0.3469446897506714,
8957
+ "logps/chosen": -29.866031646728516,
8958
+ "logps/rejected": -576.8469848632812,
8959
+ "loss": 11899.9906,
8960
+ "rewards/accuracies": 0.987500011920929,
8961
+ "rewards/chosen": 0.20657262206077576,
8962
+ "rewards/margins": 0.5527979731559753,
8963
+ "rewards/rejected": -0.3462253212928772,
8964
+ "step": 5960
8965
+ },
8966
+ {
8967
+ "epoch": 30.27341772151899,
8968
+ "grad_norm": 213446.8577722312,
8969
+ "learning_rate": 8.790347853337511e-08,
8970
+ "logits/chosen": -1.195245623588562,
8971
+ "logits/rejected": -1.5595389604568481,
8972
+ "logps/chosen": -26.48971939086914,
8973
+ "logps/rejected": -562.9793090820312,
8974
+ "loss": 12288.4188,
8975
+ "rewards/accuracies": 0.9624999761581421,
8976
+ "rewards/chosen": 0.19797027111053467,
8977
+ "rewards/margins": 0.5365854501724243,
8978
+ "rewards/rejected": -0.3386152386665344,
8979
+ "step": 5970
8980
+ },
8981
+ {
8982
+ "epoch": 30.324050632911394,
8983
+ "grad_norm": 150392.6550831942,
8984
+ "learning_rate": 8.712002507051081e-08,
8985
+ "logits/chosen": -0.636971116065979,
8986
+ "logits/rejected": -0.8326961398124695,
8987
+ "logps/chosen": -31.82355308532715,
8988
+ "logps/rejected": -572.6309814453125,
8989
+ "loss": 11735.9594,
8990
+ "rewards/accuracies": 0.9750000238418579,
8991
+ "rewards/chosen": 0.20741339027881622,
8992
+ "rewards/margins": 0.5429075360298157,
8993
+ "rewards/rejected": -0.33549413084983826,
8994
+ "step": 5980
8995
+ },
8996
+ {
8997
+ "epoch": 30.374683544303796,
8998
+ "grad_norm": 248873.00017903763,
8999
+ "learning_rate": 8.63365716076465e-08,
9000
+ "logits/chosen": -0.8763412237167358,
9001
+ "logits/rejected": -0.38471752405166626,
9002
+ "logps/chosen": -33.753509521484375,
9003
+ "logps/rejected": -577.4928588867188,
9004
+ "loss": 11981.0336,
9005
+ "rewards/accuracies": 0.9750000238418579,
9006
+ "rewards/chosen": 0.20298103988170624,
9007
+ "rewards/margins": 0.5445905923843384,
9008
+ "rewards/rejected": -0.34160953760147095,
9009
+ "step": 5990
9010
+ },
9011
+ {
9012
+ "epoch": 30.4253164556962,
9013
+ "grad_norm": 247123.70966936232,
9014
+ "learning_rate": 8.555311814478219e-08,
9015
+ "logits/chosen": -1.4638581275939941,
9016
+ "logits/rejected": -1.6560137271881104,
9017
+ "logps/chosen": -27.02420425415039,
9018
+ "logps/rejected": -579.2672119140625,
9019
+ "loss": 12743.5711,
9020
+ "rewards/accuracies": 0.987500011920929,
9021
+ "rewards/chosen": 0.20767991244792938,
9022
+ "rewards/margins": 0.5485936403274536,
9023
+ "rewards/rejected": -0.3409137427806854,
9024
+ "step": 6000
9025
+ },
9026
+ {
9027
+ "epoch": 30.475949367088607,
9028
+ "grad_norm": 152247.200364489,
9029
+ "learning_rate": 8.476966468191789e-08,
9030
+ "logits/chosen": -1.2626516819000244,
9031
+ "logits/rejected": -1.3656198978424072,
9032
+ "logps/chosen": -30.586597442626953,
9033
+ "logps/rejected": -564.9951782226562,
9034
+ "loss": 12138.7328,
9035
+ "rewards/accuracies": 0.987500011920929,
9036
+ "rewards/chosen": 0.19354796409606934,
9037
+ "rewards/margins": 0.5365390181541443,
9038
+ "rewards/rejected": -0.34299105405807495,
9039
+ "step": 6010
9040
+ },
9041
+ {
9042
+ "epoch": 30.526582278481012,
9043
+ "grad_norm": 153551.3953399981,
9044
+ "learning_rate": 8.398621121905358e-08,
9045
+ "logits/chosen": -0.8625293970108032,
9046
+ "logits/rejected": -1.6173267364501953,
9047
+ "logps/chosen": -23.908416748046875,
9048
+ "logps/rejected": -591.5709228515625,
9049
+ "loss": 12247.5672,
9050
+ "rewards/accuracies": 1.0,
9051
+ "rewards/chosen": 0.2063622921705246,
9052
+ "rewards/margins": 0.5622067451477051,
9053
+ "rewards/rejected": -0.3558444678783417,
9054
+ "step": 6020
9055
+ },
9056
+ {
9057
+ "epoch": 30.577215189873417,
9058
+ "grad_norm": 247558.34356145174,
9059
+ "learning_rate": 8.320275775618927e-08,
9060
+ "logits/chosen": -0.6456829309463501,
9061
+ "logits/rejected": -0.25254157185554504,
9062
+ "logps/chosen": -30.65958023071289,
9063
+ "logps/rejected": -572.8914794921875,
9064
+ "loss": 11907.7406,
9065
+ "rewards/accuracies": 0.987500011920929,
9066
+ "rewards/chosen": 0.20363232493400574,
9067
+ "rewards/margins": 0.5450500845909119,
9068
+ "rewards/rejected": -0.34141772985458374,
9069
+ "step": 6030
9070
+ },
9071
+ {
9072
+ "epoch": 30.627848101265823,
9073
+ "grad_norm": 162045.80468301394,
9074
+ "learning_rate": 8.241930429332496e-08,
9075
+ "logits/chosen": 0.5193571448326111,
9076
+ "logits/rejected": 1.0150249004364014,
9077
+ "logps/chosen": -21.961605072021484,
9078
+ "logps/rejected": -586.28369140625,
9079
+ "loss": 11870.2594,
9080
+ "rewards/accuracies": 1.0,
9081
+ "rewards/chosen": 0.20082764327526093,
9082
+ "rewards/margins": 0.5631116032600403,
9083
+ "rewards/rejected": -0.36228394508361816,
9084
+ "step": 6040
9085
+ },
9086
+ {
9087
+ "epoch": 30.678481012658228,
9088
+ "grad_norm": 183677.73161043233,
9089
+ "learning_rate": 8.163585083046067e-08,
9090
+ "logits/chosen": -2.2690348625183105,
9091
+ "logits/rejected": -1.8725353479385376,
9092
+ "logps/chosen": -34.20100021362305,
9093
+ "logps/rejected": -571.948486328125,
9094
+ "loss": 11952.7477,
9095
+ "rewards/accuracies": 0.9750000238418579,
9096
+ "rewards/chosen": 0.2089938223361969,
9097
+ "rewards/margins": 0.542193591594696,
9098
+ "rewards/rejected": -0.33319979906082153,
9099
+ "step": 6050
9100
+ },
9101
+ {
9102
+ "epoch": 30.729113924050633,
9103
+ "grad_norm": 206299.51509471133,
9104
+ "learning_rate": 8.085239736759636e-08,
9105
+ "logits/chosen": -1.8886245489120483,
9106
+ "logits/rejected": -1.428289532661438,
9107
+ "logps/chosen": -33.442771911621094,
9108
+ "logps/rejected": -577.7710571289062,
9109
+ "loss": 12094.3477,
9110
+ "rewards/accuracies": 0.9750000238418579,
9111
+ "rewards/chosen": 0.20627331733703613,
9112
+ "rewards/margins": 0.5446707606315613,
9113
+ "rewards/rejected": -0.3383975028991699,
9114
+ "step": 6060
9115
+ },
9116
+ {
9117
+ "epoch": 30.77974683544304,
9118
+ "grad_norm": 178455.47052363763,
9119
+ "learning_rate": 8.006894390473206e-08,
9120
+ "logits/chosen": -0.9159374237060547,
9121
+ "logits/rejected": -0.5700797438621521,
9122
+ "logps/chosen": -22.003402709960938,
9123
+ "logps/rejected": -588.1246948242188,
9124
+ "loss": 12967.7109,
9125
+ "rewards/accuracies": 1.0,
9126
+ "rewards/chosen": 0.20624502003192902,
9127
+ "rewards/margins": 0.5602100491523743,
9128
+ "rewards/rejected": -0.35396507382392883,
9129
+ "step": 6070
9130
+ },
9131
+ {
9132
+ "epoch": 30.830379746835444,
9133
+ "grad_norm": 188994.549896938,
9134
+ "learning_rate": 7.928549044186775e-08,
9135
+ "logits/chosen": -2.218046188354492,
9136
+ "logits/rejected": -2.298725128173828,
9137
+ "logps/chosen": -36.601036071777344,
9138
+ "logps/rejected": -578.4909057617188,
9139
+ "loss": 11942.907,
9140
+ "rewards/accuracies": 0.987500011920929,
9141
+ "rewards/chosen": 0.20413891971111298,
9142
+ "rewards/margins": 0.5424162149429321,
9143
+ "rewards/rejected": -0.33827728033065796,
9144
+ "step": 6080
9145
+ },
9146
+ {
9147
+ "epoch": 30.88101265822785,
9148
+ "grad_norm": 226618.0916543629,
9149
+ "learning_rate": 7.850203697900344e-08,
9150
+ "logits/chosen": -0.8958581686019897,
9151
+ "logits/rejected": -0.3350396454334259,
9152
+ "logps/chosen": -27.914409637451172,
9153
+ "logps/rejected": -584.2742919921875,
9154
+ "loss": 12020.4094,
9155
+ "rewards/accuracies": 0.987500011920929,
9156
+ "rewards/chosen": 0.2059321403503418,
9157
+ "rewards/margins": 0.5559757947921753,
9158
+ "rewards/rejected": -0.3500436246395111,
9159
+ "step": 6090
9160
+ },
9161
+ {
9162
+ "epoch": 30.931645569620255,
9163
+ "grad_norm": 193720.76624447017,
9164
+ "learning_rate": 7.771858351613913e-08,
9165
+ "logits/chosen": -0.13203875720500946,
9166
+ "logits/rejected": -0.22968029975891113,
9167
+ "logps/chosen": -25.164508819580078,
9168
+ "logps/rejected": -573.2855224609375,
9169
+ "loss": 12096.6344,
9170
+ "rewards/accuracies": 0.987500011920929,
9171
+ "rewards/chosen": 0.1958034336566925,
9172
+ "rewards/margins": 0.5465744137763977,
9173
+ "rewards/rejected": -0.3507709503173828,
9174
+ "step": 6100
9175
+ },
9176
+ {
9177
+ "epoch": 30.98227848101266,
9178
+ "grad_norm": 177238.22636435836,
9179
+ "learning_rate": 7.693513005327483e-08,
9180
+ "logits/chosen": -2.0759382247924805,
9181
+ "logits/rejected": -1.4708411693572998,
9182
+ "logps/chosen": -28.522485733032227,
9183
+ "logps/rejected": -573.73681640625,
9184
+ "loss": 12017.775,
9185
+ "rewards/accuracies": 0.987500011920929,
9186
+ "rewards/chosen": 0.20944443345069885,
9187
+ "rewards/margins": 0.5498504042625427,
9188
+ "rewards/rejected": -0.34040600061416626,
9189
+ "step": 6110
9190
  }
9191
  ],
9192
  "logging_steps": 10,