cat-searcher commited on
Commit
f90c7f7
·
verified ·
1 Parent(s): f3806d1

Training in progress, epoch 24, checkpoint

Browse files
Files changed (29) hide show
  1. last-checkpoint/global_step4928/bf16_zero_pp_rank_0_mp_rank_00_optim_states.pt +3 -0
  2. last-checkpoint/global_step4928/bf16_zero_pp_rank_1_mp_rank_00_optim_states.pt +3 -0
  3. last-checkpoint/global_step4928/bf16_zero_pp_rank_2_mp_rank_00_optim_states.pt +3 -0
  4. last-checkpoint/global_step4928/bf16_zero_pp_rank_3_mp_rank_00_optim_states.pt +3 -0
  5. last-checkpoint/global_step4928/bf16_zero_pp_rank_4_mp_rank_00_optim_states.pt +3 -0
  6. last-checkpoint/global_step4928/bf16_zero_pp_rank_5_mp_rank_00_optim_states.pt +3 -0
  7. last-checkpoint/global_step4928/bf16_zero_pp_rank_6_mp_rank_00_optim_states.pt +3 -0
  8. last-checkpoint/global_step4928/bf16_zero_pp_rank_7_mp_rank_00_optim_states.pt +3 -0
  9. last-checkpoint/global_step4928/zero_pp_rank_0_mp_rank_00_model_states.pt +3 -0
  10. last-checkpoint/global_step4928/zero_pp_rank_1_mp_rank_00_model_states.pt +3 -0
  11. last-checkpoint/global_step4928/zero_pp_rank_2_mp_rank_00_model_states.pt +3 -0
  12. last-checkpoint/global_step4928/zero_pp_rank_3_mp_rank_00_model_states.pt +3 -0
  13. last-checkpoint/global_step4928/zero_pp_rank_4_mp_rank_00_model_states.pt +3 -0
  14. last-checkpoint/global_step4928/zero_pp_rank_5_mp_rank_00_model_states.pt +3 -0
  15. last-checkpoint/global_step4928/zero_pp_rank_6_mp_rank_00_model_states.pt +3 -0
  16. last-checkpoint/global_step4928/zero_pp_rank_7_mp_rank_00_model_states.pt +3 -0
  17. last-checkpoint/latest +1 -1
  18. last-checkpoint/model-00001-of-00002.safetensors +1 -1
  19. last-checkpoint/model-00002-of-00002.safetensors +1 -1
  20. last-checkpoint/rng_state_0.pth +1 -1
  21. last-checkpoint/rng_state_1.pth +1 -1
  22. last-checkpoint/rng_state_2.pth +1 -1
  23. last-checkpoint/rng_state_3.pth +1 -1
  24. last-checkpoint/rng_state_4.pth +1 -1
  25. last-checkpoint/rng_state_5.pth +1 -1
  26. last-checkpoint/rng_state_6.pth +1 -1
  27. last-checkpoint/rng_state_7.pth +1 -1
  28. last-checkpoint/scheduler.pt +1 -1
  29. last-checkpoint/trainer_state.json +287 -2
last-checkpoint/global_step4928/bf16_zero_pp_rank_0_mp_rank_00_optim_states.pt ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:8a57a9a23ef7f096514a7c23cd3b6dd9320c85781d700c34b4b22f2c8df92a75
3
+ size 2506176112
last-checkpoint/global_step4928/bf16_zero_pp_rank_1_mp_rank_00_optim_states.pt ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:021f57078c9044b2309021b4fc20bdc862c548034ebebb7a1e2c77c8eaae5d09
3
+ size 2506176112
last-checkpoint/global_step4928/bf16_zero_pp_rank_2_mp_rank_00_optim_states.pt ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:19eaeba11c0ceb2532ddfd732a74b89769b0567f5c2480d566b19b72c9fd6a80
3
+ size 2506176112
last-checkpoint/global_step4928/bf16_zero_pp_rank_3_mp_rank_00_optim_states.pt ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:ee5c93da09fbfeb06fc19b3cef49ab91db0e725035444d6cdb96809b8b9eadcb
3
+ size 2506176112
last-checkpoint/global_step4928/bf16_zero_pp_rank_4_mp_rank_00_optim_states.pt ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:c00ffef8eefa8f8358ed1c6f61d654b20c27eddbfe0dd82830a9f20e16757f85
3
+ size 2506176112
last-checkpoint/global_step4928/bf16_zero_pp_rank_5_mp_rank_00_optim_states.pt ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:c10d54de0cd548e613485ee0e69da7d813ee80b2317569160958e2e4574aa72b
3
+ size 2506176112
last-checkpoint/global_step4928/bf16_zero_pp_rank_6_mp_rank_00_optim_states.pt ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:626e1b55e0f1d6f3ce34dcade96d7fba40f0099032be2cd622d3062d9fca2e0e
3
+ size 2506176112
last-checkpoint/global_step4928/bf16_zero_pp_rank_7_mp_rank_00_optim_states.pt ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:eb896c66c8c21d39353a007d4c2ac348fc8f8b10134c68b5313b9e83069fd695
3
+ size 2506176112
last-checkpoint/global_step4928/zero_pp_rank_0_mp_rank_00_model_states.pt ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:84194c554e1ba6e218f3d9df508f289e525895d9d44dbdbafac31ef0e28aa543
3
+ size 85570
last-checkpoint/global_step4928/zero_pp_rank_1_mp_rank_00_model_states.pt ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:ca9ecac1033adac02f1e567dee0188e38f517d7c2675ace09591182ee269512c
3
+ size 85506
last-checkpoint/global_step4928/zero_pp_rank_2_mp_rank_00_model_states.pt ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:77c04da8f7351dc76a8b4e781d08009e5e26ec0026e8092fb4066e4ee4c0c017
3
+ size 85506
last-checkpoint/global_step4928/zero_pp_rank_3_mp_rank_00_model_states.pt ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:52caac51b5ab59ce3fdf3092538f84272c220a80081cc21f22f230699d5152d1
3
+ size 85506
last-checkpoint/global_step4928/zero_pp_rank_4_mp_rank_00_model_states.pt ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:15beab398fd58f2a629c4c60d768da129efbcfc290c667ba33bdf6fd1245d530
3
+ size 85506
last-checkpoint/global_step4928/zero_pp_rank_5_mp_rank_00_model_states.pt ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:cc28cb444e8b65d936deba8cb0b52f072349842d1308d11e82c4aed0769e1934
3
+ size 85506
last-checkpoint/global_step4928/zero_pp_rank_6_mp_rank_00_model_states.pt ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:350b51963f22fad0c545de65665496d3aeea7062c1ab553cc246600f06963c6a
3
+ size 85506
last-checkpoint/global_step4928/zero_pp_rank_7_mp_rank_00_model_states.pt ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:76b41ed59377277c212831b6ecb28e620914d57efeefc205f64c9c84a7c32c26
3
+ size 85506
last-checkpoint/latest CHANGED
@@ -1 +1 @@
1
- global_step4731
 
1
+ global_step4928
last-checkpoint/model-00001-of-00002.safetensors CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:8c0fa0a03d21a0ff6b27eafad6ffcf212788b879afb1bb9961a5a085f6a29e4a
3
  size 4945242264
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:b9da2b7dcfa00a88a71ba39f69087d6c106b9fc3502f080f4fdf0c95ba7f0232
3
  size 4945242264
last-checkpoint/model-00002-of-00002.safetensors CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:a8b10af69a716125a68d0b4044a64aaba3019edc45d92d331681815aa21d3cbf
3
  size 67121608
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:66272c39e510a03fae4a8b8c2051165d69a38abcede58508cbcdf95984247125
3
  size 67121608
last-checkpoint/rng_state_0.pth CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:de1e7081b4b7ecbe80a0244c117feb164a6edd889266c562a7b550e4bdb35eb2
3
  size 15984
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:6118faeaee4fdfaa13ac70fa72cfa116964f5a3b96fac7723d08df58a38b397c
3
  size 15984
last-checkpoint/rng_state_1.pth CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:abeabad207497496517e104a739bcf618c55f0f5f24ab84280d26e5a138129b9
3
  size 15984
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:4dfa684751cc5b0d0c6c20837671c3f615d1dea15a14f377cd3f328d98685be6
3
  size 15984
last-checkpoint/rng_state_2.pth CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:cbfeffde7de6926c029ae5ce615a2206c5f1f7b6d3ba91813e5f083de4672d94
3
  size 15984
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:f6288d8200e463b5ae8ad06a62ffa8cc379f48c86fd01515a13181f976efa71a
3
  size 15984
last-checkpoint/rng_state_3.pth CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:9fe308403a8a8a99e9433e74b57e10647861653f3902c0d37d8ffd57267868ef
3
  size 15984
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:bd3d0484a9201a8efbd51326113ef6b44a0b272cfa40529d39c56a9d67deac8e
3
  size 15984
last-checkpoint/rng_state_4.pth CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:5559dcd2db39330772a1e035b47ab90a38c1fad876c0cc77298cb150d7c792b0
3
  size 15984
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:426c6fc3fcaae6fad4fdb83d268d91ed94e3e0e88bed483e185a78e86a9692b3
3
  size 15984
last-checkpoint/rng_state_5.pth CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:e5e601f488ab4088a4b5f2b2eaf592f26d9277d818c0ccce4b09ffc93af9b671
3
  size 15984
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:7d089430782ecc3b7446f527601b569a4990fdd9f8d96c18c87dc8d503cbdb70
3
  size 15984
last-checkpoint/rng_state_6.pth CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:948cff0d258b1f24171ea15c9fd1db43d638e0bf5726b3462453de79d891e501
3
  size 15984
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:b61f73069ccc96351c7253c2334d20c170e92750457b685c9d79286d288292e9
3
  size 15984
last-checkpoint/rng_state_7.pth CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:2e1437c26fc61851ea1c51c8dbde816a8f21ba5abf80812e00135e1e418db89c
3
  size 15984
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:e70ddcc8f9ba6c0be1ac8d1ba2f06e8a4253e0a843f3a263a85c073416ca948d
3
  size 15984
last-checkpoint/scheduler.pt CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:7afe10276133c7ad9d69e4ce5223f876b6c12764d68dc657b0c96f5afe271edc
3
  size 1064
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:cc0dea9c7426022052c6904d669ed6537aacbe6f69c00de710577f64daa74c6b
3
  size 1064
last-checkpoint/trainer_state.json CHANGED
@@ -1,9 +1,9 @@
1
  {
2
  "best_metric": null,
3
  "best_model_checkpoint": null,
4
- "epoch": 24.0,
5
  "eval_steps": 100,
6
- "global_step": 4731,
7
  "is_hyper_param_search": false,
8
  "is_local_process_zero": true,
9
  "is_world_process_zero": true,
@@ -7117,6 +7117,291 @@
7117
  "rewards/margins": 0.5577437877655029,
7118
  "rewards/rejected": -0.3512403070926666,
7119
  "step": 4730
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
7120
  }
7121
  ],
7122
  "logging_steps": 10,
 
1
  {
2
  "best_metric": null,
3
  "best_model_checkpoint": null,
4
+ "epoch": 24.99746835443038,
5
  "eval_steps": 100,
6
+ "global_step": 4928,
7
  "is_hyper_param_search": false,
8
  "is_local_process_zero": true,
9
  "is_world_process_zero": true,
 
7117
  "rewards/margins": 0.5577437877655029,
7118
  "rewards/rejected": -0.3512403070926666,
7119
  "step": 4730
7120
+ },
7121
+ {
7122
+ "epoch": 24.045569620253165,
7123
+ "grad_norm": 627241.9523500776,
7124
+ "learning_rate": 1.8426825446568473e-07,
7125
+ "logits/chosen": -0.28766584396362305,
7126
+ "logits/rejected": -0.6269916296005249,
7127
+ "logps/chosen": -44.65516662597656,
7128
+ "logps/rejected": -577.6954345703125,
7129
+ "loss": 12950.232,
7130
+ "rewards/accuracies": 1.0,
7131
+ "rewards/chosen": 0.20441746711730957,
7132
+ "rewards/margins": 0.5374675393104553,
7133
+ "rewards/rejected": -0.33305004239082336,
7134
+ "step": 4740
7135
+ },
7136
+ {
7137
+ "epoch": 24.09620253164557,
7138
+ "grad_norm": 347781.48168387014,
7139
+ "learning_rate": 1.8348480100282043e-07,
7140
+ "logits/chosen": -0.7881828546524048,
7141
+ "logits/rejected": 0.06337795406579971,
7142
+ "logps/chosen": -27.82383155822754,
7143
+ "logps/rejected": -579.8468017578125,
7144
+ "loss": 12986.6266,
7145
+ "rewards/accuracies": 1.0,
7146
+ "rewards/chosen": 0.2079242467880249,
7147
+ "rewards/margins": 0.5530039668083191,
7148
+ "rewards/rejected": -0.3450797498226166,
7149
+ "step": 4750
7150
+ },
7151
+ {
7152
+ "epoch": 24.146835443037975,
7153
+ "grad_norm": 263636.0742414822,
7154
+ "learning_rate": 1.827013475399561e-07,
7155
+ "logits/chosen": -1.2877452373504639,
7156
+ "logits/rejected": -0.24622194468975067,
7157
+ "logps/chosen": -30.940113067626953,
7158
+ "logps/rejected": -566.517333984375,
7159
+ "loss": 12716.3281,
7160
+ "rewards/accuracies": 0.987500011920929,
7161
+ "rewards/chosen": 0.20519797503948212,
7162
+ "rewards/margins": 0.5418139696121216,
7163
+ "rewards/rejected": -0.33661606907844543,
7164
+ "step": 4760
7165
+ },
7166
+ {
7167
+ "epoch": 24.19746835443038,
7168
+ "grad_norm": 343217.0970891512,
7169
+ "learning_rate": 1.8191789407709182e-07,
7170
+ "logits/chosen": -1.0294177532196045,
7171
+ "logits/rejected": -0.4815802574157715,
7172
+ "logps/chosen": -31.3253231048584,
7173
+ "logps/rejected": -560.1129150390625,
7174
+ "loss": 13408.0094,
7175
+ "rewards/accuracies": 0.9750000238418579,
7176
+ "rewards/chosen": 0.19244466722011566,
7177
+ "rewards/margins": 0.5293484926223755,
7178
+ "rewards/rejected": -0.3369038701057434,
7179
+ "step": 4770
7180
+ },
7181
+ {
7182
+ "epoch": 24.248101265822786,
7183
+ "grad_norm": 504214.9301429895,
7184
+ "learning_rate": 1.811344406142275e-07,
7185
+ "logits/chosen": -0.604789137840271,
7186
+ "logits/rejected": 0.22590136528015137,
7187
+ "logps/chosen": -41.899864196777344,
7188
+ "logps/rejected": -555.2015380859375,
7189
+ "loss": 13531.9625,
7190
+ "rewards/accuracies": 0.9750000238418579,
7191
+ "rewards/chosen": 0.190764918923378,
7192
+ "rewards/margins": 0.5197011232376099,
7193
+ "rewards/rejected": -0.32893624901771545,
7194
+ "step": 4780
7195
+ },
7196
+ {
7197
+ "epoch": 24.29873417721519,
7198
+ "grad_norm": 376293.61129873747,
7199
+ "learning_rate": 1.803509871513632e-07,
7200
+ "logits/chosen": -0.3547658324241638,
7201
+ "logits/rejected": -0.13969659805297852,
7202
+ "logps/chosen": -28.85129737854004,
7203
+ "logps/rejected": -577.5880126953125,
7204
+ "loss": 12898.8125,
7205
+ "rewards/accuracies": 1.0,
7206
+ "rewards/chosen": 0.19991345703601837,
7207
+ "rewards/margins": 0.5475735664367676,
7208
+ "rewards/rejected": -0.3476601243019104,
7209
+ "step": 4790
7210
+ },
7211
+ {
7212
+ "epoch": 24.349367088607593,
7213
+ "grad_norm": 414135.66871189536,
7214
+ "learning_rate": 1.7956753368849888e-07,
7215
+ "logits/chosen": 0.09798486530780792,
7216
+ "logits/rejected": 0.8055311441421509,
7217
+ "logps/chosen": -34.26675796508789,
7218
+ "logps/rejected": -584.9435424804688,
7219
+ "loss": 12507.7328,
7220
+ "rewards/accuracies": 1.0,
7221
+ "rewards/chosen": 0.20852184295654297,
7222
+ "rewards/margins": 0.5590152144432068,
7223
+ "rewards/rejected": -0.3504934012889862,
7224
+ "step": 4800
7225
+ },
7226
+ {
7227
+ "epoch": 24.4,
7228
+ "grad_norm": 343912.9213203651,
7229
+ "learning_rate": 1.787840802256346e-07,
7230
+ "logits/chosen": -0.12874791026115417,
7231
+ "logits/rejected": 0.15529172122478485,
7232
+ "logps/chosen": -32.1009635925293,
7233
+ "logps/rejected": -574.7623291015625,
7234
+ "loss": 13372.5594,
7235
+ "rewards/accuracies": 0.987500011920929,
7236
+ "rewards/chosen": 0.1985681653022766,
7237
+ "rewards/margins": 0.546955943107605,
7238
+ "rewards/rejected": -0.34838777780532837,
7239
+ "step": 4810
7240
+ },
7241
+ {
7242
+ "epoch": 24.450632911392404,
7243
+ "grad_norm": 697134.5088322331,
7244
+ "learning_rate": 1.7800062676277027e-07,
7245
+ "logits/chosen": 1.3246450424194336,
7246
+ "logits/rejected": 1.760595679283142,
7247
+ "logps/chosen": -32.29949188232422,
7248
+ "logps/rejected": -575.516357421875,
7249
+ "loss": 12415.9672,
7250
+ "rewards/accuracies": 0.987500011920929,
7251
+ "rewards/chosen": 0.19931714236736298,
7252
+ "rewards/margins": 0.5466721057891846,
7253
+ "rewards/rejected": -0.3473549485206604,
7254
+ "step": 4820
7255
+ },
7256
+ {
7257
+ "epoch": 24.50126582278481,
7258
+ "grad_norm": 389676.90431780973,
7259
+ "learning_rate": 1.7721717329990597e-07,
7260
+ "logits/chosen": -1.3381322622299194,
7261
+ "logits/rejected": -0.6404735445976257,
7262
+ "logps/chosen": -34.635719299316406,
7263
+ "logps/rejected": -587.3080444335938,
7264
+ "loss": 13101.6227,
7265
+ "rewards/accuracies": 1.0,
7266
+ "rewards/chosen": 0.20362886786460876,
7267
+ "rewards/margins": 0.5546956658363342,
7268
+ "rewards/rejected": -0.3510667383670807,
7269
+ "step": 4830
7270
+ },
7271
+ {
7272
+ "epoch": 24.551898734177215,
7273
+ "grad_norm": 314600.5637340995,
7274
+ "learning_rate": 1.7643371983704165e-07,
7275
+ "logits/chosen": 0.9953921437263489,
7276
+ "logits/rejected": 0.9643779993057251,
7277
+ "logps/chosen": -30.646331787109375,
7278
+ "logps/rejected": -570.855712890625,
7279
+ "loss": 12974.9633,
7280
+ "rewards/accuracies": 0.9750000238418579,
7281
+ "rewards/chosen": 0.19645583629608154,
7282
+ "rewards/margins": 0.5343093872070312,
7283
+ "rewards/rejected": -0.3378535211086273,
7284
+ "step": 4840
7285
+ },
7286
+ {
7287
+ "epoch": 24.60253164556962,
7288
+ "grad_norm": 327013.6839426029,
7289
+ "learning_rate": 1.7565026637417739e-07,
7290
+ "logits/chosen": -0.7217426300048828,
7291
+ "logits/rejected": -0.7290517091751099,
7292
+ "logps/chosen": -37.666648864746094,
7293
+ "logps/rejected": -563.5865478515625,
7294
+ "loss": 13273.0266,
7295
+ "rewards/accuracies": 0.9624999761581421,
7296
+ "rewards/chosen": 0.197954460978508,
7297
+ "rewards/margins": 0.5282526612281799,
7298
+ "rewards/rejected": -0.33029812574386597,
7299
+ "step": 4850
7300
+ },
7301
+ {
7302
+ "epoch": 24.653164556962025,
7303
+ "grad_norm": 425662.97201424866,
7304
+ "learning_rate": 1.7486681291131307e-07,
7305
+ "logits/chosen": -0.32900291681289673,
7306
+ "logits/rejected": 0.18864622712135315,
7307
+ "logps/chosen": -32.36582565307617,
7308
+ "logps/rejected": -566.5547485351562,
7309
+ "loss": 13350.5594,
7310
+ "rewards/accuracies": 0.9750000238418579,
7311
+ "rewards/chosen": 0.19610336422920227,
7312
+ "rewards/margins": 0.5321984887123108,
7313
+ "rewards/rejected": -0.3360951244831085,
7314
+ "step": 4860
7315
+ },
7316
+ {
7317
+ "epoch": 24.70379746835443,
7318
+ "grad_norm": 402351.7657170625,
7319
+ "learning_rate": 1.7408335944844877e-07,
7320
+ "logits/chosen": -2.112635850906372,
7321
+ "logits/rejected": -1.4337832927703857,
7322
+ "logps/chosen": -36.27765655517578,
7323
+ "logps/rejected": -588.708740234375,
7324
+ "loss": 13690.0375,
7325
+ "rewards/accuracies": 1.0,
7326
+ "rewards/chosen": 0.20721419155597687,
7327
+ "rewards/margins": 0.5532296299934387,
7328
+ "rewards/rejected": -0.34601545333862305,
7329
+ "step": 4870
7330
+ },
7331
+ {
7332
+ "epoch": 24.754430379746836,
7333
+ "grad_norm": 357871.7969812476,
7334
+ "learning_rate": 1.7329990598558445e-07,
7335
+ "logits/chosen": -0.34599849581718445,
7336
+ "logits/rejected": 0.0005200624582357705,
7337
+ "logps/chosen": -28.50592041015625,
7338
+ "logps/rejected": -560.6458740234375,
7339
+ "loss": 13133.4,
7340
+ "rewards/accuracies": 0.987500011920929,
7341
+ "rewards/chosen": 0.1971847414970398,
7342
+ "rewards/margins": 0.5326961278915405,
7343
+ "rewards/rejected": -0.33551135659217834,
7344
+ "step": 4880
7345
+ },
7346
+ {
7347
+ "epoch": 24.80506329113924,
7348
+ "grad_norm": 610016.4055058825,
7349
+ "learning_rate": 1.7251645252272016e-07,
7350
+ "logits/chosen": -1.225339651107788,
7351
+ "logits/rejected": -0.8243592977523804,
7352
+ "logps/chosen": -34.17732620239258,
7353
+ "logps/rejected": -581.1436767578125,
7354
+ "loss": 12571.1,
7355
+ "rewards/accuracies": 1.0,
7356
+ "rewards/chosen": 0.20419716835021973,
7357
+ "rewards/margins": 0.5520066022872925,
7358
+ "rewards/rejected": -0.347809374332428,
7359
+ "step": 4890
7360
+ },
7361
+ {
7362
+ "epoch": 24.855696202531647,
7363
+ "grad_norm": 379732.159808034,
7364
+ "learning_rate": 1.7173299905985584e-07,
7365
+ "logits/chosen": -1.817439317703247,
7366
+ "logits/rejected": -1.4895861148834229,
7367
+ "logps/chosen": -30.775598526000977,
7368
+ "logps/rejected": -570.484130859375,
7369
+ "loss": 13212.3781,
7370
+ "rewards/accuracies": 0.9750000238418579,
7371
+ "rewards/chosen": 0.20220620930194855,
7372
+ "rewards/margins": 0.5392990112304688,
7373
+ "rewards/rejected": -0.3370928466320038,
7374
+ "step": 4900
7375
+ },
7376
+ {
7377
+ "epoch": 24.906329113924052,
7378
+ "grad_norm": 324902.93726753094,
7379
+ "learning_rate": 1.7094954559699154e-07,
7380
+ "logits/chosen": -0.6947388648986816,
7381
+ "logits/rejected": -0.4560522437095642,
7382
+ "logps/chosen": -40.327796936035156,
7383
+ "logps/rejected": -581.7532348632812,
7384
+ "loss": 12891.9016,
7385
+ "rewards/accuracies": 0.987500011920929,
7386
+ "rewards/chosen": 0.20357458293437958,
7387
+ "rewards/margins": 0.5383836030960083,
7388
+ "rewards/rejected": -0.33480900526046753,
7389
+ "step": 4910
7390
+ },
7391
+ {
7392
+ "epoch": 24.956962025316457,
7393
+ "grad_norm": 294429.7500390618,
7394
+ "learning_rate": 1.7016609213412722e-07,
7395
+ "logits/chosen": -0.9121583104133606,
7396
+ "logits/rejected": 0.40684938430786133,
7397
+ "logps/chosen": -28.22664451599121,
7398
+ "logps/rejected": -580.9795532226562,
7399
+ "loss": 13184.5812,
7400
+ "rewards/accuracies": 1.0,
7401
+ "rewards/chosen": 0.20078356564044952,
7402
+ "rewards/margins": 0.5547462701797485,
7403
+ "rewards/rejected": -0.3539626896381378,
7404
+ "step": 4920
7405
  }
7406
  ],
7407
  "logging_steps": 10,