cat-searcher commited on
Commit
b3e3fde
1 Parent(s): 689f398

Training in progress, epoch 32, checkpoint

Browse files
Files changed (29) hide show
  1. last-checkpoint/global_step6311/bf16_zero_pp_rank_0_mp_rank_00_optim_states.pt +3 -0
  2. last-checkpoint/global_step6311/bf16_zero_pp_rank_1_mp_rank_00_optim_states.pt +3 -0
  3. last-checkpoint/global_step6311/bf16_zero_pp_rank_2_mp_rank_00_optim_states.pt +3 -0
  4. last-checkpoint/global_step6311/bf16_zero_pp_rank_3_mp_rank_00_optim_states.pt +3 -0
  5. last-checkpoint/global_step6311/bf16_zero_pp_rank_4_mp_rank_00_optim_states.pt +3 -0
  6. last-checkpoint/global_step6311/bf16_zero_pp_rank_5_mp_rank_00_optim_states.pt +3 -0
  7. last-checkpoint/global_step6311/bf16_zero_pp_rank_6_mp_rank_00_optim_states.pt +3 -0
  8. last-checkpoint/global_step6311/bf16_zero_pp_rank_7_mp_rank_00_optim_states.pt +3 -0
  9. last-checkpoint/global_step6311/zero_pp_rank_0_mp_rank_00_model_states.pt +3 -0
  10. last-checkpoint/global_step6311/zero_pp_rank_1_mp_rank_00_model_states.pt +3 -0
  11. last-checkpoint/global_step6311/zero_pp_rank_2_mp_rank_00_model_states.pt +3 -0
  12. last-checkpoint/global_step6311/zero_pp_rank_3_mp_rank_00_model_states.pt +3 -0
  13. last-checkpoint/global_step6311/zero_pp_rank_4_mp_rank_00_model_states.pt +3 -0
  14. last-checkpoint/global_step6311/zero_pp_rank_5_mp_rank_00_model_states.pt +3 -0
  15. last-checkpoint/global_step6311/zero_pp_rank_6_mp_rank_00_model_states.pt +3 -0
  16. last-checkpoint/global_step6311/zero_pp_rank_7_mp_rank_00_model_states.pt +3 -0
  17. last-checkpoint/latest +1 -1
  18. last-checkpoint/model-00001-of-00002.safetensors +1 -1
  19. last-checkpoint/model-00002-of-00002.safetensors +1 -1
  20. last-checkpoint/rng_state_0.pth +1 -1
  21. last-checkpoint/rng_state_1.pth +1 -1
  22. last-checkpoint/rng_state_2.pth +1 -1
  23. last-checkpoint/rng_state_3.pth +1 -1
  24. last-checkpoint/rng_state_4.pth +1 -1
  25. last-checkpoint/rng_state_5.pth +1 -1
  26. last-checkpoint/rng_state_6.pth +1 -1
  27. last-checkpoint/rng_state_7.pth +1 -1
  28. last-checkpoint/scheduler.pt +1 -1
  29. last-checkpoint/trainer_state.json +302 -2
last-checkpoint/global_step6311/bf16_zero_pp_rank_0_mp_rank_00_optim_states.pt ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:b1638964301fb436493919c9fca6ca0b24757bffcb3ab3631c4318b6d2c5c8da
3
+ size 2506176112
last-checkpoint/global_step6311/bf16_zero_pp_rank_1_mp_rank_00_optim_states.pt ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:b5e67296286fe1995c5e46e13fe58ea336b5760742373cd74308f087993422e9
3
+ size 2506176112
last-checkpoint/global_step6311/bf16_zero_pp_rank_2_mp_rank_00_optim_states.pt ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:3533e6d8ea544fc4ac3fea983acae782fc11c435392c67fd45efbba66afe7076
3
+ size 2506176112
last-checkpoint/global_step6311/bf16_zero_pp_rank_3_mp_rank_00_optim_states.pt ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:4c3852775e4256033ec3bbb1ed5e12a1ab01d36b2e7c7dfa237a1096da618a7c
3
+ size 2506176112
last-checkpoint/global_step6311/bf16_zero_pp_rank_4_mp_rank_00_optim_states.pt ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:f72ffbe1117371c6d0b90a927c2bd5e0ca456a7c586cc30dfe0092af8d9ecb16
3
+ size 2506176112
last-checkpoint/global_step6311/bf16_zero_pp_rank_5_mp_rank_00_optim_states.pt ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:537bbcb71f2ca58a96edea530f6b5f05ed36f594640ad73ba45da2c4ba63a4ac
3
+ size 2506176112
last-checkpoint/global_step6311/bf16_zero_pp_rank_6_mp_rank_00_optim_states.pt ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:c714754e6c8f8ef3f81965b74c1d37905a2dcd11a4942468b05dada83c63829e
3
+ size 2506176112
last-checkpoint/global_step6311/bf16_zero_pp_rank_7_mp_rank_00_optim_states.pt ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:3d0800d4f8b21fa8588dc1d10c8c2a4f8f3f905343d12a9ae3a6fca9ba22f61e
3
+ size 2506176112
last-checkpoint/global_step6311/zero_pp_rank_0_mp_rank_00_model_states.pt ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:af698d0c6a7367349e9bb5f732a10dbe3e65bd62e6f7c19dceaa8c7eb53d63f1
3
+ size 85570
last-checkpoint/global_step6311/zero_pp_rank_1_mp_rank_00_model_states.pt ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:858e5e94768ad6f67b1fb3868dd116f4d8883c9770589981d581b40df4a97098
3
+ size 85506
last-checkpoint/global_step6311/zero_pp_rank_2_mp_rank_00_model_states.pt ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:8d203316b3f06902043bcb35792974b70921fce39bb38b45cd7cd2e8559d7b3d
3
+ size 85506
last-checkpoint/global_step6311/zero_pp_rank_3_mp_rank_00_model_states.pt ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:dcc85a051658c2e2cf8843f05040e6207f3a9824e6eb392914d967306e1cc2d7
3
+ size 85506
last-checkpoint/global_step6311/zero_pp_rank_4_mp_rank_00_model_states.pt ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:ff794e200e434a6beb015ae1d4143bf6d1148d6c47b39da39e72e337662878a2
3
+ size 85506
last-checkpoint/global_step6311/zero_pp_rank_5_mp_rank_00_model_states.pt ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:ffb9529a96a97431f16deb193473953e218d01c6f842bdfcfa6f37707a22cc88
3
+ size 85506
last-checkpoint/global_step6311/zero_pp_rank_6_mp_rank_00_model_states.pt ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:faa1b1523feff7fa82521ad7403b9e750238f0dc56ba883bfe6bd9a5cb05a21b
3
+ size 85506
last-checkpoint/global_step6311/zero_pp_rank_7_mp_rank_00_model_states.pt ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:2372cf41d29fbbd2d4204983371bbdc350d5ee42cc07209ffbeea2c756d98ff0
3
+ size 85506
last-checkpoint/latest CHANGED
@@ -1 +1 @@
1
- global_step6113
 
1
+ global_step6311
last-checkpoint/model-00001-of-00002.safetensors CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:8a98e86a0252e0053a50f39ee5da1bc91fc6cc770c00bdf4879a07beb1bfb774
3
  size 4945242264
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:1a4166953b305647bfe20e1bbeb15a19c28014fe214420eadf6709470c2313a3
3
  size 4945242264
last-checkpoint/model-00002-of-00002.safetensors CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:90d998420cc3a03c1798986731cd33f00d6a7cd27ade984be3af7b6e9ba76532
3
  size 67121608
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:60ab1e0ab13e8465a7f0033de41b00c3ef85f0686c27f26ccedd41583cf589de
3
  size 67121608
last-checkpoint/rng_state_0.pth CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:4ac3887e0b6e47ee941f0099109d55fe8c4958125034b41513ac29b0a09c9e86
3
  size 15984
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:af2aa29a1a0077819b6e6c9858b2870298aef9379363892f7f4a488e66c5bd38
3
  size 15984
last-checkpoint/rng_state_1.pth CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:de0272654fc4c603ac86d1e7e8bda566a95de507f7e48193f810ed9d664b308b
3
  size 15984
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:291da2a884d82312ccaa711af98beaa52d4f41499984405613bb21b5148565a2
3
  size 15984
last-checkpoint/rng_state_2.pth CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:09a612c5aff75737f917d89b930ca876fddca4236d73b7c4e2122a374a8c7279
3
  size 15984
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:f1bbb7042adc0fc2a1ac7bf08c10090a51f5d0491c80cb36a3e5f1380294d890
3
  size 15984
last-checkpoint/rng_state_3.pth CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:15d130c7a2df3aa45929d007a20cc7aee23015b41b9d24357c85880be9954c25
3
  size 15984
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:70f0dc59a31366ea078c9fd119c2fc25b442df27e88be4fecef8251ec325566a
3
  size 15984
last-checkpoint/rng_state_4.pth CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:6c0852f2ccb050a5008bb4e71f4b3a6ea8b845e136589936cf10d03133438507
3
  size 15984
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:26207c8fd2f08c67b253e0d4b8b67c73971a41b870b2d0d9e52e1388c89513b5
3
  size 15984
last-checkpoint/rng_state_5.pth CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:3cad3ab70f668806214a1537af9f7d2fc6de4401f7eec2f544f9059052862cb5
3
  size 15984
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:ac396bf6594a215db06382f8b0dfcb2360c2c6b1a95d8150fec6afd800852574
3
  size 15984
last-checkpoint/rng_state_6.pth CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:57dc573e9fb6a6cc583ada6a4737e432c792b6361f3eb47de697d69b75f09d21
3
  size 15984
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:c2edf36543f3b923b139f3541d6d94d5a2d50de85da08b18fa6867198430e57c
3
  size 15984
last-checkpoint/rng_state_7.pth CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:c46a2fd5508f6b24b7fd294db0c5ae6e928c7174f4e3caeeeeca276dd88690c5
3
  size 15984
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:f96eea372344884b68478842af038d9832fda66fa5d46d28035e601a7834efd4
3
  size 15984
last-checkpoint/scheduler.pt CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:12fdc5d527314ffb69c899c4aaadd4582bc4cb2067e14fecb7c5f8dd01262d7f
3
  size 1064
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:5ac6a779be83142d0fb2f1a0e7950874d368e5e14f0bcab557da3d6573142406
3
  size 1064
last-checkpoint/trainer_state.json CHANGED
@@ -1,9 +1,9 @@
1
  {
2
  "best_metric": null,
3
  "best_model_checkpoint": null,
4
- "epoch": 30.99746835443038,
5
  "eval_steps": 100,
6
- "global_step": 6113,
7
  "is_hyper_param_search": false,
8
  "is_local_process_zero": true,
9
  "is_world_process_zero": true,
@@ -9187,6 +9187,306 @@
9187
  "rewards/margins": 0.5498504042625427,
9188
  "rewards/rejected": -0.34040600061416626,
9189
  "step": 6110
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
9190
  }
9191
  ],
9192
  "logging_steps": 10,
 
1
  {
2
  "best_metric": null,
3
  "best_model_checkpoint": null,
4
+ "epoch": 32.0,
5
  "eval_steps": 100,
6
+ "global_step": 6311,
7
  "is_hyper_param_search": false,
8
  "is_local_process_zero": true,
9
  "is_world_process_zero": true,
 
9187
  "rewards/margins": 0.5498504042625427,
9188
  "rewards/rejected": -0.34040600061416626,
9189
  "step": 6110
9190
+ },
9191
+ {
9192
+ "epoch": 31.03291139240506,
9193
+ "grad_norm": 111295.73044950665,
9194
+ "learning_rate": 7.615167659041052e-08,
9195
+ "logits/chosen": -0.7748550772666931,
9196
+ "logits/rejected": -0.973538875579834,
9197
+ "logps/chosen": -31.6827335357666,
9198
+ "logps/rejected": -594.8641357421875,
9199
+ "loss": 11721.4203,
9200
+ "rewards/accuracies": 0.987500011920929,
9201
+ "rewards/chosen": 0.20987281203269958,
9202
+ "rewards/margins": 0.5614258050918579,
9203
+ "rewards/rejected": -0.3515530228614807,
9204
+ "step": 6120
9205
+ },
9206
+ {
9207
+ "epoch": 31.083544303797467,
9208
+ "grad_norm": 132943.3056647964,
9209
+ "learning_rate": 7.536822312754621e-08,
9210
+ "logits/chosen": -2.017181396484375,
9211
+ "logits/rejected": -1.8383163213729858,
9212
+ "logps/chosen": -32.51802062988281,
9213
+ "logps/rejected": -609.6942138671875,
9214
+ "loss": 12392.7875,
9215
+ "rewards/accuracies": 1.0,
9216
+ "rewards/chosen": 0.22407253086566925,
9217
+ "rewards/margins": 0.582473874092102,
9218
+ "rewards/rejected": -0.3584012985229492,
9219
+ "step": 6130
9220
+ },
9221
+ {
9222
+ "epoch": 31.134177215189872,
9223
+ "grad_norm": 174931.96319021285,
9224
+ "learning_rate": 7.45847696646819e-08,
9225
+ "logits/chosen": -0.5535727143287659,
9226
+ "logits/rejected": 0.6218046545982361,
9227
+ "logps/chosen": -26.1910457611084,
9228
+ "logps/rejected": -551.5840454101562,
9229
+ "loss": 11699.3109,
9230
+ "rewards/accuracies": 0.987500011920929,
9231
+ "rewards/chosen": 0.20296287536621094,
9232
+ "rewards/margins": 0.5298973917961121,
9233
+ "rewards/rejected": -0.3269345760345459,
9234
+ "step": 6140
9235
+ },
9236
+ {
9237
+ "epoch": 31.184810126582278,
9238
+ "grad_norm": 168688.32644125135,
9239
+ "learning_rate": 7.380131620181761e-08,
9240
+ "logits/chosen": -1.008988618850708,
9241
+ "logits/rejected": -0.2778696119785309,
9242
+ "logps/chosen": -33.33096694946289,
9243
+ "logps/rejected": -607.976806640625,
9244
+ "loss": 11916.4016,
9245
+ "rewards/accuracies": 0.987500011920929,
9246
+ "rewards/chosen": 0.21690383553504944,
9247
+ "rewards/margins": 0.5754967331886292,
9248
+ "rewards/rejected": -0.3585929274559021,
9249
+ "step": 6150
9250
+ },
9251
+ {
9252
+ "epoch": 31.235443037974683,
9253
+ "grad_norm": 94661.132576451,
9254
+ "learning_rate": 7.30178627389533e-08,
9255
+ "logits/chosen": -3.0997250080108643,
9256
+ "logits/rejected": -2.1219401359558105,
9257
+ "logps/chosen": -27.209686279296875,
9258
+ "logps/rejected": -589.7662353515625,
9259
+ "loss": 12111.9188,
9260
+ "rewards/accuracies": 1.0,
9261
+ "rewards/chosen": 0.21759450435638428,
9262
+ "rewards/margins": 0.5674911737442017,
9263
+ "rewards/rejected": -0.3498966693878174,
9264
+ "step": 6160
9265
+ },
9266
+ {
9267
+ "epoch": 31.28607594936709,
9268
+ "grad_norm": 129537.98682999605,
9269
+ "learning_rate": 7.2234409276089e-08,
9270
+ "logits/chosen": -2.1777210235595703,
9271
+ "logits/rejected": -2.1664652824401855,
9272
+ "logps/chosen": -29.21515464782715,
9273
+ "logps/rejected": -575.5145263671875,
9274
+ "loss": 12396.4562,
9275
+ "rewards/accuracies": 0.987500011920929,
9276
+ "rewards/chosen": 0.20168697834014893,
9277
+ "rewards/margins": 0.5468615293502808,
9278
+ "rewards/rejected": -0.3451746106147766,
9279
+ "step": 6170
9280
+ },
9281
+ {
9282
+ "epoch": 31.336708860759494,
9283
+ "grad_norm": 146320.37748909468,
9284
+ "learning_rate": 7.145095581322469e-08,
9285
+ "logits/chosen": -0.37119048833847046,
9286
+ "logits/rejected": -0.12678974866867065,
9287
+ "logps/chosen": -27.464313507080078,
9288
+ "logps/rejected": -583.199462890625,
9289
+ "loss": 12035.1789,
9290
+ "rewards/accuracies": 1.0,
9291
+ "rewards/chosen": 0.20687448978424072,
9292
+ "rewards/margins": 0.5559764504432678,
9293
+ "rewards/rejected": -0.3491020202636719,
9294
+ "step": 6180
9295
+ },
9296
+ {
9297
+ "epoch": 31.3873417721519,
9298
+ "grad_norm": 123464.43072965978,
9299
+ "learning_rate": 7.066750235036038e-08,
9300
+ "logits/chosen": -1.114485740661621,
9301
+ "logits/rejected": -0.36546590924263,
9302
+ "logps/chosen": -24.96463394165039,
9303
+ "logps/rejected": -573.1627197265625,
9304
+ "loss": 12102.0078,
9305
+ "rewards/accuracies": 1.0,
9306
+ "rewards/chosen": 0.2025957852602005,
9307
+ "rewards/margins": 0.5483575463294983,
9308
+ "rewards/rejected": -0.3457617163658142,
9309
+ "step": 6190
9310
+ },
9311
+ {
9312
+ "epoch": 31.437974683544304,
9313
+ "grad_norm": 182155.23164206932,
9314
+ "learning_rate": 6.988404888749608e-08,
9315
+ "logits/chosen": -1.7520939111709595,
9316
+ "logits/rejected": -1.4854246377944946,
9317
+ "logps/chosen": -29.002777099609375,
9318
+ "logps/rejected": -592.4381713867188,
9319
+ "loss": 11423.6828,
9320
+ "rewards/accuracies": 1.0,
9321
+ "rewards/chosen": 0.2187313735485077,
9322
+ "rewards/margins": 0.5657260417938232,
9323
+ "rewards/rejected": -0.34699463844299316,
9324
+ "step": 6200
9325
+ },
9326
+ {
9327
+ "epoch": 31.48860759493671,
9328
+ "grad_norm": 148737.16455364344,
9329
+ "learning_rate": 6.910059542463177e-08,
9330
+ "logits/chosen": 0.025389552116394043,
9331
+ "logits/rejected": -0.27969443798065186,
9332
+ "logps/chosen": -17.67035675048828,
9333
+ "logps/rejected": -546.9998168945312,
9334
+ "loss": 11498.325,
9335
+ "rewards/accuracies": 0.987500011920929,
9336
+ "rewards/chosen": 0.19227565824985504,
9337
+ "rewards/margins": 0.5237180590629578,
9338
+ "rewards/rejected": -0.33144229650497437,
9339
+ "step": 6210
9340
+ },
9341
+ {
9342
+ "epoch": 31.539240506329115,
9343
+ "grad_norm": 186784.06647045226,
9344
+ "learning_rate": 6.831714196176746e-08,
9345
+ "logits/chosen": -3.0769848823547363,
9346
+ "logits/rejected": -2.87144136428833,
9347
+ "logps/chosen": -25.640066146850586,
9348
+ "logps/rejected": -605.6832885742188,
9349
+ "loss": 11701.2086,
9350
+ "rewards/accuracies": 1.0,
9351
+ "rewards/chosen": 0.21926145255565643,
9352
+ "rewards/margins": 0.5798953771591187,
9353
+ "rewards/rejected": -0.3606340289115906,
9354
+ "step": 6220
9355
+ },
9356
+ {
9357
+ "epoch": 31.58987341772152,
9358
+ "grad_norm": 108314.28535819704,
9359
+ "learning_rate": 6.753368849890315e-08,
9360
+ "logits/chosen": -0.5384847521781921,
9361
+ "logits/rejected": -0.6974294781684875,
9362
+ "logps/chosen": -26.830814361572266,
9363
+ "logps/rejected": -587.5255126953125,
9364
+ "loss": 11231.8016,
9365
+ "rewards/accuracies": 0.9750000238418579,
9366
+ "rewards/chosen": 0.20746394991874695,
9367
+ "rewards/margins": 0.557998776435852,
9368
+ "rewards/rejected": -0.3505348265171051,
9369
+ "step": 6230
9370
+ },
9371
+ {
9372
+ "epoch": 31.640506329113926,
9373
+ "grad_norm": 197387.20948770002,
9374
+ "learning_rate": 6.675023503603886e-08,
9375
+ "logits/chosen": -0.6654781103134155,
9376
+ "logits/rejected": -1.1572941541671753,
9377
+ "logps/chosen": -27.918231964111328,
9378
+ "logps/rejected": -592.8441162109375,
9379
+ "loss": 11850.0031,
9380
+ "rewards/accuracies": 1.0,
9381
+ "rewards/chosen": 0.21056847274303436,
9382
+ "rewards/margins": 0.5671868920326233,
9383
+ "rewards/rejected": -0.3566184341907501,
9384
+ "step": 6240
9385
+ },
9386
+ {
9387
+ "epoch": 31.691139240506327,
9388
+ "grad_norm": 178129.00858003844,
9389
+ "learning_rate": 6.596678157317455e-08,
9390
+ "logits/chosen": 0.17990253865718842,
9391
+ "logits/rejected": 0.15132752060890198,
9392
+ "logps/chosen": -26.486125946044922,
9393
+ "logps/rejected": -577.5296020507812,
9394
+ "loss": 12025.9992,
9395
+ "rewards/accuracies": 0.987500011920929,
9396
+ "rewards/chosen": 0.1961621642112732,
9397
+ "rewards/margins": 0.5460348725318909,
9398
+ "rewards/rejected": -0.34987273812294006,
9399
+ "step": 6250
9400
+ },
9401
+ {
9402
+ "epoch": 31.741772151898733,
9403
+ "grad_norm": 113204.1607298857,
9404
+ "learning_rate": 6.518332811031025e-08,
9405
+ "logits/chosen": -0.7701491117477417,
9406
+ "logits/rejected": -0.5652084946632385,
9407
+ "logps/chosen": -30.580230712890625,
9408
+ "logps/rejected": -575.9344482421875,
9409
+ "loss": 12611.7422,
9410
+ "rewards/accuracies": 0.987500011920929,
9411
+ "rewards/chosen": 0.20266905426979065,
9412
+ "rewards/margins": 0.5443531274795532,
9413
+ "rewards/rejected": -0.34168410301208496,
9414
+ "step": 6260
9415
+ },
9416
+ {
9417
+ "epoch": 31.792405063291138,
9418
+ "grad_norm": 170084.77349090017,
9419
+ "learning_rate": 6.439987464744594e-08,
9420
+ "logits/chosen": 0.8593052625656128,
9421
+ "logits/rejected": 1.1197197437286377,
9422
+ "logps/chosen": -26.577016830444336,
9423
+ "logps/rejected": -555.6820068359375,
9424
+ "loss": 12234.5422,
9425
+ "rewards/accuracies": 0.987500011920929,
9426
+ "rewards/chosen": 0.19716337323188782,
9427
+ "rewards/margins": 0.5328342318534851,
9428
+ "rewards/rejected": -0.33567091822624207,
9429
+ "step": 6270
9430
+ },
9431
+ {
9432
+ "epoch": 31.843037974683543,
9433
+ "grad_norm": 235274.58346107465,
9434
+ "learning_rate": 6.361642118458163e-08,
9435
+ "logits/chosen": -1.7307960987091064,
9436
+ "logits/rejected": -1.3535115718841553,
9437
+ "logps/chosen": -23.92806625366211,
9438
+ "logps/rejected": -565.2352294921875,
9439
+ "loss": 12517.5156,
9440
+ "rewards/accuracies": 0.987500011920929,
9441
+ "rewards/chosen": 0.2007029801607132,
9442
+ "rewards/margins": 0.5428507924079895,
9443
+ "rewards/rejected": -0.3421478271484375,
9444
+ "step": 6280
9445
+ },
9446
+ {
9447
+ "epoch": 31.89367088607595,
9448
+ "grad_norm": 190203.888446938,
9449
+ "learning_rate": 6.283296772171732e-08,
9450
+ "logits/chosen": -0.9662951231002808,
9451
+ "logits/rejected": -0.45983943343162537,
9452
+ "logps/chosen": -26.488794326782227,
9453
+ "logps/rejected": -565.1602783203125,
9454
+ "loss": 12050.4156,
9455
+ "rewards/accuracies": 0.987500011920929,
9456
+ "rewards/chosen": 0.20456723868846893,
9457
+ "rewards/margins": 0.5392366051673889,
9458
+ "rewards/rejected": -0.3346693515777588,
9459
+ "step": 6290
9460
+ },
9461
+ {
9462
+ "epoch": 31.944303797468354,
9463
+ "grad_norm": 169175.47682307824,
9464
+ "learning_rate": 6.204951425885302e-08,
9465
+ "logits/chosen": -1.9982364177703857,
9466
+ "logits/rejected": -1.282958745956421,
9467
+ "logps/chosen": -25.263113021850586,
9468
+ "logps/rejected": -584.7576293945312,
9469
+ "loss": 11806.3297,
9470
+ "rewards/accuracies": 1.0,
9471
+ "rewards/chosen": 0.20914848148822784,
9472
+ "rewards/margins": 0.5613822937011719,
9473
+ "rewards/rejected": -0.35223376750946045,
9474
+ "step": 6300
9475
+ },
9476
+ {
9477
+ "epoch": 31.99493670886076,
9478
+ "grad_norm": 142938.702725119,
9479
+ "learning_rate": 6.126606079598871e-08,
9480
+ "logits/chosen": -2.084618091583252,
9481
+ "logits/rejected": -1.6745023727416992,
9482
+ "logps/chosen": -24.918956756591797,
9483
+ "logps/rejected": -603.4859619140625,
9484
+ "loss": 12022.5133,
9485
+ "rewards/accuracies": 1.0,
9486
+ "rewards/chosen": 0.21575181186199188,
9487
+ "rewards/margins": 0.5819977521896362,
9488
+ "rewards/rejected": -0.36624595522880554,
9489
+ "step": 6310
9490
  }
9491
  ],
9492
  "logging_steps": 10,