cat-searcher committed on
Commit
9f1bdac
1 Parent(s): 8f3a603

Training in progress, epoch 28, checkpoint

Browse files
Files changed (29) hide show
  1. last-checkpoint/global_step5718/bf16_zero_pp_rank_0_mp_rank_00_optim_states.pt +3 -0
  2. last-checkpoint/global_step5718/bf16_zero_pp_rank_1_mp_rank_00_optim_states.pt +3 -0
  3. last-checkpoint/global_step5718/bf16_zero_pp_rank_2_mp_rank_00_optim_states.pt +3 -0
  4. last-checkpoint/global_step5718/bf16_zero_pp_rank_3_mp_rank_00_optim_states.pt +3 -0
  5. last-checkpoint/global_step5718/bf16_zero_pp_rank_4_mp_rank_00_optim_states.pt +3 -0
  6. last-checkpoint/global_step5718/bf16_zero_pp_rank_5_mp_rank_00_optim_states.pt +3 -0
  7. last-checkpoint/global_step5718/bf16_zero_pp_rank_6_mp_rank_00_optim_states.pt +3 -0
  8. last-checkpoint/global_step5718/bf16_zero_pp_rank_7_mp_rank_00_optim_states.pt +3 -0
  9. last-checkpoint/global_step5718/zero_pp_rank_0_mp_rank_00_model_states.pt +3 -0
  10. last-checkpoint/global_step5718/zero_pp_rank_1_mp_rank_00_model_states.pt +3 -0
  11. last-checkpoint/global_step5718/zero_pp_rank_2_mp_rank_00_model_states.pt +3 -0
  12. last-checkpoint/global_step5718/zero_pp_rank_3_mp_rank_00_model_states.pt +3 -0
  13. last-checkpoint/global_step5718/zero_pp_rank_4_mp_rank_00_model_states.pt +3 -0
  14. last-checkpoint/global_step5718/zero_pp_rank_5_mp_rank_00_model_states.pt +3 -0
  15. last-checkpoint/global_step5718/zero_pp_rank_6_mp_rank_00_model_states.pt +3 -0
  16. last-checkpoint/global_step5718/zero_pp_rank_7_mp_rank_00_model_states.pt +3 -0
  17. last-checkpoint/latest +1 -1
  18. last-checkpoint/model-00001-of-00002.safetensors +1 -1
  19. last-checkpoint/model-00002-of-00002.safetensors +1 -1
  20. last-checkpoint/rng_state_0.pth +1 -1
  21. last-checkpoint/rng_state_1.pth +1 -1
  22. last-checkpoint/rng_state_2.pth +1 -1
  23. last-checkpoint/rng_state_3.pth +1 -1
  24. last-checkpoint/rng_state_4.pth +1 -1
  25. last-checkpoint/rng_state_5.pth +1 -1
  26. last-checkpoint/rng_state_6.pth +1 -1
  27. last-checkpoint/rng_state_7.pth +1 -1
  28. last-checkpoint/scheduler.pt +1 -1
  29. last-checkpoint/trainer_state.json +287 -2
last-checkpoint/global_step5718/bf16_zero_pp_rank_0_mp_rank_00_optim_states.pt ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:bd38c8d53cef6a27f96f7b421d3d5acb19ce05f445d6d62035a1e97e79f3f627
3
+ size 2506176112
last-checkpoint/global_step5718/bf16_zero_pp_rank_1_mp_rank_00_optim_states.pt ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:4c89a6cfa9861549dfcd1590200365ce0c25a15d698599c5baa2997fa1792463
3
+ size 2506176112
last-checkpoint/global_step5718/bf16_zero_pp_rank_2_mp_rank_00_optim_states.pt ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:4fdba19db1d418e7b2ee177ec5fe1c0e2ccf008a0275c8f937268506bce9f05d
3
+ size 2506176112
last-checkpoint/global_step5718/bf16_zero_pp_rank_3_mp_rank_00_optim_states.pt ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:42cee2fd79cd1d6b6c3a0ee111ad23945fdc848b6f088f9fc721f86c321c5fbc
3
+ size 2506176112
last-checkpoint/global_step5718/bf16_zero_pp_rank_4_mp_rank_00_optim_states.pt ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:1f93875499f8ce0120e6376255ad48ae9397e785acb633fde389a6b2ac49e11c
3
+ size 2506176112
last-checkpoint/global_step5718/bf16_zero_pp_rank_5_mp_rank_00_optim_states.pt ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:8b49d8c24ca0c52538af8feffe99d3278097e67a82b3dfb164a19abf75c1c5dd
3
+ size 2506176112
last-checkpoint/global_step5718/bf16_zero_pp_rank_6_mp_rank_00_optim_states.pt ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:8e093b6caed17faa9268d407698a34ff3a5353d8f6b104dada895df571c3a25e
3
+ size 2506176112
last-checkpoint/global_step5718/bf16_zero_pp_rank_7_mp_rank_00_optim_states.pt ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:74f30ead704a9cc1b7b066f4a45d9736525415f9a92a8c31f4bac15e7e7b9763
3
+ size 2506176112
last-checkpoint/global_step5718/zero_pp_rank_0_mp_rank_00_model_states.pt ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:8252e479868a5e0a4dfa749f80a1e8d524dd4c975c140119cfbd5abd9bf8afec
3
+ size 85570
last-checkpoint/global_step5718/zero_pp_rank_1_mp_rank_00_model_states.pt ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:feeb32706d269794adf5b59514242cb1708a15909b313f060f2aef30bae63262
3
+ size 85506
last-checkpoint/global_step5718/zero_pp_rank_2_mp_rank_00_model_states.pt ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:335cd955b91a12c7f066b1d5d2dbb9ecb39d21ac0b6de5e5b7fe280032ce5a7e
3
+ size 85506
last-checkpoint/global_step5718/zero_pp_rank_3_mp_rank_00_model_states.pt ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:db19ca119d186937dfec156f64ed5185ab10005181a4f3d860a9d55af32a8feb
3
+ size 85506
last-checkpoint/global_step5718/zero_pp_rank_4_mp_rank_00_model_states.pt ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:14a134d7d307b5c30e4ad6723ea24295f2a5c589def33cb8ff5981d5b9632b0b
3
+ size 85506
last-checkpoint/global_step5718/zero_pp_rank_5_mp_rank_00_model_states.pt ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:5101f24995338e1d56a12ada0fbc5ad6e72c43db79ad0c51f12b9255a2075ca0
3
+ size 85506
last-checkpoint/global_step5718/zero_pp_rank_6_mp_rank_00_model_states.pt ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:6fdccfa685649f28110d93e0604078495d43dc0a91bcf95d6794dfa8a6409447
3
+ size 85506
last-checkpoint/global_step5718/zero_pp_rank_7_mp_rank_00_model_states.pt ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:38dcdf96ca2a336a1bbe49a3b62871afbdf6b077d44e4602872b45e38fcfb1b4
3
+ size 85506
last-checkpoint/latest CHANGED
@@ -1 +1 @@
1
- global_step5521
 
1
+ global_step5718
last-checkpoint/model-00001-of-00002.safetensors CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:49f4a9ae06898314b6bbf8ad9fca6ca16dea158bd29ecc0506863d601b8345f4
3
  size 4945242264
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:cfa3379a77e192ab0af64fe78334db958cf6214addb3b4fbbc67569fb9f2e836
3
  size 4945242264
last-checkpoint/model-00002-of-00002.safetensors CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:4832308f86af667638feea3c3365de459978e8c0f49e3d367cf4d5379875a12f
3
  size 67121608
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:c217d7738abb0675dccca5614343dba054b625a983adb5b66bb33a2cf128b5c4
3
  size 67121608
last-checkpoint/rng_state_0.pth CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:038329a940174c7998542fe9a3c903ee0c21d0a2351959a1cb53ac9af3988f89
3
  size 15984
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:a205f64c86d241517037857e791fc7cdcfd1b2d8a26ca46ff4e6430fc9491c64
3
  size 15984
last-checkpoint/rng_state_1.pth CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:fc2eb62cd1e31d2c95a28eaadd97a496b27751983378626efc3ee2a53ae743ff
3
  size 15984
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:384e04a3f342b13aa2aff82b03d67994ff48a7b6e7d90ad53291b0ccf1124755
3
  size 15984
last-checkpoint/rng_state_2.pth CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:7c76f1ee6b7bfb2e7a6ca68f028fe40297bc56fa8287959be7a51545af2a824b
3
  size 15984
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:c75af15b2dde4bfa82f45cc3b48588a123bca20dd4b565a0312d5c1198d8bca9
3
  size 15984
last-checkpoint/rng_state_3.pth CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:3e4716e0a98e24ecef0261fa45d53aa531ce14f99b4992682e0257f7c483a80d
3
  size 15984
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:8162e1d617d2045c999376967bb33455e9055c2882d00e9e6c3e5639106c7cf2
3
  size 15984
last-checkpoint/rng_state_4.pth CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:732644bf9682f11fe70f041a2575d5822769847a3b56320e6cfdbd3914eb98f9
3
  size 15984
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:37df018beaaf1bcb0a0451dcfd03f257c34b245315c9d5243bad309614abb972
3
  size 15984
last-checkpoint/rng_state_5.pth CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:c3d90978ff8f9057d8587ec1258de11f4d5531805002e65d952f5725b93f51be
3
  size 15984
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:bc45f95eb705f82fae244f5f5bb1d1d060492c8b55aba2aa58162687e28952bc
3
  size 15984
last-checkpoint/rng_state_6.pth CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:a013a277003a33e8bb2418ae66c1488282f502fc920260b5fc9f337843415ab2
3
  size 15984
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:d8ed548560db42b0406ce39f0d4bce5fac64c200cc3ab9248f1dd703dde9dfef
3
  size 15984
last-checkpoint/rng_state_7.pth CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:b9f8887e423cbfbb140f966733dae63123515ed5345b3e075eb892b0646282c8
3
  size 15984
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:865e7aa49ec74e2e8e5ffbc2b62c9edb6308476119fe3e77f2fe29961dd5deaf
3
  size 15984
last-checkpoint/scheduler.pt CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:d2d39cfa3808e402183de5c305fcd5a81f2c9ffede37076a3856931095792cc0
3
  size 1064
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:3921aadd9f9af170d702817ed4b4c15515108ff8717773c58f7ba3567a43d1be
3
  size 1064
last-checkpoint/trainer_state.json CHANGED
@@ -1,9 +1,9 @@
1
  {
2
  "best_metric": null,
3
  "best_model_checkpoint": null,
4
- "epoch": 28.0,
5
  "eval_steps": 100,
6
- "global_step": 5521,
7
  "is_hyper_param_search": false,
8
  "is_local_process_zero": true,
9
  "is_world_process_zero": true,
@@ -8302,6 +8302,291 @@
8302
  "rewards/margins": 0.5627579689025879,
8303
  "rewards/rejected": -0.3508843779563904,
8304
  "step": 5520
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
8305
  }
8306
  ],
8307
  "logging_steps": 10,
 
1
  {
2
  "best_metric": null,
3
  "best_model_checkpoint": null,
4
+ "epoch": 28.99746835443038,
5
  "eval_steps": 100,
6
+ "global_step": 5718,
7
  "is_hyper_param_search": false,
8
  "is_local_process_zero": true,
9
  "is_world_process_zero": true,
 
8302
  "rewards/margins": 0.5627579689025879,
8303
  "rewards/rejected": -0.3508843779563904,
8304
  "step": 5520
8305
+ },
8306
+ {
8307
+ "epoch": 28.045569620253165,
8308
+ "grad_norm": 221196.22582529782,
8309
+ "learning_rate": 1.2237543089940458e-07,
8310
+ "logits/chosen": -2.0594754219055176,
8311
+ "logits/rejected": -0.8701013326644897,
8312
+ "logps/chosen": -27.682659149169922,
8313
+ "logps/rejected": -595.9862060546875,
8314
+ "loss": 12458.1609,
8315
+ "rewards/accuracies": 1.0,
8316
+ "rewards/chosen": 0.21054939925670624,
8317
+ "rewards/margins": 0.5732256174087524,
8318
+ "rewards/rejected": -0.362676203250885,
8319
+ "step": 5530
8320
+ },
8321
+ {
8322
+ "epoch": 28.09620253164557,
8323
+ "grad_norm": 270569.55775822,
8324
+ "learning_rate": 1.2159197743654026e-07,
8325
+ "logits/chosen": -1.7393264770507812,
8326
+ "logits/rejected": -1.1281194686889648,
8327
+ "logps/chosen": -24.0075626373291,
8328
+ "logps/rejected": -583.3763427734375,
8329
+ "loss": 12198.0859,
8330
+ "rewards/accuracies": 1.0,
8331
+ "rewards/chosen": 0.20713207125663757,
8332
+ "rewards/margins": 0.5608252286911011,
8333
+ "rewards/rejected": -0.35369327664375305,
8334
+ "step": 5540
8335
+ },
8336
+ {
8337
+ "epoch": 28.146835443037975,
8338
+ "grad_norm": 294202.9420634267,
8339
+ "learning_rate": 1.2080852397367596e-07,
8340
+ "logits/chosen": -1.1881496906280518,
8341
+ "logits/rejected": -1.9830278158187866,
8342
+ "logps/chosen": -27.52215003967285,
8343
+ "logps/rejected": -571.5521240234375,
8344
+ "loss": 12035.3297,
8345
+ "rewards/accuracies": 0.9750000238418579,
8346
+ "rewards/chosen": 0.198753222823143,
8347
+ "rewards/margins": 0.5402897596359253,
8348
+ "rewards/rejected": -0.34153658151626587,
8349
+ "step": 5550
8350
+ },
8351
+ {
8352
+ "epoch": 28.19746835443038,
8353
+ "grad_norm": 250256.82251298483,
8354
+ "learning_rate": 1.2002507051081164e-07,
8355
+ "logits/chosen": -2.069603443145752,
8356
+ "logits/rejected": -1.1806148290634155,
8357
+ "logps/chosen": -32.80065155029297,
8358
+ "logps/rejected": -584.74169921875,
8359
+ "loss": 12394.9164,
8360
+ "rewards/accuracies": 1.0,
8361
+ "rewards/chosen": 0.21380552649497986,
8362
+ "rewards/margins": 0.5591468811035156,
8363
+ "rewards/rejected": -0.345341295003891,
8364
+ "step": 5560
8365
+ },
8366
+ {
8367
+ "epoch": 28.248101265822786,
8368
+ "grad_norm": 295332.47087175207,
8369
+ "learning_rate": 1.1924161704794735e-07,
8370
+ "logits/chosen": -1.0471255779266357,
8371
+ "logits/rejected": -0.4857943654060364,
8372
+ "logps/chosen": -21.058979034423828,
8373
+ "logps/rejected": -572.4304809570312,
8374
+ "loss": 12514.1922,
8375
+ "rewards/accuracies": 0.987500011920929,
8376
+ "rewards/chosen": 0.1976846158504486,
8377
+ "rewards/margins": 0.548615574836731,
8378
+ "rewards/rejected": -0.35093095898628235,
8379
+ "step": 5570
8380
+ },
8381
+ {
8382
+ "epoch": 28.29873417721519,
8383
+ "grad_norm": 206143.0104728106,
8384
+ "learning_rate": 1.1845816358508304e-07,
8385
+ "logits/chosen": -2.556028366088867,
8386
+ "logits/rejected": -1.9551620483398438,
8387
+ "logps/chosen": -35.4798698425293,
8388
+ "logps/rejected": -586.085693359375,
8389
+ "loss": 12969.0906,
8390
+ "rewards/accuracies": 0.987500011920929,
8391
+ "rewards/chosen": 0.20620682835578918,
8392
+ "rewards/margins": 0.5480517148971558,
8393
+ "rewards/rejected": -0.34184494614601135,
8394
+ "step": 5580
8395
+ },
8396
+ {
8397
+ "epoch": 28.349367088607593,
8398
+ "grad_norm": 264961.57508088043,
8399
+ "learning_rate": 1.1767471012221873e-07,
8400
+ "logits/chosen": -1.3203023672103882,
8401
+ "logits/rejected": -0.41819173097610474,
8402
+ "logps/chosen": -35.34379959106445,
8403
+ "logps/rejected": -582.7780151367188,
8404
+ "loss": 11659.4484,
8405
+ "rewards/accuracies": 0.987500011920929,
8406
+ "rewards/chosen": 0.21271836757659912,
8407
+ "rewards/margins": 0.5540838837623596,
8408
+ "rewards/rejected": -0.3413654863834381,
8409
+ "step": 5590
8410
+ },
8411
+ {
8412
+ "epoch": 28.4,
8413
+ "grad_norm": 475056.8558156038,
8414
+ "learning_rate": 1.1689125665935443e-07,
8415
+ "logits/chosen": 0.2806483507156372,
8416
+ "logits/rejected": 0.8025129437446594,
8417
+ "logps/chosen": -32.23934555053711,
8418
+ "logps/rejected": -571.2871704101562,
8419
+ "loss": 12715.6078,
8420
+ "rewards/accuracies": 0.987500011920929,
8421
+ "rewards/chosen": 0.20215098559856415,
8422
+ "rewards/margins": 0.5371149778366089,
8423
+ "rewards/rejected": -0.33496397733688354,
8424
+ "step": 5600
8425
+ },
8426
+ {
8427
+ "epoch": 28.450632911392404,
8428
+ "grad_norm": 197134.08463352287,
8429
+ "learning_rate": 1.1610780319649012e-07,
8430
+ "logits/chosen": -0.42449599504470825,
8431
+ "logits/rejected": 0.20755800604820251,
8432
+ "logps/chosen": -30.612218856811523,
8433
+ "logps/rejected": -578.2881469726562,
8434
+ "loss": 12352.6281,
8435
+ "rewards/accuracies": 0.987500011920929,
8436
+ "rewards/chosen": 0.2100987732410431,
8437
+ "rewards/margins": 0.5525861978530884,
8438
+ "rewards/rejected": -0.34248748421669006,
8439
+ "step": 5610
8440
+ },
8441
+ {
8442
+ "epoch": 28.50126582278481,
8443
+ "grad_norm": 345771.22083863505,
8444
+ "learning_rate": 1.1532434973362581e-07,
8445
+ "logits/chosen": -1.5660457611083984,
8446
+ "logits/rejected": -0.7327693700790405,
8447
+ "logps/chosen": -22.58321762084961,
8448
+ "logps/rejected": -566.7658081054688,
8449
+ "loss": 11851.4547,
8450
+ "rewards/accuracies": 0.987500011920929,
8451
+ "rewards/chosen": 0.19859129190444946,
8452
+ "rewards/margins": 0.5416545271873474,
8453
+ "rewards/rejected": -0.3430632948875427,
8454
+ "step": 5620
8455
+ },
8456
+ {
8457
+ "epoch": 28.551898734177215,
8458
+ "grad_norm": 168585.12783774585,
8459
+ "learning_rate": 1.145408962707615e-07,
8460
+ "logits/chosen": -0.2972283363342285,
8461
+ "logits/rejected": -0.3674158453941345,
8462
+ "logps/chosen": -29.131546020507812,
8463
+ "logps/rejected": -597.9761962890625,
8464
+ "loss": 11512.6836,
8465
+ "rewards/accuracies": 1.0,
8466
+ "rewards/chosen": 0.21312180161476135,
8467
+ "rewards/margins": 0.5691269040107727,
8468
+ "rewards/rejected": -0.35600510239601135,
8469
+ "step": 5630
8470
+ },
8471
+ {
8472
+ "epoch": 28.60253164556962,
8473
+ "grad_norm": 208909.02036407648,
8474
+ "learning_rate": 1.137574428078972e-07,
8475
+ "logits/chosen": -1.0681560039520264,
8476
+ "logits/rejected": -0.39094457030296326,
8477
+ "logps/chosen": -36.012596130371094,
8478
+ "logps/rejected": -586.0516357421875,
8479
+ "loss": 12808.1297,
8480
+ "rewards/accuracies": 0.987500011920929,
8481
+ "rewards/chosen": 0.20493356883525848,
8482
+ "rewards/margins": 0.5560083389282227,
8483
+ "rewards/rejected": -0.3510746955871582,
8484
+ "step": 5640
8485
+ },
8486
+ {
8487
+ "epoch": 28.653164556962025,
8488
+ "grad_norm": 311846.42372199957,
8489
+ "learning_rate": 1.1297398934503289e-07,
8490
+ "logits/chosen": 0.5105953216552734,
8491
+ "logits/rejected": 1.009169101715088,
8492
+ "logps/chosen": -27.007156372070312,
8493
+ "logps/rejected": -599.7515258789062,
8494
+ "loss": 11991.1812,
8495
+ "rewards/accuracies": 1.0,
8496
+ "rewards/chosen": 0.21386167407035828,
8497
+ "rewards/margins": 0.572094202041626,
8498
+ "rewards/rejected": -0.3582325577735901,
8499
+ "step": 5650
8500
+ },
8501
+ {
8502
+ "epoch": 28.70379746835443,
8503
+ "grad_norm": 268717.3291778612,
8504
+ "learning_rate": 1.1219053588216858e-07,
8505
+ "logits/chosen": -0.8255645036697388,
8506
+ "logits/rejected": -0.8527682423591614,
8507
+ "logps/chosen": -21.579692840576172,
8508
+ "logps/rejected": -585.7736206054688,
8509
+ "loss": 12543.1672,
8510
+ "rewards/accuracies": 1.0,
8511
+ "rewards/chosen": 0.2072766274213791,
8512
+ "rewards/margins": 0.5643941760063171,
8513
+ "rewards/rejected": -0.35711759328842163,
8514
+ "step": 5660
8515
+ },
8516
+ {
8517
+ "epoch": 28.754430379746836,
8518
+ "grad_norm": 251846.43966430755,
8519
+ "learning_rate": 1.1140708241930429e-07,
8520
+ "logits/chosen": -1.6031732559204102,
8521
+ "logits/rejected": -0.6178330779075623,
8522
+ "logps/chosen": -30.777385711669922,
8523
+ "logps/rejected": -578.4786376953125,
8524
+ "loss": 12538.3711,
8525
+ "rewards/accuracies": 0.987500011920929,
8526
+ "rewards/chosen": 0.20804066956043243,
8527
+ "rewards/margins": 0.5539838075637817,
8528
+ "rewards/rejected": -0.3459431827068329,
8529
+ "step": 5670
8530
+ },
8531
+ {
8532
+ "epoch": 28.80506329113924,
8533
+ "grad_norm": 362284.87054599967,
8534
+ "learning_rate": 1.1062362895643998e-07,
8535
+ "logits/chosen": -0.6538245677947998,
8536
+ "logits/rejected": -0.3702305555343628,
8537
+ "logps/chosen": -26.261932373046875,
8538
+ "logps/rejected": -577.5454711914062,
8539
+ "loss": 13020.9297,
8540
+ "rewards/accuracies": 1.0,
8541
+ "rewards/chosen": 0.20354709029197693,
8542
+ "rewards/margins": 0.5529359579086304,
8543
+ "rewards/rejected": -0.34938886761665344,
8544
+ "step": 5680
8545
+ },
8546
+ {
8547
+ "epoch": 28.855696202531647,
8548
+ "grad_norm": 205761.31564795828,
8549
+ "learning_rate": 1.0984017549357568e-07,
8550
+ "logits/chosen": -2.4821999073028564,
8551
+ "logits/rejected": -2.7491514682769775,
8552
+ "logps/chosen": -33.985538482666016,
8553
+ "logps/rejected": -588.4347534179688,
8554
+ "loss": 12498.5609,
8555
+ "rewards/accuracies": 1.0,
8556
+ "rewards/chosen": 0.2070481777191162,
8557
+ "rewards/margins": 0.5542899370193481,
8558
+ "rewards/rejected": -0.34724172949790955,
8559
+ "step": 5690
8560
+ },
8561
+ {
8562
+ "epoch": 28.906329113924052,
8563
+ "grad_norm": 269249.39255954395,
8564
+ "learning_rate": 1.0905672203071137e-07,
8565
+ "logits/chosen": 0.24643035233020782,
8566
+ "logits/rejected": 0.39688020944595337,
8567
+ "logps/chosen": -22.365093231201172,
8568
+ "logps/rejected": -572.4009399414062,
8569
+ "loss": 12145.2008,
8570
+ "rewards/accuracies": 1.0,
8571
+ "rewards/chosen": 0.20018497109413147,
8572
+ "rewards/margins": 0.5481060147285461,
8573
+ "rewards/rejected": -0.3479210138320923,
8574
+ "step": 5700
8575
+ },
8576
+ {
8577
+ "epoch": 28.956962025316457,
8578
+ "grad_norm": 274939.7399900873,
8579
+ "learning_rate": 1.0827326856784706e-07,
8580
+ "logits/chosen": 0.24328431487083435,
8581
+ "logits/rejected": 0.056040357798337936,
8582
+ "logps/chosen": -27.859899520874023,
8583
+ "logps/rejected": -582.8697509765625,
8584
+ "loss": 12038.6203,
8585
+ "rewards/accuracies": 1.0,
8586
+ "rewards/chosen": 0.20646706223487854,
8587
+ "rewards/margins": 0.5581387281417847,
8588
+ "rewards/rejected": -0.35167163610458374,
8589
+ "step": 5710
8590
  }
8591
  ],
8592
  "logging_steps": 10,