cat-searcher commited on
Commit
1e90ce9
·
verified ·
1 Parent(s): d066457

Training in progress, epoch 22, checkpoint

Browse files
Files changed (29) hide show
  1. last-checkpoint/global_step4533/bf16_zero_pp_rank_0_mp_rank_00_optim_states.pt +3 -0
  2. last-checkpoint/global_step4533/bf16_zero_pp_rank_1_mp_rank_00_optim_states.pt +3 -0
  3. last-checkpoint/global_step4533/bf16_zero_pp_rank_2_mp_rank_00_optim_states.pt +3 -0
  4. last-checkpoint/global_step4533/bf16_zero_pp_rank_3_mp_rank_00_optim_states.pt +3 -0
  5. last-checkpoint/global_step4533/bf16_zero_pp_rank_4_mp_rank_00_optim_states.pt +3 -0
  6. last-checkpoint/global_step4533/bf16_zero_pp_rank_5_mp_rank_00_optim_states.pt +3 -0
  7. last-checkpoint/global_step4533/bf16_zero_pp_rank_6_mp_rank_00_optim_states.pt +3 -0
  8. last-checkpoint/global_step4533/bf16_zero_pp_rank_7_mp_rank_00_optim_states.pt +3 -0
  9. last-checkpoint/global_step4533/zero_pp_rank_0_mp_rank_00_model_states.pt +3 -0
  10. last-checkpoint/global_step4533/zero_pp_rank_1_mp_rank_00_model_states.pt +3 -0
  11. last-checkpoint/global_step4533/zero_pp_rank_2_mp_rank_00_model_states.pt +3 -0
  12. last-checkpoint/global_step4533/zero_pp_rank_3_mp_rank_00_model_states.pt +3 -0
  13. last-checkpoint/global_step4533/zero_pp_rank_4_mp_rank_00_model_states.pt +3 -0
  14. last-checkpoint/global_step4533/zero_pp_rank_5_mp_rank_00_model_states.pt +3 -0
  15. last-checkpoint/global_step4533/zero_pp_rank_6_mp_rank_00_model_states.pt +3 -0
  16. last-checkpoint/global_step4533/zero_pp_rank_7_mp_rank_00_model_states.pt +3 -0
  17. last-checkpoint/latest +1 -1
  18. last-checkpoint/model-00001-of-00002.safetensors +1 -1
  19. last-checkpoint/model-00002-of-00002.safetensors +1 -1
  20. last-checkpoint/rng_state_0.pth +1 -1
  21. last-checkpoint/rng_state_1.pth +1 -1
  22. last-checkpoint/rng_state_2.pth +1 -1
  23. last-checkpoint/rng_state_3.pth +1 -1
  24. last-checkpoint/rng_state_4.pth +1 -1
  25. last-checkpoint/rng_state_5.pth +1 -1
  26. last-checkpoint/rng_state_6.pth +1 -1
  27. last-checkpoint/rng_state_7.pth +1 -1
  28. last-checkpoint/scheduler.pt +1 -1
  29. last-checkpoint/trainer_state.json +302 -2
last-checkpoint/global_step4533/bf16_zero_pp_rank_0_mp_rank_00_optim_states.pt ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:67fe3bbb691c976edec39fc9edffbb6b2cf820e02c17331efddc8fa4ac7dee36
3
+ size 2506176112
last-checkpoint/global_step4533/bf16_zero_pp_rank_1_mp_rank_00_optim_states.pt ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:2526a90420c32897527f16232eadadc8198f30539e7b9603db4e3095336a4bac
3
+ size 2506176112
last-checkpoint/global_step4533/bf16_zero_pp_rank_2_mp_rank_00_optim_states.pt ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:1f996c91286c656d7c1d15b4586136bf69bf32b8fe12fcfae3c6230229b575d0
3
+ size 2506176112
last-checkpoint/global_step4533/bf16_zero_pp_rank_3_mp_rank_00_optim_states.pt ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:01a748747c27beba355539bbe7761cb736a916c3ec59fcb7d13bf0c4de8ebf81
3
+ size 2506176112
last-checkpoint/global_step4533/bf16_zero_pp_rank_4_mp_rank_00_optim_states.pt ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:6412c6d17d8e7a9916cb406210b3c1c66750e850f003a5d060d9cc96a9c416b2
3
+ size 2506176112
last-checkpoint/global_step4533/bf16_zero_pp_rank_5_mp_rank_00_optim_states.pt ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:fc696da83cf3e9de2ddb484e0bfba72cab42bd7855b4280d903b74516e38a7b3
3
+ size 2506176112
last-checkpoint/global_step4533/bf16_zero_pp_rank_6_mp_rank_00_optim_states.pt ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:7db4e8b975b1ebb83514b08dddacfb6f12b9b5cac0438d166f59e6d5dc20147c
3
+ size 2506176112
last-checkpoint/global_step4533/bf16_zero_pp_rank_7_mp_rank_00_optim_states.pt ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:6181d650cdedc38d18bf685928c50089fad739e9605d67fdc07d4333840594d2
3
+ size 2506176112
last-checkpoint/global_step4533/zero_pp_rank_0_mp_rank_00_model_states.pt ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:aad6cf1e90722aa0bded9997f432975f9ac26246c04c133866fd46a50a3a486f
3
+ size 85570
last-checkpoint/global_step4533/zero_pp_rank_1_mp_rank_00_model_states.pt ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:fd4a1345616e6896e2de6e8fcd0ca3739f245412a6f54e02cfeebffb6a198bfb
3
+ size 85506
last-checkpoint/global_step4533/zero_pp_rank_2_mp_rank_00_model_states.pt ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:bdd3016ed6fe1eacc8c1cabc3b7f91cde643ba6e2c010cfbd4178c073605c763
3
+ size 85506
last-checkpoint/global_step4533/zero_pp_rank_3_mp_rank_00_model_states.pt ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:40298aaeb4130d9e00dbbd161f7627bf1dbcea3137aec363457f42485c0cc4ea
3
+ size 85506
last-checkpoint/global_step4533/zero_pp_rank_4_mp_rank_00_model_states.pt ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:4251ba385bb1157c4aaeead83fb9f7731efc285227f30464563868151c914d6f
3
+ size 85506
last-checkpoint/global_step4533/zero_pp_rank_5_mp_rank_00_model_states.pt ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:9fadbb60dc81290ffb98ff71ce4da2ba824de319edc1a076297ed87cb8c65003
3
+ size 85506
last-checkpoint/global_step4533/zero_pp_rank_6_mp_rank_00_model_states.pt ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:3b8149b7336b4255d3b4ec501fa71066f28d3daedfe26d40e2af38a4353fbbc4
3
+ size 85506
last-checkpoint/global_step4533/zero_pp_rank_7_mp_rank_00_model_states.pt ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:8c6345bbc9be12cb5ac4a597dbc0740ee29c21bee3e5fd186dc3a3d3abfb4948
3
+ size 85506
last-checkpoint/latest CHANGED
@@ -1 +1 @@
1
- global_step4336
 
1
+ global_step4533
last-checkpoint/model-00001-of-00002.safetensors CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:da6a4c69beed061f5c9128d0fc377ffac0dd574b2fd31065d9fac99fd04c06f7
3
  size 4945242264
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:5749e6ea106dcd62f8a5403e9ab2697a968659c99e52d97cf37770a6dcf0de89
3
  size 4945242264
last-checkpoint/model-00002-of-00002.safetensors CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:a92fd02d10813ce3cef6dbcbb14b9a87a8544f0439ddb53aa479c171a1393b3a
3
  size 67121608
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:a9913d4ed78dfdf1f7884966d3d5a627d1a427e9dfd802c854f7139effd18d42
3
  size 67121608
last-checkpoint/rng_state_0.pth CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:6ffbd38f2626212a6ee67166039a4e916fc424a3ef78a112b81ecba34404f3d1
3
  size 15984
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:4f43f0f70b0dbcc678819df90ae1498097a1a40b141c1b3634f47415b4753e46
3
  size 15984
last-checkpoint/rng_state_1.pth CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:761cc4f38afe955241d2348fca72dd9656d9ebe889e2a62241d58bd4bc402ab3
3
  size 15984
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:6fea0bf58d3a4bfd4cd6e77b671fd76d0f08457d71ef115ed0b7a79d932ccecb
3
  size 15984
last-checkpoint/rng_state_2.pth CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:df49e54f3e60ca7b97616f5b3ec776cb88dd34df15356bfe0565e073195eabb5
3
  size 15984
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:485de881ac346a0d6293f173c620168e2c1c78bb56e9d35b719fe68b9599381c
3
  size 15984
last-checkpoint/rng_state_3.pth CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:3a34a02b568d14f0c9040613f420faf5a53edb4c287e30c4c7c5812d2c661932
3
  size 15984
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:eb3995c042fb6720f0e6292f2cdbc61899166b759822de9794a63f951332b53d
3
  size 15984
last-checkpoint/rng_state_4.pth CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:dbb893a42ae156daa2dddcfcf2ef773c576ec3c274aaf9e5a216bd1d659a190b
3
  size 15984
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:1dc5d0e51cb62121ef22456f6c04b602c2ee63280f1aa9215b5f6ca9ffd945a1
3
  size 15984
last-checkpoint/rng_state_5.pth CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:b966d4dca42c479a7d82dd6ce6a32209ee7e19048adffff6ca6d5cbfafe89c52
3
  size 15984
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:aaf6f7a95413d841f5781a52bd24e6916b171c6b1eae7a534a1e53e4a4d75dda
3
  size 15984
last-checkpoint/rng_state_6.pth CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:dfdec22eeccd05416d2a00acd09a775df394351b2aa4e3b1d32df8f7f4a8b07f
3
  size 15984
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:e8b59e0c70f333ceeb5e4b4cd778af69cd56a780cd649d81f2b1589c1edb774f
3
  size 15984
last-checkpoint/rng_state_7.pth CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:89e852c764f530a6e44842564809b78387dfa34f7ec6fcece34742866c5d452e
3
  size 15984
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:ab4acc6cc8c266eed2b1e03455904bdc72f90eae6636da6366d4a9d998a1c278
3
  size 15984
last-checkpoint/scheduler.pt CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:3a4a381db01b95441bf8a0f5bad46ab9935674f44c9e22dc758217b09cacfca8
3
  size 1064
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:9d43ad6dc2ceca1f9dc036c87b7f18db786223a3aca759dd3644262f6faf9977
3
  size 1064
last-checkpoint/trainer_state.json CHANGED
@@ -1,9 +1,9 @@
1
  {
2
  "best_metric": null,
3
  "best_model_checkpoint": null,
4
- "epoch": 22.0,
5
  "eval_steps": 100,
6
- "global_step": 4336,
7
  "is_hyper_param_search": false,
8
  "is_local_process_zero": true,
9
  "is_world_process_zero": true,
@@ -6517,6 +6517,306 @@
6517
  "rewards/margins": 0.5605840086936951,
6518
  "rewards/rejected": -0.35054340958595276,
6519
  "step": 4330
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
6520
  }
6521
  ],
6522
  "logging_steps": 10,
 
1
  {
2
  "best_metric": null,
3
  "best_model_checkpoint": null,
4
+ "epoch": 22.99746835443038,
5
  "eval_steps": 100,
6
+ "global_step": 4533,
7
  "is_hyper_param_search": false,
8
  "is_local_process_zero": true,
9
  "is_world_process_zero": true,
 
6517
  "rewards/margins": 0.5605840086936951,
6518
  "rewards/rejected": -0.35054340958595276,
6519
  "step": 4330
6520
+ },
6521
+ {
6522
+ "epoch": 22.020253164556962,
6523
+ "grad_norm": 844000.7864919893,
6524
+ "learning_rate": 2.1560639298025696e-07,
6525
+ "logits/chosen": -0.2103087455034256,
6526
+ "logits/rejected": 0.07530391216278076,
6527
+ "logps/chosen": -30.565990447998047,
6528
+ "logps/rejected": -547.0203857421875,
6529
+ "loss": 14383.6906,
6530
+ "rewards/accuracies": 1.0,
6531
+ "rewards/chosen": 0.1880597323179245,
6532
+ "rewards/margins": 0.5152319073677063,
6533
+ "rewards/rejected": -0.3271721601486206,
6534
+ "step": 4340
6535
+ },
6536
+ {
6537
+ "epoch": 22.070886075949367,
6538
+ "grad_norm": 597784.613899612,
6539
+ "learning_rate": 2.1482293951739267e-07,
6540
+ "logits/chosen": -0.721124529838562,
6541
+ "logits/rejected": -0.21510323882102966,
6542
+ "logps/chosen": -37.94996643066406,
6543
+ "logps/rejected": -587.461181640625,
6544
+ "loss": 13822.8656,
6545
+ "rewards/accuracies": 1.0,
6546
+ "rewards/chosen": 0.19957685470581055,
6547
+ "rewards/margins": 0.5496448278427124,
6548
+ "rewards/rejected": -0.3500679135322571,
6549
+ "step": 4350
6550
+ },
6551
+ {
6552
+ "epoch": 22.121518987341773,
6553
+ "grad_norm": 468430.91971069,
6554
+ "learning_rate": 2.1403948605452835e-07,
6555
+ "logits/chosen": -1.418505072593689,
6556
+ "logits/rejected": -0.8604210019111633,
6557
+ "logps/chosen": -38.54343795776367,
6558
+ "logps/rejected": -585.6038818359375,
6559
+ "loss": 13499.3969,
6560
+ "rewards/accuracies": 0.987500011920929,
6561
+ "rewards/chosen": 0.2021259367465973,
6562
+ "rewards/margins": 0.5458530187606812,
6563
+ "rewards/rejected": -0.34372708201408386,
6564
+ "step": 4360
6565
+ },
6566
+ {
6567
+ "epoch": 22.172151898734178,
6568
+ "grad_norm": 838303.6265575557,
6569
+ "learning_rate": 2.1325603259166405e-07,
6570
+ "logits/chosen": -0.013787698931992054,
6571
+ "logits/rejected": -0.22224357724189758,
6572
+ "logps/chosen": -33.32988357543945,
6573
+ "logps/rejected": -576.55224609375,
6574
+ "loss": 13816.5812,
6575
+ "rewards/accuracies": 1.0,
6576
+ "rewards/chosen": 0.1937212496995926,
6577
+ "rewards/margins": 0.5373150110244751,
6578
+ "rewards/rejected": -0.3435937762260437,
6579
+ "step": 4370
6580
+ },
6581
+ {
6582
+ "epoch": 22.222784810126583,
6583
+ "grad_norm": 524213.07765733794,
6584
+ "learning_rate": 2.1247257912879973e-07,
6585
+ "logits/chosen": 0.3687540888786316,
6586
+ "logits/rejected": 0.8078397512435913,
6587
+ "logps/chosen": -38.822872161865234,
6588
+ "logps/rejected": -553.8123168945312,
6589
+ "loss": 12435.0875,
6590
+ "rewards/accuracies": 0.9750000238418579,
6591
+ "rewards/chosen": 0.18458959460258484,
6592
+ "rewards/margins": 0.5130779147148132,
6593
+ "rewards/rejected": -0.32848840951919556,
6594
+ "step": 4380
6595
+ },
6596
+ {
6597
+ "epoch": 22.27341772151899,
6598
+ "grad_norm": 476932.283051178,
6599
+ "learning_rate": 2.1168912566593544e-07,
6600
+ "logits/chosen": 0.6524232029914856,
6601
+ "logits/rejected": 0.6763177514076233,
6602
+ "logps/chosen": -41.4456901550293,
6603
+ "logps/rejected": -586.055419921875,
6604
+ "loss": 14132.7062,
6605
+ "rewards/accuracies": 0.9750000238418579,
6606
+ "rewards/chosen": 0.20029637217521667,
6607
+ "rewards/margins": 0.5451359152793884,
6608
+ "rewards/rejected": -0.34483957290649414,
6609
+ "step": 4390
6610
+ },
6611
+ {
6612
+ "epoch": 22.324050632911394,
6613
+ "grad_norm": 568972.1382617814,
6614
+ "learning_rate": 2.1090567220307112e-07,
6615
+ "logits/chosen": -0.3675435781478882,
6616
+ "logits/rejected": 0.2508888840675354,
6617
+ "logps/chosen": -37.127281188964844,
6618
+ "logps/rejected": -571.7310180664062,
6619
+ "loss": 13226.8641,
6620
+ "rewards/accuracies": 0.987500011920929,
6621
+ "rewards/chosen": 0.19992589950561523,
6622
+ "rewards/margins": 0.5378109812736511,
6623
+ "rewards/rejected": -0.33788514137268066,
6624
+ "step": 4400
6625
+ },
6626
+ {
6627
+ "epoch": 22.374683544303796,
6628
+ "grad_norm": 549953.3378298564,
6629
+ "learning_rate": 2.1012221874020682e-07,
6630
+ "logits/chosen": -0.3316110372543335,
6631
+ "logits/rejected": 0.12318412959575653,
6632
+ "logps/chosen": -45.176429748535156,
6633
+ "logps/rejected": -601.1099243164062,
6634
+ "loss": 13357.3594,
6635
+ "rewards/accuracies": 1.0,
6636
+ "rewards/chosen": 0.2056044340133667,
6637
+ "rewards/margins": 0.5584502220153809,
6638
+ "rewards/rejected": -0.35284581780433655,
6639
+ "step": 4410
6640
+ },
6641
+ {
6642
+ "epoch": 22.4253164556962,
6643
+ "grad_norm": 487398.89046152594,
6644
+ "learning_rate": 2.093387652773425e-07,
6645
+ "logits/chosen": -1.0198824405670166,
6646
+ "logits/rejected": -0.21292218565940857,
6647
+ "logps/chosen": -36.835960388183594,
6648
+ "logps/rejected": -577.4632568359375,
6649
+ "loss": 13915.3031,
6650
+ "rewards/accuracies": 1.0,
6651
+ "rewards/chosen": 0.20356829464435577,
6652
+ "rewards/margins": 0.543707013130188,
6653
+ "rewards/rejected": -0.3401387631893158,
6654
+ "step": 4420
6655
+ },
6656
+ {
6657
+ "epoch": 22.475949367088607,
6658
+ "grad_norm": 477361.2573301333,
6659
+ "learning_rate": 2.085553118144782e-07,
6660
+ "logits/chosen": 0.3704206943511963,
6661
+ "logits/rejected": 0.693733811378479,
6662
+ "logps/chosen": -46.64609146118164,
6663
+ "logps/rejected": -594.5731811523438,
6664
+ "loss": 13106.9359,
6665
+ "rewards/accuracies": 0.987500011920929,
6666
+ "rewards/chosen": 0.20661070942878723,
6667
+ "rewards/margins": 0.5531338453292847,
6668
+ "rewards/rejected": -0.34652310609817505,
6669
+ "step": 4430
6670
+ },
6671
+ {
6672
+ "epoch": 22.526582278481012,
6673
+ "grad_norm": 597606.9724370906,
6674
+ "learning_rate": 2.077718583516139e-07,
6675
+ "logits/chosen": -0.6012102365493774,
6676
+ "logits/rejected": -0.6212292909622192,
6677
+ "logps/chosen": -36.24720001220703,
6678
+ "logps/rejected": -570.0081787109375,
6679
+ "loss": 13390.8625,
6680
+ "rewards/accuracies": 1.0,
6681
+ "rewards/chosen": 0.20318233966827393,
6682
+ "rewards/margins": 0.5344886779785156,
6683
+ "rewards/rejected": -0.3313063085079193,
6684
+ "step": 4440
6685
+ },
6686
+ {
6687
+ "epoch": 22.577215189873417,
6688
+ "grad_norm": 469529.248927815,
6689
+ "learning_rate": 2.069884048887496e-07,
6690
+ "logits/chosen": -0.041382573544979095,
6691
+ "logits/rejected": 0.7878470420837402,
6692
+ "logps/chosen": -43.38654708862305,
6693
+ "logps/rejected": -568.7279052734375,
6694
+ "loss": 13333.4188,
6695
+ "rewards/accuracies": 1.0,
6696
+ "rewards/chosen": 0.19029700756072998,
6697
+ "rewards/margins": 0.5311275124549866,
6698
+ "rewards/rejected": -0.3408304750919342,
6699
+ "step": 4450
6700
+ },
6701
+ {
6702
+ "epoch": 22.627848101265823,
6703
+ "grad_norm": 402623.00766789017,
6704
+ "learning_rate": 2.0620495142588527e-07,
6705
+ "logits/chosen": -0.8500850796699524,
6706
+ "logits/rejected": 0.10065221786499023,
6707
+ "logps/chosen": -31.52435302734375,
6708
+ "logps/rejected": -562.478271484375,
6709
+ "loss": 13787.8797,
6710
+ "rewards/accuracies": 0.987500011920929,
6711
+ "rewards/chosen": 0.1997881382703781,
6712
+ "rewards/margins": 0.5369755029678345,
6713
+ "rewards/rejected": -0.33718740940093994,
6714
+ "step": 4460
6715
+ },
6716
+ {
6717
+ "epoch": 22.678481012658228,
6718
+ "grad_norm": 373755.1797101064,
6719
+ "learning_rate": 2.05421497963021e-07,
6720
+ "logits/chosen": -1.3291960954666138,
6721
+ "logits/rejected": -1.2023630142211914,
6722
+ "logps/chosen": -34.12505340576172,
6723
+ "logps/rejected": -600.3700561523438,
6724
+ "loss": 13297.9406,
6725
+ "rewards/accuracies": 1.0,
6726
+ "rewards/chosen": 0.20627860724925995,
6727
+ "rewards/margins": 0.5676389336585999,
6728
+ "rewards/rejected": -0.3613602817058563,
6729
+ "step": 4470
6730
+ },
6731
+ {
6732
+ "epoch": 22.729113924050633,
6733
+ "grad_norm": 402761.92027776636,
6734
+ "learning_rate": 2.0463804450015669e-07,
6735
+ "logits/chosen": -1.5893421173095703,
6736
+ "logits/rejected": -1.3823096752166748,
6737
+ "logps/chosen": -30.61318588256836,
6738
+ "logps/rejected": -584.5267944335938,
6739
+ "loss": 14326.1875,
6740
+ "rewards/accuracies": 0.987500011920929,
6741
+ "rewards/chosen": 0.20079275965690613,
6742
+ "rewards/margins": 0.5540488958358765,
6743
+ "rewards/rejected": -0.35325610637664795,
6744
+ "step": 4480
6745
+ },
6746
+ {
6747
+ "epoch": 22.77974683544304,
6748
+ "grad_norm": 547067.5872175789,
6749
+ "learning_rate": 2.038545910372924e-07,
6750
+ "logits/chosen": 0.4800703525543213,
6751
+ "logits/rejected": 1.4792516231536865,
6752
+ "logps/chosen": -27.049495697021484,
6753
+ "logps/rejected": -563.0673217773438,
6754
+ "loss": 14447.3891,
6755
+ "rewards/accuracies": 1.0,
6756
+ "rewards/chosen": 0.19703736901283264,
6757
+ "rewards/margins": 0.5409034490585327,
6758
+ "rewards/rejected": -0.3438660502433777,
6759
+ "step": 4490
6760
+ },
6761
+ {
6762
+ "epoch": 22.830379746835444,
6763
+ "grad_norm": 672757.480231201,
6764
+ "learning_rate": 2.0307113757442807e-07,
6765
+ "logits/chosen": 0.13832028210163116,
6766
+ "logits/rejected": 0.6534411907196045,
6767
+ "logps/chosen": -48.682350158691406,
6768
+ "logps/rejected": -608.1444091796875,
6769
+ "loss": 13146.0031,
6770
+ "rewards/accuracies": 1.0,
6771
+ "rewards/chosen": 0.20029571652412415,
6772
+ "rewards/margins": 0.5563368797302246,
6773
+ "rewards/rejected": -0.35604116320610046,
6774
+ "step": 4500
6775
+ },
6776
+ {
6777
+ "epoch": 22.88101265822785,
6778
+ "grad_norm": 369986.02432868385,
6779
+ "learning_rate": 2.0228768411156378e-07,
6780
+ "logits/chosen": -1.8307338953018188,
6781
+ "logits/rejected": -1.2095929384231567,
6782
+ "logps/chosen": -45.19769287109375,
6783
+ "logps/rejected": -578.0750122070312,
6784
+ "loss": 14329.1656,
6785
+ "rewards/accuracies": 1.0,
6786
+ "rewards/chosen": 0.1950627863407135,
6787
+ "rewards/margins": 0.5377144813537598,
6788
+ "rewards/rejected": -0.3426516652107239,
6789
+ "step": 4510
6790
+ },
6791
+ {
6792
+ "epoch": 22.931645569620255,
6793
+ "grad_norm": 699107.7543808775,
6794
+ "learning_rate": 2.0150423064869946e-07,
6795
+ "logits/chosen": -0.08685462176799774,
6796
+ "logits/rejected": 0.9019424319267273,
6797
+ "logps/chosen": -45.735721588134766,
6798
+ "logps/rejected": -577.2432861328125,
6799
+ "loss": 13698.3734,
6800
+ "rewards/accuracies": 1.0,
6801
+ "rewards/chosen": 0.19573049247264862,
6802
+ "rewards/margins": 0.5354525446891785,
6803
+ "rewards/rejected": -0.33972200751304626,
6804
+ "step": 4520
6805
+ },
6806
+ {
6807
+ "epoch": 22.98227848101266,
6808
+ "grad_norm": 406293.788277243,
6809
+ "learning_rate": 2.0072077718583516e-07,
6810
+ "logits/chosen": -0.8599483370780945,
6811
+ "logits/rejected": 0.11351003497838974,
6812
+ "logps/chosen": -26.267419815063477,
6813
+ "logps/rejected": -555.6368408203125,
6814
+ "loss": 13659.9219,
6815
+ "rewards/accuracies": 0.987500011920929,
6816
+ "rewards/chosen": 0.19417151808738708,
6817
+ "rewards/margins": 0.5340765714645386,
6818
+ "rewards/rejected": -0.3399050235748291,
6819
+ "step": 4530
6820
  }
6821
  ],
6822
  "logging_steps": 10,