cat-searcher committed
Commit 751615c
1 Parent(s): 6c3748b

Training in progress, epoch 26, checkpoint

Files changed (29)
  1. last-checkpoint/global_step5323/bf16_zero_pp_rank_0_mp_rank_00_optim_states.pt +3 -0
  2. last-checkpoint/global_step5323/bf16_zero_pp_rank_1_mp_rank_00_optim_states.pt +3 -0
  3. last-checkpoint/global_step5323/bf16_zero_pp_rank_2_mp_rank_00_optim_states.pt +3 -0
  4. last-checkpoint/global_step5323/bf16_zero_pp_rank_3_mp_rank_00_optim_states.pt +3 -0
  5. last-checkpoint/global_step5323/bf16_zero_pp_rank_4_mp_rank_00_optim_states.pt +3 -0
  6. last-checkpoint/global_step5323/bf16_zero_pp_rank_5_mp_rank_00_optim_states.pt +3 -0
  7. last-checkpoint/global_step5323/bf16_zero_pp_rank_6_mp_rank_00_optim_states.pt +3 -0
  8. last-checkpoint/global_step5323/bf16_zero_pp_rank_7_mp_rank_00_optim_states.pt +3 -0
  9. last-checkpoint/global_step5323/zero_pp_rank_0_mp_rank_00_model_states.pt +3 -0
  10. last-checkpoint/global_step5323/zero_pp_rank_1_mp_rank_00_model_states.pt +3 -0
  11. last-checkpoint/global_step5323/zero_pp_rank_2_mp_rank_00_model_states.pt +3 -0
  12. last-checkpoint/global_step5323/zero_pp_rank_3_mp_rank_00_model_states.pt +3 -0
  13. last-checkpoint/global_step5323/zero_pp_rank_4_mp_rank_00_model_states.pt +3 -0
  14. last-checkpoint/global_step5323/zero_pp_rank_5_mp_rank_00_model_states.pt +3 -0
  15. last-checkpoint/global_step5323/zero_pp_rank_6_mp_rank_00_model_states.pt +3 -0
  16. last-checkpoint/global_step5323/zero_pp_rank_7_mp_rank_00_model_states.pt +3 -0
  17. last-checkpoint/latest +1 -1
  18. last-checkpoint/model-00001-of-00002.safetensors +1 -1
  19. last-checkpoint/model-00002-of-00002.safetensors +1 -1
  20. last-checkpoint/rng_state_0.pth +1 -1
  21. last-checkpoint/rng_state_1.pth +1 -1
  22. last-checkpoint/rng_state_2.pth +1 -1
  23. last-checkpoint/rng_state_3.pth +1 -1
  24. last-checkpoint/rng_state_4.pth +1 -1
  25. last-checkpoint/rng_state_5.pth +1 -1
  26. last-checkpoint/rng_state_6.pth +1 -1
  27. last-checkpoint/rng_state_7.pth +1 -1
  28. last-checkpoint/scheduler.pt +1 -1
  29. last-checkpoint/trainer_state.json +302 -2
last-checkpoint/global_step5323/bf16_zero_pp_rank_0_mp_rank_00_optim_states.pt ADDED
@@ -0,0 +1,3 @@
+ version https://git-lfs.github.com/spec/v1
+ oid sha256:c45275e2a07c3fe2dc76fe6e73907bebea1979e93eb54798352d7b941cf43860
+ size 2506176112
last-checkpoint/global_step5323/bf16_zero_pp_rank_1_mp_rank_00_optim_states.pt ADDED
@@ -0,0 +1,3 @@
+ version https://git-lfs.github.com/spec/v1
+ oid sha256:486f46a74f1844e305cc361999065e96a7130393dfebec4b0094877765872ebc
+ size 2506176112
last-checkpoint/global_step5323/bf16_zero_pp_rank_2_mp_rank_00_optim_states.pt ADDED
@@ -0,0 +1,3 @@
+ version https://git-lfs.github.com/spec/v1
+ oid sha256:9630faa6d05689defa38b0245a3f615a6c13341e19512728467a7e6575fede34
+ size 2506176112
last-checkpoint/global_step5323/bf16_zero_pp_rank_3_mp_rank_00_optim_states.pt ADDED
@@ -0,0 +1,3 @@
+ version https://git-lfs.github.com/spec/v1
+ oid sha256:c8fa91b706403340f1aaefec6e03fdb9606f4a8f84a75804e9e367829c295894
+ size 2506176112
last-checkpoint/global_step5323/bf16_zero_pp_rank_4_mp_rank_00_optim_states.pt ADDED
@@ -0,0 +1,3 @@
+ version https://git-lfs.github.com/spec/v1
+ oid sha256:81d00ca697e99f080ab9958e2463d29675f6ed93f730eda018d9cd52e86ef7c8
+ size 2506176112
last-checkpoint/global_step5323/bf16_zero_pp_rank_5_mp_rank_00_optim_states.pt ADDED
@@ -0,0 +1,3 @@
+ version https://git-lfs.github.com/spec/v1
+ oid sha256:db1b65a00e71fbce09d38576aacce4e8c9153cab3f83e4cf23541a510e9a745d
+ size 2506176112
last-checkpoint/global_step5323/bf16_zero_pp_rank_6_mp_rank_00_optim_states.pt ADDED
@@ -0,0 +1,3 @@
+ version https://git-lfs.github.com/spec/v1
+ oid sha256:bdd9716daeac1afa3df409fdfeb053de9f6f4b7bbceb88edc67bb46fd92e0dc3
+ size 2506176112
last-checkpoint/global_step5323/bf16_zero_pp_rank_7_mp_rank_00_optim_states.pt ADDED
@@ -0,0 +1,3 @@
+ version https://git-lfs.github.com/spec/v1
+ oid sha256:70645d8170cbe47f99afdcbbb9279d5ae0713fcad4b1bd7153c7daa8a37c2bfe
+ size 2506176112
last-checkpoint/global_step5323/zero_pp_rank_0_mp_rank_00_model_states.pt ADDED
@@ -0,0 +1,3 @@
+ version https://git-lfs.github.com/spec/v1
+ oid sha256:99a3c57f997def78498996acd45f3920b4af0cb94fbc24f587cdf28bc39b9457
+ size 85570
last-checkpoint/global_step5323/zero_pp_rank_1_mp_rank_00_model_states.pt ADDED
@@ -0,0 +1,3 @@
+ version https://git-lfs.github.com/spec/v1
+ oid sha256:edc24fd6f74ecfb84bbc3c6dab05a7aae1274f2e9ad47abe5493c3e123553731
+ size 85506
last-checkpoint/global_step5323/zero_pp_rank_2_mp_rank_00_model_states.pt ADDED
@@ -0,0 +1,3 @@
+ version https://git-lfs.github.com/spec/v1
+ oid sha256:b7f1287a2d2a99286d11b9540bc49204134273c167db0a9d6edeaa44fa332572
+ size 85506
last-checkpoint/global_step5323/zero_pp_rank_3_mp_rank_00_model_states.pt ADDED
@@ -0,0 +1,3 @@
+ version https://git-lfs.github.com/spec/v1
+ oid sha256:fd2a202c58b8057221047d6237bc1f5ced77610d3f7c878014e2f8943c790e1c
+ size 85506
last-checkpoint/global_step5323/zero_pp_rank_4_mp_rank_00_model_states.pt ADDED
@@ -0,0 +1,3 @@
+ version https://git-lfs.github.com/spec/v1
+ oid sha256:0047739a53f9cb0ef0b3055e1f63b9653f9b1ba899d37ed3957acd4e26e82e4c
+ size 85506
last-checkpoint/global_step5323/zero_pp_rank_5_mp_rank_00_model_states.pt ADDED
@@ -0,0 +1,3 @@
+ version https://git-lfs.github.com/spec/v1
+ oid sha256:24287a46a8f00f6e19d80c35a7b42b0aa65577e16a1b2b6768ab05f7fd9fe8ba
+ size 85506
last-checkpoint/global_step5323/zero_pp_rank_6_mp_rank_00_model_states.pt ADDED
@@ -0,0 +1,3 @@
+ version https://git-lfs.github.com/spec/v1
+ oid sha256:d925e1f0f3cd831f0c87388c06efee756b87131f09b06b7b8071a926f1eceb94
+ size 85506
last-checkpoint/global_step5323/zero_pp_rank_7_mp_rank_00_model_states.pt ADDED
@@ -0,0 +1,3 @@
+ version https://git-lfs.github.com/spec/v1
+ oid sha256:0af10b4e61e9e339c2ad41dbbee15d3e6bdf0944539c59256058631fe81b49bd
+ size 85506
last-checkpoint/latest CHANGED
@@ -1 +1 @@
- global_step5126
+ global_step5323
last-checkpoint/model-00001-of-00002.safetensors CHANGED
@@ -1,3 +1,3 @@
  version https://git-lfs.github.com/spec/v1
- oid sha256:ab3b8a6a1f14b87eddcd6889e77ca358471e584db04f6d14f50ce0ca4a94e8e2
+ oid sha256:48e3dca496abe618138acc972547a2585aebd68888a0188157e082fac3e57ec8
  size 4945242264
last-checkpoint/model-00002-of-00002.safetensors CHANGED
@@ -1,3 +1,3 @@
  version https://git-lfs.github.com/spec/v1
- oid sha256:4e312815d6589c577d3a29d19ba5e8956d7c9080646ba7b35c3708e364eb8f55
+ oid sha256:37ec3ac44a0c33cccc1363b0aaa3a4e381f89d397a266fbd331b9600bb0caba6
  size 67121608
last-checkpoint/rng_state_0.pth CHANGED
@@ -1,3 +1,3 @@
  version https://git-lfs.github.com/spec/v1
- oid sha256:9dd0b885528e55ec25b01a487faef7810481e858198ac24b76aedb3688770c06
+ oid sha256:9c49abc3bdedbec1fc8e1028ef422150f19ee7470d7b542e1ad8869fc044d2af
  size 15984
last-checkpoint/rng_state_1.pth CHANGED
@@ -1,3 +1,3 @@
  version https://git-lfs.github.com/spec/v1
- oid sha256:a451e60f451c0ed06e4b0d619be9f7981c5af29ba16d797996e102e4d1fd7514
+ oid sha256:df12ca4106ff0831785a55b5da88f6c86f6f67bd3d09b2dced4f20b539b14f72
  size 15984
last-checkpoint/rng_state_2.pth CHANGED
@@ -1,3 +1,3 @@
  version https://git-lfs.github.com/spec/v1
- oid sha256:ff4027794d1d9c71e13291169d32d384e8f6078c931f43db354471cbc57d8639
+ oid sha256:05fc0786faff729a3a1582f98b806b68d4f0b76aebb25cbad4431b73176b11c1
  size 15984
last-checkpoint/rng_state_3.pth CHANGED
@@ -1,3 +1,3 @@
  version https://git-lfs.github.com/spec/v1
- oid sha256:40925f5ac9883b8dfe22197d58f18429503331adeff91ce58e72d56b5094171a
+ oid sha256:a3117e1218a2dd3f7f8c516a840af48f6b93660d852cca124269f78c21f8577c
  size 15984
last-checkpoint/rng_state_4.pth CHANGED
@@ -1,3 +1,3 @@
  version https://git-lfs.github.com/spec/v1
- oid sha256:d0890e080f98bcfb81036d2db959cc45209e8c2f67a0dccde184473488395153
+ oid sha256:cdaa30c82476bf6a65e4eb9ca2ae7b95f1b38f41a6f5b2f1cbdda9af86a4a7a0
  size 15984
last-checkpoint/rng_state_5.pth CHANGED
@@ -1,3 +1,3 @@
  version https://git-lfs.github.com/spec/v1
- oid sha256:d8e4b714cb76d14f84bc59d5d9ba706908caddc95de8f17bfbeb87cbce486cb3
+ oid sha256:09cdde6931807139efa184e8a98108b74bb05730bc511336966b254b68dc93ee
  size 15984
last-checkpoint/rng_state_6.pth CHANGED
@@ -1,3 +1,3 @@
  version https://git-lfs.github.com/spec/v1
- oid sha256:f5c095f0a000582673860ef2dcfa50f1ba3d6bf9b31cb0a66349b60d581ecbe3
+ oid sha256:8a2268136932c55b3857d38c7cf3fc4bd3cdad532c156b9addebc6d26374374a
  size 15984
last-checkpoint/rng_state_7.pth CHANGED
@@ -1,3 +1,3 @@
  version https://git-lfs.github.com/spec/v1
- oid sha256:8e625b7623f260d65cb1001beba6e4d0df9ed61b3f496d3e767f280a6b73cde8
+ oid sha256:56cd9a502015b79e0ab94c92a04bd96c99aaf79ef8d64bf81d81eb702c10c2a8
  size 15984
last-checkpoint/scheduler.pt CHANGED
@@ -1,3 +1,3 @@
  version https://git-lfs.github.com/spec/v1
- oid sha256:d2a6611856f3b4f599b410c5f2fa04b4cd6d782a4bd921f15735728fc381869d
+ oid sha256:cdf59f6709e5846ccbaea01389f6f540264ed11dfc9a9817626a436b27c277a6
  size 1064
last-checkpoint/trainer_state.json CHANGED
@@ -1,9 +1,9 @@
  {
  "best_metric": null,
  "best_model_checkpoint": null,
- "epoch": 26.0,
+ "epoch": 26.99746835443038,
  "eval_steps": 100,
- "global_step": 5126,
+ "global_step": 5323,
  "is_hyper_param_search": false,
  "is_local_process_zero": true,
  "is_world_process_zero": true,
@@ -7702,6 +7702,306 @@
  "rewards/margins": 0.5664650797843933,
  "rewards/rejected": -0.3456335663795471,
  "step": 5120
+ },
+ {
+ "epoch": 26.020253164556962,
+ "grad_norm": 437459.9001544906,
+ "learning_rate": 1.537135694139768e-07,
+ "logits/chosen": -1.4238073825836182,
+ "logits/rejected": -1.5467934608459473,
+ "logps/chosen": -32.6416015625,
+ "logps/rejected": -585.3292236328125,
+ "loss": 12902.432,
+ "rewards/accuracies": 0.9750000238418579,
+ "rewards/chosen": 0.2033694088459015,
+ "rewards/margins": 0.5521097779273987,
+ "rewards/rejected": -0.3487403094768524,
+ "step": 5130
+ },
+ {
+ "epoch": 26.070886075949367,
+ "grad_norm": 461726.55326627713,
+ "learning_rate": 1.529301159511125e-07,
+ "logits/chosen": -1.0017569065093994,
+ "logits/rejected": -0.677699089050293,
+ "logps/chosen": -33.68021011352539,
+ "logps/rejected": -586.3854370117188,
+ "loss": 12206.9266,
+ "rewards/accuracies": 1.0,
+ "rewards/chosen": 0.20325596630573273,
+ "rewards/margins": 0.5559948682785034,
+ "rewards/rejected": -0.35273900628089905,
+ "step": 5140
+ },
+ {
+ "epoch": 26.121518987341773,
+ "grad_norm": 223445.63437535468,
+ "learning_rate": 1.521466624882482e-07,
+ "logits/chosen": -1.4141124486923218,
+ "logits/rejected": -0.6017986536026001,
+ "logps/chosen": -29.84651756286621,
+ "logps/rejected": -580.4603271484375,
+ "loss": 12104.9586,
+ "rewards/accuracies": 1.0,
+ "rewards/chosen": 0.20142440497875214,
+ "rewards/margins": 0.5523154139518738,
+ "rewards/rejected": -0.35089102387428284,
+ "step": 5150
+ },
+ {
+ "epoch": 26.172151898734178,
+ "grad_norm": 232119.6879833388,
+ "learning_rate": 1.5136320902538388e-07,
+ "logits/chosen": -0.7647647857666016,
+ "logits/rejected": -0.6229702830314636,
+ "logps/chosen": -34.456138610839844,
+ "logps/rejected": -574.6653442382812,
+ "loss": 12524.6094,
+ "rewards/accuracies": 0.987500011920929,
+ "rewards/chosen": 0.2094695270061493,
+ "rewards/margins": 0.539394199848175,
+ "rewards/rejected": -0.32992464303970337,
+ "step": 5160
+ },
+ {
+ "epoch": 26.222784810126583,
+ "grad_norm": 478076.71264027077,
+ "learning_rate": 1.5057975556251958e-07,
+ "logits/chosen": -2.230821132659912,
+ "logits/rejected": -2.297372579574585,
+ "logps/chosen": -29.98971176147461,
+ "logps/rejected": -588.6803588867188,
+ "loss": 12188.2758,
+ "rewards/accuracies": 1.0,
+ "rewards/chosen": 0.21123230457305908,
+ "rewards/margins": 0.5585904121398926,
+ "rewards/rejected": -0.3473580479621887,
+ "step": 5170
+ },
+ {
+ "epoch": 26.27341772151899,
+ "grad_norm": 287477.38205394626,
+ "learning_rate": 1.4979630209965526e-07,
+ "logits/chosen": 0.2648393511772156,
+ "logits/rejected": 1.2140284776687622,
+ "logps/chosen": -26.058353424072266,
+ "logps/rejected": -575.137939453125,
+ "loss": 13004.7297,
+ "rewards/accuracies": 0.987500011920929,
+ "rewards/chosen": 0.20219556987285614,
+ "rewards/margins": 0.5560811758041382,
+ "rewards/rejected": -0.35388559103012085,
+ "step": 5180
+ },
+ {
+ "epoch": 26.324050632911394,
+ "grad_norm": 448400.95809014083,
+ "learning_rate": 1.4901284863679097e-07,
+ "logits/chosen": -0.47244685888290405,
+ "logits/rejected": 0.34987983107566833,
+ "logps/chosen": -47.41560745239258,
+ "logps/rejected": -588.7420654296875,
+ "loss": 12302.9594,
+ "rewards/accuracies": 0.987500011920929,
+ "rewards/chosen": 0.20305314660072327,
+ "rewards/margins": 0.5488015413284302,
+ "rewards/rejected": -0.3457483947277069,
+ "step": 5190
+ },
+ {
+ "epoch": 26.374683544303796,
+ "grad_norm": 290914.6200870196,
+ "learning_rate": 1.4822939517392665e-07,
+ "logits/chosen": -1.5243618488311768,
+ "logits/rejected": -0.6017967462539673,
+ "logps/chosen": -33.99588394165039,
+ "logps/rejected": -590.6222534179688,
+ "loss": 12878.768,
+ "rewards/accuracies": 1.0,
+ "rewards/chosen": 0.20388083159923553,
+ "rewards/margins": 0.5592610836029053,
+ "rewards/rejected": -0.35538023710250854,
+ "step": 5200
+ },
+ {
+ "epoch": 26.4253164556962,
+ "grad_norm": 715122.6528862711,
+ "learning_rate": 1.4744594171106235e-07,
+ "logits/chosen": -1.3308216333389282,
+ "logits/rejected": -1.0356947183609009,
+ "logps/chosen": -29.031147003173828,
+ "logps/rejected": -595.821533203125,
+ "loss": 12466.6016,
+ "rewards/accuracies": 0.987500011920929,
+ "rewards/chosen": 0.212965726852417,
+ "rewards/margins": 0.5688080191612244,
+ "rewards/rejected": -0.3558422923088074,
+ "step": 5210
+ },
+ {
+ "epoch": 26.475949367088607,
+ "grad_norm": 266006.16874610144,
+ "learning_rate": 1.4666248824819803e-07,
+ "logits/chosen": -0.2248738706111908,
+ "logits/rejected": 0.37806427478790283,
+ "logps/chosen": -35.31621551513672,
+ "logps/rejected": -578.3462524414062,
+ "loss": 12517.1375,
+ "rewards/accuracies": 0.987500011920929,
+ "rewards/chosen": 0.20571331679821014,
+ "rewards/margins": 0.5493656396865845,
+ "rewards/rejected": -0.3436523675918579,
+ "step": 5220
+ },
+ {
+ "epoch": 26.526582278481012,
+ "grad_norm": 296131.0633758982,
+ "learning_rate": 1.4587903478533377e-07,
+ "logits/chosen": -3.198024272918701,
+ "logits/rejected": -2.1562371253967285,
+ "logps/chosen": -24.365009307861328,
+ "logps/rejected": -589.4319458007812,
+ "loss": 12258.343,
+ "rewards/accuracies": 1.0,
+ "rewards/chosen": 0.2150738686323166,
+ "rewards/margins": 0.5671111345291138,
+ "rewards/rejected": -0.352037250995636,
+ "step": 5230
+ },
+ {
+ "epoch": 26.577215189873417,
+ "grad_norm": 310894.2430575026,
+ "learning_rate": 1.4509558132246945e-07,
+ "logits/chosen": 1.5686824321746826,
+ "logits/rejected": 1.7765287160873413,
+ "logps/chosen": -25.171403884887695,
+ "logps/rejected": -559.4937744140625,
+ "loss": 13451.6969,
+ "rewards/accuracies": 0.9750000238418579,
+ "rewards/chosen": 0.18740372359752655,
+ "rewards/margins": 0.5345771312713623,
+ "rewards/rejected": -0.3471735119819641,
+ "step": 5240
+ },
+ {
+ "epoch": 26.627848101265823,
+ "grad_norm": 273385.34239455353,
+ "learning_rate": 1.4431212785960515e-07,
+ "logits/chosen": 0.779743492603302,
+ "logits/rejected": 0.5761479139328003,
+ "logps/chosen": -24.774072647094727,
+ "logps/rejected": -552.44775390625,
+ "loss": 13444.4844,
+ "rewards/accuracies": 0.987500011920929,
+ "rewards/chosen": 0.1935535967350006,
+ "rewards/margins": 0.5272840857505798,
+ "rewards/rejected": -0.3337305188179016,
+ "step": 5250
+ },
+ {
+ "epoch": 26.678481012658228,
+ "grad_norm": 292701.1225306038,
+ "learning_rate": 1.4352867439674083e-07,
+ "logits/chosen": -2.055417060852051,
+ "logits/rejected": -1.5558016300201416,
+ "logps/chosen": -34.77043151855469,
+ "logps/rejected": -578.9781494140625,
+ "loss": 12698.0,
+ "rewards/accuracies": 0.987500011920929,
+ "rewards/chosen": 0.20298035442829132,
+ "rewards/margins": 0.543838381767273,
+ "rewards/rejected": -0.3408580422401428,
+ "step": 5260
+ },
+ {
+ "epoch": 26.729113924050633,
+ "grad_norm": 274251.20733361214,
+ "learning_rate": 1.4274522093387654e-07,
+ "logits/chosen": -0.7560523152351379,
+ "logits/rejected": -0.4179345667362213,
+ "logps/chosen": -35.23884201049805,
+ "logps/rejected": -578.6085815429688,
+ "loss": 12311.3711,
+ "rewards/accuracies": 0.9624999761581421,
+ "rewards/chosen": 0.19854024052619934,
+ "rewards/margins": 0.5389177799224854,
+ "rewards/rejected": -0.3403775095939636,
+ "step": 5270
+ },
+ {
+ "epoch": 26.77974683544304,
+ "grad_norm": 540941.0207588519,
+ "learning_rate": 1.4196176747101222e-07,
+ "logits/chosen": -2.318772792816162,
+ "logits/rejected": -2.123133420944214,
+ "logps/chosen": -32.09846878051758,
+ "logps/rejected": -575.356201171875,
+ "loss": 12401.8453,
+ "rewards/accuracies": 0.987500011920929,
+ "rewards/chosen": 0.2020426243543625,
+ "rewards/margins": 0.5411572456359863,
+ "rewards/rejected": -0.33911454677581787,
+ "step": 5280
+ },
+ {
+ "epoch": 26.830379746835444,
+ "grad_norm": 441696.9404493494,
+ "learning_rate": 1.4117831400814792e-07,
+ "logits/chosen": -2.1685147285461426,
+ "logits/rejected": -1.5242393016815186,
+ "logps/chosen": -22.024688720703125,
+ "logps/rejected": -543.53076171875,
+ "loss": 13786.8516,
+ "rewards/accuracies": 0.987500011920929,
+ "rewards/chosen": 0.1891135424375534,
+ "rewards/margins": 0.523938775062561,
+ "rewards/rejected": -0.33482515811920166,
+ "step": 5290
+ },
+ {
+ "epoch": 26.88101265822785,
+ "grad_norm": 328168.9416709712,
+ "learning_rate": 1.403948605452836e-07,
+ "logits/chosen": -2.390831708908081,
+ "logits/rejected": -1.6773532629013062,
+ "logps/chosen": -37.75607681274414,
+ "logps/rejected": -572.0828247070312,
+ "loss": 13110.5859,
+ "rewards/accuracies": 0.9750000238418579,
+ "rewards/chosen": 0.20247995853424072,
+ "rewards/margins": 0.539789617061615,
+ "rewards/rejected": -0.33730968832969666,
+ "step": 5300
+ },
+ {
+ "epoch": 26.931645569620255,
+ "grad_norm": 342604.3694047161,
+ "learning_rate": 1.396114070824193e-07,
+ "logits/chosen": -0.8812211751937866,
+ "logits/rejected": -0.7407415509223938,
+ "logps/chosen": -31.870285034179688,
+ "logps/rejected": -576.1883544921875,
+ "loss": 12753.4875,
+ "rewards/accuracies": 1.0,
+ "rewards/chosen": 0.2047419548034668,
+ "rewards/margins": 0.5459688901901245,
+ "rewards/rejected": -0.3412269353866577,
+ "step": 5310
+ },
+ {
+ "epoch": 26.98227848101266,
+ "grad_norm": 327636.2077886267,
+ "learning_rate": 1.38827953619555e-07,
+ "logits/chosen": -1.1729528903961182,
+ "logits/rejected": -0.7522214651107788,
+ "logps/chosen": -41.73408508300781,
+ "logps/rejected": -603.9337158203125,
+ "loss": 11920.0102,
+ "rewards/accuracies": 0.987500011920929,
+ "rewards/chosen": 0.22109094262123108,
+ "rewards/margins": 0.5666243433952332,
+ "rewards/rejected": -0.3455334007740021,
+ "step": 5320
  }
  ],
  "logging_steps": 10,