cat-searcher commited on
Commit
ec5f551
1 Parent(s): cb82dde

Training in progress, epoch 28, checkpoint

Browse files
Files changed (29) hide show
  1. last-checkpoint/global_step5521/bf16_zero_pp_rank_0_mp_rank_00_optim_states.pt +3 -0
  2. last-checkpoint/global_step5521/bf16_zero_pp_rank_1_mp_rank_00_optim_states.pt +3 -0
  3. last-checkpoint/global_step5521/bf16_zero_pp_rank_2_mp_rank_00_optim_states.pt +3 -0
  4. last-checkpoint/global_step5521/bf16_zero_pp_rank_3_mp_rank_00_optim_states.pt +3 -0
  5. last-checkpoint/global_step5521/bf16_zero_pp_rank_4_mp_rank_00_optim_states.pt +3 -0
  6. last-checkpoint/global_step5521/bf16_zero_pp_rank_5_mp_rank_00_optim_states.pt +3 -0
  7. last-checkpoint/global_step5521/bf16_zero_pp_rank_6_mp_rank_00_optim_states.pt +3 -0
  8. last-checkpoint/global_step5521/bf16_zero_pp_rank_7_mp_rank_00_optim_states.pt +3 -0
  9. last-checkpoint/global_step5521/zero_pp_rank_0_mp_rank_00_model_states.pt +3 -0
  10. last-checkpoint/global_step5521/zero_pp_rank_1_mp_rank_00_model_states.pt +3 -0
  11. last-checkpoint/global_step5521/zero_pp_rank_2_mp_rank_00_model_states.pt +3 -0
  12. last-checkpoint/global_step5521/zero_pp_rank_3_mp_rank_00_model_states.pt +3 -0
  13. last-checkpoint/global_step5521/zero_pp_rank_4_mp_rank_00_model_states.pt +3 -0
  14. last-checkpoint/global_step5521/zero_pp_rank_5_mp_rank_00_model_states.pt +3 -0
  15. last-checkpoint/global_step5521/zero_pp_rank_6_mp_rank_00_model_states.pt +3 -0
  16. last-checkpoint/global_step5521/zero_pp_rank_7_mp_rank_00_model_states.pt +3 -0
  17. last-checkpoint/latest +1 -1
  18. last-checkpoint/model-00001-of-00002.safetensors +1 -1
  19. last-checkpoint/model-00002-of-00002.safetensors +1 -1
  20. last-checkpoint/rng_state_0.pth +1 -1
  21. last-checkpoint/rng_state_1.pth +1 -1
  22. last-checkpoint/rng_state_2.pth +1 -1
  23. last-checkpoint/rng_state_3.pth +1 -1
  24. last-checkpoint/rng_state_4.pth +1 -1
  25. last-checkpoint/rng_state_5.pth +1 -1
  26. last-checkpoint/rng_state_6.pth +1 -1
  27. last-checkpoint/rng_state_7.pth +1 -1
  28. last-checkpoint/scheduler.pt +1 -1
  29. last-checkpoint/trainer_state.json +302 -2
last-checkpoint/global_step5521/bf16_zero_pp_rank_0_mp_rank_00_optim_states.pt ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:eeb574e73c858813a928ff498dcd92efcd52d39bf6be93eb19f482e11a521d46
3
+ size 2506176112
last-checkpoint/global_step5521/bf16_zero_pp_rank_1_mp_rank_00_optim_states.pt ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:ab8e88ff3e1e0d27d0f652203c6c8b5a774c4bf0d489719cc7fc6e9de2bb46e3
3
+ size 2506176112
last-checkpoint/global_step5521/bf16_zero_pp_rank_2_mp_rank_00_optim_states.pt ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:d3fb30080c8bf6107859398c3b1e15ac6ac4565566849c964afcb6f354ea1ae5
3
+ size 2506176112
last-checkpoint/global_step5521/bf16_zero_pp_rank_3_mp_rank_00_optim_states.pt ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:35da7feb29692b199d80e67730abb3983caef4e1b0382cf78d411d246196f9d9
3
+ size 2506176112
last-checkpoint/global_step5521/bf16_zero_pp_rank_4_mp_rank_00_optim_states.pt ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:591123dfe28879cbe098dd6659b52eaae80c7bb2771ec06d6a603b62bc63fe74
3
+ size 2506176112
last-checkpoint/global_step5521/bf16_zero_pp_rank_5_mp_rank_00_optim_states.pt ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:e642641e957b049f6002a70fb88a66fc527f94b80a7cd78722a8dd7875d89216
3
+ size 2506176112
last-checkpoint/global_step5521/bf16_zero_pp_rank_6_mp_rank_00_optim_states.pt ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:c8368bffa1785b2545bc93fc7f883c6b08555d632fc450f6d98baf24aeb7087e
3
+ size 2506176112
last-checkpoint/global_step5521/bf16_zero_pp_rank_7_mp_rank_00_optim_states.pt ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:db3fd9da9c40302bee2f82faaf6129307040541c2bf197edb02c875734a4cc4e
3
+ size 2506176112
last-checkpoint/global_step5521/zero_pp_rank_0_mp_rank_00_model_states.pt ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:ff0d15d674a1437beca49a16157757d5b4baf26599e5166ebed4540b1a7332e2
3
+ size 85570
last-checkpoint/global_step5521/zero_pp_rank_1_mp_rank_00_model_states.pt ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:2affdfc29fcb3d39a58b25db9a58d2a0625bfe72f9991c515efa1f965b7fe8e7
3
+ size 85506
last-checkpoint/global_step5521/zero_pp_rank_2_mp_rank_00_model_states.pt ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:c43225a1f5e98e8c5e00bffee1e9bfe34bcffd0cf304e1ca11f1e5b9996a51ac
3
+ size 85506
last-checkpoint/global_step5521/zero_pp_rank_3_mp_rank_00_model_states.pt ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:7356cccf7500d912fc837459640a05ccf3bb6a90b783e55b70e69002bd44e7fb
3
+ size 85506
last-checkpoint/global_step5521/zero_pp_rank_4_mp_rank_00_model_states.pt ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:afdc283d704de0c72ee00ca73a9a7e7d1a1476f4ee3267871f8c87791768ab0f
3
+ size 85506
last-checkpoint/global_step5521/zero_pp_rank_5_mp_rank_00_model_states.pt ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:ceb8cc949be997c8ca9a59cb0ce35e0178d2683d8c9003245cb86046f76d968f
3
+ size 85506
last-checkpoint/global_step5521/zero_pp_rank_6_mp_rank_00_model_states.pt ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:be11c9817cfbbf742d8f6d97cd03a3c9ca846924fe4c3d497537572d49aebeae
3
+ size 85506
last-checkpoint/global_step5521/zero_pp_rank_7_mp_rank_00_model_states.pt ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:744b41b26aaa23988bf6b5023d6e71f632a5c5d64c7b6d8b845694193ec91bed
3
+ size 85506
last-checkpoint/latest CHANGED
@@ -1 +1 @@
1
- global_step5323
 
1
+ global_step5521
last-checkpoint/model-00001-of-00002.safetensors CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:48e3dca496abe618138acc972547a2585aebd68888a0188157e082fac3e57ec8
3
  size 4945242264
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:49f4a9ae06898314b6bbf8ad9fca6ca16dea158bd29ecc0506863d601b8345f4
3
  size 4945242264
last-checkpoint/model-00002-of-00002.safetensors CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:37ec3ac44a0c33cccc1363b0aaa3a4e381f89d397a266fbd331b9600bb0caba6
3
  size 67121608
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:4832308f86af667638feea3c3365de459978e8c0f49e3d367cf4d5379875a12f
3
  size 67121608
last-checkpoint/rng_state_0.pth CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:9c49abc3bdedbec1fc8e1028ef422150f19ee7470d7b542e1ad8869fc044d2af
3
  size 15984
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:038329a940174c7998542fe9a3c903ee0c21d0a2351959a1cb53ac9af3988f89
3
  size 15984
last-checkpoint/rng_state_1.pth CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:df12ca4106ff0831785a55b5da88f6c86f6f67bd3d09b2dced4f20b539b14f72
3
  size 15984
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:fc2eb62cd1e31d2c95a28eaadd97a496b27751983378626efc3ee2a53ae743ff
3
  size 15984
last-checkpoint/rng_state_2.pth CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:05fc0786faff729a3a1582f98b806b68d4f0b76aebb25cbad4431b73176b11c1
3
  size 15984
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:7c76f1ee6b7bfb2e7a6ca68f028fe40297bc56fa8287959be7a51545af2a824b
3
  size 15984
last-checkpoint/rng_state_3.pth CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:a3117e1218a2dd3f7f8c516a840af48f6b93660d852cca124269f78c21f8577c
3
  size 15984
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:3e4716e0a98e24ecef0261fa45d53aa531ce14f99b4992682e0257f7c483a80d
3
  size 15984
last-checkpoint/rng_state_4.pth CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:cdaa30c82476bf6a65e4eb9ca2ae7b95f1b38f41a6f5b2f1cbdda9af86a4a7a0
3
  size 15984
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:732644bf9682f11fe70f041a2575d5822769847a3b56320e6cfdbd3914eb98f9
3
  size 15984
last-checkpoint/rng_state_5.pth CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:09cdde6931807139efa184e8a98108b74bb05730bc511336966b254b68dc93ee
3
  size 15984
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:c3d90978ff8f9057d8587ec1258de11f4d5531805002e65d952f5725b93f51be
3
  size 15984
last-checkpoint/rng_state_6.pth CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:8a2268136932c55b3857d38c7cf3fc4bd3cdad532c156b9addebc6d26374374a
3
  size 15984
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:a013a277003a33e8bb2418ae66c1488282f502fc920260b5fc9f337843415ab2
3
  size 15984
last-checkpoint/rng_state_7.pth CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:56cd9a502015b79e0ab94c92a04bd96c99aaf79ef8d64bf81d81eb702c10c2a8
3
  size 15984
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:b9f8887e423cbfbb140f966733dae63123515ed5345b3e075eb892b0646282c8
3
  size 15984
last-checkpoint/scheduler.pt CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:cdf59f6709e5846ccbaea01389f6f540264ed11dfc9a9817626a436b27c277a6
3
  size 1064
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:d2d39cfa3808e402183de5c305fcd5a81f2c9ffede37076a3856931095792cc0
3
  size 1064
last-checkpoint/trainer_state.json CHANGED
@@ -1,9 +1,9 @@
1
  {
2
  "best_metric": null,
3
  "best_model_checkpoint": null,
4
- "epoch": 26.99746835443038,
5
  "eval_steps": 100,
6
- "global_step": 5323,
7
  "is_hyper_param_search": false,
8
  "is_local_process_zero": true,
9
  "is_world_process_zero": true,
@@ -8002,6 +8002,306 @@
8002
  "rewards/margins": 0.5666243433952332,
8003
  "rewards/rejected": -0.3455334007740021,
8004
  "step": 5320
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
8005
  }
8006
  ],
8007
  "logging_steps": 10,
 
1
  {
2
  "best_metric": null,
3
  "best_model_checkpoint": null,
4
+ "epoch": 28.0,
5
  "eval_steps": 100,
6
+ "global_step": 5521,
7
  "is_hyper_param_search": false,
8
  "is_local_process_zero": true,
9
  "is_world_process_zero": true,
 
8002
  "rewards/margins": 0.5666243433952332,
8003
  "rewards/rejected": -0.3455334007740021,
8004
  "step": 5320
8005
+ },
8006
+ {
8007
+ "epoch": 27.03291139240506,
8008
+ "grad_norm": 306486.1159229183,
8009
+ "learning_rate": 1.380445001566907e-07,
8010
+ "logits/chosen": -0.2942148447036743,
8011
+ "logits/rejected": 0.29008275270462036,
8012
+ "logps/chosen": -28.0673770904541,
8013
+ "logps/rejected": -591.0224609375,
8014
+ "loss": 12393.8094,
8015
+ "rewards/accuracies": 1.0,
8016
+ "rewards/chosen": 0.21129322052001953,
8017
+ "rewards/margins": 0.562667965888977,
8018
+ "rewards/rejected": -0.35137468576431274,
8019
+ "step": 5330
8020
+ },
8021
+ {
8022
+ "epoch": 27.083544303797467,
8023
+ "grad_norm": 291301.0379049935,
8024
+ "learning_rate": 1.3726104669382637e-07,
8025
+ "logits/chosen": -0.04897233098745346,
8026
+ "logits/rejected": 0.2625051736831665,
8027
+ "logps/chosen": -28.288782119750977,
8028
+ "logps/rejected": -600.4498291015625,
8029
+ "loss": 12295.5109,
8030
+ "rewards/accuracies": 1.0,
8031
+ "rewards/chosen": 0.20883643627166748,
8032
+ "rewards/margins": 0.5699074864387512,
8033
+ "rewards/rejected": -0.36107105016708374,
8034
+ "step": 5340
8035
+ },
8036
+ {
8037
+ "epoch": 27.134177215189872,
8038
+ "grad_norm": 336826.5711799587,
8039
+ "learning_rate": 1.3647759323096208e-07,
8040
+ "logits/chosen": -3.574831008911133,
8041
+ "logits/rejected": -3.1615543365478516,
8042
+ "logps/chosen": -28.667476654052734,
8043
+ "logps/rejected": -610.046630859375,
8044
+ "loss": 12205.2984,
8045
+ "rewards/accuracies": 1.0,
8046
+ "rewards/chosen": 0.21462281048297882,
8047
+ "rewards/margins": 0.5811195373535156,
8048
+ "rewards/rejected": -0.3664968013763428,
8049
+ "step": 5350
8050
+ },
8051
+ {
8052
+ "epoch": 27.184810126582278,
8053
+ "grad_norm": 253108.22870561373,
8054
+ "learning_rate": 1.3569413976809776e-07,
8055
+ "logits/chosen": -1.275773048400879,
8056
+ "logits/rejected": -0.2816539406776428,
8057
+ "logps/chosen": -27.488027572631836,
8058
+ "logps/rejected": -576.04443359375,
8059
+ "loss": 12752.6922,
8060
+ "rewards/accuracies": 1.0,
8061
+ "rewards/chosen": 0.20755627751350403,
8062
+ "rewards/margins": 0.5584858059883118,
8063
+ "rewards/rejected": -0.35092949867248535,
8064
+ "step": 5360
8065
+ },
8066
+ {
8067
+ "epoch": 27.235443037974683,
8068
+ "grad_norm": 378986.1297500305,
8069
+ "learning_rate": 1.3491068630523347e-07,
8070
+ "logits/chosen": -0.7276864051818848,
8071
+ "logits/rejected": -0.2372014820575714,
8072
+ "logps/chosen": -27.652713775634766,
8073
+ "logps/rejected": -574.1866455078125,
8074
+ "loss": 12491.7875,
8075
+ "rewards/accuracies": 0.987500011920929,
8076
+ "rewards/chosen": 0.19472074508666992,
8077
+ "rewards/margins": 0.5458452701568604,
8078
+ "rewards/rejected": -0.35112449526786804,
8079
+ "step": 5370
8080
+ },
8081
+ {
8082
+ "epoch": 27.28607594936709,
8083
+ "grad_norm": 355029.2404666128,
8084
+ "learning_rate": 1.3412723284236915e-07,
8085
+ "logits/chosen": 0.03503293916583061,
8086
+ "logits/rejected": 0.09463844448328018,
8087
+ "logps/chosen": -20.01060676574707,
8088
+ "logps/rejected": -571.03369140625,
8089
+ "loss": 12906.6961,
8090
+ "rewards/accuracies": 1.0,
8091
+ "rewards/chosen": 0.1995813399553299,
8092
+ "rewards/margins": 0.5492128133773804,
8093
+ "rewards/rejected": -0.3496314287185669,
8094
+ "step": 5380
8095
+ },
8096
+ {
8097
+ "epoch": 27.336708860759494,
8098
+ "grad_norm": 174005.9141855672,
8099
+ "learning_rate": 1.3334377937950485e-07,
8100
+ "logits/chosen": -1.0307856798171997,
8101
+ "logits/rejected": -0.8787088394165039,
8102
+ "logps/chosen": -27.538768768310547,
8103
+ "logps/rejected": -584.1838989257812,
8104
+ "loss": 12431.2086,
8105
+ "rewards/accuracies": 0.987500011920929,
8106
+ "rewards/chosen": 0.20768491923809052,
8107
+ "rewards/margins": 0.558754026889801,
8108
+ "rewards/rejected": -0.3510691225528717,
8109
+ "step": 5390
8110
+ },
8111
+ {
8112
+ "epoch": 27.3873417721519,
8113
+ "grad_norm": 333107.0988957162,
8114
+ "learning_rate": 1.3256032591664053e-07,
8115
+ "logits/chosen": 0.49966010451316833,
8116
+ "logits/rejected": 1.4367059469223022,
8117
+ "logps/chosen": -22.20120620727539,
8118
+ "logps/rejected": -573.7286987304688,
8119
+ "loss": 12624.7586,
8120
+ "rewards/accuracies": 1.0,
8121
+ "rewards/chosen": 0.19961531460285187,
8122
+ "rewards/margins": 0.5520228743553162,
8123
+ "rewards/rejected": -0.35240763425827026,
8124
+ "step": 5400
8125
+ },
8126
+ {
8127
+ "epoch": 27.437974683544304,
8128
+ "grad_norm": 189125.20245582235,
8129
+ "learning_rate": 1.3177687245377624e-07,
8130
+ "logits/chosen": -0.491058886051178,
8131
+ "logits/rejected": -0.4180983603000641,
8132
+ "logps/chosen": -24.668697357177734,
8133
+ "logps/rejected": -574.7880249023438,
8134
+ "loss": 12818.4906,
8135
+ "rewards/accuracies": 1.0,
8136
+ "rewards/chosen": 0.203078955411911,
8137
+ "rewards/margins": 0.5475345849990845,
8138
+ "rewards/rejected": -0.34445568919181824,
8139
+ "step": 5410
8140
+ },
8141
+ {
8142
+ "epoch": 27.48860759493671,
8143
+ "grad_norm": 255453.1741505276,
8144
+ "learning_rate": 1.3099341899091192e-07,
8145
+ "logits/chosen": -1.3983430862426758,
8146
+ "logits/rejected": -1.0761035680770874,
8147
+ "logps/chosen": -28.14908790588379,
8148
+ "logps/rejected": -567.4022216796875,
8149
+ "loss": 12266.7156,
8150
+ "rewards/accuracies": 0.987500011920929,
8151
+ "rewards/chosen": 0.19684790074825287,
8152
+ "rewards/margins": 0.5404728055000305,
8153
+ "rewards/rejected": -0.34362491965293884,
8154
+ "step": 5420
8155
+ },
8156
+ {
8157
+ "epoch": 27.539240506329115,
8158
+ "grad_norm": 199249.17490991156,
8159
+ "learning_rate": 1.3020996552804765e-07,
8160
+ "logits/chosen": -1.3831968307495117,
8161
+ "logits/rejected": -0.9957733154296875,
8162
+ "logps/chosen": -34.38856887817383,
8163
+ "logps/rejected": -583.5364379882812,
8164
+ "loss": 12353.943,
8165
+ "rewards/accuracies": 0.987500011920929,
8166
+ "rewards/chosen": 0.20319747924804688,
8167
+ "rewards/margins": 0.5525364875793457,
8168
+ "rewards/rejected": -0.34933900833129883,
8169
+ "step": 5430
8170
+ },
8171
+ {
8172
+ "epoch": 27.58987341772152,
8173
+ "grad_norm": 372801.7448533588,
8174
+ "learning_rate": 1.2942651206518333e-07,
8175
+ "logits/chosen": 0.7253493070602417,
8176
+ "logits/rejected": 0.6416251063346863,
8177
+ "logps/chosen": -36.44821548461914,
8178
+ "logps/rejected": -557.3819580078125,
8179
+ "loss": 12762.9742,
8180
+ "rewards/accuracies": 0.9750000238418579,
8181
+ "rewards/chosen": 0.19811172783374786,
8182
+ "rewards/margins": 0.5241624116897583,
8183
+ "rewards/rejected": -0.32605066895484924,
8184
+ "step": 5440
8185
+ },
8186
+ {
8187
+ "epoch": 27.640506329113926,
8188
+ "grad_norm": 250437.30987597498,
8189
+ "learning_rate": 1.2864305860231904e-07,
8190
+ "logits/chosen": -1.6367158889770508,
8191
+ "logits/rejected": -0.9662375450134277,
8192
+ "logps/chosen": -32.858455657958984,
8193
+ "logps/rejected": -566.0185546875,
8194
+ "loss": 13013.8914,
8195
+ "rewards/accuracies": 0.987500011920929,
8196
+ "rewards/chosen": 0.20383331179618835,
8197
+ "rewards/margins": 0.5383815169334412,
8198
+ "rewards/rejected": -0.3345482349395752,
8199
+ "step": 5450
8200
+ },
8201
+ {
8202
+ "epoch": 27.691139240506327,
8203
+ "grad_norm": 395640.3468149828,
8204
+ "learning_rate": 1.2785960513945471e-07,
8205
+ "logits/chosen": -1.0696049928665161,
8206
+ "logits/rejected": -0.7029746770858765,
8207
+ "logps/chosen": -27.334697723388672,
8208
+ "logps/rejected": -572.7042846679688,
8209
+ "loss": 12608.9328,
8210
+ "rewards/accuracies": 1.0,
8211
+ "rewards/chosen": 0.2039167582988739,
8212
+ "rewards/margins": 0.5491318106651306,
8213
+ "rewards/rejected": -0.3452150225639343,
8214
+ "step": 5460
8215
+ },
8216
+ {
8217
+ "epoch": 27.741772151898733,
8218
+ "grad_norm": 737045.8738711793,
8219
+ "learning_rate": 1.2707615167659042e-07,
8220
+ "logits/chosen": -1.4398880004882812,
8221
+ "logits/rejected": -0.3085852265357971,
8222
+ "logps/chosen": -20.763835906982422,
8223
+ "logps/rejected": -557.7874755859375,
8224
+ "loss": 12662.9484,
8225
+ "rewards/accuracies": 1.0,
8226
+ "rewards/chosen": 0.20201142132282257,
8227
+ "rewards/margins": 0.5384231209754944,
8228
+ "rewards/rejected": -0.336411714553833,
8229
+ "step": 5470
8230
+ },
8231
+ {
8232
+ "epoch": 27.792405063291138,
8233
+ "grad_norm": 286929.61277431983,
8234
+ "learning_rate": 1.262926982137261e-07,
8235
+ "logits/chosen": -0.6262455582618713,
8236
+ "logits/rejected": -0.4802684783935547,
8237
+ "logps/chosen": -23.99846076965332,
8238
+ "logps/rejected": -575.8038940429688,
8239
+ "loss": 12141.5641,
8240
+ "rewards/accuracies": 0.987500011920929,
8241
+ "rewards/chosen": 0.19609448313713074,
8242
+ "rewards/margins": 0.551897406578064,
8243
+ "rewards/rejected": -0.355802983045578,
8244
+ "step": 5480
8245
+ },
8246
+ {
8247
+ "epoch": 27.843037974683543,
8248
+ "grad_norm": 749583.0814867924,
8249
+ "learning_rate": 1.255092447508618e-07,
8250
+ "logits/chosen": -1.7006927728652954,
8251
+ "logits/rejected": -1.0466101169586182,
8252
+ "logps/chosen": -29.710596084594727,
8253
+ "logps/rejected": -591.6888427734375,
8254
+ "loss": 12764.4375,
8255
+ "rewards/accuracies": 0.987500011920929,
8256
+ "rewards/chosen": 0.20634475350379944,
8257
+ "rewards/margins": 0.5570266842842102,
8258
+ "rewards/rejected": -0.3506819009780884,
8259
+ "step": 5490
8260
+ },
8261
+ {
8262
+ "epoch": 27.89367088607595,
8263
+ "grad_norm": 380933.42642122327,
8264
+ "learning_rate": 1.2472579128799749e-07,
8265
+ "logits/chosen": -1.4751110076904297,
8266
+ "logits/rejected": -0.9937122464179993,
8267
+ "logps/chosen": -34.70015335083008,
8268
+ "logps/rejected": -571.9727172851562,
8269
+ "loss": 12012.7281,
8270
+ "rewards/accuracies": 0.9750000238418579,
8271
+ "rewards/chosen": 0.2022305727005005,
8272
+ "rewards/margins": 0.5379850268363953,
8273
+ "rewards/rejected": -0.3357544541358948,
8274
+ "step": 5500
8275
+ },
8276
+ {
8277
+ "epoch": 27.944303797468354,
8278
+ "grad_norm": 258509.47313842815,
8279
+ "learning_rate": 1.2394233782513317e-07,
8280
+ "logits/chosen": -1.372650384902954,
8281
+ "logits/rejected": -1.0075037479400635,
8282
+ "logps/chosen": -35.64197540283203,
8283
+ "logps/rejected": -593.0923461914062,
8284
+ "loss": 11889.8898,
8285
+ "rewards/accuracies": 1.0,
8286
+ "rewards/chosen": 0.2117808610200882,
8287
+ "rewards/margins": 0.5591001510620117,
8288
+ "rewards/rejected": -0.34731921553611755,
8289
+ "step": 5510
8290
+ },
8291
+ {
8292
+ "epoch": 27.99493670886076,
8293
+ "grad_norm": 208938.2840249938,
8294
+ "learning_rate": 1.2315888436226887e-07,
8295
+ "logits/chosen": -2.1264805793762207,
8296
+ "logits/rejected": -1.4703245162963867,
8297
+ "logps/chosen": -32.981266021728516,
8298
+ "logps/rejected": -597.6434936523438,
8299
+ "loss": 12257.6922,
8300
+ "rewards/accuracies": 0.987500011920929,
8301
+ "rewards/chosen": 0.21187356114387512,
8302
+ "rewards/margins": 0.5627579689025879,
8303
+ "rewards/rejected": -0.3508843779563904,
8304
+ "step": 5520
8305
  }
8306
  ],
8307
  "logging_steps": 10,