cat-searcher commited on
Commit
81fd0d3
1 Parent(s): 4c91759

Training in progress, epoch 26, checkpoint

Browse files
Files changed (29) hide show
  1. last-checkpoint/global_step5126/bf16_zero_pp_rank_0_mp_rank_00_optim_states.pt +3 -0
  2. last-checkpoint/global_step5126/bf16_zero_pp_rank_1_mp_rank_00_optim_states.pt +3 -0
  3. last-checkpoint/global_step5126/bf16_zero_pp_rank_2_mp_rank_00_optim_states.pt +3 -0
  4. last-checkpoint/global_step5126/bf16_zero_pp_rank_3_mp_rank_00_optim_states.pt +3 -0
  5. last-checkpoint/global_step5126/bf16_zero_pp_rank_4_mp_rank_00_optim_states.pt +3 -0
  6. last-checkpoint/global_step5126/bf16_zero_pp_rank_5_mp_rank_00_optim_states.pt +3 -0
  7. last-checkpoint/global_step5126/bf16_zero_pp_rank_6_mp_rank_00_optim_states.pt +3 -0
  8. last-checkpoint/global_step5126/bf16_zero_pp_rank_7_mp_rank_00_optim_states.pt +3 -0
  9. last-checkpoint/global_step5126/zero_pp_rank_0_mp_rank_00_model_states.pt +3 -0
  10. last-checkpoint/global_step5126/zero_pp_rank_1_mp_rank_00_model_states.pt +3 -0
  11. last-checkpoint/global_step5126/zero_pp_rank_2_mp_rank_00_model_states.pt +3 -0
  12. last-checkpoint/global_step5126/zero_pp_rank_3_mp_rank_00_model_states.pt +3 -0
  13. last-checkpoint/global_step5126/zero_pp_rank_4_mp_rank_00_model_states.pt +3 -0
  14. last-checkpoint/global_step5126/zero_pp_rank_5_mp_rank_00_model_states.pt +3 -0
  15. last-checkpoint/global_step5126/zero_pp_rank_6_mp_rank_00_model_states.pt +3 -0
  16. last-checkpoint/global_step5126/zero_pp_rank_7_mp_rank_00_model_states.pt +3 -0
  17. last-checkpoint/latest +1 -1
  18. last-checkpoint/model-00001-of-00002.safetensors +1 -1
  19. last-checkpoint/model-00002-of-00002.safetensors +1 -1
  20. last-checkpoint/rng_state_0.pth +1 -1
  21. last-checkpoint/rng_state_1.pth +1 -1
  22. last-checkpoint/rng_state_2.pth +1 -1
  23. last-checkpoint/rng_state_3.pth +1 -1
  24. last-checkpoint/rng_state_4.pth +1 -1
  25. last-checkpoint/rng_state_5.pth +1 -1
  26. last-checkpoint/rng_state_6.pth +1 -1
  27. last-checkpoint/rng_state_7.pth +1 -1
  28. last-checkpoint/scheduler.pt +1 -1
  29. last-checkpoint/trainer_state.json +302 -2
last-checkpoint/global_step5126/bf16_zero_pp_rank_0_mp_rank_00_optim_states.pt ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:1af7760daa4102e118869727094ce0fbf14cf0ed307b27fd7a19ff85ef1ed21a
3
+ size 2506176112
last-checkpoint/global_step5126/bf16_zero_pp_rank_1_mp_rank_00_optim_states.pt ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:f498a49895cf5e7427cb08addc7237919bba4b494e4662f68bc79562db491a54
3
+ size 2506176112
last-checkpoint/global_step5126/bf16_zero_pp_rank_2_mp_rank_00_optim_states.pt ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:d45eede1b9dad005d98db27ce0db0913175175b92663cfb4f85bc45dbbd00604
3
+ size 2506176112
last-checkpoint/global_step5126/bf16_zero_pp_rank_3_mp_rank_00_optim_states.pt ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:8d0a86fae62a16d1e788480421c675f660b32f41088a78a07cbb65a4e0e0721e
3
+ size 2506176112
last-checkpoint/global_step5126/bf16_zero_pp_rank_4_mp_rank_00_optim_states.pt ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:86b033a977f0b9eb6e7524d3438e660be4cc3b071d1627f09f533fada7ecba6f
3
+ size 2506176112
last-checkpoint/global_step5126/bf16_zero_pp_rank_5_mp_rank_00_optim_states.pt ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:93294787dbeb15413656d5ca75de1498bf9416ab80efbe381f748bf0a5dddb26
3
+ size 2506176112
last-checkpoint/global_step5126/bf16_zero_pp_rank_6_mp_rank_00_optim_states.pt ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:5d64b3b706fcc9670bc87aac98540dfdeb1369cd862a5c7efc91bafd9adb9ddc
3
+ size 2506176112
last-checkpoint/global_step5126/bf16_zero_pp_rank_7_mp_rank_00_optim_states.pt ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:a41f3667f22c201d6f34a6995bb4c823becbc8bc45386074e697c4c504432fc4
3
+ size 2506176112
last-checkpoint/global_step5126/zero_pp_rank_0_mp_rank_00_model_states.pt ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:8bcb70e44e7823fcf90a769bebf9d9d9c0c233dea2cf642f794df31a267f9758
3
+ size 85570
last-checkpoint/global_step5126/zero_pp_rank_1_mp_rank_00_model_states.pt ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:793e32b15b99418321ce219dea2352f549f719604f1948e75215f5e62bafd89f
3
+ size 85506
last-checkpoint/global_step5126/zero_pp_rank_2_mp_rank_00_model_states.pt ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:d0cc4a5d3e2a700f6e59eeb9a0579057dfcd2067f6c2f9ea7a773b80af5754cd
3
+ size 85506
last-checkpoint/global_step5126/zero_pp_rank_3_mp_rank_00_model_states.pt ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:d98445b970e33eaa6fc846e2436e367d24db4520657bd50ab1a5b68db45d68b8
3
+ size 85506
last-checkpoint/global_step5126/zero_pp_rank_4_mp_rank_00_model_states.pt ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:db6597a5de591d9c3267ff18a867dbf2c93a54584fea066482d5d0e8de144cc0
3
+ size 85506
last-checkpoint/global_step5126/zero_pp_rank_5_mp_rank_00_model_states.pt ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:2b04cc0713b1195e919fe4270b38c3b6768391ade8ecda5dd8cfc4d3c2f70819
3
+ size 85506
last-checkpoint/global_step5126/zero_pp_rank_6_mp_rank_00_model_states.pt ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:74c99032ceba04a67c3ac793f3988d89e77588a122bb311d2d8386335b393870
3
+ size 85506
last-checkpoint/global_step5126/zero_pp_rank_7_mp_rank_00_model_states.pt ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:01a554fb7d11eb9dd4af7f666dbc86ec128b8514437d01f2b93b6201745af5d9
3
+ size 85506
last-checkpoint/latest CHANGED
@@ -1 +1 @@
1
- global_step4928
 
1
+ global_step5126
last-checkpoint/model-00001-of-00002.safetensors CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:b9da2b7dcfa00a88a71ba39f69087d6c106b9fc3502f080f4fdf0c95ba7f0232
3
  size 4945242264
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:ab3b8a6a1f14b87eddcd6889e77ca358471e584db04f6d14f50ce0ca4a94e8e2
3
  size 4945242264
last-checkpoint/model-00002-of-00002.safetensors CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:66272c39e510a03fae4a8b8c2051165d69a38abcede58508cbcdf95984247125
3
  size 67121608
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:4e312815d6589c577d3a29d19ba5e8956d7c9080646ba7b35c3708e364eb8f55
3
  size 67121608
last-checkpoint/rng_state_0.pth CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:6118faeaee4fdfaa13ac70fa72cfa116964f5a3b96fac7723d08df58a38b397c
3
  size 15984
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:9dd0b885528e55ec25b01a487faef7810481e858198ac24b76aedb3688770c06
3
  size 15984
last-checkpoint/rng_state_1.pth CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:4dfa684751cc5b0d0c6c20837671c3f615d1dea15a14f377cd3f328d98685be6
3
  size 15984
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:a451e60f451c0ed06e4b0d619be9f7981c5af29ba16d797996e102e4d1fd7514
3
  size 15984
last-checkpoint/rng_state_2.pth CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:f6288d8200e463b5ae8ad06a62ffa8cc379f48c86fd01515a13181f976efa71a
3
  size 15984
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:ff4027794d1d9c71e13291169d32d384e8f6078c931f43db354471cbc57d8639
3
  size 15984
last-checkpoint/rng_state_3.pth CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:bd3d0484a9201a8efbd51326113ef6b44a0b272cfa40529d39c56a9d67deac8e
3
  size 15984
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:40925f5ac9883b8dfe22197d58f18429503331adeff91ce58e72d56b5094171a
3
  size 15984
last-checkpoint/rng_state_4.pth CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:426c6fc3fcaae6fad4fdb83d268d91ed94e3e0e88bed483e185a78e86a9692b3
3
  size 15984
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:d0890e080f98bcfb81036d2db959cc45209e8c2f67a0dccde184473488395153
3
  size 15984
last-checkpoint/rng_state_5.pth CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:7d089430782ecc3b7446f527601b569a4990fdd9f8d96c18c87dc8d503cbdb70
3
  size 15984
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:d8e4b714cb76d14f84bc59d5d9ba706908caddc95de8f17bfbeb87cbce486cb3
3
  size 15984
last-checkpoint/rng_state_6.pth CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:b61f73069ccc96351c7253c2334d20c170e92750457b685c9d79286d288292e9
3
  size 15984
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:f5c095f0a000582673860ef2dcfa50f1ba3d6bf9b31cb0a66349b60d581ecbe3
3
  size 15984
last-checkpoint/rng_state_7.pth CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:e70ddcc8f9ba6c0be1ac8d1ba2f06e8a4253e0a843f3a263a85c073416ca948d
3
  size 15984
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:8e625b7623f260d65cb1001beba6e4d0df9ed61b3f496d3e767f280a6b73cde8
3
  size 15984
last-checkpoint/scheduler.pt CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:cc0dea9c7426022052c6904d669ed6537aacbe6f69c00de710577f64daa74c6b
3
  size 1064
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:d2a6611856f3b4f599b410c5f2fa04b4cd6d782a4bd921f15735728fc381869d
3
  size 1064
last-checkpoint/trainer_state.json CHANGED
@@ -1,9 +1,9 @@
1
  {
2
  "best_metric": null,
3
  "best_model_checkpoint": null,
4
- "epoch": 24.99746835443038,
5
  "eval_steps": 100,
6
- "global_step": 4928,
7
  "is_hyper_param_search": false,
8
  "is_local_process_zero": true,
9
  "is_world_process_zero": true,
@@ -7402,6 +7402,306 @@
7402
  "rewards/margins": 0.5547462701797485,
7403
  "rewards/rejected": -0.3539626896381378,
7404
  "step": 4920
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
7405
  }
7406
  ],
7407
  "logging_steps": 10,
 
1
  {
2
  "best_metric": null,
3
  "best_model_checkpoint": null,
4
+ "epoch": 26.0,
5
  "eval_steps": 100,
6
+ "global_step": 5126,
7
  "is_hyper_param_search": false,
8
  "is_local_process_zero": true,
9
  "is_world_process_zero": true,
 
7402
  "rewards/margins": 0.5547462701797485,
7403
  "rewards/rejected": -0.3539626896381378,
7404
  "step": 4920
7405
+ },
7406
+ {
7407
+ "epoch": 25.00759493670886,
7408
+ "grad_norm": 308388.08709269366,
7409
+ "learning_rate": 1.6938263867126293e-07,
7410
+ "logits/chosen": -1.6532137393951416,
7411
+ "logits/rejected": -1.572850227355957,
7412
+ "logps/chosen": -41.12345886230469,
7413
+ "logps/rejected": -613.5958862304688,
7414
+ "loss": 12755.7383,
7415
+ "rewards/accuracies": 0.9750000238418579,
7416
+ "rewards/chosen": 0.21259479224681854,
7417
+ "rewards/margins": 0.5684026479721069,
7418
+ "rewards/rejected": -0.3558078408241272,
7419
+ "step": 4930
7420
+ },
7421
+ {
7422
+ "epoch": 25.058227848101264,
7423
+ "grad_norm": 320761.03886897856,
7424
+ "learning_rate": 1.685991852083986e-07,
7425
+ "logits/chosen": -0.11034099757671356,
7426
+ "logits/rejected": -0.06293153762817383,
7427
+ "logps/chosen": -34.010704040527344,
7428
+ "logps/rejected": -583.318359375,
7429
+ "loss": 13300.3922,
7430
+ "rewards/accuracies": 0.9750000238418579,
7431
+ "rewards/chosen": 0.19742931425571442,
7432
+ "rewards/margins": 0.5516862273216248,
7433
+ "rewards/rejected": -0.35425692796707153,
7434
+ "step": 4940
7435
+ },
7436
+ {
7437
+ "epoch": 25.10886075949367,
7438
+ "grad_norm": 282559.397671993,
7439
+ "learning_rate": 1.6781573174553431e-07,
7440
+ "logits/chosen": 0.5274404883384705,
7441
+ "logits/rejected": 1.2507613897323608,
7442
+ "logps/chosen": -29.299930572509766,
7443
+ "logps/rejected": -554.8450927734375,
7444
+ "loss": 12685.2523,
7445
+ "rewards/accuracies": 0.9624999761581421,
7446
+ "rewards/chosen": 0.19330081343650818,
7447
+ "rewards/margins": 0.5271843671798706,
7448
+ "rewards/rejected": -0.3338836431503296,
7449
+ "step": 4950
7450
+ },
7451
+ {
7452
+ "epoch": 25.159493670886075,
7453
+ "grad_norm": 248533.31024359175,
7454
+ "learning_rate": 1.6703227828267e-07,
7455
+ "logits/chosen": -1.2484452724456787,
7456
+ "logits/rejected": -0.5531445741653442,
7457
+ "logps/chosen": -42.44970703125,
7458
+ "logps/rejected": -591.9672241210938,
7459
+ "loss": 12525.2,
7460
+ "rewards/accuracies": 1.0,
7461
+ "rewards/chosen": 0.20557789504528046,
7462
+ "rewards/margins": 0.5516069531440735,
7463
+ "rewards/rejected": -0.34602901339530945,
7464
+ "step": 4960
7465
+ },
7466
+ {
7467
+ "epoch": 25.21012658227848,
7468
+ "grad_norm": 365840.3682606488,
7469
+ "learning_rate": 1.662488248198057e-07,
7470
+ "logits/chosen": -1.5047721862792969,
7471
+ "logits/rejected": -1.5158735513687134,
7472
+ "logps/chosen": -31.838958740234375,
7473
+ "logps/rejected": -581.0045166015625,
7474
+ "loss": 13041.882,
7475
+ "rewards/accuracies": 0.9750000238418579,
7476
+ "rewards/chosen": 0.201541468501091,
7477
+ "rewards/margins": 0.5492666959762573,
7478
+ "rewards/rejected": -0.3477252125740051,
7479
+ "step": 4970
7480
+ },
7481
+ {
7482
+ "epoch": 25.260759493670886,
7483
+ "grad_norm": 364119.66442401055,
7484
+ "learning_rate": 1.6546537135694138e-07,
7485
+ "logits/chosen": -2.0333914756774902,
7486
+ "logits/rejected": -2.0420191287994385,
7487
+ "logps/chosen": -33.426788330078125,
7488
+ "logps/rejected": -577.18212890625,
7489
+ "loss": 13218.8875,
7490
+ "rewards/accuracies": 1.0,
7491
+ "rewards/chosen": 0.20231468975543976,
7492
+ "rewards/margins": 0.5456961989402771,
7493
+ "rewards/rejected": -0.3433815836906433,
7494
+ "step": 4980
7495
+ },
7496
+ {
7497
+ "epoch": 25.31139240506329,
7498
+ "grad_norm": 434691.5380135347,
7499
+ "learning_rate": 1.6468191789407709e-07,
7500
+ "logits/chosen": -0.23437795042991638,
7501
+ "logits/rejected": -0.03313719108700752,
7502
+ "logps/chosen": -33.025386810302734,
7503
+ "logps/rejected": -587.5833740234375,
7504
+ "loss": 12003.9711,
7505
+ "rewards/accuracies": 0.987500011920929,
7506
+ "rewards/chosen": 0.19747456908226013,
7507
+ "rewards/margins": 0.553167998790741,
7508
+ "rewards/rejected": -0.3556934595108032,
7509
+ "step": 4990
7510
+ },
7511
+ {
7512
+ "epoch": 25.362025316455696,
7513
+ "grad_norm": 257881.6224659914,
7514
+ "learning_rate": 1.6389846443121277e-07,
7515
+ "logits/chosen": 1.229998230934143,
7516
+ "logits/rejected": 1.8426265716552734,
7517
+ "logps/chosen": -31.151538848876953,
7518
+ "logps/rejected": -575.4852905273438,
7519
+ "loss": 13412.7078,
7520
+ "rewards/accuracies": 0.987500011920929,
7521
+ "rewards/chosen": 0.1954251229763031,
7522
+ "rewards/margins": 0.5429095029830933,
7523
+ "rewards/rejected": -0.34748440980911255,
7524
+ "step": 5000
7525
+ },
7526
+ {
7527
+ "epoch": 25.4126582278481,
7528
+ "grad_norm": 425285.73032920854,
7529
+ "learning_rate": 1.6311501096834847e-07,
7530
+ "logits/chosen": -1.241003155708313,
7531
+ "logits/rejected": -0.7176898121833801,
7532
+ "logps/chosen": -31.115795135498047,
7533
+ "logps/rejected": -558.19873046875,
7534
+ "loss": 13301.7094,
7535
+ "rewards/accuracies": 0.9624999761581421,
7536
+ "rewards/chosen": 0.19612053036689758,
7537
+ "rewards/margins": 0.5247890949249268,
7538
+ "rewards/rejected": -0.3286685347557068,
7539
+ "step": 5010
7540
+ },
7541
+ {
7542
+ "epoch": 25.463291139240507,
7543
+ "grad_norm": 372695.4381119174,
7544
+ "learning_rate": 1.6233155750548415e-07,
7545
+ "logits/chosen": -1.8982555866241455,
7546
+ "logits/rejected": -1.494901180267334,
7547
+ "logps/chosen": -28.403858184814453,
7548
+ "logps/rejected": -562.348388671875,
7549
+ "loss": 13093.6797,
7550
+ "rewards/accuracies": 1.0,
7551
+ "rewards/chosen": 0.203691765666008,
7552
+ "rewards/margins": 0.5354448556900024,
7553
+ "rewards/rejected": -0.33175310492515564,
7554
+ "step": 5020
7555
+ },
7556
+ {
7557
+ "epoch": 25.513924050632912,
7558
+ "grad_norm": 291137.30920257524,
7559
+ "learning_rate": 1.6154810404261986e-07,
7560
+ "logits/chosen": -0.2861802577972412,
7561
+ "logits/rejected": -0.4479186534881592,
7562
+ "logps/chosen": -23.825702667236328,
7563
+ "logps/rejected": -559.0096435546875,
7564
+ "loss": 12589.4609,
7565
+ "rewards/accuracies": 0.987500011920929,
7566
+ "rewards/chosen": 0.1913156658411026,
7567
+ "rewards/margins": 0.5378258228302002,
7568
+ "rewards/rejected": -0.346510112285614,
7569
+ "step": 5030
7570
+ },
7571
+ {
7572
+ "epoch": 25.564556962025318,
7573
+ "grad_norm": 273297.2570355529,
7574
+ "learning_rate": 1.6076465057975556e-07,
7575
+ "logits/chosen": -2.0077948570251465,
7576
+ "logits/rejected": -1.546903371810913,
7577
+ "logps/chosen": -34.178993225097656,
7578
+ "logps/rejected": -599.1771240234375,
7579
+ "loss": 12277.0906,
7580
+ "rewards/accuracies": 1.0,
7581
+ "rewards/chosen": 0.20620949566364288,
7582
+ "rewards/margins": 0.5666217803955078,
7583
+ "rewards/rejected": -0.36041226983070374,
7584
+ "step": 5040
7585
+ },
7586
+ {
7587
+ "epoch": 25.615189873417723,
7588
+ "grad_norm": 287331.7702661688,
7589
+ "learning_rate": 1.5998119711689127e-07,
7590
+ "logits/chosen": -0.9829635620117188,
7591
+ "logits/rejected": -0.3811960220336914,
7592
+ "logps/chosen": -32.14269256591797,
7593
+ "logps/rejected": -580.4415283203125,
7594
+ "loss": 12507.3219,
7595
+ "rewards/accuracies": 1.0,
7596
+ "rewards/chosen": 0.20907440781593323,
7597
+ "rewards/margins": 0.5523373484611511,
7598
+ "rewards/rejected": -0.3432629406452179,
7599
+ "step": 5050
7600
+ },
7601
+ {
7602
+ "epoch": 25.665822784810125,
7603
+ "grad_norm": 896554.0294317787,
7604
+ "learning_rate": 1.5919774365402695e-07,
7605
+ "logits/chosen": -1.3259598016738892,
7606
+ "logits/rejected": -0.9525947570800781,
7607
+ "logps/chosen": -25.666656494140625,
7608
+ "logps/rejected": -573.1832885742188,
7609
+ "loss": 12955.9469,
7610
+ "rewards/accuracies": 1.0,
7611
+ "rewards/chosen": 0.19553272426128387,
7612
+ "rewards/margins": 0.5421277284622192,
7613
+ "rewards/rejected": -0.3465949594974518,
7614
+ "step": 5060
7615
+ },
7616
+ {
7617
+ "epoch": 25.71645569620253,
7618
+ "grad_norm": 360559.08966435614,
7619
+ "learning_rate": 1.5841429019116266e-07,
7620
+ "logits/chosen": -2.50518536567688,
7621
+ "logits/rejected": -2.6326870918273926,
7622
+ "logps/chosen": -40.73974609375,
7623
+ "logps/rejected": -598.9993896484375,
7624
+ "loss": 13192.7609,
7625
+ "rewards/accuracies": 1.0,
7626
+ "rewards/chosen": 0.2110958993434906,
7627
+ "rewards/margins": 0.559829592704773,
7628
+ "rewards/rejected": -0.34873366355895996,
7629
+ "step": 5070
7630
+ },
7631
+ {
7632
+ "epoch": 25.767088607594935,
7633
+ "grad_norm": 354200.8480985467,
7634
+ "learning_rate": 1.5763083672829833e-07,
7635
+ "logits/chosen": 0.24985246360301971,
7636
+ "logits/rejected": 0.11640717834234238,
7637
+ "logps/chosen": -30.384597778320312,
7638
+ "logps/rejected": -595.9378662109375,
7639
+ "loss": 13357.8156,
7640
+ "rewards/accuracies": 1.0,
7641
+ "rewards/chosen": 0.20246371626853943,
7642
+ "rewards/margins": 0.5666370391845703,
7643
+ "rewards/rejected": -0.3641732633113861,
7644
+ "step": 5080
7645
+ },
7646
+ {
7647
+ "epoch": 25.81772151898734,
7648
+ "grad_norm": 419630.4907858681,
7649
+ "learning_rate": 1.5684738326543404e-07,
7650
+ "logits/chosen": -2.382422924041748,
7651
+ "logits/rejected": -1.6780860424041748,
7652
+ "logps/chosen": -32.89704132080078,
7653
+ "logps/rejected": -596.2845458984375,
7654
+ "loss": 13075.125,
7655
+ "rewards/accuracies": 1.0,
7656
+ "rewards/chosen": 0.20516617596149445,
7657
+ "rewards/margins": 0.5647061467170715,
7658
+ "rewards/rejected": -0.3595399558544159,
7659
+ "step": 5090
7660
+ },
7661
+ {
7662
+ "epoch": 25.868354430379746,
7663
+ "grad_norm": 239893.19190802056,
7664
+ "learning_rate": 1.5606392980256972e-07,
7665
+ "logits/chosen": -1.5904518365859985,
7666
+ "logits/rejected": -1.162544846534729,
7667
+ "logps/chosen": -29.703998565673828,
7668
+ "logps/rejected": -562.6304321289062,
7669
+ "loss": 12907.7898,
7670
+ "rewards/accuracies": 1.0,
7671
+ "rewards/chosen": 0.1967582404613495,
7672
+ "rewards/margins": 0.5360093116760254,
7673
+ "rewards/rejected": -0.3392511010169983,
7674
+ "step": 5100
7675
+ },
7676
+ {
7677
+ "epoch": 25.91898734177215,
7678
+ "grad_norm": 2769163.91672907,
7679
+ "learning_rate": 1.5528047633970543e-07,
7680
+ "logits/chosen": -0.4542008936405182,
7681
+ "logits/rejected": 0.3750479519367218,
7682
+ "logps/chosen": -40.263450622558594,
7683
+ "logps/rejected": -569.8021240234375,
7684
+ "loss": 12356.1203,
7685
+ "rewards/accuracies": 0.9750000238418579,
7686
+ "rewards/chosen": 0.19747862219810486,
7687
+ "rewards/margins": 0.5331242680549622,
7688
+ "rewards/rejected": -0.3356456160545349,
7689
+ "step": 5110
7690
+ },
7691
+ {
7692
+ "epoch": 25.969620253164557,
7693
+ "grad_norm": 414959.45582905615,
7694
+ "learning_rate": 1.544970228768411e-07,
7695
+ "logits/chosen": -2.780273914337158,
7696
+ "logits/rejected": -2.477725028991699,
7697
+ "logps/chosen": -34.733909606933594,
7698
+ "logps/rejected": -598.5794677734375,
7699
+ "loss": 12866.1969,
7700
+ "rewards/accuracies": 1.0,
7701
+ "rewards/chosen": 0.22083155810832977,
7702
+ "rewards/margins": 0.5664650797843933,
7703
+ "rewards/rejected": -0.3456335663795471,
7704
+ "step": 5120
7705
  }
7706
  ],
7707
  "logging_steps": 10,