cat-searcher committed on
Commit
4656255
1 Parent(s): e86348a

Training in progress, epoch 34, checkpoint

Browse files
Files changed (29) hide show
  1. last-checkpoint/global_step6706/bf16_zero_pp_rank_0_mp_rank_00_optim_states.pt +3 -0
  2. last-checkpoint/global_step6706/bf16_zero_pp_rank_1_mp_rank_00_optim_states.pt +3 -0
  3. last-checkpoint/global_step6706/bf16_zero_pp_rank_2_mp_rank_00_optim_states.pt +3 -0
  4. last-checkpoint/global_step6706/bf16_zero_pp_rank_3_mp_rank_00_optim_states.pt +3 -0
  5. last-checkpoint/global_step6706/bf16_zero_pp_rank_4_mp_rank_00_optim_states.pt +3 -0
  6. last-checkpoint/global_step6706/bf16_zero_pp_rank_5_mp_rank_00_optim_states.pt +3 -0
  7. last-checkpoint/global_step6706/bf16_zero_pp_rank_6_mp_rank_00_optim_states.pt +3 -0
  8. last-checkpoint/global_step6706/bf16_zero_pp_rank_7_mp_rank_00_optim_states.pt +3 -0
  9. last-checkpoint/global_step6706/zero_pp_rank_0_mp_rank_00_model_states.pt +3 -0
  10. last-checkpoint/global_step6706/zero_pp_rank_1_mp_rank_00_model_states.pt +3 -0
  11. last-checkpoint/global_step6706/zero_pp_rank_2_mp_rank_00_model_states.pt +3 -0
  12. last-checkpoint/global_step6706/zero_pp_rank_3_mp_rank_00_model_states.pt +3 -0
  13. last-checkpoint/global_step6706/zero_pp_rank_4_mp_rank_00_model_states.pt +3 -0
  14. last-checkpoint/global_step6706/zero_pp_rank_5_mp_rank_00_model_states.pt +3 -0
  15. last-checkpoint/global_step6706/zero_pp_rank_6_mp_rank_00_model_states.pt +3 -0
  16. last-checkpoint/global_step6706/zero_pp_rank_7_mp_rank_00_model_states.pt +3 -0
  17. last-checkpoint/latest +1 -1
  18. last-checkpoint/model-00001-of-00002.safetensors +1 -1
  19. last-checkpoint/model-00002-of-00002.safetensors +1 -1
  20. last-checkpoint/rng_state_0.pth +1 -1
  21. last-checkpoint/rng_state_1.pth +1 -1
  22. last-checkpoint/rng_state_2.pth +1 -1
  23. last-checkpoint/rng_state_3.pth +1 -1
  24. last-checkpoint/rng_state_4.pth +1 -1
  25. last-checkpoint/rng_state_5.pth +1 -1
  26. last-checkpoint/rng_state_6.pth +1 -1
  27. last-checkpoint/rng_state_7.pth +1 -1
  28. last-checkpoint/scheduler.pt +1 -1
  29. last-checkpoint/trainer_state.json +587 -2
last-checkpoint/global_step6706/bf16_zero_pp_rank_0_mp_rank_00_optim_states.pt ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:5b42c56b1327b90cb9abc534edcfbb08932410debcd7305c0d1759e04bdc4d11
3
+ size 2506176112
last-checkpoint/global_step6706/bf16_zero_pp_rank_1_mp_rank_00_optim_states.pt ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:9396272e9f609f397084631f7d4ffed08fff5c7a034e4194bfd0a7f72c70bfa1
3
+ size 2506176112
last-checkpoint/global_step6706/bf16_zero_pp_rank_2_mp_rank_00_optim_states.pt ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:591f9c4ae58767f09106f21436522ad83280a312251aec9632f74f3c9a6169e3
3
+ size 2506176112
last-checkpoint/global_step6706/bf16_zero_pp_rank_3_mp_rank_00_optim_states.pt ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:49135e96f11f14c0edf4c5dffe815b40ae94488d644743ef2dd9de99786ea8b7
3
+ size 2506176112
last-checkpoint/global_step6706/bf16_zero_pp_rank_4_mp_rank_00_optim_states.pt ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:f8f3f16c835dc750d8d496565e2bd2e880599b1ef853595956dcdd5d856fc3f2
3
+ size 2506176112
last-checkpoint/global_step6706/bf16_zero_pp_rank_5_mp_rank_00_optim_states.pt ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:a6c1e57d8cf90c74381abe81ec860ea539ed7f1005ee89c7ae0fca24ef4c060e
3
+ size 2506176112
last-checkpoint/global_step6706/bf16_zero_pp_rank_6_mp_rank_00_optim_states.pt ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:544f3afbdf2e5758131bde2e146c11a13beea591ed9ffd5bdc5734faad8a1043
3
+ size 2506176112
last-checkpoint/global_step6706/bf16_zero_pp_rank_7_mp_rank_00_optim_states.pt ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:2a8e4440e7e8b59cfb3cc26a05b82c5d57ea5084e4017e3c33d0e483da159561
3
+ size 2506176112
last-checkpoint/global_step6706/zero_pp_rank_0_mp_rank_00_model_states.pt ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:9ef8119caf8c39ec12de2099badeb63ad829ce60b1bfeeebb9e0517a09c0a976
3
+ size 85570
last-checkpoint/global_step6706/zero_pp_rank_1_mp_rank_00_model_states.pt ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:a89445868a574a4e6ca673c951b38372184c1b6f771b3bfa85d93241a42213e9
3
+ size 85506
last-checkpoint/global_step6706/zero_pp_rank_2_mp_rank_00_model_states.pt ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:7d31316c948c7d38bc67e1c7147275a977f4cc81870a86da11f10564b176127c
3
+ size 85506
last-checkpoint/global_step6706/zero_pp_rank_3_mp_rank_00_model_states.pt ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:b50dcfc583c1b3c6bea42d40dad8d32b734dc8be0c79d7e09f61d63bf79cccb7
3
+ size 85506
last-checkpoint/global_step6706/zero_pp_rank_4_mp_rank_00_model_states.pt ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:4e31df5e5d1299642990ab63df3f37154cc9b1fabcfec91715a33296b7735163
3
+ size 85506
last-checkpoint/global_step6706/zero_pp_rank_5_mp_rank_00_model_states.pt ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:d6db17cba5932b9a1f9ea2964606a3e4054625b730789b3dd91f659e92c3486c
3
+ size 85506
last-checkpoint/global_step6706/zero_pp_rank_6_mp_rank_00_model_states.pt ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:49e00c574c29ae1516a5c427bcb11052789980cfebadc115603615c994101ab7
3
+ size 85506
last-checkpoint/global_step6706/zero_pp_rank_7_mp_rank_00_model_states.pt ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:9d51ee3a89f5d4297c41d5ecd7accd0c7f05aae37c660466fceca4c94b0656c3
3
+ size 85506
last-checkpoint/latest CHANGED
@@ -1 +1 @@
1
- global_step6311
 
1
+ global_step6706
last-checkpoint/model-00001-of-00002.safetensors CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:1a4166953b305647bfe20e1bbeb15a19c28014fe214420eadf6709470c2313a3
3
  size 4945242264
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:6dccd3cd5529e5f163e359e3786831ba7375631404b7ee8058d8b4408e5f6c21
3
  size 4945242264
last-checkpoint/model-00002-of-00002.safetensors CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:60ab1e0ab13e8465a7f0033de41b00c3ef85f0686c27f26ccedd41583cf589de
3
  size 67121608
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:c84686bbc6149ef7c13cd1e65651e398a75147e6c2a6349adc73d12c6ffa69ab
3
  size 67121608
last-checkpoint/rng_state_0.pth CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:af2aa29a1a0077819b6e6c9858b2870298aef9379363892f7f4a488e66c5bd38
3
  size 15984
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:1824a3c32cb0f9c63783531dc708888703e1f1c3a24fcb1359a551096190eeb4
3
  size 15984
last-checkpoint/rng_state_1.pth CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:291da2a884d82312ccaa711af98beaa52d4f41499984405613bb21b5148565a2
3
  size 15984
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:ef711a8d330873d0c4240280ee39d5aa7914332ddf901ffeea474d7a3676d68e
3
  size 15984
last-checkpoint/rng_state_2.pth CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:f1bbb7042adc0fc2a1ac7bf08c10090a51f5d0491c80cb36a3e5f1380294d890
3
  size 15984
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:dc1c319ca557b1ad565c5491231c88b982788beb22edcc7662c65f10bac37a88
3
  size 15984
last-checkpoint/rng_state_3.pth CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:70f0dc59a31366ea078c9fd119c2fc25b442df27e88be4fecef8251ec325566a
3
  size 15984
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:bda4459d526a1e94ec4135282c75d79950f5463b1f31627ef8f677d8a590cadd
3
  size 15984
last-checkpoint/rng_state_4.pth CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:26207c8fd2f08c67b253e0d4b8b67c73971a41b870b2d0d9e52e1388c89513b5
3
  size 15984
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:c928635796240bfafd62cee0be31323babeadc7998190bc90de090567d3711fd
3
  size 15984
last-checkpoint/rng_state_5.pth CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:ac396bf6594a215db06382f8b0dfcb2360c2c6b1a95d8150fec6afd800852574
3
  size 15984
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:0e1e06f947b66a18ac20f7698fc3fe6fbf2a0fa9b5d6b3460904a385c167db64
3
  size 15984
last-checkpoint/rng_state_6.pth CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:c2edf36543f3b923b139f3541d6d94d5a2d50de85da08b18fa6867198430e57c
3
  size 15984
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:0a5533c6799b2aca827472a96ea0b581da238e7b44cbde527a6a83e4244ea55b
3
  size 15984
last-checkpoint/rng_state_7.pth CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:f96eea372344884b68478842af038d9832fda66fa5d46d28035e601a7834efd4
3
  size 15984
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:9fd80eb1f10bbe498691d09f14b4f4a09102a449bc19f524f5b62f82768d3af5
3
  size 15984
last-checkpoint/scheduler.pt CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:5ac6a779be83142d0fb2f1a0e7950874d368e5e14f0bcab557da3d6573142406
3
  size 1064
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:1c25188d68c243cfb7b17f6acfbc1e55a94e4598b4924ebbd056f93551f70969
3
  size 1064
last-checkpoint/trainer_state.json CHANGED
@@ -1,9 +1,9 @@
1
  {
2
  "best_metric": null,
3
  "best_model_checkpoint": null,
4
- "epoch": 32.0,
5
  "eval_steps": 100,
6
- "global_step": 6311,
7
  "is_hyper_param_search": false,
8
  "is_local_process_zero": true,
9
  "is_world_process_zero": true,
@@ -9487,6 +9487,591 @@
9487
  "rewards/margins": 0.5819977521896362,
9488
  "rewards/rejected": -0.36624595522880554,
9489
  "step": 6310
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
9490
  }
9491
  ],
9492
  "logging_steps": 10,
 
1
  {
2
  "best_metric": null,
3
  "best_model_checkpoint": null,
4
+ "epoch": 34.0,
5
  "eval_steps": 100,
6
+ "global_step": 6706,
7
  "is_hyper_param_search": false,
8
  "is_local_process_zero": true,
9
  "is_world_process_zero": true,
 
9487
  "rewards/margins": 0.5819977521896362,
9488
  "rewards/rejected": -0.36624595522880554,
9489
  "step": 6310
9490
+ },
9491
+ {
9492
+ "epoch": 32.04556962025316,
9493
+ "grad_norm": 146925.77007874168,
9494
+ "learning_rate": 6.04826073331244e-08,
9495
+ "logits/chosen": -1.0771139860153198,
9496
+ "logits/rejected": -0.38963261246681213,
9497
+ "logps/chosen": -25.353687286376953,
9498
+ "logps/rejected": -599.3104248046875,
9499
+ "loss": 11649.7609,
9500
+ "rewards/accuracies": 1.0,
9501
+ "rewards/chosen": 0.21451549232006073,
9502
+ "rewards/margins": 0.5766840577125549,
9503
+ "rewards/rejected": -0.3621685206890106,
9504
+ "step": 6320
9505
+ },
9506
+ {
9507
+ "epoch": 32.09620253164557,
9508
+ "grad_norm": 94333.82344683389,
9509
+ "learning_rate": 5.96991538702601e-08,
9510
+ "logits/chosen": -2.162341356277466,
9511
+ "logits/rejected": -1.5530678033828735,
9512
+ "logps/chosen": -36.120880126953125,
9513
+ "logps/rejected": -594.9260864257812,
9514
+ "loss": 11919.4,
9515
+ "rewards/accuracies": 0.987500011920929,
9516
+ "rewards/chosen": 0.2106127291917801,
9517
+ "rewards/margins": 0.558625340461731,
9518
+ "rewards/rejected": -0.34801262617111206,
9519
+ "step": 6330
9520
+ },
9521
+ {
9522
+ "epoch": 32.14683544303797,
9523
+ "grad_norm": 144438.33677050017,
9524
+ "learning_rate": 5.8915700407395795e-08,
9525
+ "logits/chosen": -0.8229999542236328,
9526
+ "logits/rejected": -0.037537313997745514,
9527
+ "logps/chosen": -25.43358612060547,
9528
+ "logps/rejected": -557.636474609375,
9529
+ "loss": 11297.6063,
9530
+ "rewards/accuracies": 0.9750000238418579,
9531
+ "rewards/chosen": 0.1968574970960617,
9532
+ "rewards/margins": 0.532370388507843,
9533
+ "rewards/rejected": -0.33551284670829773,
9534
+ "step": 6340
9535
+ },
9536
+ {
9537
+ "epoch": 32.19746835443038,
9538
+ "grad_norm": 109693.94525690017,
9539
+ "learning_rate": 5.813224694453149e-08,
9540
+ "logits/chosen": -3.077913761138916,
9541
+ "logits/rejected": -2.4543375968933105,
9542
+ "logps/chosen": -26.92588233947754,
9543
+ "logps/rejected": -583.3746337890625,
9544
+ "loss": 12147.5016,
9545
+ "rewards/accuracies": 0.9750000238418579,
9546
+ "rewards/chosen": 0.21152964234352112,
9547
+ "rewards/margins": 0.5570891499519348,
9548
+ "rewards/rejected": -0.3455595374107361,
9549
+ "step": 6350
9550
+ },
9551
+ {
9552
+ "epoch": 32.24810126582278,
9553
+ "grad_norm": 94464.04824246689,
9554
+ "learning_rate": 5.734879348166719e-08,
9555
+ "logits/chosen": -0.08146251738071442,
9556
+ "logits/rejected": -0.1943734884262085,
9557
+ "logps/chosen": -38.933929443359375,
9558
+ "logps/rejected": -599.4444580078125,
9559
+ "loss": 11706.7859,
9560
+ "rewards/accuracies": 0.987500011920929,
9561
+ "rewards/chosen": 0.21242408454418182,
9562
+ "rewards/margins": 0.5596734881401062,
9563
+ "rewards/rejected": -0.34724941849708557,
9564
+ "step": 6360
9565
+ },
9566
+ {
9567
+ "epoch": 32.29873417721519,
9568
+ "grad_norm": 93779.41167523999,
9569
+ "learning_rate": 5.656534001880288e-08,
9570
+ "logits/chosen": 0.4058389663696289,
9571
+ "logits/rejected": 0.994676947593689,
9572
+ "logps/chosen": -21.240737915039062,
9573
+ "logps/rejected": -573.2392578125,
9574
+ "loss": 12153.6359,
9575
+ "rewards/accuracies": 0.9750000238418579,
9576
+ "rewards/chosen": 0.19539888203144073,
9577
+ "rewards/margins": 0.550510048866272,
9578
+ "rewards/rejected": -0.35511118173599243,
9579
+ "step": 6370
9580
+ },
9581
+ {
9582
+ "epoch": 32.34936708860759,
9583
+ "grad_norm": 215459.26677533987,
9584
+ "learning_rate": 5.5781886555938573e-08,
9585
+ "logits/chosen": -1.0755536556243896,
9586
+ "logits/rejected": -0.2684146761894226,
9587
+ "logps/chosen": -25.781116485595703,
9588
+ "logps/rejected": -580.9659423828125,
9589
+ "loss": 11508.8133,
9590
+ "rewards/accuracies": 0.987500011920929,
9591
+ "rewards/chosen": 0.21089033782482147,
9592
+ "rewards/margins": 0.5592586994171143,
9593
+ "rewards/rejected": -0.34836840629577637,
9594
+ "step": 6380
9595
+ },
9596
+ {
9597
+ "epoch": 32.4,
9598
+ "grad_norm": 164612.93717131627,
9599
+ "learning_rate": 5.4998433093074266e-08,
9600
+ "logits/chosen": -2.730407238006592,
9601
+ "logits/rejected": -2.2623066902160645,
9602
+ "logps/chosen": -38.27416229248047,
9603
+ "logps/rejected": -612.3323364257812,
9604
+ "loss": 10969.9328,
9605
+ "rewards/accuracies": 0.987500011920929,
9606
+ "rewards/chosen": 0.22319836914539337,
9607
+ "rewards/margins": 0.573035478591919,
9608
+ "rewards/rejected": -0.34983712434768677,
9609
+ "step": 6390
9610
+ },
9611
+ {
9612
+ "epoch": 32.450632911392404,
9613
+ "grad_norm": 140032.81053392185,
9614
+ "learning_rate": 5.421497963020996e-08,
9615
+ "logits/chosen": -0.6492301821708679,
9616
+ "logits/rejected": -0.778862476348877,
9617
+ "logps/chosen": -28.754650115966797,
9618
+ "logps/rejected": -591.8221435546875,
9619
+ "loss": 12521.7703,
9620
+ "rewards/accuracies": 1.0,
9621
+ "rewards/chosen": 0.21057042479515076,
9622
+ "rewards/margins": 0.5634862780570984,
9623
+ "rewards/rejected": -0.35291582345962524,
9624
+ "step": 6400
9625
+ },
9626
+ {
9627
+ "epoch": 32.50126582278481,
9628
+ "grad_norm": 102205.70485715618,
9629
+ "learning_rate": 5.343152616734566e-08,
9630
+ "logits/chosen": -0.9864907264709473,
9631
+ "logits/rejected": -0.19051684439182281,
9632
+ "logps/chosen": -29.4318904876709,
9633
+ "logps/rejected": -605.131103515625,
9634
+ "loss": 11591.8508,
9635
+ "rewards/accuracies": 1.0,
9636
+ "rewards/chosen": 0.2185964584350586,
9637
+ "rewards/margins": 0.579878032207489,
9638
+ "rewards/rejected": -0.3612816333770752,
9639
+ "step": 6410
9640
+ },
9641
+ {
9642
+ "epoch": 32.551898734177215,
9643
+ "grad_norm": 103047.13529668628,
9644
+ "learning_rate": 5.264807270448135e-08,
9645
+ "logits/chosen": -2.3946361541748047,
9646
+ "logits/rejected": -1.8663170337677002,
9647
+ "logps/chosen": -22.362850189208984,
9648
+ "logps/rejected": -582.4278564453125,
9649
+ "loss": 11901.1398,
9650
+ "rewards/accuracies": 1.0,
9651
+ "rewards/chosen": 0.21427400410175323,
9652
+ "rewards/margins": 0.5642385482788086,
9653
+ "rewards/rejected": -0.34996455907821655,
9654
+ "step": 6420
9655
+ },
9656
+ {
9657
+ "epoch": 32.60253164556962,
9658
+ "grad_norm": 86074.947460872,
9659
+ "learning_rate": 5.1864619241617044e-08,
9660
+ "logits/chosen": 0.2598368227481842,
9661
+ "logits/rejected": 0.16884984076023102,
9662
+ "logps/chosen": -22.76316261291504,
9663
+ "logps/rejected": -594.866455078125,
9664
+ "loss": 12333.5344,
9665
+ "rewards/accuracies": 1.0,
9666
+ "rewards/chosen": 0.2121623456478119,
9667
+ "rewards/margins": 0.5697360038757324,
9668
+ "rewards/rejected": -0.35757365822792053,
9669
+ "step": 6430
9670
+ },
9671
+ {
9672
+ "epoch": 32.653164556962025,
9673
+ "grad_norm": 137970.73954909868,
9674
+ "learning_rate": 5.108116577875274e-08,
9675
+ "logits/chosen": -0.11699090898036957,
9676
+ "logits/rejected": 0.11212899535894394,
9677
+ "logps/chosen": -29.464065551757812,
9678
+ "logps/rejected": -573.3801879882812,
9679
+ "loss": 11953.9641,
9680
+ "rewards/accuracies": 0.987500011920929,
9681
+ "rewards/chosen": 0.21537606418132782,
9682
+ "rewards/margins": 0.5438817739486694,
9683
+ "rewards/rejected": -0.3285056948661804,
9684
+ "step": 6440
9685
+ },
9686
+ {
9687
+ "epoch": 32.70379746835443,
9688
+ "grad_norm": 460796.64629538235,
9689
+ "learning_rate": 5.029771231588843e-08,
9690
+ "logits/chosen": -1.4031693935394287,
9691
+ "logits/rejected": -2.1060502529144287,
9692
+ "logps/chosen": -23.794132232666016,
9693
+ "logps/rejected": -581.7036743164062,
9694
+ "loss": 12159.9719,
9695
+ "rewards/accuracies": 0.987500011920929,
9696
+ "rewards/chosen": 0.20509609580039978,
9697
+ "rewards/margins": 0.5560418367385864,
9698
+ "rewards/rejected": -0.35094568133354187,
9699
+ "step": 6450
9700
+ },
9701
+ {
9702
+ "epoch": 32.754430379746836,
9703
+ "grad_norm": 88571.49642806537,
9704
+ "learning_rate": 4.951425885302413e-08,
9705
+ "logits/chosen": -0.29163846373558044,
9706
+ "logits/rejected": 0.15456560254096985,
9707
+ "logps/chosen": -19.800487518310547,
9708
+ "logps/rejected": -562.6231689453125,
9709
+ "loss": 11758.9578,
9710
+ "rewards/accuracies": 1.0,
9711
+ "rewards/chosen": 0.2048061192035675,
9712
+ "rewards/margins": 0.5433157682418823,
9713
+ "rewards/rejected": -0.33850961923599243,
9714
+ "step": 6460
9715
+ },
9716
+ {
9717
+ "epoch": 32.80506329113924,
9718
+ "grad_norm": 166818.40028028333,
9719
+ "learning_rate": 4.873080539015982e-08,
9720
+ "logits/chosen": 0.3278934061527252,
9721
+ "logits/rejected": 0.6011670827865601,
9722
+ "logps/chosen": -33.445350646972656,
9723
+ "logps/rejected": -590.470703125,
9724
+ "loss": 11395.1164,
9725
+ "rewards/accuracies": 0.987500011920929,
9726
+ "rewards/chosen": 0.2123481035232544,
9727
+ "rewards/margins": 0.5555016994476318,
9728
+ "rewards/rejected": -0.34315359592437744,
9729
+ "step": 6470
9730
+ },
9731
+ {
9732
+ "epoch": 32.85569620253165,
9733
+ "grad_norm": 80619.8591659213,
9734
+ "learning_rate": 4.7947351927295515e-08,
9735
+ "logits/chosen": -1.3291213512420654,
9736
+ "logits/rejected": -1.6056814193725586,
9737
+ "logps/chosen": -29.16250228881836,
9738
+ "logps/rejected": -598.3140869140625,
9739
+ "loss": 11908.6562,
9740
+ "rewards/accuracies": 1.0,
9741
+ "rewards/chosen": 0.21245749294757843,
9742
+ "rewards/margins": 0.5684391856193542,
9743
+ "rewards/rejected": -0.3559816777706146,
9744
+ "step": 6480
9745
+ },
9746
+ {
9747
+ "epoch": 32.90632911392405,
9748
+ "grad_norm": 109452.38261580766,
9749
+ "learning_rate": 4.716389846443121e-08,
9750
+ "logits/chosen": -2.2227654457092285,
9751
+ "logits/rejected": -2.1318516731262207,
9752
+ "logps/chosen": -27.57879638671875,
9753
+ "logps/rejected": -593.1817626953125,
9754
+ "loss": 11900.8148,
9755
+ "rewards/accuracies": 1.0,
9756
+ "rewards/chosen": 0.2101704627275467,
9757
+ "rewards/margins": 0.565523624420166,
9758
+ "rewards/rejected": -0.35535311698913574,
9759
+ "step": 6490
9760
+ },
9761
+ {
9762
+ "epoch": 32.95696202531646,
9763
+ "grad_norm": 146037.74057243837,
9764
+ "learning_rate": 4.63804450015669e-08,
9765
+ "logits/chosen": -0.4855597913265228,
9766
+ "logits/rejected": -0.07905157655477524,
9767
+ "logps/chosen": -32.26173782348633,
9768
+ "logps/rejected": -582.983154296875,
9769
+ "loss": 12785.9484,
9770
+ "rewards/accuracies": 0.987500011920929,
9771
+ "rewards/chosen": 0.2102789580821991,
9772
+ "rewards/margins": 0.5541440844535828,
9773
+ "rewards/rejected": -0.34386518597602844,
9774
+ "step": 6500
9775
+ },
9776
+ {
9777
+ "epoch": 33.00759493670886,
9778
+ "grad_norm": 80554.44381289573,
9779
+ "learning_rate": 4.55969915387026e-08,
9780
+ "logits/chosen": -1.16013503074646,
9781
+ "logits/rejected": -1.237755537033081,
9782
+ "logps/chosen": -22.434879302978516,
9783
+ "logps/rejected": -572.4281005859375,
9784
+ "loss": 11892.3344,
9785
+ "rewards/accuracies": 1.0,
9786
+ "rewards/chosen": 0.2072155922651291,
9787
+ "rewards/margins": 0.54491126537323,
9788
+ "rewards/rejected": -0.3376956880092621,
9789
+ "step": 6510
9790
+ },
9791
+ {
9792
+ "epoch": 33.05822784810127,
9793
+ "grad_norm": 128557.62032643631,
9794
+ "learning_rate": 4.481353807583829e-08,
9795
+ "logits/chosen": -0.2354935109615326,
9796
+ "logits/rejected": 0.728766143321991,
9797
+ "logps/chosen": -29.432445526123047,
9798
+ "logps/rejected": -585.3494262695312,
9799
+ "loss": 11835.0961,
9800
+ "rewards/accuracies": 0.9750000238418579,
9801
+ "rewards/chosen": 0.2072407454252243,
9802
+ "rewards/margins": 0.5606441497802734,
9803
+ "rewards/rejected": -0.35340338945388794,
9804
+ "step": 6520
9805
+ },
9806
+ {
9807
+ "epoch": 33.10886075949367,
9808
+ "grad_norm": 91776.99508964189,
9809
+ "learning_rate": 4.4030084612973985e-08,
9810
+ "logits/chosen": -1.175462007522583,
9811
+ "logits/rejected": -1.1933832168579102,
9812
+ "logps/chosen": -21.900630950927734,
9813
+ "logps/rejected": -574.4762573242188,
9814
+ "loss": 12157.9109,
9815
+ "rewards/accuracies": 1.0,
9816
+ "rewards/chosen": 0.20543113350868225,
9817
+ "rewards/margins": 0.5517674684524536,
9818
+ "rewards/rejected": -0.346336305141449,
9819
+ "step": 6530
9820
+ },
9821
+ {
9822
+ "epoch": 33.15949367088608,
9823
+ "grad_norm": 89893.29258028018,
9824
+ "learning_rate": 4.324663115010968e-08,
9825
+ "logits/chosen": -0.7350924015045166,
9826
+ "logits/rejected": -0.16997528076171875,
9827
+ "logps/chosen": -23.83113670349121,
9828
+ "logps/rejected": -575.5424194335938,
9829
+ "loss": 11686.9375,
9830
+ "rewards/accuracies": 0.987500011920929,
9831
+ "rewards/chosen": 0.20410069823265076,
9832
+ "rewards/margins": 0.5512816309928894,
9833
+ "rewards/rejected": -0.34718090295791626,
9834
+ "step": 6540
9835
+ },
9836
+ {
9837
+ "epoch": 33.210126582278484,
9838
+ "grad_norm": 120975.35903766478,
9839
+ "learning_rate": 4.246317768724538e-08,
9840
+ "logits/chosen": -0.08163319528102875,
9841
+ "logits/rejected": 0.07650710642337799,
9842
+ "logps/chosen": -27.332035064697266,
9843
+ "logps/rejected": -579.8117065429688,
9844
+ "loss": 11339.9297,
9845
+ "rewards/accuracies": 0.987500011920929,
9846
+ "rewards/chosen": 0.2052970826625824,
9847
+ "rewards/margins": 0.5518554449081421,
9848
+ "rewards/rejected": -0.3465583324432373,
9849
+ "step": 6550
9850
+ },
9851
+ {
9852
+ "epoch": 33.26075949367089,
9853
+ "grad_norm": 180391.18731890293,
9854
+ "learning_rate": 4.167972422438107e-08,
9855
+ "logits/chosen": -0.8266963958740234,
9856
+ "logits/rejected": 1.0672438144683838,
9857
+ "logps/chosen": -23.287370681762695,
9858
+ "logps/rejected": -572.2568969726562,
9859
+ "loss": 11743.5586,
9860
+ "rewards/accuracies": 0.987500011920929,
9861
+ "rewards/chosen": 0.19874341785907745,
9862
+ "rewards/margins": 0.5572081804275513,
9863
+ "rewards/rejected": -0.35846468806266785,
9864
+ "step": 6560
9865
+ },
9866
+ {
9867
+ "epoch": 33.311392405063295,
9868
+ "grad_norm": 84282.72341083131,
9869
+ "learning_rate": 4.0896270761516763e-08,
9870
+ "logits/chosen": -1.91861093044281,
9871
+ "logits/rejected": -1.3766604661941528,
9872
+ "logps/chosen": -24.914443969726562,
9873
+ "logps/rejected": -581.4729614257812,
9874
+ "loss": 11078.6969,
9875
+ "rewards/accuracies": 1.0,
9876
+ "rewards/chosen": 0.21130716800689697,
9877
+ "rewards/margins": 0.5577182769775391,
9878
+ "rewards/rejected": -0.34641116857528687,
9879
+ "step": 6570
9880
+ },
9881
+ {
9882
+ "epoch": 33.36202531645569,
9883
+ "grad_norm": 199903.347381946,
9884
+ "learning_rate": 4.0112817298652456e-08,
9885
+ "logits/chosen": -1.2995800971984863,
9886
+ "logits/rejected": -1.6440702676773071,
9887
+ "logps/chosen": -22.356828689575195,
9888
+ "logps/rejected": -591.6265869140625,
9889
+ "loss": 11937.0477,
9890
+ "rewards/accuracies": 1.0,
9891
+ "rewards/chosen": 0.2072306126356125,
9892
+ "rewards/margins": 0.5668342709541321,
9893
+ "rewards/rejected": -0.3596035838127136,
9894
+ "step": 6580
9895
+ },
9896
+ {
9897
+ "epoch": 33.4126582278481,
9898
+ "grad_norm": 138603.96487037002,
9899
+ "learning_rate": 3.932936383578815e-08,
9900
+ "logits/chosen": 0.8098524212837219,
9901
+ "logits/rejected": 1.2947828769683838,
9902
+ "logps/chosen": -26.31606674194336,
9903
+ "logps/rejected": -584.9072265625,
9904
+ "loss": 11177.5336,
9905
+ "rewards/accuracies": 1.0,
9906
+ "rewards/chosen": 0.20386937260627747,
9907
+ "rewards/margins": 0.5589767694473267,
9908
+ "rewards/rejected": -0.3551073968410492,
9909
+ "step": 6590
9910
+ },
9911
+ {
9912
+ "epoch": 33.4632911392405,
9913
+ "grad_norm": 123948.78500072335,
9914
+ "learning_rate": 3.854591037292385e-08,
9915
+ "logits/chosen": -2.16947603225708,
9916
+ "logits/rejected": -1.0904394388198853,
9917
+ "logps/chosen": -42.8673095703125,
9918
+ "logps/rejected": -585.2350463867188,
9919
+ "loss": 11894.6641,
9920
+ "rewards/accuracies": 1.0,
9921
+ "rewards/chosen": 0.2195717990398407,
9922
+ "rewards/margins": 0.5601873397827148,
9923
+ "rewards/rejected": -0.34061557054519653,
9924
+ "step": 6600
9925
+ },
9926
+ {
9927
+ "epoch": 33.51392405063291,
9928
+ "grad_norm": 113327.62874252205,
9929
+ "learning_rate": 3.776245691005954e-08,
9930
+ "logits/chosen": -1.375249981880188,
9931
+ "logits/rejected": -0.7785667181015015,
9932
+ "logps/chosen": -29.649211883544922,
9933
+ "logps/rejected": -602.9840698242188,
9934
+ "loss": 12210.0344,
9935
+ "rewards/accuracies": 1.0,
9936
+ "rewards/chosen": 0.22011515498161316,
9937
+ "rewards/margins": 0.5793704390525818,
9938
+ "rewards/rejected": -0.35925528407096863,
9939
+ "step": 6610
9940
+ },
9941
+ {
9942
+ "epoch": 33.564556962025314,
9943
+ "grad_norm": 79524.96422723045,
9944
+ "learning_rate": 3.6979003447195234e-08,
9945
+ "logits/chosen": -0.7508550882339478,
9946
+ "logits/rejected": -0.23799777030944824,
9947
+ "logps/chosen": -17.09669303894043,
9948
+ "logps/rejected": -572.3134155273438,
9949
+ "loss": 12138.4203,
9950
+ "rewards/accuracies": 1.0,
9951
+ "rewards/chosen": 0.2047223150730133,
9952
+ "rewards/margins": 0.5538768768310547,
9953
+ "rewards/rejected": -0.34915462136268616,
9954
+ "step": 6620
9955
+ },
9956
+ {
9957
+ "epoch": 33.61518987341772,
9958
+ "grad_norm": 80597.64263401506,
9959
+ "learning_rate": 3.619554998433093e-08,
9960
+ "logits/chosen": -1.7500404119491577,
9961
+ "logits/rejected": -1.4937622547149658,
9962
+ "logps/chosen": -24.847320556640625,
9963
+ "logps/rejected": -594.1591796875,
9964
+ "loss": 12270.6344,
9965
+ "rewards/accuracies": 1.0,
9966
+ "rewards/chosen": 0.21394848823547363,
9967
+ "rewards/margins": 0.5700836181640625,
9968
+ "rewards/rejected": -0.35613518953323364,
9969
+ "step": 6630
9970
+ },
9971
+ {
9972
+ "epoch": 33.665822784810125,
9973
+ "grad_norm": 100669.75725024722,
9974
+ "learning_rate": 3.541209652146662e-08,
9975
+ "logits/chosen": -0.4524414539337158,
9976
+ "logits/rejected": -0.5694657564163208,
9977
+ "logps/chosen": -25.72067642211914,
9978
+ "logps/rejected": -572.9901123046875,
9979
+ "loss": 11448.4047,
9980
+ "rewards/accuracies": 0.9750000238418579,
9981
+ "rewards/chosen": 0.20151250064373016,
9982
+ "rewards/margins": 0.5470980405807495,
9983
+ "rewards/rejected": -0.345585435628891,
9984
+ "step": 6640
9985
+ },
9986
+ {
9987
+ "epoch": 33.71645569620253,
9988
+ "grad_norm": 136734.1372891588,
9989
+ "learning_rate": 3.462864305860232e-08,
9990
+ "logits/chosen": -0.10392338037490845,
9991
+ "logits/rejected": 0.025324154645204544,
9992
+ "logps/chosen": -23.138744354248047,
9993
+ "logps/rejected": -578.2369995117188,
9994
+ "loss": 11719.0234,
9995
+ "rewards/accuracies": 0.9750000238418579,
9996
+ "rewards/chosen": 0.2065146416425705,
9997
+ "rewards/margins": 0.5536417365074158,
9998
+ "rewards/rejected": -0.3471270501613617,
9999
+ "step": 6650
10000
+ },
10001
+ {
10002
+ "epoch": 33.767088607594935,
10003
+ "grad_norm": 96060.1935775592,
10004
+ "learning_rate": 3.384518959573801e-08,
10005
+ "logits/chosen": -1.5298357009887695,
10006
+ "logits/rejected": -1.111659049987793,
10007
+ "logps/chosen": -36.602691650390625,
10008
+ "logps/rejected": -594.2269287109375,
10009
+ "loss": 11903.4828,
10010
+ "rewards/accuracies": 0.987500011920929,
10011
+ "rewards/chosen": 0.21466748416423798,
10012
+ "rewards/margins": 0.5613253116607666,
10013
+ "rewards/rejected": -0.34665781259536743,
10014
+ "step": 6660
10015
+ },
10016
+ {
10017
+ "epoch": 33.81772151898734,
10018
+ "grad_norm": 82308.39144839271,
10019
+ "learning_rate": 3.3061736132873705e-08,
10020
+ "logits/chosen": -1.9629747867584229,
10021
+ "logits/rejected": -1.8584734201431274,
10022
+ "logps/chosen": -17.865947723388672,
10023
+ "logps/rejected": -566.314453125,
10024
+ "loss": 12147.5891,
10025
+ "rewards/accuracies": 1.0,
10026
+ "rewards/chosen": 0.2041165828704834,
10027
+ "rewards/margins": 0.5491331219673157,
10028
+ "rewards/rejected": -0.3450164496898651,
10029
+ "step": 6670
10030
+ },
10031
+ {
10032
+ "epoch": 33.868354430379746,
10033
+ "grad_norm": 132433.76933098322,
10034
+ "learning_rate": 3.22782826700094e-08,
10035
+ "logits/chosen": -0.10643855482339859,
10036
+ "logits/rejected": 0.1565506011247635,
10037
+ "logps/chosen": -23.206607818603516,
10038
+ "logps/rejected": -565.3855590820312,
10039
+ "loss": 11928.0656,
10040
+ "rewards/accuracies": 0.9750000238418579,
10041
+ "rewards/chosen": 0.19701281189918518,
10042
+ "rewards/margins": 0.5372076630592346,
10043
+ "rewards/rejected": -0.34019485116004944,
10044
+ "step": 6680
10045
+ },
10046
+ {
10047
+ "epoch": 33.91898734177215,
10048
+ "grad_norm": 99524.21425394616,
10049
+ "learning_rate": 3.149482920714509e-08,
10050
+ "logits/chosen": 0.7746875286102295,
10051
+ "logits/rejected": 1.4906342029571533,
10052
+ "logps/chosen": -28.62857437133789,
10053
+ "logps/rejected": -569.8626708984375,
10054
+ "loss": 11616.475,
10055
+ "rewards/accuracies": 0.9750000238418579,
10056
+ "rewards/chosen": 0.20620207488536835,
10057
+ "rewards/margins": 0.548004686832428,
10058
+ "rewards/rejected": -0.3418026268482208,
10059
+ "step": 6690
10060
+ },
10061
+ {
10062
+ "epoch": 33.96962025316456,
10063
+ "grad_norm": 72753.16066899289,
10064
+ "learning_rate": 3.071137574428079e-08,
10065
+ "logits/chosen": 0.6492331624031067,
10066
+ "logits/rejected": 0.7617141604423523,
10067
+ "logps/chosen": -25.677988052368164,
10068
+ "logps/rejected": -560.1131591796875,
10069
+ "loss": 12074.9086,
10070
+ "rewards/accuracies": 0.9750000238418579,
10071
+ "rewards/chosen": 0.19937190413475037,
10072
+ "rewards/margins": 0.5361818075180054,
10073
+ "rewards/rejected": -0.33680984377861023,
10074
+ "step": 6700
10075
  }
10076
  ],
10077
  "logging_steps": 10,