cat-searcher commited on
Commit
3cb495c
1 Parent(s): 4e1ff4b

Training in progress, epoch 16, checkpoint

Browse files
Files changed (29) hide show
  1. last-checkpoint/global_step3357/bf16_zero_pp_rank_0_mp_rank_00_optim_states.pt +3 -0
  2. last-checkpoint/global_step3357/bf16_zero_pp_rank_1_mp_rank_00_optim_states.pt +3 -0
  3. last-checkpoint/global_step3357/bf16_zero_pp_rank_2_mp_rank_00_optim_states.pt +3 -0
  4. last-checkpoint/global_step3357/bf16_zero_pp_rank_3_mp_rank_00_optim_states.pt +3 -0
  5. last-checkpoint/global_step3357/bf16_zero_pp_rank_4_mp_rank_00_optim_states.pt +3 -0
  6. last-checkpoint/global_step3357/bf16_zero_pp_rank_5_mp_rank_00_optim_states.pt +3 -0
  7. last-checkpoint/global_step3357/bf16_zero_pp_rank_6_mp_rank_00_optim_states.pt +3 -0
  8. last-checkpoint/global_step3357/bf16_zero_pp_rank_7_mp_rank_00_optim_states.pt +3 -0
  9. last-checkpoint/global_step3357/zero_pp_rank_0_mp_rank_00_model_states.pt +3 -0
  10. last-checkpoint/global_step3357/zero_pp_rank_1_mp_rank_00_model_states.pt +3 -0
  11. last-checkpoint/global_step3357/zero_pp_rank_2_mp_rank_00_model_states.pt +3 -0
  12. last-checkpoint/global_step3357/zero_pp_rank_3_mp_rank_00_model_states.pt +3 -0
  13. last-checkpoint/global_step3357/zero_pp_rank_4_mp_rank_00_model_states.pt +3 -0
  14. last-checkpoint/global_step3357/zero_pp_rank_5_mp_rank_00_model_states.pt +3 -0
  15. last-checkpoint/global_step3357/zero_pp_rank_6_mp_rank_00_model_states.pt +3 -0
  16. last-checkpoint/global_step3357/zero_pp_rank_7_mp_rank_00_model_states.pt +3 -0
  17. last-checkpoint/latest +1 -1
  18. last-checkpoint/model-00001-of-00002.safetensors +1 -1
  19. last-checkpoint/model-00002-of-00002.safetensors +1 -1
  20. last-checkpoint/rng_state_0.pth +1 -1
  21. last-checkpoint/rng_state_1.pth +1 -1
  22. last-checkpoint/rng_state_2.pth +1 -1
  23. last-checkpoint/rng_state_3.pth +1 -1
  24. last-checkpoint/rng_state_4.pth +1 -1
  25. last-checkpoint/rng_state_5.pth +1 -1
  26. last-checkpoint/rng_state_6.pth +1 -1
  27. last-checkpoint/rng_state_7.pth +1 -1
  28. last-checkpoint/scheduler.pt +1 -1
  29. last-checkpoint/trainer_state.json +287 -2
last-checkpoint/global_step3357/bf16_zero_pp_rank_0_mp_rank_00_optim_states.pt ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:651dfd4d290244a1f1ed25e174745eeda53f83c1afd49bddb222769456acea8e
3
+ size 2506176112
last-checkpoint/global_step3357/bf16_zero_pp_rank_1_mp_rank_00_optim_states.pt ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:31f45441c5dddfa9aebb33d35dea7081c3fd19843f93ae43f1316128bcf7ecbd
3
+ size 2506176112
last-checkpoint/global_step3357/bf16_zero_pp_rank_2_mp_rank_00_optim_states.pt ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:be22b35b5e310d4823197426b1cd8771c316b9b15fbb4942ab377e3613a0ca98
3
+ size 2506176112
last-checkpoint/global_step3357/bf16_zero_pp_rank_3_mp_rank_00_optim_states.pt ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:75b51b99bcd90701c6b97516470bcaab959b7e1922605d5890a31d2ba2f4d580
3
+ size 2506176112
last-checkpoint/global_step3357/bf16_zero_pp_rank_4_mp_rank_00_optim_states.pt ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:aec2a04e08e7e35026b691693353bc8fe5847d81fb99acf19398b7243dee628c
3
+ size 2506176112
last-checkpoint/global_step3357/bf16_zero_pp_rank_5_mp_rank_00_optim_states.pt ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:c13dfce78f2ac90a9157ad2f0abb21e59d1100da60be9db95d0a1f5336fc8209
3
+ size 2506176112
last-checkpoint/global_step3357/bf16_zero_pp_rank_6_mp_rank_00_optim_states.pt ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:f14a7f3e1501454fd040e30452a51f1a851474905f426344fd8a832d6508ec23
3
+ size 2506176112
last-checkpoint/global_step3357/bf16_zero_pp_rank_7_mp_rank_00_optim_states.pt ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:d72714ef0021f7b5e8d8ab77b07e3b29b0beef1ef3cb644ec965dc8a17226688
3
+ size 2506176112
last-checkpoint/global_step3357/zero_pp_rank_0_mp_rank_00_model_states.pt ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:656172ce79b5ff43fd76cd8ecb27884c941e173e847c11f888c3e98ac9533316
3
+ size 85570
last-checkpoint/global_step3357/zero_pp_rank_1_mp_rank_00_model_states.pt ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:d2ef4f048ceb531e6ef19e52e0fbde839db8f8fd1d12ef45f132d6f89f4f0145
3
+ size 85506
last-checkpoint/global_step3357/zero_pp_rank_2_mp_rank_00_model_states.pt ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:b68cedc957959ed2fd5ba36d094b0e6e389a7422983ed8d4c10563a6408e9b40
3
+ size 85506
last-checkpoint/global_step3357/zero_pp_rank_3_mp_rank_00_model_states.pt ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:12d1268fd1df98fe5916753b22e9b23a49b58ee4b9f4ca149196b69e198904cc
3
+ size 85506
last-checkpoint/global_step3357/zero_pp_rank_4_mp_rank_00_model_states.pt ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:6fc46b4ccbddfc0f7c25f583348c5d2cdb40c0392ea8cec3a0925d9d5ed56785
3
+ size 85506
last-checkpoint/global_step3357/zero_pp_rank_5_mp_rank_00_model_states.pt ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:cb9afb70ad87e6ce43387783fa59973a4a6904164bf83ddcccd5b7fe0b820ac9
3
+ size 85506
last-checkpoint/global_step3357/zero_pp_rank_6_mp_rank_00_model_states.pt ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:162023a179a88c571b455c0ab3ea05ad7132b5fe8c517f18e503e04716909aae
3
+ size 85506
last-checkpoint/global_step3357/zero_pp_rank_7_mp_rank_00_model_states.pt ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:52ee316718258920bde55c9660e06cb5c39c6343956d78d173a25f15371fb1f3
3
+ size 85506
last-checkpoint/latest CHANGED
@@ -1 +1 @@
1
- global_step3160
 
1
+ global_step3357
last-checkpoint/model-00001-of-00002.safetensors CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:d8e8cc757116e636d03d7d2362f38003ee7b34b00b1dae4f4914662ad92e7fad
3
  size 4945242264
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:3578035697ae915bf8ed319e400346be0b8f4d900849a6d07f0ff9b4c3b1711c
3
  size 4945242264
last-checkpoint/model-00002-of-00002.safetensors CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:ba27efdeb5b44a8b8136905559c82e77a7f13309db0036dad3e99a470705fb98
3
  size 67121608
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:a652aa698a378ecc5fb4aaee9480c493c9a62c60f4f96b74c9d6698fa2aa8d33
3
  size 67121608
last-checkpoint/rng_state_0.pth CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:bb7c3bc1248de8b4739437317b988d953fd64a5de9736606d74f9c8277f1b485
3
  size 15984
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:1a0c9979566a5d89cb3c766336548670ec6f2291deba1b7ab1764c12d3187b24
3
  size 15984
last-checkpoint/rng_state_1.pth CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:b8e571d57a85eb2cdabf3f46c86e446bdb7d26aba8b1467b5e4b5bbe29ad42a7
3
  size 15984
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:03e36a570d6158fc25d1cf5d9f8f450fc64c5a7683330277f89ff76d5f2fc6cd
3
  size 15984
last-checkpoint/rng_state_2.pth CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:489e5542988617525a395c45dc83ec6bf25b473812e139122f0a3f3d92f031d0
3
  size 15984
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:b4f619cbef4b74f1680d667c8788285a602392e63bdf3760ef3a59ec8864d483
3
  size 15984
last-checkpoint/rng_state_3.pth CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:0cd77682efb711872c5be25e87e87a2726a2e7105422cddd00f04da7be35ca20
3
  size 15984
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:1fc037fba93ace1bf7ce01b1a5f7d785698d47b4cc2cedf2300bbf7a41ebf05c
3
  size 15984
last-checkpoint/rng_state_4.pth CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:e44d9e7d535f5fbcd7cfef16ba22d32d5f445aacceba782a05df1f97d47a608a
3
  size 15984
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:9ab728c2461d6d1c64f04d7cbfdfcbfa7bd7ad0ef6e19d52458501ee81b27128
3
  size 15984
last-checkpoint/rng_state_5.pth CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:a107290a0d9898930bc6abe369ee246ef7322541985fc2a5320e7775f5ea5c88
3
  size 15984
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:27530e653ebf5997ae3159cdcde264607e6a6f86b7e3c7a1b3a1e8301cd43d03
3
  size 15984
last-checkpoint/rng_state_6.pth CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:88ab49d56ee4079c2a208376064f825918f070addc8f0c58c5c594265f9e8a78
3
  size 15984
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:c1fddaeb1257697bd7c0101abf1ab23f2925d0d9165cd8bddfbd22f8444db2b7
3
  size 15984
last-checkpoint/rng_state_7.pth CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:7d15033d06420b17d80db45c89544170faa67833d5a0d9c30a51a38a1102b073
3
  size 15984
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:942af3734a320fe12a3205a47ca1cdc7d1f0996bfde86c020a35545ccd2fd418
3
  size 15984
last-checkpoint/scheduler.pt CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:7e02caff31fe06a664e85dd7b31b3300391f1a9f4f3b97aaaec945d54216a88e
3
  size 1064
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:605054ed043f8ba321ca13100ae25afc2296eb67de83d5027f6f7f6d891a4130
3
  size 1064
last-checkpoint/trainer_state.json CHANGED
@@ -1,9 +1,9 @@
1
  {
2
  "best_metric": null,
3
  "best_model_checkpoint": null,
4
- "epoch": 16.0,
5
  "eval_steps": 100,
6
- "global_step": 3160,
7
  "is_hyper_param_search": false,
8
  "is_local_process_zero": true,
9
  "is_world_process_zero": true,
@@ -4762,6 +4762,291 @@
4762
  "rewards/margins": 0.5268322825431824,
4763
  "rewards/rejected": -0.34266436100006104,
4764
  "step": 3160
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
4765
  }
4766
  ],
4767
  "logging_steps": 10,
 
1
  {
2
  "best_metric": null,
3
  "best_model_checkpoint": null,
4
+ "epoch": 16.99746835443038,
5
  "eval_steps": 100,
6
+ "global_step": 3357,
7
  "is_hyper_param_search": false,
8
  "is_local_process_zero": true,
9
  "is_world_process_zero": true,
 
4762
  "rewards/margins": 0.5268322825431824,
4763
  "rewards/rejected": -0.34266436100006104,
4764
  "step": 3160
4765
+ },
4766
+ {
4767
+ "epoch": 16.050632911392405,
4768
+ "grad_norm": 256869.56003810524,
4769
+ "learning_rate": 5.8915700407395795e-08,
4770
+ "logits/chosen": -1.1983295679092407,
4771
+ "logits/rejected": -0.22695603966712952,
4772
+ "logps/chosen": -41.12403106689453,
4773
+ "logps/rejected": -573.8383178710938,
4774
+ "loss": 14636.0719,
4775
+ "rewards/accuracies": 0.987500011920929,
4776
+ "rewards/chosen": 0.1912733018398285,
4777
+ "rewards/margins": 0.5368129014968872,
4778
+ "rewards/rejected": -0.3455396294593811,
4779
+ "step": 3170
4780
+ },
4781
+ {
4782
+ "epoch": 16.10126582278481,
4783
+ "grad_norm": 251620.82775792846,
4784
+ "learning_rate": 5.734879348166719e-08,
4785
+ "logits/chosen": -0.662868082523346,
4786
+ "logits/rejected": 0.3795197606086731,
4787
+ "logps/chosen": -38.75691604614258,
4788
+ "logps/rejected": -555.0902709960938,
4789
+ "loss": 14758.6562,
4790
+ "rewards/accuracies": 0.9624999761581421,
4791
+ "rewards/chosen": 0.1840089112520218,
4792
+ "rewards/margins": 0.5191300511360168,
4793
+ "rewards/rejected": -0.335121214389801,
4794
+ "step": 3180
4795
+ },
4796
+ {
4797
+ "epoch": 16.151898734177216,
4798
+ "grad_norm": 386320.34193101624,
4799
+ "learning_rate": 5.5781886555938573e-08,
4800
+ "logits/chosen": 0.9088973999023438,
4801
+ "logits/rejected": 1.0200951099395752,
4802
+ "logps/chosen": -37.841434478759766,
4803
+ "logps/rejected": -549.9398193359375,
4804
+ "loss": 14645.3125,
4805
+ "rewards/accuracies": 0.9750000238418579,
4806
+ "rewards/chosen": 0.18419453501701355,
4807
+ "rewards/margins": 0.5178717374801636,
4808
+ "rewards/rejected": -0.3336772620677948,
4809
+ "step": 3190
4810
+ },
4811
+ {
4812
+ "epoch": 16.20253164556962,
4813
+ "grad_norm": 323738.56127307797,
4814
+ "learning_rate": 5.421497963020996e-08,
4815
+ "logits/chosen": 1.6748106479644775,
4816
+ "logits/rejected": 1.7903064489364624,
4817
+ "logps/chosen": -43.683780670166016,
4818
+ "logps/rejected": -559.7962036132812,
4819
+ "loss": 14378.5187,
4820
+ "rewards/accuracies": 0.9750000238418579,
4821
+ "rewards/chosen": 0.1856391578912735,
4822
+ "rewards/margins": 0.5182951092720032,
4823
+ "rewards/rejected": -0.3326559364795685,
4824
+ "step": 3200
4825
+ },
4826
+ {
4827
+ "epoch": 16.253164556962027,
4828
+ "grad_norm": 254204.27494940045,
4829
+ "learning_rate": 5.264807270448135e-08,
4830
+ "logits/chosen": -0.028285836800932884,
4831
+ "logits/rejected": 0.47511911392211914,
4832
+ "logps/chosen": -46.74934005737305,
4833
+ "logps/rejected": -582.1607666015625,
4834
+ "loss": 14203.1469,
4835
+ "rewards/accuracies": 0.987500011920929,
4836
+ "rewards/chosen": 0.19257526099681854,
4837
+ "rewards/margins": 0.5342021584510803,
4838
+ "rewards/rejected": -0.3416268825531006,
4839
+ "step": 3210
4840
+ },
4841
+ {
4842
+ "epoch": 16.303797468354432,
4843
+ "grad_norm": 295536.9430947363,
4844
+ "learning_rate": 5.108116577875274e-08,
4845
+ "logits/chosen": 0.9740939140319824,
4846
+ "logits/rejected": 0.8530548810958862,
4847
+ "logps/chosen": -43.95893478393555,
4848
+ "logps/rejected": -566.3425903320312,
4849
+ "loss": 14617.1531,
4850
+ "rewards/accuracies": 1.0,
4851
+ "rewards/chosen": 0.18452490866184235,
4852
+ "rewards/margins": 0.5231844782829285,
4853
+ "rewards/rejected": -0.3386596143245697,
4854
+ "step": 3220
4855
+ },
4856
+ {
4857
+ "epoch": 16.354430379746834,
4858
+ "grad_norm": 228442.89270088554,
4859
+ "learning_rate": 4.951425885302413e-08,
4860
+ "logits/chosen": -0.6641544699668884,
4861
+ "logits/rejected": -0.42437514662742615,
4862
+ "logps/chosen": -42.97655487060547,
4863
+ "logps/rejected": -572.6472778320312,
4864
+ "loss": 14575.375,
4865
+ "rewards/accuracies": 0.987500011920929,
4866
+ "rewards/chosen": 0.19143202900886536,
4867
+ "rewards/margins": 0.5323026776313782,
4868
+ "rewards/rejected": -0.34087061882019043,
4869
+ "step": 3230
4870
+ },
4871
+ {
4872
+ "epoch": 16.40506329113924,
4873
+ "grad_norm": 280822.1227003712,
4874
+ "learning_rate": 4.7947351927295515e-08,
4875
+ "logits/chosen": 1.1500619649887085,
4876
+ "logits/rejected": 1.5377223491668701,
4877
+ "logps/chosen": -40.756866455078125,
4878
+ "logps/rejected": -555.7669067382812,
4879
+ "loss": 14355.8438,
4880
+ "rewards/accuracies": 1.0,
4881
+ "rewards/chosen": 0.18818344175815582,
4882
+ "rewards/margins": 0.5185222029685974,
4883
+ "rewards/rejected": -0.3303387761116028,
4884
+ "step": 3240
4885
+ },
4886
+ {
4887
+ "epoch": 16.455696202531644,
4888
+ "grad_norm": 211726.7404787661,
4889
+ "learning_rate": 4.63804450015669e-08,
4890
+ "logits/chosen": -0.1092449203133583,
4891
+ "logits/rejected": 0.2951999306678772,
4892
+ "logps/chosen": -42.441200256347656,
4893
+ "logps/rejected": -545.1079711914062,
4894
+ "loss": 14375.5266,
4895
+ "rewards/accuracies": 0.987500011920929,
4896
+ "rewards/chosen": 0.18679597973823547,
4897
+ "rewards/margins": 0.5060458779335022,
4898
+ "rewards/rejected": -0.31924980878829956,
4899
+ "step": 3250
4900
+ },
4901
+ {
4902
+ "epoch": 16.50632911392405,
4903
+ "grad_norm": 356888.551437776,
4904
+ "learning_rate": 4.481353807583829e-08,
4905
+ "logits/chosen": -1.3785438537597656,
4906
+ "logits/rejected": -1.0880242586135864,
4907
+ "logps/chosen": -54.5753288269043,
4908
+ "logps/rejected": -585.0982666015625,
4909
+ "loss": 13676.1484,
4910
+ "rewards/accuracies": 1.0,
4911
+ "rewards/chosen": 0.19741004705429077,
4912
+ "rewards/margins": 0.5335227251052856,
4913
+ "rewards/rejected": -0.3361126780509949,
4914
+ "step": 3260
4915
+ },
4916
+ {
4917
+ "epoch": 16.556962025316455,
4918
+ "grad_norm": 364581.3025715214,
4919
+ "learning_rate": 4.324663115010968e-08,
4920
+ "logits/chosen": -0.7049742341041565,
4921
+ "logits/rejected": -0.23324167728424072,
4922
+ "logps/chosen": -51.56848907470703,
4923
+ "logps/rejected": -578.4015502929688,
4924
+ "loss": 14484.6266,
4925
+ "rewards/accuracies": 0.987500011920929,
4926
+ "rewards/chosen": 0.18512576818466187,
4927
+ "rewards/margins": 0.5236076712608337,
4928
+ "rewards/rejected": -0.3384818732738495,
4929
+ "step": 3270
4930
+ },
4931
+ {
4932
+ "epoch": 16.60759493670886,
4933
+ "grad_norm": 336864.8330615521,
4934
+ "learning_rate": 4.167972422438107e-08,
4935
+ "logits/chosen": -0.9721381068229675,
4936
+ "logits/rejected": -1.1028145551681519,
4937
+ "logps/chosen": -55.94579315185547,
4938
+ "logps/rejected": -583.2372436523438,
4939
+ "loss": 14945.2641,
4940
+ "rewards/accuracies": 0.9750000238418579,
4941
+ "rewards/chosen": 0.19380484521389008,
4942
+ "rewards/margins": 0.5321142673492432,
4943
+ "rewards/rejected": -0.3383094370365143,
4944
+ "step": 3280
4945
+ },
4946
+ {
4947
+ "epoch": 16.658227848101266,
4948
+ "grad_norm": 310564.956837095,
4949
+ "learning_rate": 4.0112817298652456e-08,
4950
+ "logits/chosen": -0.6065518260002136,
4951
+ "logits/rejected": -0.21473164856433868,
4952
+ "logps/chosen": -46.307228088378906,
4953
+ "logps/rejected": -586.7664184570312,
4954
+ "loss": 14667.4531,
4955
+ "rewards/accuracies": 0.987500011920929,
4956
+ "rewards/chosen": 0.19551894068717957,
4957
+ "rewards/margins": 0.5414855480194092,
4958
+ "rewards/rejected": -0.345966637134552,
4959
+ "step": 3290
4960
+ },
4961
+ {
4962
+ "epoch": 16.70886075949367,
4963
+ "grad_norm": 329301.5108160766,
4964
+ "learning_rate": 3.854591037292385e-08,
4965
+ "logits/chosen": 0.40292587876319885,
4966
+ "logits/rejected": 1.5396214723587036,
4967
+ "logps/chosen": -40.793739318847656,
4968
+ "logps/rejected": -570.8857421875,
4969
+ "loss": 14524.3094,
4970
+ "rewards/accuracies": 0.987500011920929,
4971
+ "rewards/chosen": 0.18828611075878143,
4972
+ "rewards/margins": 0.5341116189956665,
4973
+ "rewards/rejected": -0.3458254337310791,
4974
+ "step": 3300
4975
+ },
4976
+ {
4977
+ "epoch": 16.759493670886076,
4978
+ "grad_norm": 389871.220870713,
4979
+ "learning_rate": 3.6979003447195234e-08,
4980
+ "logits/chosen": -0.2180454283952713,
4981
+ "logits/rejected": 0.63756263256073,
4982
+ "logps/chosen": -48.842628479003906,
4983
+ "logps/rejected": -596.3530883789062,
4984
+ "loss": 15026.0328,
4985
+ "rewards/accuracies": 1.0,
4986
+ "rewards/chosen": 0.19478780031204224,
4987
+ "rewards/margins": 0.5423206090927124,
4988
+ "rewards/rejected": -0.34753280878067017,
4989
+ "step": 3310
4990
+ },
4991
+ {
4992
+ "epoch": 16.810126582278482,
4993
+ "grad_norm": 297091.2945334893,
4994
+ "learning_rate": 3.541209652146662e-08,
4995
+ "logits/chosen": -0.4556306302547455,
4996
+ "logits/rejected": 0.1757240742444992,
4997
+ "logps/chosen": -52.64439010620117,
4998
+ "logps/rejected": -598.89990234375,
4999
+ "loss": 14405.2531,
5000
+ "rewards/accuracies": 0.987500011920929,
5001
+ "rewards/chosen": 0.19450917840003967,
5002
+ "rewards/margins": 0.5458864569664001,
5003
+ "rewards/rejected": -0.3513772487640381,
5004
+ "step": 3320
5005
+ },
5006
+ {
5007
+ "epoch": 16.860759493670887,
5008
+ "grad_norm": 1094427.122685082,
5009
+ "learning_rate": 3.384518959573801e-08,
5010
+ "logits/chosen": -0.09430136531591415,
5011
+ "logits/rejected": 0.669711709022522,
5012
+ "logps/chosen": -48.170013427734375,
5013
+ "logps/rejected": -584.9744873046875,
5014
+ "loss": 15005.1063,
5015
+ "rewards/accuracies": 0.987500011920929,
5016
+ "rewards/chosen": 0.1912693828344345,
5017
+ "rewards/margins": 0.5353102087974548,
5018
+ "rewards/rejected": -0.34404081106185913,
5019
+ "step": 3330
5020
+ },
5021
+ {
5022
+ "epoch": 16.911392405063292,
5023
+ "grad_norm": 266675.6307359935,
5024
+ "learning_rate": 3.22782826700094e-08,
5025
+ "logits/chosen": -0.09551366418600082,
5026
+ "logits/rejected": -0.07008041441440582,
5027
+ "logps/chosen": -36.88441848754883,
5028
+ "logps/rejected": -568.5509033203125,
5029
+ "loss": 13823.6516,
5030
+ "rewards/accuracies": 1.0,
5031
+ "rewards/chosen": 0.18999743461608887,
5032
+ "rewards/margins": 0.5339778661727905,
5033
+ "rewards/rejected": -0.34398046135902405,
5034
+ "step": 3340
5035
+ },
5036
+ {
5037
+ "epoch": 16.962025316455698,
5038
+ "grad_norm": 562034.347414135,
5039
+ "learning_rate": 3.071137574428079e-08,
5040
+ "logits/chosen": 0.6763383746147156,
5041
+ "logits/rejected": 0.4948856830596924,
5042
+ "logps/chosen": -46.25956726074219,
5043
+ "logps/rejected": -565.7184448242188,
5044
+ "loss": 14414.3859,
5045
+ "rewards/accuracies": 0.9750000238418579,
5046
+ "rewards/chosen": 0.18556642532348633,
5047
+ "rewards/margins": 0.5159622430801392,
5048
+ "rewards/rejected": -0.33039581775665283,
5049
+ "step": 3350
5050
  }
5051
  ],
5052
  "logging_steps": 10,