cat-searcher commited on
Commit
dce6179
1 Parent(s): 399396f

Training in progress, epoch 10, checkpoint

Browse files
Files changed (29) hide show
  1. last-checkpoint/global_step1975/bf16_zero_pp_rank_0_mp_rank_00_optim_states.pt +3 -0
  2. last-checkpoint/global_step1975/bf16_zero_pp_rank_1_mp_rank_00_optim_states.pt +3 -0
  3. last-checkpoint/global_step1975/bf16_zero_pp_rank_2_mp_rank_00_optim_states.pt +3 -0
  4. last-checkpoint/global_step1975/bf16_zero_pp_rank_3_mp_rank_00_optim_states.pt +3 -0
  5. last-checkpoint/global_step1975/bf16_zero_pp_rank_4_mp_rank_00_optim_states.pt +3 -0
  6. last-checkpoint/global_step1975/bf16_zero_pp_rank_5_mp_rank_00_optim_states.pt +3 -0
  7. last-checkpoint/global_step1975/bf16_zero_pp_rank_6_mp_rank_00_optim_states.pt +3 -0
  8. last-checkpoint/global_step1975/bf16_zero_pp_rank_7_mp_rank_00_optim_states.pt +3 -0
  9. last-checkpoint/global_step1975/zero_pp_rank_0_mp_rank_00_model_states.pt +3 -0
  10. last-checkpoint/global_step1975/zero_pp_rank_1_mp_rank_00_model_states.pt +3 -0
  11. last-checkpoint/global_step1975/zero_pp_rank_2_mp_rank_00_model_states.pt +3 -0
  12. last-checkpoint/global_step1975/zero_pp_rank_3_mp_rank_00_model_states.pt +3 -0
  13. last-checkpoint/global_step1975/zero_pp_rank_4_mp_rank_00_model_states.pt +3 -0
  14. last-checkpoint/global_step1975/zero_pp_rank_5_mp_rank_00_model_states.pt +3 -0
  15. last-checkpoint/global_step1975/zero_pp_rank_6_mp_rank_00_model_states.pt +3 -0
  16. last-checkpoint/global_step1975/zero_pp_rank_7_mp_rank_00_model_states.pt +3 -0
  17. last-checkpoint/latest +1 -1
  18. last-checkpoint/model-00001-of-00002.safetensors +1 -1
  19. last-checkpoint/model-00002-of-00002.safetensors +1 -1
  20. last-checkpoint/rng_state_0.pth +1 -1
  21. last-checkpoint/rng_state_1.pth +1 -1
  22. last-checkpoint/rng_state_2.pth +1 -1
  23. last-checkpoint/rng_state_3.pth +1 -1
  24. last-checkpoint/rng_state_4.pth +1 -1
  25. last-checkpoint/rng_state_5.pth +1 -1
  26. last-checkpoint/rng_state_6.pth +1 -1
  27. last-checkpoint/rng_state_7.pth +1 -1
  28. last-checkpoint/scheduler.pt +1 -1
  29. last-checkpoint/trainer_state.json +302 -2
last-checkpoint/global_step1975/bf16_zero_pp_rank_0_mp_rank_00_optim_states.pt ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:158fdf1206d90f3d53f03f6777f5447c221fe532e12194e65242099fcced38a9
3
+ size 2506176112
last-checkpoint/global_step1975/bf16_zero_pp_rank_1_mp_rank_00_optim_states.pt ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:c0b53db3d43333997ca32bd2265f0e5156f413521b14f6bc508cbb8c1d397912
3
+ size 2506176112
last-checkpoint/global_step1975/bf16_zero_pp_rank_2_mp_rank_00_optim_states.pt ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:c64a6a960abe055f377aa0483a2e5a11f36361d99a16b35615b438b13e05002d
3
+ size 2506176112
last-checkpoint/global_step1975/bf16_zero_pp_rank_3_mp_rank_00_optim_states.pt ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:e77c45b48df198e8830cac41e3fa4ae2d82371b8230add92f572786522c16015
3
+ size 2506176112
last-checkpoint/global_step1975/bf16_zero_pp_rank_4_mp_rank_00_optim_states.pt ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:873839ac5d48ab353d5c1f108475fb9c761d9a9ebbd3fde16efb2c2e32148ac0
3
+ size 2506176112
last-checkpoint/global_step1975/bf16_zero_pp_rank_5_mp_rank_00_optim_states.pt ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:da894f0661f3a32c85b6eaa7ad8450b5dc5d56952359780e6cf800fd36b0af4d
3
+ size 2506176112
last-checkpoint/global_step1975/bf16_zero_pp_rank_6_mp_rank_00_optim_states.pt ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:22da807ff37f81b78c17ed1a259ffdb1c99cf8091fe97f3c94919f9276f062e9
3
+ size 2506176112
last-checkpoint/global_step1975/bf16_zero_pp_rank_7_mp_rank_00_optim_states.pt ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:fcba14ca76d79f8403b081667d13aa806bfeafc24d46c77c9de5de1d637f77d5
3
+ size 2506176112
last-checkpoint/global_step1975/zero_pp_rank_0_mp_rank_00_model_states.pt ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:3cc97a267a53404bed2bf3a9a7faec5240ac22ddd314f8cff203c1db78d59acd
3
+ size 85570
last-checkpoint/global_step1975/zero_pp_rank_1_mp_rank_00_model_states.pt ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:ff253423957bacfa2226dc71400a6365a2ffbff5dc196706e9376114e3111f5a
3
+ size 85506
last-checkpoint/global_step1975/zero_pp_rank_2_mp_rank_00_model_states.pt ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:c003a7df9382cf3e24d03806497ae9902f5fc74d7f60ca1560f13a1b4bc39784
3
+ size 85506
last-checkpoint/global_step1975/zero_pp_rank_3_mp_rank_00_model_states.pt ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:236ef95c4872dd77dd4d0915e6c75048a026a416bd4cbe669d929349ce3c01bd
3
+ size 85506
last-checkpoint/global_step1975/zero_pp_rank_4_mp_rank_00_model_states.pt ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:714076982acbbe1f5a119e51b1d487088d8527ee90241348b6d3d70622eaee54
3
+ size 85506
last-checkpoint/global_step1975/zero_pp_rank_5_mp_rank_00_model_states.pt ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:c5da9fc440771360567a06dfc520423832ac2c8722b1d01f47f09d9bca45e6c3
3
+ size 85506
last-checkpoint/global_step1975/zero_pp_rank_6_mp_rank_00_model_states.pt ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:0f142914c3ce2768ca39f3c543ebb8b4bdb79a3f5406b292c4059e5dc0865f48
3
+ size 85506
last-checkpoint/global_step1975/zero_pp_rank_7_mp_rank_00_model_states.pt ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:193c38ec1f044177e1edba0e33b4ff754b9e9d8b8599c063f2120e6bd1550365
3
+ size 85506
last-checkpoint/latest CHANGED
@@ -1 +1 @@
1
- global_step1777
 
1
+ global_step1975
last-checkpoint/model-00001-of-00002.safetensors CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:09b755aaa0de9a9d7be5dd7cc1cf82ccedd0ac145120aec2032a624323902370
3
  size 4945242264
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:f7b38d08370373f75d0ead3a8dc90bdfbcde940100178a1e4d8444d44db03c6f
3
  size 4945242264
last-checkpoint/model-00002-of-00002.safetensors CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:ef4f1316aa95047dba9bf5e25ec454561d74e6768f37069512a5b410ac5fb8ad
3
  size 67121608
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:258f456567db458691d028339987640e6c33e89da150b386c8f140c34d269bf8
3
  size 67121608
last-checkpoint/rng_state_0.pth CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:ae9162e03c562553a5d9d13120f544d3c47ea71bb39aa44e18253675e17ed4a4
3
  size 15984
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:fb1165242405b17b3d6a8186ae61b13dcb1faa5a54320bebd74ef8d71b964bf7
3
  size 15984
last-checkpoint/rng_state_1.pth CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:4809456871b3a40c8db7e0926a9db11b01149a1d483fb29b16fc69dabaf36c6f
3
  size 15984
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:562c262916c9997ec644c42fed9655ab28706b74fca20290ca921c4761d6a4b0
3
  size 15984
last-checkpoint/rng_state_2.pth CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:4bb6bcf25ff148b74eea7dd4895fc42e9433538fff5d75f0d2ae6cb0c2fdadf0
3
  size 15984
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:a8d40f8118f513299624ded0a9bcf09778b961635615090409394d4f96f928f6
3
  size 15984
last-checkpoint/rng_state_3.pth CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:0f00ea04cd1a52c539d9cc948ac8a04676d6b99702acd09149565f781806f63f
3
  size 15984
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:b4391f924238a4cb855c4cbdc6d1a14954f785431c75997d05c7a4ee6615dae7
3
  size 15984
last-checkpoint/rng_state_4.pth CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:5571fb2fc1b413792b01ac691c759786855573992bab1d14875faccdaf8c881e
3
  size 15984
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:be7b19bb9543a16bf9f4cd96466ac581436f63070f5815f3a7ba57980608994f
3
  size 15984
last-checkpoint/rng_state_5.pth CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:59019ba23ead9c15851cb4349397254458ce50ea3c2987090404f4f3842c6d8f
3
  size 15984
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:97da4a1ede0a3e0f96411cacd5bfdf84d9355198f7aadc9bcb8be41122043f63
3
  size 15984
last-checkpoint/rng_state_6.pth CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:45fdffda57fda4a555da7a5de6fc6ec7324e0dae048b92519af6c4f6a1bc7412
3
  size 15984
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:544cb6421b975bd5d2b2360a4e666003794e6197ae654d2ad963cd6572a86ede
3
  size 15984
last-checkpoint/rng_state_7.pth CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:62fb2c13e63aba83c4505fae1639f79a33853d8f1bebe20cecb73bf53c8e7c46
3
  size 15984
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:f8d6eb32a23f3bef6262bbcb2eda724b2fd6f5e579969aa27c71a5971331722b
3
  size 15984
last-checkpoint/scheduler.pt CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:e16985aaf4cce287f446385c2d8f7c8409907ca0803309b7f28917440fa9de11
3
  size 1064
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:8981ddaa6502db230aa8c1752f9e3ae4127b4c9a3bafb82f8ac10be916d022b6
3
  size 1064
last-checkpoint/trainer_state.json CHANGED
@@ -1,9 +1,9 @@
1
  {
2
  "best_metric": null,
3
  "best_model_checkpoint": null,
4
- "epoch": 8.99746835443038,
5
  "eval_steps": 100,
6
- "global_step": 1777,
7
  "is_hyper_param_search": false,
8
  "is_local_process_zero": true,
9
  "is_world_process_zero": true,
@@ -2677,6 +2677,306 @@
2677
  "rewards/margins": 0.45180240273475647,
2678
  "rewards/rejected": -0.3077693581581116,
2679
  "step": 1770
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
2680
  }
2681
  ],
2682
  "logging_steps": 10,
 
1
  {
2
  "best_metric": null,
3
  "best_model_checkpoint": null,
4
+ "epoch": 10.0,
5
  "eval_steps": 100,
6
+ "global_step": 1975,
7
  "is_hyper_param_search": false,
8
  "is_local_process_zero": true,
9
  "is_world_process_zero": true,
 
2677
  "rewards/margins": 0.45180240273475647,
2678
  "rewards/rejected": -0.3077693581581116,
2679
  "step": 1770
2680
+ },
2681
+ {
2682
+ "epoch": 9.012658227848101,
2683
+ "grad_norm": 1042157.0097295721,
2684
+ "learning_rate": 2.767157630836728e-07,
2685
+ "logits/chosen": -2.344456911087036,
2686
+ "logits/rejected": -2.174999713897705,
2687
+ "logps/chosen": -74.14456939697266,
2688
+ "logps/rejected": -549.884033203125,
2689
+ "loss": 22791.725,
2690
+ "rewards/accuracies": 0.9750000238418579,
2691
+ "rewards/chosen": 0.16015887260437012,
2692
+ "rewards/margins": 0.47513628005981445,
2693
+ "rewards/rejected": -0.31497737765312195,
2694
+ "step": 1780
2695
+ },
2696
+ {
2697
+ "epoch": 9.063291139240507,
2698
+ "grad_norm": 1604328.8989550385,
2699
+ "learning_rate": 2.751488561579442e-07,
2700
+ "logits/chosen": -0.4028230607509613,
2701
+ "logits/rejected": -0.017443586140871048,
2702
+ "logps/chosen": -78.17924499511719,
2703
+ "logps/rejected": -555.5220947265625,
2704
+ "loss": 21934.7781,
2705
+ "rewards/accuracies": 0.987500011920929,
2706
+ "rewards/chosen": 0.161887988448143,
2707
+ "rewards/margins": 0.47605371475219727,
2708
+ "rewards/rejected": -0.3141656517982483,
2709
+ "step": 1790
2710
+ },
2711
+ {
2712
+ "epoch": 9.113924050632912,
2713
+ "grad_norm": 930218.7877013405,
2714
+ "learning_rate": 2.7358194923221564e-07,
2715
+ "logits/chosen": -0.10258030891418457,
2716
+ "logits/rejected": -0.2491408884525299,
2717
+ "logps/chosen": -67.35882568359375,
2718
+ "logps/rejected": -562.8963623046875,
2719
+ "loss": 20609.7047,
2720
+ "rewards/accuracies": 1.0,
2721
+ "rewards/chosen": 0.1559842973947525,
2722
+ "rewards/margins": 0.4920543134212494,
2723
+ "rewards/rejected": -0.33607012033462524,
2724
+ "step": 1800
2725
+ },
2726
+ {
2727
+ "epoch": 9.164556962025316,
2728
+ "grad_norm": 1965412.9139898522,
2729
+ "learning_rate": 2.72015042306487e-07,
2730
+ "logits/chosen": 0.5992544889450073,
2731
+ "logits/rejected": 0.6971222162246704,
2732
+ "logps/chosen": -68.12413024902344,
2733
+ "logps/rejected": -546.7501220703125,
2734
+ "loss": 21574.0656,
2735
+ "rewards/accuracies": 0.949999988079071,
2736
+ "rewards/chosen": 0.16274484992027283,
2737
+ "rewards/margins": 0.475511372089386,
2738
+ "rewards/rejected": -0.31276652216911316,
2739
+ "step": 1810
2740
+ },
2741
+ {
2742
+ "epoch": 9.215189873417721,
2743
+ "grad_norm": 1012215.1362345209,
2744
+ "learning_rate": 2.704481353807584e-07,
2745
+ "logits/chosen": -0.252922922372818,
2746
+ "logits/rejected": 0.7370151281356812,
2747
+ "logps/chosen": -68.61247253417969,
2748
+ "logps/rejected": -545.773193359375,
2749
+ "loss": 21584.0,
2750
+ "rewards/accuracies": 0.9750000238418579,
2751
+ "rewards/chosen": 0.15732263028621674,
2752
+ "rewards/margins": 0.47610074281692505,
2753
+ "rewards/rejected": -0.3187780976295471,
2754
+ "step": 1820
2755
+ },
2756
+ {
2757
+ "epoch": 9.265822784810126,
2758
+ "grad_norm": 1317328.2635211374,
2759
+ "learning_rate": 2.6888122845502977e-07,
2760
+ "logits/chosen": -0.5902656316757202,
2761
+ "logits/rejected": -0.200765460729599,
2762
+ "logps/chosen": -72.17051696777344,
2763
+ "logps/rejected": -560.718994140625,
2764
+ "loss": 20662.6562,
2765
+ "rewards/accuracies": 0.987500011920929,
2766
+ "rewards/chosen": 0.16373535990715027,
2767
+ "rewards/margins": 0.49004659056663513,
2768
+ "rewards/rejected": -0.32631123065948486,
2769
+ "step": 1830
2770
+ },
2771
+ {
2772
+ "epoch": 9.316455696202532,
2773
+ "grad_norm": 1202220.669797323,
2774
+ "learning_rate": 2.673143215293012e-07,
2775
+ "logits/chosen": -0.9152681231498718,
2776
+ "logits/rejected": -0.46515974402427673,
2777
+ "logps/chosen": -71.53898620605469,
2778
+ "logps/rejected": -545.0053100585938,
2779
+ "loss": 22147.6375,
2780
+ "rewards/accuracies": 0.987500011920929,
2781
+ "rewards/chosen": 0.1599002182483673,
2782
+ "rewards/margins": 0.47435054183006287,
2783
+ "rewards/rejected": -0.31445032358169556,
2784
+ "step": 1840
2785
+ },
2786
+ {
2787
+ "epoch": 9.367088607594937,
2788
+ "grad_norm": 858793.4443150639,
2789
+ "learning_rate": 2.6574741460357254e-07,
2790
+ "logits/chosen": 0.8187123537063599,
2791
+ "logits/rejected": 0.9660876393318176,
2792
+ "logps/chosen": -68.53959655761719,
2793
+ "logps/rejected": -533.693603515625,
2794
+ "loss": 22383.2656,
2795
+ "rewards/accuracies": 1.0,
2796
+ "rewards/chosen": 0.15871909260749817,
2797
+ "rewards/margins": 0.46780315041542053,
2798
+ "rewards/rejected": -0.30908405780792236,
2799
+ "step": 1850
2800
+ },
2801
+ {
2802
+ "epoch": 9.417721518987342,
2803
+ "grad_norm": 753710.4553891663,
2804
+ "learning_rate": 2.6418050767784395e-07,
2805
+ "logits/chosen": 0.07855646312236786,
2806
+ "logits/rejected": -0.0003270745219197124,
2807
+ "logps/chosen": -71.92098236083984,
2808
+ "logps/rejected": -532.4739990234375,
2809
+ "loss": 22731.7687,
2810
+ "rewards/accuracies": 0.9624999761581421,
2811
+ "rewards/chosen": 0.16191932559013367,
2812
+ "rewards/margins": 0.4635027348995209,
2813
+ "rewards/rejected": -0.3015834391117096,
2814
+ "step": 1860
2815
+ },
2816
+ {
2817
+ "epoch": 9.468354430379748,
2818
+ "grad_norm": 1208088.8106737435,
2819
+ "learning_rate": 2.626136007521153e-07,
2820
+ "logits/chosen": -0.23646318912506104,
2821
+ "logits/rejected": 0.0054475306533277035,
2822
+ "logps/chosen": -66.38209533691406,
2823
+ "logps/rejected": -541.2474365234375,
2824
+ "loss": 22257.4375,
2825
+ "rewards/accuracies": 0.987500011920929,
2826
+ "rewards/chosen": 0.15835285186767578,
2827
+ "rewards/margins": 0.47471290826797485,
2828
+ "rewards/rejected": -0.3163600265979767,
2829
+ "step": 1870
2830
+ },
2831
+ {
2832
+ "epoch": 9.518987341772151,
2833
+ "grad_norm": 1301078.6439378709,
2834
+ "learning_rate": 2.610466938263867e-07,
2835
+ "logits/chosen": -1.2212382555007935,
2836
+ "logits/rejected": -1.2270792722702026,
2837
+ "logps/chosen": -69.9106674194336,
2838
+ "logps/rejected": -537.7271728515625,
2839
+ "loss": 22528.825,
2840
+ "rewards/accuracies": 0.987500011920929,
2841
+ "rewards/chosen": 0.15471485257148743,
2842
+ "rewards/margins": 0.46409493684768677,
2843
+ "rewards/rejected": -0.30938002467155457,
2844
+ "step": 1880
2845
+ },
2846
+ {
2847
+ "epoch": 9.569620253164556,
2848
+ "grad_norm": 1146807.5987679055,
2849
+ "learning_rate": 2.594797869006581e-07,
2850
+ "logits/chosen": -1.618896484375,
2851
+ "logits/rejected": -1.3599251508712769,
2852
+ "logps/chosen": -77.14048767089844,
2853
+ "logps/rejected": -519.0086059570312,
2854
+ "loss": 20937.9,
2855
+ "rewards/accuracies": 0.987500011920929,
2856
+ "rewards/chosen": 0.1560250073671341,
2857
+ "rewards/margins": 0.44421762228012085,
2858
+ "rewards/rejected": -0.28819265961647034,
2859
+ "step": 1890
2860
+ },
2861
+ {
2862
+ "epoch": 9.620253164556962,
2863
+ "grad_norm": 1143412.3516794874,
2864
+ "learning_rate": 2.579128799749295e-07,
2865
+ "logits/chosen": -0.6647695302963257,
2866
+ "logits/rejected": -0.6680254936218262,
2867
+ "logps/chosen": -85.31086730957031,
2868
+ "logps/rejected": -573.4449462890625,
2869
+ "loss": 21446.8719,
2870
+ "rewards/accuracies": 1.0,
2871
+ "rewards/chosen": 0.16069479286670685,
2872
+ "rewards/margins": 0.486908495426178,
2873
+ "rewards/rejected": -0.32621368765830994,
2874
+ "step": 1900
2875
+ },
2876
+ {
2877
+ "epoch": 9.670886075949367,
2878
+ "grad_norm": 874554.4726819041,
2879
+ "learning_rate": 2.5634597304920085e-07,
2880
+ "logits/chosen": -2.4332644939422607,
2881
+ "logits/rejected": -2.143573522567749,
2882
+ "logps/chosen": -73.66841125488281,
2883
+ "logps/rejected": -567.8841552734375,
2884
+ "loss": 21540.7203,
2885
+ "rewards/accuracies": 1.0,
2886
+ "rewards/chosen": 0.1681254804134369,
2887
+ "rewards/margins": 0.49868589639663696,
2888
+ "rewards/rejected": -0.3305602967739105,
2889
+ "step": 1910
2890
+ },
2891
+ {
2892
+ "epoch": 9.721518987341772,
2893
+ "grad_norm": 1796698.8005837006,
2894
+ "learning_rate": 2.5477906612347227e-07,
2895
+ "logits/chosen": 1.2071720361709595,
2896
+ "logits/rejected": 1.811336874961853,
2897
+ "logps/chosen": -68.67604064941406,
2898
+ "logps/rejected": -531.2750244140625,
2899
+ "loss": 22819.1078,
2900
+ "rewards/accuracies": 0.987500011920929,
2901
+ "rewards/chosen": 0.1492142677307129,
2902
+ "rewards/margins": 0.4690275192260742,
2903
+ "rewards/rejected": -0.31981322169303894,
2904
+ "step": 1920
2905
+ },
2906
+ {
2907
+ "epoch": 9.772151898734178,
2908
+ "grad_norm": 1652289.4059097564,
2909
+ "learning_rate": 2.532121591977436e-07,
2910
+ "logits/chosen": -0.47033196687698364,
2911
+ "logits/rejected": -0.13743743300437927,
2912
+ "logps/chosen": -58.46977996826172,
2913
+ "logps/rejected": -548.3218383789062,
2914
+ "loss": 22147.9906,
2915
+ "rewards/accuracies": 1.0,
2916
+ "rewards/chosen": 0.16477254033088684,
2917
+ "rewards/margins": 0.4882374703884125,
2918
+ "rewards/rejected": -0.32346493005752563,
2919
+ "step": 1930
2920
+ },
2921
+ {
2922
+ "epoch": 9.822784810126583,
2923
+ "grad_norm": 1031570.3956932048,
2924
+ "learning_rate": 2.5164525227201504e-07,
2925
+ "logits/chosen": -1.3281480073928833,
2926
+ "logits/rejected": -0.6028780937194824,
2927
+ "logps/chosen": -71.20520782470703,
2928
+ "logps/rejected": -560.7177124023438,
2929
+ "loss": 21547.1453,
2930
+ "rewards/accuracies": 1.0,
2931
+ "rewards/chosen": 0.16829116642475128,
2932
+ "rewards/margins": 0.4920671880245209,
2933
+ "rewards/rejected": -0.3237760066986084,
2934
+ "step": 1940
2935
+ },
2936
+ {
2937
+ "epoch": 9.873417721518987,
2938
+ "grad_norm": 997159.4818661372,
2939
+ "learning_rate": 2.500783453462864e-07,
2940
+ "logits/chosen": 0.0865519791841507,
2941
+ "logits/rejected": 1.0491398572921753,
2942
+ "logps/chosen": -66.77009582519531,
2943
+ "logps/rejected": -538.1752319335938,
2944
+ "loss": 21311.2047,
2945
+ "rewards/accuracies": 0.987500011920929,
2946
+ "rewards/chosen": 0.15671603381633759,
2947
+ "rewards/margins": 0.4763658046722412,
2948
+ "rewards/rejected": -0.3196497857570648,
2949
+ "step": 1950
2950
+ },
2951
+ {
2952
+ "epoch": 9.924050632911392,
2953
+ "grad_norm": 2765789.1484618983,
2954
+ "learning_rate": 2.485114384205578e-07,
2955
+ "logits/chosen": 0.05377687141299248,
2956
+ "logits/rejected": 0.6552912592887878,
2957
+ "logps/chosen": -67.99398803710938,
2958
+ "logps/rejected": -554.9031982421875,
2959
+ "loss": 20360.5656,
2960
+ "rewards/accuracies": 0.9750000238418579,
2961
+ "rewards/chosen": 0.16012230515480042,
2962
+ "rewards/margins": 0.48966652154922485,
2963
+ "rewards/rejected": -0.3295442461967468,
2964
+ "step": 1960
2965
+ },
2966
+ {
2967
+ "epoch": 9.974683544303797,
2968
+ "grad_norm": 778456.3899893347,
2969
+ "learning_rate": 2.4694453149482917e-07,
2970
+ "logits/chosen": -1.8621749877929688,
2971
+ "logits/rejected": -0.9629243612289429,
2972
+ "logps/chosen": -76.34040832519531,
2973
+ "logps/rejected": -570.4073486328125,
2974
+ "loss": 20853.2188,
2975
+ "rewards/accuracies": 0.987500011920929,
2976
+ "rewards/chosen": 0.16522939503192902,
2977
+ "rewards/margins": 0.4955335259437561,
2978
+ "rewards/rejected": -0.3303041160106659,
2979
+ "step": 1970
2980
  }
2981
  ],
2982
  "logging_steps": 10,