cat-searcher committed
Commit e68a32e
1 Parent(s): 95faeb6

Training in progress, epoch 6, checkpoint

Files changed (29)
  1. last-checkpoint/global_step1185/bf16_zero_pp_rank_0_mp_rank_00_optim_states.pt +3 -0
  2. last-checkpoint/global_step1185/bf16_zero_pp_rank_1_mp_rank_00_optim_states.pt +3 -0
  3. last-checkpoint/global_step1185/bf16_zero_pp_rank_2_mp_rank_00_optim_states.pt +3 -0
  4. last-checkpoint/global_step1185/bf16_zero_pp_rank_3_mp_rank_00_optim_states.pt +3 -0
  5. last-checkpoint/global_step1185/bf16_zero_pp_rank_4_mp_rank_00_optim_states.pt +3 -0
  6. last-checkpoint/global_step1185/bf16_zero_pp_rank_5_mp_rank_00_optim_states.pt +3 -0
  7. last-checkpoint/global_step1185/bf16_zero_pp_rank_6_mp_rank_00_optim_states.pt +3 -0
  8. last-checkpoint/global_step1185/bf16_zero_pp_rank_7_mp_rank_00_optim_states.pt +3 -0
  9. last-checkpoint/global_step1185/zero_pp_rank_0_mp_rank_00_model_states.pt +3 -0
  10. last-checkpoint/global_step1185/zero_pp_rank_1_mp_rank_00_model_states.pt +3 -0
  11. last-checkpoint/global_step1185/zero_pp_rank_2_mp_rank_00_model_states.pt +3 -0
  12. last-checkpoint/global_step1185/zero_pp_rank_3_mp_rank_00_model_states.pt +3 -0
  13. last-checkpoint/global_step1185/zero_pp_rank_4_mp_rank_00_model_states.pt +3 -0
  14. last-checkpoint/global_step1185/zero_pp_rank_5_mp_rank_00_model_states.pt +3 -0
  15. last-checkpoint/global_step1185/zero_pp_rank_6_mp_rank_00_model_states.pt +3 -0
  16. last-checkpoint/global_step1185/zero_pp_rank_7_mp_rank_00_model_states.pt +3 -0
  17. last-checkpoint/latest +1 -1
  18. last-checkpoint/model-00001-of-00002.safetensors +1 -1
  19. last-checkpoint/model-00002-of-00002.safetensors +1 -1
  20. last-checkpoint/rng_state_0.pth +1 -1
  21. last-checkpoint/rng_state_1.pth +1 -1
  22. last-checkpoint/rng_state_2.pth +1 -1
  23. last-checkpoint/rng_state_3.pth +1 -1
  24. last-checkpoint/rng_state_4.pth +1 -1
  25. last-checkpoint/rng_state_5.pth +1 -1
  26. last-checkpoint/rng_state_6.pth +1 -1
  27. last-checkpoint/rng_state_7.pth +1 -1
  28. last-checkpoint/scheduler.pt +1 -1
  29. last-checkpoint/trainer_state.json +302 -2
last-checkpoint/global_step1185/bf16_zero_pp_rank_0_mp_rank_00_optim_states.pt ADDED
@@ -0,0 +1,3 @@
+ version https://git-lfs.github.com/spec/v1
+ oid sha256:0f21d01407e4ae8a4d21c9fd5893cf8ca287be0b9b934e521d9384635f18f1c2
+ size 2506176112
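
Each ADDED entry in this commit is a Git LFS pointer rather than the tensor data itself: three "key value" lines giving the spec version, the sha256 oid, and the byte size (2506176112 bytes, roughly 2.5 GB per bf16 optimizer shard here). Below is a minimal sketch for reading one such pointer; it assumes the file on disk is still the pointer (i.e. before `git lfs pull` replaces it with the real binary), and the helper name is illustrative, not part of this repo.

```python
# Hedged sketch: parse a Git LFS pointer file like the ones added above.
# Requires Python 3.9+ for str.removeprefix.
from pathlib import Path

def parse_lfs_pointer(path: str) -> dict:
    """Return the version, sha256 oid, and size recorded in an LFS pointer."""
    fields = dict(
        line.split(" ", 1)                  # "oid sha256:..." -> ("oid", "sha256:...")
        for line in Path(path).read_text().splitlines()
        if line.strip()
    )
    return {
        "version": fields["version"],
        "oid": fields["oid"].removeprefix("sha256:"),
        "size": int(fields["size"]),
    }

ptr = parse_lfs_pointer(
    "last-checkpoint/global_step1185/bf16_zero_pp_rank_0_mp_rank_00_optim_states.pt"
)
print(ptr["oid"][:12], ptr["size"])         # expected here: 0f21d01407e4 2506176112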
last-checkpoint/global_step1185/bf16_zero_pp_rank_1_mp_rank_00_optim_states.pt ADDED
@@ -0,0 +1,3 @@
+ version https://git-lfs.github.com/spec/v1
+ oid sha256:8313e6ebe59d6901911ea4541307ffd402279ceceee244b8dde2d56f3a2cec0e
+ size 2506176112
last-checkpoint/global_step1185/bf16_zero_pp_rank_2_mp_rank_00_optim_states.pt ADDED
@@ -0,0 +1,3 @@
+ version https://git-lfs.github.com/spec/v1
+ oid sha256:f1571b96ea2af6e1647bcde55265f70ff473276797a75ce0619f097860b4a947
+ size 2506176112
last-checkpoint/global_step1185/bf16_zero_pp_rank_3_mp_rank_00_optim_states.pt ADDED
@@ -0,0 +1,3 @@
+ version https://git-lfs.github.com/spec/v1
+ oid sha256:4c09c6fba3e452c74f8de197ae829254444f4835c2ca80c291a3fd2a8c6682a7
+ size 2506176112
last-checkpoint/global_step1185/bf16_zero_pp_rank_4_mp_rank_00_optim_states.pt ADDED
@@ -0,0 +1,3 @@
+ version https://git-lfs.github.com/spec/v1
+ oid sha256:7c4749128accf2bed04a08e33fbebe11c6f476fc6db4c512555a17dfd28887ac
+ size 2506176112
last-checkpoint/global_step1185/bf16_zero_pp_rank_5_mp_rank_00_optim_states.pt ADDED
@@ -0,0 +1,3 @@
+ version https://git-lfs.github.com/spec/v1
+ oid sha256:d75da79afcaa6bdb54610a766beb6f87ff7c53556bc6c24a213db0069b806623
+ size 2506176112
last-checkpoint/global_step1185/bf16_zero_pp_rank_6_mp_rank_00_optim_states.pt ADDED
@@ -0,0 +1,3 @@
+ version https://git-lfs.github.com/spec/v1
+ oid sha256:cac0e47a3653dd346b2a4a922b8505a46fb180f907a776b9b19f8d10d4bb6219
+ size 2506176112
last-checkpoint/global_step1185/bf16_zero_pp_rank_7_mp_rank_00_optim_states.pt ADDED
@@ -0,0 +1,3 @@
+ version https://git-lfs.github.com/spec/v1
+ oid sha256:f113dad5a2fda8115b019f5d8ffe32f0daed0932073d3f86f48ea41c9c2b5d20
+ size 2506176112
last-checkpoint/global_step1185/zero_pp_rank_0_mp_rank_00_model_states.pt ADDED
@@ -0,0 +1,3 @@
+ version https://git-lfs.github.com/spec/v1
+ oid sha256:6eb3848f079be4da89d2ffd5d86a06aac7399d1a94758e16836b1f3cb6bbddb1
+ size 85570
last-checkpoint/global_step1185/zero_pp_rank_1_mp_rank_00_model_states.pt ADDED
@@ -0,0 +1,3 @@
+ version https://git-lfs.github.com/spec/v1
+ oid sha256:2bd50ef6205cfa99c20dae6bfa3745126bff94c78d9dd260f7a64b1280c4f22e
+ size 85506
last-checkpoint/global_step1185/zero_pp_rank_2_mp_rank_00_model_states.pt ADDED
@@ -0,0 +1,3 @@
+ version https://git-lfs.github.com/spec/v1
+ oid sha256:8751c19421e5e2129fcbd690463069fab0590118b296d30c39437349f1b2f36b
+ size 85506
last-checkpoint/global_step1185/zero_pp_rank_3_mp_rank_00_model_states.pt ADDED
@@ -0,0 +1,3 @@
+ version https://git-lfs.github.com/spec/v1
+ oid sha256:2b1b9a37b0fc0893afbec8cc4c52ba3f185e2a8bdc3fa381bdba8ac149b8fc14
+ size 85506
last-checkpoint/global_step1185/zero_pp_rank_4_mp_rank_00_model_states.pt ADDED
@@ -0,0 +1,3 @@
+ version https://git-lfs.github.com/spec/v1
+ oid sha256:51fbdd9346c615833b76aab54ebfd9ecef0fd730a129c1dfc47e82e7c23ad5f2
+ size 85506
last-checkpoint/global_step1185/zero_pp_rank_5_mp_rank_00_model_states.pt ADDED
@@ -0,0 +1,3 @@
+ version https://git-lfs.github.com/spec/v1
+ oid sha256:2f7cc1872b0b31675bb1cc95e19745fc429642614c768b0d5ccdd3ece3a4febe
+ size 85506
last-checkpoint/global_step1185/zero_pp_rank_6_mp_rank_00_model_states.pt ADDED
@@ -0,0 +1,3 @@
+ version https://git-lfs.github.com/spec/v1
+ oid sha256:71ad488f9b8acba557e59638cda7de14ca3a9dc4696c3e9708a64699a59db88a
+ size 85506
last-checkpoint/global_step1185/zero_pp_rank_7_mp_rank_00_model_states.pt ADDED
@@ -0,0 +1,3 @@
+ version https://git-lfs.github.com/spec/v1
+ oid sha256:adddbe045c84109518e94317b78b1e18ba1bb893dd74a5509c03cb68d51efadc
+ size 85506
last-checkpoint/latest CHANGED
@@ -1 +1 @@
- global_step987
+ global_step1185
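
The `global_step1185/*_optim_states.pt` and `*_model_states.pt` shards above, together with the updated `last-checkpoint/latest` tag, follow DeepSpeed's ZeRO checkpoint layout (one bf16 optimizer shard and one model-states shard per rank, ranks 0-7). A minimal sketch for consolidating them into a single fp32 state dict is shown below; it assumes DeepSpeed is installed and the checkpoint has been fetched with `git lfs pull`, and only the directory and tag names come from this commit.

```python
# Hedged sketch: consolidate the 8-rank ZeRO shards into one fp32 state dict.
# Assumes the standard DeepSpeed layout: last-checkpoint/latest names the tag
# ("global_step1185") and last-checkpoint/global_step1185/ holds the shards.
from deepspeed.utils.zero_to_fp32 import get_fp32_state_dict_from_zero_checkpoint

checkpoint_dir = "last-checkpoint"   # directory shown in this commit
tag = "global_step1185"              # contents of last-checkpoint/latest

state_dict = get_fp32_state_dict_from_zero_checkpoint(checkpoint_dir, tag=tag)
print(f"consolidated {len(state_dict)} parameter tensors")
```

DeepSpeed also drops a standalone `zero_to_fp32.py` script into checkpoint directories it writes; if one is present in this repo (it is not part of this diff), running it from the command line achieves the same consolidation.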
last-checkpoint/model-00001-of-00002.safetensors CHANGED
@@ -1,3 +1,3 @@
  version https://git-lfs.github.com/spec/v1
- oid sha256:a9d6347bbdfb78d6d728cf68948d2c89598dfbfde2c1c992084431e44430796d
+ oid sha256:dbec331a03b63bf09d63c111d004f44d4e1b4622bd66a68a51c8a93312a7aaed
  size 4945242264
last-checkpoint/model-00002-of-00002.safetensors CHANGED
@@ -1,3 +1,3 @@
  version https://git-lfs.github.com/spec/v1
- oid sha256:cbb9d1038339e10330e9562076f77aac42d42c8f7c5245bf246911f8ffc69ef0
+ oid sha256:f62a449b2e0be7027fb456871163c3388d55fd9f5230ef65a2e8ae1bd77075ac
  size 67121608
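
The two safetensors shards above (about 4.9 GB plus 67 MB) are the model weights themselves, re-uploaded with new oids at this checkpoint. A hedged sketch for loading them follows; it assumes `last-checkpoint/` also carries the usual `config.json`, index, and tokenizer files (not shown in this diff) and that the model is a bf16 causal LM, neither of which the diff alone confirms.

```python
# Hedged sketch: load the sharded safetensors weights from the checkpoint dir.
# Assumes config.json, model.safetensors.index.json, and tokenizer files exist
# alongside model-00001/00002-of-00002.safetensors; swap the Auto class if the
# model is not a causal LM.
import torch
from transformers import AutoModelForCausalLM, AutoTokenizer

ckpt_dir = "last-checkpoint"
tokenizer = AutoTokenizer.from_pretrained(ckpt_dir)
model = AutoModelForCausalLM.from_pretrained(ckpt_dir, torch_dtype=torch.bfloat16)
```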
last-checkpoint/rng_state_0.pth CHANGED
@@ -1,3 +1,3 @@
  version https://git-lfs.github.com/spec/v1
- oid sha256:5a0ef6f96a48e59aa52c4b471312c2a62378c19acc7ebbae839612b03a7d775a
+ oid sha256:a0628a9017696045a3a29e9eaffc71e9262d855716e773c0c3be760a1fe85bc8
  size 15984
last-checkpoint/rng_state_1.pth CHANGED
@@ -1,3 +1,3 @@
  version https://git-lfs.github.com/spec/v1
- oid sha256:ab11d533c0fdad46ea8b8e295ba5fdb705e078eeb88cc28f37d82913508766e9
+ oid sha256:df342004a4d8e3626bf2a9f689fde7c8bfd6d995e14931f5496eda1f456cb6f2
  size 15984
last-checkpoint/rng_state_2.pth CHANGED
@@ -1,3 +1,3 @@
  version https://git-lfs.github.com/spec/v1
- oid sha256:615c168147e3465ce5bfab6da2ff4afc68566ce00ec0f0c6c9fc988038a58d0a
+ oid sha256:f02096eb4e8850b91490e80e4a042e2e60f71bd2abc6a269d62c271649cb77d2
  size 15984
last-checkpoint/rng_state_3.pth CHANGED
@@ -1,3 +1,3 @@
  version https://git-lfs.github.com/spec/v1
- oid sha256:79f71e8f8674ecaef9f8cdcbf7ac457a8b8ff15b12694ba2a2fffcb4b43f0f08
+ oid sha256:326c778d3d0e7e3d5665fa0a9ecd92986609c430da08b41611d6c05dc19815a8
  size 15984
last-checkpoint/rng_state_4.pth CHANGED
@@ -1,3 +1,3 @@
  version https://git-lfs.github.com/spec/v1
- oid sha256:88cf6d674dab5545c300a55135f08ca935730a3d35e2c419fb0b333f19482c19
+ oid sha256:d978dcb0c34e022ee6750e9d86814b8c82e4965d7e07662f35f06eeac12938f3
  size 15984
last-checkpoint/rng_state_5.pth CHANGED
@@ -1,3 +1,3 @@
  version https://git-lfs.github.com/spec/v1
- oid sha256:2754f2cd8824702f027870d93748b3c0491b0ecd30f1e3d8e937116b2be6151f
+ oid sha256:01e83399aed1d9d173c3e07b2efa8530c956b62b2b68394c2ed0d43bd8bba9d1
  size 15984
last-checkpoint/rng_state_6.pth CHANGED
@@ -1,3 +1,3 @@
  version https://git-lfs.github.com/spec/v1
- oid sha256:1385124ac55604598f45ea6e2d141f29456647d3e7c10d12ca64ec93d312be8d
+ oid sha256:606ab3ca92e3d20c327c69fdcce7f7e39bec2f2c3538b036088b255f917e3ba4
  size 15984
last-checkpoint/rng_state_7.pth CHANGED
@@ -1,3 +1,3 @@
  version https://git-lfs.github.com/spec/v1
- oid sha256:416538efaec7391fa8fe782fb15146b83e5612d9e1961292c34c53e964806873
+ oid sha256:1276a987dd22c9093fec58921ba19f340a28f18bff635cc01324e09a3c37ac3a
  size 15984
last-checkpoint/scheduler.pt CHANGED
@@ -1,3 +1,3 @@
  version https://git-lfs.github.com/spec/v1
- oid sha256:ebe1f41c97c016e1df7ebf5446401ec464be377a52a8190323220b8692dc187a
+ oid sha256:251c2a48bb3a46f7c0365ebd02f9e250fbea04549ecdfec993cf3e0a3155f3a0
  size 1064
last-checkpoint/trainer_state.json CHANGED
@@ -1,9 +1,9 @@
  {
  "best_metric": null,
  "best_model_checkpoint": null,
- "epoch": 4.99746835443038,
+ "epoch": 6.0,
  "eval_steps": 100,
- "global_step": 987,
+ "global_step": 1185,
  "is_hyper_param_search": false,
  "is_local_process_zero": true,
  "is_world_process_zero": true,
@@ -1492,6 +1492,306 @@
  "rewards/margins": 0.24509286880493164,
  "rewards/rejected": -0.17864301800727844,
  "step": 980
+ },
+ {
+ "epoch": 5.012658227848101,
+ "grad_norm": 1442340.8531233447,
+ "learning_rate": 4.005014102162331e-07,
+ "logits/chosen": -7.928460121154785,
+ "logits/rejected": -7.941502571105957,
+ "logps/chosen": -175.59664916992188,
+ "logps/rejected": -406.7601623535156,
+ "loss": 62010.275,
+ "rewards/accuracies": 0.987500011920929,
+ "rewards/chosen": 0.06751301139593124,
+ "rewards/margins": 0.23539571464061737,
+ "rewards/rejected": -0.16788268089294434,
+ "step": 990
+ },
+ {
+ "epoch": 5.063291139240507,
+ "grad_norm": 1557498.8859861568,
+ "learning_rate": 3.989345032905045e-07,
+ "logits/chosen": -7.7452850341796875,
+ "logits/rejected": -8.02453899383545,
+ "logps/chosen": -154.46292114257812,
+ "logps/rejected": -469.1910095214844,
+ "loss": 49347.1687,
+ "rewards/accuracies": 0.987500011920929,
+ "rewards/chosen": 0.08384937047958374,
+ "rewards/margins": 0.31221631169319153,
+ "rewards/rejected": -0.2283669412136078,
+ "step": 1000
+ },
+ {
+ "epoch": 5.113924050632911,
+ "grad_norm": 1581238.5613807905,
+ "learning_rate": 3.973675963647759e-07,
+ "logits/chosen": -7.881131649017334,
+ "logits/rejected": -7.651412010192871,
+ "logps/chosen": -169.71153259277344,
+ "logps/rejected": -476.58477783203125,
+ "loss": 49390.7562,
+ "rewards/accuracies": 0.9750000238418579,
+ "rewards/chosen": 0.08512581884860992,
+ "rewards/margins": 0.3120972514152527,
+ "rewards/rejected": -0.22697141766548157,
+ "step": 1010
+ },
+ {
+ "epoch": 5.1645569620253164,
+ "grad_norm": 1497324.3970905554,
+ "learning_rate": 3.958006894390473e-07,
+ "logits/chosen": -6.736274719238281,
+ "logits/rejected": -6.750421047210693,
+ "logps/chosen": -151.04129028320312,
+ "logps/rejected": -459.47808837890625,
+ "loss": 49656.7812,
+ "rewards/accuracies": 0.987500011920929,
+ "rewards/chosen": 0.07378469407558441,
+ "rewards/margins": 0.3127291798591614,
+ "rewards/rejected": -0.23894445598125458,
+ "step": 1020
+ },
+ {
+ "epoch": 5.215189873417722,
+ "grad_norm": 1898671.7222835466,
+ "learning_rate": 3.942337825133187e-07,
+ "logits/chosen": -7.030360221862793,
+ "logits/rejected": -6.9101104736328125,
+ "logps/chosen": -168.35183715820312,
+ "logps/rejected": -469.60235595703125,
+ "loss": 49247.5312,
+ "rewards/accuracies": 0.987500011920929,
+ "rewards/chosen": 0.08571706712245941,
+ "rewards/margins": 0.3044472634792328,
+ "rewards/rejected": -0.21873018145561218,
+ "step": 1030
+ },
+ {
+ "epoch": 5.265822784810126,
+ "grad_norm": 1859831.3291458376,
+ "learning_rate": 3.926668755875901e-07,
+ "logits/chosen": -6.842263698577881,
+ "logits/rejected": -6.943556308746338,
+ "logps/chosen": -153.25328063964844,
+ "logps/rejected": -473.513427734375,
+ "loss": 51145.4938,
+ "rewards/accuracies": 1.0,
+ "rewards/chosen": 0.08420612663030624,
+ "rewards/margins": 0.3194884657859802,
+ "rewards/rejected": -0.235282301902771,
+ "step": 1040
+ },
+ {
+ "epoch": 5.3164556962025316,
+ "grad_norm": 1855378.6614461695,
+ "learning_rate": 3.910999686618615e-07,
+ "logits/chosen": -7.331165313720703,
+ "logits/rejected": -7.468164920806885,
+ "logps/chosen": -162.1797637939453,
+ "logps/rejected": -474.08074951171875,
+ "loss": 50799.1687,
+ "rewards/accuracies": 0.9750000238418579,
+ "rewards/chosen": 0.0886077731847763,
+ "rewards/margins": 0.31340503692626953,
+ "rewards/rejected": -0.22479727864265442,
+ "step": 1050
+ },
+ {
+ "epoch": 5.367088607594937,
+ "grad_norm": 1600231.8694471747,
+ "learning_rate": 3.895330617361329e-07,
+ "logits/chosen": -7.2842841148376465,
+ "logits/rejected": -7.146345615386963,
+ "logps/chosen": -140.54055786132812,
+ "logps/rejected": -446.4241638183594,
+ "loss": 49384.9875,
+ "rewards/accuracies": 0.9624999761581421,
+ "rewards/chosen": 0.08458932489156723,
+ "rewards/margins": 0.3061215877532959,
+ "rewards/rejected": -0.22153222560882568,
+ "step": 1060
+ },
+ {
+ "epoch": 5.417721518987342,
+ "grad_norm": 1820648.707460815,
+ "learning_rate": 3.8796615481040425e-07,
+ "logits/chosen": -7.4867706298828125,
+ "logits/rejected": -7.318013668060303,
+ "logps/chosen": -162.54937744140625,
+ "logps/rejected": -469.13433837890625,
+ "loss": 48744.0469,
+ "rewards/accuracies": 0.987500011920929,
+ "rewards/chosen": 0.0876765102148056,
+ "rewards/margins": 0.31078898906707764,
+ "rewards/rejected": -0.22311246395111084,
+ "step": 1070
+ },
+ {
+ "epoch": 5.468354430379747,
+ "grad_norm": 1629981.2772913359,
+ "learning_rate": 3.8639924788467566e-07,
+ "logits/chosen": -8.141877174377441,
+ "logits/rejected": -7.992497444152832,
+ "logps/chosen": -151.8604736328125,
+ "logps/rejected": -496.25201416015625,
+ "loss": 46868.6719,
+ "rewards/accuracies": 1.0,
+ "rewards/chosen": 0.09172078222036362,
+ "rewards/margins": 0.3495192527770996,
+ "rewards/rejected": -0.257798433303833,
+ "step": 1080
+ },
+ {
+ "epoch": 5.518987341772152,
+ "grad_norm": 1843259.5793917184,
+ "learning_rate": 3.84832340958947e-07,
+ "logits/chosen": -7.577700614929199,
+ "logits/rejected": -7.340989589691162,
+ "logps/chosen": -152.68710327148438,
+ "logps/rejected": -466.3287048339844,
+ "loss": 48765.2375,
+ "rewards/accuracies": 0.987500011920929,
+ "rewards/chosen": 0.08904045075178146,
+ "rewards/margins": 0.31981557607650757,
+ "rewards/rejected": -0.2307751476764679,
+ "step": 1090
+ },
+ {
+ "epoch": 5.569620253164557,
+ "grad_norm": 1848670.003471961,
+ "learning_rate": 3.8326543403321843e-07,
+ "logits/chosen": -5.992789268493652,
+ "logits/rejected": -5.831528663635254,
+ "logps/chosen": -131.7107696533203,
+ "logps/rejected": -433.0040588378906,
+ "loss": 48441.2188,
+ "rewards/accuracies": 0.987500011920929,
+ "rewards/chosen": 0.08974520117044449,
+ "rewards/margins": 0.2995590269565582,
+ "rewards/rejected": -0.20981380343437195,
+ "step": 1100
+ },
+ {
+ "epoch": 5.620253164556962,
+ "grad_norm": 1834994.3527284127,
+ "learning_rate": 3.816985271074898e-07,
+ "logits/chosen": -6.8782501220703125,
+ "logits/rejected": -7.123211860656738,
+ "logps/chosen": -143.1776885986328,
+ "logps/rejected": -439.9363708496094,
+ "loss": 50301.1625,
+ "rewards/accuracies": 0.987500011920929,
+ "rewards/chosen": 0.0787430927157402,
+ "rewards/margins": 0.29441121220588684,
+ "rewards/rejected": -0.21566812694072723,
+ "step": 1110
+ },
+ {
+ "epoch": 5.670886075949367,
+ "grad_norm": 2055858.9168272892,
+ "learning_rate": 3.801316201817612e-07,
+ "logits/chosen": -7.6317338943481445,
+ "logits/rejected": -7.619107723236084,
+ "logps/chosen": -152.3334503173828,
+ "logps/rejected": -453.30120849609375,
+ "loss": 49359.2312,
+ "rewards/accuracies": 0.9624999761581421,
+ "rewards/chosen": 0.0867711529135704,
+ "rewards/margins": 0.2968466281890869,
+ "rewards/rejected": -0.2100754976272583,
+ "step": 1120
+ },
+ {
+ "epoch": 5.7215189873417724,
+ "grad_norm": 1760917.726879333,
+ "learning_rate": 3.7856471325603256e-07,
+ "logits/chosen": -6.669379234313965,
+ "logits/rejected": -6.568717002868652,
+ "logps/chosen": -152.34774780273438,
+ "logps/rejected": -439.8075256347656,
+ "loss": 48808.2812,
+ "rewards/accuracies": 1.0,
+ "rewards/chosen": 0.08005286753177643,
+ "rewards/margins": 0.28860196471214294,
+ "rewards/rejected": -0.20854909718036652,
+ "step": 1130
+ },
+ {
+ "epoch": 5.772151898734177,
+ "grad_norm": 1793917.574084858,
+ "learning_rate": 3.76997806330304e-07,
+ "logits/chosen": -7.020206451416016,
+ "logits/rejected": -6.4513840675354,
+ "logps/chosen": -126.99436950683594,
+ "logps/rejected": -429.0069274902344,
+ "loss": 48991.9938,
+ "rewards/accuracies": 1.0,
+ "rewards/chosen": 0.08981131762266159,
+ "rewards/margins": 0.3046417832374573,
+ "rewards/rejected": -0.21483047306537628,
+ "step": 1140
+ },
+ {
+ "epoch": 5.822784810126582,
+ "grad_norm": 1856995.4726512374,
+ "learning_rate": 3.7543089940457533e-07,
+ "logits/chosen": -7.1540846824646,
+ "logits/rejected": -7.103608131408691,
+ "logps/chosen": -150.0362548828125,
+ "logps/rejected": -459.3680114746094,
+ "loss": 45240.3094,
+ "rewards/accuracies": 0.9750000238418579,
+ "rewards/chosen": 0.08858338743448257,
+ "rewards/margins": 0.3066866397857666,
+ "rewards/rejected": -0.21810325980186462,
+ "step": 1150
+ },
+ {
+ "epoch": 5.8734177215189876,
+ "grad_norm": 2252812.5376150296,
+ "learning_rate": 3.7386399247884675e-07,
+ "logits/chosen": -6.23285436630249,
+ "logits/rejected": -5.795694351196289,
+ "logps/chosen": -145.6466827392578,
+ "logps/rejected": -485.41229248046875,
+ "loss": 46892.1625,
+ "rewards/accuracies": 1.0,
+ "rewards/chosen": 0.09205026924610138,
+ "rewards/margins": 0.34098342061042786,
+ "rewards/rejected": -0.24893316626548767,
+ "step": 1160
+ },
+ {
+ "epoch": 5.924050632911392,
+ "grad_norm": 1669143.1623524264,
+ "learning_rate": 3.722970855531181e-07,
+ "logits/chosen": -7.314904689788818,
+ "logits/rejected": -7.455816745758057,
+ "logps/chosen": -133.58151245117188,
+ "logps/rejected": -482.9154357910156,
+ "loss": 46493.0938,
+ "rewards/accuracies": 0.987500011920929,
+ "rewards/chosen": 0.09256922453641891,
+ "rewards/margins": 0.34824666380882263,
+ "rewards/rejected": -0.2556774616241455,
+ "step": 1170
+ },
+ {
+ "epoch": 5.974683544303797,
+ "grad_norm": 1914279.6891733713,
+ "learning_rate": 3.707301786273895e-07,
+ "logits/chosen": -6.429854393005371,
+ "logits/rejected": -5.985020160675049,
+ "logps/chosen": -142.39651489257812,
+ "logps/rejected": -442.7286682128906,
+ "loss": 47640.0813,
+ "rewards/accuracies": 0.949999988079071,
+ "rewards/chosen": 0.08776311576366425,
+ "rewards/margins": 0.30018630623817444,
+ "rewards/rejected": -0.2124231606721878,
+ "step": 1180
  }
  ],
  "logging_steps": 10,