|
{ |
|
"best_metric": null, |
|
"best_model_checkpoint": null, |
|
"epoch": 3.0, |
|
"eval_steps": 500, |
|
"global_step": 2421, |
|
"is_hyper_param_search": false, |
|
"is_local_process_zero": true, |
|
"is_world_process_zero": true, |
|
"log_history": [ |
|
{ |
|
"epoch": 0.0, |
|
"grad_norm": 61.79818261705088, |
|
"learning_rate": 2.05761316872428e-09, |
|
"logits/chosen": -3.5, |
|
"logits/rejected": -1.4140625, |
|
"logps/chosen": -262.0, |
|
"logps/rejected": -788.0, |
|
"loss": 0.6914, |
|
"rewards/accuracies": 0.0, |
|
"rewards/chosen": 0.0, |
|
"rewards/margins": 0.0, |
|
"rewards/rejected": 0.0, |
|
"step": 1 |
|
}, |
|
{ |
|
"epoch": 0.01, |
|
"grad_norm": 71.93619140446947, |
|
"learning_rate": 2.0576131687242796e-08, |
|
"logits/chosen": -1.9140625, |
|
"logits/rejected": -3.0625, |
|
"logps/chosen": -648.0, |
|
"logps/rejected": -760.0, |
|
"loss": 0.6928, |
|
"rewards/accuracies": 0.0, |
|
"rewards/chosen": -0.033447265625, |
|
"rewards/margins": -0.033447265625, |
|
"rewards/rejected": 0.0, |
|
"step": 10 |
|
}, |
|
{ |
|
"epoch": 0.02, |
|
"grad_norm": 95.51795034006538, |
|
"learning_rate": 4.115226337448559e-08, |
|
"logits/chosen": -1.828125, |
|
"logits/rejected": -2.671875, |
|
"logps/chosen": -484.0, |
|
"logps/rejected": -352.0, |
|
"loss": 0.69, |
|
"rewards/accuracies": 0.20000000298023224, |
|
"rewards/chosen": 0.02001953125, |
|
"rewards/margins": 0.0, |
|
"rewards/rejected": 0.02001953125, |
|
"step": 20 |
|
}, |
|
{ |
|
"epoch": 0.04, |
|
"grad_norm": 83.15780879075649, |
|
"learning_rate": 6.172839506172839e-08, |
|
"logits/chosen": -1.8671875, |
|
"logits/rejected": -1.75, |
|
"logps/chosen": -404.0, |
|
"logps/rejected": -540.0, |
|
"loss": 0.6933, |
|
"rewards/accuracies": 0.10000000149011612, |
|
"rewards/chosen": -0.0150146484375, |
|
"rewards/margins": -0.02001953125, |
|
"rewards/rejected": 0.0050048828125, |
|
"step": 30 |
|
}, |
|
{ |
|
"epoch": 0.05, |
|
"grad_norm": 78.74493611311031, |
|
"learning_rate": 8.230452674897118e-08, |
|
"logits/chosen": -2.015625, |
|
"logits/rejected": -2.265625, |
|
"logps/chosen": -448.0, |
|
"logps/rejected": -600.0, |
|
"loss": 0.692, |
|
"rewards/accuracies": 0.5, |
|
"rewards/chosen": 0.0250244140625, |
|
"rewards/margins": 0.06005859375, |
|
"rewards/rejected": -0.03515625, |
|
"step": 40 |
|
}, |
|
{ |
|
"epoch": 0.06, |
|
"grad_norm": 84.22871977974292, |
|
"learning_rate": 1.02880658436214e-07, |
|
"logits/chosen": -1.6640625, |
|
"logits/rejected": -2.421875, |
|
"logps/chosen": -592.0, |
|
"logps/rejected": -580.0, |
|
"loss": 0.6922, |
|
"rewards/accuracies": 0.10000000149011612, |
|
"rewards/chosen": -0.0400390625, |
|
"rewards/margins": -0.030029296875, |
|
"rewards/rejected": -0.010009765625, |
|
"step": 50 |
|
}, |
|
{ |
|
"epoch": 0.07, |
|
"grad_norm": 57.8386192430028, |
|
"learning_rate": 1.2345679012345677e-07, |
|
"logits/chosen": -2.03125, |
|
"logits/rejected": -2.9375, |
|
"logps/chosen": -362.0, |
|
"logps/rejected": -368.0, |
|
"loss": 0.6872, |
|
"rewards/accuracies": 0.10000000149011612, |
|
"rewards/chosen": -0.0050048828125, |
|
"rewards/margins": -0.044921875, |
|
"rewards/rejected": 0.0400390625, |
|
"step": 60 |
|
}, |
|
{ |
|
"epoch": 0.09, |
|
"grad_norm": 108.85645337456651, |
|
"learning_rate": 1.4403292181069958e-07, |
|
"logits/chosen": -2.140625, |
|
"logits/rejected": -2.4375, |
|
"logps/chosen": -460.0, |
|
"logps/rejected": -468.0, |
|
"loss": 0.6888, |
|
"rewards/accuracies": 0.5, |
|
"rewards/chosen": 0.05517578125, |
|
"rewards/margins": 0.0751953125, |
|
"rewards/rejected": -0.02001953125, |
|
"step": 70 |
|
}, |
|
{ |
|
"epoch": 0.1, |
|
"grad_norm": 70.2187244330304, |
|
"learning_rate": 1.6460905349794237e-07, |
|
"logits/chosen": -1.8984375, |
|
"logits/rejected": -2.28125, |
|
"logps/chosen": -516.0, |
|
"logps/rejected": -506.0, |
|
"loss": 0.687, |
|
"rewards/accuracies": 0.20000000298023224, |
|
"rewards/chosen": -0.050048828125, |
|
"rewards/margins": -0.03515625, |
|
"rewards/rejected": -0.0150146484375, |
|
"step": 80 |
|
}, |
|
{ |
|
"epoch": 0.11, |
|
"grad_norm": 54.744834672425526, |
|
"learning_rate": 1.8518518518518516e-07, |
|
"logits/chosen": -1.7578125, |
|
"logits/rejected": -2.078125, |
|
"logps/chosen": -532.0, |
|
"logps/rejected": -660.0, |
|
"loss": 0.677, |
|
"rewards/accuracies": 0.4000000059604645, |
|
"rewards/chosen": 0.050048828125, |
|
"rewards/margins": 0.06494140625, |
|
"rewards/rejected": -0.0150146484375, |
|
"step": 90 |
|
}, |
|
{ |
|
"epoch": 0.12, |
|
"grad_norm": 98.04764626666417, |
|
"learning_rate": 2.05761316872428e-07, |
|
"logits/chosen": -1.6015625, |
|
"logits/rejected": -2.234375, |
|
"logps/chosen": -560.0, |
|
"logps/rejected": -660.0, |
|
"loss": 0.6657, |
|
"rewards/accuracies": 0.30000001192092896, |
|
"rewards/chosen": 0.03515625, |
|
"rewards/margins": -0.0301513671875, |
|
"rewards/rejected": 0.06494140625, |
|
"step": 100 |
|
}, |
|
{ |
|
"epoch": 0.14, |
|
"grad_norm": 78.67800061099335, |
|
"learning_rate": 2.2633744855967078e-07, |
|
"logits/chosen": -2.03125, |
|
"logits/rejected": -2.703125, |
|
"logps/chosen": -484.0, |
|
"logps/rejected": -428.0, |
|
"loss": 0.6618, |
|
"rewards/accuracies": 0.6000000238418579, |
|
"rewards/chosen": 0.054931640625, |
|
"rewards/margins": 0.06982421875, |
|
"rewards/rejected": -0.01507568359375, |
|
"step": 110 |
|
}, |
|
{ |
|
"epoch": 0.15, |
|
"grad_norm": 98.90783538853094, |
|
"learning_rate": 2.4691358024691354e-07, |
|
"logits/chosen": -1.8984375, |
|
"logits/rejected": -1.71875, |
|
"logps/chosen": -668.0, |
|
"logps/rejected": -752.0, |
|
"loss": 0.6576, |
|
"rewards/accuracies": 0.5, |
|
"rewards/chosen": 0.25, |
|
"rewards/margins": 0.205078125, |
|
"rewards/rejected": 0.045166015625, |
|
"step": 120 |
|
}, |
|
{ |
|
"epoch": 0.16, |
|
"grad_norm": 183.377786164052, |
|
"learning_rate": 2.6748971193415635e-07, |
|
"logits/chosen": -2.140625, |
|
"logits/rejected": -2.78125, |
|
"logps/chosen": -408.0, |
|
"logps/rejected": -358.0, |
|
"loss": 0.6568, |
|
"rewards/accuracies": 0.30000001192092896, |
|
"rewards/chosen": -0.09521484375, |
|
"rewards/margins": 0.02001953125, |
|
"rewards/rejected": -0.115234375, |
|
"step": 130 |
|
}, |
|
{ |
|
"epoch": 0.17, |
|
"grad_norm": 75.16806808811697, |
|
"learning_rate": 2.8806584362139917e-07, |
|
"logits/chosen": -1.359375, |
|
"logits/rejected": -2.21875, |
|
"logps/chosen": -668.0, |
|
"logps/rejected": -556.0, |
|
"loss": 0.6513, |
|
"rewards/accuracies": 0.4000000059604645, |
|
"rewards/chosen": -0.0703125, |
|
"rewards/margins": 0.0400390625, |
|
"rewards/rejected": -0.1103515625, |
|
"step": 140 |
|
}, |
|
{ |
|
"epoch": 0.19, |
|
"grad_norm": 59.26706314390388, |
|
"learning_rate": 3.086419753086419e-07, |
|
"logits/chosen": -2.390625, |
|
"logits/rejected": -2.1875, |
|
"logps/chosen": -356.0, |
|
"logps/rejected": -382.0, |
|
"loss": 0.6289, |
|
"rewards/accuracies": 0.5, |
|
"rewards/chosen": 0.125, |
|
"rewards/margins": 0.1357421875, |
|
"rewards/rejected": -0.010009765625, |
|
"step": 150 |
|
}, |
|
{ |
|
"epoch": 0.2, |
|
"grad_norm": 103.75204061254972, |
|
"learning_rate": 3.2921810699588474e-07, |
|
"logits/chosen": -2.46875, |
|
"logits/rejected": -1.6171875, |
|
"logps/chosen": -398.0, |
|
"logps/rejected": -486.0, |
|
"loss": 0.6381, |
|
"rewards/accuracies": 0.5, |
|
"rewards/chosen": -0.0703125, |
|
"rewards/margins": 0.08056640625, |
|
"rewards/rejected": -0.150390625, |
|
"step": 160 |
|
}, |
|
{ |
|
"epoch": 0.21, |
|
"grad_norm": 113.86192442502461, |
|
"learning_rate": 3.4979423868312755e-07, |
|
"logits/chosen": -2.3125, |
|
"logits/rejected": -1.9140625, |
|
"logps/chosen": -450.0, |
|
"logps/rejected": -464.0, |
|
"loss": 0.6266, |
|
"rewards/accuracies": 0.699999988079071, |
|
"rewards/chosen": -0.205078125, |
|
"rewards/margins": 0.1455078125, |
|
"rewards/rejected": -0.3515625, |
|
"step": 170 |
|
}, |
|
{ |
|
"epoch": 0.22, |
|
"grad_norm": 63.20529708150695, |
|
"learning_rate": 3.703703703703703e-07, |
|
"logits/chosen": -1.8046875, |
|
"logits/rejected": -2.8125, |
|
"logps/chosen": -744.0, |
|
"logps/rejected": -584.0, |
|
"loss": 0.6384, |
|
"rewards/accuracies": 0.6000000238418579, |
|
"rewards/chosen": -0.0849609375, |
|
"rewards/margins": 0.1806640625, |
|
"rewards/rejected": -0.265625, |
|
"step": 180 |
|
}, |
|
{ |
|
"epoch": 0.24, |
|
"grad_norm": 83.60084870315862, |
|
"learning_rate": 3.909465020576131e-07, |
|
"logits/chosen": -1.7578125, |
|
"logits/rejected": -1.828125, |
|
"logps/chosen": -716.0, |
|
"logps/rejected": -848.0, |
|
"loss": 0.5983, |
|
"rewards/accuracies": 0.800000011920929, |
|
"rewards/chosen": -0.1201171875, |
|
"rewards/margins": 0.400390625, |
|
"rewards/rejected": -0.51953125, |
|
"step": 190 |
|
}, |
|
{ |
|
"epoch": 0.25, |
|
"grad_norm": 62.24290659916015, |
|
"learning_rate": 4.11522633744856e-07, |
|
"logits/chosen": -2.125, |
|
"logits/rejected": -2.28125, |
|
"logps/chosen": -536.0, |
|
"logps/rejected": -576.0, |
|
"loss": 0.5939, |
|
"rewards/accuracies": 0.699999988079071, |
|
"rewards/chosen": 0.23046875, |
|
"rewards/margins": 0.49609375, |
|
"rewards/rejected": -0.265625, |
|
"step": 200 |
|
}, |
|
{ |
|
"epoch": 0.26, |
|
"grad_norm": 54.79737482658103, |
|
"learning_rate": 4.320987654320987e-07, |
|
"logits/chosen": -1.9765625, |
|
"logits/rejected": -2.203125, |
|
"logps/chosen": -494.0, |
|
"logps/rejected": -648.0, |
|
"loss": 0.6176, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": 0.1103515625, |
|
"rewards/margins": 0.56640625, |
|
"rewards/rejected": -0.455078125, |
|
"step": 210 |
|
}, |
|
{ |
|
"epoch": 0.27, |
|
"grad_norm": 75.36398911732266, |
|
"learning_rate": 4.5267489711934156e-07, |
|
"logits/chosen": -2.046875, |
|
"logits/rejected": -2.03125, |
|
"logps/chosen": -512.0, |
|
"logps/rejected": -390.0, |
|
"loss": 0.6214, |
|
"rewards/accuracies": 0.6000000238418579, |
|
"rewards/chosen": -0.1806640625, |
|
"rewards/margins": -0.0103759765625, |
|
"rewards/rejected": -0.169921875, |
|
"step": 220 |
|
}, |
|
{ |
|
"epoch": 0.29, |
|
"grad_norm": 65.42502577887264, |
|
"learning_rate": 4.732510288065844e-07, |
|
"logits/chosen": -2.40625, |
|
"logits/rejected": -2.3125, |
|
"logps/chosen": -652.0, |
|
"logps/rejected": -504.0, |
|
"loss": 0.5634, |
|
"rewards/accuracies": 0.5, |
|
"rewards/chosen": -0.134765625, |
|
"rewards/margins": 0.359375, |
|
"rewards/rejected": -0.49609375, |
|
"step": 230 |
|
}, |
|
{ |
|
"epoch": 0.3, |
|
"grad_norm": 101.59685524071844, |
|
"learning_rate": 4.938271604938271e-07, |
|
"logits/chosen": -1.6953125, |
|
"logits/rejected": -1.6640625, |
|
"logps/chosen": -704.0, |
|
"logps/rejected": -548.0, |
|
"loss": 0.5768, |
|
"rewards/accuracies": 0.5, |
|
"rewards/chosen": -0.37109375, |
|
"rewards/margins": 0.1103515625, |
|
"rewards/rejected": -0.48046875, |
|
"step": 240 |
|
}, |
|
{ |
|
"epoch": 0.31, |
|
"grad_norm": 56.985215974106104, |
|
"learning_rate": 4.999872565682321e-07, |
|
"logits/chosen": -1.4921875, |
|
"logits/rejected": -2.109375, |
|
"logps/chosen": -732.0, |
|
"logps/rejected": -496.0, |
|
"loss": 0.5665, |
|
"rewards/accuracies": 0.5, |
|
"rewards/chosen": 0.205078125, |
|
"rewards/margins": 0.6328125, |
|
"rewards/rejected": -0.42578125, |
|
"step": 250 |
|
}, |
|
{ |
|
"epoch": 0.32, |
|
"grad_norm": 60.61672615933723, |
|
"learning_rate": 4.999248428870611e-07, |
|
"logits/chosen": -1.890625, |
|
"logits/rejected": -3.453125, |
|
"logps/chosen": -588.0, |
|
"logps/rejected": -430.0, |
|
"loss": 0.5575, |
|
"rewards/accuracies": 0.4000000059604645, |
|
"rewards/chosen": -0.1396484375, |
|
"rewards/margins": 0.37109375, |
|
"rewards/rejected": -0.51171875, |
|
"step": 260 |
|
}, |
|
{ |
|
"epoch": 0.33, |
|
"grad_norm": 171.89078996410038, |
|
"learning_rate": 4.99810431295357e-07, |
|
"logits/chosen": -1.640625, |
|
"logits/rejected": -2.140625, |
|
"logps/chosen": -688.0, |
|
"logps/rejected": -488.0, |
|
"loss": 0.5472, |
|
"rewards/accuracies": 0.4000000059604645, |
|
"rewards/chosen": -0.828125, |
|
"rewards/margins": 0.005462646484375, |
|
"rewards/rejected": -0.8359375, |
|
"step": 270 |
|
}, |
|
{ |
|
"epoch": 0.35, |
|
"grad_norm": 132.22262423790198, |
|
"learning_rate": 4.99644045596931e-07, |
|
"logits/chosen": -1.9375, |
|
"logits/rejected": -2.875, |
|
"logps/chosen": -462.0, |
|
"logps/rejected": -356.0, |
|
"loss": 0.5785, |
|
"rewards/accuracies": 0.699999988079071, |
|
"rewards/chosen": -0.2451171875, |
|
"rewards/margins": 0.474609375, |
|
"rewards/rejected": -0.71875, |
|
"step": 280 |
|
}, |
|
{ |
|
"epoch": 0.36, |
|
"grad_norm": 123.26679640601527, |
|
"learning_rate": 4.994257204090243e-07, |
|
"logits/chosen": -1.609375, |
|
"logits/rejected": -3.0, |
|
"logps/chosen": -580.0, |
|
"logps/rejected": -476.0, |
|
"loss": 0.5198, |
|
"rewards/accuracies": 0.6000000238418579, |
|
"rewards/chosen": -0.42578125, |
|
"rewards/margins": 0.5703125, |
|
"rewards/rejected": -0.99609375, |
|
"step": 290 |
|
}, |
|
{ |
|
"epoch": 0.37, |
|
"grad_norm": 47.55857001285771, |
|
"learning_rate": 4.991555011551073e-07, |
|
"logits/chosen": -1.53125, |
|
"logits/rejected": -2.203125, |
|
"logps/chosen": -624.0, |
|
"logps/rejected": -620.0, |
|
"loss": 0.5123, |
|
"rewards/accuracies": 0.800000011920929, |
|
"rewards/chosen": -0.07470703125, |
|
"rewards/margins": 0.88671875, |
|
"rewards/rejected": -0.9609375, |
|
"step": 300 |
|
}, |
|
{ |
|
"epoch": 0.38, |
|
"grad_norm": 57.83726580735741, |
|
"learning_rate": 4.988334440554274e-07, |
|
"logits/chosen": -1.6796875, |
|
"logits/rejected": -2.0, |
|
"logps/chosen": -800.0, |
|
"logps/rejected": -632.0, |
|
"loss": 0.5449, |
|
"rewards/accuracies": 0.699999988079071, |
|
"rewards/chosen": -0.10498046875, |
|
"rewards/margins": 0.5234375, |
|
"rewards/rejected": -0.6328125, |
|
"step": 310 |
|
}, |
|
{ |
|
"epoch": 0.4, |
|
"grad_norm": 63.9187247476568, |
|
"learning_rate": 4.984596161153135e-07, |
|
"logits/chosen": -1.96875, |
|
"logits/rejected": -1.6015625, |
|
"logps/chosen": -612.0, |
|
"logps/rejected": -664.0, |
|
"loss": 0.5372, |
|
"rewards/accuracies": 0.699999988079071, |
|
"rewards/chosen": -0.8515625, |
|
"rewards/margins": 0.33984375, |
|
"rewards/rejected": -1.1875, |
|
"step": 320 |
|
}, |
|
{ |
|
"epoch": 0.41, |
|
"grad_norm": 59.66658958585498, |
|
"learning_rate": 4.980340951112345e-07, |
|
"logits/chosen": -1.59375, |
|
"logits/rejected": -2.4375, |
|
"logps/chosen": -652.0, |
|
"logps/rejected": -612.0, |
|
"loss": 0.5528, |
|
"rewards/accuracies": 0.699999988079071, |
|
"rewards/chosen": -0.65625, |
|
"rewards/margins": 0.58984375, |
|
"rewards/rejected": -1.25, |
|
"step": 330 |
|
}, |
|
{ |
|
"epoch": 0.42, |
|
"grad_norm": 51.91101244482548, |
|
"learning_rate": 4.975569695746179e-07, |
|
"logits/chosen": -1.4140625, |
|
"logits/rejected": -2.703125, |
|
"logps/chosen": -716.0, |
|
"logps/rejected": -496.0, |
|
"loss": 0.5169, |
|
"rewards/accuracies": 0.800000011920929, |
|
"rewards/chosen": -0.259765625, |
|
"rewards/margins": 0.4140625, |
|
"rewards/rejected": -0.671875, |
|
"step": 340 |
|
}, |
|
{ |
|
"epoch": 0.43, |
|
"grad_norm": 72.46042088468953, |
|
"learning_rate": 4.970283387734303e-07, |
|
"logits/chosen": -2.34375, |
|
"logits/rejected": -1.96875, |
|
"logps/chosen": -552.0, |
|
"logps/rejected": -494.0, |
|
"loss": 0.5549, |
|
"rewards/accuracies": 0.5, |
|
"rewards/chosen": -0.546875, |
|
"rewards/margins": 0.1943359375, |
|
"rewards/rejected": -0.7421875, |
|
"step": 350 |
|
}, |
|
{ |
|
"epoch": 0.45, |
|
"grad_norm": 98.36392274253446, |
|
"learning_rate": 4.964483126915245e-07, |
|
"logits/chosen": -1.9375, |
|
"logits/rejected": -1.6796875, |
|
"logps/chosen": -552.0, |
|
"logps/rejected": -524.0, |
|
"loss": 0.5237, |
|
"rewards/accuracies": 0.8999999761581421, |
|
"rewards/chosen": -0.375, |
|
"rewards/margins": 0.62109375, |
|
"rewards/rejected": -0.99609375, |
|
"step": 360 |
|
}, |
|
{ |
|
"epoch": 0.46, |
|
"grad_norm": 35.92451209539994, |
|
"learning_rate": 4.958170120057565e-07, |
|
"logits/chosen": -1.875, |
|
"logits/rejected": -3.46875, |
|
"logps/chosen": -438.0, |
|
"logps/rejected": -400.0, |
|
"loss": 0.514, |
|
"rewards/accuracies": 0.6000000238418579, |
|
"rewards/chosen": -0.39453125, |
|
"rewards/margins": 0.75390625, |
|
"rewards/rejected": -1.1484375, |
|
"step": 370 |
|
}, |
|
{ |
|
"epoch": 0.47, |
|
"grad_norm": 43.212042789239504, |
|
"learning_rate": 4.951345680608787e-07, |
|
"logits/chosen": -1.3984375, |
|
"logits/rejected": -2.65625, |
|
"logps/chosen": -660.0, |
|
"logps/rejected": -616.0, |
|
"loss": 0.5045, |
|
"rewards/accuracies": 0.699999988079071, |
|
"rewards/chosen": -1.140625, |
|
"rewards/margins": 0.201171875, |
|
"rewards/rejected": -1.34375, |
|
"step": 380 |
|
}, |
|
{ |
|
"epoch": 0.48, |
|
"grad_norm": 55.92901896058768, |
|
"learning_rate": 4.944011228422125e-07, |
|
"logits/chosen": -1.6875, |
|
"logits/rejected": -2.03125, |
|
"logps/chosen": -536.0, |
|
"logps/rejected": -508.0, |
|
"loss": 0.5521, |
|
"rewards/accuracies": 0.699999988079071, |
|
"rewards/chosen": -0.6953125, |
|
"rewards/margins": 0.427734375, |
|
"rewards/rejected": -1.125, |
|
"step": 390 |
|
}, |
|
{ |
|
"epoch": 0.5, |
|
"grad_norm": 48.27467634078171, |
|
"learning_rate": 4.936168289461084e-07, |
|
"logits/chosen": -2.25, |
|
"logits/rejected": -2.046875, |
|
"logps/chosen": -390.0, |
|
"logps/rejected": -452.0, |
|
"loss": 0.5116, |
|
"rewards/accuracies": 0.6000000238418579, |
|
"rewards/chosen": -0.76953125, |
|
"rewards/margins": 0.2353515625, |
|
"rewards/rejected": -1.0078125, |
|
"step": 400 |
|
}, |
|
{ |
|
"epoch": 0.51, |
|
"grad_norm": 43.39520482895247, |
|
"learning_rate": 4.92781849548197e-07, |
|
"logits/chosen": -1.6640625, |
|
"logits/rejected": -1.6796875, |
|
"logps/chosen": -580.0, |
|
"logps/rejected": -548.0, |
|
"loss": 0.5259, |
|
"rewards/accuracies": 0.699999988079071, |
|
"rewards/chosen": -1.0234375, |
|
"rewards/margins": 0.5859375, |
|
"rewards/rejected": -1.609375, |
|
"step": 410 |
|
}, |
|
{ |
|
"epoch": 0.52, |
|
"grad_norm": 53.58876818695228, |
|
"learning_rate": 4.918963583694396e-07, |
|
"logits/chosen": -1.765625, |
|
"logits/rejected": -1.5234375, |
|
"logps/chosen": -548.0, |
|
"logps/rejected": -596.0, |
|
"loss": 0.516, |
|
"rewards/accuracies": 0.800000011920929, |
|
"rewards/chosen": -0.93359375, |
|
"rewards/margins": 0.59765625, |
|
"rewards/rejected": -1.53125, |
|
"step": 420 |
|
}, |
|
{ |
|
"epoch": 0.53, |
|
"grad_norm": 195.21265227218643, |
|
"learning_rate": 4.909605396399855e-07, |
|
"logits/chosen": -1.9375, |
|
"logits/rejected": -1.625, |
|
"logps/chosen": -414.0, |
|
"logps/rejected": -756.0, |
|
"loss": 0.5513, |
|
"rewards/accuracies": 0.699999988079071, |
|
"rewards/chosen": -0.64453125, |
|
"rewards/margins": 1.6640625, |
|
"rewards/rejected": -2.3125, |
|
"step": 430 |
|
}, |
|
{ |
|
"epoch": 0.55, |
|
"grad_norm": 46.48860995329084, |
|
"learning_rate": 4.899745880608417e-07, |
|
"logits/chosen": -1.5, |
|
"logits/rejected": -1.921875, |
|
"logps/chosen": -668.0, |
|
"logps/rejected": -664.0, |
|
"loss": 0.7219, |
|
"rewards/accuracies": 0.800000011920929, |
|
"rewards/chosen": -0.921875, |
|
"rewards/margins": 0.462890625, |
|
"rewards/rejected": -1.3828125, |
|
"step": 440 |
|
}, |
|
{ |
|
"epoch": 0.56, |
|
"grad_norm": 42.31847046504421, |
|
"learning_rate": 4.889387087633647e-07, |
|
"logits/chosen": -1.4296875, |
|
"logits/rejected": -2.84375, |
|
"logps/chosen": -612.0, |
|
"logps/rejected": -600.0, |
|
"loss": 0.5171, |
|
"rewards/accuracies": 0.800000011920929, |
|
"rewards/chosen": -0.69921875, |
|
"rewards/margins": 0.6328125, |
|
"rewards/rejected": -1.328125, |
|
"step": 450 |
|
}, |
|
{ |
|
"epoch": 0.57, |
|
"grad_norm": 43.44689835885094, |
|
"learning_rate": 4.878531172665815e-07, |
|
"logits/chosen": -1.890625, |
|
"logits/rejected": -2.75, |
|
"logps/chosen": -608.0, |
|
"logps/rejected": -544.0, |
|
"loss": 0.4904, |
|
"rewards/accuracies": 0.800000011920929, |
|
"rewards/chosen": -0.169921875, |
|
"rewards/margins": 0.9453125, |
|
"rewards/rejected": -1.1171875, |
|
"step": 460 |
|
}, |
|
{ |
|
"epoch": 0.58, |
|
"grad_norm": 47.14528734842805, |
|
"learning_rate": 4.867180394323509e-07, |
|
"logits/chosen": -1.5546875, |
|
"logits/rejected": -2.1875, |
|
"logps/chosen": -700.0, |
|
"logps/rejected": -732.0, |
|
"loss": 0.5632, |
|
"rewards/accuracies": 0.8999999761581421, |
|
"rewards/chosen": -1.0, |
|
"rewards/margins": 1.3359375, |
|
"rewards/rejected": -2.328125, |
|
"step": 470 |
|
}, |
|
{ |
|
"epoch": 0.59, |
|
"grad_norm": 43.11262095050651, |
|
"learning_rate": 4.855337114183711e-07, |
|
"logits/chosen": -1.421875, |
|
"logits/rejected": -1.5859375, |
|
"logps/chosen": -644.0, |
|
"logps/rejected": -692.0, |
|
"loss": 0.4951, |
|
"rewards/accuracies": 0.800000011920929, |
|
"rewards/chosen": -0.330078125, |
|
"rewards/margins": 1.53125, |
|
"rewards/rejected": -1.859375, |
|
"step": 480 |
|
}, |
|
{ |
|
"epoch": 0.61, |
|
"grad_norm": 45.38075851262632, |
|
"learning_rate": 4.843003796290469e-07, |
|
"logits/chosen": -1.8046875, |
|
"logits/rejected": -2.15625, |
|
"logps/chosen": -772.0, |
|
"logps/rejected": -576.0, |
|
"loss": 0.5214, |
|
"rewards/accuracies": 0.8999999761581421, |
|
"rewards/chosen": -0.375, |
|
"rewards/margins": 0.71484375, |
|
"rewards/rejected": -1.09375, |
|
"step": 490 |
|
}, |
|
{ |
|
"epoch": 0.62, |
|
"grad_norm": 50.08648685280834, |
|
"learning_rate": 4.830183006642236e-07, |
|
"logits/chosen": -2.40625, |
|
"logits/rejected": -1.9765625, |
|
"logps/chosen": -444.0, |
|
"logps/rejected": -536.0, |
|
"loss": 0.5388, |
|
"rewards/accuracies": 0.800000011920929, |
|
"rewards/chosen": -0.83203125, |
|
"rewards/margins": 0.34765625, |
|
"rewards/rejected": -1.1796875, |
|
"step": 500 |
|
}, |
|
{ |
|
"epoch": 0.62, |
|
"eval_logits/chosen": -1.0859375, |
|
"eval_logits/rejected": -1.0078125, |
|
"eval_logps/chosen": -540.0, |
|
"eval_logps/rejected": -620.0, |
|
"eval_loss": 0.49058592319488525, |
|
"eval_rewards/accuracies": 0.8055555820465088, |
|
"eval_rewards/chosen": -0.7578125, |
|
"eval_rewards/margins": 0.86328125, |
|
"eval_rewards/rejected": -1.625, |
|
"eval_runtime": 50.5034, |
|
"eval_samples_per_second": 20.791, |
|
"eval_steps_per_second": 0.178, |
|
"step": 500 |
|
}, |
|
{ |
|
"epoch": 0.63, |
|
"grad_norm": 45.830329324613906, |
|
"learning_rate": 4.816877412658007e-07, |
|
"logits/chosen": -1.5546875, |
|
"logits/rejected": -1.84375, |
|
"logps/chosen": -772.0, |
|
"logps/rejected": -616.0, |
|
"loss": 0.4923, |
|
"rewards/accuracies": 0.8999999761581421, |
|
"rewards/chosen": -0.6171875, |
|
"rewards/margins": 0.921875, |
|
"rewards/rejected": -1.5390625, |
|
"step": 510 |
|
}, |
|
{ |
|
"epoch": 0.64, |
|
"grad_norm": 53.190960939965166, |
|
"learning_rate": 4.80308978262235e-07, |
|
"logits/chosen": -1.515625, |
|
"logits/rejected": -2.609375, |
|
"logps/chosen": -548.0, |
|
"logps/rejected": -502.0, |
|
"loss": 0.4409, |
|
"rewards/accuracies": 0.699999988079071, |
|
"rewards/chosen": -0.88671875, |
|
"rewards/margins": 0.84375, |
|
"rewards/rejected": -1.7265625, |
|
"step": 520 |
|
}, |
|
{ |
|
"epoch": 0.66, |
|
"grad_norm": 45.04181332411229, |
|
"learning_rate": 4.788822985109449e-07, |
|
"logits/chosen": -1.875, |
|
"logits/rejected": -1.9140625, |
|
"logps/chosen": -432.0, |
|
"logps/rejected": -448.0, |
|
"loss": 0.5368, |
|
"rewards/accuracies": 0.6000000238418579, |
|
"rewards/chosen": -0.78515625, |
|
"rewards/margins": 0.50390625, |
|
"rewards/rejected": -1.2890625, |
|
"step": 530 |
|
}, |
|
{ |
|
"epoch": 0.67, |
|
"grad_norm": 40.85074772821745, |
|
"learning_rate": 4.774079988386296e-07, |
|
"logits/chosen": -1.9765625, |
|
"logits/rejected": -1.640625, |
|
"logps/chosen": -436.0, |
|
"logps/rejected": -464.0, |
|
"loss": 0.5083, |
|
"rewards/accuracies": 0.800000011920929, |
|
"rewards/chosen": -0.94140625, |
|
"rewards/margins": 0.2197265625, |
|
"rewards/rejected": -1.15625, |
|
"step": 540 |
|
}, |
|
{ |
|
"epoch": 0.68, |
|
"grad_norm": 45.70832159940248, |
|
"learning_rate": 4.7588638597951173e-07, |
|
"logits/chosen": -1.1171875, |
|
"logits/rejected": -2.265625, |
|
"logps/chosen": -748.0, |
|
"logps/rejected": -524.0, |
|
"loss": 0.4754, |
|
"rewards/accuracies": 0.8999999761581421, |
|
"rewards/chosen": 0.287109375, |
|
"rewards/margins": 1.640625, |
|
"rewards/rejected": -1.3515625, |
|
"step": 550 |
|
}, |
|
{ |
|
"epoch": 0.69, |
|
"grad_norm": 59.530448462467945, |
|
"learning_rate": 4.7431777651152103e-07, |
|
"logits/chosen": -1.4765625, |
|
"logits/rejected": -2.09375, |
|
"logps/chosen": -464.0, |
|
"logps/rejected": -452.0, |
|
"loss": 0.5154, |
|
"rewards/accuracies": 0.6000000238418579, |
|
"rewards/chosen": -0.890625, |
|
"rewards/margins": 0.380859375, |
|
"rewards/rejected": -1.2734375, |
|
"step": 560 |
|
}, |
|
{ |
|
"epoch": 0.71, |
|
"grad_norm": 60.197735477558666, |
|
"learning_rate": 4.727024967904284e-07, |
|
"logits/chosen": -1.890625, |
|
"logits/rejected": -1.4375, |
|
"logps/chosen": -414.0, |
|
"logps/rejected": -564.0, |
|
"loss": 0.5178, |
|
"rewards/accuracies": 0.5, |
|
"rewards/chosen": -0.7421875, |
|
"rewards/margins": -0.025634765625, |
|
"rewards/rejected": -0.71484375, |
|
"step": 570 |
|
}, |
|
{ |
|
"epoch": 0.72, |
|
"grad_norm": 42.26765729774977, |
|
"learning_rate": 4.710408828819463e-07, |
|
"logits/chosen": -1.609375, |
|
"logits/rejected": -2.359375, |
|
"logps/chosen": -552.0, |
|
"logps/rejected": -524.0, |
|
"loss": 0.5026, |
|
"rewards/accuracies": 0.8999999761581421, |
|
"rewards/chosen": -1.2890625, |
|
"rewards/margins": 0.6484375, |
|
"rewards/rejected": -1.9375, |
|
"step": 580 |
|
}, |
|
{ |
|
"epoch": 0.73, |
|
"grad_norm": 50.89266004351078, |
|
"learning_rate": 4.6933328049180937e-07, |
|
"logits/chosen": -1.890625, |
|
"logits/rejected": -2.140625, |
|
"logps/chosen": -496.0, |
|
"logps/rejected": -580.0, |
|
"loss": 0.4856, |
|
"rewards/accuracies": 0.699999988079071, |
|
"rewards/chosen": -0.734375, |
|
"rewards/margins": 0.421875, |
|
"rewards/rejected": -1.15625, |
|
"step": 590 |
|
}, |
|
{ |
|
"epoch": 0.74, |
|
"grad_norm": 47.93987624657981, |
|
"learning_rate": 4.6758004489384815e-07, |
|
"logits/chosen": -1.7734375, |
|
"logits/rejected": -2.046875, |
|
"logps/chosen": -408.0, |
|
"logps/rejected": -544.0, |
|
"loss": 0.4713, |
|
"rewards/accuracies": 0.6000000238418579, |
|
"rewards/chosen": -1.2109375, |
|
"rewards/margins": 0.23828125, |
|
"rewards/rejected": -1.4453125, |
|
"step": 600 |
|
}, |
|
{ |
|
"epoch": 0.76, |
|
"grad_norm": 48.72108104567496, |
|
"learning_rate": 4.6578154085607323e-07, |
|
"logits/chosen": -2.8125, |
|
"logits/rejected": -2.21875, |
|
"logps/chosen": -580.0, |
|
"logps/rejected": -824.0, |
|
"loss": 0.488, |
|
"rewards/accuracies": 0.8999999761581421, |
|
"rewards/chosen": -0.51953125, |
|
"rewards/margins": 1.4609375, |
|
"rewards/rejected": -1.984375, |
|
"step": 610 |
|
}, |
|
{ |
|
"epoch": 0.77, |
|
"grad_norm": 55.85530255600864, |
|
"learning_rate": 4.639381425647841e-07, |
|
"logits/chosen": -1.3984375, |
|
"logits/rejected": -1.5703125, |
|
"logps/chosen": -502.0, |
|
"logps/rejected": -576.0, |
|
"loss": 0.5025, |
|
"rewards/accuracies": 0.6000000238418579, |
|
"rewards/chosen": -0.64453125, |
|
"rewards/margins": 1.0, |
|
"rewards/rejected": -1.6484375, |
|
"step": 620 |
|
}, |
|
{ |
|
"epoch": 0.78, |
|
"grad_norm": 59.68466050286291, |
|
"learning_rate": 4.6205023354671735e-07, |
|
"logits/chosen": -1.5390625, |
|
"logits/rejected": -2.03125, |
|
"logps/chosen": -460.0, |
|
"logps/rejected": -446.0, |
|
"loss": 0.4818, |
|
"rewards/accuracies": 0.800000011920929, |
|
"rewards/chosen": -0.77734375, |
|
"rewards/margins": 0.875, |
|
"rewards/rejected": -1.6484375, |
|
"step": 630 |
|
}, |
|
{ |
|
"epoch": 0.79, |
|
"grad_norm": 44.610719658440864, |
|
"learning_rate": 4.601182065892529e-07, |
|
"logits/chosen": -1.5625, |
|
"logits/rejected": -1.453125, |
|
"logps/chosen": -556.0, |
|
"logps/rejected": -460.0, |
|
"loss": 0.476, |
|
"rewards/accuracies": 0.6000000238418579, |
|
"rewards/chosen": -0.91015625, |
|
"rewards/margins": 0.53515625, |
|
"rewards/rejected": -1.4453125, |
|
"step": 640 |
|
}, |
|
{ |
|
"epoch": 0.81, |
|
"grad_norm": 49.65621035961898, |
|
"learning_rate": 4.581424636586928e-07, |
|
"logits/chosen": -1.671875, |
|
"logits/rejected": -2.203125, |
|
"logps/chosen": -456.0, |
|
"logps/rejected": -536.0, |
|
"loss": 0.5334, |
|
"rewards/accuracies": 0.5, |
|
"rewards/chosen": -0.83984375, |
|
"rewards/margins": 0.2060546875, |
|
"rewards/rejected": -1.046875, |
|
"step": 650 |
|
}, |
|
{ |
|
"epoch": 0.82, |
|
"grad_norm": 37.22285340163814, |
|
"learning_rate": 4.561234158166305e-07, |
|
"logits/chosen": -1.265625, |
|
"logits/rejected": -1.3046875, |
|
"logps/chosen": -540.0, |
|
"logps/rejected": -524.0, |
|
"loss": 0.4929, |
|
"rewards/accuracies": 0.699999988079071, |
|
"rewards/chosen": -0.87109375, |
|
"rewards/margins": 0.75, |
|
"rewards/rejected": -1.6171875, |
|
"step": 660 |
|
}, |
|
{ |
|
"epoch": 0.83, |
|
"grad_norm": 72.63982166912706, |
|
"learning_rate": 4.5406148313442753e-07, |
|
"logits/chosen": -2.140625, |
|
"logits/rejected": -2.34375, |
|
"logps/chosen": -354.0, |
|
"logps/rejected": -372.0, |
|
"loss": 0.4682, |
|
"rewards/accuracies": 0.6000000238418579, |
|
"rewards/chosen": -0.98046875, |
|
"rewards/margins": 0.7109375, |
|
"rewards/rejected": -1.6875, |
|
"step": 670 |
|
}, |
|
{ |
|
"epoch": 0.84, |
|
"grad_norm": 49.88245474539256, |
|
"learning_rate": 4.519570946058162e-07, |
|
"logits/chosen": -1.3046875, |
|
"logits/rejected": -1.6796875, |
|
"logps/chosen": -482.0, |
|
"logps/rejected": -556.0, |
|
"loss": 0.4673, |
|
"rewards/accuracies": 0.800000011920929, |
|
"rewards/chosen": -0.84375, |
|
"rewards/margins": 1.0234375, |
|
"rewards/rejected": -1.8671875, |
|
"step": 680 |
|
}, |
|
{ |
|
"epoch": 0.86, |
|
"grad_norm": 36.17988110201782, |
|
"learning_rate": 4.4981068805764545e-07, |
|
"logits/chosen": -1.2109375, |
|
"logits/rejected": -2.125, |
|
"logps/chosen": -700.0, |
|
"logps/rejected": -668.0, |
|
"loss": 0.4494, |
|
"rewards/accuracies": 0.5, |
|
"rewards/chosen": -1.4765625, |
|
"rewards/margins": -0.234375, |
|
"rewards/rejected": -1.2421875, |
|
"step": 690 |
|
}, |
|
{ |
|
"epoch": 0.87, |
|
"grad_norm": 61.21171348805659, |
|
"learning_rate": 4.4762271005878913e-07, |
|
"logits/chosen": -1.765625, |
|
"logits/rejected": -1.9765625, |
|
"logps/chosen": -388.0, |
|
"logps/rejected": -446.0, |
|
"loss": 0.4665, |
|
"rewards/accuracies": 0.699999988079071, |
|
"rewards/chosen": -1.2578125, |
|
"rewards/margins": 0.55859375, |
|
"rewards/rejected": -1.8203125, |
|
"step": 700 |
|
}, |
|
{ |
|
"epoch": 0.88, |
|
"grad_norm": 52.27627457515467, |
|
"learning_rate": 4.4539361582723586e-07, |
|
"logits/chosen": -1.3515625, |
|
"logits/rejected": -1.453125, |
|
"logps/chosen": -668.0, |
|
"logps/rejected": -892.0, |
|
"loss": 0.4806, |
|
"rewards/accuracies": 0.800000011920929, |
|
"rewards/chosen": -1.8828125, |
|
"rewards/margins": 1.46875, |
|
"rewards/rejected": -3.34375, |
|
"step": 710 |
|
}, |
|
{ |
|
"epoch": 0.89, |
|
"grad_norm": 65.21759338991524, |
|
"learning_rate": 4.431238691353784e-07, |
|
"logits/chosen": -1.7265625, |
|
"logits/rejected": -1.9921875, |
|
"logps/chosen": -486.0, |
|
"logps/rejected": -536.0, |
|
"loss": 0.4816, |
|
"rewards/accuracies": 0.800000011920929, |
|
"rewards/chosen": -1.125, |
|
"rewards/margins": 1.75, |
|
"rewards/rejected": -2.875, |
|
"step": 720 |
|
}, |
|
{ |
|
"epoch": 0.9, |
|
"grad_norm": 51.27261353179044, |
|
"learning_rate": 4.408139422135241e-07, |
|
"logits/chosen": -1.4140625, |
|
"logits/rejected": -2.078125, |
|
"logps/chosen": -462.0, |
|
"logps/rejected": -428.0, |
|
"loss": 0.4583, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": -1.2890625, |
|
"rewards/margins": 1.859375, |
|
"rewards/rejected": -3.15625, |
|
"step": 730 |
|
}, |
|
{ |
|
"epoch": 0.92, |
|
"grad_norm": 64.31719063334275, |
|
"learning_rate": 4.3846431565164596e-07, |
|
"logits/chosen": -1.390625, |
|
"logits/rejected": -1.3515625, |
|
"logps/chosen": -584.0, |
|
"logps/rejected": -608.0, |
|
"loss": 1.897, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": -1.828125, |
|
"rewards/margins": 1.9453125, |
|
"rewards/rejected": -3.78125, |
|
"step": 740 |
|
}, |
|
{ |
|
"epoch": 0.93, |
|
"grad_norm": 46.19657074878751, |
|
"learning_rate": 4.360754782993929e-07, |
|
"logits/chosen": -1.4765625, |
|
"logits/rejected": -2.25, |
|
"logps/chosen": -484.0, |
|
"logps/rejected": -434.0, |
|
"loss": 0.465, |
|
"rewards/accuracies": 0.800000011920929, |
|
"rewards/chosen": -1.328125, |
|
"rewards/margins": 0.7578125, |
|
"rewards/rejected": -2.078125, |
|
"step": 750 |
|
}, |
|
{ |
|
"epoch": 0.94, |
|
"grad_norm": 55.55089778313834, |
|
"learning_rate": 4.336479271643833e-07, |
|
"logits/chosen": -1.4765625, |
|
"logits/rejected": -1.4609375, |
|
"logps/chosen": -488.0, |
|
"logps/rejected": -462.0, |
|
"loss": 0.49, |
|
"rewards/accuracies": 0.800000011920929, |
|
"rewards/chosen": -1.2265625, |
|
"rewards/margins": 0.5390625, |
|
"rewards/rejected": -1.765625, |
|
"step": 760 |
|
}, |
|
{ |
|
"epoch": 0.95, |
|
"grad_norm": 77.27127183474478, |
|
"learning_rate": 4.3118216730880015e-07, |
|
"logits/chosen": -1.5859375, |
|
"logits/rejected": -2.515625, |
|
"logps/chosen": -552.0, |
|
"logps/rejected": -520.0, |
|
"loss": 0.4607, |
|
"rewards/accuracies": 0.8999999761581421, |
|
"rewards/chosen": -1.0703125, |
|
"rewards/margins": 1.0859375, |
|
"rewards/rejected": -2.15625, |
|
"step": 770 |
|
}, |
|
{ |
|
"epoch": 0.97, |
|
"grad_norm": 61.141458979311125, |
|
"learning_rate": 4.286787117443108e-07, |
|
"logits/chosen": -1.203125, |
|
"logits/rejected": -1.3984375, |
|
"logps/chosen": -588.0, |
|
"logps/rejected": -580.0, |
|
"loss": 0.4711, |
|
"rewards/accuracies": 0.800000011920929, |
|
"rewards/chosen": -1.1796875, |
|
"rewards/margins": 1.1796875, |
|
"rewards/rejected": -2.359375, |
|
"step": 780 |
|
}, |
|
{ |
|
"epoch": 0.98, |
|
"grad_norm": 48.22890637233016, |
|
"learning_rate": 4.261380813253328e-07, |
|
"logits/chosen": -1.828125, |
|
"logits/rejected": -2.578125, |
|
"logps/chosen": -448.0, |
|
"logps/rejected": -418.0, |
|
"loss": 0.4414, |
|
"rewards/accuracies": 0.699999988079071, |
|
"rewards/chosen": -1.1640625, |
|
"rewards/margins": 0.4375, |
|
"rewards/rejected": -1.6015625, |
|
"step": 790 |
|
}, |
|
{ |
|
"epoch": 0.99, |
|
"grad_norm": 34.09372820587962, |
|
"learning_rate": 4.2356080464066784e-07, |
|
"logits/chosen": -1.3671875, |
|
"logits/rejected": -3.171875, |
|
"logps/chosen": -536.0, |
|
"logps/rejected": -464.0, |
|
"loss": 0.4354, |
|
"rewards/accuracies": 0.800000011920929, |
|
"rewards/chosen": -1.140625, |
|
"rewards/margins": 1.5390625, |
|
"rewards/rejected": -2.6875, |
|
"step": 800 |
|
}, |
|
{ |
|
"epoch": 1.0, |
|
"grad_norm": 40.13298242319205, |
|
"learning_rate": 4.2094741790352673e-07, |
|
"logits/chosen": -1.859375, |
|
"logits/rejected": -2.203125, |
|
"logps/chosen": -516.0, |
|
"logps/rejected": -624.0, |
|
"loss": 0.3855, |
|
"rewards/accuracies": 0.8999999761581421, |
|
"rewards/chosen": -1.03125, |
|
"rewards/margins": 1.203125, |
|
"rewards/rejected": -2.234375, |
|
"step": 810 |
|
}, |
|
{ |
|
"epoch": 1.02, |
|
"grad_norm": 24.15795246510711, |
|
"learning_rate": 4.1829846483996813e-07, |
|
"logits/chosen": -1.265625, |
|
"logits/rejected": -2.015625, |
|
"logps/chosen": -884.0, |
|
"logps/rejected": -696.0, |
|
"loss": 0.2332, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": -0.154296875, |
|
"rewards/margins": 2.125, |
|
"rewards/rejected": -2.28125, |
|
"step": 820 |
|
}, |
|
{ |
|
"epoch": 1.03, |
|
"grad_norm": 40.12239181322821, |
|
"learning_rate": 4.156144965757735e-07, |
|
"logits/chosen": -2.828125, |
|
"logits/rejected": -2.40625, |
|
"logps/chosen": -416.0, |
|
"logps/rejected": -464.0, |
|
"loss": 0.264, |
|
"rewards/accuracies": 0.800000011920929, |
|
"rewards/chosen": -1.484375, |
|
"rewards/margins": 1.7734375, |
|
"rewards/rejected": -3.25, |
|
"step": 830 |
|
}, |
|
{ |
|
"epoch": 1.04, |
|
"grad_norm": 22.033239175244603, |
|
"learning_rate": 4.128960715217839e-07, |
|
"logits/chosen": -1.6015625, |
|
"logits/rejected": -2.375, |
|
"logps/chosen": -636.0, |
|
"logps/rejected": -572.0, |
|
"loss": 0.2482, |
|
"rewards/accuracies": 0.800000011920929, |
|
"rewards/chosen": -0.7109375, |
|
"rewards/margins": 2.125, |
|
"rewards/rejected": -2.828125, |
|
"step": 840 |
|
}, |
|
{ |
|
"epoch": 1.05, |
|
"grad_norm": 24.492268202299986, |
|
"learning_rate": 4.1014375525771963e-07, |
|
"logits/chosen": -1.21875, |
|
"logits/rejected": -1.875, |
|
"logps/chosen": -636.0, |
|
"logps/rejected": -600.0, |
|
"loss": 0.2447, |
|
"rewards/accuracies": 0.8999999761581421, |
|
"rewards/chosen": 0.0361328125, |
|
"rewards/margins": 2.453125, |
|
"rewards/rejected": -2.421875, |
|
"step": 850 |
|
}, |
|
{ |
|
"epoch": 1.07, |
|
"grad_norm": 31.641742017850387, |
|
"learning_rate": 4.0735812041450926e-07, |
|
"logits/chosen": -1.578125, |
|
"logits/rejected": -1.6484375, |
|
"logps/chosen": -596.0, |
|
"logps/rejected": -596.0, |
|
"loss": 0.2428, |
|
"rewards/accuracies": 0.800000011920929, |
|
"rewards/chosen": -0.5390625, |
|
"rewards/margins": 2.5625, |
|
"rewards/rejected": -3.09375, |
|
"step": 860 |
|
}, |
|
{ |
|
"epoch": 1.08, |
|
"grad_norm": 44.35625008242433, |
|
"learning_rate": 4.045397465551513e-07, |
|
"logits/chosen": -1.5234375, |
|
"logits/rejected": -1.796875, |
|
"logps/chosen": -520.0, |
|
"logps/rejected": -596.0, |
|
"loss": 0.2393, |
|
"rewards/accuracies": 0.800000011920929, |
|
"rewards/chosen": -1.0859375, |
|
"rewards/margins": 1.171875, |
|
"rewards/rejected": -2.265625, |
|
"step": 870 |
|
}, |
|
{ |
|
"epoch": 1.09, |
|
"grad_norm": 47.415350341880725, |
|
"learning_rate": 4.0168922005413384e-07, |
|
"logits/chosen": -1.1171875, |
|
"logits/rejected": -1.7109375, |
|
"logps/chosen": -692.0, |
|
"logps/rejected": -612.0, |
|
"loss": 0.2804, |
|
"rewards/accuracies": 0.8999999761581421, |
|
"rewards/chosen": -0.65625, |
|
"rewards/margins": 1.5, |
|
"rewards/rejected": -2.15625, |
|
"step": 880 |
|
}, |
|
{ |
|
"epoch": 1.1, |
|
"grad_norm": 31.910462711815494, |
|
"learning_rate": 3.988071339754366e-07, |
|
"logits/chosen": -1.296875, |
|
"logits/rejected": -1.765625, |
|
"logps/chosen": -632.0, |
|
"logps/rejected": -736.0, |
|
"loss": 0.2293, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": -0.21875, |
|
"rewards/margins": 2.296875, |
|
"rewards/rejected": -2.515625, |
|
"step": 890 |
|
}, |
|
{ |
|
"epoch": 1.12, |
|
"grad_norm": 27.7996029933054, |
|
"learning_rate": 3.958940879491418e-07, |
|
"logits/chosen": -1.4296875, |
|
"logits/rejected": -1.984375, |
|
"logps/chosen": -568.0, |
|
"logps/rejected": -498.0, |
|
"loss": 0.2693, |
|
"rewards/accuracies": 0.800000011920929, |
|
"rewards/chosen": -0.84765625, |
|
"rewards/margins": 1.734375, |
|
"rewards/rejected": -2.578125, |
|
"step": 900 |
|
}, |
|
{ |
|
"epoch": 1.13, |
|
"grad_norm": 24.76648851906884, |
|
"learning_rate": 3.9295068804667823e-07, |
|
"logits/chosen": -1.3515625, |
|
"logits/rejected": -2.015625, |
|
"logps/chosen": -498.0, |
|
"logps/rejected": -494.0, |
|
"loss": 0.2089, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": -0.73046875, |
|
"rewards/margins": 2.203125, |
|
"rewards/rejected": -2.9375, |
|
"step": 910 |
|
}, |
|
{ |
|
"epoch": 1.14, |
|
"grad_norm": 23.922743861511947, |
|
"learning_rate": 3.899775466547261e-07, |
|
"logits/chosen": -1.625, |
|
"logits/rejected": -1.9609375, |
|
"logps/chosen": -556.0, |
|
"logps/rejected": -532.0, |
|
"loss": 0.231, |
|
"rewards/accuracies": 0.8999999761581421, |
|
"rewards/chosen": -0.86328125, |
|
"rewards/margins": 1.8046875, |
|
"rewards/rejected": -2.671875, |
|
"step": 920 |
|
}, |
|
{ |
|
"epoch": 1.15, |
|
"grad_norm": 51.66225882393323, |
|
"learning_rate": 3.8697528234780674e-07, |
|
"logits/chosen": -2.015625, |
|
"logits/rejected": -1.6640625, |
|
"logps/chosen": -400.0, |
|
"logps/rejected": -612.0, |
|
"loss": 0.9229, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": -1.515625, |
|
"rewards/margins": 2.140625, |
|
"rewards/rejected": -3.640625, |
|
"step": 930 |
|
}, |
|
{ |
|
"epoch": 1.16, |
|
"grad_norm": 38.82610683477862, |
|
"learning_rate": 3.839445197595863e-07, |
|
"logits/chosen": -1.8671875, |
|
"logits/rejected": -2.046875, |
|
"logps/chosen": -418.0, |
|
"logps/rejected": -378.0, |
|
"loss": 0.231, |
|
"rewards/accuracies": 0.8999999761581421, |
|
"rewards/chosen": -0.71484375, |
|
"rewards/margins": 2.03125, |
|
"rewards/rejected": -2.75, |
|
"step": 940 |
|
}, |
|
{ |
|
"epoch": 1.18, |
|
"grad_norm": 22.389795986878887, |
|
"learning_rate": 3.8088588945291734e-07, |
|
"logits/chosen": -1.3515625, |
|
"logits/rejected": -2.453125, |
|
"logps/chosen": -568.0, |
|
"logps/rejected": -496.0, |
|
"loss": 0.2115, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": -0.90625, |
|
"rewards/margins": 1.8203125, |
|
"rewards/rejected": -2.734375, |
|
"step": 950 |
|
}, |
|
{ |
|
"epoch": 1.19, |
|
"grad_norm": 22.583198913209376, |
|
"learning_rate": 3.778000277886483e-07, |
|
"logits/chosen": -1.3984375, |
|
"logits/rejected": -2.046875, |
|
"logps/chosen": -668.0, |
|
"logps/rejected": -676.0, |
|
"loss": 0.2358, |
|
"rewards/accuracies": 0.8999999761581421, |
|
"rewards/chosen": -0.326171875, |
|
"rewards/margins": 2.78125, |
|
"rewards/rejected": -3.109375, |
|
"step": 960 |
|
}, |
|
{ |
|
"epoch": 1.2, |
|
"grad_norm": 21.507764548315773, |
|
"learning_rate": 3.746875767932255e-07, |
|
"logits/chosen": -1.609375, |
|
"logits/rejected": -2.234375, |
|
"logps/chosen": -588.0, |
|
"logps/rejected": -668.0, |
|
"loss": 0.2342, |
|
"rewards/accuracies": 0.8999999761581421, |
|
"rewards/chosen": -1.453125, |
|
"rewards/margins": 2.078125, |
|
"rewards/rejected": -3.53125, |
|
"step": 970 |
|
}, |
|
{ |
|
"epoch": 1.21, |
|
"grad_norm": 23.64514805939643, |
|
"learning_rate": 3.7154918402511714e-07, |
|
"logits/chosen": -1.5078125, |
|
"logits/rejected": -3.703125, |
|
"logps/chosen": -536.0, |
|
"logps/rejected": -436.0, |
|
"loss": 0.2427, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": -0.66015625, |
|
"rewards/margins": 2.46875, |
|
"rewards/rejected": -3.125, |
|
"step": 980 |
|
}, |
|
{ |
|
"epoch": 1.23, |
|
"grad_norm": 25.279762808321895, |
|
"learning_rate": 3.6838550244008573e-07, |
|
"logits/chosen": -1.9765625, |
|
"logits/rejected": -1.5234375, |
|
"logps/chosen": -556.0, |
|
"logps/rejected": -592.0, |
|
"loss": 0.2198, |
|
"rewards/accuracies": 0.8999999761581421, |
|
"rewards/chosen": -0.875, |
|
"rewards/margins": 1.6015625, |
|
"rewards/rejected": -2.484375, |
|
"step": 990 |
|
}, |
|
{ |
|
"epoch": 1.24, |
|
"grad_norm": 23.158055403708982, |
|
"learning_rate": 3.651971902553381e-07, |
|
"logits/chosen": -1.3984375, |
|
"logits/rejected": -2.609375, |
|
"logps/chosen": -564.0, |
|
"logps/rejected": -486.0, |
|
"loss": 0.2367, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": -0.63671875, |
|
"rewards/margins": 2.515625, |
|
"rewards/rejected": -3.140625, |
|
"step": 1000 |
|
}, |
|
{ |
|
"epoch": 1.24, |
|
"eval_logits/chosen": -1.0859375, |
|
"eval_logits/rejected": -0.9921875, |
|
"eval_logps/chosen": -564.0, |
|
"eval_logps/rejected": -656.0, |
|
"eval_loss": 0.4058724045753479, |
|
"eval_rewards/accuracies": 0.8888888955116272, |
|
"eval_rewards/chosen": -1.90625, |
|
"eval_rewards/margins": 1.5625, |
|
"eval_rewards/rejected": -3.453125, |
|
"eval_runtime": 50.8495, |
|
"eval_samples_per_second": 20.649, |
|
"eval_steps_per_second": 0.177, |
|
"step": 1000 |
|
}, |
|
{ |
|
"epoch": 1.25, |
|
"grad_norm": 39.231752515284, |
|
"learning_rate": 3.6198491081258066e-07, |
|
"logits/chosen": -1.1484375, |
|
"logits/rejected": -1.8828125, |
|
"logps/chosen": -604.0, |
|
"logps/rejected": -494.0, |
|
"loss": 0.2308, |
|
"rewards/accuracies": 0.699999988079071, |
|
"rewards/chosen": -1.265625, |
|
"rewards/margins": 1.734375, |
|
"rewards/rejected": -3.0, |
|
"step": 1010 |
|
}, |
|
{ |
|
"epoch": 1.26, |
|
"grad_norm": 29.796507962550486, |
|
"learning_rate": 3.58749332440008e-07, |
|
"logits/chosen": -1.359375, |
|
"logits/rejected": -1.6015625, |
|
"logps/chosen": -584.0, |
|
"logps/rejected": -556.0, |
|
"loss": 0.2362, |
|
"rewards/accuracies": 0.8999999761581421, |
|
"rewards/chosen": -1.2421875, |
|
"rewards/margins": 1.640625, |
|
"rewards/rejected": -2.875, |
|
"step": 1020 |
|
}, |
|
{ |
|
"epoch": 1.28, |
|
"grad_norm": 21.63597550992159, |
|
"learning_rate": 3.55491128313255e-07, |
|
"logits/chosen": -2.3125, |
|
"logits/rejected": -1.7734375, |
|
"logps/chosen": -496.0, |
|
"logps/rejected": -548.0, |
|
"loss": 0.2107, |
|
"rewards/accuracies": 0.800000011920929, |
|
"rewards/chosen": -1.5703125, |
|
"rewards/margins": 2.5, |
|
"rewards/rejected": -4.0625, |
|
"step": 1030 |
|
}, |
|
{ |
|
"epoch": 1.29, |
|
"grad_norm": 14.702295094535923, |
|
"learning_rate": 3.522109763153392e-07, |
|
"logits/chosen": -1.546875, |
|
"logits/rejected": -1.6328125, |
|
"logps/chosen": -442.0, |
|
"logps/rejected": -520.0, |
|
"loss": 0.2199, |
|
"rewards/accuracies": 0.8999999761581421, |
|
"rewards/chosen": -1.3984375, |
|
"rewards/margins": 1.796875, |
|
"rewards/rejected": -3.203125, |
|
"step": 1040 |
|
}, |
|
{ |
|
"epoch": 1.3, |
|
"grad_norm": 21.29596218246951, |
|
"learning_rate": 3.489095588956249e-07, |
|
"logits/chosen": -1.21875, |
|
"logits/rejected": -1.890625, |
|
"logps/chosen": -648.0, |
|
"logps/rejected": -668.0, |
|
"loss": 0.2325, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": -0.7890625, |
|
"rewards/margins": 2.53125, |
|
"rewards/rejected": -3.328125, |
|
"step": 1050 |
|
}, |
|
{ |
|
"epoch": 1.31, |
|
"grad_norm": 52.09643118168795, |
|
"learning_rate": 3.455875629278363e-07, |
|
"logits/chosen": -1.5390625, |
|
"logits/rejected": -2.140625, |
|
"logps/chosen": -540.0, |
|
"logps/rejected": -536.0, |
|
"loss": 0.2484, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": -1.2578125, |
|
"rewards/margins": 2.265625, |
|
"rewards/rejected": -3.515625, |
|
"step": 1060 |
|
}, |
|
{ |
|
"epoch": 1.33, |
|
"grad_norm": 35.57618950847242, |
|
"learning_rate": 3.4224567956715085e-07, |
|
"logits/chosen": -1.671875, |
|
"logits/rejected": -1.8515625, |
|
"logps/chosen": -536.0, |
|
"logps/rejected": -564.0, |
|
"loss": 0.2062, |
|
"rewards/accuracies": 0.8999999761581421, |
|
"rewards/chosen": -0.703125, |
|
"rewards/margins": 1.8203125, |
|
"rewards/rejected": -2.515625, |
|
"step": 1070 |
|
}, |
|
{ |
|
"epoch": 1.34, |
|
"grad_norm": 20.490288705612254, |
|
"learning_rate": 3.388846041064012e-07, |
|
"logits/chosen": -1.671875, |
|
"logits/rejected": -2.25, |
|
"logps/chosen": -796.0, |
|
"logps/rejected": -604.0, |
|
"loss": 0.2027, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": -1.0078125, |
|
"rewards/margins": 1.8125, |
|
"rewards/rejected": -2.828125, |
|
"step": 1080 |
|
}, |
|
{ |
|
"epoch": 1.35, |
|
"grad_norm": 25.587126609494646, |
|
"learning_rate": 3.355050358314172e-07, |
|
"logits/chosen": -1.125, |
|
"logits/rejected": -1.078125, |
|
"logps/chosen": -728.0, |
|
"logps/rejected": -732.0, |
|
"loss": 0.2549, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": -1.3203125, |
|
"rewards/margins": 2.125, |
|
"rewards/rejected": -3.4375, |
|
"step": 1090 |
|
}, |
|
{ |
|
"epoch": 1.36, |
|
"grad_norm": 32.538109074719216, |
|
"learning_rate": 3.321076778755358e-07, |
|
"logits/chosen": -1.8359375, |
|
"logits/rejected": -1.3125, |
|
"logps/chosen": -572.0, |
|
"logps/rejected": -568.0, |
|
"loss": 0.2117, |
|
"rewards/accuracies": 0.800000011920929, |
|
"rewards/chosen": -1.359375, |
|
"rewards/margins": 1.90625, |
|
"rewards/rejected": -3.265625, |
|
"step": 1100 |
|
}, |
|
{ |
|
"epoch": 1.38, |
|
"grad_norm": 19.634746831169892, |
|
"learning_rate": 3.2869323707331176e-07, |
|
"logits/chosen": -1.453125, |
|
"logits/rejected": -1.96875, |
|
"logps/chosen": -490.0, |
|
"logps/rejected": -536.0, |
|
"loss": 0.217, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": -1.390625, |
|
"rewards/margins": 2.171875, |
|
"rewards/rejected": -3.5625, |
|
"step": 1110 |
|
}, |
|
{ |
|
"epoch": 1.39, |
|
"grad_norm": 52.58964934069639, |
|
"learning_rate": 3.2526242381345766e-07, |
|
"logits/chosen": -2.21875, |
|
"logits/rejected": -2.390625, |
|
"logps/chosen": -484.0, |
|
"logps/rejected": -652.0, |
|
"loss": 0.2209, |
|
"rewards/accuracies": 0.800000011920929, |
|
"rewards/chosen": -1.1875, |
|
"rewards/margins": 3.359375, |
|
"rewards/rejected": -4.53125, |
|
"step": 1120 |
|
}, |
|
{ |
|
"epoch": 1.4, |
|
"grad_norm": 26.692041535833823, |
|
"learning_rate": 3.218159518910443e-07, |
|
"logits/chosen": -1.65625, |
|
"logits/rejected": -1.7265625, |
|
"logps/chosen": -540.0, |
|
"logps/rejected": -580.0, |
|
"loss": 0.2288, |
|
"rewards/accuracies": 0.8999999761581421, |
|
"rewards/chosen": -1.6328125, |
|
"rewards/margins": 2.015625, |
|
"rewards/rejected": -3.640625, |
|
"step": 1130 |
|
}, |
|
{ |
|
"epoch": 1.41, |
|
"grad_norm": 36.803960598284185, |
|
"learning_rate": 3.183545383589927e-07, |
|
"logits/chosen": -1.1484375, |
|
"logits/rejected": -1.5078125, |
|
"logps/chosen": -628.0, |
|
"logps/rejected": -620.0, |
|
"loss": 0.2016, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": -1.4375, |
|
"rewards/margins": 2.0625, |
|
"rewards/rejected": -3.5, |
|
"step": 1140 |
|
}, |
|
{ |
|
"epoch": 1.43, |
|
"grad_norm": 62.57539343068982, |
|
"learning_rate": 3.148789033788889e-07, |
|
"logits/chosen": -1.3359375, |
|
"logits/rejected": -1.5, |
|
"logps/chosen": -504.0, |
|
"logps/rejected": -524.0, |
|
"loss": 0.1919, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": -1.0546875, |
|
"rewards/margins": 2.234375, |
|
"rewards/rejected": -3.28125, |
|
"step": 1150 |
|
}, |
|
{ |
|
"epoch": 1.44, |
|
"grad_norm": 27.545095580039987, |
|
"learning_rate": 3.113897700711502e-07, |
|
"logits/chosen": -0.9375, |
|
"logits/rejected": -1.7578125, |
|
"logps/chosen": -412.0, |
|
"logps/rejected": -728.0, |
|
"loss": 0.2339, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": -1.484375, |
|
"rewards/margins": 2.71875, |
|
"rewards/rejected": -4.1875, |
|
"step": 1160 |
|
}, |
|
{ |
|
"epoch": 1.45, |
|
"grad_norm": 20.69956429305614, |
|
"learning_rate": 3.078878643645778e-07, |
|
"logits/chosen": -1.7109375, |
|
"logits/rejected": -1.4296875, |
|
"logps/chosen": -564.0, |
|
"logps/rejected": -628.0, |
|
"loss": 0.2147, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": -1.921875, |
|
"rewards/margins": 2.25, |
|
"rewards/rejected": -4.1875, |
|
"step": 1170 |
|
}, |
|
{ |
|
"epoch": 1.46, |
|
"grad_norm": 30.998852642526103, |
|
"learning_rate": 3.0437391484532403e-07, |
|
"logits/chosen": -1.1328125, |
|
"logits/rejected": -1.9921875, |
|
"logps/chosen": -804.0, |
|
"logps/rejected": -600.0, |
|
"loss": 0.2046, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": -1.203125, |
|
"rewards/margins": 2.390625, |
|
"rewards/rejected": -3.59375, |
|
"step": 1180 |
|
}, |
|
{ |
|
"epoch": 1.47, |
|
"grad_norm": 20.204732181232373, |
|
"learning_rate": 3.0084865260530666e-07, |
|
"logits/chosen": -1.6328125, |
|
"logits/rejected": -1.1875, |
|
"logps/chosen": -552.0, |
|
"logps/rejected": -780.0, |
|
"loss": 0.2196, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": -0.92578125, |
|
"rewards/margins": 3.203125, |
|
"rewards/rejected": -4.125, |
|
"step": 1190 |
|
}, |
|
{ |
|
"epoch": 1.49, |
|
"grad_norm": 38.16020262352693, |
|
"learning_rate": 2.9731281109010253e-07, |
|
"logits/chosen": -1.8203125, |
|
"logits/rejected": -1.8359375, |
|
"logps/chosen": -446.0, |
|
"logps/rejected": -484.0, |
|
"loss": 0.233, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": -1.640625, |
|
"rewards/margins": 2.40625, |
|
"rewards/rejected": -4.0625, |
|
"step": 1200 |
|
}, |
|
{ |
|
"epoch": 1.5, |
|
"grad_norm": 29.06302627643078, |
|
"learning_rate": 2.937671259463512e-07, |
|
"logits/chosen": -1.8515625, |
|
"logits/rejected": -1.625, |
|
"logps/chosen": -536.0, |
|
"logps/rejected": -648.0, |
|
"loss": 0.2302, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": -1.296875, |
|
"rewards/margins": 3.484375, |
|
"rewards/rejected": -4.78125, |
|
"step": 1210 |
|
}, |
|
{ |
|
"epoch": 1.51, |
|
"grad_norm": 91.98272670818265, |
|
"learning_rate": 2.9021233486869994e-07, |
|
"logits/chosen": -1.4375, |
|
"logits/rejected": -1.7421875, |
|
"logps/chosen": -672.0, |
|
"logps/rejected": -560.0, |
|
"loss": 0.2402, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": -1.484375, |
|
"rewards/margins": 1.6953125, |
|
"rewards/rejected": -3.1875, |
|
"step": 1220 |
|
}, |
|
{ |
|
"epoch": 1.52, |
|
"grad_norm": 30.373071126803815, |
|
"learning_rate": 2.8664917744632423e-07, |
|
"logits/chosen": -1.6484375, |
|
"logits/rejected": -1.875, |
|
"logps/chosen": -544.0, |
|
"logps/rejected": -640.0, |
|
"loss": 0.2211, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": -1.90625, |
|
"rewards/margins": 2.40625, |
|
"rewards/rejected": -4.3125, |
|
"step": 1230 |
|
}, |
|
{ |
|
"epoch": 1.54, |
|
"grad_norm": 24.064215386247714, |
|
"learning_rate": 2.830783950090522e-07, |
|
"logits/chosen": -1.2265625, |
|
"logits/rejected": -1.734375, |
|
"logps/chosen": -528.0, |
|
"logps/rejected": -462.0, |
|
"loss": 0.2278, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": -1.875, |
|
"rewards/margins": 1.96875, |
|
"rewards/rejected": -3.828125, |
|
"step": 1240 |
|
}, |
|
{ |
|
"epoch": 1.55, |
|
"grad_norm": 38.77040858523863, |
|
"learning_rate": 2.7950073047312855e-07, |
|
"logits/chosen": -1.2109375, |
|
"logits/rejected": -1.7109375, |
|
"logps/chosen": -640.0, |
|
"logps/rejected": -752.0, |
|
"loss": 0.2113, |
|
"rewards/accuracies": 0.8999999761581421, |
|
"rewards/chosen": -1.8125, |
|
"rewards/margins": 2.40625, |
|
"rewards/rejected": -4.21875, |
|
"step": 1250 |
|
}, |
|
{ |
|
"epoch": 1.56, |
|
"grad_norm": 36.115163088163186, |
|
"learning_rate": 2.759169281866472e-07, |
|
"logits/chosen": -1.671875, |
|
"logits/rejected": -2.34375, |
|
"logps/chosen": -460.0, |
|
"logps/rejected": -430.0, |
|
"loss": 0.2048, |
|
"rewards/accuracies": 0.800000011920929, |
|
"rewards/chosen": -1.953125, |
|
"rewards/margins": 1.7734375, |
|
"rewards/rejected": -3.71875, |
|
"step": 1260 |
|
}, |
|
{ |
|
"epoch": 1.57, |
|
"grad_norm": 21.74767552298878, |
|
"learning_rate": 2.72327733774687e-07, |
|
"logits/chosen": -1.28125, |
|
"logits/rejected": -1.875, |
|
"logps/chosen": -752.0, |
|
"logps/rejected": -588.0, |
|
"loss": 0.2159, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": -0.8125, |
|
"rewards/margins": 2.40625, |
|
"rewards/rejected": -3.21875, |
|
"step": 1270 |
|
}, |
|
{ |
|
"epoch": 1.59, |
|
"grad_norm": 29.90116775368825, |
|
"learning_rate": 2.6873389398418085e-07, |
|
"logits/chosen": -1.4453125, |
|
"logits/rejected": -1.9921875, |
|
"logps/chosen": -420.0, |
|
"logps/rejected": -490.0, |
|
"loss": 0.2191, |
|
"rewards/accuracies": 0.800000011920929, |
|
"rewards/chosen": -2.390625, |
|
"rewards/margins": 1.59375, |
|
"rewards/rejected": -3.984375, |
|
"step": 1280 |
|
}, |
|
{ |
|
"epoch": 1.6, |
|
"grad_norm": 23.793756365026436, |
|
"learning_rate": 2.6513615652855246e-07, |
|
"logits/chosen": -1.5546875, |
|
"logits/rejected": -1.3046875, |
|
"logps/chosen": -572.0, |
|
"logps/rejected": -600.0, |
|
"loss": 0.2011, |
|
"rewards/accuracies": 0.800000011920929, |
|
"rewards/chosen": -1.84375, |
|
"rewards/margins": 1.53125, |
|
"rewards/rejected": -3.375, |
|
"step": 1290 |
|
}, |
|
{ |
|
"epoch": 1.61, |
|
"grad_norm": 43.491509201941334, |
|
"learning_rate": 2.6153526993215085e-07, |
|
"logits/chosen": -1.65625, |
|
"logits/rejected": -1.6171875, |
|
"logps/chosen": -500.0, |
|
"logps/rejected": -548.0, |
|
"loss": 0.1999, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": -2.375, |
|
"rewards/margins": 2.34375, |
|
"rewards/rejected": -4.71875, |
|
"step": 1300 |
|
}, |
|
{ |
|
"epoch": 1.62, |
|
"grad_norm": 43.19768159653058, |
|
"learning_rate": 2.579319833745169e-07, |
|
"logits/chosen": -1.546875, |
|
"logits/rejected": -1.859375, |
|
"logps/chosen": -460.0, |
|
"logps/rejected": -406.0, |
|
"loss": 0.2023, |
|
"rewards/accuracies": 0.8999999761581421, |
|
"rewards/chosen": -1.65625, |
|
"rewards/margins": 2.171875, |
|
"rewards/rejected": -3.828125, |
|
"step": 1310 |
|
}, |
|
{ |
|
"epoch": 1.64, |
|
"grad_norm": 35.267489669162956, |
|
"learning_rate": 2.5432704653451374e-07, |
|
"logits/chosen": -1.3515625, |
|
"logits/rejected": -1.234375, |
|
"logps/chosen": -532.0, |
|
"logps/rejected": -728.0, |
|
"loss": 0.1962, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": -2.046875, |
|
"rewards/margins": 3.109375, |
|
"rewards/rejected": -5.15625, |
|
"step": 1320 |
|
}, |
|
{ |
|
"epoch": 1.65, |
|
"grad_norm": 17.971641900070907, |
|
"learning_rate": 2.5072120943435246e-07, |
|
"logits/chosen": -1.1953125, |
|
"logits/rejected": -1.4765625, |
|
"logps/chosen": -784.0, |
|
"logps/rejected": -808.0, |
|
"loss": 0.1927, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": -1.453125, |
|
"rewards/margins": 2.765625, |
|
"rewards/rejected": -4.21875, |
|
"step": 1330 |
|
}, |
|
{ |
|
"epoch": 1.66, |
|
"grad_norm": 26.236832847315178, |
|
"learning_rate": 2.471152222835471e-07, |
|
"logits/chosen": -2.1875, |
|
"logits/rejected": -1.5859375, |
|
"logps/chosen": -684.0, |
|
"logps/rejected": -624.0, |
|
"loss": 0.2161, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": -1.078125, |
|
"rewards/margins": 3.125, |
|
"rewards/rejected": -4.1875, |
|
"step": 1340 |
|
}, |
|
{ |
|
"epoch": 1.67, |
|
"grad_norm": 25.147006316460192, |
|
"learning_rate": 2.4350983532283043e-07, |
|
"logits/chosen": -1.4296875, |
|
"logits/rejected": -1.1484375, |
|
"logps/chosen": -472.0, |
|
"logps/rejected": -592.0, |
|
"loss": 0.189, |
|
"rewards/accuracies": 0.699999988079071, |
|
"rewards/chosen": -1.34375, |
|
"rewards/margins": 2.125, |
|
"rewards/rejected": -3.46875, |
|
"step": 1350 |
|
}, |
|
{ |
|
"epoch": 1.69, |
|
"grad_norm": 54.14713597288178, |
|
"learning_rate": 2.39905798668063e-07, |
|
"logits/chosen": -1.34375, |
|
"logits/rejected": -1.6875, |
|
"logps/chosen": -510.0, |
|
"logps/rejected": -568.0, |
|
"loss": 0.2336, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": -1.8359375, |
|
"rewards/margins": 2.484375, |
|
"rewards/rejected": -4.3125, |
|
"step": 1360 |
|
}, |
|
{ |
|
"epoch": 1.7, |
|
"grad_norm": 21.942609458104677, |
|
"learning_rate": 2.3630386215416878e-07, |
|
"logits/chosen": -1.5390625, |
|
"logits/rejected": -1.8671875, |
|
"logps/chosen": -620.0, |
|
"logps/rejected": -620.0, |
|
"loss": 0.1966, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": -1.1484375, |
|
"rewards/margins": 3.0, |
|
"rewards/rejected": -4.15625, |
|
"step": 1370 |
|
}, |
|
{ |
|
"epoch": 1.71, |
|
"grad_norm": 72.72707544492208, |
|
"learning_rate": 2.3270477517912835e-07, |
|
"logits/chosen": -1.1953125, |
|
"logits/rejected": -1.1171875, |
|
"logps/chosen": -716.0, |
|
"logps/rejected": -708.0, |
|
"loss": 0.2211, |
|
"rewards/accuracies": 0.800000011920929, |
|
"rewards/chosen": -2.1875, |
|
"rewards/margins": 1.84375, |
|
"rewards/rejected": -4.03125, |
|
"step": 1380 |
|
}, |
|
{ |
|
"epoch": 1.72, |
|
"grad_norm": 22.411175858752106, |
|
"learning_rate": 2.291092865480641e-07, |
|
"logits/chosen": -1.4375, |
|
"logits/rejected": -1.7890625, |
|
"logps/chosen": -568.0, |
|
"logps/rejected": -680.0, |
|
"loss": 0.2027, |
|
"rewards/accuracies": 0.8999999761581421, |
|
"rewards/chosen": -1.7109375, |
|
"rewards/margins": 3.09375, |
|
"rewards/rejected": -4.8125, |
|
"step": 1390 |
|
}, |
|
{ |
|
"epoch": 1.73, |
|
"grad_norm": 124.47455963689552, |
|
"learning_rate": 2.2551814431744758e-07, |
|
"logits/chosen": -1.4765625, |
|
"logits/rejected": -1.5, |
|
"logps/chosen": -592.0, |
|
"logps/rejected": -600.0, |
|
"loss": 0.3274, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": -1.9765625, |
|
"rewards/margins": 2.40625, |
|
"rewards/rejected": -4.375, |
|
"step": 1400 |
|
}, |
|
{ |
|
"epoch": 1.75, |
|
"grad_norm": 40.10464388387894, |
|
"learning_rate": 2.2193209563946382e-07, |
|
"logits/chosen": -1.1875, |
|
"logits/rejected": -1.859375, |
|
"logps/chosen": -740.0, |
|
"logps/rejected": -604.0, |
|
"loss": 0.2608, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": -2.34375, |
|
"rewards/margins": 2.984375, |
|
"rewards/rejected": -5.3125, |
|
"step": 1410 |
|
}, |
|
{ |
|
"epoch": 1.76, |
|
"grad_norm": 139.91739653913288, |
|
"learning_rate": 2.1835188660656265e-07, |
|
"logits/chosen": -1.25, |
|
"logits/rejected": -1.4921875, |
|
"logps/chosen": -600.0, |
|
"logps/rejected": -580.0, |
|
"loss": 0.1985, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": -1.546875, |
|
"rewards/margins": 2.125, |
|
"rewards/rejected": -3.671875, |
|
"step": 1420 |
|
}, |
|
{ |
|
"epoch": 1.77, |
|
"grad_norm": 45.52923168908793, |
|
"learning_rate": 2.147782620962314e-07, |
|
"logits/chosen": -1.5, |
|
"logits/rejected": -1.7109375, |
|
"logps/chosen": -544.0, |
|
"logps/rejected": -540.0, |
|
"loss": 0.2292, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": -1.4375, |
|
"rewards/margins": 2.140625, |
|
"rewards/rejected": -3.578125, |
|
"step": 1430 |
|
}, |
|
{ |
|
"epoch": 1.78, |
|
"grad_norm": 55.0384600231688, |
|
"learning_rate": 2.112119656160199e-07, |
|
"logits/chosen": -1.15625, |
|
"logits/rejected": -1.25, |
|
"logps/chosen": -620.0, |
|
"logps/rejected": -700.0, |
|
"loss": 0.2028, |
|
"rewards/accuracies": 0.800000011920929, |
|
"rewards/chosen": -1.8046875, |
|
"rewards/margins": 1.6953125, |
|
"rewards/rejected": -3.484375, |
|
"step": 1440 |
|
}, |
|
{ |
|
"epoch": 1.8, |
|
"grad_norm": 18.297415510730342, |
|
"learning_rate": 2.0765373914885047e-07, |
|
"logits/chosen": -1.8203125, |
|
"logits/rejected": -1.5234375, |
|
"logps/chosen": -418.0, |
|
"logps/rejected": -508.0, |
|
"loss": 0.2187, |
|
"rewards/accuracies": 0.800000011920929, |
|
"rewards/chosen": -1.3828125, |
|
"rewards/margins": 1.5234375, |
|
"rewards/rejected": -2.90625, |
|
"step": 1450 |
|
}, |
|
{ |
|
"epoch": 1.81, |
|
"grad_norm": 25.14760440570739, |
|
"learning_rate": 2.0410432299864556e-07, |
|
"logits/chosen": -1.46875, |
|
"logits/rejected": -1.3828125, |
|
"logps/chosen": -584.0, |
|
"logps/rejected": -816.0, |
|
"loss": 0.2162, |
|
"rewards/accuracies": 0.8999999761581421, |
|
"rewards/chosen": -2.125, |
|
"rewards/margins": 2.9375, |
|
"rewards/rejected": -5.0625, |
|
"step": 1460 |
|
}, |
|
{ |
|
"epoch": 1.82, |
|
"grad_norm": 25.279928980019168, |
|
"learning_rate": 2.0056445563630423e-07, |
|
"logits/chosen": -1.9609375, |
|
"logits/rejected": -1.5625, |
|
"logps/chosen": -532.0, |
|
"logps/rejected": -604.0, |
|
"loss": 0.217, |
|
"rewards/accuracies": 0.800000011920929, |
|
"rewards/chosen": -1.40625, |
|
"rewards/margins": 2.046875, |
|
"rewards/rejected": -3.46875, |
|
"step": 1470 |
|
}, |
|
{ |
|
"epoch": 1.83, |
|
"grad_norm": 31.005511813027777, |
|
"learning_rate": 1.9703487354606018e-07, |
|
"logits/chosen": -2.640625, |
|
"logits/rejected": -1.96875, |
|
"logps/chosen": -564.0, |
|
"logps/rejected": -628.0, |
|
"loss": 0.2051, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": -1.25, |
|
"rewards/margins": 3.265625, |
|
"rewards/rejected": -4.53125, |
|
"step": 1480 |
|
}, |
|
{ |
|
"epoch": 1.85, |
|
"grad_norm": 17.907410467346242, |
|
"learning_rate": 1.935163110722533e-07, |
|
"logits/chosen": -1.578125, |
|
"logits/rejected": -1.8515625, |
|
"logps/chosen": -672.0, |
|
"logps/rejected": -528.0, |
|
"loss": 0.2019, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": -0.82421875, |
|
"rewards/margins": 2.359375, |
|
"rewards/rejected": -3.171875, |
|
"step": 1490 |
|
}, |
|
{ |
|
"epoch": 1.86, |
|
"grad_norm": 37.56302682379733, |
|
"learning_rate": 1.900095002665459e-07, |
|
"logits/chosen": -1.375, |
|
"logits/rejected": -1.5859375, |
|
"logps/chosen": -544.0, |
|
"logps/rejected": -668.0, |
|
"loss": 0.2247, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": -1.4609375, |
|
"rewards/margins": 2.109375, |
|
"rewards/rejected": -3.5625, |
|
"step": 1500 |
|
}, |
|
{ |
|
"epoch": 1.86, |
|
"eval_logits/chosen": -1.0546875, |
|
"eval_logits/rejected": -0.9765625, |
|
"eval_logps/chosen": -568.0, |
|
"eval_logps/rejected": -668.0, |
|
"eval_loss": 0.3828948140144348, |
|
"eval_rewards/accuracies": 0.7777777910232544, |
|
"eval_rewards/chosen": -2.203125, |
|
"eval_rewards/margins": 1.8125, |
|
"eval_rewards/rejected": -4.03125, |
|
"eval_runtime": 49.0823, |
|
"eval_samples_per_second": 21.393, |
|
"eval_steps_per_second": 0.183, |
|
"step": 1500 |
|
}, |
|
{ |
|
"epoch": 1.87, |
|
"grad_norm": 142.03485499088688, |
|
"learning_rate": 1.8651517073561673e-07, |
|
"logits/chosen": -1.9140625, |
|
"logits/rejected": -1.84375, |
|
"logps/chosen": -516.0, |
|
"logps/rejected": -444.0, |
|
"loss": 0.2354, |
|
"rewards/accuracies": 0.8999999761581421, |
|
"rewards/chosen": -1.0546875, |
|
"rewards/margins": 1.6640625, |
|
"rewards/rejected": -2.71875, |
|
"step": 1510 |
|
}, |
|
{ |
|
"epoch": 1.88, |
|
"grad_norm": 20.904872000144213, |
|
"learning_rate": 1.8303404948936285e-07, |
|
"logits/chosen": -1.5625, |
|
"logits/rejected": -1.3828125, |
|
"logps/chosen": -466.0, |
|
"logps/rejected": -492.0, |
|
"loss": 0.2063, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": -0.98046875, |
|
"rewards/margins": 2.203125, |
|
"rewards/rejected": -3.171875, |
|
"step": 1520 |
|
}, |
|
{ |
|
"epoch": 1.9, |
|
"grad_norm": 23.082974237096174, |
|
"learning_rate": 1.7956686078964255e-07, |
|
"logits/chosen": -1.375, |
|
"logits/rejected": -1.4375, |
|
"logps/chosen": -528.0, |
|
"logps/rejected": -656.0, |
|
"loss": 0.2083, |
|
"rewards/accuracies": 0.8999999761581421, |
|
"rewards/chosen": -1.421875, |
|
"rewards/margins": 3.09375, |
|
"rewards/rejected": -4.5, |
|
"step": 1530 |
|
}, |
|
{ |
|
"epoch": 1.91, |
|
"grad_norm": 65.20385510486626, |
|
"learning_rate": 1.7611432599958924e-07, |
|
"logits/chosen": -1.9140625, |
|
"logits/rejected": -2.234375, |
|
"logps/chosen": -352.0, |
|
"logps/rejected": -392.0, |
|
"loss": 0.2083, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": -1.3515625, |
|
"rewards/margins": 1.9609375, |
|
"rewards/rejected": -3.3125, |
|
"step": 1540 |
|
}, |
|
{ |
|
"epoch": 1.92, |
|
"grad_norm": 26.23633528972487, |
|
"learning_rate": 1.726771634335293e-07, |
|
"logits/chosen": -1.4609375, |
|
"logits/rejected": -2.0625, |
|
"logps/chosen": -492.0, |
|
"logps/rejected": -456.0, |
|
"loss": 0.2321, |
|
"rewards/accuracies": 0.8999999761581421, |
|
"rewards/chosen": -1.0703125, |
|
"rewards/margins": 1.6015625, |
|
"rewards/rejected": -2.671875, |
|
"step": 1550 |
|
}, |
|
{ |
|
"epoch": 1.93, |
|
"grad_norm": 49.10798542538174, |
|
"learning_rate": 1.6925608820753325e-07, |
|
"logits/chosen": -0.83203125, |
|
"logits/rejected": -1.1953125, |
|
"logps/chosen": -708.0, |
|
"logps/rejected": -880.0, |
|
"loss": 0.2232, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": -0.86328125, |
|
"rewards/margins": 2.625, |
|
"rewards/rejected": -3.484375, |
|
"step": 1560 |
|
}, |
|
{ |
|
"epoch": 1.95, |
|
"grad_norm": 52.60647313486629, |
|
"learning_rate": 1.6585181209063321e-07, |
|
"logits/chosen": -1.71875, |
|
"logits/rejected": -1.4921875, |
|
"logps/chosen": -472.0, |
|
"logps/rejected": -704.0, |
|
"loss": 0.1907, |
|
"rewards/accuracies": 0.8999999761581421, |
|
"rewards/chosen": -1.5546875, |
|
"rewards/margins": 2.578125, |
|
"rewards/rejected": -4.125, |
|
"step": 1570 |
|
}, |
|
{ |
|
"epoch": 1.96, |
|
"grad_norm": 24.213468426964006, |
|
"learning_rate": 1.6246504335673625e-07, |
|
"logits/chosen": -1.0390625, |
|
"logits/rejected": -1.4453125, |
|
"logps/chosen": -668.0, |
|
"logps/rejected": -856.0, |
|
"loss": 0.2086, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": -0.9375, |
|
"rewards/margins": 2.15625, |
|
"rewards/rejected": -3.09375, |
|
"step": 1580 |
|
}, |
|
{ |
|
"epoch": 1.97, |
|
"grad_norm": 47.733763616697836, |
|
"learning_rate": 1.590964866372652e-07, |
|
"logits/chosen": -1.09375, |
|
"logits/rejected": -1.2734375, |
|
"logps/chosen": -636.0, |
|
"logps/rejected": -784.0, |
|
"loss": 0.2083, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": -1.3515625, |
|
"rewards/margins": 2.828125, |
|
"rewards/rejected": -4.1875, |
|
"step": 1590 |
|
}, |
|
{ |
|
"epoch": 1.98, |
|
"grad_norm": 37.63668160123638, |
|
"learning_rate": 1.5574684277455685e-07, |
|
"logits/chosen": -1.765625, |
|
"logits/rejected": -1.1953125, |
|
"logps/chosen": -464.0, |
|
"logps/rejected": -640.0, |
|
"loss": 0.22, |
|
"rewards/accuracies": 0.800000011920929, |
|
"rewards/chosen": -1.5234375, |
|
"rewards/margins": 1.765625, |
|
"rewards/rejected": -3.28125, |
|
"step": 1600 |
|
}, |
|
{ |
|
"epoch": 2.0, |
|
"grad_norm": 20.84423028894674, |
|
"learning_rate": 1.5241680867604905e-07, |
|
"logits/chosen": -1.0078125, |
|
"logits/rejected": -2.34375, |
|
"logps/chosen": -660.0, |
|
"logps/rejected": -624.0, |
|
"loss": 0.2062, |
|
"rewards/accuracies": 0.8999999761581421, |
|
"rewards/chosen": -0.73046875, |
|
"rewards/margins": 2.265625, |
|
"rewards/rejected": -3.0, |
|
"step": 1610 |
|
}, |
|
{ |
|
"epoch": 2.01, |
|
"grad_norm": 15.238180752697565, |
|
"learning_rate": 1.4910707716928586e-07, |
|
"logits/chosen": -1.75, |
|
"logits/rejected": -2.375, |
|
"logps/chosen": -568.0, |
|
"logps/rejected": -696.0, |
|
"loss": 0.1306, |
|
"rewards/accuracies": 0.8999999761581421, |
|
"rewards/chosen": -1.828125, |
|
"rewards/margins": 3.359375, |
|
"rewards/rejected": -5.1875, |
|
"step": 1620 |
|
}, |
|
{ |
|
"epoch": 2.02, |
|
"grad_norm": 13.583277201205796, |
|
"learning_rate": 1.4581833685777228e-07, |
|
"logits/chosen": -1.34375, |
|
"logits/rejected": -1.578125, |
|
"logps/chosen": -552.0, |
|
"logps/rejected": -640.0, |
|
"loss": 0.1173, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": -1.5234375, |
|
"rewards/margins": 2.65625, |
|
"rewards/rejected": -4.1875, |
|
"step": 1630 |
|
}, |
|
{ |
|
"epoch": 2.03, |
|
"grad_norm": 14.86440122341942, |
|
"learning_rate": 1.4255127197770707e-07, |
|
"logits/chosen": -1.4609375, |
|
"logits/rejected": -1.3828125, |
|
"logps/chosen": -434.0, |
|
"logps/rejected": -552.0, |
|
"loss": 0.1149, |
|
"rewards/accuracies": 0.8999999761581421, |
|
"rewards/chosen": -1.65625, |
|
"rewards/margins": 2.515625, |
|
"rewards/rejected": -4.1875, |
|
"step": 1640 |
|
}, |
|
{ |
|
"epoch": 2.04, |
|
"grad_norm": 15.578800057924948, |
|
"learning_rate": 1.3930656225562474e-07, |
|
"logits/chosen": -1.6640625, |
|
"logits/rejected": -1.515625, |
|
"logps/chosen": -540.0, |
|
"logps/rejected": -620.0, |
|
"loss": 0.1074, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": -1.90625, |
|
"rewards/margins": 3.28125, |
|
"rewards/rejected": -5.1875, |
|
"step": 1650 |
|
}, |
|
{ |
|
"epoch": 2.06, |
|
"grad_norm": 13.991553452696552, |
|
"learning_rate": 1.360848827669756e-07, |
|
"logits/chosen": -1.421875, |
|
"logits/rejected": -1.2265625, |
|
"logps/chosen": -524.0, |
|
"logps/rejected": -520.0, |
|
"loss": 0.1255, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": -1.7578125, |
|
"rewards/margins": 2.734375, |
|
"rewards/rejected": -4.5, |
|
"step": 1660 |
|
}, |
|
{ |
|
"epoch": 2.07, |
|
"grad_norm": 21.745298822673373, |
|
"learning_rate": 1.3288690379567314e-07, |
|
"logits/chosen": -1.4140625, |
|
"logits/rejected": -1.84375, |
|
"logps/chosen": -506.0, |
|
"logps/rejected": -544.0, |
|
"loss": 0.123, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": -1.78125, |
|
"rewards/margins": 2.171875, |
|
"rewards/rejected": -3.953125, |
|
"step": 1670 |
|
}, |
|
{ |
|
"epoch": 2.08, |
|
"grad_norm": 15.334862616251963, |
|
"learning_rate": 1.2971329069463932e-07, |
|
"logits/chosen": -1.328125, |
|
"logits/rejected": -1.8984375, |
|
"logps/chosen": -632.0, |
|
"logps/rejected": -672.0, |
|
"loss": 0.1169, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": -1.7421875, |
|
"rewards/margins": 3.03125, |
|
"rewards/rejected": -4.78125, |
|
"step": 1680 |
|
}, |
|
{ |
|
"epoch": 2.09, |
|
"grad_norm": 36.5802518789977, |
|
"learning_rate": 1.2656470374737434e-07, |
|
"logits/chosen": -1.1875, |
|
"logits/rejected": -1.3671875, |
|
"logps/chosen": -716.0, |
|
"logps/rejected": -1024.0, |
|
"loss": 0.1232, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": -1.859375, |
|
"rewards/margins": 4.25, |
|
"rewards/rejected": -6.125, |
|
"step": 1690 |
|
}, |
|
{ |
|
"epoch": 2.11, |
|
"grad_norm": 35.20242961161644, |
|
"learning_rate": 1.2344179803058264e-07, |
|
"logits/chosen": -1.2578125, |
|
"logits/rejected": -1.9921875, |
|
"logps/chosen": -528.0, |
|
"logps/rejected": -624.0, |
|
"loss": 0.1247, |
|
"rewards/accuracies": 0.8999999761581421, |
|
"rewards/chosen": -2.1875, |
|
"rewards/margins": 2.25, |
|
"rewards/rejected": -4.4375, |
|
"step": 1700 |
|
}, |
|
{ |
|
"epoch": 2.12, |
|
"grad_norm": 20.682912146389263, |
|
"learning_rate": 1.203452232778807e-07, |
|
"logits/chosen": -1.4375, |
|
"logits/rejected": -1.6015625, |
|
"logps/chosen": -748.0, |
|
"logps/rejected": -824.0, |
|
"loss": 0.1213, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": -1.78125, |
|
"rewards/margins": 3.921875, |
|
"rewards/rejected": -5.71875, |
|
"step": 1710 |
|
}, |
|
{ |
|
"epoch": 2.13, |
|
"grad_norm": 21.01075482943445, |
|
"learning_rate": 1.1727562374461788e-07, |
|
"logits/chosen": -1.9765625, |
|
"logits/rejected": -1.515625, |
|
"logps/chosen": -532.0, |
|
"logps/rejected": -620.0, |
|
"loss": 0.1279, |
|
"rewards/accuracies": 0.8999999761581421, |
|
"rewards/chosen": -1.5859375, |
|
"rewards/margins": 3.53125, |
|
"rewards/rejected": -5.125, |
|
"step": 1720 |
|
}, |
|
{ |
|
"epoch": 2.14, |
|
"grad_norm": 29.30233670676864, |
|
"learning_rate": 1.142336380738361e-07, |
|
"logits/chosen": -1.3203125, |
|
"logits/rejected": -1.109375, |
|
"logps/chosen": -564.0, |
|
"logps/rejected": -544.0, |
|
"loss": 0.1133, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": -2.3125, |
|
"rewards/margins": 2.90625, |
|
"rewards/rejected": -5.21875, |
|
"step": 1730 |
|
}, |
|
{ |
|
"epoch": 2.16, |
|
"grad_norm": 16.664591107532367, |
|
"learning_rate": 1.1121989916339756e-07, |
|
"logits/chosen": -1.203125, |
|
"logits/rejected": -2.9375, |
|
"logps/chosen": -732.0, |
|
"logps/rejected": -624.0, |
|
"loss": 0.1121, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": -2.46875, |
|
"rewards/margins": 3.109375, |
|
"rewards/rejected": -5.59375, |
|
"step": 1740 |
|
}, |
|
{ |
|
"epoch": 2.17, |
|
"grad_norm": 83.44488397290417, |
|
"learning_rate": 1.0823503403430734e-07, |
|
"logits/chosen": -1.25, |
|
"logits/rejected": -1.5546875, |
|
"logps/chosen": -648.0, |
|
"logps/rejected": -508.0, |
|
"loss": 0.1218, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": -1.859375, |
|
"rewards/margins": 2.90625, |
|
"rewards/rejected": -4.78125, |
|
"step": 1750 |
|
}, |
|
{ |
|
"epoch": 2.18, |
|
"grad_norm": 15.585689114051172, |
|
"learning_rate": 1.0527966370025964e-07, |
|
"logits/chosen": -1.125, |
|
"logits/rejected": -1.7578125, |
|
"logps/chosen": -716.0, |
|
"logps/rejected": -692.0, |
|
"loss": 0.1205, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": -2.015625, |
|
"rewards/margins": 2.875, |
|
"rewards/rejected": -4.875, |
|
"step": 1760 |
|
}, |
|
{ |
|
"epoch": 2.19, |
|
"grad_norm": 10.765461249613185, |
|
"learning_rate": 1.0235440303843302e-07, |
|
"logits/chosen": -1.2109375, |
|
"logits/rejected": -1.796875, |
|
"logps/chosen": -500.0, |
|
"logps/rejected": -636.0, |
|
"loss": 0.1099, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": -2.59375, |
|
"rewards/margins": 2.515625, |
|
"rewards/rejected": -5.125, |
|
"step": 1770 |
|
}, |
|
{ |
|
"epoch": 2.21, |
|
"grad_norm": 12.386913795936541, |
|
"learning_rate": 9.945986066156248e-08, |
|
"logits/chosen": -1.59375, |
|
"logits/rejected": -1.8828125, |
|
"logps/chosen": -498.0, |
|
"logps/rejected": -576.0, |
|
"loss": 0.108, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": -1.5703125, |
|
"rewards/margins": 3.5625, |
|
"rewards/rejected": -5.125, |
|
"step": 1780 |
|
}, |
|
{ |
|
"epoch": 2.22, |
|
"grad_norm": 16.61091563337375, |
|
"learning_rate": 9.659663879131503e-08, |
|
"logits/chosen": -1.265625, |
|
"logits/rejected": -1.3125, |
|
"logps/chosen": -560.0, |
|
"logps/rejected": -528.0, |
|
"loss": 0.125, |
|
"rewards/accuracies": 0.8999999761581421, |
|
"rewards/chosen": -2.40625, |
|
"rewards/margins": 2.71875, |
|
"rewards/rejected": -5.125, |
|
"step": 1790 |
|
}, |
|
{ |
|
"epoch": 2.23, |
|
"grad_norm": 24.411403141380244, |
|
"learning_rate": 9.376533313299542e-08, |
|
"logits/chosen": -1.2265625, |
|
"logits/rejected": -2.21875, |
|
"logps/chosen": -772.0, |
|
"logps/rejected": -660.0, |
|
"loss": 0.111, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": -2.078125, |
|
"rewards/margins": 2.9375, |
|
"rewards/rejected": -5.0, |
|
"step": 1800 |
|
}, |
|
{ |
|
"epoch": 2.24, |
|
"grad_norm": 15.505538034971874, |
|
"learning_rate": 9.096653275160641e-08, |
|
"logits/chosen": -1.5390625, |
|
"logits/rejected": -1.59375, |
|
"logps/chosen": -492.0, |
|
"logps/rejected": -576.0, |
|
"loss": 0.1229, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": -1.7109375, |
|
"rewards/margins": 2.765625, |
|
"rewards/rejected": -4.46875, |
|
"step": 1810 |
|
}, |
|
{ |
|
"epoch": 2.26, |
|
"grad_norm": 15.710939806805685, |
|
"learning_rate": 8.820081994929207e-08, |
|
"logits/chosen": -1.7421875, |
|
"logits/rejected": -2.03125, |
|
"logps/chosen": -724.0, |
|
"logps/rejected": -656.0, |
|
"loss": 0.1194, |
|
"rewards/accuracies": 0.8999999761581421, |
|
"rewards/chosen": -2.4375, |
|
"rewards/margins": 2.4375, |
|
"rewards/rejected": -4.875, |
|
"step": 1820 |
|
}, |
|
{ |
|
"epoch": 2.27, |
|
"grad_norm": 15.364495322714388, |
|
"learning_rate": 8.546877014418671e-08, |
|
"logits/chosen": -1.9296875, |
|
"logits/rejected": -2.15625, |
|
"logps/chosen": -496.0, |
|
"logps/rejected": -532.0, |
|
"loss": 0.1282, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": -2.046875, |
|
"rewards/margins": 2.484375, |
|
"rewards/rejected": -4.53125, |
|
"step": 1830 |
|
}, |
|
{ |
|
"epoch": 2.28, |
|
"grad_norm": 13.756725707803474, |
|
"learning_rate": 8.277095175069738e-08, |
|
"logits/chosen": -1.3984375, |
|
"logits/rejected": -1.4609375, |
|
"logps/chosen": -532.0, |
|
"logps/rejected": -552.0, |
|
"loss": 0.1072, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": -2.390625, |
|
"rewards/margins": 2.9375, |
|
"rewards/rejected": -5.3125, |
|
"step": 1840 |
|
}, |
|
{ |
|
"epoch": 2.29, |
|
"grad_norm": 23.80983088717624, |
|
"learning_rate": 8.010792606124228e-08, |
|
"logits/chosen": -1.0703125, |
|
"logits/rejected": -1.0546875, |
|
"logps/chosen": -672.0, |
|
"logps/rejected": -680.0, |
|
"loss": 0.1081, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": -1.890625, |
|
"rewards/margins": 3.53125, |
|
"rewards/rejected": -5.40625, |
|
"step": 1850 |
|
}, |
|
{ |
|
"epoch": 2.3, |
|
"grad_norm": 17.774551031970322, |
|
"learning_rate": 7.748024712947204e-08, |
|
"logits/chosen": -1.3984375, |
|
"logits/rejected": -1.2421875, |
|
"logps/chosen": -636.0, |
|
"logps/rejected": -652.0, |
|
"loss": 0.1291, |
|
"rewards/accuracies": 0.8999999761581421, |
|
"rewards/chosen": -2.28125, |
|
"rewards/margins": 2.34375, |
|
"rewards/rejected": -4.625, |
|
"step": 1860 |
|
}, |
|
{ |
|
"epoch": 2.32, |
|
"grad_norm": 14.943619749566544, |
|
"learning_rate": 7.488846165499596e-08, |
|
"logits/chosen": -1.3984375, |
|
"logits/rejected": -1.9609375, |
|
"logps/chosen": -572.0, |
|
"logps/rejected": -684.0, |
|
"loss": 0.1282, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": -2.734375, |
|
"rewards/margins": 3.515625, |
|
"rewards/rejected": -6.25, |
|
"step": 1870 |
|
}, |
|
{ |
|
"epoch": 2.33, |
|
"grad_norm": 16.562697765445648, |
|
"learning_rate": 7.233310886963942e-08, |
|
"logits/chosen": -1.375, |
|
"logits/rejected": -1.3984375, |
|
"logps/chosen": -474.0, |
|
"logps/rejected": -544.0, |
|
"loss": 0.1229, |
|
"rewards/accuracies": 0.8999999761581421, |
|
"rewards/chosen": -2.28125, |
|
"rewards/margins": 2.5, |
|
"rewards/rejected": -4.78125, |
|
"step": 1880 |
|
}, |
|
{ |
|
"epoch": 2.34, |
|
"grad_norm": 16.237370125481036, |
|
"learning_rate": 6.981472042525416e-08, |
|
"logits/chosen": -1.515625, |
|
"logits/rejected": -1.765625, |
|
"logps/chosen": -640.0, |
|
"logps/rejected": -588.0, |
|
"loss": 0.1077, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": -2.078125, |
|
"rewards/margins": 2.890625, |
|
"rewards/rejected": -4.96875, |
|
"step": 1890 |
|
}, |
|
{ |
|
"epoch": 2.35, |
|
"grad_norm": 12.54574310017106, |
|
"learning_rate": 6.7333820283106e-08, |
|
"logits/chosen": -0.94921875, |
|
"logits/rejected": -1.3828125, |
|
"logps/chosen": -696.0, |
|
"logps/rejected": -840.0, |
|
"loss": 0.1192, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": -1.578125, |
|
"rewards/margins": 4.125, |
|
"rewards/rejected": -5.71875, |
|
"step": 1900 |
|
}, |
|
{ |
|
"epoch": 2.37, |
|
"grad_norm": 17.067698975214256, |
|
"learning_rate": 6.48909246048622e-08, |
|
"logits/chosen": -1.6953125, |
|
"logits/rejected": -1.6015625, |
|
"logps/chosen": -490.0, |
|
"logps/rejected": -560.0, |
|
"loss": 0.1259, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": -2.078125, |
|
"rewards/margins": 3.03125, |
|
"rewards/rejected": -5.09375, |
|
"step": 1910 |
|
}, |
|
{ |
|
"epoch": 2.38, |
|
"grad_norm": 17.315279196446202, |
|
"learning_rate": 6.248654164520237e-08, |
|
"logits/chosen": -1.2890625, |
|
"logits/rejected": -1.4609375, |
|
"logps/chosen": -458.0, |
|
"logps/rejected": -426.0, |
|
"loss": 0.1221, |
|
"rewards/accuracies": 0.8999999761581421, |
|
"rewards/chosen": -2.515625, |
|
"rewards/margins": 2.09375, |
|
"rewards/rejected": -4.625, |
|
"step": 1920 |
|
}, |
|
{ |
|
"epoch": 2.39, |
|
"grad_norm": 15.397715588828959, |
|
"learning_rate": 6.012117164607347e-08, |
|
"logits/chosen": -0.90625, |
|
"logits/rejected": -1.4921875, |
|
"logps/chosen": -796.0, |
|
"logps/rejected": -708.0, |
|
"loss": 0.109, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": -1.5078125, |
|
"rewards/margins": 3.78125, |
|
"rewards/rejected": -5.3125, |
|
"step": 1930 |
|
}, |
|
{ |
|
"epoch": 2.4, |
|
"grad_norm": 45.120668890615434, |
|
"learning_rate": 5.779530673261279e-08, |
|
"logits/chosen": -1.0703125, |
|
"logits/rejected": -1.8125, |
|
"logps/chosen": -612.0, |
|
"logps/rejected": -820.0, |
|
"loss": 0.0907, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": -1.7265625, |
|
"rewards/margins": 3.5625, |
|
"rewards/rejected": -5.3125, |
|
"step": 1940 |
|
}, |
|
{ |
|
"epoch": 2.42, |
|
"grad_norm": 12.405391170841106, |
|
"learning_rate": 5.5509430810758817e-08, |
|
"logits/chosen": -1.0234375, |
|
"logits/rejected": -1.5703125, |
|
"logps/chosen": -800.0, |
|
"logps/rejected": -848.0, |
|
"loss": 0.1012, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": -2.28125, |
|
"rewards/margins": 3.375, |
|
"rewards/rejected": -5.65625, |
|
"step": 1950 |
|
}, |
|
{ |
|
"epoch": 2.43, |
|
"grad_norm": 13.29830997717489, |
|
"learning_rate": 5.3264019466573053e-08, |
|
"logits/chosen": -1.03125, |
|
"logits/rejected": -1.8203125, |
|
"logps/chosen": -660.0, |
|
"logps/rejected": -588.0, |
|
"loss": 0.0953, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": -1.859375, |
|
"rewards/margins": 3.640625, |
|
"rewards/rejected": -5.5, |
|
"step": 1960 |
|
}, |
|
{ |
|
"epoch": 2.44, |
|
"grad_norm": 15.306039857091942, |
|
"learning_rate": 5.105953986729195e-08, |
|
"logits/chosen": -1.6171875, |
|
"logits/rejected": -1.2109375, |
|
"logps/chosen": -576.0, |
|
"logps/rejected": -732.0, |
|
"loss": 0.1057, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": -2.4375, |
|
"rewards/margins": 3.140625, |
|
"rewards/rejected": -5.5625, |
|
"step": 1970 |
|
}, |
|
{ |
|
"epoch": 2.45, |
|
"grad_norm": 17.90216325537495, |
|
"learning_rate": 4.889645066413112e-08, |
|
"logits/chosen": -1.125, |
|
"logits/rejected": -1.5546875, |
|
"logps/chosen": -568.0, |
|
"logps/rejected": -612.0, |
|
"loss": 0.1165, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": -2.203125, |
|
"rewards/margins": 2.4375, |
|
"rewards/rejected": -4.625, |
|
"step": 1980 |
|
}, |
|
{ |
|
"epoch": 2.47, |
|
"grad_norm": 13.665308623510128, |
|
"learning_rate": 4.67752018968606e-08, |
|
"logits/chosen": -1.1328125, |
|
"logits/rejected": -1.4765625, |
|
"logps/chosen": -624.0, |
|
"logps/rejected": -592.0, |
|
"loss": 0.0942, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": -2.1875, |
|
"rewards/margins": 3.3125, |
|
"rewards/rejected": -5.5, |
|
"step": 1990 |
|
}, |
|
{ |
|
"epoch": 2.48, |
|
"grad_norm": 14.773793343841884, |
|
"learning_rate": 4.4696234900172744e-08, |
|
"logits/chosen": -1.5703125, |
|
"logits/rejected": -1.0546875, |
|
"logps/chosen": -540.0, |
|
"logps/rejected": -824.0, |
|
"loss": 0.1132, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": -2.4375, |
|
"rewards/margins": 3.46875, |
|
"rewards/rejected": -5.90625, |
|
"step": 2000 |
|
}, |
|
{ |
|
"epoch": 2.48, |
|
"eval_logits/chosen": -1.0625, |
|
"eval_logits/rejected": -0.96484375, |
|
"eval_logps/chosen": -592.0, |
|
"eval_logps/rejected": -696.0, |
|
"eval_loss": 0.37350770831108093, |
|
"eval_rewards/accuracies": 0.75, |
|
"eval_rewards/chosen": -3.3125, |
|
"eval_rewards/margins": 2.09375, |
|
"eval_rewards/rejected": -5.40625, |
|
"eval_runtime": 49.8427, |
|
"eval_samples_per_second": 21.066, |
|
"eval_steps_per_second": 0.181, |
|
"step": 2000 |
|
}, |
|
{ |
|
"epoch": 2.49, |
|
"grad_norm": 15.779889705391266, |
|
"learning_rate": 4.265998221186023e-08, |
|
"logits/chosen": -1.2421875, |
|
"logits/rejected": -1.1640625, |
|
"logps/chosen": -592.0, |
|
"logps/rejected": -560.0, |
|
"loss": 0.1073, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": -2.546875, |
|
"rewards/margins": 3.03125, |
|
"rewards/rejected": -5.5625, |
|
"step": 2010 |
|
}, |
|
{ |
|
"epoch": 2.5, |
|
"grad_norm": 27.097026355265392, |
|
"learning_rate": 4.0666867482825135e-08, |
|
"logits/chosen": -1.0859375, |
|
"logits/rejected": -1.0859375, |
|
"logps/chosen": -668.0, |
|
"logps/rejected": -548.0, |
|
"loss": 0.117, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": -1.953125, |
|
"rewards/margins": 3.359375, |
|
"rewards/rejected": -5.3125, |
|
"step": 2020 |
|
}, |
|
{ |
|
"epoch": 2.52, |
|
"grad_norm": 13.442767252473281, |
|
"learning_rate": 3.871730538893611e-08, |
|
"logits/chosen": -1.3515625, |
|
"logits/rejected": -1.9609375, |
|
"logps/chosen": -736.0, |
|
"logps/rejected": -740.0, |
|
"loss": 0.1108, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": -1.421875, |
|
"rewards/margins": 3.328125, |
|
"rewards/rejected": -4.75, |
|
"step": 2030 |
|
}, |
|
{ |
|
"epoch": 2.53, |
|
"grad_norm": 17.818418586870905, |
|
"learning_rate": 3.681170154475391e-08, |
|
"logits/chosen": -1.625, |
|
"logits/rejected": -1.546875, |
|
"logps/chosen": -442.0, |
|
"logps/rejected": -620.0, |
|
"loss": 0.1236, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": -1.90625, |
|
"rewards/margins": 2.5625, |
|
"rewards/rejected": -4.46875, |
|
"step": 2040 |
|
}, |
|
{ |
|
"epoch": 2.54, |
|
"grad_norm": 16.487242563455915, |
|
"learning_rate": 3.495045241914105e-08, |
|
"logits/chosen": -1.09375, |
|
"logits/rejected": -2.546875, |
|
"logps/chosen": -584.0, |
|
"logps/rejected": -672.0, |
|
"loss": 0.1079, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": -2.25, |
|
"rewards/margins": 4.09375, |
|
"rewards/rejected": -6.34375, |
|
"step": 2050 |
|
}, |
|
{ |
|
"epoch": 2.55, |
|
"grad_norm": 15.802897986414916, |
|
"learning_rate": 3.313394525277527e-08, |
|
"logits/chosen": -1.4609375, |
|
"logits/rejected": -1.3125, |
|
"logps/chosen": -482.0, |
|
"logps/rejected": -572.0, |
|
"loss": 0.0979, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": -2.265625, |
|
"rewards/margins": 3.078125, |
|
"rewards/rejected": -5.34375, |
|
"step": 2060 |
|
}, |
|
{ |
|
"epoch": 2.57, |
|
"grad_norm": 11.010895733144364, |
|
"learning_rate": 3.1362557977582e-08, |
|
"logits/chosen": -1.1953125, |
|
"logits/rejected": -1.2109375, |
|
"logps/chosen": -482.0, |
|
"logps/rejected": -506.0, |
|
"loss": 0.1009, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": -2.65625, |
|
"rewards/margins": 1.8515625, |
|
"rewards/rejected": -4.5, |
|
"step": 2070 |
|
}, |
|
{ |
|
"epoch": 2.58, |
|
"grad_norm": 19.922568800825218, |
|
"learning_rate": 2.963665913810451e-08, |
|
"logits/chosen": -1.0078125, |
|
"logits/rejected": -2.40625, |
|
"logps/chosen": -712.0, |
|
"logps/rejected": -588.0, |
|
"loss": 0.1016, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": -2.34375, |
|
"rewards/margins": 2.796875, |
|
"rewards/rejected": -5.125, |
|
"step": 2080 |
|
}, |
|
{ |
|
"epoch": 2.59, |
|
"grad_norm": 14.614675732714645, |
|
"learning_rate": 2.7956607814826366e-08, |
|
"logits/chosen": -1.2109375, |
|
"logits/rejected": -1.078125, |
|
"logps/chosen": -732.0, |
|
"logps/rejected": -712.0, |
|
"loss": 0.1244, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": -2.15625, |
|
"rewards/margins": 3.171875, |
|
"rewards/rejected": -5.34375, |
|
"step": 2090 |
|
}, |
|
{ |
|
"epoch": 2.6, |
|
"grad_norm": 13.459903874571827, |
|
"learning_rate": 2.632275354946342e-08, |
|
"logits/chosen": -0.9375, |
|
"logits/rejected": -2.328125, |
|
"logps/chosen": -470.0, |
|
"logps/rejected": -386.0, |
|
"loss": 0.1195, |
|
"rewards/accuracies": 0.8999999761581421, |
|
"rewards/chosen": -1.8359375, |
|
"rewards/margins": 2.0625, |
|
"rewards/rejected": -3.90625, |
|
"step": 2100 |
|
}, |
|
{ |
|
"epoch": 2.61, |
|
"grad_norm": 11.287103509304053, |
|
"learning_rate": 2.4735436272239922e-08, |
|
"logits/chosen": -1.7109375, |
|
"logits/rejected": -2.484375, |
|
"logps/chosen": -500.0, |
|
"logps/rejected": -612.0, |
|
"loss": 0.096, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": -2.03125, |
|
"rewards/margins": 3.71875, |
|
"rewards/rejected": -5.75, |
|
"step": 2110 |
|
}, |
|
{ |
|
"epoch": 2.63, |
|
"grad_norm": 9.246071212037243, |
|
"learning_rate": 2.319498623116492e-08, |
|
"logits/chosen": -2.203125, |
|
"logits/rejected": -1.578125, |
|
"logps/chosen": -564.0, |
|
"logps/rejected": -796.0, |
|
"loss": 0.1098, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": -3.125, |
|
"rewards/margins": 4.15625, |
|
"rewards/rejected": -7.28125, |
|
"step": 2120 |
|
}, |
|
{ |
|
"epoch": 2.64, |
|
"grad_norm": 25.87859572275051, |
|
"learning_rate": 2.1701723923322673e-08, |
|
"logits/chosen": -1.7265625, |
|
"logits/rejected": -1.8984375, |
|
"logps/chosen": -516.0, |
|
"logps/rejected": -644.0, |
|
"loss": 0.1225, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": -2.484375, |
|
"rewards/margins": 3.4375, |
|
"rewards/rejected": -5.9375, |
|
"step": 2130 |
|
}, |
|
{ |
|
"epoch": 2.65, |
|
"grad_norm": 22.24550487415097, |
|
"learning_rate": 2.0255960028191798e-08, |
|
"logits/chosen": -1.5703125, |
|
"logits/rejected": -1.703125, |
|
"logps/chosen": -502.0, |
|
"logps/rejected": -552.0, |
|
"loss": 0.1103, |
|
"rewards/accuracies": 0.8999999761581421, |
|
"rewards/chosen": -2.375, |
|
"rewards/margins": 3.0625, |
|
"rewards/rejected": -5.4375, |
|
"step": 2140 |
|
}, |
|
{ |
|
"epoch": 2.66, |
|
"grad_norm": 22.983007966397018, |
|
"learning_rate": 1.8857995343007167e-08, |
|
"logits/chosen": -1.8125, |
|
"logits/rejected": -1.3828125, |
|
"logps/chosen": -728.0, |
|
"logps/rejected": -900.0, |
|
"loss": 0.1094, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": -1.96875, |
|
"rewards/margins": 3.96875, |
|
"rewards/rejected": -5.9375, |
|
"step": 2150 |
|
}, |
|
{ |
|
"epoch": 2.68, |
|
"grad_norm": 25.097623669642626, |
|
"learning_rate": 1.7508120720177795e-08, |
|
"logits/chosen": -1.1796875, |
|
"logits/rejected": -1.03125, |
|
"logps/chosen": -568.0, |
|
"logps/rejected": -604.0, |
|
"loss": 0.1184, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": -2.78125, |
|
"rewards/margins": 2.078125, |
|
"rewards/rejected": -4.875, |
|
"step": 2160 |
|
}, |
|
{ |
|
"epoch": 2.69, |
|
"grad_norm": 32.20516783572916, |
|
"learning_rate": 1.6206617006773753e-08, |
|
"logits/chosen": -0.78515625, |
|
"logits/rejected": -2.078125, |
|
"logps/chosen": -736.0, |
|
"logps/rejected": -556.0, |
|
"loss": 0.1038, |
|
"rewards/accuracies": 0.8999999761581421, |
|
"rewards/chosen": -1.5078125, |
|
"rewards/margins": 2.828125, |
|
"rewards/rejected": -4.34375, |
|
"step": 2170 |
|
}, |
|
{ |
|
"epoch": 2.7, |
|
"grad_norm": 11.93984865185982, |
|
"learning_rate": 1.4953754986094886e-08, |
|
"logits/chosen": -1.5859375, |
|
"logits/rejected": -1.6328125, |
|
"logps/chosen": -568.0, |
|
"logps/rejected": -580.0, |
|
"loss": 0.1095, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": -1.7109375, |
|
"rewards/margins": 2.640625, |
|
"rewards/rejected": -4.34375, |
|
"step": 2180 |
|
}, |
|
{ |
|
"epoch": 2.71, |
|
"grad_norm": 30.015474257847476, |
|
"learning_rate": 1.3749795321332885e-08, |
|
"logits/chosen": -1.265625, |
|
"logits/rejected": -1.5390625, |
|
"logps/chosen": -664.0, |
|
"logps/rejected": -804.0, |
|
"loss": 0.1274, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": -1.96875, |
|
"rewards/margins": 3.421875, |
|
"rewards/rejected": -5.40625, |
|
"step": 2190 |
|
}, |
|
{ |
|
"epoch": 2.73, |
|
"grad_norm": 20.828265625017977, |
|
"learning_rate": 1.2594988501339665e-08, |
|
"logits/chosen": -1.1796875, |
|
"logits/rejected": -1.796875, |
|
"logps/chosen": -628.0, |
|
"logps/rejected": -684.0, |
|
"loss": 0.1094, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": -2.71875, |
|
"rewards/margins": 3.109375, |
|
"rewards/rejected": -5.8125, |
|
"step": 2200 |
|
}, |
|
{ |
|
"epoch": 2.74, |
|
"grad_norm": 22.34873903931498, |
|
"learning_rate": 1.148957478851173e-08, |
|
"logits/chosen": -1.515625, |
|
"logits/rejected": -1.375, |
|
"logps/chosen": -604.0, |
|
"logps/rejected": -572.0, |
|
"loss": 0.1136, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": -2.328125, |
|
"rewards/margins": 2.515625, |
|
"rewards/rejected": -4.84375, |
|
"step": 2210 |
|
}, |
|
{ |
|
"epoch": 2.75, |
|
"grad_norm": 15.346930767785905, |
|
"learning_rate": 1.0433784168802805e-08, |
|
"logits/chosen": -1.3125, |
|
"logits/rejected": -1.578125, |
|
"logps/chosen": -624.0, |
|
"logps/rejected": -820.0, |
|
"loss": 0.1239, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": -3.03125, |
|
"rewards/margins": 3.015625, |
|
"rewards/rejected": -6.0625, |
|
"step": 2220 |
|
}, |
|
{ |
|
"epoch": 2.76, |
|
"grad_norm": 16.847862725204944, |
|
"learning_rate": 9.427836303874115e-09, |
|
"logits/chosen": -1.1640625, |
|
"logits/rejected": -2.03125, |
|
"logps/chosen": -568.0, |
|
"logps/rejected": -648.0, |
|
"loss": 0.1139, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": -2.921875, |
|
"rewards/margins": 4.0, |
|
"rewards/rejected": -6.9375, |
|
"step": 2230 |
|
}, |
|
{ |
|
"epoch": 2.78, |
|
"grad_norm": 11.648381674685714, |
|
"learning_rate": 8.47194048539307e-09, |
|
"logits/chosen": -1.015625, |
|
"logits/rejected": -1.6875, |
|
"logps/chosen": -880.0, |
|
"logps/rejected": -708.0, |
|
"loss": 0.114, |
|
"rewards/accuracies": 0.800000011920929, |
|
"rewards/chosen": -2.515625, |
|
"rewards/margins": 3.515625, |
|
"rewards/rejected": -6.03125, |
|
"step": 2240 |
|
}, |
|
{ |
|
"epoch": 2.79, |
|
"grad_norm": 18.35912272257459, |
|
"learning_rate": 7.566295591489052e-09, |
|
"logits/chosen": -1.4765625, |
|
"logits/rejected": -1.421875, |
|
"logps/chosen": -604.0, |
|
"logps/rejected": -676.0, |
|
"loss": 0.1191, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": -1.9140625, |
|
"rewards/margins": 3.015625, |
|
"rewards/rejected": -4.9375, |
|
"step": 2250 |
|
}, |
|
{ |
|
"epoch": 2.8, |
|
"grad_norm": 13.681536454598728, |
|
"learning_rate": 6.71109004537615e-09, |
|
"logits/chosen": -1.125, |
|
"logits/rejected": -1.1875, |
|
"logps/chosen": -604.0, |
|
"logps/rejected": -664.0, |
|
"loss": 0.0951, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": -2.46875, |
|
"rewards/margins": 3.0625, |
|
"rewards/rejected": -5.53125, |
|
"step": 2260 |
|
}, |
|
{ |
|
"epoch": 2.81, |
|
"grad_norm": 9.950566904078942, |
|
"learning_rate": 5.906501776150763e-09, |
|
"logits/chosen": -1.0, |
|
"logits/rejected": -2.921875, |
|
"logps/chosen": -712.0, |
|
"logps/rejected": -600.0, |
|
"loss": 0.1083, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": -1.8515625, |
|
"rewards/margins": 3.109375, |
|
"rewards/rejected": -4.96875, |
|
"step": 2270 |
|
}, |
|
{ |
|
"epoch": 2.83, |
|
"grad_norm": 22.085690818869246, |
|
"learning_rate": 5.152698181772857e-09, |
|
"logits/chosen": -1.140625, |
|
"logits/rejected": -1.546875, |
|
"logps/chosen": -572.0, |
|
"logps/rejected": -760.0, |
|
"loss": 0.1072, |
|
"rewards/accuracies": 0.8999999761581421, |
|
"rewards/chosen": -2.125, |
|
"rewards/margins": 2.71875, |
|
"rewards/rejected": -4.84375, |
|
"step": 2280 |
|
}, |
|
{ |
|
"epoch": 2.84, |
|
"grad_norm": 21.353543333604414, |
|
"learning_rate": 4.449836094238019e-09, |
|
"logits/chosen": -1.171875, |
|
"logits/rejected": -1.890625, |
|
"logps/chosen": -620.0, |
|
"logps/rejected": -486.0, |
|
"loss": 0.0966, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": -1.9453125, |
|
"rewards/margins": 3.21875, |
|
"rewards/rejected": -5.15625, |
|
"step": 2290 |
|
}, |
|
{ |
|
"epoch": 2.85, |
|
"grad_norm": 11.392655664834834, |
|
"learning_rate": 3.798061746947995e-09, |
|
"logits/chosen": -1.09375, |
|
"logits/rejected": -1.546875, |
|
"logps/chosen": -500.0, |
|
"logps/rejected": -568.0, |
|
"loss": 0.1108, |
|
"rewards/accuracies": 0.800000011920929, |
|
"rewards/chosen": -2.40625, |
|
"rewards/margins": 2.078125, |
|
"rewards/rejected": -4.5, |
|
"step": 2300 |
|
}, |
|
{ |
|
"epoch": 2.86, |
|
"grad_norm": 16.899400188846528, |
|
"learning_rate": 3.1975107442860637e-09, |
|
"logits/chosen": -1.953125, |
|
"logits/rejected": -1.453125, |
|
"logps/chosen": -494.0, |
|
"logps/rejected": -928.0, |
|
"loss": 0.6379, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": -2.453125, |
|
"rewards/margins": 4.59375, |
|
"rewards/rejected": -7.0625, |
|
"step": 2310 |
|
}, |
|
{ |
|
"epoch": 2.87, |
|
"grad_norm": 18.0993042603957, |
|
"learning_rate": 2.6483080334041287e-09, |
|
"logits/chosen": -1.328125, |
|
"logits/rejected": -1.453125, |
|
"logps/chosen": -652.0, |
|
"logps/rejected": -816.0, |
|
"loss": 0.0927, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": -2.078125, |
|
"rewards/margins": 3.625, |
|
"rewards/rejected": -5.6875, |
|
"step": 2320 |
|
}, |
|
{ |
|
"epoch": 2.89, |
|
"grad_norm": 18.326061508758997, |
|
"learning_rate": 2.1505678782269e-09, |
|
"logits/chosen": -1.3125, |
|
"logits/rejected": -2.1875, |
|
"logps/chosen": -668.0, |
|
"logps/rejected": -572.0, |
|
"loss": 0.1068, |
|
"rewards/accuracies": 0.8999999761581421, |
|
"rewards/chosen": -2.125, |
|
"rewards/margins": 3.1875, |
|
"rewards/rejected": -5.3125, |
|
"step": 2330 |
|
}, |
|
{ |
|
"epoch": 2.9, |
|
"grad_norm": 20.00896083752618, |
|
"learning_rate": 1.7043938356787467e-09, |
|
"logits/chosen": -1.4921875, |
|
"logits/rejected": -1.3125, |
|
"logps/chosen": -354.0, |
|
"logps/rejected": -556.0, |
|
"loss": 0.1215, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": -2.109375, |
|
"rewards/margins": 2.765625, |
|
"rewards/rejected": -4.875, |
|
"step": 2340 |
|
}, |
|
{ |
|
"epoch": 2.91, |
|
"grad_norm": 33.844584510123674, |
|
"learning_rate": 1.30987873413832e-09, |
|
"logits/chosen": -0.94140625, |
|
"logits/rejected": -1.109375, |
|
"logps/chosen": -588.0, |
|
"logps/rejected": -660.0, |
|
"loss": 0.1078, |
|
"rewards/accuracies": 0.8999999761581421, |
|
"rewards/chosen": -2.375, |
|
"rewards/margins": 2.859375, |
|
"rewards/rejected": -5.21875, |
|
"step": 2350 |
|
}, |
|
{ |
|
"epoch": 2.92, |
|
"grad_norm": 13.322271542703083, |
|
"learning_rate": 9.671046541251393e-10, |
|
"logits/chosen": -1.09375, |
|
"logits/rejected": -2.046875, |
|
"logps/chosen": -672.0, |
|
"logps/rejected": -576.0, |
|
"loss": 0.1159, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": -1.7890625, |
|
"rewards/margins": 3.765625, |
|
"rewards/rejected": -5.5625, |
|
"step": 2360 |
|
}, |
|
{ |
|
"epoch": 2.94, |
|
"grad_norm": 28.085054792653725, |
|
"learning_rate": 6.761429112225326e-10, |
|
"logits/chosen": -1.1484375, |
|
"logits/rejected": -0.7734375, |
|
"logps/chosen": -688.0, |
|
"logps/rejected": -908.0, |
|
"loss": 0.0952, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": -1.8203125, |
|
"rewards/margins": 3.71875, |
|
"rewards/rejected": -5.53125, |
|
"step": 2370 |
|
}, |
|
{ |
|
"epoch": 2.95, |
|
"grad_norm": 20.97872609812394, |
|
"learning_rate": 4.370540412399759e-10, |
|
"logits/chosen": -1.859375, |
|
"logits/rejected": -1.8125, |
|
"logps/chosen": -564.0, |
|
"logps/rejected": -684.0, |
|
"loss": 0.1135, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": -2.328125, |
|
"rewards/margins": 3.609375, |
|
"rewards/rejected": -5.9375, |
|
"step": 2380 |
|
}, |
|
{ |
|
"epoch": 2.96, |
|
"grad_norm": 19.210485937449167, |
|
"learning_rate": 2.498877876184191e-10, |
|
"logits/chosen": -1.4765625, |
|
"logits/rejected": -1.046875, |
|
"logps/chosen": -688.0, |
|
"logps/rejected": -664.0, |
|
"loss": 0.1025, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": -1.6015625, |
|
"rewards/margins": 3.125, |
|
"rewards/rejected": -4.71875, |
|
"step": 2390 |
|
}, |
|
{ |
|
"epoch": 2.97, |
|
"grad_norm": 12.682964834422256, |
|
"learning_rate": 1.1468309108100816e-10, |
|
"logits/chosen": -1.1171875, |
|
"logits/rejected": -1.3125, |
|
"logps/chosen": -456.0, |
|
"logps/rejected": -540.0, |
|
"loss": 0.1049, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": -3.0, |
|
"rewards/margins": 3.109375, |
|
"rewards/rejected": -6.125, |
|
"step": 2400 |
|
}, |
|
{ |
|
"epoch": 2.99, |
|
"grad_norm": 16.080027717160323, |
|
"learning_rate": 3.146808153123293e-11, |
|
"logits/chosen": -1.234375, |
|
"logits/rejected": -2.03125, |
|
"logps/chosen": -492.0, |
|
"logps/rejected": -532.0, |
|
"loss": 0.1196, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": -2.109375, |
|
"rewards/margins": 2.046875, |
|
"rewards/rejected": -4.15625, |
|
"step": 2410 |
|
}, |
|
{ |
|
"epoch": 3.0, |
|
"grad_norm": 13.95546245482141, |
|
"learning_rate": 2.60072200469752e-13, |
|
"logits/chosen": -1.2265625, |
|
"logits/rejected": -2.421875, |
|
"logps/chosen": -624.0, |
|
"logps/rejected": -482.0, |
|
"loss": 0.1097, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": -2.34375, |
|
"rewards/margins": 2.84375, |
|
"rewards/rejected": -5.1875, |
|
"step": 2420 |
|
} |
|
], |
|
"logging_steps": 10, |
|
"max_steps": 2421, |
|
"num_input_tokens_seen": 0, |
|
"num_train_epochs": 3, |
|
"save_steps": 500, |
|
"total_flos": 0.0, |
|
"train_batch_size": 1, |
|
"trial_name": null, |
|
"trial_params": null |
|
} |
|
|