|
{ |
|
"best_metric": null, |
|
"best_model_checkpoint": null, |
|
"epoch": 1.0, |
|
"eval_steps": 500, |
|
"global_step": 625, |
|
"is_hyper_param_search": false, |
|
"is_local_process_zero": true, |
|
"is_world_process_zero": true, |
|
"log_history": [ |
|
{ |
|
"epoch": 0.0, |
|
"learning_rate": 7.936507936507937e-08, |
|
"logits/chosen": -0.2010916769504547, |
|
"logits/rejected": 0.09005054831504822, |
|
"logps/chosen": -540.942626953125, |
|
"logps/rejected": -796.8775634765625, |
|
"loss": 0.2182, |
|
"rewards/accuracies": 0.0, |
|
"rewards/chosen": 0.0, |
|
"rewards/margins": 0.0, |
|
"rewards/rejected": 0.0, |
|
"step": 1 |
|
}, |
|
{ |
|
"epoch": 0.02, |
|
"learning_rate": 7.936507936507937e-07, |
|
"logits/chosen": -0.15247675776481628, |
|
"logits/rejected": -0.14707261323928833, |
|
"logps/chosen": -501.8849792480469, |
|
"logps/rejected": -774.5216064453125, |
|
"loss": 0.2086, |
|
"rewards/accuracies": 0.375, |
|
"rewards/chosen": 3.564198050298728e-05, |
|
"rewards/margins": -2.1308711438905448e-05, |
|
"rewards/rejected": 5.695069921785034e-05, |
|
"step": 10 |
|
}, |
|
{ |
|
"epoch": 0.03, |
|
"learning_rate": 1.5873015873015873e-06, |
|
"logits/chosen": -0.1420287936925888, |
|
"logits/rejected": -0.10379602760076523, |
|
"logps/chosen": -489.9579162597656, |
|
"logps/rejected": -771.2081298828125, |
|
"loss": 0.2095, |
|
"rewards/accuracies": 0.6000000238418579, |
|
"rewards/chosen": -0.0012544477358460426, |
|
"rewards/margins": 0.0017321283230558038, |
|
"rewards/rejected": -0.0029865759424865246, |
|
"step": 20 |
|
}, |
|
{ |
|
"epoch": 0.05, |
|
"learning_rate": 2.380952380952381e-06, |
|
"logits/chosen": -0.16283434629440308, |
|
"logits/rejected": -0.14502206444740295, |
|
"logps/chosen": -532.1380615234375, |
|
"logps/rejected": -800.226806640625, |
|
"loss": 0.1996, |
|
"rewards/accuracies": 0.7250000238418579, |
|
"rewards/chosen": -0.007352855056524277, |
|
"rewards/margins": 0.009955727495253086, |
|
"rewards/rejected": -0.017308581620454788, |
|
"step": 30 |
|
}, |
|
{ |
|
"epoch": 0.06, |
|
"learning_rate": 3.1746031746031746e-06, |
|
"logits/chosen": -0.19371934235095978, |
|
"logits/rejected": -0.15917012095451355, |
|
"logps/chosen": -529.4437255859375, |
|
"logps/rejected": -794.9968872070312, |
|
"loss": 0.1897, |
|
"rewards/accuracies": 0.731249988079071, |
|
"rewards/chosen": -0.028293650597333908, |
|
"rewards/margins": 0.02773173525929451, |
|
"rewards/rejected": -0.05602538585662842, |
|
"step": 40 |
|
}, |
|
{ |
|
"epoch": 0.08, |
|
"learning_rate": 3.968253968253968e-06, |
|
"logits/chosen": -0.16935278475284576, |
|
"logits/rejected": -0.12196620553731918, |
|
"logps/chosen": -556.66064453125, |
|
"logps/rejected": -913.4127197265625, |
|
"loss": 0.1623, |
|
"rewards/accuracies": 0.731249988079071, |
|
"rewards/chosen": -0.06831763684749603, |
|
"rewards/margins": 0.09133367240428925, |
|
"rewards/rejected": -0.15965132415294647, |
|
"step": 50 |
|
}, |
|
{ |
|
"epoch": 0.1, |
|
"learning_rate": 4.761904761904762e-06, |
|
"logits/chosen": -0.18819646537303925, |
|
"logits/rejected": -0.21151557564735413, |
|
"logps/chosen": -641.8372802734375, |
|
"logps/rejected": -1064.3094482421875, |
|
"loss": 0.1532, |
|
"rewards/accuracies": 0.7124999761581421, |
|
"rewards/chosen": -0.14167475700378418, |
|
"rewards/margins": 0.16748657822608948, |
|
"rewards/rejected": -0.30916133522987366, |
|
"step": 60 |
|
}, |
|
{ |
|
"epoch": 0.11, |
|
"learning_rate": 4.998086282661188e-06, |
|
"logits/chosen": -0.19726331532001495, |
|
"logits/rejected": -0.2387177050113678, |
|
"logps/chosen": -630.7346801757812, |
|
"logps/rejected": -1035.373046875, |
|
"loss": 0.1511, |
|
"rewards/accuracies": 0.731249988079071, |
|
"rewards/chosen": -0.13452503085136414, |
|
"rewards/margins": 0.14926694333553314, |
|
"rewards/rejected": -0.2837919592857361, |
|
"step": 70 |
|
}, |
|
{ |
|
"epoch": 0.13, |
|
"learning_rate": 4.988720025682995e-06, |
|
"logits/chosen": -0.23227183520793915, |
|
"logits/rejected": -0.15486109256744385, |
|
"logps/chosen": -612.7208862304688, |
|
"logps/rejected": -1018.8267822265625, |
|
"loss": 0.142, |
|
"rewards/accuracies": 0.737500011920929, |
|
"rewards/chosen": -0.11648458242416382, |
|
"rewards/margins": 0.137832373380661, |
|
"rewards/rejected": -0.25431695580482483, |
|
"step": 80 |
|
}, |
|
{ |
|
"epoch": 0.14, |
|
"learning_rate": 4.9715789537359126e-06, |
|
"logits/chosen": -0.269733726978302, |
|
"logits/rejected": -0.2005140334367752, |
|
"logps/chosen": -666.8603515625, |
|
"logps/rejected": -959.771484375, |
|
"loss": 0.1449, |
|
"rewards/accuracies": 0.706250011920929, |
|
"rewards/chosen": -0.1299312561750412, |
|
"rewards/margins": 0.12538622319698334, |
|
"rewards/rejected": -0.25531744956970215, |
|
"step": 90 |
|
}, |
|
{ |
|
"epoch": 0.16, |
|
"learning_rate": 4.946716615897932e-06, |
|
"logits/chosen": -0.21513569355010986, |
|
"logits/rejected": -0.22341570258140564, |
|
"logps/chosen": -636.3090209960938, |
|
"logps/rejected": -982.4715576171875, |
|
"loss": 0.1376, |
|
"rewards/accuracies": 0.768750011920929, |
|
"rewards/chosen": -0.1491270661354065, |
|
"rewards/margins": 0.12507781386375427, |
|
"rewards/rejected": -0.27420490980148315, |
|
"step": 100 |
|
}, |
|
{ |
|
"epoch": 0.18, |
|
"learning_rate": 4.9142106826480114e-06, |
|
"logits/chosen": -0.26347213983535767, |
|
"logits/rejected": -0.1899401843547821, |
|
"logps/chosen": -682.718017578125, |
|
"logps/rejected": -1051.3671875, |
|
"loss": 0.1456, |
|
"rewards/accuracies": 0.737500011920929, |
|
"rewards/chosen": -0.1585748940706253, |
|
"rewards/margins": 0.13942097127437592, |
|
"rewards/rejected": -0.2979958653450012, |
|
"step": 110 |
|
}, |
|
{ |
|
"epoch": 0.19, |
|
"learning_rate": 4.874162703221823e-06, |
|
"logits/chosen": -0.28930234909057617, |
|
"logits/rejected": -0.21862976253032684, |
|
"logps/chosen": -624.7803344726562, |
|
"logps/rejected": -1044.067626953125, |
|
"loss": 0.1322, |
|
"rewards/accuracies": 0.768750011920929, |
|
"rewards/chosen": -0.1492077112197876, |
|
"rewards/margins": 0.15546968579292297, |
|
"rewards/rejected": -0.3046773374080658, |
|
"step": 120 |
|
}, |
|
{ |
|
"epoch": 0.21, |
|
"learning_rate": 4.826697788369752e-06, |
|
"logits/chosen": -0.2196696251630783, |
|
"logits/rejected": -0.22875969111919403, |
|
"logps/chosen": -621.0065307617188, |
|
"logps/rejected": -944.654296875, |
|
"loss": 0.1432, |
|
"rewards/accuracies": 0.71875, |
|
"rewards/chosen": -0.15334677696228027, |
|
"rewards/margins": 0.11417926847934723, |
|
"rewards/rejected": -0.2675260007381439, |
|
"step": 130 |
|
}, |
|
{ |
|
"epoch": 0.22, |
|
"learning_rate": 4.7719642195082224e-06, |
|
"logits/chosen": -0.23348715901374817, |
|
"logits/rejected": -0.21704678237438202, |
|
"logps/chosen": -599.64453125, |
|
"logps/rejected": -991.4964599609375, |
|
"loss": 0.1471, |
|
"rewards/accuracies": 0.75, |
|
"rewards/chosen": -0.13741596043109894, |
|
"rewards/margins": 0.13189604878425598, |
|
"rewards/rejected": -0.2693119943141937, |
|
"step": 140 |
|
}, |
|
{ |
|
"epoch": 0.24, |
|
"learning_rate": 4.710132985485355e-06, |
|
"logits/chosen": -0.2200555056333542, |
|
"logits/rejected": -0.20334219932556152, |
|
"logps/chosen": -661.56884765625, |
|
"logps/rejected": -1054.980712890625, |
|
"loss": 0.1376, |
|
"rewards/accuracies": 0.7437499761581421, |
|
"rewards/chosen": -0.16635623574256897, |
|
"rewards/margins": 0.14565840363502502, |
|
"rewards/rejected": -0.312014639377594, |
|
"step": 150 |
|
}, |
|
{ |
|
"epoch": 0.26, |
|
"learning_rate": 4.641397248408122e-06, |
|
"logits/chosen": -0.20752374827861786, |
|
"logits/rejected": -0.24948814511299133, |
|
"logps/chosen": -719.3741455078125, |
|
"logps/rejected": -1020.0384521484375, |
|
"loss": 0.1338, |
|
"rewards/accuracies": 0.78125, |
|
"rewards/chosen": -0.1749049723148346, |
|
"rewards/margins": 0.13863129913806915, |
|
"rewards/rejected": -0.31353622674942017, |
|
"step": 160 |
|
}, |
|
{ |
|
"epoch": 0.27, |
|
"learning_rate": 4.5659717401997655e-06, |
|
"logits/chosen": -0.2512062191963196, |
|
"logits/rejected": -0.25224044919013977, |
|
"logps/chosen": -652.2489624023438, |
|
"logps/rejected": -1064.6927490234375, |
|
"loss": 0.1404, |
|
"rewards/accuracies": 0.78125, |
|
"rewards/chosen": -0.16943642497062683, |
|
"rewards/margins": 0.14737632870674133, |
|
"rewards/rejected": -0.3168127238750458, |
|
"step": 170 |
|
}, |
|
{ |
|
"epoch": 0.29, |
|
"learning_rate": 4.4840920917726425e-06, |
|
"logits/chosen": -0.26564353704452515, |
|
"logits/rejected": -0.31829017400741577, |
|
"logps/chosen": -643.8062744140625, |
|
"logps/rejected": -1146.453857421875, |
|
"loss": 0.1238, |
|
"rewards/accuracies": 0.78125, |
|
"rewards/chosen": -0.1721421331167221, |
|
"rewards/margins": 0.17220233380794525, |
|
"rewards/rejected": -0.34434446692466736, |
|
"step": 180 |
|
}, |
|
{ |
|
"epoch": 0.3, |
|
"learning_rate": 4.396014096912182e-06, |
|
"logits/chosen": -0.30384334921836853, |
|
"logits/rejected": -0.3049886226654053, |
|
"logps/chosen": -700.3992919921875, |
|
"logps/rejected": -1094.860107421875, |
|
"loss": 0.1293, |
|
"rewards/accuracies": 0.800000011920929, |
|
"rewards/chosen": -0.190101757645607, |
|
"rewards/margins": 0.16493478417396545, |
|
"rewards/rejected": -0.35503652691841125, |
|
"step": 190 |
|
}, |
|
{ |
|
"epoch": 0.32, |
|
"learning_rate": 4.302012913171584e-06, |
|
"logits/chosen": -0.22822122275829315, |
|
"logits/rejected": -0.21796974539756775, |
|
"logps/chosen": -703.4880981445312, |
|
"logps/rejected": -1098.6363525390625, |
|
"loss": 0.1291, |
|
"rewards/accuracies": 0.7749999761581421, |
|
"rewards/chosen": -0.18932084739208221, |
|
"rewards/margins": 0.18700966238975525, |
|
"rewards/rejected": -0.37633052468299866, |
|
"step": 200 |
|
}, |
|
{ |
|
"epoch": 0.34, |
|
"learning_rate": 4.202382202273702e-06, |
|
"logits/chosen": -0.2829930782318115, |
|
"logits/rejected": -0.2941269278526306, |
|
"logps/chosen": -712.0001220703125, |
|
"logps/rejected": -1121.9896240234375, |
|
"loss": 0.1212, |
|
"rewards/accuracies": 0.7875000238418579, |
|
"rewards/chosen": -0.19748489558696747, |
|
"rewards/margins": 0.1557844579219818, |
|
"rewards/rejected": -0.3532693684101105, |
|
"step": 210 |
|
}, |
|
{ |
|
"epoch": 0.35, |
|
"learning_rate": 4.097433212705492e-06, |
|
"logits/chosen": -0.2563570737838745, |
|
"logits/rejected": -0.20130082964897156, |
|
"logps/chosen": -719.8380126953125, |
|
"logps/rejected": -1181.190185546875, |
|
"loss": 0.1349, |
|
"rewards/accuracies": 0.7875000238418579, |
|
"rewards/chosen": -0.2003975212574005, |
|
"rewards/margins": 0.1832597851753235, |
|
"rewards/rejected": -0.3836573362350464, |
|
"step": 220 |
|
}, |
|
{ |
|
"epoch": 0.37, |
|
"learning_rate": 3.987493807371033e-06, |
|
"logits/chosen": -0.19761434197425842, |
|
"logits/rejected": -0.24285447597503662, |
|
"logps/chosen": -713.5037841796875, |
|
"logps/rejected": -1127.9371337890625, |
|
"loss": 0.1377, |
|
"rewards/accuracies": 0.793749988079071, |
|
"rewards/chosen": -0.20124192535877228, |
|
"rewards/margins": 0.16942095756530762, |
|
"rewards/rejected": -0.3706628680229187, |
|
"step": 230 |
|
}, |
|
{ |
|
"epoch": 0.38, |
|
"learning_rate": 3.872907439340758e-06, |
|
"logits/chosen": -0.22070392966270447, |
|
"logits/rejected": -0.2395116537809372, |
|
"logps/chosen": -689.1747436523438, |
|
"logps/rejected": -1246.12646484375, |
|
"loss": 0.1219, |
|
"rewards/accuracies": 0.862500011920929, |
|
"rewards/chosen": -0.18724720180034637, |
|
"rewards/margins": 0.221848726272583, |
|
"rewards/rejected": -0.4090959429740906, |
|
"step": 240 |
|
}, |
|
{ |
|
"epoch": 0.4, |
|
"learning_rate": 3.75403207889666e-06, |
|
"logits/chosen": -0.2952207028865814, |
|
"logits/rejected": -0.286382257938385, |
|
"logps/chosen": -680.5881958007812, |
|
"logps/rejected": -1075.620849609375, |
|
"loss": 0.1351, |
|
"rewards/accuracies": 0.78125, |
|
"rewards/chosen": -0.20417407155036926, |
|
"rewards/margins": 0.16537046432495117, |
|
"rewards/rejected": -0.36954453587532043, |
|
"step": 250 |
|
}, |
|
{ |
|
"epoch": 0.42, |
|
"learning_rate": 3.631239095225417e-06, |
|
"logits/chosen": -0.28091198205947876, |
|
"logits/rejected": -0.25016146898269653, |
|
"logps/chosen": -717.04150390625, |
|
"logps/rejected": -1180.3306884765625, |
|
"loss": 0.1185, |
|
"rewards/accuracies": 0.78125, |
|
"rewards/chosen": -0.2006029188632965, |
|
"rewards/margins": 0.1787930279970169, |
|
"rewards/rejected": -0.3793959617614746, |
|
"step": 260 |
|
}, |
|
{ |
|
"epoch": 0.43, |
|
"learning_rate": 3.5049120962530608e-06, |
|
"logits/chosen": -0.23318138718605042, |
|
"logits/rejected": -0.1964143067598343, |
|
"logps/chosen": -722.5912475585938, |
|
"logps/rejected": -1105.978759765625, |
|
"loss": 0.1251, |
|
"rewards/accuracies": 0.7875000238418579, |
|
"rewards/chosen": -0.20104451477527618, |
|
"rewards/margins": 0.16880543529987335, |
|
"rewards/rejected": -0.36984992027282715, |
|
"step": 270 |
|
}, |
|
{ |
|
"epoch": 0.45, |
|
"learning_rate": 3.3754457302455464e-06, |
|
"logits/chosen": -0.21553221344947815, |
|
"logits/rejected": -0.2545991837978363, |
|
"logps/chosen": -708.6617431640625, |
|
"logps/rejected": -1200.54248046875, |
|
"loss": 0.1241, |
|
"rewards/accuracies": 0.824999988079071, |
|
"rewards/chosen": -0.1849481165409088, |
|
"rewards/margins": 0.22220449149608612, |
|
"rewards/rejected": -0.40715259313583374, |
|
"step": 280 |
|
}, |
|
{ |
|
"epoch": 0.46, |
|
"learning_rate": 3.2432444529190714e-06, |
|
"logits/chosen": -0.2878536283969879, |
|
"logits/rejected": -0.240807443857193, |
|
"logps/chosen": -685.4102783203125, |
|
"logps/rejected": -1073.6112060546875, |
|
"loss": 0.1278, |
|
"rewards/accuracies": 0.8187500238418579, |
|
"rewards/chosen": -0.19928036630153656, |
|
"rewards/margins": 0.167768195271492, |
|
"rewards/rejected": -0.36704859137535095, |
|
"step": 290 |
|
}, |
|
{ |
|
"epoch": 0.48, |
|
"learning_rate": 3.1087212639117057e-06, |
|
"logits/chosen": -0.22543080151081085, |
|
"logits/rejected": -0.25053372979164124, |
|
"logps/chosen": -713.0574340820312, |
|
"logps/rejected": -1165.50244140625, |
|
"loss": 0.1199, |
|
"rewards/accuracies": 0.7749999761581421, |
|
"rewards/chosen": -0.20086824893951416, |
|
"rewards/margins": 0.19784381985664368, |
|
"rewards/rejected": -0.39871203899383545, |
|
"step": 300 |
|
}, |
|
{ |
|
"epoch": 0.5, |
|
"learning_rate": 2.9722964165636263e-06, |
|
"logits/chosen": -0.2477823793888092, |
|
"logits/rejected": -0.23944005370140076, |
|
"logps/chosen": -741.0659790039062, |
|
"logps/rejected": -1150.4737548828125, |
|
"loss": 0.1334, |
|
"rewards/accuracies": 0.7749999761581421, |
|
"rewards/chosen": -0.21726730465888977, |
|
"rewards/margins": 0.18077899515628815, |
|
"rewards/rejected": -0.3980463147163391, |
|
"step": 310 |
|
}, |
|
{ |
|
"epoch": 0.51, |
|
"learning_rate": 2.8343961050366275e-06, |
|
"logits/chosen": -0.206426739692688, |
|
"logits/rejected": -0.294622004032135, |
|
"logps/chosen": -698.1879272460938, |
|
"logps/rejected": -1074.371337890625, |
|
"loss": 0.1279, |
|
"rewards/accuracies": 0.762499988079071, |
|
"rewards/chosen": -0.1868043690919876, |
|
"rewards/margins": 0.1699267327785492, |
|
"rewards/rejected": -0.3567310869693756, |
|
"step": 320 |
|
}, |
|
{ |
|
"epoch": 0.53, |
|
"learning_rate": 2.695451132874385e-06, |
|
"logits/chosen": -0.2724359631538391, |
|
"logits/rejected": -0.21938621997833252, |
|
"logps/chosen": -708.3114013671875, |
|
"logps/rejected": -1099.7318115234375, |
|
"loss": 0.1196, |
|
"rewards/accuracies": 0.8062499761581421, |
|
"rewards/chosen": -0.18547296524047852, |
|
"rewards/margins": 0.1704384684562683, |
|
"rewards/rejected": -0.3559114336967468, |
|
"step": 330 |
|
}, |
|
{ |
|
"epoch": 0.54, |
|
"learning_rate": 2.5558955671628964e-06, |
|
"logits/chosen": -0.25434714555740356, |
|
"logits/rejected": -0.25090768933296204, |
|
"logps/chosen": -695.5904541015625, |
|
"logps/rejected": -1176.5301513671875, |
|
"loss": 0.131, |
|
"rewards/accuracies": 0.768750011920929, |
|
"rewards/chosen": -0.21099016070365906, |
|
"rewards/margins": 0.20053663849830627, |
|
"rewards/rejected": -0.4115268290042877, |
|
"step": 340 |
|
}, |
|
{ |
|
"epoch": 0.56, |
|
"learning_rate": 2.4161653824955654e-06, |
|
"logits/chosen": -0.28552037477493286, |
|
"logits/rejected": -0.24306419491767883, |
|
"logps/chosen": -764.7316284179688, |
|
"logps/rejected": -1231.4193115234375, |
|
"loss": 0.1166, |
|
"rewards/accuracies": 0.8187500238418579, |
|
"rewards/chosen": -0.24171674251556396, |
|
"rewards/margins": 0.21239931881427765, |
|
"rewards/rejected": -0.4541160464286804, |
|
"step": 350 |
|
}, |
|
{ |
|
"epoch": 0.58, |
|
"learning_rate": 2.2766970989791697e-06, |
|
"logits/chosen": -0.25456491112709045, |
|
"logits/rejected": -0.21178500354290009, |
|
"logps/chosen": -765.3328247070312, |
|
"logps/rejected": -1261.208984375, |
|
"loss": 0.1103, |
|
"rewards/accuracies": 0.824999988079071, |
|
"rewards/chosen": -0.22941815853118896, |
|
"rewards/margins": 0.21526849269866943, |
|
"rewards/rejected": -0.4446867108345032, |
|
"step": 360 |
|
}, |
|
{ |
|
"epoch": 0.59, |
|
"learning_rate": 2.1379264185356545e-06, |
|
"logits/chosen": -0.2724260687828064, |
|
"logits/rejected": -0.18538828194141388, |
|
"logps/chosen": -741.7236328125, |
|
"logps/rejected": -1078.375732421875, |
|
"loss": 0.1161, |
|
"rewards/accuracies": 0.7562500238418579, |
|
"rewards/chosen": -0.21514591574668884, |
|
"rewards/margins": 0.16624660789966583, |
|
"rewards/rejected": -0.38139253854751587, |
|
"step": 370 |
|
}, |
|
{ |
|
"epoch": 0.61, |
|
"learning_rate": 2.000286863759934e-06, |
|
"logits/chosen": -0.27519670128822327, |
|
"logits/rejected": -0.2832408845424652, |
|
"logps/chosen": -682.8367309570312, |
|
"logps/rejected": -1131.5003662109375, |
|
"loss": 0.1198, |
|
"rewards/accuracies": 0.8062499761581421, |
|
"rewards/chosen": -0.20102819800376892, |
|
"rewards/margins": 0.18376831710338593, |
|
"rewards/rejected": -0.38479650020599365, |
|
"step": 380 |
|
}, |
|
{ |
|
"epoch": 0.62, |
|
"learning_rate": 1.8642084235859764e-06, |
|
"logits/chosen": -0.2652140259742737, |
|
"logits/rejected": -0.2889960706233978, |
|
"logps/chosen": -731.1627807617188, |
|
"logps/rejected": -1137.467041015625, |
|
"loss": 0.1208, |
|
"rewards/accuracies": 0.8500000238418579, |
|
"rewards/chosen": -0.19975660741329193, |
|
"rewards/margins": 0.21678335964679718, |
|
"rewards/rejected": -0.4165399968624115, |
|
"step": 390 |
|
}, |
|
{ |
|
"epoch": 0.64, |
|
"learning_rate": 1.7301162099921013e-06, |
|
"logits/chosen": -0.26221686601638794, |
|
"logits/rejected": -0.2641231119632721, |
|
"logps/chosen": -755.5460205078125, |
|
"logps/rejected": -1093.4853515625, |
|
"loss": 0.1228, |
|
"rewards/accuracies": 0.8062499761581421, |
|
"rewards/chosen": -0.21798157691955566, |
|
"rewards/margins": 0.17120864987373352, |
|
"rewards/rejected": -0.3891902267932892, |
|
"step": 400 |
|
}, |
|
{ |
|
"epoch": 0.66, |
|
"learning_rate": 1.5984291299420117e-06, |
|
"logits/chosen": -0.2737795114517212, |
|
"logits/rejected": -0.26781201362609863, |
|
"logps/chosen": -703.7815551757812, |
|
"logps/rejected": -1142.2947998046875, |
|
"loss": 0.1225, |
|
"rewards/accuracies": 0.78125, |
|
"rewards/chosen": -0.2116294652223587, |
|
"rewards/margins": 0.18610379099845886, |
|
"rewards/rejected": -0.39773327112197876, |
|
"step": 410 |
|
}, |
|
{ |
|
"epoch": 0.67, |
|
"learning_rate": 1.4695585767104092e-06, |
|
"logits/chosen": -0.29544955492019653, |
|
"logits/rejected": -0.2773270308971405, |
|
"logps/chosen": -686.2037353515625, |
|
"logps/rejected": -1173.82958984375, |
|
"loss": 0.1319, |
|
"rewards/accuracies": 0.762499988079071, |
|
"rewards/chosen": -0.22286900877952576, |
|
"rewards/margins": 0.183340385556221, |
|
"rewards/rejected": -0.40620937943458557, |
|
"step": 420 |
|
}, |
|
{ |
|
"epoch": 0.69, |
|
"learning_rate": 1.3439071446815452e-06, |
|
"logits/chosen": -0.2415129840373993, |
|
"logits/rejected": -0.27339568734169006, |
|
"logps/chosen": -718.8626708984375, |
|
"logps/rejected": -1240.2945556640625, |
|
"loss": 0.1158, |
|
"rewards/accuracies": 0.8374999761581421, |
|
"rewards/chosen": -0.21683505177497864, |
|
"rewards/margins": 0.2326025515794754, |
|
"rewards/rejected": -0.44943755865097046, |
|
"step": 430 |
|
}, |
|
{ |
|
"epoch": 0.7, |
|
"learning_rate": 1.2218673716356919e-06, |
|
"logits/chosen": -0.27473658323287964, |
|
"logits/rejected": -0.2448565512895584, |
|
"logps/chosen": -734.1240844726562, |
|
"logps/rejected": -1157.1583251953125, |
|
"loss": 0.1151, |
|
"rewards/accuracies": 0.8374999761581421, |
|
"rewards/chosen": -0.22244882583618164, |
|
"rewards/margins": 0.20606684684753418, |
|
"rewards/rejected": -0.4285156726837158, |
|
"step": 440 |
|
}, |
|
{ |
|
"epoch": 0.72, |
|
"learning_rate": 1.103820512452661e-06, |
|
"logits/chosen": -0.24782344698905945, |
|
"logits/rejected": -0.26655644178390503, |
|
"logps/chosen": -712.7034912109375, |
|
"logps/rejected": -1142.8284912109375, |
|
"loss": 0.1273, |
|
"rewards/accuracies": 0.7875000238418579, |
|
"rewards/chosen": -0.21107585728168488, |
|
"rewards/margins": 0.19358885288238525, |
|
"rewards/rejected": -0.40466469526290894, |
|
"step": 450 |
|
}, |
|
{ |
|
"epoch": 0.74, |
|
"learning_rate": 9.901353480633468e-07, |
|
"logits/chosen": -0.2719441056251526, |
|
"logits/rejected": -0.26369568705558777, |
|
"logps/chosen": -686.2820434570312, |
|
"logps/rejected": -1074.0950927734375, |
|
"loss": 0.1207, |
|
"rewards/accuracies": 0.8187500238418579, |
|
"rewards/chosen": -0.21289560198783875, |
|
"rewards/margins": 0.17246314883232117, |
|
"rewards/rejected": -0.3853587806224823, |
|
"step": 460 |
|
}, |
|
{ |
|
"epoch": 0.75, |
|
"learning_rate": 8.811670333701544e-07, |
|
"logits/chosen": -0.2672122120857239, |
|
"logits/rejected": -0.2347395420074463, |
|
"logps/chosen": -752.274169921875, |
|
"logps/rejected": -1232.9998779296875, |
|
"loss": 0.1098, |
|
"rewards/accuracies": 0.831250011920929, |
|
"rewards/chosen": -0.2272864133119583, |
|
"rewards/margins": 0.21149499714374542, |
|
"rewards/rejected": -0.4387814402580261, |
|
"step": 470 |
|
}, |
|
{ |
|
"epoch": 0.77, |
|
"learning_rate": 7.772559877354341e-07, |
|
"logits/chosen": -0.24777153134346008, |
|
"logits/rejected": -0.2399456799030304, |
|
"logps/chosen": -736.2609252929688, |
|
"logps/rejected": -1130.094970703125, |
|
"loss": 0.1168, |
|
"rewards/accuracies": 0.793749988079071, |
|
"rewards/chosen": -0.22519740462303162, |
|
"rewards/margins": 0.17224054038524628, |
|
"rewards/rejected": -0.3974379599094391, |
|
"step": 480 |
|
}, |
|
{ |
|
"epoch": 0.78, |
|
"learning_rate": 6.787268315040604e-07, |
|
"logits/chosen": -0.2684328854084015, |
|
"logits/rejected": -0.24628722667694092, |
|
"logps/chosen": -689.9044799804688, |
|
"logps/rejected": -1155.0946044921875, |
|
"loss": 0.1174, |
|
"rewards/accuracies": 0.800000011920929, |
|
"rewards/chosen": -0.21575360000133514, |
|
"rewards/margins": 0.1959463357925415, |
|
"rewards/rejected": -0.41169995069503784, |
|
"step": 490 |
|
}, |
|
{ |
|
"epoch": 0.8, |
|
"learning_rate": 5.858873718824829e-07, |
|
"logits/chosen": -0.26447051763534546, |
|
"logits/rejected": -0.26331770420074463, |
|
"logps/chosen": -707.0438842773438, |
|
"logps/rejected": -1152.1182861328125, |
|
"loss": 0.1172, |
|
"rewards/accuracies": 0.8187500238418579, |
|
"rewards/chosen": -0.20341674983501434, |
|
"rewards/margins": 0.19374600052833557, |
|
"rewards/rejected": -0.3971627652645111, |
|
"step": 500 |
|
}, |
|
{ |
|
"epoch": 0.82, |
|
"learning_rate": 4.990276413423817e-07, |
|
"logits/chosen": -0.23977680504322052, |
|
"logits/rejected": -0.22561779618263245, |
|
"logps/chosen": -729.1976318359375, |
|
"logps/rejected": -1142.5386962890625, |
|
"loss": 0.1182, |
|
"rewards/accuracies": 0.8062499761581421, |
|
"rewards/chosen": -0.21028919517993927, |
|
"rewards/margins": 0.1970864236354828, |
|
"rewards/rejected": -0.40737563371658325, |
|
"step": 510 |
|
}, |
|
{ |
|
"epoch": 0.83, |
|
"learning_rate": 4.184189915529796e-07, |
|
"logits/chosen": -0.26442182064056396, |
|
"logits/rejected": -0.25705209374427795, |
|
"logps/chosen": -737.6463623046875, |
|
"logps/rejected": -1247.0609130859375, |
|
"loss": 0.1102, |
|
"rewards/accuracies": 0.78125, |
|
"rewards/chosen": -0.2177223414182663, |
|
"rewards/margins": 0.20199593901634216, |
|
"rewards/rejected": -0.41971832513809204, |
|
"step": 520 |
|
}, |
|
{ |
|
"epoch": 0.85, |
|
"learning_rate": 3.4431324567258176e-07, |
|
"logits/chosen": -0.26106202602386475, |
|
"logits/rejected": -0.2545424997806549, |
|
"logps/chosen": -700.1468505859375, |
|
"logps/rejected": -1189.3382568359375, |
|
"loss": 0.1211, |
|
"rewards/accuracies": 0.800000011920929, |
|
"rewards/chosen": -0.20997758209705353, |
|
"rewards/margins": 0.2070033997297287, |
|
"rewards/rejected": -0.4169809818267822, |
|
"step": 530 |
|
}, |
|
{ |
|
"epoch": 0.86, |
|
"learning_rate": 2.769419116476052e-07, |
|
"logits/chosen": -0.2785791754722595, |
|
"logits/rejected": -0.26757797598838806, |
|
"logps/chosen": -753.193115234375, |
|
"logps/rejected": -1155.57763671875, |
|
"loss": 0.1094, |
|
"rewards/accuracies": 0.8062499761581421, |
|
"rewards/chosen": -0.21952767670154572, |
|
"rewards/margins": 0.1966555118560791, |
|
"rewards/rejected": -0.4161831736564636, |
|
"step": 540 |
|
}, |
|
{ |
|
"epoch": 0.88, |
|
"learning_rate": 2.1651545897676512e-07, |
|
"logits/chosen": -0.21319365501403809, |
|
"logits/rejected": -0.20390382409095764, |
|
"logps/chosen": -736.0490112304688, |
|
"logps/rejected": -1217.077880859375, |
|
"loss": 0.1175, |
|
"rewards/accuracies": 0.800000011920929, |
|
"rewards/chosen": -0.21741120517253876, |
|
"rewards/margins": 0.21254892647266388, |
|
"rewards/rejected": -0.42996007204055786, |
|
"step": 550 |
|
}, |
|
{ |
|
"epoch": 0.9, |
|
"learning_rate": 1.6322266119983222e-07, |
|
"logits/chosen": -0.24104240536689758, |
|
"logits/rejected": -0.2172488421201706, |
|
"logps/chosen": -765.8303833007812, |
|
"logps/rejected": -1205.9599609375, |
|
"loss": 0.11, |
|
"rewards/accuracies": 0.7875000238418579, |
|
"rewards/chosen": -0.21993395686149597, |
|
"rewards/margins": 0.19853533804416656, |
|
"rewards/rejected": -0.41846928000450134, |
|
"step": 560 |
|
}, |
|
{ |
|
"epoch": 0.91, |
|
"learning_rate": 1.1723000616502167e-07, |
|
"logits/chosen": -0.26349014043807983, |
|
"logits/rejected": -0.3146611452102661, |
|
"logps/chosen": -755.0218505859375, |
|
"logps/rejected": -1200.8770751953125, |
|
"loss": 0.1129, |
|
"rewards/accuracies": 0.8125, |
|
"rewards/chosen": -0.21991780400276184, |
|
"rewards/margins": 0.22101625800132751, |
|
"rewards/rejected": -0.44093409180641174, |
|
"step": 570 |
|
}, |
|
{ |
|
"epoch": 0.93, |
|
"learning_rate": 7.868117591737585e-08, |
|
"logits/chosen": -0.20703573524951935, |
|
"logits/rejected": -0.21116182208061218, |
|
"logps/chosen": -741.433837890625, |
|
"logps/rejected": -1168.3992919921875, |
|
"loss": 0.1149, |
|
"rewards/accuracies": 0.8187500238418579, |
|
"rewards/chosen": -0.2117561548948288, |
|
"rewards/margins": 0.19863612949848175, |
|
"rewards/rejected": -0.41039222478866577, |
|
"step": 580 |
|
}, |
|
{ |
|
"epoch": 0.94, |
|
"learning_rate": 4.769659783295383e-08, |
|
"logits/chosen": -0.23616275191307068, |
|
"logits/rejected": -0.2213321179151535, |
|
"logps/chosen": -802.316650390625, |
|
"logps/rejected": -1224.220458984375, |
|
"loss": 0.1123, |
|
"rewards/accuracies": 0.8374999761581421, |
|
"rewards/chosen": -0.2234838753938675, |
|
"rewards/margins": 0.21354734897613525, |
|
"rewards/rejected": -0.43703120946884155, |
|
"step": 590 |
|
}, |
|
{ |
|
"epoch": 0.96, |
|
"learning_rate": 2.4373068401120358e-08, |
|
"logits/chosen": -0.25830012559890747, |
|
"logits/rejected": -0.24621865153312683, |
|
"logps/chosen": -802.4405517578125, |
|
"logps/rejected": -1169.389892578125, |
|
"loss": 0.1136, |
|
"rewards/accuracies": 0.7749999761581421, |
|
"rewards/chosen": -0.23679308593273163, |
|
"rewards/margins": 0.1784859597682953, |
|
"rewards/rejected": -0.4152790606021881, |
|
"step": 600 |
|
}, |
|
{ |
|
"epoch": 0.98, |
|
"learning_rate": 8.78345083022425e-09, |
|
"logits/chosen": -0.24250511825084686, |
|
"logits/rejected": -0.24350687861442566, |
|
"logps/chosen": -742.060546875, |
|
"logps/rejected": -1132.1630859375, |
|
"loss": 0.1141, |
|
"rewards/accuracies": 0.8062499761581421, |
|
"rewards/chosen": -0.2040189504623413, |
|
"rewards/margins": 0.19322746992111206, |
|
"rewards/rejected": -0.39724642038345337, |
|
"step": 610 |
|
}, |
|
{ |
|
"epoch": 0.99, |
|
"learning_rate": 9.764474213677654e-10, |
|
"logits/chosen": -0.2109394073486328, |
|
"logits/rejected": -0.22563035786151886, |
|
"logps/chosen": -744.6256103515625, |
|
"logps/rejected": -1185.656982421875, |
|
"loss": 0.1153, |
|
"rewards/accuracies": 0.7749999761581421, |
|
"rewards/chosen": -0.23164710402488708, |
|
"rewards/margins": 0.20466098189353943, |
|
"rewards/rejected": -0.4363080859184265, |
|
"step": 620 |
|
}, |
|
{ |
|
"epoch": 1.0, |
|
"step": 625, |
|
"total_flos": 0.0, |
|
"train_loss": 0.13047290132045747, |
|
"train_runtime": 8392.5195, |
|
"train_samples_per_second": 3.575, |
|
"train_steps_per_second": 0.074 |
|
} |
|
], |
|
"logging_steps": 10, |
|
"max_steps": 625, |
|
"num_input_tokens_seen": 0, |
|
"num_train_epochs": 1, |
|
"save_steps": 100, |
|
"total_flos": 0.0, |
|
"train_batch_size": 4, |
|
"trial_name": null, |
|
"trial_params": null |
|
} |
|
|