|
{ |
|
"best_metric": null, |
|
"best_model_checkpoint": null, |
|
"epoch": 0.9984, |
|
"eval_steps": 100, |
|
"global_step": 312, |
|
"is_hyper_param_search": false, |
|
"is_local_process_zero": true, |
|
"is_world_process_zero": true, |
|
"log_history": [ |
|
{ |
|
"epoch": 0.0, |
|
"learning_rate": 1.5625e-07, |
|
"logits/chosen": -0.8713370561599731, |
|
"logits/rejected": -0.8000868558883667, |
|
"logps/chosen": -71.98661804199219, |
|
"logps/rejected": -66.86463928222656, |
|
"loss": 0.1482, |
|
"rewards/accuracies": 0.0, |
|
"rewards/chosen": 0.0, |
|
"rewards/margins": 0.0, |
|
"rewards/rejected": 0.0, |
|
"step": 1 |
|
}, |
|
{ |
|
"epoch": 0.03, |
|
"learning_rate": 1.5625e-06, |
|
"logits/chosen": -1.0018833875656128, |
|
"logits/rejected": -0.9745380878448486, |
|
"logps/chosen": -17.541515350341797, |
|
"logps/rejected": -15.201577186584473, |
|
"loss": 0.1565, |
|
"rewards/accuracies": 0.013888888992369175, |
|
"rewards/chosen": -2.488666950739571e-06, |
|
"rewards/margins": -2.0448896975722164e-05, |
|
"rewards/rejected": 1.7960232071345672e-05, |
|
"step": 10 |
|
}, |
|
{ |
|
"epoch": 0.06, |
|
"learning_rate": 3.125e-06, |
|
"logits/chosen": -1.0368674993515015, |
|
"logits/rejected": -1.0124459266662598, |
|
"logps/chosen": -26.138195037841797, |
|
"logps/rejected": -24.956317901611328, |
|
"loss": 0.142, |
|
"rewards/accuracies": 0.04374999925494194, |
|
"rewards/chosen": -4.275892479199683e-06, |
|
"rewards/margins": 7.450838893419132e-05, |
|
"rewards/rejected": -7.878428004914895e-05, |
|
"step": 20 |
|
}, |
|
{ |
|
"epoch": 0.1, |
|
"learning_rate": 4.6875000000000004e-06, |
|
"logits/chosen": -1.0422089099884033, |
|
"logits/rejected": -1.0167020559310913, |
|
"logps/chosen": -16.177021026611328, |
|
"logps/rejected": -12.827156066894531, |
|
"loss": 0.1594, |
|
"rewards/accuracies": 0.0062500000931322575, |
|
"rewards/chosen": 1.4253237168304622e-05, |
|
"rewards/margins": -4.276504478184506e-05, |
|
"rewards/rejected": 5.7018281950149685e-05, |
|
"step": 30 |
|
}, |
|
{ |
|
"epoch": 0.13, |
|
"learning_rate": 4.989935734988098e-06, |
|
"logits/chosen": -1.0433483123779297, |
|
"logits/rejected": -1.0170705318450928, |
|
"logps/chosen": -14.03108024597168, |
|
"logps/rejected": -15.067425727844238, |
|
"loss": 0.1303, |
|
"rewards/accuracies": 0.012500000186264515, |
|
"rewards/chosen": 6.6591557697393e-05, |
|
"rewards/margins": -1.4248180377762765e-05, |
|
"rewards/rejected": 8.083973807515576e-05, |
|
"step": 40 |
|
}, |
|
{ |
|
"epoch": 0.16, |
|
"learning_rate": 4.949188496058089e-06, |
|
"logits/chosen": -1.025377631187439, |
|
"logits/rejected": -0.9797303080558777, |
|
"logps/chosen": -8.638357162475586, |
|
"logps/rejected": -6.794472694396973, |
|
"loss": 0.147, |
|
"rewards/accuracies": 0.0062500000931322575, |
|
"rewards/chosen": 2.7260210117674433e-05, |
|
"rewards/margins": 9.803772627492435e-06, |
|
"rewards/rejected": 1.7456437490181997e-05, |
|
"step": 50 |
|
}, |
|
{ |
|
"epoch": 0.19, |
|
"learning_rate": 4.8776412907378845e-06, |
|
"logits/chosen": -1.0003960132598877, |
|
"logits/rejected": -0.97206050157547, |
|
"logps/chosen": -9.381010055541992, |
|
"logps/rejected": -9.556936264038086, |
|
"loss": 0.152, |
|
"rewards/accuracies": 0.0, |
|
"rewards/chosen": 2.744483936112374e-06, |
|
"rewards/margins": -6.780033436371014e-05, |
|
"rewards/rejected": 7.054481829982251e-05, |
|
"step": 60 |
|
}, |
|
{ |
|
"epoch": 0.22, |
|
"learning_rate": 4.7761938666470405e-06, |
|
"logits/chosen": -1.1042237281799316, |
|
"logits/rejected": -1.0950109958648682, |
|
"logps/chosen": -18.2097225189209, |
|
"logps/rejected": -16.501184463500977, |
|
"loss": 0.1346, |
|
"rewards/accuracies": 0.012500000186264515, |
|
"rewards/chosen": 6.527634104713798e-05, |
|
"rewards/margins": -3.1286239391192794e-05, |
|
"rewards/rejected": 9.656258771428838e-05, |
|
"step": 70 |
|
}, |
|
{ |
|
"epoch": 0.26, |
|
"learning_rate": 4.646121984004666e-06, |
|
"logits/chosen": -1.0520336627960205, |
|
"logits/rejected": -1.0432665348052979, |
|
"logps/chosen": -1.5275797843933105, |
|
"logps/rejected": -1.3896734714508057, |
|
"loss": 0.1494, |
|
"rewards/accuracies": 0.0, |
|
"rewards/chosen": -1.1070919754274655e-05, |
|
"rewards/margins": -1.675033900028211e-06, |
|
"rewards/rejected": -9.39588608162012e-06, |
|
"step": 80 |
|
}, |
|
{ |
|
"epoch": 0.29, |
|
"learning_rate": 4.4890613722044526e-06, |
|
"logits/chosen": -0.9697812795639038, |
|
"logits/rejected": -0.9518159627914429, |
|
"logps/chosen": -14.255193710327148, |
|
"logps/rejected": -14.01024055480957, |
|
"loss": 0.1562, |
|
"rewards/accuracies": 0.02500000037252903, |
|
"rewards/chosen": 0.00017183150339405984, |
|
"rewards/margins": 3.2527732400922105e-05, |
|
"rewards/rejected": 0.00013930378190707415, |
|
"step": 90 |
|
}, |
|
{ |
|
"epoch": 0.32, |
|
"learning_rate": 4.3069871595684795e-06, |
|
"logits/chosen": -1.0500797033309937, |
|
"logits/rejected": -1.0140620470046997, |
|
"logps/chosen": -9.409942626953125, |
|
"logps/rejected": -6.755279541015625, |
|
"loss": 0.1204, |
|
"rewards/accuracies": 0.0, |
|
"rewards/chosen": 1.4868164726067334e-05, |
|
"rewards/margins": -4.348888614913449e-05, |
|
"rewards/rejected": 5.835705087520182e-05, |
|
"step": 100 |
|
}, |
|
{ |
|
"epoch": 0.32, |
|
"eval_logits/chosen": -0.8371561169624329, |
|
"eval_logits/rejected": -0.7447798252105713, |
|
"eval_logps/chosen": -396.1279296875, |
|
"eval_logps/rejected": -367.0281982421875, |
|
"eval_loss": 0.05370998755097389, |
|
"eval_rewards/accuracies": 0.4494999945163727, |
|
"eval_rewards/chosen": 0.00018531581736169755, |
|
"eval_rewards/margins": -3.043685865122825e-05, |
|
"eval_rewards/rejected": 0.0002157526760129258, |
|
"eval_runtime": 546.3436, |
|
"eval_samples_per_second": 3.661, |
|
"eval_steps_per_second": 0.915, |
|
"step": 100 |
|
}, |
|
{ |
|
"epoch": 0.35, |
|
"learning_rate": 4.102189034962561e-06, |
|
"logits/chosen": -1.04524827003479, |
|
"logits/rejected": -1.0312702655792236, |
|
"logps/chosen": -21.346084594726562, |
|
"logps/rejected": -22.236892700195312, |
|
"loss": 0.1488, |
|
"rewards/accuracies": 0.03125, |
|
"rewards/chosen": 0.00011841374362120405, |
|
"rewards/margins": -2.6545517357590143e-06, |
|
"rewards/rejected": 0.0001210682894452475, |
|
"step": 110 |
|
}, |
|
{ |
|
"epoch": 0.38, |
|
"learning_rate": 3.8772424536302565e-06, |
|
"logits/chosen": -1.0237395763397217, |
|
"logits/rejected": -0.9514573216438293, |
|
"logps/chosen": -19.651378631591797, |
|
"logps/rejected": -17.589191436767578, |
|
"loss": 0.145, |
|
"rewards/accuracies": 0.05000000074505806, |
|
"rewards/chosen": 0.0001629686012165621, |
|
"rewards/margins": 0.00013977885828353465, |
|
"rewards/rejected": 2.318973929504864e-05, |
|
"step": 120 |
|
}, |
|
{ |
|
"epoch": 0.42, |
|
"learning_rate": 3.634976249348867e-06, |
|
"logits/chosen": -1.105330228805542, |
|
"logits/rejected": -1.073228120803833, |
|
"logps/chosen": -11.863168716430664, |
|
"logps/rejected": -9.925447463989258, |
|
"loss": 0.1365, |
|
"rewards/accuracies": 0.012500000186264515, |
|
"rewards/chosen": 7.793636905262247e-05, |
|
"rewards/margins": -2.657127333804965e-05, |
|
"rewards/rejected": 0.00010450764966662973, |
|
"step": 130 |
|
}, |
|
{ |
|
"epoch": 0.45, |
|
"learning_rate": 3.3784370602033572e-06, |
|
"logits/chosen": -1.0086735486984253, |
|
"logits/rejected": -0.9279971122741699, |
|
"logps/chosen": -17.02583885192871, |
|
"logps/rejected": -12.851661682128906, |
|
"loss": 0.1655, |
|
"rewards/accuracies": 0.03750000149011612, |
|
"rewards/chosen": 0.000239654938923195, |
|
"rewards/margins": 7.573223410872743e-05, |
|
"rewards/rejected": 0.00016392269753850996, |
|
"step": 140 |
|
}, |
|
{ |
|
"epoch": 0.48, |
|
"learning_rate": 3.1108510153447352e-06, |
|
"logits/chosen": -1.003169059753418, |
|
"logits/rejected": -0.9840670824050903, |
|
"logps/chosen": -14.473843574523926, |
|
"logps/rejected": -11.736700057983398, |
|
"loss": 0.1524, |
|
"rewards/accuracies": 0.03125, |
|
"rewards/chosen": 0.00021637363533955067, |
|
"rewards/margins": 0.00011020306556019932, |
|
"rewards/rejected": 0.00010617056250339374, |
|
"step": 150 |
|
}, |
|
{ |
|
"epoch": 0.51, |
|
"learning_rate": 2.835583164544139e-06, |
|
"logits/chosen": -1.0689656734466553, |
|
"logits/rejected": -1.0586764812469482, |
|
"logps/chosen": -14.865160942077637, |
|
"logps/rejected": -14.334310531616211, |
|
"loss": 0.1408, |
|
"rewards/accuracies": 0.03750000149011612, |
|
"rewards/chosen": 0.00026066350983455777, |
|
"rewards/margins": 8.685861394042149e-05, |
|
"rewards/rejected": 0.0001738048595143482, |
|
"step": 160 |
|
}, |
|
{ |
|
"epoch": 0.54, |
|
"learning_rate": 2.556095160739513e-06, |
|
"logits/chosen": -1.0525116920471191, |
|
"logits/rejected": -0.9792855381965637, |
|
"logps/chosen": -23.66820526123047, |
|
"logps/rejected": -22.754222869873047, |
|
"loss": 0.1427, |
|
"rewards/accuracies": 0.03125, |
|
"rewards/chosen": 0.00037254736525937915, |
|
"rewards/margins": 8.062725828494877e-05, |
|
"rewards/rejected": 0.00029192009242251515, |
|
"step": 170 |
|
}, |
|
{ |
|
"epoch": 0.58, |
|
"learning_rate": 2.2759017277414165e-06, |
|
"logits/chosen": -1.0303270816802979, |
|
"logits/rejected": -0.9566599726676941, |
|
"logps/chosen": -26.506671905517578, |
|
"logps/rejected": -17.675500869750977, |
|
"loss": 0.1365, |
|
"rewards/accuracies": 0.03125, |
|
"rewards/chosen": 0.00029862881638109684, |
|
"rewards/margins": -5.4413605539593846e-05, |
|
"rewards/rejected": 0.00035304244374856353, |
|
"step": 180 |
|
}, |
|
{ |
|
"epoch": 0.61, |
|
"learning_rate": 1.9985264605418185e-06, |
|
"logits/chosen": -1.0593003034591675, |
|
"logits/rejected": -1.015201449394226, |
|
"logps/chosen": -13.778154373168945, |
|
"logps/rejected": -12.183830261230469, |
|
"loss": 0.1457, |
|
"rewards/accuracies": 0.012500000186264515, |
|
"rewards/chosen": 0.00020911027968395501, |
|
"rewards/margins": 3.675861080409959e-05, |
|
"rewards/rejected": 0.0001723516616038978, |
|
"step": 190 |
|
}, |
|
{ |
|
"epoch": 0.64, |
|
"learning_rate": 1.7274575140626318e-06, |
|
"logits/chosen": -1.102203369140625, |
|
"logits/rejected": -1.0614955425262451, |
|
"logps/chosen": -22.697526931762695, |
|
"logps/rejected": -21.1995906829834, |
|
"loss": 0.1673, |
|
"rewards/accuracies": 0.03125, |
|
"rewards/chosen": 0.00026087305741384625, |
|
"rewards/margins": 3.989339165855199e-05, |
|
"rewards/rejected": 0.00022097965120337903, |
|
"step": 200 |
|
}, |
|
{ |
|
"epoch": 0.64, |
|
"eval_logits/chosen": -0.8517611026763916, |
|
"eval_logits/rejected": -0.7569481730461121, |
|
"eval_logps/chosen": -395.0410461425781, |
|
"eval_logps/rejected": -365.74951171875, |
|
"eval_loss": 0.05380060523748398, |
|
"eval_rewards/accuracies": 0.43050000071525574, |
|
"eval_rewards/chosen": 0.0012642339570447803, |
|
"eval_rewards/margins": -0.00022168662690091878, |
|
"eval_rewards/rejected": 0.001485920511186123, |
|
"eval_runtime": 546.0518, |
|
"eval_samples_per_second": 3.663, |
|
"eval_steps_per_second": 0.916, |
|
"step": 200 |
|
}, |
|
{ |
|
"epoch": 0.67, |
|
"learning_rate": 1.466103737583699e-06, |
|
"logits/chosen": -1.0760139226913452, |
|
"logits/rejected": -1.0579842329025269, |
|
"logps/chosen": -21.285213470458984, |
|
"logps/rejected": -18.403087615966797, |
|
"loss": 0.147, |
|
"rewards/accuracies": 0.01875000074505806, |
|
"rewards/chosen": 0.00021679059136658907, |
|
"rewards/margins": -9.555607539368793e-05, |
|
"rewards/rejected": 0.0003123466740362346, |
|
"step": 210 |
|
}, |
|
{ |
|
"epoch": 0.7, |
|
"learning_rate": 1.217751806485235e-06, |
|
"logits/chosen": -1.0322855710983276, |
|
"logits/rejected": -0.9539896249771118, |
|
"logps/chosen": -19.72937774658203, |
|
"logps/rejected": -17.00729751586914, |
|
"loss": 0.1377, |
|
"rewards/accuracies": 0.03750000149011612, |
|
"rewards/chosen": 0.0002937263634521514, |
|
"rewards/margins": 0.00012941876775585115, |
|
"rewards/rejected": 0.0001643076102482155, |
|
"step": 220 |
|
}, |
|
{ |
|
"epoch": 0.74, |
|
"learning_rate": 9.855248903979505e-07, |
|
"logits/chosen": -1.0536764860153198, |
|
"logits/rejected": -1.0304574966430664, |
|
"logps/chosen": -29.4453067779541, |
|
"logps/rejected": -24.992198944091797, |
|
"loss": 0.1457, |
|
"rewards/accuracies": 0.03125, |
|
"rewards/chosen": 0.0004715063259936869, |
|
"rewards/margins": 4.0133098082151264e-05, |
|
"rewards/rejected": 0.0004313732497394085, |
|
"step": 230 |
|
}, |
|
{ |
|
"epoch": 0.77, |
|
"learning_rate": 7.723433775328385e-07, |
|
"logits/chosen": -1.094244122505188, |
|
"logits/rejected": -1.053062915802002, |
|
"logps/chosen": -24.80852699279785, |
|
"logps/rejected": -23.72032356262207, |
|
"loss": 0.157, |
|
"rewards/accuracies": 0.02500000037252903, |
|
"rewards/chosen": 0.00014916087093297392, |
|
"rewards/margins": -0.0001278716663364321, |
|
"rewards/rejected": 0.0002770325227174908, |
|
"step": 240 |
|
}, |
|
{ |
|
"epoch": 0.8, |
|
"learning_rate": 5.808881491049723e-07, |
|
"logits/chosen": -1.1208736896514893, |
|
"logits/rejected": -1.0476964712142944, |
|
"logps/chosen": -38.830894470214844, |
|
"logps/rejected": -27.4512939453125, |
|
"loss": 0.1685, |
|
"rewards/accuracies": 0.03750000149011612, |
|
"rewards/chosen": 0.0004542851238511503, |
|
"rewards/margins": -1.4414394172490574e-05, |
|
"rewards/rejected": 0.0004686995525844395, |
|
"step": 250 |
|
}, |
|
{ |
|
"epoch": 0.83, |
|
"learning_rate": 4.1356686569674344e-07, |
|
"logits/chosen": -1.0925004482269287, |
|
"logits/rejected": -1.0564720630645752, |
|
"logps/chosen": -27.70499610900879, |
|
"logps/rejected": -23.542407989501953, |
|
"loss": 0.144, |
|
"rewards/accuracies": 0.02500000037252903, |
|
"rewards/chosen": 0.00039118691347539425, |
|
"rewards/margins": -7.202206325018778e-05, |
|
"rewards/rejected": 0.0004632089694496244, |
|
"step": 260 |
|
}, |
|
{ |
|
"epoch": 0.86, |
|
"learning_rate": 2.7248368952908055e-07, |
|
"logits/chosen": -1.074318528175354, |
|
"logits/rejected": -1.048244595527649, |
|
"logps/chosen": -32.280303955078125, |
|
"logps/rejected": -28.2834415435791, |
|
"loss": 0.1723, |
|
"rewards/accuracies": 0.03125, |
|
"rewards/chosen": 0.0003155902377329767, |
|
"rewards/margins": 1.3381155440583825e-05, |
|
"rewards/rejected": 0.0003022090531885624, |
|
"step": 270 |
|
}, |
|
{ |
|
"epoch": 0.9, |
|
"learning_rate": 1.59412823400657e-07, |
|
"logits/chosen": -1.0563385486602783, |
|
"logits/rejected": -1.0031250715255737, |
|
"logps/chosen": -25.797882080078125, |
|
"logps/rejected": -24.61809539794922, |
|
"loss": 0.1346, |
|
"rewards/accuracies": 0.03750000149011612, |
|
"rewards/chosen": 0.00044602793059311807, |
|
"rewards/margins": 0.00014313617430161685, |
|
"rewards/rejected": 0.00030289177084341645, |
|
"step": 280 |
|
}, |
|
{ |
|
"epoch": 0.93, |
|
"learning_rate": 7.577619905828281e-08, |
|
"logits/chosen": -1.0759773254394531, |
|
"logits/rejected": -1.001573085784912, |
|
"logps/chosen": -24.67647361755371, |
|
"logps/rejected": -17.433469772338867, |
|
"loss": 0.1347, |
|
"rewards/accuracies": 0.01875000074505806, |
|
"rewards/chosen": 8.942681597545743e-05, |
|
"rewards/margins": -0.00017002139065880328, |
|
"rewards/rejected": 0.0002594481920823455, |
|
"step": 290 |
|
}, |
|
{ |
|
"epoch": 0.96, |
|
"learning_rate": 2.262559558016325e-08, |
|
"logits/chosen": -1.0650088787078857, |
|
"logits/rejected": -1.0040552616119385, |
|
"logps/chosen": -17.18710708618164, |
|
"logps/rejected": -16.234764099121094, |
|
"loss": 0.1395, |
|
"rewards/accuracies": 0.03125, |
|
"rewards/chosen": 0.00038518753717653453, |
|
"rewards/margins": 4.9668800784274936e-05, |
|
"rewards/rejected": 0.00033551876549609005, |
|
"step": 300 |
|
}, |
|
{ |
|
"epoch": 0.96, |
|
"eval_logits/chosen": -0.8541178107261658, |
|
"eval_logits/rejected": -0.7586507797241211, |
|
"eval_logps/chosen": -395.30059814453125, |
|
"eval_logps/rejected": -365.9885559082031, |
|
"eval_loss": 0.05379989370703697, |
|
"eval_rewards/accuracies": 0.43950000405311584, |
|
"eval_rewards/chosen": 0.00100463698618114, |
|
"eval_rewards/margins": -0.00024221515923272818, |
|
"eval_rewards/rejected": 0.0012468521017581224, |
|
"eval_runtime": 545.846, |
|
"eval_samples_per_second": 3.664, |
|
"eval_steps_per_second": 0.916, |
|
"step": 300 |
|
}, |
|
{ |
|
"epoch": 0.99, |
|
"learning_rate": 6.294126437336734e-10, |
|
"logits/chosen": -1.0554258823394775, |
|
"logits/rejected": -0.9918642044067383, |
|
"logps/chosen": -20.51413345336914, |
|
"logps/rejected": -15.299043655395508, |
|
"loss": 0.1462, |
|
"rewards/accuracies": 0.02500000037252903, |
|
"rewards/chosen": 0.0004165357968304306, |
|
"rewards/margins": 0.00014305347576737404, |
|
"rewards/rejected": 0.0002734823210630566, |
|
"step": 310 |
|
}, |
|
{ |
|
"epoch": 1.0, |
|
"step": 312, |
|
"total_flos": 0.0, |
|
"train_loss": 0.10059727164797294, |
|
"train_runtime": 2585.2833, |
|
"train_samples_per_second": 1.934, |
|
"train_steps_per_second": 0.121 |
|
} |
|
], |
|
"logging_steps": 10, |
|
"max_steps": 312, |
|
"num_input_tokens_seen": 0, |
|
"num_train_epochs": 1, |
|
"save_steps": 100, |
|
"total_flos": 0.0, |
|
"train_batch_size": 4, |
|
"trial_name": null, |
|
"trial_params": null |
|
} |
|
|