|
{ |
|
"best_metric": null, |
|
"best_model_checkpoint": null, |
|
"epoch": 1.06544901065449, |
|
"eval_steps": 50, |
|
"global_step": 1400, |
|
"is_hyper_param_search": false, |
|
"is_local_process_zero": true, |
|
"is_world_process_zero": true, |
|
"log_history": [ |
|
{ |
|
"epoch": 0.0076103500761035, |
|
"grad_norm": 0.058339186012744904, |
|
"learning_rate": 4.999451708687114e-06, |
|
"logits/chosen": 14.268467903137207, |
|
"logits/rejected": 14.600369453430176, |
|
"logps/chosen": -0.2669850289821625, |
|
"logps/rejected": -0.3412467837333679, |
|
"loss": 0.9049, |
|
"rewards/accuracies": 0.5625, |
|
"rewards/chosen": -0.4004775583744049, |
|
"rewards/margins": 0.11139259487390518, |
|
"rewards/rejected": -0.5118702054023743, |
|
"step": 10 |
|
}, |
|
{ |
|
"epoch": 0.015220700152207, |
|
"grad_norm": 0.049545690417289734, |
|
"learning_rate": 4.997807075247147e-06, |
|
"logits/chosen": 14.14539623260498, |
|
"logits/rejected": 15.191584587097168, |
|
"logps/chosen": -0.25579872727394104, |
|
"logps/rejected": -0.3931494653224945, |
|
"loss": 0.8989, |
|
"rewards/accuracies": 0.625, |
|
"rewards/chosen": -0.38369807600975037, |
|
"rewards/margins": 0.2060261219739914, |
|
"rewards/rejected": -0.5897241830825806, |
|
"step": 20 |
|
}, |
|
{ |
|
"epoch": 0.0228310502283105, |
|
"grad_norm": 0.061699289828538895, |
|
"learning_rate": 4.9950668210706795e-06, |
|
"logits/chosen": 14.284139633178711, |
|
"logits/rejected": 15.006326675415039, |
|
"logps/chosen": -0.275672048330307, |
|
"logps/rejected": -0.3603581488132477, |
|
"loss": 0.9004, |
|
"rewards/accuracies": 0.625, |
|
"rewards/chosen": -0.4135080873966217, |
|
"rewards/margins": 0.12702910602092743, |
|
"rewards/rejected": -0.5405372381210327, |
|
"step": 30 |
|
}, |
|
{ |
|
"epoch": 0.030441400304414, |
|
"grad_norm": 0.05706426501274109, |
|
"learning_rate": 4.9912321481237616e-06, |
|
"logits/chosen": 14.275796890258789, |
|
"logits/rejected": 14.935521125793457, |
|
"logps/chosen": -0.2802076041698456, |
|
"logps/rejected": -0.38278770446777344, |
|
"loss": 0.9138, |
|
"rewards/accuracies": 0.512499988079071, |
|
"rewards/chosen": -0.42031145095825195, |
|
"rewards/margins": 0.15387018024921417, |
|
"rewards/rejected": -0.5741815567016602, |
|
"step": 40 |
|
}, |
|
{ |
|
"epoch": 0.0380517503805175, |
|
"grad_norm": 0.05318514257669449, |
|
"learning_rate": 4.986304738420684e-06, |
|
"logits/chosen": 14.433627128601074, |
|
"logits/rejected": 15.458297729492188, |
|
"logps/chosen": -0.2581387162208557, |
|
"logps/rejected": -0.38208404183387756, |
|
"loss": 0.914, |
|
"rewards/accuracies": 0.612500011920929, |
|
"rewards/chosen": -0.38720807433128357, |
|
"rewards/margins": 0.18591801822185516, |
|
"rewards/rejected": -0.5731261372566223, |
|
"step": 50 |
|
}, |
|
{ |
|
"epoch": 0.0380517503805175, |
|
"eval_logits/chosen": 14.396967887878418, |
|
"eval_logits/rejected": 15.221076965332031, |
|
"eval_logps/chosen": -0.27519574761390686, |
|
"eval_logps/rejected": -0.3709692656993866, |
|
"eval_loss": 0.9084128141403198, |
|
"eval_rewards/accuracies": 0.5981308221817017, |
|
"eval_rewards/chosen": -0.4127936065196991, |
|
"eval_rewards/margins": 0.14366032183170319, |
|
"eval_rewards/rejected": -0.5564539432525635, |
|
"eval_runtime": 30.773, |
|
"eval_samples_per_second": 27.622, |
|
"eval_steps_per_second": 3.477, |
|
"step": 50 |
|
}, |
|
{ |
|
"epoch": 0.045662100456621, |
|
"grad_norm": 0.06310460716485977, |
|
"learning_rate": 4.980286753286196e-06, |
|
"logits/chosen": 14.548416137695312, |
|
"logits/rejected": 15.526041030883789, |
|
"logps/chosen": -0.29403647780418396, |
|
"logps/rejected": -0.40682005882263184, |
|
"loss": 0.9082, |
|
"rewards/accuracies": 0.625, |
|
"rewards/chosen": -0.44105473160743713, |
|
"rewards/margins": 0.1691754311323166, |
|
"rewards/rejected": -0.6102300882339478, |
|
"step": 60 |
|
}, |
|
{ |
|
"epoch": 0.0532724505327245, |
|
"grad_norm": 0.1258806735277176, |
|
"learning_rate": 4.973180832407471e-06, |
|
"logits/chosen": 14.390210151672363, |
|
"logits/rejected": 14.817584037780762, |
|
"logps/chosen": -0.25258123874664307, |
|
"logps/rejected": -0.36392712593078613, |
|
"loss": 0.896, |
|
"rewards/accuracies": 0.637499988079071, |
|
"rewards/chosen": -0.3788718581199646, |
|
"rewards/margins": 0.1670188158750534, |
|
"rewards/rejected": -0.5458906888961792, |
|
"step": 70 |
|
}, |
|
{ |
|
"epoch": 0.060882800608828, |
|
"grad_norm": 0.09006265550851822, |
|
"learning_rate": 4.964990092676263e-06, |
|
"logits/chosen": 13.844560623168945, |
|
"logits/rejected": 14.811120986938477, |
|
"logps/chosen": -0.2630843222141266, |
|
"logps/rejected": -0.3794577717781067, |
|
"loss": 0.8977, |
|
"rewards/accuracies": 0.6875, |
|
"rewards/chosen": -0.3946264684200287, |
|
"rewards/margins": 0.17456015944480896, |
|
"rewards/rejected": -0.5691865682601929, |
|
"step": 80 |
|
}, |
|
{ |
|
"epoch": 0.0684931506849315, |
|
"grad_norm": 0.07123688608407974, |
|
"learning_rate": 4.9557181268217225e-06, |
|
"logits/chosen": 13.927327156066895, |
|
"logits/rejected": 14.746416091918945, |
|
"logps/chosen": -0.25282323360443115, |
|
"logps/rejected": -0.3279832601547241, |
|
"loss": 0.9092, |
|
"rewards/accuracies": 0.5249999761581421, |
|
"rewards/chosen": -0.37923485040664673, |
|
"rewards/margins": 0.11274002492427826, |
|
"rewards/rejected": -0.4919748902320862, |
|
"step": 90 |
|
}, |
|
{ |
|
"epoch": 0.076103500761035, |
|
"grad_norm": 0.08333446085453033, |
|
"learning_rate": 4.9453690018345144e-06, |
|
"logits/chosen": 14.406118392944336, |
|
"logits/rejected": 14.770090103149414, |
|
"logps/chosen": -0.28569403290748596, |
|
"logps/rejected": -0.3596845269203186, |
|
"loss": 0.8932, |
|
"rewards/accuracies": 0.5625, |
|
"rewards/chosen": -0.42854103446006775, |
|
"rewards/margins": 0.11098580062389374, |
|
"rewards/rejected": -0.5395268201828003, |
|
"step": 100 |
|
}, |
|
{ |
|
"epoch": 0.076103500761035, |
|
"eval_logits/chosen": 13.925265312194824, |
|
"eval_logits/rejected": 14.808513641357422, |
|
"eval_logps/chosen": -0.2667020559310913, |
|
"eval_logps/rejected": -0.3739235997200012, |
|
"eval_loss": 0.8984279036521912, |
|
"eval_rewards/accuracies": 0.5981308221817017, |
|
"eval_rewards/chosen": -0.40005311369895935, |
|
"eval_rewards/margins": 0.16083234548568726, |
|
"eval_rewards/rejected": -0.5608854293823242, |
|
"eval_runtime": 30.7791, |
|
"eval_samples_per_second": 27.616, |
|
"eval_steps_per_second": 3.476, |
|
"step": 100 |
|
}, |
|
{ |
|
"epoch": 0.0837138508371385, |
|
"grad_norm": 0.08474570512771606, |
|
"learning_rate": 4.933947257182901e-06, |
|
"logits/chosen": 13.641456604003906, |
|
"logits/rejected": 14.799921035766602, |
|
"logps/chosen": -0.2721528708934784, |
|
"logps/rejected": -0.38378894329071045, |
|
"loss": 0.8995, |
|
"rewards/accuracies": 0.6000000238418579, |
|
"rewards/chosen": -0.40822935104370117, |
|
"rewards/margins": 0.1674540936946869, |
|
"rewards/rejected": -0.5756834149360657, |
|
"step": 110 |
|
}, |
|
{ |
|
"epoch": 0.091324200913242, |
|
"grad_norm": 0.1004580408334732, |
|
"learning_rate": 4.921457902821578e-06, |
|
"logits/chosen": 13.835454940795898, |
|
"logits/rejected": 14.882522583007812, |
|
"logps/chosen": -0.28507837653160095, |
|
"logps/rejected": -0.39737468957901, |
|
"loss": 0.8795, |
|
"rewards/accuracies": 0.574999988079071, |
|
"rewards/chosen": -0.42761754989624023, |
|
"rewards/margins": 0.16844449937343597, |
|
"rewards/rejected": -0.5960620641708374, |
|
"step": 120 |
|
}, |
|
{ |
|
"epoch": 0.0989345509893455, |
|
"grad_norm": 0.09537151455879211, |
|
"learning_rate": 4.907906416994146e-06, |
|
"logits/chosen": 13.607874870300293, |
|
"logits/rejected": 14.091131210327148, |
|
"logps/chosen": -0.2739318013191223, |
|
"logps/rejected": -0.36800479888916016, |
|
"loss": 0.8912, |
|
"rewards/accuracies": 0.6000000238418579, |
|
"rewards/chosen": -0.4108976721763611, |
|
"rewards/margins": 0.14110951125621796, |
|
"rewards/rejected": -0.5520071983337402, |
|
"step": 130 |
|
}, |
|
{ |
|
"epoch": 0.106544901065449, |
|
"grad_norm": 0.10281535238027573, |
|
"learning_rate": 4.893298743830168e-06, |
|
"logits/chosen": 12.017224311828613, |
|
"logits/rejected": 13.04835319519043, |
|
"logps/chosen": -0.24072685837745667, |
|
"logps/rejected": -0.36906492710113525, |
|
"loss": 0.8908, |
|
"rewards/accuracies": 0.6625000238418579, |
|
"rewards/chosen": -0.3610902428627014, |
|
"rewards/margins": 0.19250717759132385, |
|
"rewards/rejected": -0.5535974502563477, |
|
"step": 140 |
|
}, |
|
{ |
|
"epoch": 0.1141552511415525, |
|
"grad_norm": 0.707987368106842, |
|
"learning_rate": 4.8776412907378845e-06, |
|
"logits/chosen": 12.522550582885742, |
|
"logits/rejected": 13.272679328918457, |
|
"logps/chosen": -0.2583540081977844, |
|
"logps/rejected": -0.3796755075454712, |
|
"loss": 0.8867, |
|
"rewards/accuracies": 0.5375000238418579, |
|
"rewards/chosen": -0.38753098249435425, |
|
"rewards/margins": 0.18198221921920776, |
|
"rewards/rejected": -0.569513201713562, |
|
"step": 150 |
|
}, |
|
{ |
|
"epoch": 0.1141552511415525, |
|
"eval_logits/chosen": 11.989100456237793, |
|
"eval_logits/rejected": 12.92872142791748, |
|
"eval_logps/chosen": -0.27158522605895996, |
|
"eval_logps/rejected": -0.40521273016929626, |
|
"eval_loss": 0.8765817284584045, |
|
"eval_rewards/accuracies": 0.5981308221817017, |
|
"eval_rewards/chosen": -0.40737783908843994, |
|
"eval_rewards/margins": 0.20044119656085968, |
|
"eval_rewards/rejected": -0.6078190803527832, |
|
"eval_runtime": 30.7739, |
|
"eval_samples_per_second": 27.621, |
|
"eval_steps_per_second": 3.477, |
|
"step": 150 |
|
}, |
|
{ |
|
"epoch": 0.121765601217656, |
|
"grad_norm": 0.19342070817947388, |
|
"learning_rate": 4.860940925593703e-06, |
|
"logits/chosen": 11.095940589904785, |
|
"logits/rejected": 12.351040840148926, |
|
"logps/chosen": -0.24749942123889923, |
|
"logps/rejected": -0.43422192335128784, |
|
"loss": 0.8762, |
|
"rewards/accuracies": 0.6625000238418579, |
|
"rewards/chosen": -0.37124913930892944, |
|
"rewards/margins": 0.2800838053226471, |
|
"rewards/rejected": -0.6513329744338989, |
|
"step": 160 |
|
}, |
|
{ |
|
"epoch": 0.1293759512937595, |
|
"grad_norm": 0.19374576210975647, |
|
"learning_rate": 4.84320497372973e-06, |
|
"logits/chosen": 10.510068893432617, |
|
"logits/rejected": 11.507593154907227, |
|
"logps/chosen": -0.26223134994506836, |
|
"logps/rejected": -0.43635931611061096, |
|
"loss": 0.8581, |
|
"rewards/accuracies": 0.6625000238418579, |
|
"rewards/chosen": -0.39334696531295776, |
|
"rewards/margins": 0.2611919641494751, |
|
"rewards/rejected": -0.6545389294624329, |
|
"step": 170 |
|
}, |
|
{ |
|
"epoch": 0.136986301369863, |
|
"grad_norm": 0.20330430567264557, |
|
"learning_rate": 4.824441214720629e-06, |
|
"logits/chosen": 9.89570140838623, |
|
"logits/rejected": 10.669364929199219, |
|
"logps/chosen": -0.3143860101699829, |
|
"logps/rejected": -0.46989941596984863, |
|
"loss": 0.8558, |
|
"rewards/accuracies": 0.6000000238418579, |
|
"rewards/chosen": -0.47157901525497437, |
|
"rewards/margins": 0.23327013850212097, |
|
"rewards/rejected": -0.704849123954773, |
|
"step": 180 |
|
}, |
|
{ |
|
"epoch": 0.1445966514459665, |
|
"grad_norm": 0.22942212224006653, |
|
"learning_rate": 4.804657878971252e-06, |
|
"logits/chosen": 8.887057304382324, |
|
"logits/rejected": 9.542157173156738, |
|
"logps/chosen": -0.2906036972999573, |
|
"logps/rejected": -0.4810206890106201, |
|
"loss": 0.8554, |
|
"rewards/accuracies": 0.625, |
|
"rewards/chosen": -0.4359055459499359, |
|
"rewards/margins": 0.28562551736831665, |
|
"rewards/rejected": -0.7215310335159302, |
|
"step": 190 |
|
}, |
|
{ |
|
"epoch": 0.15220700152207, |
|
"grad_norm": 0.29071903228759766, |
|
"learning_rate": 4.783863644106502e-06, |
|
"logits/chosen": 6.791537284851074, |
|
"logits/rejected": 7.366445064544678, |
|
"logps/chosen": -0.31382033228874207, |
|
"logps/rejected": -0.5417486429214478, |
|
"loss": 0.838, |
|
"rewards/accuracies": 0.6625000238418579, |
|
"rewards/chosen": -0.4707304835319519, |
|
"rewards/margins": 0.34189245104789734, |
|
"rewards/rejected": -0.8126228451728821, |
|
"step": 200 |
|
}, |
|
{ |
|
"epoch": 0.15220700152207, |
|
"eval_logits/chosen": 7.050150394439697, |
|
"eval_logits/rejected": 7.516275405883789, |
|
"eval_logps/chosen": -0.3289315402507782, |
|
"eval_logps/rejected": -0.5481724143028259, |
|
"eval_loss": 0.813983678817749, |
|
"eval_rewards/accuracies": 0.6168224215507507, |
|
"eval_rewards/chosen": -0.4933973252773285, |
|
"eval_rewards/margins": 0.3288613557815552, |
|
"eval_rewards/rejected": -0.8222586512565613, |
|
"eval_runtime": 30.7734, |
|
"eval_samples_per_second": 27.621, |
|
"eval_steps_per_second": 3.477, |
|
"step": 200 |
|
}, |
|
{ |
|
"epoch": 0.1598173515981735, |
|
"grad_norm": 0.23101097345352173, |
|
"learning_rate": 4.762067631165049e-06, |
|
"logits/chosen": 5.132790565490723, |
|
"logits/rejected": 5.848537445068359, |
|
"logps/chosen": -0.33372369408607483, |
|
"logps/rejected": -0.5993582010269165, |
|
"loss": 0.8212, |
|
"rewards/accuracies": 0.6625000238418579, |
|
"rewards/chosen": -0.5005855560302734, |
|
"rewards/margins": 0.3984517455101013, |
|
"rewards/rejected": -0.8990373611450195, |
|
"step": 210 |
|
}, |
|
{ |
|
"epoch": 0.167427701674277, |
|
"grad_norm": 0.5136363506317139, |
|
"learning_rate": 4.7392794005985324e-06, |
|
"logits/chosen": 3.807554244995117, |
|
"logits/rejected": 4.600871562957764, |
|
"logps/chosen": -0.32092416286468506, |
|
"logps/rejected": -0.651642918586731, |
|
"loss": 0.7851, |
|
"rewards/accuracies": 0.699999988079071, |
|
"rewards/chosen": -0.4813862442970276, |
|
"rewards/margins": 0.4960783123970032, |
|
"rewards/rejected": -0.977464497089386, |
|
"step": 220 |
|
}, |
|
{ |
|
"epoch": 0.1750380517503805, |
|
"grad_norm": 0.4106898009777069, |
|
"learning_rate": 4.715508948078037e-06, |
|
"logits/chosen": 2.760650396347046, |
|
"logits/rejected": 2.1608071327209473, |
|
"logps/chosen": -0.43665003776550293, |
|
"logps/rejected": -0.8352751731872559, |
|
"loss": 0.7685, |
|
"rewards/accuracies": 0.6875, |
|
"rewards/chosen": -0.6549750566482544, |
|
"rewards/margins": 0.5979377627372742, |
|
"rewards/rejected": -1.2529128789901733, |
|
"step": 230 |
|
}, |
|
{ |
|
"epoch": 0.182648401826484, |
|
"grad_norm": 0.4719419479370117, |
|
"learning_rate": 4.690766700109659e-06, |
|
"logits/chosen": 3.1216347217559814, |
|
"logits/rejected": 2.7202537059783936, |
|
"logps/chosen": -0.444007933139801, |
|
"logps/rejected": -0.7697597742080688, |
|
"loss": 0.7474, |
|
"rewards/accuracies": 0.6875, |
|
"rewards/chosen": -0.6660118699073792, |
|
"rewards/margins": 0.4886276125907898, |
|
"rewards/rejected": -1.154639482498169, |
|
"step": 240 |
|
}, |
|
{ |
|
"epoch": 0.1902587519025875, |
|
"grad_norm": 0.548523485660553, |
|
"learning_rate": 4.665063509461098e-06, |
|
"logits/chosen": 1.3678622245788574, |
|
"logits/rejected": 0.46835970878601074, |
|
"logps/chosen": -0.48227253556251526, |
|
"logps/rejected": -0.997289776802063, |
|
"loss": 0.7017, |
|
"rewards/accuracies": 0.6875, |
|
"rewards/chosen": -0.7234088182449341, |
|
"rewards/margins": 0.7725256681442261, |
|
"rewards/rejected": -1.4959346055984497, |
|
"step": 250 |
|
}, |
|
{ |
|
"epoch": 0.1902587519025875, |
|
"eval_logits/chosen": 2.1362831592559814, |
|
"eval_logits/rejected": 1.1932121515274048, |
|
"eval_logps/chosen": -0.500978946685791, |
|
"eval_logps/rejected": -1.0073517560958862, |
|
"eval_loss": 0.6914573907852173, |
|
"eval_rewards/accuracies": 0.6542056202888489, |
|
"eval_rewards/chosen": -0.7514683604240417, |
|
"eval_rewards/margins": 0.7595593929290771, |
|
"eval_rewards/rejected": -1.5110276937484741, |
|
"eval_runtime": 30.7706, |
|
"eval_samples_per_second": 27.624, |
|
"eval_steps_per_second": 3.477, |
|
"step": 250 |
|
}, |
|
{ |
|
"epoch": 0.197869101978691, |
|
"grad_norm": 0.700670063495636, |
|
"learning_rate": 4.638410650401267e-06, |
|
"logits/chosen": 2.537666082382202, |
|
"logits/rejected": 1.3070740699768066, |
|
"logps/chosen": -0.59038907289505, |
|
"logps/rejected": -1.0600087642669678, |
|
"loss": 0.6908, |
|
"rewards/accuracies": 0.637499988079071, |
|
"rewards/chosen": -0.8855836987495422, |
|
"rewards/margins": 0.7044296264648438, |
|
"rewards/rejected": -1.5900132656097412, |
|
"step": 260 |
|
}, |
|
{ |
|
"epoch": 0.2054794520547945, |
|
"grad_norm": 0.6454456448554993, |
|
"learning_rate": 4.610819813755038e-06, |
|
"logits/chosen": 2.312289237976074, |
|
"logits/rejected": 1.6705052852630615, |
|
"logps/chosen": -0.601074755191803, |
|
"logps/rejected": -1.12887442111969, |
|
"loss": 0.6868, |
|
"rewards/accuracies": 0.637499988079071, |
|
"rewards/chosen": -0.9016121029853821, |
|
"rewards/margins": 0.7916995286941528, |
|
"rewards/rejected": -1.6933116912841797, |
|
"step": 270 |
|
}, |
|
{ |
|
"epoch": 0.213089802130898, |
|
"grad_norm": 0.8001136183738708, |
|
"learning_rate": 4.582303101775249e-06, |
|
"logits/chosen": 1.6213299036026, |
|
"logits/rejected": 0.9048928022384644, |
|
"logps/chosen": -0.6731385588645935, |
|
"logps/rejected": -1.3181935548782349, |
|
"loss": 0.632, |
|
"rewards/accuracies": 0.6499999761581421, |
|
"rewards/chosen": -1.0097079277038574, |
|
"rewards/margins": 0.9675822257995605, |
|
"rewards/rejected": -1.977290153503418, |
|
"step": 280 |
|
}, |
|
{ |
|
"epoch": 0.2207001522070015, |
|
"grad_norm": 0.45858490467071533, |
|
"learning_rate": 4.55287302283426e-06, |
|
"logits/chosen": 1.0463030338287354, |
|
"logits/rejected": 0.05798797681927681, |
|
"logps/chosen": -0.677167534828186, |
|
"logps/rejected": -1.4764039516448975, |
|
"loss": 0.6447, |
|
"rewards/accuracies": 0.7250000238418579, |
|
"rewards/chosen": -1.0157512426376343, |
|
"rewards/margins": 1.1988548040390015, |
|
"rewards/rejected": -2.2146058082580566, |
|
"step": 290 |
|
}, |
|
{ |
|
"epoch": 0.228310502283105, |
|
"grad_norm": 0.5778977870941162, |
|
"learning_rate": 4.522542485937369e-06, |
|
"logits/chosen": 2.3259291648864746, |
|
"logits/rejected": 1.6117414236068726, |
|
"logps/chosen": -0.7591919302940369, |
|
"logps/rejected": -1.5995824337005615, |
|
"loss": 0.5702, |
|
"rewards/accuracies": 0.637499988079071, |
|
"rewards/chosen": -1.1387879848480225, |
|
"rewards/margins": 1.2605856657028198, |
|
"rewards/rejected": -2.3993735313415527, |
|
"step": 300 |
|
}, |
|
{ |
|
"epoch": 0.228310502283105, |
|
"eval_logits/chosen": 1.9625831842422485, |
|
"eval_logits/rejected": 1.028193473815918, |
|
"eval_logps/chosen": -0.7516441941261292, |
|
"eval_logps/rejected": -1.771378517150879, |
|
"eval_loss": 0.5786539912223816, |
|
"eval_rewards/accuracies": 0.6915887594223022, |
|
"eval_rewards/chosen": -1.1274662017822266, |
|
"eval_rewards/margins": 1.5296014547348022, |
|
"eval_rewards/rejected": -2.6570677757263184, |
|
"eval_runtime": 30.7716, |
|
"eval_samples_per_second": 27.623, |
|
"eval_steps_per_second": 3.477, |
|
"step": 300 |
|
}, |
|
{ |
|
"epoch": 0.2359208523592085, |
|
"grad_norm": 0.5383133292198181, |
|
"learning_rate": 4.491324795060491e-06, |
|
"logits/chosen": 1.2824015617370605, |
|
"logits/rejected": 0.7073851823806763, |
|
"logps/chosen": -0.8315173387527466, |
|
"logps/rejected": -1.9733762741088867, |
|
"loss": 0.587, |
|
"rewards/accuracies": 0.75, |
|
"rewards/chosen": -1.2472760677337646, |
|
"rewards/margins": 1.7127883434295654, |
|
"rewards/rejected": -2.96006441116333, |
|
"step": 310 |
|
}, |
|
{ |
|
"epoch": 0.243531202435312, |
|
"grad_norm": 3.721909284591675, |
|
"learning_rate": 4.4592336433146e-06, |
|
"logits/chosen": 1.993947982788086, |
|
"logits/rejected": 1.192871332168579, |
|
"logps/chosen": -0.9074883460998535, |
|
"logps/rejected": -1.9389015436172485, |
|
"loss": 0.5194, |
|
"rewards/accuracies": 0.625, |
|
"rewards/chosen": -1.3612326383590698, |
|
"rewards/margins": 1.5471194982528687, |
|
"rewards/rejected": -2.9083518981933594, |
|
"step": 320 |
|
}, |
|
{ |
|
"epoch": 0.2511415525114155, |
|
"grad_norm": 0.9611485004425049, |
|
"learning_rate": 4.426283106939474e-06, |
|
"logits/chosen": 0.607239305973053, |
|
"logits/rejected": 0.040740929543972015, |
|
"logps/chosen": -0.9696615934371948, |
|
"logps/rejected": -2.3865818977355957, |
|
"loss": 0.4715, |
|
"rewards/accuracies": 0.762499988079071, |
|
"rewards/chosen": -1.4544923305511475, |
|
"rewards/margins": 2.1253809928894043, |
|
"rewards/rejected": -3.5798733234405518, |
|
"step": 330 |
|
}, |
|
{ |
|
"epoch": 0.258751902587519, |
|
"grad_norm": 3.716665744781494, |
|
"learning_rate": 4.3924876391293915e-06, |
|
"logits/chosen": 1.486352801322937, |
|
"logits/rejected": 0.860406756401062, |
|
"logps/chosen": -0.9488881826400757, |
|
"logps/rejected": -2.771193027496338, |
|
"loss": 0.4584, |
|
"rewards/accuracies": 0.7749999761581421, |
|
"rewards/chosen": -1.4233323335647583, |
|
"rewards/margins": 2.733457088470459, |
|
"rewards/rejected": -4.156789302825928, |
|
"step": 340 |
|
}, |
|
{ |
|
"epoch": 0.2663622526636225, |
|
"grad_norm": 2.496544361114502, |
|
"learning_rate": 4.357862063693486e-06, |
|
"logits/chosen": 2.1065332889556885, |
|
"logits/rejected": 1.4116215705871582, |
|
"logps/chosen": -0.9290377497673035, |
|
"logps/rejected": -2.717181444168091, |
|
"loss": 0.4126, |
|
"rewards/accuracies": 0.7124999761581421, |
|
"rewards/chosen": -1.393556833267212, |
|
"rewards/margins": 2.682215452194214, |
|
"rewards/rejected": -4.075772285461426, |
|
"step": 350 |
|
}, |
|
{ |
|
"epoch": 0.2663622526636225, |
|
"eval_logits/chosen": 2.3063719272613525, |
|
"eval_logits/rejected": 1.7392665147781372, |
|
"eval_logps/chosen": -0.9553582072257996, |
|
"eval_logps/rejected": -2.8578038215637207, |
|
"eval_loss": 0.43925610184669495, |
|
"eval_rewards/accuracies": 0.7196261882781982, |
|
"eval_rewards/chosen": -1.433037281036377, |
|
"eval_rewards/margins": 2.853668212890625, |
|
"eval_rewards/rejected": -4.286705493927002, |
|
"eval_runtime": 30.7732, |
|
"eval_samples_per_second": 27.621, |
|
"eval_steps_per_second": 3.477, |
|
"step": 350 |
|
}, |
|
{ |
|
"epoch": 0.273972602739726, |
|
"grad_norm": 1.0364434719085693, |
|
"learning_rate": 4.322421568553529e-06, |
|
"logits/chosen": 3.5145366191864014, |
|
"logits/rejected": 2.562318801879883, |
|
"logps/chosen": -0.9316509366035461, |
|
"logps/rejected": -2.7451562881469727, |
|
"loss": 0.4566, |
|
"rewards/accuracies": 0.675000011920929, |
|
"rewards/chosen": -1.3974764347076416, |
|
"rewards/margins": 2.7202582359313965, |
|
"rewards/rejected": -4.117734432220459, |
|
"step": 360 |
|
}, |
|
{ |
|
"epoch": 0.2815829528158295, |
|
"grad_norm": 0.7246320843696594, |
|
"learning_rate": 4.286181699082008e-06, |
|
"logits/chosen": 1.6608537435531616, |
|
"logits/rejected": 1.27449631690979, |
|
"logps/chosen": -1.0797128677368164, |
|
"logps/rejected": -3.467390537261963, |
|
"loss": 0.4299, |
|
"rewards/accuracies": 0.762499988079071, |
|
"rewards/chosen": -1.6195694208145142, |
|
"rewards/margins": 3.5815162658691406, |
|
"rewards/rejected": -5.201085567474365, |
|
"step": 370 |
|
}, |
|
{ |
|
"epoch": 0.289193302891933, |
|
"grad_norm": 0.942298173904419, |
|
"learning_rate": 4.249158351283414e-06, |
|
"logits/chosen": 2.1106579303741455, |
|
"logits/rejected": 1.5492799282073975, |
|
"logps/chosen": -1.2671682834625244, |
|
"logps/rejected": -3.201054811477661, |
|
"loss": 0.4114, |
|
"rewards/accuracies": 0.7124999761581421, |
|
"rewards/chosen": -1.900752305984497, |
|
"rewards/margins": 2.900829792022705, |
|
"rewards/rejected": -4.801582336425781, |
|
"step": 380 |
|
}, |
|
{ |
|
"epoch": 0.2968036529680365, |
|
"grad_norm": 0.4278697371482849, |
|
"learning_rate": 4.211367764821722e-06, |
|
"logits/chosen": 3.2620933055877686, |
|
"logits/rejected": 2.7777600288391113, |
|
"logps/chosen": -1.0661684274673462, |
|
"logps/rejected": -3.025578022003174, |
|
"loss": 0.4259, |
|
"rewards/accuracies": 0.6499999761581421, |
|
"rewards/chosen": -1.599252462387085, |
|
"rewards/margins": 2.9391140937805176, |
|
"rewards/rejected": -4.53836727142334, |
|
"step": 390 |
|
}, |
|
{ |
|
"epoch": 0.30441400304414, |
|
"grad_norm": 0.6019588708877563, |
|
"learning_rate": 4.172826515897146e-06, |
|
"logits/chosen": 3.057295560836792, |
|
"logits/rejected": 2.397916078567505, |
|
"logps/chosen": -1.0584070682525635, |
|
"logps/rejected": -3.479670286178589, |
|
"loss": 0.4167, |
|
"rewards/accuracies": 0.7124999761581421, |
|
"rewards/chosen": -1.5876106023788452, |
|
"rewards/margins": 3.631894588470459, |
|
"rewards/rejected": -5.219505310058594, |
|
"step": 400 |
|
}, |
|
{ |
|
"epoch": 0.30441400304414, |
|
"eval_logits/chosen": 3.2906479835510254, |
|
"eval_logits/rejected": 2.9191884994506836, |
|
"eval_logps/chosen": -1.1546303033828735, |
|
"eval_logps/rejected": -3.499722957611084, |
|
"eval_loss": 0.4080003499984741, |
|
"eval_rewards/accuracies": 0.7102803587913513, |
|
"eval_rewards/chosen": -1.731945514678955, |
|
"eval_rewards/margins": 3.517639398574829, |
|
"eval_rewards/rejected": -5.249584674835205, |
|
"eval_runtime": 30.7819, |
|
"eval_samples_per_second": 27.614, |
|
"eval_steps_per_second": 3.476, |
|
"step": 400 |
|
}, |
|
{ |
|
"epoch": 0.3120243531202435, |
|
"grad_norm": 0.610105037689209, |
|
"learning_rate": 4.133551509975264e-06, |
|
"logits/chosen": 2.7366433143615723, |
|
"logits/rejected": 2.350151538848877, |
|
"logps/chosen": -1.3425816297531128, |
|
"logps/rejected": -4.451743125915527, |
|
"loss": 0.3834, |
|
"rewards/accuracies": 0.8125, |
|
"rewards/chosen": -2.0138726234436035, |
|
"rewards/margins": 4.663742542266846, |
|
"rewards/rejected": -6.677615165710449, |
|
"step": 410 |
|
}, |
|
{ |
|
"epoch": 0.319634703196347, |
|
"grad_norm": 0.9136129021644592, |
|
"learning_rate": 4.093559974371725e-06, |
|
"logits/chosen": 3.8271331787109375, |
|
"logits/rejected": 3.666091203689575, |
|
"logps/chosen": -1.3493579626083374, |
|
"logps/rejected": -3.8569908142089844, |
|
"loss": 0.397, |
|
"rewards/accuracies": 0.7124999761581421, |
|
"rewards/chosen": -2.0240368843078613, |
|
"rewards/margins": 3.7614493370056152, |
|
"rewards/rejected": -5.785486221313477, |
|
"step": 420 |
|
}, |
|
{ |
|
"epoch": 0.3272450532724505, |
|
"grad_norm": 0.6076493859291077, |
|
"learning_rate": 4.052869450695776e-06, |
|
"logits/chosen": 3.027143955230713, |
|
"logits/rejected": 2.2761549949645996, |
|
"logps/chosen": -1.2890465259552002, |
|
"logps/rejected": -4.363173961639404, |
|
"loss": 0.3623, |
|
"rewards/accuracies": 0.7875000238418579, |
|
"rewards/chosen": -1.9335696697235107, |
|
"rewards/margins": 4.6111907958984375, |
|
"rewards/rejected": -6.544760704040527, |
|
"step": 430 |
|
}, |
|
{ |
|
"epoch": 0.334855403348554, |
|
"grad_norm": 1.0694931745529175, |
|
"learning_rate": 4.011497787155938e-06, |
|
"logits/chosen": 3.989302158355713, |
|
"logits/rejected": 3.3767571449279785, |
|
"logps/chosen": -1.3799726963043213, |
|
"logps/rejected": -4.611227512359619, |
|
"loss": 0.3786, |
|
"rewards/accuracies": 0.7875000238418579, |
|
"rewards/chosen": -2.0699591636657715, |
|
"rewards/margins": 4.846882343292236, |
|
"rewards/rejected": -6.916840553283691, |
|
"step": 440 |
|
}, |
|
{ |
|
"epoch": 0.3424657534246575, |
|
"grad_norm": 2.3523929119110107, |
|
"learning_rate": 3.969463130731183e-06, |
|
"logits/chosen": 3.046278953552246, |
|
"logits/rejected": 2.7509286403656006, |
|
"logps/chosen": -1.577859878540039, |
|
"logps/rejected": -4.554004669189453, |
|
"loss": 0.3948, |
|
"rewards/accuracies": 0.762499988079071, |
|
"rewards/chosen": -2.3667898178100586, |
|
"rewards/margins": 4.464217185974121, |
|
"rewards/rejected": -6.831006050109863, |
|
"step": 450 |
|
}, |
|
{ |
|
"epoch": 0.3424657534246575, |
|
"eval_logits/chosen": 3.555213451385498, |
|
"eval_logits/rejected": 3.359722375869751, |
|
"eval_logps/chosen": -1.6125141382217407, |
|
"eval_logps/rejected": -4.374329566955566, |
|
"eval_loss": 0.3748260736465454, |
|
"eval_rewards/accuracies": 0.7943925261497498, |
|
"eval_rewards/chosen": -2.418771266937256, |
|
"eval_rewards/margins": 4.142723560333252, |
|
"eval_rewards/rejected": -6.56149435043335, |
|
"eval_runtime": 30.7795, |
|
"eval_samples_per_second": 27.616, |
|
"eval_steps_per_second": 3.476, |
|
"step": 450 |
|
}, |
|
{ |
|
"epoch": 0.350076103500761, |
|
"grad_norm": 1.113964557647705, |
|
"learning_rate": 3.92678391921108e-06, |
|
"logits/chosen": 3.61175274848938, |
|
"logits/rejected": 3.547903537750244, |
|
"logps/chosen": -1.7464786767959595, |
|
"logps/rejected": -5.045803070068359, |
|
"loss": 0.3717, |
|
"rewards/accuracies": 0.762499988079071, |
|
"rewards/chosen": -2.619718074798584, |
|
"rewards/margins": 4.948986053466797, |
|
"rewards/rejected": -7.568705081939697, |
|
"step": 460 |
|
}, |
|
{ |
|
"epoch": 0.3576864535768645, |
|
"grad_norm": 1.5195355415344238, |
|
"learning_rate": 3.88347887310836e-06, |
|
"logits/chosen": 3.0807926654815674, |
|
"logits/rejected": 3.012016773223877, |
|
"logps/chosen": -2.164515733718872, |
|
"logps/rejected": -5.039651393890381, |
|
"loss": 0.3507, |
|
"rewards/accuracies": 0.8374999761581421, |
|
"rewards/chosen": -3.2467732429504395, |
|
"rewards/margins": 4.312704086303711, |
|
"rewards/rejected": -7.55947732925415, |
|
"step": 470 |
|
}, |
|
{ |
|
"epoch": 0.365296803652968, |
|
"grad_norm": 2.3880045413970947, |
|
"learning_rate": 3.839566987447492e-06, |
|
"logits/chosen": 2.4990105628967285, |
|
"logits/rejected": 2.5192058086395264, |
|
"logps/chosen": -2.5131685733795166, |
|
"logps/rejected": -5.811826705932617, |
|
"loss": 0.3326, |
|
"rewards/accuracies": 0.949999988079071, |
|
"rewards/chosen": -3.7697532176971436, |
|
"rewards/margins": 4.947987079620361, |
|
"rewards/rejected": -8.717740058898926, |
|
"step": 480 |
|
}, |
|
{ |
|
"epoch": 0.3729071537290715, |
|
"grad_norm": 4.61068868637085, |
|
"learning_rate": 3.795067523432826e-06, |
|
"logits/chosen": 2.1001622676849365, |
|
"logits/rejected": 2.0562539100646973, |
|
"logps/chosen": -2.7572569847106934, |
|
"logps/rejected": -6.228929042816162, |
|
"loss": 0.3227, |
|
"rewards/accuracies": 0.9375, |
|
"rewards/chosen": -4.135885715484619, |
|
"rewards/margins": 5.207508563995361, |
|
"rewards/rejected": -9.343393325805664, |
|
"step": 490 |
|
}, |
|
{ |
|
"epoch": 0.380517503805175, |
|
"grad_norm": 8.403047561645508, |
|
"learning_rate": 3.7500000000000005e-06, |
|
"logits/chosen": 3.1137287616729736, |
|
"logits/rejected": 2.6646764278411865, |
|
"logps/chosen": -2.8061861991882324, |
|
"logps/rejected": -6.236757755279541, |
|
"loss": 0.3422, |
|
"rewards/accuracies": 0.862500011920929, |
|
"rewards/chosen": -4.2092790603637695, |
|
"rewards/margins": 5.1458563804626465, |
|
"rewards/rejected": -9.355135917663574, |
|
"step": 500 |
|
}, |
|
{ |
|
"epoch": 0.380517503805175, |
|
"eval_logits/chosen": 3.3083701133728027, |
|
"eval_logits/rejected": 3.13222336769104, |
|
"eval_logps/chosen": -2.6677865982055664, |
|
"eval_logps/rejected": -5.843282222747803, |
|
"eval_loss": 0.30595287680625916, |
|
"eval_rewards/accuracies": 0.8878504633903503, |
|
"eval_rewards/chosen": -4.001679420471191, |
|
"eval_rewards/margins": 4.763244152069092, |
|
"eval_rewards/rejected": -8.764924049377441, |
|
"eval_runtime": 30.7793, |
|
"eval_samples_per_second": 27.616, |
|
"eval_steps_per_second": 3.476, |
|
"step": 500 |
|
}, |
|
{ |
|
"epoch": 0.3881278538812785, |
|
"grad_norm": 2.3582851886749268, |
|
"learning_rate": 3.7043841852542884e-06, |
|
"logits/chosen": 2.7116522789001465, |
|
"logits/rejected": 2.5776076316833496, |
|
"logps/chosen": -2.7367191314697266, |
|
"logps/rejected": -6.1324052810668945, |
|
"loss": 0.2649, |
|
"rewards/accuracies": 0.9750000238418579, |
|
"rewards/chosen": -4.10507869720459, |
|
"rewards/margins": 5.093530178070068, |
|
"rewards/rejected": -9.1986083984375, |
|
"step": 510 |
|
}, |
|
{ |
|
"epoch": 0.395738203957382, |
|
"grad_norm": 2.8183226585388184, |
|
"learning_rate": 3.658240087799655e-06, |
|
"logits/chosen": 2.327544689178467, |
|
"logits/rejected": 2.3745343685150146, |
|
"logps/chosen": -2.6957223415374756, |
|
"logps/rejected": -6.291537284851074, |
|
"loss": 0.295, |
|
"rewards/accuracies": 0.925000011920929, |
|
"rewards/chosen": -4.043583393096924, |
|
"rewards/margins": 5.393722057342529, |
|
"rewards/rejected": -9.437305450439453, |
|
"step": 520 |
|
}, |
|
{ |
|
"epoch": 0.4033485540334855, |
|
"grad_norm": 1.8313360214233398, |
|
"learning_rate": 3.611587947962319e-06, |
|
"logits/chosen": 2.4468109607696533, |
|
"logits/rejected": 2.4551472663879395, |
|
"logps/chosen": -2.7839953899383545, |
|
"logps/rejected": -6.5379180908203125, |
|
"loss": 0.2618, |
|
"rewards/accuracies": 0.875, |
|
"rewards/chosen": -4.175992965698242, |
|
"rewards/margins": 5.63088321685791, |
|
"rewards/rejected": -9.806875228881836, |
|
"step": 530 |
|
}, |
|
{ |
|
"epoch": 0.410958904109589, |
|
"grad_norm": 2.2132411003112793, |
|
"learning_rate": 3.564448228912682e-06, |
|
"logits/chosen": 3.125279664993286, |
|
"logits/rejected": 2.7795650959014893, |
|
"logps/chosen": -3.349208116531372, |
|
"logps/rejected": -6.923414707183838, |
|
"loss": 0.2592, |
|
"rewards/accuracies": 0.925000011920929, |
|
"rewards/chosen": -5.023811340332031, |
|
"rewards/margins": 5.361310005187988, |
|
"rewards/rejected": -10.385122299194336, |
|
"step": 540 |
|
}, |
|
{ |
|
"epoch": 0.4185692541856925, |
|
"grad_norm": 6.05848503112793, |
|
"learning_rate": 3.516841607689501e-06, |
|
"logits/chosen": 2.841399669647217, |
|
"logits/rejected": 2.997351884841919, |
|
"logps/chosen": -3.256176710128784, |
|
"logps/rejected": -7.098822593688965, |
|
"loss": 0.2603, |
|
"rewards/accuracies": 0.9375, |
|
"rewards/chosen": -4.884264945983887, |
|
"rewards/margins": 5.763968467712402, |
|
"rewards/rejected": -10.648235321044922, |
|
"step": 550 |
|
}, |
|
{ |
|
"epoch": 0.4185692541856925, |
|
"eval_logits/chosen": 3.109469413757324, |
|
"eval_logits/rejected": 3.010756492614746, |
|
"eval_logps/chosen": -3.216036558151245, |
|
"eval_logps/rejected": -6.825747013092041, |
|
"eval_loss": 0.27887609601020813, |
|
"eval_rewards/accuracies": 0.8878504633903503, |
|
"eval_rewards/chosen": -4.824055194854736, |
|
"eval_rewards/margins": 5.414565563201904, |
|
"eval_rewards/rejected": -10.23862075805664, |
|
"eval_runtime": 30.773, |
|
"eval_samples_per_second": 27.622, |
|
"eval_steps_per_second": 3.477, |
|
"step": 550 |
|
}, |
|
{ |
|
"epoch": 0.426179604261796, |
|
"grad_norm": 1.8734403848648071, |
|
"learning_rate": 3.4687889661302577e-06, |
|
"logits/chosen": 1.8899682760238647, |
|
"logits/rejected": 1.7766664028167725, |
|
"logps/chosen": -3.1907763481140137, |
|
"logps/rejected": -7.273028373718262, |
|
"loss": 0.2669, |
|
"rewards/accuracies": 0.8999999761581421, |
|
"rewards/chosen": -4.7861647605896, |
|
"rewards/margins": 6.123377323150635, |
|
"rewards/rejected": -10.909541130065918, |
|
"step": 560 |
|
}, |
|
{ |
|
"epoch": 0.4337899543378995, |
|
"grad_norm": 3.2115261554718018, |
|
"learning_rate": 3.4203113817116955e-06, |
|
"logits/chosen": 2.563091278076172, |
|
"logits/rejected": 2.530696392059326, |
|
"logps/chosen": -3.620448589324951, |
|
"logps/rejected": -7.546849250793457, |
|
"loss": 0.2683, |
|
"rewards/accuracies": 0.9750000238418579, |
|
"rewards/chosen": -5.430673599243164, |
|
"rewards/margins": 5.8896002769470215, |
|
"rewards/rejected": -11.320273399353027, |
|
"step": 570 |
|
}, |
|
{ |
|
"epoch": 0.441400304414003, |
|
"grad_norm": 3.684910297393799, |
|
"learning_rate": 3.3714301183045382e-06, |
|
"logits/chosen": 2.873882293701172, |
|
"logits/rejected": 3.193092107772827, |
|
"logps/chosen": -3.386859178543091, |
|
"logps/rejected": -7.514338493347168, |
|
"loss": 0.2715, |
|
"rewards/accuracies": 0.8500000238418579, |
|
"rewards/chosen": -5.080288887023926, |
|
"rewards/margins": 6.191219329833984, |
|
"rewards/rejected": -11.271509170532227, |
|
"step": 580 |
|
}, |
|
{ |
|
"epoch": 0.4490106544901065, |
|
"grad_norm": 2.661367416381836, |
|
"learning_rate": 3.3221666168464584e-06, |
|
"logits/chosen": 2.5157277584075928, |
|
"logits/rejected": 2.5739080905914307, |
|
"logps/chosen": -3.658534526824951, |
|
"logps/rejected": -7.9988884925842285, |
|
"loss": 0.2454, |
|
"rewards/accuracies": 0.9624999761581421, |
|
"rewards/chosen": -5.487801551818848, |
|
"rewards/margins": 6.510530948638916, |
|
"rewards/rejected": -11.998331069946289, |
|
"step": 590 |
|
}, |
|
{ |
|
"epoch": 0.45662100456621, |
|
"grad_norm": 1.8180292844772339, |
|
"learning_rate": 3.272542485937369e-06, |
|
"logits/chosen": 2.6391870975494385, |
|
"logits/rejected": 2.72003173828125, |
|
"logps/chosen": -3.382587432861328, |
|
"logps/rejected": -8.08546257019043, |
|
"loss": 0.2706, |
|
"rewards/accuracies": 0.875, |
|
"rewards/chosen": -5.073880672454834, |
|
"rewards/margins": 7.054312229156494, |
|
"rewards/rejected": -12.128194808959961, |
|
"step": 600 |
|
}, |
|
{ |
|
"epoch": 0.45662100456621, |
|
"eval_logits/chosen": 3.235776901245117, |
|
"eval_logits/rejected": 3.2310192584991455, |
|
"eval_logps/chosen": -3.201641082763672, |
|
"eval_logps/rejected": -7.113856315612793, |
|
"eval_loss": 0.26078400015830994, |
|
"eval_rewards/accuracies": 0.8878504633903503, |
|
"eval_rewards/chosen": -4.802461624145508, |
|
"eval_rewards/margins": 5.8683247566223145, |
|
"eval_rewards/rejected": -10.67078685760498, |
|
"eval_runtime": 30.7777, |
|
"eval_samples_per_second": 27.617, |
|
"eval_steps_per_second": 3.477, |
|
"step": 600 |
|
}, |
|
{ |
|
"epoch": 0.4642313546423135, |
|
"grad_norm": 3.1237454414367676, |
|
"learning_rate": 3.222579492361179e-06, |
|
"logits/chosen": 3.097729444503784, |
|
"logits/rejected": 2.8835232257843018, |
|
"logps/chosen": -3.2986862659454346, |
|
"logps/rejected": -7.824951171875, |
|
"loss": 0.2455, |
|
"rewards/accuracies": 0.8999999761581421, |
|
"rewards/chosen": -4.948029518127441, |
|
"rewards/margins": 6.789399147033691, |
|
"rewards/rejected": -11.7374267578125, |
|
"step": 610 |
|
}, |
|
{ |
|
"epoch": 0.471841704718417, |
|
"grad_norm": 2.250023365020752, |
|
"learning_rate": 3.1722995515381644e-06, |
|
"logits/chosen": 3.303495407104492, |
|
"logits/rejected": 3.124060869216919, |
|
"logps/chosen": -2.9880855083465576, |
|
"logps/rejected": -6.581275939941406, |
|
"loss": 0.2754, |
|
"rewards/accuracies": 0.875, |
|
"rewards/chosen": -4.482128143310547, |
|
"rewards/margins": 5.389786243438721, |
|
"rewards/rejected": -9.87191390991211, |
|
"step": 620 |
|
}, |
|
{ |
|
"epoch": 0.4794520547945205, |
|
"grad_norm": 4.364448547363281, |
|
"learning_rate": 3.121724717912138e-06, |
|
"logits/chosen": 2.8994319438934326, |
|
"logits/rejected": 2.593780755996704, |
|
"logps/chosen": -3.4450290203094482, |
|
"logps/rejected": -7.1797990798950195, |
|
"loss": 0.2316, |
|
"rewards/accuracies": 0.9750000238418579, |
|
"rewards/chosen": -5.167544364929199, |
|
"rewards/margins": 5.602154731750488, |
|
"rewards/rejected": -10.769698143005371, |
|
"step": 630 |
|
}, |
|
{ |
|
"epoch": 0.487062404870624, |
|
"grad_norm": 3.65561580657959, |
|
"learning_rate": 3.0708771752766397e-06, |
|
"logits/chosen": 3.1075518131256104, |
|
"logits/rejected": 2.8703231811523438, |
|
"logps/chosen": -3.26599383354187, |
|
"logps/rejected": -7.536534786224365, |
|
"loss": 0.2549, |
|
"rewards/accuracies": 0.9125000238418579, |
|
"rewards/chosen": -4.898990631103516, |
|
"rewards/margins": 6.4058122634887695, |
|
"rewards/rejected": -11.304803848266602, |
|
"step": 640 |
|
}, |
|
{ |
|
"epoch": 0.4946727549467275, |
|
"grad_norm": 3.1211891174316406, |
|
"learning_rate": 3.019779227044398e-06, |
|
"logits/chosen": 3.2364888191223145, |
|
"logits/rejected": 3.3938751220703125, |
|
"logps/chosen": -3.538849353790283, |
|
"logps/rejected": -7.827691555023193, |
|
"loss": 0.2571, |
|
"rewards/accuracies": 0.9125000238418579, |
|
"rewards/chosen": -5.308274269104004, |
|
"rewards/margins": 6.433261871337891, |
|
"rewards/rejected": -11.741537094116211, |
|
"step": 650 |
|
}, |
|
{ |
|
"epoch": 0.4946727549467275, |
|
"eval_logits/chosen": 3.091616153717041, |
|
"eval_logits/rejected": 3.0459396839141846, |
|
"eval_logps/chosen": -3.361125946044922, |
|
"eval_logps/rejected": -7.390212535858154, |
|
"eval_loss": 0.2536354660987854, |
|
"eval_rewards/accuracies": 0.8971962332725525, |
|
"eval_rewards/chosen": -5.041689395904541, |
|
"eval_rewards/margins": 6.043630599975586, |
|
"eval_rewards/rejected": -11.085319519042969, |
|
"eval_runtime": 30.7751, |
|
"eval_samples_per_second": 27.62, |
|
"eval_steps_per_second": 3.477, |
|
"step": 650 |
|
}, |
|
{ |
|
"epoch": 0.502283105022831, |
|
"grad_norm": 4.375415802001953, |
|
"learning_rate": 2.9684532864643123e-06, |
|
"logits/chosen": 1.8274192810058594, |
|
"logits/rejected": 1.9628839492797852, |
|
"logps/chosen": -3.083608627319336, |
|
"logps/rejected": -8.370513916015625, |
|
"loss": 0.2166, |
|
"rewards/accuracies": 0.9624999761581421, |
|
"rewards/chosen": -4.625412940979004, |
|
"rewards/margins": 7.930357456207275, |
|
"rewards/rejected": -12.555770874023438, |
|
"step": 660 |
|
}, |
|
{ |
|
"epoch": 0.5098934550989346, |
|
"grad_norm": 3.6583638191223145, |
|
"learning_rate": 2.9169218667902562e-06, |
|
"logits/chosen": 2.5409281253814697, |
|
"logits/rejected": 2.46968150138855, |
|
"logps/chosen": -3.252990245819092, |
|
"logps/rejected": -7.6487884521484375, |
|
"loss": 0.2103, |
|
"rewards/accuracies": 0.9375, |
|
"rewards/chosen": -4.879485607147217, |
|
"rewards/margins": 6.593697547912598, |
|
"rewards/rejected": -11.473182678222656, |
|
"step": 670 |
|
}, |
|
{ |
|
"epoch": 0.517503805175038, |
|
"grad_norm": 2.009876251220703, |
|
"learning_rate": 2.8652075714060296e-06, |
|
"logits/chosen": 3.553900957107544, |
|
"logits/rejected": 3.4104526042938232, |
|
"logps/chosen": -2.9901890754699707, |
|
"logps/rejected": -7.472288608551025, |
|
"loss": 0.2405, |
|
"rewards/accuracies": 0.9125000238418579, |
|
"rewards/chosen": -4.485283374786377, |
|
"rewards/margins": 6.723149299621582, |
|
"rewards/rejected": -11.208433151245117, |
|
"step": 680 |
|
}, |
|
{ |
|
"epoch": 0.5251141552511416, |
|
"grad_norm": 2.9065611362457275, |
|
"learning_rate": 2.813333083910761e-06, |
|
"logits/chosen": 2.323111057281494, |
|
"logits/rejected": 2.0086140632629395, |
|
"logps/chosen": -3.430807590484619, |
|
"logps/rejected": -8.105338096618652, |
|
"loss": 0.2182, |
|
"rewards/accuracies": 0.925000011920929, |
|
"rewards/chosen": -5.146210670471191, |
|
"rewards/margins": 7.011796474456787, |
|
"rewards/rejected": -12.15800666809082, |
|
"step": 690 |
|
}, |
|
{ |
|
"epoch": 0.532724505327245, |
|
"grad_norm": 4.097139358520508, |
|
"learning_rate": 2.761321158169134e-06, |
|
"logits/chosen": 1.9292926788330078, |
|
"logits/rejected": 2.0105385780334473, |
|
"logps/chosen": -3.337139129638672, |
|
"logps/rejected": -7.7645721435546875, |
|
"loss": 0.245, |
|
"rewards/accuracies": 0.949999988079071, |
|
"rewards/chosen": -5.005709171295166, |
|
"rewards/margins": 6.641148567199707, |
|
"rewards/rejected": -11.646858215332031, |
|
"step": 700 |
|
}, |
|
{ |
|
"epoch": 0.532724505327245, |
|
"eval_logits/chosen": 2.946713924407959, |
|
"eval_logits/rejected": 2.912729501724243, |
|
"eval_logps/chosen": -3.3646414279937744, |
|
"eval_logps/rejected": -7.633481979370117, |
|
"eval_loss": 0.2412233203649521, |
|
"eval_rewards/accuracies": 0.9065420627593994, |
|
"eval_rewards/chosen": -5.046962261199951, |
|
"eval_rewards/margins": 6.403261184692383, |
|
"eval_rewards/rejected": -11.450223922729492, |
|
"eval_runtime": 30.7742, |
|
"eval_samples_per_second": 27.621, |
|
"eval_steps_per_second": 3.477, |
|
"step": 700 |
|
}, |
|
{ |
|
"epoch": 0.5403348554033486, |
|
"grad_norm": 4.534071922302246, |
|
"learning_rate": 2.70919460833079e-06, |
|
"logits/chosen": 2.4406304359436035, |
|
"logits/rejected": 2.713369607925415, |
|
"logps/chosen": -3.4392433166503906, |
|
"logps/rejected": -8.228872299194336, |
|
"loss": 0.2245, |
|
"rewards/accuracies": 0.949999988079071, |
|
"rewards/chosen": -5.158864498138428, |
|
"rewards/margins": 7.184444427490234, |
|
"rewards/rejected": -12.34330940246582, |
|
"step": 710 |
|
}, |
|
{ |
|
"epoch": 0.547945205479452, |
|
"grad_norm": 1.5941667556762695, |
|
"learning_rate": 2.6569762988232838e-06, |
|
"logits/chosen": 3.3259758949279785, |
|
"logits/rejected": 3.3186354637145996, |
|
"logps/chosen": -3.7198212146759033, |
|
"logps/rejected": -7.6400909423828125, |
|
"loss": 0.2339, |
|
"rewards/accuracies": 0.925000011920929, |
|
"rewards/chosen": -5.5797319412231445, |
|
"rewards/margins": 5.8804030418396, |
|
"rewards/rejected": -11.460134506225586, |
|
"step": 720 |
|
}, |
|
{ |
|
"epoch": 0.5555555555555556, |
|
"grad_norm": 5.972750186920166, |
|
"learning_rate": 2.604689134322999e-06, |
|
"logits/chosen": 2.455244302749634, |
|
"logits/rejected": 2.5398240089416504, |
|
"logps/chosen": -3.892965316772461, |
|
"logps/rejected": -8.59666633605957, |
|
"loss": 0.2041, |
|
"rewards/accuracies": 0.925000011920929, |
|
"rewards/chosen": -5.83944845199585, |
|
"rewards/margins": 7.055548667907715, |
|
"rewards/rejected": -12.894998550415039, |
|
"step": 730 |
|
}, |
|
{ |
|
"epoch": 0.563165905631659, |
|
"grad_norm": 3.1441280841827393, |
|
"learning_rate": 2.5523560497083927e-06, |
|
"logits/chosen": 2.3029608726501465, |
|
"logits/rejected": 2.3662524223327637, |
|
"logps/chosen": -3.682513475418091, |
|
"logps/rejected": -8.340951919555664, |
|
"loss": 0.2723, |
|
"rewards/accuracies": 0.887499988079071, |
|
"rewards/chosen": -5.523770332336426, |
|
"rewards/margins": 6.9876580238342285, |
|
"rewards/rejected": -12.511428833007812, |
|
"step": 740 |
|
}, |
|
{ |
|
"epoch": 0.5707762557077626, |
|
"grad_norm": 2.30711030960083, |
|
"learning_rate": 2.5e-06, |
|
"logits/chosen": 1.8446356058120728, |
|
"logits/rejected": 1.9468234777450562, |
|
"logps/chosen": -3.808454990386963, |
|
"logps/rejected": -8.047523498535156, |
|
"loss": 0.1936, |
|
"rewards/accuracies": 0.925000011920929, |
|
"rewards/chosen": -5.712681770324707, |
|
"rewards/margins": 6.3586015701293945, |
|
"rewards/rejected": -12.071284294128418, |
|
"step": 750 |
|
}, |
|
{ |
|
"epoch": 0.5707762557077626, |
|
"eval_logits/chosen": 2.7250914573669434, |
|
"eval_logits/rejected": 2.7465131282806396, |
|
"eval_logps/chosen": -3.5594334602355957, |
|
"eval_logps/rejected": -7.899675369262695, |
|
"eval_loss": 0.2329329252243042, |
|
"eval_rewards/accuracies": 0.9345794320106506, |
|
"eval_rewards/chosen": -5.339150905609131, |
|
"eval_rewards/margins": 6.510361671447754, |
|
"eval_rewards/rejected": -11.84951114654541, |
|
"eval_runtime": 30.7783, |
|
"eval_samples_per_second": 27.617, |
|
"eval_steps_per_second": 3.476, |
|
"step": 750 |
|
}, |
|
{ |
|
"epoch": 0.578386605783866, |
|
"grad_norm": 3.0719997882843018, |
|
"learning_rate": 2.447643950291608e-06, |
|
"logits/chosen": 2.3534064292907715, |
|
"logits/rejected": 2.401563882827759, |
|
"logps/chosen": -3.610807418823242, |
|
"logps/rejected": -7.985041618347168, |
|
"loss": 0.1847, |
|
"rewards/accuracies": 0.949999988079071, |
|
"rewards/chosen": -5.416211128234863, |
|
"rewards/margins": 6.561351776123047, |
|
"rewards/rejected": -11.97756290435791, |
|
"step": 760 |
|
}, |
|
{ |
|
"epoch": 0.5859969558599696, |
|
"grad_norm": 3.6076536178588867, |
|
"learning_rate": 2.3953108656770018e-06, |
|
"logits/chosen": 1.969351053237915, |
|
"logits/rejected": 2.2207655906677246, |
|
"logps/chosen": -2.9666709899902344, |
|
"logps/rejected": -8.2462797164917, |
|
"loss": 0.2211, |
|
"rewards/accuracies": 0.9375, |
|
"rewards/chosen": -4.450006484985352, |
|
"rewards/margins": 7.919413089752197, |
|
"rewards/rejected": -12.369420051574707, |
|
"step": 770 |
|
}, |
|
{ |
|
"epoch": 0.593607305936073, |
|
"grad_norm": 3.9795055389404297, |
|
"learning_rate": 2.3430237011767166e-06, |
|
"logits/chosen": 2.6336114406585693, |
|
"logits/rejected": 2.438262462615967, |
|
"logps/chosen": -3.4108245372772217, |
|
"logps/rejected": -8.591341018676758, |
|
"loss": 0.2306, |
|
"rewards/accuracies": 0.9624999761581421, |
|
"rewards/chosen": -5.116236209869385, |
|
"rewards/margins": 7.770774841308594, |
|
"rewards/rejected": -12.887011528015137, |
|
"step": 780 |
|
}, |
|
{ |
|
"epoch": 0.6012176560121766, |
|
"grad_norm": 3.288729667663574, |
|
"learning_rate": 2.290805391669212e-06, |
|
"logits/chosen": 1.6185804605484009, |
|
"logits/rejected": 1.6822645664215088, |
|
"logps/chosen": -3.2728798389434814, |
|
"logps/rejected": -7.791805267333984, |
|
"loss": 0.2329, |
|
"rewards/accuracies": 0.9375, |
|
"rewards/chosen": -4.909319877624512, |
|
"rewards/margins": 6.778387546539307, |
|
"rewards/rejected": -11.687707901000977, |
|
"step": 790 |
|
}, |
|
{ |
|
"epoch": 0.60882800608828, |
|
"grad_norm": 2.315206289291382, |
|
"learning_rate": 2.238678841830867e-06, |
|
"logits/chosen": 2.018578052520752, |
|
"logits/rejected": 1.8465204238891602, |
|
"logps/chosen": -3.2794997692108154, |
|
"logps/rejected": -8.164708137512207, |
|
"loss": 0.2082, |
|
"rewards/accuracies": 0.9375, |
|
"rewards/chosen": -4.919250011444092, |
|
"rewards/margins": 7.327812194824219, |
|
"rewards/rejected": -12.247061729431152, |
|
"step": 800 |
|
}, |
|
{ |
|
"epoch": 0.60882800608828, |
|
"eval_logits/chosen": 3.08111572265625, |
|
"eval_logits/rejected": 3.0503101348876953, |
|
"eval_logps/chosen": -3.4727284908294678, |
|
"eval_logps/rejected": -7.895880699157715, |
|
"eval_loss": 0.22325536608695984, |
|
"eval_rewards/accuracies": 0.9252336621284485, |
|
"eval_rewards/chosen": -5.209092617034912, |
|
"eval_rewards/margins": 6.634730339050293, |
|
"eval_rewards/rejected": -11.84382152557373, |
|
"eval_runtime": 30.7753, |
|
"eval_samples_per_second": 27.62, |
|
"eval_steps_per_second": 3.477, |
|
"step": 800 |
|
}, |
|
{ |
|
"epoch": 0.6164383561643836, |
|
"grad_norm": 2.4823191165924072, |
|
"learning_rate": 2.186666916089239e-06, |
|
"logits/chosen": 2.6345906257629395, |
|
"logits/rejected": 2.5206990242004395, |
|
"logps/chosen": -3.123882532119751, |
|
"logps/rejected": -7.772116184234619, |
|
"loss": 0.1931, |
|
"rewards/accuracies": 0.9375, |
|
"rewards/chosen": -4.685823440551758, |
|
"rewards/margins": 6.972352027893066, |
|
"rewards/rejected": -11.658174514770508, |
|
"step": 810 |
|
}, |
|
{ |
|
"epoch": 0.624048706240487, |
|
"grad_norm": 1.3935630321502686, |
|
"learning_rate": 2.134792428593971e-06, |
|
"logits/chosen": 2.4640917778015137, |
|
"logits/rejected": 2.531430721282959, |
|
"logps/chosen": -3.5584664344787598, |
|
"logps/rejected": -8.64016342163086, |
|
"loss": 0.205, |
|
"rewards/accuracies": 0.9125000238418579, |
|
"rewards/chosen": -5.337700366973877, |
|
"rewards/margins": 7.622546195983887, |
|
"rewards/rejected": -12.960246086120605, |
|
"step": 820 |
|
}, |
|
{ |
|
"epoch": 0.6316590563165906, |
|
"grad_norm": 2.7727415561676025, |
|
"learning_rate": 2.0830781332097446e-06, |
|
"logits/chosen": 3.5152816772460938, |
|
"logits/rejected": 3.3117728233337402, |
|
"logps/chosen": -3.820496082305908, |
|
"logps/rejected": -8.241179466247559, |
|
"loss": 0.2315, |
|
"rewards/accuracies": 0.9125000238418579, |
|
"rewards/chosen": -5.730743408203125, |
|
"rewards/margins": 6.631025791168213, |
|
"rewards/rejected": -12.361770629882812, |
|
"step": 830 |
|
}, |
|
{ |
|
"epoch": 0.639269406392694, |
|
"grad_norm": 3.5436055660247803, |
|
"learning_rate": 2.031546713535688e-06, |
|
"logits/chosen": 2.813575506210327, |
|
"logits/rejected": 2.650791645050049, |
|
"logps/chosen": -3.552276134490967, |
|
"logps/rejected": -8.587823867797852, |
|
"loss": 0.1964, |
|
"rewards/accuracies": 0.949999988079071, |
|
"rewards/chosen": -5.328413963317871, |
|
"rewards/margins": 7.553321838378906, |
|
"rewards/rejected": -12.881736755371094, |
|
"step": 840 |
|
}, |
|
{ |
|
"epoch": 0.6468797564687976, |
|
"grad_norm": 4.140048027038574, |
|
"learning_rate": 1.9802207729556023e-06, |
|
"logits/chosen": 2.695815324783325, |
|
"logits/rejected": 2.509413242340088, |
|
"logps/chosen": -3.706005573272705, |
|
"logps/rejected": -8.673744201660156, |
|
"loss": 0.1882, |
|
"rewards/accuracies": 0.9750000238418579, |
|
"rewards/chosen": -5.55900764465332, |
|
"rewards/margins": 7.451608180999756, |
|
"rewards/rejected": -13.01061725616455, |
|
"step": 850 |
|
}, |
|
{ |
|
"epoch": 0.6468797564687976, |
|
"eval_logits/chosen": 3.0777766704559326, |
|
"eval_logits/rejected": 3.120222806930542, |
|
"eval_logps/chosen": -3.801321268081665, |
|
"eval_logps/rejected": -8.376675605773926, |
|
"eval_loss": 0.21778903901576996, |
|
"eval_rewards/accuracies": 0.9345794320106506, |
|
"eval_rewards/chosen": -5.701981067657471, |
|
"eval_rewards/margins": 6.863031387329102, |
|
"eval_rewards/rejected": -12.565014839172363, |
|
"eval_runtime": 30.7706, |
|
"eval_samples_per_second": 27.624, |
|
"eval_steps_per_second": 3.477, |
|
"step": 850 |
|
}, |
|
{ |
|
"epoch": 0.654490106544901, |
|
"grad_norm": 3.0274274349212646, |
|
"learning_rate": 1.9291228247233607e-06, |
|
"logits/chosen": 2.446882486343384, |
|
"logits/rejected": 2.352177143096924, |
|
"logps/chosen": -3.8477814197540283, |
|
"logps/rejected": -9.340738296508789, |
|
"loss": 0.1916, |
|
"rewards/accuracies": 0.949999988079071, |
|
"rewards/chosen": -5.771672248840332, |
|
"rewards/margins": 8.239435195922852, |
|
"rewards/rejected": -14.011106491088867, |
|
"step": 860 |
|
}, |
|
{ |
|
"epoch": 0.6621004566210046, |
|
"grad_norm": 4.4860520362854, |
|
"learning_rate": 1.8782752820878636e-06, |
|
"logits/chosen": 2.77601957321167, |
|
"logits/rejected": 2.853281021118164, |
|
"logps/chosen": -4.01420259475708, |
|
"logps/rejected": -9.463164329528809, |
|
"loss": 0.2013, |
|
"rewards/accuracies": 0.9375, |
|
"rewards/chosen": -6.021303653717041, |
|
"rewards/margins": 8.173443794250488, |
|
"rewards/rejected": -14.194747924804688, |
|
"step": 870 |
|
}, |
|
{ |
|
"epoch": 0.669710806697108, |
|
"grad_norm": 6.593935012817383, |
|
"learning_rate": 1.827700448461836e-06, |
|
"logits/chosen": 2.5073022842407227, |
|
"logits/rejected": 2.9579524993896484, |
|
"logps/chosen": -3.9602768421173096, |
|
"logps/rejected": -9.482460021972656, |
|
"loss": 0.2163, |
|
"rewards/accuracies": 0.925000011920929, |
|
"rewards/chosen": -5.940415382385254, |
|
"rewards/margins": 8.283275604248047, |
|
"rewards/rejected": -14.2236909866333, |
|
"step": 880 |
|
}, |
|
{ |
|
"epoch": 0.6773211567732116, |
|
"grad_norm": 3.101680040359497, |
|
"learning_rate": 1.7774205076388207e-06, |
|
"logits/chosen": 2.27485990524292, |
|
"logits/rejected": 2.5005619525909424, |
|
"logps/chosen": -3.828552722930908, |
|
"logps/rejected": -8.851189613342285, |
|
"loss": 0.1935, |
|
"rewards/accuracies": 0.949999988079071, |
|
"rewards/chosen": -5.742828845977783, |
|
"rewards/margins": 7.5339555740356445, |
|
"rewards/rejected": -13.27678394317627, |
|
"step": 890 |
|
}, |
|
{ |
|
"epoch": 0.684931506849315, |
|
"grad_norm": 3.6742115020751953, |
|
"learning_rate": 1.7274575140626318e-06, |
|
"logits/chosen": 2.381319761276245, |
|
"logits/rejected": 2.543203353881836, |
|
"logps/chosen": -3.4646923542022705, |
|
"logps/rejected": -9.015104293823242, |
|
"loss": 0.1903, |
|
"rewards/accuracies": 0.9750000238418579, |
|
"rewards/chosen": -5.197038173675537, |
|
"rewards/margins": 8.325616836547852, |
|
"rewards/rejected": -13.522656440734863, |
|
"step": 900 |
|
}, |
|
{ |
|
"epoch": 0.684931506849315, |
|
"eval_logits/chosen": 3.069916009902954, |
|
"eval_logits/rejected": 3.104632616043091, |
|
"eval_logps/chosen": -3.6102888584136963, |
|
"eval_logps/rejected": -8.349987983703613, |
|
"eval_loss": 0.2161051481962204, |
|
"eval_rewards/accuracies": 0.9345794320106506, |
|
"eval_rewards/chosen": -5.415433406829834, |
|
"eval_rewards/margins": 7.1095476150512695, |
|
"eval_rewards/rejected": -12.524979591369629, |
|
"eval_runtime": 30.7844, |
|
"eval_samples_per_second": 27.611, |
|
"eval_steps_per_second": 3.476, |
|
"step": 900 |
|
}, |
|
{ |
|
"epoch": 0.6925418569254186, |
|
"grad_norm": 2.9264719486236572, |
|
"learning_rate": 1.677833383153542e-06, |
|
"logits/chosen": 2.0955498218536377, |
|
"logits/rejected": 2.1619057655334473, |
|
"logps/chosen": -3.460951328277588, |
|
"logps/rejected": -8.898179054260254, |
|
"loss": 0.1687, |
|
"rewards/accuracies": 0.949999988079071, |
|
"rewards/chosen": -5.191426753997803, |
|
"rewards/margins": 8.155839920043945, |
|
"rewards/rejected": -13.347267150878906, |
|
"step": 910 |
|
}, |
|
{ |
|
"epoch": 0.700152207001522, |
|
"grad_norm": 2.0860531330108643, |
|
"learning_rate": 1.6285698816954626e-06, |
|
"logits/chosen": 3.346489429473877, |
|
"logits/rejected": 3.2421345710754395, |
|
"logps/chosen": -3.63940691947937, |
|
"logps/rejected": -8.418205261230469, |
|
"loss": 0.2161, |
|
"rewards/accuracies": 0.987500011920929, |
|
"rewards/chosen": -5.459110260009766, |
|
"rewards/margins": 7.168198585510254, |
|
"rewards/rejected": -12.627306938171387, |
|
"step": 920 |
|
}, |
|
{ |
|
"epoch": 0.7077625570776256, |
|
"grad_norm": 2.778262138366699, |
|
"learning_rate": 1.5796886182883053e-06, |
|
"logits/chosen": 1.7459516525268555, |
|
"logits/rejected": 2.1579155921936035, |
|
"logps/chosen": -3.3034520149230957, |
|
"logps/rejected": -9.191883087158203, |
|
"loss": 0.2007, |
|
"rewards/accuracies": 0.9624999761581421, |
|
"rewards/chosen": -4.955178260803223, |
|
"rewards/margins": 8.832646369934082, |
|
"rewards/rejected": -13.787823677062988, |
|
"step": 930 |
|
}, |
|
{ |
|
"epoch": 0.715372907153729, |
|
"grad_norm": 3.695908546447754, |
|
"learning_rate": 1.5312110338697427e-06, |
|
"logits/chosen": 2.7586328983306885, |
|
"logits/rejected": 2.920232057571411, |
|
"logps/chosen": -3.6659629344940186, |
|
"logps/rejected": -8.657777786254883, |
|
"loss": 0.1907, |
|
"rewards/accuracies": 0.987500011920929, |
|
"rewards/chosen": -5.498944282531738, |
|
"rewards/margins": 7.487723350524902, |
|
"rewards/rejected": -12.986666679382324, |
|
"step": 940 |
|
}, |
|
{ |
|
"epoch": 0.7229832572298326, |
|
"grad_norm": 3.2104835510253906, |
|
"learning_rate": 1.4831583923105e-06, |
|
"logits/chosen": 2.8813366889953613, |
|
"logits/rejected": 3.21830677986145, |
|
"logps/chosen": -3.41332745552063, |
|
"logps/rejected": -8.479570388793945, |
|
"loss": 0.2074, |
|
"rewards/accuracies": 0.9624999761581421, |
|
"rewards/chosen": -5.119990825653076, |
|
"rewards/margins": 7.599363803863525, |
|
"rewards/rejected": -12.719354629516602, |
|
"step": 950 |
|
}, |
|
{ |
|
"epoch": 0.7229832572298326, |
|
"eval_logits/chosen": 3.281101703643799, |
|
"eval_logits/rejected": 3.3282196521759033, |
|
"eval_logps/chosen": -3.6136667728424072, |
|
"eval_logps/rejected": -8.419679641723633, |
|
"eval_loss": 0.21113382279872894, |
|
"eval_rewards/accuracies": 0.9252336621284485, |
|
"eval_rewards/chosen": -5.4205002784729, |
|
"eval_rewards/margins": 7.209019660949707, |
|
"eval_rewards/rejected": -12.629520416259766, |
|
"eval_runtime": 30.777, |
|
"eval_samples_per_second": 27.618, |
|
"eval_steps_per_second": 3.477, |
|
"step": 950 |
|
}, |
|
{ |
|
"epoch": 0.730593607305936, |
|
"grad_norm": 6.422901153564453, |
|
"learning_rate": 1.4355517710873184e-06, |
|
"logits/chosen": 2.0251851081848145, |
|
"logits/rejected": 2.1296639442443848, |
|
"logps/chosen": -3.15910005569458, |
|
"logps/rejected": -8.686817169189453, |
|
"loss": 0.187, |
|
"rewards/accuracies": 0.9750000238418579, |
|
"rewards/chosen": -4.738650321960449, |
|
"rewards/margins": 8.29157543182373, |
|
"rewards/rejected": -13.03022575378418, |
|
"step": 960 |
|
}, |
|
{ |
|
"epoch": 0.7382039573820396, |
|
"grad_norm": 1.537139654159546, |
|
"learning_rate": 1.388412052037682e-06, |
|
"logits/chosen": 3.1659352779388428, |
|
"logits/rejected": 3.161760091781616, |
|
"logps/chosen": -3.755237102508545, |
|
"logps/rejected": -8.484071731567383, |
|
"loss": 0.1823, |
|
"rewards/accuracies": 0.949999988079071, |
|
"rewards/chosen": -5.6328558921813965, |
|
"rewards/margins": 7.093251705169678, |
|
"rewards/rejected": -12.726107597351074, |
|
"step": 970 |
|
}, |
|
{ |
|
"epoch": 0.745814307458143, |
|
"grad_norm": 3.024386405944824, |
|
"learning_rate": 1.3417599122003464e-06, |
|
"logits/chosen": 2.7146401405334473, |
|
"logits/rejected": 2.9919748306274414, |
|
"logps/chosen": -3.507610321044922, |
|
"logps/rejected": -9.035795211791992, |
|
"loss": 0.2122, |
|
"rewards/accuracies": 0.925000011920929, |
|
"rewards/chosen": -5.261415004730225, |
|
"rewards/margins": 8.292278289794922, |
|
"rewards/rejected": -13.553693771362305, |
|
"step": 980 |
|
}, |
|
{ |
|
"epoch": 0.7534246575342466, |
|
"grad_norm": 3.5992636680603027, |
|
"learning_rate": 1.2956158147457116e-06, |
|
"logits/chosen": 2.9386534690856934, |
|
"logits/rejected": 2.974626064300537, |
|
"logps/chosen": -3.5809288024902344, |
|
"logps/rejected": -9.47168254852295, |
|
"loss": 0.2058, |
|
"rewards/accuracies": 0.9624999761581421, |
|
"rewards/chosen": -5.371392726898193, |
|
"rewards/margins": 8.836130142211914, |
|
"rewards/rejected": -14.20752239227295, |
|
"step": 990 |
|
}, |
|
{ |
|
"epoch": 0.76103500761035, |
|
"grad_norm": 3.0734636783599854, |
|
"learning_rate": 1.2500000000000007e-06, |
|
"logits/chosen": 2.984003782272339, |
|
"logits/rejected": 3.055147647857666, |
|
"logps/chosen": -4.086684226989746, |
|
"logps/rejected": -9.243562698364258, |
|
"loss": 0.2135, |
|
"rewards/accuracies": 0.9750000238418579, |
|
"rewards/chosen": -6.130026817321777, |
|
"rewards/margins": 7.735315799713135, |
|
"rewards/rejected": -13.865341186523438, |
|
"step": 1000 |
|
}, |
|
{ |
|
"epoch": 0.76103500761035, |
|
"eval_logits/chosen": 3.18290638923645, |
|
"eval_logits/rejected": 3.2487096786499023, |
|
"eval_logps/chosen": -3.576455593109131, |
|
"eval_logps/rejected": -8.458187103271484, |
|
"eval_loss": 0.20949731767177582, |
|
"eval_rewards/accuracies": 0.9439252614974976, |
|
"eval_rewards/chosen": -5.364683151245117, |
|
"eval_rewards/margins": 7.322596549987793, |
|
"eval_rewards/rejected": -12.687278747558594, |
|
"eval_runtime": 30.7759, |
|
"eval_samples_per_second": 27.619, |
|
"eval_steps_per_second": 3.477, |
|
"step": 1000 |
|
}, |
|
{ |
|
"epoch": 0.7686453576864536, |
|
"grad_norm": 4.717663288116455, |
|
"learning_rate": 1.204932476567175e-06, |
|
"logits/chosen": 2.96572208404541, |
|
"logits/rejected": 2.8679356575012207, |
|
"logps/chosen": -3.6380767822265625, |
|
"logps/rejected": -8.982633590698242, |
|
"loss": 0.1686, |
|
"rewards/accuracies": 0.9375, |
|
"rewards/chosen": -5.457114219665527, |
|
"rewards/margins": 8.01683521270752, |
|
"rewards/rejected": -13.473950386047363, |
|
"step": 1010 |
|
}, |
|
{ |
|
"epoch": 0.776255707762557, |
|
"grad_norm": 2.1752161979675293, |
|
"learning_rate": 1.160433012552508e-06, |
|
"logits/chosen": 2.227342128753662, |
|
"logits/rejected": 2.576072931289673, |
|
"logps/chosen": -3.2803902626037598, |
|
"logps/rejected": -8.933613777160645, |
|
"loss": 0.1802, |
|
"rewards/accuracies": 0.987500011920929, |
|
"rewards/chosen": -4.920585632324219, |
|
"rewards/margins": 8.479835510253906, |
|
"rewards/rejected": -13.400421142578125, |
|
"step": 1020 |
|
}, |
|
{ |
|
"epoch": 0.7838660578386606, |
|
"grad_norm": 3.252668857574463, |
|
"learning_rate": 1.11652112689164e-06, |
|
"logits/chosen": 2.558826446533203, |
|
"logits/rejected": 2.4980645179748535, |
|
"logps/chosen": -3.639244794845581, |
|
"logps/rejected": -9.068904876708984, |
|
"loss": 0.2142, |
|
"rewards/accuracies": 0.949999988079071, |
|
"rewards/chosen": -5.458867073059082, |
|
"rewards/margins": 8.144488334655762, |
|
"rewards/rejected": -13.603355407714844, |
|
"step": 1030 |
|
}, |
|
{ |
|
"epoch": 0.791476407914764, |
|
"grad_norm": 4.632419109344482, |
|
"learning_rate": 1.073216080788921e-06, |
|
"logits/chosen": 1.8568460941314697, |
|
"logits/rejected": 2.2069149017333984, |
|
"logps/chosen": -3.4306697845458984, |
|
"logps/rejected": -8.670662879943848, |
|
"loss": 0.169, |
|
"rewards/accuracies": 0.987500011920929, |
|
"rewards/chosen": -5.146004676818848, |
|
"rewards/margins": 7.859990119934082, |
|
"rewards/rejected": -13.00599479675293, |
|
"step": 1040 |
|
}, |
|
{ |
|
"epoch": 0.7990867579908676, |
|
"grad_norm": 4.621112823486328, |
|
"learning_rate": 1.0305368692688175e-06, |
|
"logits/chosen": 2.480006694793701, |
|
"logits/rejected": 2.7665817737579346, |
|
"logps/chosen": -3.486351728439331, |
|
"logps/rejected": -8.768314361572266, |
|
"loss": 0.1901, |
|
"rewards/accuracies": 0.9624999761581421, |
|
"rewards/chosen": -5.229527473449707, |
|
"rewards/margins": 7.922944068908691, |
|
"rewards/rejected": -13.152471542358398, |
|
"step": 1050 |
|
}, |
|
{ |
|
"epoch": 0.7990867579908676, |
|
"eval_logits/chosen": 3.231480598449707, |
|
"eval_logits/rejected": 3.311223030090332, |
|
"eval_logps/chosen": -3.6841485500335693, |
|
"eval_logps/rejected": -8.640739440917969, |
|
"eval_loss": 0.20705747604370117, |
|
"eval_rewards/accuracies": 0.9252336621284485, |
|
"eval_rewards/chosen": -5.526222229003906, |
|
"eval_rewards/margins": 7.434886455535889, |
|
"eval_rewards/rejected": -12.961106300354004, |
|
"eval_runtime": 30.7718, |
|
"eval_samples_per_second": 27.623, |
|
"eval_steps_per_second": 3.477, |
|
"step": 1050 |
|
}, |
|
{ |
|
"epoch": 0.806697108066971, |
|
"grad_norm": 3.463608503341675, |
|
"learning_rate": 9.88502212844063e-07, |
|
"logits/chosen": 2.379103183746338, |
|
"logits/rejected": 2.5155222415924072, |
|
"logps/chosen": -3.9454503059387207, |
|
"logps/rejected": -9.293844223022461, |
|
"loss": 0.1937, |
|
"rewards/accuracies": 0.925000011920929, |
|
"rewards/chosen": -5.918175220489502, |
|
"rewards/margins": 8.022592544555664, |
|
"rewards/rejected": -13.940768241882324, |
|
"step": 1060 |
|
}, |
|
{ |
|
"epoch": 0.8143074581430746, |
|
"grad_norm": 5.665669918060303, |
|
"learning_rate": 9.471305493042243e-07, |
|
"logits/chosen": 2.449711322784424, |
|
"logits/rejected": 2.758481502532959, |
|
"logps/chosen": -3.280363082885742, |
|
"logps/rejected": -8.576845169067383, |
|
"loss": 0.2166, |
|
"rewards/accuracies": 0.949999988079071, |
|
"rewards/chosen": -4.920544624328613, |
|
"rewards/margins": 7.9447221755981445, |
|
"rewards/rejected": -12.865266799926758, |
|
"step": 1070 |
|
}, |
|
{ |
|
"epoch": 0.821917808219178, |
|
"grad_norm": 5.21759033203125, |
|
"learning_rate": 9.064400256282757e-07, |
|
"logits/chosen": 2.5535693168640137, |
|
"logits/rejected": 2.780562162399292, |
|
"logps/chosen": -3.6120028495788574, |
|
"logps/rejected": -9.69387435913086, |
|
"loss": 0.1694, |
|
"rewards/accuracies": 0.987500011920929, |
|
"rewards/chosen": -5.418004035949707, |
|
"rewards/margins": 9.122807502746582, |
|
"rewards/rejected": -14.540811538696289, |
|
"step": 1080 |
|
}, |
|
{ |
|
"epoch": 0.8295281582952816, |
|
"grad_norm": 3.6049561500549316, |
|
"learning_rate": 8.664484900247363e-07, |
|
"logits/chosen": 3.09112811088562, |
|
"logits/rejected": 3.2834911346435547, |
|
"logps/chosen": -3.5656745433807373, |
|
"logps/rejected": -8.692608833312988, |
|
"loss": 0.1771, |
|
"rewards/accuracies": 0.9375, |
|
"rewards/chosen": -5.348511695861816, |
|
"rewards/margins": 7.69040060043335, |
|
"rewards/rejected": -13.038912773132324, |
|
"step": 1090 |
|
}, |
|
{ |
|
"epoch": 0.837138508371385, |
|
"grad_norm": 6.876165390014648, |
|
"learning_rate": 8.271734841028553e-07, |
|
"logits/chosen": 1.9753879308700562, |
|
"logits/rejected": 2.3351285457611084, |
|
"logps/chosen": -3.95485258102417, |
|
"logps/rejected": -9.459092140197754, |
|
"loss": 0.2078, |
|
"rewards/accuracies": 0.925000011920929, |
|
"rewards/chosen": -5.932278633117676, |
|
"rewards/margins": 8.256359100341797, |
|
"rewards/rejected": -14.188638687133789, |
|
"step": 1100 |
|
}, |
|
{ |
|
"epoch": 0.837138508371385, |
|
"eval_logits/chosen": 3.2720582485198975, |
|
"eval_logits/rejected": 3.3373920917510986, |
|
"eval_logps/chosen": -3.704756498336792, |
|
"eval_logps/rejected": -8.690947532653809, |
|
"eval_loss": 0.20036298036575317, |
|
"eval_rewards/accuracies": 0.9439252614974976, |
|
"eval_rewards/chosen": -5.557135105133057, |
|
"eval_rewards/margins": 7.479285717010498, |
|
"eval_rewards/rejected": -13.036419868469238, |
|
"eval_runtime": 30.7733, |
|
"eval_samples_per_second": 27.621, |
|
"eval_steps_per_second": 3.477, |
|
"step": 1100 |
|
}, |
|
{ |
|
"epoch": 0.8447488584474886, |
|
"grad_norm": 5.0880126953125, |
|
"learning_rate": 7.886322351782782e-07, |
|
"logits/chosen": 2.924659013748169, |
|
"logits/rejected": 3.131314277648926, |
|
"logps/chosen": -3.665433883666992, |
|
"logps/rejected": -9.120814323425293, |
|
"loss": 0.1727, |
|
"rewards/accuracies": 0.8999999761581421, |
|
"rewards/chosen": -5.4981513023376465, |
|
"rewards/margins": 8.183070182800293, |
|
"rewards/rejected": -13.681221008300781, |
|
"step": 1110 |
|
}, |
|
{ |
|
"epoch": 0.852359208523592, |
|
"grad_norm": 2.2846970558166504, |
|
"learning_rate": 7.508416487165862e-07, |
|
"logits/chosen": 1.7218126058578491, |
|
"logits/rejected": 2.0311954021453857, |
|
"logps/chosen": -3.7894043922424316, |
|
"logps/rejected": -10.045234680175781, |
|
"loss": 0.1494, |
|
"rewards/accuracies": 0.9624999761581421, |
|
"rewards/chosen": -5.684107303619385, |
|
"rewards/margins": 9.383744239807129, |
|
"rewards/rejected": -15.067851066589355, |
|
"step": 1120 |
|
}, |
|
{ |
|
"epoch": 0.8599695585996956, |
|
"grad_norm": 2.9382874965667725, |
|
"learning_rate": 7.138183009179922e-07, |
|
"logits/chosen": 4.044937610626221, |
|
"logits/rejected": 3.507354736328125, |
|
"logps/chosen": -3.7679309844970703, |
|
"logps/rejected": -8.613094329833984, |
|
"loss": 0.2204, |
|
"rewards/accuracies": 0.9750000238418579, |
|
"rewards/chosen": -5.6518964767456055, |
|
"rewards/margins": 7.267745018005371, |
|
"rewards/rejected": -12.919641494750977, |
|
"step": 1130 |
|
}, |
|
{ |
|
"epoch": 0.867579908675799, |
|
"grad_norm": 3.7730531692504883, |
|
"learning_rate": 6.775784314464717e-07, |
|
"logits/chosen": 2.7572951316833496, |
|
"logits/rejected": 2.948090076446533, |
|
"logps/chosen": -3.9021010398864746, |
|
"logps/rejected": -8.59765625, |
|
"loss": 0.177, |
|
"rewards/accuracies": 0.9750000238418579, |
|
"rewards/chosen": -5.853151798248291, |
|
"rewards/margins": 7.043332576751709, |
|
"rewards/rejected": -12.896484375, |
|
"step": 1140 |
|
}, |
|
{ |
|
"epoch": 0.8751902587519026, |
|
"grad_norm": 6.001643657684326, |
|
"learning_rate": 6.421379363065142e-07, |
|
"logits/chosen": 2.880664825439453, |
|
"logits/rejected": 2.9854893684387207, |
|
"logps/chosen": -3.7101027965545654, |
|
"logps/rejected": -9.663796424865723, |
|
"loss": 0.1909, |
|
"rewards/accuracies": 0.9624999761581421, |
|
"rewards/chosen": -5.565154075622559, |
|
"rewards/margins": 8.93053913116455, |
|
"rewards/rejected": -14.495694160461426, |
|
"step": 1150 |
|
}, |
|
{ |
|
"epoch": 0.8751902587519026, |
|
"eval_logits/chosen": 3.2326529026031494, |
|
"eval_logits/rejected": 3.3147149085998535, |
|
"eval_logps/chosen": -3.758188486099243, |
|
"eval_logps/rejected": -8.758670806884766, |
|
"eval_loss": 0.19985900819301605, |
|
"eval_rewards/accuracies": 0.9439252614974976, |
|
"eval_rewards/chosen": -5.637282371520996, |
|
"eval_rewards/margins": 7.500723361968994, |
|
"eval_rewards/rejected": -13.138005256652832, |
|
"eval_runtime": 30.7775, |
|
"eval_samples_per_second": 27.618, |
|
"eval_steps_per_second": 3.477, |
|
"step": 1150 |
|
}, |
|
{ |
|
"epoch": 0.882800608828006, |
|
"grad_norm": 2.555748224258423, |
|
"learning_rate": 6.075123608706093e-07, |
|
"logits/chosen": 2.5560011863708496, |
|
"logits/rejected": 2.8420631885528564, |
|
"logps/chosen": -3.545081377029419, |
|
"logps/rejected": -9.308977127075195, |
|
"loss": 0.1999, |
|
"rewards/accuracies": 0.9750000238418579, |
|
"rewards/chosen": -5.317622184753418, |
|
"rewards/margins": 8.645845413208008, |
|
"rewards/rejected": -13.963467597961426, |
|
"step": 1160 |
|
}, |
|
{ |
|
"epoch": 0.8904109589041096, |
|
"grad_norm": 2.0732498168945312, |
|
"learning_rate": 5.737168930605272e-07, |
|
"logits/chosen": 3.55595326423645, |
|
"logits/rejected": 3.61430025100708, |
|
"logps/chosen": -3.8471646308898926, |
|
"logps/rejected": -9.333372116088867, |
|
"loss": 0.2348, |
|
"rewards/accuracies": 0.949999988079071, |
|
"rewards/chosen": -5.770747661590576, |
|
"rewards/margins": 8.229310989379883, |
|
"rewards/rejected": -14.0000581741333, |
|
"step": 1170 |
|
}, |
|
{ |
|
"epoch": 0.898021308980213, |
|
"grad_norm": 3.7742130756378174, |
|
"learning_rate": 5.407663566854008e-07, |
|
"logits/chosen": 3.0770957469940186, |
|
"logits/rejected": 3.1060760021209717, |
|
"logps/chosen": -3.524754285812378, |
|
"logps/rejected": -9.05833625793457, |
|
"loss": 0.1691, |
|
"rewards/accuracies": 0.9125000238418579, |
|
"rewards/chosen": -5.2871317863464355, |
|
"rewards/margins": 8.300371170043945, |
|
"rewards/rejected": -13.587503433227539, |
|
"step": 1180 |
|
}, |
|
{ |
|
"epoch": 0.9056316590563166, |
|
"grad_norm": 3.0568952560424805, |
|
"learning_rate": 5.086752049395094e-07, |
|
"logits/chosen": 2.4866814613342285, |
|
"logits/rejected": 2.541372537612915, |
|
"logps/chosen": -3.6101276874542236, |
|
"logps/rejected": -8.999945640563965, |
|
"loss": 0.1596, |
|
"rewards/accuracies": 0.925000011920929, |
|
"rewards/chosen": -5.415192127227783, |
|
"rewards/margins": 8.08472728729248, |
|
"rewards/rejected": -13.499918937683105, |
|
"step": 1190 |
|
}, |
|
{ |
|
"epoch": 0.91324200913242, |
|
"grad_norm": 2.707932949066162, |
|
"learning_rate": 4.774575140626317e-07, |
|
"logits/chosen": 3.445946216583252, |
|
"logits/rejected": 3.263925075531006, |
|
"logps/chosen": -4.120098114013672, |
|
"logps/rejected": -9.42192554473877, |
|
"loss": 0.1671, |
|
"rewards/accuracies": 0.9375, |
|
"rewards/chosen": -6.18014669418335, |
|
"rewards/margins": 7.952740669250488, |
|
"rewards/rejected": -14.13288688659668, |
|
"step": 1200 |
|
}, |
|
{ |
|
"epoch": 0.91324200913242, |
|
"eval_logits/chosen": 3.2238974571228027, |
|
"eval_logits/rejected": 3.331603527069092, |
|
"eval_logps/chosen": -3.8190717697143555, |
|
"eval_logps/rejected": -8.885968208312988, |
|
"eval_loss": 0.1992855668067932, |
|
"eval_rewards/accuracies": 0.9532710313796997, |
|
"eval_rewards/chosen": -5.728607654571533, |
|
"eval_rewards/margins": 7.600344181060791, |
|
"eval_rewards/rejected": -13.328951835632324, |
|
"eval_runtime": 30.7793, |
|
"eval_samples_per_second": 27.616, |
|
"eval_steps_per_second": 3.476, |
|
"step": 1200 |
|
}, |
|
{ |
|
"epoch": 0.9208523592085236, |
|
"grad_norm": 4.369202136993408, |
|
"learning_rate": 4.4712697716573994e-07, |
|
"logits/chosen": 2.0237908363342285, |
|
"logits/rejected": 2.4287328720092773, |
|
"logps/chosen": -3.881247043609619, |
|
"logps/rejected": -8.949652671813965, |
|
"loss": 0.1802, |
|
"rewards/accuracies": 0.949999988079071, |
|
"rewards/chosen": -5.82187032699585, |
|
"rewards/margins": 7.602608680725098, |
|
"rewards/rejected": -13.424479484558105, |
|
"step": 1210 |
|
}, |
|
{ |
|
"epoch": 0.928462709284627, |
|
"grad_norm": 2.347022771835327, |
|
"learning_rate": 4.1769689822475147e-07, |
|
"logits/chosen": 2.990731716156006, |
|
"logits/rejected": 3.2576987743377686, |
|
"logps/chosen": -3.7282702922821045, |
|
"logps/rejected": -9.247597694396973, |
|
"loss": 0.1749, |
|
"rewards/accuracies": 0.9624999761581421, |
|
"rewards/chosen": -5.592406272888184, |
|
"rewards/margins": 8.278989791870117, |
|
"rewards/rejected": -13.871397018432617, |
|
"step": 1220 |
|
}, |
|
{ |
|
"epoch": 0.9360730593607306, |
|
"grad_norm": 5.136405944824219, |
|
"learning_rate": 3.891801862449629e-07, |
|
"logits/chosen": 3.323019027709961, |
|
"logits/rejected": 3.3164806365966797, |
|
"logps/chosen": -4.364622592926025, |
|
"logps/rejected": -8.973845481872559, |
|
"loss": 0.1725, |
|
"rewards/accuracies": 0.949999988079071, |
|
"rewards/chosen": -6.546933650970459, |
|
"rewards/margins": 6.9138336181640625, |
|
"rewards/rejected": -13.46076774597168, |
|
"step": 1230 |
|
}, |
|
{ |
|
"epoch": 0.943683409436834, |
|
"grad_norm": 3.7929859161376953, |
|
"learning_rate": 3.615893495987335e-07, |
|
"logits/chosen": 2.6891913414001465, |
|
"logits/rejected": 2.934324264526367, |
|
"logps/chosen": -3.581650972366333, |
|
"logps/rejected": -9.332942008972168, |
|
"loss": 0.1969, |
|
"rewards/accuracies": 0.887499988079071, |
|
"rewards/chosen": -5.372476100921631, |
|
"rewards/margins": 8.626935958862305, |
|
"rewards/rejected": -13.999414443969727, |
|
"step": 1240 |
|
}, |
|
{ |
|
"epoch": 0.9512937595129376, |
|
"grad_norm": 3.160444974899292, |
|
"learning_rate": 3.3493649053890325e-07, |
|
"logits/chosen": 3.7613556385040283, |
|
"logits/rejected": 3.6090006828308105, |
|
"logps/chosen": -3.6866767406463623, |
|
"logps/rejected": -8.86094856262207, |
|
"loss": 0.184, |
|
"rewards/accuracies": 0.949999988079071, |
|
"rewards/chosen": -5.530014991760254, |
|
"rewards/margins": 7.76140832901001, |
|
"rewards/rejected": -13.291422843933105, |
|
"step": 1250 |
|
}, |
|
{ |
|
"epoch": 0.9512937595129376, |
|
"eval_logits/chosen": 3.236210584640503, |
|
"eval_logits/rejected": 3.349398374557495, |
|
"eval_logps/chosen": -3.790215253829956, |
|
"eval_logps/rejected": -8.857583045959473, |
|
"eval_loss": 0.20043766498565674, |
|
"eval_rewards/accuracies": 0.9439252614974976, |
|
"eval_rewards/chosen": -5.6853227615356445, |
|
"eval_rewards/margins": 7.601051330566406, |
|
"eval_rewards/rejected": -13.286373138427734, |
|
"eval_runtime": 30.7834, |
|
"eval_samples_per_second": 27.612, |
|
"eval_steps_per_second": 3.476, |
|
"step": 1250 |
|
}, |
|
{ |
|
"epoch": 0.958904109589041, |
|
"grad_norm": 2.2813334465026855, |
|
"learning_rate": 3.092332998903416e-07, |
|
"logits/chosen": 2.8569092750549316, |
|
"logits/rejected": 2.8767528533935547, |
|
"logps/chosen": -3.3697330951690674, |
|
"logps/rejected": -9.593984603881836, |
|
"loss": 0.1905, |
|
"rewards/accuracies": 0.9624999761581421, |
|
"rewards/chosen": -5.054598808288574, |
|
"rewards/margins": 9.33637809753418, |
|
"rewards/rejected": -14.39097785949707, |
|
"step": 1260 |
|
}, |
|
{ |
|
"epoch": 0.9665144596651446, |
|
"grad_norm": 2.9309756755828857, |
|
"learning_rate": 2.844910519219632e-07, |
|
"logits/chosen": 2.750169038772583, |
|
"logits/rejected": 2.809312105178833, |
|
"logps/chosen": -3.7196624279022217, |
|
"logps/rejected": -8.989494323730469, |
|
"loss": 0.168, |
|
"rewards/accuracies": 0.925000011920929, |
|
"rewards/chosen": -5.579493999481201, |
|
"rewards/margins": 7.90474796295166, |
|
"rewards/rejected": -13.484240531921387, |
|
"step": 1270 |
|
}, |
|
{ |
|
"epoch": 0.974124809741248, |
|
"grad_norm": 5.769137382507324, |
|
"learning_rate": 2.6072059940146775e-07, |
|
"logits/chosen": 2.346038579940796, |
|
"logits/rejected": 2.589761257171631, |
|
"logps/chosen": -3.7383828163146973, |
|
"logps/rejected": -9.559690475463867, |
|
"loss": 0.2178, |
|
"rewards/accuracies": 0.949999988079071, |
|
"rewards/chosen": -5.607574939727783, |
|
"rewards/margins": 8.731962203979492, |
|
"rewards/rejected": -14.339536666870117, |
|
"step": 1280 |
|
}, |
|
{ |
|
"epoch": 0.9817351598173516, |
|
"grad_norm": 4.260366916656494, |
|
"learning_rate": 2.3793236883495164e-07, |
|
"logits/chosen": 3.3040289878845215, |
|
"logits/rejected": 3.387838363647461, |
|
"logps/chosen": -3.9504966735839844, |
|
"logps/rejected": -10.158651351928711, |
|
"loss": 0.1766, |
|
"rewards/accuracies": 0.9375, |
|
"rewards/chosen": -5.925745010375977, |
|
"rewards/margins": 9.312231063842773, |
|
"rewards/rejected": -15.23797607421875, |
|
"step": 1290 |
|
}, |
|
{ |
|
"epoch": 0.989345509893455, |
|
"grad_norm": 3.250828981399536, |
|
"learning_rate": 2.1613635589349756e-07, |
|
"logits/chosen": 2.380584478378296, |
|
"logits/rejected": 2.4195797443389893, |
|
"logps/chosen": -3.5591416358947754, |
|
"logps/rejected": -9.089366912841797, |
|
"loss": 0.2022, |
|
"rewards/accuracies": 0.9125000238418579, |
|
"rewards/chosen": -5.338712692260742, |
|
"rewards/margins": 8.295337677001953, |
|
"rewards/rejected": -13.634050369262695, |
|
"step": 1300 |
|
}, |
|
{ |
|
"epoch": 0.989345509893455, |
|
"eval_logits/chosen": 3.2631280422210693, |
|
"eval_logits/rejected": 3.3756356239318848, |
|
"eval_logps/chosen": -3.846240758895874, |
|
"eval_logps/rejected": -8.898624420166016, |
|
"eval_loss": 0.19997116923332214, |
|
"eval_rewards/accuracies": 0.9439252614974976, |
|
"eval_rewards/chosen": -5.769360542297363, |
|
"eval_rewards/margins": 7.578575611114502, |
|
"eval_rewards/rejected": -13.347936630249023, |
|
"eval_runtime": 30.7804, |
|
"eval_samples_per_second": 27.615, |
|
"eval_steps_per_second": 3.476, |
|
"step": 1300 |
|
}, |
|
{ |
|
"epoch": 0.9969558599695586, |
|
"grad_norm": 5.129979133605957, |
|
"learning_rate": 1.95342121028749e-07, |
|
"logits/chosen": 2.7255783081054688, |
|
"logits/rejected": 2.9942080974578857, |
|
"logps/chosen": -3.922934055328369, |
|
"logps/rejected": -9.830081939697266, |
|
"loss": 0.1968, |
|
"rewards/accuracies": 0.9125000238418579, |
|
"rewards/chosen": -5.884400367736816, |
|
"rewards/margins": 8.860723495483398, |
|
"rewards/rejected": -14.745122909545898, |
|
"step": 1310 |
|
}, |
|
{ |
|
"epoch": 1.004566210045662, |
|
"grad_norm": 3.166839361190796, |
|
"learning_rate": 1.7555878527937164e-07, |
|
"logits/chosen": 2.710036039352417, |
|
"logits/rejected": 3.067323923110962, |
|
"logps/chosen": -3.608882188796997, |
|
"logps/rejected": -9.708028793334961, |
|
"loss": 0.1895, |
|
"rewards/accuracies": 0.9375, |
|
"rewards/chosen": -5.413322925567627, |
|
"rewards/margins": 9.148719787597656, |
|
"rewards/rejected": -14.562042236328125, |
|
"step": 1320 |
|
}, |
|
{ |
|
"epoch": 1.0121765601217656, |
|
"grad_norm": 3.210637331008911, |
|
"learning_rate": 1.567950262702714e-07, |
|
"logits/chosen": 3.1890697479248047, |
|
"logits/rejected": 3.3442039489746094, |
|
"logps/chosen": -3.6700127124786377, |
|
"logps/rejected": -9.200743675231934, |
|
"loss": 0.1453, |
|
"rewards/accuracies": 0.9624999761581421, |
|
"rewards/chosen": -5.505018711090088, |
|
"rewards/margins": 8.296096801757812, |
|
"rewards/rejected": -13.801115036010742, |
|
"step": 1330 |
|
}, |
|
{ |
|
"epoch": 1.0197869101978692, |
|
"grad_norm": 8.037925720214844, |
|
"learning_rate": 1.3905907440629752e-07, |
|
"logits/chosen": 2.9865851402282715, |
|
"logits/rejected": 3.3031258583068848, |
|
"logps/chosen": -3.9770216941833496, |
|
"logps/rejected": -9.31564998626709, |
|
"loss": 0.2209, |
|
"rewards/accuracies": 0.9375, |
|
"rewards/chosen": -5.9655327796936035, |
|
"rewards/margins": 8.007943153381348, |
|
"rewards/rejected": -13.973475456237793, |
|
"step": 1340 |
|
}, |
|
{ |
|
"epoch": 1.0273972602739727, |
|
"grad_norm": 5.313143730163574, |
|
"learning_rate": 1.223587092621162e-07, |
|
"logits/chosen": 2.790269374847412, |
|
"logits/rejected": 2.7210495471954346, |
|
"logps/chosen": -4.057877540588379, |
|
"logps/rejected": -9.197371482849121, |
|
"loss": 0.1734, |
|
"rewards/accuracies": 0.9375, |
|
"rewards/chosen": -6.08681583404541, |
|
"rewards/margins": 7.7092413902282715, |
|
"rewards/rejected": -13.796056747436523, |
|
"step": 1350 |
|
}, |
|
{ |
|
"epoch": 1.0273972602739727, |
|
"eval_logits/chosen": 3.250354051589966, |
|
"eval_logits/rejected": 3.3716177940368652, |
|
"eval_logps/chosen": -3.8652184009552, |
|
"eval_logps/rejected": -8.942809104919434, |
|
"eval_loss": 0.19846539199352264, |
|
"eval_rewards/accuracies": 0.9532710313796997, |
|
"eval_rewards/chosen": -5.797828197479248, |
|
"eval_rewards/margins": 7.6163859367370605, |
|
"eval_rewards/rejected": -13.414214134216309, |
|
"eval_runtime": 30.7783, |
|
"eval_samples_per_second": 27.617, |
|
"eval_steps_per_second": 3.476, |
|
"step": 1350 |
|
}, |
|
{ |
|
"epoch": 1.035007610350076, |
|
"grad_norm": 1.5798014402389526, |
|
"learning_rate": 1.067012561698319e-07, |
|
"logits/chosen": 2.5493528842926025, |
|
"logits/rejected": 2.98887038230896, |
|
"logps/chosen": -4.075139045715332, |
|
"logps/rejected": -9.38802433013916, |
|
"loss": 0.2168, |
|
"rewards/accuracies": 0.875, |
|
"rewards/chosen": -6.112709045410156, |
|
"rewards/margins": 7.9693284034729, |
|
"rewards/rejected": -14.082036972045898, |
|
"step": 1360 |
|
}, |
|
{ |
|
"epoch": 1.0426179604261796, |
|
"grad_norm": 4.165173053741455, |
|
"learning_rate": 9.209358300585474e-08, |
|
"logits/chosen": 2.2085108757019043, |
|
"logits/rejected": 2.3749470710754395, |
|
"logps/chosen": -3.804321765899658, |
|
"logps/rejected": -9.639230728149414, |
|
"loss": 0.175, |
|
"rewards/accuracies": 0.949999988079071, |
|
"rewards/chosen": -5.70648193359375, |
|
"rewards/margins": 8.752363204956055, |
|
"rewards/rejected": -14.458845138549805, |
|
"step": 1370 |
|
}, |
|
{ |
|
"epoch": 1.0502283105022832, |
|
"grad_norm": 3.1482503414154053, |
|
"learning_rate": 7.854209717842231e-08, |
|
"logits/chosen": 2.711009979248047, |
|
"logits/rejected": 2.975520610809326, |
|
"logps/chosen": -3.8621745109558105, |
|
"logps/rejected": -9.884195327758789, |
|
"loss": 0.1677, |
|
"rewards/accuracies": 0.987500011920929, |
|
"rewards/chosen": -5.793261528015137, |
|
"rewards/margins": 9.033029556274414, |
|
"rewards/rejected": -14.826292037963867, |
|
"step": 1380 |
|
}, |
|
{ |
|
"epoch": 1.0578386605783865, |
|
"grad_norm": 2.6377687454223633, |
|
"learning_rate": 6.605274281709929e-08, |
|
"logits/chosen": 2.538268804550171, |
|
"logits/rejected": 2.8993492126464844, |
|
"logps/chosen": -4.549342632293701, |
|
"logps/rejected": -10.355640411376953, |
|
"loss": 0.2012, |
|
"rewards/accuracies": 0.949999988079071, |
|
"rewards/chosen": -6.824014186859131, |
|
"rewards/margins": 8.70944595336914, |
|
"rewards/rejected": -15.53346061706543, |
|
"step": 1390 |
|
}, |
|
{ |
|
"epoch": 1.06544901065449, |
|
"grad_norm": 3.0432941913604736, |
|
"learning_rate": 5.463099816548578e-08, |
|
"logits/chosen": 2.583897113800049, |
|
"logits/rejected": 2.744657278060913, |
|
"logps/chosen": -3.9556992053985596, |
|
"logps/rejected": -9.812037467956543, |
|
"loss": 0.1754, |
|
"rewards/accuracies": 0.9624999761581421, |
|
"rewards/chosen": -5.933548450469971, |
|
"rewards/margins": 8.784506797790527, |
|
"rewards/rejected": -14.718053817749023, |
|
"step": 1400 |
|
}, |
|
{ |
|
"epoch": 1.06544901065449, |
|
"eval_logits/chosen": 3.22530198097229, |
|
"eval_logits/rejected": 3.359201192855835, |
|
"eval_logps/chosen": -3.870316743850708, |
|
"eval_logps/rejected": -8.951937675476074, |
|
"eval_loss": 0.198856920003891, |
|
"eval_rewards/accuracies": 0.9532710313796997, |
|
"eval_rewards/chosen": -5.80547571182251, |
|
"eval_rewards/margins": 7.622430801391602, |
|
"eval_rewards/rejected": -13.42790699005127, |
|
"eval_runtime": 30.7767, |
|
"eval_samples_per_second": 27.618, |
|
"eval_steps_per_second": 3.477, |
|
"step": 1400 |
|
} |
|
], |
|
"logging_steps": 10, |
|
"max_steps": 1500, |
|
"num_input_tokens_seen": 0, |
|
"num_train_epochs": 2, |
|
"save_steps": 50, |
|
"stateful_callbacks": { |
|
"TrainerControl": { |
|
"args": { |
|
"should_epoch_stop": false, |
|
"should_evaluate": false, |
|
"should_log": false, |
|
"should_save": true, |
|
"should_training_stop": false |
|
}, |
|
"attributes": {} |
|
} |
|
}, |
|
"total_flos": 3.258274444010848e+18, |
|
"train_batch_size": 1, |
|
"trial_name": null, |
|
"trial_params": null |
|
} |
|
|