|
{ |
|
"best_metric": null, |
|
"best_model_checkpoint": null, |
|
"epoch": 1.0, |
|
"eval_steps": 50, |
|
"global_step": 436, |
|
"is_hyper_param_search": false, |
|
"is_local_process_zero": true, |
|
"is_world_process_zero": true, |
|
"log_history": [ |
|
{ |
|
"epoch": 0.022935779816513763, |
|
"grad_norm": 5.353972534143438, |
|
"learning_rate": 1.1363636363636363e-07, |
|
"logits/chosen": -2.6582446098327637, |
|
"logits/rejected": -2.612395763397217, |
|
"logps/chosen": -310.3081359863281, |
|
"logps/rejected": -241.6246337890625, |
|
"loss": 0.6932, |
|
"rewards/accuracies": 0.3499999940395355, |
|
"rewards/chosen": -0.00043685571290552616, |
|
"rewards/margins": -0.0005496515659615397, |
|
"rewards/rejected": 0.0001127958094002679, |
|
"step": 10 |
|
}, |
|
{ |
|
"epoch": 0.045871559633027525, |
|
"grad_norm": 6.431385284276218, |
|
"learning_rate": 2.2727272727272726e-07, |
|
"logits/chosen": -2.690976142883301, |
|
"logits/rejected": -2.615501880645752, |
|
"logps/chosen": -293.55859375, |
|
"logps/rejected": -265.65789794921875, |
|
"loss": 0.6924, |
|
"rewards/accuracies": 0.612500011920929, |
|
"rewards/chosen": 0.001354431384243071, |
|
"rewards/margins": 0.0023786118254065514, |
|
"rewards/rejected": -0.0010241802083328366, |
|
"step": 20 |
|
}, |
|
{ |
|
"epoch": 0.06880733944954129, |
|
"grad_norm": 5.140938328988767, |
|
"learning_rate": 3.4090909090909085e-07, |
|
"logits/chosen": -2.6976418495178223, |
|
"logits/rejected": -2.6304168701171875, |
|
"logps/chosen": -277.8341064453125, |
|
"logps/rejected": -297.1772155761719, |
|
"loss": 0.6892, |
|
"rewards/accuracies": 0.637499988079071, |
|
"rewards/chosen": 0.004078004974871874, |
|
"rewards/margins": 0.009664928540587425, |
|
"rewards/rejected": -0.005586923565715551, |
|
"step": 30 |
|
}, |
|
{ |
|
"epoch": 0.09174311926605505, |
|
"grad_norm": 5.971632655809275, |
|
"learning_rate": 4.545454545454545e-07, |
|
"logits/chosen": -2.616170883178711, |
|
"logits/rejected": -2.5451369285583496, |
|
"logps/chosen": -283.9632568359375, |
|
"logps/rejected": -259.82861328125, |
|
"loss": 0.6798, |
|
"rewards/accuracies": 0.6875, |
|
"rewards/chosen": 0.03654901683330536, |
|
"rewards/margins": 0.045721281319856644, |
|
"rewards/rejected": -0.00917226541787386, |
|
"step": 40 |
|
}, |
|
{ |
|
"epoch": 0.11467889908256881, |
|
"grad_norm": 5.916656852320022, |
|
"learning_rate": 4.997110275491701e-07, |
|
"logits/chosen": -2.5970985889434814, |
|
"logits/rejected": -2.5133914947509766, |
|
"logps/chosen": -285.24835205078125, |
|
"logps/rejected": -247.303466796875, |
|
"loss": 0.6688, |
|
"rewards/accuracies": 0.6187499761581421, |
|
"rewards/chosen": -0.006145569030195475, |
|
"rewards/margins": 0.0578111931681633, |
|
"rewards/rejected": -0.06395676732063293, |
|
"step": 50 |
|
}, |
|
{ |
|
"epoch": 0.11467889908256881, |
|
"eval_logits/chosen": -2.6066324710845947, |
|
"eval_logits/rejected": -2.506901979446411, |
|
"eval_logps/chosen": -286.6465759277344, |
|
"eval_logps/rejected": -258.62078857421875, |
|
"eval_loss": 0.6561177968978882, |
|
"eval_rewards/accuracies": 0.6767241358757019, |
|
"eval_rewards/chosen": -0.02640603668987751, |
|
"eval_rewards/margins": 0.10332722216844559, |
|
"eval_rewards/rejected": -0.12973324954509735, |
|
"eval_runtime": 91.0244, |
|
"eval_samples_per_second": 19.973, |
|
"eval_steps_per_second": 0.319, |
|
"step": 50 |
|
}, |
|
{ |
|
"epoch": 0.13761467889908258, |
|
"grad_norm": 7.499634288772489, |
|
"learning_rate": 4.979475034558115e-07, |
|
"logits/chosen": -2.582371234893799, |
|
"logits/rejected": -2.5081627368927, |
|
"logps/chosen": -292.10491943359375, |
|
"logps/rejected": -282.31195068359375, |
|
"loss": 0.6423, |
|
"rewards/accuracies": 0.675000011920929, |
|
"rewards/chosen": -0.11516664922237396, |
|
"rewards/margins": 0.19041100144386292, |
|
"rewards/rejected": -0.30557766556739807, |
|
"step": 60 |
|
}, |
|
{ |
|
"epoch": 0.16055045871559634, |
|
"grad_norm": 17.30037068758165, |
|
"learning_rate": 4.945923025551788e-07, |
|
"logits/chosen": -2.4502875804901123, |
|
"logits/rejected": -2.3790054321289062, |
|
"logps/chosen": -298.32244873046875, |
|
"logps/rejected": -273.11859130859375, |
|
"loss": 0.6397, |
|
"rewards/accuracies": 0.675000011920929, |
|
"rewards/chosen": -0.2997247576713562, |
|
"rewards/margins": 0.23786215484142303, |
|
"rewards/rejected": -0.5375869870185852, |
|
"step": 70 |
|
}, |
|
{ |
|
"epoch": 0.1834862385321101, |
|
"grad_norm": 11.228813057299567, |
|
"learning_rate": 4.896669632591651e-07, |
|
"logits/chosen": -2.5100908279418945, |
|
"logits/rejected": -2.4027259349823, |
|
"logps/chosen": -306.67510986328125, |
|
"logps/rejected": -322.7925720214844, |
|
"loss": 0.6257, |
|
"rewards/accuracies": 0.6875, |
|
"rewards/chosen": -0.36647987365722656, |
|
"rewards/margins": 0.28450754284858704, |
|
"rewards/rejected": -0.6509873867034912, |
|
"step": 80 |
|
}, |
|
{ |
|
"epoch": 0.20642201834862386, |
|
"grad_norm": 15.58920411413326, |
|
"learning_rate": 4.832031033425662e-07, |
|
"logits/chosen": -1.5505931377410889, |
|
"logits/rejected": -1.3694034814834595, |
|
"logps/chosen": -357.6716613769531, |
|
"logps/rejected": -372.05133056640625, |
|
"loss": 0.5967, |
|
"rewards/accuracies": 0.6812499761581421, |
|
"rewards/chosen": -0.5348917245864868, |
|
"rewards/margins": 0.4341323971748352, |
|
"rewards/rejected": -0.9690243005752563, |
|
"step": 90 |
|
}, |
|
{ |
|
"epoch": 0.22935779816513763, |
|
"grad_norm": 13.006583087547677, |
|
"learning_rate": 4.752422169756047e-07, |
|
"logits/chosen": -0.7837198972702026, |
|
"logits/rejected": -0.35428792238235474, |
|
"logps/chosen": -326.9918518066406, |
|
"logps/rejected": -346.36737060546875, |
|
"loss": 0.5822, |
|
"rewards/accuracies": 0.65625, |
|
"rewards/chosen": -0.4772585332393646, |
|
"rewards/margins": 0.4704399108886719, |
|
"rewards/rejected": -0.9476984143257141, |
|
"step": 100 |
|
}, |
|
{ |
|
"epoch": 0.22935779816513763, |
|
"eval_logits/chosen": -0.09357786923646927, |
|
"eval_logits/rejected": 0.4795497953891754, |
|
"eval_logps/chosen": -374.6986083984375, |
|
"eval_logps/rejected": -392.9424743652344, |
|
"eval_loss": 0.5813368558883667, |
|
"eval_rewards/accuracies": 0.6724137663841248, |
|
"eval_rewards/chosen": -0.9069267511367798, |
|
"eval_rewards/margins": 0.5660232305526733, |
|
"eval_rewards/rejected": -1.4729499816894531, |
|
"eval_runtime": 91.4662, |
|
"eval_samples_per_second": 19.876, |
|
"eval_steps_per_second": 0.317, |
|
"step": 100 |
|
}, |
|
{ |
|
"epoch": 0.25229357798165136, |
|
"grad_norm": 14.7655268267239, |
|
"learning_rate": 4.658354083558188e-07, |
|
"logits/chosen": -0.23613190650939941, |
|
"logits/rejected": 0.2948758006095886, |
|
"logps/chosen": -371.15667724609375, |
|
"logps/rejected": -427.76885986328125, |
|
"loss": 0.5606, |
|
"rewards/accuracies": 0.675000011920929, |
|
"rewards/chosen": -0.7391853332519531, |
|
"rewards/margins": 0.7208150625228882, |
|
"rewards/rejected": -1.4600005149841309, |
|
"step": 110 |
|
}, |
|
{ |
|
"epoch": 0.27522935779816515, |
|
"grad_norm": 28.232913631245626, |
|
"learning_rate": 4.550430636492389e-07, |
|
"logits/chosen": 0.3172193467617035, |
|
"logits/rejected": 1.228100299835205, |
|
"logps/chosen": -412.1929626464844, |
|
"logps/rejected": -428.08056640625, |
|
"loss": 0.5789, |
|
"rewards/accuracies": 0.675000011920929, |
|
"rewards/chosen": -1.0178120136260986, |
|
"rewards/margins": 0.6625908613204956, |
|
"rewards/rejected": -1.6804027557373047, |
|
"step": 120 |
|
}, |
|
{ |
|
"epoch": 0.2981651376146789, |
|
"grad_norm": 23.106046920597972, |
|
"learning_rate": 4.429344633468004e-07, |
|
"logits/chosen": 1.296276330947876, |
|
"logits/rejected": 2.0952706336975098, |
|
"logps/chosen": -377.18572998046875, |
|
"logps/rejected": -435.5022888183594, |
|
"loss": 0.5712, |
|
"rewards/accuracies": 0.737500011920929, |
|
"rewards/chosen": -0.8953048586845398, |
|
"rewards/margins": 0.8917394876480103, |
|
"rewards/rejected": -1.7870445251464844, |
|
"step": 130 |
|
}, |
|
{ |
|
"epoch": 0.3211009174311927, |
|
"grad_norm": 21.3617509080007, |
|
"learning_rate": 4.2958733752443187e-07, |
|
"logits/chosen": 1.0354318618774414, |
|
"logits/rejected": 2.103768825531006, |
|
"logps/chosen": -374.42938232421875, |
|
"logps/rejected": -408.32342529296875, |
|
"loss": 0.5477, |
|
"rewards/accuracies": 0.71875, |
|
"rewards/chosen": -0.9035611152648926, |
|
"rewards/margins": 0.7404158115386963, |
|
"rewards/rejected": -1.643977165222168, |
|
"step": 140 |
|
}, |
|
{ |
|
"epoch": 0.3440366972477064, |
|
"grad_norm": 21.53708307235743, |
|
"learning_rate": 4.150873668617898e-07, |
|
"logits/chosen": 0.8976553678512573, |
|
"logits/rejected": 2.0599629878997803, |
|
"logps/chosen": -370.0615234375, |
|
"logps/rejected": -411.820068359375, |
|
"loss": 0.5512, |
|
"rewards/accuracies": 0.71875, |
|
"rewards/chosen": -0.7934576869010925, |
|
"rewards/margins": 0.716931939125061, |
|
"rewards/rejected": -1.5103896856307983, |
|
"step": 150 |
|
}, |
|
{ |
|
"epoch": 0.3440366972477064, |
|
"eval_logits/chosen": 1.9097994565963745, |
|
"eval_logits/rejected": 2.9840593338012695, |
|
"eval_logps/chosen": -382.4128112792969, |
|
"eval_logps/rejected": -426.216552734375, |
|
"eval_loss": 0.5533820390701294, |
|
"eval_rewards/accuracies": 0.7284482717514038, |
|
"eval_rewards/chosen": -0.9840683937072754, |
|
"eval_rewards/margins": 0.8216219544410706, |
|
"eval_rewards/rejected": -1.8056902885437012, |
|
"eval_runtime": 91.5586, |
|
"eval_samples_per_second": 19.856, |
|
"eval_steps_per_second": 0.317, |
|
"step": 150 |
|
}, |
|
{ |
|
"epoch": 0.3669724770642202, |
|
"grad_norm": 17.634177990437703, |
|
"learning_rate": 3.9952763262280397e-07, |
|
"logits/chosen": 1.8957267999649048, |
|
"logits/rejected": 2.8357367515563965, |
|
"logps/chosen": -408.04705810546875, |
|
"logps/rejected": -450.3290100097656, |
|
"loss": 0.5609, |
|
"rewards/accuracies": 0.731249988079071, |
|
"rewards/chosen": -1.1164201498031616, |
|
"rewards/margins": 0.8357731103897095, |
|
"rewards/rejected": -1.952193021774292, |
|
"step": 160 |
|
}, |
|
{ |
|
"epoch": 0.38990825688073394, |
|
"grad_norm": 24.6798606158854, |
|
"learning_rate": 3.8300801912883414e-07, |
|
"logits/chosen": 1.3192155361175537, |
|
"logits/rejected": 2.3843648433685303, |
|
"logps/chosen": -356.1672058105469, |
|
"logps/rejected": -387.1358947753906, |
|
"loss": 0.5414, |
|
"rewards/accuracies": 0.6812499761581421, |
|
"rewards/chosen": -0.9024198651313782, |
|
"rewards/margins": 0.7546060681343079, |
|
"rewards/rejected": -1.6570260524749756, |
|
"step": 170 |
|
}, |
|
{ |
|
"epoch": 0.41284403669724773, |
|
"grad_norm": 22.218831723434445, |
|
"learning_rate": 3.6563457256020884e-07, |
|
"logits/chosen": 1.3455697298049927, |
|
"logits/rejected": 2.5438590049743652, |
|
"logps/chosen": -351.62774658203125, |
|
"logps/rejected": -430.138427734375, |
|
"loss": 0.5396, |
|
"rewards/accuracies": 0.731249988079071, |
|
"rewards/chosen": -0.9148648381233215, |
|
"rewards/margins": 0.9510505795478821, |
|
"rewards/rejected": -1.8659156560897827, |
|
"step": 180 |
|
}, |
|
{ |
|
"epoch": 0.43577981651376146, |
|
"grad_norm": 25.297690497973328, |
|
"learning_rate": 3.475188202022617e-07, |
|
"logits/chosen": 1.7387921810150146, |
|
"logits/rejected": 2.998396396636963, |
|
"logps/chosen": -333.116455078125, |
|
"logps/rejected": -437.8639221191406, |
|
"loss": 0.5399, |
|
"rewards/accuracies": 0.737500011920929, |
|
"rewards/chosen": -0.7444877624511719, |
|
"rewards/margins": 1.007294774055481, |
|
"rewards/rejected": -1.7517824172973633, |
|
"step": 190 |
|
}, |
|
{ |
|
"epoch": 0.45871559633027525, |
|
"grad_norm": 25.947440207471573, |
|
"learning_rate": 3.287770545059052e-07, |
|
"logits/chosen": 1.9480648040771484, |
|
"logits/rejected": 2.9033870697021484, |
|
"logps/chosen": -380.21185302734375, |
|
"logps/rejected": -424.3128967285156, |
|
"loss": 0.5364, |
|
"rewards/accuracies": 0.699999988079071, |
|
"rewards/chosen": -1.1435054540634155, |
|
"rewards/margins": 0.736918032169342, |
|
"rewards/rejected": -1.8804235458374023, |
|
"step": 200 |
|
}, |
|
{ |
|
"epoch": 0.45871559633027525, |
|
"eval_logits/chosen": 2.4303267002105713, |
|
"eval_logits/rejected": 3.876626491546631, |
|
"eval_logps/chosen": -425.8599853515625, |
|
"eval_logps/rejected": -481.8061828613281, |
|
"eval_loss": 0.5367683172225952, |
|
"eval_rewards/accuracies": 0.732758641242981, |
|
"eval_rewards/chosen": -1.4185398817062378, |
|
"eval_rewards/margins": 0.9430465698242188, |
|
"eval_rewards/rejected": -2.361586570739746, |
|
"eval_runtime": 91.0832, |
|
"eval_samples_per_second": 19.96, |
|
"eval_steps_per_second": 0.318, |
|
"step": 200 |
|
}, |
|
{ |
|
"epoch": 0.481651376146789, |
|
"grad_norm": 19.563478098052215, |
|
"learning_rate": 3.0952958655864954e-07, |
|
"logits/chosen": 3.3911328315734863, |
|
"logits/rejected": 4.194566249847412, |
|
"logps/chosen": -451.25238037109375, |
|
"logps/rejected": -539.4793701171875, |
|
"loss": 0.5321, |
|
"rewards/accuracies": 0.7250000238418579, |
|
"rewards/chosen": -1.744564414024353, |
|
"rewards/margins": 0.8036998510360718, |
|
"rewards/rejected": -2.548264503479004, |
|
"step": 210 |
|
}, |
|
{ |
|
"epoch": 0.5045871559633027, |
|
"grad_norm": 24.94888206530693, |
|
"learning_rate": 2.898999737583448e-07, |
|
"logits/chosen": 2.1577422618865967, |
|
"logits/rejected": 3.670943021774292, |
|
"logps/chosen": -426.3487854003906, |
|
"logps/rejected": -508.80780029296875, |
|
"loss": 0.5365, |
|
"rewards/accuracies": 0.6625000238418579, |
|
"rewards/chosen": -1.5799643993377686, |
|
"rewards/margins": 0.9677878618240356, |
|
"rewards/rejected": -2.5477521419525146, |
|
"step": 220 |
|
}, |
|
{ |
|
"epoch": 0.5275229357798165, |
|
"grad_norm": 23.826139773858404, |
|
"learning_rate": 2.7001422664752333e-07, |
|
"logits/chosen": 0.8777297735214233, |
|
"logits/rejected": 2.3443570137023926, |
|
"logps/chosen": -384.8174743652344, |
|
"logps/rejected": -467.87298583984375, |
|
"loss": 0.5416, |
|
"rewards/accuracies": 0.7437499761581421, |
|
"rewards/chosen": -1.1064906120300293, |
|
"rewards/margins": 1.0363706350326538, |
|
"rewards/rejected": -2.1428613662719727, |
|
"step": 230 |
|
}, |
|
{ |
|
"epoch": 0.5504587155963303, |
|
"grad_norm": 18.150746967508407, |
|
"learning_rate": 2.5e-07, |
|
"logits/chosen": 2.9278922080993652, |
|
"logits/rejected": 3.511791706085205, |
|
"logps/chosen": -450.8653259277344, |
|
"logps/rejected": -547.5108642578125, |
|
"loss": 0.5638, |
|
"rewards/accuracies": 0.668749988079071, |
|
"rewards/chosen": -1.810927391052246, |
|
"rewards/margins": 1.0066088438034058, |
|
"rewards/rejected": -2.8175363540649414, |
|
"step": 240 |
|
}, |
|
{ |
|
"epoch": 0.573394495412844, |
|
"grad_norm": 21.73286552315769, |
|
"learning_rate": 2.2998577335247667e-07, |
|
"logits/chosen": 2.6097209453582764, |
|
"logits/rejected": 3.895547389984131, |
|
"logps/chosen": -484.319091796875, |
|
"logps/rejected": -561.3115234375, |
|
"loss": 0.5308, |
|
"rewards/accuracies": 0.6812499761581421, |
|
"rewards/chosen": -2.1941843032836914, |
|
"rewards/margins": 0.9737985730171204, |
|
"rewards/rejected": -3.167982578277588, |
|
"step": 250 |
|
}, |
|
{ |
|
"epoch": 0.573394495412844, |
|
"eval_logits/chosen": 2.454780101776123, |
|
"eval_logits/rejected": 4.2408928871154785, |
|
"eval_logps/chosen": -519.5304565429688, |
|
"eval_logps/rejected": -602.6265869140625, |
|
"eval_loss": 0.5234553217887878, |
|
"eval_rewards/accuracies": 0.7284482717514038, |
|
"eval_rewards/chosen": -2.3552448749542236, |
|
"eval_rewards/margins": 1.2145458459854126, |
|
"eval_rewards/rejected": -3.5697906017303467, |
|
"eval_runtime": 91.7528, |
|
"eval_samples_per_second": 19.814, |
|
"eval_steps_per_second": 0.316, |
|
"step": 250 |
|
}, |
|
{ |
|
"epoch": 0.5963302752293578, |
|
"grad_norm": 29.190042715796082, |
|
"learning_rate": 2.1010002624165524e-07, |
|
"logits/chosen": 2.5975215435028076, |
|
"logits/rejected": 4.360453128814697, |
|
"logps/chosen": -554.3533935546875, |
|
"logps/rejected": -655.7716064453125, |
|
"loss": 0.5307, |
|
"rewards/accuracies": 0.800000011920929, |
|
"rewards/chosen": -2.5185165405273438, |
|
"rewards/margins": 1.447409987449646, |
|
"rewards/rejected": -3.9659264087677, |
|
"step": 260 |
|
}, |
|
{ |
|
"epoch": 0.6192660550458715, |
|
"grad_norm": 20.081922974931803, |
|
"learning_rate": 1.9047041344135043e-07, |
|
"logits/chosen": 2.110996723175049, |
|
"logits/rejected": 3.4121272563934326, |
|
"logps/chosen": -542.6881103515625, |
|
"logps/rejected": -613.2174682617188, |
|
"loss": 0.5514, |
|
"rewards/accuracies": 0.6812499761581421, |
|
"rewards/chosen": -2.664341449737549, |
|
"rewards/margins": 0.9934176206588745, |
|
"rewards/rejected": -3.657759189605713, |
|
"step": 270 |
|
}, |
|
{ |
|
"epoch": 0.6422018348623854, |
|
"grad_norm": 20.797156741141926, |
|
"learning_rate": 1.7122294549409482e-07, |
|
"logits/chosen": 1.95541250705719, |
|
"logits/rejected": 3.574702024459839, |
|
"logps/chosen": -541.0426025390625, |
|
"logps/rejected": -644.5146484375, |
|
"loss": 0.522, |
|
"rewards/accuracies": 0.699999988079071, |
|
"rewards/chosen": -2.637174129486084, |
|
"rewards/margins": 1.117336630821228, |
|
"rewards/rejected": -3.7545104026794434, |
|
"step": 280 |
|
}, |
|
{ |
|
"epoch": 0.6651376146788991, |
|
"grad_norm": 22.250484161252675, |
|
"learning_rate": 1.524811797977383e-07, |
|
"logits/chosen": 1.9210926294326782, |
|
"logits/rejected": 3.2735812664031982, |
|
"logps/chosen": -576.992431640625, |
|
"logps/rejected": -663.1363525390625, |
|
"loss": 0.5191, |
|
"rewards/accuracies": 0.7250000238418579, |
|
"rewards/chosen": -2.8690013885498047, |
|
"rewards/margins": 1.0208569765090942, |
|
"rewards/rejected": -3.8898582458496094, |
|
"step": 290 |
|
}, |
|
{ |
|
"epoch": 0.6880733944954128, |
|
"grad_norm": 24.39105290704078, |
|
"learning_rate": 1.3436542743979125e-07, |
|
"logits/chosen": 1.646095871925354, |
|
"logits/rejected": 3.443913221359253, |
|
"logps/chosen": -558.287353515625, |
|
"logps/rejected": -657.3651123046875, |
|
"loss": 0.532, |
|
"rewards/accuracies": 0.731249988079071, |
|
"rewards/chosen": -2.7306606769561768, |
|
"rewards/margins": 1.3434104919433594, |
|
"rewards/rejected": -4.074070930480957, |
|
"step": 300 |
|
}, |
|
{ |
|
"epoch": 0.6880733944954128, |
|
"eval_logits/chosen": 1.1080348491668701, |
|
"eval_logits/rejected": 3.015399694442749, |
|
"eval_logps/chosen": -535.3407592773438, |
|
"eval_logps/rejected": -617.0262451171875, |
|
"eval_loss": 0.5116756558418274, |
|
"eval_rewards/accuracies": 0.7198275923728943, |
|
"eval_rewards/chosen": -2.513347864151001, |
|
"eval_rewards/margins": 1.200439691543579, |
|
"eval_rewards/rejected": -3.713787794113159, |
|
"eval_runtime": 91.1655, |
|
"eval_samples_per_second": 19.942, |
|
"eval_steps_per_second": 0.318, |
|
"step": 300 |
|
}, |
|
{ |
|
"epoch": 0.7110091743119266, |
|
"grad_norm": 27.989896858900266, |
|
"learning_rate": 1.1699198087116588e-07, |
|
"logits/chosen": 2.1850762367248535, |
|
"logits/rejected": 3.5708484649658203, |
|
"logps/chosen": -533.7103271484375, |
|
"logps/rejected": -638.1661376953125, |
|
"loss": 0.54, |
|
"rewards/accuracies": 0.7250000238418579, |
|
"rewards/chosen": -2.7955188751220703, |
|
"rewards/margins": 1.163648247718811, |
|
"rewards/rejected": -3.959167003631592, |
|
"step": 310 |
|
}, |
|
{ |
|
"epoch": 0.7339449541284404, |
|
"grad_norm": 26.423781855718417, |
|
"learning_rate": 1.00472367377196e-07, |
|
"logits/chosen": 1.8076130151748657, |
|
"logits/rejected": 4.071971893310547, |
|
"logps/chosen": -614.3458251953125, |
|
"logps/rejected": -702.5189819335938, |
|
"loss": 0.5138, |
|
"rewards/accuracies": 0.7875000238418579, |
|
"rewards/chosen": -3.0845131874084473, |
|
"rewards/margins": 1.4407538175582886, |
|
"rewards/rejected": -4.525267601013184, |
|
"step": 320 |
|
}, |
|
{ |
|
"epoch": 0.7568807339449541, |
|
"grad_norm": 24.484829935546955, |
|
"learning_rate": 8.49126331382102e-08, |
|
"logits/chosen": 2.0375962257385254, |
|
"logits/rejected": 3.5112037658691406, |
|
"logps/chosen": -607.512939453125, |
|
"logps/rejected": -720.7943115234375, |
|
"loss": 0.5187, |
|
"rewards/accuracies": 0.6875, |
|
"rewards/chosen": -3.2878499031066895, |
|
"rewards/margins": 1.2010066509246826, |
|
"rewards/rejected": -4.488856315612793, |
|
"step": 330 |
|
}, |
|
{ |
|
"epoch": 0.7798165137614679, |
|
"grad_norm": 23.41145949202815, |
|
"learning_rate": 7.041266247556812e-08, |
|
"logits/chosen": 1.657248854637146, |
|
"logits/rejected": 3.65797758102417, |
|
"logps/chosen": -553.1179809570312, |
|
"logps/rejected": -692.9093017578125, |
|
"loss": 0.5258, |
|
"rewards/accuracies": 0.7875000238418579, |
|
"rewards/chosen": -2.909062623977661, |
|
"rewards/margins": 1.4310705661773682, |
|
"rewards/rejected": -4.340132713317871, |
|
"step": 340 |
|
}, |
|
{ |
|
"epoch": 0.8027522935779816, |
|
"grad_norm": 28.908000369301963, |
|
"learning_rate": 5.706553665319955e-08, |
|
"logits/chosen": 0.9193560481071472, |
|
"logits/rejected": 3.4261555671691895, |
|
"logps/chosen": -548.4246215820312, |
|
"logps/rejected": -669.2120361328125, |
|
"loss": 0.5064, |
|
"rewards/accuracies": 0.8125, |
|
"rewards/chosen": -2.572448968887329, |
|
"rewards/margins": 1.67291259765625, |
|
"rewards/rejected": -4.245361804962158, |
|
"step": 350 |
|
}, |
|
{ |
|
"epoch": 0.8027522935779816, |
|
"eval_logits/chosen": 0.7126501202583313, |
|
"eval_logits/rejected": 2.685429096221924, |
|
"eval_logps/chosen": -547.6198120117188, |
|
"eval_logps/rejected": -636.6490478515625, |
|
"eval_loss": 0.5116574168205261, |
|
"eval_rewards/accuracies": 0.7241379022598267, |
|
"eval_rewards/chosen": -2.636138677597046, |
|
"eval_rewards/margins": 1.2738765478134155, |
|
"eval_rewards/rejected": -3.9100148677825928, |
|
"eval_runtime": 90.9536, |
|
"eval_samples_per_second": 19.988, |
|
"eval_steps_per_second": 0.319, |
|
"step": 350 |
|
}, |
|
{ |
|
"epoch": 0.8256880733944955, |
|
"grad_norm": 24.395714355317615, |
|
"learning_rate": 4.4956936350761005e-08, |
|
"logits/chosen": 0.9300888180732727, |
|
"logits/rejected": 2.3748581409454346, |
|
"logps/chosen": -543.5307006835938, |
|
"logps/rejected": -659.3692016601562, |
|
"loss": 0.5084, |
|
"rewards/accuracies": 0.7562500238418579, |
|
"rewards/chosen": -2.6378746032714844, |
|
"rewards/margins": 1.2973625659942627, |
|
"rewards/rejected": -3.935237169265747, |
|
"step": 360 |
|
}, |
|
{ |
|
"epoch": 0.8486238532110092, |
|
"grad_norm": 22.09369129566989, |
|
"learning_rate": 3.416459164418123e-08, |
|
"logits/chosen": 0.2887948155403137, |
|
"logits/rejected": 2.4889461994171143, |
|
"logps/chosen": -591.7805786132812, |
|
"logps/rejected": -667.9407348632812, |
|
"loss": 0.5109, |
|
"rewards/accuracies": 0.7749999761581421, |
|
"rewards/chosen": -2.5994677543640137, |
|
"rewards/margins": 1.3468105792999268, |
|
"rewards/rejected": -3.9462783336639404, |
|
"step": 370 |
|
}, |
|
{ |
|
"epoch": 0.8715596330275229, |
|
"grad_norm": 26.209345843998328, |
|
"learning_rate": 2.475778302439524e-08, |
|
"logits/chosen": 0.8550162315368652, |
|
"logits/rejected": 3.1779205799102783, |
|
"logps/chosen": -580.2351684570312, |
|
"logps/rejected": -637.8566284179688, |
|
"loss": 0.5075, |
|
"rewards/accuracies": 0.668749988079071, |
|
"rewards/chosen": -2.8695898056030273, |
|
"rewards/margins": 1.257644534111023, |
|
"rewards/rejected": -4.127234935760498, |
|
"step": 380 |
|
}, |
|
{ |
|
"epoch": 0.8944954128440367, |
|
"grad_norm": 21.12858235158005, |
|
"learning_rate": 1.6796896657433805e-08, |
|
"logits/chosen": 0.47364893555641174, |
|
"logits/rejected": 2.78879451751709, |
|
"logps/chosen": -587.971435546875, |
|
"logps/rejected": -724.6598510742188, |
|
"loss": 0.5014, |
|
"rewards/accuracies": 0.8187500238418579, |
|
"rewards/chosen": -2.7999932765960693, |
|
"rewards/margins": 1.9162429571151733, |
|
"rewards/rejected": -4.716236591339111, |
|
"step": 390 |
|
}, |
|
{ |
|
"epoch": 0.9174311926605505, |
|
"grad_norm": 23.688779288096637, |
|
"learning_rate": 1.0333036740834855e-08, |
|
"logits/chosen": 1.2185232639312744, |
|
"logits/rejected": 2.7246413230895996, |
|
"logps/chosen": -606.762939453125, |
|
"logps/rejected": -717.7750244140625, |
|
"loss": 0.5105, |
|
"rewards/accuracies": 0.7437499761581421, |
|
"rewards/chosen": -3.1490612030029297, |
|
"rewards/margins": 1.1964399814605713, |
|
"rewards/rejected": -4.345500946044922, |
|
"step": 400 |
|
}, |
|
{ |
|
"epoch": 0.9174311926605505, |
|
"eval_logits/chosen": 0.863433837890625, |
|
"eval_logits/rejected": 2.9146454334259033, |
|
"eval_logps/chosen": -575.282958984375, |
|
"eval_logps/rejected": -674.3233032226562, |
|
"eval_loss": 0.5098804235458374, |
|
"eval_rewards/accuracies": 0.732758641242981, |
|
"eval_rewards/chosen": -2.9127700328826904, |
|
"eval_rewards/margins": 1.3739889860153198, |
|
"eval_rewards/rejected": -4.286758899688721, |
|
"eval_runtime": 91.1821, |
|
"eval_samples_per_second": 19.938, |
|
"eval_steps_per_second": 0.318, |
|
"step": 400 |
|
}, |
|
{ |
|
"epoch": 0.9403669724770642, |
|
"grad_norm": 23.71509307695075, |
|
"learning_rate": 5.4076974448211685e-09, |
|
"logits/chosen": 1.313011884689331, |
|
"logits/rejected": 2.788435459136963, |
|
"logps/chosen": -609.2062377929688, |
|
"logps/rejected": -684.1138305664062, |
|
"loss": 0.5379, |
|
"rewards/accuracies": 0.7250000238418579, |
|
"rewards/chosen": -3.2180449962615967, |
|
"rewards/margins": 1.0637754201889038, |
|
"rewards/rejected": -4.281820297241211, |
|
"step": 410 |
|
}, |
|
{ |
|
"epoch": 0.963302752293578, |
|
"grad_norm": 27.57431143958426, |
|
"learning_rate": 2.052496544188487e-09, |
|
"logits/chosen": 1.1412100791931152, |
|
"logits/rejected": 3.4668610095977783, |
|
"logps/chosen": -616.3365478515625, |
|
"logps/rejected": -685.5581665039062, |
|
"loss": 0.5129, |
|
"rewards/accuracies": 0.75, |
|
"rewards/chosen": -3.115288496017456, |
|
"rewards/margins": 1.404497504234314, |
|
"rewards/rejected": -4.5197858810424805, |
|
"step": 420 |
|
}, |
|
{ |
|
"epoch": 0.9862385321100917, |
|
"grad_norm": 19.004038331375536, |
|
"learning_rate": 2.889724508297886e-10, |
|
"logits/chosen": 1.3194568157196045, |
|
"logits/rejected": 2.8764185905456543, |
|
"logps/chosen": -561.5572509765625, |
|
"logps/rejected": -680.3653564453125, |
|
"loss": 0.5165, |
|
"rewards/accuracies": 0.768750011920929, |
|
"rewards/chosen": -2.9964187145233154, |
|
"rewards/margins": 1.2785086631774902, |
|
"rewards/rejected": -4.274927616119385, |
|
"step": 430 |
|
}, |
|
{ |
|
"epoch": 1.0, |
|
"step": 436, |
|
"total_flos": 0.0, |
|
"train_loss": 0.5604413999330013, |
|
"train_runtime": 11415.5934, |
|
"train_samples_per_second": 4.884, |
|
"train_steps_per_second": 0.038 |
|
} |
|
], |
|
"logging_steps": 10, |
|
"max_steps": 436, |
|
"num_input_tokens_seen": 0, |
|
"num_train_epochs": 1, |
|
"save_steps": 100, |
|
"stateful_callbacks": { |
|
"TrainerControl": { |
|
"args": { |
|
"should_epoch_stop": false, |
|
"should_evaluate": false, |
|
"should_log": false, |
|
"should_save": true, |
|
"should_training_stop": true |
|
}, |
|
"attributes": {} |
|
} |
|
}, |
|
"total_flos": 0.0, |
|
"train_batch_size": 8, |
|
"trial_name": null, |
|
"trial_params": null |
|
} |
|
|