|
{ |
|
"best_metric": null, |
|
"best_model_checkpoint": null, |
|
"epoch": 0.9976762199845082, |
|
"eval_steps": 100, |
|
"global_step": 322, |
|
"is_hyper_param_search": false, |
|
"is_local_process_zero": true, |
|
"is_world_process_zero": true, |
|
"log_history": [ |
|
{ |
|
"epoch": 0.0, |
|
"learning_rate": 1.5151515151515152e-08, |
|
"logits/chosen": -3.641601800918579, |
|
"logits/rejected": -3.704906940460205, |
|
"logps/chosen": -318.3046875, |
|
"logps/rejected": -247.966064453125, |
|
"loss": 0.6931, |
|
"rewards/accuracies": 0.0, |
|
"rewards/chosen": 0.0, |
|
"rewards/margins": 0.0, |
|
"rewards/rejected": 0.0, |
|
"step": 1 |
|
}, |
|
{ |
|
"epoch": 0.03, |
|
"learning_rate": 1.5151515151515152e-07, |
|
"logits/chosen": -3.612096071243286, |
|
"logits/rejected": -3.6512341499328613, |
|
"logps/chosen": -312.1371765136719, |
|
"logps/rejected": -262.4918212890625, |
|
"loss": 0.6957, |
|
"rewards/accuracies": 0.4149305522441864, |
|
"rewards/chosen": 0.0010832061525434256, |
|
"rewards/margins": -0.0016401761677116156, |
|
"rewards/rejected": 0.002723382320255041, |
|
"step": 10 |
|
}, |
|
{ |
|
"epoch": 0.06, |
|
"learning_rate": 3.0303030303030305e-07, |
|
"logits/chosen": -3.567340135574341, |
|
"logits/rejected": -3.6191534996032715, |
|
"logps/chosen": -303.0663757324219, |
|
"logps/rejected": -258.91925048828125, |
|
"loss": 0.6922, |
|
"rewards/accuracies": 0.49687498807907104, |
|
"rewards/chosen": 0.004150650463998318, |
|
"rewards/margins": 0.0020177571568638086, |
|
"rewards/rejected": 0.00213289400562644, |
|
"step": 20 |
|
}, |
|
{ |
|
"epoch": 0.09, |
|
"learning_rate": 4.545454545454545e-07, |
|
"logits/chosen": -3.6112682819366455, |
|
"logits/rejected": -3.659569263458252, |
|
"logps/chosen": -317.9393310546875, |
|
"logps/rejected": -264.5162048339844, |
|
"loss": 0.6916, |
|
"rewards/accuracies": 0.5406249761581421, |
|
"rewards/chosen": 0.01230341475456953, |
|
"rewards/margins": 0.005747257731854916, |
|
"rewards/rejected": 0.006556157022714615, |
|
"step": 30 |
|
}, |
|
{ |
|
"epoch": 0.12, |
|
"learning_rate": 4.878892733564014e-07, |
|
"logits/chosen": -3.586892604827881, |
|
"logits/rejected": -3.6325364112854004, |
|
"logps/chosen": -332.3243408203125, |
|
"logps/rejected": -269.69085693359375, |
|
"loss": 0.6857, |
|
"rewards/accuracies": 0.573437511920929, |
|
"rewards/chosen": 0.02542785368859768, |
|
"rewards/margins": 0.01690300740301609, |
|
"rewards/rejected": 0.008524848148226738, |
|
"step": 40 |
|
}, |
|
{ |
|
"epoch": 0.15, |
|
"learning_rate": 4.705882352941176e-07, |
|
"logits/chosen": -3.6010265350341797, |
|
"logits/rejected": -3.644479274749756, |
|
"logps/chosen": -302.1268615722656, |
|
"logps/rejected": -262.81085205078125, |
|
"loss": 0.6774, |
|
"rewards/accuracies": 0.609375, |
|
"rewards/chosen": 0.04483611881732941, |
|
"rewards/margins": 0.03348752111196518, |
|
"rewards/rejected": 0.011348598636686802, |
|
"step": 50 |
|
}, |
|
{ |
|
"epoch": 0.19, |
|
"learning_rate": 4.5328719723183387e-07, |
|
"logits/chosen": -3.59093976020813, |
|
"logits/rejected": -3.6495633125305176, |
|
"logps/chosen": -298.66375732421875, |
|
"logps/rejected": -249.2029266357422, |
|
"loss": 0.6717, |
|
"rewards/accuracies": 0.643750011920929, |
|
"rewards/chosen": 0.06126219779253006, |
|
"rewards/margins": 0.04494406655430794, |
|
"rewards/rejected": 0.016318131238222122, |
|
"step": 60 |
|
}, |
|
{ |
|
"epoch": 0.22, |
|
"learning_rate": 4.359861591695502e-07, |
|
"logits/chosen": -3.5717296600341797, |
|
"logits/rejected": -3.613762378692627, |
|
"logps/chosen": -296.14752197265625, |
|
"logps/rejected": -251.984619140625, |
|
"loss": 0.664, |
|
"rewards/accuracies": 0.6390625238418579, |
|
"rewards/chosen": 0.07934962958097458, |
|
"rewards/margins": 0.05292888730764389, |
|
"rewards/rejected": 0.026420753449201584, |
|
"step": 70 |
|
}, |
|
{ |
|
"epoch": 0.25, |
|
"learning_rate": 4.186851211072664e-07, |
|
"logits/chosen": -3.5886504650115967, |
|
"logits/rejected": -3.6460018157958984, |
|
"logps/chosen": -296.4681396484375, |
|
"logps/rejected": -241.16281127929688, |
|
"loss": 0.6613, |
|
"rewards/accuracies": 0.6625000238418579, |
|
"rewards/chosen": 0.081538125872612, |
|
"rewards/margins": 0.07539352774620056, |
|
"rewards/rejected": 0.006144602783024311, |
|
"step": 80 |
|
}, |
|
{ |
|
"epoch": 0.28, |
|
"learning_rate": 4.013840830449827e-07, |
|
"logits/chosen": -3.5883450508117676, |
|
"logits/rejected": -3.641509532928467, |
|
"logps/chosen": -295.5819091796875, |
|
"logps/rejected": -248.0808563232422, |
|
"loss": 0.6567, |
|
"rewards/accuracies": 0.6859375238418579, |
|
"rewards/chosen": 0.0884767398238182, |
|
"rewards/margins": 0.08431808650493622, |
|
"rewards/rejected": 0.004158640280365944, |
|
"step": 90 |
|
}, |
|
{ |
|
"epoch": 0.31, |
|
"learning_rate": 3.8408304498269895e-07, |
|
"logits/chosen": -3.5791542530059814, |
|
"logits/rejected": -3.641610622406006, |
|
"logps/chosen": -307.00860595703125, |
|
"logps/rejected": -254.98110961914062, |
|
"loss": 0.6509, |
|
"rewards/accuracies": 0.667187511920929, |
|
"rewards/chosen": 0.10308702290058136, |
|
"rewards/margins": 0.09007459133863449, |
|
"rewards/rejected": 0.013012421317398548, |
|
"step": 100 |
|
}, |
|
{ |
|
"epoch": 0.34, |
|
"learning_rate": 3.667820069204152e-07, |
|
"logits/chosen": -3.5962271690368652, |
|
"logits/rejected": -3.651643753051758, |
|
"logps/chosen": -304.69195556640625, |
|
"logps/rejected": -251.715576171875, |
|
"loss": 0.6456, |
|
"rewards/accuracies": 0.671875, |
|
"rewards/chosen": 0.10168228298425674, |
|
"rewards/margins": 0.1170080155134201, |
|
"rewards/rejected": -0.015325723215937614, |
|
"step": 110 |
|
}, |
|
{ |
|
"epoch": 0.37, |
|
"learning_rate": 3.494809688581315e-07, |
|
"logits/chosen": -3.5974292755126953, |
|
"logits/rejected": -3.640094757080078, |
|
"logps/chosen": -301.3868713378906, |
|
"logps/rejected": -269.7789611816406, |
|
"loss": 0.6394, |
|
"rewards/accuracies": 0.699999988079071, |
|
"rewards/chosen": 0.11533381789922714, |
|
"rewards/margins": 0.15132203698158264, |
|
"rewards/rejected": -0.035988207906484604, |
|
"step": 120 |
|
}, |
|
{ |
|
"epoch": 0.4, |
|
"learning_rate": 3.321799307958477e-07, |
|
"logits/chosen": -3.5997886657714844, |
|
"logits/rejected": -3.650053024291992, |
|
"logps/chosen": -299.73773193359375, |
|
"logps/rejected": -264.46490478515625, |
|
"loss": 0.6382, |
|
"rewards/accuracies": 0.676562488079071, |
|
"rewards/chosen": 0.12842229008674622, |
|
"rewards/margins": 0.14890247583389282, |
|
"rewards/rejected": -0.020480189472436905, |
|
"step": 130 |
|
}, |
|
{ |
|
"epoch": 0.43, |
|
"learning_rate": 3.14878892733564e-07, |
|
"logits/chosen": -3.621905565261841, |
|
"logits/rejected": -3.654386043548584, |
|
"logps/chosen": -290.0668029785156, |
|
"logps/rejected": -257.65985107421875, |
|
"loss": 0.6343, |
|
"rewards/accuracies": 0.6875, |
|
"rewards/chosen": 0.11696448177099228, |
|
"rewards/margins": 0.15646472573280334, |
|
"rewards/rejected": -0.03950025141239166, |
|
"step": 140 |
|
}, |
|
{ |
|
"epoch": 0.46, |
|
"learning_rate": 2.975778546712803e-07, |
|
"logits/chosen": -3.623357057571411, |
|
"logits/rejected": -3.676666259765625, |
|
"logps/chosen": -294.2871398925781, |
|
"logps/rejected": -242.1128692626953, |
|
"loss": 0.6344, |
|
"rewards/accuracies": 0.699999988079071, |
|
"rewards/chosen": 0.12112095206975937, |
|
"rewards/margins": 0.1564914882183075, |
|
"rewards/rejected": -0.03537052497267723, |
|
"step": 150 |
|
}, |
|
{ |
|
"epoch": 0.5, |
|
"learning_rate": 2.8027681660899653e-07, |
|
"logits/chosen": -3.59273099899292, |
|
"logits/rejected": -3.636326551437378, |
|
"logps/chosen": -292.4043884277344, |
|
"logps/rejected": -260.9197998046875, |
|
"loss": 0.6268, |
|
"rewards/accuracies": 0.6640625, |
|
"rewards/chosen": 0.11952624469995499, |
|
"rewards/margins": 0.1534840166568756, |
|
"rewards/rejected": -0.033957768231630325, |
|
"step": 160 |
|
}, |
|
{ |
|
"epoch": 0.53, |
|
"learning_rate": 2.629757785467128e-07, |
|
"logits/chosen": -3.5805435180664062, |
|
"logits/rejected": -3.6122817993164062, |
|
"logps/chosen": -321.93280029296875, |
|
"logps/rejected": -265.8033447265625, |
|
"loss": 0.6218, |
|
"rewards/accuracies": 0.682812511920929, |
|
"rewards/chosen": 0.153924822807312, |
|
"rewards/margins": 0.18710294365882874, |
|
"rewards/rejected": -0.033178091049194336, |
|
"step": 170 |
|
}, |
|
{ |
|
"epoch": 0.56, |
|
"learning_rate": 2.4567474048442904e-07, |
|
"logits/chosen": -3.583609104156494, |
|
"logits/rejected": -3.649141788482666, |
|
"logps/chosen": -307.5054626464844, |
|
"logps/rejected": -262.70068359375, |
|
"loss": 0.6243, |
|
"rewards/accuracies": 0.682812511920929, |
|
"rewards/chosen": 0.13762430846691132, |
|
"rewards/margins": 0.1973312795162201, |
|
"rewards/rejected": -0.059706974774599075, |
|
"step": 180 |
|
}, |
|
{ |
|
"epoch": 0.59, |
|
"learning_rate": 2.2837370242214532e-07, |
|
"logits/chosen": -3.58601450920105, |
|
"logits/rejected": -3.6234772205352783, |
|
"logps/chosen": -305.1055603027344, |
|
"logps/rejected": -275.93402099609375, |
|
"loss": 0.624, |
|
"rewards/accuracies": 0.653124988079071, |
|
"rewards/chosen": 0.13389413058757782, |
|
"rewards/margins": 0.1799500435590744, |
|
"rewards/rejected": -0.04605592042207718, |
|
"step": 190 |
|
}, |
|
{ |
|
"epoch": 0.62, |
|
"learning_rate": 2.1107266435986158e-07, |
|
"logits/chosen": -3.632913112640381, |
|
"logits/rejected": -3.663301944732666, |
|
"logps/chosen": -284.9013671875, |
|
"logps/rejected": -272.54888916015625, |
|
"loss": 0.6186, |
|
"rewards/accuracies": 0.682812511920929, |
|
"rewards/chosen": 0.1347268521785736, |
|
"rewards/margins": 0.19541791081428528, |
|
"rewards/rejected": -0.06069107726216316, |
|
"step": 200 |
|
}, |
|
{ |
|
"epoch": 0.65, |
|
"learning_rate": 1.9377162629757786e-07, |
|
"logits/chosen": -3.5696029663085938, |
|
"logits/rejected": -3.612396240234375, |
|
"logps/chosen": -336.0824890136719, |
|
"logps/rejected": -277.74749755859375, |
|
"loss": 0.6157, |
|
"rewards/accuracies": 0.7093750238418579, |
|
"rewards/chosen": 0.17013664543628693, |
|
"rewards/margins": 0.21970205008983612, |
|
"rewards/rejected": -0.049565389752388, |
|
"step": 210 |
|
}, |
|
{ |
|
"epoch": 0.68, |
|
"learning_rate": 1.764705882352941e-07, |
|
"logits/chosen": -3.5933330059051514, |
|
"logits/rejected": -3.6415085792541504, |
|
"logps/chosen": -320.52630615234375, |
|
"logps/rejected": -268.8915100097656, |
|
"loss": 0.6028, |
|
"rewards/accuracies": 0.7328125238418579, |
|
"rewards/chosen": 0.18930189311504364, |
|
"rewards/margins": 0.27452975511550903, |
|
"rewards/rejected": -0.0852278620004654, |
|
"step": 220 |
|
}, |
|
{ |
|
"epoch": 0.71, |
|
"learning_rate": 1.5916955017301037e-07, |
|
"logits/chosen": -3.598538637161255, |
|
"logits/rejected": -3.6396663188934326, |
|
"logps/chosen": -289.67510986328125, |
|
"logps/rejected": -256.98455810546875, |
|
"loss": 0.6267, |
|
"rewards/accuracies": 0.6578124761581421, |
|
"rewards/chosen": 0.13695955276489258, |
|
"rewards/margins": 0.18900053203105927, |
|
"rewards/rejected": -0.05204101279377937, |
|
"step": 230 |
|
}, |
|
{ |
|
"epoch": 0.74, |
|
"learning_rate": 1.4186851211072665e-07, |
|
"logits/chosen": -3.594696044921875, |
|
"logits/rejected": -3.63602876663208, |
|
"logps/chosen": -316.30963134765625, |
|
"logps/rejected": -276.26434326171875, |
|
"loss": 0.6166, |
|
"rewards/accuracies": 0.6812499761581421, |
|
"rewards/chosen": 0.15562446415424347, |
|
"rewards/margins": 0.21413257718086243, |
|
"rewards/rejected": -0.058508098125457764, |
|
"step": 240 |
|
}, |
|
{ |
|
"epoch": 0.77, |
|
"learning_rate": 1.245674740484429e-07, |
|
"logits/chosen": -3.5807952880859375, |
|
"logits/rejected": -3.6095142364501953, |
|
"logps/chosen": -284.5511169433594, |
|
"logps/rejected": -264.4554443359375, |
|
"loss": 0.609, |
|
"rewards/accuracies": 0.7015625238418579, |
|
"rewards/chosen": 0.14841844141483307, |
|
"rewards/margins": 0.23373344540596008, |
|
"rewards/rejected": -0.08531501889228821, |
|
"step": 250 |
|
}, |
|
{ |
|
"epoch": 0.81, |
|
"learning_rate": 1.0726643598615917e-07, |
|
"logits/chosen": -3.6167678833007812, |
|
"logits/rejected": -3.6560873985290527, |
|
"logps/chosen": -306.44512939453125, |
|
"logps/rejected": -287.98370361328125, |
|
"loss": 0.6082, |
|
"rewards/accuracies": 0.7109375, |
|
"rewards/chosen": 0.15952260792255402, |
|
"rewards/margins": 0.2270907461643219, |
|
"rewards/rejected": -0.06756815314292908, |
|
"step": 260 |
|
}, |
|
{ |
|
"epoch": 0.84, |
|
"learning_rate": 8.996539792387543e-08, |
|
"logits/chosen": -3.568514347076416, |
|
"logits/rejected": -3.6281402111053467, |
|
"logps/chosen": -304.79949951171875, |
|
"logps/rejected": -268.81787109375, |
|
"loss": 0.5989, |
|
"rewards/accuracies": 0.7203124761581421, |
|
"rewards/chosen": 0.16781549155712128, |
|
"rewards/margins": 0.2681189179420471, |
|
"rewards/rejected": -0.10030338913202286, |
|
"step": 270 |
|
}, |
|
{ |
|
"epoch": 0.87, |
|
"learning_rate": 7.26643598615917e-08, |
|
"logits/chosen": -3.589585065841675, |
|
"logits/rejected": -3.639873504638672, |
|
"logps/chosen": -292.3293151855469, |
|
"logps/rejected": -258.10540771484375, |
|
"loss": 0.6103, |
|
"rewards/accuracies": 0.706250011920929, |
|
"rewards/chosen": 0.17011049389839172, |
|
"rewards/margins": 0.26280853152275085, |
|
"rewards/rejected": -0.09269804507493973, |
|
"step": 280 |
|
}, |
|
{ |
|
"epoch": 0.9, |
|
"learning_rate": 5.536332179930796e-08, |
|
"logits/chosen": -3.614189624786377, |
|
"logits/rejected": -3.6663849353790283, |
|
"logps/chosen": -321.504638671875, |
|
"logps/rejected": -272.3258972167969, |
|
"loss": 0.6118, |
|
"rewards/accuracies": 0.7015625238418579, |
|
"rewards/chosen": 0.191130131483078, |
|
"rewards/margins": 0.2547219395637512, |
|
"rewards/rejected": -0.06359181553125381, |
|
"step": 290 |
|
}, |
|
{ |
|
"epoch": 0.93, |
|
"learning_rate": 3.806228373702422e-08, |
|
"logits/chosen": -3.584780216217041, |
|
"logits/rejected": -3.6500515937805176, |
|
"logps/chosen": -293.27191162109375, |
|
"logps/rejected": -248.3306884765625, |
|
"loss": 0.6087, |
|
"rewards/accuracies": 0.6812499761581421, |
|
"rewards/chosen": 0.1609605848789215, |
|
"rewards/margins": 0.23433387279510498, |
|
"rewards/rejected": -0.07337325811386108, |
|
"step": 300 |
|
}, |
|
{ |
|
"epoch": 0.96, |
|
"learning_rate": 2.0761245674740482e-08, |
|
"logits/chosen": -3.6181092262268066, |
|
"logits/rejected": -3.6486213207244873, |
|
"logps/chosen": -295.642333984375, |
|
"logps/rejected": -249.63027954101562, |
|
"loss": 0.6101, |
|
"rewards/accuracies": 0.7109375, |
|
"rewards/chosen": 0.1750607192516327, |
|
"rewards/margins": 0.2609085440635681, |
|
"rewards/rejected": -0.08584781736135483, |
|
"step": 310 |
|
}, |
|
{ |
|
"epoch": 0.99, |
|
"learning_rate": 3.4602076124567474e-09, |
|
"logits/chosen": -3.600691556930542, |
|
"logits/rejected": -3.6376967430114746, |
|
"logps/chosen": -296.1729736328125, |
|
"logps/rejected": -263.0238037109375, |
|
"loss": 0.6101, |
|
"rewards/accuracies": 0.6859375238418579, |
|
"rewards/chosen": 0.18048205971717834, |
|
"rewards/margins": 0.26246386766433716, |
|
"rewards/rejected": -0.08198180049657822, |
|
"step": 320 |
|
}, |
|
{ |
|
"epoch": 1.0, |
|
"eval_logits/chosen": -3.5957722663879395, |
|
"eval_logits/rejected": -3.6472697257995605, |
|
"eval_logps/chosen": -297.0597839355469, |
|
"eval_logps/rejected": -260.8578186035156, |
|
"eval_loss": 0.6086059808731079, |
|
"eval_rewards/accuracies": 0.697604775428772, |
|
"eval_rewards/chosen": 0.13394081592559814, |
|
"eval_rewards/margins": 0.22443543374538422, |
|
"eval_rewards/rejected": -0.09049463272094727, |
|
"eval_runtime": 219.7724, |
|
"eval_samples_per_second": 9.1, |
|
"eval_steps_per_second": 0.76, |
|
"step": 322 |
|
}, |
|
{ |
|
"epoch": 1.0, |
|
"step": 322, |
|
"total_flos": 0.0, |
|
"train_loss": 0.6369124913807982, |
|
"train_runtime": 9387.3825, |
|
"train_samples_per_second": 6.601, |
|
"train_steps_per_second": 0.034 |
|
} |
|
], |
|
"logging_steps": 10, |
|
"max_steps": 322, |
|
"num_train_epochs": 1, |
|
"save_steps": 500, |
|
"total_flos": 0.0, |
|
"trial_name": null, |
|
"trial_params": null |
|
} |
|
|