{ "best_metric": null, "best_model_checkpoint": null, "epoch": 0.9976762199845082, "eval_steps": 100, "global_step": 322, "is_hyper_param_search": false, "is_local_process_zero": true, "is_world_process_zero": true, "log_history": [ { "epoch": 0.0, "learning_rate": 1.5151515151515152e-08, "logits/chosen": -3.641601800918579, "logits/rejected": -3.704906940460205, "logps/chosen": -318.3046875, "logps/rejected": -247.966064453125, "loss": 0.6931, "rewards/accuracies": 0.0, "rewards/chosen": 0.0, "rewards/margins": 0.0, "rewards/rejected": 0.0, "step": 1 }, { "epoch": 0.03, "learning_rate": 1.5151515151515152e-07, "logits/chosen": -3.612096071243286, "logits/rejected": -3.6512341499328613, "logps/chosen": -312.1371765136719, "logps/rejected": -262.4918212890625, "loss": 0.6957, "rewards/accuracies": 0.4149305522441864, "rewards/chosen": 0.0010832061525434256, "rewards/margins": -0.0016401761677116156, "rewards/rejected": 0.002723382320255041, "step": 10 }, { "epoch": 0.06, "learning_rate": 3.0303030303030305e-07, "logits/chosen": -3.567340135574341, "logits/rejected": -3.6191534996032715, "logps/chosen": -303.0663757324219, "logps/rejected": -258.91925048828125, "loss": 0.6922, "rewards/accuracies": 0.49687498807907104, "rewards/chosen": 0.004150650463998318, "rewards/margins": 0.0020177571568638086, "rewards/rejected": 0.00213289400562644, "step": 20 }, { "epoch": 0.09, "learning_rate": 4.545454545454545e-07, "logits/chosen": -3.6112682819366455, "logits/rejected": -3.659569263458252, "logps/chosen": -317.9393310546875, "logps/rejected": -264.5162048339844, "loss": 0.6916, "rewards/accuracies": 0.5406249761581421, "rewards/chosen": 0.01230341475456953, "rewards/margins": 0.005747257731854916, "rewards/rejected": 0.006556157022714615, "step": 30 }, { "epoch": 0.12, "learning_rate": 4.878892733564014e-07, "logits/chosen": -3.586892604827881, "logits/rejected": -3.6325364112854004, "logps/chosen": -332.3243408203125, "logps/rejected": -269.69085693359375, "loss": 0.6857, "rewards/accuracies": 0.573437511920929, "rewards/chosen": 0.02542785368859768, "rewards/margins": 0.01690300740301609, "rewards/rejected": 0.008524848148226738, "step": 40 }, { "epoch": 0.15, "learning_rate": 4.705882352941176e-07, "logits/chosen": -3.6010265350341797, "logits/rejected": -3.644479274749756, "logps/chosen": -302.1268615722656, "logps/rejected": -262.81085205078125, "loss": 0.6774, "rewards/accuracies": 0.609375, "rewards/chosen": 0.04483611881732941, "rewards/margins": 0.03348752111196518, "rewards/rejected": 0.011348598636686802, "step": 50 }, { "epoch": 0.19, "learning_rate": 4.5328719723183387e-07, "logits/chosen": -3.59093976020813, "logits/rejected": -3.6495633125305176, "logps/chosen": -298.66375732421875, "logps/rejected": -249.2029266357422, "loss": 0.6717, "rewards/accuracies": 0.643750011920929, "rewards/chosen": 0.06126219779253006, "rewards/margins": 0.04494406655430794, "rewards/rejected": 0.016318131238222122, "step": 60 }, { "epoch": 0.22, "learning_rate": 4.359861591695502e-07, "logits/chosen": -3.5717296600341797, "logits/rejected": -3.613762378692627, "logps/chosen": -296.14752197265625, "logps/rejected": -251.984619140625, "loss": 0.664, "rewards/accuracies": 0.6390625238418579, "rewards/chosen": 0.07934962958097458, "rewards/margins": 0.05292888730764389, "rewards/rejected": 0.026420753449201584, "step": 70 }, { "epoch": 0.25, "learning_rate": 4.186851211072664e-07, "logits/chosen": -3.5886504650115967, "logits/rejected": -3.6460018157958984, "logps/chosen": -296.4681396484375, "logps/rejected": -241.16281127929688, "loss": 0.6613, "rewards/accuracies": 0.6625000238418579, "rewards/chosen": 0.081538125872612, "rewards/margins": 0.07539352774620056, "rewards/rejected": 0.006144602783024311, "step": 80 }, { "epoch": 0.28, "learning_rate": 4.013840830449827e-07, "logits/chosen": -3.5883450508117676, "logits/rejected": -3.641509532928467, "logps/chosen": -295.5819091796875, "logps/rejected": -248.0808563232422, "loss": 0.6567, "rewards/accuracies": 0.6859375238418579, "rewards/chosen": 0.0884767398238182, "rewards/margins": 0.08431808650493622, "rewards/rejected": 0.004158640280365944, "step": 90 }, { "epoch": 0.31, "learning_rate": 3.8408304498269895e-07, "logits/chosen": -3.5791542530059814, "logits/rejected": -3.641610622406006, "logps/chosen": -307.00860595703125, "logps/rejected": -254.98110961914062, "loss": 0.6509, "rewards/accuracies": 0.667187511920929, "rewards/chosen": 0.10308702290058136, "rewards/margins": 0.09007459133863449, "rewards/rejected": 0.013012421317398548, "step": 100 }, { "epoch": 0.34, "learning_rate": 3.667820069204152e-07, "logits/chosen": -3.5962271690368652, "logits/rejected": -3.651643753051758, "logps/chosen": -304.69195556640625, "logps/rejected": -251.715576171875, "loss": 0.6456, "rewards/accuracies": 0.671875, "rewards/chosen": 0.10168228298425674, "rewards/margins": 0.1170080155134201, "rewards/rejected": -0.015325723215937614, "step": 110 }, { "epoch": 0.37, "learning_rate": 3.494809688581315e-07, "logits/chosen": -3.5974292755126953, "logits/rejected": -3.640094757080078, "logps/chosen": -301.3868713378906, "logps/rejected": -269.7789611816406, "loss": 0.6394, "rewards/accuracies": 0.699999988079071, "rewards/chosen": 0.11533381789922714, "rewards/margins": 0.15132203698158264, "rewards/rejected": -0.035988207906484604, "step": 120 }, { "epoch": 0.4, "learning_rate": 3.321799307958477e-07, "logits/chosen": -3.5997886657714844, "logits/rejected": -3.650053024291992, "logps/chosen": -299.73773193359375, "logps/rejected": -264.46490478515625, "loss": 0.6382, "rewards/accuracies": 0.676562488079071, "rewards/chosen": 0.12842229008674622, "rewards/margins": 0.14890247583389282, "rewards/rejected": -0.020480189472436905, "step": 130 }, { "epoch": 0.43, "learning_rate": 3.14878892733564e-07, "logits/chosen": -3.621905565261841, "logits/rejected": -3.654386043548584, "logps/chosen": -290.0668029785156, "logps/rejected": -257.65985107421875, "loss": 0.6343, "rewards/accuracies": 0.6875, "rewards/chosen": 0.11696448177099228, "rewards/margins": 0.15646472573280334, "rewards/rejected": -0.03950025141239166, "step": 140 }, { "epoch": 0.46, "learning_rate": 2.975778546712803e-07, "logits/chosen": -3.623357057571411, "logits/rejected": -3.676666259765625, "logps/chosen": -294.2871398925781, "logps/rejected": -242.1128692626953, "loss": 0.6344, "rewards/accuracies": 0.699999988079071, "rewards/chosen": 0.12112095206975937, "rewards/margins": 0.1564914882183075, "rewards/rejected": -0.03537052497267723, "step": 150 }, { "epoch": 0.5, "learning_rate": 2.8027681660899653e-07, "logits/chosen": -3.59273099899292, "logits/rejected": -3.636326551437378, "logps/chosen": -292.4043884277344, "logps/rejected": -260.9197998046875, "loss": 0.6268, "rewards/accuracies": 0.6640625, "rewards/chosen": 0.11952624469995499, "rewards/margins": 0.1534840166568756, "rewards/rejected": -0.033957768231630325, "step": 160 }, { "epoch": 0.53, "learning_rate": 2.629757785467128e-07, "logits/chosen": -3.5805435180664062, "logits/rejected": -3.6122817993164062, "logps/chosen": -321.93280029296875, "logps/rejected": -265.8033447265625, "loss": 0.6218, "rewards/accuracies": 0.682812511920929, "rewards/chosen": 0.153924822807312, "rewards/margins": 0.18710294365882874, "rewards/rejected": -0.033178091049194336, "step": 170 }, { "epoch": 0.56, "learning_rate": 2.4567474048442904e-07, "logits/chosen": -3.583609104156494, "logits/rejected": -3.649141788482666, "logps/chosen": -307.5054626464844, "logps/rejected": -262.70068359375, "loss": 0.6243, "rewards/accuracies": 0.682812511920929, "rewards/chosen": 0.13762430846691132, "rewards/margins": 0.1973312795162201, "rewards/rejected": -0.059706974774599075, "step": 180 }, { "epoch": 0.59, "learning_rate": 2.2837370242214532e-07, "logits/chosen": -3.58601450920105, "logits/rejected": -3.6234772205352783, "logps/chosen": -305.1055603027344, "logps/rejected": -275.93402099609375, "loss": 0.624, "rewards/accuracies": 0.653124988079071, "rewards/chosen": 0.13389413058757782, "rewards/margins": 0.1799500435590744, "rewards/rejected": -0.04605592042207718, "step": 190 }, { "epoch": 0.62, "learning_rate": 2.1107266435986158e-07, "logits/chosen": -3.632913112640381, "logits/rejected": -3.663301944732666, "logps/chosen": -284.9013671875, "logps/rejected": -272.54888916015625, "loss": 0.6186, "rewards/accuracies": 0.682812511920929, "rewards/chosen": 0.1347268521785736, "rewards/margins": 0.19541791081428528, "rewards/rejected": -0.06069107726216316, "step": 200 }, { "epoch": 0.65, "learning_rate": 1.9377162629757786e-07, "logits/chosen": -3.5696029663085938, "logits/rejected": -3.612396240234375, "logps/chosen": -336.0824890136719, "logps/rejected": -277.74749755859375, "loss": 0.6157, "rewards/accuracies": 0.7093750238418579, "rewards/chosen": 0.17013664543628693, "rewards/margins": 0.21970205008983612, "rewards/rejected": -0.049565389752388, "step": 210 }, { "epoch": 0.68, "learning_rate": 1.764705882352941e-07, "logits/chosen": -3.5933330059051514, "logits/rejected": -3.6415085792541504, "logps/chosen": -320.52630615234375, "logps/rejected": -268.8915100097656, "loss": 0.6028, "rewards/accuracies": 0.7328125238418579, "rewards/chosen": 0.18930189311504364, "rewards/margins": 0.27452975511550903, "rewards/rejected": -0.0852278620004654, "step": 220 }, { "epoch": 0.71, "learning_rate": 1.5916955017301037e-07, "logits/chosen": -3.598538637161255, "logits/rejected": -3.6396663188934326, "logps/chosen": -289.67510986328125, "logps/rejected": -256.98455810546875, "loss": 0.6267, "rewards/accuracies": 0.6578124761581421, "rewards/chosen": 0.13695955276489258, "rewards/margins": 0.18900053203105927, "rewards/rejected": -0.05204101279377937, "step": 230 }, { "epoch": 0.74, "learning_rate": 1.4186851211072665e-07, "logits/chosen": -3.594696044921875, "logits/rejected": -3.63602876663208, "logps/chosen": -316.30963134765625, "logps/rejected": -276.26434326171875, "loss": 0.6166, "rewards/accuracies": 0.6812499761581421, "rewards/chosen": 0.15562446415424347, "rewards/margins": 0.21413257718086243, "rewards/rejected": -0.058508098125457764, "step": 240 }, { "epoch": 0.77, "learning_rate": 1.245674740484429e-07, "logits/chosen": -3.5807952880859375, "logits/rejected": -3.6095142364501953, "logps/chosen": -284.5511169433594, "logps/rejected": -264.4554443359375, "loss": 0.609, "rewards/accuracies": 0.7015625238418579, "rewards/chosen": 0.14841844141483307, "rewards/margins": 0.23373344540596008, "rewards/rejected": -0.08531501889228821, "step": 250 }, { "epoch": 0.81, "learning_rate": 1.0726643598615917e-07, "logits/chosen": -3.6167678833007812, "logits/rejected": -3.6560873985290527, "logps/chosen": -306.44512939453125, "logps/rejected": -287.98370361328125, "loss": 0.6082, "rewards/accuracies": 0.7109375, "rewards/chosen": 0.15952260792255402, "rewards/margins": 0.2270907461643219, "rewards/rejected": -0.06756815314292908, "step": 260 }, { "epoch": 0.84, "learning_rate": 8.996539792387543e-08, "logits/chosen": -3.568514347076416, "logits/rejected": -3.6281402111053467, "logps/chosen": -304.79949951171875, "logps/rejected": -268.81787109375, "loss": 0.5989, "rewards/accuracies": 0.7203124761581421, "rewards/chosen": 0.16781549155712128, "rewards/margins": 0.2681189179420471, "rewards/rejected": -0.10030338913202286, "step": 270 }, { "epoch": 0.87, "learning_rate": 7.26643598615917e-08, "logits/chosen": -3.589585065841675, "logits/rejected": -3.639873504638672, "logps/chosen": -292.3293151855469, "logps/rejected": -258.10540771484375, "loss": 0.6103, "rewards/accuracies": 0.706250011920929, "rewards/chosen": 0.17011049389839172, "rewards/margins": 0.26280853152275085, "rewards/rejected": -0.09269804507493973, "step": 280 }, { "epoch": 0.9, "learning_rate": 5.536332179930796e-08, "logits/chosen": -3.614189624786377, "logits/rejected": -3.6663849353790283, "logps/chosen": -321.504638671875, "logps/rejected": -272.3258972167969, "loss": 0.6118, "rewards/accuracies": 0.7015625238418579, "rewards/chosen": 0.191130131483078, "rewards/margins": 0.2547219395637512, "rewards/rejected": -0.06359181553125381, "step": 290 }, { "epoch": 0.93, "learning_rate": 3.806228373702422e-08, "logits/chosen": -3.584780216217041, "logits/rejected": -3.6500515937805176, "logps/chosen": -293.27191162109375, "logps/rejected": -248.3306884765625, "loss": 0.6087, "rewards/accuracies": 0.6812499761581421, "rewards/chosen": 0.1609605848789215, "rewards/margins": 0.23433387279510498, "rewards/rejected": -0.07337325811386108, "step": 300 }, { "epoch": 0.96, "learning_rate": 2.0761245674740482e-08, "logits/chosen": -3.6181092262268066, "logits/rejected": -3.6486213207244873, "logps/chosen": -295.642333984375, "logps/rejected": -249.63027954101562, "loss": 0.6101, "rewards/accuracies": 0.7109375, "rewards/chosen": 0.1750607192516327, "rewards/margins": 0.2609085440635681, "rewards/rejected": -0.08584781736135483, "step": 310 }, { "epoch": 0.99, "learning_rate": 3.4602076124567474e-09, "logits/chosen": -3.600691556930542, "logits/rejected": -3.6376967430114746, "logps/chosen": -296.1729736328125, "logps/rejected": -263.0238037109375, "loss": 0.6101, "rewards/accuracies": 0.6859375238418579, "rewards/chosen": 0.18048205971717834, "rewards/margins": 0.26246386766433716, "rewards/rejected": -0.08198180049657822, "step": 320 }, { "epoch": 1.0, "eval_logits/chosen": -3.5957722663879395, "eval_logits/rejected": -3.6472697257995605, "eval_logps/chosen": -297.0597839355469, "eval_logps/rejected": -260.8578186035156, "eval_loss": 0.6086059808731079, "eval_rewards/accuracies": 0.697604775428772, "eval_rewards/chosen": 0.13394081592559814, "eval_rewards/margins": 0.22443543374538422, "eval_rewards/rejected": -0.09049463272094727, "eval_runtime": 219.7724, "eval_samples_per_second": 9.1, "eval_steps_per_second": 0.76, "step": 322 }, { "epoch": 1.0, "step": 322, "total_flos": 0.0, "train_loss": 0.6369124913807982, "train_runtime": 9387.3825, "train_samples_per_second": 6.601, "train_steps_per_second": 0.034 } ], "logging_steps": 10, "max_steps": 322, "num_train_epochs": 1, "save_steps": 500, "total_flos": 0.0, "trial_name": null, "trial_params": null }