Adrastea-7b-v1.0-dpo-lora / trainer_state.json
Ber Zoidberg
Model save
9ad0f7d
{
"best_metric": null,
"best_model_checkpoint": null,
"epoch": 0.9976762199845082,
"eval_steps": 100,
"global_step": 322,
"is_hyper_param_search": false,
"is_local_process_zero": true,
"is_world_process_zero": true,
"log_history": [
{
"epoch": 0.0,
"learning_rate": 1.5151515151515152e-08,
"logits/chosen": -3.641601800918579,
"logits/rejected": -3.704906940460205,
"logps/chosen": -318.3046875,
"logps/rejected": -247.966064453125,
"loss": 0.6931,
"rewards/accuracies": 0.0,
"rewards/chosen": 0.0,
"rewards/margins": 0.0,
"rewards/rejected": 0.0,
"step": 1
},
{
"epoch": 0.03,
"learning_rate": 1.5151515151515152e-07,
"logits/chosen": -3.612096071243286,
"logits/rejected": -3.6512341499328613,
"logps/chosen": -312.1371765136719,
"logps/rejected": -262.4918212890625,
"loss": 0.6957,
"rewards/accuracies": 0.4149305522441864,
"rewards/chosen": 0.0010832061525434256,
"rewards/margins": -0.0016401761677116156,
"rewards/rejected": 0.002723382320255041,
"step": 10
},
{
"epoch": 0.06,
"learning_rate": 3.0303030303030305e-07,
"logits/chosen": -3.567340135574341,
"logits/rejected": -3.6191534996032715,
"logps/chosen": -303.0663757324219,
"logps/rejected": -258.91925048828125,
"loss": 0.6922,
"rewards/accuracies": 0.49687498807907104,
"rewards/chosen": 0.004150650463998318,
"rewards/margins": 0.0020177571568638086,
"rewards/rejected": 0.00213289400562644,
"step": 20
},
{
"epoch": 0.09,
"learning_rate": 4.545454545454545e-07,
"logits/chosen": -3.6112682819366455,
"logits/rejected": -3.659569263458252,
"logps/chosen": -317.9393310546875,
"logps/rejected": -264.5162048339844,
"loss": 0.6916,
"rewards/accuracies": 0.5406249761581421,
"rewards/chosen": 0.01230341475456953,
"rewards/margins": 0.005747257731854916,
"rewards/rejected": 0.006556157022714615,
"step": 30
},
{
"epoch": 0.12,
"learning_rate": 4.878892733564014e-07,
"logits/chosen": -3.586892604827881,
"logits/rejected": -3.6325364112854004,
"logps/chosen": -332.3243408203125,
"logps/rejected": -269.69085693359375,
"loss": 0.6857,
"rewards/accuracies": 0.573437511920929,
"rewards/chosen": 0.02542785368859768,
"rewards/margins": 0.01690300740301609,
"rewards/rejected": 0.008524848148226738,
"step": 40
},
{
"epoch": 0.15,
"learning_rate": 4.705882352941176e-07,
"logits/chosen": -3.6010265350341797,
"logits/rejected": -3.644479274749756,
"logps/chosen": -302.1268615722656,
"logps/rejected": -262.81085205078125,
"loss": 0.6774,
"rewards/accuracies": 0.609375,
"rewards/chosen": 0.04483611881732941,
"rewards/margins": 0.03348752111196518,
"rewards/rejected": 0.011348598636686802,
"step": 50
},
{
"epoch": 0.19,
"learning_rate": 4.5328719723183387e-07,
"logits/chosen": -3.59093976020813,
"logits/rejected": -3.6495633125305176,
"logps/chosen": -298.66375732421875,
"logps/rejected": -249.2029266357422,
"loss": 0.6717,
"rewards/accuracies": 0.643750011920929,
"rewards/chosen": 0.06126219779253006,
"rewards/margins": 0.04494406655430794,
"rewards/rejected": 0.016318131238222122,
"step": 60
},
{
"epoch": 0.22,
"learning_rate": 4.359861591695502e-07,
"logits/chosen": -3.5717296600341797,
"logits/rejected": -3.613762378692627,
"logps/chosen": -296.14752197265625,
"logps/rejected": -251.984619140625,
"loss": 0.664,
"rewards/accuracies": 0.6390625238418579,
"rewards/chosen": 0.07934962958097458,
"rewards/margins": 0.05292888730764389,
"rewards/rejected": 0.026420753449201584,
"step": 70
},
{
"epoch": 0.25,
"learning_rate": 4.186851211072664e-07,
"logits/chosen": -3.5886504650115967,
"logits/rejected": -3.6460018157958984,
"logps/chosen": -296.4681396484375,
"logps/rejected": -241.16281127929688,
"loss": 0.6613,
"rewards/accuracies": 0.6625000238418579,
"rewards/chosen": 0.081538125872612,
"rewards/margins": 0.07539352774620056,
"rewards/rejected": 0.006144602783024311,
"step": 80
},
{
"epoch": 0.28,
"learning_rate": 4.013840830449827e-07,
"logits/chosen": -3.5883450508117676,
"logits/rejected": -3.641509532928467,
"logps/chosen": -295.5819091796875,
"logps/rejected": -248.0808563232422,
"loss": 0.6567,
"rewards/accuracies": 0.6859375238418579,
"rewards/chosen": 0.0884767398238182,
"rewards/margins": 0.08431808650493622,
"rewards/rejected": 0.004158640280365944,
"step": 90
},
{
"epoch": 0.31,
"learning_rate": 3.8408304498269895e-07,
"logits/chosen": -3.5791542530059814,
"logits/rejected": -3.641610622406006,
"logps/chosen": -307.00860595703125,
"logps/rejected": -254.98110961914062,
"loss": 0.6509,
"rewards/accuracies": 0.667187511920929,
"rewards/chosen": 0.10308702290058136,
"rewards/margins": 0.09007459133863449,
"rewards/rejected": 0.013012421317398548,
"step": 100
},
{
"epoch": 0.34,
"learning_rate": 3.667820069204152e-07,
"logits/chosen": -3.5962271690368652,
"logits/rejected": -3.651643753051758,
"logps/chosen": -304.69195556640625,
"logps/rejected": -251.715576171875,
"loss": 0.6456,
"rewards/accuracies": 0.671875,
"rewards/chosen": 0.10168228298425674,
"rewards/margins": 0.1170080155134201,
"rewards/rejected": -0.015325723215937614,
"step": 110
},
{
"epoch": 0.37,
"learning_rate": 3.494809688581315e-07,
"logits/chosen": -3.5974292755126953,
"logits/rejected": -3.640094757080078,
"logps/chosen": -301.3868713378906,
"logps/rejected": -269.7789611816406,
"loss": 0.6394,
"rewards/accuracies": 0.699999988079071,
"rewards/chosen": 0.11533381789922714,
"rewards/margins": 0.15132203698158264,
"rewards/rejected": -0.035988207906484604,
"step": 120
},
{
"epoch": 0.4,
"learning_rate": 3.321799307958477e-07,
"logits/chosen": -3.5997886657714844,
"logits/rejected": -3.650053024291992,
"logps/chosen": -299.73773193359375,
"logps/rejected": -264.46490478515625,
"loss": 0.6382,
"rewards/accuracies": 0.676562488079071,
"rewards/chosen": 0.12842229008674622,
"rewards/margins": 0.14890247583389282,
"rewards/rejected": -0.020480189472436905,
"step": 130
},
{
"epoch": 0.43,
"learning_rate": 3.14878892733564e-07,
"logits/chosen": -3.621905565261841,
"logits/rejected": -3.654386043548584,
"logps/chosen": -290.0668029785156,
"logps/rejected": -257.65985107421875,
"loss": 0.6343,
"rewards/accuracies": 0.6875,
"rewards/chosen": 0.11696448177099228,
"rewards/margins": 0.15646472573280334,
"rewards/rejected": -0.03950025141239166,
"step": 140
},
{
"epoch": 0.46,
"learning_rate": 2.975778546712803e-07,
"logits/chosen": -3.623357057571411,
"logits/rejected": -3.676666259765625,
"logps/chosen": -294.2871398925781,
"logps/rejected": -242.1128692626953,
"loss": 0.6344,
"rewards/accuracies": 0.699999988079071,
"rewards/chosen": 0.12112095206975937,
"rewards/margins": 0.1564914882183075,
"rewards/rejected": -0.03537052497267723,
"step": 150
},
{
"epoch": 0.5,
"learning_rate": 2.8027681660899653e-07,
"logits/chosen": -3.59273099899292,
"logits/rejected": -3.636326551437378,
"logps/chosen": -292.4043884277344,
"logps/rejected": -260.9197998046875,
"loss": 0.6268,
"rewards/accuracies": 0.6640625,
"rewards/chosen": 0.11952624469995499,
"rewards/margins": 0.1534840166568756,
"rewards/rejected": -0.033957768231630325,
"step": 160
},
{
"epoch": 0.53,
"learning_rate": 2.629757785467128e-07,
"logits/chosen": -3.5805435180664062,
"logits/rejected": -3.6122817993164062,
"logps/chosen": -321.93280029296875,
"logps/rejected": -265.8033447265625,
"loss": 0.6218,
"rewards/accuracies": 0.682812511920929,
"rewards/chosen": 0.153924822807312,
"rewards/margins": 0.18710294365882874,
"rewards/rejected": -0.033178091049194336,
"step": 170
},
{
"epoch": 0.56,
"learning_rate": 2.4567474048442904e-07,
"logits/chosen": -3.583609104156494,
"logits/rejected": -3.649141788482666,
"logps/chosen": -307.5054626464844,
"logps/rejected": -262.70068359375,
"loss": 0.6243,
"rewards/accuracies": 0.682812511920929,
"rewards/chosen": 0.13762430846691132,
"rewards/margins": 0.1973312795162201,
"rewards/rejected": -0.059706974774599075,
"step": 180
},
{
"epoch": 0.59,
"learning_rate": 2.2837370242214532e-07,
"logits/chosen": -3.58601450920105,
"logits/rejected": -3.6234772205352783,
"logps/chosen": -305.1055603027344,
"logps/rejected": -275.93402099609375,
"loss": 0.624,
"rewards/accuracies": 0.653124988079071,
"rewards/chosen": 0.13389413058757782,
"rewards/margins": 0.1799500435590744,
"rewards/rejected": -0.04605592042207718,
"step": 190
},
{
"epoch": 0.62,
"learning_rate": 2.1107266435986158e-07,
"logits/chosen": -3.632913112640381,
"logits/rejected": -3.663301944732666,
"logps/chosen": -284.9013671875,
"logps/rejected": -272.54888916015625,
"loss": 0.6186,
"rewards/accuracies": 0.682812511920929,
"rewards/chosen": 0.1347268521785736,
"rewards/margins": 0.19541791081428528,
"rewards/rejected": -0.06069107726216316,
"step": 200
},
{
"epoch": 0.65,
"learning_rate": 1.9377162629757786e-07,
"logits/chosen": -3.5696029663085938,
"logits/rejected": -3.612396240234375,
"logps/chosen": -336.0824890136719,
"logps/rejected": -277.74749755859375,
"loss": 0.6157,
"rewards/accuracies": 0.7093750238418579,
"rewards/chosen": 0.17013664543628693,
"rewards/margins": 0.21970205008983612,
"rewards/rejected": -0.049565389752388,
"step": 210
},
{
"epoch": 0.68,
"learning_rate": 1.764705882352941e-07,
"logits/chosen": -3.5933330059051514,
"logits/rejected": -3.6415085792541504,
"logps/chosen": -320.52630615234375,
"logps/rejected": -268.8915100097656,
"loss": 0.6028,
"rewards/accuracies": 0.7328125238418579,
"rewards/chosen": 0.18930189311504364,
"rewards/margins": 0.27452975511550903,
"rewards/rejected": -0.0852278620004654,
"step": 220
},
{
"epoch": 0.71,
"learning_rate": 1.5916955017301037e-07,
"logits/chosen": -3.598538637161255,
"logits/rejected": -3.6396663188934326,
"logps/chosen": -289.67510986328125,
"logps/rejected": -256.98455810546875,
"loss": 0.6267,
"rewards/accuracies": 0.6578124761581421,
"rewards/chosen": 0.13695955276489258,
"rewards/margins": 0.18900053203105927,
"rewards/rejected": -0.05204101279377937,
"step": 230
},
{
"epoch": 0.74,
"learning_rate": 1.4186851211072665e-07,
"logits/chosen": -3.594696044921875,
"logits/rejected": -3.63602876663208,
"logps/chosen": -316.30963134765625,
"logps/rejected": -276.26434326171875,
"loss": 0.6166,
"rewards/accuracies": 0.6812499761581421,
"rewards/chosen": 0.15562446415424347,
"rewards/margins": 0.21413257718086243,
"rewards/rejected": -0.058508098125457764,
"step": 240
},
{
"epoch": 0.77,
"learning_rate": 1.245674740484429e-07,
"logits/chosen": -3.5807952880859375,
"logits/rejected": -3.6095142364501953,
"logps/chosen": -284.5511169433594,
"logps/rejected": -264.4554443359375,
"loss": 0.609,
"rewards/accuracies": 0.7015625238418579,
"rewards/chosen": 0.14841844141483307,
"rewards/margins": 0.23373344540596008,
"rewards/rejected": -0.08531501889228821,
"step": 250
},
{
"epoch": 0.81,
"learning_rate": 1.0726643598615917e-07,
"logits/chosen": -3.6167678833007812,
"logits/rejected": -3.6560873985290527,
"logps/chosen": -306.44512939453125,
"logps/rejected": -287.98370361328125,
"loss": 0.6082,
"rewards/accuracies": 0.7109375,
"rewards/chosen": 0.15952260792255402,
"rewards/margins": 0.2270907461643219,
"rewards/rejected": -0.06756815314292908,
"step": 260
},
{
"epoch": 0.84,
"learning_rate": 8.996539792387543e-08,
"logits/chosen": -3.568514347076416,
"logits/rejected": -3.6281402111053467,
"logps/chosen": -304.79949951171875,
"logps/rejected": -268.81787109375,
"loss": 0.5989,
"rewards/accuracies": 0.7203124761581421,
"rewards/chosen": 0.16781549155712128,
"rewards/margins": 0.2681189179420471,
"rewards/rejected": -0.10030338913202286,
"step": 270
},
{
"epoch": 0.87,
"learning_rate": 7.26643598615917e-08,
"logits/chosen": -3.589585065841675,
"logits/rejected": -3.639873504638672,
"logps/chosen": -292.3293151855469,
"logps/rejected": -258.10540771484375,
"loss": 0.6103,
"rewards/accuracies": 0.706250011920929,
"rewards/chosen": 0.17011049389839172,
"rewards/margins": 0.26280853152275085,
"rewards/rejected": -0.09269804507493973,
"step": 280
},
{
"epoch": 0.9,
"learning_rate": 5.536332179930796e-08,
"logits/chosen": -3.614189624786377,
"logits/rejected": -3.6663849353790283,
"logps/chosen": -321.504638671875,
"logps/rejected": -272.3258972167969,
"loss": 0.6118,
"rewards/accuracies": 0.7015625238418579,
"rewards/chosen": 0.191130131483078,
"rewards/margins": 0.2547219395637512,
"rewards/rejected": -0.06359181553125381,
"step": 290
},
{
"epoch": 0.93,
"learning_rate": 3.806228373702422e-08,
"logits/chosen": -3.584780216217041,
"logits/rejected": -3.6500515937805176,
"logps/chosen": -293.27191162109375,
"logps/rejected": -248.3306884765625,
"loss": 0.6087,
"rewards/accuracies": 0.6812499761581421,
"rewards/chosen": 0.1609605848789215,
"rewards/margins": 0.23433387279510498,
"rewards/rejected": -0.07337325811386108,
"step": 300
},
{
"epoch": 0.96,
"learning_rate": 2.0761245674740482e-08,
"logits/chosen": -3.6181092262268066,
"logits/rejected": -3.6486213207244873,
"logps/chosen": -295.642333984375,
"logps/rejected": -249.63027954101562,
"loss": 0.6101,
"rewards/accuracies": 0.7109375,
"rewards/chosen": 0.1750607192516327,
"rewards/margins": 0.2609085440635681,
"rewards/rejected": -0.08584781736135483,
"step": 310
},
{
"epoch": 0.99,
"learning_rate": 3.4602076124567474e-09,
"logits/chosen": -3.600691556930542,
"logits/rejected": -3.6376967430114746,
"logps/chosen": -296.1729736328125,
"logps/rejected": -263.0238037109375,
"loss": 0.6101,
"rewards/accuracies": 0.6859375238418579,
"rewards/chosen": 0.18048205971717834,
"rewards/margins": 0.26246386766433716,
"rewards/rejected": -0.08198180049657822,
"step": 320
},
{
"epoch": 1.0,
"eval_logits/chosen": -3.5957722663879395,
"eval_logits/rejected": -3.6472697257995605,
"eval_logps/chosen": -297.0597839355469,
"eval_logps/rejected": -260.8578186035156,
"eval_loss": 0.6086059808731079,
"eval_rewards/accuracies": 0.697604775428772,
"eval_rewards/chosen": 0.13394081592559814,
"eval_rewards/margins": 0.22443543374538422,
"eval_rewards/rejected": -0.09049463272094727,
"eval_runtime": 219.7724,
"eval_samples_per_second": 9.1,
"eval_steps_per_second": 0.76,
"step": 322
},
{
"epoch": 1.0,
"step": 322,
"total_flos": 0.0,
"train_loss": 0.6369124913807982,
"train_runtime": 9387.3825,
"train_samples_per_second": 6.601,
"train_steps_per_second": 0.034
}
],
"logging_steps": 10,
"max_steps": 322,
"num_train_epochs": 1,
"save_steps": 500,
"total_flos": 0.0,
"trial_name": null,
"trial_params": null
}