phi-2-irepo-chatml-v12-i1 / trainer_state.json
lole25's picture
Model save
f6622ad verified
raw
history blame contribute delete
No virus
45.9 kB
{
"best_metric": null,
"best_model_checkpoint": null,
"epoch": 0.9994666666666666,
"eval_steps": 500,
"global_step": 937,
"is_hyper_param_search": false,
"is_local_process_zero": true,
"is_world_process_zero": true,
"log_history": [
{
"epoch": 0.0,
"learning_rate": 5.319148936170213e-08,
"logits/chosen": 0.06642268598079681,
"logits/rejected": 0.23397813737392426,
"logps/chosen": -587.28369140625,
"logps/rejected": -568.082763671875,
"loss": 0.279,
"rewards/accuracies": 0.0,
"rewards/chosen": 0.0,
"rewards/margins": 0.0,
"rewards/rejected": 0.0,
"step": 1
},
{
"epoch": 0.01,
"learning_rate": 5.319148936170213e-07,
"logits/chosen": 0.16077889502048492,
"logits/rejected": 0.28465068340301514,
"logps/chosen": -462.99114990234375,
"logps/rejected": -441.3485107421875,
"loss": 0.303,
"rewards/accuracies": 0.4236111044883728,
"rewards/chosen": -0.0005749252159148455,
"rewards/margins": 0.0002628265065141022,
"rewards/rejected": -0.0008377517224289477,
"step": 10
},
{
"epoch": 0.02,
"learning_rate": 1.0638297872340427e-06,
"logits/chosen": 0.2244517058134079,
"logits/rejected": 0.2146037071943283,
"logps/chosen": -456.9951171875,
"logps/rejected": -442.496826171875,
"loss": 0.3007,
"rewards/accuracies": 0.33125001192092896,
"rewards/chosen": -0.0011023276019841433,
"rewards/margins": -6.420163117581978e-05,
"rewards/rejected": -0.0010381259489804506,
"step": 20
},
{
"epoch": 0.03,
"learning_rate": 1.595744680851064e-06,
"logits/chosen": 0.34518542885780334,
"logits/rejected": 0.15579931437969208,
"logps/chosen": -414.3968200683594,
"logps/rejected": -407.1432800292969,
"loss": 0.3064,
"rewards/accuracies": 0.3812499940395355,
"rewards/chosen": -0.0004762631724588573,
"rewards/margins": 6.935702549526468e-05,
"rewards/rejected": -0.0005456201615743339,
"step": 30
},
{
"epoch": 0.04,
"learning_rate": 2.1276595744680853e-06,
"logits/chosen": 0.19979876279830933,
"logits/rejected": 0.1475386917591095,
"logps/chosen": -387.1222839355469,
"logps/rejected": -380.3912048339844,
"loss": 0.3173,
"rewards/accuracies": 0.36250001192092896,
"rewards/chosen": 8.398960198974237e-05,
"rewards/margins": 0.0001377248700009659,
"rewards/rejected": -5.3735253459308296e-05,
"step": 40
},
{
"epoch": 0.05,
"learning_rate": 2.6595744680851065e-06,
"logits/chosen": 0.20050282776355743,
"logits/rejected": 0.2853023409843445,
"logps/chosen": -405.1722717285156,
"logps/rejected": -433.797119140625,
"loss": 0.2969,
"rewards/accuracies": 0.38749998807907104,
"rewards/chosen": -0.00031604920513927937,
"rewards/margins": 9.358949318993837e-05,
"rewards/rejected": -0.0004096386837773025,
"step": 50
},
{
"epoch": 0.06,
"learning_rate": 3.191489361702128e-06,
"logits/chosen": 0.22685687243938446,
"logits/rejected": 0.2761882245540619,
"logps/chosen": -418.8284606933594,
"logps/rejected": -444.300537109375,
"loss": 0.2916,
"rewards/accuracies": 0.5,
"rewards/chosen": -0.0013369970256462693,
"rewards/margins": 0.00045625813072547317,
"rewards/rejected": -0.0017932550981640816,
"step": 60
},
{
"epoch": 0.07,
"learning_rate": 3.723404255319149e-06,
"logits/chosen": 0.1869155615568161,
"logits/rejected": 0.2700553834438324,
"logps/chosen": -443.9104919433594,
"logps/rejected": -423.21075439453125,
"loss": 0.2976,
"rewards/accuracies": 0.46875,
"rewards/chosen": -0.003828343003988266,
"rewards/margins": 0.0006897930870763958,
"rewards/rejected": -0.004518135450780392,
"step": 70
},
{
"epoch": 0.09,
"learning_rate": 4.255319148936171e-06,
"logits/chosen": 0.23689034581184387,
"logits/rejected": 0.21069273352622986,
"logps/chosen": -386.25067138671875,
"logps/rejected": -387.7801818847656,
"loss": 0.3143,
"rewards/accuracies": 0.48124998807907104,
"rewards/chosen": -0.003951665014028549,
"rewards/margins": 0.0011586709879338741,
"rewards/rejected": -0.005110335536301136,
"step": 80
},
{
"epoch": 0.1,
"learning_rate": 4.787234042553192e-06,
"logits/chosen": 0.24118606746196747,
"logits/rejected": 0.25480058789253235,
"logps/chosen": -412.43499755859375,
"logps/rejected": -408.15802001953125,
"loss": 0.3065,
"rewards/accuracies": 0.5,
"rewards/chosen": -0.010140495374798775,
"rewards/margins": 0.001615689368918538,
"rewards/rejected": -0.01175618451088667,
"step": 90
},
{
"epoch": 0.11,
"learning_rate": 4.999375059004058e-06,
"logits/chosen": 0.1671404391527176,
"logits/rejected": 0.2540619969367981,
"logps/chosen": -389.2574157714844,
"logps/rejected": -388.87408447265625,
"loss": 0.3053,
"rewards/accuracies": 0.4937500059604645,
"rewards/chosen": -0.022764097899198532,
"rewards/margins": 0.0024351924657821655,
"rewards/rejected": -0.02519928850233555,
"step": 100
},
{
"epoch": 0.12,
"learning_rate": 4.9955571065548795e-06,
"logits/chosen": 0.23305337131023407,
"logits/rejected": 0.22437167167663574,
"logps/chosen": -441.59771728515625,
"logps/rejected": -446.51971435546875,
"loss": 0.3081,
"rewards/accuracies": 0.4937500059604645,
"rewards/chosen": -0.04231434687972069,
"rewards/margins": 0.003846729639917612,
"rewards/rejected": -0.04616107791662216,
"step": 110
},
{
"epoch": 0.13,
"learning_rate": 4.9882736864879e-06,
"logits/chosen": 0.17237094044685364,
"logits/rejected": 0.20950445532798767,
"logps/chosen": -497.03741455078125,
"logps/rejected": -493.6482849121094,
"loss": 0.3001,
"rewards/accuracies": 0.5375000238418579,
"rewards/chosen": -0.06084052473306656,
"rewards/margins": 0.009210348129272461,
"rewards/rejected": -0.07005088031291962,
"step": 120
},
{
"epoch": 0.14,
"learning_rate": 4.977534912960124e-06,
"logits/chosen": 0.12596510350704193,
"logits/rejected": 0.18595007061958313,
"logps/chosen": -534.695556640625,
"logps/rejected": -506.4364318847656,
"loss": 0.285,
"rewards/accuracies": 0.4937500059604645,
"rewards/chosen": -0.09366725385189056,
"rewards/margins": 0.009929810650646687,
"rewards/rejected": -0.10359706729650497,
"step": 130
},
{
"epoch": 0.15,
"learning_rate": 4.963355698422092e-06,
"logits/chosen": 0.12804082036018372,
"logits/rejected": 0.10300163924694061,
"logps/chosen": -515.469970703125,
"logps/rejected": -508.8036193847656,
"loss": 0.3043,
"rewards/accuracies": 0.4749999940395355,
"rewards/chosen": -0.11144615709781647,
"rewards/margins": 0.012134796939790249,
"rewards/rejected": -0.1235809326171875,
"step": 140
},
{
"epoch": 0.16,
"learning_rate": 4.945755732909625e-06,
"logits/chosen": 0.16220004856586456,
"logits/rejected": 0.08028533309698105,
"logps/chosen": -479.33184814453125,
"logps/rejected": -524.7337646484375,
"loss": 0.2802,
"rewards/accuracies": 0.48124998807907104,
"rewards/chosen": -0.1306999772787094,
"rewards/margins": 0.03233319893479347,
"rewards/rejected": -0.16303318738937378,
"step": 150
},
{
"epoch": 0.17,
"learning_rate": 4.924759456701167e-06,
"logits/chosen": 0.18928228318691254,
"logits/rejected": 0.14869533479213715,
"logps/chosen": -573.3798828125,
"logps/rejected": -610.5779418945312,
"loss": 0.2894,
"rewards/accuracies": 0.48750001192092896,
"rewards/chosen": -0.168126180768013,
"rewards/margins": 0.04517129063606262,
"rewards/rejected": -0.21329745650291443,
"step": 160
},
{
"epoch": 0.18,
"learning_rate": 4.900396026378671e-06,
"logits/chosen": 0.062172818928956985,
"logits/rejected": 0.019718164578080177,
"logps/chosen": -475.08551025390625,
"logps/rejected": -561.6641845703125,
"loss": 0.2631,
"rewards/accuracies": 0.4312500059604645,
"rewards/chosen": -0.1646568328142166,
"rewards/margins": 0.06741134822368622,
"rewards/rejected": -0.23206815123558044,
"step": 170
},
{
"epoch": 0.19,
"learning_rate": 4.872699274339169e-06,
"logits/chosen": -0.0011120836716145277,
"logits/rejected": 0.13960300385951996,
"logps/chosen": -525.2228393554688,
"logps/rejected": -511.014404296875,
"loss": 0.2908,
"rewards/accuracies": 0.4124999940395355,
"rewards/chosen": -0.14511564373970032,
"rewards/margins": 0.03634321317076683,
"rewards/rejected": -0.18145884573459625,
"step": 180
},
{
"epoch": 0.2,
"learning_rate": 4.8417076618132434e-06,
"logits/chosen": 0.11656410992145538,
"logits/rejected": 0.08704119175672531,
"logps/chosen": -514.813720703125,
"logps/rejected": -595.0880126953125,
"loss": 0.2617,
"rewards/accuracies": 0.4937500059604645,
"rewards/chosen": -0.14817455410957336,
"rewards/margins": 0.04434273764491081,
"rewards/rejected": -0.19251729547977448,
"step": 190
},
{
"epoch": 0.21,
"learning_rate": 4.807464225455655e-06,
"logits/chosen": 0.06601261347532272,
"logits/rejected": 0.12538839876651764,
"logps/chosen": -538.46142578125,
"logps/rejected": -592.1277465820312,
"loss": 0.2739,
"rewards/accuracies": 0.5,
"rewards/chosen": -0.15786947309970856,
"rewards/margins": 0.04001317173242569,
"rewards/rejected": -0.19788263738155365,
"step": 200
},
{
"epoch": 0.22,
"learning_rate": 4.770016517582283e-06,
"logits/chosen": 0.04595109820365906,
"logits/rejected": 0.05397043749690056,
"logps/chosen": -542.3662109375,
"logps/rejected": -587.703125,
"loss": 0.2803,
"rewards/accuracies": 0.44999998807907104,
"rewards/chosen": -0.15854400396347046,
"rewards/margins": 0.037279583513736725,
"rewards/rejected": -0.19582359492778778,
"step": 210
},
{
"epoch": 0.23,
"learning_rate": 4.7294165401363616e-06,
"logits/chosen": 0.06908506900072098,
"logits/rejected": 0.0783570259809494,
"logps/chosen": -543.8673095703125,
"logps/rejected": -552.3768920898438,
"loss": 0.273,
"rewards/accuracies": 0.4437499940395355,
"rewards/chosen": -0.14275754988193512,
"rewards/margins": 0.04143111780285835,
"rewards/rejected": -0.18418867886066437,
"step": 220
},
{
"epoch": 0.25,
"learning_rate": 4.68572067247573e-06,
"logits/chosen": 0.07219888269901276,
"logits/rejected": 0.0497373566031456,
"logps/chosen": -572.8856201171875,
"logps/rejected": -622.0572509765625,
"loss": 0.2833,
"rewards/accuracies": 0.543749988079071,
"rewards/chosen": -0.14573441445827484,
"rewards/margins": 0.06495748460292816,
"rewards/rejected": -0.210691899061203,
"step": 230
},
{
"epoch": 0.26,
"learning_rate": 4.638989593081364e-06,
"logits/chosen": -0.10666439682245255,
"logits/rejected": 0.05354728549718857,
"logps/chosen": -541.7586059570312,
"logps/rejected": -586.1435546875,
"loss": 0.2883,
"rewards/accuracies": 0.48750001192092896,
"rewards/chosen": -0.14508689939975739,
"rewards/margins": 0.048395391553640366,
"rewards/rejected": -0.19348229467868805,
"step": 240
},
{
"epoch": 0.27,
"learning_rate": 4.5892881952959015e-06,
"logits/chosen": 0.07505561411380768,
"logits/rejected": -0.051252782344818115,
"logps/chosen": -555.9990844726562,
"logps/rejected": -585.9085083007812,
"loss": 0.2815,
"rewards/accuracies": 0.4749999940395355,
"rewards/chosen": -0.14657357335090637,
"rewards/margins": 0.04433682560920715,
"rewards/rejected": -0.19091038405895233,
"step": 250
},
{
"epoch": 0.28,
"learning_rate": 4.536685497209182e-06,
"logits/chosen": -0.020656492561101913,
"logits/rejected": 0.007626605220139027,
"logps/chosen": -612.7149658203125,
"logps/rejected": -613.846435546875,
"loss": 0.2945,
"rewards/accuracies": 0.4937500059604645,
"rewards/chosen": -0.1504904180765152,
"rewards/margins": 0.03951232135295868,
"rewards/rejected": -0.19000275433063507,
"step": 260
},
{
"epoch": 0.29,
"learning_rate": 4.481254545815943e-06,
"logits/chosen": 0.06314031779766083,
"logits/rejected": 0.05012714862823486,
"logps/chosen": -572.8782958984375,
"logps/rejected": -599.4718627929688,
"loss": 0.286,
"rewards/accuracies": 0.5249999761581421,
"rewards/chosen": -0.11262224614620209,
"rewards/margins": 0.0400107316672802,
"rewards/rejected": -0.15263298153877258,
"step": 270
},
{
"epoch": 0.3,
"learning_rate": 4.42307231557875e-06,
"logits/chosen": 4.419684410095215e-05,
"logits/rejected": 0.1705075055360794,
"logps/chosen": -478.0445251464844,
"logps/rejected": -514.4085693359375,
"loss": 0.2574,
"rewards/accuracies": 0.4937500059604645,
"rewards/chosen": -0.08713702112436295,
"rewards/margins": 0.0530150942504406,
"rewards/rejected": -0.14015211164951324,
"step": 280
},
{
"epoch": 0.31,
"learning_rate": 4.3622196015370305e-06,
"logits/chosen": -0.05049672722816467,
"logits/rejected": 0.10665085166692734,
"logps/chosen": -562.5294799804688,
"logps/rejected": -583.0447998046875,
"loss": 0.2866,
"rewards/accuracies": 0.48750001192092896,
"rewards/chosen": -0.12877288460731506,
"rewards/margins": 0.0415426567196846,
"rewards/rejected": -0.17031553387641907,
"step": 290
},
{
"epoch": 0.32,
"learning_rate": 4.298780907110648e-06,
"logits/chosen": -0.13402745127677917,
"logits/rejected": 0.04227043688297272,
"logps/chosen": -570.83935546875,
"logps/rejected": -597.737060546875,
"loss": 0.2749,
"rewards/accuracies": 0.5375000238418579,
"rewards/chosen": -0.14190678298473358,
"rewards/margins": 0.054900676012039185,
"rewards/rejected": -0.19680745899677277,
"step": 300
},
{
"epoch": 0.33,
"learning_rate": 4.23284432675381e-06,
"logits/chosen": -0.07339149713516235,
"logits/rejected": -0.040264565497636795,
"logps/chosen": -619.7767333984375,
"logps/rejected": -682.0263061523438,
"loss": 0.2589,
"rewards/accuracies": 0.581250011920929,
"rewards/chosen": -0.16093352437019348,
"rewards/margins": 0.07607638835906982,
"rewards/rejected": -0.2370099127292633,
"step": 310
},
{
"epoch": 0.34,
"learning_rate": 4.164501423622277e-06,
"logits/chosen": -0.028974998742341995,
"logits/rejected": -0.06319359689950943,
"logps/chosen": -549.9708862304688,
"logps/rejected": -637.650390625,
"loss": 0.264,
"rewards/accuracies": 0.5,
"rewards/chosen": -0.13445612788200378,
"rewards/margins": 0.07859645038843155,
"rewards/rejected": -0.21305255591869354,
"step": 320
},
{
"epoch": 0.35,
"learning_rate": 4.0938471024237355e-06,
"logits/chosen": -0.11449748277664185,
"logits/rejected": -0.03637564182281494,
"logps/chosen": -601.0888671875,
"logps/rejected": -669.8153076171875,
"loss": 0.2698,
"rewards/accuracies": 0.543749988079071,
"rewards/chosen": -0.1675841063261032,
"rewards/margins": 0.0730680450797081,
"rewards/rejected": -0.2406521737575531,
"step": 330
},
{
"epoch": 0.36,
"learning_rate": 4.020979477627907e-06,
"logits/chosen": -0.08815717697143555,
"logits/rejected": 0.015436625108122826,
"logps/chosen": -504.43658447265625,
"logps/rejected": -530.493408203125,
"loss": 0.2631,
"rewards/accuracies": 0.41874998807907104,
"rewards/chosen": -0.11692949384450912,
"rewards/margins": 0.04244539141654968,
"rewards/rejected": -0.1593748927116394,
"step": 340
},
{
"epoch": 0.37,
"learning_rate": 3.9459997372194105e-06,
"logits/chosen": -0.061802517622709274,
"logits/rejected": 0.09361619502305984,
"logps/chosen": -579.513671875,
"logps/rejected": -639.0809936523438,
"loss": 0.2755,
"rewards/accuracies": 0.5062500238418579,
"rewards/chosen": -0.16077642142772675,
"rewards/margins": 0.05602121353149414,
"rewards/rejected": -0.2167976200580597,
"step": 350
},
{
"epoch": 0.38,
"learning_rate": 3.869012002182573e-06,
"logits/chosen": -0.10564370453357697,
"logits/rejected": 0.02583186700940132,
"logps/chosen": -590.0396728515625,
"logps/rejected": -647.909423828125,
"loss": 0.2786,
"rewards/accuracies": 0.53125,
"rewards/chosen": -0.15724439918994904,
"rewards/margins": 0.0704963356256485,
"rewards/rejected": -0.22774071991443634,
"step": 360
},
{
"epoch": 0.39,
"learning_rate": 3.7901231819133104e-06,
"logits/chosen": -0.10458900034427643,
"logits/rejected": -0.004534685518592596,
"logps/chosen": -542.1465454101562,
"logps/rejected": -573.3040771484375,
"loss": 0.2668,
"rewards/accuracies": 0.4625000059604645,
"rewards/chosen": -0.1385766863822937,
"rewards/margins": 0.05575231835246086,
"rewards/rejected": -0.19432899355888367,
"step": 370
},
{
"epoch": 0.41,
"learning_rate": 3.709442825758875e-06,
"logits/chosen": -0.06990720331668854,
"logits/rejected": 0.01404495257884264,
"logps/chosen": -505.4917907714844,
"logps/rejected": -572.2424926757812,
"loss": 0.2636,
"rewards/accuracies": 0.46875,
"rewards/chosen": -0.14204895496368408,
"rewards/margins": 0.050913404673337936,
"rewards/rejected": -0.19296236336231232,
"step": 380
},
{
"epoch": 0.42,
"learning_rate": 3.6270829708916113e-06,
"logits/chosen": -0.040005024522542953,
"logits/rejected": 0.017743710428476334,
"logps/chosen": -548.9078369140625,
"logps/rejected": -573.68408203125,
"loss": 0.2815,
"rewards/accuracies": 0.4625000059604645,
"rewards/chosen": -0.1460207998752594,
"rewards/margins": 0.041480742394924164,
"rewards/rejected": -0.18750153481960297,
"step": 390
},
{
"epoch": 0.43,
"learning_rate": 3.543157986727991e-06,
"logits/chosen": -0.13051895797252655,
"logits/rejected": 0.012003961019217968,
"logps/chosen": -543.142578125,
"logps/rejected": -594.59326171875,
"loss": 0.2753,
"rewards/accuracies": 0.512499988079071,
"rewards/chosen": -0.13953322172164917,
"rewards/margins": 0.0578032024204731,
"rewards/rejected": -0.19733640551567078,
"step": 400
},
{
"epoch": 0.44,
"learning_rate": 3.4577844161089614e-06,
"logits/chosen": -0.03122936561703682,
"logits/rejected": 0.010242189280688763,
"logps/chosen": -564.1265869140625,
"logps/rejected": -600.2008666992188,
"loss": 0.2742,
"rewards/accuracies": 0.4625000059604645,
"rewards/chosen": -0.14681796729564667,
"rewards/margins": 0.06189022213220596,
"rewards/rejected": -0.20870819687843323,
"step": 410
},
{
"epoch": 0.45,
"learning_rate": 3.3710808134621577e-06,
"logits/chosen": -0.010301386937499046,
"logits/rejected": -0.039281733334064484,
"logps/chosen": -603.3815307617188,
"logps/rejected": -645.9063110351562,
"loss": 0.2761,
"rewards/accuracies": 0.5249999761581421,
"rewards/chosen": -0.15229162573814392,
"rewards/margins": 0.06186581775546074,
"rewards/rejected": -0.21415743231773376,
"step": 420
},
{
"epoch": 0.46,
"learning_rate": 3.2831675801707126e-06,
"logits/chosen": -0.08892063051462173,
"logits/rejected": -0.0773845762014389,
"logps/chosen": -537.9869995117188,
"logps/rejected": -586.5262451171875,
"loss": 0.2721,
"rewards/accuracies": 0.53125,
"rewards/chosen": -0.13618162274360657,
"rewards/margins": 0.06536873430013657,
"rewards/rejected": -0.20155039429664612,
"step": 430
},
{
"epoch": 0.47,
"learning_rate": 3.194166797377289e-06,
"logits/chosen": -0.03688632696866989,
"logits/rejected": -0.00927029736340046,
"logps/chosen": -540.3975219726562,
"logps/rejected": -622.5618896484375,
"loss": 0.2622,
"rewards/accuracies": 0.53125,
"rewards/chosen": -0.12767064571380615,
"rewards/margins": 0.07470119744539261,
"rewards/rejected": -0.20237183570861816,
"step": 440
},
{
"epoch": 0.48,
"learning_rate": 3.104202056455501e-06,
"logits/chosen": -0.05976264923810959,
"logits/rejected": -0.16253043711185455,
"logps/chosen": -514.6741943359375,
"logps/rejected": -578.7728271484375,
"loss": 0.2588,
"rewards/accuracies": 0.5249999761581421,
"rewards/chosen": -0.13386496901512146,
"rewards/margins": 0.05132218077778816,
"rewards/rejected": -0.18518713116645813,
"step": 450
},
{
"epoch": 0.49,
"learning_rate": 3.013398287384144e-06,
"logits/chosen": -0.15890637040138245,
"logits/rejected": -0.05094796419143677,
"logps/chosen": -516.9619750976562,
"logps/rejected": -604.88525390625,
"loss": 0.2671,
"rewards/accuracies": 0.543749988079071,
"rewards/chosen": -0.11203358322381973,
"rewards/margins": 0.08909189701080322,
"rewards/rejected": -0.20112547278404236,
"step": 460
},
{
"epoch": 0.5,
"learning_rate": 2.9218815852625717e-06,
"logits/chosen": -0.17138849198818207,
"logits/rejected": -0.08313537389039993,
"logps/chosen": -573.4813232421875,
"logps/rejected": -608.16552734375,
"loss": 0.2695,
"rewards/accuracies": 0.518750011920929,
"rewards/chosen": -0.13088415563106537,
"rewards/margins": 0.047856587916612625,
"rewards/rejected": -0.1787407547235489,
"step": 470
},
{
"epoch": 0.51,
"learning_rate": 2.829779035208113e-06,
"logits/chosen": -0.12412846088409424,
"logits/rejected": -0.11923656612634659,
"logps/chosen": -472.38726806640625,
"logps/rejected": -527.9046020507812,
"loss": 0.2658,
"rewards/accuracies": 0.44999998807907104,
"rewards/chosen": -0.09405693411827087,
"rewards/margins": 0.057601846754550934,
"rewards/rejected": -0.1516587734222412,
"step": 480
},
{
"epoch": 0.52,
"learning_rate": 2.737218535878705e-06,
"logits/chosen": -0.11355652660131454,
"logits/rejected": -0.06184381991624832,
"logps/chosen": -522.3283081054688,
"logps/rejected": -575.9847412109375,
"loss": 0.2596,
"rewards/accuracies": 0.4937500059604645,
"rewards/chosen": -0.11173069477081299,
"rewards/margins": 0.06776181608438492,
"rewards/rejected": -0.1794925034046173,
"step": 490
},
{
"epoch": 0.53,
"learning_rate": 2.64432862186579e-06,
"logits/chosen": -0.1486915647983551,
"logits/rejected": -0.1126946210861206,
"logps/chosen": -472.10009765625,
"logps/rejected": -565.5072021484375,
"loss": 0.2578,
"rewards/accuracies": 0.512499988079071,
"rewards/chosen": -0.10586412250995636,
"rewards/margins": 0.08054044097661972,
"rewards/rejected": -0.1864045411348343,
"step": 500
},
{
"epoch": 0.54,
"learning_rate": 2.551238285204126e-06,
"logits/chosen": -0.07071704417467117,
"logits/rejected": -0.06895752251148224,
"logps/chosen": -553.7777709960938,
"logps/rejected": -641.0071411132812,
"loss": 0.2755,
"rewards/accuracies": 0.543749988079071,
"rewards/chosen": -0.12443629652261734,
"rewards/margins": 0.08012167364358902,
"rewards/rejected": -0.20455794036388397,
"step": 510
},
{
"epoch": 0.55,
"learning_rate": 2.4580767962463688e-06,
"logits/chosen": -0.07696928828954697,
"logits/rejected": 0.0007179826498031616,
"logps/chosen": -548.6107788085938,
"logps/rejected": -587.250732421875,
"loss": 0.2776,
"rewards/accuracies": 0.46875,
"rewards/chosen": -0.14552563428878784,
"rewards/margins": 0.04918716475367546,
"rewards/rejected": -0.1947127878665924,
"step": 520
},
{
"epoch": 0.57,
"learning_rate": 2.3649735241511546e-06,
"logits/chosen": -0.07807435840368271,
"logits/rejected": 0.01831636391580105,
"logps/chosen": -556.7833251953125,
"logps/rejected": -576.1497192382812,
"loss": 0.2592,
"rewards/accuracies": 0.4749999940395355,
"rewards/chosen": -0.13193278014659882,
"rewards/margins": 0.05105576664209366,
"rewards/rejected": -0.18298853933811188,
"step": 530
},
{
"epoch": 0.58,
"learning_rate": 2.2720577572339914e-06,
"logits/chosen": -0.014492440037429333,
"logits/rejected": -0.10470409691333771,
"logps/chosen": -520.7554931640625,
"logps/rejected": -621.583251953125,
"loss": 0.2589,
"rewards/accuracies": 0.518750011920929,
"rewards/chosen": -0.1109011173248291,
"rewards/margins": 0.08521705865859985,
"rewards/rejected": -0.19611816108226776,
"step": 540
},
{
"epoch": 0.59,
"learning_rate": 2.1794585234303995e-06,
"logits/chosen": -0.09592770040035248,
"logits/rejected": 0.03173860162496567,
"logps/chosen": -527.00537109375,
"logps/rejected": -587.9857177734375,
"loss": 0.2698,
"rewards/accuracies": 0.5249999761581421,
"rewards/chosen": -0.13213616609573364,
"rewards/margins": 0.07161318510770798,
"rewards/rejected": -0.2037493735551834,
"step": 550
},
{
"epoch": 0.6,
"learning_rate": 2.0873044111206407e-06,
"logits/chosen": -0.08211179822683334,
"logits/rejected": -0.07464434206485748,
"logps/chosen": -526.9556884765625,
"logps/rejected": -597.501708984375,
"loss": 0.263,
"rewards/accuracies": 0.48750001192092896,
"rewards/chosen": -0.12764397263526917,
"rewards/margins": 0.07685311138629913,
"rewards/rejected": -0.2044970691204071,
"step": 560
},
{
"epoch": 0.61,
"learning_rate": 1.9957233905648293e-06,
"logits/chosen": -0.17138975858688354,
"logits/rejected": 0.06973910331726074,
"logps/chosen": -606.2980346679688,
"logps/rejected": -656.0491943359375,
"loss": 0.2673,
"rewards/accuracies": 0.53125,
"rewards/chosen": -0.13349631428718567,
"rewards/margins": 0.08181539922952652,
"rewards/rejected": -0.2153116911649704,
"step": 570
},
{
"epoch": 0.62,
"learning_rate": 1.904842636196402e-06,
"logits/chosen": -0.12237439304590225,
"logits/rejected": 0.0009635284659452736,
"logps/chosen": -486.32818603515625,
"logps/rejected": -522.0555419921875,
"loss": 0.2611,
"rewards/accuracies": 0.4749999940395355,
"rewards/chosen": -0.09265846014022827,
"rewards/margins": 0.06704847514629364,
"rewards/rejected": -0.15970692038536072,
"step": 580
},
{
"epoch": 0.63,
"learning_rate": 1.814788350020726e-06,
"logits/chosen": -0.0846022367477417,
"logits/rejected": 0.020748872309923172,
"logps/chosen": -515.6177368164062,
"logps/rejected": -512.121337890625,
"loss": 0.288,
"rewards/accuracies": 0.4375,
"rewards/chosen": -0.10487208515405655,
"rewards/margins": 0.0322984978556633,
"rewards/rejected": -0.13717058300971985,
"step": 590
},
{
"epoch": 0.64,
"learning_rate": 1.725685586364051e-06,
"logits/chosen": -0.1460862159729004,
"logits/rejected": -0.06654468178749084,
"logps/chosen": -464.4956970214844,
"logps/rejected": -489.59161376953125,
"loss": 0.2786,
"rewards/accuracies": 0.40625,
"rewards/chosen": -0.09209474176168442,
"rewards/margins": 0.04379875212907791,
"rewards/rejected": -0.13589349389076233,
"step": 600
},
{
"epoch": 0.65,
"learning_rate": 1.6376580782162172e-06,
"logits/chosen": -0.1410539448261261,
"logits/rejected": -0.08074741810560226,
"logps/chosen": -483.46893310546875,
"logps/rejected": -526.5164794921875,
"loss": 0.2829,
"rewards/accuracies": 0.5062500238418579,
"rewards/chosen": -0.09680439531803131,
"rewards/margins": 0.06490226835012436,
"rewards/rejected": -0.16170665621757507,
"step": 610
},
{
"epoch": 0.66,
"learning_rate": 1.550828065408227e-06,
"logits/chosen": -0.09655305743217468,
"logits/rejected": -0.0762481540441513,
"logps/chosen": -468.4337463378906,
"logps/rejected": -525.4623413085938,
"loss": 0.2619,
"rewards/accuracies": 0.5,
"rewards/chosen": -0.08364450931549072,
"rewards/margins": 0.0691133439540863,
"rewards/rejected": -0.15275785326957703,
"step": 620
},
{
"epoch": 0.67,
"learning_rate": 1.4653161248633053e-06,
"logits/chosen": -0.03865772485733032,
"logits/rejected": -0.10341192781925201,
"logps/chosen": -468.88763427734375,
"logps/rejected": -524.7953491210938,
"loss": 0.2589,
"rewards/accuracies": 0.5062500238418579,
"rewards/chosen": -0.08644279092550278,
"rewards/margins": 0.0591856949031353,
"rewards/rejected": -0.14562849700450897,
"step": 630
},
{
"epoch": 0.68,
"learning_rate": 1.381241003157162e-06,
"logits/chosen": -0.08597133308649063,
"logits/rejected": 0.0006229489808902144,
"logps/chosen": -556.832275390625,
"logps/rejected": -577.3524780273438,
"loss": 0.2942,
"rewards/accuracies": 0.4749999940395355,
"rewards/chosen": -0.1141686886548996,
"rewards/margins": 0.03842983394861221,
"rewards/rejected": -0.1525985300540924,
"step": 640
},
{
"epoch": 0.69,
"learning_rate": 1.298719451619979e-06,
"logits/chosen": -0.09541022032499313,
"logits/rejected": -0.05003209039568901,
"logps/chosen": -469.9234313964844,
"logps/rejected": -529.8631591796875,
"loss": 0.2683,
"rewards/accuracies": 0.45625001192092896,
"rewards/chosen": -0.09767267853021622,
"rewards/margins": 0.054049454629421234,
"rewards/rejected": -0.15172213315963745,
"step": 650
},
{
"epoch": 0.7,
"learning_rate": 1.2178660642091036e-06,
"logits/chosen": -0.2083953619003296,
"logits/rejected": 0.05156536027789116,
"logps/chosen": -587.299560546875,
"logps/rejected": -581.7611694335938,
"loss": 0.2608,
"rewards/accuracies": 0.5375000238418579,
"rewards/chosen": -0.10773968696594238,
"rewards/margins": 0.06141304969787598,
"rewards/rejected": -0.16915276646614075,
"step": 660
},
{
"epoch": 0.71,
"learning_rate": 1.1387931183775821e-06,
"logits/chosen": -0.09526301920413971,
"logits/rejected": -0.06854981184005737,
"logps/chosen": -553.5062255859375,
"logps/rejected": -567.5850219726562,
"loss": 0.2713,
"rewards/accuracies": 0.518750011920929,
"rewards/chosen": -0.10804013162851334,
"rewards/margins": 0.06674468517303467,
"rewards/rejected": -0.1747848242521286,
"step": 670
},
{
"epoch": 0.73,
"learning_rate": 1.061610419159532e-06,
"logits/chosen": -0.15921640396118164,
"logits/rejected": -0.22180967032909393,
"logps/chosen": -464.23480224609375,
"logps/rejected": -517.2071533203125,
"loss": 0.2618,
"rewards/accuracies": 0.45625001192092896,
"rewards/chosen": -0.1085701733827591,
"rewards/margins": 0.06596361845731735,
"rewards/rejected": -0.17453376948833466,
"step": 680
},
{
"epoch": 0.74,
"learning_rate": 9.864251466888364e-07,
"logits/chosen": -0.2035103738307953,
"logits/rejected": -0.13756130635738373,
"logps/chosen": -533.3971557617188,
"logps/rejected": -562.695068359375,
"loss": 0.2758,
"rewards/accuracies": 0.4437499940395355,
"rewards/chosen": -0.12624426186084747,
"rewards/margins": 0.05400489643216133,
"rewards/rejected": -0.1802491694688797,
"step": 690
},
{
"epoch": 0.75,
"learning_rate": 9.133417073629288e-07,
"logits/chosen": -0.19000104069709778,
"logits/rejected": -0.17901551723480225,
"logps/chosen": -564.2060546875,
"logps/rejected": -620.484619140625,
"loss": 0.2765,
"rewards/accuracies": 0.44999998807907104,
"rewards/chosen": -0.12581291794776917,
"rewards/margins": 0.06180128455162048,
"rewards/rejected": -0.18761418759822845,
"step": 700
},
{
"epoch": 0.76,
"learning_rate": 8.424615888583332e-07,
"logits/chosen": -0.1307680755853653,
"logits/rejected": -0.10078835487365723,
"logps/chosen": -524.8638916015625,
"logps/rejected": -553.104736328125,
"loss": 0.2792,
"rewards/accuracies": 0.4375,
"rewards/chosen": -0.13305291533470154,
"rewards/margins": 0.040464796125888824,
"rewards/rejected": -0.17351767420768738,
"step": 710
},
{
"epoch": 0.77,
"learning_rate": 7.738832191993092e-07,
"logits/chosen": -0.11268335580825806,
"logits/rejected": -0.05942006781697273,
"logps/chosen": -497.02325439453125,
"logps/rejected": -589.494384765625,
"loss": 0.267,
"rewards/accuracies": 0.48750001192092896,
"rewards/chosen": -0.11972874402999878,
"rewards/margins": 0.0718456357717514,
"rewards/rejected": -0.19157439470291138,
"step": 720
},
{
"epoch": 0.78,
"learning_rate": 7.077018300752917e-07,
"logits/chosen": -0.14758452773094177,
"logits/rejected": -0.0012020498979836702,
"logps/chosen": -520.2757568359375,
"logps/rejected": -585.8750610351562,
"loss": 0.2769,
"rewards/accuracies": 0.45625001192092896,
"rewards/chosen": -0.11923079192638397,
"rewards/margins": 0.06183774396777153,
"rewards/rejected": -0.1810685396194458,
"step": 730
},
{
"epoch": 0.79,
"learning_rate": 6.440093245969342e-07,
"logits/chosen": -0.1631493866443634,
"logits/rejected": -0.14262652397155762,
"logps/chosen": -531.8765869140625,
"logps/rejected": -570.99365234375,
"loss": 0.2753,
"rewards/accuracies": 0.48750001192092896,
"rewards/chosen": -0.11439726501703262,
"rewards/margins": 0.05860968679189682,
"rewards/rejected": -0.17300695180892944,
"step": 740
},
{
"epoch": 0.8,
"learning_rate": 5.828941496744075e-07,
"logits/chosen": -0.13462567329406738,
"logits/rejected": -0.10775252431631088,
"logps/chosen": -528.6737060546875,
"logps/rejected": -559.4716796875,
"loss": 0.2773,
"rewards/accuracies": 0.518750011920929,
"rewards/chosen": -0.11941119283437729,
"rewards/margins": 0.06873499602079391,
"rewards/rejected": -0.1881461888551712,
"step": 750
},
{
"epoch": 0.81,
"learning_rate": 5.244411731951671e-07,
"logits/chosen": -0.10648471117019653,
"logits/rejected": -0.12774226069450378,
"logps/chosen": -540.2073974609375,
"logps/rejected": -609.1519165039062,
"loss": 0.2717,
"rewards/accuracies": 0.53125,
"rewards/chosen": -0.1273835450410843,
"rewards/margins": 0.07286655902862549,
"rewards/rejected": -0.2002500742673874,
"step": 760
},
{
"epoch": 0.82,
"learning_rate": 4.6873156617173594e-07,
"logits/chosen": -0.2068806141614914,
"logits/rejected": -0.1860518753528595,
"logps/chosen": -510.51983642578125,
"logps/rejected": -575.1392822265625,
"loss": 0.2802,
"rewards/accuracies": 0.46875,
"rewards/chosen": -0.1164456382393837,
"rewards/margins": 0.06441595405340195,
"rewards/rejected": -0.18086162209510803,
"step": 770
},
{
"epoch": 0.83,
"learning_rate": 4.1584269002318653e-07,
"logits/chosen": -0.23118607699871063,
"logits/rejected": 0.012592856772243977,
"logps/chosen": -493.88201904296875,
"logps/rejected": -535.3624267578125,
"loss": 0.2541,
"rewards/accuracies": 0.44999998807907104,
"rewards/chosen": -0.10904928296804428,
"rewards/margins": 0.06981517374515533,
"rewards/rejected": -0.17886444926261902,
"step": 780
},
{
"epoch": 0.84,
"learning_rate": 3.658479891468258e-07,
"logits/chosen": -0.13056764006614685,
"logits/rejected": -0.06715533137321472,
"logps/chosen": -526.5809326171875,
"logps/rejected": -599.15771484375,
"loss": 0.2635,
"rewards/accuracies": 0.512499988079071,
"rewards/chosen": -0.12021216005086899,
"rewards/margins": 0.07990214973688126,
"rewards/rejected": -0.20011429488658905,
"step": 790
},
{
"epoch": 0.85,
"learning_rate": 3.18816888929272e-07,
"logits/chosen": -0.2064342051744461,
"logits/rejected": -0.1310141682624817,
"logps/chosen": -503.92022705078125,
"logps/rejected": -535.3073120117188,
"loss": 0.2788,
"rewards/accuracies": 0.4375,
"rewards/chosen": -0.1168203130364418,
"rewards/margins": 0.04998582974076271,
"rewards/rejected": -0.1668061465024948,
"step": 800
},
{
"epoch": 0.86,
"learning_rate": 2.748146993385484e-07,
"logits/chosen": -0.15658999979496002,
"logits/rejected": -0.05465535447001457,
"logps/chosen": -493.89666748046875,
"logps/rejected": -565.4508056640625,
"loss": 0.2593,
"rewards/accuracies": 0.5062500238418579,
"rewards/chosen": -0.11054392158985138,
"rewards/margins": 0.062444061040878296,
"rewards/rejected": -0.17298798263072968,
"step": 810
},
{
"epoch": 0.87,
"learning_rate": 2.3390252423108077e-07,
"logits/chosen": -0.12076146900653839,
"logits/rejected": -0.07717995345592499,
"logps/chosen": -557.0513305664062,
"logps/rejected": -630.1217041015625,
"loss": 0.26,
"rewards/accuracies": 0.53125,
"rewards/chosen": -0.12639924883842468,
"rewards/margins": 0.06895993649959564,
"rewards/rejected": -0.19535920023918152,
"step": 820
},
{
"epoch": 0.89,
"learning_rate": 1.961371764995243e-07,
"logits/chosen": -0.18750372529029846,
"logits/rejected": -0.20195484161376953,
"logps/chosen": -431.05615234375,
"logps/rejected": -488.2513732910156,
"loss": 0.2672,
"rewards/accuracies": 0.41874998807907104,
"rewards/chosen": -0.0998988226056099,
"rewards/margins": 0.054485417902469635,
"rewards/rejected": -0.15438422560691833,
"step": 830
},
{
"epoch": 0.9,
"learning_rate": 1.61571099179261e-07,
"logits/chosen": -0.17934174835681915,
"logits/rejected": -0.13482218980789185,
"logps/chosen": -465.31268310546875,
"logps/rejected": -541.5718994140625,
"loss": 0.2379,
"rewards/accuracies": 0.48750001192092896,
"rewards/chosen": -0.11106850951910019,
"rewards/margins": 0.07404305785894394,
"rewards/rejected": -0.18511156737804413,
"step": 840
},
{
"epoch": 0.91,
"learning_rate": 1.3025229262312367e-07,
"logits/chosen": -0.16116994619369507,
"logits/rejected": -0.06133908033370972,
"logps/chosen": -567.8401489257812,
"logps/rejected": -617.7798461914062,
"loss": 0.2591,
"rewards/accuracies": 0.5375000238418579,
"rewards/chosen": -0.12380583584308624,
"rewards/margins": 0.07460357248783112,
"rewards/rejected": -0.19840940833091736,
"step": 850
},
{
"epoch": 0.92,
"learning_rate": 1.0222424784546853e-07,
"logits/chosen": -0.09671641886234283,
"logits/rejected": -0.10632093995809555,
"logps/chosen": -526.3566284179688,
"logps/rejected": -554.2623291015625,
"loss": 0.271,
"rewards/accuracies": 0.41874998807907104,
"rewards/chosen": -0.1161075010895729,
"rewards/margins": 0.05733795836567879,
"rewards/rejected": -0.1734454333782196,
"step": 860
},
{
"epoch": 0.93,
"learning_rate": 7.752588612816553e-08,
"logits/chosen": -0.15584774315357208,
"logits/rejected": -0.18242886662483215,
"logps/chosen": -535.07373046875,
"logps/rejected": -580.4075927734375,
"loss": 0.2676,
"rewards/accuracies": 0.48750001192092896,
"rewards/chosen": -0.12142983824014664,
"rewards/margins": 0.06167648360133171,
"rewards/rejected": -0.18310633301734924,
"step": 870
},
{
"epoch": 0.94,
"learning_rate": 5.619150497236991e-08,
"logits/chosen": -0.09849689900875092,
"logits/rejected": -0.10616960376501083,
"logps/chosen": -480.7345275878906,
"logps/rejected": -568.6452026367188,
"loss": 0.2573,
"rewards/accuracies": 0.48124998807907104,
"rewards/chosen": -0.12242833524942398,
"rewards/margins": 0.07691850513219833,
"rewards/rejected": -0.19934681057929993,
"step": 880
},
{
"epoch": 0.95,
"learning_rate": 3.825073047112743e-08,
"logits/chosen": -0.1931258887052536,
"logits/rejected": -0.14527785778045654,
"logps/chosen": -525.9476318359375,
"logps/rejected": -556.6074829101562,
"loss": 0.2718,
"rewards/accuracies": 0.48750001192092896,
"rewards/chosen": -0.1265556812286377,
"rewards/margins": 0.05168802663683891,
"rewards/rejected": -0.1782437115907669,
"step": 890
},
{
"epoch": 0.96,
"learning_rate": 2.372847616895685e-08,
"logits/chosen": -0.1395512819290161,
"logits/rejected": -0.15351735055446625,
"logps/chosen": -519.0070190429688,
"logps/rejected": -630.4365234375,
"loss": 0.25,
"rewards/accuracies": 0.550000011920929,
"rewards/chosen": -0.12783722579479218,
"rewards/margins": 0.08357492834329605,
"rewards/rejected": -0.21141216158866882,
"step": 900
},
{
"epoch": 0.97,
"learning_rate": 1.264490846553279e-08,
"logits/chosen": -0.1655835509300232,
"logits/rejected": -0.06148504465818405,
"logps/chosen": -530.4078979492188,
"logps/rejected": -588.1494140625,
"loss": 0.2773,
"rewards/accuracies": 0.5062500238418579,
"rewards/chosen": -0.13116273283958435,
"rewards/margins": 0.060104191303253174,
"rewards/rejected": -0.19126692414283752,
"step": 910
},
{
"epoch": 0.98,
"learning_rate": 5.015418611516165e-09,
"logits/chosen": -0.13306137919425964,
"logits/rejected": -0.1015244722366333,
"logps/chosen": -540.9669189453125,
"logps/rejected": -633.6178588867188,
"loss": 0.2619,
"rewards/accuracies": 0.53125,
"rewards/chosen": -0.13009101152420044,
"rewards/margins": 0.07983705401420593,
"rewards/rejected": -0.20992806553840637,
"step": 920
},
{
"epoch": 0.99,
"learning_rate": 8.506013354186993e-10,
"logits/chosen": -0.16086629033088684,
"logits/rejected": -0.07110301405191422,
"logps/chosen": -540.5888061523438,
"logps/rejected": -531.308349609375,
"loss": 0.2795,
"rewards/accuracies": 0.45625001192092896,
"rewards/chosen": -0.11827180534601212,
"rewards/margins": 0.032559461891651154,
"rewards/rejected": -0.15083125233650208,
"step": 930
},
{
"epoch": 1.0,
"step": 937,
"total_flos": 0.0,
"train_loss": 0.275421927202982,
"train_runtime": 7850.8319,
"train_samples_per_second": 3.821,
"train_steps_per_second": 0.119
}
],
"logging_steps": 10,
"max_steps": 937,
"num_input_tokens_seen": 0,
"num_train_epochs": 1,
"save_steps": 100,
"total_flos": 0.0,
"train_batch_size": 4,
"trial_name": null,
"trial_params": null
}