|
{ |
|
"best_metric": null, |
|
"best_model_checkpoint": null, |
|
"epoch": 0.9994666666666666, |
|
"eval_steps": 500, |
|
"global_step": 937, |
|
"is_hyper_param_search": false, |
|
"is_local_process_zero": true, |
|
"is_world_process_zero": true, |
|
"log_history": [ |
|
{ |
|
"epoch": 0.0, |
|
"learning_rate": 5.319148936170213e-08, |
|
"logits/chosen": 0.06642268598079681, |
|
"logits/rejected": 0.23397813737392426, |
|
"logps/chosen": -587.28369140625, |
|
"logps/rejected": -568.082763671875, |
|
"loss": 0.279, |
|
"rewards/accuracies": 0.0, |
|
"rewards/chosen": 0.0, |
|
"rewards/margins": 0.0, |
|
"rewards/rejected": 0.0, |
|
"step": 1 |
|
}, |
|
{ |
|
"epoch": 0.01, |
|
"learning_rate": 5.319148936170213e-07, |
|
"logits/chosen": 0.16077889502048492, |
|
"logits/rejected": 0.28465068340301514, |
|
"logps/chosen": -462.99114990234375, |
|
"logps/rejected": -441.3485107421875, |
|
"loss": 0.303, |
|
"rewards/accuracies": 0.4236111044883728, |
|
"rewards/chosen": -0.0005749252159148455, |
|
"rewards/margins": 0.0002628265065141022, |
|
"rewards/rejected": -0.0008377517224289477, |
|
"step": 10 |
|
}, |
|
{ |
|
"epoch": 0.02, |
|
"learning_rate": 1.0638297872340427e-06, |
|
"logits/chosen": 0.2244517058134079, |
|
"logits/rejected": 0.2146037071943283, |
|
"logps/chosen": -456.9951171875, |
|
"logps/rejected": -442.496826171875, |
|
"loss": 0.3007, |
|
"rewards/accuracies": 0.33125001192092896, |
|
"rewards/chosen": -0.0011023276019841433, |
|
"rewards/margins": -6.420163117581978e-05, |
|
"rewards/rejected": -0.0010381259489804506, |
|
"step": 20 |
|
}, |
|
{ |
|
"epoch": 0.03, |
|
"learning_rate": 1.595744680851064e-06, |
|
"logits/chosen": 0.34518542885780334, |
|
"logits/rejected": 0.15579931437969208, |
|
"logps/chosen": -414.3968200683594, |
|
"logps/rejected": -407.1432800292969, |
|
"loss": 0.3064, |
|
"rewards/accuracies": 0.3812499940395355, |
|
"rewards/chosen": -0.0004762631724588573, |
|
"rewards/margins": 6.935702549526468e-05, |
|
"rewards/rejected": -0.0005456201615743339, |
|
"step": 30 |
|
}, |
|
{ |
|
"epoch": 0.04, |
|
"learning_rate": 2.1276595744680853e-06, |
|
"logits/chosen": 0.19979876279830933, |
|
"logits/rejected": 0.1475386917591095, |
|
"logps/chosen": -387.1222839355469, |
|
"logps/rejected": -380.3912048339844, |
|
"loss": 0.3173, |
|
"rewards/accuracies": 0.36250001192092896, |
|
"rewards/chosen": 8.398960198974237e-05, |
|
"rewards/margins": 0.0001377248700009659, |
|
"rewards/rejected": -5.3735253459308296e-05, |
|
"step": 40 |
|
}, |
|
{ |
|
"epoch": 0.05, |
|
"learning_rate": 2.6595744680851065e-06, |
|
"logits/chosen": 0.20050282776355743, |
|
"logits/rejected": 0.2853023409843445, |
|
"logps/chosen": -405.1722717285156, |
|
"logps/rejected": -433.797119140625, |
|
"loss": 0.2969, |
|
"rewards/accuracies": 0.38749998807907104, |
|
"rewards/chosen": -0.00031604920513927937, |
|
"rewards/margins": 9.358949318993837e-05, |
|
"rewards/rejected": -0.0004096386837773025, |
|
"step": 50 |
|
}, |
|
{ |
|
"epoch": 0.06, |
|
"learning_rate": 3.191489361702128e-06, |
|
"logits/chosen": 0.22685687243938446, |
|
"logits/rejected": 0.2761882245540619, |
|
"logps/chosen": -418.8284606933594, |
|
"logps/rejected": -444.300537109375, |
|
"loss": 0.2916, |
|
"rewards/accuracies": 0.5, |
|
"rewards/chosen": -0.0013369970256462693, |
|
"rewards/margins": 0.00045625813072547317, |
|
"rewards/rejected": -0.0017932550981640816, |
|
"step": 60 |
|
}, |
|
{ |
|
"epoch": 0.07, |
|
"learning_rate": 3.723404255319149e-06, |
|
"logits/chosen": 0.1869155615568161, |
|
"logits/rejected": 0.2700553834438324, |
|
"logps/chosen": -443.9104919433594, |
|
"logps/rejected": -423.21075439453125, |
|
"loss": 0.2976, |
|
"rewards/accuracies": 0.46875, |
|
"rewards/chosen": -0.003828343003988266, |
|
"rewards/margins": 0.0006897930870763958, |
|
"rewards/rejected": -0.004518135450780392, |
|
"step": 70 |
|
}, |
|
{ |
|
"epoch": 0.09, |
|
"learning_rate": 4.255319148936171e-06, |
|
"logits/chosen": 0.23689034581184387, |
|
"logits/rejected": 0.21069273352622986, |
|
"logps/chosen": -386.25067138671875, |
|
"logps/rejected": -387.7801818847656, |
|
"loss": 0.3143, |
|
"rewards/accuracies": 0.48124998807907104, |
|
"rewards/chosen": -0.003951665014028549, |
|
"rewards/margins": 0.0011586709879338741, |
|
"rewards/rejected": -0.005110335536301136, |
|
"step": 80 |
|
}, |
|
{ |
|
"epoch": 0.1, |
|
"learning_rate": 4.787234042553192e-06, |
|
"logits/chosen": 0.24118606746196747, |
|
"logits/rejected": 0.25480058789253235, |
|
"logps/chosen": -412.43499755859375, |
|
"logps/rejected": -408.15802001953125, |
|
"loss": 0.3065, |
|
"rewards/accuracies": 0.5, |
|
"rewards/chosen": -0.010140495374798775, |
|
"rewards/margins": 0.001615689368918538, |
|
"rewards/rejected": -0.01175618451088667, |
|
"step": 90 |
|
}, |
|
{ |
|
"epoch": 0.11, |
|
"learning_rate": 4.999375059004058e-06, |
|
"logits/chosen": 0.1671404391527176, |
|
"logits/rejected": 0.2540619969367981, |
|
"logps/chosen": -389.2574157714844, |
|
"logps/rejected": -388.87408447265625, |
|
"loss": 0.3053, |
|
"rewards/accuracies": 0.4937500059604645, |
|
"rewards/chosen": -0.022764097899198532, |
|
"rewards/margins": 0.0024351924657821655, |
|
"rewards/rejected": -0.02519928850233555, |
|
"step": 100 |
|
}, |
|
{ |
|
"epoch": 0.12, |
|
"learning_rate": 4.9955571065548795e-06, |
|
"logits/chosen": 0.23305337131023407, |
|
"logits/rejected": 0.22437167167663574, |
|
"logps/chosen": -441.59771728515625, |
|
"logps/rejected": -446.51971435546875, |
|
"loss": 0.3081, |
|
"rewards/accuracies": 0.4937500059604645, |
|
"rewards/chosen": -0.04231434687972069, |
|
"rewards/margins": 0.003846729639917612, |
|
"rewards/rejected": -0.04616107791662216, |
|
"step": 110 |
|
}, |
|
{ |
|
"epoch": 0.13, |
|
"learning_rate": 4.9882736864879e-06, |
|
"logits/chosen": 0.17237094044685364, |
|
"logits/rejected": 0.20950445532798767, |
|
"logps/chosen": -497.03741455078125, |
|
"logps/rejected": -493.6482849121094, |
|
"loss": 0.3001, |
|
"rewards/accuracies": 0.5375000238418579, |
|
"rewards/chosen": -0.06084052473306656, |
|
"rewards/margins": 0.009210348129272461, |
|
"rewards/rejected": -0.07005088031291962, |
|
"step": 120 |
|
}, |
|
{ |
|
"epoch": 0.14, |
|
"learning_rate": 4.977534912960124e-06, |
|
"logits/chosen": 0.12596510350704193, |
|
"logits/rejected": 0.18595007061958313, |
|
"logps/chosen": -534.695556640625, |
|
"logps/rejected": -506.4364318847656, |
|
"loss": 0.285, |
|
"rewards/accuracies": 0.4937500059604645, |
|
"rewards/chosen": -0.09366725385189056, |
|
"rewards/margins": 0.009929810650646687, |
|
"rewards/rejected": -0.10359706729650497, |
|
"step": 130 |
|
}, |
|
{ |
|
"epoch": 0.15, |
|
"learning_rate": 4.963355698422092e-06, |
|
"logits/chosen": 0.12804082036018372, |
|
"logits/rejected": 0.10300163924694061, |
|
"logps/chosen": -515.469970703125, |
|
"logps/rejected": -508.8036193847656, |
|
"loss": 0.3043, |
|
"rewards/accuracies": 0.4749999940395355, |
|
"rewards/chosen": -0.11144615709781647, |
|
"rewards/margins": 0.012134796939790249, |
|
"rewards/rejected": -0.1235809326171875, |
|
"step": 140 |
|
}, |
|
{ |
|
"epoch": 0.16, |
|
"learning_rate": 4.945755732909625e-06, |
|
"logits/chosen": 0.16220004856586456, |
|
"logits/rejected": 0.08028533309698105, |
|
"logps/chosen": -479.33184814453125, |
|
"logps/rejected": -524.7337646484375, |
|
"loss": 0.2802, |
|
"rewards/accuracies": 0.48124998807907104, |
|
"rewards/chosen": -0.1306999772787094, |
|
"rewards/margins": 0.03233319893479347, |
|
"rewards/rejected": -0.16303318738937378, |
|
"step": 150 |
|
}, |
|
{ |
|
"epoch": 0.17, |
|
"learning_rate": 4.924759456701167e-06, |
|
"logits/chosen": 0.18928228318691254, |
|
"logits/rejected": 0.14869533479213715, |
|
"logps/chosen": -573.3798828125, |
|
"logps/rejected": -610.5779418945312, |
|
"loss": 0.2894, |
|
"rewards/accuracies": 0.48750001192092896, |
|
"rewards/chosen": -0.168126180768013, |
|
"rewards/margins": 0.04517129063606262, |
|
"rewards/rejected": -0.21329745650291443, |
|
"step": 160 |
|
}, |
|
{ |
|
"epoch": 0.18, |
|
"learning_rate": 4.900396026378671e-06, |
|
"logits/chosen": 0.062172818928956985, |
|
"logits/rejected": 0.019718164578080177, |
|
"logps/chosen": -475.08551025390625, |
|
"logps/rejected": -561.6641845703125, |
|
"loss": 0.2631, |
|
"rewards/accuracies": 0.4312500059604645, |
|
"rewards/chosen": -0.1646568328142166, |
|
"rewards/margins": 0.06741134822368622, |
|
"rewards/rejected": -0.23206815123558044, |
|
"step": 170 |
|
}, |
|
{ |
|
"epoch": 0.19, |
|
"learning_rate": 4.872699274339169e-06, |
|
"logits/chosen": -0.0011120836716145277, |
|
"logits/rejected": 0.13960300385951996, |
|
"logps/chosen": -525.2228393554688, |
|
"logps/rejected": -511.014404296875, |
|
"loss": 0.2908, |
|
"rewards/accuracies": 0.4124999940395355, |
|
"rewards/chosen": -0.14511564373970032, |
|
"rewards/margins": 0.03634321317076683, |
|
"rewards/rejected": -0.18145884573459625, |
|
"step": 180 |
|
}, |
|
{ |
|
"epoch": 0.2, |
|
"learning_rate": 4.8417076618132434e-06, |
|
"logits/chosen": 0.11656410992145538, |
|
"logits/rejected": 0.08704119175672531, |
|
"logps/chosen": -514.813720703125, |
|
"logps/rejected": -595.0880126953125, |
|
"loss": 0.2617, |
|
"rewards/accuracies": 0.4937500059604645, |
|
"rewards/chosen": -0.14817455410957336, |
|
"rewards/margins": 0.04434273764491081, |
|
"rewards/rejected": -0.19251729547977448, |
|
"step": 190 |
|
}, |
|
{ |
|
"epoch": 0.21, |
|
"learning_rate": 4.807464225455655e-06, |
|
"logits/chosen": 0.06601261347532272, |
|
"logits/rejected": 0.12538839876651764, |
|
"logps/chosen": -538.46142578125, |
|
"logps/rejected": -592.1277465820312, |
|
"loss": 0.2739, |
|
"rewards/accuracies": 0.5, |
|
"rewards/chosen": -0.15786947309970856, |
|
"rewards/margins": 0.04001317173242569, |
|
"rewards/rejected": -0.19788263738155365, |
|
"step": 200 |
|
}, |
|
{ |
|
"epoch": 0.22, |
|
"learning_rate": 4.770016517582283e-06, |
|
"logits/chosen": 0.04595109820365906, |
|
"logits/rejected": 0.05397043749690056, |
|
"logps/chosen": -542.3662109375, |
|
"logps/rejected": -587.703125, |
|
"loss": 0.2803, |
|
"rewards/accuracies": 0.44999998807907104, |
|
"rewards/chosen": -0.15854400396347046, |
|
"rewards/margins": 0.037279583513736725, |
|
"rewards/rejected": -0.19582359492778778, |
|
"step": 210 |
|
}, |
|
{ |
|
"epoch": 0.23, |
|
"learning_rate": 4.7294165401363616e-06, |
|
"logits/chosen": 0.06908506900072098, |
|
"logits/rejected": 0.0783570259809494, |
|
"logps/chosen": -543.8673095703125, |
|
"logps/rejected": -552.3768920898438, |
|
"loss": 0.273, |
|
"rewards/accuracies": 0.4437499940395355, |
|
"rewards/chosen": -0.14275754988193512, |
|
"rewards/margins": 0.04143111780285835, |
|
"rewards/rejected": -0.18418867886066437, |
|
"step": 220 |
|
}, |
|
{ |
|
"epoch": 0.25, |
|
"learning_rate": 4.68572067247573e-06, |
|
"logits/chosen": 0.07219888269901276, |
|
"logits/rejected": 0.0497373566031456, |
|
"logps/chosen": -572.8856201171875, |
|
"logps/rejected": -622.0572509765625, |
|
"loss": 0.2833, |
|
"rewards/accuracies": 0.543749988079071, |
|
"rewards/chosen": -0.14573441445827484, |
|
"rewards/margins": 0.06495748460292816, |
|
"rewards/rejected": -0.210691899061203, |
|
"step": 230 |
|
}, |
|
{ |
|
"epoch": 0.26, |
|
"learning_rate": 4.638989593081364e-06, |
|
"logits/chosen": -0.10666439682245255, |
|
"logits/rejected": 0.05354728549718857, |
|
"logps/chosen": -541.7586059570312, |
|
"logps/rejected": -586.1435546875, |
|
"loss": 0.2883, |
|
"rewards/accuracies": 0.48750001192092896, |
|
"rewards/chosen": -0.14508689939975739, |
|
"rewards/margins": 0.048395391553640366, |
|
"rewards/rejected": -0.19348229467868805, |
|
"step": 240 |
|
}, |
|
{ |
|
"epoch": 0.27, |
|
"learning_rate": 4.5892881952959015e-06, |
|
"logits/chosen": 0.07505561411380768, |
|
"logits/rejected": -0.051252782344818115, |
|
"logps/chosen": -555.9990844726562, |
|
"logps/rejected": -585.9085083007812, |
|
"loss": 0.2815, |
|
"rewards/accuracies": 0.4749999940395355, |
|
"rewards/chosen": -0.14657357335090637, |
|
"rewards/margins": 0.04433682560920715, |
|
"rewards/rejected": -0.19091038405895233, |
|
"step": 250 |
|
}, |
|
{ |
|
"epoch": 0.28, |
|
"learning_rate": 4.536685497209182e-06, |
|
"logits/chosen": -0.020656492561101913, |
|
"logits/rejected": 0.007626605220139027, |
|
"logps/chosen": -612.7149658203125, |
|
"logps/rejected": -613.846435546875, |
|
"loss": 0.2945, |
|
"rewards/accuracies": 0.4937500059604645, |
|
"rewards/chosen": -0.1504904180765152, |
|
"rewards/margins": 0.03951232135295868, |
|
"rewards/rejected": -0.19000275433063507, |
|
"step": 260 |
|
}, |
|
{ |
|
"epoch": 0.29, |
|
"learning_rate": 4.481254545815943e-06, |
|
"logits/chosen": 0.06314031779766083, |
|
"logits/rejected": 0.05012714862823486, |
|
"logps/chosen": -572.8782958984375, |
|
"logps/rejected": -599.4718627929688, |
|
"loss": 0.286, |
|
"rewards/accuracies": 0.5249999761581421, |
|
"rewards/chosen": -0.11262224614620209, |
|
"rewards/margins": 0.0400107316672802, |
|
"rewards/rejected": -0.15263298153877258, |
|
"step": 270 |
|
}, |
|
{ |
|
"epoch": 0.3, |
|
"learning_rate": 4.42307231557875e-06, |
|
"logits/chosen": 4.419684410095215e-05, |
|
"logits/rejected": 0.1705075055360794, |
|
"logps/chosen": -478.0445251464844, |
|
"logps/rejected": -514.4085693359375, |
|
"loss": 0.2574, |
|
"rewards/accuracies": 0.4937500059604645, |
|
"rewards/chosen": -0.08713702112436295, |
|
"rewards/margins": 0.0530150942504406, |
|
"rewards/rejected": -0.14015211164951324, |
|
"step": 280 |
|
}, |
|
{ |
|
"epoch": 0.31, |
|
"learning_rate": 4.3622196015370305e-06, |
|
"logits/chosen": -0.05049672722816467, |
|
"logits/rejected": 0.10665085166692734, |
|
"logps/chosen": -562.5294799804688, |
|
"logps/rejected": -583.0447998046875, |
|
"loss": 0.2866, |
|
"rewards/accuracies": 0.48750001192092896, |
|
"rewards/chosen": -0.12877288460731506, |
|
"rewards/margins": 0.0415426567196846, |
|
"rewards/rejected": -0.17031553387641907, |
|
"step": 290 |
|
}, |
|
{ |
|
"epoch": 0.32, |
|
"learning_rate": 4.298780907110648e-06, |
|
"logits/chosen": -0.13402745127677917, |
|
"logits/rejected": 0.04227043688297272, |
|
"logps/chosen": -570.83935546875, |
|
"logps/rejected": -597.737060546875, |
|
"loss": 0.2749, |
|
"rewards/accuracies": 0.5375000238418579, |
|
"rewards/chosen": -0.14190678298473358, |
|
"rewards/margins": 0.054900676012039185, |
|
"rewards/rejected": -0.19680745899677277, |
|
"step": 300 |
|
}, |
|
{ |
|
"epoch": 0.33, |
|
"learning_rate": 4.23284432675381e-06, |
|
"logits/chosen": -0.07339149713516235, |
|
"logits/rejected": -0.040264565497636795, |
|
"logps/chosen": -619.7767333984375, |
|
"logps/rejected": -682.0263061523438, |
|
"loss": 0.2589, |
|
"rewards/accuracies": 0.581250011920929, |
|
"rewards/chosen": -0.16093352437019348, |
|
"rewards/margins": 0.07607638835906982, |
|
"rewards/rejected": -0.2370099127292633, |
|
"step": 310 |
|
}, |
|
{ |
|
"epoch": 0.34, |
|
"learning_rate": 4.164501423622277e-06, |
|
"logits/chosen": -0.028974998742341995, |
|
"logits/rejected": -0.06319359689950943, |
|
"logps/chosen": -549.9708862304688, |
|
"logps/rejected": -637.650390625, |
|
"loss": 0.264, |
|
"rewards/accuracies": 0.5, |
|
"rewards/chosen": -0.13445612788200378, |
|
"rewards/margins": 0.07859645038843155, |
|
"rewards/rejected": -0.21305255591869354, |
|
"step": 320 |
|
}, |
|
{ |
|
"epoch": 0.35, |
|
"learning_rate": 4.0938471024237355e-06, |
|
"logits/chosen": -0.11449748277664185, |
|
"logits/rejected": -0.03637564182281494, |
|
"logps/chosen": -601.0888671875, |
|
"logps/rejected": -669.8153076171875, |
|
"loss": 0.2698, |
|
"rewards/accuracies": 0.543749988079071, |
|
"rewards/chosen": -0.1675841063261032, |
|
"rewards/margins": 0.0730680450797081, |
|
"rewards/rejected": -0.2406521737575531, |
|
"step": 330 |
|
}, |
|
{ |
|
"epoch": 0.36, |
|
"learning_rate": 4.020979477627907e-06, |
|
"logits/chosen": -0.08815717697143555, |
|
"logits/rejected": 0.015436625108122826, |
|
"logps/chosen": -504.43658447265625, |
|
"logps/rejected": -530.493408203125, |
|
"loss": 0.2631, |
|
"rewards/accuracies": 0.41874998807907104, |
|
"rewards/chosen": -0.11692949384450912, |
|
"rewards/margins": 0.04244539141654968, |
|
"rewards/rejected": -0.1593748927116394, |
|
"step": 340 |
|
}, |
|
{ |
|
"epoch": 0.37, |
|
"learning_rate": 3.9459997372194105e-06, |
|
"logits/chosen": -0.061802517622709274, |
|
"logits/rejected": 0.09361619502305984, |
|
"logps/chosen": -579.513671875, |
|
"logps/rejected": -639.0809936523438, |
|
"loss": 0.2755, |
|
"rewards/accuracies": 0.5062500238418579, |
|
"rewards/chosen": -0.16077642142772675, |
|
"rewards/margins": 0.05602121353149414, |
|
"rewards/rejected": -0.2167976200580597, |
|
"step": 350 |
|
}, |
|
{ |
|
"epoch": 0.38, |
|
"learning_rate": 3.869012002182573e-06, |
|
"logits/chosen": -0.10564370453357697, |
|
"logits/rejected": 0.02583186700940132, |
|
"logps/chosen": -590.0396728515625, |
|
"logps/rejected": -647.909423828125, |
|
"loss": 0.2786, |
|
"rewards/accuracies": 0.53125, |
|
"rewards/chosen": -0.15724439918994904, |
|
"rewards/margins": 0.0704963356256485, |
|
"rewards/rejected": -0.22774071991443634, |
|
"step": 360 |
|
}, |
|
{ |
|
"epoch": 0.39, |
|
"learning_rate": 3.7901231819133104e-06, |
|
"logits/chosen": -0.10458900034427643, |
|
"logits/rejected": -0.004534685518592596, |
|
"logps/chosen": -542.1465454101562, |
|
"logps/rejected": -573.3040771484375, |
|
"loss": 0.2668, |
|
"rewards/accuracies": 0.4625000059604645, |
|
"rewards/chosen": -0.1385766863822937, |
|
"rewards/margins": 0.05575231835246086, |
|
"rewards/rejected": -0.19432899355888367, |
|
"step": 370 |
|
}, |
|
{ |
|
"epoch": 0.41, |
|
"learning_rate": 3.709442825758875e-06, |
|
"logits/chosen": -0.06990720331668854, |
|
"logits/rejected": 0.01404495257884264, |
|
"logps/chosen": -505.4917907714844, |
|
"logps/rejected": -572.2424926757812, |
|
"loss": 0.2636, |
|
"rewards/accuracies": 0.46875, |
|
"rewards/chosen": -0.14204895496368408, |
|
"rewards/margins": 0.050913404673337936, |
|
"rewards/rejected": -0.19296236336231232, |
|
"step": 380 |
|
}, |
|
{ |
|
"epoch": 0.42, |
|
"learning_rate": 3.6270829708916113e-06, |
|
"logits/chosen": -0.040005024522542953, |
|
"logits/rejected": 0.017743710428476334, |
|
"logps/chosen": -548.9078369140625, |
|
"logps/rejected": -573.68408203125, |
|
"loss": 0.2815, |
|
"rewards/accuracies": 0.4625000059604645, |
|
"rewards/chosen": -0.1460207998752594, |
|
"rewards/margins": 0.041480742394924164, |
|
"rewards/rejected": -0.18750153481960297, |
|
"step": 390 |
|
}, |
|
{ |
|
"epoch": 0.43, |
|
"learning_rate": 3.543157986727991e-06, |
|
"logits/chosen": -0.13051895797252655, |
|
"logits/rejected": 0.012003961019217968, |
|
"logps/chosen": -543.142578125, |
|
"logps/rejected": -594.59326171875, |
|
"loss": 0.2753, |
|
"rewards/accuracies": 0.512499988079071, |
|
"rewards/chosen": -0.13953322172164917, |
|
"rewards/margins": 0.0578032024204731, |
|
"rewards/rejected": -0.19733640551567078, |
|
"step": 400 |
|
}, |
|
{ |
|
"epoch": 0.44, |
|
"learning_rate": 3.4577844161089614e-06, |
|
"logits/chosen": -0.03122936561703682, |
|
"logits/rejected": 0.010242189280688763, |
|
"logps/chosen": -564.1265869140625, |
|
"logps/rejected": -600.2008666992188, |
|
"loss": 0.2742, |
|
"rewards/accuracies": 0.4625000059604645, |
|
"rewards/chosen": -0.14681796729564667, |
|
"rewards/margins": 0.06189022213220596, |
|
"rewards/rejected": -0.20870819687843323, |
|
"step": 410 |
|
}, |
|
{ |
|
"epoch": 0.45, |
|
"learning_rate": 3.3710808134621577e-06, |
|
"logits/chosen": -0.010301386937499046, |
|
"logits/rejected": -0.039281733334064484, |
|
"logps/chosen": -603.3815307617188, |
|
"logps/rejected": -645.9063110351562, |
|
"loss": 0.2761, |
|
"rewards/accuracies": 0.5249999761581421, |
|
"rewards/chosen": -0.15229162573814392, |
|
"rewards/margins": 0.06186581775546074, |
|
"rewards/rejected": -0.21415743231773376, |
|
"step": 420 |
|
}, |
|
{ |
|
"epoch": 0.46, |
|
"learning_rate": 3.2831675801707126e-06, |
|
"logits/chosen": -0.08892063051462173, |
|
"logits/rejected": -0.0773845762014389, |
|
"logps/chosen": -537.9869995117188, |
|
"logps/rejected": -586.5262451171875, |
|
"loss": 0.2721, |
|
"rewards/accuracies": 0.53125, |
|
"rewards/chosen": -0.13618162274360657, |
|
"rewards/margins": 0.06536873430013657, |
|
"rewards/rejected": -0.20155039429664612, |
|
"step": 430 |
|
}, |
|
{ |
|
"epoch": 0.47, |
|
"learning_rate": 3.194166797377289e-06, |
|
"logits/chosen": -0.03688632696866989, |
|
"logits/rejected": -0.00927029736340046, |
|
"logps/chosen": -540.3975219726562, |
|
"logps/rejected": -622.5618896484375, |
|
"loss": 0.2622, |
|
"rewards/accuracies": 0.53125, |
|
"rewards/chosen": -0.12767064571380615, |
|
"rewards/margins": 0.07470119744539261, |
|
"rewards/rejected": -0.20237183570861816, |
|
"step": 440 |
|
}, |
|
{ |
|
"epoch": 0.48, |
|
"learning_rate": 3.104202056455501e-06, |
|
"logits/chosen": -0.05976264923810959, |
|
"logits/rejected": -0.16253043711185455, |
|
"logps/chosen": -514.6741943359375, |
|
"logps/rejected": -578.7728271484375, |
|
"loss": 0.2588, |
|
"rewards/accuracies": 0.5249999761581421, |
|
"rewards/chosen": -0.13386496901512146, |
|
"rewards/margins": 0.05132218077778816, |
|
"rewards/rejected": -0.18518713116645813, |
|
"step": 450 |
|
}, |
|
{ |
|
"epoch": 0.49, |
|
"learning_rate": 3.013398287384144e-06, |
|
"logits/chosen": -0.15890637040138245, |
|
"logits/rejected": -0.05094796419143677, |
|
"logps/chosen": -516.9619750976562, |
|
"logps/rejected": -604.88525390625, |
|
"loss": 0.2671, |
|
"rewards/accuracies": 0.543749988079071, |
|
"rewards/chosen": -0.11203358322381973, |
|
"rewards/margins": 0.08909189701080322, |
|
"rewards/rejected": -0.20112547278404236, |
|
"step": 460 |
|
}, |
|
{ |
|
"epoch": 0.5, |
|
"learning_rate": 2.9218815852625717e-06, |
|
"logits/chosen": -0.17138849198818207, |
|
"logits/rejected": -0.08313537389039993, |
|
"logps/chosen": -573.4813232421875, |
|
"logps/rejected": -608.16552734375, |
|
"loss": 0.2695, |
|
"rewards/accuracies": 0.518750011920929, |
|
"rewards/chosen": -0.13088415563106537, |
|
"rewards/margins": 0.047856587916612625, |
|
"rewards/rejected": -0.1787407547235489, |
|
"step": 470 |
|
}, |
|
{ |
|
"epoch": 0.51, |
|
"learning_rate": 2.829779035208113e-06, |
|
"logits/chosen": -0.12412846088409424, |
|
"logits/rejected": -0.11923656612634659, |
|
"logps/chosen": -472.38726806640625, |
|
"logps/rejected": -527.9046020507812, |
|
"loss": 0.2658, |
|
"rewards/accuracies": 0.44999998807907104, |
|
"rewards/chosen": -0.09405693411827087, |
|
"rewards/margins": 0.057601846754550934, |
|
"rewards/rejected": -0.1516587734222412, |
|
"step": 480 |
|
}, |
|
{ |
|
"epoch": 0.52, |
|
"learning_rate": 2.737218535878705e-06, |
|
"logits/chosen": -0.11355652660131454, |
|
"logits/rejected": -0.06184381991624832, |
|
"logps/chosen": -522.3283081054688, |
|
"logps/rejected": -575.9847412109375, |
|
"loss": 0.2596, |
|
"rewards/accuracies": 0.4937500059604645, |
|
"rewards/chosen": -0.11173069477081299, |
|
"rewards/margins": 0.06776181608438492, |
|
"rewards/rejected": -0.1794925034046173, |
|
"step": 490 |
|
}, |
|
{ |
|
"epoch": 0.53, |
|
"learning_rate": 2.64432862186579e-06, |
|
"logits/chosen": -0.1486915647983551, |
|
"logits/rejected": -0.1126946210861206, |
|
"logps/chosen": -472.10009765625, |
|
"logps/rejected": -565.5072021484375, |
|
"loss": 0.2578, |
|
"rewards/accuracies": 0.512499988079071, |
|
"rewards/chosen": -0.10586412250995636, |
|
"rewards/margins": 0.08054044097661972, |
|
"rewards/rejected": -0.1864045411348343, |
|
"step": 500 |
|
}, |
|
{ |
|
"epoch": 0.54, |
|
"learning_rate": 2.551238285204126e-06, |
|
"logits/chosen": -0.07071704417467117, |
|
"logits/rejected": -0.06895752251148224, |
|
"logps/chosen": -553.7777709960938, |
|
"logps/rejected": -641.0071411132812, |
|
"loss": 0.2755, |
|
"rewards/accuracies": 0.543749988079071, |
|
"rewards/chosen": -0.12443629652261734, |
|
"rewards/margins": 0.08012167364358902, |
|
"rewards/rejected": -0.20455794036388397, |
|
"step": 510 |
|
}, |
|
{ |
|
"epoch": 0.55, |
|
"learning_rate": 2.4580767962463688e-06, |
|
"logits/chosen": -0.07696928828954697, |
|
"logits/rejected": 0.0007179826498031616, |
|
"logps/chosen": -548.6107788085938, |
|
"logps/rejected": -587.250732421875, |
|
"loss": 0.2776, |
|
"rewards/accuracies": 0.46875, |
|
"rewards/chosen": -0.14552563428878784, |
|
"rewards/margins": 0.04918716475367546, |
|
"rewards/rejected": -0.1947127878665924, |
|
"step": 520 |
|
}, |
|
{ |
|
"epoch": 0.57, |
|
"learning_rate": 2.3649735241511546e-06, |
|
"logits/chosen": -0.07807435840368271, |
|
"logits/rejected": 0.01831636391580105, |
|
"logps/chosen": -556.7833251953125, |
|
"logps/rejected": -576.1497192382812, |
|
"loss": 0.2592, |
|
"rewards/accuracies": 0.4749999940395355, |
|
"rewards/chosen": -0.13193278014659882, |
|
"rewards/margins": 0.05105576664209366, |
|
"rewards/rejected": -0.18298853933811188, |
|
"step": 530 |
|
}, |
|
{ |
|
"epoch": 0.58, |
|
"learning_rate": 2.2720577572339914e-06, |
|
"logits/chosen": -0.014492440037429333, |
|
"logits/rejected": -0.10470409691333771, |
|
"logps/chosen": -520.7554931640625, |
|
"logps/rejected": -621.583251953125, |
|
"loss": 0.2589, |
|
"rewards/accuracies": 0.518750011920929, |
|
"rewards/chosen": -0.1109011173248291, |
|
"rewards/margins": 0.08521705865859985, |
|
"rewards/rejected": -0.19611816108226776, |
|
"step": 540 |
|
}, |
|
{ |
|
"epoch": 0.59, |
|
"learning_rate": 2.1794585234303995e-06, |
|
"logits/chosen": -0.09592770040035248, |
|
"logits/rejected": 0.03173860162496567, |
|
"logps/chosen": -527.00537109375, |
|
"logps/rejected": -587.9857177734375, |
|
"loss": 0.2698, |
|
"rewards/accuracies": 0.5249999761581421, |
|
"rewards/chosen": -0.13213616609573364, |
|
"rewards/margins": 0.07161318510770798, |
|
"rewards/rejected": -0.2037493735551834, |
|
"step": 550 |
|
}, |
|
{ |
|
"epoch": 0.6, |
|
"learning_rate": 2.0873044111206407e-06, |
|
"logits/chosen": -0.08211179822683334, |
|
"logits/rejected": -0.07464434206485748, |
|
"logps/chosen": -526.9556884765625, |
|
"logps/rejected": -597.501708984375, |
|
"loss": 0.263, |
|
"rewards/accuracies": 0.48750001192092896, |
|
"rewards/chosen": -0.12764397263526917, |
|
"rewards/margins": 0.07685311138629913, |
|
"rewards/rejected": -0.2044970691204071, |
|
"step": 560 |
|
}, |
|
{ |
|
"epoch": 0.61, |
|
"learning_rate": 1.9957233905648293e-06, |
|
"logits/chosen": -0.17138975858688354, |
|
"logits/rejected": 0.06973910331726074, |
|
"logps/chosen": -606.2980346679688, |
|
"logps/rejected": -656.0491943359375, |
|
"loss": 0.2673, |
|
"rewards/accuracies": 0.53125, |
|
"rewards/chosen": -0.13349631428718567, |
|
"rewards/margins": 0.08181539922952652, |
|
"rewards/rejected": -0.2153116911649704, |
|
"step": 570 |
|
}, |
|
{ |
|
"epoch": 0.62, |
|
"learning_rate": 1.904842636196402e-06, |
|
"logits/chosen": -0.12237439304590225, |
|
"logits/rejected": 0.0009635284659452736, |
|
"logps/chosen": -486.32818603515625, |
|
"logps/rejected": -522.0555419921875, |
|
"loss": 0.2611, |
|
"rewards/accuracies": 0.4749999940395355, |
|
"rewards/chosen": -0.09265846014022827, |
|
"rewards/margins": 0.06704847514629364, |
|
"rewards/rejected": -0.15970692038536072, |
|
"step": 580 |
|
}, |
|
{ |
|
"epoch": 0.63, |
|
"learning_rate": 1.814788350020726e-06, |
|
"logits/chosen": -0.0846022367477417, |
|
"logits/rejected": 0.020748872309923172, |
|
"logps/chosen": -515.6177368164062, |
|
"logps/rejected": -512.121337890625, |
|
"loss": 0.288, |
|
"rewards/accuracies": 0.4375, |
|
"rewards/chosen": -0.10487208515405655, |
|
"rewards/margins": 0.0322984978556633, |
|
"rewards/rejected": -0.13717058300971985, |
|
"step": 590 |
|
}, |
|
{ |
|
"epoch": 0.64, |
|
"learning_rate": 1.725685586364051e-06, |
|
"logits/chosen": -0.1460862159729004, |
|
"logits/rejected": -0.06654468178749084, |
|
"logps/chosen": -464.4956970214844, |
|
"logps/rejected": -489.59161376953125, |
|
"loss": 0.2786, |
|
"rewards/accuracies": 0.40625, |
|
"rewards/chosen": -0.09209474176168442, |
|
"rewards/margins": 0.04379875212907791, |
|
"rewards/rejected": -0.13589349389076233, |
|
"step": 600 |
|
}, |
|
{ |
|
"epoch": 0.65, |
|
"learning_rate": 1.6376580782162172e-06, |
|
"logits/chosen": -0.1410539448261261, |
|
"logits/rejected": -0.08074741810560226, |
|
"logps/chosen": -483.46893310546875, |
|
"logps/rejected": -526.5164794921875, |
|
"loss": 0.2829, |
|
"rewards/accuracies": 0.5062500238418579, |
|
"rewards/chosen": -0.09680439531803131, |
|
"rewards/margins": 0.06490226835012436, |
|
"rewards/rejected": -0.16170665621757507, |
|
"step": 610 |
|
}, |
|
{ |
|
"epoch": 0.66, |
|
"learning_rate": 1.550828065408227e-06, |
|
"logits/chosen": -0.09655305743217468, |
|
"logits/rejected": -0.0762481540441513, |
|
"logps/chosen": -468.4337463378906, |
|
"logps/rejected": -525.4623413085938, |
|
"loss": 0.2619, |
|
"rewards/accuracies": 0.5, |
|
"rewards/chosen": -0.08364450931549072, |
|
"rewards/margins": 0.0691133439540863, |
|
"rewards/rejected": -0.15275785326957703, |
|
"step": 620 |
|
}, |
|
{ |
|
"epoch": 0.67, |
|
"learning_rate": 1.4653161248633053e-06, |
|
"logits/chosen": -0.03865772485733032, |
|
"logits/rejected": -0.10341192781925201, |
|
"logps/chosen": -468.88763427734375, |
|
"logps/rejected": -524.7953491210938, |
|
"loss": 0.2589, |
|
"rewards/accuracies": 0.5062500238418579, |
|
"rewards/chosen": -0.08644279092550278, |
|
"rewards/margins": 0.0591856949031353, |
|
"rewards/rejected": -0.14562849700450897, |
|
"step": 630 |
|
}, |
|
{ |
|
"epoch": 0.68, |
|
"learning_rate": 1.381241003157162e-06, |
|
"logits/chosen": -0.08597133308649063, |
|
"logits/rejected": 0.0006229489808902144, |
|
"logps/chosen": -556.832275390625, |
|
"logps/rejected": -577.3524780273438, |
|
"loss": 0.2942, |
|
"rewards/accuracies": 0.4749999940395355, |
|
"rewards/chosen": -0.1141686886548996, |
|
"rewards/margins": 0.03842983394861221, |
|
"rewards/rejected": -0.1525985300540924, |
|
"step": 640 |
|
}, |
|
{ |
|
"epoch": 0.69, |
|
"learning_rate": 1.298719451619979e-06, |
|
"logits/chosen": -0.09541022032499313, |
|
"logits/rejected": -0.05003209039568901, |
|
"logps/chosen": -469.9234313964844, |
|
"logps/rejected": -529.8631591796875, |
|
"loss": 0.2683, |
|
"rewards/accuracies": 0.45625001192092896, |
|
"rewards/chosen": -0.09767267853021622, |
|
"rewards/margins": 0.054049454629421234, |
|
"rewards/rejected": -0.15172213315963745, |
|
"step": 650 |
|
}, |
|
{ |
|
"epoch": 0.7, |
|
"learning_rate": 1.2178660642091036e-06, |
|
"logits/chosen": -0.2083953619003296, |
|
"logits/rejected": 0.05156536027789116, |
|
"logps/chosen": -587.299560546875, |
|
"logps/rejected": -581.7611694335938, |
|
"loss": 0.2608, |
|
"rewards/accuracies": 0.5375000238418579, |
|
"rewards/chosen": -0.10773968696594238, |
|
"rewards/margins": 0.06141304969787598, |
|
"rewards/rejected": -0.16915276646614075, |
|
"step": 660 |
|
}, |
|
{ |
|
"epoch": 0.71, |
|
"learning_rate": 1.1387931183775821e-06, |
|
"logits/chosen": -0.09526301920413971, |
|
"logits/rejected": -0.06854981184005737, |
|
"logps/chosen": -553.5062255859375, |
|
"logps/rejected": -567.5850219726562, |
|
"loss": 0.2713, |
|
"rewards/accuracies": 0.518750011920929, |
|
"rewards/chosen": -0.10804013162851334, |
|
"rewards/margins": 0.06674468517303467, |
|
"rewards/rejected": -0.1747848242521286, |
|
"step": 670 |
|
}, |
|
{ |
|
"epoch": 0.73, |
|
"learning_rate": 1.061610419159532e-06, |
|
"logits/chosen": -0.15921640396118164, |
|
"logits/rejected": -0.22180967032909393, |
|
"logps/chosen": -464.23480224609375, |
|
"logps/rejected": -517.2071533203125, |
|
"loss": 0.2618, |
|
"rewards/accuracies": 0.45625001192092896, |
|
"rewards/chosen": -0.1085701733827591, |
|
"rewards/margins": 0.06596361845731735, |
|
"rewards/rejected": -0.17453376948833466, |
|
"step": 680 |
|
}, |
|
{ |
|
"epoch": 0.74, |
|
"learning_rate": 9.864251466888364e-07, |
|
"logits/chosen": -0.2035103738307953, |
|
"logits/rejected": -0.13756130635738373, |
|
"logps/chosen": -533.3971557617188, |
|
"logps/rejected": -562.695068359375, |
|
"loss": 0.2758, |
|
"rewards/accuracies": 0.4437499940395355, |
|
"rewards/chosen": -0.12624426186084747, |
|
"rewards/margins": 0.05400489643216133, |
|
"rewards/rejected": -0.1802491694688797, |
|
"step": 690 |
|
}, |
|
{ |
|
"epoch": 0.75, |
|
"learning_rate": 9.133417073629288e-07, |
|
"logits/chosen": -0.19000104069709778, |
|
"logits/rejected": -0.17901551723480225, |
|
"logps/chosen": -564.2060546875, |
|
"logps/rejected": -620.484619140625, |
|
"loss": 0.2765, |
|
"rewards/accuracies": 0.44999998807907104, |
|
"rewards/chosen": -0.12581291794776917, |
|
"rewards/margins": 0.06180128455162048, |
|
"rewards/rejected": -0.18761418759822845, |
|
"step": 700 |
|
}, |
|
{ |
|
"epoch": 0.76, |
|
"learning_rate": 8.424615888583332e-07, |
|
"logits/chosen": -0.1307680755853653, |
|
"logits/rejected": -0.10078835487365723, |
|
"logps/chosen": -524.8638916015625, |
|
"logps/rejected": -553.104736328125, |
|
"loss": 0.2792, |
|
"rewards/accuracies": 0.4375, |
|
"rewards/chosen": -0.13305291533470154, |
|
"rewards/margins": 0.040464796125888824, |
|
"rewards/rejected": -0.17351767420768738, |
|
"step": 710 |
|
}, |
|
{ |
|
"epoch": 0.77, |
|
"learning_rate": 7.738832191993092e-07, |
|
"logits/chosen": -0.11268335580825806, |
|
"logits/rejected": -0.05942006781697273, |
|
"logps/chosen": -497.02325439453125, |
|
"logps/rejected": -589.494384765625, |
|
"loss": 0.267, |
|
"rewards/accuracies": 0.48750001192092896, |
|
"rewards/chosen": -0.11972874402999878, |
|
"rewards/margins": 0.0718456357717514, |
|
"rewards/rejected": -0.19157439470291138, |
|
"step": 720 |
|
}, |
|
{ |
|
"epoch": 0.78, |
|
"learning_rate": 7.077018300752917e-07, |
|
"logits/chosen": -0.14758452773094177, |
|
"logits/rejected": -0.0012020498979836702, |
|
"logps/chosen": -520.2757568359375, |
|
"logps/rejected": -585.8750610351562, |
|
"loss": 0.2769, |
|
"rewards/accuracies": 0.45625001192092896, |
|
"rewards/chosen": -0.11923079192638397, |
|
"rewards/margins": 0.06183774396777153, |
|
"rewards/rejected": -0.1810685396194458, |
|
"step": 730 |
|
}, |
|
{ |
|
"epoch": 0.79, |
|
"learning_rate": 6.440093245969342e-07, |
|
"logits/chosen": -0.1631493866443634, |
|
"logits/rejected": -0.14262652397155762, |
|
"logps/chosen": -531.8765869140625, |
|
"logps/rejected": -570.99365234375, |
|
"loss": 0.2753, |
|
"rewards/accuracies": 0.48750001192092896, |
|
"rewards/chosen": -0.11439726501703262, |
|
"rewards/margins": 0.05860968679189682, |
|
"rewards/rejected": -0.17300695180892944, |
|
"step": 740 |
|
}, |
|
{ |
|
"epoch": 0.8, |
|
"learning_rate": 5.828941496744075e-07, |
|
"logits/chosen": -0.13462567329406738, |
|
"logits/rejected": -0.10775252431631088, |
|
"logps/chosen": -528.6737060546875, |
|
"logps/rejected": -559.4716796875, |
|
"loss": 0.2773, |
|
"rewards/accuracies": 0.518750011920929, |
|
"rewards/chosen": -0.11941119283437729, |
|
"rewards/margins": 0.06873499602079391, |
|
"rewards/rejected": -0.1881461888551712, |
|
"step": 750 |
|
}, |
|
{ |
|
"epoch": 0.81, |
|
"learning_rate": 5.244411731951671e-07, |
|
"logits/chosen": -0.10648471117019653, |
|
"logits/rejected": -0.12774226069450378, |
|
"logps/chosen": -540.2073974609375, |
|
"logps/rejected": -609.1519165039062, |
|
"loss": 0.2717, |
|
"rewards/accuracies": 0.53125, |
|
"rewards/chosen": -0.1273835450410843, |
|
"rewards/margins": 0.07286655902862549, |
|
"rewards/rejected": -0.2002500742673874, |
|
"step": 760 |
|
}, |
|
{ |
|
"epoch": 0.82, |
|
"learning_rate": 4.6873156617173594e-07, |
|
"logits/chosen": -0.2068806141614914, |
|
"logits/rejected": -0.1860518753528595, |
|
"logps/chosen": -510.51983642578125, |
|
"logps/rejected": -575.1392822265625, |
|
"loss": 0.2802, |
|
"rewards/accuracies": 0.46875, |
|
"rewards/chosen": -0.1164456382393837, |
|
"rewards/margins": 0.06441595405340195, |
|
"rewards/rejected": -0.18086162209510803, |
|
"step": 770 |
|
}, |
|
{ |
|
"epoch": 0.83, |
|
"learning_rate": 4.1584269002318653e-07, |
|
"logits/chosen": -0.23118607699871063, |
|
"logits/rejected": 0.012592856772243977, |
|
"logps/chosen": -493.88201904296875, |
|
"logps/rejected": -535.3624267578125, |
|
"loss": 0.2541, |
|
"rewards/accuracies": 0.44999998807907104, |
|
"rewards/chosen": -0.10904928296804428, |
|
"rewards/margins": 0.06981517374515533, |
|
"rewards/rejected": -0.17886444926261902, |
|
"step": 780 |
|
}, |
|
{ |
|
"epoch": 0.84, |
|
"learning_rate": 3.658479891468258e-07, |
|
"logits/chosen": -0.13056764006614685, |
|
"logits/rejected": -0.06715533137321472, |
|
"logps/chosen": -526.5809326171875, |
|
"logps/rejected": -599.15771484375, |
|
"loss": 0.2635, |
|
"rewards/accuracies": 0.512499988079071, |
|
"rewards/chosen": -0.12021216005086899, |
|
"rewards/margins": 0.07990214973688126, |
|
"rewards/rejected": -0.20011429488658905, |
|
"step": 790 |
|
}, |
|
{ |
|
"epoch": 0.85, |
|
"learning_rate": 3.18816888929272e-07, |
|
"logits/chosen": -0.2064342051744461, |
|
"logits/rejected": -0.1310141682624817, |
|
"logps/chosen": -503.92022705078125, |
|
"logps/rejected": -535.3073120117188, |
|
"loss": 0.2788, |
|
"rewards/accuracies": 0.4375, |
|
"rewards/chosen": -0.1168203130364418, |
|
"rewards/margins": 0.04998582974076271, |
|
"rewards/rejected": -0.1668061465024948, |
|
"step": 800 |
|
}, |
|
{ |
|
"epoch": 0.86, |
|
"learning_rate": 2.748146993385484e-07, |
|
"logits/chosen": -0.15658999979496002, |
|
"logits/rejected": -0.05465535447001457, |
|
"logps/chosen": -493.89666748046875, |
|
"logps/rejected": -565.4508056640625, |
|
"loss": 0.2593, |
|
"rewards/accuracies": 0.5062500238418579, |
|
"rewards/chosen": -0.11054392158985138, |
|
"rewards/margins": 0.062444061040878296, |
|
"rewards/rejected": -0.17298798263072968, |
|
"step": 810 |
|
}, |
|
{ |
|
"epoch": 0.87, |
|
"learning_rate": 2.3390252423108077e-07, |
|
"logits/chosen": -0.12076146900653839, |
|
"logits/rejected": -0.07717995345592499, |
|
"logps/chosen": -557.0513305664062, |
|
"logps/rejected": -630.1217041015625, |
|
"loss": 0.26, |
|
"rewards/accuracies": 0.53125, |
|
"rewards/chosen": -0.12639924883842468, |
|
"rewards/margins": 0.06895993649959564, |
|
"rewards/rejected": -0.19535920023918152, |
|
"step": 820 |
|
}, |
|
{ |
|
"epoch": 0.89, |
|
"learning_rate": 1.961371764995243e-07, |
|
"logits/chosen": -0.18750372529029846, |
|
"logits/rejected": -0.20195484161376953, |
|
"logps/chosen": -431.05615234375, |
|
"logps/rejected": -488.2513732910156, |
|
"loss": 0.2672, |
|
"rewards/accuracies": 0.41874998807907104, |
|
"rewards/chosen": -0.0998988226056099, |
|
"rewards/margins": 0.054485417902469635, |
|
"rewards/rejected": -0.15438422560691833, |
|
"step": 830 |
|
}, |
|
{ |
|
"epoch": 0.9, |
|
"learning_rate": 1.61571099179261e-07, |
|
"logits/chosen": -0.17934174835681915, |
|
"logits/rejected": -0.13482218980789185, |
|
"logps/chosen": -465.31268310546875, |
|
"logps/rejected": -541.5718994140625, |
|
"loss": 0.2379, |
|
"rewards/accuracies": 0.48750001192092896, |
|
"rewards/chosen": -0.11106850951910019, |
|
"rewards/margins": 0.07404305785894394, |
|
"rewards/rejected": -0.18511156737804413, |
|
"step": 840 |
|
}, |
|
{ |
|
"epoch": 0.91, |
|
"learning_rate": 1.3025229262312367e-07, |
|
"logits/chosen": -0.16116994619369507, |
|
"logits/rejected": -0.06133908033370972, |
|
"logps/chosen": -567.8401489257812, |
|
"logps/rejected": -617.7798461914062, |
|
"loss": 0.2591, |
|
"rewards/accuracies": 0.5375000238418579, |
|
"rewards/chosen": -0.12380583584308624, |
|
"rewards/margins": 0.07460357248783112, |
|
"rewards/rejected": -0.19840940833091736, |
|
"step": 850 |
|
}, |
|
{ |
|
"epoch": 0.92, |
|
"learning_rate": 1.0222424784546853e-07, |
|
"logits/chosen": -0.09671641886234283, |
|
"logits/rejected": -0.10632093995809555, |
|
"logps/chosen": -526.3566284179688, |
|
"logps/rejected": -554.2623291015625, |
|
"loss": 0.271, |
|
"rewards/accuracies": 0.41874998807907104, |
|
"rewards/chosen": -0.1161075010895729, |
|
"rewards/margins": 0.05733795836567879, |
|
"rewards/rejected": -0.1734454333782196, |
|
"step": 860 |
|
}, |
|
{ |
|
"epoch": 0.93, |
|
"learning_rate": 7.752588612816553e-08, |
|
"logits/chosen": -0.15584774315357208, |
|
"logits/rejected": -0.18242886662483215, |
|
"logps/chosen": -535.07373046875, |
|
"logps/rejected": -580.4075927734375, |
|
"loss": 0.2676, |
|
"rewards/accuracies": 0.48750001192092896, |
|
"rewards/chosen": -0.12142983824014664, |
|
"rewards/margins": 0.06167648360133171, |
|
"rewards/rejected": -0.18310633301734924, |
|
"step": 870 |
|
}, |
|
{ |
|
"epoch": 0.94, |
|
"learning_rate": 5.619150497236991e-08, |
|
"logits/chosen": -0.09849689900875092, |
|
"logits/rejected": -0.10616960376501083, |
|
"logps/chosen": -480.7345275878906, |
|
"logps/rejected": -568.6452026367188, |
|
"loss": 0.2573, |
|
"rewards/accuracies": 0.48124998807907104, |
|
"rewards/chosen": -0.12242833524942398, |
|
"rewards/margins": 0.07691850513219833, |
|
"rewards/rejected": -0.19934681057929993, |
|
"step": 880 |
|
}, |
|
{ |
|
"epoch": 0.95, |
|
"learning_rate": 3.825073047112743e-08, |
|
"logits/chosen": -0.1931258887052536, |
|
"logits/rejected": -0.14527785778045654, |
|
"logps/chosen": -525.9476318359375, |
|
"logps/rejected": -556.6074829101562, |
|
"loss": 0.2718, |
|
"rewards/accuracies": 0.48750001192092896, |
|
"rewards/chosen": -0.1265556812286377, |
|
"rewards/margins": 0.05168802663683891, |
|
"rewards/rejected": -0.1782437115907669, |
|
"step": 890 |
|
}, |
|
{ |
|
"epoch": 0.96, |
|
"learning_rate": 2.372847616895685e-08, |
|
"logits/chosen": -0.1395512819290161, |
|
"logits/rejected": -0.15351735055446625, |
|
"logps/chosen": -519.0070190429688, |
|
"logps/rejected": -630.4365234375, |
|
"loss": 0.25, |
|
"rewards/accuracies": 0.550000011920929, |
|
"rewards/chosen": -0.12783722579479218, |
|
"rewards/margins": 0.08357492834329605, |
|
"rewards/rejected": -0.21141216158866882, |
|
"step": 900 |
|
}, |
|
{ |
|
"epoch": 0.97, |
|
"learning_rate": 1.264490846553279e-08, |
|
"logits/chosen": -0.1655835509300232, |
|
"logits/rejected": -0.06148504465818405, |
|
"logps/chosen": -530.4078979492188, |
|
"logps/rejected": -588.1494140625, |
|
"loss": 0.2773, |
|
"rewards/accuracies": 0.5062500238418579, |
|
"rewards/chosen": -0.13116273283958435, |
|
"rewards/margins": 0.060104191303253174, |
|
"rewards/rejected": -0.19126692414283752, |
|
"step": 910 |
|
}, |
|
{ |
|
"epoch": 0.98, |
|
"learning_rate": 5.015418611516165e-09, |
|
"logits/chosen": -0.13306137919425964, |
|
"logits/rejected": -0.1015244722366333, |
|
"logps/chosen": -540.9669189453125, |
|
"logps/rejected": -633.6178588867188, |
|
"loss": 0.2619, |
|
"rewards/accuracies": 0.53125, |
|
"rewards/chosen": -0.13009101152420044, |
|
"rewards/margins": 0.07983705401420593, |
|
"rewards/rejected": -0.20992806553840637, |
|
"step": 920 |
|
}, |
|
{ |
|
"epoch": 0.99, |
|
"learning_rate": 8.506013354186993e-10, |
|
"logits/chosen": -0.16086629033088684, |
|
"logits/rejected": -0.07110301405191422, |
|
"logps/chosen": -540.5888061523438, |
|
"logps/rejected": -531.308349609375, |
|
"loss": 0.2795, |
|
"rewards/accuracies": 0.45625001192092896, |
|
"rewards/chosen": -0.11827180534601212, |
|
"rewards/margins": 0.032559461891651154, |
|
"rewards/rejected": -0.15083125233650208, |
|
"step": 930 |
|
}, |
|
{ |
|
"epoch": 1.0, |
|
"step": 937, |
|
"total_flos": 0.0, |
|
"train_loss": 0.275421927202982, |
|
"train_runtime": 7850.8319, |
|
"train_samples_per_second": 3.821, |
|
"train_steps_per_second": 0.119 |
|
} |
|
], |
|
"logging_steps": 10, |
|
"max_steps": 937, |
|
"num_input_tokens_seen": 0, |
|
"num_train_epochs": 1, |
|
"save_steps": 100, |
|
"total_flos": 0.0, |
|
"train_batch_size": 4, |
|
"trial_name": null, |
|
"trial_params": null |
|
} |
|
|