|
{ |
|
"best_metric": null, |
|
"best_model_checkpoint": null, |
|
"epoch": 0.9984, |
|
"eval_steps": 500, |
|
"global_step": 468, |
|
"is_hyper_param_search": false, |
|
"is_local_process_zero": true, |
|
"is_world_process_zero": true, |
|
"log_history": [ |
|
{ |
|
"epoch": 0.0, |
|
"learning_rate": 1.0638297872340426e-07, |
|
"logits/chosen": 0.1359557956457138, |
|
"logits/rejected": 0.030706744641065598, |
|
"logps/chosen": -736.0869140625, |
|
"logps/rejected": -613.6344604492188, |
|
"loss": 2.0331, |
|
"rewards/accuracies": 0.0, |
|
"rewards/chosen": 0.0, |
|
"rewards/margins": 0.0, |
|
"rewards/rejected": 0.0, |
|
"step": 1 |
|
}, |
|
{ |
|
"epoch": 0.02, |
|
"learning_rate": 1.0638297872340427e-06, |
|
"logits/chosen": 0.11667777597904205, |
|
"logits/rejected": 0.26604601740837097, |
|
"logps/chosen": -546.5281982421875, |
|
"logps/rejected": -597.5736083984375, |
|
"loss": 2.1592, |
|
"rewards/accuracies": 0.4583333432674408, |
|
"rewards/chosen": 0.0007250224007293582, |
|
"rewards/margins": 0.00040180076030083, |
|
"rewards/rejected": 0.0003232216986361891, |
|
"step": 10 |
|
}, |
|
{ |
|
"epoch": 0.04, |
|
"learning_rate": 2.1276595744680853e-06, |
|
"logits/chosen": 0.16373148560523987, |
|
"logits/rejected": 0.2677033543586731, |
|
"logps/chosen": -604.6590576171875, |
|
"logps/rejected": -649.482177734375, |
|
"loss": 2.0972, |
|
"rewards/accuracies": 0.4375, |
|
"rewards/chosen": -0.0005862273974344134, |
|
"rewards/margins": -0.0003054165281355381, |
|
"rewards/rejected": -0.0002808108984027058, |
|
"step": 20 |
|
}, |
|
{ |
|
"epoch": 0.06, |
|
"learning_rate": 3.191489361702128e-06, |
|
"logits/chosen": 0.14978544414043427, |
|
"logits/rejected": 0.1915779411792755, |
|
"logps/chosen": -594.8548583984375, |
|
"logps/rejected": -588.2429809570312, |
|
"loss": 2.122, |
|
"rewards/accuracies": 0.512499988079071, |
|
"rewards/chosen": -0.004188057966530323, |
|
"rewards/margins": 0.0009490737575106323, |
|
"rewards/rejected": -0.0051371315494179726, |
|
"step": 30 |
|
}, |
|
{ |
|
"epoch": 0.09, |
|
"learning_rate": 4.255319148936171e-06, |
|
"logits/chosen": 0.16862796247005463, |
|
"logits/rejected": 0.23586151003837585, |
|
"logps/chosen": -574.7235107421875, |
|
"logps/rejected": -631.8544921875, |
|
"loss": 2.1863, |
|
"rewards/accuracies": 0.5375000238418579, |
|
"rewards/chosen": -0.01229151152074337, |
|
"rewards/margins": 0.005582691170275211, |
|
"rewards/rejected": -0.017874203622341156, |
|
"step": 40 |
|
}, |
|
{ |
|
"epoch": 0.11, |
|
"learning_rate": 4.999373573764188e-06, |
|
"logits/chosen": 0.1411871314048767, |
|
"logits/rejected": 0.2258455753326416, |
|
"logps/chosen": -612.8582763671875, |
|
"logps/rejected": -636.5026245117188, |
|
"loss": 2.1508, |
|
"rewards/accuracies": 0.5375000238418579, |
|
"rewards/chosen": -0.04941480979323387, |
|
"rewards/margins": 0.019247086718678474, |
|
"rewards/rejected": -0.0686618983745575, |
|
"step": 50 |
|
}, |
|
{ |
|
"epoch": 0.13, |
|
"learning_rate": 4.988245838331339e-06, |
|
"logits/chosen": 0.17244111001491547, |
|
"logits/rejected": 0.17342150211334229, |
|
"logps/chosen": -634.6348266601562, |
|
"logps/rejected": -667.5384521484375, |
|
"loss": 2.0758, |
|
"rewards/accuracies": 0.581250011920929, |
|
"rewards/chosen": -0.13025899231433868, |
|
"rewards/margins": 0.05111612752079964, |
|
"rewards/rejected": -0.18137511610984802, |
|
"step": 60 |
|
}, |
|
{ |
|
"epoch": 0.15, |
|
"learning_rate": 4.963268819535228e-06, |
|
"logits/chosen": 0.12650486826896667, |
|
"logits/rejected": 0.14093999564647675, |
|
"logps/chosen": -608.5107421875, |
|
"logps/rejected": -702.1578369140625, |
|
"loss": 2.0556, |
|
"rewards/accuracies": 0.5625, |
|
"rewards/chosen": -0.21341009438037872, |
|
"rewards/margins": 0.09893321990966797, |
|
"rewards/rejected": -0.3123432993888855, |
|
"step": 70 |
|
}, |
|
{ |
|
"epoch": 0.17, |
|
"learning_rate": 4.9245815365216115e-06, |
|
"logits/chosen": 0.19184628129005432, |
|
"logits/rejected": 0.2408786565065384, |
|
"logps/chosen": -679.4183349609375, |
|
"logps/rejected": -609.7093505859375, |
|
"loss": 2.1137, |
|
"rewards/accuracies": 0.5562499761581421, |
|
"rewards/chosen": -0.21463651955127716, |
|
"rewards/margins": 0.05772104859352112, |
|
"rewards/rejected": -0.2723575234413147, |
|
"step": 80 |
|
}, |
|
{ |
|
"epoch": 0.19, |
|
"learning_rate": 4.872399318152594e-06, |
|
"logits/chosen": 0.1250939965248108, |
|
"logits/rejected": 0.18045032024383545, |
|
"logps/chosen": -622.2333374023438, |
|
"logps/rejected": -655.4575805664062, |
|
"loss": 2.0044, |
|
"rewards/accuracies": 0.606249988079071, |
|
"rewards/chosen": -0.1839352548122406, |
|
"rewards/margins": 0.10977420955896378, |
|
"rewards/rejected": -0.2937094569206238, |
|
"step": 90 |
|
}, |
|
{ |
|
"epoch": 0.21, |
|
"learning_rate": 4.807012604511542e-06, |
|
"logits/chosen": 0.18265239894390106, |
|
"logits/rejected": 0.2614283859729767, |
|
"logps/chosen": -649.8997802734375, |
|
"logps/rejected": -658.8975830078125, |
|
"loss": 1.9995, |
|
"rewards/accuracies": 0.643750011920929, |
|
"rewards/chosen": -0.16064395010471344, |
|
"rewards/margins": 0.08805385231971741, |
|
"rewards/rejected": -0.24869783222675323, |
|
"step": 100 |
|
}, |
|
{ |
|
"epoch": 0.23, |
|
"learning_rate": 4.728785330347771e-06, |
|
"logits/chosen": 0.2479465901851654, |
|
"logits/rejected": 0.2932817339897156, |
|
"logps/chosen": -674.0836181640625, |
|
"logps/rejected": -645.6417236328125, |
|
"loss": 1.895, |
|
"rewards/accuracies": 0.606249988079071, |
|
"rewards/chosen": -0.12688389420509338, |
|
"rewards/margins": 0.08782283961772919, |
|
"rewards/rejected": -0.21470670402050018, |
|
"step": 110 |
|
}, |
|
{ |
|
"epoch": 0.26, |
|
"learning_rate": 4.63815289945858e-06, |
|
"logits/chosen": 0.19643843173980713, |
|
"logits/rejected": 0.2974274456501007, |
|
"logps/chosen": -573.49658203125, |
|
"logps/rejected": -666.606689453125, |
|
"loss": 1.89, |
|
"rewards/accuracies": 0.6812499761581421, |
|
"rewards/chosen": -0.09824337065219879, |
|
"rewards/margins": 0.13982543349266052, |
|
"rewards/rejected": -0.2380688190460205, |
|
"step": 120 |
|
}, |
|
{ |
|
"epoch": 0.28, |
|
"learning_rate": 4.535619761282989e-06, |
|
"logits/chosen": 0.23821644484996796, |
|
"logits/rejected": 0.288485586643219, |
|
"logps/chosen": -590.9158935546875, |
|
"logps/rejected": -623.23974609375, |
|
"loss": 1.9389, |
|
"rewards/accuracies": 0.612500011920929, |
|
"rewards/chosen": -0.14589470624923706, |
|
"rewards/margins": 0.12624357640743256, |
|
"rewards/rejected": -0.2721382975578308, |
|
"step": 130 |
|
}, |
|
{ |
|
"epoch": 0.3, |
|
"learning_rate": 4.42175660319555e-06, |
|
"logits/chosen": 0.2631734013557434, |
|
"logits/rejected": 0.2810806632041931, |
|
"logps/chosen": -645.8680419921875, |
|
"logps/rejected": -654.8004760742188, |
|
"loss": 1.8203, |
|
"rewards/accuracies": 0.6187499761581421, |
|
"rewards/chosen": -0.20492109656333923, |
|
"rewards/margins": 0.20386295020580292, |
|
"rewards/rejected": -0.40878406167030334, |
|
"step": 140 |
|
}, |
|
{ |
|
"epoch": 0.32, |
|
"learning_rate": 4.297197174127619e-06, |
|
"logits/chosen": 0.2586398422718048, |
|
"logits/rejected": 0.3086986839771271, |
|
"logps/chosen": -619.4220581054688, |
|
"logps/rejected": -697.2005615234375, |
|
"loss": 1.7553, |
|
"rewards/accuracies": 0.6187499761581421, |
|
"rewards/chosen": -0.24971242249011993, |
|
"rewards/margins": 0.2221045196056366, |
|
"rewards/rejected": -0.4718169569969177, |
|
"step": 150 |
|
}, |
|
{ |
|
"epoch": 0.34, |
|
"learning_rate": 4.162634757195418e-06, |
|
"logits/chosen": 0.2681664526462555, |
|
"logits/rejected": 0.2807798683643341, |
|
"logps/chosen": -630.39306640625, |
|
"logps/rejected": -645.6117553710938, |
|
"loss": 1.8404, |
|
"rewards/accuracies": 0.612500011920929, |
|
"rewards/chosen": -0.19384464621543884, |
|
"rewards/margins": 0.1983100175857544, |
|
"rewards/rejected": -0.39215466380119324, |
|
"step": 160 |
|
}, |
|
{ |
|
"epoch": 0.36, |
|
"learning_rate": 4.018818310967843e-06, |
|
"logits/chosen": 0.27496370673179626, |
|
"logits/rejected": 0.30781346559524536, |
|
"logps/chosen": -559.2887573242188, |
|
"logps/rejected": -601.3917846679688, |
|
"loss": 1.8382, |
|
"rewards/accuracies": 0.637499988079071, |
|
"rewards/chosen": -0.0747746005654335, |
|
"rewards/margins": 0.19791939854621887, |
|
"rewards/rejected": -0.2726939916610718, |
|
"step": 170 |
|
}, |
|
{ |
|
"epoch": 0.38, |
|
"learning_rate": 3.866548300851254e-06, |
|
"logits/chosen": 0.2482290267944336, |
|
"logits/rejected": 0.2852781414985657, |
|
"logps/chosen": -620.8068237304688, |
|
"logps/rejected": -665.9005737304688, |
|
"loss": 1.8229, |
|
"rewards/accuracies": 0.625, |
|
"rewards/chosen": -0.10497160255908966, |
|
"rewards/margins": 0.20543234050273895, |
|
"rewards/rejected": -0.3104039430618286, |
|
"step": 180 |
|
}, |
|
{ |
|
"epoch": 0.41, |
|
"learning_rate": 3.706672243793271e-06, |
|
"logits/chosen": 0.2958913743495941, |
|
"logits/rejected": 0.3795389235019684, |
|
"logps/chosen": -611.8587646484375, |
|
"logps/rejected": -658.9635009765625, |
|
"loss": 1.7752, |
|
"rewards/accuracies": 0.668749988079071, |
|
"rewards/chosen": -0.0870656967163086, |
|
"rewards/margins": 0.23995642364025116, |
|
"rewards/rejected": -0.32702213525772095, |
|
"step": 190 |
|
}, |
|
{ |
|
"epoch": 0.43, |
|
"learning_rate": 3.5400799911032357e-06, |
|
"logits/chosen": 0.2935205101966858, |
|
"logits/rejected": 0.3416239321231842, |
|
"logps/chosen": -660.2877197265625, |
|
"logps/rejected": -730.04541015625, |
|
"loss": 1.7351, |
|
"rewards/accuracies": 0.668749988079071, |
|
"rewards/chosen": -0.1828436255455017, |
|
"rewards/margins": 0.3010478913784027, |
|
"rewards/rejected": -0.4838915765285492, |
|
"step": 200 |
|
}, |
|
{ |
|
"epoch": 0.45, |
|
"learning_rate": 3.3676987756445894e-06, |
|
"logits/chosen": 0.24807122349739075, |
|
"logits/rejected": 0.32862648367881775, |
|
"logps/chosen": -605.8773193359375, |
|
"logps/rejected": -641.6677856445312, |
|
"loss": 1.8245, |
|
"rewards/accuracies": 0.7124999761581421, |
|
"rewards/chosen": -0.13868902623653412, |
|
"rewards/margins": 0.2735101878643036, |
|
"rewards/rejected": -0.4121991991996765, |
|
"step": 210 |
|
}, |
|
{ |
|
"epoch": 0.47, |
|
"learning_rate": 3.1904880509659397e-06, |
|
"logits/chosen": 0.270724892616272, |
|
"logits/rejected": 0.3151053786277771, |
|
"logps/chosen": -650.7314453125, |
|
"logps/rejected": -708.2312622070312, |
|
"loss": 1.735, |
|
"rewards/accuracies": 0.637499988079071, |
|
"rewards/chosen": -0.16527561843395233, |
|
"rewards/margins": 0.2484448254108429, |
|
"rewards/rejected": -0.4137204587459564, |
|
"step": 220 |
|
}, |
|
{ |
|
"epoch": 0.49, |
|
"learning_rate": 3.0094341510955697e-06, |
|
"logits/chosen": 0.19233042001724243, |
|
"logits/rejected": 0.29483872652053833, |
|
"logps/chosen": -663.5474243164062, |
|
"logps/rejected": -743.0173950195312, |
|
"loss": 1.7378, |
|
"rewards/accuracies": 0.7562500238418579, |
|
"rewards/chosen": -0.14797742664813995, |
|
"rewards/margins": 0.3706679344177246, |
|
"rewards/rejected": -0.5186454057693481, |
|
"step": 230 |
|
}, |
|
{ |
|
"epoch": 0.51, |
|
"learning_rate": 2.825544800722376e-06, |
|
"logits/chosen": 0.2124979943037033, |
|
"logits/rejected": 0.3365432620048523, |
|
"logps/chosen": -619.9740600585938, |
|
"logps/rejected": -700.7166748046875, |
|
"loss": 1.8168, |
|
"rewards/accuracies": 0.5874999761581421, |
|
"rewards/chosen": -0.1956629902124405, |
|
"rewards/margins": 0.2987174093723297, |
|
"rewards/rejected": -0.494380384683609, |
|
"step": 240 |
|
}, |
|
{ |
|
"epoch": 0.53, |
|
"learning_rate": 2.639843506318899e-06, |
|
"logits/chosen": 0.2796134054660797, |
|
"logits/rejected": 0.2740449607372284, |
|
"logps/chosen": -582.3416748046875, |
|
"logps/rejected": -674.327880859375, |
|
"loss": 1.8901, |
|
"rewards/accuracies": 0.6187499761581421, |
|
"rewards/chosen": -0.19822832942008972, |
|
"rewards/margins": 0.19228845834732056, |
|
"rewards/rejected": -0.3905167877674103, |
|
"step": 250 |
|
}, |
|
{ |
|
"epoch": 0.55, |
|
"learning_rate": 2.4533638594248094e-06, |
|
"logits/chosen": 0.25897207856178284, |
|
"logits/rejected": 0.31485193967819214, |
|
"logps/chosen": -604.8118896484375, |
|
"logps/rejected": -667.9144897460938, |
|
"loss": 1.8606, |
|
"rewards/accuracies": 0.6499999761581421, |
|
"rewards/chosen": -0.14205999672412872, |
|
"rewards/margins": 0.28450149297714233, |
|
"rewards/rejected": -0.42656150460243225, |
|
"step": 260 |
|
}, |
|
{ |
|
"epoch": 0.58, |
|
"learning_rate": 2.2671437837980943e-06, |
|
"logits/chosen": 0.22259187698364258, |
|
"logits/rejected": 0.22855930030345917, |
|
"logps/chosen": -593.6612548828125, |
|
"logps/rejected": -673.6566162109375, |
|
"loss": 1.7486, |
|
"rewards/accuracies": 0.6499999761581421, |
|
"rewards/chosen": -0.14823240041732788, |
|
"rewards/margins": 0.2802043557167053, |
|
"rewards/rejected": -0.4284366965293884, |
|
"step": 270 |
|
}, |
|
{ |
|
"epoch": 0.6, |
|
"learning_rate": 2.082219758453629e-06, |
|
"logits/chosen": 0.2169434130191803, |
|
"logits/rejected": 0.2703471779823303, |
|
"logps/chosen": -611.6048583984375, |
|
"logps/rejected": -682.5806884765625, |
|
"loss": 1.6556, |
|
"rewards/accuracies": 0.6625000238418579, |
|
"rewards/chosen": -0.11671599000692368, |
|
"rewards/margins": 0.26952022314071655, |
|
"rewards/rejected": -0.3862362205982208, |
|
"step": 280 |
|
}, |
|
{ |
|
"epoch": 0.62, |
|
"learning_rate": 1.899621048743019e-06, |
|
"logits/chosen": 0.22146745026111603, |
|
"logits/rejected": 0.34733515977859497, |
|
"logps/chosen": -603.9933471679688, |
|
"logps/rejected": -673.3649291992188, |
|
"loss": 1.7238, |
|
"rewards/accuracies": 0.65625, |
|
"rewards/chosen": -0.20439250767230988, |
|
"rewards/margins": 0.2682177424430847, |
|
"rewards/rejected": -0.4726102352142334, |
|
"step": 290 |
|
}, |
|
{ |
|
"epoch": 0.64, |
|
"learning_rate": 1.7203639775848423e-06, |
|
"logits/chosen": 0.19099445641040802, |
|
"logits/rejected": 0.3011043667793274, |
|
"logps/chosen": -606.6263427734375, |
|
"logps/rejected": -639.6136474609375, |
|
"loss": 1.8381, |
|
"rewards/accuracies": 0.643750011920929, |
|
"rewards/chosen": -0.17796705663204193, |
|
"rewards/margins": 0.23042461276054382, |
|
"rewards/rejected": -0.40839165449142456, |
|
"step": 300 |
|
}, |
|
{ |
|
"epoch": 0.66, |
|
"learning_rate": 1.5454462687309445e-06, |
|
"logits/chosen": 0.2036764919757843, |
|
"logits/rejected": 0.26239025592803955, |
|
"logps/chosen": -602.3845825195312, |
|
"logps/rejected": -666.4627075195312, |
|
"loss": 1.8042, |
|
"rewards/accuracies": 0.6499999761581421, |
|
"rewards/chosen": -0.1518932580947876, |
|
"rewards/margins": 0.2536298632621765, |
|
"rewards/rejected": -0.4055231511592865, |
|
"step": 310 |
|
}, |
|
{ |
|
"epoch": 0.68, |
|
"learning_rate": 1.3758414935535147e-06, |
|
"logits/chosen": 0.21739721298217773, |
|
"logits/rejected": 0.2840099334716797, |
|
"logps/chosen": -636.0455322265625, |
|
"logps/rejected": -709.1137084960938, |
|
"loss": 1.65, |
|
"rewards/accuracies": 0.65625, |
|
"rewards/chosen": -0.16815349459648132, |
|
"rewards/margins": 0.29733169078826904, |
|
"rewards/rejected": -0.465485155582428, |
|
"step": 320 |
|
}, |
|
{ |
|
"epoch": 0.7, |
|
"learning_rate": 1.2124936522614622e-06, |
|
"logits/chosen": 0.20938508212566376, |
|
"logits/rejected": 0.22490420937538147, |
|
"logps/chosen": -615.7994995117188, |
|
"logps/rejected": -669.2200927734375, |
|
"loss": 1.7098, |
|
"rewards/accuracies": 0.6937500238418579, |
|
"rewards/chosen": -0.18394342064857483, |
|
"rewards/margins": 0.31033387780189514, |
|
"rewards/rejected": -0.49427732825279236, |
|
"step": 330 |
|
}, |
|
{ |
|
"epoch": 0.73, |
|
"learning_rate": 1.0563119197063934e-06, |
|
"logits/chosen": 0.23827771842479706, |
|
"logits/rejected": 0.2663131356239319, |
|
"logps/chosen": -612.7750244140625, |
|
"logps/rejected": -685.60107421875, |
|
"loss": 1.7109, |
|
"rewards/accuracies": 0.637499988079071, |
|
"rewards/chosen": -0.19161880016326904, |
|
"rewards/margins": 0.26392242312431335, |
|
"rewards/rejected": -0.4555412232875824, |
|
"step": 340 |
|
}, |
|
{ |
|
"epoch": 0.75, |
|
"learning_rate": 9.081655850224449e-07, |
|
"logits/chosen": 0.19827114045619965, |
|
"logits/rejected": 0.2343660295009613, |
|
"logps/chosen": -628.5892333984375, |
|
"logps/rejected": -699.3311767578125, |
|
"loss": 1.6981, |
|
"rewards/accuracies": 0.675000011920929, |
|
"rewards/chosen": -0.23514249920845032, |
|
"rewards/margins": 0.30311545729637146, |
|
"rewards/rejected": -0.5382579565048218, |
|
"step": 350 |
|
}, |
|
{ |
|
"epoch": 0.77, |
|
"learning_rate": 7.688792132653111e-07, |
|
"logits/chosen": 0.19120459258556366, |
|
"logits/rejected": 0.2861759066581726, |
|
"logps/chosen": -659.7528076171875, |
|
"logps/rejected": -748.490234375, |
|
"loss": 1.6967, |
|
"rewards/accuracies": 0.675000011920929, |
|
"rewards/chosen": -0.19031907618045807, |
|
"rewards/margins": 0.34352895617485046, |
|
"rewards/rejected": -0.533847987651825, |
|
"step": 360 |
|
}, |
|
{ |
|
"epoch": 0.79, |
|
"learning_rate": 6.392280559802341e-07, |
|
"logits/chosen": 0.2406836450099945, |
|
"logits/rejected": 0.23908407986164093, |
|
"logps/chosen": -658.35400390625, |
|
"logps/rejected": -720.8883666992188, |
|
"loss": 1.7368, |
|
"rewards/accuracies": 0.625, |
|
"rewards/chosen": -0.23702308535575867, |
|
"rewards/margins": 0.24957367777824402, |
|
"rewards/rejected": -0.48659682273864746, |
|
"step": 370 |
|
}, |
|
{ |
|
"epoch": 0.81, |
|
"learning_rate": 5.199337362431792e-07, |
|
"logits/chosen": 0.26719361543655396, |
|
"logits/rejected": 0.1743316501379013, |
|
"logps/chosen": -621.3897094726562, |
|
"logps/rejected": -680.0, |
|
"loss": 1.7425, |
|
"rewards/accuracies": 0.6499999761581421, |
|
"rewards/chosen": -0.22942551970481873, |
|
"rewards/margins": 0.26667481660842896, |
|
"rewards/rejected": -0.49610036611557007, |
|
"step": 380 |
|
}, |
|
{ |
|
"epoch": 0.83, |
|
"learning_rate": 4.1166023219176176e-07, |
|
"logits/chosen": 0.21561181545257568, |
|
"logits/rejected": 0.286629855632782, |
|
"logps/chosen": -654.0867919921875, |
|
"logps/rejected": -668.467529296875, |
|
"loss": 1.6798, |
|
"rewards/accuracies": 0.699999988079071, |
|
"rewards/chosen": -0.21663355827331543, |
|
"rewards/margins": 0.2600599527359009, |
|
"rewards/rejected": -0.4766935408115387, |
|
"step": 390 |
|
}, |
|
{ |
|
"epoch": 0.85, |
|
"learning_rate": 3.150101814011136e-07, |
|
"logits/chosen": 0.16323356330394745, |
|
"logits/rejected": 0.21500280499458313, |
|
"logps/chosen": -600.4713134765625, |
|
"logps/rejected": -730.5057983398438, |
|
"loss": 1.7084, |
|
"rewards/accuracies": 0.6499999761581421, |
|
"rewards/chosen": -0.19050315022468567, |
|
"rewards/margins": 0.28324562311172485, |
|
"rewards/rejected": -0.47374874353408813, |
|
"step": 400 |
|
}, |
|
{ |
|
"epoch": 0.87, |
|
"learning_rate": 2.3052152667409289e-07, |
|
"logits/chosen": 0.1962326616048813, |
|
"logits/rejected": 0.22506949305534363, |
|
"logps/chosen": -614.2760009765625, |
|
"logps/rejected": -675.3383178710938, |
|
"loss": 1.7679, |
|
"rewards/accuracies": 0.706250011920929, |
|
"rewards/chosen": -0.1459427773952484, |
|
"rewards/margins": 0.3252793252468109, |
|
"rewards/rejected": -0.4712221026420593, |
|
"step": 410 |
|
}, |
|
{ |
|
"epoch": 0.9, |
|
"learning_rate": 1.5866452191498488e-07, |
|
"logits/chosen": 0.20015636086463928, |
|
"logits/rejected": 0.25162121653556824, |
|
"logps/chosen": -651.9236450195312, |
|
"logps/rejected": -707.2882080078125, |
|
"loss": 1.7514, |
|
"rewards/accuracies": 0.59375, |
|
"rewards/chosen": -0.23218846321105957, |
|
"rewards/margins": 0.2290785312652588, |
|
"rewards/rejected": -0.46126699447631836, |
|
"step": 420 |
|
}, |
|
{ |
|
"epoch": 0.92, |
|
"learning_rate": 9.983911475163727e-08, |
|
"logits/chosen": 0.16698592901229858, |
|
"logits/rejected": 0.2591376304626465, |
|
"logps/chosen": -590.045166015625, |
|
"logps/rejected": -642.6705322265625, |
|
"loss": 1.8093, |
|
"rewards/accuracies": 0.6187499761581421, |
|
"rewards/chosen": -0.17988340556621552, |
|
"rewards/margins": 0.23005299270153046, |
|
"rewards/rejected": -0.4099363684654236, |
|
"step": 430 |
|
}, |
|
{ |
|
"epoch": 0.94, |
|
"learning_rate": 5.437272047405712e-08, |
|
"logits/chosen": 0.1858983337879181, |
|
"logits/rejected": 0.3158418536186218, |
|
"logps/chosen": -559.8682250976562, |
|
"logps/rejected": -648.7040405273438, |
|
"loss": 1.7686, |
|
"rewards/accuracies": 0.6312500238418579, |
|
"rewards/chosen": -0.19649046659469604, |
|
"rewards/margins": 0.26454511284828186, |
|
"rewards/rejected": -0.4610355794429779, |
|
"step": 440 |
|
}, |
|
{ |
|
"epoch": 0.96, |
|
"learning_rate": 2.251839967945535e-08, |
|
"logits/chosen": 0.13786078989505768, |
|
"logits/rejected": 0.2333669662475586, |
|
"logps/chosen": -645.2703857421875, |
|
"logps/rejected": -707.0418090820312, |
|
"loss": 1.6172, |
|
"rewards/accuracies": 0.6625000238418579, |
|
"rewards/chosen": -0.17300908267498016, |
|
"rewards/margins": 0.3292023241519928, |
|
"rewards/rejected": -0.5022113919258118, |
|
"step": 450 |
|
}, |
|
{ |
|
"epoch": 0.98, |
|
"learning_rate": 4.453449766758933e-09, |
|
"logits/chosen": 0.1742466688156128, |
|
"logits/rejected": 0.2268284559249878, |
|
"logps/chosen": -576.7985229492188, |
|
"logps/rejected": -652.7803344726562, |
|
"loss": 1.7297, |
|
"rewards/accuracies": 0.625, |
|
"rewards/chosen": -0.24071533977985382, |
|
"rewards/margins": 0.18981412053108215, |
|
"rewards/rejected": -0.4305294454097748, |
|
"step": 460 |
|
}, |
|
{ |
|
"epoch": 1.0, |
|
"step": 468, |
|
"total_flos": 0.0, |
|
"train_loss": 1.8394590188295414, |
|
"train_runtime": 15861.5475, |
|
"train_samples_per_second": 1.891, |
|
"train_steps_per_second": 0.03 |
|
} |
|
], |
|
"logging_steps": 10, |
|
"max_steps": 468, |
|
"num_input_tokens_seen": 0, |
|
"num_train_epochs": 1, |
|
"save_steps": 100, |
|
"total_flos": 0.0, |
|
"train_batch_size": 4, |
|
"trial_name": null, |
|
"trial_params": null |
|
} |
|
|