|
{ |
|
"best_metric": null, |
|
"best_model_checkpoint": null, |
|
"epoch": 0.9984301412872841, |
|
"eval_steps": 100, |
|
"global_step": 477, |
|
"is_hyper_param_search": false, |
|
"is_local_process_zero": true, |
|
"is_world_process_zero": true, |
|
"log_history": [ |
|
{ |
|
"epoch": 0.0, |
|
"learning_rate": 1.0416666666666666e-08, |
|
"logits/chosen": -2.7849442958831787, |
|
"logits/rejected": -2.6491470336914062, |
|
"logps/chosen": -296.04052734375, |
|
"logps/rejected": -290.067138671875, |
|
"loss": 0.6931, |
|
"rewards/accuracies": 0.0, |
|
"rewards/chosen": 0.0, |
|
"rewards/margins": 0.0, |
|
"rewards/rejected": 0.0, |
|
"step": 1 |
|
}, |
|
{ |
|
"epoch": 0.02, |
|
"learning_rate": 1.0416666666666667e-07, |
|
"logits/chosen": -2.6485776901245117, |
|
"logits/rejected": -2.668163776397705, |
|
"logps/chosen": -278.5010986328125, |
|
"logps/rejected": -242.14556884765625, |
|
"loss": 0.6931, |
|
"rewards/accuracies": 0.4375, |
|
"rewards/chosen": 0.00010202997509622946, |
|
"rewards/margins": -2.5319219275843352e-05, |
|
"rewards/rejected": 0.00012734916526824236, |
|
"step": 10 |
|
}, |
|
{ |
|
"epoch": 0.04, |
|
"learning_rate": 2.0833333333333333e-07, |
|
"logits/chosen": -2.680675506591797, |
|
"logits/rejected": -2.6557466983795166, |
|
"logps/chosen": -276.21734619140625, |
|
"logps/rejected": -246.01904296875, |
|
"loss": 0.6925, |
|
"rewards/accuracies": 0.5687500238418579, |
|
"rewards/chosen": 0.0011814588215202093, |
|
"rewards/margins": 0.0016449552495032549, |
|
"rewards/rejected": -0.00046349636977538466, |
|
"step": 20 |
|
}, |
|
{ |
|
"epoch": 0.06, |
|
"learning_rate": 3.1249999999999997e-07, |
|
"logits/chosen": -2.6720199584960938, |
|
"logits/rejected": -2.6549432277679443, |
|
"logps/chosen": -291.7905578613281, |
|
"logps/rejected": -274.3994140625, |
|
"loss": 0.6885, |
|
"rewards/accuracies": 0.671875, |
|
"rewards/chosen": 0.005823396146297455, |
|
"rewards/margins": 0.010018911212682724, |
|
"rewards/rejected": -0.0041955155320465565, |
|
"step": 30 |
|
}, |
|
{ |
|
"epoch": 0.08, |
|
"learning_rate": 4.1666666666666667e-07, |
|
"logits/chosen": -2.628032684326172, |
|
"logits/rejected": -2.619035482406616, |
|
"logps/chosen": -304.5899963378906, |
|
"logps/rejected": -277.96881103515625, |
|
"loss": 0.6757, |
|
"rewards/accuracies": 0.671875, |
|
"rewards/chosen": 0.029324647039175034, |
|
"rewards/margins": 0.036928076297044754, |
|
"rewards/rejected": -0.007603424601256847, |
|
"step": 40 |
|
}, |
|
{ |
|
"epoch": 0.1, |
|
"learning_rate": 4.999731868769026e-07, |
|
"logits/chosen": -2.573026180267334, |
|
"logits/rejected": -2.5565147399902344, |
|
"logps/chosen": -298.73712158203125, |
|
"logps/rejected": -287.00067138671875, |
|
"loss": 0.6603, |
|
"rewards/accuracies": 0.7093750238418579, |
|
"rewards/chosen": -0.0008149007335305214, |
|
"rewards/margins": 0.07650883495807648, |
|
"rewards/rejected": -0.07732372730970383, |
|
"step": 50 |
|
}, |
|
{ |
|
"epoch": 0.13, |
|
"learning_rate": 4.990353313429303e-07, |
|
"logits/chosen": -2.531870126724243, |
|
"logits/rejected": -2.4707417488098145, |
|
"logps/chosen": -287.1637268066406, |
|
"logps/rejected": -255.89389038085938, |
|
"loss": 0.6385, |
|
"rewards/accuracies": 0.6937500238418579, |
|
"rewards/chosen": -0.04180709645152092, |
|
"rewards/margins": 0.16505756974220276, |
|
"rewards/rejected": -0.20686468482017517, |
|
"step": 60 |
|
}, |
|
{ |
|
"epoch": 0.15, |
|
"learning_rate": 4.967625656594781e-07, |
|
"logits/chosen": -2.568704128265381, |
|
"logits/rejected": -2.5438482761383057, |
|
"logps/chosen": -302.0035705566406, |
|
"logps/rejected": -290.6741943359375, |
|
"loss": 0.6193, |
|
"rewards/accuracies": 0.6875, |
|
"rewards/chosen": -0.21947097778320312, |
|
"rewards/margins": 0.2124950885772705, |
|
"rewards/rejected": -0.43196606636047363, |
|
"step": 70 |
|
}, |
|
{ |
|
"epoch": 0.17, |
|
"learning_rate": 4.93167072587771e-07, |
|
"logits/chosen": -2.531491756439209, |
|
"logits/rejected": -2.5237112045288086, |
|
"logps/chosen": -306.1647033691406, |
|
"logps/rejected": -327.8365478515625, |
|
"loss": 0.6106, |
|
"rewards/accuracies": 0.684374988079071, |
|
"rewards/chosen": -0.19688533246517181, |
|
"rewards/margins": 0.20536482334136963, |
|
"rewards/rejected": -0.40225014090538025, |
|
"step": 80 |
|
}, |
|
{ |
|
"epoch": 0.19, |
|
"learning_rate": 4.882681251368548e-07, |
|
"logits/chosen": -2.4698944091796875, |
|
"logits/rejected": -2.4603869915008545, |
|
"logps/chosen": -288.2850341796875, |
|
"logps/rejected": -302.39886474609375, |
|
"loss": 0.5838, |
|
"rewards/accuracies": 0.721875011920929, |
|
"rewards/chosen": -0.2713998854160309, |
|
"rewards/margins": 0.3568421006202698, |
|
"rewards/rejected": -0.6282418966293335, |
|
"step": 90 |
|
}, |
|
{ |
|
"epoch": 0.21, |
|
"learning_rate": 4.820919832540181e-07, |
|
"logits/chosen": -2.0585427284240723, |
|
"logits/rejected": -1.9373886585235596, |
|
"logps/chosen": -344.68841552734375, |
|
"logps/rejected": -376.37469482421875, |
|
"loss": 0.5666, |
|
"rewards/accuracies": 0.721875011920929, |
|
"rewards/chosen": -0.6193090677261353, |
|
"rewards/margins": 0.4864380955696106, |
|
"rewards/rejected": -1.105747103691101, |
|
"step": 100 |
|
}, |
|
{ |
|
"epoch": 0.21, |
|
"eval_logits/chosen": -1.6453033685684204, |
|
"eval_logits/rejected": -1.5540069341659546, |
|
"eval_logps/chosen": -378.94012451171875, |
|
"eval_logps/rejected": -411.0334777832031, |
|
"eval_loss": 0.577957808971405, |
|
"eval_rewards/accuracies": 0.72817462682724, |
|
"eval_rewards/chosen": -0.9483685493469238, |
|
"eval_rewards/margins": 0.544223964214325, |
|
"eval_rewards/rejected": -1.4925925731658936, |
|
"eval_runtime": 208.6669, |
|
"eval_samples_per_second": 9.585, |
|
"eval_steps_per_second": 0.302, |
|
"step": 100 |
|
}, |
|
{ |
|
"epoch": 0.23, |
|
"learning_rate": 4.7467175306295647e-07, |
|
"logits/chosen": -1.7725579738616943, |
|
"logits/rejected": -1.6586863994598389, |
|
"logps/chosen": -357.3164978027344, |
|
"logps/rejected": -375.01446533203125, |
|
"loss": 0.5672, |
|
"rewards/accuracies": 0.7281249761581421, |
|
"rewards/chosen": -0.5988589525222778, |
|
"rewards/margins": 0.5000169277191162, |
|
"rewards/rejected": -1.098875880241394, |
|
"step": 110 |
|
}, |
|
{ |
|
"epoch": 0.25, |
|
"learning_rate": 4.6604720940421207e-07, |
|
"logits/chosen": -1.257874608039856, |
|
"logits/rejected": -1.0021642446517944, |
|
"logps/chosen": -349.68670654296875, |
|
"logps/rejected": -368.8821716308594, |
|
"loss": 0.5358, |
|
"rewards/accuracies": 0.7281249761581421, |
|
"rewards/chosen": -0.6418596506118774, |
|
"rewards/margins": 0.6733947992324829, |
|
"rewards/rejected": -1.3152544498443604, |
|
"step": 120 |
|
}, |
|
{ |
|
"epoch": 0.27, |
|
"learning_rate": 4.5626458262912735e-07, |
|
"logits/chosen": -1.2160699367523193, |
|
"logits/rejected": -0.9747945666313171, |
|
"logps/chosen": -334.8248291015625, |
|
"logps/rejected": -386.0693664550781, |
|
"loss": 0.5512, |
|
"rewards/accuracies": 0.7250000238418579, |
|
"rewards/chosen": -0.5901191830635071, |
|
"rewards/margins": 0.5741550922393799, |
|
"rewards/rejected": -1.1642743349075317, |
|
"step": 130 |
|
}, |
|
{ |
|
"epoch": 0.29, |
|
"learning_rate": 4.453763107901675e-07, |
|
"logits/chosen": -0.9076126217842102, |
|
"logits/rejected": -0.6055578589439392, |
|
"logps/chosen": -373.19189453125, |
|
"logps/rejected": -408.32000732421875, |
|
"loss": 0.5412, |
|
"rewards/accuracies": 0.762499988079071, |
|
"rewards/chosen": -0.7667745351791382, |
|
"rewards/margins": 0.648857057094574, |
|
"rewards/rejected": -1.415631651878357, |
|
"step": 140 |
|
}, |
|
{ |
|
"epoch": 0.31, |
|
"learning_rate": 4.3344075855595097e-07, |
|
"logits/chosen": -0.38298824429512024, |
|
"logits/rejected": -0.04839229956269264, |
|
"logps/chosen": -370.04217529296875, |
|
"logps/rejected": -408.8004150390625, |
|
"loss": 0.5284, |
|
"rewards/accuracies": 0.7281249761581421, |
|
"rewards/chosen": -0.8585665822029114, |
|
"rewards/margins": 0.7127848863601685, |
|
"rewards/rejected": -1.571351408958435, |
|
"step": 150 |
|
}, |
|
{ |
|
"epoch": 0.33, |
|
"learning_rate": 4.2052190435769554e-07, |
|
"logits/chosen": -0.11348650604486465, |
|
"logits/rejected": 0.40840038657188416, |
|
"logps/chosen": -361.98883056640625, |
|
"logps/rejected": -427.78009033203125, |
|
"loss": 0.5077, |
|
"rewards/accuracies": 0.731249988079071, |
|
"rewards/chosen": -0.8002462387084961, |
|
"rewards/margins": 0.7850725650787354, |
|
"rewards/rejected": -1.5853188037872314, |
|
"step": 160 |
|
}, |
|
{ |
|
"epoch": 0.36, |
|
"learning_rate": 4.0668899744407567e-07, |
|
"logits/chosen": -0.36594608426094055, |
|
"logits/rejected": 0.12834864854812622, |
|
"logps/chosen": -341.146728515625, |
|
"logps/rejected": -363.8439025878906, |
|
"loss": 0.5665, |
|
"rewards/accuracies": 0.706250011920929, |
|
"rewards/chosen": -0.7850489020347595, |
|
"rewards/margins": 0.5657275915145874, |
|
"rewards/rejected": -1.3507764339447021, |
|
"step": 170 |
|
}, |
|
{ |
|
"epoch": 0.38, |
|
"learning_rate": 3.920161866827889e-07, |
|
"logits/chosen": 0.2961498200893402, |
|
"logits/rejected": 0.45205220580101013, |
|
"logps/chosen": -387.6252746582031, |
|
"logps/rejected": -447.474609375, |
|
"loss": 0.5266, |
|
"rewards/accuracies": 0.7250000238418579, |
|
"rewards/chosen": -1.1021279096603394, |
|
"rewards/margins": 0.7641929388046265, |
|
"rewards/rejected": -1.8663208484649658, |
|
"step": 180 |
|
}, |
|
{ |
|
"epoch": 0.4, |
|
"learning_rate": 3.765821230985757e-07, |
|
"logits/chosen": 0.20093505084514618, |
|
"logits/rejected": 0.6715067625045776, |
|
"logps/chosen": -378.05401611328125, |
|
"logps/rejected": -456.7447204589844, |
|
"loss": 0.5034, |
|
"rewards/accuracies": 0.762499988079071, |
|
"rewards/chosen": -1.09689462184906, |
|
"rewards/margins": 0.8885995149612427, |
|
"rewards/rejected": -1.9854942560195923, |
|
"step": 190 |
|
}, |
|
{ |
|
"epoch": 0.42, |
|
"learning_rate": 3.604695382782159e-07, |
|
"logits/chosen": -0.23716697096824646, |
|
"logits/rejected": 0.3149642050266266, |
|
"logps/chosen": -397.9941711425781, |
|
"logps/rejected": -436.1918029785156, |
|
"loss": 0.5107, |
|
"rewards/accuracies": 0.7250000238418579, |
|
"rewards/chosen": -1.0792362689971924, |
|
"rewards/margins": 0.7798489928245544, |
|
"rewards/rejected": -1.8590853214263916, |
|
"step": 200 |
|
}, |
|
{ |
|
"epoch": 0.42, |
|
"eval_logits/chosen": -0.12909530103206635, |
|
"eval_logits/rejected": 0.39986252784729004, |
|
"eval_logps/chosen": -386.8340759277344, |
|
"eval_logps/rejected": -445.9253845214844, |
|
"eval_loss": 0.5233325958251953, |
|
"eval_rewards/accuracies": 0.7480158805847168, |
|
"eval_rewards/chosen": -1.0273078680038452, |
|
"eval_rewards/margins": 0.8142038583755493, |
|
"eval_rewards/rejected": -1.841511845588684, |
|
"eval_runtime": 209.3065, |
|
"eval_samples_per_second": 9.555, |
|
"eval_steps_per_second": 0.301, |
|
"step": 200 |
|
}, |
|
{ |
|
"epoch": 0.44, |
|
"learning_rate": 3.4376480090239047e-07, |
|
"logits/chosen": -0.3405931890010834, |
|
"logits/rejected": 0.4394424557685852, |
|
"logps/chosen": -386.2831115722656, |
|
"logps/rejected": -428.07794189453125, |
|
"loss": 0.524, |
|
"rewards/accuracies": 0.715624988079071, |
|
"rewards/chosen": -0.9731036424636841, |
|
"rewards/margins": 0.7027429938316345, |
|
"rewards/rejected": -1.6758466958999634, |
|
"step": 210 |
|
}, |
|
{ |
|
"epoch": 0.46, |
|
"learning_rate": 3.265574537815398e-07, |
|
"logits/chosen": 0.157635897397995, |
|
"logits/rejected": 0.5433846712112427, |
|
"logps/chosen": -354.6939697265625, |
|
"logps/rejected": -411.32525634765625, |
|
"loss": 0.5215, |
|
"rewards/accuracies": 0.7281249761581421, |
|
"rewards/chosen": -0.9413167834281921, |
|
"rewards/margins": 0.7305358648300171, |
|
"rewards/rejected": -1.671852707862854, |
|
"step": 220 |
|
}, |
|
{ |
|
"epoch": 0.48, |
|
"learning_rate": 3.0893973387735683e-07, |
|
"logits/chosen": 0.11759161949157715, |
|
"logits/rejected": 0.7300031185150146, |
|
"logps/chosen": -358.34796142578125, |
|
"logps/rejected": -431.40289306640625, |
|
"loss": 0.5184, |
|
"rewards/accuracies": 0.746874988079071, |
|
"rewards/chosen": -0.9373432397842407, |
|
"rewards/margins": 0.8745949864387512, |
|
"rewards/rejected": -1.8119380474090576, |
|
"step": 230 |
|
}, |
|
{ |
|
"epoch": 0.5, |
|
"learning_rate": 2.910060778827554e-07, |
|
"logits/chosen": 0.12542644143104553, |
|
"logits/rejected": 0.8369342088699341, |
|
"logps/chosen": -376.5246276855469, |
|
"logps/rejected": -436.3438415527344, |
|
"loss": 0.5215, |
|
"rewards/accuracies": 0.7406250238418579, |
|
"rewards/chosen": -1.1107540130615234, |
|
"rewards/margins": 0.8059130907058716, |
|
"rewards/rejected": -1.9166672229766846, |
|
"step": 240 |
|
}, |
|
{ |
|
"epoch": 0.52, |
|
"learning_rate": 2.7285261601056697e-07, |
|
"logits/chosen": 0.2841692566871643, |
|
"logits/rejected": 0.8800110816955566, |
|
"logps/chosen": -380.837890625, |
|
"logps/rejected": -435.38226318359375, |
|
"loss": 0.5185, |
|
"rewards/accuracies": 0.71875, |
|
"rewards/chosen": -1.0715522766113281, |
|
"rewards/margins": 0.7880513072013855, |
|
"rewards/rejected": -1.8596036434173584, |
|
"step": 250 |
|
}, |
|
{ |
|
"epoch": 0.54, |
|
"learning_rate": 2.5457665670441937e-07, |
|
"logits/chosen": 0.1456121802330017, |
|
"logits/rejected": 0.8049761056900024, |
|
"logps/chosen": -384.6329650878906, |
|
"logps/rejected": -444.5585021972656, |
|
"loss": 0.4931, |
|
"rewards/accuracies": 0.762499988079071, |
|
"rewards/chosen": -1.0436666011810303, |
|
"rewards/margins": 0.885493278503418, |
|
"rewards/rejected": -1.9291598796844482, |
|
"step": 260 |
|
}, |
|
{ |
|
"epoch": 0.57, |
|
"learning_rate": 2.3627616503391812e-07, |
|
"logits/chosen": 0.13575513660907745, |
|
"logits/rejected": 0.6915581822395325, |
|
"logps/chosen": -405.340576171875, |
|
"logps/rejected": -488.1787109375, |
|
"loss": 0.5023, |
|
"rewards/accuracies": 0.7406250238418579, |
|
"rewards/chosen": -1.161525011062622, |
|
"rewards/margins": 0.9342771768569946, |
|
"rewards/rejected": -2.095802068710327, |
|
"step": 270 |
|
}, |
|
{ |
|
"epoch": 0.59, |
|
"learning_rate": 2.1804923757009882e-07, |
|
"logits/chosen": 0.29470258951187134, |
|
"logits/rejected": 0.8073743581771851, |
|
"logps/chosen": -387.95574951171875, |
|
"logps/rejected": -448.05804443359375, |
|
"loss": 0.5154, |
|
"rewards/accuracies": 0.7593749761581421, |
|
"rewards/chosen": -1.1971309185028076, |
|
"rewards/margins": 0.8766487836837769, |
|
"rewards/rejected": -2.073779582977295, |
|
"step": 280 |
|
}, |
|
{ |
|
"epoch": 0.61, |
|
"learning_rate": 1.9999357655598891e-07, |
|
"logits/chosen": 0.03936057537794113, |
|
"logits/rejected": 0.7195956110954285, |
|
"logps/chosen": -383.6938781738281, |
|
"logps/rejected": -436.23773193359375, |
|
"loss": 0.5138, |
|
"rewards/accuracies": 0.746874988079071, |
|
"rewards/chosen": -1.0866312980651855, |
|
"rewards/margins": 0.7917761206626892, |
|
"rewards/rejected": -1.8784072399139404, |
|
"step": 290 |
|
}, |
|
{ |
|
"epoch": 0.63, |
|
"learning_rate": 1.8220596619089573e-07, |
|
"logits/chosen": -0.12395141273736954, |
|
"logits/rejected": 0.7462641000747681, |
|
"logps/chosen": -394.690185546875, |
|
"logps/rejected": -464.9306640625, |
|
"loss": 0.5036, |
|
"rewards/accuracies": 0.734375, |
|
"rewards/chosen": -0.9518837928771973, |
|
"rewards/margins": 0.7403639554977417, |
|
"rewards/rejected": -1.692247748374939, |
|
"step": 300 |
|
}, |
|
{ |
|
"epoch": 0.63, |
|
"eval_logits/chosen": -0.04246729612350464, |
|
"eval_logits/rejected": 0.7445986866950989, |
|
"eval_logps/chosen": -387.199462890625, |
|
"eval_logps/rejected": -449.6838684082031, |
|
"eval_loss": 0.5109054446220398, |
|
"eval_rewards/accuracies": 0.7599206566810608, |
|
"eval_rewards/chosen": -1.0309618711471558, |
|
"eval_rewards/margins": 0.8481349349021912, |
|
"eval_rewards/rejected": -1.8790968656539917, |
|
"eval_runtime": 208.3757, |
|
"eval_samples_per_second": 9.598, |
|
"eval_steps_per_second": 0.302, |
|
"step": 300 |
|
}, |
|
{ |
|
"epoch": 0.65, |
|
"learning_rate": 1.647817538357072e-07, |
|
"logits/chosen": 0.17005488276481628, |
|
"logits/rejected": 1.0273408889770508, |
|
"logps/chosen": -386.69549560546875, |
|
"logps/rejected": -422.31549072265625, |
|
"loss": 0.4969, |
|
"rewards/accuracies": 0.7562500238418579, |
|
"rewards/chosen": -1.1317487955093384, |
|
"rewards/margins": 0.818356990814209, |
|
"rewards/rejected": -1.9501060247421265, |
|
"step": 310 |
|
}, |
|
{ |
|
"epoch": 0.67, |
|
"learning_rate": 1.478143389201113e-07, |
|
"logits/chosen": 0.7025367021560669, |
|
"logits/rejected": 1.2278884649276733, |
|
"logps/chosen": -391.64910888671875, |
|
"logps/rejected": -471.31494140625, |
|
"loss": 0.505, |
|
"rewards/accuracies": 0.784375011920929, |
|
"rewards/chosen": -1.1856224536895752, |
|
"rewards/margins": 0.9644185900688171, |
|
"rewards/rejected": -2.150041103363037, |
|
"step": 320 |
|
}, |
|
{ |
|
"epoch": 0.69, |
|
"learning_rate": 1.3139467229135998e-07, |
|
"logits/chosen": 0.15242072939872742, |
|
"logits/rejected": 0.7564742565155029, |
|
"logps/chosen": -425.6622619628906, |
|
"logps/rejected": -465.29278564453125, |
|
"loss": 0.5128, |
|
"rewards/accuracies": 0.768750011920929, |
|
"rewards/chosen": -1.2175956964492798, |
|
"rewards/margins": 0.7860982418060303, |
|
"rewards/rejected": -2.0036940574645996, |
|
"step": 330 |
|
}, |
|
{ |
|
"epoch": 0.71, |
|
"learning_rate": 1.1561076868822755e-07, |
|
"logits/chosen": 0.1247793436050415, |
|
"logits/rejected": 0.7608783841133118, |
|
"logps/chosen": -374.5887756347656, |
|
"logps/rejected": -422.9578552246094, |
|
"loss": 0.5178, |
|
"rewards/accuracies": 0.684374988079071, |
|
"rewards/chosen": -1.023194432258606, |
|
"rewards/margins": 0.6715623140335083, |
|
"rewards/rejected": -1.6947567462921143, |
|
"step": 340 |
|
}, |
|
{ |
|
"epoch": 0.73, |
|
"learning_rate": 1.0054723495346482e-07, |
|
"logits/chosen": -0.013246958144009113, |
|
"logits/rejected": 0.9326921701431274, |
|
"logps/chosen": -357.45355224609375, |
|
"logps/rejected": -430.1778869628906, |
|
"loss": 0.4805, |
|
"rewards/accuracies": 0.762499988079071, |
|
"rewards/chosen": -0.9961371421813965, |
|
"rewards/margins": 0.8724290728569031, |
|
"rewards/rejected": -1.8685661554336548, |
|
"step": 350 |
|
}, |
|
{ |
|
"epoch": 0.75, |
|
"learning_rate": 8.628481651367875e-08, |
|
"logits/chosen": 0.26757997274398804, |
|
"logits/rejected": 0.936874508857727, |
|
"logps/chosen": -416.90478515625, |
|
"logps/rejected": -454.53900146484375, |
|
"loss": 0.4854, |
|
"rewards/accuracies": 0.7562500238418579, |
|
"rewards/chosen": -1.1874934434890747, |
|
"rewards/margins": 0.8222919702529907, |
|
"rewards/rejected": -2.0097854137420654, |
|
"step": 360 |
|
}, |
|
{ |
|
"epoch": 0.77, |
|
"learning_rate": 7.289996455765748e-08, |
|
"logits/chosen": 0.48094987869262695, |
|
"logits/rejected": 1.185869574546814, |
|
"logps/chosen": -426.731201171875, |
|
"logps/rejected": -468.48614501953125, |
|
"loss": 0.498, |
|
"rewards/accuracies": 0.793749988079071, |
|
"rewards/chosen": -1.1760939359664917, |
|
"rewards/margins": 0.9535791277885437, |
|
"rewards/rejected": -2.1296730041503906, |
|
"step": 370 |
|
}, |
|
{ |
|
"epoch": 0.8, |
|
"learning_rate": 6.046442623320145e-08, |
|
"logits/chosen": 0.14980120956897736, |
|
"logits/rejected": 0.717765212059021, |
|
"logps/chosen": -390.82574462890625, |
|
"logps/rejected": -463.6338806152344, |
|
"loss": 0.4762, |
|
"rewards/accuracies": 0.737500011920929, |
|
"rewards/chosen": -1.1005865335464478, |
|
"rewards/margins": 0.8899344205856323, |
|
"rewards/rejected": -1.9905208349227905, |
|
"step": 380 |
|
}, |
|
{ |
|
"epoch": 0.82, |
|
"learning_rate": 4.904486005914027e-08, |
|
"logits/chosen": 0.19815652072429657, |
|
"logits/rejected": 0.8587054014205933, |
|
"logps/chosen": -387.7861022949219, |
|
"logps/rejected": -460.3731994628906, |
|
"loss": 0.507, |
|
"rewards/accuracies": 0.721875011920929, |
|
"rewards/chosen": -1.1267964839935303, |
|
"rewards/margins": 0.7973768711090088, |
|
"rewards/rejected": -1.9241735935211182, |
|
"step": 390 |
|
}, |
|
{ |
|
"epoch": 0.84, |
|
"learning_rate": 3.8702478614051345e-08, |
|
"logits/chosen": 0.047690752893686295, |
|
"logits/rejected": 0.9750245809555054, |
|
"logps/chosen": -440.0252990722656, |
|
"logps/rejected": -506.23193359375, |
|
"loss": 0.485, |
|
"rewards/accuracies": 0.778124988079071, |
|
"rewards/chosen": -1.1925568580627441, |
|
"rewards/margins": 1.0378857851028442, |
|
"rewards/rejected": -2.230442762374878, |
|
"step": 400 |
|
}, |
|
{ |
|
"epoch": 0.84, |
|
"eval_logits/chosen": 0.06345783174037933, |
|
"eval_logits/rejected": 0.9021896719932556, |
|
"eval_logps/chosen": -397.17987060546875, |
|
"eval_logps/rejected": -468.7184143066406, |
|
"eval_loss": 0.504673957824707, |
|
"eval_rewards/accuracies": 0.7638888955116272, |
|
"eval_rewards/chosen": -1.1307663917541504, |
|
"eval_rewards/margins": 0.9386757016181946, |
|
"eval_rewards/rejected": -2.0694420337677, |
|
"eval_runtime": 208.3315, |
|
"eval_samples_per_second": 9.6, |
|
"eval_steps_per_second": 0.302, |
|
"step": 400 |
|
}, |
|
{ |
|
"epoch": 0.86, |
|
"learning_rate": 2.9492720416985e-08, |
|
"logits/chosen": 0.439073383808136, |
|
"logits/rejected": 1.0371900796890259, |
|
"logps/chosen": -382.6383056640625, |
|
"logps/rejected": -434.5480041503906, |
|
"loss": 0.5107, |
|
"rewards/accuracies": 0.753125011920929, |
|
"rewards/chosen": -1.165606141090393, |
|
"rewards/margins": 0.8086501359939575, |
|
"rewards/rejected": -1.974256157875061, |
|
"step": 410 |
|
}, |
|
{ |
|
"epoch": 0.88, |
|
"learning_rate": 2.1464952759020856e-08, |
|
"logits/chosen": 0.2883056402206421, |
|
"logits/rejected": 0.8607487678527832, |
|
"logps/chosen": -395.8135681152344, |
|
"logps/rejected": -458.44488525390625, |
|
"loss": 0.5057, |
|
"rewards/accuracies": 0.753125011920929, |
|
"rewards/chosen": -1.1481564044952393, |
|
"rewards/margins": 0.8051254153251648, |
|
"rewards/rejected": -1.9532817602157593, |
|
"step": 420 |
|
}, |
|
{ |
|
"epoch": 0.9, |
|
"learning_rate": 1.4662207078575684e-08, |
|
"logits/chosen": 0.26005080342292786, |
|
"logits/rejected": 1.015019416809082, |
|
"logps/chosen": -401.0879821777344, |
|
"logps/rejected": -446.59149169921875, |
|
"loss": 0.5037, |
|
"rewards/accuracies": 0.7250000238418579, |
|
"rewards/chosen": -1.1175105571746826, |
|
"rewards/margins": 0.8690218925476074, |
|
"rewards/rejected": -1.98653244972229, |
|
"step": 430 |
|
}, |
|
{ |
|
"epoch": 0.92, |
|
"learning_rate": 9.12094829893642e-09, |
|
"logits/chosen": 0.05823874473571777, |
|
"logits/rejected": 0.874756932258606, |
|
"logps/chosen": -370.11029052734375, |
|
"logps/rejected": -455.4200134277344, |
|
"loss": 0.4903, |
|
"rewards/accuracies": 0.762499988079071, |
|
"rewards/chosen": -1.0955357551574707, |
|
"rewards/margins": 0.9608632326126099, |
|
"rewards/rejected": -2.056398868560791, |
|
"step": 440 |
|
}, |
|
{ |
|
"epoch": 0.94, |
|
"learning_rate": 4.8708793644441086e-09, |
|
"logits/chosen": 0.14340977370738983, |
|
"logits/rejected": 0.9994899034500122, |
|
"logps/chosen": -401.8331604003906, |
|
"logps/rejected": -473.13031005859375, |
|
"loss": 0.5088, |
|
"rewards/accuracies": 0.7250000238418579, |
|
"rewards/chosen": -1.1936233043670654, |
|
"rewards/margins": 0.7889004349708557, |
|
"rewards/rejected": -1.9825239181518555, |
|
"step": 450 |
|
}, |
|
{ |
|
"epoch": 0.96, |
|
"learning_rate": 1.9347820230782295e-09, |
|
"logits/chosen": 0.197782963514328, |
|
"logits/rejected": 0.9863063097000122, |
|
"logps/chosen": -387.4159240722656, |
|
"logps/rejected": -460.01123046875, |
|
"loss": 0.4856, |
|
"rewards/accuracies": 0.7875000238418579, |
|
"rewards/chosen": -1.1579214334487915, |
|
"rewards/margins": 0.9438018798828125, |
|
"rewards/rejected": -2.1017231941223145, |
|
"step": 460 |
|
}, |
|
{ |
|
"epoch": 0.98, |
|
"learning_rate": 3.2839470889836627e-10, |
|
"logits/chosen": 0.09159674495458603, |
|
"logits/rejected": 0.8309410214424133, |
|
"logps/chosen": -416.267333984375, |
|
"logps/rejected": -479.33917236328125, |
|
"loss": 0.482, |
|
"rewards/accuracies": 0.809374988079071, |
|
"rewards/chosen": -1.1502647399902344, |
|
"rewards/margins": 0.9664583206176758, |
|
"rewards/rejected": -2.11672306060791, |
|
"step": 470 |
|
}, |
|
{ |
|
"epoch": 1.0, |
|
"step": 477, |
|
"total_flos": 0.0, |
|
"train_loss": 0.08043583084202413, |
|
"train_runtime": 2697.0957, |
|
"train_samples_per_second": 22.667, |
|
"train_steps_per_second": 0.177 |
|
} |
|
], |
|
"logging_steps": 10, |
|
"max_steps": 477, |
|
"num_input_tokens_seen": 0, |
|
"num_train_epochs": 1, |
|
"save_steps": 100, |
|
"total_flos": 0.0, |
|
"train_batch_size": 8, |
|
"trial_name": null, |
|
"trial_params": null |
|
} |
|
|