|
{ |
|
"best_metric": null, |
|
"best_model_checkpoint": null, |
|
"epoch": 0.9993998799759952, |
|
"eval_steps": 100, |
|
"global_step": 1249, |
|
"is_hyper_param_search": false, |
|
"is_local_process_zero": true, |
|
"is_world_process_zero": true, |
|
"log_history": [ |
|
{ |
|
"epoch": 0.0, |
|
"learning_rate": 4e-08, |
|
"logits/chosen": -2.6824121475219727, |
|
"logits/rejected": -2.7049124240875244, |
|
"logps/chosen": -275.1597900390625, |
|
"logps/rejected": -271.6430969238281, |
|
"loss": 0.6931, |
|
"rewards/accuracies": 0.0, |
|
"rewards/chosen": 0.0, |
|
"rewards/margins": 0.0, |
|
"rewards/rejected": 0.0, |
|
"step": 1 |
|
}, |
|
{ |
|
"epoch": 0.01, |
|
"learning_rate": 4.0000000000000003e-07, |
|
"logits/chosen": -2.6062471866607666, |
|
"logits/rejected": -2.633519172668457, |
|
"logps/chosen": -301.788818359375, |
|
"logps/rejected": -324.23992919921875, |
|
"loss": 0.6934, |
|
"rewards/accuracies": 0.4027777910232544, |
|
"rewards/chosen": -0.0003357415844220668, |
|
"rewards/margins": -0.00043744672439061105, |
|
"rewards/rejected": 0.00010170514724450186, |
|
"step": 10 |
|
}, |
|
{ |
|
"epoch": 0.02, |
|
"learning_rate": 8.000000000000001e-07, |
|
"logits/chosen": -2.5865533351898193, |
|
"logits/rejected": -2.589942693710327, |
|
"logps/chosen": -269.1646423339844, |
|
"logps/rejected": -289.1620178222656, |
|
"loss": 0.6934, |
|
"rewards/accuracies": 0.45625001192092896, |
|
"rewards/chosen": -0.0017813893500715494, |
|
"rewards/margins": -0.00042842660332098603, |
|
"rewards/rejected": -0.0013529626885429025, |
|
"step": 20 |
|
}, |
|
{ |
|
"epoch": 0.02, |
|
"learning_rate": 1.2000000000000002e-06, |
|
"logits/chosen": -2.5753893852233887, |
|
"logits/rejected": -2.58815860748291, |
|
"logps/chosen": -291.62603759765625, |
|
"logps/rejected": -311.4107971191406, |
|
"loss": 0.6933, |
|
"rewards/accuracies": 0.4749999940395355, |
|
"rewards/chosen": -0.0035930208396166563, |
|
"rewards/margins": -0.00022997017367742956, |
|
"rewards/rejected": -0.0033630509860813618, |
|
"step": 30 |
|
}, |
|
{ |
|
"epoch": 0.03, |
|
"learning_rate": 1.6000000000000001e-06, |
|
"logits/chosen": -2.612914562225342, |
|
"logits/rejected": -2.619166135787964, |
|
"logps/chosen": -264.53851318359375, |
|
"logps/rejected": -273.5920104980469, |
|
"loss": 0.6929, |
|
"rewards/accuracies": 0.512499988079071, |
|
"rewards/chosen": -0.005524822510778904, |
|
"rewards/margins": 0.0004894191515631974, |
|
"rewards/rejected": -0.006014241836965084, |
|
"step": 40 |
|
}, |
|
{ |
|
"epoch": 0.04, |
|
"learning_rate": 2.0000000000000003e-06, |
|
"logits/chosen": -2.56376576423645, |
|
"logits/rejected": -2.5438730716705322, |
|
"logps/chosen": -264.94989013671875, |
|
"logps/rejected": -269.7061767578125, |
|
"loss": 0.6932, |
|
"rewards/accuracies": 0.48124998807907104, |
|
"rewards/chosen": -0.008829522877931595, |
|
"rewards/margins": -0.0001453599688829854, |
|
"rewards/rejected": -0.008684162981808186, |
|
"step": 50 |
|
}, |
|
{ |
|
"epoch": 0.05, |
|
"learning_rate": 2.4000000000000003e-06, |
|
"logits/chosen": -2.630518674850464, |
|
"logits/rejected": -2.6339123249053955, |
|
"logps/chosen": -277.233642578125, |
|
"logps/rejected": -296.16107177734375, |
|
"loss": 0.6919, |
|
"rewards/accuracies": 0.5375000238418579, |
|
"rewards/chosen": -0.013874058611690998, |
|
"rewards/margins": 0.0025573372840881348, |
|
"rewards/rejected": -0.016431394964456558, |
|
"step": 60 |
|
}, |
|
{ |
|
"epoch": 0.06, |
|
"learning_rate": 2.8000000000000003e-06, |
|
"logits/chosen": -2.623034954071045, |
|
"logits/rejected": -2.6133813858032227, |
|
"logps/chosen": -280.8143005371094, |
|
"logps/rejected": -286.6803283691406, |
|
"loss": 0.693, |
|
"rewards/accuracies": 0.48750001192092896, |
|
"rewards/chosen": -0.01927858218550682, |
|
"rewards/margins": 0.00032674067188054323, |
|
"rewards/rejected": -0.019605323672294617, |
|
"step": 70 |
|
}, |
|
{ |
|
"epoch": 0.06, |
|
"learning_rate": 3.2000000000000003e-06, |
|
"logits/chosen": -2.6520907878875732, |
|
"logits/rejected": -2.6506881713867188, |
|
"logps/chosen": -276.94451904296875, |
|
"logps/rejected": -297.6499328613281, |
|
"loss": 0.6924, |
|
"rewards/accuracies": 0.518750011920929, |
|
"rewards/chosen": -0.017164330929517746, |
|
"rewards/margins": 0.0016351321246474981, |
|
"rewards/rejected": -0.018799465149641037, |
|
"step": 80 |
|
}, |
|
{ |
|
"epoch": 0.07, |
|
"learning_rate": 3.6000000000000003e-06, |
|
"logits/chosen": -2.6500167846679688, |
|
"logits/rejected": -2.6388657093048096, |
|
"logps/chosen": -311.24853515625, |
|
"logps/rejected": -316.82427978515625, |
|
"loss": 0.6891, |
|
"rewards/accuracies": 0.581250011920929, |
|
"rewards/chosen": -0.01826111041009426, |
|
"rewards/margins": 0.008519862778484821, |
|
"rewards/rejected": -0.026780972257256508, |
|
"step": 90 |
|
}, |
|
{ |
|
"epoch": 0.08, |
|
"learning_rate": 4.000000000000001e-06, |
|
"logits/chosen": -2.649533271789551, |
|
"logits/rejected": -2.647923231124878, |
|
"logps/chosen": -276.5948486328125, |
|
"logps/rejected": -290.6126708984375, |
|
"loss": 0.6909, |
|
"rewards/accuracies": 0.550000011920929, |
|
"rewards/chosen": -0.025616105645895004, |
|
"rewards/margins": 0.00467633968219161, |
|
"rewards/rejected": -0.030292445793747902, |
|
"step": 100 |
|
}, |
|
{ |
|
"epoch": 0.08, |
|
"eval_logits/chosen": -2.6257693767547607, |
|
"eval_logits/rejected": -2.622962474822998, |
|
"eval_logps/chosen": -222.508056640625, |
|
"eval_logps/rejected": -228.3364715576172, |
|
"eval_loss": 0.6922365427017212, |
|
"eval_rewards/accuracies": 0.5236666798591614, |
|
"eval_rewards/chosen": -0.01801561377942562, |
|
"eval_rewards/margins": 0.0019916673190891743, |
|
"eval_rewards/rejected": -0.020007280632853508, |
|
"eval_runtime": 1605.2167, |
|
"eval_samples_per_second": 1.865, |
|
"eval_steps_per_second": 0.234, |
|
"step": 100 |
|
}, |
|
{ |
|
"epoch": 0.09, |
|
"learning_rate": 4.4e-06, |
|
"logits/chosen": -2.6500422954559326, |
|
"logits/rejected": -2.6527457237243652, |
|
"logps/chosen": -299.06976318359375, |
|
"logps/rejected": -309.92266845703125, |
|
"loss": 0.6888, |
|
"rewards/accuracies": 0.574999988079071, |
|
"rewards/chosen": -0.026254886761307716, |
|
"rewards/margins": 0.009161447174847126, |
|
"rewards/rejected": -0.03541633114218712, |
|
"step": 110 |
|
}, |
|
{ |
|
"epoch": 0.1, |
|
"learning_rate": 4.800000000000001e-06, |
|
"logits/chosen": -2.6758484840393066, |
|
"logits/rejected": -2.6881041526794434, |
|
"logps/chosen": -278.5473327636719, |
|
"logps/rejected": -296.552734375, |
|
"loss": 0.6901, |
|
"rewards/accuracies": 0.48124998807907104, |
|
"rewards/chosen": -0.03307301551103592, |
|
"rewards/margins": 0.006715013645589352, |
|
"rewards/rejected": -0.039788030087947845, |
|
"step": 120 |
|
}, |
|
{ |
|
"epoch": 0.1, |
|
"learning_rate": 4.999755876225375e-06, |
|
"logits/chosen": -2.6458847522735596, |
|
"logits/rejected": -2.6281027793884277, |
|
"logps/chosen": -294.466796875, |
|
"logps/rejected": -315.05035400390625, |
|
"loss": 0.6875, |
|
"rewards/accuracies": 0.59375, |
|
"rewards/chosen": -0.036008208990097046, |
|
"rewards/margins": 0.011871300637722015, |
|
"rewards/rejected": -0.04787950962781906, |
|
"step": 130 |
|
}, |
|
{ |
|
"epoch": 0.11, |
|
"learning_rate": 4.997803172081864e-06, |
|
"logits/chosen": -2.6800694465637207, |
|
"logits/rejected": -2.6806652545928955, |
|
"logps/chosen": -289.0479736328125, |
|
"logps/rejected": -302.6410217285156, |
|
"loss": 0.685, |
|
"rewards/accuracies": 0.6000000238418579, |
|
"rewards/chosen": -0.03994801267981529, |
|
"rewards/margins": 0.017537599429488182, |
|
"rewards/rejected": -0.057485610246658325, |
|
"step": 140 |
|
}, |
|
{ |
|
"epoch": 0.12, |
|
"learning_rate": 4.9938992891651825e-06, |
|
"logits/chosen": -2.662532091140747, |
|
"logits/rejected": -2.652069568634033, |
|
"logps/chosen": -277.6954040527344, |
|
"logps/rejected": -300.7398986816406, |
|
"loss": 0.6833, |
|
"rewards/accuracies": 0.59375, |
|
"rewards/chosen": -0.05284956097602844, |
|
"rewards/margins": 0.02098439633846283, |
|
"rewards/rejected": -0.07383395731449127, |
|
"step": 150 |
|
}, |
|
{ |
|
"epoch": 0.13, |
|
"learning_rate": 4.988047277024456e-06, |
|
"logits/chosen": -2.7222561836242676, |
|
"logits/rejected": -2.732853412628174, |
|
"logps/chosen": -288.4869689941406, |
|
"logps/rejected": -303.99114990234375, |
|
"loss": 0.6812, |
|
"rewards/accuracies": 0.6312500238418579, |
|
"rewards/chosen": -0.07921075075864792, |
|
"rewards/margins": 0.024997711181640625, |
|
"rewards/rejected": -0.10420846939086914, |
|
"step": 160 |
|
}, |
|
{ |
|
"epoch": 0.14, |
|
"learning_rate": 4.980251707005417e-06, |
|
"logits/chosen": -2.719470500946045, |
|
"logits/rejected": -2.6922366619110107, |
|
"logps/chosen": -307.56341552734375, |
|
"logps/rejected": -315.6242370605469, |
|
"loss": 0.6891, |
|
"rewards/accuracies": 0.518750011920929, |
|
"rewards/chosen": -0.10631716251373291, |
|
"rewards/margins": 0.0096724983304739, |
|
"rewards/rejected": -0.11598964780569077, |
|
"step": 170 |
|
}, |
|
{ |
|
"epoch": 0.14, |
|
"learning_rate": 4.970518668679459e-06, |
|
"logits/chosen": -2.7305121421813965, |
|
"logits/rejected": -2.715012311935425, |
|
"logps/chosen": -303.9269104003906, |
|
"logps/rejected": -311.3086853027344, |
|
"loss": 0.6821, |
|
"rewards/accuracies": 0.59375, |
|
"rewards/chosen": -0.08977223932743073, |
|
"rewards/margins": 0.02493356727063656, |
|
"rewards/rejected": -0.11470580101013184, |
|
"step": 180 |
|
}, |
|
{ |
|
"epoch": 0.15, |
|
"learning_rate": 4.958855765086722e-06, |
|
"logits/chosen": -2.7576236724853516, |
|
"logits/rejected": -2.754725933074951, |
|
"logps/chosen": -283.64117431640625, |
|
"logps/rejected": -293.0472717285156, |
|
"loss": 0.6843, |
|
"rewards/accuracies": 0.59375, |
|
"rewards/chosen": -0.058794498443603516, |
|
"rewards/margins": 0.02001199498772621, |
|
"rewards/rejected": -0.07880649715662003, |
|
"step": 190 |
|
}, |
|
{ |
|
"epoch": 0.16, |
|
"learning_rate": 4.945272106796919e-06, |
|
"logits/chosen": -2.7707252502441406, |
|
"logits/rejected": -2.7755463123321533, |
|
"logps/chosen": -284.79974365234375, |
|
"logps/rejected": -300.4746398925781, |
|
"loss": 0.6843, |
|
"rewards/accuracies": 0.606249988079071, |
|
"rewards/chosen": -0.07238452136516571, |
|
"rewards/margins": 0.021619267761707306, |
|
"rewards/rejected": -0.09400378167629242, |
|
"step": 200 |
|
}, |
|
{ |
|
"epoch": 0.16, |
|
"eval_logits/chosen": -2.7632017135620117, |
|
"eval_logits/rejected": -2.7602405548095703, |
|
"eval_logps/chosen": -228.6281280517578, |
|
"eval_logps/rejected": -235.652587890625, |
|
"eval_loss": 0.6873453259468079, |
|
"eval_rewards/accuracies": 0.5546666383743286, |
|
"eval_rewards/chosen": -0.07921627163887024, |
|
"eval_rewards/margins": 0.013952008448541164, |
|
"eval_rewards/rejected": -0.09316828101873398, |
|
"eval_runtime": 1606.4122, |
|
"eval_samples_per_second": 1.864, |
|
"eval_steps_per_second": 0.233, |
|
"step": 200 |
|
}, |
|
{ |
|
"epoch": 0.17, |
|
"learning_rate": 4.929778304792537e-06, |
|
"logits/chosen": -2.7544033527374268, |
|
"logits/rejected": -2.758392333984375, |
|
"logps/chosen": -311.075439453125, |
|
"logps/rejected": -315.38006591796875, |
|
"loss": 0.6759, |
|
"rewards/accuracies": 0.606249988079071, |
|
"rewards/chosen": -0.09139387309551239, |
|
"rewards/margins": 0.04242325574159622, |
|
"rewards/rejected": -0.1338171362876892, |
|
"step": 210 |
|
}, |
|
{ |
|
"epoch": 0.18, |
|
"learning_rate": 4.912386462179987e-06, |
|
"logits/chosen": -2.783567190170288, |
|
"logits/rejected": -2.779317617416382, |
|
"logps/chosen": -298.64764404296875, |
|
"logps/rejected": -325.9937744140625, |
|
"loss": 0.6736, |
|
"rewards/accuracies": 0.612500011920929, |
|
"rewards/chosen": -0.10082165896892548, |
|
"rewards/margins": 0.04564264789223671, |
|
"rewards/rejected": -0.14646431803703308, |
|
"step": 220 |
|
}, |
|
{ |
|
"epoch": 0.18, |
|
"learning_rate": 4.893110164735167e-06, |
|
"logits/chosen": -2.8824093341827393, |
|
"logits/rejected": -2.879509449005127, |
|
"logps/chosen": -305.2967224121094, |
|
"logps/rejected": -319.56695556640625, |
|
"loss": 0.6795, |
|
"rewards/accuracies": 0.581250011920929, |
|
"rewards/chosen": -0.13284233212471008, |
|
"rewards/margins": 0.03221544623374939, |
|
"rewards/rejected": -0.16505777835845947, |
|
"step": 230 |
|
}, |
|
{ |
|
"epoch": 0.19, |
|
"learning_rate": 4.871964470290823e-06, |
|
"logits/chosen": -2.902723789215088, |
|
"logits/rejected": -2.9203548431396484, |
|
"logps/chosen": -309.1792907714844, |
|
"logps/rejected": -331.7867736816406, |
|
"loss": 0.6664, |
|
"rewards/accuracies": 0.6499999761581421, |
|
"rewards/chosen": -0.15928030014038086, |
|
"rewards/margins": 0.0632576271891594, |
|
"rewards/rejected": -0.22253794968128204, |
|
"step": 240 |
|
}, |
|
{ |
|
"epoch": 0.2, |
|
"learning_rate": 4.848965896974006e-06, |
|
"logits/chosen": -2.9424166679382324, |
|
"logits/rejected": -2.9350640773773193, |
|
"logps/chosen": -302.81573486328125, |
|
"logps/rejected": -325.1131896972656, |
|
"loss": 0.676, |
|
"rewards/accuracies": 0.543749988079071, |
|
"rewards/chosen": -0.19259200990200043, |
|
"rewards/margins": 0.042614568024873734, |
|
"rewards/rejected": -0.23520657420158386, |
|
"step": 250 |
|
}, |
|
{ |
|
"epoch": 0.21, |
|
"learning_rate": 4.8241324103028055e-06, |
|
"logits/chosen": -3.106982707977295, |
|
"logits/rejected": -3.080066204071045, |
|
"logps/chosen": -312.7992248535156, |
|
"logps/rejected": -328.4107360839844, |
|
"loss": 0.6624, |
|
"rewards/accuracies": 0.6000000238418579, |
|
"rewards/chosen": -0.24676008522510529, |
|
"rewards/margins": 0.07433095574378967, |
|
"rewards/rejected": -0.32109108567237854, |
|
"step": 260 |
|
}, |
|
{ |
|
"epoch": 0.22, |
|
"learning_rate": 4.797483409152438e-06, |
|
"logits/chosen": -3.210594892501831, |
|
"logits/rejected": -3.2006404399871826, |
|
"logps/chosen": -308.2965393066406, |
|
"logps/rejected": -333.50439453125, |
|
"loss": 0.6611, |
|
"rewards/accuracies": 0.5625, |
|
"rewards/chosen": -0.33079180121421814, |
|
"rewards/margins": 0.08313147723674774, |
|
"rewards/rejected": -0.4139232635498047, |
|
"step": 270 |
|
}, |
|
{ |
|
"epoch": 0.22, |
|
"learning_rate": 4.769039710601669e-06, |
|
"logits/chosen": -3.357815980911255, |
|
"logits/rejected": -3.363719940185547, |
|
"logps/chosen": -316.1602783203125, |
|
"logps/rejected": -338.216552734375, |
|
"loss": 0.6612, |
|
"rewards/accuracies": 0.643750011920929, |
|
"rewards/chosen": -0.42240840196609497, |
|
"rewards/margins": 0.08246298134326935, |
|
"rewards/rejected": -0.5048713088035583, |
|
"step": 280 |
|
}, |
|
{ |
|
"epoch": 0.23, |
|
"learning_rate": 4.738823533671383e-06, |
|
"logits/chosen": -3.4880664348602295, |
|
"logits/rejected": -3.4769935607910156, |
|
"logps/chosen": -351.0909729003906, |
|
"logps/rejected": -368.14385986328125, |
|
"loss": 0.6803, |
|
"rewards/accuracies": 0.5874999761581421, |
|
"rewards/chosen": -0.5264540314674377, |
|
"rewards/margins": 0.04465585574507713, |
|
"rewards/rejected": -0.5711098909378052, |
|
"step": 290 |
|
}, |
|
{ |
|
"epoch": 0.24, |
|
"learning_rate": 4.706858481968017e-06, |
|
"logits/chosen": -3.45086407661438, |
|
"logits/rejected": -3.455495834350586, |
|
"logps/chosen": -340.1499938964844, |
|
"logps/rejected": -352.09429931640625, |
|
"loss": 0.68, |
|
"rewards/accuracies": 0.574999988079071, |
|
"rewards/chosen": -0.5941203236579895, |
|
"rewards/margins": 0.046401310712099075, |
|
"rewards/rejected": -0.6405216455459595, |
|
"step": 300 |
|
}, |
|
{ |
|
"epoch": 0.24, |
|
"eval_logits/chosen": -3.506669044494629, |
|
"eval_logits/rejected": -3.501077175140381, |
|
"eval_logps/chosen": -272.03741455078125, |
|
"eval_logps/rejected": -281.38311767578125, |
|
"eval_loss": 0.6839653253555298, |
|
"eval_rewards/accuracies": 0.5473333597183228, |
|
"eval_rewards/chosen": -0.5133091807365417, |
|
"eval_rewards/margins": 0.03716452047228813, |
|
"eval_rewards/rejected": -0.5504736304283142, |
|
"eval_runtime": 1603.1979, |
|
"eval_samples_per_second": 1.868, |
|
"eval_steps_per_second": 0.234, |
|
"step": 300 |
|
}, |
|
{ |
|
"epoch": 0.25, |
|
"learning_rate": 4.673169525245416e-06, |
|
"logits/chosen": -3.4355697631835938, |
|
"logits/rejected": -3.412278652191162, |
|
"logps/chosen": -336.8961181640625, |
|
"logps/rejected": -369.5231018066406, |
|
"loss": 0.6613, |
|
"rewards/accuracies": 0.6187499761581421, |
|
"rewards/chosen": -0.5616176724433899, |
|
"rewards/margins": 0.09071463346481323, |
|
"rewards/rejected": -0.6523322463035583, |
|
"step": 310 |
|
}, |
|
{ |
|
"epoch": 0.26, |
|
"learning_rate": 4.63778297989952e-06, |
|
"logits/chosen": -3.588507890701294, |
|
"logits/rejected": -3.570230484008789, |
|
"logps/chosen": -343.86956787109375, |
|
"logps/rejected": -364.1554260253906, |
|
"loss": 0.663, |
|
"rewards/accuracies": 0.612500011920929, |
|
"rewards/chosen": -0.5944578051567078, |
|
"rewards/margins": 0.08861590176820755, |
|
"rewards/rejected": -0.6830736994743347, |
|
"step": 320 |
|
}, |
|
{ |
|
"epoch": 0.26, |
|
"learning_rate": 4.60072648841109e-06, |
|
"logits/chosen": -3.744804859161377, |
|
"logits/rejected": -3.735940456390381, |
|
"logps/chosen": -364.733642578125, |
|
"logps/rejected": -394.76348876953125, |
|
"loss": 0.6249, |
|
"rewards/accuracies": 0.706250011920929, |
|
"rewards/chosen": -0.7255194783210754, |
|
"rewards/margins": 0.18435899913311005, |
|
"rewards/rejected": -0.9098785519599915, |
|
"step": 330 |
|
}, |
|
{ |
|
"epoch": 0.27, |
|
"learning_rate": 4.562028997752574e-06, |
|
"logits/chosen": -3.9339213371276855, |
|
"logits/rejected": -3.9240550994873047, |
|
"logps/chosen": -382.0846252441406, |
|
"logps/rejected": -410.460205078125, |
|
"loss": 0.6783, |
|
"rewards/accuracies": 0.5874999761581421, |
|
"rewards/chosen": -0.9037133455276489, |
|
"rewards/margins": 0.0800480842590332, |
|
"rewards/rejected": -0.9837613105773926, |
|
"step": 340 |
|
}, |
|
{ |
|
"epoch": 0.28, |
|
"learning_rate": 4.521720736775947e-06, |
|
"logits/chosen": -3.9775962829589844, |
|
"logits/rejected": -4.001706123352051, |
|
"logps/chosen": -399.79351806640625, |
|
"logps/rejected": -413.55987548828125, |
|
"loss": 0.655, |
|
"rewards/accuracies": 0.625, |
|
"rewards/chosen": -1.040808916091919, |
|
"rewards/margins": 0.1233123168349266, |
|
"rewards/rejected": -1.1641212701797485, |
|
"step": 350 |
|
}, |
|
{ |
|
"epoch": 0.29, |
|
"learning_rate": 4.479833192599198e-06, |
|
"logits/chosen": -3.9514153003692627, |
|
"logits/rejected": -3.929912567138672, |
|
"logps/chosen": -390.7148132324219, |
|
"logps/rejected": -413.5050354003906, |
|
"loss": 0.6615, |
|
"rewards/accuracies": 0.581250011920929, |
|
"rewards/chosen": -0.901436984539032, |
|
"rewards/margins": 0.10929825156927109, |
|
"rewards/rejected": -1.010735273361206, |
|
"step": 360 |
|
}, |
|
{ |
|
"epoch": 0.3, |
|
"learning_rate": 4.436399086009928e-06, |
|
"logits/chosen": -3.7934257984161377, |
|
"logits/rejected": -3.7594401836395264, |
|
"logps/chosen": -366.05413818359375, |
|
"logps/rejected": -387.0696716308594, |
|
"loss": 0.6481, |
|
"rewards/accuracies": 0.675000011920929, |
|
"rewards/chosen": -0.7858292460441589, |
|
"rewards/margins": 0.1209360808134079, |
|
"rewards/rejected": -0.9067652821540833, |
|
"step": 370 |
|
}, |
|
{ |
|
"epoch": 0.3, |
|
"learning_rate": 4.391452345905239e-06, |
|
"logits/chosen": -3.6905906200408936, |
|
"logits/rejected": -3.701895236968994, |
|
"logps/chosen": -377.90118408203125, |
|
"logps/rejected": -394.57147216796875, |
|
"loss": 0.6598, |
|
"rewards/accuracies": 0.5687500238418579, |
|
"rewards/chosen": -0.8731447458267212, |
|
"rewards/margins": 0.11739823967218399, |
|
"rewards/rejected": -0.990543007850647, |
|
"step": 380 |
|
}, |
|
{ |
|
"epoch": 0.31, |
|
"learning_rate": 4.3450280827879125e-06, |
|
"logits/chosen": -3.7442736625671387, |
|
"logits/rejected": -3.769087553024292, |
|
"logps/chosen": -379.1444091796875, |
|
"logps/rejected": -399.7411804199219, |
|
"loss": 0.6601, |
|
"rewards/accuracies": 0.6312500238418579, |
|
"rewards/chosen": -0.9029915928840637, |
|
"rewards/margins": 0.11410637944936752, |
|
"rewards/rejected": -1.0170979499816895, |
|
"step": 390 |
|
}, |
|
{ |
|
"epoch": 0.32, |
|
"learning_rate": 4.297162561339554e-06, |
|
"logits/chosen": -3.6351356506347656, |
|
"logits/rejected": -3.6010661125183105, |
|
"logps/chosen": -385.5706481933594, |
|
"logps/rejected": -412.40130615234375, |
|
"loss": 0.657, |
|
"rewards/accuracies": 0.612500011920929, |
|
"rewards/chosen": -0.9261330366134644, |
|
"rewards/margins": 0.13883258402347565, |
|
"rewards/rejected": -1.0649656057357788, |
|
"step": 400 |
|
}, |
|
{ |
|
"epoch": 0.32, |
|
"eval_logits/chosen": -3.7522990703582764, |
|
"eval_logits/rejected": -3.7427072525024414, |
|
"eval_logps/chosen": -306.9489440917969, |
|
"eval_logps/rejected": -319.11859130859375, |
|
"eval_loss": 0.6807547211647034, |
|
"eval_rewards/accuracies": 0.5583333373069763, |
|
"eval_rewards/chosen": -0.862424373626709, |
|
"eval_rewards/margins": 0.06540393829345703, |
|
"eval_rewards/rejected": -0.9278281927108765, |
|
"eval_runtime": 1662.1993, |
|
"eval_samples_per_second": 1.801, |
|
"eval_steps_per_second": 0.226, |
|
"step": 400 |
|
}, |
|
{ |
|
"epoch": 0.33, |
|
"learning_rate": 4.247893172092157e-06, |
|
"logits/chosen": -3.6088128089904785, |
|
"logits/rejected": -3.6062328815460205, |
|
"logps/chosen": -375.0890197753906, |
|
"logps/rejected": -409.6904296875, |
|
"loss": 0.6588, |
|
"rewards/accuracies": 0.606249988079071, |
|
"rewards/chosen": -0.948278546333313, |
|
"rewards/margins": 0.12885506451129913, |
|
"rewards/rejected": -1.0771336555480957, |
|
"step": 410 |
|
}, |
|
{ |
|
"epoch": 0.34, |
|
"learning_rate": 4.197258402220187e-06, |
|
"logits/chosen": -3.6457858085632324, |
|
"logits/rejected": -3.658412218093872, |
|
"logps/chosen": -383.5570068359375, |
|
"logps/rejected": -422.8628845214844, |
|
"loss": 0.6555, |
|
"rewards/accuracies": 0.612500011920929, |
|
"rewards/chosen": -0.950586199760437, |
|
"rewards/margins": 0.34554851055145264, |
|
"rewards/rejected": -1.2961347103118896, |
|
"step": 420 |
|
}, |
|
{ |
|
"epoch": 0.34, |
|
"learning_rate": 4.145297805476023e-06, |
|
"logits/chosen": -3.6771697998046875, |
|
"logits/rejected": -3.6845245361328125, |
|
"logps/chosen": -379.49029541015625, |
|
"logps/rejected": -410.7177734375, |
|
"loss": 0.6399, |
|
"rewards/accuracies": 0.6499999761581421, |
|
"rewards/chosen": -0.9332340359687805, |
|
"rewards/margins": 0.1954960823059082, |
|
"rewards/rejected": -1.1287301778793335, |
|
"step": 430 |
|
}, |
|
{ |
|
"epoch": 0.35, |
|
"learning_rate": 4.092051971292228e-06, |
|
"logits/chosen": -3.715477705001831, |
|
"logits/rejected": -3.7073216438293457, |
|
"logps/chosen": -379.3064270019531, |
|
"logps/rejected": -404.5085144042969, |
|
"loss": 0.6564, |
|
"rewards/accuracies": 0.6000000238418579, |
|
"rewards/chosen": -0.9848654866218567, |
|
"rewards/margins": 0.13351285457611084, |
|
"rewards/rejected": -1.1183784008026123, |
|
"step": 440 |
|
}, |
|
{ |
|
"epoch": 0.36, |
|
"learning_rate": 4.037562493074792e-06, |
|
"logits/chosen": -3.800858974456787, |
|
"logits/rejected": -3.8201656341552734, |
|
"logps/chosen": -404.48602294921875, |
|
"logps/rejected": -423.95330810546875, |
|
"loss": 0.6378, |
|
"rewards/accuracies": 0.6937500238418579, |
|
"rewards/chosen": -1.056680679321289, |
|
"rewards/margins": 0.18673589825630188, |
|
"rewards/rejected": -1.2434165477752686, |
|
"step": 450 |
|
}, |
|
{ |
|
"epoch": 0.37, |
|
"learning_rate": 3.981871935712112e-06, |
|
"logits/chosen": -3.9585278034210205, |
|
"logits/rejected": -3.9149413108825684, |
|
"logps/chosen": -382.9043884277344, |
|
"logps/rejected": -413.051025390625, |
|
"loss": 0.6556, |
|
"rewards/accuracies": 0.5687500238418579, |
|
"rewards/chosen": -1.0201416015625, |
|
"rewards/margins": 0.13921280205249786, |
|
"rewards/rejected": -1.1593544483184814, |
|
"step": 460 |
|
}, |
|
{ |
|
"epoch": 0.38, |
|
"learning_rate": 3.925023802325094e-06, |
|
"logits/chosen": -4.015332221984863, |
|
"logits/rejected": -3.9858086109161377, |
|
"logps/chosen": -397.7122802734375, |
|
"logps/rejected": -432.3436584472656, |
|
"loss": 0.6466, |
|
"rewards/accuracies": 0.643750011920929, |
|
"rewards/chosen": -1.045425295829773, |
|
"rewards/margins": 0.2483961582183838, |
|
"rewards/rejected": -1.2938215732574463, |
|
"step": 470 |
|
}, |
|
{ |
|
"epoch": 0.38, |
|
"learning_rate": 3.867062500284342e-06, |
|
"logits/chosen": -4.052220344543457, |
|
"logits/rejected": -4.021946907043457, |
|
"logps/chosen": -385.6368103027344, |
|
"logps/rejected": -421.88824462890625, |
|
"loss": 0.6432, |
|
"rewards/accuracies": 0.625, |
|
"rewards/chosen": -1.0936793088912964, |
|
"rewards/margins": 0.17554286122322083, |
|
"rewards/rejected": -1.2692222595214844, |
|
"step": 480 |
|
}, |
|
{ |
|
"epoch": 0.39, |
|
"learning_rate": 3.8080333065209885e-06, |
|
"logits/chosen": -4.067862510681152, |
|
"logits/rejected": -4.076433181762695, |
|
"logps/chosen": -389.60333251953125, |
|
"logps/rejected": -390.48321533203125, |
|
"loss": 0.7053, |
|
"rewards/accuracies": 0.5375000238418579, |
|
"rewards/chosen": -1.09650719165802, |
|
"rewards/margins": 0.06325383484363556, |
|
"rewards/rejected": -1.1597610712051392, |
|
"step": 490 |
|
}, |
|
{ |
|
"epoch": 0.4, |
|
"learning_rate": 3.7479823321582624e-06, |
|
"logits/chosen": -3.942683458328247, |
|
"logits/rejected": -3.9045321941375732, |
|
"logps/chosen": -378.3946228027344, |
|
"logps/rejected": -416.160400390625, |
|
"loss": 0.6342, |
|
"rewards/accuracies": 0.6187499761581421, |
|
"rewards/chosen": -0.91758793592453, |
|
"rewards/margins": 0.18643911182880402, |
|
"rewards/rejected": -1.1040270328521729, |
|
"step": 500 |
|
}, |
|
{ |
|
"epoch": 0.4, |
|
"eval_logits/chosen": -4.11630392074585, |
|
"eval_logits/rejected": -4.106530666351318, |
|
"eval_logps/chosen": -313.0755920410156, |
|
"eval_logps/rejected": -326.7316589355469, |
|
"eval_loss": 0.6783922910690308, |
|
"eval_rewards/accuracies": 0.5580000281333923, |
|
"eval_rewards/chosen": -0.9236910939216614, |
|
"eval_rewards/margins": 0.08026820421218872, |
|
"eval_rewards/rejected": -1.00395929813385, |
|
"eval_runtime": 1601.521, |
|
"eval_samples_per_second": 1.869, |
|
"eval_steps_per_second": 0.234, |
|
"step": 500 |
|
}, |
|
{ |
|
"epoch": 0.41, |
|
"learning_rate": 3.686956486491419e-06, |
|
"logits/chosen": -3.9557979106903076, |
|
"logits/rejected": -3.950779676437378, |
|
"logps/chosen": -388.673583984375, |
|
"logps/rejected": -427.42962646484375, |
|
"loss": 0.6299, |
|
"rewards/accuracies": 0.6312500238418579, |
|
"rewards/chosen": -0.9443014860153198, |
|
"rewards/margins": 0.30186328291893005, |
|
"rewards/rejected": -1.2461649179458618, |
|
"step": 510 |
|
}, |
|
{ |
|
"epoch": 0.42, |
|
"learning_rate": 3.625003440344166e-06, |
|
"logits/chosen": -4.053747653961182, |
|
"logits/rejected": -4.082505226135254, |
|
"logps/chosen": -371.04339599609375, |
|
"logps/rejected": -383.62921142578125, |
|
"loss": 0.6644, |
|
"rewards/accuracies": 0.606249988079071, |
|
"rewards/chosen": -0.9302754402160645, |
|
"rewards/margins": 0.08710617572069168, |
|
"rewards/rejected": -1.0173815488815308, |
|
"step": 520 |
|
}, |
|
{ |
|
"epoch": 0.42, |
|
"learning_rate": 3.562171588830231e-06, |
|
"logits/chosen": -4.011529445648193, |
|
"logits/rejected": -3.9905009269714355, |
|
"logps/chosen": -379.8844299316406, |
|
"logps/rejected": -406.698974609375, |
|
"loss": 0.699, |
|
"rewards/accuracies": 0.550000011920929, |
|
"rewards/chosen": -0.9027377963066101, |
|
"rewards/margins": 0.07473494112491608, |
|
"rewards/rejected": -0.9774727821350098, |
|
"step": 530 |
|
}, |
|
{ |
|
"epoch": 0.43, |
|
"learning_rate": 3.4985100135491245e-06, |
|
"logits/chosen": -3.9951107501983643, |
|
"logits/rejected": -3.956082582473755, |
|
"logps/chosen": -383.92889404296875, |
|
"logps/rejected": -426.62347412109375, |
|
"loss": 0.6307, |
|
"rewards/accuracies": 0.6312500238418579, |
|
"rewards/chosen": -0.8952111005783081, |
|
"rewards/margins": 0.21770212054252625, |
|
"rewards/rejected": -1.1129133701324463, |
|
"step": 540 |
|
}, |
|
{ |
|
"epoch": 0.44, |
|
"learning_rate": 3.4340684442456673e-06, |
|
"logits/chosen": -4.0311408042907715, |
|
"logits/rejected": -4.022164821624756, |
|
"logps/chosen": -383.31878662109375, |
|
"logps/rejected": -409.2886047363281, |
|
"loss": 0.6428, |
|
"rewards/accuracies": 0.6187499761581421, |
|
"rewards/chosen": -0.953955352306366, |
|
"rewards/margins": 0.1485908329486847, |
|
"rewards/rejected": -1.1025463342666626, |
|
"step": 550 |
|
}, |
|
{ |
|
"epoch": 0.45, |
|
"learning_rate": 3.3688972199631974e-06, |
|
"logits/chosen": -4.0242533683776855, |
|
"logits/rejected": -4.047430992126465, |
|
"logps/chosen": -385.8880920410156, |
|
"logps/rejected": -412.330810546875, |
|
"loss": 0.6328, |
|
"rewards/accuracies": 0.675000011920929, |
|
"rewards/chosen": -0.9757701754570007, |
|
"rewards/margins": 0.2515910267829895, |
|
"rewards/rejected": -1.2273612022399902, |
|
"step": 560 |
|
}, |
|
{ |
|
"epoch": 0.46, |
|
"learning_rate": 3.3030472497208354e-06, |
|
"logits/chosen": -4.045630931854248, |
|
"logits/rejected": -4.000221252441406, |
|
"logps/chosen": -382.03607177734375, |
|
"logps/rejected": -447.540283203125, |
|
"loss": 0.6153, |
|
"rewards/accuracies": 0.625, |
|
"rewards/chosen": -1.047086477279663, |
|
"rewards/margins": 0.2707362473011017, |
|
"rewards/rejected": -1.3178224563598633, |
|
"step": 570 |
|
}, |
|
{ |
|
"epoch": 0.46, |
|
"learning_rate": 3.236569972745492e-06, |
|
"logits/chosen": -4.149961471557617, |
|
"logits/rejected": -4.139186382293701, |
|
"logps/chosen": -379.27294921875, |
|
"logps/rejected": -405.08392333984375, |
|
"loss": 0.6644, |
|
"rewards/accuracies": 0.6499999761581421, |
|
"rewards/chosen": -1.05259108543396, |
|
"rewards/margins": 0.16109797358512878, |
|
"rewards/rejected": -1.213688850402832, |
|
"step": 580 |
|
}, |
|
{ |
|
"epoch": 0.47, |
|
"learning_rate": 3.1695173182897126e-06, |
|
"logits/chosen": -4.157639026641846, |
|
"logits/rejected": -4.137297630310059, |
|
"logps/chosen": -400.1607666015625, |
|
"logps/rejected": -438.38275146484375, |
|
"loss": 0.6558, |
|
"rewards/accuracies": 0.625, |
|
"rewards/chosen": -1.2017654180526733, |
|
"rewards/margins": 0.17644351720809937, |
|
"rewards/rejected": -1.378208875656128, |
|
"step": 590 |
|
}, |
|
{ |
|
"epoch": 0.48, |
|
"learning_rate": 3.10194166506673e-06, |
|
"logits/chosen": -4.194487571716309, |
|
"logits/rejected": -4.153265476226807, |
|
"logps/chosen": -393.9078063964844, |
|
"logps/rejected": -442.1280822753906, |
|
"loss": 0.6341, |
|
"rewards/accuracies": 0.6312500238418579, |
|
"rewards/chosen": -1.0909817218780518, |
|
"rewards/margins": 0.2533242404460907, |
|
"rewards/rejected": -1.3443058729171753, |
|
"step": 600 |
|
}, |
|
{ |
|
"epoch": 0.48, |
|
"eval_logits/chosen": -4.4554667472839355, |
|
"eval_logits/rejected": -4.444517135620117, |
|
"eval_logps/chosen": -340.80841064453125, |
|
"eval_logps/rejected": -357.2098693847656, |
|
"eval_loss": 0.6766642928123474, |
|
"eval_rewards/accuracies": 0.5630000233650208, |
|
"eval_rewards/chosen": -1.2010191679000854, |
|
"eval_rewards/margins": 0.1077219545841217, |
|
"eval_rewards/rejected": -1.3087410926818848, |
|
"eval_runtime": 1605.4995, |
|
"eval_samples_per_second": 1.865, |
|
"eval_steps_per_second": 0.234, |
|
"step": 600 |
|
}, |
|
{ |
|
"epoch": 0.49, |
|
"learning_rate": 3.0338958003344115e-06, |
|
"logits/chosen": -4.3319549560546875, |
|
"logits/rejected": -4.279036045074463, |
|
"logps/chosen": -399.0089416503906, |
|
"logps/rejected": -435.59075927734375, |
|
"loss": 0.692, |
|
"rewards/accuracies": 0.668749988079071, |
|
"rewards/chosen": -1.2491410970687866, |
|
"rewards/margins": 0.2489248812198639, |
|
"rewards/rejected": -1.4980661869049072, |
|
"step": 610 |
|
}, |
|
{ |
|
"epoch": 0.5, |
|
"learning_rate": 2.9654328786600823e-06, |
|
"logits/chosen": -4.308805465698242, |
|
"logits/rejected": -4.258028984069824, |
|
"logps/chosen": -398.9922180175781, |
|
"logps/rejected": -443.50775146484375, |
|
"loss": 0.6204, |
|
"rewards/accuracies": 0.699999988079071, |
|
"rewards/chosen": -1.2274248600006104, |
|
"rewards/margins": 0.2125413715839386, |
|
"rewards/rejected": -1.4399662017822266, |
|
"step": 620 |
|
}, |
|
{ |
|
"epoch": 0.5, |
|
"learning_rate": 2.896606380398402e-06, |
|
"logits/chosen": -4.377072334289551, |
|
"logits/rejected": -4.414391994476318, |
|
"logps/chosen": -420.06884765625, |
|
"logps/rejected": -445.095458984375, |
|
"loss": 0.6656, |
|
"rewards/accuracies": 0.6312500238418579, |
|
"rewards/chosen": -1.3716115951538086, |
|
"rewards/margins": 0.1795826405286789, |
|
"rewards/rejected": -1.551194190979004, |
|
"step": 630 |
|
}, |
|
{ |
|
"epoch": 0.51, |
|
"learning_rate": 2.827470069914772e-06, |
|
"logits/chosen": -4.2714762687683105, |
|
"logits/rejected": -4.23270320892334, |
|
"logps/chosen": -426.16943359375, |
|
"logps/rejected": -454.91278076171875, |
|
"loss": 0.6689, |
|
"rewards/accuracies": 0.59375, |
|
"rewards/chosen": -1.4019109010696411, |
|
"rewards/margins": 0.15336069464683533, |
|
"rewards/rejected": -1.5552715063095093, |
|
"step": 640 |
|
}, |
|
{ |
|
"epoch": 0.52, |
|
"learning_rate": 2.7580779535868675e-06, |
|
"logits/chosen": -4.235100746154785, |
|
"logits/rejected": -4.236804008483887, |
|
"logps/chosen": -408.59002685546875, |
|
"logps/rejected": -438.03204345703125, |
|
"loss": 0.6618, |
|
"rewards/accuracies": 0.643750011920929, |
|
"rewards/chosen": -1.2248241901397705, |
|
"rewards/margins": 0.1827341616153717, |
|
"rewards/rejected": -1.4075584411621094, |
|
"step": 650 |
|
}, |
|
{ |
|
"epoch": 0.53, |
|
"learning_rate": 2.688484237617129e-06, |
|
"logits/chosen": -4.117595672607422, |
|
"logits/rejected": -4.08989143371582, |
|
"logps/chosen": -397.66925048828125, |
|
"logps/rejected": -433.625732421875, |
|
"loss": 0.6206, |
|
"rewards/accuracies": 0.65625, |
|
"rewards/chosen": -1.0825097560882568, |
|
"rewards/margins": 0.2466181069612503, |
|
"rewards/rejected": -1.3291277885437012, |
|
"step": 660 |
|
}, |
|
{ |
|
"epoch": 0.54, |
|
"learning_rate": 2.6187432856891585e-06, |
|
"logits/chosen": -4.0606913566589355, |
|
"logits/rejected": -4.052728176116943, |
|
"logps/chosen": -407.27392578125, |
|
"logps/rejected": -445.926025390625, |
|
"loss": 0.6409, |
|
"rewards/accuracies": 0.6187499761581421, |
|
"rewards/chosen": -1.151308298110962, |
|
"rewards/margins": 0.19334861636161804, |
|
"rewards/rejected": -1.344657063484192, |
|
"step": 670 |
|
}, |
|
{ |
|
"epoch": 0.54, |
|
"learning_rate": 2.548909576501096e-06, |
|
"logits/chosen": -4.1063737869262695, |
|
"logits/rejected": -4.097175121307373, |
|
"logps/chosen": -412.988037109375, |
|
"logps/rejected": -442.6796875, |
|
"loss": 0.6362, |
|
"rewards/accuracies": 0.574999988079071, |
|
"rewards/chosen": -1.2146575450897217, |
|
"rewards/margins": 0.1937810331583023, |
|
"rewards/rejected": -1.4084386825561523, |
|
"step": 680 |
|
}, |
|
{ |
|
"epoch": 0.55, |
|
"learning_rate": 2.4790376612091503e-06, |
|
"logits/chosen": -4.220733642578125, |
|
"logits/rejected": -4.182897090911865, |
|
"logps/chosen": -444.1158142089844, |
|
"logps/rejected": -476.25286865234375, |
|
"loss": 0.6217, |
|
"rewards/accuracies": 0.668749988079071, |
|
"rewards/chosen": -1.4591256380081177, |
|
"rewards/margins": 0.2400621622800827, |
|
"rewards/rejected": -1.6991876363754272, |
|
"step": 690 |
|
}, |
|
{ |
|
"epoch": 0.56, |
|
"learning_rate": 2.40918212081453e-06, |
|
"logits/chosen": -4.308882713317871, |
|
"logits/rejected": -4.258685111999512, |
|
"logps/chosen": -410.50714111328125, |
|
"logps/rejected": -470.98712158203125, |
|
"loss": 0.573, |
|
"rewards/accuracies": 0.699999988079071, |
|
"rewards/chosen": -1.4014512300491333, |
|
"rewards/margins": 0.4143608510494232, |
|
"rewards/rejected": -1.8158118724822998, |
|
"step": 700 |
|
}, |
|
{ |
|
"epoch": 0.56, |
|
"eval_logits/chosen": -4.460686683654785, |
|
"eval_logits/rejected": -4.447921276092529, |
|
"eval_logps/chosen": -376.8887939453125, |
|
"eval_logps/rejected": -395.554931640625, |
|
"eval_loss": 0.680784285068512, |
|
"eval_rewards/accuracies": 0.5633333325386047, |
|
"eval_rewards/chosen": -1.561822772026062, |
|
"eval_rewards/margins": 0.1303686946630478, |
|
"eval_rewards/rejected": -1.6921913623809814, |
|
"eval_runtime": 1600.8511, |
|
"eval_samples_per_second": 1.87, |
|
"eval_steps_per_second": 0.234, |
|
"step": 700 |
|
}, |
|
{ |
|
"epoch": 0.57, |
|
"learning_rate": 2.3393975235270654e-06, |
|
"logits/chosen": -4.24044132232666, |
|
"logits/rejected": -4.221581935882568, |
|
"logps/chosen": -451.12353515625, |
|
"logps/rejected": -492.0777893066406, |
|
"loss": 0.6601, |
|
"rewards/accuracies": 0.59375, |
|
"rewards/chosen": -1.5504473447799683, |
|
"rewards/margins": 0.23769822716712952, |
|
"rewards/rejected": -1.788145661354065, |
|
"step": 710 |
|
}, |
|
{ |
|
"epoch": 0.58, |
|
"learning_rate": 2.2697383821388153e-06, |
|
"logits/chosen": -4.228194236755371, |
|
"logits/rejected": -4.242656230926514, |
|
"logps/chosen": -432.177001953125, |
|
"logps/rejected": -457.5282287597656, |
|
"loss": 0.6498, |
|
"rewards/accuracies": 0.606249988079071, |
|
"rewards/chosen": -1.4517402648925781, |
|
"rewards/margins": 0.22140760719776154, |
|
"rewards/rejected": -1.6731477975845337, |
|
"step": 720 |
|
}, |
|
{ |
|
"epoch": 0.58, |
|
"learning_rate": 2.2002591114409657e-06, |
|
"logits/chosen": -4.1490349769592285, |
|
"logits/rejected": -4.144831657409668, |
|
"logps/chosen": -428.9891662597656, |
|
"logps/rejected": -466.90228271484375, |
|
"loss": 0.6282, |
|
"rewards/accuracies": 0.637499988079071, |
|
"rewards/chosen": -1.3725817203521729, |
|
"rewards/margins": 0.26138487458229065, |
|
"rewards/rejected": -1.6339666843414307, |
|
"step": 730 |
|
}, |
|
{ |
|
"epoch": 0.59, |
|
"learning_rate": 2.131013985717285e-06, |
|
"logits/chosen": -4.210469722747803, |
|
"logits/rejected": -4.1584882736206055, |
|
"logps/chosen": -442.4186096191406, |
|
"logps/rejected": -490.4705505371094, |
|
"loss": 0.6473, |
|
"rewards/accuracies": 0.6812499761581421, |
|
"rewards/chosen": -1.4414502382278442, |
|
"rewards/margins": 0.2493906468153, |
|
"rewards/rejected": -1.690840721130371, |
|
"step": 740 |
|
}, |
|
{ |
|
"epoch": 0.6, |
|
"learning_rate": 2.062057096347338e-06, |
|
"logits/chosen": -4.1790266036987305, |
|
"logits/rejected": -4.148745059967041, |
|
"logps/chosen": -418.1016540527344, |
|
"logps/rejected": -432.9185485839844, |
|
"loss": 0.6612, |
|
"rewards/accuracies": 0.612500011920929, |
|
"rewards/chosen": -1.2657266855239868, |
|
"rewards/margins": 0.14516393840312958, |
|
"rewards/rejected": -1.4108905792236328, |
|
"step": 750 |
|
}, |
|
{ |
|
"epoch": 0.61, |
|
"learning_rate": 1.9934423095525733e-06, |
|
"logits/chosen": -4.040548324584961, |
|
"logits/rejected": -4.0535197257995605, |
|
"logps/chosen": -413.2955017089844, |
|
"logps/rejected": -438.1127014160156, |
|
"loss": 0.6539, |
|
"rewards/accuracies": 0.6000000238418579, |
|
"rewards/chosen": -1.1577858924865723, |
|
"rewards/margins": 0.2499733865261078, |
|
"rewards/rejected": -1.4077593088150024, |
|
"step": 760 |
|
}, |
|
{ |
|
"epoch": 0.62, |
|
"learning_rate": 1.9252232243182986e-06, |
|
"logits/chosen": -4.132304668426514, |
|
"logits/rejected": -4.086350440979004, |
|
"logps/chosen": -374.2073974609375, |
|
"logps/rejected": -421.70947265625, |
|
"loss": 0.6035, |
|
"rewards/accuracies": 0.6937500238418579, |
|
"rewards/chosen": -1.0375020503997803, |
|
"rewards/margins": 0.3012349009513855, |
|
"rewards/rejected": -1.3387370109558105, |
|
"step": 770 |
|
}, |
|
{ |
|
"epoch": 0.62, |
|
"learning_rate": 1.8574531305244043e-06, |
|
"logits/chosen": -3.9333484172821045, |
|
"logits/rejected": -3.921320676803589, |
|
"logps/chosen": -413.23236083984375, |
|
"logps/rejected": -459.419677734375, |
|
"loss": 0.5945, |
|
"rewards/accuracies": 0.6625000238418579, |
|
"rewards/chosen": -1.1705034971237183, |
|
"rewards/margins": 0.31472960114479065, |
|
"rewards/rejected": -1.485233187675476, |
|
"step": 780 |
|
}, |
|
{ |
|
"epoch": 0.63, |
|
"learning_rate": 1.7901849673175559e-06, |
|
"logits/chosen": -4.004987716674805, |
|
"logits/rejected": -3.96270751953125, |
|
"logps/chosen": -415.433349609375, |
|
"logps/rejected": -450.65447998046875, |
|
"loss": 0.6413, |
|
"rewards/accuracies": 0.65625, |
|
"rewards/chosen": -1.1887439489364624, |
|
"rewards/margins": 0.20863094925880432, |
|
"rewards/rejected": -1.3973749876022339, |
|
"step": 790 |
|
}, |
|
{ |
|
"epoch": 0.64, |
|
"learning_rate": 1.7234712817573555e-06, |
|
"logits/chosen": -3.995453357696533, |
|
"logits/rejected": -3.992877960205078, |
|
"logps/chosen": -452.4390563964844, |
|
"logps/rejected": -477.8495178222656, |
|
"loss": 0.6384, |
|
"rewards/accuracies": 0.6625000238418579, |
|
"rewards/chosen": -1.3979339599609375, |
|
"rewards/margins": 0.2388983964920044, |
|
"rewards/rejected": -1.6368324756622314, |
|
"step": 800 |
|
}, |
|
{ |
|
"epoch": 0.64, |
|
"eval_logits/chosen": -4.182389259338379, |
|
"eval_logits/rejected": -4.171318531036377, |
|
"eval_logps/chosen": -357.1873474121094, |
|
"eval_logps/rejected": -375.4593200683594, |
|
"eval_loss": 0.6752617955207825, |
|
"eval_rewards/accuracies": 0.5659999847412109, |
|
"eval_rewards/chosen": -1.3648083209991455, |
|
"eval_rewards/margins": 0.12642760574817657, |
|
"eval_rewards/rejected": -1.4912358522415161, |
|
"eval_runtime": 1604.6041, |
|
"eval_samples_per_second": 1.866, |
|
"eval_steps_per_second": 0.234, |
|
"step": 800 |
|
}, |
|
{ |
|
"epoch": 0.65, |
|
"learning_rate": 1.6573641877687936e-06, |
|
"logits/chosen": -4.01367712020874, |
|
"logits/rejected": -3.9943137168884277, |
|
"logps/chosen": -420.02734375, |
|
"logps/rejected": -469.4754333496094, |
|
"loss": 0.612, |
|
"rewards/accuracies": 0.637499988079071, |
|
"rewards/chosen": -1.3465534448623657, |
|
"rewards/margins": 0.27429622411727905, |
|
"rewards/rejected": -1.620849370956421, |
|
"step": 810 |
|
}, |
|
{ |
|
"epoch": 0.66, |
|
"learning_rate": 1.591915325433034e-06, |
|
"logits/chosen": -4.069581031799316, |
|
"logits/rejected": -4.076575756072998, |
|
"logps/chosen": -411.4861755371094, |
|
"logps/rejected": -448.5120544433594, |
|
"loss": 0.6138, |
|
"rewards/accuracies": 0.643750011920929, |
|
"rewards/chosen": -1.289924144744873, |
|
"rewards/margins": 0.3182913661003113, |
|
"rewards/rejected": -1.608215570449829, |
|
"step": 820 |
|
}, |
|
{ |
|
"epoch": 0.66, |
|
"learning_rate": 1.5271758206483664e-06, |
|
"logits/chosen": -4.08115291595459, |
|
"logits/rejected": -4.065010070800781, |
|
"logps/chosen": -438.2860412597656, |
|
"logps/rejected": -472.0508728027344, |
|
"loss": 0.6431, |
|
"rewards/accuracies": 0.7124999761581421, |
|
"rewards/chosen": -1.458389163017273, |
|
"rewards/margins": 0.23057003319263458, |
|
"rewards/rejected": -1.6889593601226807, |
|
"step": 830 |
|
}, |
|
{ |
|
"epoch": 0.67, |
|
"learning_rate": 1.4631962451927966e-06, |
|
"logits/chosen": -3.989629030227661, |
|
"logits/rejected": -3.970881938934326, |
|
"logps/chosen": -431.90972900390625, |
|
"logps/rejected": -473.61505126953125, |
|
"loss": 0.6104, |
|
"rewards/accuracies": 0.668749988079071, |
|
"rewards/chosen": -1.3902056217193604, |
|
"rewards/margins": 0.27469927072525024, |
|
"rewards/rejected": -1.6649048328399658, |
|
"step": 840 |
|
}, |
|
{ |
|
"epoch": 0.68, |
|
"learning_rate": 1.4000265772195032e-06, |
|
"logits/chosen": -4.167831897735596, |
|
"logits/rejected": -4.113857746124268, |
|
"logps/chosen": -430.6526794433594, |
|
"logps/rejected": -475.681884765625, |
|
"loss": 0.6188, |
|
"rewards/accuracies": 0.6625000238418579, |
|
"rewards/chosen": -1.3779888153076172, |
|
"rewards/margins": 0.2821890711784363, |
|
"rewards/rejected": -1.6601779460906982, |
|
"step": 850 |
|
}, |
|
{ |
|
"epoch": 0.69, |
|
"learning_rate": 1.3377161622160077e-06, |
|
"logits/chosen": -4.1135406494140625, |
|
"logits/rejected": -4.103851795196533, |
|
"logps/chosen": -431.333740234375, |
|
"logps/rejected": -472.18505859375, |
|
"loss": 0.6059, |
|
"rewards/accuracies": 0.731249988079071, |
|
"rewards/chosen": -1.4065897464752197, |
|
"rewards/margins": 0.28285306692123413, |
|
"rewards/rejected": -1.6894428730010986, |
|
"step": 860 |
|
}, |
|
{ |
|
"epoch": 0.7, |
|
"learning_rate": 1.276313674457553e-06, |
|
"logits/chosen": -4.243846416473389, |
|
"logits/rejected": -4.234537601470947, |
|
"logps/chosen": -417.1517639160156, |
|
"logps/rejected": -473.24517822265625, |
|
"loss": 0.574, |
|
"rewards/accuracies": 0.71875, |
|
"rewards/chosen": -1.4315975904464722, |
|
"rewards/margins": 0.36709946393966675, |
|
"rewards/rejected": -1.7986972332000732, |
|
"step": 870 |
|
}, |
|
{ |
|
"epoch": 0.7, |
|
"learning_rate": 1.2158670789848095e-06, |
|
"logits/chosen": -4.32526159286499, |
|
"logits/rejected": -4.3212785720825195, |
|
"logps/chosen": -463.43548583984375, |
|
"logps/rejected": -504.515380859375, |
|
"loss": 0.609, |
|
"rewards/accuracies": 0.675000011920929, |
|
"rewards/chosen": -1.7021582126617432, |
|
"rewards/margins": 0.33437836170196533, |
|
"rewards/rejected": -2.036536693572998, |
|
"step": 880 |
|
}, |
|
{ |
|
"epoch": 0.71, |
|
"learning_rate": 1.1564235941356016e-06, |
|
"logits/chosen": -4.47339391708374, |
|
"logits/rejected": -4.411566257476807, |
|
"logps/chosen": -455.85986328125, |
|
"logps/rejected": -513.0088500976562, |
|
"loss": 0.6417, |
|
"rewards/accuracies": 0.59375, |
|
"rewards/chosen": -1.7953081130981445, |
|
"rewards/margins": 0.3589649796485901, |
|
"rewards/rejected": -2.154273271560669, |
|
"step": 890 |
|
}, |
|
{ |
|
"epoch": 0.72, |
|
"learning_rate": 1.0980296546599254e-06, |
|
"logits/chosen": -4.28812313079834, |
|
"logits/rejected": -4.28458309173584, |
|
"logps/chosen": -480.78045654296875, |
|
"logps/rejected": -516.3783569335938, |
|
"loss": 0.6728, |
|
"rewards/accuracies": 0.6000000238418579, |
|
"rewards/chosen": -1.8175837993621826, |
|
"rewards/margins": 0.40267056226730347, |
|
"rewards/rejected": -2.220254421234131, |
|
"step": 900 |
|
}, |
|
{ |
|
"epoch": 0.72, |
|
"eval_logits/chosen": -4.539231300354004, |
|
"eval_logits/rejected": -4.5250043869018555, |
|
"eval_logps/chosen": -389.2689208984375, |
|
"eval_logps/rejected": -409.6128234863281, |
|
"eval_loss": 0.6791353821754456, |
|
"eval_rewards/accuracies": 0.5713333487510681, |
|
"eval_rewards/chosen": -1.6856240034103394, |
|
"eval_rewards/margins": 0.14714699983596802, |
|
"eval_rewards/rejected": -1.8327711820602417, |
|
"eval_runtime": 1601.9204, |
|
"eval_samples_per_second": 1.869, |
|
"eval_steps_per_second": 0.234, |
|
"step": 900 |
|
}, |
|
{ |
|
"epoch": 0.73, |
|
"learning_rate": 1.040730875447083e-06, |
|
"logits/chosen": -4.249950885772705, |
|
"logits/rejected": -4.256103515625, |
|
"logps/chosen": -454.0740661621094, |
|
"logps/rejected": -481.5208435058594, |
|
"loss": 0.6463, |
|
"rewards/accuracies": 0.65625, |
|
"rewards/chosen": -1.5884063243865967, |
|
"rewards/margins": 0.2100685089826584, |
|
"rewards/rejected": -1.798474669456482, |
|
"step": 910 |
|
}, |
|
{ |
|
"epoch": 0.74, |
|
"learning_rate": 9.845720158932414e-07, |
|
"logits/chosen": -4.217880725860596, |
|
"logits/rejected": -4.230615615844727, |
|
"logps/chosen": -421.2108459472656, |
|
"logps/rejected": -443.8993225097656, |
|
"loss": 0.6443, |
|
"rewards/accuracies": 0.625, |
|
"rewards/chosen": -1.46346914768219, |
|
"rewards/margins": 0.2133568972349167, |
|
"rewards/rejected": -1.6768258810043335, |
|
"step": 920 |
|
}, |
|
{ |
|
"epoch": 0.74, |
|
"learning_rate": 9.295969449372796e-07, |
|
"logits/chosen": -4.250065803527832, |
|
"logits/rejected": -4.226481914520264, |
|
"logps/chosen": -418.61883544921875, |
|
"logps/rejected": -455.81170654296875, |
|
"loss": 0.6211, |
|
"rewards/accuracies": 0.6499999761581421, |
|
"rewards/chosen": -1.3508899211883545, |
|
"rewards/margins": 0.25358742475509644, |
|
"rewards/rejected": -1.6044775247573853, |
|
"step": 930 |
|
}, |
|
{ |
|
"epoch": 0.75, |
|
"learning_rate": 8.758486067922176e-07, |
|
"logits/chosen": -4.2414469718933105, |
|
"logits/rejected": -4.200159549713135, |
|
"logps/chosen": -413.689697265625, |
|
"logps/rejected": -464.592529296875, |
|
"loss": 0.5956, |
|
"rewards/accuracies": 0.7250000238418579, |
|
"rewards/chosen": -1.3547637462615967, |
|
"rewards/margins": 0.32075968384742737, |
|
"rewards/rejected": -1.6755234003067017, |
|
"step": 940 |
|
}, |
|
{ |
|
"epoch": 0.76, |
|
"learning_rate": 8.233689873990006e-07, |
|
"logits/chosen": -4.249865531921387, |
|
"logits/rejected": -4.214231491088867, |
|
"logps/chosen": -419.63397216796875, |
|
"logps/rejected": -491.7757873535156, |
|
"loss": 0.5892, |
|
"rewards/accuracies": 0.625, |
|
"rewards/chosen": -1.4119402170181274, |
|
"rewards/margins": 0.5434682369232178, |
|
"rewards/rejected": -1.9554083347320557, |
|
"step": 950 |
|
}, |
|
{ |
|
"epoch": 0.77, |
|
"learning_rate": 7.721990816288555e-07, |
|
"logits/chosen": -4.227639198303223, |
|
"logits/rejected": -4.187970161437988, |
|
"logps/chosen": -395.7589416503906, |
|
"logps/rejected": -436.3963928222656, |
|
"loss": 0.612, |
|
"rewards/accuracies": 0.6875, |
|
"rewards/chosen": -1.3264906406402588, |
|
"rewards/margins": 0.2781200408935547, |
|
"rewards/rejected": -1.604610800743103, |
|
"step": 960 |
|
}, |
|
{ |
|
"epoch": 0.78, |
|
"learning_rate": 7.223788612598148e-07, |
|
"logits/chosen": -4.25420618057251, |
|
"logits/rejected": -4.232513427734375, |
|
"logps/chosen": -437.45721435546875, |
|
"logps/rejected": -472.1407165527344, |
|
"loss": 0.6314, |
|
"rewards/accuracies": 0.625, |
|
"rewards/chosen": -1.4481703042984009, |
|
"rewards/margins": 0.1885295808315277, |
|
"rewards/rejected": -1.636699914932251, |
|
"step": 970 |
|
}, |
|
{ |
|
"epoch": 0.78, |
|
"learning_rate": 6.73947243752448e-07, |
|
"logits/chosen": -4.329432964324951, |
|
"logits/rejected": -4.338817596435547, |
|
"logps/chosen": -462.8741760253906, |
|
"logps/rejected": -490.8958435058594, |
|
"loss": 0.6649, |
|
"rewards/accuracies": 0.612500011920929, |
|
"rewards/chosen": -1.6740070581436157, |
|
"rewards/margins": 0.3544973134994507, |
|
"rewards/rejected": -2.0285041332244873, |
|
"step": 980 |
|
}, |
|
{ |
|
"epoch": 0.79, |
|
"learning_rate": 6.269420618491759e-07, |
|
"logits/chosen": -4.2786760330200195, |
|
"logits/rejected": -4.245335578918457, |
|
"logps/chosen": -423.904296875, |
|
"logps/rejected": -463.173095703125, |
|
"loss": 0.6426, |
|
"rewards/accuracies": 0.6187499761581421, |
|
"rewards/chosen": -1.5292352437973022, |
|
"rewards/margins": 0.23503565788269043, |
|
"rewards/rejected": -1.7642710208892822, |
|
"step": 990 |
|
}, |
|
{ |
|
"epoch": 0.8, |
|
"learning_rate": 5.814000340209267e-07, |
|
"logits/chosen": -4.25986385345459, |
|
"logits/rejected": -4.194713592529297, |
|
"logps/chosen": -439.45086669921875, |
|
"logps/rejected": -495.15771484375, |
|
"loss": 0.603, |
|
"rewards/accuracies": 0.71875, |
|
"rewards/chosen": -1.533198356628418, |
|
"rewards/margins": 0.34696659445762634, |
|
"rewards/rejected": -1.8801651000976562, |
|
"step": 1000 |
|
}, |
|
{ |
|
"epoch": 0.8, |
|
"eval_logits/chosen": -4.503380298614502, |
|
"eval_logits/rejected": -4.4899396896362305, |
|
"eval_logps/chosen": -382.5414733886719, |
|
"eval_logps/rejected": -403.2298889160156, |
|
"eval_loss": 0.676650881767273, |
|
"eval_rewards/accuracies": 0.5770000219345093, |
|
"eval_rewards/chosen": -1.618349313735962, |
|
"eval_rewards/margins": 0.15059219300746918, |
|
"eval_rewards/rejected": -1.7689412832260132, |
|
"eval_runtime": 1634.1825, |
|
"eval_samples_per_second": 1.832, |
|
"eval_steps_per_second": 0.229, |
|
"step": 1000 |
|
}, |
|
{ |
|
"epoch": 0.81, |
|
"learning_rate": 5.373567357842111e-07, |
|
"logits/chosen": -4.2339959144592285, |
|
"logits/rejected": -4.205721378326416, |
|
"logps/chosen": -433.2574768066406, |
|
"logps/rejected": -476.2079162597656, |
|
"loss": 0.5976, |
|
"rewards/accuracies": 0.6187499761581421, |
|
"rewards/chosen": -1.4623594284057617, |
|
"rewards/margins": 0.33363303542137146, |
|
"rewards/rejected": -1.7959926128387451, |
|
"step": 1010 |
|
}, |
|
{ |
|
"epoch": 0.82, |
|
"learning_rate": 4.948465719110226e-07, |
|
"logits/chosen": -4.32258415222168, |
|
"logits/rejected": -4.291365623474121, |
|
"logps/chosen": -420.07330322265625, |
|
"logps/rejected": -451.5908203125, |
|
"loss": 0.6421, |
|
"rewards/accuracies": 0.643750011920929, |
|
"rewards/chosen": -1.45734441280365, |
|
"rewards/margins": 0.24639002978801727, |
|
"rewards/rejected": -1.7037346363067627, |
|
"step": 1020 |
|
}, |
|
{ |
|
"epoch": 0.82, |
|
"learning_rate": 4.539027495532766e-07, |
|
"logits/chosen": -4.28394889831543, |
|
"logits/rejected": -4.300284385681152, |
|
"logps/chosen": -417.7969665527344, |
|
"logps/rejected": -456.79180908203125, |
|
"loss": 0.6168, |
|
"rewards/accuracies": 0.668749988079071, |
|
"rewards/chosen": -1.4292632341384888, |
|
"rewards/margins": 0.28898704051971436, |
|
"rewards/rejected": -1.718250036239624, |
|
"step": 1030 |
|
}, |
|
{ |
|
"epoch": 0.83, |
|
"learning_rate": 4.14557252302783e-07, |
|
"logits/chosen": -4.28948450088501, |
|
"logits/rejected": -4.251836776733398, |
|
"logps/chosen": -435.3079528808594, |
|
"logps/rejected": -474.75006103515625, |
|
"loss": 0.6739, |
|
"rewards/accuracies": 0.637499988079071, |
|
"rewards/chosen": -1.5503196716308594, |
|
"rewards/margins": 0.23696406185626984, |
|
"rewards/rejected": -1.7872836589813232, |
|
"step": 1040 |
|
}, |
|
{ |
|
"epoch": 0.84, |
|
"learning_rate": 3.7684081520700884e-07, |
|
"logits/chosen": -4.184053897857666, |
|
"logits/rejected": -4.1847357749938965, |
|
"logps/chosen": -452.0301818847656, |
|
"logps/rejected": -472.8980407714844, |
|
"loss": 0.6398, |
|
"rewards/accuracies": 0.65625, |
|
"rewards/chosen": -1.5176578760147095, |
|
"rewards/margins": 0.2665901780128479, |
|
"rewards/rejected": -1.7842479944229126, |
|
"step": 1050 |
|
}, |
|
{ |
|
"epoch": 0.85, |
|
"learning_rate": 3.407829007601507e-07, |
|
"logits/chosen": -4.219012260437012, |
|
"logits/rejected": -4.17362117767334, |
|
"logps/chosen": -430.40997314453125, |
|
"logps/rejected": -481.66363525390625, |
|
"loss": 0.6108, |
|
"rewards/accuracies": 0.675000011920929, |
|
"rewards/chosen": -1.4400875568389893, |
|
"rewards/margins": 0.3422803282737732, |
|
"rewards/rejected": -1.7823677062988281, |
|
"step": 1060 |
|
}, |
|
{ |
|
"epoch": 0.86, |
|
"learning_rate": 3.064116758882724e-07, |
|
"logits/chosen": -4.185604572296143, |
|
"logits/rejected": -4.131557941436768, |
|
"logps/chosen": -446.9971618652344, |
|
"logps/rejected": -507.7557067871094, |
|
"loss": 0.607, |
|
"rewards/accuracies": 0.706250011920929, |
|
"rewards/chosen": -1.5457991361618042, |
|
"rewards/margins": 0.41560330986976624, |
|
"rewards/rejected": -1.9614025354385376, |
|
"step": 1070 |
|
}, |
|
{ |
|
"epoch": 0.86, |
|
"learning_rate": 2.737539899464908e-07, |
|
"logits/chosen": -4.246437072753906, |
|
"logits/rejected": -4.235133647918701, |
|
"logps/chosen": -405.3433532714844, |
|
"logps/rejected": -456.494140625, |
|
"loss": 0.6183, |
|
"rewards/accuracies": 0.65625, |
|
"rewards/chosen": -1.4034773111343384, |
|
"rewards/margins": 0.37458181381225586, |
|
"rewards/rejected": -1.7780590057373047, |
|
"step": 1080 |
|
}, |
|
{ |
|
"epoch": 0.87, |
|
"learning_rate": 2.4283535374538645e-07, |
|
"logits/chosen": -4.18662691116333, |
|
"logits/rejected": -4.184709072113037, |
|
"logps/chosen": -436.64849853515625, |
|
"logps/rejected": -474.5104064941406, |
|
"loss": 0.6166, |
|
"rewards/accuracies": 0.6499999761581421, |
|
"rewards/chosen": -1.4191557168960571, |
|
"rewards/margins": 0.30806541442871094, |
|
"rewards/rejected": -1.727220892906189, |
|
"step": 1090 |
|
}, |
|
{ |
|
"epoch": 0.88, |
|
"learning_rate": 2.1367991962303298e-07, |
|
"logits/chosen": -4.172500133514404, |
|
"logits/rejected": -4.158700942993164, |
|
"logps/chosen": -419.20977783203125, |
|
"logps/rejected": -447.9820251464844, |
|
"loss": 0.6732, |
|
"rewards/accuracies": 0.5874999761581421, |
|
"rewards/chosen": -1.5145409107208252, |
|
"rewards/margins": 0.15635111927986145, |
|
"rewards/rejected": -1.6708920001983643, |
|
"step": 1100 |
|
}, |
|
{ |
|
"epoch": 0.88, |
|
"eval_logits/chosen": -4.4235520362854, |
|
"eval_logits/rejected": -4.410409927368164, |
|
"eval_logps/chosen": -377.40155029296875, |
|
"eval_logps/rejected": -397.9567565917969, |
|
"eval_loss": 0.6752503514289856, |
|
"eval_rewards/accuracies": 0.5776666402816772, |
|
"eval_rewards/chosen": -1.5669503211975098, |
|
"eval_rewards/margins": 0.1492597907781601, |
|
"eval_rewards/rejected": -1.7162100076675415, |
|
"eval_runtime": 1708.8293, |
|
"eval_samples_per_second": 1.752, |
|
"eval_steps_per_second": 0.219, |
|
"step": 1100 |
|
}, |
|
{ |
|
"epoch": 0.89, |
|
"learning_rate": 1.8631046257820278e-07, |
|
"logits/chosen": -4.177311897277832, |
|
"logits/rejected": -4.192288398742676, |
|
"logps/chosen": -433.6348571777344, |
|
"logps/rejected": -464.0807189941406, |
|
"loss": 0.6238, |
|
"rewards/accuracies": 0.668749988079071, |
|
"rewards/chosen": -1.4329723119735718, |
|
"rewards/margins": 0.2695394456386566, |
|
"rewards/rejected": -1.7025117874145508, |
|
"step": 1110 |
|
}, |
|
{ |
|
"epoch": 0.9, |
|
"learning_rate": 1.6074836247950143e-07, |
|
"logits/chosen": -4.152982234954834, |
|
"logits/rejected": -4.155800819396973, |
|
"logps/chosen": -444.71722412109375, |
|
"logps/rejected": -471.8829650878906, |
|
"loss": 0.6421, |
|
"rewards/accuracies": 0.612500011920929, |
|
"rewards/chosen": -1.4441962242126465, |
|
"rewards/margins": 0.21269936859607697, |
|
"rewards/rejected": -1.656895399093628, |
|
"step": 1120 |
|
}, |
|
{ |
|
"epoch": 0.9, |
|
"learning_rate": 1.370135873643097e-07, |
|
"logits/chosen": -4.171530723571777, |
|
"logits/rejected": -4.211108684539795, |
|
"logps/chosen": -438.52178955078125, |
|
"logps/rejected": -458.4681701660156, |
|
"loss": 0.6246, |
|
"rewards/accuracies": 0.643750011920929, |
|
"rewards/chosen": -1.4780899286270142, |
|
"rewards/margins": 0.28698402643203735, |
|
"rewards/rejected": -1.7650740146636963, |
|
"step": 1130 |
|
}, |
|
{ |
|
"epoch": 0.91, |
|
"learning_rate": 1.1512467784059372e-07, |
|
"logits/chosen": -4.241854667663574, |
|
"logits/rejected": -4.201731204986572, |
|
"logps/chosen": -400.08306884765625, |
|
"logps/rejected": -440.25250244140625, |
|
"loss": 0.6219, |
|
"rewards/accuracies": 0.6625000238418579, |
|
"rewards/chosen": -1.385571837425232, |
|
"rewards/margins": 0.30431246757507324, |
|
"rewards/rejected": -1.6898844242095947, |
|
"step": 1140 |
|
}, |
|
{ |
|
"epoch": 0.92, |
|
"learning_rate": 9.509873260376251e-08, |
|
"logits/chosen": -4.173992156982422, |
|
"logits/rejected": -4.1186909675598145, |
|
"logps/chosen": -429.03570556640625, |
|
"logps/rejected": -502.2874450683594, |
|
"loss": 0.5799, |
|
"rewards/accuracies": 0.699999988079071, |
|
"rewards/chosen": -1.4350130558013916, |
|
"rewards/margins": 0.4506250321865082, |
|
"rewards/rejected": -1.8856379985809326, |
|
"step": 1150 |
|
}, |
|
{ |
|
"epoch": 0.93, |
|
"learning_rate": 7.695139507988559e-08, |
|
"logits/chosen": -4.202746391296387, |
|
"logits/rejected": -4.225730895996094, |
|
"logps/chosen": -442.8606872558594, |
|
"logps/rejected": -478.89776611328125, |
|
"loss": 0.6358, |
|
"rewards/accuracies": 0.6000000238418579, |
|
"rewards/chosen": -1.4460103511810303, |
|
"rewards/margins": 0.257608562707901, |
|
"rewards/rejected": -1.7036190032958984, |
|
"step": 1160 |
|
}, |
|
{ |
|
"epoch": 0.94, |
|
"learning_rate": 6.069684120570684e-08, |
|
"logits/chosen": -4.227932929992676, |
|
"logits/rejected": -4.133112907409668, |
|
"logps/chosen": -433.41192626953125, |
|
"logps/rejected": -483.49383544921875, |
|
"loss": 0.6375, |
|
"rewards/accuracies": 0.6187499761581421, |
|
"rewards/chosen": -1.5354173183441162, |
|
"rewards/margins": 0.30013877153396606, |
|
"rewards/rejected": -1.8355562686920166, |
|
"step": 1170 |
|
}, |
|
{ |
|
"epoch": 0.94, |
|
"learning_rate": 4.634776835499871e-08, |
|
"logits/chosen": -4.156412124633789, |
|
"logits/rejected": -4.1273393630981445, |
|
"logps/chosen": -413.9664611816406, |
|
"logps/rejected": -457.55877685546875, |
|
"loss": 0.6273, |
|
"rewards/accuracies": 0.65625, |
|
"rewards/chosen": -1.4416838884353638, |
|
"rewards/margins": 0.2954239845275879, |
|
"rewards/rejected": -1.7371078729629517, |
|
"step": 1180 |
|
}, |
|
{ |
|
"epoch": 0.95, |
|
"learning_rate": 3.3915385419908964e-08, |
|
"logits/chosen": -4.120321750640869, |
|
"logits/rejected": -4.165139675140381, |
|
"logps/chosen": -432.268310546875, |
|
"logps/rejected": -465.0489807128906, |
|
"loss": 0.6372, |
|
"rewards/accuracies": 0.6000000238418579, |
|
"rewards/chosen": -1.4545971155166626, |
|
"rewards/margins": 0.2788635790348053, |
|
"rewards/rejected": -1.7334604263305664, |
|
"step": 1190 |
|
}, |
|
{ |
|
"epoch": 0.96, |
|
"learning_rate": 2.3409404055043938e-08, |
|
"logits/chosen": -4.251112937927246, |
|
"logits/rejected": -4.216982841491699, |
|
"logps/chosen": -437.51922607421875, |
|
"logps/rejected": -471.106201171875, |
|
"loss": 0.6431, |
|
"rewards/accuracies": 0.581250011920929, |
|
"rewards/chosen": -1.5188627243041992, |
|
"rewards/margins": 0.23063072562217712, |
|
"rewards/rejected": -1.7494935989379883, |
|
"step": 1200 |
|
}, |
|
{ |
|
"epoch": 0.96, |
|
"eval_logits/chosen": -4.397921085357666, |
|
"eval_logits/rejected": -4.384759902954102, |
|
"eval_logps/chosen": -375.8670959472656, |
|
"eval_logps/rejected": -396.2835693359375, |
|
"eval_loss": 0.6749910712242126, |
|
"eval_rewards/accuracies": 0.5773333311080933, |
|
"eval_rewards/chosen": -1.5516059398651123, |
|
"eval_rewards/margins": 0.14787256717681885, |
|
"eval_rewards/rejected": -1.6994785070419312, |
|
"eval_runtime": 1629.1995, |
|
"eval_samples_per_second": 1.838, |
|
"eval_steps_per_second": 0.23, |
|
"step": 1200 |
|
}, |
|
{ |
|
"epoch": 0.97, |
|
"learning_rate": 1.4838031091134186e-08, |
|
"logits/chosen": -4.187882423400879, |
|
"logits/rejected": -4.119401454925537, |
|
"logps/chosen": -407.01824951171875, |
|
"logps/rejected": -472.28631591796875, |
|
"loss": 0.6076, |
|
"rewards/accuracies": 0.6875, |
|
"rewards/chosen": -1.4217784404754639, |
|
"rewards/margins": 0.3864634931087494, |
|
"rewards/rejected": -1.808241844177246, |
|
"step": 1210 |
|
}, |
|
{ |
|
"epoch": 0.98, |
|
"learning_rate": 8.207962124201774e-09, |
|
"logits/chosen": -4.208897590637207, |
|
"logits/rejected": -4.17116641998291, |
|
"logps/chosen": -435.45013427734375, |
|
"logps/rejected": -472.75421142578125, |
|
"loss": 0.6003, |
|
"rewards/accuracies": 0.6812499761581421, |
|
"rewards/chosen": -1.3684009313583374, |
|
"rewards/margins": 0.32645002007484436, |
|
"rewards/rejected": -1.6948511600494385, |
|
"step": 1220 |
|
}, |
|
{ |
|
"epoch": 0.98, |
|
"learning_rate": 3.5243762852441023e-09, |
|
"logits/chosen": -4.157495021820068, |
|
"logits/rejected": -4.110323429107666, |
|
"logps/chosen": -427.61651611328125, |
|
"logps/rejected": -471.6131286621094, |
|
"loss": 0.6286, |
|
"rewards/accuracies": 0.6312500238418579, |
|
"rewards/chosen": -1.4408422708511353, |
|
"rewards/margins": 0.2736855149269104, |
|
"rewards/rejected": -1.7145278453826904, |
|
"step": 1230 |
|
}, |
|
{ |
|
"epoch": 0.99, |
|
"learning_rate": 7.909321945129278e-10, |
|
"logits/chosen": -4.126371383666992, |
|
"logits/rejected": -4.085521221160889, |
|
"logps/chosen": -441.79095458984375, |
|
"logps/rejected": -493.57049560546875, |
|
"loss": 0.5948, |
|
"rewards/accuracies": 0.643750011920929, |
|
"rewards/chosen": -1.3874802589416504, |
|
"rewards/margins": 0.3715583086013794, |
|
"rewards/rejected": -1.7590383291244507, |
|
"step": 1240 |
|
}, |
|
{ |
|
"epoch": 1.0, |
|
"step": 1249, |
|
"total_flos": 0.0, |
|
"train_loss": 0.647387065536218, |
|
"train_runtime": 42673.6748, |
|
"train_samples_per_second": 0.469, |
|
"train_steps_per_second": 0.029 |
|
} |
|
], |
|
"logging_steps": 10, |
|
"max_steps": 1249, |
|
"num_input_tokens_seen": 0, |
|
"num_train_epochs": 1, |
|
"save_steps": 100, |
|
"total_flos": 0.0, |
|
"train_batch_size": 4, |
|
"trial_name": null, |
|
"trial_params": null |
|
} |
|
|