|
{ |
|
"best_metric": null, |
|
"best_model_checkpoint": null, |
|
"epoch": 1.9994765768123528, |
|
"eval_steps": 100, |
|
"global_step": 3820, |
|
"is_hyper_param_search": false, |
|
"is_local_process_zero": true, |
|
"is_world_process_zero": true, |
|
"log_history": [ |
|
{ |
|
"epoch": 0.0, |
|
"learning_rate": 1.3089005235602096e-08, |
|
"logits/chosen": 0.896942138671875, |
|
"logits/rejected": 0.9175108075141907, |
|
"logps/chosen": -192.32028198242188, |
|
"logps/rejected": -193.69876098632812, |
|
"loss": 2500.0, |
|
"rewards/accuracies": 0.0, |
|
"rewards/chosen": 0.0, |
|
"rewards/margins": 0.0, |
|
"rewards/rejected": 0.0, |
|
"step": 1 |
|
}, |
|
{ |
|
"epoch": 0.01, |
|
"learning_rate": 1.3089005235602095e-07, |
|
"logits/chosen": 0.903715968132019, |
|
"logits/rejected": 0.9309377670288086, |
|
"logps/chosen": -253.598876953125, |
|
"logps/rejected": -228.25482177734375, |
|
"loss": 2504.6897, |
|
"rewards/accuracies": 0.3819444477558136, |
|
"rewards/chosen": -0.0001807510998332873, |
|
"rewards/margins": -0.0004412428825162351, |
|
"rewards/rejected": 0.00026049179723486304, |
|
"step": 10 |
|
}, |
|
{ |
|
"epoch": 0.01, |
|
"learning_rate": 2.617801047120419e-07, |
|
"logits/chosen": 0.8256899118423462, |
|
"logits/rejected": 0.9293961524963379, |
|
"logps/chosen": -252.84963989257812, |
|
"logps/rejected": -214.4913330078125, |
|
"loss": 2511.0686, |
|
"rewards/accuracies": 0.4625000059604645, |
|
"rewards/chosen": -0.0004701187717728317, |
|
"rewards/margins": -0.0010407656664028764, |
|
"rewards/rejected": 0.0005706468946300447, |
|
"step": 20 |
|
}, |
|
{ |
|
"epoch": 0.02, |
|
"learning_rate": 3.926701570680629e-07, |
|
"logits/chosen": 0.8985889554023743, |
|
"logits/rejected": 0.8785662651062012, |
|
"logps/chosen": -236.40536499023438, |
|
"logps/rejected": -219.20285034179688, |
|
"loss": 2494.8072, |
|
"rewards/accuracies": 0.512499988079071, |
|
"rewards/chosen": 0.0004474873130675405, |
|
"rewards/margins": 0.0005769692361354828, |
|
"rewards/rejected": -0.00012948190851602703, |
|
"step": 30 |
|
}, |
|
{ |
|
"epoch": 0.02, |
|
"learning_rate": 5.235602094240838e-07, |
|
"logits/chosen": 0.819919228553772, |
|
"logits/rejected": 0.9144619703292847, |
|
"logps/chosen": -252.99588012695312, |
|
"logps/rejected": -225.9224853515625, |
|
"loss": 2504.7604, |
|
"rewards/accuracies": 0.4749999940395355, |
|
"rewards/chosen": -0.0003411176148802042, |
|
"rewards/margins": -0.0004235326196067035, |
|
"rewards/rejected": 8.241502655437216e-05, |
|
"step": 40 |
|
}, |
|
{ |
|
"epoch": 0.03, |
|
"learning_rate": 6.544502617801048e-07, |
|
"logits/chosen": 0.7974398136138916, |
|
"logits/rejected": 0.8803712725639343, |
|
"logps/chosen": -254.3247528076172, |
|
"logps/rejected": -243.318603515625, |
|
"loss": 2498.3947, |
|
"rewards/accuracies": 0.45625001192092896, |
|
"rewards/chosen": -0.00023676609271205962, |
|
"rewards/margins": 0.0002204025659011677, |
|
"rewards/rejected": -0.00045716846943832934, |
|
"step": 50 |
|
}, |
|
{ |
|
"epoch": 0.03, |
|
"learning_rate": 7.853403141361258e-07, |
|
"logits/chosen": 0.8626053929328918, |
|
"logits/rejected": 0.8485649824142456, |
|
"logps/chosen": -262.6585693359375, |
|
"logps/rejected": -248.63272094726562, |
|
"loss": 2500.4902, |
|
"rewards/accuracies": 0.4937500059604645, |
|
"rewards/chosen": -0.0002292692952323705, |
|
"rewards/margins": 2.0124425645917654e-05, |
|
"rewards/rejected": -0.0002493937499821186, |
|
"step": 60 |
|
}, |
|
{ |
|
"epoch": 0.04, |
|
"learning_rate": 9.162303664921466e-07, |
|
"logits/chosen": 0.8897444605827332, |
|
"logits/rejected": 0.8922082185745239, |
|
"logps/chosen": -232.531005859375, |
|
"logps/rejected": -234.0869903564453, |
|
"loss": 2496.8041, |
|
"rewards/accuracies": 0.53125, |
|
"rewards/chosen": 3.7896970752626657e-06, |
|
"rewards/margins": 0.00037148987757973373, |
|
"rewards/rejected": -0.00036770018050447106, |
|
"step": 70 |
|
}, |
|
{ |
|
"epoch": 0.04, |
|
"learning_rate": 1.0471204188481676e-06, |
|
"logits/chosen": 0.8788009881973267, |
|
"logits/rejected": 0.8891068696975708, |
|
"logps/chosen": -242.5009765625, |
|
"logps/rejected": -229.5125732421875, |
|
"loss": 2508.6898, |
|
"rewards/accuracies": 0.4437499940395355, |
|
"rewards/chosen": -0.0007867829990573227, |
|
"rewards/margins": -0.0008174808463081717, |
|
"rewards/rejected": 3.069788363063708e-05, |
|
"step": 80 |
|
}, |
|
{ |
|
"epoch": 0.05, |
|
"learning_rate": 1.1780104712041885e-06, |
|
"logits/chosen": 0.8606799840927124, |
|
"logits/rejected": 0.9575719833374023, |
|
"logps/chosen": -232.0597686767578, |
|
"logps/rejected": -218.4732666015625, |
|
"loss": 2496.8559, |
|
"rewards/accuracies": 0.574999988079071, |
|
"rewards/chosen": 0.0003882866003550589, |
|
"rewards/margins": 0.000359431782271713, |
|
"rewards/rejected": 2.8854870834038593e-05, |
|
"step": 90 |
|
}, |
|
{ |
|
"epoch": 0.05, |
|
"learning_rate": 1.3089005235602096e-06, |
|
"logits/chosen": 0.8833224177360535, |
|
"logits/rejected": 0.8661258816719055, |
|
"logps/chosen": -245.799072265625, |
|
"logps/rejected": -249.2645721435547, |
|
"loss": 2496.843, |
|
"rewards/accuracies": 0.5, |
|
"rewards/chosen": -0.0005279771285131574, |
|
"rewards/margins": 0.00037464461638592184, |
|
"rewards/rejected": -0.0009026216575875878, |
|
"step": 100 |
|
}, |
|
{ |
|
"epoch": 0.05, |
|
"eval_logits/chosen": 0.8318074345588684, |
|
"eval_logits/rejected": 0.8888298273086548, |
|
"eval_logps/chosen": -256.65057373046875, |
|
"eval_logps/rejected": -233.56494140625, |
|
"eval_loss": 2502.266845703125, |
|
"eval_rewards/accuracies": 0.5005000233650208, |
|
"eval_rewards/chosen": -0.0003313073539175093, |
|
"eval_rewards/margins": -0.00017098072567023337, |
|
"eval_rewards/rejected": -0.00016032661369536072, |
|
"eval_runtime": 416.835, |
|
"eval_samples_per_second": 4.798, |
|
"eval_steps_per_second": 1.2, |
|
"step": 100 |
|
}, |
|
{ |
|
"epoch": 0.06, |
|
"learning_rate": 1.4397905759162306e-06, |
|
"logits/chosen": 0.9012953042984009, |
|
"logits/rejected": 0.8766192197799683, |
|
"logps/chosen": -229.46292114257812, |
|
"logps/rejected": -210.2642364501953, |
|
"loss": 2501.3449, |
|
"rewards/accuracies": 0.5, |
|
"rewards/chosen": -0.00010574392217677087, |
|
"rewards/margins": -7.803810149198398e-05, |
|
"rewards/rejected": -2.7705809770850465e-05, |
|
"step": 110 |
|
}, |
|
{ |
|
"epoch": 0.06, |
|
"learning_rate": 1.5706806282722515e-06, |
|
"logits/chosen": 0.8108441233634949, |
|
"logits/rejected": 0.8906086087226868, |
|
"logps/chosen": -273.14385986328125, |
|
"logps/rejected": -259.1924133300781, |
|
"loss": 2494.2678, |
|
"rewards/accuracies": 0.5562499761581421, |
|
"rewards/chosen": 0.00021679741621483117, |
|
"rewards/margins": 0.0006280258530750871, |
|
"rewards/rejected": -0.00041122836410067976, |
|
"step": 120 |
|
}, |
|
{ |
|
"epoch": 0.07, |
|
"learning_rate": 1.7015706806282726e-06, |
|
"logits/chosen": 0.8712674975395203, |
|
"logits/rejected": 0.9049458503723145, |
|
"logps/chosen": -277.8616943359375, |
|
"logps/rejected": -222.53662109375, |
|
"loss": 2489.5006, |
|
"rewards/accuracies": 0.5687500238418579, |
|
"rewards/chosen": 0.0006732499459758401, |
|
"rewards/margins": 0.001107201213017106, |
|
"rewards/rejected": -0.0004339513252489269, |
|
"step": 130 |
|
}, |
|
{ |
|
"epoch": 0.07, |
|
"learning_rate": 1.8324607329842933e-06, |
|
"logits/chosen": 0.7955681085586548, |
|
"logits/rejected": 0.8811987638473511, |
|
"logps/chosen": -248.83865356445312, |
|
"logps/rejected": -246.317138671875, |
|
"loss": 2504.0979, |
|
"rewards/accuracies": 0.46875, |
|
"rewards/chosen": -0.00031032637343741953, |
|
"rewards/margins": -0.00034084441722370684, |
|
"rewards/rejected": 3.0518032872350886e-05, |
|
"step": 140 |
|
}, |
|
{ |
|
"epoch": 0.08, |
|
"learning_rate": 1.9633507853403143e-06, |
|
"logits/chosen": 0.7933157086372375, |
|
"logits/rejected": 0.8591764569282532, |
|
"logps/chosen": -257.7363586425781, |
|
"logps/rejected": -217.54580688476562, |
|
"loss": 2507.8082, |
|
"rewards/accuracies": 0.46875, |
|
"rewards/chosen": -0.0007291415822692215, |
|
"rewards/margins": -0.0007318807765841484, |
|
"rewards/rejected": 2.739173851296073e-06, |
|
"step": 150 |
|
}, |
|
{ |
|
"epoch": 0.08, |
|
"learning_rate": 2.094240837696335e-06, |
|
"logits/chosen": 0.8041954040527344, |
|
"logits/rejected": 0.8887465596199036, |
|
"logps/chosen": -276.43304443359375, |
|
"logps/rejected": -250.4193572998047, |
|
"loss": 2504.2807, |
|
"rewards/accuracies": 0.5, |
|
"rewards/chosen": -0.00010556764755165204, |
|
"rewards/margins": -0.0003692187019623816, |
|
"rewards/rejected": 0.00026365104713477194, |
|
"step": 160 |
|
}, |
|
{ |
|
"epoch": 0.09, |
|
"learning_rate": 2.2251308900523565e-06, |
|
"logits/chosen": 0.8059272766113281, |
|
"logits/rejected": 0.8950363993644714, |
|
"logps/chosen": -274.240234375, |
|
"logps/rejected": -247.8701171875, |
|
"loss": 2501.8248, |
|
"rewards/accuracies": 0.512499988079071, |
|
"rewards/chosen": -0.001084670191630721, |
|
"rewards/margins": -0.0001141707762144506, |
|
"rewards/rejected": -0.0009704994154162705, |
|
"step": 170 |
|
}, |
|
{ |
|
"epoch": 0.09, |
|
"learning_rate": 2.356020942408377e-06, |
|
"logits/chosen": 0.8783141374588013, |
|
"logits/rejected": 0.8253491520881653, |
|
"logps/chosen": -242.3585968017578, |
|
"logps/rejected": -221.0929718017578, |
|
"loss": 2496.3063, |
|
"rewards/accuracies": 0.518750011920929, |
|
"rewards/chosen": -0.0004814372514374554, |
|
"rewards/margins": 0.0004220888367854059, |
|
"rewards/rejected": -0.0009035261464305222, |
|
"step": 180 |
|
}, |
|
{ |
|
"epoch": 0.1, |
|
"learning_rate": 2.4869109947643982e-06, |
|
"logits/chosen": 0.8767743110656738, |
|
"logits/rejected": 0.8822822570800781, |
|
"logps/chosen": -246.2511444091797, |
|
"logps/rejected": -224.4364471435547, |
|
"loss": 2498.3156, |
|
"rewards/accuracies": 0.518750011920929, |
|
"rewards/chosen": 0.00032182232826016843, |
|
"rewards/margins": 0.00022237170196603984, |
|
"rewards/rejected": 9.945056081051007e-05, |
|
"step": 190 |
|
}, |
|
{ |
|
"epoch": 0.1, |
|
"learning_rate": 2.617801047120419e-06, |
|
"logits/chosen": 0.855573296546936, |
|
"logits/rejected": 0.9106731414794922, |
|
"logps/chosen": -258.25885009765625, |
|
"logps/rejected": -236.6140594482422, |
|
"loss": 2499.2807, |
|
"rewards/accuracies": 0.48750001192092896, |
|
"rewards/chosen": -0.00018535128037910908, |
|
"rewards/margins": 0.0001189738031825982, |
|
"rewards/rejected": -0.0003043250762857497, |
|
"step": 200 |
|
}, |
|
{ |
|
"epoch": 0.1, |
|
"eval_logits/chosen": 0.8310006856918335, |
|
"eval_logits/rejected": 0.8882209062576294, |
|
"eval_logps/chosen": -256.6106262207031, |
|
"eval_logps/rejected": -233.5994873046875, |
|
"eval_loss": 2494.83544921875, |
|
"eval_rewards/accuracies": 0.5189999938011169, |
|
"eval_rewards/chosen": 6.786447193007916e-05, |
|
"eval_rewards/margins": 0.0005738017498515546, |
|
"eval_rewards/rejected": -0.0005059372633695602, |
|
"eval_runtime": 416.4863, |
|
"eval_samples_per_second": 4.802, |
|
"eval_steps_per_second": 1.201, |
|
"step": 200 |
|
}, |
|
{ |
|
"epoch": 0.11, |
|
"learning_rate": 2.74869109947644e-06, |
|
"logits/chosen": 0.9517833590507507, |
|
"logits/rejected": 0.910740852355957, |
|
"logps/chosen": -244.80032348632812, |
|
"logps/rejected": -232.45321655273438, |
|
"loss": 2500.5746, |
|
"rewards/accuracies": 0.4749999940395355, |
|
"rewards/chosen": -9.258640056941658e-05, |
|
"rewards/margins": -6.444106020353502e-06, |
|
"rewards/rejected": -8.61423322930932e-05, |
|
"step": 210 |
|
}, |
|
{ |
|
"epoch": 0.12, |
|
"learning_rate": 2.8795811518324613e-06, |
|
"logits/chosen": 0.8369059562683105, |
|
"logits/rejected": 0.8937622904777527, |
|
"logps/chosen": -267.40948486328125, |
|
"logps/rejected": -209.13290405273438, |
|
"loss": 2487.1783, |
|
"rewards/accuracies": 0.5874999761581421, |
|
"rewards/chosen": 0.0004562476242426783, |
|
"rewards/margins": 0.0013524172827601433, |
|
"rewards/rejected": -0.0008961696876212955, |
|
"step": 220 |
|
}, |
|
{ |
|
"epoch": 0.12, |
|
"learning_rate": 3.010471204188482e-06, |
|
"logits/chosen": 0.8930699229240417, |
|
"logits/rejected": 0.9343907237052917, |
|
"logps/chosen": -258.6376037597656, |
|
"logps/rejected": -221.6857452392578, |
|
"loss": 2493.877, |
|
"rewards/accuracies": 0.5062500238418579, |
|
"rewards/chosen": 0.0002062669227598235, |
|
"rewards/margins": 0.0006668218411505222, |
|
"rewards/rejected": -0.00046055493294261396, |
|
"step": 230 |
|
}, |
|
{ |
|
"epoch": 0.13, |
|
"learning_rate": 3.141361256544503e-06, |
|
"logits/chosen": 0.9376400113105774, |
|
"logits/rejected": 0.8995400667190552, |
|
"logps/chosen": -228.9315185546875, |
|
"logps/rejected": -242.112548828125, |
|
"loss": 2480.1844, |
|
"rewards/accuracies": 0.59375, |
|
"rewards/chosen": 0.0004402367048896849, |
|
"rewards/margins": 0.002044759690761566, |
|
"rewards/rejected": -0.001604523160494864, |
|
"step": 240 |
|
}, |
|
{ |
|
"epoch": 0.13, |
|
"learning_rate": 3.2722513089005235e-06, |
|
"logits/chosen": 0.9079924821853638, |
|
"logits/rejected": 0.8767238855361938, |
|
"logps/chosen": -242.71121215820312, |
|
"logps/rejected": -233.9228973388672, |
|
"loss": 2493.2939, |
|
"rewards/accuracies": 0.543749988079071, |
|
"rewards/chosen": -6.936644058441743e-05, |
|
"rewards/margins": 0.0007372990949079394, |
|
"rewards/rejected": -0.0008066653972491622, |
|
"step": 250 |
|
}, |
|
{ |
|
"epoch": 0.14, |
|
"learning_rate": 3.403141361256545e-06, |
|
"logits/chosen": 0.8814166784286499, |
|
"logits/rejected": 0.9410937428474426, |
|
"logps/chosen": -236.88095092773438, |
|
"logps/rejected": -223.930419921875, |
|
"loss": 2496.3828, |
|
"rewards/accuracies": 0.550000011920929, |
|
"rewards/chosen": -0.00017086375737562776, |
|
"rewards/margins": 0.0004164519195910543, |
|
"rewards/rejected": -0.0005873156478628516, |
|
"step": 260 |
|
}, |
|
{ |
|
"epoch": 0.14, |
|
"learning_rate": 3.534031413612566e-06, |
|
"logits/chosen": 0.8372557759284973, |
|
"logits/rejected": 0.8741558194160461, |
|
"logps/chosen": -212.303466796875, |
|
"logps/rejected": -239.15158081054688, |
|
"loss": 2483.2088, |
|
"rewards/accuracies": 0.5562499761581421, |
|
"rewards/chosen": -0.00031574201420880854, |
|
"rewards/margins": 0.0017516377847641706, |
|
"rewards/rejected": -0.0020673798862844706, |
|
"step": 270 |
|
}, |
|
{ |
|
"epoch": 0.15, |
|
"learning_rate": 3.6649214659685865e-06, |
|
"logits/chosen": 0.8835927844047546, |
|
"logits/rejected": 0.9312320947647095, |
|
"logps/chosen": -250.3417510986328, |
|
"logps/rejected": -263.51531982421875, |
|
"loss": 2492.1223, |
|
"rewards/accuracies": 0.5249999761581421, |
|
"rewards/chosen": -0.0005325328675098717, |
|
"rewards/margins": 0.000863347842823714, |
|
"rewards/rejected": -0.0013958807103335857, |
|
"step": 280 |
|
}, |
|
{ |
|
"epoch": 0.15, |
|
"learning_rate": 3.7958115183246074e-06, |
|
"logits/chosen": 0.8323150873184204, |
|
"logits/rejected": 0.8896921277046204, |
|
"logps/chosen": -250.1656951904297, |
|
"logps/rejected": -234.757568359375, |
|
"loss": 2483.249, |
|
"rewards/accuracies": 0.581250011920929, |
|
"rewards/chosen": -0.00013801059685647488, |
|
"rewards/margins": 0.0017679758602753282, |
|
"rewards/rejected": -0.001905986457131803, |
|
"step": 290 |
|
}, |
|
{ |
|
"epoch": 0.16, |
|
"learning_rate": 3.926701570680629e-06, |
|
"logits/chosen": 0.8511127233505249, |
|
"logits/rejected": 0.8209661245346069, |
|
"logps/chosen": -273.7172546386719, |
|
"logps/rejected": -250.82748413085938, |
|
"loss": 2477.7609, |
|
"rewards/accuracies": 0.5625, |
|
"rewards/chosen": -0.0011967200553044677, |
|
"rewards/margins": 0.0023162723518908024, |
|
"rewards/rejected": -0.0035129922907799482, |
|
"step": 300 |
|
}, |
|
{ |
|
"epoch": 0.16, |
|
"eval_logits/chosen": 0.8318725824356079, |
|
"eval_logits/rejected": 0.8892252445220947, |
|
"eval_logps/chosen": -256.7284851074219, |
|
"eval_logps/rejected": -233.8547821044922, |
|
"eval_loss": 2481.50146484375, |
|
"eval_rewards/accuracies": 0.559499979019165, |
|
"eval_rewards/chosen": -0.001110685057938099, |
|
"eval_rewards/margins": 0.0019479345064610243, |
|
"eval_rewards/rejected": -0.003058619564399123, |
|
"eval_runtime": 416.6935, |
|
"eval_samples_per_second": 4.8, |
|
"eval_steps_per_second": 1.2, |
|
"step": 300 |
|
}, |
|
{ |
|
"epoch": 0.16, |
|
"learning_rate": 4.05759162303665e-06, |
|
"logits/chosen": 0.7432538866996765, |
|
"logits/rejected": 0.817090630531311, |
|
"logps/chosen": -274.85931396484375, |
|
"logps/rejected": -236.4228057861328, |
|
"loss": 2469.5742, |
|
"rewards/accuracies": 0.6312500238418579, |
|
"rewards/chosen": -0.001123375492170453, |
|
"rewards/margins": 0.003155052661895752, |
|
"rewards/rejected": -0.004278427921235561, |
|
"step": 310 |
|
}, |
|
{ |
|
"epoch": 0.17, |
|
"learning_rate": 4.18848167539267e-06, |
|
"logits/chosen": 0.9147623777389526, |
|
"logits/rejected": 0.9334859848022461, |
|
"logps/chosen": -233.425537109375, |
|
"logps/rejected": -214.6092071533203, |
|
"loss": 2457.892, |
|
"rewards/accuracies": 0.6812499761581421, |
|
"rewards/chosen": 0.00017569802002981305, |
|
"rewards/margins": 0.004348465241491795, |
|
"rewards/rejected": -0.004172767512500286, |
|
"step": 320 |
|
}, |
|
{ |
|
"epoch": 0.17, |
|
"learning_rate": 4.319371727748692e-06, |
|
"logits/chosen": 0.8749006390571594, |
|
"logits/rejected": 0.9252738952636719, |
|
"logps/chosen": -247.8308563232422, |
|
"logps/rejected": -218.9490203857422, |
|
"loss": 2463.7824, |
|
"rewards/accuracies": 0.581250011920929, |
|
"rewards/chosen": -0.0017152890795841813, |
|
"rewards/margins": 0.0037990615237504244, |
|
"rewards/rejected": -0.0055143507197499275, |
|
"step": 330 |
|
}, |
|
{ |
|
"epoch": 0.18, |
|
"learning_rate": 4.450261780104713e-06, |
|
"logits/chosen": 0.8781224489212036, |
|
"logits/rejected": 0.9259663820266724, |
|
"logps/chosen": -253.4806365966797, |
|
"logps/rejected": -239.67434692382812, |
|
"loss": 2474.3055, |
|
"rewards/accuracies": 0.5687500238418579, |
|
"rewards/chosen": -0.0034939595498144627, |
|
"rewards/margins": 0.0027302266098558903, |
|
"rewards/rejected": -0.00622418662533164, |
|
"step": 340 |
|
}, |
|
{ |
|
"epoch": 0.18, |
|
"learning_rate": 4.5811518324607335e-06, |
|
"logits/chosen": 0.7855554223060608, |
|
"logits/rejected": 0.9314893484115601, |
|
"logps/chosen": -255.0915985107422, |
|
"logps/rejected": -206.8708038330078, |
|
"loss": 2432.458, |
|
"rewards/accuracies": 0.6625000238418579, |
|
"rewards/chosen": -0.0009129707468673587, |
|
"rewards/margins": 0.007046517916023731, |
|
"rewards/rejected": -0.007959488779306412, |
|
"step": 350 |
|
}, |
|
{ |
|
"epoch": 0.19, |
|
"learning_rate": 4.712041884816754e-06, |
|
"logits/chosen": 0.8957219123840332, |
|
"logits/rejected": 0.8874330520629883, |
|
"logps/chosen": -257.02764892578125, |
|
"logps/rejected": -230.2834014892578, |
|
"loss": 2441.5672, |
|
"rewards/accuracies": 0.5687500238418579, |
|
"rewards/chosen": -0.001695218845270574, |
|
"rewards/margins": 0.006147631909698248, |
|
"rewards/rejected": -0.007842850871384144, |
|
"step": 360 |
|
}, |
|
{ |
|
"epoch": 0.19, |
|
"learning_rate": 4.842931937172775e-06, |
|
"logits/chosen": 0.9125442504882812, |
|
"logits/rejected": 0.8982815742492676, |
|
"logps/chosen": -230.16940307617188, |
|
"logps/rejected": -211.3495635986328, |
|
"loss": 2439.5756, |
|
"rewards/accuracies": 0.637499988079071, |
|
"rewards/chosen": -0.004968739114701748, |
|
"rewards/margins": 0.006435071583837271, |
|
"rewards/rejected": -0.011403810232877731, |
|
"step": 370 |
|
}, |
|
{ |
|
"epoch": 0.2, |
|
"learning_rate": 4.9738219895287965e-06, |
|
"logits/chosen": 0.9134615063667297, |
|
"logits/rejected": 0.8667083978652954, |
|
"logps/chosen": -267.7635192871094, |
|
"logps/rejected": -219.404541015625, |
|
"loss": 2438.1262, |
|
"rewards/accuracies": 0.637499988079071, |
|
"rewards/chosen": -0.004255574196577072, |
|
"rewards/margins": 0.006601777859032154, |
|
"rewards/rejected": -0.0108573529869318, |
|
"step": 380 |
|
}, |
|
{ |
|
"epoch": 0.2, |
|
"learning_rate": 4.999933200062888e-06, |
|
"logits/chosen": 0.8681972622871399, |
|
"logits/rejected": 0.8684479594230652, |
|
"logps/chosen": -253.1089324951172, |
|
"logps/rejected": -232.1811981201172, |
|
"loss": 2414.8473, |
|
"rewards/accuracies": 0.643750011920929, |
|
"rewards/chosen": -0.0038361139595508575, |
|
"rewards/margins": 0.009047028608620167, |
|
"rewards/rejected": -0.012883143499493599, |
|
"step": 390 |
|
}, |
|
{ |
|
"epoch": 0.21, |
|
"learning_rate": 4.999661831436499e-06, |
|
"logits/chosen": 0.9156022071838379, |
|
"logits/rejected": 0.9197471737861633, |
|
"logps/chosen": -260.40093994140625, |
|
"logps/rejected": -238.50961303710938, |
|
"loss": 2428.4195, |
|
"rewards/accuracies": 0.6187499761581421, |
|
"rewards/chosen": -0.005848343018442392, |
|
"rewards/margins": 0.007789201103150845, |
|
"rewards/rejected": -0.013637542724609375, |
|
"step": 400 |
|
}, |
|
{ |
|
"epoch": 0.21, |
|
"eval_logits/chosen": 0.8404272794723511, |
|
"eval_logits/rejected": 0.8983384966850281, |
|
"eval_logps/chosen": -257.29510498046875, |
|
"eval_logps/rejected": -235.11265563964844, |
|
"eval_loss": 2419.1044921875, |
|
"eval_rewards/accuracies": 0.6495000123977661, |
|
"eval_rewards/chosen": -0.006776793394237757, |
|
"eval_rewards/margins": 0.008860657922923565, |
|
"eval_rewards/rejected": -0.01563744992017746, |
|
"eval_runtime": 416.4578, |
|
"eval_samples_per_second": 4.802, |
|
"eval_steps_per_second": 1.201, |
|
"step": 400 |
|
}, |
|
{ |
|
"epoch": 0.21, |
|
"learning_rate": 4.999181741766532e-06, |
|
"logits/chosen": 0.8992105722427368, |
|
"logits/rejected": 0.8969219923019409, |
|
"logps/chosen": -252.7702178955078, |
|
"logps/rejected": -249.8018798828125, |
|
"loss": 2438.1242, |
|
"rewards/accuracies": 0.643750011920929, |
|
"rewards/chosen": -0.007479649968445301, |
|
"rewards/margins": 0.006951476447284222, |
|
"rewards/rejected": -0.014431129209697247, |
|
"step": 410 |
|
}, |
|
{ |
|
"epoch": 0.22, |
|
"learning_rate": 4.9984929711403395e-06, |
|
"logits/chosen": 0.913814902305603, |
|
"logits/rejected": 0.9069592356681824, |
|
"logps/chosen": -255.3079833984375, |
|
"logps/rejected": -251.0774383544922, |
|
"loss": 2417.1992, |
|
"rewards/accuracies": 0.6000000238418579, |
|
"rewards/chosen": -0.011085378006100655, |
|
"rewards/margins": 0.009236546233296394, |
|
"rewards/rejected": -0.020321926102042198, |
|
"step": 420 |
|
}, |
|
{ |
|
"epoch": 0.23, |
|
"learning_rate": 4.997595577070068e-06, |
|
"logits/chosen": 0.8943805694580078, |
|
"logits/rejected": 0.8994030952453613, |
|
"logps/chosen": -235.75009155273438, |
|
"logps/rejected": -232.7864990234375, |
|
"loss": 2429.1924, |
|
"rewards/accuracies": 0.6312500238418579, |
|
"rewards/chosen": -0.012942090630531311, |
|
"rewards/margins": 0.007976246066391468, |
|
"rewards/rejected": -0.020918335765600204, |
|
"step": 430 |
|
}, |
|
{ |
|
"epoch": 0.23, |
|
"learning_rate": 4.996489634487865e-06, |
|
"logits/chosen": 0.8081871867179871, |
|
"logits/rejected": 0.8945296406745911, |
|
"logps/chosen": -231.6891632080078, |
|
"logps/rejected": -253.6363067626953, |
|
"loss": 2484.0605, |
|
"rewards/accuracies": 0.574999988079071, |
|
"rewards/chosen": -0.012701654806733131, |
|
"rewards/margins": 0.003084682859480381, |
|
"rewards/rejected": -0.015786338597536087, |
|
"step": 440 |
|
}, |
|
{ |
|
"epoch": 0.24, |
|
"learning_rate": 4.995175235739619e-06, |
|
"logits/chosen": 0.8565770983695984, |
|
"logits/rejected": 0.8623224496841431, |
|
"logps/chosen": -264.3777770996094, |
|
"logps/rejected": -267.23712158203125, |
|
"loss": 2392.5188, |
|
"rewards/accuracies": 0.6812499761581421, |
|
"rewards/chosen": -0.011126170866191387, |
|
"rewards/margins": 0.011980591341853142, |
|
"rewards/rejected": -0.023106763139367104, |
|
"step": 450 |
|
}, |
|
{ |
|
"epoch": 0.24, |
|
"learning_rate": 4.9936524905772466e-06, |
|
"logits/chosen": 0.78350430727005, |
|
"logits/rejected": 0.8708200454711914, |
|
"logps/chosen": -267.3443298339844, |
|
"logps/rejected": -236.05459594726562, |
|
"loss": 2404.3129, |
|
"rewards/accuracies": 0.612500011920929, |
|
"rewards/chosen": -0.015727100893855095, |
|
"rewards/margins": 0.010795451700687408, |
|
"rewards/rejected": -0.026522550731897354, |
|
"step": 460 |
|
}, |
|
{ |
|
"epoch": 0.25, |
|
"learning_rate": 4.991921526149529e-06, |
|
"logits/chosen": 0.9162321090698242, |
|
"logits/rejected": 0.9280640482902527, |
|
"logps/chosen": -256.3532409667969, |
|
"logps/rejected": -247.89315795898438, |
|
"loss": 2386.3984, |
|
"rewards/accuracies": 0.668749988079071, |
|
"rewards/chosen": -0.01587381586432457, |
|
"rewards/margins": 0.01281227171421051, |
|
"rewards/rejected": -0.02868608757853508, |
|
"step": 470 |
|
}, |
|
{ |
|
"epoch": 0.25, |
|
"learning_rate": 4.9899824869915e-06, |
|
"logits/chosen": 0.8048622012138367, |
|
"logits/rejected": 0.8283928036689758, |
|
"logps/chosen": -246.1285858154297, |
|
"logps/rejected": -252.7605743408203, |
|
"loss": 2396.8398, |
|
"rewards/accuracies": 0.6499999761581421, |
|
"rewards/chosen": -0.021037602797150612, |
|
"rewards/margins": 0.012100132182240486, |
|
"rewards/rejected": -0.0331377312541008, |
|
"step": 480 |
|
}, |
|
{ |
|
"epoch": 0.26, |
|
"learning_rate": 4.987835535012371e-06, |
|
"logits/chosen": 0.8453197479248047, |
|
"logits/rejected": 0.86089026927948, |
|
"logps/chosen": -240.1754913330078, |
|
"logps/rejected": -229.0949249267578, |
|
"loss": 2350.6213, |
|
"rewards/accuracies": 0.625, |
|
"rewards/chosen": -0.019980577751994133, |
|
"rewards/margins": 0.01681477203965187, |
|
"rewards/rejected": -0.03679535537958145, |
|
"step": 490 |
|
}, |
|
{ |
|
"epoch": 0.26, |
|
"learning_rate": 4.985480849482012e-06, |
|
"logits/chosen": 0.8507258296012878, |
|
"logits/rejected": 0.8831195831298828, |
|
"logps/chosen": -264.3097839355469, |
|
"logps/rejected": -267.3841552734375, |
|
"loss": 2296.8842, |
|
"rewards/accuracies": 0.6875, |
|
"rewards/chosen": -0.019409244880080223, |
|
"rewards/margins": 0.023494381457567215, |
|
"rewards/rejected": -0.04290362820029259, |
|
"step": 500 |
|
}, |
|
{ |
|
"epoch": 0.26, |
|
"eval_logits/chosen": 0.8214389681816101, |
|
"eval_logits/rejected": 0.8805551528930664, |
|
"eval_logps/chosen": -259.0124206542969, |
|
"eval_logps/rejected": -237.73793029785156, |
|
"eval_loss": 2349.435791015625, |
|
"eval_rewards/accuracies": 0.656499981880188, |
|
"eval_rewards/chosen": -0.023950034752488136, |
|
"eval_rewards/margins": 0.017940117046236992, |
|
"eval_rewards/rejected": -0.04189015179872513, |
|
"eval_runtime": 416.5178, |
|
"eval_samples_per_second": 4.802, |
|
"eval_steps_per_second": 1.2, |
|
"step": 500 |
|
}, |
|
{ |
|
"epoch": 0.27, |
|
"learning_rate": 4.98291862701599e-06, |
|
"logits/chosen": 0.8344039916992188, |
|
"logits/rejected": 0.8795874714851379, |
|
"logps/chosen": -254.190673828125, |
|
"logps/rejected": -214.09396362304688, |
|
"loss": 2307.1967, |
|
"rewards/accuracies": 0.675000011920929, |
|
"rewards/chosen": -0.02996075712144375, |
|
"rewards/margins": 0.022381700575351715, |
|
"rewards/rejected": -0.052342455834150314, |
|
"step": 510 |
|
}, |
|
{ |
|
"epoch": 0.27, |
|
"learning_rate": 4.980149081559142e-06, |
|
"logits/chosen": 0.8595240712165833, |
|
"logits/rejected": 0.903663158416748, |
|
"logps/chosen": -237.7533416748047, |
|
"logps/rejected": -241.7561492919922, |
|
"loss": 2330.1758, |
|
"rewards/accuracies": 0.6499999761581421, |
|
"rewards/chosen": -0.02604847028851509, |
|
"rewards/margins": 0.020816484466195107, |
|
"rewards/rejected": -0.04686495289206505, |
|
"step": 520 |
|
}, |
|
{ |
|
"epoch": 0.28, |
|
"learning_rate": 4.977172444367718e-06, |
|
"logits/chosen": 0.8232777714729309, |
|
"logits/rejected": 0.8955798149108887, |
|
"logps/chosen": -248.8101806640625, |
|
"logps/rejected": -226.21786499023438, |
|
"loss": 2284.3357, |
|
"rewards/accuracies": 0.675000011920929, |
|
"rewards/chosen": -0.0257116612046957, |
|
"rewards/margins": 0.026299094781279564, |
|
"rewards/rejected": -0.052010755985975266, |
|
"step": 530 |
|
}, |
|
{ |
|
"epoch": 0.28, |
|
"learning_rate": 4.9739889639900655e-06, |
|
"logits/chosen": 0.8850505948066711, |
|
"logits/rejected": 0.9008530378341675, |
|
"logps/chosen": -260.18963623046875, |
|
"logps/rejected": -228.6940155029297, |
|
"loss": 2314.4127, |
|
"rewards/accuracies": 0.6312500238418579, |
|
"rewards/chosen": -0.028372693806886673, |
|
"rewards/margins": 0.02354586310684681, |
|
"rewards/rejected": -0.051918547600507736, |
|
"step": 540 |
|
}, |
|
{ |
|
"epoch": 0.29, |
|
"learning_rate": 4.9705989062458805e-06, |
|
"logits/chosen": 0.8566417694091797, |
|
"logits/rejected": 0.8558026552200317, |
|
"logps/chosen": -242.9883270263672, |
|
"logps/rejected": -247.76370239257812, |
|
"loss": 2339.4164, |
|
"rewards/accuracies": 0.606249988079071, |
|
"rewards/chosen": -0.03327028453350067, |
|
"rewards/margins": 0.02076330967247486, |
|
"rewards/rejected": -0.054033588618040085, |
|
"step": 550 |
|
}, |
|
{ |
|
"epoch": 0.29, |
|
"learning_rate": 4.967002554204009e-06, |
|
"logits/chosen": 0.7901058793067932, |
|
"logits/rejected": 0.8357075452804565, |
|
"logps/chosen": -263.3518371582031, |
|
"logps/rejected": -250.97958374023438, |
|
"loss": 2302.1379, |
|
"rewards/accuracies": 0.6812499761581421, |
|
"rewards/chosen": -0.04014524444937706, |
|
"rewards/margins": 0.025143718346953392, |
|
"rewards/rejected": -0.0652889683842659, |
|
"step": 560 |
|
}, |
|
{ |
|
"epoch": 0.3, |
|
"learning_rate": 4.963200208158811e-06, |
|
"logits/chosen": 0.8461757898330688, |
|
"logits/rejected": 0.9372328519821167, |
|
"logps/chosen": -223.61373901367188, |
|
"logps/rejected": -212.1987762451172, |
|
"loss": 2320.3783, |
|
"rewards/accuracies": 0.65625, |
|
"rewards/chosen": -0.04337463900446892, |
|
"rewards/margins": 0.021725038066506386, |
|
"rewards/rejected": -0.06509967893362045, |
|
"step": 570 |
|
}, |
|
{ |
|
"epoch": 0.3, |
|
"learning_rate": 4.959192185605089e-06, |
|
"logits/chosen": 0.7897135615348816, |
|
"logits/rejected": 0.9126697778701782, |
|
"logps/chosen": -294.5254821777344, |
|
"logps/rejected": -232.0474395751953, |
|
"loss": 2167.2896, |
|
"rewards/accuracies": 0.737500011920929, |
|
"rewards/chosen": -0.03270890563726425, |
|
"rewards/margins": 0.041936445981264114, |
|
"rewards/rejected": -0.07464535534381866, |
|
"step": 580 |
|
}, |
|
{ |
|
"epoch": 0.31, |
|
"learning_rate": 4.95497882121157e-06, |
|
"logits/chosen": 0.8067277669906616, |
|
"logits/rejected": 0.8418477773666382, |
|
"logps/chosen": -240.95639038085938, |
|
"logps/rejected": -217.8979949951172, |
|
"loss": 2249.4051, |
|
"rewards/accuracies": 0.7250000238418579, |
|
"rewards/chosen": -0.04393559694290161, |
|
"rewards/margins": 0.03141217678785324, |
|
"rewards/rejected": -0.07534776628017426, |
|
"step": 590 |
|
}, |
|
{ |
|
"epoch": 0.31, |
|
"learning_rate": 4.950560466792969e-06, |
|
"logits/chosen": 0.8411356806755066, |
|
"logits/rejected": 0.8524805307388306, |
|
"logps/chosen": -241.7827606201172, |
|
"logps/rejected": -234.87985229492188, |
|
"loss": 2254.5846, |
|
"rewards/accuracies": 0.6499999761581421, |
|
"rewards/chosen": -0.04195866733789444, |
|
"rewards/margins": 0.03153757005929947, |
|
"rewards/rejected": -0.07349623739719391, |
|
"step": 600 |
|
}, |
|
{ |
|
"epoch": 0.31, |
|
"eval_logits/chosen": 0.7868022322654724, |
|
"eval_logits/rejected": 0.8478493094444275, |
|
"eval_logps/chosen": -261.86590576171875, |
|
"eval_logps/rejected": -241.83828735351562, |
|
"eval_loss": 2273.499267578125, |
|
"eval_rewards/accuracies": 0.6570000052452087, |
|
"eval_rewards/chosen": -0.05248467996716499, |
|
"eval_rewards/margins": 0.03040897473692894, |
|
"eval_rewards/rejected": -0.08289366215467453, |
|
"eval_runtime": 416.6239, |
|
"eval_samples_per_second": 4.8, |
|
"eval_steps_per_second": 1.2, |
|
"step": 600 |
|
}, |
|
{ |
|
"epoch": 0.32, |
|
"learning_rate": 4.945937491280611e-06, |
|
"logits/chosen": 0.7756252288818359, |
|
"logits/rejected": 0.8814484477043152, |
|
"logps/chosen": -245.0117950439453, |
|
"logps/rejected": -217.2014923095703, |
|
"loss": 2348.3988, |
|
"rewards/accuracies": 0.6187499761581421, |
|
"rewards/chosen": -0.05819234997034073, |
|
"rewards/margins": 0.021564457565546036, |
|
"rewards/rejected": -0.07975681126117706, |
|
"step": 610 |
|
}, |
|
{ |
|
"epoch": 0.32, |
|
"learning_rate": 4.9411102806916185e-06, |
|
"logits/chosen": 0.7903825640678406, |
|
"logits/rejected": 0.8663345575332642, |
|
"logps/chosen": -270.52239990234375, |
|
"logps/rejected": -254.25369262695312, |
|
"loss": 2181.5219, |
|
"rewards/accuracies": 0.699999988079071, |
|
"rewards/chosen": -0.05034894496202469, |
|
"rewards/margins": 0.042683206498622894, |
|
"rewards/rejected": -0.09303215146064758, |
|
"step": 620 |
|
}, |
|
{ |
|
"epoch": 0.33, |
|
"learning_rate": 4.9360792380966875e-06, |
|
"logits/chosen": 0.8880133628845215, |
|
"logits/rejected": 0.8636928796768188, |
|
"logps/chosen": -241.08279418945312, |
|
"logps/rejected": -220.8309783935547, |
|
"loss": 2263.642, |
|
"rewards/accuracies": 0.706250011920929, |
|
"rewards/chosen": -0.05800148844718933, |
|
"rewards/margins": 0.0320889875292778, |
|
"rewards/rejected": -0.09009047597646713, |
|
"step": 630 |
|
}, |
|
{ |
|
"epoch": 0.33, |
|
"learning_rate": 4.930844783586424e-06, |
|
"logits/chosen": 0.8702048063278198, |
|
"logits/rejected": 0.8985433578491211, |
|
"logps/chosen": -240.00460815429688, |
|
"logps/rejected": -238.61227416992188, |
|
"loss": 2244.0928, |
|
"rewards/accuracies": 0.668749988079071, |
|
"rewards/chosen": -0.05921437591314316, |
|
"rewards/margins": 0.032827965915203094, |
|
"rewards/rejected": -0.09204234182834625, |
|
"step": 640 |
|
}, |
|
{ |
|
"epoch": 0.34, |
|
"learning_rate": 4.925407354236279e-06, |
|
"logits/chosen": 0.8151038885116577, |
|
"logits/rejected": 0.8790351152420044, |
|
"logps/chosen": -241.45755004882812, |
|
"logps/rejected": -226.37667846679688, |
|
"loss": 2236.8199, |
|
"rewards/accuracies": 0.65625, |
|
"rewards/chosen": -0.06360708922147751, |
|
"rewards/margins": 0.03408312052488327, |
|
"rewards/rejected": -0.09769020974636078, |
|
"step": 650 |
|
}, |
|
{ |
|
"epoch": 0.35, |
|
"learning_rate": 4.919767404070033e-06, |
|
"logits/chosen": 0.8537635803222656, |
|
"logits/rejected": 0.8909260034561157, |
|
"logps/chosen": -221.7104034423828, |
|
"logps/rejected": -206.9849395751953, |
|
"loss": 2317.7387, |
|
"rewards/accuracies": 0.625, |
|
"rewards/chosen": -0.07372693717479706, |
|
"rewards/margins": 0.02632719837129116, |
|
"rewards/rejected": -0.10005412995815277, |
|
"step": 660 |
|
}, |
|
{ |
|
"epoch": 0.35, |
|
"learning_rate": 4.913925404021905e-06, |
|
"logits/chosen": 0.8039971590042114, |
|
"logits/rejected": 0.8006811141967773, |
|
"logps/chosen": -240.97213745117188, |
|
"logps/rejected": -209.97219848632812, |
|
"loss": 2208.3297, |
|
"rewards/accuracies": 0.668749988079071, |
|
"rewards/chosen": -0.07874181121587753, |
|
"rewards/margins": 0.03945617750287056, |
|
"rewards/rejected": -0.11819799244403839, |
|
"step": 670 |
|
}, |
|
{ |
|
"epoch": 0.36, |
|
"learning_rate": 4.907881841897216e-06, |
|
"logits/chosen": 0.8469578623771667, |
|
"logits/rejected": 0.8435947299003601, |
|
"logps/chosen": -257.61810302734375, |
|
"logps/rejected": -245.639892578125, |
|
"loss": 2260.2275, |
|
"rewards/accuracies": 0.6875, |
|
"rewards/chosen": -0.07468362152576447, |
|
"rewards/margins": 0.03678290545940399, |
|
"rewards/rejected": -0.11146652698516846, |
|
"step": 680 |
|
}, |
|
{ |
|
"epoch": 0.36, |
|
"learning_rate": 4.901637222331665e-06, |
|
"logits/chosen": 0.7657278776168823, |
|
"logits/rejected": 0.7471415996551514, |
|
"logps/chosen": -259.5301513671875, |
|
"logps/rejected": -236.86032104492188, |
|
"loss": 2287.06, |
|
"rewards/accuracies": 0.643750011920929, |
|
"rewards/chosen": -0.08623397350311279, |
|
"rewards/margins": 0.03296409547328949, |
|
"rewards/rejected": -0.11919806897640228, |
|
"step": 690 |
|
}, |
|
{ |
|
"epoch": 0.37, |
|
"learning_rate": 4.89519206674919e-06, |
|
"logits/chosen": 0.7977254390716553, |
|
"logits/rejected": 0.8500420451164246, |
|
"logps/chosen": -244.900634765625, |
|
"logps/rejected": -251.4091339111328, |
|
"loss": 2330.7787, |
|
"rewards/accuracies": 0.612500011920929, |
|
"rewards/chosen": -0.08788236975669861, |
|
"rewards/margins": 0.02753649279475212, |
|
"rewards/rejected": -0.11541886627674103, |
|
"step": 700 |
|
}, |
|
{ |
|
"epoch": 0.37, |
|
"eval_logits/chosen": 0.7516666650772095, |
|
"eval_logits/rejected": 0.812827467918396, |
|
"eval_logps/chosen": -264.809326171875, |
|
"eval_logps/rejected": -245.7631378173828, |
|
"eval_loss": 2224.3349609375, |
|
"eval_rewards/accuracies": 0.6629999876022339, |
|
"eval_rewards/chosen": -0.08191882818937302, |
|
"eval_rewards/margins": 0.0402236245572567, |
|
"eval_rewards/rejected": -0.12214244902133942, |
|
"eval_runtime": 416.6908, |
|
"eval_samples_per_second": 4.8, |
|
"eval_steps_per_second": 1.2, |
|
"step": 700 |
|
}, |
|
{ |
|
"epoch": 0.37, |
|
"learning_rate": 4.8885469133184235e-06, |
|
"logits/chosen": 0.8586422204971313, |
|
"logits/rejected": 0.8278988003730774, |
|
"logps/chosen": -244.2412109375, |
|
"logps/rejected": -248.913818359375, |
|
"loss": 2265.9631, |
|
"rewards/accuracies": 0.6187499761581421, |
|
"rewards/chosen": -0.0877029225230217, |
|
"rewards/margins": 0.03201908990740776, |
|
"rewards/rejected": -0.11972200870513916, |
|
"step": 710 |
|
}, |
|
{ |
|
"epoch": 0.38, |
|
"learning_rate": 4.881702316907769e-06, |
|
"logits/chosen": 0.853478729724884, |
|
"logits/rejected": 0.9013971090316772, |
|
"logps/chosen": -240.85440063476562, |
|
"logps/rejected": -233.39761352539062, |
|
"loss": 2238.1352, |
|
"rewards/accuracies": 0.625, |
|
"rewards/chosen": -0.08841414749622345, |
|
"rewards/margins": 0.03821689262986183, |
|
"rewards/rejected": -0.12663105130195618, |
|
"step": 720 |
|
}, |
|
{ |
|
"epoch": 0.38, |
|
"learning_rate": 4.874658849039054e-06, |
|
"logits/chosen": 0.6829933524131775, |
|
"logits/rejected": 0.7707113027572632, |
|
"logps/chosen": -274.42095947265625, |
|
"logps/rejected": -232.1612091064453, |
|
"loss": 2119.9938, |
|
"rewards/accuracies": 0.6625000238418579, |
|
"rewards/chosen": -0.07465031743049622, |
|
"rewards/margins": 0.049049459397792816, |
|
"rewards/rejected": -0.12369978427886963, |
|
"step": 730 |
|
}, |
|
{ |
|
"epoch": 0.39, |
|
"learning_rate": 4.86741709783982e-06, |
|
"logits/chosen": 0.7617892026901245, |
|
"logits/rejected": 0.8164576292037964, |
|
"logps/chosen": -267.9073486328125, |
|
"logps/rejected": -240.621826171875, |
|
"loss": 2243.2557, |
|
"rewards/accuracies": 0.6812499761581421, |
|
"rewards/chosen": -0.085872121155262, |
|
"rewards/margins": 0.042114924639463425, |
|
"rewards/rejected": -0.1279870569705963, |
|
"step": 740 |
|
}, |
|
{ |
|
"epoch": 0.39, |
|
"learning_rate": 4.859977667994209e-06, |
|
"logits/chosen": 0.74756920337677, |
|
"logits/rejected": 0.8244425654411316, |
|
"logps/chosen": -255.57754516601562, |
|
"logps/rejected": -242.3626251220703, |
|
"loss": 2231.4938, |
|
"rewards/accuracies": 0.606249988079071, |
|
"rewards/chosen": -0.09324190765619278, |
|
"rewards/margins": 0.04055342823266983, |
|
"rewards/rejected": -0.1337953507900238, |
|
"step": 750 |
|
}, |
|
{ |
|
"epoch": 0.4, |
|
"learning_rate": 4.852341180692471e-06, |
|
"logits/chosen": 0.7698923945426941, |
|
"logits/rejected": 0.7992275953292847, |
|
"logps/chosen": -256.96868896484375, |
|
"logps/rejected": -271.1941833496094, |
|
"loss": 2239.7266, |
|
"rewards/accuracies": 0.668749988079071, |
|
"rewards/chosen": -0.08798633515834808, |
|
"rewards/margins": 0.0419507697224617, |
|
"rewards/rejected": -0.1299370974302292, |
|
"step": 760 |
|
}, |
|
{ |
|
"epoch": 0.4, |
|
"learning_rate": 4.844508273579097e-06, |
|
"logits/chosen": 0.803545355796814, |
|
"logits/rejected": 0.7743754982948303, |
|
"logps/chosen": -249.5584716796875, |
|
"logps/rejected": -238.66683959960938, |
|
"loss": 2204.2545, |
|
"rewards/accuracies": 0.6625000238418579, |
|
"rewards/chosen": -0.08735480904579163, |
|
"rewards/margins": 0.04183940216898918, |
|
"rewards/rejected": -0.1291942000389099, |
|
"step": 770 |
|
}, |
|
{ |
|
"epoch": 0.41, |
|
"learning_rate": 4.836479600699579e-06, |
|
"logits/chosen": 0.7211157083511353, |
|
"logits/rejected": 0.7573873400688171, |
|
"logps/chosen": -251.00076293945312, |
|
"logps/rejected": -248.5784454345703, |
|
"loss": 2175.5199, |
|
"rewards/accuracies": 0.625, |
|
"rewards/chosen": -0.09977405518293381, |
|
"rewards/margins": 0.047359712421894073, |
|
"rewards/rejected": -0.14713376760482788, |
|
"step": 780 |
|
}, |
|
{ |
|
"epoch": 0.41, |
|
"learning_rate": 4.82825583244579e-06, |
|
"logits/chosen": 0.7303954362869263, |
|
"logits/rejected": 0.7423623204231262, |
|
"logps/chosen": -271.29888916015625, |
|
"logps/rejected": -255.95645141601562, |
|
"loss": 2097.0797, |
|
"rewards/accuracies": 0.6625000238418579, |
|
"rewards/chosen": -0.09688698500394821, |
|
"rewards/margins": 0.060234714299440384, |
|
"rewards/rejected": -0.1571216881275177, |
|
"step": 790 |
|
}, |
|
{ |
|
"epoch": 0.42, |
|
"learning_rate": 4.819837655500014e-06, |
|
"logits/chosen": 0.7324298620223999, |
|
"logits/rejected": 0.8285747766494751, |
|
"logps/chosen": -259.47650146484375, |
|
"logps/rejected": -261.0966491699219, |
|
"loss": 2223.6863, |
|
"rewards/accuracies": 0.6812499761581421, |
|
"rewards/chosen": -0.1077154278755188, |
|
"rewards/margins": 0.04667884111404419, |
|
"rewards/rejected": -0.154394268989563, |
|
"step": 800 |
|
}, |
|
{ |
|
"epoch": 0.42, |
|
"eval_logits/chosen": 0.6992308497428894, |
|
"eval_logits/rejected": 0.7610952854156494, |
|
"eval_logps/chosen": -266.70574951171875, |
|
"eval_logps/rejected": -248.42222595214844, |
|
"eval_loss": 2196.09912109375, |
|
"eval_rewards/accuracies": 0.6675000190734863, |
|
"eval_rewards/chosen": -0.1008833572268486, |
|
"eval_rewards/margins": 0.04784964770078659, |
|
"eval_rewards/rejected": -0.1487330049276352, |
|
"eval_runtime": 416.5458, |
|
"eval_samples_per_second": 4.801, |
|
"eval_steps_per_second": 1.2, |
|
"step": 800 |
|
}, |
|
{ |
|
"epoch": 0.42, |
|
"learning_rate": 4.811225772777603e-06, |
|
"logits/chosen": 0.8175959587097168, |
|
"logits/rejected": 0.7778623700141907, |
|
"logps/chosen": -281.6056823730469, |
|
"logps/rejected": -235.4732666015625, |
|
"loss": 2154.2184, |
|
"rewards/accuracies": 0.675000011920929, |
|
"rewards/chosen": -0.10602346807718277, |
|
"rewards/margins": 0.0522245354950428, |
|
"rewards/rejected": -0.15824799239635468, |
|
"step": 810 |
|
}, |
|
{ |
|
"epoch": 0.43, |
|
"learning_rate": 4.802420903368286e-06, |
|
"logits/chosen": 0.6645683646202087, |
|
"logits/rejected": 0.7504470944404602, |
|
"logps/chosen": -262.6244812011719, |
|
"logps/rejected": -246.62728881835938, |
|
"loss": 2070.7916, |
|
"rewards/accuracies": 0.699999988079071, |
|
"rewards/chosen": -0.08415599167346954, |
|
"rewards/margins": 0.06298204511404037, |
|
"rewards/rejected": -0.14713802933692932, |
|
"step": 820 |
|
}, |
|
{ |
|
"epoch": 0.43, |
|
"learning_rate": 4.793423782476125e-06, |
|
"logits/chosen": 0.7014611959457397, |
|
"logits/rejected": 0.7595884203910828, |
|
"logps/chosen": -261.1951599121094, |
|
"logps/rejected": -237.8996124267578, |
|
"loss": 2303.9654, |
|
"rewards/accuracies": 0.65625, |
|
"rewards/chosen": -0.11634109169244766, |
|
"rewards/margins": 0.03935299813747406, |
|
"rewards/rejected": -0.15569409728050232, |
|
"step": 830 |
|
}, |
|
{ |
|
"epoch": 0.44, |
|
"learning_rate": 4.784235161358124e-06, |
|
"logits/chosen": 0.7220847010612488, |
|
"logits/rejected": 0.8208295702934265, |
|
"logps/chosen": -274.5125427246094, |
|
"logps/rejected": -267.01275634765625, |
|
"loss": 2374.0922, |
|
"rewards/accuracies": 0.643750011920929, |
|
"rewards/chosen": -0.11991143226623535, |
|
"rewards/margins": 0.03418232128024101, |
|
"rewards/rejected": -0.15409375727176666, |
|
"step": 840 |
|
}, |
|
{ |
|
"epoch": 0.44, |
|
"learning_rate": 4.774855807261504e-06, |
|
"logits/chosen": 0.771617591381073, |
|
"logits/rejected": 0.7759231925010681, |
|
"logps/chosen": -266.54156494140625, |
|
"logps/rejected": -232.45114135742188, |
|
"loss": 2085.9527, |
|
"rewards/accuracies": 0.7124999761581421, |
|
"rewards/chosen": -0.09387587755918503, |
|
"rewards/margins": 0.059171438217163086, |
|
"rewards/rejected": -0.1530473232269287, |
|
"step": 850 |
|
}, |
|
{ |
|
"epoch": 0.45, |
|
"learning_rate": 4.765286503359632e-06, |
|
"logits/chosen": 0.7542043924331665, |
|
"logits/rejected": 0.7199236154556274, |
|
"logps/chosen": -247.95004272460938, |
|
"logps/rejected": -234.8477325439453, |
|
"loss": 2140.2543, |
|
"rewards/accuracies": 0.71875, |
|
"rewards/chosen": -0.09680913388729095, |
|
"rewards/margins": 0.05759376287460327, |
|
"rewards/rejected": -0.15440289676189423, |
|
"step": 860 |
|
}, |
|
{ |
|
"epoch": 0.46, |
|
"learning_rate": 4.755528048686629e-06, |
|
"logits/chosen": 0.7054905891418457, |
|
"logits/rejected": 0.7627168297767639, |
|
"logps/chosen": -269.0238342285156, |
|
"logps/rejected": -231.31594848632812, |
|
"loss": 2018.9971, |
|
"rewards/accuracies": 0.65625, |
|
"rewards/chosen": -0.09635048359632492, |
|
"rewards/margins": 0.07080944627523422, |
|
"rewards/rejected": -0.16715992987155914, |
|
"step": 870 |
|
}, |
|
{ |
|
"epoch": 0.46, |
|
"learning_rate": 4.745581258070654e-06, |
|
"logits/chosen": 0.7587330937385559, |
|
"logits/rejected": 0.7288376092910767, |
|
"logps/chosen": -279.3045959472656, |
|
"logps/rejected": -248.2842559814453, |
|
"loss": 2182.2826, |
|
"rewards/accuracies": 0.643750011920929, |
|
"rewards/chosen": -0.10375384986400604, |
|
"rewards/margins": 0.04985477030277252, |
|
"rewards/rejected": -0.15360862016677856, |
|
"step": 880 |
|
}, |
|
{ |
|
"epoch": 0.47, |
|
"learning_rate": 4.73544696206586e-06, |
|
"logits/chosen": 0.7099634408950806, |
|
"logits/rejected": 0.7651978731155396, |
|
"logps/chosen": -241.7110137939453, |
|
"logps/rejected": -226.1469268798828, |
|
"loss": 2313.518, |
|
"rewards/accuracies": 0.612500011920929, |
|
"rewards/chosen": -0.11171738058328629, |
|
"rewards/margins": 0.033566057682037354, |
|
"rewards/rejected": -0.14528343081474304, |
|
"step": 890 |
|
}, |
|
{ |
|
"epoch": 0.47, |
|
"learning_rate": 4.725126006883047e-06, |
|
"logits/chosen": 0.6832414865493774, |
|
"logits/rejected": 0.6996358633041382, |
|
"logps/chosen": -287.02655029296875, |
|
"logps/rejected": -271.8457946777344, |
|
"loss": 2066.7418, |
|
"rewards/accuracies": 0.6875, |
|
"rewards/chosen": -0.1082179993391037, |
|
"rewards/margins": 0.06335000693798065, |
|
"rewards/rejected": -0.17156800627708435, |
|
"step": 900 |
|
}, |
|
{ |
|
"epoch": 0.47, |
|
"eval_logits/chosen": 0.6917389035224915, |
|
"eval_logits/rejected": 0.7518260478973389, |
|
"eval_logps/chosen": -267.73968505859375, |
|
"eval_logps/rejected": -250.13189697265625, |
|
"eval_loss": 2166.0732421875, |
|
"eval_rewards/accuracies": 0.6700000166893005, |
|
"eval_rewards/chosen": -0.11122233420610428, |
|
"eval_rewards/margins": 0.0546073243021965, |
|
"eval_rewards/rejected": -0.16582968831062317, |
|
"eval_runtime": 416.784, |
|
"eval_samples_per_second": 4.799, |
|
"eval_steps_per_second": 1.2, |
|
"step": 900 |
|
}, |
|
{ |
|
"epoch": 0.48, |
|
"learning_rate": 4.7146192543190005e-06, |
|
"logits/chosen": 0.7068012952804565, |
|
"logits/rejected": 0.7604703307151794, |
|
"logps/chosen": -301.75897216796875, |
|
"logps/rejected": -260.5961608886719, |
|
"loss": 2114.2752, |
|
"rewards/accuracies": 0.699999988079071, |
|
"rewards/chosen": -0.10466556251049042, |
|
"rewards/margins": 0.06352122128009796, |
|
"rewards/rejected": -0.16818679869174957, |
|
"step": 910 |
|
}, |
|
{ |
|
"epoch": 0.48, |
|
"learning_rate": 4.70392758168454e-06, |
|
"logits/chosen": 0.7013599872589111, |
|
"logits/rejected": 0.7524459362030029, |
|
"logps/chosen": -280.307861328125, |
|
"logps/rejected": -256.722900390625, |
|
"loss": 2081.7857, |
|
"rewards/accuracies": 0.699999988079071, |
|
"rewards/chosen": -0.11534447968006134, |
|
"rewards/margins": 0.06791369616985321, |
|
"rewards/rejected": -0.18325819075107574, |
|
"step": 920 |
|
}, |
|
{ |
|
"epoch": 0.49, |
|
"learning_rate": 4.693051881731251e-06, |
|
"logits/chosen": 0.6879482269287109, |
|
"logits/rejected": 0.7315651178359985, |
|
"logps/chosen": -267.4771728515625, |
|
"logps/rejected": -269.62249755859375, |
|
"loss": 2219.8021, |
|
"rewards/accuracies": 0.6625000238418579, |
|
"rewards/chosen": -0.11604435741901398, |
|
"rewards/margins": 0.04904730245471001, |
|
"rewards/rejected": -0.16509169340133667, |
|
"step": 930 |
|
}, |
|
{ |
|
"epoch": 0.49, |
|
"learning_rate": 4.68199306257695e-06, |
|
"logits/chosen": 0.7221347093582153, |
|
"logits/rejected": 0.8089338541030884, |
|
"logps/chosen": -277.0709533691406, |
|
"logps/rejected": -288.2813415527344, |
|
"loss": 2066.718, |
|
"rewards/accuracies": 0.7124999761581421, |
|
"rewards/chosen": -0.09484682977199554, |
|
"rewards/margins": 0.066488116979599, |
|
"rewards/rejected": -0.16133496165275574, |
|
"step": 940 |
|
}, |
|
{ |
|
"epoch": 0.5, |
|
"learning_rate": 4.670752047629855e-06, |
|
"logits/chosen": 0.7649358510971069, |
|
"logits/rejected": 0.8068546056747437, |
|
"logps/chosen": -289.7987976074219, |
|
"logps/rejected": -257.411865234375, |
|
"loss": 1989.5814, |
|
"rewards/accuracies": 0.6875, |
|
"rewards/chosen": -0.09663524478673935, |
|
"rewards/margins": 0.07417033612728119, |
|
"rewards/rejected": -0.17080560326576233, |
|
"step": 950 |
|
}, |
|
{ |
|
"epoch": 0.5, |
|
"learning_rate": 4.659329775511478e-06, |
|
"logits/chosen": 0.6801126599311829, |
|
"logits/rejected": 0.7201008796691895, |
|
"logps/chosen": -275.82244873046875, |
|
"logps/rejected": -263.333251953125, |
|
"loss": 2137.0553, |
|
"rewards/accuracies": 0.6812499761581421, |
|
"rewards/chosen": -0.10409847646951675, |
|
"rewards/margins": 0.06283750385046005, |
|
"rewards/rejected": -0.1669359654188156, |
|
"step": 960 |
|
}, |
|
{ |
|
"epoch": 0.51, |
|
"learning_rate": 4.647727199978255e-06, |
|
"logits/chosen": 0.675479531288147, |
|
"logits/rejected": 0.755820095539093, |
|
"logps/chosen": -281.660888671875, |
|
"logps/rejected": -264.6354675292969, |
|
"loss": 2290.1266, |
|
"rewards/accuracies": 0.606249988079071, |
|
"rewards/chosen": -0.11869573593139648, |
|
"rewards/margins": 0.0478622205555439, |
|
"rewards/rejected": -0.1665579378604889, |
|
"step": 970 |
|
}, |
|
{ |
|
"epoch": 0.51, |
|
"learning_rate": 4.635945289841902e-06, |
|
"logits/chosen": 0.6294044852256775, |
|
"logits/rejected": 0.701261043548584, |
|
"logps/chosen": -249.8705596923828, |
|
"logps/rejected": -245.52346801757812, |
|
"loss": 2214.392, |
|
"rewards/accuracies": 0.643750011920929, |
|
"rewards/chosen": -0.12245059013366699, |
|
"rewards/margins": 0.05106619745492935, |
|
"rewards/rejected": -0.17351679503917694, |
|
"step": 980 |
|
}, |
|
{ |
|
"epoch": 0.52, |
|
"learning_rate": 4.623985028888527e-06, |
|
"logits/chosen": 0.7620214223861694, |
|
"logits/rejected": 0.799843430519104, |
|
"logps/chosen": -236.2934112548828, |
|
"logps/rejected": -222.5919189453125, |
|
"loss": 2149.6912, |
|
"rewards/accuracies": 0.637499988079071, |
|
"rewards/chosen": -0.12090835720300674, |
|
"rewards/margins": 0.05957023426890373, |
|
"rewards/rejected": -0.18047860264778137, |
|
"step": 990 |
|
}, |
|
{ |
|
"epoch": 0.52, |
|
"learning_rate": 4.611847415796476e-06, |
|
"logits/chosen": 0.7124743461608887, |
|
"logits/rejected": 0.6904253363609314, |
|
"logps/chosen": -265.0976257324219, |
|
"logps/rejected": -254.4890594482422, |
|
"loss": 2119.2691, |
|
"rewards/accuracies": 0.6499999761581421, |
|
"rewards/chosen": -0.11531393229961395, |
|
"rewards/margins": 0.0636182576417923, |
|
"rewards/rejected": -0.17893218994140625, |
|
"step": 1000 |
|
}, |
|
{ |
|
"epoch": 0.52, |
|
"eval_logits/chosen": 0.6619382500648499, |
|
"eval_logits/rejected": 0.721328854560852, |
|
"eval_logps/chosen": -268.7693176269531, |
|
"eval_logps/rejected": -251.76100158691406, |
|
"eval_loss": 2138.93115234375, |
|
"eval_rewards/accuracies": 0.671500027179718, |
|
"eval_rewards/chosen": -0.12151883542537689, |
|
"eval_rewards/margins": 0.060602057725191116, |
|
"eval_rewards/rejected": -0.1821209043264389, |
|
"eval_runtime": 416.4897, |
|
"eval_samples_per_second": 4.802, |
|
"eval_steps_per_second": 1.201, |
|
"step": 1000 |
|
}, |
|
{ |
|
"epoch": 0.53, |
|
"learning_rate": 4.599533464052951e-06, |
|
"logits/chosen": 0.7095866203308105, |
|
"logits/rejected": 0.7142434120178223, |
|
"logps/chosen": -285.8958740234375, |
|
"logps/rejected": -269.59906005859375, |
|
"loss": 2002.4428, |
|
"rewards/accuracies": 0.6625000238418579, |
|
"rewards/chosen": -0.09995652735233307, |
|
"rewards/margins": 0.08161594718694687, |
|
"rewards/rejected": -0.18157246708869934, |
|
"step": 1010 |
|
}, |
|
{ |
|
"epoch": 0.53, |
|
"learning_rate": 4.587044201869378e-06, |
|
"logits/chosen": 0.6786423921585083, |
|
"logits/rejected": 0.7168447375297546, |
|
"logps/chosen": -285.7081298828125, |
|
"logps/rejected": -245.5774688720703, |
|
"loss": 2104.4162, |
|
"rewards/accuracies": 0.6875, |
|
"rewards/chosen": -0.10604969412088394, |
|
"rewards/margins": 0.06783930957317352, |
|
"rewards/rejected": -0.17388899624347687, |
|
"step": 1020 |
|
}, |
|
{ |
|
"epoch": 0.54, |
|
"learning_rate": 4.574380672095555e-06, |
|
"logits/chosen": 0.6884575486183167, |
|
"logits/rejected": 0.7298802733421326, |
|
"logps/chosen": -223.19393920898438, |
|
"logps/rejected": -240.6363983154297, |
|
"loss": 2218.8305, |
|
"rewards/accuracies": 0.637499988079071, |
|
"rewards/chosen": -0.1402120441198349, |
|
"rewards/margins": 0.049252741038799286, |
|
"rewards/rejected": -0.1894647777080536, |
|
"step": 1030 |
|
}, |
|
{ |
|
"epoch": 0.54, |
|
"learning_rate": 4.561543932132574e-06, |
|
"logits/chosen": 0.6861045360565186, |
|
"logits/rejected": 0.7231858968734741, |
|
"logps/chosen": -282.3648376464844, |
|
"logps/rejected": -247.057373046875, |
|
"loss": 2149.1324, |
|
"rewards/accuracies": 0.6499999761581421, |
|
"rewards/chosen": -0.1236349567770958, |
|
"rewards/margins": 0.05487797409296036, |
|
"rewards/rejected": -0.17851293087005615, |
|
"step": 1040 |
|
}, |
|
{ |
|
"epoch": 0.55, |
|
"learning_rate": 4.548535053844527e-06, |
|
"logits/chosen": 0.6396581530570984, |
|
"logits/rejected": 0.7092006206512451, |
|
"logps/chosen": -280.1047058105469, |
|
"logps/rejected": -267.3292541503906, |
|
"loss": 2057.9182, |
|
"rewards/accuracies": 0.6937500238418579, |
|
"rewards/chosen": -0.12618432939052582, |
|
"rewards/margins": 0.07123459875583649, |
|
"rewards/rejected": -0.1974189579486847, |
|
"step": 1050 |
|
}, |
|
{ |
|
"epoch": 0.55, |
|
"learning_rate": 4.535355123469009e-06, |
|
"logits/chosen": 0.7520751357078552, |
|
"logits/rejected": 0.7533235549926758, |
|
"logps/chosen": -246.0561065673828, |
|
"logps/rejected": -210.5857391357422, |
|
"loss": 2157.4354, |
|
"rewards/accuracies": 0.643750011920929, |
|
"rewards/chosen": -0.11811725795269012, |
|
"rewards/margins": 0.05882970616221428, |
|
"rewards/rejected": -0.1769469678401947, |
|
"step": 1060 |
|
}, |
|
{ |
|
"epoch": 0.56, |
|
"learning_rate": 4.522005241526411e-06, |
|
"logits/chosen": 0.670494019985199, |
|
"logits/rejected": 0.7469106912612915, |
|
"logps/chosen": -281.83013916015625, |
|
"logps/rejected": -235.8207244873047, |
|
"loss": 2224.4785, |
|
"rewards/accuracies": 0.6499999761581421, |
|
"rewards/chosen": -0.12622186541557312, |
|
"rewards/margins": 0.04977993294596672, |
|
"rewards/rejected": -0.17600181698799133, |
|
"step": 1070 |
|
}, |
|
{ |
|
"epoch": 0.57, |
|
"learning_rate": 4.508486522728037e-06, |
|
"logits/chosen": 0.6910241842269897, |
|
"logits/rejected": 0.7257175445556641, |
|
"logps/chosen": -276.518798828125, |
|
"logps/rejected": -260.69781494140625, |
|
"loss": 2003.6445, |
|
"rewards/accuracies": 0.6812499761581421, |
|
"rewards/chosen": -0.1153578907251358, |
|
"rewards/margins": 0.07534319162368774, |
|
"rewards/rejected": -0.19070109724998474, |
|
"step": 1080 |
|
}, |
|
{ |
|
"epoch": 0.57, |
|
"learning_rate": 4.494800095883014e-06, |
|
"logits/chosen": 0.6132059097290039, |
|
"logits/rejected": 0.6958727836608887, |
|
"logps/chosen": -290.1937255859375, |
|
"logps/rejected": -244.7795867919922, |
|
"loss": 1914.4395, |
|
"rewards/accuracies": 0.6937500238418579, |
|
"rewards/chosen": -0.1075458973646164, |
|
"rewards/margins": 0.09307406842708588, |
|
"rewards/rejected": -0.20061998069286346, |
|
"step": 1090 |
|
}, |
|
{ |
|
"epoch": 0.58, |
|
"learning_rate": 4.480947103804044e-06, |
|
"logits/chosen": 0.5848616361618042, |
|
"logits/rejected": 0.6844476461410522, |
|
"logps/chosen": -286.0004577636719, |
|
"logps/rejected": -235.7235565185547, |
|
"loss": 2191.7109, |
|
"rewards/accuracies": 0.668749988079071, |
|
"rewards/chosen": -0.12465153634548187, |
|
"rewards/margins": 0.05475841090083122, |
|
"rewards/rejected": -0.1794099658727646, |
|
"step": 1100 |
|
}, |
|
{ |
|
"epoch": 0.58, |
|
"eval_logits/chosen": 0.6583799123764038, |
|
"eval_logits/rejected": 0.7175658345222473, |
|
"eval_logps/chosen": -269.19097900390625, |
|
"eval_logps/rejected": -252.60589599609375, |
|
"eval_loss": 2121.8115234375, |
|
"eval_rewards/accuracies": 0.6694999933242798, |
|
"eval_rewards/chosen": -0.12573528289794922, |
|
"eval_rewards/margins": 0.06483451277017593, |
|
"eval_rewards/rejected": -0.19056977331638336, |
|
"eval_runtime": 416.4568, |
|
"eval_samples_per_second": 4.802, |
|
"eval_steps_per_second": 1.201, |
|
"step": 1100 |
|
}, |
|
{ |
|
"epoch": 0.58, |
|
"learning_rate": 4.466928703211981e-06, |
|
"logits/chosen": 0.6980951428413391, |
|
"logits/rejected": 0.6862035989761353, |
|
"logps/chosen": -281.24700927734375, |
|
"logps/rejected": -248.4276123046875, |
|
"loss": 2063.3184, |
|
"rewards/accuracies": 0.6812499761581421, |
|
"rewards/chosen": -0.11639384180307388, |
|
"rewards/margins": 0.07433497160673141, |
|
"rewards/rejected": -0.1907288283109665, |
|
"step": 1110 |
|
}, |
|
{ |
|
"epoch": 0.59, |
|
"learning_rate": 4.452746064639239e-06, |
|
"logits/chosen": 0.678636372089386, |
|
"logits/rejected": 0.6571283936500549, |
|
"logps/chosen": -258.97674560546875, |
|
"logps/rejected": -254.3134307861328, |
|
"loss": 2036.2967, |
|
"rewards/accuracies": 0.7124999761581421, |
|
"rewards/chosen": -0.10998505353927612, |
|
"rewards/margins": 0.06968870759010315, |
|
"rewards/rejected": -0.17967377603054047, |
|
"step": 1120 |
|
}, |
|
{ |
|
"epoch": 0.59, |
|
"learning_rate": 4.438400372332058e-06, |
|
"logits/chosen": 0.7093490958213806, |
|
"logits/rejected": 0.7673132419586182, |
|
"logps/chosen": -265.0453796386719, |
|
"logps/rejected": -250.15982055664062, |
|
"loss": 1899.6914, |
|
"rewards/accuracies": 0.7437499761581421, |
|
"rewards/chosen": -0.10779963433742523, |
|
"rewards/margins": 0.08556055277585983, |
|
"rewards/rejected": -0.19336020946502686, |
|
"step": 1130 |
|
}, |
|
{ |
|
"epoch": 0.6, |
|
"learning_rate": 4.423892824151617e-06, |
|
"logits/chosen": 0.6779216527938843, |
|
"logits/rejected": 0.7495394945144653, |
|
"logps/chosen": -276.6440734863281, |
|
"logps/rejected": -247.07150268554688, |
|
"loss": 2002.1414, |
|
"rewards/accuracies": 0.71875, |
|
"rewards/chosen": -0.11242429912090302, |
|
"rewards/margins": 0.08376909792423248, |
|
"rewards/rejected": -0.19619342684745789, |
|
"step": 1140 |
|
}, |
|
{ |
|
"epoch": 0.6, |
|
"learning_rate": 4.409224631474014e-06, |
|
"logits/chosen": 0.6950255632400513, |
|
"logits/rejected": 0.7308493256568909, |
|
"logps/chosen": -258.3533020019531, |
|
"logps/rejected": -235.54483032226562, |
|
"loss": 1995.5852, |
|
"rewards/accuracies": 0.7250000238418579, |
|
"rewards/chosen": -0.12844698131084442, |
|
"rewards/margins": 0.07693418860435486, |
|
"rewards/rejected": -0.20538118481636047, |
|
"step": 1150 |
|
}, |
|
{ |
|
"epoch": 0.61, |
|
"learning_rate": 4.3943970190891164e-06, |
|
"logits/chosen": 0.6389755010604858, |
|
"logits/rejected": 0.6700756549835205, |
|
"logps/chosen": -264.4709167480469, |
|
"logps/rejected": -256.2208557128906, |
|
"loss": 2016.508, |
|
"rewards/accuracies": 0.6937500238418579, |
|
"rewards/chosen": -0.13102933764457703, |
|
"rewards/margins": 0.07655525207519531, |
|
"rewards/rejected": -0.20758457481861115, |
|
"step": 1160 |
|
}, |
|
{ |
|
"epoch": 0.61, |
|
"learning_rate": 4.379411225098292e-06, |
|
"logits/chosen": 0.6980705261230469, |
|
"logits/rejected": 0.7923838496208191, |
|
"logps/chosen": -283.1957702636719, |
|
"logps/rejected": -267.30938720703125, |
|
"loss": 2012.2674, |
|
"rewards/accuracies": 0.731249988079071, |
|
"rewards/chosen": -0.138292133808136, |
|
"rewards/margins": 0.08078579604625702, |
|
"rewards/rejected": -0.2190779149532318, |
|
"step": 1170 |
|
}, |
|
{ |
|
"epoch": 0.62, |
|
"learning_rate": 4.364268500811025e-06, |
|
"logits/chosen": 0.6915451288223267, |
|
"logits/rejected": 0.6770834922790527, |
|
"logps/chosen": -258.2509460449219, |
|
"logps/rejected": -266.7464294433594, |
|
"loss": 2101.8338, |
|
"rewards/accuracies": 0.675000011920929, |
|
"rewards/chosen": -0.13359954953193665, |
|
"rewards/margins": 0.07587826251983643, |
|
"rewards/rejected": -0.20947781205177307, |
|
"step": 1180 |
|
}, |
|
{ |
|
"epoch": 0.62, |
|
"learning_rate": 4.348970110640437e-06, |
|
"logits/chosen": 0.6509718298912048, |
|
"logits/rejected": 0.7129366993904114, |
|
"logps/chosen": -258.56280517578125, |
|
"logps/rejected": -237.4566192626953, |
|
"loss": 2027.6717, |
|
"rewards/accuracies": 0.6937500238418579, |
|
"rewards/chosen": -0.14995795488357544, |
|
"rewards/margins": 0.07759587466716766, |
|
"rewards/rejected": -0.2275538146495819, |
|
"step": 1190 |
|
}, |
|
{ |
|
"epoch": 0.63, |
|
"learning_rate": 4.333517331997704e-06, |
|
"logits/chosen": 0.5978332161903381, |
|
"logits/rejected": 0.6565033793449402, |
|
"logps/chosen": -272.02166748046875, |
|
"logps/rejected": -271.3028259277344, |
|
"loss": 2308.1883, |
|
"rewards/accuracies": 0.5874999761581421, |
|
"rewards/chosen": -0.1387997567653656, |
|
"rewards/margins": 0.04619471728801727, |
|
"rewards/rejected": -0.18499447405338287, |
|
"step": 1200 |
|
}, |
|
{ |
|
"epoch": 0.63, |
|
"eval_logits/chosen": 0.6329967379570007, |
|
"eval_logits/rejected": 0.6920445561408997, |
|
"eval_logps/chosen": -270.7044372558594, |
|
"eval_logps/rejected": -254.78115844726562, |
|
"eval_loss": 2110.306884765625, |
|
"eval_rewards/accuracies": 0.6664999723434448, |
|
"eval_rewards/chosen": -0.14087003469467163, |
|
"eval_rewards/margins": 0.07145243883132935, |
|
"eval_rewards/rejected": -0.21232248842716217, |
|
"eval_runtime": 416.6934, |
|
"eval_samples_per_second": 4.8, |
|
"eval_steps_per_second": 1.2, |
|
"step": 1200 |
|
}, |
|
{ |
|
"epoch": 0.63, |
|
"learning_rate": 4.317911455185396e-06, |
|
"logits/chosen": 0.6959893703460693, |
|
"logits/rejected": 0.7259203791618347, |
|
"logps/chosen": -266.06829833984375, |
|
"logps/rejected": -238.3871307373047, |
|
"loss": 2262.5527, |
|
"rewards/accuracies": 0.6187499761581421, |
|
"rewards/chosen": -0.15047064423561096, |
|
"rewards/margins": 0.04972488805651665, |
|
"rewards/rejected": -0.2001955509185791, |
|
"step": 1210 |
|
}, |
|
{ |
|
"epoch": 0.64, |
|
"learning_rate": 4.302153783289737e-06, |
|
"logits/chosen": 0.6275352239608765, |
|
"logits/rejected": 0.7193800806999207, |
|
"logps/chosen": -266.05908203125, |
|
"logps/rejected": -258.8971252441406, |
|
"loss": 2233.5896, |
|
"rewards/accuracies": 0.6187499761581421, |
|
"rewards/chosen": -0.1438441276550293, |
|
"rewards/margins": 0.05622429400682449, |
|
"rewards/rejected": -0.2000684291124344, |
|
"step": 1220 |
|
}, |
|
{ |
|
"epoch": 0.64, |
|
"learning_rate": 4.286245632071791e-06, |
|
"logits/chosen": 0.6443454623222351, |
|
"logits/rejected": 0.6870865225791931, |
|
"logps/chosen": -257.45611572265625, |
|
"logps/rejected": -253.72756958007812, |
|
"loss": 2126.4154, |
|
"rewards/accuracies": 0.668749988079071, |
|
"rewards/chosen": -0.15624599158763885, |
|
"rewards/margins": 0.06528286635875702, |
|
"rewards/rejected": -0.22152885794639587, |
|
"step": 1230 |
|
}, |
|
{ |
|
"epoch": 0.65, |
|
"learning_rate": 4.270188329857613e-06, |
|
"logits/chosen": 0.7713927626609802, |
|
"logits/rejected": 0.7753847241401672, |
|
"logps/chosen": -263.57708740234375, |
|
"logps/rejected": -270.3426513671875, |
|
"loss": 2108.1736, |
|
"rewards/accuracies": 0.7124999761581421, |
|
"rewards/chosen": -0.12420948594808578, |
|
"rewards/margins": 0.06607901304960251, |
|
"rewards/rejected": -0.1902884989976883, |
|
"step": 1240 |
|
}, |
|
{ |
|
"epoch": 0.65, |
|
"learning_rate": 4.253983217427313e-06, |
|
"logits/chosen": 0.6878337860107422, |
|
"logits/rejected": 0.7090884447097778, |
|
"logps/chosen": -271.42657470703125, |
|
"logps/rejected": -288.615478515625, |
|
"loss": 2128.0695, |
|
"rewards/accuracies": 0.643750011920929, |
|
"rewards/chosen": -0.13797307014465332, |
|
"rewards/margins": 0.06890544295310974, |
|
"rewards/rejected": -0.20687851309776306, |
|
"step": 1250 |
|
}, |
|
{ |
|
"epoch": 0.66, |
|
"learning_rate": 4.237631647903115e-06, |
|
"logits/chosen": 0.6635148525238037, |
|
"logits/rejected": 0.6455484628677368, |
|
"logps/chosen": -266.98248291015625, |
|
"logps/rejected": -250.23403930664062, |
|
"loss": 2209.6877, |
|
"rewards/accuracies": 0.606249988079071, |
|
"rewards/chosen": -0.13750961422920227, |
|
"rewards/margins": 0.05613657087087631, |
|
"rewards/rejected": -0.1936461478471756, |
|
"step": 1260 |
|
}, |
|
{ |
|
"epoch": 0.66, |
|
"learning_rate": 4.221134986636371e-06, |
|
"logits/chosen": 0.6171488761901855, |
|
"logits/rejected": 0.6567360758781433, |
|
"logps/chosen": -273.8824157714844, |
|
"logps/rejected": -249.103515625, |
|
"loss": 1885.2906, |
|
"rewards/accuracies": 0.7124999761581421, |
|
"rewards/chosen": -0.11038468778133392, |
|
"rewards/margins": 0.10038020461797714, |
|
"rewards/rejected": -0.21076488494873047, |
|
"step": 1270 |
|
}, |
|
{ |
|
"epoch": 0.67, |
|
"learning_rate": 4.204494611093548e-06, |
|
"logits/chosen": 0.7011705636978149, |
|
"logits/rejected": 0.6791177988052368, |
|
"logps/chosen": -251.37478637695312, |
|
"logps/rejected": -261.1529846191406, |
|
"loss": 2258.1799, |
|
"rewards/accuracies": 0.6312500238418579, |
|
"rewards/chosen": -0.14569328725337982, |
|
"rewards/margins": 0.0609690323472023, |
|
"rewards/rejected": -0.20666229724884033, |
|
"step": 1280 |
|
}, |
|
{ |
|
"epoch": 0.68, |
|
"learning_rate": 4.1877119107412165e-06, |
|
"logits/chosen": 0.6343793869018555, |
|
"logits/rejected": 0.6927725672721863, |
|
"logps/chosen": -237.8370819091797, |
|
"logps/rejected": -256.54254150390625, |
|
"loss": 2061.6873, |
|
"rewards/accuracies": 0.6812499761581421, |
|
"rewards/chosen": -0.1375429332256317, |
|
"rewards/margins": 0.07506345212459564, |
|
"rewards/rejected": -0.21260638535022736, |
|
"step": 1290 |
|
}, |
|
{ |
|
"epoch": 0.68, |
|
"learning_rate": 4.170788286930024e-06, |
|
"logits/chosen": 0.6356396675109863, |
|
"logits/rejected": 0.7518913149833679, |
|
"logps/chosen": -275.9583435058594, |
|
"logps/rejected": -253.1591033935547, |
|
"loss": 1996.7178, |
|
"rewards/accuracies": 0.6812499761581421, |
|
"rewards/chosen": -0.1259593516588211, |
|
"rewards/margins": 0.09192151576280594, |
|
"rewards/rejected": -0.21788087487220764, |
|
"step": 1300 |
|
}, |
|
{ |
|
"epoch": 0.68, |
|
"eval_logits/chosen": 0.6140788793563843, |
|
"eval_logits/rejected": 0.6721699833869934, |
|
"eval_logps/chosen": -269.7620544433594, |
|
"eval_logps/rejected": -253.97259521484375, |
|
"eval_loss": 2095.31298828125, |
|
"eval_rewards/accuracies": 0.6754999756813049, |
|
"eval_rewards/chosen": -0.1314462274312973, |
|
"eval_rewards/margins": 0.07279053330421448, |
|
"eval_rewards/rejected": -0.20423679053783417, |
|
"eval_runtime": 416.679, |
|
"eval_samples_per_second": 4.8, |
|
"eval_steps_per_second": 1.2, |
|
"step": 1300 |
|
}, |
|
{ |
|
"epoch": 0.69, |
|
"learning_rate": 4.15372515277769e-06, |
|
"logits/chosen": 0.6244436502456665, |
|
"logits/rejected": 0.6609630584716797, |
|
"logps/chosen": -280.32794189453125, |
|
"logps/rejected": -248.7582244873047, |
|
"loss": 2011.6244, |
|
"rewards/accuracies": 0.6812499761581421, |
|
"rewards/chosen": -0.11511021852493286, |
|
"rewards/margins": 0.08765153586864471, |
|
"rewards/rejected": -0.20276173949241638, |
|
"step": 1310 |
|
}, |
|
{ |
|
"epoch": 0.69, |
|
"learning_rate": 4.136523933051005e-06, |
|
"logits/chosen": 0.6894992589950562, |
|
"logits/rejected": 0.6712801456451416, |
|
"logps/chosen": -263.05767822265625, |
|
"logps/rejected": -234.242431640625, |
|
"loss": 1956.5369, |
|
"rewards/accuracies": 0.737500011920929, |
|
"rewards/chosen": -0.11226965487003326, |
|
"rewards/margins": 0.09646574407815933, |
|
"rewards/rejected": -0.208735391497612, |
|
"step": 1320 |
|
}, |
|
{ |
|
"epoch": 0.7, |
|
"learning_rate": 4.119186064046868e-06, |
|
"logits/chosen": 0.6183528900146484, |
|
"logits/rejected": 0.644507110118866, |
|
"logps/chosen": -274.1941223144531, |
|
"logps/rejected": -245.35562133789062, |
|
"loss": 2166.2785, |
|
"rewards/accuracies": 0.6937500238418579, |
|
"rewards/chosen": -0.12718230485916138, |
|
"rewards/margins": 0.06536950916051865, |
|
"rewards/rejected": -0.19255182147026062, |
|
"step": 1330 |
|
}, |
|
{ |
|
"epoch": 0.7, |
|
"learning_rate": 4.101712993472348e-06, |
|
"logits/chosen": 0.6670488715171814, |
|
"logits/rejected": 0.6754225492477417, |
|
"logps/chosen": -278.2078552246094, |
|
"logps/rejected": -241.5122528076172, |
|
"loss": 1883.2881, |
|
"rewards/accuracies": 0.75, |
|
"rewards/chosen": -0.119564950466156, |
|
"rewards/margins": 0.09150619804859161, |
|
"rewards/rejected": -0.2110711634159088, |
|
"step": 1340 |
|
}, |
|
{ |
|
"epoch": 0.71, |
|
"learning_rate": 4.084106180323813e-06, |
|
"logits/chosen": 0.6214176416397095, |
|
"logits/rejected": 0.66867595911026, |
|
"logps/chosen": -261.7587890625, |
|
"logps/rejected": -251.6807098388672, |
|
"loss": 2017.3074, |
|
"rewards/accuracies": 0.699999988079071, |
|
"rewards/chosen": -0.1294572651386261, |
|
"rewards/margins": 0.08191975206136703, |
|
"rewards/rejected": -0.21137702465057373, |
|
"step": 1350 |
|
}, |
|
{ |
|
"epoch": 0.71, |
|
"learning_rate": 4.066367094765091e-06, |
|
"logits/chosen": 0.6692546606063843, |
|
"logits/rejected": 0.6942587494850159, |
|
"logps/chosen": -259.8294982910156, |
|
"logps/rejected": -268.8597412109375, |
|
"loss": 2097.6473, |
|
"rewards/accuracies": 0.65625, |
|
"rewards/chosen": -0.12980519235134125, |
|
"rewards/margins": 0.07286903262138367, |
|
"rewards/rejected": -0.20267422497272491, |
|
"step": 1360 |
|
}, |
|
{ |
|
"epoch": 0.72, |
|
"learning_rate": 4.048497218004724e-06, |
|
"logits/chosen": 0.5632964968681335, |
|
"logits/rejected": 0.6666015386581421, |
|
"logps/chosen": -265.3409729003906, |
|
"logps/rejected": -252.78958129882812, |
|
"loss": 2076.2803, |
|
"rewards/accuracies": 0.6499999761581421, |
|
"rewards/chosen": -0.13252495229244232, |
|
"rewards/margins": 0.08360429853200912, |
|
"rewards/rejected": -0.21612922847270966, |
|
"step": 1370 |
|
}, |
|
{ |
|
"epoch": 0.72, |
|
"learning_rate": 4.030498042172277e-06, |
|
"logits/chosen": 0.6174412369728088, |
|
"logits/rejected": 0.6783226728439331, |
|
"logps/chosen": -249.213134765625, |
|
"logps/rejected": -241.413330078125, |
|
"loss": 2285.0457, |
|
"rewards/accuracies": 0.6625000238418579, |
|
"rewards/chosen": -0.14881116151809692, |
|
"rewards/margins": 0.05003712326288223, |
|
"rewards/rejected": -0.19884827733039856, |
|
"step": 1380 |
|
}, |
|
{ |
|
"epoch": 0.73, |
|
"learning_rate": 4.012371070193753e-06, |
|
"logits/chosen": 0.6269063353538513, |
|
"logits/rejected": 0.6346549391746521, |
|
"logps/chosen": -241.5870361328125, |
|
"logps/rejected": -245.96456909179688, |
|
"loss": 2133.8158, |
|
"rewards/accuracies": 0.6625000238418579, |
|
"rewards/chosen": -0.1412040889263153, |
|
"rewards/margins": 0.06111832335591316, |
|
"rewards/rejected": -0.20232239365577698, |
|
"step": 1390 |
|
}, |
|
{ |
|
"epoch": 0.73, |
|
"learning_rate": 3.994117815666095e-06, |
|
"logits/chosen": 0.6533576250076294, |
|
"logits/rejected": 0.6740087866783142, |
|
"logps/chosen": -283.4432678222656, |
|
"logps/rejected": -267.79071044921875, |
|
"loss": 2038.3844, |
|
"rewards/accuracies": 0.65625, |
|
"rewards/chosen": -0.1330070197582245, |
|
"rewards/margins": 0.07623559981584549, |
|
"rewards/rejected": -0.20924265682697296, |
|
"step": 1400 |
|
}, |
|
{ |
|
"epoch": 0.73, |
|
"eval_logits/chosen": 0.5933060050010681, |
|
"eval_logits/rejected": 0.6512511968612671, |
|
"eval_logps/chosen": -270.4488220214844, |
|
"eval_logps/rejected": -254.944091796875, |
|
"eval_loss": 2085.085205078125, |
|
"eval_rewards/accuracies": 0.6800000071525574, |
|
"eval_rewards/chosen": -0.13831348717212677, |
|
"eval_rewards/margins": 0.07563827186822891, |
|
"eval_rewards/rejected": -0.21395176649093628, |
|
"eval_runtime": 416.609, |
|
"eval_samples_per_second": 4.801, |
|
"eval_steps_per_second": 1.2, |
|
"step": 1400 |
|
}, |
|
{ |
|
"epoch": 0.74, |
|
"learning_rate": 3.975739802730805e-06, |
|
"logits/chosen": 0.5807250738143921, |
|
"logits/rejected": 0.6861320734024048, |
|
"logps/chosen": -298.25604248046875, |
|
"logps/rejected": -273.8665466308594, |
|
"loss": 2016.2207, |
|
"rewards/accuracies": 0.6875, |
|
"rewards/chosen": -0.12952394783496857, |
|
"rewards/margins": 0.08187790215015411, |
|
"rewards/rejected": -0.21140184998512268, |
|
"step": 1410 |
|
}, |
|
{ |
|
"epoch": 0.74, |
|
"learning_rate": 3.957238565946672e-06, |
|
"logits/chosen": 0.6601132750511169, |
|
"logits/rejected": 0.6705759763717651, |
|
"logps/chosen": -266.2096252441406, |
|
"logps/rejected": -249.5722198486328, |
|
"loss": 2228.1479, |
|
"rewards/accuracies": 0.6499999761581421, |
|
"rewards/chosen": -0.1489478349685669, |
|
"rewards/margins": 0.05832044407725334, |
|
"rewards/rejected": -0.20726828277111053, |
|
"step": 1420 |
|
}, |
|
{ |
|
"epoch": 0.75, |
|
"learning_rate": 3.938615650161645e-06, |
|
"logits/chosen": 0.6056556701660156, |
|
"logits/rejected": 0.5954689979553223, |
|
"logps/chosen": -244.7415771484375, |
|
"logps/rejected": -234.8352813720703, |
|
"loss": 2023.5078, |
|
"rewards/accuracies": 0.706250011920929, |
|
"rewards/chosen": -0.11950768530368805, |
|
"rewards/margins": 0.0772583931684494, |
|
"rewards/rejected": -0.19676607847213745, |
|
"step": 1430 |
|
}, |
|
{ |
|
"epoch": 0.75, |
|
"learning_rate": 3.919872610383831e-06, |
|
"logits/chosen": 0.5716265439987183, |
|
"logits/rejected": 0.6326289176940918, |
|
"logps/chosen": -257.7333984375, |
|
"logps/rejected": -234.2956085205078, |
|
"loss": 2199.8311, |
|
"rewards/accuracies": 0.6312500238418579, |
|
"rewards/chosen": -0.14616943895816803, |
|
"rewards/margins": 0.055809132754802704, |
|
"rewards/rejected": -0.20197856426239014, |
|
"step": 1440 |
|
}, |
|
{ |
|
"epoch": 0.76, |
|
"learning_rate": 3.9010110116516595e-06, |
|
"logits/chosen": 0.655591607093811, |
|
"logits/rejected": 0.7094139456748962, |
|
"logps/chosen": -266.137939453125, |
|
"logps/rejected": -247.2853240966797, |
|
"loss": 1965.8004, |
|
"rewards/accuracies": 0.762499988079071, |
|
"rewards/chosen": -0.13736537098884583, |
|
"rewards/margins": 0.0867539569735527, |
|
"rewards/rejected": -0.22411933541297913, |
|
"step": 1450 |
|
}, |
|
{ |
|
"epoch": 0.76, |
|
"learning_rate": 3.882032428903195e-06, |
|
"logits/chosen": 0.6148696541786194, |
|
"logits/rejected": 0.6896382570266724, |
|
"logps/chosen": -256.70904541015625, |
|
"logps/rejected": -245.3892822265625, |
|
"loss": 2095.5068, |
|
"rewards/accuracies": 0.637499988079071, |
|
"rewards/chosen": -0.12945787608623505, |
|
"rewards/margins": 0.07017168402671814, |
|
"rewards/rejected": -0.19962957501411438, |
|
"step": 1460 |
|
}, |
|
{ |
|
"epoch": 0.77, |
|
"learning_rate": 3.8629384468446365e-06, |
|
"logits/chosen": 0.5744356513023376, |
|
"logits/rejected": 0.5954487919807434, |
|
"logps/chosen": -250.98178100585938, |
|
"logps/rejected": -272.93701171875, |
|
"loss": 2119.459, |
|
"rewards/accuracies": 0.6812499761581421, |
|
"rewards/chosen": -0.13649822771549225, |
|
"rewards/margins": 0.07962769269943237, |
|
"rewards/rejected": -0.21612592041492462, |
|
"step": 1470 |
|
}, |
|
{ |
|
"epoch": 0.77, |
|
"learning_rate": 3.84373065981799e-06, |
|
"logits/chosen": 0.6630114912986755, |
|
"logits/rejected": 0.6675506830215454, |
|
"logps/chosen": -265.52447509765625, |
|
"logps/rejected": -247.32455444335938, |
|
"loss": 1991.6379, |
|
"rewards/accuracies": 0.706250011920929, |
|
"rewards/chosen": -0.12121538817882538, |
|
"rewards/margins": 0.08006526529788971, |
|
"rewards/rejected": -0.20128066837787628, |
|
"step": 1480 |
|
}, |
|
{ |
|
"epoch": 0.78, |
|
"learning_rate": 3.824410671667948e-06, |
|
"logits/chosen": 0.6106497645378113, |
|
"logits/rejected": 0.680738091468811, |
|
"logps/chosen": -260.89288330078125, |
|
"logps/rejected": -252.52017211914062, |
|
"loss": 1942.9977, |
|
"rewards/accuracies": 0.7250000238418579, |
|
"rewards/chosen": -0.12056465446949005, |
|
"rewards/margins": 0.09015407413244247, |
|
"rewards/rejected": -0.21071875095367432, |
|
"step": 1490 |
|
}, |
|
{ |
|
"epoch": 0.79, |
|
"learning_rate": 3.8049800956079552e-06, |
|
"logits/chosen": 0.5932056903839111, |
|
"logits/rejected": 0.6287232637405396, |
|
"logps/chosen": -291.2415771484375, |
|
"logps/rejected": -279.5646057128906, |
|
"loss": 2094.2182, |
|
"rewards/accuracies": 0.668749988079071, |
|
"rewards/chosen": -0.13307559490203857, |
|
"rewards/margins": 0.0750352293252945, |
|
"rewards/rejected": -0.20811080932617188, |
|
"step": 1500 |
|
}, |
|
{ |
|
"epoch": 0.79, |
|
"eval_logits/chosen": 0.5898318886756897, |
|
"eval_logits/rejected": 0.6474130749702454, |
|
"eval_logps/chosen": -270.5129089355469, |
|
"eval_logps/rejected": -255.21328735351562, |
|
"eval_loss": 2076.30419921875, |
|
"eval_rewards/accuracies": 0.6790000200271606, |
|
"eval_rewards/chosen": -0.13895468413829803, |
|
"eval_rewards/margins": 0.07768914848566055, |
|
"eval_rewards/rejected": -0.2166438102722168, |
|
"eval_runtime": 416.7121, |
|
"eval_samples_per_second": 4.799, |
|
"eval_steps_per_second": 1.2, |
|
"step": 1500 |
|
}, |
|
{ |
|
"epoch": 0.79, |
|
"learning_rate": 3.7854405540855268e-06, |
|
"logits/chosen": 0.580877959728241, |
|
"logits/rejected": 0.6030541658401489, |
|
"logps/chosen": -255.82693481445312, |
|
"logps/rejected": -249.1620635986328, |
|
"loss": 2165.7623, |
|
"rewards/accuracies": 0.706250011920929, |
|
"rewards/chosen": -0.1440289467573166, |
|
"rewards/margins": 0.06519783288240433, |
|
"rewards/rejected": -0.2092268019914627, |
|
"step": 1510 |
|
}, |
|
{ |
|
"epoch": 0.8, |
|
"learning_rate": 3.765793678646753e-06, |
|
"logits/chosen": 0.612065851688385, |
|
"logits/rejected": 0.6108434200286865, |
|
"logps/chosen": -236.6591796875, |
|
"logps/rejected": -245.4730224609375, |
|
"loss": 2151.6375, |
|
"rewards/accuracies": 0.6812499761581421, |
|
"rewards/chosen": -0.13691964745521545, |
|
"rewards/margins": 0.06442641466856003, |
|
"rewards/rejected": -0.20134606957435608, |
|
"step": 1520 |
|
}, |
|
{ |
|
"epoch": 0.8, |
|
"learning_rate": 3.7460411098000804e-06, |
|
"logits/chosen": 0.620397686958313, |
|
"logits/rejected": 0.6705790758132935, |
|
"logps/chosen": -279.47003173828125, |
|
"logps/rejected": -242.50320434570312, |
|
"loss": 2097.6518, |
|
"rewards/accuracies": 0.6625000238418579, |
|
"rewards/chosen": -0.13826757669448853, |
|
"rewards/margins": 0.06730998307466507, |
|
"rewards/rejected": -0.2055775672197342, |
|
"step": 1530 |
|
}, |
|
{ |
|
"epoch": 0.81, |
|
"learning_rate": 3.726184496879323e-06, |
|
"logits/chosen": 0.5731703042984009, |
|
"logits/rejected": 0.6038475036621094, |
|
"logps/chosen": -273.57684326171875, |
|
"logps/rejected": -263.6417541503906, |
|
"loss": 1980.56, |
|
"rewards/accuracies": 0.6875, |
|
"rewards/chosen": -0.13297039270401, |
|
"rewards/margins": 0.08892510086297989, |
|
"rewards/rejected": -0.2218955010175705, |
|
"step": 1540 |
|
}, |
|
{ |
|
"epoch": 0.81, |
|
"learning_rate": 3.706225497905946e-06, |
|
"logits/chosen": 0.5495398640632629, |
|
"logits/rejected": 0.6184272170066833, |
|
"logps/chosen": -278.1634521484375, |
|
"logps/rejected": -250.0457763671875, |
|
"loss": 1925.0881, |
|
"rewards/accuracies": 0.6937500238418579, |
|
"rewards/chosen": -0.14073289930820465, |
|
"rewards/margins": 0.08853240311145782, |
|
"rewards/rejected": -0.22926530241966248, |
|
"step": 1550 |
|
}, |
|
{ |
|
"epoch": 0.82, |
|
"learning_rate": 3.686165779450619e-06, |
|
"logits/chosen": 0.6478545069694519, |
|
"logits/rejected": 0.6362086534500122, |
|
"logps/chosen": -267.02618408203125, |
|
"logps/rejected": -239.1699676513672, |
|
"loss": 2063.3338, |
|
"rewards/accuracies": 0.65625, |
|
"rewards/chosen": -0.13111211359500885, |
|
"rewards/margins": 0.07986196875572205, |
|
"rewards/rejected": -0.2109740674495697, |
|
"step": 1560 |
|
}, |
|
{ |
|
"epoch": 0.82, |
|
"learning_rate": 3.6660070164940614e-06, |
|
"logits/chosen": 0.6316484808921814, |
|
"logits/rejected": 0.686813473701477, |
|
"logps/chosen": -254.73837280273438, |
|
"logps/rejected": -249.4886016845703, |
|
"loss": 1958.6893, |
|
"rewards/accuracies": 0.737500011920929, |
|
"rewards/chosen": -0.1321488916873932, |
|
"rewards/margins": 0.09855500608682632, |
|
"rewards/rejected": -0.23070387542247772, |
|
"step": 1570 |
|
}, |
|
{ |
|
"epoch": 0.83, |
|
"learning_rate": 3.645750892287178e-06, |
|
"logits/chosen": 0.6227657794952393, |
|
"logits/rejected": 0.642948567867279, |
|
"logps/chosen": -254.3902587890625, |
|
"logps/rejected": -239.3006134033203, |
|
"loss": 2093.4068, |
|
"rewards/accuracies": 0.675000011920929, |
|
"rewards/chosen": -0.14098116755485535, |
|
"rewards/margins": 0.07492861151695251, |
|
"rewards/rejected": -0.21590976417064667, |
|
"step": 1580 |
|
}, |
|
{ |
|
"epoch": 0.83, |
|
"learning_rate": 3.6253990982105114e-06, |
|
"logits/chosen": 0.5823426842689514, |
|
"logits/rejected": 0.6044851541519165, |
|
"logps/chosen": -282.6208801269531, |
|
"logps/rejected": -284.2301330566406, |
|
"loss": 2308.56, |
|
"rewards/accuracies": 0.625, |
|
"rewards/chosen": -0.1489720195531845, |
|
"rewards/margins": 0.050694145262241364, |
|
"rewards/rejected": -0.19966615736484528, |
|
"step": 1590 |
|
}, |
|
{ |
|
"epoch": 0.84, |
|
"learning_rate": 3.604953333633009e-06, |
|
"logits/chosen": 0.6414502859115601, |
|
"logits/rejected": 0.6938506960868835, |
|
"logps/chosen": -254.117431640625, |
|
"logps/rejected": -234.0909423828125, |
|
"loss": 2171.3457, |
|
"rewards/accuracies": 0.637499988079071, |
|
"rewards/chosen": -0.12779875099658966, |
|
"rewards/margins": 0.06595613807439804, |
|
"rewards/rejected": -0.1937548816204071, |
|
"step": 1600 |
|
}, |
|
{ |
|
"epoch": 0.84, |
|
"eval_logits/chosen": 0.5817673802375793, |
|
"eval_logits/rejected": 0.6391910910606384, |
|
"eval_logps/chosen": -270.3594970703125, |
|
"eval_logps/rejected": -255.21298217773438, |
|
"eval_loss": 2069.375732421875, |
|
"eval_rewards/accuracies": 0.6809999942779541, |
|
"eval_rewards/chosen": -0.137420654296875, |
|
"eval_rewards/margins": 0.07922003418207169, |
|
"eval_rewards/rejected": -0.2166406810283661, |
|
"eval_runtime": 416.6975, |
|
"eval_samples_per_second": 4.8, |
|
"eval_steps_per_second": 1.2, |
|
"step": 1600 |
|
}, |
|
{ |
|
"epoch": 0.84, |
|
"learning_rate": 3.5844153057701303e-06, |
|
"logits/chosen": 0.6806268095970154, |
|
"logits/rejected": 0.6613883376121521, |
|
"logps/chosen": -293.35455322265625, |
|
"logps/rejected": -249.47317504882812, |
|
"loss": 2235.1336, |
|
"rewards/accuracies": 0.637499988079071, |
|
"rewards/chosen": -0.13428549468517303, |
|
"rewards/margins": 0.06987977027893066, |
|
"rewards/rejected": -0.2041652649641037, |
|
"step": 1610 |
|
}, |
|
{ |
|
"epoch": 0.85, |
|
"learning_rate": 3.56378672954129e-06, |
|
"logits/chosen": 0.5934259295463562, |
|
"logits/rejected": 0.6393053531646729, |
|
"logps/chosen": -263.8625183105469, |
|
"logps/rejected": -268.93646240234375, |
|
"loss": 2115.3395, |
|
"rewards/accuracies": 0.6625000238418579, |
|
"rewards/chosen": -0.12568698823451996, |
|
"rewards/margins": 0.06675116717815399, |
|
"rewards/rejected": -0.19243815541267395, |
|
"step": 1620 |
|
}, |
|
{ |
|
"epoch": 0.85, |
|
"learning_rate": 3.5430693274266694e-06, |
|
"logits/chosen": 0.6212111711502075, |
|
"logits/rejected": 0.6776979565620422, |
|
"logps/chosen": -265.48065185546875, |
|
"logps/rejected": -242.78189086914062, |
|
"loss": 1885.1145, |
|
"rewards/accuracies": 0.731249988079071, |
|
"rewards/chosen": -0.12569081783294678, |
|
"rewards/margins": 0.09405811876058578, |
|
"rewards/rejected": -0.21974892914295197, |
|
"step": 1630 |
|
}, |
|
{ |
|
"epoch": 0.86, |
|
"learning_rate": 3.5222648293233806e-06, |
|
"logits/chosen": 0.5869291424751282, |
|
"logits/rejected": 0.614780843257904, |
|
"logps/chosen": -309.3984069824219, |
|
"logps/rejected": -281.76800537109375, |
|
"loss": 2295.801, |
|
"rewards/accuracies": 0.625, |
|
"rewards/chosen": -0.13158050179481506, |
|
"rewards/margins": 0.05296233296394348, |
|
"rewards/rejected": -0.18454284965991974, |
|
"step": 1640 |
|
}, |
|
{ |
|
"epoch": 0.86, |
|
"learning_rate": 3.5013749724010298e-06, |
|
"logits/chosen": 0.6291738152503967, |
|
"logits/rejected": 0.6847606897354126, |
|
"logps/chosen": -269.4018249511719, |
|
"logps/rejected": -248.9547576904297, |
|
"loss": 1985.4633, |
|
"rewards/accuracies": 0.706250011920929, |
|
"rewards/chosen": -0.12610626220703125, |
|
"rewards/margins": 0.08754386752843857, |
|
"rewards/rejected": -0.21365013718605042, |
|
"step": 1650 |
|
}, |
|
{ |
|
"epoch": 0.87, |
|
"learning_rate": 3.4804015009566573e-06, |
|
"logits/chosen": 0.6193640828132629, |
|
"logits/rejected": 0.6583009958267212, |
|
"logps/chosen": -260.786865234375, |
|
"logps/rejected": -246.417724609375, |
|
"loss": 2067.8201, |
|
"rewards/accuracies": 0.675000011920929, |
|
"rewards/chosen": -0.13031774759292603, |
|
"rewards/margins": 0.07650937139987946, |
|
"rewards/rejected": -0.20682711899280548, |
|
"step": 1660 |
|
}, |
|
{ |
|
"epoch": 0.87, |
|
"learning_rate": 3.459346166269093e-06, |
|
"logits/chosen": 0.569218635559082, |
|
"logits/rejected": 0.6178910136222839, |
|
"logps/chosen": -286.010498046875, |
|
"logps/rejected": -284.7559509277344, |
|
"loss": 2057.5484, |
|
"rewards/accuracies": 0.6812499761581421, |
|
"rewards/chosen": -0.11479117721319199, |
|
"rewards/margins": 0.08598540723323822, |
|
"rewards/rejected": -0.20077654719352722, |
|
"step": 1670 |
|
}, |
|
{ |
|
"epoch": 0.88, |
|
"learning_rate": 3.4382107264527244e-06, |
|
"logits/chosen": 0.6346784234046936, |
|
"logits/rejected": 0.7338213920593262, |
|
"logps/chosen": -300.38739013671875, |
|
"logps/rejected": -259.44525146484375, |
|
"loss": 1994.4248, |
|
"rewards/accuracies": 0.7250000238418579, |
|
"rewards/chosen": -0.13437870144844055, |
|
"rewards/margins": 0.08268047124147415, |
|
"rewards/rejected": -0.2170591652393341, |
|
"step": 1680 |
|
}, |
|
{ |
|
"epoch": 0.88, |
|
"learning_rate": 3.416996946310694e-06, |
|
"logits/chosen": 0.5468164086341858, |
|
"logits/rejected": 0.5939579010009766, |
|
"logps/chosen": -299.57061767578125, |
|
"logps/rejected": -265.4569396972656, |
|
"loss": 1896.8961, |
|
"rewards/accuracies": 0.7124999761581421, |
|
"rewards/chosen": -0.11913790553808212, |
|
"rewards/margins": 0.10197613388299942, |
|
"rewards/rejected": -0.22111406922340393, |
|
"step": 1690 |
|
}, |
|
{ |
|
"epoch": 0.89, |
|
"learning_rate": 3.3957065971875387e-06, |
|
"logits/chosen": 0.5587860345840454, |
|
"logits/rejected": 0.6276572346687317, |
|
"logps/chosen": -253.8291473388672, |
|
"logps/rejected": -239.05685424804688, |
|
"loss": 2189.3863, |
|
"rewards/accuracies": 0.637499988079071, |
|
"rewards/chosen": -0.14831021428108215, |
|
"rewards/margins": 0.06738562881946564, |
|
"rewards/rejected": -0.2156958281993866, |
|
"step": 1700 |
|
}, |
|
{ |
|
"epoch": 0.89, |
|
"eval_logits/chosen": 0.5722830295562744, |
|
"eval_logits/rejected": 0.629075288772583, |
|
"eval_logps/chosen": -270.473876953125, |
|
"eval_logps/rejected": -255.46749877929688, |
|
"eval_loss": 2062.199462890625, |
|
"eval_rewards/accuracies": 0.6779999732971191, |
|
"eval_rewards/chosen": -0.13856419920921326, |
|
"eval_rewards/margins": 0.08062165975570679, |
|
"eval_rewards/rejected": -0.21918585896492004, |
|
"eval_runtime": 416.4246, |
|
"eval_samples_per_second": 4.803, |
|
"eval_steps_per_second": 1.201, |
|
"step": 1700 |
|
}, |
|
{ |
|
"epoch": 0.9, |
|
"learning_rate": 3.3743414568212828e-06, |
|
"logits/chosen": 0.6158628463745117, |
|
"logits/rejected": 0.6673662066459656, |
|
"logps/chosen": -296.7020263671875, |
|
"logps/rejected": -242.6101837158203, |
|
"loss": 2082.5406, |
|
"rewards/accuracies": 0.6625000238418579, |
|
"rewards/chosen": -0.13253936171531677, |
|
"rewards/margins": 0.07927088439464569, |
|
"rewards/rejected": -0.21181027591228485, |
|
"step": 1710 |
|
}, |
|
{ |
|
"epoch": 0.9, |
|
"learning_rate": 3.352903309194999e-06, |
|
"logits/chosen": 0.6274576187133789, |
|
"logits/rejected": 0.6294026374816895, |
|
"logps/chosen": -293.9241638183594, |
|
"logps/rejected": -253.40036010742188, |
|
"loss": 2083.2709, |
|
"rewards/accuracies": 0.7124999761581421, |
|
"rewards/chosen": -0.13250832259655, |
|
"rewards/margins": 0.08279639482498169, |
|
"rewards/rejected": -0.21530470252037048, |
|
"step": 1720 |
|
}, |
|
{ |
|
"epoch": 0.91, |
|
"learning_rate": 3.331393944387845e-06, |
|
"logits/chosen": 0.5965205430984497, |
|
"logits/rejected": 0.70032799243927, |
|
"logps/chosen": -291.1014099121094, |
|
"logps/rejected": -274.6158752441406, |
|
"loss": 2108.5279, |
|
"rewards/accuracies": 0.6312500238418579, |
|
"rewards/chosen": -0.12708380818367004, |
|
"rewards/margins": 0.08121255040168762, |
|
"rewards/rejected": -0.20829637348651886, |
|
"step": 1730 |
|
}, |
|
{ |
|
"epoch": 0.91, |
|
"learning_rate": 3.309815158425591e-06, |
|
"logits/chosen": 0.588997483253479, |
|
"logits/rejected": 0.6009566783905029, |
|
"logps/chosen": -244.7908172607422, |
|
"logps/rejected": -238.26651000976562, |
|
"loss": 2017.7098, |
|
"rewards/accuracies": 0.699999988079071, |
|
"rewards/chosen": -0.11969755589962006, |
|
"rewards/margins": 0.07758014649152756, |
|
"rewards/rejected": -0.19727769494056702, |
|
"step": 1740 |
|
}, |
|
{ |
|
"epoch": 0.92, |
|
"learning_rate": 3.288168753130657e-06, |
|
"logits/chosen": 0.6095719933509827, |
|
"logits/rejected": 0.6279308199882507, |
|
"logps/chosen": -250.91116333007812, |
|
"logps/rejected": -265.10302734375, |
|
"loss": 1975.5611, |
|
"rewards/accuracies": 0.7250000238418579, |
|
"rewards/chosen": -0.1158142238855362, |
|
"rewards/margins": 0.08399386703968048, |
|
"rewards/rejected": -0.19980809092521667, |
|
"step": 1750 |
|
}, |
|
{ |
|
"epoch": 0.92, |
|
"learning_rate": 3.266456535971654e-06, |
|
"logits/chosen": 0.5891221165657043, |
|
"logits/rejected": 0.5675392746925354, |
|
"logps/chosen": -283.29901123046875, |
|
"logps/rejected": -258.32562255859375, |
|
"loss": 1991.3586, |
|
"rewards/accuracies": 0.737500011920929, |
|
"rewards/chosen": -0.1158837080001831, |
|
"rewards/margins": 0.09360859543085098, |
|
"rewards/rejected": -0.2094922959804535, |
|
"step": 1760 |
|
}, |
|
{ |
|
"epoch": 0.93, |
|
"learning_rate": 3.2446803199124666e-06, |
|
"logits/chosen": 0.542614221572876, |
|
"logits/rejected": 0.5660384893417358, |
|
"logps/chosen": -260.6263427734375, |
|
"logps/rejected": -240.3424835205078, |
|
"loss": 2044.5988, |
|
"rewards/accuracies": 0.706250011920929, |
|
"rewards/chosen": -0.12066911160945892, |
|
"rewards/margins": 0.0799749344587326, |
|
"rewards/rejected": -0.20064406096935272, |
|
"step": 1770 |
|
}, |
|
{ |
|
"epoch": 0.93, |
|
"learning_rate": 3.2228419232608692e-06, |
|
"logits/chosen": 0.5963379144668579, |
|
"logits/rejected": 0.6299723386764526, |
|
"logps/chosen": -248.21920776367188, |
|
"logps/rejected": -235.8214874267578, |
|
"loss": 2095.258, |
|
"rewards/accuracies": 0.7124999761581421, |
|
"rewards/chosen": -0.12164044380187988, |
|
"rewards/margins": 0.07495652884244919, |
|
"rewards/rejected": -0.19659698009490967, |
|
"step": 1780 |
|
}, |
|
{ |
|
"epoch": 0.94, |
|
"learning_rate": 3.2009431695166985e-06, |
|
"logits/chosen": 0.5749480724334717, |
|
"logits/rejected": 0.627223014831543, |
|
"logps/chosen": -239.6404266357422, |
|
"logps/rejected": -239.62014770507812, |
|
"loss": 1970.9955, |
|
"rewards/accuracies": 0.71875, |
|
"rewards/chosen": -0.11313848197460175, |
|
"rewards/margins": 0.08247244358062744, |
|
"rewards/rejected": -0.19561094045639038, |
|
"step": 1790 |
|
}, |
|
{ |
|
"epoch": 0.94, |
|
"learning_rate": 3.1789858872195888e-06, |
|
"logits/chosen": 0.6324980854988098, |
|
"logits/rejected": 0.6260117888450623, |
|
"logps/chosen": -244.56362915039062, |
|
"logps/rejected": -245.474609375, |
|
"loss": 2292.8938, |
|
"rewards/accuracies": 0.6625000238418579, |
|
"rewards/chosen": -0.14712049067020416, |
|
"rewards/margins": 0.05482936650514603, |
|
"rewards/rejected": -0.2019498646259308, |
|
"step": 1800 |
|
}, |
|
{ |
|
"epoch": 0.94, |
|
"eval_logits/chosen": 0.5703141689300537, |
|
"eval_logits/rejected": 0.627535343170166, |
|
"eval_logps/chosen": -268.5789489746094, |
|
"eval_logps/rejected": -253.6024627685547, |
|
"eval_loss": 2053.1298828125, |
|
"eval_rewards/accuracies": 0.6830000281333923, |
|
"eval_rewards/chosen": -0.11961515992879868, |
|
"eval_rewards/margins": 0.08092045783996582, |
|
"eval_rewards/rejected": -0.2005356103181839, |
|
"eval_runtime": 416.7248, |
|
"eval_samples_per_second": 4.799, |
|
"eval_steps_per_second": 1.2, |
|
"step": 1800 |
|
}, |
|
{ |
|
"epoch": 0.95, |
|
"learning_rate": 3.156971909796295e-06, |
|
"logits/chosen": 0.6370185613632202, |
|
"logits/rejected": 0.7445378303527832, |
|
"logps/chosen": -265.6059265136719, |
|
"logps/rejected": -232.7034454345703, |
|
"loss": 1958.5383, |
|
"rewards/accuracies": 0.71875, |
|
"rewards/chosen": -0.11271758377552032, |
|
"rewards/margins": 0.08874475955963135, |
|
"rewards/rejected": -0.20146234333515167, |
|
"step": 1810 |
|
}, |
|
{ |
|
"epoch": 0.95, |
|
"learning_rate": 3.1349030754075945e-06, |
|
"logits/chosen": 0.623261034488678, |
|
"logits/rejected": 0.6591364741325378, |
|
"logps/chosen": -263.49993896484375, |
|
"logps/rejected": -241.6349334716797, |
|
"loss": 1940.6969, |
|
"rewards/accuracies": 0.699999988079071, |
|
"rewards/chosen": -0.1059044599533081, |
|
"rewards/margins": 0.09299680590629578, |
|
"rewards/rejected": -0.19890126585960388, |
|
"step": 1820 |
|
}, |
|
{ |
|
"epoch": 0.96, |
|
"learning_rate": 3.1127812267948095e-06, |
|
"logits/chosen": 0.6355741024017334, |
|
"logits/rejected": 0.6655168533325195, |
|
"logps/chosen": -264.20062255859375, |
|
"logps/rejected": -258.43310546875, |
|
"loss": 1993.0014, |
|
"rewards/accuracies": 0.6937500238418579, |
|
"rewards/chosen": -0.12248332798480988, |
|
"rewards/margins": 0.08727528899908066, |
|
"rewards/rejected": -0.20975859463214874, |
|
"step": 1830 |
|
}, |
|
{ |
|
"epoch": 0.96, |
|
"learning_rate": 3.0906082111259313e-06, |
|
"logits/chosen": 0.548941433429718, |
|
"logits/rejected": 0.5715293884277344, |
|
"logps/chosen": -277.3153076171875, |
|
"logps/rejected": -248.1029510498047, |
|
"loss": 2108.9805, |
|
"rewards/accuracies": 0.6625000238418579, |
|
"rewards/chosen": -0.10435888916254044, |
|
"rewards/margins": 0.07835109531879425, |
|
"rewards/rejected": -0.18270999193191528, |
|
"step": 1840 |
|
}, |
|
{ |
|
"epoch": 0.97, |
|
"learning_rate": 3.068385879841389e-06, |
|
"logits/chosen": 0.6165980100631714, |
|
"logits/rejected": 0.6937299966812134, |
|
"logps/chosen": -233.5325469970703, |
|
"logps/rejected": -246.7562255859375, |
|
"loss": 2168.8945, |
|
"rewards/accuracies": 0.675000011920929, |
|
"rewards/chosen": -0.10994801670312881, |
|
"rewards/margins": 0.06389383226633072, |
|
"rewards/rejected": -0.17384183406829834, |
|
"step": 1850 |
|
}, |
|
{ |
|
"epoch": 0.97, |
|
"learning_rate": 3.046116088499449e-06, |
|
"logits/chosen": 0.6379483938217163, |
|
"logits/rejected": 0.6270259618759155, |
|
"logps/chosen": -266.16009521484375, |
|
"logps/rejected": -259.60919189453125, |
|
"loss": 2145.4496, |
|
"rewards/accuracies": 0.6312500238418579, |
|
"rewards/chosen": -0.11401374638080597, |
|
"rewards/margins": 0.07185572385787964, |
|
"rewards/rejected": -0.1858694702386856, |
|
"step": 1860 |
|
}, |
|
{ |
|
"epoch": 0.98, |
|
"learning_rate": 3.02380069662128e-06, |
|
"logits/chosen": 0.623966634273529, |
|
"logits/rejected": 0.5938777327537537, |
|
"logps/chosen": -252.69869995117188, |
|
"logps/rejected": -245.094482421875, |
|
"loss": 2034.7914, |
|
"rewards/accuracies": 0.706250011920929, |
|
"rewards/chosen": -0.10203299671411514, |
|
"rewards/margins": 0.07526172697544098, |
|
"rewards/rejected": -0.17729471623897552, |
|
"step": 1870 |
|
}, |
|
{ |
|
"epoch": 0.98, |
|
"learning_rate": 3.0014415675356813e-06, |
|
"logits/chosen": 0.6284725069999695, |
|
"logits/rejected": 0.6297743916511536, |
|
"logps/chosen": -270.23333740234375, |
|
"logps/rejected": -252.12704467773438, |
|
"loss": 2150.0496, |
|
"rewards/accuracies": 0.625, |
|
"rewards/chosen": -0.10082075744867325, |
|
"rewards/margins": 0.06684517115354538, |
|
"rewards/rejected": -0.16766592860221863, |
|
"step": 1880 |
|
}, |
|
{ |
|
"epoch": 0.99, |
|
"learning_rate": 2.979040568223498e-06, |
|
"logits/chosen": 0.5534299612045288, |
|
"logits/rejected": 0.675399661064148, |
|
"logps/chosen": -263.3745422363281, |
|
"logps/rejected": -266.8883972167969, |
|
"loss": 2205.4939, |
|
"rewards/accuracies": 0.6812499761581421, |
|
"rewards/chosen": -0.11857882887125015, |
|
"rewards/margins": 0.06667140126228333, |
|
"rewards/rejected": -0.18525022268295288, |
|
"step": 1890 |
|
}, |
|
{ |
|
"epoch": 0.99, |
|
"learning_rate": 2.9565995691617242e-06, |
|
"logits/chosen": 0.6073340773582458, |
|
"logits/rejected": 0.6487486362457275, |
|
"logps/chosen": -292.90704345703125, |
|
"logps/rejected": -238.2965545654297, |
|
"loss": 2085.5805, |
|
"rewards/accuracies": 0.706250011920929, |
|
"rewards/chosen": -0.09591428190469742, |
|
"rewards/margins": 0.08266115188598633, |
|
"rewards/rejected": -0.17857542634010315, |
|
"step": 1900 |
|
}, |
|
{ |
|
"epoch": 0.99, |
|
"eval_logits/chosen": 0.5747328996658325, |
|
"eval_logits/rejected": 0.6318737864494324, |
|
"eval_logps/chosen": -267.4730224609375, |
|
"eval_logps/rejected": -252.61306762695312, |
|
"eval_loss": 2052.32373046875, |
|
"eval_rewards/accuracies": 0.6899999976158142, |
|
"eval_rewards/chosen": -0.10855603218078613, |
|
"eval_rewards/margins": 0.08208546042442322, |
|
"eval_rewards/rejected": -0.19064147770404816, |
|
"eval_runtime": 416.81, |
|
"eval_samples_per_second": 4.798, |
|
"eval_steps_per_second": 1.2, |
|
"step": 1900 |
|
}, |
|
{ |
|
"epoch": 1.0, |
|
"learning_rate": 2.9341204441673267e-06, |
|
"logits/chosen": 0.5886205434799194, |
|
"logits/rejected": 0.6063315272331238, |
|
"logps/chosen": -281.51129150390625, |
|
"logps/rejected": -263.0057678222656, |
|
"loss": 1992.2906, |
|
"rewards/accuracies": 0.699999988079071, |
|
"rewards/chosen": -0.10820697247982025, |
|
"rewards/margins": 0.08012684434652328, |
|
"rewards/rejected": -0.18833380937576294, |
|
"step": 1910 |
|
}, |
|
{ |
|
"epoch": 1.0, |
|
"learning_rate": 2.9116050702407706e-06, |
|
"logits/chosen": 0.6380060315132141, |
|
"logits/rejected": 0.6841104030609131, |
|
"logps/chosen": -267.55145263671875, |
|
"logps/rejected": -248.828369140625, |
|
"loss": 2045.3801, |
|
"rewards/accuracies": 0.6604167222976685, |
|
"rewards/chosen": -0.10555033385753632, |
|
"rewards/margins": 0.08623509109020233, |
|
"rewards/rejected": -0.19178542494773865, |
|
"step": 1920 |
|
}, |
|
{ |
|
"epoch": 1.01, |
|
"learning_rate": 2.889055327409301e-06, |
|
"logits/chosen": 0.5285671353340149, |
|
"logits/rejected": 0.5704804062843323, |
|
"logps/chosen": -263.24725341796875, |
|
"logps/rejected": -248.02395629882812, |
|
"loss": 2023.134, |
|
"rewards/accuracies": 0.7124999761581421, |
|
"rewards/chosen": -0.10273389518260956, |
|
"rewards/margins": 0.08054333180189133, |
|
"rewards/rejected": -0.1832772046327591, |
|
"step": 1930 |
|
}, |
|
{ |
|
"epoch": 1.02, |
|
"learning_rate": 2.8664730985699537e-06, |
|
"logits/chosen": 0.5331718325614929, |
|
"logits/rejected": 0.5988043546676636, |
|
"logps/chosen": -242.79061889648438, |
|
"logps/rejected": -238.0269012451172, |
|
"loss": 1960.2693, |
|
"rewards/accuracies": 0.75, |
|
"rewards/chosen": -0.09900447726249695, |
|
"rewards/margins": 0.0868852287530899, |
|
"rewards/rejected": -0.18588972091674805, |
|
"step": 1940 |
|
}, |
|
{ |
|
"epoch": 1.02, |
|
"learning_rate": 2.843860269332339e-06, |
|
"logits/chosen": 0.6072074174880981, |
|
"logits/rejected": 0.631058394908905, |
|
"logps/chosen": -273.4151306152344, |
|
"logps/rejected": -246.41238403320312, |
|
"loss": 1955.907, |
|
"rewards/accuracies": 0.7250000238418579, |
|
"rewards/chosen": -0.09934862703084946, |
|
"rewards/margins": 0.09240168333053589, |
|
"rewards/rejected": -0.19175033271312714, |
|
"step": 1950 |
|
}, |
|
{ |
|
"epoch": 1.03, |
|
"learning_rate": 2.8212187278611907e-06, |
|
"logits/chosen": 0.6683967113494873, |
|
"logits/rejected": 0.6856907606124878, |
|
"logps/chosen": -257.2086181640625, |
|
"logps/rejected": -247.91683959960938, |
|
"loss": 1924.8703, |
|
"rewards/accuracies": 0.706250011920929, |
|
"rewards/chosen": -0.1071348562836647, |
|
"rewards/margins": 0.09893598407506943, |
|
"rewards/rejected": -0.20607082545757294, |
|
"step": 1960 |
|
}, |
|
{ |
|
"epoch": 1.03, |
|
"learning_rate": 2.7985503647187063e-06, |
|
"logits/chosen": 0.5825555920600891, |
|
"logits/rejected": 0.6476297378540039, |
|
"logps/chosen": -288.1867980957031, |
|
"logps/rejected": -254.5727996826172, |
|
"loss": 1907.2662, |
|
"rewards/accuracies": 0.706250011920929, |
|
"rewards/chosen": -0.08619461953639984, |
|
"rewards/margins": 0.10769355297088623, |
|
"rewards/rejected": -0.19388815760612488, |
|
"step": 1970 |
|
}, |
|
{ |
|
"epoch": 1.04, |
|
"learning_rate": 2.7758570727066843e-06, |
|
"logits/chosen": 0.5205335021018982, |
|
"logits/rejected": 0.6433119177818298, |
|
"logps/chosen": -261.37982177734375, |
|
"logps/rejected": -240.10952758789062, |
|
"loss": 2241.4863, |
|
"rewards/accuracies": 0.65625, |
|
"rewards/chosen": -0.12351379543542862, |
|
"rewards/margins": 0.05891970917582512, |
|
"rewards/rejected": -0.18243351578712463, |
|
"step": 1980 |
|
}, |
|
{ |
|
"epoch": 1.04, |
|
"learning_rate": 2.753140746708477e-06, |
|
"logits/chosen": 0.6216637492179871, |
|
"logits/rejected": 0.669810950756073, |
|
"logps/chosen": -282.500244140625, |
|
"logps/rejected": -273.12310791015625, |
|
"loss": 1961.7119, |
|
"rewards/accuracies": 0.65625, |
|
"rewards/chosen": -0.09472217410802841, |
|
"rewards/margins": 0.09439438581466675, |
|
"rewards/rejected": -0.18911656737327576, |
|
"step": 1990 |
|
}, |
|
{ |
|
"epoch": 1.05, |
|
"learning_rate": 2.730403283530767e-06, |
|
"logits/chosen": 0.638060986995697, |
|
"logits/rejected": 0.7034865617752075, |
|
"logps/chosen": -258.02447509765625, |
|
"logps/rejected": -246.9062957763672, |
|
"loss": 1847.759, |
|
"rewards/accuracies": 0.75, |
|
"rewards/chosen": -0.09892908483743668, |
|
"rewards/margins": 0.10315445810556412, |
|
"rewards/rejected": -0.2020835429430008, |
|
"step": 2000 |
|
}, |
|
{ |
|
"epoch": 1.05, |
|
"eval_logits/chosen": 0.5763067603111267, |
|
"eval_logits/rejected": 0.6332587599754333, |
|
"eval_logps/chosen": -267.7949523925781, |
|
"eval_logps/rejected": -253.0826873779297, |
|
"eval_loss": 2050.417724609375, |
|
"eval_rewards/accuracies": 0.6850000023841858, |
|
"eval_rewards/chosen": -0.11177488416433334, |
|
"eval_rewards/margins": 0.0835629403591156, |
|
"eval_rewards/rejected": -0.19533783197402954, |
|
"eval_runtime": 416.585, |
|
"eval_samples_per_second": 4.801, |
|
"eval_steps_per_second": 1.2, |
|
"step": 2000 |
|
}, |
|
{ |
|
"epoch": 1.05, |
|
"learning_rate": 2.707646581745188e-06, |
|
"logits/chosen": 0.6024230718612671, |
|
"logits/rejected": 0.6550949811935425, |
|
"logps/chosen": -275.99761962890625, |
|
"logps/rejected": -272.4120178222656, |
|
"loss": 1981.8529, |
|
"rewards/accuracies": 0.6937500238418579, |
|
"rewards/chosen": -0.0975821316242218, |
|
"rewards/margins": 0.08785782754421234, |
|
"rewards/rejected": -0.18543997406959534, |
|
"step": 2010 |
|
}, |
|
{ |
|
"epoch": 1.06, |
|
"learning_rate": 2.6848725415297888e-06, |
|
"logits/chosen": 0.629960298538208, |
|
"logits/rejected": 0.6249616742134094, |
|
"logps/chosen": -256.87603759765625, |
|
"logps/rejected": -253.21109008789062, |
|
"loss": 2136.5217, |
|
"rewards/accuracies": 0.6499999761581421, |
|
"rewards/chosen": -0.10265711694955826, |
|
"rewards/margins": 0.0642293393611908, |
|
"rewards/rejected": -0.16688646376132965, |
|
"step": 2020 |
|
}, |
|
{ |
|
"epoch": 1.06, |
|
"learning_rate": 2.6620830645103753e-06, |
|
"logits/chosen": 0.6109344959259033, |
|
"logits/rejected": 0.6072026491165161, |
|
"logps/chosen": -266.4075012207031, |
|
"logps/rejected": -258.20208740234375, |
|
"loss": 1938.8361, |
|
"rewards/accuracies": 0.71875, |
|
"rewards/chosen": -0.09916529804468155, |
|
"rewards/margins": 0.09021677076816559, |
|
"rewards/rejected": -0.18938204646110535, |
|
"step": 2030 |
|
}, |
|
{ |
|
"epoch": 1.07, |
|
"learning_rate": 2.639280053601719e-06, |
|
"logits/chosen": 0.566746711730957, |
|
"logits/rejected": 0.6063026189804077, |
|
"logps/chosen": -261.76739501953125, |
|
"logps/rejected": -270.6283874511719, |
|
"loss": 2085.5938, |
|
"rewards/accuracies": 0.65625, |
|
"rewards/chosen": -0.12018795311450958, |
|
"rewards/margins": 0.08117975294589996, |
|
"rewards/rejected": -0.20136770606040955, |
|
"step": 2040 |
|
}, |
|
{ |
|
"epoch": 1.07, |
|
"learning_rate": 2.6164654128486683e-06, |
|
"logits/chosen": 0.5058253407478333, |
|
"logits/rejected": 0.6028685569763184, |
|
"logps/chosen": -267.67376708984375, |
|
"logps/rejected": -230.2966766357422, |
|
"loss": 2055.1498, |
|
"rewards/accuracies": 0.6499999761581421, |
|
"rewards/chosen": -0.11003967374563217, |
|
"rewards/margins": 0.08447955548763275, |
|
"rewards/rejected": -0.19451923668384552, |
|
"step": 2050 |
|
}, |
|
{ |
|
"epoch": 1.08, |
|
"learning_rate": 2.59364104726716e-06, |
|
"logits/chosen": 0.5947778820991516, |
|
"logits/rejected": 0.5901384353637695, |
|
"logps/chosen": -278.0001525878906, |
|
"logps/rejected": -242.13583374023438, |
|
"loss": 1877.0174, |
|
"rewards/accuracies": 0.699999988079071, |
|
"rewards/chosen": -0.11745290458202362, |
|
"rewards/margins": 0.10013137012720108, |
|
"rewards/rejected": -0.2175842821598053, |
|
"step": 2060 |
|
}, |
|
{ |
|
"epoch": 1.08, |
|
"learning_rate": 2.5708088626851546e-06, |
|
"logits/chosen": 0.5502884387969971, |
|
"logits/rejected": 0.603992760181427, |
|
"logps/chosen": -269.38360595703125, |
|
"logps/rejected": -244.87619018554688, |
|
"loss": 2015.0283, |
|
"rewards/accuracies": 0.637499988079071, |
|
"rewards/chosen": -0.12011172622442245, |
|
"rewards/margins": 0.08565986156463623, |
|
"rewards/rejected": -0.20577159523963928, |
|
"step": 2070 |
|
}, |
|
{ |
|
"epoch": 1.09, |
|
"learning_rate": 2.547970765583491e-06, |
|
"logits/chosen": 0.5619412064552307, |
|
"logits/rejected": 0.6468341946601868, |
|
"logps/chosen": -252.68115234375, |
|
"logps/rejected": -252.705322265625, |
|
"loss": 1855.9893, |
|
"rewards/accuracies": 0.7250000238418579, |
|
"rewards/chosen": -0.10235454142093658, |
|
"rewards/margins": 0.10716482251882553, |
|
"rewards/rejected": -0.20951935648918152, |
|
"step": 2080 |
|
}, |
|
{ |
|
"epoch": 1.09, |
|
"learning_rate": 2.525128662936707e-06, |
|
"logits/chosen": 0.512058436870575, |
|
"logits/rejected": 0.5677643418312073, |
|
"logps/chosen": -270.7825012207031, |
|
"logps/rejected": -260.822509765625, |
|
"loss": 1831.3346, |
|
"rewards/accuracies": 0.7562500238418579, |
|
"rewards/chosen": -0.1064097136259079, |
|
"rewards/margins": 0.10622493177652359, |
|
"rewards/rejected": -0.21263465285301208, |
|
"step": 2090 |
|
}, |
|
{ |
|
"epoch": 1.1, |
|
"learning_rate": 2.502284462053799e-06, |
|
"logits/chosen": 0.620409369468689, |
|
"logits/rejected": 0.6358670592308044, |
|
"logps/chosen": -258.42706298828125, |
|
"logps/rejected": -258.72161865234375, |
|
"loss": 2024.9559, |
|
"rewards/accuracies": 0.6875, |
|
"rewards/chosen": -0.10622759163379669, |
|
"rewards/margins": 0.08639432489871979, |
|
"rewards/rejected": -0.19262190163135529, |
|
"step": 2100 |
|
}, |
|
{ |
|
"epoch": 1.1, |
|
"eval_logits/chosen": 0.5589507818222046, |
|
"eval_logits/rejected": 0.6156801581382751, |
|
"eval_logps/chosen": -268.8072814941406, |
|
"eval_logps/rejected": -254.37991333007812, |
|
"eval_loss": 2046.75927734375, |
|
"eval_rewards/accuracies": 0.6899999976158142, |
|
"eval_rewards/chosen": -0.12189868092536926, |
|
"eval_rewards/margins": 0.08641137927770615, |
|
"eval_rewards/rejected": -0.2083100527524948, |
|
"eval_runtime": 416.7259, |
|
"eval_samples_per_second": 4.799, |
|
"eval_steps_per_second": 1.2, |
|
"step": 2100 |
|
}, |
|
{ |
|
"epoch": 1.1, |
|
"learning_rate": 2.479440070418967e-06, |
|
"logits/chosen": 0.5901846885681152, |
|
"logits/rejected": 0.6195170283317566, |
|
"logps/chosen": -249.45816040039062, |
|
"logps/rejected": -253.7944793701172, |
|
"loss": 2205.6043, |
|
"rewards/accuracies": 0.6625000238418579, |
|
"rewards/chosen": -0.13660283386707306, |
|
"rewards/margins": 0.06599629670381546, |
|
"rewards/rejected": -0.20259912312030792, |
|
"step": 2110 |
|
}, |
|
{ |
|
"epoch": 1.11, |
|
"learning_rate": 2.456597395532338e-06, |
|
"logits/chosen": 0.5504690408706665, |
|
"logits/rejected": 0.6531665921211243, |
|
"logps/chosen": -259.79010009765625, |
|
"logps/rejected": -284.0751647949219, |
|
"loss": 1959.0818, |
|
"rewards/accuracies": 0.731249988079071, |
|
"rewards/chosen": -0.11810295283794403, |
|
"rewards/margins": 0.0964241549372673, |
|
"rewards/rejected": -0.21452713012695312, |
|
"step": 2120 |
|
}, |
|
{ |
|
"epoch": 1.11, |
|
"learning_rate": 2.433758344750691e-06, |
|
"logits/chosen": 0.5741318464279175, |
|
"logits/rejected": 0.6458116173744202, |
|
"logps/chosen": -295.03192138671875, |
|
"logps/rejected": -276.64251708984375, |
|
"loss": 1911.3146, |
|
"rewards/accuracies": 0.7437499761581421, |
|
"rewards/chosen": -0.11703141778707504, |
|
"rewards/margins": 0.10501817613840103, |
|
"rewards/rejected": -0.2220495641231537, |
|
"step": 2130 |
|
}, |
|
{ |
|
"epoch": 1.12, |
|
"learning_rate": 2.4109248251281953e-06, |
|
"logits/chosen": 0.5908122062683105, |
|
"logits/rejected": 0.6558480858802795, |
|
"logps/chosen": -283.0213928222656, |
|
"logps/rejected": -252.56600952148438, |
|
"loss": 1911.8793, |
|
"rewards/accuracies": 0.699999988079071, |
|
"rewards/chosen": -0.11346153914928436, |
|
"rewards/margins": 0.10086224228143692, |
|
"rewards/rejected": -0.2143237590789795, |
|
"step": 2140 |
|
}, |
|
{ |
|
"epoch": 1.13, |
|
"learning_rate": 2.3880987432571675e-06, |
|
"logits/chosen": 0.5616129040718079, |
|
"logits/rejected": 0.593204915523529, |
|
"logps/chosen": -268.16583251953125, |
|
"logps/rejected": -262.17755126953125, |
|
"loss": 1994.0697, |
|
"rewards/accuracies": 0.706250011920929, |
|
"rewards/chosen": -0.11974780261516571, |
|
"rewards/margins": 0.09457085281610489, |
|
"rewards/rejected": -0.2143186628818512, |
|
"step": 2150 |
|
}, |
|
{ |
|
"epoch": 1.13, |
|
"learning_rate": 2.365282005108875e-06, |
|
"logits/chosen": 0.5762392282485962, |
|
"logits/rejected": 0.615722119808197, |
|
"logps/chosen": -250.62509155273438, |
|
"logps/rejected": -253.91049194335938, |
|
"loss": 2099.1633, |
|
"rewards/accuracies": 0.6312500238418579, |
|
"rewards/chosen": -0.1276218444108963, |
|
"rewards/margins": 0.07122951745986938, |
|
"rewards/rejected": -0.19885137677192688, |
|
"step": 2160 |
|
}, |
|
{ |
|
"epoch": 1.14, |
|
"learning_rate": 2.3424765158743867e-06, |
|
"logits/chosen": 0.6059794425964355, |
|
"logits/rejected": 0.6645799875259399, |
|
"logps/chosen": -255.7693634033203, |
|
"logps/rejected": -251.96951293945312, |
|
"loss": 2010.217, |
|
"rewards/accuracies": 0.699999988079071, |
|
"rewards/chosen": -0.12624487280845642, |
|
"rewards/margins": 0.09581606835126877, |
|
"rewards/rejected": -0.2220609486103058, |
|
"step": 2170 |
|
}, |
|
{ |
|
"epoch": 1.14, |
|
"learning_rate": 2.319684179805491e-06, |
|
"logits/chosen": 0.516992449760437, |
|
"logits/rejected": 0.5456847548484802, |
|
"logps/chosen": -265.10321044921875, |
|
"logps/rejected": -246.3701629638672, |
|
"loss": 1933.4984, |
|
"rewards/accuracies": 0.7124999761581421, |
|
"rewards/chosen": -0.11627723276615143, |
|
"rewards/margins": 0.0940864160656929, |
|
"rewards/rejected": -0.21036362648010254, |
|
"step": 2180 |
|
}, |
|
{ |
|
"epoch": 1.15, |
|
"learning_rate": 2.296906900055691e-06, |
|
"logits/chosen": 0.596808135509491, |
|
"logits/rejected": 0.6393652558326721, |
|
"logps/chosen": -264.455810546875, |
|
"logps/rejected": -256.40667724609375, |
|
"loss": 2172.6984, |
|
"rewards/accuracies": 0.637499988079071, |
|
"rewards/chosen": -0.12352782487869263, |
|
"rewards/margins": 0.07605434954166412, |
|
"rewards/rejected": -0.19958215951919556, |
|
"step": 2190 |
|
}, |
|
{ |
|
"epoch": 1.15, |
|
"learning_rate": 2.2741465785212905e-06, |
|
"logits/chosen": 0.5940336585044861, |
|
"logits/rejected": 0.6305769085884094, |
|
"logps/chosen": -256.6434326171875, |
|
"logps/rejected": -245.391357421875, |
|
"loss": 2038.6354, |
|
"rewards/accuracies": 0.675000011920929, |
|
"rewards/chosen": -0.11867289245128632, |
|
"rewards/margins": 0.09147666394710541, |
|
"rewards/rejected": -0.21014957129955292, |
|
"step": 2200 |
|
}, |
|
{ |
|
"epoch": 1.15, |
|
"eval_logits/chosen": 0.5517618656158447, |
|
"eval_logits/rejected": 0.6082795858383179, |
|
"eval_logps/chosen": -268.6722106933594, |
|
"eval_logps/rejected": -254.27310180664062, |
|
"eval_loss": 2043.57275390625, |
|
"eval_rewards/accuracies": 0.6880000233650208, |
|
"eval_rewards/chosen": -0.12054779380559921, |
|
"eval_rewards/margins": 0.08669425547122955, |
|
"eval_rewards/rejected": -0.20724207162857056, |
|
"eval_runtime": 416.7766, |
|
"eval_samples_per_second": 4.799, |
|
"eval_steps_per_second": 1.2, |
|
"step": 2200 |
|
}, |
|
{ |
|
"epoch": 1.16, |
|
"learning_rate": 2.251405115682587e-06, |
|
"logits/chosen": 0.5902246236801147, |
|
"logits/rejected": 0.5983418822288513, |
|
"logps/chosen": -263.2071228027344, |
|
"logps/rejected": -272.0802307128906, |
|
"loss": 2017.8775, |
|
"rewards/accuracies": 0.6937500238418579, |
|
"rewards/chosen": -0.11642640829086304, |
|
"rewards/margins": 0.09181423485279083, |
|
"rewards/rejected": -0.20824062824249268, |
|
"step": 2210 |
|
}, |
|
{ |
|
"epoch": 1.16, |
|
"learning_rate": 2.2286844104451848e-06, |
|
"logits/chosen": 0.5431746244430542, |
|
"logits/rejected": 0.6418278217315674, |
|
"logps/chosen": -264.33465576171875, |
|
"logps/rejected": -251.9208984375, |
|
"loss": 2095.6342, |
|
"rewards/accuracies": 0.668749988079071, |
|
"rewards/chosen": -0.10533900558948517, |
|
"rewards/margins": 0.07776842266321182, |
|
"rewards/rejected": -0.1831074208021164, |
|
"step": 2220 |
|
}, |
|
{ |
|
"epoch": 1.17, |
|
"learning_rate": 2.205986359981431e-06, |
|
"logits/chosen": 0.5207514762878418, |
|
"logits/rejected": 0.6270573139190674, |
|
"logps/chosen": -285.76849365234375, |
|
"logps/rejected": -277.93426513671875, |
|
"loss": 1912.0979, |
|
"rewards/accuracies": 0.7124999761581421, |
|
"rewards/chosen": -0.11343447118997574, |
|
"rewards/margins": 0.1047770231962204, |
|
"rewards/rejected": -0.21821150183677673, |
|
"step": 2230 |
|
}, |
|
{ |
|
"epoch": 1.17, |
|
"learning_rate": 2.183312859572008e-06, |
|
"logits/chosen": 0.5806037783622742, |
|
"logits/rejected": 0.6543610095977783, |
|
"logps/chosen": -281.1925354003906, |
|
"logps/rejected": -278.2233581542969, |
|
"loss": 2272.857, |
|
"rewards/accuracies": 0.6000000238418579, |
|
"rewards/chosen": -0.11976996809244156, |
|
"rewards/margins": 0.06387045979499817, |
|
"rewards/rejected": -0.18364043533802032, |
|
"step": 2240 |
|
}, |
|
{ |
|
"epoch": 1.18, |
|
"learning_rate": 2.1606658024476744e-06, |
|
"logits/chosen": 0.5554038286209106, |
|
"logits/rejected": 0.5429580211639404, |
|
"logps/chosen": -269.9796447753906, |
|
"logps/rejected": -250.5005340576172, |
|
"loss": 2188.1607, |
|
"rewards/accuracies": 0.6812499761581421, |
|
"rewards/chosen": -0.1146581768989563, |
|
"rewards/margins": 0.07278671860694885, |
|
"rewards/rejected": -0.18744489550590515, |
|
"step": 2250 |
|
}, |
|
{ |
|
"epoch": 1.18, |
|
"learning_rate": 2.1380470796311843e-06, |
|
"logits/chosen": 0.610127866268158, |
|
"logits/rejected": 0.6246207356452942, |
|
"logps/chosen": -274.421142578125, |
|
"logps/rejected": -259.88336181640625, |
|
"loss": 1878.2621, |
|
"rewards/accuracies": 0.737500011920929, |
|
"rewards/chosen": -0.10982956737279892, |
|
"rewards/margins": 0.10056765377521515, |
|
"rewards/rejected": -0.21039721369743347, |
|
"step": 2260 |
|
}, |
|
{ |
|
"epoch": 1.19, |
|
"learning_rate": 2.1154585797793826e-06, |
|
"logits/chosen": 0.6410681009292603, |
|
"logits/rejected": 0.6446506977081299, |
|
"logps/chosen": -262.7099304199219, |
|
"logps/rejected": -243.33847045898438, |
|
"loss": 1969.0021, |
|
"rewards/accuracies": 0.6812499761581421, |
|
"rewards/chosen": -0.10027629137039185, |
|
"rewards/margins": 0.08689162135124207, |
|
"rewards/rejected": -0.1871679127216339, |
|
"step": 2270 |
|
}, |
|
{ |
|
"epoch": 1.19, |
|
"learning_rate": 2.092902189025507e-06, |
|
"logits/chosen": 0.6191312670707703, |
|
"logits/rejected": 0.6812275648117065, |
|
"logps/chosen": -258.3789978027344, |
|
"logps/rejected": -247.3417510986328, |
|
"loss": 1765.025, |
|
"rewards/accuracies": 0.71875, |
|
"rewards/chosen": -0.10864460468292236, |
|
"rewards/margins": 0.11250102519989014, |
|
"rewards/rejected": -0.2211456298828125, |
|
"step": 2280 |
|
}, |
|
{ |
|
"epoch": 1.2, |
|
"learning_rate": 2.070379790821693e-06, |
|
"logits/chosen": 0.5654376745223999, |
|
"logits/rejected": 0.6462022066116333, |
|
"logps/chosen": -301.2412414550781, |
|
"logps/rejected": -276.8460998535156, |
|
"loss": 2045.4492, |
|
"rewards/accuracies": 0.7250000238418579, |
|
"rewards/chosen": -0.10371474176645279, |
|
"rewards/margins": 0.09003494679927826, |
|
"rewards/rejected": -0.19374969601631165, |
|
"step": 2290 |
|
}, |
|
{ |
|
"epoch": 1.2, |
|
"learning_rate": 2.0478932657817105e-06, |
|
"logits/chosen": 0.5810787081718445, |
|
"logits/rejected": 0.6360457539558411, |
|
"logps/chosen": -254.06838989257812, |
|
"logps/rejected": -243.23989868164062, |
|
"loss": 2022.9617, |
|
"rewards/accuracies": 0.699999988079071, |
|
"rewards/chosen": -0.11232425272464752, |
|
"rewards/margins": 0.07984773069620132, |
|
"rewards/rejected": -0.19217197597026825, |
|
"step": 2300 |
|
}, |
|
{ |
|
"epoch": 1.2, |
|
"eval_logits/chosen": 0.5535383820533752, |
|
"eval_logits/rejected": 0.6101322174072266, |
|
"eval_logps/chosen": -268.3490905761719, |
|
"eval_logps/rejected": -253.95965576171875, |
|
"eval_loss": 2035.585693359375, |
|
"eval_rewards/accuracies": 0.6894999742507935, |
|
"eval_rewards/chosen": -0.11731643229722977, |
|
"eval_rewards/margins": 0.08679118007421494, |
|
"eval_rewards/rejected": -0.2041076123714447, |
|
"eval_runtime": 416.4094, |
|
"eval_samples_per_second": 4.803, |
|
"eval_steps_per_second": 1.201, |
|
"step": 2300 |
|
}, |
|
{ |
|
"epoch": 1.21, |
|
"learning_rate": 2.0254444915239287e-06, |
|
"logits/chosen": 0.5468884706497192, |
|
"logits/rejected": 0.5753307938575745, |
|
"logps/chosen": -271.94940185546875, |
|
"logps/rejected": -244.8318328857422, |
|
"loss": 1994.7408, |
|
"rewards/accuracies": 0.7124999761581421, |
|
"rewards/chosen": -0.1074294000864029, |
|
"rewards/margins": 0.08415937423706055, |
|
"rewards/rejected": -0.19158877432346344, |
|
"step": 2310 |
|
}, |
|
{ |
|
"epoch": 1.21, |
|
"learning_rate": 2.0030353425145376e-06, |
|
"logits/chosen": 0.6623051762580872, |
|
"logits/rejected": 0.6782322525978088, |
|
"logps/chosen": -220.7380828857422, |
|
"logps/rejected": -242.05850219726562, |
|
"loss": 1864.7361, |
|
"rewards/accuracies": 0.7124999761581421, |
|
"rewards/chosen": -0.10245666652917862, |
|
"rewards/margins": 0.10949740558862686, |
|
"rewards/rejected": -0.2119540423154831, |
|
"step": 2320 |
|
}, |
|
{ |
|
"epoch": 1.22, |
|
"learning_rate": 1.9806676899110305e-06, |
|
"logits/chosen": 0.6308891773223877, |
|
"logits/rejected": 0.6477428674697876, |
|
"logps/chosen": -262.88897705078125, |
|
"logps/rejected": -255.49362182617188, |
|
"loss": 1843.8875, |
|
"rewards/accuracies": 0.7749999761581421, |
|
"rewards/chosen": -0.10279623419046402, |
|
"rewards/margins": 0.11021213233470917, |
|
"rewards/rejected": -0.21300837397575378, |
|
"step": 2330 |
|
}, |
|
{ |
|
"epoch": 1.22, |
|
"learning_rate": 1.958343401405964e-06, |
|
"logits/chosen": 0.5211482048034668, |
|
"logits/rejected": 0.6105703115463257, |
|
"logps/chosen": -272.09698486328125, |
|
"logps/rejected": -240.05392456054688, |
|
"loss": 1991.6119, |
|
"rewards/accuracies": 0.6937500238418579, |
|
"rewards/chosen": -0.12355583906173706, |
|
"rewards/margins": 0.08727772533893585, |
|
"rewards/rejected": -0.2108335793018341, |
|
"step": 2340 |
|
}, |
|
{ |
|
"epoch": 1.23, |
|
"learning_rate": 1.9360643410710027e-06, |
|
"logits/chosen": 0.6230972409248352, |
|
"logits/rejected": 0.6428076028823853, |
|
"logps/chosen": -297.76300048828125, |
|
"logps/rejected": -262.3421325683594, |
|
"loss": 2047.1437, |
|
"rewards/accuracies": 0.7124999761581421, |
|
"rewards/chosen": -0.10469541698694229, |
|
"rewards/margins": 0.09010159224271774, |
|
"rewards/rejected": -0.19479700922966003, |
|
"step": 2350 |
|
}, |
|
{ |
|
"epoch": 1.24, |
|
"learning_rate": 1.9138323692012734e-06, |
|
"logits/chosen": 0.5903237462043762, |
|
"logits/rejected": 0.6455060243606567, |
|
"logps/chosen": -288.16815185546875, |
|
"logps/rejected": -289.3240966796875, |
|
"loss": 1579.4779, |
|
"rewards/accuracies": 0.831250011920929, |
|
"rewards/chosen": -0.08982647955417633, |
|
"rewards/margins": 0.14532434940338135, |
|
"rewards/rejected": -0.23515084385871887, |
|
"step": 2360 |
|
}, |
|
{ |
|
"epoch": 1.24, |
|
"learning_rate": 1.8916493421600287e-06, |
|
"logits/chosen": 0.5603612065315247, |
|
"logits/rejected": 0.5792626142501831, |
|
"logps/chosen": -243.1230010986328, |
|
"logps/rejected": -257.9002685546875, |
|
"loss": 2017.2086, |
|
"rewards/accuracies": 0.706250011920929, |
|
"rewards/chosen": -0.12239034473896027, |
|
"rewards/margins": 0.0815977230668068, |
|
"rewards/rejected": -0.20398807525634766, |
|
"step": 2370 |
|
}, |
|
{ |
|
"epoch": 1.25, |
|
"learning_rate": 1.8695171122236443e-06, |
|
"logits/chosen": 0.49118170142173767, |
|
"logits/rejected": 0.5506534576416016, |
|
"logps/chosen": -268.86822509765625, |
|
"logps/rejected": -274.64202880859375, |
|
"loss": 1977.4205, |
|
"rewards/accuracies": 0.6937500238418579, |
|
"rewards/chosen": -0.11895406246185303, |
|
"rewards/margins": 0.10293842852115631, |
|
"rewards/rejected": -0.22189247608184814, |
|
"step": 2380 |
|
}, |
|
{ |
|
"epoch": 1.25, |
|
"learning_rate": 1.84743752742695e-06, |
|
"logits/chosen": 0.6215322613716125, |
|
"logits/rejected": 0.6151038408279419, |
|
"logps/chosen": -265.5515441894531, |
|
"logps/rejected": -278.13177490234375, |
|
"loss": 1889.5199, |
|
"rewards/accuracies": 0.7437499761581421, |
|
"rewards/chosen": -0.10073964297771454, |
|
"rewards/margins": 0.11166242510080338, |
|
"rewards/rejected": -0.2124020755290985, |
|
"step": 2390 |
|
}, |
|
{ |
|
"epoch": 1.26, |
|
"learning_rate": 1.8254124314089225e-06, |
|
"logits/chosen": 0.6138418912887573, |
|
"logits/rejected": 0.6189366579055786, |
|
"logps/chosen": -263.2386169433594, |
|
"logps/rejected": -239.09963989257812, |
|
"loss": 1871.641, |
|
"rewards/accuracies": 0.71875, |
|
"rewards/chosen": -0.12337759882211685, |
|
"rewards/margins": 0.09867466986179352, |
|
"rewards/rejected": -0.22205229103565216, |
|
"step": 2400 |
|
}, |
|
{ |
|
"epoch": 1.26, |
|
"eval_logits/chosen": 0.5482152104377747, |
|
"eval_logits/rejected": 0.6045916676521301, |
|
"eval_logps/chosen": -268.51605224609375, |
|
"eval_logps/rejected": -254.28311157226562, |
|
"eval_loss": 2036.3372802734375, |
|
"eval_rewards/accuracies": 0.6894999742507935, |
|
"eval_rewards/chosen": -0.11898616701364517, |
|
"eval_rewards/margins": 0.0883559137582779, |
|
"eval_rewards/rejected": -0.20734207332134247, |
|
"eval_runtime": 416.6781, |
|
"eval_samples_per_second": 4.8, |
|
"eval_steps_per_second": 1.2, |
|
"step": 2400 |
|
}, |
|
{ |
|
"epoch": 1.26, |
|
"learning_rate": 1.8034436632587394e-06, |
|
"logits/chosen": 0.5728852152824402, |
|
"logits/rejected": 0.6265703439712524, |
|
"logps/chosen": -237.0697784423828, |
|
"logps/rejected": -242.29367065429688, |
|
"loss": 1969.1203, |
|
"rewards/accuracies": 0.7124999761581421, |
|
"rewards/chosen": -0.10528033971786499, |
|
"rewards/margins": 0.0880698561668396, |
|
"rewards/rejected": -0.1933501809835434, |
|
"step": 2410 |
|
}, |
|
{ |
|
"epoch": 1.27, |
|
"learning_rate": 1.781533057362221e-06, |
|
"logits/chosen": 0.5749053359031677, |
|
"logits/rejected": 0.6042163372039795, |
|
"logps/chosen": -278.8114013671875, |
|
"logps/rejected": -279.18695068359375, |
|
"loss": 1906.6891, |
|
"rewards/accuracies": 0.7250000238418579, |
|
"rewards/chosen": -0.1059660091996193, |
|
"rewards/margins": 0.10437663644552231, |
|
"rewards/rejected": -0.210342675447464, |
|
"step": 2420 |
|
}, |
|
{ |
|
"epoch": 1.27, |
|
"learning_rate": 1.7596824432486537e-06, |
|
"logits/chosen": 0.5984959602355957, |
|
"logits/rejected": 0.6386197209358215, |
|
"logps/chosen": -292.53143310546875, |
|
"logps/rejected": -256.42620849609375, |
|
"loss": 2003.0641, |
|
"rewards/accuracies": 0.65625, |
|
"rewards/chosen": -0.10370471328496933, |
|
"rewards/margins": 0.09288345277309418, |
|
"rewards/rejected": -0.1965881586074829, |
|
"step": 2430 |
|
}, |
|
{ |
|
"epoch": 1.28, |
|
"learning_rate": 1.7378936454380277e-06, |
|
"logits/chosen": 0.5537322163581848, |
|
"logits/rejected": 0.5942158102989197, |
|
"logps/chosen": -246.1141815185547, |
|
"logps/rejected": -253.85617065429688, |
|
"loss": 2137.652, |
|
"rewards/accuracies": 0.625, |
|
"rewards/chosen": -0.13521219789981842, |
|
"rewards/margins": 0.0802813172340393, |
|
"rewards/rejected": -0.21549351513385773, |
|
"step": 2440 |
|
}, |
|
{ |
|
"epoch": 1.28, |
|
"learning_rate": 1.7161684832886893e-06, |
|
"logits/chosen": 0.5406220555305481, |
|
"logits/rejected": 0.540827751159668, |
|
"logps/chosen": -242.9103546142578, |
|
"logps/rejected": -247.41921997070312, |
|
"loss": 2005.6266, |
|
"rewards/accuracies": 0.7250000238418579, |
|
"rewards/chosen": -0.12078257650136948, |
|
"rewards/margins": 0.09046939015388489, |
|
"rewards/rejected": -0.21125197410583496, |
|
"step": 2450 |
|
}, |
|
{ |
|
"epoch": 1.29, |
|
"learning_rate": 1.6945087708454273e-06, |
|
"logits/chosen": 0.5730911493301392, |
|
"logits/rejected": 0.5966663956642151, |
|
"logps/chosen": -276.0887145996094, |
|
"logps/rejected": -264.39910888671875, |
|
"loss": 2211.1135, |
|
"rewards/accuracies": 0.6499999761581421, |
|
"rewards/chosen": -0.12395117431879044, |
|
"rewards/margins": 0.07060922682285309, |
|
"rewards/rejected": -0.19456037878990173, |
|
"step": 2460 |
|
}, |
|
{ |
|
"epoch": 1.29, |
|
"learning_rate": 1.6729163166879964e-06, |
|
"logits/chosen": 0.5936635136604309, |
|
"logits/rejected": 0.6355383396148682, |
|
"logps/chosen": -258.3261413574219, |
|
"logps/rejected": -233.99075317382812, |
|
"loss": 1757.0482, |
|
"rewards/accuracies": 0.75, |
|
"rewards/chosen": -0.1076178103685379, |
|
"rewards/margins": 0.11206640303134918, |
|
"rewards/rejected": -0.21968421339988708, |
|
"step": 2470 |
|
}, |
|
{ |
|
"epoch": 1.3, |
|
"learning_rate": 1.651392923780105e-06, |
|
"logits/chosen": 0.6025252342224121, |
|
"logits/rejected": 0.6687902808189392, |
|
"logps/chosen": -254.2070770263672, |
|
"logps/rejected": -237.63967895507812, |
|
"loss": 2055.3113, |
|
"rewards/accuracies": 0.675000011920929, |
|
"rewards/chosen": -0.1099240779876709, |
|
"rewards/margins": 0.09202177077531815, |
|
"rewards/rejected": -0.20194585621356964, |
|
"step": 2480 |
|
}, |
|
{ |
|
"epoch": 1.3, |
|
"learning_rate": 1.629940389318867e-06, |
|
"logits/chosen": 0.5291022062301636, |
|
"logits/rejected": 0.616036593914032, |
|
"logps/chosen": -294.7336730957031, |
|
"logps/rejected": -240.91796875, |
|
"loss": 1902.4217, |
|
"rewards/accuracies": 0.71875, |
|
"rewards/chosen": -0.10780209302902222, |
|
"rewards/margins": 0.10337891429662704, |
|
"rewards/rejected": -0.21118099987506866, |
|
"step": 2490 |
|
}, |
|
{ |
|
"epoch": 1.31, |
|
"learning_rate": 1.608560504584737e-06, |
|
"logits/chosen": 0.5608310103416443, |
|
"logits/rejected": 0.6271076798439026, |
|
"logps/chosen": -256.45770263671875, |
|
"logps/rejected": -253.8623809814453, |
|
"loss": 1907.3463, |
|
"rewards/accuracies": 0.71875, |
|
"rewards/chosen": -0.10689397901296616, |
|
"rewards/margins": 0.10536620765924454, |
|
"rewards/rejected": -0.2122601717710495, |
|
"step": 2500 |
|
}, |
|
{ |
|
"epoch": 1.31, |
|
"eval_logits/chosen": 0.5460030436515808, |
|
"eval_logits/rejected": 0.6022311449050903, |
|
"eval_logps/chosen": -268.7764587402344, |
|
"eval_logps/rejected": -254.62974548339844, |
|
"eval_loss": 2034.7010498046875, |
|
"eval_rewards/accuracies": 0.6880000233650208, |
|
"eval_rewards/chosen": -0.12159038335084915, |
|
"eval_rewards/margins": 0.08921793848276138, |
|
"eval_rewards/rejected": -0.21080833673477173, |
|
"eval_runtime": 416.6626, |
|
"eval_samples_per_second": 4.8, |
|
"eval_steps_per_second": 1.2, |
|
"step": 2500 |
|
}, |
|
{ |
|
"epoch": 1.31, |
|
"learning_rate": 1.587255054791937e-06, |
|
"logits/chosen": 0.5321905016899109, |
|
"logits/rejected": 0.589474081993103, |
|
"logps/chosen": -281.2105407714844, |
|
"logps/rejected": -264.56298828125, |
|
"loss": 2016.9854, |
|
"rewards/accuracies": 0.6812499761581421, |
|
"rewards/chosen": -0.10593251138925552, |
|
"rewards/margins": 0.08659417182207108, |
|
"rewards/rejected": -0.1925266534090042, |
|
"step": 2510 |
|
}, |
|
{ |
|
"epoch": 1.32, |
|
"learning_rate": 1.5660258189393945e-06, |
|
"logits/chosen": 0.5880864262580872, |
|
"logits/rejected": 0.6149991750717163, |
|
"logps/chosen": -251.75973510742188, |
|
"logps/rejected": -262.3134765625, |
|
"loss": 2130.8975, |
|
"rewards/accuracies": 0.675000011920929, |
|
"rewards/chosen": -0.12219718843698502, |
|
"rewards/margins": 0.08161304891109467, |
|
"rewards/rejected": -0.2038102149963379, |
|
"step": 2520 |
|
}, |
|
{ |
|
"epoch": 1.32, |
|
"learning_rate": 1.5448745696621915e-06, |
|
"logits/chosen": 0.5654980540275574, |
|
"logits/rejected": 0.6478559970855713, |
|
"logps/chosen": -272.79864501953125, |
|
"logps/rejected": -258.56402587890625, |
|
"loss": 2114.0654, |
|
"rewards/accuracies": 0.65625, |
|
"rewards/chosen": -0.11762849986553192, |
|
"rewards/margins": 0.08065593242645264, |
|
"rewards/rejected": -0.19828443229198456, |
|
"step": 2530 |
|
}, |
|
{ |
|
"epoch": 1.33, |
|
"learning_rate": 1.5238030730835578e-06, |
|
"logits/chosen": 0.5662246942520142, |
|
"logits/rejected": 0.6270356178283691, |
|
"logps/chosen": -272.17449951171875, |
|
"logps/rejected": -237.2474365234375, |
|
"loss": 2106.9717, |
|
"rewards/accuracies": 0.675000011920929, |
|
"rewards/chosen": -0.11701546609401703, |
|
"rewards/margins": 0.0775529146194458, |
|
"rewards/rejected": -0.19456836581230164, |
|
"step": 2540 |
|
}, |
|
{ |
|
"epoch": 1.33, |
|
"learning_rate": 1.5028130886673936e-06, |
|
"logits/chosen": 0.5928006172180176, |
|
"logits/rejected": 0.641442060470581, |
|
"logps/chosen": -263.0660705566406, |
|
"logps/rejected": -252.6539306640625, |
|
"loss": 2000.1971, |
|
"rewards/accuracies": 0.6812499761581421, |
|
"rewards/chosen": -0.111760713160038, |
|
"rewards/margins": 0.0842764601111412, |
|
"rewards/rejected": -0.1960371732711792, |
|
"step": 2550 |
|
}, |
|
{ |
|
"epoch": 1.34, |
|
"learning_rate": 1.4819063690713565e-06, |
|
"logits/chosen": 0.5778559446334839, |
|
"logits/rejected": 0.6045337915420532, |
|
"logps/chosen": -284.07061767578125, |
|
"logps/rejected": -270.4360656738281, |
|
"loss": 1938.5168, |
|
"rewards/accuracies": 0.675000011920929, |
|
"rewards/chosen": -0.12265495210886002, |
|
"rewards/margins": 0.09922391176223755, |
|
"rewards/rejected": -0.22187885642051697, |
|
"step": 2560 |
|
}, |
|
{ |
|
"epoch": 1.35, |
|
"learning_rate": 1.4610846600005164e-06, |
|
"logits/chosen": 0.6385133862495422, |
|
"logits/rejected": 0.6164069175720215, |
|
"logps/chosen": -291.48590087890625, |
|
"logps/rejected": -241.2799072265625, |
|
"loss": 2040.5221, |
|
"rewards/accuracies": 0.6937500238418579, |
|
"rewards/chosen": -0.11285148561000824, |
|
"rewards/margins": 0.08863021433353424, |
|
"rewards/rejected": -0.20148172974586487, |
|
"step": 2570 |
|
}, |
|
{ |
|
"epoch": 1.35, |
|
"learning_rate": 1.4403497000615885e-06, |
|
"logits/chosen": 0.6022018194198608, |
|
"logits/rejected": 0.6375949382781982, |
|
"logps/chosen": -250.8367156982422, |
|
"logps/rejected": -242.37881469726562, |
|
"loss": 2072.085, |
|
"rewards/accuracies": 0.6937500238418579, |
|
"rewards/chosen": -0.11340691894292831, |
|
"rewards/margins": 0.07300657033920288, |
|
"rewards/rejected": -0.1864134818315506, |
|
"step": 2580 |
|
}, |
|
{ |
|
"epoch": 1.36, |
|
"learning_rate": 1.4197032206177618e-06, |
|
"logits/chosen": 0.6561594605445862, |
|
"logits/rejected": 0.7297431230545044, |
|
"logps/chosen": -249.74887084960938, |
|
"logps/rejected": -237.26779174804688, |
|
"loss": 2067.224, |
|
"rewards/accuracies": 0.668749988079071, |
|
"rewards/chosen": -0.12197699397802353, |
|
"rewards/margins": 0.08562152087688446, |
|
"rewards/rejected": -0.2075985223054886, |
|
"step": 2590 |
|
}, |
|
{ |
|
"epoch": 1.36, |
|
"learning_rate": 1.3991469456441273e-06, |
|
"logits/chosen": 0.6001744270324707, |
|
"logits/rejected": 0.6553865075111389, |
|
"logps/chosen": -252.2941436767578, |
|
"logps/rejected": -243.48025512695312, |
|
"loss": 1884.6086, |
|
"rewards/accuracies": 0.737500011920929, |
|
"rewards/chosen": -0.11671899259090424, |
|
"rewards/margins": 0.10958864539861679, |
|
"rewards/rejected": -0.22630766034126282, |
|
"step": 2600 |
|
}, |
|
{ |
|
"epoch": 1.36, |
|
"eval_logits/chosen": 0.545119047164917, |
|
"eval_logits/rejected": 0.601308286190033, |
|
"eval_logps/chosen": -268.77081298828125, |
|
"eval_logps/rejected": -254.6013946533203, |
|
"eval_loss": 2033.7977294921875, |
|
"eval_rewards/accuracies": 0.6909999847412109, |
|
"eval_rewards/chosen": -0.12153391540050507, |
|
"eval_rewards/margins": 0.08899100124835968, |
|
"eval_rewards/rejected": -0.21052493155002594, |
|
"eval_runtime": 416.4661, |
|
"eval_samples_per_second": 4.802, |
|
"eval_steps_per_second": 1.201, |
|
"step": 2600 |
|
}, |
|
{ |
|
"epoch": 1.37, |
|
"learning_rate": 1.3786825915837299e-06, |
|
"logits/chosen": 0.6044927835464478, |
|
"logits/rejected": 0.608493447303772, |
|
"logps/chosen": -268.0179138183594, |
|
"logps/rejected": -251.26168823242188, |
|
"loss": 1763.491, |
|
"rewards/accuracies": 0.71875, |
|
"rewards/chosen": -0.08965936303138733, |
|
"rewards/margins": 0.12351739406585693, |
|
"rewards/rejected": -0.21317675709724426, |
|
"step": 2610 |
|
}, |
|
{ |
|
"epoch": 1.37, |
|
"learning_rate": 1.3583118672042441e-06, |
|
"logits/chosen": 0.5879210233688354, |
|
"logits/rejected": 0.6254302263259888, |
|
"logps/chosen": -268.9471740722656, |
|
"logps/rejected": -246.9387664794922, |
|
"loss": 1806.3043, |
|
"rewards/accuracies": 0.731249988079071, |
|
"rewards/chosen": -0.09920786321163177, |
|
"rewards/margins": 0.1189170852303505, |
|
"rewards/rejected": -0.21812494099140167, |
|
"step": 2620 |
|
}, |
|
{ |
|
"epoch": 1.38, |
|
"learning_rate": 1.3380364734552935e-06, |
|
"logits/chosen": 0.6040158867835999, |
|
"logits/rejected": 0.6454821825027466, |
|
"logps/chosen": -239.55313110351562, |
|
"logps/rejected": -252.41641235351562, |
|
"loss": 1881.491, |
|
"rewards/accuracies": 0.699999988079071, |
|
"rewards/chosen": -0.11133377254009247, |
|
"rewards/margins": 0.10438641160726547, |
|
"rewards/rejected": -0.21572017669677734, |
|
"step": 2630 |
|
}, |
|
{ |
|
"epoch": 1.38, |
|
"learning_rate": 1.3178581033264218e-06, |
|
"logits/chosen": 0.5422452688217163, |
|
"logits/rejected": 0.5576962232589722, |
|
"logps/chosen": -267.02020263671875, |
|
"logps/rejected": -233.59628295898438, |
|
"loss": 1922.1236, |
|
"rewards/accuracies": 0.6937500238418579, |
|
"rewards/chosen": -0.1054287701845169, |
|
"rewards/margins": 0.10195982456207275, |
|
"rewards/rejected": -0.20738859474658966, |
|
"step": 2640 |
|
}, |
|
{ |
|
"epoch": 1.39, |
|
"learning_rate": 1.2977784417057262e-06, |
|
"logits/chosen": 0.5648713111877441, |
|
"logits/rejected": 0.5970919132232666, |
|
"logps/chosen": -266.724365234375, |
|
"logps/rejected": -252.4540557861328, |
|
"loss": 1788.8666, |
|
"rewards/accuracies": 0.6937500238418579, |
|
"rewards/chosen": -0.11531468480825424, |
|
"rewards/margins": 0.11205389350652695, |
|
"rewards/rejected": -0.22736859321594238, |
|
"step": 2650 |
|
}, |
|
{ |
|
"epoch": 1.39, |
|
"learning_rate": 1.2777991652391757e-06, |
|
"logits/chosen": 0.5807424783706665, |
|
"logits/rejected": 0.64664626121521, |
|
"logps/chosen": -253.5209197998047, |
|
"logps/rejected": -251.5238800048828, |
|
"loss": 2035.5062, |
|
"rewards/accuracies": 0.668749988079071, |
|
"rewards/chosen": -0.12570129334926605, |
|
"rewards/margins": 0.09053059667348862, |
|
"rewards/rejected": -0.21623189747333527, |
|
"step": 2660 |
|
}, |
|
{ |
|
"epoch": 1.4, |
|
"learning_rate": 1.2579219421906049e-06, |
|
"logits/chosen": 0.612740159034729, |
|
"logits/rejected": 0.6295909285545349, |
|
"logps/chosen": -275.30938720703125, |
|
"logps/rejected": -246.85986328125, |
|
"loss": 1918.4975, |
|
"rewards/accuracies": 0.7437499761581421, |
|
"rewards/chosen": -0.11256787925958633, |
|
"rewards/margins": 0.1016065701842308, |
|
"rewards/rejected": -0.21417441964149475, |
|
"step": 2670 |
|
}, |
|
{ |
|
"epoch": 1.4, |
|
"learning_rate": 1.2381484323024178e-06, |
|
"logits/chosen": 0.5338586568832397, |
|
"logits/rejected": 0.614523708820343, |
|
"logps/chosen": -248.32406616210938, |
|
"logps/rejected": -234.80862426757812, |
|
"loss": 2030.9229, |
|
"rewards/accuracies": 0.6875, |
|
"rewards/chosen": -0.12608769536018372, |
|
"rewards/margins": 0.09196772426366806, |
|
"rewards/rejected": -0.21805541217327118, |
|
"step": 2680 |
|
}, |
|
{ |
|
"epoch": 1.41, |
|
"learning_rate": 1.2184802866569991e-06, |
|
"logits/chosen": 0.5740771889686584, |
|
"logits/rejected": 0.5626708269119263, |
|
"logps/chosen": -256.43524169921875, |
|
"logps/rejected": -254.8041534423828, |
|
"loss": 1881.1102, |
|
"rewards/accuracies": 0.706250011920929, |
|
"rewards/chosen": -0.12152848392724991, |
|
"rewards/margins": 0.1093037948012352, |
|
"rewards/rejected": -0.2308322638273239, |
|
"step": 2690 |
|
}, |
|
{ |
|
"epoch": 1.41, |
|
"learning_rate": 1.1989191475388518e-06, |
|
"logits/chosen": 0.5784533023834229, |
|
"logits/rejected": 0.5800845623016357, |
|
"logps/chosen": -261.47900390625, |
|
"logps/rejected": -261.27618408203125, |
|
"loss": 2034.9129, |
|
"rewards/accuracies": 0.699999988079071, |
|
"rewards/chosen": -0.12217319011688232, |
|
"rewards/margins": 0.08460931479930878, |
|
"rewards/rejected": -0.2067825049161911, |
|
"step": 2700 |
|
}, |
|
{ |
|
"epoch": 1.41, |
|
"eval_logits/chosen": 0.5425635576248169, |
|
"eval_logits/rejected": 0.5986801385879517, |
|
"eval_logps/chosen": -268.96331787109375, |
|
"eval_logps/rejected": -254.94712829589844, |
|
"eval_loss": 2032.544677734375, |
|
"eval_rewards/accuracies": 0.6899999976158142, |
|
"eval_rewards/chosen": -0.12345867604017258, |
|
"eval_rewards/margins": 0.0905236080288887, |
|
"eval_rewards/rejected": -0.21398229897022247, |
|
"eval_runtime": 416.8138, |
|
"eval_samples_per_second": 4.798, |
|
"eval_steps_per_second": 1.2, |
|
"step": 2700 |
|
}, |
|
{ |
|
"epoch": 1.42, |
|
"learning_rate": 1.1794666482974617e-06, |
|
"logits/chosen": 0.5704789161682129, |
|
"logits/rejected": 0.6782268285751343, |
|
"logps/chosen": -282.65875244140625, |
|
"logps/rejected": -257.2977600097656, |
|
"loss": 1989.7289, |
|
"rewards/accuracies": 0.7124999761581421, |
|
"rewards/chosen": -0.11434787511825562, |
|
"rewards/margins": 0.0926680713891983, |
|
"rewards/rejected": -0.20701594650745392, |
|
"step": 2710 |
|
}, |
|
{ |
|
"epoch": 1.42, |
|
"learning_rate": 1.160124413210918e-06, |
|
"logits/chosen": 0.5337072014808655, |
|
"logits/rejected": 0.5367878675460815, |
|
"logps/chosen": -264.4056701660156, |
|
"logps/rejected": -245.18301391601562, |
|
"loss": 1913.7168, |
|
"rewards/accuracies": 0.6937500238418579, |
|
"rewards/chosen": -0.10953982174396515, |
|
"rewards/margins": 0.10163428634405136, |
|
"rewards/rejected": -0.2111741304397583, |
|
"step": 2720 |
|
}, |
|
{ |
|
"epoch": 1.43, |
|
"learning_rate": 1.1408940573502838e-06, |
|
"logits/chosen": 0.5485426783561707, |
|
"logits/rejected": 0.6499109864234924, |
|
"logps/chosen": -264.18505859375, |
|
"logps/rejected": -238.2395477294922, |
|
"loss": 1907.4213, |
|
"rewards/accuracies": 0.6875, |
|
"rewards/chosen": -0.12582936882972717, |
|
"rewards/margins": 0.09968879073858261, |
|
"rewards/rejected": -0.22551818192005157, |
|
"step": 2730 |
|
}, |
|
{ |
|
"epoch": 1.43, |
|
"learning_rate": 1.1217771864447396e-06, |
|
"logits/chosen": 0.5939881205558777, |
|
"logits/rejected": 0.6111994981765747, |
|
"logps/chosen": -261.0160827636719, |
|
"logps/rejected": -244.2744140625, |
|
"loss": 2013.2748, |
|
"rewards/accuracies": 0.6812499761581421, |
|
"rewards/chosen": -0.12591084837913513, |
|
"rewards/margins": 0.08752218633890152, |
|
"rewards/rejected": -0.21343302726745605, |
|
"step": 2740 |
|
}, |
|
{ |
|
"epoch": 1.44, |
|
"learning_rate": 1.1027753967475046e-06, |
|
"logits/chosen": 0.5890164375305176, |
|
"logits/rejected": 0.6029259562492371, |
|
"logps/chosen": -259.98382568359375, |
|
"logps/rejected": -254.6918487548828, |
|
"loss": 1947.2754, |
|
"rewards/accuracies": 0.6937500238418579, |
|
"rewards/chosen": -0.1243789941072464, |
|
"rewards/margins": 0.09525910019874573, |
|
"rewards/rejected": -0.21963807940483093, |
|
"step": 2750 |
|
}, |
|
{ |
|
"epoch": 1.44, |
|
"learning_rate": 1.08389027490255e-06, |
|
"logits/chosen": 0.5902668833732605, |
|
"logits/rejected": 0.6080381274223328, |
|
"logps/chosen": -248.3684844970703, |
|
"logps/rejected": -263.76776123046875, |
|
"loss": 2085.1875, |
|
"rewards/accuracies": 0.6812499761581421, |
|
"rewards/chosen": -0.13539178669452667, |
|
"rewards/margins": 0.07983563095331192, |
|
"rewards/rejected": -0.215227410197258, |
|
"step": 2760 |
|
}, |
|
{ |
|
"epoch": 1.45, |
|
"learning_rate": 1.0651233978121145e-06, |
|
"logits/chosen": 0.5521366596221924, |
|
"logits/rejected": 0.5906900763511658, |
|
"logps/chosen": -300.05230712890625, |
|
"logps/rejected": -272.1240234375, |
|
"loss": 1883.4229, |
|
"rewards/accuracies": 0.7437499761581421, |
|
"rewards/chosen": -0.11396439373493195, |
|
"rewards/margins": 0.09765832126140594, |
|
"rewards/rejected": -0.2116227149963379, |
|
"step": 2770 |
|
}, |
|
{ |
|
"epoch": 1.46, |
|
"learning_rate": 1.046476332505036e-06, |
|
"logits/chosen": 0.6347781419754028, |
|
"logits/rejected": 0.6657929420471191, |
|
"logps/chosen": -250.8879852294922, |
|
"logps/rejected": -223.33255004882812, |
|
"loss": 1819.1854, |
|
"rewards/accuracies": 0.75, |
|
"rewards/chosen": -0.11131460964679718, |
|
"rewards/margins": 0.11518070846796036, |
|
"rewards/rejected": -0.22649531066417694, |
|
"step": 2780 |
|
}, |
|
{ |
|
"epoch": 1.46, |
|
"learning_rate": 1.0279506360059005e-06, |
|
"logits/chosen": 0.5551185011863708, |
|
"logits/rejected": 0.5792326331138611, |
|
"logps/chosen": -262.4249572753906, |
|
"logps/rejected": -267.63763427734375, |
|
"loss": 2209.3523, |
|
"rewards/accuracies": 0.612500011920929, |
|
"rewards/chosen": -0.12959793210029602, |
|
"rewards/margins": 0.06600113213062286, |
|
"rewards/rejected": -0.19559906423091888, |
|
"step": 2790 |
|
}, |
|
{ |
|
"epoch": 1.47, |
|
"learning_rate": 1.0095478552050348e-06, |
|
"logits/chosen": 0.6019959449768066, |
|
"logits/rejected": 0.6074908971786499, |
|
"logps/chosen": -273.5536193847656, |
|
"logps/rejected": -267.5385437011719, |
|
"loss": 2068.2822, |
|
"rewards/accuracies": 0.668749988079071, |
|
"rewards/chosen": -0.13250732421875, |
|
"rewards/margins": 0.09504042565822601, |
|
"rewards/rejected": -0.2275477647781372, |
|
"step": 2800 |
|
}, |
|
{ |
|
"epoch": 1.47, |
|
"eval_logits/chosen": 0.5382584929466248, |
|
"eval_logits/rejected": 0.5942660570144653, |
|
"eval_logps/chosen": -269.1269836425781, |
|
"eval_logps/rejected": -255.16705322265625, |
|
"eval_loss": 2030.8697509765625, |
|
"eval_rewards/accuracies": 0.6899999976158142, |
|
"eval_rewards/chosen": -0.1250954419374466, |
|
"eval_rewards/margins": 0.09108588099479675, |
|
"eval_rewards/rejected": -0.21618132293224335, |
|
"eval_runtime": 416.6389, |
|
"eval_samples_per_second": 4.8, |
|
"eval_steps_per_second": 1.2, |
|
"step": 2800 |
|
}, |
|
{ |
|
"epoch": 1.47, |
|
"learning_rate": 9.912695267293383e-07, |
|
"logits/chosen": 0.5214653015136719, |
|
"logits/rejected": 0.5876752734184265, |
|
"logps/chosen": -265.47882080078125, |
|
"logps/rejected": -239.1663360595703, |
|
"loss": 1950.4564, |
|
"rewards/accuracies": 0.699999988079071, |
|
"rewards/chosen": -0.10924456268548965, |
|
"rewards/margins": 0.09817437827587128, |
|
"rewards/rejected": -0.20741891860961914, |
|
"step": 2810 |
|
}, |
|
{ |
|
"epoch": 1.48, |
|
"learning_rate": 9.731171768139808e-07, |
|
"logits/chosen": 0.6136573553085327, |
|
"logits/rejected": 0.6188865900039673, |
|
"logps/chosen": -284.9826965332031, |
|
"logps/rejected": -261.3671875, |
|
"loss": 2210.9309, |
|
"rewards/accuracies": 0.6812499761581421, |
|
"rewards/chosen": -0.12047699838876724, |
|
"rewards/margins": 0.06873573362827301, |
|
"rewards/rejected": -0.18921272456645966, |
|
"step": 2820 |
|
}, |
|
{ |
|
"epoch": 1.48, |
|
"learning_rate": 9.550923211749557e-07, |
|
"logits/chosen": 0.5326896905899048, |
|
"logits/rejected": 0.5845073461532593, |
|
"logps/chosen": -260.52069091796875, |
|
"logps/rejected": -268.2504577636719, |
|
"loss": 2028.4584, |
|
"rewards/accuracies": 0.7124999761581421, |
|
"rewards/chosen": -0.11890892684459686, |
|
"rewards/margins": 0.08525559306144714, |
|
"rewards/rejected": -0.2041645348072052, |
|
"step": 2830 |
|
}, |
|
{ |
|
"epoch": 1.49, |
|
"learning_rate": 9.371964648825221e-07, |
|
"logits/chosen": 0.6162235736846924, |
|
"logits/rejected": 0.5728213787078857, |
|
"logps/chosen": -272.336181640625, |
|
"logps/rejected": -252.33346557617188, |
|
"loss": 1941.8066, |
|
"rewards/accuracies": 0.737500011920929, |
|
"rewards/chosen": -0.10710117965936661, |
|
"rewards/margins": 0.10685823112726212, |
|
"rewards/rejected": -0.21395941078662872, |
|
"step": 2840 |
|
}, |
|
{ |
|
"epoch": 1.49, |
|
"learning_rate": 9.194311022355279e-07, |
|
"logits/chosen": 0.5015624761581421, |
|
"logits/rejected": 0.5448901057243347, |
|
"logps/chosen": -276.95538330078125, |
|
"logps/rejected": -250.8726348876953, |
|
"loss": 1832.3256, |
|
"rewards/accuracies": 0.706250011920929, |
|
"rewards/chosen": -0.10919035971164703, |
|
"rewards/margins": 0.11640901863574982, |
|
"rewards/rejected": -0.22559937834739685, |
|
"step": 2850 |
|
}, |
|
{ |
|
"epoch": 1.5, |
|
"learning_rate": 9.017977166366445e-07, |
|
"logits/chosen": 0.5708821415901184, |
|
"logits/rejected": 0.558485209941864, |
|
"logps/chosen": -258.96807861328125, |
|
"logps/rejected": -263.4228210449219, |
|
"loss": 1947.8791, |
|
"rewards/accuracies": 0.75, |
|
"rewards/chosen": -0.11477123200893402, |
|
"rewards/margins": 0.09229175001382828, |
|
"rewards/rejected": -0.2070629894733429, |
|
"step": 2860 |
|
}, |
|
{ |
|
"epoch": 1.5, |
|
"learning_rate": 8.842977804684938e-07, |
|
"logits/chosen": 0.5845485925674438, |
|
"logits/rejected": 0.6778086423873901, |
|
"logps/chosen": -245.46102905273438, |
|
"logps/rejected": -233.6326141357422, |
|
"loss": 2066.3828, |
|
"rewards/accuracies": 0.668749988079071, |
|
"rewards/chosen": -0.12042129039764404, |
|
"rewards/margins": 0.08121231943368912, |
|
"rewards/rejected": -0.20163361728191376, |
|
"step": 2870 |
|
}, |
|
{ |
|
"epoch": 1.51, |
|
"learning_rate": 8.669327549707096e-07, |
|
"logits/chosen": 0.5501264929771423, |
|
"logits/rejected": 0.6287878751754761, |
|
"logps/chosen": -281.37371826171875, |
|
"logps/rejected": -252.8254852294922, |
|
"loss": 1868.0959, |
|
"rewards/accuracies": 0.7250000238418579, |
|
"rewards/chosen": -0.1148761659860611, |
|
"rewards/margins": 0.1036820039153099, |
|
"rewards/rejected": -0.2185581624507904, |
|
"step": 2880 |
|
}, |
|
{ |
|
"epoch": 1.51, |
|
"learning_rate": 8.497040901179232e-07, |
|
"logits/chosen": 0.5025564432144165, |
|
"logits/rejected": 0.5421415567398071, |
|
"logps/chosen": -276.861572265625, |
|
"logps/rejected": -267.47723388671875, |
|
"loss": 1753.2416, |
|
"rewards/accuracies": 0.762499988079071, |
|
"rewards/chosen": -0.10830195993185043, |
|
"rewards/margins": 0.12366169691085815, |
|
"rewards/rejected": -0.2319636344909668, |
|
"step": 2890 |
|
}, |
|
{ |
|
"epoch": 1.52, |
|
"learning_rate": 8.326132244986932e-07, |
|
"logits/chosen": 0.6039875745773315, |
|
"logits/rejected": 0.6563787460327148, |
|
"logps/chosen": -282.04266357421875, |
|
"logps/rejected": -257.56622314453125, |
|
"loss": 1977.4029, |
|
"rewards/accuracies": 0.71875, |
|
"rewards/chosen": -0.12028930336236954, |
|
"rewards/margins": 0.09056351333856583, |
|
"rewards/rejected": -0.21085281670093536, |
|
"step": 2900 |
|
}, |
|
{ |
|
"epoch": 1.52, |
|
"eval_logits/chosen": 0.5381389260292053, |
|
"eval_logits/rejected": 0.5940784811973572, |
|
"eval_logps/chosen": -269.125244140625, |
|
"eval_logps/rejected": -255.16897583007812, |
|
"eval_loss": 2030.603271484375, |
|
"eval_rewards/accuracies": 0.6894999742507935, |
|
"eval_rewards/chosen": -0.12507818639278412, |
|
"eval_rewards/margins": 0.09112255275249481, |
|
"eval_rewards/rejected": -0.21620073914527893, |
|
"eval_runtime": 416.5425, |
|
"eval_samples_per_second": 4.801, |
|
"eval_steps_per_second": 1.2, |
|
"step": 2900 |
|
}, |
|
{ |
|
"epoch": 1.52, |
|
"learning_rate": 8.156615851953798e-07, |
|
"logits/chosen": 0.559486448764801, |
|
"logits/rejected": 0.5794366598129272, |
|
"logps/chosen": -256.5633239746094, |
|
"logps/rejected": -259.57696533203125, |
|
"loss": 1948.7941, |
|
"rewards/accuracies": 0.706250011920929, |
|
"rewards/chosen": -0.10746540874242783, |
|
"rewards/margins": 0.10354423522949219, |
|
"rewards/rejected": -0.2110096514225006, |
|
"step": 2910 |
|
}, |
|
{ |
|
"epoch": 1.53, |
|
"learning_rate": 7.988505876649863e-07, |
|
"logits/chosen": 0.6247807741165161, |
|
"logits/rejected": 0.6010321974754333, |
|
"logps/chosen": -271.8721923828125, |
|
"logps/rejected": -256.86480712890625, |
|
"loss": 2099.2482, |
|
"rewards/accuracies": 0.699999988079071, |
|
"rewards/chosen": -0.12578730285167694, |
|
"rewards/margins": 0.084382563829422, |
|
"rewards/rejected": -0.21016988158226013, |
|
"step": 2920 |
|
}, |
|
{ |
|
"epoch": 1.53, |
|
"learning_rate": 7.821816356209677e-07, |
|
"logits/chosen": 0.5775936841964722, |
|
"logits/rejected": 0.6070097088813782, |
|
"logps/chosen": -272.50653076171875, |
|
"logps/rejected": -251.46243286132812, |
|
"loss": 2020.2645, |
|
"rewards/accuracies": 0.6875, |
|
"rewards/chosen": -0.10367625951766968, |
|
"rewards/margins": 0.08927679061889648, |
|
"rewards/rejected": -0.19295303523540497, |
|
"step": 2930 |
|
}, |
|
{ |
|
"epoch": 1.54, |
|
"learning_rate": 7.656561209160248e-07, |
|
"logits/chosen": 0.521769642829895, |
|
"logits/rejected": 0.5275167226791382, |
|
"logps/chosen": -289.0915222167969, |
|
"logps/rejected": -263.7314453125, |
|
"loss": 1947.6189, |
|
"rewards/accuracies": 0.706250011920929, |
|
"rewards/chosen": -0.12388887256383896, |
|
"rewards/margins": 0.1033380776643753, |
|
"rewards/rejected": -0.22722692787647247, |
|
"step": 2940 |
|
}, |
|
{ |
|
"epoch": 1.54, |
|
"learning_rate": 7.492754234258794e-07, |
|
"logits/chosen": 0.5926128625869751, |
|
"logits/rejected": 0.6193209886550903, |
|
"logps/chosen": -241.3407440185547, |
|
"logps/rejected": -225.17724609375, |
|
"loss": 1876.9375, |
|
"rewards/accuracies": 0.7749999761581421, |
|
"rewards/chosen": -0.10547138750553131, |
|
"rewards/margins": 0.10478665679693222, |
|
"rewards/rejected": -0.21025805175304413, |
|
"step": 2950 |
|
}, |
|
{ |
|
"epoch": 1.55, |
|
"learning_rate": 7.330409109340563e-07, |
|
"logits/chosen": 0.5721119046211243, |
|
"logits/rejected": 0.5718821287155151, |
|
"logps/chosen": -267.39471435546875, |
|
"logps/rejected": -244.5928192138672, |
|
"loss": 2126.3432, |
|
"rewards/accuracies": 0.643750011920929, |
|
"rewards/chosen": -0.1287916600704193, |
|
"rewards/margins": 0.07899868488311768, |
|
"rewards/rejected": -0.2077903300523758, |
|
"step": 2960 |
|
}, |
|
{ |
|
"epoch": 1.55, |
|
"learning_rate": 7.169539390176769e-07, |
|
"logits/chosen": 0.5741583704948425, |
|
"logits/rejected": 0.5660156011581421, |
|
"logps/chosen": -219.59640502929688, |
|
"logps/rejected": -233.7797088623047, |
|
"loss": 1845.3854, |
|
"rewards/accuracies": 0.768750011920929, |
|
"rewards/chosen": -0.12960752844810486, |
|
"rewards/margins": 0.10263533890247345, |
|
"rewards/rejected": -0.23224285244941711, |
|
"step": 2970 |
|
}, |
|
{ |
|
"epoch": 1.56, |
|
"learning_rate": 7.010158509342682e-07, |
|
"logits/chosen": 0.5922077298164368, |
|
"logits/rejected": 0.6388793587684631, |
|
"logps/chosen": -258.38946533203125, |
|
"logps/rejected": -236.8799285888672, |
|
"loss": 1677.0656, |
|
"rewards/accuracies": 0.762499988079071, |
|
"rewards/chosen": -0.11209128051996231, |
|
"rewards/margins": 0.13626167178153992, |
|
"rewards/rejected": -0.24835292994976044, |
|
"step": 2980 |
|
}, |
|
{ |
|
"epoch": 1.57, |
|
"learning_rate": 6.852279775095976e-07, |
|
"logits/chosen": 0.6180992722511292, |
|
"logits/rejected": 0.6189014911651611, |
|
"logps/chosen": -272.6584167480469, |
|
"logps/rejected": -247.75033569335938, |
|
"loss": 1925.8682, |
|
"rewards/accuracies": 0.7437499761581421, |
|
"rewards/chosen": -0.11567474901676178, |
|
"rewards/margins": 0.09763548523187637, |
|
"rewards/rejected": -0.21331021189689636, |
|
"step": 2990 |
|
}, |
|
{ |
|
"epoch": 1.57, |
|
"learning_rate": 6.695916370265529e-07, |
|
"logits/chosen": 0.6014515161514282, |
|
"logits/rejected": 0.5875986814498901, |
|
"logps/chosen": -265.0668029785156, |
|
"logps/rejected": -241.8825225830078, |
|
"loss": 2110.2887, |
|
"rewards/accuracies": 0.668749988079071, |
|
"rewards/chosen": -0.1249975711107254, |
|
"rewards/margins": 0.07782919704914093, |
|
"rewards/rejected": -0.20282676815986633, |
|
"step": 3000 |
|
}, |
|
{ |
|
"epoch": 1.57, |
|
"eval_logits/chosen": 0.5348395109176636, |
|
"eval_logits/rejected": 0.5908406972885132, |
|
"eval_logps/chosen": -269.2049865722656, |
|
"eval_logps/rejected": -255.2820587158203, |
|
"eval_loss": 2030.5706787109375, |
|
"eval_rewards/accuracies": 0.690500020980835, |
|
"eval_rewards/chosen": -0.12587547302246094, |
|
"eval_rewards/margins": 0.09145611524581909, |
|
"eval_rewards/rejected": -0.21733158826828003, |
|
"eval_runtime": 416.6652, |
|
"eval_samples_per_second": 4.8, |
|
"eval_steps_per_second": 1.2, |
|
"step": 3000 |
|
}, |
|
{ |
|
"epoch": 1.58, |
|
"learning_rate": 6.541081351150638e-07, |
|
"logits/chosen": 0.5409640669822693, |
|
"logits/rejected": 0.5331202149391174, |
|
"logps/chosen": -279.83941650390625, |
|
"logps/rejected": -291.9646301269531, |
|
"loss": 2035.416, |
|
"rewards/accuracies": 0.6937500238418579, |
|
"rewards/chosen": -0.10843896865844727, |
|
"rewards/margins": 0.09788022935390472, |
|
"rewards/rejected": -0.2063191831111908, |
|
"step": 3010 |
|
}, |
|
{ |
|
"epoch": 1.58, |
|
"learning_rate": 6.387787646430854e-07, |
|
"logits/chosen": 0.5321037769317627, |
|
"logits/rejected": 0.5582699775695801, |
|
"logps/chosen": -267.9813232421875, |
|
"logps/rejected": -264.12567138671875, |
|
"loss": 2006.4391, |
|
"rewards/accuracies": 0.65625, |
|
"rewards/chosen": -0.10174393653869629, |
|
"rewards/margins": 0.08532971143722534, |
|
"rewards/rejected": -0.18707364797592163, |
|
"step": 3020 |
|
}, |
|
{ |
|
"epoch": 1.59, |
|
"learning_rate": 6.2360480560864e-07, |
|
"logits/chosen": 0.5698617696762085, |
|
"logits/rejected": 0.5839768648147583, |
|
"logps/chosen": -251.5703125, |
|
"logps/rejected": -235.4780731201172, |
|
"loss": 1821.7498, |
|
"rewards/accuracies": 0.7749999761581421, |
|
"rewards/chosen": -0.10450971126556396, |
|
"rewards/margins": 0.10978861898183823, |
|
"rewards/rejected": -0.2142982929944992, |
|
"step": 3030 |
|
}, |
|
{ |
|
"epoch": 1.59, |
|
"learning_rate": 6.085875250329401e-07, |
|
"logits/chosen": 0.5382856726646423, |
|
"logits/rejected": 0.6018794178962708, |
|
"logps/chosen": -304.603271484375, |
|
"logps/rejected": -263.90570068359375, |
|
"loss": 1820.3352, |
|
"rewards/accuracies": 0.6875, |
|
"rewards/chosen": -0.10189330577850342, |
|
"rewards/margins": 0.12546256184577942, |
|
"rewards/rejected": -0.22735583782196045, |
|
"step": 3040 |
|
}, |
|
{ |
|
"epoch": 1.6, |
|
"learning_rate": 5.937281768545919e-07, |
|
"logits/chosen": 0.600039005279541, |
|
"logits/rejected": 0.5895189046859741, |
|
"logps/chosen": -288.302734375, |
|
"logps/rejected": -266.3108215332031, |
|
"loss": 2142.2947, |
|
"rewards/accuracies": 0.6875, |
|
"rewards/chosen": -0.1374974250793457, |
|
"rewards/margins": 0.08518020063638687, |
|
"rewards/rejected": -0.22267761826515198, |
|
"step": 3050 |
|
}, |
|
{ |
|
"epoch": 1.6, |
|
"learning_rate": 5.79028001824894e-07, |
|
"logits/chosen": 0.577072024345398, |
|
"logits/rejected": 0.5782276391983032, |
|
"logps/chosen": -258.71734619140625, |
|
"logps/rejected": -252.88906860351562, |
|
"loss": 2083.8391, |
|
"rewards/accuracies": 0.706250011920929, |
|
"rewards/chosen": -0.13044390082359314, |
|
"rewards/margins": 0.09016549587249756, |
|
"rewards/rejected": -0.22060942649841309, |
|
"step": 3060 |
|
}, |
|
{ |
|
"epoch": 1.61, |
|
"learning_rate": 5.644882274042285e-07, |
|
"logits/chosen": 0.5784581303596497, |
|
"logits/rejected": 0.5805580019950867, |
|
"logps/chosen": -286.2455139160156, |
|
"logps/rejected": -253.4708251953125, |
|
"loss": 1979.6775, |
|
"rewards/accuracies": 0.699999988079071, |
|
"rewards/chosen": -0.11061519384384155, |
|
"rewards/margins": 0.10562906414270401, |
|
"rewards/rejected": -0.21624425053596497, |
|
"step": 3070 |
|
}, |
|
{ |
|
"epoch": 1.61, |
|
"learning_rate": 5.501100676595761e-07, |
|
"logits/chosen": 0.5630078911781311, |
|
"logits/rejected": 0.5682691335678101, |
|
"logps/chosen": -267.6597900390625, |
|
"logps/rejected": -250.3827667236328, |
|
"loss": 1964.8426, |
|
"rewards/accuracies": 0.6875, |
|
"rewards/chosen": -0.1262790709733963, |
|
"rewards/margins": 0.09482350945472717, |
|
"rewards/rejected": -0.22110256552696228, |
|
"step": 3080 |
|
}, |
|
{ |
|
"epoch": 1.62, |
|
"learning_rate": 5.358947231631375e-07, |
|
"logits/chosen": 0.534908652305603, |
|
"logits/rejected": 0.5746644139289856, |
|
"logps/chosen": -283.8402404785156, |
|
"logps/rejected": -272.68670654296875, |
|
"loss": 1792.1418, |
|
"rewards/accuracies": 0.7749999761581421, |
|
"rewards/chosen": -0.0998634472489357, |
|
"rewards/margins": 0.1285194605588913, |
|
"rewards/rejected": -0.2283829152584076, |
|
"step": 3090 |
|
}, |
|
{ |
|
"epoch": 1.62, |
|
"learning_rate": 5.218433808920884e-07, |
|
"logits/chosen": 0.5141295194625854, |
|
"logits/rejected": 0.5280352234840393, |
|
"logps/chosen": -262.8772888183594, |
|
"logps/rejected": -246.84671020507812, |
|
"loss": 2068.2863, |
|
"rewards/accuracies": 0.643750011920929, |
|
"rewards/chosen": -0.10809364169836044, |
|
"rewards/margins": 0.08412571996450424, |
|
"rewards/rejected": -0.19221936166286469, |
|
"step": 3100 |
|
}, |
|
{ |
|
"epoch": 1.62, |
|
"eval_logits/chosen": 0.5356869697570801, |
|
"eval_logits/rejected": 0.5913118720054626, |
|
"eval_logps/chosen": -269.03900146484375, |
|
"eval_logps/rejected": -255.10865783691406, |
|
"eval_loss": 2029.4173583984375, |
|
"eval_rewards/accuracies": 0.6934999823570251, |
|
"eval_rewards/chosen": -0.12421557307243347, |
|
"eval_rewards/margins": 0.09138190746307373, |
|
"eval_rewards/rejected": -0.2155974805355072, |
|
"eval_runtime": 416.645, |
|
"eval_samples_per_second": 4.8, |
|
"eval_steps_per_second": 1.2, |
|
"step": 3100 |
|
}, |
|
{ |
|
"epoch": 1.63, |
|
"learning_rate": 5.07957214129464e-07, |
|
"logits/chosen": 0.6343733072280884, |
|
"logits/rejected": 0.6377061605453491, |
|
"logps/chosen": -230.1392059326172, |
|
"logps/rejected": -217.2322540283203, |
|
"loss": 2110.5152, |
|
"rewards/accuracies": 0.643750011920929, |
|
"rewards/chosen": -0.14483976364135742, |
|
"rewards/margins": 0.082811638712883, |
|
"rewards/rejected": -0.22765140235424042, |
|
"step": 3110 |
|
}, |
|
{ |
|
"epoch": 1.63, |
|
"learning_rate": 4.942373823661928e-07, |
|
"logits/chosen": 0.5317670702934265, |
|
"logits/rejected": 0.5477628707885742, |
|
"logps/chosen": -253.5054931640625, |
|
"logps/rejected": -295.16021728515625, |
|
"loss": 2379.184, |
|
"rewards/accuracies": 0.6312500238418579, |
|
"rewards/chosen": -0.13306215405464172, |
|
"rewards/margins": 0.05652584508061409, |
|
"rewards/rejected": -0.18958799540996552, |
|
"step": 3120 |
|
}, |
|
{ |
|
"epoch": 1.64, |
|
"learning_rate": 4.806850312042782e-07, |
|
"logits/chosen": 0.6451593637466431, |
|
"logits/rejected": 0.5899637937545776, |
|
"logps/chosen": -289.49151611328125, |
|
"logps/rejected": -257.98443603515625, |
|
"loss": 1992.5604, |
|
"rewards/accuracies": 0.668749988079071, |
|
"rewards/chosen": -0.12486691772937775, |
|
"rewards/margins": 0.09148009121417999, |
|
"rewards/rejected": -0.21634697914123535, |
|
"step": 3130 |
|
}, |
|
{ |
|
"epoch": 1.64, |
|
"learning_rate": 4.6730129226114363e-07, |
|
"logits/chosen": 0.5886529684066772, |
|
"logits/rejected": 0.5409609079360962, |
|
"logps/chosen": -258.1368103027344, |
|
"logps/rejected": -249.2908935546875, |
|
"loss": 1995.9428, |
|
"rewards/accuracies": 0.699999988079071, |
|
"rewards/chosen": -0.11228666454553604, |
|
"rewards/margins": 0.08399216085672379, |
|
"rewards/rejected": -0.19627881050109863, |
|
"step": 3140 |
|
}, |
|
{ |
|
"epoch": 1.65, |
|
"learning_rate": 4.540872830751386e-07, |
|
"logits/chosen": 0.5374349355697632, |
|
"logits/rejected": 0.5601732134819031, |
|
"logps/chosen": -266.9260559082031, |
|
"logps/rejected": -266.8581237792969, |
|
"loss": 2206.0141, |
|
"rewards/accuracies": 0.6499999761581421, |
|
"rewards/chosen": -0.12373526394367218, |
|
"rewards/margins": 0.060657333582639694, |
|
"rewards/rejected": -0.18439260125160217, |
|
"step": 3150 |
|
}, |
|
{ |
|
"epoch": 1.65, |
|
"learning_rate": 4.4104410701222703e-07, |
|
"logits/chosen": 0.5198964476585388, |
|
"logits/rejected": 0.5673514008522034, |
|
"logps/chosen": -250.6959228515625, |
|
"logps/rejected": -233.34774780273438, |
|
"loss": 1759.9059, |
|
"rewards/accuracies": 0.7749999761581421, |
|
"rewards/chosen": -0.11838851869106293, |
|
"rewards/margins": 0.12481342256069183, |
|
"rewards/rejected": -0.24320194125175476, |
|
"step": 3160 |
|
}, |
|
{ |
|
"epoch": 1.66, |
|
"learning_rate": 4.281728531738563e-07, |
|
"logits/chosen": 0.597510814666748, |
|
"logits/rejected": 0.6312834620475769, |
|
"logps/chosen": -268.5950012207031, |
|
"logps/rejected": -250.58059692382812, |
|
"loss": 1960.2506, |
|
"rewards/accuracies": 0.699999988079071, |
|
"rewards/chosen": -0.11448697745800018, |
|
"rewards/margins": 0.09015806764364243, |
|
"rewards/rejected": -0.204645037651062, |
|
"step": 3170 |
|
}, |
|
{ |
|
"epoch": 1.66, |
|
"learning_rate": 4.154745963060197e-07, |
|
"logits/chosen": 0.507027268409729, |
|
"logits/rejected": 0.5732488632202148, |
|
"logps/chosen": -280.98382568359375, |
|
"logps/rejected": -286.677734375, |
|
"loss": 1965.5633, |
|
"rewards/accuracies": 0.7250000238418579, |
|
"rewards/chosen": -0.12583956122398376, |
|
"rewards/margins": 0.10458560287952423, |
|
"rewards/rejected": -0.230425164103508, |
|
"step": 3180 |
|
}, |
|
{ |
|
"epoch": 1.67, |
|
"learning_rate": 4.029503967095097e-07, |
|
"logits/chosen": 0.4729984402656555, |
|
"logits/rejected": 0.5827825665473938, |
|
"logps/chosen": -268.3514709472656, |
|
"logps/rejected": -247.7762451171875, |
|
"loss": 1878.176, |
|
"rewards/accuracies": 0.731249988079071, |
|
"rewards/chosen": -0.1029181256890297, |
|
"rewards/margins": 0.09399056434631348, |
|
"rewards/rejected": -0.19690869748592377, |
|
"step": 3190 |
|
}, |
|
{ |
|
"epoch": 1.67, |
|
"learning_rate": 3.9060130015138863e-07, |
|
"logits/chosen": 0.5795052647590637, |
|
"logits/rejected": 0.6329609155654907, |
|
"logps/chosen": -262.9614562988281, |
|
"logps/rejected": -240.7771453857422, |
|
"loss": 1977.8852, |
|
"rewards/accuracies": 0.65625, |
|
"rewards/chosen": -0.1271854192018509, |
|
"rewards/margins": 0.08980287611484528, |
|
"rewards/rejected": -0.21698825061321259, |
|
"step": 3200 |
|
}, |
|
{ |
|
"epoch": 1.67, |
|
"eval_logits/chosen": 0.5363709926605225, |
|
"eval_logits/rejected": 0.5920352935791016, |
|
"eval_logps/chosen": -269.10711669921875, |
|
"eval_logps/rejected": -255.2016143798828, |
|
"eval_loss": 2026.12890625, |
|
"eval_rewards/accuracies": 0.6959999799728394, |
|
"eval_rewards/chosen": -0.1248970478773117, |
|
"eval_rewards/margins": 0.09163003414869308, |
|
"eval_rewards/rejected": -0.21652711927890778, |
|
"eval_runtime": 416.4729, |
|
"eval_samples_per_second": 4.802, |
|
"eval_steps_per_second": 1.201, |
|
"step": 3200 |
|
}, |
|
{ |
|
"epoch": 1.68, |
|
"learning_rate": 3.784283377776651e-07, |
|
"logits/chosen": 0.6236351728439331, |
|
"logits/rejected": 0.630204975605011, |
|
"logps/chosen": -267.3162536621094, |
|
"logps/rejected": -241.71484375, |
|
"loss": 2151.366, |
|
"rewards/accuracies": 0.637499988079071, |
|
"rewards/chosen": -0.13855421543121338, |
|
"rewards/margins": 0.07831588387489319, |
|
"rewards/rejected": -0.21687009930610657, |
|
"step": 3210 |
|
}, |
|
{ |
|
"epoch": 1.69, |
|
"learning_rate": 3.664325260271953e-07, |
|
"logits/chosen": 0.586463451385498, |
|
"logits/rejected": 0.6219819784164429, |
|
"logps/chosen": -240.7449493408203, |
|
"logps/rejected": -260.012451171875, |
|
"loss": 2145.4057, |
|
"rewards/accuracies": 0.6312500238418579, |
|
"rewards/chosen": -0.12818796932697296, |
|
"rewards/margins": 0.07563059777021408, |
|
"rewards/rejected": -0.20381855964660645, |
|
"step": 3220 |
|
}, |
|
{ |
|
"epoch": 1.69, |
|
"learning_rate": 3.5461486654680746e-07, |
|
"logits/chosen": 0.5574949979782104, |
|
"logits/rejected": 0.6360602378845215, |
|
"logps/chosen": -262.3418884277344, |
|
"logps/rejected": -256.82366943359375, |
|
"loss": 2092.1422, |
|
"rewards/accuracies": 0.6812499761581421, |
|
"rewards/chosen": -0.10938684642314911, |
|
"rewards/margins": 0.08223484456539154, |
|
"rewards/rejected": -0.19162169098854065, |
|
"step": 3230 |
|
}, |
|
{ |
|
"epoch": 1.7, |
|
"learning_rate": 3.429763461076677e-07, |
|
"logits/chosen": 0.5418500304222107, |
|
"logits/rejected": 0.5625206828117371, |
|
"logps/chosen": -271.43792724609375, |
|
"logps/rejected": -255.0489501953125, |
|
"loss": 2035.8221, |
|
"rewards/accuracies": 0.699999988079071, |
|
"rewards/chosen": -0.13326099514961243, |
|
"rewards/margins": 0.08890596777200699, |
|
"rewards/rejected": -0.2221669703722, |
|
"step": 3240 |
|
}, |
|
{ |
|
"epoch": 1.7, |
|
"learning_rate": 3.315179365228824e-07, |
|
"logits/chosen": 0.5612285733222961, |
|
"logits/rejected": 0.5996168851852417, |
|
"logps/chosen": -284.46612548828125, |
|
"logps/rejected": -261.4803771972656, |
|
"loss": 1981.793, |
|
"rewards/accuracies": 0.71875, |
|
"rewards/chosen": -0.12592165172100067, |
|
"rewards/margins": 0.0976746454834938, |
|
"rewards/rejected": -0.22359630465507507, |
|
"step": 3250 |
|
}, |
|
{ |
|
"epoch": 1.71, |
|
"learning_rate": 3.202405945663556e-07, |
|
"logits/chosen": 0.5869094729423523, |
|
"logits/rejected": 0.5811904668807983, |
|
"logps/chosen": -273.99249267578125, |
|
"logps/rejected": -274.64410400390625, |
|
"loss": 2093.5598, |
|
"rewards/accuracies": 0.699999988079071, |
|
"rewards/chosen": -0.11422860622406006, |
|
"rewards/margins": 0.09214137494564056, |
|
"rewards/rejected": -0.20636996626853943, |
|
"step": 3260 |
|
}, |
|
{ |
|
"epoch": 1.71, |
|
"learning_rate": 3.09145261892895e-07, |
|
"logits/chosen": 0.5232716798782349, |
|
"logits/rejected": 0.6522939801216125, |
|
"logps/chosen": -266.1856384277344, |
|
"logps/rejected": -255.4128875732422, |
|
"loss": 1884.3191, |
|
"rewards/accuracies": 0.71875, |
|
"rewards/chosen": -0.12048964202404022, |
|
"rewards/margins": 0.1096540093421936, |
|
"rewards/rejected": -0.23014366626739502, |
|
"step": 3270 |
|
}, |
|
{ |
|
"epoch": 1.72, |
|
"learning_rate": 2.982328649595856e-07, |
|
"logits/chosen": 0.5434025526046753, |
|
"logits/rejected": 0.5516559481620789, |
|
"logps/chosen": -261.6842346191406, |
|
"logps/rejected": -268.0997619628906, |
|
"loss": 2020.6832, |
|
"rewards/accuracies": 0.6499999761581421, |
|
"rewards/chosen": -0.12252092361450195, |
|
"rewards/margins": 0.08777900040149689, |
|
"rewards/rejected": -0.21029992401599884, |
|
"step": 3280 |
|
}, |
|
{ |
|
"epoch": 1.72, |
|
"learning_rate": 2.8750431494843076e-07, |
|
"logits/chosen": 0.5793955326080322, |
|
"logits/rejected": 0.5846759676933289, |
|
"logps/chosen": -256.3296813964844, |
|
"logps/rejected": -258.81512451171875, |
|
"loss": 2103.4049, |
|
"rewards/accuracies": 0.7250000238418579, |
|
"rewards/chosen": -0.13012662529945374, |
|
"rewards/margins": 0.07379056513309479, |
|
"rewards/rejected": -0.20391719043254852, |
|
"step": 3290 |
|
}, |
|
{ |
|
"epoch": 1.73, |
|
"learning_rate": 2.7696050769026954e-07, |
|
"logits/chosen": 0.5790996551513672, |
|
"logits/rejected": 0.5938167572021484, |
|
"logps/chosen": -229.91567993164062, |
|
"logps/rejected": -236.54916381835938, |
|
"loss": 2123.3787, |
|
"rewards/accuracies": 0.65625, |
|
"rewards/chosen": -0.12688665091991425, |
|
"rewards/margins": 0.07247930765151978, |
|
"rewards/rejected": -0.19936595857143402, |
|
"step": 3300 |
|
}, |
|
{ |
|
"epoch": 1.73, |
|
"eval_logits/chosen": 0.5370410680770874, |
|
"eval_logits/rejected": 0.5926198363304138, |
|
"eval_logps/chosen": -269.0932922363281, |
|
"eval_logps/rejected": -255.16659545898438, |
|
"eval_loss": 2027.355224609375, |
|
"eval_rewards/accuracies": 0.6930000185966492, |
|
"eval_rewards/chosen": -0.12475859373807907, |
|
"eval_rewards/margins": 0.09141821414232254, |
|
"eval_rewards/rejected": -0.2161768227815628, |
|
"eval_runtime": 416.6319, |
|
"eval_samples_per_second": 4.8, |
|
"eval_steps_per_second": 1.2, |
|
"step": 3300 |
|
}, |
|
{ |
|
"epoch": 1.73, |
|
"learning_rate": 2.666023235899734e-07, |
|
"logits/chosen": 0.5439051389694214, |
|
"logits/rejected": 0.638985276222229, |
|
"logps/chosen": -249.70217895507812, |
|
"logps/rejected": -246.07040405273438, |
|
"loss": 1936.2746, |
|
"rewards/accuracies": 0.7124999761581421, |
|
"rewards/chosen": -0.13094648718833923, |
|
"rewards/margins": 0.10491780191659927, |
|
"rewards/rejected": -0.23586425185203552, |
|
"step": 3310 |
|
}, |
|
{ |
|
"epoch": 1.74, |
|
"learning_rate": 2.564306275529341e-07, |
|
"logits/chosen": 0.5696260929107666, |
|
"logits/rejected": 0.6271142959594727, |
|
"logps/chosen": -288.08721923828125, |
|
"logps/rejected": -263.6356201171875, |
|
"loss": 1974.6082, |
|
"rewards/accuracies": 0.6812499761581421, |
|
"rewards/chosen": -0.11936695873737335, |
|
"rewards/margins": 0.10857198387384415, |
|
"rewards/rejected": -0.2279389351606369, |
|
"step": 3320 |
|
}, |
|
{ |
|
"epoch": 1.74, |
|
"learning_rate": 2.4644626891284243e-07, |
|
"logits/chosen": 0.5715283155441284, |
|
"logits/rejected": 0.6530539393424988, |
|
"logps/chosen": -245.0167236328125, |
|
"logps/rejected": -238.81460571289062, |
|
"loss": 2066.4543, |
|
"rewards/accuracies": 0.6875, |
|
"rewards/chosen": -0.12561528384685516, |
|
"rewards/margins": 0.08124671876430511, |
|
"rewards/rejected": -0.20686200261116028, |
|
"step": 3330 |
|
}, |
|
{ |
|
"epoch": 1.75, |
|
"learning_rate": 2.3665008136077332e-07, |
|
"logits/chosen": 0.5698996186256409, |
|
"logits/rejected": 0.6029760837554932, |
|
"logps/chosen": -264.3577575683594, |
|
"logps/rejected": -271.068115234375, |
|
"loss": 2090.4994, |
|
"rewards/accuracies": 0.675000011920929, |
|
"rewards/chosen": -0.1360047161579132, |
|
"rewards/margins": 0.08008682727813721, |
|
"rewards/rejected": -0.2160915583372116, |
|
"step": 3340 |
|
}, |
|
{ |
|
"epoch": 1.75, |
|
"learning_rate": 2.2704288287556718e-07, |
|
"logits/chosen": 0.5687640309333801, |
|
"logits/rejected": 0.5940347909927368, |
|
"logps/chosen": -257.6128845214844, |
|
"logps/rejected": -248.91015625, |
|
"loss": 2136.7539, |
|
"rewards/accuracies": 0.675000011920929, |
|
"rewards/chosen": -0.12395147979259491, |
|
"rewards/margins": 0.0794539600610733, |
|
"rewards/rejected": -0.2034054547548294, |
|
"step": 3350 |
|
}, |
|
{ |
|
"epoch": 1.76, |
|
"learning_rate": 2.1762547565553293e-07, |
|
"logits/chosen": 0.5388206839561462, |
|
"logits/rejected": 0.5561047792434692, |
|
"logps/chosen": -261.9334716796875, |
|
"logps/rejected": -260.14556884765625, |
|
"loss": 1989.5404, |
|
"rewards/accuracies": 0.6875, |
|
"rewards/chosen": -0.1216350793838501, |
|
"rewards/margins": 0.09191958606243134, |
|
"rewards/rejected": -0.21355466544628143, |
|
"step": 3360 |
|
}, |
|
{ |
|
"epoch": 1.76, |
|
"learning_rate": 2.083986460514631e-07, |
|
"logits/chosen": 0.5701113343238831, |
|
"logits/rejected": 0.6196510195732117, |
|
"logps/chosen": -251.4109344482422, |
|
"logps/rejected": -252.1616668701172, |
|
"loss": 1820.4641, |
|
"rewards/accuracies": 0.7437499761581421, |
|
"rewards/chosen": -0.12642905116081238, |
|
"rewards/margins": 0.10934920608997345, |
|
"rewards/rejected": -0.23577824234962463, |
|
"step": 3370 |
|
}, |
|
{ |
|
"epoch": 1.77, |
|
"learning_rate": 1.993631645009747e-07, |
|
"logits/chosen": 0.5514119863510132, |
|
"logits/rejected": 0.5474542379379272, |
|
"logps/chosen": -256.45831298828125, |
|
"logps/rejected": -227.55514526367188, |
|
"loss": 1845.2707, |
|
"rewards/accuracies": 0.7124999761581421, |
|
"rewards/chosen": -0.10441195964813232, |
|
"rewards/margins": 0.1090712919831276, |
|
"rewards/rejected": -0.21348324418067932, |
|
"step": 3380 |
|
}, |
|
{ |
|
"epoch": 1.77, |
|
"learning_rate": 1.9051978546417715e-07, |
|
"logits/chosen": 0.5202070474624634, |
|
"logits/rejected": 0.5690991282463074, |
|
"logps/chosen": -260.46600341796875, |
|
"logps/rejected": -261.4082946777344, |
|
"loss": 1912.7502, |
|
"rewards/accuracies": 0.731249988079071, |
|
"rewards/chosen": -0.10940267145633698, |
|
"rewards/margins": 0.10180971771478653, |
|
"rewards/rejected": -0.2112123966217041, |
|
"step": 3390 |
|
}, |
|
{ |
|
"epoch": 1.78, |
|
"learning_rate": 1.818692473606748e-07, |
|
"logits/chosen": 0.5479332208633423, |
|
"logits/rejected": 0.5715588331222534, |
|
"logps/chosen": -258.5849609375, |
|
"logps/rejected": -264.29388427734375, |
|
"loss": 1945.4934, |
|
"rewards/accuracies": 0.7124999761581421, |
|
"rewards/chosen": -0.12341777980327606, |
|
"rewards/margins": 0.09188680350780487, |
|
"rewards/rejected": -0.21530456840991974, |
|
"step": 3400 |
|
}, |
|
{ |
|
"epoch": 1.78, |
|
"eval_logits/chosen": 0.5352820754051208, |
|
"eval_logits/rejected": 0.5908908247947693, |
|
"eval_logps/chosen": -269.1009826660156, |
|
"eval_logps/rejected": -255.1898651123047, |
|
"eval_loss": 2025.7803955078125, |
|
"eval_rewards/accuracies": 0.6934999823570251, |
|
"eval_rewards/chosen": -0.12483509629964828, |
|
"eval_rewards/margins": 0.09157437831163406, |
|
"eval_rewards/rejected": -0.21640948951244354, |
|
"eval_runtime": 416.572, |
|
"eval_samples_per_second": 4.801, |
|
"eval_steps_per_second": 1.2, |
|
"step": 3400 |
|
}, |
|
{ |
|
"epoch": 1.78, |
|
"learning_rate": 1.7341227250790989e-07, |
|
"logits/chosen": 0.5836583375930786, |
|
"logits/rejected": 0.632857084274292, |
|
"logps/chosen": -245.8205108642578, |
|
"logps/rejected": -252.48471069335938, |
|
"loss": 1828.1664, |
|
"rewards/accuracies": 0.768750011920929, |
|
"rewards/chosen": -0.10635235160589218, |
|
"rewards/margins": 0.11655166000127792, |
|
"rewards/rejected": -0.2229039967060089, |
|
"step": 3410 |
|
}, |
|
{ |
|
"epoch": 1.79, |
|
"learning_rate": 1.6514956706084885e-07, |
|
"logits/chosen": 0.6221760511398315, |
|
"logits/rejected": 0.5567342042922974, |
|
"logps/chosen": -266.02239990234375, |
|
"logps/rejected": -246.35385131835938, |
|
"loss": 1826.1057, |
|
"rewards/accuracies": 0.7250000238418579, |
|
"rewards/chosen": -0.09994658827781677, |
|
"rewards/margins": 0.1130019798874855, |
|
"rewards/rejected": -0.21294856071472168, |
|
"step": 3420 |
|
}, |
|
{ |
|
"epoch": 1.8, |
|
"learning_rate": 1.5708182095301867e-07, |
|
"logits/chosen": 0.6005284190177917, |
|
"logits/rejected": 0.6083909869194031, |
|
"logps/chosen": -280.53741455078125, |
|
"logps/rejected": -261.88201904296875, |
|
"loss": 1851.2512, |
|
"rewards/accuracies": 0.731249988079071, |
|
"rewards/chosen": -0.11887475103139877, |
|
"rewards/margins": 0.10133900493383408, |
|
"rewards/rejected": -0.22021374106407166, |
|
"step": 3430 |
|
}, |
|
{ |
|
"epoch": 1.8, |
|
"learning_rate": 1.4920970783889737e-07, |
|
"logits/chosen": 0.5680890083312988, |
|
"logits/rejected": 0.5507141351699829, |
|
"logps/chosen": -271.96990966796875, |
|
"logps/rejected": -241.1654815673828, |
|
"loss": 2041.0072, |
|
"rewards/accuracies": 0.668749988079071, |
|
"rewards/chosen": -0.1112411841750145, |
|
"rewards/margins": 0.08822907507419586, |
|
"rewards/rejected": -0.19947026669979095, |
|
"step": 3440 |
|
}, |
|
{ |
|
"epoch": 1.81, |
|
"learning_rate": 1.4153388503766492e-07, |
|
"logits/chosen": 0.5438860654830933, |
|
"logits/rejected": 0.5644111633300781, |
|
"logps/chosen": -279.3092346191406, |
|
"logps/rejected": -239.37167358398438, |
|
"loss": 1966.0102, |
|
"rewards/accuracies": 0.7124999761581421, |
|
"rewards/chosen": -0.1369594931602478, |
|
"rewards/margins": 0.09016064554452896, |
|
"rewards/rejected": -0.22712013125419617, |
|
"step": 3450 |
|
}, |
|
{ |
|
"epoch": 1.81, |
|
"learning_rate": 1.340549934783164e-07, |
|
"logits/chosen": 0.6110261082649231, |
|
"logits/rejected": 0.6002285480499268, |
|
"logps/chosen": -255.5424346923828, |
|
"logps/rejected": -258.6153259277344, |
|
"loss": 1778.4668, |
|
"rewards/accuracies": 0.768750011920929, |
|
"rewards/chosen": -0.10746339708566666, |
|
"rewards/margins": 0.12720224261283875, |
|
"rewards/rejected": -0.23466560244560242, |
|
"step": 3460 |
|
}, |
|
{ |
|
"epoch": 1.82, |
|
"learning_rate": 1.2677365764614452e-07, |
|
"logits/chosen": 0.6116484999656677, |
|
"logits/rejected": 0.6142521500587463, |
|
"logps/chosen": -251.9376983642578, |
|
"logps/rejected": -247.8961639404297, |
|
"loss": 1943.7922, |
|
"rewards/accuracies": 0.7437499761581421, |
|
"rewards/chosen": -0.12416845560073853, |
|
"rewards/margins": 0.09600269794464111, |
|
"rewards/rejected": -0.22017112374305725, |
|
"step": 3470 |
|
}, |
|
{ |
|
"epoch": 1.82, |
|
"learning_rate": 1.196904855305961e-07, |
|
"logits/chosen": 0.5488280057907104, |
|
"logits/rejected": 0.6329927444458008, |
|
"logps/chosen": -261.445068359375, |
|
"logps/rejected": -255.37142944335938, |
|
"loss": 2087.1572, |
|
"rewards/accuracies": 0.699999988079071, |
|
"rewards/chosen": -0.11578680574893951, |
|
"rewards/margins": 0.08292602747678757, |
|
"rewards/rejected": -0.19871282577514648, |
|
"step": 3480 |
|
}, |
|
{ |
|
"epoch": 1.83, |
|
"learning_rate": 1.1280606857450387e-07, |
|
"logits/chosen": 0.5712449550628662, |
|
"logits/rejected": 0.6291993856430054, |
|
"logps/chosen": -243.8418731689453, |
|
"logps/rejected": -233.1319122314453, |
|
"loss": 1809.2799, |
|
"rewards/accuracies": 0.762499988079071, |
|
"rewards/chosen": -0.11564090102910995, |
|
"rewards/margins": 0.11533119529485703, |
|
"rewards/rejected": -0.23097209632396698, |
|
"step": 3490 |
|
}, |
|
{ |
|
"epoch": 1.83, |
|
"learning_rate": 1.0612098162470302e-07, |
|
"logits/chosen": 0.5486131906509399, |
|
"logits/rejected": 0.6034047603607178, |
|
"logps/chosen": -253.94577026367188, |
|
"logps/rejected": -243.55593872070312, |
|
"loss": 1937.2627, |
|
"rewards/accuracies": 0.6875, |
|
"rewards/chosen": -0.10992947965860367, |
|
"rewards/margins": 0.09977956861257553, |
|
"rewards/rejected": -0.2097090482711792, |
|
"step": 3500 |
|
}, |
|
{ |
|
"epoch": 1.83, |
|
"eval_logits/chosen": 0.5346845984458923, |
|
"eval_logits/rejected": 0.5903106927871704, |
|
"eval_logps/chosen": -269.0877990722656, |
|
"eval_logps/rejected": -255.17501831054688, |
|
"eval_loss": 2027.823974609375, |
|
"eval_rewards/accuracies": 0.6930000185966492, |
|
"eval_rewards/chosen": -0.12470405548810959, |
|
"eval_rewards/margins": 0.09155706316232681, |
|
"eval_rewards/rejected": -0.2162611186504364, |
|
"eval_runtime": 416.489, |
|
"eval_samples_per_second": 4.802, |
|
"eval_steps_per_second": 1.201, |
|
"step": 3500 |
|
}, |
|
{ |
|
"epoch": 1.84, |
|
"learning_rate": 9.96357828840297e-08, |
|
"logits/chosen": 0.5791751742362976, |
|
"logits/rejected": 0.6535072326660156, |
|
"logps/chosen": -262.40301513671875, |
|
"logps/rejected": -260.7125549316406, |
|
"loss": 1964.6746, |
|
"rewards/accuracies": 0.706250011920929, |
|
"rewards/chosen": -0.1026170626282692, |
|
"rewards/margins": 0.08860354125499725, |
|
"rewards/rejected": -0.19122058153152466, |
|
"step": 3510 |
|
}, |
|
{ |
|
"epoch": 1.84, |
|
"learning_rate": 9.335101386471285e-08, |
|
"logits/chosen": 0.5727615356445312, |
|
"logits/rejected": 0.5819220542907715, |
|
"logps/chosen": -284.75616455078125, |
|
"logps/rejected": -250.07083129882812, |
|
"loss": 2092.7152, |
|
"rewards/accuracies": 0.6812499761581421, |
|
"rewards/chosen": -0.12308394908905029, |
|
"rewards/margins": 0.08286546170711517, |
|
"rewards/rejected": -0.20594939589500427, |
|
"step": 3520 |
|
}, |
|
{ |
|
"epoch": 1.85, |
|
"learning_rate": 8.726719934315648e-08, |
|
"logits/chosen": 0.5491209626197815, |
|
"logits/rejected": 0.5870348811149597, |
|
"logps/chosen": -249.99295043945312, |
|
"logps/rejected": -249.68679809570312, |
|
"loss": 1912.009, |
|
"rewards/accuracies": 0.699999988079071, |
|
"rewards/chosen": -0.10659299790859222, |
|
"rewards/margins": 0.09919796884059906, |
|
"rewards/rejected": -0.20579096674919128, |
|
"step": 3530 |
|
}, |
|
{ |
|
"epoch": 1.85, |
|
"learning_rate": 8.138484731612273e-08, |
|
"logits/chosen": 0.6029896140098572, |
|
"logits/rejected": 0.6406581997871399, |
|
"logps/chosen": -256.2204284667969, |
|
"logps/rejected": -229.89990234375, |
|
"loss": 1913.6865, |
|
"rewards/accuracies": 0.7250000238418579, |
|
"rewards/chosen": -0.10959267616271973, |
|
"rewards/margins": 0.1080545037984848, |
|
"rewards/rejected": -0.21764719486236572, |
|
"step": 3540 |
|
}, |
|
{ |
|
"epoch": 1.86, |
|
"learning_rate": 7.57044489583128e-08, |
|
"logits/chosen": 0.5283448100090027, |
|
"logits/rejected": 0.5756082534790039, |
|
"logps/chosen": -266.2628479003906, |
|
"logps/rejected": -251.70962524414062, |
|
"loss": 2214.9162, |
|
"rewards/accuracies": 0.675000011920929, |
|
"rewards/chosen": -0.12659896910190582, |
|
"rewards/margins": 0.06956067681312561, |
|
"rewards/rejected": -0.19615966081619263, |
|
"step": 3550 |
|
}, |
|
{ |
|
"epoch": 1.86, |
|
"learning_rate": 7.022647858135501e-08, |
|
"logits/chosen": 0.5648905038833618, |
|
"logits/rejected": 0.5851987600326538, |
|
"logps/chosen": -255.75704956054688, |
|
"logps/rejected": -235.11666870117188, |
|
"loss": 1911.8838, |
|
"rewards/accuracies": 0.699999988079071, |
|
"rewards/chosen": -0.10966980457305908, |
|
"rewards/margins": 0.09904567152261734, |
|
"rewards/rejected": -0.20871546864509583, |
|
"step": 3560 |
|
}, |
|
{ |
|
"epoch": 1.87, |
|
"learning_rate": 6.495139359419922e-08, |
|
"logits/chosen": 0.5362564921379089, |
|
"logits/rejected": 0.629612922668457, |
|
"logps/chosen": -303.055419921875, |
|
"logps/rejected": -271.39288330078125, |
|
"loss": 1875.824, |
|
"rewards/accuracies": 0.71875, |
|
"rewards/chosen": -0.1098160371184349, |
|
"rewards/margins": 0.11708301305770874, |
|
"rewards/rejected": -0.22689905762672424, |
|
"step": 3570 |
|
}, |
|
{ |
|
"epoch": 1.87, |
|
"learning_rate": 5.987963446492384e-08, |
|
"logits/chosen": 0.5597736239433289, |
|
"logits/rejected": 0.5716227889060974, |
|
"logps/chosen": -262.5528564453125, |
|
"logps/rejected": -251.4736785888672, |
|
"loss": 1887.5355, |
|
"rewards/accuracies": 0.71875, |
|
"rewards/chosen": -0.10405333340167999, |
|
"rewards/margins": 0.11675725132226944, |
|
"rewards/rejected": -0.22081057727336884, |
|
"step": 3580 |
|
}, |
|
{ |
|
"epoch": 1.88, |
|
"learning_rate": 5.501162468395688e-08, |
|
"logits/chosen": 0.5817372798919678, |
|
"logits/rejected": 0.5784239768981934, |
|
"logps/chosen": -251.4989013671875, |
|
"logps/rejected": -250.26522827148438, |
|
"loss": 1920.6814, |
|
"rewards/accuracies": 0.699999988079071, |
|
"rewards/chosen": -0.12673336267471313, |
|
"rewards/margins": 0.11053230613470078, |
|
"rewards/rejected": -0.2372656762599945, |
|
"step": 3590 |
|
}, |
|
{ |
|
"epoch": 1.88, |
|
"learning_rate": 5.034777072871394e-08, |
|
"logits/chosen": 0.5656172037124634, |
|
"logits/rejected": 0.6273232102394104, |
|
"logps/chosen": -250.90109252929688, |
|
"logps/rejected": -256.27178955078125, |
|
"loss": 2007.2062, |
|
"rewards/accuracies": 0.7124999761581421, |
|
"rewards/chosen": -0.1188703402876854, |
|
"rewards/margins": 0.09515853226184845, |
|
"rewards/rejected": -0.21402888000011444, |
|
"step": 3600 |
|
}, |
|
{ |
|
"epoch": 1.88, |
|
"eval_logits/chosen": 0.5352125763893127, |
|
"eval_logits/rejected": 0.5910032391548157, |
|
"eval_logps/chosen": -269.0622863769531, |
|
"eval_logps/rejected": -255.18426513671875, |
|
"eval_loss": 2025.32275390625, |
|
"eval_rewards/accuracies": 0.6894999742507935, |
|
"eval_rewards/chosen": -0.12444862723350525, |
|
"eval_rewards/margins": 0.0919048860669136, |
|
"eval_rewards/rejected": -0.21635350584983826, |
|
"eval_runtime": 416.4513, |
|
"eval_samples_per_second": 4.802, |
|
"eval_steps_per_second": 1.201, |
|
"step": 3600 |
|
}, |
|
{ |
|
"epoch": 1.89, |
|
"learning_rate": 4.5888462029658186e-08, |
|
"logits/chosen": 0.5575802326202393, |
|
"logits/rejected": 0.5975883603096008, |
|
"logps/chosen": -251.73623657226562, |
|
"logps/rejected": -250.5563201904297, |
|
"loss": 1952.7262, |
|
"rewards/accuracies": 0.71875, |
|
"rewards/chosen": -0.12859514355659485, |
|
"rewards/margins": 0.0945589691400528, |
|
"rewards/rejected": -0.22315411269664764, |
|
"step": 3610 |
|
}, |
|
{ |
|
"epoch": 1.89, |
|
"learning_rate": 4.163407093778243e-08, |
|
"logits/chosen": 0.5034095048904419, |
|
"logits/rejected": 0.5554597973823547, |
|
"logps/chosen": -264.1334533691406, |
|
"logps/rejected": -260.69805908203125, |
|
"loss": 2102.8385, |
|
"rewards/accuracies": 0.612500011920929, |
|
"rewards/chosen": -0.1385246217250824, |
|
"rewards/margins": 0.08275660127401352, |
|
"rewards/rejected": -0.22128121554851532, |
|
"step": 3620 |
|
}, |
|
{ |
|
"epoch": 1.9, |
|
"learning_rate": 3.7584952693519025e-08, |
|
"logits/chosen": 0.5984662175178528, |
|
"logits/rejected": 0.5975054502487183, |
|
"logps/chosen": -270.71661376953125, |
|
"logps/rejected": -260.6340026855469, |
|
"loss": 1962.2367, |
|
"rewards/accuracies": 0.699999988079071, |
|
"rewards/chosen": -0.11859796196222305, |
|
"rewards/margins": 0.0891033262014389, |
|
"rewards/rejected": -0.20770128071308136, |
|
"step": 3630 |
|
}, |
|
{ |
|
"epoch": 1.91, |
|
"learning_rate": 3.37414453970758e-08, |
|
"logits/chosen": 0.5739267468452454, |
|
"logits/rejected": 0.577027440071106, |
|
"logps/chosen": -249.9311065673828, |
|
"logps/rejected": -226.3584442138672, |
|
"loss": 2134.9982, |
|
"rewards/accuracies": 0.65625, |
|
"rewards/chosen": -0.13262119889259338, |
|
"rewards/margins": 0.08364128321409225, |
|
"rewards/rejected": -0.21626248955726624, |
|
"step": 3640 |
|
}, |
|
{ |
|
"epoch": 1.91, |
|
"learning_rate": 3.0103869980206145e-08, |
|
"logits/chosen": 0.6304140090942383, |
|
"logits/rejected": 0.63347989320755, |
|
"logps/chosen": -239.28915405273438, |
|
"logps/rejected": -258.83660888671875, |
|
"loss": 2049.8613, |
|
"rewards/accuracies": 0.6937500238418579, |
|
"rewards/chosen": -0.13652729988098145, |
|
"rewards/margins": 0.08251725137233734, |
|
"rewards/rejected": -0.2190445363521576, |
|
"step": 3650 |
|
}, |
|
{ |
|
"epoch": 1.92, |
|
"learning_rate": 2.6672530179410183e-08, |
|
"logits/chosen": 0.5943226218223572, |
|
"logits/rejected": 0.6503596305847168, |
|
"logps/chosen": -264.0231018066406, |
|
"logps/rejected": -245.2039031982422, |
|
"loss": 2003.8398, |
|
"rewards/accuracies": 0.675000011920929, |
|
"rewards/chosen": -0.12203504145145416, |
|
"rewards/margins": 0.09293092787265778, |
|
"rewards/rejected": -0.21496596932411194, |
|
"step": 3660 |
|
}, |
|
{ |
|
"epoch": 1.92, |
|
"learning_rate": 2.3447712510573928e-08, |
|
"logits/chosen": 0.6132981777191162, |
|
"logits/rejected": 0.6643080115318298, |
|
"logps/chosen": -258.1444091796875, |
|
"logps/rejected": -241.70986938476562, |
|
"loss": 1827.9928, |
|
"rewards/accuracies": 0.7250000238418579, |
|
"rewards/chosen": -0.12529726326465607, |
|
"rewards/margins": 0.11168257147073746, |
|
"rewards/rejected": -0.23697984218597412, |
|
"step": 3670 |
|
}, |
|
{ |
|
"epoch": 1.93, |
|
"learning_rate": 2.04296862450451e-08, |
|
"logits/chosen": 0.5302231907844543, |
|
"logits/rejected": 0.5352843999862671, |
|
"logps/chosen": -270.13079833984375, |
|
"logps/rejected": -240.57931518554688, |
|
"loss": 2135.2258, |
|
"rewards/accuracies": 0.65625, |
|
"rewards/chosen": -0.10986328125, |
|
"rewards/margins": 0.07277282327413559, |
|
"rewards/rejected": -0.182636097073555, |
|
"step": 3680 |
|
}, |
|
{ |
|
"epoch": 1.93, |
|
"learning_rate": 1.7618703387147495e-08, |
|
"logits/chosen": 0.5543524622917175, |
|
"logits/rejected": 0.5519607663154602, |
|
"logps/chosen": -281.1496276855469, |
|
"logps/rejected": -274.3504943847656, |
|
"loss": 1951.3572, |
|
"rewards/accuracies": 0.71875, |
|
"rewards/chosen": -0.10969813168048859, |
|
"rewards/margins": 0.09588075429201126, |
|
"rewards/rejected": -0.20557889342308044, |
|
"step": 3690 |
|
}, |
|
{ |
|
"epoch": 1.94, |
|
"learning_rate": 1.501499865314171e-08, |
|
"logits/chosen": 0.5777779817581177, |
|
"logits/rejected": 0.6056709289550781, |
|
"logps/chosen": -258.98333740234375, |
|
"logps/rejected": -245.7554473876953, |
|
"loss": 2076.715, |
|
"rewards/accuracies": 0.706250011920929, |
|
"rewards/chosen": -0.11231188476085663, |
|
"rewards/margins": 0.09381435066461563, |
|
"rewards/rejected": -0.20612624287605286, |
|
"step": 3700 |
|
}, |
|
{ |
|
"epoch": 1.94, |
|
"eval_logits/chosen": 0.5358251333236694, |
|
"eval_logits/rejected": 0.5913307666778564, |
|
"eval_logps/chosen": -269.0487365722656, |
|
"eval_logps/rejected": -255.13833618164062, |
|
"eval_loss": 2027.4857177734375, |
|
"eval_rewards/accuracies": 0.6919999718666077, |
|
"eval_rewards/chosen": -0.12431324273347855, |
|
"eval_rewards/margins": 0.09158134460449219, |
|
"eval_rewards/rejected": -0.21589456498622894, |
|
"eval_runtime": 416.7132, |
|
"eval_samples_per_second": 4.799, |
|
"eval_steps_per_second": 1.2, |
|
"step": 3700 |
|
}, |
|
{ |
|
"epoch": 1.94, |
|
"learning_rate": 1.2618789451623314e-08, |
|
"logits/chosen": 0.5645478963851929, |
|
"logits/rejected": 0.6109569072723389, |
|
"logps/chosen": -224.93197631835938, |
|
"logps/rejected": -236.52059936523438, |
|
"loss": 2014.5852, |
|
"rewards/accuracies": 0.706250011920929, |
|
"rewards/chosen": -0.12038487195968628, |
|
"rewards/margins": 0.08836686611175537, |
|
"rewards/rejected": -0.20875172317028046, |
|
"step": 3710 |
|
}, |
|
{ |
|
"epoch": 1.95, |
|
"learning_rate": 1.0430275865371265e-08, |
|
"logits/chosen": 0.5508732795715332, |
|
"logits/rejected": 0.6115376353263855, |
|
"logps/chosen": -280.9700927734375, |
|
"logps/rejected": -276.9327087402344, |
|
"loss": 2099.5396, |
|
"rewards/accuracies": 0.625, |
|
"rewards/chosen": -0.12365047633647919, |
|
"rewards/margins": 0.08925069868564606, |
|
"rewards/rejected": -0.21290118992328644, |
|
"step": 3720 |
|
}, |
|
{ |
|
"epoch": 1.95, |
|
"learning_rate": 8.449640634639878e-09, |
|
"logits/chosen": 0.5355272889137268, |
|
"logits/rejected": 0.5811390280723572, |
|
"logps/chosen": -234.78927612304688, |
|
"logps/rejected": -228.4997100830078, |
|
"loss": 2043.5014, |
|
"rewards/accuracies": 0.6812499761581421, |
|
"rewards/chosen": -0.1230451837182045, |
|
"rewards/margins": 0.08070604503154755, |
|
"rewards/rejected": -0.20375123620033264, |
|
"step": 3730 |
|
}, |
|
{ |
|
"epoch": 1.96, |
|
"learning_rate": 6.677049141901315e-09, |
|
"logits/chosen": 0.5882354974746704, |
|
"logits/rejected": 0.572884738445282, |
|
"logps/chosen": -238.9161834716797, |
|
"logps/rejected": -247.5298309326172, |
|
"loss": 2155.4453, |
|
"rewards/accuracies": 0.65625, |
|
"rewards/chosen": -0.13735826313495636, |
|
"rewards/margins": 0.06890521943569183, |
|
"rewards/rejected": -0.2062634974718094, |
|
"step": 3740 |
|
}, |
|
{ |
|
"epoch": 1.96, |
|
"learning_rate": 5.112649398034686e-09, |
|
"logits/chosen": 0.6161108016967773, |
|
"logits/rejected": 0.6904915571212769, |
|
"logps/chosen": -284.59112548828125, |
|
"logps/rejected": -254.5317840576172, |
|
"loss": 2025.8148, |
|
"rewards/accuracies": 0.699999988079071, |
|
"rewards/chosen": -0.125542551279068, |
|
"rewards/margins": 0.1034855991601944, |
|
"rewards/rejected": -0.2290281355381012, |
|
"step": 3750 |
|
}, |
|
{ |
|
"epoch": 1.97, |
|
"learning_rate": 3.756572029968708e-09, |
|
"logits/chosen": 0.5819270610809326, |
|
"logits/rejected": 0.552914023399353, |
|
"logps/chosen": -255.23886108398438, |
|
"logps/rejected": -249.76144409179688, |
|
"loss": 1779.8414, |
|
"rewards/accuracies": 0.75, |
|
"rewards/chosen": -0.11097989976406097, |
|
"rewards/margins": 0.1095919981598854, |
|
"rewards/rejected": -0.22057190537452698, |
|
"step": 3760 |
|
}, |
|
{ |
|
"epoch": 1.97, |
|
"learning_rate": 2.6089302697732133e-09, |
|
"logits/chosen": 0.5825963020324707, |
|
"logits/rejected": 0.5435997843742371, |
|
"logps/chosen": -250.9562225341797, |
|
"logps/rejected": -227.83010864257812, |
|
"loss": 1853.0563, |
|
"rewards/accuracies": 0.7562500238418579, |
|
"rewards/chosen": -0.11256451904773712, |
|
"rewards/margins": 0.10355798900127411, |
|
"rewards/rejected": -0.21612253785133362, |
|
"step": 3770 |
|
}, |
|
{ |
|
"epoch": 1.98, |
|
"learning_rate": 1.6698199452053199e-09, |
|
"logits/chosen": 0.6074197292327881, |
|
"logits/rejected": 0.6451854705810547, |
|
"logps/chosen": -269.6044616699219, |
|
"logps/rejected": -231.6178436279297, |
|
"loss": 1905.3814, |
|
"rewards/accuracies": 0.71875, |
|
"rewards/chosen": -0.10596567392349243, |
|
"rewards/margins": 0.099820576608181, |
|
"rewards/rejected": -0.20578625798225403, |
|
"step": 3780 |
|
}, |
|
{ |
|
"epoch": 1.98, |
|
"learning_rate": 9.393194717061127e-10, |
|
"logits/chosen": 0.5966477394104004, |
|
"logits/rejected": 0.57940673828125, |
|
"logps/chosen": -261.906982421875, |
|
"logps/rejected": -243.7214813232422, |
|
"loss": 2099.1896, |
|
"rewards/accuracies": 0.6875, |
|
"rewards/chosen": -0.12468767166137695, |
|
"rewards/margins": 0.0848483294248581, |
|
"rewards/rejected": -0.20953598618507385, |
|
"step": 3790 |
|
}, |
|
{ |
|
"epoch": 1.99, |
|
"learning_rate": 4.1748984585560094e-10, |
|
"logits/chosen": 0.5209355354309082, |
|
"logits/rejected": 0.6011817455291748, |
|
"logps/chosen": -257.47882080078125, |
|
"logps/rejected": -253.18017578125, |
|
"loss": 2055.2201, |
|
"rewards/accuracies": 0.668749988079071, |
|
"rewards/chosen": -0.12113461643457413, |
|
"rewards/margins": 0.09108567237854004, |
|
"rewards/rejected": -0.21222028136253357, |
|
"step": 3800 |
|
}, |
|
{ |
|
"epoch": 1.99, |
|
"eval_logits/chosen": 0.5346859693527222, |
|
"eval_logits/rejected": 0.5902337431907654, |
|
"eval_logps/chosen": -269.0542907714844, |
|
"eval_logps/rejected": -255.1454620361328, |
|
"eval_loss": 2027.8082275390625, |
|
"eval_rewards/accuracies": 0.6919999718666077, |
|
"eval_rewards/chosen": -0.12436838448047638, |
|
"eval_rewards/margins": 0.09159712493419647, |
|
"eval_rewards/rejected": -0.21596547961235046, |
|
"eval_runtime": 416.5485, |
|
"eval_samples_per_second": 4.801, |
|
"eval_steps_per_second": 1.2, |
|
"step": 3800 |
|
}, |
|
{ |
|
"epoch": 1.99, |
|
"learning_rate": 1.0437464027707179e-10, |
|
"logits/chosen": 0.5776988863945007, |
|
"logits/rejected": 0.6123504638671875, |
|
"logps/chosen": -265.8362121582031, |
|
"logps/rejected": -237.8904571533203, |
|
"loss": 2055.609, |
|
"rewards/accuracies": 0.7124999761581421, |
|
"rewards/chosen": -0.1267283707857132, |
|
"rewards/margins": 0.08532574027776718, |
|
"rewards/rejected": -0.21205410361289978, |
|
"step": 3810 |
|
}, |
|
{ |
|
"epoch": 2.0, |
|
"learning_rate": 0.0, |
|
"logits/chosen": 0.5118510127067566, |
|
"logits/rejected": 0.5845987200737, |
|
"logps/chosen": -274.1031188964844, |
|
"logps/rejected": -256.99688720703125, |
|
"loss": 2139.6068, |
|
"rewards/accuracies": 0.637499988079071, |
|
"rewards/chosen": -0.13095693290233612, |
|
"rewards/margins": 0.07750894129276276, |
|
"rewards/rejected": -0.20846585929393768, |
|
"step": 3820 |
|
}, |
|
{ |
|
"epoch": 2.0, |
|
"step": 3820, |
|
"total_flos": 0.0, |
|
"train_loss": 2099.8451463309884, |
|
"train_runtime": 42790.3459, |
|
"train_samples_per_second": 1.429, |
|
"train_steps_per_second": 0.089 |
|
} |
|
], |
|
"logging_steps": 10, |
|
"max_steps": 3820, |
|
"num_input_tokens_seen": 0, |
|
"num_train_epochs": 2, |
|
"save_steps": 100, |
|
"total_flos": 0.0, |
|
"train_batch_size": 4, |
|
"trial_name": null, |
|
"trial_params": null |
|
} |
|
|