|
{ |
|
"best_metric": null, |
|
"best_model_checkpoint": null, |
|
"epoch": 3.0, |
|
"eval_steps": 500, |
|
"global_step": 933, |
|
"is_hyper_param_search": false, |
|
"is_local_process_zero": true, |
|
"is_world_process_zero": true, |
|
"log_history": [ |
|
{ |
|
"epoch": 0.003215434083601286, |
|
"grad_norm": 23.397309482542667, |
|
"learning_rate": 5.3191489361702125e-09, |
|
"logits/chosen": -1.21875, |
|
"logits/rejected": -1.03125, |
|
"logps/chosen": -208.0, |
|
"logps/rejected": -222.0, |
|
"loss": 0.6914, |
|
"rewards/accuracies": 0.0, |
|
"rewards/chosen": 0.0, |
|
"rewards/margins": 0.0, |
|
"rewards/rejected": 0.0, |
|
"step": 1 |
|
}, |
|
{ |
|
"epoch": 0.03215434083601286, |
|
"grad_norm": 23.57746975960187, |
|
"learning_rate": 5.3191489361702123e-08, |
|
"logits/chosen": -1.1875, |
|
"logits/rejected": -1.09375, |
|
"logps/chosen": -213.0, |
|
"logps/rejected": -231.0, |
|
"loss": 0.6959, |
|
"rewards/accuracies": 0.125, |
|
"rewards/chosen": -0.00970458984375, |
|
"rewards/margins": -0.0167236328125, |
|
"rewards/rejected": 0.0069580078125, |
|
"step": 10 |
|
}, |
|
{ |
|
"epoch": 0.06430868167202572, |
|
"grad_norm": 21.844695661092338, |
|
"learning_rate": 1.0638297872340425e-07, |
|
"logits/chosen": -1.2265625, |
|
"logits/rejected": -1.1328125, |
|
"logps/chosen": -219.0, |
|
"logps/rejected": -228.0, |
|
"loss": 0.6939, |
|
"rewards/accuracies": 0.23749999701976776, |
|
"rewards/chosen": -0.0024871826171875, |
|
"rewards/margins": -0.0050048828125, |
|
"rewards/rejected": 0.00250244140625, |
|
"step": 20 |
|
}, |
|
{ |
|
"epoch": 0.09646302250803858, |
|
"grad_norm": 22.14117303336626, |
|
"learning_rate": 1.5957446808510638e-07, |
|
"logits/chosen": -1.2578125, |
|
"logits/rejected": -1.1953125, |
|
"logps/chosen": -218.0, |
|
"logps/rejected": -230.0, |
|
"loss": 0.6849, |
|
"rewards/accuracies": 0.3499999940395355, |
|
"rewards/chosen": 0.00750732421875, |
|
"rewards/margins": 0.02001953125, |
|
"rewards/rejected": -0.01251220703125, |
|
"step": 30 |
|
}, |
|
{ |
|
"epoch": 0.12861736334405144, |
|
"grad_norm": 22.76154135985023, |
|
"learning_rate": 2.127659574468085e-07, |
|
"logits/chosen": -1.2265625, |
|
"logits/rejected": -1.140625, |
|
"logps/chosen": -215.0, |
|
"logps/rejected": -226.0, |
|
"loss": 0.6734, |
|
"rewards/accuracies": 0.36250001192092896, |
|
"rewards/chosen": 0.00124359130859375, |
|
"rewards/margins": 0.039306640625, |
|
"rewards/rejected": -0.0380859375, |
|
"step": 40 |
|
}, |
|
{ |
|
"epoch": 0.1607717041800643, |
|
"grad_norm": 21.567825630686524, |
|
"learning_rate": 2.659574468085106e-07, |
|
"logits/chosen": -1.21875, |
|
"logits/rejected": -1.1484375, |
|
"logps/chosen": -217.0, |
|
"logps/rejected": -227.0, |
|
"loss": 0.6432, |
|
"rewards/accuracies": 0.5375000238418579, |
|
"rewards/chosen": -0.0238037109375, |
|
"rewards/margins": 0.0703125, |
|
"rewards/rejected": -0.09375, |
|
"step": 50 |
|
}, |
|
{ |
|
"epoch": 0.19292604501607716, |
|
"grad_norm": 21.53606646856858, |
|
"learning_rate": 3.1914893617021275e-07, |
|
"logits/chosen": -1.21875, |
|
"logits/rejected": -1.15625, |
|
"logps/chosen": -218.0, |
|
"logps/rejected": -230.0, |
|
"loss": 0.6388, |
|
"rewards/accuracies": 0.574999988079071, |
|
"rewards/chosen": -0.0595703125, |
|
"rewards/margins": 0.1083984375, |
|
"rewards/rejected": -0.16796875, |
|
"step": 60 |
|
}, |
|
{ |
|
"epoch": 0.22508038585209003, |
|
"grad_norm": 21.30483704624207, |
|
"learning_rate": 3.7234042553191484e-07, |
|
"logits/chosen": -1.25, |
|
"logits/rejected": -1.1484375, |
|
"logps/chosen": -219.0, |
|
"logps/rejected": -235.0, |
|
"loss": 0.6061, |
|
"rewards/accuracies": 0.7124999761581421, |
|
"rewards/chosen": -0.12255859375, |
|
"rewards/margins": 0.30078125, |
|
"rewards/rejected": -0.421875, |
|
"step": 70 |
|
}, |
|
{ |
|
"epoch": 0.2572347266881029, |
|
"grad_norm": 18.590303323906987, |
|
"learning_rate": 4.25531914893617e-07, |
|
"logits/chosen": -1.203125, |
|
"logits/rejected": -1.15625, |
|
"logps/chosen": -222.0, |
|
"logps/rejected": -232.0, |
|
"loss": 0.5353, |
|
"rewards/accuracies": 0.6625000238418579, |
|
"rewards/chosen": -0.158203125, |
|
"rewards/margins": 0.39453125, |
|
"rewards/rejected": -0.55078125, |
|
"step": 80 |
|
}, |
|
{ |
|
"epoch": 0.28938906752411575, |
|
"grad_norm": 20.3165553500463, |
|
"learning_rate": 4.787234042553192e-07, |
|
"logits/chosen": -1.15625, |
|
"logits/rejected": -1.078125, |
|
"logps/chosen": -226.0, |
|
"logps/rejected": -242.0, |
|
"loss": 0.5042, |
|
"rewards/accuracies": 0.6875, |
|
"rewards/chosen": -0.2412109375, |
|
"rewards/margins": 0.60546875, |
|
"rewards/rejected": -0.84765625, |
|
"step": 90 |
|
}, |
|
{ |
|
"epoch": 0.3215434083601286, |
|
"grad_norm": 17.689841150596088, |
|
"learning_rate": 4.964243146603099e-07, |
|
"logits/chosen": -1.2109375, |
|
"logits/rejected": -1.125, |
|
"logps/chosen": -226.0, |
|
"logps/rejected": -243.0, |
|
"loss": 0.4343, |
|
"rewards/accuracies": 0.75, |
|
"rewards/chosen": -0.32421875, |
|
"rewards/margins": 0.8828125, |
|
"rewards/rejected": -1.2109375, |
|
"step": 100 |
|
}, |
|
{ |
|
"epoch": 0.3536977491961415, |
|
"grad_norm": 20.592213216312736, |
|
"learning_rate": 4.904648390941597e-07, |
|
"logits/chosen": -1.171875, |
|
"logits/rejected": -1.0859375, |
|
"logps/chosen": -221.0, |
|
"logps/rejected": -244.0, |
|
"loss": 0.3958, |
|
"rewards/accuracies": 0.762499988079071, |
|
"rewards/chosen": -0.58984375, |
|
"rewards/margins": 0.921875, |
|
"rewards/rejected": -1.5078125, |
|
"step": 110 |
|
}, |
|
{ |
|
"epoch": 0.3858520900321543, |
|
"grad_norm": 17.994190672784203, |
|
"learning_rate": 4.845053635280095e-07, |
|
"logits/chosen": -1.1796875, |
|
"logits/rejected": -1.0625, |
|
"logps/chosen": -221.0, |
|
"logps/rejected": -250.0, |
|
"loss": 0.3702, |
|
"rewards/accuracies": 0.875, |
|
"rewards/chosen": -0.412109375, |
|
"rewards/margins": 1.578125, |
|
"rewards/rejected": -1.9921875, |
|
"step": 120 |
|
}, |
|
{ |
|
"epoch": 0.4180064308681672, |
|
"grad_norm": 16.58709660930582, |
|
"learning_rate": 4.785458879618593e-07, |
|
"logits/chosen": -1.1640625, |
|
"logits/rejected": -1.0859375, |
|
"logps/chosen": -224.0, |
|
"logps/rejected": -252.0, |
|
"loss": 0.3455, |
|
"rewards/accuracies": 0.8374999761581421, |
|
"rewards/chosen": -0.609375, |
|
"rewards/margins": 1.7109375, |
|
"rewards/rejected": -2.328125, |
|
"step": 130 |
|
}, |
|
{ |
|
"epoch": 0.45016077170418006, |
|
"grad_norm": 14.667056588711544, |
|
"learning_rate": 4.7258641239570916e-07, |
|
"logits/chosen": -1.171875, |
|
"logits/rejected": -1.1875, |
|
"logps/chosen": -225.0, |
|
"logps/rejected": -251.0, |
|
"loss": 0.3535, |
|
"rewards/accuracies": 0.824999988079071, |
|
"rewards/chosen": -0.8046875, |
|
"rewards/margins": 1.3046875, |
|
"rewards/rejected": -2.109375, |
|
"step": 140 |
|
}, |
|
{ |
|
"epoch": 0.48231511254019294, |
|
"grad_norm": 21.035028234813357, |
|
"learning_rate": 4.66626936829559e-07, |
|
"logits/chosen": -1.1328125, |
|
"logits/rejected": -1.0625, |
|
"logps/chosen": -221.0, |
|
"logps/rejected": -256.0, |
|
"loss": 0.3197, |
|
"rewards/accuracies": 0.8374999761581421, |
|
"rewards/chosen": -0.48046875, |
|
"rewards/margins": 1.921875, |
|
"rewards/rejected": -2.390625, |
|
"step": 150 |
|
}, |
|
{ |
|
"epoch": 0.5144694533762058, |
|
"grad_norm": 19.336937556140025, |
|
"learning_rate": 4.606674612634088e-07, |
|
"logits/chosen": -1.125, |
|
"logits/rejected": -1.078125, |
|
"logps/chosen": -231.0, |
|
"logps/rejected": -262.0, |
|
"loss": 0.2981, |
|
"rewards/accuracies": 0.862500011920929, |
|
"rewards/chosen": -0.76171875, |
|
"rewards/margins": 1.9765625, |
|
"rewards/rejected": -2.75, |
|
"step": 160 |
|
}, |
|
{ |
|
"epoch": 0.5466237942122186, |
|
"grad_norm": 20.280342272598105, |
|
"learning_rate": 4.547079856972586e-07, |
|
"logits/chosen": -1.2109375, |
|
"logits/rejected": -1.1875, |
|
"logps/chosen": -226.0, |
|
"logps/rejected": -260.0, |
|
"loss": 0.3015, |
|
"rewards/accuracies": 0.887499988079071, |
|
"rewards/chosen": -0.84375, |
|
"rewards/margins": 2.15625, |
|
"rewards/rejected": -2.984375, |
|
"step": 170 |
|
}, |
|
{ |
|
"epoch": 0.5787781350482315, |
|
"grad_norm": 18.675089959384973, |
|
"learning_rate": 4.487485101311084e-07, |
|
"logits/chosen": -1.1796875, |
|
"logits/rejected": -1.09375, |
|
"logps/chosen": -228.0, |
|
"logps/rejected": -266.0, |
|
"loss": 0.2779, |
|
"rewards/accuracies": 0.862500011920929, |
|
"rewards/chosen": -1.0, |
|
"rewards/margins": 2.46875, |
|
"rewards/rejected": -3.46875, |
|
"step": 180 |
|
}, |
|
{ |
|
"epoch": 0.6109324758842444, |
|
"grad_norm": 15.672066362012437, |
|
"learning_rate": 4.4278903456495827e-07, |
|
"logits/chosen": -1.21875, |
|
"logits/rejected": -1.1015625, |
|
"logps/chosen": -216.0, |
|
"logps/rejected": -260.0, |
|
"loss": 0.2408, |
|
"rewards/accuracies": 0.9375, |
|
"rewards/chosen": -0.58984375, |
|
"rewards/margins": 2.703125, |
|
"rewards/rejected": -3.296875, |
|
"step": 190 |
|
}, |
|
{ |
|
"epoch": 0.6430868167202572, |
|
"grad_norm": 18.48266036458051, |
|
"learning_rate": 4.368295589988081e-07, |
|
"logits/chosen": -1.15625, |
|
"logits/rejected": -1.125, |
|
"logps/chosen": -224.0, |
|
"logps/rejected": -262.0, |
|
"loss": 0.2538, |
|
"rewards/accuracies": 0.8500000238418579, |
|
"rewards/chosen": -0.97265625, |
|
"rewards/margins": 2.21875, |
|
"rewards/rejected": -3.1875, |
|
"step": 200 |
|
}, |
|
{ |
|
"epoch": 0.6752411575562701, |
|
"grad_norm": 16.008423170204743, |
|
"learning_rate": 4.308700834326579e-07, |
|
"logits/chosen": -1.1328125, |
|
"logits/rejected": -1.09375, |
|
"logps/chosen": -232.0, |
|
"logps/rejected": -264.0, |
|
"loss": 0.2678, |
|
"rewards/accuracies": 0.8999999761581421, |
|
"rewards/chosen": -1.0703125, |
|
"rewards/margins": 2.296875, |
|
"rewards/rejected": -3.359375, |
|
"step": 210 |
|
}, |
|
{ |
|
"epoch": 0.707395498392283, |
|
"grad_norm": 20.163363935554287, |
|
"learning_rate": 4.249106078665077e-07, |
|
"logits/chosen": -1.203125, |
|
"logits/rejected": -1.1015625, |
|
"logps/chosen": -226.0, |
|
"logps/rejected": -262.0, |
|
"loss": 0.2411, |
|
"rewards/accuracies": 0.875, |
|
"rewards/chosen": -0.98828125, |
|
"rewards/margins": 2.609375, |
|
"rewards/rejected": -3.609375, |
|
"step": 220 |
|
}, |
|
{ |
|
"epoch": 0.7395498392282959, |
|
"grad_norm": 11.375387489737529, |
|
"learning_rate": 4.1895113230035757e-07, |
|
"logits/chosen": -1.2109375, |
|
"logits/rejected": -1.09375, |
|
"logps/chosen": -219.0, |
|
"logps/rejected": -262.0, |
|
"loss": 0.2189, |
|
"rewards/accuracies": 0.887499988079071, |
|
"rewards/chosen": -0.59375, |
|
"rewards/margins": 2.90625, |
|
"rewards/rejected": -3.5, |
|
"step": 230 |
|
}, |
|
{ |
|
"epoch": 0.7717041800643086, |
|
"grad_norm": 15.657974217745089, |
|
"learning_rate": 4.129916567342074e-07, |
|
"logits/chosen": -1.171875, |
|
"logits/rejected": -1.109375, |
|
"logps/chosen": -228.0, |
|
"logps/rejected": -268.0, |
|
"loss": 0.1769, |
|
"rewards/accuracies": 0.875, |
|
"rewards/chosen": -0.5546875, |
|
"rewards/margins": 3.03125, |
|
"rewards/rejected": -3.59375, |
|
"step": 240 |
|
}, |
|
{ |
|
"epoch": 0.8038585209003215, |
|
"grad_norm": 18.359644123878613, |
|
"learning_rate": 4.070321811680572e-07, |
|
"logits/chosen": -1.140625, |
|
"logits/rejected": -1.078125, |
|
"logps/chosen": -236.0, |
|
"logps/rejected": -268.0, |
|
"loss": 0.2571, |
|
"rewards/accuracies": 0.875, |
|
"rewards/chosen": -1.2265625, |
|
"rewards/margins": 2.375, |
|
"rewards/rejected": -3.59375, |
|
"step": 250 |
|
}, |
|
{ |
|
"epoch": 0.8360128617363344, |
|
"grad_norm": 13.680451389998446, |
|
"learning_rate": 4.0107270560190706e-07, |
|
"logits/chosen": -1.125, |
|
"logits/rejected": -1.0703125, |
|
"logps/chosen": -233.0, |
|
"logps/rejected": -270.0, |
|
"loss": 0.2612, |
|
"rewards/accuracies": 0.824999988079071, |
|
"rewards/chosen": -1.1171875, |
|
"rewards/margins": 2.625, |
|
"rewards/rejected": -3.734375, |
|
"step": 260 |
|
}, |
|
{ |
|
"epoch": 0.8681672025723473, |
|
"grad_norm": 18.25163529420958, |
|
"learning_rate": 3.9511323003575687e-07, |
|
"logits/chosen": -1.25, |
|
"logits/rejected": -1.1328125, |
|
"logps/chosen": -218.0, |
|
"logps/rejected": -268.0, |
|
"loss": 0.2586, |
|
"rewards/accuracies": 0.9125000238418579, |
|
"rewards/chosen": -0.70703125, |
|
"rewards/margins": 3.1875, |
|
"rewards/rejected": -3.90625, |
|
"step": 270 |
|
}, |
|
{ |
|
"epoch": 0.9003215434083601, |
|
"grad_norm": 14.492841167088857, |
|
"learning_rate": 3.8915375446960663e-07, |
|
"logits/chosen": -1.2421875, |
|
"logits/rejected": -1.1015625, |
|
"logps/chosen": -226.0, |
|
"logps/rejected": -266.0, |
|
"loss": 0.2258, |
|
"rewards/accuracies": 0.9375, |
|
"rewards/chosen": -0.91796875, |
|
"rewards/margins": 3.21875, |
|
"rewards/rejected": -4.125, |
|
"step": 280 |
|
}, |
|
{ |
|
"epoch": 0.932475884244373, |
|
"grad_norm": 19.00411989834722, |
|
"learning_rate": 3.8319427890345644e-07, |
|
"logits/chosen": -1.203125, |
|
"logits/rejected": -1.125, |
|
"logps/chosen": -231.0, |
|
"logps/rejected": -280.0, |
|
"loss": 0.2532, |
|
"rewards/accuracies": 0.862500011920929, |
|
"rewards/chosen": -1.203125, |
|
"rewards/margins": 2.953125, |
|
"rewards/rejected": -4.15625, |
|
"step": 290 |
|
}, |
|
{ |
|
"epoch": 0.9646302250803859, |
|
"grad_norm": 18.047279890271277, |
|
"learning_rate": 3.772348033373063e-07, |
|
"logits/chosen": -1.2578125, |
|
"logits/rejected": -1.1796875, |
|
"logps/chosen": -227.0, |
|
"logps/rejected": -266.0, |
|
"loss": 0.227, |
|
"rewards/accuracies": 0.862500011920929, |
|
"rewards/chosen": -1.25, |
|
"rewards/margins": 2.78125, |
|
"rewards/rejected": -4.03125, |
|
"step": 300 |
|
}, |
|
{ |
|
"epoch": 0.9967845659163987, |
|
"grad_norm": 14.414629538685833, |
|
"learning_rate": 3.712753277711561e-07, |
|
"logits/chosen": -1.15625, |
|
"logits/rejected": -1.15625, |
|
"logps/chosen": -234.0, |
|
"logps/rejected": -276.0, |
|
"loss": 0.2628, |
|
"rewards/accuracies": 0.925000011920929, |
|
"rewards/chosen": -1.3203125, |
|
"rewards/margins": 3.21875, |
|
"rewards/rejected": -4.5625, |
|
"step": 310 |
|
}, |
|
{ |
|
"epoch": 1.0, |
|
"eval_logits/chosen": -1.109375, |
|
"eval_logits/rejected": -1.078125, |
|
"eval_logps/chosen": -242.0, |
|
"eval_logps/rejected": -280.0, |
|
"eval_loss": 0.25456055998802185, |
|
"eval_rewards/accuracies": 0.8928571343421936, |
|
"eval_rewards/chosen": -1.59375, |
|
"eval_rewards/margins": 2.5, |
|
"eval_rewards/rejected": -4.09375, |
|
"eval_runtime": 13.3962, |
|
"eval_samples_per_second": 14.93, |
|
"eval_steps_per_second": 0.523, |
|
"step": 311 |
|
}, |
|
{ |
|
"epoch": 1.0289389067524115, |
|
"grad_norm": 17.0863207504575, |
|
"learning_rate": 3.6531585220500593e-07, |
|
"logits/chosen": -1.1875, |
|
"logits/rejected": -1.1171875, |
|
"logps/chosen": -232.0, |
|
"logps/rejected": -278.0, |
|
"loss": 0.2133, |
|
"rewards/accuracies": 0.9375, |
|
"rewards/chosen": -1.46875, |
|
"rewards/margins": 3.171875, |
|
"rewards/rejected": -4.65625, |
|
"step": 320 |
|
}, |
|
{ |
|
"epoch": 1.0610932475884245, |
|
"grad_norm": 16.677239705070818, |
|
"learning_rate": 3.5935637663885575e-07, |
|
"logits/chosen": -1.140625, |
|
"logits/rejected": -1.078125, |
|
"logps/chosen": -227.0, |
|
"logps/rejected": -282.0, |
|
"loss": 0.1571, |
|
"rewards/accuracies": 0.9375, |
|
"rewards/chosen": -1.0546875, |
|
"rewards/margins": 3.34375, |
|
"rewards/rejected": -4.40625, |
|
"step": 330 |
|
}, |
|
{ |
|
"epoch": 1.0932475884244373, |
|
"grad_norm": 10.856248834607923, |
|
"learning_rate": 3.533969010727056e-07, |
|
"logits/chosen": -1.2421875, |
|
"logits/rejected": -1.1328125, |
|
"logps/chosen": -214.0, |
|
"logps/rejected": -266.0, |
|
"loss": 0.1592, |
|
"rewards/accuracies": 0.9624999761581421, |
|
"rewards/chosen": -0.44140625, |
|
"rewards/margins": 3.515625, |
|
"rewards/rejected": -3.953125, |
|
"step": 340 |
|
}, |
|
{ |
|
"epoch": 1.1254019292604502, |
|
"grad_norm": 13.938721593944486, |
|
"learning_rate": 3.474374255065554e-07, |
|
"logits/chosen": -1.25, |
|
"logits/rejected": -1.125, |
|
"logps/chosen": -225.0, |
|
"logps/rejected": -268.0, |
|
"loss": 0.1772, |
|
"rewards/accuracies": 0.925000011920929, |
|
"rewards/chosen": -0.609375, |
|
"rewards/margins": 3.21875, |
|
"rewards/rejected": -3.828125, |
|
"step": 350 |
|
}, |
|
{ |
|
"epoch": 1.157556270096463, |
|
"grad_norm": 10.918161410613566, |
|
"learning_rate": 3.4147794994040524e-07, |
|
"logits/chosen": -1.25, |
|
"logits/rejected": -1.15625, |
|
"logps/chosen": -220.0, |
|
"logps/rejected": -272.0, |
|
"loss": 0.1584, |
|
"rewards/accuracies": 0.9125000238418579, |
|
"rewards/chosen": -0.7421875, |
|
"rewards/margins": 3.515625, |
|
"rewards/rejected": -4.28125, |
|
"step": 360 |
|
}, |
|
{ |
|
"epoch": 1.189710610932476, |
|
"grad_norm": 12.533024868051346, |
|
"learning_rate": 3.3551847437425505e-07, |
|
"logits/chosen": -1.265625, |
|
"logits/rejected": -1.1484375, |
|
"logps/chosen": -216.0, |
|
"logps/rejected": -272.0, |
|
"loss": 0.1498, |
|
"rewards/accuracies": 0.9624999761581421, |
|
"rewards/chosen": -0.35546875, |
|
"rewards/margins": 3.890625, |
|
"rewards/rejected": -4.25, |
|
"step": 370 |
|
}, |
|
{ |
|
"epoch": 1.2218649517684887, |
|
"grad_norm": 11.822761117843957, |
|
"learning_rate": 3.295589988081049e-07, |
|
"logits/chosen": -1.2265625, |
|
"logits/rejected": -1.1484375, |
|
"logps/chosen": -230.0, |
|
"logps/rejected": -272.0, |
|
"loss": 0.1232, |
|
"rewards/accuracies": 0.9624999761581421, |
|
"rewards/chosen": -0.87890625, |
|
"rewards/margins": 3.53125, |
|
"rewards/rejected": -4.40625, |
|
"step": 380 |
|
}, |
|
{ |
|
"epoch": 1.2540192926045015, |
|
"grad_norm": 19.16313397500957, |
|
"learning_rate": 3.235995232419547e-07, |
|
"logits/chosen": -1.2734375, |
|
"logits/rejected": -1.203125, |
|
"logps/chosen": -228.0, |
|
"logps/rejected": -274.0, |
|
"loss": 0.1389, |
|
"rewards/accuracies": 0.949999988079071, |
|
"rewards/chosen": -1.3046875, |
|
"rewards/margins": 3.421875, |
|
"rewards/rejected": -4.71875, |
|
"step": 390 |
|
}, |
|
{ |
|
"epoch": 1.2861736334405145, |
|
"grad_norm": 7.920419893053243, |
|
"learning_rate": 3.176400476758045e-07, |
|
"logits/chosen": -1.265625, |
|
"logits/rejected": -1.1875, |
|
"logps/chosen": -231.0, |
|
"logps/rejected": -280.0, |
|
"loss": 0.1089, |
|
"rewards/accuracies": 0.987500011920929, |
|
"rewards/chosen": -1.2890625, |
|
"rewards/margins": 3.8125, |
|
"rewards/rejected": -5.09375, |
|
"step": 400 |
|
}, |
|
{ |
|
"epoch": 1.3183279742765273, |
|
"grad_norm": 8.689744422632383, |
|
"learning_rate": 3.116805721096543e-07, |
|
"logits/chosen": -1.3125, |
|
"logits/rejected": -1.2578125, |
|
"logps/chosen": -224.0, |
|
"logps/rejected": -270.0, |
|
"loss": 0.1357, |
|
"rewards/accuracies": 0.9125000238418579, |
|
"rewards/chosen": -0.94921875, |
|
"rewards/margins": 3.53125, |
|
"rewards/rejected": -4.46875, |
|
"step": 410 |
|
}, |
|
{ |
|
"epoch": 1.3504823151125402, |
|
"grad_norm": 20.36462500582759, |
|
"learning_rate": 3.0572109654350416e-07, |
|
"logits/chosen": -1.25, |
|
"logits/rejected": -1.203125, |
|
"logps/chosen": -224.0, |
|
"logps/rejected": -272.0, |
|
"loss": 0.1647, |
|
"rewards/accuracies": 0.9624999761581421, |
|
"rewards/chosen": -0.7109375, |
|
"rewards/margins": 3.96875, |
|
"rewards/rejected": -4.65625, |
|
"step": 420 |
|
}, |
|
{ |
|
"epoch": 1.382636655948553, |
|
"grad_norm": 14.621196452907952, |
|
"learning_rate": 2.99761620977354e-07, |
|
"logits/chosen": -1.265625, |
|
"logits/rejected": -1.1875, |
|
"logps/chosen": -230.0, |
|
"logps/rejected": -288.0, |
|
"loss": 0.1466, |
|
"rewards/accuracies": 0.9624999761581421, |
|
"rewards/chosen": -1.2578125, |
|
"rewards/margins": 3.9375, |
|
"rewards/rejected": -5.1875, |
|
"step": 430 |
|
}, |
|
{ |
|
"epoch": 1.414790996784566, |
|
"grad_norm": 17.49354229044474, |
|
"learning_rate": 2.938021454112038e-07, |
|
"logits/chosen": -1.2734375, |
|
"logits/rejected": -1.234375, |
|
"logps/chosen": -232.0, |
|
"logps/rejected": -282.0, |
|
"loss": 0.1569, |
|
"rewards/accuracies": 0.949999988079071, |
|
"rewards/chosen": -1.2109375, |
|
"rewards/margins": 3.84375, |
|
"rewards/rejected": -5.0625, |
|
"step": 440 |
|
}, |
|
{ |
|
"epoch": 1.4469453376205788, |
|
"grad_norm": 17.99399315172917, |
|
"learning_rate": 2.878426698450536e-07, |
|
"logits/chosen": -1.2734375, |
|
"logits/rejected": -1.2578125, |
|
"logps/chosen": -229.0, |
|
"logps/rejected": -280.0, |
|
"loss": 0.1453, |
|
"rewards/accuracies": 0.925000011920929, |
|
"rewards/chosen": -1.390625, |
|
"rewards/margins": 3.75, |
|
"rewards/rejected": -5.125, |
|
"step": 450 |
|
}, |
|
{ |
|
"epoch": 1.4790996784565915, |
|
"grad_norm": 18.416796356815684, |
|
"learning_rate": 2.8188319427890346e-07, |
|
"logits/chosen": -1.2890625, |
|
"logits/rejected": -1.2421875, |
|
"logps/chosen": -224.0, |
|
"logps/rejected": -282.0, |
|
"loss": 0.1459, |
|
"rewards/accuracies": 0.925000011920929, |
|
"rewards/chosen": -0.6328125, |
|
"rewards/margins": 4.46875, |
|
"rewards/rejected": -5.09375, |
|
"step": 460 |
|
}, |
|
{ |
|
"epoch": 1.5112540192926045, |
|
"grad_norm": 19.457413865686302, |
|
"learning_rate": 2.759237187127533e-07, |
|
"logits/chosen": -1.234375, |
|
"logits/rejected": -1.15625, |
|
"logps/chosen": -224.0, |
|
"logps/rejected": -290.0, |
|
"loss": 0.1404, |
|
"rewards/accuracies": 0.9624999761581421, |
|
"rewards/chosen": -1.234375, |
|
"rewards/margins": 4.40625, |
|
"rewards/rejected": -5.65625, |
|
"step": 470 |
|
}, |
|
{ |
|
"epoch": 1.5434083601286175, |
|
"grad_norm": 18.71539290622009, |
|
"learning_rate": 2.699642431466031e-07, |
|
"logits/chosen": -1.25, |
|
"logits/rejected": -1.2421875, |
|
"logps/chosen": -225.0, |
|
"logps/rejected": -278.0, |
|
"loss": 0.1567, |
|
"rewards/accuracies": 0.9624999761581421, |
|
"rewards/chosen": -1.0546875, |
|
"rewards/margins": 3.984375, |
|
"rewards/rejected": -5.03125, |
|
"step": 480 |
|
}, |
|
{ |
|
"epoch": 1.5755627009646302, |
|
"grad_norm": 7.638738229331756, |
|
"learning_rate": 2.640047675804529e-07, |
|
"logits/chosen": -1.328125, |
|
"logits/rejected": -1.203125, |
|
"logps/chosen": -222.0, |
|
"logps/rejected": -280.0, |
|
"loss": 0.1265, |
|
"rewards/accuracies": 0.9624999761581421, |
|
"rewards/chosen": -0.796875, |
|
"rewards/margins": 4.25, |
|
"rewards/rejected": -5.03125, |
|
"step": 490 |
|
}, |
|
{ |
|
"epoch": 1.607717041800643, |
|
"grad_norm": 18.04383652392598, |
|
"learning_rate": 2.5804529201430277e-07, |
|
"logits/chosen": -1.28125, |
|
"logits/rejected": -1.1875, |
|
"logps/chosen": -234.0, |
|
"logps/rejected": -282.0, |
|
"loss": 0.1367, |
|
"rewards/accuracies": 0.925000011920929, |
|
"rewards/chosen": -1.6484375, |
|
"rewards/margins": 3.546875, |
|
"rewards/rejected": -5.1875, |
|
"step": 500 |
|
}, |
|
{ |
|
"epoch": 1.639871382636656, |
|
"grad_norm": 18.099954428385335, |
|
"learning_rate": 2.520858164481526e-07, |
|
"logits/chosen": -1.3125, |
|
"logits/rejected": -1.234375, |
|
"logps/chosen": -235.0, |
|
"logps/rejected": -290.0, |
|
"loss": 0.1173, |
|
"rewards/accuracies": 0.987500011920929, |
|
"rewards/chosen": -1.5, |
|
"rewards/margins": 4.1875, |
|
"rewards/rejected": -5.6875, |
|
"step": 510 |
|
}, |
|
{ |
|
"epoch": 1.6720257234726688, |
|
"grad_norm": 19.636653808772405, |
|
"learning_rate": 2.461263408820024e-07, |
|
"logits/chosen": -1.34375, |
|
"logits/rejected": -1.265625, |
|
"logps/chosen": -222.0, |
|
"logps/rejected": -284.0, |
|
"loss": 0.1506, |
|
"rewards/accuracies": 0.949999988079071, |
|
"rewards/chosen": -1.296875, |
|
"rewards/margins": 3.9375, |
|
"rewards/rejected": -5.25, |
|
"step": 520 |
|
}, |
|
{ |
|
"epoch": 1.7041800643086815, |
|
"grad_norm": 18.821881156411266, |
|
"learning_rate": 2.401668653158522e-07, |
|
"logits/chosen": -1.3046875, |
|
"logits/rejected": -1.234375, |
|
"logps/chosen": -232.0, |
|
"logps/rejected": -284.0, |
|
"loss": 0.122, |
|
"rewards/accuracies": 0.9750000238418579, |
|
"rewards/chosen": -1.3125, |
|
"rewards/margins": 4.375, |
|
"rewards/rejected": -5.6875, |
|
"step": 530 |
|
}, |
|
{ |
|
"epoch": 1.7363344051446945, |
|
"grad_norm": 16.68080760246729, |
|
"learning_rate": 2.3420738974970201e-07, |
|
"logits/chosen": -1.265625, |
|
"logits/rejected": -1.1796875, |
|
"logps/chosen": -238.0, |
|
"logps/rejected": -294.0, |
|
"loss": 0.1301, |
|
"rewards/accuracies": 0.925000011920929, |
|
"rewards/chosen": -1.609375, |
|
"rewards/margins": 4.03125, |
|
"rewards/rejected": -5.65625, |
|
"step": 540 |
|
}, |
|
{ |
|
"epoch": 1.7684887459807075, |
|
"grad_norm": 11.1883219134591, |
|
"learning_rate": 2.2824791418355183e-07, |
|
"logits/chosen": -1.2734375, |
|
"logits/rejected": -1.21875, |
|
"logps/chosen": -232.0, |
|
"logps/rejected": -282.0, |
|
"loss": 0.1439, |
|
"rewards/accuracies": 0.9375, |
|
"rewards/chosen": -1.4140625, |
|
"rewards/margins": 3.515625, |
|
"rewards/rejected": -4.9375, |
|
"step": 550 |
|
}, |
|
{ |
|
"epoch": 1.8006430868167203, |
|
"grad_norm": 14.402157991184708, |
|
"learning_rate": 2.2228843861740164e-07, |
|
"logits/chosen": -1.3046875, |
|
"logits/rejected": -1.203125, |
|
"logps/chosen": -223.0, |
|
"logps/rejected": -278.0, |
|
"loss": 0.1176, |
|
"rewards/accuracies": 0.9624999761581421, |
|
"rewards/chosen": -1.1640625, |
|
"rewards/margins": 4.1875, |
|
"rewards/rejected": -5.375, |
|
"step": 560 |
|
}, |
|
{ |
|
"epoch": 1.832797427652733, |
|
"grad_norm": 8.543260510191107, |
|
"learning_rate": 2.1632896305125148e-07, |
|
"logits/chosen": -1.3046875, |
|
"logits/rejected": -1.2109375, |
|
"logps/chosen": -243.0, |
|
"logps/rejected": -292.0, |
|
"loss": 0.1198, |
|
"rewards/accuracies": 0.987500011920929, |
|
"rewards/chosen": -1.8515625, |
|
"rewards/margins": 4.28125, |
|
"rewards/rejected": -6.125, |
|
"step": 570 |
|
}, |
|
{ |
|
"epoch": 1.864951768488746, |
|
"grad_norm": 14.538481332349868, |
|
"learning_rate": 2.1036948748510132e-07, |
|
"logits/chosen": -1.3125, |
|
"logits/rejected": -1.2421875, |
|
"logps/chosen": -235.0, |
|
"logps/rejected": -296.0, |
|
"loss": 0.1361, |
|
"rewards/accuracies": 0.9624999761581421, |
|
"rewards/chosen": -1.4375, |
|
"rewards/margins": 4.59375, |
|
"rewards/rejected": -6.03125, |
|
"step": 580 |
|
}, |
|
{ |
|
"epoch": 1.897106109324759, |
|
"grad_norm": 6.224897480222425, |
|
"learning_rate": 2.0441001191895113e-07, |
|
"logits/chosen": -1.2890625, |
|
"logits/rejected": -1.21875, |
|
"logps/chosen": -232.0, |
|
"logps/rejected": -294.0, |
|
"loss": 0.1419, |
|
"rewards/accuracies": 0.9125000238418579, |
|
"rewards/chosen": -1.4921875, |
|
"rewards/margins": 4.53125, |
|
"rewards/rejected": -6.03125, |
|
"step": 590 |
|
}, |
|
{ |
|
"epoch": 1.9292604501607717, |
|
"grad_norm": 12.988933840362778, |
|
"learning_rate": 1.9845053635280097e-07, |
|
"logits/chosen": -1.2578125, |
|
"logits/rejected": -1.21875, |
|
"logps/chosen": -234.0, |
|
"logps/rejected": -286.0, |
|
"loss": 0.1352, |
|
"rewards/accuracies": 0.9750000238418579, |
|
"rewards/chosen": -1.578125, |
|
"rewards/margins": 4.125, |
|
"rewards/rejected": -5.71875, |
|
"step": 600 |
|
}, |
|
{ |
|
"epoch": 1.9614147909967845, |
|
"grad_norm": 17.555655322085006, |
|
"learning_rate": 1.9249106078665075e-07, |
|
"logits/chosen": -1.34375, |
|
"logits/rejected": -1.265625, |
|
"logps/chosen": -226.0, |
|
"logps/rejected": -288.0, |
|
"loss": 0.1242, |
|
"rewards/accuracies": 0.9750000238418579, |
|
"rewards/chosen": -1.0859375, |
|
"rewards/margins": 4.6875, |
|
"rewards/rejected": -5.75, |
|
"step": 610 |
|
}, |
|
{ |
|
"epoch": 1.9935691318327975, |
|
"grad_norm": 17.434413286354665, |
|
"learning_rate": 1.865315852205006e-07, |
|
"logits/chosen": -1.2890625, |
|
"logits/rejected": -1.2734375, |
|
"logps/chosen": -227.0, |
|
"logps/rejected": -280.0, |
|
"loss": 0.1197, |
|
"rewards/accuracies": 0.949999988079071, |
|
"rewards/chosen": -1.2265625, |
|
"rewards/margins": 4.25, |
|
"rewards/rejected": -5.5, |
|
"step": 620 |
|
}, |
|
{ |
|
"epoch": 2.0, |
|
"eval_logits/chosen": -1.1953125, |
|
"eval_logits/rejected": -1.15625, |
|
"eval_logps/chosen": -244.0, |
|
"eval_logps/rejected": -290.0, |
|
"eval_loss": 0.21818359196186066, |
|
"eval_rewards/accuracies": 0.9107142686843872, |
|
"eval_rewards/chosen": -1.828125, |
|
"eval_rewards/margins": 3.34375, |
|
"eval_rewards/rejected": -5.15625, |
|
"eval_runtime": 13.1854, |
|
"eval_samples_per_second": 15.168, |
|
"eval_steps_per_second": 0.531, |
|
"step": 622 |
|
}, |
|
{ |
|
"epoch": 2.0257234726688105, |
|
"grad_norm": 17.447272647052497, |
|
"learning_rate": 1.805721096543504e-07, |
|
"logits/chosen": -1.328125, |
|
"logits/rejected": -1.265625, |
|
"logps/chosen": -231.0, |
|
"logps/rejected": -282.0, |
|
"loss": 0.0996, |
|
"rewards/accuracies": 0.949999988079071, |
|
"rewards/chosen": -1.640625, |
|
"rewards/margins": 3.96875, |
|
"rewards/rejected": -5.625, |
|
"step": 630 |
|
}, |
|
{ |
|
"epoch": 2.057877813504823, |
|
"grad_norm": 6.687947493471657, |
|
"learning_rate": 1.7461263408820024e-07, |
|
"logits/chosen": -1.375, |
|
"logits/rejected": -1.2890625, |
|
"logps/chosen": -230.0, |
|
"logps/rejected": -286.0, |
|
"loss": 0.0796, |
|
"rewards/accuracies": 0.987500011920929, |
|
"rewards/chosen": -1.34375, |
|
"rewards/margins": 4.28125, |
|
"rewards/rejected": -5.625, |
|
"step": 640 |
|
}, |
|
{ |
|
"epoch": 2.090032154340836, |
|
"grad_norm": 7.218763454804789, |
|
"learning_rate": 1.6865315852205006e-07, |
|
"logits/chosen": -1.3671875, |
|
"logits/rejected": -1.2421875, |
|
"logps/chosen": -221.0, |
|
"logps/rejected": -276.0, |
|
"loss": 0.0926, |
|
"rewards/accuracies": 0.9750000238418579, |
|
"rewards/chosen": -0.94921875, |
|
"rewards/margins": 4.28125, |
|
"rewards/rejected": -5.21875, |
|
"step": 650 |
|
}, |
|
{ |
|
"epoch": 2.122186495176849, |
|
"grad_norm": 7.374097342668032, |
|
"learning_rate": 1.626936829558999e-07, |
|
"logits/chosen": -1.3359375, |
|
"logits/rejected": -1.2578125, |
|
"logps/chosen": -231.0, |
|
"logps/rejected": -284.0, |
|
"loss": 0.1181, |
|
"rewards/accuracies": 0.9750000238418579, |
|
"rewards/chosen": -1.0546875, |
|
"rewards/margins": 4.40625, |
|
"rewards/rejected": -5.46875, |
|
"step": 660 |
|
}, |
|
{ |
|
"epoch": 2.154340836012862, |
|
"grad_norm": 4.341286139803693, |
|
"learning_rate": 1.5673420738974968e-07, |
|
"logits/chosen": -1.2890625, |
|
"logits/rejected": -1.2109375, |
|
"logps/chosen": -232.0, |
|
"logps/rejected": -292.0, |
|
"loss": 0.0822, |
|
"rewards/accuracies": 0.987500011920929, |
|
"rewards/chosen": -1.4375, |
|
"rewards/margins": 4.5, |
|
"rewards/rejected": -5.9375, |
|
"step": 670 |
|
}, |
|
{ |
|
"epoch": 2.1864951768488745, |
|
"grad_norm": 7.206372321957506, |
|
"learning_rate": 1.5077473182359952e-07, |
|
"logits/chosen": -1.296875, |
|
"logits/rejected": -1.21875, |
|
"logps/chosen": -230.0, |
|
"logps/rejected": -294.0, |
|
"loss": 0.0855, |
|
"rewards/accuracies": 0.987500011920929, |
|
"rewards/chosen": -1.1328125, |
|
"rewards/margins": 4.71875, |
|
"rewards/rejected": -5.875, |
|
"step": 680 |
|
}, |
|
{ |
|
"epoch": 2.2186495176848875, |
|
"grad_norm": 8.477557654975314, |
|
"learning_rate": 1.4481525625744933e-07, |
|
"logits/chosen": -1.21875, |
|
"logits/rejected": -1.1875, |
|
"logps/chosen": -226.0, |
|
"logps/rejected": -294.0, |
|
"loss": 0.0702, |
|
"rewards/accuracies": 0.987500011920929, |
|
"rewards/chosen": -1.1640625, |
|
"rewards/margins": 5.28125, |
|
"rewards/rejected": -6.46875, |
|
"step": 690 |
|
}, |
|
{ |
|
"epoch": 2.2508038585209005, |
|
"grad_norm": 13.009501084613339, |
|
"learning_rate": 1.3885578069129917e-07, |
|
"logits/chosen": -1.3515625, |
|
"logits/rejected": -1.2421875, |
|
"logps/chosen": -223.0, |
|
"logps/rejected": -290.0, |
|
"loss": 0.0737, |
|
"rewards/accuracies": 0.9750000238418579, |
|
"rewards/chosen": -0.546875, |
|
"rewards/margins": 5.125, |
|
"rewards/rejected": -5.65625, |
|
"step": 700 |
|
}, |
|
{ |
|
"epoch": 2.282958199356913, |
|
"grad_norm": 7.401785467318329, |
|
"learning_rate": 1.3289630512514898e-07, |
|
"logits/chosen": -1.296875, |
|
"logits/rejected": -1.203125, |
|
"logps/chosen": -237.0, |
|
"logps/rejected": -294.0, |
|
"loss": 0.0575, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": -1.3515625, |
|
"rewards/margins": 4.84375, |
|
"rewards/rejected": -6.1875, |
|
"step": 710 |
|
}, |
|
{ |
|
"epoch": 2.315112540192926, |
|
"grad_norm": 7.055774707472605, |
|
"learning_rate": 1.2693682955899882e-07, |
|
"logits/chosen": -1.3125, |
|
"logits/rejected": -1.25, |
|
"logps/chosen": -226.0, |
|
"logps/rejected": -288.0, |
|
"loss": 0.0686, |
|
"rewards/accuracies": 0.987500011920929, |
|
"rewards/chosen": -0.9921875, |
|
"rewards/margins": 4.90625, |
|
"rewards/rejected": -5.90625, |
|
"step": 720 |
|
}, |
|
{ |
|
"epoch": 2.347266881028939, |
|
"grad_norm": 6.572242774401927, |
|
"learning_rate": 1.2097735399284863e-07, |
|
"logits/chosen": -1.390625, |
|
"logits/rejected": -1.296875, |
|
"logps/chosen": -219.0, |
|
"logps/rejected": -288.0, |
|
"loss": 0.0771, |
|
"rewards/accuracies": 0.949999988079071, |
|
"rewards/chosen": -0.86328125, |
|
"rewards/margins": 4.96875, |
|
"rewards/rejected": -5.84375, |
|
"step": 730 |
|
}, |
|
{ |
|
"epoch": 2.379421221864952, |
|
"grad_norm": 15.366208632575415, |
|
"learning_rate": 1.1501787842669844e-07, |
|
"logits/chosen": -1.2734375, |
|
"logits/rejected": -1.2265625, |
|
"logps/chosen": -235.0, |
|
"logps/rejected": -288.0, |
|
"loss": 0.0852, |
|
"rewards/accuracies": 0.949999988079071, |
|
"rewards/chosen": -1.4140625, |
|
"rewards/margins": 4.4375, |
|
"rewards/rejected": -5.875, |
|
"step": 740 |
|
}, |
|
{ |
|
"epoch": 2.4115755627009645, |
|
"grad_norm": 12.35630685312423, |
|
"learning_rate": 1.0905840286054827e-07, |
|
"logits/chosen": -1.390625, |
|
"logits/rejected": -1.296875, |
|
"logps/chosen": -225.0, |
|
"logps/rejected": -284.0, |
|
"loss": 0.0618, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": -1.3046875, |
|
"rewards/margins": 4.75, |
|
"rewards/rejected": -6.0625, |
|
"step": 750 |
|
}, |
|
{ |
|
"epoch": 2.4437299035369775, |
|
"grad_norm": 7.767813770736527, |
|
"learning_rate": 1.030989272943981e-07, |
|
"logits/chosen": -1.3359375, |
|
"logits/rejected": -1.25, |
|
"logps/chosen": -227.0, |
|
"logps/rejected": -294.0, |
|
"loss": 0.0739, |
|
"rewards/accuracies": 0.987500011920929, |
|
"rewards/chosen": -1.1484375, |
|
"rewards/margins": 5.34375, |
|
"rewards/rejected": -6.5, |
|
"step": 760 |
|
}, |
|
{ |
|
"epoch": 2.4758842443729905, |
|
"grad_norm": 6.935057168009298, |
|
"learning_rate": 9.713945172824791e-08, |
|
"logits/chosen": -1.40625, |
|
"logits/rejected": -1.3046875, |
|
"logps/chosen": -233.0, |
|
"logps/rejected": -288.0, |
|
"loss": 0.09, |
|
"rewards/accuracies": 0.9750000238418579, |
|
"rewards/chosen": -1.4140625, |
|
"rewards/margins": 4.375, |
|
"rewards/rejected": -5.78125, |
|
"step": 770 |
|
}, |
|
{ |
|
"epoch": 2.508038585209003, |
|
"grad_norm": 7.387415658831761, |
|
"learning_rate": 9.117997616209773e-08, |
|
"logits/chosen": -1.4140625, |
|
"logits/rejected": -1.25, |
|
"logps/chosen": -229.0, |
|
"logps/rejected": -290.0, |
|
"loss": 0.0754, |
|
"rewards/accuracies": 0.987500011920929, |
|
"rewards/chosen": -1.234375, |
|
"rewards/margins": 4.9375, |
|
"rewards/rejected": -6.15625, |
|
"step": 780 |
|
}, |
|
{ |
|
"epoch": 2.540192926045016, |
|
"grad_norm": 7.499201582822142, |
|
"learning_rate": 8.522050059594756e-08, |
|
"logits/chosen": -1.359375, |
|
"logits/rejected": -1.265625, |
|
"logps/chosen": -233.0, |
|
"logps/rejected": -302.0, |
|
"loss": 0.0842, |
|
"rewards/accuracies": 0.987500011920929, |
|
"rewards/chosen": -1.6328125, |
|
"rewards/margins": 5.25, |
|
"rewards/rejected": -6.875, |
|
"step": 790 |
|
}, |
|
{ |
|
"epoch": 2.572347266881029, |
|
"grad_norm": 5.063871367224483, |
|
"learning_rate": 7.926102502979737e-08, |
|
"logits/chosen": -1.3203125, |
|
"logits/rejected": -1.2421875, |
|
"logps/chosen": -241.0, |
|
"logps/rejected": -302.0, |
|
"loss": 0.0636, |
|
"rewards/accuracies": 0.9624999761581421, |
|
"rewards/chosen": -1.828125, |
|
"rewards/margins": 4.6875, |
|
"rewards/rejected": -6.5, |
|
"step": 800 |
|
}, |
|
{ |
|
"epoch": 2.604501607717042, |
|
"grad_norm": 6.927135247218095, |
|
"learning_rate": 7.33015494636472e-08, |
|
"logits/chosen": -1.453125, |
|
"logits/rejected": -1.296875, |
|
"logps/chosen": -226.0, |
|
"logps/rejected": -294.0, |
|
"loss": 0.0646, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": -0.8828125, |
|
"rewards/margins": 5.5, |
|
"rewards/rejected": -6.375, |
|
"step": 810 |
|
}, |
|
{ |
|
"epoch": 2.6366559485530545, |
|
"grad_norm": 7.241185858179814, |
|
"learning_rate": 6.734207389749702e-08, |
|
"logits/chosen": -1.375, |
|
"logits/rejected": -1.28125, |
|
"logps/chosen": -228.0, |
|
"logps/rejected": -290.0, |
|
"loss": 0.0749, |
|
"rewards/accuracies": 0.9750000238418579, |
|
"rewards/chosen": -1.4765625, |
|
"rewards/margins": 4.71875, |
|
"rewards/rejected": -6.1875, |
|
"step": 820 |
|
}, |
|
{ |
|
"epoch": 2.6688102893890675, |
|
"grad_norm": 9.774371876097607, |
|
"learning_rate": 6.138259833134683e-08, |
|
"logits/chosen": -1.3671875, |
|
"logits/rejected": -1.2421875, |
|
"logps/chosen": -241.0, |
|
"logps/rejected": -302.0, |
|
"loss": 0.0606, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": -1.578125, |
|
"rewards/margins": 4.75, |
|
"rewards/rejected": -6.3125, |
|
"step": 830 |
|
}, |
|
{ |
|
"epoch": 2.7009646302250805, |
|
"grad_norm": 15.985434869180738, |
|
"learning_rate": 5.542312276519666e-08, |
|
"logits/chosen": -1.265625, |
|
"logits/rejected": -1.1953125, |
|
"logps/chosen": -236.0, |
|
"logps/rejected": -292.0, |
|
"loss": 0.0909, |
|
"rewards/accuracies": 0.987500011920929, |
|
"rewards/chosen": -1.1953125, |
|
"rewards/margins": 4.875, |
|
"rewards/rejected": -6.0625, |
|
"step": 840 |
|
}, |
|
{ |
|
"epoch": 2.733118971061093, |
|
"grad_norm": 22.758284549493595, |
|
"learning_rate": 4.9463647199046485e-08, |
|
"logits/chosen": -1.390625, |
|
"logits/rejected": -1.3203125, |
|
"logps/chosen": -232.0, |
|
"logps/rejected": -292.0, |
|
"loss": 0.1074, |
|
"rewards/accuracies": 0.9750000238418579, |
|
"rewards/chosen": -1.1796875, |
|
"rewards/margins": 4.9375, |
|
"rewards/rejected": -6.125, |
|
"step": 850 |
|
}, |
|
{ |
|
"epoch": 2.765273311897106, |
|
"grad_norm": 10.96281071419714, |
|
"learning_rate": 4.3504171632896303e-08, |
|
"logits/chosen": -1.3046875, |
|
"logits/rejected": -1.21875, |
|
"logps/chosen": -234.0, |
|
"logps/rejected": -302.0, |
|
"loss": 0.0625, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": -1.453125, |
|
"rewards/margins": 5.15625, |
|
"rewards/rejected": -6.625, |
|
"step": 860 |
|
}, |
|
{ |
|
"epoch": 2.797427652733119, |
|
"grad_norm": 13.67107616299505, |
|
"learning_rate": 3.754469606674612e-08, |
|
"logits/chosen": -1.296875, |
|
"logits/rejected": -1.2265625, |
|
"logps/chosen": -229.0, |
|
"logps/rejected": -290.0, |
|
"loss": 0.0791, |
|
"rewards/accuracies": 0.987500011920929, |
|
"rewards/chosen": -1.125, |
|
"rewards/margins": 4.875, |
|
"rewards/rejected": -6.0, |
|
"step": 870 |
|
}, |
|
{ |
|
"epoch": 2.829581993569132, |
|
"grad_norm": 9.842883541605707, |
|
"learning_rate": 3.158522050059595e-08, |
|
"logits/chosen": -1.2734375, |
|
"logits/rejected": -1.21875, |
|
"logps/chosen": -239.0, |
|
"logps/rejected": -300.0, |
|
"loss": 0.0708, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": -1.7265625, |
|
"rewards/margins": 4.875, |
|
"rewards/rejected": -6.625, |
|
"step": 880 |
|
}, |
|
{ |
|
"epoch": 2.861736334405145, |
|
"grad_norm": 10.023217691488869, |
|
"learning_rate": 2.562574493444577e-08, |
|
"logits/chosen": -1.3828125, |
|
"logits/rejected": -1.3125, |
|
"logps/chosen": -234.0, |
|
"logps/rejected": -304.0, |
|
"loss": 0.0521, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": -1.3671875, |
|
"rewards/margins": 5.6875, |
|
"rewards/rejected": -7.0625, |
|
"step": 890 |
|
}, |
|
{ |
|
"epoch": 2.8938906752411575, |
|
"grad_norm": 15.555782009030803, |
|
"learning_rate": 1.966626936829559e-08, |
|
"logits/chosen": -1.3828125, |
|
"logits/rejected": -1.2578125, |
|
"logps/chosen": -238.0, |
|
"logps/rejected": -300.0, |
|
"loss": 0.087, |
|
"rewards/accuracies": 0.9624999761581421, |
|
"rewards/chosen": -1.875, |
|
"rewards/margins": 4.75, |
|
"rewards/rejected": -6.65625, |
|
"step": 900 |
|
}, |
|
{ |
|
"epoch": 2.9260450160771705, |
|
"grad_norm": 7.88303291402779, |
|
"learning_rate": 1.370679380214541e-08, |
|
"logits/chosen": -1.3515625, |
|
"logits/rejected": -1.265625, |
|
"logps/chosen": -228.0, |
|
"logps/rejected": -294.0, |
|
"loss": 0.0659, |
|
"rewards/accuracies": 0.987500011920929, |
|
"rewards/chosen": -1.2578125, |
|
"rewards/margins": 5.1875, |
|
"rewards/rejected": -6.4375, |
|
"step": 910 |
|
}, |
|
{ |
|
"epoch": 2.958199356913183, |
|
"grad_norm": 29.29264784081242, |
|
"learning_rate": 7.747318235995233e-09, |
|
"logits/chosen": -1.359375, |
|
"logits/rejected": -1.3046875, |
|
"logps/chosen": -225.0, |
|
"logps/rejected": -284.0, |
|
"loss": 0.0591, |
|
"rewards/accuracies": 0.9750000238418579, |
|
"rewards/chosen": -1.1171875, |
|
"rewards/margins": 4.90625, |
|
"rewards/rejected": -6.03125, |
|
"step": 920 |
|
}, |
|
{ |
|
"epoch": 2.990353697749196, |
|
"grad_norm": 3.502420852724699, |
|
"learning_rate": 1.7878426698450536e-09, |
|
"logits/chosen": -1.3515625, |
|
"logits/rejected": -1.2265625, |
|
"logps/chosen": -234.0, |
|
"logps/rejected": -296.0, |
|
"loss": 0.065, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": -1.390625, |
|
"rewards/margins": 4.8125, |
|
"rewards/rejected": -6.1875, |
|
"step": 930 |
|
}, |
|
{ |
|
"epoch": 3.0, |
|
"eval_logits/chosen": -1.234375, |
|
"eval_logits/rejected": -1.1953125, |
|
"eval_logps/chosen": -244.0, |
|
"eval_logps/rejected": -294.0, |
|
"eval_loss": 0.22002440690994263, |
|
"eval_rewards/accuracies": 0.9107142686843872, |
|
"eval_rewards/chosen": -1.8203125, |
|
"eval_rewards/margins": 3.796875, |
|
"eval_rewards/rejected": -5.625, |
|
"eval_runtime": 14.8376, |
|
"eval_samples_per_second": 13.479, |
|
"eval_steps_per_second": 0.472, |
|
"step": 933 |
|
}, |
|
{ |
|
"epoch": 3.0, |
|
"step": 933, |
|
"total_flos": 0.0, |
|
"train_loss": 0.2005606321148806, |
|
"train_runtime": 5242.1427, |
|
"train_samples_per_second": 5.68, |
|
"train_steps_per_second": 0.178 |
|
} |
|
], |
|
"logging_steps": 10, |
|
"max_steps": 933, |
|
"num_input_tokens_seen": 0, |
|
"num_train_epochs": 3, |
|
"save_steps": 500, |
|
"stateful_callbacks": { |
|
"TrainerControl": { |
|
"args": { |
|
"should_epoch_stop": false, |
|
"should_evaluate": false, |
|
"should_log": false, |
|
"should_save": true, |
|
"should_training_stop": true |
|
}, |
|
"attributes": {} |
|
} |
|
}, |
|
"total_flos": 0.0, |
|
"train_batch_size": 8, |
|
"trial_name": null, |
|
"trial_params": null |
|
} |
|
|