|
{ |
|
"best_metric": null, |
|
"best_model_checkpoint": null, |
|
"epoch": 1.0, |
|
"eval_steps": 100, |
|
"global_step": 2038, |
|
"is_hyper_param_search": false, |
|
"is_local_process_zero": true, |
|
"is_world_process_zero": true, |
|
"log_history": [ |
|
{ |
|
"epoch": 0.0, |
|
"grad_norm": 11.418422500469418, |
|
"learning_rate": 2.4509803921568627e-09, |
|
"logits/chosen": -0.4609375, |
|
"logits/rejected": -0.5625, |
|
"logps/chosen": -1832.0, |
|
"logps/rejected": -1832.0, |
|
"loss": 0.6914, |
|
"rewards/accuracies": 0.0, |
|
"rewards/chosen": 0.0, |
|
"rewards/margins": 0.0, |
|
"rewards/rejected": 0.0, |
|
"step": 1 |
|
}, |
|
{ |
|
"epoch": 0.0, |
|
"grad_norm": 10.679791571668222, |
|
"learning_rate": 2.4509803921568626e-08, |
|
"logits/chosen": -0.55859375, |
|
"logits/rejected": -0.58203125, |
|
"logps/chosen": -2784.0, |
|
"logps/rejected": -2624.0, |
|
"loss": 0.6994, |
|
"rewards/accuracies": 0.2888889014720917, |
|
"rewards/chosen": -0.0011749267578125, |
|
"rewards/margins": -0.0142822265625, |
|
"rewards/rejected": 0.01312255859375, |
|
"step": 10 |
|
}, |
|
{ |
|
"epoch": 0.01, |
|
"grad_norm": 9.399862723559004, |
|
"learning_rate": 4.901960784313725e-08, |
|
"logits/chosen": -0.62109375, |
|
"logits/rejected": -0.72265625, |
|
"logps/chosen": -2064.0, |
|
"logps/rejected": -1632.0, |
|
"loss": 0.6943, |
|
"rewards/accuracies": 0.4000000059604645, |
|
"rewards/chosen": 0.01373291015625, |
|
"rewards/margins": 0.012939453125, |
|
"rewards/rejected": 0.00079345703125, |
|
"step": 20 |
|
}, |
|
{ |
|
"epoch": 0.01, |
|
"grad_norm": 11.520323557414129, |
|
"learning_rate": 7.352941176470588e-08, |
|
"logits/chosen": -0.61328125, |
|
"logits/rejected": -0.6015625, |
|
"logps/chosen": -1984.0, |
|
"logps/rejected": -1968.0, |
|
"loss": 0.6943, |
|
"rewards/accuracies": 0.36000004410743713, |
|
"rewards/chosen": -0.01300048828125, |
|
"rewards/margins": -0.0107421875, |
|
"rewards/rejected": -0.002197265625, |
|
"step": 30 |
|
}, |
|
{ |
|
"epoch": 0.02, |
|
"grad_norm": 11.053039743567291, |
|
"learning_rate": 9.80392156862745e-08, |
|
"logits/chosen": -0.5078125, |
|
"logits/rejected": -0.54296875, |
|
"logps/chosen": -2176.0, |
|
"logps/rejected": -2024.0, |
|
"loss": 0.69, |
|
"rewards/accuracies": 0.36000004410743713, |
|
"rewards/chosen": 0.01409912109375, |
|
"rewards/margins": 0.0230712890625, |
|
"rewards/rejected": -0.009033203125, |
|
"step": 40 |
|
}, |
|
{ |
|
"epoch": 0.02, |
|
"grad_norm": 10.828208455861878, |
|
"learning_rate": 1.2254901960784314e-07, |
|
"logits/chosen": -0.6015625, |
|
"logits/rejected": -0.703125, |
|
"logps/chosen": -2080.0, |
|
"logps/rejected": -1624.0, |
|
"loss": 0.6967, |
|
"rewards/accuracies": 0.4000000059604645, |
|
"rewards/chosen": 0.0101318359375, |
|
"rewards/margins": 0.00958251953125, |
|
"rewards/rejected": 0.000507354736328125, |
|
"step": 50 |
|
}, |
|
{ |
|
"epoch": 0.03, |
|
"grad_norm": 10.30578886430554, |
|
"learning_rate": 1.4705882352941175e-07, |
|
"logits/chosen": -0.5625, |
|
"logits/rejected": -0.703125, |
|
"logps/chosen": -2608.0, |
|
"logps/rejected": -1904.0, |
|
"loss": 0.6971, |
|
"rewards/accuracies": 0.40000003576278687, |
|
"rewards/chosen": -0.004302978515625, |
|
"rewards/margins": -0.017578125, |
|
"rewards/rejected": 0.01324462890625, |
|
"step": 60 |
|
}, |
|
{ |
|
"epoch": 0.03, |
|
"grad_norm": 11.148448836585857, |
|
"learning_rate": 1.715686274509804e-07, |
|
"logits/chosen": -0.66015625, |
|
"logits/rejected": -0.7109375, |
|
"logps/chosen": -2112.0, |
|
"logps/rejected": -1880.0, |
|
"loss": 0.6893, |
|
"rewards/accuracies": 0.5400000214576721, |
|
"rewards/chosen": 0.055419921875, |
|
"rewards/margins": 0.03466796875, |
|
"rewards/rejected": 0.020751953125, |
|
"step": 70 |
|
}, |
|
{ |
|
"epoch": 0.04, |
|
"grad_norm": 9.100710948464306, |
|
"learning_rate": 1.96078431372549e-07, |
|
"logits/chosen": -0.53515625, |
|
"logits/rejected": -0.5625, |
|
"logps/chosen": -2256.0, |
|
"logps/rejected": -2040.0, |
|
"loss": 0.6961, |
|
"rewards/accuracies": 0.41999998688697815, |
|
"rewards/chosen": 0.050537109375, |
|
"rewards/margins": 0.0162353515625, |
|
"rewards/rejected": 0.034423828125, |
|
"step": 80 |
|
}, |
|
{ |
|
"epoch": 0.04, |
|
"grad_norm": 11.809225699546186, |
|
"learning_rate": 2.2058823529411763e-07, |
|
"logits/chosen": -0.52734375, |
|
"logits/rejected": -0.625, |
|
"logps/chosen": -2000.0, |
|
"logps/rejected": -1712.0, |
|
"loss": 0.6807, |
|
"rewards/accuracies": 0.48000001907348633, |
|
"rewards/chosen": 0.0888671875, |
|
"rewards/margins": 0.0196533203125, |
|
"rewards/rejected": 0.0693359375, |
|
"step": 90 |
|
}, |
|
{ |
|
"epoch": 0.05, |
|
"grad_norm": 10.00322127291456, |
|
"learning_rate": 2.4509803921568627e-07, |
|
"logits/chosen": -0.5703125, |
|
"logits/rejected": -0.66015625, |
|
"logps/chosen": -2208.0, |
|
"logps/rejected": -1944.0, |
|
"loss": 0.684, |
|
"rewards/accuracies": 0.46000003814697266, |
|
"rewards/chosen": 0.1591796875, |
|
"rewards/margins": 0.0250244140625, |
|
"rewards/rejected": 0.1337890625, |
|
"step": 100 |
|
}, |
|
{ |
|
"epoch": 0.05, |
|
"eval_logits/chosen": -0.6640625, |
|
"eval_logits/rejected": -0.72265625, |
|
"eval_logps/chosen": -2352.0, |
|
"eval_logps/rejected": -2048.0, |
|
"eval_loss": 0.6767656207084656, |
|
"eval_rewards/accuracies": 0.449404776096344, |
|
"eval_rewards/chosen": 0.2314453125, |
|
"eval_rewards/margins": 0.04052734375, |
|
"eval_rewards/rejected": 0.1904296875, |
|
"eval_runtime": 90.1206, |
|
"eval_samples_per_second": 22.192, |
|
"eval_steps_per_second": 0.466, |
|
"step": 100 |
|
}, |
|
{ |
|
"epoch": 0.05, |
|
"grad_norm": 11.799509248452377, |
|
"learning_rate": 2.6960784313725486e-07, |
|
"logits/chosen": -0.55859375, |
|
"logits/rejected": -0.6640625, |
|
"logps/chosen": -2432.0, |
|
"logps/rejected": -1888.0, |
|
"loss": 0.682, |
|
"rewards/accuracies": 0.5200000405311584, |
|
"rewards/chosen": 0.255859375, |
|
"rewards/margins": 0.04736328125, |
|
"rewards/rejected": 0.2080078125, |
|
"step": 110 |
|
}, |
|
{ |
|
"epoch": 0.06, |
|
"grad_norm": 9.997348094627812, |
|
"learning_rate": 2.941176470588235e-07, |
|
"logits/chosen": -0.59765625, |
|
"logits/rejected": -0.5546875, |
|
"logps/chosen": -2304.0, |
|
"logps/rejected": -2352.0, |
|
"loss": 0.6822, |
|
"rewards/accuracies": 0.46000003814697266, |
|
"rewards/chosen": 0.28515625, |
|
"rewards/margins": -0.00159454345703125, |
|
"rewards/rejected": 0.287109375, |
|
"step": 120 |
|
}, |
|
{ |
|
"epoch": 0.06, |
|
"grad_norm": 10.489336936701164, |
|
"learning_rate": 3.1862745098039215e-07, |
|
"logits/chosen": -0.58203125, |
|
"logits/rejected": -0.69140625, |
|
"logps/chosen": -2240.0, |
|
"logps/rejected": -1608.0, |
|
"loss": 0.6706, |
|
"rewards/accuracies": 0.46000003814697266, |
|
"rewards/chosen": 0.34375, |
|
"rewards/margins": 0.103515625, |
|
"rewards/rejected": 0.2412109375, |
|
"step": 130 |
|
}, |
|
{ |
|
"epoch": 0.07, |
|
"grad_norm": 8.373655221952864, |
|
"learning_rate": 3.431372549019608e-07, |
|
"logits/chosen": -0.6328125, |
|
"logits/rejected": -0.6640625, |
|
"logps/chosen": -2256.0, |
|
"logps/rejected": -2128.0, |
|
"loss": 0.6786, |
|
"rewards/accuracies": 0.5800000429153442, |
|
"rewards/chosen": 0.376953125, |
|
"rewards/margins": 0.0556640625, |
|
"rewards/rejected": 0.322265625, |
|
"step": 140 |
|
}, |
|
{ |
|
"epoch": 0.07, |
|
"grad_norm": 7.530198235177303, |
|
"learning_rate": 3.6764705882352943e-07, |
|
"logits/chosen": -0.6328125, |
|
"logits/rejected": -0.74609375, |
|
"logps/chosen": -2560.0, |
|
"logps/rejected": -2000.0, |
|
"loss": 0.6618, |
|
"rewards/accuracies": 0.5399999618530273, |
|
"rewards/chosen": 0.466796875, |
|
"rewards/margins": 0.1103515625, |
|
"rewards/rejected": 0.35546875, |
|
"step": 150 |
|
}, |
|
{ |
|
"epoch": 0.08, |
|
"grad_norm": 9.93615787272239, |
|
"learning_rate": 3.92156862745098e-07, |
|
"logits/chosen": -0.578125, |
|
"logits/rejected": -0.62109375, |
|
"logps/chosen": -2544.0, |
|
"logps/rejected": -2272.0, |
|
"loss": 0.6632, |
|
"rewards/accuracies": 0.4599999785423279, |
|
"rewards/chosen": 0.4921875, |
|
"rewards/margins": 0.08984375, |
|
"rewards/rejected": 0.40234375, |
|
"step": 160 |
|
}, |
|
{ |
|
"epoch": 0.08, |
|
"grad_norm": 8.034753030681635, |
|
"learning_rate": 4.1666666666666667e-07, |
|
"logits/chosen": -0.59375, |
|
"logits/rejected": -0.703125, |
|
"logps/chosen": -2400.0, |
|
"logps/rejected": -1968.0, |
|
"loss": 0.6664, |
|
"rewards/accuracies": 0.42000001668930054, |
|
"rewards/chosen": 0.48828125, |
|
"rewards/margins": 0.0771484375, |
|
"rewards/rejected": 0.41015625, |
|
"step": 170 |
|
}, |
|
{ |
|
"epoch": 0.09, |
|
"grad_norm": 8.810211928299461, |
|
"learning_rate": 4.4117647058823526e-07, |
|
"logits/chosen": -0.5, |
|
"logits/rejected": -0.625, |
|
"logps/chosen": -3008.0, |
|
"logps/rejected": -2416.0, |
|
"loss": 0.6614, |
|
"rewards/accuracies": 0.5800000429153442, |
|
"rewards/chosen": 0.7265625, |
|
"rewards/margins": 0.2216796875, |
|
"rewards/rejected": 0.5078125, |
|
"step": 180 |
|
}, |
|
{ |
|
"epoch": 0.09, |
|
"grad_norm": 8.455528608933424, |
|
"learning_rate": 4.656862745098039e-07, |
|
"logits/chosen": -0.484375, |
|
"logits/rejected": -0.62109375, |
|
"logps/chosen": -2544.0, |
|
"logps/rejected": -2064.0, |
|
"loss": 0.6596, |
|
"rewards/accuracies": 0.5600000619888306, |
|
"rewards/chosen": 0.6171875, |
|
"rewards/margins": 0.1083984375, |
|
"rewards/rejected": 0.51171875, |
|
"step": 190 |
|
}, |
|
{ |
|
"epoch": 0.1, |
|
"grad_norm": 9.256083265710721, |
|
"learning_rate": 4.901960784313725e-07, |
|
"logits/chosen": -0.515625, |
|
"logits/rejected": -0.5625, |
|
"logps/chosen": -2768.0, |
|
"logps/rejected": -2576.0, |
|
"loss": 0.663, |
|
"rewards/accuracies": 0.46000003814697266, |
|
"rewards/chosen": 0.70703125, |
|
"rewards/margins": 0.05224609375, |
|
"rewards/rejected": 0.65625, |
|
"step": 200 |
|
}, |
|
{ |
|
"epoch": 0.1, |
|
"eval_logits/chosen": -0.66796875, |
|
"eval_logits/rejected": -0.7265625, |
|
"eval_logps/chosen": -2320.0, |
|
"eval_logps/rejected": -2016.0, |
|
"eval_loss": 0.6565937399864197, |
|
"eval_rewards/accuracies": 0.494047611951828, |
|
"eval_rewards/chosen": 0.59765625, |
|
"eval_rewards/margins": 0.11083984375, |
|
"eval_rewards/rejected": 0.48828125, |
|
"eval_runtime": 89.4808, |
|
"eval_samples_per_second": 22.351, |
|
"eval_steps_per_second": 0.469, |
|
"step": 200 |
|
}, |
|
{ |
|
"epoch": 0.1, |
|
"grad_norm": 10.458558020929722, |
|
"learning_rate": 4.999867958705476e-07, |
|
"logits/chosen": -0.58203125, |
|
"logits/rejected": -0.61328125, |
|
"logps/chosen": -2352.0, |
|
"logps/rejected": -2272.0, |
|
"loss": 0.6743, |
|
"rewards/accuracies": 0.47999995946884155, |
|
"rewards/chosen": 0.61328125, |
|
"rewards/margins": 0.0166015625, |
|
"rewards/rejected": 0.59765625, |
|
"step": 210 |
|
}, |
|
{ |
|
"epoch": 0.11, |
|
"grad_norm": 10.655404888327219, |
|
"learning_rate": 4.999061090193831e-07, |
|
"logits/chosen": -0.609375, |
|
"logits/rejected": -0.70703125, |
|
"logps/chosen": -2528.0, |
|
"logps/rejected": -2112.0, |
|
"loss": 0.6801, |
|
"rewards/accuracies": 0.5000000596046448, |
|
"rewards/chosen": 0.6796875, |
|
"rewards/margins": 0.1416015625, |
|
"rewards/rejected": 0.53515625, |
|
"step": 220 |
|
}, |
|
{ |
|
"epoch": 0.11, |
|
"grad_norm": 10.56468733064606, |
|
"learning_rate": 4.997520945910046e-07, |
|
"logits/chosen": -0.58203125, |
|
"logits/rejected": -0.7109375, |
|
"logps/chosen": -2368.0, |
|
"logps/rejected": -1848.0, |
|
"loss": 0.674, |
|
"rewards/accuracies": 0.5800000429153442, |
|
"rewards/chosen": 0.58984375, |
|
"rewards/margins": 0.1865234375, |
|
"rewards/rejected": 0.400390625, |
|
"step": 230 |
|
}, |
|
{ |
|
"epoch": 0.12, |
|
"grad_norm": 9.35483083586776, |
|
"learning_rate": 4.995247977764035e-07, |
|
"logits/chosen": -0.6875, |
|
"logits/rejected": -0.75, |
|
"logps/chosen": -2192.0, |
|
"logps/rejected": -1968.0, |
|
"loss": 0.6788, |
|
"rewards/accuracies": 0.4399999976158142, |
|
"rewards/chosen": 0.484375, |
|
"rewards/margins": 0.0615234375, |
|
"rewards/rejected": 0.423828125, |
|
"step": 240 |
|
}, |
|
{ |
|
"epoch": 0.12, |
|
"grad_norm": 9.99023631027908, |
|
"learning_rate": 4.992242852691269e-07, |
|
"logits/chosen": -0.62109375, |
|
"logits/rejected": -0.703125, |
|
"logps/chosen": -2160.0, |
|
"logps/rejected": -1960.0, |
|
"loss": 0.6734, |
|
"rewards/accuracies": 0.5200000405311584, |
|
"rewards/chosen": 0.5, |
|
"rewards/margins": 0.1396484375, |
|
"rewards/rejected": 0.361328125, |
|
"step": 250 |
|
}, |
|
{ |
|
"epoch": 0.13, |
|
"grad_norm": 8.81462332150393, |
|
"learning_rate": 4.988506452457066e-07, |
|
"logits/chosen": -0.5625, |
|
"logits/rejected": -0.6484375, |
|
"logps/chosen": -2272.0, |
|
"logps/rejected": -1952.0, |
|
"loss": 0.6606, |
|
"rewards/accuracies": 0.4800000786781311, |
|
"rewards/chosen": 0.419921875, |
|
"rewards/margins": 0.04931640625, |
|
"rewards/rejected": 0.37109375, |
|
"step": 260 |
|
}, |
|
{ |
|
"epoch": 0.13, |
|
"grad_norm": 9.565836577883003, |
|
"learning_rate": 4.984039873397879e-07, |
|
"logits/chosen": -0.5703125, |
|
"logits/rejected": -0.6953125, |
|
"logps/chosen": -2608.0, |
|
"logps/rejected": -2032.0, |
|
"loss": 0.6715, |
|
"rewards/accuracies": 0.5600000619888306, |
|
"rewards/chosen": 0.60546875, |
|
"rewards/margins": 0.1826171875, |
|
"rewards/rejected": 0.42578125, |
|
"step": 270 |
|
}, |
|
{ |
|
"epoch": 0.14, |
|
"grad_norm": 7.882899392773696, |
|
"learning_rate": 4.9788444260996e-07, |
|
"logits/chosen": -0.4921875, |
|
"logits/rejected": -0.55859375, |
|
"logps/chosen": -2512.0, |
|
"logps/rejected": -2144.0, |
|
"loss": 0.6488, |
|
"rewards/accuracies": 0.4800000786781311, |
|
"rewards/chosen": 0.578125, |
|
"rewards/margins": 0.1357421875, |
|
"rewards/rejected": 0.443359375, |
|
"step": 280 |
|
}, |
|
{ |
|
"epoch": 0.14, |
|
"grad_norm": 8.30641415306888, |
|
"learning_rate": 4.97292163501301e-07, |
|
"logits/chosen": -0.57421875, |
|
"logits/rejected": -0.578125, |
|
"logps/chosen": -2400.0, |
|
"logps/rejected": -2352.0, |
|
"loss": 0.641, |
|
"rewards/accuracies": 0.6600000262260437, |
|
"rewards/chosen": 0.63671875, |
|
"rewards/margins": 0.10107421875, |
|
"rewards/rejected": 0.5390625, |
|
"step": 290 |
|
}, |
|
{ |
|
"epoch": 0.15, |
|
"grad_norm": 8.469934208860764, |
|
"learning_rate": 4.96627323800647e-07, |
|
"logits/chosen": -0.427734375, |
|
"logits/rejected": -0.6328125, |
|
"logps/chosen": -3056.0, |
|
"logps/rejected": -2128.0, |
|
"loss": 0.6529, |
|
"rewards/accuracies": 0.6599999666213989, |
|
"rewards/chosen": 0.76953125, |
|
"rewards/margins": 0.265625, |
|
"rewards/rejected": 0.50390625, |
|
"step": 300 |
|
}, |
|
{ |
|
"epoch": 0.15, |
|
"eval_logits/chosen": -0.65625, |
|
"eval_logits/rejected": -0.71875, |
|
"eval_logps/chosen": -2320.0, |
|
"eval_logps/rejected": -2016.0, |
|
"eval_loss": 0.651296854019165, |
|
"eval_rewards/accuracies": 0.5148809552192688, |
|
"eval_rewards/chosen": 0.625, |
|
"eval_rewards/margins": 0.1279296875, |
|
"eval_rewards/rejected": 0.494140625, |
|
"eval_runtime": 89.9953, |
|
"eval_samples_per_second": 22.223, |
|
"eval_steps_per_second": 0.467, |
|
"step": 300 |
|
}, |
|
{ |
|
"epoch": 0.15, |
|
"grad_norm": 9.606536101637007, |
|
"learning_rate": 4.958901185856005e-07, |
|
"logits/chosen": -0.6015625, |
|
"logits/rejected": -0.640625, |
|
"logps/chosen": -2688.0, |
|
"logps/rejected": -2496.0, |
|
"loss": 0.6578, |
|
"rewards/accuracies": 0.5400000214576721, |
|
"rewards/chosen": 0.69140625, |
|
"rewards/margins": 0.06005859375, |
|
"rewards/rejected": 0.62890625, |
|
"step": 310 |
|
}, |
|
{ |
|
"epoch": 0.16, |
|
"grad_norm": 8.306258892059772, |
|
"learning_rate": 4.95080764167289e-07, |
|
"logits/chosen": -0.474609375, |
|
"logits/rejected": -0.4921875, |
|
"logps/chosen": -2400.0, |
|
"logps/rejected": -2336.0, |
|
"loss": 0.6911, |
|
"rewards/accuracies": 0.4599999785423279, |
|
"rewards/chosen": 0.5859375, |
|
"rewards/margins": 0.042724609375, |
|
"rewards/rejected": 0.54296875, |
|
"step": 320 |
|
}, |
|
{ |
|
"epoch": 0.16, |
|
"grad_norm": 9.33758707972592, |
|
"learning_rate": 4.941994980268966e-07, |
|
"logits/chosen": -0.640625, |
|
"logits/rejected": -0.65234375, |
|
"logps/chosen": -2008.0, |
|
"logps/rejected": -1832.0, |
|
"loss": 0.6607, |
|
"rewards/accuracies": 0.5000000596046448, |
|
"rewards/chosen": 0.478515625, |
|
"rewards/margins": 0.11865234375, |
|
"rewards/rejected": 0.361328125, |
|
"step": 330 |
|
}, |
|
{ |
|
"epoch": 0.17, |
|
"grad_norm": 8.421773008047236, |
|
"learning_rate": 4.932465787459808e-07, |
|
"logits/chosen": -0.50390625, |
|
"logits/rejected": -0.48828125, |
|
"logps/chosen": -2512.0, |
|
"logps/rejected": -2512.0, |
|
"loss": 0.6597, |
|
"rewards/accuracies": 0.42000001668930054, |
|
"rewards/chosen": 0.56640625, |
|
"rewards/margins": -0.020263671875, |
|
"rewards/rejected": 0.5859375, |
|
"step": 340 |
|
}, |
|
{ |
|
"epoch": 0.17, |
|
"grad_norm": 11.142714749304895, |
|
"learning_rate": 4.922222859306005e-07, |
|
"logits/chosen": -0.5, |
|
"logits/rejected": -0.61328125, |
|
"logps/chosen": -2112.0, |
|
"logps/rejected": -1664.0, |
|
"loss": 0.6529, |
|
"rewards/accuracies": 0.6000000238418579, |
|
"rewards/chosen": 0.46484375, |
|
"rewards/margins": 0.13671875, |
|
"rewards/rejected": 0.328125, |
|
"step": 350 |
|
}, |
|
{ |
|
"epoch": 0.18, |
|
"grad_norm": 9.405996440031476, |
|
"learning_rate": 4.911269201292724e-07, |
|
"logits/chosen": -0.478515625, |
|
"logits/rejected": -0.58203125, |
|
"logps/chosen": -2864.0, |
|
"logps/rejected": -2464.0, |
|
"loss": 0.6625, |
|
"rewards/accuracies": 0.6200000047683716, |
|
"rewards/chosen": 0.796875, |
|
"rewards/margins": 0.2177734375, |
|
"rewards/rejected": 0.578125, |
|
"step": 360 |
|
}, |
|
{ |
|
"epoch": 0.18, |
|
"grad_norm": 9.498724790073537, |
|
"learning_rate": 4.899608027447858e-07, |
|
"logits/chosen": -0.515625, |
|
"logits/rejected": -0.64453125, |
|
"logps/chosen": -2672.0, |
|
"logps/rejected": -2112.0, |
|
"loss": 0.659, |
|
"rewards/accuracies": 0.6000000238418579, |
|
"rewards/chosen": 0.71875, |
|
"rewards/margins": 0.1923828125, |
|
"rewards/rejected": 0.5234375, |
|
"step": 370 |
|
}, |
|
{ |
|
"epoch": 0.19, |
|
"grad_norm": 9.511360912470519, |
|
"learning_rate": 4.887242759398945e-07, |
|
"logits/chosen": -0.578125, |
|
"logits/rejected": -0.6796875, |
|
"logps/chosen": -2416.0, |
|
"logps/rejected": -2008.0, |
|
"loss": 0.6449, |
|
"rewards/accuracies": 0.64000004529953, |
|
"rewards/chosen": 0.60546875, |
|
"rewards/margins": 0.2109375, |
|
"rewards/rejected": 0.39453125, |
|
"step": 380 |
|
}, |
|
{ |
|
"epoch": 0.19, |
|
"grad_norm": 9.32007338457764, |
|
"learning_rate": 4.874177025369207e-07, |
|
"logits/chosen": -0.46484375, |
|
"logits/rejected": -0.5859375, |
|
"logps/chosen": -2800.0, |
|
"logps/rejected": -2256.0, |
|
"loss": 0.6507, |
|
"rewards/accuracies": 0.7400000095367432, |
|
"rewards/chosen": 0.7890625, |
|
"rewards/margins": 0.275390625, |
|
"rewards/rejected": 0.51171875, |
|
"step": 390 |
|
}, |
|
{ |
|
"epoch": 0.2, |
|
"grad_norm": 9.245573199091506, |
|
"learning_rate": 4.860414659112948e-07, |
|
"logits/chosen": -0.55859375, |
|
"logits/rejected": -0.578125, |
|
"logps/chosen": -2176.0, |
|
"logps/rejected": -1936.0, |
|
"loss": 0.6371, |
|
"rewards/accuracies": 0.5, |
|
"rewards/chosen": 0.6328125, |
|
"rewards/margins": 0.1474609375, |
|
"rewards/rejected": 0.48828125, |
|
"step": 400 |
|
}, |
|
{ |
|
"epoch": 0.2, |
|
"eval_logits/chosen": -0.66796875, |
|
"eval_logits/rejected": -0.7265625, |
|
"eval_logps/chosen": -2304.0, |
|
"eval_logps/rejected": -2016.0, |
|
"eval_loss": 0.649093747138977, |
|
"eval_rewards/accuracies": 0.5595238208770752, |
|
"eval_rewards/chosen": 0.65625, |
|
"eval_rewards/margins": 0.15234375, |
|
"eval_rewards/rejected": 0.5, |
|
"eval_runtime": 90.1263, |
|
"eval_samples_per_second": 22.191, |
|
"eval_steps_per_second": 0.466, |
|
"step": 400 |
|
}, |
|
{ |
|
"epoch": 0.2, |
|
"grad_norm": 8.878063287442174, |
|
"learning_rate": 4.845959698790652e-07, |
|
"logits/chosen": -0.578125, |
|
"logits/rejected": -0.6171875, |
|
"logps/chosen": -2128.0, |
|
"logps/rejected": -1864.0, |
|
"loss": 0.6683, |
|
"rewards/accuracies": 0.5200000405311584, |
|
"rewards/chosen": 0.59765625, |
|
"rewards/margins": 0.0791015625, |
|
"rewards/rejected": 0.515625, |
|
"step": 410 |
|
}, |
|
{ |
|
"epoch": 0.21, |
|
"grad_norm": 9.071445660844004, |
|
"learning_rate": 4.830816385784104e-07, |
|
"logits/chosen": -0.5078125, |
|
"logits/rejected": -0.55078125, |
|
"logps/chosen": -2256.0, |
|
"logps/rejected": -1872.0, |
|
"loss": 0.6581, |
|
"rewards/accuracies": 0.6399999856948853, |
|
"rewards/chosen": 0.62890625, |
|
"rewards/margins": 0.189453125, |
|
"rewards/rejected": 0.44140625, |
|
"step": 420 |
|
}, |
|
{ |
|
"epoch": 0.21, |
|
"grad_norm": 10.638539511395816, |
|
"learning_rate": 4.814989163451889e-07, |
|
"logits/chosen": -0.578125, |
|
"logits/rejected": -0.546875, |
|
"logps/chosen": -1840.0, |
|
"logps/rejected": -1864.0, |
|
"loss": 0.6701, |
|
"rewards/accuracies": 0.5200001001358032, |
|
"rewards/chosen": 0.52734375, |
|
"rewards/margins": 0.03271484375, |
|
"rewards/rejected": 0.4921875, |
|
"step": 430 |
|
}, |
|
{ |
|
"epoch": 0.22, |
|
"grad_norm": 9.196165350919857, |
|
"learning_rate": 4.798482675825602e-07, |
|
"logits/chosen": -0.5546875, |
|
"logits/rejected": -0.62109375, |
|
"logps/chosen": -2176.0, |
|
"logps/rejected": -2128.0, |
|
"loss": 0.6605, |
|
"rewards/accuracies": 0.5600000023841858, |
|
"rewards/chosen": 0.5703125, |
|
"rewards/margins": 0.06591796875, |
|
"rewards/rejected": 0.50390625, |
|
"step": 440 |
|
}, |
|
{ |
|
"epoch": 0.22, |
|
"grad_norm": 8.671692805246133, |
|
"learning_rate": 4.781301766247215e-07, |
|
"logits/chosen": -0.64453125, |
|
"logits/rejected": -0.640625, |
|
"logps/chosen": -2040.0, |
|
"logps/rejected": -2128.0, |
|
"loss": 0.6486, |
|
"rewards/accuracies": 0.6200000643730164, |
|
"rewards/chosen": 0.494140625, |
|
"rewards/margins": 0.0693359375, |
|
"rewards/rejected": 0.42578125, |
|
"step": 450 |
|
}, |
|
{ |
|
"epoch": 0.23, |
|
"grad_norm": 12.453319112805517, |
|
"learning_rate": 4.7634514759479275e-07, |
|
"logits/chosen": -0.6171875, |
|
"logits/rejected": -0.671875, |
|
"logps/chosen": -2096.0, |
|
"logps/rejected": -1800.0, |
|
"loss": 0.6577, |
|
"rewards/accuracies": 0.5600000619888306, |
|
"rewards/chosen": 0.5078125, |
|
"rewards/margins": 0.1748046875, |
|
"rewards/rejected": 0.33203125, |
|
"step": 460 |
|
}, |
|
{ |
|
"epoch": 0.23, |
|
"grad_norm": 10.997561643902582, |
|
"learning_rate": 4.7449370425689694e-07, |
|
"logits/chosen": -0.56640625, |
|
"logits/rejected": -0.6171875, |
|
"logps/chosen": -2240.0, |
|
"logps/rejected": -2008.0, |
|
"loss": 0.6317, |
|
"rewards/accuracies": 0.6600000262260437, |
|
"rewards/chosen": 0.5390625, |
|
"rewards/margins": 0.2255859375, |
|
"rewards/rejected": 0.3125, |
|
"step": 470 |
|
}, |
|
{ |
|
"epoch": 0.24, |
|
"grad_norm": 7.512807533628497, |
|
"learning_rate": 4.7257638986247684e-07, |
|
"logits/chosen": -0.466796875, |
|
"logits/rejected": -0.6640625, |
|
"logps/chosen": -3024.0, |
|
"logps/rejected": -2192.0, |
|
"loss": 0.6469, |
|
"rewards/accuracies": 0.7199999690055847, |
|
"rewards/chosen": 0.72265625, |
|
"rewards/margins": 0.380859375, |
|
"rewards/rejected": 0.34375, |
|
"step": 480 |
|
}, |
|
{ |
|
"epoch": 0.24, |
|
"grad_norm": 13.645526383003268, |
|
"learning_rate": 4.705937669908943e-07, |
|
"logits/chosen": -0.4921875, |
|
"logits/rejected": -0.6015625, |
|
"logps/chosen": -2624.0, |
|
"logps/rejected": -2240.0, |
|
"loss": 0.6384, |
|
"rewards/accuracies": 0.6600000262260437, |
|
"rewards/chosen": 0.63671875, |
|
"rewards/margins": 0.2138671875, |
|
"rewards/rejected": 0.421875, |
|
"step": 490 |
|
}, |
|
{ |
|
"epoch": 0.25, |
|
"grad_norm": 7.909000541119995, |
|
"learning_rate": 4.685464173843574e-07, |
|
"logits/chosen": -0.54296875, |
|
"logits/rejected": -0.6484375, |
|
"logps/chosen": -2192.0, |
|
"logps/rejected": -1792.0, |
|
"loss": 0.6206, |
|
"rewards/accuracies": 0.6200000047683716, |
|
"rewards/chosen": 0.5703125, |
|
"rewards/margins": 0.248046875, |
|
"rewards/rejected": 0.322265625, |
|
"step": 500 |
|
}, |
|
{ |
|
"epoch": 0.25, |
|
"eval_logits/chosen": -0.65625, |
|
"eval_logits/rejected": -0.71484375, |
|
"eval_logps/chosen": -2320.0, |
|
"eval_logps/rejected": -2024.0, |
|
"eval_loss": 0.6465859413146973, |
|
"eval_rewards/accuracies": 0.5952380895614624, |
|
"eval_rewards/chosen": 0.5390625, |
|
"eval_rewards/margins": 0.14453125, |
|
"eval_rewards/rejected": 0.39453125, |
|
"eval_runtime": 89.3629, |
|
"eval_samples_per_second": 22.381, |
|
"eval_steps_per_second": 0.47, |
|
"step": 500 |
|
}, |
|
{ |
|
"epoch": 0.25, |
|
"grad_norm": 9.183157685366133, |
|
"learning_rate": 4.6643494177722574e-07, |
|
"logits/chosen": -0.5546875, |
|
"logits/rejected": -0.58984375, |
|
"logps/chosen": -2160.0, |
|
"logps/rejected": -1864.0, |
|
"loss": 0.6507, |
|
"rewards/accuracies": 0.6200000643730164, |
|
"rewards/chosen": 0.51953125, |
|
"rewards/margins": 0.1279296875, |
|
"rewards/rejected": 0.392578125, |
|
"step": 510 |
|
}, |
|
{ |
|
"epoch": 0.26, |
|
"grad_norm": 10.502858947018517, |
|
"learning_rate": 4.6425995971974265e-07, |
|
"logits/chosen": -0.6015625, |
|
"logits/rejected": -0.75, |
|
"logps/chosen": -2448.0, |
|
"logps/rejected": -1856.0, |
|
"loss": 0.6657, |
|
"rewards/accuracies": 0.5199999809265137, |
|
"rewards/chosen": 0.46875, |
|
"rewards/margins": 0.11279296875, |
|
"rewards/rejected": 0.357421875, |
|
"step": 520 |
|
}, |
|
{ |
|
"epoch": 0.26, |
|
"grad_norm": 10.966207624712743, |
|
"learning_rate": 4.6202210939624607e-07, |
|
"logits/chosen": -0.51953125, |
|
"logits/rejected": -0.546875, |
|
"logps/chosen": -2688.0, |
|
"logps/rejected": -2528.0, |
|
"loss": 0.6652, |
|
"rewards/accuracies": 0.5000000596046448, |
|
"rewards/chosen": 0.64453125, |
|
"rewards/margins": 0.041015625, |
|
"rewards/rejected": 0.60546875, |
|
"step": 530 |
|
}, |
|
{ |
|
"epoch": 0.26, |
|
"grad_norm": 8.724395310063244, |
|
"learning_rate": 4.597220474379125e-07, |
|
"logits/chosen": -0.57421875, |
|
"logits/rejected": -0.6484375, |
|
"logps/chosen": -2448.0, |
|
"logps/rejected": -2040.0, |
|
"loss": 0.6592, |
|
"rewards/accuracies": 0.5400000810623169, |
|
"rewards/chosen": 0.515625, |
|
"rewards/margins": 0.177734375, |
|
"rewards/rejected": 0.333984375, |
|
"step": 540 |
|
}, |
|
{ |
|
"epoch": 0.27, |
|
"grad_norm": 8.826948047502222, |
|
"learning_rate": 4.57360448730088e-07, |
|
"logits/chosen": -0.6171875, |
|
"logits/rejected": -0.6953125, |
|
"logps/chosen": -2496.0, |
|
"logps/rejected": -2112.0, |
|
"loss": 0.6483, |
|
"rewards/accuracies": 0.5400000214576721, |
|
"rewards/chosen": 0.61328125, |
|
"rewards/margins": 0.08837890625, |
|
"rewards/rejected": 0.52734375, |
|
"step": 550 |
|
}, |
|
{ |
|
"epoch": 0.27, |
|
"grad_norm": 7.904587345979932, |
|
"learning_rate": 4.549380062142627e-07, |
|
"logits/chosen": -0.66796875, |
|
"logits/rejected": -0.65625, |
|
"logps/chosen": -1960.0, |
|
"logps/rejected": -1928.0, |
|
"loss": 0.6557, |
|
"rewards/accuracies": 0.5, |
|
"rewards/chosen": 0.515625, |
|
"rewards/margins": 0.052001953125, |
|
"rewards/rejected": 0.46484375, |
|
"step": 560 |
|
}, |
|
{ |
|
"epoch": 0.28, |
|
"grad_norm": 7.802080225823436, |
|
"learning_rate": 4.524554306847479e-07, |
|
"logits/chosen": -0.6171875, |
|
"logits/rejected": -0.6171875, |
|
"logps/chosen": -2144.0, |
|
"logps/rejected": -2096.0, |
|
"loss": 0.652, |
|
"rewards/accuracies": 0.5200000405311584, |
|
"rewards/chosen": 0.5625, |
|
"rewards/margins": 0.062255859375, |
|
"rewards/rejected": 0.498046875, |
|
"step": 570 |
|
}, |
|
{ |
|
"epoch": 0.28, |
|
"grad_norm": 10.484471048264675, |
|
"learning_rate": 4.499134505801141e-07, |
|
"logits/chosen": -0.52734375, |
|
"logits/rejected": -0.51953125, |
|
"logps/chosen": -2352.0, |
|
"logps/rejected": -2336.0, |
|
"loss": 0.6459, |
|
"rewards/accuracies": 0.42000001668930054, |
|
"rewards/chosen": 0.58984375, |
|
"rewards/margins": 0.018798828125, |
|
"rewards/rejected": 0.5703125, |
|
"step": 580 |
|
}, |
|
{ |
|
"epoch": 0.29, |
|
"grad_norm": 11.582326622430703, |
|
"learning_rate": 4.4731281176945244e-07, |
|
"logits/chosen": -0.5234375, |
|
"logits/rejected": -0.640625, |
|
"logps/chosen": -2768.0, |
|
"logps/rejected": -2208.0, |
|
"loss": 0.6501, |
|
"rewards/accuracies": 0.5400000810623169, |
|
"rewards/chosen": 0.72265625, |
|
"rewards/margins": 0.1611328125, |
|
"rewards/rejected": 0.55859375, |
|
"step": 590 |
|
}, |
|
{ |
|
"epoch": 0.29, |
|
"grad_norm": 8.880977624721396, |
|
"learning_rate": 4.4465427733352124e-07, |
|
"logits/chosen": -0.5078125, |
|
"logits/rejected": -0.55859375, |
|
"logps/chosen": -2320.0, |
|
"logps/rejected": -2080.0, |
|
"loss": 0.686, |
|
"rewards/accuracies": 0.6200000047683716, |
|
"rewards/chosen": 0.6640625, |
|
"rewards/margins": 0.251953125, |
|
"rewards/rejected": 0.4140625, |
|
"step": 600 |
|
}, |
|
{ |
|
"epoch": 0.29, |
|
"eval_logits/chosen": -0.66015625, |
|
"eval_logits/rejected": -0.71875, |
|
"eval_logps/chosen": -2320.0, |
|
"eval_logps/rejected": -2024.0, |
|
"eval_loss": 0.6446093916893005, |
|
"eval_rewards/accuracies": 0.5714285969734192, |
|
"eval_rewards/chosen": 0.578125, |
|
"eval_rewards/margins": 0.1591796875, |
|
"eval_rewards/rejected": 0.41796875, |
|
"eval_runtime": 89.7151, |
|
"eval_samples_per_second": 22.293, |
|
"eval_steps_per_second": 0.468, |
|
"step": 600 |
|
}, |
|
{ |
|
"epoch": 0.3, |
|
"grad_norm": 11.188626521589404, |
|
"learning_rate": 4.4193862734084277e-07, |
|
"logits/chosen": -0.6796875, |
|
"logits/rejected": -0.76171875, |
|
"logps/chosen": -2192.0, |
|
"logps/rejected": -2024.0, |
|
"loss": 0.6552, |
|
"rewards/accuracies": 0.5400000810623169, |
|
"rewards/chosen": 0.5703125, |
|
"rewards/margins": 0.1181640625, |
|
"rewards/rejected": 0.453125, |
|
"step": 610 |
|
}, |
|
{ |
|
"epoch": 0.3, |
|
"grad_norm": 6.697586616076624, |
|
"learning_rate": 4.391666586188145e-07, |
|
"logits/chosen": -0.640625, |
|
"logits/rejected": -0.703125, |
|
"logps/chosen": -2352.0, |
|
"logps/rejected": -2128.0, |
|
"loss": 0.6495, |
|
"rewards/accuracies": 0.6000000238418579, |
|
"rewards/chosen": 0.55859375, |
|
"rewards/margins": 0.1611328125, |
|
"rewards/rejected": 0.39453125, |
|
"step": 620 |
|
}, |
|
{ |
|
"epoch": 0.31, |
|
"grad_norm": 11.772870086073146, |
|
"learning_rate": 4.363391845199045e-07, |
|
"logits/chosen": -0.56640625, |
|
"logits/rejected": -0.59765625, |
|
"logps/chosen": -2432.0, |
|
"logps/rejected": -2272.0, |
|
"loss": 0.625, |
|
"rewards/accuracies": 0.6399999856948853, |
|
"rewards/chosen": 0.58203125, |
|
"rewards/margins": 0.271484375, |
|
"rewards/rejected": 0.3125, |
|
"step": 630 |
|
}, |
|
{ |
|
"epoch": 0.31, |
|
"grad_norm": 8.98000540892343, |
|
"learning_rate": 4.3345703468299634e-07, |
|
"logits/chosen": -0.52734375, |
|
"logits/rejected": -0.58203125, |
|
"logps/chosen": -2608.0, |
|
"logps/rejected": -2304.0, |
|
"loss": 0.6203, |
|
"rewards/accuracies": 0.6000000238418579, |
|
"rewards/chosen": 0.56640625, |
|
"rewards/margins": 0.1865234375, |
|
"rewards/rejected": 0.3828125, |
|
"step": 640 |
|
}, |
|
{ |
|
"epoch": 0.32, |
|
"grad_norm": 10.179524289387247, |
|
"learning_rate": 4.3052105478995635e-07, |
|
"logits/chosen": -0.53125, |
|
"logits/rejected": -0.71875, |
|
"logps/chosen": -2448.0, |
|
"logps/rejected": -1608.0, |
|
"loss": 0.6391, |
|
"rewards/accuracies": 0.6800000667572021, |
|
"rewards/chosen": 0.55859375, |
|
"rewards/margins": 0.302734375, |
|
"rewards/rejected": 0.25390625, |
|
"step": 650 |
|
}, |
|
{ |
|
"epoch": 0.32, |
|
"grad_norm": 12.855522500267961, |
|
"learning_rate": 4.275321063174936e-07, |
|
"logits/chosen": -0.6015625, |
|
"logits/rejected": -0.640625, |
|
"logps/chosen": -1656.0, |
|
"logps/rejected": -1456.0, |
|
"loss": 0.643, |
|
"rewards/accuracies": 0.5799999833106995, |
|
"rewards/chosen": 0.39453125, |
|
"rewards/margins": 0.09619140625, |
|
"rewards/rejected": 0.298828125, |
|
"step": 660 |
|
}, |
|
{ |
|
"epoch": 0.33, |
|
"grad_norm": 7.0575963436949465, |
|
"learning_rate": 4.24491066284384e-07, |
|
"logits/chosen": -0.73046875, |
|
"logits/rejected": -0.734375, |
|
"logps/chosen": -1664.0, |
|
"logps/rejected": -1544.0, |
|
"loss": 0.6358, |
|
"rewards/accuracies": 0.5199999809265137, |
|
"rewards/chosen": 0.373046875, |
|
"rewards/margins": 0.1484375, |
|
"rewards/rejected": 0.224609375, |
|
"step": 670 |
|
}, |
|
{ |
|
"epoch": 0.33, |
|
"grad_norm": 8.261526871746847, |
|
"learning_rate": 4.2139882699413613e-07, |
|
"logits/chosen": -0.54296875, |
|
"logits/rejected": -0.5625, |
|
"logps/chosen": -2240.0, |
|
"logps/rejected": -2144.0, |
|
"loss": 0.6533, |
|
"rewards/accuracies": 0.5200000405311584, |
|
"rewards/chosen": 0.6640625, |
|
"rewards/margins": 0.169921875, |
|
"rewards/rejected": 0.494140625, |
|
"step": 680 |
|
}, |
|
{ |
|
"epoch": 0.34, |
|
"grad_norm": 9.857235722928268, |
|
"learning_rate": 4.1825629577317024e-07, |
|
"logits/chosen": -0.55859375, |
|
"logits/rejected": -0.63671875, |
|
"logps/chosen": -2288.0, |
|
"logps/rejected": -1960.0, |
|
"loss": 0.6869, |
|
"rewards/accuracies": 0.5200000405311584, |
|
"rewards/chosen": 0.5859375, |
|
"rewards/margins": 0.1455078125, |
|
"rewards/rejected": 0.439453125, |
|
"step": 690 |
|
}, |
|
{ |
|
"epoch": 0.34, |
|
"grad_norm": 9.152797505715435, |
|
"learning_rate": 4.1506439470459056e-07, |
|
"logits/chosen": -0.51171875, |
|
"logits/rejected": -0.609375, |
|
"logps/chosen": -2496.0, |
|
"logps/rejected": -2208.0, |
|
"loss": 0.6459, |
|
"rewards/accuracies": 0.5400000214576721, |
|
"rewards/chosen": 0.58203125, |
|
"rewards/margins": 0.166015625, |
|
"rewards/rejected": 0.416015625, |
|
"step": 700 |
|
}, |
|
{ |
|
"epoch": 0.34, |
|
"eval_logits/chosen": -0.62890625, |
|
"eval_logits/rejected": -0.6875, |
|
"eval_logps/chosen": -2320.0, |
|
"eval_logps/rejected": -2032.0, |
|
"eval_loss": 0.6448671817779541, |
|
"eval_rewards/accuracies": 0.601190447807312, |
|
"eval_rewards/chosen": 0.55078125, |
|
"eval_rewards/margins": 0.1884765625, |
|
"eval_rewards/rejected": 0.36328125, |
|
"eval_runtime": 90.547, |
|
"eval_samples_per_second": 22.088, |
|
"eval_steps_per_second": 0.464, |
|
"step": 700 |
|
}, |
|
{ |
|
"epoch": 0.35, |
|
"grad_norm": 9.410823784289315, |
|
"learning_rate": 4.1182406035762684e-07, |
|
"logits/chosen": -0.494140625, |
|
"logits/rejected": -0.5234375, |
|
"logps/chosen": -2288.0, |
|
"logps/rejected": -2016.0, |
|
"loss": 0.6429, |
|
"rewards/accuracies": 0.5799999833106995, |
|
"rewards/chosen": 0.44140625, |
|
"rewards/margins": 0.1240234375, |
|
"rewards/rejected": 0.31640625, |
|
"step": 710 |
|
}, |
|
{ |
|
"epoch": 0.35, |
|
"grad_norm": 9.75600415982287, |
|
"learning_rate": 4.085362435128262e-07, |
|
"logits/chosen": -0.5390625, |
|
"logits/rejected": -0.68359375, |
|
"logps/chosen": -2768.0, |
|
"logps/rejected": -2224.0, |
|
"loss": 0.6704, |
|
"rewards/accuracies": 0.6800000071525574, |
|
"rewards/chosen": 0.6328125, |
|
"rewards/margins": 0.2470703125, |
|
"rewards/rejected": 0.38671875, |
|
"step": 720 |
|
}, |
|
{ |
|
"epoch": 0.36, |
|
"grad_norm": 7.3325000524472, |
|
"learning_rate": 4.0520190888307413e-07, |
|
"logits/chosen": -0.61328125, |
|
"logits/rejected": -0.66796875, |
|
"logps/chosen": -2544.0, |
|
"logps/rejected": -2304.0, |
|
"loss": 0.647, |
|
"rewards/accuracies": 0.6000000238418579, |
|
"rewards/chosen": 0.63671875, |
|
"rewards/margins": 0.232421875, |
|
"rewards/rejected": 0.40625, |
|
"step": 730 |
|
}, |
|
{ |
|
"epoch": 0.36, |
|
"grad_norm": 10.182221180599024, |
|
"learning_rate": 4.0182203483052825e-07, |
|
"logits/chosen": -0.546875, |
|
"logits/rejected": -0.68359375, |
|
"logps/chosen": -2864.0, |
|
"logps/rejected": -2288.0, |
|
"loss": 0.6299, |
|
"rewards/accuracies": 0.6800000071525574, |
|
"rewards/chosen": 0.7265625, |
|
"rewards/margins": 0.248046875, |
|
"rewards/rejected": 0.4765625, |
|
"step": 740 |
|
}, |
|
{ |
|
"epoch": 0.37, |
|
"grad_norm": 7.856122064768362, |
|
"learning_rate": 3.983976130795467e-07, |
|
"logits/chosen": -0.51953125, |
|
"logits/rejected": -0.61328125, |
|
"logps/chosen": -2576.0, |
|
"logps/rejected": -2128.0, |
|
"loss": 0.6215, |
|
"rewards/accuracies": 0.5600000619888306, |
|
"rewards/chosen": 0.71484375, |
|
"rewards/margins": 0.275390625, |
|
"rewards/rejected": 0.439453125, |
|
"step": 750 |
|
}, |
|
{ |
|
"epoch": 0.37, |
|
"grad_norm": 8.363791551838782, |
|
"learning_rate": 3.949296484256959e-07, |
|
"logits/chosen": -0.5859375, |
|
"logits/rejected": -0.64453125, |
|
"logps/chosen": -2128.0, |
|
"logps/rejected": -2008.0, |
|
"loss": 0.6718, |
|
"rewards/accuracies": 0.5400000214576721, |
|
"rewards/chosen": 0.490234375, |
|
"rewards/margins": 0.0625, |
|
"rewards/rejected": 0.427734375, |
|
"step": 760 |
|
}, |
|
{ |
|
"epoch": 0.38, |
|
"grad_norm": 9.779529809569974, |
|
"learning_rate": 3.9141915844092285e-07, |
|
"logits/chosen": -0.546875, |
|
"logits/rejected": -0.6796875, |
|
"logps/chosen": -2208.0, |
|
"logps/rejected": -1912.0, |
|
"loss": 0.6547, |
|
"rewards/accuracies": 0.5799999833106995, |
|
"rewards/chosen": 0.55078125, |
|
"rewards/margins": 0.23046875, |
|
"rewards/rejected": 0.3203125, |
|
"step": 770 |
|
}, |
|
{ |
|
"epoch": 0.38, |
|
"grad_norm": 8.698999015594477, |
|
"learning_rate": 3.8786717317497875e-07, |
|
"logits/chosen": -0.4921875, |
|
"logits/rejected": -0.609375, |
|
"logps/chosen": -2432.0, |
|
"logps/rejected": -2096.0, |
|
"loss": 0.6326, |
|
"rewards/accuracies": 0.6600000262260437, |
|
"rewards/chosen": 0.6484375, |
|
"rewards/margins": 0.353515625, |
|
"rewards/rejected": 0.296875, |
|
"step": 780 |
|
}, |
|
{ |
|
"epoch": 0.39, |
|
"grad_norm": 8.385397043034576, |
|
"learning_rate": 3.842747348531813e-07, |
|
"logits/chosen": -0.53515625, |
|
"logits/rejected": -0.58203125, |
|
"logps/chosen": -2192.0, |
|
"logps/rejected": -1880.0, |
|
"loss": 0.6337, |
|
"rewards/accuracies": 0.64000004529953, |
|
"rewards/chosen": 0.578125, |
|
"rewards/margins": 0.23828125, |
|
"rewards/rejected": 0.337890625, |
|
"step": 790 |
|
}, |
|
{ |
|
"epoch": 0.39, |
|
"grad_norm": 8.23475490511833, |
|
"learning_rate": 3.806428975706042e-07, |
|
"logits/chosen": -0.62890625, |
|
"logits/rejected": -0.65625, |
|
"logps/chosen": -2352.0, |
|
"logps/rejected": -2112.0, |
|
"loss": 0.6458, |
|
"rewards/accuracies": 0.5600000619888306, |
|
"rewards/chosen": 0.62890625, |
|
"rewards/margins": 0.2119140625, |
|
"rewards/rejected": 0.41796875, |
|
"step": 800 |
|
}, |
|
{ |
|
"epoch": 0.39, |
|
"eval_logits/chosen": -0.640625, |
|
"eval_logits/rejected": -0.6953125, |
|
"eval_logps/chosen": -2320.0, |
|
"eval_logps/rejected": -2024.0, |
|
"eval_loss": 0.6421015858650208, |
|
"eval_rewards/accuracies": 0.5773809552192688, |
|
"eval_rewards/chosen": 0.55859375, |
|
"eval_rewards/margins": 0.1708984375, |
|
"eval_rewards/rejected": 0.38671875, |
|
"eval_runtime": 86.7441, |
|
"eval_samples_per_second": 23.056, |
|
"eval_steps_per_second": 0.484, |
|
"step": 800 |
|
}, |
|
{ |
|
"epoch": 0.4, |
|
"grad_norm": 9.096444731037868, |
|
"learning_rate": 3.769727269827843e-07, |
|
"logits/chosen": -0.51171875, |
|
"logits/rejected": -0.62109375, |
|
"logps/chosen": -1992.0, |
|
"logps/rejected": -1672.0, |
|
"loss": 0.6547, |
|
"rewards/accuracies": 0.6000000238418579, |
|
"rewards/chosen": 0.337890625, |
|
"rewards/margins": 0.0634765625, |
|
"rewards/rejected": 0.275390625, |
|
"step": 810 |
|
}, |
|
{ |
|
"epoch": 0.4, |
|
"grad_norm": 9.063205334817118, |
|
"learning_rate": 3.7326529999303633e-07, |
|
"logits/chosen": -0.55859375, |
|
"logits/rejected": -0.6484375, |
|
"logps/chosen": -2608.0, |
|
"logps/rejected": -2128.0, |
|
"loss": 0.6512, |
|
"rewards/accuracies": 0.5400000810623169, |
|
"rewards/chosen": 0.56640625, |
|
"rewards/margins": 0.1396484375, |
|
"rewards/rejected": 0.42578125, |
|
"step": 820 |
|
}, |
|
{ |
|
"epoch": 0.41, |
|
"grad_norm": 8.96580135461996, |
|
"learning_rate": 3.6952170443646737e-07, |
|
"logits/chosen": -0.51171875, |
|
"logits/rejected": -0.63671875, |
|
"logps/chosen": -2752.0, |
|
"logps/rejected": -2064.0, |
|
"loss": 0.6351, |
|
"rewards/accuracies": 0.6000000238418579, |
|
"rewards/chosen": 0.73828125, |
|
"rewards/margins": 0.248046875, |
|
"rewards/rejected": 0.4921875, |
|
"step": 830 |
|
}, |
|
{ |
|
"epoch": 0.41, |
|
"grad_norm": 10.61500715901887, |
|
"learning_rate": 3.6574303876078366e-07, |
|
"logits/chosen": -0.5234375, |
|
"logits/rejected": -0.6640625, |
|
"logps/chosen": -2384.0, |
|
"logps/rejected": -1816.0, |
|
"loss": 0.6429, |
|
"rewards/accuracies": 0.6200000047683716, |
|
"rewards/chosen": 0.734375, |
|
"rewards/margins": 0.30859375, |
|
"rewards/rejected": 0.42578125, |
|
"step": 840 |
|
}, |
|
{ |
|
"epoch": 0.42, |
|
"grad_norm": 8.368580291250769, |
|
"learning_rate": 3.619304117039835e-07, |
|
"logits/chosen": -0.52734375, |
|
"logits/rejected": -0.546875, |
|
"logps/chosen": -2352.0, |
|
"logps/rejected": -2240.0, |
|
"loss": 0.6492, |
|
"rewards/accuracies": 0.440000057220459, |
|
"rewards/chosen": 0.75, |
|
"rewards/margins": 0.12353515625, |
|
"rewards/rejected": 0.625, |
|
"step": 850 |
|
}, |
|
{ |
|
"epoch": 0.42, |
|
"grad_norm": 6.994507159815261, |
|
"learning_rate": 3.5808494196903117e-07, |
|
"logits/chosen": -0.55078125, |
|
"logits/rejected": -0.62890625, |
|
"logps/chosen": -2608.0, |
|
"logps/rejected": -2128.0, |
|
"loss": 0.6169, |
|
"rewards/accuracies": 0.6800000071525574, |
|
"rewards/chosen": 0.87109375, |
|
"rewards/margins": 0.400390625, |
|
"rewards/rejected": 0.47265625, |
|
"step": 860 |
|
}, |
|
{ |
|
"epoch": 0.43, |
|
"grad_norm": 8.69645008304575, |
|
"learning_rate": 3.542077578956057e-07, |
|
"logits/chosen": -0.51953125, |
|
"logits/rejected": -0.578125, |
|
"logps/chosen": -2416.0, |
|
"logps/rejected": -2192.0, |
|
"loss": 0.6549, |
|
"rewards/accuracies": 0.5200000405311584, |
|
"rewards/chosen": 0.7421875, |
|
"rewards/margins": 0.21484375, |
|
"rewards/rejected": 0.52734375, |
|
"step": 870 |
|
}, |
|
{ |
|
"epoch": 0.43, |
|
"grad_norm": 6.41242543011292, |
|
"learning_rate": 3.5029999712902387e-07, |
|
"logits/chosen": -0.45703125, |
|
"logits/rejected": -0.53125, |
|
"logps/chosen": -2608.0, |
|
"logps/rejected": -2352.0, |
|
"loss": 0.6401, |
|
"rewards/accuracies": 0.5600000619888306, |
|
"rewards/chosen": 0.81640625, |
|
"rewards/margins": 0.15234375, |
|
"rewards/rejected": 0.6640625, |
|
"step": 880 |
|
}, |
|
{ |
|
"epoch": 0.44, |
|
"grad_norm": 7.750038189045204, |
|
"learning_rate": 3.463628062864312e-07, |
|
"logits/chosen": -0.470703125, |
|
"logits/rejected": -0.58984375, |
|
"logps/chosen": -2688.0, |
|
"logps/rejected": -2128.0, |
|
"loss": 0.622, |
|
"rewards/accuracies": 0.6599999666213989, |
|
"rewards/chosen": 0.8828125, |
|
"rewards/margins": 0.28515625, |
|
"rewards/rejected": 0.59765625, |
|
"step": 890 |
|
}, |
|
{ |
|
"epoch": 0.44, |
|
"grad_norm": 7.569717070191467, |
|
"learning_rate": 3.4239734062036067e-07, |
|
"logits/chosen": -0.5, |
|
"logits/rejected": -0.58203125, |
|
"logps/chosen": -2512.0, |
|
"logps/rejected": -2224.0, |
|
"loss": 0.6451, |
|
"rewards/accuracies": 0.4599999785423279, |
|
"rewards/chosen": 0.625, |
|
"rewards/margins": 0.06591796875, |
|
"rewards/rejected": 0.55859375, |
|
"step": 900 |
|
}, |
|
{ |
|
"epoch": 0.44, |
|
"eval_logits/chosen": -0.61328125, |
|
"eval_logits/rejected": -0.671875, |
|
"eval_logps/chosen": -2304.0, |
|
"eval_logps/rejected": -2016.0, |
|
"eval_loss": 0.6398203372955322, |
|
"eval_rewards/accuracies": 0.5684523582458496, |
|
"eval_rewards/chosen": 0.7109375, |
|
"eval_rewards/margins": 0.20703125, |
|
"eval_rewards/rejected": 0.50390625, |
|
"eval_runtime": 86.5756, |
|
"eval_samples_per_second": 23.101, |
|
"eval_steps_per_second": 0.485, |
|
"step": 900 |
|
}, |
|
{ |
|
"epoch": 0.45, |
|
"grad_norm": 8.632582600123046, |
|
"learning_rate": 3.3840476367975874e-07, |
|
"logits/chosen": -0.515625, |
|
"logits/rejected": -0.62890625, |
|
"logps/chosen": -2432.0, |
|
"logps/rejected": -1968.0, |
|
"loss": 0.6196, |
|
"rewards/accuracies": 0.6200000047683716, |
|
"rewards/chosen": 0.78515625, |
|
"rewards/margins": 0.328125, |
|
"rewards/rejected": 0.45703125, |
|
"step": 910 |
|
}, |
|
{ |
|
"epoch": 0.45, |
|
"grad_norm": 8.506506212272358, |
|
"learning_rate": 3.343862469685755e-07, |
|
"logits/chosen": -0.6015625, |
|
"logits/rejected": -0.68359375, |
|
"logps/chosen": -1960.0, |
|
"logps/rejected": -1776.0, |
|
"loss": 0.6652, |
|
"rewards/accuracies": 0.40000003576278687, |
|
"rewards/chosen": 0.53125, |
|
"rewards/margins": 0.045166015625, |
|
"rewards/rejected": 0.486328125, |
|
"step": 920 |
|
}, |
|
{ |
|
"epoch": 0.46, |
|
"grad_norm": 11.75407958457082, |
|
"learning_rate": 3.3034296960202195e-07, |
|
"logits/chosen": -0.45703125, |
|
"logits/rejected": -0.5703125, |
|
"logps/chosen": -2656.0, |
|
"logps/rejected": -2192.0, |
|
"loss": 0.6409, |
|
"rewards/accuracies": 0.5199999809265137, |
|
"rewards/chosen": 0.7734375, |
|
"rewards/margins": 0.232421875, |
|
"rewards/rejected": 0.54296875, |
|
"step": 930 |
|
}, |
|
{ |
|
"epoch": 0.46, |
|
"grad_norm": 8.239692706862764, |
|
"learning_rate": 3.2627611796059283e-07, |
|
"logits/chosen": -0.53515625, |
|
"logits/rejected": -0.5625, |
|
"logps/chosen": -2736.0, |
|
"logps/rejected": -2400.0, |
|
"loss": 0.6449, |
|
"rewards/accuracies": 0.5600000619888306, |
|
"rewards/chosen": 0.91796875, |
|
"rewards/margins": 0.2333984375, |
|
"rewards/rejected": 0.68359375, |
|
"step": 940 |
|
}, |
|
{ |
|
"epoch": 0.47, |
|
"grad_norm": 8.160298452954589, |
|
"learning_rate": 3.221868853419587e-07, |
|
"logits/chosen": -0.46875, |
|
"logits/rejected": -0.55078125, |
|
"logps/chosen": -2768.0, |
|
"logps/rejected": -2272.0, |
|
"loss": 0.6157, |
|
"rewards/accuracies": 0.6600000262260437, |
|
"rewards/chosen": 0.98046875, |
|
"rewards/margins": 0.302734375, |
|
"rewards/rejected": 0.67578125, |
|
"step": 950 |
|
}, |
|
{ |
|
"epoch": 0.47, |
|
"grad_norm": 9.590783642097492, |
|
"learning_rate": 3.1807647161082797e-07, |
|
"logits/chosen": -0.47265625, |
|
"logits/rejected": -0.5625, |
|
"logps/chosen": -2656.0, |
|
"logps/rejected": -2336.0, |
|
"loss": 0.6511, |
|
"rewards/accuracies": 0.5000000596046448, |
|
"rewards/chosen": 0.859375, |
|
"rewards/margins": 0.16796875, |
|
"rewards/rejected": 0.69140625, |
|
"step": 960 |
|
}, |
|
{ |
|
"epoch": 0.48, |
|
"grad_norm": 7.99282237541914, |
|
"learning_rate": 3.139460828468815e-07, |
|
"logits/chosen": -0.44140625, |
|
"logits/rejected": -0.494140625, |
|
"logps/chosen": -1976.0, |
|
"logps/rejected": -1776.0, |
|
"loss": 0.6747, |
|
"rewards/accuracies": 0.5800000429153442, |
|
"rewards/chosen": 0.6328125, |
|
"rewards/margins": 0.166015625, |
|
"rewards/rejected": 0.466796875, |
|
"step": 970 |
|
}, |
|
{ |
|
"epoch": 0.48, |
|
"grad_norm": 12.319935997541707, |
|
"learning_rate": 3.097969309908847e-07, |
|
"logits/chosen": -0.60546875, |
|
"logits/rejected": -0.5390625, |
|
"logps/chosen": -1728.0, |
|
"logps/rejected": -2024.0, |
|
"loss": 0.6457, |
|
"rewards/accuracies": 0.5, |
|
"rewards/chosen": 0.5234375, |
|
"rewards/margins": 0.03857421875, |
|
"rewards/rejected": 0.486328125, |
|
"step": 980 |
|
}, |
|
{ |
|
"epoch": 0.49, |
|
"grad_norm": 9.488759901702995, |
|
"learning_rate": 3.056302334890786e-07, |
|
"logits/chosen": -0.53515625, |
|
"logits/rejected": -0.62890625, |
|
"logps/chosen": -2400.0, |
|
"logps/rejected": -2096.0, |
|
"loss": 0.6284, |
|
"rewards/accuracies": 0.5600000619888306, |
|
"rewards/chosen": 0.83203125, |
|
"rewards/margins": 0.1953125, |
|
"rewards/rejected": 0.63671875, |
|
"step": 990 |
|
}, |
|
{ |
|
"epoch": 0.49, |
|
"grad_norm": 7.899477826304579, |
|
"learning_rate": 3.01447212935957e-07, |
|
"logits/chosen": -0.5234375, |
|
"logits/rejected": -0.609375, |
|
"logps/chosen": -2432.0, |
|
"logps/rejected": -2064.0, |
|
"loss": 0.6213, |
|
"rewards/accuracies": 0.6600000262260437, |
|
"rewards/chosen": 0.8203125, |
|
"rewards/margins": 0.27734375, |
|
"rewards/rejected": 0.54296875, |
|
"step": 1000 |
|
}, |
|
{ |
|
"epoch": 0.49, |
|
"eval_logits/chosen": -0.6015625, |
|
"eval_logits/rejected": -0.66015625, |
|
"eval_logps/chosen": -2304.0, |
|
"eval_logps/rejected": -2008.0, |
|
"eval_loss": 0.6406640410423279, |
|
"eval_rewards/accuracies": 0.5714285969734192, |
|
"eval_rewards/chosen": 0.7734375, |
|
"eval_rewards/margins": 0.201171875, |
|
"eval_rewards/rejected": 0.57421875, |
|
"eval_runtime": 86.4608, |
|
"eval_samples_per_second": 23.132, |
|
"eval_steps_per_second": 0.486, |
|
"step": 1000 |
|
}, |
|
{ |
|
"epoch": 0.5, |
|
"grad_norm": 8.55209914949281, |
|
"learning_rate": 2.9724909671553134e-07, |
|
"logits/chosen": -0.56640625, |
|
"logits/rejected": -0.6015625, |
|
"logps/chosen": -2040.0, |
|
"logps/rejected": -1952.0, |
|
"loss": 0.6205, |
|
"rewards/accuracies": 0.5399999618530273, |
|
"rewards/chosen": 0.66796875, |
|
"rewards/margins": 0.11328125, |
|
"rewards/rejected": 0.5546875, |
|
"step": 1010 |
|
}, |
|
{ |
|
"epoch": 0.5, |
|
"grad_norm": 7.990632543907024, |
|
"learning_rate": 2.930371166411915e-07, |
|
"logits/chosen": -0.458984375, |
|
"logits/rejected": -0.53515625, |
|
"logps/chosen": -2912.0, |
|
"logps/rejected": -2672.0, |
|
"loss": 0.6432, |
|
"rewards/accuracies": 0.6200000643730164, |
|
"rewards/chosen": 0.94921875, |
|
"rewards/margins": 0.173828125, |
|
"rewards/rejected": 0.77734375, |
|
"step": 1020 |
|
}, |
|
{ |
|
"epoch": 0.51, |
|
"grad_norm": 8.428930236764653, |
|
"learning_rate": 2.888125085942664e-07, |
|
"logits/chosen": -0.52734375, |
|
"logits/rejected": -0.578125, |
|
"logps/chosen": -1992.0, |
|
"logps/rejected": -1808.0, |
|
"loss": 0.6492, |
|
"rewards/accuracies": 0.6200000643730164, |
|
"rewards/chosen": 0.62109375, |
|
"rewards/margins": 0.1728515625, |
|
"rewards/rejected": 0.447265625, |
|
"step": 1030 |
|
}, |
|
{ |
|
"epoch": 0.51, |
|
"grad_norm": 9.162488652610268, |
|
"learning_rate": 2.845765121613912e-07, |
|
"logits/chosen": -0.54296875, |
|
"logits/rejected": -0.62109375, |
|
"logps/chosen": -2368.0, |
|
"logps/rejected": -2048.0, |
|
"loss": 0.6406, |
|
"rewards/accuracies": 0.6200000047683716, |
|
"rewards/chosen": 0.7265625, |
|
"rewards/margins": 0.1865234375, |
|
"rewards/rejected": 0.54296875, |
|
"step": 1040 |
|
}, |
|
{ |
|
"epoch": 0.52, |
|
"grad_norm": 7.430844898047478, |
|
"learning_rate": 2.803303702707869e-07, |
|
"logits/chosen": -0.56640625, |
|
"logits/rejected": -0.703125, |
|
"logps/chosen": -2240.0, |
|
"logps/rejected": -1752.0, |
|
"loss": 0.6623, |
|
"rewards/accuracies": 0.5799999833106995, |
|
"rewards/chosen": 0.578125, |
|
"rewards/margins": 0.224609375, |
|
"rewards/rejected": 0.3515625, |
|
"step": 1050 |
|
}, |
|
{ |
|
"epoch": 0.52, |
|
"grad_norm": 10.51154519002261, |
|
"learning_rate": 2.760753288275598e-07, |
|
"logits/chosen": -0.58203125, |
|
"logits/rejected": -0.640625, |
|
"logps/chosen": -2544.0, |
|
"logps/rejected": -2320.0, |
|
"loss": 0.6646, |
|
"rewards/accuracies": 0.5400000214576721, |
|
"rewards/chosen": 0.74609375, |
|
"rewards/margins": 0.259765625, |
|
"rewards/rejected": 0.484375, |
|
"step": 1060 |
|
}, |
|
{ |
|
"epoch": 0.53, |
|
"grad_norm": 7.047353993767992, |
|
"learning_rate": 2.718126363481276e-07, |
|
"logits/chosen": -0.59375, |
|
"logits/rejected": -0.78125, |
|
"logps/chosen": -2720.0, |
|
"logps/rejected": -1832.0, |
|
"loss": 0.6346, |
|
"rewards/accuracies": 0.6600000262260437, |
|
"rewards/chosen": 0.7109375, |
|
"rewards/margins": 0.341796875, |
|
"rewards/rejected": 0.37109375, |
|
"step": 1070 |
|
}, |
|
{ |
|
"epoch": 0.53, |
|
"grad_norm": 8.420763714981168, |
|
"learning_rate": 2.675435435938788e-07, |
|
"logits/chosen": -0.59765625, |
|
"logits/rejected": -0.56640625, |
|
"logps/chosen": -1784.0, |
|
"logps/rejected": -1880.0, |
|
"loss": 0.6347, |
|
"rewards/accuracies": 0.47999995946884155, |
|
"rewards/chosen": 0.39453125, |
|
"rewards/margins": 0.039306640625, |
|
"rewards/rejected": 0.353515625, |
|
"step": 1080 |
|
}, |
|
{ |
|
"epoch": 0.53, |
|
"grad_norm": 8.77849576592234, |
|
"learning_rate": 2.63269303204174e-07, |
|
"logits/chosen": -0.48046875, |
|
"logits/rejected": -0.5546875, |
|
"logps/chosen": -2656.0, |
|
"logps/rejected": -2352.0, |
|
"loss": 0.6426, |
|
"rewards/accuracies": 0.6000000238418579, |
|
"rewards/chosen": 0.6640625, |
|
"rewards/margins": 0.275390625, |
|
"rewards/rejected": 0.38671875, |
|
"step": 1090 |
|
}, |
|
{ |
|
"epoch": 0.54, |
|
"grad_norm": 8.92354118074767, |
|
"learning_rate": 2.5899116932879534e-07, |
|
"logits/chosen": -0.4765625, |
|
"logits/rejected": -0.5703125, |
|
"logps/chosen": -2496.0, |
|
"logps/rejected": -2144.0, |
|
"loss": 0.6313, |
|
"rewards/accuracies": 0.6399999856948853, |
|
"rewards/chosen": 0.58203125, |
|
"rewards/margins": 0.2353515625, |
|
"rewards/rejected": 0.345703125, |
|
"step": 1100 |
|
}, |
|
{ |
|
"epoch": 0.54, |
|
"eval_logits/chosen": -0.609375, |
|
"eval_logits/rejected": -0.66796875, |
|
"eval_logps/chosen": -2320.0, |
|
"eval_logps/rejected": -2032.0, |
|
"eval_loss": 0.638671875, |
|
"eval_rewards/accuracies": 0.5892857313156128, |
|
"eval_rewards/chosen": 0.5390625, |
|
"eval_rewards/margins": 0.1806640625, |
|
"eval_rewards/rejected": 0.35546875, |
|
"eval_runtime": 86.566, |
|
"eval_samples_per_second": 23.104, |
|
"eval_steps_per_second": 0.485, |
|
"step": 1100 |
|
}, |
|
{ |
|
"epoch": 0.54, |
|
"grad_norm": 8.680098424161624, |
|
"learning_rate": 2.5471039725995345e-07, |
|
"logits/chosen": -0.5078125, |
|
"logits/rejected": -0.5234375, |
|
"logps/chosen": -2224.0, |
|
"logps/rejected": -2096.0, |
|
"loss": 0.6539, |
|
"rewards/accuracies": 0.5199999809265137, |
|
"rewards/chosen": 0.58984375, |
|
"rewards/margins": 0.259765625, |
|
"rewards/rejected": 0.330078125, |
|
"step": 1110 |
|
}, |
|
{ |
|
"epoch": 0.55, |
|
"grad_norm": 12.234325062863915, |
|
"learning_rate": 2.504282430639594e-07, |
|
"logits/chosen": -0.466796875, |
|
"logits/rejected": -0.59765625, |
|
"logps/chosen": -2560.0, |
|
"logps/rejected": -2064.0, |
|
"loss": 0.6367, |
|
"rewards/accuracies": 0.6000000238418579, |
|
"rewards/chosen": 0.56640625, |
|
"rewards/margins": 0.197265625, |
|
"rewards/rejected": 0.3671875, |
|
"step": 1120 |
|
}, |
|
{ |
|
"epoch": 0.55, |
|
"grad_norm": 9.71309302276875, |
|
"learning_rate": 2.4614596321266836e-07, |
|
"logits/chosen": -0.60546875, |
|
"logits/rejected": -0.625, |
|
"logps/chosen": -2352.0, |
|
"logps/rejected": -2272.0, |
|
"loss": 0.632, |
|
"rewards/accuracies": 0.5400000214576721, |
|
"rewards/chosen": 0.45703125, |
|
"rewards/margins": 0.12451171875, |
|
"rewards/rejected": 0.33203125, |
|
"step": 1130 |
|
}, |
|
{ |
|
"epoch": 0.56, |
|
"grad_norm": 9.646759761306011, |
|
"learning_rate": 2.418648142148056e-07, |
|
"logits/chosen": -0.50390625, |
|
"logits/rejected": -0.59375, |
|
"logps/chosen": -2784.0, |
|
"logps/rejected": -2192.0, |
|
"loss": 0.6486, |
|
"rewards/accuracies": 0.6200000047683716, |
|
"rewards/chosen": 0.6640625, |
|
"rewards/margins": 0.21875, |
|
"rewards/rejected": 0.443359375, |
|
"step": 1140 |
|
}, |
|
{ |
|
"epoch": 0.56, |
|
"grad_norm": 8.518198214709173, |
|
"learning_rate": 2.375860522472805e-07, |
|
"logits/chosen": -0.578125, |
|
"logits/rejected": -0.68359375, |
|
"logps/chosen": -2064.0, |
|
"logps/rejected": -1624.0, |
|
"loss": 0.6414, |
|
"rewards/accuracies": 0.5800000429153442, |
|
"rewards/chosen": 0.435546875, |
|
"rewards/margins": 0.19140625, |
|
"rewards/rejected": 0.2451171875, |
|
"step": 1150 |
|
}, |
|
{ |
|
"epoch": 0.57, |
|
"grad_norm": 9.732180854785232, |
|
"learning_rate": 2.3331093278659906e-07, |
|
"logits/chosen": -0.5625, |
|
"logits/rejected": -0.5859375, |
|
"logps/chosen": -1992.0, |
|
"logps/rejected": -1888.0, |
|
"loss": 0.6358, |
|
"rewards/accuracies": 0.4599999785423279, |
|
"rewards/chosen": 0.53515625, |
|
"rewards/margins": 0.212890625, |
|
"rewards/rejected": 0.322265625, |
|
"step": 1160 |
|
}, |
|
{ |
|
"epoch": 0.57, |
|
"grad_norm": 8.25240239846351, |
|
"learning_rate": 2.2904071024048089e-07, |
|
"logits/chosen": -0.5546875, |
|
"logits/rejected": -0.515625, |
|
"logps/chosen": -2016.0, |
|
"logps/rejected": -2016.0, |
|
"loss": 0.6236, |
|
"rewards/accuracies": 0.5000000596046448, |
|
"rewards/chosen": 0.5625, |
|
"rewards/margins": 0.1025390625, |
|
"rewards/rejected": 0.4609375, |
|
"step": 1170 |
|
}, |
|
{ |
|
"epoch": 0.58, |
|
"grad_norm": 9.230235569126055, |
|
"learning_rate": 2.247766375797906e-07, |
|
"logits/chosen": -0.5546875, |
|
"logits/rejected": -0.625, |
|
"logps/chosen": -2224.0, |
|
"logps/rejected": -1864.0, |
|
"loss": 0.6549, |
|
"rewards/accuracies": 0.6000000238418579, |
|
"rewards/chosen": 0.62109375, |
|
"rewards/margins": 0.205078125, |
|
"rewards/rejected": 0.416015625, |
|
"step": 1180 |
|
}, |
|
{ |
|
"epoch": 0.58, |
|
"grad_norm": 10.347179580129167, |
|
"learning_rate": 2.2051996597089026e-07, |
|
"logits/chosen": -0.49609375, |
|
"logits/rejected": -0.55078125, |
|
"logps/chosen": -2096.0, |
|
"logps/rejected": -1944.0, |
|
"loss": 0.6314, |
|
"rewards/accuracies": 0.5200000405311584, |
|
"rewards/chosen": 0.421875, |
|
"rewards/margins": 0.0191650390625, |
|
"rewards/rejected": 0.40234375, |
|
"step": 1190 |
|
}, |
|
{ |
|
"epoch": 0.59, |
|
"grad_norm": 7.338817367824801, |
|
"learning_rate": 2.1627194440852142e-07, |
|
"logits/chosen": -0.478515625, |
|
"logits/rejected": -0.59765625, |
|
"logps/chosen": -2592.0, |
|
"logps/rejected": -2064.0, |
|
"loss": 0.6298, |
|
"rewards/accuracies": 0.6000000238418579, |
|
"rewards/chosen": 0.78515625, |
|
"rewards/margins": 0.265625, |
|
"rewards/rejected": 0.51953125, |
|
"step": 1200 |
|
}, |
|
{ |
|
"epoch": 0.59, |
|
"eval_logits/chosen": -0.59765625, |
|
"eval_logits/rejected": -0.65234375, |
|
"eval_logps/chosen": -2304.0, |
|
"eval_logps/rejected": -2016.0, |
|
"eval_loss": 0.6379843950271606, |
|
"eval_rewards/accuracies": 0.6041666865348816, |
|
"eval_rewards/chosen": 0.6953125, |
|
"eval_rewards/margins": 0.203125, |
|
"eval_rewards/rejected": 0.4921875, |
|
"eval_runtime": 86.5496, |
|
"eval_samples_per_second": 23.108, |
|
"eval_steps_per_second": 0.485, |
|
"step": 1200 |
|
}, |
|
{ |
|
"epoch": 0.59, |
|
"grad_norm": 11.049536436218999, |
|
"learning_rate": 2.120338193493248e-07, |
|
"logits/chosen": -0.53515625, |
|
"logits/rejected": -0.59765625, |
|
"logps/chosen": -2624.0, |
|
"logps/rejected": -2144.0, |
|
"loss": 0.6477, |
|
"rewards/accuracies": 0.5800000429153442, |
|
"rewards/chosen": 0.7421875, |
|
"rewards/margins": 0.2890625, |
|
"rewards/rejected": 0.453125, |
|
"step": 1210 |
|
}, |
|
{ |
|
"epoch": 0.6, |
|
"grad_norm": 6.8225110204324615, |
|
"learning_rate": 2.0780683434610413e-07, |
|
"logits/chosen": -0.52734375, |
|
"logits/rejected": -0.59765625, |
|
"logps/chosen": -2416.0, |
|
"logps/rejected": -2240.0, |
|
"loss": 0.6609, |
|
"rewards/accuracies": 0.5600000023841858, |
|
"rewards/chosen": 0.734375, |
|
"rewards/margins": 0.2099609375, |
|
"rewards/rejected": 0.52734375, |
|
"step": 1220 |
|
}, |
|
{ |
|
"epoch": 0.6, |
|
"grad_norm": 10.18256822065958, |
|
"learning_rate": 2.0359222968294202e-07, |
|
"logits/chosen": -0.5703125, |
|
"logits/rejected": -0.58203125, |
|
"logps/chosen": -2192.0, |
|
"logps/rejected": -2096.0, |
|
"loss": 0.6622, |
|
"rewards/accuracies": 0.5800000429153442, |
|
"rewards/chosen": 0.5703125, |
|
"rewards/margins": 0.208984375, |
|
"rewards/rejected": 0.359375, |
|
"step": 1230 |
|
}, |
|
{ |
|
"epoch": 0.61, |
|
"grad_norm": 8.086963793402472, |
|
"learning_rate": 1.993912420112756e-07, |
|
"logits/chosen": -0.62109375, |
|
"logits/rejected": -0.6015625, |
|
"logps/chosen": -1992.0, |
|
"logps/rejected": -2096.0, |
|
"loss": 0.6603, |
|
"rewards/accuracies": 0.5600000023841858, |
|
"rewards/chosen": 0.62890625, |
|
"rewards/margins": 0.09765625, |
|
"rewards/rejected": 0.53125, |
|
"step": 1240 |
|
}, |
|
{ |
|
"epoch": 0.61, |
|
"grad_norm": 8.024306896782054, |
|
"learning_rate": 1.9520510398703766e-07, |
|
"logits/chosen": -0.51953125, |
|
"logits/rejected": -0.5625, |
|
"logps/chosen": -2512.0, |
|
"logps/rejected": -2320.0, |
|
"loss": 0.6632, |
|
"rewards/accuracies": 0.5400000810623169, |
|
"rewards/chosen": 0.70703125, |
|
"rewards/margins": 0.2353515625, |
|
"rewards/rejected": 0.47265625, |
|
"step": 1250 |
|
}, |
|
{ |
|
"epoch": 0.62, |
|
"grad_norm": 9.937034420935973, |
|
"learning_rate": 1.9103504390896944e-07, |
|
"logits/chosen": -0.49609375, |
|
"logits/rejected": -0.5859375, |
|
"logps/chosen": -2464.0, |
|
"logps/rejected": -2240.0, |
|
"loss": 0.6705, |
|
"rewards/accuracies": 0.5600000023841858, |
|
"rewards/chosen": 0.58984375, |
|
"rewards/margins": 0.107421875, |
|
"rewards/rejected": 0.482421875, |
|
"step": 1260 |
|
}, |
|
{ |
|
"epoch": 0.62, |
|
"grad_norm": 10.056595402865112, |
|
"learning_rate": 1.8688228535821348e-07, |
|
"logits/chosen": -0.53125, |
|
"logits/rejected": -0.50390625, |
|
"logps/chosen": -1936.0, |
|
"logps/rejected": -2040.0, |
|
"loss": 0.6226, |
|
"rewards/accuracies": 0.440000057220459, |
|
"rewards/chosen": 0.44921875, |
|
"rewards/margins": 0.1337890625, |
|
"rewards/rejected": 0.314453125, |
|
"step": 1270 |
|
}, |
|
{ |
|
"epoch": 0.63, |
|
"grad_norm": 8.971832197264595, |
|
"learning_rate": 1.8274804683928913e-07, |
|
"logits/chosen": -0.578125, |
|
"logits/rejected": -0.55859375, |
|
"logps/chosen": -2048.0, |
|
"logps/rejected": -2160.0, |
|
"loss": 0.6517, |
|
"rewards/accuracies": 0.48000001907348633, |
|
"rewards/chosen": 0.498046875, |
|
"rewards/margins": 0.054443359375, |
|
"rewards/rejected": 0.4453125, |
|
"step": 1280 |
|
}, |
|
{ |
|
"epoch": 0.63, |
|
"grad_norm": 9.440719762857112, |
|
"learning_rate": 1.786335414225588e-07, |
|
"logits/chosen": -0.5625, |
|
"logits/rejected": -0.625, |
|
"logps/chosen": -2096.0, |
|
"logps/rejected": -1992.0, |
|
"loss": 0.6552, |
|
"rewards/accuracies": 0.5600000619888306, |
|
"rewards/chosen": 0.400390625, |
|
"rewards/margins": 0.19140625, |
|
"rewards/rejected": 0.208984375, |
|
"step": 1290 |
|
}, |
|
{ |
|
"epoch": 0.64, |
|
"grad_norm": 12.661374748922718, |
|
"learning_rate": 1.745399763882881e-07, |
|
"logits/chosen": -0.5, |
|
"logits/rejected": -0.57421875, |
|
"logps/chosen": -2512.0, |
|
"logps/rejected": -2112.0, |
|
"loss": 0.6461, |
|
"rewards/accuracies": 0.6000000238418579, |
|
"rewards/chosen": 0.5390625, |
|
"rewards/margins": 0.19921875, |
|
"rewards/rejected": 0.33984375, |
|
"step": 1300 |
|
}, |
|
{ |
|
"epoch": 0.64, |
|
"eval_logits/chosen": -0.63671875, |
|
"eval_logits/rejected": -0.69140625, |
|
"eval_logps/chosen": -2320.0, |
|
"eval_logps/rejected": -2032.0, |
|
"eval_loss": 0.6395859122276306, |
|
"eval_rewards/accuracies": 0.586309552192688, |
|
"eval_rewards/chosen": 0.55859375, |
|
"eval_rewards/margins": 0.1962890625, |
|
"eval_rewards/rejected": 0.361328125, |
|
"eval_runtime": 86.6403, |
|
"eval_samples_per_second": 23.084, |
|
"eval_steps_per_second": 0.485, |
|
"step": 1300 |
|
}, |
|
{ |
|
"epoch": 0.64, |
|
"grad_norm": 8.541591527739216, |
|
"learning_rate": 1.704685528724046e-07, |
|
"logits/chosen": -0.56640625, |
|
"logits/rejected": -0.625, |
|
"logps/chosen": -2496.0, |
|
"logps/rejected": -2304.0, |
|
"loss": 0.6451, |
|
"rewards/accuracies": 0.48000001907348633, |
|
"rewards/chosen": 0.62890625, |
|
"rewards/margins": 0.0703125, |
|
"rewards/rejected": 0.55859375, |
|
"step": 1310 |
|
}, |
|
{ |
|
"epoch": 0.65, |
|
"grad_norm": 9.240636114996493, |
|
"learning_rate": 1.664204655140607e-07, |
|
"logits/chosen": -0.5625, |
|
"logits/rejected": -0.61328125, |
|
"logps/chosen": -2272.0, |
|
"logps/rejected": -1944.0, |
|
"loss": 0.6175, |
|
"rewards/accuracies": 0.6800000071525574, |
|
"rewards/chosen": 0.671875, |
|
"rewards/margins": 0.298828125, |
|
"rewards/rejected": 0.375, |
|
"step": 1320 |
|
}, |
|
{ |
|
"epoch": 0.65, |
|
"grad_norm": 9.2814996839246, |
|
"learning_rate": 1.6239690210510166e-07, |
|
"logits/chosen": -0.58984375, |
|
"logits/rejected": -0.65625, |
|
"logps/chosen": -2608.0, |
|
"logps/rejected": -2368.0, |
|
"loss": 0.6566, |
|
"rewards/accuracies": 0.5400000214576721, |
|
"rewards/chosen": 0.59765625, |
|
"rewards/margins": 0.07421875, |
|
"rewards/rejected": 0.5234375, |
|
"step": 1330 |
|
}, |
|
{ |
|
"epoch": 0.66, |
|
"grad_norm": 7.5279255603123, |
|
"learning_rate": 1.5839904324154273e-07, |
|
"logits/chosen": -0.34765625, |
|
"logits/rejected": -0.455078125, |
|
"logps/chosen": -2736.0, |
|
"logps/rejected": -2256.0, |
|
"loss": 0.6349, |
|
"rewards/accuracies": 0.5000000596046448, |
|
"rewards/chosen": 0.71875, |
|
"rewards/margins": 0.2255859375, |
|
"rewards/rejected": 0.49609375, |
|
"step": 1340 |
|
}, |
|
{ |
|
"epoch": 0.66, |
|
"grad_norm": 11.057065560114015, |
|
"learning_rate": 1.544280619771588e-07, |
|
"logits/chosen": -0.44140625, |
|
"logits/rejected": -0.515625, |
|
"logps/chosen": -2448.0, |
|
"logps/rejected": -2160.0, |
|
"loss": 0.6424, |
|
"rewards/accuracies": 0.5600000023841858, |
|
"rewards/chosen": 0.7109375, |
|
"rewards/margins": 0.2080078125, |
|
"rewards/rejected": 0.50390625, |
|
"step": 1350 |
|
}, |
|
{ |
|
"epoch": 0.67, |
|
"grad_norm": 9.245099893740868, |
|
"learning_rate": 1.5048512347928564e-07, |
|
"logits/chosen": -0.474609375, |
|
"logits/rejected": -0.578125, |
|
"logps/chosen": -2800.0, |
|
"logps/rejected": -2480.0, |
|
"loss": 0.6604, |
|
"rewards/accuracies": 0.6200000047683716, |
|
"rewards/chosen": 0.74609375, |
|
"rewards/margins": 0.2138671875, |
|
"rewards/rejected": 0.53125, |
|
"step": 1360 |
|
}, |
|
{ |
|
"epoch": 0.67, |
|
"grad_norm": 9.054284348259795, |
|
"learning_rate": 1.4657138468693648e-07, |
|
"logits/chosen": -0.57421875, |
|
"logits/rejected": -0.671875, |
|
"logps/chosen": -2224.0, |
|
"logps/rejected": -1832.0, |
|
"loss": 0.6223, |
|
"rewards/accuracies": 0.6600000262260437, |
|
"rewards/chosen": 0.59765625, |
|
"rewards/margins": 0.33203125, |
|
"rewards/rejected": 0.265625, |
|
"step": 1370 |
|
}, |
|
{ |
|
"epoch": 0.68, |
|
"grad_norm": 8.967356728455812, |
|
"learning_rate": 1.426879939713322e-07, |
|
"logits/chosen": -0.58984375, |
|
"logits/rejected": -0.71484375, |
|
"logps/chosen": -2464.0, |
|
"logps/rejected": -1864.0, |
|
"loss": 0.6278, |
|
"rewards/accuracies": 0.6399999856948853, |
|
"rewards/chosen": 0.76171875, |
|
"rewards/margins": 0.3515625, |
|
"rewards/rejected": 0.408203125, |
|
"step": 1380 |
|
}, |
|
{ |
|
"epoch": 0.68, |
|
"grad_norm": 8.462317796783486, |
|
"learning_rate": 1.3883609079894532e-07, |
|
"logits/chosen": -0.52734375, |
|
"logits/rejected": -0.51171875, |
|
"logps/chosen": -1848.0, |
|
"logps/rejected": -1936.0, |
|
"loss": 0.6453, |
|
"rewards/accuracies": 0.5400000810623169, |
|
"rewards/chosen": 0.5859375, |
|
"rewards/margins": 0.07177734375, |
|
"rewards/rejected": 0.515625, |
|
"step": 1390 |
|
}, |
|
{ |
|
"epoch": 0.69, |
|
"grad_norm": 12.275639958330698, |
|
"learning_rate": 1.350168053971577e-07, |
|
"logits/chosen": -0.51953125, |
|
"logits/rejected": -0.69140625, |
|
"logps/chosen": -2528.0, |
|
"logps/rejected": -1856.0, |
|
"loss": 0.6258, |
|
"rewards/accuracies": 0.6800000071525574, |
|
"rewards/chosen": 0.75, |
|
"rewards/margins": 0.3671875, |
|
"rewards/rejected": 0.3828125, |
|
"step": 1400 |
|
}, |
|
{ |
|
"epoch": 0.69, |
|
"eval_logits/chosen": -0.6171875, |
|
"eval_logits/rejected": -0.67578125, |
|
"eval_logps/chosen": -2304.0, |
|
"eval_logps/rejected": -2016.0, |
|
"eval_loss": 0.6359687447547913, |
|
"eval_rewards/accuracies": 0.5922619104385376, |
|
"eval_rewards/chosen": 0.69140625, |
|
"eval_rewards/margins": 0.220703125, |
|
"eval_rewards/rejected": 0.47265625, |
|
"eval_runtime": 86.3381, |
|
"eval_samples_per_second": 23.165, |
|
"eval_steps_per_second": 0.486, |
|
"step": 1400 |
|
}, |
|
{ |
|
"epoch": 0.69, |
|
"grad_norm": 10.6189969938929, |
|
"learning_rate": 1.312312584226284e-07, |
|
"logits/chosen": -0.54296875, |
|
"logits/rejected": -0.5859375, |
|
"logps/chosen": -2368.0, |
|
"logps/rejected": -2144.0, |
|
"loss": 0.6219, |
|
"rewards/accuracies": 0.48000001907348633, |
|
"rewards/chosen": 0.77734375, |
|
"rewards/margins": 0.271484375, |
|
"rewards/rejected": 0.50390625, |
|
"step": 1410 |
|
}, |
|
{ |
|
"epoch": 0.7, |
|
"grad_norm": 7.349435818799782, |
|
"learning_rate": 1.2748056063246994e-07, |
|
"logits/chosen": -0.486328125, |
|
"logits/rejected": -0.62890625, |
|
"logps/chosen": -2416.0, |
|
"logps/rejected": -1912.0, |
|
"loss": 0.6426, |
|
"rewards/accuracies": 0.5600000619888306, |
|
"rewards/chosen": 0.73828125, |
|
"rewards/margins": 0.25, |
|
"rewards/rejected": 0.486328125, |
|
"step": 1420 |
|
}, |
|
{ |
|
"epoch": 0.7, |
|
"grad_norm": 9.740153583722698, |
|
"learning_rate": 1.2376581255832966e-07, |
|
"logits/chosen": -0.54296875, |
|
"logits/rejected": -0.6640625, |
|
"logps/chosen": -2688.0, |
|
"logps/rejected": -2080.0, |
|
"loss": 0.6293, |
|
"rewards/accuracies": 0.7000000476837158, |
|
"rewards/chosen": 0.78515625, |
|
"rewards/margins": 0.294921875, |
|
"rewards/rejected": 0.48828125, |
|
"step": 1430 |
|
}, |
|
{ |
|
"epoch": 0.71, |
|
"grad_norm": 8.753751751983152, |
|
"learning_rate": 1.2008810418347093e-07, |
|
"logits/chosen": -0.5546875, |
|
"logits/rejected": -0.5859375, |
|
"logps/chosen": -2080.0, |
|
"logps/rejected": -1880.0, |
|
"loss": 0.6566, |
|
"rewards/accuracies": 0.64000004529953, |
|
"rewards/chosen": 0.57421875, |
|
"rewards/margins": 0.2021484375, |
|
"rewards/rejected": 0.373046875, |
|
"step": 1440 |
|
}, |
|
{ |
|
"epoch": 0.71, |
|
"grad_norm": 8.57529838239571, |
|
"learning_rate": 1.1644851462294956e-07, |
|
"logits/chosen": -0.54296875, |
|
"logits/rejected": -0.65234375, |
|
"logps/chosen": -2032.0, |
|
"logps/rejected": -1632.0, |
|
"loss": 0.636, |
|
"rewards/accuracies": 0.6000000238418579, |
|
"rewards/chosen": 0.5078125, |
|
"rewards/margins": 0.1904296875, |
|
"rewards/rejected": 0.318359375, |
|
"step": 1450 |
|
}, |
|
{ |
|
"epoch": 0.72, |
|
"grad_norm": 8.301288314698684, |
|
"learning_rate": 1.128481118069799e-07, |
|
"logits/chosen": -0.484375, |
|
"logits/rejected": -0.59375, |
|
"logps/chosen": -2768.0, |
|
"logps/rejected": -2208.0, |
|
"loss": 0.6506, |
|
"rewards/accuracies": 0.7400001287460327, |
|
"rewards/chosen": 0.796875, |
|
"rewards/margins": 0.2421875, |
|
"rewards/rejected": 0.5546875, |
|
"step": 1460 |
|
}, |
|
{ |
|
"epoch": 0.72, |
|
"grad_norm": 9.889148738488851, |
|
"learning_rate": 1.0928795216758149e-07, |
|
"logits/chosen": -0.55078125, |
|
"logits/rejected": -0.62890625, |
|
"logps/chosen": -2176.0, |
|
"logps/rejected": -1832.0, |
|
"loss": 0.6503, |
|
"rewards/accuracies": 0.5800000429153442, |
|
"rewards/chosen": 0.52734375, |
|
"rewards/margins": 0.193359375, |
|
"rewards/rejected": 0.3359375, |
|
"step": 1470 |
|
}, |
|
{ |
|
"epoch": 0.73, |
|
"grad_norm": 9.51045156043377, |
|
"learning_rate": 1.0576908032860088e-07, |
|
"logits/chosen": -0.62890625, |
|
"logits/rejected": -0.640625, |
|
"logps/chosen": -2208.0, |
|
"logps/rejected": -2144.0, |
|
"loss": 0.6682, |
|
"rewards/accuracies": 0.5600000023841858, |
|
"rewards/chosen": 0.66796875, |
|
"rewards/margins": 0.24609375, |
|
"rewards/rejected": 0.421875, |
|
"step": 1480 |
|
}, |
|
{ |
|
"epoch": 0.73, |
|
"grad_norm": 7.27263720622099, |
|
"learning_rate": 1.0229252879919714e-07, |
|
"logits/chosen": -0.5859375, |
|
"logits/rejected": -0.60546875, |
|
"logps/chosen": -1960.0, |
|
"logps/rejected": -1784.0, |
|
"loss": 0.6574, |
|
"rewards/accuracies": 0.5399999618530273, |
|
"rewards/chosen": 0.48828125, |
|
"rewards/margins": 0.10107421875, |
|
"rewards/rejected": 0.38671875, |
|
"step": 1490 |
|
}, |
|
{ |
|
"epoch": 0.74, |
|
"grad_norm": 9.236088100797234, |
|
"learning_rate": 9.88593176708827e-08, |
|
"logits/chosen": -0.4765625, |
|
"logits/rejected": -0.59765625, |
|
"logps/chosen": -2384.0, |
|
"logps/rejected": -1888.0, |
|
"loss": 0.6347, |
|
"rewards/accuracies": 0.64000004529953, |
|
"rewards/chosen": 0.7265625, |
|
"rewards/margins": 0.44921875, |
|
"rewards/rejected": 0.279296875, |
|
"step": 1500 |
|
}, |
|
{ |
|
"epoch": 0.74, |
|
"eval_logits/chosen": -0.609375, |
|
"eval_logits/rejected": -0.6640625, |
|
"eval_logps/chosen": -2320.0, |
|
"eval_logps/rejected": -2024.0, |
|
"eval_loss": 0.6374765634536743, |
|
"eval_rewards/accuracies": 0.5892857313156128, |
|
"eval_rewards/chosen": 0.625, |
|
"eval_rewards/margins": 0.2099609375, |
|
"eval_rewards/rejected": 0.4140625, |
|
"eval_runtime": 86.4172, |
|
"eval_samples_per_second": 23.144, |
|
"eval_steps_per_second": 0.486, |
|
"step": 1500 |
|
}, |
|
{ |
|
"epoch": 0.74, |
|
"grad_norm": 8.84592951975432, |
|
"learning_rate": 9.547045431820749e-08, |
|
"logits/chosen": -0.52734375, |
|
"logits/rejected": -0.5546875, |
|
"logps/chosen": -2384.0, |
|
"logps/rejected": -2240.0, |
|
"loss": 0.6272, |
|
"rewards/accuracies": 0.5800000429153442, |
|
"rewards/chosen": 0.68359375, |
|
"rewards/margins": 0.11669921875, |
|
"rewards/rejected": 0.56640625, |
|
"step": 1510 |
|
}, |
|
{ |
|
"epoch": 0.75, |
|
"grad_norm": 8.84073742236179, |
|
"learning_rate": 9.212693310317479e-08, |
|
"logits/chosen": -0.5703125, |
|
"logits/rejected": -0.58984375, |
|
"logps/chosen": -2176.0, |
|
"logps/rejected": -2048.0, |
|
"loss": 0.6444, |
|
"rewards/accuracies": 0.6600000262260437, |
|
"rewards/chosen": 0.5625, |
|
"rewards/margins": 0.1689453125, |
|
"rewards/rejected": 0.392578125, |
|
"step": 1520 |
|
}, |
|
{ |
|
"epoch": 0.75, |
|
"grad_norm": 6.8580337819405335, |
|
"learning_rate": 8.882973508347449e-08, |
|
"logits/chosen": -0.546875, |
|
"logits/rejected": -0.65234375, |
|
"logps/chosen": -1968.0, |
|
"logps/rejected": -1648.0, |
|
"loss": 0.6548, |
|
"rewards/accuracies": 0.6600000262260437, |
|
"rewards/chosen": 0.54296875, |
|
"rewards/margins": 0.2451171875, |
|
"rewards/rejected": 0.30078125, |
|
"step": 1530 |
|
}, |
|
{ |
|
"epoch": 0.76, |
|
"grad_norm": 10.016816397566908, |
|
"learning_rate": 8.557982772462138e-08, |
|
"logits/chosen": -0.54296875, |
|
"logits/rejected": -0.60546875, |
|
"logps/chosen": -2352.0, |
|
"logps/rejected": -1976.0, |
|
"loss": 0.6418, |
|
"rewards/accuracies": 0.6200000047683716, |
|
"rewards/chosen": 0.6953125, |
|
"rewards/margins": 0.28125, |
|
"rewards/rejected": 0.4140625, |
|
"step": 1540 |
|
}, |
|
{ |
|
"epoch": 0.76, |
|
"grad_norm": 11.142392440409802, |
|
"learning_rate": 8.237816461608049e-08, |
|
"logits/chosen": -0.546875, |
|
"logits/rejected": -0.53125, |
|
"logps/chosen": -2048.0, |
|
"logps/rejected": -1928.0, |
|
"loss": 0.6925, |
|
"rewards/accuracies": 0.6800000071525574, |
|
"rewards/chosen": 0.515625, |
|
"rewards/margins": 0.1630859375, |
|
"rewards/rejected": 0.3515625, |
|
"step": 1550 |
|
}, |
|
{ |
|
"epoch": 0.77, |
|
"grad_norm": 10.436188393180485, |
|
"learning_rate": 7.922568519146425e-08, |
|
"logits/chosen": -0.58984375, |
|
"logits/rejected": -0.62109375, |
|
"logps/chosen": -2432.0, |
|
"logps/rejected": -2240.0, |
|
"loss": 0.662, |
|
"rewards/accuracies": 0.5600000023841858, |
|
"rewards/chosen": 0.65625, |
|
"rewards/margins": 0.14453125, |
|
"rewards/rejected": 0.515625, |
|
"step": 1560 |
|
}, |
|
{ |
|
"epoch": 0.77, |
|
"grad_norm": 8.734002863477851, |
|
"learning_rate": 7.612331445288389e-08, |
|
"logits/chosen": -0.423828125, |
|
"logits/rejected": -0.494140625, |
|
"logps/chosen": -2448.0, |
|
"logps/rejected": -2064.0, |
|
"loss": 0.6432, |
|
"rewards/accuracies": 0.5200000405311584, |
|
"rewards/chosen": 0.69140625, |
|
"rewards/margins": 0.2001953125, |
|
"rewards/rejected": 0.490234375, |
|
"step": 1570 |
|
}, |
|
{ |
|
"epoch": 0.78, |
|
"grad_norm": 10.892280756623634, |
|
"learning_rate": 7.307196269953444e-08, |
|
"logits/chosen": -0.5546875, |
|
"logits/rejected": -0.53515625, |
|
"logps/chosen": -2064.0, |
|
"logps/rejected": -2192.0, |
|
"loss": 0.6439, |
|
"rewards/accuracies": 0.5199999809265137, |
|
"rewards/chosen": 0.478515625, |
|
"rewards/margins": 0.09716796875, |
|
"rewards/rejected": 0.380859375, |
|
"step": 1580 |
|
}, |
|
{ |
|
"epoch": 0.78, |
|
"grad_norm": 11.89094090289422, |
|
"learning_rate": 7.007252526059446e-08, |
|
"logits/chosen": -0.494140625, |
|
"logits/rejected": -0.59375, |
|
"logps/chosen": -2624.0, |
|
"logps/rejected": -2160.0, |
|
"loss": 0.6732, |
|
"rewards/accuracies": 0.5199999809265137, |
|
"rewards/chosen": 0.62109375, |
|
"rewards/margins": 0.07421875, |
|
"rewards/rejected": 0.546875, |
|
"step": 1590 |
|
}, |
|
{ |
|
"epoch": 0.79, |
|
"grad_norm": 8.353720135556827, |
|
"learning_rate": 6.712588223251809e-08, |
|
"logits/chosen": -0.57421875, |
|
"logits/rejected": -0.62890625, |
|
"logps/chosen": -2480.0, |
|
"logps/rejected": -2192.0, |
|
"loss": 0.6185, |
|
"rewards/accuracies": 0.6599999666213989, |
|
"rewards/chosen": 0.66015625, |
|
"rewards/margins": 0.3359375, |
|
"rewards/rejected": 0.32421875, |
|
"step": 1600 |
|
}, |
|
{ |
|
"epoch": 0.79, |
|
"eval_logits/chosen": -0.625, |
|
"eval_logits/rejected": -0.6796875, |
|
"eval_logps/chosen": -2320.0, |
|
"eval_logps/rejected": -2032.0, |
|
"eval_loss": 0.6382187604904175, |
|
"eval_rewards/accuracies": 0.6041666865348816, |
|
"eval_rewards/chosen": 0.59765625, |
|
"eval_rewards/margins": 0.205078125, |
|
"eval_rewards/rejected": 0.392578125, |
|
"eval_runtime": 86.1215, |
|
"eval_samples_per_second": 23.223, |
|
"eval_steps_per_second": 0.488, |
|
"step": 1600 |
|
}, |
|
{ |
|
"epoch": 0.79, |
|
"grad_norm": 9.929495280909341, |
|
"learning_rate": 6.423289822079644e-08, |
|
"logits/chosen": -0.482421875, |
|
"logits/rejected": -0.5234375, |
|
"logps/chosen": -2464.0, |
|
"logps/rejected": -2272.0, |
|
"loss": 0.621, |
|
"rewards/accuracies": 0.699999988079071, |
|
"rewards/chosen": 0.73828125, |
|
"rewards/margins": 0.28515625, |
|
"rewards/rejected": 0.453125, |
|
"step": 1610 |
|
}, |
|
{ |
|
"epoch": 0.79, |
|
"grad_norm": 7.970603963526829, |
|
"learning_rate": 6.139442208626517e-08, |
|
"logits/chosen": -0.58203125, |
|
"logits/rejected": -0.66015625, |
|
"logps/chosen": -2544.0, |
|
"logps/rejected": -2256.0, |
|
"loss": 0.6499, |
|
"rewards/accuracies": 0.6600000262260437, |
|
"rewards/chosen": 0.60546875, |
|
"rewards/margins": 0.10693359375, |
|
"rewards/rejected": 0.5, |
|
"step": 1620 |
|
}, |
|
{ |
|
"epoch": 0.8, |
|
"grad_norm": 7.712927942409274, |
|
"learning_rate": 5.8611286696030795e-08, |
|
"logits/chosen": -0.546875, |
|
"logits/rejected": -0.58203125, |
|
"logps/chosen": -2800.0, |
|
"logps/rejected": -2480.0, |
|
"loss": 0.6347, |
|
"rewards/accuracies": 0.699999988079071, |
|
"rewards/chosen": 0.81640625, |
|
"rewards/margins": 0.3125, |
|
"rewards/rejected": 0.50390625, |
|
"step": 1630 |
|
}, |
|
{ |
|
"epoch": 0.8, |
|
"grad_norm": 8.214503728500627, |
|
"learning_rate": 5.5884308679090525e-08, |
|
"logits/chosen": -0.5703125, |
|
"logits/rejected": -0.60546875, |
|
"logps/chosen": -2208.0, |
|
"logps/rejected": -2112.0, |
|
"loss": 0.6556, |
|
"rewards/accuracies": 0.4599999785423279, |
|
"rewards/chosen": 0.40625, |
|
"rewards/margins": 0.11474609375, |
|
"rewards/rejected": 0.291015625, |
|
"step": 1640 |
|
}, |
|
{ |
|
"epoch": 0.81, |
|
"grad_norm": 8.126286436494889, |
|
"learning_rate": 5.321428818671672e-08, |
|
"logits/chosen": -0.52734375, |
|
"logits/rejected": -0.609375, |
|
"logps/chosen": -2128.0, |
|
"logps/rejected": -1752.0, |
|
"loss": 0.6435, |
|
"rewards/accuracies": 0.6200000047683716, |
|
"rewards/chosen": 0.443359375, |
|
"rewards/margins": 0.166015625, |
|
"rewards/rejected": 0.27734375, |
|
"step": 1650 |
|
}, |
|
{ |
|
"epoch": 0.81, |
|
"grad_norm": 9.727164383599991, |
|
"learning_rate": 5.060200865767605e-08, |
|
"logits/chosen": -0.5546875, |
|
"logits/rejected": -0.6171875, |
|
"logps/chosen": -2336.0, |
|
"logps/rejected": -2040.0, |
|
"loss": 0.627, |
|
"rewards/accuracies": 0.6200000047683716, |
|
"rewards/chosen": 0.458984375, |
|
"rewards/margins": 0.2470703125, |
|
"rewards/rejected": 0.2119140625, |
|
"step": 1660 |
|
}, |
|
{ |
|
"epoch": 0.82, |
|
"grad_norm": 8.55962886651413, |
|
"learning_rate": 4.804823658835233e-08, |
|
"logits/chosen": -0.5859375, |
|
"logits/rejected": -0.671875, |
|
"logps/chosen": -2352.0, |
|
"logps/rejected": -1920.0, |
|
"loss": 0.6377, |
|
"rewards/accuracies": 0.5, |
|
"rewards/chosen": 0.51171875, |
|
"rewards/margins": 0.14453125, |
|
"rewards/rejected": 0.369140625, |
|
"step": 1670 |
|
}, |
|
{ |
|
"epoch": 0.82, |
|
"grad_norm": 13.3092741964271, |
|
"learning_rate": 4.555372130784102e-08, |
|
"logits/chosen": -0.65625, |
|
"logits/rejected": -0.71875, |
|
"logps/chosen": -1912.0, |
|
"logps/rejected": -1752.0, |
|
"loss": 0.6305, |
|
"rewards/accuracies": 0.5200000405311584, |
|
"rewards/chosen": 0.376953125, |
|
"rewards/margins": 0.0341796875, |
|
"rewards/rejected": 0.34375, |
|
"step": 1680 |
|
}, |
|
{ |
|
"epoch": 0.83, |
|
"grad_norm": 8.172767944806202, |
|
"learning_rate": 4.311919475808037e-08, |
|
"logits/chosen": -0.5546875, |
|
"logits/rejected": -0.609375, |
|
"logps/chosen": -2256.0, |
|
"logps/rejected": -2032.0, |
|
"loss": 0.6572, |
|
"rewards/accuracies": 0.6000000238418579, |
|
"rewards/chosen": 0.53125, |
|
"rewards/margins": 0.1796875, |
|
"rewards/rejected": 0.3515625, |
|
"step": 1690 |
|
}, |
|
{ |
|
"epoch": 0.83, |
|
"grad_norm": 8.68637911065445, |
|
"learning_rate": 4.0745371279084976e-08, |
|
"logits/chosen": -0.5, |
|
"logits/rejected": -0.53515625, |
|
"logps/chosen": -2528.0, |
|
"logps/rejected": -2240.0, |
|
"loss": 0.6408, |
|
"rewards/accuracies": 0.5400000810623169, |
|
"rewards/chosen": 0.6484375, |
|
"rewards/margins": 0.189453125, |
|
"rewards/rejected": 0.458984375, |
|
"step": 1700 |
|
}, |
|
{ |
|
"epoch": 0.83, |
|
"eval_logits/chosen": -0.6171875, |
|
"eval_logits/rejected": -0.671875, |
|
"eval_logps/chosen": -2320.0, |
|
"eval_logps/rejected": -2024.0, |
|
"eval_loss": 0.6374297142028809, |
|
"eval_rewards/accuracies": 0.5952380895614624, |
|
"eval_rewards/chosen": 0.59765625, |
|
"eval_rewards/margins": 0.2041015625, |
|
"eval_rewards/rejected": 0.392578125, |
|
"eval_runtime": 86.0796, |
|
"eval_samples_per_second": 23.234, |
|
"eval_steps_per_second": 0.488, |
|
"step": 1700 |
|
}, |
|
{ |
|
"epoch": 0.84, |
|
"grad_norm": 12.620733225081068, |
|
"learning_rate": 3.843294739934369e-08, |
|
"logits/chosen": -0.515625, |
|
"logits/rejected": -0.53515625, |
|
"logps/chosen": -2176.0, |
|
"logps/rejected": -2208.0, |
|
"loss": 0.6653, |
|
"rewards/accuracies": 0.6200000047683716, |
|
"rewards/chosen": 0.6015625, |
|
"rewards/margins": 0.142578125, |
|
"rewards/rejected": 0.45703125, |
|
"step": 1710 |
|
}, |
|
{ |
|
"epoch": 0.84, |
|
"grad_norm": 11.28123633647817, |
|
"learning_rate": 3.6182601631443596e-08, |
|
"logits/chosen": -0.5, |
|
"logits/rejected": -0.62890625, |
|
"logps/chosen": -2752.0, |
|
"logps/rejected": -2032.0, |
|
"loss": 0.6464, |
|
"rewards/accuracies": 0.6000000238418579, |
|
"rewards/chosen": 0.671875, |
|
"rewards/margins": 0.1640625, |
|
"rewards/rejected": 0.5078125, |
|
"step": 1720 |
|
}, |
|
{ |
|
"epoch": 0.85, |
|
"grad_norm": 9.053407610350268, |
|
"learning_rate": 3.3994994272980944e-08, |
|
"logits/chosen": -0.50390625, |
|
"logits/rejected": -0.6171875, |
|
"logps/chosen": -2384.0, |
|
"logps/rejected": -1832.0, |
|
"loss": 0.6394, |
|
"rewards/accuracies": 0.6600000262260437, |
|
"rewards/chosen": 0.59375, |
|
"rewards/margins": 0.271484375, |
|
"rewards/rejected": 0.3203125, |
|
"step": 1730 |
|
}, |
|
{ |
|
"epoch": 0.85, |
|
"grad_norm": 10.202733795570047, |
|
"learning_rate": 3.187076721281595e-08, |
|
"logits/chosen": -0.55078125, |
|
"logits/rejected": -0.6484375, |
|
"logps/chosen": -2080.0, |
|
"logps/rejected": -1744.0, |
|
"loss": 0.6662, |
|
"rewards/accuracies": 0.5600000023841858, |
|
"rewards/chosen": 0.474609375, |
|
"rewards/margins": 0.1572265625, |
|
"rewards/rejected": 0.31640625, |
|
"step": 1740 |
|
}, |
|
{ |
|
"epoch": 0.86, |
|
"grad_norm": 9.310568396714823, |
|
"learning_rate": 2.9810543742729705e-08, |
|
"logits/chosen": -0.5390625, |
|
"logits/rejected": -0.56640625, |
|
"logps/chosen": -2304.0, |
|
"logps/rejected": -2096.0, |
|
"loss": 0.6431, |
|
"rewards/accuracies": 0.5799999833106995, |
|
"rewards/chosen": 0.6796875, |
|
"rewards/margins": 0.263671875, |
|
"rewards/rejected": 0.4140625, |
|
"step": 1750 |
|
}, |
|
{ |
|
"epoch": 0.86, |
|
"grad_norm": 9.254692446674307, |
|
"learning_rate": 2.7814928374537334e-08, |
|
"logits/chosen": -0.48828125, |
|
"logits/rejected": -0.625, |
|
"logps/chosen": -2272.0, |
|
"logps/rejected": -1696.0, |
|
"loss": 0.631, |
|
"rewards/accuracies": 0.5200000405311584, |
|
"rewards/chosen": 0.51171875, |
|
"rewards/margins": 0.1708984375, |
|
"rewards/rejected": 0.33984375, |
|
"step": 1760 |
|
}, |
|
{ |
|
"epoch": 0.87, |
|
"grad_norm": 7.485091349390559, |
|
"learning_rate": 2.5884506662711886e-08, |
|
"logits/chosen": -0.49609375, |
|
"logits/rejected": -0.62109375, |
|
"logps/chosen": -2576.0, |
|
"logps/rejected": -2008.0, |
|
"loss": 0.6258, |
|
"rewards/accuracies": 0.6000000238418579, |
|
"rewards/chosen": 0.68359375, |
|
"rewards/margins": 0.296875, |
|
"rewards/rejected": 0.388671875, |
|
"step": 1770 |
|
}, |
|
{ |
|
"epoch": 0.87, |
|
"grad_norm": 7.583505828929572, |
|
"learning_rate": 2.4019845032570875e-08, |
|
"logits/chosen": -0.4921875, |
|
"logits/rejected": -0.5859375, |
|
"logps/chosen": -2688.0, |
|
"logps/rejected": -2176.0, |
|
"loss": 0.6613, |
|
"rewards/accuracies": 0.6000000238418579, |
|
"rewards/chosen": 0.62890625, |
|
"rewards/margins": 0.1943359375, |
|
"rewards/rejected": 0.435546875, |
|
"step": 1780 |
|
}, |
|
{ |
|
"epoch": 0.88, |
|
"grad_norm": 10.11330279692475, |
|
"learning_rate": 2.222149061407527e-08, |
|
"logits/chosen": -0.43359375, |
|
"logits/rejected": -0.5078125, |
|
"logps/chosen": -3072.0, |
|
"logps/rejected": -2704.0, |
|
"loss": 0.6427, |
|
"rewards/accuracies": 0.46000003814697266, |
|
"rewards/chosen": 0.8828125, |
|
"rewards/margins": 0.140625, |
|
"rewards/rejected": 0.7421875, |
|
"step": 1790 |
|
}, |
|
{ |
|
"epoch": 0.88, |
|
"grad_norm": 10.263183451230402, |
|
"learning_rate": 2.0489971081290193e-08, |
|
"logits/chosen": -0.5703125, |
|
"logits/rejected": -0.58984375, |
|
"logps/chosen": -2208.0, |
|
"logps/rejected": -1936.0, |
|
"loss": 0.662, |
|
"rewards/accuracies": 0.5200000405311584, |
|
"rewards/chosen": 0.62890625, |
|
"rewards/margins": 0.11181640625, |
|
"rewards/rejected": 0.515625, |
|
"step": 1800 |
|
}, |
|
{ |
|
"epoch": 0.88, |
|
"eval_logits/chosen": -0.62890625, |
|
"eval_logits/rejected": -0.68359375, |
|
"eval_logps/chosen": -2320.0, |
|
"eval_logps/rejected": -2024.0, |
|
"eval_loss": 0.6355390548706055, |
|
"eval_rewards/accuracies": 0.601190447807312, |
|
"eval_rewards/chosen": 0.609375, |
|
"eval_rewards/margins": 0.2119140625, |
|
"eval_rewards/rejected": 0.3984375, |
|
"eval_runtime": 86.081, |
|
"eval_samples_per_second": 23.234, |
|
"eval_steps_per_second": 0.488, |
|
"step": 1800 |
|
}, |
|
{ |
|
"epoch": 0.89, |
|
"grad_norm": 7.814802788464052, |
|
"learning_rate": 1.882579449755495e-08, |
|
"logits/chosen": -0.515625, |
|
"logits/rejected": -0.640625, |
|
"logps/chosen": -2656.0, |
|
"logps/rejected": -2128.0, |
|
"loss": 0.6186, |
|
"rewards/accuracies": 0.5800000429153442, |
|
"rewards/chosen": 0.75, |
|
"rewards/margins": 0.29296875, |
|
"rewards/rejected": 0.45703125, |
|
"step": 1810 |
|
}, |
|
{ |
|
"epoch": 0.89, |
|
"grad_norm": 9.220542164026453, |
|
"learning_rate": 1.7229449166406477e-08, |
|
"logits/chosen": -0.54296875, |
|
"logits/rejected": -0.58203125, |
|
"logps/chosen": -2496.0, |
|
"logps/rejected": -2256.0, |
|
"loss": 0.658, |
|
"rewards/accuracies": 0.6200000047683716, |
|
"rewards/chosen": 0.63671875, |
|
"rewards/margins": 0.1083984375, |
|
"rewards/rejected": 0.52734375, |
|
"step": 1820 |
|
}, |
|
{ |
|
"epoch": 0.9, |
|
"grad_norm": 11.528605527636161, |
|
"learning_rate": 1.5701403488301235e-08, |
|
"logits/chosen": -0.5546875, |
|
"logits/rejected": -0.625, |
|
"logps/chosen": -2288.0, |
|
"logps/rejected": -2000.0, |
|
"loss": 0.6381, |
|
"rewards/accuracies": 0.6399999856948853, |
|
"rewards/chosen": 0.53125, |
|
"rewards/margins": 0.255859375, |
|
"rewards/rejected": 0.2734375, |
|
"step": 1830 |
|
}, |
|
{ |
|
"epoch": 0.9, |
|
"grad_norm": 13.882026366198952, |
|
"learning_rate": 1.4242105823176837e-08, |
|
"logits/chosen": -0.671875, |
|
"logits/rejected": -0.7421875, |
|
"logps/chosen": -2096.0, |
|
"logps/rejected": -1752.0, |
|
"loss": 0.6506, |
|
"rewards/accuracies": 0.5800000429153442, |
|
"rewards/chosen": 0.515625, |
|
"rewards/margins": 0.19140625, |
|
"rewards/rejected": 0.32421875, |
|
"step": 1840 |
|
}, |
|
{ |
|
"epoch": 0.91, |
|
"grad_norm": 9.608713807399866, |
|
"learning_rate": 1.285198435889398e-08, |
|
"logits/chosen": -0.484375, |
|
"logits/rejected": -0.56640625, |
|
"logps/chosen": -2480.0, |
|
"logps/rejected": -1984.0, |
|
"loss": 0.6507, |
|
"rewards/accuracies": 0.6000000238418579, |
|
"rewards/chosen": 0.6015625, |
|
"rewards/margins": 0.2275390625, |
|
"rewards/rejected": 0.373046875, |
|
"step": 1850 |
|
}, |
|
{ |
|
"epoch": 0.91, |
|
"grad_norm": 9.195872685886451, |
|
"learning_rate": 1.1531446985597604e-08, |
|
"logits/chosen": -0.671875, |
|
"logits/rejected": -0.5859375, |
|
"logps/chosen": -1664.0, |
|
"logps/rejected": -2064.0, |
|
"loss": 0.6717, |
|
"rewards/accuracies": 0.5400000214576721, |
|
"rewards/chosen": 0.447265625, |
|
"rewards/margins": -0.01806640625, |
|
"rewards/rejected": 0.46484375, |
|
"step": 1860 |
|
}, |
|
{ |
|
"epoch": 0.92, |
|
"grad_norm": 10.06291940471119, |
|
"learning_rate": 1.0280881176033318e-08, |
|
"logits/chosen": -0.55859375, |
|
"logits/rejected": -0.640625, |
|
"logps/chosen": -2400.0, |
|
"logps/rejected": -1952.0, |
|
"loss": 0.6418, |
|
"rewards/accuracies": 0.5600000619888306, |
|
"rewards/chosen": 0.6328125, |
|
"rewards/margins": 0.232421875, |
|
"rewards/rejected": 0.40234375, |
|
"step": 1870 |
|
}, |
|
{ |
|
"epoch": 0.92, |
|
"grad_norm": 9.690917680622107, |
|
"learning_rate": 9.100653871854963e-09, |
|
"logits/chosen": -0.55078125, |
|
"logits/rejected": -0.60546875, |
|
"logps/chosen": -2608.0, |
|
"logps/rejected": -2336.0, |
|
"loss": 0.6336, |
|
"rewards/accuracies": 0.5199999809265137, |
|
"rewards/chosen": 0.68359375, |
|
"rewards/margins": 0.31640625, |
|
"rewards/rejected": 0.3671875, |
|
"step": 1880 |
|
}, |
|
{ |
|
"epoch": 0.93, |
|
"grad_norm": 7.698287956753982, |
|
"learning_rate": 7.991111375956539e-09, |
|
"logits/chosen": -0.470703125, |
|
"logits/rejected": -0.54296875, |
|
"logps/chosen": -2480.0, |
|
"logps/rejected": -2224.0, |
|
"loss": 0.6679, |
|
"rewards/accuracies": 0.4599999785423279, |
|
"rewards/chosen": 0.60546875, |
|
"rewards/margins": 0.109375, |
|
"rewards/rejected": 0.498046875, |
|
"step": 1890 |
|
}, |
|
{ |
|
"epoch": 0.93, |
|
"grad_norm": 9.319385343543805, |
|
"learning_rate": 6.9525792508597634e-09, |
|
"logits/chosen": -0.59765625, |
|
"logits/rejected": -0.6015625, |
|
"logps/chosen": -2336.0, |
|
"logps/rejected": -2320.0, |
|
"loss": 0.6385, |
|
"rewards/accuracies": 0.6599999666213989, |
|
"rewards/chosen": 0.58984375, |
|
"rewards/margins": 0.2236328125, |
|
"rewards/rejected": 0.3671875, |
|
"step": 1900 |
|
}, |
|
{ |
|
"epoch": 0.93, |
|
"eval_logits/chosen": -0.62109375, |
|
"eval_logits/rejected": -0.67578125, |
|
"eval_logps/chosen": -2320.0, |
|
"eval_logps/rejected": -2024.0, |
|
"eval_loss": 0.6378594040870667, |
|
"eval_rewards/accuracies": 0.625, |
|
"eval_rewards/chosen": 0.60546875, |
|
"eval_rewards/margins": 0.212890625, |
|
"eval_rewards/rejected": 0.392578125, |
|
"eval_runtime": 86.1786, |
|
"eval_samples_per_second": 23.208, |
|
"eval_steps_per_second": 0.487, |
|
"step": 1900 |
|
}, |
|
{ |
|
"epoch": 0.94, |
|
"grad_norm": 7.544043526165161, |
|
"learning_rate": 5.985362223187296e-09, |
|
"logits/chosen": -0.47265625, |
|
"logits/rejected": -0.54296875, |
|
"logps/chosen": -2512.0, |
|
"logps/rejected": -2176.0, |
|
"loss": 0.6528, |
|
"rewards/accuracies": 0.5799999833106995, |
|
"rewards/chosen": 0.66015625, |
|
"rewards/margins": 0.21484375, |
|
"rewards/rejected": 0.4453125, |
|
"step": 1910 |
|
}, |
|
{ |
|
"epoch": 0.94, |
|
"grad_norm": 9.355498575273248, |
|
"learning_rate": 5.089744094249837e-09, |
|
"logits/chosen": -0.58203125, |
|
"logits/rejected": -0.6953125, |
|
"logps/chosen": -2848.0, |
|
"logps/rejected": -2304.0, |
|
"loss": 0.6129, |
|
"rewards/accuracies": 0.699999988079071, |
|
"rewards/chosen": 0.75390625, |
|
"rewards/margins": 0.279296875, |
|
"rewards/rejected": 0.4765625, |
|
"step": 1920 |
|
}, |
|
{ |
|
"epoch": 0.95, |
|
"grad_norm": 9.114147510919343, |
|
"learning_rate": 4.265987656772857e-09, |
|
"logits/chosen": -0.55078125, |
|
"logits/rejected": -0.625, |
|
"logps/chosen": -2352.0, |
|
"logps/rejected": -1928.0, |
|
"loss": 0.6488, |
|
"rewards/accuracies": 0.6200000047683716, |
|
"rewards/chosen": 0.69921875, |
|
"rewards/margins": 0.31640625, |
|
"rewards/rejected": 0.3828125, |
|
"step": 1930 |
|
}, |
|
{ |
|
"epoch": 0.95, |
|
"grad_norm": 11.411261082676203, |
|
"learning_rate": 3.5143346177878565e-09, |
|
"logits/chosen": -0.46484375, |
|
"logits/rejected": -0.578125, |
|
"logps/chosen": -2752.0, |
|
"logps/rejected": -2176.0, |
|
"loss": 0.6235, |
|
"rewards/accuracies": 0.7799999713897705, |
|
"rewards/chosen": 0.83984375, |
|
"rewards/margins": 0.482421875, |
|
"rewards/rejected": 0.357421875, |
|
"step": 1940 |
|
}, |
|
{ |
|
"epoch": 0.96, |
|
"grad_norm": 7.165814163311654, |
|
"learning_rate": 2.835005527710682e-09, |
|
"logits/chosen": -0.5234375, |
|
"logits/rejected": -0.60546875, |
|
"logps/chosen": -2368.0, |
|
"logps/rejected": -1904.0, |
|
"loss": 0.6375, |
|
"rewards/accuracies": 0.6800000071525574, |
|
"rewards/chosen": 0.58984375, |
|
"rewards/margins": 0.302734375, |
|
"rewards/rejected": 0.2890625, |
|
"step": 1950 |
|
}, |
|
{ |
|
"epoch": 0.96, |
|
"grad_norm": 8.854995734298003, |
|
"learning_rate": 2.2281997156273213e-09, |
|
"logits/chosen": -0.4296875, |
|
"logits/rejected": -0.61328125, |
|
"logps/chosen": -2720.0, |
|
"logps/rejected": -1928.0, |
|
"loss": 0.6276, |
|
"rewards/accuracies": 0.6399999856948853, |
|
"rewards/chosen": 0.671875, |
|
"rewards/margins": 0.373046875, |
|
"rewards/rejected": 0.298828125, |
|
"step": 1960 |
|
}, |
|
{ |
|
"epoch": 0.97, |
|
"grad_norm": 8.38257510772057, |
|
"learning_rate": 1.6940952308068523e-09, |
|
"logits/chosen": -0.59375, |
|
"logits/rejected": -0.671875, |
|
"logps/chosen": -2576.0, |
|
"logps/rejected": -2160.0, |
|
"loss": 0.6368, |
|
"rewards/accuracies": 0.7600001096725464, |
|
"rewards/chosen": 0.8125, |
|
"rewards/margins": 0.439453125, |
|
"rewards/rejected": 0.373046875, |
|
"step": 1970 |
|
}, |
|
{ |
|
"epoch": 0.97, |
|
"grad_norm": 11.857430374835033, |
|
"learning_rate": 1.2328487904580131e-09, |
|
"logits/chosen": -0.61328125, |
|
"logits/rejected": -0.6796875, |
|
"logps/chosen": -2704.0, |
|
"logps/rejected": -2288.0, |
|
"loss": 0.6438, |
|
"rewards/accuracies": 0.5800000429153442, |
|
"rewards/chosen": 0.63671875, |
|
"rewards/margins": 0.1953125, |
|
"rewards/rejected": 0.443359375, |
|
"step": 1980 |
|
}, |
|
{ |
|
"epoch": 0.98, |
|
"grad_norm": 7.7776326201834, |
|
"learning_rate": 8.445957337451515e-10, |
|
"logits/chosen": -0.546875, |
|
"logits/rejected": -0.6328125, |
|
"logps/chosen": -2336.0, |
|
"logps/rejected": -2000.0, |
|
"loss": 0.6336, |
|
"rewards/accuracies": 0.6200000047683716, |
|
"rewards/chosen": 0.6953125, |
|
"rewards/margins": 0.26953125, |
|
"rewards/rejected": 0.423828125, |
|
"step": 1990 |
|
}, |
|
{ |
|
"epoch": 0.98, |
|
"grad_norm": 8.452587117057995, |
|
"learning_rate": 5.29449982077046e-10, |
|
"logits/chosen": -0.578125, |
|
"logits/rejected": -0.640625, |
|
"logps/chosen": -2336.0, |
|
"logps/rejected": -2016.0, |
|
"loss": 0.6154, |
|
"rewards/accuracies": 0.6800000667572021, |
|
"rewards/chosen": 0.6171875, |
|
"rewards/margins": 0.2373046875, |
|
"rewards/rejected": 0.37890625, |
|
"step": 2000 |
|
}, |
|
{ |
|
"epoch": 0.98, |
|
"eval_logits/chosen": -0.62109375, |
|
"eval_logits/rejected": -0.67578125, |
|
"eval_logps/chosen": -2320.0, |
|
"eval_logps/rejected": -2024.0, |
|
"eval_loss": 0.6380937695503235, |
|
"eval_rewards/accuracies": 0.601190447807312, |
|
"eval_rewards/chosen": 0.609375, |
|
"eval_rewards/margins": 0.2041015625, |
|
"eval_rewards/rejected": 0.404296875, |
|
"eval_runtime": 86.1049, |
|
"eval_samples_per_second": 23.227, |
|
"eval_steps_per_second": 0.488, |
|
"step": 2000 |
|
}, |
|
{ |
|
"epoch": 0.99, |
|
"grad_norm": 12.27319369048997, |
|
"learning_rate": 2.875040056799227e-10, |
|
"logits/chosen": -0.5859375, |
|
"logits/rejected": -0.57421875, |
|
"logps/chosen": -2304.0, |
|
"logps/rejected": -2432.0, |
|
"loss": 0.6572, |
|
"rewards/accuracies": 0.5200000405311584, |
|
"rewards/chosen": 0.58203125, |
|
"rewards/margins": 0.08447265625, |
|
"rewards/rejected": 0.498046875, |
|
"step": 2010 |
|
}, |
|
{ |
|
"epoch": 0.99, |
|
"grad_norm": 9.006712698632741, |
|
"learning_rate": 1.1882879646485379e-10, |
|
"logits/chosen": -0.54296875, |
|
"logits/rejected": -0.6015625, |
|
"logps/chosen": -2040.0, |
|
"logps/rejected": -1824.0, |
|
"loss": 0.6547, |
|
"rewards/accuracies": 0.5, |
|
"rewards/chosen": 0.53515625, |
|
"rewards/margins": 0.1533203125, |
|
"rewards/rejected": 0.3828125, |
|
"step": 2020 |
|
}, |
|
{ |
|
"epoch": 1.0, |
|
"grad_norm": 9.784506721487483, |
|
"learning_rate": 2.3473847197225115e-11, |
|
"logits/chosen": -0.54296875, |
|
"logits/rejected": -0.671875, |
|
"logps/chosen": -2512.0, |
|
"logps/rejected": -1984.0, |
|
"loss": 0.6671, |
|
"rewards/accuracies": 0.5400000810623169, |
|
"rewards/chosen": 0.703125, |
|
"rewards/margins": 0.259765625, |
|
"rewards/rejected": 0.443359375, |
|
"step": 2030 |
|
}, |
|
{ |
|
"epoch": 1.0, |
|
"step": 2038, |
|
"total_flos": 0.0, |
|
"train_loss": 0.6502101503246783, |
|
"train_runtime": 8979.6364, |
|
"train_samples_per_second": 6.808, |
|
"train_steps_per_second": 0.227 |
|
} |
|
], |
|
"logging_steps": 10, |
|
"max_steps": 2038, |
|
"num_input_tokens_seen": 0, |
|
"num_train_epochs": 1, |
|
"save_steps": 100, |
|
"total_flos": 0.0, |
|
"train_batch_size": 5, |
|
"trial_name": null, |
|
"trial_params": null |
|
} |
|
|