|
{ |
|
"best_metric": null, |
|
"best_model_checkpoint": null, |
|
"epoch": 5.0, |
|
"eval_steps": 100, |
|
"global_step": 1250, |
|
"is_hyper_param_search": false, |
|
"is_local_process_zero": true, |
|
"is_world_process_zero": true, |
|
"log_history": [ |
|
{ |
|
"epoch": 0.0, |
|
"learning_rate": 4e-08, |
|
"logits/chosen": -1.8503975868225098, |
|
"logits/rejected": -1.8503975868225098, |
|
"logps/chosen": 0.0, |
|
"logps/rejected": 0.0, |
|
"loss": 0.4075, |
|
"rewards/accuracies": 0.0, |
|
"rewards/chosen": 0.0, |
|
"rewards/margins": 0.0, |
|
"rewards/rejected": 0.0, |
|
"step": 1 |
|
}, |
|
{ |
|
"epoch": 0.04, |
|
"learning_rate": 4.0000000000000003e-07, |
|
"logits/chosen": -1.8588156700134277, |
|
"logits/rejected": -1.8588156700134277, |
|
"logps/chosen": 0.0, |
|
"logps/rejected": 0.0, |
|
"loss": 0.3636, |
|
"rewards/accuracies": 0.0, |
|
"rewards/chosen": 0.0, |
|
"rewards/margins": 0.0, |
|
"rewards/rejected": 0.0, |
|
"step": 10 |
|
}, |
|
{ |
|
"epoch": 0.08, |
|
"learning_rate": 8.000000000000001e-07, |
|
"logits/chosen": -1.970517873764038, |
|
"logits/rejected": -1.970517873764038, |
|
"logps/chosen": 0.0, |
|
"logps/rejected": 0.0, |
|
"loss": 0.3902, |
|
"rewards/accuracies": 0.0, |
|
"rewards/chosen": 0.0, |
|
"rewards/margins": 0.0, |
|
"rewards/rejected": 0.0, |
|
"step": 20 |
|
}, |
|
{ |
|
"epoch": 0.12, |
|
"learning_rate": 1.2000000000000002e-06, |
|
"logits/chosen": -1.9209930896759033, |
|
"logits/rejected": -1.9209930896759033, |
|
"logps/chosen": 0.0, |
|
"logps/rejected": 0.0, |
|
"loss": 0.3482, |
|
"rewards/accuracies": 0.0, |
|
"rewards/chosen": 0.0, |
|
"rewards/margins": 0.0, |
|
"rewards/rejected": 0.0, |
|
"step": 30 |
|
}, |
|
{ |
|
"epoch": 0.16, |
|
"learning_rate": 1.6000000000000001e-06, |
|
"logits/chosen": -1.883547067642212, |
|
"logits/rejected": -1.883547067642212, |
|
"logps/chosen": 0.0, |
|
"logps/rejected": 0.0, |
|
"loss": 0.3507, |
|
"rewards/accuracies": 0.0, |
|
"rewards/chosen": 0.0, |
|
"rewards/margins": 0.0, |
|
"rewards/rejected": 0.0, |
|
"step": 40 |
|
}, |
|
{ |
|
"epoch": 0.2, |
|
"learning_rate": 2.0000000000000003e-06, |
|
"logits/chosen": -1.9128715991973877, |
|
"logits/rejected": -1.9128715991973877, |
|
"logps/chosen": 0.0, |
|
"logps/rejected": 0.0, |
|
"loss": 0.3359, |
|
"rewards/accuracies": 0.0, |
|
"rewards/chosen": 0.0, |
|
"rewards/margins": 0.0, |
|
"rewards/rejected": 0.0, |
|
"step": 50 |
|
}, |
|
{ |
|
"epoch": 0.24, |
|
"learning_rate": 2.4000000000000003e-06, |
|
"logits/chosen": -2.0107295513153076, |
|
"logits/rejected": -2.0107295513153076, |
|
"logps/chosen": 0.0, |
|
"logps/rejected": 0.0, |
|
"loss": 0.3828, |
|
"rewards/accuracies": 0.0, |
|
"rewards/chosen": 0.0, |
|
"rewards/margins": 0.0, |
|
"rewards/rejected": 0.0, |
|
"step": 60 |
|
}, |
|
{ |
|
"epoch": 0.28, |
|
"learning_rate": 2.8000000000000003e-06, |
|
"logits/chosen": -1.9920228719711304, |
|
"logits/rejected": -1.9920228719711304, |
|
"logps/chosen": 0.0, |
|
"logps/rejected": 0.0, |
|
"loss": 0.3112, |
|
"rewards/accuracies": 0.0, |
|
"rewards/chosen": 0.0, |
|
"rewards/margins": 0.0, |
|
"rewards/rejected": 0.0, |
|
"step": 70 |
|
}, |
|
{ |
|
"epoch": 0.32, |
|
"learning_rate": 3.2000000000000003e-06, |
|
"logits/chosen": -1.8801155090332031, |
|
"logits/rejected": -1.8801155090332031, |
|
"logps/chosen": 0.0, |
|
"logps/rejected": 0.0, |
|
"loss": 0.3778, |
|
"rewards/accuracies": 0.0, |
|
"rewards/chosen": 0.0, |
|
"rewards/margins": 0.0, |
|
"rewards/rejected": 0.0, |
|
"step": 80 |
|
}, |
|
{ |
|
"epoch": 0.36, |
|
"learning_rate": 3.6000000000000003e-06, |
|
"logits/chosen": -2.050198793411255, |
|
"logits/rejected": -2.050198793411255, |
|
"logps/chosen": 0.0, |
|
"logps/rejected": 0.0, |
|
"loss": 0.3655, |
|
"rewards/accuracies": 0.0, |
|
"rewards/chosen": 0.0, |
|
"rewards/margins": 0.0, |
|
"rewards/rejected": 0.0, |
|
"step": 90 |
|
}, |
|
{ |
|
"epoch": 0.4, |
|
"learning_rate": 4.000000000000001e-06, |
|
"logits/chosen": -1.8852717876434326, |
|
"logits/rejected": -1.8852717876434326, |
|
"logps/chosen": 0.0, |
|
"logps/rejected": 0.0, |
|
"loss": 0.3803, |
|
"rewards/accuracies": 0.0, |
|
"rewards/chosen": 0.0, |
|
"rewards/margins": 0.0, |
|
"rewards/rejected": 0.0, |
|
"step": 100 |
|
}, |
|
{ |
|
"epoch": 0.4, |
|
"eval_logits/chosen": -1.9617642164230347, |
|
"eval_logits/rejected": -1.8066532611846924, |
|
"eval_logps/chosen": -266.6976013183594, |
|
"eval_logps/rejected": -254.9398193359375, |
|
"eval_loss": 0.053734518587589264, |
|
"eval_rewards/accuracies": 0.0, |
|
"eval_rewards/chosen": 0.0, |
|
"eval_rewards/margins": 0.0, |
|
"eval_rewards/rejected": 0.0, |
|
"eval_runtime": 700.7393, |
|
"eval_samples_per_second": 2.854, |
|
"eval_steps_per_second": 1.427, |
|
"step": 100 |
|
}, |
|
{ |
|
"epoch": 0.44, |
|
"learning_rate": 4.4e-06, |
|
"logits/chosen": -1.731688141822815, |
|
"logits/rejected": -1.731688141822815, |
|
"logps/chosen": 0.0, |
|
"logps/rejected": 0.0, |
|
"loss": 0.2717, |
|
"rewards/accuracies": 0.0, |
|
"rewards/chosen": 0.0, |
|
"rewards/margins": 0.0, |
|
"rewards/rejected": 0.0, |
|
"step": 110 |
|
}, |
|
{ |
|
"epoch": 0.48, |
|
"learning_rate": 4.800000000000001e-06, |
|
"logits/chosen": -1.8530235290527344, |
|
"logits/rejected": -1.8530235290527344, |
|
"logps/chosen": 0.0, |
|
"logps/rejected": 0.0, |
|
"loss": 0.3482, |
|
"rewards/accuracies": 0.0, |
|
"rewards/chosen": 0.0, |
|
"rewards/margins": 0.0, |
|
"rewards/rejected": 0.0, |
|
"step": 120 |
|
}, |
|
{ |
|
"epoch": 0.52, |
|
"learning_rate": 4.999756310023261e-06, |
|
"logits/chosen": -2.0225424766540527, |
|
"logits/rejected": -2.0225424766540527, |
|
"logps/chosen": 0.0, |
|
"logps/rejected": 0.0, |
|
"loss": 0.3507, |
|
"rewards/accuracies": 0.0, |
|
"rewards/chosen": 0.0, |
|
"rewards/margins": 0.0, |
|
"rewards/rejected": 0.0, |
|
"step": 130 |
|
}, |
|
{ |
|
"epoch": 0.56, |
|
"learning_rate": 4.997807075247147e-06, |
|
"logits/chosen": -1.8995482921600342, |
|
"logits/rejected": -1.8995482921600342, |
|
"logps/chosen": 0.0, |
|
"logps/rejected": 0.0, |
|
"loss": 0.3186, |
|
"rewards/accuracies": 0.0, |
|
"rewards/chosen": 0.0, |
|
"rewards/margins": 0.0, |
|
"rewards/rejected": 0.0, |
|
"step": 140 |
|
}, |
|
{ |
|
"epoch": 0.6, |
|
"learning_rate": 4.993910125649561e-06, |
|
"logits/chosen": -1.8702564239501953, |
|
"logits/rejected": -1.8493874073028564, |
|
"logps/chosen": -4.896004676818848, |
|
"logps/rejected": -1.6084611415863037, |
|
"loss": 0.3112, |
|
"rewards/accuracies": 0.0, |
|
"rewards/chosen": 0.0, |
|
"rewards/margins": 0.0, |
|
"rewards/rejected": 0.0, |
|
"step": 150 |
|
}, |
|
{ |
|
"epoch": 0.64, |
|
"learning_rate": 4.988068499954578e-06, |
|
"logits/chosen": -2.04287052154541, |
|
"logits/rejected": -2.04287052154541, |
|
"logps/chosen": 0.0, |
|
"logps/rejected": 0.0, |
|
"loss": 0.3383, |
|
"rewards/accuracies": 0.0, |
|
"rewards/chosen": 0.0, |
|
"rewards/margins": 0.0, |
|
"rewards/rejected": 0.0, |
|
"step": 160 |
|
}, |
|
{ |
|
"epoch": 0.68, |
|
"learning_rate": 4.980286753286196e-06, |
|
"logits/chosen": -1.8564621210098267, |
|
"logits/rejected": -1.8564621210098267, |
|
"logps/chosen": 0.0, |
|
"logps/rejected": 0.0, |
|
"loss": 0.3531, |
|
"rewards/accuracies": 0.0, |
|
"rewards/chosen": 0.0, |
|
"rewards/margins": 0.0, |
|
"rewards/rejected": 0.0, |
|
"step": 170 |
|
}, |
|
{ |
|
"epoch": 0.72, |
|
"learning_rate": 4.970570953616383e-06, |
|
"logits/chosen": -1.9510726928710938, |
|
"logits/rejected": -1.9173896312713623, |
|
"logps/chosen": -12.76134967803955, |
|
"logps/rejected": -5.861204624176025, |
|
"loss": 0.3393, |
|
"rewards/accuracies": 0.02500000037252903, |
|
"rewards/chosen": -0.029967620968818665, |
|
"rewards/margins": 0.002692684531211853, |
|
"rewards/rejected": -0.03266030550003052, |
|
"step": 180 |
|
}, |
|
{ |
|
"epoch": 0.76, |
|
"learning_rate": 4.958928677033465e-06, |
|
"logits/chosen": -1.8616416454315186, |
|
"logits/rejected": -1.8616416454315186, |
|
"logps/chosen": 0.0, |
|
"logps/rejected": 0.0, |
|
"loss": 0.3383, |
|
"rewards/accuracies": 0.0, |
|
"rewards/chosen": 0.0, |
|
"rewards/margins": 0.0, |
|
"rewards/rejected": 0.0, |
|
"step": 190 |
|
}, |
|
{ |
|
"epoch": 0.8, |
|
"learning_rate": 4.9453690018345144e-06, |
|
"logits/chosen": -1.911077857017517, |
|
"logits/rejected": -1.9127223491668701, |
|
"logps/chosen": -5.607743740081787, |
|
"logps/rejected": -6.2597527503967285, |
|
"loss": 0.2732, |
|
"rewards/accuracies": 0.0, |
|
"rewards/chosen": -9.019851859193295e-05, |
|
"rewards/margins": -0.0019743461161851883, |
|
"rewards/rejected": 0.0018841475248336792, |
|
"step": 200 |
|
}, |
|
{ |
|
"epoch": 0.8, |
|
"eval_logits/chosen": -1.9952213764190674, |
|
"eval_logits/rejected": -1.8367009162902832, |
|
"eval_logps/chosen": -270.7552795410156, |
|
"eval_logps/rejected": -259.2743835449219, |
|
"eval_loss": 0.05847138166427612, |
|
"eval_rewards/accuracies": 0.4404999911785126, |
|
"eval_rewards/chosen": -0.040576834231615067, |
|
"eval_rewards/margins": 0.0027689056005328894, |
|
"eval_rewards/rejected": -0.04334573447704315, |
|
"eval_runtime": 702.202, |
|
"eval_samples_per_second": 2.848, |
|
"eval_steps_per_second": 1.424, |
|
"step": 200 |
|
}, |
|
{ |
|
"epoch": 0.84, |
|
"learning_rate": 4.9299025014463665e-06, |
|
"logits/chosen": -1.871522307395935, |
|
"logits/rejected": -1.871522307395935, |
|
"logps/chosen": 0.0, |
|
"logps/rejected": 0.0, |
|
"loss": 0.3062, |
|
"rewards/accuracies": 0.0, |
|
"rewards/chosen": 0.0, |
|
"rewards/margins": 0.0, |
|
"rewards/rejected": 0.0, |
|
"step": 210 |
|
}, |
|
{ |
|
"epoch": 0.88, |
|
"learning_rate": 4.912541236180779e-06, |
|
"logits/chosen": -2.014587640762329, |
|
"logits/rejected": -2.014587640762329, |
|
"logps/chosen": 0.0, |
|
"logps/rejected": 0.0, |
|
"loss": 0.3408, |
|
"rewards/accuracies": 0.0, |
|
"rewards/chosen": 0.0, |
|
"rewards/margins": 0.0, |
|
"rewards/rejected": 0.0, |
|
"step": 220 |
|
}, |
|
{ |
|
"epoch": 0.92, |
|
"learning_rate": 4.893298743830168e-06, |
|
"logits/chosen": -1.9391746520996094, |
|
"logits/rejected": -1.9385459423065186, |
|
"logps/chosen": -2.180995464324951, |
|
"logps/rejected": -2.3552231788635254, |
|
"loss": 0.3505, |
|
"rewards/accuracies": 0.02500000037252903, |
|
"rewards/chosen": -0.007617546711117029, |
|
"rewards/margins": 0.00016982034139800817, |
|
"rewards/rejected": -0.007787366863340139, |
|
"step": 230 |
|
}, |
|
{ |
|
"epoch": 0.96, |
|
"learning_rate": 4.8721900291112415e-06, |
|
"logits/chosen": -2.1026082038879395, |
|
"logits/rejected": -2.1026082038879395, |
|
"logps/chosen": 0.0, |
|
"logps/rejected": 0.0, |
|
"loss": 0.3211, |
|
"rewards/accuracies": 0.0, |
|
"rewards/chosen": 0.0, |
|
"rewards/margins": 0.0, |
|
"rewards/rejected": 0.0, |
|
"step": 240 |
|
}, |
|
{ |
|
"epoch": 1.0, |
|
"learning_rate": 4.849231551964771e-06, |
|
"logits/chosen": -1.923709511756897, |
|
"logits/rejected": -1.923709511756897, |
|
"logps/chosen": 0.0, |
|
"logps/rejected": 0.0, |
|
"loss": 0.3581, |
|
"rewards/accuracies": 0.0, |
|
"rewards/chosen": 0.0, |
|
"rewards/margins": 0.0, |
|
"rewards/rejected": 0.0, |
|
"step": 250 |
|
}, |
|
{ |
|
"epoch": 1.04, |
|
"learning_rate": 4.824441214720629e-06, |
|
"logits/chosen": -1.7751576900482178, |
|
"logits/rejected": -1.7751576900482178, |
|
"logps/chosen": 0.0, |
|
"logps/rejected": 0.0, |
|
"loss": 0.3778, |
|
"rewards/accuracies": 0.0, |
|
"rewards/chosen": 0.0, |
|
"rewards/margins": 0.0, |
|
"rewards/rejected": 0.0, |
|
"step": 260 |
|
}, |
|
{ |
|
"epoch": 1.08, |
|
"learning_rate": 4.7978383481380865e-06, |
|
"logits/chosen": -1.8949896097183228, |
|
"logits/rejected": -1.8624740839004517, |
|
"logps/chosen": -13.423696517944336, |
|
"logps/rejected": -20.979846954345703, |
|
"loss": 1.1297, |
|
"rewards/accuracies": 0.02500000037252903, |
|
"rewards/chosen": -0.036591093987226486, |
|
"rewards/margins": 0.14725562930107117, |
|
"rewards/rejected": -0.18384674191474915, |
|
"step": 270 |
|
}, |
|
{ |
|
"epoch": 1.12, |
|
"learning_rate": 4.769443696332272e-06, |
|
"logits/chosen": -1.9459644556045532, |
|
"logits/rejected": -1.9459644556045532, |
|
"logps/chosen": 0.0, |
|
"logps/rejected": 0.0, |
|
"loss": 0.3754, |
|
"rewards/accuracies": 0.0, |
|
"rewards/chosen": 0.0, |
|
"rewards/margins": 0.0, |
|
"rewards/rejected": 0.0, |
|
"step": 280 |
|
}, |
|
{ |
|
"epoch": 1.16, |
|
"learning_rate": 4.7392794005985324e-06, |
|
"logits/chosen": -1.8699764013290405, |
|
"logits/rejected": -1.8699764013290405, |
|
"logps/chosen": 0.0, |
|
"logps/rejected": 0.0, |
|
"loss": 0.3087, |
|
"rewards/accuracies": 0.0, |
|
"rewards/chosen": 0.0, |
|
"rewards/margins": 0.0, |
|
"rewards/rejected": 0.0, |
|
"step": 290 |
|
}, |
|
{ |
|
"epoch": 1.2, |
|
"learning_rate": 4.707368982147318e-06, |
|
"logits/chosen": -1.9606857299804688, |
|
"logits/rejected": -1.9606857299804688, |
|
"logps/chosen": 0.0, |
|
"logps/rejected": 0.0, |
|
"loss": 0.3013, |
|
"rewards/accuracies": 0.0, |
|
"rewards/chosen": 0.0, |
|
"rewards/margins": 0.0, |
|
"rewards/rejected": 0.0, |
|
"step": 300 |
|
}, |
|
{ |
|
"epoch": 1.2, |
|
"eval_logits/chosen": -1.975152850151062, |
|
"eval_logits/rejected": -1.813112497329712, |
|
"eval_logps/chosen": -299.82257080078125, |
|
"eval_logps/rejected": -291.25750732421875, |
|
"eval_loss": 0.08001529425382614, |
|
"eval_rewards/accuracies": 0.4645000100135803, |
|
"eval_rewards/chosen": -0.3312495946884155, |
|
"eval_rewards/margins": 0.031927283853292465, |
|
"eval_rewards/rejected": -0.3631769120693207, |
|
"eval_runtime": 703.8743, |
|
"eval_samples_per_second": 2.841, |
|
"eval_steps_per_second": 1.421, |
|
"step": 300 |
|
}, |
|
{ |
|
"epoch": 1.24, |
|
"learning_rate": 4.673737323763048e-06, |
|
"logits/chosen": -1.8876497745513916, |
|
"logits/rejected": -1.8876497745513916, |
|
"logps/chosen": 0.0, |
|
"logps/rejected": 0.0, |
|
"loss": 0.3383, |
|
"rewards/accuracies": 0.0, |
|
"rewards/chosen": 0.0, |
|
"rewards/margins": 0.0, |
|
"rewards/rejected": 0.0, |
|
"step": 310 |
|
}, |
|
{ |
|
"epoch": 1.28, |
|
"learning_rate": 4.638410650401267e-06, |
|
"logits/chosen": -1.9741500616073608, |
|
"logits/rejected": -1.9741500616073608, |
|
"logps/chosen": 0.0, |
|
"logps/rejected": 0.0, |
|
"loss": 0.3951, |
|
"rewards/accuracies": 0.0, |
|
"rewards/chosen": 0.0, |
|
"rewards/margins": 0.0, |
|
"rewards/rejected": 0.0, |
|
"step": 320 |
|
}, |
|
{ |
|
"epoch": 1.32, |
|
"learning_rate": 4.601416508739211e-06, |
|
"logits/chosen": -1.9750694036483765, |
|
"logits/rejected": -1.974765419960022, |
|
"logps/chosen": -2.6847405433654785, |
|
"logps/rejected": -4.218203067779541, |
|
"loss": 0.3257, |
|
"rewards/accuracies": 0.02500000037252903, |
|
"rewards/chosen": -0.012654995545744896, |
|
"rewards/margins": 0.01376216672360897, |
|
"rewards/rejected": -0.026417162269353867, |
|
"step": 330 |
|
}, |
|
{ |
|
"epoch": 1.36, |
|
"learning_rate": 4.562783745695738e-06, |
|
"logits/chosen": -1.7031259536743164, |
|
"logits/rejected": -1.7031259536743164, |
|
"logps/chosen": 0.0, |
|
"logps/rejected": 0.0, |
|
"loss": 0.3408, |
|
"rewards/accuracies": 0.0, |
|
"rewards/chosen": 0.0, |
|
"rewards/margins": 0.0, |
|
"rewards/rejected": 0.0, |
|
"step": 340 |
|
}, |
|
{ |
|
"epoch": 1.4, |
|
"learning_rate": 4.522542485937369e-06, |
|
"logits/chosen": -1.9031295776367188, |
|
"logits/rejected": -1.9031295776367188, |
|
"logps/chosen": 0.0, |
|
"logps/rejected": 0.0, |
|
"loss": 0.3754, |
|
"rewards/accuracies": 0.0, |
|
"rewards/chosen": 0.0, |
|
"rewards/margins": 0.0, |
|
"rewards/rejected": 0.0, |
|
"step": 350 |
|
}, |
|
{ |
|
"epoch": 1.44, |
|
"learning_rate": 4.4807241083879774e-06, |
|
"logits/chosen": -1.9317785501480103, |
|
"logits/rejected": -1.9317785501480103, |
|
"logps/chosen": 0.0, |
|
"logps/rejected": 0.0, |
|
"loss": 0.3778, |
|
"rewards/accuracies": 0.0, |
|
"rewards/chosen": 0.0, |
|
"rewards/margins": 0.0, |
|
"rewards/rejected": 0.0, |
|
"step": 360 |
|
}, |
|
{ |
|
"epoch": 1.48, |
|
"learning_rate": 4.437361221760449e-06, |
|
"logits/chosen": -1.8960540294647217, |
|
"logits/rejected": -1.8960540294647217, |
|
"logps/chosen": 0.0, |
|
"logps/rejected": 0.0, |
|
"loss": 0.3852, |
|
"rewards/accuracies": 0.0, |
|
"rewards/chosen": 0.0, |
|
"rewards/margins": 0.0, |
|
"rewards/rejected": 0.0, |
|
"step": 370 |
|
}, |
|
{ |
|
"epoch": 1.52, |
|
"learning_rate": 4.3924876391293915e-06, |
|
"logits/chosen": -1.8491512537002563, |
|
"logits/rejected": -1.8491512537002563, |
|
"logps/chosen": 0.0, |
|
"logps/rejected": 0.0, |
|
"loss": 0.3852, |
|
"rewards/accuracies": 0.0, |
|
"rewards/chosen": 0.0, |
|
"rewards/margins": 0.0, |
|
"rewards/rejected": 0.0, |
|
"step": 380 |
|
}, |
|
{ |
|
"epoch": 1.56, |
|
"learning_rate": 4.346138351564711e-06, |
|
"logits/chosen": -1.8366947174072266, |
|
"logits/rejected": -1.8366947174072266, |
|
"logps/chosen": 0.0, |
|
"logps/rejected": 0.0, |
|
"loss": 0.3408, |
|
"rewards/accuracies": 0.0, |
|
"rewards/chosen": 0.0, |
|
"rewards/margins": 0.0, |
|
"rewards/rejected": 0.0, |
|
"step": 390 |
|
}, |
|
{ |
|
"epoch": 1.6, |
|
"learning_rate": 4.2983495008466285e-06, |
|
"logits/chosen": -1.9089797735214233, |
|
"logits/rejected": -1.9089797735214233, |
|
"logps/chosen": 0.0, |
|
"logps/rejected": 0.0, |
|
"loss": 0.3433, |
|
"rewards/accuracies": 0.0, |
|
"rewards/chosen": 0.0, |
|
"rewards/margins": 0.0, |
|
"rewards/rejected": 0.0, |
|
"step": 400 |
|
}, |
|
{ |
|
"epoch": 1.6, |
|
"eval_logits/chosen": -1.9721235036849976, |
|
"eval_logits/rejected": -1.8102209568023682, |
|
"eval_logps/chosen": -300.3360595703125, |
|
"eval_logps/rejected": -291.88916015625, |
|
"eval_loss": 0.08119545131921768, |
|
"eval_rewards/accuracies": 0.4675000011920929, |
|
"eval_rewards/chosen": -0.33638474345207214, |
|
"eval_rewards/margins": 0.03310885280370712, |
|
"eval_rewards/rejected": -0.3694935739040375, |
|
"eval_runtime": 703.1815, |
|
"eval_samples_per_second": 2.844, |
|
"eval_steps_per_second": 1.422, |
|
"step": 400 |
|
}, |
|
{ |
|
"epoch": 1.64, |
|
"learning_rate": 4.249158351283414e-06, |
|
"logits/chosen": -1.8887426853179932, |
|
"logits/rejected": -1.8887426853179932, |
|
"logps/chosen": 0.0, |
|
"logps/rejected": 0.0, |
|
"loss": 0.3186, |
|
"rewards/accuracies": 0.0, |
|
"rewards/chosen": 0.0, |
|
"rewards/margins": 0.0, |
|
"rewards/rejected": 0.0, |
|
"step": 410 |
|
}, |
|
{ |
|
"epoch": 1.68, |
|
"learning_rate": 4.198603260653792e-06, |
|
"logits/chosen": -1.6066404581069946, |
|
"logits/rejected": -1.6066404581069946, |
|
"logps/chosen": 0.0, |
|
"logps/rejected": 0.0, |
|
"loss": 0.284, |
|
"rewards/accuracies": 0.0, |
|
"rewards/chosen": 0.0, |
|
"rewards/margins": 0.0, |
|
"rewards/rejected": 0.0, |
|
"step": 420 |
|
}, |
|
{ |
|
"epoch": 1.72, |
|
"learning_rate": 4.146723650296701e-06, |
|
"logits/chosen": -1.7861597537994385, |
|
"logits/rejected": -1.764651894569397, |
|
"logps/chosen": -6.981114864349365, |
|
"logps/rejected": -4.9876909255981445, |
|
"loss": 0.2566, |
|
"rewards/accuracies": 0.02500000037252903, |
|
"rewards/chosen": -0.020851103588938713, |
|
"rewards/margins": 0.012941191904246807, |
|
"rewards/rejected": -0.033792294561862946, |
|
"step": 430 |
|
}, |
|
{ |
|
"epoch": 1.76, |
|
"learning_rate": 4.093559974371725e-06, |
|
"logits/chosen": -1.9894205331802368, |
|
"logits/rejected": -1.9900938272476196, |
|
"logps/chosen": -4.070672988891602, |
|
"logps/rejected": -14.303924560546875, |
|
"loss": 0.6156, |
|
"rewards/accuracies": 0.02500000037252903, |
|
"rewards/chosen": 0.015280509367585182, |
|
"rewards/margins": 0.09383808076381683, |
|
"rewards/rejected": -0.0785575658082962, |
|
"step": 440 |
|
}, |
|
{ |
|
"epoch": 1.8, |
|
"learning_rate": 4.039153688314146e-06, |
|
"logits/chosen": -1.858513593673706, |
|
"logits/rejected": -1.858513593673706, |
|
"logps/chosen": 0.0, |
|
"logps/rejected": 0.0, |
|
"loss": 0.3408, |
|
"rewards/accuracies": 0.0, |
|
"rewards/chosen": 0.0, |
|
"rewards/margins": 0.0, |
|
"rewards/rejected": 0.0, |
|
"step": 450 |
|
}, |
|
{ |
|
"epoch": 1.84, |
|
"learning_rate": 3.983547216509254e-06, |
|
"logits/chosen": -2.0184249877929688, |
|
"logits/rejected": -2.0184249877929688, |
|
"logps/chosen": 0.0, |
|
"logps/rejected": 0.0, |
|
"loss": 0.3211, |
|
"rewards/accuracies": 0.0, |
|
"rewards/chosen": 0.0, |
|
"rewards/margins": 0.0, |
|
"rewards/rejected": 0.0, |
|
"step": 460 |
|
}, |
|
{ |
|
"epoch": 1.88, |
|
"learning_rate": 3.92678391921108e-06, |
|
"logits/chosen": -1.7378623485565186, |
|
"logits/rejected": -1.7378623485565186, |
|
"logps/chosen": 0.0, |
|
"logps/rejected": 0.0, |
|
"loss": 0.3087, |
|
"rewards/accuracies": 0.0, |
|
"rewards/chosen": 0.0, |
|
"rewards/margins": 0.0, |
|
"rewards/rejected": 0.0, |
|
"step": 470 |
|
}, |
|
{ |
|
"epoch": 1.92, |
|
"learning_rate": 3.868908058731376e-06, |
|
"logits/chosen": -1.7385492324829102, |
|
"logits/rejected": -1.7385492324829102, |
|
"logps/chosen": 0.0, |
|
"logps/rejected": 0.0, |
|
"loss": 0.3606, |
|
"rewards/accuracies": 0.0, |
|
"rewards/chosen": 0.0, |
|
"rewards/margins": 0.0, |
|
"rewards/rejected": 0.0, |
|
"step": 480 |
|
}, |
|
{ |
|
"epoch": 1.96, |
|
"learning_rate": 3.8099647649251984e-06, |
|
"logits/chosen": -1.9141228199005127, |
|
"logits/rejected": -1.9141228199005127, |
|
"logps/chosen": 0.0, |
|
"logps/rejected": 0.0, |
|
"loss": 0.3062, |
|
"rewards/accuracies": 0.0, |
|
"rewards/chosen": 0.0, |
|
"rewards/margins": 0.0, |
|
"rewards/rejected": 0.0, |
|
"step": 490 |
|
}, |
|
{ |
|
"epoch": 2.0, |
|
"learning_rate": 3.7500000000000005e-06, |
|
"logits/chosen": -1.7758957147598267, |
|
"logits/rejected": -1.7758957147598267, |
|
"logps/chosen": 0.0, |
|
"logps/rejected": 0.0, |
|
"loss": 0.3606, |
|
"rewards/accuracies": 0.0, |
|
"rewards/chosen": 0.0, |
|
"rewards/margins": 0.0, |
|
"rewards/rejected": 0.0, |
|
"step": 500 |
|
}, |
|
{ |
|
"epoch": 2.0, |
|
"eval_logits/chosen": -1.9969795942306519, |
|
"eval_logits/rejected": -1.8348422050476074, |
|
"eval_logps/chosen": -298.5122985839844, |
|
"eval_logps/rejected": -284.1371154785156, |
|
"eval_loss": 0.10996392369270325, |
|
"eval_rewards/accuracies": 0.3734999895095825, |
|
"eval_rewards/chosen": -0.3181473910808563, |
|
"eval_rewards/margins": -0.026174278929829597, |
|
"eval_rewards/rejected": -0.2919731140136719, |
|
"eval_runtime": 702.666, |
|
"eval_samples_per_second": 2.846, |
|
"eval_steps_per_second": 1.423, |
|
"step": 500 |
|
}, |
|
{ |
|
"epoch": 2.04, |
|
"learning_rate": 3.689060522675689e-06, |
|
"logits/chosen": -1.7798951864242554, |
|
"logits/rejected": -1.7795917987823486, |
|
"logps/chosen": -2.3812079429626465, |
|
"logps/rejected": -3.1179988384246826, |
|
"loss": 0.2962, |
|
"rewards/accuracies": 0.02500000037252903, |
|
"rewards/chosen": -0.009619669988751411, |
|
"rewards/margins": 0.005795452743768692, |
|
"rewards/rejected": -0.015415122732520103, |
|
"step": 510 |
|
}, |
|
{ |
|
"epoch": 2.08, |
|
"learning_rate": 3.627193851723577e-06, |
|
"logits/chosen": -1.9688339233398438, |
|
"logits/rejected": -1.9688339233398438, |
|
"logps/chosen": 0.0, |
|
"logps/rejected": 0.0, |
|
"loss": 0.3383, |
|
"rewards/accuracies": 0.0, |
|
"rewards/chosen": 0.0, |
|
"rewards/margins": 0.0, |
|
"rewards/rejected": 0.0, |
|
"step": 520 |
|
}, |
|
{ |
|
"epoch": 2.12, |
|
"learning_rate": 3.564448228912682e-06, |
|
"logits/chosen": -2.014033794403076, |
|
"logits/rejected": -2.014033794403076, |
|
"logps/chosen": 0.0, |
|
"logps/rejected": 0.0, |
|
"loss": 0.3408, |
|
"rewards/accuracies": 0.0, |
|
"rewards/chosen": 0.0, |
|
"rewards/margins": 0.0, |
|
"rewards/rejected": 0.0, |
|
"step": 530 |
|
}, |
|
{ |
|
"epoch": 2.16, |
|
"learning_rate": 3.5008725813922383e-06, |
|
"logits/chosen": -1.9099280834197998, |
|
"logits/rejected": -1.9099280834197998, |
|
"logps/chosen": 0.0, |
|
"logps/rejected": 0.0, |
|
"loss": 0.3926, |
|
"rewards/accuracies": 0.0, |
|
"rewards/chosen": 0.0, |
|
"rewards/margins": 0.0, |
|
"rewards/rejected": 0.0, |
|
"step": 540 |
|
}, |
|
{ |
|
"epoch": 2.2, |
|
"learning_rate": 3.436516483539781e-06, |
|
"logits/chosen": -1.9664745330810547, |
|
"logits/rejected": -1.9664745330810547, |
|
"logps/chosen": 0.0, |
|
"logps/rejected": 0.0, |
|
"loss": 0.3235, |
|
"rewards/accuracies": 0.0, |
|
"rewards/chosen": 0.0, |
|
"rewards/margins": 0.0, |
|
"rewards/rejected": 0.0, |
|
"step": 550 |
|
}, |
|
{ |
|
"epoch": 2.24, |
|
"learning_rate": 3.3714301183045382e-06, |
|
"logits/chosen": -1.767690658569336, |
|
"logits/rejected": -1.767690658569336, |
|
"logps/chosen": 0.0, |
|
"logps/rejected": 0.0, |
|
"loss": 0.3334, |
|
"rewards/accuracies": 0.0, |
|
"rewards/chosen": 0.0, |
|
"rewards/margins": 0.0, |
|
"rewards/rejected": 0.0, |
|
"step": 560 |
|
}, |
|
{ |
|
"epoch": 2.28, |
|
"learning_rate": 3.3056642380762783e-06, |
|
"logits/chosen": -1.8395519256591797, |
|
"logits/rejected": -1.8395519256591797, |
|
"logps/chosen": 0.0, |
|
"logps/rejected": 0.0, |
|
"loss": 0.3581, |
|
"rewards/accuracies": 0.0, |
|
"rewards/chosen": 0.0, |
|
"rewards/margins": 0.0, |
|
"rewards/rejected": 0.0, |
|
"step": 570 |
|
}, |
|
{ |
|
"epoch": 2.32, |
|
"learning_rate": 3.2392701251101172e-06, |
|
"logits/chosen": -1.9773021936416626, |
|
"logits/rejected": -1.9773021936416626, |
|
"logps/chosen": 0.0, |
|
"logps/rejected": 0.0, |
|
"loss": 0.3334, |
|
"rewards/accuracies": 0.0, |
|
"rewards/chosen": 0.0, |
|
"rewards/margins": 0.0, |
|
"rewards/rejected": 0.0, |
|
"step": 580 |
|
}, |
|
{ |
|
"epoch": 2.36, |
|
"learning_rate": 3.1722995515381644e-06, |
|
"logits/chosen": -2.025939464569092, |
|
"logits/rejected": -2.025939464569092, |
|
"logps/chosen": 0.0, |
|
"logps/rejected": 0.0, |
|
"loss": 0.3754, |
|
"rewards/accuracies": 0.0, |
|
"rewards/chosen": 0.0, |
|
"rewards/margins": 0.0, |
|
"rewards/rejected": 0.0, |
|
"step": 590 |
|
}, |
|
{ |
|
"epoch": 2.4, |
|
"learning_rate": 3.1048047389991693e-06, |
|
"logits/chosen": -2.046207904815674, |
|
"logits/rejected": -2.046207904815674, |
|
"logps/chosen": 0.0, |
|
"logps/rejected": 0.0, |
|
"loss": 0.3038, |
|
"rewards/accuracies": 0.0, |
|
"rewards/chosen": 0.0, |
|
"rewards/margins": 0.0, |
|
"rewards/rejected": 0.0, |
|
"step": 600 |
|
}, |
|
{ |
|
"epoch": 2.4, |
|
"eval_logits/chosen": -1.9935928583145142, |
|
"eval_logits/rejected": -1.831691026687622, |
|
"eval_logps/chosen": -299.02557373046875, |
|
"eval_logps/rejected": -284.72607421875, |
|
"eval_loss": 0.10917193442583084, |
|
"eval_rewards/accuracies": 0.37700000405311584, |
|
"eval_rewards/chosen": -0.3232795298099518, |
|
"eval_rewards/margins": -0.025417106226086617, |
|
"eval_rewards/rejected": -0.2978624105453491, |
|
"eval_runtime": 703.1518, |
|
"eval_samples_per_second": 2.844, |
|
"eval_steps_per_second": 1.422, |
|
"step": 600 |
|
}, |
|
{ |
|
"epoch": 2.44, |
|
"learning_rate": 3.0368383179176584e-06, |
|
"logits/chosen": -2.0336036682128906, |
|
"logits/rejected": -2.0336036682128906, |
|
"logps/chosen": 0.0, |
|
"logps/rejected": 0.0, |
|
"loss": 0.3211, |
|
"rewards/accuracies": 0.0, |
|
"rewards/chosen": 0.0, |
|
"rewards/margins": 0.0, |
|
"rewards/rejected": 0.0, |
|
"step": 610 |
|
}, |
|
{ |
|
"epoch": 2.48, |
|
"learning_rate": 2.9684532864643123e-06, |
|
"logits/chosen": -1.7936891317367554, |
|
"logits/rejected": -1.7936891317367554, |
|
"logps/chosen": 0.0, |
|
"logps/rejected": 0.0, |
|
"loss": 0.3062, |
|
"rewards/accuracies": 0.0, |
|
"rewards/chosen": 0.0, |
|
"rewards/margins": 0.0, |
|
"rewards/rejected": 0.0, |
|
"step": 620 |
|
}, |
|
{ |
|
"epoch": 2.52, |
|
"learning_rate": 2.8997029692295875e-06, |
|
"logits/chosen": -1.8989810943603516, |
|
"logits/rejected": -1.8989810943603516, |
|
"logps/chosen": 0.0, |
|
"logps/rejected": 0.0, |
|
"loss": 0.3457, |
|
"rewards/accuracies": 0.0, |
|
"rewards/chosen": 0.0, |
|
"rewards/margins": 0.0, |
|
"rewards/rejected": 0.0, |
|
"step": 630 |
|
}, |
|
{ |
|
"epoch": 2.56, |
|
"learning_rate": 2.8306409756428067e-06, |
|
"logits/chosen": -1.7690341472625732, |
|
"logits/rejected": -1.7690341472625732, |
|
"logps/chosen": 0.0, |
|
"logps/rejected": 0.0, |
|
"loss": 0.3359, |
|
"rewards/accuracies": 0.0, |
|
"rewards/chosen": 0.0, |
|
"rewards/margins": 0.0, |
|
"rewards/rejected": 0.0, |
|
"step": 640 |
|
}, |
|
{ |
|
"epoch": 2.6, |
|
"learning_rate": 2.761321158169134e-06, |
|
"logits/chosen": -1.6524708271026611, |
|
"logits/rejected": -1.6524708271026611, |
|
"logps/chosen": 0.0, |
|
"logps/rejected": 0.0, |
|
"loss": 0.4099, |
|
"rewards/accuracies": 0.0, |
|
"rewards/chosen": 0.0, |
|
"rewards/margins": 0.0, |
|
"rewards/rejected": 0.0, |
|
"step": 650 |
|
}, |
|
{ |
|
"epoch": 2.64, |
|
"learning_rate": 2.6917975703170466e-06, |
|
"logits/chosen": -1.835680365562439, |
|
"logits/rejected": -1.835680365562439, |
|
"logps/chosen": 0.0, |
|
"logps/rejected": 0.0, |
|
"loss": 0.3902, |
|
"rewards/accuracies": 0.0, |
|
"rewards/chosen": 0.0, |
|
"rewards/margins": 0.0, |
|
"rewards/rejected": 0.0, |
|
"step": 660 |
|
}, |
|
{ |
|
"epoch": 2.68, |
|
"learning_rate": 2.6221244244890336e-06, |
|
"logits/chosen": -1.7938703298568726, |
|
"logits/rejected": -1.7727596759796143, |
|
"logps/chosen": -5.777490615844727, |
|
"logps/rejected": -3.1091294288635254, |
|
"loss": 0.296, |
|
"rewards/accuracies": 0.02500000037252903, |
|
"rewards/chosen": -0.008814861066639423, |
|
"rewards/margins": 0.006191821303218603, |
|
"rewards/rejected": -0.01500668190419674, |
|
"step": 670 |
|
}, |
|
{ |
|
"epoch": 2.72, |
|
"learning_rate": 2.5523560497083927e-06, |
|
"logits/chosen": -1.8663837909698486, |
|
"logits/rejected": -1.8663837909698486, |
|
"logps/chosen": 0.0, |
|
"logps/rejected": 0.0, |
|
"loss": 0.3112, |
|
"rewards/accuracies": 0.0, |
|
"rewards/chosen": 0.0, |
|
"rewards/margins": 0.0, |
|
"rewards/rejected": 0.0, |
|
"step": 680 |
|
}, |
|
{ |
|
"epoch": 2.76, |
|
"learning_rate": 2.482546849255096e-06, |
|
"logits/chosen": -1.8712513446807861, |
|
"logits/rejected": -1.8712513446807861, |
|
"logps/chosen": 0.0, |
|
"logps/rejected": 0.0, |
|
"loss": 0.3852, |
|
"rewards/accuracies": 0.0, |
|
"rewards/chosen": 0.0, |
|
"rewards/margins": 0.0, |
|
"rewards/rejected": 0.0, |
|
"step": 690 |
|
}, |
|
{ |
|
"epoch": 2.8, |
|
"learning_rate": 2.4127512582437486e-06, |
|
"logits/chosen": -1.9196503162384033, |
|
"logits/rejected": -1.9196503162384033, |
|
"logps/chosen": 0.0, |
|
"logps/rejected": 0.0, |
|
"loss": 0.3161, |
|
"rewards/accuracies": 0.0, |
|
"rewards/chosen": 0.0, |
|
"rewards/margins": 0.0, |
|
"rewards/rejected": 0.0, |
|
"step": 700 |
|
}, |
|
{ |
|
"epoch": 2.8, |
|
"eval_logits/chosen": -1.9965624809265137, |
|
"eval_logits/rejected": -1.8345471620559692, |
|
"eval_logps/chosen": -298.41583251953125, |
|
"eval_logps/rejected": -284.232177734375, |
|
"eval_loss": 0.10689055174589157, |
|
"eval_rewards/accuracies": 0.3799999952316284, |
|
"eval_rewards/chosen": -0.317182332277298, |
|
"eval_rewards/margins": -0.024258404970169067, |
|
"eval_rewards/rejected": -0.2929239571094513, |
|
"eval_runtime": 703.3493, |
|
"eval_samples_per_second": 2.844, |
|
"eval_steps_per_second": 1.422, |
|
"step": 700 |
|
}, |
|
{ |
|
"epoch": 2.84, |
|
"learning_rate": 2.3430237011767166e-06, |
|
"logits/chosen": -1.9961488246917725, |
|
"logits/rejected": -1.9961488246917725, |
|
"logps/chosen": 0.0, |
|
"logps/rejected": 0.0, |
|
"loss": 0.3704, |
|
"rewards/accuracies": 0.0, |
|
"rewards/chosen": 0.0, |
|
"rewards/margins": 0.0, |
|
"rewards/rejected": 0.0, |
|
"step": 710 |
|
}, |
|
{ |
|
"epoch": 2.88, |
|
"learning_rate": 2.2734185495055503e-06, |
|
"logits/chosen": -1.7007853984832764, |
|
"logits/rejected": -1.668602705001831, |
|
"logps/chosen": -22.714946746826172, |
|
"logps/rejected": -10.431631088256836, |
|
"loss": 0.5289, |
|
"rewards/accuracies": 0.0, |
|
"rewards/chosen": -0.12950357794761658, |
|
"rewards/margins": -0.05113900825381279, |
|
"rewards/rejected": -0.07836457341909409, |
|
"step": 720 |
|
}, |
|
{ |
|
"epoch": 2.92, |
|
"learning_rate": 2.2039900792337477e-06, |
|
"logits/chosen": -1.8030157089233398, |
|
"logits/rejected": -1.8030157089233398, |
|
"logps/chosen": 0.0, |
|
"logps/rejected": 0.0, |
|
"loss": 0.3013, |
|
"rewards/accuracies": 0.0, |
|
"rewards/chosen": 0.0, |
|
"rewards/margins": 0.0, |
|
"rewards/rejected": 0.0, |
|
"step": 730 |
|
}, |
|
{ |
|
"epoch": 2.96, |
|
"learning_rate": 2.134792428593971e-06, |
|
"logits/chosen": -1.9432337284088135, |
|
"logits/rejected": -1.9432337284088135, |
|
"logps/chosen": 0.0, |
|
"logps/rejected": 0.0, |
|
"loss": 0.3531, |
|
"rewards/accuracies": 0.0, |
|
"rewards/chosen": 0.0, |
|
"rewards/margins": 0.0, |
|
"rewards/rejected": 0.0, |
|
"step": 740 |
|
}, |
|
{ |
|
"epoch": 3.0, |
|
"learning_rate": 2.0658795558326745e-06, |
|
"logits/chosen": -1.9992624521255493, |
|
"logits/rejected": -2.000507116317749, |
|
"logps/chosen": -6.309609889984131, |
|
"logps/rejected": -9.059834480285645, |
|
"loss": 0.2932, |
|
"rewards/accuracies": 0.02500000037252903, |
|
"rewards/chosen": -0.007108859717845917, |
|
"rewards/margins": 0.019007809460163116, |
|
"rewards/rejected": -0.026116669178009033, |
|
"step": 750 |
|
}, |
|
{ |
|
"epoch": 3.04, |
|
"learning_rate": 1.997305197135089e-06, |
|
"logits/chosen": -1.9350688457489014, |
|
"logits/rejected": -1.9350688457489014, |
|
"logps/chosen": 0.0, |
|
"logps/rejected": 0.0, |
|
"loss": 0.3087, |
|
"rewards/accuracies": 0.0, |
|
"rewards/chosen": 0.0, |
|
"rewards/margins": 0.0, |
|
"rewards/rejected": 0.0, |
|
"step": 760 |
|
}, |
|
{ |
|
"epoch": 3.08, |
|
"learning_rate": 1.9291228247233607e-06, |
|
"logits/chosen": -1.943377137184143, |
|
"logits/rejected": -1.943377137184143, |
|
"logps/chosen": 0.0, |
|
"logps/rejected": 0.0, |
|
"loss": 0.2939, |
|
"rewards/accuracies": 0.0, |
|
"rewards/chosen": 0.0, |
|
"rewards/margins": 0.0, |
|
"rewards/rejected": 0.0, |
|
"step": 770 |
|
}, |
|
{ |
|
"epoch": 3.12, |
|
"learning_rate": 1.8613856051605242e-06, |
|
"logits/chosen": -1.9116928577423096, |
|
"logits/rejected": -1.9116928577423096, |
|
"logps/chosen": 0.0, |
|
"logps/rejected": 0.0, |
|
"loss": 0.4025, |
|
"rewards/accuracies": 0.0, |
|
"rewards/chosen": 0.0, |
|
"rewards/margins": 0.0, |
|
"rewards/rejected": 0.0, |
|
"step": 780 |
|
}, |
|
{ |
|
"epoch": 3.16, |
|
"learning_rate": 1.7941463578928088e-06, |
|
"logits/chosen": -1.9572219848632812, |
|
"logits/rejected": -1.9572219848632812, |
|
"logps/chosen": 0.0, |
|
"logps/rejected": 0.0, |
|
"loss": 0.3038, |
|
"rewards/accuracies": 0.0, |
|
"rewards/chosen": 0.0, |
|
"rewards/margins": 0.0, |
|
"rewards/rejected": 0.0, |
|
"step": 790 |
|
}, |
|
{ |
|
"epoch": 3.2, |
|
"learning_rate": 1.7274575140626318e-06, |
|
"logits/chosen": -2.023911952972412, |
|
"logits/rejected": -2.023911952972412, |
|
"logps/chosen": 0.0, |
|
"logps/rejected": 0.0, |
|
"loss": 0.3852, |
|
"rewards/accuracies": 0.0, |
|
"rewards/chosen": 0.0, |
|
"rewards/margins": 0.0, |
|
"rewards/rejected": 0.0, |
|
"step": 800 |
|
}, |
|
{ |
|
"epoch": 3.2, |
|
"eval_logits/chosen": -2.001863479614258, |
|
"eval_logits/rejected": -1.8408547639846802, |
|
"eval_logps/chosen": -289.7388000488281, |
|
"eval_logps/rejected": -275.51025390625, |
|
"eval_loss": 0.09182017296552658, |
|
"eval_rewards/accuracies": 0.3684999942779541, |
|
"eval_rewards/chosen": -0.2304122895002365, |
|
"eval_rewards/margins": -0.02470785565674305, |
|
"eval_rewards/rejected": -0.2057044357061386, |
|
"eval_runtime": 704.5723, |
|
"eval_samples_per_second": 2.839, |
|
"eval_steps_per_second": 1.419, |
|
"step": 800 |
|
}, |
|
{ |
|
"epoch": 3.24, |
|
"learning_rate": 1.661371075624363e-06, |
|
"logits/chosen": -1.9029079675674438, |
|
"logits/rejected": -1.9029079675674438, |
|
"logps/chosen": 0.0, |
|
"logps/rejected": 0.0, |
|
"loss": 0.2964, |
|
"rewards/accuracies": 0.0, |
|
"rewards/chosen": 0.0, |
|
"rewards/margins": 0.0, |
|
"rewards/rejected": 0.0, |
|
"step": 810 |
|
}, |
|
{ |
|
"epoch": 3.28, |
|
"learning_rate": 1.5959385747947697e-06, |
|
"logits/chosen": -1.9884204864501953, |
|
"logits/rejected": -1.9884204864501953, |
|
"logps/chosen": 0.0, |
|
"logps/rejected": 0.0, |
|
"loss": 0.3433, |
|
"rewards/accuracies": 0.0, |
|
"rewards/chosen": 0.0, |
|
"rewards/margins": 0.0, |
|
"rewards/rejected": 0.0, |
|
"step": 820 |
|
}, |
|
{ |
|
"epoch": 3.32, |
|
"learning_rate": 1.5312110338697427e-06, |
|
"logits/chosen": -1.9079113006591797, |
|
"logits/rejected": -1.9079113006591797, |
|
"logps/chosen": 0.0, |
|
"logps/rejected": 0.0, |
|
"loss": 0.3136, |
|
"rewards/accuracies": 0.0, |
|
"rewards/chosen": 0.0, |
|
"rewards/margins": 0.0, |
|
"rewards/rejected": 0.0, |
|
"step": 830 |
|
}, |
|
{ |
|
"epoch": 3.36, |
|
"learning_rate": 1.467238925438646e-06, |
|
"logits/chosen": -1.6772514581680298, |
|
"logits/rejected": -1.6457149982452393, |
|
"logps/chosen": -14.745180130004883, |
|
"logps/rejected": -14.53711223602295, |
|
"loss": 0.4833, |
|
"rewards/accuracies": 0.02500000037252903, |
|
"rewards/chosen": -0.04980592057108879, |
|
"rewards/margins": 0.06961346417665482, |
|
"rewards/rejected": -0.11941938102245331, |
|
"step": 840 |
|
}, |
|
{ |
|
"epoch": 3.4, |
|
"learning_rate": 1.4040721330273063e-06, |
|
"logits/chosen": -1.7772667407989502, |
|
"logits/rejected": -1.7781718969345093, |
|
"logps/chosen": -8.622848510742188, |
|
"logps/rejected": -12.08240032196045, |
|
"loss": 0.3293, |
|
"rewards/accuracies": 0.05000000074505806, |
|
"rewards/chosen": -0.01604883186519146, |
|
"rewards/margins": 0.024528637528419495, |
|
"rewards/rejected": -0.040577471256256104, |
|
"step": 850 |
|
}, |
|
{ |
|
"epoch": 3.44, |
|
"learning_rate": 1.3417599122003464e-06, |
|
"logits/chosen": -1.9655358791351318, |
|
"logits/rejected": -1.9655358791351318, |
|
"logps/chosen": 0.0, |
|
"logps/rejected": 0.0, |
|
"loss": 0.3828, |
|
"rewards/accuracies": 0.0, |
|
"rewards/chosen": 0.0, |
|
"rewards/margins": 0.0, |
|
"rewards/rejected": 0.0, |
|
"step": 860 |
|
}, |
|
{ |
|
"epoch": 3.48, |
|
"learning_rate": 1.280350852153168e-06, |
|
"logits/chosen": -2.0000901222229004, |
|
"logits/rejected": -2.0000901222229004, |
|
"logps/chosen": 0.0, |
|
"logps/rejected": 0.0, |
|
"loss": 0.3211, |
|
"rewards/accuracies": 0.0, |
|
"rewards/chosen": 0.0, |
|
"rewards/margins": 0.0, |
|
"rewards/rejected": 0.0, |
|
"step": 870 |
|
}, |
|
{ |
|
"epoch": 3.52, |
|
"learning_rate": 1.2198928378235717e-06, |
|
"logits/chosen": -1.8563473224639893, |
|
"logits/rejected": -1.8563473224639893, |
|
"logps/chosen": 0.0, |
|
"logps/rejected": 0.0, |
|
"loss": 0.3408, |
|
"rewards/accuracies": 0.0, |
|
"rewards/chosen": 0.0, |
|
"rewards/margins": 0.0, |
|
"rewards/rejected": 0.0, |
|
"step": 880 |
|
}, |
|
{ |
|
"epoch": 3.56, |
|
"learning_rate": 1.160433012552508e-06, |
|
"logits/chosen": -1.8875033855438232, |
|
"logits/rejected": -1.8875033855438232, |
|
"logps/chosen": 0.0, |
|
"logps/rejected": 0.0, |
|
"loss": 0.2791, |
|
"rewards/accuracies": 0.0, |
|
"rewards/chosen": 0.0, |
|
"rewards/margins": 0.0, |
|
"rewards/rejected": 0.0, |
|
"step": 890 |
|
}, |
|
{ |
|
"epoch": 3.6, |
|
"learning_rate": 1.1020177413231334e-06, |
|
"logits/chosen": -1.8840911388397217, |
|
"logits/rejected": -1.8840911388397217, |
|
"logps/chosen": 0.0, |
|
"logps/rejected": 0.0, |
|
"loss": 0.3359, |
|
"rewards/accuracies": 0.0, |
|
"rewards/chosen": 0.0, |
|
"rewards/margins": 0.0, |
|
"rewards/rejected": 0.0, |
|
"step": 900 |
|
}, |
|
{ |
|
"epoch": 3.6, |
|
"eval_logits/chosen": -1.983768343925476, |
|
"eval_logits/rejected": -1.823969841003418, |
|
"eval_logps/chosen": -287.3323059082031, |
|
"eval_logps/rejected": -271.89581298828125, |
|
"eval_loss": 0.09828919917345047, |
|
"eval_rewards/accuracies": 0.34299999475479126, |
|
"eval_rewards/chosen": -0.20634719729423523, |
|
"eval_rewards/margins": -0.036787137389183044, |
|
"eval_rewards/rejected": -0.16956007480621338, |
|
"eval_runtime": 702.0734, |
|
"eval_samples_per_second": 2.849, |
|
"eval_steps_per_second": 1.424, |
|
"step": 900 |
|
}, |
|
{ |
|
"epoch": 3.64, |
|
"learning_rate": 1.0446925746067768e-06, |
|
"logits/chosen": -1.8285129070281982, |
|
"logits/rejected": -1.8285129070281982, |
|
"logps/chosen": 0.0, |
|
"logps/rejected": 0.0, |
|
"loss": 0.3383, |
|
"rewards/accuracies": 0.0, |
|
"rewards/chosen": 0.0, |
|
"rewards/margins": 0.0, |
|
"rewards/rejected": 0.0, |
|
"step": 910 |
|
}, |
|
{ |
|
"epoch": 3.68, |
|
"learning_rate": 9.88502212844063e-07, |
|
"logits/chosen": -1.877623200416565, |
|
"logits/rejected": -1.877623200416565, |
|
"logps/chosen": 0.0, |
|
"logps/rejected": 0.0, |
|
"loss": 0.3433, |
|
"rewards/accuracies": 0.0, |
|
"rewards/chosen": 0.0, |
|
"rewards/margins": 0.0, |
|
"rewards/rejected": 0.0, |
|
"step": 920 |
|
}, |
|
{ |
|
"epoch": 3.72, |
|
"learning_rate": 9.334904715888496e-07, |
|
"logits/chosen": -1.7445094585418701, |
|
"logits/rejected": -1.7445094585418701, |
|
"logps/chosen": 0.0, |
|
"logps/rejected": 0.0, |
|
"loss": 0.3606, |
|
"rewards/accuracies": 0.0, |
|
"rewards/chosen": 0.0, |
|
"rewards/margins": 0.0, |
|
"rewards/rejected": 0.0, |
|
"step": 930 |
|
}, |
|
{ |
|
"epoch": 3.76, |
|
"learning_rate": 8.797002473421729e-07, |
|
"logits/chosen": -1.8405430316925049, |
|
"logits/rejected": -1.8405430316925049, |
|
"logps/chosen": 0.0, |
|
"logps/rejected": 0.0, |
|
"loss": 0.2939, |
|
"rewards/accuracies": 0.0, |
|
"rewards/chosen": 0.0, |
|
"rewards/margins": 0.0, |
|
"rewards/rejected": 0.0, |
|
"step": 940 |
|
}, |
|
{ |
|
"epoch": 3.8, |
|
"learning_rate": 8.271734841028553e-07, |
|
"logits/chosen": -1.934522271156311, |
|
"logits/rejected": -1.934522271156311, |
|
"logps/chosen": 0.0, |
|
"logps/rejected": 0.0, |
|
"loss": 0.3334, |
|
"rewards/accuracies": 0.0, |
|
"rewards/chosen": 0.0, |
|
"rewards/margins": 0.0, |
|
"rewards/rejected": 0.0, |
|
"step": 950 |
|
}, |
|
{ |
|
"epoch": 3.84, |
|
"learning_rate": 7.759511406608255e-07, |
|
"logits/chosen": -2.002326011657715, |
|
"logits/rejected": -2.002326011657715, |
|
"logps/chosen": 0.0, |
|
"logps/rejected": 0.0, |
|
"loss": 0.3556, |
|
"rewards/accuracies": 0.0, |
|
"rewards/chosen": 0.0, |
|
"rewards/margins": 0.0, |
|
"rewards/rejected": 0.0, |
|
"step": 960 |
|
}, |
|
{ |
|
"epoch": 3.88, |
|
"learning_rate": 7.260731586586983e-07, |
|
"logits/chosen": -1.997032880783081, |
|
"logits/rejected": -1.997032880783081, |
|
"logps/chosen": 0.0, |
|
"logps/rejected": 0.0, |
|
"loss": 0.3408, |
|
"rewards/accuracies": 0.0, |
|
"rewards/chosen": 0.0, |
|
"rewards/margins": 0.0, |
|
"rewards/rejected": 0.0, |
|
"step": 970 |
|
}, |
|
{ |
|
"epoch": 3.92, |
|
"learning_rate": 6.775784314464717e-07, |
|
"logits/chosen": -1.9426374435424805, |
|
"logits/rejected": -1.9426374435424805, |
|
"logps/chosen": 0.0, |
|
"logps/rejected": 0.0, |
|
"loss": 0.4149, |
|
"rewards/accuracies": 0.0, |
|
"rewards/chosen": 0.0, |
|
"rewards/margins": 0.0, |
|
"rewards/rejected": 0.0, |
|
"step": 980 |
|
}, |
|
{ |
|
"epoch": 3.96, |
|
"learning_rate": 6.305047737536707e-07, |
|
"logits/chosen": -1.7942962646484375, |
|
"logits/rejected": -1.7942962646484375, |
|
"logps/chosen": 0.0, |
|
"logps/rejected": 0.0, |
|
"loss": 0.3927, |
|
"rewards/accuracies": 0.0, |
|
"rewards/chosen": 0.0, |
|
"rewards/margins": 0.0, |
|
"rewards/rejected": 0.0, |
|
"step": 990 |
|
}, |
|
{ |
|
"epoch": 4.0, |
|
"learning_rate": 5.848888922025553e-07, |
|
"logits/chosen": -1.9319026470184326, |
|
"logits/rejected": -1.9107824563980103, |
|
"logps/chosen": -4.969229698181152, |
|
"logps/rejected": -3.057560682296753, |
|
"loss": 0.3701, |
|
"rewards/accuracies": 0.02500000037252903, |
|
"rewards/chosen": -0.0007322501624003053, |
|
"rewards/margins": 0.013758744113147259, |
|
"rewards/rejected": -0.01449099462479353, |
|
"step": 1000 |
|
}, |
|
{ |
|
"epoch": 4.0, |
|
"eval_logits/chosen": -1.9837957620620728, |
|
"eval_logits/rejected": -1.8240842819213867, |
|
"eval_logps/chosen": -287.31591796875, |
|
"eval_logps/rejected": -271.8734130859375, |
|
"eval_loss": 0.09820234775543213, |
|
"eval_rewards/accuracies": 0.34549999237060547, |
|
"eval_rewards/chosen": -0.20618313550949097, |
|
"eval_rewards/margins": -0.03684700280427933, |
|
"eval_rewards/rejected": -0.16933614015579224, |
|
"eval_runtime": 705.2974, |
|
"eval_samples_per_second": 2.836, |
|
"eval_steps_per_second": 1.418, |
|
"step": 1000 |
|
}, |
|
{ |
|
"epoch": 4.04, |
|
"learning_rate": 5.407663566854008e-07, |
|
"logits/chosen": -1.8698651790618896, |
|
"logits/rejected": -1.8484447002410889, |
|
"logps/chosen": -16.939109802246094, |
|
"logps/rejected": -9.7978515625, |
|
"loss": 0.311, |
|
"rewards/accuracies": 0.02500000037252903, |
|
"rewards/chosen": -0.07174522429704666, |
|
"rewards/margins": 0.0002815544721670449, |
|
"rewards/rejected": -0.07202677428722382, |
|
"step": 1010 |
|
}, |
|
{ |
|
"epoch": 4.08, |
|
"learning_rate": 4.981715726281666e-07, |
|
"logits/chosen": -1.9195115566253662, |
|
"logits/rejected": -1.9195115566253662, |
|
"logps/chosen": 0.0, |
|
"logps/rejected": 0.0, |
|
"loss": 0.2618, |
|
"rewards/accuracies": 0.0, |
|
"rewards/chosen": 0.0, |
|
"rewards/margins": 0.0, |
|
"rewards/rejected": 0.0, |
|
"step": 1020 |
|
}, |
|
{ |
|
"epoch": 4.12, |
|
"learning_rate": 4.5713775416217884e-07, |
|
"logits/chosen": -1.9020694494247437, |
|
"logits/rejected": -1.9020694494247437, |
|
"logps/chosen": 0.0, |
|
"logps/rejected": 0.0, |
|
"loss": 0.3655, |
|
"rewards/accuracies": 0.0, |
|
"rewards/chosen": 0.0, |
|
"rewards/margins": 0.0, |
|
"rewards/rejected": 0.0, |
|
"step": 1030 |
|
}, |
|
{ |
|
"epoch": 4.16, |
|
"learning_rate": 4.1769689822475147e-07, |
|
"logits/chosen": -2.1050262451171875, |
|
"logits/rejected": -2.1050262451171875, |
|
"logps/chosen": 0.0, |
|
"logps/rejected": 0.0, |
|
"loss": 0.3359, |
|
"rewards/accuracies": 0.0, |
|
"rewards/chosen": 0.0, |
|
"rewards/margins": 0.0, |
|
"rewards/rejected": 0.0, |
|
"step": 1040 |
|
}, |
|
{ |
|
"epoch": 4.2, |
|
"learning_rate": 3.798797596089351e-07, |
|
"logits/chosen": -2.110747814178467, |
|
"logits/rejected": -2.110747814178467, |
|
"logps/chosen": 0.0, |
|
"logps/rejected": 0.0, |
|
"loss": 0.3507, |
|
"rewards/accuracies": 0.0, |
|
"rewards/chosen": 0.0, |
|
"rewards/margins": 0.0, |
|
"rewards/rejected": 0.0, |
|
"step": 1050 |
|
}, |
|
{ |
|
"epoch": 4.24, |
|
"learning_rate": 3.4371582698185636e-07, |
|
"logits/chosen": -1.8137989044189453, |
|
"logits/rejected": -1.8137989044189453, |
|
"logps/chosen": 0.0, |
|
"logps/rejected": 0.0, |
|
"loss": 0.3235, |
|
"rewards/accuracies": 0.0, |
|
"rewards/chosen": 0.0, |
|
"rewards/margins": 0.0, |
|
"rewards/rejected": 0.0, |
|
"step": 1060 |
|
}, |
|
{ |
|
"epoch": 4.28, |
|
"learning_rate": 3.092332998903416e-07, |
|
"logits/chosen": -2.048144817352295, |
|
"logits/rejected": -2.048144817352295, |
|
"logps/chosen": 0.0, |
|
"logps/rejected": 0.0, |
|
"loss": 0.3606, |
|
"rewards/accuracies": 0.0, |
|
"rewards/chosen": 0.0, |
|
"rewards/margins": 0.0, |
|
"rewards/rejected": 0.0, |
|
"step": 1070 |
|
}, |
|
{ |
|
"epoch": 4.32, |
|
"learning_rate": 2.764590667717562e-07, |
|
"logits/chosen": -2.115201473236084, |
|
"logits/rejected": -2.115201473236084, |
|
"logps/chosen": 0.0, |
|
"logps/rejected": 0.0, |
|
"loss": 0.3902, |
|
"rewards/accuracies": 0.0, |
|
"rewards/chosen": 0.0, |
|
"rewards/margins": 0.0, |
|
"rewards/rejected": 0.0, |
|
"step": 1080 |
|
}, |
|
{ |
|
"epoch": 4.36, |
|
"learning_rate": 2.454186839872158e-07, |
|
"logits/chosen": -1.7584993839263916, |
|
"logits/rejected": -1.7584993839263916, |
|
"logps/chosen": 0.0, |
|
"logps/rejected": 0.0, |
|
"loss": 0.3803, |
|
"rewards/accuracies": 0.0, |
|
"rewards/chosen": 0.0, |
|
"rewards/margins": 0.0, |
|
"rewards/rejected": 0.0, |
|
"step": 1090 |
|
}, |
|
{ |
|
"epoch": 4.4, |
|
"learning_rate": 2.1613635589349756e-07, |
|
"logits/chosen": -2.012417793273926, |
|
"logits/rejected": -2.012417793273926, |
|
"logps/chosen": 0.0, |
|
"logps/rejected": 0.0, |
|
"loss": 0.4025, |
|
"rewards/accuracies": 0.0, |
|
"rewards/chosen": 0.0, |
|
"rewards/margins": 0.0, |
|
"rewards/rejected": 0.0, |
|
"step": 1100 |
|
}, |
|
{ |
|
"epoch": 4.4, |
|
"eval_logits/chosen": -1.9857844114303589, |
|
"eval_logits/rejected": -1.8259761333465576, |
|
"eval_logps/chosen": -287.1649475097656, |
|
"eval_logps/rejected": -271.81268310546875, |
|
"eval_loss": 0.09747015684843063, |
|
"eval_rewards/accuracies": 0.34549999237060547, |
|
"eval_rewards/chosen": -0.20467324554920197, |
|
"eval_rewards/margins": -0.03594454750418663, |
|
"eval_rewards/rejected": -0.16872867941856384, |
|
"eval_runtime": 705.0834, |
|
"eval_samples_per_second": 2.837, |
|
"eval_steps_per_second": 1.418, |
|
"step": 1100 |
|
}, |
|
{ |
|
"epoch": 4.44, |
|
"learning_rate": 1.8863491596921745e-07, |
|
"logits/chosen": -1.8358827829360962, |
|
"logits/rejected": -1.8358827829360962, |
|
"logps/chosen": 0.0, |
|
"logps/rejected": 0.0, |
|
"loss": 0.3408, |
|
"rewards/accuracies": 0.0, |
|
"rewards/chosen": 0.0, |
|
"rewards/margins": 0.0, |
|
"rewards/rejected": 0.0, |
|
"step": 1110 |
|
}, |
|
{ |
|
"epoch": 4.48, |
|
"learning_rate": 1.629358090099639e-07, |
|
"logits/chosen": -1.8439744710922241, |
|
"logits/rejected": -1.8439744710922241, |
|
"logps/chosen": 0.0, |
|
"logps/rejected": 0.0, |
|
"loss": 0.2692, |
|
"rewards/accuracies": 0.0, |
|
"rewards/chosen": 0.0, |
|
"rewards/margins": 0.0, |
|
"rewards/rejected": 0.0, |
|
"step": 1120 |
|
}, |
|
{ |
|
"epoch": 4.52, |
|
"learning_rate": 1.3905907440629752e-07, |
|
"logits/chosen": -1.9981298446655273, |
|
"logits/rejected": -1.9981298446655273, |
|
"logps/chosen": 0.0, |
|
"logps/rejected": 0.0, |
|
"loss": 0.3112, |
|
"rewards/accuracies": 0.0, |
|
"rewards/chosen": 0.0, |
|
"rewards/margins": 0.0, |
|
"rewards/rejected": 0.0, |
|
"step": 1130 |
|
}, |
|
{ |
|
"epoch": 4.56, |
|
"learning_rate": 1.1702333051763271e-07, |
|
"logits/chosen": -2.0399842262268066, |
|
"logits/rejected": -2.0399842262268066, |
|
"logps/chosen": 0.0, |
|
"logps/rejected": 0.0, |
|
"loss": 0.3581, |
|
"rewards/accuracies": 0.0, |
|
"rewards/chosen": 0.0, |
|
"rewards/margins": 0.0, |
|
"rewards/rejected": 0.0, |
|
"step": 1140 |
|
}, |
|
{ |
|
"epoch": 4.6, |
|
"learning_rate": 9.684576015420277e-08, |
|
"logits/chosen": -1.9201631546020508, |
|
"logits/rejected": -1.9201631546020508, |
|
"logps/chosen": 0.0, |
|
"logps/rejected": 0.0, |
|
"loss": 0.3852, |
|
"rewards/accuracies": 0.0, |
|
"rewards/chosen": 0.0, |
|
"rewards/margins": 0.0, |
|
"rewards/rejected": 0.0, |
|
"step": 1150 |
|
}, |
|
{ |
|
"epoch": 4.64, |
|
"learning_rate": 7.854209717842231e-08, |
|
"logits/chosen": -1.6203396320343018, |
|
"logits/rejected": -1.6199716329574585, |
|
"logps/chosen": -2.1602871417999268, |
|
"logps/rejected": -3.176687240600586, |
|
"loss": 0.3391, |
|
"rewards/accuracies": 0.02500000037252903, |
|
"rewards/chosen": -0.007410462014377117, |
|
"rewards/margins": 0.0085915457457304, |
|
"rewards/rejected": -0.016002008691430092, |
|
"step": 1160 |
|
}, |
|
{ |
|
"epoch": 4.68, |
|
"learning_rate": 6.212661423609184e-08, |
|
"logits/chosen": -1.7701547145843506, |
|
"logits/rejected": -1.7701547145843506, |
|
"logps/chosen": 0.0, |
|
"logps/rejected": 0.0, |
|
"loss": 0.3655, |
|
"rewards/accuracies": 0.0, |
|
"rewards/chosen": 0.0, |
|
"rewards/margins": 0.0, |
|
"rewards/rejected": 0.0, |
|
"step": 1170 |
|
}, |
|
{ |
|
"epoch": 4.72, |
|
"learning_rate": 4.761211162702117e-08, |
|
"logits/chosen": -1.9981458187103271, |
|
"logits/rejected": -1.976782202720642, |
|
"logps/chosen": -4.941376686096191, |
|
"logps/rejected": -3.0533976554870605, |
|
"loss": 0.3454, |
|
"rewards/accuracies": 0.02500000037252903, |
|
"rewards/chosen": -0.0004537239146884531, |
|
"rewards/margins": 0.013995639979839325, |
|
"rewards/rejected": -0.014449363574385643, |
|
"step": 1180 |
|
}, |
|
{ |
|
"epoch": 4.76, |
|
"learning_rate": 3.5009907323737826e-08, |
|
"logits/chosen": -1.8368213176727295, |
|
"logits/rejected": -1.8368213176727295, |
|
"logps/chosen": 0.0, |
|
"logps/rejected": 0.0, |
|
"loss": 0.3334, |
|
"rewards/accuracies": 0.0, |
|
"rewards/chosen": 0.0, |
|
"rewards/margins": 0.0, |
|
"rewards/rejected": 0.0, |
|
"step": 1190 |
|
}, |
|
{ |
|
"epoch": 4.8, |
|
"learning_rate": 2.4329828146074096e-08, |
|
"logits/chosen": -1.806884765625, |
|
"logits/rejected": -1.806884765625, |
|
"logps/chosen": 0.0, |
|
"logps/rejected": 0.0, |
|
"loss": 0.3754, |
|
"rewards/accuracies": 0.0, |
|
"rewards/chosen": 0.0, |
|
"rewards/margins": 0.0, |
|
"rewards/rejected": 0.0, |
|
"step": 1200 |
|
}, |
|
{ |
|
"epoch": 4.8, |
|
"eval_logits/chosen": -1.9853414297103882, |
|
"eval_logits/rejected": -1.825589895248413, |
|
"eval_logps/chosen": -287.1330871582031, |
|
"eval_logps/rejected": -271.7890319824219, |
|
"eval_loss": 0.09735800325870514, |
|
"eval_rewards/accuracies": 0.3440000116825104, |
|
"eval_rewards/chosen": -0.20435477793216705, |
|
"eval_rewards/margins": -0.03586255759000778, |
|
"eval_rewards/rejected": -0.16849222779273987, |
|
"eval_runtime": 702.0059, |
|
"eval_samples_per_second": 2.849, |
|
"eval_steps_per_second": 1.424, |
|
"step": 1200 |
|
}, |
|
{ |
|
"epoch": 4.84, |
|
"learning_rate": 1.5580202098509078e-08, |
|
"logits/chosen": -1.985327124595642, |
|
"logits/rejected": -1.9865849018096924, |
|
"logps/chosen": -6.517402648925781, |
|
"logps/rejected": -8.804891586303711, |
|
"loss": 0.3148, |
|
"rewards/accuracies": 0.02500000037252903, |
|
"rewards/chosen": -0.009186786599457264, |
|
"rewards/margins": 0.014380457811057568, |
|
"rewards/rejected": -0.02356724441051483, |
|
"step": 1210 |
|
}, |
|
{ |
|
"epoch": 4.88, |
|
"learning_rate": 8.767851876239075e-09, |
|
"logits/chosen": -1.8783695697784424, |
|
"logits/rejected": -1.8783695697784424, |
|
"logps/chosen": 0.0, |
|
"logps/rejected": 0.0, |
|
"loss": 0.3112, |
|
"rewards/accuracies": 0.0, |
|
"rewards/chosen": 0.0, |
|
"rewards/margins": 0.0, |
|
"rewards/rejected": 0.0, |
|
"step": 1220 |
|
}, |
|
{ |
|
"epoch": 4.92, |
|
"learning_rate": 3.8980895450474455e-09, |
|
"logits/chosen": -1.9094947576522827, |
|
"logits/rejected": -1.9094947576522827, |
|
"logps/chosen": 0.0, |
|
"logps/rejected": 0.0, |
|
"loss": 0.284, |
|
"rewards/accuracies": 0.0, |
|
"rewards/chosen": 0.0, |
|
"rewards/margins": 0.0, |
|
"rewards/rejected": 0.0, |
|
"step": 1230 |
|
}, |
|
{ |
|
"epoch": 4.96, |
|
"learning_rate": 9.747123991141193e-10, |
|
"logits/chosen": -1.9814590215682983, |
|
"logits/rejected": -1.9814590215682983, |
|
"logps/chosen": 0.0, |
|
"logps/rejected": 0.0, |
|
"loss": 0.3606, |
|
"rewards/accuracies": 0.0, |
|
"rewards/chosen": 0.0, |
|
"rewards/margins": 0.0, |
|
"rewards/rejected": 0.0, |
|
"step": 1240 |
|
}, |
|
{ |
|
"epoch": 5.0, |
|
"learning_rate": 0.0, |
|
"logits/chosen": -1.8839404582977295, |
|
"logits/rejected": -1.8839404582977295, |
|
"logps/chosen": 0.0, |
|
"logps/rejected": 0.0, |
|
"loss": 0.3383, |
|
"rewards/accuracies": 0.0, |
|
"rewards/chosen": 0.0, |
|
"rewards/margins": 0.0, |
|
"rewards/rejected": 0.0, |
|
"step": 1250 |
|
}, |
|
{ |
|
"epoch": 5.0, |
|
"step": 1250, |
|
"total_flos": 0.0, |
|
"train_loss": 0.3515237546205521, |
|
"train_runtime": 12848.6235, |
|
"train_samples_per_second": 0.389, |
|
"train_steps_per_second": 0.097 |
|
} |
|
], |
|
"logging_steps": 10, |
|
"max_steps": 1250, |
|
"num_input_tokens_seen": 0, |
|
"num_train_epochs": 5, |
|
"save_steps": 100, |
|
"total_flos": 0.0, |
|
"train_batch_size": 2, |
|
"trial_name": null, |
|
"trial_params": null |
|
} |
|
|