|
{ |
|
"best_metric": null, |
|
"best_model_checkpoint": null, |
|
"epoch": 0.9976019184652278, |
|
"eval_steps": 500, |
|
"global_step": 208, |
|
"is_hyper_param_search": false, |
|
"is_local_process_zero": true, |
|
"is_world_process_zero": true, |
|
"log_history": [ |
|
{ |
|
"epoch": 0.0, |
|
"learning_rate": 2.3809523809523811e-07, |
|
"logits/chosen": 0.15803536772727966, |
|
"logits/rejected": 0.08697354793548584, |
|
"logps/chosen": -431.6365661621094, |
|
"logps/rejected": -312.2266845703125, |
|
"loss": 0.3497, |
|
"rewards/accuracies": 0.0, |
|
"rewards/chosen": 0.0, |
|
"rewards/margins": 0.0, |
|
"rewards/rejected": 0.0, |
|
"step": 1 |
|
}, |
|
{ |
|
"epoch": 0.05, |
|
"learning_rate": 2.380952380952381e-06, |
|
"logits/chosen": 0.08879230171442032, |
|
"logits/rejected": 0.23703241348266602, |
|
"logps/chosen": -334.3096008300781, |
|
"logps/rejected": -325.03387451171875, |
|
"loss": 0.3916, |
|
"rewards/accuracies": 0.4513888955116272, |
|
"rewards/chosen": 0.011717023327946663, |
|
"rewards/margins": 0.0023966077715158463, |
|
"rewards/rejected": 0.009320415556430817, |
|
"step": 10 |
|
}, |
|
{ |
|
"epoch": 0.1, |
|
"learning_rate": 4.761904761904762e-06, |
|
"logits/chosen": 0.10182257741689682, |
|
"logits/rejected": 0.21816711127758026, |
|
"logps/chosen": -337.0960388183594, |
|
"logps/rejected": -311.6546936035156, |
|
"loss": 0.3814, |
|
"rewards/accuracies": 0.48750001192092896, |
|
"rewards/chosen": -0.020739946514368057, |
|
"rewards/margins": 0.02319016307592392, |
|
"rewards/rejected": -0.04393010586500168, |
|
"step": 20 |
|
}, |
|
{ |
|
"epoch": 0.14, |
|
"learning_rate": 4.97147773390341e-06, |
|
"logits/chosen": 0.10252387821674347, |
|
"logits/rejected": 0.20911017060279846, |
|
"logps/chosen": -333.58074951171875, |
|
"logps/rejected": -314.22686767578125, |
|
"loss": 0.3406, |
|
"rewards/accuracies": 0.5562499761581421, |
|
"rewards/chosen": 0.03605792671442032, |
|
"rewards/margins": 0.06152229756116867, |
|
"rewards/rejected": -0.0254643764346838, |
|
"step": 30 |
|
}, |
|
{ |
|
"epoch": 0.19, |
|
"learning_rate": 4.873717504456219e-06, |
|
"logits/chosen": 0.1256047487258911, |
|
"logits/rejected": 0.18428723514080048, |
|
"logps/chosen": -361.2447509765625, |
|
"logps/rejected": -337.5652770996094, |
|
"loss": 0.3075, |
|
"rewards/accuracies": 0.637499988079071, |
|
"rewards/chosen": 0.1919463872909546, |
|
"rewards/margins": 0.11055928468704224, |
|
"rewards/rejected": 0.08138711750507355, |
|
"step": 40 |
|
}, |
|
{ |
|
"epoch": 0.24, |
|
"learning_rate": 4.709119209978242e-06, |
|
"logits/chosen": 0.13542751967906952, |
|
"logits/rejected": 0.17227646708488464, |
|
"logps/chosen": -339.5179138183594, |
|
"logps/rejected": -339.02984619140625, |
|
"loss": 0.3253, |
|
"rewards/accuracies": 0.6499999761581421, |
|
"rewards/chosen": 0.29387253522872925, |
|
"rewards/margins": 0.18539837002754211, |
|
"rewards/rejected": 0.10847418010234833, |
|
"step": 50 |
|
}, |
|
{ |
|
"epoch": 0.29, |
|
"learning_rate": 4.482317534878901e-06, |
|
"logits/chosen": 0.14710070192813873, |
|
"logits/rejected": 0.1668437272310257, |
|
"logps/chosen": -338.6156311035156, |
|
"logps/rejected": -317.2750549316406, |
|
"loss": 0.3199, |
|
"rewards/accuracies": 0.637499988079071, |
|
"rewards/chosen": 0.2612247169017792, |
|
"rewards/margins": 0.14398948848247528, |
|
"rewards/rejected": 0.1172352284193039, |
|
"step": 60 |
|
}, |
|
{ |
|
"epoch": 0.34, |
|
"learning_rate": 4.199698658255298e-06, |
|
"logits/chosen": 0.10598815977573395, |
|
"logits/rejected": 0.15820710361003876, |
|
"logps/chosen": -339.61456298828125, |
|
"logps/rejected": -329.31536865234375, |
|
"loss": 0.2793, |
|
"rewards/accuracies": 0.699999988079071, |
|
"rewards/chosen": 0.3697130084037781, |
|
"rewards/margins": 0.20087119936943054, |
|
"rewards/rejected": 0.16884183883666992, |
|
"step": 70 |
|
}, |
|
{ |
|
"epoch": 0.38, |
|
"learning_rate": 3.869220434746509e-06, |
|
"logits/chosen": 0.07073510438203812, |
|
"logits/rejected": 0.15016858279705048, |
|
"logps/chosen": -314.06280517578125, |
|
"logps/rejected": -323.94268798828125, |
|
"loss": 0.2928, |
|
"rewards/accuracies": 0.6625000238418579, |
|
"rewards/chosen": 0.3701319396495819, |
|
"rewards/margins": 0.16388371586799622, |
|
"rewards/rejected": 0.2062481939792633, |
|
"step": 80 |
|
}, |
|
{ |
|
"epoch": 0.43, |
|
"learning_rate": 3.5001883208580668e-06, |
|
"logits/chosen": 0.12372653186321259, |
|
"logits/rejected": 0.20820951461791992, |
|
"logps/chosen": -373.20831298828125, |
|
"logps/rejected": -339.1376037597656, |
|
"loss": 0.2895, |
|
"rewards/accuracies": 0.637499988079071, |
|
"rewards/chosen": 0.33848610520362854, |
|
"rewards/margins": 0.1839863359928131, |
|
"rewards/rejected": 0.15449976921081543, |
|
"step": 90 |
|
}, |
|
{ |
|
"epoch": 0.48, |
|
"learning_rate": 3.102993356121938e-06, |
|
"logits/chosen": 0.11043532192707062, |
|
"logits/rejected": 0.21166983246803284, |
|
"logps/chosen": -332.3284606933594, |
|
"logps/rejected": -328.7100524902344, |
|
"loss": 0.3023, |
|
"rewards/accuracies": 0.643750011920929, |
|
"rewards/chosen": 0.28233280777931213, |
|
"rewards/margins": 0.1811174899339676, |
|
"rewards/rejected": 0.10121532529592514, |
|
"step": 100 |
|
}, |
|
{ |
|
"epoch": 0.53, |
|
"learning_rate": 2.6888195769001147e-06, |
|
"logits/chosen": 0.12863442301750183, |
|
"logits/rejected": 0.2186942994594574, |
|
"logps/chosen": -322.6536560058594, |
|
"logps/rejected": -326.0906066894531, |
|
"loss": 0.2747, |
|
"rewards/accuracies": 0.75, |
|
"rewards/chosen": 0.3540286421775818, |
|
"rewards/margins": 0.2065799981355667, |
|
"rewards/rejected": 0.14744864404201508, |
|
"step": 110 |
|
}, |
|
{ |
|
"epoch": 0.58, |
|
"learning_rate": 2.269329101341745e-06, |
|
"logits/chosen": 0.11101800203323364, |
|
"logits/rejected": 0.21909146010875702, |
|
"logps/chosen": -386.37506103515625, |
|
"logps/rejected": -338.628662109375, |
|
"loss": 0.2594, |
|
"rewards/accuracies": 0.7250000238418579, |
|
"rewards/chosen": 0.32755765318870544, |
|
"rewards/margins": 0.19837772846221924, |
|
"rewards/rejected": 0.1291799247264862, |
|
"step": 120 |
|
}, |
|
{ |
|
"epoch": 0.62, |
|
"learning_rate": 1.856333752729311e-06, |
|
"logits/chosen": 0.108365498483181, |
|
"logits/rejected": 0.21159549057483673, |
|
"logps/chosen": -354.3942565917969, |
|
"logps/rejected": -350.58026123046875, |
|
"loss": 0.2845, |
|
"rewards/accuracies": 0.6875, |
|
"rewards/chosen": 0.32795828580856323, |
|
"rewards/margins": 0.22649607062339783, |
|
"rewards/rejected": 0.10146218538284302, |
|
"step": 130 |
|
}, |
|
{ |
|
"epoch": 0.67, |
|
"learning_rate": 1.4614624674952843e-06, |
|
"logits/chosen": 0.13152627646923065, |
|
"logits/rejected": 0.1252177655696869, |
|
"logps/chosen": -310.82440185546875, |
|
"logps/rejected": -306.65057373046875, |
|
"loss": 0.3034, |
|
"rewards/accuracies": 0.625, |
|
"rewards/chosen": 0.28935328125953674, |
|
"rewards/margins": 0.17421108484268188, |
|
"rewards/rejected": 0.11514218151569366, |
|
"step": 140 |
|
}, |
|
{ |
|
"epoch": 0.72, |
|
"learning_rate": 1.0958338528840893e-06, |
|
"logits/chosen": 0.14218227565288544, |
|
"logits/rejected": 0.17058388888835907, |
|
"logps/chosen": -365.53497314453125, |
|
"logps/rejected": -336.30670166015625, |
|
"loss": 0.279, |
|
"rewards/accuracies": 0.7124999761581421, |
|
"rewards/chosen": 0.34200993180274963, |
|
"rewards/margins": 0.18471720814704895, |
|
"rewards/rejected": 0.1572926938533783, |
|
"step": 150 |
|
}, |
|
{ |
|
"epoch": 0.77, |
|
"learning_rate": 7.697431142327633e-07, |
|
"logits/chosen": 0.1493266373872757, |
|
"logits/rejected": 0.1491091400384903, |
|
"logps/chosen": -363.55841064453125, |
|
"logps/rejected": -332.8086853027344, |
|
"loss": 0.2657, |
|
"rewards/accuracies": 0.6499999761581421, |
|
"rewards/chosen": 0.3300887942314148, |
|
"rewards/margins": 0.20138521492481232, |
|
"rewards/rejected": 0.12870360910892487, |
|
"step": 160 |
|
}, |
|
{ |
|
"epoch": 0.82, |
|
"learning_rate": 4.923721672305148e-07, |
|
"logits/chosen": 0.09402619302272797, |
|
"logits/rejected": 0.1920977234840393, |
|
"logps/chosen": -334.378662109375, |
|
"logps/rejected": -331.62322998046875, |
|
"loss": 0.2685, |
|
"rewards/accuracies": 0.65625, |
|
"rewards/chosen": 0.3077259361743927, |
|
"rewards/margins": 0.17248141765594482, |
|
"rewards/rejected": 0.13524451851844788, |
|
"step": 170 |
|
}, |
|
{ |
|
"epoch": 0.86, |
|
"learning_rate": 2.7153109768518926e-07, |
|
"logits/chosen": 0.11278879642486572, |
|
"logits/rejected": 0.18854503333568573, |
|
"logps/chosen": -388.7989501953125, |
|
"logps/rejected": -336.02105712890625, |
|
"loss": 0.2612, |
|
"rewards/accuracies": 0.800000011920929, |
|
"rewards/chosen": 0.34268996119499207, |
|
"rewards/margins": 0.21422357857227325, |
|
"rewards/rejected": 0.12846639752388, |
|
"step": 180 |
|
}, |
|
{ |
|
"epoch": 0.91, |
|
"learning_rate": 1.1343824865573422e-07, |
|
"logits/chosen": 0.09929057955741882, |
|
"logits/rejected": 0.15045389533042908, |
|
"logps/chosen": -343.9807434082031, |
|
"logps/rejected": -318.48028564453125, |
|
"loss": 0.2662, |
|
"rewards/accuracies": 0.6937500238418579, |
|
"rewards/chosen": 0.29964178800582886, |
|
"rewards/margins": 0.17701460421085358, |
|
"rewards/rejected": 0.1226271539926529, |
|
"step": 190 |
|
}, |
|
{ |
|
"epoch": 0.96, |
|
"learning_rate": 2.2545127157831416e-08, |
|
"logits/chosen": 0.07950419932603836, |
|
"logits/rejected": 0.178897887468338, |
|
"logps/chosen": -297.21661376953125, |
|
"logps/rejected": -294.276611328125, |
|
"loss": 0.2553, |
|
"rewards/accuracies": 0.6812499761581421, |
|
"rewards/chosen": 0.3100071847438812, |
|
"rewards/margins": 0.16973480582237244, |
|
"rewards/rejected": 0.14027239382266998, |
|
"step": 200 |
|
}, |
|
{ |
|
"epoch": 1.0, |
|
"step": 208, |
|
"total_flos": 0.0, |
|
"train_loss": 0.29713730256144816, |
|
"train_runtime": 2891.8659, |
|
"train_samples_per_second": 3.458, |
|
"train_steps_per_second": 0.072 |
|
} |
|
], |
|
"logging_steps": 10, |
|
"max_steps": 208, |
|
"num_input_tokens_seen": 0, |
|
"num_train_epochs": 1, |
|
"save_steps": 100, |
|
"total_flos": 0.0, |
|
"train_batch_size": 4, |
|
"trial_name": null, |
|
"trial_params": null |
|
} |
|
|