{
  "best_metric": null,
  "best_model_checkpoint": null,
  "epoch": 0.9973333333333333,
  "eval_steps": 100,
  "global_step": 187,
  "is_hyper_param_search": false,
  "is_local_process_zero": true,
  "is_world_process_zero": true,
  "log_history": [
    {
      "epoch": 0.01,
      "grad_norm": 19.109572167610484,
      "learning_rate": 2.6315789473684208e-08,
      "logits/chosen": -2.964515209197998,
      "logits/rejected": -2.865140914916992,
      "logps/chosen": -485.6763916015625,
      "logps/rejected": -1249.7501220703125,
      "loss": 0.6931,
      "rewards/accuracies": 0.0,
      "rewards/chosen": 0.0,
      "rewards/margins": 0.0,
      "rewards/rejected": 0.0,
      "step": 1
    },
    {
      "epoch": 0.05,
      "grad_norm": 18.895223645335697,
      "learning_rate": 2.631578947368421e-07,
      "logits/chosen": -2.7736825942993164,
      "logits/rejected": -2.7408108711242676,
      "logps/chosen": -604.7006225585938,
      "logps/rejected": -1056.1942138671875,
      "loss": 0.6926,
      "rewards/accuracies": 0.5416666865348816,
      "rewards/chosen": 0.0012125401990488172,
      "rewards/margins": 0.001352548599243164,
      "rewards/rejected": -0.00014000837109051645,
      "step": 10
    },
    {
      "epoch": 0.11,
      "grad_norm": 19.562748691217283,
      "learning_rate": 4.999562902281866e-07,
      "logits/chosen": -2.7962822914123535,
      "logits/rejected": -2.8271851539611816,
      "logps/chosen": -571.3375854492188,
      "logps/rejected": -971.5126953125,
      "loss": 0.6749,
      "rewards/accuracies": 0.7124999761581421,
      "rewards/chosen": 0.03103743866086006,
      "rewards/margins": 0.03241748735308647,
      "rewards/rejected": -0.0013800484593957663,
      "step": 20
    },
    {
      "epoch": 0.16,
      "grad_norm": 23.57935669375875,
      "learning_rate": 4.947295864744121e-07,
      "logits/chosen": -2.859532117843628,
      "logits/rejected": -2.8859381675720215,
      "logps/chosen": -529.7252197265625,
      "logps/rejected": -1093.7412109375,
      "loss": 0.6296,
      "rewards/accuracies": 0.7749999761581421,
      "rewards/chosen": 0.047512348741292953,
      "rewards/margins": 0.15334269404411316,
      "rewards/rejected": -0.10583032667636871,
      "step": 30
    },
    {
      "epoch": 0.21,
      "grad_norm": 74.03794269111636,
      "learning_rate": 4.809698831278217e-07,
      "logits/chosen": -3.1058590412139893,
      "logits/rejected": -3.105548143386841,
      "logps/chosen": -631.2692260742188,
      "logps/rejected": -1100.1131591796875,
      "loss": 0.5067,
      "rewards/accuracies": 0.7437499761581421,
      "rewards/chosen": -0.4612053334712982,
      "rewards/margins": 0.6213432550430298,
      "rewards/rejected": -1.0825484991073608,
      "step": 40
    },
    {
      "epoch": 0.27,
      "grad_norm": 80.08928437177174,
      "learning_rate": 4.591569405016049e-07,
      "logits/chosen": -3.1383297443389893,
      "logits/rejected": -3.338413953781128,
      "logps/chosen": -614.7294921875,
      "logps/rejected": -1324.274658203125,
      "loss": 0.3007,
      "rewards/accuracies": 0.856249988079071,
      "rewards/chosen": -0.7203965187072754,
      "rewards/margins": 2.4570107460021973,
      "rewards/rejected": -3.1774070262908936,
      "step": 50
    },
    {
      "epoch": 0.32,
      "grad_norm": 52.8412534701194,
      "learning_rate": 4.3005131163403164e-07,
      "logits/chosen": -3.232844829559326,
      "logits/rejected": -3.4020397663116455,
      "logps/chosen": -607.4974365234375,
      "logps/rejected": -1571.42578125,
      "loss": 0.2467,
      "rewards/accuracies": 0.918749988079071,
      "rewards/chosen": -0.6835159063339233,
      "rewards/margins": 4.190090656280518,
      "rewards/rejected": -4.8736066818237305,
      "step": 60
    },
    {
      "epoch": 0.37,
      "grad_norm": 45.803944170508274,
      "learning_rate": 3.946678240449515e-07,
      "logits/chosen": -3.016165256500244,
      "logits/rejected": -3.2087910175323486,
      "logps/chosen": -602.6742553710938,
      "logps/rejected": -1499.858154296875,
      "loss": 0.2227,
      "rewards/accuracies": 0.893750011920929,
      "rewards/chosen": -0.6613572239875793,
      "rewards/margins": 4.315842628479004,
      "rewards/rejected": -4.977200031280518,
      "step": 70
    },
    {
      "epoch": 0.43,
      "grad_norm": 33.74568647416123,
      "learning_rate": 3.5424019569033206e-07,
      "logits/chosen": -2.980517864227295,
      "logits/rejected": -2.997511863708496,
      "logps/chosen": -698.8486328125,
      "logps/rejected": -1709.7763671875,
      "loss": 0.2216,
      "rewards/accuracies": 0.9312499761581421,
      "rewards/chosen": -1.0458548069000244,
      "rewards/margins": 5.626683712005615,
      "rewards/rejected": -6.672537803649902,
      "step": 80
    },
    {
      "epoch": 0.48,
      "grad_norm": 32.76518067019826,
      "learning_rate": 3.1017801885224326e-07,
      "logits/chosen": -3.0111451148986816,
      "logits/rejected": -3.0090878009796143,
      "logps/chosen": -650.3148193359375,
      "logps/rejected": -1498.55419921875,
      "loss": 0.2021,
      "rewards/accuracies": 0.90625,
      "rewards/chosen": -0.8722761869430542,
      "rewards/margins": 4.187361717224121,
      "rewards/rejected": -5.059638023376465,
      "step": 90
    },
    {
      "epoch": 0.53,
      "grad_norm": 64.24324243411806,
      "learning_rate": 2.640176118092979e-07,
      "logits/chosen": -2.9020493030548096,
      "logits/rejected": -2.935757875442505,
      "logps/chosen": -751.5125732421875,
      "logps/rejected": -1689.5228271484375,
      "loss": 0.1645,
      "rewards/accuracies": 0.893750011920929,
      "rewards/chosen": -1.2032721042633057,
      "rewards/margins": 4.921408653259277,
      "rewards/rejected": -6.124680995941162,
      "step": 100
    },
    {
      "epoch": 0.53,
      "eval_logits/chosen": -3.0544369220733643,
      "eval_logits/rejected": -2.793405294418335,
      "eval_logps/chosen": -725.9426879882812,
      "eval_logps/rejected": -1452.9771728515625,
      "eval_loss": 0.25031739473342896,
      "eval_rewards/accuracies": 0.831250011920929,
      "eval_rewards/chosen": -1.6025804281234741,
      "eval_rewards/margins": 3.9000518321990967,
      "eval_rewards/rejected": -5.502632141113281,
      "eval_runtime": 65.7537,
      "eval_samples_per_second": 9.368,
      "eval_steps_per_second": 0.304,
      "step": 100
    },
    {
      "epoch": 0.59,
      "grad_norm": 41.59873680369454,
      "learning_rate": 2.1736845194498716e-07,
      "logits/chosen": -2.9784274101257324,
      "logits/rejected": -2.980086088180542,
      "logps/chosen": -600.6064453125,
      "logps/rejected": -1670.901611328125,
      "loss": 0.1595,
      "rewards/accuracies": 0.925000011920929,
      "rewards/chosen": -0.918400764465332,
      "rewards/margins": 6.283780574798584,
      "rewards/rejected": -7.202181339263916,
      "step": 110
    },
    {
      "epoch": 0.64,
      "grad_norm": 28.23680644032835,
      "learning_rate": 1.718570580135889e-07,
      "logits/chosen": -3.0252156257629395,
      "logits/rejected": -3.080897569656372,
      "logps/chosen": -611.710693359375,
      "logps/rejected": -1694.8226318359375,
      "loss": 0.1391,
      "rewards/accuracies": 0.9375,
      "rewards/chosen": -0.8532626032829285,
      "rewards/margins": 5.446272850036621,
      "rewards/rejected": -6.299535751342773,
      "step": 120
    },
    {
      "epoch": 0.69,
      "grad_norm": 40.906944468121836,
      "learning_rate": 1.2907027822369005e-07,
      "logits/chosen": -2.9933369159698486,
      "logits/rejected": -3.124406576156616,
      "logps/chosen": -700.328125,
      "logps/rejected": -1804.997802734375,
      "loss": 0.1477,
      "rewards/accuracies": 0.925000011920929,
      "rewards/chosen": -1.007743000984192,
      "rewards/margins": 6.478204250335693,
      "rewards/rejected": -7.485948085784912,
      "step": 130
    },
    {
      "epoch": 0.75,
      "grad_norm": 22.754078194499957,
      "learning_rate": 9.049996151674788e-08,
      "logits/chosen": -3.086073875427246,
      "logits/rejected": -3.1164612770080566,
      "logps/chosen": -631.7467651367188,
      "logps/rejected": -1740.2171630859375,
      "loss": 0.1821,
      "rewards/accuracies": 0.9125000238418579,
      "rewards/chosen": -1.0232716798782349,
      "rewards/margins": 5.889337539672852,
      "rewards/rejected": -6.912609100341797,
      "step": 140
    },
    {
      "epoch": 0.8,
      "grad_norm": 20.144359719952234,
      "learning_rate": 5.74909411901843e-08,
      "logits/chosen": -2.9675424098968506,
      "logits/rejected": -2.990185499191284,
      "logps/chosen": -617.1038818359375,
      "logps/rejected": -1656.051513671875,
      "loss": 0.1413,
      "rewards/accuracies": 0.949999988079071,
      "rewards/chosen": -0.9472934603691101,
      "rewards/margins": 5.291378974914551,
      "rewards/rejected": -6.238672733306885,
      "step": 150
    },
    {
      "epoch": 0.85,
      "grad_norm": 26.642508471840806,
      "learning_rate": 3.119414452281158e-08,
      "logits/chosen": -2.9869649410247803,
      "logits/rejected": -3.0431644916534424,
      "logps/chosen": -662.4171142578125,
      "logps/rejected": -1831.9390869140625,
      "loss": 0.1189,
      "rewards/accuracies": 0.949999988079071,
      "rewards/chosen": -0.8813556432723999,
      "rewards/margins": 6.860285758972168,
      "rewards/rejected": -7.741641044616699,
      "step": 160
    },
    {
      "epoch": 0.91,
      "grad_norm": 18.842250875900756,
      "learning_rate": 1.2526463331788501e-08,
      "logits/chosen": -3.083080291748047,
      "logits/rejected": -2.9783942699432373,
      "logps/chosen": -638.3408203125,
      "logps/rejected": -1725.673583984375,
      "loss": 0.1265,
      "rewards/accuracies": 0.9624999761581421,
      "rewards/chosen": -0.9777008891105652,
      "rewards/margins": 6.12181282043457,
      "rewards/rejected": -7.099513053894043,
      "step": 170
    },
    {
      "epoch": 0.96,
      "grad_norm": 34.250119439829845,
      "learning_rate": 2.1387846565474044e-09,
      "logits/chosen": -3.0460267066955566,
      "logits/rejected": -2.9695019721984863,
      "logps/chosen": -608.745849609375,
      "logps/rejected": -1744.884521484375,
      "loss": 0.1257,
      "rewards/accuracies": 0.9437500238418579,
      "rewards/chosen": -1.041512131690979,
      "rewards/margins": 6.2788825035095215,
      "rewards/rejected": -7.320394992828369,
      "step": 180
    },
    {
      "epoch": 1.0,
      "step": 187,
      "total_flos": 0.0,
      "train_loss": 0.2699868052719749,
      "train_runtime": 2833.2764,
      "train_samples_per_second": 4.234,
      "train_steps_per_second": 0.066
    }
  ],
  "logging_steps": 10,
  "max_steps": 187,
  "num_input_tokens_seen": 0,
  "num_train_epochs": 1,
  "save_steps": 100,
  "total_flos": 0.0,
  "train_batch_size": 8,
  "trial_name": null,
  "trial_params": null
}