|
{ |
|
"best_metric": null, |
|
"best_model_checkpoint": null, |
|
"epoch": 1.9805825242718447, |
|
"eval_steps": 25, |
|
"global_step": 102, |
|
"is_hyper_param_search": false, |
|
"is_local_process_zero": true, |
|
"is_world_process_zero": true, |
|
"log_history": [ |
|
{ |
|
"epoch": 0.02, |
|
"learning_rate": 9.09090909090909e-09, |
|
"logits/generated": -2.7517285346984863, |
|
"logits/real": -2.7709789276123047, |
|
"logps/generated": -844.9539794921875, |
|
"logps/real": -335.89251708984375, |
|
"loss": 0.6931, |
|
"rewards/accuracies": 0.0, |
|
"rewards/generated": 0.0, |
|
"rewards/margins": 0.0, |
|
"rewards/real": 0.0, |
|
"step": 1 |
|
}, |
|
{ |
|
"epoch": 0.19, |
|
"learning_rate": 9.09090909090909e-08, |
|
"logits/generated": -2.6863832473754883, |
|
"logits/real": -2.7313873767852783, |
|
"logps/generated": -753.656005859375, |
|
"logps/real": -260.1191711425781, |
|
"loss": 0.5813, |
|
"rewards/accuracies": 0.7430555820465088, |
|
"rewards/generated": -0.3012603223323822, |
|
"rewards/margins": 0.32114675641059875, |
|
"rewards/real": 0.019886476919054985, |
|
"step": 10 |
|
}, |
|
{ |
|
"epoch": 0.39, |
|
"learning_rate": 9.010989010989011e-08, |
|
"logits/generated": -2.4782943725585938, |
|
"logits/real": -2.5394017696380615, |
|
"logps/generated": -1096.2259521484375, |
|
"logps/real": -633.4000854492188, |
|
"loss": 5.8769, |
|
"rewards/accuracies": 0.8999999761581421, |
|
"rewards/generated": -33.22383499145508, |
|
"rewards/margins": -1.7550216913223267, |
|
"rewards/real": -34.978858947753906, |
|
"step": 20 |
|
}, |
|
{ |
|
"epoch": 0.49, |
|
"eval_logits/generated": -2.7939765453338623, |
|
"eval_logits/real": -2.838216781616211, |
|
"eval_logps/generated": -719.6648559570312, |
|
"eval_logps/real": -274.2817077636719, |
|
"eval_loss": 0.18901990354061127, |
|
"eval_rewards/accuracies": 0.9375, |
|
"eval_rewards/generated": -2.9832763671875, |
|
"eval_rewards/margins": 2.815262794494629, |
|
"eval_rewards/real": -0.16801361739635468, |
|
"eval_runtime": 56.889, |
|
"eval_samples_per_second": 6.469, |
|
"eval_steps_per_second": 0.211, |
|
"step": 25 |
|
}, |
|
{ |
|
"epoch": 0.58, |
|
"learning_rate": 7.912087912087911e-08, |
|
"logits/generated": -2.823836088180542, |
|
"logits/real": -2.8936166763305664, |
|
"logps/generated": -856.4396362304688, |
|
"logps/real": -293.3896179199219, |
|
"loss": 0.1538, |
|
"rewards/accuracies": 0.987500011920929, |
|
"rewards/generated": -3.060887336730957, |
|
"rewards/margins": 2.894258975982666, |
|
"rewards/real": -0.1666283905506134, |
|
"step": 30 |
|
}, |
|
{ |
|
"epoch": 0.78, |
|
"learning_rate": 6.813186813186813e-08, |
|
"logits/generated": -2.8231825828552246, |
|
"logits/real": -2.8379809856414795, |
|
"logps/generated": -769.75048828125, |
|
"logps/real": -256.47027587890625, |
|
"loss": 0.1241, |
|
"rewards/accuracies": 0.9937499761581421, |
|
"rewards/generated": -3.725442409515381, |
|
"rewards/margins": 3.5178775787353516, |
|
"rewards/real": -0.20756463706493378, |
|
"step": 40 |
|
}, |
|
{ |
|
"epoch": 0.97, |
|
"learning_rate": 5.714285714285714e-08, |
|
"logits/generated": -2.8316774368286133, |
|
"logits/real": -2.8536953926086426, |
|
"logps/generated": -803.0462646484375, |
|
"logps/real": -274.77337646484375, |
|
"loss": 0.1202, |
|
"rewards/accuracies": 0.949999988079071, |
|
"rewards/generated": -3.983602523803711, |
|
"rewards/margins": 3.5648624897003174, |
|
"rewards/real": -0.4187401831150055, |
|
"step": 50 |
|
}, |
|
{ |
|
"epoch": 0.97, |
|
"eval_logits/generated": -2.83947491645813, |
|
"eval_logits/real": -2.8439435958862305, |
|
"eval_logps/generated": -732.087890625, |
|
"eval_logps/real": -276.7652282714844, |
|
"eval_loss": 0.14404349029064178, |
|
"eval_rewards/accuracies": 0.9479166865348816, |
|
"eval_rewards/generated": -4.225581645965576, |
|
"eval_rewards/margins": 3.809215784072876, |
|
"eval_rewards/real": -0.41636598110198975, |
|
"eval_runtime": 55.8686, |
|
"eval_samples_per_second": 6.587, |
|
"eval_steps_per_second": 0.215, |
|
"step": 50 |
|
}, |
|
{ |
|
"epoch": 1.17, |
|
"learning_rate": 4.615384615384615e-08, |
|
"logits/generated": -2.8372151851654053, |
|
"logits/real": -2.857466697692871, |
|
"logps/generated": -920.998046875, |
|
"logps/real": -288.32086181640625, |
|
"loss": 0.0744, |
|
"rewards/accuracies": 1.0, |
|
"rewards/generated": -4.382575035095215, |
|
"rewards/margins": 4.1766462326049805, |
|
"rewards/real": -0.20592932403087616, |
|
"step": 60 |
|
}, |
|
{ |
|
"epoch": 1.36, |
|
"learning_rate": 3.516483516483517e-08, |
|
"logits/generated": -2.832484483718872, |
|
"logits/real": -2.8351359367370605, |
|
"logps/generated": -778.0480346679688, |
|
"logps/real": -260.9442443847656, |
|
"loss": 0.0754, |
|
"rewards/accuracies": 0.987500011920929, |
|
"rewards/generated": -4.6376495361328125, |
|
"rewards/margins": 4.451912879943848, |
|
"rewards/real": -0.18573713302612305, |
|
"step": 70 |
|
}, |
|
{ |
|
"epoch": 1.46, |
|
"eval_logits/generated": -2.841118574142456, |
|
"eval_logits/real": -2.838819742202759, |
|
"eval_logps/generated": -737.3972778320312, |
|
"eval_logps/real": -278.0699768066406, |
|
"eval_loss": 0.1297575831413269, |
|
"eval_rewards/accuracies": 0.9583333134651184, |
|
"eval_rewards/generated": -4.7565155029296875, |
|
"eval_rewards/margins": 4.2096757888793945, |
|
"eval_rewards/real": -0.546840488910675, |
|
"eval_runtime": 57.5597, |
|
"eval_samples_per_second": 6.393, |
|
"eval_steps_per_second": 0.208, |
|
"step": 75 |
|
}, |
|
{ |
|
"epoch": 1.55, |
|
"learning_rate": 2.4175824175824175e-08, |
|
"logits/generated": -2.8195009231567383, |
|
"logits/real": -2.84141206741333, |
|
"logps/generated": -862.6012573242188, |
|
"logps/real": -276.63531494140625, |
|
"loss": 0.0666, |
|
"rewards/accuracies": 0.9937499761581421, |
|
"rewards/generated": -5.086130619049072, |
|
"rewards/margins": 4.704850196838379, |
|
"rewards/real": -0.3812801241874695, |
|
"step": 80 |
|
}, |
|
{ |
|
"epoch": 1.75, |
|
"learning_rate": 1.3186813186813187e-08, |
|
"logits/generated": -2.8254876136779785, |
|
"logits/real": -2.823962450027466, |
|
"logps/generated": -835.1746215820312, |
|
"logps/real": -272.4798278808594, |
|
"loss": 0.0572, |
|
"rewards/accuracies": 1.0, |
|
"rewards/generated": -5.038660526275635, |
|
"rewards/margins": 4.638853073120117, |
|
"rewards/real": -0.39980727434158325, |
|
"step": 90 |
|
}, |
|
{ |
|
"epoch": 1.94, |
|
"learning_rate": 2.197802197802198e-09, |
|
"logits/generated": -2.8575081825256348, |
|
"logits/real": -2.859795093536377, |
|
"logps/generated": -838.92919921875, |
|
"logps/real": -267.28704833984375, |
|
"loss": 0.0621, |
|
"rewards/accuracies": 0.9937499761581421, |
|
"rewards/generated": -5.040513038635254, |
|
"rewards/margins": 4.70792818069458, |
|
"rewards/real": -0.33258455991744995, |
|
"step": 100 |
|
}, |
|
{ |
|
"epoch": 1.94, |
|
"eval_logits/generated": -2.84299635887146, |
|
"eval_logits/real": -2.8375051021575928, |
|
"eval_logps/generated": -739.3701171875, |
|
"eval_logps/real": -278.2850646972656, |
|
"eval_loss": 0.12532015144824982, |
|
"eval_rewards/accuracies": 0.9479166865348816, |
|
"eval_rewards/generated": -4.953795433044434, |
|
"eval_rewards/margins": 4.385446071624756, |
|
"eval_rewards/real": -0.5683497786521912, |
|
"eval_runtime": 56.3549, |
|
"eval_samples_per_second": 6.53, |
|
"eval_steps_per_second": 0.213, |
|
"step": 100 |
|
}, |
|
{ |
|
"epoch": 1.98, |
|
"step": 102, |
|
"total_flos": 0.0, |
|
"train_loss": 0.7072318076503044, |
|
"train_runtime": 2304.549, |
|
"train_samples_per_second": 2.86, |
|
"train_steps_per_second": 0.044 |
|
} |
|
], |
|
"logging_steps": 10, |
|
"max_steps": 102, |
|
"num_input_tokens_seen": 0, |
|
"num_train_epochs": 2, |
|
"save_steps": 500, |
|
"total_flos": 0.0, |
|
"train_batch_size": 8, |
|
"trial_name": null, |
|
"trial_params": null |
|
} |
|
|