{
  "best_metric": null,
  "best_model_checkpoint": null,
  "epoch": 0.99581589958159,
  "eval_steps": 500,
  "global_step": 119,
  "is_hyper_param_search": false,
  "is_local_process_zero": true,
  "is_world_process_zero": true,
  "log_history": [
    {
      "epoch": 0.01,
      "learning_rate": 4.166666666666666e-08,
      "logits/chosen": -2.305778980255127,
      "logits/rejected": -2.224325656890869,
      "logps/chosen": -270.25244140625,
      "logps/rejected": -383.8268127441406,
      "loss": 0.6931,
      "rewards/accuracies": 0.0,
      "rewards/chosen": 0.0,
      "rewards/margins": 0.0,
      "rewards/rejected": 0.0,
      "step": 1
    },
    {
      "epoch": 0.08,
      "learning_rate": 4.1666666666666667e-07,
      "logits/chosen": -2.394684314727783,
      "logits/rejected": -2.4029245376586914,
      "logps/chosen": -217.4110565185547,
      "logps/rejected": -323.976318359375,
      "loss": 0.6897,
      "rewards/accuracies": 0.5416666865348816,
      "rewards/chosen": -0.010575544089078903,
      "rewards/margins": 0.014531731605529785,
      "rewards/rejected": -0.02510727569460869,
      "step": 10
    },
    {
      "epoch": 0.17,
      "learning_rate": 4.931352528237397e-07,
      "logits/chosen": -2.293762683868408,
      "logits/rejected": -2.2932822704315186,
      "logps/chosen": -260.174560546875,
      "logps/rejected": -345.7673034667969,
      "loss": 0.6705,
      "rewards/accuracies": 0.6312500238418579,
      "rewards/chosen": -0.30990877747535706,
      "rewards/margins": 0.16011206805706024,
      "rewards/rejected": -0.47002077102661133,
      "step": 20
    },
    {
      "epoch": 0.25,
      "learning_rate": 4.658920803689553e-07,
      "logits/chosen": -2.2889437675476074,
      "logits/rejected": -2.2858946323394775,
      "logps/chosen": -283.4850158691406,
      "logps/rejected": -348.92822265625,
      "loss": 0.6862,
      "rewards/accuracies": 0.581250011920929,
      "rewards/chosen": -0.3383755683898926,
      "rewards/margins": 0.19907911121845245,
      "rewards/rejected": -0.5374546647071838,
      "step": 30
    },
    {
      "epoch": 0.33,
      "learning_rate": 4.201712553872657e-07,
      "logits/chosen": -2.295836925506592,
      "logits/rejected": -2.288512706756592,
      "logps/chosen": -269.2564392089844,
      "logps/rejected": -314.68768310546875,
      "loss": 0.6686,
      "rewards/accuracies": 0.5562499761581421,
      "rewards/chosen": -0.06676869094371796,
      "rewards/margins": 0.07462646812200546,
      "rewards/rejected": -0.14139513671398163,
      "step": 40
    },
    {
      "epoch": 0.42,
      "learning_rate": 3.598859066780754e-07,
      "logits/chosen": -2.2943179607391357,
      "logits/rejected": -2.2712674140930176,
      "logps/chosen": -272.5740051269531,
      "logps/rejected": -336.09210205078125,
      "loss": 0.6653,
      "rewards/accuracies": 0.612500011920929,
      "rewards/chosen": -0.16639836132526398,
      "rewards/margins": 0.1392066478729248,
      "rewards/rejected": -0.30560502409935,
      "step": 50
    },
    {
      "epoch": 0.5,
      "learning_rate": 2.9019570347986706e-07,
      "logits/chosen": -2.3181185722351074,
      "logits/rejected": -2.3116087913513184,
      "logps/chosen": -279.1436462402344,
      "logps/rejected": -351.61798095703125,
      "loss": 0.6473,
      "rewards/accuracies": 0.612500011920929,
      "rewards/chosen": -0.30815738439559937,
      "rewards/margins": 0.14108426868915558,
      "rewards/rejected": -0.44924163818359375,
      "step": 60
    },
    {
      "epoch": 0.59,
      "learning_rate": 2.1706525253979534e-07,
      "logits/chosen": -2.3251430988311768,
      "logits/rejected": -2.2963356971740723,
      "logps/chosen": -280.5970458984375,
      "logps/rejected": -364.6551208496094,
      "loss": 0.651,
      "rewards/accuracies": 0.625,
      "rewards/chosen": -0.3270416855812073,
      "rewards/margins": 0.1887568086385727,
      "rewards/rejected": -0.5157985091209412,
      "step": 70
    },
    {
      "epoch": 0.67,
      "learning_rate": 1.4675360263490295e-07,
      "logits/chosen": -2.308258056640625,
      "logits/rejected": -2.2962486743927,
      "logps/chosen": -282.85528564453125,
      "logps/rejected": -327.85491943359375,
      "loss": 0.6311,
      "rewards/accuracies": 0.5625,
      "rewards/chosen": -0.2814248204231262,
      "rewards/margins": 0.1915736347436905,
      "rewards/rejected": -0.4729984700679779,
      "step": 80
    },
    {
      "epoch": 0.75,
      "learning_rate": 8.527854855097224e-08,
      "logits/chosen": -2.2949376106262207,
      "logits/rejected": -2.3052544593811035,
      "logps/chosen": -250.7218017578125,
      "logps/rejected": -336.38140869140625,
      "loss": 0.6149,
      "rewards/accuracies": 0.6312500238418579,
      "rewards/chosen": -0.2139570415019989,
      "rewards/margins": 0.24394384026527405,
      "rewards/rejected": -0.45790091156959534,
      "step": 90
    },
    {
      "epoch": 0.84,
      "learning_rate": 3.790158337517127e-08,
      "logits/chosen": -2.319019317626953,
      "logits/rejected": -2.298459529876709,
      "logps/chosen": -291.30572509765625,
      "logps/rejected": -360.62750244140625,
      "loss": 0.6427,
      "rewards/accuracies": 0.6000000238418579,
      "rewards/chosen": -0.31846240162849426,
      "rewards/margins": 0.2448347508907318,
      "rewards/rejected": -0.5632971525192261,
      "step": 100
    },
    {
      "epoch": 0.92,
      "learning_rate": 8.677580722139671e-09,
      "logits/chosen": -2.3022727966308594,
      "logits/rejected": -2.3168272972106934,
      "logps/chosen": -281.0862731933594,
      "logps/rejected": -347.0366516113281,
      "loss": 0.6439,
      "rewards/accuracies": 0.550000011920929,
      "rewards/chosen": -0.29117727279663086,
      "rewards/margins": 0.17553743720054626,
      "rewards/rejected": -0.4667147099971771,
      "step": 110
    },
    {
      "epoch": 1.0,
      "step": 119,
      "total_flos": 0.0,
      "train_loss": 0.6554931792892328,
      "train_runtime": 1987.3183,
      "train_samples_per_second": 7.69,
      "train_steps_per_second": 0.06
    }
  ],
  "logging_steps": 10,
  "max_steps": 119,
  "num_input_tokens_seen": 0,
  "num_train_epochs": 1,
  "save_steps": 100,
  "total_flos": 0.0,
  "train_batch_size": 8,
  "trial_name": null,
  "trial_params": null
}