|
{ |
|
"best_metric": null, |
|
"best_model_checkpoint": null, |
|
"epoch": 1.9748953974895398, |
|
"eval_steps": 500, |
|
"global_step": 118, |
|
"is_hyper_param_search": false, |
|
"is_local_process_zero": true, |
|
"is_world_process_zero": true, |
|
"log_history": [ |
|
{ |
|
"epoch": 0.016736401673640166, |
|
"grad_norm": 118.10906601297802, |
|
"learning_rate": 8.333333333333332e-09, |
|
"logits/chosen": -2.0019314289093018, |
|
"logits/rejected": -1.9766970872879028, |
|
"logps/chosen": -309.98992919921875, |
|
"logps/pi_response": -172.89300537109375, |
|
"logps/ref_response": -172.89300537109375, |
|
"logps/rejected": -664.0586547851562, |
|
"loss": 0.6931, |
|
"rewards/accuracies": 0.0, |
|
"rewards/chosen": 0.0, |
|
"rewards/margins": 0.0, |
|
"rewards/rejected": 0.0, |
|
"step": 1 |
|
}, |
|
{ |
|
"epoch": 0.16736401673640167, |
|
"grad_norm": 119.81663055217236, |
|
"learning_rate": 8.333333333333334e-08, |
|
"logits/chosen": -1.9912662506103516, |
|
"logits/rejected": -1.9100477695465088, |
|
"logps/chosen": -339.4380187988281, |
|
"logps/pi_response": -145.0324249267578, |
|
"logps/ref_response": -144.99766540527344, |
|
"logps/rejected": -690.2503051757812, |
|
"loss": 0.6896, |
|
"rewards/accuracies": 0.5208333134651184, |
|
"rewards/chosen": -0.005152760073542595, |
|
"rewards/margins": 0.007620910182595253, |
|
"rewards/rejected": -0.012773669324815273, |
|
"step": 10 |
|
}, |
|
{ |
|
"epoch": 0.33472803347280333, |
|
"grad_norm": 61.89513911795292, |
|
"learning_rate": 9.860114570402053e-08, |
|
"logits/chosen": -1.942497968673706, |
|
"logits/rejected": -1.850992202758789, |
|
"logps/chosen": -344.79351806640625, |
|
"logps/pi_response": -157.76609802246094, |
|
"logps/ref_response": -152.63552856445312, |
|
"logps/rejected": -725.7340698242188, |
|
"loss": 0.6134, |
|
"rewards/accuracies": 0.7281249761581421, |
|
"rewards/chosen": -0.1949012279510498, |
|
"rewards/margins": 0.2145640105009079, |
|
"rewards/rejected": -0.4094652235507965, |
|
"step": 20 |
|
}, |
|
{ |
|
"epoch": 0.502092050209205, |
|
"grad_norm": 41.75290331160563, |
|
"learning_rate": 9.305218058836777e-08, |
|
"logits/chosen": -1.8073937892913818, |
|
"logits/rejected": -1.748516321182251, |
|
"logps/chosen": -373.4971923828125, |
|
"logps/pi_response": -154.7528839111328, |
|
"logps/ref_response": -136.81463623046875, |
|
"logps/rejected": -826.9029541015625, |
|
"loss": 0.5314, |
|
"rewards/accuracies": 0.768750011920929, |
|
"rewards/chosen": -0.5469005703926086, |
|
"rewards/margins": 0.8107492327690125, |
|
"rewards/rejected": -1.357649803161621, |
|
"step": 30 |
|
}, |
|
{ |
|
"epoch": 0.6694560669456067, |
|
"grad_norm": 60.21218903804784, |
|
"learning_rate": 8.374915007591053e-08, |
|
"logits/chosen": -1.7726547718048096, |
|
"logits/rejected": -1.6935310363769531, |
|
"logps/chosen": -415.587890625, |
|
"logps/pi_response": -174.06515502929688, |
|
"logps/ref_response": -150.7990264892578, |
|
"logps/rejected": -888.4119262695312, |
|
"loss": 0.5313, |
|
"rewards/accuracies": 0.75, |
|
"rewards/chosen": -0.7993820905685425, |
|
"rewards/margins": 1.1050827503204346, |
|
"rewards/rejected": -1.9044649600982666, |
|
"step": 40 |
|
}, |
|
{ |
|
"epoch": 0.8368200836820083, |
|
"grad_norm": 60.309635521609, |
|
"learning_rate": 7.150326011382603e-08, |
|
"logits/chosen": -1.7549054622650146, |
|
"logits/rejected": -1.6928848028182983, |
|
"logps/chosen": -417.91644287109375, |
|
"logps/pi_response": -171.1042938232422, |
|
"logps/ref_response": -149.61317443847656, |
|
"logps/rejected": -928.3387451171875, |
|
"loss": 0.5012, |
|
"rewards/accuracies": 0.7718750238418579, |
|
"rewards/chosen": -0.7739541530609131, |
|
"rewards/margins": 1.2372756004333496, |
|
"rewards/rejected": -2.0112297534942627, |
|
"step": 50 |
|
}, |
|
{ |
|
"epoch": 1.00418410041841, |
|
"grad_norm": 43.55054669917714, |
|
"learning_rate": 5.738232820012406e-08, |
|
"logits/chosen": -1.7201545238494873, |
|
"logits/rejected": -1.660457968711853, |
|
"logps/chosen": -363.9744873046875, |
|
"logps/pi_response": -158.1030731201172, |
|
"logps/ref_response": -138.2023468017578, |
|
"logps/rejected": -817.6099853515625, |
|
"loss": 0.4936, |
|
"rewards/accuracies": 0.762499988079071, |
|
"rewards/chosen": -0.6088830232620239, |
|
"rewards/margins": 1.028526782989502, |
|
"rewards/rejected": -1.6374098062515259, |
|
"step": 60 |
|
}, |
|
{ |
|
"epoch": 1.1715481171548117, |
|
"grad_norm": 39.95163711230341, |
|
"learning_rate": 4.2617671799875946e-08, |
|
"logits/chosen": -1.768969178199768, |
|
"logits/rejected": -1.6774520874023438, |
|
"logps/chosen": -409.07965087890625, |
|
"logps/pi_response": -178.50694274902344, |
|
"logps/ref_response": -157.27854919433594, |
|
"logps/rejected": -837.8878784179688, |
|
"loss": 0.4784, |
|
"rewards/accuracies": 0.7875000238418579, |
|
"rewards/chosen": -0.6469767689704895, |
|
"rewards/margins": 1.0017244815826416, |
|
"rewards/rejected": -1.6487010717391968, |
|
"step": 70 |
|
}, |
|
{ |
|
"epoch": 1.3389121338912133, |
|
"grad_norm": 54.62109419599635, |
|
"learning_rate": 2.8496739886173992e-08, |
|
"logits/chosen": -1.7565717697143555, |
|
"logits/rejected": -1.6744270324707031, |
|
"logps/chosen": -414.45989990234375, |
|
"logps/pi_response": -171.05670166015625, |
|
"logps/ref_response": -150.3345947265625, |
|
"logps/rejected": -869.4351806640625, |
|
"loss": 0.4682, |
|
"rewards/accuracies": 0.796875, |
|
"rewards/chosen": -0.6550418138504028, |
|
"rewards/margins": 1.0387578010559082, |
|
"rewards/rejected": -1.693799614906311, |
|
"step": 80 |
|
}, |
|
{ |
|
"epoch": 1.506276150627615, |
|
"grad_norm": 47.344285151326375, |
|
"learning_rate": 1.6250849924089483e-08, |
|
"logits/chosen": -1.7577073574066162, |
|
"logits/rejected": -1.6519912481307983, |
|
"logps/chosen": -414.08984375, |
|
"logps/pi_response": -160.68191528320312, |
|
"logps/ref_response": -142.70220947265625, |
|
"logps/rejected": -822.3709716796875, |
|
"loss": 0.4783, |
|
"rewards/accuracies": 0.762499988079071, |
|
"rewards/chosen": -0.6929503083229065, |
|
"rewards/margins": 0.93775475025177, |
|
"rewards/rejected": -1.6307048797607422, |
|
"step": 90 |
|
}, |
|
{ |
|
"epoch": 1.6736401673640167, |
|
"grad_norm": 54.4733158152604, |
|
"learning_rate": 6.947819411632222e-09, |
|
"logits/chosen": -1.8023641109466553, |
|
"logits/rejected": -1.725804328918457, |
|
"logps/chosen": -395.9789733886719, |
|
"logps/pi_response": -165.1318817138672, |
|
"logps/ref_response": -145.55709838867188, |
|
"logps/rejected": -855.1980590820312, |
|
"loss": 0.4809, |
|
"rewards/accuracies": 0.815625011920929, |
|
"rewards/chosen": -0.6401264071464539, |
|
"rewards/margins": 1.0448520183563232, |
|
"rewards/rejected": -1.6849782466888428, |
|
"step": 100 |
|
}, |
|
{ |
|
"epoch": 1.8410041841004183, |
|
"grad_norm": 38.619103871977664, |
|
"learning_rate": 1.3988542959794625e-09, |
|
"logits/chosen": -1.7393662929534912, |
|
"logits/rejected": -1.651567816734314, |
|
"logps/chosen": -383.98895263671875, |
|
"logps/pi_response": -170.61387634277344, |
|
"logps/ref_response": -149.04595947265625, |
|
"logps/rejected": -837.4610595703125, |
|
"loss": 0.4635, |
|
"rewards/accuracies": 0.7906249761581421, |
|
"rewards/chosen": -0.6302980780601501, |
|
"rewards/margins": 1.0058103799819946, |
|
"rewards/rejected": -1.6361083984375, |
|
"step": 110 |
|
}, |
|
{ |
|
"epoch": 1.9748953974895398, |
|
"step": 118, |
|
"total_flos": 0.0, |
|
"train_loss": 0.5197100033194332, |
|
"train_runtime": 5205.1722, |
|
"train_samples_per_second": 5.872, |
|
"train_steps_per_second": 0.023 |
|
} |
|
], |
|
"logging_steps": 10, |
|
"max_steps": 118, |
|
"num_input_tokens_seen": 0, |
|
"num_train_epochs": 2, |
|
"save_steps": 100, |
|
"total_flos": 0.0, |
|
"train_batch_size": 8, |
|
"trial_name": null, |
|
"trial_params": null |
|
} |
|
|