ShenaoZhang's picture
Model save
4b96ba4 verified
raw
history blame
5.19 kB
{
"best_metric": null,
"best_model_checkpoint": null,
"epoch": 0.9905956112852664,
"eval_steps": 500,
"global_step": 79,
"is_hyper_param_search": false,
"is_local_process_zero": true,
"is_world_process_zero": true,
"log_history": [
{
"epoch": 0.01,
"learning_rate": 6.25e-08,
"logits/chosen": -2.457242012023926,
"logits/rejected": -2.4024434089660645,
"logps/chosen": -202.63397216796875,
"logps/pi_response": -193.8072509765625,
"logps/ref_response": -193.8072509765625,
"logps/rejected": -294.0563659667969,
"loss": 0.6931,
"rewards/accuracies": 0.0,
"rewards/chosen": 0.0,
"rewards/margins": 0.0,
"rewards/rejected": 0.0,
"step": 1
},
{
"epoch": 0.13,
"learning_rate": 4.990217055187362e-07,
"logits/chosen": -2.7324564456939697,
"logits/rejected": -2.677666425704956,
"logps/chosen": -255.35565185546875,
"logps/pi_response": -165.3203125,
"logps/ref_response": -165.90737915039062,
"logps/rejected": -332.898193359375,
"loss": 0.6729,
"rewards/accuracies": 0.5833333134651184,
"rewards/chosen": -0.024918710812926292,
"rewards/margins": 0.045975301414728165,
"rewards/rejected": -0.07089401036500931,
"step": 10
},
{
"epoch": 0.25,
"learning_rate": 4.655786431300069e-07,
"logits/chosen": -2.6490871906280518,
"logits/rejected": -2.573331832885742,
"logps/chosen": -296.2843933105469,
"logps/pi_response": -163.0939483642578,
"logps/ref_response": -150.6802215576172,
"logps/rejected": -442.4339904785156,
"loss": 0.5922,
"rewards/accuracies": 0.731249988079071,
"rewards/chosen": -0.6179492473602295,
"rewards/margins": 0.6742614507675171,
"rewards/rejected": -1.2922108173370361,
"step": 20
},
{
"epoch": 0.38,
"learning_rate": 3.9061232191019517e-07,
"logits/chosen": -2.638205051422119,
"logits/rejected": -2.593559741973877,
"logps/chosen": -291.2914733886719,
"logps/pi_response": -157.58370971679688,
"logps/ref_response": -143.96243286132812,
"logps/rejected": -447.7432556152344,
"loss": 0.5521,
"rewards/accuracies": 0.734375,
"rewards/chosen": -0.6356722116470337,
"rewards/margins": 0.8006154894828796,
"rewards/rejected": -1.4362876415252686,
"step": 30
},
{
"epoch": 0.5,
"learning_rate": 2.8856223324132555e-07,
"logits/chosen": -2.6024458408355713,
"logits/rejected": -2.533780336380005,
"logps/chosen": -275.9115905761719,
"logps/pi_response": -154.2270965576172,
"logps/ref_response": -132.5021514892578,
"logps/rejected": -433.9896545410156,
"loss": 0.5214,
"rewards/accuracies": 0.7562500238418579,
"rewards/chosen": -0.5712152123451233,
"rewards/margins": 0.7623130679130554,
"rewards/rejected": -1.3335282802581787,
"step": 40
},
{
"epoch": 0.63,
"learning_rate": 1.7908455541642582e-07,
"logits/chosen": -2.5458426475524902,
"logits/rejected": -2.504864454269409,
"logps/chosen": -285.959716796875,
"logps/pi_response": -164.4518280029297,
"logps/ref_response": -142.59683227539062,
"logps/rejected": -467.680419921875,
"loss": 0.5024,
"rewards/accuracies": 0.7406250238418579,
"rewards/chosen": -0.5851860046386719,
"rewards/margins": 0.8030134439468384,
"rewards/rejected": -1.3881994485855103,
"step": 50
},
{
"epoch": 0.75,
"learning_rate": 8.32661172908373e-08,
"logits/chosen": -2.49611234664917,
"logits/rejected": -2.4642136096954346,
"logps/chosen": -285.41571044921875,
"logps/pi_response": -171.76272583007812,
"logps/ref_response": -150.0667266845703,
"logps/rejected": -459.05035400390625,
"loss": 0.4991,
"rewards/accuracies": 0.737500011920929,
"rewards/chosen": -0.6012312173843384,
"rewards/margins": 0.7958024144172668,
"rewards/rejected": -1.39703369140625,
"step": 60
},
{
"epoch": 0.88,
"learning_rate": 1.956279997278043e-08,
"logits/chosen": -2.526583671569824,
"logits/rejected": -2.4693140983581543,
"logps/chosen": -286.43743896484375,
"logps/pi_response": -172.4198760986328,
"logps/ref_response": -153.6392822265625,
"logps/rejected": -480.53826904296875,
"loss": 0.4835,
"rewards/accuracies": 0.796875,
"rewards/chosen": -0.5305203199386597,
"rewards/margins": 0.9574079513549805,
"rewards/rejected": -1.4879281520843506,
"step": 70
},
{
"epoch": 0.99,
"step": 79,
"total_flos": 0.0,
"train_loss": 0.536045919490766,
"train_runtime": 4637.5848,
"train_samples_per_second": 4.394,
"train_steps_per_second": 0.017
}
],
"logging_steps": 10,
"max_steps": 79,
"num_input_tokens_seen": 0,
"num_train_epochs": 1,
"save_steps": 100,
"total_flos": 0.0,
"train_batch_size": 8,
"trial_name": null,
"trial_params": null
}