ShenaoZ's picture
Model save
b53e859 verified
{
"best_metric": null,
"best_model_checkpoint": null,
"epoch": 0.9905956112852664,
"eval_steps": 500,
"global_step": 79,
"is_hyper_param_search": false,
"is_local_process_zero": true,
"is_world_process_zero": true,
"log_history": [
{
"epoch": 0.012539184952978056,
"grad_norm": 9.590601332216856,
"learning_rate": 6.25e-08,
"logits/chosen": -2.9077322483062744,
"logits/rejected": -2.8318910598754883,
"logps/chosen": -351.8885498046875,
"logps/pi_response": -76.32845306396484,
"logps/ref_response": -76.32845306396484,
"logps/rejected": -169.29762268066406,
"loss": 0.6931,
"rewards/accuracies": 0.0,
"rewards/chosen": 0.0,
"rewards/margins": 0.0,
"rewards/rejected": 0.0,
"step": 1
},
{
"epoch": 0.12539184952978055,
"grad_norm": 8.512341387705257,
"learning_rate": 4.990217055187362e-07,
"logits/chosen": -2.7858364582061768,
"logits/rejected": -2.76261830329895,
"logps/chosen": -234.40164184570312,
"logps/pi_response": -70.39525604248047,
"logps/ref_response": -70.02328491210938,
"logps/rejected": -168.20977783203125,
"loss": 0.6904,
"rewards/accuracies": 0.5243055820465088,
"rewards/chosen": 0.004335531033575535,
"rewards/margins": 0.003715350292623043,
"rewards/rejected": 0.0006201809737831354,
"step": 10
},
{
"epoch": 0.2507836990595611,
"grad_norm": 6.895934800390202,
"learning_rate": 4.655786431300069e-07,
"logits/chosen": -2.7349462509155273,
"logits/rejected": -2.684163808822632,
"logps/chosen": -247.40725708007812,
"logps/pi_response": -78.29987335205078,
"logps/ref_response": -67.40553283691406,
"logps/rejected": -172.24024963378906,
"loss": 0.6584,
"rewards/accuracies": 0.71875,
"rewards/chosen": 0.030382925644516945,
"rewards/margins": 0.08496104925870895,
"rewards/rejected": -0.05457812547683716,
"step": 20
},
{
"epoch": 0.3761755485893417,
"grad_norm": 6.900853019546463,
"learning_rate": 3.9061232191019517e-07,
"logits/chosen": -2.6347103118896484,
"logits/rejected": -2.601311445236206,
"logps/chosen": -236.86190795898438,
"logps/pi_response": -107.38741302490234,
"logps/ref_response": -65.888427734375,
"logps/rejected": -191.14120483398438,
"loss": 0.6186,
"rewards/accuracies": 0.721875011920929,
"rewards/chosen": -0.13394343852996826,
"rewards/margins": 0.19235941767692566,
"rewards/rejected": -0.3263028860092163,
"step": 30
},
{
"epoch": 0.5015673981191222,
"grad_norm": 8.796140542948901,
"learning_rate": 2.8856223324132555e-07,
"logits/chosen": -2.6297316551208496,
"logits/rejected": -2.607571601867676,
"logps/chosen": -259.8023376464844,
"logps/pi_response": -145.96922302246094,
"logps/ref_response": -70.97199249267578,
"logps/rejected": -232.7877960205078,
"loss": 0.5655,
"rewards/accuracies": 0.753125011920929,
"rewards/chosen": -0.27046337723731995,
"rewards/margins": 0.3656841814517975,
"rewards/rejected": -0.636147677898407,
"step": 40
},
{
"epoch": 0.6269592476489029,
"grad_norm": 10.158446144448595,
"learning_rate": 1.7908455541642582e-07,
"logits/chosen": -2.61785626411438,
"logits/rejected": -2.5837466716766357,
"logps/chosen": -293.04083251953125,
"logps/pi_response": -169.3504638671875,
"logps/ref_response": -69.12784576416016,
"logps/rejected": -261.39727783203125,
"loss": 0.533,
"rewards/accuracies": 0.78125,
"rewards/chosen": -0.4291558265686035,
"rewards/margins": 0.4984737038612366,
"rewards/rejected": -0.9276294708251953,
"step": 50
},
{
"epoch": 0.7523510971786834,
"grad_norm": 10.385063519743122,
"learning_rate": 8.32661172908373e-08,
"logits/chosen": -2.6125714778900146,
"logits/rejected": -2.5770516395568848,
"logps/chosen": -265.41082763671875,
"logps/pi_response": -177.92941284179688,
"logps/ref_response": -62.94016647338867,
"logps/rejected": -278.24334716796875,
"loss": 0.5098,
"rewards/accuracies": 0.8031250238418579,
"rewards/chosen": -0.5334924459457397,
"rewards/margins": 0.6120153069496155,
"rewards/rejected": -1.1455078125,
"step": 60
},
{
"epoch": 0.877742946708464,
"grad_norm": 11.401303293194552,
"learning_rate": 1.956279997278043e-08,
"logits/chosen": -2.6095032691955566,
"logits/rejected": -2.572521686553955,
"logps/chosen": -314.3882141113281,
"logps/pi_response": -201.73458862304688,
"logps/ref_response": -70.71024322509766,
"logps/rejected": -292.8869323730469,
"loss": 0.4973,
"rewards/accuracies": 0.7749999761581421,
"rewards/chosen": -0.6195091605186462,
"rewards/margins": 0.6745454668998718,
"rewards/rejected": -1.2940547466278076,
"step": 70
},
{
"epoch": 0.9905956112852664,
"step": 79,
"total_flos": 0.0,
"train_loss": 0.5737052627756626,
"train_runtime": 3559.2235,
"train_samples_per_second": 5.725,
"train_steps_per_second": 0.022
}
],
"logging_steps": 10,
"max_steps": 79,
"num_input_tokens_seen": 0,
"num_train_epochs": 1,
"save_steps": 100,
"total_flos": 0.0,
"train_batch_size": 8,
"trial_name": null,
"trial_params": null
}