|
{ |
|
"best_metric": null, |
|
"best_model_checkpoint": null, |
|
"epoch": 0.9905956112852664, |
|
"eval_steps": 500, |
|
"global_step": 79, |
|
"is_hyper_param_search": false, |
|
"is_local_process_zero": true, |
|
"is_world_process_zero": true, |
|
"log_history": [ |
|
{ |
|
"epoch": 0.012539184952978056, |
|
"grad_norm": 9.590601332216856, |
|
"learning_rate": 6.25e-08, |
|
"logits/chosen": -2.9077322483062744, |
|
"logits/rejected": -2.8318910598754883, |
|
"logps/chosen": -351.8885498046875, |
|
"logps/pi_response": -76.32845306396484, |
|
"logps/ref_response": -76.32845306396484, |
|
"logps/rejected": -169.29762268066406, |
|
"loss": 0.6931, |
|
"rewards/accuracies": 0.0, |
|
"rewards/chosen": 0.0, |
|
"rewards/margins": 0.0, |
|
"rewards/rejected": 0.0, |
|
"step": 1 |
|
}, |
|
{ |
|
"epoch": 0.12539184952978055, |
|
"grad_norm": 8.512341387705257, |
|
"learning_rate": 4.990217055187362e-07, |
|
"logits/chosen": -2.7858364582061768, |
|
"logits/rejected": -2.76261830329895, |
|
"logps/chosen": -234.40164184570312, |
|
"logps/pi_response": -70.39525604248047, |
|
"logps/ref_response": -70.02328491210938, |
|
"logps/rejected": -168.20977783203125, |
|
"loss": 0.6904, |
|
"rewards/accuracies": 0.5243055820465088, |
|
"rewards/chosen": 0.004335531033575535, |
|
"rewards/margins": 0.003715350292623043, |
|
"rewards/rejected": 0.0006201809737831354, |
|
"step": 10 |
|
}, |
|
{ |
|
"epoch": 0.2507836990595611, |
|
"grad_norm": 6.895934800390202, |
|
"learning_rate": 4.655786431300069e-07, |
|
"logits/chosen": -2.7349462509155273, |
|
"logits/rejected": -2.684163808822632, |
|
"logps/chosen": -247.40725708007812, |
|
"logps/pi_response": -78.29987335205078, |
|
"logps/ref_response": -67.40553283691406, |
|
"logps/rejected": -172.24024963378906, |
|
"loss": 0.6584, |
|
"rewards/accuracies": 0.71875, |
|
"rewards/chosen": 0.030382925644516945, |
|
"rewards/margins": 0.08496104925870895, |
|
"rewards/rejected": -0.05457812547683716, |
|
"step": 20 |
|
}, |
|
{ |
|
"epoch": 0.3761755485893417, |
|
"grad_norm": 6.900853019546463, |
|
"learning_rate": 3.9061232191019517e-07, |
|
"logits/chosen": -2.6347103118896484, |
|
"logits/rejected": -2.601311445236206, |
|
"logps/chosen": -236.86190795898438, |
|
"logps/pi_response": -107.38741302490234, |
|
"logps/ref_response": -65.888427734375, |
|
"logps/rejected": -191.14120483398438, |
|
"loss": 0.6186, |
|
"rewards/accuracies": 0.721875011920929, |
|
"rewards/chosen": -0.13394343852996826, |
|
"rewards/margins": 0.19235941767692566, |
|
"rewards/rejected": -0.3263028860092163, |
|
"step": 30 |
|
}, |
|
{ |
|
"epoch": 0.5015673981191222, |
|
"grad_norm": 8.796140542948901, |
|
"learning_rate": 2.8856223324132555e-07, |
|
"logits/chosen": -2.6297316551208496, |
|
"logits/rejected": -2.607571601867676, |
|
"logps/chosen": -259.8023376464844, |
|
"logps/pi_response": -145.96922302246094, |
|
"logps/ref_response": -70.97199249267578, |
|
"logps/rejected": -232.7877960205078, |
|
"loss": 0.5655, |
|
"rewards/accuracies": 0.753125011920929, |
|
"rewards/chosen": -0.27046337723731995, |
|
"rewards/margins": 0.3656841814517975, |
|
"rewards/rejected": -0.636147677898407, |
|
"step": 40 |
|
}, |
|
{ |
|
"epoch": 0.6269592476489029, |
|
"grad_norm": 10.158446144448595, |
|
"learning_rate": 1.7908455541642582e-07, |
|
"logits/chosen": -2.61785626411438, |
|
"logits/rejected": -2.5837466716766357, |
|
"logps/chosen": -293.04083251953125, |
|
"logps/pi_response": -169.3504638671875, |
|
"logps/ref_response": -69.12784576416016, |
|
"logps/rejected": -261.39727783203125, |
|
"loss": 0.533, |
|
"rewards/accuracies": 0.78125, |
|
"rewards/chosen": -0.4291558265686035, |
|
"rewards/margins": 0.4984737038612366, |
|
"rewards/rejected": -0.9276294708251953, |
|
"step": 50 |
|
}, |
|
{ |
|
"epoch": 0.7523510971786834, |
|
"grad_norm": 10.385063519743122, |
|
"learning_rate": 8.32661172908373e-08, |
|
"logits/chosen": -2.6125714778900146, |
|
"logits/rejected": -2.5770516395568848, |
|
"logps/chosen": -265.41082763671875, |
|
"logps/pi_response": -177.92941284179688, |
|
"logps/ref_response": -62.94016647338867, |
|
"logps/rejected": -278.24334716796875, |
|
"loss": 0.5098, |
|
"rewards/accuracies": 0.8031250238418579, |
|
"rewards/chosen": -0.5334924459457397, |
|
"rewards/margins": 0.6120153069496155, |
|
"rewards/rejected": -1.1455078125, |
|
"step": 60 |
|
}, |
|
{ |
|
"epoch": 0.877742946708464, |
|
"grad_norm": 11.401303293194552, |
|
"learning_rate": 1.956279997278043e-08, |
|
"logits/chosen": -2.6095032691955566, |
|
"logits/rejected": -2.572521686553955, |
|
"logps/chosen": -314.3882141113281, |
|
"logps/pi_response": -201.73458862304688, |
|
"logps/ref_response": -70.71024322509766, |
|
"logps/rejected": -292.8869323730469, |
|
"loss": 0.4973, |
|
"rewards/accuracies": 0.7749999761581421, |
|
"rewards/chosen": -0.6195091605186462, |
|
"rewards/margins": 0.6745454668998718, |
|
"rewards/rejected": -1.2940547466278076, |
|
"step": 70 |
|
}, |
|
{ |
|
"epoch": 0.9905956112852664, |
|
"step": 79, |
|
"total_flos": 0.0, |
|
"train_loss": 0.5737052627756626, |
|
"train_runtime": 3559.2235, |
|
"train_samples_per_second": 5.725, |
|
"train_steps_per_second": 0.022 |
|
} |
|
], |
|
"logging_steps": 10, |
|
"max_steps": 79, |
|
"num_input_tokens_seen": 0, |
|
"num_train_epochs": 1, |
|
"save_steps": 100, |
|
"total_flos": 0.0, |
|
"train_batch_size": 8, |
|
"trial_name": null, |
|
"trial_params": null |
|
} |
|
|