|
{ |
|
"best_metric": null, |
|
"best_model_checkpoint": null, |
|
"epoch": 2.998451213216314, |
|
"eval_steps": 100, |
|
"global_step": 2904, |
|
"is_hyper_param_search": false, |
|
"is_local_process_zero": true, |
|
"is_world_process_zero": true, |
|
"log_history": [ |
|
{ |
|
"epoch": 0.0, |
|
"learning_rate": 1.7182130584192438e-09, |
|
"logits/chosen": -2.9648265838623047, |
|
"logits/rejected": -2.9711227416992188, |
|
"logps/chosen": -256.0919494628906, |
|
"logps/rejected": -234.60708618164062, |
|
"loss": 1.0, |
|
"rewards/accuracies": 0.0, |
|
"rewards/chosen": 0.0, |
|
"rewards/margins": 0.0, |
|
"rewards/rejected": 0.0, |
|
"step": 1 |
|
}, |
|
{ |
|
"epoch": 0.01, |
|
"learning_rate": 1.718213058419244e-08, |
|
"logits/chosen": -3.049875497817993, |
|
"logits/rejected": -3.0188238620758057, |
|
"logps/chosen": -276.6912536621094, |
|
"logps/rejected": -202.39605712890625, |
|
"loss": 1.0001, |
|
"rewards/accuracies": 0.4375, |
|
"rewards/chosen": 0.0005764114903286099, |
|
"rewards/margins": -0.006484686397016048, |
|
"rewards/rejected": 0.007061097305268049, |
|
"step": 10 |
|
}, |
|
{ |
|
"epoch": 0.02, |
|
"learning_rate": 3.436426116838488e-08, |
|
"logits/chosen": -2.988577127456665, |
|
"logits/rejected": -2.9995627403259277, |
|
"logps/chosen": -312.2018127441406, |
|
"logps/rejected": -246.76266479492188, |
|
"loss": 1.0026, |
|
"rewards/accuracies": 0.48124998807907104, |
|
"rewards/chosen": 0.003506724489852786, |
|
"rewards/margins": -0.0012849611230194569, |
|
"rewards/rejected": 0.004791685380041599, |
|
"step": 20 |
|
}, |
|
{ |
|
"epoch": 0.03, |
|
"learning_rate": 5.154639175257731e-08, |
|
"logits/chosen": -3.063732624053955, |
|
"logits/rejected": -3.0357906818389893, |
|
"logps/chosen": -260.15679931640625, |
|
"logps/rejected": -224.3686065673828, |
|
"loss": 0.9974, |
|
"rewards/accuracies": 0.581250011920929, |
|
"rewards/chosen": 0.0021156296133995056, |
|
"rewards/margins": 0.0043937130831182, |
|
"rewards/rejected": -0.0022780844010412693, |
|
|