|
{ |
|
"best_metric": null, |
|
"best_model_checkpoint": null, |
|
"epoch": 0.9905956112852664, |
|
"eval_steps": 500, |
|
"global_step": 79, |
|
"is_hyper_param_search": false, |
|
"is_local_process_zero": true, |
|
"is_world_process_zero": true, |
|
"log_history": [ |
|
{ |
|
"epoch": 0.012539184952978056, |
|
"grad_norm": 25.884748038600286, |
|
"learning_rate": 1.25e-08, |
|
"logits/chosen": -2.424976348876953, |
|
"logits/rejected": -2.3868491649627686, |
|
"logps/chosen": -228.66397094726562, |
|
"logps/pi_response": -113.87369537353516, |
|
"logps/ref_response": -113.87369537353516, |
|
"logps/rejected": -285.56085205078125, |
|
"loss": 0.6931, |
|
"rewards/accuracies": 0.0, |
|
"rewards/chosen": 0.0, |
|
"rewards/margins": 0.0, |
|
"rewards/rejected": 0.0, |
|
"step": 1 |
|
}, |
|
{ |
|
"epoch": 0.12539184952978055, |
|
"grad_norm": 25.848991474100774, |
|
"learning_rate": 9.980434110374723e-08, |
|
"logits/chosen": -2.7124671936035156, |
|
"logits/rejected": -2.6734619140625, |
|
"logps/chosen": -251.80067443847656, |
|
"logps/pi_response": -156.5972137451172, |
|
"logps/ref_response": -156.57151794433594, |
|
"logps/rejected": -326.256591796875, |
|
"loss": 0.6921, |
|
"rewards/accuracies": 0.4826388955116272, |
|
"rewards/chosen": -0.0016295942477881908, |
|
"rewards/margins": 0.0013898547040298581, |
|
"rewards/rejected": -0.003019448835402727, |
|
"step": 10 |
|
}, |
|
{ |
|
"epoch": 0.2507836990595611, |
|
"grad_norm": 22.558182471724844, |
|
"learning_rate": 9.311572862600138e-08, |
|
"logits/chosen": -2.6639418601989746, |
|
"logits/rejected": -2.602161407470703, |
|
"logps/chosen": -226.8975372314453, |
|
"logps/pi_response": -134.72640991210938, |
|
"logps/ref_response": -135.18942260742188, |
|
"logps/rejected": -318.83660888671875, |
|
"loss": 0.6719, |
|
"rewards/accuracies": 0.703125, |
|
"rewards/chosen": -0.02571864053606987, |
|
"rewards/margins": 0.04237198084592819, |
|
"rewards/rejected": -0.06809062510728836, |
|
"step": 20 |
|
}, |
|
{ |
|
"epoch": 0.3761755485893417, |
|
"grad_norm": 17.791263070111036, |
|
"learning_rate": 7.812246438203904e-08, |
|
"logits/chosen": -2.653294324874878, |
|
"logits/rejected": -2.608656644821167, |
|
"logps/chosen": -226.16818237304688, |
|
"logps/pi_response": -130.77206420898438, |
|
"logps/ref_response": -131.80601501464844, |
|
"logps/rejected": -322.6428527832031, |
|
"loss": 0.6378, |
|
"rewards/accuracies": 0.7124999761581421, |
|
"rewards/chosen": -0.08973001688718796, |
|
"rewards/margins": 0.11942292749881744, |
|
"rewards/rejected": -0.2091529369354248, |
|
"step": 30 |
|
}, |
|
{ |
|
"epoch": 0.5015673981191222, |
|
"grad_norm": 20.306933168458535, |
|
"learning_rate": 5.771244664826511e-08, |
|
"logits/chosen": -2.6390373706817627, |
|
"logits/rejected": -2.5795812606811523, |
|
"logps/chosen": -220.55575561523438, |
|
"logps/pi_response": -118.67034912109375, |
|
"logps/ref_response": -121.30364990234375, |
|
"logps/rejected": -330.9592590332031, |
|
"loss": 0.5922, |
|
"rewards/accuracies": 0.7281249761581421, |
|
"rewards/chosen": -0.16215774416923523, |
|
"rewards/margins": 0.24141518771648407, |
|
"rewards/rejected": -0.4035729467868805, |
|
"step": 40 |
|
}, |
|
{ |
|
"epoch": 0.6269592476489029, |
|
"grad_norm": 16.484276612085434, |
|
"learning_rate": 3.581691108328516e-08, |
|
"logits/chosen": -2.6456971168518066, |
|
"logits/rejected": -2.6257271766662598, |
|
"logps/chosen": -243.11074829101562, |
|
"logps/pi_response": -122.02351379394531, |
|
"logps/ref_response": -124.61746978759766, |
|
"logps/rejected": -375.1612548828125, |
|
"loss": 0.5832, |
|
"rewards/accuracies": 0.737500011920929, |
|
"rewards/chosen": -0.21979229152202606, |
|
"rewards/margins": 0.3476864695549011, |
|
"rewards/rejected": -0.5674788355827332, |
|
"step": 50 |
|
}, |
|
{ |
|
"epoch": 0.7523510971786834, |
|
"grad_norm": 19.533388743817394, |
|
"learning_rate": 1.665322345816746e-08, |
|
"logits/chosen": -2.653810977935791, |
|
"logits/rejected": -2.6256422996520996, |
|
"logps/chosen": -226.85940551757812, |
|
"logps/pi_response": -133.93075561523438, |
|
"logps/ref_response": -135.44107055664062, |
|
"logps/rejected": -382.71490478515625, |
|
"loss": 0.5812, |
|
"rewards/accuracies": 0.715624988079071, |
|
"rewards/chosen": -0.20707741379737854, |
|
"rewards/margins": 0.4098052382469177, |
|
"rewards/rejected": -0.6168826818466187, |
|
"step": 60 |
|
}, |
|
{ |
|
"epoch": 0.877742946708464, |
|
"grad_norm": 17.073704712019914, |
|
"learning_rate": 3.912559994556086e-09, |
|
"logits/chosen": -2.70702862739563, |
|
"logits/rejected": -2.655327558517456, |
|
"logps/chosen": -266.7652587890625, |
|
"logps/pi_response": -142.82772827148438, |
|
"logps/ref_response": -145.401611328125, |
|
"logps/rejected": -397.0142822265625, |
|
"loss": 0.5739, |
|
"rewards/accuracies": 0.734375, |
|
"rewards/chosen": -0.22994092106819153, |
|
"rewards/margins": 0.41121983528137207, |
|
"rewards/rejected": -0.6411608457565308, |
|
"step": 70 |
|
}, |
|
{ |
|
"epoch": 0.9905956112852664, |
|
"step": 79, |
|
"total_flos": 0.0, |
|
"train_loss": 0.6134395901160904, |
|
"train_runtime": 3509.1149, |
|
"train_samples_per_second": 5.807, |
|
"train_steps_per_second": 0.023 |
|
} |
|
], |
|
"logging_steps": 10, |
|
"max_steps": 79, |
|
"num_input_tokens_seen": 0, |
|
"num_train_epochs": 1, |
|
"save_steps": 100, |
|
"total_flos": 0.0, |
|
"train_batch_size": 8, |
|
"trial_name": null, |
|
"trial_params": null |
|
} |
|
|