ShenaoZ's picture
Model save
0930379 verified
{
"best_metric": null,
"best_model_checkpoint": null,
"epoch": 0.9905956112852664,
"eval_steps": 500,
"global_step": 79,
"is_hyper_param_search": false,
"is_local_process_zero": true,
"is_world_process_zero": true,
"log_history": [
{
"epoch": 0.012539184952978056,
"grad_norm": 25.884748038600286,
"learning_rate": 1.25e-08,
"logits/chosen": -2.424976348876953,
"logits/rejected": -2.3868491649627686,
"logps/chosen": -228.66397094726562,
"logps/pi_response": -113.87369537353516,
"logps/ref_response": -113.87369537353516,
"logps/rejected": -285.56085205078125,
"loss": 0.6931,
"rewards/accuracies": 0.0,
"rewards/chosen": 0.0,
"rewards/margins": 0.0,
"rewards/rejected": 0.0,
"step": 1
},
{
"epoch": 0.12539184952978055,
"grad_norm": 25.848991474100774,
"learning_rate": 9.980434110374723e-08,
"logits/chosen": -2.7124671936035156,
"logits/rejected": -2.6734619140625,
"logps/chosen": -251.80067443847656,
"logps/pi_response": -156.5972137451172,
"logps/ref_response": -156.57151794433594,
"logps/rejected": -326.256591796875,
"loss": 0.6921,
"rewards/accuracies": 0.4826388955116272,
"rewards/chosen": -0.0016295942477881908,
"rewards/margins": 0.0013898547040298581,
"rewards/rejected": -0.003019448835402727,
"step": 10
},
{
"epoch": 0.2507836990595611,
"grad_norm": 22.558182471724844,
"learning_rate": 9.311572862600138e-08,
"logits/chosen": -2.6639418601989746,
"logits/rejected": -2.602161407470703,
"logps/chosen": -226.8975372314453,
"logps/pi_response": -134.72640991210938,
"logps/ref_response": -135.18942260742188,
"logps/rejected": -318.83660888671875,
"loss": 0.6719,
"rewards/accuracies": 0.703125,
"rewards/chosen": -0.02571864053606987,
"rewards/margins": 0.04237198084592819,
"rewards/rejected": -0.06809062510728836,
"step": 20
},
{
"epoch": 0.3761755485893417,
"grad_norm": 17.791263070111036,
"learning_rate": 7.812246438203904e-08,
"logits/chosen": -2.653294324874878,
"logits/rejected": -2.608656644821167,
"logps/chosen": -226.16818237304688,
"logps/pi_response": -130.77206420898438,
"logps/ref_response": -131.80601501464844,
"logps/rejected": -322.6428527832031,
"loss": 0.6378,
"rewards/accuracies": 0.7124999761581421,
"rewards/chosen": -0.08973001688718796,
"rewards/margins": 0.11942292749881744,
"rewards/rejected": -0.2091529369354248,
"step": 30
},
{
"epoch": 0.5015673981191222,
"grad_norm": 20.306933168458535,
"learning_rate": 5.771244664826511e-08,
"logits/chosen": -2.6390373706817627,
"logits/rejected": -2.5795812606811523,
"logps/chosen": -220.55575561523438,
"logps/pi_response": -118.67034912109375,
"logps/ref_response": -121.30364990234375,
"logps/rejected": -330.9592590332031,
"loss": 0.5922,
"rewards/accuracies": 0.7281249761581421,
"rewards/chosen": -0.16215774416923523,
"rewards/margins": 0.24141518771648407,
"rewards/rejected": -0.4035729467868805,
"step": 40
},
{
"epoch": 0.6269592476489029,
"grad_norm": 16.484276612085434,
"learning_rate": 3.581691108328516e-08,
"logits/chosen": -2.6456971168518066,
"logits/rejected": -2.6257271766662598,
"logps/chosen": -243.11074829101562,
"logps/pi_response": -122.02351379394531,
"logps/ref_response": -124.61746978759766,
"logps/rejected": -375.1612548828125,
"loss": 0.5832,
"rewards/accuracies": 0.737500011920929,
"rewards/chosen": -0.21979229152202606,
"rewards/margins": 0.3476864695549011,
"rewards/rejected": -0.5674788355827332,
"step": 50
},
{
"epoch": 0.7523510971786834,
"grad_norm": 19.533388743817394,
"learning_rate": 1.665322345816746e-08,
"logits/chosen": -2.653810977935791,
"logits/rejected": -2.6256422996520996,
"logps/chosen": -226.85940551757812,
"logps/pi_response": -133.93075561523438,
"logps/ref_response": -135.44107055664062,
"logps/rejected": -382.71490478515625,
"loss": 0.5812,
"rewards/accuracies": 0.715624988079071,
"rewards/chosen": -0.20707741379737854,
"rewards/margins": 0.4098052382469177,
"rewards/rejected": -0.6168826818466187,
"step": 60
},
{
"epoch": 0.877742946708464,
"grad_norm": 17.073704712019914,
"learning_rate": 3.912559994556086e-09,
"logits/chosen": -2.70702862739563,
"logits/rejected": -2.655327558517456,
"logps/chosen": -266.7652587890625,
"logps/pi_response": -142.82772827148438,
"logps/ref_response": -145.401611328125,
"logps/rejected": -397.0142822265625,
"loss": 0.5739,
"rewards/accuracies": 0.734375,
"rewards/chosen": -0.22994092106819153,
"rewards/margins": 0.41121983528137207,
"rewards/rejected": -0.6411608457565308,
"step": 70
},
{
"epoch": 0.9905956112852664,
"step": 79,
"total_flos": 0.0,
"train_loss": 0.6134395901160904,
"train_runtime": 3509.1149,
"train_samples_per_second": 5.807,
"train_steps_per_second": 0.023
}
],
"logging_steps": 10,
"max_steps": 79,
"num_input_tokens_seen": 0,
"num_train_epochs": 1,
"save_steps": 100,
"total_flos": 0.0,
"train_batch_size": 8,
"trial_name": null,
"trial_params": null
}