{
"best_metric": null,
"best_model_checkpoint": null,
"epoch": 1.9748953974895398,
"eval_steps": 500,
"global_step": 118,
"is_hyper_param_search": false,
"is_local_process_zero": true,
"is_world_process_zero": true,
"log_history": [
{
"epoch": 0.016736401673640166,
"grad_norm": 254.03603103806802,
"learning_rate": 8.333333333333332e-09,
"logits/chosen": 0.40769851207733154,
"logits/rejected": 0.6983045935630798,
"logps/chosen": -597.6331176757812,
"logps/pi_response": -454.7916259765625,
"logps/ref_response": -454.7916259765625,
"logps/rejected": -933.78369140625,
"loss": 0.6931,
"rewards/accuracies": 0.0,
"rewards/chosen": 0.0,
"rewards/margins": 0.0,
"rewards/rejected": 0.0,
"step": 1
},
{
"epoch": 0.16736401673640167,
"grad_norm": 223.89448476390103,
"learning_rate": 8.333333333333334e-08,
"logits/chosen": 0.3617916703224182,
"logits/rejected": 0.8798990249633789,
"logps/chosen": -520.3926391601562,
"logps/pi_response": -373.90179443359375,
"logps/ref_response": -369.4568176269531,
"logps/rejected": -942.7015991210938,
"loss": 0.6814,
"rewards/accuracies": 0.5277777910232544,
"rewards/chosen": -0.003062439849600196,
"rewards/margins": 0.031748898327350616,
"rewards/rejected": -0.034811343997716904,
"step": 10
},
{
"epoch": 0.33472803347280333,
"grad_norm": 275.758299798461,
"learning_rate": 9.860114570402053e-08,
"logits/chosen": 0.41715487837791443,
"logits/rejected": 0.8884197473526001,
"logps/chosen": -551.1149291992188,
"logps/pi_response": -498.0843200683594,
"logps/ref_response": -378.30291748046875,
"logps/rejected": -1005.5443115234375,
"loss": 0.5604,
"rewards/accuracies": 0.800000011920929,
"rewards/chosen": -0.09888347238302231,
"rewards/margins": 1.0031945705413818,
"rewards/rejected": -1.1020780801773071,
"step": 20
},
{
"epoch": 0.502092050209205,
"grad_norm": 151.4646821692468,
"learning_rate": 9.305218058836777e-08,
"logits/chosen": 0.5644534826278687,
"logits/rejected": 0.9862662553787231,
"logps/chosen": -576.3702392578125,
"logps/pi_response": -784.0135498046875,
"logps/ref_response": -373.52685546875,
"logps/rejected": -1288.76220703125,
"loss": 0.4456,
"rewards/accuracies": 0.859375,
"rewards/chosen": -0.5222317576408386,
"rewards/margins": 3.5375170707702637,
"rewards/rejected": -4.059748649597168,
"step": 30
},
{
"epoch": 0.6694560669456067,
"grad_norm": 99.31936965278155,
"learning_rate": 8.374915007591053e-08,
"logits/chosen": 0.6754584312438965,
"logits/rejected": 1.3761770725250244,
"logps/chosen": -637.95263671875,
"logps/pi_response": -662.8466796875,
"logps/ref_response": -352.9124450683594,
"logps/rejected": -1316.7098388671875,
"loss": 0.3955,
"rewards/accuracies": 0.800000011920929,
"rewards/chosen": -1.1482101678848267,
"rewards/margins": 2.7251365184783936,
"rewards/rejected": -3.8733463287353516,
"step": 40
},
{
"epoch": 0.8368200836820083,
"grad_norm": 81.4813373754742,
"learning_rate": 7.150326011382603e-08,
"logits/chosen": 0.8304530382156372,
"logits/rejected": 1.3797433376312256,
"logps/chosen": -703.5577392578125,
"logps/pi_response": -740.5267944335938,
"logps/ref_response": -367.9375305175781,
"logps/rejected": -1414.14453125,
"loss": 0.389,
"rewards/accuracies": 0.8343750238418579,
"rewards/chosen": -1.441198468208313,
"rewards/margins": 3.4418697357177734,
"rewards/rejected": -4.883068084716797,
"step": 50
},
{
"epoch": 1.00418410041841,
"grad_norm": 68.59895920394845,
"learning_rate": 5.738232820012406e-08,
"logits/chosen": 0.8587929606437683,
"logits/rejected": 1.2634632587432861,
"logps/chosen": -588.1586303710938,
"logps/pi_response": -831.5247192382812,
"logps/ref_response": -366.019287109375,
"logps/rejected": -1391.0379638671875,
"loss": 0.3563,
"rewards/accuracies": 0.84375,
"rewards/chosen": -1.0079401731491089,
"rewards/margins": 4.322751045227051,
"rewards/rejected": -5.330691337585449,
"step": 60
},
{
"epoch": 1.1715481171548117,
"grad_norm": 78.3617679229796,
"learning_rate": 4.2617671799875946e-08,
"logits/chosen": 0.7778602838516235,
"logits/rejected": 1.3484394550323486,
"logps/chosen": -664.4850463867188,
"logps/pi_response": -791.7720947265625,
"logps/ref_response": -386.8902893066406,
"logps/rejected": -1374.6849365234375,
"loss": 0.3516,
"rewards/accuracies": 0.846875011920929,
"rewards/chosen": -1.1506381034851074,
"rewards/margins": 3.6012942790985107,
"rewards/rejected": -4.751932621002197,
"step": 70
},
{
"epoch": 1.3389121338912133,
"grad_norm": 54.85306379624032,
"learning_rate": 2.8496739886173992e-08,
"logits/chosen": 0.8767589330673218,
"logits/rejected": 1.290276288986206,
"logps/chosen": -640.9560546875,
"logps/pi_response": -780.9495239257812,
"logps/ref_response": -370.62164306640625,
"logps/rejected": -1382.840576171875,
"loss": 0.3358,
"rewards/accuracies": 0.875,
"rewards/chosen": -1.0898466110229492,
"rewards/margins": 3.9194438457489014,
"rewards/rejected": -5.0092902183532715,
"step": 80
},
{
"epoch": 1.506276150627615,
"grad_norm": 88.99295644400407,
"learning_rate": 1.6250849924089483e-08,
"logits/chosen": 0.710281491279602,
"logits/rejected": 1.3649709224700928,
"logps/chosen": -608.7789306640625,
"logps/pi_response": -748.4505615234375,
"logps/ref_response": -349.5060729980469,
"logps/rejected": -1372.438232421875,
"loss": 0.3777,
"rewards/accuracies": 0.862500011920929,
"rewards/chosen": -1.0407220125198364,
"rewards/margins": 3.8326003551483154,
"rewards/rejected": -4.873322486877441,
"step": 90
},
{
"epoch": 1.6736401673640167,
"grad_norm": 63.93568226735586,
"learning_rate": 6.947819411632222e-09,
"logits/chosen": 0.664978563785553,
"logits/rejected": 1.2786552906036377,
"logps/chosen": -617.9248657226562,
"logps/pi_response": -712.550048828125,
"logps/ref_response": -370.06866455078125,
"logps/rejected": -1386.747314453125,
"loss": 0.3394,
"rewards/accuracies": 0.8687499761581421,
"rewards/chosen": -1.01349937915802,
"rewards/margins": 3.325646162033081,
"rewards/rejected": -4.339145183563232,
"step": 100
},
{
"epoch": 1.8410041841004183,
"grad_norm": 48.91170773014705,
"learning_rate": 1.3988542959794625e-09,
"logits/chosen": 0.6376602649688721,
"logits/rejected": 1.2136269807815552,
"logps/chosen": -611.235595703125,
"logps/pi_response": -798.3106689453125,
"logps/ref_response": -364.35662841796875,
"logps/rejected": -1384.1715087890625,
"loss": 0.3528,
"rewards/accuracies": 0.8374999761581421,
"rewards/chosen": -1.1132670640945435,
"rewards/margins": 3.8225269317626953,
"rewards/rejected": -4.935793876647949,
"step": 110
},
{
"epoch": 1.9748953974895398,
"step": 118,
"total_flos": 0.0,
"train_loss": 0.4142875792616505,
"train_runtime": 5502.1117,
"train_samples_per_second": 5.555,
"train_steps_per_second": 0.021
}
],
"logging_steps": 10,
"max_steps": 118,
"num_input_tokens_seen": 0,
"num_train_epochs": 2,
"save_steps": 100,
"total_flos": 0.0,
"train_batch_size": 8,
"trial_name": null,
"trial_params": null
}