|
{ |
|
"best_metric": null, |
|
"best_model_checkpoint": null, |
|
"epoch": 1.9748953974895398, |
|
"eval_steps": 500, |
|
"global_step": 118, |
|
"is_hyper_param_search": false, |
|
"is_local_process_zero": true, |
|
"is_world_process_zero": true, |
|
"log_history": [ |
|
{ |
|
"epoch": 0.016736401673640166, |
|
"grad_norm": 254.03603103806802, |
|
"learning_rate": 8.333333333333332e-09, |
|
"logits/chosen": 0.40769851207733154, |
|
"logits/rejected": 0.6983045935630798, |
|
"logps/chosen": -597.6331176757812, |
|
"logps/pi_response": -454.7916259765625, |
|
"logps/ref_response": -454.7916259765625, |
|
"logps/rejected": -933.78369140625, |
|
"loss": 0.6931, |
|
"rewards/accuracies": 0.0, |
|
"rewards/chosen": 0.0, |
|
"rewards/margins": 0.0, |
|
"rewards/rejected": 0.0, |
|
"step": 1 |
|
}, |
|
{ |
|
"epoch": 0.16736401673640167, |
|
"grad_norm": 223.89448476390103, |
|
"learning_rate": 8.333333333333334e-08, |
|
"logits/chosen": 0.3617916703224182, |
|
"logits/rejected": 0.8798990249633789, |
|
"logps/chosen": -520.3926391601562, |
|
"logps/pi_response": -373.90179443359375, |
|
"logps/ref_response": -369.4568176269531, |
|
"logps/rejected": -942.7015991210938, |
|
"loss": 0.6814, |
|
"rewards/accuracies": 0.5277777910232544, |
|
"rewards/chosen": -0.003062439849600196, |
|
"rewards/margins": 0.031748898327350616, |
|
"rewards/rejected": -0.034811343997716904, |
|
"step": 10 |
|
}, |
|
{ |
|
"epoch": 0.33472803347280333, |
|
"grad_norm": 275.758299798461, |
|
"learning_rate": 9.860114570402053e-08, |
|
"logits/chosen": 0.41715487837791443, |
|
"logits/rejected": 0.8884197473526001, |
|
"logps/chosen": -551.1149291992188, |
|
"logps/pi_response": -498.0843200683594, |
|
"logps/ref_response": -378.30291748046875, |
|
"logps/rejected": -1005.5443115234375, |
|
"loss": 0.5604, |
|
"rewards/accuracies": 0.800000011920929, |
|
"rewards/chosen": -0.09888347238302231, |
|
"rewards/margins": 1.0031945705413818, |
|
"rewards/rejected": -1.1020780801773071, |
|
"step": 20 |
|
}, |
|
{ |
|
"epoch": 0.502092050209205, |
|
"grad_norm": 151.4646821692468, |
|
"learning_rate": 9.305218058836777e-08, |
|
"logits/chosen": 0.5644534826278687, |
|
"logits/rejected": 0.9862662553787231, |
|
"logps/chosen": -576.3702392578125, |
|
"logps/pi_response": -784.0135498046875, |
|
"logps/ref_response": -373.52685546875, |
|
"logps/rejected": -1288.76220703125, |
|
"loss": 0.4456, |
|
"rewards/accuracies": 0.859375, |
|
"rewards/chosen": -0.5222317576408386, |
|
"rewards/margins": 3.5375170707702637, |
|
"rewards/rejected": -4.059748649597168, |
|
"step": 30 |
|
}, |
|
{ |
|
"epoch": 0.6694560669456067, |
|
"grad_norm": 99.31936965278155, |
|
"learning_rate": 8.374915007591053e-08, |
|
"logits/chosen": 0.6754584312438965, |
|
"logits/rejected": 1.3761770725250244, |
|
"logps/chosen": -637.95263671875, |
|
"logps/pi_response": -662.8466796875, |
|
"logps/ref_response": -352.9124450683594, |
|
"logps/rejected": -1316.7098388671875, |
|
"loss": 0.3955, |
|
"rewards/accuracies": 0.800000011920929, |
|
"rewards/chosen": -1.1482101678848267, |
|
"rewards/margins": 2.7251365184783936, |
|
"rewards/rejected": -3.8733463287353516, |
|
"step": 40 |
|
}, |
|
{ |
|
"epoch": 0.8368200836820083, |
|
"grad_norm": 81.4813373754742, |
|
"learning_rate": 7.150326011382603e-08, |
|
"logits/chosen": 0.8304530382156372, |
|
"logits/rejected": 1.3797433376312256, |
|
"logps/chosen": -703.5577392578125, |
|
"logps/pi_response": -740.5267944335938, |
|
"logps/ref_response": -367.9375305175781, |
|
"logps/rejected": -1414.14453125, |
|
"loss": 0.389, |
|
"rewards/accuracies": 0.8343750238418579, |
|
"rewards/chosen": -1.441198468208313, |
|
"rewards/margins": 3.4418697357177734, |
|
"rewards/rejected": -4.883068084716797, |
|
"step": 50 |
|
}, |
|
{ |
|
"epoch": 1.00418410041841, |
|
"grad_norm": 68.59895920394845, |
|
"learning_rate": 5.738232820012406e-08, |
|
"logits/chosen": 0.8587929606437683, |
|
"logits/rejected": 1.2634632587432861, |
|
"logps/chosen": -588.1586303710938, |
|
"logps/pi_response": -831.5247192382812, |
|
"logps/ref_response": -366.019287109375, |
|
"logps/rejected": -1391.0379638671875, |
|
"loss": 0.3563, |
|
"rewards/accuracies": 0.84375, |
|
"rewards/chosen": -1.0079401731491089, |
|
"rewards/margins": 4.322751045227051, |
|
"rewards/rejected": -5.330691337585449, |
|
"step": 60 |
|
}, |
|
{ |
|
"epoch": 1.1715481171548117, |
|
"grad_norm": 78.3617679229796, |
|
"learning_rate": 4.2617671799875946e-08, |
|
"logits/chosen": 0.7778602838516235, |
|
"logits/rejected": 1.3484394550323486, |
|
"logps/chosen": -664.4850463867188, |
|
"logps/pi_response": -791.7720947265625, |
|
"logps/ref_response": -386.8902893066406, |
|
"logps/rejected": -1374.6849365234375, |
|
"loss": 0.3516, |
|
"rewards/accuracies": 0.846875011920929, |
|
"rewards/chosen": -1.1506381034851074, |
|
"rewards/margins": 3.6012942790985107, |
|
"rewards/rejected": -4.751932621002197, |
|
"step": 70 |
|
}, |
|
{ |
|
"epoch": 1.3389121338912133, |
|
"grad_norm": 54.85306379624032, |
|
"learning_rate": 2.8496739886173992e-08, |
|
"logits/chosen": 0.8767589330673218, |
|
"logits/rejected": 1.290276288986206, |
|
"logps/chosen": -640.9560546875, |
|
"logps/pi_response": -780.9495239257812, |
|
"logps/ref_response": -370.62164306640625, |
|
"logps/rejected": -1382.840576171875, |
|
"loss": 0.3358, |
|
"rewards/accuracies": 0.875, |
|
"rewards/chosen": -1.0898466110229492, |
|
"rewards/margins": 3.9194438457489014, |
|
"rewards/rejected": -5.0092902183532715, |
|
"step": 80 |
|
}, |
|
{ |
|
"epoch": 1.506276150627615, |
|
"grad_norm": 88.99295644400407, |
|
"learning_rate": 1.6250849924089483e-08, |
|
"logits/chosen": 0.710281491279602, |
|
"logits/rejected": 1.3649709224700928, |
|
"logps/chosen": -608.7789306640625, |
|
"logps/pi_response": -748.4505615234375, |
|
"logps/ref_response": -349.5060729980469, |
|
"logps/rejected": -1372.438232421875, |
|
"loss": 0.3777, |
|
"rewards/accuracies": 0.862500011920929, |
|
"rewards/chosen": -1.0407220125198364, |
|
"rewards/margins": 3.8326003551483154, |
|
"rewards/rejected": -4.873322486877441, |
|
"step": 90 |
|
}, |
|
{ |
|
"epoch": 1.6736401673640167, |
|
"grad_norm": 63.93568226735586, |
|
"learning_rate": 6.947819411632222e-09, |
|
"logits/chosen": 0.664978563785553, |
|
"logits/rejected": 1.2786552906036377, |
|
"logps/chosen": -617.9248657226562, |
|
"logps/pi_response": -712.550048828125, |
|
"logps/ref_response": -370.06866455078125, |
|
"logps/rejected": -1386.747314453125, |
|
"loss": 0.3394, |
|
"rewards/accuracies": 0.8687499761581421, |
|
"rewards/chosen": -1.01349937915802, |
|
"rewards/margins": 3.325646162033081, |
|
"rewards/rejected": -4.339145183563232, |
|
"step": 100 |
|
}, |
|
{ |
|
"epoch": 1.8410041841004183, |
|
"grad_norm": 48.91170773014705, |
|
"learning_rate": 1.3988542959794625e-09, |
|
"logits/chosen": 0.6376602649688721, |
|
"logits/rejected": 1.2136269807815552, |
|
"logps/chosen": -611.235595703125, |
|
"logps/pi_response": -798.3106689453125, |
|
"logps/ref_response": -364.35662841796875, |
|
"logps/rejected": -1384.1715087890625, |
|
"loss": 0.3528, |
|
"rewards/accuracies": 0.8374999761581421, |
|
"rewards/chosen": -1.1132670640945435, |
|
"rewards/margins": 3.8225269317626953, |
|
"rewards/rejected": -4.935793876647949, |
|
"step": 110 |
|
}, |
|
{ |
|
"epoch": 1.9748953974895398, |
|
"step": 118, |
|
"total_flos": 0.0, |
|
"train_loss": 0.4142875792616505, |
|
"train_runtime": 5502.1117, |
|
"train_samples_per_second": 5.555, |
|
"train_steps_per_second": 0.021 |
|
} |
|
], |
|
"logging_steps": 10, |
|
"max_steps": 118, |
|
"num_input_tokens_seen": 0, |
|
"num_train_epochs": 2, |
|
"save_steps": 100, |
|
"total_flos": 0.0, |
|
"train_batch_size": 8, |
|
"trial_name": null, |
|
"trial_params": null |
|
} |
|
|