pythia-410m-deduped-SimPOW-0 / trainer_state.json
RAY2L's picture
Upload folder using huggingface_hub
adabf08 verified
{
"best_metric": null,
"best_model_checkpoint": null,
"epoch": 1.0,
"eval_steps": 1,
"global_step": 26,
"is_hyper_param_search": false,
"is_local_process_zero": true,
"is_world_process_zero": true,
"log_history": [
{
"abs_diff": 0.4453125,
"all_logps_1": -644.9681396484375,
"all_logps_1_values": -644.9680786132812,
"all_logps_2": 424.9236145019531,
"all_logps_2_values": 424.9236145019531,
"epoch": 0.038461538461538464,
"grad_norm": 20.744102687471287,
"learning_rate": 3.333333333333333e-07,
"logits/chosen": 5.625,
"logits/rejected": 5.625,
"logps/chosen": -2.03125,
"logps/rejected": -1.9375,
"loss": 1.9612,
"original_losses": 2.0625,
"rewards/accuracies": 0.4270833432674408,
"rewards/chosen": -5.0625,
"rewards/margins": -0.2421875,
"rewards/rejected": -4.84375,
"step": 1,
"weight": 1.0
},
{
"epoch": 0.038461538461538464,
"eval_abs_diff": 0.44921875,
"eval_all_logps_1": -657.8338623046875,
"eval_all_logps_1_values": -657.8338012695312,
"eval_all_logps_2": 434.6329040527344,
"eval_all_logps_2_values": 434.6329345703125,
"eval_logits/chosen": 5.71875,
"eval_logits/rejected": 5.6875,
"eval_logps/chosen": -1.96875,
"eval_logps/rejected": -2.03125,
"eval_loss": 1.7894011735916138,
"eval_original_losses": 1.8125,
"eval_rewards/accuracies": 0.4404762089252472,
"eval_rewards/chosen": -4.90625,
"eval_rewards/margins": 0.189453125,
"eval_rewards/rejected": -5.09375,
"eval_runtime": 18.7618,
"eval_samples_per_second": 104.521,
"eval_steps_per_second": 0.373,
"eval_weight": 1.0,
"step": 1
},
{
"epoch": 0.07692307692307693,
"eval_abs_diff": 0.453125,
"eval_all_logps_1": -657.5560913085938,
"eval_all_logps_1_values": -657.5560302734375,
"eval_all_logps_2": 434.6329040527344,
"eval_all_logps_2_values": 434.6329345703125,
"eval_logits/chosen": 5.6875,
"eval_logits/rejected": 5.6875,
"eval_logps/chosen": -1.9609375,
"eval_logps/rejected": -2.03125,
"eval_loss": 1.7886760234832764,
"eval_original_losses": 1.8125,
"eval_rewards/accuracies": 0.4444444477558136,
"eval_rewards/chosen": -4.90625,
"eval_rewards/margins": 0.189453125,
"eval_rewards/rejected": -5.09375,
"eval_runtime": 7.6106,
"eval_samples_per_second": 257.667,
"eval_steps_per_second": 0.92,
"eval_weight": 1.0,
"step": 2
},
{
"epoch": 0.11538461538461539,
"eval_abs_diff": 0.451171875,
"eval_all_logps_1": -657.2574462890625,
"eval_all_logps_1_values": -657.2574462890625,
"eval_all_logps_2": 434.6329040527344,
"eval_all_logps_2_values": 434.6329345703125,
"eval_logits/chosen": 5.71875,
"eval_logits/rejected": 5.6875,
"eval_logps/chosen": -1.96875,
"eval_logps/rejected": -2.046875,
"eval_loss": 1.7886680364608765,
"eval_original_losses": 1.8203125,
"eval_rewards/accuracies": 0.4444444477558136,
"eval_rewards/chosen": -4.9375,
"eval_rewards/margins": 0.1884765625,
"eval_rewards/rejected": -5.125,
"eval_runtime": 8.5255,
"eval_samples_per_second": 230.016,
"eval_steps_per_second": 0.821,
"eval_weight": 1.0,
"step": 3
},
{
"epoch": 0.15384615384615385,
"eval_abs_diff": 0.451171875,
"eval_all_logps_1": -657.5513916015625,
"eval_all_logps_1_values": -657.5513305664062,
"eval_all_logps_2": 434.6329040527344,
"eval_all_logps_2_values": 434.6329345703125,
"eval_logits/chosen": 5.71875,
"eval_logits/rejected": 5.6875,
"eval_logps/chosen": -1.96875,
"eval_logps/rejected": -2.046875,
"eval_loss": 1.789082407951355,
"eval_original_losses": 1.8125,
"eval_rewards/accuracies": 0.4365079700946808,
"eval_rewards/chosen": -4.9375,
"eval_rewards/margins": 0.1806640625,
"eval_rewards/rejected": -5.09375,
"eval_runtime": 7.6165,
"eval_samples_per_second": 257.468,
"eval_steps_per_second": 0.919,
"eval_weight": 1.0,
"step": 4
},
{
"abs_diff": 0.466796875,
"all_logps_1": -640.5847778320312,
"all_logps_1_values": -640.584716796875,
"all_logps_2": 413.3194274902344,
"all_logps_2_values": 413.3194580078125,
"epoch": 0.19230769230769232,
"grad_norm": 21.578765352244158,
"learning_rate": 9.814586436738997e-07,
"logits/chosen": 5.6875,
"logits/rejected": 5.6875,
"logps/chosen": -1.9765625,
"logps/rejected": -2.0,
"loss": 1.868,
"original_losses": 1.9140625,
"rewards/accuracies": 0.4366319179534912,
"rewards/chosen": -4.9375,
"rewards/margins": 0.0712890625,
"rewards/rejected": -5.0,
"step": 5,
"weight": 1.0
},
{
"epoch": 0.19230769230769232,
"eval_abs_diff": 0.447265625,
"eval_all_logps_1": -656.76513671875,
"eval_all_logps_1_values": -656.76513671875,
"eval_all_logps_2": 434.6329040527344,
"eval_all_logps_2_values": 434.6329345703125,
"eval_logits/chosen": 5.71875,
"eval_logits/rejected": 5.6875,
"eval_logps/chosen": -1.9609375,
"eval_logps/rejected": -2.03125,
"eval_loss": 1.788102388381958,
"eval_original_losses": 1.8125,
"eval_rewards/accuracies": 0.432539701461792,
"eval_rewards/chosen": -4.90625,
"eval_rewards/margins": 0.181640625,
"eval_rewards/rejected": -5.09375,
"eval_runtime": 7.6214,
"eval_samples_per_second": 257.303,
"eval_steps_per_second": 0.918,
"eval_weight": 1.0,
"step": 5
},
{
"epoch": 0.23076923076923078,
"eval_abs_diff": 0.451171875,
"eval_all_logps_1": -658.1024169921875,
"eval_all_logps_1_values": -658.1024169921875,
"eval_all_logps_2": 434.6329040527344,
"eval_all_logps_2_values": 434.6329345703125,
"eval_logits/chosen": 5.71875,
"eval_logits/rejected": 5.6875,
"eval_logps/chosen": -1.9765625,
"eval_logps/rejected": -2.046875,
"eval_loss": 1.7911142110824585,
"eval_original_losses": 1.8203125,
"eval_rewards/accuracies": 0.4523809254169464,
"eval_rewards/chosen": -4.9375,
"eval_rewards/margins": 0.1669921875,
"eval_rewards/rejected": -5.09375,
"eval_runtime": 7.6157,
"eval_samples_per_second": 257.493,
"eval_steps_per_second": 0.919,
"eval_weight": 1.0,
"step": 6
},
{
"epoch": 0.2692307692307692,
"eval_abs_diff": 0.451171875,
"eval_all_logps_1": -657.3370361328125,
"eval_all_logps_1_values": -657.3370361328125,
"eval_all_logps_2": 434.6329040527344,
"eval_all_logps_2_values": 434.6329345703125,
"eval_logits/chosen": 5.71875,
"eval_logits/rejected": 5.6875,
"eval_logps/chosen": -1.9609375,
"eval_logps/rejected": -2.03125,
"eval_loss": 1.7869629859924316,
"eval_original_losses": 1.8125,
"eval_rewards/accuracies": 0.44841268658638,
"eval_rewards/chosen": -4.90625,
"eval_rewards/margins": 0.1845703125,
"eval_rewards/rejected": -5.09375,
"eval_runtime": 7.6132,
"eval_samples_per_second": 257.579,
"eval_steps_per_second": 0.919,
"eval_weight": 1.0,
"step": 7
},
{
"epoch": 0.3076923076923077,
"eval_abs_diff": 0.447265625,
"eval_all_logps_1": -657.35888671875,
"eval_all_logps_1_values": -657.35888671875,
"eval_all_logps_2": 434.6329040527344,
"eval_all_logps_2_values": 434.6329345703125,
"eval_logits/chosen": 5.6875,
"eval_logits/rejected": 5.65625,
"eval_logps/chosen": -1.96875,
"eval_logps/rejected": -2.03125,
"eval_loss": 1.7835049629211426,
"eval_original_losses": 1.8203125,
"eval_rewards/accuracies": 0.4404762089252472,
"eval_rewards/chosen": -4.90625,
"eval_rewards/margins": 0.1728515625,
"eval_rewards/rejected": -5.09375,
"eval_runtime": 7.6173,
"eval_samples_per_second": 257.44,
"eval_steps_per_second": 0.919,
"eval_weight": 1.0,
"step": 8
},
{
"epoch": 0.34615384615384615,
"eval_abs_diff": 0.4453125,
"eval_all_logps_1": -657.4702758789062,
"eval_all_logps_1_values": -657.47021484375,
"eval_all_logps_2": 434.6329040527344,
"eval_all_logps_2_values": 434.6329345703125,
"eval_logits/chosen": 5.71875,
"eval_logits/rejected": 5.6875,
"eval_logps/chosen": -1.9609375,
"eval_logps/rejected": -2.03125,
"eval_loss": 1.785959005355835,
"eval_original_losses": 1.8125,
"eval_rewards/accuracies": 0.4404761791229248,
"eval_rewards/chosen": -4.90625,
"eval_rewards/margins": 0.185546875,
"eval_rewards/rejected": -5.09375,
"eval_runtime": 7.6175,
"eval_samples_per_second": 257.433,
"eval_steps_per_second": 0.919,
"eval_weight": 1.0,
"step": 9
},
{
"abs_diff": 0.427734375,
"all_logps_1": -622.0468139648438,
"all_logps_1_values": -622.0468139648438,
"all_logps_2": 399.6798400878906,
"all_logps_2_values": 399.67987060546875,
"epoch": 0.38461538461538464,
"grad_norm": 18.220030847238334,
"learning_rate": 7.883401610574336e-07,
"logits/chosen": 5.6875,
"logits/rejected": 5.6875,
"logps/chosen": -1.953125,
"logps/rejected": -1.96875,
"loss": 1.886,
"original_losses": 1.8671875,
"rewards/accuracies": 0.47291669249534607,
"rewards/chosen": -4.875,
"rewards/margins": 0.025146484375,
"rewards/rejected": -4.90625,
"step": 10,
"weight": 1.0
},
{
"epoch": 0.38461538461538464,
"eval_abs_diff": 0.4453125,
"eval_all_logps_1": -657.2244873046875,
"eval_all_logps_1_values": -657.2244262695312,
"eval_all_logps_2": 434.6329040527344,
"eval_all_logps_2_values": 434.6329345703125,
"eval_logits/chosen": 5.71875,
"eval_logits/rejected": 5.6875,
"eval_logps/chosen": -1.9609375,
"eval_logps/rejected": -2.03125,
"eval_loss": 1.789680004119873,
"eval_original_losses": 1.8125,
"eval_rewards/accuracies": 0.4325396716594696,
"eval_rewards/chosen": -4.90625,
"eval_rewards/margins": 0.185546875,
"eval_rewards/rejected": -5.09375,
"eval_runtime": 7.6199,
"eval_samples_per_second": 257.352,
"eval_steps_per_second": 0.919,
"eval_weight": 1.0,
"step": 10
},
{
"epoch": 0.4230769230769231,
"eval_abs_diff": 0.447265625,
"eval_all_logps_1": -657.7448120117188,
"eval_all_logps_1_values": -657.7448120117188,
"eval_all_logps_2": 434.6329040527344,
"eval_all_logps_2_values": 434.6329345703125,
"eval_logits/chosen": 5.71875,
"eval_logits/rejected": 5.6875,
"eval_logps/chosen": -1.9609375,
"eval_logps/rejected": -2.03125,
"eval_loss": 1.785233974456787,
"eval_original_losses": 1.8125,
"eval_rewards/accuracies": 0.44841268658638,
"eval_rewards/chosen": -4.90625,
"eval_rewards/margins": 0.1806640625,
"eval_rewards/rejected": -5.09375,
"eval_runtime": 7.6147,
"eval_samples_per_second": 257.527,
"eval_steps_per_second": 0.919,
"eval_weight": 1.0,
"step": 11
},
{
"epoch": 0.46153846153846156,
"eval_abs_diff": 0.44921875,
"eval_all_logps_1": -657.9037475585938,
"eval_all_logps_1_values": -657.9037475585938,
"eval_all_logps_2": 434.6329040527344,
"eval_all_logps_2_values": 434.6329345703125,
"eval_logits/chosen": 5.71875,
"eval_logits/rejected": 5.6875,
"eval_logps/chosen": -1.9609375,
"eval_logps/rejected": -2.03125,
"eval_loss": 1.7826603651046753,
"eval_original_losses": 1.8203125,
"eval_rewards/accuracies": 0.4603174328804016,
"eval_rewards/chosen": -4.90625,
"eval_rewards/margins": 0.1796875,
"eval_rewards/rejected": -5.09375,
"eval_runtime": 7.6147,
"eval_samples_per_second": 257.529,
"eval_steps_per_second": 0.919,
"eval_weight": 1.0,
"step": 12
},
{
"epoch": 0.5,
"eval_abs_diff": 0.451171875,
"eval_all_logps_1": -657.7488403320312,
"eval_all_logps_1_values": -657.748779296875,
"eval_all_logps_2": 434.6329040527344,
"eval_all_logps_2_values": 434.6329345703125,
"eval_logits/chosen": 5.71875,
"eval_logits/rejected": 5.6875,
"eval_logps/chosen": -1.9609375,
"eval_logps/rejected": -2.03125,
"eval_loss": 1.7843893766403198,
"eval_original_losses": 1.8203125,
"eval_rewards/accuracies": 0.4365079402923584,
"eval_rewards/chosen": -4.90625,
"eval_rewards/margins": 0.1689453125,
"eval_rewards/rejected": -5.0625,
"eval_runtime": 7.6141,
"eval_samples_per_second": 257.548,
"eval_steps_per_second": 0.919,
"eval_weight": 1.0,
"step": 13
},
{
"epoch": 0.5384615384615384,
"eval_abs_diff": 0.439453125,
"eval_all_logps_1": -657.5706787109375,
"eval_all_logps_1_values": -657.5706787109375,
"eval_all_logps_2": 434.6329040527344,
"eval_all_logps_2_values": 434.6329345703125,
"eval_logits/chosen": 5.71875,
"eval_logits/rejected": 5.6875,
"eval_logps/chosen": -1.953125,
"eval_logps/rejected": -2.03125,
"eval_loss": 1.782787799835205,
"eval_original_losses": 1.8046875,
"eval_rewards/accuracies": 0.4404762089252472,
"eval_rewards/chosen": -4.875,
"eval_rewards/margins": 0.1884765625,
"eval_rewards/rejected": -5.0625,
"eval_runtime": 7.6188,
"eval_samples_per_second": 257.39,
"eval_steps_per_second": 0.919,
"eval_weight": 1.0,
"step": 14
},
{
"abs_diff": 0.4375,
"all_logps_1": -654.1092529296875,
"all_logps_1_values": -654.1092529296875,
"all_logps_2": 426.70001220703125,
"all_logps_2_values": 426.70001220703125,
"epoch": 0.5769230769230769,
"grad_norm": 20.281380041912414,
"learning_rate": 4.6587879331766457e-07,
"logits/chosen": 5.6875,
"logits/rejected": 5.6875,
"logps/chosen": -2.015625,
"logps/rejected": -2.03125,
"loss": 1.8572,
"original_losses": 1.875,
"rewards/accuracies": 0.43194445967674255,
"rewards/chosen": -5.03125,
"rewards/margins": 0.03857421875,
"rewards/rejected": -5.0625,
"step": 15,
"weight": 1.0
},
{
"epoch": 0.5769230769230769,
"eval_abs_diff": 0.4453125,
"eval_all_logps_1": -657.2753295898438,
"eval_all_logps_1_values": -657.2752685546875,
"eval_all_logps_2": 434.6329040527344,
"eval_all_logps_2_values": 434.6329345703125,
"eval_logits/chosen": 5.71875,
"eval_logits/rejected": 5.6875,
"eval_logps/chosen": -1.9609375,
"eval_logps/rejected": -2.03125,
"eval_loss": 1.785233974456787,
"eval_original_losses": 1.8125,
"eval_rewards/accuracies": 0.4365079402923584,
"eval_rewards/chosen": -4.90625,
"eval_rewards/margins": 0.1767578125,
"eval_rewards/rejected": -5.0625,
"eval_runtime": 7.6233,
"eval_samples_per_second": 257.239,
"eval_steps_per_second": 0.918,
"eval_weight": 1.0,
"step": 15
},
{
"epoch": 0.6153846153846154,
"eval_abs_diff": 0.44140625,
"eval_all_logps_1": -657.5228271484375,
"eval_all_logps_1_values": -657.5228271484375,
"eval_all_logps_2": 434.6329040527344,
"eval_all_logps_2_values": 434.6329345703125,
"eval_logits/chosen": 5.71875,
"eval_logits/rejected": 5.6875,
"eval_logps/chosen": -1.953125,
"eval_logps/rejected": -2.015625,
"eval_loss": 1.7797918319702148,
"eval_original_losses": 1.8125,
"eval_rewards/accuracies": 0.4246031939983368,
"eval_rewards/chosen": -4.90625,
"eval_rewards/margins": 0.1708984375,
"eval_rewards/rejected": -5.0625,
"eval_runtime": 7.6127,
"eval_samples_per_second": 257.596,
"eval_steps_per_second": 0.92,
"eval_weight": 1.0,
"step": 16
},
{
"epoch": 0.6538461538461539,
"eval_abs_diff": 0.44140625,
"eval_all_logps_1": -657.8072509765625,
"eval_all_logps_1_values": -657.8073120117188,
"eval_all_logps_2": 434.6329040527344,
"eval_all_logps_2_values": 434.6329345703125,
"eval_logits/chosen": 5.71875,
"eval_logits/rejected": 5.6875,
"eval_logps/chosen": -1.953125,
"eval_logps/rejected": -2.03125,
"eval_loss": 1.7796564102172852,
"eval_original_losses": 1.8046875,
"eval_rewards/accuracies": 0.4484127163887024,
"eval_rewards/chosen": -4.875,
"eval_rewards/margins": 0.181640625,
"eval_rewards/rejected": -5.0625,
"eval_runtime": 7.6162,
"eval_samples_per_second": 257.477,
"eval_steps_per_second": 0.919,
"eval_weight": 1.0,
"step": 17
},
{
"epoch": 0.6923076923076923,
"eval_abs_diff": 0.4375,
"eval_all_logps_1": -657.43701171875,
"eval_all_logps_1_values": -657.43701171875,
"eval_all_logps_2": 434.6329040527344,
"eval_all_logps_2_values": 434.6329345703125,
"eval_logits/chosen": 5.71875,
"eval_logits/rejected": 5.6875,
"eval_logps/chosen": -1.9609375,
"eval_logps/rejected": -2.03125,
"eval_loss": 1.7829551696777344,
"eval_original_losses": 1.8125,
"eval_rewards/accuracies": 0.4404762089252472,
"eval_rewards/chosen": -4.90625,
"eval_rewards/margins": 0.1630859375,
"eval_rewards/rejected": -5.0625,
"eval_runtime": 7.6171,
"eval_samples_per_second": 257.447,
"eval_steps_per_second": 0.919,
"eval_weight": 1.0,
"step": 18
},
{
"epoch": 0.7307692307692307,
"eval_abs_diff": 0.44140625,
"eval_all_logps_1": -657.5411376953125,
"eval_all_logps_1_values": -657.5411987304688,
"eval_all_logps_2": 434.6329040527344,
"eval_all_logps_2_values": 434.6329345703125,
"eval_logits/chosen": 5.71875,
"eval_logits/rejected": 5.6875,
"eval_logps/chosen": -1.9609375,
"eval_logps/rejected": -2.03125,
"eval_loss": 1.783098578453064,
"eval_original_losses": 1.8046875,
"eval_rewards/accuracies": 0.4523809552192688,
"eval_rewards/chosen": -4.875,
"eval_rewards/margins": 0.1787109375,
"eval_rewards/rejected": -5.0625,
"eval_runtime": 7.6155,
"eval_samples_per_second": 257.501,
"eval_steps_per_second": 0.919,
"eval_weight": 1.0,
"step": 19
},
{
"abs_diff": 0.392578125,
"all_logps_1": -622.8844604492188,
"all_logps_1_values": -622.8843994140625,
"all_logps_2": 406.131591796875,
"all_logps_2_values": 406.131591796875,
"epoch": 0.7692307692307693,
"grad_norm": 25.7996031816865,
"learning_rate": 1.5872342839067304e-07,
"logits/chosen": 5.75,
"logits/rejected": 5.71875,
"logps/chosen": -1.9609375,
"logps/rejected": -1.96875,
"loss": 1.8374,
"original_losses": 1.828125,
"rewards/accuracies": 0.44930553436279297,
"rewards/chosen": -4.90625,
"rewards/margins": 0.0208740234375,
"rewards/rejected": -4.9375,
"step": 20,
"weight": 1.0
},
{
"epoch": 0.7692307692307693,
"eval_abs_diff": 0.451171875,
"eval_all_logps_1": -657.5830078125,
"eval_all_logps_1_values": -657.5830688476562,
"eval_all_logps_2": 434.6329040527344,
"eval_all_logps_2_values": 434.6329345703125,
"eval_logits/chosen": 5.71875,
"eval_logits/rejected": 5.6875,
"eval_logps/chosen": -1.953125,
"eval_logps/rejected": -2.03125,
"eval_loss": 1.7812340259552002,
"eval_original_losses": 1.8046875,
"eval_rewards/accuracies": 0.4523809552192688,
"eval_rewards/chosen": -4.90625,
"eval_rewards/margins": 0.197265625,
"eval_rewards/rejected": -5.09375,
"eval_runtime": 7.6104,
"eval_samples_per_second": 257.674,
"eval_steps_per_second": 0.92,
"eval_weight": 1.0,
"step": 20
},
{
"epoch": 0.8076923076923077,
"eval_abs_diff": 0.44140625,
"eval_all_logps_1": -657.6909790039062,
"eval_all_logps_1_values": -657.6909790039062,
"eval_all_logps_2": 434.6329040527344,
"eval_all_logps_2_values": 434.6329345703125,
"eval_logits/chosen": 5.71875,
"eval_logits/rejected": 5.6875,
"eval_logps/chosen": -1.9609375,
"eval_logps/rejected": -2.03125,
"eval_loss": 1.7849550247192383,
"eval_original_losses": 1.8125,
"eval_rewards/accuracies": 0.4444444477558136,
"eval_rewards/chosen": -4.875,
"eval_rewards/margins": 0.171875,
"eval_rewards/rejected": -5.0625,
"eval_runtime": 7.6199,
"eval_samples_per_second": 257.354,
"eval_steps_per_second": 0.919,
"eval_weight": 1.0,
"step": 21
},
{
"epoch": 0.8461538461538461,
"eval_abs_diff": 0.443359375,
"eval_all_logps_1": -657.1679077148438,
"eval_all_logps_1_values": -657.1679077148438,
"eval_all_logps_2": 434.6329040527344,
"eval_all_logps_2_values": 434.6329345703125,
"eval_logits/chosen": 5.71875,
"eval_logits/rejected": 5.6875,
"eval_logps/chosen": -1.953125,
"eval_logps/rejected": -2.03125,
"eval_loss": 1.7851064205169678,
"eval_original_losses": 1.8046875,
"eval_rewards/accuracies": 0.4404761791229248,
"eval_rewards/chosen": -4.90625,
"eval_rewards/margins": 0.18359375,
"eval_rewards/rejected": -5.0625,
"eval_runtime": 7.6179,
"eval_samples_per_second": 257.42,
"eval_steps_per_second": 0.919,
"eval_weight": 1.0,
"step": 22
},
{
"epoch": 0.8846153846153846,
"eval_abs_diff": 0.4375,
"eval_all_logps_1": -658.0194091796875,
"eval_all_logps_1_values": -658.0193481445312,
"eval_all_logps_2": 434.6329040527344,
"eval_all_logps_2_values": 434.6329345703125,
"eval_logits/chosen": 5.71875,
"eval_logits/rejected": 5.6875,
"eval_logps/chosen": -1.9609375,
"eval_logps/rejected": -2.03125,
"eval_loss": 1.7782222032546997,
"eval_original_losses": 1.8046875,
"eval_rewards/accuracies": 0.436507910490036,
"eval_rewards/chosen": -4.90625,
"eval_rewards/margins": 0.1748046875,
"eval_rewards/rejected": -5.0625,
"eval_runtime": 7.6206,
"eval_samples_per_second": 257.33,
"eval_steps_per_second": 0.919,
"eval_weight": 1.0,
"step": 23
},
{
"epoch": 0.9230769230769231,
"eval_abs_diff": 0.4375,
"eval_all_logps_1": -657.4482421875,
"eval_all_logps_1_values": -657.4481811523438,
"eval_all_logps_2": 434.6329040527344,
"eval_all_logps_2_values": 434.6329345703125,
"eval_logits/chosen": 5.71875,
"eval_logits/rejected": 5.6875,
"eval_logps/chosen": -1.9609375,
"eval_logps/rejected": -2.03125,
"eval_loss": 1.7799512147903442,
"eval_original_losses": 1.8046875,
"eval_rewards/accuracies": 0.4523809552192688,
"eval_rewards/chosen": -4.90625,
"eval_rewards/margins": 0.1708984375,
"eval_rewards/rejected": -5.0625,
"eval_runtime": 7.6139,
"eval_samples_per_second": 257.556,
"eval_steps_per_second": 0.919,
"eval_weight": 1.0,
"step": 24
},
{
"abs_diff": 0.396484375,
"all_logps_1": -644.0272216796875,
"all_logps_1_values": -644.0272216796875,
"all_logps_2": 413.7660217285156,
"all_logps_2_values": 413.7659606933594,
"epoch": 0.9615384615384616,
"grad_norm": 18.38859727265469,
"learning_rate": 4.657026981834622e-09,
"logits/chosen": 5.71875,
"logits/rejected": 5.6875,
"logps/chosen": -1.984375,
"logps/rejected": -1.9609375,
"loss": 1.8714,
"original_losses": 1.90625,
"rewards/accuracies": 0.4229166507720947,
"rewards/chosen": -4.96875,
"rewards/margins": -0.0693359375,
"rewards/rejected": -4.90625,
"step": 25,
"weight": 1.0
},
{
"epoch": 0.9615384615384616,
"eval_abs_diff": 0.4375,
"eval_all_logps_1": -657.451171875,
"eval_all_logps_1_values": -657.4511108398438,
"eval_all_logps_2": 434.6329040527344,
"eval_all_logps_2_values": 434.6329345703125,
"eval_logits/chosen": 5.71875,
"eval_logits/rejected": 5.6875,
"eval_logps/chosen": -1.953125,
"eval_logps/rejected": -2.03125,
"eval_loss": 1.778795838356018,
"eval_original_losses": 1.796875,
"eval_rewards/accuracies": 0.4325396716594696,
"eval_rewards/chosen": -4.875,
"eval_rewards/margins": 0.181640625,
"eval_rewards/rejected": -5.0625,
"eval_runtime": 7.6217,
"eval_samples_per_second": 257.293,
"eval_steps_per_second": 0.918,
"eval_weight": 1.0,
"step": 25
},
{
"epoch": 1.0,
"eval_abs_diff": 0.4453125,
"eval_all_logps_1": -656.8973388671875,
"eval_all_logps_1_values": -656.8973388671875,
"eval_all_logps_2": 434.6329040527344,
"eval_all_logps_2_values": 434.6329345703125,
"eval_logits/chosen": 5.71875,
"eval_logits/rejected": 5.6875,
"eval_logps/chosen": -1.9453125,
"eval_logps/rejected": -2.03125,
"eval_loss": 1.7800946235656738,
"eval_original_losses": 1.796875,
"eval_rewards/accuracies": 0.4404762089252472,
"eval_rewards/chosen": -4.875,
"eval_rewards/margins": 0.2001953125,
"eval_rewards/rejected": -5.0625,
"eval_runtime": 7.6164,
"eval_samples_per_second": 257.472,
"eval_steps_per_second": 0.919,
"eval_weight": 1.0,
"step": 26
},
{
"epoch": 1.0,
"step": 26,
"total_flos": 0.0,
"train_loss": 1.8681734525240385,
"train_runtime": 997.9993,
"train_samples_per_second": 59.996,
"train_steps_per_second": 0.026
}
],
"logging_steps": 5,
"max_steps": 26,
"num_input_tokens_seen": 0,
"num_train_epochs": 1,
"save_steps": 1000000,
"stateful_callbacks": {
"TrainerControl": {
"args": {
"should_epoch_stop": false,
"should_evaluate": false,
"should_log": false,
"should_save": true,
"should_training_stop": true
},
"attributes": {}
}
},
"total_flos": 0.0,
"train_batch_size": 36,
"trial_name": null,
"trial_params": null
}